1#![expect(clippy::multiple_crate_versions)] use std::{borrow::Cow, fmt, num::NonZeroUsize};
23
24use ndarray::{Array, Array1, ArrayBase, ArrayViewMut, Data, Dimension, ShapeError};
25use numcodecs::{
26 AnyArray, AnyArrayAssignError, AnyArrayDType, AnyArrayView, AnyArrayViewMut, AnyCowArray,
27 Codec, StaticCodec, StaticCodecConfig,
28};
29use schemars::{JsonSchema, JsonSchema_repr};
30use serde::{Deserialize, Serialize};
31use serde_repr::{Deserialize_repr, Serialize_repr};
32use thiserror::Error;
33
34#[cfg(test)]
35use ::serde_json as _;
36
37#[derive(Clone, Serialize, Deserialize, JsonSchema)]
38#[schemars(deny_unknown_fields)] pub struct Pcodec {
41 pub level: PcoCompressionLevel,
44 #[serde(flatten)]
46 pub mode: PcoModeSpec,
47 #[serde(flatten)]
49 pub delta: PcoDeltaSpec,
50 #[serde(flatten)]
52 pub paging: PcoPagingSpec,
53}
54
55#[derive(
56 Copy, Clone, Debug, Default, PartialEq, Eq, Serialize_repr, Deserialize_repr, JsonSchema_repr,
57)]
58#[repr(u8)]
59#[expect(missing_docs)]
66pub enum PcoCompressionLevel {
67 Level0 = 0,
68 Level1 = 1,
69 Level2 = 2,
70 Level3 = 3,
71 Level4 = 4,
72 Level5 = 5,
73 Level6 = 6,
74 Level7 = 7,
75 #[default]
76 Level8 = 8,
77 Level9 = 9,
78 Level10 = 10,
79 Level11 = 11,
80 Level12 = 12,
81}
82
83#[derive(Copy, Clone, Debug, Default, PartialEq, Serialize, Deserialize, JsonSchema)]
84#[schemars(deny_unknown_fields)] #[serde(tag = "mode", rename_all = "kebab-case")]
86pub enum PcoModeSpec {
88 #[default]
89 Auto,
94 Classic,
96 TryFloatMult {
100 float_mult_base: f64,
102 },
103 TryFloatQuant {
108 float_quant_bits: u32,
110 },
111 TryIntMult {
115 int_mult_base: u64,
117 },
118}
119
120#[derive(Copy, Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
121#[schemars(deny_unknown_fields)] #[serde(tag = "delta", rename_all = "kebab-case")]
123pub enum PcoDeltaSpec {
125 #[default]
126 Auto,
131 None,
136 TryConsecutive {
142 delta_encoding_order: PcoDeltaEncodingOrder,
144 },
145 TryLookback,
151}
152
153#[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize_repr, Deserialize_repr, JsonSchema_repr)]
154#[repr(u8)]
155#[expect(missing_docs)]
159pub enum PcoDeltaEncodingOrder {
160 Order0 = 0,
161 Order1 = 1,
162 Order2 = 2,
163 Order3 = 3,
164 Order4 = 4,
165 Order5 = 5,
166 Order6 = 6,
167 Order7 = 7,
168}
169
170#[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
171#[schemars(deny_unknown_fields)] #[serde(tag = "paging", rename_all = "kebab-case")]
173pub enum PcoPagingSpec {
175 EqualPagesUpTo {
180 #[serde(default = "default_equal_pages_up_to")]
181 equal_pages_up_to: NonZeroUsize,
183 },
184}
185
186impl Default for PcoPagingSpec {
187 fn default() -> Self {
188 Self::EqualPagesUpTo {
189 equal_pages_up_to: default_equal_pages_up_to(),
190 }
191 }
192}
193
194const fn default_equal_pages_up_to() -> NonZeroUsize {
195 NonZeroUsize::MIN.saturating_add(pco::DEFAULT_MAX_PAGE_N.saturating_sub(1))
196}
197
198impl Codec for Pcodec {
199 type Error = PcodecError;
200
201 fn encode(&self, data: AnyCowArray) -> Result<AnyArray, Self::Error> {
202 match data {
203 AnyCowArray::U16(data) => Ok(AnyArray::U8(
204 Array1::from(compress(
205 data,
206 self.level,
207 self.mode,
208 self.delta,
209 self.paging,
210 )?)
211 .into_dyn(),
212 )),
213 AnyCowArray::U32(data) => Ok(AnyArray::U8(
214 Array1::from(compress(
215 data,
216 self.level,
217 self.mode,
218 self.delta,
219 self.paging,
220 )?)
221 .into_dyn(),
222 )),
223 AnyCowArray::U64(data) => Ok(AnyArray::U8(
224 Array1::from(compress(
225 data,
226 self.level,
227 self.mode,
228 self.delta,
229 self.paging,
230 )?)
231 .into_dyn(),
232 )),
233 AnyCowArray::I16(data) => Ok(AnyArray::U8(
234 Array1::from(compress(
235 data,
236 self.level,
237 self.mode,
238 self.delta,
239 self.paging,
240 )?)
241 .into_dyn(),
242 )),
243 AnyCowArray::I32(data) => Ok(AnyArray::U8(
244 Array1::from(compress(
245 data,
246 self.level,
247 self.mode,
248 self.delta,
249 self.paging,
250 )?)
251 .into_dyn(),
252 )),
253 AnyCowArray::I64(data) => Ok(AnyArray::U8(
254 Array1::from(compress(
255 data,
256 self.level,
257 self.mode,
258 self.delta,
259 self.paging,
260 )?)
261 .into_dyn(),
262 )),
263 AnyCowArray::F32(data) => Ok(AnyArray::U8(
264 Array1::from(compress(
265 data,
266 self.level,
267 self.mode,
268 self.delta,
269 self.paging,
270 )?)
271 .into_dyn(),
272 )),
273 AnyCowArray::F64(data) => Ok(AnyArray::U8(
274 Array1::from(compress(
275 data,
276 self.level,
277 self.mode,
278 self.delta,
279 self.paging,
280 )?)
281 .into_dyn(),
282 )),
283 encoded => Err(PcodecError::UnsupportedDtype(encoded.dtype())),
284 }
285 }
286
287 fn decode(&self, encoded: AnyCowArray) -> Result<AnyArray, Self::Error> {
288 let AnyCowArray::U8(encoded) = encoded else {
289 return Err(PcodecError::EncodedDataNotBytes {
290 dtype: encoded.dtype(),
291 });
292 };
293
294 if !matches!(encoded.shape(), [_]) {
295 return Err(PcodecError::EncodedDataNotOneDimensional {
296 shape: encoded.shape().to_vec(),
297 });
298 }
299
300 decompress(&AnyCowArray::U8(encoded).as_bytes())
301 }
302
303 fn decode_into(
304 &self,
305 encoded: AnyArrayView,
306 decoded: AnyArrayViewMut,
307 ) -> Result<(), Self::Error> {
308 let AnyArrayView::U8(encoded) = encoded else {
309 return Err(PcodecError::EncodedDataNotBytes {
310 dtype: encoded.dtype(),
311 });
312 };
313
314 if !matches!(encoded.shape(), [_]) {
315 return Err(PcodecError::EncodedDataNotOneDimensional {
316 shape: encoded.shape().to_vec(),
317 });
318 }
319
320 let encoded = AnyArrayView::U8(encoded);
321 let encoded = encoded.as_bytes();
322
323 match decoded {
324 AnyArrayViewMut::U16(decoded) => decompress_into(&encoded, decoded),
325 AnyArrayViewMut::U32(decoded) => decompress_into(&encoded, decoded),
326 AnyArrayViewMut::U64(decoded) => decompress_into(&encoded, decoded),
327 AnyArrayViewMut::I16(decoded) => decompress_into(&encoded, decoded),
328 AnyArrayViewMut::I32(decoded) => decompress_into(&encoded, decoded),
329 AnyArrayViewMut::I64(decoded) => decompress_into(&encoded, decoded),
330 AnyArrayViewMut::F32(decoded) => decompress_into(&encoded, decoded),
331 AnyArrayViewMut::F64(decoded) => decompress_into(&encoded, decoded),
332 decoded => Err(PcodecError::UnsupportedDtype(decoded.dtype())),
333 }
334 }
335}
336
337impl StaticCodec for Pcodec {
338 const CODEC_ID: &'static str = "pco";
339
340 type Config<'de> = Self;
341
342 fn from_config(config: Self::Config<'_>) -> Self {
343 config
344 }
345
346 fn get_config(&self) -> StaticCodecConfig<Self> {
347 StaticCodecConfig::from(self)
348 }
349}
350
351#[derive(Debug, Error)]
352pub enum PcodecError {
354 #[error("Pco does not support the dtype {0}")]
356 UnsupportedDtype(AnyArrayDType),
357 #[error("Pco failed to encode the header")]
359 HeaderEncodeFailed {
360 source: PcoHeaderError,
362 },
363 #[error("Pco failed to encode the data")]
365 PcoEncodeFailed {
366 source: PcoCodingError,
368 },
369 #[error(
372 "Pco can only decode one-dimensional byte arrays but received an array of dtype {dtype}"
373 )]
374 EncodedDataNotBytes {
375 dtype: AnyArrayDType,
377 },
378 #[error("Pco can only decode one-dimensional byte arrays but received a byte array of shape {shape:?}")]
381 EncodedDataNotOneDimensional {
382 shape: Vec<usize>,
384 },
385 #[error("Pco failed to decode the header")]
387 HeaderDecodeFailed {
388 source: PcoHeaderError,
390 },
391 #[error("Pco failed to decode the data")]
393 PcoDecodeFailed {
394 source: PcoCodingError,
396 },
397 #[error("Pco decoded an invalid array shape header which does not fit the decoded data")]
400 DecodeInvalidShapeHeader {
401 #[from]
403 source: ShapeError,
404 },
405 #[error("Pco cannot decode into the provided array")]
407 MismatchedDecodeIntoArray {
408 #[from]
410 source: AnyArrayAssignError,
411 },
412}
413
414#[derive(Debug, Error)]
415#[error(transparent)]
416pub struct PcoHeaderError(postcard::Error);
418
419#[derive(Debug, Error)]
420#[error(transparent)]
421pub struct PcoCodingError(pco::errors::PcoError);
423
424#[expect(clippy::needless_pass_by_value)]
425pub fn compress<T: PcoElement, S: Data<Elem = T>, D: Dimension>(
434 data: ArrayBase<S, D>,
435 level: PcoCompressionLevel,
436 mode: PcoModeSpec,
437 delta: PcoDeltaSpec,
438 paging: PcoPagingSpec,
439) -> Result<Vec<u8>, PcodecError> {
440 let mut encoded_bytes = postcard::to_extend(
441 &CompressionHeader {
442 dtype: <T as PcoElement>::DTYPE,
443 shape: Cow::Borrowed(data.shape()),
444 },
445 Vec::new(),
446 )
447 .map_err(|err| PcodecError::HeaderEncodeFailed {
448 source: PcoHeaderError(err),
449 })?;
450
451 let data_owned;
452 #[expect(clippy::option_if_let_else)]
453 let data = if let Some(slice) = data.as_slice() {
454 slice
455 } else {
456 data_owned = data.into_iter().copied().collect::<Vec<T>>();
457 data_owned.as_slice()
458 };
459
460 let config = pco::ChunkConfig::default()
461 .with_compression_level(level as usize)
462 .with_mode_spec(match mode {
463 PcoModeSpec::Auto => pco::ModeSpec::Auto,
464 PcoModeSpec::Classic => pco::ModeSpec::Classic,
465 PcoModeSpec::TryFloatMult { float_mult_base } => {
466 pco::ModeSpec::TryFloatMult(float_mult_base)
467 }
468 PcoModeSpec::TryFloatQuant { float_quant_bits } => {
469 pco::ModeSpec::TryFloatQuant(float_quant_bits)
470 }
471 PcoModeSpec::TryIntMult { int_mult_base } => pco::ModeSpec::TryIntMult(int_mult_base),
472 })
473 .with_delta_spec(match delta {
474 PcoDeltaSpec::Auto => pco::DeltaSpec::Auto,
475 PcoDeltaSpec::None => pco::DeltaSpec::None,
476 PcoDeltaSpec::TryConsecutive {
477 delta_encoding_order,
478 } => pco::DeltaSpec::TryConsecutive(delta_encoding_order as usize),
479 PcoDeltaSpec::TryLookback => pco::DeltaSpec::TryLookback,
480 })
481 .with_paging_spec(match paging {
482 PcoPagingSpec::EqualPagesUpTo { equal_pages_up_to } => {
483 pco::PagingSpec::EqualPagesUpTo(equal_pages_up_to.get())
484 }
485 });
486
487 let encoded = pco::standalone::simple_compress(data, &config).map_err(|err| {
488 PcodecError::PcoEncodeFailed {
489 source: PcoCodingError(err),
490 }
491 })?;
492 encoded_bytes.extend_from_slice(&encoded);
493
494 Ok(encoded_bytes)
495}
496
497pub fn decompress(encoded: &[u8]) -> Result<AnyArray, PcodecError> {
505 let (header, data) =
506 postcard::take_from_bytes::<CompressionHeader>(encoded).map_err(|err| {
507 PcodecError::HeaderDecodeFailed {
508 source: PcoHeaderError(err),
509 }
510 })?;
511
512 let decoded = match header.dtype {
513 PcoDType::U16 => AnyArray::U16(Array::from_shape_vec(
514 &*header.shape,
515 pco::standalone::simple_decompress(data).map_err(|err| {
516 PcodecError::PcoDecodeFailed {
517 source: PcoCodingError(err),
518 }
519 })?,
520 )?),
521 PcoDType::U32 => AnyArray::U32(Array::from_shape_vec(
522 &*header.shape,
523 pco::standalone::simple_decompress(data).map_err(|err| {
524 PcodecError::PcoDecodeFailed {
525 source: PcoCodingError(err),
526 }
527 })?,
528 )?),
529 PcoDType::U64 => AnyArray::U64(Array::from_shape_vec(
530 &*header.shape,
531 pco::standalone::simple_decompress(data).map_err(|err| {
532 PcodecError::PcoDecodeFailed {
533 source: PcoCodingError(err),
534 }
535 })?,
536 )?),
537 PcoDType::I16 => AnyArray::I16(Array::from_shape_vec(
538 &*header.shape,
539 pco::standalone::simple_decompress(data).map_err(|err| {
540 PcodecError::PcoDecodeFailed {
541 source: PcoCodingError(err),
542 }
543 })?,
544 )?),
545 PcoDType::I32 => AnyArray::I32(Array::from_shape_vec(
546 &*header.shape,
547 pco::standalone::simple_decompress(data).map_err(|err| {
548 PcodecError::PcoDecodeFailed {
549 source: PcoCodingError(err),
550 }
551 })?,
552 )?),
553 PcoDType::I64 => AnyArray::I64(Array::from_shape_vec(
554 &*header.shape,
555 pco::standalone::simple_decompress(data).map_err(|err| {
556 PcodecError::PcoDecodeFailed {
557 source: PcoCodingError(err),
558 }
559 })?,
560 )?),
561 PcoDType::F32 => AnyArray::F32(Array::from_shape_vec(
562 &*header.shape,
563 pco::standalone::simple_decompress(data).map_err(|err| {
564 PcodecError::PcoDecodeFailed {
565 source: PcoCodingError(err),
566 }
567 })?,
568 )?),
569 PcoDType::F64 => AnyArray::F64(Array::from_shape_vec(
570 &*header.shape,
571 pco::standalone::simple_decompress(data).map_err(|err| {
572 PcodecError::PcoDecodeFailed {
573 source: PcoCodingError(err),
574 }
575 })?,
576 )?),
577 };
578
579 Ok(decoded)
580}
581
582pub fn decompress_into<T: PcoElement, D: Dimension>(
594 encoded: &[u8],
595 mut decoded: ArrayViewMut<T, D>,
596) -> Result<(), PcodecError> {
597 let (header, data) =
598 postcard::take_from_bytes::<CompressionHeader>(encoded).map_err(|err| {
599 PcodecError::HeaderDecodeFailed {
600 source: PcoHeaderError(err),
601 }
602 })?;
603
604 if T::DTYPE != header.dtype {
605 return Err(PcodecError::MismatchedDecodeIntoArray {
606 source: AnyArrayAssignError::DTypeMismatch {
607 src: header.dtype.into_dtype(),
608 dst: T::DTYPE.into_dtype(),
609 },
610 });
611 }
612
613 if decoded.shape() != &*header.shape {
614 return Err(PcodecError::MismatchedDecodeIntoArray {
615 source: AnyArrayAssignError::ShapeMismatch {
616 src: header.shape.into_owned(),
617 dst: decoded.shape().to_vec(),
618 },
619 });
620 }
621
622 if let Some(slice) = decoded.as_slice_mut() {
623 pco::standalone::simple_decompress_into(data, slice).map_err(|err| {
624 PcodecError::PcoDecodeFailed {
625 source: PcoCodingError(err),
626 }
627 })?;
628 return Ok(());
629 }
630
631 let dec =
632 pco::standalone::simple_decompress(data).map_err(|err| PcodecError::PcoDecodeFailed {
633 source: PcoCodingError(err),
634 })?;
635
636 if dec.len() != decoded.len() {
637 return Err(PcodecError::DecodeInvalidShapeHeader {
638 source: ShapeError::from_kind(ndarray::ErrorKind::IncompatibleShape),
639 });
640 }
641
642 decoded.iter_mut().zip(dec).for_each(|(o, d)| *o = d);
643
644 Ok(())
645}
646
647pub trait PcoElement: Copy + pco::data_types::Number {
649 const DTYPE: PcoDType;
651}
652
653impl PcoElement for u16 {
654 const DTYPE: PcoDType = PcoDType::U16;
655}
656
657impl PcoElement for u32 {
658 const DTYPE: PcoDType = PcoDType::U32;
659}
660
661impl PcoElement for u64 {
662 const DTYPE: PcoDType = PcoDType::U64;
663}
664
665impl PcoElement for i16 {
666 const DTYPE: PcoDType = PcoDType::I16;
667}
668
669impl PcoElement for i32 {
670 const DTYPE: PcoDType = PcoDType::I32;
671}
672
673impl PcoElement for i64 {
674 const DTYPE: PcoDType = PcoDType::I64;
675}
676
677impl PcoElement for f32 {
678 const DTYPE: PcoDType = PcoDType::F32;
679}
680
681impl PcoElement for f64 {
682 const DTYPE: PcoDType = PcoDType::F64;
683}
684
685#[derive(Serialize, Deserialize)]
686struct CompressionHeader<'a> {
687 dtype: PcoDType,
688 #[serde(borrow)]
689 shape: Cow<'a, [usize]>,
690}
691
692#[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
694#[expect(missing_docs)]
695pub enum PcoDType {
696 #[serde(rename = "u16", alias = "uint16")]
697 U16,
698 #[serde(rename = "u32", alias = "uint32")]
699 U32,
700 #[serde(rename = "u64", alias = "uint64")]
701 U64,
702 #[serde(rename = "i16", alias = "int16")]
703 I16,
704 #[serde(rename = "i32", alias = "int32")]
705 I32,
706 #[serde(rename = "i64", alias = "int64")]
707 I64,
708 #[serde(rename = "f32", alias = "float32")]
709 F32,
710 #[serde(rename = "f64", alias = "float64")]
711 F64,
712}
713
714impl PcoDType {
715 #[must_use]
716 pub const fn into_dtype(self) -> AnyArrayDType {
718 match self {
719 Self::U16 => AnyArrayDType::U16,
720 Self::U32 => AnyArrayDType::U32,
721 Self::U64 => AnyArrayDType::U64,
722 Self::I16 => AnyArrayDType::I16,
723 Self::I32 => AnyArrayDType::I32,
724 Self::I64 => AnyArrayDType::I64,
725 Self::F32 => AnyArrayDType::F32,
726 Self::F64 => AnyArrayDType::F64,
727 }
728 }
729}
730
731impl fmt::Display for PcoDType {
732 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
733 fmt.write_str(match self {
734 Self::U16 => "u16",
735 Self::U32 => "u32",
736 Self::U64 => "u64",
737 Self::I16 => "i16",
738 Self::I32 => "i32",
739 Self::I64 => "i64",
740 Self::F32 => "f32",
741 Self::F64 => "f64",
742 })
743 }
744}