1#![allow(clippy::multiple_crate_versions)] use std::{borrow::Cow, fmt, num::NonZeroUsize};
23
24use ndarray::{Array, Array1, ArrayBase, ArrayViewMut, Data, Dimension, ShapeError};
25use numcodecs::{
26 AnyArray, AnyArrayAssignError, AnyArrayDType, AnyArrayView, AnyArrayViewMut, AnyCowArray,
27 Codec, StaticCodec, StaticCodecConfig, StaticCodecVersion,
28};
29use schemars::{JsonSchema, JsonSchema_repr};
30use serde::{Deserialize, Serialize};
31use serde_repr::{Deserialize_repr, Serialize_repr};
32use thiserror::Error;
33
34#[cfg(test)]
35use ::serde_json as _;
36
/// Version marker of this codec's encoding format.
///
/// It is the type of the `_version` entry of the [`Pcodec`] configuration and
/// of the `version` field of the header that is written in front of the
/// compressed bytes.
// NOTE(review): the three const parameters presumably read as
// major.minor.patch — confirm against `numcodecs::StaticCodecVersion`.
type PcodecVersion = StaticCodecVersion<0, 2, 0>;
38
/// Codec that compresses numeric arrays with `pco`.
///
/// The struct doubles as the codec's configuration: the mode, delta and paging
/// settings are flattened into the same map as `level`, so their `mode`,
/// `delta` and `paging` tags appear as sibling keys of `level`.
#[derive(Clone, Serialize, Deserialize, JsonSchema)]
#[schemars(deny_unknown_fields)]
pub struct Pcodec {
    /// Compression level that is forwarded to pco.
    pub level: PcoCompressionLevel,
    /// How the pco mode is selected (tagged by the `mode` key).
    #[serde(flatten)]
    pub mode: PcoModeSpec,
    /// How the pco delta encoding is selected (tagged by the `delta` key).
    #[serde(flatten)]
    pub delta: PcoDeltaSpec,
    /// How the data is split into pages (tagged by the `paging` key).
    #[serde(flatten)]
    pub paging: PcoPagingSpec,
    /// Encoding format version, stored under the `_version` key.
    ///
    /// Falls back to its default value when the key is absent.
    #[serde(default, rename = "_version")]
    pub version: PcodecVersion,
}
59
/// Compression level handed to pco, from `Level0` up to `Level12`.
///
/// The level is (de)serialised as its bare integer discriminant and is passed
/// to `pco::ChunkConfig::with_compression_level` as that integer. `Level8` is
/// the default.
// NOTE(review): higher levels presumably trade speed for compression ratio —
// confirm against the pco documentation.
#[derive(
    Copy, Clone, Debug, Default, PartialEq, Eq, Serialize_repr, Deserialize_repr, JsonSchema_repr,
)]
#[repr(u8)]
#[expect(missing_docs)]
pub enum PcoCompressionLevel {
    Level0 = 0,
    Level1 = 1,
    Level2 = 2,
    Level3 = 3,
    Level4 = 4,
    Level5 = 5,
    Level6 = 6,
    Level7 = 7,
    #[default]
    Level8 = 8,
    Level9 = 9,
    Level10 = 10,
    Level11 = 11,
    Level12 = 12,
}
87
/// Selection of the pco mode, chosen in the configuration by the `mode` key.
///
/// Variant names are (de)serialised in kebab-case (e.g. `try-float-mult`), and
/// [`compress`] maps every variant onto the `pco::ModeSpec` variant of the
/// same name.
#[derive(Copy, Clone, Debug, Default, PartialEq, Serialize, Deserialize, JsonSchema)]
#[schemars(deny_unknown_fields)]
#[serde(tag = "mode", rename_all = "kebab-case")]
pub enum PcoModeSpec {
    /// Default choice; maps to `pco::ModeSpec::Auto`.
    #[default]
    Auto,
    /// Maps to `pco::ModeSpec::Classic`.
    Classic,
    /// Maps to `pco::ModeSpec::TryFloatMult`.
    TryFloatMult {
        /// Base that is handed to `pco::ModeSpec::TryFloatMult`.
        float_mult_base: f64,
    },
    /// Maps to `pco::ModeSpec::TryFloatQuant`.
    TryFloatQuant {
        /// Bit count that is handed to `pco::ModeSpec::TryFloatQuant`.
        float_quant_bits: u32,
    },
    /// Maps to `pco::ModeSpec::TryIntMult`.
    TryIntMult {
        /// Base that is handed to `pco::ModeSpec::TryIntMult`.
        int_mult_base: u64,
    },
}
124
/// Selection of the pco delta encoding, chosen in the configuration by the
/// `delta` key.
///
/// Variant names are (de)serialised in kebab-case (e.g. `try-consecutive`),
/// and [`compress`] maps every variant onto the `pco::DeltaSpec` variant of
/// the same name.
#[derive(Copy, Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
#[schemars(deny_unknown_fields)]
#[serde(tag = "delta", rename_all = "kebab-case")]
pub enum PcoDeltaSpec {
    /// Default choice; maps to `pco::DeltaSpec::Auto`.
    #[default]
    Auto,
    /// Maps to `pco::DeltaSpec::NoOp`.
    NoOp,
    /// Maps to `pco::DeltaSpec::TryConsecutive`.
    TryConsecutive {
        /// Order that is handed to `pco::DeltaSpec::TryConsecutive` as an integer.
        delta_encoding_order: PcoDeltaEncodingConsecutiveOrder,
    },
    /// Maps to `pco::DeltaSpec::TryLookback`.
    TryLookback,
    /// Maps to `pco::DeltaSpec::TryConv1`.
    TryConv1 {
        /// Order that is handed to `pco::DeltaSpec::TryConv1` as an integer.
        delta_encoding_order: PcoDeltaEncodingConv1Order,
    },
}
167
/// Order of the consecutive delta encoding, from `Order0` up to `Order7`.
///
/// The order is (de)serialised as its bare integer discriminant, which
/// [`compress`] passes on to `pco::DeltaSpec::TryConsecutive`.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize_repr, Deserialize_repr, JsonSchema_repr)]
#[repr(u8)]
#[expect(missing_docs)]
pub enum PcoDeltaEncodingConsecutiveOrder {
    Order0 = 0,
    Order1 = 1,
    Order2 = 2,
    Order3 = 3,
    Order4 = 4,
    Order5 = 5,
    Order6 = 6,
    Order7 = 7,
}
184
/// Order of the `conv1` delta encoding, from `Order0` up to `Order32`.
///
/// The order is (de)serialised as its bare integer discriminant, which
/// [`compress`] passes on to `pco::DeltaSpec::TryConv1`.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize_repr, Deserialize_repr, JsonSchema_repr)]
#[repr(u8)]
#[expect(missing_docs)]
pub enum PcoDeltaEncodingConv1Order {
    Order0 = 0,
    Order1 = 1,
    Order2 = 2,
    Order3 = 3,
    Order4 = 4,
    Order5 = 5,
    Order6 = 6,
    Order7 = 7,
    Order8 = 8,
    Order9 = 9,
    Order10 = 10,
    Order11 = 11,
    Order12 = 12,
    Order13 = 13,
    Order14 = 14,
    Order15 = 15,
    Order16 = 16,
    Order17 = 17,
    Order18 = 18,
    Order19 = 19,
    Order20 = 20,
    Order21 = 21,
    Order22 = 22,
    Order23 = 23,
    Order24 = 24,
    Order25 = 25,
    Order26 = 26,
    Order27 = 27,
    Order28 = 28,
    Order29 = 29,
    Order30 = 30,
    Order31 = 31,
    Order32 = 32,
}
226
/// Selection of the pco paging strategy, chosen in the configuration by the
/// `paging` key.
///
/// Variant names are (de)serialised in kebab-case (`equal-pages-up-to`).
#[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
#[schemars(deny_unknown_fields)]
#[serde(tag = "paging", rename_all = "kebab-case")]
pub enum PcoPagingSpec {
    /// Maps to `pco::PagingSpec::EqualPagesUpTo`.
    EqualPagesUpTo {
        /// Page size bound that is handed to `pco::PagingSpec::EqualPagesUpTo`.
        ///
        /// When the key is absent, the value is derived from
        /// `pco::DEFAULT_MAX_PAGE_N`.
        #[serde(default = "default_equal_pages_up_to")]
        equal_pages_up_to: NonZeroUsize,
    },
}
242
243impl Default for PcoPagingSpec {
244 fn default() -> Self {
245 Self::EqualPagesUpTo {
246 equal_pages_up_to: default_equal_pages_up_to(),
247 }
248 }
249}
250
251const fn default_equal_pages_up_to() -> NonZeroUsize {
252 NonZeroUsize::MIN.saturating_add(pco::DEFAULT_MAX_PAGE_N.saturating_sub(1))
253}
254
255impl Codec for Pcodec {
256 type Error = PcodecError;
257
258 #[expect(clippy::too_many_lines)] fn encode(&self, data: AnyCowArray) -> Result<AnyArray, Self::Error> {
260 match data {
261 AnyCowArray::U8(data) => Ok(AnyArray::U8(
262 Array1::from(compress(
263 data,
264 self.level,
265 self.mode,
266 self.delta,
267 self.paging,
268 )?)
269 .into_dyn(),
270 )),
271 AnyCowArray::U16(data) => Ok(AnyArray::U8(
272 Array1::from(compress(
273 data,
274 self.level,
275 self.mode,
276 self.delta,
277 self.paging,
278 )?)
279 .into_dyn(),
280 )),
281 AnyCowArray::U32(data) => Ok(AnyArray::U8(
282 Array1::from(compress(
283 data,
284 self.level,
285 self.mode,
286 self.delta,
287 self.paging,
288 )?)
289 .into_dyn(),
290 )),
291 AnyCowArray::U64(data) => Ok(AnyArray::U8(
292 Array1::from(compress(
293 data,
294 self.level,
295 self.mode,
296 self.delta,
297 self.paging,
298 )?)
299 .into_dyn(),
300 )),
301 AnyCowArray::I8(data) => Ok(AnyArray::U8(
302 Array1::from(compress(
303 data,
304 self.level,
305 self.mode,
306 self.delta,
307 self.paging,
308 )?)
309 .into_dyn(),
310 )),
311 AnyCowArray::I16(data) => Ok(AnyArray::U8(
312 Array1::from(compress(
313 data,
314 self.level,
315 self.mode,
316 self.delta,
317 self.paging,
318 )?)
319 .into_dyn(),
320 )),
321 AnyCowArray::I32(data) => Ok(AnyArray::U8(
322 Array1::from(compress(
323 data,
324 self.level,
325 self.mode,
326 self.delta,
327 self.paging,
328 )?)
329 .into_dyn(),
330 )),
331 AnyCowArray::I64(data) => Ok(AnyArray::U8(
332 Array1::from(compress(
333 data,
334 self.level,
335 self.mode,
336 self.delta,
337 self.paging,
338 )?)
339 .into_dyn(),
340 )),
341 AnyCowArray::F32(data) => Ok(AnyArray::U8(
342 Array1::from(compress(
343 data,
344 self.level,
345 self.mode,
346 self.delta,
347 self.paging,
348 )?)
349 .into_dyn(),
350 )),
351 AnyCowArray::F64(data) => Ok(AnyArray::U8(
352 Array1::from(compress(
353 data,
354 self.level,
355 self.mode,
356 self.delta,
357 self.paging,
358 )?)
359 .into_dyn(),
360 )),
361 encoded => Err(PcodecError::UnsupportedDtype(encoded.dtype())),
362 }
363 }
364
365 fn decode(&self, encoded: AnyCowArray) -> Result<AnyArray, Self::Error> {
366 let AnyCowArray::U8(encoded) = encoded else {
367 return Err(PcodecError::EncodedDataNotBytes {
368 dtype: encoded.dtype(),
369 });
370 };
371
372 if !matches!(encoded.shape(), [_]) {
373 return Err(PcodecError::EncodedDataNotOneDimensional {
374 shape: encoded.shape().to_vec(),
375 });
376 }
377
378 decompress(&AnyCowArray::U8(encoded).as_bytes())
379 }
380
381 fn decode_into(
382 &self,
383 encoded: AnyArrayView,
384 decoded: AnyArrayViewMut,
385 ) -> Result<(), Self::Error> {
386 let AnyArrayView::U8(encoded) = encoded else {
387 return Err(PcodecError::EncodedDataNotBytes {
388 dtype: encoded.dtype(),
389 });
390 };
391
392 if !matches!(encoded.shape(), [_]) {
393 return Err(PcodecError::EncodedDataNotOneDimensional {
394 shape: encoded.shape().to_vec(),
395 });
396 }
397
398 let encoded = AnyArrayView::U8(encoded);
399 let encoded = encoded.as_bytes();
400
401 match decoded {
402 AnyArrayViewMut::U8(decoded) => decompress_into(&encoded, decoded),
403 AnyArrayViewMut::U16(decoded) => decompress_into(&encoded, decoded),
404 AnyArrayViewMut::U32(decoded) => decompress_into(&encoded, decoded),
405 AnyArrayViewMut::U64(decoded) => decompress_into(&encoded, decoded),
406 AnyArrayViewMut::I8(decoded) => decompress_into(&encoded, decoded),
407 AnyArrayViewMut::I16(decoded) => decompress_into(&encoded, decoded),
408 AnyArrayViewMut::I32(decoded) => decompress_into(&encoded, decoded),
409 AnyArrayViewMut::I64(decoded) => decompress_into(&encoded, decoded),
410 AnyArrayViewMut::F32(decoded) => decompress_into(&encoded, decoded),
411 AnyArrayViewMut::F64(decoded) => decompress_into(&encoded, decoded),
412 decoded => Err(PcodecError::UnsupportedDtype(decoded.dtype())),
413 }
414 }
415}
416
impl StaticCodec for Pcodec {
    /// Identifier under which this codec is registered.
    const CODEC_ID: &'static str = "pco.rs";

    /// The codec is its own configuration type.
    type Config<'de> = Self;

    /// Returns the configuration unchanged, since it already is the codec.
    fn from_config(config: Self::Config<'_>) -> Self {
        config
    }

    /// Wraps a reference to the codec as its configuration.
    fn get_config(&self) -> StaticCodecConfig<'_, Self> {
        StaticCodecConfig::from(self)
    }
}
430
/// Errors that can occur while encoding or decoding with the [`Pcodec`].
#[derive(Debug, Error)]
pub enum PcodecError {
    /// The array has a dtype that none of the codec's dispatch arms handle.
    #[error("Pco does not support the dtype {0}")]
    UnsupportedDtype(AnyArrayDType),
    /// Serialising the dtype/shape/version header failed.
    #[error("Pco failed to encode the header")]
    HeaderEncodeFailed {
        /// Opaque source error
        source: PcoHeaderError,
    },
    /// pco reported an error while compressing the data.
    #[error("Pco failed to encode the data")]
    PcoEncodeFailed {
        /// Opaque source error
        source: PcoCodingError,
    },
    /// The encoded input is not a byte (`u8`) array.
    #[error(
        "Pco can only decode one-dimensional byte arrays but received an array of dtype {dtype}"
    )]
    EncodedDataNotBytes {
        /// Dtype of the array that was received instead
        dtype: AnyArrayDType,
    },
    /// The encoded input is a byte array, but not a one-dimensional one.
    #[error(
        "Pco can only decode one-dimensional byte arrays but received a byte array of shape {shape:?}"
    )]
    EncodedDataNotOneDimensional {
        /// Shape of the byte array that was received instead
        shape: Vec<usize>,
    },
    /// Deserialising the dtype/shape/version header failed.
    #[error("Pco failed to decode the header")]
    HeaderDecodeFailed {
        /// Opaque source error
        source: PcoHeaderError,
    },
    /// pco reported an error while decompressing the data.
    #[error("Pco failed to decode the data")]
    PcoDecodeFailed {
        /// Opaque source error
        source: PcoCodingError,
    },
    /// The number of decoded elements does not fit the shape from the header.
    #[error("Pco decoded an invalid array shape header which does not fit the decoded data")]
    DecodeInvalidShapeHeader {
        /// Shape error raised for the mismatch
        #[from]
        source: ShapeError,
    },
    /// The output array's dtype or shape differs from the encoded header.
    #[error("Pco cannot decode into the provided array")]
    MismatchedDecodeIntoArray {
        /// Details of the dtype or shape mismatch
        #[from]
        source: AnyArrayAssignError,
    },
}
495
/// Opaque error raised when the header cannot be encoded or decoded.
///
/// Wraps the underlying `postcard` error and forwards its message unchanged.
#[derive(Debug, Error)]
#[error(transparent)]
pub struct PcoHeaderError(postcard::Error);
500
/// Opaque error raised when pco fails to compress or decompress the data.
///
/// Wraps the underlying `pco` error and forwards its message unchanged.
#[derive(Debug, Error)]
#[error(transparent)]
pub struct PcoCodingError(pco::errors::PcoError);
505
506#[expect(clippy::needless_pass_by_value)]
507pub fn compress<T: PcoElement, S: Data<Elem = T>, D: Dimension>(
516 data: ArrayBase<S, D>,
517 level: PcoCompressionLevel,
518 mode: PcoModeSpec,
519 delta: PcoDeltaSpec,
520 paging: PcoPagingSpec,
521) -> Result<Vec<u8>, PcodecError> {
522 let mut encoded_bytes = postcard::to_extend(
523 &CompressionHeader {
524 dtype: <T as PcoElement>::DTYPE,
525 shape: Cow::Borrowed(data.shape()),
526 version: StaticCodecVersion,
527 },
528 Vec::new(),
529 )
530 .map_err(|err| PcodecError::HeaderEncodeFailed {
531 source: PcoHeaderError(err),
532 })?;
533
534 let data_owned;
535 #[expect(clippy::option_if_let_else)]
536 let data = if let Some(slice) = data.as_slice() {
537 slice
538 } else {
539 data_owned = data.into_iter().copied().collect::<Vec<T>>();
540 data_owned.as_slice()
541 };
542
543 let config = pco::ChunkConfig::default()
544 .with_enable_8_bit(true)
545 .with_compression_level(level as usize)
546 .with_mode_spec(match mode {
547 PcoModeSpec::Auto => pco::ModeSpec::Auto,
548 PcoModeSpec::Classic => pco::ModeSpec::Classic,
549 PcoModeSpec::TryFloatMult { float_mult_base } => {
550 pco::ModeSpec::TryFloatMult(float_mult_base)
551 }
552 PcoModeSpec::TryFloatQuant { float_quant_bits } => {
553 pco::ModeSpec::TryFloatQuant(float_quant_bits)
554 }
555 PcoModeSpec::TryIntMult { int_mult_base } => pco::ModeSpec::TryIntMult(int_mult_base),
556 })
557 .with_delta_spec(match delta {
558 PcoDeltaSpec::Auto => pco::DeltaSpec::Auto,
559 PcoDeltaSpec::NoOp => pco::DeltaSpec::NoOp,
560 PcoDeltaSpec::TryConsecutive {
561 delta_encoding_order,
562 } => pco::DeltaSpec::TryConsecutive(delta_encoding_order as usize),
563 PcoDeltaSpec::TryLookback => pco::DeltaSpec::TryLookback,
564 PcoDeltaSpec::TryConv1 {
565 delta_encoding_order,
566 } => pco::DeltaSpec::TryConv1(delta_encoding_order as usize),
567 })
568 .with_paging_spec(match paging {
569 PcoPagingSpec::EqualPagesUpTo { equal_pages_up_to } => {
570 pco::PagingSpec::EqualPagesUpTo(equal_pages_up_to.get())
571 }
572 });
573
574 let encoded = pco::standalone::simple_compress(data, &config).map_err(|err| {
575 PcodecError::PcoEncodeFailed {
576 source: PcoCodingError(err),
577 }
578 })?;
579 encoded_bytes.extend_from_slice(&encoded);
580
581 Ok(encoded_bytes)
582}
583
584pub fn decompress(encoded: &[u8]) -> Result<AnyArray, PcodecError> {
592 let (header, data) =
593 postcard::take_from_bytes::<CompressionHeader>(encoded).map_err(|err| {
594 PcodecError::HeaderDecodeFailed {
595 source: PcoHeaderError(err),
596 }
597 })?;
598
599 let decoded = match header.dtype {
600 PcoDType::U8 => AnyArray::U8(Array::from_shape_vec(
601 &*header.shape,
602 pco::standalone::simple_decompress(data).map_err(|err| {
603 PcodecError::PcoDecodeFailed {
604 source: PcoCodingError(err),
605 }
606 })?,
607 )?),
608 PcoDType::U16 => AnyArray::U16(Array::from_shape_vec(
609 &*header.shape,
610 pco::standalone::simple_decompress(data).map_err(|err| {
611 PcodecError::PcoDecodeFailed {
612 source: PcoCodingError(err),
613 }
614 })?,
615 )?),
616 PcoDType::U32 => AnyArray::U32(Array::from_shape_vec(
617 &*header.shape,
618 pco::standalone::simple_decompress(data).map_err(|err| {
619 PcodecError::PcoDecodeFailed {
620 source: PcoCodingError(err),
621 }
622 })?,
623 )?),
624 PcoDType::U64 => AnyArray::U64(Array::from_shape_vec(
625 &*header.shape,
626 pco::standalone::simple_decompress(data).map_err(|err| {
627 PcodecError::PcoDecodeFailed {
628 source: PcoCodingError(err),
629 }
630 })?,
631 )?),
632 PcoDType::I8 => AnyArray::I8(Array::from_shape_vec(
633 &*header.shape,
634 pco::standalone::simple_decompress(data).map_err(|err| {
635 PcodecError::PcoDecodeFailed {
636 source: PcoCodingError(err),
637 }
638 })?,
639 )?),
640 PcoDType::I16 => AnyArray::I16(Array::from_shape_vec(
641 &*header.shape,
642 pco::standalone::simple_decompress(data).map_err(|err| {
643 PcodecError::PcoDecodeFailed {
644 source: PcoCodingError(err),
645 }
646 })?,
647 )?),
648 PcoDType::I32 => AnyArray::I32(Array::from_shape_vec(
649 &*header.shape,
650 pco::standalone::simple_decompress(data).map_err(|err| {
651 PcodecError::PcoDecodeFailed {
652 source: PcoCodingError(err),
653 }
654 })?,
655 )?),
656 PcoDType::I64 => AnyArray::I64(Array::from_shape_vec(
657 &*header.shape,
658 pco::standalone::simple_decompress(data).map_err(|err| {
659 PcodecError::PcoDecodeFailed {
660 source: PcoCodingError(err),
661 }
662 })?,
663 )?),
664 PcoDType::F32 => AnyArray::F32(Array::from_shape_vec(
665 &*header.shape,
666 pco::standalone::simple_decompress(data).map_err(|err| {
667 PcodecError::PcoDecodeFailed {
668 source: PcoCodingError(err),
669 }
670 })?,
671 )?),
672 PcoDType::F64 => AnyArray::F64(Array::from_shape_vec(
673 &*header.shape,
674 pco::standalone::simple_decompress(data).map_err(|err| {
675 PcodecError::PcoDecodeFailed {
676 source: PcoCodingError(err),
677 }
678 })?,
679 )?),
680 };
681
682 Ok(decoded)
683}
684
685pub fn decompress_into<T: PcoElement, D: Dimension>(
697 encoded: &[u8],
698 mut decoded: ArrayViewMut<T, D>,
699) -> Result<(), PcodecError> {
700 let (header, data) =
701 postcard::take_from_bytes::<CompressionHeader>(encoded).map_err(|err| {
702 PcodecError::HeaderDecodeFailed {
703 source: PcoHeaderError(err),
704 }
705 })?;
706
707 if T::DTYPE != header.dtype {
708 return Err(PcodecError::MismatchedDecodeIntoArray {
709 source: AnyArrayAssignError::DTypeMismatch {
710 src: header.dtype.into_dtype(),
711 dst: T::DTYPE.into_dtype(),
712 },
713 });
714 }
715
716 if decoded.shape() != &*header.shape {
717 return Err(PcodecError::MismatchedDecodeIntoArray {
718 source: AnyArrayAssignError::ShapeMismatch {
719 src: header.shape.into_owned(),
720 dst: decoded.shape().to_vec(),
721 },
722 });
723 }
724
725 if let Some(slice) = decoded.as_slice_mut() {
726 pco::standalone::simple_decompress_into(data, slice).map_err(|err| {
727 PcodecError::PcoDecodeFailed {
728 source: PcoCodingError(err),
729 }
730 })?;
731 return Ok(());
732 }
733
734 let dec =
735 pco::standalone::simple_decompress(data).map_err(|err| PcodecError::PcoDecodeFailed {
736 source: PcoCodingError(err),
737 })?;
738
739 if dec.len() != decoded.len() {
740 return Err(PcodecError::DecodeInvalidShapeHeader {
741 source: ShapeError::from_kind(ndarray::ErrorKind::IncompatibleShape),
742 });
743 }
744
745 decoded.iter_mut().zip(dec).for_each(|(o, d)| *o = d);
746
747 Ok(())
748}
749
/// Element types that the codec can compress with pco.
///
/// Ties a primitive number type to the [`PcoDType`] tag that is stored in the
/// encoded header.
pub trait PcoElement: Copy + pco::data_types::Number {
    /// The dtype tag that identifies this element type.
    const DTYPE: PcoDType;
}
755
756impl PcoElement for u8 {
757 const DTYPE: PcoDType = PcoDType::U8;
758}
759
760impl PcoElement for u16 {
761 const DTYPE: PcoDType = PcoDType::U16;
762}
763
764impl PcoElement for u32 {
765 const DTYPE: PcoDType = PcoDType::U32;
766}
767
768impl PcoElement for u64 {
769 const DTYPE: PcoDType = PcoDType::U64;
770}
771
772impl PcoElement for i8 {
773 const DTYPE: PcoDType = PcoDType::I8;
774}
775
776impl PcoElement for i16 {
777 const DTYPE: PcoDType = PcoDType::I16;
778}
779
780impl PcoElement for i32 {
781 const DTYPE: PcoDType = PcoDType::I32;
782}
783
784impl PcoElement for i64 {
785 const DTYPE: PcoDType = PcoDType::I64;
786}
787
788impl PcoElement for f32 {
789 const DTYPE: PcoDType = PcoDType::F32;
790}
791
792impl PcoElement for f64 {
793 const DTYPE: PcoDType = PcoDType::F64;
794}
795
/// Header that [`compress`] writes in front of the pco stream and that
/// [`decompress`] and [`decompress_into`] read back.
// The header is serialised with postcard, so the order of these fields is
// presumably part of the encoded format — confirm before reordering them.
#[derive(Serialize, Deserialize)]
struct CompressionHeader<'a> {
    /// Element type of the compressed array
    dtype: PcoDType,
    /// Shape of the compressed array, borrowed where possible
    #[serde(borrow)]
    shape: Cow<'a, [usize]>,
    /// Version marker of the encoding format
    version: PcodecVersion,
}
803
/// Element dtypes that the codec supports; stored in the encoded header.
///
/// Every variant is serialised under its short Rust-style name (e.g. `u8`)
/// and additionally accepts a longer alias (e.g. `uint8`) when deserialising.
// NOTE(review): the header is written with postcard, which presumably encodes
// enum variants by index, so the variant order is likely part of the encoded
// format — confirm before reordering.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[expect(missing_docs)]
pub enum PcoDType {
    #[serde(rename = "u8", alias = "uint8")]
    U8,
    #[serde(rename = "u16", alias = "uint16")]
    U16,
    #[serde(rename = "u32", alias = "uint32")]
    U32,
    #[serde(rename = "u64", alias = "uint64")]
    U64,
    #[serde(rename = "i8", alias = "int8")]
    I8,
    #[serde(rename = "i16", alias = "int16")]
    I16,
    #[serde(rename = "i32", alias = "int32")]
    I32,
    #[serde(rename = "i64", alias = "int64")]
    I64,
    #[serde(rename = "f32", alias = "float32")]
    F32,
    #[serde(rename = "f64", alias = "float64")]
    F64,
}
829
impl PcoDType {
    /// Converts the tag into the [`AnyArrayDType`] variant of the same name.
    #[must_use]
    pub const fn into_dtype(self) -> AnyArrayDType {
        // one-to-one mapping; the match is exhaustive so that a new variant
        // cannot be added without extending it
        match self {
            Self::U8 => AnyArrayDType::U8,
            Self::U16 => AnyArrayDType::U16,
            Self::U32 => AnyArrayDType::U32,
            Self::U64 => AnyArrayDType::U64,
            Self::I8 => AnyArrayDType::I8,
            Self::I16 => AnyArrayDType::I16,
            Self::I32 => AnyArrayDType::I32,
            Self::I64 => AnyArrayDType::I64,
            Self::F32 => AnyArrayDType::F32,
            Self::F64 => AnyArrayDType::F64,
        }
    }
}
848
849impl fmt::Display for PcoDType {
850 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
851 fmt.write_str(match self {
852 Self::U8 => "u8",
853 Self::U16 => "u16",
854 Self::U32 => "u32",
855 Self::U64 => "u64",
856 Self::I8 => "i8",
857 Self::I16 => "i16",
858 Self::I32 => "i32",
859 Self::I64 => "i64",
860 Self::F32 => "f32",
861 Self::F64 => "f64",
862 })
863 }
864}