1#![expect(clippy::multiple_crate_versions)] use std::{borrow::Cow, fmt, num::NonZeroUsize};
23
24use ndarray::{Array, Array1, ArrayBase, ArrayViewMut, Data, Dimension, ShapeError};
25use numcodecs::{
26 AnyArray, AnyArrayAssignError, AnyArrayDType, AnyArrayView, AnyArrayViewMut, AnyCowArray,
27 Codec, StaticCodec, StaticCodecConfig, StaticCodecVersion,
28};
29use schemars::{JsonSchema, JsonSchema_repr};
30use serde::{Deserialize, Serialize};
31use serde_repr::{Deserialize_repr, Serialize_repr};
32use thiserror::Error;
33
34#[cfg(test)]
35use ::serde_json as _;
36
37type PcodecVersion = StaticCodecVersion<0, 1, 0>;
38
39#[derive(Clone, Serialize, Deserialize, JsonSchema)]
40#[schemars(deny_unknown_fields)] pub struct Pcodec {
43 pub level: PcoCompressionLevel,
46 #[serde(flatten)]
48 pub mode: PcoModeSpec,
49 #[serde(flatten)]
51 pub delta: PcoDeltaSpec,
52 #[serde(flatten)]
54 pub paging: PcoPagingSpec,
55 #[serde(default, rename = "_version")]
57 pub version: PcodecVersion,
58}
59
60#[derive(
61 Copy, Clone, Debug, Default, PartialEq, Eq, Serialize_repr, Deserialize_repr, JsonSchema_repr,
62)]
63#[repr(u8)]
64#[expect(missing_docs)]
71pub enum PcoCompressionLevel {
72 Level0 = 0,
73 Level1 = 1,
74 Level2 = 2,
75 Level3 = 3,
76 Level4 = 4,
77 Level5 = 5,
78 Level6 = 6,
79 Level7 = 7,
80 #[default]
81 Level8 = 8,
82 Level9 = 9,
83 Level10 = 10,
84 Level11 = 11,
85 Level12 = 12,
86}
87
88#[derive(Copy, Clone, Debug, Default, PartialEq, Serialize, Deserialize, JsonSchema)]
89#[schemars(deny_unknown_fields)] #[serde(tag = "mode", rename_all = "kebab-case")]
91pub enum PcoModeSpec {
93 #[default]
94 Auto,
99 Classic,
101 TryFloatMult {
105 float_mult_base: f64,
107 },
108 TryFloatQuant {
113 float_quant_bits: u32,
115 },
116 TryIntMult {
120 int_mult_base: u64,
122 },
123}
124
125#[derive(Copy, Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
126#[schemars(deny_unknown_fields)] #[serde(tag = "delta", rename_all = "kebab-case")]
128pub enum PcoDeltaSpec {
130 #[default]
131 Auto,
136 None,
141 TryConsecutive {
147 delta_encoding_order: PcoDeltaEncodingOrder,
149 },
150 TryLookback,
156}
157
158#[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize_repr, Deserialize_repr, JsonSchema_repr)]
159#[repr(u8)]
160#[expect(missing_docs)]
164pub enum PcoDeltaEncodingOrder {
165 Order0 = 0,
166 Order1 = 1,
167 Order2 = 2,
168 Order3 = 3,
169 Order4 = 4,
170 Order5 = 5,
171 Order6 = 6,
172 Order7 = 7,
173}
174
175#[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
176#[schemars(deny_unknown_fields)] #[serde(tag = "paging", rename_all = "kebab-case")]
178pub enum PcoPagingSpec {
180 EqualPagesUpTo {
185 #[serde(default = "default_equal_pages_up_to")]
186 equal_pages_up_to: NonZeroUsize,
188 },
189}
190
191impl Default for PcoPagingSpec {
192 fn default() -> Self {
193 Self::EqualPagesUpTo {
194 equal_pages_up_to: default_equal_pages_up_to(),
195 }
196 }
197}
198
199const fn default_equal_pages_up_to() -> NonZeroUsize {
200 NonZeroUsize::MIN.saturating_add(pco::DEFAULT_MAX_PAGE_N.saturating_sub(1))
201}
202
203impl Codec for Pcodec {
204 type Error = PcodecError;
205
206 fn encode(&self, data: AnyCowArray) -> Result<AnyArray, Self::Error> {
207 match data {
208 AnyCowArray::U16(data) => Ok(AnyArray::U8(
209 Array1::from(compress(
210 data,
211 self.level,
212 self.mode,
213 self.delta,
214 self.paging,
215 )?)
216 .into_dyn(),
217 )),
218 AnyCowArray::U32(data) => Ok(AnyArray::U8(
219 Array1::from(compress(
220 data,
221 self.level,
222 self.mode,
223 self.delta,
224 self.paging,
225 )?)
226 .into_dyn(),
227 )),
228 AnyCowArray::U64(data) => Ok(AnyArray::U8(
229 Array1::from(compress(
230 data,
231 self.level,
232 self.mode,
233 self.delta,
234 self.paging,
235 )?)
236 .into_dyn(),
237 )),
238 AnyCowArray::I16(data) => Ok(AnyArray::U8(
239 Array1::from(compress(
240 data,
241 self.level,
242 self.mode,
243 self.delta,
244 self.paging,
245 )?)
246 .into_dyn(),
247 )),
248 AnyCowArray::I32(data) => Ok(AnyArray::U8(
249 Array1::from(compress(
250 data,
251 self.level,
252 self.mode,
253 self.delta,
254 self.paging,
255 )?)
256 .into_dyn(),
257 )),
258 AnyCowArray::I64(data) => Ok(AnyArray::U8(
259 Array1::from(compress(
260 data,
261 self.level,
262 self.mode,
263 self.delta,
264 self.paging,
265 )?)
266 .into_dyn(),
267 )),
268 AnyCowArray::F32(data) => Ok(AnyArray::U8(
269 Array1::from(compress(
270 data,
271 self.level,
272 self.mode,
273 self.delta,
274 self.paging,
275 )?)
276 .into_dyn(),
277 )),
278 AnyCowArray::F64(data) => Ok(AnyArray::U8(
279 Array1::from(compress(
280 data,
281 self.level,
282 self.mode,
283 self.delta,
284 self.paging,
285 )?)
286 .into_dyn(),
287 )),
288 encoded => Err(PcodecError::UnsupportedDtype(encoded.dtype())),
289 }
290 }
291
292 fn decode(&self, encoded: AnyCowArray) -> Result<AnyArray, Self::Error> {
293 let AnyCowArray::U8(encoded) = encoded else {
294 return Err(PcodecError::EncodedDataNotBytes {
295 dtype: encoded.dtype(),
296 });
297 };
298
299 if !matches!(encoded.shape(), [_]) {
300 return Err(PcodecError::EncodedDataNotOneDimensional {
301 shape: encoded.shape().to_vec(),
302 });
303 }
304
305 decompress(&AnyCowArray::U8(encoded).as_bytes())
306 }
307
308 fn decode_into(
309 &self,
310 encoded: AnyArrayView,
311 decoded: AnyArrayViewMut,
312 ) -> Result<(), Self::Error> {
313 let AnyArrayView::U8(encoded) = encoded else {
314 return Err(PcodecError::EncodedDataNotBytes {
315 dtype: encoded.dtype(),
316 });
317 };
318
319 if !matches!(encoded.shape(), [_]) {
320 return Err(PcodecError::EncodedDataNotOneDimensional {
321 shape: encoded.shape().to_vec(),
322 });
323 }
324
325 let encoded = AnyArrayView::U8(encoded);
326 let encoded = encoded.as_bytes();
327
328 match decoded {
329 AnyArrayViewMut::U16(decoded) => decompress_into(&encoded, decoded),
330 AnyArrayViewMut::U32(decoded) => decompress_into(&encoded, decoded),
331 AnyArrayViewMut::U64(decoded) => decompress_into(&encoded, decoded),
332 AnyArrayViewMut::I16(decoded) => decompress_into(&encoded, decoded),
333 AnyArrayViewMut::I32(decoded) => decompress_into(&encoded, decoded),
334 AnyArrayViewMut::I64(decoded) => decompress_into(&encoded, decoded),
335 AnyArrayViewMut::F32(decoded) => decompress_into(&encoded, decoded),
336 AnyArrayViewMut::F64(decoded) => decompress_into(&encoded, decoded),
337 decoded => Err(PcodecError::UnsupportedDtype(decoded.dtype())),
338 }
339 }
340}
341
342impl StaticCodec for Pcodec {
343 const CODEC_ID: &'static str = "pco.rs";
344
345 type Config<'de> = Self;
346
347 fn from_config(config: Self::Config<'_>) -> Self {
348 config
349 }
350
351 fn get_config(&self) -> StaticCodecConfig<Self> {
352 StaticCodecConfig::from(self)
353 }
354}
355
356#[derive(Debug, Error)]
357pub enum PcodecError {
359 #[error("Pco does not support the dtype {0}")]
361 UnsupportedDtype(AnyArrayDType),
362 #[error("Pco failed to encode the header")]
364 HeaderEncodeFailed {
365 source: PcoHeaderError,
367 },
368 #[error("Pco failed to encode the data")]
370 PcoEncodeFailed {
371 source: PcoCodingError,
373 },
374 #[error(
377 "Pco can only decode one-dimensional byte arrays but received an array of dtype {dtype}"
378 )]
379 EncodedDataNotBytes {
380 dtype: AnyArrayDType,
382 },
383 #[error(
386 "Pco can only decode one-dimensional byte arrays but received a byte array of shape {shape:?}"
387 )]
388 EncodedDataNotOneDimensional {
389 shape: Vec<usize>,
391 },
392 #[error("Pco failed to decode the header")]
394 HeaderDecodeFailed {
395 source: PcoHeaderError,
397 },
398 #[error("Pco failed to decode the data")]
400 PcoDecodeFailed {
401 source: PcoCodingError,
403 },
404 #[error("Pco decoded an invalid array shape header which does not fit the decoded data")]
407 DecodeInvalidShapeHeader {
408 #[from]
410 source: ShapeError,
411 },
412 #[error("Pco cannot decode into the provided array")]
414 MismatchedDecodeIntoArray {
415 #[from]
417 source: AnyArrayAssignError,
418 },
419}
420
421#[derive(Debug, Error)]
422#[error(transparent)]
423pub struct PcoHeaderError(postcard::Error);
425
426#[derive(Debug, Error)]
427#[error(transparent)]
428pub struct PcoCodingError(pco::errors::PcoError);
430
431#[expect(clippy::needless_pass_by_value)]
432pub fn compress<T: PcoElement, S: Data<Elem = T>, D: Dimension>(
441 data: ArrayBase<S, D>,
442 level: PcoCompressionLevel,
443 mode: PcoModeSpec,
444 delta: PcoDeltaSpec,
445 paging: PcoPagingSpec,
446) -> Result<Vec<u8>, PcodecError> {
447 let mut encoded_bytes = postcard::to_extend(
448 &CompressionHeader {
449 dtype: <T as PcoElement>::DTYPE,
450 shape: Cow::Borrowed(data.shape()),
451 version: StaticCodecVersion,
452 },
453 Vec::new(),
454 )
455 .map_err(|err| PcodecError::HeaderEncodeFailed {
456 source: PcoHeaderError(err),
457 })?;
458
459 let data_owned;
460 #[expect(clippy::option_if_let_else)]
461 let data = if let Some(slice) = data.as_slice() {
462 slice
463 } else {
464 data_owned = data.into_iter().copied().collect::<Vec<T>>();
465 data_owned.as_slice()
466 };
467
468 let config = pco::ChunkConfig::default()
469 .with_compression_level(level as usize)
470 .with_mode_spec(match mode {
471 PcoModeSpec::Auto => pco::ModeSpec::Auto,
472 PcoModeSpec::Classic => pco::ModeSpec::Classic,
473 PcoModeSpec::TryFloatMult { float_mult_base } => {
474 pco::ModeSpec::TryFloatMult(float_mult_base)
475 }
476 PcoModeSpec::TryFloatQuant { float_quant_bits } => {
477 pco::ModeSpec::TryFloatQuant(float_quant_bits)
478 }
479 PcoModeSpec::TryIntMult { int_mult_base } => pco::ModeSpec::TryIntMult(int_mult_base),
480 })
481 .with_delta_spec(match delta {
482 PcoDeltaSpec::Auto => pco::DeltaSpec::Auto,
483 PcoDeltaSpec::None => pco::DeltaSpec::None,
484 PcoDeltaSpec::TryConsecutive {
485 delta_encoding_order,
486 } => pco::DeltaSpec::TryConsecutive(delta_encoding_order as usize),
487 PcoDeltaSpec::TryLookback => pco::DeltaSpec::TryLookback,
488 })
489 .with_paging_spec(match paging {
490 PcoPagingSpec::EqualPagesUpTo { equal_pages_up_to } => {
491 pco::PagingSpec::EqualPagesUpTo(equal_pages_up_to.get())
492 }
493 });
494
495 let encoded = pco::standalone::simple_compress(data, &config).map_err(|err| {
496 PcodecError::PcoEncodeFailed {
497 source: PcoCodingError(err),
498 }
499 })?;
500 encoded_bytes.extend_from_slice(&encoded);
501
502 Ok(encoded_bytes)
503}
504
505pub fn decompress(encoded: &[u8]) -> Result<AnyArray, PcodecError> {
513 let (header, data) =
514 postcard::take_from_bytes::<CompressionHeader>(encoded).map_err(|err| {
515 PcodecError::HeaderDecodeFailed {
516 source: PcoHeaderError(err),
517 }
518 })?;
519
520 let decoded = match header.dtype {
521 PcoDType::U16 => AnyArray::U16(Array::from_shape_vec(
522 &*header.shape,
523 pco::standalone::simple_decompress(data).map_err(|err| {
524 PcodecError::PcoDecodeFailed {
525 source: PcoCodingError(err),
526 }
527 })?,
528 )?),
529 PcoDType::U32 => AnyArray::U32(Array::from_shape_vec(
530 &*header.shape,
531 pco::standalone::simple_decompress(data).map_err(|err| {
532 PcodecError::PcoDecodeFailed {
533 source: PcoCodingError(err),
534 }
535 })?,
536 )?),
537 PcoDType::U64 => AnyArray::U64(Array::from_shape_vec(
538 &*header.shape,
539 pco::standalone::simple_decompress(data).map_err(|err| {
540 PcodecError::PcoDecodeFailed {
541 source: PcoCodingError(err),
542 }
543 })?,
544 )?),
545 PcoDType::I16 => AnyArray::I16(Array::from_shape_vec(
546 &*header.shape,
547 pco::standalone::simple_decompress(data).map_err(|err| {
548 PcodecError::PcoDecodeFailed {
549 source: PcoCodingError(err),
550 }
551 })?,
552 )?),
553 PcoDType::I32 => AnyArray::I32(Array::from_shape_vec(
554 &*header.shape,
555 pco::standalone::simple_decompress(data).map_err(|err| {
556 PcodecError::PcoDecodeFailed {
557 source: PcoCodingError(err),
558 }
559 })?,
560 )?),
561 PcoDType::I64 => AnyArray::I64(Array::from_shape_vec(
562 &*header.shape,
563 pco::standalone::simple_decompress(data).map_err(|err| {
564 PcodecError::PcoDecodeFailed {
565 source: PcoCodingError(err),
566 }
567 })?,
568 )?),
569 PcoDType::F32 => AnyArray::F32(Array::from_shape_vec(
570 &*header.shape,
571 pco::standalone::simple_decompress(data).map_err(|err| {
572 PcodecError::PcoDecodeFailed {
573 source: PcoCodingError(err),
574 }
575 })?,
576 )?),
577 PcoDType::F64 => AnyArray::F64(Array::from_shape_vec(
578 &*header.shape,
579 pco::standalone::simple_decompress(data).map_err(|err| {
580 PcodecError::PcoDecodeFailed {
581 source: PcoCodingError(err),
582 }
583 })?,
584 )?),
585 };
586
587 Ok(decoded)
588}
589
590pub fn decompress_into<T: PcoElement, D: Dimension>(
602 encoded: &[u8],
603 mut decoded: ArrayViewMut<T, D>,
604) -> Result<(), PcodecError> {
605 let (header, data) =
606 postcard::take_from_bytes::<CompressionHeader>(encoded).map_err(|err| {
607 PcodecError::HeaderDecodeFailed {
608 source: PcoHeaderError(err),
609 }
610 })?;
611
612 if T::DTYPE != header.dtype {
613 return Err(PcodecError::MismatchedDecodeIntoArray {
614 source: AnyArrayAssignError::DTypeMismatch {
615 src: header.dtype.into_dtype(),
616 dst: T::DTYPE.into_dtype(),
617 },
618 });
619 }
620
621 if decoded.shape() != &*header.shape {
622 return Err(PcodecError::MismatchedDecodeIntoArray {
623 source: AnyArrayAssignError::ShapeMismatch {
624 src: header.shape.into_owned(),
625 dst: decoded.shape().to_vec(),
626 },
627 });
628 }
629
630 if let Some(slice) = decoded.as_slice_mut() {
631 pco::standalone::simple_decompress_into(data, slice).map_err(|err| {
632 PcodecError::PcoDecodeFailed {
633 source: PcoCodingError(err),
634 }
635 })?;
636 return Ok(());
637 }
638
639 let dec =
640 pco::standalone::simple_decompress(data).map_err(|err| PcodecError::PcoDecodeFailed {
641 source: PcoCodingError(err),
642 })?;
643
644 if dec.len() != decoded.len() {
645 return Err(PcodecError::DecodeInvalidShapeHeader {
646 source: ShapeError::from_kind(ndarray::ErrorKind::IncompatibleShape),
647 });
648 }
649
650 decoded.iter_mut().zip(dec).for_each(|(o, d)| *o = d);
651
652 Ok(())
653}
654
655pub trait PcoElement: Copy + pco::data_types::Number {
657 const DTYPE: PcoDType;
659}
660
661impl PcoElement for u16 {
662 const DTYPE: PcoDType = PcoDType::U16;
663}
664
665impl PcoElement for u32 {
666 const DTYPE: PcoDType = PcoDType::U32;
667}
668
669impl PcoElement for u64 {
670 const DTYPE: PcoDType = PcoDType::U64;
671}
672
673impl PcoElement for i16 {
674 const DTYPE: PcoDType = PcoDType::I16;
675}
676
677impl PcoElement for i32 {
678 const DTYPE: PcoDType = PcoDType::I32;
679}
680
681impl PcoElement for i64 {
682 const DTYPE: PcoDType = PcoDType::I64;
683}
684
685impl PcoElement for f32 {
686 const DTYPE: PcoDType = PcoDType::F32;
687}
688
689impl PcoElement for f64 {
690 const DTYPE: PcoDType = PcoDType::F64;
691}
692
693#[derive(Serialize, Deserialize)]
694struct CompressionHeader<'a> {
695 dtype: PcoDType,
696 #[serde(borrow)]
697 shape: Cow<'a, [usize]>,
698 version: PcodecVersion,
699}
700
701#[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
703#[expect(missing_docs)]
704pub enum PcoDType {
705 #[serde(rename = "u16", alias = "uint16")]
706 U16,
707 #[serde(rename = "u32", alias = "uint32")]
708 U32,
709 #[serde(rename = "u64", alias = "uint64")]
710 U64,
711 #[serde(rename = "i16", alias = "int16")]
712 I16,
713 #[serde(rename = "i32", alias = "int32")]
714 I32,
715 #[serde(rename = "i64", alias = "int64")]
716 I64,
717 #[serde(rename = "f32", alias = "float32")]
718 F32,
719 #[serde(rename = "f64", alias = "float64")]
720 F64,
721}
722
723impl PcoDType {
724 #[must_use]
725 pub const fn into_dtype(self) -> AnyArrayDType {
727 match self {
728 Self::U16 => AnyArrayDType::U16,
729 Self::U32 => AnyArrayDType::U32,
730 Self::U64 => AnyArrayDType::U64,
731 Self::I16 => AnyArrayDType::I16,
732 Self::I32 => AnyArrayDType::I32,
733 Self::I64 => AnyArrayDType::I64,
734 Self::F32 => AnyArrayDType::F32,
735 Self::F64 => AnyArrayDType::F64,
736 }
737 }
738}
739
740impl fmt::Display for PcoDType {
741 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
742 fmt.write_str(match self {
743 Self::U16 => "u16",
744 Self::U32 => "u32",
745 Self::U64 => "u64",
746 Self::I16 => "i16",
747 Self::I32 => "i32",
748 Self::I64 => "i64",
749 Self::F32 => "f32",
750 Self::F64 => "f64",
751 })
752 }
753}