numcodecs_zfp_classic/
lib.rs

1//! [![CI Status]][workflow] [![MSRV]][repo] [![Latest Version]][crates.io] [![Rust Doc Crate]][docs.rs] [![Rust Doc Main]][docs]
2//!
3//! [CI Status]: https://img.shields.io/github/actions/workflow/status/juntyr/numcodecs-rs/ci.yml?branch=main
4//! [workflow]: https://github.com/juntyr/numcodecs-rs/actions/workflows/ci.yml?query=branch%3Amain
5//!
6//! [MSRV]: https://img.shields.io/badge/MSRV-1.87.0-blue
7//! [repo]: https://github.com/juntyr/numcodecs-rs
8//!
9//! [Latest Version]: https://img.shields.io/crates/v/numcodecs-zfp-classic
10//! [crates.io]: https://crates.io/crates/numcodecs-zfp-classic
11//!
12//! [Rust Doc Crate]: https://img.shields.io/docsrs/numcodecs-zfp-classic
13//! [docs.rs]: https://docs.rs/numcodecs-zfp-classic/
14//!
15//! [Rust Doc Main]: https://img.shields.io/badge/docs-main-blue
16//! [docs]: https://juntyr.github.io/numcodecs-rs/numcodecs_zfp_classic
17//!
18//! ZFP (classic) codec implementation for the [`numcodecs`] API.
19//!
20//! This implementation uses ZFP's default
21//! [`ZFP_ROUNDING_MODE=ZFP_ROUND_NEVER`](https://zfp.readthedocs.io/en/release1.0.1/installation.html#c.ZFP_ROUNDING_MODE)
22//! rounding mode, which is known to increase bias and correlation in ZFP's
23//! errors
24//! (see <https://zfp.readthedocs.io/en/release1.0.1/faq.html#zfp-rounding>).
25//!
26//! Please see the `numcodecs-zfp` codec for an implementation that uses an
27//! improved version of ZFP.
28
29#![allow(clippy::multiple_crate_versions)] // embedded-io
30
31use std::{borrow::Cow, fmt};
32
33use ndarray::{Array, Array1, ArrayView, Dimension, Zip};
34use numcodecs::{
35    AnyArray, AnyArrayAssignError, AnyArrayDType, AnyArrayView, AnyArrayViewMut, AnyCowArray,
36    Codec, StaticCodec, StaticCodecConfig, StaticCodecVersion,
37};
38use schemars::JsonSchema;
39use serde::{Deserialize, Serialize};
40use thiserror::Error;
41
42#[cfg(test)]
43use ::serde_json as _;
44
// Wrappers used by `compress`, `decompress`, and `decompress_into` to drive
// ZFP (`ZfpField`, `ZfpCompressionStream`, `ZfpDecompressionStream`) and the
// `ZfpCompressible` element trait
mod ffi;

/// Version marker of this codec's encoding format.
///
/// It is part of the codec configuration (as `_version`) and is written into
/// the metadata header of every compressed array.
// NOTE(review): the const parameters presumably spell major.minor.patch, i.e.
// version 0.2.0 — confirm against `StaticCodecVersion`.
type ZfpClassicCodecVersion = StaticCodecVersion<0, 2, 0>;
48
// NOTE(review): the `///` docs on this type and its fields are presumably
// picked up by the `JsonSchema` derive as schema descriptions, so rewording
// them changes the generated schema — confirm before editing them.
#[derive(Clone, Serialize, Deserialize, JsonSchema)]
// serde cannot deny unknown fields because of the flatten
// (the struct-level check is therefore only expressed in the JSON schema)
#[schemars(deny_unknown_fields)]
/// Codec providing compression using ZFP (classic)
pub struct ZfpClassicCodec {
    /// ZFP compression mode
    // Flattened: the `mode` tag and the parameters of the selected mode sit
    // next to the other configuration keys instead of in a nested object
    #[serde(flatten)]
    pub mode: ZfpCompressionMode,
    /// ZFP non-finite values mode
    // Optional in the configuration; falls back to the enum's default variant
    #[serde(default)]
    pub non_finite: ZfpNonFiniteValuesMode,
    /// The codec's encoding format version. Do not provide this parameter explicitly.
    // (De)serialized under the `_version` key and defaulted when absent
    #[serde(default, rename = "_version")]
    pub version: ZfpClassicCodecVersion,
}
64
// NOTE(review): the `///` docs below are presumably picked up by the
// `JsonSchema` derive as schema descriptions, so rewording them changes the
// generated schema — confirm before editing them.
#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
// Internally tagged: the variant is selected by the value of the `mode` key,
// which sits alongside the variant's own parameters
#[serde(tag = "mode")]
// Keys that do not belong to the selected variant are rejected
#[serde(deny_unknown_fields)]
/// ZFP compression mode
pub enum ZfpCompressionMode {
    // Selected with `mode = "expert"`
    #[serde(rename = "expert")]
    /// The most general mode, which can describe all four other modes
    Expert {
        /// Minimum number of compressed bits used to represent a block
        min_bits: u32,
        /// Maximum number of bits used to represent a block
        max_bits: u32,
        /// Maximum number of bit planes encoded
        max_prec: u32,
        /// Smallest absolute bit plane number encoded.
        ///
        /// This parameter applies to floating-point data only and is ignored
        /// for integer data.
        min_exp: i32,
    },
    /// In fixed-rate mode, each d-dimensional compressed block of `$4^d$`
    /// values is stored using a fixed number of bits. This number of
    /// compressed bits per block is amortized over the `$4^d$` values to give
    /// a rate of `$rate = \frac{maxbits}{4^d}$` in bits per value.
    // Selected with `mode = "fixed-rate"`
    #[serde(rename = "fixed-rate")]
    FixedRate {
        /// Rate in bits per value
        rate: f64,
    },
    /// In fixed-precision mode, the number of bits used to encode a block may
    /// vary, but the number of bit planes (the precision) encoded for the
    /// transform coefficients is fixed.
    // Selected with `mode = "fixed-precision"`
    #[serde(rename = "fixed-precision")]
    FixedPrecision {
        /// Number of bit planes encoded
        precision: u32,
    },
    /// In fixed-accuracy mode, all transform coefficient bit planes up to a
    /// minimum bit plane number are encoded. The smallest absolute bit plane
    /// number is chosen such that
    /// `$minexp = \text{floor}(\log_{2}(tolerance))$`.
    // Selected with `mode = "fixed-accuracy"`; the codec's `encode` rejects
    // this mode for `i32` and `i64` data
    #[serde(rename = "fixed-accuracy")]
    FixedAccuracy {
        /// Absolute error tolerance
        tolerance: f64,
    },
    /// Lossless per-block compression that preserves integer and floating point
    /// bit patterns.
    // Selected with `mode = "reversible"`; `compress` skips its non-finite
    // value check for this mode
    #[serde(rename = "reversible")]
    Reversible,
}
116
// NOTE(review): the `///` docs below are presumably picked up by the
// `JsonSchema` derive as schema descriptions, so rewording them changes the
// generated schema — confirm before editing them.
#[derive(Copy, Clone, Debug, Default, Serialize, Deserialize, JsonSchema)]
/// ZFP non-finite values mode
pub enum ZfpNonFiniteValuesMode {
    /// Deny compressing non-finite values, i.e. return an error.
    // The default, so passing non-finite values on to ZFP is always an
    // explicit opt-in
    #[default]
    #[serde(rename = "deny")]
    Deny,
    /// Unsafely allow compressing non-finite values, even though undefined
    /// behaviour may be triggered, see
    /// <https://zfp.readthedocs.io/en/release1.0.1/faq.html#q-valid>.
    // Makes `compress` skip its finiteness scan of the input data
    #[serde(rename = "allow-unsafe")]
    AllowUnsafe,
}
130
131impl Codec for ZfpClassicCodec {
132    type Error = ZfpClassicCodecError;
133
134    fn encode(&self, data: AnyCowArray) -> Result<AnyArray, Self::Error> {
135        if matches!(data.dtype(), AnyArrayDType::I32 | AnyArrayDType::I64)
136            && matches!(
137                self.mode,
138                ZfpCompressionMode::FixedAccuracy { tolerance: _ }
139            )
140        {
141            return Err(ZfpClassicCodecError::FixedAccuracyModeIntegerData);
142        }
143
144        match data {
145            AnyCowArray::I32(data) => Ok(AnyArray::U8(
146                Array1::from(compress(data.view(), &self.mode, self.non_finite)?).into_dyn(),
147            )),
148            AnyCowArray::I64(data) => Ok(AnyArray::U8(
149                Array1::from(compress(data.view(), &self.mode, self.non_finite)?).into_dyn(),
150            )),
151            AnyCowArray::F32(data) => Ok(AnyArray::U8(
152                Array1::from(compress(data.view(), &self.mode, self.non_finite)?).into_dyn(),
153            )),
154            AnyCowArray::F64(data) => Ok(AnyArray::U8(
155                Array1::from(compress(data.view(), &self.mode, self.non_finite)?).into_dyn(),
156            )),
157            encoded => Err(ZfpClassicCodecError::UnsupportedDtype(encoded.dtype())),
158        }
159    }
160
161    fn decode(&self, encoded: AnyCowArray) -> Result<AnyArray, Self::Error> {
162        let AnyCowArray::U8(encoded) = encoded else {
163            return Err(ZfpClassicCodecError::EncodedDataNotBytes {
164                dtype: encoded.dtype(),
165            });
166        };
167
168        if !matches!(encoded.shape(), [_]) {
169            return Err(ZfpClassicCodecError::EncodedDataNotOneDimensional {
170                shape: encoded.shape().to_vec(),
171            });
172        }
173
174        decompress(&AnyCowArray::U8(encoded).as_bytes())
175    }
176
177    fn decode_into(
178        &self,
179        encoded: AnyArrayView,
180        decoded: AnyArrayViewMut,
181    ) -> Result<(), Self::Error> {
182        let AnyArrayView::U8(encoded) = encoded else {
183            return Err(ZfpClassicCodecError::EncodedDataNotBytes {
184                dtype: encoded.dtype(),
185            });
186        };
187
188        if !matches!(encoded.shape(), [_]) {
189            return Err(ZfpClassicCodecError::EncodedDataNotOneDimensional {
190                shape: encoded.shape().to_vec(),
191            });
192        }
193
194        decompress_into(&AnyArrayView::U8(encoded).as_bytes(), decoded)
195    }
196}
197
impl StaticCodec for ZfpClassicCodec {
    // Identifier string of this codec
    const CODEC_ID: &'static str = "zfp-classic.rs";

    // The codec struct doubles as its own configuration type
    type Config<'de> = Self;

    // The configuration already is the codec, so it is returned unchanged
    fn from_config(config: Self::Config<'_>) -> Self {
        config
    }

    // Exposes the codec itself, by reference, as its configuration
    fn get_config(&self) -> StaticCodecConfig<'_, Self> {
        StaticCodecConfig::from(self)
    }
}
211
212#[derive(Debug, Error)]
213/// Errors that may occur when applying the [`ZfpClassicCodec`].
214pub enum ZfpClassicCodecError {
215    /// [`ZfpClassicCodec`] does not support the dtype
216    #[error("ZfpClassic does not support the dtype {0}")]
217    UnsupportedDtype(AnyArrayDType),
218    /// [`ZfpClassicCodec`] does not support the fixed accuracy mode for
219    /// integer data
220    #[error("ZfpClassic does not support the fixed accuracy mode for integer data")]
221    FixedAccuracyModeIntegerData,
222    /// [`ZfpClassicCodec`] only supports 1-4 dimensional data
223    #[error("ZfpClassic only supports 1-4 dimensional data but found shape {shape:?}")]
224    ExcessiveDimensionality {
225        /// The unexpected shape of the data
226        shape: Vec<usize>,
227    },
228    /// [`ZfpClassicCodec`] was configured with an invalid expert `mode`
229    #[error("ZfpClassic was configured with an invalid expert mode {mode:?}")]
230    InvalidExpertMode {
231        /// The unexpected compression mode
232        mode: ZfpCompressionMode,
233    },
234    /// [`ZfpCodec`] does not support non-finite (infinite or NaN) floating
235    /// point data  in non-reversible lossy compression
236    #[error(
237        "Zfp does not support non-finite (infinite or NaN) floating point data in non-reversible lossy compression"
238    )]
239    NonFiniteData,
240    /// [`ZfpClassicCodec`] failed to encode the header
241    #[error("ZfpClassic failed to encode the header")]
242    HeaderEncodeFailed,
243    /// [`ZfpClassicCodec`] failed to encode the array metadata header
244    #[error("ZfpClassic failed to encode the array metadata header")]
245    MetaHeaderEncodeFailed {
246        /// Opaque source error
247        source: ZfpHeaderError,
248    },
249    /// [`ZfpClassicCodec`] failed to encode the data
250    #[error("ZfpClassic failed to encode the data")]
251    ZfpEncodeFailed,
252    /// [`ZfpClassicCodec`] can only decode one-dimensional byte arrays but
253    /// received an array of a different dtype
254    #[error(
255        "ZfpClassic can only decode one-dimensional byte arrays but received an array of dtype {dtype}"
256    )]
257    EncodedDataNotBytes {
258        /// The unexpected dtype of the encoded array
259        dtype: AnyArrayDType,
260    },
261    /// [`ZfpClassicCodec`] can only decode one-dimensional byte arrays but
262    /// received an array of a different shape
263    #[error(
264        "ZfpClassic can only decode one-dimensional byte arrays but received a byte array of shape {shape:?}"
265    )]
266    EncodedDataNotOneDimensional {
267        /// The unexpected shape of the encoded array
268        shape: Vec<usize>,
269    },
270    /// [`ZfpClassicCodec`] failed to decode the header
271    #[error("ZfpClassic failed to decode the header")]
272    HeaderDecodeFailed,
273    /// [`ZfpClassicCodec`] failed to decode the array metadata header
274    #[error("ZfpClassic failed to decode the array metadata header")]
275    MetaHeaderDecodeFailed {
276        /// Opaque source error
277        source: ZfpHeaderError,
278    },
279    /// [`ZfpClassicCodec`] cannot decode into the provided array
280    #[error("ZfpClassicCodec cannot decode into the provided array")]
281    MismatchedDecodeIntoArray {
282        /// The source of the error
283        #[from]
284        source: AnyArrayAssignError,
285    },
286    /// [`ZfpClassicCodec`] failed to decode the data
287    #[error("ZfpClassic failed to decode the data")]
288    ZfpDecodeFailed,
289}
290
#[derive(Debug, Error)]
// Formatting (and the error source) is delegated to the wrapped error
#[error(transparent)]
/// Opaque error for when encoding or decoding the header fails
// Wraps the `postcard::Error` produced while (de)serializing the array
// metadata header; the inner field is private, so callers cannot inspect it
pub struct ZfpHeaderError(postcard::Error);
295
296/// Compress the `data` array using ZFP with the provided `mode`.
297///
298/// # Errors
299///
300/// Errors with
301/// - [`ZfpCodecError::NonFiniteData`] if any data element is non-finite
302///   (infinite or NaN) and a non-reversible lossy compression `mode` is used
303///   and the `non_finite` mode is not [`ZfpNonFiniteValuesMode::AllowUnsafe`]
304/// - [`ZfpClassicCodecError::ExcessiveDimensionality`] if data is more than
305///   4-dimensional
306/// - [`ZfpClassicCodecError::InvalidExpertMode`] if the `mode` has invalid
307///   expert mode parameters
308/// - [`ZfpClassicCodecError::HeaderEncodeFailed`] if encoding the ZFP header
309///   failed
310/// - [`ZfpClassicCodecError::MetaHeaderEncodeFailed`] if encoding the array
311///   metadata header failed
312/// - [`ZfpClassicCodecError::ZfpEncodeFailed`] if an opaque encoding error
313///   occurred
314pub fn compress<T: ffi::ZfpCompressible, D: Dimension>(
315    data: ArrayView<T, D>,
316    mode: &ZfpCompressionMode,
317    non_finite: ZfpNonFiniteValuesMode,
318) -> Result<Vec<u8>, ZfpClassicCodecError> {
319    if !matches!(mode, ZfpCompressionMode::Reversible)
320        && !matches!(non_finite, ZfpNonFiniteValuesMode::AllowUnsafe)
321        && !Zip::from(&data).all(|x| x.is_finite())
322    {
323        return Err(ZfpClassicCodecError::NonFiniteData);
324    }
325
326    let mut encoded = postcard::to_extend(
327        &CompressionHeader {
328            dtype: <T as ffi::ZfpCompressible>::D_TYPE,
329            shape: Cow::Borrowed(data.shape()),
330            version: StaticCodecVersion,
331        },
332        Vec::new(),
333    )
334    .map_err(|err| ZfpClassicCodecError::MetaHeaderEncodeFailed {
335        source: ZfpHeaderError(err),
336    })?;
337
338    // ZFP cannot handle zero-length dimensions
339    if data.is_empty() {
340        return Ok(encoded);
341    }
342
343    // Setup zfp structs to begin compression
344    // Squeeze the data to avoid wasting ZFP dimensions on axes of length 1
345    let field = ffi::ZfpField::new(data.into_dyn().squeeze())?;
346    let stream = ffi::ZfpCompressionStream::new(&field, mode)?;
347
348    // Allocate space based on the maximum size potentially required by zfp to
349    //  store the compressed array
350    let stream = stream.with_bitstream(field, &mut encoded);
351
352    // Write the header so we can reconstruct ZFP's mode on decompression
353    let stream = stream.write_header()?;
354
355    // Compress the field into the allocated output array
356    stream.compress()?;
357
358    Ok(encoded)
359}
360
361/// Decompress the `encoded` data into an array using ZFP.
362///
363/// # Errors
364///
365/// Errors with
366/// - [`ZfpClassicCodecError::HeaderDecodeFailed`] if decoding the ZFP header
367///   failed
368/// - [`ZfpClassicCodecError::MetaHeaderDecodeFailed`] if decoding the array
369///   metadata header failed
370/// - [`ZfpClassicCodecError::ZfpDecodeFailed`] if an opaque decoding error
371///   occurred
372pub fn decompress(encoded: &[u8]) -> Result<AnyArray, ZfpClassicCodecError> {
373    let (header, encoded) =
374        postcard::take_from_bytes::<CompressionHeader>(encoded).map_err(|err| {
375            ZfpClassicCodecError::MetaHeaderDecodeFailed {
376                source: ZfpHeaderError(err),
377            }
378        })?;
379
380    // Return empty data for zero-size arrays
381    if header.shape.iter().copied().product::<usize>() == 0 {
382        let decoded = match header.dtype {
383            ZfpDType::I32 => AnyArray::I32(Array::zeros(&*header.shape)),
384            ZfpDType::I64 => AnyArray::I64(Array::zeros(&*header.shape)),
385            ZfpDType::F32 => AnyArray::F32(Array::zeros(&*header.shape)),
386            ZfpDType::F64 => AnyArray::F64(Array::zeros(&*header.shape)),
387        };
388        return Ok(decoded);
389    }
390
391    // Setup zfp structs to begin decompression
392    let stream = ffi::ZfpDecompressionStream::new(encoded);
393
394    // Read the header to reconstruct ZFP's mode
395    let stream = stream.read_header()?;
396
397    // Decompress the field into a newly allocated output array
398    match header.dtype {
399        ZfpDType::I32 => {
400            let mut decompressed = Array::zeros(&*header.shape);
401            stream.decompress_into(decompressed.view_mut().squeeze())?;
402            Ok(AnyArray::I32(decompressed))
403        }
404        ZfpDType::I64 => {
405            let mut decompressed = Array::zeros(&*header.shape);
406            stream.decompress_into(decompressed.view_mut().squeeze())?;
407            Ok(AnyArray::I64(decompressed))
408        }
409        ZfpDType::F32 => {
410            let mut decompressed = Array::zeros(&*header.shape);
411            stream.decompress_into(decompressed.view_mut().squeeze())?;
412            Ok(AnyArray::F32(decompressed))
413        }
414        ZfpDType::F64 => {
415            let mut decompressed = Array::zeros(&*header.shape);
416            stream.decompress_into(decompressed.view_mut().squeeze())?;
417            Ok(AnyArray::F64(decompressed))
418        }
419    }
420}
421
422/// Decompress the `encoded` data into a `decoded` array using ZFP.
423///
424/// # Errors
425///
426/// Errors with
427/// - [`ZfpClassicCodecError::HeaderDecodeFailed`] if decoding the ZFP header
428///   failed
429/// - [`ZfpClassicCodecError::MetaHeaderDecodeFailed`] if decoding the array
430///   metadata header failed
431/// - [`ZfpClassicCodecError::MismatchedDecodeIntoArray`] if the `decoded`
432///   array is of the wrong dtype or shape
433/// - [`ZfpClassicCodecError::ZfpDecodeFailed`] if an opaque decoding error
434///   occurred
435pub fn decompress_into(
436    encoded: &[u8],
437    decoded: AnyArrayViewMut,
438) -> Result<(), ZfpClassicCodecError> {
439    let (header, encoded) =
440        postcard::take_from_bytes::<CompressionHeader>(encoded).map_err(|err| {
441            ZfpClassicCodecError::MetaHeaderDecodeFailed {
442                source: ZfpHeaderError(err),
443            }
444        })?;
445
446    if decoded.shape() != &*header.shape {
447        return Err(ZfpClassicCodecError::MismatchedDecodeIntoArray {
448            source: AnyArrayAssignError::ShapeMismatch {
449                src: header.shape.into_owned(),
450                dst: decoded.shape().to_vec(),
451            },
452        });
453    }
454
455    // Empty data doesn't need to be initialized
456    if decoded.is_empty() {
457        return Ok(());
458    }
459
460    // Setup zfp structs to begin decompression
461    let stream = ffi::ZfpDecompressionStream::new(encoded);
462
463    // Read the header to reconstruct ZFP's mode
464    let stream = stream.read_header()?;
465
466    // Decompress the field into the output array
467    match (decoded, header.dtype) {
468        (AnyArrayViewMut::I32(decoded), ZfpDType::I32) => stream.decompress_into(decoded.squeeze()),
469        (AnyArrayViewMut::I64(decoded), ZfpDType::I64) => stream.decompress_into(decoded.squeeze()),
470        (AnyArrayViewMut::F32(decoded), ZfpDType::F32) => stream.decompress_into(decoded.squeeze()),
471        (AnyArrayViewMut::F64(decoded), ZfpDType::F64) => stream.decompress_into(decoded.squeeze()),
472        (decoded, dtype) => Err(ZfpClassicCodecError::MismatchedDecodeIntoArray {
473            source: AnyArrayAssignError::DTypeMismatch {
474                src: dtype.into_dtype(),
475                dst: decoded.dtype(),
476            },
477        }),
478    }
479}
480
/// Array metadata header that `compress` writes in front of the ZFP data and
/// that `decompress` / `decompress_into` read back first.
// NOTE(review): the header is (de)serialized with postcard, so the order and
// types of these fields are presumably part of the encoding format — confirm
// before reordering or changing them.
#[derive(Serialize, Deserialize)]
struct CompressionHeader<'a> {
    /// Element type of the compressed array
    dtype: ZfpDType,
    /// Original shape of the compressed array, including axes of length 1
    /// that are squeezed away before the data is handed to ZFP
    #[serde(borrow)]
    shape: Cow<'a, [usize]>,
    /// Encoding format version marker
    version: ZfpClassicCodecVersion,
}
488
489/// Dtypes that Zfp can compress and decompress
490#[derive(Copy, Clone, Debug, Serialize, Deserialize)]
491#[expect(missing_docs)]
492pub enum ZfpDType {
493    #[serde(rename = "i32", alias = "int32")]
494    I32,
495    #[serde(rename = "i64", alias = "int64")]
496    I64,
497    #[serde(rename = "f32", alias = "float32")]
498    F32,
499    #[serde(rename = "f64", alias = "float64")]
500    F64,
501}
502
impl ZfpDType {
    /// Get the corresponding [`AnyArrayDType`]
    ///
    /// Every variant maps to the [`AnyArrayDType`] variant of the same name.
    #[must_use]
    pub const fn into_dtype(self) -> AnyArrayDType {
        match self {
            Self::I32 => AnyArrayDType::I32,
            Self::I64 => AnyArrayDType::I64,
            Self::F32 => AnyArrayDType::F32,
            Self::F64 => AnyArrayDType::F64,
        }
    }
}
515
516impl fmt::Display for ZfpDType {
517    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
518        fmt.write_str(match self {
519            Self::I32 => "i32",
520            Self::I64 => "i64",
521            Self::F32 => "f32",
522            Self::F64 => "f64",
523        })
524    }
525}
526
#[cfg(test)]
// Unwrapping is fine in tests: a failed unwrap simply fails the test
#[allow(clippy::unwrap_used)]
mod tests {
    use ndarray::ArrayView1;

    use super::*;

    // An array with a zero-length axis must roundtrip to an empty array with
    // the same dtype and shape.
    #[test]
    fn zero_length() {
        let encoded = compress(
            Array::<f32, _>::from_shape_vec([1, 27, 0].as_slice(), vec![])
                .unwrap()
                .view(),
            &ZfpCompressionMode::FixedPrecision { precision: 7 },
            ZfpNonFiniteValuesMode::Deny,
        )
        .unwrap();
        let decoded = decompress(&encoded).unwrap();

        assert_eq!(decoded.dtype(), AnyArrayDType::F32);
        assert!(decoded.is_empty());
        assert_eq!(decoded.shape(), &[1, 27, 0]);
    }

    // A six-dimensional array whose axes are mostly of length 1 must be
    // accepted (`compress` squeezes such axes away) and must roundtrip with
    // its full original shape.
    #[test]
    fn one_dimension() {
        let data = Array::from_shape_vec(
            [2_usize, 1, 2, 1, 1, 1].as_slice(),
            vec![1.0, 2.0, 3.0, 4.0],
        )
        .unwrap();

        let encoded = compress(
            data.view(),
            &ZfpCompressionMode::FixedAccuracy { tolerance: 0.1 },
            ZfpNonFiniteValuesMode::Deny,
        )
        .unwrap();
        let decoded = decompress(&encoded).unwrap();

        // The expected dtype (f32) is what fixes the element type of `data`
        assert_eq!(decoded, AnyArray::F32(data));
    }

    // Very short one-dimensional inputs, from zero up to four elements, must
    // roundtrip.
    #[test]
    fn small_state() {
        for data in [
            &[][..],
            &[0.0],
            &[0.0, 1.0],
            &[0.0, 1.0, 0.0],
            &[0.0, 1.0, 0.0, 1.0],
        ] {
            let encoded = compress(
                ArrayView1::from(data),
                &ZfpCompressionMode::FixedAccuracy { tolerance: 0.1 },
                ZfpNonFiniteValuesMode::Deny,
            )
            .unwrap();
            let decoded = decompress(&encoded).unwrap();

            // The expected dtype (f64) is what fixes the element type of `data`
            assert_eq!(
                decoded,
                AnyArray::F64(Array1::from_vec(data.to_vec()).into_dyn())
            );
        }
    }
}