numcodecs_zfp_classic/
lib.rs

1//! [![CI Status]][workflow] [![MSRV]][repo] [![Latest Version]][crates.io] [![Rust Doc Crate]][docs.rs] [![Rust Doc Main]][docs]
2//!
3//! [CI Status]: https://img.shields.io/github/actions/workflow/status/juntyr/numcodecs-rs/ci.yml?branch=main
4//! [workflow]: https://github.com/juntyr/numcodecs-rs/actions/workflows/ci.yml?query=branch%3Amain
5//!
6//! [MSRV]: https://img.shields.io/badge/MSRV-1.85.0-blue
7//! [repo]: https://github.com/juntyr/numcodecs-rs
8//!
9//! [Latest Version]: https://img.shields.io/crates/v/numcodecs-zfp-classic
10//! [crates.io]: https://crates.io/crates/numcodecs-zfp-classic
11//!
12//! [Rust Doc Crate]: https://img.shields.io/docsrs/numcodecs-zfp-classic
13//! [docs.rs]: https://docs.rs/numcodecs-zfp-classic/
14//!
15//! [Rust Doc Main]: https://img.shields.io/badge/docs-main-blue
16//! [docs]: https://juntyr.github.io/numcodecs-rs/numcodecs_zfp_classic
17//!
18//! ZFP (classic) codec implementation for the [`numcodecs`] API.
19//!
20//! This implementation uses ZFP's default
21//! [`ZFP_ROUNDING_MODE=ZFP_ROUND_NEVER`](https://zfp.readthedocs.io/en/release1.0.1/installation.html#c.ZFP_ROUNDING_MODE)
22//! rounding mode, which is known to increase bias and correlation in ZFP's
23//! errors
24//! (see <https://zfp.readthedocs.io/en/release1.0.1/faq.html#zfp-rounding>).
25//!
26//! Please see the `numcodecs-zfp` codec for an implementation that uses an
27//! improved version of ZFP.
28
29#![allow(clippy::multiple_crate_versions)] // embedded-io
30
31use std::{borrow::Cow, fmt};
32
33use ndarray::{Array, Array1, ArrayView, Dimension};
34use numcodecs::{
35    AnyArray, AnyArrayAssignError, AnyArrayDType, AnyArrayView, AnyArrayViewMut, AnyCowArray,
36    Codec, StaticCodec, StaticCodecConfig, StaticCodecVersion,
37};
38use schemars::JsonSchema;
39use serde::{Deserialize, Serialize};
40use thiserror::Error;
41
42#[cfg(test)]
43use ::serde_json as _;
44
45mod ffi;
46
47type ZfpClassicCodecVersion = StaticCodecVersion<0, 1, 0>;
48
49#[derive(Clone, Serialize, Deserialize, JsonSchema)]
50// serde cannot deny unknown fields because of the flatten
51#[schemars(deny_unknown_fields)]
52/// Codec providing compression using ZFP (classic)
53pub struct ZfpClassicCodec {
54    /// ZFP compression mode
55    #[serde(flatten)]
56    pub mode: ZfpCompressionMode,
57    /// The codec's encoding format version. Do not provide this parameter explicitly.
58    #[serde(default, rename = "_version")]
59    pub version: ZfpClassicCodecVersion,
60}
61
62#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
63#[serde(tag = "mode")]
64#[serde(deny_unknown_fields)]
65/// ZFP compression mode
66pub enum ZfpCompressionMode {
67    #[serde(rename = "expert")]
68    /// The most general mode, which can describe all four other modes
69    Expert {
70        /// Minimum number of compressed bits used to represent a block
71        min_bits: u32,
72        /// Maximum number of bits used to represent a block
73        max_bits: u32,
74        /// Maximum number of bit planes encoded
75        max_prec: u32,
76        /// Smallest absolute bit plane number encoded.
77        ///
78        /// This parameter applies to floating-point data only and is ignored
79        /// for integer data.
80        min_exp: i32,
81    },
82    /// In fixed-rate mode, each d-dimensional compressed block of `$4^d$`
83    /// values is stored using a fixed number of bits. This number of
84    /// compressed bits per block is amortized over the `$4^d$` values to give
85    /// a rate of `$rate = \frac{maxbits}{4^d}$` in bits per value.
86    #[serde(rename = "fixed-rate")]
87    FixedRate {
88        /// Rate in bits per value
89        rate: f64,
90    },
91    /// In fixed-precision mode, the number of bits used to encode a block may
92    /// vary, but the number of bit planes (the precision) encoded for the
93    /// transform coefficients is fixed.
94    #[serde(rename = "fixed-precision")]
95    FixedPrecision {
96        /// Number of bit planes encoded
97        precision: u32,
98    },
99    /// In fixed-accuracy mode, all transform coefficient bit planes up to a
100    /// minimum bit plane number are encoded. The smallest absolute bit plane
101    /// number is chosen such that
102    /// `$minexp = \text{floor}(\log_{2}(tolerance))$`.
103    #[serde(rename = "fixed-accuracy")]
104    FixedAccuracy {
105        /// Absolute error tolerance
106        tolerance: f64,
107    },
108    /// Lossless per-block compression that preserves integer and floating point
109    /// bit patterns.
110    #[serde(rename = "reversible")]
111    Reversible,
112}
113
114impl Codec for ZfpClassicCodec {
115    type Error = ZfpClassicCodecError;
116
117    fn encode(&self, data: AnyCowArray) -> Result<AnyArray, Self::Error> {
118        if matches!(data.dtype(), AnyArrayDType::I32 | AnyArrayDType::I64)
119            && matches!(
120                self.mode,
121                ZfpCompressionMode::FixedAccuracy { tolerance: _ }
122            )
123        {
124            return Err(ZfpClassicCodecError::FixedAccuracyModeIntegerData);
125        }
126
127        match data {
128            AnyCowArray::I32(data) => Ok(AnyArray::U8(
129                Array1::from(compress(data.view(), &self.mode)?).into_dyn(),
130            )),
131            AnyCowArray::I64(data) => Ok(AnyArray::U8(
132                Array1::from(compress(data.view(), &self.mode)?).into_dyn(),
133            )),
134            AnyCowArray::F32(data) => Ok(AnyArray::U8(
135                Array1::from(compress(data.view(), &self.mode)?).into_dyn(),
136            )),
137            AnyCowArray::F64(data) => Ok(AnyArray::U8(
138                Array1::from(compress(data.view(), &self.mode)?).into_dyn(),
139            )),
140            encoded => Err(ZfpClassicCodecError::UnsupportedDtype(encoded.dtype())),
141        }
142    }
143
144    fn decode(&self, encoded: AnyCowArray) -> Result<AnyArray, Self::Error> {
145        let AnyCowArray::U8(encoded) = encoded else {
146            return Err(ZfpClassicCodecError::EncodedDataNotBytes {
147                dtype: encoded.dtype(),
148            });
149        };
150
151        if !matches!(encoded.shape(), [_]) {
152            return Err(ZfpClassicCodecError::EncodedDataNotOneDimensional {
153                shape: encoded.shape().to_vec(),
154            });
155        }
156
157        decompress(&AnyCowArray::U8(encoded).as_bytes())
158    }
159
160    fn decode_into(
161        &self,
162        encoded: AnyArrayView,
163        decoded: AnyArrayViewMut,
164    ) -> Result<(), Self::Error> {
165        let AnyArrayView::U8(encoded) = encoded else {
166            return Err(ZfpClassicCodecError::EncodedDataNotBytes {
167                dtype: encoded.dtype(),
168            });
169        };
170
171        if !matches!(encoded.shape(), [_]) {
172            return Err(ZfpClassicCodecError::EncodedDataNotOneDimensional {
173                shape: encoded.shape().to_vec(),
174            });
175        }
176
177        decompress_into(&AnyArrayView::U8(encoded).as_bytes(), decoded)
178    }
179}
180
181impl StaticCodec for ZfpClassicCodec {
182    const CODEC_ID: &'static str = "zfp-classic.rs";
183
184    type Config<'de> = Self;
185
186    fn from_config(config: Self::Config<'_>) -> Self {
187        config
188    }
189
190    fn get_config(&self) -> StaticCodecConfig<Self> {
191        StaticCodecConfig::from(self)
192    }
193}
194
195#[derive(Debug, Error)]
196/// Errors that may occur when applying the [`ZfpClassicCodec`].
197pub enum ZfpClassicCodecError {
198    /// [`ZfpClassicCodec`] does not support the dtype
199    #[error("ZfpClassic does not support the dtype {0}")]
200    UnsupportedDtype(AnyArrayDType),
201    /// [`ZfpClassicCodec`] does not support the fixed accuracy mode for
202    /// integer data
203    #[error("ZfpClassic does not support the fixed accuracy mode for integer data")]
204    FixedAccuracyModeIntegerData,
205    /// [`ZfpClassicCodec`] only supports 1-4 dimensional data
206    #[error("ZfpClassic only supports 1-4 dimensional data but found shape {shape:?}")]
207    ExcessiveDimensionality {
208        /// The unexpected shape of the data
209        shape: Vec<usize>,
210    },
211    /// [`ZfpClassicCodec`] was configured with an invalid expert `mode`
212    #[error("ZfpClassic was configured with an invalid expert mode {mode:?}")]
213    InvalidExpertMode {
214        /// The unexpected compression mode
215        mode: ZfpCompressionMode,
216    },
217    /// [`ZfpClassicCodec`] failed to encode the header
218    #[error("ZfpClassic failed to encode the header")]
219    HeaderEncodeFailed,
220    /// [`ZfpClassicCodec`] failed to encode the array metadata header
221    #[error("ZfpClassic failed to encode the array metadata header")]
222    MetaHeaderEncodeFailed {
223        /// Opaque source error
224        source: ZfpHeaderError,
225    },
226    /// [`ZfpClassicCodec`] failed to encode the data
227    #[error("ZfpClassic failed to encode the data")]
228    ZfpEncodeFailed,
229    /// [`ZfpClassicCodec`] can only decode one-dimensional byte arrays but
230    /// received an array of a different dtype
231    #[error(
232        "ZfpClassic can only decode one-dimensional byte arrays but received an array of dtype {dtype}"
233    )]
234    EncodedDataNotBytes {
235        /// The unexpected dtype of the encoded array
236        dtype: AnyArrayDType,
237    },
238    /// [`ZfpClassicCodec`] can only decode one-dimensional byte arrays but
239    /// received an array of a different shape
240    #[error(
241        "ZfpClassic can only decode one-dimensional byte arrays but received a byte array of shape {shape:?}"
242    )]
243    EncodedDataNotOneDimensional {
244        /// The unexpected shape of the encoded array
245        shape: Vec<usize>,
246    },
247    /// [`ZfpClassicCodec`] failed to decode the header
248    #[error("ZfpClassic failed to decode the header")]
249    HeaderDecodeFailed,
250    /// [`ZfpClassicCodec`] failed to decode the array metadata header
251    #[error("ZfpClassic failed to decode the array metadata header")]
252    MetaHeaderDecodeFailed {
253        /// Opaque source error
254        source: ZfpHeaderError,
255    },
256    /// [`ZfpClassicCodec`] cannot decode into the provided array
257    #[error("ZfpClassicCodec cannot decode into the provided array")]
258    MismatchedDecodeIntoArray {
259        /// The source of the error
260        #[from]
261        source: AnyArrayAssignError,
262    },
263    /// [`ZfpClassicCodec`] failed to decode the data
264    #[error("ZfpClassic failed to decode the data")]
265    ZfpDecodeFailed,
266}
267
268#[derive(Debug, Error)]
269#[error(transparent)]
270/// Opaque error for when encoding or decoding the header fails
271pub struct ZfpHeaderError(postcard::Error);
272
273/// Compress the `data` array using ZFP with the provided `mode`.
274///
275/// # Errors
276///
277/// Errors with
278/// - [`ZfpClassicCodecError::ExcessiveDimensionality`] if data is more than
279///   4-dimensional
280/// - [`ZfpClassicCodecError::InvalidExpertMode`] if the `mode` has invalid
281///   expert mode parameters
282/// - [`ZfpClassicCodecError::HeaderEncodeFailed`] if encoding the ZFP header
283///   failed
284/// - [`ZfpClassicCodecError::MetaHeaderEncodeFailed`] if encoding the array
285///   metadata header failed
286/// - [`ZfpClassicCodecError::ZfpEncodeFailed`] if an opaque encoding error
287///   occurred
288pub fn compress<T: ffi::ZfpCompressible, D: Dimension>(
289    data: ArrayView<T, D>,
290    mode: &ZfpCompressionMode,
291) -> Result<Vec<u8>, ZfpClassicCodecError> {
292    let mut encoded = postcard::to_extend(
293        &CompressionHeader {
294            dtype: <T as ffi::ZfpCompressible>::D_TYPE,
295            shape: Cow::Borrowed(data.shape()),
296            version: StaticCodecVersion,
297        },
298        Vec::new(),
299    )
300    .map_err(|err| ZfpClassicCodecError::MetaHeaderEncodeFailed {
301        source: ZfpHeaderError(err),
302    })?;
303
304    // ZFP cannot handle zero-length dimensions
305    if data.is_empty() {
306        return Ok(encoded);
307    }
308
309    // Setup zfp structs to begin compression
310    // Squeeze the data to avoid wasting ZFP dimensions on axes of length 1
311    let field = ffi::ZfpField::new(data.into_dyn().squeeze())?;
312    let stream = ffi::ZfpCompressionStream::new(&field, mode)?;
313
314    // Allocate space based on the maximum size potentially required by zfp to
315    //  store the compressed array
316    let stream = stream.with_bitstream(field, &mut encoded);
317
318    // Write the header so we can reconstruct ZFP's mode on decompression
319    let stream = stream.write_header()?;
320
321    // Compress the field into the allocated output array
322    stream.compress()?;
323
324    Ok(encoded)
325}
326
327/// Decompress the `encoded` data into an array using ZFP.
328///
329/// # Errors
330///
331/// Errors with
332/// - [`ZfpClassicCodecError::HeaderDecodeFailed`] if decoding the ZFP header
333///   failed
334/// - [`ZfpClassicCodecError::MetaHeaderDecodeFailed`] if decoding the array
335///   metadata header failed
336/// - [`ZfpClassicCodecError::ZfpDecodeFailed`] if an opaque decoding error
337///   occurred
338pub fn decompress(encoded: &[u8]) -> Result<AnyArray, ZfpClassicCodecError> {
339    let (header, encoded) =
340        postcard::take_from_bytes::<CompressionHeader>(encoded).map_err(|err| {
341            ZfpClassicCodecError::MetaHeaderDecodeFailed {
342                source: ZfpHeaderError(err),
343            }
344        })?;
345
346    // Return empty data for zero-size arrays
347    if header.shape.iter().copied().product::<usize>() == 0 {
348        let decoded = match header.dtype {
349            ZfpDType::I32 => AnyArray::I32(Array::zeros(&*header.shape)),
350            ZfpDType::I64 => AnyArray::I64(Array::zeros(&*header.shape)),
351            ZfpDType::F32 => AnyArray::F32(Array::zeros(&*header.shape)),
352            ZfpDType::F64 => AnyArray::F64(Array::zeros(&*header.shape)),
353        };
354        return Ok(decoded);
355    }
356
357    // Setup zfp structs to begin decompression
358    let stream = ffi::ZfpDecompressionStream::new(encoded);
359
360    // Read the header to reconstruct ZFP's mode
361    let stream = stream.read_header()?;
362
363    // Decompress the field into a newly allocated output array
364    match header.dtype {
365        ZfpDType::I32 => {
366            let mut decompressed = Array::zeros(&*header.shape);
367            stream.decompress_into(decompressed.view_mut().squeeze())?;
368            Ok(AnyArray::I32(decompressed))
369        }
370        ZfpDType::I64 => {
371            let mut decompressed = Array::zeros(&*header.shape);
372            stream.decompress_into(decompressed.view_mut().squeeze())?;
373            Ok(AnyArray::I64(decompressed))
374        }
375        ZfpDType::F32 => {
376            let mut decompressed = Array::zeros(&*header.shape);
377            stream.decompress_into(decompressed.view_mut().squeeze())?;
378            Ok(AnyArray::F32(decompressed))
379        }
380        ZfpDType::F64 => {
381            let mut decompressed = Array::zeros(&*header.shape);
382            stream.decompress_into(decompressed.view_mut().squeeze())?;
383            Ok(AnyArray::F64(decompressed))
384        }
385    }
386}
387
388/// Decompress the `encoded` data into a `decoded` array using ZFP.
389///
390/// # Errors
391///
392/// Errors with
393/// - [`ZfpClassicCodecError::HeaderDecodeFailed`] if decoding the ZFP header
394///   failed
395/// - [`ZfpClassicCodecError::MetaHeaderDecodeFailed`] if decoding the array
396///   metadata header failed
397/// - [`ZfpClassicCodecError::MismatchedDecodeIntoArray`] if the `decoded`
398///   array is of the wrong dtype or shape
399/// - [`ZfpClassicCodecError::ZfpDecodeFailed`] if an opaque decoding error
400///   occurred
401pub fn decompress_into(
402    encoded: &[u8],
403    decoded: AnyArrayViewMut,
404) -> Result<(), ZfpClassicCodecError> {
405    let (header, encoded) =
406        postcard::take_from_bytes::<CompressionHeader>(encoded).map_err(|err| {
407            ZfpClassicCodecError::MetaHeaderDecodeFailed {
408                source: ZfpHeaderError(err),
409            }
410        })?;
411
412    if decoded.shape() != &*header.shape {
413        return Err(ZfpClassicCodecError::MismatchedDecodeIntoArray {
414            source: AnyArrayAssignError::ShapeMismatch {
415                src: header.shape.into_owned(),
416                dst: decoded.shape().to_vec(),
417            },
418        });
419    }
420
421    // Empty data doesn't need to be initialized
422    if decoded.is_empty() {
423        return Ok(());
424    }
425
426    // Setup zfp structs to begin decompression
427    let stream = ffi::ZfpDecompressionStream::new(encoded);
428
429    // Read the header to reconstruct ZFP's mode
430    let stream = stream.read_header()?;
431
432    // Decompress the field into the output array
433    match (decoded, header.dtype) {
434        (AnyArrayViewMut::I32(decoded), ZfpDType::I32) => stream.decompress_into(decoded.squeeze()),
435        (AnyArrayViewMut::I64(decoded), ZfpDType::I64) => stream.decompress_into(decoded.squeeze()),
436        (AnyArrayViewMut::F32(decoded), ZfpDType::F32) => stream.decompress_into(decoded.squeeze()),
437        (AnyArrayViewMut::F64(decoded), ZfpDType::F64) => stream.decompress_into(decoded.squeeze()),
438        (decoded, dtype) => Err(ZfpClassicCodecError::MismatchedDecodeIntoArray {
439            source: AnyArrayAssignError::DTypeMismatch {
440                src: dtype.into_dtype(),
441                dst: decoded.dtype(),
442            },
443        }),
444    }
445}
446
447#[derive(Serialize, Deserialize)]
448struct CompressionHeader<'a> {
449    dtype: ZfpDType,
450    #[serde(borrow)]
451    shape: Cow<'a, [usize]>,
452    version: ZfpClassicCodecVersion,
453}
454
455/// Dtypes that Zfp can compress and decompress
456#[derive(Copy, Clone, Debug, Serialize, Deserialize)]
457#[expect(missing_docs)]
458pub enum ZfpDType {
459    #[serde(rename = "i32", alias = "int32")]
460    I32,
461    #[serde(rename = "i64", alias = "int64")]
462    I64,
463    #[serde(rename = "f32", alias = "float32")]
464    F32,
465    #[serde(rename = "f64", alias = "float64")]
466    F64,
467}
468
469impl ZfpDType {
470    /// Get the corresponding [`AnyArrayDType`]
471    #[must_use]
472    pub const fn into_dtype(self) -> AnyArrayDType {
473        match self {
474            Self::I32 => AnyArrayDType::I32,
475            Self::I64 => AnyArrayDType::I64,
476            Self::F32 => AnyArrayDType::F32,
477            Self::F64 => AnyArrayDType::F64,
478        }
479    }
480}
481
482impl fmt::Display for ZfpDType {
483    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
484        fmt.write_str(match self {
485            Self::I32 => "i32",
486            Self::I64 => "i64",
487            Self::F32 => "f32",
488            Self::F64 => "f64",
489        })
490    }
491}
492
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use ndarray::ArrayView1;

    use super::*;

    /// An array with a zero-length axis roundtrips with its dtype and shape.
    #[test]
    fn zero_length() {
        let data = Array::<f32, _>::from_shape_vec([1, 27, 0].as_slice(), vec![]).unwrap();
        let mode = ZfpCompressionMode::FixedPrecision { precision: 7 };

        let encoded = compress(data.view(), &mode).unwrap();
        let decoded = decompress(&encoded).unwrap();

        assert_eq!(decoded.dtype(), AnyArrayDType::F32);
        assert!(decoded.is_empty());
        assert_eq!(decoded.shape(), &[1, 27, 0]);
    }

    /// A six-dimensional array whose axes are mostly of length 1 roundtrips.
    #[test]
    fn one_dimension() {
        let shape = [2_usize, 1, 2, 1, 1, 1];
        let data = Array::from_shape_vec(shape.as_slice(), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
        let mode = ZfpCompressionMode::FixedAccuracy { tolerance: 0.1 };

        let encoded = compress(data.view(), &mode).unwrap();
        let decoded = decompress(&encoded).unwrap();

        assert_eq!(decoded, AnyArray::F32(data));
    }

    /// One-dimensional arrays with zero to four elements roundtrip.
    #[test]
    fn small_state() {
        let mode = ZfpCompressionMode::FixedAccuracy { tolerance: 0.1 };
        let cases: [&[f64]; 5] = [
            &[],
            &[0.0],
            &[0.0, 1.0],
            &[0.0, 1.0, 0.0],
            &[0.0, 1.0, 0.0, 1.0],
        ];

        for data in cases {
            let encoded = compress(ArrayView1::from(data), &mode).unwrap();
            let decoded = decompress(&encoded).unwrap();

            let expected = AnyArray::F64(Array1::from_vec(data.to_vec()).into_dyn());
            assert_eq!(decoded, expected);
        }
    }
}
556}