numcodecs_zfp_classic/
lib.rs

1//! [![CI Status]][workflow] [![MSRV]][repo] [![Latest Version]][crates.io] [![Rust Doc Crate]][docs.rs] [![Rust Doc Main]][docs]
2//!
3//! [CI Status]: https://img.shields.io/github/actions/workflow/status/juntyr/numcodecs-rs/ci.yml?branch=main
4//! [workflow]: https://github.com/juntyr/numcodecs-rs/actions/workflows/ci.yml?query=branch%3Amain
5//!
6//! [MSRV]: https://img.shields.io/badge/MSRV-1.82.0-blue
7//! [repo]: https://github.com/juntyr/numcodecs-rs
8//!
9//! [Latest Version]: https://img.shields.io/crates/v/numcodecs-zfp-classic
10//! [crates.io]: https://crates.io/crates/numcodecs-zfp-classic
11//!
12//! [Rust Doc Crate]: https://img.shields.io/docsrs/numcodecs-zfp-classic
13//! [docs.rs]: https://docs.rs/numcodecs-zfp-classic/
14//!
15//! [Rust Doc Main]: https://img.shields.io/badge/docs-main-blue
16//! [docs]: https://juntyr.github.io/numcodecs-rs/numcodecs_zfp_classic
17//!
18//! ZFP (classic) codec implementation for the [`numcodecs`] API.
19//!
20//! This implementation uses ZFP's default
21//! [`ZFP_ROUNDING_MODE=ZFP_ROUND_NEVER`](https://zfp.readthedocs.io/en/release1.0.1/installation.html#c.ZFP_ROUNDING_MODE)
22//! rounding mode, which is known to increase bias and correlation in ZFP's
23//! errors
24//! (see <https://zfp.readthedocs.io/en/release1.0.1/faq.html#zfp-rounding>).
25//!
26//! Please see the `numcodecs-zfp` codec for an implementation that uses an
27//! improved version of ZFP.
28
29#![allow(clippy::multiple_crate_versions)] // embedded-io
30
31use std::{borrow::Cow, fmt};
32
33use ndarray::{Array, Array1, ArrayView, Dimension};
34use numcodecs::{
35    AnyArray, AnyArrayAssignError, AnyArrayDType, AnyArrayView, AnyArrayViewMut, AnyCowArray,
36    Codec, StaticCodec, StaticCodecConfig,
37};
38use schemars::JsonSchema;
39use serde::{Deserialize, Serialize};
40use thiserror::Error;
41
42mod ffi;
43
// NOTE(review): schemars is documented to copy `///` doc comments into the
// generated JSON schema as descriptions, so the doc comments on this type are
// presumably part of its published schema -- confirm before rewording them.
//
// `#[serde(transparent)]` makes the codec (de)serialize exactly like its
// single `mode` field, without an extra wrapper for the struct itself.
#[derive(Clone, Serialize, Deserialize, JsonSchema)]
#[serde(transparent)]
/// Codec providing compression using ZFP (classic)
pub struct ZfpClassicCodec {
    /// ZFP compression mode
    pub mode: ZfpCompressionMode,
}
51
// NOTE(review): schemars is documented to copy `///` doc comments into the
// generated JSON schema as descriptions, so the doc comments below are
// presumably part of the published schema -- confirm before rewording them.
//
// The enum is internally tagged: the variant is selected by the value of the
// `"mode"` key (one of the `rename` strings below) and the variant's fields
// sit next to that key. `deny_unknown_fields` rejects any additional keys.
#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
#[serde(tag = "mode")]
#[serde(deny_unknown_fields)]
/// ZFP compression mode
pub enum ZfpCompressionMode {
    #[serde(rename = "expert")]
    /// The most general mode, which can describe all four other modes
    Expert {
        /// Minimum number of compressed bits used to represent a block
        min_bits: u32,
        /// Maximum number of bits used to represent a block
        max_bits: u32,
        /// Maximum number of bit planes encoded
        max_prec: u32,
        /// Smallest absolute bit plane number encoded.
        ///
        /// This parameter applies to floating-point data only and is ignored
        /// for integer data.
        min_exp: i32,
    },
    /// In fixed-rate mode, each d-dimensional compressed block of `$4^d$`
    /// values is stored using a fixed number of bits. This number of
    /// compressed bits per block is amortized over the `$4^d$` values to give
    /// a rate of `$rate = \frac{maxbits}{4^d}$` in bits per value.
    #[serde(rename = "fixed-rate")]
    FixedRate {
        /// Rate in bits per value
        rate: f64,
    },
    /// In fixed-precision mode, the number of bits used to encode a block may
    /// vary, but the number of bit planes (the precision) encoded for the
    /// transform coefficients is fixed.
    #[serde(rename = "fixed-precision")]
    FixedPrecision {
        /// Number of bit planes encoded
        precision: u32,
    },
    /// In fixed-accuracy mode, all transform coefficient bit planes up to a
    /// minimum bit plane number are encoded. The smallest absolute bit plane
    /// number is chosen such that
    /// `$minexp = \text{floor}(\log_{2}(tolerance))$`.
    // The codec's `encode` rejects this mode for `i32` and `i64` input.
    #[serde(rename = "fixed-accuracy")]
    FixedAccuracy {
        /// Absolute error tolerance
        tolerance: f64,
    },
    /// Lossless per-block compression that preserves integer and floating point
    /// bit patterns.
    #[serde(rename = "reversible")]
    Reversible,
}
103
104impl Codec for ZfpClassicCodec {
105    type Error = ZfpClassicCodecError;
106
107    fn encode(&self, data: AnyCowArray) -> Result<AnyArray, Self::Error> {
108        if matches!(data.dtype(), AnyArrayDType::I32 | AnyArrayDType::I64)
109            && matches!(
110                self.mode,
111                ZfpCompressionMode::FixedAccuracy { tolerance: _ }
112            )
113        {
114            return Err(ZfpClassicCodecError::FixedAccuracyModeIntegerData);
115        }
116
117        match data {
118            AnyCowArray::I32(data) => Ok(AnyArray::U8(
119                Array1::from(compress(data.view(), &self.mode)?).into_dyn(),
120            )),
121            AnyCowArray::I64(data) => Ok(AnyArray::U8(
122                Array1::from(compress(data.view(), &self.mode)?).into_dyn(),
123            )),
124            AnyCowArray::F32(data) => Ok(AnyArray::U8(
125                Array1::from(compress(data.view(), &self.mode)?).into_dyn(),
126            )),
127            AnyCowArray::F64(data) => Ok(AnyArray::U8(
128                Array1::from(compress(data.view(), &self.mode)?).into_dyn(),
129            )),
130            encoded => Err(ZfpClassicCodecError::UnsupportedDtype(encoded.dtype())),
131        }
132    }
133
134    fn decode(&self, encoded: AnyCowArray) -> Result<AnyArray, Self::Error> {
135        let AnyCowArray::U8(encoded) = encoded else {
136            return Err(ZfpClassicCodecError::EncodedDataNotBytes {
137                dtype: encoded.dtype(),
138            });
139        };
140
141        if !matches!(encoded.shape(), [_]) {
142            return Err(ZfpClassicCodecError::EncodedDataNotOneDimensional {
143                shape: encoded.shape().to_vec(),
144            });
145        }
146
147        decompress(&AnyCowArray::U8(encoded).as_bytes())
148    }
149
150    fn decode_into(
151        &self,
152        encoded: AnyArrayView,
153        decoded: AnyArrayViewMut,
154    ) -> Result<(), Self::Error> {
155        let AnyArrayView::U8(encoded) = encoded else {
156            return Err(ZfpClassicCodecError::EncodedDataNotBytes {
157                dtype: encoded.dtype(),
158            });
159        };
160
161        if !matches!(encoded.shape(), [_]) {
162            return Err(ZfpClassicCodecError::EncodedDataNotOneDimensional {
163                shape: encoded.shape().to_vec(),
164            });
165        }
166
167        decompress_into(&AnyArrayView::U8(encoded).as_bytes(), decoded)
168    }
169}
170
impl StaticCodec for ZfpClassicCodec {
    /// Identifier string of this codec
    const CODEC_ID: &'static str = "zfp-classic";

    /// The codec doubles as its own configuration type
    type Config<'de> = Self;

    /// Builds the codec from its configuration, which already is the codec
    fn from_config(config: Self::Config<'_>) -> Self {
        config
    }

    /// Returns this codec's configuration, converted into a
    /// [`StaticCodecConfig`]
    fn get_config(&self) -> StaticCodecConfig<Self> {
        StaticCodecConfig::from(self)
    }
}
184
#[derive(Debug, Error)]
/// Errors that may occur when applying the [`ZfpClassicCodec`].
pub enum ZfpClassicCodecError {
    /// [`ZfpClassicCodec`] does not support the dtype
    ///
    /// Encoding only accepts `i32`, `i64`, `f32`, and `f64` arrays.
    #[error("ZfpClassic does not support the dtype {0}")]
    UnsupportedDtype(AnyArrayDType),
    /// [`ZfpClassicCodec`] does not support the fixed accuracy mode for
    /// integer data
    ///
    /// Raised when encoding `i32` or `i64` data with
    /// [`ZfpCompressionMode::FixedAccuracy`].
    #[error("ZfpClassic does not support the fixed accuracy mode for integer data")]
    FixedAccuracyModeIntegerData,
    /// [`ZfpClassicCodec`] only supports 1-4 dimensional data
    #[error("ZfpClassic only supports 1-4 dimensional data but found shape {shape:?}")]
    ExcessiveDimensionality {
        /// The unexpected shape of the data
        shape: Vec<usize>,
    },
    /// [`ZfpClassicCodec`] was configured with an invalid expert `mode`
    #[error("ZfpClassic was configured with an invalid expert mode {mode:?}")]
    InvalidExpertMode {
        /// The unexpected compression mode
        mode: ZfpCompressionMode,
    },
    /// [`ZfpClassicCodec`] failed to encode the header
    #[error("ZfpClassic failed to encode the header")]
    HeaderEncodeFailed,
    /// [`ZfpClassicCodec`] failed to encode the array metadata header
    ///
    /// The metadata header stores the dtype and shape of the original array.
    #[error("ZfpClassic failed to encode the array metadata header")]
    MetaHeaderEncodeFailed {
        /// Opaque source error
        source: ZfpHeaderError,
    },
    /// [`ZfpClassicCodec`] failed to encode the data
    #[error("ZfpClassic failed to encode the data")]
    ZfpEncodeFailed,
    /// [`ZfpClassicCodec`] can only decode one-dimensional byte arrays but
    /// received an array of a different dtype
    #[error(
        "ZfpClassic can only decode one-dimensional byte arrays but received an array of dtype {dtype}"
    )]
    EncodedDataNotBytes {
        /// The unexpected dtype of the encoded array
        dtype: AnyArrayDType,
    },
    /// [`ZfpClassicCodec`] can only decode one-dimensional byte arrays but
    /// received an array of a different shape
    #[error("ZfpClassic can only decode one-dimensional byte arrays but received a byte array of shape {shape:?}")]
    EncodedDataNotOneDimensional {
        /// The unexpected shape of the encoded array
        shape: Vec<usize>,
    },
    /// [`ZfpClassicCodec`] failed to decode the header
    #[error("ZfpClassic failed to decode the header")]
    HeaderDecodeFailed,
    /// [`ZfpClassicCodec`] failed to decode the array metadata header
    ///
    /// The metadata header stores the dtype and shape of the original array.
    #[error("ZfpClassic failed to decode the array metadata header")]
    MetaHeaderDecodeFailed {
        /// Opaque source error
        source: ZfpHeaderError,
    },
    /// [`ZfpClassicCodec`] cannot decode into the provided array
    ///
    /// Raised when the output array's shape or dtype differs from the one
    /// recorded in the encoded data.
    #[error("ZfpClassicCodec cannot decode into the provided array")]
    MismatchedDecodeIntoArray {
        /// The shape or dtype mismatch that prevented the assignment
        #[from]
        source: AnyArrayAssignError,
    },
    /// [`ZfpClassicCodec`] failed to decode the data
    #[error("ZfpClassic failed to decode the data")]
    ZfpDecodeFailed,
}
255
#[derive(Debug, Error)]
#[error(transparent)]
/// Opaque error for when encoding or decoding the header fails
///
/// Wraps the underlying [`postcard::Error`] in a private field so that the
/// serialization library does not appear in the public error API;
/// `#[error(transparent)]` forwards the error's display and source to the
/// wrapped error.
pub struct ZfpHeaderError(postcard::Error);
260
261/// Compress the `data` array using ZFP with the provided `mode`.
262///
263/// # Errors
264///
265/// Errors with
266/// - [`ZfpClassicCodecError::ExcessiveDimensionality`] if data is more than
267///   4-dimensional
268/// - [`ZfpClassicCodecError::InvalidExpertMode`] if the `mode` has invalid
269///   expert mode parameters
270/// - [`ZfpClassicCodecError::HeaderEncodeFailed`] if encoding the ZFP header
271///   failed
272/// - [`ZfpClassicCodecError::MetaHeaderEncodeFailed`] if encoding the array
273///   metadata header failed
274/// - [`ZfpClassicCodecError::ZfpEncodeFailed`] if an opaque encoding error
275///   occurred
276pub fn compress<T: ffi::ZfpCompressible, D: Dimension>(
277    data: ArrayView<T, D>,
278    mode: &ZfpCompressionMode,
279) -> Result<Vec<u8>, ZfpClassicCodecError> {
280    let mut encoded = postcard::to_extend(
281        &CompressionHeader {
282            dtype: <T as ffi::ZfpCompressible>::D_TYPE,
283            shape: Cow::Borrowed(data.shape()),
284        },
285        Vec::new(),
286    )
287    .map_err(|err| ZfpClassicCodecError::MetaHeaderEncodeFailed {
288        source: ZfpHeaderError(err),
289    })?;
290
291    // ZFP cannot handle zero-length dimensions
292    if data.is_empty() {
293        return Ok(encoded);
294    }
295
296    // Setup zfp structs to begin compression
297    // Squeeze the data to avoid wasting ZFP dimensions on axes of length 1
298    let field = ffi::ZfpField::new(data.into_dyn().squeeze())?;
299    let stream = ffi::ZfpCompressionStream::new(&field, mode)?;
300
301    // Allocate space based on the maximum size potentially required by zfp to
302    //  store the compressed array
303    let stream = stream.with_bitstream(field, &mut encoded);
304
305    // Write the header so we can reconstruct ZFP's mode on decompression
306    let stream = stream.write_header()?;
307
308    // Compress the field into the allocated output array
309    stream.compress()?;
310
311    Ok(encoded)
312}
313
314/// Decompress the `encoded` data into an array using ZFP.
315///
316/// # Errors
317///
318/// Errors with
319/// - [`ZfpClassicCodecError::HeaderDecodeFailed`] if decoding the ZFP header
320///   failed
321/// - [`ZfpClassicCodecError::MetaHeaderDecodeFailed`] if decoding the array
322///   metadata header failed
323/// - [`ZfpClassicCodecError::ZfpDecodeFailed`] if an opaque decoding error
324///   occurred
325pub fn decompress(encoded: &[u8]) -> Result<AnyArray, ZfpClassicCodecError> {
326    let (header, encoded) =
327        postcard::take_from_bytes::<CompressionHeader>(encoded).map_err(|err| {
328            ZfpClassicCodecError::MetaHeaderDecodeFailed {
329                source: ZfpHeaderError(err),
330            }
331        })?;
332
333    // Return empty data for zero-size arrays
334    if header.shape.iter().copied().product::<usize>() == 0 {
335        let decoded = match header.dtype {
336            ZfpDType::I32 => AnyArray::I32(Array::zeros(&*header.shape)),
337            ZfpDType::I64 => AnyArray::I64(Array::zeros(&*header.shape)),
338            ZfpDType::F32 => AnyArray::F32(Array::zeros(&*header.shape)),
339            ZfpDType::F64 => AnyArray::F64(Array::zeros(&*header.shape)),
340        };
341        return Ok(decoded);
342    }
343
344    // Setup zfp structs to begin decompression
345    let stream = ffi::ZfpDecompressionStream::new(encoded);
346
347    // Read the header to reconstruct ZFP's mode
348    let stream = stream.read_header()?;
349
350    // Decompress the field into a newly allocated output array
351    match header.dtype {
352        ZfpDType::I32 => {
353            let mut decompressed = Array::zeros(&*header.shape);
354            stream.decompress_into(decompressed.view_mut().squeeze())?;
355            Ok(AnyArray::I32(decompressed))
356        }
357        ZfpDType::I64 => {
358            let mut decompressed = Array::zeros(&*header.shape);
359            stream.decompress_into(decompressed.view_mut().squeeze())?;
360            Ok(AnyArray::I64(decompressed))
361        }
362        ZfpDType::F32 => {
363            let mut decompressed = Array::zeros(&*header.shape);
364            stream.decompress_into(decompressed.view_mut().squeeze())?;
365            Ok(AnyArray::F32(decompressed))
366        }
367        ZfpDType::F64 => {
368            let mut decompressed = Array::zeros(&*header.shape);
369            stream.decompress_into(decompressed.view_mut().squeeze())?;
370            Ok(AnyArray::F64(decompressed))
371        }
372    }
373}
374
375/// Decompress the `encoded` data into a `decoded` array using ZFP.
376///
377/// # Errors
378///
379/// Errors with
380/// - [`ZfpClassicCodecError::HeaderDecodeFailed`] if decoding the ZFP header
381///   failed
382/// - [`ZfpClassicCodecError::MetaHeaderDecodeFailed`] if decoding the array
383///   metadata header failed
384/// - [`ZfpClassicCodecError::MismatchedDecodeIntoArray`] if the `decoded`
385///   array is of the wrong dtype or shape
386/// - [`ZfpClassicCodecError::ZfpDecodeFailed`] if an opaque decoding error
387///   occurred
388pub fn decompress_into(
389    encoded: &[u8],
390    decoded: AnyArrayViewMut,
391) -> Result<(), ZfpClassicCodecError> {
392    let (header, encoded) =
393        postcard::take_from_bytes::<CompressionHeader>(encoded).map_err(|err| {
394            ZfpClassicCodecError::MetaHeaderDecodeFailed {
395                source: ZfpHeaderError(err),
396            }
397        })?;
398
399    if decoded.shape() != &*header.shape {
400        return Err(ZfpClassicCodecError::MismatchedDecodeIntoArray {
401            source: AnyArrayAssignError::ShapeMismatch {
402                src: header.shape.into_owned(),
403                dst: decoded.shape().to_vec(),
404            },
405        });
406    }
407
408    // Empty data doesn't need to be initialized
409    if decoded.is_empty() {
410        return Ok(());
411    }
412
413    // Setup zfp structs to begin decompression
414    let stream = ffi::ZfpDecompressionStream::new(encoded);
415
416    // Read the header to reconstruct ZFP's mode
417    let stream = stream.read_header()?;
418
419    // Decompress the field into the output array
420    match (decoded, header.dtype) {
421        (AnyArrayViewMut::I32(decoded), ZfpDType::I32) => stream.decompress_into(decoded.squeeze()),
422        (AnyArrayViewMut::I64(decoded), ZfpDType::I64) => stream.decompress_into(decoded.squeeze()),
423        (AnyArrayViewMut::F32(decoded), ZfpDType::F32) => stream.decompress_into(decoded.squeeze()),
424        (AnyArrayViewMut::F64(decoded), ZfpDType::F64) => stream.decompress_into(decoded.squeeze()),
425        (decoded, dtype) => Err(ZfpClassicCodecError::MismatchedDecodeIntoArray {
426            source: AnyArrayAssignError::DTypeMismatch {
427                src: dtype.into_dtype(),
428                dst: decoded.dtype(),
429            },
430        }),
431    }
432}
433
/// Metadata header that is written in front of the ZFP data.
///
/// It records the dtype and the original shape of the compressed array so
/// that decompression can allocate (or check) the output array.
// NOTE(review): the header is (de)serialized with postcard, so the order and
//  types of these fields presumably define the stored byte layout -- confirm
//  before reordering or changing them.
#[derive(Serialize, Deserialize)]
struct CompressionHeader<'a> {
    /// Element type of the compressed array
    dtype: ZfpDType,
    /// Shape of the array as passed to `compress`; borrowed when encoding
    #[serde(borrow)]
    shape: Cow<'a, [usize]>,
}
440
/// Dtypes that Zfp can compress and decompress
// The variants are intentionally left without doc comments: the
// `#[expect(missing_docs)]` below would be reported as unfulfilled if every
// variant were documented, so add `//` comments here instead of `///`.
//
// Each variant serializes under its short name (e.g. `i32`) and additionally
// deserializes from the long alias (e.g. `int32`).
#[derive(Copy, Clone, Debug, Serialize, Deserialize)]
#[expect(missing_docs)]
pub enum ZfpDType {
    // 32-bit signed integer
    #[serde(rename = "i32", alias = "int32")]
    I32,
    // 64-bit signed integer
    #[serde(rename = "i64", alias = "int64")]
    I64,
    // 32-bit floating point
    #[serde(rename = "f32", alias = "float32")]
    F32,
    // 64-bit floating point
    #[serde(rename = "f64", alias = "float64")]
    F64,
}
454
impl ZfpDType {
    /// Get the corresponding [`AnyArrayDType`]
    ///
    /// Every variant maps onto the [`AnyArrayDType`] variant of the same
    /// name; the match is exhaustive so that adding a new dtype fails to
    /// compile until it is handled here.
    #[must_use]
    pub const fn into_dtype(self) -> AnyArrayDType {
        match self {
            Self::I32 => AnyArrayDType::I32,
            Self::I64 => AnyArrayDType::I64,
            Self::F32 => AnyArrayDType::F32,
            Self::F64 => AnyArrayDType::F64,
        }
    }
}
467
468impl fmt::Display for ZfpDType {
469    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
470        fmt.write_str(match self {
471            Self::I32 => "i32",
472            Self::I64 => "i64",
473            Self::F32 => "f32",
474            Self::F64 => "f64",
475        })
476    }
477}
478
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use ndarray::ArrayView1;

    use super::*;

    /// An array with a zero-length axis decodes to an empty array with the
    /// same dtype and shape.
    #[test]
    fn zero_length() {
        let empty = Array::<f32, _>::from_shape_vec([1, 27, 0].as_slice(), vec![]).unwrap();
        let mode = ZfpCompressionMode::FixedPrecision { precision: 7 };

        let encoded = compress(empty.view(), &mode).unwrap();
        let decoded = decompress(&encoded).unwrap();

        assert_eq!(decoded.dtype(), AnyArrayDType::F32);
        assert!(decoded.is_empty());
        assert_eq!(decoded.shape(), &[1, 27, 0]);
    }

    /// A six-dimensional array whose axes are mostly of length 1 can be
    /// compressed, and decodes to an array equal to the input.
    #[test]
    fn one_dimension() {
        let shape = [2_usize, 1, 2, 1, 1, 1];
        let data = Array::from_shape_vec(shape.as_slice(), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
        let mode = ZfpCompressionMode::FixedAccuracy { tolerance: 0.1 };

        let encoded = compress(data.view(), &mode).unwrap();
        let decoded = decompress(&encoded).unwrap();

        assert_eq!(decoded, AnyArray::F32(data));
    }

    /// One-dimensional inputs with zero to four elements decode to arrays
    /// equal to the input.
    #[test]
    fn small_state() {
        let cases: [&[f64]; 5] = [
            &[],
            &[0.0],
            &[0.0, 1.0],
            &[0.0, 1.0, 0.0],
            &[0.0, 1.0, 0.0, 1.0],
        ];
        let mode = ZfpCompressionMode::FixedAccuracy { tolerance: 0.1 };

        for values in cases {
            let encoded = compress(ArrayView1::from(values), &mode).unwrap();
            let decoded = decompress(&encoded).unwrap();

            let expected = AnyArray::F64(Array1::from_vec(values.to_vec()).into_dyn());
            assert_eq!(decoded, expected);
        }
    }
}