numcodecs_zfp/
lib.rs

1//! [![CI Status]][workflow] [![MSRV]][repo] [![Latest Version]][crates.io] [![Rust Doc Crate]][docs.rs] [![Rust Doc Main]][docs]
2//!
3//! [CI Status]: https://img.shields.io/github/actions/workflow/status/juntyr/numcodecs-rs/ci.yml?branch=main
4//! [workflow]: https://github.com/juntyr/numcodecs-rs/actions/workflows/ci.yml?query=branch%3Amain
5//!
6//! [MSRV]: https://img.shields.io/badge/MSRV-1.85.0-blue
7//! [repo]: https://github.com/juntyr/numcodecs-rs
8//!
9//! [Latest Version]: https://img.shields.io/crates/v/numcodecs-zfp
10//! [crates.io]: https://crates.io/crates/numcodecs-zfp
11//!
12//! [Rust Doc Crate]: https://img.shields.io/docsrs/numcodecs-zfp
13//! [docs.rs]: https://docs.rs/numcodecs-zfp/
14//!
15//! [Rust Doc Main]: https://img.shields.io/badge/docs-main-blue
16//! [docs]: https://juntyr.github.io/numcodecs-rs/numcodecs_zfp
17//!
18//! ZFP codec implementation for the [`numcodecs`] API.
19//!
20//! This implementation uses ZFP's
21//! [`ZFP_ROUNDING_MODE=ZFP_ROUND_FIRST`](https://zfp.readthedocs.io/en/release1.0.1/installation.html#c.ZFP_ROUNDING_MODE)
22//! and
23//! [`ZFP_WITH_TIGHT_ERROR=ON`](https://zfp.readthedocs.io/en/release1.0.1/installation.html#c.ZFP_WITH_TIGHT_ERROR)
24//! experimental features to reduce the bias and correlation in ZFP's errors
25//! (see <https://zfp.readthedocs.io/en/release1.0.1/faq.html#zfp-rounding>).
26//!
27//! This implementation also rejects non-reversibly compressing non-finite
28//! (infinite or NaN) values, since ZFP's behaviour for them is undefined
29//! (see <https://zfp.readthedocs.io/en/release1.0.1/faq.html#q-valid>).
30//!
31//! Please see the `numcodecs-zfp-classic` codec for an implementation that
32//! uses ZFP without these modifications.
33
34#![allow(clippy::multiple_crate_versions)] // embedded-io
35
36use std::{borrow::Cow, fmt};
37
38use ndarray::{Array, Array1, ArrayView, Dimension, Zip};
39use numcodecs::{
40    AnyArray, AnyArrayAssignError, AnyArrayDType, AnyArrayView, AnyArrayViewMut, AnyCowArray,
41    Codec, StaticCodec, StaticCodecConfig, StaticCodecVersion,
42};
43use schemars::JsonSchema;
44use serde::{Deserialize, Serialize};
45use thiserror::Error;
46
47#[cfg(test)]
48use ::serde_json as _;
49
50mod ffi;
51
52type ZfpCodecVersion = StaticCodecVersion<0, 1, 0>;
53
54#[derive(Clone, Serialize, Deserialize, JsonSchema)]
55// serde cannot deny unknown fields because of the flatten
56#[schemars(deny_unknown_fields)]
57/// Codec providing compression using ZFP
58pub struct ZfpCodec {
59    /// ZFP compression mode
60    #[serde(flatten)]
61    pub mode: ZfpCompressionMode,
62    /// The codec's encoding format version. Do not provide this parameter explicitly.
63    #[serde(default, rename = "_version")]
64    pub version: ZfpCodecVersion,
65}
66
67#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
68#[serde(tag = "mode")]
69#[serde(deny_unknown_fields)]
70/// ZFP compression mode
71pub enum ZfpCompressionMode {
72    #[serde(rename = "expert")]
73    /// The most general mode, which can describe all four other modes
74    Expert {
75        /// Minimum number of compressed bits used to represent a block
76        min_bits: u32,
77        /// Maximum number of bits used to represent a block
78        max_bits: u32,
79        /// Maximum number of bit planes encoded
80        max_prec: u32,
81        /// Smallest absolute bit plane number encoded.
82        ///
83        /// This parameter applies to floating-point data only and is ignored
84        /// for integer data.
85        min_exp: i32,
86    },
87    /// In fixed-rate mode, each d-dimensional compressed block of `$4^d$`
88    /// values is stored using a fixed number of bits. This number of
89    /// compressed bits per block is amortized over the `$4^d$` values to give
90    /// a rate of `$rate = \frac{maxbits}{4^d}$` in bits per value.
91    #[serde(rename = "fixed-rate")]
92    FixedRate {
93        /// Rate in bits per value
94        rate: f64,
95    },
96    /// In fixed-precision mode, the number of bits used to encode a block may
97    /// vary, but the number of bit planes (the precision) encoded for the
98    /// transform coefficients is fixed.
99    #[serde(rename = "fixed-precision")]
100    FixedPrecision {
101        /// Number of bit planes encoded
102        precision: u32,
103    },
104    /// In fixed-accuracy mode, all transform coefficient bit planes up to a
105    /// minimum bit plane number are encoded. The smallest absolute bit plane
106    /// number is chosen such that
107    /// `$minexp = \text{floor}(\log_{2}(tolerance))$`.
108    #[serde(rename = "fixed-accuracy")]
109    FixedAccuracy {
110        /// Absolute error tolerance
111        tolerance: f64,
112    },
113    /// Lossless per-block compression that preserves integer and floating point
114    /// bit patterns.
115    #[serde(rename = "reversible")]
116    Reversible,
117}
118
119impl Codec for ZfpCodec {
120    type Error = ZfpCodecError;
121
122    fn encode(&self, data: AnyCowArray) -> Result<AnyArray, Self::Error> {
123        if matches!(data.dtype(), AnyArrayDType::I32 | AnyArrayDType::I64)
124            && matches!(
125                self.mode,
126                ZfpCompressionMode::FixedAccuracy { tolerance: _ }
127            )
128        {
129            return Err(ZfpCodecError::FixedAccuracyModeIntegerData);
130        }
131
132        match data {
133            AnyCowArray::I32(data) => Ok(AnyArray::U8(
134                Array1::from(compress(data.view(), &self.mode)?).into_dyn(),
135            )),
136            AnyCowArray::I64(data) => Ok(AnyArray::U8(
137                Array1::from(compress(data.view(), &self.mode)?).into_dyn(),
138            )),
139            AnyCowArray::F32(data) => Ok(AnyArray::U8(
140                Array1::from(compress(data.view(), &self.mode)?).into_dyn(),
141            )),
142            AnyCowArray::F64(data) => Ok(AnyArray::U8(
143                Array1::from(compress(data.view(), &self.mode)?).into_dyn(),
144            )),
145            encoded => Err(ZfpCodecError::UnsupportedDtype(encoded.dtype())),
146        }
147    }
148
149    fn decode(&self, encoded: AnyCowArray) -> Result<AnyArray, Self::Error> {
150        let AnyCowArray::U8(encoded) = encoded else {
151            return Err(ZfpCodecError::EncodedDataNotBytes {
152                dtype: encoded.dtype(),
153            });
154        };
155
156        if !matches!(encoded.shape(), [_]) {
157            return Err(ZfpCodecError::EncodedDataNotOneDimensional {
158                shape: encoded.shape().to_vec(),
159            });
160        }
161
162        decompress(&AnyCowArray::U8(encoded).as_bytes())
163    }
164
165    fn decode_into(
166        &self,
167        encoded: AnyArrayView,
168        decoded: AnyArrayViewMut,
169    ) -> Result<(), Self::Error> {
170        let AnyArrayView::U8(encoded) = encoded else {
171            return Err(ZfpCodecError::EncodedDataNotBytes {
172                dtype: encoded.dtype(),
173            });
174        };
175
176        if !matches!(encoded.shape(), [_]) {
177            return Err(ZfpCodecError::EncodedDataNotOneDimensional {
178                shape: encoded.shape().to_vec(),
179            });
180        }
181
182        decompress_into(&AnyArrayView::U8(encoded).as_bytes(), decoded)
183    }
184}
185
186impl StaticCodec for ZfpCodec {
187    const CODEC_ID: &'static str = "zfp.rs";
188
189    type Config<'de> = Self;
190
191    fn from_config(config: Self::Config<'_>) -> Self {
192        config
193    }
194
195    fn get_config(&self) -> StaticCodecConfig<Self> {
196        StaticCodecConfig::from(self)
197    }
198}
199
200#[derive(Debug, Error)]
201/// Errors that may occur when applying the [`ZfpCodec`].
202pub enum ZfpCodecError {
203    /// [`ZfpCodec`] does not support the dtype
204    #[error("Zfp does not support the dtype {0}")]
205    UnsupportedDtype(AnyArrayDType),
206    /// [`ZfpCodec`] does not support the fixed accuracy mode for integer data
207    #[error("Zfp does not support the fixed accuracy mode for integer data")]
208    FixedAccuracyModeIntegerData,
209    /// [`ZfpCodec`] only supports 1-4 dimensional data
210    #[error("Zfp only supports 1-4 dimensional data but found shape {shape:?}")]
211    ExcessiveDimensionality {
212        /// The unexpected shape of the data
213        shape: Vec<usize>,
214    },
215    /// [`ZfpCodec`] was configured with an invalid expert `mode`
216    #[error("Zfp was configured with an invalid expert mode {mode:?}")]
217    InvalidExpertMode {
218        /// The unexpected compression mode
219        mode: ZfpCompressionMode,
220    },
221    /// [`ZfpCodec`] does not support non-finite (infinite or NaN) floating
222    /// point data  in non-reversible lossy compression
223    #[error(
224        "Zfp does not support non-finite (infinite or NaN) floating point data in non-reversible lossy compression"
225    )]
226    NonFiniteData,
227    /// [`ZfpCodec`] failed to encode the header
228    #[error("Zfp failed to encode the header")]
229    HeaderEncodeFailed,
230    /// [`ZfpCodec`] failed to encode the array metadata header
231    #[error("Zfp failed to encode the array metadata header")]
232    MetaHeaderEncodeFailed {
233        /// Opaque source error
234        source: ZfpHeaderError,
235    },
236    /// [`ZfpCodec`] failed to encode the data
237    #[error("Zfp failed to encode the data")]
238    ZfpEncodeFailed,
239    /// [`ZfpCodec`] can only decode one-dimensional byte arrays but received
240    /// an array of a different dtype
241    #[error(
242        "Zfp can only decode one-dimensional byte arrays but received an array of dtype {dtype}"
243    )]
244    EncodedDataNotBytes {
245        /// The unexpected dtype of the encoded array
246        dtype: AnyArrayDType,
247    },
248    /// [`ZfpCodec`] can only decode one-dimensional byte arrays but received
249    /// an array of a different shape
250    #[error(
251        "Zfp can only decode one-dimensional byte arrays but received a byte array of shape {shape:?}"
252    )]
253    EncodedDataNotOneDimensional {
254        /// The unexpected shape of the encoded array
255        shape: Vec<usize>,
256    },
257    /// [`ZfpCodec`] failed to decode the header
258    #[error("Zfp failed to decode the header")]
259    HeaderDecodeFailed,
260    /// [`ZfpCodec`] failed to decode the array metadata header
261    #[error("Zfp failed to decode the array metadata header")]
262    MetaHeaderDecodeFailed {
263        /// Opaque source error
264        source: ZfpHeaderError,
265    },
266    /// [`ZfpCodec`] cannot decode into the provided array
267    #[error("ZfpCodec cannot decode into the provided array")]
268    MismatchedDecodeIntoArray {
269        /// The source of the error
270        #[from]
271        source: AnyArrayAssignError,
272    },
273    /// [`ZfpCodec`] failed to decode the data
274    #[error("Zfp failed to decode the data")]
275    ZfpDecodeFailed,
276}
277
278#[derive(Debug, Error)]
279#[error(transparent)]
280/// Opaque error for when encoding or decoding the header fails
281pub struct ZfpHeaderError(postcard::Error);
282
283/// Compress the `data` array using ZFP with the provided `mode`.
284///
285/// # Errors
286///
287/// Errors with
288/// - [`ZfpCodecError::NonFiniteData`] if any data element is non-finite
289///   (infinite or NaN) and a non-reversible lossy compression `mode` is used
290/// - [`ZfpCodecError::ExcessiveDimensionality`] if data is more than
291///   4-dimensional
292/// - [`ZfpCodecError::InvalidExpertMode`] if the `mode` has invalid expert mode
293///   parameters
294/// - [`ZfpCodecError::HeaderEncodeFailed`] if encoding the ZFP header failed
295/// - [`ZfpCodecError::MetaHeaderEncodeFailed`] if encoding the array metadata
296///   header failed
297/// - [`ZfpCodecError::ZfpEncodeFailed`] if an opaque encoding error occurred
298pub fn compress<T: ffi::ZfpCompressible, D: Dimension>(
299    data: ArrayView<T, D>,
300    mode: &ZfpCompressionMode,
301) -> Result<Vec<u8>, ZfpCodecError> {
302    if !matches!(mode, ZfpCompressionMode::Reversible) && !Zip::from(&data).all(|x| x.is_finite()) {
303        return Err(ZfpCodecError::NonFiniteData);
304    }
305
306    let mut encoded = postcard::to_extend(
307        &CompressionHeader {
308            dtype: <T as ffi::ZfpCompressible>::D_TYPE,
309            shape: Cow::Borrowed(data.shape()),
310            version: StaticCodecVersion,
311        },
312        Vec::new(),
313    )
314    .map_err(|err| ZfpCodecError::MetaHeaderEncodeFailed {
315        source: ZfpHeaderError(err),
316    })?;
317
318    // ZFP cannot handle zero-length dimensions
319    if data.is_empty() {
320        return Ok(encoded);
321    }
322
323    // Setup zfp structs to begin compression
324    // Squeeze the data to avoid wasting ZFP dimensions on axes of length 1
325    let field = ffi::ZfpField::new(data.into_dyn().squeeze())?;
326    let stream = ffi::ZfpCompressionStream::new(&field, mode)?;
327
328    // Allocate space based on the maximum size potentially required by zfp to
329    //  store the compressed array
330    let stream = stream.with_bitstream(field, &mut encoded);
331
332    // Write the header so we can reconstruct ZFP's mode on decompression
333    let stream = stream.write_header()?;
334
335    // Compress the field into the allocated output array
336    stream.compress()?;
337
338    Ok(encoded)
339}
340
341/// Decompress the `encoded` data into an array using ZFP.
342///
343/// # Errors
344///
345/// Errors with
346/// - [`ZfpCodecError::HeaderDecodeFailed`] if decoding the ZFP header failed
347/// - [`ZfpCodecError::MetaHeaderDecodeFailed`] if decoding the array metadata
348///   header failed
349/// - [`ZfpCodecError::ZfpDecodeFailed`] if an opaque decoding error occurred
350pub fn decompress(encoded: &[u8]) -> Result<AnyArray, ZfpCodecError> {
351    let (header, encoded) =
352        postcard::take_from_bytes::<CompressionHeader>(encoded).map_err(|err| {
353            ZfpCodecError::MetaHeaderDecodeFailed {
354                source: ZfpHeaderError(err),
355            }
356        })?;
357
358    // Return empty data for zero-size arrays
359    if header.shape.iter().copied().product::<usize>() == 0 {
360        let decoded = match header.dtype {
361            ZfpDType::I32 => AnyArray::I32(Array::zeros(&*header.shape)),
362            ZfpDType::I64 => AnyArray::I64(Array::zeros(&*header.shape)),
363            ZfpDType::F32 => AnyArray::F32(Array::zeros(&*header.shape)),
364            ZfpDType::F64 => AnyArray::F64(Array::zeros(&*header.shape)),
365        };
366        return Ok(decoded);
367    }
368
369    // Setup zfp structs to begin decompression
370    let stream = ffi::ZfpDecompressionStream::new(encoded);
371
372    // Read the header to reconstruct ZFP's mode
373    let stream = stream.read_header()?;
374
375    // Decompress the field into a newly allocated output array
376    match header.dtype {
377        ZfpDType::I32 => {
378            let mut decompressed = Array::zeros(&*header.shape);
379            stream.decompress_into(decompressed.view_mut().squeeze())?;
380            Ok(AnyArray::I32(decompressed))
381        }
382        ZfpDType::I64 => {
383            let mut decompressed = Array::zeros(&*header.shape);
384            stream.decompress_into(decompressed.view_mut().squeeze())?;
385            Ok(AnyArray::I64(decompressed))
386        }
387        ZfpDType::F32 => {
388            let mut decompressed = Array::zeros(&*header.shape);
389            stream.decompress_into(decompressed.view_mut().squeeze())?;
390            Ok(AnyArray::F32(decompressed))
391        }
392        ZfpDType::F64 => {
393            let mut decompressed = Array::zeros(&*header.shape);
394            stream.decompress_into(decompressed.view_mut().squeeze())?;
395            Ok(AnyArray::F64(decompressed))
396        }
397    }
398}
399
400/// Decompress the `encoded` data into a `decoded` array using ZFP.
401///
402/// # Errors
403///
404/// Errors with
405/// - [`ZfpCodecError::HeaderDecodeFailed`] if decoding the ZFP header failed
406/// - [`ZfpCodecError::MetaHeaderDecodeFailed`] if decoding the array metadata
407///   header failed
408/// - [`ZfpCodecError::MismatchedDecodeIntoArray`] if the `decoded` array is of
409///   the wrong dtype or shape
410/// - [`ZfpCodecError::ZfpDecodeFailed`] if an opaque decoding error occurred
411pub fn decompress_into(encoded: &[u8], decoded: AnyArrayViewMut) -> Result<(), ZfpCodecError> {
412    let (header, encoded) =
413        postcard::take_from_bytes::<CompressionHeader>(encoded).map_err(|err| {
414            ZfpCodecError::MetaHeaderDecodeFailed {
415                source: ZfpHeaderError(err),
416            }
417        })?;
418
419    if decoded.shape() != &*header.shape {
420        return Err(ZfpCodecError::MismatchedDecodeIntoArray {
421            source: AnyArrayAssignError::ShapeMismatch {
422                src: header.shape.into_owned(),
423                dst: decoded.shape().to_vec(),
424            },
425        });
426    }
427
428    // Empty data doesn't need to be initialized
429    if decoded.is_empty() {
430        return Ok(());
431    }
432
433    // Setup zfp structs to begin decompression
434    let stream = ffi::ZfpDecompressionStream::new(encoded);
435
436    // Read the header to reconstruct ZFP's mode
437    let stream = stream.read_header()?;
438
439    // Decompress the field into the output array
440    match (decoded, header.dtype) {
441        (AnyArrayViewMut::I32(decoded), ZfpDType::I32) => stream.decompress_into(decoded.squeeze()),
442        (AnyArrayViewMut::I64(decoded), ZfpDType::I64) => stream.decompress_into(decoded.squeeze()),
443        (AnyArrayViewMut::F32(decoded), ZfpDType::F32) => stream.decompress_into(decoded.squeeze()),
444        (AnyArrayViewMut::F64(decoded), ZfpDType::F64) => stream.decompress_into(decoded.squeeze()),
445        (decoded, dtype) => Err(ZfpCodecError::MismatchedDecodeIntoArray {
446            source: AnyArrayAssignError::DTypeMismatch {
447                src: dtype.into_dtype(),
448                dst: decoded.dtype(),
449            },
450        }),
451    }
452}
453
454#[derive(Serialize, Deserialize)]
455struct CompressionHeader<'a> {
456    dtype: ZfpDType,
457    #[serde(borrow)]
458    shape: Cow<'a, [usize]>,
459    version: ZfpCodecVersion,
460}
461
462/// Dtypes that Zfp can compress and decompress
463#[derive(Copy, Clone, Debug, Serialize, Deserialize)]
464#[expect(missing_docs)]
465pub enum ZfpDType {
466    #[serde(rename = "i32", alias = "int32")]
467    I32,
468    #[serde(rename = "i64", alias = "int64")]
469    I64,
470    #[serde(rename = "f32", alias = "float32")]
471    F32,
472    #[serde(rename = "f64", alias = "float64")]
473    F64,
474}
475
476impl ZfpDType {
477    /// Get the corresponding [`AnyArrayDType`]
478    #[must_use]
479    pub const fn into_dtype(self) -> AnyArrayDType {
480        match self {
481            Self::I32 => AnyArrayDType::I32,
482            Self::I64 => AnyArrayDType::I64,
483            Self::F32 => AnyArrayDType::F32,
484            Self::F64 => AnyArrayDType::F64,
485        }
486    }
487}
488
489impl fmt::Display for ZfpDType {
490    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
491        fmt.write_str(match self {
492            Self::I32 => "i32",
493            Self::I64 => "i64",
494            Self::F32 => "f32",
495            Self::F64 => "f64",
496        })
497    }
498}
499
500#[cfg(test)]
501#[allow(clippy::unwrap_used)]
502mod tests {
503    use ndarray::ArrayView1;
504
505    use super::*;
506
507    #[test]
508    fn zero_length() {
509        let encoded = compress(
510            Array::<f32, _>::from_shape_vec([1, 27, 0].as_slice(), vec![])
511                .unwrap()
512                .view(),
513            &ZfpCompressionMode::FixedPrecision { precision: 7 },
514        )
515        .unwrap();
516        let decoded = decompress(&encoded).unwrap();
517
518        assert_eq!(decoded.dtype(), AnyArrayDType::F32);
519        assert!(decoded.is_empty());
520        assert_eq!(decoded.shape(), &[1, 27, 0]);
521    }
522
523    #[test]
524    fn one_dimension() {
525        let data = Array::from_shape_vec(
526            [2_usize, 1, 2, 1, 1, 1].as_slice(),
527            vec![1.0, 2.0, 3.0, 4.0],
528        )
529        .unwrap();
530
531        let encoded = compress(
532            data.view(),
533            &ZfpCompressionMode::FixedAccuracy { tolerance: 0.1 },
534        )
535        .unwrap();
536        let decoded = decompress(&encoded).unwrap();
537
538        assert_eq!(decoded, AnyArray::F32(data));
539    }
540
541    #[test]
542    fn small_state() {
543        for data in [
544            &[][..],
545            &[0.0],
546            &[0.0, 1.0],
547            &[0.0, 1.0, 0.0],
548            &[0.0, 1.0, 0.0, 1.0],
549        ] {
550            let encoded = compress(
551                ArrayView1::from(data),
552                &ZfpCompressionMode::FixedAccuracy { tolerance: 0.1 },
553            )
554            .unwrap();
555            let decoded = decompress(&encoded).unwrap();
556
557            assert_eq!(
558                decoded,
559                AnyArray::F64(Array1::from_vec(data.to_vec()).into_dyn())
560            );
561        }
562    }
563}