numcodecs_zfp/
lib.rs

1//! [![CI Status]][workflow] [![MSRV]][repo] [![Latest Version]][crates.io] [![Rust Doc Crate]][docs.rs] [![Rust Doc Main]][docs]
2//!
3//! [CI Status]: https://img.shields.io/github/actions/workflow/status/juntyr/numcodecs-rs/ci.yml?branch=main
4//! [workflow]: https://github.com/juntyr/numcodecs-rs/actions/workflows/ci.yml?query=branch%3Amain
5//!
6//! [MSRV]: https://img.shields.io/badge/MSRV-1.87.0-blue
7//! [repo]: https://github.com/juntyr/numcodecs-rs
8//!
9//! [Latest Version]: https://img.shields.io/crates/v/numcodecs-zfp
10//! [crates.io]: https://crates.io/crates/numcodecs-zfp
11//!
12//! [Rust Doc Crate]: https://img.shields.io/docsrs/numcodecs-zfp
13//! [docs.rs]: https://docs.rs/numcodecs-zfp/
14//!
15//! [Rust Doc Main]: https://img.shields.io/badge/docs-main-blue
16//! [docs]: https://juntyr.github.io/numcodecs-rs/numcodecs_zfp
17//!
18//! ZFP codec implementation for the [`numcodecs`] API.
19//!
20//! This implementation uses ZFP's
21//! [`ZFP_ROUNDING_MODE=ZFP_ROUND_FIRST`](https://zfp.readthedocs.io/en/release1.0.1/installation.html#c.ZFP_ROUNDING_MODE)
22//! and
23//! [`ZFP_WITH_TIGHT_ERROR=ON`](https://zfp.readthedocs.io/en/release1.0.1/installation.html#c.ZFP_WITH_TIGHT_ERROR)
24//! experimental features to reduce the bias and correlation in ZFP's errors
25//! (see <https://zfp.readthedocs.io/en/release1.0.1/faq.html#zfp-rounding>).
26//!
//! By default, this implementation also rejects non-reversibly compressing
//! non-finite (infinite or NaN) values, since ZFP's behaviour for them is
//! undefined (see <https://zfp.readthedocs.io/en/release1.0.1/faq.html#q-valid>).
//! This check can be disabled with the `allow-unsafe` non-finite values mode.
30//!
31//! Please see the `numcodecs-zfp-classic` codec for an implementation that
32//! uses ZFP without these modifications.
33
34#![allow(clippy::multiple_crate_versions)] // embedded-io
35
36use std::{borrow::Cow, fmt};
37
38use ndarray::{Array, Array1, ArrayView, Dimension, Zip};
39use numcodecs::{
40    AnyArray, AnyArrayAssignError, AnyArrayDType, AnyArrayView, AnyArrayViewMut, AnyCowArray,
41    Codec, StaticCodec, StaticCodecConfig, StaticCodecVersion,
42};
43use schemars::JsonSchema;
44use serde::{Deserialize, Serialize};
45use thiserror::Error;
46
47#[cfg(test)]
48use ::serde_json as _;
49
50mod ffi;
51
52type ZfpCodecVersion = StaticCodecVersion<0, 2, 0>;
53
54#[derive(Clone, Serialize, Deserialize, JsonSchema)]
55// serde cannot deny unknown fields because of the flatten
56#[schemars(deny_unknown_fields)]
57/// Codec providing compression using ZFP
58pub struct ZfpCodec {
59    /// ZFP compression mode
60    #[serde(flatten)]
61    pub mode: ZfpCompressionMode,
62    /// ZFP non-finite values mode
63    #[serde(default)]
64    pub non_finite: ZfpNonFiniteValuesMode,
65    /// The codec's encoding format version. Do not provide this parameter explicitly.
66    #[serde(default, rename = "_version")]
67    pub version: ZfpCodecVersion,
68}
69
70#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
71#[serde(tag = "mode")]
72#[serde(deny_unknown_fields)]
73/// ZFP compression mode
74pub enum ZfpCompressionMode {
75    #[serde(rename = "expert")]
76    /// The most general mode, which can describe all four other modes
77    Expert {
78        /// Minimum number of compressed bits used to represent a block
79        min_bits: u32,
80        /// Maximum number of bits used to represent a block
81        max_bits: u32,
82        /// Maximum number of bit planes encoded
83        max_prec: u32,
84        /// Smallest absolute bit plane number encoded.
85        ///
86        /// This parameter applies to floating-point data only and is ignored
87        /// for integer data.
88        min_exp: i32,
89    },
90    /// In fixed-rate mode, each d-dimensional compressed block of `$4^d$`
91    /// values is stored using a fixed number of bits. This number of
92    /// compressed bits per block is amortized over the `$4^d$` values to give
93    /// a rate of `$rate = \frac{maxbits}{4^d}$` in bits per value.
94    #[serde(rename = "fixed-rate")]
95    FixedRate {
96        /// Rate in bits per value
97        rate: f64,
98    },
99    /// In fixed-precision mode, the number of bits used to encode a block may
100    /// vary, but the number of bit planes (the precision) encoded for the
101    /// transform coefficients is fixed.
102    #[serde(rename = "fixed-precision")]
103    FixedPrecision {
104        /// Number of bit planes encoded
105        precision: u32,
106    },
107    /// In fixed-accuracy mode, all transform coefficient bit planes up to a
108    /// minimum bit plane number are encoded. The smallest absolute bit plane
109    /// number is chosen such that
110    /// `$minexp = \text{floor}(\log_{2}(tolerance))$`.
111    #[serde(rename = "fixed-accuracy")]
112    FixedAccuracy {
113        /// Absolute error tolerance
114        tolerance: f64,
115    },
116    /// Lossless per-block compression that preserves integer and floating point
117    /// bit patterns.
118    #[serde(rename = "reversible")]
119    Reversible,
120}
121
122#[derive(Copy, Clone, Debug, Default, Serialize, Deserialize, JsonSchema)]
123/// ZFP non-finite values mode
124pub enum ZfpNonFiniteValuesMode {
125    /// Deny compressing non-finite values, i.e. return an error.
126    #[default]
127    #[serde(rename = "deny")]
128    Deny,
129    /// Unsafely allow compressing non-finite values, even though undefined
130    /// behaviour may be triggered, see
131    /// <https://zfp.readthedocs.io/en/release1.0.1/faq.html#q-valid>.
132    #[serde(rename = "allow-unsafe")]
133    AllowUnsafe,
134}
135
136impl Codec for ZfpCodec {
137    type Error = ZfpCodecError;
138
139    fn encode(&self, data: AnyCowArray) -> Result<AnyArray, Self::Error> {
140        if matches!(data.dtype(), AnyArrayDType::I32 | AnyArrayDType::I64)
141            && matches!(
142                self.mode,
143                ZfpCompressionMode::FixedAccuracy { tolerance: _ }
144            )
145        {
146            return Err(ZfpCodecError::FixedAccuracyModeIntegerData);
147        }
148
149        match data {
150            AnyCowArray::I32(data) => Ok(AnyArray::U8(
151                Array1::from(compress(data.view(), &self.mode, self.non_finite)?).into_dyn(),
152            )),
153            AnyCowArray::I64(data) => Ok(AnyArray::U8(
154                Array1::from(compress(data.view(), &self.mode, self.non_finite)?).into_dyn(),
155            )),
156            AnyCowArray::F32(data) => Ok(AnyArray::U8(
157                Array1::from(compress(data.view(), &self.mode, self.non_finite)?).into_dyn(),
158            )),
159            AnyCowArray::F64(data) => Ok(AnyArray::U8(
160                Array1::from(compress(data.view(), &self.mode, self.non_finite)?).into_dyn(),
161            )),
162            encoded => Err(ZfpCodecError::UnsupportedDtype(encoded.dtype())),
163        }
164    }
165
166    fn decode(&self, encoded: AnyCowArray) -> Result<AnyArray, Self::Error> {
167        let AnyCowArray::U8(encoded) = encoded else {
168            return Err(ZfpCodecError::EncodedDataNotBytes {
169                dtype: encoded.dtype(),
170            });
171        };
172
173        if !matches!(encoded.shape(), [_]) {
174            return Err(ZfpCodecError::EncodedDataNotOneDimensional {
175                shape: encoded.shape().to_vec(),
176            });
177        }
178
179        decompress(&AnyCowArray::U8(encoded).as_bytes())
180    }
181
182    fn decode_into(
183        &self,
184        encoded: AnyArrayView,
185        decoded: AnyArrayViewMut,
186    ) -> Result<(), Self::Error> {
187        let AnyArrayView::U8(encoded) = encoded else {
188            return Err(ZfpCodecError::EncodedDataNotBytes {
189                dtype: encoded.dtype(),
190            });
191        };
192
193        if !matches!(encoded.shape(), [_]) {
194            return Err(ZfpCodecError::EncodedDataNotOneDimensional {
195                shape: encoded.shape().to_vec(),
196            });
197        }
198
199        decompress_into(&AnyArrayView::U8(encoded).as_bytes(), decoded)
200    }
201}
202
203impl StaticCodec for ZfpCodec {
204    const CODEC_ID: &'static str = "zfp.rs";
205
206    type Config<'de> = Self;
207
208    fn from_config(config: Self::Config<'_>) -> Self {
209        config
210    }
211
212    fn get_config(&self) -> StaticCodecConfig<'_, Self> {
213        StaticCodecConfig::from(self)
214    }
215}
216
217#[derive(Debug, Error)]
218/// Errors that may occur when applying the [`ZfpCodec`].
219pub enum ZfpCodecError {
220    /// [`ZfpCodec`] does not support the dtype
221    #[error("Zfp does not support the dtype {0}")]
222    UnsupportedDtype(AnyArrayDType),
223    /// [`ZfpCodec`] does not support the fixed accuracy mode for integer data
224    #[error("Zfp does not support the fixed accuracy mode for integer data")]
225    FixedAccuracyModeIntegerData,
226    /// [`ZfpCodec`] only supports 1-4 dimensional data
227    #[error("Zfp only supports 1-4 dimensional data but found shape {shape:?}")]
228    ExcessiveDimensionality {
229        /// The unexpected shape of the data
230        shape: Vec<usize>,
231    },
232    /// [`ZfpCodec`] was configured with an invalid expert `mode`
233    #[error("Zfp was configured with an invalid expert mode {mode:?}")]
234    InvalidExpertMode {
235        /// The unexpected compression mode
236        mode: ZfpCompressionMode,
237    },
238    /// [`ZfpCodec`] does not support non-finite (infinite or NaN) floating
239    /// point data  in non-reversible lossy compression
240    #[error(
241        "Zfp does not support non-finite (infinite or NaN) floating point data in non-reversible lossy compression"
242    )]
243    NonFiniteData,
244    /// [`ZfpCodec`] failed to encode the header
245    #[error("Zfp failed to encode the header")]
246    HeaderEncodeFailed,
247    /// [`ZfpCodec`] failed to encode the array metadata header
248    #[error("Zfp failed to encode the array metadata header")]
249    MetaHeaderEncodeFailed {
250        /// Opaque source error
251        source: ZfpHeaderError,
252    },
253    /// [`ZfpCodec`] failed to encode the data
254    #[error("Zfp failed to encode the data")]
255    ZfpEncodeFailed,
256    /// [`ZfpCodec`] can only decode one-dimensional byte arrays but received
257    /// an array of a different dtype
258    #[error(
259        "Zfp can only decode one-dimensional byte arrays but received an array of dtype {dtype}"
260    )]
261    EncodedDataNotBytes {
262        /// The unexpected dtype of the encoded array
263        dtype: AnyArrayDType,
264    },
265    /// [`ZfpCodec`] can only decode one-dimensional byte arrays but received
266    /// an array of a different shape
267    #[error(
268        "Zfp can only decode one-dimensional byte arrays but received a byte array of shape {shape:?}"
269    )]
270    EncodedDataNotOneDimensional {
271        /// The unexpected shape of the encoded array
272        shape: Vec<usize>,
273    },
274    /// [`ZfpCodec`] failed to decode the header
275    #[error("Zfp failed to decode the header")]
276    HeaderDecodeFailed,
277    /// [`ZfpCodec`] failed to decode the array metadata header
278    #[error("Zfp failed to decode the array metadata header")]
279    MetaHeaderDecodeFailed {
280        /// Opaque source error
281        source: ZfpHeaderError,
282    },
283    /// [`ZfpCodec`] cannot decode into the provided array
284    #[error("ZfpCodec cannot decode into the provided array")]
285    MismatchedDecodeIntoArray {
286        /// The source of the error
287        #[from]
288        source: AnyArrayAssignError,
289    },
290    /// [`ZfpCodec`] failed to decode the data
291    #[error("Zfp failed to decode the data")]
292    ZfpDecodeFailed,
293}
294
295#[derive(Debug, Error)]
296#[error(transparent)]
297/// Opaque error for when encoding or decoding the header fails
298pub struct ZfpHeaderError(postcard::Error);
299
300/// Compress the `data` array using ZFP with the provided `mode`.
301///
302/// # Errors
303///
304/// Errors with
305/// - [`ZfpCodecError::NonFiniteData`] if any data element is non-finite
306///   (infinite or NaN) and a non-reversible lossy compression `mode` is used
307///   and the `non_finite` mode is not [`ZfpNonFiniteValuesMode::AllowUnsafe`]
308/// - [`ZfpCodecError::ExcessiveDimensionality`] if data is more than
309///   4-dimensional
310/// - [`ZfpCodecError::InvalidExpertMode`] if the `mode` has invalid expert mode
311///   parameters
312/// - [`ZfpCodecError::HeaderEncodeFailed`] if encoding the ZFP header failed
313/// - [`ZfpCodecError::MetaHeaderEncodeFailed`] if encoding the array metadata
314///   header failed
315/// - [`ZfpCodecError::ZfpEncodeFailed`] if an opaque encoding error occurred
316pub fn compress<T: ffi::ZfpCompressible, D: Dimension>(
317    data: ArrayView<T, D>,
318    mode: &ZfpCompressionMode,
319    non_finite: ZfpNonFiniteValuesMode,
320) -> Result<Vec<u8>, ZfpCodecError> {
321    if !matches!(mode, ZfpCompressionMode::Reversible)
322        && !matches!(non_finite, ZfpNonFiniteValuesMode::AllowUnsafe)
323        && !Zip::from(&data).all(|x| x.is_finite())
324    {
325        return Err(ZfpCodecError::NonFiniteData);
326    }
327
328    let mut encoded = postcard::to_extend(
329        &CompressionHeader {
330            dtype: <T as ffi::ZfpCompressible>::D_TYPE,
331            shape: Cow::Borrowed(data.shape()),
332            version: StaticCodecVersion,
333        },
334        Vec::new(),
335    )
336    .map_err(|err| ZfpCodecError::MetaHeaderEncodeFailed {
337        source: ZfpHeaderError(err),
338    })?;
339
340    // ZFP cannot handle zero-length dimensions
341    if data.is_empty() {
342        return Ok(encoded);
343    }
344
345    // Setup zfp structs to begin compression
346    // Squeeze the data to avoid wasting ZFP dimensions on axes of length 1
347    let field = ffi::ZfpField::new(data.into_dyn().squeeze())?;
348    let stream = ffi::ZfpCompressionStream::new(&field, mode)?;
349
350    // Allocate space based on the maximum size potentially required by zfp to
351    //  store the compressed array
352    let stream = stream.with_bitstream(field, &mut encoded);
353
354    // Write the header so we can reconstruct ZFP's mode on decompression
355    let stream = stream.write_header()?;
356
357    // Compress the field into the allocated output array
358    stream.compress()?;
359
360    Ok(encoded)
361}
362
363/// Decompress the `encoded` data into an array using ZFP.
364///
365/// # Errors
366///
367/// Errors with
368/// - [`ZfpCodecError::HeaderDecodeFailed`] if decoding the ZFP header failed
369/// - [`ZfpCodecError::MetaHeaderDecodeFailed`] if decoding the array metadata
370///   header failed
371/// - [`ZfpCodecError::ZfpDecodeFailed`] if an opaque decoding error occurred
372pub fn decompress(encoded: &[u8]) -> Result<AnyArray, ZfpCodecError> {
373    let (header, encoded) =
374        postcard::take_from_bytes::<CompressionHeader>(encoded).map_err(|err| {
375            ZfpCodecError::MetaHeaderDecodeFailed {
376                source: ZfpHeaderError(err),
377            }
378        })?;
379
380    // Return empty data for zero-size arrays
381    if header.shape.iter().copied().product::<usize>() == 0 {
382        let decoded = match header.dtype {
383            ZfpDType::I32 => AnyArray::I32(Array::zeros(&*header.shape)),
384            ZfpDType::I64 => AnyArray::I64(Array::zeros(&*header.shape)),
385            ZfpDType::F32 => AnyArray::F32(Array::zeros(&*header.shape)),
386            ZfpDType::F64 => AnyArray::F64(Array::zeros(&*header.shape)),
387        };
388        return Ok(decoded);
389    }
390
391    // Setup zfp structs to begin decompression
392    let stream = ffi::ZfpDecompressionStream::new(encoded);
393
394    // Read the header to reconstruct ZFP's mode
395    let stream = stream.read_header()?;
396
397    // Decompress the field into a newly allocated output array
398    match header.dtype {
399        ZfpDType::I32 => {
400            let mut decompressed = Array::zeros(&*header.shape);
401            stream.decompress_into(decompressed.view_mut().squeeze())?;
402            Ok(AnyArray::I32(decompressed))
403        }
404        ZfpDType::I64 => {
405            let mut decompressed = Array::zeros(&*header.shape);
406            stream.decompress_into(decompressed.view_mut().squeeze())?;
407            Ok(AnyArray::I64(decompressed))
408        }
409        ZfpDType::F32 => {
410            let mut decompressed = Array::zeros(&*header.shape);
411            stream.decompress_into(decompressed.view_mut().squeeze())?;
412            Ok(AnyArray::F32(decompressed))
413        }
414        ZfpDType::F64 => {
415            let mut decompressed = Array::zeros(&*header.shape);
416            stream.decompress_into(decompressed.view_mut().squeeze())?;
417            Ok(AnyArray::F64(decompressed))
418        }
419    }
420}
421
422/// Decompress the `encoded` data into a `decoded` array using ZFP.
423///
424/// # Errors
425///
426/// Errors with
427/// - [`ZfpCodecError::HeaderDecodeFailed`] if decoding the ZFP header failed
428/// - [`ZfpCodecError::MetaHeaderDecodeFailed`] if decoding the array metadata
429///   header failed
430/// - [`ZfpCodecError::MismatchedDecodeIntoArray`] if the `decoded` array is of
431///   the wrong dtype or shape
432/// - [`ZfpCodecError::ZfpDecodeFailed`] if an opaque decoding error occurred
433pub fn decompress_into(encoded: &[u8], decoded: AnyArrayViewMut) -> Result<(), ZfpCodecError> {
434    let (header, encoded) =
435        postcard::take_from_bytes::<CompressionHeader>(encoded).map_err(|err| {
436            ZfpCodecError::MetaHeaderDecodeFailed {
437                source: ZfpHeaderError(err),
438            }
439        })?;
440
441    if decoded.shape() != &*header.shape {
442        return Err(ZfpCodecError::MismatchedDecodeIntoArray {
443            source: AnyArrayAssignError::ShapeMismatch {
444                src: header.shape.into_owned(),
445                dst: decoded.shape().to_vec(),
446            },
447        });
448    }
449
450    // Empty data doesn't need to be initialized
451    if decoded.is_empty() {
452        return Ok(());
453    }
454
455    // Setup zfp structs to begin decompression
456    let stream = ffi::ZfpDecompressionStream::new(encoded);
457
458    // Read the header to reconstruct ZFP's mode
459    let stream = stream.read_header()?;
460
461    // Decompress the field into the output array
462    match (decoded, header.dtype) {
463        (AnyArrayViewMut::I32(decoded), ZfpDType::I32) => stream.decompress_into(decoded.squeeze()),
464        (AnyArrayViewMut::I64(decoded), ZfpDType::I64) => stream.decompress_into(decoded.squeeze()),
465        (AnyArrayViewMut::F32(decoded), ZfpDType::F32) => stream.decompress_into(decoded.squeeze()),
466        (AnyArrayViewMut::F64(decoded), ZfpDType::F64) => stream.decompress_into(decoded.squeeze()),
467        (decoded, dtype) => Err(ZfpCodecError::MismatchedDecodeIntoArray {
468            source: AnyArrayAssignError::DTypeMismatch {
469                src: dtype.into_dtype(),
470                dst: decoded.dtype(),
471            },
472        }),
473    }
474}
475
476#[derive(Serialize, Deserialize)]
477struct CompressionHeader<'a> {
478    dtype: ZfpDType,
479    #[serde(borrow)]
480    shape: Cow<'a, [usize]>,
481    version: ZfpCodecVersion,
482}
483
484/// Dtypes that Zfp can compress and decompress
485#[derive(Copy, Clone, Debug, Serialize, Deserialize)]
486#[expect(missing_docs)]
487pub enum ZfpDType {
488    #[serde(rename = "i32", alias = "int32")]
489    I32,
490    #[serde(rename = "i64", alias = "int64")]
491    I64,
492    #[serde(rename = "f32", alias = "float32")]
493    F32,
494    #[serde(rename = "f64", alias = "float64")]
495    F64,
496}
497
498impl ZfpDType {
499    /// Get the corresponding [`AnyArrayDType`]
500    #[must_use]
501    pub const fn into_dtype(self) -> AnyArrayDType {
502        match self {
503            Self::I32 => AnyArrayDType::I32,
504            Self::I64 => AnyArrayDType::I64,
505            Self::F32 => AnyArrayDType::F32,
506            Self::F64 => AnyArrayDType::F64,
507        }
508    }
509}
510
511impl fmt::Display for ZfpDType {
512    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
513        fmt.write_str(match self {
514            Self::I32 => "i32",
515            Self::I64 => "i64",
516            Self::F32 => "f32",
517            Self::F64 => "f64",
518        })
519    }
520}
521
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use ndarray::ArrayView1;

    use super::*;

    /// An array with a zero-length axis round-trips with its dtype and full
    /// shape intact.
    #[test]
    fn zero_length() {
        let shape: [usize; 3] = [1, 27, 0];
        let empty = Array::<f32, _>::from_shape_vec(shape.as_slice(), Vec::new()).unwrap();
        let mode = ZfpCompressionMode::FixedPrecision { precision: 7 };

        let encoded = compress(empty.view(), &mode, ZfpNonFiniteValuesMode::Deny).unwrap();
        let decoded = decompress(&encoded).unwrap();

        assert_eq!(decoded.dtype(), AnyArrayDType::F32);
        assert!(decoded.is_empty());
        assert_eq!(decoded.shape(), &shape);
    }

    /// A six-dimensional array whose non-unit axes fit into ZFP's supported
    /// dimensionality round-trips with its original shape.
    #[test]
    fn one_dimension() {
        let shape: [usize; 6] = [2, 1, 2, 1, 1, 1];
        let values: Vec<f32> = vec![1.0, 2.0, 3.0, 4.0];
        let data = Array::from_shape_vec(shape.as_slice(), values).unwrap();
        let mode = ZfpCompressionMode::FixedAccuracy { tolerance: 0.1 };

        let encoded = compress(data.view(), &mode, ZfpNonFiniteValuesMode::Deny).unwrap();
        let decoded = decompress(&encoded).unwrap();

        assert_eq!(decoded, AnyArray::F32(data));
    }

    /// Very short one-dimensional inputs, including the empty one, round-trip.
    #[test]
    fn small_state() {
        let cases: [&[f64]; 5] = [
            &[],
            &[0.0],
            &[0.0, 1.0],
            &[0.0, 1.0, 0.0],
            &[0.0, 1.0, 0.0, 1.0],
        ];
        let mode = ZfpCompressionMode::FixedAccuracy { tolerance: 0.1 };

        for values in cases {
            let encoded = compress(
                ArrayView1::from(values),
                &mode,
                ZfpNonFiniteValuesMode::Deny,
            )
            .unwrap();
            let decoded = decompress(&encoded).unwrap();

            let expected = AnyArray::F64(Array1::from_vec(values.to_vec()).into_dyn());
            assert_eq!(decoded, expected);
        }
    }
}