numcodecs_zlib/
lib.rs

1//! [![CI Status]][workflow] [![MSRV]][repo] [![Latest Version]][crates.io] [![Rust Doc Crate]][docs.rs] [![Rust Doc Main]][docs]
2//!
3//! [CI Status]: https://img.shields.io/github/actions/workflow/status/juntyr/numcodecs-rs/ci.yml?branch=main
4//! [workflow]: https://github.com/juntyr/numcodecs-rs/actions/workflows/ci.yml?query=branch%3Amain
5//!
6//! [MSRV]: https://img.shields.io/badge/MSRV-1.85.0-blue
7//! [repo]: https://github.com/juntyr/numcodecs-rs
8//!
9//! [Latest Version]: https://img.shields.io/crates/v/numcodecs-zlib
10//! [crates.io]: https://crates.io/crates/numcodecs-zlib
11//!
12//! [Rust Doc Crate]: https://img.shields.io/docsrs/numcodecs-zlib
13//! [docs.rs]: https://docs.rs/numcodecs-zlib/
14//!
15//! [Rust Doc Main]: https://img.shields.io/badge/docs-main-blue
16//! [docs]: https://juntyr.github.io/numcodecs-rs/numcodecs_zlib
17//!
18//! Zlib codec implementation for the [`numcodecs`] API.
19
20#![expect(clippy::multiple_crate_versions)] // embedded-io
21
22use std::borrow::Cow;
23
24use ndarray::Array1;
25use numcodecs::{
26    AnyArray, AnyArrayAssignError, AnyArrayDType, AnyArrayView, AnyArrayViewMut, AnyCowArray,
27    Codec, StaticCodec, StaticCodecConfig, StaticCodecVersion,
28};
29use schemars::{JsonSchema, JsonSchema_repr};
30use serde::{Deserialize, Serialize};
31use serde_repr::{Deserialize_repr, Serialize_repr};
32use thiserror::Error;
33
34type ZlibCodecVersion = StaticCodecVersion<0, 1, 0>;
35
36#[derive(Clone, Serialize, Deserialize, JsonSchema)]
37#[serde(deny_unknown_fields)]
38/// Codec providing compression using Zlib
39pub struct ZlibCodec {
40    /// Zlib compression level.
41    ///
42    /// The level ranges from 0, no compression, to 9, best compression.
43    pub level: ZlibLevel,
44    /// The codec's encoding format version. Do not provide this parameter explicitly.
45    #[serde(default, rename = "_version")]
46    pub version: ZlibCodecVersion,
47}
48
49#[derive(Copy, Clone, Serialize_repr, Deserialize_repr, JsonSchema_repr)]
50#[repr(u8)]
51/// Zlib compression level.
52///
53/// The level ranges from 0, no compression, to 9, best compression.
54#[expect(missing_docs)]
55pub enum ZlibLevel {
56    ZNoCompression = 0,
57    ZBestSpeed = 1,
58    ZLevel2 = 2,
59    ZLevel3 = 3,
60    ZLevel4 = 4,
61    ZLevel5 = 5,
62    ZLevel6 = 6,
63    ZLevel7 = 7,
64    ZLevel8 = 8,
65    ZBestCompression = 9,
66}
67
68impl Codec for ZlibCodec {
69    type Error = ZlibCodecError;
70
71    fn encode(&self, data: AnyCowArray) -> Result<AnyArray, Self::Error> {
72        compress(data.view(), self.level)
73            .map(|bytes| AnyArray::U8(Array1::from_vec(bytes).into_dyn()))
74    }
75
76    fn decode(&self, encoded: AnyCowArray) -> Result<AnyArray, Self::Error> {
77        let AnyCowArray::U8(encoded) = encoded else {
78            return Err(ZlibCodecError::EncodedDataNotBytes {
79                dtype: encoded.dtype(),
80            });
81        };
82
83        if !matches!(encoded.shape(), [_]) {
84            return Err(ZlibCodecError::EncodedDataNotOneDimensional {
85                shape: encoded.shape().to_vec(),
86            });
87        }
88
89        decompress(&AnyCowArray::U8(encoded).as_bytes())
90    }
91
92    fn decode_into(
93        &self,
94        encoded: AnyArrayView,
95        decoded: AnyArrayViewMut,
96    ) -> Result<(), Self::Error> {
97        let AnyArrayView::U8(encoded) = encoded else {
98            return Err(ZlibCodecError::EncodedDataNotBytes {
99                dtype: encoded.dtype(),
100            });
101        };
102
103        if !matches!(encoded.shape(), [_]) {
104            return Err(ZlibCodecError::EncodedDataNotOneDimensional {
105                shape: encoded.shape().to_vec(),
106            });
107        }
108
109        decompress_into(&AnyArrayView::U8(encoded).as_bytes(), decoded)
110    }
111}
112
113impl StaticCodec for ZlibCodec {
114    const CODEC_ID: &'static str = "zlib.rs";
115
116    type Config<'de> = Self;
117
118    fn from_config(config: Self::Config<'_>) -> Self {
119        config
120    }
121
122    fn get_config(&self) -> StaticCodecConfig<Self> {
123        StaticCodecConfig::from(self)
124    }
125}
126
127#[derive(Debug, Error)]
128/// Errors that may occur when applying the [`ZlibCodec`].
129pub enum ZlibCodecError {
130    /// [`ZlibCodec`] failed to encode the header
131    #[error("Zlib failed to encode the header")]
132    HeaderEncodeFailed {
133        /// Opaque source error
134        source: ZlibHeaderError,
135    },
136    /// [`ZlibCodec`] can only decode one-dimensional byte arrays but received
137    /// an array of a different dtype
138    #[error(
139        "Zlib can only decode one-dimensional byte arrays but received an array of dtype {dtype}"
140    )]
141    EncodedDataNotBytes {
142        /// The unexpected dtype of the encoded array
143        dtype: AnyArrayDType,
144    },
145    /// [`ZlibCodec`] can only decode one-dimensional byte arrays but received
146    /// an array of a different shape
147    #[error(
148        "Zlib can only decode one-dimensional byte arrays but received a byte array of shape {shape:?}"
149    )]
150    EncodedDataNotOneDimensional {
151        /// The unexpected shape of the encoded array
152        shape: Vec<usize>,
153    },
154    /// [`ZlibCodec`] failed to encode the header
155    #[error("Zlib failed to decode the header")]
156    HeaderDecodeFailed {
157        /// Opaque source error
158        source: ZlibHeaderError,
159    },
160    /// [`ZlibCodec`] decode consumed less encoded data, which contains trailing
161    /// junk
162    #[error("Zlib decode consumed less encoded data, which contains trailing junk")]
163    DecodeExcessiveEncodedData,
164    /// [`ZlibCodec`] produced less decoded data than expected
165    #[error("Zlib produced less decoded data than expected")]
166    DecodeProducedLess,
167    /// [`ZlibCodec`] failed to decode the encoded data
168    #[error("Zlib failed to decode the encoded data")]
169    ZlibDecodeFailed {
170        /// Opaque source error
171        source: ZlibDecodeError,
172    },
173    /// [`ZlibCodec`] cannot decode into the provided array
174    #[error("Zlib cannot decode into the provided array")]
175    MismatchedDecodeIntoArray {
176        /// The source of the error
177        #[from]
178        source: AnyArrayAssignError,
179    },
180}
181
182#[derive(Debug, Error)]
183#[error(transparent)]
184/// Opaque error for when encoding or decoding the header fails
185pub struct ZlibHeaderError(postcard::Error);
186
187#[derive(Debug, Error)]
188#[error(transparent)]
189/// Opaque error for when decoding with Zlib fails
190pub struct ZlibDecodeError(miniz_oxide::inflate::DecompressError);
191
192#[expect(clippy::needless_pass_by_value)]
193/// Compress the `array` using Zlib with the provided `level`.
194///
195/// # Errors
196///
197/// Errors with [`ZlibCodecError::HeaderEncodeFailed`] if encoding the header
198/// to the output bytevec failed.
199///
200/// # Panics
201///
202/// Panics if the infallible encoding with Zlib fails.
203pub fn compress(array: AnyArrayView, level: ZlibLevel) -> Result<Vec<u8>, ZlibCodecError> {
204    let data = array.as_bytes();
205
206    let mut encoded = postcard::to_extend(
207        &CompressionHeader {
208            dtype: array.dtype(),
209            shape: Cow::Borrowed(array.shape()),
210            version: StaticCodecVersion,
211        },
212        Vec::new(),
213    )
214    .map_err(|err| ZlibCodecError::HeaderEncodeFailed {
215        source: ZlibHeaderError(err),
216    })?;
217
218    let mut in_pos = 0;
219    let mut out_pos = encoded.len();
220
221    // The comp flags function sets the zlib flag if the window_bits parameter
222    //  is > 0.
223    let flags =
224        miniz_oxide::deflate::core::create_comp_flags_from_zip_params((level as u8).into(), 1, 0);
225    let mut compressor = miniz_oxide::deflate::core::CompressorOxide::new(flags);
226    encoded.resize(encoded.len() + (data.len() / 2).max(2), 0);
227
228    loop {
229        let (Some(data_left), Some(encoded_left)) =
230            (data.get(in_pos..), encoded.get_mut(out_pos..))
231        else {
232            #[expect(clippy::panic)] // this would be a bug and cannot be user-caused
233            {
234                panic!("Zlib encode bug: input or output is out of bounds")
235            }
236        };
237
238        let (status, bytes_in, bytes_out) = miniz_oxide::deflate::core::compress(
239            &mut compressor,
240            data_left,
241            encoded_left,
242            miniz_oxide::deflate::core::TDEFLFlush::Finish,
243        );
244
245        out_pos += bytes_out;
246        in_pos += bytes_in;
247
248        match status {
249            miniz_oxide::deflate::core::TDEFLStatus::Okay => {
250                // We need more space, so resize the vector.
251                if encoded.len().saturating_sub(out_pos) < 30 {
252                    encoded.resize(encoded.len() * 2, 0);
253                }
254            }
255            miniz_oxide::deflate::core::TDEFLStatus::Done => {
256                encoded.truncate(out_pos);
257
258                assert!(
259                    in_pos == data.len(),
260                    "Zlib encode bug: consumed less input than expected"
261                );
262
263                return Ok(encoded);
264            }
265            #[expect(clippy::panic)] // this would be a bug and cannot be user-caused
266            err => panic!("Zlib encode bug: {err:?}"),
267        }
268    }
269}
270
271/// Decompress the `encoded` data into an array using Zlib.
272///
273/// # Errors
274///
275/// Errors with
276/// - [`ZlibCodecError::HeaderDecodeFailed`] if decoding the header failed
277/// - [`ZlibCodecError::DecodeExcessiveEncodedData`] if the encoded data
278///   contains excessive trailing data junk
279/// - [`ZlibCodecError::DecodeProducedLess`] if decoding produced less data than
280///   expected
281/// - [`ZlibCodecError::ZlibDecodeFailed`] if an opaque decoding error occurred
282pub fn decompress(encoded: &[u8]) -> Result<AnyArray, ZlibCodecError> {
283    let (header, encoded) =
284        postcard::take_from_bytes::<CompressionHeader>(encoded).map_err(|err| {
285            ZlibCodecError::HeaderDecodeFailed {
286                source: ZlibHeaderError(err),
287            }
288        })?;
289
290    let (decoded, result) = AnyArray::with_zeros_bytes(header.dtype, &header.shape, |decoded| {
291        decompress_into_bytes(encoded, decoded)
292    });
293
294    result.map(|()| decoded)
295}
296
297/// Decompress the `encoded` data into a `decoded` array using Zlib.
298///
299/// # Errors
300///
301/// Errors with
302/// - [`ZlibCodecError::HeaderDecodeFailed`] if decoding the header failed
303/// - [`ZlibCodecError::MismatchedDecodeIntoArray`] if the `decoded` array is of
304///   the wrong dtype or shape
305/// - [`ZlibCodecError::HeaderDecodeFailed`] if decoding the header failed
306/// - [`ZlibCodecError::DecodeExcessiveEncodedData`] if the encoded data
307///   contains excessive trailing data junk
308/// - [`ZlibCodecError::DecodeProducedLess`] if decoding produced less data than
309///   expected
310/// - [`ZlibCodecError::ZlibDecodeFailed`] if an opaque decoding error occurred
311pub fn decompress_into(encoded: &[u8], mut decoded: AnyArrayViewMut) -> Result<(), ZlibCodecError> {
312    let (header, encoded) =
313        postcard::take_from_bytes::<CompressionHeader>(encoded).map_err(|err| {
314            ZlibCodecError::HeaderDecodeFailed {
315                source: ZlibHeaderError(err),
316            }
317        })?;
318
319    if header.dtype != decoded.dtype() {
320        return Err(ZlibCodecError::MismatchedDecodeIntoArray {
321            source: AnyArrayAssignError::DTypeMismatch {
322                src: header.dtype,
323                dst: decoded.dtype(),
324            },
325        });
326    }
327
328    if header.shape != decoded.shape() {
329        return Err(ZlibCodecError::MismatchedDecodeIntoArray {
330            source: AnyArrayAssignError::ShapeMismatch {
331                src: header.shape.into_owned(),
332                dst: decoded.shape().to_vec(),
333            },
334        });
335    }
336
337    decoded.with_bytes_mut(|decoded| decompress_into_bytes(encoded, decoded))
338}
339
340fn decompress_into_bytes(encoded: &[u8], decoded: &mut [u8]) -> Result<(), ZlibCodecError> {
341    let flags = miniz_oxide::inflate::core::inflate_flags::TINFL_FLAG_PARSE_ZLIB_HEADER
342        | miniz_oxide::inflate::core::inflate_flags::TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF;
343
344    let mut decomp = Box::<miniz_oxide::inflate::core::DecompressorOxide>::default();
345
346    let (status, in_consumed, out_consumed) =
347        miniz_oxide::inflate::core::decompress(&mut decomp, encoded, decoded, 0, flags);
348
349    match status {
350        miniz_oxide::inflate::TINFLStatus::Done => {
351            if in_consumed != encoded.len() {
352                Err(ZlibCodecError::DecodeExcessiveEncodedData)
353            } else if out_consumed == decoded.len() {
354                Ok(())
355            } else {
356                Err(ZlibCodecError::DecodeProducedLess)
357            }
358        }
359        status => Err(ZlibCodecError::ZlibDecodeFailed {
360            source: ZlibDecodeError(miniz_oxide::inflate::DecompressError {
361                status,
362                output: Vec::new(),
363            }),
364        }),
365    }
366}
367
368#[derive(Serialize, Deserialize)]
369struct CompressionHeader<'a> {
370    dtype: AnyArrayDType,
371    #[serde(borrow)]
372    shape: Cow<'a, [usize]>,
373    version: ZlibCodecVersion,
374}