numcodecs/
codec.rs

1use std::{borrow::Cow, error::Error, fmt, marker::PhantomData};
2
3use schemars::{generate::SchemaSettings, json_schema, JsonSchema, Schema, SchemaGenerator};
4use semver::{Version, VersionReq};
5use serde::{Deserialize, Deserializer, Serialize, Serializer};
6use serde_json::Value;
7
8use crate::{AnyArray, AnyArrayView, AnyArrayViewMut, AnyCowArray};
9
/// Compression codec that [`encode`][`Codec::encode`]s and
/// [`decode`][`Codec::decode`]s numeric n-dimensional arrays.
///
/// Implementors must be `'static`, [`Send`], [`Sync`], and [`Clone`].
pub trait Codec: 'static + Send + Sync + Clone {
    /// Error type that may be returned during [`encode`][`Codec::encode`]ing
    /// and [`decode`][`Codec::decode`]ing.
    ///
    /// The error must itself be `'static`, [`Send`], and [`Sync`].
    type Error: 'static + Send + Sync + Error;

    /// Encodes the `data` and returns the result as a new array.
    ///
    /// # Errors
    ///
    /// Errors if encoding the buffer fails.
    fn encode(&self, data: AnyCowArray) -> Result<AnyArray, Self::Error>;

    /// Decodes the `encoded` data and returns the result as a new array.
    ///
    /// # Errors
    ///
    /// Errors if decoding the buffer fails.
    fn decode(&self, encoded: AnyCowArray) -> Result<AnyArray, Self::Error>;

    /// Decodes the `encoded` data and writes the result into the provided
    /// `decoded` output, instead of returning a new array.
    ///
    /// The output must have the correct type and shape.
    ///
    /// # Errors
    ///
    /// Errors if decoding the buffer fails.
    fn decode_into(
        &self,
        encoded: AnyArrayView,
        decoded: AnyArrayViewMut,
    ) -> Result<(), Self::Error>;
}
45
/// Statically typed compression codec.
///
/// A static codec has a compile-time [`CODEC_ID`][`StaticCodec::CODEC_ID`]
/// and a typed [`Config`][`StaticCodec::Config`] from which it can be
/// created.
pub trait StaticCodec: Codec {
    /// Codec identifier.
    const CODEC_ID: &'static str;

    /// Configuration type, from which the codec can be created infallibly.
    ///
    /// The `config` must *not* contain an `id` field.
    ///
    /// The config *must* be compatible with JSON encoding and have a schema.
    type Config<'de>: Serialize + Deserialize<'de> + JsonSchema;

    /// Instantiate a codec from its `config`uration.
    fn from_config(config: Self::Config<'_>) -> Self;

    /// Get the configuration for this codec.
    ///
    /// The [`StaticCodecConfig`] ensures that the returned config includes an
    /// `id` field with the codec's [`StaticCodec::CODEC_ID`].
    fn get_config(&self) -> StaticCodecConfig<Self>;
}
67
/// Dynamically typed compression codec.
///
/// Every codec that implements [`StaticCodec`] also implements [`DynCodec`].
pub trait DynCodec: Codec {
    /// Type object type for this codec.
    ///
    /// The type object's [`DynCodecType::Codec`] must be this codec type.
    type Type: DynCodecType;

    /// Returns the type object for this codec.
    fn ty(&self) -> Self::Type;

    /// Serializes the configuration parameters for this codec into the
    /// provided `serializer`.
    ///
    /// The config *must* include an `id` field with the
    /// [`DynCodecType::codec_id`], for which the
    /// [`serialize_codec_config_with_id`] helper function may be used.
    ///
    /// The config *must* be compatible with JSON encoding.
    ///
    /// # Errors
    ///
    /// Errors if serializing the codec configuration fails.
    fn get_config<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error>;
}
91
/// Type object for dynamically typed compression codecs.
///
/// A type object knows its codec's identifier and configuration schema, and
/// can instantiate codecs from a serialized configuration.
pub trait DynCodecType: 'static + Send + Sync {
    /// Type of the instances of this codec type object.
    type Codec: DynCodec<Type = Self>;

    /// Codec identifier.
    fn codec_id(&self) -> &str;

    /// JSON schema for the codec's configuration.
    fn codec_config_schema(&self) -> Schema;

    /// Instantiate a codec of this type from a serialized `config`uration.
    ///
    /// The `config` must *not* contain an `id` field. If the `config` *may*
    /// contain one, use the [`codec_from_config_with_id`] helper function.
    ///
    /// The `config` *must* be compatible with JSON encoding.
    ///
    /// # Errors
    ///
    /// Errors if constructing the codec fails.
    fn codec_from_config<'de, D: Deserializer<'de>>(
        &self,
        config: D,
    ) -> Result<Self::Codec, D::Error>;
}
118
119impl<T: StaticCodec> DynCodec for T {
120    type Type = StaticCodecType<Self>;
121
122    fn ty(&self) -> Self::Type {
123        StaticCodecType::of()
124    }
125
126    fn get_config<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
127        <T as StaticCodec>::get_config(self).serialize(serializer)
128    }
129}
130
131/// Type object for statically typed compression codecs.
132pub struct StaticCodecType<T: StaticCodec> {
133    _marker: PhantomData<T>,
134}
135
136impl<T: StaticCodec> StaticCodecType<T> {
137    /// Statically obtain the type for a statically typed codec.
138    #[must_use]
139    pub const fn of() -> Self {
140        Self {
141            _marker: PhantomData::<T>,
142        }
143    }
144}
145
146impl<T: StaticCodec> DynCodecType for StaticCodecType<T> {
147    type Codec = T;
148
149    fn codec_id(&self) -> &str {
150        T::CODEC_ID
151    }
152
153    fn codec_config_schema(&self) -> Schema {
154        let mut settings = SchemaSettings::draft2020_12();
155        // TODO: perhaps this could be done as a more generally applicable
156        //       transformation instead
157        settings.inline_subschemas = true;
158        settings
159            .into_generator()
160            .into_root_schema_for::<T::Config<'static>>()
161    }
162
163    fn codec_from_config<'de, D: Deserializer<'de>>(
164        &self,
165        config: D,
166    ) -> Result<Self::Codec, D::Error> {
167        let config = T::Config::deserialize(config)?;
168        Ok(T::from_config(config))
169    }
170}
171
172/// Utility struct to serialize a [`StaticCodec`]'s [`StaticCodec::Config`]
173/// together with its [`StaticCodec::CODEC_ID`]
174#[derive(Serialize, Deserialize)]
175#[serde(bound = "")]
176pub struct StaticCodecConfig<'a, T: StaticCodec> {
177    #[serde(default)]
178    id: StaticCodecId<T>,
179    /// The configuration parameters
180    #[serde(flatten)]
181    #[serde(borrow)]
182    pub config: T::Config<'a>,
183}
184
185impl<'a, T: StaticCodec> StaticCodecConfig<'a, T> {
186    /// Wraps the `config` so that it can be serialized together with its
187    /// [`StaticCodec::CODEC_ID`]
188    #[must_use]
189    pub const fn new(config: T::Config<'a>) -> Self {
190        Self {
191            id: StaticCodecId::of(),
192            config,
193        }
194    }
195}
196
197impl<'a, T: StaticCodec> From<&T::Config<'a>> for StaticCodecConfig<'a, T>
198where
199    T::Config<'a>: Clone,
200{
201    fn from(config: &T::Config<'a>) -> Self {
202        Self::new(config.clone())
203    }
204}
205
206struct StaticCodecId<T: StaticCodec>(PhantomData<T>);
207
208impl<T: StaticCodec> StaticCodecId<T> {
209    #[must_use]
210    pub const fn of() -> Self {
211        Self(PhantomData::<T>)
212    }
213}
214
215impl<T: StaticCodec> Default for StaticCodecId<T> {
216    fn default() -> Self {
217        Self::of()
218    }
219}
220
221impl<T: StaticCodec> Serialize for StaticCodecId<T> {
222    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
223        T::CODEC_ID.serialize(serializer)
224    }
225}
226
227impl<'de, T: StaticCodec> Deserialize<'de> for StaticCodecId<T> {
228    fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
229        let id = Cow::<str>::deserialize(deserializer)?;
230        let id = &*id;
231
232        if id != T::CODEC_ID {
233            return Err(serde::de::Error::custom(format!(
234                "expected codec id {:?} but found {id:?}",
235                T::CODEC_ID,
236            )));
237        }
238
239        Ok(Self::of())
240    }
241}
242
243/// Utility function to serialize a codec's config together with its
244/// [`DynCodecType::codec_id`].
245///
246/// This function may be useful when implementing the [`DynCodec::get_config`]
247/// method.
248///
249/// # Errors
250///
251/// Errors if serializing the codec configuration fails.
252pub fn serialize_codec_config_with_id<T: Serialize, C: DynCodec, S: Serializer>(
253    config: &T,
254    codec: &C,
255    serializer: S,
256) -> Result<S::Ok, S::Error> {
257    #[derive(Serialize)]
258    struct DynCodecConfigWithId<'a, T> {
259        id: &'a str,
260        #[serde(flatten)]
261        config: &'a T,
262    }
263
264    DynCodecConfigWithId {
265        id: codec.ty().codec_id(),
266        config,
267    }
268    .serialize(serializer)
269}
270
271/// Utility function to instantiate a codec of the given `ty`, where the
272/// `config` *may* still contain an `id` field.
273///
274/// If the `config` does *not* contain an `id` field, use
275/// [`DynCodecType::codec_from_config`] instead.
276///
277/// # Errors
278///
279/// Errors if constructing the codec fails.
280pub fn codec_from_config_with_id<'de, T: DynCodecType, D: Deserializer<'de>>(
281    ty: &T,
282    config: D,
283) -> Result<T::Codec, D::Error> {
284    let mut config = Value::deserialize(config)?;
285
286    if let Some(config) = config.as_object_mut() {
287        if let Some(id) = config.remove("id") {
288            let codec_id = ty.codec_id();
289
290            if !matches!(id, Value::String(ref id) if id == codec_id) {
291                return Err(serde::de::Error::custom(format!(
292                    "expected codec id {codec_id:?} but found {id}"
293                )));
294            }
295        }
296    }
297
298    ty.codec_from_config(config)
299        .map_err(serde::de::Error::custom)
300}
301
/// Marker type that represents the semantic version of a codec.
///
/// The codec's version can be decoupled from its implementation version to
/// allow implementation changes that have no effect on the codec's semantics
/// or encoded representation.
///
/// `StaticCodecVersion`s serialize transparently to their equivalent
/// [`Version`]s. On deserialization, the deserialized [`Version`] is checked
/// to be compatible (`^`) with the `StaticCodecVersion`, i.e. the
/// `StaticCodecVersion` must be of the same or a newer compatible version.
pub struct StaticCodecVersion<const MAJOR: u64, const MINOR: u64, const PATCH: u64>;

impl<const MAJOR: u64, const MINOR: u64, const PATCH: u64> StaticCodecVersion<MAJOR, MINOR, PATCH> {
    /// Extract the semantic version `MAJOR.MINOR.PATCH` encoded in the const
    /// generic parameters.
    #[must_use]
    pub const fn version() -> Version {
        Version::new(MAJOR, MINOR, PATCH)
    }
}
321
// `Clone` is implemented by hand on this `Copy` type, so clippy's
// `expl_impl_clone_on_copy` lint is expected to fire here.
#[expect(clippy::expl_impl_clone_on_copy)]
impl<const MAJOR: u64, const MINOR: u64, const PATCH: u64> Clone
    for StaticCodecVersion<MAJOR, MINOR, PATCH>
{
    fn clone(&self) -> Self {
        // Cloning is a plain copy of the marker.
        *self
    }
}

// The version marker is freely copyable.
impl<const MAJOR: u64, const MINOR: u64, const PATCH: u64> Copy
    for StaticCodecVersion<MAJOR, MINOR, PATCH>
{
}
335
336impl<const MAJOR: u64, const MINOR: u64, const PATCH: u64> fmt::Debug
337    for StaticCodecVersion<MAJOR, MINOR, PATCH>
338{
339    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
340        <semver::Version as fmt::Debug>::fmt(&Self::version(), fmt)
341    }
342}
343
344impl<const MAJOR: u64, const MINOR: u64, const PATCH: u64> fmt::Display
345    for StaticCodecVersion<MAJOR, MINOR, PATCH>
346{
347    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
348        <semver::Version as fmt::Display>::fmt(&Self::version(), fmt)
349    }
350}
351
352impl<const MAJOR: u64, const MINOR: u64, const PATCH: u64> Default
353    for StaticCodecVersion<MAJOR, MINOR, PATCH>
354{
355    fn default() -> Self {
356        Self
357    }
358}
359
360impl<const MAJOR: u64, const MINOR: u64, const PATCH: u64> Serialize
361    for StaticCodecVersion<MAJOR, MINOR, PATCH>
362{
363    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
364        Self::version().serialize(serializer)
365    }
366}
367
368impl<'de, const MAJOR: u64, const MINOR: u64, const PATCH: u64> Deserialize<'de>
369    for StaticCodecVersion<MAJOR, MINOR, PATCH>
370{
371    fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
372        let version = Version::deserialize(deserializer)?;
373
374        let requirement = VersionReq {
375            comparators: vec![semver::Comparator {
376                op: semver::Op::Caret,
377                major: version.major,
378                minor: Some(version.minor),
379                patch: Some(version.patch),
380                pre: version.pre,
381            }],
382        };
383
384        if !requirement.matches(&Self::version()) {
385            return Err(serde::de::Error::custom(format!(
386                "{Self} does not fulfil {requirement}"
387            )));
388        }
389
390        Ok(Self)
391    }
392}
393
394impl<const MAJOR: u64, const MINOR: u64, const PATCH: u64> JsonSchema
395    for StaticCodecVersion<MAJOR, MINOR, PATCH>
396{
397    fn schema_name() -> Cow<'static, str> {
398        Cow::Borrowed("StaticCodecVersion")
399    }
400
401    fn schema_id() -> Cow<'static, str> {
402        Cow::Borrowed(concat!(module_path!(), "::", "StaticCodecVersion"))
403    }
404
405    fn json_schema(_gen: &mut SchemaGenerator) -> Schema {
406        json_schema!({
407            "type": "string",
408            "pattern": r"^(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)(?:-((?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+([0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$",
409            "description": "A semver.org compliant semantic version number.",
410        })
411    }
412}