numcodecs/
codec.rs

1use std::{borrow::Cow, error::Error, marker::PhantomData};
2
3use schemars::{generate::SchemaSettings, JsonSchema, Schema};
4use serde::{Deserialize, Deserializer, Serialize, Serializer};
5use serde_json::Value;
6
7use crate::{AnyArray, AnyArrayView, AnyArrayViewMut, AnyCowArray};
8
9/// Compression codec that [`encode`][`Codec::encode`]s and
10/// [`decode`][`Codec::decode`]s numeric n-dimensional arrays.
11pub trait Codec: 'static + Send + Sync + Clone {
12    /// Error type that may be returned during [`encode`][`Codec::encode`]ing
13    /// and [`decode`][`Codec::decode`]ing.
14    type Error: 'static + Send + Sync + Error;
15
16    /// Encodes the `data` and returns the result.
17    ///
18    /// # Errors
19    ///
20    /// Errors if encoding the buffer fails.
21    fn encode(&self, data: AnyCowArray) -> Result<AnyArray, Self::Error>;
22
23    /// Decodes the `encoded` data and returns the result.
24    ///
25    /// # Errors
26    ///
27    /// Errors if decoding the buffer fails.
28    fn decode(&self, encoded: AnyCowArray) -> Result<AnyArray, Self::Error>;
29
30    /// Decodes the `encoded` data and writes the result into the provided
31    /// `decoded` output.
32    ///
33    /// The output must have the correct type and shape.
34    ///
35    /// # Errors
36    ///
37    /// Errors if decoding the buffer fails.
38    fn decode_into(
39        &self,
40        encoded: AnyArrayView,
41        decoded: AnyArrayViewMut,
42    ) -> Result<(), Self::Error>;
43}
44
45/// Statically typed compression codec.
46pub trait StaticCodec: Codec {
47    /// Codec identifier.
48    const CODEC_ID: &'static str;
49
50    /// Configuration type, from which the codec can be created infallibly.
51    ///
52    /// The `config` must *not* contain an `id` field.
53    ///
54    /// The config *must* be compatible with JSON encoding and have a schema.
55    type Config<'de>: Serialize + Deserialize<'de> + JsonSchema;
56
57    /// Instantiate a codec from its `config`uration.
58    fn from_config(config: Self::Config<'_>) -> Self;
59
60    /// Get the configuration for this codec.
61    ///
62    /// The [`StaticCodecConfig`] ensures that the returned config includes an
63    /// `id` field with the codec's [`StaticCodec::CODEC_ID`].
64    fn get_config(&self) -> StaticCodecConfig<Self>;
65}
66
67/// Dynamically typed compression codec.
68///
69/// Every codec that implements [`StaticCodec`] also implements [`DynCodec`].
70pub trait DynCodec: Codec {
71    /// Type object type for this codec.
72    type Type: DynCodecType;
73
74    /// Returns the type object for this codec.
75    fn ty(&self) -> Self::Type;
76
77    /// Serializes the configuration parameters for this codec.
78    ///
79    /// The config *must* include an `id` field with the
80    /// [`DynCodecType::codec_id`], for which the
81    /// [`serialize_codec_config_with_id`] helper function may be used.
82    ///
83    /// The config *must* be compatible with JSON encoding.
84    ///
85    /// # Errors
86    ///
87    /// Errors if serializing the codec configuration fails.
88    fn get_config<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error>;
89}
90
91/// Type object for dynamically typed compression codecs.
92pub trait DynCodecType: 'static + Send + Sync {
93    /// Type of the instances of this codec type object.
94    type Codec: DynCodec<Type = Self>;
95
96    /// Codec identifier.
97    fn codec_id(&self) -> &str;
98
99    /// JSON schema for the codec's configuration.
100    fn codec_config_schema(&self) -> Schema;
101
102    /// Instantiate a codec of this type from a serialized `config`uration.
103    ///
104    /// The `config` must *not* contain an `id` field. If the `config` *may*
105    /// contain one, use the [`codec_from_config_with_id`] helper function.
106    ///
107    /// The `config` *must* be compatible with JSON encoding.
108    ///
109    /// # Errors
110    ///
111    /// Errors if constructing the codec fails.
112    fn codec_from_config<'de, D: Deserializer<'de>>(
113        &self,
114        config: D,
115    ) -> Result<Self::Codec, D::Error>;
116}
117
118impl<T: StaticCodec> DynCodec for T {
119    type Type = StaticCodecType<Self>;
120
121    fn ty(&self) -> Self::Type {
122        StaticCodecType::of()
123    }
124
125    fn get_config<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
126        <T as StaticCodec>::get_config(self).serialize(serializer)
127    }
128}
129
130/// Type object for statically typed compression codecs.
131pub struct StaticCodecType<T: StaticCodec> {
132    _marker: PhantomData<T>,
133}
134
135impl<T: StaticCodec> StaticCodecType<T> {
136    /// Statically obtain the type for a statically typed codec.
137    #[must_use]
138    pub const fn of() -> Self {
139        Self {
140            _marker: PhantomData::<T>,
141        }
142    }
143}
144
145impl<T: StaticCodec> DynCodecType for StaticCodecType<T> {
146    type Codec = T;
147
148    fn codec_id(&self) -> &str {
149        T::CODEC_ID
150    }
151
152    fn codec_config_schema(&self) -> Schema {
153        let mut settings = SchemaSettings::draft2020_12();
154        // TODO: perhaps this could be done as a more generally applicable
155        //       transformation instead
156        settings.inline_subschemas = true;
157        settings
158            .into_generator()
159            .into_root_schema_for::<T::Config<'static>>()
160    }
161
162    fn codec_from_config<'de, D: Deserializer<'de>>(
163        &self,
164        config: D,
165    ) -> Result<Self::Codec, D::Error> {
166        let config = T::Config::deserialize(config)?;
167        Ok(T::from_config(config))
168    }
169}
170
171/// Utility struct to serialize a [`StaticCodec`]'s [`StaticCodec::Config`]
172/// together with its [`StaticCodec::CODEC_ID`]
173#[derive(Serialize, Deserialize)]
174#[serde(bound = "")]
175pub struct StaticCodecConfig<'a, T: StaticCodec> {
176    #[serde(default)]
177    id: StaticCodecId<T>,
178    /// The configuration parameters
179    #[serde(flatten)]
180    #[serde(borrow)]
181    pub config: T::Config<'a>,
182}
183
184impl<'a, T: StaticCodec> StaticCodecConfig<'a, T> {
185    /// Wraps the `config` so that it can be serialized together with its
186    /// [`StaticCodec::CODEC_ID`]
187    #[must_use]
188    pub const fn new(config: T::Config<'a>) -> Self {
189        Self {
190            id: StaticCodecId::of(),
191            config,
192        }
193    }
194}
195
196impl<'a, T: StaticCodec> From<&T::Config<'a>> for StaticCodecConfig<'a, T>
197where
198    T::Config<'a>: Clone,
199{
200    fn from(config: &T::Config<'a>) -> Self {
201        Self::new(config.clone())
202    }
203}
204
205struct StaticCodecId<T: StaticCodec>(PhantomData<T>);
206
207impl<T: StaticCodec> StaticCodecId<T> {
208    #[must_use]
209    pub const fn of() -> Self {
210        Self(PhantomData::<T>)
211    }
212}
213
214impl<T: StaticCodec> Default for StaticCodecId<T> {
215    fn default() -> Self {
216        Self::of()
217    }
218}
219
220impl<T: StaticCodec> Serialize for StaticCodecId<T> {
221    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
222        T::CODEC_ID.serialize(serializer)
223    }
224}
225
226impl<'de, T: StaticCodec> Deserialize<'de> for StaticCodecId<T> {
227    fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
228        let id = Cow::<str>::deserialize(deserializer)?;
229        let id = &*id;
230
231        if id != T::CODEC_ID {
232            return Err(serde::de::Error::custom(format!(
233                "expected codec id {:?} but found {id:?}",
234                T::CODEC_ID,
235            )));
236        }
237
238        Ok(Self::of())
239    }
240}
241
242/// Utility function to serialize a codec's config together with its
243/// [`DynCodecType::codec_id`].
244///
245/// This function may be useful when implementing the [`DynCodec::get_config`]
246/// method.
247///
248/// # Errors
249///
250/// Errors if serializing the codec configuration fails.
251pub fn serialize_codec_config_with_id<T: Serialize, C: DynCodec, S: Serializer>(
252    config: &T,
253    codec: &C,
254    serializer: S,
255) -> Result<S::Ok, S::Error> {
256    #[derive(Serialize)]
257    struct DynCodecConfigWithId<'a, T> {
258        id: &'a str,
259        #[serde(flatten)]
260        config: &'a T,
261    }
262
263    DynCodecConfigWithId {
264        id: codec.ty().codec_id(),
265        config,
266    }
267    .serialize(serializer)
268}
269
270/// Utility function to instantiate a codec of the given `ty`, where the
271/// `config` *may* still contain an `id` field.
272///
273/// If the `config` does *not* contain an `id` field, use
274/// [`DynCodecType::codec_from_config`] instead.
275///
276/// # Errors
277///
278/// Errors if constructing the codec fails.
279pub fn codec_from_config_with_id<'de, T: DynCodecType, D: Deserializer<'de>>(
280    ty: &T,
281    config: D,
282) -> Result<T::Codec, D::Error> {
283    let mut config = Value::deserialize(config)?;
284
285    if let Some(config) = config.as_object_mut() {
286        if let Some(id) = config.remove("id") {
287            let codec_id = ty.codec_id();
288
289            if !matches!(id, Value::String(ref id) if id == codec_id) {
290                return Err(serde::de::Error::custom(format!(
291                    "expected codec id {codec_id:?} but found {id}"
292                )));
293            }
294        }
295    }
296
297    ty.codec_from_config(config)
298        .map_err(serde::de::Error::custom)
299}