1use std::{borrow::Cow, num::NonZeroUsize, ops::AddAssign};
21
22use ndarray::{Array, ArrayBase, ArrayViewMut, Data, Dimension, Ix2, ShapeError, Zip, s};
23use num_traits::{ConstOne, ConstZero, Float, FloatConst};
24use numcodecs::{
25 AnyArray, AnyArrayAssignError, AnyArrayDType, AnyArrayView, AnyArrayViewMut, AnyCowArray,
26 Codec, StaticCodec, StaticCodecConfig, StaticCodecVersion,
27};
28use schemars::{JsonSchema, Schema, SchemaGenerator, json_schema};
29use serde::{Deserialize, Deserializer, Serialize, Serializer};
30use thiserror::Error;
31
32#[cfg(test)]
33use ::serde_json as _;
34
35#[derive(Clone, Serialize, Deserialize, JsonSchema)]
51pub struct RandomProjectionCodec {
53 pub seed: u64,
55 #[serde(flatten)]
57 pub reduction: RandomProjectionReduction,
58 #[serde(flatten)]
60 pub projection: RandomProjectionKind,
61 #[serde(default, rename = "_version")]
63 pub version: StaticCodecVersion<0, 1, 0>,
64}
65
66#[derive(Clone, Serialize, Deserialize, JsonSchema)]
68#[serde(tag = "reduction", rename_all = "kebab-case")]
70pub enum RandomProjectionReduction {
71 JohnsonLindenstrauss {
74 epsilon: OpenClosedUnit<f64>,
76 },
77 Explicit {
80 k: NonZeroUsize,
82 },
83}
84
85#[derive(Clone, Serialize, Deserialize, JsonSchema)]
87#[serde(tag = "projection", rename_all = "kebab-case")]
89pub enum RandomProjectionKind {
90 Gaussian,
93 Sparse {
104 #[serde(default, skip_serializing_if = "Option::is_none")]
124 density: Option<OpenClosedUnit<f64>>,
125 },
126}
127
128impl Codec for RandomProjectionCodec {
129 type Error = RandomProjectionCodecError;
130
131 fn encode(&self, data: AnyCowArray) -> Result<AnyArray, Self::Error> {
132 match data {
133 AnyCowArray::F32(data) => Ok(AnyArray::F32(
134 project_with_projection(data, self.seed, &self.reduction, &self.projection)?
135 .into_dyn(),
136 )),
137 AnyCowArray::F64(data) => Ok(AnyArray::F64(
138 project_with_projection(data, self.seed, &self.reduction, &self.projection)?
139 .into_dyn(),
140 )),
141 encoded => Err(RandomProjectionCodecError::UnsupportedDtype(
142 encoded.dtype(),
143 )),
144 }
145 }
146
147 fn decode(&self, encoded: AnyCowArray) -> Result<AnyArray, Self::Error> {
148 match encoded {
149 AnyCowArray::F32(encoded) => Ok(AnyArray::F32(
150 reconstruct_with_projection(encoded, self.seed, &self.projection)?.into_dyn(),
151 )),
152 AnyCowArray::F64(encoded) => Ok(AnyArray::F64(
153 reconstruct_with_projection(encoded, self.seed, &self.projection)?.into_dyn(),
154 )),
155 encoded => Err(RandomProjectionCodecError::UnsupportedDtype(
156 encoded.dtype(),
157 )),
158 }
159 }
160
161 fn decode_into(
162 &self,
163 encoded: AnyArrayView,
164 decoded: AnyArrayViewMut,
165 ) -> Result<(), Self::Error> {
166 match (encoded, decoded) {
167 (AnyArrayView::F32(encoded), AnyArrayViewMut::F32(decoded)) => {
168 reconstruct_into_with_projection(encoded, decoded, self.seed, &self.projection)
169 }
170 (AnyArrayView::F64(encoded), AnyArrayViewMut::F64(decoded)) => {
171 reconstruct_into_with_projection(encoded, decoded, self.seed, &self.projection)
172 }
173 (encoded @ (AnyArrayView::F32(_) | AnyArrayView::F64(_)), decoded) => {
174 Err(RandomProjectionCodecError::MismatchedDecodeIntoArray {
175 source: AnyArrayAssignError::DTypeMismatch {
176 src: encoded.dtype(),
177 dst: decoded.dtype(),
178 },
179 })
180 }
181 (encoded, _decoded) => Err(RandomProjectionCodecError::UnsupportedDtype(
182 encoded.dtype(),
183 )),
184 }
185 }
186}
187
188impl StaticCodec for RandomProjectionCodec {
189 const CODEC_ID: &'static str = "random-projection.rs";
190
191 type Config<'de> = Self;
192
193 fn from_config(config: Self::Config<'_>) -> Self {
194 config
195 }
196
197 fn get_config(&self) -> StaticCodecConfig<Self> {
198 StaticCodecConfig::from(self)
199 }
200}
201
202#[derive(Debug, Error)]
203pub enum RandomProjectionCodecError {
205 #[error("RandomProjection does not support the dtype {0}")]
207 UnsupportedDtype(AnyArrayDType),
208 #[error("RandomProjection only supports matrix / 2d-shaped arrays")]
210 NonMatrixData {
211 #[from]
213 source: ShapeError,
214 },
215 #[error("RandomProjection does not support non-finite (infinite or NaN) floating point data")]
218 NonFiniteData,
219 #[error(
222 "RandomProjection cannot encode or decode from an array with {input} samples to an array with {output} samples"
223 )]
224 NumberOfSamplesMismatch {
225 input: usize,
227 output: usize,
229 },
230 #[error("RandomProjection cannot decode from an array with zero dimensionality `K`")]
233 ProjectedArrayZeroComponents,
234 #[error("RandomProjection cannot decode from an array with corrupted dimensionality metadata")]
237 CorruptedNumberOfComponents,
238 #[error(
241 "RandomProjection cannot decode into an array with {output} features that differs from the {metadata} features stored in the encoded metadata"
242 )]
243 NumberOfFeaturesMismatch {
244 metadata: usize,
246 output: usize,
248 },
249 #[error("RandomProjection cannot decode into the provided array")]
251 MismatchedDecodeIntoArray {
252 #[from]
254 source: AnyArrayAssignError,
255 },
256}
257
258pub fn project_with_projection<T: FloatExt, S: Data<Elem = T>, D: Dimension>(
270 data: ArrayBase<S, D>,
271 seed: u64,
272 reduction: &RandomProjectionReduction,
273 projection: &RandomProjectionKind,
274) -> Result<Array<T, Ix2>, RandomProjectionCodecError> {
275 let data = data
276 .into_dimensionality()
277 .map_err(|err| RandomProjectionCodecError::NonMatrixData { source: err })?;
278
279 let (n, d) = data.dim();
280
281 let k = match reduction {
282 RandomProjectionReduction::JohnsonLindenstrauss { epsilon } => {
283 johnson_lindenstrauss_min_k(n, *epsilon)
284 }
285 RandomProjectionReduction::Explicit { k } => k.get(),
286 };
287
288 let mut projected = Array::<T, Ix2>::from_elem((n, k + 1), T::ZERO);
289
290 for p in projected.slice_mut(s!(.., k)) {
293 *p = T::from_usize(d);
294 }
295
296 match projection {
297 RandomProjectionKind::Gaussian => project_into(
298 data,
299 projected.slice_mut(s!(.., ..k)),
300 |x, y| gaussian_project(x, y, seed),
301 gaussian_normaliser(k),
302 ),
303 RandomProjectionKind::Sparse { density } => {
304 let density = density_or_ping_li_minimum(*density, d);
305 project_into(
306 data,
307 projected.slice_mut(s!(.., ..k)),
308 |x, y| sparse_project(x, y, density, seed),
309 sparse_normaliser(k, density),
310 )
311 }
312 }?;
313
314 Ok(projected)
315}
316
317#[expect(clippy::needless_pass_by_value)]
318pub fn project_into<T: FloatExt, S: Data<Elem = T>>(
333 data: ArrayBase<S, Ix2>,
334 mut projected: ArrayViewMut<T, Ix2>,
335 projection: impl Fn(usize, usize) -> T,
336 normalizer: T,
337) -> Result<(), RandomProjectionCodecError> {
338 let (n, d) = data.dim();
339 let (n2, _k) = projected.dim();
340
341 if n2 != n {
342 return Err(RandomProjectionCodecError::NumberOfSamplesMismatch {
343 input: n,
344 output: n2,
345 });
346 }
347
348 let mut skip_projection_column = Vec::with_capacity(d);
349
350 for (j, projected_j) in projected.columns_mut().into_iter().enumerate() {
351 skip_projection_column.clear();
356 for l in 0..d {
357 let p = projection(l, j);
358 if !p.is_zero() {
359 skip_projection_column.push((l, p));
360 }
361 }
362
363 for (data_i, projected_ij) in data.rows().into_iter().zip(projected_j) {
364 let mut acc = T::ZERO;
365
366 #[expect(clippy::indexing_slicing)]
367 for &(l, p) in &skip_projection_column {
369 acc += data_i[l] * p;
370 }
371
372 *projected_ij = acc * normalizer;
373 }
374 }
375
376 if !Zip::from(projected).all(|x| x.is_finite()) {
377 return Err(RandomProjectionCodecError::NonFiniteData);
378 }
379
380 Ok(())
381}
382
383pub fn reconstruct_with_projection<T: FloatExt, S: Data<Elem = T>, D: Dimension>(
401 projected: ArrayBase<S, D>,
402 seed: u64,
403 projection: &RandomProjectionKind,
404) -> Result<Array<T, Ix2>, RandomProjectionCodecError> {
405 let projected = projected
406 .into_dimensionality()
407 .map_err(|err| RandomProjectionCodecError::NonMatrixData { source: err })?;
408
409 if projected.is_empty() {
410 return Ok(projected.to_owned());
411 }
412
413 let (_n, k): (usize, usize) = projected.dim();
414 let Some(k) = k.checked_sub(1) else {
415 return Err(RandomProjectionCodecError::ProjectedArrayZeroComponents);
416 };
417
418 let ds = projected.slice(s!(.., k));
421 let Ok(Some(d)) = ds.fold(Ok(None), |acc, d| match acc {
422 Ok(None) => Ok(Some(d.into_usize())),
423 Ok(Some(d2)) if d2 == d.into_usize() => Ok(Some(d2)),
424 _ => Err(()),
425 }) else {
426 return Err(RandomProjectionCodecError::CorruptedNumberOfComponents);
427 };
428
429 let projected = projected.slice_move(s!(.., ..k));
431
432 match projection {
433 RandomProjectionKind::Gaussian => reconstruct(
434 projected,
435 d,
436 |x, y| gaussian_project(x, y, seed),
437 gaussian_normaliser(k),
438 ),
439 RandomProjectionKind::Sparse { density } => {
440 let density = density_or_ping_li_minimum(*density, d);
441 reconstruct(
442 projected,
443 d,
444 |x, y| sparse_project(x, y, density, seed),
445 sparse_normaliser(k, density),
446 )
447 }
448 }
449}
450
451#[expect(clippy::needless_pass_by_value)]
452pub fn reconstruct<T: FloatExt, S: Data<Elem = T>>(
465 projected: ArrayBase<S, Ix2>,
466 d: usize,
467 projection: impl Fn(usize, usize) -> T,
468 normalizer: T,
469) -> Result<Array<T, Ix2>, RandomProjectionCodecError> {
470 if projected.is_empty() {
471 return Ok(projected.to_owned());
472 }
473
474 let (n, k) = projected.dim();
475
476 let mut reconstructed = Array::<T, Ix2>::from_elem((n, d), T::ZERO);
477
478 let mut skip_projection_row = Vec::with_capacity(k);
479
480 for (l, reconstructed_l) in reconstructed.columns_mut().into_iter().enumerate() {
481 skip_projection_row.clear();
486 for j in 0..k {
487 let p = projection(l, j);
488 if !p.is_zero() {
489 skip_projection_row.push((j, p));
490 }
491 }
492
493 for (projected_i, reconstructed_il) in projected.rows().into_iter().zip(reconstructed_l) {
494 let mut acc = T::ZERO;
495
496 #[expect(clippy::indexing_slicing)]
497 for &(j, p) in &skip_projection_row {
499 acc += projected_i[j] * p;
500 }
501
502 *reconstructed_il = acc * normalizer;
503 }
504 }
505
506 if !Zip::from(&reconstructed).all(|x| x.is_finite()) {
507 return Err(RandomProjectionCodecError::NonFiniteData);
508 }
509
510 Ok(reconstructed)
511}
512
513pub fn reconstruct_into_with_projection<T: FloatExt, S: Data<Elem = T>, D: Dimension>(
535 projected: ArrayBase<S, D>,
536 reconstructed: ArrayViewMut<T, D>,
537 seed: u64,
538 projection: &RandomProjectionKind,
539) -> Result<(), RandomProjectionCodecError> {
540 let projected: ArrayBase<S, Ix2> = projected
541 .into_dimensionality()
542 .map_err(|err| RandomProjectionCodecError::NonMatrixData { source: err })?;
543 let reconstructed: ArrayViewMut<T, Ix2> = reconstructed
544 .into_dimensionality()
545 .map_err(|err| RandomProjectionCodecError::NonMatrixData { source: err })?;
546
547 let (n, k) = projected.dim();
548 let (n2, d2) = reconstructed.dim();
549
550 if n2 != n {
551 return Err(RandomProjectionCodecError::NumberOfSamplesMismatch {
552 input: n,
553 output: n2,
554 });
555 }
556
557 let Some(k) = k.checked_sub(1) else {
558 return Err(RandomProjectionCodecError::ProjectedArrayZeroComponents);
559 };
560
561 let ds = projected.slice(s!(.., k));
564 let Ok(Some(d)) = ds.fold(Ok(None), |acc, d| match acc {
565 Ok(None) => Ok(Some(d.into_usize())),
566 Ok(Some(d2)) if d2 == d.into_usize() => Ok(Some(d2)),
567 _ => Err(()),
568 }) else {
569 return Err(RandomProjectionCodecError::CorruptedNumberOfComponents);
570 };
571
572 if d2 != d {
573 return Err(RandomProjectionCodecError::NumberOfFeaturesMismatch {
574 metadata: d,
575 output: d2,
576 });
577 }
578
579 let projected = projected.slice_move(s!(.., ..k));
581
582 match projection {
583 RandomProjectionKind::Gaussian => reconstruct_into(
584 projected,
585 reconstructed,
586 |x, y| gaussian_project(x, y, seed),
587 gaussian_normaliser(k),
588 ),
589 RandomProjectionKind::Sparse { density } => {
590 let density = density_or_ping_li_minimum(*density, d);
591 reconstruct_into(
592 projected,
593 reconstructed,
594 |x, y| sparse_project(x, y, density, seed),
595 sparse_normaliser(k, density),
596 )
597 }
598 }
599}
600
601#[expect(clippy::needless_pass_by_value)]
602pub fn reconstruct_into<T: FloatExt, S: Data<Elem = T>>(
617 projected: ArrayBase<S, Ix2>,
618 mut reconstructed: ArrayViewMut<T, Ix2>,
619 projection: impl Fn(usize, usize) -> T,
620 normalizer: T,
621) -> Result<(), RandomProjectionCodecError> {
622 let (n, k) = projected.dim();
623 let (n2, _d) = reconstructed.dim();
624
625 if n2 != n {
626 return Err(RandomProjectionCodecError::NumberOfSamplesMismatch {
627 input: n,
628 output: n2,
629 });
630 }
631
632 let mut skip_projection_row = Vec::with_capacity(k);
633
634 for (l, reconstructed_l) in reconstructed.columns_mut().into_iter().enumerate() {
635 skip_projection_row.clear();
640 for j in 0..k {
641 let p = projection(l, j);
642 if !p.is_zero() {
643 skip_projection_row.push((j, p));
644 }
645 }
646
647 for (projected_i, reconstructed_il) in projected.rows().into_iter().zip(reconstructed_l) {
648 let mut acc = T::ZERO;
649
650 #[expect(clippy::indexing_slicing)]
651 for &(j, p) in &skip_projection_row {
653 acc += projected_i[j] * p;
654 }
655
656 *reconstructed_il = acc * normalizer;
657 }
658 }
659
660 if !Zip::from(reconstructed).all(|x| x.is_finite()) {
661 return Err(RandomProjectionCodecError::NonFiniteData);
662 }
663
664 Ok(())
665}
666
667#[must_use]
677pub fn johnson_lindenstrauss_min_k(
678 n_samples: usize,
679 OpenClosedUnit(epsilon): OpenClosedUnit<f64>,
680) -> usize {
681 #[expect(clippy::suboptimal_flops)]
682 let denominator = (epsilon * epsilon * 0.5) - (epsilon * epsilon * epsilon / 3.0);
683 #[expect(clippy::cast_precision_loss)]
684 let min_k = (n_samples as f64).ln() * 4.0 / denominator;
685 #[expect(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
686 let min_k = min_k as usize;
687 min_k
688}
689
690#[must_use]
700pub fn density_or_ping_li_minimum<T: FloatExt>(
701 density: Option<OpenClosedUnit<f64>>,
702 d: usize,
703) -> T {
704 match density {
705 Some(OpenClosedUnit(density)) => T::from_f64(density),
706 None => T::from_usize(d).sqrt().recip(),
707 }
708}
709
710fn gaussian_project<T: FloatExt>(x: usize, y: usize, seed: u64) -> T {
713 let (ClosedOpenUnit(u0), OpenClosedUnit(u1)) = T::u01x2(hash_matrix_index(x, y, seed));
714
715 let theta = -T::TAU() * u0;
716 let r = (-T::TWO * u1.ln()).sqrt();
717
718 r * theta.sin()
719}
720
721fn gaussian_normaliser<T: FloatExt>(k: usize) -> T {
722 T::from_usize(k).sqrt().recip()
723}
724
725fn sparse_project<T: FloatExt>(x: usize, y: usize, density: T, seed: u64) -> T {
729 let (ClosedOpenUnit(u0), _u1) = T::u01x2(hash_matrix_index(x, y, seed));
730
731 if u0 < (density * T::HALF) {
732 -T::ONE
733 } else if u0 < density {
734 T::ONE
735 } else {
736 T::ZERO
737 }
738}
739
740fn sparse_normaliser<T: FloatExt>(k: usize, density: T) -> T {
741 (T::from_usize(k) * density).recip().sqrt()
742}
743
744const fn hash_matrix_index(x: usize, y: usize, seed: u64) -> u64 {
745 seahash_diffuse(seahash_diffuse(x as u64) ^ seed ^ (y as u64))
746}
747
748const fn seahash_diffuse(mut x: u64) -> u64 {
749 x = x.wrapping_mul(0x6eed_0e9d_a4d9_4a4f);
759
760 let a = x >> 32;
761 let b = x >> 60;
762
763 x ^= a >> b;
764
765 x = x.wrapping_mul(0x6eed_0e9d_a4d9_4a4f);
766
767 x
768}
769
770#[expect(clippy::derive_partial_eq_without_eq)] #[derive(Copy, Clone, PartialEq, PartialOrd, Hash)]
772pub struct ClosedOpenUnit<T: FloatExt>(T);
774
775#[expect(clippy::derive_partial_eq_without_eq)] #[derive(Copy, Clone, PartialEq, PartialOrd, Hash)]
777pub struct OpenClosedUnit<T: FloatExt>(T);
779
780impl Serialize for OpenClosedUnit<f64> {
781 fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
782 serializer.serialize_f64(self.0)
783 }
784}
785
786impl<'de> Deserialize<'de> for OpenClosedUnit<f64> {
787 fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
788 let x = f64::deserialize(deserializer)?;
789
790 if x > 0.0 && x <= 1.0 {
791 Ok(Self(x))
792 } else {
793 Err(serde::de::Error::invalid_value(
794 serde::de::Unexpected::Float(x),
795 &"a value in (0.0, 1.0]",
796 ))
797 }
798 }
799}
800
801impl JsonSchema for OpenClosedUnit<f64> {
802 fn schema_name() -> Cow<'static, str> {
803 Cow::Borrowed("OpenClosedUnitF64")
804 }
805
806 fn schema_id() -> Cow<'static, str> {
807 Cow::Borrowed(concat!(module_path!(), "::", "OpenClosedUnit<f64>"))
808 }
809
810 fn json_schema(_gen: &mut SchemaGenerator) -> Schema {
811 json_schema!({
812 "type": "number",
813 "exclusiveMinimum": 0.0,
814 "maximum": 1.0,
815 })
816 }
817}
818
819pub trait FloatExt: Float + ConstZero + ConstOne + FloatConst + AddAssign {
821 const HALF: Self;
823 const TWO: Self;
825
826 #[must_use]
828 fn from_f64(x: f64) -> Self;
829
830 #[must_use]
832 fn from_usize(n: usize) -> Self;
833
834 #[must_use]
836 fn into_usize(self) -> usize;
837
838 #[must_use]
843 fn u01x2(hash: u64) -> (ClosedOpenUnit<Self>, OpenClosedUnit<Self>);
844}
845
846impl FloatExt for f32 {
847 const HALF: Self = 0.5;
848 const TWO: Self = 2.0;
849
850 #[expect(clippy::cast_possible_truncation)]
851 fn from_f64(x: f64) -> Self {
852 x as Self
853 }
854
855 #[expect(clippy::cast_precision_loss)]
856 fn from_usize(n: usize) -> Self {
857 n as Self
858 }
859
860 #[expect(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
861 fn into_usize(self) -> usize {
862 self as usize
863 }
864
865 fn u01x2(hash: u64) -> (ClosedOpenUnit<Self>, OpenClosedUnit<Self>) {
866 #[expect(clippy::cast_possible_truncation)] let (low, high) = (
868 (hash & u64::from(u32::MAX)) as u32,
869 ((hash >> 32) & u64::from(u32::MAX)) as u32,
870 );
871
872 #[expect(clippy::cast_precision_loss)]
875 let u0 = ClosedOpenUnit(((high >> 8) as Self) * Self::from_bits(0x3380_0000_u32)); #[expect(clippy::cast_precision_loss)]
880 let u1 = OpenClosedUnit((((low >> 8) + 1) as Self) * Self::from_bits(0x3380_0000_u32)); (u0, u1)
883 }
884}
885
886impl FloatExt for f64 {
887 const HALF: Self = 0.5;
888 const TWO: Self = 2.0;
889
890 fn from_f64(x: f64) -> Self {
891 x
892 }
893
894 #[expect(clippy::cast_precision_loss)]
895 fn from_usize(n: usize) -> Self {
896 n as Self
897 }
898
899 #[expect(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
900 fn into_usize(self) -> usize {
901 self as usize
902 }
903
904 fn u01x2(hash: u64) -> (ClosedOpenUnit<Self>, OpenClosedUnit<Self>) {
905 #[expect(clippy::cast_precision_loss)]
908 let u0 =
909 ClosedOpenUnit(((hash >> 11) as Self) * Self::from_bits(0x3CA0_0000_0000_0000_u64)); let hash = seahash_diffuse(hash + 1);
912
913 #[expect(clippy::cast_precision_loss)]
916 let u1 = OpenClosedUnit(
917 (((hash >> 11) + 1) as Self) * Self::from_bits(0x3CA0_0000_0000_0000_u64),
918 ); (u0, u1)
921 }
922}
923
924#[cfg(test)]
925#[expect(clippy::unwrap_used, clippy::expect_used)]
926mod tests {
927 use ndarray_rand::RandomExt;
928 use ndarray_rand::rand_distr::{Distribution, Normal};
929
930 use super::*;
931
932 #[test]
933 fn gaussian_f32() {
934 test_error_decline::<f32>(
935 (100, 100),
936 Normal::new(42.0, 24.0).unwrap(),
937 42,
938 RandomProjectionKind::Gaussian,
939 );
940 }
941
942 #[test]
943 fn gaussian_f64() {
944 test_error_decline::<f64>(
945 (100, 100),
946 Normal::new(42.0, 24.0).unwrap(),
947 42,
948 RandomProjectionKind::Gaussian,
949 );
950 }
951
952 #[test]
953 fn achlioptas_sparse_f32() {
954 test_error_decline::<f32>(
955 (100, 100),
956 Normal::new(42.0, 24.0).unwrap(),
957 42,
958 RandomProjectionKind::Sparse {
959 density: Some(OpenClosedUnit(1.0 / 3.0)),
960 },
961 );
962 }
963
964 #[test]
965 fn achlioptas_sparse_f64() {
966 test_error_decline::<f64>(
967 (100, 100),
968 Normal::new(42.0, 24.0).unwrap(),
969 42,
970 RandomProjectionKind::Sparse {
971 density: Some(OpenClosedUnit(1.0 / 3.0)),
972 },
973 );
974 }
975
976 #[test]
977 fn ping_li_sparse_f32() {
978 test_error_decline::<f32>(
979 (100, 100),
980 Normal::new(42.0, 24.0).unwrap(),
981 42,
982 RandomProjectionKind::Sparse { density: None },
983 );
984 }
985
986 #[test]
987 fn ping_li_sparse_f64() {
988 test_error_decline::<f64>(
989 (100, 100),
990 Normal::new(42.0, 24.0).unwrap(),
991 42,
992 RandomProjectionKind::Sparse { density: None },
993 );
994 }
995
996 #[expect(clippy::needless_pass_by_value)]
997 fn test_error_decline<T: FloatExt + std::fmt::Display>(
998 shape: (usize, usize),
999 distribution: impl Distribution<T>,
1000 seed: u64,
1001 projection: RandomProjectionKind,
1002 ) {
1003 let data = Array::<T, Ix2>::random(shape, distribution);
1004
1005 let mut max_rmse = rmse(
1006 &data,
1007 &roundtrip(&data, 42, OpenClosedUnit(1.0), &projection),
1008 );
1009
1010 for epsilon in [
1011 OpenClosedUnit(0.5),
1012 OpenClosedUnit(0.1),
1013 OpenClosedUnit(0.01),
1014 ] {
1015 let new_rmse = rmse(&data, &roundtrip(&data, seed, epsilon, &projection));
1016 assert!(
1017 new_rmse <= max_rmse,
1018 "{new_rmse} > {max_rmse} for {epsilon}",
1019 epsilon = epsilon.0
1020 );
1021 max_rmse = new_rmse;
1022 }
1023 }
1024
1025 fn roundtrip<T: FloatExt>(
1026 data: &Array<T, Ix2>,
1027 seed: u64,
1028 epsilon: OpenClosedUnit<f64>,
1029 projection: &RandomProjectionKind,
1030 ) -> Array<T, Ix2> {
1031 let projected = project_with_projection(
1032 data.view(),
1033 seed,
1034 &RandomProjectionReduction::JohnsonLindenstrauss { epsilon },
1035 projection,
1036 )
1037 .expect("projecting must not fail");
1038 let reconstructed = reconstruct_with_projection(projected, seed, projection)
1039 .expect("reconstruction must not fail");
1040 #[expect(clippy::let_and_return)]
1041 reconstructed
1042 }
1043
1044 fn rmse<T: FloatExt>(data: &Array<T, Ix2>, reconstructed: &Array<T, Ix2>) -> T {
1045 let mut err = T::ZERO;
1046
1047 for (&d, &r) in data.iter().zip(reconstructed) {
1048 err += (d - r) * (d - r);
1049 }
1050
1051 let mse = err * T::from_usize(data.len()).recip();
1052 mse.sqrt()
1053 }
1054}