1use std::{borrow::Cow, num::NonZeroUsize, ops::AddAssign};
21
22use ndarray::{s, Array, ArrayBase, ArrayViewMut, Data, Dimension, Ix2, ShapeError, Zip};
23use num_traits::{ConstOne, ConstZero, Float, FloatConst};
24use numcodecs::{
25 AnyArray, AnyArrayAssignError, AnyArrayDType, AnyArrayView, AnyArrayViewMut, AnyCowArray,
26 Codec, StaticCodec, StaticCodecConfig,
27};
28use schemars::{json_schema, JsonSchema, Schema, SchemaGenerator};
29use serde::{Deserialize, Deserializer, Serialize, Serializer};
30use thiserror::Error;
31
32#[cfg(test)]
33use ::serde_json as _;
34
35#[derive(Clone, Serialize, Deserialize, JsonSchema)]
51pub struct RandomProjectionCodec {
53 pub seed: u64,
55 #[serde(flatten)]
57 pub reduction: RandomProjectionReduction,
58 #[serde(flatten)]
60 pub projection: RandomProjectionKind,
61}
62
63#[derive(Clone, Serialize, Deserialize, JsonSchema)]
65#[serde(tag = "reduction", rename_all = "kebab-case")]
67pub enum RandomProjectionReduction {
68 JohnsonLindenstrauss {
71 epsilon: OpenClosedUnit<f64>,
73 },
74 Explicit {
77 k: NonZeroUsize,
79 },
80}
81
82#[derive(Clone, Serialize, Deserialize, JsonSchema)]
84#[serde(tag = "projection", rename_all = "kebab-case")]
86pub enum RandomProjectionKind {
87 Gaussian,
90 Sparse {
101 #[serde(default, skip_serializing_if = "Option::is_none")]
121 density: Option<OpenClosedUnit<f64>>,
122 },
123}
124
125impl Codec for RandomProjectionCodec {
126 type Error = RandomProjectionCodecError;
127
128 fn encode(&self, data: AnyCowArray) -> Result<AnyArray, Self::Error> {
129 match data {
130 AnyCowArray::F32(data) => Ok(AnyArray::F32(
131 project_with_projection(data, self.seed, &self.reduction, &self.projection)?
132 .into_dyn(),
133 )),
134 AnyCowArray::F64(data) => Ok(AnyArray::F64(
135 project_with_projection(data, self.seed, &self.reduction, &self.projection)?
136 .into_dyn(),
137 )),
138 encoded => Err(RandomProjectionCodecError::UnsupportedDtype(
139 encoded.dtype(),
140 )),
141 }
142 }
143
144 fn decode(&self, encoded: AnyCowArray) -> Result<AnyArray, Self::Error> {
145 match encoded {
146 AnyCowArray::F32(encoded) => Ok(AnyArray::F32(
147 reconstruct_with_projection(encoded, self.seed, &self.projection)?.into_dyn(),
148 )),
149 AnyCowArray::F64(encoded) => Ok(AnyArray::F64(
150 reconstruct_with_projection(encoded, self.seed, &self.projection)?.into_dyn(),
151 )),
152 encoded => Err(RandomProjectionCodecError::UnsupportedDtype(
153 encoded.dtype(),
154 )),
155 }
156 }
157
158 fn decode_into(
159 &self,
160 encoded: AnyArrayView,
161 decoded: AnyArrayViewMut,
162 ) -> Result<(), Self::Error> {
163 match (encoded, decoded) {
164 (AnyArrayView::F32(encoded), AnyArrayViewMut::F32(decoded)) => {
165 reconstruct_into_with_projection(encoded, decoded, self.seed, &self.projection)
166 }
167 (AnyArrayView::F64(encoded), AnyArrayViewMut::F64(decoded)) => {
168 reconstruct_into_with_projection(encoded, decoded, self.seed, &self.projection)
169 }
170 (encoded @ (AnyArrayView::F32(_) | AnyArrayView::F64(_)), decoded) => {
171 Err(RandomProjectionCodecError::MismatchedDecodeIntoArray {
172 source: AnyArrayAssignError::DTypeMismatch {
173 src: encoded.dtype(),
174 dst: decoded.dtype(),
175 },
176 })
177 }
178 (encoded, _decoded) => Err(RandomProjectionCodecError::UnsupportedDtype(
179 encoded.dtype(),
180 )),
181 }
182 }
183}
184
185impl StaticCodec for RandomProjectionCodec {
186 const CODEC_ID: &'static str = "random-projection";
187
188 type Config<'de> = Self;
189
190 fn from_config(config: Self::Config<'_>) -> Self {
191 config
192 }
193
194 fn get_config(&self) -> StaticCodecConfig<Self> {
195 StaticCodecConfig::from(self)
196 }
197}
198
199#[derive(Debug, Error)]
200pub enum RandomProjectionCodecError {
202 #[error("RandomProjection does not support the dtype {0}")]
204 UnsupportedDtype(AnyArrayDType),
205 #[error("RandomProjection only supports matrix / 2d-shaped arrays")]
207 NonMatrixData {
208 #[from]
210 source: ShapeError,
211 },
212 #[error("RandomProjection does not support non-finite (infinite or NaN) floating point data")]
215 NonFiniteData,
216 #[error("RandomProjection cannot encode or decode from an array with {input} samples to an array with {output} samples")]
219 NumberOfSamplesMismatch {
220 input: usize,
222 output: usize,
224 },
225 #[error("RandomProjection cannot decode from an array with zero dimensionality `K`")]
228 ProjectedArrayZeroComponents,
229 #[error("RandomProjection cannot decode from an array with corrupted dimensionality metadata")]
232 CorruptedNumberOfComponents,
233 #[error("RandomProjection cannot decode into an array with {output} features that differs from the {metadata} features stored in the encoded metadata")]
236 NumberOfFeaturesMismatch {
237 metadata: usize,
239 output: usize,
241 },
242 #[error("RandomProjection cannot decode into the provided array")]
244 MismatchedDecodeIntoArray {
245 #[from]
247 source: AnyArrayAssignError,
248 },
249}
250
251pub fn project_with_projection<T: FloatExt, S: Data<Elem = T>, D: Dimension>(
263 data: ArrayBase<S, D>,
264 seed: u64,
265 reduction: &RandomProjectionReduction,
266 projection: &RandomProjectionKind,
267) -> Result<Array<T, Ix2>, RandomProjectionCodecError> {
268 let data = data
269 .into_dimensionality()
270 .map_err(|err| RandomProjectionCodecError::NonMatrixData { source: err })?;
271
272 let (n, d) = data.dim();
273
274 let k = match reduction {
275 RandomProjectionReduction::JohnsonLindenstrauss { epsilon } => {
276 johnson_lindenstrauss_min_k(n, *epsilon)
277 }
278 RandomProjectionReduction::Explicit { k } => k.get(),
279 };
280
281 let mut projected = Array::<T, Ix2>::from_elem((n, k + 1), T::ZERO);
282
283 for p in projected.slice_mut(s!(.., k)) {
286 *p = T::from_usize(d);
287 }
288
289 match projection {
290 RandomProjectionKind::Gaussian => project_into(
291 data,
292 projected.slice_mut(s!(.., ..k)),
293 |x, y| gaussian_project(x, y, seed),
294 gaussian_normaliser(k),
295 ),
296 RandomProjectionKind::Sparse { density } => {
297 let density = density_or_ping_li_minimum(*density, d);
298 project_into(
299 data,
300 projected.slice_mut(s!(.., ..k)),
301 |x, y| sparse_project(x, y, density, seed),
302 sparse_normaliser(k, density),
303 )
304 }
305 }?;
306
307 Ok(projected)
308}
309
310#[expect(clippy::needless_pass_by_value)]
311pub fn project_into<T: FloatExt, S: Data<Elem = T>>(
326 data: ArrayBase<S, Ix2>,
327 mut projected: ArrayViewMut<T, Ix2>,
328 projection: impl Fn(usize, usize) -> T,
329 normalizer: T,
330) -> Result<(), RandomProjectionCodecError> {
331 let (n, d) = data.dim();
332 let (n2, _k) = projected.dim();
333
334 if n2 != n {
335 return Err(RandomProjectionCodecError::NumberOfSamplesMismatch {
336 input: n,
337 output: n2,
338 });
339 }
340
341 let mut skip_projection_column = Vec::with_capacity(d);
342
343 for (j, projected_j) in projected.columns_mut().into_iter().enumerate() {
344 skip_projection_column.clear();
349 for l in 0..d {
350 let p = projection(l, j);
351 if !p.is_zero() {
352 skip_projection_column.push((l, p));
353 }
354 }
355
356 for (data_i, projected_ij) in data.rows().into_iter().zip(projected_j) {
357 let mut acc = T::ZERO;
358
359 #[expect(clippy::indexing_slicing)]
360 for &(l, p) in &skip_projection_column {
362 acc += data_i[l] * p;
363 }
364
365 *projected_ij = acc * normalizer;
366 }
367 }
368
369 if !Zip::from(projected).all(|x| x.is_finite()) {
370 return Err(RandomProjectionCodecError::NonFiniteData);
371 }
372
373 Ok(())
374}
375
376pub fn reconstruct_with_projection<T: FloatExt, S: Data<Elem = T>, D: Dimension>(
394 projected: ArrayBase<S, D>,
395 seed: u64,
396 projection: &RandomProjectionKind,
397) -> Result<Array<T, Ix2>, RandomProjectionCodecError> {
398 let projected = projected
399 .into_dimensionality()
400 .map_err(|err| RandomProjectionCodecError::NonMatrixData { source: err })?;
401
402 if projected.is_empty() {
403 return Ok(projected.to_owned());
404 }
405
406 let (_n, k): (usize, usize) = projected.dim();
407 let Some(k) = k.checked_sub(1) else {
408 return Err(RandomProjectionCodecError::ProjectedArrayZeroComponents);
409 };
410
411 let ds = projected.slice(s!(.., k));
414 let Ok(Some(d)) = ds.fold(Ok(None), |acc, d| match acc {
415 Ok(None) => Ok(Some(d.into_usize())),
416 Ok(Some(d2)) if d2 == d.into_usize() => Ok(Some(d2)),
417 _ => Err(()),
418 }) else {
419 return Err(RandomProjectionCodecError::CorruptedNumberOfComponents);
420 };
421
422 let projected = projected.slice_move(s!(.., ..k));
424
425 match projection {
426 RandomProjectionKind::Gaussian => reconstruct(
427 projected,
428 d,
429 |x, y| gaussian_project(x, y, seed),
430 gaussian_normaliser(k),
431 ),
432 RandomProjectionKind::Sparse { density } => {
433 let density = density_or_ping_li_minimum(*density, d);
434 reconstruct(
435 projected,
436 d,
437 |x, y| sparse_project(x, y, density, seed),
438 sparse_normaliser(k, density),
439 )
440 }
441 }
442}
443
444#[expect(clippy::needless_pass_by_value)]
445pub fn reconstruct<T: FloatExt, S: Data<Elem = T>>(
458 projected: ArrayBase<S, Ix2>,
459 d: usize,
460 projection: impl Fn(usize, usize) -> T,
461 normalizer: T,
462) -> Result<Array<T, Ix2>, RandomProjectionCodecError> {
463 if projected.is_empty() {
464 return Ok(projected.to_owned());
465 }
466
467 let (n, k) = projected.dim();
468
469 let mut reconstructed = Array::<T, Ix2>::from_elem((n, d), T::ZERO);
470
471 let mut skip_projection_row = Vec::with_capacity(k);
472
473 for (l, reconstructed_l) in reconstructed.columns_mut().into_iter().enumerate() {
474 skip_projection_row.clear();
479 for j in 0..k {
480 let p = projection(l, j);
481 if !p.is_zero() {
482 skip_projection_row.push((j, p));
483 }
484 }
485
486 for (projected_i, reconstructed_il) in projected.rows().into_iter().zip(reconstructed_l) {
487 let mut acc = T::ZERO;
488
489 #[expect(clippy::indexing_slicing)]
490 for &(j, p) in &skip_projection_row {
492 acc += projected_i[j] * p;
493 }
494
495 *reconstructed_il = acc * normalizer;
496 }
497 }
498
499 if !Zip::from(&reconstructed).all(|x| x.is_finite()) {
500 return Err(RandomProjectionCodecError::NonFiniteData);
501 }
502
503 Ok(reconstructed)
504}
505
506pub fn reconstruct_into_with_projection<T: FloatExt, S: Data<Elem = T>, D: Dimension>(
528 projected: ArrayBase<S, D>,
529 reconstructed: ArrayViewMut<T, D>,
530 seed: u64,
531 projection: &RandomProjectionKind,
532) -> Result<(), RandomProjectionCodecError> {
533 let projected: ArrayBase<S, Ix2> = projected
534 .into_dimensionality()
535 .map_err(|err| RandomProjectionCodecError::NonMatrixData { source: err })?;
536 let reconstructed: ArrayViewMut<T, Ix2> = reconstructed
537 .into_dimensionality()
538 .map_err(|err| RandomProjectionCodecError::NonMatrixData { source: err })?;
539
540 let (n, k) = projected.dim();
541 let (n2, d2) = reconstructed.dim();
542
543 if n2 != n {
544 return Err(RandomProjectionCodecError::NumberOfSamplesMismatch {
545 input: n,
546 output: n2,
547 });
548 }
549
550 let Some(k) = k.checked_sub(1) else {
551 return Err(RandomProjectionCodecError::ProjectedArrayZeroComponents);
552 };
553
554 let ds = projected.slice(s!(.., k));
557 let Ok(Some(d)) = ds.fold(Ok(None), |acc, d| match acc {
558 Ok(None) => Ok(Some(d.into_usize())),
559 Ok(Some(d2)) if d2 == d.into_usize() => Ok(Some(d2)),
560 _ => Err(()),
561 }) else {
562 return Err(RandomProjectionCodecError::CorruptedNumberOfComponents);
563 };
564
565 if d2 != d {
566 return Err(RandomProjectionCodecError::NumberOfFeaturesMismatch {
567 metadata: d,
568 output: d2,
569 });
570 }
571
572 let projected = projected.slice_move(s!(.., ..k));
574
575 match projection {
576 RandomProjectionKind::Gaussian => reconstruct_into(
577 projected,
578 reconstructed,
579 |x, y| gaussian_project(x, y, seed),
580 gaussian_normaliser(k),
581 ),
582 RandomProjectionKind::Sparse { density } => {
583 let density = density_or_ping_li_minimum(*density, d);
584 reconstruct_into(
585 projected,
586 reconstructed,
587 |x, y| sparse_project(x, y, density, seed),
588 sparse_normaliser(k, density),
589 )
590 }
591 }
592}
593
594#[expect(clippy::needless_pass_by_value)]
595pub fn reconstruct_into<T: FloatExt, S: Data<Elem = T>>(
610 projected: ArrayBase<S, Ix2>,
611 mut reconstructed: ArrayViewMut<T, Ix2>,
612 projection: impl Fn(usize, usize) -> T,
613 normalizer: T,
614) -> Result<(), RandomProjectionCodecError> {
615 let (n, k) = projected.dim();
616 let (n2, _d) = reconstructed.dim();
617
618 if n2 != n {
619 return Err(RandomProjectionCodecError::NumberOfSamplesMismatch {
620 input: n,
621 output: n2,
622 });
623 }
624
625 let mut skip_projection_row = Vec::with_capacity(k);
626
627 for (l, reconstructed_l) in reconstructed.columns_mut().into_iter().enumerate() {
628 skip_projection_row.clear();
633 for j in 0..k {
634 let p = projection(l, j);
635 if !p.is_zero() {
636 skip_projection_row.push((j, p));
637 }
638 }
639
640 for (projected_i, reconstructed_il) in projected.rows().into_iter().zip(reconstructed_l) {
641 let mut acc = T::ZERO;
642
643 #[expect(clippy::indexing_slicing)]
644 for &(j, p) in &skip_projection_row {
646 acc += projected_i[j] * p;
647 }
648
649 *reconstructed_il = acc * normalizer;
650 }
651 }
652
653 if !Zip::from(reconstructed).all(|x| x.is_finite()) {
654 return Err(RandomProjectionCodecError::NonFiniteData);
655 }
656
657 Ok(())
658}
659
660#[must_use]
670pub fn johnson_lindenstrauss_min_k(
671 n_samples: usize,
672 OpenClosedUnit(epsilon): OpenClosedUnit<f64>,
673) -> usize {
674 #[expect(clippy::suboptimal_flops)]
675 let denominator = (epsilon * epsilon * 0.5) - (epsilon * epsilon * epsilon / 3.0);
676 #[expect(clippy::cast_precision_loss)]
677 let min_k = (n_samples as f64).ln() * 4.0 / denominator;
678 #[expect(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
679 let min_k = min_k as usize;
680 min_k
681}
682
683#[must_use]
693pub fn density_or_ping_li_minimum<T: FloatExt>(
694 density: Option<OpenClosedUnit<f64>>,
695 d: usize,
696) -> T {
697 match density {
698 Some(OpenClosedUnit(density)) => T::from_f64(density),
699 None => T::from_usize(d).sqrt().recip(),
700 }
701}
702
703fn gaussian_project<T: FloatExt>(x: usize, y: usize, seed: u64) -> T {
706 let (ClosedOpenUnit(u0), OpenClosedUnit(u1)) = T::u01x2(hash_matrix_index(x, y, seed));
707
708 let theta = -T::TAU() * u0;
709 let r = (-T::TWO * u1.ln()).sqrt();
710
711 r * theta.sin()
712}
713
714fn gaussian_normaliser<T: FloatExt>(k: usize) -> T {
715 T::from_usize(k).sqrt().recip()
716}
717
718fn sparse_project<T: FloatExt>(x: usize, y: usize, density: T, seed: u64) -> T {
722 let (ClosedOpenUnit(u0), _u1) = T::u01x2(hash_matrix_index(x, y, seed));
723
724 if u0 < (density * T::HALF) {
725 -T::ONE
726 } else if u0 < density {
727 T::ONE
728 } else {
729 T::ZERO
730 }
731}
732
733fn sparse_normaliser<T: FloatExt>(k: usize, density: T) -> T {
734 (T::from_usize(k) * density).recip().sqrt()
735}
736
737const fn hash_matrix_index(x: usize, y: usize, seed: u64) -> u64 {
738 seahash_diffuse(seahash_diffuse(x as u64) ^ seed ^ (y as u64))
739}
740
741const fn seahash_diffuse(mut x: u64) -> u64 {
742 x = x.wrapping_mul(0x6eed_0e9d_a4d9_4a4f);
752
753 let a = x >> 32;
754 let b = x >> 60;
755
756 x ^= a >> b;
757
758 x = x.wrapping_mul(0x6eed_0e9d_a4d9_4a4f);
759
760 x
761}
762
763#[expect(clippy::derive_partial_eq_without_eq)] #[derive(Copy, Clone, PartialEq, PartialOrd, Hash)]
765pub struct ClosedOpenUnit<T: FloatExt>(T);
767
768#[expect(clippy::derive_partial_eq_without_eq)] #[derive(Copy, Clone, PartialEq, PartialOrd, Hash)]
770pub struct OpenClosedUnit<T: FloatExt>(T);
772
773impl Serialize for OpenClosedUnit<f64> {
774 fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
775 serializer.serialize_f64(self.0)
776 }
777}
778
779impl<'de> Deserialize<'de> for OpenClosedUnit<f64> {
780 fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
781 let x = f64::deserialize(deserializer)?;
782
783 if x > 0.0 && x <= 1.0 {
784 Ok(Self(x))
785 } else {
786 Err(serde::de::Error::invalid_value(
787 serde::de::Unexpected::Float(x),
788 &"a value in (0.0, 1.0]",
789 ))
790 }
791 }
792}
793
794impl JsonSchema for OpenClosedUnit<f64> {
795 fn schema_name() -> Cow<'static, str> {
796 Cow::Borrowed("OpenClosedUnitF64")
797 }
798
799 fn schema_id() -> Cow<'static, str> {
800 Cow::Borrowed(concat!(module_path!(), "::", "OpenClosedUnit<f64>"))
801 }
802
803 fn json_schema(_gen: &mut SchemaGenerator) -> Schema {
804 json_schema!({
805 "type": "number",
806 "exclusiveMinimum": 0.0,
807 "maximum": 1.0,
808 })
809 }
810}
811
812pub trait FloatExt: Float + ConstZero + ConstOne + FloatConst + AddAssign {
814 const HALF: Self;
816 const TWO: Self;
818
819 #[must_use]
821 fn from_f64(x: f64) -> Self;
822
823 #[must_use]
825 fn from_usize(n: usize) -> Self;
826
827 #[must_use]
829 fn into_usize(self) -> usize;
830
831 #[must_use]
836 fn u01x2(hash: u64) -> (ClosedOpenUnit<Self>, OpenClosedUnit<Self>);
837}
838
839impl FloatExt for f32 {
840 const HALF: Self = 0.5;
841 const TWO: Self = 2.0;
842
843 #[expect(clippy::cast_possible_truncation)]
844 fn from_f64(x: f64) -> Self {
845 x as Self
846 }
847
848 #[expect(clippy::cast_precision_loss)]
849 fn from_usize(n: usize) -> Self {
850 n as Self
851 }
852
853 #[expect(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
854 fn into_usize(self) -> usize {
855 self as usize
856 }
857
858 fn u01x2(hash: u64) -> (ClosedOpenUnit<Self>, OpenClosedUnit<Self>) {
859 #[expect(clippy::cast_possible_truncation)] let (low, high) = (
861 (hash & u64::from(u32::MAX)) as u32,
862 ((hash >> 32) & u64::from(u32::MAX)) as u32,
863 );
864
865 #[expect(clippy::cast_precision_loss)]
868 let u0 = ClosedOpenUnit(((high >> 8) as Self) * Self::from_bits(0x3380_0000_u32)); #[expect(clippy::cast_precision_loss)]
873 let u1 = OpenClosedUnit((((low >> 8) + 1) as Self) * Self::from_bits(0x3380_0000_u32)); (u0, u1)
876 }
877}
878
879impl FloatExt for f64 {
880 const HALF: Self = 0.5;
881 const TWO: Self = 2.0;
882
883 fn from_f64(x: f64) -> Self {
884 x
885 }
886
887 #[expect(clippy::cast_precision_loss)]
888 fn from_usize(n: usize) -> Self {
889 n as Self
890 }
891
892 #[expect(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
893 fn into_usize(self) -> usize {
894 self as usize
895 }
896
897 fn u01x2(hash: u64) -> (ClosedOpenUnit<Self>, OpenClosedUnit<Self>) {
898 #[expect(clippy::cast_precision_loss)]
901 let u0 =
902 ClosedOpenUnit(((hash >> 11) as Self) * Self::from_bits(0x3CA0_0000_0000_0000_u64)); let hash = seahash_diffuse(hash + 1);
905
906 #[expect(clippy::cast_precision_loss)]
909 let u1 = OpenClosedUnit(
910 (((hash >> 11) + 1) as Self) * Self::from_bits(0x3CA0_0000_0000_0000_u64),
911 ); (u0, u1)
914 }
915}
916
917#[cfg(test)]
918#[expect(clippy::unwrap_used, clippy::expect_used)]
919mod tests {
920 use ndarray_rand::rand_distr::{Distribution, Normal};
921 use ndarray_rand::RandomExt;
922
923 use super::*;
924
925 #[test]
926 fn gaussian_f32() {
927 test_error_decline::<f32>(
928 (100, 100),
929 Normal::new(42.0, 24.0).unwrap(),
930 42,
931 RandomProjectionKind::Gaussian,
932 );
933 }
934
935 #[test]
936 fn gaussian_f64() {
937 test_error_decline::<f64>(
938 (100, 100),
939 Normal::new(42.0, 24.0).unwrap(),
940 42,
941 RandomProjectionKind::Gaussian,
942 );
943 }
944
945 #[test]
946 fn achlioptas_sparse_f32() {
947 test_error_decline::<f32>(
948 (100, 100),
949 Normal::new(42.0, 24.0).unwrap(),
950 42,
951 RandomProjectionKind::Sparse {
952 density: Some(OpenClosedUnit(1.0 / 3.0)),
953 },
954 );
955 }
956
957 #[test]
958 fn achlioptas_sparse_f64() {
959 test_error_decline::<f64>(
960 (100, 100),
961 Normal::new(42.0, 24.0).unwrap(),
962 42,
963 RandomProjectionKind::Sparse {
964 density: Some(OpenClosedUnit(1.0 / 3.0)),
965 },
966 );
967 }
968
969 #[test]
970 fn ping_li_sparse_f32() {
971 test_error_decline::<f32>(
972 (100, 100),
973 Normal::new(42.0, 24.0).unwrap(),
974 42,
975 RandomProjectionKind::Sparse { density: None },
976 );
977 }
978
979 #[test]
980 fn ping_li_sparse_f64() {
981 test_error_decline::<f64>(
982 (100, 100),
983 Normal::new(42.0, 24.0).unwrap(),
984 42,
985 RandomProjectionKind::Sparse { density: None },
986 );
987 }
988
989 #[expect(clippy::needless_pass_by_value)]
990 fn test_error_decline<T: FloatExt + std::fmt::Display>(
991 shape: (usize, usize),
992 distribution: impl Distribution<T>,
993 seed: u64,
994 projection: RandomProjectionKind,
995 ) {
996 let data = Array::<T, Ix2>::random(shape, distribution);
997
998 let mut max_rmse = rmse(
999 &data,
1000 &roundtrip(&data, 42, OpenClosedUnit(1.0), &projection),
1001 );
1002
1003 for epsilon in [
1004 OpenClosedUnit(0.5),
1005 OpenClosedUnit(0.1),
1006 OpenClosedUnit(0.01),
1007 ] {
1008 let new_rmse = rmse(&data, &roundtrip(&data, seed, epsilon, &projection));
1009 assert!(
1010 new_rmse <= max_rmse,
1011 "{new_rmse} > {max_rmse} for {epsilon}",
1012 epsilon = epsilon.0
1013 );
1014 max_rmse = new_rmse;
1015 }
1016 }
1017
1018 fn roundtrip<T: FloatExt>(
1019 data: &Array<T, Ix2>,
1020 seed: u64,
1021 epsilon: OpenClosedUnit<f64>,
1022 projection: &RandomProjectionKind,
1023 ) -> Array<T, Ix2> {
1024 let projected = project_with_projection(
1025 data.view(),
1026 seed,
1027 &RandomProjectionReduction::JohnsonLindenstrauss { epsilon },
1028 projection,
1029 )
1030 .expect("projecting must not fail");
1031 let reconstructed = reconstruct_with_projection(projected, seed, projection)
1032 .expect("reconstruction must not fail");
1033 #[expect(clippy::let_and_return)]
1034 reconstructed
1035 }
1036
1037 fn rmse<T: FloatExt>(data: &Array<T, Ix2>, reconstructed: &Array<T, Ix2>) -> T {
1038 let mut err = T::ZERO;
1039
1040 for (&d, &r) in data.iter().zip(reconstructed) {
1041 err += (d - r) * (d - r);
1042 }
1043
1044 let mse = err * T::from_usize(data.len()).recip();
1045 mse.sqrt()
1046 }
1047}