1use std::{borrow::Cow, num::NonZeroUsize, ops::AddAssign};
21
22use ndarray::{s, Array, ArrayBase, ArrayViewMut, Data, Dimension, Ix2, ShapeError, Zip};
23use num_traits::{ConstOne, ConstZero, Float, FloatConst};
24use numcodecs::{
25 AnyArray, AnyArrayAssignError, AnyArrayDType, AnyArrayView, AnyArrayViewMut, AnyCowArray,
26 Codec, StaticCodec, StaticCodecConfig, StaticCodecVersion,
27};
28use schemars::{json_schema, JsonSchema, Schema, SchemaGenerator};
29use serde::{Deserialize, Deserializer, Serialize, Serializer};
30use thiserror::Error;
31
32#[cfg(test)]
33use ::serde_json as _;
34
35#[derive(Clone, Serialize, Deserialize, JsonSchema)]
51pub struct RandomProjectionCodec {
53 pub seed: u64,
55 #[serde(flatten)]
57 pub reduction: RandomProjectionReduction,
58 #[serde(flatten)]
60 pub projection: RandomProjectionKind,
61 #[serde(default, rename = "_version")]
63 pub version: StaticCodecVersion<0, 1, 0>,
64}
65
66#[derive(Clone, Serialize, Deserialize, JsonSchema)]
68#[serde(tag = "reduction", rename_all = "kebab-case")]
70pub enum RandomProjectionReduction {
71 JohnsonLindenstrauss {
74 epsilon: OpenClosedUnit<f64>,
76 },
77 Explicit {
80 k: NonZeroUsize,
82 },
83}
84
85#[derive(Clone, Serialize, Deserialize, JsonSchema)]
87#[serde(tag = "projection", rename_all = "kebab-case")]
89pub enum RandomProjectionKind {
90 Gaussian,
93 Sparse {
104 #[serde(default, skip_serializing_if = "Option::is_none")]
124 density: Option<OpenClosedUnit<f64>>,
125 },
126}
127
128impl Codec for RandomProjectionCodec {
129 type Error = RandomProjectionCodecError;
130
131 fn encode(&self, data: AnyCowArray) -> Result<AnyArray, Self::Error> {
132 match data {
133 AnyCowArray::F32(data) => Ok(AnyArray::F32(
134 project_with_projection(data, self.seed, &self.reduction, &self.projection)?
135 .into_dyn(),
136 )),
137 AnyCowArray::F64(data) => Ok(AnyArray::F64(
138 project_with_projection(data, self.seed, &self.reduction, &self.projection)?
139 .into_dyn(),
140 )),
141 encoded => Err(RandomProjectionCodecError::UnsupportedDtype(
142 encoded.dtype(),
143 )),
144 }
145 }
146
147 fn decode(&self, encoded: AnyCowArray) -> Result<AnyArray, Self::Error> {
148 match encoded {
149 AnyCowArray::F32(encoded) => Ok(AnyArray::F32(
150 reconstruct_with_projection(encoded, self.seed, &self.projection)?.into_dyn(),
151 )),
152 AnyCowArray::F64(encoded) => Ok(AnyArray::F64(
153 reconstruct_with_projection(encoded, self.seed, &self.projection)?.into_dyn(),
154 )),
155 encoded => Err(RandomProjectionCodecError::UnsupportedDtype(
156 encoded.dtype(),
157 )),
158 }
159 }
160
161 fn decode_into(
162 &self,
163 encoded: AnyArrayView,
164 decoded: AnyArrayViewMut,
165 ) -> Result<(), Self::Error> {
166 match (encoded, decoded) {
167 (AnyArrayView::F32(encoded), AnyArrayViewMut::F32(decoded)) => {
168 reconstruct_into_with_projection(encoded, decoded, self.seed, &self.projection)
169 }
170 (AnyArrayView::F64(encoded), AnyArrayViewMut::F64(decoded)) => {
171 reconstruct_into_with_projection(encoded, decoded, self.seed, &self.projection)
172 }
173 (encoded @ (AnyArrayView::F32(_) | AnyArrayView::F64(_)), decoded) => {
174 Err(RandomProjectionCodecError::MismatchedDecodeIntoArray {
175 source: AnyArrayAssignError::DTypeMismatch {
176 src: encoded.dtype(),
177 dst: decoded.dtype(),
178 },
179 })
180 }
181 (encoded, _decoded) => Err(RandomProjectionCodecError::UnsupportedDtype(
182 encoded.dtype(),
183 )),
184 }
185 }
186}
187
188impl StaticCodec for RandomProjectionCodec {
189 const CODEC_ID: &'static str = "random-projection.rs";
190
191 type Config<'de> = Self;
192
193 fn from_config(config: Self::Config<'_>) -> Self {
194 config
195 }
196
197 fn get_config(&self) -> StaticCodecConfig<Self> {
198 StaticCodecConfig::from(self)
199 }
200}
201
202#[derive(Debug, Error)]
203pub enum RandomProjectionCodecError {
205 #[error("RandomProjection does not support the dtype {0}")]
207 UnsupportedDtype(AnyArrayDType),
208 #[error("RandomProjection only supports matrix / 2d-shaped arrays")]
210 NonMatrixData {
211 #[from]
213 source: ShapeError,
214 },
215 #[error("RandomProjection does not support non-finite (infinite or NaN) floating point data")]
218 NonFiniteData,
219 #[error("RandomProjection cannot encode or decode from an array with {input} samples to an array with {output} samples")]
222 NumberOfSamplesMismatch {
223 input: usize,
225 output: usize,
227 },
228 #[error("RandomProjection cannot decode from an array with zero dimensionality `K`")]
231 ProjectedArrayZeroComponents,
232 #[error("RandomProjection cannot decode from an array with corrupted dimensionality metadata")]
235 CorruptedNumberOfComponents,
236 #[error("RandomProjection cannot decode into an array with {output} features that differs from the {metadata} features stored in the encoded metadata")]
239 NumberOfFeaturesMismatch {
240 metadata: usize,
242 output: usize,
244 },
245 #[error("RandomProjection cannot decode into the provided array")]
247 MismatchedDecodeIntoArray {
248 #[from]
250 source: AnyArrayAssignError,
251 },
252}
253
254pub fn project_with_projection<T: FloatExt, S: Data<Elem = T>, D: Dimension>(
266 data: ArrayBase<S, D>,
267 seed: u64,
268 reduction: &RandomProjectionReduction,
269 projection: &RandomProjectionKind,
270) -> Result<Array<T, Ix2>, RandomProjectionCodecError> {
271 let data = data
272 .into_dimensionality()
273 .map_err(|err| RandomProjectionCodecError::NonMatrixData { source: err })?;
274
275 let (n, d) = data.dim();
276
277 let k = match reduction {
278 RandomProjectionReduction::JohnsonLindenstrauss { epsilon } => {
279 johnson_lindenstrauss_min_k(n, *epsilon)
280 }
281 RandomProjectionReduction::Explicit { k } => k.get(),
282 };
283
284 let mut projected = Array::<T, Ix2>::from_elem((n, k + 1), T::ZERO);
285
286 for p in projected.slice_mut(s!(.., k)) {
289 *p = T::from_usize(d);
290 }
291
292 match projection {
293 RandomProjectionKind::Gaussian => project_into(
294 data,
295 projected.slice_mut(s!(.., ..k)),
296 |x, y| gaussian_project(x, y, seed),
297 gaussian_normaliser(k),
298 ),
299 RandomProjectionKind::Sparse { density } => {
300 let density = density_or_ping_li_minimum(*density, d);
301 project_into(
302 data,
303 projected.slice_mut(s!(.., ..k)),
304 |x, y| sparse_project(x, y, density, seed),
305 sparse_normaliser(k, density),
306 )
307 }
308 }?;
309
310 Ok(projected)
311}
312
313#[expect(clippy::needless_pass_by_value)]
314pub fn project_into<T: FloatExt, S: Data<Elem = T>>(
329 data: ArrayBase<S, Ix2>,
330 mut projected: ArrayViewMut<T, Ix2>,
331 projection: impl Fn(usize, usize) -> T,
332 normalizer: T,
333) -> Result<(), RandomProjectionCodecError> {
334 let (n, d) = data.dim();
335 let (n2, _k) = projected.dim();
336
337 if n2 != n {
338 return Err(RandomProjectionCodecError::NumberOfSamplesMismatch {
339 input: n,
340 output: n2,
341 });
342 }
343
344 let mut skip_projection_column = Vec::with_capacity(d);
345
346 for (j, projected_j) in projected.columns_mut().into_iter().enumerate() {
347 skip_projection_column.clear();
352 for l in 0..d {
353 let p = projection(l, j);
354 if !p.is_zero() {
355 skip_projection_column.push((l, p));
356 }
357 }
358
359 for (data_i, projected_ij) in data.rows().into_iter().zip(projected_j) {
360 let mut acc = T::ZERO;
361
362 #[expect(clippy::indexing_slicing)]
363 for &(l, p) in &skip_projection_column {
365 acc += data_i[l] * p;
366 }
367
368 *projected_ij = acc * normalizer;
369 }
370 }
371
372 if !Zip::from(projected).all(|x| x.is_finite()) {
373 return Err(RandomProjectionCodecError::NonFiniteData);
374 }
375
376 Ok(())
377}
378
379pub fn reconstruct_with_projection<T: FloatExt, S: Data<Elem = T>, D: Dimension>(
397 projected: ArrayBase<S, D>,
398 seed: u64,
399 projection: &RandomProjectionKind,
400) -> Result<Array<T, Ix2>, RandomProjectionCodecError> {
401 let projected = projected
402 .into_dimensionality()
403 .map_err(|err| RandomProjectionCodecError::NonMatrixData { source: err })?;
404
405 if projected.is_empty() {
406 return Ok(projected.to_owned());
407 }
408
409 let (_n, k): (usize, usize) = projected.dim();
410 let Some(k) = k.checked_sub(1) else {
411 return Err(RandomProjectionCodecError::ProjectedArrayZeroComponents);
412 };
413
414 let ds = projected.slice(s!(.., k));
417 let Ok(Some(d)) = ds.fold(Ok(None), |acc, d| match acc {
418 Ok(None) => Ok(Some(d.into_usize())),
419 Ok(Some(d2)) if d2 == d.into_usize() => Ok(Some(d2)),
420 _ => Err(()),
421 }) else {
422 return Err(RandomProjectionCodecError::CorruptedNumberOfComponents);
423 };
424
425 let projected = projected.slice_move(s!(.., ..k));
427
428 match projection {
429 RandomProjectionKind::Gaussian => reconstruct(
430 projected,
431 d,
432 |x, y| gaussian_project(x, y, seed),
433 gaussian_normaliser(k),
434 ),
435 RandomProjectionKind::Sparse { density } => {
436 let density = density_or_ping_li_minimum(*density, d);
437 reconstruct(
438 projected,
439 d,
440 |x, y| sparse_project(x, y, density, seed),
441 sparse_normaliser(k, density),
442 )
443 }
444 }
445}
446
447#[expect(clippy::needless_pass_by_value)]
448pub fn reconstruct<T: FloatExt, S: Data<Elem = T>>(
461 projected: ArrayBase<S, Ix2>,
462 d: usize,
463 projection: impl Fn(usize, usize) -> T,
464 normalizer: T,
465) -> Result<Array<T, Ix2>, RandomProjectionCodecError> {
466 if projected.is_empty() {
467 return Ok(projected.to_owned());
468 }
469
470 let (n, k) = projected.dim();
471
472 let mut reconstructed = Array::<T, Ix2>::from_elem((n, d), T::ZERO);
473
474 let mut skip_projection_row = Vec::with_capacity(k);
475
476 for (l, reconstructed_l) in reconstructed.columns_mut().into_iter().enumerate() {
477 skip_projection_row.clear();
482 for j in 0..k {
483 let p = projection(l, j);
484 if !p.is_zero() {
485 skip_projection_row.push((j, p));
486 }
487 }
488
489 for (projected_i, reconstructed_il) in projected.rows().into_iter().zip(reconstructed_l) {
490 let mut acc = T::ZERO;
491
492 #[expect(clippy::indexing_slicing)]
493 for &(j, p) in &skip_projection_row {
495 acc += projected_i[j] * p;
496 }
497
498 *reconstructed_il = acc * normalizer;
499 }
500 }
501
502 if !Zip::from(&reconstructed).all(|x| x.is_finite()) {
503 return Err(RandomProjectionCodecError::NonFiniteData);
504 }
505
506 Ok(reconstructed)
507}
508
509pub fn reconstruct_into_with_projection<T: FloatExt, S: Data<Elem = T>, D: Dimension>(
531 projected: ArrayBase<S, D>,
532 reconstructed: ArrayViewMut<T, D>,
533 seed: u64,
534 projection: &RandomProjectionKind,
535) -> Result<(), RandomProjectionCodecError> {
536 let projected: ArrayBase<S, Ix2> = projected
537 .into_dimensionality()
538 .map_err(|err| RandomProjectionCodecError::NonMatrixData { source: err })?;
539 let reconstructed: ArrayViewMut<T, Ix2> = reconstructed
540 .into_dimensionality()
541 .map_err(|err| RandomProjectionCodecError::NonMatrixData { source: err })?;
542
543 let (n, k) = projected.dim();
544 let (n2, d2) = reconstructed.dim();
545
546 if n2 != n {
547 return Err(RandomProjectionCodecError::NumberOfSamplesMismatch {
548 input: n,
549 output: n2,
550 });
551 }
552
553 let Some(k) = k.checked_sub(1) else {
554 return Err(RandomProjectionCodecError::ProjectedArrayZeroComponents);
555 };
556
557 let ds = projected.slice(s!(.., k));
560 let Ok(Some(d)) = ds.fold(Ok(None), |acc, d| match acc {
561 Ok(None) => Ok(Some(d.into_usize())),
562 Ok(Some(d2)) if d2 == d.into_usize() => Ok(Some(d2)),
563 _ => Err(()),
564 }) else {
565 return Err(RandomProjectionCodecError::CorruptedNumberOfComponents);
566 };
567
568 if d2 != d {
569 return Err(RandomProjectionCodecError::NumberOfFeaturesMismatch {
570 metadata: d,
571 output: d2,
572 });
573 }
574
575 let projected = projected.slice_move(s!(.., ..k));
577
578 match projection {
579 RandomProjectionKind::Gaussian => reconstruct_into(
580 projected,
581 reconstructed,
582 |x, y| gaussian_project(x, y, seed),
583 gaussian_normaliser(k),
584 ),
585 RandomProjectionKind::Sparse { density } => {
586 let density = density_or_ping_li_minimum(*density, d);
587 reconstruct_into(
588 projected,
589 reconstructed,
590 |x, y| sparse_project(x, y, density, seed),
591 sparse_normaliser(k, density),
592 )
593 }
594 }
595}
596
597#[expect(clippy::needless_pass_by_value)]
598pub fn reconstruct_into<T: FloatExt, S: Data<Elem = T>>(
613 projected: ArrayBase<S, Ix2>,
614 mut reconstructed: ArrayViewMut<T, Ix2>,
615 projection: impl Fn(usize, usize) -> T,
616 normalizer: T,
617) -> Result<(), RandomProjectionCodecError> {
618 let (n, k) = projected.dim();
619 let (n2, _d) = reconstructed.dim();
620
621 if n2 != n {
622 return Err(RandomProjectionCodecError::NumberOfSamplesMismatch {
623 input: n,
624 output: n2,
625 });
626 }
627
628 let mut skip_projection_row = Vec::with_capacity(k);
629
630 for (l, reconstructed_l) in reconstructed.columns_mut().into_iter().enumerate() {
631 skip_projection_row.clear();
636 for j in 0..k {
637 let p = projection(l, j);
638 if !p.is_zero() {
639 skip_projection_row.push((j, p));
640 }
641 }
642
643 for (projected_i, reconstructed_il) in projected.rows().into_iter().zip(reconstructed_l) {
644 let mut acc = T::ZERO;
645
646 #[expect(clippy::indexing_slicing)]
647 for &(j, p) in &skip_projection_row {
649 acc += projected_i[j] * p;
650 }
651
652 *reconstructed_il = acc * normalizer;
653 }
654 }
655
656 if !Zip::from(reconstructed).all(|x| x.is_finite()) {
657 return Err(RandomProjectionCodecError::NonFiniteData);
658 }
659
660 Ok(())
661}
662
663#[must_use]
673pub fn johnson_lindenstrauss_min_k(
674 n_samples: usize,
675 OpenClosedUnit(epsilon): OpenClosedUnit<f64>,
676) -> usize {
677 #[expect(clippy::suboptimal_flops)]
678 let denominator = (epsilon * epsilon * 0.5) - (epsilon * epsilon * epsilon / 3.0);
679 #[expect(clippy::cast_precision_loss)]
680 let min_k = (n_samples as f64).ln() * 4.0 / denominator;
681 #[expect(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
682 let min_k = min_k as usize;
683 min_k
684}
685
686#[must_use]
696pub fn density_or_ping_li_minimum<T: FloatExt>(
697 density: Option<OpenClosedUnit<f64>>,
698 d: usize,
699) -> T {
700 match density {
701 Some(OpenClosedUnit(density)) => T::from_f64(density),
702 None => T::from_usize(d).sqrt().recip(),
703 }
704}
705
706fn gaussian_project<T: FloatExt>(x: usize, y: usize, seed: u64) -> T {
709 let (ClosedOpenUnit(u0), OpenClosedUnit(u1)) = T::u01x2(hash_matrix_index(x, y, seed));
710
711 let theta = -T::TAU() * u0;
712 let r = (-T::TWO * u1.ln()).sqrt();
713
714 r * theta.sin()
715}
716
717fn gaussian_normaliser<T: FloatExt>(k: usize) -> T {
718 T::from_usize(k).sqrt().recip()
719}
720
721fn sparse_project<T: FloatExt>(x: usize, y: usize, density: T, seed: u64) -> T {
725 let (ClosedOpenUnit(u0), _u1) = T::u01x2(hash_matrix_index(x, y, seed));
726
727 if u0 < (density * T::HALF) {
728 -T::ONE
729 } else if u0 < density {
730 T::ONE
731 } else {
732 T::ZERO
733 }
734}
735
736fn sparse_normaliser<T: FloatExt>(k: usize, density: T) -> T {
737 (T::from_usize(k) * density).recip().sqrt()
738}
739
740const fn hash_matrix_index(x: usize, y: usize, seed: u64) -> u64 {
741 seahash_diffuse(seahash_diffuse(x as u64) ^ seed ^ (y as u64))
742}
743
744const fn seahash_diffuse(mut x: u64) -> u64 {
745 x = x.wrapping_mul(0x6eed_0e9d_a4d9_4a4f);
755
756 let a = x >> 32;
757 let b = x >> 60;
758
759 x ^= a >> b;
760
761 x = x.wrapping_mul(0x6eed_0e9d_a4d9_4a4f);
762
763 x
764}
765
766#[expect(clippy::derive_partial_eq_without_eq)] #[derive(Copy, Clone, PartialEq, PartialOrd, Hash)]
768pub struct ClosedOpenUnit<T: FloatExt>(T);
770
771#[expect(clippy::derive_partial_eq_without_eq)] #[derive(Copy, Clone, PartialEq, PartialOrd, Hash)]
773pub struct OpenClosedUnit<T: FloatExt>(T);
775
776impl Serialize for OpenClosedUnit<f64> {
777 fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
778 serializer.serialize_f64(self.0)
779 }
780}
781
782impl<'de> Deserialize<'de> for OpenClosedUnit<f64> {
783 fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
784 let x = f64::deserialize(deserializer)?;
785
786 if x > 0.0 && x <= 1.0 {
787 Ok(Self(x))
788 } else {
789 Err(serde::de::Error::invalid_value(
790 serde::de::Unexpected::Float(x),
791 &"a value in (0.0, 1.0]",
792 ))
793 }
794 }
795}
796
797impl JsonSchema for OpenClosedUnit<f64> {
798 fn schema_name() -> Cow<'static, str> {
799 Cow::Borrowed("OpenClosedUnitF64")
800 }
801
802 fn schema_id() -> Cow<'static, str> {
803 Cow::Borrowed(concat!(module_path!(), "::", "OpenClosedUnit<f64>"))
804 }
805
806 fn json_schema(_gen: &mut SchemaGenerator) -> Schema {
807 json_schema!({
808 "type": "number",
809 "exclusiveMinimum": 0.0,
810 "maximum": 1.0,
811 })
812 }
813}
814
815pub trait FloatExt: Float + ConstZero + ConstOne + FloatConst + AddAssign {
817 const HALF: Self;
819 const TWO: Self;
821
822 #[must_use]
824 fn from_f64(x: f64) -> Self;
825
826 #[must_use]
828 fn from_usize(n: usize) -> Self;
829
830 #[must_use]
832 fn into_usize(self) -> usize;
833
834 #[must_use]
839 fn u01x2(hash: u64) -> (ClosedOpenUnit<Self>, OpenClosedUnit<Self>);
840}
841
842impl FloatExt for f32 {
843 const HALF: Self = 0.5;
844 const TWO: Self = 2.0;
845
846 #[expect(clippy::cast_possible_truncation)]
847 fn from_f64(x: f64) -> Self {
848 x as Self
849 }
850
851 #[expect(clippy::cast_precision_loss)]
852 fn from_usize(n: usize) -> Self {
853 n as Self
854 }
855
856 #[expect(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
857 fn into_usize(self) -> usize {
858 self as usize
859 }
860
861 fn u01x2(hash: u64) -> (ClosedOpenUnit<Self>, OpenClosedUnit<Self>) {
862 #[expect(clippy::cast_possible_truncation)] let (low, high) = (
864 (hash & u64::from(u32::MAX)) as u32,
865 ((hash >> 32) & u64::from(u32::MAX)) as u32,
866 );
867
868 #[expect(clippy::cast_precision_loss)]
871 let u0 = ClosedOpenUnit(((high >> 8) as Self) * Self::from_bits(0x3380_0000_u32)); #[expect(clippy::cast_precision_loss)]
876 let u1 = OpenClosedUnit((((low >> 8) + 1) as Self) * Self::from_bits(0x3380_0000_u32)); (u0, u1)
879 }
880}
881
882impl FloatExt for f64 {
883 const HALF: Self = 0.5;
884 const TWO: Self = 2.0;
885
886 fn from_f64(x: f64) -> Self {
887 x
888 }
889
890 #[expect(clippy::cast_precision_loss)]
891 fn from_usize(n: usize) -> Self {
892 n as Self
893 }
894
895 #[expect(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
896 fn into_usize(self) -> usize {
897 self as usize
898 }
899
900 fn u01x2(hash: u64) -> (ClosedOpenUnit<Self>, OpenClosedUnit<Self>) {
901 #[expect(clippy::cast_precision_loss)]
904 let u0 =
905 ClosedOpenUnit(((hash >> 11) as Self) * Self::from_bits(0x3CA0_0000_0000_0000_u64)); let hash = seahash_diffuse(hash + 1);
908
909 #[expect(clippy::cast_precision_loss)]
912 let u1 = OpenClosedUnit(
913 (((hash >> 11) + 1) as Self) * Self::from_bits(0x3CA0_0000_0000_0000_u64),
914 ); (u0, u1)
917 }
918}
919
920#[cfg(test)]
921#[expect(clippy::unwrap_used, clippy::expect_used)]
922mod tests {
923 use ndarray_rand::rand_distr::{Distribution, Normal};
924 use ndarray_rand::RandomExt;
925
926 use super::*;
927
928 #[test]
929 fn gaussian_f32() {
930 test_error_decline::<f32>(
931 (100, 100),
932 Normal::new(42.0, 24.0).unwrap(),
933 42,
934 RandomProjectionKind::Gaussian,
935 );
936 }
937
938 #[test]
939 fn gaussian_f64() {
940 test_error_decline::<f64>(
941 (100, 100),
942 Normal::new(42.0, 24.0).unwrap(),
943 42,
944 RandomProjectionKind::Gaussian,
945 );
946 }
947
948 #[test]
949 fn achlioptas_sparse_f32() {
950 test_error_decline::<f32>(
951 (100, 100),
952 Normal::new(42.0, 24.0).unwrap(),
953 42,
954 RandomProjectionKind::Sparse {
955 density: Some(OpenClosedUnit(1.0 / 3.0)),
956 },
957 );
958 }
959
960 #[test]
961 fn achlioptas_sparse_f64() {
962 test_error_decline::<f64>(
963 (100, 100),
964 Normal::new(42.0, 24.0).unwrap(),
965 42,
966 RandomProjectionKind::Sparse {
967 density: Some(OpenClosedUnit(1.0 / 3.0)),
968 },
969 );
970 }
971
972 #[test]
973 fn ping_li_sparse_f32() {
974 test_error_decline::<f32>(
975 (100, 100),
976 Normal::new(42.0, 24.0).unwrap(),
977 42,
978 RandomProjectionKind::Sparse { density: None },
979 );
980 }
981
982 #[test]
983 fn ping_li_sparse_f64() {
984 test_error_decline::<f64>(
985 (100, 100),
986 Normal::new(42.0, 24.0).unwrap(),
987 42,
988 RandomProjectionKind::Sparse { density: None },
989 );
990 }
991
992 #[expect(clippy::needless_pass_by_value)]
993 fn test_error_decline<T: FloatExt + std::fmt::Display>(
994 shape: (usize, usize),
995 distribution: impl Distribution<T>,
996 seed: u64,
997 projection: RandomProjectionKind,
998 ) {
999 let data = Array::<T, Ix2>::random(shape, distribution);
1000
1001 let mut max_rmse = rmse(
1002 &data,
1003 &roundtrip(&data, 42, OpenClosedUnit(1.0), &projection),
1004 );
1005
1006 for epsilon in [
1007 OpenClosedUnit(0.5),
1008 OpenClosedUnit(0.1),
1009 OpenClosedUnit(0.01),
1010 ] {
1011 let new_rmse = rmse(&data, &roundtrip(&data, seed, epsilon, &projection));
1012 assert!(
1013 new_rmse <= max_rmse,
1014 "{new_rmse} > {max_rmse} for {epsilon}",
1015 epsilon = epsilon.0
1016 );
1017 max_rmse = new_rmse;
1018 }
1019 }
1020
1021 fn roundtrip<T: FloatExt>(
1022 data: &Array<T, Ix2>,
1023 seed: u64,
1024 epsilon: OpenClosedUnit<f64>,
1025 projection: &RandomProjectionKind,
1026 ) -> Array<T, Ix2> {
1027 let projected = project_with_projection(
1028 data.view(),
1029 seed,
1030 &RandomProjectionReduction::JohnsonLindenstrauss { epsilon },
1031 projection,
1032 )
1033 .expect("projecting must not fail");
1034 let reconstructed = reconstruct_with_projection(projected, seed, projection)
1035 .expect("reconstruction must not fail");
1036 #[expect(clippy::let_and_return)]
1037 reconstructed
1038 }
1039
1040 fn rmse<T: FloatExt>(data: &Array<T, Ix2>, reconstructed: &Array<T, Ix2>) -> T {
1041 let mut err = T::ZERO;
1042
1043 for (&d, &r) in data.iter().zip(reconstructed) {
1044 err += (d - r) * (d - r);
1045 }
1046
1047 let mse = err * T::from_usize(data.len()).recip();
1048 mse.sqrt()
1049 }
1050}