8000 Implement fastpair by Mec-iS · Pull Request #142 · smartcorelib/smartcore · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

Implement fastpair #142

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 18 commits into from
Aug 23, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ default = ["datasets"]
ndarray-bindings = ["ndarray"]
nalgebra-bindings = ["nalgebra"]
datasets = []
fp_bench = []

[dependencies]
ndarray = { version = "0.15", optional = true }
Expand All @@ -26,6 +27,7 @@ num = "0.4"
rand = "0.8"
rand_distr = "0.4"
serde = { version = "1", features = ["derive"], optional = true }
itertools = "0.10.3"

[target.'cfg(target_arch = "wasm32")'.dependencies]
getrandom = { version = "0.2", features = ["js"] }
Expand All @@ -46,3 +48,8 @@ harness = false
name = "naive_bayes"
harness = false
required-features = ["ndarray-bindings", "nalgebra-bindings"]

[[bench]]
name = "fastpair"
harness = false
required-features = ["fp_bench"]
56 changes: 56 additions & 0 deletions benches/fastpair.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};

// to run this bench you have to change the declaraion in mod.rs ---> pub mod fastpair;
use smartcore::algorithm::neighbour::fastpair::FastPair;
use smartcore::linalg::naive::dense_matrix::*;
use std::time::Duration;

fn closest_pair_bench(n: usize, m: usize) -> () {
let x = DenseMatrix::<f64>::rand(n, m);
let fastpair = FastPair::new(&x);
let result = fastpair.unwrap();

result.closest_pair();
}

fn closest_pair_brute_bench(n: usize, m: usize) -> () {
let x = DenseMatrix::<f64>::rand(n, m);
let fastpair = FastPair::new(&x);
let result = fastpair.unwrap();

result.closest_pair_brute();
}

fn bench_fastpair(c: &mut Criterion) {
let mut group = c.benchmark_group("FastPair");

// with full samples size (100) the test will take too long
group.significance_level(0.1).sample_size(30);
// increase from default 5.0 secs
group.measurement_time(Duration::from_secs(60));

for n_samples in [100_usize, 1000_usize].iter() {
for n_features in [10_usize, 100_usize, 1000_usize].iter() {
group.bench_with_input(
BenchmarkId::from_parameter(format!(
"fastpair --- n_samples: {}, n_features: {}",
n_samples, n_features
)),
n_samples,
|b, _| b.iter(|| closest_pair_bench(*n_samples, *n_features)),
);
group.bench_with_input(
BenchmarkId::from_parameter(format!(
"brute --- n_samples: {}, n_features: {}",
n_samples, n_features
)),
n_samples,
|b, _| b.iter(|| closest_pair_brute_bench(*n_samples, *n_features)),
);
}
}
group.finish();
}

criterion_group!(benches, bench_fastpair);
criterion_main!(benches);
48 changes: 48 additions & 0 deletions src/algorithm/neighbour/distances.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
//!
//! Dissimilarities for vector-vector distance
//!
//! Representing distances as pairwise dissimilarities, so to build a
//! graph of closest neighbours. This representation can be reused for
//! different implementations (initially used in this library for FastPair).
use std::cmp::{Eq, Ordering, PartialOrd};

#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

use crate::math::num::RealNumber;

///
/// The edge of the subgraph is defined by `PairwiseDistance`.
/// The calling algorithm can store a list of distsances as
/// a list of these structures.
///
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, Copy)]
pub struct PairwiseDistance<T: RealNumber> {
/// index of the vector in the original `Matrix` or list
pub node: usize,

/// index of the closest neighbor in the original `Matrix` or same list
pub neighbour: Option<usize>,

/// measure of distance, according to the algorithm distance function
/// if the distance is None, the edge has value "infinite" or max distance
/// each algorithm has to match
pub distance: Option<T>,
}

impl<T: RealNumber> Eq for PairwiseDistance<T> {}

impl<T: RealNumber> PartialEq for PairwiseDistance<T> {
fn eq(&self, other: &Self) -> bool {
5A30 self.node == other.node
&& self.neighbour == other.neighbour
&& self.distance == other.distance
}
}

impl<T: RealNumber> PartialOrd for PairwiseDistance<T> {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
self.distance.partial_cmp(&other.distance)
}
}
Loading
0