Implementation: Smile (haifengl) Spatial Index Constructors
Overview
This API Doc covers the constructors and factory methods for all seven spatial index structures in Smile's smile.neighbor package. These are the entry points for building spatial indexes from data, corresponding to Step 3 of the Nearest Neighbor Search workflow.
Import
import smile.neighbor.*;
import smile.math.distance.Metric;
import smile.math.distance.Distance;
import smile.math.distance.EuclideanDistance;
import smile.math.distance.EditDistance;
KDTree
Source: KDTree.java (Lines 119-147)
Signature
public class KDTree<E> implements KNNSearch<double[], E>, RNNSearch<double[], E>, Serializable {
/**
* Constructor.
* @param key the object keys (double[][] where each double[] is a point).
* @param data the data objects associated with each key.
* @throws IllegalArgumentException if key.length != data.length
*/
public KDTree(double[][] key, E[] data)
/**
* Factory method where data objects are the same as keys.
* @param data the data points (used as both keys and values).
* @return KDTree instance.
*/
public static KDTree<double[]> of(double[][] data)
}
Parameters
| Parameter | Type | Description |
|---|---|---|
| key | double[][] | Point coordinates. Each key[i] is a d-dimensional vector. All must have the same length. |
| data | E[] | Associated data objects. data[i] corresponds to key[i]. |
Example
// Simple construction -- data is both key and value
double[][] points = MathEx.randn(10000, 3);
KDTree<double[]> tree = KDTree.of(points);
// Separate keys and values
double[][] features = loadFeatures(); // n x d
String[] labels = loadLabels(); // n labels
KDTree<String> labeledTree = new KDTree<>(features, labels);
CoverTree
Source: CoverTree.java (Lines 227-317)
Signature
public class CoverTree<K, V> implements KNNSearch<K, V>, RNNSearch<K, V>, Serializable {
/**
* Constructor with default base (1.3).
* @param keys the data keys.
* @param data the data objects.
* @param distance a metric distance measure.
*/
public CoverTree(K[] keys, V[] data, Metric<K> distance)
/**
* Constructor with custom base.
* @param keys the data keys.
* @param data the data objects.
* @param distance a metric distance measure.
* @param base the base of the expansion constant (default 1.3).
*/
public CoverTree(K[] keys, V[] data, Metric<K> distance, double base)
/**
* List-based constructors (same signatures with List<K>, List<V>).
*/
public CoverTree(List<K> keys, List<V> data, Metric<K> distance)
public CoverTree(List<K> keys, List<V> data, Metric<K> distance, double base)
/**
* Factory methods where data objects are the same as keys.
*/
public static <T> CoverTree<T, T> of(T[] data, Metric<T> distance)
public static <T> CoverTree<T, T> of(T[] data, Metric<T> distance, double base)
public static <T> CoverTree<T, T> of(List<T> data, Metric<T> distance)
public static <T> CoverTree<T, T> of(List<T> data, Metric<T> distance, double base)
}
Parameters
| Parameter | Type | Description |
|---|---|---|
| keys | K[] or List<K> | Data keys of any type that the metric can compare. |
| data | V[] or List<V> | Associated data objects. Must match keys in length. |
| distance | Metric<K> | A metric distance function satisfying the triangle inequality. |
| base | double | Base of the expansion constant (default 1.3). Controls the granularity of tree levels. |
Example
import smile.math.distance.EuclideanDistance;
import smile.math.distance.ManhattanDistance;
// With Euclidean distance
double[][] data = loadData();
CoverTree<double[], double[]> eucTree = CoverTree.of(data, new EuclideanDistance());
// With Manhattan distance
CoverTree<double[], double[]> manTree = CoverTree.of(data, new ManhattanDistance());
// With custom base for finer granularity
CoverTree<double[], double[]> fineTree = CoverTree.of(data, new EuclideanDistance(), 1.1);
LSH
Source: LSH.java (Lines 91-175)
Signature
public class LSH<E> implements KNNSearch<double[], E>, RNNSearch<double[], E>, Serializable {
/**
* Constructor with data. Auto-computes L and k.
* L = max(50, n^0.25), k = max(3, log10(n)).
* @param keys the object keys.
* @param data the data objects.
* @param w the width of random projections.
*/
public LSH(double[][] keys, E[] data, double w)
/**
* Constructor with data and custom hash table size.
* @param keys the object keys.
* @param data the data objects.
* @param w the width of random projections.
* @param H the size of universal hash tables (must be >= keys.length).
*/
public LSH(double[][] keys, E[] data, double w, int H)
/**
* Constructor for empty LSH (populate with put()).
* @param d the dimensionality of data (must be >= 2).
* @param L the number of hash tables.
* @param k the number of random projections per hash value.
* @param w the width of random projections.
*/
public LSH(int d, int L, int k, double w)
/**
* Constructor for empty LSH with custom hash table size.
* @param d the dimensionality of data.
* @param L the number of hash tables.
* @param k the number of random projections per hash value.
* @param w the width of random projections.
* @param H the size of universal hash tables.
*/
public LSH(int d, int L, int k, double w, int H)
/**
* Inserts a key-value pair into the hash tables.
* @param key the key vector.
* @param value the associated data object.
*/
public void put(double[] key, E value)
}
Parameters
| Parameter | Type | Description | Default |
|---|---|---|---|
| d | int | Dimensionality of input data. Must be >= 2. | Inferred from keys[0].length |
| L | int | Number of hash tables. More tables = higher recall, more memory. | max(50, n^0.25) |
| k | int | Number of random projections per hash. More = fewer collisions, lower recall. | max(3, log10(n)) |
| w | double | Width of random projections. Controls bucket interval. Must be > 0. | User-specified |
| H | int | Size of universal hash tables. Must be >= n. | 1017881 |
Example
// Batch construction with auto-parameters
double[][] data = loadHighDimData();
LSH<double[]> lsh = new LSH<>(data, data, 4.0);
// Incremental construction
int d = 128;
LSH<String> index = new LSH<>(d, 50, 5, 4.0);
for (int i = 0; i < data.length; i++) {
index.put(data[i], labels[i]);
}
MPLSH
Source: MPLSH.java (Lines 71-88)
Signature
public class MPLSH<E> extends LSH<E> {
/**
* Constructor.
* @param d the dimensionality of data.
* @param L the number of hash tables.
* @param k the number of random projections per hash value.
* @param w the width of random projections.
*/
public MPLSH(int d, int L, int k, double w)
/**
* Constructor with custom hash table size.
* @param d the dimensionality of data.
* @param L the number of hash tables.
* @param k the number of random projections per hash value.
* @param w the width of random projections.
* @param H the number of buckets of hash tables.
*/
public MPLSH(int d, int L, int k, double w, int H)
/**
* Fits the posteriori multiple probe algorithm.
* Must be called after populating the index with put() to enable multi-probe.
* @param range a range search structure (e.g., LinearSearch) for ground truth.
* @param samples training query samples.
* @param radius the radius for range search during training.
*/
public void fit(RNNSearch<double[], double[]> range, double[][] samples, double radius)
}
Example
import smile.neighbor.MPLSH;
import smile.neighbor.LinearSearch;
import smile.math.MathEx;
double[][] data = loadHighDimData();
int d = data[0].length;
// Build MPLSH with fewer hash tables than standard LSH
MPLSH<double[]> mplsh = new MPLSH<>(d, 10, 5, 4.0);
for (int i = 0; i < data.length; i++) {
mplsh.put(data[i], data[i]);
}
// Train multi-probe model using ground truth from linear search
LinearSearch<double[], double[]> baseline = LinearSearch.of(data, MathEx::distance);
double[][] samples = selectTrainingSamples(data, 500);
mplsh.fit(baseline, samples, 5.0);
RandomProjectionForest
Source: RandomProjectionForest.java (Lines 167-173)
Signature
public class RandomProjectionForest implements KNNSearch<double[], double[]> {
/**
* Builds a random projection forest (parallel construction).
* @param data the data set (double[][] where each row is a point).
* @param numTrees the number of trees.
* @param leafSize the maximum size of leaf nodes.
* @param angular true for angular metric, false for Euclidean.
* @return RandomProjectionForest instance.
*/
public static RandomProjectionForest of(double[][] data, int numTrees, int leafSize, boolean angular)
}
Parameters
| Parameter | Type | Description |
|---|---|---|
| data | double[][] | Input data points. |
| numTrees | int | Number of random projection trees. More trees = higher recall, more memory. |
| leafSize | int | Maximum number of points per leaf. Smaller = deeper trees, higher recall. |
| angular | boolean | If true, uses angular (cosine) distance. If false, uses Euclidean distance. |
Example
double[][] embeddings = loadWordEmbeddings(); // e.g., 300-dimensional
// Build forest with 10 trees, max 30 points per leaf, Euclidean distance
RandomProjectionForest rpf = RandomProjectionForest.of(embeddings, 10, 30, false);
// Build forest with angular distance for cosine similarity
RandomProjectionForest cosineRPF = RandomProjectionForest.of(embeddings, 15, 20, true);
BKTree
Source: BKTree.java (Lines 172-208)
Signature
public class BKTree<K, V> implements RNNSearch<K, V>, Serializable {
/**
* Constructor for an empty BK-tree.
* @param distance a discrete metric (integer-valued distances).
*/
public BKTree(Metric<K> distance)
/**
* Factory method. Builds a BK-tree from an array of data.
* @param data the data objects (used as both key and value).
* @param distance a discrete metric.
* @return BKTree instance.
*/
public static <T> BKTree<T, T> of(T[] data, Metric<T> distance)
/**
* Factory method. Builds a BK-tree from a list of data.
*/
public static <T> BKTree<T, T> of(List<T> data, Metric<T> distance)
/**
* Inserts a key-value pair.
* @param key the data key.
* @param value the data object.
*/
public void add(K key, V value)
/**
* Inserts multiple key-value pairs from a map.
* @param data the entries to insert.
*/
public void add(Map<K, V> data)
}
Example
import smile.math.distance.EditDistance;
// Batch construction from array
String[] words = {"hello", "help", "held", "hero", "helm", "world"};
BKTree<String, String> tree = BKTree.of(words, new EditDistance(true));
// Incremental construction
BKTree<String, String> dict = new BKTree<>(new EditDistance(true));
dict.add("algorithm", "algorithm");
dict.add("alignment", "alignment");
dict.add("allocate", "allocate");
LinearSearch
Source: LinearSearch.java (Lines 76-138)
Signature
public class LinearSearch<K, V> implements KNNSearch<K, V>, RNNSearch<K, V>, Serializable {
/**
* Constructor with separate keys and values (array).
* @param keys the data keys.
* @param data the data objects.
* @param distance the distance function.
*/
public LinearSearch(K[] keys, V[] data, Distance<K> distance)
/**
* Constructor with separate keys and values (list).
*/
public LinearSearch(List<K> keys, List<V> data, Distance<K> distance)
/**
* Constructor with key extraction function.
* @param data the data objects.
* @param distance the distance function on keys.
* @param key a function to extract the key from each data object.
*/
public LinearSearch(V[] data, Distance<K> distance, Function<V, K> key)
public LinearSearch(List<V> data, Distance<K> distance, Function<V, K> key)
/**
* Factory method where data is both key and value.
* @param data the data objects.
* @param distance the distance function.
* @return LinearSearch instance.
*/
public static <T> LinearSearch<T, T> of(T[] data, Distance<T> distance)
public static <T> LinearSearch<T, T> of(List<T> data, Distance<T> distance)
}
Example
import smile.math.MathEx;
// Simple construction with method reference
double[][] data = MathEx.randn(1000, 10);
LinearSearch<double[], double[]> search = LinearSearch.of(data, MathEx::distance);
// With key extraction function
record DataPoint(double[] features, String label) {}
DataPoint[] points = loadLabeledData();
LinearSearch<double[], DataPoint> labeled = new LinearSearch<>(
points, MathEx::distance, DataPoint::features
);
Source
| File | Constructor Lines | Repository Path |
|---|---|---|
| KDTree.java | L119-147 | base/src/main/java/smile/neighbor/KDTree.java |
| CoverTree.java | L227-317 | base/src/main/java/smile/neighbor/CoverTree.java |
| LSH.java | L91-175 | base/src/main/java/smile/neighbor/LSH.java |
| MPLSH.java | L71-88 | base/src/main/java/smile/neighbor/MPLSH.java |
| RandomProjectionForest.java | L167-173 | base/src/main/java/smile/neighbor/RandomProjectionForest.java |
| BKTree.java | L172-208 | base/src/main/java/smile/neighbor/BKTree.java |
| LinearSearch.java | L76-138 | base/src/main/java/smile/neighbor/LinearSearch.java |
Source: base/src/main/java/smile/neighbor/