mlpack  2.0.1
lsh_search.hpp
Go to the documentation of this file.
1 
30 #ifndef __MLPACK_METHODS_NEIGHBOR_SEARCH_LSH_SEARCH_HPP
31 #define __MLPACK_METHODS_NEIGHBOR_SEARCH_LSH_SEARCH_HPP
32 
33 #include <mlpack/core.hpp>
34 #include <vector>
35 #include <string>
36 
39 
40 namespace mlpack {
41 namespace neighbor {
42 
50 template<typename SortPolicy = NearestNeighborSort>
51 class LSHSearch
52 {
53  public:
74  LSHSearch(const arma::mat& referenceSet,
75  const size_t numProj,
76  const size_t numTables,
77  const double hashWidth = 0.0,
78  const size_t secondHashSize = 99901,
79  const size_t bucketSize = 500);
80 
85  LSHSearch();
86 
90  ~LSHSearch();
91 
96  void Train(const arma::mat& referenceSet,
97  const size_t numProj,
98  const size_t numTables,
99  const double hashWidth = 0.0,
100  const size_t secondHashSize = 99901,
101  const size_t bucketSize = 500);
102 
122  void Search(const arma::mat& querySet,
123  const size_t k,
124  arma::Mat<size_t>& resultingNeighbors,
125  arma::mat& distances,
126  const size_t numTablesToSearch = 0);
127 
146  void Search(const size_t k,
147  arma::Mat<size_t>& resultingNeighbors,
148  arma::mat& distances,
149  const size_t numTablesToSearch = 0);
150 
156  template<typename Archive>
157  void Serialize(Archive& ar, const unsigned int /* version */);
158 
160  size_t DistanceEvaluations() const { return distanceEvaluations; }
163 
165  const arma::mat& ReferenceSet() const { return *referenceSet; }
166 
168  size_t NumProjections() const { return projections.size(); }
170  const arma::mat& Projection(const size_t i) const { return projections[i]; }
171 
173  const arma::mat& Offsets() const { return offsets; }
174 
176  const arma::vec& SecondHashWeights() const { return secondHashWeights; }
177 
179  size_t BucketSize() const { return bucketSize; }
180 
182  const arma::Mat<size_t>& SecondHashTable() const { return secondHashTable; }
183 
184  private:
198  void BuildHash();
199 
211  template<typename VecType>
212  void ReturnIndicesFromTable(const VecType& queryPoint,
213  arma::uvec& referenceIndices,
214  size_t numTablesToSearch) const;
215 
227  void BaseCase(const size_t queryIndex,
228  const size_t referenceIndex,
229  arma::Mat<size_t>& neighbors,
230  arma::mat& distances) const;
231 
244  void BaseCase(const size_t queryIndex,
245  const size_t referenceIndex,
246  const arma::mat& querySet,
247  arma::Mat<size_t>& neighbors,
248  arma::mat& distances) const;
249 
264  void InsertNeighbor(arma::mat& distances,
265  arma::Mat<size_t>& neighbors,
266  const size_t queryIndex,
267  const size_t pos,
268  const size_t neighbor,
269  const double distance) const;
270 
272  const arma::mat* referenceSet;
274  bool ownsSet;
275 
277  size_t numProj;
279  size_t numTables;
280 
282  std::vector<arma::mat> projections; // should be [numProj x dims] x numTables
283 
285  arma::mat offsets; // should be numProj x numTables
286 
288  double hashWidth;
289 
292 
294  arma::vec secondHashWeights;
295 
297  size_t bucketSize;
298 
300  arma::Mat<size_t> secondHashTable;
301 
304  arma::Col<size_t> bucketContentSize;
305 
308  arma::Col<size_t> bucketRowInHashTable;
309 
312 }; // class LSHSearch
313 
314 } // namespace neighbor
315 } // namespace mlpack
316 
317 // Include implementation.
318 #include "lsh_search_impl.hpp"
319 
320 #endif
void Train(const arma::mat &referenceSet, const size_t numProj, const size_t numTables, const double hashWidth=0.0, const size_t secondHashSize=99901, const size_t bucketSize=500)
Train the LSH model on the given dataset.
size_t numTables
The number of hash tables.
Definition: lsh_search.hpp:279
Linear algebra utility functions, generally performed on matrices or vectors.
size_t BucketSize() const
Get the bucket size of the second hash.
Definition: lsh_search.hpp:179
void BuildHash()
This function builds a hash table with two levels of hashing as presented in the paper.
LSHSearch()
Create an untrained LSH model.
size_t bucketSize
The bucket size of the second hash.
Definition: lsh_search.hpp:297
const arma::mat & Projection(const size_t i) const
Get the projection matrix of the given table.
Definition: lsh_search.hpp:170
const arma::mat & ReferenceSet() const
Return the reference dataset.
Definition: lsh_search.hpp:165
std::vector< arma::mat > projections
The std::vector containing the projection matrix of each table.
Definition: lsh_search.hpp:282
arma::Col< size_t > bucketContentSize
The number of elements present in each hash bucket; should be secondHashSize.
Definition: lsh_search.hpp:304
size_t DistanceEvaluations() const
Return the number of distance evaluations performed.
Definition: lsh_search.hpp:160
The LSHSearch class; this class builds a hash on the reference set and uses this hash to compute the ...
Definition: lsh_search.hpp:51
void Search(const arma::mat &querySet, const size_t k, arma::Mat< size_t > &resultingNeighbors, arma::mat &distances, const size_t numTablesToSearch=0)
Compute the nearest neighbors of the points in the given query set and store the output in the given ...
void ReturnIndicesFromTable(const VecType &queryPoint, arma::uvec &referenceIndices, size_t numTablesToSearch) const
This function takes a query and hashes it into each of the hash tables to get keys for the query and ...
size_t NumProjections() const
Get the number of projections.
Definition: lsh_search.hpp:168
void BaseCase(const size_t queryIndex, const size_t referenceIndex, arma::Mat< size_t > &neighbors, arma::mat &distances) const
This is a helper function that computes the distance of the query to the neighbor candidates and appr...
arma::mat offsets
The list of the offsets &#39;b&#39; for each of the projection for each table.
Definition: lsh_search.hpp:285
const arma::mat & Offsets() const
Get the offsets &#39;b&#39; for each of the projections. (One &#39;b&#39; per column.)
Definition: lsh_search.hpp:173
void Serialize(Archive &ar, const unsigned int)
Serialize the LSH model.
Include all of the base components required to write MLPACK methods, and the main MLPACK Doxygen docu...
size_t & DistanceEvaluations()
Modify the number of distance evaluations performed.
Definition: lsh_search.hpp:162
arma::Mat< size_t > secondHashTable
The final hash table; should be (< secondHashSize) x bucketSize.
Definition: lsh_search.hpp:300
const arma::vec & SecondHashWeights() const
Get the weights of the second hash.
Definition: lsh_search.hpp:176
size_t secondHashSize
The big prime representing the size of the second hash.
Definition: lsh_search.hpp:291
size_t numProj
The number of projections.
Definition: lsh_search.hpp:277
bool ownsSet
If true, we own the reference set.
Definition: lsh_search.hpp:274
void InsertNeighbor(arma::mat &distances, arma::Mat< size_t > &neighbors, const size_t queryIndex, const size_t pos, const size_t neighbor, const double distance) const
This is a helper function that efficiently inserts better neighbor candidates into an existing set of...
arma::vec secondHashWeights
The weights of the second hash.
Definition: lsh_search.hpp:294
const arma::mat * referenceSet
Reference dataset.
Definition: lsh_search.hpp:272
double hashWidth
The hash width.
Definition: lsh_search.hpp:288
const arma::Mat< size_t > & SecondHashTable() const
Get the second hash table.
Definition: lsh_search.hpp:182
arma::Col< size_t > bucketRowInHashTable
For a particular hash value, points to the row in secondHashTable corresponding to this value...
Definition: lsh_search.hpp:308
size_t distanceEvaluations
The number of distance evaluations.
Definition: lsh_search.hpp:311
~LSHSearch()
Clean memory.