HNSW#

HNSW is a graph-based nearest neighbors implementation for the CPU. This implementation provides the ability to serialize a CAGRA graph and read it as a base-layer-only hnswlib graph.

#include <raft/neighbors/hnsw.hpp>

namespace raft::neighbors::hnsw

template<typename T, typename IdxT>
std::unique_ptr<index<T>> from_cagra(raft::resources const &res, raft::neighbors::cagra::index<T, IdxT> cagra_index)#

Construct an hnswlib base-layer-only index from a CAGRA index. NOTE:

  1. This method uses the filesystem: it writes the CAGRA index to /tmp/cagra_index.bin, reads it back as an hnswlib index, and then deletes the temporary file.

  2. This function is only offered as a compiled symbol in libraft.so

Usage example:

// Build a CAGRA index
using namespace raft::neighbors;
// use default index parameters
cagra::index_params index_params;
// create and fill the index from a [N, D] dataset
auto index = cagra::build(res, index_params, dataset);

// Load CAGRA index as base-layer-only hnswlib index
auto hnsw_index = hnsw::from_cagra(res, index);

Template Parameters:
  • T – data element type

  • IdxT – type of the indices

Parameters:
  • res[in] raft resources

  • cagra_index[in] cagra index

template<>
std::unique_ptr<index<float>> from_cagra(raft::resources const &res, raft::neighbors::cagra::index<float, uint32_t> cagra_index)#
template<>
std::unique_ptr<index<int8_t>> from_cagra(raft::resources const &res, raft::neighbors::cagra::index<int8_t, uint32_t> cagra_index)#
template<>
std::unique_ptr<index<uint8_t>> from_cagra(raft::resources const &res, raft::neighbors::cagra::index<uint8_t, uint32_t> cagra_index)#
template<typename T>
void search(raft::resources const &res, const search_params &params, const index<T> &idx, raft::host_matrix_view<const T, int64_t, row_major> queries, raft::host_matrix_view<uint64_t, int64_t, row_major> neighbors, raft::host_matrix_view<float, int64_t, row_major> distances)#

Search hnswlib base-layer-only index constructed from a CAGRA index.

Usage example:

// Build a CAGRA index
using namespace raft::neighbors;
// use default index parameters
cagra::index_params index_params;
// create and fill the index from a [N, D] dataset
auto index = cagra::build(res, index_params, dataset);

// Save CAGRA index as base layer only hnswlib index
hnsw::serialize(res, "my_index.bin", index);

// Load CAGRA index as base layer only hnswlib index
// (the element type cannot be deduced and must be given explicitly; a float dataset is assumed here)
auto hnsw_index = hnsw::deserialize<float>(res, "my_index.bin", D, raft::distance::L2Expanded);

// Search K nearest neighbors as an hnswlib index
// using host threads for concurrency
hnsw::search_params search_params;
search_params.ef = 50;  // ef >= K
search_params.num_threads = 10;
auto neighbors = raft::make_host_matrix<uint64_t, int64_t>(res, n_queries, k);
auto distances = raft::make_host_matrix<float, int64_t>(res, n_queries, k);
hnsw::search(res, search_params, *hnsw_index, queries, neighbors.view(), distances.view());
// hnsw_index is a std::unique_ptr, so it is de-allocated automatically

Template Parameters:
  • T – data element type

  • IdxT – type of the indices

Parameters:
  • res[in] raft resources

  • params[in] configure the search

  • idx[in] hnswlib base-layer-only index constructed from a CAGRA index

  • queries[in] a host matrix view to a row-major matrix [n_queries, index->dim()]

  • neighbors[out] a host matrix view to the indices of the neighbors in the source dataset [n_queries, k]

  • distances[out] a host matrix view to the distances to the selected neighbors [n_queries, k]

struct search_params : public raft::neighbors::ann::search_params#
#include <hnsw_types.hpp>
template<typename T>
struct index : public raft::neighbors::ann::index#
#include <hnsw_types.hpp>

Public Functions

inline index(int dim, raft::distance::DistanceType metric)#

Load a base-layer-only hnswlib index originally saved from a built CAGRA index. This is an abstract class (it declares a pure virtual member) and cannot be instantiated directly. To create an index, use the factory function raft::neighbors::hnsw::from_cagra from the header raft/neighbors/hnsw.hpp

Parameters:
  • dim[in] dimensions of the training dataset

  • metric[in] distance metric to search. Supported metrics (“L2Expanded”, “InnerProduct”)

virtual auto get_index() const -> void const* = 0#

Get underlying index.

Serializer Methods#

#include <raft/neighbors/hnsw_serialize.cuh>

namespace raft::neighbors::hnsw

template<typename T>
std::unique_ptr<index<T>> deserialize(raft::resources const &handle, const std::string &filename, int dim, raft::distance::DistanceType metric)#

Load an hnswlib index which was serialized from a CAGRA index

Experimental, both the API and the serialization format are subject to change.

#include <raft/core/resources.hpp>

raft::resources handle;

// create a string with a filepath
std::string filename("/path/to/index");
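// dimensionality and distance metric of the serialized index
int dim = 10;
raft::distance::DistanceType metric = raft::distance::L2Expanded;
// the element type T cannot be deduced from the arguments and must be given explicitly
auto index = raft::neighbors::hnsw::deserialize<float>(handle, filename, dim, metric);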
Template Parameters:

T – data element type

Parameters:
  • handle[in] the raft handle

  • filename[in] the name of the file to load the index from

  • dim[in] dimensionality of the index

  • metric[in] metric used to build the index

Returns:

std::unique_ptr<index<T>>