Sparse Linear Algebra#
-
template<typename T>
size_t raft::sparse::linalg::csr_add_calc_inds(const int *a_ind, const int *a_indptr, const T *a_val, int nnz1, const int *b_ind, const int *b_indptr, const T *b_val, int nnz2, int m, int *out_ind, cudaStream_t stream)# Calculate the CSR row_ind array that would result from summing together two CSR matrices.
- Parameters:
a_ind – left hand row_ind array
a_indptr – left hand index_ptr array
a_val – left hand data array
nnz1 – size of left hand index_ptr and val arrays
b_ind – right hand row_ind array
b_indptr – right hand index_ptr array
b_val – right hand data array
nnz2 – size of right hand index_ptr and val arrays
m – size of output array (number of rows in final matrix)
out_ind – output row_ind array
stream – cuda stream to use
-
template<typename T>
void raft::sparse::linalg::csr_add_finalize(const int *a_ind, const int *a_indptr, const T *a_val, int nnz1, const int *b_ind, const int *b_indptr, const T *b_val, int nnz2, int m, int *c_ind, int *c_indptr, T *c_val, cudaStream_t stream)# Calculate the CSR row_ind array that would result from summing together two CSR matrices.
- Parameters:
a_ind – left hand row_ind array
a_indptr – left hand index_ptr array
a_val – left hand data array
nnz1 – size of left hand index_ptr and val arrays
b_ind – right hand row_ind array
b_indptr – right hand index_ptr array
b_val – right hand data array
nnz2 – size of right hand index_ptr and val arrays
m – size of output array (number of rows in final matrix)
c_ind – output row_ind array
c_indptr – output ind_ptr array
c_val – output data array
stream – cuda stream to use
-
template<typename T = int>
void raft::sparse::linalg::coo_degree(const T *rows, int nnz, T *results, cudaStream_t stream)# Count the number of values for each row.
- Template Parameters:
TPB_X – number of threads to use per block
- Parameters:
rows – rows array of the COO matrix
nnz – size of the rows array
results – output result array
stream – cuda stream to use
-
template<typename T>
void raft::sparse::linalg::coo_degree(COO<T> *in, int *results, cudaStream_t stream)# Count the number of values for each row.
- Template Parameters:
TPB_X – number of threads to use per block
T – type name of underlying values array
- Parameters:
in – input COO object for counting rows
results – output array with row counts (size=in->n_rows)
stream – cuda stream to use
-
template<typename T>
void raft::sparse::linalg::coo_degree_scalar(const int *rows, const T *vals, int nnz, T scalar, int *results, cudaStream_t stream = 0)# Count the number of values for each row that doesn’t match a particular scalar.
- Template Parameters:
TPB_X – number of threads to use per block
T – the type name of the underlying value arrays
- Parameters:
rows – Input COO row array
vals – Input COO val arrays
nnz – size of input COO arrays
scalar – scalar to match for counting rows
results – output row counts
stream – cuda stream to use
-
template<typename T>
void raft::sparse::linalg::coo_degree_scalar(COO<T> *in, T scalar, int *results, cudaStream_t stream)# Count the number of values for each row that doesn’t match a particular scalar.
- Template Parameters:
TPB_X – number of threads to use per block
T – the type name of the underlying value arrays
- Parameters:
in – Input COO array
scalar – scalar to match for counting rows
results – output row counts
stream – cuda stream to use
-
template<typename T>
void raft::sparse::linalg::coo_degree_nz(const int *rows, const T *vals, int nnz, int *results, cudaStream_t stream)# Count the number of nonzeros for each row.
- Template Parameters:
TPB_X – number of threads to use per block
T – the type name of the underlying value arrays
- Parameters:
rows – Input COO row array
vals – Input COO val arrays
nnz – size of input COO arrays
results – output row counts
stream – cuda stream to use
-
template<typename T>
void raft::sparse::linalg::coo_degree_nz(COO<T> *in, int *results, cudaStream_t stream)# Count the number of nonzero values for each row.
- Template Parameters:
TPB_X – number of threads to use per block
T – the type name of the underlying value arrays
- Parameters:
in – Input COO array
results – output row counts
stream – cuda stream to use
-
template<typename T>
void raft::sparse::linalg::csr_row_normalize_l1(const int *ia, const T *vals, int nnz, int m, T *result, cudaStream_t stream)# Perform L1 normalization on the rows of a given CSR-formatted sparse matrix.
- Parameters:
ia – row_ind array
vals – data array
nnz – size of data array
m – size of row_ind array
result – l1 normalized data array
stream – cuda stream to use
-
template<typename T>
void raft::sparse::linalg::csr_row_normalize_max(const int *ia, const T *vals, int nnz, int m, T *result, cudaStream_t stream)# Perform L_inf normalization on a given CSR-formatted sparse matrix.
- Parameters:
ia – row_ind array
vals – data array
nnz – size of data array
m – size of row_ind array
result – l1 normalized data array
stream – cuda stream to use
-
template<typename Type, typename IdxType = int, typename Lambda = raft::identity_op>
void raft::sparse::linalg::rowNormCsr(raft::resources const &handle, const IdxType *ia, const Type *data, const IdxType nnz, const IdxType N, Type *norm, raft::linalg::NormType type, Lambda fin_op = raft::identity_op())# Compute row-wise norm of the input matrix and perform fin_op lambda.
Row-wise norm is useful while computing pairwise distance matrix, for example. This is used in many clustering algos like knn, kmeans, dbscan, etc…
- Template Parameters:
Type – the data type
Lambda – device final lambda
IdxType – Integer type used to for addressing
- Parameters:
handle – raft handle
ia – the input matrix row index array
data – the input matrix nnz data
nnz – number of elements in data
N – number of rows
norm – the output vector of row-wise norm, size [N]
type – the type of norm to be applied
fin_op – the final lambda op
-
template<typename ValueType, typename IndexType, typename NZType, typename LayoutPolicyA, typename LayoutPolicyB>
void raft::sparse::linalg::sddmm(raft::resources const &handle, raft::device_matrix_view<const ValueType, IndexType, LayoutPolicyA> A, raft::device_matrix_view<const ValueType, IndexType, LayoutPolicyB> B, raft::device_csr_matrix_view<ValueType, IndexType, IndexType, NZType> C, const raft::linalg::Operation opA, const raft::linalg::Operation opB, raft::host_scalar_view<ValueType> alpha, raft::host_scalar_view<ValueType> beta)# This function performs the multiplication of dense matrix A and dense matrix B, followed by an element-wise multiplication with the sparsity pattern of C. It computes the following equation: C = alpha · (opA(A) * opB(B) ∘ spy(C)) + beta · C where A,B are device matrix views and C is a CSR device matrix view.
- Template Parameters:
ValueType – Data type of input/output matrices (float/double)
IndexType – Type of C
NZType – Type of C
LayoutPolicyA – layout of A
LayoutPolicyB – layout of B
- Parameters:
handle – [in] raft handle
A – [in] input raft::device_matrix_view
B – [in] input raft::device_matrix_view
C – [inout] output raft::device_csr_matrix_view
opA – [in] input Operation op(A)
opB – [in] input Operation op(B)
alpha – [in] input raft::host_scalar_view
beta – [in] input raft::host_scalar_view
-
template<typename ValueType, typename IndexType, typename NZType, typename LayoutPolicyY, typename LayoutPolicyZ>
void raft::sparse::linalg::spmm(raft::resources const &handle, const bool trans_x, const bool trans_y, const ValueType *alpha, raft::device_csr_matrix_view<const ValueType, int, int, NZType> x, raft::device_matrix_view<const ValueType, IndexType, LayoutPolicyY> y, const ValueType *beta, raft::device_matrix_view<ValueType, IndexType, LayoutPolicyZ> z)# SPMM function designed for handling all CSR * DENSE combinations of operand layouts for cuSparse. It computes the following equation: Z = alpha . X * Y + beta . Z where X is a CSR device matrix view and Y,Z are device matrix views.
- Template Parameters:
ValueType – Data type of input/output matrices (float/double)
IndexType – Type of Y and Z
NZType – Type of X
LayoutPolicyY – layout of Y
LayoutPolicyZ – layout of Z
- Parameters:
handle – [in] raft handle
trans_x – [in] transpose operation for X
trans_y – [in] transpose operation for Y
alpha – [in] scalar
x – [in] input raft::device_csr_matrix_view
y – [in] input raft::device_matrix_view
beta – [in] scalar
z – [inout] input-output raft::device_matrix_view
-
template<typename T, typename Lambda>
void raft::sparse::linalg::coo_symmetrize(COO<T> *in, COO<T> *out, Lambda reduction_op, cudaStream_t stream)# takes a COO matrix which may not be symmetric and symmetrizes it, running a custom reduction function against the each value and its transposed value.
- Parameters:
in – Input COO matrix
out – Output symmetrized COO matrix
reduction_op – a custom reduction function
stream – cuda stream to use
- template<typename value_idx = int64_t, typename value_t = float> RAFT_KERNEL raft::sparse::linalg::symmetric_find_size (const value_t __restrict__ *data, const value_idx __restrict__ *indices, const value_idx n, const int k, value_idx __restrict__ *row_sizes, value_idx __restrict__ *row_sizes2)
Find how much space needed in each row. We look through all datapoints and increment the count for each row.
TODO: This isn’t generalized. Remove in place of
symmetrize()
- Parameters:
data – Input knn distances(n, k)
indices – Input knn indices(n, k)
n – Number of rows
k – Number of n_neighbors
row_sizes – Input empty row sum 1 array(n)
row_sizes2 – Input empty row sum 2 array(n) for faster reduction
- template<typename value_idx> RAFT_KERNEL raft::sparse::linalg::reduce_find_size (const value_idx n, const int k, value_idx __restrict__ *row_sizes, const value_idx __restrict__ *row_sizes2)
Reduce sum(row_sizes) + k Reduction for symmetric_find_size kernel. Allows algo to be faster.
TODO: This isn’t generalized. Remove in place of
symmetrize()
- Parameters:
n – Number of rows
k – Number of n_neighbors
row_sizes – Input row sum 1 array(n)
row_sizes2 – Input row sum 2 array(n) for faster reduction
- template<typename value_idx = int64_t, typename value_t = float> RAFT_KERNEL raft::sparse::linalg::symmetric_sum (value_idx *__restrict__ edges, const value_t *__restrict__ data, const value_idx *__restrict__ indices, value_t *__restrict__ VAL, value_idx *__restrict__ COL, value_idx *__restrict__ ROW, const value_idx n, const int k)
Perform data + data.T operation. Can only run once row_sizes from the CSR matrix of data + data.T has been determined.
TODO: This isn’t generalized. Remove in place of
symmetrize()
- Parameters:
edges – Input row sum array(n) after reduction
data – Input knn distances(n, k)
indices – Input knn indices(n, k)
VAL – Output values for data + data.T
COL – Output column indices for data + data.T
ROW – Output row indices for data + data.T
n – Number of rows
k – Number of n_neighbors
- template<typename value_idx = int64_t, typename value_t = float, int TPB_X = 32, int TPB_Y = 32> void raft::sparse::linalg::from_knn_symmetrize_matrix (const value_idx *__restrict__ knn_indices, const value_t *__restrict__ knn_dists, const value_idx n, const int k, COO< value_t, value_idx > *out, cudaStream_t stream)
Perform data + data.T on raw KNN data. The following steps are invoked: (1) Find how much space needed in each row (2) Compute final space needed (n*k + sum(row_sizes)) == 2*n*k (3) Allocate new space (4) Prepare edges for each new row (5) Perform final data + data.T operation (6) Return summed up VAL, COL, ROW.
TODO: This isn’t generalized. Remove in place of
symmetrize()
- Parameters:
knn_indices – Input knn distances(n, k)
knn_dists – Input knn indices(n, k)
n – Number of rows
k – Number of n_neighbors
out – Output COO Matrix class
stream – Input cuda stream
-
template<typename value_idx, typename value_t>
void raft::sparse::linalg::symmetrize(raft::resources const &handle, const value_idx *rows, const value_idx *cols, const value_t *vals, size_t m, size_t n, size_t nnz, raft::sparse::COO<value_t, value_idx> &out)# Symmetrizes a COO matrix
-
template<typename value_idx, typename value_t>
void raft::sparse::linalg::csr_transpose(raft::resources const &handle, const value_idx *csr_indptr, const value_idx *csr_indices, const value_t *csr_data, value_idx *csc_indptr, value_idx *csc_indices, value_t *csc_data, value_idx csr_nrows, value_idx csr_ncols, value_idx nnz, cudaStream_t stream)# Transpose a set of CSR arrays into a set of CSC arrays.
- Template Parameters:
value_idx – : data type of the CSR index arrays
value_t – : data type of the CSR data array
- Parameters:
handle – [in] : used for invoking cusparse
csr_indptr – [in] : CSR row index array
csr_indices – [in] : CSR column indices array
csr_data – [in] : CSR data array
csc_indptr – [out] : CSC row index array
csc_indices – [out] : CSC column indices array
csc_data – [out] : CSC data array
csr_nrows – [in] : Number of rows in CSR
csr_ncols – [in] : Number of columns in CSR
nnz – [in] : Number of nonzeros of CSR
stream – [in] : Cuda stream for ordering events