rapidsai · aamijar · Jun 12, 2025 · Jun 12, 2025 · Jun 12, 2025 · Jun 12, 2025
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+# Copyright (c) 2023-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
@@ -61,8 +61,8 @@ endfunction()
 # To use a different RAFT locally, set the CMake variable
 # CPM_raft_SOURCE=/path/to/local/raft
 find_and_configure_raft(VERSION  ${RAFT_VERSION}.00
-        FORK                     ${RAFT_FORK}
-        PINNED_TAG               ${RAFT_PINNED_TAG}
+        FORK                     aamijar
+        PINNED_TAG               compile-time-invocation
         ENABLE_MNMG_DEPENDENCIES OFF
         ENABLE_NVTX              OFF
         USE_RAFT_STATIC ${CUVS_USE_RAFT_STATIC}

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -199,15 +199,13 @@ void kmeansPlusPlus(raft::resources const& handle,
     // Outputs minDistanceBuf[n_trials x n_samples] where minDistance[i, :] contains updated
     // minClusterDistance that includes candidate-i
     auto minDistBuf = distBuffer.view();
-    raft::linalg::matrixVectorOp(minDistBuf.data_handle(),
-                                 pwd.data_handle(),
-                                 minClusterDistance.data_handle(),
-                                 pwd.extent(1),
-                                 pwd.extent(0),
-                                 true,
-                                 true,
-                                 raft::min_op{},
-                                 stream);
+    raft::linalg::matrixVectorOp<true, true>(minDistBuf.data_handle(),
+                                             pwd.data_handle(),
+                                             minClusterDistance.data_handle(),
+                                             pwd.extent(1),
+                                             pwd.extent(0),
+                                             raft::min_op{},
+                                             stream);
 
     // Calculate costPerCandidate[n_trials] where costPerCandidate[i] is the cluster cost when using
     // centroid candidate-i
@@ -325,15 +323,13 @@ void update_centroids(raft::resources const& handle,
   //   weight_per_cluster[n_clusters] - 1D array, weight_per_cluster[i] contains sum of weights in
   //   cluster-i.
   // Note - when weight_per_cluster[i] is 0, new_centroids[i] is reset to 0
-  raft::linalg::matrixVectorOp(new_centroids.data_handle(),
-                               new_centroids.data_handle(),
-                               weight_per_cluster.data_handle(),
-                               new_centroids.extent(1),
-                               new_centroids.extent(0),
-                               true,
-                               false,
-                               raft::div_checkzero_op{},
-                               raft::resource::get_cuda_stream(handle));
+  raft::linalg::matrixVectorOp<true, false>(new_centroids.data_handle(),
+                                            new_centroids.data_handle(),
+                                            weight_per_cluster.data_handle(),
+                                            new_centroids.extent(1),
+                                            new_centroids.extent(0),
+                                            raft::div_checkzero_op{},
+                                            raft::resource::get_cuda_stream(handle));
 
   // copy centroids[i] to new_centroids[i] when weight_per_cluster[i] is 0
   cub::ArgIndexInputIterator<DataT*> itr_wt(weight_per_cluster.data_handle());

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -315,8 +315,8 @@ void calc_centers_and_sizes(const raft::resources& handle,
   auto stream = raft::resource::get_cuda_stream(handle);
 
   if (!reset_counters) {
-    raft::linalg::matrixVectorOp(
-      centers, centers, cluster_sizes, dim, n_clusters, true, false, raft::mul_op(), stream);
+    raft::linalg::matrixVectorOp<true, false>(
+      centers, centers, cluster_sizes, dim, n_clusters, raft::mul_op(), stream);
   }
 
   rmm::device_uvector<char> workspace(0, stream, mr);
@@ -350,15 +350,8 @@ void calc_centers_and_sizes(const raft::resources& handle,
     raft::linalg::add(cluster_sizes, cluster_sizes, temp_sizes, n_clusters, stream);
   }
 
-  raft::linalg::matrixVectorOp(centers,
-                               centers,
-                               cluster_sizes,
-                               dim,
-                               n_clusters,
-                               true,
-                               false,
-                               raft::div_checkzero_op(),
-                               stream);
+  raft::linalg::matrixVectorOp<true, false>(
+    centers, centers, cluster_sizes, dim, n_clusters, raft::div_checkzero_op(), stream);
 }
 
 /** Computes the L2 norm of the dataset, converting to MathT if necessary */

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024, NVIDIA CORPORATION.
+ * Copyright (c) 2024-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -654,14 +654,12 @@ void fit(const raft::resources& handle,
     //   samples in cluster-i.
     // Note - when wtInCluster[i] is 0, newCentroid[i] is reset to 0
 
-    raft::linalg::matrixVectorOp(
+    raft::linalg::matrixVectorOp<true, false>(
       newCentroids.data_handle(),
       newCentroids.data_handle(),
       wtInCluster.data_handle(),
       newCentroids.extent(1),
       newCentroids.extent(0),
-      true,
-      false,
       cuda::proclaim_return_type<DataT>([=] __device__(DataT mat, DataT vec) {
         if (vec == 0)
           return DataT(0);

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -101,12 +101,12 @@ void select_residuals(raft::resources const& handle,
     mapping_itr(dataset, utils::mapping<float>{});
   raft::matrix::gather(mapping_itr, (IdxT)dim, n_rows, row_ids, n_rows, tmp.data(), stream);
 
-  raft::matrix::linewise_op(handle,
-                            raft::make_device_matrix_view<const T, IdxT>(tmp.data(), n_rows, dim),
-                            raft::make_device_matrix_view<T, IdxT>(tmp.data(), n_rows, dim),
-                            true,
-                            raft::sub_op{},
-                            raft::make_device_vector_view<const T, IdxT>(center, dim));
+  raft::matrix::linewise_op<true>(
+    handle,
+    raft::make_device_matrix_view<const T, IdxT>(tmp.data(), n_rows, dim),
+    raft::make_device_matrix_view<T, IdxT>(tmp.data(), n_rows, dim),
+    raft::sub_op{},
+    raft::make_device_vector_view<const T, IdxT>(center, dim));
 
   float alpha = 1.0;
   float beta  = 0.0;

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024, NVIDIA CORPORATION.
+ * Copyright (c) 2024-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -274,16 +274,14 @@ DataT silhouette_score(
   RAFT_CUDA_TRY(cudaMemsetAsync(
     averageDistanceBetweenSampleAndCluster.data(), 0, nRows * nLabels * sizeof(DataT), stream));
 
-  raft::linalg::matrixVectorOp(averageDistanceBetweenSampleAndCluster.data(),
-                               sampleToClusterSumOfDistances.data(),
-                               binCountArray.data(),
-                               binCountArray.data(),
-                               nLabels,
-                               nRows,
-                               true,
-                               true,
-                               DivOp<DataT>(),
-                               stream);
+  raft::linalg::matrixVectorOp<true, true>(averageDistanceBetweenSampleAndCluster.data(),
+                                           sampleToClusterSumOfDistances.data(),
+                                           binCountArray.data(),
+                                           binCountArray.data(),
+                                           nLabels,
+                                           nRows,
+                                           DivOp<DataT>(),
+                                           stream);
 
   // calculating row-wise minimum
   raft::linalg::reduce<true, true, DataT, DataT, int, raft::identity_op, raft::min_op>(

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2023-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -233,12 +233,11 @@ void InitDataset(const raft::resources& handle,
                                        raft::sq_op(),
                                        raft::add_op(),
                                        raft::sqrt_op());
-      raft::linalg::matrix_vector_op(
+      raft::linalg::matrix_vector_op<raft::Apply::ALONG_COLUMNS>(
         handle,
         raft::make_const_mdspan(dataset_view),
         raft::make_const_mdspan(dev_row_norm.view()),
         dataset_view,
-        raft::Apply::ALONG_COLUMNS,
         [normalized_norm] __device__(DataT elm, ComputeT norm) {
           const ComputeT v           = elm / norm * normalized_norm;
           const ComputeT max_v_range = std::numeric_limits<DataT>::max();