Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions c/include/cuvs/cluster/kmeans.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ struct cuvsKMeansParams {
*/
int batch_centroids;

/** Check inertia during iterations for early convergence. */
/** Deprecated, ignored. Kept for ABI compatibility. */
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We probably shouldn't modify the wording here. Instead, we probably want to introduce a separate struct for the ABI-breaking change, with its name suffixed by the version (26.06).

bool inertia_check;

/**
Expand All @@ -108,7 +108,14 @@ struct cuvsKMeansParams {
* Number of samples to process per GPU batch for the batched (host-data) API.
* When set to 0, defaults to n_samples (process all at once).
*/
int64_t streaming_batch_size;
int64_t streaming_batch_size;

/**
* Number of samples to draw for KMeansPlusPlus initialization.
* When set to 0, uses heuristic min(3 * n_clusters, n_samples) for host data,
* or n_samples for device data.
*/
int64_t init_size;
};

typedef struct cuvsKMeansParams* cuvsKMeansParams_t;
Expand Down
7 changes: 4 additions & 3 deletions c/src/cluster/kmeans.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ cuvs::cluster::kmeans::params convert_params(const cuvsKMeansParams& params)
kmeans_params.oversampling_factor = params.oversampling_factor;
kmeans_params.batch_samples = params.batch_samples;
kmeans_params.batch_centroids = params.batch_centroids;
kmeans_params.inertia_check = params.inertia_check;
kmeans_params.init_size = params.init_size;
kmeans_params.streaming_batch_size = params.streaming_batch_size;
return kmeans_params;
}
Expand Down Expand Up @@ -237,10 +237,11 @@ extern "C" cuvsError_t cuvsKMeansParamsCreate(cuvsKMeansParams_t* params)
.oversampling_factor = cpp_params.oversampling_factor,
.batch_samples = cpp_params.batch_samples,
.batch_centroids = cpp_params.batch_centroids,
.inertia_check = cpp_params.inertia_check,
.inertia_check = false,
.hierarchical = false,
.hierarchical_n_iters = static_cast<int>(cpp_balanced_params.n_iters),
.streaming_batch_size = cpp_params.streaming_batch_size};
.streaming_batch_size = cpp_params.streaming_batch_size,
.init_size = cpp_params.init_size};
});
}

Expand Down
9 changes: 7 additions & 2 deletions cpp/include/cuvs/cluster/kmeans.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,9 +113,14 @@ struct params : base_params {
int batch_centroids = 0;

/**
* If true, check inertia during iterations for early convergence.
* Number of samples to randomly draw for the KMeansPlusPlus initialization
* step. A random subset of this size is used for centroid seeding.
* When set to 0 the default depends on the data location:
* - Device data: n_samples (use the full dataset).
* - Host data: min(3 * n_clusters, n_samples).
* Default: 0.
*/
bool inertia_check = false;
int64_t init_size = 0;

/**
* Number of samples to process per GPU batch when fitting with host data.
Expand Down
Loading
Loading