From bd106d36a99f67191a562196604480accb2b9f0f Mon Sep 17 00:00:00 2001 From: Jinfeng Li Date: Thu, 5 Sep 2024 18:46:42 -0700 Subject: [PATCH] quick fix for release (#734) Signed-off-by: Jinfeng --- python/src/spark_rapids_ml/classification.py | 2 ++ python/src/spark_rapids_ml/knn.py | 12 ++++++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/python/src/spark_rapids_ml/classification.py b/python/src/spark_rapids_ml/classification.py index 8f812cee..eb01ac8c 100644 --- a/python/src/spark_rapids_ml/classification.py +++ b/python/src/spark_rapids_ml/classification.py @@ -831,6 +831,8 @@ class LogisticRegression( And it will automatically map pyspark parameters to cuML parameters. + In the case of applying LogisticRegression on sparse vectors, Spark 3.4 or above is required. + Parameters ---------- featuresCol: str or List[str] diff --git a/python/src/spark_rapids_ml/knn.py b/python/src/spark_rapids_ml/knn.py index a20845c2..43369465 100644 --- a/python/src/spark_rapids_ml/knn.py +++ b/python/src/spark_rapids_ml/knn.py @@ -851,7 +851,11 @@ def setAlgorithm(self: P, value: str) -> P: """ Sets the value of `algorithm`. """ - assert value == "ivfflat", "Only IVFFLAT algorithm is currently supported" + assert value in { + "ivfflat", + "ivfpq", + "cagra", + }, "Only ivfflat, ivfpq, and cagra are currently supported" self._set_params(algorithm=value) return self @@ -919,7 +923,7 @@ class ApproximateNearestNeighbors( the default number of approximate nearest neighbors to retrieve for each query. algorithm: str (default = 'ivfflat') - the algorithm parameter to be passed into cuML. It currently must be 'ivfflat' or 'ivfpq'. Other algorithms are expected to be supported later. + the algorithm parameter to be passed into cuML. It currently must be 'ivfflat', 'ivfpq' or 'cagra'. Other algorithms are expected to be supported later. algoParams: Optional[Dict[str, Any]] (default = None) if set, algoParam is used to configure the algorithm, on each data partition (or maxRecordsPerBatch if Arrow is enabled) of the item_df. @@ -1455,7 +1459,7 @@ def _transform_internal( start_time = time.time() - if nn_object is not "cagra": + if nn_object != "cagra": nn_object.fit(item) else: from cuvs.neighbors import cagra @@ -1473,7 +1477,7 @@ def _transform_internal( start_time = time.time() - if nn_object is not "cagra": + if nn_object != "cagra": distances, indices = nn_object.kneighbors(bcast_qfeatures.value) else: gpu_qfeatures = cp.array(