Skip to content

Commit

Permalink
simplify the logic of ensureIdCol
Browse files Browse the repository at this point in the history
  • Loading branch information
lijinf2 committed May 8, 2024
1 parent 38d731f commit a61fcb4
Showing 1 changed file with 11 additions and 11 deletions.
22 changes: 11 additions & 11 deletions python/src/spark_rapids_ml/knn.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,19 +142,19 @@ def _ensureIdCol(self, df: DataFrame) -> DataFrame:
Ensure an id column exists in the input dataframe. Add the column if it does not exist.
Overridden for kNN: raises an error if idCol is not set and a column with the default id column name already exists.
"""
if not self.isSet("idCol") and self.getIdCol() in df.columns:
raise ValueError(
f"Cannot create a default id column since a column with the default name '{self.getIdCol()}' already exists."
+ "Please specify an id column"
)

id_col_name = self.getIdCol()
df_withid = (
df
if id_col_name in df.columns
else df.select(monotonically_increasing_id().alias(id_col_name), "*")
)
return df_withid
if id_col_name in df.columns:
if self.isSet("idCol"):
return df
else:
raise ValueError(
f"Column '{id_col_name}' already exists."
+ f"Please use a different column name or set the column as the idCol."
)
else:
df_withid = df.select(monotonically_increasing_id().alias(id_col_name), "*")
return df_withid


class NearestNeighbors(
Expand Down

0 comments on commit a61fcb4

Please sign in to comment.