Skip to content

Commit

Permalink
Disable non-Sedona (Spark SQL) broadcast joins for all DBSCAN tests
Browse files: browse the repository at this point in the history
  • Loading branch information
jameswillis committed Sep 20, 2024
1 parent 5f176ea commit 8aeaee1
Showing 1 changed file with 14 additions and 0 deletions.
14 changes: 14 additions & 0 deletions python/tests/stats/test_dbscan.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -93,6 +93,8 @@ def test_dbscan_valid_parameters(self):
self.spark.conf.set(
"sedona.join.autoBroadcastJoinThreshold", -1
)
self.spark.conf.set("spark.sql.autoBroadcastJoinThreshold", -1)

df = self.create_sample_dataframe()
for epsilon in [0.6, 0.7, 0.8]:
for min_pts in [3, 4, 5]:
Expand All @@ -105,6 +107,8 @@ def test_dbscan_valid_parameters_default_column_name(self):
self.spark.conf.set(
"sedona.join.autoBroadcastJoinThreshold", -1
)
self.spark.conf.set("spark.sql.autoBroadcastJoinThreshold", -1)

df = self.create_sample_dataframe().select(
"id", f.col("arealandmark").alias("geometryFieldName")
)
Expand All @@ -120,6 +124,8 @@ def test_dbscan_valid_parameters_polygons(self):
self.spark.conf.set(
"sedona.join.autoBroadcastJoinThreshold", -1
)
self.spark.conf.set("spark.sql.autoBroadcastJoinThreshold", -1)

df = self.create_sample_dataframe().select(
"id", ST_Buffer(f.col("arealandmark"), 0.000001).alias("geometryFieldName")
)
Expand All @@ -135,6 +141,8 @@ def test_dbscan_supports_other_distance_function(self):
self.spark.conf.set(
"sedona.join.autoBroadcastJoinThreshold", -1
)
self.spark.conf.set("spark.sql.autoBroadcastJoinThreshold", -1)

df = self.create_sample_dataframe().select(
"id", ST_Buffer(f.col("arealandmark"), 0.000001).alias("geometryFieldName")
)
Expand Down Expand Up @@ -181,6 +189,8 @@ def test_return_empty_df_when_no_clusters(self):
self.spark.conf.set(
"sedona.join.autoBroadcastJoinThreshold", -1
)
self.spark.conf.set("spark.sql.autoBroadcastJoinThreshold", -1)

df = self.create_sample_dataframe()
epsilon = 0.1
min_pts = 10000
Expand All @@ -197,6 +207,8 @@ def test_dbscan_doesnt_duplicate_border_points_in_two_clusters(self):
self.spark.conf.set(
"sedona.join.autoBroadcastJoinThreshold", -1
)
self.spark.conf.set("spark.sql.autoBroadcastJoinThreshold", -1)

input_df = self.spark.createDataFrame(
[
{"id": 10, "x": 1.0, "y": 1.8},
Expand All @@ -222,6 +234,8 @@ def test_return_outliers_false_doesnt_return_outliers(self):
self.spark.conf.set(
"sedona.join.autoBroadcastJoinThreshold", -1
)
self.spark.conf.set("spark.sql.autoBroadcastJoinThreshold", -1)

df = self.create_sample_dataframe()
for epsilon in [0.6, 0.7, 0.8]:
for min_pts in [3, 4, 5]:
Expand Down

0 comments on commit 8aeaee1

Please sign in to comment.