From 5418a5c8583aa094281b00bcf0ec9b6a3be59b80 Mon Sep 17 00:00:00 2001
From: Erik Ordentlich <eordentlich@gmail.com>
Date: Thu, 6 Jul 2023 12:48:27 -0700
Subject: [PATCH] more 23.06 dependency updates

Signed-off-by: Erik Ordentlich <eordentlich@gmail.com>
---
 jvm/README.md                                     | 6 +++---
 jvm/pom.xml                                       | 4 ++--
 notebooks/databricks/README.md                    | 2 +-
 notebooks/databricks/init-pip-cuda-11.8.sh        | 6 +++---
 python/benchmark/databricks/gpu_cluster_spec.sh   | 2 +-
 python/benchmark/databricks/init-pip-cuda-11.8.sh | 4 ++--
 python/benchmark/dataproc/init_benchmark.sh       | 5 +++--
 7 files changed, 15 insertions(+), 14 deletions(-)
diff --git a/jvm/README.md b/jvm/README.md
index a1dd6561..54726456 100644
--- a/jvm/README.md
+++ b/jvm/README.md
@@ -74,7 +74,7 @@ the _project root path_ with:
 cd jvm
 mvn clean package
 ```
-Then `rapids-4-spark-ml_2.12-23.04.0-SNAPSHOT.jar` will be generated under `target` folder.
+Then `rapids-4-spark-ml_2.12-23.06.0-SNAPSHOT.jar` will be generated under the `target` folder.
 
 Users can also use the _release_ version spark-rapids plugin as the dependency if it's already been
 released in public maven repositories, see [rapids-4-spark maven repository](https://mvnrepository.com/artifact/com.nvidia/rapids-4-spark)
@@ -94,8 +94,8 @@ repository, usually in your `~/.m2/repository`.
 
 Add the artifact jar to the Spark, for example:
 ```bash
-ML_JAR="target/rapids-4-spark-ml_2.12-23.04.0-SNAPSHOT.jar"
-PLUGIN_JAR="~/.m2/repository/com/nvidia/rapids-4-spark_2.12/23.04.0-SNAPSHOT/rapids-4-spark_2.12-23.04.0-SNAPSHOT.jar"
+ML_JAR="target/rapids-4-spark-ml_2.12-23.06.0-SNAPSHOT.jar"
+PLUGIN_JAR="$HOME/.m2/repository/com/nvidia/rapids-4-spark_2.12/23.06.0-SNAPSHOT/rapids-4-spark_2.12-23.06.0-SNAPSHOT.jar"
 
 $SPARK_HOME/bin/spark-shell --master $SPARK_MASTER \
  --driver-memory 20G \
diff --git a/jvm/pom.xml b/jvm/pom.xml
index fde9dbcd..eff9ee8c 100644
--- a/jvm/pom.xml
+++ b/jvm/pom.xml
@@ -20,7 +20,7 @@
     <modelVersion>4.0.0</modelVersion>
     <groupId>com.nvidia</groupId>
     <artifactId>rapids-4-spark-ml_2.12</artifactId>
-    <version>23.04.0-SNAPSHOT</version>
+    <version>23.06.0-SNAPSHOT</version>
     <name>RAPIDS Accelerator for Apache Spark ML</name>
     <description>The RAPIDS cuML library for Apache Spark</description>
     <inceptionYear>2021</inceptionYear>
@@ -93,7 +93,7 @@
         <dependency>
             <groupId>com.nvidia</groupId>
             <artifactId>rapids-4-spark_2.12</artifactId>
-            <version>23.04.0</version>
+            <version>23.06.0</version>
         </dependency>
 
 
diff --git a/notebooks/databricks/README.md b/notebooks/databricks/README.md
index 01c98990..d1b24ea8 100644
--- a/notebooks/databricks/README.md
+++ b/notebooks/databricks/README.md
@@ -41,7 +41,7 @@ If you already have a Databricks account, you can run the example notebooks on a
       spark.task.resource.gpu.amount 1
       spark.databricks.delta.preview.enabled true
       spark.python.worker.reuse true
-      spark.executorEnv.PYTHONPATH /databricks/jars/rapids-4-spark_2.12-23.04.0.jar:/databricks/spark/python
+      spark.executorEnv.PYTHONPATH /databricks/jars/rapids-4-spark_2.12-23.06.0.jar:/databricks/spark/python
       spark.sql.execution.arrow.maxRecordsPerBatch 100000
       spark.rapids.memory.gpu.minAllocFraction 0.0001
       spark.plugins com.nvidia.spark.SQLPlugin
diff --git a/notebooks/databricks/init-pip-cuda-11.8.sh b/notebooks/databricks/init-pip-cuda-11.8.sh
index 879785c7..63d27268 100644
--- a/notebooks/databricks/init-pip-cuda-11.8.sh
+++ b/notebooks/databricks/init-pip-cuda-11.8.sh
@@ -5,9 +5,9 @@ SPARK_RAPIDS_ML_ZIP=/dbfs/path/to/zip/file
 # also RAPIDS_VERSION (python) fields should omit any leading 0 in month/minor field (i.e. 23.6.0 and not 23.06.0)
 # while SPARK_RAPIDS_VERSION (jar) should have leading 0 in month/minor (e.g. 23.06.0 and not 23.6.0)
 RAPIDS_VERSION=23.6.0
-SPARK_RAPIDS_VERSION=23.04.0
+SPARK_RAPIDS_VERSION=23.06.0
 
-curl -L https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/${SPARK_RAPIDS_VERSION}/rapids-4-spark_2.12-${SPARK_RAPIDS_VERSION}.jar -o /databricks/jars/rapids-4-spark_2.12-${SPARK_RAPIDS_VERSION}.jar
+curl -L https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/${SPARK_RAPIDS_VERSION}/rapids-4-spark_2.12-${SPARK_RAPIDS_VERSION}-cuda11.jar -o /databricks/jars/rapids-4-spark_2.12-${SPARK_RAPIDS_VERSION}.jar
 
 # install cudatoolkit 11.8 via runfile approach
 wget https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run
@@ -34,7 +34,7 @@ ldconfig
 /databricks/python/bin/pip install --upgrade pip
 
 # install cudf, cuml and their rapids dependencies
-# using ~= pulls in lates micro version patches
+# using ~= pulls in latest micro version patches
 /databricks/python/bin/pip install cudf-cu11~=${RAPIDS_VERSION} \
     cuml-cu11~=${RAPIDS_VERSION} \
     pylibraft-cu11~=${RAPIDS_VERSION} \
diff --git a/python/benchmark/databricks/gpu_cluster_spec.sh b/python/benchmark/databricks/gpu_cluster_spec.sh
index becde51f..b302b944 100644
--- a/python/benchmark/databricks/gpu_cluster_spec.sh
+++ b/python/benchmark/databricks/gpu_cluster_spec.sh
@@ -9,7 +9,7 @@ cat <<EOF
         "spark.task.cpus": "1",
         "spark.databricks.delta.preview.enabled": "true",
         "spark.python.worker.reuse": "true",
-        "spark.executorEnv.PYTHONPATH": "/databricks/jars/rapids-4-spark_2.12-23.04.0.jar:/databricks/spark/python",
+        "spark.executorEnv.PYTHONPATH": "/databricks/jars/rapids-4-spark_2.12-23.06.0.jar:/databricks/spark/python",
         "spark.sql.files.minPartitionNum": "2",
         "spark.sql.execution.arrow.maxRecordsPerBatch": "10000",
         "spark.executor.cores": "8",
diff --git a/python/benchmark/databricks/init-pip-cuda-11.8.sh b/python/benchmark/databricks/init-pip-cuda-11.8.sh
index 8e1f1aab..0f44eed2 100644
--- a/python/benchmark/databricks/init-pip-cuda-11.8.sh
+++ b/python/benchmark/databricks/init-pip-cuda-11.8.sh
@@ -6,9 +6,9 @@ BENCHMARK_ZIP=/dbfs/path/to/benchmark.zip
 # also RAPIDS_VERSION (python) fields should omit any leading 0 in month/minor field (i.e. 23.6.0 and not 23.06.0)
 # while SPARK_RAPIDS_VERSION (jar) should have leading 0 in month/minor (e.g. 23.06.0 and not 23.6.0)
 RAPIDS_VERSION=23.6.0
-SPARK_RAPIDS_VERSION=23.04.0
+SPARK_RAPIDS_VERSION=23.06.0
 
-curl -L https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/${SPARK_RAPIDS_VERSION}/rapids-4-spark_2.12-${SPARK_RAPIDS_VERSION}.jar -o /databricks/jars/rapids-4-spark_2.12-${SPARK_RAPIDS_VERSION}.jar
+curl -L https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/${SPARK_RAPIDS_VERSION}/rapids-4-spark_2.12-${SPARK_RAPIDS_VERSION}-cuda11.jar -o /databricks/jars/rapids-4-spark_2.12-${SPARK_RAPIDS_VERSION}.jar
 
 # install cudatoolkit 11.8 via runfile approach
 wget https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run
diff --git a/python/benchmark/dataproc/init_benchmark.sh b/python/benchmark/dataproc/init_benchmark.sh
index 2a8b10a0..08da9c22 100755
--- a/python/benchmark/dataproc/init_benchmark.sh
+++ b/python/benchmark/dataproc/init_benchmark.sh
@@ -32,5 +32,6 @@ gsutil cp gs://${BENCHMARK_HOME}/benchmark_runner.py .
 gsutil cp gs://${BENCHMARK_HOME}/spark_rapids_ml.zip .
 gsutil cp gs://${BENCHMARK_HOME}/benchmark.zip .
 
-unzip spark_rapids_ml.zip -d /opt/conda/miniconda3/lib/python3.8/site-packages
-unzip benchmark.zip -d /opt/conda/miniconda3/lib/python3.8/site-packages
+python_ver=$(python -c 'import sys; print("%d.%d" % sys.version_info[:2])')  # major.minor of the active interpreter (e.g. 3.8), taken from sys.version_info rather than parsed from the --version banner
+unzip spark_rapids_ml.zip -d "/opt/conda/miniconda3/lib/python${python_ver}/site-packages"
+unzip benchmark.zip -d "/opt/conda/miniconda3/lib/python${python_ver}/site-packages"