From 5418a5c8583aa094281b00bcf0ec9b6a3be59b80 Mon Sep 17 00:00:00 2001 From: Erik Ordentlich Date: Thu, 6 Jul 2023 12:48:27 -0700 Subject: [PATCH] more 23.06 dependency updates Signed-off-by: Erik Ordentlich --- jvm/README.md | 6 +++--- jvm/pom.xml | 4 ++-- notebooks/databricks/README.md | 2 +- notebooks/databricks/init-pip-cuda-11.8.sh | 6 +++--- python/benchmark/databricks/gpu_cluster_spec.sh | 2 +- python/benchmark/databricks/init-pip-cuda-11.8.sh | 4 ++-- python/benchmark/dataproc/init_benchmark.sh | 5 +++-- 7 files changed, 15 insertions(+), 14 deletions(-) diff --git a/jvm/README.md b/jvm/README.md index a1dd6561..54726456 100644 --- a/jvm/README.md +++ b/jvm/README.md @@ -74,7 +74,7 @@ the _project root path_ with: cd jvm mvn clean package ``` -Then `rapids-4-spark-ml_2.12-23.04.0-SNAPSHOT.jar` will be generated under `target` folder. +Then `rapids-4-spark-ml_2.12-23.06.0-SNAPSHOT.jar` will be generated under `target` folder. Users can also use the _release_ version spark-rapids plugin as the dependency if it's already been released in public maven repositories, see [rapids-4-spark maven repository](https://mvnrepository.com/artifact/com.nvidia/rapids-4-spark) @@ -94,8 +94,8 @@ repository, usually in your `~/.m2/repository`. Add the artifact jar to the Spark, for example: ```bash -ML_JAR="target/rapids-4-spark-ml_2.12-23.04.0-SNAPSHOT.jar" -PLUGIN_JAR="~/.m2/repository/com/nvidia/rapids-4-spark_2.12/23.04.0-SNAPSHOT/rapids-4-spark_2.12-23.04.0-SNAPSHOT.jar" +ML_JAR="target/rapids-4-spark-ml_2.12-23.06.0-SNAPSHOT.jar" +PLUGIN_JAR="~/.m2/repository/com/nvidia/rapids-4-spark_2.12/23.06.0-SNAPSHOT/rapids-4-spark_2.12-23.06.0-SNAPSHOT.jar" $SPARK_HOME/bin/spark-shell --master $SPARK_MASTER \ --driver-memory 20G \ diff --git a/jvm/pom.xml b/jvm/pom.xml index fde9dbcd..eff9ee8c 100644 --- a/jvm/pom.xml +++ b/jvm/pom.xml @@ -20,7 +20,7 @@ 4.0.0 com.nvidia rapids-4-spark-ml_2.12 - 23.04.0-SNAPSHOT + 23.06.0-SNAPSHOT RAPIDS Accelerator for Apache Spark ML The RAPIDS cuML library for Apache Spark 2021 @@ -93,7 +93,7 @@ com.nvidia rapids-4-spark_2.12 - 23.04.0 + 23.06.0 diff --git a/notebooks/databricks/README.md b/notebooks/databricks/README.md index 01c98990..d1b24ea8 100644 --- a/notebooks/databricks/README.md +++ b/notebooks/databricks/README.md @@ -41,7 +41,7 @@ If you already have a Databricks account, you can run the example notebooks on a spark.task.resource.gpu.amount 1 spark.databricks.delta.preview.enabled true spark.python.worker.reuse true - spark.executorEnv.PYTHONPATH /databricks/jars/rapids-4-spark_2.12-23.04.0.jar:/databricks/spark/python + spark.executorEnv.PYTHONPATH /databricks/jars/rapids-4-spark_2.12-23.06.0.jar:/databricks/spark/python spark.sql.execution.arrow.maxRecordsPerBatch 100000 spark.rapids.memory.gpu.minAllocFraction 0.0001 spark.plugins com.nvidia.spark.SQLPlugin diff --git a/notebooks/databricks/init-pip-cuda-11.8.sh b/notebooks/databricks/init-pip-cuda-11.8.sh index 879785c7..63d27268 100644 --- a/notebooks/databricks/init-pip-cuda-11.8.sh +++ b/notebooks/databricks/init-pip-cuda-11.8.sh @@ -5,9 +5,9 @@ SPARK_RAPIDS_ML_ZIP=/dbfs/path/to/zip/file # also RAPIDS_VERSION (python) fields should omit any leading 0 in month/minor field (i.e. 23.6.0 and not 23.06.0) # while SPARK_RAPIDS_VERSION (jar) should have leading 0 in month/minor (e.g. 23.06.0 and not 23.6.0) RAPIDS_VERSION=23.6.0 -SPARK_RAPIDS_VERSION=23.04.0 +SPARK_RAPIDS_VERSION=23.06.0 -curl -L https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/${SPARK_RAPIDS_VERSION}/rapids-4-spark_2.12-${SPARK_RAPIDS_VERSION}.jar -o /databricks/jars/rapids-4-spark_2.12-${SPARK_RAPIDS_VERSION}.jar +curl -L https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/${SPARK_RAPIDS_VERSION}/rapids-4-spark_2.12-${SPARK_RAPIDS_VERSION}-cuda11.jar -o /databricks/jars/rapids-4-spark_2.12-${SPARK_RAPIDS_VERSION}.jar # install cudatoolkit 11.8 via runfile approach wget https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run @@ -34,7 +34,7 @@ ldconfig /databricks/python/bin/pip install --upgrade pip # install cudf, cuml and their rapids dependencies -# using ~= pulls in lates micro version patches +# using ~= pulls in latest micro version patches /databricks/python/bin/pip install cudf-cu11~=${RAPIDS_VERSION} \ cuml-cu11~=${RAPIDS_VERSION} \ pylibraft-cu11~=${RAPIDS_VERSION} \ diff --git a/python/benchmark/databricks/gpu_cluster_spec.sh b/python/benchmark/databricks/gpu_cluster_spec.sh index becde51f..b302b944 100644 --- a/python/benchmark/databricks/gpu_cluster_spec.sh +++ b/python/benchmark/databricks/gpu_cluster_spec.sh @@ -9,7 +9,7 @@ cat <