diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 693a8bdd..f770318f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -12,7 +12,7 @@ jobs: strategy: matrix: scala: [ "2.12.17", "2.13.10" ] - spark: [ "3.3.1", "3.3.0", "3.2.3", "3.2.2", "3.2.1", "3.2.0", "3.1.3", "3.1.2", "3.1.1", "3.1.0", "3.0.3", "3.0.2", "3.0.1", "3.0.0" ] + spark: [ "3.3.2", "3.3.1", "3.3.0", "3.2.3", "3.2.2", "3.2.1", "3.2.0", "3.1.3", "3.1.2", "3.1.1", "3.1.0", "3.0.3", "3.0.2", "3.0.1", "3.0.0" ] exclude: - scala: "2.13.10" spark: "3.1.3" diff --git a/.github/workflows/publish_dev_version.yml b/.github/workflows/publish_dev_version.yml index 19f8d4f1..4e2ab716 100644 --- a/.github/workflows/publish_dev_version.yml +++ b/.github/workflows/publish_dev_version.yml @@ -10,7 +10,7 @@ jobs: strategy: matrix: scala: [ "2.12.17", "2.13.10" ] - spark: [ "3.3.1", "3.3.0", "3.2.3", "3.2.2", "3.2.1", "3.2.0", "3.1.3", "3.1.2", "3.1.1", "3.1.0", "3.0.3", "3.0.2", "3.0.1", "3.0.0" ] + spark: [ "3.3.2", "3.3.1", "3.3.0", "3.2.3", "3.2.2", "3.2.1", "3.2.0", "3.1.3", "3.1.2", "3.1.1", "3.1.0", "3.0.3", "3.0.2", "3.0.1", "3.0.0" ] exclude: - scala: "2.13.10" spark: "3.1.3" diff --git a/.github/workflows/publish_release_version.yml b/.github/workflows/publish_release_version.yml index 3283e665..ea1998ed 100644 --- a/.github/workflows/publish_release_version.yml +++ b/.github/workflows/publish_release_version.yml @@ -9,7 +9,7 @@ jobs: strategy: matrix: scala: [ "2.12.17", "2.13.10" ] - spark: [ "3.3.1", "3.3.0", "3.2.3", "3.2.2", "3.2.1", "3.2.0", "3.1.3", "3.1.2", "3.1.1", "3.1.0", "3.0.3", "3.0.2", "3.0.1", "3.0.0" ] + spark: [ "3.3.2", "3.3.1", "3.3.0", "3.2.3", "3.2.2", "3.2.1", "3.2.0", "3.1.3", "3.1.2", "3.1.1", "3.1.0", "3.0.3", "3.0.2", "3.0.1", "3.0.0" ] exclude: - scala: "2.13.10" spark: "3.1.3" diff --git a/README.md b/README.md index 5a1fd136..88cc6289 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![Kotlin Stable](https://kotl.in/badges/stable.svg)](https://kotlinlang.org/docs/components-stability.html) [![JetBrains official project](http://jb.gg/badges/official.svg)](https://confluence.jetbrains.com/display/ALL/JetBrains+on+GitHub) -[![Maven Central](https://img.shields.io/maven-central/v/org.jetbrains.kotlinx.spark/kotlin-spark-api_3.3.1_2.13.svg?label=Maven%20Central)](https://search.maven.org/search?q=g:"org.jetbrains.kotlinx.spark"%20AND%20a:"kotlin-spark-api_3.3.1_2.13") +[![Maven Central](https://img.shields.io/maven-central/v/org.jetbrains.kotlinx.spark/kotlin-spark-api_3.3.2_2.13.svg?label=Maven%20Central)](https://search.maven.org/search?q=g:"org.jetbrains.kotlinx.spark"%20AND%20a:"kotlin-spark-api_3.3.2_2.13") [![Join the chat at https://gitter.im/JetBrains/kotlin-spark-api](https://badges.gitter.im/JetBrains/kotlin-spark-api.svg)](https://gitter.im/JetBrains/kotlin-spark-api?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) Your next API to work with [Apache Spark](https://spark.apache.org/). @@ -38,6 +38,8 @@ We have opened a Spark Project Improvement Proposal: [Kotlin support for Apache | Apache Spark | Scala | Kotlin for Apache Spark | |:------------:|:-----:|:-----------------------------------:| +| 3.3.2 | 2.13 | kotlin-spark-api_3.3.2_2.13:VERSION | +| | 2.12 | kotlin-spark-api_3.3.2_2.12:VERSION | | 3.3.1 | 2.13 | kotlin-spark-api_3.3.1_2.13:VERSION | | | 2.12 | kotlin-spark-api_3.3.1_2.12:VERSION | | 3.3.0 | 2.13 | kotlin-spark-api_3.3.0_2.13:VERSION | @@ -73,7 +75,7 @@ The Kotlin for Spark artifacts adhere to the following convention: The only exception to this is `scala-tuples-in-kotlin_[Scala core version]:[Kotlin for Apache Spark API version]`, which is independent of Spark. -[![Maven Central](https://img.shields.io/maven-central/v/org.jetbrains.kotlinx.spark/kotlin-spark-api_3.3.1_2.13.svg?label=Maven%20Central)](https://search.maven.org/search?q=g:"org.jetbrains.kotlinx.spark"%20AND%20a:"kotlin-spark-api_3.3.1_2.13") +[![Maven Central](https://img.shields.io/maven-central/v/org.jetbrains.kotlinx.spark/kotlin-spark-api_3.3.2_2.13.svg?label=Maven%20Central)](https://search.maven.org/search?q=g:"org.jetbrains.kotlinx.spark"%20AND%20a:"kotlin-spark-api_3.3.2_2.13") ## How to configure Kotlin for Apache Spark in your project @@ -84,7 +86,7 @@ Here's an example `pom.xml`: ```xml org.jetbrains.kotlinx.spark - kotlin-spark-api_3.3.1_2.13 + kotlin-spark-api_3.3.2_2.13 ${kotlin-spark-api.version} @@ -117,7 +119,7 @@ To it, simply add to the top of your notebook. This will get the latest version of the API, together with the latest version of Spark. To define a certain version of Spark or the API itself, simply add it like this: ```jupyterpython -%use spark(spark=3.3.1, scala=2.13, v=1.2.2) +%use spark(spark=3.3.2, scala=2.13, v=1.2.3) ``` Inside the notebook a Spark session will be initiated automatically. This can be accessed via the `spark` value. diff --git a/buildSrc/src/main/kotlin/Versions.kt b/buildSrc/src/main/kotlin/Versions.kt index d5400394..7097a47a 100644 --- a/buildSrc/src/main/kotlin/Versions.kt +++ b/buildSrc/src/main/kotlin/Versions.kt @@ -1,25 +1,25 @@ object Versions { - const val project = "1.2.3" + const val project = "1.2.4" const val groupID = "org.jetbrains.kotlinx.spark" - const val kotlin = "1.8.0" + const val kotlin = "1.8.20" const val jvmTarget = "8" - const val jupyterJvmTarget = "11" + const val jupyterJvmTarget = "8" inline val spark get() = System.getProperty("spark") as String inline val scala get() = System.getProperty("scala") as String inline val sparkMinor get() = spark.substringBeforeLast('.') inline val scalaCompat get() = scala.substringBeforeLast('.') - const val jupyter = "0.11.0-210" + const val jupyter = "0.12.0-32-1" const val kotest = "5.5.4" const val kotestTestContainers = "1.3.3" - const val dokka = "1.7.10" + const val dokka = "1.8.20" const val jcp = "7.0.5" const val mavenPublish = "0.20.0" const val atrium = "0.17.0" const val licenseGradlePluginVersion = "0.15.0" const val kafkaStreamsTestUtils = "3.1.0" - const val hadoop = "3.3.1" + const val hadoop = "3.3.6" const val kotlinxHtml = "0.7.5" const val klaxon = "5.5" const val jacksonDatabind = "2.13.4.2" diff --git a/core/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala b/core/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala index f618cf04..864fc5f7 100644 --- a/core/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala +++ b/core/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala @@ -82,15 +82,15 @@ object CatalystTypeConverters { final def toCatalyst(@Nullable maybeScalaValue: Any): CatalystType = { if (maybeScalaValue == null) { null.asInstanceOf[CatalystType] - } else if (maybeScalaValue.isInstanceOf[Option[ScalaInputType]]) { - val opt = maybeScalaValue.asInstanceOf[Option[ScalaInputType]] - if (opt.isDefined) { - toCatalystImpl(opt.get) - } else { - null.asInstanceOf[CatalystType] - } - } else { - toCatalystImpl(maybeScalaValue.asInstanceOf[ScalaInputType]) + } else maybeScalaValue match { + case opt: Option[ScalaInputType] => + if (opt.isDefined) { + toCatalystImpl(opt.get) + } else { + null.asInstanceOf[CatalystType] + } + case _ => + toCatalystImpl(maybeScalaValue.asInstanceOf[ScalaInputType]) } } @@ -429,10 +429,11 @@ object CatalystTypeConverters { // a measurable performance impact. Note that this optimization will be unnecessary if we // use code generation to construct Scala Row -> Catalyst Row converters. def convert(maybeScalaValue: Any): Any = { - if (maybeScalaValue.isInstanceOf[Option[Any]]) { - maybeScalaValue.asInstanceOf[Option[Any]].orNull - } else { - maybeScalaValue + maybeScalaValue match { + case option: Option[Any] => + option.orNull + case _ => + maybeScalaValue } } diff --git a/docs/quick-start-guide.md b/docs/quick-start-guide.md index 25a55be7..66d8e655 100644 --- a/docs/quick-start-guide.md +++ b/docs/quick-start-guide.md @@ -72,13 +72,13 @@ Here's what the `pom.xml` looks like for this example: org.jetbrains.kotlinx.spark - kotlin-spark-api_3.3.1_2.13 + kotlin-spark-api_3.3.2_2.13 1.2.3 org.apache.spark spark-sql_2.12 - 3.3.1 + 3.3.2 @@ -163,8 +163,8 @@ dependencies { // Kotlin stdlib implementation 'org.jetbrains.kotlin:kotlin-stdlib:1.8.0' // Kotlin Spark API - implementation 'org.jetbrains.kotlinx.spark:kotlin-spark-api_3.3.1_2.13:1.2.3' // Apache Spark - compileOnly 'org.apache.spark:spark-sql_2.12:3.3.1' + implementation 'org.jetbrains.kotlinx.spark:kotlin-spark-api_3.3.2_2.13:1.2.3' // Apache Spark + compileOnly 'org.apache.spark:spark-sql_2.12:3.3.2' } compileKotlin { @@ -198,9 +198,9 @@ dependencies { // Kotlin stdlib implementation ("org.jetbrains.kotlin:kotlin-stdlib:1.4.0") // Kotlin Spark API - implementation ("org.jetbrains.kotlinx.spark:kotlin-spark-api_3.3.1_2.13:1.2.3") + implementation ("org.jetbrains.kotlinx.spark:kotlin-spark-api_3.3.2_2.13:1.2.3") // Apache Spark - compileOnly ("org.apache.spark:spark-sql_2.12:3.3.1") + compileOnly ("org.apache.spark:spark-sql_2.12:3.3.2") } compileKotlin.kotlinOptions.jvmTarget = "1.8" diff --git a/gradle.properties b/gradle.properties index 416269d3..bcfebf83 100644 --- a/gradle.properties +++ b/gradle.properties @@ -7,11 +7,11 @@ GROUP=org.jetbrains.kotlinx.spark # Controls the spark and scala version for the entire project # can also be defined like ./gradlew -Pspark=X.X.X -Pscala=X.X.X build -spark=3.3.1 +spark=3.3.2 scala=2.13.10 # scala=2.12.17 skipScalaTuplesInKotlin=false org.gradle.caching=true org.gradle.parallel=false -#kotlin.incremental.useClasspathSnapshot=true \ No newline at end of file +#kotlin.incremental.useClasspathSnapshot=true diff --git a/jupyter/src/main/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/Integration.kt b/jupyter/src/main/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/Integration.kt index b35109c8..448751ae 100644 --- a/jupyter/src/main/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/Integration.kt +++ b/jupyter/src/main/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/Integration.kt @@ -84,6 +84,7 @@ abstract class Integration(private val notebook: Notebook, private val options: "org.jetbrains.kotlin:kotlin-stdlib-jdk8:$kotlinVersion", "org.jetbrains.kotlin:kotlin-reflect:$kotlinVersion", "org.apache.spark:spark-sql_$scalaCompatVersion:$sparkVersion", + "org.apache.spark:spark-yarn_$scalaCompatVersion:$sparkVersion", "org.apache.spark:spark-streaming_$scalaCompatVersion:$sparkVersion", "org.apache.spark:spark-mllib_$scalaCompatVersion:$sparkVersion", "org.apache.spark:spark-sql_$scalaCompatVersion:$sparkVersion", diff --git a/jupyter/src/test/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/JupyterTests.kt b/jupyter/src/test/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/JupyterTests.kt index c08def05..b82512b7 100644 --- a/jupyter/src/test/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/JupyterTests.kt +++ b/jupyter/src/test/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/JupyterTests.kt @@ -59,6 +59,7 @@ class JupyterTests : ShouldSpec({ librariesScanner.addLibrariesFromClassLoader( classLoader = currentClassLoader, host = this, + notebook = notebook, integrationTypeNameRules = listOf( PatternNameAcceptanceRule( acceptsFlag = false, @@ -341,6 +342,7 @@ class JupyterStreamingTests : ShouldSpec({ librariesScanner.addLibrariesFromClassLoader( classLoader = currentClassLoader, host = this, + notebook = notebook, integrationTypeNameRules = listOf( PatternNameAcceptanceRule( acceptsFlag = false,