
Commit

added spark connect example which does not yet work with kotlin-spark-api
Jolanrensen committed Jun 15, 2024
1 parent eae0196 commit 22fa5ae
Showing 9 changed files with 98 additions and 7 deletions.
buildSrc/src/main/kotlin/Dependencies.kt (2 changes: 2 additions & 0 deletions)
@@ -4,6 +4,8 @@ object Dependencies : Dsl<Dependencies> {
    inline val scalaLibrary get() = "org.scala-lang:scala-library:${Versions.scala}"
    inline val kotlinxHtml get() = "org.jetbrains.kotlinx:kotlinx-html-jvm:${Versions.kotlinxHtml}"
    inline val sparkSql get() = "org.apache.spark:spark-sql_${Versions.scalaCompat}:${Versions.spark}"
    inline val sparkSqlApi get() = "org.apache.spark:spark-sql-api_${Versions.scalaCompat}:${Versions.spark}"
    inline val sparkConnectClient get() = "org.apache.spark:spark-connect-client-jvm_${Versions.scalaCompat}:${Versions.spark}"
    inline val sparkMl get() = "org.apache.spark:spark-mllib_${Versions.scalaCompat}:${Versions.spark}"
    inline val sparkStreaming get() = "org.apache.spark:spark-streaming_${Versions.scalaCompat}:${Versions.spark}"
    inline val hadoopClient get() = "org.apache.hadoop:hadoop-client:${Versions.hadoop}"
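For illustration, with hypothetical values Versions.spark = "3.5.1" and Versions.scalaCompat = "2.13" (not taken from this diff), the two new accessors would resolve to the coordinates below, declared the same way the example module later in this commit uses them:

// Illustrative only: assumed versions, not part of this commit.
dependencies {
    compileOnly("org.apache.spark:spark-sql-api_2.13:3.5.1")
    implementation("org.apache.spark:spark-connect-client-jvm_2.13:3.5.1")
}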
Binary file modified gradle/bootstraps/compiler-plugin.jar
Binary file modified gradle/bootstraps/gradle-plugin.jar
@@ -162,10 +162,11 @@ abstract class Integration(private val notebook: Notebook, private val options:
        }

        beforeCellExecution {
            if (scalaCompatVersion.toDouble() >= 2.13)
            if (scalaCompatVersion.toDouble() >= 2.13) {
                execute("scala.`Console\$`.`MODULE\$`.setOutDirect(System.out)")
            else
            } else {
                execute("""scala.Console.setOut(System.out)""")
            }

            beforeCellExecution()
        }
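Because the hunk above interleaves the old and new lines of the if/else, here is how the block reads after this change:

beforeCellExecution {
    if (scalaCompatVersion.toDouble() >= 2.13) {
        execute("scala.`Console\$`.`MODULE\$`.setOutDirect(System.out)")
    } else {
        execute("""scala.Console.setOut(System.out)""")
    }

    beforeCellExecution()
}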
kotlin-spark-api/build.gradle.kts (2 changes: 1 addition & 1 deletion)
@@ -42,7 +42,7 @@ dependencies {
    // https://github.com/FasterXML/jackson-bom/issues/52
    if (Versions.spark == "3.3.1") implementation(jacksonDatabind)

    if (Versions.sparkConnect) TODO("unsupported for now")
    // if (Versions.sparkConnect) TODO("unsupported for now")

    implementation(
        kotlinStdLib,
@@ -1,14 +1,12 @@
package org.jetbrains.kotlinx.spark.api

import io.kotest.core.spec.style.ShouldSpec
import io.kotest.core.spec.style.Test
import io.kotest.core.test.TestScope
import io.kotest.matchers.collections.shouldContainAll
import io.kotest.matchers.shouldBe
import org.apache.spark.api.java.JavaRDD
import org.jetbrains.kotlinx.spark.api.tuples.*
import org.jetbrains.kotlinx.spark.api.tuples.X
import org.jetbrains.kotlinx.spark.api.tuples.t
import scala.Tuple2
import java.io.Serializable

class RddTest : ShouldSpec({
context("RDD extension functions") {
settings.gradle.kts (3 changes: 3 additions & 0 deletions)
@@ -21,9 +21,11 @@ gradleEnterprise {
val spark: String by settings
val scala: String by settings
val skipScalaOnlyDependent: String by settings
val sparkConnect: String by settings
System.setProperty("spark", spark)
System.setProperty("scala", scala)
System.setProperty("skipScalaOnlyDependent", skipScalaOnlyDependent)
System.setProperty("sparkConnect", sparkConnect)

val scalaCompat
get() = scala.substringBeforeLast('.')
@@ -37,6 +39,7 @@ include("scala-tuples-in-kotlin")
include("kotlin-spark-api")
include("jupyter")
include("examples")
include("spark-connect-examples")
include("compiler-plugin")
include("gradle-plugin")

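The sparkConnect value forwarded via System.setProperty above is presumably picked up in buildSrc alongside the other flags (kotlin-spark-api/build.gradle.kts reads it as the Boolean Versions.sparkConnect). A minimal sketch of what that accessor could look like; the property name comes from this diff, everything else is an assumption:

// Sketch only: how buildSrc might expose the flag set in settings.gradle.kts.
// The real Versions object in this repo may differ.
object Versions {
    val sparkConnect: Boolean
        get() = System.getProperty("sparkConnect", "false").toBoolean()
}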
spark-connect-examples/build.gradle.kts (60 changes: 60 additions & 0 deletions)
@@ -0,0 +1,60 @@
import org.jetbrains.kotlin.gradle.dsl.JvmTarget

plugins {
    // Needs to be installed in the local maven repository or have the bootstrap jar on the classpath
    id("org.jetbrains.kotlinx.spark.api")
    kotlin("jvm")
    application
}

// run with `./gradlew run`
application {
    mainClass = "org.jetbrains.kotlinx.spark.examples.MainKt"

    // workaround for java 17
    applicationDefaultJvmArgs = listOf("--add-opens", "java.base/java.nio=ALL-UNNAMED")
}

kotlinSparkApi {
    enabled = true
    sparkifyAnnotationFqNames = listOf("org.jetbrains.kotlinx.spark.api.plugin.annotations.Sparkify")
}

group = Versions.groupID
version = Versions.project

repositories {
    mavenLocal()
    mavenCentral()
}

dependencies {
    Projects {
        implementation(
            // TODO kotlinSparkApi,
        )
    }

    Dependencies {

        // IMPORTANT!
        compileOnly(sparkSqlApi)
        implementation(sparkConnectClient)
    }
}

// spark-connect seems to work well with java 17 as client and java 1.8 as server
// also set gradle and your project sdk to java 17
kotlin {
    jvmToolchain {
        languageVersion = JavaLanguageVersion.of(17)
    }
    compilerOptions {
        jvmTarget = JvmTarget.JVM_17
    }
}

tasks.withType<JavaCompile> {
    sourceCompatibility = JavaVersion.VERSION_17.toString()
    targetCompatibility = JavaVersion.VERSION_17.toString()
}
@@ -0,0 +1,27 @@
package org.jetbrains.kotlinx.spark.examples

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.connect.client.REPLClassDirMonitor

// run with `./gradlew run` or set VM options: "--add-opens=java.base/java.nio=ALL-UNNAMED" in the IDE
fun main() {
    val spark =
        SparkSession
            .builder()
            .remote("sc://localhost")
            .create()

    val classFinder = REPLClassDirMonitor("/mnt/data/Projects/kotlin-spark-api/spark-connect-examples/build/classes")
    spark.registerClassFinder(classFinder)
    spark.addArtifact("/mnt/data/Projects/kotlin-spark-api/spark-connect-examples/build/libs/spark-connect-examples-2.0.0-SNAPSHOT.jar")

    spark.sql("select 1").show()

    spark.stop()
}

//@Sparkify
//data class Person(
//    val name: String,
//    val age: Int,
//)
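For reference, the commented-out block above hints at where this example is headed once the Kotlin Spark API works over Spark Connect; a sketch of that eventual usage, with the @Sparkify annotation configured in the build script above (per the commit message, this does not work yet):

import org.jetbrains.kotlinx.spark.api.plugin.annotations.Sparkify

// Not functional over Spark Connect yet (see commit message); shown only to
// illustrate what the commented-out Person class is meant to become.
@Sparkify
data class Person(
    val name: String,
    val age: Int,
)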
