From 1b0b316a13a73b499e42a7edfbc994b831b69417 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Fri, 22 Mar 2024 16:30:45 +0100 Subject: [PATCH] enabling the compiler plugin on modules, sparkifying data classes --- build.gradle.kts | 2 +- buildSrc/src/main/kotlin/Plugins.kt | 2 ++ buildSrc/src/main/kotlin/Versions.kt | 1 + kotlin-spark-api/build.gradle.kts | 4 +++- .../kotlin/org/jetbrains/kotlinx/spark/api/ApiTest.kt | 2 ++ .../kotlinx/spark/api/DatasetFunctionTest.kt | 4 ++++ .../org/jetbrains/kotlinx/spark/api/EncodingTest.kt | 10 ++++++++++ .../jetbrains/kotlinx/spark/api/TypeInferenceTest.kt | 10 ++++++---- .../kotlin/org/jetbrains/kotlinx/spark/api/UDFTest.kt | 6 ++++-- .../kotlinx/spark/api/struct/model/models.kt | 11 +++++++---- 10 files changed, 40 insertions(+), 12 deletions(-) diff --git a/build.gradle.kts b/build.gradle.kts index 325c03d4..56775b23 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -23,7 +23,7 @@ plugins { buildconfig version Versions.buildconfig apply false // Needs to be installed in the local maven repository - id("org.jetbrains.kotlinx.spark.api") version Versions.project apply false + kotlinSparkApi version Versions.kotlinSparkApiGradlePlugin apply false } group = Versions.groupID diff --git a/buildSrc/src/main/kotlin/Plugins.kt b/buildSrc/src/main/kotlin/Plugins.kt index 10af1a03..59e273b0 100644 --- a/buildSrc/src/main/kotlin/Plugins.kt +++ b/buildSrc/src/main/kotlin/Plugins.kt @@ -2,6 +2,8 @@ import org.gradle.api.Project import org.gradle.kotlin.dsl.* import org.gradle.plugin.use.PluginDependenciesSpec +inline val PluginDependenciesSpec.kotlinSparkApi + get() = id("org.jetbrains.kotlinx.spark.api") inline val PluginDependenciesSpec.kotlin get() = kotlin("jvm") diff --git a/buildSrc/src/main/kotlin/Versions.kt b/buildSrc/src/main/kotlin/Versions.kt index 4dbc3cba..59eab276 100644 --- a/buildSrc/src/main/kotlin/Versions.kt +++ b/buildSrc/src/main/kotlin/Versions.kt @@ -1,5 +1,6 @@ object Versions : Dsl { const val project = "2.0.0-SNAPSHOT" + const val kotlinSparkApiGradlePlugin = "2.0.0-SNAPSHOT" const val groupID = "org.jetbrains.kotlinx.spark" const val kotlin = "2.0.0-Beta5" const val jvmTarget = "8" diff --git a/kotlin-spark-api/build.gradle.kts b/kotlin-spark-api/build.gradle.kts index 60e66c95..f1fc85ba 100644 --- a/kotlin-spark-api/build.gradle.kts +++ b/kotlin-spark-api/build.gradle.kts @@ -1,4 +1,4 @@ -@file:Suppress("UnstableApiUsage", "NOTHING_TO_INLINE") +@file:Suppress("UnstableApiUsage") import com.igormaznitsa.jcp.gradle.JcpTask import com.vanniktech.maven.publish.JavadocJar.Dokka @@ -11,6 +11,7 @@ plugins { mavenPublishBase jcp idea + kotlinSparkApi // for @Sparkify } group = Versions.groupID @@ -19,6 +20,7 @@ version = Versions.project repositories { mavenCentral() + mavenLocal() } tasks.withType().configureEach { diff --git a/kotlin-spark-api/src/test/kotlin/org/jetbrains/kotlinx/spark/api/ApiTest.kt b/kotlin-spark-api/src/test/kotlin/org/jetbrains/kotlinx/spark/api/ApiTest.kt index 6fc1760c..65a845c7 100644 --- a/kotlin-spark-api/src/test/kotlin/org/jetbrains/kotlinx/spark/api/ApiTest.kt +++ b/kotlin-spark-api/src/test/kotlin/org/jetbrains/kotlinx/spark/api/ApiTest.kt @@ -21,6 +21,7 @@ import ch.tutteli.atrium.api.fluent.en_GB.* import ch.tutteli.atrium.api.verbs.expect import io.kotest.core.spec.style.ShouldSpec import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.spark.api.plugin.annotations.Sparkify import scala.collection.Seq import java.io.Serializable import kotlin.collections.Iterator @@ -165,4 +166,5 @@ class ApiTest : ShouldSpec({ // (data) class must be Serializable to be broadcast +@Sparkify data class SomeClass(val a: IntArray, val b: Int) : Serializable diff --git a/kotlin-spark-api/src/test/kotlin/org/jetbrains/kotlinx/spark/api/DatasetFunctionTest.kt b/kotlin-spark-api/src/test/kotlin/org/jetbrains/kotlinx/spark/api/DatasetFunctionTest.kt index 1cf6b861..066860a8 100644 --- a/kotlin-spark-api/src/test/kotlin/org/jetbrains/kotlinx/spark/api/DatasetFunctionTest.kt +++ b/kotlin-spark-api/src/test/kotlin/org/jetbrains/kotlinx/spark/api/DatasetFunctionTest.kt @@ -33,6 +33,7 @@ import org.apache.spark.sql.functions.col import org.apache.spark.sql.streaming.GroupState import org.apache.spark.sql.streaming.GroupStateTimeout import org.jetbrains.kotlinx.spark.api.tuples.* +import org.jetbrains.kotlinx.spark.api.plugin.annotations.Sparkify import scala.Tuple2 import scala.Tuple3 import scala.Tuple4 @@ -68,8 +69,10 @@ class DatasetFunctionTest : ShouldSpec({ } should("handle join operations") { + @Sparkify data class Left(val id: Int, val name: String) + @Sparkify data class Right(val id: Int, val value: Int) val first = dsOf(Left(1, "a"), Left(2, "b")) @@ -453,4 +456,5 @@ class DatasetFunctionTest : ShouldSpec({ } }) +@Sparkify data class SomeOtherClass(val a: IntArray, val b: Int, val c: Boolean) : Serializable diff --git a/kotlin-spark-api/src/test/kotlin/org/jetbrains/kotlinx/spark/api/EncodingTest.kt b/kotlin-spark-api/src/test/kotlin/org/jetbrains/kotlinx/spark/api/EncodingTest.kt index 3b19a224..2fc9b791 100644 --- a/kotlin-spark-api/src/test/kotlin/org/jetbrains/kotlinx/spark/api/EncodingTest.kt +++ b/kotlin-spark-api/src/test/kotlin/org/jetbrains/kotlinx/spark/api/EncodingTest.kt @@ -28,6 +28,7 @@ import io.kotest.matchers.string.shouldContain import org.apache.spark.sql.Dataset import org.apache.spark.sql.types.Decimal import org.apache.spark.unsafe.types.CalendarInterval +import org.jetbrains.kotlinx.spark.api.plugin.annotations.Sparkify import org.jetbrains.kotlinx.spark.api.tuples.* import scala.* import java.math.BigDecimal @@ -600,7 +601,9 @@ class EncodingTest : ShouldSpec({ } should("handle strings converted to lists") { + @Sparkify data class Movie(val id: Long, val genres: String) + @Sparkify data class MovieExpanded(val id: Long, val genres: List) val comedies = listOf(Movie(1, "Comedy|Romance"), Movie(2, "Horror|Action")).toDS() @@ -617,8 +620,10 @@ class EncodingTest : ShouldSpec({ should("handle strings converted to arrays") { + @Sparkify data class Movie(val id: Long, val genres: String) + @Sparkify data class MovieExpanded(val id: Long, val genres: Array) { override fun equals(other: Any?): Boolean { if (this === other) return true @@ -681,6 +686,7 @@ class EncodingTest : ShouldSpec({ } }) +@Sparkify data class IsSomethingClass( val enabled: Boolean, val isEnabled: Boolean, @@ -690,14 +696,17 @@ data class IsSomethingClass( val getDouble: Double ) +@Sparkify data class DataClassWithTuple(val tuple: T) +@Sparkify data class LonLat(val lon: Double, val lat: Double) enum class SomeEnum { A, B } enum class SomeOtherEnum(val value: Int) { C(1), D(2) } +@Sparkify data class ComplexEnumDataClass( val int: Int, val string: String, @@ -711,6 +720,7 @@ data class ComplexEnumDataClass( val enumMap: Map, ) +@Sparkify data class NullFieldAbleDataClass( val optionList: List?, val optionMap: Map?, diff --git a/kotlin-spark-api/src/test/kotlin/org/jetbrains/kotlinx/spark/api/TypeInferenceTest.kt b/kotlin-spark-api/src/test/kotlin/org/jetbrains/kotlinx/spark/api/TypeInferenceTest.kt index 983b2caf..0d4d6d52 100644 --- a/kotlin-spark-api/src/test/kotlin/org/jetbrains/kotlinx/spark/api/TypeInferenceTest.kt +++ b/kotlin-spark-api/src/test/kotlin/org/jetbrains/kotlinx/spark/api/TypeInferenceTest.kt @@ -23,6 +23,7 @@ import ch.tutteli.atrium.creating.Expect import io.kotest.core.spec.style.ShouldSpec import org.apache.spark.sql.types.ArrayType import org.apache.spark.sql.types.IntegerType +import org.jetbrains.kotlinx.spark.api.plugin.annotations.Sparkify import org.jetbrains.kotlinx.spark.api.struct.model.DataType.StructType import org.jetbrains.kotlinx.spark.api.struct.model.DataType.TypeName import org.jetbrains.kotlinx.spark.api.struct.model.ElementType.ComplexElement @@ -35,8 +36,8 @@ import kotlin.reflect.typeOf @OptIn(ExperimentalStdlibApi::class) class TypeInferenceTest : ShouldSpec({ context("org.jetbrains.spark.api.org.jetbrains.spark.api.schema") { - data class Test2(val vala2: T, val para2: Pair) - data class Test(val vala: T, val tripl1: Triple, T>) + @Sparkify data class Test2(val vala2: T, val para2: Pair) + @Sparkify data class Test(val vala: T, val tripl1: Triple, T>) val struct = Struct.fromJson(kotlinEncoderFor>>().schema().prettyJson())!! should("contain correct typings") { @@ -64,9 +65,10 @@ class TypeInferenceTest : ShouldSpec({ } } context("org.jetbrains.spark.api.org.jetbrains.spark.api.schema with more complex data") { - data class Single(val vala3: T) + @Sparkify data class Single(val vala3: T) + @Sparkify data class Test2(val vala2: T, val para2: Pair>) - data class Test(val vala: T, val tripl1: Triple, T>) + @Sparkify data class Test(val vala: T, val tripl1: Triple, T>) val struct = Struct.fromJson(kotlinEncoderFor>>().schema().prettyJson())!! should("contain correct typings") { diff --git a/kotlin-spark-api/src/test/kotlin/org/jetbrains/kotlinx/spark/api/UDFTest.kt b/kotlin-spark-api/src/test/kotlin/org/jetbrains/kotlinx/spark/api/UDFTest.kt index 0f93d7cd..26b79c30 100644 --- a/kotlin-spark-api/src/test/kotlin/org/jetbrains/kotlinx/spark/api/UDFTest.kt +++ b/kotlin-spark-api/src/test/kotlin/org/jetbrains/kotlinx/spark/api/UDFTest.kt @@ -33,6 +33,7 @@ import org.apache.spark.sql.Encoder import org.apache.spark.sql.Row import org.apache.spark.sql.expressions.Aggregator import org.intellij.lang.annotations.Language +import org.jetbrains.kotlinx.spark.api.plugin.annotations.Sparkify import org.junit.jupiter.api.assertThrows import scala.collection.Seq import java.io.Serializable @@ -1261,8 +1262,8 @@ class UDFTest : ShouldSpec({ } }) -data class Employee(val name: String, val salary: Long) -data class Average(var sum: Long, var count: Long) +@Sparkify data class Employee(val name: String, val salary: Long) +@Sparkify data class Average(var sum: Long, var count: Long) private object MyAverage : Aggregator() { // A zero value for this aggregation. Should satisfy the property that any b + zero = b @@ -1316,6 +1317,7 @@ private val aggregator = aggregatorOf( private val addTwoConst = { x: Int, y: Int -> x + y } +@Sparkify data class NormalClass( val age: Int, val name: String diff --git a/kotlin-spark-api/src/test/kotlin/org/jetbrains/kotlinx/spark/api/struct/model/models.kt b/kotlin-spark-api/src/test/kotlin/org/jetbrains/kotlinx/spark/api/struct/model/models.kt index f0d365e6..72a2f99e 100644 --- a/kotlin-spark-api/src/test/kotlin/org/jetbrains/kotlinx/spark/api/struct/model/models.kt +++ b/kotlin-spark-api/src/test/kotlin/org/jetbrains/kotlinx/spark/api/struct/model/models.kt @@ -23,6 +23,7 @@ import com.beust.klaxon.Converter import com.beust.klaxon.JsonObject import com.beust.klaxon.JsonValue import com.beust.klaxon.Klaxon +import org.jetbrains.kotlinx.spark.api.plugin.annotations.Sparkify private fun Klaxon.convert( k: kotlin.reflect.KClass<*>, @@ -43,6 +44,7 @@ private val klaxon = Klaxon() .convert(DataType::class, { DataType.fromJson(it) }, { it.toJson() }, true) .convert(ElementType::class, { ElementType.fromJson(it) }, { it.toJson() }, true) +@Sparkify data class Struct( val type: String, val fields: List? = null, @@ -56,6 +58,7 @@ data class Struct( } } +@Sparkify data class StructField( val name: String, val type: DataType, @@ -66,8 +69,8 @@ data class StructField( typealias Metadata = JsonObject sealed class DataType { - data class StructType(val value: Struct) : DataType() - data class TypeName(val value: String) : DataType() + @Sparkify data class StructType(val value: Struct) : DataType() + @Sparkify data class TypeName(val value: String) : DataType() public fun toJson(): String = klaxon.toJsonString(when (this) { is StructType -> this.value @@ -84,8 +87,8 @@ sealed class DataType { } sealed class ElementType { - data class SimpleElement(val value: String) : ElementType() - data class ComplexElement(val value: Struct) : ElementType() + @Sparkify data class SimpleElement(val value: String) : ElementType() + @Sparkify data class ComplexElement(val value: Struct) : ElementType() public fun toJson(): String = klaxon.toJsonString(when (this) { is SimpleElement -> this.value