From 461e58b0d9268631cd487111ce517637ac2fad27 Mon Sep 17 00:00:00 2001 From: Prabodh Agarwal Date: Wed, 18 Sep 2024 11:30:50 +0530 Subject: [PATCH] [MINOR] pass the avro exception for better information (#11925) --- .../scala/org/apache/hudi/AvroConversionUtils.scala | 3 ++- .../org/apache/hudi/TestAvroConversionUtils.scala | 10 ++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/AvroConversionUtils.scala b/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/AvroConversionUtils.scala index cd75da3bb5da..5291d1ef1eeb 100644 --- a/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/AvroConversionUtils.scala +++ b/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/AvroConversionUtils.scala @@ -25,7 +25,7 @@ import org.apache.hudi.internal.schema.HoodieSchemaException import org.apache.avro.Schema.Type import org.apache.avro.generic.GenericRecord -import org.apache.avro.{JsonProperties, Schema} +import org.apache.avro.{AvroRuntimeException, JsonProperties, Schema} import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.types.{ArrayType, DataType, MapType, StructType} @@ -149,6 +149,7 @@ object AvroConversionUtils { val avroSchema = schemaConverters.toAvroType(structType, nullable = false, structName, recordNamespace) getAvroSchemaWithDefaults(avroSchema, structType) } catch { + case a: AvroRuntimeException => throw new HoodieSchemaException(a.getMessage, a) case e: Exception => throw new HoodieSchemaException("Failed to convert struct type to avro schema: " + structType, e) } } diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestAvroConversionUtils.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestAvroConversionUtils.scala index 01de5013b829..480a24d90c5e 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestAvroConversionUtils.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestAvroConversionUtils.scala @@ -20,6 +20,7 @@ package org.apache.hudi import org.apache.avro.Schema import org.apache.avro.generic.GenericData +import org.apache.hudi.internal.schema.HoodieSchemaException import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.util.{ArrayData, MapData} import org.apache.spark.sql.types.{ArrayType, BinaryType, DataType, DataTypes, MapType, StringType, StructField, StructType} @@ -445,4 +446,13 @@ class TestAvroConversionUtils extends FunSuite with Matchers { private def checkNull(left: Any, right: Any): Boolean = { (left == null && right != null) || (left == null && right != null) } + + test("convert struct type with duplicate column names") { + val struct = new StructType().add("id", DataTypes.LongType, true) + .add("name", DataTypes.StringType, true) + .add("name", DataTypes.StringType, true) + the[HoodieSchemaException] thrownBy { + AvroConversionUtils.convertStructTypeToAvroSchema(struct, "SchemaName", "SchemaNS") + } should have message "Duplicate field name in record SchemaNS.SchemaName: name type:UNION pos:2 and name type:UNION pos:1." + } }