From b98c7a16aabc4098b70b12ad5a18a78dcf114b71 Mon Sep 17 00:00:00 2001 From: sahusanket Date: Fri, 26 Jul 2024 13:42:58 +0530 Subject: [PATCH] CDAP-21027 : Upgrade to Hadoop 3.3.6 --- amazon-s3-plugins | 2 +- cassandra-plugins/pom.xml | 10 --- core-plugins/pom.xml | 4 -- database-plugins/pom.xml | 2 +- format-avro/pom.xml | 11 +-- .../delimited/common/DateTimePattern.java | 1 - format-parquet/pom.xml | 19 ++--- .../parquet/avro/AvroSchemaConverter.java | 40 ++--------- google-cloud | 2 +- hbase-plugins/pom.xml | 4 +- http-plugins/pom.xml | 2 +- .../cdap/plugin/common/spark/VectorUtils.java | 2 +- mongodb-plugins/pom.xml | 19 +++++ pom.xml | 72 +++++-------------- spark-plugins/pom.xml | 15 ++++ transform-plugins/pom.xml | 2 +- 16 files changed, 73 insertions(+), 134 deletions(-) diff --git a/amazon-s3-plugins b/amazon-s3-plugins index 56a6186f4..7c08d8b62 160000 --- a/amazon-s3-plugins +++ b/amazon-s3-plugins @@ -1 +1 @@ -Subproject commit 56a6186f4ffac32480fe1129e6bab7d9e176b4e1 +Subproject commit 7c08d8b62490016fe005dd85bd231f75538cfd9c diff --git a/cassandra-plugins/pom.xml b/cassandra-plugins/pom.xml index a8eee59e0..c27be143a 100644 --- a/cassandra-plugins/pom.xml +++ b/cassandra-plugins/pom.xml @@ -66,16 +66,6 @@ org.apache.cassandra cassandra-all - - org.codehaus.jackson - jackson-core-asl - 1.9.13 - - - org.codehaus.jackson - jackson-mapper-asl - 1.9.13 - org.jboss.netty netty diff --git a/core-plugins/pom.xml b/core-plugins/pom.xml index c4185bbfd..d18e00dff 100644 --- a/core-plugins/pom.xml +++ b/core-plugins/pom.xml @@ -209,10 +209,6 @@ sshd-core 1.2.0 - - org.apache.hive - hive-exec - io.cdap.plugin format-avro diff --git a/database-plugins/pom.xml b/database-plugins/pom.xml index 789179f79..74efb2262 100644 --- a/database-plugins/pom.xml +++ b/database-plugins/pom.xml @@ -75,7 +75,7 @@ com.fasterxml.jackson.core jackson-databind test - 2.13.4.2 + ${jackson.databind.version} junit diff --git a/format-avro/pom.xml b/format-avro/pom.xml index 2d4673651..325645091 100644 --- a/format-avro/pom.xml +++ b/format-avro/pom.xml @@ -68,14 +68,9 @@ direct dependency for some reason --> - org.codehaus.jackson - jackson-core-asl - 1.9.13 - - - org.codehaus.jackson - jackson-mapper-asl - 1.9.13 + com.fasterxml.jackson.core + jackson-databind + ${jackson.databind.version} junit diff --git a/format-delimited/src/main/java/io/cdap/plugin/format/delimited/common/DateTimePattern.java b/format-delimited/src/main/java/io/cdap/plugin/format/delimited/common/DateTimePattern.java index 1a440ad2f..f544196c7 100644 --- a/format-delimited/src/main/java/io/cdap/plugin/format/delimited/common/DateTimePattern.java +++ b/format-delimited/src/main/java/io/cdap/plugin/format/delimited/common/DateTimePattern.java @@ -20,7 +20,6 @@ import io.cdap.plugin.format.delimited.dto.DateTimeStandard; import io.cdap.plugin.format.delimited.dto.SupportedDateTimeStandards; import org.apache.commons.lang3.StringUtils; -import org.codehaus.jackson.map.ObjectMapper; import java.io.BufferedReader; import java.io.InputStream; diff --git a/format-parquet/pom.xml b/format-parquet/pom.xml index 18339c67f..d9ecf22a0 100644 --- a/format-parquet/pom.xml +++ b/format-parquet/pom.xml @@ -81,24 +81,15 @@ parquet-avro 1.12.0 - - - org.codehaus.jackson - jackson-core-asl - 1.9.13 - - - org.codehaus.jackson - jackson-mapper-asl - 1.9.13 - com.google.guava guava + + com.fasterxml.jackson.core + jackson-databind + ${jackson.databind.version} + diff --git a/format-parquet/src/main/java/org/apache/parquet/avro/AvroSchemaConverter.java b/format-parquet/src/main/java/org/apache/parquet/avro/AvroSchemaConverter.java index c042642c8..62a3687af 100644 --- a/format-parquet/src/main/java/org/apache/parquet/avro/AvroSchemaConverter.java +++ b/format-parquet/src/main/java/org/apache/parquet/avro/AvroSchemaConverter.java @@ -16,7 +16,6 @@ * specific language governing permissions and limitations * under the License. */ - package org.apache.parquet.avro; import org.apache.avro.LogicalType; @@ -32,7 +31,6 @@ import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName; import org.apache.parquet.schema.Type; import org.apache.parquet.schema.Types; -import org.apache.parquet.schema.LogicalTypeAnnotation.UUIDLogicalTypeAnnotation; import java.util.ArrayList; import java.util.Arrays; @@ -44,12 +42,8 @@ import static java.util.Optional.empty; import static java.util.Optional.of; import static org.apache.avro.JsonProperties.NULL_VALUE; -import static org.apache.parquet.avro.AvroReadSupport.READ_INT96_AS_FIXED; -import static org.apache.parquet.avro.AvroReadSupport.READ_INT96_AS_FIXED_DEFAULT; import static org.apache.parquet.avro.AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE; import static org.apache.parquet.avro.AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE_DEFAULT; -import static org.apache.parquet.avro.AvroWriteSupport.WRITE_PARQUET_UUID; -import static org.apache.parquet.avro.AvroWriteSupport.WRITE_PARQUET_UUID_DEFAULT; import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.MICROS; import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.MILLIS; import static org.apache.parquet.schema.LogicalTypeAnnotation.dateType; @@ -58,7 +52,6 @@ import static org.apache.parquet.schema.LogicalTypeAnnotation.stringType; import static org.apache.parquet.schema.LogicalTypeAnnotation.timeType; import static org.apache.parquet.schema.LogicalTypeAnnotation.timestampType; -import static org.apache.parquet.schema.LogicalTypeAnnotation.uuidType; import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.*; import static org.apache.parquet.schema.Type.Repetition.REPEATED; @@ -78,11 +71,10 @@ public class AvroSchemaConverter { private final boolean assumeRepeatedIsListElement; private final boolean writeOldListStructure; - private final boolean writeParquetUUID; - private final boolean readInt96AsFixed; public AvroSchemaConverter() { - this(ADD_LIST_ELEMENT_RECORDS_DEFAULT); + this.assumeRepeatedIsListElement = ADD_LIST_ELEMENT_RECORDS_DEFAULT; + this.writeOldListStructure = WRITE_OLD_LIST_STRUCTURE_DEFAULT; } /** @@ -94,8 +86,6 @@ public AvroSchemaConverter() { AvroSchemaConverter(boolean assumeRepeatedIsListElement) { this.assumeRepeatedIsListElement = assumeRepeatedIsListElement; this.writeOldListStructure = WRITE_OLD_LIST_STRUCTURE_DEFAULT; - this.writeParquetUUID = WRITE_PARQUET_UUID_DEFAULT; - this.readInt96AsFixed = READ_INT96_AS_FIXED_DEFAULT; } public AvroSchemaConverter(Configuration conf) { @@ -103,8 +93,6 @@ public AvroSchemaConverter(Configuration conf) { ADD_LIST_ELEMENT_RECORDS, ADD_LIST_ELEMENT_RECORDS_DEFAULT); this.writeOldListStructure = conf.getBoolean( WRITE_OLD_LIST_STRUCTURE, WRITE_OLD_LIST_STRUCTURE_DEFAULT); - this.writeParquetUUID = conf.getBoolean(WRITE_PARQUET_UUID, WRITE_PARQUET_UUID_DEFAULT); - this.readInt96AsFixed = conf.getBoolean(READ_INT96_AS_FIXED, READ_INT96_AS_FIXED_DEFAULT); } /** @@ -159,7 +147,6 @@ private Type convertField(String fieldName, Schema schema) { private Type convertField(String fieldName, Schema schema, Type.Repetition repetition) { Types.PrimitiveBuilder builder; Schema.Type type = schema.getType(); - LogicalType logicalType = schema.getLogicalType(); if (type.equals(Schema.Type.BOOLEAN)) { builder = Types.primitive(BOOLEAN, repetition); } else if (type.equals(Schema.Type.INT)) { @@ -173,12 +160,7 @@ private Type convertField(String fieldName, Schema schema, Type.Repetition repet } else if (type.equals(Schema.Type.BYTES)) { builder = Types.primitive(BINARY, repetition); } else if (type.equals(Schema.Type.STRING)) { - if (logicalType != null && logicalType.getName().equals(LogicalTypes.uuid().getName()) && writeParquetUUID) { - builder = Types.primitive(FIXED_LEN_BYTE_ARRAY, repetition) - .length(LogicalTypeAnnotation.UUIDLogicalTypeAnnotation.BYTES); - } else { - builder = Types.primitive(BINARY, repetition).as(stringType()); - } + builder = Types.primitive(BINARY, repetition).as(stringType()); } else if (type.equals(Schema.Type.RECORD)) { return new GroupType(repetition, fieldName, convertFields(schema.getFields())); } else if (type.equals(Schema.Type.ENUM)) { @@ -206,6 +188,7 @@ private Type convertField(String fieldName, Schema schema, Type.Repetition repet // schema translation can only be done for known logical types because this // creates an equivalence + LogicalType logicalType = schema.getLogicalType(); if (logicalType != null) { if (logicalType instanceof LogicalTypes.Decimal) { LogicalTypes.Decimal decimal = (LogicalTypes.Decimal) logicalType; @@ -326,12 +309,8 @@ public Schema convertDOUBLE(PrimitiveTypeName primitiveTypeName) { } @Override public Schema convertFIXED_LEN_BYTE_ARRAY(PrimitiveTypeName primitiveTypeName) { - if (annotation instanceof LogicalTypeAnnotation.UUIDLogicalTypeAnnotation) { - return Schema.create(Schema.Type.STRING); - } else { - int size = parquetType.asPrimitiveType().getTypeLength(); - return Schema.createFixed(parquetType.getName(), null, null, size); - } + int size = parquetType.asPrimitiveType().getTypeLength(); + return Schema.createFixed(parquetType.getName(), null, null, size); } @Override public Schema convertBINARY(PrimitiveTypeName primitiveTypeName) { @@ -443,8 +422,6 @@ private LogicalTypeAnnotation convertLogicalType(LogicalType logicalType) { return timestampType(true, MILLIS); } else if (logicalType instanceof LogicalTypes.TimestampMicros) { return timestampType(true, MICROS); - } else if (logicalType.getName().equals(LogicalTypes.uuid().getName()) && writeParquetUUID) { - return uuidType(); } return null; } @@ -487,11 +464,6 @@ public Optional visit(LogicalTypeAnnotation.TimestampLogicalTypeAnn } return empty(); } - - @Override - public Optional visit(UUIDLogicalTypeAnnotation uuidLogicalType) { - return of(LogicalTypes.uuid()); - } }).orElse(null); } diff --git a/google-cloud b/google-cloud index e65c7e22b..99efc3f45 160000 --- a/google-cloud +++ b/google-cloud @@ -1 +1 @@ -Subproject commit e65c7e22b6765d687bd427371ef7c91e0e5210e9 +Subproject commit 99efc3f45bf574372edcbc4587a73e5a37e8ca93 diff --git a/hbase-plugins/pom.xml b/hbase-plugins/pom.xml index 92e47d195..0fda54a16 100644 --- a/hbase-plugins/pom.xml +++ b/hbase-plugins/pom.xml @@ -68,7 +68,7 @@ org.apache.hbase hbase-server - 0.98.6.1-hadoop2 + 2.5.8-hadoop3 org.apache.hbase @@ -127,7 +127,7 @@ org.apache.hbase hbase-testing-util - 0.98.6.1-hadoop2 + 2.5.8-hadoop3 test diff --git a/http-plugins/pom.xml b/http-plugins/pom.xml index 8c5ab4403..4048d258b 100644 --- a/http-plugins/pom.xml +++ b/http-plugins/pom.xml @@ -68,7 +68,7 @@ com.fasterxml.jackson.core jackson-databind test - 2.13.4.2 + ${jackson.databind.version} diff --git a/hydrator-common/src/main/java/io/cdap/plugin/common/spark/VectorUtils.java b/hydrator-common/src/main/java/io/cdap/plugin/common/spark/VectorUtils.java index 28d43f2a4..e4b4981f9 100644 --- a/hydrator-common/src/main/java/io/cdap/plugin/common/spark/VectorUtils.java +++ b/hydrator-common/src/main/java/io/cdap/plugin/common/spark/VectorUtils.java @@ -17,7 +17,7 @@ import io.cdap.cdap.api.data.format.StructuredRecord; import io.cdap.cdap.api.data.schema.Schema; -import org.apache.commons.lang.ArrayUtils; +import org.apache.commons.lang3.ArrayUtils; import org.apache.spark.mllib.linalg.SparseVector; import java.util.List; diff --git a/mongodb-plugins/pom.xml b/mongodb-plugins/pom.xml index 25f296566..f7ac63c87 100644 --- a/mongodb-plugins/pom.xml +++ b/mongodb-plugins/pom.xml @@ -96,6 +96,25 @@ org.apache.hive hive-exec + 3.1.3 + + + org.codehaus.jackson + jackson-mapper-asl + + + org.codehaus.jackson + jackson-core-asl + + + org.apache.calcite.avatica + avatica + + + commons-logging + commons-logging + + diff --git a/pom.xml b/pom.xml index 4f6356f43..d4d3a3afe 100644 --- a/pom.xml +++ b/pom.xml @@ -29,7 +29,7 @@ hydrator-common core-plugins - cassandra-plugins + database-plugins format-avro format-blob @@ -40,7 +40,7 @@ format-parquet format-text format-xls - hbase-plugins + http-plugins mongodb-plugins solrsearch-plugins @@ -95,15 +95,15 @@ 1.2 7.1 - 1.10.2 - 2.1.0 + 1.11.0 + 3.11.12 2.0.2.2 1.10 3.1 2.6 3.5 3.2.2 - 6.9.1 + 6.11.0-SNAPSHOT 2.0.5 1.6 1.6.0 @@ -111,7 +111,7 @@ 6.6.1 1.1.1 13.0.1 - 2.10.2 + 3.3.6 2.2.4 1.4.1 4.13.1 @@ -126,6 +126,7 @@ 4.1.75.Final 1.3.0 3.3.2 + 2.12.7.1 ${project.basedir}/src/test/java/ @@ -496,55 +497,6 @@ ${mockito.version} test - - org.apache.hive - hive-exec - 1.2.1 - test - - - commons-logging - commons-logging - - - com.google.guava - guava - - - org.apache.avro - avro-mapred - - - org.codehaus.groovy - groovy-all - - - org.slf4j - slf4j-log4j12 - - - org.codehaus.janino - janino - - - org.codehaus.janino - commons-compiler - - - stax - stax-api - - - org.pentaho - * - - - log4j - log4j - - - - org.apache.cassandra cassandra-all @@ -678,6 +630,16 @@ org.apache.avro avro ${avro.version} + + + com.fasterxml.jackson.core + jackson-core + + + com.fasterxml.jackson.core + jackson-databind + + org.apache.avro diff --git a/spark-plugins/pom.xml b/spark-plugins/pom.xml index 36ad9ceb3..8dc6ec8ce 100644 --- a/spark-plugins/pom.xml +++ b/spark-plugins/pom.xml @@ -135,6 +135,10 @@ net.java.dev.jets3t jets3t + + com.fasterxml.jackson.core + jackson-databind + @@ -158,6 +162,17 @@ org.apache.hadoop hadoop-common + + + com.fasterxml.jackson.core + jackson-databind + + + + + com.fasterxml.jackson.core + jackson-databind + ${jackson.databind.version} org.twitter4j diff --git a/transform-plugins/pom.xml b/transform-plugins/pom.xml index efa6d7475..dc0c6c1c3 100644 --- a/transform-plugins/pom.xml +++ b/transform-plugins/pom.xml @@ -72,7 +72,7 @@ com.fasterxml.jackson.core jackson-databind test - 2.13.4.2 + ${jackson.databind.version} io.cdap.cdap