From b6472ec0554f16c9a3c0902aae5763d4b263e4e9 Mon Sep 17 00:00:00 2001 From: Piotr Findeisen Date: Mon, 11 Apr 2022 11:38:02 +0200 Subject: [PATCH] Add reader and writer for Puffin, indexes and stats file format --- build.gradle | 1 + .../java/org/apache/iceberg/GuavaClasses.java | 2 + .../java/org/apache/iceberg/puffin/Blob.java | 75 +++++++ .../apache/iceberg/puffin/BlobMetadata.java | 81 ++++++++ .../apache/iceberg/puffin/FileMetadata.java | 46 +++++ .../iceberg/puffin/FileMetadataParser.java | 164 +++++++++++++++ .../org/apache/iceberg/puffin/Puffin.java | 133 ++++++++++++ .../puffin/PuffinCompressionCodec.java | 72 +++++++ .../apache/iceberg/puffin/PuffinFormat.java | 166 +++++++++++++++ .../apache/iceberg/puffin/PuffinReader.java | 187 +++++++++++++++++ .../apache/iceberg/puffin/PuffinWriter.java | 190 ++++++++++++++++++ .../iceberg/puffin/StandardBlobTypes.java | 30 +++ .../puffin/StandardPuffinProperties.java | 31 +++ .../org/apache/iceberg/util/JsonUtil.java | 10 + .../iceberg/puffin/PuffinFormatTestUtil.java | 37 ++++ .../puffin/TestFileMetadataParser.java | 120 +++++++++++ .../iceberg/puffin/TestPuffinFormat.java | 86 ++++++++ .../iceberg/puffin/TestPuffinReader.java | 153 ++++++++++++++ .../iceberg/puffin/TestPuffinWriter.java | 124 ++++++++++++ .../puffin/v1/empty-puffin-uncompressed.bin | Bin 0 -> 48 bytes .../v1/sample-metric-data-compressed-zstd.bin | Bin 0 -> 345 bytes .../v1/sample-metric-data-uncompressed.bin | Bin 0 -> 283 bytes dev/.rat-excludes | 1 + versions.props | 1 + 24 files changed, 1710 insertions(+) create mode 100644 core/src/main/java/org/apache/iceberg/puffin/Blob.java create mode 100644 core/src/main/java/org/apache/iceberg/puffin/BlobMetadata.java create mode 100644 core/src/main/java/org/apache/iceberg/puffin/FileMetadata.java create mode 100644 core/src/main/java/org/apache/iceberg/puffin/FileMetadataParser.java create mode 100644 core/src/main/java/org/apache/iceberg/puffin/Puffin.java create mode 100644 
core/src/main/java/org/apache/iceberg/puffin/PuffinCompressionCodec.java create mode 100644 core/src/main/java/org/apache/iceberg/puffin/PuffinFormat.java create mode 100644 core/src/main/java/org/apache/iceberg/puffin/PuffinReader.java create mode 100644 core/src/main/java/org/apache/iceberg/puffin/PuffinWriter.java create mode 100644 core/src/main/java/org/apache/iceberg/puffin/StandardBlobTypes.java create mode 100644 core/src/main/java/org/apache/iceberg/puffin/StandardPuffinProperties.java create mode 100644 core/src/test/java/org/apache/iceberg/puffin/PuffinFormatTestUtil.java create mode 100644 core/src/test/java/org/apache/iceberg/puffin/TestFileMetadataParser.java create mode 100644 core/src/test/java/org/apache/iceberg/puffin/TestPuffinFormat.java create mode 100644 core/src/test/java/org/apache/iceberg/puffin/TestPuffinReader.java create mode 100644 core/src/test/java/org/apache/iceberg/puffin/TestPuffinWriter.java create mode 100644 core/src/test/resources/org/apache/iceberg/puffin/v1/empty-puffin-uncompressed.bin create mode 100644 core/src/test/resources/org/apache/iceberg/puffin/v1/sample-metric-data-compressed-zstd.bin create mode 100644 core/src/test/resources/org/apache/iceberg/puffin/v1/sample-metric-data-uncompressed.bin diff --git a/build.gradle b/build.gradle index 4911a3ad191d..049ca17e3fa8 100644 --- a/build.gradle +++ b/build.gradle @@ -220,6 +220,7 @@ project(':iceberg-core') { exclude group: 'org.tukaani' // xz compression is not supported } + implementation 'io.airlift:aircompressor' implementation 'org.apache.httpcomponents.client5:httpclient5' implementation "com.fasterxml.jackson.core:jackson-databind" implementation "com.fasterxml.jackson.core:jackson-core" diff --git a/bundled-guava/src/main/java/org/apache/iceberg/GuavaClasses.java b/bundled-guava/src/main/java/org/apache/iceberg/GuavaClasses.java index 2bc1cf83bf3e..70700f57ac15 100644 --- a/bundled-guava/src/main/java/org/apache/iceberg/GuavaClasses.java +++ 
b/bundled-guava/src/main/java/org/apache/iceberg/GuavaClasses.java @@ -49,6 +49,7 @@ import com.google.common.hash.Hashing; import com.google.common.io.CountingOutputStream; import com.google.common.io.Files; +import com.google.common.io.Resources; import com.google.common.primitives.Bytes; import com.google.common.util.concurrent.MoreExecutors; import com.google.common.util.concurrent.ThreadFactoryBuilder; @@ -89,6 +90,7 @@ public class GuavaClasses { Hashing.class.getName(); Files.class.getName(); Bytes.class.getName(); + Resources.class.getName(); MoreExecutors.class.getName(); ThreadFactoryBuilder.class.getName(); Iterables.class.getName(); diff --git a/core/src/main/java/org/apache/iceberg/puffin/Blob.java b/core/src/main/java/org/apache/iceberg/puffin/Blob.java new file mode 100644 index 000000000000..350748a5969e --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/puffin/Blob.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+
+package org.apache.iceberg.puffin;
+
+import java.nio.ByteBuffer;
+import java.util.List;
+import java.util.Map;
+import javax.annotation.Nullable;
+import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
+
+/**
+ * Value holder describing one blob: its type, its input fields, the blob bytes, an optional
+ * requested compression codec and string properties.
+ *
+ * The field list and the property map are copied into immutable collections. The
+ * {@link ByteBuffer} is kept by reference and is not copied.
+ */
+public final class Blob {
+  private final String type;
+  private final List<Integer> inputFields;
+  private final ByteBuffer blobData;
+  private final PuffinCompressionCodec requestedCompression;
+  private final Map<String, String> properties;
+
+  /**
+   * Creates a blob with no requested compression codec and no properties.
+   *
+   * @param type blob type, must not be null
+   * @param inputFields input fields of the blob, must not be null
+   * @param blobData blob content, must not be null
+   */
+  public Blob(String type, List<Integer> inputFields, ByteBuffer blobData) {
+    this(type, inputFields, blobData, null, ImmutableMap.of());
+  }
+
+  /**
+   * Creates a blob.
+   *
+   * @param type blob type, must not be null
+   * @param inputFields input fields of the blob, must not be null
+   * @param blobData blob content, must not be null
+   * @param requestedCompression codec requested for this blob, or null when none is requested
+   * @param properties blob properties, must not be null
+   * @throws NullPointerException if any non-nullable argument is null
+   */
+  public Blob(
+      String type, List<Integer> inputFields, ByteBuffer blobData,
+      @Nullable PuffinCompressionCodec requestedCompression, Map<String, String> properties) {
+    Preconditions.checkNotNull(type, "type is null");
+    Preconditions.checkNotNull(inputFields, "inputFields is null");
+    Preconditions.checkNotNull(blobData, "blobData is null");
+    Preconditions.checkNotNull(properties, "properties is null");
+    this.type = type;
+    this.inputFields = ImmutableList.copyOf(inputFields);
+    this.blobData = blobData;
+    this.requestedCompression = requestedCompression;
+    this.properties = ImmutableMap.copyOf(properties);
+  }
+
+  /**
+   * Returns the blob type.
+   */
+  public String type() {
+    return type;
+  }
+
+  /**
+   * Returns an immutable copy of the input fields passed to the constructor.
+   */
+  public List<Integer> inputFields() {
+    return inputFields;
+  }
+
+  /**
+   * Returns the buffer passed to the constructor (same instance, not a copy).
+   */
+  public ByteBuffer blobData() {
+    return blobData;
+  }
+
+  /**
+   * Returns the compression codec requested for this blob, or null when none was requested.
+   */
+  @Nullable
+  public PuffinCompressionCodec requestedCompression() {
+    return requestedCompression;
+  }
+
+  /**
+   * Returns an immutable copy of the properties passed to the constructor.
+   */
+  public Map<String, String> properties() {
+    return properties;
+  }
+}
diff --git a/core/src/main/java/org/apache/iceberg/puffin/BlobMetadata.java b/core/src/main/java/org/apache/iceberg/puffin/BlobMetadata.java
new file mode 100644
index 000000000000..517ce765a077
--- /dev/null
+++ b/core/src/main/java/org/apache/iceberg/puffin/BlobMetadata.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache
Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.puffin;
+
+import java.util.List;
+import java.util.Map;
+import javax.annotation.Nullable;
+import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
+
+/**
+ * Metadata of one blob: its type, input fields, offset and length in the file, optional
+ * compression codec name and string properties.
+ *
+ * The field list and the property map are copied into immutable collections.
+ */
+public class BlobMetadata {
+  private final String type;
+  private final List<Integer> inputFields;
+  private final long offset;
+  private final long length;
+  private final String compressionCodec;
+  private final Map<String, String> properties;
+
+  /**
+   * Creates blob metadata.
+   *
+   * @param type blob type, must not be null
+   * @param inputFields input fields of the blob, must not be null
+   * @param offset offset of the blob in the file
+   * @param length length of the blob in the file
+   * @param compressionCodec name of the compression codec, or null
+   * @param properties blob properties, must not be null
+   * @throws NullPointerException if type, inputFields or properties is null
+   */
+  public BlobMetadata(
+      String type, List<Integer> inputFields, long offset, long length,
+      @Nullable String compressionCodec, Map<String, String> properties) {
+    Preconditions.checkNotNull(type, "type is null");
+    Preconditions.checkNotNull(inputFields, "inputFields is null");
+    Preconditions.checkNotNull(properties, "properties is null");
+    this.type = type;
+    this.inputFields = ImmutableList.copyOf(inputFields);
+    this.offset = offset;
+    this.length = length;
+    this.compressionCodec = compressionCodec;
+    this.properties = ImmutableMap.copyOf(properties);
+  }
+
+  /**
+   * Returns the blob type.
+   */
+  public String type() {
+    return type;
+  }
+
+  /**
+   * Returns an immutable copy of the input fields passed to the constructor.
+   */
+  public List<Integer> inputFields() {
+    return inputFields;
+  }
+
+  /**
+   * Offset in the file
+   */
+  public long offset() {
+    return offset;
+  }
+
+  /**
+   * Length in the file
+   */
+  public long length() {
+    return length;
+  }
+
+  /**
+   * Returns the compression codec name, or null when none was given.
+   */
+  @Nullable
+  public String compressionCodec() {
+    return compressionCodec;
+  }
+
+  /**
+   * Returns an immutable copy of the properties passed to the constructor.
+   */
+  public Map<String, String> properties() {
+    return properties;
+  }
+}
diff --git a/core/src/main/java/org/apache/iceberg/puffin/FileMetadata.java b/core/src/main/java/org/apache/iceberg/puffin/FileMetadata.java
new file mode 100644
index 000000000000..eb33edd051bc
--- /dev/null
+++ b/core/src/main/java/org/apache/iceberg/puffin/FileMetadata.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.puffin;
+
+import java.util.List;
+import java.util.Map;
+import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
+
+/**
+ * File-level metadata: the list of blob metadata entries and the file-level string properties.
+ * Both are copied into immutable collections on construction.
+ */
+public class FileMetadata {
+  private final List<BlobMetadata> blobs;
+  private final Map<String, String> properties;
+
+  /**
+   * Creates file metadata.
+   *
+   * @param blobs metadata of the blobs, must not be null
+   * @param properties file-level properties, must not be null
+   * @throws NullPointerException if blobs or properties is null
+   */
+  public FileMetadata(List<BlobMetadata> blobs, Map<String, String> properties) {
+    Preconditions.checkNotNull(blobs, "blobs is null");
+    Preconditions.checkNotNull(properties, "properties is null");
+    this.blobs = ImmutableList.copyOf(blobs);
+    this.properties = ImmutableMap.copyOf(properties);
+  }
+
+  /**
+   * Returns an immutable copy of the blob metadata list passed to the constructor.
+   */
+  public List<BlobMetadata> blobs() {
+    return blobs;
+  }
+
+  /**
+   * Returns an immutable copy of the properties passed to the constructor.
+   */
+  public Map<String, String> properties() {
+    return properties;
+  }
+}
diff --git a/core/src/main/java/org/apache/iceberg/puffin/FileMetadataParser.java b/core/src/main/java/org/apache/iceberg/puffin/FileMetadataParser.java
new file mode 100644
index 000000000000..aae714175b0e
--- /dev/null
+++ b/core/src/main/java/org/apache/iceberg/puffin/FileMetadataParser.java
@@ -0,0 +1,164 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.puffin;
+
+import com.fasterxml.jackson.core.JsonGenerator;
+import com.fasterxml.jackson.databind.JsonNode;
+import java.io.IOException;
+import java.io.StringWriter;
+import java.io.UncheckedIOException;
+import java.util.List;
+import java.util.Map;
+import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
+import org.apache.iceberg.util.JsonUtil;
+
+/**
+ * Converts {@link FileMetadata} and {@link BlobMetadata} to and from JSON.
+ */
+public final class FileMetadataParser {
+
+  private FileMetadataParser() {
+  }
+
+  // Field names of the file-level JSON object ("properties" is also used per blob)
+  private static final String BLOBS = "blobs";
+  private static final String PROPERTIES = "properties";
+
+  // Field names of a blob JSON object
+  private static final String TYPE = "type";
+  private static final String FIELDS = "fields";
+  private static final String OFFSET = "offset";
+  private static final String LENGTH = "length";
+  private static final String COMPRESSION_CODEC = "compression-codec";
+
+  /**
+   * Serializes file metadata to a JSON string.
+   *
+   * @param fileMetadata metadata to serialize
+   * @param pretty whether to use the generator's default pretty printer
+   * @return the JSON text
+   * @throws UncheckedIOException if writing the JSON fails
+   */
+  public static String toJson(FileMetadata fileMetadata, boolean pretty) {
+    try {
+      StringWriter writer = new StringWriter();
+      JsonGenerator generator = JsonUtil.factory().createGenerator(writer);
+      if (pretty) {
+        generator.useDefaultPrettyPrinter();
+      }
+      toJson(fileMetadata, generator);
+      generator.flush();
+      return writer.toString();
+    } catch (IOException e) {
+      throw new UncheckedIOException("Failed to write json for: " + fileMetadata, e);
+    }
+  }
+
+  /**
+   * Parses file metadata from a JSON string.
+   *
+   * @param json JSON text
+   * @return the parsed metadata
+   * @throws UncheckedIOException if the text cannot be read as JSON
+   */
+  public static FileMetadata fromJson(String json) {
+    try {
+      return fromJson(JsonUtil.mapper().readValue(json, JsonNode.class));
+    } catch (IOException e) {
+      throw new UncheckedIOException(e);
+    }
+  }
+
+  /**
+   * Parses file metadata from an already parsed JSON node.
+   */
+  static FileMetadata fromJson(JsonNode json) {
+    return fileMetadataFromJson(json);
+  }
+
+  /**
+   * Writes file metadata as one JSON object: a "blobs" array followed by a "properties" object.
+   * The "properties" object is always written, even when it is empty.
+   */
+  static void toJson(FileMetadata fileMetadata, JsonGenerator generator) throws IOException {
+    generator.writeStartObject();
+
+    generator.writeArrayFieldStart(BLOBS);
+    for (BlobMetadata blobMetadata : fileMetadata.blobs()) {
+      toJson(blobMetadata, generator);
+    }
+    generator.writeEndArray();
+
+    generator.writeObjectFieldStart(PROPERTIES);
+    for (Map.Entry<String, String> entry : fileMetadata.properties().entrySet()) {
+      generator.writeStringField(entry.getKey(), entry.getValue());
+    }
+    generator.writeEndObject();
+
+    generator.writeEndObject();
+  }
+
+  /**
+   * Reads file metadata from a JSON object. "blobs" must be present and be an array;
+   * "properties" is optional and defaults to an empty map.
+   *
+   * @throws IllegalArgumentException if "blobs" is missing or is not an array
+   */
+  static FileMetadata fileMetadataFromJson(JsonNode json) {
+    ImmutableList.Builder<BlobMetadata> blobs = ImmutableList.builder();
+    JsonNode blobsJson = json.get(BLOBS);
+    Preconditions.checkArgument(blobsJson != null && blobsJson.isArray(),
+        "Cannot parse blobs from non-array: %s", blobsJson);
+    for (JsonNode blobJson : blobsJson) {
+      blobs.add(blobMetadataFromJson(blobJson));
+    }
+
+    Map<String, String> properties = ImmutableMap.of();
+    JsonNode propertiesJson = json.get(PROPERTIES);
+    if (propertiesJson != null) {
+      properties = JsonUtil.getStringMap(PROPERTIES, json);
+    }
+
+    return new FileMetadata(
+        blobs.build(),
+        properties);
+  }
+
+  /**
+   * Writes one blob's metadata as a JSON object. "compression-codec" is written only when the
+   * codec is not null, and "properties" only when the map is not empty.
+   */
+  static void toJson(BlobMetadata blobMetadata, JsonGenerator generator) throws IOException {
+    generator.writeStartObject();
+
+    generator.writeStringField(TYPE, blobMetadata.type());
+
+    generator.writeArrayFieldStart(FIELDS);
+    for (int field : blobMetadata.inputFields()) {
+      generator.writeNumber(field);
+    }
+    generator.writeEndArray();
+
+    generator.writeNumberField(OFFSET, blobMetadata.offset());
+    generator.writeNumberField(LENGTH, blobMetadata.length());
+
+    if (blobMetadata.compressionCodec() != null) {
+      generator.writeStringField(COMPRESSION_CODEC, blobMetadata.compressionCodec());
+    }
+
+    if (!blobMetadata.properties().isEmpty()) {
+      generator.writeObjectFieldStart(PROPERTIES);
+      for (Map.Entry<String, String> entry : blobMetadata.properties().entrySet()) {
+        generator.writeStringField(entry.getKey(), entry.getValue());
+      }
+      generator.writeEndObject();
+    }
+
+    generator.writeEndObject();
+  }
+
+  /**
+   * Reads one blob's metadata from a JSON object. "properties" is optional and defaults to an
+   * empty map; "compression-codec" is read with {@code JsonUtil.getStringOrNull}.
+   */
+  static BlobMetadata blobMetadataFromJson(JsonNode json) {
+    String type = JsonUtil.getString(TYPE, json);
+    List<Integer> fields = JsonUtil.getIntegerList(FIELDS, json);
+    long offset = JsonUtil.getLong(OFFSET, json);
+    long length = JsonUtil.getLong(LENGTH, json);
+    String compressionCodec = JsonUtil.getStringOrNull(COMPRESSION_CODEC, json);
+    Map<String, String> properties = ImmutableMap.of();
+    JsonNode propertiesJson = json.get(PROPERTIES);
+    if (propertiesJson != null) {
+      properties = JsonUtil.getStringMap(PROPERTIES, json);
+    }
+
+    return new BlobMetadata(
+        type,
+        fields,
+        offset,
+        length,
+        compressionCodec,
+        properties);
+  }
+}
diff --git a/core/src/main/java/org/apache/iceberg/puffin/Puffin.java b/core/src/main/java/org/apache/iceberg/puffin/Puffin.java
new file mode 100644
index 000000000000..6cfb8be20699
--- /dev/null
+++ b/core/src/main/java/org/apache/iceberg/puffin/Puffin.java
@@ -0,0 +1,133 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.puffin;
+
+import java.util.Map;
+import org.apache.iceberg.io.InputFile;
+import org.apache.iceberg.io.OutputFile;
+import org.apache.iceberg.relocated.com.google.common.collect.Maps;
+
+/**
+ * Utility class for reading and writing Puffin files.
+ */
+public final class Puffin {
+  private Puffin() {
+  }
+
+  /**
+   * Starts building a {@link PuffinWriter} for the given output file.
+   */
+  public static WriteBuilder write(OutputFile outputFile) {
+    return new WriteBuilder(outputFile);
+  }
+
+  /**
+   * A builder for {@link PuffinWriter}.
+   */
+  public static final class WriteBuilder {
+    private final OutputFile outputFile;
+    // Linked map, so properties keep the order in which they were set
+    private final Map<String, String> properties = Maps.newLinkedHashMap();
+    private boolean compressFooter = false;
+    private PuffinCompressionCodec defaultBlobCompression = PuffinCompressionCodec.NONE;
+
+    private WriteBuilder(OutputFile outputFile) {
+      this.outputFile = outputFile;
+    }
+
+    /**
+     * Sets file-level property to be written
+     */
+    public WriteBuilder set(String property, String value) {
+      properties.put(property, value);
+      return this;
+    }
+
+    /**
+     * Sets file-level properties to be written
+     */
+    public WriteBuilder setAll(Map<String, String> props) {
+      this.properties.putAll(props);
+      return this;
+    }
+
+    /**
+     * Sets file-level {@value StandardPuffinProperties#CREATED_BY_PROPERTY} property.
+     */
+    public WriteBuilder createdBy(String applicationIdentifier) {
+      this.properties.put(StandardPuffinProperties.CREATED_BY_PROPERTY, applicationIdentifier);
+      return this;
+    }
+
+    /**
+     * Configures the writer to compress the footer.
+     */
+    public WriteBuilder compressFooter() {
+      this.compressFooter = true;
+      return this;
+    }
+
+    /**
+     * Configures the writer to compress the blobs. The default is
+     * {@link PuffinCompressionCodec#NONE}.
+     * Can be overridden by {@link Blob} attribute.
+     */
+    public WriteBuilder compressBlobs(PuffinCompressionCodec compression) {
+      this.defaultBlobCompression = compression;
+      return this;
+    }
+
+    /**
+     * Creates the writer from the values collected so far.
+     */
+    public PuffinWriter build() {
+      return new PuffinWriter(outputFile, properties, compressFooter, defaultBlobCompression);
+    }
+  }
+
+  /**
+   * Starts building a {@link PuffinReader} for the given input file.
+   */
+  public static ReadBuilder read(InputFile inputFile) {
+    return new ReadBuilder(inputFile);
+  }
+
+  /**
+   * A builder for {@link PuffinReader}.
+   */
+  public static final class ReadBuilder {
+    private final InputFile inputFile;
+    // Boxed so that null means "not provided"
+    private Long fileSize;
+    private Long footerSize;
+
+    private ReadBuilder(InputFile inputFile) {
+      this.inputFile = inputFile;
+    }
+
+    /**
+     * Passes known file size to the reader. This may improve read performance.
+     */
+    public ReadBuilder withFileSize(long size) {
+      this.fileSize = size;
+      return this;
+    }
+
+    /**
+     * Passes known footer size to the reader. This may improve read performance.
+     */
+    public ReadBuilder withFooterSize(long size) {
+      this.footerSize = size;
+      return this;
+    }
+
+    /**
+     * Creates the reader; sizes that were not provided are passed as null.
+     */
+    public PuffinReader build() {
+      return new PuffinReader(inputFile, fileSize, footerSize);
+    }
+  }
+}
diff --git a/core/src/main/java/org/apache/iceberg/puffin/PuffinCompressionCodec.java b/core/src/main/java/org/apache/iceberg/puffin/PuffinCompressionCodec.java
new file mode 100644
index 000000000000..dc8182df4762
--- /dev/null
+++ b/core/src/main/java/org/apache/iceberg/puffin/PuffinCompressionCodec.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.puffin;
+
+import java.util.Map;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+import javax.annotation.Nullable;
+import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
+import org.apache.iceberg.relocated.com.google.common.collect.Maps;
+
+/**
+ * Compression codecs, each identified by a codec name. {@link #NONE} has a null codec name.
+ */
+public enum PuffinCompressionCodec {
+  /**
+   * No compression
+   */
+  NONE(null),
+
+  /**
+   * LZ4 single compression frame with content size present
+   */
+  LZ4("lz4"),
+
+  /**
+   * Zstandard single compression frame with content size present
+   */
+  ZSTD("zstd"),
+  /**/;
+
+  // Lookup by codec name. NONE is registered under the null key, so the map implementation
+  // must accept a null key. Duplicate names fail class initialization.
+  private static final Map<String, PuffinCompressionCodec> BY_NAME = Stream.of(values())
+      .collect(Collectors.toMap(
+          PuffinCompressionCodec::codecName,
+          Function.identity(),
+          (a, b) -> {
+            throw new UnsupportedOperationException("Two enum instances with same name");
+          },
+          Maps::newHashMap));
+
+  private final String codecName;
+
+  PuffinCompressionCodec(String codecName) {
+    this.codecName = codecName;
+  }
+
+  /**
+   * Returns the codec name, or null for {@link #NONE}.
+   */
+  @Nullable
+  public String codecName() {
+    return codecName;
+  }
+
+  /**
+   * Returns the codec registered under the given name; a null name resolves to {@link #NONE}.
+   *
+   * @param codecName codec name, may be null
+   * @return the matching codec, never null
+   * @throws IllegalArgumentException if no codec has this name
+   */
+  public static PuffinCompressionCodec forName(@Nullable String codecName) {
+    PuffinCompressionCodec codec = BY_NAME.get(codecName);
+    Preconditions.checkArgument(codec != null, "Unknown codec name: %s", codecName);
+    return codec;
+  }
+}
diff --git a/core/src/main/java/org/apache/iceberg/puffin/PuffinFormat.java b/core/src/main/java/org/apache/iceberg/puffin/PuffinFormat.java
new file mode 100644
index 000000000000..7a193d84e1ae
--- /dev/null
+++ b/core/src/main/java/org/apache/iceberg/puffin/PuffinFormat.java
@@ -0,0 +1,166 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.puffin;
+
+import io.airlift.compress.Compressor;
+import io.airlift.compress.zstd.ZstdCompressor;
+import io.airlift.compress.zstd.ZstdDecompressor;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.nio.ByteBuffer;
+import java.util.Map;
+import java.util.function.Function;
+import java.util.stream.Stream;
+import javax.annotation.Nullable;
+import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
+import org.apache.iceberg.util.ByteBuffers;
+import org.apache.iceberg.util.Pair;
+
+/**
+ * Constants and helpers for the binary layout: magic bytes, footer struct offsets, footer
+ * flags, little-endian integer encoding and blob compression.
+ */
+final class PuffinFormat {
+  private PuffinFormat() {
+  }
+
+  /**
+   * A flag in the footer struct, identified by byte number and bit number within that byte.
+   */
+  enum Flag {
+    FOOTER_PAYLOAD_COMPRESSED(0, 0),
+    /**/;
+
+    // Lookup keyed by (byteNumber, bitNumber)
+    private static final Map<Pair<Integer, Integer>, Flag> BY_BYTE_AND_BIT = Stream.of(values())
+        .collect(ImmutableMap.toImmutableMap(
+            flag -> Pair.of(flag.byteNumber(), flag.bitNumber()), Function.identity()));
+
+    private final int byteNumber;
+    private final int bitNumber;
+
+    Flag(int byteNumber, int bitNumber) {
+      Preconditions.checkArgument(
+          0 <= byteNumber && byteNumber < PuffinFormat.FOOTER_STRUCT_FLAGS_LENGTH,
+          "Invalid byteNumber");
+      Preconditions.checkArgument(0 <= bitNumber && bitNumber < Byte.SIZE, "Invalid bitNumber");
+      this.byteNumber = byteNumber;
+      this.bitNumber = bitNumber;
+    }
+
+    /**
+     * Returns the flag defined for the given byte and bit, or null when none is defined.
+     */
+    @Nullable
+    static Flag fromBit(int byteNumber, int bitNumber) {
+      return BY_BYTE_AND_BIT.get(Pair.of(byteNumber, bitNumber));
+    }
+
+    public int byteNumber() {
+      return byteNumber;
+    }
+
+    public int bitNumber() {
+      return bitNumber;
+    }
+  }
+
+  static final int FOOTER_START_MAGIC_OFFSET = 0;
+  static final int FOOTER_START_MAGIC_LENGTH = getMagic().length;
+
+  // "Footer struct" denotes the fixed-length portion of the Footer:
+  // 4-byte payload size, 4 bytes of flags, then the magic
+  static final int FOOTER_STRUCT_PAYLOAD_SIZE_OFFSET = 0;
+  static final int FOOTER_STRUCT_FLAGS_OFFSET = FOOTER_STRUCT_PAYLOAD_SIZE_OFFSET + 4;
+  static final int FOOTER_STRUCT_FLAGS_LENGTH = 4;
+  static final int FOOTER_STRUCT_MAGIC_OFFSET = FOOTER_STRUCT_FLAGS_OFFSET + FOOTER_STRUCT_FLAGS_LENGTH;
+  static final int FOOTER_STRUCT_LENGTH = FOOTER_STRUCT_MAGIC_OFFSET + getMagic().length;
+
+  static final PuffinCompressionCodec FOOTER_COMPRESSION_CODEC = PuffinCompressionCodec.LZ4;
+
+  /**
+   * Returns a new array holding the four magic bytes 0x50 0x46 0x41 0x31 (ASCII "PFA1").
+   */
+  static byte[] getMagic() {
+    return new byte[] {0x50, 0x46, 0x41, 0x31};
+  }
+
+  /**
+   * Writes the value as four bytes, least significant byte first.
+   */
+  static void writeIntegerLittleEndian(OutputStream outputStream, int value) throws IOException {
+    outputStream.write(0xFF & value);
+    outputStream.write(0xFF & (value >> 8));
+    outputStream.write(0xFF & (value >> 16));
+    outputStream.write(0xFF & (value >> 24));
+  }
+
+  /**
+   * Reads four bytes starting at {@code offset} as an int, least significant byte first.
+   */
+  static int readIntegerLittleEndian(byte[] data, int offset) {
+    return Byte.toUnsignedInt(data[offset]) |
+        (Byte.toUnsignedInt(data[offset + 1]) << 8) |
+        (Byte.toUnsignedInt(data[offset + 2]) << 16) |
+        (Byte.toUnsignedInt(data[offset + 3]) << 24);
+  }
+
+  /**
+   * Compresses the remaining bytes of {@code input} with the given codec. The position of
+   * {@code input} itself is not changed.
+   *
+   * @return for NONE a duplicate of the input, for ZSTD a new buffer with the compressed bytes
+   * @throws UnsupportedOperationException for LZ4, which is not implemented yet
+   */
+  static ByteBuffer compress(PuffinCompressionCodec codec, ByteBuffer input) {
+    switch (codec) {
+      case NONE:
+        return input.duplicate();
+      case LZ4:
+        // TODO requires LZ4 frame compressor, e.g. https://github.com/airlift/aircompressor/pull/142
+        break;
+      case ZSTD:
+        return compress(new ZstdCompressor(), input);
+    }
+    throw new UnsupportedOperationException("Unsupported codec: " + codec);
+  }
+
+  // Compresses into a buffer sized for the worst case; works on a duplicate so the caller's
+  // position is untouched, then flips the output for reading.
+  private static ByteBuffer compress(Compressor compressor, ByteBuffer input) {
+    ByteBuffer output = ByteBuffer.allocate(compressor.maxCompressedLength(input.remaining()));
+    compressor.compress(input.duplicate(), output);
+    output.flip();
+    return output;
+  }
+
+  /**
+   * Decompresses the remaining bytes of {@code input} with the given codec.
+   *
+   * @return for NONE a duplicate of the input, for ZSTD a new buffer wrapping the decompressed
+   *     bytes
+   * @throws UnsupportedOperationException for LZ4, which is not implemented yet
+   * @throws IllegalStateException if ZSTD yields a different length than the frame declares
+   */
+  static ByteBuffer decompress(PuffinCompressionCodec codec, ByteBuffer input) {
+    switch (codec) {
+      case NONE:
+        return input.duplicate();
+
+      case LZ4:
+        // TODO requires LZ4 frame decompressor, e.g. https://github.com/airlift/aircompressor/pull/142
+        break;
+
+      case ZSTD: {
+        byte[] inputBytes;
+        int inputOffset;
+        int inputLength;
+        if (input.hasArray()) {
+          inputBytes = input.array();
+          // Buffer position p corresponds to array index arrayOffset() + p, so the position
+          // must be included; otherwise a buffer such as ByteBuffer.wrap(arr, off, len) would
+          // be read from the start of the array.
+          inputOffset = input.arrayOffset() + input.position();
+          inputLength = input.remaining();
+        } else {
+          // TODO implement ZstdDecompressor.getDecompressedSize for ByteBuffer to avoid copying
+          inputBytes = ByteBuffers.toByteArray(input);
+          inputOffset = 0;
+          inputLength = inputBytes.length;
+        }
+
+        byte[] decompressed =
+            new byte[Math.toIntExact(ZstdDecompressor.getDecompressedSize(inputBytes, inputOffset, inputLength))];
+        int decompressedLength =
+            new ZstdDecompressor().decompress(
+                inputBytes,
+                inputOffset,
+                inputLength,
+                decompressed,
+                0,
+                decompressed.length);
+        Preconditions.checkState(decompressedLength == decompressed.length, "Invalid decompressed length");
+        return ByteBuffer.wrap(decompressed);
+      }
+    }
+
+    throw new UnsupportedOperationException("Unsupported codec: " + codec);
+  }
+}
diff --git a/core/src/main/java/org/apache/iceberg/puffin/PuffinReader.java b/core/src/main/java/org/apache/iceberg/puffin/PuffinReader.java
new file mode 100644
index 000000000000..0aecfe6456e5
--- /dev/null
+++ b/core/src/main/java/org/apache/iceberg/puffin/PuffinReader.java
@@ -0,0 +1,187 @@
+/*
+ * Licensed to the Apache Software
Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.puffin; + +import java.io.Closeable; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.Comparator; +import java.util.EnumSet; +import java.util.List; +import java.util.Set; +import javax.annotation.Nullable; +import org.apache.iceberg.io.InputFile; +import org.apache.iceberg.io.RangeReadable; +import org.apache.iceberg.io.SeekableInputStream; +import org.apache.iceberg.puffin.PuffinFormat.Flag; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.relocated.com.google.common.io.ByteStreams; +import org.apache.iceberg.util.Pair; + +public class PuffinReader implements Closeable { + // Must not be modified + private static final byte[] MAGIC = PuffinFormat.getMagic(); + + private final long fileSize; + private final SeekableInputStream input; + private Integer knownFooterSize; + private FileMetadata knownFileMetadata; + + PuffinReader(InputFile inputFile, @Nullable Long fileSize, @Nullable Long footerSize) { + 
Preconditions.checkNotNull(inputFile, "inputFile is null"); + this.fileSize = fileSize == null ? inputFile.getLength() : fileSize; + this.input = inputFile.newStream(); + if (footerSize != null) { + Preconditions.checkArgument(0 < footerSize && footerSize <= this.fileSize - MAGIC.length, + "Invalid footer size: %s", footerSize); + this.knownFooterSize = Math.toIntExact(footerSize); + } + } + + public FileMetadata fileMetadata() throws IOException { + if (knownFileMetadata == null) { + int footerSize = footerSize(); + byte[] footer = readInput(fileSize - footerSize, footerSize); + + checkMagic(footer, PuffinFormat.FOOTER_START_MAGIC_OFFSET); + int footerStructOffset = footerSize - PuffinFormat.FOOTER_STRUCT_LENGTH; + checkMagic(footer, footerStructOffset + PuffinFormat.FOOTER_STRUCT_MAGIC_OFFSET); + + PuffinCompressionCodec footerCompression = PuffinCompressionCodec.NONE; + for (Flag flag : decodeFlags(footer, footerStructOffset)) { + switch (flag) { + case FOOTER_PAYLOAD_COMPRESSED: + footerCompression = PuffinFormat.FOOTER_COMPRESSION_CODEC; + break; + default: + throw new IllegalStateException("Unsupported flag: " + flag); + } + } + + int footerPayloadSize = PuffinFormat.readIntegerLittleEndian( + footer, + footerStructOffset + PuffinFormat.FOOTER_STRUCT_PAYLOAD_SIZE_OFFSET); + Preconditions.checkState( + footerSize == PuffinFormat.FOOTER_START_MAGIC_LENGTH + footerPayloadSize + PuffinFormat.FOOTER_STRUCT_LENGTH, + "Unexpected footer payload size value %s for footer size %s", footerPayloadSize, footerSize); + + ByteBuffer footerPayload = ByteBuffer.wrap(footer, 4, footerPayloadSize); + ByteBuffer footerJson = PuffinFormat.decompress(footerCompression, footerPayload); + this.knownFileMetadata = parseFileMetadata(footerJson); + } + return knownFileMetadata; + } + + private Set decodeFlags(byte[] footer, int footerStructOffset) { + EnumSet flags = EnumSet.noneOf(Flag.class); + for (int byteNumber = 0; byteNumber < PuffinFormat.FOOTER_STRUCT_FLAGS_LENGTH; 
byteNumber++) { + int flagByte = + Byte.toUnsignedInt(footer[footerStructOffset + PuffinFormat.FOOTER_STRUCT_FLAGS_OFFSET + byteNumber]); + int bitNumber = 0; + while (flagByte != 0) { + if ((flagByte & 0x1) != 0) { + Flag flag = Flag.fromBit(byteNumber, bitNumber); + Preconditions.checkState(flag != null, "Unknown flag byte %s and bit %s set", byteNumber, bitNumber); + flags.add(flag); + } + flagByte = flagByte >> 1; + bitNumber++; + } + } + return flags; + } + + public Iterable> readAll(List blobs) { + if (blobs.isEmpty()) { + return ImmutableList.of(); + } + + // TODO inspect blob offsets and coalesce read regions close to each other + + return () -> blobs.stream() + .sorted(Comparator.comparingLong(BlobMetadata::offset)) + .map((BlobMetadata blobMetadata) -> { + try { + input.seek(blobMetadata.offset()); + byte[] bytes = new byte[Math.toIntExact(blobMetadata.length())]; + ByteStreams.readFully(input, bytes); + ByteBuffer rawData = ByteBuffer.wrap(bytes); + PuffinCompressionCodec codec = PuffinCompressionCodec.forName(blobMetadata.compressionCodec()); + ByteBuffer data = PuffinFormat.decompress(codec, rawData); + return Pair.of(blobMetadata, data); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + }) + .iterator(); + } + + private static void checkMagic(byte[] data, int offset) { + byte[] read = Arrays.copyOfRange(data, offset, offset + MAGIC.length); + if (!Arrays.equals(read, MAGIC)) { + throw new IllegalStateException(String.format( + "Invalid file: expected magic at offset %s: %s, but got %s", + offset, Arrays.toString(MAGIC), Arrays.toString(read))); + } + } + + private int footerSize() throws IOException { + if (knownFooterSize == null) { + Preconditions.checkState( + fileSize >= PuffinFormat.FOOTER_STRUCT_LENGTH, + "Invalid file: file length %s is less tha minimal length of the footer tail %s", + fileSize, PuffinFormat.FOOTER_STRUCT_LENGTH); + byte[] footerStruct = readInput(fileSize - PuffinFormat.FOOTER_STRUCT_LENGTH, 
PuffinFormat.FOOTER_STRUCT_LENGTH); + checkMagic(footerStruct, PuffinFormat.FOOTER_STRUCT_MAGIC_OFFSET); + + int footerPayloadSize = PuffinFormat.readIntegerLittleEndian( + footerStruct, PuffinFormat.FOOTER_STRUCT_PAYLOAD_SIZE_OFFSET); + knownFooterSize = PuffinFormat.FOOTER_START_MAGIC_LENGTH + footerPayloadSize + PuffinFormat.FOOTER_STRUCT_LENGTH; + } + return knownFooterSize; + } + + private byte[] readInput(long offset, int length) throws IOException { + byte[] data = new byte[length]; + if (input instanceof RangeReadable) { + ((RangeReadable) input).readFully(offset, data); + } else { + input.seek(offset); + ByteStreams.readFully(input, data); + } + return data; + } + + private static FileMetadata parseFileMetadata(ByteBuffer data) { + String footerJson = StandardCharsets.UTF_8.decode(data).toString(); + return FileMetadataParser.fromJson(footerJson); + } + + @Override + public void close() throws IOException { + input.close(); + knownFooterSize = null; + knownFileMetadata = null; + } +} diff --git a/core/src/main/java/org/apache/iceberg/puffin/PuffinWriter.java b/core/src/main/java/org/apache/iceberg/puffin/PuffinWriter.java new file mode 100644 index 000000000000..ee16edc9f488 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/puffin/PuffinWriter.java @@ -0,0 +1,190 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.puffin; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.nio.ByteBuffer; +import java.nio.channels.Channels; +import java.nio.channels.WritableByteChannel; +import java.nio.charset.StandardCharsets; +import java.util.EnumSet; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; +import org.apache.iceberg.Metrics; +import org.apache.iceberg.io.FileAppender; +import org.apache.iceberg.io.OutputFile; +import org.apache.iceberg.io.PositionOutputStream; +import org.apache.iceberg.puffin.PuffinFormat.Flag; +import org.apache.iceberg.relocated.com.google.common.base.MoreObjects; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; + +public class PuffinWriter implements FileAppender { + // Must not be modified + private static final byte[] MAGIC = PuffinFormat.getMagic(); + + private final PositionOutputStream outputStream; + private final Map properties; + private final PuffinCompressionCodec footerCompression; + private final PuffinCompressionCodec defaultBlobCompression; + + private final List writtenBlobsMetadata = Lists.newArrayList(); + private boolean headerWritten; + private boolean finished; + private Optional footerSize = Optional.empty(); + private 
Optional fileSize = Optional.empty(); + + PuffinWriter( + OutputFile outputFile, Map properties, boolean compressFooter, + PuffinCompressionCodec defaultBlobCompression) { + Preconditions.checkNotNull(outputFile, "outputFile is null"); + Preconditions.checkNotNull(properties, "properties is null"); + Preconditions.checkNotNull(defaultBlobCompression, "defaultBlobCompression is null"); + this.outputStream = outputFile.create(); + this.properties = ImmutableMap.copyOf(properties); + this.footerCompression = compressFooter ? PuffinFormat.FOOTER_COMPRESSION_CODEC : PuffinCompressionCodec.NONE; + this.defaultBlobCompression = defaultBlobCompression; + } + + @Override + public void add(Blob blob) { + Preconditions.checkNotNull(blob, "blob is null"); + checkNotFinished(); + try { + writeHeaderIfNeeded(); + long fileOffset = outputStream.getPos(); + PuffinCompressionCodec codec = MoreObjects.firstNonNull(blob.requestedCompression(), defaultBlobCompression); + ByteBuffer rawData = PuffinFormat.compress(codec, blob.blobData()); + int length = rawData.remaining(); + writeFully(rawData); + writtenBlobsMetadata.add(new BlobMetadata(blob.type(), blob.inputFields(), fileOffset, length, + codec.codecName(), blob.properties())); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + @Override + public Metrics metrics() { + return new Metrics(); + } + + @Override + public long length() { + return fileSize(); + } + + @Override + public void close() throws IOException { + if (!finished) { + finish(); + } + + outputStream.close(); + } + + private void writeHeaderIfNeeded() throws IOException { + if (headerWritten) { + return; + } + + this.outputStream.write(MAGIC); + this.headerWritten = true; + } + + public void finish() throws IOException { + checkNotFinished(); + writeHeaderIfNeeded(); + if (footerSize.isPresent()) { + throw new IllegalStateException("footerSize already set"); + } + + long footerOffset = outputStream.getPos(); + writeFooter(); + this.footerSize 
= Optional.of(Math.toIntExact(outputStream.getPos() - footerOffset)); + this.fileSize = Optional.of(outputStream.getPos()); + this.finished = true; + } + + private void writeFooter() throws IOException { + FileMetadata fileMetadata = new FileMetadata(writtenBlobsMetadata, properties); + ByteBuffer footerJson = ByteBuffer.wrap( + FileMetadataParser.toJson(fileMetadata, false).getBytes(StandardCharsets.UTF_8)); + ByteBuffer footerPayload = PuffinFormat.compress(footerCompression, footerJson); + outputStream.write(MAGIC); + int footerPayloadLength = footerPayload.remaining(); + writeFully(footerPayload); + PuffinFormat.writeIntegerLittleEndian(outputStream, footerPayloadLength); + writeFlags(); + outputStream.write(MAGIC); + } + + private void writeFlags() throws IOException { + Map> flagsByByteNumber = fileFlags().stream() + .collect(Collectors.groupingBy(Flag::byteNumber)); + for (int byteNumber = 0; byteNumber < PuffinFormat.FOOTER_STRUCT_FLAGS_LENGTH; byteNumber++) { + int byteFlag = 0; + for (Flag flag : flagsByByteNumber.getOrDefault(byteNumber, ImmutableList.of())) { + byteFlag |= 0x1 << flag.bitNumber(); + } + outputStream.write(byteFlag); + } + } + + private void writeFully(ByteBuffer buffer) throws IOException { + WritableByteChannel channel = Channels.newChannel(outputStream); + while (buffer.remaining() > 0) { + channel.write(buffer); + } + } + + public long footerSize() { + return footerSize.orElseThrow(() -> new IllegalStateException("Footer not written yet")); + } + + public long fileSize() { + return fileSize.orElseThrow(() -> new IllegalStateException("File not written yet")); + } + + public List writtenBlobsMetadata() { + return ImmutableList.copyOf(writtenBlobsMetadata); + } + + private Set fileFlags() { + EnumSet flags = EnumSet.noneOf(Flag.class); + if (footerCompression != PuffinCompressionCodec.NONE) { + flags.add(Flag.FOOTER_PAYLOAD_COMPRESSED); + } + + return flags; + } + + private void checkNotFinished() { + if (finished) { + throw new 
IllegalStateException("Writer already finished"); + } + } +} diff --git a/core/src/main/java/org/apache/iceberg/puffin/StandardBlobTypes.java b/core/src/main/java/org/apache/iceberg/puffin/StandardBlobTypes.java new file mode 100644 index 000000000000..befbe37c41c5 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/puffin/StandardBlobTypes.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.puffin; + +public final class StandardBlobTypes { + private StandardBlobTypes() { + } + + /** + * A serialized form of a "compact" Theta sketch produced by the Apache DataSketches library + */ + public static final String APACHE_DATASKETCHES_THETA_V1 = "apache-datasketches-theta-v1"; +} diff --git a/core/src/main/java/org/apache/iceberg/puffin/StandardPuffinProperties.java b/core/src/main/java/org/apache/iceberg/puffin/StandardPuffinProperties.java new file mode 100644 index 000000000000..8d50ba9e83d4 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/puffin/StandardPuffinProperties.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.puffin; + +public final class StandardPuffinProperties { + private StandardPuffinProperties() { + } + + /** + * human-readable identification of the application writing the file, + * along with its version. Example "Trino version 381". + */ + public static final String CREATED_BY_PROPERTY = "created-by"; +} diff --git a/core/src/main/java/org/apache/iceberg/util/JsonUtil.java b/core/src/main/java/org/apache/iceberg/util/JsonUtil.java index 9e6e69eb42a0..ff4a185d8aa3 100644 --- a/core/src/main/java/org/apache/iceberg/util/JsonUtil.java +++ b/core/src/main/java/org/apache/iceberg/util/JsonUtil.java @@ -166,11 +166,21 @@ public static List getStringListOrNull(String property, JsonNode node) { .build(); } + public static List getIntegerList(String property, JsonNode node) { + return ImmutableList.builder() + .addAll(new JsonIntegerArrayIterator(property, node)) + .build(); + } + public static Set getIntegerSetOrNull(String property, JsonNode node) { if (!node.has(property) || node.get(property).isNull()) { return null; } + return getIntegerSet(property, node); + } + + public static Set getIntegerSet(String property, JsonNode node) { return ImmutableSet.builder() .addAll(new JsonIntegerArrayIterator(property, node)) .build(); diff --git 
a/core/src/test/java/org/apache/iceberg/puffin/PuffinFormatTestUtil.java b/core/src/test/java/org/apache/iceberg/puffin/PuffinFormatTestUtil.java new file mode 100644 index 000000000000..d5e0f094f431 --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/puffin/PuffinFormatTestUtil.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg.puffin; + +import org.apache.iceberg.relocated.com.google.common.io.Resources; + +public final class PuffinFormatTestUtil { + private PuffinFormatTestUtil() { + } + + // footer size for v1/empty-puffin-uncompressed.bin + public static final long EMPTY_PUFFIN_UNCOMPRESSED_FOOTER_SIZE = 44; + + // footer size for v1/sample-metric-data-compressed-zstd.bin + public static final long SAMPLE_METRIC_DATA_COMPRESSED_ZSTD_FOOTER_SIZE = 242; + + static byte[] readTestResource(String resourceName) throws Exception { + return Resources.toByteArray(Resources.getResource(PuffinFormatTestUtil.class, resourceName)); + } +} diff --git a/core/src/test/java/org/apache/iceberg/puffin/TestFileMetadataParser.java b/core/src/test/java/org/apache/iceberg/puffin/TestFileMetadataParser.java new file mode 100644 index 000000000000..7d2257545a39 --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/puffin/TestFileMetadataParser.java @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg.puffin; + +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.junit.Test; + +import static org.assertj.core.api.Assertions.assertThat; + +public class TestFileMetadataParser { + @Test + public void testMinimalFileMetadata() { + testJsonSerialization( + new FileMetadata(ImmutableList.of(), ImmutableMap.of()), + "{\n" + + " \"blobs\" : [ ],\n" + + " \"properties\" : { }\n" + + "}"); + } + + @Test + public void testFileProperties() { + testJsonSerialization( + new FileMetadata(ImmutableList.of(), ImmutableMap.of("a property", "a property value")), + "{\n" + + " \"blobs\" : [ ],\n" + + " \"properties\" : {\n" + + " \"a property\" : \"a property value\"\n" + + " }\n" + + "}"); + + testJsonSerialization( + new FileMetadata( + ImmutableList.of(), + ImmutableMap.of("a property", "a property value", "another one", "also with value")), + "{\n" + + " \"blobs\" : [ ],\n" + + " \"properties\" : {\n" + + " \"a property\" : \"a property value\",\n" + + " \"another one\" : \"also with value\"\n" + + " }\n" + + "}"); + } + + @Test + public void testBlobMetadata() { + testJsonSerialization( + new FileMetadata( + ImmutableList.of( + new BlobMetadata("type-a", ImmutableList.of(1), 4, 16, null, ImmutableMap.of()), + new BlobMetadata("type-bbb", ImmutableList.of(2, 3, 4), Integer.MAX_VALUE * 10000L, 79834, null, + ImmutableMap.of())), + ImmutableMap.of()), + "{\n" + + " \"blobs\" : [ {\n" + + " \"type\" : \"type-a\",\n" + + " \"fields\" : [ 1 ],\n" + + " \"offset\" : 4,\n" + + " \"length\" : 16\n" + + " }, {\n" + + " \"type\" : \"type-bbb\",\n" + + " \"fields\" : [ 2, 3, 4 ],\n" + + " \"offset\" : 21474836470000,\n" + + " \"length\" : 79834\n" + + " } ],\n" + + " \"properties\" : { }\n" + + "}"); + } + + @Test + public void testBlobProperties() { + testJsonSerialization( + new FileMetadata( + ImmutableList.of( + new BlobMetadata( + 
"type-a", ImmutableList.of(1), 4, 16, null, + ImmutableMap.of("some key", "some value"))), + ImmutableMap.of()), + "{\n" + + " \"blobs\" : [ {\n" + + " \"type\" : \"type-a\",\n" + + " \"fields\" : [ 1 ],\n" + + " \"offset\" : 4,\n" + + " \"length\" : 16,\n" + + " \"properties\" : {\n" + + " \"some key\" : \"some value\"\n" + + " }\n" + + " } ],\n" + + " \"properties\" : { }\n" + + "}"); + } + + private void testJsonSerialization(FileMetadata fileMetadata, String json) { + assertThat(FileMetadataParser.toJson(fileMetadata, true)) + .isEqualTo(json); + + // Test round-trip. Note that FileMetadata doesn't implement equals() + FileMetadata parsed = FileMetadataParser.fromJson(json); + assertThat(FileMetadataParser.toJson(parsed, true)) + .isEqualTo(json); + } +} diff --git a/core/src/test/java/org/apache/iceberg/puffin/TestPuffinFormat.java b/core/src/test/java/org/apache/iceberg/puffin/TestPuffinFormat.java new file mode 100644 index 000000000000..b54a81cc7661 --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/puffin/TestPuffinFormat.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg.puffin; + +import java.io.ByteArrayOutputStream; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.util.Arrays; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.junit.Test; + +import static org.apache.iceberg.puffin.PuffinFormat.readIntegerLittleEndian; +import static org.apache.iceberg.puffin.PuffinFormat.writeIntegerLittleEndian; +import static org.apache.iceberg.relocated.com.google.common.base.Preconditions.checkArgument; +import static org.assertj.core.api.Assertions.assertThat; + +public class TestPuffinFormat { + @Test + public void testWriteIntegerLittleEndian() throws Exception { + testWriteIntegerLittleEndian(0, bytes(0, 0, 0, 0)); + testWriteIntegerLittleEndian(42, bytes(42, 0, 0, 0)); + testWriteIntegerLittleEndian(Integer.MAX_VALUE - 5, bytes(0xFA, 0xFF, 0xFF, 0x7F)); + testWriteIntegerLittleEndian(-7, bytes(0xF9, 0xFF, 0xFF, 0xFF)); + } + + private void testWriteIntegerLittleEndian(int value, byte[] expected) throws Exception { + // Sanity check: validate the expectation + ByteBuffer buffer = ByteBuffer.allocate(4).order(ByteOrder.LITTLE_ENDIAN); + buffer.putInt(value); + buffer.flip(); + byte[] written = new byte[4]; + buffer.get(written); + Preconditions.checkState(Arrays.equals(written, expected), "Invalid expected value"); + + ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + writeIntegerLittleEndian(outputStream, value); + assertThat(outputStream.toByteArray()).isEqualTo(expected); + } + + @Test + public void testReadIntegerLittleEndian() { + testReadIntegerLittleEndian(bytes(0, 0, 0, 0), 0, 0); + testReadIntegerLittleEndian(bytes(42, 0, 0, 0), 0, 42); + testReadIntegerLittleEndian(bytes(13, 42, 0, 0, 0, 14), 1, 42); + testReadIntegerLittleEndian(bytes(13, 0xFa, 0xFF, 0xFF, 0x7F, 14), 1, Integer.MAX_VALUE - 5); + testReadIntegerLittleEndian(bytes(13, 0xF9, 0xFF, 0xFF, 0xFF, 14), 1, -7); + } + + private void 
testReadIntegerLittleEndian(byte[] input, int offset, int expected) { + // Sanity check: validate the expectation + Preconditions.checkArgument( + expected == ByteBuffer.wrap(input.clone(), offset, input.length - offset) + .order(ByteOrder.LITTLE_ENDIAN) + .getInt(), + "Invalid expected value"); + // actual test + assertThat(readIntegerLittleEndian(input, offset)).isEqualTo(expected); + } + + private byte[] bytes(int... unsignedBytes) { + byte[] bytes = new byte[unsignedBytes.length]; + for (int i = 0; i < unsignedBytes.length; i++) { + int value = unsignedBytes[i]; + checkArgument(0 <= value && value <= 0xFF, "Invalid value: %s", value); + bytes[i] = (byte) value; + } + return bytes; + } +} diff --git a/core/src/test/java/org/apache/iceberg/puffin/TestPuffinReader.java b/core/src/test/java/org/apache/iceberg/puffin/TestPuffinReader.java new file mode 100644 index 000000000000..867c8bcd1dcb --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/puffin/TestPuffinReader.java @@ -0,0 +1,153 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg.puffin; + +import java.util.Map; +import javax.annotation.Nullable; +import org.apache.iceberg.io.InMemoryInputFile; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.relocated.com.google.common.collect.Streams; +import org.apache.iceberg.util.ByteBuffers; +import org.apache.iceberg.util.Pair; +import org.junit.Test; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.iceberg.puffin.PuffinCompressionCodec.NONE; +import static org.apache.iceberg.puffin.PuffinCompressionCodec.ZSTD; +import static org.apache.iceberg.puffin.PuffinFormatTestUtil.EMPTY_PUFFIN_UNCOMPRESSED_FOOTER_SIZE; +import static org.apache.iceberg.puffin.PuffinFormatTestUtil.SAMPLE_METRIC_DATA_COMPRESSED_ZSTD_FOOTER_SIZE; +import static org.apache.iceberg.puffin.PuffinFormatTestUtil.readTestResource; +import static org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap.toImmutableMap; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +public class TestPuffinReader { + @Test + public void testEmptyFooterUncompressed() throws Exception { + testEmpty("v1/empty-puffin-uncompressed.bin", EMPTY_PUFFIN_UNCOMPRESSED_FOOTER_SIZE); + } + + @Test + public void testEmptyWithUnknownFooterSize() throws Exception { + testEmpty("v1/empty-puffin-uncompressed.bin", null); + } + + private void testEmpty(String resourceName, @Nullable Long footerSize) throws Exception { + InMemoryInputFile inputFile = new InMemoryInputFile(readTestResource(resourceName)); + Puffin.ReadBuilder readBuilder = Puffin.read(inputFile) + .withFileSize(inputFile.getLength()); + if (footerSize != null) { + readBuilder = readBuilder.withFooterSize(footerSize); + } + try (PuffinReader reader = readBuilder.build()) { + FileMetadata fileMetadata = 
reader.fileMetadata(); + assertThat(fileMetadata.properties()).as("file properties") + .isEqualTo(ImmutableMap.of()); + assertThat(fileMetadata.blobs()).as("blob list") + .isEmpty(); + } + } + + @Test + public void testWrongFooterSize() throws Exception { + String resourceName = "v1/sample-metric-data-compressed-zstd.bin"; + long footerSize = SAMPLE_METRIC_DATA_COMPRESSED_ZSTD_FOOTER_SIZE; + testWrongFooterSize(resourceName, footerSize - 1, "Invalid file: expected magic at offset"); + testWrongFooterSize(resourceName, footerSize + 1, "Invalid file: expected magic at offset"); + testWrongFooterSize(resourceName, footerSize - 10, "Invalid file: expected magic at offset"); + testWrongFooterSize(resourceName, footerSize + 10, "Invalid file: expected magic at offset"); + testWrongFooterSize(resourceName, footerSize - 10000, "Invalid footer size"); + testWrongFooterSize(resourceName, footerSize + 10000, "Invalid footer size"); + } + + private void testWrongFooterSize(String resourceName, long wrongFooterSize, String expectedMessagePrefix) + throws Exception { + InMemoryInputFile inputFile = new InMemoryInputFile(readTestResource(resourceName)); + Puffin.ReadBuilder builder = Puffin.read(inputFile) + .withFileSize(inputFile.getLength()) + .withFooterSize(wrongFooterSize); + assertThatThrownBy( + () -> { + try (PuffinReader reader = builder.build()) { + reader.fileMetadata(); + } + }) + .hasMessageStartingWith(expectedMessagePrefix); + } + + @Test + public void testReadMetricDataUncompressed() throws Exception { + testReadMetricData("v1/sample-metric-data-uncompressed.bin", NONE); + } + + @Test + public void testReadMetricDataCompressedZstd() throws Exception { + testReadMetricData("v1/sample-metric-data-compressed-zstd.bin", ZSTD); + } + + private void testReadMetricData(String resourceName, PuffinCompressionCodec expectedCodec) throws Exception { + InMemoryInputFile inputFile = new InMemoryInputFile(readTestResource(resourceName)); + try (PuffinReader reader = 
Puffin.read(inputFile).build()) { + FileMetadata fileMetadata = reader.fileMetadata(); + assertThat(fileMetadata.properties()).as("file properties") + .isEqualTo(ImmutableMap.of("created-by", "Test 1234")); + assertThat(fileMetadata.blobs()).as("blob list") + .hasSize(2); + + BlobMetadata firstBlob = fileMetadata.blobs().get(0); + assertThat(firstBlob.type()).as("type").isEqualTo("some-blob"); + assertThat(firstBlob.inputFields()).as("columns").isEqualTo(ImmutableList.of(1)); + assertThat(firstBlob.offset()).as("offset").isEqualTo(4); + assertThat(firstBlob.compressionCodec()).as("compression codec") + .isEqualTo(expectedCodec.codecName()); + + BlobMetadata secondBlob = fileMetadata.blobs().get(1); + assertThat(secondBlob.type()).as("type").isEqualTo("some-other-blob"); + assertThat(secondBlob.inputFields()).as("columns").isEqualTo(ImmutableList.of(2)); + assertThat(secondBlob.offset()).as("offset") + .isEqualTo(firstBlob.offset() + firstBlob.length()); + assertThat(secondBlob.compressionCodec()).as("compression codec") + .isEqualTo(expectedCodec.codecName()); + + Map read = Streams.stream(reader.readAll(ImmutableList.of(firstBlob, secondBlob))) + .collect(toImmutableMap(Pair::first, pair -> ByteBuffers.toByteArray(pair.second()))); + + assertThat(read).as("read") + .containsOnlyKeys(firstBlob, secondBlob) + .containsEntry(firstBlob, "abcdefghi".getBytes(UTF_8)) + .containsEntry( + secondBlob, + "some blob \u0000 binary data 🤯 that is not very very very very very very long, is it?".getBytes(UTF_8)); + } + } + + @Test + public void testValidateFooterSizeValue() throws Exception { + // Ensure the definition of SAMPLE_METRIC_DATA_COMPRESSED_ZSTD_FOOTER_SIZE remains accurate + InMemoryInputFile inputFile = new InMemoryInputFile(readTestResource("v1/sample-metric-data-compressed-zstd.bin")); + try (PuffinReader reader = Puffin.read(inputFile) + .withFooterSize(SAMPLE_METRIC_DATA_COMPRESSED_ZSTD_FOOTER_SIZE) + .build()) { + assertThat(reader.fileMetadata().properties()) 
+ .isEqualTo(ImmutableMap.of("created-by", "Test 1234")); + } + } +} diff --git a/core/src/test/java/org/apache/iceberg/puffin/TestPuffinWriter.java b/core/src/test/java/org/apache/iceberg/puffin/TestPuffinWriter.java new file mode 100644 index 000000000000..a9b04af1e969 --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/puffin/TestPuffinWriter.java @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg.puffin; + +import java.nio.ByteBuffer; +import org.apache.iceberg.io.InMemoryOutputFile; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.junit.Test; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.iceberg.puffin.PuffinCompressionCodec.NONE; +import static org.apache.iceberg.puffin.PuffinCompressionCodec.ZSTD; +import static org.apache.iceberg.puffin.PuffinFormatTestUtil.EMPTY_PUFFIN_UNCOMPRESSED_FOOTER_SIZE; +import static org.apache.iceberg.puffin.PuffinFormatTestUtil.readTestResource; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +public class TestPuffinWriter { + @Test + public void testEmptyFooterCompressed() { + InMemoryOutputFile outputFile = new InMemoryOutputFile(); + + PuffinWriter writer = Puffin.write(outputFile) + .compressFooter() + .build(); + assertThatThrownBy(writer::footerSize) + .isInstanceOf(IllegalStateException.class) + .hasMessage("Footer not written yet"); + assertThatThrownBy(writer::finish) + .isInstanceOf(UnsupportedOperationException.class) + .hasMessage("Unsupported codec: LZ4"); + assertThatThrownBy(writer::close) + .isInstanceOf(UnsupportedOperationException.class) + .hasMessage("Unsupported codec: LZ4"); + } + + @Test + public void testEmptyFooterUncompressed() throws Exception { + InMemoryOutputFile outputFile = new InMemoryOutputFile(); + PuffinWriter writer = Puffin.write(outputFile) + .build(); + assertThatThrownBy(writer::footerSize) + .isInstanceOf(IllegalStateException.class) + .hasMessage("Footer not written yet"); + writer.finish(); + assertThat(writer.footerSize()).isEqualTo(EMPTY_PUFFIN_UNCOMPRESSED_FOOTER_SIZE); + writer.close(); + assertThat(outputFile.toByteArray()) + .isEqualTo(readTestResource("v1/empty-puffin-uncompressed.bin")); + // footerSize()
is still accessible after close() + assertThat(writer.footerSize()).isEqualTo(EMPTY_PUFFIN_UNCOMPRESSED_FOOTER_SIZE); + assertThat(writer.writtenBlobsMetadata()).isEmpty(); + } + + @Test + public void testImplicitFinish() throws Exception { + InMemoryOutputFile outputFile = new InMemoryOutputFile(); + PuffinWriter writer = Puffin.write(outputFile) + .build(); + writer.close(); + assertThat(outputFile.toByteArray()) + .isEqualTo(readTestResource("v1/empty-puffin-uncompressed.bin")); + assertThat(writer.footerSize()).isEqualTo(EMPTY_PUFFIN_UNCOMPRESSED_FOOTER_SIZE); + } + + @Test + public void testWriteMetricDataUncompressed() throws Exception { + testWriteMetric(NONE, "v1/sample-metric-data-uncompressed.bin"); + } + + @Test + public void testWriteMetricDataCompressedZstd() throws Exception { + testWriteMetric(ZSTD, "v1/sample-metric-data-compressed-zstd.bin"); + } + + private void testWriteMetric(PuffinCompressionCodec compression, String expectedResource) throws Exception { + InMemoryOutputFile outputFile = new InMemoryOutputFile(); + try (PuffinWriter writer = Puffin.write(outputFile) + .createdBy("Test 1234") + .build()) { + writer.add(new Blob("some-blob", ImmutableList.of(1), ByteBuffer.wrap("abcdefghi".getBytes(UTF_8)), + compression, ImmutableMap.of())); + + // "xxx"s are stripped away by data offsets + byte[] bytes = + "xxx some blob \u0000 binary data 🤯 that is not very very very very very very long, is it? 
xxx".getBytes( + UTF_8); + writer.add(new Blob("some-other-blob", ImmutableList.of(2), ByteBuffer.wrap(bytes, 4, bytes.length - 8), + compression, ImmutableMap.of())); + + assertThat(writer.writtenBlobsMetadata()).hasSize(2); + BlobMetadata firstMetadata = writer.writtenBlobsMetadata().get(0); + assertThat(firstMetadata.type()).isEqualTo("some-blob"); + assertThat(firstMetadata.inputFields()).isEqualTo(ImmutableList.of(1)); + assertThat(firstMetadata.properties()).isEqualTo(ImmutableMap.of()); + BlobMetadata secondMetadata = writer.writtenBlobsMetadata().get(1); + assertThat(secondMetadata.type()).isEqualTo("some-other-blob"); + assertThat(secondMetadata.inputFields()).isEqualTo(ImmutableList.of(2)); + assertThat(secondMetadata.properties()).isEqualTo(ImmutableMap.of()); + } + + byte[] expected = readTestResource(expectedResource); + assertThat(outputFile.toByteArray()) + .isEqualTo(expected); + } +} diff --git a/core/src/test/resources/org/apache/iceberg/puffin/v1/empty-puffin-uncompressed.bin b/core/src/test/resources/org/apache/iceberg/puffin/v1/empty-puffin-uncompressed.bin new file mode 100644 index 0000000000000000000000000000000000000000..d5f2ae62e5f0f8d1dab6ad22c5c13c6c4653c1ab GIT binary patch literal 48 tcmWG=b2JP9;%cR&ocyF>C9CLI9i@Vz{DRb?lFU>fzq+HuiM4P*cS literal 0 HcmV?d00001 diff --git a/core/src/test/resources/org/apache/iceberg/puffin/v1/sample-metric-data-compressed-zstd.bin b/core/src/test/resources/org/apache/iceberg/puffin/v1/sample-metric-data-compressed-zstd.bin new file mode 100644 index 0000000000000000000000000000000000000000..c63361501910aa4762915f2f78380a4ce63956f6 GIT binary patch literal 345 zcmaLSu}Z{15C-5?v9R_9meImugJeBCNFz4FbqCtX9c;3bWFeOgyMquzN?R*?8+$7Y zU%^+<$|vvz1b4Mi5uEZL=9`)M$#J^(u>ZchHDgTWg~qa~&Gz!~`7=G;TxT~M-mZ}6 zjmX4mtuW`Ia|$j%3# zzC*|$!bwa)Kxweiv70Ug*s}CUkWBEv^hb#}j>FM-8It+ErcIe1pxNm^Mp(4~t-iP&P=!vKDU;^X#%DyUo`vCxXa|8eY literal 0 HcmV?d00001 diff --git 
a/core/src/test/resources/org/apache/iceberg/puffin/v1/sample-metric-data-uncompressed.bin b/core/src/test/resources/org/apache/iceberg/puffin/v1/sample-metric-data-uncompressed.bin new file mode 100644 index 0000000000000000000000000000000000000000..15d806805df9012767ea584e54d09e918098ddfb GIT binary patch literal 283 zcmZ{eJr2S!4263X%X2#PXMiqd z#^C9&ILye?NLXM~e3UK46R0o3wc^Z)<= literal 0 HcmV?d00001 diff --git a/dev/.rat-excludes b/dev/.rat-excludes index 94874c2e0272..bb47575524b5 100644 --- a/dev/.rat-excludes +++ b/dev/.rat-excludes @@ -22,6 +22,7 @@ gradle/* .*\.svg .*\.lock .*\.json +.*\.bin package-list sitemap.xml derby.log diff --git a/versions.props b/versions.props index 940d899215ba..8f3458ab5f2d 100644 --- a/versions.props +++ b/versions.props @@ -17,6 +17,7 @@ com.github.ben-manes.caffeine:caffeine = 2.9.3 org.apache.arrow:arrow-vector = 7.0.0 org.apache.arrow:arrow-memory-netty = 7.0.0 org.roaringbitmap:RoaringBitmap = 0.9.22 +io.airlift:aircompressor = 0.21 io.netty:netty-buffer = 4.1.68.Final com.github.stephenc.findbugs:findbugs-annotations = 1.3.9-1 com.aliyun.oss:aliyun-sdk-oss = 3.10.2