Use memory-backed streams in tests
Sometimes an actual file is needed, but sometimes it's not. In the cases
where it isn't, we can use in-memory `InputFile` and `OutputFile`
implementations.
findepi committed Apr 12, 2022
1 parent 66adec7 commit d9b424f
Showing 7 changed files with 257 additions and 23 deletions.
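
The pattern the updated tests follow is a simple in-memory round trip: write through an `InMemoryOutputFile`, then read back via `toInputFile()`, with no temporary directory or cleanup. A minimal sketch, condensed from the `TestGenericAvro` change further down this page (`schema` and `records` stand in for whatever the test already has in scope; imports match those shown in the diffs):

InMemoryOutputFile outputFile = new InMemoryOutputFile();
try (FileAppender<GenericData.Record> writer = Avro.write(outputFile)
    .schema(schema)
    .named("test")
    .build()) {
  for (GenericData.Record record : records) {
    writer.add(record);   // appended bytes accumulate in memory only
  }
}

List<GenericData.Record> rows;
try (AvroIterable<GenericData.Record> reader = Avro.read(outputFile.toInputFile())
    .project(schema)
    .build()) {
  rows = Lists.newArrayList(reader);   // read back exactly what was just written
}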
@@ -30,6 +30,7 @@
import org.apache.iceberg.avro.AvroSchemaUtil;
import org.apache.iceberg.io.CloseableIterable;
import org.apache.iceberg.io.FileAppender;
import org.apache.iceberg.io.InMemoryOutputFile;
import org.apache.iceberg.io.InputFile;
import org.apache.iceberg.io.OutputFile;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
@@ -251,7 +252,7 @@ public void testManifestsPartitionSummary() throws IOException {
}

private InputFile writeManifestList(ManifestFile manifest, int formatVersion) throws IOException {
OutputFile manifestList = Files.localOutput(temp.newFile());
OutputFile manifestList = new InMemoryOutputFile();
try (FileAppender<ManifestFile> writer = ManifestLists.write(
formatVersion, manifestList, SNAPSHOT_ID, SNAPSHOT_ID - 1, formatVersion > 1 ? SEQ_NUM : 0)) {
writer.add(manifest);
@@ -24,6 +24,7 @@
import org.apache.iceberg.io.CloseableIterable;
import org.apache.iceberg.io.FileAppender;
import org.apache.iceberg.io.FileIO;
import org.apache.iceberg.io.InMemoryOutputFile;
import org.apache.iceberg.io.InputFile;
import org.apache.iceberg.io.OutputFile;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
@@ -235,7 +236,7 @@ void checkRewrittenManifest(ManifestFile manifest, long expectedSequenceNumber,
}

private InputFile writeManifestList(ManifestFile manifest, int formatVersion) throws IOException {
OutputFile manifestList = Files.localOutput(temp.newFile());
InMemoryOutputFile manifestList = new InMemoryOutputFile();
try (FileAppender<ManifestFile> writer = ManifestLists.write(
formatVersion, manifestList, SNAPSHOT_ID, SNAPSHOT_ID - 1, formatVersion > 1 ? SEQUENCE_NUMBER : 0)) {
writer.add(manifest);
@@ -28,6 +28,8 @@
import org.apache.iceberg.avro.RandomAvroData;
import org.apache.iceberg.expressions.Expressions;
import org.apache.iceberg.io.FileAppender;
import org.apache.iceberg.io.InMemoryOutputFile;
import org.apache.iceberg.io.OutputFile;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.types.Types;
import org.junit.After;
@@ -65,11 +67,11 @@ public TestScansAndSchemaEvolution(int formatVersion) {
@Rule
public TemporaryFolder temp = new TemporaryFolder();

private DataFile createDataFile(File dataPath, String partValue) throws IOException {
private DataFile createDataFile(String partValue) throws IOException {
List<GenericData.Record> expected = RandomAvroData.generate(SCHEMA, 100, 0L);

File dataFile = new File(dataPath, FileFormat.AVRO.addExtension(UUID.randomUUID().toString()));
try (FileAppender<GenericData.Record> writer = Avro.write(Files.localOutput(dataFile))
OutputFile dataFile = new InMemoryOutputFile(FileFormat.AVRO.addExtension(UUID.randomUUID().toString()));
try (FileAppender<GenericData.Record> writer = Avro.write(dataFile)
.schema(SCHEMA)
.named("test")
.build()) {
@@ -82,7 +84,7 @@ private DataFile createDataFile(File dataPath, String partValue) throws IOException {
PartitionData partition = new PartitionData(SPEC.partitionType());
partition.set(0, partValue);
return DataFiles.builder(SPEC)
.withInputFile(Files.localInput(dataFile))
.withInputFile(dataFile.toInputFile())
.withPartition(partition)
.withRecordCount(100)
.build();
@@ -96,13 +98,12 @@ public void cleanupTables() {
@Test
public void testPartitionSourceRename() throws IOException {
File location = temp.newFolder();
File dataLocation = new File(location, "data");
Assert.assertTrue(location.delete()); // should be created by table create

Table table = TestTables.create(location, "test", SCHEMA, SPEC, formatVersion);

DataFile fileOne = createDataFile(dataLocation, "one");
DataFile fileTwo = createDataFile(dataLocation, "two");
DataFile fileOne = createDataFile("one");
DataFile fileTwo = createDataFile("two");

table.newAppend()
.appendFile(fileOne)
@@ -35,6 +35,7 @@
import org.apache.iceberg.data.avro.DataWriter;
import org.apache.iceberg.deletes.EqualityDeleteWriter;
import org.apache.iceberg.deletes.PositionDeleteWriter;
import org.apache.iceberg.io.InMemoryOutputFile;
import org.apache.iceberg.io.OutputFile;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
@@ -73,9 +74,7 @@ public void createDeleteRecords() {

@Test
public void testEqualityDeleteWriter() throws IOException {
File deleteFile = temp.newFile();

OutputFile out = Files.localOutput(deleteFile);
OutputFile out = new InMemoryOutputFile();
EqualityDeleteWriter<Record> deleteWriter = Avro.writeDeletes(out)
.createWriterFunc(DataWriter::create)
.overwrite()
@@ -108,8 +107,6 @@ public void testEqualityDeleteWriter() throws IOException {

@Test
public void testPositionDeleteWriter() throws IOException {
File deleteFile = temp.newFile();

Schema deleteSchema = new Schema(
MetadataColumns.DELETE_FILE_PATH,
MetadataColumns.DELETE_FILE_POS,
@@ -119,7 +116,7 @@ public void testPositionDeleteWriter() throws IOException {
GenericRecord posDelete = GenericRecord.create(deleteSchema);
List<Record> expectedDeleteRecords = Lists.newArrayList();

OutputFile out = Files.localOutput(deleteFile);
OutputFile out = new InMemoryOutputFile();
PositionDeleteWriter<Record> deleteWriter = Avro.writeDeletes(out)
.createWriterFunc(DataWriter::create)
.overwrite()
12 changes: 4 additions & 8 deletions core/src/test/java/org/apache/iceberg/avro/TestGenericAvro.java
@@ -19,25 +19,21 @@

package org.apache.iceberg.avro;

import java.io.File;
import java.io.IOException;
import java.util.List;
import org.apache.avro.generic.GenericData.Record;
import org.apache.iceberg.Files;
import org.apache.iceberg.Schema;
import org.apache.iceberg.io.FileAppender;
import org.apache.iceberg.io.InMemoryOutputFile;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.junit.Assert;

public class TestGenericAvro extends AvroDataTest {
@Override
protected void writeAndValidate(Schema schema) throws IOException {
List<Record> expected = RandomAvroData.generate(schema, 100, 0L);

File testFile = temp.newFile();
Assert.assertTrue("Delete should succeed", testFile.delete());

try (FileAppender<Record> writer = Avro.write(Files.localOutput(testFile))
InMemoryOutputFile outputFile = new InMemoryOutputFile();
try (FileAppender<Record> writer = Avro.write(outputFile)
.schema(schema)
.named("test")
.build()) {
@@ -47,7 +43,7 @@ protected void writeAndValidate(Schema schema) throws IOException {
}

List<Record> rows;
try (AvroIterable<Record> reader = Avro.read(Files.localInput(testFile))
try (AvroIterable<Record> reader = Avro.read(outputFile.toInputFile())
.project(schema)
.build()) {
rows = Lists.newArrayList(reader);
130 changes: 130 additions & 0 deletions core/src/test/java/org/apache/iceberg/io/InMemoryInputFile.java
@@ -0,0 +1,130 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.iceberg.io;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.Objects;
import java.util.UUID;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;

public class InMemoryInputFile implements InputFile {

private final String location;
private final byte[] contents;

public InMemoryInputFile(byte[] contents) {
this("memory:" + UUID.randomUUID(), contents);
}

public InMemoryInputFile(String location, byte[] contents) {
this.location = Objects.requireNonNull(location, "location is null");
this.contents = Objects.requireNonNull(contents, "contents is null").clone();
}

@Override
public long getLength() {
return contents.length;
}

@Override
public SeekableInputStream newStream() {
return new InMemorySeekableInputStream(contents);
}

@Override
public String location() {
return location;
}

@Override
public boolean exists() {
return true;
}

private static class InMemorySeekableInputStream extends SeekableInputStream {

private final int length;
private final ByteArrayInputStream delegate;

InMemorySeekableInputStream(byte[] contents) {
this.length = contents.length;
this.delegate = new ByteArrayInputStream(contents);
}

@Override
public long getPos() throws IOException {
return length - delegate.available();
}

@Override
public void seek(long newPos) throws IOException {
delegate.reset();
Preconditions.checkState(delegate.skip(newPos) == newPos,
"Invalid position %s within stream of length %s", newPos, length);
}

@Override
public int read() {
return delegate.read();
}

@Override
public int read(byte[] b) throws IOException {
return delegate.read(b);
}

@Override
public int read(byte[] b, int off, int len) {
return delegate.read(b, off, len);
}

@Override
public long skip(long n) {
return delegate.skip(n);
}

@Override
public int available() {
return delegate.available();
}

@Override
public boolean markSupported() {
throw new UnsupportedOperationException();
}

@Override
public void mark(int readAheadLimit) {
// We use mark to implement seek
throw new UnsupportedOperationException();
}

@Override
public void reset() {
delegate.reset();
}

@Override
public void close() throws IOException {
delegate.close();
}
}
}
108 changes: 108 additions & 0 deletions core/src/test/java/org/apache/iceberg/io/InMemoryOutputFile.java
@@ -0,0 +1,108 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.iceberg.io;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.Objects;
import java.util.UUID;

public class InMemoryOutputFile implements OutputFile {

private final String location;

private boolean exists;
private ByteArrayOutputStream contents;

public InMemoryOutputFile() {
this("memory:" + UUID.randomUUID());
}

public InMemoryOutputFile(String location) {
this.location = Objects.requireNonNull(location, "location is null");
}

@Override
public PositionOutputStream create() {
if (exists) {
throw new RuntimeException("Already exists");
}
return createOrOverwrite();
}

@Override
public PositionOutputStream createOrOverwrite() {
exists = true;
contents = new ByteArrayOutputStream();
return new InMemoryPositionOutputStream(contents);
}

@Override
public String location() {
return location;
}

@Override
public InputFile toInputFile() {
return new InMemoryInputFile(location(), getContents());
}

public byte[] getContents() {
return contents.toByteArray();
}

private static class InMemoryPositionOutputStream extends PositionOutputStream {
private final ByteArrayOutputStream delegate;

InMemoryPositionOutputStream(ByteArrayOutputStream delegate) {
this.delegate = Objects.requireNonNull(delegate, "delegate is null");
}

@Override
public long getPos() {
return delegate.size();
}

@Override
public void write(int b) {
delegate.write(b);
}

@Override
public void write(byte[] b) throws IOException {
delegate.write(b);
}

@Override
public void write(byte[] b, int off, int len) {
delegate.write(b, off, len);
}

@Override
public void flush() throws IOException {
delegate.flush();
}

@Override
public void close() throws IOException {
delegate.close();
}
}
}
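
Worth noting about the `OutputFile` contract implemented above: `create()` throws if the file was already created, `createOrOverwrite()` quietly starts a fresh buffer, and `toInputFile()` snapshots whatever has been written so far. A rough usage sketch, assuming it runs inside a test method declared `throws IOException`:

InMemoryOutputFile outputFile = new InMemoryOutputFile();
try (PositionOutputStream out = outputFile.create()) {
  out.write(new byte[] {1, 2, 3});
}
// A second outputFile.create() would now fail with "Already exists";
// createOrOverwrite() would instead reset the buffer and succeed.

try (SeekableInputStream in = outputFile.toInputFile().newStream()) {
  in.seek(1);                    // implemented as reset() followed by skip(newPos)
  int secondByte = in.read();    // returns 2, the byte at position 1
}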
