Skip to content

Commit

Permalink
Fix JDK bug for concatenated GZIP streams
Browse files Browse the repository at this point in the history
  • Loading branch information
electrum committed Jul 12, 2023
1 parent 3210eb1 commit 46735a5
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@

import io.airlift.compress.hadoop.HadoopInputStream;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.zip.GZIPInputStream;

import static java.lang.Math.max;
import static java.util.Objects.requireNonNull;

class JdkGzipHadoopInputStream
Expand All @@ -30,7 +32,7 @@ class JdkGzipHadoopInputStream
/**
 * Creates a stream that decompresses GZIP data read from {@code input}.
 *
 * @param input the compressed source; must not be null
 * @param bufferSize buffer size handed to the wrapped streams
 * @throws IOException declared by the constructor; presumably raised while GZIPInputStream is set up -- confirm
 */
public JdkGzipHadoopInputStream(InputStream input, int bufferSize)
throws IOException
{
// NOTE(review): this is a rendered diff, so two assignments are shown; the next line appears to be the pre-change version
this.input = new GZIPInputStream(requireNonNull(input, "input is null"), bufferSize);
// NOTE(review): appears to be the post-change version; the source is wrapped in GzipBufferedInputStream
// (the JDK-8081450 workaround) before it is handed to GZIPInputStream
this.input = new GZIPInputStream(new GzipBufferedInputStream(input, bufferSize), bufferSize);
}

@Override
Expand Down Expand Up @@ -63,4 +65,22 @@ public void close()
{
input.close();
}

/**
 * Buffered wrapper whose {@link #available()} never reports fewer than one byte.
 * Workaround for https://bugs.openjdk.org/browse/JDK-8081450
 */
private static class GzipBufferedInputStream
        extends BufferedInputStream
{
    /**
     * @param input the stream to buffer; must not be null
     * @param bufferSize size of the internal buffer
     */
    public GzipBufferedInputStream(InputStream input, int bufferSize)
    {
        super(requireNonNull(input, "input is null"), bufferSize);
    }

    /**
     * Returns the parent's estimate, raised to one whenever it would be lower.
     *
     * @throws IOException if the parent implementation throws
     */
    @Override
    public int available()
            throws IOException
    {
        int estimate = super.available();
        // A result of zero makes GZIPInputStream treat the stream as complete,
        // so always claim that at least one byte is left.
        return (estimate < 1) ? 1 : estimate;
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.airlift.compress.gzip;

import com.google.common.io.ByteStreams;
import org.testng.annotations.Test;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.SequenceInputStream;
import java.util.zip.GZIPOutputStream;

import static java.nio.charset.StandardCharsets.UTF_8;
import static org.assertj.core.api.Assertions.assertThat;

/**
 * Tests reading concatenated GZIP data through the stream produced by
 * {@code JdkGzipHadoopStreams}.
 */
public class TestJdkGzipHadoopInputStream
{
    /**
     * Compresses two strings separately, feeds the two results back to back
     * through a {@link SequenceInputStream}, and checks that decompression
     * yields the content of both parts rather than only the first.
     */
    @Test
    public void testGzipInputStreamBug()
            throws IOException
    {
        // Pin the charset so the bytes do not depend on the platform default.
        byte[] part1 = zip("hello ".getBytes(UTF_8));
        byte[] part2 = zip("world".getBytes(UTF_8));

        InputStream compressed = new SequenceInputStream(new ByteArrayInputStream(part1), new ByteArrayInputStream(part2));
        byte[] data;
        // Close the decompressing stream even when reading fails.
        try (InputStream decompressed = new JdkGzipHadoopStreams().createInputStream(compressed)) {
            data = ByteStreams.toByteArray(decompressed);
        }

        assertThat(data).isEqualTo("hello world".getBytes(UTF_8));
    }

    /**
     * GZIP-compresses {@code data} with {@link GZIPOutputStream}.
     *
     * @param data the bytes to compress
     * @return the compressed bytes
     * @throws IOException if writing to or closing the GZIP stream fails
     */
    private static byte[] zip(byte[] data)
            throws IOException
    {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        // Closing the GZIP stream before reading the buffer ensures the output is complete.
        try (OutputStream gzipOut = new GZIPOutputStream(out)) {
            gzipOut.write(data);
        }
        return out.toByteArray();
    }
}

0 comments on commit 46735a5

Please sign in to comment.