Skip to content

Commit

Permalink
Add LZ4 frame compression
Browse files Browse the repository at this point in the history
  • Loading branch information
findepi committed Apr 12, 2022
1 parent 02ade1b commit 9103d8e
Show file tree
Hide file tree
Showing 9 changed files with 766 additions and 2 deletions.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@
<groupId>org.lz4</groupId>
<artifactId>lz4-java</artifactId>
<version>1.8.0</version>
<scope>test</scope>
<!-- TODO move to test scope once we have XxHash32 -->
</dependency>

<dependency>
Expand Down
137 changes: 137 additions & 0 deletions src/main/java/io/airlift/compress/lz4/Lz4FrameCompressor.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.airlift.compress.lz4;

import io.airlift.compress.Compressor;

import java.nio.Buffer;
import java.nio.ByteBuffer;

import static io.airlift.compress.lz4.Lz4RawCompressor.MAX_TABLE_SIZE;
import static io.airlift.compress.lz4.UnsafeUtil.getAddress;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;
import static sun.misc.Unsafe.ARRAY_BYTE_BASE_OFFSET;

/**
 * {@link Compressor} that validates its arguments and delegates the actual work to
 * {@code Lz4FrameRawCompressor}.
 * <p>
 * This class is not thread-safe: every call on an instance reuses the same scratch {@code table}.
 */
public class Lz4FrameCompressor
        implements Compressor
{
    // Scratch table handed to the raw compressor on every call; shared state is why instances are not thread-safe
    private final int[] table = new int[MAX_TABLE_SIZE];

    /**
     * Returns the upper bound on the compressed size for the given input size, as computed by
     * {@code Lz4FrameRawCompressor.maxCompressedLength}.
     *
     * @param uncompressedSize size of the uncompressed input in bytes
     * @return maximum number of bytes the compressed output may occupy
     */
    @Override
    public int maxCompressedLength(int uncompressedSize)
    {
        return Lz4FrameRawCompressor.maxCompressedLength(uncompressedSize);
    }

    /**
     * Compresses {@code inputLength} bytes of {@code input} starting at {@code inputOffset} into
     * {@code output} starting at {@code outputOffset}.
     *
     * @param input source array
     * @param inputOffset index of the first byte to compress
     * @param inputLength number of bytes to compress
     * @param output destination array
     * @param outputOffset index at which to start writing
     * @param maxOutputLength number of bytes available in {@code output} from {@code outputOffset}
     * @return the value returned by the raw compressor (used as the number of bytes written)
     * @throws NullPointerException if {@code input} or {@code output} is null
     * @throws IllegalArgumentException if either offset/length pair does not fit in its array
     */
    @Override
    public int compress(byte[] input, int inputOffset, int inputLength, byte[] output, int outputOffset, int maxOutputLength)
    {
        verifyRange(input, inputOffset, inputLength);
        verifyRange(output, outputOffset, maxOutputLength);

        // Widen before adding so the address cannot wrap around in int arithmetic
        long inputAddress = (long) ARRAY_BYTE_BASE_OFFSET + inputOffset;
        long outputAddress = (long) ARRAY_BYTE_BASE_OFFSET + outputOffset;

        return Lz4FrameRawCompressor.compress(
                input,
                inputAddress,
                inputLength,
                output,
                outputAddress,
                maxOutputLength,
                table);
    }

    /**
     * ByteBuffer variant of compression. Currently disabled: it always throws
     * {@link UnsupportedOperationException}. The code after the guard is kept for when
     * buffer support is enabled.
     *
     * @param inputBuffer buffer whose remaining bytes would be compressed
     * @param outputBuffer buffer that would receive the compressed bytes
     * @throws UnsupportedOperationException always
     */
    @Override
    public void compress(ByteBuffer inputBuffer, ByteBuffer outputBuffer)
    {
        // "if (true)" rather than a bare throw keeps the code below compilable (no unreachable-statement error)
        if (true) {
            // TODO support byte buffers, see disabled tests
            throw new UnsupportedOperationException("This is disabled, does not work with direct buffers yet");
        }

        // Java 9+ added an overload of various methods in ByteBuffer. When compiling with Java 11+ and targeting Java 8 bytecode
        // the resulting signatures are invalid for JDK 8, so accesses below result in NoSuchMethodError. Accessing the
        // methods through the Buffer base class works around the problem
        // Sidenote: we can't target "javac --release 8" because Unsafe is not available in the signature data for that profile
        Buffer input = inputBuffer;
        Buffer output = outputBuffer;

        Object inputBase;
        long inputAddress;
        if (input.isDirect()) {
            // Off-heap memory: no base object, absolute address
            inputBase = null;
            inputAddress = getAddress(input) + input.position();
        }
        else if (input.hasArray()) {
            // Heap buffer: address is relative to the backing array
            inputBase = input.array();
            inputAddress = (long) ARRAY_BYTE_BASE_OFFSET + input.arrayOffset() + input.position();
        }
        else {
            throw new IllegalArgumentException("Unsupported input ByteBuffer implementation " + input.getClass().getName());
        }
        // The raw compressor takes a length (see the byte[] overload), so pass the remaining
        // byte count rather than the absolute limit, which is only correct when position == 0
        int inputLength = input.remaining();

        Object outputBase;
        long outputAddress;
        if (output.isDirect()) {
            outputBase = null;
            outputAddress = getAddress(output) + output.position();
        }
        else if (output.hasArray()) {
            outputBase = output.array();
            outputAddress = (long) ARRAY_BYTE_BASE_OFFSET + output.arrayOffset() + output.position();
        }
        else {
            throw new IllegalArgumentException("Unsupported output ByteBuffer implementation " + output.getClass().getName());
        }
        int maxOutputLength = output.remaining();

        // HACK: Assure JVM does not collect the buffers while compressing, since the
        // collection may trigger freeing of the underlying memory resulting in a segfault
        // There is no other known way to signal to the JVM that an object should not be
        // collected in a block, and technically, the JVM is allowed to eliminate these locks.
        synchronized (input) {
            synchronized (output) {
                int written = Lz4FrameRawCompressor.compress(
                        inputBase,
                        inputAddress,
                        inputLength,
                        outputBase,
                        outputAddress,
                        maxOutputLength,
                        table);
                output.position(output.position() + written);
            }
        }
    }

    /**
     * Checks that {@code [offset, offset + length)} lies within {@code data}.
     *
     * @throws NullPointerException if {@code data} is null
     * @throws IllegalArgumentException if the range is negative or exceeds the array
     */
    private static void verifyRange(byte[] data, int offset, int length)
    {
        requireNonNull(data, "data is null");
        // Compare via subtraction: "offset + length" can overflow int and slip past the check.
        // length >= 0 is established first, so data.length - length cannot overflow.
        if (offset < 0 || length < 0 || offset > data.length - length) {
            throw new IllegalArgumentException(format("Invalid offset or length (%s, %s) in array of length %s", offset, length, data.length));
        }
    }
}
123 changes: 123 additions & 0 deletions src/main/java/io/airlift/compress/lz4/Lz4FrameDecompressor.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.airlift.compress.lz4;

import io.airlift.compress.Decompressor;
import io.airlift.compress.MalformedInputException;

import java.nio.Buffer;
import java.nio.ByteBuffer;

import static io.airlift.compress.lz4.UnsafeUtil.getAddress;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;
import static sun.misc.Unsafe.ARRAY_BYTE_BASE_OFFSET;

/**
 * {@link Decompressor} that validates its arguments and delegates the actual work to
 * {@code Lz4FrameRawDecompressor}.
 */
public class Lz4FrameDecompressor
        implements Decompressor
{
    /**
     * Decompresses {@code inputLength} bytes of {@code input} starting at {@code inputOffset} into
     * {@code output} starting at {@code outputOffset}.
     *
     * @param input array holding the compressed data
     * @param inputOffset index of the first compressed byte
     * @param inputLength number of compressed bytes
     * @param output destination array
     * @param outputOffset index at which to start writing
     * @param maxOutputLength number of bytes available in {@code output} from {@code outputOffset}
     * @return the value returned by the raw decompressor (used as the number of bytes written)
     * @throws NullPointerException if {@code input} or {@code output} is null
     * @throws IllegalArgumentException if either offset/length pair does not fit in its array
     * @throws MalformedInputException declared by the {@link Decompressor} contract; raised by the raw decompressor
     */
    @Override
    public int decompress(byte[] input, int inputOffset, int inputLength, byte[] output, int outputOffset, int maxOutputLength)
            throws MalformedInputException
    {
        verifyRange(input, inputOffset, inputLength);
        verifyRange(output, outputOffset, maxOutputLength);

        // Widen before adding so the address cannot wrap around in int arithmetic
        long inputAddress = (long) ARRAY_BYTE_BASE_OFFSET + inputOffset;
        long outputAddress = (long) ARRAY_BYTE_BASE_OFFSET + outputOffset;

        return Lz4FrameRawDecompressor.decompress(
                input,
                inputAddress,
                inputLength,
                output,
                outputAddress,
                maxOutputLength);
    }

    /**
     * ByteBuffer variant of decompression. Currently disabled: it always throws
     * {@link UnsupportedOperationException}. The code after the guard is kept for when
     * buffer support is enabled.
     *
     * @param inputBuffer buffer whose remaining bytes would be decompressed
     * @param outputBuffer buffer that would receive the decompressed bytes
     * @throws UnsupportedOperationException always
     */
    @Override
    public void decompress(ByteBuffer inputBuffer, ByteBuffer outputBuffer)
            throws MalformedInputException
    {
        // "if (true)" rather than a bare throw keeps the code below compilable (no unreachable-statement error)
        if (true) {
            // TODO support byte buffers, see disabled tests
            throw new UnsupportedOperationException("This is disabled, does not work with direct buffers yet");
        }

        // Java 9+ added an overload of various methods in ByteBuffer. When compiling with Java 11+ and targeting Java 8 bytecode
        // the resulting signatures are invalid for JDK 8, so accesses below result in NoSuchMethodError. Accessing the
        // methods through the Buffer base class works around the problem
        // Sidenote: we can't target "javac --release 8" because Unsafe is not available in the signature data for that profile
        Buffer input = inputBuffer;
        Buffer output = outputBuffer;

        Object inputBase;
        long inputAddress;
        if (input.isDirect()) {
            // Off-heap memory: no base object, absolute address
            inputBase = null;
            inputAddress = getAddress(input) + input.position();
        }
        else if (input.hasArray()) {
            // Heap buffer: address is relative to the backing array
            inputBase = input.array();
            inputAddress = (long) ARRAY_BYTE_BASE_OFFSET + input.arrayOffset() + input.position();
        }
        else {
            throw new IllegalArgumentException("Unsupported input ByteBuffer implementation " + input.getClass().getName());
        }
        // The raw decompressor takes a length (see the byte[] overload), so pass the remaining
        // byte count rather than the absolute limit, which is only correct when position == 0
        int inputLength = input.remaining();

        Object outputBase;
        long outputAddress;
        if (output.isDirect()) {
            outputBase = null;
            outputAddress = getAddress(output) + output.position();
        }
        else if (output.hasArray()) {
            outputBase = output.array();
            outputAddress = (long) ARRAY_BYTE_BASE_OFFSET + output.arrayOffset() + output.position();
        }
        else {
            throw new IllegalArgumentException("Unsupported output ByteBuffer implementation " + output.getClass().getName());
        }
        int maxOutputLength = output.remaining();

        // HACK: Assure JVM does not collect the buffers while decompressing, since the
        // collection may trigger freeing of the underlying memory resulting in a segfault
        // There is no other known way to signal to the JVM that an object should not be
        // collected in a block, and technically, the JVM is allowed to eliminate these locks.
        synchronized (input) {
            synchronized (output) {
                int written = Lz4FrameRawDecompressor.decompress(inputBase, inputAddress, inputLength, outputBase, outputAddress, maxOutputLength);
                output.position(output.position() + written);
            }
        }
    }

    /**
     * Returns the decompressed size reported by {@code Lz4FrameRawDecompressor.getDecompressedSize}
     * for the compressed data in {@code input[offset, offset + length)}.
     *
     * @param input array holding the compressed data
     * @param offset index of the first compressed byte
     * @param length number of compressed bytes
     * @return the size reported by the raw decompressor
     * @throws NullPointerException if {@code input} is null
     * @throws IllegalArgumentException if the offset/length pair does not fit in {@code input}
     */
    public static long getDecompressedSize(byte[] input, int offset, int length)
    {
        // Validate before handing an Unsafe-relative address to the raw decompressor
        verifyRange(input, offset, length);

        int baseAddress = ARRAY_BYTE_BASE_OFFSET + offset;
        return Lz4FrameRawDecompressor.getDecompressedSize(input, baseAddress, length);
    }

    /**
     * Checks that {@code [offset, offset + length)} lies within {@code data}.
     *
     * @throws NullPointerException if {@code data} is null
     * @throws IllegalArgumentException if the range is negative or exceeds the array
     */
    private static void verifyRange(byte[] data, int offset, int length)
    {
        requireNonNull(data, "data is null");
        // Compare via subtraction: "offset + length" can overflow int and slip past the check.
        // length >= 0 is established first, so data.length - length cannot overflow.
        if (offset < 0 || length < 0 || offset > data.length - length) {
            throw new IllegalArgumentException(format("Invalid offset or length (%s, %s) in array of length %s", offset, length, data.length));
        }
    }
}
Loading

0 comments on commit 9103d8e

Please sign in to comment.