Enable more Avro compression codecs and cover them with sqllogictest cases.

* datafusion/common/Cargo.toml: the optional apache-avro dependency gains the
  "bzip", "xz" and "zstandard" features next to the existing "snappy".
* datafusion/sqllogictest/test_files/avro.slt: registers one external table per
  compressed copy of alltypes_plain (snappy, bzip2, xz, zstandard) and runs the
  same id / string_col query against each, expecting the same eight rows.
* testing: moves the submodule pointer from 37f29510ce97 to 98fceecd024d
  (presumably the commit that provides the compressed .avro files; confirm).

NOTE(review): line breaks and indentation inside the hunks were restored from a
whitespace-collapsed copy of this patch; check them against the blob ids in the
index lines before applying.

diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml
index b5cdec1be17b..733aba1e1da1 100644
--- a/datafusion/common/Cargo.toml
+++ b/datafusion/common/Cargo.toml
@@ -40,7 +40,7 @@ pyarrow = ["pyo3", "arrow/pyarrow"]
 
 [dependencies]
 ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] }
-apache-avro = { version = "0.16", default-features = false, features = ["snappy"], optional = true }
+apache-avro = { version = "0.16", default-features = false, features = ["bzip", "snappy", "xz", "zstandard"], optional = true }
 arrow = { workspace = true }
 arrow-array = { workspace = true }
 arrow-buffer = { workspace = true }
diff --git a/datafusion/sqllogictest/test_files/avro.slt b/datafusion/sqllogictest/test_files/avro.slt
index ede11406e1a9..3309cd1cf642 100644
--- a/datafusion/sqllogictest/test_files/avro.slt
+++ b/datafusion/sqllogictest/test_files/avro.slt
@@ -34,6 +34,78 @@ STORED AS AVRO
 WITH HEADER ROW
 LOCATION '../../testing/data/avro/alltypes_plain.avro'
 
+statement ok
+CREATE EXTERNAL TABLE alltypes_plain_snappy (
+  id INT NOT NULL,
+  bool_col BOOLEAN NOT NULL,
+  tinyint_col TINYINT NOT NULL,
+  smallint_col SMALLINT NOT NULL,
+  int_col INT NOT NULL,
+  bigint_col BIGINT NOT NULL,
+  float_col FLOAT NOT NULL,
+  double_col DOUBLE NOT NULL,
+  date_string_col BYTEA NOT NULL,
+  string_col VARCHAR NOT NULL,
+  timestamp_col TIMESTAMP NOT NULL,
+)
+STORED AS AVRO
+WITH HEADER ROW
+LOCATION '../../testing/data/avro/alltypes_plain.snappy.avro'
+
+statement ok
+CREATE EXTERNAL TABLE alltypes_plain_bzip2 (
+  id INT NOT NULL,
+  bool_col BOOLEAN NOT NULL,
+  tinyint_col TINYINT NOT NULL,
+  smallint_col SMALLINT NOT NULL,
+  int_col INT NOT NULL,
+  bigint_col BIGINT NOT NULL,
+  float_col FLOAT NOT NULL,
+  double_col DOUBLE NOT NULL,
+  date_string_col BYTEA NOT NULL,
+  string_col VARCHAR NOT NULL,
+  timestamp_col TIMESTAMP NOT NULL,
+)
+STORED AS AVRO
+WITH HEADER ROW
+LOCATION '../../testing/data/avro/alltypes_plain.bzip2.avro'
+
+statement ok
+CREATE EXTERNAL TABLE alltypes_plain_xz (
+  id INT NOT NULL,
+  bool_col BOOLEAN NOT NULL,
+  tinyint_col TINYINT NOT NULL,
+  smallint_col SMALLINT NOT NULL,
+  int_col INT NOT NULL,
+  bigint_col BIGINT NOT NULL,
+  float_col FLOAT NOT NULL,
+  double_col DOUBLE NOT NULL,
+  date_string_col BYTEA NOT NULL,
+  string_col VARCHAR NOT NULL,
+  timestamp_col TIMESTAMP NOT NULL,
+)
+STORED AS AVRO
+WITH HEADER ROW
+LOCATION '../../testing/data/avro/alltypes_plain.xz.avro'
+
+statement ok
+CREATE EXTERNAL TABLE alltypes_plain_zstandard (
+  id INT NOT NULL,
+  bool_col BOOLEAN NOT NULL,
+  tinyint_col TINYINT NOT NULL,
+  smallint_col SMALLINT NOT NULL,
+  int_col INT NOT NULL,
+  bigint_col BIGINT NOT NULL,
+  float_col FLOAT NOT NULL,
+  double_col DOUBLE NOT NULL,
+  date_string_col BYTEA NOT NULL,
+  string_col VARCHAR NOT NULL,
+  timestamp_col TIMESTAMP NOT NULL,
+)
+STORED AS AVRO
+WITH HEADER ROW
+LOCATION '../../testing/data/avro/alltypes_plain.zstandard.avro'
+
 statement ok
 CREATE EXTERNAL TABLE single_nan (
   mycol FLOAT
@@ -73,6 +145,58 @@ SELECT id, CAST(string_col AS varchar) FROM alltypes_plain
 0 0
 1 1
 
+# test avro query with snappy
+query IT
+SELECT id, CAST(string_col AS varchar) FROM alltypes_plain_snappy
+----
+4 0
+5 1
+6 0
+7 1
+2 0
+3 1
+0 0
+1 1
+
+# test avro query with bzip2
+query IT
+SELECT id, CAST(string_col AS varchar) FROM alltypes_plain_bzip2
+----
+4 0
+5 1
+6 0
+7 1
+2 0
+3 1
+0 0
+1 1
+
+# test avro query with xz
+query IT
+SELECT id, CAST(string_col AS varchar) FROM alltypes_plain_xz
+----
+4 0
+5 1
+6 0
+7 1
+2 0
+3 1
+0 0
+1 1
+
+# test avro query with zstandard
+query IT
+SELECT id, CAST(string_col AS varchar) FROM alltypes_plain_zstandard
+----
+4 0
+5 1
+6 0
+7 1
+2 0
+3 1
+0 0
+1 1
+
 # test avro single nan schema
 query R
 SELECT mycol FROM single_nan
diff --git a/testing b/testing
index 37f29510ce97..98fceecd024d 160000
--- a/testing
+++ b/testing
@@ -1 +1 @@
-Subproject commit 37f29510ce97cd491b8e6ed75866c6533a5ea2a1
+Subproject commit 98fceecd024dccd2f8a00e32fc144975f218acf4