Skip to content

Commit

Permalink
Support all the codecs supported by Avro (#7718)
Browse files Browse the repository at this point in the history
* Support all the codecs supported by Avro

* Update testing dir
  • Loading branch information
sarutak committed Oct 2, 2023
1 parent 2ab0c00 commit e97ed66
Show file tree
Hide file tree
Showing 3 changed files with 126 additions and 2 deletions.
2 changes: 1 addition & 1 deletion datafusion/common/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ pyarrow = ["pyo3", "arrow/pyarrow"]

[dependencies]
ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] }
apache-avro = { version = "0.16", default-features = false, features = ["snappy"], optional = true }
apache-avro = { version = "0.16", default-features = false, features = ["bzip", "snappy", "xz", "zstandard"], optional = true }
arrow = { workspace = true }
arrow-array = { workspace = true }
arrow-buffer = { workspace = true }
Expand Down
124 changes: 124 additions & 0 deletions datafusion/sqllogictest/test_files/avro.slt
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,78 @@ STORED AS AVRO
WITH HEADER ROW
LOCATION '../../testing/data/avro/alltypes_plain.avro'

# External table over the alltypes_plain Avro fixture whose file name marks it
# as Snappy-compressed; column list matches the other alltypes_plain_* tables.
statement ok
CREATE EXTERNAL TABLE alltypes_plain_snappy (
  id              INT       NOT NULL,
  bool_col        BOOLEAN   NOT NULL,
  tinyint_col     TINYINT   NOT NULL,
  smallint_col    SMALLINT  NOT NULL,
  int_col         INT       NOT NULL,
  bigint_col      BIGINT    NOT NULL,
  float_col       FLOAT     NOT NULL,
  double_col      DOUBLE    NOT NULL,
  date_string_col BYTEA     NOT NULL,
  string_col      VARCHAR   NOT NULL,
  timestamp_col   TIMESTAMP NOT NULL,
)
STORED AS AVRO
WITH HEADER ROW
LOCATION '../../testing/data/avro/alltypes_plain.snappy.avro'

# External table over the alltypes_plain Avro fixture whose file name marks it
# as bzip2-compressed; column list matches the other alltypes_plain_* tables.
statement ok
CREATE EXTERNAL TABLE alltypes_plain_bzip2 (
  id              INT       NOT NULL,
  bool_col        BOOLEAN   NOT NULL,
  tinyint_col     TINYINT   NOT NULL,
  smallint_col    SMALLINT  NOT NULL,
  int_col         INT       NOT NULL,
  bigint_col      BIGINT    NOT NULL,
  float_col       FLOAT     NOT NULL,
  double_col      DOUBLE    NOT NULL,
  date_string_col BYTEA     NOT NULL,
  string_col      VARCHAR   NOT NULL,
  timestamp_col   TIMESTAMP NOT NULL,
)
STORED AS AVRO
WITH HEADER ROW
LOCATION '../../testing/data/avro/alltypes_plain.bzip2.avro'

# External table over the alltypes_plain Avro fixture whose file name marks it
# as xz-compressed; column list matches the other alltypes_plain_* tables.
statement ok
CREATE EXTERNAL TABLE alltypes_plain_xz (
  id              INT       NOT NULL,
  bool_col        BOOLEAN   NOT NULL,
  tinyint_col     TINYINT   NOT NULL,
  smallint_col    SMALLINT  NOT NULL,
  int_col         INT       NOT NULL,
  bigint_col      BIGINT    NOT NULL,
  float_col       FLOAT     NOT NULL,
  double_col      DOUBLE    NOT NULL,
  date_string_col BYTEA     NOT NULL,
  string_col      VARCHAR   NOT NULL,
  timestamp_col   TIMESTAMP NOT NULL,
)
STORED AS AVRO
WITH HEADER ROW
LOCATION '../../testing/data/avro/alltypes_plain.xz.avro'

# External table over the alltypes_plain Avro fixture whose file name marks it
# as zstandard-compressed; column list matches the other alltypes_plain_* tables.
statement ok
CREATE EXTERNAL TABLE alltypes_plain_zstandard (
  id              INT       NOT NULL,
  bool_col        BOOLEAN   NOT NULL,
  tinyint_col     TINYINT   NOT NULL,
  smallint_col    SMALLINT  NOT NULL,
  int_col         INT       NOT NULL,
  bigint_col      BIGINT    NOT NULL,
  float_col       FLOAT     NOT NULL,
  double_col      DOUBLE    NOT NULL,
  date_string_col BYTEA     NOT NULL,
  string_col      VARCHAR   NOT NULL,
  timestamp_col   TIMESTAMP NOT NULL,
)
STORED AS AVRO
WITH HEADER ROW
LOCATION '../../testing/data/avro/alltypes_plain.zstandard.avro'

statement ok
CREATE EXTERNAL TABLE single_nan (
mycol FLOAT
Expand Down Expand Up @@ -73,6 +145,58 @@ SELECT id, CAST(string_col AS varchar) FROM alltypes_plain
0 0
1 1

# Avro query against the Snappy table: project id plus string_col cast to varchar.
# The query has no ORDER BY; the expected rows below are listed as returned.
query IT
SELECT
  id,
  CAST(string_col AS varchar)
FROM alltypes_plain_snappy
----
4 0
5 1
6 0
7 1
2 0
3 1
0 0
1 1

# Avro query against the bzip2 table: project id plus string_col cast to varchar.
# The query has no ORDER BY; the expected rows below are listed as returned.
query IT
SELECT
  id,
  CAST(string_col AS varchar)
FROM alltypes_plain_bzip2
----
4 0
5 1
6 0
7 1
2 0
3 1
0 0
1 1

# Avro query against the xz table: project id plus string_col cast to varchar.
# The query has no ORDER BY; the expected rows below are listed as returned.
query IT
SELECT
  id,
  CAST(string_col AS varchar)
FROM alltypes_plain_xz
----
4 0
5 1
6 0
7 1
2 0
3 1
0 0
1 1

# Avro query against the zstandard table: project id plus string_col cast to varchar.
# The query has no ORDER BY; the expected rows below are listed as returned.
query IT
SELECT
  id,
  CAST(string_col AS varchar)
FROM alltypes_plain_zstandard
----
4 0
5 1
6 0
7 1
2 0
3 1
0 0
1 1

# test avro single nan schema
query R
SELECT mycol FROM single_nan
Expand Down

0 comments on commit e97ed66

Please sign in to comment.