Skip to content

Commit

Permalink
Add support for BinaryArray in arrow-vtab (#324)
Browse files Browse the repository at this point in the history
* Add support for BinaryArray in arrow-vtab

* Fix lint
  • Loading branch information
phillipleblanc committed Jun 5, 2024
1 parent f628e5a commit 4f772b3
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 9 deletions.
54 changes: 47 additions & 7 deletions crates/duckdb/src/vtab/arrow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ use std::ptr::null_mut;

use crate::vtab::vector::Inserter;
use arrow::array::{
as_boolean_array, as_large_list_array, as_list_array, as_primitive_array, as_string_array, as_struct_array, Array,
ArrayData, AsArray, BooleanArray, Decimal128Array, FixedSizeListArray, GenericListArray, OffsetSizeTrait,
PrimitiveArray, StringArray, StructArray,
as_boolean_array, as_generic_binary_array, as_large_list_array, as_list_array, as_primitive_array, as_string_array,
as_struct_array, Array, ArrayData, AsArray, BinaryArray, BooleanArray, Decimal128Array, FixedSizeListArray,
GenericListArray, OffsetSizeTrait, PrimitiveArray, StringArray, StructArray,
};

use arrow::{
Expand Down Expand Up @@ -230,6 +230,9 @@ pub fn record_batch_to_duckdb_data_chunk(
DataType::Utf8 => {
string_array_to_vector(as_string_array(col.as_ref()), &mut chunk.flat_vector(i));
}
DataType::Binary => {
binary_array_to_vector(as_generic_binary_array(col.as_ref()), &mut chunk.flat_vector(i));
}
DataType::List(_) => {
list_array_to_vector(as_list_array(col.as_ref()), &mut chunk.list_vector(i))?;
}
Expand Down Expand Up @@ -430,6 +433,15 @@ fn string_array_to_vector(array: &StringArray, out: &mut FlatVector) {
}
}

/// Writes each element of a binary Arrow array into the same-numbered row of `out`.
///
/// # Panics
///
/// Panics when `array` has more elements than `out.capacity()` reports.
fn binary_array_to_vector(array: &BinaryArray, out: &mut FlatVector) {
    assert!(array.len() <= out.capacity());

    // NOTE(review): the validity bitmap is not consulted here, so a null slot is
    // written as whatever `array.value` yields for it — confirm whether nulls
    // need to be carried over to the output vector.
    (0..array.len()).for_each(|row| out.insert(row, array.value(row)));
}

fn list_array_to_vector<O: OffsetSizeTrait + AsPrimitive<usize>>(
array: &GenericListArray<O>,
out: &mut ListVector,
Expand All @@ -443,6 +455,9 @@ fn list_array_to_vector<O: OffsetSizeTrait + AsPrimitive<usize>>(
DataType::Utf8 => {
string_array_to_vector(as_string_array(value_array.as_ref()), &mut child);
}
DataType::Binary => {
binary_array_to_vector(as_generic_binary_array(value_array.as_ref()), &mut child);
}
_ => {
return Err("Nested list is not supported yet.".into());
}
Expand All @@ -469,6 +484,9 @@ fn fixed_size_list_array_to_vector(
DataType::Utf8 => {
string_array_to_vector(as_string_array(value_array.as_ref()), &mut child);
}
DataType::Binary => {
binary_array_to_vector(as_generic_binary_array(value_array.as_ref()), &mut child);
}
_ => {
return Err("Nested array is not supported yet.".into());
}
Expand All @@ -493,6 +511,9 @@ fn struct_array_to_vector(array: &StructArray, out: &mut StructVector) -> Result
DataType::Utf8 => {
string_array_to_vector(as_string_array(column.as_ref()), &mut out.child(i));
}
DataType::Binary => {
binary_array_to_vector(as_generic_binary_array(column.as_ref()), &mut out.child(i));
}
DataType::List(_) => {
list_array_to_vector(as_list_array(column.as_ref()), &mut out.list_vector_child(i))?;
}
Expand Down Expand Up @@ -560,10 +581,10 @@ mod test {
use crate::{Connection, Result};
use arrow::{
array::{
Array, ArrayRef, AsArray, Date32Array, Date64Array, Decimal256Array, FixedSizeListArray, Float64Array,
GenericListArray, Int32Array, ListArray, OffsetSizeTrait, PrimitiveArray, StringArray, StructArray,
Time32SecondArray, Time64MicrosecondArray, TimestampMicrosecondArray, TimestampMillisecondArray,
TimestampNanosecondArray, TimestampSecondArray,
Array, ArrayRef, AsArray, BinaryArray, Date32Array, Date64Array, Decimal256Array, FixedSizeListArray,
Float64Array, GenericListArray, Int32Array, ListArray, OffsetSizeTrait, PrimitiveArray, StringArray,
StructArray, Time32SecondArray, Time64MicrosecondArray, TimestampMicrosecondArray,
TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray,
},
buffer::{OffsetBuffer, ScalarBuffer},
datatypes::{i256, ArrowPrimitiveType, DataType, Field, Fields, Schema},
Expand Down Expand Up @@ -924,4 +945,23 @@ mod test {
)
);
}

#[test]
fn test_arrow_binary() {
    // Build a single-column batch ("x") that carries one binary value.
    let input: ArrayRef = Arc::new(BinaryArray::from_iter_values([b"test"].iter()));
    let batch = RecordBatch::try_from_iter(vec![("x", input)]).unwrap();

    // Expose the Arrow table function on a fresh in-memory database.
    let db = Connection::open_in_memory().unwrap();
    db.register_table_function::<ArrowVTab>("arrow").unwrap();

    // Pass the batch through the table function and read it back as Arrow.
    let mut stmt = db.prepare("SELECT * FROM arrow(?, ?)").unwrap();
    let mut batches = stmt.query_arrow(arrow_recordbatch_to_query_params(batch)).unwrap();
    let first = batches.next().expect("no record batch");

    // The value must come back as a binary column with its bytes unchanged.
    let blobs = first.column(0).as_any().downcast_ref::<BinaryArray>().unwrap();
    assert_eq!(blobs.len(), 1);
    assert_eq!(blobs.value(0), b"test");
}
}
20 changes: 18 additions & 2 deletions crates/duckdb/src/vtab/vector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@ use crate::ffi::{
duckdb_list_entry, duckdb_list_vector_get_child, duckdb_list_vector_get_size, duckdb_list_vector_reserve,
duckdb_list_vector_set_size, duckdb_struct_type_child_count, duckdb_struct_type_child_name,
duckdb_struct_vector_get_child, duckdb_validity_set_row_invalid, duckdb_vector,
duckdb_vector_assign_string_element, duckdb_vector_ensure_validity_writable, duckdb_vector_get_column_type,
duckdb_vector_get_data, duckdb_vector_get_validity, duckdb_vector_size,
duckdb_vector_assign_string_element, duckdb_vector_assign_string_element_len,
duckdb_vector_ensure_validity_writable, duckdb_vector_get_column_type, duckdb_vector_get_data,
duckdb_vector_get_validity, duckdb_vector_size,
};

/// Vector trait.
Expand Down Expand Up @@ -113,6 +114,21 @@ impl Inserter<&str> for FlatVector {
}
}

/// Lets raw byte slices be written into a [`FlatVector`] row.
impl Inserter<&[u8]> for FlatVector {
    /// Assigns the bytes of `value` to the element at `index`.
    ///
    /// The length is passed explicitly, so the payload does not rely on a
    /// terminating NUL byte.
    fn insert(&self, index: usize, value: &[u8]) {
        let bytes = value.as_ptr().cast::<::std::os::raw::c_char>();
        let byte_len = value.len() as u64;
        let row = index as u64;

        // SAFETY: `bytes` and `byte_len` are derived from the same live slice, so
        // together they describe a readable region. NOTE(review): this assumes
        // `self.ptr` is a valid DuckDB vector and `index` is within its bounds —
        // neither is checked here.
        //
        // The length-taking string setter also works for binary data:
        // https://duckdb.org/docs/api/c/api#duckdb_vector_assign_string_element_len
        unsafe {
            duckdb_vector_assign_string_element_len(self.ptr, row, bytes, byte_len);
        }
    }
}

/// A list vector.
pub struct ListVector {
/// ListVector does not own the vector pointer.
Expand Down

0 comments on commit 4f772b3

Please sign in to comment.