diff --git a/arrow-arith/src/boolean.rs b/arrow-arith/src/boolean.rs index 5bd39a67342..61942dc90b8 100644 --- a/arrow-arith/src/boolean.rs +++ b/arrow-arith/src/boolean.rs @@ -353,7 +353,7 @@ pub fn not(left: &BooleanArray) -> Result { let data = left.data_ref(); let null_bit_buffer = data.nulls().map(|b| b.inner().sliced()); - let values = buffer_unary_not(&data.buffers()[0], left_offset, len); + let values = buffer_unary_not(data.buffers()[0], left_offset, len); let data = unsafe { ArrayData::new_unchecked( diff --git a/arrow-array/src/array/primitive_array.rs b/arrow-array/src/array/primitive_array.rs index 0e28060b25f..408f0c4ae96 100644 --- a/arrow-array/src/array/primitive_array.rs +++ b/arrow-array/src/array/primitive_array.rs @@ -1234,7 +1234,7 @@ mod tests { fn test_primitive_array_from_vec() { let buf = Buffer::from_slice_ref([0, 1, 2, 3, 4]); let arr = Int32Array::from(vec![0, 1, 2, 3, 4]); - assert_eq!(buf, arr.data.buffers()[0]); + assert_eq!(buf, *arr.data.buffers()[0]); assert_eq!(5, arr.len()); assert_eq!(0, arr.offset()); assert_eq!(0, arr.null_count()); @@ -1740,7 +1740,7 @@ mod tests { .build() .unwrap(); let arr = Int32Array::from(data); - assert_eq!(buf2, arr.data.buffers()[0]); + assert_eq!(buf2, *arr.data.buffers()[0]); assert_eq!(5, arr.len()); assert_eq!(0, arr.null_count()); for i in 0..3 { diff --git a/arrow-array/src/array/union_array.rs b/arrow-array/src/array/union_array.rs index f215fb0def9..867eb8d59fd 100644 --- a/arrow-array/src/array/union_array.rs +++ b/arrow-array/src/array/union_array.rs @@ -409,7 +409,7 @@ mod tests { // Check type ids assert_eq!( - union.data().buffers()[0], + *union.data().buffers()[0], Buffer::from_slice_ref(&expected_type_ids) ); for (i, id) in expected_type_ids.iter().enumerate() { @@ -418,7 +418,7 @@ mod tests { // Check offsets assert_eq!( - union.data().buffers()[1], + *union.data().buffers()[1], Buffer::from_slice_ref(&expected_value_offsets) ); for (i, id) in expected_value_offsets.iter().enumerate() { @@ -427,15 +427,15 @@ mod tests { // Check data assert_eq!( - union.data().child_data()[0].buffers()[0], + *union.data().child_data()[0].buffers()[0], Buffer::from_slice_ref([1_i32, 4, 6]) ); assert_eq!( - union.data().child_data()[1].buffers()[0], + *union.data().child_data()[1].buffers()[0], Buffer::from_slice_ref([2_i32, 7]) ); assert_eq!( - union.data().child_data()[2].buffers()[0], + *union.data().child_data()[2].buffers()[0], Buffer::from_slice_ref([3_i32, 5]), ); @@ -467,7 +467,7 @@ mod tests { // Check type ids assert_eq!( - union.data().buffers()[0], + *union.data().buffers()[0], Buffer::from_slice_ref(&expected_type_ids) ); for (i, id) in expected_type_ids.iter().enumerate() { @@ -476,7 +476,7 @@ mod tests { // Check offsets assert_eq!( - union.data().buffers()[1], + *union.data().buffers()[1], Buffer::from_slice_ref(&expected_value_offsets) ); for (i, id) in expected_value_offsets.iter().enumerate() { @@ -660,7 +660,7 @@ mod tests { .unwrap(); // Check type ids - assert_eq!(Buffer::from_slice_ref(type_ids), array.data().buffers()[0]); + assert_eq!(Buffer::from_slice_ref(type_ids), *array.data().buffers()[0]); for (i, id) in type_ids.iter().enumerate() { assert_eq!(id, &array.type_id(i)); } @@ -668,7 +668,7 @@ mod tests { // Check offsets assert_eq!( Buffer::from_slice_ref(value_offsets), - array.data().buffers()[1] + *array.data().buffers()[1] ); for (i, id) in value_offsets.iter().enumerate() { assert_eq!(id, &array.value_offset(i)); @@ -736,7 +736,7 @@ mod tests { // Check type ids assert_eq!( Buffer::from_slice_ref(&expected_type_ids), - union.data().buffers()[0] + *union.data().buffers()[0] ); for (i, id) in expected_type_ids.iter().enumerate() { assert_eq!(id, &union.type_id(i)); @@ -747,16 +747,16 @@ mod tests { // Check data assert_eq!( - union.data().child_data()[0].buffers()[0], + *union.data().child_data()[0].buffers()[0], Buffer::from_slice_ref([1_i32, 0, 0, 4, 0, 6, 0]), ); assert_eq!( Buffer::from_slice_ref([0_i32, 2_i32, 0, 0, 0, 0, 7]), - union.data().child_data()[1].buffers()[0] + *union.data().child_data()[1].buffers()[0] ); assert_eq!( Buffer::from_slice_ref([0_i32, 0, 3_i32, 0, 5, 0, 0]), - union.data().child_data()[2].buffers()[0] + *union.data().child_data()[2].buffers()[0] ); assert_eq!(expected_array_values.len(), union.len()); @@ -785,7 +785,7 @@ mod tests { // Check type ids assert_eq!( Buffer::from_slice_ref(&expected_type_ids), - union.data().buffers()[0] + *union.data().buffers()[0] ); for (i, id) in expected_type_ids.iter().enumerate() { assert_eq!(id, &union.type_id(i)); @@ -847,7 +847,7 @@ mod tests { // Check type ids assert_eq!( Buffer::from_slice_ref(&expected_type_ids), - union.data().buffers()[0] + *union.data().buffers()[0] ); for (i, id) in expected_type_ids.iter().enumerate() { assert_eq!(id, &union.type_id(i)); diff --git a/arrow-csv/src/reader/mod.rs b/arrow-csv/src/reader/mod.rs index e78f2d0ba71..84d55c4ae24 100644 --- a/arrow-csv/src/reader/mod.rs +++ b/arrow-csv/src/reader/mod.rs @@ -2482,7 +2482,7 @@ mod tests { for v in *values { t.update(v, None) } - assert_eq!(&t.get(), expected, "{:?}", values) + assert_eq!(&t.get(), expected, "{values:?}") } } } diff --git a/arrow-data/src/data/buffers.rs b/arrow-data/src/data/buffers.rs new file mode 100644 index 00000000000..3b57bfe0e23 --- /dev/null +++ b/arrow-data/src/data/buffers.rs @@ -0,0 +1,97 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow_buffer::Buffer; +use std::iter::Chain; +use std::ops::Index; + +/// A collection of [`Buffer`] +#[derive(Debug, Default, Copy, Clone, Eq, PartialEq)] +pub struct Buffers<'a>([Option<&'a Buffer>; 2]); + +impl<'a> Buffers<'a> { + /// Temporary will be removed once ArrayData does not store `Vec` directly (#3769) + #[inline] + pub(crate) fn from_slice(a: &'a [Buffer]) -> Self { + match a.len() { + 0 => Self([None, None]), + 1 => Self([Some(&a[0]), None]), + _ => Self([Some(&a[0]), Some(&a[1])]), + } + } + + /// Returns the number of [`Buffer`] in this collection + #[inline] + pub fn len(&self) -> usize { + self.0[0].is_some() as usize + self.0[1].is_some() as usize + } + + /// Returns `true` if this collection is empty + #[inline] + pub fn is_empty(&self) -> bool { + self.0[0].is_none() && self.0[1].is_none() + } + + #[inline] + pub fn iter(&self) -> IntoIter<'a> { + self.into_iter() + } + + /// Converts this [`Buffers`] to a `Vec` + #[inline] + pub fn to_vec(&self) -> Vec { + self.iter().cloned().collect() + } +} + +impl<'a> Index for Buffers<'a> { + type Output = &'a Buffer; + + #[inline] + fn index(&self, index: usize) -> &Self::Output { + self.0[index].as_ref().unwrap() + } +} + +impl<'a> IntoIterator for Buffers<'a> { + type Item = &'a Buffer; + type IntoIter = IntoIter<'a>; + + #[inline] + fn into_iter(self) -> Self::IntoIter { + IntoIter(self.0[0].into_iter().chain(self.0[1].into_iter())) + } +} + +type OptionIter<'a> = std::option::IntoIter<&'a Buffer>; + +/// [`Iterator`] for [`Buffers`] +pub struct IntoIter<'a>(Chain, OptionIter<'a>>); + +impl<'a> Iterator for IntoIter<'a> { + type Item = &'a Buffer; + + #[inline] + fn next(&mut self) -> Option { + self.0.next() + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.0.size_hint() + } +} diff --git a/arrow-data/src/data/mod.rs b/arrow-data/src/data/mod.rs index d76cb9eb19e..051deef0730 100644 --- a/arrow-data/src/data/mod.rs +++ b/arrow-data/src/data/mod.rs @@ -30,6 +30,9 @@ use std::sync::Arc; use crate::equal; +mod buffers; +pub use buffers::*; + #[allow(unused)] // Private until ready (#1176) mod bytes; #[allow(unused)] // Private until ready (#1176) @@ -371,9 +374,10 @@ impl ArrayData { &self.data_type } - /// Returns a slice of the [`Buffer`]s that hold the data. - pub fn buffers(&self) -> &[Buffer] { - &self.buffers[..] + /// Returns the [`Buffers`] storing data for this [`ArrayData`] + pub fn buffers(&self) -> Buffers<'_> { + // In future ArrayData won't store data contiguously as `Vec` (#1799) + Buffers::from_slice(&self.buffers) } /// Returns a slice of children [`ArrayData`]. This will be non diff --git a/arrow-json/src/reader.rs b/arrow-json/src/reader.rs index 0ef438e3695..3ac39c110fc 100644 --- a/arrow-json/src/reader.rs +++ b/arrow-json/src/reader.rs @@ -2210,7 +2210,7 @@ mod tests { .unwrap(); // test that the list offsets are correct assert_eq!( - cc.data().buffers()[0], + *cc.data().buffers()[0], Buffer::from_slice_ref([0i32, 2, 2, 4, 5]) ); let cc = as_boolean_array(cc.values()); @@ -2230,7 +2230,7 @@ mod tests { .unwrap(); // test that the list offsets are correct assert_eq!( - dd.data().buffers()[0], + *dd.data().buffers()[0], Buffer::from_slice_ref([0i32, 1, 1, 2, 6]) ); @@ -2374,7 +2374,7 @@ mod tests { let read: &ListArray = read.as_any().downcast_ref::().unwrap(); let expected = expected.as_any().downcast_ref::().unwrap(); assert_eq!( - read.data().buffers()[0], + *read.data().buffers()[0], Buffer::from_slice_ref([0i32, 2, 3, 6, 6, 6, 7]) ); // compare list null buffers diff --git a/arrow-select/src/filter.rs b/arrow-select/src/filter.rs index d8ea9fceb85..6ba08746d8f 100644 --- a/arrow-select/src/filter.rs +++ b/arrow-select/src/filter.rs @@ -82,7 +82,7 @@ impl<'a> IndexIterator<'a> { fn new(filter: &'a BooleanArray, remaining: usize) -> Self { assert_eq!(filter.null_count(), 0); let data = filter.data(); - let iter = BitIndexIterator::new(&data.buffers()[0], data.offset(), data.len()); + let iter = BitIndexIterator::new(data.buffers()[0], data.offset(), data.len()); Self { remaining, iter } } } @@ -470,7 +470,7 @@ fn filter_boolean(values: &BooleanArray, predicate: &FilterPredicate) -> Boolean assert_eq!(data.buffers().len(), 1); assert_eq!(data.child_data().len(), 0); - let values = filter_bits(&data.buffers()[0], data.offset(), predicate); + let values = filter_bits(data.buffers()[0], data.offset(), predicate); let mut builder = ArrayDataBuilder::new(DataType::Boolean) .len(predicate.count) @@ -572,7 +572,7 @@ where Self { src_offsets: array.value_offsets(), - src_values: &array.data().buffers()[1], + src_values: array.data().buffers()[1], dst_offsets, dst_values, cur_offset, diff --git a/arrow-select/src/nullif.rs b/arrow-select/src/nullif.rs index 4b052ce004c..ea0c8e3d526 100644 --- a/arrow-select/src/nullif.rs +++ b/arrow-select/src/nullif.rs @@ -56,7 +56,7 @@ pub fn nullif(left: &dyn Array, right: &BooleanArray) -> Result ( buffer_bin_and( - &right_data.buffers()[0], + right_data.buffers()[0], right_data.offset(), nulls.buffer(), nulls.offset(), diff --git a/arrow/src/ffi.rs b/arrow/src/ffi.rs index 4d62b9e7cf6..c767a69e6bd 100644 --- a/arrow/src/ffi.rs +++ b/arrow/src/ffi.rs @@ -1158,7 +1158,7 @@ mod tests { // Check type ids assert_eq!( Buffer::from_slice_ref(&expected_type_ids), - array.data().buffers()[0] + *array.data().buffers()[0] ); for (i, id) in expected_type_ids.iter().enumerate() { assert_eq!(id, &array.type_id(i)); @@ -1222,7 +1222,7 @@ mod tests { // Check type ids assert_eq!( Buffer::from_slice_ref(&expected_type_ids), - array.data().buffers()[0] + *array.data().buffers()[0] ); for (i, id) in expected_type_ids.iter().enumerate() { assert_eq!(id, &array.type_id(i));