Skip to content

Commit

Permalink
Return Buffers from ArrayData::buffers instead of slice (#1799) (#3783)
Browse files Browse the repository at this point in the history
* Return Buffers from ArrayData::buffers instead of slice (#1799)

* Clippy
  • Loading branch information
tustvold committed Mar 2, 2023
1 parent eff058f commit 231ae9b
Show file tree
Hide file tree
Showing 10 changed files with 132 additions and 31 deletions.
2 changes: 1 addition & 1 deletion arrow-arith/src/boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,7 @@ pub fn not(left: &BooleanArray) -> Result<BooleanArray, ArrowError> {
let data = left.data_ref();
let null_bit_buffer = data.nulls().map(|b| b.inner().sliced());

let values = buffer_unary_not(&data.buffers()[0], left_offset, len);
let values = buffer_unary_not(data.buffers()[0], left_offset, len);

let data = unsafe {
ArrayData::new_unchecked(
Expand Down
4 changes: 2 additions & 2 deletions arrow-array/src/array/primitive_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1234,7 +1234,7 @@ mod tests {
fn test_primitive_array_from_vec() {
let buf = Buffer::from_slice_ref([0, 1, 2, 3, 4]);
let arr = Int32Array::from(vec![0, 1, 2, 3, 4]);
assert_eq!(buf, arr.data.buffers()[0]);
assert_eq!(buf, *arr.data.buffers()[0]);
assert_eq!(5, arr.len());
assert_eq!(0, arr.offset());
assert_eq!(0, arr.null_count());
Expand Down Expand Up @@ -1740,7 +1740,7 @@ mod tests {
.build()
.unwrap();
let arr = Int32Array::from(data);
assert_eq!(buf2, arr.data.buffers()[0]);
assert_eq!(buf2, *arr.data.buffers()[0]);
assert_eq!(5, arr.len());
assert_eq!(0, arr.null_count());
for i in 0..3 {
Expand Down
30 changes: 15 additions & 15 deletions arrow-array/src/array/union_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,7 @@ mod tests {

// Check type ids
assert_eq!(
union.data().buffers()[0],
*union.data().buffers()[0],
Buffer::from_slice_ref(&expected_type_ids)
);
for (i, id) in expected_type_ids.iter().enumerate() {
Expand All @@ -418,7 +418,7 @@ mod tests {

// Check offsets
assert_eq!(
union.data().buffers()[1],
*union.data().buffers()[1],
Buffer::from_slice_ref(&expected_value_offsets)
);
for (i, id) in expected_value_offsets.iter().enumerate() {
Expand All @@ -427,15 +427,15 @@ mod tests {

// Check data
assert_eq!(
union.data().child_data()[0].buffers()[0],
*union.data().child_data()[0].buffers()[0],
Buffer::from_slice_ref([1_i32, 4, 6])
);
assert_eq!(
union.data().child_data()[1].buffers()[0],
*union.data().child_data()[1].buffers()[0],
Buffer::from_slice_ref([2_i32, 7])
);
assert_eq!(
union.data().child_data()[2].buffers()[0],
*union.data().child_data()[2].buffers()[0],
Buffer::from_slice_ref([3_i32, 5]),
);

Expand Down Expand Up @@ -467,7 +467,7 @@ mod tests {

// Check type ids
assert_eq!(
union.data().buffers()[0],
*union.data().buffers()[0],
Buffer::from_slice_ref(&expected_type_ids)
);
for (i, id) in expected_type_ids.iter().enumerate() {
Expand All @@ -476,7 +476,7 @@ mod tests {

// Check offsets
assert_eq!(
union.data().buffers()[1],
*union.data().buffers()[1],
Buffer::from_slice_ref(&expected_value_offsets)
);
for (i, id) in expected_value_offsets.iter().enumerate() {
Expand Down Expand Up @@ -660,15 +660,15 @@ mod tests {
.unwrap();

// Check type ids
assert_eq!(Buffer::from_slice_ref(type_ids), array.data().buffers()[0]);
assert_eq!(Buffer::from_slice_ref(type_ids), *array.data().buffers()[0]);
for (i, id) in type_ids.iter().enumerate() {
assert_eq!(id, &array.type_id(i));
}

// Check offsets
assert_eq!(
Buffer::from_slice_ref(value_offsets),
array.data().buffers()[1]
*array.data().buffers()[1]
);
for (i, id) in value_offsets.iter().enumerate() {
assert_eq!(id, &array.value_offset(i));
Expand Down Expand Up @@ -736,7 +736,7 @@ mod tests {
// Check type ids
assert_eq!(
Buffer::from_slice_ref(&expected_type_ids),
union.data().buffers()[0]
*union.data().buffers()[0]
);
for (i, id) in expected_type_ids.iter().enumerate() {
assert_eq!(id, &union.type_id(i));
Expand All @@ -747,16 +747,16 @@ mod tests {

// Check data
assert_eq!(
union.data().child_data()[0].buffers()[0],
*union.data().child_data()[0].buffers()[0],
Buffer::from_slice_ref([1_i32, 0, 0, 4, 0, 6, 0]),
);
assert_eq!(
Buffer::from_slice_ref([0_i32, 2_i32, 0, 0, 0, 0, 7]),
union.data().child_data()[1].buffers()[0]
*union.data().child_data()[1].buffers()[0]
);
assert_eq!(
Buffer::from_slice_ref([0_i32, 0, 3_i32, 0, 5, 0, 0]),
union.data().child_data()[2].buffers()[0]
*union.data().child_data()[2].buffers()[0]
);

assert_eq!(expected_array_values.len(), union.len());
Expand Down Expand Up @@ -785,7 +785,7 @@ mod tests {
// Check type ids
assert_eq!(
Buffer::from_slice_ref(&expected_type_ids),
union.data().buffers()[0]
*union.data().buffers()[0]
);
for (i, id) in expected_type_ids.iter().enumerate() {
assert_eq!(id, &union.type_id(i));
Expand Down Expand Up @@ -847,7 +847,7 @@ mod tests {
// Check type ids
assert_eq!(
Buffer::from_slice_ref(&expected_type_ids),
union.data().buffers()[0]
*union.data().buffers()[0]
);
for (i, id) in expected_type_ids.iter().enumerate() {
assert_eq!(id, &union.type_id(i));
Expand Down
2 changes: 1 addition & 1 deletion arrow-csv/src/reader/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2482,7 +2482,7 @@ mod tests {
for v in *values {
t.update(v, None)
}
assert_eq!(&t.get(), expected, "{:?}", values)
assert_eq!(&t.get(), expected, "{values:?}")
}
}
}
97 changes: 97 additions & 0 deletions arrow-data/src/data/buffers.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use arrow_buffer::Buffer;
use std::iter::Chain;
use std::ops::Index;

/// A collection of [`Buffer`]
#[derive(Debug, Default, Copy, Clone, Eq, PartialEq)]
pub struct Buffers<'a>([Option<&'a Buffer>; 2]);

impl<'a> Buffers<'a> {
/// Temporary will be removed once ArrayData does not store `Vec<Buffer>` directly (#3769)
#[inline]
pub(crate) fn from_slice(a: &'a [Buffer]) -> Self {
match a.len() {
0 => Self([None, None]),
1 => Self([Some(&a[0]), None]),
_ => Self([Some(&a[0]), Some(&a[1])]),
}
}

/// Returns the number of [`Buffer`] in this collection
#[inline]
pub fn len(&self) -> usize {
self.0[0].is_some() as usize + self.0[1].is_some() as usize
}

/// Returns `true` if this collection is empty
#[inline]
pub fn is_empty(&self) -> bool {
self.0[0].is_none() && self.0[1].is_none()
}

#[inline]
pub fn iter(&self) -> IntoIter<'a> {
self.into_iter()
}

/// Converts this [`Buffers`] to a `Vec<Buffer>`
#[inline]
pub fn to_vec(&self) -> Vec<Buffer> {
self.iter().cloned().collect()
}
}

impl<'a> Index<usize> for Buffers<'a> {
type Output = &'a Buffer;

#[inline]
fn index(&self, index: usize) -> &Self::Output {
self.0[index].as_ref().unwrap()
}
}

impl<'a> IntoIterator for Buffers<'a> {
type Item = &'a Buffer;
type IntoIter = IntoIter<'a>;

#[inline]
fn into_iter(self) -> Self::IntoIter {
IntoIter(self.0[0].into_iter().chain(self.0[1].into_iter()))
}
}

type OptionIter<'a> = std::option::IntoIter<&'a Buffer>;

/// [`Iterator`] for [`Buffers`]
pub struct IntoIter<'a>(Chain<OptionIter<'a>, OptionIter<'a>>);

impl<'a> Iterator for IntoIter<'a> {
type Item = &'a Buffer;

#[inline]
fn next(&mut self) -> Option<Self::Item> {
self.0.next()
}

#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
self.0.size_hint()
}
}
10 changes: 7 additions & 3 deletions arrow-data/src/data/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ use std::sync::Arc;

use crate::equal;

mod buffers;
pub use buffers::*;

#[allow(unused)] // Private until ready (#1176)
mod bytes;
#[allow(unused)] // Private until ready (#1176)
Expand Down Expand Up @@ -371,9 +374,10 @@ impl ArrayData {
&self.data_type
}

/// Returns a slice of the [`Buffer`]s that hold the data.
pub fn buffers(&self) -> &[Buffer] {
&self.buffers[..]
/// Returns the [`Buffers`] storing data for this [`ArrayData`]
pub fn buffers(&self) -> Buffers<'_> {
// In future ArrayData won't store data contiguously as `Vec<Buffer>` (#1799)
Buffers::from_slice(&self.buffers)
}

/// Returns a slice of children [`ArrayData`]. This will be non
Expand Down
6 changes: 3 additions & 3 deletions arrow-json/src/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2210,7 +2210,7 @@ mod tests {
.unwrap();
// test that the list offsets are correct
assert_eq!(
cc.data().buffers()[0],
*cc.data().buffers()[0],
Buffer::from_slice_ref([0i32, 2, 2, 4, 5])
);
let cc = as_boolean_array(cc.values());
Expand All @@ -2230,7 +2230,7 @@ mod tests {
.unwrap();
// test that the list offsets are correct
assert_eq!(
dd.data().buffers()[0],
*dd.data().buffers()[0],
Buffer::from_slice_ref([0i32, 1, 1, 2, 6])
);

Expand Down Expand Up @@ -2374,7 +2374,7 @@ mod tests {
let read: &ListArray = read.as_any().downcast_ref::<ListArray>().unwrap();
let expected = expected.as_any().downcast_ref::<ListArray>().unwrap();
assert_eq!(
read.data().buffers()[0],
*read.data().buffers()[0],
Buffer::from_slice_ref([0i32, 2, 3, 6, 6, 6, 7])
);
// compare list null buffers
Expand Down
6 changes: 3 additions & 3 deletions arrow-select/src/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ impl<'a> IndexIterator<'a> {
fn new(filter: &'a BooleanArray, remaining: usize) -> Self {
assert_eq!(filter.null_count(), 0);
let data = filter.data();
let iter = BitIndexIterator::new(&data.buffers()[0], data.offset(), data.len());
let iter = BitIndexIterator::new(data.buffers()[0], data.offset(), data.len());
Self { remaining, iter }
}
}
Expand Down Expand Up @@ -470,7 +470,7 @@ fn filter_boolean(values: &BooleanArray, predicate: &FilterPredicate) -> Boolean
assert_eq!(data.buffers().len(), 1);
assert_eq!(data.child_data().len(), 0);

let values = filter_bits(&data.buffers()[0], data.offset(), predicate);
let values = filter_bits(data.buffers()[0], data.offset(), predicate);

let mut builder = ArrayDataBuilder::new(DataType::Boolean)
.len(predicate.count)
Expand Down Expand Up @@ -572,7 +572,7 @@ where

Self {
src_offsets: array.value_offsets(),
src_values: &array.data().buffers()[1],
src_values: array.data().buffers()[1],
dst_offsets,
dst_values,
cur_offset,
Expand Down
2 changes: 1 addition & 1 deletion arrow-select/src/nullif.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ pub fn nullif(left: &dyn Array, right: &BooleanArray) -> Result<ArrayRef, ArrowE
let (right, r_offset) = match right_data.nulls() {
Some(nulls) => (
buffer_bin_and(
&right_data.buffers()[0],
right_data.buffers()[0],
right_data.offset(),
nulls.buffer(),
nulls.offset(),
Expand Down
4 changes: 2 additions & 2 deletions arrow/src/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1158,7 +1158,7 @@ mod tests {
// Check type ids
assert_eq!(
Buffer::from_slice_ref(&expected_type_ids),
array.data().buffers()[0]
*array.data().buffers()[0]
);
for (i, id) in expected_type_ids.iter().enumerate() {
assert_eq!(id, &array.type_id(i));
Expand Down Expand Up @@ -1222,7 +1222,7 @@ mod tests {
// Check type ids
assert_eq!(
Buffer::from_slice_ref(&expected_type_ids),
array.data().buffers()[0]
*array.data().buffers()[0]
);
for (i, id) in expected_type_ids.iter().enumerate() {
assert_eq!(id, &array.type_id(i));
Expand Down

0 comments on commit 231ae9b

Please sign in to comment.