Skip to content

Commit

Permalink
Remove RawPtrBox (apache#1811) (apache#1176)
Browse files Browse the repository at this point in the history
  • Loading branch information
tustvold committed Feb 20, 2023
1 parent 5ffc0a8 commit 6264343
Show file tree
Hide file tree
Showing 7 changed files with 85 additions and 84 deletions.
16 changes: 4 additions & 12 deletions arrow-array/src/array/boolean_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@

use crate::builder::BooleanBuilder;
use crate::iterator::BooleanIter;
use crate::raw_pointer::RawPtrBox;
use crate::{print_long_array, Array, ArrayAccessor};
use arrow_buffer::{bit_util, Buffer, MutableBuffer};
use arrow_data::bit_mask::combine_option_bitmap;
Expand Down Expand Up @@ -67,9 +66,7 @@ use std::any::Any;
#[derive(Clone)]
pub struct BooleanArray {
data: ArrayData,
/// Pointer to the value array. The lifetime of this must be <= to the value buffer
/// stored in `data`, so it's safe to store.
raw_values: RawPtrBox<u8>,
raw_values: Buffer,
}

impl std::fmt::Debug for BooleanArray {
Expand Down Expand Up @@ -102,7 +99,7 @@ impl BooleanArray {
///
/// Note this doesn't take the offset of this array into account.
pub fn values(&self) -> &Buffer {
&self.data.buffers()[0]
&self.raw_values
}

/// Returns the number of non null, true values within this array
Expand Down Expand Up @@ -328,13 +325,8 @@ impl From<ArrayData> for BooleanArray {
1,
"BooleanArray data should contain a single buffer only (values buffer)"
);
let ptr = data.buffers()[0].as_ptr();
Self {
data,
// SAFETY:
// ArrayData must be valid, and validated data type above
raw_values: unsafe { RawPtrBox::new(ptr) },
}
let raw_values = data.buffers()[0].clone();
Self { data, raw_values }
}
}

Expand Down
13 changes: 7 additions & 6 deletions arrow-array/src/array/byte_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use crate::raw_pointer::RawPtrBox;
use crate::types::bytes::ByteArrayNativeType;
use crate::types::ByteArrayType;
use crate::{Array, ArrayAccessor, OffsetSizeTrait};
use arrow_buffer::ArrowNativeType;
use arrow_buffer::{ArrowNativeType, Buffer};
use arrow_data::ArrayData;
use arrow_schema::DataType;
use std::any::Any;
Expand All @@ -40,15 +40,15 @@ use std::any::Any;
pub struct GenericByteArray<T: ByteArrayType> {
data: ArrayData,
value_offsets: RawPtrBox<T::Offset>,
value_data: RawPtrBox<u8>,
value_data: Buffer,
}

impl<T: ByteArrayType> Clone for GenericByteArray<T> {
fn clone(&self) -> Self {
Self {
data: self.data.clone(),
value_offsets: self.value_offsets,
value_data: self.value_data,
value_data: self.value_data.clone(),
}
}
}
Expand All @@ -68,7 +68,7 @@ impl<T: ByteArrayType> GenericByteArray<T> {

/// Returns the raw value data
pub fn value_data(&self) -> &[u8] {
self.data.buffers()[1].as_slice()
self.value_data.as_slice()
}

/// Returns true if all data within this array is ASCII
Expand Down Expand Up @@ -161,6 +161,7 @@ impl<T: ByteArrayType> GenericByteArray<T> {
.slice_with_length(self.data.offset() * element_len, value_len * element_len);

drop(self.data);
drop(self.value_data);

let try_mutable_null_buffer = match null_bit_buffer {
None => Ok(None),
Expand Down Expand Up @@ -285,13 +286,13 @@ impl<T: ByteArrayType> From<ArrayData> for GenericByteArray<T> {
true => empty_offsets::<T::Offset>().as_ptr() as *const _,
false => data.buffers()[0].as_ptr(),
};
let values = data.buffers()[1].as_ptr();
let value_data = data.buffers()[1].clone();
Self {
data,
// SAFETY:
// ArrayData must be valid, and validated data type above
value_offsets: unsafe { RawPtrBox::new(offsets) },
value_data: unsafe { RawPtrBox::new(values) },
value_data,
}
}
}
Expand Down
38 changes: 10 additions & 28 deletions arrow-array/src/array/primitive_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@

use crate::builder::{BooleanBufferBuilder, BufferBuilder, PrimitiveBuilder};
use crate::iterator::PrimitiveIter;
use crate::raw_pointer::RawPtrBox;
use crate::temporal_conversions::{
as_date, as_datetime, as_datetime_with_timezone, as_duration, as_time,
};
use crate::timezone::Tz;
use crate::trusted_len::trusted_len_unzip;
use crate::types::*;
use crate::{print_long_array, Array, ArrayAccessor};
use arrow_buffer::buffer::ScalarBuffer;
use arrow_buffer::{i256, ArrowNativeType, Buffer};
use arrow_data::bit_iterator::try_for_each_valid_idx;
use arrow_data::ArrayData;
Expand Down Expand Up @@ -266,22 +266,16 @@ pub trait ArrowPrimitiveType: 'static {
/// ```
pub struct PrimitiveArray<T: ArrowPrimitiveType> {
/// Underlying ArrayData
/// # Safety
/// must have exactly one buffer, aligned to type T
data: ArrayData,
/// Pointer to the value array. The lifetime of this must be <= to the value buffer
/// stored in `data`, so it's safe to store.
/// # Safety
/// raw_values must have a value equivalent to `data.buffers()[0].raw_data()`
/// raw_values must have alignment for type T::NativeType
raw_values: RawPtrBox<T::Native>,
/// Values data
raw_values: ScalarBuffer<T::Native>,
}

impl<T: ArrowPrimitiveType> Clone for PrimitiveArray<T> {
fn clone(&self) -> Self {
Self {
data: self.data.clone(),
raw_values: self.raw_values,
raw_values: self.raw_values.clone(),
}
}
}
Expand All @@ -301,15 +295,7 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
/// Returns a slice of the values of this array
#[inline]
pub fn values(&self) -> &[T::Native] {
// Soundness
// raw_values alignment & location is ensured by fn from(ArrayDataRef)
// buffer bounds/offset is ensured by the ArrayData instance.
unsafe {
std::slice::from_raw_parts(
self.raw_values.as_ptr().add(self.data.offset()),
self.len(),
)
}
&self.raw_values
}

/// Returns a new primitive array builder
Expand Down Expand Up @@ -339,8 +325,7 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
/// caller must ensure that the passed in offset is less than the array len()
#[inline]
pub unsafe fn value_unchecked(&self, i: usize) -> T::Native {
let offset = i + self.offset();
*self.raw_values.as_ptr().add(offset)
*self.raw_values.get_unchecked(i)
}

/// Returns the primitive value at index `i`.
Expand Down Expand Up @@ -632,6 +617,7 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
.slice_with_length(self.data.offset() * element_len, len * element_len);

drop(self.data);
drop(self.raw_values);

let try_mutable_null_buffer = match null_bit_buffer {
None => Ok(None),
Expand Down Expand Up @@ -1085,13 +1071,9 @@ impl<T: ArrowPrimitiveType> From<ArrayData> for PrimitiveArray<T> {
"PrimitiveArray data should contain a single buffer only (values buffer)"
);

let ptr = data.buffers()[0].as_ptr();
Self {
data,
// SAFETY:
// ArrayData must be valid, and validated data type above
raw_values: unsafe { RawPtrBox::new(ptr) },
}
let raw_values =
ScalarBuffer::new(data.buffers()[0].clone(), data.offset(), data.len());
Self { data, raw_values }
}
}

Expand Down
2 changes: 1 addition & 1 deletion arrow-array/src/record_batch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -529,7 +529,7 @@ mod tests {
let record_batch =
RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a), Arc::new(b)])
.unwrap();
assert_eq!(record_batch.get_array_memory_size(), 592);
assert_eq!(record_batch.get_array_memory_size(), 624);
}

fn check_batch(record_batch: RecordBatch, num_rows: usize) {
Expand Down
27 changes: 20 additions & 7 deletions arrow-buffer/src/buffer/immutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@
// specific language governing permissions and limitations
// under the License.

use std::convert::AsRef;
use std::fmt::Debug;
use std::iter::FromIterator;
use std::ptr::NonNull;
use std::sync::Arc;
use std::{convert::AsRef, usize};

use crate::alloc::{Allocation, Deallocation};
use crate::util::bit_chunk_iterator::{BitChunks, UnalignedBitChunk};
Expand Down Expand Up @@ -136,7 +136,10 @@ impl Buffer {

/// Returns the byte slice stored in this buffer
pub fn as_slice(&self) -> &[u8] {
&self.data[self.offset..(self.offset + self.length)]
let range = self.offset..(self.offset + self.length);
// Safety:
// Range should be valid
unsafe { self.data.get_unchecked(range) }
}

/// Returns a new [Buffer] that is a slice of this buffer starting at `offset`.
Expand All @@ -145,9 +148,12 @@ impl Buffer {
/// Panics iff `offset` is larger than `len`.
pub fn slice(&self, offset: usize) -> Self {
assert!(
offset <= self.len(),
offset <= self.length,
"the offset of the new Buffer cannot exceed the existing length"
);
// This cannot overflow as
// `self.offset + self.length < self.data.len()`
// `offset < self.length`
Self {
data: self.data.clone(),
offset: self.offset + offset,
Expand All @@ -162,7 +168,7 @@ impl Buffer {
/// Panics iff `(offset + length)` is larger than the existing length.
pub fn slice_with_length(&self, offset: usize, length: usize) -> Self {
assert!(
offset + length <= self.len(),
offset.saturating_add(length) <= self.length,
"the offset of the new Buffer cannot exceed the existing length"
);
Self {
Expand Down Expand Up @@ -262,7 +268,7 @@ impl<T: AsRef<[u8]>> From<T> for Buffer {
}

/// Creating a `Buffer` instance by storing the boolean values into the buffer
impl std::iter::FromIterator<bool> for Buffer {
impl FromIterator<bool> for Buffer {
fn from_iter<I>(iter: I) -> Self
where
I: IntoIterator<Item = bool>,
Expand Down Expand Up @@ -321,10 +327,10 @@ impl Buffer {
pub unsafe fn try_from_trusted_len_iter<
E,
T: ArrowNativeType,
I: Iterator<Item = std::result::Result<T, E>>,
I: Iterator<Item = Result<T, E>>,
>(
iterator: I,
) -> std::result::Result<Self, E> {
) -> Result<Self, E> {
Ok(MutableBuffer::try_from_trusted_len_iter(iterator)?.into())
}
}
Expand Down Expand Up @@ -600,4 +606,11 @@ mod tests {
let slice = buffer.typed_data::<i32>();
assert_eq!(slice, &[2, 3, 4, 5]);
}

#[test]
#[should_panic(expected = "the offset of the new Buffer cannot exceed the existing length")]
fn slice_overflow() {
let buffer = Buffer::from(MutableBuffer::from_len_zeroed(12));
buffer.slice_with_length(2, usize::MAX);
}
}
Loading

0 comments on commit 6264343

Please sign in to comment.