Skip to content

Commit

Permalink
feat: new SharedVec for zero-copy slices
Browse files Browse the repository at this point in the history
Implements a new SharedVec type that allows for owned, zero-copy
slicing of a collection.

I replaced usage of Vec<Buffer> inside of ArrayView with the SharedVec.
  • Loading branch information
a10y committed Sep 20, 2024
1 parent 14a5252 commit 7efdbf7
Show file tree
Hide file tree
Showing 3 changed files with 131 additions and 4 deletions.
123 changes: 123 additions & 0 deletions vortex-array/src/arc_slice.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
use std::fmt;
use std::fmt::{Debug, Formatter};
use std::sync::Arc;

/// SharedVec provides shared access to a collection, along with the ability to create owned
/// slices of the collection with zero copying.
///
/// A `SharedVec` is a window of `len` elements, beginning at index `start`, over the
/// reference-counted slice held in `data`. The window is expected to lie entirely inside
/// `data` (i.e. `start + len <= data.len()`); element access through `Deref` depends on it.
pub struct SharedVec<T> {
/// Reference-counted backing storage, shared between a `SharedVec` and its slices.
data: Arc<[T]>,
/// Index into `data` of the first element visible through this value.
start: usize,
/// Number of elements visible through this value.
len: usize,
}

impl<T> std::ops::Deref for SharedVec<T> {
    type Target = [T];

    /// Borrows the `len` elements starting at `start` as a plain slice, which makes every
    /// `[T]` method (`iter`, `len`, indexing, ...) available on `SharedVec`.
    ///
    /// # Panics
    ///
    /// Panics if the window described by `start` and `len` does not fit inside `data`.
    fn deref(&self) -> &Self::Target {
        // Bounds-checked slicing rather than `slice::from_raw_parts`: a window that does not
        // fit inside `data` now panics instead of reading out of bounds. Slicing in two steps
        // also avoids computing `start + len`, which could overflow.
        &self.data[self.start..][..self.len]
    }
}

impl<T: Debug> Debug for SharedVec<T> {
    /// Writes the struct as `SharedVec { start, len, data }`.
    ///
    /// Note that `data` is the whole backing slice, not only the elements inside the
    /// `start`/`len` window.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let mut builder = f.debug_struct("SharedVec");
        builder.field("start", &self.start);
        builder.field("len", &self.len);
        builder.field("data", &self.data);
        builder.finish()
    }
}

impl<T: Clone> Clone for SharedVec<T> {
fn clone(&self) -> Self {
Self {
data: self.data.clone(),
start: self.start,
len: self.len,
}
}
}

impl<T> From<Arc<[T]>> for SharedVec<T> {
    /// Wraps an existing reference-counted slice; the resulting window covers all of it.
    fn from(value: Arc<[T]>) -> Self {
        // Read the length before `value` is moved into the struct.
        let len = value.len();
        Self {
            data: value,
            start: 0,
            len,
        }
    }
}

impl<T> From<Vec<T>> for SharedVec<T> {
fn from(value: Vec<T>) -> Self {
// moves the data from the Vec into a new owned slice.
let data: Arc<[T]> = Arc::from(value);

SharedVec::from(data)
}
}

impl<T> SharedVec<T> {
/// Create a new slice of the given vec, without copying or allocation.
pub fn slice(&self, start: usize, end: usize) -> Self {
assert!(end <= self.len, "cannot slice beyond end of SharedVec");

Self {
data: self.data.clone(),
start: self.start + start,
len: end - start,
}
}
}

#[cfg(test)]
mod test {
    use std::sync::Arc;

    use crate::arc_slice::SharedVec;

    /// Builds the three-name fixture used by every test: a `SharedVec` created from an
    /// `Arc<[String]>` holding "alice", "bob" and "carol".
    fn names() -> SharedVec<String> {
        let raw: Arc<[String]> = Arc::from(vec![
            "alice".to_string(),
            "bob".to_string(),
            "carol".to_string(),
        ]);
        SharedVec::from(raw)
    }

    #[test]
    fn test_simple() {
        let shared_vec = names();

        // We get iter() for free via the Deref to slice!
        let collected: Vec<&String> = shared_vec.iter().collect();
        assert_eq!(collected, vec!["alice", "bob", "carol"]);
    }

    #[test]
    fn test_slicing() {
        let shared_vec = names();

        // Original array
        assert_eq!(shared_vec.len(), 3);

        // Sliced once
        let sliced_vec = shared_vec.slice(1, 3);
        assert_eq!(sliced_vec.len(), 2);
        let once: Vec<&String> = sliced_vec.iter().collect();
        assert_eq!(once, vec!["bob", "carol"]);

        // Sliced again: indices are relative to `sliced_vec`, not to the original.
        let sliced_again = sliced_vec.slice(1, 2);
        assert_eq!(sliced_again.len(), 1);
        let twice: Vec<&String> = sliced_again.iter().collect();
        assert_eq!(twice, vec!["carol"]);
    }

    #[test]
    fn test_deref() {
        let shared_vec = names();

        // Indexing also comes from the Deref to slice.
        for (idx, expected) in ["alice", "bob", "carol"].iter().enumerate() {
            assert_eq!(&shared_vec[idx], *expected);
        }
    }
}
1 change: 1 addition & 0 deletions vortex-array/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ use crate::variants::ArrayVariants;
use crate::visitor::{AcceptArrayVisitor, ArrayVisitor};

pub mod accessor;
mod arc_slice;
pub mod array;
pub mod arrow;
mod canonical;
Expand Down
11 changes: 7 additions & 4 deletions vortex-array/src/view.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use vortex_dtype::{DType, Nullability};
use vortex_error::{vortex_bail, vortex_err, VortexError, VortexExpect as _, VortexResult};
use vortex_scalar::{PValue, Scalar, ScalarValue};

use crate::arc_slice::SharedVec;
use crate::encoding::EncodingRef;
use crate::stats::{Stat, Statistics, StatsSet};
use crate::visitor::ArrayVisitor;
Expand All @@ -21,8 +22,7 @@ pub struct ArrayView {
len: usize,
flatbuffer: Buffer,
flatbuffer_loc: usize,
// TODO(ngates): create an RC'd vector that can be lazily sliced.
buffers: Vec<Buffer>,
buffers: SharedVec<Buffer>,
ctx: Arc<Context>,
// TODO(ngates): a store a Projection. A projected ArrayView contains the full fb::Array
// metadata, but only the buffers from the selected columns. Therefore we need to know
Expand Down Expand Up @@ -66,13 +66,14 @@ impl ArrayView {
Self::cumulative_nbuffers(array)
)
}

let view = Self {
encoding,
dtype,
len,
flatbuffer,
flatbuffer_loc,
buffers,
buffers: SharedVec::from(buffers),
ctx,
};

Expand Down Expand Up @@ -136,7 +137,9 @@ impl ArrayView {
len,
flatbuffer: self.flatbuffer.clone(),
flatbuffer_loc,
buffers: self.buffers[buffer_offset..][0..buffer_count].to_vec(),
buffers: self
.buffers
.slice(buffer_offset, buffer_offset + buffer_count),
ctx: self.ctx.clone(),
})
}
Expand Down

0 comments on commit 7efdbf7

Please sign in to comment.