Skip to content

Commit

Permalink
Allow exposing dataptr only on integer and real
Browse files Browse the repository at this point in the history
  • Loading branch information
yutannihilation committed May 1, 2024
1 parent 07bf3b1 commit 2c73b36
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 67 deletions.
37 changes: 19 additions & 18 deletions src/altrep/altinteger.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,10 @@ pub trait AltInteger: Sized + IntoExtPtrSexp {
/// Package name to identify the ALTREP class.
const PACKAGE_NAME: &'static str;

/// If `true` (default), cache the SEXP with all the values copied from the
/// underlying data. If `false`, R always accesses the underlying data.
const CACHE_MATERIALIZED_SEXP: bool = true;
/// If `true`, all R operations are done directly on the pointer to the
/// underlying data. In this case, the `dataptr` method must be implemented.

const EXPOSE_DATAPTR: bool = true;

/// Return the length of the data.
fn length(&mut self) -> usize;
Expand All @@ -34,8 +35,7 @@ pub trait AltInteger: Sized + IntoExtPtrSexp {
/// out-of-bound check, so you don't need to implement it here.
fn elt(&mut self, i: usize) -> i32;

/// Returns the pointer to the underlying data. This must be implemented
/// when `CACHE_MATERIALIZED_SEXP` is `true`.
/// Returns the pointer to the underlying data.
///
/// The default implementation returns `None`. The documentation of
/// `EXPOSE_DATAPTR` states that this method must be implemented when that
/// constant is `true`.
fn dataptr(&mut self) -> Option<*mut i32> {
None
}
Expand Down Expand Up @@ -63,7 +63,7 @@ pub trait AltInteger: Sized + IntoExtPtrSexp {

/// Converts the struct into an ALTREP object
///
/// NOTE(review): the third argument here is `Self::EXPOSE_DATAPTR`, whereas
/// the `AltReal` counterpart passes `!Self::EXPOSE_DATAPTR` and defaults the
/// constant to `false`. The polarity of this flag looks inconsistent between
/// the two traits — confirm which one is intended.
fn into_altrep(self) -> crate::Result<SEXP> {
super::create_altrep_instance(self, Self::CLASS_NAME, Self::CACHE_MATERIALIZED_SEXP)
super::create_altrep_instance(self, Self::CLASS_NAME, Self::EXPOSE_DATAPTR)
}

/// Extracts the reference (`&T`) of the underlying data
Expand Down Expand Up @@ -98,9 +98,13 @@ pub fn register_altinteger_class<T: AltInteger>(
#[allow(clippy::mut_from_ref)]
#[inline]
fn materialize<T: AltInteger>(x: &mut SEXP) -> SEXP {
let data = unsafe { R_altrep_data2(*x) };
if unsafe { data != R_NilValue } {
return data;
// If the strategy is to cache the materialized SEXP, use the cached one
// when available.
if T::EXPOSE_DATAPTR {
let data = unsafe { R_altrep_data2(*x) };
if unsafe { data != R_NilValue } {
return data;
}
}

let self_: &mut T = match super::extract_mut_from_altrep(x) {
Expand All @@ -109,20 +113,17 @@ pub fn register_altinteger_class<T: AltInteger>(
};

let len = self_.length();
let new = crate::alloc_vector(INTSXP, len).unwrap();

let new = crate::alloc_vector(INTSXP, len).unwrap();
unsafe { Rf_protect(new) };

let dst = unsafe { std::slice::from_raw_parts_mut(INTEGER(new), len) };

self_.copy_to(dst, 0);
self_.copy_to(
unsafe { std::slice::from_raw_parts_mut(INTEGER(new), len) },
0,
);

if T::CACHE_MATERIALIZED_SEXP {
if T::EXPOSE_DATAPTR {
// Cache the materialized data in data2.
//
// Note that, for example arrow stores it in `CAR()` of data2, but this
// implementation naively uses data2. Probably that should be clever
// because data2 can be used for other purposes.
unsafe { R_set_altrep_data2(*x, new) };
}

Expand Down
26 changes: 7 additions & 19 deletions src/altrep/altlogical.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,23 +23,13 @@ pub trait AltLogical: Sized + IntoExtPtrSexp {
/// Package name to identify the ALTREP class.
const PACKAGE_NAME: &'static str;

/// If `true` (default), cache the SEXP with all the values copied from the
/// underlying data. If `false`, R always accesses the underlying data.
const CACHE_MATERIALIZED_SEXP: bool = true;

/// Return the length of the data.
fn length(&mut self) -> usize;

/// Returns the value of `i`-th element. Note that, it seems R handles the
/// out-of-bound check, so you don't need to implement it here.
fn elt(&mut self, i: usize) -> bool;

/// Returns the pointer to the underlying data. This must be implemented
/// when `CACHE_MATERIALIZED_SEXP` is `true`.
fn dataptr(&mut self) -> Option<*mut i32> {
None
}

/// Copies the specified range of the data into a new memory. This is used
/// when the ALTREP needs to be materialized.
///
Expand All @@ -63,7 +53,7 @@ pub trait AltLogical: Sized + IntoExtPtrSexp {

/// Converts the struct into an ALTREP object
fn into_altrep(self) -> crate::Result<SEXP> {
super::create_altrep_instance(self, Self::CLASS_NAME, Self::CACHE_MATERIALIZED_SEXP)
super::create_altrep_instance(self, Self::CLASS_NAME, true)
}

/// Extracts the reference (`&T`) of the underlying data
Expand Down Expand Up @@ -98,6 +88,7 @@ pub fn register_altlogical_class<T: AltLogical>(
#[allow(clippy::mut_from_ref)]
#[inline]
fn materialize<T: AltLogical>(x: &mut SEXP) -> SEXP {
// Use the cached one if available
let data = unsafe { R_altrep_data2(*x) };
if unsafe { data != R_NilValue } {
return data;
Expand All @@ -109,19 +100,16 @@ pub fn register_altlogical_class<T: AltLogical>(
};

let len = self_.length();
let new = crate::alloc_vector(LGLSXP, len).unwrap();

let new = crate::alloc_vector(LGLSXP, len).unwrap();
unsafe { Rf_protect(new) };

let dst = unsafe { std::slice::from_raw_parts_mut(LOGICAL(new), len) };

self_.copy_to(dst, 0);
self_.copy_to(
unsafe { std::slice::from_raw_parts_mut(LOGICAL(new), len) },
0,
);

// Cache the materialized data in data2.
//
// Note that, for example arrow stores it in `CAR()` of data2, but this
// implementation naively uses data2. Probably that should be clever
// because data2 can be used for other purposes.
unsafe { R_set_altrep_data2(*x, new) };

// new doesn't need protection because it's used as long as this ALTREP exists.
Expand Down
37 changes: 18 additions & 19 deletions src/altrep/altreal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ pub trait AltReal: Sized + IntoExtPtrSexp {
/// Package name to identify the ALTREP class.
const PACKAGE_NAME: &'static str;

/// If `true` (default), cache the SEXP with all the values copied from the
/// underlying data. If `false`, R always accesses the underlying data.
const CACHE_MATERIALIZED_SEXP: bool = true;
/// If `true`, all R operations are done directly on the pointer to the
/// underlying data. In this case, the `dataptr` method must be implemented.
const EXPOSE_DATAPTR: bool = false;

/// Return the length of the data.
fn length(&mut self) -> usize;
Expand All @@ -33,8 +33,7 @@ pub trait AltReal: Sized + IntoExtPtrSexp {
/// out-of-bound check, so you don't need to implement it here.
fn elt(&mut self, i: usize) -> f64;

/// Returns the pointer to the underlying data. This must be implemented
/// when `CACHE_MATERIALIZED_SEXP` is `true`.
/// Returns the pointer to the underlying data.
///
/// The default implementation returns `None`.
///
/// NOTE(review): the pointer type here is `*mut i32`, while `elt` returns
/// `f64` and materialization writes through `REAL(...)`; this looks copied
/// from the integer trait and probably should be `*mut f64` — confirm before
/// relying on it.
fn dataptr(&mut self) -> Option<*mut i32> {
None
}
Expand Down Expand Up @@ -62,7 +61,7 @@ pub trait AltReal: Sized + IntoExtPtrSexp {

/// Converts the struct into an ALTREP object
fn into_altrep(self) -> crate::Result<SEXP> {
super::create_altrep_instance(self, Self::CLASS_NAME, Self::CACHE_MATERIALIZED_SEXP)
super::create_altrep_instance(self, Self::CLASS_NAME, !Self::EXPOSE_DATAPTR)
}

/// Extracts the reference (`&T`) of the underlying data
Expand Down Expand Up @@ -97,9 +96,13 @@ pub fn register_altreal_class<T: AltReal>(
#[allow(clippy::mut_from_ref)]
#[inline]
fn materialize<T: AltReal>(x: &mut SEXP) -> SEXP {
let data = unsafe { R_altrep_data2(*x) };
if unsafe { data != R_NilValue } {
return data;
// If the strategy is to cache the materialized SEXP, use the cached one
// when available.
if !T::EXPOSE_DATAPTR {
let data = unsafe { R_altrep_data2(*x) };
if unsafe { data != R_NilValue } {
return data;
}
}

let self_: &mut T = match super::extract_mut_from_altrep(x) {
Expand All @@ -108,20 +111,16 @@ pub fn register_altreal_class<T: AltReal>(
};

let len = self_.length();
let new = crate::alloc_vector(REALSXP, len).unwrap();

let new = crate::alloc_vector(REALSXP, len).unwrap();
unsafe { Rf_protect(new) };

let dst = unsafe { std::slice::from_raw_parts_mut(REAL(new), len) };
self_.copy_to(unsafe { std::slice::from_raw_parts_mut(REAL(new), len) }, 0);

self_.copy_to(dst, 0);

// Cache the materialized data in data2.
//
// Note that, for example arrow stores it in `CAR()` of data2, but this
// implementation naively uses data2. Probably that should be clever
// because data2 can be used for other purposes.
unsafe { R_set_altrep_data2(*x, new) };
if !T::EXPOSE_DATAPTR {
// Cache the materialized data in data2.
unsafe { R_set_altrep_data2(*x, new) };
}

// new doesn't need protection because it's used as long as this ALTREP exists.
unsafe { Rf_unprotect(1) };
Expand Down
12 changes: 1 addition & 11 deletions src/altrep/altstring.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,26 +24,16 @@ pub trait AltString: Sized + IntoExtPtrSexp {
/// Package name to identify the ALTREP class.
const PACKAGE_NAME: &'static str;

/// If `true` (default), cache the SEXP with all the values copied from the
/// underlying data. If `false`, R always accesses the underlying data.
const CACHE_MATERIALIZED_SEXP: bool = true;

/// Return the length of the data.
fn length(&mut self) -> usize;

/// Returns the value of `i`-th element. Note that, it seems R handles the
/// out-of-bound check, so you don't need to implement it here.
fn elt(&mut self, i: usize) -> &str;

/// Returns the pointer to the underlying data. This must be implemented
/// when `CACHE_MATERIALIZED_SEXP` is `true`.
fn dataptr(&mut self) -> Option<*mut i32> {
None
}

/// Converts the struct into an ALTREP object
fn into_altrep(self) -> crate::Result<SEXP> {
super::create_altrep_instance(self, Self::CLASS_NAME, Self::CACHE_MATERIALIZED_SEXP)
super::create_altrep_instance(self, Self::CLASS_NAME, true)
}

/// Extracts the reference (`&T`) of the underlying data
Expand Down

0 comments on commit 2c73b36

Please sign in to comment.