From dbaf477b5cf8c3369d1e1b2e116eb666eec9dc38 Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Tue, 10 Sep 2024 16:54:02 +0100 Subject: [PATCH] Fix TakeFn on sliced Bitpacked array (#775) --- .../fastlanes/src/bitpacking/compute/take.rs | 34 +++++++++++++------ encodings/runend/src/compute.rs | 8 ++--- fuzz/fuzz_targets/fuzz_target_1.rs | 27 +++++++++------ 3 files changed, 45 insertions(+), 24 deletions(-) diff --git a/encodings/fastlanes/src/bitpacking/compute/take.rs b/encodings/fastlanes/src/bitpacking/compute/take.rs index e5b74afa7..8bf168a0b 100644 --- a/encodings/fastlanes/src/bitpacking/compute/take.rs +++ b/encodings/fastlanes/src/bitpacking/compute/take.rs @@ -35,9 +35,10 @@ fn take_primitive( indices .maybe_null_slice::<$P>() .iter() - .chunk_by(|idx| (**idx / 1024) as usize) + .map(|i| *i as usize + array.offset()) + .chunk_by(|idx| idx / 1024) .into_iter() - .map(|(k, g)| (k, g.map(|idx| (*idx % 1024) as u16).collect())) + .map(|(k, g)| (k, g.map(|idx| (idx % 1024) as u16).collect())) .collect() }); @@ -133,10 +134,10 @@ mod test { use itertools::Itertools; use rand::distributions::Uniform; use rand::{thread_rng, Rng}; - use vortex::array::{Primitive, PrimitiveArray, SparseArray}; - use vortex::compute::take; + use vortex::array::{PrimitiveArray, SparseArray}; use vortex::compute::unary::scalar_at; - use vortex::{ArrayDef, IntoArray, IntoArrayVariant}; + use vortex::compute::{slice, take}; + use vortex::{IntoArray, IntoArrayVariant}; use crate::BitPackedArray; @@ -146,17 +147,30 @@ mod test { // Create a u8 array modulo 63. let unpacked = PrimitiveArray::from((0..4096).map(|i| (i % 63) as u8).collect::>()); - let bitpacked = BitPackedArray::encode(unpacked.array(), 6).unwrap(); - let result = take(bitpacked.array(), &indices).unwrap(); - assert_eq!(result.encoding().id(), Primitive::ID); - - let primitive_result = result.into_primitive().unwrap(); + let primitive_result = take(bitpacked.array(), &indices) + .unwrap() + .into_primitive() + .unwrap(); let res_bytes = primitive_result.maybe_null_slice::(); assert_eq!(res_bytes, &[0, 62, 31, 33, 9, 18]); } + #[test] + fn take_sliced_indices() { + let indices = PrimitiveArray::from(vec![1919, 1921]).into_array(); + + // Create a u8 array modulo 63. + let unpacked = PrimitiveArray::from((0..4096).map(|i| (i % 63) as u8).collect::>()); + let bitpacked = BitPackedArray::encode(unpacked.array(), 6).unwrap(); + let sliced = slice(bitpacked.array(), 128, 2050).unwrap(); + + let primitive_result = take(&sliced, &indices).unwrap().into_primitive().unwrap(); + let res_bytes = primitive_result.maybe_null_slice::(); + assert_eq!(res_bytes, &[31, 33]); + } + #[test] #[cfg_attr(miri, ignore)] // This test is too slow on miri fn take_random_indices() { diff --git a/encodings/runend/src/compute.rs b/encodings/runend/src/compute.rs index 9385aede5..36885bca4 100644 --- a/encodings/runend/src/compute.rs +++ b/encodings/runend/src/compute.rs @@ -64,21 +64,21 @@ impl TakeFn for RunEndArray { } Validity::Array(original_validity) => { let dense_validity = take(&original_validity, indices)?; + let filtered_values = filter(&dense_values, &dense_validity)?; + let length = dense_validity.len(); let dense_nonnull_indices = PrimitiveArray::from( dense_validity - .clone() .into_bool()? .boolean_buffer() .set_indices() .map(|idx| idx as u64) - .collect::>(), + .collect::>(), ) .into_array(); - let length = dense_validity.len(); SparseArray::try_new( dense_nonnull_indices, - filter(&dense_values, &dense_validity)?, + filtered_values, length, Scalar::null(self.dtype().clone()), )? diff --git a/fuzz/fuzz_targets/fuzz_target_1.rs b/fuzz/fuzz_targets/fuzz_target_1.rs index 64904471a..b0ea87e27 100644 --- a/fuzz/fuzz_targets/fuzz_target_1.rs +++ b/fuzz/fuzz_targets/fuzz_target_1.rs @@ -15,12 +15,12 @@ use vortex_scalar::{PValue, Scalar, ScalarValue}; fuzz_target!(|fuzz_action: FuzzArrayAction| -> Corpus { let FuzzArrayAction { array, actions } = fuzz_action; let mut current_array = array.clone(); - for (action, expected) in actions { + for (i, (action, expected)) in actions.into_iter().enumerate() { match action { Action::Compress(c) => { match fuzz_compress(¤t_array.into_canonical().unwrap().into(), &c) { Some(compressed_array) => { - assert_array_eq(&expected.array(), &compressed_array); + assert_array_eq(&expected.array(), &compressed_array, i); current_array = compressed_array; } None => return Corpus::Reject, @@ -28,14 +28,14 @@ fuzz_target!(|fuzz_action: FuzzArrayAction| -> Corpus { } Action::Slice(range) => { current_array = slice(¤t_array, range.start, range.end).unwrap(); - assert_array_eq(&expected.array(), ¤t_array); + assert_array_eq(&expected.array(), ¤t_array, i); } Action::Take(indices) => { if indices.is_empty() { return Corpus::Reject; } current_array = take(¤t_array, &indices).unwrap(); - assert_array_eq(&expected.array(), ¤t_array); + assert_array_eq(&expected.array(), ¤t_array, i); } Action::SearchSorted(s, side) => { // TODO(robert): Ideally we'd preserve the encoding perfectly but this is close enough @@ -51,7 +51,7 @@ fuzz_target!(|fuzz_action: FuzzArrayAction| -> Corpus { sorted = fuzz_compress(&sorted, &SamplingCompressor::default()).unwrap_or(sorted); } - assert_search_sorted(sorted, s, side, expected.search()) + assert_search_sorted(sorted, s, side, expected.search(), i) } } } @@ -67,23 +67,29 @@ fn fuzz_compress(array: &Array, compressor: &SamplingCompressor) -> Option