Skip to content

Commit

Permalink
feat: allow array_slice to take an optional stride parameter (apach…
Browse files Browse the repository at this point in the history
…e#10469)

* feat: allow array_slice to take an optional stride parameter

* Use ScalarUDF::call

* Use create_function and add test

* format

* fix cargo doc
  • Loading branch information
jonahgao committed May 13, 2024
1 parent 5b74c2d commit 18fc376
Show file tree
Hide file tree
Showing 23 changed files with 74 additions and 67 deletions.
6 changes: 3 additions & 3 deletions datafusion/functions-array/src/array_has.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,19 +34,19 @@ use std::any::Any;
use std::sync::Arc;

// Create static instances of ScalarUDFs for each function
make_udf_function!(ArrayHas,
make_udf_expr_and_func!(ArrayHas,
array_has,
first_array second_array, // arg name
"returns true, if the element appears in the first array, otherwise false.", // doc
array_has_udf // internal function name
);
make_udf_function!(ArrayHasAll,
make_udf_expr_and_func!(ArrayHasAll,
array_has_all,
first_array second_array, // arg name
"returns true if each element of the second array appears in the first array; otherwise, it returns false.", // doc
array_has_all_udf // internal function name
);
make_udf_function!(ArrayHasAny,
make_udf_expr_and_func!(ArrayHasAny,
array_has_any,
first_array second_array, // arg name
"returns true if at least one element of the second array appears in the first array; otherwise, it returns false.", // doc
Expand Down
2 changes: 1 addition & 1 deletion datafusion/functions-array/src/cardinality.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ use datafusion_expr::{ColumnarValue, Expr, ScalarUDFImpl, Signature, Volatility}
use std::any::Any;
use std::sync::Arc;

make_udf_function!(
make_udf_expr_and_func!(
Cardinality,
cardinality,
array,
Expand Down
6 changes: 3 additions & 3 deletions datafusion/functions-array/src/concat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ use datafusion_expr::{

use crate::utils::{align_array_dimensions, check_datatypes, make_scalar_function};

make_udf_function!(
make_udf_expr_and_func!(
ArrayAppend,
array_append,
array element, // arg name
Expand Down Expand Up @@ -96,7 +96,7 @@ impl ScalarUDFImpl for ArrayAppend {
}
}

make_udf_function!(
make_udf_expr_and_func!(
ArrayPrepend,
array_prepend,
element array,
Expand Down Expand Up @@ -156,7 +156,7 @@ impl ScalarUDFImpl for ArrayPrepend {
}
}

make_udf_function!(
make_udf_expr_and_func!(
ArrayConcat,
array_concat,
"Concatenates arrays.",
Expand Down
4 changes: 2 additions & 2 deletions datafusion/functions-array/src/dimension.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ use datafusion_expr::expr::ScalarFunction;
use datafusion_expr::{ColumnarValue, Expr, ScalarUDFImpl, Signature, Volatility};
use std::sync::Arc;

make_udf_function!(
make_udf_expr_and_func!(
ArrayDims,
array_dims,
array,
Expand Down Expand Up @@ -88,7 +88,7 @@ impl ScalarUDFImpl for ArrayDims {
}
}

make_udf_function!(
make_udf_expr_and_func!(
ArrayNdims,
array_ndims,
array,
Expand Down
2 changes: 1 addition & 1 deletion datafusion/functions-array/src/empty.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ use datafusion_expr::{ColumnarValue, Expr, ScalarUDFImpl, Signature, Volatility}
use std::any::Any;
use std::sync::Arc;

make_udf_function!(
make_udf_expr_and_func!(
ArrayEmpty,
array_empty,
array,
Expand Down
2 changes: 1 addition & 1 deletion datafusion/functions-array/src/except.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ use std::any::Any;
use std::collections::HashSet;
use std::sync::Arc;

make_udf_function!(
make_udf_expr_and_func!(
ArrayExcept,
array_except,
first_array second_array,
Expand Down
23 changes: 13 additions & 10 deletions datafusion/functions-array/src/extract.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,31 +44,25 @@ use std::sync::Arc;
use crate::utils::make_scalar_function;

// Create static instances of ScalarUDFs for each function
make_udf_function!(
make_udf_expr_and_func!(
ArrayElement,
array_element,
array element,
"extracts the element with the index n from the array.",
array_element_udf
);

make_udf_function!(
ArraySlice,
array_slice,
array begin end stride,
"returns a slice of the array.",
array_slice_udf
);
create_func!(ArraySlice, array_slice_udf);

make_udf_function!(
make_udf_expr_and_func!(
ArrayPopFront,
array_pop_front,
array,
"returns the array without the first element.",
array_pop_front_udf
);

make_udf_function!(
make_udf_expr_and_func!(
ArrayPopBack,
array_pop_back,
array,
Expand Down Expand Up @@ -224,6 +218,15 @@ where
Ok(arrow::array::make_array(data))
}

#[doc = "returns a slice of the array."]
pub fn array_slice(array: Expr, begin: Expr, end: Expr, stride: Option<Expr>) -> Expr {
let args = match stride {
Some(stride) => vec![array, begin, end, stride],
None => vec![array, begin, end],
};
array_slice_udf().call(args)
}

#[derive(Debug)]
pub(super) struct ArraySlice {
signature: Signature,
Expand Down
2 changes: 1 addition & 1 deletion datafusion/functions-array/src/flatten.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ use datafusion_expr::{ColumnarValue, Expr, ScalarUDFImpl, Signature, Volatility}
use std::any::Any;
use std::sync::Arc;

make_udf_function!(
make_udf_expr_and_func!(
Flatten,
flatten,
array,
Expand Down
2 changes: 1 addition & 1 deletion datafusion/functions-array/src/length.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ use datafusion_expr::{ColumnarValue, Expr, ScalarUDFImpl, Signature, Volatility}
use std::any::Any;
use std::sync::Arc;

make_udf_function!(
make_udf_expr_and_func!(
ArrayLength,
array_length,
array,
Expand Down
44 changes: 21 additions & 23 deletions datafusion/functions-array/src/macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@
///
/// 1. Single `ScalarUDF` instance
///
/// Creates a singleton `ScalarUDF` of the `$UDF` function named `$GNAME` and a
/// function named `$NAME` which returns that function named $NAME.
/// Creates a singleton `ScalarUDF` of the `$UDF` function named `STATIC_$(UDF)` and a
/// function named `$SCALAR_UDF_FUNC` which returns that function named `STATIC_$(UDF)`.
///
/// This is used to ensure creating the list of `ScalarUDF` only happens once.
///
Expand All @@ -41,10 +41,9 @@
/// * `arg`: 0 or more named arguments for the function
/// * `DOC`: documentation string for the function
/// * `SCALAR_UDF_FUNC`: name of the function to create (just) the `ScalarUDF`
/// * `GNAME`: name for the single static instance of the `ScalarUDF`
///
/// [`ScalarUDFImpl`]: datafusion_expr::ScalarUDFImpl
macro_rules! make_udf_function {
macro_rules! make_udf_expr_and_func {
($UDF:ty, $EXPR_FN:ident, $($arg:ident)*, $DOC:expr , $SCALAR_UDF_FN:ident) => {
paste::paste! {
// "fluent expr_fn" style function
Expand All @@ -55,25 +54,7 @@ macro_rules! make_udf_function {
vec![$($arg),*],
))
}

/// Singleton instance of [`$UDF`], ensures the UDF is only created once
/// named STATIC_$(UDF). For example `STATIC_ArrayToString`
#[allow(non_upper_case_globals)]
static [< STATIC_ $UDF >]: std::sync::OnceLock<std::sync::Arc<datafusion_expr::ScalarUDF>> =
std::sync::OnceLock::new();

/// ScalarFunction that returns a [`ScalarUDF`] for [`$UDF`]
///
/// [`ScalarUDF`]: datafusion_expr::ScalarUDF
pub fn $SCALAR_UDF_FN() -> std::sync::Arc<datafusion_expr::ScalarUDF> {
[< STATIC_ $UDF >]
.get_or_init(|| {
std::sync::Arc::new(datafusion_expr::ScalarUDF::new_from_impl(
<$UDF>::new(),
))
})
.clone()
}
create_func!($UDF, $SCALAR_UDF_FN);
}
};
($UDF:ty, $EXPR_FN:ident, $DOC:expr , $SCALAR_UDF_FN:ident) => {
Expand All @@ -86,7 +67,24 @@ macro_rules! make_udf_function {
arg,
))
}
create_func!($UDF, $SCALAR_UDF_FN);
}
};
}

/// Creates a singleton `ScalarUDF` of the `$UDF` function named `STATIC_$(UDF)` and a
/// function named `$SCALAR_UDF_FUNC` which returns that function named `STATIC_$(UDF)`.
///
/// This is used to ensure creating the list of `ScalarUDF` only happens once.
///
/// # Arguments
/// * `UDF`: name of the [`ScalarUDFImpl`]
/// * `SCALAR_UDF_FUNC`: name of the function to create (just) the `ScalarUDF`
///
/// [`ScalarUDFImpl`]: datafusion_expr::ScalarUDFImpl
macro_rules! create_func {
($UDF:ty, $SCALAR_UDF_FN:ident) => {
paste::paste! {
/// Singleton instance of [`$UDF`], ensures the UDF is only created once
/// named STATIC_$(UDF). For example `STATIC_ArrayToString`
#[allow(non_upper_case_globals)]
Expand Down
2 changes: 1 addition & 1 deletion datafusion/functions-array/src/make_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ use datafusion_expr::{Expr, TypeSignature};

use crate::utils::make_scalar_function;

make_udf_function!(
make_udf_expr_and_func!(
MakeArray,
make_array,
"Returns an Arrow array using the specified input expressions.",
Expand Down
4 changes: 2 additions & 2 deletions datafusion/functions-array/src/position.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ use itertools::Itertools;

use crate::utils::{compare_element_to_list, make_scalar_function};

make_udf_function!(
make_udf_expr_and_func!(
ArrayPosition,
array_position,
array element index,
Expand Down Expand Up @@ -168,7 +168,7 @@ fn generic_position<OffsetSize: OffsetSizeTrait>(
Ok(Arc::new(UInt64Array::from(data)))
}

make_udf_function!(
make_udf_expr_and_func!(
ArrayPositions,
array_positions,
array element, // arg name
Expand Down
4 changes: 2 additions & 2 deletions datafusion/functions-array/src/range.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ use datafusion_expr::{
use std::any::Any;
use std::sync::Arc;

make_udf_function!(
make_udf_expr_and_func!(
Range,
range,
start stop step,
Expand Down Expand Up @@ -106,7 +106,7 @@ impl ScalarUDFImpl for Range {
}
}

make_udf_function!(
make_udf_expr_and_func!(
GenSeries,
gen_series,
start stop step,
Expand Down
6 changes: 3 additions & 3 deletions datafusion/functions-array/src/remove.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ use datafusion_expr::{ColumnarValue, Expr, ScalarUDFImpl, Signature, Volatility}
use std::any::Any;
use std::sync::Arc;

make_udf_function!(
make_udf_expr_and_func!(
ArrayRemove,
array_remove,
array element,
Expand Down Expand Up @@ -81,7 +81,7 @@ impl ScalarUDFImpl for ArrayRemove {
}
}

make_udf_function!(
make_udf_expr_and_func!(
ArrayRemoveN,
array_remove_n,
array element max,
Expand Down Expand Up @@ -130,7 +130,7 @@ impl ScalarUDFImpl for ArrayRemoveN {
}
}

make_udf_function!(
make_udf_expr_and_func!(
ArrayRemoveAll,
array_remove_all,
array element,
Expand Down
2 changes: 1 addition & 1 deletion datafusion/functions-array/src/repeat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ use datafusion_expr::{ColumnarValue, Expr, ScalarUDFImpl, Signature, Volatility}
use std::any::Any;
use std::sync::Arc;

make_udf_function!(
make_udf_expr_and_func!(
ArrayRepeat,
array_repeat,
element count, // arg name
Expand Down
6 changes: 3 additions & 3 deletions datafusion/functions-array/src/replace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,19 +38,19 @@ use std::any::Any;
use std::sync::Arc;

// Create static instances of ScalarUDFs for each function
make_udf_function!(ArrayReplace,
make_udf_expr_and_func!(ArrayReplace,
array_replace,
array from to,
"replaces the first occurrence of the specified element with another specified element.",
array_replace_udf
);
make_udf_function!(ArrayReplaceN,
make_udf_expr_and_func!(ArrayReplaceN,
array_replace_n,
array from to max,
"replaces the first `max` occurrences of the specified element with another specified element.",
array_replace_n_udf
);
make_udf_function!(ArrayReplaceAll,
make_udf_expr_and_func!(ArrayReplaceAll,
array_replace_all,
array from to,
"replaces all occurrences of the specified element with another specified element.",
Expand Down
2 changes: 1 addition & 1 deletion datafusion/functions-array/src/resize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ use datafusion_expr::{ColumnarValue, Expr, ScalarUDFImpl, Signature, Volatility}
use std::any::Any;
use std::sync::Arc;

make_udf_function!(
make_udf_expr_and_func!(
ArrayResize,
array_resize,
array size value,
Expand Down
2 changes: 1 addition & 1 deletion datafusion/functions-array/src/reverse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ use datafusion_expr::{ColumnarValue, Expr, ScalarUDFImpl, Signature, Volatility}
use std::any::Any;
use std::sync::Arc;

make_udf_function!(
make_udf_expr_and_func!(
ArrayReverse,
array_reverse,
array,
Expand Down
2 changes: 1 addition & 1 deletion datafusion/functions-array/src/rewrite.rs
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ impl FunctionRewrite for ArrayFunctionRewriter {
stop,
stride,
},
}) => Transformed::yes(array_slice(*expr, *start, *stop, *stride)),
}) => Transformed::yes(array_slice(*expr, *start, *stop, Some(*stride))),

_ => Transformed::no(expr),
};
Expand Down
6 changes: 3 additions & 3 deletions datafusion/functions-array/src/set_ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,23 +37,23 @@ use std::fmt::{Display, Formatter};
use std::sync::Arc;

// Create static instances of ScalarUDFs for each function
make_udf_function!(
make_udf_expr_and_func!(
ArrayUnion,
array_union,
array1 array2,
"returns an array of the elements in the union of array1 and array2 without duplicates.",
array_union_udf
);

make_udf_function!(
make_udf_expr_and_func!(
ArrayIntersect,
array_intersect,
first_array second_array,
"returns an array of the elements in the intersection of array1 and array2.",
array_intersect_udf
);

make_udf_function!(
make_udf_expr_and_func!(
ArrayDistinct,
array_distinct,
array,
Expand Down
Loading

0 comments on commit 18fc376

Please sign in to comment.