From 8fb9dcccbd439c9696e33587c18d29a4b156e5c6 Mon Sep 17 00:00:00 2001 From: tanruixiang Date: Mon, 21 Aug 2023 14:27:06 +0800 Subject: [PATCH] feat: support `array_pop_back` function --- datafusion/expr/src/built_in_function.rs | 6 + datafusion/expr/src/expr_fn.rs | 9 + .../physical-expr/src/array_expressions.rs | 161 ++++++++++++++++++ datafusion/physical-expr/src/functions.rs | 4 +- datafusion/proto/proto/datafusion.proto | 1 + datafusion/proto/src/generated/pbjson.rs | 3 + datafusion/proto/src/generated/prost.rs | 3 + .../proto/src/logical_plan/from_proto.rs | 9 +- datafusion/proto/src/logical_plan/to_proto.rs | 1 + datafusion/sqllogictest/test_files/array.slt | 75 ++++++++ docs/source/user-guide/expressions.md | 1 + .../source/user-guide/sql/scalar_functions.md | 25 +++ 12 files changed, 296 insertions(+), 2 deletions(-) diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs index 54ffc312a3bb..7135867b4a03 100644 --- a/datafusion/expr/src/built_in_function.rs +++ b/datafusion/expr/src/built_in_function.rs @@ -134,6 +134,8 @@ pub enum BuiltinScalarFunction { ArrayHasAll, /// array_has_any ArrayHasAny, + /// array_pop_back + ArrayPopBack, /// array_dims ArrayDims, /// array_element @@ -367,6 +369,7 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ArrayElement => Volatility::Immutable, BuiltinScalarFunction::ArrayLength => Volatility::Immutable, BuiltinScalarFunction::ArrayNdims => Volatility::Immutable, + BuiltinScalarFunction::ArrayPopBack => Volatility::Immutable, BuiltinScalarFunction::ArrayPosition => Volatility::Immutable, BuiltinScalarFunction::ArrayPositions => Volatility::Immutable, BuiltinScalarFunction::ArrayPrepend => Volatility::Immutable, @@ -548,6 +551,7 @@ impl BuiltinScalarFunction { }, BuiltinScalarFunction::ArrayLength => Ok(UInt64), BuiltinScalarFunction::ArrayNdims => Ok(UInt64), + BuiltinScalarFunction::ArrayPopBack => Ok(input_expr_types[0].clone()), BuiltinScalarFunction::ArrayPosition => 
Ok(UInt64), BuiltinScalarFunction::ArrayPositions => { Ok(List(Arc::new(Field::new("item", UInt64, true)))) @@ -825,6 +829,7 @@ impl BuiltinScalarFunction { // for now, the list is small, as we do not have many built-in functions. match self { BuiltinScalarFunction::ArrayAppend => Signature::any(2, self.volatility()), + BuiltinScalarFunction::ArrayPopBack => Signature::any(1, self.volatility()), BuiltinScalarFunction::ArrayConcat => { Signature::variadic_any(self.volatility()) } @@ -1333,6 +1338,7 @@ fn aliases(func: &BuiltinScalarFunction) -> &'static [&'static str] { } BuiltinScalarFunction::ArrayLength => &["array_length", "list_length"], BuiltinScalarFunction::ArrayNdims => &["array_ndims", "list_ndims"], + BuiltinScalarFunction::ArrayPopBack => &["array_pop_back", "list_pop_back"], BuiltinScalarFunction::ArrayPosition => &[ "array_position", "list_position", diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 47692dfefb4a..8c3f24183441 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -545,6 +545,14 @@ scalar_expr!( array element, "appends an element to the end of an array." ); + +scalar_expr!( + ArrayPopBack, + array_pop_back, + array, + "returns the array without the last element." 
+); + nary_scalar_expr!(ArrayConcat, array_concat, "concatenates arrays."); scalar_expr!( ArrayHas, @@ -1081,6 +1089,7 @@ mod test { test_scalar_expr!(FromUnixtime, from_unixtime, unixtime); test_scalar_expr!(ArrayAppend, array_append, array, element); + test_scalar_expr!(ArrayPopBack, array_pop_back, array); test_unary_scalar_expr!(ArrayDims, array_dims); test_scalar_expr!(ArrayLength, array_length, array, dimension); test_unary_scalar_expr!(ArrayNdims, array_ndims); diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs index 3136383c31f4..636ed070fec7 100644 --- a/datafusion/physical-expr/src/array_expressions.rs +++ b/datafusion/physical-expr/src/array_expressions.rs @@ -601,6 +601,22 @@ pub fn array_slice(args: &[ArrayRef]) -> Result { define_array_slice(list_array, key, extra_key, false) } +pub fn array_pop_back(args: &[ArrayRef]) -> Result { + let list_array = as_list_array(&args[0])?; + let key = vec![0; list_array.len()]; + let extra_key: Vec<_> = list_array + .iter() + .map(|x| x.map_or(0, |arr| arr.len() as i64 - 1)) + .collect(); + + define_array_slice( + list_array, + &Int64Array::from(key), + &Int64Array::from(extra_key), + false, + ) +} + macro_rules! 
append { ($ARRAY:expr, $ELEMENT:expr, $ARRAY_TYPE:ident) => {{ let mut offsets: Vec = vec![0]; @@ -1994,6 +2010,151 @@ mod tests { ); } + #[test] + fn test_array_pop_back() { + // array_pop_back([1, 2, 3, 4]) = [1, 2, 3] + let list_array = return_array().into_array(1); + let arr = array_pop_back(&[list_array]) + .expect("failed to initialize function array_pop_back"); + let result = + as_list_array(&arr).expect("failed to initialize function array_pop_back"); + assert_eq!( + &[1, 2, 3], + result + .value(0) + .as_any() + .downcast_ref::() + .unwrap() + .values() + ); + + // array_pop_back([1, 2, 3]) = [1, 2] + let list_array = Arc::new(result.clone()); + let arr = array_pop_back(&[list_array]) + .expect("failed to initialize function array_pop_back"); + let result = + as_list_array(&arr).expect("failed to initialize function array_pop_back"); + assert_eq!( + &[1, 2], + result + .value(0) + .as_any() + .downcast_ref::() + .unwrap() + .values() + ); + + // array_pop_back([1, 2]) = [1] + let list_array = Arc::new(result.clone()); + let arr = array_pop_back(&[list_array]) + .expect("failed to initialize function array_pop_back"); + let result = + as_list_array(&arr).expect("failed to initialize function array_pop_back"); + assert_eq!( + &[1], + result + .value(0) + .as_any() + .downcast_ref::() + .unwrap() + .values() + ); + + // array_pop_back([1]) = [] + let list_array = Arc::new(result.clone()); + let arr = array_pop_back(&[list_array]) + .expect("failed to initialize function array_pop_back"); + let result = + as_list_array(&arr).expect("failed to initialize function array_pop_back"); + assert_eq!( + &[], + result + .value(0) + .as_any() + .downcast_ref::() + .unwrap() + .values() + ); + // array_pop_back([]) = [] + let list_array = Arc::new(result.clone()); + let arr = array_pop_back(&[list_array]) + .expect("failed to initialize function array_pop_back"); + let result = + as_list_array(&arr).expect("failed to initialize function array_pop_back"); + assert_eq!( + 
&[], + result + .value(0) + .as_any() + .downcast_ref::() + .unwrap() + .values() + ); + + // array_pop_back([1, NULL, 3, NULL]) = [1, NULL, 3] + let list_array = return_array_with_nulls().into_array(1); + let arr = array_pop_back(&[list_array]) + .expect("failed to initialize function array_pop_back"); + let result = + as_list_array(&arr).expect("failed to initialize function array_pop_back"); + assert_eq!(3, result.values().len()); + assert_eq!( + &[false, true, false], + &[ + result.values().is_null(0), + result.values().is_null(1), + result.values().is_null(2) + ] + ); + } + #[test] + fn test_nested_array_pop_back() { + // array_pop_back([[1, 2, 3, 4], [5, 6, 7, 8]]) = [[1, 2, 3, 4]] + let list_array = return_nested_array().into_array(1); + let arr = array_pop_back(&[list_array]) + .expect("failed to initialize function array_pop_back"); + let result = + as_list_array(&arr).expect("failed to initialize function array_pop_back"); + assert_eq!( + &[1, 2, 3, 4], + result + .value(0) + .as_any() + .downcast_ref::() + .unwrap() + .value(0) + .as_any() + .downcast_ref::() + .unwrap() + .values() + ); + + // array_pop_back([[1, 2, 3, 4]]) = [] + let list_array = Arc::new(result.clone()); + let arr = array_pop_back(&[list_array]) + .expect("failed to initialize function array_pop_back"); + let result = + as_list_array(&arr).expect("failed to initialize function array_pop_back"); + assert!(result + .value(0) + .as_any() + .downcast_ref::() + .unwrap() + .is_empty()); + // array_pop_back([]) = [] + let list_array = Arc::new(result.clone()); + let arr = array_pop_back(&[list_array]) + .expect("failed to initialize function array_pop_back"); + let result = + as_list_array(&arr).expect("failed to initialize function array_pop_back"); + assert!(result + .value(0) + .as_any() + .downcast_ref::() + .unwrap() + .is_empty()); + } + #[test] fn test_array_slice() { // array_slice([1, 2, 3, 4], 1, 3) = [1, 2, 3] diff --git a/datafusion/physical-expr/src/functions.rs 
b/datafusion/physical-expr/src/functions.rs index 82226ecfa1e1..4e969ffc000a 100644 --- a/datafusion/physical-expr/src/functions.rs +++ b/datafusion/physical-expr/src/functions.rs @@ -446,10 +446,12 @@ pub fn create_physical_fun( BuiltinScalarFunction::Flatten => { Arc::new(|args| make_scalar_function(array_expressions::flatten)(args)) } - BuiltinScalarFunction::ArrayNdims => { Arc::new(|args| make_scalar_function(array_expressions::array_ndims)(args)) } + BuiltinScalarFunction::ArrayPopBack => { + Arc::new(|args| make_scalar_function(array_expressions::array_pop_back)(args)) + } BuiltinScalarFunction::ArrayPosition => { Arc::new(|args| make_scalar_function(array_expressions::array_position)(args)) } diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index e4ef7b1bd448..e044837340a8 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -597,6 +597,7 @@ enum ScalarFunction { Flatten = 112; Isnan = 113; Iszero = 114; + ArrayPopBack = 115; } message ScalarFunctionNode { diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index f1a9e9c7bb74..e8d76cf098ae 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -18947,6 +18947,7 @@ impl serde::Serialize for ScalarFunction { Self::Flatten => "Flatten", Self::Isnan => "Isnan", Self::Iszero => "Iszero", + Self::ArrayPopBack => "ArrayPopBack", }; serializer.serialize_str(variant) } @@ -19073,6 +19074,7 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "Flatten", "Isnan", "Iszero", + "ArrayPopBack", ]; struct GeneratedVisitor; @@ -19230,6 +19232,7 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "Flatten" => Ok(ScalarFunction::Flatten), "Isnan" => Ok(ScalarFunction::Isnan), "Iszero" => Ok(ScalarFunction::Iszero), + "ArrayPopBack" => Ok(ScalarFunction::ArrayPopBack), _ => Err(serde::de::Error::unknown_variant(value, FIELDS)), } 
} diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index 6cf402fe66e9..31355755b95f 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -2377,6 +2377,7 @@ pub enum ScalarFunction { Flatten = 112, Isnan = 113, Iszero = 114, + ArrayPopBack = 115, } impl ScalarFunction { /// String value of the enum field names used in the ProtoBuf definition. @@ -2500,6 +2501,7 @@ impl ScalarFunction { ScalarFunction::Flatten => "Flatten", ScalarFunction::Isnan => "Isnan", ScalarFunction::Iszero => "Iszero", + ScalarFunction::ArrayPopBack => "ArrayPopBack", } } /// Creates an enum from field names used in the ProtoBuf definition. @@ -2620,6 +2622,7 @@ impl ScalarFunction { "Flatten" => Some(Self::Flatten), "Isnan" => Some(Self::Isnan), "Iszero" => Some(Self::Iszero), + "ArrayPopBack" => Some(Self::ArrayPopBack), _ => None, } } diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index d3329c696764..893bcf64285e 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -34,7 +34,6 @@ use datafusion_common::{ internal_err, Column, DFField, DFSchema, DFSchemaRef, DataFusionError, OwnedTableReference, Result, ScalarValue, }; -use datafusion_expr::expr::{Alias, Placeholder}; use datafusion_expr::{ abs, acos, acosh, array, array_append, array_concat, array_dims, array_element, array_has, array_has_all, array_has_any, array_length, array_ndims, array_position, @@ -59,6 +58,10 @@ use datafusion_expr::{ JoinConstraint, JoinType, Like, Operator, TryCast, WindowFrame, WindowFrameBound, WindowFrameUnits, }; +use datafusion_expr::{ + array_pop_back, + expr::{Alias, Placeholder}, +}; use std::sync::Arc; #[derive(Debug)] @@ -460,6 +463,7 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction { ScalarFunction::Flatten => Self::Flatten, ScalarFunction::ArrayLength => 
Self::ArrayLength, ScalarFunction::ArrayNdims => Self::ArrayNdims, + ScalarFunction::ArrayPopBack => Self::ArrayPopBack, ScalarFunction::ArrayPosition => Self::ArrayPosition, ScalarFunction::ArrayPositions => Self::ArrayPositions, ScalarFunction::ArrayPrepend => Self::ArrayPrepend, @@ -1268,6 +1272,9 @@ pub fn parse_expr( parse_expr(&args[0], registry)?, parse_expr(&args[1], registry)?, )), + ScalarFunction::ArrayPopBack => { + Ok(array_pop_back(parse_expr(&args[0], registry)?)) + } ScalarFunction::ArrayPrepend => Ok(array_prepend( parse_expr(&args[0], registry)?, parse_expr(&args[1], registry)?, diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index cb3296438165..648e530e1215 100644 --- a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -1459,6 +1459,7 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction { BuiltinScalarFunction::Flatten => Self::Flatten, BuiltinScalarFunction::ArrayLength => Self::ArrayLength, BuiltinScalarFunction::ArrayNdims => Self::ArrayNdims, + BuiltinScalarFunction::ArrayPopBack => Self::ArrayPopBack, BuiltinScalarFunction::ArrayPosition => Self::ArrayPosition, BuiltinScalarFunction::ArrayPositions => Self::ArrayPositions, BuiltinScalarFunction::ArrayPrepend => Self::ArrayPrepend, diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index eb949c4f8693..1d5f632bac7e 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -79,6 +79,17 @@ AS VALUES (make_array(51, 52, NULL, 54, 55, 56, 57, 58, 59, 60), 5, NULL) ; +statement ok +CREATE TABLE arrayspop +AS VALUES + (make_array(1, 2, NULL)), + (make_array(3, 4, 5, NULL)), + (make_array(6, 7, 8, NULL, 9)), + (make_array(NULL, NULL, 100)), + (NULL), + (make_array(NULL, 10, 11, 12)) +; + statement ok CREATE TABLE nested_arrays AS VALUES @@ -682,6 +693,67 @@ NULL 23 NULL 
43 5 NULL +## array_pop_back (aliases: `list_pop_back`) + +# array_pop_back scalar function #1 +query ?? +select array_pop_back(make_array(1, 2, 3, 4, 5)), array_pop_back(make_array('h', 'e', 'l', 'l', 'o')); +---- +[1, 2, 3, 4] [h, e, l, l] + +# array_pop_back scalar function #2 (after array_pop_back, array is empty) +query ? +select array_pop_back(make_array(1)); +---- +[] + +# array_pop_back scalar function #3 (array_pop_back the empty array) +query ? +select array_pop_back(array_pop_back(make_array(1))); +---- +[] + +# array_pop_back scalar function #4 (array_pop_back the arrays which have NULL) +query ?? +select array_pop_back(make_array(1, 2, 3, 4, NULL)), array_pop_back(make_array(NULL, 'e', 'l', NULL, 'o')); +---- +[1, 2, 3, 4] [, e, l, ] + +# array_pop_back scalar function #5 (array_pop_back the nested arrays) +query ? +select array_pop_back(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5, 6))); +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] + +# array_pop_back scalar function #6 (array_pop_back the nested arrays with NULL) +query ? +select array_pop_back(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), NULL)); +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] + +# array_pop_back scalar function #7 (array_pop_back the nested arrays with NULL) +query ? +select array_pop_back(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), NULL, make_array(1, 7, 4))); +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], ] + +# array_pop_back scalar function #8 (after array_pop_back, nested array is empty) +query ? +select array_pop_back(make_array(make_array(1, 2, 3))); +---- +[] + +# array_pop_back with columns +query ? 
+select array_pop_back(column1) from arrayspop; +---- +[1, 2] +[3, 4, 5] +[6, 7, 8, ] +[, ] +[] +[, 10, 11] + ## array_slice (aliases: list_slice) # array_slice scalar function #1 (with positive indexes) @@ -2380,6 +2452,9 @@ drop table arrays; statement ok drop table slices; +statement ok +drop table arrayspop; + statement ok drop table arrays_values; diff --git a/docs/source/user-guide/expressions.md b/docs/source/user-guide/expressions.md index d8dfa7af5310..a481e525fe14 100644 --- a/docs/source/user-guide/expressions.md +++ b/docs/source/user-guide/expressions.md @@ -194,6 +194,7 @@ Unlike to some databases the math functions in Datafusion works the same way as | flatten(array) | Converts an array of arrays to a flat array `flatten([[1], [2, 3], [4, 5, 6]]) -> [1, 2, 3, 4, 5, 6]` | | array_length(array, dimension) | Returns the length of the array dimension. `array_length([1, 2, 3, 4, 5]) -> 5` | | array_ndims(array) | Returns the number of dimensions of the array. `array_ndims([[1, 2, 3], [4, 5, 6]]) -> 2` | +| array_pop_back(array) | Returns the array without the last element. `array_pop_back([1, 2, 3]) -> [1, 2]` | | array_position(array, element) | Searches for an element in the array, returns first occurrence. `array_position([1, 2, 2, 3, 4], 2) -> 2` | | array_positions(array, element) | Searches for an element in the array, returns all occurrences. `array_positions([1, 2, 2, 3, 4], 2) -> [2, 3]` | | array_prepend(array, element) | Prepends an element to the beginning of an array. 
`array_prepend(1, [2, 3, 4]) -> [1, 2, 3, 4]` | diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index 6dbe5c05f6d1..4489d1a78999 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -1481,6 +1481,7 @@ from_unixtime(expression) - [array_length](#array_length) - [array_ndims](#array_ndims) - [array_prepend](#array_prepend) +- [array_pop_back](#array_pop_back) - [array_position](#array_position) - [array_positions](#array_positions) - [array_push_back](#array_push_back) @@ -1827,6 +1828,30 @@ array_prepend(element, array) - list_prepend - list_push_front +### `array_pop_back` + +Returns the array without the last element. + +``` +array_pop_back(array) +``` + +#### Arguments + +- **array**: Array expression. + Can be a constant, column, or function, and any combination of array operators. + +#### Example + +``` +❯ select array_pop_back([1, 2, 3]); ++-------------------------------+ +| array_pop_back(List([1,2,3])) | ++-------------------------------+ +| [1, 2] | ++-------------------------------+ +``` + ### `array_position` Returns a string with an input string repeated a specified number.