diff --git a/datafusion/functions/src/unicode/substr.rs b/datafusion/functions/src/unicode/substr.rs index e0d5d94922f0..2f20d1ecdbe9 100644 --- a/datafusion/functions/src/unicode/substr.rs +++ b/datafusion/functions/src/unicode/substr.rs @@ -81,8 +81,13 @@ impl ScalarUDFImpl for SubstrFunc { } fn coerce_types(&self, arg_types: &[DataType]) -> Result> { - if ![DataType::LargeUtf8, DataType::Utf8View, DataType::Utf8] - .contains(&arg_types[0]) + if ![ + DataType::LargeUtf8, + DataType::Utf8View, + DataType::Utf8, + DataType::Null, + ] + .contains(&arg_types[0]) { return plan_err!( "The first argument of the {} function can only be a string, but got {:?}.", @@ -91,7 +96,7 @@ impl ScalarUDFImpl for SubstrFunc { ); } - if ![DataType::Int64, DataType::Int32].contains(&arg_types[1]) { + if ![DataType::Int64, DataType::Int32, DataType::Null].contains(&arg_types[1]) { return plan_err!( "The second argument of the {} function can only be an integer, but got {:?}.", self.name(), @@ -100,7 +105,7 @@ impl ScalarUDFImpl for SubstrFunc { } if arg_types.len() == 3 - && ![DataType::Int64, DataType::Int32].contains(&arg_types[2]) + && ![DataType::Int64, DataType::Int32, DataType::Null].contains(&arg_types[2]) { return plan_err!( "The third argument of the {} function can only be an integer, but got {:?}.", @@ -109,11 +114,17 @@ impl ScalarUDFImpl for SubstrFunc { ); } + let first_data_type = if arg_types[0] == DataType::Null { + DataType::Utf8 + } else { + arg_types[0].clone() + }; + if arg_types.len() == 2 { - Ok(vec![arg_types[0].to_owned(), DataType::Int64]) + Ok(vec![first_data_type.to_owned(), DataType::Int64]) } else { Ok(vec![ - arg_types[0].to_owned(), + first_data_type.to_owned(), DataType::Int64, DataType::Int64, ]) diff --git a/datafusion/sqllogictest/test_files/encoding.slt b/datafusion/sqllogictest/test_files/encoding.slt index 4b97d9bd811a..68bdf78115aa 100644 --- a/datafusion/sqllogictest/test_files/encoding.slt +++ b/datafusion/sqllogictest/test_files/encoding.slt @@ -28,13 +28,13 @@ CREATE TABLE test( ; # errors -query error DataFusion error: Error during planning: Execution error: User\-defined coercion failed with Plan +query error 1st argument should be Utf8 or Binary or Null, got Int64 select encode(12, 'hex'); query error DataFusion error: Error during planning: There is no built\-in encoding named 'non_encoding', currently supported encodings are: base64, hex select encode(bin_field, 'non_encoding') from test; -query error DataFusion error: Error during planning: Execution error: User\-defined coercion failed with Plan +query error 1st argument should be Utf8 or Binary or Null, got Int64 select decode(12, 'hex'); query error DataFusion error: Error during planning: There is no built\-in encoding named 'non_encoding', currently supported encodings are: base64, hex diff --git a/datafusion/sqllogictest/test_files/functions.slt b/datafusion/sqllogictest/test_files/functions.slt index aa6282953318..e887b1934e04 100644 --- a/datafusion/sqllogictest/test_files/functions.slt +++ b/datafusion/sqllogictest/test_files/functions.slt @@ -497,10 +497,10 @@ SELECT substr('alphabet', 3, CAST(NULL AS int)) ---- NULL -statement error DataFusion error: Error during planning: Execution error: User\-defined coercion failed with Plan +statement error The first argument of the substr function can only be a string, but got Int64 SELECT substr(1, 3) -statement error DataFusion error: Error during planning: Execution error: User\-defined coercion failed with Plan +statement error The first argument of the substr function can only be a string, but got Int64 SELECT substr(1, 3, 4) query T diff --git a/datafusion/sqllogictest/test_files/scalar.slt b/datafusion/sqllogictest/test_files/scalar.slt index ac9765510b7c..3b9c9a16042c 100644 --- a/datafusion/sqllogictest/test_files/scalar.slt +++ b/datafusion/sqllogictest/test_files/scalar.slt @@ -1907,7 +1907,7 @@ select position('' in '') 1 -query error DataFusion error: Error during planning: Execution error: User\-defined coercion failed with Plan +query error POSITION function can only accept strings select position(1 in 1) diff --git a/datafusion/sqllogictest/test_files/string_functions/substr/substr_literal.slt.part b/datafusion/sqllogictest/test_files/string_functions/substr/substr_literal.slt.part index dac59016627f..a5e427b008b9 100644 --- a/datafusion/sqllogictest/test_files/string_functions/substr/substr_literal.slt.part +++ b/datafusion/sqllogictest/test_files/string_functions/substr/substr_literal.slt.part @@ -123,10 +123,10 @@ SELECT substr('Hello🌏世界', 5, 3) ---- o🌏世 -statement error The SUBSTR function can only accept strings, but got Int64. +statement error The first argument of the substr function can only be a string, but got Int64 SELECT substr(1, 3) -statement error The SUBSTR function can only accept strings, but got Int64. +statement error The first argument of the substr function can only be a string, but got Int64 SELECT substr(1, 3, 4) statement error Execution error: negative substring length not allowed diff --git a/datafusion/sqllogictest/test_files/timestamps.slt b/datafusion/sqllogictest/test_files/timestamps.slt index 9ad95dd6da21..7a7a8a8703ec 100644 --- a/datafusion/sqllogictest/test_files/timestamps.slt +++ b/datafusion/sqllogictest/test_files/timestamps.slt @@ -2858,7 +2858,7 @@ statement error select to_local_time('2024-04-01T00:00:20Z'::timestamp, 'some string'); # invalid argument data type -statement error DataFusion error: Error during planning: Execution error: User\-defined coercion failed with Plan +statement error The to_local_time function can only accept Timestamp as the arg got Utf8 select to_local_time('2024-04-01T00:00:20Z'); # invalid timezone