From dd6767bae124ba4b15e6049f5a9a9a73f86f28aa Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Fri, 19 Jul 2024 10:22:42 +0800 Subject: [PATCH 01/11] fix: CASE with NULL --- datafusion/expr/src/expr_schema.rs | 19 ++++++++++++- datafusion/sqllogictest/test_files/select.slt | 27 +++++++++++++++++++ 2 files changed, 45 insertions(+), 1 deletion(-) diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 1df5d6c4d736..bd3c05f03b5e 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -112,7 +112,24 @@ impl ExprSchemable for Expr { Expr::OuterReferenceColumn(ty, _) => Ok(ty.clone()), Expr::ScalarVariable(ty, _) => Ok(ty.clone()), Expr::Literal(l) => Ok(l.data_type()), - Expr::Case(case) => case.when_then_expr[0].1.get_type(schema), + Expr::Case(case) => { + let then_type = case.when_then_expr[0].1.get_type(schema)?; + if !then_type.is_null() { + return Ok(then_type); + } + + let else_type = if let Some(e) = &case.else_expr { + e.get_type(schema)? + } else { + DataType::Null + }; + + match (then_type.clone(), else_type.clone()) { + (DataType::Null, DataType::Null) => Ok(DataType::Int64), + (DataType::Null, _) => Ok(else_type), + _ => Ok(else_type), + } + } Expr::Cast(Cast { data_type, .. }) | Expr::TryCast(TryCast { data_type, .. }) => Ok(data_type.clone()), Expr::Unnest(Unnest { expr }) => { diff --git a/datafusion/sqllogictest/test_files/select.slt b/datafusion/sqllogictest/test_files/select.slt index 03426dec874f..6884efc07e15 100644 --- a/datafusion/sqllogictest/test_files/select.slt +++ b/datafusion/sqllogictest/test_files/select.slt @@ -613,6 +613,33 @@ END; ---- 2 +# select case when type is null +query I +select CASE + WHEN NULL THEN 1 + ELSE 2 +END; +---- +2 + +# select case then type is null +query I +select CASE + WHEN 10 > 5 THEN NULL + ELSE 2 +END; +---- +NULL + +# select case else type is null +query I +select CASE + WHEN 10 = 5 THEN 1 + ELSE NULL +END; +---- +NULL + # Binary Expression for LargeUtf8 # issue: https://github.com/apache/datafusion/issues/5893 statement ok From efb003ee530475293433079e16362fb7c67af8bd Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Fri, 19 Jul 2024 10:32:07 +0800 Subject: [PATCH 02/11] chore: Add tests --- datafusion/sqllogictest/test_files/aggregate.slt | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index a0140b1c5292..81482226dadf 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -5418,6 +5418,21 @@ SELECT LAST_VALUE(column1 ORDER BY column2 DESC) IGNORE NULLS FROM t; statement ok DROP TABLE t; +# Test for CASE with NULL in aggregate function +statement ok +CREATE TABLE example(data double precision); + +query II +SELECT + sum(CASE WHEN data is NULL THEN NULL ELSE data+1 END) as then_null, + sum(CASE WHEN data is NULL THEN data+1 ELSE NULL END) as else_null, +FROM example; +---- +NULL NULL + +statement ok +drop table example; + # Test Convert FirstLast optimizer rule statement ok CREATE EXTERNAL TABLE convert_first_last_table ( From 061ffe30ad1542c65b0727832cb169aa9caf1dda Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Fri, 19 Jul 2024 10:40:55 +0800 Subject: [PATCH 03/11] chore --- datafusion/expr/src/expr_schema.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index bd3c05f03b5e..a9fc623d6cf9 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -118,11 +118,10 @@ impl ExprSchemable for Expr { return Ok(then_type); } - let else_type = if let Some(e) = &case.else_expr { - e.get_type(schema)? - } else { - DataType::Null - }; + let else_type = case + .else_expr + .as_ref() + .map_or(Ok(DataType::Null), |e| e.get_type(schema))?; match (then_type.clone(), else_type.clone()) { (DataType::Null, DataType::Null) => Ok(DataType::Int64), From b4fcb26aa5273b91fe574dd3e8e811ffe2ce8f96 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Fri, 19 Jul 2024 16:05:42 +0800 Subject: [PATCH 04/11] chore: Fix CI --- datafusion/sqllogictest/test_files/aggregate.slt | 4 ++-- datafusion/sqllogictest/test_files/scalar.slt | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index 81482226dadf..1324fbd43a59 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -5422,10 +5422,10 @@ DROP TABLE t; statement ok CREATE TABLE example(data double precision); -query II +query RR SELECT sum(CASE WHEN data is NULL THEN NULL ELSE data+1 END) as then_null, - sum(CASE WHEN data is NULL THEN data+1 ELSE NULL END) as else_null, + sum(CASE WHEN data is NULL THEN data+1 ELSE NULL END) as else_null FROM example; ---- NULL NULL diff --git a/datafusion/sqllogictest/test_files/scalar.slt b/datafusion/sqllogictest/test_files/scalar.slt index 5daa9333fb36..95fd62d95671 100644 --- a/datafusion/sqllogictest/test_files/scalar.slt +++ b/datafusion/sqllogictest/test_files/scalar.slt @@ -1238,27 +1238,27 @@ SELECT CASE WHEN NULL THEN 'foo' ELSE 'bar' END bar # case_expr_with_null() -query ? +query I select case when b is null then null else b end from (select a,b from (values (1,null),(2,3)) as t (a,b)) a; ---- NULL 3 -query ? +query I select case when b is null then null else b end from (select a,b from (values (1,1),(2,3)) as t (a,b)) a; ---- 1 3 # case_expr_with_nulls() -query ? +query I select case when b is null then null when b < 3 then null when b >=3 then b + 1 else b end from (select a,b from (values (1,null),(1,2),(2,3)) as t (a,b)) a ---- NULL NULL 4 -query ? +query I select case b when 1 then null when 2 then null when 3 then b + 1 else b end from (select a,b from (values (1,null),(1,2),(2,3)) as t (a,b)) a; ---- NULL From bc347b2e41725137da3f43608f2f13937af9ed08 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Sat, 20 Jul 2024 11:02:45 +0800 Subject: [PATCH 05/11] chore: Support all types are NULL --- datafusion/expr/src/expr_schema.rs | 9 ++++----- datafusion/functions-aggregate/src/sum.rs | 4 ++-- datafusion/sqllogictest/test_files/aggregate.slt | 7 ++++--- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index a9fc623d6cf9..51eeecf5fff6 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -112,6 +112,7 @@ impl ExprSchemable for Expr { Expr::OuterReferenceColumn(ty, _) => Ok(ty.clone()), Expr::ScalarVariable(ty, _) => Ok(ty.clone()), Expr::Literal(l) => Ok(l.data_type()), + // Expr::Case(case) => case.when_then_expr[0].1.get_type(schema), Expr::Case(case) => { let then_type = case.when_then_expr[0].1.get_type(schema)?; if !then_type.is_null() { @@ -122,12 +123,10 @@ impl ExprSchemable for Expr { .else_expr .as_ref() .map_or(Ok(DataType::Null), |e| e.get_type(schema))?; - - match (then_type.clone(), else_type.clone()) { - (DataType::Null, DataType::Null) => Ok(DataType::Int64), - (DataType::Null, _) => Ok(else_type), - _ => Ok(else_type), + if !else_type.is_null() { + return Ok(else_type); } + Ok(DataType::Int64) } Expr::Cast(Cast { data_type, .. }) | Expr::TryCast(TryCast { data_type, .. }) => Ok(data_type.clone()), diff --git a/datafusion/functions-aggregate/src/sum.rs b/datafusion/functions-aggregate/src/sum.rs index a9f31dc05be9..4c465fd8e075 100644 --- a/datafusion/functions-aggregate/src/sum.rs +++ b/datafusion/functions-aggregate/src/sum.rs @@ -119,7 +119,7 @@ impl AggregateUDFImpl for Sum { DataType::Decimal128(_, _) | DataType::Decimal256(_, _) => { Ok(data_type.clone()) } - dt if dt.is_signed_integer() => Ok(DataType::Int64), + dt if dt.is_signed_integer() || dt.is_null() => Ok(DataType::Int64), dt if dt.is_unsigned_integer() => Ok(DataType::UInt64), dt if dt.is_floating() => Ok(DataType::Float64), _ => exec_err!("Sum not supported for {}", data_type), @@ -131,7 +131,7 @@ impl AggregateUDFImpl for Sum { fn return_type(&self, arg_types: &[DataType]) -> Result { match &arg_types[0] { - DataType::Int64 => Ok(DataType::Int64), + DataType::Int64 | DataType::Null => Ok(DataType::Int64), DataType::UInt64 => Ok(DataType::UInt64), DataType::Float64 => Ok(DataType::Float64), DataType::Decimal128(precision, scale) => { diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index 1324fbd43a59..f3bf42771511 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -5422,13 +5422,14 @@ DROP TABLE t; statement ok CREATE TABLE example(data double precision); -query RR +query RRI SELECT sum(CASE WHEN data is NULL THEN NULL ELSE data+1 END) as then_null, - sum(CASE WHEN data is NULL THEN data+1 ELSE NULL END) as else_null + sum(CASE WHEN data is NULL THEN data+1 ELSE NULL END) as else_null, + sum(CASE WHEN NULL THEN NULL ELSE NULL END) as both_null FROM example; ---- -NULL NULL +NULL NULL NULL statement ok drop table example; From 43f650bd4b5438e5aee5a78824a4ff0d060fac62 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Sat, 20 Jul 2024 11:49:21 +0800 Subject: [PATCH 06/11] chore: Fix CI --- datafusion/functions-aggregate/src/sum.rs | 4 ++-- datafusion/sqllogictest/test_files/aggregate.slt | 7 +++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/datafusion/functions-aggregate/src/sum.rs b/datafusion/functions-aggregate/src/sum.rs index 4c465fd8e075..a9f31dc05be9 100644 --- a/datafusion/functions-aggregate/src/sum.rs +++ b/datafusion/functions-aggregate/src/sum.rs @@ -119,7 +119,7 @@ impl AggregateUDFImpl for Sum { DataType::Decimal128(_, _) | DataType::Decimal256(_, _) => { Ok(data_type.clone()) } - dt if dt.is_signed_integer() || dt.is_null() => Ok(DataType::Int64), + dt if dt.is_signed_integer() => Ok(DataType::Int64), dt if dt.is_unsigned_integer() => Ok(DataType::UInt64), dt if dt.is_floating() => Ok(DataType::Float64), _ => exec_err!("Sum not supported for {}", data_type), @@ -131,7 +131,7 @@ impl AggregateUDFImpl for Sum { fn return_type(&self, arg_types: &[DataType]) -> Result { match &arg_types[0] { - DataType::Int64 | DataType::Null => Ok(DataType::Int64), + DataType::Int64 => Ok(DataType::Int64), DataType::UInt64 => Ok(DataType::UInt64), DataType::Float64 => Ok(DataType::Float64), DataType::Decimal128(precision, scale) => { diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index f3bf42771511..1324fbd43a59 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -5422,14 +5422,13 @@ DROP TABLE t; statement ok CREATE TABLE example(data double precision); -query RRI +query RR SELECT sum(CASE WHEN data is NULL THEN NULL ELSE data+1 END) as then_null, - sum(CASE WHEN data is NULL THEN data+1 ELSE NULL END) as else_null, - sum(CASE WHEN NULL THEN NULL ELSE NULL END) as both_null + sum(CASE WHEN data is NULL THEN data+1 ELSE NULL END) as else_null FROM example; ---- -NULL NULL NULL +NULL NULL statement ok drop table example; From 9b278e7737184797edfc50cda49e09b6c3413111 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Sun, 21 Jul 2024 09:54:05 +0800 Subject: [PATCH 07/11] chore: add more tests --- datafusion/sqllogictest/test_files/aggregate.slt | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index 1324fbd43a59..2a5efc035f7e 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -5422,13 +5422,26 @@ DROP TABLE t; statement ok CREATE TABLE example(data double precision); +statement ok +INSERT INTO example VALUES (1), (2), (NULL), (4); + query RR SELECT sum(CASE WHEN data is NULL THEN NULL ELSE data+1 END) as then_null, sum(CASE WHEN data is NULL THEN data+1 ELSE NULL END) as else_null FROM example; ---- -NULL NULL +10 NULL + +query I +SELECT + CASE data WHEN 1 THEN NULL WHEN 2 THEN 3.3 ELSE NULL END as case_null +FROM example; +---- +NULL +3.3 +NULL +NULL statement ok drop table example; From 59bd35d13ca0f1e62ac394a681fef66073406286 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Mon, 22 Jul 2024 10:39:21 +0800 Subject: [PATCH 08/11] fix: Return first non-null type in then exprs --- datafusion/expr/src/expr_schema.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 51eeecf5fff6..09fa1e3696ee 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -114,9 +114,11 @@ impl ExprSchemable for Expr { Expr::Literal(l) => Ok(l.data_type()), // Expr::Case(case) => case.when_then_expr[0].1.get_type(schema), Expr::Case(case) => { - let then_type = case.when_then_expr[0].1.get_type(schema)?; - if !then_type.is_null() { - return Ok(then_type); + for (_, then_expr) in &case.when_then_expr { + let then_type = then_expr.get_type(schema)?; + if !then_type.is_null() { + return Ok(then_type); + } } let else_type = case From 79b70a8cfa76cfddaa77b2c002ba46439aa8ed99 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Mon, 22 Jul 2024 10:49:36 +0800 Subject: [PATCH 09/11] chore: Fix CI --- datafusion/sqllogictest/test_files/aggregate.slt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index 2a5efc035f7e..1ec843bce75f 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -5433,7 +5433,7 @@ FROM example; ---- 10 NULL -query I +query R SELECT CASE data WHEN 1 THEN NULL WHEN 2 THEN 3.3 ELSE NULL END as case_null FROM example; From b8696a587c1a8ba1650d146d1dda2382da980662 Mon Sep 17 00:00:00 2001 From: Alex Huang Date: Mon, 22 Jul 2024 12:45:34 +0800 Subject: [PATCH 10/11] Update datafusion/expr/src/expr_schema.rs Co-authored-by: Jonah Gao --- datafusion/expr/src/expr_schema.rs | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 09fa1e3696ee..b546ff7c5f72 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -120,15 +120,9 @@ impl ExprSchemable for Expr { return Ok(then_type); } } - - let else_type = case - .else_expr + case.else_expr .as_ref() - .map_or(Ok(DataType::Null), |e| e.get_type(schema))?; - if !else_type.is_null() { - return Ok(else_type); - } - Ok(DataType::Int64) + .map_or(Ok(DataType::Null), |e| e.get_type(schema)) } Expr::Cast(Cast { data_type, .. }) | Expr::TryCast(TryCast { data_type, .. }) => Ok(data_type.clone()), From c5b4a5780ea5454ba6b9cfcb9a0a61bcd337734f Mon Sep 17 00:00:00 2001 From: Alex Huang Date: Mon, 22 Jul 2024 12:45:42 +0800 Subject: [PATCH 11/11] Update datafusion/expr/src/expr_schema.rs Co-authored-by: Jonah Gao --- datafusion/expr/src/expr_schema.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index b546ff7c5f72..5e0571f712ee 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -112,7 +112,6 @@ impl ExprSchemable for Expr { Expr::OuterReferenceColumn(ty, _) => Ok(ty.clone()), Expr::ScalarVariable(ty, _) => Ok(ty.clone()), Expr::Literal(l) => Ok(l.data_type()), - // Expr::Case(case) => case.when_then_expr[0].1.get_type(schema), Expr::Case(case) => { for (_, then_expr) in &case.when_then_expr { let then_type = then_expr.get_type(schema)?;