From 27304239ef79b50a443320791755bf74eed4a85d Mon Sep 17 00:00:00 2001 From: Martin Hilton Date: Sat, 10 Aug 2024 14:21:56 +0100 Subject: [PATCH] fix: make ScalarValue::Dictionary with NULL values produce NULL arrays (#11908) Update the way ScalarValue::Dictionary values are turned into arrays such that: scalar_value.is_null() == scalar_value.to_array()?.is_null(0) Previously the dictionary would be created with a valid key entry pointing to a NULL value. https://arrow.apache.org/docs/format/Columnar.html#dictionary-encoded-layout suggests that this does not constitute a NULL entry. --- datafusion/common/src/scalar/mod.rs | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index 286df339adcf..fd0c11ed0ab0 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -801,9 +801,13 @@ fn dict_from_scalar( let values_array = value.to_array_of_size(1)?; // Create a key array with `size` elements, each of 0 - let key_array: PrimitiveArray<K> = std::iter::repeat(Some(K::default_value())) - .take(size) - .collect(); + let key_array: PrimitiveArray<K> = std::iter::repeat(if value.is_null() { + None + } else { + Some(K::default_value()) + }) + .take(size) + .collect(); // create a new DictionaryArray // @@ -6674,4 +6678,15 @@ mod tests { ); assert!(dense_scalar.is_null()); } + + #[test] + fn null_dictionary_scalar_produces_null_dictionary_array() { + let dictionary_scalar = ScalarValue::Dictionary( + Box::new(DataType::Int32), + Box::new(ScalarValue::Null), + ); + assert!(dictionary_scalar.is_null()); + let dictionary_array = dictionary_scalar.to_array().unwrap(); + assert!(dictionary_array.is_null(0)); + } }