diff --git a/datafusion-examples/examples/expr_api.rs b/datafusion-examples/examples/expr_api.rs index 19e70dc419e4..8079c3e14117 100644 --- a/datafusion-examples/examples/expr_api.rs +++ b/datafusion-examples/examples/expr_api.rs @@ -18,6 +18,7 @@ use arrow::array::{BooleanArray, Int32Array}; use arrow::record_batch::RecordBatch; use datafusion::arrow::datatypes::{DataType, Field, Schema, TimeUnit}; +use datafusion::common::{DFField, DFSchema}; use datafusion::error::Result; use datafusion::optimizer::simplify_expressions::{ExprSimplifier, SimplifyContext}; use datafusion::physical_expr::execution_props::ExecutionProps; @@ -29,6 +30,7 @@ use datafusion_common::{ScalarValue, ToDFSchema}; use datafusion_expr::expr::BinaryExpr; use datafusion_expr::interval_arithmetic::Interval; use datafusion_expr::{ColumnarValue, ExprSchemable, Operator}; +use std::collections::HashMap; use std::sync::Arc; /// This example demonstrates the DataFusion [`Expr`] API. @@ -45,6 +47,7 @@ use std::sync::Arc; /// 2. Evaluate [`Exprs`] against data: [`evaluate_demo`] /// 3. Simplify expressions: [`simplify_demo`] /// 4. Analyze predicates for boundary ranges: [`range_analysis_demo`] +/// 5. Get the types of the expressions: [`expression_type_demo`] #[tokio::main] async fn main() -> Result<()> { // The easiest way to do create expressions is to use the @@ -68,6 +71,9 @@ async fn main() -> Result<()> { // See how to analyze ranges in expressions range_analysis_demo()?; + // See how to determine the data types of expressions + expression_type_demo()?; + Ok(()) } @@ -256,3 +262,43 @@ pub fn physical_expr(schema: &Schema, expr: Expr) -> Result Result<()> { + let expr = col("c"); + + // To determine the DataType of an expression, DataFusion must know the + // types of the input expressions. You can provide this information using + // a schema. 
In this case we create a schema where the column `c` is of + // type Utf8 (a String / VARCHAR) + let schema = DFSchema::new_with_metadata( + vec![DFField::new_unqualified("c", DataType::Utf8, true)], + HashMap::new(), + ) + .unwrap(); + assert_eq!("Utf8", format!("{}", expr.get_type(&schema).unwrap())); + + // Using a schema where the column `c` is of type Int32 + let schema = DFSchema::new_with_metadata( + vec![DFField::new_unqualified("c", DataType::Int32, true)], + HashMap::new(), + ) + .unwrap(); + assert_eq!("Int32", format!("{}", expr.get_type(&schema).unwrap())); + + // Get the type of an expression that adds 2 columns. Adding an Int32 + // and Float32 results in Float32 type + let expr = col("c1") + col("c2"); + let schema = DFSchema::new_with_metadata( + vec![ + DFField::new_unqualified("c1", DataType::Int32, true), + DFField::new_unqualified("c2", DataType::Float32, true), + ], + HashMap::new(), + ) + .unwrap(); + assert_eq!("Float32", format!("{}", expr.get_type(&schema).unwrap())); + + Ok(()) +} diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index d30f304a26b1..517d7a35f70a 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -58,6 +58,31 @@ impl ExprSchemable for Expr { /// /// Note: [DFSchema] implements [ExprSchema]. /// + /// # Examples + /// + /// ## Get the type of an expression that adds 2 columns. 
Adding an Int32 + /// ## and Float32 results in Float32 type + /// + /// ``` + /// # use arrow::datatypes::DataType; + /// # use datafusion_common::{DFField, DFSchema}; + /// # use datafusion_expr::{col, ExprSchemable}; + /// # use std::collections::HashMap; + /// + /// fn main() { + /// let expr = col("c1") + col("c2"); + /// let schema = DFSchema::new_with_metadata( + /// vec![ + /// DFField::new_unqualified("c1", DataType::Int32, true), + /// DFField::new_unqualified("c2", DataType::Float32, true), + /// ], + /// HashMap::new(), + /// ) + /// .unwrap(); + /// assert_eq!("Float32", format!("{}", expr.get_type(&schema).unwrap())); + /// } + /// ``` + /// /// # Errors /// /// This function errors when it is not possible to compute its diff --git a/docs/source/library-user-guide/working-with-exprs.md b/docs/source/library-user-guide/working-with-exprs.md index 96be8ef7f1ae..b7e9248a7c1f 100644 --- a/docs/source/library-user-guide/working-with-exprs.md +++ b/docs/source/library-user-guide/working-with-exprs.md @@ -180,6 +180,34 @@ Projection: Int64(1) + Int64(1) AS added_one I.e. the `add_one` UDF has been inlined into the projection. 
+## Getting the data type of the expression + +The `arrow::datatypes::DataType` of an expression can be obtained by calling `get_type` with something that implements `ExprSchema`, for example a `DFSchema` object: + +```rust +use arrow_schema::DataType; +use datafusion::common::{DFField, DFSchema}; +use datafusion::logical_expr::{col, ExprSchemable}; +use std::collections::HashMap; + +let expr = col("c1") + col("c2"); +let schema = DFSchema::new_with_metadata( + vec![ + DFField::new_unqualified("c1", DataType::Int32, true), + DFField::new_unqualified("c2", DataType::Float32, true), + ], + HashMap::new(), +) +.unwrap(); +print!("type = {}", expr.get_type(&schema).unwrap()); +``` + +This results in the following output: + +```text +type = Float32 +``` + ## Conclusion In this guide, we've seen how to create `Expr`s programmatically and how to rewrite them. This is useful for simplifying and optimizing `Expr`s. We've also seen how to test our rule to ensure it works properly.