Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add documentation about type signatures, and export TIMEZONE_WILDCARD #7726

Merged
merged 3 commits into from
Oct 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 16 additions & 24 deletions datafusion/expr/src/built_in_function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
//! Built-in functions module contains all the built-in functions definitions.

use crate::nullif::SUPPORTED_NULLIF_TYPES;
use crate::type_coercion::functions::{data_types, TIMEZONE_PLACEHOLDER};
use crate::signature::TIMEZONE_WILDCARD;
use crate::type_coercion::functions::data_types;
use crate::{
conditional_expressions, struct_expressions, utils, Signature, TypeSignature,
Volatility,
Expand Down Expand Up @@ -1022,22 +1023,22 @@ impl BuiltinScalarFunction {
Exact(vec![Utf8, Timestamp(Nanosecond, None)]),
Exact(vec![
Utf8,
Timestamp(Nanosecond, Some(TIMEZONE_PLACEHOLDER.into())),
Timestamp(Nanosecond, Some(TIMEZONE_WILDCARD.into())),
]),
Exact(vec![Utf8, Timestamp(Microsecond, None)]),
Exact(vec![
Utf8,
Timestamp(Microsecond, Some(TIMEZONE_PLACEHOLDER.into())),
Timestamp(Microsecond, Some(TIMEZONE_WILDCARD.into())),
]),
Exact(vec![Utf8, Timestamp(Millisecond, None)]),
Exact(vec![
Utf8,
Timestamp(Millisecond, Some(TIMEZONE_PLACEHOLDER.into())),
Timestamp(Millisecond, Some(TIMEZONE_WILDCARD.into())),
]),
Exact(vec![Utf8, Timestamp(Second, None)]),
Exact(vec![
Utf8,
Timestamp(Second, Some(TIMEZONE_PLACEHOLDER.into())),
Timestamp(Second, Some(TIMEZONE_WILDCARD.into())),
]),
],
self.volatility(),
Expand All @@ -1052,11 +1053,8 @@ impl BuiltinScalarFunction {
]),
Exact(vec![
Interval(MonthDayNano),
Timestamp(
array_type.clone(),
Some(TIMEZONE_PLACEHOLDER.into()),
),
Timestamp(Nanosecond, Some(TIMEZONE_PLACEHOLDER.into())),
Timestamp(array_type.clone(), Some(TIMEZONE_WILDCARD.into())),
Timestamp(Nanosecond, Some(TIMEZONE_WILDCARD.into())),
]),
Exact(vec![
Interval(DayTime),
Expand All @@ -1065,30 +1063,24 @@ impl BuiltinScalarFunction {
]),
Exact(vec![
Interval(DayTime),
Timestamp(
array_type.clone(),
Some(TIMEZONE_PLACEHOLDER.into()),
),
Timestamp(Nanosecond, Some(TIMEZONE_PLACEHOLDER.into())),
Timestamp(array_type.clone(), Some(TIMEZONE_WILDCARD.into())),
Timestamp(Nanosecond, Some(TIMEZONE_WILDCARD.into())),
]),
Exact(vec![
Interval(MonthDayNano),
Timestamp(array_type.clone(), None),
]),
Exact(vec![
Interval(MonthDayNano),
Timestamp(
array_type.clone(),
Some(TIMEZONE_PLACEHOLDER.into()),
),
Timestamp(array_type.clone(), Some(TIMEZONE_WILDCARD.into())),
]),
Exact(vec![
Interval(DayTime),
Timestamp(array_type.clone(), None),
]),
Exact(vec![
Interval(DayTime),
Timestamp(array_type, Some(TIMEZONE_PLACEHOLDER.into())),
Timestamp(array_type, Some(TIMEZONE_WILDCARD.into())),
]),
]
};
Expand All @@ -1108,22 +1100,22 @@ impl BuiltinScalarFunction {
Exact(vec![Utf8, Timestamp(Second, None)]),
Exact(vec![
Utf8,
Timestamp(Second, Some(TIMEZONE_PLACEHOLDER.into())),
Timestamp(Second, Some(TIMEZONE_WILDCARD.into())),
]),
Exact(vec![Utf8, Timestamp(Microsecond, None)]),
Exact(vec![
Utf8,
Timestamp(Microsecond, Some(TIMEZONE_PLACEHOLDER.into())),
Timestamp(Microsecond, Some(TIMEZONE_WILDCARD.into())),
]),
Exact(vec![Utf8, Timestamp(Millisecond, None)]),
Exact(vec![
Utf8,
Timestamp(Millisecond, Some(TIMEZONE_PLACEHOLDER.into())),
Timestamp(Millisecond, Some(TIMEZONE_WILDCARD.into())),
]),
Exact(vec![Utf8, Timestamp(Nanosecond, None)]),
Exact(vec![
Utf8,
Timestamp(Nanosecond, Some(TIMEZONE_PLACEHOLDER.into())),
Timestamp(Nanosecond, Some(TIMEZONE_WILDCARD.into())),
]),
],
self.volatility(),
Expand Down
2 changes: 1 addition & 1 deletion datafusion/expr/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ pub use logical_plan::*;
pub use nullif::SUPPORTED_NULLIF_TYPES;
pub use operator::Operator;
pub use partition_evaluator::PartitionEvaluator;
pub use signature::{Signature, TypeSignature, Volatility};
pub use signature::{Signature, TypeSignature, Volatility, TIMEZONE_WILDCARD};
pub use table_source::{TableProviderFilterPushDown, TableSource, TableType};
pub use udaf::AggregateUDF;
pub use udf::ScalarUDF;
Expand Down
102 changes: 76 additions & 26 deletions datafusion/expr/src/signature.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,35 +20,82 @@

use arrow::datatypes::DataType;

/// Constant that is used as a placeholder for any valid timezone.
///
/// This is used where a function can accept a timestamp type with any
/// valid timezone; it exists to avoid the need to enumerate all possible
/// timezones. See [`TypeSignature`] for more details.
///
/// Type coercion always ensures that functions will be executed using
/// timestamp arrays that have a valid time zone. Functions must never
/// return results with this timezone.
pub const TIMEZONE_WILDCARD: &str = "+TZ";

/// A function's volatility, which defines the function's eligibility for certain optimizations
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)]
pub enum Volatility {
/// Immutable - An immutable function will always return the same output when given the same
/// input. An example of this is [super::BuiltinScalarFunction::Cos].
/// An immutable function will always return the same output when given the same
/// input. An example of this is [super::BuiltinScalarFunction::Cos]. DataFusion
/// will attempt to inline immutable functions during planning.
Immutable,
/// Stable - A stable function may return different values given the same input across different
/// A stable function may return different values given the same input across different
/// queries but must return the same value for a given input within a query. An example of
/// this is [super::BuiltinScalarFunction::Now].
/// this is [super::BuiltinScalarFunction::Now]. DataFusion
/// will attempt to inline `Stable` functions during planning, when possible.
/// For query `select col1, now() from t1`, it might take a while to execute but
/// `now()` column will be the same for each output row, which is evaluated
/// during planning.
Stable,
/// Volatile - A volatile function may change the return value from evaluation to evaluation.
/// A volatile function may change the return value from evaluation to evaluation.
/// Multiple invocations of a volatile function may return different results when used in the
/// same query. An example of this is [super::BuiltinScalarFunction::Random].
/// same query. An example of this is [super::BuiltinScalarFunction::Random]. DataFusion
/// cannot evaluate such functions during planning.
alamb marked this conversation as resolved.
Show resolved Hide resolved
/// In the query `select col1, random() from t1`, `random()` function will be evaluated
/// for each output row, resulting in a unique random value for each row.
Volatile,
}

/// A function's type signature, which defines the function's supported argument types.
/// A function's type signature defines the types of arguments the function supports.
///
/// Functions typically support only a few different types of arguments compared to the
/// different datatypes in Arrow. To make functions easy to use, when possible DataFusion
/// automatically coerces (adds casts to) function arguments so they match the type signature.
///
/// For example, a function like `cos` may only be implemented for `Float64` arguments. To support a query
/// that calls `cos` with a different argument type, such as `cos(int_column)`, type coercion automatically
/// adds a cast such as `cos(CAST(int_column AS DOUBLE))` during planning.
///
/// # Data Types
/// Types to match are represented using Arrow's [`DataType`]. [`DataType::Timestamp`] has an optional variable
/// timezone specification. To specify a function can handle a timestamp with *ANY* timezone, use
/// the [`TIMEZONE_WILDCARD`]. For example:
///
/// ```
/// # use arrow::datatypes::{DataType, TimeUnit};
/// # use datafusion_expr::{TIMEZONE_WILDCARD, TypeSignature};
/// let type_signature = TypeSignature::Exact(vec![
/// // A nanosecond precision timestamp with ANY timezone
/// // matches Timestamp(Nanosecond, Some("+00:00"))
/// // matches Timestamp(Nanosecond, Some("+05:00"))
/// // does not match Timestamp(Nanosecond, None)
/// DataType::Timestamp(TimeUnit::Nanosecond, Some(TIMEZONE_WILDCARD.into())),
/// ]);
/// ```
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum TypeSignature {
/// arbitrary number of arguments of an common type out of a list of valid types
// A function such as `concat` is `Variadic(vec![DataType::Utf8, DataType::LargeUtf8])`
/// arbitrary number of arguments of a common type out of a list of valid types.
///
/// # Examples
/// A function such as `concat` is `Variadic(vec![DataType::Utf8, DataType::LargeUtf8])`
Variadic(Vec<DataType>),
/// arbitrary number of arguments of an arbitrary but equal type
// A function such as `array` is `VariadicEqual`
// The first argument decides the type used for coercion
/// arbitrary number of arguments of an arbitrary but equal type.
/// DataFusion attempts to coerce all argument types to match the first argument's type
///
/// # Examples
/// A function such as `array` is `VariadicEqual`
VariadicEqual,
/// arbitrary number of arguments with arbitrary types
VariadicAny,
/// fixed number of arguments of an arbitrary but equal type out of a list of valid types
/// fixed number of arguments of an arbitrary but equal type out of a list of valid types.
///
/// # Examples
/// 1. A function of one argument of f64 is `Uniform(1, vec![DataType::Float64])`
Expand All @@ -58,7 +105,8 @@ pub enum TypeSignature {
Exact(Vec<DataType>),
/// fixed number of arguments of arbitrary types
Any(usize),
/// One of a list of signatures
/// Matches exactly one of a list of [`TypeSignature`]s. Coercion is attempted to match
/// the signatures in order, and stops after the first success, if any.
OneOf(Vec<TypeSignature>),
}

Expand Down Expand Up @@ -104,46 +152,48 @@ impl TypeSignature {
}
}

/// The signature of a function defines the supported argument types
/// and its volatility.
/// Defines the supported argument types ([`TypeSignature`]) and [`Volatility`] for a function.
///
/// DataFusion will automatically coerce (cast) argument types to one of the supported
/// function signatures, if possible.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Signature {
/// type_signature - The types that the function accepts. See [TypeSignature] for more information.
/// The data types that the function accepts. See [TypeSignature] for more information.
pub type_signature: TypeSignature,
/// volatility - The volatility of the function. See [Volatility] for more information.
/// The volatility of the function. See [Volatility] for more information.
pub volatility: Volatility,
}

impl Signature {
/// new - Creates a new Signature from any type signature and the volatility.
/// Creates a new Signature from a given type signature and volatility.
pub fn new(type_signature: TypeSignature, volatility: Volatility) -> Self {
Signature {
type_signature,
volatility,
}
}
/// variadic - Creates a variadic signature that represents an arbitrary number of arguments all from a type in common_types.
/// An arbitrary number of arguments with the same type, from those listed in `common_types`.
pub fn variadic(common_types: Vec<DataType>, volatility: Volatility) -> Self {
Self {
type_signature: TypeSignature::Variadic(common_types),
volatility,
}
}
/// variadic_equal - Creates a variadic signature that represents an arbitrary number of arguments of the same type.
/// An arbitrary number of arguments of the same type.
pub fn variadic_equal(volatility: Volatility) -> Self {
Self {
type_signature: TypeSignature::VariadicEqual,
volatility,
}
}
/// variadic_any - Creates a variadic signature that represents an arbitrary number of arguments of any type.
/// An arbitrary number of arguments of any type.
pub fn variadic_any(volatility: Volatility) -> Self {
Self {
type_signature: TypeSignature::VariadicAny,
volatility,
}
}
/// uniform - Creates a function with a fixed number of arguments of the same type, which must be from valid_types.
/// A fixed number of arguments of the same type, from those listed in `valid_types`.
pub fn uniform(
arg_count: usize,
valid_types: Vec<DataType>,
Expand All @@ -154,21 +204,21 @@ impl Signature {
volatility,
}
}
/// exact - Creates a signature which must match the types in exact_types in order.
/// Exactly matches the types in `exact_types`, in order.
pub fn exact(exact_types: Vec<DataType>, volatility: Volatility) -> Self {
Signature {
type_signature: TypeSignature::Exact(exact_types),
volatility,
}
}
/// any - Creates a signature which can a be made of any type but of a specified number
/// A specified number of arguments of any type
pub fn any(arg_count: usize, volatility: Volatility) -> Self {
Signature {
type_signature: TypeSignature::Any(arg_count),
volatility,
}
}
/// one_of Creates a signature which can match any of the [TypeSignature]s which are passed in.
/// Any one of a list of [TypeSignature]s.
pub fn one_of(type_signatures: Vec<TypeSignature>, volatility: Volatility) -> Self {
Signature {
type_signature: TypeSignature::OneOf(type_signatures),
Expand Down
13 changes: 2 additions & 11 deletions datafusion/expr/src/type_coercion/functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,23 +15,14 @@
// specific language governing permissions and limitations
// under the License.

use crate::signature::TIMEZONE_WILDCARD;
use crate::{Signature, TypeSignature};
use arrow::{
compute::can_cast_types,
datatypes::{DataType, TimeUnit},
};
use datafusion_common::{plan_err, DataFusionError, Result};

/// Constant that is used as a placeholder for any valid timezone.
/// This is used where a function can accept a timestamp type with any
/// valid timezone, it exists to avoid the need to enumerate all possible
/// timezones.
///
/// Type coercion always ensures that functions will be executed using
/// timestamp arrays that have a valid time zone. Functions must never
/// return results with this timezone.
pub(crate) const TIMEZONE_PLACEHOLDER: &str = "+TZ";

/// Performs type coercion for function arguments.
///
/// Returns the data types to which each argument must be coerced to
Expand Down Expand Up @@ -232,7 +223,7 @@ fn coerced_from<'a>(
Utf8 | LargeUtf8 => Some(type_into.clone()),
Null if can_cast_types(type_from, type_into) => Some(type_into.clone()),

Timestamp(unit, Some(tz)) if tz.as_ref() == TIMEZONE_PLACEHOLDER => {
Timestamp(unit, Some(tz)) if tz.as_ref() == TIMEZONE_WILDCARD => {
match type_from {
Timestamp(_, Some(from_tz)) => {
Some(Timestamp(unit.clone(), Some(from_tz.clone())))
Expand Down