Skip to content

Commit

Permalink
Removes min/max/count comparison based on name in aggregate statistics
Browse files Browse the repository at this point in the history
  • Loading branch information
edmondop committed Sep 2, 2024
1 parent ac74cd3 commit b9262ec
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 22 deletions.
26 changes: 26 additions & 0 deletions datafusion/expr/src/udaf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,19 @@ impl AggregateUDF {
self.inner.is_descending()
}

/// Returns true if the function is min. Used by the optimizer
pub fn is_min(&self) -> bool {
self.inner.is_min()
}

/// Returns true if the function is max. Used by the optimizer
pub fn is_max(&self) -> bool {
self.inner.is_max()
}
/// Returns true if the function is count. Used by the optimizer
pub fn is_count(&self) -> bool {
self.inner.is_count()
}
/// See [`AggregateUDFImpl::default_value`] for more details.
pub fn default_value(&self, data_type: &DataType) -> Result<ScalarValue> {
self.inner.default_value(data_type)
Expand Down Expand Up @@ -575,6 +588,19 @@ pub trait AggregateUDFImpl: Debug + Send + Sync {
None
}

// Returns true if the function is min. Used by the optimizer
fn is_min(&self) -> bool {
false
}
// Returns true if the function is max. Used by the optimizer
fn is_max(&self) -> bool {
false
}
// Returns true if the function is count. Used by the optimizer
fn is_count(&self) -> bool {
false
}

/// Returns default value of the function given the input is all `null`.
///
/// Most of the aggregate function return Null if input is Null,
Expand Down
4 changes: 4 additions & 0 deletions datafusion/functions-aggregate/src/count.rs
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,10 @@ impl AggregateUDFImpl for Count {
fn default_value(&self, _data_type: &DataType) -> Result<ScalarValue> {
Ok(ScalarValue::Int64(Some(0)))
}

fn is_count(&self) -> bool {
true
}
}

#[derive(Debug)]
Expand Down
8 changes: 8 additions & 0 deletions datafusion/functions-aggregate/src/min_max.rs
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,10 @@ impl AggregateUDFImpl for Max {
fn is_descending(&self) -> Option<bool> {
Some(true)
}

fn is_max(&self) -> bool {
true
}
fn order_sensitivity(&self) -> datafusion_expr::utils::AggregateOrderSensitivity {
datafusion_expr::utils::AggregateOrderSensitivity::Insensitive
}
Expand Down Expand Up @@ -1052,6 +1056,10 @@ impl AggregateUDFImpl for Min {
Some(false)
}

fn is_min(&self) -> bool {
true
}

fn order_sensitivity(&self) -> datafusion_expr::utils::AggregateOrderSensitivity {
datafusion_expr::utils::AggregateOrderSensitivity::Insensitive
}
Expand Down
26 changes: 4 additions & 22 deletions datafusion/physical-optimizer/src/aggregate_statistics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ fn take_optimizable_min(
match *num_rows {
0 => {
// MIN/MAX with 0 rows is always null
if is_min(agg_expr) {
if agg_expr.fun().is_min() {
if let Ok(min_data_type) =
ScalarValue::try_from(agg_expr.field().data_type())
{
Expand All @@ -191,7 +191,7 @@ fn take_optimizable_min(
}
value if value > 0 => {
let col_stats = &stats.column_statistics;
if is_min(agg_expr) {
if agg_expr.fun().is_min() {
let exprs = agg_expr.expressions();
if exprs.len() == 1 {
// TODO optimize with exprs other than Column
Expand Down Expand Up @@ -227,7 +227,7 @@ fn take_optimizable_max(
match *num_rows {
0 => {
// MIN/MAX with 0 rows is always null
if is_max(agg_expr) {
if agg_expr.fun().is_max() {
if let Ok(max_data_type) =
ScalarValue::try_from(agg_expr.field().data_type())
{
Expand All @@ -237,7 +237,7 @@ fn take_optimizable_max(
}
value if value > 0 => {
let col_stats = &stats.column_statistics;
if is_max(agg_expr) {
if agg_expr.fun().is_max() {
let exprs = agg_expr.expressions();
if exprs.len() == 1 {
// TODO optimize with exprs other than Column
Expand Down Expand Up @@ -273,22 +273,4 @@ fn is_non_distinct_count(agg_expr: &AggregateFunctionExpr) -> bool {
false
}

// TODO: Move this check into AggregateUDFImpl
// https://github.com/apache/datafusion/issues/11153
fn is_min(agg_expr: &AggregateFunctionExpr) -> bool {
if agg_expr.fun().name().to_lowercase() == "min" {
return true;
}
false
}

// TODO: Move this check into AggregateUDFImpl
// https://github.com/apache/datafusion/issues/11153
fn is_max(agg_expr: &AggregateFunctionExpr) -> bool {
if agg_expr.fun().name().to_lowercase() == "max" {
return true;
}
false
}

// See tests in datafusion/core/tests/physical_optimizer

0 comments on commit b9262ec

Please sign in to comment.