diff --git a/.github/actions/setup-windows-builder/action.yaml b/.github/actions/setup-windows-builder/action.yaml index a26a34a3db93..5e937358c7d7 100644 --- a/.github/actions/setup-windows-builder/action.yaml +++ b/.github/actions/setup-windows-builder/action.yaml @@ -38,7 +38,7 @@ runs: - name: Setup Rust toolchain shell: bash run: | - # Avoid self update to avoid CI failures: https://github.com/apache/arrow-datafusion/issues/9653 + # Avoid self update to avoid CI failures: https://github.com/apache/datafusion/issues/9653 rustup toolchain install stable --no-self-update rustup default stable rustup component add rustfmt diff --git a/.github/workflows/dev_pr.yml b/.github/workflows/dev_pr.yml index 77b257743331..11c14c5c2fee 100644 --- a/.github/workflows/dev_pr.yml +++ b/.github/workflows/dev_pr.yml @@ -34,7 +34,7 @@ jobs: runs-on: ubuntu-latest # only run for users whose permissions allow them to update PRs # otherwise labeler is failing: - # https://github.com/apache/arrow-datafusion/issues/3743 + # https://github.com/apache/datafusion/issues/3743 permissions: contents: read pull-requests: write diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index f05c5bf81d28..f27b5d12f47f 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -425,7 +425,7 @@ jobs: ci/scripts/rust_fmt.sh # Coverage job disabled due to - # https://github.com/apache/arrow-datafusion/issues/3678 + # https://github.com/apache/datafusion/issues/3678 # coverage: # name: coverage diff --git a/Cargo.toml b/Cargo.toml index 3002a5760fbc..2941be20e85d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -46,10 +46,10 @@ resolver = "2" [workspace.package] authors = ["Apache Arrow "] edition = "2021" -homepage = "https://github.com/apache/arrow-datafusion" +homepage = "https://github.com/apache/datafusion" license = "Apache-2.0" readme = "README.md" -repository = "https://github.com/apache/arrow-datafusion" +repository = "https://github.com/apache/datafusion" 
rust-version = "1.73" version = "37.1.0" diff --git a/README.md b/README.md index a85d42e59d13..32afc04f5f7f 100644 --- a/README.md +++ b/README.md @@ -27,14 +27,14 @@ [crates-badge]: https://img.shields.io/crates/v/datafusion.svg [crates-url]: https://crates.io/crates/datafusion [license-badge]: https://img.shields.io/badge/license-Apache%20v2-blue.svg -[license-url]: https://github.com/apache/arrow-datafusion/blob/main/LICENSE.txt -[actions-badge]: https://github.com/apache/arrow-datafusion/actions/workflows/rust.yml/badge.svg -[actions-url]: https://github.com/apache/arrow-datafusion/actions?query=branch%3Amain +[license-url]: https://github.com/apache/datafusion/blob/main/LICENSE.txt +[actions-badge]: https://github.com/apache/datafusion/actions/workflows/rust.yml/badge.svg +[actions-url]: https://github.com/apache/datafusion/actions?query=branch%3Amain [discord-badge]: https://img.shields.io/discord/885562378132000778.svg?logo=discord&style=flat-square [discord-url]: https://discord.com/invite/Qw5gKqHxUM -[Website](https://github.com/apache/arrow-datafusion) | -[Guides](https://github.com/apache/arrow-datafusion/tree/main/docs) | +[Website](https://github.com/apache/datafusion) | +[Guides](https://github.com/apache/datafusion/tree/main/docs) | [API Docs](https://docs.rs/datafusion/latest/datafusion/) | [Chat](https://discord.com/channels/885562378132000778/885562378132000781) @@ -42,7 +42,7 @@ Apache DataFusion is a very fast, extensible query engine for building high-quality data-centric systems in [Rust](http://rustlang.org), using the [Apache Arrow](https://arrow.apache.org) -in-memory format. [Python Bindings](https://github.com/apache/arrow-datafusion-python) are also available. DataFusion offers SQL and Dataframe APIs, excellent [performance](https://benchmark.clickhouse.com/), built-in support for CSV, Parquet, JSON, and Avro, extensive customization, and a great community. +in-memory format. 
[Python Bindings](https://github.com/apache/datafusion-python) are also available. DataFusion offers SQL and Dataframe APIs, excellent [performance](https://benchmark.clickhouse.com/), built-in support for CSV, Parquet, JSON, and Avro, extensive customization, and a great community. Here are links to some important information @@ -51,7 +51,7 @@ Here are links to some important information - [Rust Getting Started](https://arrow.apache.org/datafusion/user-guide/example-usage.html) - [Rust DataFrame API](https://arrow.apache.org/datafusion/user-guide/dataframe.html) - [Rust API docs](https://docs.rs/datafusion/latest/datafusion) -- [Rust Examples](https://github.com/apache/arrow-datafusion/tree/master/datafusion-examples) +- [Rust Examples](https://github.com/apache/datafusion/tree/master/datafusion-examples) - [Python DataFrame API](https://arrow.apache.org/datafusion-python/) - [Architecture](https://docs.rs/datafusion/latest/datafusion/index.html#architecture) @@ -102,4 +102,4 @@ each stable Rust version for 6 months after it is [released](https://github.com/rust-lang/rust/blob/master/RELEASES.md). This generally translates to support for the most recent 3 to 4 stable Rust versions. -We enforce this policy using a [MSRV CI Check](https://github.com/search?q=repo%3Aapache%2Farrow-datafusion+rust-version+language%3ATOML+path%3A%2F%5ECargo.toml%2F&type=code) +We enforce this policy using a [MSRV CI Check](https://github.com/search?q=repo%3Aapache%2Fdatafusion+rust-version+language%3ATOML+path%3A%2F%5ECargo.toml%2F&type=code) diff --git a/benchmarks/src/bin/tpch.rs b/benchmarks/src/bin/tpch.rs index 95480935700d..fc0f4ca0613c 100644 --- a/benchmarks/src/bin/tpch.rs +++ b/benchmarks/src/bin/tpch.rs @@ -47,7 +47,7 @@ enum TpchOpt { /// use `dbbench` instead. 
/// /// Note: this is kept to be backwards compatible with the benchmark names prior to -/// +/// #[tokio::main] async fn main() -> Result<()> { env_logger::init(); diff --git a/clippy.toml b/clippy.toml index 908f51664542..114e3bfceb27 100644 --- a/clippy.toml +++ b/clippy.toml @@ -1,6 +1,6 @@ disallowed-methods = [ - { path = "tokio::task::spawn", reason = "To provide cancel-safety, use `SpawnedTask::spawn` instead (https://github.com/apache/arrow-datafusion/issues/6513)" }, - { path = "tokio::task::spawn_blocking", reason = "To provide cancel-safety, use `SpawnedTask::spawn_blocking` instead (https://github.com/apache/arrow-datafusion/issues/6513)" }, + { path = "tokio::task::spawn", reason = "To provide cancel-safety, use `SpawnedTask::spawn` instead (https://github.com/apache/datafusion/issues/6513)" }, + { path = "tokio::task::spawn_blocking", reason = "To provide cancel-safety, use `SpawnedTask::spawn_blocking` instead (https://github.com/apache/datafusion/issues/6513)" }, ] disallowed-types = [ diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index dd0f8248df1f..c6019bc5970c 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -23,8 +23,8 @@ authors = ["Apache Arrow "] edition = "2021" keywords = ["arrow", "datafusion", "query", "sql"] license = "Apache-2.0" -homepage = "https://github.com/apache/arrow-datafusion" -repository = "https://github.com/apache/arrow-datafusion" +homepage = "https://github.com/apache/datafusion" +repository = "https://github.com/apache/datafusion" # Specify MSRV here as `cargo msrv` doesn't support workspace version rust-version = "1.73" readme = "README.md" diff --git a/datafusion-cli/README.md b/datafusion-cli/README.md index 2aae6bfc3052..a3fea22ddd0a 100644 --- a/datafusion-cli/README.md +++ b/datafusion-cli/README.md @@ -43,4 +43,4 @@ checked in `Cargo.lock` file to ensure reproducible builds. 
However, the `datafusion` and sub crates are intended for use as libraries and thus do not have a `Cargo.lock` file checked in. -[`datafusion cargo.toml`]: https://github.com/apache/arrow-datafusion/blob/main/Cargo.toml +[`datafusion cargo.toml`]: https://github.com/apache/datafusion/blob/main/Cargo.toml diff --git a/datafusion-examples/README.md b/datafusion-examples/README.md index 7ca90463cf8c..5c596d1cda91 100644 --- a/datafusion-examples/README.md +++ b/datafusion-examples/README.md @@ -30,7 +30,7 @@ Run `git submodule update --init` to init test files. To run the examples, use the `cargo run` command, such as: ```bash -git clone https://github.com/apache/arrow-datafusion +git clone https://github.com/apache/datafusion -cd arrow-datafusion +cd datafusion # Download test data git submodule update --init diff --git a/datafusion/core/benches/sql_planner.rs b/datafusion/core/benches/sql_planner.rs index 3946b716afef..c0e02d388af4 100644 --- a/datafusion/core/benches/sql_planner.rs +++ b/datafusion/core/benches/sql_planner.rs @@ -93,13 +93,13 @@ fn criterion_benchmark(c: &mut Criterion) { let ctx = create_context(); // Test simplest - // https://github.com/apache/arrow-datafusion/issues/5157 + // https://github.com/apache/datafusion/issues/5157 c.bench_function("logical_select_one_from_700", |b| { b.iter(|| logical_plan(&ctx, "SELECT c1 FROM t700")) }); // Test simplest - // https://github.com/apache/arrow-datafusion/issues/5157 + // https://github.com/apache/datafusion/issues/5157 c.bench_function("physical_select_one_from_700", |b| { b.iter(|| physical_plan(&ctx, "SELECT c1 FROM t700")) }); diff --git a/datafusion/core/src/catalog/mod.rs b/datafusion/core/src/catalog/mod.rs index d39fad8a5643..4a9c5170c2f5 100644 --- a/datafusion/core/src/catalog/mod.rs +++ b/datafusion/core/src/catalog/mod.rs @@ -176,8 +176,8 @@ impl CatalogProviderList for MemoryCatalogProviderList { /// read from Delta Lake tables /// /// [`datafusion-cli`]: 
https://arrow.apache.org/datafusion/user-guide/cli.html -/// [`DynamicFileCatalogProvider`]: https://github.com/apache/arrow-datafusion/blob/31b9b48b08592b7d293f46e75707aad7dadd7cbc/datafusion-cli/src/catalog.rs#L75 -/// [`catalog.rs`]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/catalog.rs +/// [`DynamicFileCatalogProvider`]: https://github.com/apache/datafusion/blob/31b9b48b08592b7d293f46e75707aad7dadd7cbc/datafusion-cli/src/catalog.rs#L75 +/// [`catalog.rs`]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/catalog.rs /// [delta-rs]: https://github.com/delta-io/delta-rs /// [`UnityCatalogProvider`]: https://github.com/delta-io/delta-rs/blob/951436ecec476ce65b5ed3b58b50fb0846ca7b91/crates/deltalake-core/src/data_catalog/unity/datafusion.rs#L111-L123 /// diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index 90ae05c6805f..abf09772e5bb 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -2423,7 +2423,7 @@ mod tests { Ok(()) } - // Test issue: https://github.com/apache/arrow-datafusion/issues/7790 + // Test issue: https://github.com/apache/datafusion/issues/7790 // The join operation outputs two identical column names, but they belong to different relations. 
#[tokio::test] async fn with_column_join_same_columns() -> Result<()> { @@ -2503,7 +2503,7 @@ mod tests { } // Table 't1' self join - // Supplementary test of issue: https://github.com/apache/arrow-datafusion/issues/7790 + // Supplementary test of issue: https://github.com/apache/datafusion/issues/7790 #[tokio::test] async fn with_column_self_join() -> Result<()> { let df = test_table().await?.select_columns(&["c1"])?; diff --git a/datafusion/core/src/datasource/cte_worktable.rs b/datafusion/core/src/datasource/cte_worktable.rs index f8fd94d4d3fd..afc4536f068e 100644 --- a/datafusion/core/src/datasource/cte_worktable.rs +++ b/datafusion/core/src/datasource/cte_worktable.rs @@ -38,7 +38,7 @@ use crate::execution::context::SessionState; /// See here for more details: www.postgresql.org/docs/11/queries-with.html#id-1.5.6.12.5.4 pub struct CteWorkTable { /// The name of the CTE work table - // WIP, see https://github.com/apache/arrow-datafusion/issues/462 + // WIP, see https://github.com/apache/datafusion/issues/462 #[allow(dead_code)] name: String, /// This schema must be shared across both the static and recursive terms of a recursive query diff --git a/datafusion/core/src/datasource/file_format/parquet.rs b/datafusion/core/src/datasource/file_format/parquet.rs index 33092e9f9584..440624eb6f8a 100644 --- a/datafusion/core/src/datasource/file_format/parquet.rs +++ b/datafusion/core/src/datasource/file_format/parquet.rs @@ -212,7 +212,7 @@ impl FileFormat for ParquetFormat { // object stores (like local file systems) the order returned from list // is not deterministic. Thus, to ensure deterministic schema inference // sort the files first. 
- // https://github.com/apache/arrow-datafusion/pull/6629 + // https://github.com/apache/datafusion/pull/6629 schemas.sort_by(|(location1, _), (location2, _)| location1.cmp(location2)); let schemas = schemas @@ -1040,7 +1040,7 @@ pub(crate) mod test_util { multi_page: bool, ) -> Result<(Vec, Vec)> { // we need the tmp files to be sorted as some tests rely on the how the returning files are ordered - // https://github.com/apache/arrow-datafusion/pull/6629 + // https://github.com/apache/datafusion/pull/6629 let tmp_files = { let mut tmp_files: Vec<_> = (0..batches.len()) .map(|_| NamedTempFile::new().expect("creating temp file")) diff --git a/datafusion/core/src/datasource/file_format/write/demux.rs b/datafusion/core/src/datasource/file_format/write/demux.rs index 396da96332f6..d82c2471c596 100644 --- a/datafusion/core/src/datasource/file_format/write/demux.rs +++ b/datafusion/core/src/datasource/file_format/write/demux.rs @@ -57,7 +57,7 @@ type DemuxedStreamReceiver = UnboundedReceiver<(Path, RecordBatchReceiver)>; /// the demux task for errors and abort accordingly. The single_file_ouput parameter /// overrides all other settings to force only a single file to be written. 
/// partition_by parameter will additionally split the input based on the unique -/// values of a specific column ``` +/// values of a specific column ``` /// ┌───────────┐ ┌────────────┐ ┌─────────────┐ /// ┌──────▶ │ batch 1 ├────▶...──────▶│ Batch a │ │ Output File1│ /// │ └───────────┘ └────────────┘ └─────────────┘ diff --git a/datafusion/core/src/datasource/listing/helpers.rs b/datafusion/core/src/datasource/listing/helpers.rs index f97d465c442b..b415ce9d913e 100644 --- a/datafusion/core/src/datasource/listing/helpers.rs +++ b/datafusion/core/src/datasource/listing/helpers.rs @@ -90,16 +90,6 @@ pub fn expr_applicable_for_cols(col_names: &[String], expr: &Expr) -> bool { Expr::ScalarFunction(scalar_function) => { match &scalar_function.func_def { - ScalarFunctionDefinition::BuiltIn(fun) => { - match fun.volatility() { - Volatility::Immutable => Ok(TreeNodeRecursion::Continue), - // TODO: Stable functions could be `applicable`, but that would require access to the context - Volatility::Stable | Volatility::Volatile => { - is_applicable = false; - Ok(TreeNodeRecursion::Stop) - } - } - } ScalarFunctionDefinition::UDF(fun) => { match fun.signature().volatility { Volatility::Immutable => Ok(TreeNodeRecursion::Continue), diff --git a/datafusion/core/src/datasource/listing/table.rs b/datafusion/core/src/datasource/listing/table.rs index 6625abd650d7..ab40160c7b54 100644 --- a/datafusion/core/src/datasource/listing/table.rs +++ b/datafusion/core/src/datasource/listing/table.rs @@ -244,7 +244,7 @@ pub struct ListingOptions { /// the future be automatically determined, for example using /// parquet metadata. /// - /// See + /// See /// NOTE: This attribute stores all equivalent orderings (the outer `Vec`) /// where each ordering consists of an individual lexicographic /// ordering (encapsulated by a `Vec`). 
If there aren't diff --git a/datafusion/core/src/datasource/listing/url.rs b/datafusion/core/src/datasource/listing/url.rs index eb95dc7b1d24..82acb7a3b644 100644 --- a/datafusion/core/src/datasource/listing/url.rs +++ b/datafusion/core/src/datasource/listing/url.rs @@ -457,7 +457,7 @@ mod tests { test("/a/b*.txt", Some(("/a/", "b*.txt"))); test("/a/b/**/c*.txt", Some(("/a/b/", "**/c*.txt"))); - // https://github.com/apache/arrow-datafusion/issues/2465 + // https://github.com/apache/datafusion/issues/2465 test( "/a/b/c//alltypes_plain*.parquet", Some(("/a/b/c//", "alltypes_plain*.parquet")), diff --git a/datafusion/core/src/datasource/physical_plan/csv.rs b/datafusion/core/src/datasource/physical_plan/csv.rs index 831ef4520567..0526df8e21f6 100644 --- a/datafusion/core/src/datasource/physical_plan/csv.rs +++ b/datafusion/core/src/datasource/physical_plan/csv.rs @@ -769,7 +769,7 @@ mod tests { assert_eq!(14, csv.base_config.file_schema.fields().len()); assert_eq!(14, csv.schema().fields().len()); - // errors due to https://github.com/apache/arrow-datafusion/issues/4918 + // errors due to https://github.com/apache/datafusion/issues/4918 let mut it = csv.execute(0, task_ctx)?; let err = it.next().await.unwrap().unwrap_err().strip_backtrace(); assert_eq!( diff --git a/datafusion/core/src/datasource/physical_plan/parquet/row_groups.rs b/datafusion/core/src/datasource/physical_plan/parquet/row_groups.rs index a4dfd9b96870..4305066ceffe 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/row_groups.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/row_groups.rs @@ -49,7 +49,7 @@ use super::ParquetFileMetrics; /// did not filter out that row group. 
/// /// Note: This method currently ignores ColumnOrder -/// +/// pub(crate) fn prune_row_groups_by_statistics( arrow_schema: &Schema, parquet_schema: &SchemaDescriptor, @@ -63,7 +63,7 @@ pub(crate) fn prune_row_groups_by_statistics( if let Some(range) = &range { // figure out where the first dictionary page (or first data page are) // note don't use the location of metadata - // + // let col = metadata.column(0); let offset = col .dictionary_page_offset() diff --git a/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs b/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs index aac5aff80f16..8972c261b14a 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs @@ -360,7 +360,7 @@ mod test { #[should_panic( expected = "Inconsistent types in ScalarValue::iter_to_array. Expected Int64, got TimestampNanosecond(NULL, None)" )] - // Due to https://github.com/apache/arrow-datafusion/issues/8295 + // Due to https://github.com/apache/datafusion/issues/8295 fn roundtrip_timestamp() { Test { input: timestamp_array([ @@ -470,7 +470,7 @@ mod test { (None, None), ]), }; - // Due to https://github.com/apache/arrow-datafusion/issues/8334, + // Due to https://github.com/apache/datafusion/issues/8334, // statistics for struct arrays are not supported test.expected_min = new_null_array(test.input.data_type(), test.expected_min.len()); @@ -483,7 +483,7 @@ mod test { #[should_panic( expected = "Inconsistent types in ScalarValue::iter_to_array. 
Expected Utf8, got Binary(NULL)" )] - // Due to https://github.com/apache/arrow-datafusion/issues/8295 + // Due to https://github.com/apache/datafusion/issues/8295 fn roundtrip_binary() { Test { input: Arc::new(BinaryArray::from_opt_vec(vec![ diff --git a/datafusion/core/src/datasource/view.rs b/datafusion/core/src/datasource/view.rs index 31e812332c94..3f024a6b4cb7 100644 --- a/datafusion/core/src/datasource/view.rs +++ b/datafusion/core/src/datasource/view.rs @@ -158,7 +158,7 @@ mod tests { #[tokio::test] async fn issue_3242() -> Result<()> { - // regression test for https://github.com/apache/arrow-datafusion/pull/3242 + // regression test for https://github.com/apache/datafusion/pull/3242 let session_ctx = SessionContext::new_with_config( SessionConfig::new().with_information_schema(true), ); diff --git a/datafusion/core/src/execution/context/avro.rs b/datafusion/core/src/execution/context/avro.rs index 1eca3b133757..2703529264e0 100644 --- a/datafusion/core/src/execution/context/avro.rs +++ b/datafusion/core/src/execution/context/avro.rs @@ -65,7 +65,7 @@ mod tests { use async_trait::async_trait; // Test for compilation error when calling read_* functions from an #[async_trait] function. - // See https://github.com/apache/arrow-datafusion/issues/1154 + // See https://github.com/apache/datafusion/issues/1154 #[async_trait] trait CallReadTrait { async fn call_read_avro(&self) -> DataFrame; diff --git a/datafusion/core/src/execution/context/csv.rs b/datafusion/core/src/execution/context/csv.rs index f59d77664645..504ebf6d77cf 100644 --- a/datafusion/core/src/execution/context/csv.rs +++ b/datafusion/core/src/execution/context/csv.rs @@ -127,7 +127,7 @@ mod tests { } // Test for compilation error when calling read_* functions from an #[async_trait] function. 
- // See https://github.com/apache/arrow-datafusion/issues/1154 + // See https://github.com/apache/datafusion/issues/1154 #[async_trait] trait CallReadTrait { async fn call_read_csv(&self) -> DataFrame; diff --git a/datafusion/core/src/execution/context/parquet.rs b/datafusion/core/src/execution/context/parquet.rs index 528bb0fa05af..f7ab15d95baa 100644 --- a/datafusion/core/src/execution/context/parquet.rs +++ b/datafusion/core/src/execution/context/parquet.rs @@ -333,7 +333,7 @@ mod tests { } // Test for compilation error when calling read_* functions from an #[async_trait] function. - // See https://github.com/apache/arrow-datafusion/issues/1154 + // See https://github.com/apache/datafusion/issues/1154 #[async_trait] trait CallReadTrait { async fn call_read_parquet(&self) -> DataFrame; diff --git a/datafusion/core/src/lib.rs b/datafusion/core/src/lib.rs index 4794cd89420f..21ca6d70eb58 100644 --- a/datafusion/core/src/lib.rs +++ b/datafusion/core/src/lib.rs @@ -128,7 +128,7 @@ //! //! There are many additional annotated examples of using DataFusion in the [datafusion-examples] directory. //! -//! [datafusion-examples]: https://github.com/apache/arrow-datafusion/tree/main/datafusion-examples +//! [datafusion-examples]: https://github.com/apache/datafusion/tree/main/datafusion-examples //! //! ## Customization and Extension //! @@ -170,7 +170,7 @@ //! You can find a formal description of DataFusion's architecture in our //! [SIGMOD 2024 Paper]. //! -//! [SIGMOD 2024 Paper]: https://github.com/apache/arrow-datafusion/files/14789704/DataFusion_Query_Engine___SIGMOD_2024-FINAL.pdf +//! [SIGMOD 2024 Paper]: https://github.com/apache/datafusion/files/14789704/DataFusion_Query_Engine___SIGMOD_2024-FINAL.pdf //! //! ## Overview Presentations //! @@ -306,7 +306,7 @@ //! [`TreeNode`]: datafusion_common::tree_node::TreeNode //! [`tree_node module`]: datafusion_expr::logical_plan::tree_node //! [`ExprSimplifier`]: crate::optimizer::simplify_expressions::ExprSimplifier -//! 
[`expr_api`.rs]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/expr_api.rs +//! [`expr_api`.rs]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/expr_api.rs //! //! ### Physical Plans //! @@ -379,7 +379,7 @@ //! [`RepartitionExec`]: https://docs.rs/datafusion/latest/datafusion/physical_plan/repartition/struct.RepartitionExec.html //! [Volcano style]: https://w6113.github.io/files/papers/volcanoparallelism-89.pdf //! [Morsel-Driven Parallelism]: https://db.in.tum.de/~leis/papers/morsels.pdf -//! [DataFusion paper submitted SIGMOD]: https://github.com/apache/arrow-datafusion/files/13874720/DataFusion_Query_Engine___SIGMOD_2024.pdf +//! [DataFusion paper submitted SIGMOD]: https://github.com/apache/datafusion/files/13874720/DataFusion_Query_Engine___SIGMOD_2024.pdf //! [implementors of `ExecutionPlan`]: https://docs.rs/datafusion/latest/datafusion/physical_plan/trait.ExecutionPlan.html#implementors //! //! ## Thread Scheduling @@ -488,7 +488,7 @@ pub use parquet; // re-export DataFusion sub-crates at the top level. Use `pub use *` // so that the contents of the subcrates appears in rustdocs -// for details, see https://github.com/apache/arrow-datafusion/issues/6648 +// for details, see https://github.com/apache/datafusion/issues/6648 /// re-export of [`datafusion_common`] crate pub mod common { diff --git a/datafusion/core/src/physical_optimizer/coalesce_batches.rs b/datafusion/core/src/physical_optimizer/coalesce_batches.rs index 7c0082037da0..1e234eaae137 100644 --- a/datafusion/core/src/physical_optimizer/coalesce_batches.rs +++ b/datafusion/core/src/physical_optimizer/coalesce_batches.rs @@ -59,7 +59,7 @@ impl PhysicalOptimizerRule for CoalesceBatches { // The goal here is to detect operators that could produce small batches and only // wrap those ones with a CoalesceBatchesExec operator. 
An alternate approach here // would be to build the coalescing logic directly into the operators - // See https://github.com/apache/arrow-datafusion/issues/139 + // See https://github.com/apache/datafusion/issues/139 let wrap_in_coalesce = plan_any.downcast_ref::().is_some() || plan_any.downcast_ref::().is_some() // Don't need to add CoalesceBatchesExec after a round robin RepartitionExec diff --git a/datafusion/core/src/physical_optimizer/enforce_distribution.rs b/datafusion/core/src/physical_optimizer/enforce_distribution.rs index c9c54a46bd1c..3cddf73c8eb1 100644 --- a/datafusion/core/src/physical_optimizer/enforce_distribution.rs +++ b/datafusion/core/src/physical_optimizer/enforce_distribution.rs @@ -3029,7 +3029,7 @@ pub(crate) mod tests { #[test] fn merge_does_not_need_sort() -> Result<()> { - // see https://github.com/apache/arrow-datafusion/issues/4331 + // see https://github.com/apache/datafusion/issues/4331 let schema = schema(); let sort_key = vec![PhysicalSortExpr { expr: col("a", &schema).unwrap(), @@ -3647,7 +3647,7 @@ pub(crate) mod tests { // The groups must have only contiguous ranges of rows from the same file // if any group has rows from multiple files, the data is no longer sorted destroyed - // https://github.com/apache/arrow-datafusion/issues/8451 + // https://github.com/apache/datafusion/issues/8451 let expected = [ "SortRequiredExec: [a@0 ASC]", "FilterExec: c@2 = 0", diff --git a/datafusion/core/src/physical_optimizer/join_selection.rs b/datafusion/core/src/physical_optimizer/join_selection.rs index f7512cb6d075..a8b308d3de84 100644 --- a/datafusion/core/src/physical_optimizer/join_selection.rs +++ b/datafusion/core/src/physical_optimizer/join_selection.rs @@ -30,8 +30,8 @@ use crate::error::Result; use crate::physical_optimizer::PhysicalOptimizerRule; use crate::physical_plan::joins::utils::{ColumnIndex, JoinFilter}; use crate::physical_plan::joins::{ - CrossJoinExec, HashJoinExec, PartitionMode, StreamJoinPartitionMode, - 
SymmetricHashJoinExec, + CrossJoinExec, HashJoinExec, NestedLoopJoinExec, PartitionMode, + StreamJoinPartitionMode, SymmetricHashJoinExec, }; use crate::physical_plan::projection::ProjectionExec; use crate::physical_plan::{ExecutionPlan, ExecutionPlanProperties}; @@ -199,6 +199,38 @@ fn swap_hash_join( } } +/// Swaps inputs of `NestedLoopJoinExec` and wraps it into `ProjectionExec` if required +fn swap_nl_join(join: &NestedLoopJoinExec) -> Result> { + let new_filter = swap_join_filter(join.filter()); + let new_join_type = &swap_join_type(*join.join_type()); + + let new_join = NestedLoopJoinExec::try_new( + Arc::clone(join.right()), + Arc::clone(join.left()), + new_filter, + new_join_type, + )?; + + // For Semi/Anti joins, swap result will produce same output schema, + // no need to wrap them into additional projection + let plan: Arc = if matches!( + join.join_type(), + JoinType::LeftSemi + | JoinType::RightSemi + | JoinType::LeftAnti + | JoinType::RightAnti + ) { + Arc::new(new_join) + } else { + let projection = + swap_reverting_projection(&join.left().schema(), &join.right().schema()); + + Arc::new(ProjectionExec::try_new(projection, Arc::new(new_join))?) + }; + + Ok(plan) +} + /// When the order of the join is changed by the optimizer, the columns in /// the output should not be impacted. This function creates the expressions /// that will allow to swap back the values from the original left as the first @@ -438,6 +470,14 @@ fn statistical_join_selection_subrule( } else { None } + } else if let Some(nl_join) = plan.as_any().downcast_ref::() { + let left = nl_join.left(); + let right = nl_join.right(); + if should_swap_join_order(&**left, &**right)? { + swap_nl_join(nl_join).map(Some)? 
+ } else { + None + } } else { None }; @@ -674,9 +714,12 @@ mod tests_statistical { use arrow::datatypes::{DataType, Field, Schema}; use datafusion_common::{stats::Precision, JoinType, ScalarValue}; - use datafusion_physical_expr::expressions::Column; + use datafusion_expr::Operator; + use datafusion_physical_expr::expressions::{BinaryExpr, Column}; use datafusion_physical_expr::{PhysicalExpr, PhysicalExprRef}; + use rstest::rstest; + /// Return statistcs for empty table fn empty_statistics() -> Statistics { Statistics { @@ -762,6 +805,35 @@ mod tests_statistical { }] } + /// Create join filter for NLJoinExec with expression `big_col > small_col` + /// where both columns are 0-indexed and come from left and right inputs respectively + fn nl_join_filter() -> Option { + let column_indices = vec![ + ColumnIndex { + index: 0, + side: JoinSide::Left, + }, + ColumnIndex { + index: 0, + side: JoinSide::Right, + }, + ]; + let intermediate_schema = Schema::new(vec![ + Field::new("big_col", DataType::Int32, false), + Field::new("small_col", DataType::Int32, false), + ]); + let expression = Arc::new(BinaryExpr::new( + Arc::new(Column::new_with_schema("big_col", &intermediate_schema).unwrap()), + Operator::Gt, + Arc::new(Column::new_with_schema("small_col", &intermediate_schema).unwrap()), + )) as _; + Some(JoinFilter::new( + expression, + column_indices, + intermediate_schema, + )) + } + /// Returns three plans with statistics of (min, max, distinct_count) /// * big 100K rows @ (0, 50k, 50k) /// * medium 10K rows @ (1k, 5k, 1k) @@ -1114,6 +1186,137 @@ mod tests_statistical { crosscheck_plans(join).unwrap(); } + #[rstest( + join_type, + case::inner(JoinType::Inner), + case::left(JoinType::Left), + case::right(JoinType::Right), + case::full(JoinType::Full) + )] + #[tokio::test] + async fn test_nl_join_with_swap(join_type: JoinType) { + let (big, small) = create_big_and_small(); + + let join = Arc::new( + NestedLoopJoinExec::try_new( + Arc::clone(&big), + Arc::clone(&small), + 
nl_join_filter(), + &join_type, + ) + .unwrap(), + ); + + let optimized_join = JoinSelection::new() + .optimize(join.clone(), &ConfigOptions::new()) + .unwrap(); + + let swapping_projection = optimized_join + .as_any() + .downcast_ref::() + .expect("A proj is required to swap columns back to their original order"); + + assert_eq!(swapping_projection.expr().len(), 2); + let (col, name) = &swapping_projection.expr()[0]; + assert_eq!(name, "big_col"); + assert_col_expr(col, "big_col", 1); + let (col, name) = &swapping_projection.expr()[1]; + assert_eq!(name, "small_col"); + assert_col_expr(col, "small_col", 0); + + let swapped_join = swapping_projection + .input() + .as_any() + .downcast_ref::() + .expect("The type of the plan should not be changed"); + + // Assert join side of big_col swapped in filter expression + let swapped_filter = swapped_join.filter().unwrap(); + let swapped_big_col_idx = swapped_filter.schema().index_of("big_col").unwrap(); + let swapped_big_col_side = swapped_filter + .column_indices() + .get(swapped_big_col_idx) + .unwrap() + .side; + assert_eq!( + swapped_big_col_side, + JoinSide::Right, + "Filter column side should be swapped" + ); + + assert_eq!( + swapped_join.left().statistics().unwrap().total_byte_size, + Precision::Inexact(8192) + ); + assert_eq!( + swapped_join.right().statistics().unwrap().total_byte_size, + Precision::Inexact(2097152) + ); + crosscheck_plans(join.clone()).unwrap(); + } + + #[rstest( + join_type, + case::left_semi(JoinType::LeftSemi), + case::left_anti(JoinType::LeftAnti), + case::right_semi(JoinType::RightSemi), + case::right_anti(JoinType::RightAnti) + )] + #[tokio::test] + async fn test_nl_join_with_swap_no_proj(join_type: JoinType) { + let (big, small) = create_big_and_small(); + + let join = Arc::new( + NestedLoopJoinExec::try_new( + Arc::clone(&big), + Arc::clone(&small), + nl_join_filter(), + &join_type, + ) + .unwrap(), + ); + + let optimized_join = JoinSelection::new() + .optimize(join.clone(), 
&ConfigOptions::new()) + .unwrap(); + + let swapped_join = optimized_join + .as_any() + .downcast_ref::() + .expect("The type of the plan should not be changed"); + + // Assert before/after schemas are equal + assert_eq!( + join.schema(), + swapped_join.schema(), + "Join schema should not be modified while optimization" + ); + + // Assert join side of big_col swapped in filter expression + let swapped_filter = swapped_join.filter().unwrap(); + let swapped_big_col_idx = swapped_filter.schema().index_of("big_col").unwrap(); + let swapped_big_col_side = swapped_filter + .column_indices() + .get(swapped_big_col_idx) + .unwrap() + .side; + assert_eq!( + swapped_big_col_side, + JoinSide::Right, + "Filter column side should be swapped" + ); + + assert_eq!( + swapped_join.left().statistics().unwrap().total_byte_size, + Precision::Inexact(8192) + ); + assert_eq!( + swapped_join.right().statistics().unwrap().total_byte_size, + Precision::Inexact(2097152) + ); + crosscheck_plans(join.clone()).unwrap(); + } + #[tokio::test] async fn test_swap_reverting_projection() { let left_schema = Schema::new(vec![ diff --git a/datafusion/core/src/physical_optimizer/projection_pushdown.rs b/datafusion/core/src/physical_optimizer/projection_pushdown.rs index ed445e6d48b8..a5f5a28fb2dc 100644 --- a/datafusion/core/src/physical_optimizer/projection_pushdown.rs +++ b/datafusion/core/src/physical_optimizer/projection_pushdown.rs @@ -287,7 +287,7 @@ fn try_unifying_projections( // Merging these projections is not beneficial, e.g // If an expression is not trivial and it is referred more than 1, unifies projections will be // beneficial as caching mechanism for non-trivial computations. 
- // See discussion in: https://github.com/apache/arrow-datafusion/issues/8296 + // See discussion in: https://github.com/apache/datafusion/issues/8296 if column_ref_map.iter().any(|(column, count)| { *count > 1 && !is_expr_trivial(&child.expr()[column.index()].0.clone()) }) { diff --git a/datafusion/core/src/physical_optimizer/pruning.rs b/datafusion/core/src/physical_optimizer/pruning.rs index d8a3814d77e1..74bbe1f95bb8 100644 --- a/datafusion/core/src/physical_optimizer/pruning.rs +++ b/datafusion/core/src/physical_optimizer/pruning.rs @@ -185,7 +185,7 @@ pub trait PruningStatistics { /// example of how to use `PruningPredicate` to prune files based on min/max /// values. /// -/// [`pruning.rs` example in the `datafusion-examples`]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/pruning.rs +/// [`pruning.rs` example in the `datafusion-examples`]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/pruning.rs /// /// Given an expression like `x = 5` and statistics for 3 containers (Row /// Groups, files, etc) `A`, `B`, and `C`: diff --git a/datafusion/core/tests/dataframe/mod.rs b/datafusion/core/tests/dataframe/mod.rs index 6576655a8f15..f565fba1db5b 100644 --- a/datafusion/core/tests/dataframe/mod.rs +++ b/datafusion/core/tests/dataframe/mod.rs @@ -95,7 +95,7 @@ async fn test_count_wildcard_on_where_in() -> Result<()> { .await?; // In the same SessionContext, AliasGenerator will increase subquery_alias id by 1 - // https://github.com/apache/arrow-datafusion/blame/cf45eb9020092943b96653d70fafb143cc362e19/datafusion/optimizer/src/alias.rs#L40-L43 + // https://github.com/apache/datafusion/blame/cf45eb9020092943b96653d70fafb143cc362e19/datafusion/optimizer/src/alias.rs#L40-L43 // for compare difference betwwen sql and df logical plan, we need to create a new SessionContext here let ctx = create_join_context()?; let df_results = ctx @@ -110,7 +110,7 @@ async fn test_count_wildcard_on_where_in() -> Result<()> 
{ .select(vec![count(wildcard())])? .into_unoptimized_plan(), // Usually, into_optimized_plan() should be used here, but due to - // https://github.com/apache/arrow-datafusion/issues/5771, + // https://github.com/apache/datafusion/issues/5771, // subqueries in SQL cannot be optimized, resulting in differences in logical_plan. Therefore, into_unoptimized_plan() is temporarily used here. ), ))? @@ -147,7 +147,7 @@ async fn test_count_wildcard_on_where_exist() -> Result<()> { .select(vec![count(wildcard())])? .into_unoptimized_plan(), // Usually, into_optimized_plan() should be used here, but due to - // https://github.com/apache/arrow-datafusion/issues/5771, + // https://github.com/apache/datafusion/issues/5771, // subqueries in SQL cannot be optimized, resulting in differences in logical_plan. Therefore, into_unoptimized_plan() is temporarily used here. )))? .select(vec![col("a"), col("b")])? @@ -245,7 +245,7 @@ async fn test_count_wildcard_on_where_scalar_subquery() -> Result<()> { .await?; // In the same SessionContext, AliasGenerator will increase subquery_alias id by 1 - // https://github.com/apache/arrow-datafusion/blame/cf45eb9020092943b96653d70fafb143cc362e19/datafusion/optimizer/src/alias.rs#L40-L43 + // https://github.com/apache/datafusion/blame/cf45eb9020092943b96653d70fafb143cc362e19/datafusion/optimizer/src/alias.rs#L40-L43 // for compare difference between sql and df logical plan, we need to create a new SessionContext here let ctx = create_join_context()?; let df_results = ctx diff --git a/datafusion/core/tests/fuzz_cases/join_fuzz.rs b/datafusion/core/tests/fuzz_cases/join_fuzz.rs index e25f04dc4beb..fbfa0ffc19b4 100644 --- a/datafusion/core/tests/fuzz_cases/join_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/join_fuzz.rs @@ -21,13 +21,19 @@ use arrow::array::{ArrayRef, Int32Array}; use arrow::compute::SortOptions; use arrow::record_batch::RecordBatch; use arrow::util::pretty::pretty_format_batches; +use arrow_schema::Schema; use rand::Rng; +use 
datafusion::common::JoinSide; +use datafusion::logical_expr::{JoinType, Operator}; +use datafusion::physical_expr::expressions::BinaryExpr; use datafusion::physical_plan::collect; use datafusion::physical_plan::expressions::Column; -use datafusion::physical_plan::joins::{HashJoinExec, PartitionMode, SortMergeJoinExec}; +use datafusion::physical_plan::joins::utils::{ColumnIndex, JoinFilter}; +use datafusion::physical_plan::joins::{ + HashJoinExec, NestedLoopJoinExec, PartitionMode, SortMergeJoinExec, +}; use datafusion::physical_plan::memory::MemoryExec; -use datafusion_expr::JoinType; use datafusion::prelude::{SessionConfig, SessionContext}; use test_utils::stagger_batch_with_seed; @@ -73,7 +79,7 @@ async fn test_full_join_1k() { } #[tokio::test] -async fn test_semi_join_1k() { +async fn test_semi_join_10k() { run_join_test( make_staggered_batches(10000), make_staggered_batches(10000), @@ -83,7 +89,7 @@ async fn test_semi_join_1k() { } #[tokio::test] -async fn test_anti_join_1k() { +async fn test_anti_join_10k() { run_join_test( make_staggered_batches(10000), make_staggered_batches(10000), @@ -118,6 +124,46 @@ async fn run_join_test( ), ]; + // Nested loop join uses filter for joining records + let column_indices = vec![ + ColumnIndex { + index: 0, + side: JoinSide::Left, + }, + ColumnIndex { + index: 1, + side: JoinSide::Left, + }, + ColumnIndex { + index: 0, + side: JoinSide::Right, + }, + ColumnIndex { + index: 1, + side: JoinSide::Right, + }, + ]; + let intermediate_schema = Schema::new(vec![ + schema1.field_with_name("a").unwrap().to_owned(), + schema1.field_with_name("b").unwrap().to_owned(), + schema2.field_with_name("a").unwrap().to_owned(), + schema2.field_with_name("b").unwrap().to_owned(), + ]); + + let equal_a = Arc::new(BinaryExpr::new( + Arc::new(Column::new("a", 0)), + Operator::Eq, + Arc::new(Column::new("a", 2)), + )) as _; + let equal_b = Arc::new(BinaryExpr::new( + Arc::new(Column::new("b", 1)), + Operator::Eq, + Arc::new(Column::new("b", 3)), + 
)) as _; + let expression = Arc::new(BinaryExpr::new(equal_a, Operator::And, equal_b)) as _; + + let on_filter = JoinFilter::new(expression, column_indices, intermediate_schema); + // sort-merge join let left = Arc::new( MemoryExec::try_new(&[input1.clone()], schema1.clone(), None).unwrap(), @@ -161,9 +207,23 @@ async fn run_join_test( ); let hj_collected = collect(hj, task_ctx.clone()).await.unwrap(); + // nested loop join + let left = Arc::new( + MemoryExec::try_new(&[input1.clone()], schema1.clone(), None).unwrap(), + ); + let right = Arc::new( + MemoryExec::try_new(&[input2.clone()], schema2.clone(), None).unwrap(), + ); + let nlj = Arc::new( + NestedLoopJoinExec::try_new(left, right, Some(on_filter), &join_type) + .unwrap(), + ); + let nlj_collected = collect(nlj, task_ctx.clone()).await.unwrap(); + // compare let smj_formatted = pretty_format_batches(&smj_collected).unwrap().to_string(); let hj_formatted = pretty_format_batches(&hj_collected).unwrap().to_string(); + let nlj_formatted = pretty_format_batches(&nlj_collected).unwrap().to_string(); let mut smj_formatted_sorted: Vec<&str> = smj_formatted.trim().lines().collect(); smj_formatted_sorted.sort_unstable(); @@ -171,12 +231,31 @@ async fn run_join_test( let mut hj_formatted_sorted: Vec<&str> = hj_formatted.trim().lines().collect(); hj_formatted_sorted.sort_unstable(); + let mut nlj_formatted_sorted: Vec<&str> = nlj_formatted.trim().lines().collect(); + nlj_formatted_sorted.sort_unstable(); + for (i, (smj_line, hj_line)) in smj_formatted_sorted .iter() .zip(&hj_formatted_sorted) .enumerate() { - assert_eq!((i, smj_line), (i, hj_line)); + assert_eq!( + (i, smj_line), + (i, hj_line), + "SortMergeJoinExec and HashJoinExec produced different results" + ); + } + + for (i, (nlj_line, hj_line)) in nlj_formatted_sorted + .iter() + .zip(&hj_formatted_sorted) + .enumerate() + { + assert_eq!( + (i, nlj_line), + (i, hj_line), + "NestedLoopJoinExec and HashJoinExec produced different results" + ); } } } diff --git 
a/datafusion/core/tests/parquet/row_group_pruning.rs b/datafusion/core/tests/parquet/row_group_pruning.rs index d6de2b6f8ef0..1a174a325bd5 100644 --- a/datafusion/core/tests/parquet/row_group_pruning.rs +++ b/datafusion/core/tests/parquet/row_group_pruning.rs @@ -288,7 +288,7 @@ async fn prune_disabled() { // $bits: number of bits of the integer to test (8, 16, 32, 64) // $correct_bloom_filters: if false, replicates the -// https://github.com/apache/arrow-datafusion/issues/9779 bug so that tests pass +// https://github.com/apache/datafusion/issues/9779 bug so that tests pass // if and only if Bloom filters on Int8 and Int16 columns are still buggy. macro_rules! int_tests { ($bits:expr) => { @@ -448,13 +448,13 @@ macro_rules! int_tests { }; } -// int8/int16 are incorrect: https://github.com/apache/arrow-datafusion/issues/9779 +// int8/int16 are incorrect: https://github.com/apache/datafusion/issues/9779 int_tests!(32); int_tests!(64); // $bits: number of bits of the integer to test (8, 16, 32, 64) // $correct_bloom_filters: if false, replicates the -// https://github.com/apache/arrow-datafusion/issues/9779 bug so that tests pass +// https://github.com/apache/datafusion/issues/9779 bug so that tests pass // if and only if Bloom filters on UInt8 and UInt16 columns are still buggy. macro_rules! uint_tests { ($bits:expr) => { @@ -585,7 +585,7 @@ macro_rules! 
uint_tests { }; } -// uint8/uint16 are incorrect: https://github.com/apache/arrow-datafusion/issues/9779 +// uint8/uint16 are incorrect: https://github.com/apache/datafusion/issues/9779 uint_tests!(32); uint_tests!(64); diff --git a/datafusion/core/tests/simplification.rs b/datafusion/core/tests/simplification.rs index c5ce5d2652e0..46923ee9074f 100644 --- a/datafusion/core/tests/simplification.rs +++ b/datafusion/core/tests/simplification.rs @@ -299,7 +299,7 @@ fn select_date_plus_interval() -> Result<()> { #[test] fn simplify_project_scalar_fn() -> Result<()> { - // Issue https://github.com/apache/arrow-datafusion/issues/5996 + // Issue https://github.com/apache/datafusion/issues/5996 let schema = Schema::new(vec![Field::new("f", DataType::Float64, false)]); let plan = table_scan(Some("test"), &schema, None)? .project(vec![power(col("f"), lit(1.0))])? diff --git a/datafusion/core/tests/sql/explain_analyze.rs b/datafusion/core/tests/sql/explain_analyze.rs index d80d728d65e3..b5819dc18832 100644 --- a/datafusion/core/tests/sql/explain_analyze.rs +++ b/datafusion/core/tests/sql/explain_analyze.rs @@ -570,7 +570,7 @@ async fn csv_explain_verbose_plans() { #[rstest] #[tokio::test] async fn explain_analyze_runs_optimizers(#[values("*", "1")] count_expr: &str) { - // repro for https://github.com/apache/arrow-datafusion/issues/917 + // repro for https://github.com/apache/datafusion/issues/917 // where EXPLAIN ANALYZE was not correctly running optiimizer let ctx = SessionContext::new(); register_alltypes_parquet(&ctx).await; @@ -719,7 +719,7 @@ async fn csv_explain_analyze_order_by() { .to_string(); // Ensure that the ordering is not optimized away from the plan - // https://github.com/apache/arrow-datafusion/issues/6379 + // https://github.com/apache/datafusion/issues/6379 let needle = "SortExec: expr=[c1@0 ASC NULLS LAST], metrics=[output_rows=100, elapsed_compute"; assert_contains!(&formatted, needle); diff --git a/datafusion/core/tests/tpcds_planning.rs 
b/datafusion/core/tests/tpcds_planning.rs index 237771248f53..44fb0afff319 100644 --- a/datafusion/core/tests/tpcds_planning.rs +++ b/datafusion/core/tests/tpcds_planning.rs @@ -231,7 +231,7 @@ async fn tpcds_logical_q40() -> Result<()> { #[tokio::test] #[ignore] // Optimizer rule 'scalar_subquery_to_join' failed: Optimizing disjunctions not supported! -// issue: https://github.com/apache/arrow-datafusion/issues/5368 +// issue: https://github.com/apache/datafusion/issues/5368 async fn tpcds_logical_q41() -> Result<()> { create_logical_plan(41).await } diff --git a/datafusion/expr/src/aggregate_function.rs b/datafusion/expr/src/aggregate_function.rs index 85f8c74f3737..890a2ed04965 100644 --- a/datafusion/expr/src/aggregate_function.rs +++ b/datafusion/expr/src/aggregate_function.rs @@ -422,7 +422,7 @@ mod tests { // For each variant in AggregateFuncion, it converts the variant to a string // and then back to a variant. The test asserts that the original variant and // the reconstructed variant are the same. This assertion is also necessary for - // function suggestion. See https://github.com/apache/arrow-datafusion/issues/8082 + // function suggestion. See https://github.com/apache/datafusion/issues/8082 fn test_display_and_from_str() { for func_original in AggregateFunction::iter() { let func_name = func_original.to_string(); diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs deleted file mode 100644 index 83eb2f722b08..000000000000 --- a/datafusion/expr/src/built_in_function.rs +++ /dev/null @@ -1,207 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Built-in functions module contains all the built-in functions definitions. - -use std::collections::HashMap; -use std::fmt; -use std::str::FromStr; -use std::sync::OnceLock; - -use crate::type_coercion::functions::data_types; -use crate::{FuncMonotonicity, Signature, Volatility}; - -use arrow::datatypes::DataType; -use datafusion_common::{plan_err, DataFusionError, Result}; - -use strum::IntoEnumIterator; -use strum_macros::EnumIter; - -/// Enum of all built-in scalar functions -// Contributor's guide for adding new scalar functions -// https://arrow.apache.org/datafusion/contributor-guide/index.html#how-to-add-a-new-scalar-function -#[derive(Debug, Clone, PartialEq, Eq, Hash, EnumIter, Copy)] -pub enum BuiltinScalarFunction { - // math functions - /// coalesce - Coalesce, -} - -/// Maps the sql function name to `BuiltinScalarFunction` -fn name_to_function() -> &'static HashMap<&'static str, BuiltinScalarFunction> { - static NAME_TO_FUNCTION_LOCK: OnceLock> = - OnceLock::new(); - NAME_TO_FUNCTION_LOCK.get_or_init(|| { - let mut map = HashMap::new(); - BuiltinScalarFunction::iter().for_each(|func| { - func.aliases().iter().for_each(|&a| { - map.insert(a, func); - }); - }); - map - }) -} - -/// Maps `BuiltinScalarFunction` --> canonical sql function -/// First alias in the array is used to display function names -fn function_to_name() -> &'static HashMap { - static FUNCTION_TO_NAME_LOCK: OnceLock> = - OnceLock::new(); - FUNCTION_TO_NAME_LOCK.get_or_init(|| { - let mut map = HashMap::new(); - BuiltinScalarFunction::iter().for_each(|func| { - 
map.insert(func, *func.aliases().first().unwrap_or(&"NO_ALIAS")); - }); - map - }) -} - -impl BuiltinScalarFunction { - /// an allowlist of functions to take zero arguments, so that they will get special treatment - /// while executing. - #[deprecated( - since = "32.0.0", - note = "please use TypeSignature::supports_zero_argument instead" - )] - pub fn supports_zero_argument(&self) -> bool { - self.signature().type_signature.supports_zero_argument() - } - - /// Returns the name of this function - pub fn name(&self) -> &str { - // .unwrap is safe here because compiler makes sure the map will have matches for each BuiltinScalarFunction - function_to_name().get(self).unwrap() - } - - /// Returns the [Volatility] of the builtin function. - pub fn volatility(&self) -> Volatility { - match self { - // Immutable scalar builtins - BuiltinScalarFunction::Coalesce => Volatility::Immutable, - } - } - - /// Returns the output [`DataType`] of this function - /// - /// This method should be invoked only after `input_expr_types` have been validated - /// against the function's `TypeSignature` using `type_coercion::functions::data_types()`. - /// - /// This method will: - /// 1. Perform additional checks on `input_expr_types` that are beyond the scope of `TypeSignature` validation. - /// 2. Deduce the output `DataType` based on the provided `input_expr_types`. - pub fn return_type(self, input_expr_types: &[DataType]) -> Result { - // Note that this function *must* return the same type that the respective physical expression returns - // or the execution panics. - - // the return type of the built in function. - // Some built-in functions' return type depends on the incoming type. 
- match self { - BuiltinScalarFunction::Coalesce => { - // COALESCE has multiple args and they might get coerced, get a preview of this - let coerced_types = data_types(input_expr_types, &self.signature()); - coerced_types.map(|types| types[0].clone()) - } - } - } - - /// Return the argument [`Signature`] supported by this function - pub fn signature(&self) -> Signature { - // note: the physical expression must accept the type returned by this function or the execution panics. - - // for now, the list is small, as we do not have many built-in functions. - match self { - BuiltinScalarFunction::Coalesce => { - Signature::variadic_equal(self.volatility()) - } - } - } - - /// This function specifies monotonicity behaviors for built-in scalar functions. - /// The list can be extended, only mathematical and datetime functions are - /// considered for the initial implementation of this feature. - pub fn monotonicity(&self) -> Option { - None - } - - /// Returns all names that can be used to call this function - pub fn aliases(&self) -> &'static [&'static str] { - match self { - // conditional functions - BuiltinScalarFunction::Coalesce => &["coalesce"], - } - } -} - -impl fmt::Display for BuiltinScalarFunction { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}", self.name()) - } -} - -impl FromStr for BuiltinScalarFunction { - type Err = DataFusionError; - fn from_str(name: &str) -> Result { - if let Some(func) = name_to_function().get(name) { - Ok(*func) - } else { - plan_err!("There is no built-in function named {name}") - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - // Test for BuiltinScalarFunction's Display and from_str() implementations. - // For each variant in BuiltinScalarFunction, it converts the variant to a string - // and then back to a variant. The test asserts that the original variant and - // the reconstructed variant are the same. This assertion is also necessary for - // function suggestion. 
See https://github.com/apache/arrow-datafusion/issues/8082 - fn test_display_and_from_str() { - for (_, func_original) in name_to_function().iter() { - let func_name = func_original.to_string(); - let func_from_str = BuiltinScalarFunction::from_str(&func_name).unwrap(); - assert_eq!(func_from_str, *func_original); - } - } - - #[test] - fn test_coalesce_return_types() { - let coalesce = BuiltinScalarFunction::Coalesce; - let return_type = coalesce - .return_type(&[DataType::Date32, DataType::Date32]) - .unwrap(); - assert_eq!(return_type, DataType::Date32); - } - - #[test] - fn test_coalesce_return_types_dictionary() { - let coalesce = BuiltinScalarFunction::Coalesce; - let return_type = coalesce - .return_type(&[ - DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)), - DataType::Utf8, - ]) - .unwrap(); - assert_eq!( - return_type, - DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)) - ); - } -} diff --git a/datafusion/expr/src/built_in_window_function.rs b/datafusion/expr/src/built_in_window_function.rs index f4b1cd03db1f..1001bbb015ed 100644 --- a/datafusion/expr/src/built_in_window_function.rs +++ b/datafusion/expr/src/built_in_window_function.rs @@ -196,7 +196,7 @@ mod tests { // For each variant in BuiltInWindowFunction, it converts the variant to a string // and then back to a variant. The test asserts that the original variant and // the reconstructed variant are the same. This assertion is also necessary for - // function suggestion. See https://github.com/apache/arrow-datafusion/issues/8082 + // function suggestion. 
See https://github.com/apache/datafusion/issues/8082 fn test_display_and_from_str() { for func_original in BuiltInWindowFunction::iter() { let func_name = func_original.to_string(); diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 2a8fe4ca90a5..08d495c3be35 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -28,8 +28,8 @@ use crate::logical_plan::Subquery; use crate::utils::expr_to_columns; use crate::window_frame; use crate::{ - aggregate_function, built_in_function, built_in_window_function, udaf, - BuiltinScalarFunction, ExprSchemable, Operator, Signature, + aggregate_function, built_in_window_function, udaf, ExprSchemable, Operator, + Signature, }; use arrow::datatypes::DataType; @@ -362,10 +362,6 @@ impl Between { #[derive(Debug, Clone, PartialEq, Eq, Hash)] /// Defines which implementation of a function for DataFusion to call. pub enum ScalarFunctionDefinition { - /// Resolved to a `BuiltinScalarFunction` - /// There is plan to migrate `BuiltinScalarFunction` to UDF-based implementation (issue#8045) - /// This variant is planned to be removed in long term - BuiltIn(BuiltinScalarFunction), /// Resolved to a user defined function UDF(Arc), /// A scalar function constructed with name. This variant can not be executed directly @@ -393,7 +389,6 @@ impl ScalarFunctionDefinition { /// Function's name for display pub fn name(&self) -> &str { match self { - ScalarFunctionDefinition::BuiltIn(fun) => fun.name(), ScalarFunctionDefinition::UDF(udf) => udf.name(), ScalarFunctionDefinition::Name(func_name) => func_name.as_ref(), } @@ -403,9 +398,6 @@ impl ScalarFunctionDefinition { /// when evaluated multiple times with the same input. 
pub fn is_volatile(&self) -> Result { match self { - ScalarFunctionDefinition::BuiltIn(fun) => { - Ok(fun.volatility() == crate::Volatility::Volatile) - } ScalarFunctionDefinition::UDF(udf) => { Ok(udf.signature().volatility == crate::Volatility::Volatile) } @@ -419,14 +411,6 @@ impl ScalarFunctionDefinition { } impl ScalarFunction { - /// Create a new ScalarFunction expression - pub fn new(fun: built_in_function::BuiltinScalarFunction, args: Vec) -> Self { - Self { - func_def: ScalarFunctionDefinition::BuiltIn(fun), - args, - } - } - /// Create a new ScalarFunction expression with a user-defined function (UDF) pub fn new_udf(udf: Arc, args: Vec) -> Self { Self { @@ -1282,7 +1266,7 @@ impl Expr { pub fn short_circuits(&self) -> bool { match self { Expr::ScalarFunction(ScalarFunction { func_def, .. }) => { - matches!(func_def, ScalarFunctionDefinition::BuiltIn(fun) if *fun == BuiltinScalarFunction::Coalesce) + matches!(func_def, ScalarFunctionDefinition::UDF(fun) if fun.name().eq("coalesce")) } Expr::BinaryExpr(BinaryExpr { op, .. 
}) => { matches!(op, Operator::And | Operator::Or) diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 567f260daaf9..1d976a12cc4f 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -19,15 +19,15 @@ use crate::expr::{ AggregateFunction, BinaryExpr, Cast, Exists, GroupingSet, InList, InSubquery, - Placeholder, ScalarFunction, TryCast, + Placeholder, TryCast, }; use crate::function::{ AccumulatorArgs, AccumulatorFactoryFunction, PartitionEvaluatorFactory, }; use crate::{ - aggregate_function, built_in_function, conditional_expressions::CaseBuilder, - logical_plan::Subquery, AggregateUDF, Expr, LogicalPlan, Operator, - ScalarFunctionImplementation, ScalarUDF, Signature, Volatility, + aggregate_function, conditional_expressions::CaseBuilder, logical_plan::Subquery, + AggregateUDF, Expr, LogicalPlan, Operator, ScalarFunctionImplementation, ScalarUDF, + Signature, Volatility, }; use crate::{AggregateUDFImpl, ColumnarValue, ScalarUDFImpl, WindowUDF, WindowUDFImpl}; use arrow::datatypes::{DataType, Field}; @@ -478,23 +478,6 @@ pub fn is_not_unknown(expr: Expr) -> Expr { Expr::IsNotUnknown(Box::new(expr)) } -macro_rules! nary_scalar_expr { - ($ENUM:ident, $FUNC:ident, $DOC:expr) => { - #[doc = $DOC ] - pub fn $FUNC(args: Vec) -> Expr { - Expr::ScalarFunction(ScalarFunction::new( - built_in_function::BuiltinScalarFunction::$ENUM, - args, - )) - } - }; -} - -// generate methods for creating the supported unary/binary expressions - -// math functions -nary_scalar_expr!(Coalesce, coalesce, "returns `coalesce(args...)`, which evaluates to the value of the first [Expr] which is not NULL"); - /// Create a CASE WHEN statement with literal WHEN expressions for comparison to the base expression. 
pub fn case(expr: Expr) -> CaseBuilder { CaseBuilder::new(Some(Box::new(expr)), vec![], vec![], None) diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 466fd13ce207..e01ec2296a32 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -139,23 +139,6 @@ impl ExprSchemable for Expr { .map(|e| e.get_type(schema)) .collect::>>()?; match func_def { - ScalarFunctionDefinition::BuiltIn(fun) => { - // verify that function is invoked with correct number and type of arguments as defined in `TypeSignature` - data_types(&arg_data_types, &fun.signature()).map_err(|_| { - plan_datafusion_err!( - "{}", - utils::generate_signature_error_msg( - &format!("{fun}"), - fun.signature(), - &arg_data_types, - ) - ) - })?; - - // perform additional function arguments validation (due to limited - // expressiveness of `TypeSignature`), then infer return type - fun.return_type(&arg_data_types) - } ScalarFunctionDefinition::UDF(fun) => { // verify that function is invoked with correct number and type of arguments as defined in `TypeSignature` data_types(&arg_data_types, fun.signature()).map_err(|_| { diff --git a/datafusion/expr/src/lib.rs b/datafusion/expr/src/lib.rs index 36732324eff6..de4f31029293 100644 --- a/datafusion/expr/src/lib.rs +++ b/datafusion/expr/src/lib.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -//! [DataFusion](https://github.com/apache/arrow-datafusion) +//! [DataFusion](https://github.com/apache/datafusion) //! is an extensible query execution framework that uses //! [Apache Arrow](https://arrow.apache.org) as its in-memory format. //! @@ -26,7 +26,6 @@ //! The [expr_fn] module contains functions for creating expressions. 
mod accumulator; -mod built_in_function; mod built_in_window_function; mod columnar_value; mod literal; @@ -60,7 +59,6 @@ pub mod window_state; pub use accumulator::Accumulator; pub use aggregate_function::AggregateFunction; -pub use built_in_function::BuiltinScalarFunction; pub use built_in_window_function::BuiltInWindowFunction; pub use columnar_value::ColumnarValue; pub use expr::{ diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index f7c0fbac537b..2810425ae1d8 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -434,7 +434,7 @@ impl LogicalPlanBuilder { /// But Distinct (A, B, C) --> (1, 2, 3), (1, 2, 4) /// (which will appear as a (1, 2), (1, 2) if a and b are projected /// - /// See for more details + /// See for more details fn add_missing_columns( curr_plan: LogicalPlan, missing_cols: &[Column], @@ -495,7 +495,7 @@ impl LogicalPlanBuilder { // This handles the special case for // SELECT col as ORDER BY // - // As described in https://github.com/apache/arrow-datafusion/issues/5293 + // As described in https://github.com/apache/datafusion/issues/5293 let all_aliases = missing_exprs.iter().all(|e| { projection_exprs.iter().any(|proj_expr| { if let Expr::Alias(Alias { expr, .. }) = proj_expr { @@ -1475,7 +1475,7 @@ pub fn wrap_projection_for_join_if_necessary( // join keys: [cast(a as int)] // // then a and cast(a as int) will use the same field name - `a` in projection schema. 
- // https://github.com/apache/arrow-datafusion/issues/4478 + // https://github.com/apache/datafusion/issues/4478 if matches!(key, Expr::Cast(_)) || matches!(key, Expr::TryCast(_)) { let alias = format!("{key}"); key.clone().alias(alias) diff --git a/datafusion/expr/src/simplify.rs b/datafusion/expr/src/simplify.rs index 536a01fa8571..6fae31b4a698 100644 --- a/datafusion/expr/src/simplify.rs +++ b/datafusion/expr/src/simplify.rs @@ -48,7 +48,7 @@ pub trait SimplifyInfo { /// # Example /// See the `simplify_demo` in the [`expr_api` example] /// -/// [`expr_api` example]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/expr_api.rs +/// [`expr_api` example]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/expr_api.rs #[derive(Debug, Clone)] pub struct SimplifyContext<'a> { schema: Option, diff --git a/datafusion/expr/src/tree_node.rs b/datafusion/expr/src/tree_node.rs index 35fec509c95a..471ed0b975b0 100644 --- a/datafusion/expr/src/tree_node.rs +++ b/datafusion/expr/src/tree_node.rs @@ -283,9 +283,6 @@ impl TreeNode for Expr { .update_data(|be| Expr::Sort(Sort::new(be, asc, nulls_first))), Expr::ScalarFunction(ScalarFunction { func_def, args }) => { transform_vec(args, &mut f)?.map_data(|new_args| match func_def { - ScalarFunctionDefinition::BuiltIn(fun) => { - Ok(Expr::ScalarFunction(ScalarFunction::new(fun, new_args))) - } ScalarFunctionDefinition::UDF(fun) => { Ok(Expr::ScalarFunction(ScalarFunction::new_udf(fun, new_args))) } diff --git a/datafusion/expr/src/udaf.rs b/datafusion/expr/src/udaf.rs index 856f0dc44246..67c3b51ca373 100644 --- a/datafusion/expr/src/udaf.rs +++ b/datafusion/expr/src/udaf.rs @@ -54,12 +54,12 @@ use std::vec; /// This is a separate struct from `AggregateUDFImpl` to maintain backwards /// compatibility with the older API. 
/// -/// [the examples]: https://github.com/apache/arrow-datafusion/tree/main/datafusion-examples#single-process +/// [the examples]: https://github.com/apache/datafusion/tree/main/datafusion-examples#single-process /// [aggregate function]: https://en.wikipedia.org/wiki/Aggregate_function /// [`Accumulator`]: crate::Accumulator /// [`create_udaf`]: crate::expr_fn::create_udaf -/// [`simple_udaf.rs`]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/simple_udaf.rs -/// [`advanced_udaf.rs`]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/advanced_udaf.rs +/// [`simple_udaf.rs`]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/simple_udaf.rs +/// [`advanced_udaf.rs`]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/advanced_udaf.rs #[derive(Debug, Clone)] pub struct AggregateUDF { inner: Arc, @@ -214,7 +214,7 @@ where /// See [`advanced_udaf.rs`] for a full example with complete implementation and /// [`AggregateUDF`] for other available options. /// -/// [`advanced_udaf.rs`]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/advanced_udaf.rs +/// [`advanced_udaf.rs`]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/advanced_udaf.rs /// /// # Basic Example /// ``` diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index 56266a05170b..069ac078a171 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -48,8 +48,8 @@ use std::sync::Arc; /// compatibility with the older API. 
/// /// [`create_udf`]: crate::expr_fn::create_udf -/// [`simple_udf.rs`]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/simple_udf.rs -/// [`advanced_udf.rs`]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/advanced_udf.rs +/// [`simple_udf.rs`]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/simple_udf.rs +/// [`advanced_udf.rs`]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/advanced_udf.rs #[derive(Debug, Clone)] pub struct ScalarUDF { inner: Arc, @@ -213,7 +213,7 @@ where /// [`ScalarUDF`] for other available options. /// /// -/// [`advanced_udf.rs`]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/advanced_udf.rs +/// [`advanced_udf.rs`]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/advanced_udf.rs /// # Basic Example /// ``` /// # use std::any::Any; @@ -424,7 +424,7 @@ impl ScalarUDFImpl for AliasedScalarUDFImpl { } /// Implementation of [`ScalarUDFImpl`] that wraps the function style pointers -/// of the older API (see +/// of the older API (see /// for more details) struct ScalarUdfLegacyWrapper { /// The name of the function diff --git a/datafusion/expr/src/udwf.rs b/datafusion/expr/src/udwf.rs index d3925f2e1925..5a8373509a40 100644 --- a/datafusion/expr/src/udwf.rs +++ b/datafusion/expr/src/udwf.rs @@ -46,8 +46,8 @@ use std::{ /// /// [`PartitionEvaluator`]: crate::PartitionEvaluator /// [`create_udwf`]: crate::expr_fn::create_udwf -/// [`simple_udwf.rs`]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/simple_udwf.rs -/// [`advanced_udwf.rs`]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/advanced_udwf.rs +/// [`simple_udwf.rs`]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/simple_udwf.rs +/// [`advanced_udwf.rs`]: 
https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/advanced_udwf.rs #[derive(Debug, Clone)] pub struct WindowUDF { inner: Arc, @@ -194,7 +194,7 @@ where /// [`WindowUDF`] for other available options. /// /// -/// [`advanced_udwf.rs`]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/advanced_udwf.rs +/// [`advanced_udwf.rs`]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/advanced_udwf.rs /// # Basic Example /// ``` /// # use std::any::Any; @@ -315,7 +315,7 @@ impl WindowUDFImpl for AliasedWindowUDFImpl { } /// Implementation of [`WindowUDFImpl`] that wraps the function style pointers -/// of the older API (see +/// of the older API (see /// for more details) pub struct WindowUDFLegacyWrapper { /// name diff --git a/datafusion/functions/src/math/coalesce.rs b/datafusion/functions/src/math/coalesce.rs new file mode 100644 index 000000000000..3e16113bbd05 --- /dev/null +++ b/datafusion/functions/src/math/coalesce.rs @@ -0,0 +1,141 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use std::any::Any; + +use arrow::array::{new_null_array, BooleanArray}; +use arrow::compute::kernels::zip::zip; +use arrow::compute::{and, is_not_null, is_null}; +use arrow::datatypes::DataType; + +use datafusion_common::{exec_err, Result}; +use datafusion_expr::type_coercion::functions::data_types; +use datafusion_expr::ColumnarValue; +use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; + +#[derive(Debug)] +pub struct CoalesceFunc { + signature: Signature, +} + +impl Default for CoalesceFunc { + fn default() -> Self { + CoalesceFunc::new() + } +} + +impl CoalesceFunc { + pub fn new() -> Self { + Self { + signature: Signature::variadic_equal(Volatility::Immutable), + } + } +} + +impl ScalarUDFImpl for CoalesceFunc { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "coalesce" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> Result { + // COALESCE has multiple args and they might get coerced, get a preview of this + let coerced_types = data_types(arg_types, self.signature()); + coerced_types.map(|types| types[0].clone()) + } + + /// coalesce evaluates to the first value which is not NULL + fn invoke(&self, args: &[ColumnarValue]) -> Result { + // do not accept 0 arguments. + if args.is_empty() { + return exec_err!( + "coalesce was called with {} arguments. 
It requires at least 1.", + args.len() + ); + } + + let return_type = args[0].data_type(); + let mut return_array = args.iter().filter_map(|x| match x { + ColumnarValue::Array(array) => Some(array.len()), + _ => None, + }); + + if let Some(size) = return_array.next() { + // start with nulls as default output + let mut current_value = new_null_array(&return_type, size); + let mut remainder = BooleanArray::from(vec![true; size]); + + for arg in args { + match arg { + ColumnarValue::Array(ref array) => { + let to_apply = and(&remainder, &is_not_null(array.as_ref())?)?; + current_value = zip(&to_apply, array, ¤t_value)?; + remainder = and(&remainder, &is_null(array)?)?; + } + ColumnarValue::Scalar(value) => { + if value.is_null() { + continue; + } else { + let last_value = value.to_scalar()?; + current_value = zip(&remainder, &last_value, ¤t_value)?; + break; + } + } + } + if remainder.iter().all(|x| x == Some(false)) { + break; + } + } + Ok(ColumnarValue::Array(current_value)) + } else { + let result = args + .iter() + .filter_map(|x| match x { + ColumnarValue::Scalar(s) if !s.is_null() => Some(x.clone()), + _ => None, + }) + .next() + .unwrap_or_else(|| args[0].clone()); + Ok(result) + } + } +} + +#[cfg(test)] +mod test { + use arrow::datatypes::DataType; + + use datafusion_expr::ScalarUDFImpl; + + use crate::math; + + #[test] + fn test_coalesce_return_types() { + let coalesce = math::coalesce::CoalesceFunc::new(); + let return_type = coalesce + .return_type(&[DataType::Date32, DataType::Date32]) + .unwrap(); + assert_eq!(return_type, DataType::Date32); + } +} diff --git a/datafusion/functions/src/math/mod.rs b/datafusion/functions/src/math/mod.rs index b6e8d26b6460..1d9e5d94a90d 100644 --- a/datafusion/functions/src/math/mod.rs +++ b/datafusion/functions/src/math/mod.rs @@ -21,6 +21,7 @@ use datafusion_expr::ScalarUDF; use std::sync::Arc; pub mod abs; +pub mod coalesce; pub mod cot; pub mod factorial; pub mod gcd; @@ -46,6 +47,7 @@ make_math_unary_udf!(AtanhFunc, 
ATANH, atanh, atanh, Some(vec![Some(true)])); make_math_binary_udf!(Atan2, ATAN2, atan2, atan2, Some(vec![Some(true)])); make_math_unary_udf!(CbrtFunc, CBRT, cbrt, cbrt, None); make_math_unary_udf!(CeilFunc, CEIL, ceil, ceil, Some(vec![Some(true)])); +make_udf_function!(coalesce::CoalesceFunc, COALESCE, coalesce); make_math_unary_udf!(CosFunc, COS, cos, cos, None); make_math_unary_udf!(CoshFunc, COSH, cosh, cosh, None); make_udf_function!(cot::CotFunc, COT, cot); @@ -128,6 +130,11 @@ pub mod expr_fn { super::ceil().call(vec![num]) } + #[doc = "returns `coalesce(args...)`, which evaluates to the value of the first [Expr] which is not NULL"] + pub fn coalesce(args: Vec) -> Expr { + super::coalesce().call(args) + } + #[doc = "cosine"] pub fn cos(num: Expr) -> Expr { super::cos().call(vec![num]) @@ -282,6 +289,7 @@ pub fn functions() -> Vec> { atanh(), cbrt(), ceil(), + coalesce(), cos(), cosh(), cot(), diff --git a/datafusion/optimizer/README.md b/datafusion/optimizer/README.md index b0f4c5a72014..2f1f85e3a57a 100644 --- a/datafusion/optimizer/README.md +++ b/datafusion/optimizer/README.md @@ -158,8 +158,8 @@ Looking at the `EXPLAIN` output we can see that the optimizer has effectively re +---------------+-------------------------------------------------+ ``` -If the expression name is not preserved, bugs such as [#3704](https://github.com/apache/arrow-datafusion/issues/3704) -and [#3555](https://github.com/apache/arrow-datafusion/issues/3555) occur where the expected columns can not be found. +If the expression name is not preserved, bugs such as [#3704](https://github.com/apache/datafusion/issues/3704) +and [#3555](https://github.com/apache/datafusion/issues/3555) occur where the expected columns can not be found. 
### Building Expression Names diff --git a/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs b/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs index 080ec074d3c3..c5e60ee319c9 100644 --- a/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs +++ b/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs @@ -29,7 +29,7 @@ use datafusion_expr::{lit, Expr, LogicalPlan, WindowFunctionDefinition}; /// Rewrite `Count(Expr:Wildcard)` to `Count(Expr:Literal)`. /// -/// Resolves issue: +/// Resolves issue: #[derive(Default)] pub struct CountWildcardRule {} diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index ac96decbdd80..7ef468abe989 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -108,7 +108,7 @@ fn analyze_internal( .into_iter() .map(|expr| { // ensure aggregate names don't change: - // https://github.com/apache/arrow-datafusion/issues/3555 + // https://github.com/apache/datafusion/issues/3555 rewrite_preserving_name(expr, &mut expr_rewrite) }) .collect::>>()?; @@ -306,16 +306,6 @@ impl TreeNodeRewriter for TypeCoercionRewriter { Ok(Transformed::yes(Expr::Case(case))) } Expr::ScalarFunction(ScalarFunction { func_def, args }) => match func_def { - ScalarFunctionDefinition::BuiltIn(fun) => { - let new_args = coerce_arguments_for_signature( - args.as_slice(), - &self.schema, - &fun.signature(), - )?; - Ok(Transformed::yes(Expr::ScalarFunction(ScalarFunction::new( - fun, new_args, - )))) - } ScalarFunctionDefinition::UDF(fun) => { let new_expr = coerce_arguments_for_signature( args.as_slice(), diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index 2fabd5de9282..690b596ed35f 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -662,7 +662,7 @@ impl TreeNodeVisitor for 
ExprIdentifierVisitor<'_> { type Node = Expr; fn f_down(&mut self, expr: &Expr) -> Result { - // related to https://github.com/apache/arrow-datafusion/issues/8814 + // related to https://github.com/apache/datafusion/issues/8814 // If the expr contain volatile expression or is a short-circuit expression, skip it. if expr.short_circuits() || is_volatile_expression(expr)? { self.visit_stack diff --git a/datafusion/optimizer/src/eliminate_cross_join.rs b/datafusion/optimizer/src/eliminate_cross_join.rs index 18a9c05b9dc6..ae6c1b339d5f 100644 --- a/datafusion/optimizer/src/eliminate_cross_join.rs +++ b/datafusion/optimizer/src/eliminate_cross_join.rs @@ -155,7 +155,7 @@ fn try_flatten_join_inputs( LogicalPlan::Join(join) if join.join_type == JoinType::Inner => { if join.filter.is_some() { // The filter of inner join will lost, skip this rule. - // issue: https://github.com/apache/arrow-datafusion/issues/4844 + // issue: https://github.com/apache/datafusion/issues/4844 return Ok(false); } possible_join_keys.extend(join.on.clone()); @@ -541,7 +541,7 @@ mod tests { } #[test] - /// See https://github.com/apache/arrow-datafusion/issues/7530 + /// See https://github.com/apache/datafusion/issues/7530 fn eliminate_cross_not_possible_nested_inner_join_with_filter() -> Result<()> { let t1 = test_table_scan_with_name("t1")?; let t2 = test_table_scan_with_name("t2")?; diff --git a/datafusion/optimizer/src/optimize_projections.rs b/datafusion/optimizer/src/optimize_projections.rs index b54fb248a7c7..c49095c4a3c0 100644 --- a/datafusion/optimizer/src/optimize_projections.rs +++ b/datafusion/optimizer/src/optimize_projections.rs @@ -463,7 +463,7 @@ fn merge_consecutive_projections(proj: &Projection) -> Result // If an expression is non-trivial and appears more than once, consecutive // projections will benefit from a compute-once approach. 
For details, see: - // https://github.com/apache/arrow-datafusion/issues/8296 + // https://github.com/apache/datafusion/issues/8296 if column_referral_map.into_iter().any(|(col, usage)| { usage > 1 && !is_expr_trivial( @@ -1351,7 +1351,7 @@ mod tests { } // Test outer projection isn't discarded despite the same schema as inner - // https://github.com/apache/arrow-datafusion/issues/8942 + // https://github.com/apache/datafusion/issues/8942 #[test] fn test_derived_column() -> Result<()> { let table_scan = test_table_scan()?; diff --git a/datafusion/optimizer/src/optimizer.rs b/datafusion/optimizer/src/optimizer.rs index ff692681ccd6..e787f56587f7 100644 --- a/datafusion/optimizer/src/optimizer.rs +++ b/datafusion/optimizer/src/optimizer.rs @@ -411,7 +411,7 @@ impl Optimizer { (Err(e), Some(orig_plan)) => { // Note to future readers: if you see this warning it signals a // bug in the DataFusion optimizer. Please consider filing a ticket - // https://github.com/apache/arrow-datafusion + // https://github.com/apache/datafusion warn!( "Skipping optimizer rule '{}' due to unexpected error: {}", rule.name(), diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index bb14f75446df..a7df2b8ca147 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -186,7 +186,7 @@ impl ExprSimplifier { // TODO iterate until no changes are made during rewrite // (evaluating constants can enable new simplifications and // simplifications can enable new constant evaluation) - // https://github.com/apache/arrow-datafusion/issues/1160 + // https://github.com/apache/datafusion/issues/1160 expr.rewrite(&mut const_evaluator) .data()? 
.rewrite(&mut simplifier) @@ -212,7 +212,7 @@ impl ExprSimplifier { // Would be nice if this API could use the SimplifyInfo // rather than creating an DFSchemaRef coerces rather than doing // it manually. - // https://github.com/apache/arrow-datafusion/issues/3793 + // https://github.com/apache/datafusion/issues/3793 pub fn coerce(&self, expr: Expr, schema: DFSchemaRef) -> Result { let mut expr_rewrite = TypeCoercionRewriter { schema }; @@ -525,9 +525,6 @@ impl<'a> ConstEvaluator<'a> { | Expr::Wildcard { .. } | Expr::Placeholder(_) => false, Expr::ScalarFunction(ScalarFunction { func_def, .. }) => match func_def { - ScalarFunctionDefinition::BuiltIn(fun) => { - Self::volatility_ok(fun.volatility()) - } ScalarFunctionDefinition::UDF(fun) => { Self::volatility_ok(fun.signature().volatility) } @@ -1731,7 +1728,7 @@ mod tests { // Would be nice if this API could use the SimplifyInfo // rather than creating an DFSchemaRef coerces rather than doing // it manually. - // https://github.com/apache/arrow-datafusion/issues/3793 + // https://github.com/apache/datafusion/issues/3793 let expr = simplifier.coerce(expr, schema).unwrap(); assert_eq!(expected, simplifier.simplify(expr).unwrap()); @@ -3088,7 +3085,7 @@ mod tests { // c2 // // Need to call simplify 2x due to - // https://github.com/apache/arrow-datafusion/issues/1160 + // https://github.com/apache/datafusion/issues/1160 assert_eq!( simplify(simplify(Expr::Case(Case::new( None, @@ -3106,7 +3103,7 @@ mod tests { // ISNULL(c2) OR c2 // // Need to call simplify 2x due to - // https://github.com/apache/arrow-datafusion/issues/1160 + // https://github.com/apache/datafusion/issues/1160 assert_eq!( simplify(simplify(Expr::Case(Case::new( None, @@ -3124,7 +3121,7 @@ mod tests { // --> c1 OR NOT(c2) // // Need to call simplify 2x due to - // https://github.com/apache/arrow-datafusion/issues/1160 + // https://github.com/apache/datafusion/issues/1160 assert_eq!( simplify(simplify(Expr::Case(Case::new( None, @@ -3143,7 +3140,7 @@ 
mod tests { // --> c1 OR c2 // // Need to call simplify 2x due to - // https://github.com/apache/arrow-datafusion/issues/1160 + // https://github.com/apache/datafusion/issues/1160 assert_eq!( simplify(simplify(Expr::Case(Case::new( None, @@ -3395,7 +3392,7 @@ mod tests { true, ))); // TODO: Further simplify this expression - // https://github.com/apache/arrow-datafusion/issues/8970 + // https://github.com/apache/datafusion/issues/8970 // assert_eq!(simplify(expr.clone()), lit(true)); assert_eq!(simplify(expr.clone()), expr); } diff --git a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs index 5b0314af20f4..d15d12b690da 100644 --- a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs +++ b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs @@ -120,7 +120,7 @@ impl SimplifyExpressions { // // This is likely related to the fact that order of the columns must // match the order of the children. 
see - // https://github.com/apache/arrow-datafusion/pull/8780 for more details + // https://github.com/apache/datafusion/pull/8780 for more details let simplifier = if let LogicalPlan::Join(_) = plan { simplifier.with_canonicalize(false) } else { diff --git a/datafusion/optimizer/src/utils.rs b/datafusion/optimizer/src/utils.rs index f0605018e6f3..0e89e2452a43 100644 --- a/datafusion/optimizer/src/utils.rs +++ b/datafusion/optimizer/src/utils.rs @@ -294,7 +294,7 @@ pub fn merge_schema(inputs: Vec<&LogicalPlan>) -> DFSchema { /// For example, if an expression `1 + 2` is rewritten to `3`, the name of the /// expression should be preserved: `3 as "1 + 2"` /// -/// See for details +/// See for details pub struct NamePreserver { use_alias: bool, } diff --git a/datafusion/optimizer/tests/optimizer_integration.rs b/datafusion/optimizer/tests/optimizer_integration.rs index dcaadaa8209c..180c79206664 100644 --- a/datafusion/optimizer/tests/optimizer_integration.rs +++ b/datafusion/optimizer/tests/optimizer_integration.rs @@ -58,7 +58,7 @@ fn case_when() -> Result<()> { #[test] fn subquery_filter_with_cast() -> Result<()> { - // regression test for https://github.com/apache/arrow-datafusion/issues/3760 + // regression test for https://github.com/apache/datafusion/issues/3760 let sql = "SELECT col_int32 FROM test \ WHERE col_int32 > (\ SELECT AVG(col_int32) FROM test \ @@ -102,7 +102,7 @@ fn unsigned_target_type() -> Result<()> { #[test] fn distribute_by() -> Result<()> { - // regression test for https://github.com/apache/arrow-datafusion/issues/3234 + // regression test for https://github.com/apache/datafusion/issues/3234 let sql = "SELECT col_int32, col_utf8 FROM test DISTRIBUTE BY (col_utf8)"; let plan = test_sql(sql)?; let expected = "Repartition: DistributeBy(col_utf8)\ @@ -113,7 +113,7 @@ fn distribute_by() -> Result<()> { #[test] fn semi_join_with_join_filter() -> Result<()> { - // regression test for https://github.com/apache/arrow-datafusion/issues/2888 + // 
regression test for https://github.com/apache/datafusion/issues/2888 let sql = "SELECT col_utf8 FROM test WHERE EXISTS (\ SELECT col_utf8 FROM test t2 WHERE test.col_int32 = t2.col_int32 \ AND test.col_uint32 != t2.col_uint32)"; @@ -130,7 +130,7 @@ fn semi_join_with_join_filter() -> Result<()> { #[test] fn anti_join_with_join_filter() -> Result<()> { - // regression test for https://github.com/apache/arrow-datafusion/issues/2888 + // regression test for https://github.com/apache/datafusion/issues/2888 let sql = "SELECT col_utf8 FROM test WHERE NOT EXISTS (\ SELECT col_utf8 FROM test t2 WHERE test.col_int32 = t2.col_int32 \ AND test.col_uint32 != t2.col_uint32)"; @@ -262,7 +262,7 @@ fn push_down_filter_groupby_expr_contains_alias() { } #[test] -// issue: https://github.com/apache/arrow-datafusion/issues/5334 +// issue: https://github.com/apache/datafusion/issues/5334 fn test_same_name_but_not_ambiguous() { let sql = "SELECT t1.col_int32 AS col_int32 FROM test t1 intersect SELECT col_int32 FROM test t2"; let plan = test_sql(sql).unwrap(); diff --git a/datafusion/physical-expr/src/aggregate/approx_distinct.rs b/datafusion/physical-expr/src/aggregate/approx_distinct.rs index 66e1310695ad..b4bde5cd52ef 100644 --- a/datafusion/physical-expr/src/aggregate/approx_distinct.rs +++ b/datafusion/physical-expr/src/aggregate/approx_distinct.rs @@ -89,7 +89,7 @@ impl AggregateExpr for ApproxDistinct { let accumulator: Box = match &self.input_data_type { // TODO u8, i8, u16, i16 shall really be done using bitmap, not HLL // TODO support for boolean (trivial case) - // https://github.com/apache/arrow-datafusion/issues/1109 + // https://github.com/apache/datafusion/issues/1109 DataType::UInt8 => Box::new(NumericHLLAccumulator::::new()), DataType::UInt16 => Box::new(NumericHLLAccumulator::::new()), DataType::UInt32 => Box::new(NumericHLLAccumulator::::new()), diff --git a/datafusion/physical-expr/src/aggregate/min_max.rs b/datafusion/physical-expr/src/aggregate/min_max.rs index 
3573df3743ee..c7ba9a38c9a8 100644 --- a/datafusion/physical-expr/src/aggregate/min_max.rs +++ b/datafusion/physical-expr/src/aggregate/min_max.rs @@ -248,7 +248,7 @@ impl AggregateExpr for Max { } // It would be nice to have a fast implementation for Strings as well - // https://github.com/apache/arrow-datafusion/issues/6906 + // https://github.com/apache/datafusion/issues/6906 // This is only reached if groups_accumulator_supported is out of sync _ => internal_err!( diff --git a/datafusion/physical-expr/src/conditional_expressions.rs b/datafusion/physical-expr/src/conditional_expressions.rs deleted file mode 100644 index 87d63bfd32e2..000000000000 --- a/datafusion/physical-expr/src/conditional_expressions.rs +++ /dev/null @@ -1,79 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use arrow::array::{new_null_array, Array, BooleanArray}; -use arrow::compute::kernels::zip::zip; -use arrow::compute::{and, is_not_null, is_null}; - -use datafusion_common::{exec_err, Result}; -use datafusion_expr::ColumnarValue; - -/// coalesce evaluates to the first value which is not NULL -pub fn coalesce(args: &[ColumnarValue]) -> Result { - // do not accept 0 arguments. 
- if args.is_empty() { - return exec_err!( - "coalesce was called with {} arguments. It requires at least 1.", - args.len() - ); - } - - let return_type = args[0].data_type(); - let mut return_array = args.iter().filter_map(|x| match x { - ColumnarValue::Array(array) => Some(array.len()), - _ => None, - }); - - if let Some(size) = return_array.next() { - // start with nulls as default output - let mut current_value = new_null_array(&return_type, size); - let mut remainder = BooleanArray::from(vec![true; size]); - - for arg in args { - match arg { - ColumnarValue::Array(ref array) => { - let to_apply = and(&remainder, &is_not_null(array.as_ref())?)?; - current_value = zip(&to_apply, array, ¤t_value)?; - remainder = and(&remainder, &is_null(array)?)?; - } - ColumnarValue::Scalar(value) => { - if value.is_null() { - continue; - } else { - let last_value = value.to_scalar()?; - current_value = zip(&remainder, &last_value, ¤t_value)?; - break; - } - } - } - if remainder.iter().all(|x| x == Some(false)) { - break; - } - } - Ok(ColumnarValue::Array(current_value)) - } else { - let result = args - .iter() - .filter_map(|x| match x { - ColumnarValue::Scalar(s) if !s.is_null() => Some(x.clone()), - _ => None, - }) - .next() - .unwrap_or_else(|| args[0].clone()); - Ok(result) - } -} diff --git a/datafusion/physical-expr/src/equivalence/class.rs b/datafusion/physical-expr/src/equivalence/class.rs index 58519c61cf1f..6d7d8bf3cc8d 100644 --- a/datafusion/physical-expr/src/equivalence/class.rs +++ b/datafusion/physical-expr/src/equivalence/class.rs @@ -390,7 +390,7 @@ impl EquivalenceGroup { }); // TODO: Convert the algorithm below to a version that uses `HashMap`. // once `Arc` can be stored in `HashMap`. 
- // See issue: https://github.com/apache/arrow-datafusion/issues/8027 + // See issue: https://github.com/apache/datafusion/issues/8027 let mut new_classes = vec![]; for (source, target) in mapping.iter() { if new_classes.is_empty() { diff --git a/datafusion/physical-expr/src/equivalence/properties.rs b/datafusion/physical-expr/src/equivalence/properties.rs index 58ef5ec797b8..c8a087db209f 100644 --- a/datafusion/physical-expr/src/equivalence/properties.rs +++ b/datafusion/physical-expr/src/equivalence/properties.rs @@ -490,7 +490,7 @@ impl EquivalenceProperties { /// with A and B, we could surely use the ordering of the original ordering, However, if the A has been changed, /// for example, A-> Cast(A, Int64) or any other form, it is invalid if we continue using the original ordering /// Since it would cause bug in dependency constructions, we should substitute the input order in order to get correct - /// dependency map, happen in issue 8838: + /// dependency map, happen in issue 8838: pub fn substitute_oeq_class(&mut self, mapping: &ProjectionMapping) -> Result<()> { let orderings = &self.oeq_class.orderings; let new_order = orderings diff --git a/datafusion/physical-expr/src/expressions/binary.rs b/datafusion/physical-expr/src/expressions/binary.rs index bc107e169de4..7c57dc050db5 100644 --- a/datafusion/physical-expr/src/expressions/binary.rs +++ b/datafusion/physical-expr/src/expressions/binary.rs @@ -2940,7 +2940,7 @@ mod tests { #[test] fn relatively_deeply_nested() { - // Reproducer for https://github.com/apache/arrow-datafusion/issues/419 + // Reproducer for https://github.com/apache/datafusion/issues/419 // where even relatively shallow binary expressions overflowed // the stack in debug builds @@ -3408,7 +3408,7 @@ mod tests { .unwrap(); // is distinct: float64array is distinct decimal array // TODO: now we do not refactor the `is distinct or is not distinct` rule of coercion. 
- // traced by https://github.com/apache/arrow-datafusion/issues/1590 + // traced by https://github.com/apache/datafusion/issues/1590 // the decimal array will be casted to float64array apply_logic_op( &schema, diff --git a/datafusion/physical-expr/src/expressions/cast.rs b/datafusion/physical-expr/src/expressions/cast.rs index 0d94642f14e7..e87c643cdeb5 100644 --- a/datafusion/physical-expr/src/expressions/cast.rs +++ b/datafusion/physical-expr/src/expressions/cast.rs @@ -704,7 +704,7 @@ mod tests { } #[test] - #[ignore] // TODO: https://github.com/apache/arrow-datafusion/issues/5396 + #[ignore] // TODO: https://github.com/apache/datafusion/issues/5396 fn test_cast_decimal() -> Result<()> { let schema = Schema::new(vec![Field::new("a", DataType::Int64, false)]); let a = Int64Array::from(vec![100]); diff --git a/datafusion/physical-expr/src/functions.rs b/datafusion/physical-expr/src/functions.rs index 656ce711a0b0..875fe7ac3be1 100644 --- a/datafusion/physical-expr/src/functions.rs +++ b/datafusion/physical-expr/src/functions.rs @@ -37,47 +37,14 @@ use arrow::{array::ArrayRef, datatypes::Schema}; use arrow_array::Array; use datafusion_common::{DFSchema, Result, ScalarValue}; -use datafusion_expr::execution_props::ExecutionProps; pub use datafusion_expr::FuncMonotonicity; use datafusion_expr::{ - type_coercion::functions::data_types, BuiltinScalarFunction, ColumnarValue, - ScalarFunctionImplementation, + type_coercion::functions::data_types, ColumnarValue, ScalarFunctionImplementation, }; use datafusion_expr::{Expr, ScalarFunctionDefinition, ScalarUDF}; use crate::sort_properties::SortProperties; -use crate::{conditional_expressions, PhysicalExpr, ScalarFunctionExpr}; - -/// Create a physical (function) expression. -/// This function errors when `args`' can't be coerced to a valid argument type of the function. 
-pub fn create_builtin_physical_expr( - fun: &BuiltinScalarFunction, - input_phy_exprs: &[Arc], - input_schema: &Schema, - _execution_props: &ExecutionProps, -) -> Result> { - let input_expr_types = input_phy_exprs - .iter() - .map(|e| e.data_type(input_schema)) - .collect::>>()?; - - // verify that input data types is consistent with function's `TypeSignature` - data_types(&input_expr_types, &fun.signature())?; - - let data_type = fun.return_type(&input_expr_types)?; - - let monotonicity = fun.monotonicity(); - - let fun_def = ScalarFunctionDefinition::BuiltIn(*fun); - Ok(Arc::new(ScalarFunctionExpr::new( - &format!("{fun}"), - fun_def, - input_phy_exprs.to_vec(), - data_type, - monotonicity, - fun.signature().type_signature.supports_zero_argument(), - ))) -} +use crate::{PhysicalExpr, ScalarFunctionExpr}; /// Create a physical (function) expression. /// This function errors when `args`' can't be coerced to a valid argument type of the function. @@ -199,24 +166,6 @@ where }) } -/// Create a physical scalar function. -pub fn create_physical_fun( - fun: &BuiltinScalarFunction, -) -> Result { - Ok(match fun { - // string functions - BuiltinScalarFunction::Coalesce => Arc::new(conditional_expressions::coalesce), - }) -} - -#[deprecated( - since = "32.0.0", - note = "Moved to `expr` crate. Please use `BuiltinScalarFunction::monotonicity()` instead" -)] -pub fn get_func_monotonicity(fun: &BuiltinScalarFunction) -> Option { - fun.monotonicity() -} - /// Determines a [`ScalarFunctionExpr`]'s monotonicity for the given arguments /// and the function's behavior depending on its arguments. 
pub fn out_ordering( diff --git a/datafusion/physical-expr/src/lib.rs b/datafusion/physical-expr/src/lib.rs index aabcf42fe7c4..e0f19ad133e5 100644 --- a/datafusion/physical-expr/src/lib.rs +++ b/datafusion/physical-expr/src/lib.rs @@ -18,7 +18,6 @@ pub mod aggregate; pub mod analysis; pub mod binary_map; -pub mod conditional_expressions; pub mod equivalence; pub mod expressions; pub mod functions; diff --git a/datafusion/physical-expr/src/physical_expr.rs b/datafusion/physical-expr/src/physical_expr.rs index bc265d3819a5..127194f681a5 100644 --- a/datafusion/physical-expr/src/physical_expr.rs +++ b/datafusion/physical-expr/src/physical_expr.rs @@ -72,7 +72,7 @@ pub fn physical_exprs_bag_equal( pub fn deduplicate_physical_exprs(exprs: &mut Vec>) { // TODO: Once we can use `HashSet`s with `Arc`, this // function should use a `HashSet` to reduce computational complexity. - // See issue: https://github.com/apache/arrow-datafusion/issues/8027 + // See issue: https://github.com/apache/datafusion/issues/8027 let mut idx = 0; while idx < exprs.len() { let mut rest_idx = idx + 1; diff --git a/datafusion/physical-expr/src/planner.rs b/datafusion/physical-expr/src/planner.rs index 20626818c83b..bf7b52f1c147 100644 --- a/datafusion/physical-expr/src/planner.rs +++ b/datafusion/physical-expr/src/planner.rs @@ -15,11 +15,10 @@ // specific language governing permissions and limitations // under the License. 
-use crate::{ - expressions::{self, binary, like, Column, Literal}, - functions, udf, PhysicalExpr, -}; +use std::sync::Arc; + use arrow::datatypes::Schema; + use datafusion_common::{ exec_err, internal_err, not_impl_err, plan_err, DFSchema, Result, ScalarValue, }; @@ -31,7 +30,11 @@ use datafusion_expr::{ binary_expr, Between, BinaryExpr, Expr, GetFieldAccess, GetIndexedField, Like, Operator, ScalarFunctionDefinition, TryCast, }; -use std::sync::Arc; + +use crate::{ + expressions::{self, binary, like, Column, Literal}, + udf, PhysicalExpr, +}; /// [PhysicalExpr] evaluate DataFusion expressions such as `A + 1`, or `CAST(c1 /// AS int)`. @@ -306,14 +309,6 @@ pub fn create_physical_expr( create_physical_exprs(args, input_dfschema, execution_props)?; match func_def { - ScalarFunctionDefinition::BuiltIn(fun) => { - functions::create_builtin_physical_expr( - fun, - &physical_args, - input_schema, - execution_props, - ) - } ScalarFunctionDefinition::UDF(fun) => udf::create_physical_expr( fun.clone().as_ref(), &physical_args, @@ -390,12 +385,14 @@ where #[cfg(test)] mod tests { - use super::*; use arrow_array::{ArrayRef, BooleanArray, RecordBatch, StringArray}; use arrow_schema::{DataType, Field, Schema}; + use datafusion_common::{DFSchema, Result}; use datafusion_expr::{col, lit}; + use super::*; + #[test] fn test_create_physical_expr_scalar_input_output() -> Result<()> { let expr = col("letter").eq(lit("A")); diff --git a/datafusion/physical-expr/src/scalar_function.rs b/datafusion/physical-expr/src/scalar_function.rs index d34084236690..9ae9f3dee3e7 100644 --- a/datafusion/physical-expr/src/scalar_function.rs +++ b/datafusion/physical-expr/src/scalar_function.rs @@ -34,19 +34,19 @@ use std::fmt::{self, Debug, Formatter}; use std::hash::{Hash, Hasher}; use std::sync::Arc; -use crate::functions::{create_physical_fun, out_ordering}; -use crate::physical_expr::{down_cast_any_ref, physical_exprs_equal}; -use crate::sort_properties::SortProperties; -use crate::PhysicalExpr; 
- use arrow::datatypes::{DataType, Schema}; use arrow::record_batch::RecordBatch; + use datafusion_common::{internal_err, Result}; use datafusion_expr::{ - expr_vec_fmt, BuiltinScalarFunction, ColumnarValue, FuncMonotonicity, - ScalarFunctionDefinition, + expr_vec_fmt, ColumnarValue, FuncMonotonicity, ScalarFunctionDefinition, }; +use crate::functions::out_ordering; +use crate::physical_expr::{down_cast_any_ref, physical_exprs_equal}; +use crate::sort_properties::SortProperties; +use crate::PhysicalExpr; + /// Physical expression of a scalar function pub struct ScalarFunctionExpr { fun: ScalarFunctionDefinition, @@ -122,7 +122,7 @@ impl ScalarFunctionExpr { } impl fmt::Display for ScalarFunctionExpr { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { write!(f, "{}({})", self.name, expr_vec_fmt!(self.args)) } } @@ -144,24 +144,11 @@ impl PhysicalExpr for ScalarFunctionExpr { fn evaluate(&self, batch: &RecordBatch) -> Result { // evaluate the arguments, if there are no arguments we'll instead pass in a null array // indicating the batch size (as a convention) - let inputs = match ( - self.args.is_empty(), - self.name.parse::(), - ) { - // MakeArray support zero argument but has the different behavior from the array with one null. - (true, Ok(scalar_fun)) - if scalar_fun - .signature() - .type_signature - .supports_zero_argument() => - { - vec![ColumnarValue::create_null_array(batch.num_rows())] - } + let inputs = match self.args.is_empty() { // If the function supports zero argument, we pass in a null array indicating the batch size. // This is for user-defined functions. - (true, Err(_)) - if self.supports_zero_argument && self.name != "make_array" => - { + // MakeArray support zero argument but has the different behavior from the array with one null. 
+ true if self.supports_zero_argument && self.name != "make_array" => { vec![ColumnarValue::create_null_array(batch.num_rows())] } _ => self @@ -173,10 +160,6 @@ impl PhysicalExpr for ScalarFunctionExpr { // evaluate the function match self.fun { - ScalarFunctionDefinition::BuiltIn(ref fun) => { - let fun = create_physical_fun(fun)?; - (fun)(&inputs) - } ScalarFunctionDefinition::UDF(ref fun) => fun.invoke(&inputs), ScalarFunctionDefinition::Name(_) => { internal_err!( diff --git a/datafusion/physical-plan/src/aggregates/group_values/bytes.rs b/datafusion/physical-plan/src/aggregates/group_values/bytes.rs index 4a4c5e4b0554..d073c8995a9b 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/bytes.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/bytes.rs @@ -101,7 +101,7 @@ impl GroupValues for GroupValuesByes { // if we only wanted to take the first n, insert the rest back // into the map we could potentially avoid this reallocation, at // the expense of much more complex code. - // see https://github.com/apache/arrow-datafusion/issues/9195 + // see https://github.com/apache/datafusion/issues/9195 let emit_group_values = map_contents.slice(0, n); let remaining_group_values = map_contents.slice(n, map_contents.len() - n); diff --git a/datafusion/physical-plan/src/insert.rs b/datafusion/physical-plan/src/insert.rs index e3f9f2c76d31..259db644ae0a 100644 --- a/datafusion/physical-plan/src/insert.rs +++ b/datafusion/physical-plan/src/insert.rs @@ -244,7 +244,7 @@ impl ExecutionPlan for DataSinkExec { // Maintains ordering in the sense that the written file will reflect // the ordering of the input. 
For more context, see: // - // https://github.com/apache/arrow-datafusion/pull/6354#discussion_r1195284178 + // https://github.com/apache/datafusion/pull/6354#discussion_r1195284178 vec![true] } diff --git a/datafusion/physical-plan/src/joins/nested_loop_join.rs b/datafusion/physical-plan/src/joins/nested_loop_join.rs index e6236e45f0a7..5fccd63029a1 100644 --- a/datafusion/physical-plan/src/joins/nested_loop_join.rs +++ b/datafusion/physical-plan/src/joins/nested_loop_join.rs @@ -21,21 +21,22 @@ use std::any::Any; use std::fmt::Formatter; +use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; use std::task::Poll; use crate::coalesce_batches::concat_batches; +use crate::coalesce_partitions::CoalescePartitionsExec; use crate::joins::utils::{ - append_right_indices, apply_join_filter_to_indices, build_batch_from_indices, - build_join_schema, check_join_is_valid, estimate_join_statistics, get_anti_indices, - get_final_indices_from_bit_map, get_semi_indices, - partitioned_join_output_partitioning, BuildProbeJoinMetrics, ColumnIndex, JoinFilter, - OnceAsync, OnceFut, + adjust_indices_by_join_type, adjust_right_output_partitioning, + apply_join_filter_to_indices, build_batch_from_indices, build_join_schema, + check_join_is_valid, estimate_join_statistics, get_final_indices_from_bit_map, + BuildProbeJoinMetrics, ColumnIndex, JoinFilter, OnceAsync, OnceFut, }; use crate::metrics::{ExecutionPlanMetricsSet, MetricsSet}; use crate::{ execution_mode_from_children, DisplayAs, DisplayFormatType, Distribution, - ExecutionMode, ExecutionPlan, ExecutionPlanProperties, PlanProperties, + ExecutionMode, ExecutionPlan, ExecutionPlanProperties, Partitioning, PlanProperties, RecordBatchStream, SendableRecordBatchStream, }; @@ -52,28 +53,90 @@ use datafusion_expr::JoinType; use datafusion_physical_expr::equivalence::join_equivalence_properties; use futures::{ready, Stream, StreamExt, TryStreamExt}; +use parking_lot::Mutex; + +use super::utils::need_produce_result_in_final; + 
+/// Shared bitmap for visited left-side indices +type SharedBitmapBuilder = Mutex; +/// Left (build-side) data +struct JoinLeftData { + /// Build-side data collected to single batch + batch: RecordBatch, + /// Shared bitmap builder for visited left indices + bitmap: SharedBitmapBuilder, + /// Counter of running probe-threads, potentially able to update `bitmap` + probe_threads_counter: AtomicUsize, + /// Memory reservation for tracking batch and bitmap + /// Cleared on `JoinLeftData` drop + #[allow(dead_code)] + reservation: MemoryReservation, +} -/// Data of the inner table side -type JoinLeftData = (RecordBatch, MemoryReservation); +impl JoinLeftData { + fn new( + batch: RecordBatch, + bitmap: SharedBitmapBuilder, + probe_threads_counter: AtomicUsize, + reservation: MemoryReservation, + ) -> Self { + Self { + batch, + bitmap, + probe_threads_counter, + reservation, + } + } + + fn batch(&self) -> &RecordBatch { + &self.batch + } -/// NestedLoopJoinExec executes partitions in parallel. -/// One input will be collected to a single partition, call it inner-table. -/// The other side of the input is treated as outer-table, and the output Partitioning is from it. -/// Giving an output partition number x, the execution will be: + fn bitmap(&self) -> &SharedBitmapBuilder { + &self.bitmap + } + + /// Decrements counter of running threads, and returns `true` + /// if caller is the last running thread + fn report_probe_completed(&self) -> bool { + self.probe_threads_counter.fetch_sub(1, Ordering::Relaxed) == 1 + } +} + +/// NestedLoopJoinExec is build-probe join operator, whose main task is to +/// perform joins without any equijoin conditions in `ON` clause. +/// +/// Execution consists of following phases: /// -/// ```text -/// for outer-table-batch in outer-table-partition-x -/// check-join(outer-table-batch, inner-table-data) -/// ``` +/// #### 1. Build phase +/// Collecting build-side data in memory, by polling all available data from build-side input. 
+/// Due to the absence of equijoin conditions, it's not possible to partition build-side data +/// across multiple threads of the operator, so build-side is always collected in a single +/// batch shared across all threads. +/// The operator always considers LEFT input as build-side input, so it's crucial to adjust +/// smaller input to be the LEFT one. Normally this selection is handled by physical optimizer. /// -/// One of the inputs will become inner table, and it is decided by the join type. -/// Following is the relation table: +/// #### 2. Probe phase +/// Sequentially polling batches from the probe-side input and processing them according to the +/// following logic: +/// - apply join filter (`ON` clause) to Cartesian product of probe batch and build side data +/// -- filter evaluation is executed once per build-side data row +/// - update shared bitmap of joined ("visited") build-side row indices, if required -- allows +/// to produce unmatched build-side data in case of e.g. LEFT/FULL JOIN after probing phase +/// completed +/// - perform join index alignment is required -- depending on `JoinType` +/// - produce output join batch /// -/// | JoinType | Distribution (left, right) | Inner-table | -/// |--------------------------------|--------------------------------------------|-------------| -/// | Inner/Left/LeftSemi/LeftAnti | (UnspecifiedDistribution, SinglePartition) | right | -/// | Right/RightSemi/RightAnti/Full | (SinglePartition, UnspecifiedDistribution) | left | -/// | Full | (SinglePartition, SinglePartition) | left | +/// Probing phase is executed in parallel, according to probe-side input partitioning -- one +/// thread per partition. After probe input is exhausted, each thread **ATTEMPTS** to produce +/// unmatched build-side data. +/// +/// #### 3. Producing unmatched build-side data +/// Producing unmatched build-side data as an output batch, after probe input is exhausted. 
+/// This step is also executed in parallel (once per probe input partition), and to avoid +/// duplicate output of unmatched data (due to shared nature build-side data), each thread +/// "reports" about probe phase completion (which means that "visited" bitmap won't be +/// updated anymore), and only the last thread, reporting about completion, will return output. /// #[derive(Debug)] pub struct NestedLoopJoinExec { @@ -112,6 +175,7 @@ impl NestedLoopJoinExec { build_join_schema(&left_schema, &right_schema, join_type); let schema = Arc::new(schema); let cache = Self::compute_properties(&left, &right, schema.clone(), *join_type); + Ok(NestedLoopJoinExec { left, right, @@ -165,15 +229,19 @@ impl NestedLoopJoinExec { ); // Get output partitioning, - let output_partitioning = if join_type == JoinType::Full { - left.output_partitioning().clone() - } else { - partitioned_join_output_partitioning( - join_type, - left.output_partitioning(), + let output_partitioning = match join_type { + JoinType::Inner | JoinType::Right => adjust_right_output_partitioning( right.output_partitioning(), - left.schema().fields.len(), - ) + left.schema().fields().len(), + ), + JoinType::RightSemi | JoinType::RightAnti => { + right.output_partitioning().clone() + } + JoinType::Left | JoinType::LeftSemi | JoinType::LeftAnti | JoinType::Full => { + Partitioning::UnknownPartitioning( + right.output_partitioning().partition_count(), + ) + } }; // Determine execution mode: @@ -218,7 +286,10 @@ impl ExecutionPlan for NestedLoopJoinExec { } fn required_input_distribution(&self) -> Vec { - distribution_from_join_type(&self.join_type) + vec![ + Distribution::SinglePartition, + Distribution::UnspecifiedDistribution, + ] } fn children(&self) -> Vec> { @@ -249,38 +320,17 @@ impl ExecutionPlan for NestedLoopJoinExec { MemoryConsumer::new(format!("NestedLoopJoinLoad[{partition}]")) .register(context.memory_pool()); - // Initialization of stream-level reservation - let reservation = - 
MemoryConsumer::new(format!("NestedLoopJoinStream[{partition}]")) - .register(context.memory_pool()); - - let (outer_table, inner_table) = if left_is_build_side(self.join_type) { - // left must be single partition - let inner_table = self.inner_table.once(|| { - load_specified_partition_of_input( - 0, - self.left.clone(), - context.clone(), - join_metrics.clone(), - load_reservation, - ) - }); - let outer_table = self.right.execute(partition, context)?; - (outer_table, inner_table) - } else { - // right must be single partition - let inner_table = self.inner_table.once(|| { - load_specified_partition_of_input( - 0, - self.right.clone(), - context.clone(), - join_metrics.clone(), - load_reservation, - ) - }); - let outer_table = self.left.execute(partition, context)?; - (outer_table, inner_table) - }; + let inner_table = self.inner_table.once(|| { + collect_left_input( + self.left.clone(), + context.clone(), + join_metrics.clone(), + load_reservation, + need_produce_result_in_final(self.join_type), + self.right().output_partitioning().partition_count(), + ) + }); + let outer_table = self.right.execute(partition, context)?; Ok(Box::pin(NestedLoopJoinStream { schema: self.schema.clone(), @@ -289,10 +339,8 @@ impl ExecutionPlan for NestedLoopJoinExec { outer_table, inner_table, is_exhausted: false, - visited_left_side: None, column_indices: self.column_indices.clone(), join_metrics, - reservation, })) } @@ -311,43 +359,25 @@ impl ExecutionPlan for NestedLoopJoinExec { } } -// For the nested loop join, different join type need the different distribution for -// left and right node. 
-fn distribution_from_join_type(join_type: &JoinType) -> Vec { - match join_type { - JoinType::Inner | JoinType::Left | JoinType::LeftSemi | JoinType::LeftAnti => { - // need the left data, and the right should be one partition - vec![ - Distribution::UnspecifiedDistribution, - Distribution::SinglePartition, - ] - } - JoinType::Right | JoinType::RightSemi | JoinType::RightAnti => { - // need the right data, and the left should be one partition - vec![ - Distribution::SinglePartition, - Distribution::UnspecifiedDistribution, - ] - } - JoinType::Full => { - // need the left and right data, and the left and right should be one partition - vec![Distribution::SinglePartition, Distribution::SinglePartition] - } - } -} - -/// Asynchronously collect the specified partition data of the input -async fn load_specified_partition_of_input( - partition: usize, +/// Asynchronously collect input into a single batch, and creates `JoinLeftData` from it +async fn collect_left_input( input: Arc, context: Arc, join_metrics: BuildProbeJoinMetrics, reservation: MemoryReservation, + with_visited_left_side: bool, + probe_threads_count: usize, ) -> Result { - let stream = input.execute(partition, context)?; + let schema = input.schema(); + let merge = if input.output_partitioning().partition_count() != 1 { + Arc::new(CoalescePartitionsExec::new(input)) + } else { + input + }; + let stream = merge.execute(0, context)?; // Load all batches and count the rows - let (batches, num_rows, _, reservation) = stream + let (batches, num_rows, metrics, mut reservation) = stream .try_fold( (Vec::new(), 0usize, join_metrics, reservation), |mut acc, batch| async { @@ -367,19 +397,31 @@ async fn load_specified_partition_of_input( ) .await?; - let merged_batch = concat_batches(&input.schema(), &batches, num_rows)?; + let merged_batch = concat_batches(&schema, &batches, num_rows)?; - Ok((merged_batch, reservation)) -} + // Reserve memory for visited_left_side bitmap if required by join type + let 
visited_left_side = if with_visited_left_side { + // TODO: Replace `ceil` wrapper with stable `div_cell` after + // https://github.com/rust-lang/rust/issues/88581 + let buffer_size = bit_util::ceil(merged_batch.num_rows(), 8); + reservation.try_grow(buffer_size)?; + metrics.build_mem_used.add(buffer_size); -// BuildLeft means the left relation is the single patrition side. -// For full join, both side are single partition, so it is BuildLeft and BuildRight, treat it as BuildLeft. -pub fn left_is_build_side(join_type: JoinType) -> bool { - matches!( - join_type, - JoinType::Right | JoinType::RightSemi | JoinType::RightAnti | JoinType::Full - ) + let mut buffer = BooleanBufferBuilder::new(merged_batch.num_rows()); + buffer.append_n(merged_batch.num_rows(), false); + buffer + } else { + BooleanBufferBuilder::new(0) + }; + + Ok(JoinLeftData::new( + merged_batch, + Mutex::new(visited_left_side), + AtomicUsize::new(probe_threads_count), + reservation, + )) } + /// A stream that issues [RecordBatch]es as they arrive from the right of the join. struct NestedLoopJoinStream { /// Input schema @@ -394,16 +436,12 @@ struct NestedLoopJoinStream { inner_table: OnceFut, /// There is nothing to process anymore and left side is processed in case of full join is_exhausted: bool, - /// Keeps track of the left side rows whether they are visited - visited_left_side: Option, /// Information of index and left / right placement of columns column_indices: Vec, // TODO: support null aware equal // null_equals_null: bool /// Join execution metrics join_metrics: BuildProbeJoinMetrics, - /// Memory reservation for visited_left_side - reservation: MemoryReservation, } fn build_join_indices( @@ -434,39 +472,20 @@ fn build_join_indices( } impl NestedLoopJoinStream { - /// For Right/RightSemi/RightAnti/Full joins, left is the single partition side. 
- fn poll_next_impl_for_build_left( + fn poll_next_impl( &mut self, cx: &mut std::task::Context<'_>, ) -> Poll>> { // all left row let build_timer = self.join_metrics.build_time.timer(); - let (left_data, _) = match ready!(self.inner_table.get(cx)) { + let left_data = match ready!(self.inner_table.get_shared(cx)) { Ok(data) => data, Err(e) => return Poll::Ready(Some(Err(e))), }; build_timer.done(); - if self.visited_left_side.is_none() && self.join_type == JoinType::Full { - // TODO: Replace `ceil` wrapper with stable `div_cell` after - // https://github.com/rust-lang/rust/issues/88581 - let visited_bitmap_size = bit_util::ceil(left_data.num_rows(), 8); - self.reservation.try_grow(visited_bitmap_size)?; - self.join_metrics.build_mem_used.add(visited_bitmap_size); - } - - // add a bitmap for full join. - let visited_left_side = self.visited_left_side.get_or_insert_with(|| { - let left_num_rows = left_data.num_rows(); - // only full join need bitmap - if self.join_type == JoinType::Full { - let mut buffer = BooleanBufferBuilder::new(left_num_rows); - buffer.append_n(left_num_rows, false); - buffer - } else { - BooleanBufferBuilder::new(0) - } - }); + // Get or initialize visited_left_side bitmap if required by join type + let visited_left_side = left_data.bitmap(); self.outer_table .poll_next_unpin(cx) @@ -478,7 +497,7 @@ impl NestedLoopJoinStream { let timer = self.join_metrics.join_time.timer(); let result = join_left_and_right_batch( - left_data, + left_data.batch(), &right_batch, self.join_type, self.filter.as_ref(), @@ -498,21 +517,32 @@ impl NestedLoopJoinStream { } Some(err) => Some(err), None => { - if self.join_type == JoinType::Full && !self.is_exhausted { + if need_produce_result_in_final(self.join_type) && !self.is_exhausted + { + // At this stage `visited_left_side` won't be updated, so it's + // safe to report about probe completion. 
+ // + // Setting `is_exhausted` / returning None will prevent from + // multiple calls of `report_probe_completed()` + if !left_data.report_probe_completed() { + self.is_exhausted = true; + return None; + }; + // Only setting up timer, input is exhausted let timer = self.join_metrics.join_time.timer(); - // use the global left bitmap to produce the left indices and right indices - let (left_side, right_side) = get_final_indices_from_bit_map( - visited_left_side, - self.join_type, - ); + let (left_side, right_side) = + get_final_indices_from_shared_bitmap( + visited_left_side, + self.join_type, + ); let empty_right_batch = RecordBatch::new_empty(self.outer_table.schema()); // use the left and right indices to produce the batch result let result = build_batch_from_indices( &self.schema, - left_data, + left_data.batch(), &empty_right_batch, &left_side, &right_side, @@ -536,55 +566,6 @@ impl NestedLoopJoinStream { } }) } - - /// For Inner/Left/LeftSemi/LeftAnti joins, right is the single partition side. - fn poll_next_impl_for_build_right( - &mut self, - cx: &mut std::task::Context<'_>, - ) -> Poll>> { - // all right row - let build_timer = self.join_metrics.build_time.timer(); - let (right_data, _) = match ready!(self.inner_table.get(cx)) { - Ok(data) => data, - Err(e) => return Poll::Ready(Some(Err(e))), - }; - build_timer.done(); - - // for build right, bitmap is not needed. 
- let mut empty_visited_left_side = BooleanBufferBuilder::new(0); - self.outer_table - .poll_next_unpin(cx) - .map(|maybe_batch| match maybe_batch { - Some(Ok(left_batch)) => { - // Setting up timer & updating input metrics - self.join_metrics.input_batches.add(1); - self.join_metrics.input_rows.add(left_batch.num_rows()); - let timer = self.join_metrics.join_time.timer(); - - // Actual join execution - let result = join_left_and_right_batch( - &left_batch, - right_data, - self.join_type, - self.filter.as_ref(), - &self.column_indices, - &self.schema, - &mut empty_visited_left_side, - ); - - // Recording time & updating output metrics - if let Ok(batch) = &result { - timer.done(); - self.join_metrics.output_batches.add(1); - self.join_metrics.output_rows.add(batch.num_rows()); - } - - Some(result) - } - Some(err) => Some(err), - None => None, - }) - } } fn join_left_and_right_batch( @@ -594,7 +575,7 @@ fn join_left_and_right_batch( filter: Option<&JoinFilter>, column_indices: &[ColumnIndex], schema: &Schema, - visited_left_side: &mut BooleanBufferBuilder, + visited_left_side: &SharedBitmapBuilder, ) -> Result { let indices_result = (0..left_batch.num_rows()) .map(|left_row_index| { @@ -625,17 +606,17 @@ fn join_left_and_right_batch( Ok((left_side, right_side)) => { // set the left bitmap // and only full join need the left bitmap - if join_type == JoinType::Full { + if need_produce_result_in_final(join_type) { + let mut bitmap = visited_left_side.lock(); left_side.iter().flatten().for_each(|x| { - visited_left_side.set_bit(x as usize, true); + bitmap.set_bit(x as usize, true); }); } // adjust the two side indices base on the join type let (left_side, right_side) = adjust_indices_by_join_type( left_side, right_side, - left_batch.num_rows(), - right_batch.num_rows(), + 0..right_batch.num_rows(), join_type, ); @@ -653,86 +634,12 @@ fn join_left_and_right_batch( } } -fn adjust_indices_by_join_type( - left_indices: UInt64Array, - right_indices: UInt32Array, - 
count_left_batch: usize, - count_right_batch: usize, +fn get_final_indices_from_shared_bitmap( + shared_bitmap: &SharedBitmapBuilder, join_type: JoinType, ) -> (UInt64Array, UInt32Array) { - match join_type { - JoinType::Inner => (left_indices, right_indices), - JoinType::Left => { - // matched - // unmatched left row will be produced in this batch - let left_unmatched_indices = - get_anti_indices(0..count_left_batch, &left_indices); - // combine the matched and unmatched left result together - append_left_indices(left_indices, right_indices, left_unmatched_indices) - } - JoinType::LeftSemi => { - // need to remove the duplicated record in the left side - let left_indices = get_semi_indices(0..count_left_batch, &left_indices); - // the right_indices will not be used later for the `left semi` join - (left_indices, right_indices) - } - JoinType::LeftAnti => { - // need to remove the duplicated record in the left side - // get the anti index for the left side - let left_indices = get_anti_indices(0..count_left_batch, &left_indices); - // the right_indices will not be used later for the `left anti` join - (left_indices, right_indices) - } - // right/right-semi/right-anti => right = outer_table, left = inner_table - JoinType::Right | JoinType::Full => { - // matched - // unmatched right row will be produced in this batch - let right_unmatched_indices = - get_anti_indices(0..count_right_batch, &right_indices); - // combine the matched and unmatched right result together - append_right_indices(left_indices, right_indices, right_unmatched_indices) - } - JoinType::RightSemi => { - // need to remove the duplicated record in the right side - let right_indices = get_semi_indices(0..count_right_batch, &right_indices); - // the left_indices will not be used later for the `right semi` join - (left_indices, right_indices) - } - JoinType::RightAnti => { - // need to remove the duplicated record in the right side - // get the anti index for the right side - let right_indices = 
get_anti_indices(0..count_right_batch, &right_indices); - // the left_indices will not be used later for the `right anti` join - (left_indices, right_indices) - } - } -} - -/// Appends the `left_unmatched_indices` to the `left_indices`, -/// and fills Null to tail of `right_indices` to -/// keep the length of `left_indices` and `right_indices` consistent. -fn append_left_indices( - left_indices: UInt64Array, - right_indices: UInt32Array, - left_unmatched_indices: UInt64Array, -) -> (UInt64Array, UInt32Array) { - if left_unmatched_indices.is_empty() { - (left_indices, right_indices) - } else { - let unmatched_size = left_unmatched_indices.len(); - // the new left indices: left_indices + null array - // the new right indices: right_indices + right_unmatched_indices - let new_left_indices = left_indices - .iter() - .chain(left_unmatched_indices.iter()) - .collect::(); - let new_right_indices = right_indices - .iter() - .chain(std::iter::repeat(None).take(unmatched_size)) - .collect::(); - - (new_left_indices, new_right_indices) - } + let bitmap = shared_bitmap.lock(); + get_final_indices_from_bit_map(&bitmap, join_type) } impl Stream for NestedLoopJoinStream { @@ -742,11 +649,7 @@ impl Stream for NestedLoopJoinStream { mut self: std::pin::Pin<&mut Self>, cx: &mut std::task::Context<'_>, ) -> Poll> { - if left_is_build_side(self.join_type) { - self.poll_next_impl_for_build_left(cx) - } else { - self.poll_next_impl_for_build_right(cx) - } + self.poll_next_impl(cx) } } @@ -851,35 +754,19 @@ mod tests { context: Arc, ) -> Result<(Vec, Vec)> { let partition_count = 4; - let mut output_partition = 1; - let distribution = distribution_from_join_type(join_type); - // left - let left = if matches!(distribution[0], Distribution::SinglePartition) { - left - } else { - output_partition = partition_count; - Arc::new(RepartitionExec::try_new( - left, - Partitioning::RoundRobinBatch(partition_count), - )?) 
- } as Arc; - - let right = if matches!(distribution[1], Distribution::SinglePartition) { - right - } else { - output_partition = partition_count; - Arc::new(RepartitionExec::try_new( - right, - Partitioning::RoundRobinBatch(partition_count), - )?) - } as Arc; + + // Redistributing right input + let right = Arc::new(RepartitionExec::try_new( + right, + Partitioning::RoundRobinBatch(partition_count), + )?) as Arc; // Use the required distribution for nested loop join to test partition data let nested_loop_join = NestedLoopJoinExec::try_new(left, right, join_filter, join_type)?; let columns = columns(&nested_loop_join.schema()); let mut batches = vec![]; - for i in 0..output_partition { + for i in 0..partition_count { let stream = nested_loop_join.execute(i, context.clone())?; let more_batches = common::collect(stream).await?; batches.extend( diff --git a/datafusion/physical-plan/src/repartition/mod.rs b/datafusion/physical-plan/src/repartition/mod.rs index 59c71dbf89b4..faf55aaf01ef 100644 --- a/datafusion/physical-plan/src/repartition/mod.rs +++ b/datafusion/physical-plan/src/repartition/mod.rs @@ -805,11 +805,11 @@ impl RepartitionExec { // If the input stream is endless, we may spin forever and // never yield back to tokio. See - // https://github.com/apache/arrow-datafusion/issues/5278. + // https://github.com/apache/datafusion/issues/5278. // // However, yielding on every batch causes a bottleneck // when running with multiple cores. 
See - // https://github.com/apache/arrow-datafusion/issues/6290 + // https://github.com/apache/datafusion/issues/6290 // // Thus, heuristically yield after producing num_partition // batches diff --git a/datafusion/physical-plan/src/unnest.rs b/datafusion/physical-plan/src/unnest.rs index 45b848112ba9..d2b5e1975d56 100644 --- a/datafusion/physical-plan/src/unnest.rs +++ b/datafusion/physical-plan/src/unnest.rs @@ -614,7 +614,7 @@ mod tests { valid.append(true); // NULL with non-zero value length - // Issue https://github.com/apache/arrow-datafusion/issues/9932 + // Issue https://github.com/apache/datafusion/issues/9932 values.push(Some("?")); offsets.push(OffsetSize::from_usize(values.len()).unwrap()); valid.append(false); diff --git a/datafusion/physical-plan/src/values.rs b/datafusion/physical-plan/src/values.rs index 63e8c32349ab..3d38081fcd8c 100644 --- a/datafusion/physical-plan/src/values.rs +++ b/datafusion/physical-plan/src/values.rs @@ -252,7 +252,7 @@ mod tests { let _ = ValuesExec::try_new_from_batches(invalid_schema, batches).unwrap_err(); } - // Test issue: https://github.com/apache/arrow-datafusion/issues/8763 + // Test issue: https://github.com/apache/datafusion/issues/8763 #[test] fn new_exec_with_non_nullable_schema() { let schema = Arc::new(Schema::new(vec![Field::new( diff --git a/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs b/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs index 75e203891cad..b1c306194813 100644 --- a/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs +++ b/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs @@ -393,7 +393,7 @@ trait PartitionSearcher: Send { // Use input_schema for the buffer schema, not `record_batch.schema()` // as it may not have the "correct" schema in terms of output // nullability constraints. 
For details, see the following issue: - // https://github.com/apache/arrow-datafusion/issues/9320 + // https://github.com/apache/datafusion/issues/9320 .or_insert_with(|| PartitionBatchState::new(self.input_schema().clone())); partition_batch_state.extend(&partition_batch)?; } diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index 13709bf394bf..7aa287055818 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -604,7 +604,7 @@ enum ScalarFunction { // 60 was Translate // Trim = 61; // Upper = 62; - Coalesce = 63; + // 63 was Coalesce // 64 was Power // 65 was StructFun // 66 was FromUnixtime diff --git a/datafusion/proto/src/bytes/mod.rs b/datafusion/proto/src/bytes/mod.rs index 610c533d574c..901aa2455e16 100644 --- a/datafusion/proto/src/bytes/mod.rs +++ b/datafusion/proto/src/bytes/mod.rs @@ -99,7 +99,7 @@ impl Serializeable for Expr { let bytes: Bytes = buffer.into(); // the produced byte stream may lead to "recursion limit" errors, see - // https://github.com/apache/arrow-datafusion/issues/3968 + // https://github.com/apache/datafusion/issues/3968 // Until the underlying prost issue ( https://github.com/tokio-rs/prost/issues/736 ) is fixed, we try to // deserialize the data here and check for errors. 
// diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index 3a2be9907354..29724fa9cf66 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -22792,7 +22792,6 @@ impl serde::Serialize for ScalarFunction { { let variant = match self { Self::Unknown => "unknown", - Self::Coalesce => "Coalesce", }; serializer.serialize_str(variant) } @@ -22805,7 +22804,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { { const FIELDS: &[&str] = &[ "unknown", - "Coalesce", ]; struct GeneratedVisitor; @@ -22847,7 +22845,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { { match value { "unknown" => Ok(ScalarFunction::Unknown), - "Coalesce" => Ok(ScalarFunction::Coalesce), _ => Err(serde::de::Error::unknown_variant(value, FIELDS)), } } diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index 487cfe01fba5..400a7bf75765 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -2841,7 +2841,7 @@ impl JoinConstraint { pub enum ScalarFunction { /// 0 was Abs before /// The first enum value must be zero for open enums - Unknown = 0, + /// /// 1 was Acos /// 2 was Asin /// 3 was Atan @@ -2904,7 +2904,7 @@ pub enum ScalarFunction { /// 60 was Translate /// Trim = 61; /// Upper = 62; - /// + /// 63 was Coalesce /// 64 was Power /// 65 was StructFun /// 66 was FromUnixtime @@ -2978,7 +2978,7 @@ pub enum ScalarFunction { /// 136 was ToChar /// 137 was ToDate /// 138 was ToUnixtime - Coalesce = 63, + Unknown = 0, } impl ScalarFunction { /// String value of the enum field names used in the ProtoBuf definition. @@ -2988,14 +2988,12 @@ impl ScalarFunction { pub fn as_str_name(&self) -> &'static str { match self { ScalarFunction::Unknown => "unknown", - ScalarFunction::Coalesce => "Coalesce", } } /// Creates an enum from field names used in the ProtoBuf definition. 
pub fn from_str_name(value: &str) -> ::core::option::Option { match value { "unknown" => Some(Self::Unknown), - "Coalesce" => Some(Self::Coalesce), _ => None, } } diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index 4ccff9e7aa62..c0898db6f671 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -37,11 +37,10 @@ use datafusion_expr::expr::Unnest; use datafusion_expr::expr::{Alias, Placeholder}; use datafusion_expr::window_frame::{check_window_frame, regularize_window_order_by}; use datafusion_expr::{ - coalesce, expr::{self, InList, Sort, WindowFunction}, logical_plan::{PlanType, StringifiedPlan}, - AggregateFunction, Between, BinaryExpr, BuiltInWindowFunction, BuiltinScalarFunction, - Case, Cast, Expr, GetFieldAccess, GetIndexedField, GroupingSet, + AggregateFunction, Between, BinaryExpr, BuiltInWindowFunction, Case, Cast, Expr, + GetFieldAccess, GetIndexedField, GroupingSet, GroupingSet::GroupingSets, JoinConstraint, JoinType, Like, Operator, TryCast, WindowFrame, WindowFrameBound, WindowFrameUnits, @@ -412,16 +411,6 @@ impl From<&protobuf::StringifiedPlan> for StringifiedPlan { } } -impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction { - fn from(f: &protobuf::ScalarFunction) -> Self { - use protobuf::ScalarFunction; - match f { - ScalarFunction::Unknown => todo!(), - ScalarFunction::Coalesce => Self::Coalesce, - } - } -} - impl From for AggregateFunction { fn from(agg_fun: protobuf::AggregateFunction) -> Self { match agg_fun { @@ -1278,13 +1267,9 @@ pub fn parse_expr( ExprType::ScalarFunction(expr) => { let scalar_function = protobuf::ScalarFunction::try_from(expr.fun) .map_err(|_| Error::unknown("ScalarFunction", expr.fun))?; - let args = &expr.args; match scalar_function { ScalarFunction::Unknown => Err(proto_error("Unknown scalar function")), - ScalarFunction::Coalesce => { - Ok(coalesce(parse_exprs(args, registry, 
codec)?)) - } } } ExprType::ScalarUdfExpr(protobuf::ScalarUdfExprNode { diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index 7ad39df2c7ed..45aebc88dc63 100644 --- a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -21,20 +21,6 @@ use std::sync::Arc; -use crate::protobuf::{ - self, - arrow_type::ArrowTypeEnum, - plan_type::PlanTypeEnum::{ - AnalyzedLogicalPlan, FinalAnalyzedLogicalPlan, FinalLogicalPlan, - FinalPhysicalPlan, FinalPhysicalPlanWithStats, InitialLogicalPlan, - InitialPhysicalPlan, InitialPhysicalPlanWithStats, OptimizedLogicalPlan, - OptimizedPhysicalPlan, - }, - AnalyzedLogicalPlanType, CubeNode, EmptyMessage, GroupingSetNode, LogicalExprList, - OptimizedLogicalPlanType, OptimizedPhysicalPlanType, PlaceholderNode, RollupNode, - UnionField, UnionValue, -}; - use arrow::{ array::ArrayRef, datatypes::{ @@ -44,6 +30,7 @@ use arrow::{ ipc::writer::{DictionaryTracker, IpcDataGenerator}, record_batch::RecordBatch, }; + use datafusion_common::{ Column, Constraint, Constraints, DFSchema, DFSchemaRef, ScalarValue, TableReference, }; @@ -54,8 +41,22 @@ use datafusion_expr::expr::{ }; use datafusion_expr::{ logical_plan::PlanType, logical_plan::StringifiedPlan, AggregateFunction, - BuiltInWindowFunction, BuiltinScalarFunction, Expr, JoinConstraint, JoinType, - TryCast, WindowFrame, WindowFrameBound, WindowFrameUnits, WindowFunctionDefinition, + BuiltInWindowFunction, Expr, JoinConstraint, JoinType, TryCast, WindowFrame, + WindowFrameBound, WindowFrameUnits, WindowFunctionDefinition, +}; + +use crate::protobuf::{ + self, + arrow_type::ArrowTypeEnum, + plan_type::PlanTypeEnum::{ + AnalyzedLogicalPlan, FinalAnalyzedLogicalPlan, FinalLogicalPlan, + FinalPhysicalPlan, FinalPhysicalPlanWithStats, InitialLogicalPlan, + InitialPhysicalPlan, InitialPhysicalPlanWithStats, OptimizedLogicalPlan, + OptimizedPhysicalPlan, + }, + AnalyzedLogicalPlanType, CubeNode, 
EmptyMessage, GroupingSetNode, LogicalExprList, + OptimizedLogicalPlanType, OptimizedPhysicalPlanType, PlaceholderNode, RollupNode, + UnionField, UnionValue, }; use super::LogicalExtensionCodec; @@ -70,8 +71,6 @@ pub enum Error { InvalidTimeUnit(TimeUnit), - UnsupportedScalarFunction(BuiltinScalarFunction), - NotImplemented(String), } @@ -93,9 +92,6 @@ impl std::fmt::Display for Error { "Only TimeUnit::Microsecond and TimeUnit::Nanosecond are valid time units, found: {time_unit:?}" ) } - Self::UnsupportedScalarFunction(function) => { - write!(f, "Unsupported scalar function {function:?}") - } Self::NotImplemented(s) => { write!(f, "Not implemented: {s}") } @@ -774,17 +770,6 @@ pub fn serialize_expr( Expr::ScalarFunction(ScalarFunction { func_def, args }) => { let args = serialize_exprs(args, codec)?; match func_def { - ScalarFunctionDefinition::BuiltIn(fun) => { - let fun: protobuf::ScalarFunction = fun.try_into()?; - protobuf::LogicalExprNode { - expr_type: Some(ExprType::ScalarFunction( - protobuf::ScalarFunctionNode { - fun: fun.into(), - args, - }, - )), - } - } ScalarFunctionDefinition::UDF(fun) => { let mut buf = Vec::new(); let _ = codec.try_encode_udf(fun.as_ref(), &mut buf); @@ -995,7 +980,7 @@ pub fn serialize_expr( | Expr::Exists { .. } | Expr::OuterReferenceColumn { .. } => { // we would need to add logical plan operators to datafusion.proto to support this - // see discussion in https://github.com/apache/arrow-datafusion/issues/2565 + // see discussion in https://github.com/apache/datafusion/issues/2565 return Err(Error::General("Proto serialization error: Expr::ScalarSubquery(_) | Expr::InSubquery(_) | Expr::Exists { .. 
} | Exp:OuterReferenceColumn not supported".to_string())); } Expr::GetIndexedField(GetIndexedField { expr, field }) => { @@ -1402,18 +1387,6 @@ impl TryFrom<&ScalarValue> for protobuf::ScalarValue { } } -impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction { - type Error = Error; - - fn try_from(scalar: &BuiltinScalarFunction) -> Result { - let scalar_function = match scalar { - BuiltinScalarFunction::Coalesce => Self::Coalesce, - }; - - Ok(scalar_function) - } -} - impl From<&TimeUnit> for protobuf::TimeUnit { fn from(val: &TimeUnit) -> Self { match val { diff --git a/datafusion/proto/src/physical_plan/from_proto.rs b/datafusion/proto/src/physical_plan/from_proto.rs index 3bbd526f1a04..cab41373d932 100644 --- a/datafusion/proto/src/physical_plan/from_proto.rs +++ b/datafusion/proto/src/physical_plan/from_proto.rs @@ -34,7 +34,6 @@ use datafusion::datasource::file_format::parquet::ParquetSink; use datafusion::datasource::listing::{FileRange, ListingTableUrl, PartitionedFile}; use datafusion::datasource::object_store::ObjectStoreUrl; use datafusion::datasource::physical_plan::{FileScanConfig, FileSinkConfig}; -use datafusion::execution::context::ExecutionProps; use datafusion::execution::FunctionRegistry; use datafusion::logical_expr::WindowFunctionDefinition; use datafusion::physical_expr::{PhysicalSortExpr, ScalarFunctionExpr}; @@ -44,7 +43,7 @@ use datafusion::physical_plan::expressions::{ }; use datafusion::physical_plan::windows::create_window_expr; use datafusion::physical_plan::{ - functions, ColumnStatistics, Partitioning, PhysicalExpr, Statistics, WindowExpr, + ColumnStatistics, Partitioning, PhysicalExpr, Statistics, WindowExpr, }; use datafusion_common::config::{ ColumnOptions, CsvOptions, FormatOptions, JsonOptions, ParquetOptions, @@ -340,24 +339,10 @@ pub fn parse_physical_expr( convert_required!(e.arrow_type)?, )), ExprType::ScalarFunction(e) => { - let scalar_function = - protobuf::ScalarFunction::try_from(e.fun).map_err(|_| { - 
proto_error( - format!("Received an unknown scalar function: {}", e.fun,), - ) - })?; - - let args = parse_physical_exprs(&e.args, registry, input_schema, codec)?; - - // TODO Do not create new the ExecutionProps - let execution_props = ExecutionProps::new(); - - functions::create_builtin_physical_expr( - &(&scalar_function).into(), - &args, - input_schema, - &execution_props, - )? + return Err(proto_error(format!( + "Received an unknown scalar function: {}", + e.fun, + ))); } ExprType::ScalarUdf(e) => { let udf = match &e.fun_definition { diff --git a/datafusion/proto/src/physical_plan/to_proto.rs b/datafusion/proto/src/physical_plan/to_proto.rs index b4c23e4d0c3c..7b6f745fed6a 100644 --- a/datafusion/proto/src/physical_plan/to_proto.rs +++ b/datafusion/proto/src/physical_plan/to_proto.rs @@ -18,13 +18,11 @@ use std::{ convert::{TryFrom, TryInto}, - str::FromStr, sync::Arc, }; #[cfg(feature = "parquet")] use datafusion::datasource::file_format::parquet::ParquetSink; -use datafusion::logical_expr::BuiltinScalarFunction; use datafusion::physical_expr::window::{NthValueKind, SlidingAggregateWindowExpr}; use datafusion::physical_expr::{PhysicalSortExpr, ScalarFunctionExpr}; use datafusion::physical_plan::expressions::{ @@ -545,44 +543,30 @@ pub fn serialize_physical_expr( }) } else if let Some(expr) = expr.downcast_ref::() { let args = serialize_physical_exprs(expr.args().to_vec(), codec)?; - if let Ok(fun) = BuiltinScalarFunction::from_str(expr.name()) { - let fun: protobuf::ScalarFunction = (&fun).try_into()?; - - Ok(protobuf::PhysicalExprNode { - expr_type: Some(protobuf::physical_expr_node::ExprType::ScalarFunction( - protobuf::PhysicalScalarFunctionNode { - name: expr.name().to_string(), - fun: fun.into(), - args, - return_type: Some(expr.return_type().try_into()?), - }, - )), - }) - } else { - let mut buf = Vec::new(); - match expr.fun() { - ScalarFunctionDefinition::UDF(udf) => { - codec.try_encode_udf(udf, &mut buf)?; - } - _ => { - return not_impl_err!( - 
"Proto serialization error: Trying to serialize a unresolved function" - ); - } - } - let fun_definition = if buf.is_empty() { None } else { Some(buf) }; - Ok(protobuf::PhysicalExprNode { - expr_type: Some(protobuf::physical_expr_node::ExprType::ScalarUdf( - protobuf::PhysicalScalarUdfNode { - name: expr.name().to_string(), - args, - fun_definition, - return_type: Some(expr.return_type().try_into()?), - }, - )), - }) + let mut buf = Vec::new(); + match expr.fun() { + ScalarFunctionDefinition::UDF(udf) => { + codec.try_encode_udf(udf, &mut buf)?; + } + _ => { + return not_impl_err!( + "Proto serialization error: Trying to serialize a unresolved function" + ); + } } + + let fun_definition = if buf.is_empty() { None } else { Some(buf) }; + Ok(protobuf::PhysicalExprNode { + expr_type: Some(protobuf::physical_expr_node::ExprType::ScalarUdf( + protobuf::PhysicalScalarUdfNode { + name: expr.name().to_string(), + args, + fun_definition, + return_type: Some(expr.return_type().try_into()?), + }, + )), + }) } else if let Some(expr) = expr.downcast_ref::() { Ok(protobuf::PhysicalExprNode { expr_type: Some(protobuf::physical_expr_node::ExprType::LikeExpr(Box::new( diff --git a/datafusion/proto/tests/cases/serialize.rs b/datafusion/proto/tests/cases/serialize.rs index 972382b841d5..cc683e778ebc 100644 --- a/datafusion/proto/tests/cases/serialize.rs +++ b/datafusion/proto/tests/cases/serialize.rs @@ -24,6 +24,7 @@ use datafusion::execution::FunctionRegistry; use datafusion::prelude::SessionContext; use datafusion_expr::{col, create_udf, lit, ColumnarValue}; use datafusion_expr::{Expr, Volatility}; +use datafusion_functions::string; use datafusion_proto::bytes::Serializeable; use datafusion_proto::logical_plan::to_proto::serialize_expr; use datafusion_proto::logical_plan::DefaultLogicalExtensionCodec; @@ -252,17 +253,15 @@ fn context_with_udf() -> SessionContext { fn test_expression_serialization_roundtrip() { use datafusion_common::ScalarValue; use 
datafusion_expr::expr::ScalarFunction; - use datafusion_expr::BuiltinScalarFunction; use datafusion_proto::logical_plan::from_proto::parse_expr; - use strum::IntoEnumIterator; let ctx = SessionContext::new(); let lit = Expr::Literal(ScalarValue::Utf8(None)); - for builtin_fun in BuiltinScalarFunction::iter() { + for function in string::functions() { // default to 4 args (though some exprs like substr have error checking) let num_args = 4; let args: Vec<_> = std::iter::repeat(&lit).take(num_args).cloned().collect(); - let expr = Expr::ScalarFunction(ScalarFunction::new(builtin_fun, args)); + let expr = Expr::ScalarFunction(ScalarFunction::new_udf(function, args)); let extension_codec = DefaultLogicalExtensionCodec {}; let proto = serialize_expr(&expr, &extension_codec).unwrap(); diff --git a/datafusion/sql/src/expr/function.rs b/datafusion/sql/src/expr/function.rs index c225afec58d6..68cba15634d5 100644 --- a/datafusion/sql/src/expr/function.rs +++ b/datafusion/sql/src/expr/function.rs @@ -27,7 +27,7 @@ use datafusion_expr::{ }; use datafusion_expr::{ expr::{ScalarFunction, Unnest}, - BuiltInWindowFunction, BuiltinScalarFunction, + BuiltInWindowFunction, }; use sqlparser::ast::{ Expr as SQLExpr, Function as SQLFunction, FunctionArg, FunctionArgExpr, WindowType, @@ -55,7 +55,6 @@ pub fn suggest_valid_function( // All scalar functions and aggregate functions let mut funcs = Vec::new(); - funcs.extend(BuiltinScalarFunction::iter().map(|func| func.to_string())); funcs.extend(ctx.udfs_names()); funcs.extend(AggregateFunction::iter().map(|func| func.to_string())); funcs.extend(ctx.udafs_names()); @@ -111,7 +110,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { crate::utils::normalize_ident(name.0[0].clone()) }; - // user-defined function (UDF) should have precedence in case it has the same name as a scalar built-in function + // user-defined function (UDF) should have precedence if let Some(fm) = self.context_provider.get_function_meta(&name) { let args = 
self.function_args_to_expr(args, schema, planner_context)?; return Ok(Expr::ScalarFunction(ScalarFunction::new_udf(fm, args))); @@ -129,12 +128,6 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { return Ok(Expr::Unnest(Unnest::new(expr))); } - // next, scalar built-in - if let Ok(fun) = BuiltinScalarFunction::from_str(&name) { - let args = self.function_args_to_expr(args, schema, planner_context)?; - return Ok(Expr::ScalarFunction(ScalarFunction::new(fun, args))); - }; - if !order_by.is_empty() && is_function_window { return plan_err!( "Aggregate ORDER BY is not implemented for window functions" diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index c1042ab0e944..d65963cfe381 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -60,7 +60,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { // Virtual stack machine to convert SQLExpr to Expr // This allows visiting the expr tree in a depth-first manner which // produces expressions in postfix notations, i.e. `a + b` => `a b +`. 
- // See https://github.com/apache/arrow-datafusion/issues/1444 + // See https://github.com/apache/datafusion/issues/1444 let mut stack = vec![StackEntry::SQLExpr(Box::new(sql))]; let mut eval_stack = vec![]; diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index ebd48b78a9df..0066f75f0d30 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -440,7 +440,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { SQLDataType::Interval => Ok(DataType::Interval(IntervalUnit::MonthDayNano)), // Explicitly list all other types so that if sqlparser // adds/changes the `SQLDataType` the compiler will tell us on upgrade - // and avoid bugs like https://github.com/apache/arrow-datafusion/issues/3059 + // and avoid bugs like https://github.com/apache/datafusion/issues/3059 SQLDataType::Nvarchar(_) | SQLDataType::JSON | SQLDataType::Uuid diff --git a/datafusion/sql/src/query.rs b/datafusion/sql/src/query.rs index 058496e88367..d5d3bcc4a13b 100644 --- a/datafusion/sql/src/query.rs +++ b/datafusion/sql/src/query.rs @@ -154,7 +154,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { /// * `Result` - An `Ok` variant containing the constant result if evaluation is successful, /// or an `Err` variant containing an error message if evaluation fails. /// -/// tracks a more general solution +/// tracks a more general solution fn get_constant_result(expr: &Expr, arg_name: &str) -> Result { match expr { Expr::Literal(ScalarValue::Int64(Some(s))) => Ok(*s), diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs index 30eacdb44c4a..78c01e8e285c 100644 --- a/datafusion/sql/src/select.rs +++ b/datafusion/sql/src/select.rs @@ -90,7 +90,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let projected_plan = self.project(base_plan.clone(), select_exprs.clone())?; // Place the fields of the base plan at the front so that when there are references // with the same name, the fields of the base plan will be searched first. 
- // See https://github.com/apache/arrow-datafusion/issues/9162 + // See https://github.com/apache/datafusion/issues/9162 let mut combined_schema = base_plan.schema().as_ref().clone(); combined_schema.merge(projected_plan.schema()); diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index 53fbfb0552bb..759a5e8ce9d3 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -799,7 +799,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { ) -> Result { if self.has_table("information_schema", "tables") { // we only support the basic "SHOW TABLES" - // https://github.com/apache/arrow-datafusion/issues/3188 + // https://github.com/apache/datafusion/issues/3188 if db_name.is_some() || filter.is_some() || full || extended { plan_err!("Unsupported parameters to SHOW TABLES") } else { diff --git a/datafusion/sql/src/unparser/ast.rs b/datafusion/sql/src/unparser/ast.rs index 0c9dcde989ca..0a76aee2e066 100644 --- a/datafusion/sql/src/unparser/ast.rs +++ b/datafusion/sql/src/unparser/ast.rs @@ -19,7 +19,7 @@ //! not exported as they will eventually be move to the SQLparser package. //! //! -//! See +//! 
See use core::fmt; diff --git a/datafusion/sql/src/utils.rs b/datafusion/sql/src/utils.rs index d2f1982d5418..206c933a4f35 100644 --- a/datafusion/sql/src/utils.rs +++ b/datafusion/sql/src/utils.rs @@ -156,7 +156,7 @@ pub(crate) fn resolve_positions_to_exprs( ) -> Option { match expr { // sql_expr_to_logical_expr maps number to i64 - // https://github.com/apache/arrow-datafusion/blob/8d175c759e17190980f270b5894348dc4cff9bbf/datafusion/src/sql/planner.rs#L882-L887 + // https://github.com/apache/datafusion/blob/8d175c759e17190980f270b5894348dc4cff9bbf/datafusion/src/sql/planner.rs#L882-L887 Expr::Literal(ScalarValue::Int64(Some(position))) if position > &0_i64 && position <= &(select_exprs.len() as i64) => { diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index da1baf65de34..319aa5b5fd30 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -2615,7 +2615,7 @@ fn select_multibyte_column() { #[test] fn select_groupby_orderby() { // ensure that references are correctly resolved in the order by clause - // see https://github.com/apache/arrow-datafusion/issues/4854 + // see https://github.com/apache/datafusion/issues/4854 let sql = r#"SELECT avg(age) AS "value", date_trunc('month', birth_date) AS "birth_date" @@ -3360,7 +3360,7 @@ fn hive_aggregate_with_filter() -> Result<()> { #[test] fn order_by_unaliased_name() { - // https://github.com/apache/arrow-datafusion/issues/3160 + // https://github.com/apache/datafusion/issues/3160 // This query was failing with: // SchemaError(FieldNotFound { qualifier: Some("p"), name: "state", valid_fields: ["z", "q"] }) let sql = @@ -3592,7 +3592,7 @@ fn test_noneq_with_filter_join() { #[test] fn test_one_side_constant_full_join() { // TODO: this sql should be parsed as join after - // https://github.com/apache/arrow-datafusion/issues/2877 is resolved. + // https://github.com/apache/datafusion/issues/2877 is resolved. 
let sql = "SELECT id, order_id \ FROM person \ FULL OUTER JOIN orders \ diff --git a/datafusion/sqllogictest/bin/sqllogictests.rs b/datafusion/sqllogictest/bin/sqllogictests.rs index 268d09681c72..560328ee8619 100644 --- a/datafusion/sqllogictest/bin/sqllogictests.rs +++ b/datafusion/sqllogictest/bin/sqllogictests.rs @@ -297,7 +297,7 @@ fn read_dir_recursive_impl(dst: &mut Vec, path: &Path) -> Result<()> { /// This structure attempts to mimic the command line options /// accepted by IDEs such as CLion that pass arguments /// -/// See for more details +/// See for more details #[derive(Parser, Debug)] #[clap(author, version, about, long_about= None)] struct Options { diff --git a/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs b/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs index e89bc9bb7b90..7eef1f020ffa 100644 --- a/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs +++ b/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs @@ -101,7 +101,7 @@ fn expand_row(mut row: Vec) -> impl Iterator> { // replace any leading spaces with '-' as // `sqllogictest` ignores whitespace differences // - // See https://github.com/apache/arrow-datafusion/issues/6328 + // See https://github.com/apache/datafusion/issues/6328 let content = l.trim_start(); let new_prefix = "-".repeat(l.len() - content.len()); // maintain for each line a number, so diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index 030b8ef8ce7d..5e4fb10456e3 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -71,7 +71,7 @@ CREATE TABLE test (c1 BIGINT,c2 BIGINT) as values # Error tests ####### -# https://github.com/apache/arrow-datafusion/issues/3353 +# https://github.com/apache/datafusion/issues/3353 statement error DataFusion error: Schema error: Schema contains duplicate unqualified field name 
"APPROX_DISTINCT\(aggregate_test_100\.c9\)" SELECT approx_distinct(c9) count_c9, approx_distinct(cast(c9 as varchar)) count_c9_str FROM aggregate_test_100 @@ -551,7 +551,7 @@ SELECT approx_median(col_f64_nan) FROM median_table NaN # median_multi -# test case for https://github.com/apache/arrow-datafusion/issues/3105 +# test case for https://github.com/apache/datafusion/issues/3105 # has an intermediate grouping statement ok create table cpu (host string, usage float) as select * from (values @@ -674,7 +674,7 @@ SELECT COUNT(2) FROM aggregate_test_100 100 # csv_query_approx_count -# FIX: https://github.com/apache/arrow-datafusion/issues/3353 +# FIX: https://github.com/apache/datafusion/issues/3353 # query II # SELECT approx_distinct(c9) AS count_c9, approx_distinct(cast(c9 as varchar)) count_c9_str FROM aggregate_test_100 # ---- @@ -3383,7 +3383,7 @@ query I SELECT 0 AS "t.a" FROM t HAVING MAX(t.a) = 0; ---- -# Test issue: https://github.com/apache/arrow-datafusion/issues/9161 +# Test issue: https://github.com/apache/datafusion/issues/9161 query I rowsort SELECT CAST(a AS INT) FROM t GROUP BY t.a; ---- diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 3456963aacfc..c3c5603dafc6 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -1378,7 +1378,7 @@ NULL 43 ## array_pop_back (aliases: `list_pop_back`) # array_pop_back scalar function with null -#TODO: https://github.com/apache/arrow-datafusion/issues/7142 +#TODO: https://github.com/apache/datafusion/issues/7142 # follow clickhouse and duckdb #query ? 
#select array_pop_back(null); @@ -1556,7 +1556,7 @@ select array_pop_back(arrow_cast(column1, 'LargeList(Int64)')) from large_arrays ## array_pop_front (aliases: `list_pop_front`) -#TODO:https://github.com/apache/arrow-datafusion/issues/7142 +#TODO:https://github.com/apache/datafusion/issues/7142 # array_pop_front scalar function with null # follow clickhouse and duckdb #query ? @@ -3020,7 +3020,7 @@ select array_positions([1, 2, 3, 4, 5], null); ---- [] -#TODO: https://github.com/apache/arrow-datafusion/issues/7142 +#TODO: https://github.com/apache/datafusion/issues/7142 # array_positions with NULL (follow PostgreSQL) #query ? #select array_positions(null, 1); @@ -4079,7 +4079,7 @@ select cardinality(arrow_cast(make_array(), 'LargeList(Null)')), cardinality(arr NULL 0 #TODO -#https://github.com/apache/arrow-datafusion/issues/9158 +#https://github.com/apache/datafusion/issues/9158 #query II #select cardinality(arrow_cast(make_array(), 'FixedSizeList(1, Null)')), cardinality(arrow_cast(make_array(make_array()), 'FixedSizeList(1, List(Null))')) #---- @@ -4165,7 +4165,7 @@ select ---- [1, , 3] [, 2.2, 3.3] [, bc] -#TODO: https://github.com/apache/arrow-datafusion/issues/7142 +#TODO: https://github.com/apache/datafusion/issues/7142 # follow PostgreSQL behavior #query ? #select @@ -4385,7 +4385,7 @@ select array_remove_n(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], ## array_remove_all (aliases: `list_removes`) -#TODO: https://github.com/apache/arrow-datafusion/issues/7142 +#TODO: https://github.com/apache/datafusion/issues/7142 # array_remove_all with NULL elements #query ? #select array_remove_all(NULL, 1); @@ -5257,7 +5257,7 @@ true false true false false false true true false false true false true ## array_distinct -#TODO: https://github.com/apache/arrow-datafusion/issues/7142 +#TODO: https://github.com/apache/datafusion/issues/7142 #query ? 
#select array_distinct(null); #---- @@ -6056,7 +6056,7 @@ select array_concat(column1, [7]) from arrays_values_v2; # flatten -#TODO: https://github.com/apache/arrow-datafusion/issues/7142 +#TODO: https://github.com/apache/datafusion/issues/7142 # follow DuckDB #query ? #select flatten(NULL); @@ -6144,7 +6144,7 @@ select empty(arrow_cast(make_array(), 'LargeList(Null)')); ---- true -#TODO: https://github.com/apache/arrow-datafusion/issues/9158 +#TODO: https://github.com/apache/datafusion/issues/9158 #query B #select empty(arrow_cast(make_array(), 'FixedSizeList(0, Null)')); #---- @@ -6166,7 +6166,7 @@ select empty(arrow_cast(make_array(NULL), 'FixedSizeList(1, Null)')); ---- false -#TODO: https://github.com/apache/arrow-datafusion/issues/7142 +#TODO: https://github.com/apache/datafusion/issues/7142 # empty scalar function #4 #query B #select empty(NULL); @@ -6473,7 +6473,7 @@ select arrow_typeof(c) from test_create_array_table; List(Field { name: "item", data_type: List(Field { name: "item", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) # Test casting to array types -# issue: https://github.com/apache/arrow-datafusion/issues/9440 +# issue: https://github.com/apache/datafusion/issues/9440 query ??T select [1,2,3]::int[], [['1']]::int[][], arrow_typeof([]::text[]); ---- diff --git a/datafusion/sqllogictest/test_files/copy.slt b/datafusion/sqllogictest/test_files/copy.slt index b591d06f00a5..d695e8514b07 100644 --- a/datafusion/sqllogictest/test_files/copy.slt +++ b/datafusion/sqllogictest/test_files/copy.slt @@ -111,7 +111,7 @@ a statement ok create table test ("'test'" varchar, "'test2'" varchar, "'test3'" varchar); -# https://github.com/apache/arrow-datafusion/issues/9714 +# https://github.com/apache/datafusion/issues/9714 ## Until the partition by parsing uses ColumnDef, this test is meaningless since it becomes an overfit. 
Even in ## CREATE EXTERNAL TABLE, there is a schema mismatch, this should be an issue. # @@ -138,7 +138,7 @@ create table test ("'test'" varchar, "'test2'" varchar, "'test3'" varchar); #LOCATION 'test_files/scratch/copy/escape_quote/' PARTITIONED BY ("'test2'", "'test3'"); # # This triggers a panic (index out of bounds) -# https://github.com/apache/arrow-datafusion/issues/9269 +# https://github.com/apache/datafusion/issues/9269 #query #select * from validate_partitioned_escape_quote; diff --git a/datafusion/sqllogictest/test_files/cte.slt b/datafusion/sqllogictest/test_files/cte.slt index 8b972bc79b03..7d0f929962bd 100644 --- a/datafusion/sqllogictest/test_files/cte.slt +++ b/datafusion/sqllogictest/test_files/cte.slt @@ -390,7 +390,7 @@ WITH RECURSIVE "recursive_cte" AS ( SELECT 2 as "val" FROM - "recursive_cte" + "recursive_cte" FULL JOIN "sub_cte" ON 1 = 1 WHERE "recursive_cte"."val" < 2 @@ -679,7 +679,7 @@ WITH RECURSIVE my_cte AS ( SELECT a FROM my_cte; -# Test issue: https://github.com/apache/arrow-datafusion/issues/9680 +# Test issue: https://github.com/apache/datafusion/issues/9680 query I WITH RECURSIVE recursive_cte AS ( SELECT 1 as val @@ -700,7 +700,7 @@ SELECT * FROM recursive_cte; 1 2 -# Test issue: https://github.com/apache/arrow-datafusion/issues/9680 +# Test issue: https://github.com/apache/datafusion/issues/9680 # 'recursive_cte' should be on the left of the cross join, as this is the test purpose of the above query. 
query TT explain WITH RECURSIVE recursive_cte AS ( @@ -746,7 +746,7 @@ physical_plan 11)------ProjectionExec: expr=[2 as val] 12)--------PlaceholderRowExec -# Test issue: https://github.com/apache/arrow-datafusion/issues/9794 +# Test issue: https://github.com/apache/datafusion/issues/9794 # Non-recursive term and recursive term have different types query IT rowsort WITH RECURSIVE my_cte AS( @@ -758,7 +758,7 @@ WITH RECURSIVE my_cte AS( 1 Int32 3 Int32 -# Test issue: https://github.com/apache/arrow-datafusion/issues/9794 +# Test issue: https://github.com/apache/datafusion/issues/9794 # Non-recursive term and recursive term have different number of columns query error DataFusion error: Error during planning: Non\-recursive term and recursive term must have the same number of columns \(1 != 3\) WITH RECURSIVE my_cte AS ( @@ -767,7 +767,7 @@ WITH RECURSIVE my_cte AS ( SELECT a+2, 'a','c' FROM my_cte WHERE a<3 ) SELECT * FROM my_cte; -# Test issue: https://github.com/apache/arrow-datafusion/issues/9794 +# Test issue: https://github.com/apache/datafusion/issues/9794 # Non-recursive term and recursive term have different types, and cannot be casted query error DataFusion error: Arrow error: Cast error: Cannot cast string 'abc' to value of Int64 type WITH RECURSIVE my_cte AS ( @@ -777,7 +777,7 @@ WITH RECURSIVE my_cte AS ( ) SELECT * FROM my_cte; # Define a non-recursive CTE in the recursive WITH clause. 
-# Test issue: https://github.com/apache/arrow-datafusion/issues/9804 +# Test issue: https://github.com/apache/datafusion/issues/9804 query I WITH RECURSIVE cte AS ( SELECT a FROM (VALUES(1)) AS t(a) WHERE a > 2 diff --git a/datafusion/sqllogictest/test_files/dates.slt b/datafusion/sqllogictest/test_files/dates.slt index eae3e3e5960b..32c0bd14e7cc 100644 --- a/datafusion/sqllogictest/test_files/dates.slt +++ b/datafusion/sqllogictest/test_files/dates.slt @@ -19,7 +19,7 @@ ## Date/Time Handling Tests ########## -# Reproducer for https://github.com/apache/arrow-datafusion/issues/3944 +# Reproducer for https://github.com/apache/datafusion/issues/3944 statement ok CREATE TABLE test( i_item_desc VARCHAR, diff --git a/datafusion/sqllogictest/test_files/dictionary.slt b/datafusion/sqllogictest/test_files/dictionary.slt index 891a09fbc177..06b726502664 100644 --- a/datafusion/sqllogictest/test_files/dictionary.slt +++ b/datafusion/sqllogictest/test_files/dictionary.slt @@ -206,7 +206,7 @@ true false NULL true true false true NULL true false NULL true true false true NULL -# Reproducer for https://github.com/apache/arrow-datafusion/issues/8738 +# Reproducer for https://github.com/apache/datafusion/issues/8738 # This query should work correctly query P?TT rowsort SELECT diff --git a/datafusion/sqllogictest/test_files/expr.slt b/datafusion/sqllogictest/test_files/expr.slt index dacf715a9226..ff63416b3a10 100644 --- a/datafusion/sqllogictest/test_files/expr.slt +++ b/datafusion/sqllogictest/test_files/expr.slt @@ -775,7 +775,7 @@ SELECT upper(NULL) ---- NULL -# TODO issue: https://github.com/apache/arrow-datafusion/issues/6596 +# TODO issue: https://github.com/apache/datafusion/issues/6596 # query ?? 
#SELECT # CAST([1,2,3,4] AS INT[]) as a, diff --git a/datafusion/sqllogictest/test_files/functions.slt b/datafusion/sqllogictest/test_files/functions.slt index 38ebedf5654a..d0d2bac59e91 100644 --- a/datafusion/sqllogictest/test_files/functions.slt +++ b/datafusion/sqllogictest/test_files/functions.slt @@ -1050,7 +1050,7 @@ SELECT ---- arrow.apache.org arrow.apache.org -# Test substring_index issue https://github.com/apache/arrow-datafusion/issues/9472 +# Test substring_index issue https://github.com/apache/datafusion/issues/9472 query TTT SELECT url, diff --git a/datafusion/sqllogictest/test_files/group_by.slt b/datafusion/sqllogictest/test_files/group_by.slt index 5c5bf58dd049..7bef73833745 100644 --- a/datafusion/sqllogictest/test_files/group_by.slt +++ b/datafusion/sqllogictest/test_files/group_by.slt @@ -3429,9 +3429,9 @@ physical_plan 07)------------AggregateExec: mode=Partial, gby=[sn@1 as sn, amount@2 as amount], aggr=[SUM(l.amount)] 08)--------------ProjectionExec: expr=[amount@1 as amount, sn@2 as sn, amount@3 as amount] 09)----------------NestedLoopJoinExec: join_type=Inner, filter=sn@0 >= sn@1 -10)------------------MemoryExec: partitions=8, partition_sizes=[1, 0, 0, 0, 0, 0, 0, 0] -11)------------------CoalescePartitionsExec -12)--------------------MemoryExec: partitions=8, partition_sizes=[1, 0, 0, 0, 0, 0, 0, 0] +10)------------------CoalescePartitionsExec +11)--------------------MemoryExec: partitions=8, partition_sizes=[1, 0, 0, 0, 0, 0, 0, 0] +12)------------------MemoryExec: partitions=8, partition_sizes=[1, 0, 0, 0, 0, 0, 0, 0] query IRR SELECT r.sn, SUM(l.amount), r.amount @@ -4340,7 +4340,7 @@ physical_plan statement ok drop table t1 -# Reproducer for https://github.com/apache/arrow-datafusion/issues/8175 +# Reproducer for https://github.com/apache/datafusion/issues/8175 statement ok create table t1(state string, city string, min_temp float, area int, time timestamp) as values diff --git 
a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index 456ddef650ba..8f4b1a3816a3 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -320,14 +320,14 @@ SHOW datafusion.execution.batch_size VERBOSE datafusion.execution.batch_size 8192 Default batch size while creating new batches, it's especially useful for buffer-in-memory batches since creating tiny batches would result in too much metadata memory consumption # show_time_zone_default_utc -# https://github.com/apache/arrow-datafusion/issues/3255 +# https://github.com/apache/datafusion/issues/3255 query TT SHOW TIME ZONE ---- datafusion.execution.time_zone +00:00 # show_timezone_default_utc -# https://github.com/apache/arrow-datafusion/issues/3255 +# https://github.com/apache/datafusion/issues/3255 query TT SHOW TIMEZONE ---- @@ -335,14 +335,14 @@ datafusion.execution.time_zone +00:00 # show_time_zone_default_utc_verbose -# https://github.com/apache/arrow-datafusion/issues/3255 +# https://github.com/apache/datafusion/issues/3255 query TTT SHOW TIME ZONE VERBOSE ---- datafusion.execution.time_zone +00:00 The default time zone Some functions, e.g. 
`EXTRACT(HOUR from SOME_TIME)`, shift the underlying datetime according to this time zone, and then extract the hour # show_timezone_default_utc -# https://github.com/apache/arrow-datafusion/issues/3255 +# https://github.com/apache/datafusion/issues/3255 query TTT SHOW TIMEZONE VERBOSE ---- diff --git a/datafusion/sqllogictest/test_files/insert.slt b/datafusion/sqllogictest/test_files/insert.slt index e20e31308ed0..d19581d0cc0b 100644 --- a/datafusion/sqllogictest/test_files/insert.slt +++ b/datafusion/sqllogictest/test_files/insert.slt @@ -207,7 +207,7 @@ create table table_without_values(c1 varchar not null); # verify that the sort order of the insert query is maintained into the # insert (there should be a SortExec in the following plan) -# See https://github.com/apache/arrow-datafusion/pull/6354#discussion_r1195284178 for more background +# See https://github.com/apache/datafusion/pull/6354#discussion_r1195284178 for more background query TT explain insert into table_without_values select c1 from aggregate_test_100 order by c1; ---- diff --git a/datafusion/sqllogictest/test_files/insert_to_external.slt b/datafusion/sqllogictest/test_files/insert_to_external.slt index 65b57b61de39..2c7af6abe47c 100644 --- a/datafusion/sqllogictest/test_files/insert_to_external.slt +++ b/datafusion/sqllogictest/test_files/insert_to_external.slt @@ -448,7 +448,7 @@ LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q3/'; # verify that the sort order of the insert query is maintained into the # insert (there should be a SortExec in the following plan) -# See https://github.com/apache/arrow-datafusion/pull/6354#discussion_r1195284178 for more background +# See https://github.com/apache/datafusion/pull/6354#discussion_r1195284178 for more background query TT explain insert into table_without_values select c1 from aggregate_test_100 order by c1; ---- diff --git a/datafusion/sqllogictest/test_files/interval.slt b/datafusion/sqllogictest/test_files/interval.slt index 
f2ae2984f07b..eab4eed00269 100644 --- a/datafusion/sqllogictest/test_files/interval.slt +++ b/datafusion/sqllogictest/test_files/interval.slt @@ -17,7 +17,7 @@ # Use `interval` SQL literal syntax -# the types should be the same: https://github.com/apache/arrow-datafusion/issues/5801 +# the types should be the same: https://github.com/apache/datafusion/issues/5801 query TT select arrow_typeof(interval '5 months'), diff --git a/datafusion/sqllogictest/test_files/join.slt b/datafusion/sqllogictest/test_files/join.slt index f451d2e763fe..6732d3e9108b 100644 --- a/datafusion/sqllogictest/test_files/join.slt +++ b/datafusion/sqllogictest/test_files/join.slt @@ -19,7 +19,7 @@ ## Join Tests ########## -# Regression test: https://github.com/apache/arrow-datafusion/issues/4844 +# Regression test: https://github.com/apache/datafusion/issues/4844 statement ok CREATE TABLE IF NOT EXISTS students(name TEXT, mark INT) AS VALUES ('Stuart', 28), @@ -49,7 +49,7 @@ drop table IF EXISTS students; statement ok drop table IF EXISTS grades; -# issue: https://github.com/apache/arrow-datafusion/issues/5382 +# issue: https://github.com/apache/datafusion/issues/5382 statement ok CREATE TABLE IF NOT EXISTS test1(a int, b int) as select 1 as a, 2 as b; @@ -702,7 +702,7 @@ drop table IF EXISTS full_join_test; statement ok set datafusion.execution.batch_size = 8192; -# related to: https://github.com/apache/arrow-datafusion/issues/8374 +# related to: https://github.com/apache/datafusion/issues/8374 statement ok CREATE TABLE t1(a text, b int) AS VALUES ('Alice', 50), ('Alice', 100); diff --git a/datafusion/sqllogictest/test_files/joins.slt b/datafusion/sqllogictest/test_files/joins.slt index d999734ba70e..65e6c17b9203 100644 --- a/datafusion/sqllogictest/test_files/joins.slt +++ b/datafusion/sqllogictest/test_files/joins.slt @@ -265,7 +265,7 @@ CREATE TABLE b(a INT, b INT, c INT) AS VALUES (4, 400, 800) # issue_3002 -# // repro case for https://github.com/apache/arrow-datafusion/issues/3002 +# // 
repro case for https://github.com/apache/datafusion/issues/3002 query II select a.a, b.b from a join b on a.a = b.b @@ -1272,7 +1272,7 @@ ORDER BY t1_id NULL e # Error left anti join -# https://github.com/apache/arrow-datafusion/issues/4366 +# https://github.com/apache/datafusion/issues/4366 statement ok set datafusion.optimizer.repartition_joins = false; @@ -2010,7 +2010,8 @@ set datafusion.explain.logical_plan_only = false; statement ok set datafusion.execution.target_partitions = 4; -# Right as inner table nested loop join +# Planning inner nested loop join +# inputs are swapped due to inexact statistics + join reordering caused additional projection query TT EXPLAIN @@ -2027,17 +2028,18 @@ logical_plan 05)----Filter: join_t2.t2_int > UInt32(1) 06)------TableScan: join_t2 projection=[t2_id, t2_int] physical_plan -01)NestedLoopJoinExec: join_type=Inner, filter=t1_id@0 > t2_id@1 -02)--CoalesceBatchesExec: target_batch_size=2 -03)----FilterExec: t1_id@0 > 10 -04)------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -05)--------MemoryExec: partitions=1, partition_sizes=[1] -06)--CoalescePartitionsExec -07)----ProjectionExec: expr=[t2_id@0 as t2_id] -08)------CoalesceBatchesExec: target_batch_size=2 -09)--------FilterExec: t2_int@1 > 1 -10)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -11)------------MemoryExec: partitions=1, partition_sizes=[1] +01)ProjectionExec: expr=[t1_id@1 as t1_id, t2_id@0 as t2_id] +02)--NestedLoopJoinExec: join_type=Inner, filter=t1_id@0 > t2_id@1 +03)----CoalescePartitionsExec +04)------ProjectionExec: expr=[t2_id@0 as t2_id] +05)--------CoalesceBatchesExec: target_batch_size=2 +06)----------FilterExec: t2_int@1 > 1 +07)------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +08)--------------MemoryExec: partitions=1, partition_sizes=[1] +09)----CoalesceBatchesExec: target_batch_size=2 +10)------FilterExec: t1_id@0 > 10 +11)--------RepartitionExec: 
partitioning=RoundRobinBatch(4), input_partitions=1 +12)----------MemoryExec: partitions=1, partition_sizes=[1] query II SELECT join_t1.t1_id, join_t2.t2_id @@ -3473,9 +3475,9 @@ logical_plan 05)----TableScan: annotated_data projection=[a0, a, b, c, d] physical_plan 01)NestedLoopJoinExec: join_type=Inner, filter=a@1 < a@0 -02)--RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 -03)----CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], has_header=true -04)--CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], has_header=true +02)--CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], has_header=true +03)--RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 +04)----CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], has_header=true # Currently datafusion cannot pushdown filter conditions with scalar UDF into # cross join. diff --git a/datafusion/sqllogictest/test_files/monotonic_projection_test.slt b/datafusion/sqllogictest/test_files/monotonic_projection_test.slt index 3f12dfcda88a..943a7e07bb23 100644 --- a/datafusion/sqllogictest/test_files/monotonic_projection_test.slt +++ b/datafusion/sqllogictest/test_files/monotonic_projection_test.slt @@ -69,7 +69,7 @@ physical_plan # doesn't invalidate lexicographical ordering. # Hence '[CAST(a AS BIGINT) AS a_big ASC, b ASC]' # is valid for the given ordering: '[a ASC, b ASC]'. 
-# See discussion for rationale: https://github.com/apache/arrow-datafusion/issues/8838#issue-2077714891 +# See discussion for rationale: https://github.com/apache/datafusion/issues/8838#issue-2077714891 query TT EXPLAIN SELECT a, CAST(a AS BIGINT) AS a_big, b @@ -118,7 +118,7 @@ physical_plan # test for cast Utf8 # (must actually sort as the sort order for a number cast to utf8 is different than for int) -# See discussion: https://github.com/apache/arrow-datafusion/pull/9127#discussion_r1492336709 +# See discussion: https://github.com/apache/datafusion/pull/9127#discussion_r1492336709 query TT EXPLAIN SELECT diff --git a/datafusion/sqllogictest/test_files/order.slt b/datafusion/sqllogictest/test_files/order.slt index 1ad74398b911..4121de91cb8d 100644 --- a/datafusion/sqllogictest/test_files/order.slt +++ b/datafusion/sqllogictest/test_files/order.slt @@ -333,7 +333,7 @@ drop table foo; ##### -# Tests for https://github.com/apache/arrow-datafusion/issues/4854 +# Tests for https://github.com/apache/datafusion/issues/4854 # Ordering / grouping by the same column ##### statement ok @@ -704,7 +704,7 @@ physical_plan # Minimal reproduction of issue 5970 -# https://github.com/apache/arrow-datafusion/issues/5970 +# https://github.com/apache/datafusion/issues/5970 statement ok set datafusion.execution.target_partitions = 2; diff --git a/datafusion/sqllogictest/test_files/predicates.slt b/datafusion/sqllogictest/test_files/predicates.slt index 22af0cf894d1..abb36c3c0858 100644 --- a/datafusion/sqllogictest/test_files/predicates.slt +++ b/datafusion/sqllogictest/test_files/predicates.slt @@ -468,7 +468,7 @@ DROP TABLE test_float; ######### # Predicates on memory tables / statistics generation -# Reproducer for https://github.com/apache/arrow-datafusion/issues/7125 +# Reproducer for https://github.com/apache/datafusion/issues/7125 ######### statement ok @@ -508,7 +508,7 @@ DROP TABLE t; ######## # Test query with bloom filter -# Refer to 
https://github.com/apache/arrow-datafusion/pull/7821#pullrequestreview-1688062599 +# Refer to https://github.com/apache/datafusion/pull/7821#pullrequestreview-1688062599 ######## statement ok diff --git a/datafusion/sqllogictest/test_files/repartition.slt b/datafusion/sqllogictest/test_files/repartition.slt index 086270a7098f..3f9e6e61f1d0 100644 --- a/datafusion/sqllogictest/test_files/repartition.slt +++ b/datafusion/sqllogictest/test_files/repartition.slt @@ -75,7 +75,7 @@ DROP TABLE parquet_table; # Unbounded repartition -# See https://github.com/apache/arrow-datafusion/issues/5278 +# See https://github.com/apache/datafusion/issues/5278 # Set up unbounded table and run a query - the query plan should display a `RepartitionExec` # and a `CoalescePartitionsExec` statement ok diff --git a/datafusion/sqllogictest/test_files/repartition_scan.slt b/datafusion/sqllogictest/test_files/repartition_scan.slt index d41d36536ceb..7d2b6d4444ce 100644 --- a/datafusion/sqllogictest/test_files/repartition_scan.slt +++ b/datafusion/sqllogictest/test_files/repartition_scan.slt @@ -126,7 +126,7 @@ SELECT column1 FROM parquet_table_with_order WHERE column1 <> 42 ORDER BY column 200 # explain should not have any groups with more than one file -# https://github.com/apache/arrow-datafusion/issues/8451 +# https://github.com/apache/datafusion/issues/8451 query TT EXPLAIN SELECT column1 FROM parquet_table_with_order WHERE column1 <> 42 ORDER BY column1; ---- @@ -240,7 +240,7 @@ DROP TABLE json_table; ################### ## Use pre-existing files we don't have a way to create arrow files yet -## (https://github.com/apache/arrow-datafusion/issues/8504) +## (https://github.com/apache/datafusion/issues/8504) statement ok CREATE EXTERNAL TABLE arrow_table STORED AS ARROW @@ -248,7 +248,7 @@ LOCATION '../core/tests/data/example.arrow'; # It would be great to see the file read as "4" groups with even sizes (offsets) eventually -# https://github.com/apache/arrow-datafusion/issues/8503 +# 
https://github.com/apache/datafusion/issues/8503 query TT EXPLAIN SELECT * FROM arrow_table ---- diff --git a/datafusion/sqllogictest/test_files/scalar.slt b/datafusion/sqllogictest/test_files/scalar.slt index 987fb399667c..0c3fca446526 100644 --- a/datafusion/sqllogictest/test_files/scalar.slt +++ b/datafusion/sqllogictest/test_files/scalar.slt @@ -533,7 +533,7 @@ select ln(null); NULL # ln scalar ops with zero edgecases -# please see https://github.com/apache/arrow-datafusion/pull/5245#issuecomment-1426828382 +# please see https://github.com/apache/datafusion/pull/5245#issuecomment-1426828382 query R rowsort select ln(0); ---- @@ -582,7 +582,7 @@ select log(2, 2.0/3) a, log(10, 2.0/3) b; -0.584962500721 -0.176091259056 # log scalar ops with zero edgecases -# please see https://github.com/apache/arrow-datafusion/pull/5245#issuecomment-1426828382 +# please see https://github.com/apache/datafusion/pull/5245#issuecomment-1426828382 query RR rowsort select log(0) a, log(1, 64) b; ---- @@ -627,7 +627,7 @@ select log10(2.0/3); -0.176091259056 # log10 scalar ops with zero edgecases -# please see https://github.com/apache/arrow-datafusion/pull/5245#issuecomment-1426828382 +# please see https://github.com/apache/datafusion/pull/5245#issuecomment-1426828382 query R rowsort select log10(0); ---- @@ -663,7 +663,7 @@ select log2(2.0/3); -0.584962500721 # log2 scalar ops with zero edgecases -# please see https://github.com/apache/arrow-datafusion/pull/5245#issuecomment-1426828382 +# please see https://github.com/apache/datafusion/pull/5245#issuecomment-1426828382 query R rowsort select log2(0); ---- @@ -1276,7 +1276,7 @@ FROM t1 999 999 -# issue: https://github.com/apache/arrow-datafusion/issues/7004 +# issue: https://github.com/apache/datafusion/issues/7004 query B select case c1 when 'foo' then TRUE @@ -1325,7 +1325,7 @@ NULL NULL 4 -# issue: https://github.com/apache/arrow-datafusion/issues/6376 +# issue: https://github.com/apache/datafusion/issues/6376 query I select case 
when a = 0 then 123 end from (values(1), (0), (null)) as t(a); ---- diff --git a/datafusion/sqllogictest/test_files/select.slt b/datafusion/sqllogictest/test_files/select.slt index fc6897849177..a3a4b3bfc584 100644 --- a/datafusion/sqllogictest/test_files/select.slt +++ b/datafusion/sqllogictest/test_files/select.slt @@ -567,7 +567,7 @@ select * from (values (1)) LIMIT 10*100; 1 # More complex expressions in the limit is not supported yet. -# See issue: https://github.com/apache/arrow-datafusion/issues/9821 +# See issue: https://github.com/apache/datafusion/issues/9821 statement error DataFusion error: Error during planning: Unsupported operator for LIMIT clause select * from (values (1)) LIMIT 100/10; @@ -604,7 +604,7 @@ END; 2 # Binary Expression for LargeUtf8 -# issue: https://github.com/apache/arrow-datafusion/issues/5893 +# issue: https://github.com/apache/datafusion/issues/5893 statement ok CREATE TABLE t as select arrow_cast('Bar', 'LargeUtf8') as column1, 'B%' as column2; @@ -1461,7 +1461,7 @@ SELECT abs(x), abs(x) + abs(y) FROM t; statement ok DROP TABLE t; -# related to https://github.com/apache/arrow-datafusion/issues/8814 +# related to https://github.com/apache/datafusion/issues/8814 statement ok create table t(x int, y int) as values (1,1), (2,2), (3,3), (0,0), (4,0); @@ -1510,7 +1510,7 @@ physical_plan 01)ProjectionExec: expr=[y@1 = 0 OR 1 / CAST(y@1 AS Int64) < 1 as t.y = Int64(0) OR Int64(1) / t.y < Int64(1), x@0 = 0 OR y@1 = 0 OR 1 / CAST(y@1 AS Int64) < 1 / CAST(x@0 AS Int64) as t.x = Int64(0) OR t.y = Int64(0) OR Int64(1) / t.y < Int64(1) / t.x] 02)--MemoryExec: partitions=1, partition_sizes=[1] -# due to the reason describe in https://github.com/apache/arrow-datafusion/issues/8927, +# due to the reason describe in https://github.com/apache/datafusion/issues/8927, # the following queries will fail query error select coalesce(1, y/x), coalesce(2, y/x) from t; @@ -1593,7 +1593,7 @@ SELECT to_timestamp('I AM NOT A TIMESTAMP'); query error DataFusion 
error: Arrow error: Cast error: Cannot cast string '' to value of Int32 type SELECT CAST('' AS int); -# See issue: https://github.com/apache/arrow-datafusion/issues/8978 +# See issue: https://github.com/apache/datafusion/issues/8978 statement ok create table users (id int, name varchar); diff --git a/datafusion/sqllogictest/test_files/subquery.slt b/datafusion/sqllogictest/test_files/subquery.slt index 01f7dd904186..155a176e8578 100644 --- a/datafusion/sqllogictest/test_files/subquery.slt +++ b/datafusion/sqllogictest/test_files/subquery.slt @@ -999,7 +999,7 @@ true true -# issue: https://github.com/apache/arrow-datafusion/issues/7027 +# issue: https://github.com/apache/datafusion/issues/7027 query TTTT rowsort SELECT * FROM (VALUES ('catan-prod1-daily', 'success')) as jobs(cron_job_name, status) @@ -1038,7 +1038,7 @@ create table t(a bigint); # Result of query below shouldn't depend on # number of optimization passes -# See issue: https://github.com/apache/arrow-datafusion/issues/8296 +# See issue: https://github.com/apache/datafusion/issues/8296 statement ok set datafusion.optimizer.max_passes = 1; diff --git a/datafusion/sqllogictest/test_files/timestamps.slt b/datafusion/sqllogictest/test_files/timestamps.slt index 491b9b810687..32a28231d034 100644 --- a/datafusion/sqllogictest/test_files/timestamps.slt +++ b/datafusion/sqllogictest/test_files/timestamps.slt @@ -1328,7 +1328,7 @@ from timestamp_utc order by ts; # date trunc in a timezone with DST across DST boundary (note the date-trunc hour value repeats) -# Test for https://github.com/apache/arrow-datafusion/issues/8899 +# Test for https://github.com/apache/datafusion/issues/8899 query PPPP select ts, date_trunc('month', ts), date_trunc('day', ts), date_trunc('hour', ts) from timestamp_berlin order by ts; @@ -1412,7 +1412,7 @@ from timestamp_utc order by ts; # date trunc in a timezone with DST across DST boundary (note there is no midnight on 2018-11-04) -# Test for 
https://github.com/apache/arrow-datafusion/issues/8899 +# Test for https://github.com/apache/datafusion/issues/8899 query PPPP select ts, date_trunc('month', ts), date_trunc('day', ts), date_trunc('hour', ts) from timestamp_sao_paulo order by ts; @@ -1625,7 +1625,7 @@ drop table ts_data_secs ########## -## Timezone impact on builtin scalar functions +## Timezone impact on scalar functions # # server time = +07 ########## @@ -1690,7 +1690,7 @@ SELECT date_part('hour', TIMESTAMPTZ '2000-01-01T01:01:01Z') as part ########## -## Timezone impact on builtin scalar functions +## Timezone impact on scalar functions # # server time = UTC ########## @@ -1773,7 +1773,7 @@ SELECT date_part('hour', TIMESTAMPTZ '2000-01-01T01:01:01+07') as part ########## -## Timezone impact on builtin scalar functions +## Timezone impact on scalar functions # # irregular offsets ########## diff --git a/datafusion/sqllogictest/test_files/tpch/q11.slt.part b/datafusion/sqllogictest/test_files/tpch/q11.slt.part index 55b38333ca38..3050af6f89a2 100644 --- a/datafusion/sqllogictest/test_files/tpch/q11.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/q11.slt.part @@ -75,10 +75,10 @@ logical_plan 26)----------------------TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = Utf8("GERMANY")] physical_plan 01)GlobalLimitExec: skip=0, fetch=10 -02)--SortPreservingMergeExec: [value@1 DESC], fetch=10 -03)----SortExec: TopK(fetch=10), expr=[value@1 DESC] -04)------ProjectionExec: expr=[ps_partkey@0 as ps_partkey, SUM(partsupp.ps_supplycost * partsupp.ps_availqty)@1 as value] -05)--------NestedLoopJoinExec: join_type=Inner, filter=CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Decimal128(38, 15)) > SUM(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)@1 +02)--SortExec: TopK(fetch=10), expr=[value@1 DESC] +03)----ProjectionExec: expr=[ps_partkey@0 as ps_partkey, SUM(partsupp.ps_supplycost * partsupp.ps_availqty)@1 as value] 
+04)------NestedLoopJoinExec: join_type=Inner, filter=CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Decimal128(38, 15)) > SUM(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)@1 +05)--------CoalescePartitionsExec 06)----------AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[SUM(partsupp.ps_supplycost * partsupp.ps_availqty)] 07)------------CoalesceBatchesExec: target_batch_size=8192 08)--------------RepartitionExec: partitioning=Hash([ps_partkey@0], 4), input_partitions=4 @@ -103,30 +103,30 @@ physical_plan 27)------------------------------FilterExec: n_name@1 = GERMANY 28)--------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 29)----------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/nation.tbl]]}, projection=[n_nationkey, n_name], has_header=false -30)----------ProjectionExec: expr=[CAST(CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Float64) * 0.0001 AS Decimal128(38, 15)) as SUM(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)] -31)------------AggregateExec: mode=Final, gby=[], aggr=[SUM(partsupp.ps_supplycost * partsupp.ps_availqty)] -32)--------------CoalescePartitionsExec -33)----------------AggregateExec: mode=Partial, gby=[], aggr=[SUM(partsupp.ps_supplycost * partsupp.ps_availqty)] -34)------------------CoalesceBatchesExec: target_batch_size=8192 -35)--------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_nationkey@2, n_nationkey@0)], projection=[ps_availqty@0, ps_supplycost@1] -36)----------------------CoalesceBatchesExec: target_batch_size=8192 -37)------------------------RepartitionExec: partitioning=Hash([s_nationkey@2], 4), input_partitions=4 -38)--------------------------CoalesceBatchesExec: target_batch_size=8192 -39)----------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(ps_suppkey@0, s_suppkey@0)], 
projection=[ps_availqty@1, ps_supplycost@2, s_nationkey@4] -40)------------------------------CoalesceBatchesExec: target_batch_size=8192 -41)--------------------------------RepartitionExec: partitioning=Hash([ps_suppkey@0], 4), input_partitions=4 -42)----------------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:0..2932049], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:2932049..5864098], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:5864098..8796147], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:8796147..11728193]]}, projection=[ps_suppkey, ps_availqty, ps_supplycost], has_header=false -43)------------------------------CoalesceBatchesExec: target_batch_size=8192 -44)--------------------------------RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 -45)----------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -46)------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/supplier.tbl]]}, projection=[s_suppkey, s_nationkey], has_header=false -47)----------------------CoalesceBatchesExec: target_batch_size=8192 -48)------------------------RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 -49)--------------------------ProjectionExec: expr=[n_nationkey@0 as n_nationkey] -50)----------------------------CoalesceBatchesExec: target_batch_size=8192 -51)------------------------------FilterExec: n_name@1 = GERMANY -52)--------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -53)----------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/nation.tbl]]}, projection=[n_nationkey, n_name], has_header=false +30)--------ProjectionExec: 
expr=[CAST(CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Float64) * 0.0001 AS Decimal128(38, 15)) as SUM(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)] +31)----------AggregateExec: mode=Final, gby=[], aggr=[SUM(partsupp.ps_supplycost * partsupp.ps_availqty)] +32)------------CoalescePartitionsExec +33)--------------AggregateExec: mode=Partial, gby=[], aggr=[SUM(partsupp.ps_supplycost * partsupp.ps_availqty)] +34)----------------CoalesceBatchesExec: target_batch_size=8192 +35)------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_nationkey@2, n_nationkey@0)], projection=[ps_availqty@0, ps_supplycost@1] +36)--------------------CoalesceBatchesExec: target_batch_size=8192 +37)----------------------RepartitionExec: partitioning=Hash([s_nationkey@2], 4), input_partitions=4 +38)------------------------CoalesceBatchesExec: target_batch_size=8192 +39)--------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(ps_suppkey@0, s_suppkey@0)], projection=[ps_availqty@1, ps_supplycost@2, s_nationkey@4] +40)----------------------------CoalesceBatchesExec: target_batch_size=8192 +41)------------------------------RepartitionExec: partitioning=Hash([ps_suppkey@0], 4), input_partitions=4 +42)--------------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:0..2932049], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:2932049..5864098], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:5864098..8796147], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:8796147..11728193]]}, projection=[ps_suppkey, ps_availqty, ps_supplycost], has_header=false +43)----------------------------CoalesceBatchesExec: target_batch_size=8192 +44)------------------------------RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 
+45)--------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +46)----------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/supplier.tbl]]}, projection=[s_suppkey, s_nationkey], has_header=false +47)--------------------CoalesceBatchesExec: target_batch_size=8192 +48)----------------------RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 +49)------------------------ProjectionExec: expr=[n_nationkey@0 as n_nationkey] +50)--------------------------CoalesceBatchesExec: target_batch_size=8192 +51)----------------------------FilterExec: n_name@1 = GERMANY +52)------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +53)--------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/nation.tbl]]}, projection=[n_nationkey, n_name], has_header=false diff --git a/datafusion/sqllogictest/test_files/tpch/q22.slt.part b/datafusion/sqllogictest/test_files/tpch/q22.slt.part index 73b3c1602565..98c8ba396552 100644 --- a/datafusion/sqllogictest/test_files/tpch/q22.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/q22.slt.part @@ -82,26 +82,28 @@ physical_plan 06)----------RepartitionExec: partitioning=Hash([cntrycode@0], 4), input_partitions=4 07)------------AggregateExec: mode=Partial, gby=[cntrycode@0 as cntrycode], aggr=[COUNT(*), SUM(custsale.c_acctbal)] 08)--------------ProjectionExec: expr=[substr(c_phone@0, 1, 2) as cntrycode, c_acctbal@1 as c_acctbal] -09)----------------NestedLoopJoinExec: join_type=Inner, filter=CAST(c_acctbal@0 AS Decimal128(19, 6)) > AVG(customer.c_acctbal)@1 -10)------------------CoalesceBatchesExec: target_batch_size=8192 -11)--------------------HashJoinExec: mode=Partitioned, join_type=LeftAnti, on=[(c_custkey@0, o_custkey@0)], projection=[c_phone@1, c_acctbal@2] +09)----------------RepartitionExec: 
partitioning=RoundRobinBatch(4), input_partitions=1 +10)------------------NestedLoopJoinExec: join_type=Inner, filter=CAST(c_acctbal@0 AS Decimal128(19, 6)) > AVG(customer.c_acctbal)@1 +11)--------------------CoalescePartitionsExec 12)----------------------CoalesceBatchesExec: target_batch_size=8192 -13)------------------------RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 +13)------------------------HashJoinExec: mode=Partitioned, join_type=LeftAnti, on=[(c_custkey@0, o_custkey@0)], projection=[c_phone@1, c_acctbal@2] 14)--------------------------CoalesceBatchesExec: target_batch_size=8192 -15)----------------------------FilterExec: Use substr(c_phone@1, 1, 2) IN (SET) ([Literal { value: Utf8("13") }, Literal { value: Utf8("31") }, Literal { value: Utf8("23") }, Literal { value: Utf8("29") }, Literal { value: Utf8("30") }, Literal { value: Utf8("18") }, Literal { value: Utf8("17") }]) -16)------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -17)--------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/customer.tbl]]}, projection=[c_custkey, c_phone, c_acctbal], has_header=false -18)----------------------CoalesceBatchesExec: target_batch_size=8192 -19)------------------------RepartitionExec: partitioning=Hash([o_custkey@0], 4), input_partitions=4 -20)--------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:0..4223281], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:4223281..8446562], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:8446562..12669843], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:12669843..16893122]]}, projection=[o_custkey], has_header=false -21)------------------AggregateExec: mode=Final, gby=[], aggr=[AVG(customer.c_acctbal)] 
-22)--------------------CoalescePartitionsExec -23)----------------------AggregateExec: mode=Partial, gby=[], aggr=[AVG(customer.c_acctbal)] -24)------------------------ProjectionExec: expr=[c_acctbal@1 as c_acctbal] -25)--------------------------CoalesceBatchesExec: target_batch_size=8192 -26)----------------------------FilterExec: c_acctbal@1 > Some(0),15,2 AND Use substr(c_phone@0, 1, 2) IN (SET) ([Literal { value: Utf8("13") }, Literal { value: Utf8("31") }, Literal { value: Utf8("23") }, Literal { value: Utf8("29") }, Literal { value: Utf8("30") }, Literal { value: Utf8("18") }, Literal { value: Utf8("17") }]) -27)------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -28)--------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/customer.tbl]]}, projection=[c_phone, c_acctbal], has_header=false +15)----------------------------RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 +16)------------------------------CoalesceBatchesExec: target_batch_size=8192 +17)--------------------------------FilterExec: Use substr(c_phone@1, 1, 2) IN (SET) ([Literal { value: Utf8("13") }, Literal { value: Utf8("31") }, Literal { value: Utf8("23") }, Literal { value: Utf8("29") }, Literal { value: Utf8("30") }, Literal { value: Utf8("18") }, Literal { value: Utf8("17") }]) +18)----------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +19)------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/customer.tbl]]}, projection=[c_custkey, c_phone, c_acctbal], has_header=false +20)--------------------------CoalesceBatchesExec: target_batch_size=8192 +21)----------------------------RepartitionExec: partitioning=Hash([o_custkey@0], 4), input_partitions=4 +22)------------------------------CsvExec: file_groups={4 groups: 
[[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:0..4223281], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:4223281..8446562], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:8446562..12669843], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:12669843..16893122]]}, projection=[o_custkey], has_header=false +23)--------------------AggregateExec: mode=Final, gby=[], aggr=[AVG(customer.c_acctbal)] +24)----------------------CoalescePartitionsExec +25)------------------------AggregateExec: mode=Partial, gby=[], aggr=[AVG(customer.c_acctbal)] +26)--------------------------ProjectionExec: expr=[c_acctbal@1 as c_acctbal] +27)----------------------------CoalesceBatchesExec: target_batch_size=8192 +28)------------------------------FilterExec: c_acctbal@1 > Some(0),15,2 AND Use substr(c_phone@0, 1, 2) IN (SET) ([Literal { value: Utf8("13") }, Literal { value: Utf8("31") }, Literal { value: Utf8("23") }, Literal { value: Utf8("29") }, Literal { value: Utf8("30") }, Literal { value: Utf8("18") }, Literal { value: Utf8("17") }]) +29)--------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +30)----------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/customer.tbl]]}, projection=[c_phone, c_acctbal], has_header=false query TIR diff --git a/datafusion/sqllogictest/test_files/unnest.slt b/datafusion/sqllogictest/test_files/unnest.slt index 38207fa7d1d6..28f3369ca0a2 100644 --- a/datafusion/sqllogictest/test_files/unnest.slt +++ b/datafusion/sqllogictest/test_files/unnest.slt @@ -108,7 +108,7 @@ NULL NULL ## Unnest with additional column -## Issue: https://github.com/apache/arrow-datafusion/issues/9349 +## Issue: https://github.com/apache/datafusion/issues/9349 query II select unnest(column1), column3 from unnest_table; ---- diff --git 
a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index bcdee9593178..7320688cff1c 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -537,7 +537,7 @@ LIMIT 5 # window_frame_large_range # Range offset 10000 is too big for Int8 (i.e. the type of c3). # In this case, we should be able to still produce correct results. -# See the issue: https://github.com/apache/arrow-datafusion/issues/5346 +# See the issue: https://github.com/apache/datafusion/issues/5346 # below over clause is equivalent to OVER(ORDER BY c3 DESC RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) # in terms of behaviour. query I @@ -2457,7 +2457,7 @@ physical_plan # We should remove the type checking in physical plan after we don't skip # the failed optimizing rules by default. -# (see more in https://github.com/apache/arrow-datafusion/issues/4615) +# (see more in https://github.com/apache/datafusion/issues/4615) statement ok set datafusion.optimizer.skip_failed_rules = true diff --git a/datafusion/substrait/src/logical_plan/consumer.rs b/datafusion/substrait/src/logical_plan/consumer.rs index 73782ab27f71..fab4528c0b42 100644 --- a/datafusion/substrait/src/logical_plan/consumer.rs +++ b/datafusion/substrait/src/logical_plan/consumer.rs @@ -23,12 +23,12 @@ use datafusion::common::{ use datafusion::execution::FunctionRegistry; use datafusion::logical_expr::{ - aggregate_function, expr::find_df_window_func, BinaryExpr, BuiltinScalarFunction, - Case, Expr, LogicalPlan, Operator, + aggregate_function, expr::find_df_window_func, BinaryExpr, Case, Expr, LogicalPlan, + Operator, ScalarUDF, }; use datafusion::logical_expr::{ expr, Cast, Extension, GroupingSet, Like, LogicalPlanBuilder, Partitioning, - Repartition, ScalarUDF, Subquery, WindowFrameBound, WindowFrameUnits, + Repartition, Subquery, WindowFrameBound, WindowFrameUnits, }; use datafusion::prelude::JoinType; use 
datafusion::sql::TableReference; @@ -75,7 +75,6 @@ use crate::variation_const::{ }; enum ScalarFunctionType { - Builtin(BuiltinScalarFunction), Op(Operator), Expr(BuiltinExprBuilder), Udf(Arc), @@ -127,10 +126,6 @@ fn scalar_function_type_from_str( return Ok(ScalarFunctionType::Op(op)); } - if let Ok(fun) = BuiltinScalarFunction::from_str(name) { - return Ok(ScalarFunctionType::Builtin(fun)); - } - if let Some(builder) = BuiltinExprBuilder::try_from_name(name) { return Ok(ScalarFunctionType::Expr(builder)); } @@ -910,18 +905,6 @@ pub async fn from_substrait_rex( expr::ScalarFunction::new_udf(fun, args), ))) } - ScalarFunctionType::Builtin(fun) => { - let args = decode_arguments( - ctx, - input_schema, - extensions, - f.arguments.as_slice(), - ) - .await?; - Ok(Arc::new(Expr::ScalarFunction(expr::ScalarFunction::new( - fun, args, - )))) - } ScalarFunctionType::Op(op) => { if f.arguments.len() != 2 { return not_impl_err!( diff --git a/dev/changelog/10.0.0.md b/dev/changelog/10.0.0.md index d86ac0b0cc75..bc77b14cad0a 100644 --- a/dev/changelog/10.0.0.md +++ b/dev/changelog/10.0.0.md @@ -17,143 +17,143 @@ under the License. 
--> -## [10.0.0](https://github.com/apache/arrow-datafusion/tree/10.0.0) (2022-07-12) +## [10.0.0](https://github.com/apache/datafusion/tree/10.0.0) (2022-07-12) -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/9.0.0...10.0.0) +[Full Changelog](https://github.com/apache/datafusion/compare/9.0.0...10.0.0) **Breaking changes:** -- Convert batch_size to config option [\#2771](https://github.com/apache/arrow-datafusion/pull/2771) ([andygrove](https://github.com/andygrove)) -- MINOR: Remove Offset struct [\#2734](https://github.com/apache/arrow-datafusion/pull/2734) ([andygrove](https://github.com/andygrove)) -- feat: async extension planner [\#2713](https://github.com/apache/arrow-datafusion/pull/2713) ([waynexia](https://github.com/waynexia)) -- Switch to object_store crate \(\#2489\) [\#2677](https://github.com/apache/arrow-datafusion/pull/2677) ([tustvold](https://github.com/tustvold)) +- Convert batch_size to config option [\#2771](https://github.com/apache/datafusion/pull/2771) ([andygrove](https://github.com/andygrove)) +- MINOR: Remove Offset struct [\#2734](https://github.com/apache/datafusion/pull/2734) ([andygrove](https://github.com/andygrove)) +- feat: async extension planner [\#2713](https://github.com/apache/datafusion/pull/2713) ([waynexia](https://github.com/waynexia)) +- Switch to object_store crate \(\#2489\) [\#2677](https://github.com/apache/datafusion/pull/2677) ([tustvold](https://github.com/tustvold)) **Implemented enhancements:** -- update documentation, fix styling to match main Arrow project [\#2864](https://github.com/apache/arrow-datafusion/issues/2864) -- Update top-level README [\#2850](https://github.com/apache/arrow-datafusion/issues/2850) -- \[Question\]How to call an async function in `ExecutionPlan::exec` method? 
[\#2847](https://github.com/apache/arrow-datafusion/issues/2847) -- Add `DataFrame::with_column` [\#2844](https://github.com/apache/arrow-datafusion/issues/2844) -- Improve ergonomics of physical expr `lit` [\#2827](https://github.com/apache/arrow-datafusion/issues/2827) -- Add Python examples for reading CSV and query by SQL in Doc [\#2824](https://github.com/apache/arrow-datafusion/issues/2824) -- eliminate multi limit-offset nodes to EmptyRelation if possible [\#2822](https://github.com/apache/arrow-datafusion/issues/2822) -- Make `LogicalPlan::Union` be consistent with other plans [\#2816](https://github.com/apache/arrow-datafusion/issues/2816) -- Use coerced data type from value and list expressions during planning inlist expression [\#2793](https://github.com/apache/arrow-datafusion/issues/2793) -- Add configuration option to enable/disalbe `CoalesceBatchesExec` [\#2790](https://github.com/apache/arrow-datafusion/issues/2790) -- Simplify FilterNullJoinKeys rule [\#2780](https://github.com/apache/arrow-datafusion/issues/2780) -- Allow configuration settings to be specified with environment variables [\#2776](https://github.com/apache/arrow-datafusion/issues/2776) -- Automatically update `configs.md` in user guide [\#2770](https://github.com/apache/arrow-datafusion/issues/2770) -- Support multiple paths for ListingTableScanNode [\#2768](https://github.com/apache/arrow-datafusion/issues/2768) -- Reduce outer joins [\#2757](https://github.com/apache/arrow-datafusion/issues/2757) -- support data type coerced and decimal in INLIST expr [\#2755](https://github.com/apache/arrow-datafusion/issues/2755) -- Change ExtensionPlanner::plan_extension\(\) to an async function [\#2749](https://github.com/apache/arrow-datafusion/issues/2749) -- Add `IsNotNull` filter to join inputs if one side of join condition does not allow null [\#2739](https://github.com/apache/arrow-datafusion/issues/2739) -- Sort preserving MergeJoin 
[\#2698](https://github.com/apache/arrow-datafusion/issues/2698) -- Improve readability of table scan projections in query plans [\#2697](https://github.com/apache/arrow-datafusion/issues/2697) -- DataFusion 9.0.0 Release [\#2676](https://github.com/apache/arrow-datafusion/issues/2676) -- Improve UX for `UNION` vs `UNION ALL` \(introduce a LogicalPlan::Distinct\) [\#2573](https://github.com/apache/arrow-datafusion/issues/2573) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] -- Implement some way to show the sql used to create a view [\#2529](https://github.com/apache/arrow-datafusion/issues/2529) -- Consider adopting IOx ObjectStore abstraction [\#2489](https://github.com/apache/arrow-datafusion/issues/2489) -- Support `sum0` as a built-in agg function [\#2067](https://github.com/apache/arrow-datafusion/issues/2067) -- implement grouping sets, cubes, and rollups [\#1327](https://github.com/apache/arrow-datafusion/issues/1327) -- Ruby bindings [\#1114](https://github.com/apache/arrow-datafusion/issues/1114) -- Support dates in hash join [\#2746](https://github.com/apache/arrow-datafusion/pull/2746) ([andygrove](https://github.com/andygrove)) +- update documentation, fix styling to match main Arrow project [\#2864](https://github.com/apache/datafusion/issues/2864) +- Update top-level README [\#2850](https://github.com/apache/datafusion/issues/2850) +- \[Question\]How to call an async function in `ExecutionPlan::exec` method? 
[\#2847](https://github.com/apache/datafusion/issues/2847) +- Add `DataFrame::with_column` [\#2844](https://github.com/apache/datafusion/issues/2844) +- Improve ergonomics of physical expr `lit` [\#2827](https://github.com/apache/datafusion/issues/2827) +- Add Python examples for reading CSV and query by SQL in Doc [\#2824](https://github.com/apache/datafusion/issues/2824) +- eliminate multi limit-offset nodes to EmptyRelation if possible [\#2822](https://github.com/apache/datafusion/issues/2822) +- Make `LogicalPlan::Union` be consistent with other plans [\#2816](https://github.com/apache/datafusion/issues/2816) +- Use coerced data type from value and list expressions during planning inlist expression [\#2793](https://github.com/apache/datafusion/issues/2793) +- Add configuration option to enable/disalbe `CoalesceBatchesExec` [\#2790](https://github.com/apache/datafusion/issues/2790) +- Simplify FilterNullJoinKeys rule [\#2780](https://github.com/apache/datafusion/issues/2780) +- Allow configuration settings to be specified with environment variables [\#2776](https://github.com/apache/datafusion/issues/2776) +- Automatically update `configs.md` in user guide [\#2770](https://github.com/apache/datafusion/issues/2770) +- Support multiple paths for ListingTableScanNode [\#2768](https://github.com/apache/datafusion/issues/2768) +- Reduce outer joins [\#2757](https://github.com/apache/datafusion/issues/2757) +- support data type coerced and decimal in INLIST expr [\#2755](https://github.com/apache/datafusion/issues/2755) +- Change ExtensionPlanner::plan_extension\(\) to an async function [\#2749](https://github.com/apache/datafusion/issues/2749) +- Add `IsNotNull` filter to join inputs if one side of join condition does not allow null [\#2739](https://github.com/apache/datafusion/issues/2739) +- Sort preserving MergeJoin [\#2698](https://github.com/apache/datafusion/issues/2698) +- Improve readability of table scan projections in query plans 
[\#2697](https://github.com/apache/datafusion/issues/2697) +- DataFusion 9.0.0 Release [\#2676](https://github.com/apache/datafusion/issues/2676) +- Improve UX for `UNION` vs `UNION ALL` \(introduce a LogicalPlan::Distinct\) [\#2573](https://github.com/apache/datafusion/issues/2573) [[sql](https://github.com/apache/datafusion/labels/sql)] +- Implement some way to show the sql used to create a view [\#2529](https://github.com/apache/datafusion/issues/2529) +- Consider adopting IOx ObjectStore abstraction [\#2489](https://github.com/apache/datafusion/issues/2489) +- Support `sum0` as a built-in agg function [\#2067](https://github.com/apache/datafusion/issues/2067) +- implement grouping sets, cubes, and rollups [\#1327](https://github.com/apache/datafusion/issues/1327) +- Ruby bindings [\#1114](https://github.com/apache/datafusion/issues/1114) +- Support dates in hash join [\#2746](https://github.com/apache/datafusion/pull/2746) ([andygrove](https://github.com/andygrove)) **Fixed bugs:** -- Docker Error [\#2851](https://github.com/apache/arrow-datafusion/issues/2851) -- Anti join ignores join filters [\#2842](https://github.com/apache/arrow-datafusion/issues/2842) -- Can't test or compile sub-model code after upgrade to arrow-rs 17.0.0 [\#2835](https://github.com/apache/arrow-datafusion/issues/2835) -- Not evaluate the set expr in the InList for the optimization [\#2820](https://github.com/apache/arrow-datafusion/issues/2820) -- CASE When: result type should be coercible to a common type [\#2818](https://github.com/apache/arrow-datafusion/issues/2818) -- IN/NOT IN List: NULL is not equal to NULL [\#2817](https://github.com/apache/arrow-datafusion/issues/2817) -- panic when case statement returns null [\#2798](https://github.com/apache/arrow-datafusion/issues/2798) -- InList: Can't cast the list expr data type to value expr data type directly [\#2774](https://github.com/apache/arrow-datafusion/issues/2774) -- InList Expr: expr and list values must can be converted to 
a same data type [\#2759](https://github.com/apache/arrow-datafusion/issues/2759) -- tpchgen docker syntax change prevents volume from binding [\#2751](https://github.com/apache/arrow-datafusion/issues/2751) -- Cannot join on date columns \(Unsupported data type in hasher: Date32\) [\#2744](https://github.com/apache/arrow-datafusion/issues/2744) -- `rewrite_expression` does not properly handle `Exists` and `ScalarSubquery` [\#2736](https://github.com/apache/arrow-datafusion/issues/2736) -- LocalFileSystem Not sorted by file name, As a result, the data lines queried in multiple files are out of order. [\#2730](https://github.com/apache/arrow-datafusion/issues/2730) -- Filter push down need consider alias columns [\#2725](https://github.com/apache/arrow-datafusion/issues/2725) -- Recent API change in `GlobalLimitExec` breaks compatibility with Ballista [\#2720](https://github.com/apache/arrow-datafusion/issues/2720) -- Common Subexpression Eliminiation pass errors if run twice on some plans: Schema contains duplicate unqualified field name 'IsNull-Column-sys.host' [\#2712](https://github.com/apache/arrow-datafusion/issues/2712) -- The data type is not compatible with other system, for example spark or PG database [\#1379](https://github.com/apache/arrow-datafusion/issues/1379) +- Docker Error [\#2851](https://github.com/apache/datafusion/issues/2851) +- Anti join ignores join filters [\#2842](https://github.com/apache/datafusion/issues/2842) +- Can't test or compile sub-model code after upgrade to arrow-rs 17.0.0 [\#2835](https://github.com/apache/datafusion/issues/2835) +- Not evaluate the set expr in the InList for the optimization [\#2820](https://github.com/apache/datafusion/issues/2820) +- CASE When: result type should be coercible to a common type [\#2818](https://github.com/apache/datafusion/issues/2818) +- IN/NOT IN List: NULL is not equal to NULL [\#2817](https://github.com/apache/datafusion/issues/2817) +- panic when case statement returns null 
[\#2798](https://github.com/apache/datafusion/issues/2798) +- InList: Can't cast the list expr data type to value expr data type directly [\#2774](https://github.com/apache/datafusion/issues/2774) +- InList Expr: expr and list values must can be converted to a same data type [\#2759](https://github.com/apache/datafusion/issues/2759) +- tpchgen docker syntax change prevents volume from binding [\#2751](https://github.com/apache/datafusion/issues/2751) +- Cannot join on date columns \(Unsupported data type in hasher: Date32\) [\#2744](https://github.com/apache/datafusion/issues/2744) +- `rewrite_expression` does not properly handle `Exists` and `ScalarSubquery` [\#2736](https://github.com/apache/datafusion/issues/2736) +- LocalFileSystem Not sorted by file name, As a result, the data lines queried in multiple files are out of order. [\#2730](https://github.com/apache/datafusion/issues/2730) +- Filter push down need consider alias columns [\#2725](https://github.com/apache/datafusion/issues/2725) +- Recent API change in `GlobalLimitExec` breaks compatibility with Ballista [\#2720](https://github.com/apache/datafusion/issues/2720) +- Common Subexpression Eliminiation pass errors if run twice on some plans: Schema contains duplicate unqualified field name 'IsNull-Column-sys.host' [\#2712](https://github.com/apache/datafusion/issues/2712) +- The data type is not compatible with other system, for example spark or PG database [\#1379](https://github.com/apache/datafusion/issues/1379) **Documentation updates:** -- Fix docs styling [\#2865](https://github.com/apache/arrow-datafusion/pull/2865) ([kmitchener](https://github.com/kmitchener)) -- Various updates to top-level README [\#2854](https://github.com/apache/arrow-datafusion/pull/2854) ([andygrove](https://github.com/andygrove)) -- MINOR: Add documentation for running integration tests [\#2839](https://github.com/apache/arrow-datafusion/pull/2839) ([andygrove](https://github.com/andygrove)) -- add csv registration and sql 
query to examples [\#2825](https://github.com/apache/arrow-datafusion/pull/2825) ([waitingkuo](https://github.com/waitingkuo)) -- \[minor\] refine doc [\#2753](https://github.com/apache/arrow-datafusion/pull/2753) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Fix docs styling [\#2865](https://github.com/apache/datafusion/pull/2865) ([kmitchener](https://github.com/kmitchener)) +- Various updates to top-level README [\#2854](https://github.com/apache/datafusion/pull/2854) ([andygrove](https://github.com/andygrove)) +- MINOR: Add documentation for running integration tests [\#2839](https://github.com/apache/datafusion/pull/2839) ([andygrove](https://github.com/andygrove)) +- add csv registration and sql query to examples [\#2825](https://github.com/apache/datafusion/pull/2825) ([waitingkuo](https://github.com/waitingkuo)) +- \[minor\] refine doc [\#2753](https://github.com/apache/datafusion/pull/2753) ([Ted-Jiang](https://github.com/Ted-Jiang)) **Closed issues:** -- Consider adding a prominent note in the readme about ballista [\#2853](https://github.com/apache/arrow-datafusion/issues/2853) -- support decimal in \(NULL\) [\#2800](https://github.com/apache/arrow-datafusion/issues/2800) -- InList: Don't treat Null as UTF8\(None\) [\#2782](https://github.com/apache/arrow-datafusion/issues/2782) -- InList: don't need to treat Null as UTF8 data type [\#2773](https://github.com/apache/arrow-datafusion/issues/2773) -- Implement extensible configuration mechanism [\#138](https://github.com/apache/arrow-datafusion/issues/138) +- Consider adding a prominent note in the readme about ballista [\#2853](https://github.com/apache/datafusion/issues/2853) +- support decimal in \(NULL\) [\#2800](https://github.com/apache/datafusion/issues/2800) +- InList: Don't treat Null as UTF8\(None\) [\#2782](https://github.com/apache/datafusion/issues/2782) +- InList: don't need to treat Null as UTF8 data type [\#2773](https://github.com/apache/datafusion/issues/2773) +- Implement extensible 
configuration mechanism [\#138](https://github.com/apache/datafusion/issues/138) **Merged pull requests:** -- Update CONTRIBUTING.md [\#2876](https://github.com/apache/arrow-datafusion/pull/2876) ([waitingkuo](https://github.com/waitingkuo)) -- Make LogicalPlan::Union be consistent with other plans [\#2868](https://github.com/apache/arrow-datafusion/pull/2868) ([comphead](https://github.com/comphead)) -- minor: remove unneeded files from project root [\#2863](https://github.com/apache/arrow-datafusion/pull/2863) ([kmitchener](https://github.com/kmitchener)) -- chore: make cargo clippy happy in nigtly [\#2860](https://github.com/apache/arrow-datafusion/pull/2860) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) -- Update to arrow 18.0.0 [\#2856](https://github.com/apache/arrow-datafusion/pull/2856) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- chore: remove ballista-related docker-compose file [\#2852](https://github.com/apache/arrow-datafusion/pull/2852) ([xudong963](https://github.com/xudong963)) -- Adding dataframe with_column function [\#2849](https://github.com/apache/arrow-datafusion/pull/2849) ([comphead](https://github.com/comphead)) -- anti joins now respect join filters [\#2843](https://github.com/apache/arrow-datafusion/pull/2843) ([andygrove](https://github.com/andygrove)) -- MINOR: make name meaningful and clean up code [\#2841](https://github.com/apache/arrow-datafusion/pull/2841) ([liukun4515](https://github.com/liukun4515)) -- Make `lit` implementation more concise [\#2838](https://github.com/apache/arrow-datafusion/pull/2838) ([alamb](https://github.com/alamb)) -- InList: set/list value must be evaluated to get the values [\#2834](https://github.com/apache/arrow-datafusion/pull/2834) ([liukun4515](https://github.com/liukun4515)) -- Add SHOW CREATE TABLE with initial support for views 
[\#2830](https://github.com/apache/arrow-datafusion/pull/2830) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([mrob95](https://github.com/mrob95)) -- Improve ergonomics of physical expr `lit` [\#2828](https://github.com/apache/arrow-datafusion/pull/2828) ([alamb](https://github.com/alamb)) -- Eliminate multi limit-offset nodes to emptyRelation [\#2823](https://github.com/apache/arrow-datafusion/pull/2823) ([AssHero](https://github.com/AssHero)) -- Fix the ci [\#2821](https://github.com/apache/arrow-datafusion/pull/2821) ([liukun4515](https://github.com/liukun4515)) -- CaseWhen: coerce the all then and else data type to a common data type [\#2819](https://github.com/apache/arrow-datafusion/pull/2819) ([liukun4515](https://github.com/liukun4515)) -- Fix `ScalarValue::isNull` calculation [\#2815](https://github.com/apache/arrow-datafusion/pull/2815) ([alamb](https://github.com/alamb)) -- Fix nullability calculation for `CASE` expressions [\#2814](https://github.com/apache/arrow-datafusion/pull/2814) ([alamb](https://github.com/alamb)) -- Bump numpy from 1.21.3 to 1.22.0 in /integration-tests [\#2811](https://github.com/apache/arrow-datafusion/pull/2811) ([xudong963](https://github.com/xudong963)) -- Fix data type calculation for `CaseExpr` s with `NULLs` [\#2810](https://github.com/apache/arrow-datafusion/pull/2810) ([AssHero](https://github.com/AssHero)) -- InList: fix bug for comparing with Null in the list using the set optimization [\#2809](https://github.com/apache/arrow-datafusion/pull/2809) ([liukun4515](https://github.com/liukun4515)) -- Use specialized dictionary kernels \(\#1178\) [\#2808](https://github.com/apache/arrow-datafusion/pull/2808) ([tustvold](https://github.com/tustvold)) -- fix schema nullability for `information_schema` schema [\#2804](https://github.com/apache/arrow-datafusion/pull/2804) ([alamb](https://github.com/alamb)) -- fix: correctly calculate join output schema nullability 
[\#2803](https://github.com/apache/arrow-datafusion/pull/2803) ([alamb](https://github.com/alamb)) -- Correct schema nullability declaration in tests [\#2802](https://github.com/apache/arrow-datafusion/pull/2802) ([alamb](https://github.com/alamb)) -- Don't treat Null as UTF8\(None\) and change error info. [\#2801](https://github.com/apache/arrow-datafusion/pull/2801) ([liukun4515](https://github.com/liukun4515)) -- MINOR: Remove reference to docker image that is no longer available [\#2795](https://github.com/apache/arrow-datafusion/pull/2795) ([andygrove](https://github.com/andygrove)) -- Use coerced type in inlist expr planning [\#2794](https://github.com/apache/arrow-datafusion/pull/2794) ([viirya](https://github.com/viirya)) -- Add LogicalPlan::Distinct [\#2792](https://github.com/apache/arrow-datafusion/pull/2792) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([mrob95](https://github.com/mrob95)) -- Add config option for coalesce_batches physical optimization rule, make optional [\#2791](https://github.com/apache/arrow-datafusion/pull/2791) ([andygrove](https://github.com/andygrove)) -- Improve readability of table scan projections in query plans \(remove `Some` and `None`\) [\#2789](https://github.com/apache/arrow-datafusion/pull/2789) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([comphead](https://github.com/comphead)) -- Simplify FilterNullJoinKeys rule [\#2781](https://github.com/apache/arrow-datafusion/pull/2781) ([andygrove](https://github.com/andygrove)) -- MINOR: re-export sqlparser from datafusion-sql crate [\#2779](https://github.com/apache/arrow-datafusion/pull/2779) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) -- Update to arrow 17.0.0 [\#2778](https://github.com/apache/arrow-datafusion/pull/2778) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Support multiple paths for ListingTableScanNode 
[\#2775](https://github.com/apache/arrow-datafusion/pull/2775) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Remove expr_sub_expressions and rewrite_expression functions [\#2772](https://github.com/apache/arrow-datafusion/pull/2772) ([mrob95](https://github.com/mrob95)) -- minor: update cranelift related dependencies [\#2769](https://github.com/apache/arrow-datafusion/pull/2769) ([xudong963](https://github.com/xudong963)) -- minor: panic rather than fail silently on bad dictionary in hash join [\#2767](https://github.com/apache/arrow-datafusion/pull/2767) ([alamb](https://github.com/alamb)) -- MINOR: make `prettier` use consistent between CI and contributing guide [\#2766](https://github.com/apache/arrow-datafusion/pull/2766) ([andygrove](https://github.com/andygrove)) -- Rewrite subexpressions of InSubquery in rewrite_expression [\#2765](https://github.com/apache/arrow-datafusion/pull/2765) ([mrob95](https://github.com/mrob95)) -- Support `DataType::Decimal` for `IN` and `NOT IN` expressions [\#2764](https://github.com/apache/arrow-datafusion/pull/2764) ([liukun4515](https://github.com/liukun4515)) -- Implement extensible configuration mechanism [\#2754](https://github.com/apache/arrow-datafusion/pull/2754) ([andygrove](https://github.com/andygrove)) -- Remove redundant docker argument [\#2752](https://github.com/apache/arrow-datafusion/pull/2752) ([avantgardnerio](https://github.com/avantgardnerio)) -- Add optimizer pass to reduce `left`/`right`/`full` joins to `inner` join if possible [\#2750](https://github.com/apache/arrow-datafusion/pull/2750) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([AssHero](https://github.com/AssHero)) -- MINOR: Remove legacy CLI context enum [\#2748](https://github.com/apache/arrow-datafusion/pull/2748) ([andygrove](https://github.com/andygrove)) -- CSE unit test for duplicate fields [\#2747](https://github.com/apache/arrow-datafusion/pull/2747) ([waynexia](https://github.com/waynexia)) -- MINOR: Improve 
unsupported data type error message [\#2745](https://github.com/apache/arrow-datafusion/pull/2745) ([andygrove](https://github.com/andygrove)) -- Add optimizer rule to filter out null keys before a join [\#2740](https://github.com/apache/arrow-datafusion/pull/2740) ([andygrove](https://github.com/andygrove)) -- Sort file names in a directory \#2730 [\#2735](https://github.com/apache/arrow-datafusion/pull/2735) ([yourenawo](https://github.com/yourenawo)) -- fix: filter push down with `InList` expressions [\#2729](https://github.com/apache/arrow-datafusion/pull/2729) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- \[Minor\] add debug info in optimizer.rs [\#2726](https://github.com/apache/arrow-datafusion/pull/2726) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Add public API for GlobalLimitExec and LocalLimitExec [\#2722](https://github.com/apache/arrow-datafusion/pull/2722) ([andygrove](https://github.com/andygrove)) -- Add additional data types are supported in hash join [\#2721](https://github.com/apache/arrow-datafusion/pull/2721) ([AssHero](https://github.com/AssHero)) -- Upgrade to arrow `16.0.0` [\#2718](https://github.com/apache/arrow-datafusion/pull/2718) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Fix clippy warnings with toolchain 1.63 [\#2717](https://github.com/apache/arrow-datafusion/pull/2717) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([waynexia](https://github.com/waynexia)) -- Support for GROUPING SETS/CUBE/ROLLUP [\#2716](https://github.com/apache/arrow-datafusion/pull/2716) ([thinkharderdev](https://github.com/thinkharderdev)) -- fix: check redundant fields while building projection plan [\#2715](https://github.com/apache/arrow-datafusion/pull/2715) ([waynexia](https://github.com/waynexia)) -- Sort preserving `SortMergeJoin` [\#2699](https://github.com/apache/arrow-datafusion/pull/2699) ([korowa](https://github.com/korowa)) -- fix: union schema fix 
[\#2688](https://github.com/apache/arrow-datafusion/pull/2688) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([gandronchik](https://github.com/gandronchik)) -- Support default precision and scale to`CAST AS DECIMAL` [\#2680](https://github.com/apache/arrow-datafusion/pull/2680) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([gandronchik](https://github.com/gandronchik)) +- Update CONTRIBUTING.md [\#2876](https://github.com/apache/datafusion/pull/2876) ([waitingkuo](https://github.com/waitingkuo)) +- Make LogicalPlan::Union be consistent with other plans [\#2868](https://github.com/apache/datafusion/pull/2868) ([comphead](https://github.com/comphead)) +- minor: remove unneeded files from project root [\#2863](https://github.com/apache/datafusion/pull/2863) ([kmitchener](https://github.com/kmitchener)) +- chore: make cargo clippy happy in nigtly [\#2860](https://github.com/apache/datafusion/pull/2860) [[sql](https://github.com/apache/datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) +- Update to arrow 18.0.0 [\#2856](https://github.com/apache/datafusion/pull/2856) [[sql](https://github.com/apache/datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- chore: remove ballista-related docker-compose file [\#2852](https://github.com/apache/datafusion/pull/2852) ([xudong963](https://github.com/xudong963)) +- Adding dataframe with_column function [\#2849](https://github.com/apache/datafusion/pull/2849) ([comphead](https://github.com/comphead)) +- anti joins now respect join filters [\#2843](https://github.com/apache/datafusion/pull/2843) ([andygrove](https://github.com/andygrove)) +- MINOR: make name meaningful and clean up code [\#2841](https://github.com/apache/datafusion/pull/2841) ([liukun4515](https://github.com/liukun4515)) +- Make `lit` implementation more concise [\#2838](https://github.com/apache/datafusion/pull/2838) ([alamb](https://github.com/alamb)) +- InList: set/list value must be evaluated to get 
the values [\#2834](https://github.com/apache/datafusion/pull/2834) ([liukun4515](https://github.com/liukun4515)) +- Add SHOW CREATE TABLE with initial support for views [\#2830](https://github.com/apache/datafusion/pull/2830) [[sql](https://github.com/apache/datafusion/labels/sql)] ([mrob95](https://github.com/mrob95)) +- Improve ergonomics of physical expr `lit` [\#2828](https://github.com/apache/datafusion/pull/2828) ([alamb](https://github.com/alamb)) +- Eliminate multi limit-offset nodes to emptyRelation [\#2823](https://github.com/apache/datafusion/pull/2823) ([AssHero](https://github.com/AssHero)) +- Fix the ci [\#2821](https://github.com/apache/datafusion/pull/2821) ([liukun4515](https://github.com/liukun4515)) +- CaseWhen: coerce the all then and else data type to a common data type [\#2819](https://github.com/apache/datafusion/pull/2819) ([liukun4515](https://github.com/liukun4515)) +- Fix `ScalarValue::isNull` calculation [\#2815](https://github.com/apache/datafusion/pull/2815) ([alamb](https://github.com/alamb)) +- Fix nullability calculation for `CASE` expressions [\#2814](https://github.com/apache/datafusion/pull/2814) ([alamb](https://github.com/alamb)) +- Bump numpy from 1.21.3 to 1.22.0 in /integration-tests [\#2811](https://github.com/apache/datafusion/pull/2811) ([xudong963](https://github.com/xudong963)) +- Fix data type calculation for `CaseExpr` s with `NULLs` [\#2810](https://github.com/apache/datafusion/pull/2810) ([AssHero](https://github.com/AssHero)) +- InList: fix bug for comparing with Null in the list using the set optimization [\#2809](https://github.com/apache/datafusion/pull/2809) ([liukun4515](https://github.com/liukun4515)) +- Use specialized dictionary kernels \(\#1178\) [\#2808](https://github.com/apache/datafusion/pull/2808) ([tustvold](https://github.com/tustvold)) +- fix schema nullability for `information_schema` schema [\#2804](https://github.com/apache/datafusion/pull/2804) ([alamb](https://github.com/alamb)) +- fix: 
correctly calculate join output schema nullability [\#2803](https://github.com/apache/datafusion/pull/2803) ([alamb](https://github.com/alamb)) +- Correct schema nullability declaration in tests [\#2802](https://github.com/apache/datafusion/pull/2802) ([alamb](https://github.com/alamb)) +- Don't treat Null as UTF8\(None\) and change error info. [\#2801](https://github.com/apache/datafusion/pull/2801) ([liukun4515](https://github.com/liukun4515)) +- MINOR: Remove reference to docker image that is no longer available [\#2795](https://github.com/apache/datafusion/pull/2795) ([andygrove](https://github.com/andygrove)) +- Use coerced type in inlist expr planning [\#2794](https://github.com/apache/datafusion/pull/2794) ([viirya](https://github.com/viirya)) +- Add LogicalPlan::Distinct [\#2792](https://github.com/apache/datafusion/pull/2792) [[sql](https://github.com/apache/datafusion/labels/sql)] ([mrob95](https://github.com/mrob95)) +- Add config option for coalesce_batches physical optimization rule, make optional [\#2791](https://github.com/apache/datafusion/pull/2791) ([andygrove](https://github.com/andygrove)) +- Improve readability of table scan projections in query plans \(remove `Some` and `None`\) [\#2789](https://github.com/apache/datafusion/pull/2789) [[sql](https://github.com/apache/datafusion/labels/sql)] ([comphead](https://github.com/comphead)) +- Simplify FilterNullJoinKeys rule [\#2781](https://github.com/apache/datafusion/pull/2781) ([andygrove](https://github.com/andygrove)) +- MINOR: re-export sqlparser from datafusion-sql crate [\#2779](https://github.com/apache/datafusion/pull/2779) [[sql](https://github.com/apache/datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- Update to arrow 17.0.0 [\#2778](https://github.com/apache/datafusion/pull/2778) [[sql](https://github.com/apache/datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Support multiple paths for ListingTableScanNode 
[\#2775](https://github.com/apache/datafusion/pull/2775) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Remove expr_sub_expressions and rewrite_expression functions [\#2772](https://github.com/apache/datafusion/pull/2772) ([mrob95](https://github.com/mrob95)) +- minor: update cranelift related dependencies [\#2769](https://github.com/apache/datafusion/pull/2769) ([xudong963](https://github.com/xudong963)) +- minor: panic rather than fail silently on bad dictionary in hash join [\#2767](https://github.com/apache/datafusion/pull/2767) ([alamb](https://github.com/alamb)) +- MINOR: make `prettier` use consistent between CI and contributing guide [\#2766](https://github.com/apache/datafusion/pull/2766) ([andygrove](https://github.com/andygrove)) +- Rewrite subexpressions of InSubquery in rewrite_expression [\#2765](https://github.com/apache/datafusion/pull/2765) ([mrob95](https://github.com/mrob95)) +- Support `DataType::Decimal` for `IN` and `NOT IN` expressions [\#2764](https://github.com/apache/datafusion/pull/2764) ([liukun4515](https://github.com/liukun4515)) +- Implement extensible configuration mechanism [\#2754](https://github.com/apache/datafusion/pull/2754) ([andygrove](https://github.com/andygrove)) +- Remove redundant docker argument [\#2752](https://github.com/apache/datafusion/pull/2752) ([avantgardnerio](https://github.com/avantgardnerio)) +- Add optimizer pass to reduce `left`/`right`/`full` joins to `inner` join if possible [\#2750](https://github.com/apache/datafusion/pull/2750) [[sql](https://github.com/apache/datafusion/labels/sql)] ([AssHero](https://github.com/AssHero)) +- MINOR: Remove legacy CLI context enum [\#2748](https://github.com/apache/datafusion/pull/2748) ([andygrove](https://github.com/andygrove)) +- CSE unit test for duplicate fields [\#2747](https://github.com/apache/datafusion/pull/2747) ([waynexia](https://github.com/waynexia)) +- MINOR: Improve unsupported data type error message 
[\#2745](https://github.com/apache/datafusion/pull/2745) ([andygrove](https://github.com/andygrove)) +- Add optimizer rule to filter out null keys before a join [\#2740](https://github.com/apache/datafusion/pull/2740) ([andygrove](https://github.com/andygrove)) +- Sort file names in a directory \#2730 [\#2735](https://github.com/apache/datafusion/pull/2735) ([yourenawo](https://github.com/yourenawo)) +- fix: filter push down with `InList` expressions [\#2729](https://github.com/apache/datafusion/pull/2729) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- \[Minor\] add debug info in optimizer.rs [\#2726](https://github.com/apache/datafusion/pull/2726) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Add public API for GlobalLimitExec and LocalLimitExec [\#2722](https://github.com/apache/datafusion/pull/2722) ([andygrove](https://github.com/andygrove)) +- Add additional data types are supported in hash join [\#2721](https://github.com/apache/datafusion/pull/2721) ([AssHero](https://github.com/AssHero)) +- Upgrade to arrow `16.0.0` [\#2718](https://github.com/apache/datafusion/pull/2718) [[sql](https://github.com/apache/datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Fix clippy warnings with toolchain 1.63 [\#2717](https://github.com/apache/datafusion/pull/2717) [[sql](https://github.com/apache/datafusion/labels/sql)] ([waynexia](https://github.com/waynexia)) +- Support for GROUPING SETS/CUBE/ROLLUP [\#2716](https://github.com/apache/datafusion/pull/2716) ([thinkharderdev](https://github.com/thinkharderdev)) +- fix: check redundant fields while building projection plan [\#2715](https://github.com/apache/datafusion/pull/2715) ([waynexia](https://github.com/waynexia)) +- Sort preserving `SortMergeJoin` [\#2699](https://github.com/apache/datafusion/pull/2699) ([korowa](https://github.com/korowa)) +- fix: union schema fix [\#2688](https://github.com/apache/datafusion/pull/2688) [[sql](https://github.com/apache/datafusion/labels/sql)] 
([gandronchik](https://github.com/gandronchik)) +- Support default precision and scale to`CAST AS DECIMAL` [\#2680](https://github.com/apache/datafusion/pull/2680) [[sql](https://github.com/apache/datafusion/labels/sql)] ([gandronchik](https://github.com/gandronchik)) diff --git a/dev/changelog/11.0.0.md b/dev/changelog/11.0.0.md index 60fe648366ec..2dbb089a18dd 100644 --- a/dev/changelog/11.0.0.md +++ b/dev/changelog/11.0.0.md @@ -17,241 +17,241 @@ under the License. --> -## [11.0.0](https://github.com/apache/arrow-datafusion/tree/11.0.0) (2022-08-16) +## [11.0.0](https://github.com/apache/datafusion/tree/11.0.0) (2022-08-16) -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/10.0.0-rc1...11.0.0) +[Full Changelog](https://github.com/apache/datafusion/compare/10.0.0-rc1...11.0.0) **Breaking changes:** -- Implement exact median, add `AggregateState` [\#3009](https://github.com/apache/arrow-datafusion/pull/3009) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- Implement exact median, add `AggregateState` [\#3009](https://github.com/apache/datafusion/pull/3009) [[sql](https://github.com/apache/datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) **Implemented enhancements:** -- Make RowAccumulator public [\#3138](https://github.com/apache/arrow-datafusion/issues/3138) -- docs: proposal for consolidating docs into a Contributor Guide [\#3127](https://github.com/apache/arrow-datafusion/issues/3127) -- feat: support Timestamp +/- Interval [\#3103](https://github.com/apache/arrow-datafusion/issues/3103) -- a `arrow_typeof` like posgresql's `pg_typeof` [\#3095](https://github.com/apache/arrow-datafusion/issues/3095) -- Add DataFrame section to user guide [\#3066](https://github.com/apache/arrow-datafusion/issues/3066) -- Document all scalar SQL functions in user guide [\#3065](https://github.com/apache/arrow-datafusion/issues/3065) -- Simplify implementation of approx_median so 
that it can be exposed in Python [\#3063](https://github.com/apache/arrow-datafusion/issues/3063) -- Support double quoted literal strings for dialects\(such as mysql,bigquery\) [\#3055](https://github.com/apache/arrow-datafusion/issues/3055) -- Simplify / speed up implementation of character_length to unicode points [\#3049](https://github.com/apache/arrow-datafusion/issues/3049) -- Follow-up on Clickbench benchmark [\#3048](https://github.com/apache/arrow-datafusion/issues/3048) -- Why the PhysicalPlanner is an async trait ? [\#3032](https://github.com/apache/arrow-datafusion/issues/3032) -- Optimize file stream metrics. [\#3024](https://github.com/apache/arrow-datafusion/issues/3024) -- Proposal: Enable typed strings expressions for VALUES clause [\#3017](https://github.com/apache/arrow-datafusion/issues/3017) -- Proposal: Add `date_bin` function [\#3015](https://github.com/apache/arrow-datafusion/issues/3015) -- The upcoming release of Arrow \(20?\) breaks datafusion [\#3006](https://github.com/apache/arrow-datafusion/issues/3006) -- Can I select some files for query based on the filtering rules in the directory? 
[\#2993](https://github.com/apache/arrow-datafusion/issues/2993) -- Rename FormatReader to FileOpener [\#2990](https://github.com/apache/arrow-datafusion/issues/2990) -- Derive `Hash` trait for `JoinType` [\#2971](https://github.com/apache/arrow-datafusion/issues/2971) -- CAST from Utf8 to Boolean [\#2967](https://github.com/apache/arrow-datafusion/issues/2967) -- Add baseline_metrics for FileStream to record metrics like elapsed time, record output, etc [\#2961](https://github.com/apache/arrow-datafusion/issues/2961) -- Example to show how to convert query result into rust struct [\#2959](https://github.com/apache/arrow-datafusion/issues/2959) -- simplify not clause [\#2957](https://github.com/apache/arrow-datafusion/issues/2957) -- Implement Debug for ColumnarValue [\#2950](https://github.com/apache/arrow-datafusion/issues/2950) -- Parallel fetching of column chunks when reading parquet files [\#2949](https://github.com/apache/arrow-datafusion/issues/2949) -- Extension mechanism for `SessionConfig` [\#2939](https://github.com/apache/arrow-datafusion/issues/2939) -- Streaming CSV/JSON Object Store Read [\#2935](https://github.com/apache/arrow-datafusion/issues/2935) -- Support CSV Limit Pushdown to Object Storage [\#2930](https://github.com/apache/arrow-datafusion/issues/2930) -- Add support for `pow` scalar function [\#2926](https://github.com/apache/arrow-datafusion/issues/2926) -- Add support for exact `median` aggregate function [\#2925](https://github.com/apache/arrow-datafusion/issues/2925) -- Support `mean` as synonym for `avg` [\#2922](https://github.com/apache/arrow-datafusion/issues/2922) -- Rename a column name [\#2919](https://github.com/apache/arrow-datafusion/issues/2919) -- Move `ScalarValue` tests alongside implementation, move `from_slice` to `core` [\#2913](https://github.com/apache/arrow-datafusion/issues/2913) -- Fail gracefully if optimization rule fails [\#2908](https://github.com/apache/arrow-datafusion/issues/2908) -- Make 
ObjectStoreRegistry as a trait which can allow Ballista to introduce a self registry ObjectStoreRegistry [\#2905](https://github.com/apache/arrow-datafusion/issues/2905) -- Remove datafusion-data-access crate [\#2903](https://github.com/apache/arrow-datafusion/issues/2903) -- Improve formatting of logical plans containing subquery expressions [\#2898](https://github.com/apache/arrow-datafusion/issues/2898) -- Atan2 added to built-in functions [\#2897](https://github.com/apache/arrow-datafusion/issues/2897) -- The explain statements only print logical plans for debug/other purpose. [\#2894](https://github.com/apache/arrow-datafusion/issues/2894) -- JSON version of `display_indent()` [\#2889](https://github.com/apache/arrow-datafusion/issues/2889) -- It would be nice to have a way to generate unique IDs in optimizer rules [\#2886](https://github.com/apache/arrow-datafusion/issues/2886) -- Add support for `TIME` literal values [\#2883](https://github.com/apache/arrow-datafusion/issues/2883) -- Add h2o benchmark [\#2879](https://github.com/apache/arrow-datafusion/issues/2879) -- Implement `from_unixtime` function [\#2871](https://github.com/apache/arrow-datafusion/issues/2871) -- Add `cast` function for creating logical cast expression [\#2870](https://github.com/apache/arrow-datafusion/issues/2870) -- Release DataFusion 10.0.0 [\#2862](https://github.com/apache/arrow-datafusion/issues/2862) -- Implement `information_schema.views` [\#2857](https://github.com/apache/arrow-datafusion/issues/2857) -- Migrate from avro_rs to apache_avro [\#2783](https://github.com/apache/arrow-datafusion/issues/2783) -- Add optimizer rule to remove `OFFSET 0` [\#2584](https://github.com/apache/arrow-datafusion/issues/2584) -- Preserve Element Name in ScalarValue::List [\#2450](https://github.com/apache/arrow-datafusion/issues/2450) -- Add EXISTS subquery support to Ballista [\#2338](https://github.com/apache/arrow-datafusion/issues/2338) -- Add documentation on supported functions to 
datafusion website [\#1487](https://github.com/apache/arrow-datafusion/issues/1487) -- documentations for datafusion-cli can be consolidated a bit more [\#1352](https://github.com/apache/arrow-datafusion/issues/1352) -- Optimizer: Predicate Rewrite pass for TPCH Q19 [\#217](https://github.com/apache/arrow-datafusion/issues/217) -- feat: add optimize rule `rewrite_disjunctive_predicate` [\#2858](https://github.com/apache/arrow-datafusion/pull/2858) ([xudong963](https://github.com/xudong963)) +- Make RowAccumulator public [\#3138](https://github.com/apache/datafusion/issues/3138) +- docs: proposal for consolidating docs into a Contributor Guide [\#3127](https://github.com/apache/datafusion/issues/3127) +- feat: support Timestamp +/- Interval [\#3103](https://github.com/apache/datafusion/issues/3103) +- a `arrow_typeof` like posgresql's `pg_typeof` [\#3095](https://github.com/apache/datafusion/issues/3095) +- Add DataFrame section to user guide [\#3066](https://github.com/apache/datafusion/issues/3066) +- Document all scalar SQL functions in user guide [\#3065](https://github.com/apache/datafusion/issues/3065) +- Simplify implementation of approx_median so that it can be exposed in Python [\#3063](https://github.com/apache/datafusion/issues/3063) +- Support double quoted literal strings for dialects\(such as mysql,bigquery\) [\#3055](https://github.com/apache/datafusion/issues/3055) +- Simplify / speed up implementation of character_length to unicode points [\#3049](https://github.com/apache/datafusion/issues/3049) +- Follow-up on Clickbench benchmark [\#3048](https://github.com/apache/datafusion/issues/3048) +- Why the PhysicalPlanner is an async trait ? [\#3032](https://github.com/apache/datafusion/issues/3032) +- Optimize file stream metrics. 
[\#3024](https://github.com/apache/datafusion/issues/3024) +- Proposal: Enable typed strings expressions for VALUES clause [\#3017](https://github.com/apache/datafusion/issues/3017) +- Proposal: Add `date_bin` function [\#3015](https://github.com/apache/datafusion/issues/3015) +- The upcoming release of Arrow \(20?\) breaks datafusion [\#3006](https://github.com/apache/datafusion/issues/3006) +- Can I select some files for query based on the filtering rules in the directory? [\#2993](https://github.com/apache/datafusion/issues/2993) +- Rename FormatReader to FileOpener [\#2990](https://github.com/apache/datafusion/issues/2990) +- Derive `Hash` trait for `JoinType` [\#2971](https://github.com/apache/datafusion/issues/2971) +- CAST from Utf8 to Boolean [\#2967](https://github.com/apache/datafusion/issues/2967) +- Add baseline_metrics for FileStream to record metrics like elapsed time, record output, etc [\#2961](https://github.com/apache/datafusion/issues/2961) +- Example to show how to convert query result into rust struct [\#2959](https://github.com/apache/datafusion/issues/2959) +- simplify not clause [\#2957](https://github.com/apache/datafusion/issues/2957) +- Implement Debug for ColumnarValue [\#2950](https://github.com/apache/datafusion/issues/2950) +- Parallel fetching of column chunks when reading parquet files [\#2949](https://github.com/apache/datafusion/issues/2949) +- Extension mechanism for `SessionConfig` [\#2939](https://github.com/apache/datafusion/issues/2939) +- Streaming CSV/JSON Object Store Read [\#2935](https://github.com/apache/datafusion/issues/2935) +- Support CSV Limit Pushdown to Object Storage [\#2930](https://github.com/apache/datafusion/issues/2930) +- Add support for `pow` scalar function [\#2926](https://github.com/apache/datafusion/issues/2926) +- Add support for exact `median` aggregate function [\#2925](https://github.com/apache/datafusion/issues/2925) +- Support `mean` as synonym for `avg` 
[\#2922](https://github.com/apache/datafusion/issues/2922) +- Rename a column name [\#2919](https://github.com/apache/datafusion/issues/2919) +- Move `ScalarValue` tests alongside implementation, move `from_slice` to `core` [\#2913](https://github.com/apache/datafusion/issues/2913) +- Fail gracefully if optimization rule fails [\#2908](https://github.com/apache/datafusion/issues/2908) +- Make ObjectStoreRegistry as a trait which can allow Ballista to introduce a self registry ObjectStoreRegistry [\#2905](https://github.com/apache/datafusion/issues/2905) +- Remove datafusion-data-access crate [\#2903](https://github.com/apache/datafusion/issues/2903) +- Improve formatting of logical plans containing subquery expressions [\#2898](https://github.com/apache/datafusion/issues/2898) +- Atan2 added to built-in functions [\#2897](https://github.com/apache/datafusion/issues/2897) +- The explain statements only print logical plans for debug/other purpose. [\#2894](https://github.com/apache/datafusion/issues/2894) +- JSON version of `display_indent()` [\#2889](https://github.com/apache/datafusion/issues/2889) +- It would be nice to have a way to generate unique IDs in optimizer rules [\#2886](https://github.com/apache/datafusion/issues/2886) +- Add support for `TIME` literal values [\#2883](https://github.com/apache/datafusion/issues/2883) +- Add h2o benchmark [\#2879](https://github.com/apache/datafusion/issues/2879) +- Implement `from_unixtime` function [\#2871](https://github.com/apache/datafusion/issues/2871) +- Add `cast` function for creating logical cast expression [\#2870](https://github.com/apache/datafusion/issues/2870) +- Release DataFusion 10.0.0 [\#2862](https://github.com/apache/datafusion/issues/2862) +- Implement `information_schema.views` [\#2857](https://github.com/apache/datafusion/issues/2857) +- Migrate from avro_rs to apache_avro [\#2783](https://github.com/apache/datafusion/issues/2783) +- Add optimizer rule to remove `OFFSET 0` 
[\#2584](https://github.com/apache/datafusion/issues/2584) +- Preserve Element Name in ScalarValue::List [\#2450](https://github.com/apache/datafusion/issues/2450) +- Add EXISTS subquery support to Ballista [\#2338](https://github.com/apache/datafusion/issues/2338) +- Add documentation on supported functions to datafusion website [\#1487](https://github.com/apache/datafusion/issues/1487) +- documentations for datafusion-cli can be consolidated a bit more [\#1352](https://github.com/apache/datafusion/issues/1352) +- Optimizer: Predicate Rewrite pass for TPCH Q19 [\#217](https://github.com/apache/datafusion/issues/217) +- feat: add optimize rule `rewrite_disjunctive_predicate` [\#2858](https://github.com/apache/datafusion/pull/2858) ([xudong963](https://github.com/xudong963)) **Fixed bugs:** -- Regression in SQL support for `ORDER BY` and aliased expressions [\#3160](https://github.com/apache/arrow-datafusion/issues/3160) -- panic when deal with `@` operator [\#3137](https://github.com/apache/arrow-datafusion/issues/3137) -- Incorrect type coercion rule for date + interval [\#3093](https://github.com/apache/arrow-datafusion/issues/3093) -- Cast string to timestamp crash while we input time before 1970 with floating number second [\#3082](https://github.com/apache/arrow-datafusion/issues/3082) -- INTEGER type does't work while importing csv [\#3059](https://github.com/apache/arrow-datafusion/issues/3059) -- Cannot GROUP BY Binary [\#3050](https://github.com/apache/arrow-datafusion/issues/3050) -- incorrect i32 coercion for `to_timestamp` [\#3046](https://github.com/apache/arrow-datafusion/issues/3046) -- Error pruning `IsNull` expressions: Column 'instance_null_count' is declared as non-nullable but contains null values [\#3042](https://github.com/apache/arrow-datafusion/issues/3042) -- I want to query some files in a directory. Is there any way? 
[\#3013](https://github.com/apache/arrow-datafusion/issues/3013) -- The expression to get an indexed field is only valid for `List` types \(`common_sub_expression_eliminate`\) [\#3002](https://github.com/apache/arrow-datafusion/issues/3002) -- Double to_timestamp_seconds produces abnormal result [\#2998](https://github.com/apache/arrow-datafusion/issues/2998) -- External parquet table fails when schema contains differing key / value metadata [\#2982](https://github.com/apache/arrow-datafusion/issues/2982) -- SELECT on column with uppercase column name fails with FieldNotFound error [\#2978](https://github.com/apache/arrow-datafusion/issues/2978) -- panic reading AWS-generated parquet file [\#2963](https://github.com/apache/arrow-datafusion/issues/2963) -- Can't filter rowgroup for parquet prune for some data type [\#2962](https://github.com/apache/arrow-datafusion/issues/2962) -- CI test is failing with ` final link failed: No space left on device` [\#2947](https://github.com/apache/arrow-datafusion/issues/2947) -- bug: new ObjectStore breaks backward compatibility with contrib plugins [\#2931](https://github.com/apache/arrow-datafusion/issues/2931) -- bug: file types handled wrong [\#2929](https://github.com/apache/arrow-datafusion/issues/2929) -- bug: changing the number of partitions does not increase concurrency [\#2928](https://github.com/apache/arrow-datafusion/issues/2928) -- csv_explain fails on RC verifier [\#2916](https://github.com/apache/arrow-datafusion/issues/2916) -- index out of range error from datafusion_row::write::write_field [\#2910](https://github.com/apache/arrow-datafusion/issues/2910) -- Optimization rule `CommonSubexprEliminate` creates invalid projections [\#2907](https://github.com/apache/arrow-datafusion/issues/2907) -- serde_json requires that either `std` \(default\) or `alloc` feature is enabled [\#2896](https://github.com/apache/arrow-datafusion/issues/2896) -- Inconsistent type coercion rules with comparison expressions 
[\#2890](https://github.com/apache/arrow-datafusion/issues/2890) -- Doc Error: the test directory link 404 which is in CONTRIBUTING.md [\#2880](https://github.com/apache/arrow-datafusion/issues/2880) -- Round trips through `ScalarValue`'s sometimes don't preserve types \(e.g. change types from `DictionaryArray`\) [\#2874](https://github.com/apache/arrow-datafusion/issues/2874) -- Error with CASE and DictionaryArrays: `ArrowError(InvalidArgumentError("arguments need to have the same data type"))` [\#2873](https://github.com/apache/arrow-datafusion/issues/2873) -- window functions not supported in expressions [\#2869](https://github.com/apache/arrow-datafusion/issues/2869) -- Unable to work with month intervals [\#2796](https://github.com/apache/arrow-datafusion/issues/2796) -- Discord invite link in communication page has expired [\#2743](https://github.com/apache/arrow-datafusion/issues/2743) -- Test \(path normalization\) failures while verifying release candidate 9.0.0 RC1 [\#2719](https://github.com/apache/arrow-datafusion/issues/2719) -- Reading parquet with \(pre-release\) arrow fails with "out of order projection is not supported" [\#2543](https://github.com/apache/arrow-datafusion/issues/2543) -- Fix SQL planner bug when resolving columns with same name as a relation [\#3003](https://github.com/apache/arrow-datafusion/pull/3003) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) -- fix `RowWriter` index out of bounds error [\#2968](https://github.com/apache/arrow-datafusion/pull/2968) ([comphead](https://github.com/comphead)) -- fix: support decimal statistic for row group prune [\#2966](https://github.com/apache/arrow-datafusion/pull/2966) ([liukun4515](https://github.com/liukun4515)) -- Fix invalid projection in `CommonSubexprEliminate` [\#2915](https://github.com/apache/arrow-datafusion/pull/2915) ([andygrove](https://github.com/andygrove)) +- Regression in SQL support for `ORDER BY` and aliased 
expressions [\#3160](https://github.com/apache/datafusion/issues/3160) +- panic when deal with `@` operator [\#3137](https://github.com/apache/datafusion/issues/3137) +- Incorrect type coercion rule for date + interval [\#3093](https://github.com/apache/datafusion/issues/3093) +- Cast string to timestamp crash while we input time before 1970 with floating number second [\#3082](https://github.com/apache/datafusion/issues/3082) +- INTEGER type does't work while importing csv [\#3059](https://github.com/apache/datafusion/issues/3059) +- Cannot GROUP BY Binary [\#3050](https://github.com/apache/datafusion/issues/3050) +- incorrect i32 coercion for `to_timestamp` [\#3046](https://github.com/apache/datafusion/issues/3046) +- Error pruning `IsNull` expressions: Column 'instance_null_count' is declared as non-nullable but contains null values [\#3042](https://github.com/apache/datafusion/issues/3042) +- I want to query some files in a directory. Is there any way? [\#3013](https://github.com/apache/datafusion/issues/3013) +- The expression to get an indexed field is only valid for `List` types \(`common_sub_expression_eliminate`\) [\#3002](https://github.com/apache/datafusion/issues/3002) +- Double to_timestamp_seconds produces abnormal result [\#2998](https://github.com/apache/datafusion/issues/2998) +- External parquet table fails when schema contains differing key / value metadata [\#2982](https://github.com/apache/datafusion/issues/2982) +- SELECT on column with uppercase column name fails with FieldNotFound error [\#2978](https://github.com/apache/datafusion/issues/2978) +- panic reading AWS-generated parquet file [\#2963](https://github.com/apache/datafusion/issues/2963) +- Can't filter rowgroup for parquet prune for some data type [\#2962](https://github.com/apache/datafusion/issues/2962) +- CI test is failing with ` final link failed: No space left on device` [\#2947](https://github.com/apache/datafusion/issues/2947) +- bug: new ObjectStore breaks backward 
compatibility with contrib plugins [\#2931](https://github.com/apache/datafusion/issues/2931) +- bug: file types handled wrong [\#2929](https://github.com/apache/datafusion/issues/2929) +- bug: changing the number of partitions does not increase concurrency [\#2928](https://github.com/apache/datafusion/issues/2928) +- csv_explain fails on RC verifier [\#2916](https://github.com/apache/datafusion/issues/2916) +- index out of range error from datafusion_row::write::write_field [\#2910](https://github.com/apache/datafusion/issues/2910) +- Optimization rule `CommonSubexprEliminate` creates invalid projections [\#2907](https://github.com/apache/datafusion/issues/2907) +- serde_json requires that either `std` \(default\) or `alloc` feature is enabled [\#2896](https://github.com/apache/datafusion/issues/2896) +- Inconsistent type coercion rules with comparison expressions [\#2890](https://github.com/apache/datafusion/issues/2890) +- Doc Error: the test directory link 404 which is in CONTRIBUTING.md [\#2880](https://github.com/apache/datafusion/issues/2880) +- Round trips through `ScalarValue`'s sometimes don't preserve types \(e.g. 
change types from `DictionaryArray`\) [\#2874](https://github.com/apache/datafusion/issues/2874) +- Error with CASE and DictionaryArrays: `ArrowError(InvalidArgumentError("arguments need to have the same data type"))` [\#2873](https://github.com/apache/datafusion/issues/2873) +- window functions not supported in expressions [\#2869](https://github.com/apache/datafusion/issues/2869) +- Unable to work with month intervals [\#2796](https://github.com/apache/datafusion/issues/2796) +- Discord invite link in communication page has expired [\#2743](https://github.com/apache/datafusion/issues/2743) +- Test \(path normalization\) failures while verifying release candidate 9.0.0 RC1 [\#2719](https://github.com/apache/datafusion/issues/2719) +- Reading parquet with \(pre-release\) arrow fails with "out of order projection is not supported" [\#2543](https://github.com/apache/datafusion/issues/2543) +- Fix SQL planner bug when resolving columns with same name as a relation [\#3003](https://github.com/apache/datafusion/pull/3003) [[sql](https://github.com/apache/datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- fix `RowWriter` index out of bounds error [\#2968](https://github.com/apache/datafusion/pull/2968) ([comphead](https://github.com/comphead)) +- fix: support decimal statistic for row group prune [\#2966](https://github.com/apache/datafusion/pull/2966) ([liukun4515](https://github.com/liukun4515)) +- Fix invalid projection in `CommonSubexprEliminate` [\#2915](https://github.com/apache/datafusion/pull/2915) ([andygrove](https://github.com/andygrove)) **Documentation updates:** -- MINOR: Fix broken links in contrib guide [\#3135](https://github.com/apache/arrow-datafusion/pull/3135) ([andygrove](https://github.com/andygrove)) -- MINOR: User Guide: Move expressions to top-level page [\#3134](https://github.com/apache/arrow-datafusion/pull/3134) ([andygrove](https://github.com/andygrove)) -- User Guide: Combine CLI pages 
[\#3133](https://github.com/apache/arrow-datafusion/pull/3133) ([andygrove](https://github.com/andygrove)) -- User Guide: Add documentation for JOIN syntax [\#3130](https://github.com/apache/arrow-datafusion/pull/3130) ([andygrove](https://github.com/andygrove)) -- separate contributors guide [\#3128](https://github.com/apache/arrow-datafusion/pull/3128) ([kmitchener](https://github.com/kmitchener)) -- minor: remove python docs, now they're in another project [\#3119](https://github.com/apache/arrow-datafusion/pull/3119) ([kmitchener](https://github.com/kmitchener)) -- minor: doc fixes: fix link to datafusion-python project and add link to slides for rece… [\#3118](https://github.com/apache/arrow-datafusion/pull/3118) ([kmitchener](https://github.com/kmitchener)) -- Add all scalar SQL functions to user guide [\#3090](https://github.com/apache/arrow-datafusion/pull/3090) ([andygrove](https://github.com/andygrove)) -- Add DataFrame reference to the user guide [\#3067](https://github.com/apache/arrow-datafusion/pull/3067) ([andygrove](https://github.com/andygrove)) -- MINOR: Add CeresDB to list of products using DataFusion [\#3060](https://github.com/apache/arrow-datafusion/pull/3060) ([andygrove](https://github.com/andygrove)) -- Minor: improve some docstrings about pruning [\#3041](https://github.com/apache/arrow-datafusion/pull/3041) ([alamb](https://github.com/alamb)) -- doc: add a new video link about datafusion [\#3025](https://github.com/apache/arrow-datafusion/pull/3025) ([xudong963](https://github.com/xudong963)) -- Update README.md to add CnosDB into the Known Uses [\#2933](https://github.com/apache/arrow-datafusion/pull/2933) ([cnoshb](https://github.com/cnoshb)) +- MINOR: Fix broken links in contrib guide [\#3135](https://github.com/apache/datafusion/pull/3135) ([andygrove](https://github.com/andygrove)) +- MINOR: User Guide: Move expressions to top-level page [\#3134](https://github.com/apache/datafusion/pull/3134) 
([andygrove](https://github.com/andygrove)) +- User Guide: Combine CLI pages [\#3133](https://github.com/apache/datafusion/pull/3133) ([andygrove](https://github.com/andygrove)) +- User Guide: Add documentation for JOIN syntax [\#3130](https://github.com/apache/datafusion/pull/3130) ([andygrove](https://github.com/andygrove)) +- separate contributors guide [\#3128](https://github.com/apache/datafusion/pull/3128) ([kmitchener](https://github.com/kmitchener)) +- minor: remove python docs, now they're in another project [\#3119](https://github.com/apache/datafusion/pull/3119) ([kmitchener](https://github.com/kmitchener)) +- minor: doc fixes: fix link to datafusion-python project and add link to slides for rece… [\#3118](https://github.com/apache/datafusion/pull/3118) ([kmitchener](https://github.com/kmitchener)) +- Add all scalar SQL functions to user guide [\#3090](https://github.com/apache/datafusion/pull/3090) ([andygrove](https://github.com/andygrove)) +- Add DataFrame reference to the user guide [\#3067](https://github.com/apache/datafusion/pull/3067) ([andygrove](https://github.com/andygrove)) +- MINOR: Add CeresDB to list of products using DataFusion [\#3060](https://github.com/apache/datafusion/pull/3060) ([andygrove](https://github.com/andygrove)) +- Minor: improve some docstrings about pruning [\#3041](https://github.com/apache/datafusion/pull/3041) ([alamb](https://github.com/alamb)) +- doc: add a new video link about datafusion [\#3025](https://github.com/apache/datafusion/pull/3025) ([xudong963](https://github.com/xudong963)) +- Update README.md to add CnosDB into the Known Uses [\#2933](https://github.com/apache/datafusion/pull/2933) ([cnoshb](https://github.com/cnoshb)) **Performance improvements:** -- Use code points instead of grapheme clusters for string functions [\#3054](https://github.com/apache/arrow-datafusion/pull/3054) ([Dandandan](https://github.com/Dandandan)) +- Use code points instead of grapheme clusters for string functions 
[\#3054](https://github.com/apache/datafusion/pull/3054) ([Dandandan](https://github.com/Dandandan)) **Closed issues:** -- Rename `do_data_time_math()` to `do_date_time_math()` [\#3172](https://github.com/apache/arrow-datafusion/issues/3172) -- Automatic version updates for github actions with dependabot [\#3106](https://github.com/apache/arrow-datafusion/issues/3106) -- \[EPIC\] Proposal for Date/Time enhancement [\#3100](https://github.com/apache/arrow-datafusion/issues/3100) -- Upgrade prost/tonic everywhere [\#3028](https://github.com/apache/arrow-datafusion/issues/3028) -- \[Question\] interested in helping with documentation [\#2866](https://github.com/apache/arrow-datafusion/issues/2866) -- Introducing a new optimizer framework for datafusion. [\#2633](https://github.com/apache/arrow-datafusion/issues/2633) -- Enable discussion tab? [\#2350](https://github.com/apache/arrow-datafusion/issues/2350) -- Add support for AVG\(Timestamp\) types [\#200](https://github.com/apache/arrow-datafusion/issues/200) -- TPC-H Query 22 [\#175](https://github.com/apache/arrow-datafusion/issues/175) -- TPC-H Query 21 [\#172](https://github.com/apache/arrow-datafusion/issues/172) -- TPC-H Query 20 [\#171](https://github.com/apache/arrow-datafusion/issues/171) -- TPC-H Query 17 [\#168](https://github.com/apache/arrow-datafusion/issues/168) -- TPC-H Query 11 [\#163](https://github.com/apache/arrow-datafusion/issues/163) -- TPC-H Query 4 [\#160](https://github.com/apache/arrow-datafusion/issues/160) -- TPC-H Query 2 [\#159](https://github.com/apache/arrow-datafusion/issues/159) -- \[Datafusion\] Optimize literal expression evaluation [\#106](https://github.com/apache/arrow-datafusion/issues/106) +- Rename `do_data_time_math()` to `do_date_time_math()` [\#3172](https://github.com/apache/datafusion/issues/3172) +- Automatic version updates for github actions with dependabot [\#3106](https://github.com/apache/datafusion/issues/3106) +- \[EPIC\] Proposal for Date/Time enhancement 
[\#3100](https://github.com/apache/datafusion/issues/3100) +- Upgrade prost/tonic everywhere [\#3028](https://github.com/apache/datafusion/issues/3028) +- \[Question\] interested in helping with documentation [\#2866](https://github.com/apache/datafusion/issues/2866) +- Introducing a new optimizer framework for datafusion. [\#2633](https://github.com/apache/datafusion/issues/2633) +- Enable discussion tab? [\#2350](https://github.com/apache/datafusion/issues/2350) +- Add support for AVG\(Timestamp\) types [\#200](https://github.com/apache/datafusion/issues/200) +- TPC-H Query 22 [\#175](https://github.com/apache/datafusion/issues/175) +- TPC-H Query 21 [\#172](https://github.com/apache/datafusion/issues/172) +- TPC-H Query 20 [\#171](https://github.com/apache/datafusion/issues/171) +- TPC-H Query 17 [\#168](https://github.com/apache/datafusion/issues/168) +- TPC-H Query 11 [\#163](https://github.com/apache/datafusion/issues/163) +- TPC-H Query 4 [\#160](https://github.com/apache/datafusion/issues/160) +- TPC-H Query 2 [\#159](https://github.com/apache/datafusion/issues/159) +- \[Datafusion\] Optimize literal expression evaluation [\#106](https://github.com/apache/datafusion/issues/106) **Merged pull requests:** -- Rename do_data_time_math\(\) to do_date_time_math\(\) [\#3173](https://github.com/apache/arrow-datafusion/pull/3173) ([JasonLi-cn](https://github.com/JasonLi-cn)) -- \[Minor\] Remove some redundant code [\#3169](https://github.com/apache/arrow-datafusion/pull/3169) ([alamb](https://github.com/alamb)) -- Support `INTEGER` again in addition to `INT` in `CREATE TABLE` and `CAST` statements [\#3167](https://github.com/apache/arrow-datafusion/pull/3167) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Fix regression in SQL parser related to resolution of aliased expressions [\#3165](https://github.com/apache/arrow-datafusion/pull/3165) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] 
([andygrove](https://github.com/andygrove)) -- update cargo lock [\#3164](https://github.com/apache/arrow-datafusion/pull/3164) ([waitingkuo](https://github.com/waitingkuo)) -- add test case for cast_timestamp_before_1970 [\#3163](https://github.com/apache/arrow-datafusion/pull/3163) ([waitingkuo](https://github.com/waitingkuo)) -- Return proper error message for ill formed variable reference [\#3162](https://github.com/apache/arrow-datafusion/pull/3162) ([alamb](https://github.com/alamb)) -- Remove outdated license text left over from arrow repo [\#3154](https://github.com/apache/arrow-datafusion/pull/3154) ([alamb](https://github.com/alamb)) -- Expose RowAccumulator in physical_plan [\#3151](https://github.com/apache/arrow-datafusion/pull/3151) ([iajoiner](https://github.com/iajoiner)) -- Rename `DateIntervalExpr` to `DateTimeIntervalExpr` [\#3150](https://github.com/apache/arrow-datafusion/pull/3150) ([alamb](https://github.com/alamb)) -- Bump actions/labeler from 4.0.0 to 4.0.1 [\#3144](https://github.com/apache/arrow-datafusion/pull/3144) ([dependabot[bot]](https://github.com/apps/dependabot)) -- User Guide: Add documentation for subquery syntax [\#3132](https://github.com/apache/arrow-datafusion/pull/3132) ([andygrove](https://github.com/andygrove)) -- MINOR: User Guide: Move Data Types and Information Schema to their own pages [\#3131](https://github.com/apache/arrow-datafusion/pull/3131) ([andygrove](https://github.com/andygrove)) -- Minor: Clean up `array` test [\#3121](https://github.com/apache/arrow-datafusion/pull/3121) ([alamb](https://github.com/alamb)) -- add arrow_typeof [\#3120](https://github.com/apache/arrow-datafusion/pull/3120) ([waitingkuo](https://github.com/waitingkuo)) -- Bump actions/labeler from 2.2.0 to 4.0.0 [\#3114](https://github.com/apache/arrow-datafusion/pull/3114) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Bump actions/checkout from 2 to 3 [\#3113](https://github.com/apache/arrow-datafusion/pull/3113) 
([dependabot[bot]](https://github.com/apps/dependabot)) -- Bump actions/setup-node from 2 to 3 [\#3112](https://github.com/apache/arrow-datafusion/pull/3112) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Bump actions/setup-python from 3 to 4 [\#3111](https://github.com/apache/arrow-datafusion/pull/3111) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Feature/support timestamp plus minus interval [\#3110](https://github.com/apache/arrow-datafusion/pull/3110) ([JasonLi-cn](https://github.com/JasonLi-cn)) -- docs: fix typo [\#3109](https://github.com/apache/arrow-datafusion/pull/3109) ([dzvon](https://github.com/dzvon)) -- Remove offset if its zero [\#3102](https://github.com/apache/arrow-datafusion/pull/3102) ([turbo1912](https://github.com/turbo1912)) -- Hash binary values [\#3098](https://github.com/apache/arrow-datafusion/pull/3098) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Dandandan](https://github.com/Dandandan)) -- Update to object_store 0.4 [\#3089](https://github.com/apache/arrow-datafusion/pull/3089) ([tustvold](https://github.com/tustvold)) -- Add cast function for creating cast expression [\#3084](https://github.com/apache/arrow-datafusion/pull/3084) ([turbo1912](https://github.com/turbo1912)) -- Upgrade to arrow 20.0.0 \(but no change to object_store\), including `prost`, and `tonic` [\#3083](https://github.com/apache/arrow-datafusion/pull/3083) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([avantgardnerio](https://github.com/avantgardnerio)) -- impl Debug for ColumnarValue, add some docs [\#3076](https://github.com/apache/arrow-datafusion/pull/3076) ([alamb](https://github.com/alamb)) -- \[Minor\] run cargo update in datafusion-cli directory [\#3075](https://github.com/apache/arrow-datafusion/pull/3075) ([alamb](https://github.com/alamb)) -- update cargo.lock in `datafusion-cli` [\#3074](https://github.com/apache/arrow-datafusion/pull/3074) ([waitingkuo](https://github.com/waitingkuo)) -- 
Update sql parser to v0.20.0 [\#3072](https://github.com/apache/arrow-datafusion/pull/3072) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([waitingkuo](https://github.com/waitingkuo)) -- Add opening, scanning, processing metrics in file stream [\#3070](https://github.com/apache/arrow-datafusion/pull/3070) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Simplify `approx_median` implementation, expose via `DataFrame` API [\#3064](https://github.com/apache/arrow-datafusion/pull/3064) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) -- docs: fix PruningStatistics example and some typos [\#3062](https://github.com/apache/arrow-datafusion/pull/3062) ([roeap](https://github.com/roeap)) -- feat: support double quoted literal strings for dialects\(such as mysql,bigquery,spark\) [\#3056](https://github.com/apache/arrow-datafusion/pull/3056) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Rachelint](https://github.com/Rachelint)) -- Allow Overriding AsyncFileReader used by ParquetExec [\#3051](https://github.com/apache/arrow-datafusion/pull/3051) ([Cheappie](https://github.com/Cheappie)) -- to_timestamp i32 coerced to i64 [\#3047](https://github.com/apache/arrow-datafusion/pull/3047) ([waitingkuo](https://github.com/waitingkuo)) -- Fix `IsNull` pruning expression generation without null_count statistics [\#3044](https://github.com/apache/arrow-datafusion/pull/3044) ([alamb](https://github.com/alamb)) -- feat: Support `week`, `decade`, `century` for Interval literal [\#3038](https://github.com/apache/arrow-datafusion/pull/3038) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([ovr](https://github.com/ovr)) -- feat: Support Binary bitwise shift operators \(\<\< and \>\>\) [\#3037](https://github.com/apache/arrow-datafusion/pull/3037) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([ovr](https://github.com/ovr)) -- Use concat_elements_utf8 from arrow rather 
than custom kernel [\#3036](https://github.com/apache/arrow-datafusion/pull/3036) ([alamb](https://github.com/alamb)) -- minor: update minimal rust version to 1.62, matching arrow-rs [\#3035](https://github.com/apache/arrow-datafusion/pull/3035) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([kmitchener](https://github.com/kmitchener)) -- feat: Add `date_bin` built-in function [\#3034](https://github.com/apache/arrow-datafusion/pull/3034) ([stuartcarnie](https://github.com/stuartcarnie)) -- Split `binary_expr.rs` into smaller modules [\#3026](https://github.com/apache/arrow-datafusion/pull/3026) ([alamb](https://github.com/alamb)) -- feat: Enable typed strings expressions for VALUES clause [\#3018](https://github.com/apache/arrow-datafusion/pull/3018) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([stuartcarnie](https://github.com/stuartcarnie)) -- fix typo for PR3003 [\#3011](https://github.com/apache/arrow-datafusion/pull/3011) ([waitingkuo](https://github.com/waitingkuo)) -- feat: Add support for TIME literal values [\#3010](https://github.com/apache/arrow-datafusion/pull/3010) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([stuartcarnie](https://github.com/stuartcarnie)) -- add TimeUnit::Second as signature for ToTimestampSeconds [\#3004](https://github.com/apache/arrow-datafusion/pull/3004) ([waitingkuo](https://github.com/waitingkuo)) -- Rename FileReader to FileOpener \(\#2990\) [\#2991](https://github.com/apache/arrow-datafusion/pull/2991) ([tustvold](https://github.com/tustvold)) -- minor: collation the prune test [\#2986](https://github.com/apache/arrow-datafusion/pull/2986) ([liukun4515](https://github.com/liukun4515)) -- Optionally skip metadata from schema when merging parquet files [\#2985](https://github.com/apache/arrow-datafusion/pull/2985) ([alamb](https://github.com/alamb)) -- \[Minor\] Extract interval parsing logic, add unit tests [\#2984](https://github.com/apache/arrow-datafusion/pull/2984) 
[[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Update sqlparser to 0.19 [\#2981](https://github.com/apache/arrow-datafusion/pull/2981) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- test: add file/SQL level test for pruning parquet row group with decimal data type. [\#2977](https://github.com/apache/arrow-datafusion/pull/2977) ([liukun4515](https://github.com/liukun4515)) -- Derive Hash for JoinType [\#2972](https://github.com/apache/arrow-datafusion/pull/2972) ([liurenjie1024](https://github.com/liurenjie1024)) -- Example that shows how to convert query result into rust struct \#2959 [\#2969](https://github.com/apache/arrow-datafusion/pull/2969) ([thomas-k-cameron](https://github.com/thomas-k-cameron)) -- Add baseline_metrics for FileStream to record metrics like elapsed ti… [\#2965](https://github.com/apache/arrow-datafusion/pull/2965) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- test: add test for decimal and pruning for decimal column [\#2960](https://github.com/apache/arrow-datafusion/pull/2960) ([liukun4515](https://github.com/liukun4515)) -- Simplify expressions with `NOT` clause [\#2958](https://github.com/apache/arrow-datafusion/pull/2958) ([AssHero](https://github.com/AssHero)) -- chore: update jit-related dependencies [\#2956](https://github.com/apache/arrow-datafusion/pull/2956) ([xudong963](https://github.com/xudong963)) -- Update to arrow `19.0.0` [\#2955](https://github.com/apache/arrow-datafusion/pull/2955) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Remove CI Caching to preserve diskspace [\#2948](https://github.com/apache/arrow-datafusion/pull/2948) ([alamb](https://github.com/alamb)) -- Add metadata_size_hint for optimistic fetching of parquet metadata [\#2946](https://github.com/apache/arrow-datafusion/pull/2946) ([thinkharderdev](https://github.com/thinkharderdev)) -- Minor: 
Remove left over debugging statement [\#2944](https://github.com/apache/arrow-datafusion/pull/2944) ([alamb](https://github.com/alamb)) -- add Atan2 [\#2942](https://github.com/apache/arrow-datafusion/pull/2942) ([waitingkuo](https://github.com/waitingkuo)) -- Use `Arc` and remove ObjectStoreRegistry::clone [\#2941](https://github.com/apache/arrow-datafusion/pull/2941) ([tustvold](https://github.com/tustvold)) -- add extension system to `SessionConfig` [\#2940](https://github.com/apache/arrow-datafusion/pull/2940) ([crepererum](https://github.com/crepererum)) -- Update prost-build requirement from 0.7 to 0.10 [\#2937](https://github.com/apache/arrow-datafusion/pull/2937) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Add streaming JSON and CSV reading, `NewlineDelimitedStream' \(\#2935\) [\#2936](https://github.com/apache/arrow-datafusion/pull/2936) ([tustvold](https://github.com/tustvold)) -- feat\(catalog\): Implement information_schema.views [\#2934](https://github.com/apache/arrow-datafusion/pull/2934) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([BaymaxHWY](https://github.com/BaymaxHWY)) -- Support `window` functions in expressions by re-write projection after building window plan [\#2932](https://github.com/apache/arrow-datafusion/pull/2932) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([AssHero](https://github.com/AssHero)) -- Add pow as synonym for power [\#2927](https://github.com/apache/arrow-datafusion/pull/2927) ([andygrove](https://github.com/andygrove)) -- Add `from_unixtime` function [\#2924](https://github.com/apache/arrow-datafusion/pull/2924) ([waitingkuo](https://github.com/waitingkuo)) -- fix\(aggregate\): support mean as synonym avg [\#2923](https://github.com/apache/arrow-datafusion/pull/2923) ([BaymaxHWY](https://github.com/BaymaxHWY)) -- Add `DataFrame::with_column_renamed` [\#2920](https://github.com/apache/arrow-datafusion/pull/2920) ([andygrove](https://github.com/andygrove)) -- Run clippy 
with optional features [\#2918](https://github.com/apache/arrow-datafusion/pull/2918) ([tustvold](https://github.com/tustvold)) -- Fix release verification script by not overriding `ARROW_TEST_DATA` or `PARQUET_TEST_DATA` [\#2917](https://github.com/apache/arrow-datafusion/pull/2917) ([alamb](https://github.com/alamb)) -- Move `ScalarValue` tests alongside implementation, move `from_slice` to `datafusion_core` [\#2914](https://github.com/apache/arrow-datafusion/pull/2914) ([alamb](https://github.com/alamb)) -- Optimizer should have option to skip failing rules [\#2909](https://github.com/apache/arrow-datafusion/pull/2909) ([andygrove](https://github.com/andygrove)) -- Introduce ObjectStoreProvider to create an object store based on the url [\#2906](https://github.com/apache/arrow-datafusion/pull/2906) ([yahoNanJing](https://github.com/yahoNanJing)) -- Remove datafusion-data-access crate [\#2904](https://github.com/apache/arrow-datafusion/pull/2904) ([yahoNanJing](https://github.com/yahoNanJing)) -- Combine all comparison coercion rules [\#2901](https://github.com/apache/arrow-datafusion/pull/2901) ([andygrove](https://github.com/andygrove)) -- Add `Projection::try_new` and `Projection::try_new_with_schema` [\#2900](https://github.com/apache/arrow-datafusion/pull/2900) ([andygrove](https://github.com/andygrove)) -- Improve formatting of logical plans containing subqueries [\#2899](https://github.com/apache/arrow-datafusion/pull/2899) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) -- add session option 'datafusion.explain.logical_plan'. when set to true, the explain statement will only print logical plans. 
[\#2895](https://github.com/apache/arrow-datafusion/pull/2895) ([AssHero](https://github.com/AssHero)) -- Preserve field name in `ScalarValue::List` [\#2893](https://github.com/apache/arrow-datafusion/pull/2893) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([comphead](https://github.com/comphead)) -- Adds optional serde support to datafusion-proto [\#2892](https://github.com/apache/arrow-datafusion/pull/2892) ([tustvold](https://github.com/tustvold)) -- Implement `ScalarValue::Dictionary` and preserve type through conversion back/forth to Array [\#2891](https://github.com/apache/arrow-datafusion/pull/2891) ([alamb](https://github.com/alamb)) -- Add an ID generator in preparation for PR 2885 [\#2887](https://github.com/apache/arrow-datafusion/pull/2887) ([avantgardnerio](https://github.com/avantgardnerio)) -- Add support for correlated subqueries & fix all related TPC-H benchmark issues [\#2885](https://github.com/apache/arrow-datafusion/pull/2885) ([avantgardnerio](https://github.com/avantgardnerio)) -- fix\(doc\): update test directory link in CONTRIBUTING.md [\#2882](https://github.com/apache/arrow-datafusion/pull/2882) ([BaymaxHWY](https://github.com/BaymaxHWY)) -- Add h2o bench groupby queries [\#2881](https://github.com/apache/arrow-datafusion/pull/2881) ([andygrove](https://github.com/andygrove)) -- Add support for month & year intervals [\#2797](https://github.com/apache/arrow-datafusion/pull/2797) ([avantgardnerio](https://github.com/avantgardnerio)) -- Migrate from avro_rs \(0.13\) to apache_avro \(0.14\) [\#2784](https://github.com/apache/arrow-datafusion/pull/2784) ([martin-g](https://github.com/martin-g)) +- Rename do_data_time_math\(\) to do_date_time_math\(\) [\#3173](https://github.com/apache/datafusion/pull/3173) ([JasonLi-cn](https://github.com/JasonLi-cn)) +- \[Minor\] Remove some redundant code [\#3169](https://github.com/apache/datafusion/pull/3169) ([alamb](https://github.com/alamb)) +- Support `INTEGER` again in addition to 
`INT` in `CREATE TABLE` and `CAST` statements [\#3167](https://github.com/apache/datafusion/pull/3167) [[sql](https://github.com/apache/datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Fix regression in SQL parser related to resolution of aliased expressions [\#3165](https://github.com/apache/datafusion/pull/3165) [[sql](https://github.com/apache/datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- update cargo lock [\#3164](https://github.com/apache/datafusion/pull/3164) ([waitingkuo](https://github.com/waitingkuo)) +- add test case for cast_timestamp_before_1970 [\#3163](https://github.com/apache/datafusion/pull/3163) ([waitingkuo](https://github.com/waitingkuo)) +- Return proper error message for ill formed variable reference [\#3162](https://github.com/apache/datafusion/pull/3162) ([alamb](https://github.com/alamb)) +- Remove outdated license text left over from arrow repo [\#3154](https://github.com/apache/datafusion/pull/3154) ([alamb](https://github.com/alamb)) +- Expose RowAccumulator in physical_plan [\#3151](https://github.com/apache/datafusion/pull/3151) ([iajoiner](https://github.com/iajoiner)) +- Rename `DateIntervalExpr` to `DateTimeIntervalExpr` [\#3150](https://github.com/apache/datafusion/pull/3150) ([alamb](https://github.com/alamb)) +- Bump actions/labeler from 4.0.0 to 4.0.1 [\#3144](https://github.com/apache/datafusion/pull/3144) ([dependabot[bot]](https://github.com/apps/dependabot)) +- User Guide: Add documentation for subquery syntax [\#3132](https://github.com/apache/datafusion/pull/3132) ([andygrove](https://github.com/andygrove)) +- MINOR: User Guide: Move Data Types and Information Schema to their own pages [\#3131](https://github.com/apache/datafusion/pull/3131) ([andygrove](https://github.com/andygrove)) +- Minor: Clean up `array` test [\#3121](https://github.com/apache/datafusion/pull/3121) ([alamb](https://github.com/alamb)) +- add arrow_typeof [\#3120](https://github.com/apache/datafusion/pull/3120) 
([waitingkuo](https://github.com/waitingkuo)) +- Bump actions/labeler from 2.2.0 to 4.0.0 [\#3114](https://github.com/apache/datafusion/pull/3114) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Bump actions/checkout from 2 to 3 [\#3113](https://github.com/apache/datafusion/pull/3113) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Bump actions/setup-node from 2 to 3 [\#3112](https://github.com/apache/datafusion/pull/3112) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Bump actions/setup-python from 3 to 4 [\#3111](https://github.com/apache/datafusion/pull/3111) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Feature/support timestamp plus minus interval [\#3110](https://github.com/apache/datafusion/pull/3110) ([JasonLi-cn](https://github.com/JasonLi-cn)) +- docs: fix typo [\#3109](https://github.com/apache/datafusion/pull/3109) ([dzvon](https://github.com/dzvon)) +- Remove offset if its zero [\#3102](https://github.com/apache/datafusion/pull/3102) ([turbo1912](https://github.com/turbo1912)) +- Hash binary values [\#3098](https://github.com/apache/datafusion/pull/3098) [[sql](https://github.com/apache/datafusion/labels/sql)] ([Dandandan](https://github.com/Dandandan)) +- Update to object_store 0.4 [\#3089](https://github.com/apache/datafusion/pull/3089) ([tustvold](https://github.com/tustvold)) +- Add cast function for creating cast expression [\#3084](https://github.com/apache/datafusion/pull/3084) ([turbo1912](https://github.com/turbo1912)) +- Upgrade to arrow 20.0.0 \(but no change to object_store\), including `prost`, and `tonic` [\#3083](https://github.com/apache/datafusion/pull/3083) [[sql](https://github.com/apache/datafusion/labels/sql)] ([avantgardnerio](https://github.com/avantgardnerio)) +- impl Debug for ColumnarValue, add some docs [\#3076](https://github.com/apache/datafusion/pull/3076) ([alamb](https://github.com/alamb)) +- \[Minor\] run cargo update in datafusion-cli directory 
[\#3075](https://github.com/apache/datafusion/pull/3075) ([alamb](https://github.com/alamb)) +- update cargo.lock in `datafusion-cli` [\#3074](https://github.com/apache/datafusion/pull/3074) ([waitingkuo](https://github.com/waitingkuo)) +- Update sql parser to v0.20.0 [\#3072](https://github.com/apache/datafusion/pull/3072) [[sql](https://github.com/apache/datafusion/labels/sql)] ([waitingkuo](https://github.com/waitingkuo)) +- Add opening, scanning, processing metrics in file stream [\#3070](https://github.com/apache/datafusion/pull/3070) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Simplify `approx_median` implementation, expose via `DataFrame` API [\#3064](https://github.com/apache/datafusion/pull/3064) [[sql](https://github.com/apache/datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- docs: fix PruningStatistics example and some typos [\#3062](https://github.com/apache/datafusion/pull/3062) ([roeap](https://github.com/roeap)) +- feat: support double quoted literal strings for dialects\(such as mysql,bigquery,spark\) [\#3056](https://github.com/apache/datafusion/pull/3056) [[sql](https://github.com/apache/datafusion/labels/sql)] ([Rachelint](https://github.com/Rachelint)) +- Allow Overriding AsyncFileReader used by ParquetExec [\#3051](https://github.com/apache/datafusion/pull/3051) ([Cheappie](https://github.com/Cheappie)) +- to_timestamp i32 coerced to i64 [\#3047](https://github.com/apache/datafusion/pull/3047) ([waitingkuo](https://github.com/waitingkuo)) +- Fix `IsNull` pruning expression generation without null_count statistics [\#3044](https://github.com/apache/datafusion/pull/3044) ([alamb](https://github.com/alamb)) +- feat: Support `week`, `decade`, `century` for Interval literal [\#3038](https://github.com/apache/datafusion/pull/3038) [[sql](https://github.com/apache/datafusion/labels/sql)] ([ovr](https://github.com/ovr)) +- feat: Support Binary bitwise shift operators \(\<\< and \>\>\) 
[\#3037](https://github.com/apache/datafusion/pull/3037) [[sql](https://github.com/apache/datafusion/labels/sql)] ([ovr](https://github.com/ovr)) +- Use concat_elements_utf8 from arrow rather than custom kernel [\#3036](https://github.com/apache/datafusion/pull/3036) ([alamb](https://github.com/alamb)) +- minor: update minimal rust version to 1.62, matching arrow-rs [\#3035](https://github.com/apache/datafusion/pull/3035) [[sql](https://github.com/apache/datafusion/labels/sql)] ([kmitchener](https://github.com/kmitchener)) +- feat: Add `date_bin` built-in function [\#3034](https://github.com/apache/datafusion/pull/3034) ([stuartcarnie](https://github.com/stuartcarnie)) +- Split `binary_expr.rs` into smaller modules [\#3026](https://github.com/apache/datafusion/pull/3026) ([alamb](https://github.com/alamb)) +- feat: Enable typed strings expressions for VALUES clause [\#3018](https://github.com/apache/datafusion/pull/3018) [[sql](https://github.com/apache/datafusion/labels/sql)] ([stuartcarnie](https://github.com/stuartcarnie)) +- fix typo for PR3003 [\#3011](https://github.com/apache/datafusion/pull/3011) ([waitingkuo](https://github.com/waitingkuo)) +- feat: Add support for TIME literal values [\#3010](https://github.com/apache/datafusion/pull/3010) [[sql](https://github.com/apache/datafusion/labels/sql)] ([stuartcarnie](https://github.com/stuartcarnie)) +- add TimeUnit::Second as signature for ToTimestampSeconds [\#3004](https://github.com/apache/datafusion/pull/3004) ([waitingkuo](https://github.com/waitingkuo)) +- Rename FileReader to FileOpener \(\#2990\) [\#2991](https://github.com/apache/datafusion/pull/2991) ([tustvold](https://github.com/tustvold)) +- minor: collation the prune test [\#2986](https://github.com/apache/datafusion/pull/2986) ([liukun4515](https://github.com/liukun4515)) +- Optionally skip metadata from schema when merging parquet files [\#2985](https://github.com/apache/datafusion/pull/2985) ([alamb](https://github.com/alamb)) +- \[Minor\] 
Extract interval parsing logic, add unit tests [\#2984](https://github.com/apache/datafusion/pull/2984) [[sql](https://github.com/apache/datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Update sqlparser to 0.19 [\#2981](https://github.com/apache/datafusion/pull/2981) [[sql](https://github.com/apache/datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- test: add file/SQL level test for pruning parquet row group with decimal data type. [\#2977](https://github.com/apache/datafusion/pull/2977) ([liukun4515](https://github.com/liukun4515)) +- Derive Hash for JoinType [\#2972](https://github.com/apache/datafusion/pull/2972) ([liurenjie1024](https://github.com/liurenjie1024)) +- Example that shows how to convert query result into rust struct \#2959 [\#2969](https://github.com/apache/datafusion/pull/2969) ([thomas-k-cameron](https://github.com/thomas-k-cameron)) +- Add baseline_metrics for FileStream to record metrics like elapsed ti… [\#2965](https://github.com/apache/datafusion/pull/2965) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- test: add test for decimal and pruning for decimal column [\#2960](https://github.com/apache/datafusion/pull/2960) ([liukun4515](https://github.com/liukun4515)) +- Simplify expressions with `NOT` clause [\#2958](https://github.com/apache/datafusion/pull/2958) ([AssHero](https://github.com/AssHero)) +- chore: update jit-related dependencies [\#2956](https://github.com/apache/datafusion/pull/2956) ([xudong963](https://github.com/xudong963)) +- Update to arrow `19.0.0` [\#2955](https://github.com/apache/datafusion/pull/2955) [[sql](https://github.com/apache/datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Remove CI Caching to preserve diskspace [\#2948](https://github.com/apache/datafusion/pull/2948) ([alamb](https://github.com/alamb)) +- Add metadata_size_hint for optimistic fetching of parquet metadata [\#2946](https://github.com/apache/datafusion/pull/2946) 
([thinkharderdev](https://github.com/thinkharderdev)) +- Minor: Remove left over debugging statement [\#2944](https://github.com/apache/datafusion/pull/2944) ([alamb](https://github.com/alamb)) +- add Atan2 [\#2942](https://github.com/apache/datafusion/pull/2942) ([waitingkuo](https://github.com/waitingkuo)) +- Use `Arc` and remove ObjectStoreRegistry::clone [\#2941](https://github.com/apache/datafusion/pull/2941) ([tustvold](https://github.com/tustvold)) +- add extension system to `SessionConfig` [\#2940](https://github.com/apache/datafusion/pull/2940) ([crepererum](https://github.com/crepererum)) +- Update prost-build requirement from 0.7 to 0.10 [\#2937](https://github.com/apache/datafusion/pull/2937) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Add streaming JSON and CSV reading, `NewlineDelimitedStream' \(\#2935\) [\#2936](https://github.com/apache/datafusion/pull/2936) ([tustvold](https://github.com/tustvold)) +- feat\(catalog\): Implement information_schema.views [\#2934](https://github.com/apache/datafusion/pull/2934) [[sql](https://github.com/apache/datafusion/labels/sql)] ([BaymaxHWY](https://github.com/BaymaxHWY)) +- Support `window` functions in expressions by re-write projection after building window plan [\#2932](https://github.com/apache/datafusion/pull/2932) [[sql](https://github.com/apache/datafusion/labels/sql)] ([AssHero](https://github.com/AssHero)) +- Add pow as synonym for power [\#2927](https://github.com/apache/datafusion/pull/2927) ([andygrove](https://github.com/andygrove)) +- Add `from_unixtime` function [\#2924](https://github.com/apache/datafusion/pull/2924) ([waitingkuo](https://github.com/waitingkuo)) +- fix\(aggregate\): support mean as synonym avg [\#2923](https://github.com/apache/datafusion/pull/2923) ([BaymaxHWY](https://github.com/BaymaxHWY)) +- Add `DataFrame::with_column_renamed` [\#2920](https://github.com/apache/datafusion/pull/2920) ([andygrove](https://github.com/andygrove)) +- Run clippy with optional 
features [\#2918](https://github.com/apache/datafusion/pull/2918) ([tustvold](https://github.com/tustvold)) +- Fix release verification script by not overriding `ARROW_TEST_DATA` or `PARQUET_TEST_DATA` [\#2917](https://github.com/apache/datafusion/pull/2917) ([alamb](https://github.com/alamb)) +- Move `ScalarValue` tests alongside implementation, move `from_slice` to `datafusion_core` [\#2914](https://github.com/apache/datafusion/pull/2914) ([alamb](https://github.com/alamb)) +- Optimizer should have option to skip failing rules [\#2909](https://github.com/apache/datafusion/pull/2909) ([andygrove](https://github.com/andygrove)) +- Introduce ObjectStoreProvider to create an object store based on the url [\#2906](https://github.com/apache/datafusion/pull/2906) ([yahoNanJing](https://github.com/yahoNanJing)) +- Remove datafusion-data-access crate [\#2904](https://github.com/apache/datafusion/pull/2904) ([yahoNanJing](https://github.com/yahoNanJing)) +- Combine all comparison coercion rules [\#2901](https://github.com/apache/datafusion/pull/2901) ([andygrove](https://github.com/andygrove)) +- Add `Projection::try_new` and `Projection::try_new_with_schema` [\#2900](https://github.com/apache/datafusion/pull/2900) ([andygrove](https://github.com/andygrove)) +- Improve formatting of logical plans containing subqueries [\#2899](https://github.com/apache/datafusion/pull/2899) [[sql](https://github.com/apache/datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- add session option 'datafusion.explain.logical_plan'. when set to true, the explain statement will only print logical plans. 
[\#2895](https://github.com/apache/datafusion/pull/2895) ([AssHero](https://github.com/AssHero)) +- Preserve field name in `ScalarValue::List` [\#2893](https://github.com/apache/datafusion/pull/2893) [[sql](https://github.com/apache/datafusion/labels/sql)] ([comphead](https://github.com/comphead)) +- Adds optional serde support to datafusion-proto [\#2892](https://github.com/apache/datafusion/pull/2892) ([tustvold](https://github.com/tustvold)) +- Implement `ScalarValue::Dictionary` and preserve type through conversion back/forth to Array [\#2891](https://github.com/apache/datafusion/pull/2891) ([alamb](https://github.com/alamb)) +- Add an ID generator in preparation for PR 2885 [\#2887](https://github.com/apache/datafusion/pull/2887) ([avantgardnerio](https://github.com/avantgardnerio)) +- Add support for correlated subqueries & fix all related TPC-H benchmark issues [\#2885](https://github.com/apache/datafusion/pull/2885) ([avantgardnerio](https://github.com/avantgardnerio)) +- fix\(doc\): update test directory link in CONTRIBUTING.md [\#2882](https://github.com/apache/datafusion/pull/2882) ([BaymaxHWY](https://github.com/BaymaxHWY)) +- Add h2o bench groupby queries [\#2881](https://github.com/apache/datafusion/pull/2881) ([andygrove](https://github.com/andygrove)) +- Add support for month & year intervals [\#2797](https://github.com/apache/datafusion/pull/2797) ([avantgardnerio](https://github.com/avantgardnerio)) +- Migrate from avro_rs \(0.13\) to apache_avro \(0.14\) [\#2784](https://github.com/apache/datafusion/pull/2784) ([martin-g](https://github.com/martin-g)) diff --git a/dev/changelog/12.0.0.md b/dev/changelog/12.0.0.md index c195a30cc9dc..01d5648668f5 100644 --- a/dev/changelog/12.0.0.md +++ b/dev/changelog/12.0.0.md @@ -17,230 +17,230 @@ under the License. 
--> -## [12.0.0](https://github.com/apache/arrow-datafusion/tree/12.0.0) (2022-09-12) +## [12.0.0](https://github.com/apache/datafusion/tree/12.0.0) (2022-09-12) -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/11.0.0...12.0.0) +[Full Changelog](https://github.com/apache/datafusion/compare/11.0.0...12.0.0) **Breaking changes:** -- Pass `return_type` to `AccumulatorFunctionImplementation ` for user defined aggregates [\#3428](https://github.com/apache/arrow-datafusion/pull/3428) ([alamb](https://github.com/alamb)) -- Use `usize` rather than `Option` to represent `Limit::skip`and `Limit::offset` [\#3374](https://github.com/apache/arrow-datafusion/pull/3374) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([HaoYang670](https://github.com/HaoYang670)) -- Deprecate legacy datafusion::logical_plan module [\#3338](https://github.com/apache/arrow-datafusion/pull/3338) ([andygrove](https://github.com/andygrove)) -- Update signature for Expr.name so that schema is no longer required [\#3336](https://github.com/apache/arrow-datafusion/pull/3336) ([andygrove](https://github.com/andygrove)) -- MINOR: rename optimizer rule to ScalarSubqueryToJoin [\#3306](https://github.com/apache/arrow-datafusion/pull/3306) ([kmitchener](https://github.com/kmitchener)) -- Add top-level `Like`, `ILike`, `SimilarTo` expressions in logical plan [\#3298](https://github.com/apache/arrow-datafusion/pull/3298) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) -- Upgrade to sqlparser 0.22 [\#3278](https://github.com/apache/arrow-datafusion/pull/3278) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) -- `Expr` variants for boolean operations [\#3275](https://github.com/apache/arrow-datafusion/pull/3275) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([sarahyurick](https://github.com/sarahyurick)) -- Upgrade to sqlparser 0.21 
[\#3200](https://github.com/apache/arrow-datafusion/pull/3200) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) -- Add SQL planner support for `Like`, `ILike` and `SimilarTo`, with optional escape character [\#3101](https://github.com/apache/arrow-datafusion/pull/3101) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- Pass `return_type` to `AccumulatorFunctionImplementation ` for user defined aggregates [\#3428](https://github.com/apache/datafusion/pull/3428) ([alamb](https://github.com/alamb)) +- Use `usize` rather than `Option` to represent `Limit::skip`and `Limit::offset` [\#3374](https://github.com/apache/datafusion/pull/3374) [[sql](https://github.com/apache/datafusion/labels/sql)] ([HaoYang670](https://github.com/HaoYang670)) +- Deprecate legacy datafusion::logical_plan module [\#3338](https://github.com/apache/datafusion/pull/3338) ([andygrove](https://github.com/andygrove)) +- Update signature for Expr.name so that schema is no longer required [\#3336](https://github.com/apache/datafusion/pull/3336) ([andygrove](https://github.com/andygrove)) +- MINOR: rename optimizer rule to ScalarSubqueryToJoin [\#3306](https://github.com/apache/datafusion/pull/3306) ([kmitchener](https://github.com/kmitchener)) +- Add top-level `Like`, `ILike`, `SimilarTo` expressions in logical plan [\#3298](https://github.com/apache/datafusion/pull/3298) [[sql](https://github.com/apache/datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- Upgrade to sqlparser 0.22 [\#3278](https://github.com/apache/datafusion/pull/3278) [[sql](https://github.com/apache/datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- `Expr` variants for boolean operations [\#3275](https://github.com/apache/datafusion/pull/3275) [[sql](https://github.com/apache/datafusion/labels/sql)] ([sarahyurick](https://github.com/sarahyurick)) +- Upgrade to sqlparser 0.21 
[\#3200](https://github.com/apache/datafusion/pull/3200) [[sql](https://github.com/apache/datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- Add SQL planner support for `Like`, `ILike` and `SimilarTo`, with optional escape character [\#3101](https://github.com/apache/datafusion/pull/3101) [[sql](https://github.com/apache/datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) **Implemented enhancements:** -- support `cast` inside `values` [\#3446](https://github.com/apache/arrow-datafusion/issues/3446) -- update TPCH test schemas to use Decimal128 from Float [\#3435](https://github.com/apache/arrow-datafusion/issues/3435) -- Include Bitwise operators in the documentation [\#3434](https://github.com/apache/arrow-datafusion/issues/3434) -- How to read excel file with datafusion? [\#3433](https://github.com/apache/arrow-datafusion/issues/3433) -- Pass return type to the accumulator state factory in aggregates [\#3427](https://github.com/apache/arrow-datafusion/issues/3427) -- Support bitwise XOR operator \(`#`\) [\#3420](https://github.com/apache/arrow-datafusion/issues/3420) -- support InList with datatype Date32 [\#3412](https://github.com/apache/arrow-datafusion/issues/3412) -- add simplification for `between` expression during logical plan optimization [\#3402](https://github.com/apache/arrow-datafusion/issues/3402) -- Replace From trait with TryFrom trait for datafusion-proto crate [\#3401](https://github.com/apache/arrow-datafusion/issues/3401) -- update TPC-H benchmark to Decimal types from Float [\#3392](https://github.com/apache/arrow-datafusion/issues/3392) -- Use `usize` to represent `Limit::skip` [\#3369](https://github.com/apache/arrow-datafusion/issues/3369) -- Avoid coping in `LogicalPlan::expressions` [\#3368](https://github.com/apache/arrow-datafusion/issues/3368) -- Upgrade to Arrow 22 [\#3362](https://github.com/apache/arrow-datafusion/issues/3362) -- Eliminate `OFFSET 0` in the logical plan optimization 
[\#3355](https://github.com/apache/arrow-datafusion/issues/3355) -- Add ability to get unoptimized logical plan from DataFrame [\#3340](https://github.com/apache/arrow-datafusion/issues/3340) -- Allow IDEs to recognize generated code [\#3332](https://github.com/apache/arrow-datafusion/issues/3332) -- `CAST` should not change the name of an expression [\#3326](https://github.com/apache/arrow-datafusion/issues/3326) -- add SQL support for unsigned integers [\#3325](https://github.com/apache/arrow-datafusion/issues/3325) -- Review use of panic in `datafusion-proto` crate [\#3318](https://github.com/apache/arrow-datafusion/issues/3318) -- Review use of panic in `datafusion-sql` crate [\#3315](https://github.com/apache/arrow-datafusion/issues/3315) -- Review use of panic in `datafusion-optimizer` crate [\#3314](https://github.com/apache/arrow-datafusion/issues/3314) -- Review use of panic in `datafusion-expr` crate [\#3312](https://github.com/apache/arrow-datafusion/issues/3312) -- Support registration of custom TableProviders through SQL [\#3310](https://github.com/apache/arrow-datafusion/issues/3310) -- Support binary data in sha hash functions [\#3308](https://github.com/apache/arrow-datafusion/issues/3308) -- add SQL support for tinyint and unsigned versions of all INTs [\#3307](https://github.com/apache/arrow-datafusion/issues/3307) -- Support binary types in InList expression [\#3300](https://github.com/apache/arrow-datafusion/issues/3300) -- Physical planner should map `IsTrue` and similar expressions to `IsDistinctFrom` [\#3288](https://github.com/apache/arrow-datafusion/issues/3288) -- Introduce physical plan version of `Operator` enum [\#3269](https://github.com/apache/arrow-datafusion/issues/3269) -- Introduce `Expr` variants for `IS [NOT] TRUE / FALSE / UNKNOWN` [\#3268](https://github.com/apache/arrow-datafusion/issues/3268) -- Add support for non-correlated subqueries [\#3266](https://github.com/apache/arrow-datafusion/issues/3266) 
[[sql](https://github.com/apache/arrow-datafusion/labels/sql)] -- \(Re-\)add support for glob patterns in ListingTableUrl [\#3261](https://github.com/apache/arrow-datafusion/issues/3261) -- `PreCastLitInComparisonExpressions` should use ExprRewriter and supported nested expressions [\#3259](https://github.com/apache/arrow-datafusion/issues/3259) -- implement `DROP VIEW` [\#3251](https://github.com/apache/arrow-datafusion/issues/3251) -- Upgrade to Arrow 21 [\#3224](https://github.com/apache/arrow-datafusion/issues/3224) -- Add TypeCoercion optimizer rule [\#3221](https://github.com/apache/arrow-datafusion/issues/3221) -- Create bench for approx_percentile_cont aggregate [\#3217](https://github.com/apache/arrow-datafusion/issues/3217) -- Add SQL query planner support for `DISTRIBUTED BY` [\#3207](https://github.com/apache/arrow-datafusion/issues/3207) -- Support "IS \[NOT\] UNKNOWN" syntax [\#3195](https://github.com/apache/arrow-datafusion/issues/3195) -- sqlparser 0.21 upgrade [\#3192](https://github.com/apache/arrow-datafusion/issues/3192) -- Re-implement parsing/planning for SHOW TABLES due to sqlparser changes [\#3188](https://github.com/apache/arrow-datafusion/issues/3188) -- Support `SUM` `AVG`, `MIN`, `MAX` on `Time` columns. 
[\#3166](https://github.com/apache/arrow-datafusion/issues/3166) -- Support "IS TRUE/FALSE" syntax [\#3159](https://github.com/apache/arrow-datafusion/issues/3159) -- Support number of histogram bins in approx_percentile_cont [\#3145](https://github.com/apache/arrow-datafusion/issues/3145) -- Support create ApproxPercentileAccumulator with TDigest max_size [\#3142](https://github.com/apache/arrow-datafusion/issues/3142) -- Remove support for `array` function and only support `array[]` style postgres syntax [\#3115](https://github.com/apache/arrow-datafusion/issues/3115) -- Allow inline column aliases for create view [\#3108](https://github.com/apache/arrow-datafusion/issues/3108) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] -- Add support for Postgres `SIMILAR TO` and `ILIKE` syntax [\#3099](https://github.com/apache/arrow-datafusion/issues/3099) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] -- Update SQL reference in user guide to cover all supported syntax [\#3091](https://github.com/apache/arrow-datafusion/issues/3091) -- DataFusion prelude should import all logical expression functions [\#3068](https://github.com/apache/arrow-datafusion/issues/3068) -- Proposal: Add similar to operator [\#3016](https://github.com/apache/arrow-datafusion/issues/3016) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] -- Release DataFusion 11.0.0 [\#3012](https://github.com/apache/arrow-datafusion/issues/3012) -- Implement "SHOW CREATE TABLE" for external tables [\#2848](https://github.com/apache/arrow-datafusion/issues/2848) -- Change java package names in protobuf files [\#2513](https://github.com/apache/arrow-datafusion/issues/2513) -- When creating `DFField` from `Expr` we should provide input plan not input schema [\#2456](https://github.com/apache/arrow-datafusion/issues/2456) -- Support "IS NOT TRUE/FALSE" syntax [\#2265](https://github.com/apache/arrow-datafusion/issues/2265) -- RFC: Spill-To-Disk Object Storage Download 
[\#2205](https://github.com/apache/arrow-datafusion/issues/2205) -- Support for BitwiseAnd `&`, BitOr `|` binary operators [\#1619](https://github.com/apache/arrow-datafusion/issues/1619) -- \[Question\] Usage of async object store APIs in consuming code [\#1313](https://github.com/apache/arrow-datafusion/issues/1313) -- Allow User Defined Aggregates to return multiple values / structs [\#600](https://github.com/apache/arrow-datafusion/issues/600) -- Implement vectorized hashing for dictionary types [\#331](https://github.com/apache/arrow-datafusion/issues/331) +- support `cast` inside `values` [\#3446](https://github.com/apache/datafusion/issues/3446) +- update TPCH test schemas to use Decimal128 from Float [\#3435](https://github.com/apache/datafusion/issues/3435) +- Include Bitwise operators in the documentation [\#3434](https://github.com/apache/datafusion/issues/3434) +- How to read excel file with datafusion? [\#3433](https://github.com/apache/datafusion/issues/3433) +- Pass return type to the accumulator state factory in aggregates [\#3427](https://github.com/apache/datafusion/issues/3427) +- Support bitwise XOR operator \(`#`\) [\#3420](https://github.com/apache/datafusion/issues/3420) +- support InList with datatype Date32 [\#3412](https://github.com/apache/datafusion/issues/3412) +- add simplification for `between` expression during logical plan optimization [\#3402](https://github.com/apache/datafusion/issues/3402) +- Replace From trait with TryFrom trait for datafusion-proto crate [\#3401](https://github.com/apache/datafusion/issues/3401) +- update TPC-H benchmark to Decimal types from Float [\#3392](https://github.com/apache/datafusion/issues/3392) +- Use `usize` to represent `Limit::skip` [\#3369](https://github.com/apache/datafusion/issues/3369) +- Avoid coping in `LogicalPlan::expressions` [\#3368](https://github.com/apache/datafusion/issues/3368) +- Upgrade to Arrow 22 [\#3362](https://github.com/apache/datafusion/issues/3362) +- Eliminate `OFFSET 
0` in the logical plan optimization [\#3355](https://github.com/apache/datafusion/issues/3355) +- Add ability to get unoptimized logical plan from DataFrame [\#3340](https://github.com/apache/datafusion/issues/3340) +- Allow IDEs to recognize generated code [\#3332](https://github.com/apache/datafusion/issues/3332) +- `CAST` should not change the name of an expression [\#3326](https://github.com/apache/datafusion/issues/3326) +- add SQL support for unsigned integers [\#3325](https://github.com/apache/datafusion/issues/3325) +- Review use of panic in `datafusion-proto` crate [\#3318](https://github.com/apache/datafusion/issues/3318) +- Review use of panic in `datafusion-sql` crate [\#3315](https://github.com/apache/datafusion/issues/3315) +- Review use of panic in `datafusion-optimizer` crate [\#3314](https://github.com/apache/datafusion/issues/3314) +- Review use of panic in `datafusion-expr` crate [\#3312](https://github.com/apache/datafusion/issues/3312) +- Support registration of custom TableProviders through SQL [\#3310](https://github.com/apache/datafusion/issues/3310) +- Support binary data in sha hash functions [\#3308](https://github.com/apache/datafusion/issues/3308) +- add SQL support for tinyint and unsigned versions of all INTs [\#3307](https://github.com/apache/datafusion/issues/3307) +- Support binary types in InList expression [\#3300](https://github.com/apache/datafusion/issues/3300) +- Physical planner should map `IsTrue` and similar expressions to `IsDistinctFrom` [\#3288](https://github.com/apache/datafusion/issues/3288) +- Introduce physical plan version of `Operator` enum [\#3269](https://github.com/apache/datafusion/issues/3269) +- Introduce `Expr` variants for `IS [NOT] TRUE / FALSE / UNKNOWN` [\#3268](https://github.com/apache/datafusion/issues/3268) +- Add support for non-correlated subqueries [\#3266](https://github.com/apache/datafusion/issues/3266) [[sql](https://github.com/apache/datafusion/labels/sql)] +- \(Re-\)add support for glob 
patterns in ListingTableUrl [\#3261](https://github.com/apache/datafusion/issues/3261) +- `PreCastLitInComparisonExpressions` should use ExprRewriter and supported nested expressions [\#3259](https://github.com/apache/datafusion/issues/3259) +- implement `DROP VIEW` [\#3251](https://github.com/apache/datafusion/issues/3251) +- Upgrade to Arrow 21 [\#3224](https://github.com/apache/datafusion/issues/3224) +- Add TypeCoercion optimizer rule [\#3221](https://github.com/apache/datafusion/issues/3221) +- Create bench for approx_percentile_cont aggregate [\#3217](https://github.com/apache/datafusion/issues/3217) +- Add SQL query planner support for `DISTRIBUTED BY` [\#3207](https://github.com/apache/datafusion/issues/3207) +- Support "IS \[NOT\] UNKNOWN" syntax [\#3195](https://github.com/apache/datafusion/issues/3195) +- sqlparser 0.21 upgrade [\#3192](https://github.com/apache/datafusion/issues/3192) +- Re-implement parsing/planning for SHOW TABLES due to sqlparser changes [\#3188](https://github.com/apache/datafusion/issues/3188) +- Support `SUM` `AVG`, `MIN`, `MAX` on `Time` columns. 
[\#3166](https://github.com/apache/datafusion/issues/3166) +- Support "IS TRUE/FALSE" syntax [\#3159](https://github.com/apache/datafusion/issues/3159) +- Support number of histogram bins in approx_percentile_cont [\#3145](https://github.com/apache/datafusion/issues/3145) +- Support create ApproxPercentileAccumulator with TDigest max_size [\#3142](https://github.com/apache/datafusion/issues/3142) +- Remove support for `array` function and only support `array[]` style postgres syntax [\#3115](https://github.com/apache/datafusion/issues/3115) +- Allow inline column aliases for create view [\#3108](https://github.com/apache/datafusion/issues/3108) [[sql](https://github.com/apache/datafusion/labels/sql)] +- Add support for Postgres `SIMILAR TO` and `ILIKE` syntax [\#3099](https://github.com/apache/datafusion/issues/3099) [[sql](https://github.com/apache/datafusion/labels/sql)] +- Update SQL reference in user guide to cover all supported syntax [\#3091](https://github.com/apache/datafusion/issues/3091) +- DataFusion prelude should import all logical expression functions [\#3068](https://github.com/apache/datafusion/issues/3068) +- Proposal: Add similar to operator [\#3016](https://github.com/apache/datafusion/issues/3016) [[sql](https://github.com/apache/datafusion/labels/sql)] +- Release DataFusion 11.0.0 [\#3012](https://github.com/apache/datafusion/issues/3012) +- Implement "SHOW CREATE TABLE" for external tables [\#2848](https://github.com/apache/datafusion/issues/2848) +- Change java package names in protobuf files [\#2513](https://github.com/apache/datafusion/issues/2513) +- When creating `DFField` from `Expr` we should provide input plan not input schema [\#2456](https://github.com/apache/datafusion/issues/2456) +- Support "IS NOT TRUE/FALSE" syntax [\#2265](https://github.com/apache/datafusion/issues/2265) +- RFC: Spill-To-Disk Object Storage Download [\#2205](https://github.com/apache/datafusion/issues/2205) +- Support for BitwiseAnd `&`, BitOr `|` binary 
operators [\#1619](https://github.com/apache/datafusion/issues/1619) +- \[Question\] Usage of async object store APIs in consuming code [\#1313](https://github.com/apache/datafusion/issues/1313) +- Allow User Defined Aggregates to return multiple values / structs [\#600](https://github.com/apache/datafusion/issues/600) +- Implement vectorized hashing for dictionary types [\#331](https://github.com/apache/datafusion/issues/331) **Fixed bugs:** -- Intermittent build error when changing selected features [\#3366](https://github.com/apache/arrow-datafusion/issues/3366) -- `sql::timestamp::timestamp_add_interval_months` failing since September 1st [\#3327](https://github.com/apache/arrow-datafusion/issues/3327) -- `sql::timestamp::timestamp_add_interval_months` test fails [\#3322](https://github.com/apache/arrow-datafusion/issues/3322) -- test case `timestamp_add_interval_months` failed on master branch [\#3321](https://github.com/apache/arrow-datafusion/issues/3321) -- datafusion-proto does not support untyped null scalar values [\#3302](https://github.com/apache/arrow-datafusion/issues/3302) -- `ConfigOptions` creation is slow [\#3295](https://github.com/apache/arrow-datafusion/issues/3295) -- FilterPushDown optimization through UNION ALL results in SchemaError [\#3281](https://github.com/apache/arrow-datafusion/issues/3281) -- Execute LogicalPlans after building for TPCH Benchmarks [\#3273](https://github.com/apache/arrow-datafusion/issues/3273) -- `CREATE TABLE` should return empty DataFrame [\#3265](https://github.com/apache/arrow-datafusion/issues/3265) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] -- `CREATE EXTERNAL TABLE` from CSV creates a table with no columns if there is just a header row [\#3263](https://github.com/apache/arrow-datafusion/issues/3263) -- View TableProvider ignores projections, resulting in invalid plans [\#3240](https://github.com/apache/arrow-datafusion/issues/3240) -- CREATE VIEW should return an empty dataframe on 
success [\#3236](https://github.com/apache/arrow-datafusion/issues/3236) -- `DISTRIBUTE BY` expressions get removed during optimization [\#3234](https://github.com/apache/arrow-datafusion/issues/3234) -- datafusion cannot recognize chinese charactors. [\#3203](https://github.com/apache/arrow-datafusion/issues/3203) -- Panicked at 'byte index 1 is out of bounds on invalid query [\#3190](https://github.com/apache/arrow-datafusion/issues/3190) -- `like_nlike_with_null_lt` fails with latest sqlparser code [\#3187](https://github.com/apache/arrow-datafusion/issues/3187) -- Interval Literal output inconsistent date_type [\#3180](https://github.com/apache/arrow-datafusion/issues/3180) -- `array` function allows different data types [\#3123](https://github.com/apache/arrow-datafusion/issues/3123) -- eq operator doesn't work on binary data [\#3117](https://github.com/apache/arrow-datafusion/issues/3117) -- incorrect `where` clause comparison while using table alias [\#3073](https://github.com/apache/arrow-datafusion/issues/3073) -- Some functions are incorrectly declared as unary [\#3069](https://github.com/apache/arrow-datafusion/issues/3069) -- once now\(\) is called in a statement, it forever returns the same value [\#3057](https://github.com/apache/arrow-datafusion/issues/3057) -- single_distinct_to_groupby panic when group by expr is a binaryExpr [\#2994](https://github.com/apache/arrow-datafusion/issues/2994) -- Cannot have `order by` expression that references complex `group by` expression [\#2360](https://github.com/apache/arrow-datafusion/issues/2360) -- Fix some bugs in TypeCoercion rule [\#3407](https://github.com/apache/arrow-datafusion/pull/3407) ([andygrove](https://github.com/andygrove)) -- MINOR: Stop ignoring `AggregateFunction::distinct` in protobuf serde code [\#3250](https://github.com/apache/arrow-datafusion/pull/3250) ([andygrove](https://github.com/andygrove)) -- Add assertion for invariant in `create_physical_expression` and fix ViewTable projection 
[\#3242](https://github.com/apache/arrow-datafusion/pull/3242) ([andygrove](https://github.com/andygrove)) -- Fix bug where optimizer was removing `Partitioning::DistributeBy` expressions [\#3229](https://github.com/apache/arrow-datafusion/pull/3229) ([andygrove](https://github.com/andygrove)) +- Intermittent build error when changing selected features [\#3366](https://github.com/apache/datafusion/issues/3366) +- `sql::timestamp::timestamp_add_interval_months` failing since September 1st [\#3327](https://github.com/apache/datafusion/issues/3327) +- `sql::timestamp::timestamp_add_interval_months` test fails [\#3322](https://github.com/apache/datafusion/issues/3322) +- test case `timestamp_add_interval_months` failed on master branch [\#3321](https://github.com/apache/datafusion/issues/3321) +- datafusion-proto does not support untyped null scalar values [\#3302](https://github.com/apache/datafusion/issues/3302) +- `ConfigOptions` creation is slow [\#3295](https://github.com/apache/datafusion/issues/3295) +- FilterPushDown optimization through UNION ALL results in SchemaError [\#3281](https://github.com/apache/datafusion/issues/3281) +- Execute LogicalPlans after building for TPCH Benchmarks [\#3273](https://github.com/apache/datafusion/issues/3273) +- `CREATE TABLE` should return empty DataFrame [\#3265](https://github.com/apache/datafusion/issues/3265) [[sql](https://github.com/apache/datafusion/labels/sql)] +- `CREATE EXTERNAL TABLE` from CSV creates a table with no columns if there is just a header row [\#3263](https://github.com/apache/datafusion/issues/3263) +- View TableProvider ignores projections, resulting in invalid plans [\#3240](https://github.com/apache/datafusion/issues/3240) +- CREATE VIEW should return an empty dataframe on success [\#3236](https://github.com/apache/datafusion/issues/3236) +- `DISTRIBUTE BY` expressions get removed during optimization [\#3234](https://github.com/apache/datafusion/issues/3234) +- datafusion cannot recognize chinese 
charactors. [\#3203](https://github.com/apache/datafusion/issues/3203) +- Panicked at 'byte index 1 is out of bounds on invalid query [\#3190](https://github.com/apache/datafusion/issues/3190) +- `like_nlike_with_null_lt` fails with latest sqlparser code [\#3187](https://github.com/apache/datafusion/issues/3187) +- Interval Literal output inconsistent date_type [\#3180](https://github.com/apache/datafusion/issues/3180) +- `array` function allows different data types [\#3123](https://github.com/apache/datafusion/issues/3123) +- eq operator doesn't work on binary data [\#3117](https://github.com/apache/datafusion/issues/3117) +- incorrect `where` clause comparison while using table alias [\#3073](https://github.com/apache/datafusion/issues/3073) +- Some functions are incorrectly declared as unary [\#3069](https://github.com/apache/datafusion/issues/3069) +- once now\(\) is called in a statement, it forever returns the same value [\#3057](https://github.com/apache/datafusion/issues/3057) +- single_distinct_to_groupby panic when group by expr is a binaryExpr [\#2994](https://github.com/apache/datafusion/issues/2994) +- Cannot have `order by` expression that references complex `group by` expression [\#2360](https://github.com/apache/datafusion/issues/2360) +- Fix some bugs in TypeCoercion rule [\#3407](https://github.com/apache/datafusion/pull/3407) ([andygrove](https://github.com/andygrove)) +- MINOR: Stop ignoring `AggregateFunction::distinct` in protobuf serde code [\#3250](https://github.com/apache/datafusion/pull/3250) ([andygrove](https://github.com/andygrove)) +- Add assertion for invariant in `create_physical_expression` and fix ViewTable projection [\#3242](https://github.com/apache/datafusion/pull/3242) ([andygrove](https://github.com/andygrove)) +- Fix bug where optimizer was removing `Partitioning::DistributeBy` expressions [\#3229](https://github.com/apache/datafusion/pull/3229) ([andygrove](https://github.com/andygrove)) **Documentation updates:** -- 
\[minor\] add Coverage Status in readme [\#3220](https://github.com/apache/arrow-datafusion/pull/3220) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- \[minor\] add Coverage Status in readme [\#3220](https://github.com/apache/datafusion/pull/3220) ([Ted-Jiang](https://github.com/Ted-Jiang)) **Closed issues:** -- Add `\i` command to datafusion-cli [\#1906](https://github.com/apache/arrow-datafusion/issues/1906) -- TPC-H Query 15 [\#166](https://github.com/apache/arrow-datafusion/issues/166) +- Add `\i` command to datafusion-cli [\#1906](https://github.com/apache/datafusion/issues/1906) +- TPC-H Query 15 [\#166](https://github.com/apache/datafusion/issues/166) **Merged pull requests:** -- minor: fix some typo. [\#3453](https://github.com/apache/arrow-datafusion/pull/3453) ([jackwener](https://github.com/jackwener)) -- Update criterion requirement from 0.3 to 0.4 [\#3452](https://github.com/apache/arrow-datafusion/pull/3452) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Update object_store requirement from 0.4.0 to 0.5.0 [\#3451](https://github.com/apache/arrow-datafusion/pull/3451) ([dependabot[bot]](https://github.com/apps/dependabot)) -- add `cast` support inside `values` [\#3447](https://github.com/apache/arrow-datafusion/pull/3447) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([kmitchener](https://github.com/kmitchener)) -- Use hash repartitioning for aggregates on dictionaries [\#3445](https://github.com/apache/arrow-datafusion/pull/3445) ([isidentical](https://github.com/isidentical)) -- Review `unwrap` and `panic` from the `aggregate` directory of `datafusion-physical-expr` [\#3443](https://github.com/apache/arrow-datafusion/pull/3443) ([iajoiner](https://github.com/iajoiner)) -- MINOR: Implement protobuf serde for all binary operators [\#3441](https://github.com/apache/arrow-datafusion/pull/3441) ([andygrove](https://github.com/andygrove)) -- MINOR: Add accessor methods to DateTimeIntervalExpr 
[\#3440](https://github.com/apache/arrow-datafusion/pull/3440) ([andygrove](https://github.com/andygrove)) -- update TPCH-mimicking tests to Decimal data type from Float, matching the benchmark [\#3438](https://github.com/apache/arrow-datafusion/pull/3438) ([kmitchener](https://github.com/kmitchener)) -- Include Bitwise operators in the documentation [\#3436](https://github.com/apache/arrow-datafusion/pull/3436) ([askoa](https://github.com/askoa)) -- minor: make sql number parsing slightly more efficient + functional [\#3432](https://github.com/apache/arrow-datafusion/pull/3432) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Implement bitwise XOR operator \(`#`\) [\#3430](https://github.com/apache/arrow-datafusion/pull/3430) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([askoa](https://github.com/askoa)) -- Replace From trait with TryFrom trait for datafusion-proto crate \#3401 [\#3429](https://github.com/apache/arrow-datafusion/pull/3429) ([comphead](https://github.com/comphead)) -- Tests showing user defined aggregate returning a struct [\#3425](https://github.com/apache/arrow-datafusion/pull/3425) ([alamb](https://github.com/alamb)) -- MINOR: update optimizer rule names to be consistent style as the rest [\#3415](https://github.com/apache/arrow-datafusion/pull/3415) ([kmitchener](https://github.com/kmitchener)) -- Support date32 and date 64 in inlist node [\#3413](https://github.com/apache/arrow-datafusion/pull/3413) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Update sqlparser requirement from 0.22 to 0.23 [\#3411](https://github.com/apache/arrow-datafusion/pull/3411) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([dependabot[bot]](https://github.com/apps/dependabot)) -- simplify the `between` expr during logical plan optimization [\#3404](https://github.com/apache/arrow-datafusion/pull/3404) ([kmitchener](https://github.com/kmitchener)) -- MINOR: Improve optimizer error 
[\#3403](https://github.com/apache/arrow-datafusion/pull/3403) ([andygrove](https://github.com/andygrove)) -- Review panics in the sql crate [\#3397](https://github.com/apache/arrow-datafusion/pull/3397) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([HaoYang670](https://github.com/HaoYang670)) -- changed TPC-H benchmark to use Decimal types [\#3393](https://github.com/apache/arrow-datafusion/pull/3393) ([kmitchener](https://github.com/kmitchener)) -- minor: remove redundant code. [\#3389](https://github.com/apache/arrow-datafusion/pull/3389) ([jackwener](https://github.com/jackwener)) -- Add dictionary cases to merge bench [\#3384](https://github.com/apache/arrow-datafusion/pull/3384) ([tustvold](https://github.com/tustvold)) -- Implement Eq trait for Expr and nested types [\#3381](https://github.com/apache/arrow-datafusion/pull/3381) ([jdye64](https://github.com/jdye64)) -- Minor: Improvements to type coercion rule [\#3379](https://github.com/apache/arrow-datafusion/pull/3379) ([alamb](https://github.com/alamb)) -- MINOR: Note that most communication happens on github [\#3375](https://github.com/apache/arrow-datafusion/pull/3375) ([alamb](https://github.com/alamb)) -- minor fix: clean data type for negative operation [\#3370](https://github.com/apache/arrow-datafusion/pull/3370) ([liukun4515](https://github.com/liukun4515)) -- Fix code generation for json feature [\#3367](https://github.com/apache/arrow-datafusion/pull/3367) ([avantgardnerio](https://github.com/avantgardnerio)) -- Review use of panic in datafusion-proto crate [\#3365](https://github.com/apache/arrow-datafusion/pull/3365) ([comphead](https://github.com/comphead)) -- Upgrade to arrow 22 [\#3363](https://github.com/apache/arrow-datafusion/pull/3363) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([avantgardnerio](https://github.com/avantgardnerio)) -- return empty dataframe on create table, remove a duplicate optimize call 
[\#3361](https://github.com/apache/arrow-datafusion/pull/3361) ([kmitchener](https://github.com/kmitchener)) -- Add SQL support for `tinyint` , `smallint`, and `unsigned int variants` [\#3359](https://github.com/apache/arrow-datafusion/pull/3359) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([kmitchener](https://github.com/kmitchener)) -- Minor: add hint in README of example [\#3358](https://github.com/apache/arrow-datafusion/pull/3358) ([jackwener](https://github.com/jackwener)) -- Collect to `HashSet` directly in `in_list` [\#3356](https://github.com/apache/arrow-datafusion/pull/3356) ([HaoYang670](https://github.com/HaoYang670)) -- MINOR: Add comments about rewrite_disjunctive_predicate [\#3351](https://github.com/apache/arrow-datafusion/pull/3351) ([alamb](https://github.com/alamb)) -- \[MINOR\] Add debug logging to plan teardown [\#3350](https://github.com/apache/arrow-datafusion/pull/3350) ([alamb](https://github.com/alamb)) -- MINOR: add df.to_unoptimized_plan\(\) to docs, remove erroneous comment [\#3348](https://github.com/apache/arrow-datafusion/pull/3348) ([kmitchener](https://github.com/kmitchener)) -- Replace `unwrap` in `convert_to_ordered_float` and add `downcast_value` [\#3347](https://github.com/apache/arrow-datafusion/pull/3347) ([iajoiner](https://github.com/iajoiner)) -- Remove panics from `common_subexpr_eliminate` [\#3346](https://github.com/apache/arrow-datafusion/pull/3346) ([andygrove](https://github.com/andygrove)) -- Remove Result.unwrap from single_distinct_to_groupby [\#3345](https://github.com/apache/arrow-datafusion/pull/3345) ([andygrove](https://github.com/andygrove)) -- Add to_unoptimized_plan [\#3344](https://github.com/apache/arrow-datafusion/pull/3344) ([iajoiner](https://github.com/iajoiner)) -- Remove panics from simplify_expressions optimizer rule [\#3343](https://github.com/apache/arrow-datafusion/pull/3343) ([andygrove](https://github.com/andygrove)) -- Remove `unreachable!` from filter push down rule 
[\#3342](https://github.com/apache/arrow-datafusion/pull/3342) ([andygrove](https://github.com/andygrove)) -- Replace panic in `datafusion-expr` crate [\#3341](https://github.com/apache/arrow-datafusion/pull/3341) ([iajoiner](https://github.com/iajoiner)) -- Re-implement ExprIdentifierVisitor::desc_expr to use Expr::Display [\#3339](https://github.com/apache/arrow-datafusion/pull/3339) ([andygrove](https://github.com/andygrove)) -- Fix the test`timestamp_add_interval_months` [\#3337](https://github.com/apache/arrow-datafusion/pull/3337) ([HaoYang670](https://github.com/HaoYang670)) -- Bump lz4-sys from 1.9.3 to 1.9.4 in /datafusion-cli [\#3335](https://github.com/apache/arrow-datafusion/pull/3335) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Make binary operator formatting consistent between logical and physical plans [\#3331](https://github.com/apache/arrow-datafusion/pull/3331) ([andygrove](https://github.com/andygrove)) -- Fix build: Ignore failing test [\#3329](https://github.com/apache/arrow-datafusion/pull/3329) ([andygrove](https://github.com/andygrove)) -- Add `InList` support for binary type. 
[\#3324](https://github.com/apache/arrow-datafusion/pull/3324) ([HaoYang670](https://github.com/HaoYang670)) -- MINOR: add github action trigger [\#3323](https://github.com/apache/arrow-datafusion/pull/3323) ([waynexia](https://github.com/waynexia)) -- add explain sql test for optimizer rule PreCastLitInComparisonExpressions [\#3320](https://github.com/apache/arrow-datafusion/pull/3320) ([liukun4515](https://github.com/liukun4515)) -- Custom / Dynamic table provider factories [\#3311](https://github.com/apache/arrow-datafusion/pull/3311) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([avantgardnerio](https://github.com/avantgardnerio)) -- fix: alias group_by exprs in single_distinct_to_groupby optimizer [\#3305](https://github.com/apache/arrow-datafusion/pull/3305) ([waynexia](https://github.com/waynexia)) -- Add support for serializing null scalar values [\#3303](https://github.com/apache/arrow-datafusion/pull/3303) ([andygrove](https://github.com/andygrove)) -- Finish integrating `Expr::Is[Not]True` and similar expressions [\#3301](https://github.com/apache/arrow-datafusion/pull/3301) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) -- MINOR: Remove `unwrap` calls from `single_distinct_to_groupby optimizer` rule [\#3299](https://github.com/apache/arrow-datafusion/pull/3299) ([andygrove](https://github.com/andygrove)) -- docs: update the Python library repository [\#3297](https://github.com/apache/arrow-datafusion/pull/3297) ([haoxins](https://github.com/haoxins)) -- fix: speed up `ConfigOptions` creation [\#3296](https://github.com/apache/arrow-datafusion/pull/3296) ([crepererum](https://github.com/crepererum)) -- Execute LogicalPlans after building for TPCH Benchmarks [\#3290](https://github.com/apache/arrow-datafusion/pull/3290) ([DaltonModlin](https://github.com/DaltonModlin)) -- support for non-correlated subqueries [\#3287](https://github.com/apache/arrow-datafusion/pull/3287) 
([kmitchener](https://github.com/kmitchener)) -- Add `Aggregate::try new` with validation checks [\#3286](https://github.com/apache/arrow-datafusion/pull/3286) ([andygrove](https://github.com/andygrove)) -- Fix SchemaError in FilterPushDown optimization with UNION ALL [\#3282](https://github.com/apache/arrow-datafusion/pull/3282) ([jonmmease](https://github.com/jonmmease)) -- Allow sorting by aggregated groups [\#3280](https://github.com/apache/arrow-datafusion/pull/3280) ([isidentical](https://github.com/isidentical)) -- Add show external tables [\#3279](https://github.com/apache/arrow-datafusion/pull/3279) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([psvri](https://github.com/psvri)) -- Return from task execution if send fails as there is nothing more to do \(faster cancel / limit\) [\#3276](https://github.com/apache/arrow-datafusion/pull/3276) ([nvartolomei](https://github.com/nvartolomei)) -- Let prelude import all expression functions [\#3274](https://github.com/apache/arrow-datafusion/pull/3274) ([sadilet](https://github.com/sadilet)) -- Fix no schema when CSV is only header [\#3272](https://github.com/apache/arrow-datafusion/pull/3272) ([comphead](https://github.com/comphead)) -- support inlist for pre cast literal expression [\#3270](https://github.com/apache/arrow-datafusion/pull/3270) ([liukun4515](https://github.com/liukun4515)) -- implement `drop view` [\#3267](https://github.com/apache/arrow-datafusion/pull/3267) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([kmitchener](https://github.com/kmitchener)) -- Use `ExprRewriter` in `pre_cast_lit_in_comparison` [\#3260](https://github.com/apache/arrow-datafusion/pull/3260) ([andygrove](https://github.com/andygrove)) -- Add type coercion for UDFs in logical plan [\#3254](https://github.com/apache/arrow-datafusion/pull/3254) ([andygrove](https://github.com/andygrove)) -- Support "IS NOT TRUE/FALSE" syntax [\#3252](https://github.com/apache/arrow-datafusion/pull/3252) 
[[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([sarahyurick](https://github.com/sarahyurick)) -- Implement `IS UNKNOWN`/`IS NOT UNKNOWN` operators [\#3246](https://github.com/apache/arrow-datafusion/pull/3246) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([isidentical](https://github.com/isidentical)) -- support decimal data type for the optimizer rule of PreCastLitInComparisonExpressions [\#3245](https://github.com/apache/arrow-datafusion/pull/3245) ([liukun4515](https://github.com/liukun4515)) -- chore: update cranelifts to 0.87.0 [\#3243](https://github.com/apache/arrow-datafusion/pull/3243) ([yjshen](https://github.com/yjshen)) -- Moved nullif out of unary functions [\#3241](https://github.com/apache/arrow-datafusion/pull/3241) ([comphead](https://github.com/comphead)) -- MINOR: documentation updates [\#3239](https://github.com/apache/arrow-datafusion/pull/3239) ([kmitchener](https://github.com/kmitchener)) -- MINOR: Add bounds check to Column physical expression [\#3238](https://github.com/apache/arrow-datafusion/pull/3238) ([andygrove](https://github.com/andygrove)) -- CREATE VIEW should return empty dataframe [\#3237](https://github.com/apache/arrow-datafusion/pull/3237) ([kmitchener](https://github.com/kmitchener)) -- Support "IS TRUE/FALSE" syntax \(redo\) [\#3235](https://github.com/apache/arrow-datafusion/pull/3235) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([sarahyurick](https://github.com/sarahyurick)) -- Fix propagation of optimized predicates on nested projections [\#3228](https://github.com/apache/arrow-datafusion/pull/3228) ([isidentical](https://github.com/isidentical)) -- Add more trim test cases [\#3226](https://github.com/apache/arrow-datafusion/pull/3226) ([ayushdg](https://github.com/ayushdg)) -- Upgrade to arrow 21 [\#3225](https://github.com/apache/arrow-datafusion/pull/3225) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] 
([avantgardnerio](https://github.com/avantgardnerio)) -- Add optimizer rule for type coercion \(binary operations only\) [\#3222](https://github.com/apache/arrow-datafusion/pull/3222) ([andygrove](https://github.com/andygrove)) -- \[Improve\] Use arrow::compute::sort in approx_percentile_cont [\#3219](https://github.com/apache/arrow-datafusion/pull/3219) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- \[minor\] fix bench aggregate_query_sql meta [\#3218](https://github.com/apache/arrow-datafusion/pull/3218) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- minor: refactor simplify negate [\#3213](https://github.com/apache/arrow-datafusion/pull/3213) ([jackwener](https://github.com/jackwener)) -- MINOR: update cargo.lock and rust-version for datafusion-cli [\#3212](https://github.com/apache/arrow-datafusion/pull/3212) ([kmitchener](https://github.com/kmitchener)) -- fix issue with now\(\) returning same value across statements [\#3210](https://github.com/apache/arrow-datafusion/pull/3210) ([kmitchener](https://github.com/kmitchener)) -- Add support for inline column alias in CREATE VIEW [\#3209](https://github.com/apache/arrow-datafusion/pull/3209) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([DaltonModlin](https://github.com/DaltonModlin)) -- Add SQL query planner support for `DISTRIBUTE BY` [\#3208](https://github.com/apache/arrow-datafusion/pull/3208) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) -- minor: remove test code that's in the arrow library now [\#3206](https://github.com/apache/arrow-datafusion/pull/3206) ([kmitchener](https://github.com/kmitchener)) -- Use .get\(\) to avoid panic [\#3201](https://github.com/apache/arrow-datafusion/pull/3201) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jklamer](https://github.com/jklamer)) -- \[Minor\] Reduce code duplication creating ScalarValue::List [\#3197](https://github.com/apache/arrow-datafusion/pull/3197) 
[[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Clean up CI workflows by removing "matrix" strategy, simplifying names [\#3196](https://github.com/apache/arrow-datafusion/pull/3196) ([alamb](https://github.com/alamb)) -- optimizer: add framework for the rule of pre-add cast to the literal in comparison binary [\#3185](https://github.com/apache/arrow-datafusion/pull/3185) ([liukun4515](https://github.com/liukun4515)) -- Fix clippy [\#3182](https://github.com/apache/arrow-datafusion/pull/3182) ([alamb](https://github.com/alamb)) -- MINOR: Add notes on writing release blog posts [\#3179](https://github.com/apache/arrow-datafusion/pull/3179) ([andygrove](https://github.com/andygrove)) -- add min/max for time [\#3178](https://github.com/apache/arrow-datafusion/pull/3178) ([waitingkuo](https://github.com/waitingkuo)) -- Recursively apply remove filter rule if filter is a true scalar value [\#3175](https://github.com/apache/arrow-datafusion/pull/3175) ([byteink](https://github.com/byteink)) -- Update `ahash` requirement from 0.7 to 0.8 [\#3161](https://github.com/apache/arrow-datafusion/pull/3161) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Support number of centroids in approx_percentile_cont [\#3146](https://github.com/apache/arrow-datafusion/pull/3146) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Introduce `\i` command to execute from a file [\#3136](https://github.com/apache/arrow-datafusion/pull/3136) ([turbo1912](https://github.com/turbo1912)) -- impl binary ops between binary arrays and scalars [\#3124](https://github.com/apache/arrow-datafusion/pull/3124) ([ozgrakkurt](https://github.com/ozgrakkurt)) +- minor: fix some typo. 
[\#3453](https://github.com/apache/datafusion/pull/3453) ([jackwener](https://github.com/jackwener)) +- Update criterion requirement from 0.3 to 0.4 [\#3452](https://github.com/apache/datafusion/pull/3452) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Update object_store requirement from 0.4.0 to 0.5.0 [\#3451](https://github.com/apache/datafusion/pull/3451) ([dependabot[bot]](https://github.com/apps/dependabot)) +- add `cast` support inside `values` [\#3447](https://github.com/apache/datafusion/pull/3447) [[sql](https://github.com/apache/datafusion/labels/sql)] ([kmitchener](https://github.com/kmitchener)) +- Use hash repartitioning for aggregates on dictionaries [\#3445](https://github.com/apache/datafusion/pull/3445) ([isidentical](https://github.com/isidentical)) +- Review `unwrap` and `panic` from the `aggregate` directory of `datafusion-physical-expr` [\#3443](https://github.com/apache/datafusion/pull/3443) ([iajoiner](https://github.com/iajoiner)) +- MINOR: Implement protobuf serde for all binary operators [\#3441](https://github.com/apache/datafusion/pull/3441) ([andygrove](https://github.com/andygrove)) +- MINOR: Add accessor methods to DateTimeIntervalExpr [\#3440](https://github.com/apache/datafusion/pull/3440) ([andygrove](https://github.com/andygrove)) +- update TPCH-mimicking tests to Decimal data type from Float, matching the benchmark [\#3438](https://github.com/apache/datafusion/pull/3438) ([kmitchener](https://github.com/kmitchener)) +- Include Bitwise operators in the documentation [\#3436](https://github.com/apache/datafusion/pull/3436) ([askoa](https://github.com/askoa)) +- minor: make sql number parsing slightly more efficient + functional [\#3432](https://github.com/apache/datafusion/pull/3432) [[sql](https://github.com/apache/datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Implement bitwise XOR operator \(`#`\) [\#3430](https://github.com/apache/datafusion/pull/3430) 
[[sql](https://github.com/apache/datafusion/labels/sql)] ([askoa](https://github.com/askoa)) +- Replace From trait with TryFrom trait for datafusion-proto crate \#3401 [\#3429](https://github.com/apache/datafusion/pull/3429) ([comphead](https://github.com/comphead)) +- Tests showing user defined aggregate returning a struct [\#3425](https://github.com/apache/datafusion/pull/3425) ([alamb](https://github.com/alamb)) +- MINOR: update optimizer rule names to be consistent style as the rest [\#3415](https://github.com/apache/datafusion/pull/3415) ([kmitchener](https://github.com/kmitchener)) +- Support date32 and date 64 in inlist node [\#3413](https://github.com/apache/datafusion/pull/3413) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Update sqlparser requirement from 0.22 to 0.23 [\#3411](https://github.com/apache/datafusion/pull/3411) [[sql](https://github.com/apache/datafusion/labels/sql)] ([dependabot[bot]](https://github.com/apps/dependabot)) +- simplify the `between` expr during logical plan optimization [\#3404](https://github.com/apache/datafusion/pull/3404) ([kmitchener](https://github.com/kmitchener)) +- MINOR: Improve optimizer error [\#3403](https://github.com/apache/datafusion/pull/3403) ([andygrove](https://github.com/andygrove)) +- Review panics in the sql crate [\#3397](https://github.com/apache/datafusion/pull/3397) [[sql](https://github.com/apache/datafusion/labels/sql)] ([HaoYang670](https://github.com/HaoYang670)) +- changed TPC-H benchmark to use Decimal types [\#3393](https://github.com/apache/datafusion/pull/3393) ([kmitchener](https://github.com/kmitchener)) +- minor: remove redundant code. 
[\#3389](https://github.com/apache/datafusion/pull/3389) ([jackwener](https://github.com/jackwener)) +- Add dictionary cases to merge bench [\#3384](https://github.com/apache/datafusion/pull/3384) ([tustvold](https://github.com/tustvold)) +- Implement Eq trait for Expr and nested types [\#3381](https://github.com/apache/datafusion/pull/3381) ([jdye64](https://github.com/jdye64)) +- Minor: Improvements to type coercion rule [\#3379](https://github.com/apache/datafusion/pull/3379) ([alamb](https://github.com/alamb)) +- MINOR: Note that most communication happens on github [\#3375](https://github.com/apache/datafusion/pull/3375) ([alamb](https://github.com/alamb)) +- minor fix: clean data type for negative operation [\#3370](https://github.com/apache/datafusion/pull/3370) ([liukun4515](https://github.com/liukun4515)) +- Fix code generation for json feature [\#3367](https://github.com/apache/datafusion/pull/3367) ([avantgardnerio](https://github.com/avantgardnerio)) +- Review use of panic in datafusion-proto crate [\#3365](https://github.com/apache/datafusion/pull/3365) ([comphead](https://github.com/comphead)) +- Upgrade to arrow 22 [\#3363](https://github.com/apache/datafusion/pull/3363) [[sql](https://github.com/apache/datafusion/labels/sql)] ([avantgardnerio](https://github.com/avantgardnerio)) +- return empty dataframe on create table, remove a duplicate optimize call [\#3361](https://github.com/apache/datafusion/pull/3361) ([kmitchener](https://github.com/kmitchener)) +- Add SQL support for `tinyint` , `smallint`, and `unsigned int variants` [\#3359](https://github.com/apache/datafusion/pull/3359) [[sql](https://github.com/apache/datafusion/labels/sql)] ([kmitchener](https://github.com/kmitchener)) +- Minor: add hint in README of example [\#3358](https://github.com/apache/datafusion/pull/3358) ([jackwener](https://github.com/jackwener)) +- Collect to `HashSet` directly in `in_list` [\#3356](https://github.com/apache/datafusion/pull/3356) 
([HaoYang670](https://github.com/HaoYang670)) +- MINOR: Add comments about rewrite_disjunctive_predicate [\#3351](https://github.com/apache/datafusion/pull/3351) ([alamb](https://github.com/alamb)) +- \[MINOR\] Add debug logging to plan teardown [\#3350](https://github.com/apache/datafusion/pull/3350) ([alamb](https://github.com/alamb)) +- MINOR: add df.to_unoptimized_plan\(\) to docs, remove erroneous comment [\#3348](https://github.com/apache/datafusion/pull/3348) ([kmitchener](https://github.com/kmitchener)) +- Replace `unwrap` in `convert_to_ordered_float` and add `downcast_value` [\#3347](https://github.com/apache/datafusion/pull/3347) ([iajoiner](https://github.com/iajoiner)) +- Remove panics from `common_subexpr_eliminate` [\#3346](https://github.com/apache/datafusion/pull/3346) ([andygrove](https://github.com/andygrove)) +- Remove Result.unwrap from single_distinct_to_groupby [\#3345](https://github.com/apache/datafusion/pull/3345) ([andygrove](https://github.com/andygrove)) +- Add to_unoptimized_plan [\#3344](https://github.com/apache/datafusion/pull/3344) ([iajoiner](https://github.com/iajoiner)) +- Remove panics from simplify_expressions optimizer rule [\#3343](https://github.com/apache/datafusion/pull/3343) ([andygrove](https://github.com/andygrove)) +- Remove `unreachable!` from filter push down rule [\#3342](https://github.com/apache/datafusion/pull/3342) ([andygrove](https://github.com/andygrove)) +- Replace panic in `datafusion-expr` crate [\#3341](https://github.com/apache/datafusion/pull/3341) ([iajoiner](https://github.com/iajoiner)) +- Re-implement ExprIdentifierVisitor::desc_expr to use Expr::Display [\#3339](https://github.com/apache/datafusion/pull/3339) ([andygrove](https://github.com/andygrove)) +- Fix the test`timestamp_add_interval_months` [\#3337](https://github.com/apache/datafusion/pull/3337) ([HaoYang670](https://github.com/HaoYang670)) +- Bump lz4-sys from 1.9.3 to 1.9.4 in /datafusion-cli 
[\#3335](https://github.com/apache/datafusion/pull/3335) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Make binary operator formatting consistent between logical and physical plans [\#3331](https://github.com/apache/datafusion/pull/3331) ([andygrove](https://github.com/andygrove)) +- Fix build: Ignore failing test [\#3329](https://github.com/apache/datafusion/pull/3329) ([andygrove](https://github.com/andygrove)) +- Add `InList` support for binary type. [\#3324](https://github.com/apache/datafusion/pull/3324) ([HaoYang670](https://github.com/HaoYang670)) +- MINOR: add github action trigger [\#3323](https://github.com/apache/datafusion/pull/3323) ([waynexia](https://github.com/waynexia)) +- add explain sql test for optimizer rule PreCastLitInComparisonExpressions [\#3320](https://github.com/apache/datafusion/pull/3320) ([liukun4515](https://github.com/liukun4515)) +- Custom / Dynamic table provider factories [\#3311](https://github.com/apache/datafusion/pull/3311) [[sql](https://github.com/apache/datafusion/labels/sql)] ([avantgardnerio](https://github.com/avantgardnerio)) +- fix: alias group_by exprs in single_distinct_to_groupby optimizer [\#3305](https://github.com/apache/datafusion/pull/3305) ([waynexia](https://github.com/waynexia)) +- Add support for serializing null scalar values [\#3303](https://github.com/apache/datafusion/pull/3303) ([andygrove](https://github.com/andygrove)) +- Finish integrating `Expr::Is[Not]True` and similar expressions [\#3301](https://github.com/apache/datafusion/pull/3301) [[sql](https://github.com/apache/datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- MINOR: Remove `unwrap` calls from `single_distinct_to_groupby optimizer` rule [\#3299](https://github.com/apache/datafusion/pull/3299) ([andygrove](https://github.com/andygrove)) +- docs: update the Python library repository [\#3297](https://github.com/apache/datafusion/pull/3297) ([haoxins](https://github.com/haoxins)) +- fix: speed up 
`ConfigOptions` creation [\#3296](https://github.com/apache/datafusion/pull/3296) ([crepererum](https://github.com/crepererum)) +- Execute LogicalPlans after building for TPCH Benchmarks [\#3290](https://github.com/apache/datafusion/pull/3290) ([DaltonModlin](https://github.com/DaltonModlin)) +- support for non-correlated subqueries [\#3287](https://github.com/apache/datafusion/pull/3287) ([kmitchener](https://github.com/kmitchener)) +- Add `Aggregate::try new` with validation checks [\#3286](https://github.com/apache/datafusion/pull/3286) ([andygrove](https://github.com/andygrove)) +- Fix SchemaError in FilterPushDown optimization with UNION ALL [\#3282](https://github.com/apache/datafusion/pull/3282) ([jonmmease](https://github.com/jonmmease)) +- Allow sorting by aggregated groups [\#3280](https://github.com/apache/datafusion/pull/3280) ([isidentical](https://github.com/isidentical)) +- Add show external tables [\#3279](https://github.com/apache/datafusion/pull/3279) [[sql](https://github.com/apache/datafusion/labels/sql)] ([psvri](https://github.com/psvri)) +- Return from task execution if send fails as there is nothing more to do \(faster cancel / limit\) [\#3276](https://github.com/apache/datafusion/pull/3276) ([nvartolomei](https://github.com/nvartolomei)) +- Let prelude import all expression functions [\#3274](https://github.com/apache/datafusion/pull/3274) ([sadilet](https://github.com/sadilet)) +- Fix no schema when CSV is only header [\#3272](https://github.com/apache/datafusion/pull/3272) ([comphead](https://github.com/comphead)) +- support inlist for pre cast literal expression [\#3270](https://github.com/apache/datafusion/pull/3270) ([liukun4515](https://github.com/liukun4515)) +- implement `drop view` [\#3267](https://github.com/apache/datafusion/pull/3267) [[sql](https://github.com/apache/datafusion/labels/sql)] ([kmitchener](https://github.com/kmitchener)) +- Use `ExprRewriter` in `pre_cast_lit_in_comparison` 
[\#3260](https://github.com/apache/datafusion/pull/3260) ([andygrove](https://github.com/andygrove)) +- Add type coercion for UDFs in logical plan [\#3254](https://github.com/apache/datafusion/pull/3254) ([andygrove](https://github.com/andygrove)) +- Support "IS NOT TRUE/FALSE" syntax [\#3252](https://github.com/apache/datafusion/pull/3252) [[sql](https://github.com/apache/datafusion/labels/sql)] ([sarahyurick](https://github.com/sarahyurick)) +- Implement `IS UNKNOWN`/`IS NOT UNKNOWN` operators [\#3246](https://github.com/apache/datafusion/pull/3246) [[sql](https://github.com/apache/datafusion/labels/sql)] ([isidentical](https://github.com/isidentical)) +- support decimal data type for the optimizer rule of PreCastLitInComparisonExpressions [\#3245](https://github.com/apache/datafusion/pull/3245) ([liukun4515](https://github.com/liukun4515)) +- chore: update cranelifts to 0.87.0 [\#3243](https://github.com/apache/datafusion/pull/3243) ([yjshen](https://github.com/yjshen)) +- Moved nullif out of unary functions [\#3241](https://github.com/apache/datafusion/pull/3241) ([comphead](https://github.com/comphead)) +- MINOR: documentation updates [\#3239](https://github.com/apache/datafusion/pull/3239) ([kmitchener](https://github.com/kmitchener)) +- MINOR: Add bounds check to Column physical expression [\#3238](https://github.com/apache/datafusion/pull/3238) ([andygrove](https://github.com/andygrove)) +- CREATE VIEW should return empty dataframe [\#3237](https://github.com/apache/datafusion/pull/3237) ([kmitchener](https://github.com/kmitchener)) +- Support "IS TRUE/FALSE" syntax \(redo\) [\#3235](https://github.com/apache/datafusion/pull/3235) [[sql](https://github.com/apache/datafusion/labels/sql)] ([sarahyurick](https://github.com/sarahyurick)) +- Fix propagation of optimized predicates on nested projections [\#3228](https://github.com/apache/datafusion/pull/3228) ([isidentical](https://github.com/isidentical)) +- Add more trim test cases 
[\#3226](https://github.com/apache/datafusion/pull/3226) ([ayushdg](https://github.com/ayushdg)) +- Upgrade to arrow 21 [\#3225](https://github.com/apache/datafusion/pull/3225) [[sql](https://github.com/apache/datafusion/labels/sql)] ([avantgardnerio](https://github.com/avantgardnerio)) +- Add optimizer rule for type coercion \(binary operations only\) [\#3222](https://github.com/apache/datafusion/pull/3222) ([andygrove](https://github.com/andygrove)) +- \[Improve\] Use arrow::compute::sort in approx_percentile_cont [\#3219](https://github.com/apache/datafusion/pull/3219) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- \[minor\] fix bench aggregate_query_sql meta [\#3218](https://github.com/apache/datafusion/pull/3218) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- minor: refactor simplify negate [\#3213](https://github.com/apache/datafusion/pull/3213) ([jackwener](https://github.com/jackwener)) +- MINOR: update cargo.lock and rust-version for datafusion-cli [\#3212](https://github.com/apache/datafusion/pull/3212) ([kmitchener](https://github.com/kmitchener)) +- fix issue with now\(\) returning same value across statements [\#3210](https://github.com/apache/datafusion/pull/3210) ([kmitchener](https://github.com/kmitchener)) +- Add support for inline column alias in CREATE VIEW [\#3209](https://github.com/apache/datafusion/pull/3209) [[sql](https://github.com/apache/datafusion/labels/sql)] ([DaltonModlin](https://github.com/DaltonModlin)) +- Add SQL query planner support for `DISTRIBUTE BY` [\#3208](https://github.com/apache/datafusion/pull/3208) [[sql](https://github.com/apache/datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- minor: remove test code that's in the arrow library now [\#3206](https://github.com/apache/datafusion/pull/3206) ([kmitchener](https://github.com/kmitchener)) +- Use .get\(\) to avoid panic [\#3201](https://github.com/apache/datafusion/pull/3201) [[sql](https://github.com/apache/datafusion/labels/sql)] 
([jklamer](https://github.com/jklamer)) +- \[Minor\] Reduce code duplication creating ScalarValue::List [\#3197](https://github.com/apache/datafusion/pull/3197) [[sql](https://github.com/apache/datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Clean up CI workflows by removing "matrix" strategy, simplifying names [\#3196](https://github.com/apache/datafusion/pull/3196) ([alamb](https://github.com/alamb)) +- optimizer: add framework for the rule of pre-add cast to the literal in comparison binary [\#3185](https://github.com/apache/datafusion/pull/3185) ([liukun4515](https://github.com/liukun4515)) +- Fix clippy [\#3182](https://github.com/apache/datafusion/pull/3182) ([alamb](https://github.com/alamb)) +- MINOR: Add notes on writing release blog posts [\#3179](https://github.com/apache/datafusion/pull/3179) ([andygrove](https://github.com/andygrove)) +- add min/max for time [\#3178](https://github.com/apache/datafusion/pull/3178) ([waitingkuo](https://github.com/waitingkuo)) +- Recursively apply remove filter rule if filter is a true scalar value [\#3175](https://github.com/apache/datafusion/pull/3175) ([byteink](https://github.com/byteink)) +- Update `ahash` requirement from 0.7 to 0.8 [\#3161](https://github.com/apache/datafusion/pull/3161) [[sql](https://github.com/apache/datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Support number of centroids in approx_percentile_cont [\#3146](https://github.com/apache/datafusion/pull/3146) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Introduce `\i` command to execute from a file [\#3136](https://github.com/apache/datafusion/pull/3136) ([turbo1912](https://github.com/turbo1912)) +- impl binary ops between binary arrays and scalars [\#3124](https://github.com/apache/datafusion/pull/3124) ([ozgrakkurt](https://github.com/ozgrakkurt)) diff --git a/dev/changelog/13.0.0.md b/dev/changelog/13.0.0.md index 14b42a052ef9..412aad636e1c 100644 --- a/dev/changelog/13.0.0.md +++ b/dev/changelog/13.0.0.md @@ 
-17,217 +17,217 @@ under the License. --> -## [13.0.0](https://github.com/apache/arrow-datafusion/tree/13.0.0) (2022-10-06) +## [13.0.0](https://github.com/apache/datafusion/tree/13.0.0) (2022-10-06) -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/12.0.0...13.0.0) +[Full Changelog](https://github.com/apache/datafusion/compare/12.0.0...13.0.0) **Breaking changes:** -- Make ObjectStoreProvider fallible \(return `Result` rather than `Option`\) [\#3584](https://github.com/apache/arrow-datafusion/pull/3584) ([tustvold](https://github.com/tustvold)) -- Make `OptimizerConfig` a builder style API [\#3525](https://github.com/apache/arrow-datafusion/pull/3525) ([alamb](https://github.com/alamb)) +- Make ObjectStoreProvider fallible \(return `Result` rather than `Option`\) [\#3584](https://github.com/apache/datafusion/pull/3584) ([tustvold](https://github.com/tustvold)) +- Make `OptimizerConfig` a builder style API [\#3525](https://github.com/apache/datafusion/pull/3525) ([alamb](https://github.com/alamb)) **Implemented enhancements:** -- remove `type coercion` for ScalarUDF in the physical phase [\#3734](https://github.com/apache/arrow-datafusion/issues/3734) -- Allow with statements to specify their columns alongside their expression names [\#3716](https://github.com/apache/arrow-datafusion/issues/3716) -- Support SQLDataType::Timestamp\(TimezoneInfo\) [\#3693](https://github.com/apache/arrow-datafusion/issues/3693) -- support `type coercion` for case when expr [\#3673](https://github.com/apache/arrow-datafusion/issues/3673) -- Add simplification rules for the `Modulo` operator [\#3664](https://github.com/apache/arrow-datafusion/issues/3664) -- Add TIMESTAMPTZ [\#3659](https://github.com/apache/arrow-datafusion/issues/3659) -- Simplify `A * 0` and `A * null`. 
[\#3626](https://github.com/apache/arrow-datafusion/issues/3626) -- change rule of `PreCastLitInComparisonExpressions` to unwrap cast rule after \#3582 [\#3622](https://github.com/apache/arrow-datafusion/issues/3622) -- Optimize regex_replace with a known pattern / replacement [\#3613](https://github.com/apache/arrow-datafusion/issues/3613) -- Simplify `CONCAT_WS(NULL, ..)` to `NULL` [\#3607](https://github.com/apache/arrow-datafusion/issues/3607) -- Add OctoSQL to list of systems powered by DataFusion [\#3605](https://github.com/apache/arrow-datafusion/issues/3605) -- Prevent over-allocation \(and spills\) on TopK queries [\#3596](https://github.com/apache/arrow-datafusion/issues/3596) -- Allow ObjectStoreProvider to return None \(return Result\ rather than Result\) [\#3594](https://github.com/apache/arrow-datafusion/issues/3594) -- simplify between expr should consider the data type [\#3587](https://github.com/apache/arrow-datafusion/issues/3587) -- make type coercion simple and remove the evaluate logic [\#3585](https://github.com/apache/arrow-datafusion/issues/3585) -- ReduceOuterJoin optimizer support `cast or try_cast` expr. 
[\#3565](https://github.com/apache/arrow-datafusion/issues/3565) -- Support type coercion for subquery [\#3557](https://github.com/apache/arrow-datafusion/issues/3557) -- Make `ParquetScanOptions` public and expose a reference to the scan options from `ParquetExec` [\#3550](https://github.com/apache/arrow-datafusion/issues/3550) -- Use `fetch` limit in `get_sorted_iter` [\#3544](https://github.com/apache/arrow-datafusion/issues/3544) -- Push limit to sort [\#3528](https://github.com/apache/arrow-datafusion/issues/3528) -- Execute sorts in parallel when limit is used after sort [\#3526](https://github.com/apache/arrow-datafusion/issues/3526) -- Consolidate optimizer passes in optimizer module for better testing [\#3524](https://github.com/apache/arrow-datafusion/issues/3524) -- Support Top-K query optimization for `ORDER BY \ \[ASC [\#3515](https://github.com/apache/arrow-datafusion/issues/3515) -- support the type coercion for `like` `unlike` `istrue` `isfalse` `isunknown` [\#3509](https://github.com/apache/arrow-datafusion/issues/3509) -- Automate the pushing of releases to Homebrew [\#3506](https://github.com/apache/arrow-datafusion/issues/3506) -- Add extra DATE_PART units that are already supported in arrow-rs [\#3502](https://github.com/apache/arrow-datafusion/issues/3502) -- Release datafusion-cli 12.0.0 on Homebrew [\#3501](https://github.com/apache/arrow-datafusion/issues/3501) -- Make `from_proto_binary_op` public [\#3489](https://github.com/apache/arrow-datafusion/issues/3489) -- coercion between decimal and other types lacking, compared to other numeric types [\#3479](https://github.com/apache/arrow-datafusion/issues/3479) -- move type coercion for inlist from physical phase to logical phase [\#3468](https://github.com/apache/arrow-datafusion/issues/3468) -- Make `datafusion::physical_plan::file_format::file_strean::FileStream` public [\#3466](https://github.com/apache/arrow-datafusion/issues/3466) -- Support using offset index in 
`ParquetRecordBatchStream` when pushing down `RowFilter` [\#3456](https://github.com/apache/arrow-datafusion/issues/3456) -- Support timestamp data type in In_list node [\#3449](https://github.com/apache/arrow-datafusion/issues/3449) -- Evaluate expressions after type coercion [\#3431](https://github.com/apache/arrow-datafusion/issues/3431) -- Make a convenience function to register a single `RecordBatch` as a table from SessionContext [\#3426](https://github.com/apache/arrow-datafusion/issues/3426) -- add datafusion-cli support of external table locations that object_store supports [\#3424](https://github.com/apache/arrow-datafusion/issues/3424) -- pruning support cast/try_cast expr [\#3414](https://github.com/apache/arrow-datafusion/issues/3414) -- Add documentation on querying against files in object store such as S3 [\#3399](https://github.com/apache/arrow-datafusion/issues/3399) -- Remove type-coercion from physical planner [\#3388](https://github.com/apache/arrow-datafusion/issues/3388) -- support `Statement::ShowVariable` to show session configs [\#3364](https://github.com/apache/arrow-datafusion/issues/3364) -- Support `RowFilter` in `ParquetExec` [\#3360](https://github.com/apache/arrow-datafusion/issues/3360) -- Apply `TypeCoercion` rule before `FilterPushDown` [\#3289](https://github.com/apache/arrow-datafusion/issues/3289) -- Add support for `get` / `show` timezone [\#3255](https://github.com/apache/arrow-datafusion/issues/3255) -- Consider adding DataFusion to ClickBench benchmarks [\#2902](https://github.com/apache/arrow-datafusion/issues/2902) -- `filter_push_down` panics on semi/anti join with join filters [\#2888](https://github.com/apache/arrow-datafusion/issues/2888) -- Migrate the `cross join -> inner join optimization` from the planner to the optimizer [\#2859](https://github.com/apache/arrow-datafusion/issues/2859) -- ObjectStore write support [\#2185](https://github.com/apache/arrow-datafusion/issues/2185) -- DataFusion should scan Parquet 
statistics once per query [\#871](https://github.com/apache/arrow-datafusion/issues/871) -- Extend & generalize constant folding / evaluation in logical optimizer [\#237](https://github.com/apache/arrow-datafusion/issues/237) +- remove `type coercion` for ScalarUDF in the physical phase [\#3734](https://github.com/apache/datafusion/issues/3734) +- Allow with statements to specify their columns alongside their expression names [\#3716](https://github.com/apache/datafusion/issues/3716) +- Support SQLDataType::Timestamp\(TimezoneInfo\) [\#3693](https://github.com/apache/datafusion/issues/3693) +- support `type coercion` for case when expr [\#3673](https://github.com/apache/datafusion/issues/3673) +- Add simplification rules for the `Modulo` operator [\#3664](https://github.com/apache/datafusion/issues/3664) +- Add TIMESTAMPTZ [\#3659](https://github.com/apache/datafusion/issues/3659) +- Simplify `A * 0` and `A * null`. [\#3626](https://github.com/apache/datafusion/issues/3626) +- change rule of `PreCastLitInComparisonExpressions` to unwrap cast rule after \#3582 [\#3622](https://github.com/apache/datafusion/issues/3622) +- Optimize regex_replace with a known pattern / replacement [\#3613](https://github.com/apache/datafusion/issues/3613) +- Simplify `CONCAT_WS(NULL, ..)` to `NULL` [\#3607](https://github.com/apache/datafusion/issues/3607) +- Add OctoSQL to list of systems powered by DataFusion [\#3605](https://github.com/apache/datafusion/issues/3605) +- Prevent over-allocation \(and spills\) on TopK queries [\#3596](https://github.com/apache/datafusion/issues/3596) +- Allow ObjectStoreProvider to return None \(return Result\ rather than Result\) [\#3594](https://github.com/apache/datafusion/issues/3594) +- simplify between expr should consider the data type [\#3587](https://github.com/apache/datafusion/issues/3587) +- make type coercion simple and remove the evaluate logic [\#3585](https://github.com/apache/datafusion/issues/3585) +- ReduceOuterJoin optimizer support 
`cast or try_cast` expr. [\#3565](https://github.com/apache/datafusion/issues/3565) +- Support type coercion for subquery [\#3557](https://github.com/apache/datafusion/issues/3557) +- Make `ParquetScanOptions` public and expose a reference to the scan options from `ParquetExec` [\#3550](https://github.com/apache/datafusion/issues/3550) +- Use `fetch` limit in `get_sorted_iter` [\#3544](https://github.com/apache/datafusion/issues/3544) +- Push limit to sort [\#3528](https://github.com/apache/datafusion/issues/3528) +- Execute sorts in parallel when limit is used after sort [\#3526](https://github.com/apache/datafusion/issues/3526) +- Consolidate optimizer passes in optimizer module for better testing [\#3524](https://github.com/apache/datafusion/issues/3524) +- Support Top-K query optimization for `ORDER BY \ \[ASC [\#3515](https://github.com/apache/datafusion/issues/3515) +- support the type coercion for `like` `unlike` `istrue` `isfalse` `isunknown` [\#3509](https://github.com/apache/datafusion/issues/3509) +- Automate the pushing of releases to Homebrew [\#3506](https://github.com/apache/datafusion/issues/3506) +- Add extra DATE_PART units that are already supported in arrow-rs [\#3502](https://github.com/apache/datafusion/issues/3502) +- Release datafusion-cli 12.0.0 on Homebrew [\#3501](https://github.com/apache/datafusion/issues/3501) +- Make `from_proto_binary_op` public [\#3489](https://github.com/apache/datafusion/issues/3489) +- coercion between decimal and other types lacking, compared to other numeric types [\#3479](https://github.com/apache/datafusion/issues/3479) +- move type coercion for inlist from physical phase to logical phase [\#3468](https://github.com/apache/datafusion/issues/3468) +- Make `datafusion::physical_plan::file_format::file_strean::FileStream` public [\#3466](https://github.com/apache/datafusion/issues/3466) +- Support using offset index in `ParquetRecordBatchStream` when pushing down `RowFilter` 
[\#3456](https://github.com/apache/datafusion/issues/3456) +- Support timestamp data type in In_list node [\#3449](https://github.com/apache/datafusion/issues/3449) +- Evaluate expressions after type coercion [\#3431](https://github.com/apache/datafusion/issues/3431) +- Make a convenience function to register a single `RecordBatch` as a table from SessionContext [\#3426](https://github.com/apache/datafusion/issues/3426) +- add datafusion-cli support of external table locations that object_store supports [\#3424](https://github.com/apache/datafusion/issues/3424) +- pruning support cast/try_cast expr [\#3414](https://github.com/apache/datafusion/issues/3414) +- Add documentation on querying against files in object store such as S3 [\#3399](https://github.com/apache/datafusion/issues/3399) +- Remove type-coercion from physical planner [\#3388](https://github.com/apache/datafusion/issues/3388) +- support `Statement::ShowVariable` to show session configs [\#3364](https://github.com/apache/datafusion/issues/3364) +- Support `RowFilter` in `ParquetExec` [\#3360](https://github.com/apache/datafusion/issues/3360) +- Apply `TypeCoercion` rule before `FilterPushDown` [\#3289](https://github.com/apache/datafusion/issues/3289) +- Add support for `get` / `show` timezone [\#3255](https://github.com/apache/datafusion/issues/3255) +- Consider adding DataFusion to ClickBench benchmarks [\#2902](https://github.com/apache/datafusion/issues/2902) +- `filter_push_down` panics on semi/anti join with join filters [\#2888](https://github.com/apache/datafusion/issues/2888) +- Migrate the `cross join -> inner join optimization` from the planner to the optimizer [\#2859](https://github.com/apache/datafusion/issues/2859) +- ObjectStore write support [\#2185](https://github.com/apache/datafusion/issues/2185) +- DataFusion should scan Parquet statistics once per query [\#871](https://github.com/apache/datafusion/issues/871) +- Extend & generalize constant folding / evaluation in logical 
optimizer [\#237](https://github.com/apache/datafusion/issues/237) **Fixed bugs:** -- `projection_push_down` produces invalid aggregate plans in some cases [\#3738](https://github.com/apache/arrow-datafusion/issues/3738) -- `Time With Time Zone` should raise error until `DataType::Time64` support tz [\#3715](https://github.com/apache/arrow-datafusion/issues/3715) -- SQL Planner doesn't distinguish normal CTEs from the recursive ones. [\#3713](https://github.com/apache/arrow-datafusion/issues/3713) -- Fix inconsistency between column name formats [\#3711](https://github.com/apache/arrow-datafusion/issues/3711) -- Optimizer rule 'projection_push_down' failed due to unexpected error: Error during planning: Aggregate schema has wrong number of fields. Expected 3 got 8 [\#3704](https://github.com/apache/arrow-datafusion/issues/3704) -- Optimizer regressions in `unwrap_cast_in_comparison` [\#3690](https://github.com/apache/arrow-datafusion/issues/3690) -- Internal error when evaluating a predicate = "The type of Dictionary\(Int16, Utf8\) = Int64 of binary physical should be same" [\#3685](https://github.com/apache/arrow-datafusion/issues/3685) -- Specialized regexp_replace should early-abort when the input arrays are empty [\#3647](https://github.com/apache/arrow-datafusion/issues/3647) -- Internal error: Failed to coerce types Decimal128\(10, 2\) and Boolean in BETWEEN expression [\#3646](https://github.com/apache/arrow-datafusion/issues/3646) -- Internal error: Failed to coerce types Decimal128\(10, 2\) and Boolean in BETWEEN expression [\#3645](https://github.com/apache/arrow-datafusion/issues/3645) -- Type coercion error: The type of Boolean AND Decimal128\(10, 2\) of binary physical should be same [\#3644](https://github.com/apache/arrow-datafusion/issues/3644) -- LEFT JOIN not working as expected, error message is confusing [\#3639](https://github.com/apache/arrow-datafusion/issues/3639) -- `INTERSECT` and `EXCEPT` don't return an error when 2 sets have the 
different number of columns [\#3632](https://github.com/apache/arrow-datafusion/issues/3632) -- The datafusion-cli panics when `union` 2 table with different number of columns. [\#3630](https://github.com/apache/arrow-datafusion/issues/3630) -- The expression `col(a) / null` is not optimized. [\#3624](https://github.com/apache/arrow-datafusion/issues/3624) -- `s3_build_error` test may fail in some environments [\#3601](https://github.com/apache/arrow-datafusion/issues/3601) -- New clippy errors appears to be break the CI on the master [\#3597](https://github.com/apache/arrow-datafusion/issues/3597) -- `StringConcat` gives inconsistent result with `concat` when containing `null` [\#3569](https://github.com/apache/arrow-datafusion/issues/3569) -- simplify_expressions don't support different data type for binary [\#3556](https://github.com/apache/arrow-datafusion/issues/3556) -- Broken logical plan serialization for aggregation queries [\#3555](https://github.com/apache/arrow-datafusion/issues/3555) -- Aggregate filters do not get pushed down to table scan [\#3546](https://github.com/apache/arrow-datafusion/issues/3546) -- `docs.rs` cannot build `datafusion-proto` crate [\#3538](https://github.com/apache/arrow-datafusion/issues/3538) -- DataFusion serialization doesn't handle `ScalarValue::Dictionary, Binary, LargeBinary, Time64, IntervalMonthDayNano, Struct` [\#3531](https://github.com/apache/arrow-datafusion/issues/3531) -- What should be returned when trying to get a config in invalid format? 
[\#3505](https://github.com/apache/arrow-datafusion/issues/3505) -- Dividing decimal type gives wrong error: "170141183460469231731687303715884105727 is too large to store in a Decimal128 [\#3498](https://github.com/apache/arrow-datafusion/issues/3498) -- Add BitwiseXor in function `from_proto_binary_op` [\#3495](https://github.com/apache/arrow-datafusion/issues/3495) -- comparison operations with a scalar null and decimal array panics [\#3487](https://github.com/apache/arrow-datafusion/issues/3487) -- Union columns with different types [\#3467](https://github.com/apache/arrow-datafusion/issues/3467) -- Can't get the right logical plan after optimizer [\#3421](https://github.com/apache/arrow-datafusion/issues/3421) -- Fix conflict between simplify_expression rule and CAST expressions [\#3409](https://github.com/apache/arrow-datafusion/issues/3409) -- Empty array giving error [\#2439](https://github.com/apache/arrow-datafusion/issues/2439) -- Internal error: Unsupported data type in hasher: FixedSizeBinary\(16\) [\#1516](https://github.com/apache/arrow-datafusion/issues/1516) -- Predicates on to_timestamp do not work as expected with "naive" timestamp strings [\#765](https://github.com/apache/arrow-datafusion/issues/765) -- Address performance/execution plan of TPCH query 19 [\#78](https://github.com/apache/arrow-datafusion/issues/78) -- Bug fix: expr_visitor was not visiting aggregate filter expressions [\#3548](https://github.com/apache/arrow-datafusion/pull/3548) ([andygrove](https://github.com/andygrove)) +- `projection_push_down` produces invalid aggregate plans in some cases [\#3738](https://github.com/apache/datafusion/issues/3738) +- `Time With Time Zone` should raise error until `DataType::Time64` support tz [\#3715](https://github.com/apache/datafusion/issues/3715) +- SQL Planner doesn't distinguish normal CTEs from the recursive ones. 
[\#3713](https://github.com/apache/datafusion/issues/3713) +- Fix inconsistency between column name formats [\#3711](https://github.com/apache/datafusion/issues/3711) +- Optimizer rule 'projection_push_down' failed due to unexpected error: Error during planning: Aggregate schema has wrong number of fields. Expected 3 got 8 [\#3704](https://github.com/apache/datafusion/issues/3704) +- Optimizer regressions in `unwrap_cast_in_comparison` [\#3690](https://github.com/apache/datafusion/issues/3690) +- Internal error when evaluating a predicate = "The type of Dictionary\(Int16, Utf8\) = Int64 of binary physical should be same" [\#3685](https://github.com/apache/datafusion/issues/3685) +- Specialized regexp_replace should early-abort when the input arrays are empty [\#3647](https://github.com/apache/datafusion/issues/3647) +- Internal error: Failed to coerce types Decimal128\(10, 2\) and Boolean in BETWEEN expression [\#3646](https://github.com/apache/datafusion/issues/3646) +- Internal error: Failed to coerce types Decimal128\(10, 2\) and Boolean in BETWEEN expression [\#3645](https://github.com/apache/datafusion/issues/3645) +- Type coercion error: The type of Boolean AND Decimal128\(10, 2\) of binary physical should be same [\#3644](https://github.com/apache/datafusion/issues/3644) +- LEFT JOIN not working as expected, error message is confusing [\#3639](https://github.com/apache/datafusion/issues/3639) +- `INTERSECT` and `EXCEPT` don't return an error when 2 sets have the different number of columns [\#3632](https://github.com/apache/datafusion/issues/3632) +- The datafusion-cli panics when `union` 2 table with different number of columns. [\#3630](https://github.com/apache/datafusion/issues/3630) +- The expression `col(a) / null` is not optimized. 
[\#3624](https://github.com/apache/datafusion/issues/3624) +- `s3_build_error` test may fail in some environments [\#3601](https://github.com/apache/datafusion/issues/3601) +- New clippy errors appears to be break the CI on the master [\#3597](https://github.com/apache/datafusion/issues/3597) +- `StringConcat` gives inconsistent result with `concat` when containing `null` [\#3569](https://github.com/apache/datafusion/issues/3569) +- simplify_expressions don't support different data type for binary [\#3556](https://github.com/apache/datafusion/issues/3556) +- Broken logical plan serialization for aggregation queries [\#3555](https://github.com/apache/datafusion/issues/3555) +- Aggregate filters do not get pushed down to table scan [\#3546](https://github.com/apache/datafusion/issues/3546) +- `docs.rs` cannot build `datafusion-proto` crate [\#3538](https://github.com/apache/datafusion/issues/3538) +- DataFusion serialization doesn't handle `ScalarValue::Dictionary, Binary, LargeBinary, Time64, IntervalMonthDayNano, Struct` [\#3531](https://github.com/apache/datafusion/issues/3531) +- What should be returned when trying to get a config in invalid format? 
[\#3505](https://github.com/apache/datafusion/issues/3505) +- Dividing decimal type gives wrong error: "170141183460469231731687303715884105727 is too large to store in a Decimal128 [\#3498](https://github.com/apache/datafusion/issues/3498) +- Add BitwiseXor in function `from_proto_binary_op` [\#3495](https://github.com/apache/datafusion/issues/3495) +- comparison operations with a scalar null and decimal array panics [\#3487](https://github.com/apache/datafusion/issues/3487) +- Union columns with different types [\#3467](https://github.com/apache/datafusion/issues/3467) +- Can't get the right logical plan after optimizer [\#3421](https://github.com/apache/datafusion/issues/3421) +- Fix conflict between simplify_expression rule and CAST expressions [\#3409](https://github.com/apache/datafusion/issues/3409) +- Empty array giving error [\#2439](https://github.com/apache/datafusion/issues/2439) +- Internal error: Unsupported data type in hasher: FixedSizeBinary\(16\) [\#1516](https://github.com/apache/datafusion/issues/1516) +- Predicates on to_timestamp do not work as expected with "naive" timestamp strings [\#765](https://github.com/apache/datafusion/issues/765) +- Address performance/execution plan of TPCH query 19 [\#78](https://github.com/apache/datafusion/issues/78) +- Bug fix: expr_visitor was not visiting aggregate filter expressions [\#3548](https://github.com/apache/datafusion/pull/3548) ([andygrove](https://github.com/andygrove)) **Documentation updates:** -- Publish 8.0.0 user guide [\#2558](https://github.com/apache/arrow-datafusion/issues/2558) -- MINOR: Add Dask SQL to list of projects powered by DataFusion [\#3581](https://github.com/apache/arrow-datafusion/pull/3581) ([andygrove](https://github.com/andygrove)) -- Add Parseable as Datafusion user [\#3471](https://github.com/apache/arrow-datafusion/pull/3471) ([nitisht](https://github.com/nitisht)) +- Publish 8.0.0 user guide [\#2558](https://github.com/apache/datafusion/issues/2558) +- MINOR: Add Dask 
SQL to list of projects powered by DataFusion [\#3581](https://github.com/apache/datafusion/pull/3581) ([andygrove](https://github.com/andygrove)) +- Add Parseable as Datafusion user [\#3471](https://github.com/apache/datafusion/pull/3471) ([nitisht](https://github.com/nitisht)) **Closed issues:** -- Upgrade to Arrow 24.0.0 [\#3689](https://github.com/apache/arrow-datafusion/issues/3689) -- what's the best practice to get a single value from arrow array? [\#3497](https://github.com/apache/arrow-datafusion/issues/3497) -- The data type of predicate in the row filter should be same in the binary expr [\#3469](https://github.com/apache/arrow-datafusion/issues/3469) -- Extend constant folding and parquet filtering support [\#188](https://github.com/apache/arrow-datafusion/issues/188) -- Add FORMAT to explain plan and an easy to visualize format [\#96](https://github.com/apache/arrow-datafusion/issues/96) +- Upgrade to Arrow 24.0.0 [\#3689](https://github.com/apache/datafusion/issues/3689) +- what's the best practice to get a single value from arrow array? 
[\#3497](https://github.com/apache/datafusion/issues/3497) +- The data type of predicate in the row filter should be same in the binary expr [\#3469](https://github.com/apache/datafusion/issues/3469) +- Extend constant folding and parquet filtering support [\#188](https://github.com/apache/datafusion/issues/188) +- Add FORMAT to explain plan and an easy to visualize format [\#96](https://github.com/apache/datafusion/issues/96) **Merged pull requests:** -- Build aggregate schema in Aggregate::try_new [\#3739](https://github.com/apache/arrow-datafusion/pull/3739) ([andygrove](https://github.com/andygrove)) -- delete type coercion for scalar udf in the physical phase [\#3735](https://github.com/apache/arrow-datafusion/pull/3735) ([liukun4515](https://github.com/liukun4515)) -- Consolidate coercion code in `datafusion_expr::type_coercion` and submodules [\#3728](https://github.com/apache/arrow-datafusion/pull/3728) ([alamb](https://github.com/alamb)) -- Skip filter push down on semi/anti joins [\#3723](https://github.com/apache/arrow-datafusion/pull/3723) ([andygrove](https://github.com/andygrove)) -- Raise `Unsupported SQL type` for `Time(WithTimeZone)` and `Time(Tz)` [\#3718](https://github.com/apache/arrow-datafusion/pull/3718) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([waitingkuo](https://github.com/waitingkuo)) -- Support column aliases specified by `WITH` statements [\#3717](https://github.com/apache/arrow-datafusion/pull/3717) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([isidentical](https://github.com/isidentical)) -- Reject recursive CTEs before processing the sub-expressions [\#3714](https://github.com/apache/arrow-datafusion/pull/3714) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([isidentical](https://github.com/isidentical)) -- Make column name consistent between Expr::name and Display/Debug [\#3712](https://github.com/apache/arrow-datafusion/pull/3712) 
[[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) -- Fix aggregate type coercion bug [\#3710](https://github.com/apache/arrow-datafusion/pull/3710) ([alamb](https://github.com/alamb)) -- MINOR: Add `Expr::canonical_name` and improve docs on `Expr::name` [\#3706](https://github.com/apache/arrow-datafusion/pull/3706) ([andygrove](https://github.com/andygrove)) -- Remove type coercions from ScalarValue and aggregation function code [\#3705](https://github.com/apache/arrow-datafusion/pull/3705) ([ozankabak](https://github.com/ozankabak)) -- `unwrap_cast_in_comparison`: fix bug which can find the field for the schema [\#3699](https://github.com/apache/arrow-datafusion/pull/3699) ([liukun4515](https://github.com/liukun4515)) -- bump sql-parser 0.25 [\#3698](https://github.com/apache/arrow-datafusion/pull/3698) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) -- Move optimizer init to optimizer crate [\#3692](https://github.com/apache/arrow-datafusion/pull/3692) ([andygrove](https://github.com/andygrove)) -- Upgrade `arrow` `parquet` and `arrow-flight` to 24.0.0 [\#3691](https://github.com/apache/arrow-datafusion/pull/3691) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Fix bug in dictionary coercion and allow better coercion [\#3688](https://github.com/apache/arrow-datafusion/pull/3688) ([alamb](https://github.com/alamb)) -- \[MINOR\] Improve docstrings in binary_rule.rs [\#3687](https://github.com/apache/arrow-datafusion/pull/3687) ([alamb](https://github.com/alamb)) -- \[MINOR\] Add `ScalarValue::new_utf8`, clean up creation of literals in casting tests [\#3680](https://github.com/apache/arrow-datafusion/pull/3680) ([alamb](https://github.com/alamb)) -- Disable code coverage until we figure out why it is broken [\#3679](https://github.com/apache/arrow-datafusion/pull/3679) 
([alamb](https://github.com/alamb)) -- move `type coercion` for case when expr [\#3676](https://github.com/apache/arrow-datafusion/pull/3676) ([liukun4515](https://github.com/liukun4515)) -- Update sqlparser to 0.24.0 [\#3675](https://github.com/apache/arrow-datafusion/pull/3675) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Fail if field lengths are not same in INTERSECT and EXPECT [\#3674](https://github.com/apache/arrow-datafusion/pull/3674) ([askoa](https://github.com/askoa)) -- Simplification Rules for Modulo Operator [\#3669](https://github.com/apache/arrow-datafusion/pull/3669) ([askoa](https://github.com/askoa)) -- change pre_cast_lit_in_comparison to unwrap_cast_in_comparison [\#3662](https://github.com/apache/arrow-datafusion/pull/3662) ([liukun4515](https://github.com/liukun4515)) -- restore optimization for `between` in simplify expression rule [\#3661](https://github.com/apache/arrow-datafusion/pull/3661) ([liukun4515](https://github.com/liukun4515)) -- add timestamptz [\#3660](https://github.com/apache/arrow-datafusion/pull/3660) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([waitingkuo](https://github.com/waitingkuo)) -- remove the type coercion in the simplify_expressions rule [\#3657](https://github.com/apache/arrow-datafusion/pull/3657) ([liukun4515](https://github.com/liukun4515)) -- Cache collected file statistics [\#3649](https://github.com/apache/arrow-datafusion/pull/3649) ([mateuszkj](https://github.com/mateuszkj)) -- make regexp_replace early abort with empty input [\#3648](https://github.com/apache/arrow-datafusion/pull/3648) ([isidentical](https://github.com/isidentical)) -- Check each query has same number of columns when building the UNION plan [\#3638](https://github.com/apache/arrow-datafusion/pull/3638) ([HaoYang670](https://github.com/HaoYang670)) -- move the `type coercion` to the beginning of the optimizer rule and support type coercion for subquery 
[\#3636](https://github.com/apache/arrow-datafusion/pull/3636) ([liukun4515](https://github.com/liukun4515)) -- Add documentation for querying S3 data with CLI [\#3631](https://github.com/apache/arrow-datafusion/pull/3631) ([andygrove](https://github.com/andygrove)) -- Simplify multiplication by `0` and by `null` [\#3627](https://github.com/apache/arrow-datafusion/pull/3627) ([HaoYang670](https://github.com/HaoYang670)) -- Simplify null division. [\#3625](https://github.com/apache/arrow-datafusion/pull/3625) ([HaoYang670](https://github.com/HaoYang670)) -- support cast/try_cast expr in reduceOuterJoin [\#3621](https://github.com/apache/arrow-datafusion/pull/3621) ([AssHero](https://github.com/AssHero)) -- MINOR: fix TPC-H conversion function to not miss a row of data [\#3620](https://github.com/apache/arrow-datafusion/pull/3620) ([kmitchener](https://github.com/kmitchener)) -- Document ObjectStoreProvider [\#3619](https://github.com/apache/arrow-datafusion/pull/3619) ([tustvold](https://github.com/tustvold)) -- \[feat\] Support using offset index in ParquetRecordBatchStream when pu… [\#3616](https://github.com/apache/arrow-datafusion/pull/3616) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Optimize `regex_replace` for scalar patterns [\#3614](https://github.com/apache/arrow-datafusion/pull/3614) ([isidentical](https://github.com/isidentical)) -- Simplify `concat_ws(null, ..)` to `null` [\#3608](https://github.com/apache/arrow-datafusion/pull/3608) ([HaoYang670](https://github.com/HaoYang670)) -- MINOR: improve docstrings on SessionContext [\#3603](https://github.com/apache/arrow-datafusion/pull/3603) ([alamb](https://github.com/alamb)) -- Merge s3_success and s3_build_error tests into one test [\#3602](https://github.com/apache/arrow-datafusion/pull/3602) ([Licht-T](https://github.com/Licht-T)) -- add `register_batch` and `read_batch` to `SessionContext` to register a single RecordBatch as a table [\#3600](https://github.com/apache/arrow-datafusion/pull/3600) 
([BaymaxHWY](https://github.com/BaymaxHWY)) -- \[CI\] Fix the newly added linting errors to make clippy happy [\#3598](https://github.com/apache/arrow-datafusion/pull/3598) ([isidentical](https://github.com/isidentical)) -- Prevent over-allocations \(and spills\) on sorts with a fixed limit [\#3593](https://github.com/apache/arrow-datafusion/pull/3593) ([isidentical](https://github.com/isidentical)) -- update datafusion cli deps [\#3588](https://github.com/apache/arrow-datafusion/pull/3588) ([Jimexist](https://github.com/Jimexist)) -- Update cranelift\* dependencies `0.87` --\> `0.88` [\#3586](https://github.com/apache/arrow-datafusion/pull/3586) ([alamb](https://github.com/alamb)) -- Fix docs.rs [\#3580](https://github.com/apache/arrow-datafusion/pull/3580) ([avantgardnerio](https://github.com/avantgardnerio)) -- Fix build [\#3576](https://github.com/apache/arrow-datafusion/pull/3576) ([alamb](https://github.com/alamb)) -- Use consistent name for `TimeUnit::Millisecond` [\#3575](https://github.com/apache/arrow-datafusion/pull/3575) ([alamb](https://github.com/alamb)) -- Fix logical plan serialization [\#3574](https://github.com/apache/arrow-datafusion/pull/3574) ([thinkharderdev](https://github.com/thinkharderdev)) -- Custom window frame logic \(support `ROWS`, `RANGE`, `PRECEDING` and `FOLLOWING` for window functions\) [\#3570](https://github.com/apache/arrow-datafusion/pull/3570) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([metesynnada](https://github.com/metesynnada)) -- fix comparison of decimal array with null scalar [\#3567](https://github.com/apache/arrow-datafusion/pull/3567) ([kmitchener](https://github.com/kmitchener)) -- Reduce dependencies of `datafusion-sql` crate [\#3566](https://github.com/apache/arrow-datafusion/pull/3566) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([mbrobbel](https://github.com/mbrobbel)) -- Update pbjson-types requirement from 0.3 to 0.5 
[\#3560](https://github.com/apache/arrow-datafusion/pull/3560) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Update pbjson requirement from 0.3 to 0.5 [\#3559](https://github.com/apache/arrow-datafusion/pull/3559) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Update pbjson-build requirement from 0.3 to 0.5 [\#3558](https://github.com/apache/arrow-datafusion/pull/3558) ([dependabot[bot]](https://github.com/apps/dependabot)) -- MINOR: enable q19 in TPCH [\#3553](https://github.com/apache/arrow-datafusion/pull/3553) ([kmitchener](https://github.com/kmitchener)) -- MINOR: remove out-of-date is_dictionary checks from binary_rule.rs [\#3552](https://github.com/apache/arrow-datafusion/pull/3552) ([kmitchener](https://github.com/kmitchener)) -- Make ParquetScanOptions public and add method to get a reference from… [\#3551](https://github.com/apache/arrow-datafusion/pull/3551) ([thinkharderdev](https://github.com/thinkharderdev)) -- fix coercion of null for decimal math in binary_rules [\#3549](https://github.com/apache/arrow-datafusion/pull/3549) ([kmitchener](https://github.com/kmitchener)) -- Use `fetch` limit in get_sorted_iter [\#3545](https://github.com/apache/arrow-datafusion/pull/3545) ([Dandandan](https://github.com/Dandandan)) -- feat: allow object store registration from datafusion-cli [\#3540](https://github.com/apache/arrow-datafusion/pull/3540) ([turbo1912](https://github.com/turbo1912)) -- Actually test that `ScalarValue`s are the same after round trip serialization [\#3537](https://github.com/apache/arrow-datafusion/pull/3537) ([alamb](https://github.com/alamb)) -- Add serialization of `ScalarValue::Struct` [\#3536](https://github.com/apache/arrow-datafusion/pull/3536) ([alamb](https://github.com/alamb)) -- Add serialization of `ScalarValue::IntervalMonthDayNano` [\#3535](https://github.com/apache/arrow-datafusion/pull/3535) ([alamb](https://github.com/alamb)) -- Add serialization of `ScalarValue::Binary` and 
`ScalarValue::LargeBinary`, `ScalarValue::Time64` [\#3534](https://github.com/apache/arrow-datafusion/pull/3534) ([alamb](https://github.com/alamb)) -- MINOR: Impl `Debug` for TableReference and ResolvedTableReference [\#3533](https://github.com/apache/arrow-datafusion/pull/3533) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) -- Add support for serializing `ScalarValue::Dictionary` to datafusion-proto [\#3532](https://github.com/apache/arrow-datafusion/pull/3532) ([alamb](https://github.com/alamb)) -- Push down limit to sort [\#3530](https://github.com/apache/arrow-datafusion/pull/3530) ([Dandandan](https://github.com/Dandandan)) -- Execute sort in parallel when a limit is used after sort [\#3527](https://github.com/apache/arrow-datafusion/pull/3527) ([Dandandan](https://github.com/Dandandan)) -- Config support type conversion [\#3522](https://github.com/apache/arrow-datafusion/pull/3522) ([comphead](https://github.com/comphead)) -- MINOR: Add more execs to list of supported execs [\#3519](https://github.com/apache/arrow-datafusion/pull/3519) ([andygrove](https://github.com/andygrove)) -- fix divide by zero not throwing proper error for decimal [\#3517](https://github.com/apache/arrow-datafusion/pull/3517) ([kmitchener](https://github.com/kmitchener)) -- Make FileStream and FileOpener public [\#3514](https://github.com/apache/arrow-datafusion/pull/3514) ([thinkharderdev](https://github.com/thinkharderdev)) -- feat: Union types coercion [\#3513](https://github.com/apache/arrow-datafusion/pull/3513) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([gandronchik](https://github.com/gandronchik)) -- \[DataFrame\] - Add cache function for DataFrame [\#3512](https://github.com/apache/arrow-datafusion/pull/3512) ([francis-du](https://github.com/francis-du)) -- type coercion: support is/is_not\_`bool`/like/unknown expr [\#3510](https://github.com/apache/arrow-datafusion/pull/3510) 
([liukun4515](https://github.com/liukun4515)) -- MINOR: remove unused dependencies [\#3508](https://github.com/apache/arrow-datafusion/pull/3508) ([waynexia](https://github.com/waynexia)) -- Automate postrelease publishing to Homebrew [\#3507](https://github.com/apache/arrow-datafusion/pull/3507) ([iajoiner](https://github.com/iajoiner)) -- Add additional DATE_PART units [\#3503](https://github.com/apache/arrow-datafusion/pull/3503) ([jonmmease](https://github.com/jonmmease)) -- Add BitwiseXor in function from_proto_binary_op [\#3496](https://github.com/apache/arrow-datafusion/pull/3496) ([askoa](https://github.com/askoa)) -- Make the function from_proto_binary_op public [\#3490](https://github.com/apache/arrow-datafusion/pull/3490) ([askoa](https://github.com/askoa)) -- minor: fix bug in `downcast_value!` macro \(`T` --\> `$T`\) [\#3486](https://github.com/apache/arrow-datafusion/pull/3486) ([alamb](https://github.com/alamb)) -- add time_zone into ConfigOptions [\#3485](https://github.com/apache/arrow-datafusion/pull/3485) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([waitingkuo](https://github.com/waitingkuo)) -- \[MINOR\] Change `downcast_value!` macro so it does not need to use `use std::any::type_name;` [\#3484](https://github.com/apache/arrow-datafusion/pull/3484) ([alamb](https://github.com/alamb)) -- Convert more cross joins to inner joins \(Address performance/execution plan of TPCH query 19\) [\#3482](https://github.com/apache/arrow-datafusion/pull/3482) ([DhamoPS](https://github.com/DhamoPS)) -- \[minor\] Remove unused arg in macro in Inlist [\#3474](https://github.com/apache/arrow-datafusion/pull/3474) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- inlist: move type coercion to logical phase [\#3472](https://github.com/apache/arrow-datafusion/pull/3472) ([liukun4515](https://github.com/liukun4515)) -- Use the column data type as the NULL data type in the row filter [\#3470](https://github.com/apache/arrow-datafusion/pull/3470) 
([liukun4515](https://github.com/liukun4515)) -- apply type coercion before filter pushdown [\#3459](https://github.com/apache/arrow-datafusion/pull/3459) ([liukun4515](https://github.com/liukun4515)) -- add FixedSizeBinary support to create_hashes [\#3458](https://github.com/apache/arrow-datafusion/pull/3458) ([mcassels](https://github.com/mcassels)) -- Support ShowVariable Statement [\#3455](https://github.com/apache/arrow-datafusion/pull/3455) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([waitingkuo](https://github.com/waitingkuo)) -- Add additional pruning tests with casts, handle unsupported predicates better [\#3454](https://github.com/apache/arrow-datafusion/pull/3454) ([alamb](https://github.com/alamb)) -- Add `InList` support for timestamp type. \(\#3449\) [\#3450](https://github.com/apache/arrow-datafusion/pull/3450) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Evaluate expressions after type coercion [\#3444](https://github.com/apache/arrow-datafusion/pull/3444) ([Dandandan](https://github.com/Dandandan)) -- remove type coercion in the binary physical expr [\#3396](https://github.com/apache/arrow-datafusion/pull/3396) ([liukun4515](https://github.com/liukun4515)) -- Use arrow row format in SortPreservingMerge ~50-70% faster [\#3386](https://github.com/apache/arrow-datafusion/pull/3386) ([tustvold](https://github.com/tustvold)) -- Pushdown `RowFilter` in `ParquetExec` [\#3380](https://github.com/apache/arrow-datafusion/pull/3380) ([thinkharderdev](https://github.com/thinkharderdev)) +- Build aggregate schema in Aggregate::try_new [\#3739](https://github.com/apache/datafusion/pull/3739) ([andygrove](https://github.com/andygrove)) +- delete type coercion for scalar udf in the physical phase [\#3735](https://github.com/apache/datafusion/pull/3735) ([liukun4515](https://github.com/liukun4515)) +- Consolidate coercion code in `datafusion_expr::type_coercion` and submodules [\#3728](https://github.com/apache/datafusion/pull/3728) 
([alamb](https://github.com/alamb)) +- Skip filter push down on semi/anti joins [\#3723](https://github.com/apache/datafusion/pull/3723) ([andygrove](https://github.com/andygrove)) +- Raise `Unsupported SQL type` for `Time(WithTimeZone)` and `Time(Tz)` [\#3718](https://github.com/apache/datafusion/pull/3718) [[sql](https://github.com/apache/datafusion/labels/sql)] ([waitingkuo](https://github.com/waitingkuo)) +- Support column aliases specified by `WITH` statements [\#3717](https://github.com/apache/datafusion/pull/3717) [[sql](https://github.com/apache/datafusion/labels/sql)] ([isidentical](https://github.com/isidentical)) +- Reject recursive CTEs before processing the sub-expressions [\#3714](https://github.com/apache/datafusion/pull/3714) [[sql](https://github.com/apache/datafusion/labels/sql)] ([isidentical](https://github.com/isidentical)) +- Make column name consistent between Expr::name and Display/Debug [\#3712](https://github.com/apache/datafusion/pull/3712) [[sql](https://github.com/apache/datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- Fix aggregate type coercion bug [\#3710](https://github.com/apache/datafusion/pull/3710) ([alamb](https://github.com/alamb)) +- MINOR: Add `Expr::canonical_name` and improve docs on `Expr::name` [\#3706](https://github.com/apache/datafusion/pull/3706) ([andygrove](https://github.com/andygrove)) +- Remove type coercions from ScalarValue and aggregation function code [\#3705](https://github.com/apache/datafusion/pull/3705) ([ozankabak](https://github.com/ozankabak)) +- `unwrap_cast_in_comparison`: fix bug which can find the field for the schema [\#3699](https://github.com/apache/datafusion/pull/3699) ([liukun4515](https://github.com/liukun4515)) +- bump sql-parser 0.25 [\#3698](https://github.com/apache/datafusion/pull/3698) [[sql](https://github.com/apache/datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) +- Move optimizer init to optimizer crate 
[\#3692](https://github.com/apache/datafusion/pull/3692) ([andygrove](https://github.com/andygrove)) +- Upgrade `arrow` `parquet` and `arrow-flight` to 24.0.0 [\#3691](https://github.com/apache/datafusion/pull/3691) [[sql](https://github.com/apache/datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Fix bug in dictionary coercion and allow better coercion [\#3688](https://github.com/apache/datafusion/pull/3688) ([alamb](https://github.com/alamb)) +- \[MINOR\] Improve docstrings in binary_rule.rs [\#3687](https://github.com/apache/datafusion/pull/3687) ([alamb](https://github.com/alamb)) +- \[MINOR\] Add `ScalarValue::new_utf8`, clean up creation of literals in casting tests [\#3680](https://github.com/apache/datafusion/pull/3680) ([alamb](https://github.com/alamb)) +- Disable code coverage until we figure out why it is broken [\#3679](https://github.com/apache/datafusion/pull/3679) ([alamb](https://github.com/alamb)) +- move `type coercion` for case when expr [\#3676](https://github.com/apache/datafusion/pull/3676) ([liukun4515](https://github.com/liukun4515)) +- Update sqlparser to 0.24.0 [\#3675](https://github.com/apache/datafusion/pull/3675) [[sql](https://github.com/apache/datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Fail if field lengths are not same in INTERSECT and EXPECT [\#3674](https://github.com/apache/datafusion/pull/3674) ([askoa](https://github.com/askoa)) +- Simplification Rules for Modulo Operator [\#3669](https://github.com/apache/datafusion/pull/3669) ([askoa](https://github.com/askoa)) +- change pre_cast_lit_in_comparison to unwrap_cast_in_comparison [\#3662](https://github.com/apache/datafusion/pull/3662) ([liukun4515](https://github.com/liukun4515)) +- restore optimization for `between` in simplify expression rule [\#3661](https://github.com/apache/datafusion/pull/3661) ([liukun4515](https://github.com/liukun4515)) +- add timestamptz [\#3660](https://github.com/apache/datafusion/pull/3660) 
[[sql](https://github.com/apache/datafusion/labels/sql)] ([waitingkuo](https://github.com/waitingkuo)) +- remove the type coercion in the simplify_expressions rule [\#3657](https://github.com/apache/datafusion/pull/3657) ([liukun4515](https://github.com/liukun4515)) +- Cache collected file statistics [\#3649](https://github.com/apache/datafusion/pull/3649) ([mateuszkj](https://github.com/mateuszkj)) +- make regexp_replace early abort with empty input [\#3648](https://github.com/apache/datafusion/pull/3648) ([isidentical](https://github.com/isidentical)) +- Check each query has same number of columns when building the UNION plan [\#3638](https://github.com/apache/datafusion/pull/3638) ([HaoYang670](https://github.com/HaoYang670)) +- move the `type coercion` to the beginning of the optimizer rule and support type coercion for subquery [\#3636](https://github.com/apache/datafusion/pull/3636) ([liukun4515](https://github.com/liukun4515)) +- Add documentation for querying S3 data with CLI [\#3631](https://github.com/apache/datafusion/pull/3631) ([andygrove](https://github.com/andygrove)) +- Simplify multiplication by `0` and by `null` [\#3627](https://github.com/apache/datafusion/pull/3627) ([HaoYang670](https://github.com/HaoYang670)) +- Simplify null division. 
[\#3625](https://github.com/apache/datafusion/pull/3625) ([HaoYang670](https://github.com/HaoYang670)) +- support cast/try_cast expr in reduceOuterJoin [\#3621](https://github.com/apache/datafusion/pull/3621) ([AssHero](https://github.com/AssHero)) +- MINOR: fix TPC-H conversion function to not miss a row of data [\#3620](https://github.com/apache/datafusion/pull/3620) ([kmitchener](https://github.com/kmitchener)) +- Document ObjectStoreProvider [\#3619](https://github.com/apache/datafusion/pull/3619) ([tustvold](https://github.com/tustvold)) +- \[feat\] Support using offset index in ParquetRecordBatchStream when pu… [\#3616](https://github.com/apache/datafusion/pull/3616) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Optimize `regex_replace` for scalar patterns [\#3614](https://github.com/apache/datafusion/pull/3614) ([isidentical](https://github.com/isidentical)) +- Simplify `concat_ws(null, ..)` to `null` [\#3608](https://github.com/apache/datafusion/pull/3608) ([HaoYang670](https://github.com/HaoYang670)) +- MINOR: improve docstrings on SessionContext [\#3603](https://github.com/apache/datafusion/pull/3603) ([alamb](https://github.com/alamb)) +- Merge s3_success and s3_build_error tests into one test [\#3602](https://github.com/apache/datafusion/pull/3602) ([Licht-T](https://github.com/Licht-T)) +- add `register_batch` and `read_batch` to `SessionContext` to register a single RecordBatch as a table [\#3600](https://github.com/apache/datafusion/pull/3600) ([BaymaxHWY](https://github.com/BaymaxHWY)) +- \[CI\] Fix the newly added linting errors to make clippy happy [\#3598](https://github.com/apache/datafusion/pull/3598) ([isidentical](https://github.com/isidentical)) +- Prevent over-allocations \(and spills\) on sorts with a fixed limit [\#3593](https://github.com/apache/datafusion/pull/3593) ([isidentical](https://github.com/isidentical)) +- update datafusion cli deps [\#3588](https://github.com/apache/datafusion/pull/3588) 
([Jimexist](https://github.com/Jimexist)) +- Update cranelift\* dependencies `0.87` --\> `0.88` [\#3586](https://github.com/apache/datafusion/pull/3586) ([alamb](https://github.com/alamb)) +- Fix docs.rs [\#3580](https://github.com/apache/datafusion/pull/3580) ([avantgardnerio](https://github.com/avantgardnerio)) +- Fix build [\#3576](https://github.com/apache/datafusion/pull/3576) ([alamb](https://github.com/alamb)) +- Use consistent name for `TimeUnit::Millisecond` [\#3575](https://github.com/apache/datafusion/pull/3575) ([alamb](https://github.com/alamb)) +- Fix logical plan serialization [\#3574](https://github.com/apache/datafusion/pull/3574) ([thinkharderdev](https://github.com/thinkharderdev)) +- Custom window frame logic \(support `ROWS`, `RANGE`, `PRECEDING` and `FOLLOWING` for window functions\) [\#3570](https://github.com/apache/datafusion/pull/3570) [[sql](https://github.com/apache/datafusion/labels/sql)] ([metesynnada](https://github.com/metesynnada)) +- fix comparison of decimal array with null scalar [\#3567](https://github.com/apache/datafusion/pull/3567) ([kmitchener](https://github.com/kmitchener)) +- Reduce dependencies of `datafusion-sql` crate [\#3566](https://github.com/apache/datafusion/pull/3566) [[sql](https://github.com/apache/datafusion/labels/sql)] ([mbrobbel](https://github.com/mbrobbel)) +- Update pbjson-types requirement from 0.3 to 0.5 [\#3560](https://github.com/apache/datafusion/pull/3560) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Update pbjson requirement from 0.3 to 0.5 [\#3559](https://github.com/apache/datafusion/pull/3559) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Update pbjson-build requirement from 0.3 to 0.5 [\#3558](https://github.com/apache/datafusion/pull/3558) ([dependabot[bot]](https://github.com/apps/dependabot)) +- MINOR: enable q19 in TPCH [\#3553](https://github.com/apache/datafusion/pull/3553) ([kmitchener](https://github.com/kmitchener)) +- MINOR: remove out-of-date is_dictionary 
checks from binary_rule.rs [\#3552](https://github.com/apache/datafusion/pull/3552) ([kmitchener](https://github.com/kmitchener)) +- Make ParquetScanOptions public and add method to get a reference from… [\#3551](https://github.com/apache/datafusion/pull/3551) ([thinkharderdev](https://github.com/thinkharderdev)) +- fix coercion of null for decimal math in binary_rules [\#3549](https://github.com/apache/datafusion/pull/3549) ([kmitchener](https://github.com/kmitchener)) +- Use `fetch` limit in get_sorted_iter [\#3545](https://github.com/apache/datafusion/pull/3545) ([Dandandan](https://github.com/Dandandan)) +- feat: allow object store registration from datafusion-cli [\#3540](https://github.com/apache/datafusion/pull/3540) ([turbo1912](https://github.com/turbo1912)) +- Actually test that `ScalarValue`s are the same after round trip serialization [\#3537](https://github.com/apache/datafusion/pull/3537) ([alamb](https://github.com/alamb)) +- Add serialization of `ScalarValue::Struct` [\#3536](https://github.com/apache/datafusion/pull/3536) ([alamb](https://github.com/alamb)) +- Add serialization of `ScalarValue::IntervalMonthDayNano` [\#3535](https://github.com/apache/datafusion/pull/3535) ([alamb](https://github.com/alamb)) +- Add serialization of `ScalarValue::Binary` and `ScalarValue::LargeBinary`, `ScalarValue::Time64` [\#3534](https://github.com/apache/datafusion/pull/3534) ([alamb](https://github.com/alamb)) +- MINOR: Impl `Debug` for TableReference and ResolvedTableReference [\#3533](https://github.com/apache/datafusion/pull/3533) [[sql](https://github.com/apache/datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- Add support for serializing `ScalarValue::Dictionary` to datafusion-proto [\#3532](https://github.com/apache/datafusion/pull/3532) ([alamb](https://github.com/alamb)) +- Push down limit to sort [\#3530](https://github.com/apache/datafusion/pull/3530) ([Dandandan](https://github.com/Dandandan)) +- Execute sort in parallel when a 
limit is used after sort [\#3527](https://github.com/apache/datafusion/pull/3527) ([Dandandan](https://github.com/Dandandan)) +- Config support type conversion [\#3522](https://github.com/apache/datafusion/pull/3522) ([comphead](https://github.com/comphead)) +- MINOR: Add more execs to list of supported execs [\#3519](https://github.com/apache/datafusion/pull/3519) ([andygrove](https://github.com/andygrove)) +- fix divide by zero not throwing proper error for decimal [\#3517](https://github.com/apache/datafusion/pull/3517) ([kmitchener](https://github.com/kmitchener)) +- Make FileStream and FileOpener public [\#3514](https://github.com/apache/datafusion/pull/3514) ([thinkharderdev](https://github.com/thinkharderdev)) +- feat: Union types coercion [\#3513](https://github.com/apache/datafusion/pull/3513) [[sql](https://github.com/apache/datafusion/labels/sql)] ([gandronchik](https://github.com/gandronchik)) +- \[DataFrame\] - Add cache function for DataFrame [\#3512](https://github.com/apache/datafusion/pull/3512) ([francis-du](https://github.com/francis-du)) +- type coercion: support is/is_not\_`bool`/like/unknown expr [\#3510](https://github.com/apache/datafusion/pull/3510) ([liukun4515](https://github.com/liukun4515)) +- MINOR: remove unused dependencies [\#3508](https://github.com/apache/datafusion/pull/3508) ([waynexia](https://github.com/waynexia)) +- Automate postrelease publishing to Homebrew [\#3507](https://github.com/apache/datafusion/pull/3507) ([iajoiner](https://github.com/iajoiner)) +- Add additional DATE_PART units [\#3503](https://github.com/apache/datafusion/pull/3503) ([jonmmease](https://github.com/jonmmease)) +- Add BitwiseXor in function from_proto_binary_op [\#3496](https://github.com/apache/datafusion/pull/3496) ([askoa](https://github.com/askoa)) +- Make the function from_proto_binary_op public [\#3490](https://github.com/apache/datafusion/pull/3490) ([askoa](https://github.com/askoa)) +- minor: fix bug in `downcast_value!` macro \(`T` --\> 
`$T`\) [\#3486](https://github.com/apache/datafusion/pull/3486) ([alamb](https://github.com/alamb)) +- add time_zone into ConfigOptions [\#3485](https://github.com/apache/datafusion/pull/3485) [[sql](https://github.com/apache/datafusion/labels/sql)] ([waitingkuo](https://github.com/waitingkuo)) +- \[MINOR\] Change `downcast_value!` macro so it does not need to use `use std::any::type_name;` [\#3484](https://github.com/apache/datafusion/pull/3484) ([alamb](https://github.com/alamb)) +- Convert more cross joins to inner joins \(Address performance/execution plan of TPCH query 19\) [\#3482](https://github.com/apache/datafusion/pull/3482) ([DhamoPS](https://github.com/DhamoPS)) +- \[minor\] Remove unused arg in macro in Inlist [\#3474](https://github.com/apache/datafusion/pull/3474) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- inlist: move type coercion to logical phase [\#3472](https://github.com/apache/datafusion/pull/3472) ([liukun4515](https://github.com/liukun4515)) +- Use the column data type as the NULL data type in the row filter [\#3470](https://github.com/apache/datafusion/pull/3470) ([liukun4515](https://github.com/liukun4515)) +- apply type coercion before filter pushdown [\#3459](https://github.com/apache/datafusion/pull/3459) ([liukun4515](https://github.com/liukun4515)) +- add FixedSizeBinary support to create_hashes [\#3458](https://github.com/apache/datafusion/pull/3458) ([mcassels](https://github.com/mcassels)) +- Support ShowVariable Statement [\#3455](https://github.com/apache/datafusion/pull/3455) [[sql](https://github.com/apache/datafusion/labels/sql)] ([waitingkuo](https://github.com/waitingkuo)) +- Add additional pruning tests with casts, handle unsupported predicates better [\#3454](https://github.com/apache/datafusion/pull/3454) ([alamb](https://github.com/alamb)) +- Add `InList` support for timestamp type. 
\(\#3449\) [\#3450](https://github.com/apache/datafusion/pull/3450) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Evaluate expressions after type coercion [\#3444](https://github.com/apache/datafusion/pull/3444) ([Dandandan](https://github.com/Dandandan)) +- remove type coercion in the binary physical expr [\#3396](https://github.com/apache/datafusion/pull/3396) ([liukun4515](https://github.com/liukun4515)) +- Use arrow row format in SortPreservingMerge ~50-70% faster [\#3386](https://github.com/apache/datafusion/pull/3386) ([tustvold](https://github.com/tustvold)) +- Pushdown `RowFilter` in `ParquetExec` [\#3380](https://github.com/apache/datafusion/pull/3380) ([thinkharderdev](https://github.com/thinkharderdev)) diff --git a/dev/changelog/14.0.0.md b/dev/changelog/14.0.0.md index 00e296103a8b..8020dd1924fc 100644 --- a/dev/changelog/14.0.0.md +++ b/dev/changelog/14.0.0.md @@ -17,331 +17,331 @@ under the License. --> -## [14.0.0](https://github.com/apache/arrow-datafusion/tree/14.0.0) (2022-11-04) +## [14.0.0](https://github.com/apache/datafusion/tree/14.0.0) (2022-11-04) -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/13.0.0-rc1...14.0.0) +[Full Changelog](https://github.com/apache/datafusion/compare/13.0.0-rc1...14.0.0) **Breaking changes:** -- Improve FieldNotFound errors [\#4084](https://github.com/apache/arrow-datafusion/pull/4084) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) -- Refactor: move `simplify_expression.rs` and `expr_simplifier.rs` to a new mod `simplify_expressions` [\#3951](https://github.com/apache/arrow-datafusion/pull/3951) ([HaoYang670](https://github.com/HaoYang670)) -- Support for non-u64 types for Window Bound [\#3916](https://github.com/apache/arrow-datafusion/pull/3916) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([mustafasrepo](https://github.com/mustafasrepo)) -- Expose parquet reader settings using normal DataFusion `ConfigOptions` 
[\#3822](https://github.com/apache/arrow-datafusion/pull/3822) ([alamb](https://github.com/alamb)) -- Add `Filter::try_new` with validation [\#3796](https://github.com/apache/arrow-datafusion/pull/3796) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) -- Change public simplify API and add a public coerce API [\#3758](https://github.com/apache/arrow-datafusion/pull/3758) ([alamb](https://github.com/alamb)) +- Improve FieldNotFound errors [\#4084](https://github.com/apache/datafusion/pull/4084) [[sql](https://github.com/apache/datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- Refactor: move `simplify_expression.rs` and `expr_simplifier.rs` to a new mod `simplify_expressions` [\#3951](https://github.com/apache/datafusion/pull/3951) ([HaoYang670](https://github.com/HaoYang670)) +- Support for non-u64 types for Window Bound [\#3916](https://github.com/apache/datafusion/pull/3916) [[sql](https://github.com/apache/datafusion/labels/sql)] ([mustafasrepo](https://github.com/mustafasrepo)) +- Expose parquet reader settings using normal DataFusion `ConfigOptions` [\#3822](https://github.com/apache/datafusion/pull/3822) ([alamb](https://github.com/alamb)) +- Add `Filter::try_new` with validation [\#3796](https://github.com/apache/datafusion/pull/3796) [[sql](https://github.com/apache/datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- Change public simplify API and add a public coerce API [\#3758](https://github.com/apache/datafusion/pull/3758) ([alamb](https://github.com/alamb)) **Implemented enhancements:** -- Automatically register tables if ObjectStore root is configured [\#4094](https://github.com/apache/arrow-datafusion/issues/4094) -- Simplify small `InList` expressions [\#4089](https://github.com/apache/arrow-datafusion/issues/4089) -- Support `SET` command [\#4067](https://github.com/apache/arrow-datafusion/issues/4067) -- add uuid\(\) function to generate unique uuid per 
row [\#4045](https://github.com/apache/arrow-datafusion/issues/4045) -- Publish benchmark crate so that it can be used as a library in Ballista [\#4016](https://github.com/apache/arrow-datafusion/issues/4016) -- Add statistics methods to `TableProvider` trait for use in cost-based optimizations in the logical plan [\#3983](https://github.com/apache/arrow-datafusion/issues/3983) -- Implement `current_time` Function [\#3982](https://github.com/apache/arrow-datafusion/issues/3982) -- Implement `current_date` Function [\#3981](https://github.com/apache/arrow-datafusion/issues/3981) -- Put common code used for testing code into datafusion/test_utils.rs [\#3960](https://github.com/apache/arrow-datafusion/issues/3960) -- Print the configurations of ConfigOptions in an ordered way so that we can directly compare the equality of two ConfigOptions by their debug strings [\#3952](https://github.com/apache/arrow-datafusion/issues/3952) -- Don't make dependants install protoc [\#3947](https://github.com/apache/arrow-datafusion/issues/3947) -- Implement right anti join and support it in HashBuildProbeOrder [\#3946](https://github.com/apache/arrow-datafusion/issues/3946) -- Implement right semi join and support it in HashBuildProbeOrder [\#3945](https://github.com/apache/arrow-datafusion/issues/3945) -- Refactor `simplify_expressions` and `expr_simplifier` [\#3934](https://github.com/apache/arrow-datafusion/issues/3934) -- Implement serialization for `ScalarValue::FixedSizeBinary` [\#3928](https://github.com/apache/arrow-datafusion/issues/3928) -- Support inlining view / dataframes logical plan [\#3913](https://github.com/apache/arrow-datafusion/issues/3913) -- Plans with tables from `TableProviderFactory`s can't be serialized [\#3906](https://github.com/apache/arrow-datafusion/issues/3906) -- Simplify `a AND a` and `a OR a`. 
[\#3895](https://github.com/apache/arrow-datafusion/issues/3895) -- Allow configuring statistics on TPC-H benchmarks [\#3888](https://github.com/apache/arrow-datafusion/issues/3888) -- CI checks stuck in queued mode [\#3883](https://github.com/apache/arrow-datafusion/issues/3883) -- Multiple optimizer passes [\#3879](https://github.com/apache/arrow-datafusion/issues/3879) -- datafusion-proto does not support view table scan [\#3874](https://github.com/apache/arrow-datafusion/issues/3874) -- TableProviderFactories need to be async and return a Result to be useful [\#3866](https://github.com/apache/arrow-datafusion/issues/3866) -- Factorize common AND factors out of OR predicates to support filterPushDown as possible [\#3858](https://github.com/apache/arrow-datafusion/issues/3858) -- Replace `concat_ws` with `concat` when the delimiter is empty string [\#3857](https://github.com/apache/arrow-datafusion/issues/3857) -- Concatenate contiguous literal arguments of `concat_ws` when doing the expression simplification [\#3856](https://github.com/apache/arrow-datafusion/issues/3856) -- Partition and Sort Enforcement [\#3854](https://github.com/apache/arrow-datafusion/issues/3854) -- Enable mimalloc by default in benchmarks [\#3851](https://github.com/apache/arrow-datafusion/issues/3851) -- Add collect statistics configuration [\#3847](https://github.com/apache/arrow-datafusion/issues/3847) -- \[SQL\] - Support cache/uncache table syntax [\#3842](https://github.com/apache/arrow-datafusion/issues/3842) -- Filter pushdown doesn't seem to apply for filter on TPC-H Q17 [\#3839](https://github.com/apache/arrow-datafusion/issues/3839) -- Support pushdown multi-columns in PageIndex pruning. 
[\#3834](https://github.com/apache/arrow-datafusion/issues/3834) -- Consolidate `Expr` manipulation code so it is more discoverable and make it easier to use [\#3808](https://github.com/apache/arrow-datafusion/issues/3808) -- Leverage input array's null buffer for regex replace to optimize sparse arrays [\#3803](https://github.com/apache/arrow-datafusion/issues/3803) -- Improve join cardinality estimation when there is no overlap in the min/max values [\#3802](https://github.com/apache/arrow-datafusion/issues/3802) -- datafusion-cli up to date check is failing on master [\#3798](https://github.com/apache/arrow-datafusion/issues/3798) -- Optimize benchmark q2 subquery filter [\#3789](https://github.com/apache/arrow-datafusion/issues/3789) -- Benchmark should infer schema when running against Parquet [\#3776](https://github.com/apache/arrow-datafusion/issues/3776) -- Allow specialized physical functions to provide hints for the array adapter [\#3762](https://github.com/apache/arrow-datafusion/issues/3762) -- \[User Guide\] Add `EXPLAIN` to SQL reference [\#3755](https://github.com/apache/arrow-datafusion/issues/3755) -- move `type coercion` for agg/agg udf [\#3752](https://github.com/apache/arrow-datafusion/issues/3752) -- Prevent Cargo.lock for datafusion-cli being out-of-date [\#3744](https://github.com/apache/arrow-datafusion/issues/3744) -- Add example of expr apis including simplification and coercion [\#3740](https://github.com/apache/arrow-datafusion/issues/3740) -- support `type coercion` for ScalarFunction expr in the logical phase [\#3731](https://github.com/apache/arrow-datafusion/issues/3731) -- Add support for DISTINCT projections in `decorrelate_where_exists` [\#3724](https://github.com/apache/arrow-datafusion/issues/3724) -- Add type coercion rule for `CONCAT` and `CONCAT_WS` [\#3720](https://github.com/apache/arrow-datafusion/issues/3720) -- Expose and document a simpler public API for simplify expressions 
[\#3709](https://github.com/apache/arrow-datafusion/issues/3709) -- Expose + document the type coercion API publicly [\#3708](https://github.com/apache/arrow-datafusion/issues/3708) -- Concatenate contiguous literal arguments of `CONCAT` during the expression simplification. [\#3683](https://github.com/apache/arrow-datafusion/issues/3683) -- DataFusion 13.0.0 Release [\#3671](https://github.com/apache/arrow-datafusion/issues/3671) -- Add division by `0` rules in the expression simplification [\#3663](https://github.com/apache/arrow-datafusion/issues/3663) -- Compressed CSV/JSON Read [\#3641](https://github.com/apache/arrow-datafusion/issues/3641) -- remove type coercion for agg [\#3623](https://github.com/apache/arrow-datafusion/issues/3623) -- extract or clause as predicate for join rels [\#3577](https://github.com/apache/arrow-datafusion/issues/3577) -- Improve performance of `regex_replace` [\#3518](https://github.com/apache/arrow-datafusion/issues/3518) -- Add benchmarks for parquet queries with filter pushdown enabled [\#3457](https://github.com/apache/arrow-datafusion/issues/3457) -- Make type coercion rule more robust [\#3390](https://github.com/apache/arrow-datafusion/issues/3390) -- `ViewTable::scan` ignores filters and limits [\#3249](https://github.com/apache/arrow-datafusion/issues/3249) -- Add `CREATE VIEW` documentation to user guide [\#3211](https://github.com/apache/arrow-datafusion/issues/3211) -- Push additional parquet filtering into the parquet scan \[EPIC\] [\#3147](https://github.com/apache/arrow-datafusion/issues/3147) -- Remove `core/logical_plan` module [\#2683](https://github.com/apache/arrow-datafusion/issues/2683) -- Datafusion Optimizer Enhancement [\#2255](https://github.com/apache/arrow-datafusion/issues/2255) -- \[Optimizer\] Eliminate self compare self [\#2252](https://github.com/apache/arrow-datafusion/issues/2252) -- Break datafusion crate into smaller crates [\#1750](https://github.com/apache/arrow-datafusion/issues/1750) -- 
Benchmark `constellation-rs/amadeus`'s parquet implementation [\#1341](https://github.com/apache/arrow-datafusion/issues/1341) -- Use `parquet2` async reader in `physical_plan/parquet` [\#1058](https://github.com/apache/arrow-datafusion/issues/1058) -- Table Scan Enhancement Plan [\#944](https://github.com/apache/arrow-datafusion/issues/944) -- Implement parquet page-level skipping with column index, using min/max stats [\#847](https://github.com/apache/arrow-datafusion/issues/847) -- Support min/max statistics in ParquetTable and ParquetExec [\#537](https://github.com/apache/arrow-datafusion/issues/537) +- Automatically register tables if ObjectStore root is configured [\#4094](https://github.com/apache/datafusion/issues/4094) +- Simplify small `InList` expressions [\#4089](https://github.com/apache/datafusion/issues/4089) +- Support `SET` command [\#4067](https://github.com/apache/datafusion/issues/4067) +- add uuid\(\) function to generate unique uuid per row [\#4045](https://github.com/apache/datafusion/issues/4045) +- Publish benchmark crate so that it can be used as a library in Ballista [\#4016](https://github.com/apache/datafusion/issues/4016) +- Add statistics methods to `TableProvider` trait for use in cost-based optimizations in the logical plan [\#3983](https://github.com/apache/datafusion/issues/3983) +- Implement `current_time` Function [\#3982](https://github.com/apache/datafusion/issues/3982) +- Implement `current_date` Function [\#3981](https://github.com/apache/datafusion/issues/3981) +- Put common code used for testing code into datafusion/test_utils.rs [\#3960](https://github.com/apache/datafusion/issues/3960) +- Print the configurations of ConfigOptions in an ordered way so that we can directly compare the equality of two ConfigOptions by their debug strings [\#3952](https://github.com/apache/datafusion/issues/3952) +- Don't make dependants install protoc [\#3947](https://github.com/apache/datafusion/issues/3947) +- Implement right anti join 
and support it in HashBuildProbeOrder [\#3946](https://github.com/apache/datafusion/issues/3946) +- Implement right semi join and support it in HashBuildProbeOrder [\#3945](https://github.com/apache/datafusion/issues/3945) +- Refactor `simplify_expressions` and `expr_simplifier` [\#3934](https://github.com/apache/datafusion/issues/3934) +- Implement serialization for `ScalarValue::FixedSizeBinary` [\#3928](https://github.com/apache/datafusion/issues/3928) +- Support inlining view / dataframes logical plan [\#3913](https://github.com/apache/datafusion/issues/3913) +- Plans with tables from `TableProviderFactory`s can't be serialized [\#3906](https://github.com/apache/datafusion/issues/3906) +- Simplify `a AND a` and `a OR a`. [\#3895](https://github.com/apache/datafusion/issues/3895) +- Allow configuring statistics on TPC-H benchmarks [\#3888](https://github.com/apache/datafusion/issues/3888) +- CI checks stuck in queued mode [\#3883](https://github.com/apache/datafusion/issues/3883) +- Multiple optimizer passes [\#3879](https://github.com/apache/datafusion/issues/3879) +- datafusion-proto does not support view table scan [\#3874](https://github.com/apache/datafusion/issues/3874) +- TableProviderFactories need to be async and return a Result to be useful [\#3866](https://github.com/apache/datafusion/issues/3866) +- Factorize common AND factors out of OR predicates to support filterPushDown as possible [\#3858](https://github.com/apache/datafusion/issues/3858) +- Replace `concat_ws` with `concat` when the delimiter is empty string [\#3857](https://github.com/apache/datafusion/issues/3857) +- Concatenate contiguous literal arguments of `concat_ws` when doing the expression simplification [\#3856](https://github.com/apache/datafusion/issues/3856) +- Partition and Sort Enforcement [\#3854](https://github.com/apache/datafusion/issues/3854) +- Enable mimalloc by default in benchmarks [\#3851](https://github.com/apache/datafusion/issues/3851) +- Add collect statistics 
configuration [\#3847](https://github.com/apache/datafusion/issues/3847) +- \[SQL\] - Support cache/uncache table syntax [\#3842](https://github.com/apache/datafusion/issues/3842) +- Filter pushdown doesn't seem to apply for filter on TPC-H Q17 [\#3839](https://github.com/apache/datafusion/issues/3839) +- Support pushdown multi-columns in PageIndex pruning. [\#3834](https://github.com/apache/datafusion/issues/3834) +- Consolidate `Expr` manipulation code so it is more discoverable and make it easier to use [\#3808](https://github.com/apache/datafusion/issues/3808) +- Leverage input array's null buffer for regex replace to optimize sparse arrays [\#3803](https://github.com/apache/datafusion/issues/3803) +- Improve join cardinality estimation when there is no overlap in the min/max values [\#3802](https://github.com/apache/datafusion/issues/3802) +- datafusion-cli up to date check is failing on master [\#3798](https://github.com/apache/datafusion/issues/3798) +- Optimize benchmark q2 subquery filter [\#3789](https://github.com/apache/datafusion/issues/3789) +- Benchmark should infer schema when running against Parquet [\#3776](https://github.com/apache/datafusion/issues/3776) +- Allow specialized physical functions to provide hints for the array adapter [\#3762](https://github.com/apache/datafusion/issues/3762) +- \[User Guide\] Add `EXPLAIN` to SQL reference [\#3755](https://github.com/apache/datafusion/issues/3755) +- move `type coercion` for agg/agg udf [\#3752](https://github.com/apache/datafusion/issues/3752) +- Prevent Cargo.lock for datafusion-cli being out-of-date [\#3744](https://github.com/apache/datafusion/issues/3744) +- Add example of expr apis including simplification and coercion [\#3740](https://github.com/apache/datafusion/issues/3740) +- support `type coercion` for ScalarFunction expr in the logical phase [\#3731](https://github.com/apache/datafusion/issues/3731) +- Add support for DISTINCT projections in `decorrelate_where_exists` 
[\#3724](https://github.com/apache/datafusion/issues/3724) +- Add type coercion rule for `CONCAT` and `CONCAT_WS` [\#3720](https://github.com/apache/datafusion/issues/3720) +- Expose and document a simpler public API for simplify expressions [\#3709](https://github.com/apache/datafusion/issues/3709) +- Expose + document the type coercion API publicly [\#3708](https://github.com/apache/datafusion/issues/3708) +- Concatenate contiguous literal arguments of `CONCAT` during the expression simplification. [\#3683](https://github.com/apache/datafusion/issues/3683) +- DataFusion 13.0.0 Release [\#3671](https://github.com/apache/datafusion/issues/3671) +- Add division by `0` rules in the expression simplification [\#3663](https://github.com/apache/datafusion/issues/3663) +- Compressed CSV/JSON Read [\#3641](https://github.com/apache/datafusion/issues/3641) +- remove type coercion for agg [\#3623](https://github.com/apache/datafusion/issues/3623) +- extract or clause as predicate for join rels [\#3577](https://github.com/apache/datafusion/issues/3577) +- Improve performance of `regex_replace` [\#3518](https://github.com/apache/datafusion/issues/3518) +- Add benchmarks for parquet queries with filter pushdown enabled [\#3457](https://github.com/apache/datafusion/issues/3457) +- Make type coercion rule more robust [\#3390](https://github.com/apache/datafusion/issues/3390) +- `ViewTable::scan` ignores filters and limits [\#3249](https://github.com/apache/datafusion/issues/3249) +- Add `CREATE VIEW` documentation to user guide [\#3211](https://github.com/apache/datafusion/issues/3211) +- Push additional parquet filtering into the parquet scan \[EPIC\] [\#3147](https://github.com/apache/datafusion/issues/3147) +- Remove `core/logical_plan` module [\#2683](https://github.com/apache/datafusion/issues/2683) +- Datafusion Optimizer Enhancement [\#2255](https://github.com/apache/datafusion/issues/2255) +- \[Optimizer\] Eliminate self compare self 
[\#2252](https://github.com/apache/datafusion/issues/2252) +- Break datafusion crate into smaller crates [\#1750](https://github.com/apache/datafusion/issues/1750) +- Benchmark `constellation-rs/amadeus`'s parquet implementation [\#1341](https://github.com/apache/datafusion/issues/1341) +- Use `parquet2` async reader in `physical_plan/parquet` [\#1058](https://github.com/apache/datafusion/issues/1058) +- Table Scan Enhancement Plan [\#944](https://github.com/apache/datafusion/issues/944) +- Implement parquet page-level skipping with column index, using min/max stats [\#847](https://github.com/apache/datafusion/issues/847) +- Support min/max statistics in ParquetTable and ParquetExec [\#537](https://github.com/apache/datafusion/issues/537) **Fixed bugs:** -- Clippy failing on master [\#4100](https://github.com/apache/arrow-datafusion/issues/4100) -- Panic when the number of partitions of the pipeline that throws the exception is inconsistent with the number of partitions output by the query [\#4096](https://github.com/apache/arrow-datafusion/issues/4096) -- FieldNotFound when field is available [\#4083](https://github.com/apache/arrow-datafusion/issues/4083) -- SingleDistinctToGroupBy being applied too broadly [\#4082](https://github.com/apache/arrow-datafusion/issues/4082) -- single_distinct_to_groupby strips qualifiers from group-by expressions [\#4049](https://github.com/apache/arrow-datafusion/issues/4049) -- Another Internal error when parquet predicate pushdown is enabled "Error evaluating filter predicate: [\#4046](https://github.com/apache/arrow-datafusion/issues/4046) -- Decimal multiplied by Float produces incorrect results [\#4035](https://github.com/apache/arrow-datafusion/issues/4035) -- Cannot query external table - TableScan replaced with EmptyExec [\#4027](https://github.com/apache/arrow-datafusion/issues/4027) -- benchmark q17 produces incorrect result [\#4026](https://github.com/apache/arrow-datafusion/issues/4026) -- benchmark q14 produces 
incorrect result [\#4025](https://github.com/apache/arrow-datafusion/issues/4025) -- benchmark q11 producing incorrect results [\#4023](https://github.com/apache/arrow-datafusion/issues/4023) -- Internal error when parquet predicate pushdown is enabled "Error evaluating filter predicate:" [\#4006](https://github.com/apache/arrow-datafusion/issues/4006) -- Incorrect results with parquet filtering pushdown enabled [\#4005](https://github.com/apache/arrow-datafusion/issues/4005) -- Wrong results when parquet page index filtering is enabled [\#4002](https://github.com/apache/arrow-datafusion/issues/4002) -- Output schema of semi join has invalid projection added after HashBuildProbeOrder [\#4001](https://github.com/apache/arrow-datafusion/issues/4001) -- `async` deserialization functions are unintuitive and possibly insecure [\#3977](https://github.com/apache/arrow-datafusion/issues/3977) -- `Expr::to_bytes` can produce output that hits `Expr::from_bytes` recursion limit [\#3968](https://github.com/apache/arrow-datafusion/issues/3968) -- Bug on propagating arrow field metadata [\#3964](https://github.com/apache/arrow-datafusion/issues/3964) -- Predicate still has cast when comparing Timestamp\(Nano, None\) to a timestamp literal, so can't be pushed down or used for pruning [\#3938](https://github.com/apache/arrow-datafusion/issues/3938) -- Error using `IN` list on dictionary encoded data: `InList does not support datatype Dictionary(Int32, Utf8).` [\#3936](https://github.com/apache/arrow-datafusion/issues/3936) -- Internal error in CAST from Timestamp\[us\] [\#3922](https://github.com/apache/arrow-datafusion/issues/3922) -- ScalarValue not implemented for FixedSizeBinary types [\#3910](https://github.com/apache/arrow-datafusion/issues/3910) -- \[DOC\] - There are unsupported DDL in the official documentation [\#3904](https://github.com/apache/arrow-datafusion/issues/3904) -- datafusion-proto deserialize with Substring\(str \[from int\] \[for int\]\) fails 
[\#3901](https://github.com/apache/arrow-datafusion/issues/3901) -- `count(Literal)` gives wrong column name [\#3891](https://github.com/apache/arrow-datafusion/issues/3891) -- `projection_push_down` adds duplicate projections with multiple passes [\#3881](https://github.com/apache/arrow-datafusion/issues/3881) -- Default physical planner generates empty relation for DROP TABLE, CREATE MEMORY TABLE, etc [\#3873](https://github.com/apache/arrow-datafusion/issues/3873) -- Binary expression canonical names are incorrect in some cases [\#3865](https://github.com/apache/arrow-datafusion/issues/3865) -- Using the window function lag causes panic. [\#3830](https://github.com/apache/arrow-datafusion/issues/3830) -- chrono crate : specify 0.4.22 as the minimum version due to spurious build failures [\#3827](https://github.com/apache/arrow-datafusion/issues/3827) -- datafusion-proto deserialize with q16 sql fails [\#3820](https://github.com/apache/arrow-datafusion/issues/3820) -- Filter predicates should not be aliased [\#3795](https://github.com/apache/arrow-datafusion/issues/3795) -- Write csv not save all lines of dataframe [\#3783](https://github.com/apache/arrow-datafusion/issues/3783) -- Regression in simplifying expressions in subqueries [\#3760](https://github.com/apache/arrow-datafusion/issues/3760) -- DataFusionError\(Internal\("The size of the sorted batch is larger than the size of the input batch: 2120 \> 2312"\)\) [\#3747](https://github.com/apache/arrow-datafusion/issues/3747) -- "labeler" PR check is broken [\#3743](https://github.com/apache/arrow-datafusion/issues/3743) -- `DataFrame::select_columns` doesn't work with names containing "." [\#3733](https://github.com/apache/arrow-datafusion/issues/3733) -- TPC-H Query 1 has regressed [\#3729](https://github.com/apache/arrow-datafusion/issues/3729) -- \[RUST\]\[Datafusion\] What causes "Error: Execution\("file size of 4 is less than footer"\)" error? 
[\#3800](https://github.com/apache/arrow-datafusion/issues/3800) -- Field names containing periods such as f.c cannot work [\#3682](https://github.com/apache/arrow-datafusion/issues/3682) -- TableProvider implementation for DataFrame does not support filter pushdown [\#3681](https://github.com/apache/arrow-datafusion/issues/3681) -- using Decimal\(0\) make system panicked [\#3665](https://github.com/apache/arrow-datafusion/issues/3665) -- Cannot query some parquet files in S3, but they work locally [\#3633](https://github.com/apache/arrow-datafusion/issues/3633) -- ` col / col` returns `1` when `col = 0` [\#3615](https://github.com/apache/arrow-datafusion/issues/3615) -- register_csv allow space in table_path [\#3589](https://github.com/apache/arrow-datafusion/issues/3589) -- Hardcoded u64 for WindowFrameBound fields [\#3571](https://github.com/apache/arrow-datafusion/issues/3571) -- `docs.rs` cannot build `datafusion-proto` crate [\#3538](https://github.com/apache/arrow-datafusion/issues/3538) -- Row Hash loads whole aggregation state to memory before sending [\#3460](https://github.com/apache/arrow-datafusion/issues/3460) -- approx_percentile_cont return wrong result when scan multi parquet files. [\#3140](https://github.com/apache/arrow-datafusion/issues/3140) -- User guide is incorrect regarding using CLI to register CSV files using schema inference [\#3001](https://github.com/apache/arrow-datafusion/issues/3001) -- Exception: Internal error, Exception: Schema error [\#2938](https://github.com/apache/arrow-datafusion/issues/2938) -- Version 0.6.0 Panic error during SQL execution [\#2738](https://github.com/apache/arrow-datafusion/issues/2738) -- wrong result when operation parquet [\#2044](https://github.com/apache/arrow-datafusion/issues/2044) -- Local object store accepts file:/// as base path, but LocalStore returns meta without the prefix. 
[\#1923](https://github.com/apache/arrow-datafusion/issues/1923) -- Reading nested parquet files results in `index out of bounds` [\#1383](https://github.com/apache/arrow-datafusion/issues/1383) -- `-` \(negation\) with NULL literals does not work: can't be evaluated because the expression's type is Utf8, not signed [\#1192](https://github.com/apache/arrow-datafusion/issues/1192) -- Inconsistent cast behavior [\#957](https://github.com/apache/arrow-datafusion/issues/957) -- single_distinct_to_groupby no longer drops qualifiers [\#4050](https://github.com/apache/arrow-datafusion/pull/4050) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- Clippy failing on master [\#4100](https://github.com/apache/datafusion/issues/4100) +- Panic when the number of partitions of the pipeline that throws the exception is inconsistent with the number of partitions output by the query [\#4096](https://github.com/apache/datafusion/issues/4096) +- FieldNotFound when field is available [\#4083](https://github.com/apache/datafusion/issues/4083) +- SingleDistinctToGroupBy being applied too broadly [\#4082](https://github.com/apache/datafusion/issues/4082) +- single_distinct_to_groupby strips qualifiers from group-by expressions [\#4049](https://github.com/apache/datafusion/issues/4049) +- Another Internal error when parquet predicate pushdown is enabled "Error evaluating filter predicate: [\#4046](https://github.com/apache/datafusion/issues/4046) +- Decimal multiplied by Float produces incorrect results [\#4035](https://github.com/apache/datafusion/issues/4035) +- Cannot query external table - TableScan replaced with EmptyExec [\#4027](https://github.com/apache/datafusion/issues/4027) +- benchmark q17 produces incorrect result [\#4026](https://github.com/apache/datafusion/issues/4026) +- benchmark q14 produces incorrect result [\#4025](https://github.com/apache/datafusion/issues/4025) +- benchmark q11 producing incorrect results 
[\#4023](https://github.com/apache/datafusion/issues/4023) +- Internal error when parquet predicate pushdown is enabled "Error evaluating filter predicate:" [\#4006](https://github.com/apache/datafusion/issues/4006) +- Incorrect results with parquet filtering pushdown enabled [\#4005](https://github.com/apache/datafusion/issues/4005) +- Wrong results when parquet page index filtering is enabled [\#4002](https://github.com/apache/datafusion/issues/4002) +- Output schema of semi join has invalid projection added after HashBuildProbeOrder [\#4001](https://github.com/apache/datafusion/issues/4001) +- `async` deserialization functions are unintuitive and possibly insecure [\#3977](https://github.com/apache/datafusion/issues/3977) +- `Expr::to_bytes` can produce output that hits `Expr::from_bytes` recursion limit [\#3968](https://github.com/apache/datafusion/issues/3968) +- Bug on propagating arrow field metadata [\#3964](https://github.com/apache/datafusion/issues/3964) +- Predicate still has cast when comparing Timestamp\(Nano, None\) to a timestamp literal, so can't be pushed down or used for pruning [\#3938](https://github.com/apache/datafusion/issues/3938) +- Error using `IN` list on dictionary encoded data: `InList does not support datatype Dictionary(Int32, Utf8).` [\#3936](https://github.com/apache/datafusion/issues/3936) +- Internal error in CAST from Timestamp\[us\] [\#3922](https://github.com/apache/datafusion/issues/3922) +- ScalarValue not implemented for FixedSizeBinary types [\#3910](https://github.com/apache/datafusion/issues/3910) +- \[DOC\] - There are unsupported DDL in the official documentation [\#3904](https://github.com/apache/datafusion/issues/3904) +- datafusion-proto deserialize with Substring\(str \[from int\] \[for int\]\) fails [\#3901](https://github.com/apache/datafusion/issues/3901) +- `count(Literal)` gives wrong column name [\#3891](https://github.com/apache/datafusion/issues/3891) +- `projection_push_down` adds duplicate projections 
with multiple passes [\#3881](https://github.com/apache/datafusion/issues/3881) +- Default physical planner generates empty relation for DROP TABLE, CREATE MEMORY TABLE, etc [\#3873](https://github.com/apache/datafusion/issues/3873) +- Binary expression canonical names are incorrect in some cases [\#3865](https://github.com/apache/datafusion/issues/3865) +- Using the window function lag causes panic. [\#3830](https://github.com/apache/datafusion/issues/3830) +- chrono crate : specify 0.4.22 as the minimum version due to spurious build failures [\#3827](https://github.com/apache/datafusion/issues/3827) +- datafusion-proto deserialize with q16 sql fails [\#3820](https://github.com/apache/datafusion/issues/3820) +- Filter predicates should not be aliased [\#3795](https://github.com/apache/datafusion/issues/3795) +- Write csv not save all lines of dataframe [\#3783](https://github.com/apache/datafusion/issues/3783) +- Regression in simplifying expressions in subqueries [\#3760](https://github.com/apache/datafusion/issues/3760) +- DataFusionError\(Internal\("The size of the sorted batch is larger than the size of the input batch: 2120 \> 2312"\)\) [\#3747](https://github.com/apache/datafusion/issues/3747) +- "labeler" PR check is broken [\#3743](https://github.com/apache/datafusion/issues/3743) +- `DataFrame::select_columns` doesn't work with names containing "." [\#3733](https://github.com/apache/datafusion/issues/3733) +- TPC-H Query 1 has regressed [\#3729](https://github.com/apache/datafusion/issues/3729) +- \[RUST\]\[Datafusion\] What causes "Error: Execution\("file size of 4 is less than footer"\)" error? 
[\#3800](https://github.com/apache/datafusion/issues/3800) +- Field names containing periods such as f.c cannot work [\#3682](https://github.com/apache/datafusion/issues/3682) +- TableProvider implementation for DataFrame does not support filter pushdown [\#3681](https://github.com/apache/datafusion/issues/3681) +- using Decimal\(0\) make system panicked [\#3665](https://github.com/apache/datafusion/issues/3665) +- Cannot query some parquet files in S3, but they work locally [\#3633](https://github.com/apache/datafusion/issues/3633) +- ` col / col` returns `1` when `col = 0` [\#3615](https://github.com/apache/datafusion/issues/3615) +- register_csv allow space in table_path [\#3589](https://github.com/apache/datafusion/issues/3589) +- Hardcoded u64 for WindowFrameBound fields [\#3571](https://github.com/apache/datafusion/issues/3571) +- `docs.rs` cannot build `datafusion-proto` crate [\#3538](https://github.com/apache/datafusion/issues/3538) +- Row Hash loads whole aggregation state to memory before sending [\#3460](https://github.com/apache/datafusion/issues/3460) +- approx_percentile_cont return wrong result when scan multi parquet files. [\#3140](https://github.com/apache/datafusion/issues/3140) +- User guide is incorrect regarding using CLI to register CSV files using schema inference [\#3001](https://github.com/apache/datafusion/issues/3001) +- Exception: Internal error, Exception: Schema error [\#2938](https://github.com/apache/datafusion/issues/2938) +- Version 0.6.0 Panic error during SQL execution [\#2738](https://github.com/apache/datafusion/issues/2738) +- wrong result when operation parquet [\#2044](https://github.com/apache/datafusion/issues/2044) +- Local object store accepts file:/// as base path, but LocalStore returns meta without the prefix. 
[\#1923](https://github.com/apache/datafusion/issues/1923) +- Reading nested parquet files results in `index out of bounds` [\#1383](https://github.com/apache/datafusion/issues/1383) +- `-` \(negation\) with NULL literals does not work: can't be evaluated because the expression's type is Utf8, not signed [\#1192](https://github.com/apache/datafusion/issues/1192) +- Inconsistent cast behavior [\#957](https://github.com/apache/datafusion/issues/957) +- single_distinct_to_groupby no longer drops qualifiers [\#4050](https://github.com/apache/datafusion/pull/4050) [[sql](https://github.com/apache/datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) **Documentation updates:** -- Clarify in docs that Identifiers are made lower-case in SQL query [\#2374](https://github.com/apache/arrow-datafusion/issues/2374) -- Fix broken links in contributor guide [\#3956](https://github.com/apache/arrow-datafusion/pull/3956) ([Jefffrey](https://github.com/Jefffrey)) -- add create view explanation [\#3925](https://github.com/apache/arrow-datafusion/pull/3925) ([retikulum](https://github.com/retikulum)) -- Update `datafusion-examples` README [\#3814](https://github.com/apache/arrow-datafusion/pull/3814) ([alamb](https://github.com/alamb)) -- Add Seafowl to list of projects using DataFusion [\#3792](https://github.com/apache/arrow-datafusion/pull/3792) ([mildbyte](https://github.com/mildbyte)) +- Clarify in docs that Identifiers are made lower-case in SQL query [\#2374](https://github.com/apache/datafusion/issues/2374) +- Fix broken links in contributor guide [\#3956](https://github.com/apache/datafusion/pull/3956) ([Jefffrey](https://github.com/Jefffrey)) +- add create view explanation [\#3925](https://github.com/apache/datafusion/pull/3925) ([retikulum](https://github.com/retikulum)) +- Update `datafusion-examples` README [\#3814](https://github.com/apache/datafusion/pull/3814) ([alamb](https://github.com/alamb)) +- Add Seafowl to list of projects using DataFusion 
[\#3792](https://github.com/apache/datafusion/pull/3792) ([mildbyte](https://github.com/mildbyte)) **Closed issues:** -- \[QUESTION\] How many times should be the function `create_name` called when executing a query? [\#3900](https://github.com/apache/arrow-datafusion/issues/3900) -- Improve the `Expr` string format [\#3878](https://github.com/apache/arrow-datafusion/issues/3878) -- Simplify division by zero \(division by one / multiplication by zero / multiplication by one\) for Decimal types as well [\#3643](https://github.com/apache/arrow-datafusion/issues/3643) -- InList: merge check branch [\#2833](https://github.com/apache/arrow-datafusion/issues/2833) -- Optimization InList: compare the float data type using OrderedFloat\ [\#2831](https://github.com/apache/arrow-datafusion/issues/2831) -- Outdated section of the add function of the contribution guide [\#2560](https://github.com/apache/arrow-datafusion/issues/2560) -- Optimize InList implementation with native types rather than ScalarValue [\#2165](https://github.com/apache/arrow-datafusion/issues/2165) -- Improve testing of optimizers using EXPLAIN [\#1118](https://github.com/apache/arrow-datafusion/issues/1118) -- Crash on parsing sql query with Cyrillic letters [\#184](https://github.com/apache/arrow-datafusion/issues/184) -- \[EPIC\] Support all TPC-H queries in benchmark [\#158](https://github.com/apache/arrow-datafusion/issues/158) -- Implement optional second argument to ltrim and rtrim functions [\#144](https://github.com/apache/arrow-datafusion/issues/144) -- Benchmark crate does not have a SIMD feature [\#124](https://github.com/apache/arrow-datafusion/issues/124) -- ColumnarValue::into_array should not require batch [\#113](https://github.com/apache/arrow-datafusion/issues/113) -- \[Rust\] Parquet data source does not support complex types [\#83](https://github.com/apache/arrow-datafusion/issues/83) +- \[QUESTION\] How many times should be the function `create_name` called when executing a query? 
[\#3900](https://github.com/apache/datafusion/issues/3900) +- Improve the `Expr` string format [\#3878](https://github.com/apache/datafusion/issues/3878) +- Simplify division by zero \(division by one / multiplication by zero / multiplication by one\) for Decimal types as well [\#3643](https://github.com/apache/datafusion/issues/3643) +- InList: merge check branch [\#2833](https://github.com/apache/datafusion/issues/2833) +- Optimization InList: compare the float data type using OrderedFloat\ [\#2831](https://github.com/apache/datafusion/issues/2831) +- Outdated section of the add function of the contribution guide [\#2560](https://github.com/apache/datafusion/issues/2560) +- Optimize InList implementation with native types rather than ScalarValue [\#2165](https://github.com/apache/datafusion/issues/2165) +- Improve testing of optimizers using EXPLAIN [\#1118](https://github.com/apache/datafusion/issues/1118) +- Crash on parsing sql query with Cyrillic letters [\#184](https://github.com/apache/datafusion/issues/184) +- \[EPIC\] Support all TPC-H queries in benchmark [\#158](https://github.com/apache/datafusion/issues/158) +- Implement optional second argument to ltrim and rtrim functions [\#144](https://github.com/apache/datafusion/issues/144) +- Benchmark crate does not have a SIMD feature [\#124](https://github.com/apache/datafusion/issues/124) +- ColumnarValue::into_array should not require batch [\#113](https://github.com/apache/datafusion/issues/113) +- \[Rust\] Parquet data source does not support complex types [\#83](https://github.com/apache/datafusion/issues/83) **Merged pull requests:** -- Appease new clippy [\#4101](https://github.com/apache/arrow-datafusion/pull/4101) ([alamb](https://github.com/alamb)) -- minor: Split parquet reader up into smaller modules [\#4099](https://github.com/apache/arrow-datafusion/pull/4099) ([alamb](https://github.com/alamb)) -- \[MINOR\] Update `SET` in cli.md [\#4098](https://github.com/apache/arrow-datafusion/pull/4098) 
([waitingkuo](https://github.com/waitingkuo)) -- fix: Scheduler panic routing errors [\#4097](https://github.com/apache/arrow-datafusion/pull/4097) ([yukkit](https://github.com/yukkit)) -- Automatically register tables if ObjectStore root is configured [\#4095](https://github.com/apache/arrow-datafusion/pull/4095) ([avantgardnerio](https://github.com/avantgardnerio)) -- minor: Use Operator::swap [\#4092](https://github.com/apache/arrow-datafusion/pull/4092) ([alamb](https://github.com/alamb)) -- Simplify small InListExpr [\#4090](https://github.com/apache/arrow-datafusion/pull/4090) ([Dandandan](https://github.com/Dandandan)) -- Minor: Add arrow-rs ticket reference and turn some comments into docstrings [\#4088](https://github.com/apache/arrow-datafusion/pull/4088) ([alamb](https://github.com/alamb)) -- Support Dictionary in InListExpr [\#4070](https://github.com/apache/arrow-datafusion/pull/4070) ([tustvold](https://github.com/tustvold)) -- support `SET` variable [\#4069](https://github.com/apache/arrow-datafusion/pull/4069) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([waitingkuo](https://github.com/waitingkuo)) -- Add in list bench [\#4068](https://github.com/apache/arrow-datafusion/pull/4068) ([tustvold](https://github.com/tustvold)) -- Improve Error Handling and Readibility for downcasting `StructArray` [\#4061](https://github.com/apache/arrow-datafusion/pull/4061) ([retikulum](https://github.com/retikulum)) -- Build tests separately from running [\#4060](https://github.com/apache/arrow-datafusion/pull/4060) ([alamb](https://github.com/alamb)) -- Simplify InListExpr ~20-70% Faster [\#4057](https://github.com/apache/arrow-datafusion/pull/4057) ([tustvold](https://github.com/tustvold)) -- MINOR: Print unoptimized logical plan in execute_query of tpch benchmark [\#4056](https://github.com/apache/arrow-datafusion/pull/4056) ([viirya](https://github.com/viirya)) -- Minor: clean the code in `eliminate_filter` 
[\#4055](https://github.com/apache/arrow-datafusion/pull/4055) ([HaoYang670](https://github.com/HaoYang670)) -- Implement `current_time` scalar function [\#4054](https://github.com/apache/arrow-datafusion/pull/4054) ([naosense](https://github.com/naosense)) -- Cleanup hash_utils adding support for decimal256 and f16 [\#4053](https://github.com/apache/arrow-datafusion/pull/4053) ([tustvold](https://github.com/tustvold)) -- Fix multicolumn parquet predicate pushdown \(\#4046\) [\#4048](https://github.com/apache/arrow-datafusion/pull/4048) ([tustvold](https://github.com/tustvold)) -- Add CI checks that we can serde all benchmark queries [\#4047](https://github.com/apache/arrow-datafusion/pull/4047) ([andygrove](https://github.com/andygrove)) -- Enable more benchmark verification tests [\#4044](https://github.com/apache/arrow-datafusion/pull/4044) ([andygrove](https://github.com/andygrove)) -- Extract common parquet testing code to `parquet-test-util` crate [\#4042](https://github.com/apache/arrow-datafusion/pull/4042) ([alamb](https://github.com/alamb)) -- add uuid\(\) function [\#4041](https://github.com/apache/arrow-datafusion/pull/4041) ([Jimexist](https://github.com/Jimexist)) -- Update to arrow 26, change timezones [\#4039](https://github.com/apache/arrow-datafusion/pull/4039) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) -- Fix Decimal and Floating type coerce rule [\#4038](https://github.com/apache/arrow-datafusion/pull/4038) ([viirya](https://github.com/viirya)) -- Reserve the literal expression of `Count` function [\#4031](https://github.com/apache/arrow-datafusion/pull/4031) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([HaoYang670](https://github.com/HaoYang670)) -- Implement current_date scalar function [\#4022](https://github.com/apache/arrow-datafusion/pull/4022) ([comphead](https://github.com/comphead)) -- Fix predicate pushdown bugs: project columns within 
DatafusionArrowPredicate \(\#4005\) \(\#4006\) [\#4021](https://github.com/apache/arrow-datafusion/pull/4021) ([tustvold](https://github.com/tustvold)) -- minor: remove redundant code/TODO [\#4019](https://github.com/apache/arrow-datafusion/pull/4019) ([jackwener](https://github.com/jackwener)) -- Add CI check to verify that benchmark queries return the expected results [\#4015](https://github.com/apache/arrow-datafusion/pull/4015) ([andygrove](https://github.com/andygrove)) -- Minor: Add TODO and tracking ticket reference [\#4012](https://github.com/apache/arrow-datafusion/pull/4012) ([alamb](https://github.com/alamb)) -- Add right anti join support and support it in HashBuildProbeOrder [\#4011](https://github.com/apache/arrow-datafusion/pull/4011) ([Dandandan](https://github.com/Dandandan)) -- MINOR: Generate expected benchmark query results [\#4010](https://github.com/apache/arrow-datafusion/pull/4010) ([andygrove](https://github.com/andygrove)) -- Minor: remove unecessary clippy allow [\#4008](https://github.com/apache/arrow-datafusion/pull/4008) ([alamb](https://github.com/alamb)) -- Minor: Do what clippy says and clean up some code [\#4007](https://github.com/apache/arrow-datafusion/pull/4007) ([alamb](https://github.com/alamb)) -- Improve Error Handling and Readibility for downcasting `Date32Array` [\#4004](https://github.com/apache/arrow-datafusion/pull/4004) ([retikulum](https://github.com/retikulum)) -- Don't add projection for semi joins in HashBuildProbeOrder [\#4000](https://github.com/apache/arrow-datafusion/pull/4000) ([Dandandan](https://github.com/Dandandan)) -- Minor: use `DataType::is_nested` [\#3995](https://github.com/apache/arrow-datafusion/pull/3995) ([alamb](https://github.com/alamb)) -- \[minor\] bump prettier version [\#3992](https://github.com/apache/arrow-datafusion/pull/3992) ([Jimexist](https://github.com/Jimexist)) -- Add parquet predicate pushdown metrics [\#3989](https://github.com/apache/arrow-datafusion/pull/3989) 
([alamb](https://github.com/alamb)) -- Pin datafusion-proto build dependencies [\#3987](https://github.com/apache/arrow-datafusion/pull/3987) ([tustvold](https://github.com/tustvold)) -- Add TableProvider.statistics method [\#3986](https://github.com/apache/arrow-datafusion/pull/3986) ([andygrove](https://github.com/andygrove)) -- Add Pull Request guidelines to contributor guide [\#3985](https://github.com/apache/arrow-datafusion/pull/3985) ([alamb](https://github.com/alamb)) -- Update protos [\#3979](https://github.com/apache/arrow-datafusion/pull/3979) ([tustvold](https://github.com/tustvold)) -- Revert async changes but keep deltalake working [\#3978](https://github.com/apache/arrow-datafusion/pull/3978) ([avantgardnerio](https://github.com/avantgardnerio)) -- Correctness integration test for parquet filter pushdown [\#3976](https://github.com/apache/arrow-datafusion/pull/3976) ([alamb](https://github.com/alamb)) -- MINOR: Stop pretty printing batches in benchmark when there are no results [\#3974](https://github.com/apache/arrow-datafusion/pull/3974) ([andygrove](https://github.com/andygrove)) -- MINOR: Re-export Cast struct [\#3971](https://github.com/apache/arrow-datafusion/pull/3971) ([andygrove](https://github.com/andygrove)) -- fix: check recursion limit in `Expr::to_bytes` [\#3970](https://github.com/apache/arrow-datafusion/pull/3970) ([crepererum](https://github.com/crepererum)) -- \[Part1\] Partition and Sort Enforcement, PhysicalExpr enhancement [\#3969](https://github.com/apache/arrow-datafusion/pull/3969) ([mingmwang](https://github.com/mingmwang)) -- Support pushdown multi-columns in PageIndex pruning. 
[\#3967](https://github.com/apache/arrow-datafusion/pull/3967) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Fix benchmarks README formatting [\#3966](https://github.com/apache/arrow-datafusion/pull/3966) ([Jefffrey](https://github.com/Jefffrey)) -- Bug fix on DFField to Field conversion: preserve metadata [\#3965](https://github.com/apache/arrow-datafusion/pull/3965) ([metesynnada](https://github.com/metesynnada)) -- Informative Error Message for LAG and LEAD functions [\#3963](https://github.com/apache/arrow-datafusion/pull/3963) ([mustafasrepo](https://github.com/mustafasrepo)) -- Minor: Add some docstrings to `FileScanConfig` and `RuntimeEnv` [\#3962](https://github.com/apache/arrow-datafusion/pull/3962) ([alamb](https://github.com/alamb)) -- Move common code used for testing code into datafusion/test_utils [\#3961](https://github.com/apache/arrow-datafusion/pull/3961) ([alamb](https://github.com/alamb)) -- Update minimum chrono dependency to 0.4.22 [\#3959](https://github.com/apache/arrow-datafusion/pull/3959) ([alamb](https://github.com/alamb)) -- Implement right semi join and support in HashBuildProbeorder [\#3958](https://github.com/apache/arrow-datafusion/pull/3958) ([Dandandan](https://github.com/Dandandan)) -- Print the configurations of ConfigOptions in an ordered way so that we can directly compare the equality of two ConfigOptions by their debug strings [\#3953](https://github.com/apache/arrow-datafusion/pull/3953) ([yahoNanJing](https://github.com/yahoNanJing)) -- Vendor Generated Protobuf Code \(\#3947\) [\#3950](https://github.com/apache/arrow-datafusion/pull/3950) ([tustvold](https://github.com/tustvold)) -- Implement serialization for ScalarValue::FixedSizeBinary [\#3943](https://github.com/apache/arrow-datafusion/pull/3943) ([retikulum](https://github.com/retikulum)) -- Consolidate physical join code into `datafusion/core/src/physical_plan/joins` [\#3942](https://github.com/apache/arrow-datafusion/pull/3942) ([alamb](https://github.com/alamb)) 
-- Add optimizer test for simplifying predicates on timestamps [\#3939](https://github.com/apache/arrow-datafusion/pull/3939) ([alamb](https://github.com/alamb)) -- Add test for querying predicate on dictionary [\#3937](https://github.com/apache/arrow-datafusion/pull/3937) ([alamb](https://github.com/alamb)) -- fix: return error for unsupported SQL [\#3933](https://github.com/apache/arrow-datafusion/pull/3933) ([Kikkon](https://github.com/Kikkon)) -- doc: fix doc about `CREATE TABLE IF NOT EXISTS` [\#3932](https://github.com/apache/arrow-datafusion/pull/3932) ([jackwener](https://github.com/jackwener)) -- Refactor Expr::Cast to use a struct. [\#3931](https://github.com/apache/arrow-datafusion/pull/3931) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) -- minor: fix some typo. [\#3930](https://github.com/apache/arrow-datafusion/pull/3930) ([jackwener](https://github.com/jackwener)) -- chore: update cranelift-related dependencies [\#3926](https://github.com/apache/arrow-datafusion/pull/3926) ([xudong963](https://github.com/xudong963)) -- Change cast error from Internal to NotImplemented [\#3924](https://github.com/apache/arrow-datafusion/pull/3924) ([alamb](https://github.com/alamb)) -- Support inlining view / dataframes logical plan [\#3923](https://github.com/apache/arrow-datafusion/pull/3923) ([Dandandan](https://github.com/Dandandan)) -- Add test for Simplify redundant predicates [\#3915](https://github.com/apache/arrow-datafusion/pull/3915) ([src255](https://github.com/src255)) -- Implement ScalarValue for FixedSizeBinary [\#3911](https://github.com/apache/arrow-datafusion/pull/3911) ([maxburke](https://github.com/maxburke)) -- Add serde for plans with tables from `TableProviderFactory`s [\#3907](https://github.com/apache/arrow-datafusion/pull/3907) ([avantgardnerio](https://github.com/avantgardnerio)) -- Support filter/limit pushdown for views/dataframes 
[\#3905](https://github.com/apache/arrow-datafusion/pull/3905) ([Dandandan](https://github.com/Dandandan)) -- Factorize common AND factors out of OR predicates to support filterPu… [\#3903](https://github.com/apache/arrow-datafusion/pull/3903) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Add `Substring(str [from int] [for int])` support in `datafusion-proto` [\#3902](https://github.com/apache/arrow-datafusion/pull/3902) ([r4ntix](https://github.com/r4ntix)) -- Revert "Factorize common AND factors out of OR predicates to supportfilter Pu… \(\#3859\)" [\#3897](https://github.com/apache/arrow-datafusion/pull/3897) ([alamb](https://github.com/alamb)) -- MINOR: Add notes on Apache Reporter [\#3893](https://github.com/apache/arrow-datafusion/pull/3893) ([andygrove](https://github.com/andygrove)) -- Allow configuring collection of statistics during TPC-H benchmarks [\#3889](https://github.com/apache/arrow-datafusion/pull/3889) ([isidentical](https://github.com/isidentical)) -- Improve formatting of binary expressions [\#3884](https://github.com/apache/arrow-datafusion/pull/3884) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) -- Multiple optimizer passes [\#3880](https://github.com/apache/arrow-datafusion/pull/3880) ([andygrove](https://github.com/andygrove)) -- \[MINOR\] Update docs with newly added configuration values [\#3877](https://github.com/apache/arrow-datafusion/pull/3877) ([alamb](https://github.com/alamb)) -- \[MINOR\] Add a hint about how to resolve the `Cargo.lock` CI check [\#3876](https://github.com/apache/arrow-datafusion/pull/3876) ([alamb](https://github.com/alamb)) -- Add `LogicalPlan::ViewTable` support in `datafusion-proto` [\#3875](https://github.com/apache/arrow-datafusion/pull/3875) ([r4ntix](https://github.com/r4ntix)) -- Optimize the `concat_ws` function [\#3869](https://github.com/apache/arrow-datafusion/pull/3869) ([HaoYang670](https://github.com/HaoYang670)) -- Implement 
foundational filter selectivity analysis [\#3868](https://github.com/apache/arrow-datafusion/pull/3868) ([isidentical](https://github.com/isidentical)) -- Update `TableProviderFactory` trait to support real-world use-cases [\#3867](https://github.com/apache/arrow-datafusion/pull/3867) ([avantgardnerio](https://github.com/avantgardnerio)) -- put subquery's equal clause into join on clauses instead of filter cl… [\#3862](https://github.com/apache/arrow-datafusion/pull/3862) ([AssHero](https://github.com/AssHero)) -- Factorize common AND factors out of OR predicates to support filterPu… [\#3859](https://github.com/apache/arrow-datafusion/pull/3859) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Enable mimalloc by default in benchmark [\#3853](https://github.com/apache/arrow-datafusion/pull/3853) ([Dandandan](https://github.com/Dandandan)) -- Refactor `Expr::Between` to use a struct [\#3850](https://github.com/apache/arrow-datafusion/pull/3850) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([b41sh](https://github.com/b41sh)) -- Handle cardinality estimation for disjoint inner and outer joins [\#3848](https://github.com/apache/arrow-datafusion/pull/3848) ([isidentical](https://github.com/isidentical)) -- Add setting for statistics collection [\#3846](https://github.com/apache/arrow-datafusion/pull/3846) ([Dandandan](https://github.com/Dandandan)) -- Update to arrow 25.0.0 [\#3844](https://github.com/apache/arrow-datafusion/pull/3844) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) -- Tweak list of optimization rules [\#3841](https://github.com/apache/arrow-datafusion/pull/3841) ([Dandandan](https://github.com/Dandandan)) -- Refactor Expr::GetIndexedField to use a struct [\#3838](https://github.com/apache/arrow-datafusion/pull/3838) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([ygf11](https://github.com/ygf11)) -- Infer the count of maximum distinct values from min/max 
[\#3837](https://github.com/apache/arrow-datafusion/pull/3837) ([isidentical](https://github.com/isidentical)) -- Refactor `Expr::Like`, `Expr::ILike`, `Expr::SimilarTo` to use a struct [\#3836](https://github.com/apache/arrow-datafusion/pull/3836) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([b41sh](https://github.com/b41sh)) -- Refactor Expr::BinaryExpr to use a struct [\#3835](https://github.com/apache/arrow-datafusion/pull/3835) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([zhoudongyan](https://github.com/zhoudongyan)) -- update postgres version to 15 in integration test [\#3831](https://github.com/apache/arrow-datafusion/pull/3831) ([Jimexist](https://github.com/Jimexist)) -- Fix the panic when lpad/rpad parameter is negative [\#3829](https://github.com/apache/arrow-datafusion/pull/3829) ([ZuoTiJia](https://github.com/ZuoTiJia)) -- MINOR: Document SHOW ALL in the users guide [\#3826](https://github.com/apache/arrow-datafusion/pull/3826) ([alamb](https://github.com/alamb)) -- MINOR: Add datafusion-cli documentation on showing configuration [\#3825](https://github.com/apache/arrow-datafusion/pull/3825) ([alamb](https://github.com/alamb)) -- Add/Remove Division Rules [\#3824](https://github.com/apache/arrow-datafusion/pull/3824) ([retikulum](https://github.com/retikulum)) -- Minor: Sort the output of SHOW ALL by config name [\#3823](https://github.com/apache/arrow-datafusion/pull/3823) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Add `precision != 0` check when making decimal type [\#3818](https://github.com/apache/arrow-datafusion/pull/3818) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([HaoYang670](https://github.com/HaoYang670)) -- Infer schema when running benchmarks against parquet [\#3817](https://github.com/apache/arrow-datafusion/pull/3817) ([andygrove](https://github.com/andygrove)) -- Finish removing deprecated `datafusion::logical_plan` 
module [\#3816](https://github.com/apache/arrow-datafusion/pull/3816) ([andygrove](https://github.com/andygrove)) -- Clarify initial example with respect to capitalization [\#3815](https://github.com/apache/arrow-datafusion/pull/3815) ([alamb](https://github.com/alamb)) -- Improve expression simplification by running it twice [\#3811](https://github.com/apache/arrow-datafusion/pull/3811) ([alamb](https://github.com/alamb)) -- Make expression manipulation consistent and easier to use: `combine/split filter` `conjunction`, etc [\#3810](https://github.com/apache/arrow-datafusion/pull/3810) ([alamb](https://github.com/alamb)) -- Consolidate expression manipulation functions into `datafusion_optimizer` [\#3809](https://github.com/apache/arrow-datafusion/pull/3809) ([alamb](https://github.com/alamb)) -- Optimize `regexp_replace` when the input is a sparse array [\#3804](https://github.com/apache/arrow-datafusion/pull/3804) ([isidentical](https://github.com/isidentical)) -- Stop ignoring errors when writing DataFrame to csv, parquet, json [\#3801](https://github.com/apache/arrow-datafusion/pull/3801) ([andygrove](https://github.com/andygrove)) -- Update datafusion-cli Cargo.lock to fix CI check on master [\#3799](https://github.com/apache/arrow-datafusion/pull/3799) ([alamb](https://github.com/alamb)) -- MINOR: Benchmark regression tests [\#3790](https://github.com/apache/arrow-datafusion/pull/3790) ([andygrove](https://github.com/andygrove)) -- MINOR: Optimizer example and docs, deprecate `Expr::name` [\#3788](https://github.com/apache/arrow-datafusion/pull/3788) ([andygrove](https://github.com/andygrove)) -- Join cardinality computation for cost-based nested join optimizations [\#3787](https://github.com/apache/arrow-datafusion/pull/3787) ([isidentical](https://github.com/isidentical)) -- Optimizer now simplifies multiplication, division, module arg is a literal Decimal zero or one [\#3782](https://github.com/apache/arrow-datafusion/pull/3782) 
([drrtuy](https://github.com/drrtuy)) -- Implement parquet page-level skipping with column index, using min/ma… [\#3780](https://github.com/apache/arrow-datafusion/pull/3780) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Bump actions/labeler from 4.0.1 to 4.0.2 [\#3779](https://github.com/apache/arrow-datafusion/pull/3779) ([dependabot[bot]](https://github.com/apps/dependabot)) -- MINOR: correct `ListingOptions.try_new` docs to include the enabled stat collection [\#3775](https://github.com/apache/arrow-datafusion/pull/3775) ([isidentical](https://github.com/isidentical)) -- Teach a negative NULL expression to return NULL instead of an error [\#3771](https://github.com/apache/arrow-datafusion/pull/3771) ([drrtuy](https://github.com/drrtuy)) -- Add benchmarks for testing row filtering [\#3769](https://github.com/apache/arrow-datafusion/pull/3769) ([thinkharderdev](https://github.com/thinkharderdev)) -- move type coercion of agg and agg_udaf to logical phase [\#3768](https://github.com/apache/arrow-datafusion/pull/3768) ([liukun4515](https://github.com/liukun4515)) -- User Guide: Add `EXPLAIN` to SQL reference [\#3767](https://github.com/apache/arrow-datafusion/pull/3767) ([unvalley](https://github.com/unvalley)) -- Allow specialized implementations to produce hints for the array adapter [\#3765](https://github.com/apache/arrow-datafusion/pull/3765) ([isidentical](https://github.com/isidentical)) -- Fix optimizer regression with simplifying expressions in subquery filters [\#3764](https://github.com/apache/arrow-datafusion/pull/3764) ([andygrove](https://github.com/andygrove)) -- Run all `datafusion-examples` in CI tests [\#3761](https://github.com/apache/arrow-datafusion/pull/3761) ([alamb](https://github.com/alamb)) -- MINOR: Remove deprecated module `datafusion::logical_plan::plan` [\#3759](https://github.com/apache/arrow-datafusion/pull/3759) ([andygrove](https://github.com/andygrove)) -- Refactor `Expr::Case` to use a struct 
[\#3757](https://github.com/apache/arrow-datafusion/pull/3757) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) -- Do not run labeler CI check if it would fail due to permissions [\#3756](https://github.com/apache/arrow-datafusion/pull/3756) ([alamb](https://github.com/alamb)) -- MINOR: Improvements to `scalar_subquery_to_join` error handling [\#3754](https://github.com/apache/arrow-datafusion/pull/3754) ([andygrove](https://github.com/andygrove)) -- Always track the final size of the in-mem sorted arrays [\#3753](https://github.com/apache/arrow-datafusion/pull/3753) ([isidentical](https://github.com/isidentical)) -- Fix DataFrame::select_columns to handle column names with a period [\#3751](https://github.com/apache/arrow-datafusion/pull/3751) ([zhoudongyan](https://github.com/zhoudongyan)) -- Fix `ListingTableUrl` to decode percent [\#3750](https://github.com/apache/arrow-datafusion/pull/3750) ([unvalley](https://github.com/unvalley)) -- remove `type coercion` for physical ScalarFunction [\#3749](https://github.com/apache/arrow-datafusion/pull/3749) ([liukun4515](https://github.com/liukun4515)) -- CI: Add a new run to check whether `datafusion-cli` lock file is up-to-date [\#3745](https://github.com/apache/arrow-datafusion/pull/3745) ([isidentical](https://github.com/isidentical)) -- Add datafusion example of expression apis [\#3741](https://github.com/apache/arrow-datafusion/pull/3741) ([alamb](https://github.com/alamb)) -- fix subquery where exists distinct [\#3732](https://github.com/apache/arrow-datafusion/pull/3732) ([b41sh](https://github.com/b41sh)) -- Remove some uneeded code in `CommonSubexprEliminate` [\#3730](https://github.com/apache/arrow-datafusion/pull/3730) ([alamb](https://github.com/alamb)) -- Consolidate and better tests for expression re-rewriting / aliasing [\#3727](https://github.com/apache/arrow-datafusion/pull/3727) ([alamb](https://github.com/alamb)) -- Fix output schema generated 
by CommonSubExprEliminate [\#3726](https://github.com/apache/arrow-datafusion/pull/3726) ([alex-natzka](https://github.com/alex-natzka)) -- Add type coercion rule for `concat` and `concat_ws` [\#3721](https://github.com/apache/arrow-datafusion/pull/3721) ([HaoYang670](https://github.com/HaoYang670)) -- Expose and document a simpler public API for simplify expressions [\#3719](https://github.com/apache/arrow-datafusion/pull/3719) ([ygf11](https://github.com/ygf11)) -- Remove dead code in `UnwrapCastExprRewriter` that may mask errors [\#3703](https://github.com/apache/arrow-datafusion/pull/3703) ([alamb](https://github.com/alamb)) -- Fix `DataFrame::with_column` to handle creating column names with a period [\#3700](https://github.com/apache/arrow-datafusion/pull/3700) ([alamb](https://github.com/alamb)) -- Add simplification rules for the `CONCAT` function [\#3684](https://github.com/apache/arrow-datafusion/pull/3684) ([HaoYang670](https://github.com/HaoYang670)) -- Compressed CSV/JSON support [\#3642](https://github.com/apache/arrow-datafusion/pull/3642) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Licht-T](https://github.com/Licht-T)) -- Simplify serialization by removing redundant `PrimitiveScalarValue` [\#3612](https://github.com/apache/arrow-datafusion/pull/3612) ([alamb](https://github.com/alamb)) -- Pushdown single column predicates from ON join clauses [\#3578](https://github.com/apache/arrow-datafusion/pull/3578) ([AssHero](https://github.com/AssHero)) -- Simplify the serialization of `ScalarValue::List` [\#3547](https://github.com/apache/arrow-datafusion/pull/3547) ([alamb](https://github.com/alamb)) -- Generate hash aggregation output in smaller record batches [\#3461](https://github.com/apache/arrow-datafusion/pull/3461) ([milenkovicm](https://github.com/milenkovicm)) -- Improve doc on lowercase treatment of columns on SQL [\#3385](https://github.com/apache/arrow-datafusion/pull/3385) ([nanicpc](https://github.com/nanicpc)) +- Appease 
new clippy [\#4101](https://github.com/apache/datafusion/pull/4101) ([alamb](https://github.com/alamb)) +- minor: Split parquet reader up into smaller modules [\#4099](https://github.com/apache/datafusion/pull/4099) ([alamb](https://github.com/alamb)) +- \[MINOR\] Update `SET` in cli.md [\#4098](https://github.com/apache/datafusion/pull/4098) ([waitingkuo](https://github.com/waitingkuo)) +- fix: Scheduler panic routing errors [\#4097](https://github.com/apache/datafusion/pull/4097) ([yukkit](https://github.com/yukkit)) +- Automatically register tables if ObjectStore root is configured [\#4095](https://github.com/apache/datafusion/pull/4095) ([avantgardnerio](https://github.com/avantgardnerio)) +- minor: Use Operator::swap [\#4092](https://github.com/apache/datafusion/pull/4092) ([alamb](https://github.com/alamb)) +- Simplify small InListExpr [\#4090](https://github.com/apache/datafusion/pull/4090) ([Dandandan](https://github.com/Dandandan)) +- Minor: Add arrow-rs ticket reference and turn some comments into docstrings [\#4088](https://github.com/apache/datafusion/pull/4088) ([alamb](https://github.com/alamb)) +- Support Dictionary in InListExpr [\#4070](https://github.com/apache/datafusion/pull/4070) ([tustvold](https://github.com/tustvold)) +- support `SET` variable [\#4069](https://github.com/apache/datafusion/pull/4069) [[sql](https://github.com/apache/datafusion/labels/sql)] ([waitingkuo](https://github.com/waitingkuo)) +- Add in list bench [\#4068](https://github.com/apache/datafusion/pull/4068) ([tustvold](https://github.com/tustvold)) +- Improve Error Handling and Readibility for downcasting `StructArray` [\#4061](https://github.com/apache/datafusion/pull/4061) ([retikulum](https://github.com/retikulum)) +- Build tests separately from running [\#4060](https://github.com/apache/datafusion/pull/4060) ([alamb](https://github.com/alamb)) +- Simplify InListExpr ~20-70% Faster [\#4057](https://github.com/apache/datafusion/pull/4057) 
([tustvold](https://github.com/tustvold)) +- MINOR: Print unoptimized logical plan in execute_query of tpch benchmark [\#4056](https://github.com/apache/datafusion/pull/4056) ([viirya](https://github.com/viirya)) +- Minor: clean the code in `eliminate_filter` [\#4055](https://github.com/apache/datafusion/pull/4055) ([HaoYang670](https://github.com/HaoYang670)) +- Implement `current_time` scalar function [\#4054](https://github.com/apache/datafusion/pull/4054) ([naosense](https://github.com/naosense)) +- Cleanup hash_utils adding support for decimal256 and f16 [\#4053](https://github.com/apache/datafusion/pull/4053) ([tustvold](https://github.com/tustvold)) +- Fix multicolumn parquet predicate pushdown \(\#4046\) [\#4048](https://github.com/apache/datafusion/pull/4048) ([tustvold](https://github.com/tustvold)) +- Add CI checks that we can serde all benchmark queries [\#4047](https://github.com/apache/datafusion/pull/4047) ([andygrove](https://github.com/andygrove)) +- Enable more benchmark verification tests [\#4044](https://github.com/apache/datafusion/pull/4044) ([andygrove](https://github.com/andygrove)) +- Extract common parquet testing code to `parquet-test-util` crate [\#4042](https://github.com/apache/datafusion/pull/4042) ([alamb](https://github.com/alamb)) +- add uuid\(\) function [\#4041](https://github.com/apache/datafusion/pull/4041) ([Jimexist](https://github.com/Jimexist)) +- Update to arrow 26, change timezones [\#4039](https://github.com/apache/datafusion/pull/4039) [[sql](https://github.com/apache/datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) +- Fix Decimal and Floating type coerce rule [\#4038](https://github.com/apache/datafusion/pull/4038) ([viirya](https://github.com/viirya)) +- Reserve the literal expression of `Count` function [\#4031](https://github.com/apache/datafusion/pull/4031) [[sql](https://github.com/apache/datafusion/labels/sql)] ([HaoYang670](https://github.com/HaoYang670)) +- Implement current_date scalar 
function [\#4022](https://github.com/apache/datafusion/pull/4022) ([comphead](https://github.com/comphead)) +- Fix predicate pushdown bugs: project columns within DatafusionArrowPredicate \(\#4005\) \(\#4006\) [\#4021](https://github.com/apache/datafusion/pull/4021) ([tustvold](https://github.com/tustvold)) +- minor: remove redundant code/TODO [\#4019](https://github.com/apache/datafusion/pull/4019) ([jackwener](https://github.com/jackwener)) +- Add CI check to verify that benchmark queries return the expected results [\#4015](https://github.com/apache/datafusion/pull/4015) ([andygrove](https://github.com/andygrove)) +- Minor: Add TODO and tracking ticket reference [\#4012](https://github.com/apache/datafusion/pull/4012) ([alamb](https://github.com/alamb)) +- Add right anti join support and support it in HashBuildProbeOrder [\#4011](https://github.com/apache/datafusion/pull/4011) ([Dandandan](https://github.com/Dandandan)) +- MINOR: Generate expected benchmark query results [\#4010](https://github.com/apache/datafusion/pull/4010) ([andygrove](https://github.com/andygrove)) +- Minor: remove unecessary clippy allow [\#4008](https://github.com/apache/datafusion/pull/4008) ([alamb](https://github.com/alamb)) +- Minor: Do what clippy says and clean up some code [\#4007](https://github.com/apache/datafusion/pull/4007) ([alamb](https://github.com/alamb)) +- Improve Error Handling and Readibility for downcasting `Date32Array` [\#4004](https://github.com/apache/datafusion/pull/4004) ([retikulum](https://github.com/retikulum)) +- Don't add projection for semi joins in HashBuildProbeOrder [\#4000](https://github.com/apache/datafusion/pull/4000) ([Dandandan](https://github.com/Dandandan)) +- Minor: use `DataType::is_nested` [\#3995](https://github.com/apache/datafusion/pull/3995) ([alamb](https://github.com/alamb)) +- \[minor\] bump prettier version [\#3992](https://github.com/apache/datafusion/pull/3992) ([Jimexist](https://github.com/Jimexist)) +- Add parquet predicate 
pushdown metrics [\#3989](https://github.com/apache/datafusion/pull/3989) ([alamb](https://github.com/alamb)) +- Pin datafusion-proto build dependencies [\#3987](https://github.com/apache/datafusion/pull/3987) ([tustvold](https://github.com/tustvold)) +- Add TableProvider.statistics method [\#3986](https://github.com/apache/datafusion/pull/3986) ([andygrove](https://github.com/andygrove)) +- Add Pull Request guidelines to contributor guide [\#3985](https://github.com/apache/datafusion/pull/3985) ([alamb](https://github.com/alamb)) +- Update protos [\#3979](https://github.com/apache/datafusion/pull/3979) ([tustvold](https://github.com/tustvold)) +- Revert async changes but keep deltalake working [\#3978](https://github.com/apache/datafusion/pull/3978) ([avantgardnerio](https://github.com/avantgardnerio)) +- Correctness integration test for parquet filter pushdown [\#3976](https://github.com/apache/datafusion/pull/3976) ([alamb](https://github.com/alamb)) +- MINOR: Stop pretty printing batches in benchmark when there are no results [\#3974](https://github.com/apache/datafusion/pull/3974) ([andygrove](https://github.com/andygrove)) +- MINOR: Re-export Cast struct [\#3971](https://github.com/apache/datafusion/pull/3971) ([andygrove](https://github.com/andygrove)) +- fix: check recursion limit in `Expr::to_bytes` [\#3970](https://github.com/apache/datafusion/pull/3970) ([crepererum](https://github.com/crepererum)) +- \[Part1\] Partition and Sort Enforcement, PhysicalExpr enhancement [\#3969](https://github.com/apache/datafusion/pull/3969) ([mingmwang](https://github.com/mingmwang)) +- Support pushdown multi-columns in PageIndex pruning. 
[\#3967](https://github.com/apache/datafusion/pull/3967) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Fix benchmarks README formatting [\#3966](https://github.com/apache/datafusion/pull/3966) ([Jefffrey](https://github.com/Jefffrey)) +- Bug fix on DFField to Field conversion: preserve metadata [\#3965](https://github.com/apache/datafusion/pull/3965) ([metesynnada](https://github.com/metesynnada)) +- Informative Error Message for LAG and LEAD functions [\#3963](https://github.com/apache/datafusion/pull/3963) ([mustafasrepo](https://github.com/mustafasrepo)) +- Minor: Add some docstrings to `FileScanConfig` and `RuntimeEnv` [\#3962](https://github.com/apache/datafusion/pull/3962) ([alamb](https://github.com/alamb)) +- Move common code used for testing code into datafusion/test_utils [\#3961](https://github.com/apache/datafusion/pull/3961) ([alamb](https://github.com/alamb)) +- Update minimum chrono dependency to 0.4.22 [\#3959](https://github.com/apache/datafusion/pull/3959) ([alamb](https://github.com/alamb)) +- Implement right semi join and support in HashBuildProbeorder [\#3958](https://github.com/apache/datafusion/pull/3958) ([Dandandan](https://github.com/Dandandan)) +- Print the configurations of ConfigOptions in an ordered way so that we can directly compare the equality of two ConfigOptions by their debug strings [\#3953](https://github.com/apache/datafusion/pull/3953) ([yahoNanJing](https://github.com/yahoNanJing)) +- Vendor Generated Protobuf Code \(\#3947\) [\#3950](https://github.com/apache/datafusion/pull/3950) ([tustvold](https://github.com/tustvold)) +- Implement serialization for ScalarValue::FixedSizeBinary [\#3943](https://github.com/apache/datafusion/pull/3943) ([retikulum](https://github.com/retikulum)) +- Consolidate physical join code into `datafusion/core/src/physical_plan/joins` [\#3942](https://github.com/apache/datafusion/pull/3942) ([alamb](https://github.com/alamb)) +- Add optimizer test for simplifying predicates on timestamps 
[\#3939](https://github.com/apache/datafusion/pull/3939) ([alamb](https://github.com/alamb)) +- Add test for querying predicate on dictionary [\#3937](https://github.com/apache/datafusion/pull/3937) ([alamb](https://github.com/alamb)) +- fix: return error for unsupported SQL [\#3933](https://github.com/apache/datafusion/pull/3933) ([Kikkon](https://github.com/Kikkon)) +- doc: fix doc about `CREATE TABLE IF NOT EXISTS` [\#3932](https://github.com/apache/datafusion/pull/3932) ([jackwener](https://github.com/jackwener)) +- Refactor Expr::Cast to use a struct. [\#3931](https://github.com/apache/datafusion/pull/3931) [[sql](https://github.com/apache/datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) +- minor: fix some typo. [\#3930](https://github.com/apache/datafusion/pull/3930) ([jackwener](https://github.com/jackwener)) +- chore: update cranelift-related dependencies [\#3926](https://github.com/apache/datafusion/pull/3926) ([xudong963](https://github.com/xudong963)) +- Change cast error from Internal to NotImplemented [\#3924](https://github.com/apache/datafusion/pull/3924) ([alamb](https://github.com/alamb)) +- Support inlining view / dataframes logical plan [\#3923](https://github.com/apache/datafusion/pull/3923) ([Dandandan](https://github.com/Dandandan)) +- Add test for Simplify redundant predicates [\#3915](https://github.com/apache/datafusion/pull/3915) ([src255](https://github.com/src255)) +- Implement ScalarValue for FixedSizeBinary [\#3911](https://github.com/apache/datafusion/pull/3911) ([maxburke](https://github.com/maxburke)) +- Add serde for plans with tables from `TableProviderFactory`s [\#3907](https://github.com/apache/datafusion/pull/3907) ([avantgardnerio](https://github.com/avantgardnerio)) +- Support filter/limit pushdown for views/dataframes [\#3905](https://github.com/apache/datafusion/pull/3905) ([Dandandan](https://github.com/Dandandan)) +- Factorize common AND factors out of OR predicates to support filterPu… 
[\#3903](https://github.com/apache/datafusion/pull/3903) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Add `Substring(str [from int] [for int])` support in `datafusion-proto` [\#3902](https://github.com/apache/datafusion/pull/3902) ([r4ntix](https://github.com/r4ntix)) +- Revert "Factorize common AND factors out of OR predicates to supportfilter Pu… \(\#3859\)" [\#3897](https://github.com/apache/datafusion/pull/3897) ([alamb](https://github.com/alamb)) +- MINOR: Add notes on Apache Reporter [\#3893](https://github.com/apache/datafusion/pull/3893) ([andygrove](https://github.com/andygrove)) +- Allow configuring collection of statistics during TPC-H benchmarks [\#3889](https://github.com/apache/datafusion/pull/3889) ([isidentical](https://github.com/isidentical)) +- Improve formatting of binary expressions [\#3884](https://github.com/apache/datafusion/pull/3884) [[sql](https://github.com/apache/datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- Multiple optimizer passes [\#3880](https://github.com/apache/datafusion/pull/3880) ([andygrove](https://github.com/andygrove)) +- \[MINOR\] Update docs with newly added configuration values [\#3877](https://github.com/apache/datafusion/pull/3877) ([alamb](https://github.com/alamb)) +- \[MINOR\] Add a hint about how to resolve the `Cargo.lock` CI check [\#3876](https://github.com/apache/datafusion/pull/3876) ([alamb](https://github.com/alamb)) +- Add `LogicalPlan::ViewTable` support in `datafusion-proto` [\#3875](https://github.com/apache/datafusion/pull/3875) ([r4ntix](https://github.com/r4ntix)) +- Optimize the `concat_ws` function [\#3869](https://github.com/apache/datafusion/pull/3869) ([HaoYang670](https://github.com/HaoYang670)) +- Implement foundational filter selectivity analysis [\#3868](https://github.com/apache/datafusion/pull/3868) ([isidentical](https://github.com/isidentical)) +- Update `TableProviderFactory` trait to support real-world use-cases 
[\#3867](https://github.com/apache/datafusion/pull/3867) ([avantgardnerio](https://github.com/avantgardnerio)) +- put subquery's equal clause into join on clauses instead of filter cl… [\#3862](https://github.com/apache/datafusion/pull/3862) ([AssHero](https://github.com/AssHero)) +- Factorize common AND factors out of OR predicates to support filterPu… [\#3859](https://github.com/apache/datafusion/pull/3859) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Enable mimalloc by default in benchmark [\#3853](https://github.com/apache/datafusion/pull/3853) ([Dandandan](https://github.com/Dandandan)) +- Refactor `Expr::Between` to use a struct [\#3850](https://github.com/apache/datafusion/pull/3850) [[sql](https://github.com/apache/datafusion/labels/sql)] ([b41sh](https://github.com/b41sh)) +- Handle cardinality estimation for disjoint inner and outer joins [\#3848](https://github.com/apache/datafusion/pull/3848) ([isidentical](https://github.com/isidentical)) +- Add setting for statistics collection [\#3846](https://github.com/apache/datafusion/pull/3846) ([Dandandan](https://github.com/Dandandan)) +- Update to arrow 25.0.0 [\#3844](https://github.com/apache/datafusion/pull/3844) [[sql](https://github.com/apache/datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) +- Tweak list of optimization rules [\#3841](https://github.com/apache/datafusion/pull/3841) ([Dandandan](https://github.com/Dandandan)) +- Refactor Expr::GetIndexedField to use a struct [\#3838](https://github.com/apache/datafusion/pull/3838) [[sql](https://github.com/apache/datafusion/labels/sql)] ([ygf11](https://github.com/ygf11)) +- Infer the count of maximum distinct values from min/max [\#3837](https://github.com/apache/datafusion/pull/3837) ([isidentical](https://github.com/isidentical)) +- Refactor `Expr::Like`, `Expr::ILike`, `Expr::SimilarTo` to use a struct [\#3836](https://github.com/apache/datafusion/pull/3836) [[sql](https://github.com/apache/datafusion/labels/sql)] 
([b41sh](https://github.com/b41sh)) +- Refactor Expr::BinaryExpr to use a struct [\#3835](https://github.com/apache/datafusion/pull/3835) [[sql](https://github.com/apache/datafusion/labels/sql)] ([zhoudongyan](https://github.com/zhoudongyan)) +- update postgres version to 15 in integration test [\#3831](https://github.com/apache/datafusion/pull/3831) ([Jimexist](https://github.com/Jimexist)) +- Fix the panic when lpad/rpad parameter is negative [\#3829](https://github.com/apache/datafusion/pull/3829) ([ZuoTiJia](https://github.com/ZuoTiJia)) +- MINOR: Document SHOW ALL in the users guide [\#3826](https://github.com/apache/datafusion/pull/3826) ([alamb](https://github.com/alamb)) +- MINOR: Add datafusion-cli documentation on showing configuration [\#3825](https://github.com/apache/datafusion/pull/3825) ([alamb](https://github.com/alamb)) +- Add/Remove Division Rules [\#3824](https://github.com/apache/datafusion/pull/3824) ([retikulum](https://github.com/retikulum)) +- Minor: Sort the output of SHOW ALL by config name [\#3823](https://github.com/apache/datafusion/pull/3823) [[sql](https://github.com/apache/datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Add `precision != 0` check when making decimal type [\#3818](https://github.com/apache/datafusion/pull/3818) [[sql](https://github.com/apache/datafusion/labels/sql)] ([HaoYang670](https://github.com/HaoYang670)) +- Infer schema when running benchmarks against parquet [\#3817](https://github.com/apache/datafusion/pull/3817) ([andygrove](https://github.com/andygrove)) +- Finish removing deprecated `datafusion::logical_plan` module [\#3816](https://github.com/apache/datafusion/pull/3816) ([andygrove](https://github.com/andygrove)) +- Clarify initial example with respect to capitalization [\#3815](https://github.com/apache/datafusion/pull/3815) ([alamb](https://github.com/alamb)) +- Improve expression simplification by running it twice [\#3811](https://github.com/apache/datafusion/pull/3811) 
([alamb](https://github.com/alamb)) +- Make expression manipulation consistent and easier to use: `combine/split filter` `conjunction`, etc [\#3810](https://github.com/apache/datafusion/pull/3810) ([alamb](https://github.com/alamb)) +- Consolidate expression manipulation functions into `datafusion_optimizer` [\#3809](https://github.com/apache/datafusion/pull/3809) ([alamb](https://github.com/alamb)) +- Optimize `regexp_replace` when the input is a sparse array [\#3804](https://github.com/apache/datafusion/pull/3804) ([isidentical](https://github.com/isidentical)) +- Stop ignoring errors when writing DataFrame to csv, parquet, json [\#3801](https://github.com/apache/datafusion/pull/3801) ([andygrove](https://github.com/andygrove)) +- Update datafusion-cli Cargo.lock to fix CI check on master [\#3799](https://github.com/apache/datafusion/pull/3799) ([alamb](https://github.com/alamb)) +- MINOR: Benchmark regression tests [\#3790](https://github.com/apache/datafusion/pull/3790) ([andygrove](https://github.com/andygrove)) +- MINOR: Optimizer example and docs, deprecate `Expr::name` [\#3788](https://github.com/apache/datafusion/pull/3788) ([andygrove](https://github.com/andygrove)) +- Join cardinality computation for cost-based nested join optimizations [\#3787](https://github.com/apache/datafusion/pull/3787) ([isidentical](https://github.com/isidentical)) +- Optimizer now simplifies multiplication, division, module arg is a literal Decimal zero or one [\#3782](https://github.com/apache/datafusion/pull/3782) ([drrtuy](https://github.com/drrtuy)) +- Implement parquet page-level skipping with column index, using min/ma… [\#3780](https://github.com/apache/datafusion/pull/3780) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Bump actions/labeler from 4.0.1 to 4.0.2 [\#3779](https://github.com/apache/datafusion/pull/3779) ([dependabot[bot]](https://github.com/apps/dependabot)) +- MINOR: correct `ListingOptions.try_new` docs to include the enabled stat collection 
[\#3775](https://github.com/apache/datafusion/pull/3775) ([isidentical](https://github.com/isidentical)) +- Teach a negative NULL expression to return NULL instead of an error [\#3771](https://github.com/apache/datafusion/pull/3771) ([drrtuy](https://github.com/drrtuy)) +- Add benchmarks for testing row filtering [\#3769](https://github.com/apache/datafusion/pull/3769) ([thinkharderdev](https://github.com/thinkharderdev)) +- move type coercion of agg and agg_udaf to logical phase [\#3768](https://github.com/apache/datafusion/pull/3768) ([liukun4515](https://github.com/liukun4515)) +- User Guide: Add `EXPLAIN` to SQL reference [\#3767](https://github.com/apache/datafusion/pull/3767) ([unvalley](https://github.com/unvalley)) +- Allow specialized implementations to produce hints for the array adapter [\#3765](https://github.com/apache/datafusion/pull/3765) ([isidentical](https://github.com/isidentical)) +- Fix optimizer regression with simplifying expressions in subquery filters [\#3764](https://github.com/apache/datafusion/pull/3764) ([andygrove](https://github.com/andygrove)) +- Run all `datafusion-examples` in CI tests [\#3761](https://github.com/apache/datafusion/pull/3761) ([alamb](https://github.com/alamb)) +- MINOR: Remove deprecated module `datafusion::logical_plan::plan` [\#3759](https://github.com/apache/datafusion/pull/3759) ([andygrove](https://github.com/andygrove)) +- Refactor `Expr::Case` to use a struct [\#3757](https://github.com/apache/datafusion/pull/3757) [[sql](https://github.com/apache/datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- Do not run labeler CI check if it would fail due to permissions [\#3756](https://github.com/apache/datafusion/pull/3756) ([alamb](https://github.com/alamb)) +- MINOR: Improvements to `scalar_subquery_to_join` error handling [\#3754](https://github.com/apache/datafusion/pull/3754) ([andygrove](https://github.com/andygrove)) +- Always track the final size of the in-mem sorted arrays 
[\#3753](https://github.com/apache/datafusion/pull/3753) ([isidentical](https://github.com/isidentical)) +- Fix DataFrame::select_columns to handle column names with a period [\#3751](https://github.com/apache/datafusion/pull/3751) ([zhoudongyan](https://github.com/zhoudongyan)) +- Fix `ListingTableUrl` to decode percent [\#3750](https://github.com/apache/datafusion/pull/3750) ([unvalley](https://github.com/unvalley)) +- remove `type coercion` for physical ScalarFunction [\#3749](https://github.com/apache/datafusion/pull/3749) ([liukun4515](https://github.com/liukun4515)) +- CI: Add a new run to check whether `datafusion-cli` lock file is up-to-date [\#3745](https://github.com/apache/datafusion/pull/3745) ([isidentical](https://github.com/isidentical)) +- Add datafusion example of expression apis [\#3741](https://github.com/apache/datafusion/pull/3741) ([alamb](https://github.com/alamb)) +- fix subquery where exists distinct [\#3732](https://github.com/apache/datafusion/pull/3732) ([b41sh](https://github.com/b41sh)) +- Remove some uneeded code in `CommonSubexprEliminate` [\#3730](https://github.com/apache/datafusion/pull/3730) ([alamb](https://github.com/alamb)) +- Consolidate and better tests for expression re-rewriting / aliasing [\#3727](https://github.com/apache/datafusion/pull/3727) ([alamb](https://github.com/alamb)) +- Fix output schema generated by CommonSubExprEliminate [\#3726](https://github.com/apache/datafusion/pull/3726) ([alex-natzka](https://github.com/alex-natzka)) +- Add type coercion rule for `concat` and `concat_ws` [\#3721](https://github.com/apache/datafusion/pull/3721) ([HaoYang670](https://github.com/HaoYang670)) +- Expose and document a simpler public API for simplify expressions [\#3719](https://github.com/apache/datafusion/pull/3719) ([ygf11](https://github.com/ygf11)) +- Remove dead code in `UnwrapCastExprRewriter` that may mask errors [\#3703](https://github.com/apache/datafusion/pull/3703) ([alamb](https://github.com/alamb)) +- Fix 
`DataFrame::with_column` to handle creating column names with a period [\#3700](https://github.com/apache/datafusion/pull/3700) ([alamb](https://github.com/alamb)) +- Add simplification rules for the `CONCAT` function [\#3684](https://github.com/apache/datafusion/pull/3684) ([HaoYang670](https://github.com/HaoYang670)) +- Compressed CSV/JSON support [\#3642](https://github.com/apache/datafusion/pull/3642) [[sql](https://github.com/apache/datafusion/labels/sql)] ([Licht-T](https://github.com/Licht-T)) +- Simplify serialization by removing redundant `PrimitiveScalarValue` [\#3612](https://github.com/apache/datafusion/pull/3612) ([alamb](https://github.com/alamb)) +- Pushdown single column predicates from ON join clauses [\#3578](https://github.com/apache/datafusion/pull/3578) ([AssHero](https://github.com/AssHero)) +- Simplify the serialization of `ScalarValue::List` [\#3547](https://github.com/apache/datafusion/pull/3547) ([alamb](https://github.com/alamb)) +- Generate hash aggregation output in smaller record batches [\#3461](https://github.com/apache/datafusion/pull/3461) ([milenkovicm](https://github.com/milenkovicm)) +- Improve doc on lowercase treatment of columns on SQL [\#3385](https://github.com/apache/datafusion/pull/3385) ([nanicpc](https://github.com/nanicpc)) diff --git a/dev/changelog/15.0.0.md b/dev/changelog/15.0.0.md index 474a82b1c08e..71b9d40b04b5 100644 --- a/dev/changelog/15.0.0.md +++ b/dev/changelog/15.0.0.md @@ -17,314 +17,314 @@ under the License. 
--> -## [15.0.0](https://github.com/apache/arrow-datafusion/tree/15.0.0) (2022-12-01) +## [15.0.0](https://github.com/apache/datafusion/tree/15.0.0) (2022-12-01) -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/14.0.0-rc1...15.0.0) +[Full Changelog](https://github.com/apache/datafusion/compare/14.0.0-rc1...15.0.0) **Breaking changes:** -- Expose remaining parquet config options into ConfigOptions \(try 2\) [\#4427](https://github.com/apache/arrow-datafusion/pull/4427) ([alamb](https://github.com/alamb)) -- Config Cleanup: Remove TaskProperties and KV structure, keep key=value serialization [\#4382](https://github.com/apache/arrow-datafusion/pull/4382) ([alamb](https://github.com/alamb)) -- add `{TDigest,ScalarValue,Accumulator}::size` [\#4342](https://github.com/apache/arrow-datafusion/pull/4342) ([crepererum](https://github.com/crepererum)) -- API-break: Support `SubqueryAlias` and remove `Alias in Projection` [\#4333](https://github.com/apache/arrow-datafusion/pull/4333) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) -- split `try_new_with_schema_alias` from original code [\#4284](https://github.com/apache/arrow-datafusion/pull/4284) ([jackwener](https://github.com/jackwener)) -- Collapse statistics in normal explain plan [\#4157](https://github.com/apache/arrow-datafusion/pull/4157) ([alamb](https://github.com/alamb)) -- Linearize binary expressions to reduce proto tree complexity [\#4115](https://github.com/apache/arrow-datafusion/pull/4115) ([isidentical](https://github.com/isidentical)) -- support `SET Timezone` [\#4107](https://github.com/apache/arrow-datafusion/pull/4107) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([waitingkuo](https://github.com/waitingkuo)) +- Expose remaining parquet config options into ConfigOptions \(try 2\) [\#4427](https://github.com/apache/datafusion/pull/4427) ([alamb](https://github.com/alamb)) +- Config Cleanup: Remove 
TaskProperties and KV structure, keep key=value serialization [\#4382](https://github.com/apache/datafusion/pull/4382) ([alamb](https://github.com/alamb)) +- add `{TDigest,ScalarValue,Accumulator}::size` [\#4342](https://github.com/apache/datafusion/pull/4342) ([crepererum](https://github.com/crepererum)) +- API-break: Support `SubqueryAlias` and remove `Alias in Projection` [\#4333](https://github.com/apache/datafusion/pull/4333) [[sql](https://github.com/apache/datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) +- split `try_new_with_schema_alias` from original code [\#4284](https://github.com/apache/datafusion/pull/4284) ([jackwener](https://github.com/jackwener)) +- Collapse statistics in normal explain plan [\#4157](https://github.com/apache/datafusion/pull/4157) ([alamb](https://github.com/alamb)) +- Linearize binary expressions to reduce proto tree complexity [\#4115](https://github.com/apache/datafusion/pull/4115) ([isidentical](https://github.com/isidentical)) +- support `SET Timezone` [\#4107](https://github.com/apache/datafusion/pull/4107) [[sql](https://github.com/apache/datafusion/labels/sql)] ([waitingkuo](https://github.com/waitingkuo)) **Implemented enhancements:** -- Refactor Built-in, Aggregate window functions to increase code reuse. [\#4440](https://github.com/apache/arrow-datafusion/issues/4440) -- Helper to get "root" error [\#4435](https://github.com/apache/arrow-datafusion/issues/4435) -- Do NOT convert intermediate/source errors to strings. 
[\#4434](https://github.com/apache/arrow-datafusion/issues/4434) -- Estimate the `total_byte_size` of the filter expression's result when selectivity is available [\#4374](https://github.com/apache/arrow-datafusion/issues/4374) -- refactor the code of the `HashJoin` [\#4356](https://github.com/apache/arrow-datafusion/issues/4356) -- `CoalesceBatchesExec` reports no ordering [\#4331](https://github.com/apache/arrow-datafusion/issues/4331) -- Introduce tournament tree to achieve better k-way sort-merging [\#4300](https://github.com/apache/arrow-datafusion/issues/4300) -- Add a checker to confirm physical optimizer rules will keep the physical plan schema immutable [\#4299](https://github.com/apache/arrow-datafusion/issues/4299) -- Remove the macro rule `unary_scalar_expr` from `expr_fn.rs` [\#4298](https://github.com/apache/arrow-datafusion/issues/4298) -- Remove Alias-in-Projection, replace it with `SubqueryAlias` [\#4291](https://github.com/apache/arrow-datafusion/issues/4291) -- reimplement `reduce_outer_join` [\#4270](https://github.com/apache/arrow-datafusion/issues/4270) -- Reimplement `filter_push_down` [\#4266](https://github.com/apache/arrow-datafusion/issues/4266) -- Reimplement `eliminate_limit` [\#4264](https://github.com/apache/arrow-datafusion/issues/4264) -- Reimplement `limit_push_down` [\#4263](https://github.com/apache/arrow-datafusion/issues/4263) -- Make a data driven SQL testing tool \(so we can reuse duckdb test suite, example\) [\#4248](https://github.com/apache/arrow-datafusion/issues/4248) -- upgrade chrono to 0.4.23 [\#4224](https://github.com/apache/arrow-datafusion/issues/4224) -- support scan non-string columns partitioned parquet files [\#4218](https://github.com/apache/arrow-datafusion/issues/4218) -- Allow optimizer rules to skip optimizing plans [\#4209](https://github.com/apache/arrow-datafusion/issues/4209) -- Supporting specifying schema when create tables [\#4183](https://github.com/apache/arrow-datafusion/issues/4183) -- Improve 
ergonomics of creating `ListingOptions` [\#4178](https://github.com/apache/arrow-datafusion/issues/4178) -- Add ability to specify external sort information for ParquetExec [\#4169](https://github.com/apache/arrow-datafusion/issues/4169) -- Add another method to collect referenced columns from an expression [\#4152](https://github.com/apache/arrow-datafusion/issues/4152) -- Improve `EXPLAIN ANALYZE` output for parquet exec [\#4144](https://github.com/apache/arrow-datafusion/issues/4144) -- `TableProviderFactory::create` should have `Optional` parameter [\#4142](https://github.com/apache/arrow-datafusion/issues/4142) -- Support more expressions in equality join [\#4140](https://github.com/apache/arrow-datafusion/issues/4140) -- JoinSelection Rule to choose physical join implementation: HashJoin\(Partitioned or CollectLeft\) or SortMergeJoin base on Stats [\#4139](https://github.com/apache/arrow-datafusion/issues/4139) -- Allow TPCH tooling to create a combined result for easier processing by outside tools [\#4127](https://github.com/apache/arrow-datafusion/issues/4127) -- Allow additional options when creating an external table [\#4125](https://github.com/apache/arrow-datafusion/issues/4125) -- reuse code utils::optimize_children instead of redundant implementation [\#4120](https://github.com/apache/arrow-datafusion/issues/4120) -- Add test field to PR template [\#4113](https://github.com/apache/arrow-datafusion/issues/4113) -- Allow for automatic registration of `ListingTables` [\#4111](https://github.com/apache/arrow-datafusion/issues/4111) -- Add CI check that configs.md is up-to-date [\#4108](https://github.com/apache/arrow-datafusion/issues/4108) -- Support `SET` timezone to non-UTC time zone [\#4106](https://github.com/apache/arrow-datafusion/issues/4106) -- Parquet predicates contains `and true` expressions [\#4091](https://github.com/apache/arrow-datafusion/issues/4091) -- Replace RwLock\ and Mutex\ by using DashMap 
[\#4077](https://github.com/apache/arrow-datafusion/issues/4077) -- add support for `.xz` compressed files [\#4074](https://github.com/apache/arrow-datafusion/issues/4074) -- add a feature gate to make support for compressed files optional [\#4073](https://github.com/apache/arrow-datafusion/issues/4073) -- Support serializing more deeply nested AND / OR expressions [\#4066](https://github.com/apache/arrow-datafusion/issues/4066) -- Use f64::total_cmp instead of OrderedFloat [\#4051](https://github.com/apache/arrow-datafusion/issues/4051) -- Add documentation to make it clear that decimal support is still experimental [\#4036](https://github.com/apache/arrow-datafusion/issues/4036) -- Simplify Pushed Down Predicates [\#4020](https://github.com/apache/arrow-datafusion/issues/4020) -- Improve HashJoinExec metrics [\#4009](https://github.com/apache/arrow-datafusion/issues/4009) -- Move physical plan serde from Ballista to DataFusion [\#3949](https://github.com/apache/arrow-datafusion/issues/3949) -- Support `SubqueryAlias` better in planner [\#3927](https://github.com/apache/arrow-datafusion/issues/3927) -- A framework for expression boundary analysis \(and statistics\) [\#3898](https://github.com/apache/arrow-datafusion/issues/3898) -- Replace `Filter: Boolean(false)` with `EmptyRelation` [\#3864](https://github.com/apache/arrow-datafusion/issues/3864) -- Implement statistics estimation for `FilterExec` [\#3845](https://github.com/apache/arrow-datafusion/issues/3845) -- Support parquet page filtering for more types: String, Binary\(Decimal\), Int96 [\#3833](https://github.com/apache/arrow-datafusion/issues/3833) -- Allow configuring parquet filter pushdown dynamically [\#3821](https://github.com/apache/arrow-datafusion/issues/3821) -- Unable to register tables in non-cloud S3 servers [\#3640](https://github.com/apache/arrow-datafusion/issues/3640) -- support more data type in prune for cast/try_cast [\#3442](https://github.com/apache/arrow-datafusion/issues/3442) -- 
Disable spill to disk globally [\#3264](https://github.com/apache/arrow-datafusion/issues/3264) -- Consider to categorize Operator [\#3216](https://github.com/apache/arrow-datafusion/issues/3216) -- Replace Projection.alias with SubqueryAlias [\#2212](https://github.com/apache/arrow-datafusion/issues/2212) -- \[Optimizer\] Eliminate the distinct [\#2045](https://github.com/apache/arrow-datafusion/issues/2045) -- beautify datafusion's site: https://arrow.apache.org/datafusion/ [\#1819](https://github.com/apache/arrow-datafusion/issues/1819) -- split datafusion-logical-plan sub-module [\#1755](https://github.com/apache/arrow-datafusion/issues/1755) -- convert `outer join` to `inner join` to improve performance [\#1585](https://github.com/apache/arrow-datafusion/issues/1585) -- Add sqllogictest for datafusion [\#1453](https://github.com/apache/arrow-datafusion/issues/1453) -- Add additional simplification rules [\#1406](https://github.com/apache/arrow-datafusion/issues/1406) -- support more subqueries [\#1209](https://github.com/apache/arrow-datafusion/issues/1209) -- Add baseline metrics for remaining execution plan nodes [\#1019](https://github.com/apache/arrow-datafusion/issues/1019) -- Make `ExecutionPlan` implementations immutable [\#987](https://github.com/apache/arrow-datafusion/issues/987) -- Architecture overview may be insufficient in README [\#980](https://github.com/apache/arrow-datafusion/issues/980) -- Add a separate configuration setting for parallelism of scanning parquet files [\#924](https://github.com/apache/arrow-datafusion/issues/924) -- Support hash repartion elimination [\#41](https://github.com/apache/arrow-datafusion/issues/41) +- Refactor Built-in, Aggregate window functions to increase code reuse. [\#4440](https://github.com/apache/datafusion/issues/4440) +- Helper to get "root" error [\#4435](https://github.com/apache/datafusion/issues/4435) +- Do NOT convert intermediate/source errors to strings. 
[\#4434](https://github.com/apache/datafusion/issues/4434) +- Estimate the `total_byte_size` of the filter expression's result when selectivity is available [\#4374](https://github.com/apache/datafusion/issues/4374) +- refactor the code of the `HashJoin` [\#4356](https://github.com/apache/datafusion/issues/4356) +- `CoalesceBatchesExec` reports no ordering [\#4331](https://github.com/apache/datafusion/issues/4331) +- Introduce tournament tree to achieve better k-way sort-merging [\#4300](https://github.com/apache/datafusion/issues/4300) +- Add a checker to confirm physical optimizer rules will keep the physical plan schema immutable [\#4299](https://github.com/apache/datafusion/issues/4299) +- Remove the macro rule `unary_scalar_expr` from `expr_fn.rs` [\#4298](https://github.com/apache/datafusion/issues/4298) +- Remove Alias-in-Projection, replace it with `SubqueryAlias` [\#4291](https://github.com/apache/datafusion/issues/4291) +- reimplement `reduce_outer_join` [\#4270](https://github.com/apache/datafusion/issues/4270) +- Reimplement `filter_push_down` [\#4266](https://github.com/apache/datafusion/issues/4266) +- Reimplement `eliminate_limit` [\#4264](https://github.com/apache/datafusion/issues/4264) +- Reimplement `limit_push_down` [\#4263](https://github.com/apache/datafusion/issues/4263) +- Make a data driven SQL testing tool \(so we can reuse duckdb test suite, example\) [\#4248](https://github.com/apache/datafusion/issues/4248) +- upgrade chrono to 0.4.23 [\#4224](https://github.com/apache/datafusion/issues/4224) +- support scan non-string columns partitioned parquet files [\#4218](https://github.com/apache/datafusion/issues/4218) +- Allow optimizer rules to skip optimizing plans [\#4209](https://github.com/apache/datafusion/issues/4209) +- Supporting specifying schema when create tables [\#4183](https://github.com/apache/datafusion/issues/4183) +- Improve ergonomics of creating `ListingOptions` [\#4178](https://github.com/apache/datafusion/issues/4178) +- 
Add ability to specify external sort information for ParquetExec [\#4169](https://github.com/apache/datafusion/issues/4169) +- Add another method to collect referenced columns from an expression [\#4152](https://github.com/apache/datafusion/issues/4152) +- Improve `EXPLAIN ANALYZE` output for parquet exec [\#4144](https://github.com/apache/datafusion/issues/4144) +- `TableProviderFactory::create` should have `Optional` parameter [\#4142](https://github.com/apache/datafusion/issues/4142) +- Support more expressions in equality join [\#4140](https://github.com/apache/datafusion/issues/4140) +- JoinSelection Rule to choose physical join implementation: HashJoin\(Partitioned or CollectLeft\) or SortMergeJoin base on Stats [\#4139](https://github.com/apache/datafusion/issues/4139) +- Allow TPCH tooling to create a combined result for easier processing by outside tools [\#4127](https://github.com/apache/datafusion/issues/4127) +- Allow additional options when creating an external table [\#4125](https://github.com/apache/datafusion/issues/4125) +- reuse code utils::optimize_children instead of redundant implementation [\#4120](https://github.com/apache/datafusion/issues/4120) +- Add test field to PR template [\#4113](https://github.com/apache/datafusion/issues/4113) +- Allow for automatic registration of `ListingTables` [\#4111](https://github.com/apache/datafusion/issues/4111) +- Add CI check that configs.md is up-to-date [\#4108](https://github.com/apache/datafusion/issues/4108) +- Support `SET` timezone to non-UTC time zone [\#4106](https://github.com/apache/datafusion/issues/4106) +- Parquet predicates contains `and true` expressions [\#4091](https://github.com/apache/datafusion/issues/4091) +- Replace RwLock\ and Mutex\ by using DashMap [\#4077](https://github.com/apache/datafusion/issues/4077) +- add support for `.xz` compressed files [\#4074](https://github.com/apache/datafusion/issues/4074) +- add a feature gate to make support for compressed files optional 
[\#4073](https://github.com/apache/datafusion/issues/4073) +- Support serializing more deeply nested AND / OR expressions [\#4066](https://github.com/apache/datafusion/issues/4066) +- Use f64::total_cmp instead of OrderedFloat [\#4051](https://github.com/apache/datafusion/issues/4051) +- Add documentation to make it clear that decimal support is still experimental [\#4036](https://github.com/apache/datafusion/issues/4036) +- Simplify Pushed Down Predicates [\#4020](https://github.com/apache/datafusion/issues/4020) +- Improve HashJoinExec metrics [\#4009](https://github.com/apache/datafusion/issues/4009) +- Move physical plan serde from Ballista to DataFusion [\#3949](https://github.com/apache/datafusion/issues/3949) +- Support `SubqueryAlias` better in planner [\#3927](https://github.com/apache/datafusion/issues/3927) +- A framework for expression boundary analysis \(and statistics\) [\#3898](https://github.com/apache/datafusion/issues/3898) +- Replace `Filter: Boolean(false)` with `EmptyRelation` [\#3864](https://github.com/apache/datafusion/issues/3864) +- Implement statistics estimation for `FilterExec` [\#3845](https://github.com/apache/datafusion/issues/3845) +- Support parquet page filtering for more types: String, Binary\(Decimal\), Int96 [\#3833](https://github.com/apache/datafusion/issues/3833) +- Allow configuring parquet filter pushdown dynamically [\#3821](https://github.com/apache/datafusion/issues/3821) +- Unable to register tables in non-cloud S3 servers [\#3640](https://github.com/apache/datafusion/issues/3640) +- support more data type in prune for cast/try_cast [\#3442](https://github.com/apache/datafusion/issues/3442) +- Disable spill to disk globally [\#3264](https://github.com/apache/datafusion/issues/3264) +- Consider to categorize Operator [\#3216](https://github.com/apache/datafusion/issues/3216) +- Replace Projection.alias with SubqueryAlias [\#2212](https://github.com/apache/datafusion/issues/2212) +- \[Optimizer\] Eliminate the distinct 
[\#2045](https://github.com/apache/datafusion/issues/2045) +- beautify datafusion's site: https://arrow.apache.org/datafusion/ [\#1819](https://github.com/apache/datafusion/issues/1819) +- split datafusion-logical-plan sub-module [\#1755](https://github.com/apache/datafusion/issues/1755) +- convert `outer join` to `inner join` to improve performance [\#1585](https://github.com/apache/datafusion/issues/1585) +- Add sqllogictest for datafusion [\#1453](https://github.com/apache/datafusion/issues/1453) +- Add additional simplification rules [\#1406](https://github.com/apache/datafusion/issues/1406) +- support more subqueries [\#1209](https://github.com/apache/datafusion/issues/1209) +- Add baseline metrics for remaining execution plan nodes [\#1019](https://github.com/apache/datafusion/issues/1019) +- Make `ExecutionPlan` implementations immutable [\#987](https://github.com/apache/datafusion/issues/987) +- Architecture overview may be insufficient in README [\#980](https://github.com/apache/datafusion/issues/980) +- Add a separate configuration setting for parallelism of scanning parquet files [\#924](https://github.com/apache/datafusion/issues/924) +- Support hash repartion elimination [\#41](https://github.com/apache/datafusion/issues/41) **Fixed bugs:** -- `pyarrow` CI failed [\#4448](https://github.com/apache/arrow-datafusion/issues/4448) -- `UnwrapCastInComparison` exist bug [\#4430](https://github.com/apache/arrow-datafusion/issues/4430) -- The CLI panics when passing an invalid `explain` query [\#4378](https://github.com/apache/arrow-datafusion/issues/4378) -- HashJoin should return Err when the right side input stream produce Err [\#4362](https://github.com/apache/arrow-datafusion/issues/4362) -- Optimizer check errors if resulting schema has different metadata [\#4346](https://github.com/apache/arrow-datafusion/issues/4346) -- Panic with function `to_hex` [\#4339](https://github.com/apache/arrow-datafusion/issues/4339) -- `LimitPushDown` pushdown into limit, 
result is wrong [\#4308](https://github.com/apache/arrow-datafusion/issues/4308) -- DESCRIBE statement issue with qualified table references [\#4303](https://github.com/apache/arrow-datafusion/issues/4303) -- Panic with window function LAST_VALUE [\#4297](https://github.com/apache/arrow-datafusion/issues/4297) -- CI failed in `Compare to postgres` [\#4294](https://github.com/apache/arrow-datafusion/issues/4294) -- Field alias can't work in where clause [\#4288](https://github.com/apache/arrow-datafusion/issues/4288) -- Some valid filters are not pushed down to parquet scan [\#4282](https://github.com/apache/arrow-datafusion/issues/4282) -- The type renaming `pub type NullColumnarValue = ColumnarValue` makes no sense [\#4271](https://github.com/apache/arrow-datafusion/issues/4271) -- Current `limit_push_down` can't support cross_join [\#4256](https://github.com/apache/arrow-datafusion/issues/4256) -- Cargo test fail [\#4253](https://github.com/apache/arrow-datafusion/issues/4253) -- RightSemi/RightAnti HashJoin has bug, the left_indices is never populated, causing failure to apply join filters. [\#4247](https://github.com/apache/arrow-datafusion/issues/4247) -- Clippy failures [\#4245](https://github.com/apache/arrow-datafusion/issues/4245) -- Cannot query s3 data from datafusion-cli [\#4239](https://github.com/apache/arrow-datafusion/issues/4239) -- Bug parsing interval with negative values [\#4237](https://github.com/apache/arrow-datafusion/issues/4237) -- `cargo test` reports errors on the master branch. 
[\#4236](https://github.com/apache/arrow-datafusion/issues/4236) -- Doc of the expression function`log2` is incorrect [\#4231](https://github.com/apache/arrow-datafusion/issues/4231) -- HashJoin with mode PartitionMode:CollectLeft has bug and can produce wrong result [\#4230](https://github.com/apache/arrow-datafusion/issues/4230) -- Add ambiguous check when generate projection plan [\#4210](https://github.com/apache/arrow-datafusion/issues/4210) -- What happened for NDJSON support on CLI? [\#4198](https://github.com/apache/arrow-datafusion/issues/4198) -- Add ambiguous check when generate join plan [\#4197](https://github.com/apache/arrow-datafusion/issues/4197) -- Clippy failing on master : error: use of deprecated associated function `chrono::NaiveDate::from_ymd`: use `from_ymd_opt()` instead [\#4187](https://github.com/apache/arrow-datafusion/issues/4187) -- Reimplement the `eliminate_cross_join` [\#4176](https://github.com/apache/arrow-datafusion/issues/4176) -- Incorrect handling of column names [\#4166](https://github.com/apache/arrow-datafusion/issues/4166) -- Update release scripts to support datafusion-benchmarks [\#4134](https://github.com/apache/arrow-datafusion/issues/4134) -- Bug in interpreting correctly parsed SQL with aliases [\#4123](https://github.com/apache/arrow-datafusion/issues/4123) -- The percentile argument for ApproxPercentileCont must be Float64, not Decimal128\(2, 1\) [\#4103](https://github.com/apache/arrow-datafusion/issues/4103) -- Panic when using array_agg [\#4080](https://github.com/apache/arrow-datafusion/issues/4080) -- Wrong result for FIRST_VALUE AND LAST_VALUE window functions [\#4076](https://github.com/apache/arrow-datafusion/issues/4076) -- Round error when casting float to decimal [\#4071](https://github.com/apache/arrow-datafusion/issues/4071) -- Predicate still has cast when comparing Timestamp\(Nano, None\) to a timestamp literal, so can't be pushed down or used for pruning 
[\#3938](https://github.com/apache/arrow-datafusion/issues/3938) -- Revisit required_child_distribution\(\), output_partitioning\(\), output_ordering\(\) implementations in ExecutionPlan's implementations [\#3653](https://github.com/apache/arrow-datafusion/issues/3653) -- Can't push down projection after do type coercion [\#3583](https://github.com/apache/arrow-datafusion/issues/3583) -- In some circumstances cast expression is not working [\#3499](https://github.com/apache/arrow-datafusion/issues/3499) -- output_partitioning\(\) and output_ordering\(\) implementations are wrong in some physical plan implementations with alias [\#3400](https://github.com/apache/arrow-datafusion/issues/3400) -- Interval Literal doesn't work for timeunit less than millisecond [\#3204](https://github.com/apache/arrow-datafusion/issues/3204) -- `INTERVAL` literal with duplicated interval types should raise error [\#3183](https://github.com/apache/arrow-datafusion/issues/3183) -- Error occurs when only using partition columns in query [\#1999](https://github.com/apache/arrow-datafusion/issues/1999) -- regex_match does not compile using the `g` flag [\#1429](https://github.com/apache/arrow-datafusion/issues/1429) -- `between` with NULL literals does not work: can't be evaluated because there isn't a common type to coerce the types to [\#1193](https://github.com/apache/arrow-datafusion/issues/1193) -- \[Datafusion\] Error with CAST: Unsupported SQL type Time [\#193](https://github.com/apache/arrow-datafusion/issues/193) +- `pyarrow` CI failed [\#4448](https://github.com/apache/datafusion/issues/4448) +- `UnwrapCastInComparison` exist bug [\#4430](https://github.com/apache/datafusion/issues/4430) +- The CLI panics when passing an invalid `explain` query [\#4378](https://github.com/apache/datafusion/issues/4378) +- HashJoin should return Err when the right side input stream produce Err [\#4362](https://github.com/apache/datafusion/issues/4362) +- Optimizer check errors if resulting schema 
has different metadata [\#4346](https://github.com/apache/datafusion/issues/4346) +- Panic with function `to_hex` [\#4339](https://github.com/apache/datafusion/issues/4339) +- `LimitPushDown` pushdown into limit, result is wrong [\#4308](https://github.com/apache/datafusion/issues/4308) +- DESCRIBE statement issue with qualified table references [\#4303](https://github.com/apache/datafusion/issues/4303) +- Panic with window function LAST_VALUE [\#4297](https://github.com/apache/datafusion/issues/4297) +- CI failed in `Compare to postgres` [\#4294](https://github.com/apache/datafusion/issues/4294) +- Field alias can't work in where clause [\#4288](https://github.com/apache/datafusion/issues/4288) +- Some valid filters are not pushed down to parquet scan [\#4282](https://github.com/apache/datafusion/issues/4282) +- The type renaming `pub type NullColumnarValue = ColumnarValue` makes no sense [\#4271](https://github.com/apache/datafusion/issues/4271) +- Current `limit_push_down` can't support cross_join [\#4256](https://github.com/apache/datafusion/issues/4256) +- Cargo test fail [\#4253](https://github.com/apache/datafusion/issues/4253) +- RightSemi/RightAnti HashJoin has bug, the left_indices is never populated, causing failure to apply join filters. [\#4247](https://github.com/apache/datafusion/issues/4247) +- Clippy failures [\#4245](https://github.com/apache/datafusion/issues/4245) +- Cannot query s3 data from datafusion-cli [\#4239](https://github.com/apache/datafusion/issues/4239) +- Bug parsing interval with negative values [\#4237](https://github.com/apache/datafusion/issues/4237) +- `cargo test` reports errors on the master branch. 
[\#4236](https://github.com/apache/datafusion/issues/4236) +- Doc of the expression function`log2` is incorrect [\#4231](https://github.com/apache/datafusion/issues/4231) +- HashJoin with mode PartitionMode:CollectLeft has bug and can produce wrong result [\#4230](https://github.com/apache/datafusion/issues/4230) +- Add ambiguous check when generate projection plan [\#4210](https://github.com/apache/datafusion/issues/4210) +- What happened for NDJSON support on CLI? [\#4198](https://github.com/apache/datafusion/issues/4198) +- Add ambiguous check when generate join plan [\#4197](https://github.com/apache/datafusion/issues/4197) +- Clippy failing on master : error: use of deprecated associated function `chrono::NaiveDate::from_ymd`: use `from_ymd_opt()` instead [\#4187](https://github.com/apache/datafusion/issues/4187) +- Reimplement the `eliminate_cross_join` [\#4176](https://github.com/apache/datafusion/issues/4176) +- Incorrect handling of column names [\#4166](https://github.com/apache/datafusion/issues/4166) +- Update release scripts to support datafusion-benchmarks [\#4134](https://github.com/apache/datafusion/issues/4134) +- Bug in interpreting correctly parsed SQL with aliases [\#4123](https://github.com/apache/datafusion/issues/4123) +- The percentile argument for ApproxPercentileCont must be Float64, not Decimal128\(2, 1\) [\#4103](https://github.com/apache/datafusion/issues/4103) +- Panic when using array_agg [\#4080](https://github.com/apache/datafusion/issues/4080) +- Wrong result for FIRST_VALUE AND LAST_VALUE window functions [\#4076](https://github.com/apache/datafusion/issues/4076) +- Round error when casting float to decimal [\#4071](https://github.com/apache/datafusion/issues/4071) +- Predicate still has cast when comparing Timestamp\(Nano, None\) to a timestamp literal, so can't be pushed down or used for pruning [\#3938](https://github.com/apache/datafusion/issues/3938) +- Revisit required_child_distribution\(\), output_partitioning\(\), 
output_ordering\(\) implementations in ExecutionPlan's implementations [\#3653](https://github.com/apache/datafusion/issues/3653) +- Can't push down projection after do type coercion [\#3583](https://github.com/apache/datafusion/issues/3583) +- In some circumstances cast expression is not working [\#3499](https://github.com/apache/datafusion/issues/3499) +- output_partitioning\(\) and output_ordering\(\) implementations are wrong in some physical plan implementations with alias [\#3400](https://github.com/apache/datafusion/issues/3400) +- Interval Literal doesn't work for timeunit less than millisecond [\#3204](https://github.com/apache/datafusion/issues/3204) +- `INTERVAL` literal with duplicated interval types should raise error [\#3183](https://github.com/apache/datafusion/issues/3183) +- Error occurs when only using partition columns in query [\#1999](https://github.com/apache/datafusion/issues/1999) +- regex_match does not compile using the `g` flag [\#1429](https://github.com/apache/datafusion/issues/1429) +- `between` with NULL literals does not work: can't be evaluated because there isn't a common type to coerce the types to [\#1193](https://github.com/apache/datafusion/issues/1193) +- \[Datafusion\] Error with CAST: Unsupported SQL type Time [\#193](https://github.com/apache/datafusion/issues/193) **Closed issues:** -- SQL level coverage for when memory limit is exceeded [\#4404](https://github.com/apache/arrow-datafusion/issues/4404) -- Throw error \(not `panic`\) if a listing table specifies an missing partition column [\#4350](https://github.com/apache/arrow-datafusion/issues/4350) -- Page index pruning fail on complex_expr [\#4317](https://github.com/apache/arrow-datafusion/issues/4317) -- optimize `limit-full join` in the limit push down rule [\#4275](https://github.com/apache/arrow-datafusion/issues/4275) -- `infer_schema` function is not working with s3 Urls or http endpoints [\#4269](https://github.com/apache/arrow-datafusion/issues/4269) -- Add 
support binary boolean operators with nulls [\#4241](https://github.com/apache/arrow-datafusion/issues/4241) -- Add additional testing to parquet predicate pushdown integration tests [\#4087](https://github.com/apache/arrow-datafusion/issues/4087) -- Add metrics for parquet page level skipping [\#4086](https://github.com/apache/arrow-datafusion/issues/4086) -- Add parquet page index pushdown metrics [\#4058](https://github.com/apache/arrow-datafusion/issues/4058) -- Throw a runtime error if the memory allocated to GroupByHash exceeds a limit [\#3940](https://github.com/apache/arrow-datafusion/issues/3940) -- support unsigned numeric data type in UnwrapCastInBinaryComparison rule [\#3702](https://github.com/apache/arrow-datafusion/issues/3702) -- Support type cast in union [\#2125](https://github.com/apache/arrow-datafusion/issues/2125) -- \[EPIC\] Memory Limited Sort \(Externalized / Spill\) [\#1568](https://github.com/apache/arrow-datafusion/issues/1568) -- Maintain partition information in Union [\#189](https://github.com/apache/arrow-datafusion/issues/189) -- Add coercion support for `NULL` literals [\#185](https://github.com/apache/arrow-datafusion/issues/185) +- SQL level coverage for when memory limit is exceeded [\#4404](https://github.com/apache/datafusion/issues/4404) +- Throw error \(not `panic`\) if a listing table specifies an missing partition column [\#4350](https://github.com/apache/datafusion/issues/4350) +- Page index pruning fail on complex_expr [\#4317](https://github.com/apache/datafusion/issues/4317) +- optimize `limit-full join` in the limit push down rule [\#4275](https://github.com/apache/datafusion/issues/4275) +- `infer_schema` function is not working with s3 Urls or http endpoints [\#4269](https://github.com/apache/datafusion/issues/4269) +- Add support binary boolean operators with nulls [\#4241](https://github.com/apache/datafusion/issues/4241) +- Add additional testing to parquet predicate pushdown integration tests 
[\#4087](https://github.com/apache/datafusion/issues/4087) +- Add metrics for parquet page level skipping [\#4086](https://github.com/apache/datafusion/issues/4086) +- Add parquet page index pushdown metrics [\#4058](https://github.com/apache/datafusion/issues/4058) +- Throw a runtime error if the memory allocated to GroupByHash exceeds a limit [\#3940](https://github.com/apache/datafusion/issues/3940) +- support unsigned numeric data type in UnwrapCastInBinaryComparison rule [\#3702](https://github.com/apache/datafusion/issues/3702) +- Support type cast in union [\#2125](https://github.com/apache/datafusion/issues/2125) +- \[EPIC\] Memory Limited Sort \(Externalized / Spill\) [\#1568](https://github.com/apache/datafusion/issues/1568) +- Maintain partition information in Union [\#189](https://github.com/apache/datafusion/issues/189) +- Add coercion support for `NULL` literals [\#185](https://github.com/apache/datafusion/issues/185) **Merged pull requests:** -- Make `datafusion-sql` depend on `arrow-schema` instead of `arrow` [\#4456](https://github.com/apache/arrow-datafusion/pull/4456) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([mbrobbel](https://github.com/mbrobbel)) -- replace the comparator for `decimal array op scalar` using arrow kernel [\#4453](https://github.com/apache/arrow-datafusion/pull/4453) ([liukun4515](https://github.com/liukun4515)) -- Fix pyarrow test [\#4450](https://github.com/apache/arrow-datafusion/pull/4450) ([mvanschellebeeck](https://github.com/mvanschellebeeck)) -- Replace `&Option` with `Option<&T>` [\#4446](https://github.com/apache/arrow-datafusion/pull/4446) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([askoa](https://github.com/askoa)) -- Improve error handling for array downcasting [\#4445](https://github.com/apache/arrow-datafusion/pull/4445) ([retikulum](https://github.com/retikulum)) -- Refactor Builtin Window Function Implementation 
[\#4441](https://github.com/apache/arrow-datafusion/pull/4441) ([mustafasrepo](https://github.com/mustafasrepo)) -- feat: `DataFusionError::find_root` [\#4437](https://github.com/apache/arrow-datafusion/pull/4437) ([crepererum](https://github.com/crepererum)) -- fix: do NOT convert errors to strings but keep the type [\#4436](https://github.com/apache/arrow-datafusion/pull/4436) ([crepererum](https://github.com/crepererum)) -- The CLI panics when passing an invalid explain query [\#4429](https://github.com/apache/arrow-datafusion/pull/4429) ([comphead](https://github.com/comphead)) -- \[minor\] use arrow kernel concat_batches instead combine_batches [\#4423](https://github.com/apache/arrow-datafusion/pull/4423) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- fix panic on to_hex function for negative numbers [\#4422](https://github.com/apache/arrow-datafusion/pull/4422) ([retikulum](https://github.com/retikulum)) -- Optimize filter executor in pull-based executor [\#4421](https://github.com/apache/arrow-datafusion/pull/4421) ([xudong963](https://github.com/xudong963)) -- optimize limit push for join case [\#4411](https://github.com/apache/arrow-datafusion/pull/4411) ([liukun4515](https://github.com/liukun4515)) -- Add integration test for erroring when memory limits are hit [\#4406](https://github.com/apache/arrow-datafusion/pull/4406) ([alamb](https://github.com/alamb)) -- feat: `ResourceExhausted` for memory limit in `AggregateStream` [\#4405](https://github.com/apache/arrow-datafusion/pull/4405) ([crepererum](https://github.com/crepererum)) -- Update to arrow 28 [\#4400](https://github.com/apache/arrow-datafusion/pull/4400) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) -- Update rstest requirement from 0.15.0 to 0.16.0 [\#4399](https://github.com/apache/arrow-datafusion/pull/4399) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Add sqllogictests \(v0\) 
[\#4395](https://github.com/apache/arrow-datafusion/pull/4395) ([mvanschellebeeck](https://github.com/mvanschellebeeck)) -- improve hashjoin execution metrics [\#4394](https://github.com/apache/arrow-datafusion/pull/4394) ([AssHero](https://github.com/AssHero)) -- Add `with_new_inputs` for LogicalPlan [\#4393](https://github.com/apache/arrow-datafusion/pull/4393) ([jackwener](https://github.com/jackwener)) -- Clean the code in `limit.rs`. [\#4391](https://github.com/apache/arrow-datafusion/pull/4391) ([HaoYang670](https://github.com/HaoYang670)) -- Move physical plan serde from Ballista to DataFusion [\#4390](https://github.com/apache/arrow-datafusion/pull/4390) ([Kikkon](https://github.com/Kikkon)) -- Fix page index pruning fail on complex_expr [\#4387](https://github.com/apache/arrow-datafusion/pull/4387) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Add check for nested types in equivalent names and types [\#4380](https://github.com/apache/arrow-datafusion/pull/4380) ([alamb](https://github.com/alamb)) -- refine the code of build schema for ambiguous check, factor this out into a function [\#4379](https://github.com/apache/arrow-datafusion/pull/4379) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([AssHero](https://github.com/AssHero)) -- Refactor the Hash Join [\#4377](https://github.com/apache/arrow-datafusion/pull/4377) ([liukun4515](https://github.com/liukun4515)) -- Minor: Fix typos in the documentation [\#4376](https://github.com/apache/arrow-datafusion/pull/4376) ([martin-g](https://github.com/martin-g)) -- Include byte size estimates in the filter statistics [\#4375](https://github.com/apache/arrow-datafusion/pull/4375) ([isidentical](https://github.com/isidentical)) -- HashJoin should return Err when the right side input stream produce Err, add more join UTs to cover different join types [\#4373](https://github.com/apache/arrow-datafusion/pull/4373) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] 
([mingmwang](https://github.com/mingmwang)) -- feat: `ResourceExhausted` for memory limit in `GroupedHashAggregateStream` [\#4371](https://github.com/apache/arrow-datafusion/pull/4371) ([crepererum](https://github.com/crepererum)) -- Use limit\(\) function instead of show_limit\(\) in the first example [\#4369](https://github.com/apache/arrow-datafusion/pull/4369) ([martin-g](https://github.com/martin-g)) -- Update env_logger requirement from 0.9 to 0.10 [\#4367](https://github.com/apache/arrow-datafusion/pull/4367) ([dependabot[bot]](https://github.com/apps/dependabot)) -- reimplement `push_down_filter` to remove global-state [\#4365](https://github.com/apache/arrow-datafusion/pull/4365) ([jackwener](https://github.com/jackwener)) -- Support to use Schedular in tpch benchmark [\#4361](https://github.com/apache/arrow-datafusion/pull/4361) ([xudong963](https://github.com/xudong963)) -- Adding more dataframe example to read csv files [\#4360](https://github.com/apache/arrow-datafusion/pull/4360) ([DataPsycho](https://github.com/DataPsycho)) -- minor: correct name and typo [\#4359](https://github.com/apache/arrow-datafusion/pull/4359) ([jackwener](https://github.com/jackwener)) -- Do not log error if page index can not be evaluated [\#4358](https://github.com/apache/arrow-datafusion/pull/4358) ([alamb](https://github.com/alamb)) -- Clean the `expr_fn` - use `scalar_expr` to create unary scalar expr functions, remove macro `unary_scalar_functions` [\#4357](https://github.com/apache/arrow-datafusion/pull/4357) ([HaoYang670](https://github.com/HaoYang670)) -- Throw error \(not `panic`\) if a listing table specifies an missing partition column [\#4354](https://github.com/apache/arrow-datafusion/pull/4354) ([doki23](https://github.com/doki23)) -- Improve error handling and add some more types for proper downcasting [\#4352](https://github.com/apache/arrow-datafusion/pull/4352) ([retikulum](https://github.com/retikulum)) -- Add check to avoid underflow in memory manager 
[\#4351](https://github.com/apache/arrow-datafusion/pull/4351) ([askoa](https://github.com/askoa)) -- Improve error messages when memory is exhausted while sorting [\#4348](https://github.com/apache/arrow-datafusion/pull/4348) ([alamb](https://github.com/alamb)) -- Do not error in optimizer if resulting schema has different metadata [\#4347](https://github.com/apache/arrow-datafusion/pull/4347) ([alamb](https://github.com/alamb)) -- minor: improve optimizer logging and do not repeat rule name [\#4345](https://github.com/apache/arrow-datafusion/pull/4345) ([alamb](https://github.com/alamb)) -- minor: fix typos in test names [\#4344](https://github.com/apache/arrow-datafusion/pull/4344) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Minor: Add docstrings to `EliminateOuterJoins` optimizer pass [\#4343](https://github.com/apache/arrow-datafusion/pull/4343) ([alamb](https://github.com/alamb)) -- Minor: refactor: isolate common memory accounting utils [\#4341](https://github.com/apache/arrow-datafusion/pull/4341) ([crepererum](https://github.com/crepererum)) -- minor: make `plan_from_tables` return one plan instead of `Vec` [\#4336](https://github.com/apache/arrow-datafusion/pull/4336) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) -- enhancement: when fetch == 0, pushdown limit 0 instead skip+fetch. 
[\#4334](https://github.com/apache/arrow-datafusion/pull/4334) ([jackwener](https://github.com/jackwener)) -- Teach optimizer that `CoalesceBatchesExec` does not destroy output order [\#4332](https://github.com/apache/arrow-datafusion/pull/4332) ([alamb](https://github.com/alamb)) -- Add ability to disable DiskManager [\#4330](https://github.com/apache/arrow-datafusion/pull/4330) ([tustvold](https://github.com/tustvold)) -- Update cli.md [\#4329](https://github.com/apache/arrow-datafusion/pull/4329) ([psvri](https://github.com/psvri)) -- fix bug: right semi join can't support the filter [\#4327](https://github.com/apache/arrow-datafusion/pull/4327) ([liukun4515](https://github.com/liukun4515)) -- reimplment `eliminate_limit` to remove `global-state`. [\#4324](https://github.com/apache/arrow-datafusion/pull/4324) ([jackwener](https://github.com/jackwener)) -- Refine Err propagation and avoid unwrap in transform closures [\#4318](https://github.com/apache/arrow-datafusion/pull/4318) ([mingmwang](https://github.com/mingmwang)) -- Add a checker to confirm physical optimizer rules will keep the physical plan schema immutable [\#4316](https://github.com/apache/arrow-datafusion/pull/4316) ([mingmwang](https://github.com/mingmwang)) -- Refactor downcasting functions with downcastvalue macro and improve error handling of `ListArray` downcasting [\#4313](https://github.com/apache/arrow-datafusion/pull/4313) ([retikulum](https://github.com/retikulum)) -- minor: add another test case to cover join ambiguous check [\#4305](https://github.com/apache/arrow-datafusion/pull/4305) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([ygf11](https://github.com/ygf11)) -- Fix DESCRIBE statement qualified table issue [\#4304](https://github.com/apache/arrow-datafusion/pull/4304) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([gruuya](https://github.com/gruuya)) -- Use tournament loser tree for k-way sort-merging, increase merge speed by 50% 
[\#4301](https://github.com/apache/arrow-datafusion/pull/4301) ([richox](https://github.com/richox)) -- Pin Python `setuptools` in the CI to fix integration tests [\#4296](https://github.com/apache/arrow-datafusion/pull/4296) ([isidentical](https://github.com/isidentical)) -- Support `SubqueryAlias` in optimizer, physcial planner. [\#4293](https://github.com/apache/arrow-datafusion/pull/4293) ([jackwener](https://github.com/jackwener)) -- minor: avoid a clone into string when checking ambiguous [\#4292](https://github.com/apache/arrow-datafusion/pull/4292) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([ygf11](https://github.com/ygf11)) -- replace the comparison op for decimal array op using the arrow-rs kernel [\#4290](https://github.com/apache/arrow-datafusion/pull/4290) ([liukun4515](https://github.com/liukun4515)) -- MINOR: replace `{..}` with `(_)`, typo, remove outdated TODO [\#4286](https://github.com/apache/arrow-datafusion/pull/4286) ([jackwener](https://github.com/jackwener)) -- Reduce Expr copies in `ParquetExec` [\#4283](https://github.com/apache/arrow-datafusion/pull/4283) ([alamb](https://github.com/alamb)) -- Fix issue in filter pushdown with overloaded projection index [\#4281](https://github.com/apache/arrow-datafusion/pull/4281) ([thinkharderdev](https://github.com/thinkharderdev)) -- Skip useless pruning predicates in `ParquetExec` [\#4280](https://github.com/apache/arrow-datafusion/pull/4280) ([alamb](https://github.com/alamb)) -- Push down more predicates into `ParquetExec` [\#4279](https://github.com/apache/arrow-datafusion/pull/4279) ([alamb](https://github.com/alamb)) -- Fix EXPLAIN plan for ParquetExec to show pruning_predicate [\#4278](https://github.com/apache/arrow-datafusion/pull/4278) ([alamb](https://github.com/alamb)) -- reimplement `limit_push_down` to remove global-state, enhance optimize and simplify code. 
[\#4276](https://github.com/apache/arrow-datafusion/pull/4276) ([jackwener](https://github.com/jackwener)) -- Bump actions/labeler from 4.0.2 to 4.1.0 [\#4274](https://github.com/apache/arrow-datafusion/pull/4274) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Remove the type alias `NullColumnarValue` [\#4273](https://github.com/apache/arrow-datafusion/pull/4273) ([HaoYang670](https://github.com/HaoYang670)) -- reimplement `eliminate_outer_join` [\#4272](https://github.com/apache/arrow-datafusion/pull/4272) ([jackwener](https://github.com/jackwener)) -- Fix bugs in parsing `with header row` and `partitioned by` [\#4268](https://github.com/apache/arrow-datafusion/pull/4268) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([HaoYang670](https://github.com/HaoYang670)) -- improve error messages while downcasting `UInt32Array`, `UInt64Array` and `BooleanArray` [\#4261](https://github.com/apache/arrow-datafusion/pull/4261) ([retikulum](https://github.com/retikulum)) -- add ambiguous check for projection [\#4260](https://github.com/apache/arrow-datafusion/pull/4260) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([AssHero](https://github.com/AssHero)) -- Add ambiguous check for join [\#4258](https://github.com/apache/arrow-datafusion/pull/4258) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([ygf11](https://github.com/ygf11)) -- support cross_join in `limit_push_down` [\#4257](https://github.com/apache/arrow-datafusion/pull/4257) ([jackwener](https://github.com/jackwener)) -- Support parquet page filtering on min_max for `decimal128` and `string` columns [\#4255](https://github.com/apache/arrow-datafusion/pull/4255) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- fix conflict and UT, cleanup redundant legacy code [\#4252](https://github.com/apache/arrow-datafusion/pull/4252) ([jackwener](https://github.com/jackwener)) -- Minor: remove unecessary clone\(\) in planner 
[\#4249](https://github.com/apache/arrow-datafusion/pull/4249) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Fix nightly clippy failures [\#4246](https://github.com/apache/arrow-datafusion/pull/4246) ([mvanschellebeeck](https://github.com/mvanschellebeeck)) -- Improve Error Handling and Readibility for downcasting `Float32Array`, `Float64Array`, `StringArray` [\#4244](https://github.com/apache/arrow-datafusion/pull/4244) ([retikulum](https://github.com/retikulum)) -- Use defaults for ListingOptions builder [\#4243](https://github.com/apache/arrow-datafusion/pull/4243) ([mvanschellebeeck](https://github.com/mvanschellebeeck)) -- Support binary boolean operators with nulls [\#4242](https://github.com/apache/arrow-datafusion/pull/4242) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Fixing doc of the expression [\#4240](https://github.com/apache/arrow-datafusion/pull/4240) ([Creampanda](https://github.com/Creampanda)) -- Fix negative interval parsing bug [\#4238](https://github.com/apache/arrow-datafusion/pull/4238) ([Jefffrey](https://github.com/Jefffrey)) -- remove duplicate or redundant code [\#4235](https://github.com/apache/arrow-datafusion/pull/4235) ([jackwener](https://github.com/jackwener)) -- add a checker to confirm optimizer can keep plan schema immutable. 
[\#4233](https://github.com/apache/arrow-datafusion/pull/4233) ([jackwener](https://github.com/jackwener)) -- Fix the percentile argument for ApproxPercentileCont must be Float64, not Decimal128\(2, 1\) [\#4228](https://github.com/apache/arrow-datafusion/pull/4228) ([comphead](https://github.com/comphead)) -- refactor how we create listing tables [\#4227](https://github.com/apache/arrow-datafusion/pull/4227) ([timvw](https://github.com/timvw)) -- Update sqlparser requirement from 0.26 to 0.27 [\#4226](https://github.com/apache/arrow-datafusion/pull/4226) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- upgrade required chrono version to 0.4.23 [\#4225](https://github.com/apache/arrow-datafusion/pull/4225) ([waitingkuo](https://github.com/waitingkuo)) -- Support types other than String for partition columns on ListingTables [\#4221](https://github.com/apache/arrow-datafusion/pull/4221) ([doki23](https://github.com/doki23)) -- \[CBO\] JoinSelection Rule, select HashJoin Partition Mode based on the Join Type and available statistics, option for SortMergeJoin [\#4219](https://github.com/apache/arrow-datafusion/pull/4219) ([mingmwang](https://github.com/mingmwang)) -- Remove alias in Union [\#4212](https://github.com/apache/arrow-datafusion/pull/4212) ([jackwener](https://github.com/jackwener)) -- Add try_optimize method [\#4208](https://github.com/apache/arrow-datafusion/pull/4208) ([andygrove](https://github.com/andygrove)) -- Provide a builder for ListingOptions with fixups [\#4207](https://github.com/apache/arrow-datafusion/pull/4207) ([alamb](https://github.com/alamb)) -- Avoid error with empty iterators used for `ScalarValue::iter_to_array` [\#4206](https://github.com/apache/arrow-datafusion/pull/4206) ([GrandChaman](https://github.com/GrandChaman)) -- Improve error message for regexp_match 'g' flag [\#4203](https://github.com/apache/arrow-datafusion/pull/4203) ([Jefffrey](https://github.com/Jefffrey)) -- Return 
`ResourceExhausted` errors when memory limit is exceed in `GroupedHashAggregateStreamV2` \(Row Hash\) [\#4202](https://github.com/apache/arrow-datafusion/pull/4202) ([crepererum](https://github.com/crepererum)) -- Add additional expr boolean simplification rules [\#4200](https://github.com/apache/arrow-datafusion/pull/4200) ([Jefffrey](https://github.com/Jefffrey)) -- Update to arrow and parquet 27.0.0 [\#4199](https://github.com/apache/arrow-datafusion/pull/4199) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) -- Support `create table` with explicit column definitions [\#4194](https://github.com/apache/arrow-datafusion/pull/4194) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([doki23](https://github.com/doki23)) -- Support all equality predicates in equality join [\#4193](https://github.com/apache/arrow-datafusion/pull/4193) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([ygf11](https://github.com/ygf11)) -- add `propagate_empty_relation` optimizer rule [\#4192](https://github.com/apache/arrow-datafusion/pull/4192) ([jackwener](https://github.com/jackwener)) -- fix clippy [\#4190](https://github.com/apache/arrow-datafusion/pull/4190) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) -- Fix clippy by avoiding deprecated functions in chrono [\#4189](https://github.com/apache/arrow-datafusion/pull/4189) ([alamb](https://github.com/alamb)) -- Disallow duplicate interval types during parsing [\#4188](https://github.com/apache/arrow-datafusion/pull/4188) ([Jefffrey](https://github.com/Jefffrey)) -- Parse nanoseconds for intervals [\#4186](https://github.com/apache/arrow-datafusion/pull/4186) ([Jefffrey](https://github.com/Jefffrey)) -- Add rule to reimplement `Eliminate cross join` and remove it in planner [\#4185](https://github.com/apache/arrow-datafusion/pull/4185) 
[[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) -- \[FOLLOWUP\] Enforcement Rule: resolve review comments, refactor adjust_input_keys_ordering\(\) [\#4184](https://github.com/apache/arrow-datafusion/pull/4184) ([mingmwang](https://github.com/mingmwang)) -- Simplify boolean parquet pushdown predicate [\#4182](https://github.com/apache/arrow-datafusion/pull/4182) ([Jefffrey](https://github.com/Jefffrey)) -- Minor: consolidate parquet `custom_reader` integration test into parquet_exec [\#4175](https://github.com/apache/arrow-datafusion/pull/4175) ([alamb](https://github.com/alamb)) -- minor: remove redundant println and cleanup [\#4173](https://github.com/apache/arrow-datafusion/pull/4173) ([jackwener](https://github.com/jackwener)) -- Add ability to specify external sort information for ListingTables [\#4170](https://github.com/apache/arrow-datafusion/pull/4170) ([alamb](https://github.com/alamb)) -- Improve Error Handling and Readibility for downcasting `Decimal128Array` [\#4168](https://github.com/apache/arrow-datafusion/pull/4168) ([retikulum](https://github.com/retikulum)) -- Minor: Remove completed comment on parquet row group pruning [\#4167](https://github.com/apache/arrow-datafusion/pull/4167) ([alamb](https://github.com/alamb)) -- Update hashbrown requirement from 0.12 to 0.13 [\#4164](https://github.com/apache/arrow-datafusion/pull/4164) ([dependabot[bot]](https://github.com/apps/dependabot)) -- MINOR: enable `dyn_cmp_dict` feature on arrow for physical expr crate [\#4163](https://github.com/apache/arrow-datafusion/pull/4163) ([isidentical](https://github.com/isidentical)) -- Derive filter statistic estimates from the predicate expression [\#4162](https://github.com/apache/arrow-datafusion/pull/4162) ([isidentical](https://github.com/isidentical)) -- Minor: pass `ParquetFileMetrics` to `build_row_filter` in parquet [\#4161](https://github.com/apache/arrow-datafusion/pull/4161) 
([alamb](https://github.com/alamb)) -- Minor: Extract parquet row group pruning code into its own module [\#4160](https://github.com/apache/arrow-datafusion/pull/4160) ([alamb](https://github.com/alamb)) -- Full support for time32 and time64 literal values \(`ScalarValue`\) [\#4156](https://github.com/apache/arrow-datafusion/pull/4156) ([andre-cc-natzka](https://github.com/andre-cc-natzka)) -- Window frame GROUPS mode support [\#4155](https://github.com/apache/arrow-datafusion/pull/4155) ([zembunia](https://github.com/zembunia)) -- Improve error messages while downcasting Int64Array [\#4154](https://github.com/apache/arrow-datafusion/pull/4154) ([retikulum](https://github.com/retikulum)) -- Add another method to collect referenced columns from an expression [\#4153](https://github.com/apache/arrow-datafusion/pull/4153) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([ygf11](https://github.com/ygf11)) -- Remove BoxedAsyncFileReader [\#4150](https://github.com/apache/arrow-datafusion/pull/4150) ([tustvold](https://github.com/tustvold)) -- Support unsigned integers in `unwrap_cast_in_comparison` Optimizer rule [\#4149](https://github.com/apache/arrow-datafusion/pull/4149) ([alamb](https://github.com/alamb)) -- Add support for `DataType::Timestamp` casts in `unwrap_cast_in_comparison` optimizer pass [\#4148](https://github.com/apache/arrow-datafusion/pull/4148) ([alamb](https://github.com/alamb)) -- Add additional testing for `unwrap_cast_in_comparison` [\#4147](https://github.com/apache/arrow-datafusion/pull/4147) ([alamb](https://github.com/alamb)) -- improve error messages while downcasting Int32Array [\#4146](https://github.com/apache/arrow-datafusion/pull/4146) ([retikulum](https://github.com/retikulum)) -- Minor: Update docstring on unwrap_cast_in_comparison [\#4145](https://github.com/apache/arrow-datafusion/pull/4145) ([alamb](https://github.com/alamb)) -- add schema parameter to table provider factory create method 
[\#4143](https://github.com/apache/arrow-datafusion/pull/4143) ([milenkovicm](https://github.com/milenkovicm)) -- fix: shouldn't pass alias through into subquery. [\#4141](https://github.com/apache/arrow-datafusion/pull/4141) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) -- Preserve the `Cast` expression in `columnize_expr` [\#4137](https://github.com/apache/arrow-datafusion/pull/4137) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([HaoYang670](https://github.com/HaoYang670)) -- Set versions to dependencies with path in benchmarks Cargo.toml file [\#4136](https://github.com/apache/arrow-datafusion/pull/4136) ([ArkashaJavelin](https://github.com/ArkashaJavelin)) -- Fix links [\#4135](https://github.com/apache/arrow-datafusion/pull/4135) ([mvanschellebeeck](https://github.com/mvanschellebeeck)) -- Use f64::total_cmp instead of OrderedFloat [\#4133](https://github.com/apache/arrow-datafusion/pull/4133) ([comphead](https://github.com/comphead)) -- Add parquet integration tests for explicitly smaller page sizes, page pruning [\#4131](https://github.com/apache/arrow-datafusion/pull/4131) ([alamb](https://github.com/alamb)) -- Consolidate `ParquetExec` tests in `parquet_exec` integration test [\#4130](https://github.com/apache/arrow-datafusion/pull/4130) ([alamb](https://github.com/alamb)) -- Minor: Use upstream `BooleanArray::true_count` [\#4129](https://github.com/apache/arrow-datafusion/pull/4129) ([alamb](https://github.com/alamb)) -- Combined TPCH runs & uniformed summaries for benchmarks [\#4128](https://github.com/apache/arrow-datafusion/pull/4128) ([isidentical](https://github.com/isidentical)) -- Enable TableProviderFactories to receive additional options when creating an external table [\#4126](https://github.com/apache/arrow-datafusion/pull/4126) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([timvw](https://github.com/timvw)) -- Add CI check that configs.md is 
up-to-date [\#4124](https://github.com/apache/arrow-datafusion/pull/4124) ([mvanschellebeeck](https://github.com/mvanschellebeeck)) -- \[Part3\] Partition and Sort Enforcement, Enforcement rule implementation [\#4122](https://github.com/apache/arrow-datafusion/pull/4122) ([mingmwang](https://github.com/mingmwang)) -- reuse code `utils::optimize_children` but affect inline. [\#4121](https://github.com/apache/arrow-datafusion/pull/4121) ([jackwener](https://github.com/jackwener)) -- reuse code `utils::optimize_children` instead of redundant implementation [\#4119](https://github.com/apache/arrow-datafusion/pull/4119) ([jackwener](https://github.com/jackwener)) -- Allow listing tables to be created via TableFactories [\#4112](https://github.com/apache/arrow-datafusion/pull/4112) ([avantgardnerio](https://github.com/avantgardnerio)) -- Update SQL reference to state that decimal support is currently experimental [\#4109](https://github.com/apache/arrow-datafusion/pull/4109) ([andygrove](https://github.com/andygrove)) -- Add metrics for parquet page level skipping [\#4105](https://github.com/apache/arrow-datafusion/pull/4105) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Add parser option for parsing SQL numeric literals as decimal [\#4102](https://github.com/apache/arrow-datafusion/pull/4102) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) -- Replace RwLock\ and Mutex\ by using DashMap [\#4079](https://github.com/apache/arrow-datafusion/pull/4079) ([yahoNanJing](https://github.com/yahoNanJing)) -- Custom window frame support extended to built-in window functions [\#4078](https://github.com/apache/arrow-datafusion/pull/4078) ([mustafasrepo](https://github.com/mustafasrepo)) -- Enable tests for page index filtering in parquet filter pushdown test [\#4062](https://github.com/apache/arrow-datafusion/pull/4062) ([alamb](https://github.com/alamb)) -- \[Part2\] Partition and Sort Enforcement, ExecutionPlan enhancement 
[\#4043](https://github.com/apache/arrow-datafusion/pull/4043) ([mingmwang](https://github.com/mingmwang)) -- add support for xz file compression and `compression` feature [\#3993](https://github.com/apache/arrow-datafusion/pull/3993) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jimexist](https://github.com/Jimexist)) -- Expression boundary analysis framework [\#3912](https://github.com/apache/arrow-datafusion/pull/3912) ([isidentical](https://github.com/isidentical)) +- Make `datafusion-sql` depend on `arrow-schema` instead of `arrow` [\#4456](https://github.com/apache/datafusion/pull/4456) [[sql](https://github.com/apache/datafusion/labels/sql)] ([mbrobbel](https://github.com/mbrobbel)) +- replace the comparator for `decimal array op scalar` using arrow kernel [\#4453](https://github.com/apache/datafusion/pull/4453) ([liukun4515](https://github.com/liukun4515)) +- Fix pyarrow test [\#4450](https://github.com/apache/datafusion/pull/4450) ([mvanschellebeeck](https://github.com/mvanschellebeeck)) +- Replace `&Option<T>` with `Option<&T>` [\#4446](https://github.com/apache/datafusion/pull/4446) [[sql](https://github.com/apache/datafusion/labels/sql)] ([askoa](https://github.com/askoa)) +- Improve error handling for array downcasting [\#4445](https://github.com/apache/datafusion/pull/4445) ([retikulum](https://github.com/retikulum)) +- Refactor Builtin Window Function Implementation [\#4441](https://github.com/apache/datafusion/pull/4441) ([mustafasrepo](https://github.com/mustafasrepo)) +- feat: `DataFusionError::find_root` [\#4437](https://github.com/apache/datafusion/pull/4437) ([crepererum](https://github.com/crepererum)) +- fix: do NOT convert errors to strings but keep the type [\#4436](https://github.com/apache/datafusion/pull/4436) ([crepererum](https://github.com/crepererum)) +- The CLI panics when passing an invalid explain query [\#4429](https://github.com/apache/datafusion/pull/4429) ([comphead](https://github.com/comphead)) +- \[minor\] 
use arrow kernel concat_batches instead combine_batches [\#4423](https://github.com/apache/datafusion/pull/4423) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- fix panic on to_hex function for negative numbers [\#4422](https://github.com/apache/datafusion/pull/4422) ([retikulum](https://github.com/retikulum)) +- Optimize filter executor in pull-based executor [\#4421](https://github.com/apache/datafusion/pull/4421) ([xudong963](https://github.com/xudong963)) +- optimize limit push for join case [\#4411](https://github.com/apache/datafusion/pull/4411) ([liukun4515](https://github.com/liukun4515)) +- Add integration test for erroring when memory limits are hit [\#4406](https://github.com/apache/datafusion/pull/4406) ([alamb](https://github.com/alamb)) +- feat: `ResourceExhausted` for memory limit in `AggregateStream` [\#4405](https://github.com/apache/datafusion/pull/4405) ([crepererum](https://github.com/crepererum)) +- Update to arrow 28 [\#4400](https://github.com/apache/datafusion/pull/4400) [[sql](https://github.com/apache/datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) +- Update rstest requirement from 0.15.0 to 0.16.0 [\#4399](https://github.com/apache/datafusion/pull/4399) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Add sqllogictests \(v0\) [\#4395](https://github.com/apache/datafusion/pull/4395) ([mvanschellebeeck](https://github.com/mvanschellebeeck)) +- improve hashjoin execution metrics [\#4394](https://github.com/apache/datafusion/pull/4394) ([AssHero](https://github.com/AssHero)) +- Add `with_new_inputs` for LogicalPlan [\#4393](https://github.com/apache/datafusion/pull/4393) ([jackwener](https://github.com/jackwener)) +- Clean the code in `limit.rs`. 
[\#4391](https://github.com/apache/datafusion/pull/4391) ([HaoYang670](https://github.com/HaoYang670)) +- Move physical plan serde from Ballista to DataFusion [\#4390](https://github.com/apache/datafusion/pull/4390) ([Kikkon](https://github.com/Kikkon)) +- Fix page index pruning fail on complex_expr [\#4387](https://github.com/apache/datafusion/pull/4387) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Add check for nested types in equivalent names and types [\#4380](https://github.com/apache/datafusion/pull/4380) ([alamb](https://github.com/alamb)) +- refine the code of build schema for ambiguous check, factor this out into a function [\#4379](https://github.com/apache/datafusion/pull/4379) [[sql](https://github.com/apache/datafusion/labels/sql)] ([AssHero](https://github.com/AssHero)) +- Refactor the Hash Join [\#4377](https://github.com/apache/datafusion/pull/4377) ([liukun4515](https://github.com/liukun4515)) +- Minor: Fix typos in the documentation [\#4376](https://github.com/apache/datafusion/pull/4376) ([martin-g](https://github.com/martin-g)) +- Include byte size estimates in the filter statistics [\#4375](https://github.com/apache/datafusion/pull/4375) ([isidentical](https://github.com/isidentical)) +- HashJoin should return Err when the right side input stream produce Err, add more join UTs to cover different join types [\#4373](https://github.com/apache/datafusion/pull/4373) [[sql](https://github.com/apache/datafusion/labels/sql)] ([mingmwang](https://github.com/mingmwang)) +- feat: `ResourceExhausted` for memory limit in `GroupedHashAggregateStream` [\#4371](https://github.com/apache/datafusion/pull/4371) ([crepererum](https://github.com/crepererum)) +- Use limit\(\) function instead of show_limit\(\) in the first example [\#4369](https://github.com/apache/datafusion/pull/4369) ([martin-g](https://github.com/martin-g)) +- Update env_logger requirement from 0.9 to 0.10 [\#4367](https://github.com/apache/datafusion/pull/4367) 
([dependabot[bot]](https://github.com/apps/dependabot)) +- reimplement `push_down_filter` to remove global-state [\#4365](https://github.com/apache/datafusion/pull/4365) ([jackwener](https://github.com/jackwener)) +- Support to use Schedular in tpch benchmark [\#4361](https://github.com/apache/datafusion/pull/4361) ([xudong963](https://github.com/xudong963)) +- Adding more dataframe example to read csv files [\#4360](https://github.com/apache/datafusion/pull/4360) ([DataPsycho](https://github.com/DataPsycho)) +- minor: correct name and typo [\#4359](https://github.com/apache/datafusion/pull/4359) ([jackwener](https://github.com/jackwener)) +- Do not log error if page index can not be evaluated [\#4358](https://github.com/apache/datafusion/pull/4358) ([alamb](https://github.com/alamb)) +- Clean the `expr_fn` - use `scalar_expr` to create unary scalar expr functions, remove macro `unary_scalar_functions` [\#4357](https://github.com/apache/datafusion/pull/4357) ([HaoYang670](https://github.com/HaoYang670)) +- Throw error \(not `panic`\) if a listing table specifies an missing partition column [\#4354](https://github.com/apache/datafusion/pull/4354) ([doki23](https://github.com/doki23)) +- Improve error handling and add some more types for proper downcasting [\#4352](https://github.com/apache/datafusion/pull/4352) ([retikulum](https://github.com/retikulum)) +- Add check to avoid underflow in memory manager [\#4351](https://github.com/apache/datafusion/pull/4351) ([askoa](https://github.com/askoa)) +- Improve error messages when memory is exhausted while sorting [\#4348](https://github.com/apache/datafusion/pull/4348) ([alamb](https://github.com/alamb)) +- Do not error in optimizer if resulting schema has different metadata [\#4347](https://github.com/apache/datafusion/pull/4347) ([alamb](https://github.com/alamb)) +- minor: improve optimizer logging and do not repeat rule name [\#4345](https://github.com/apache/datafusion/pull/4345) ([alamb](https://github.com/alamb)) 
+- minor: fix typos in test names [\#4344](https://github.com/apache/datafusion/pull/4344) [[sql](https://github.com/apache/datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Minor: Add docstrings to `EliminateOuterJoins` optimizer pass [\#4343](https://github.com/apache/datafusion/pull/4343) ([alamb](https://github.com/alamb)) +- Minor: refactor: isolate common memory accounting utils [\#4341](https://github.com/apache/datafusion/pull/4341) ([crepererum](https://github.com/crepererum)) +- minor: make `plan_from_tables` return one plan instead of `Vec` [\#4336](https://github.com/apache/datafusion/pull/4336) [[sql](https://github.com/apache/datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) +- enhancement: when fetch == 0, pushdown limit 0 instead skip+fetch. [\#4334](https://github.com/apache/datafusion/pull/4334) ([jackwener](https://github.com/jackwener)) +- Teach optimizer that `CoalesceBatchesExec` does not destroy output order [\#4332](https://github.com/apache/datafusion/pull/4332) ([alamb](https://github.com/alamb)) +- Add ability to disable DiskManager [\#4330](https://github.com/apache/datafusion/pull/4330) ([tustvold](https://github.com/tustvold)) +- Update cli.md [\#4329](https://github.com/apache/datafusion/pull/4329) ([psvri](https://github.com/psvri)) +- fix bug: right semi join can't support the filter [\#4327](https://github.com/apache/datafusion/pull/4327) ([liukun4515](https://github.com/liukun4515)) +- reimplment `eliminate_limit` to remove `global-state`. 
[\#4324](https://github.com/apache/datafusion/pull/4324) ([jackwener](https://github.com/jackwener)) +- Refine Err propagation and avoid unwrap in transform closures [\#4318](https://github.com/apache/datafusion/pull/4318) ([mingmwang](https://github.com/mingmwang)) +- Add a checker to confirm physical optimizer rules will keep the physical plan schema immutable [\#4316](https://github.com/apache/datafusion/pull/4316) ([mingmwang](https://github.com/mingmwang)) +- Refactor downcasting functions with downcastvalue macro and improve error handling of `ListArray` downcasting [\#4313](https://github.com/apache/datafusion/pull/4313) ([retikulum](https://github.com/retikulum)) +- minor: add another test case to cover join ambiguous check [\#4305](https://github.com/apache/datafusion/pull/4305) [[sql](https://github.com/apache/datafusion/labels/sql)] ([ygf11](https://github.com/ygf11)) +- Fix DESCRIBE statement qualified table issue [\#4304](https://github.com/apache/datafusion/pull/4304) [[sql](https://github.com/apache/datafusion/labels/sql)] ([gruuya](https://github.com/gruuya)) +- Use tournament loser tree for k-way sort-merging, increase merge speed by 50% [\#4301](https://github.com/apache/datafusion/pull/4301) ([richox](https://github.com/richox)) +- Pin Python `setuptools` in the CI to fix integration tests [\#4296](https://github.com/apache/datafusion/pull/4296) ([isidentical](https://github.com/isidentical)) +- Support `SubqueryAlias` in optimizer, physcial planner. 
[\#4293](https://github.com/apache/datafusion/pull/4293) ([jackwener](https://github.com/jackwener)) +- minor: avoid a clone into string when checking ambiguous [\#4292](https://github.com/apache/datafusion/pull/4292) [[sql](https://github.com/apache/datafusion/labels/sql)] ([ygf11](https://github.com/ygf11)) +- replace the comparison op for decimal array op using the arrow-rs kernel [\#4290](https://github.com/apache/datafusion/pull/4290) ([liukun4515](https://github.com/liukun4515)) +- MINOR: replace `{..}` with `(_)`, typo, remove outdated TODO [\#4286](https://github.com/apache/datafusion/pull/4286) ([jackwener](https://github.com/jackwener)) +- Reduce Expr copies in `ParquetExec` [\#4283](https://github.com/apache/datafusion/pull/4283) ([alamb](https://github.com/alamb)) +- Fix issue in filter pushdown with overloaded projection index [\#4281](https://github.com/apache/datafusion/pull/4281) ([thinkharderdev](https://github.com/thinkharderdev)) +- Skip useless pruning predicates in `ParquetExec` [\#4280](https://github.com/apache/datafusion/pull/4280) ([alamb](https://github.com/alamb)) +- Push down more predicates into `ParquetExec` [\#4279](https://github.com/apache/datafusion/pull/4279) ([alamb](https://github.com/alamb)) +- Fix EXPLAIN plan for ParquetExec to show pruning_predicate [\#4278](https://github.com/apache/datafusion/pull/4278) ([alamb](https://github.com/alamb)) +- reimplement `limit_push_down` to remove global-state, enhance optimize and simplify code. 
[\#4276](https://github.com/apache/datafusion/pull/4276) ([jackwener](https://github.com/jackwener)) +- Bump actions/labeler from 4.0.2 to 4.1.0 [\#4274](https://github.com/apache/datafusion/pull/4274) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Remove the type alias `NullColumnarValue` [\#4273](https://github.com/apache/datafusion/pull/4273) ([HaoYang670](https://github.com/HaoYang670)) +- reimplement `eliminate_outer_join` [\#4272](https://github.com/apache/datafusion/pull/4272) ([jackwener](https://github.com/jackwener)) +- Fix bugs in parsing `with header row` and `partitioned by` [\#4268](https://github.com/apache/datafusion/pull/4268) [[sql](https://github.com/apache/datafusion/labels/sql)] ([HaoYang670](https://github.com/HaoYang670)) +- improve error messages while downcasting `UInt32Array`, `UInt64Array` and `BooleanArray` [\#4261](https://github.com/apache/datafusion/pull/4261) ([retikulum](https://github.com/retikulum)) +- add ambiguous check for projection [\#4260](https://github.com/apache/datafusion/pull/4260) [[sql](https://github.com/apache/datafusion/labels/sql)] ([AssHero](https://github.com/AssHero)) +- Add ambiguous check for join [\#4258](https://github.com/apache/datafusion/pull/4258) [[sql](https://github.com/apache/datafusion/labels/sql)] ([ygf11](https://github.com/ygf11)) +- support cross_join in `limit_push_down` [\#4257](https://github.com/apache/datafusion/pull/4257) ([jackwener](https://github.com/jackwener)) +- Support parquet page filtering on min_max for `decimal128` and `string` columns [\#4255](https://github.com/apache/datafusion/pull/4255) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- fix conflict and UT, cleanup redundant legacy code [\#4252](https://github.com/apache/datafusion/pull/4252) ([jackwener](https://github.com/jackwener)) +- Minor: remove unecessary clone\(\) in planner [\#4249](https://github.com/apache/datafusion/pull/4249) [[sql](https://github.com/apache/datafusion/labels/sql)] 
([alamb](https://github.com/alamb)) +- Fix nightly clippy failures [\#4246](https://github.com/apache/datafusion/pull/4246) ([mvanschellebeeck](https://github.com/mvanschellebeeck)) +- Improve Error Handling and Readibility for downcasting `Float32Array`, `Float64Array`, `StringArray` [\#4244](https://github.com/apache/datafusion/pull/4244) ([retikulum](https://github.com/retikulum)) +- Use defaults for ListingOptions builder [\#4243](https://github.com/apache/datafusion/pull/4243) ([mvanschellebeeck](https://github.com/mvanschellebeeck)) +- Support binary boolean operators with nulls [\#4242](https://github.com/apache/datafusion/pull/4242) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Fixing doc of the expression [\#4240](https://github.com/apache/datafusion/pull/4240) ([Creampanda](https://github.com/Creampanda)) +- Fix negative interval parsing bug [\#4238](https://github.com/apache/datafusion/pull/4238) ([Jefffrey](https://github.com/Jefffrey)) +- remove duplicate or redundant code [\#4235](https://github.com/apache/datafusion/pull/4235) ([jackwener](https://github.com/jackwener)) +- add a checker to confirm optimizer can keep plan schema immutable. 
[\#4233](https://github.com/apache/datafusion/pull/4233) ([jackwener](https://github.com/jackwener)) +- Fix the percentile argument for ApproxPercentileCont must be Float64, not Decimal128\(2, 1\) [\#4228](https://github.com/apache/datafusion/pull/4228) ([comphead](https://github.com/comphead)) +- refactor how we create listing tables [\#4227](https://github.com/apache/datafusion/pull/4227) ([timvw](https://github.com/timvw)) +- Update sqlparser requirement from 0.26 to 0.27 [\#4226](https://github.com/apache/datafusion/pull/4226) [[sql](https://github.com/apache/datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- upgrade required chrono version to 0.4.23 [\#4225](https://github.com/apache/datafusion/pull/4225) ([waitingkuo](https://github.com/waitingkuo)) +- Support types other than String for partition columns on ListingTables [\#4221](https://github.com/apache/datafusion/pull/4221) ([doki23](https://github.com/doki23)) +- \[CBO\] JoinSelection Rule, select HashJoin Partition Mode based on the Join Type and available statistics, option for SortMergeJoin [\#4219](https://github.com/apache/datafusion/pull/4219) ([mingmwang](https://github.com/mingmwang)) +- Remove alias in Union [\#4212](https://github.com/apache/datafusion/pull/4212) ([jackwener](https://github.com/jackwener)) +- Add try_optimize method [\#4208](https://github.com/apache/datafusion/pull/4208) ([andygrove](https://github.com/andygrove)) +- Provide a builder for ListingOptions with fixups [\#4207](https://github.com/apache/datafusion/pull/4207) ([alamb](https://github.com/alamb)) +- Avoid error with empty iterators used for `ScalarValue::iter_to_array` [\#4206](https://github.com/apache/datafusion/pull/4206) ([GrandChaman](https://github.com/GrandChaman)) +- Improve error message for regexp_match 'g' flag [\#4203](https://github.com/apache/datafusion/pull/4203) ([Jefffrey](https://github.com/Jefffrey)) +- Return `ResourceExhausted` errors when memory limit is exceed in 
`GroupedHashAggregateStreamV2` \(Row Hash\) [\#4202](https://github.com/apache/datafusion/pull/4202) ([crepererum](https://github.com/crepererum)) +- Add additional expr boolean simplification rules [\#4200](https://github.com/apache/datafusion/pull/4200) ([Jefffrey](https://github.com/Jefffrey)) +- Update to arrow and parquet 27.0.0 [\#4199](https://github.com/apache/datafusion/pull/4199) [[sql](https://github.com/apache/datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) +- Support `create table` with explicit column definitions [\#4194](https://github.com/apache/datafusion/pull/4194) [[sql](https://github.com/apache/datafusion/labels/sql)] ([doki23](https://github.com/doki23)) +- Support all equality predicates in equality join [\#4193](https://github.com/apache/datafusion/pull/4193) [[sql](https://github.com/apache/datafusion/labels/sql)] ([ygf11](https://github.com/ygf11)) +- add `propagate_empty_relation` optimizer rule [\#4192](https://github.com/apache/datafusion/pull/4192) ([jackwener](https://github.com/jackwener)) +- fix clippy [\#4190](https://github.com/apache/datafusion/pull/4190) [[sql](https://github.com/apache/datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) +- Fix clippy by avoiding deprecated functions in chrono [\#4189](https://github.com/apache/datafusion/pull/4189) ([alamb](https://github.com/alamb)) +- Disallow duplicate interval types during parsing [\#4188](https://github.com/apache/datafusion/pull/4188) ([Jefffrey](https://github.com/Jefffrey)) +- Parse nanoseconds for intervals [\#4186](https://github.com/apache/datafusion/pull/4186) ([Jefffrey](https://github.com/Jefffrey)) +- Add rule to reimplement `Eliminate cross join` and remove it in planner [\#4185](https://github.com/apache/datafusion/pull/4185) [[sql](https://github.com/apache/datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) +- \[FOLLOWUP\] Enforcement Rule: resolve review comments, refactor adjust_input_keys_ordering\(\) 
[\#4184](https://github.com/apache/datafusion/pull/4184) ([mingmwang](https://github.com/mingmwang)) +- Simplify boolean parquet pushdown predicate [\#4182](https://github.com/apache/datafusion/pull/4182) ([Jefffrey](https://github.com/Jefffrey)) +- Minor: consolidate parquet `custom_reader` integration test into parquet_exec [\#4175](https://github.com/apache/datafusion/pull/4175) ([alamb](https://github.com/alamb)) +- minor: remove redundant println and cleanup [\#4173](https://github.com/apache/datafusion/pull/4173) ([jackwener](https://github.com/jackwener)) +- Add ability to specify external sort information for ListingTables [\#4170](https://github.com/apache/datafusion/pull/4170) ([alamb](https://github.com/alamb)) +- Improve Error Handling and Readibility for downcasting `Decimal128Array` [\#4168](https://github.com/apache/datafusion/pull/4168) ([retikulum](https://github.com/retikulum)) +- Minor: Remove completed comment on parquet row group pruning [\#4167](https://github.com/apache/datafusion/pull/4167) ([alamb](https://github.com/alamb)) +- Update hashbrown requirement from 0.12 to 0.13 [\#4164](https://github.com/apache/datafusion/pull/4164) ([dependabot[bot]](https://github.com/apps/dependabot)) +- MINOR: enable `dyn_cmp_dict` feature on arrow for physical expr crate [\#4163](https://github.com/apache/datafusion/pull/4163) ([isidentical](https://github.com/isidentical)) +- Derive filter statistic estimates from the predicate expression [\#4162](https://github.com/apache/datafusion/pull/4162) ([isidentical](https://github.com/isidentical)) +- Minor: pass `ParquetFileMetrics` to `build_row_filter` in parquet [\#4161](https://github.com/apache/datafusion/pull/4161) ([alamb](https://github.com/alamb)) +- Minor: Extract parquet row group pruning code into its own module [\#4160](https://github.com/apache/datafusion/pull/4160) ([alamb](https://github.com/alamb)) +- Full support for time32 and time64 literal values \(`ScalarValue`\) 
[\#4156](https://github.com/apache/datafusion/pull/4156) ([andre-cc-natzka](https://github.com/andre-cc-natzka)) +- Window frame GROUPS mode support [\#4155](https://github.com/apache/datafusion/pull/4155) ([zembunia](https://github.com/zembunia)) +- Improve error messages while downcasting Int64Array [\#4154](https://github.com/apache/datafusion/pull/4154) ([retikulum](https://github.com/retikulum)) +- Add another method to collect referenced columns from an expression [\#4153](https://github.com/apache/datafusion/pull/4153) [[sql](https://github.com/apache/datafusion/labels/sql)] ([ygf11](https://github.com/ygf11)) +- Remove BoxedAsyncFileReader [\#4150](https://github.com/apache/datafusion/pull/4150) ([tustvold](https://github.com/tustvold)) +- Support unsigned integers in `unwrap_cast_in_comparison` Optimizer rule [\#4149](https://github.com/apache/datafusion/pull/4149) ([alamb](https://github.com/alamb)) +- Add support for `DataType::Timestamp` casts in `unwrap_cast_in_comparison` optimizer pass [\#4148](https://github.com/apache/datafusion/pull/4148) ([alamb](https://github.com/alamb)) +- Add additional testing for `unwrap_cast_in_comparison` [\#4147](https://github.com/apache/datafusion/pull/4147) ([alamb](https://github.com/alamb)) +- improve error messages while downcasting Int32Array [\#4146](https://github.com/apache/datafusion/pull/4146) ([retikulum](https://github.com/retikulum)) +- Minor: Update docstring on unwrap_cast_in_comparison [\#4145](https://github.com/apache/datafusion/pull/4145) ([alamb](https://github.com/alamb)) +- add schema parameter to table provider factory create method [\#4143](https://github.com/apache/datafusion/pull/4143) ([milenkovicm](https://github.com/milenkovicm)) +- fix: shouldn't pass alias through into subquery. 
[\#4141](https://github.com/apache/datafusion/pull/4141) [[sql](https://github.com/apache/datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) +- Preserve the `Cast` expression in `columnize_expr` [\#4137](https://github.com/apache/datafusion/pull/4137) [[sql](https://github.com/apache/datafusion/labels/sql)] ([HaoYang670](https://github.com/HaoYang670)) +- Set versions to dependencies with path in benchmarks Cargo.toml file [\#4136](https://github.com/apache/datafusion/pull/4136) ([ArkashaJavelin](https://github.com/ArkashaJavelin)) +- Fix links [\#4135](https://github.com/apache/datafusion/pull/4135) ([mvanschellebeeck](https://github.com/mvanschellebeeck)) +- Use f64::total_cmp instead of OrderedFloat [\#4133](https://github.com/apache/datafusion/pull/4133) ([comphead](https://github.com/comphead)) +- Add parquet integration tests for explicitly smaller page sizes, page pruning [\#4131](https://github.com/apache/datafusion/pull/4131) ([alamb](https://github.com/alamb)) +- Consolidate `ParquetExec` tests in `parquet_exec` integration test [\#4130](https://github.com/apache/datafusion/pull/4130) ([alamb](https://github.com/alamb)) +- Minor: Use upstream `BooleanArray::true_count` [\#4129](https://github.com/apache/datafusion/pull/4129) ([alamb](https://github.com/alamb)) +- Combined TPCH runs & uniformed summaries for benchmarks [\#4128](https://github.com/apache/datafusion/pull/4128) ([isidentical](https://github.com/isidentical)) +- Enable TableProviderFactories to receive additional options when creating an external table [\#4126](https://github.com/apache/datafusion/pull/4126) [[sql](https://github.com/apache/datafusion/labels/sql)] ([timvw](https://github.com/timvw)) +- Add CI check that configs.md is up-to-date [\#4124](https://github.com/apache/datafusion/pull/4124) ([mvanschellebeeck](https://github.com/mvanschellebeeck)) +- \[Part3\] Partition and Sort Enforcement, Enforcement rule implementation 
[\#4122](https://github.com/apache/datafusion/pull/4122) ([mingmwang](https://github.com/mingmwang)) +- reuse code `utils::optimize_children` but affect inline. [\#4121](https://github.com/apache/datafusion/pull/4121) ([jackwener](https://github.com/jackwener)) +- reuse code `utils::optimize_children` instead of redundant implementation [\#4119](https://github.com/apache/datafusion/pull/4119) ([jackwener](https://github.com/jackwener)) +- Allow listing tables to be created via TableFactories [\#4112](https://github.com/apache/datafusion/pull/4112) ([avantgardnerio](https://github.com/avantgardnerio)) +- Update SQL reference to state that decimal support is currently experimental [\#4109](https://github.com/apache/datafusion/pull/4109) ([andygrove](https://github.com/andygrove)) +- Add metrics for parquet page level skipping [\#4105](https://github.com/apache/datafusion/pull/4105) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Add parser option for parsing SQL numeric literals as decimal [\#4102](https://github.com/apache/datafusion/pull/4102) [[sql](https://github.com/apache/datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- Replace RwLock\ and Mutex\ by using DashMap [\#4079](https://github.com/apache/datafusion/pull/4079) ([yahoNanJing](https://github.com/yahoNanJing)) +- Custom window frame support extended to built-in window functions [\#4078](https://github.com/apache/datafusion/pull/4078) ([mustafasrepo](https://github.com/mustafasrepo)) +- Enable tests for page index filtering in parquet filter pushdown test [\#4062](https://github.com/apache/datafusion/pull/4062) ([alamb](https://github.com/alamb)) +- \[Part2\] Partition and Sort Enforcement, ExecutionPlan enhancement [\#4043](https://github.com/apache/datafusion/pull/4043) ([mingmwang](https://github.com/mingmwang)) +- add support for xz file compression and `compression` feature [\#3993](https://github.com/apache/datafusion/pull/3993) 
[[sql](https://github.com/apache/datafusion/labels/sql)] ([Jimexist](https://github.com/Jimexist)) +- Expression boundary analysis framework [\#3912](https://github.com/apache/datafusion/pull/3912) ([isidentical](https://github.com/isidentical)) diff --git a/dev/changelog/16.0.0.md b/dev/changelog/16.0.0.md index fecf52f10995..d839732cd403 100644 --- a/dev/changelog/16.0.0.md +++ b/dev/changelog/16.0.0.md @@ -17,395 +17,395 @@ under the License. --> -## [16.0.0](https://github.com/apache/arrow-datafusion/tree/16.0.0) (2023-01-12) +## [16.0.0](https://github.com/apache/datafusion/tree/16.0.0) (2023-01-12) -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/16.0.0-rc1...16.0.0) +[Full Changelog](https://github.com/apache/datafusion/compare/16.0.0-rc1...16.0.0) **Breaking changes:** -- Remove unused ExecutionPlan::relies_input_order \(has been replaced with `required_input_ordering`\) [\#4856](https://github.com/apache/arrow-datafusion/pull/4856) ([alamb](https://github.com/alamb)) -- Add DataFrame::into_view instead of implementing TableProvider \(\#2659\) [\#4778](https://github.com/apache/arrow-datafusion/pull/4778) ([tustvold](https://github.com/tustvold)) +- Remove unused ExecutionPlan::relies_input_order \(has been replaced with `required_input_ordering`\) [\#4856](https://github.com/apache/datafusion/pull/4856) ([alamb](https://github.com/alamb)) +- Add DataFrame::into_view instead of implementing TableProvider \(\#2659\) [\#4778](https://github.com/apache/datafusion/pull/4778) ([tustvold](https://github.com/tustvold)) **Implemented enhancements:** -- Support custom window frame with AVG aggregate function [\#4845](https://github.com/apache/arrow-datafusion/issues/4845) -- add sqllogicaltest for tpch and remove some duplicated test. [\#4801](https://github.com/apache/arrow-datafusion/issues/4801) -- Catalog Snapshot Isolation [\#4697](https://github.com/apache/arrow-datafusion/issues/4697) -- Support `select .. 
FROM 'parquet.file'` in datafusion-cli [\#4580](https://github.com/apache/arrow-datafusion/issues/4580) +- Support custom window frame with AVG aggregate function [\#4845](https://github.com/apache/datafusion/issues/4845) +- add sqllogicaltest for tpch and remove some duplicated test. [\#4801](https://github.com/apache/datafusion/issues/4801) +- Catalog Snapshot Isolation [\#4697](https://github.com/apache/datafusion/issues/4697) +- Support `select .. FROM 'parquet.file'` in datafusion-cli [\#4580](https://github.com/apache/datafusion/issues/4580) **Fixed bugs:** -- Regression: `write_csv` result has incorrect formatting [\#4876](https://github.com/apache/arrow-datafusion/issues/4876) -- Incorrect results for join condition against current master branch [\#4844](https://github.com/apache/arrow-datafusion/issues/4844) -- Match Postgres for stddev and variance on less than 3 values [\#4843](https://github.com/apache/arrow-datafusion/issues/4843) -- `JOIN ... USING (columns)` works incorrectly with multiple columns \(joined-over columns are missing in the output\) [\#4674](https://github.com/apache/arrow-datafusion/issues/4674) -- ROW_NUMBER window function inconsistent across partitions in multi-threaded runtime [\#4673](https://github.com/apache/arrow-datafusion/issues/4673) -- `SELECT ... FROM (tbl1 UNION tbl2)` wrongly works like `SELECT DISTINCT ... FROM (tbl1 UNION tbl2)` [\#4667](https://github.com/apache/arrow-datafusion/issues/4667) -- DataFrame TableProvider Circular Reference [\#2659](https://github.com/apache/arrow-datafusion/issues/2659) +- Regression: `write_csv` result has incorrect formatting [\#4876](https://github.com/apache/datafusion/issues/4876) +- Incorrect results for join condition against current master branch [\#4844](https://github.com/apache/datafusion/issues/4844) +- Match Postgres for stddev and variance on less than 3 values [\#4843](https://github.com/apache/datafusion/issues/4843) +- `JOIN ... 
USING (columns)` works incorrectly with multiple columns \(joined-over columns are missing in the output\) [\#4674](https://github.com/apache/datafusion/issues/4674) +- ROW_NUMBER window function inconsistent across partitions in multi-threaded runtime [\#4673](https://github.com/apache/datafusion/issues/4673) +- `SELECT ... FROM (tbl1 UNION tbl2)` wrongly works like `SELECT DISTINCT ... FROM (tbl1 UNION tbl2)` [\#4667](https://github.com/apache/datafusion/issues/4667) +- DataFrame TableProvider Circular Reference [\#2659](https://github.com/apache/datafusion/issues/2659) **Documentation updates:** -- Add Synnada to known uses [\#4857](https://github.com/apache/arrow-datafusion/pull/4857) ([ozankabak](https://github.com/ozankabak)) +- Add Synnada to known uses [\#4857](https://github.com/apache/datafusion/pull/4857) ([ozankabak](https://github.com/ozankabak)) **Closed issues:** -- Remove tests from `sql_integration` that were ported to `sqllogictest` [\#4498](https://github.com/apache/arrow-datafusion/issues/4498) -- How to register a http url to the `object_store` [\#4491](https://github.com/apache/arrow-datafusion/issues/4491) -- optimizer: support `unsigned <-> decimal` for unwrap_cast_in_comparion rule [\#4287](https://github.com/apache/arrow-datafusion/issues/4287) -- Add SQL support for NATURAL JOIN [\#117](https://github.com/apache/arrow-datafusion/issues/117) -- \[Datafusion\] Datafusion queries involving a column name that begins with a number produces unexpected results [\#108](https://github.com/apache/arrow-datafusion/issues/108) +- Remove tests from `sql_integration` that were ported to `sqllogictest` [\#4498](https://github.com/apache/datafusion/issues/4498) +- How to register a http url to the `object_store` [\#4491](https://github.com/apache/datafusion/issues/4491) +- optimizer: support `unsigned <-> decimal` for unwrap_cast_in_comparion rule [\#4287](https://github.com/apache/datafusion/issues/4287) +- Add SQL support for NATURAL JOIN 
[\#117](https://github.com/apache/datafusion/issues/117) +- \[Datafusion\] Datafusion queries involving a column name that begins with a number produces unexpected results [\#108](https://github.com/apache/datafusion/issues/108) **Merged pull requests:** -- docs: improve `Column::normalize_with_schemas` docs [\#4871](https://github.com/apache/arrow-datafusion/pull/4871) ([crepererum](https://github.com/crepererum)) -- Skip EliminateCrossJoin rule when meet non-empty join filter [\#4869](https://github.com/apache/arrow-datafusion/pull/4869) ([ygf11](https://github.com/ygf11)) -- Support for SQL Natural Join [\#4863](https://github.com/apache/arrow-datafusion/pull/4863) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) -- Minor: Move test data into `datafusion/core/tests/data` [\#4855](https://github.com/apache/arrow-datafusion/pull/4855) ([alamb](https://github.com/alamb)) -- Covariance single row input & null skipping [\#4852](https://github.com/apache/arrow-datafusion/pull/4852) ([korowa](https://github.com/korowa)) -- Document ability to select directly from files in datafusion-cli [\#4851](https://github.com/apache/arrow-datafusion/pull/4851) ([alamb](https://github.com/alamb)) -- Fix push_down_projection through a distinct [\#4849](https://github.com/apache/arrow-datafusion/pull/4849) ([Jefffrey](https://github.com/Jefffrey)) -- Support using var/var_pop/stddev/stddev_pop in window expressions with custom frames [\#4848](https://github.com/apache/arrow-datafusion/pull/4848) ([jonmmease](https://github.com/jonmmease)) -- Update variance/stddev to work with single values [\#4847](https://github.com/apache/arrow-datafusion/pull/4847) ([jonmmease](https://github.com/jonmmease)) -- Implement retract_batch for AvgAccumulator [\#4846](https://github.com/apache/arrow-datafusion/pull/4846) ([jonmmease](https://github.com/jonmmease)) -- Support wildcard select on multiple column using joins 
[\#4840](https://github.com/apache/arrow-datafusion/pull/4840) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) -- Orthogonalize distribution and sort enforcement rules into `EnforceDistribution` and `EnforceSorting` [\#4839](https://github.com/apache/arrow-datafusion/pull/4839) ([mustafasrepo](https://github.com/mustafasrepo)) -- support `select .. FROM 'parquet.file'` in datafusion-cli [\#4838](https://github.com/apache/arrow-datafusion/pull/4838) ([unconsolable](https://github.com/unconsolable)) -- Remove tests from sql_integration that were ported to sqllogictest [\#4836](https://github.com/apache/arrow-datafusion/pull/4836) ([matthewwillian](https://github.com/matthewwillian)) -- add tpch sqllogicaltest and remove some duplicated test [\#4802](https://github.com/apache/arrow-datafusion/pull/4802) ([jackwener](https://github.com/jackwener)) +- docs: improve `Column::normalize_with_schemas` docs [\#4871](https://github.com/apache/datafusion/pull/4871) ([crepererum](https://github.com/crepererum)) +- Skip EliminateCrossJoin rule when meet non-empty join filter [\#4869](https://github.com/apache/datafusion/pull/4869) ([ygf11](https://github.com/ygf11)) +- Support for SQL Natural Join [\#4863](https://github.com/apache/datafusion/pull/4863) [[sql](https://github.com/apache/datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) +- Minor: Move test data into `datafusion/core/tests/data` [\#4855](https://github.com/apache/datafusion/pull/4855) ([alamb](https://github.com/alamb)) +- Covariance single row input & null skipping [\#4852](https://github.com/apache/datafusion/pull/4852) ([korowa](https://github.com/korowa)) +- Document ability to select directly from files in datafusion-cli [\#4851](https://github.com/apache/datafusion/pull/4851) ([alamb](https://github.com/alamb)) +- Fix push_down_projection through a distinct [\#4849](https://github.com/apache/datafusion/pull/4849) 
([Jefffrey](https://github.com/Jefffrey)) +- Support using var/var_pop/stddev/stddev_pop in window expressions with custom frames [\#4848](https://github.com/apache/datafusion/pull/4848) ([jonmmease](https://github.com/jonmmease)) +- Update variance/stddev to work with single values [\#4847](https://github.com/apache/datafusion/pull/4847) ([jonmmease](https://github.com/jonmmease)) +- Implement retract_batch for AvgAccumulator [\#4846](https://github.com/apache/datafusion/pull/4846) ([jonmmease](https://github.com/jonmmease)) +- Support wildcard select on multiple column using joins [\#4840](https://github.com/apache/datafusion/pull/4840) [[sql](https://github.com/apache/datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) +- Orthogonalize distribution and sort enforcement rules into `EnforceDistribution` and `EnforceSorting` [\#4839](https://github.com/apache/datafusion/pull/4839) ([mustafasrepo](https://github.com/mustafasrepo)) +- support `select .. FROM 'parquet.file'` in datafusion-cli [\#4838](https://github.com/apache/datafusion/pull/4838) ([unconsolable](https://github.com/unconsolable)) +- Remove tests from sql_integration that were ported to sqllogictest [\#4836](https://github.com/apache/datafusion/pull/4836) ([matthewwillian](https://github.com/matthewwillian)) +- add tpch sqllogicaltest and remove some duplicated test [\#4802](https://github.com/apache/datafusion/pull/4802) ([jackwener](https://github.com/jackwener)) -## [16.0.0-rc1](https://github.com/apache/arrow-datafusion/tree/16.0.0-rc1) (2023-01-07) +## [16.0.0-rc1](https://github.com/apache/datafusion/tree/16.0.0-rc1) (2023-01-07) -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/15.0.0...16.0.0-rc1) +[Full Changelog](https://github.com/apache/datafusion/compare/15.0.0...16.0.0-rc1) **Breaking changes:** -- Enable PhysicalOptimizerRule lazily \(\#4806\) [\#4807](https://github.com/apache/arrow-datafusion/pull/4807) ([tustvold](https://github.com/tustvold)) -- Move 
ConfigOptions to core [\#4803](https://github.com/apache/arrow-datafusion/pull/4803) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) -- remove Operator::{Like,NotLike,ILike,NotILike} [\#4792](https://github.com/apache/arrow-datafusion/pull/4792) ([unconsolable](https://github.com/unconsolable)) -- Move subquery alias assignment onto rules [\#4767](https://github.com/apache/arrow-datafusion/pull/4767) ([tustvold](https://github.com/tustvold)) -- Make SessionState members private [\#4764](https://github.com/apache/arrow-datafusion/pull/4764) ([tustvold](https://github.com/tustvold)) -- Deprecate SessionContext physical plan methods \(\#4617\) [\#4751](https://github.com/apache/arrow-datafusion/pull/4751) ([tustvold](https://github.com/tustvold)) -- Decouple physical optimizer from SessionConfig \(\#3887\) [\#4749](https://github.com/apache/arrow-datafusion/pull/4749) ([tustvold](https://github.com/tustvold)) -- Don't share ConfigOptions \(\#3886\) [\#4712](https://github.com/apache/arrow-datafusion/pull/4712) ([tustvold](https://github.com/tustvold)) -- Push SessionState into FileFormat \(\#4349\) [\#4699](https://github.com/apache/arrow-datafusion/pull/4699) ([tustvold](https://github.com/tustvold)) -- Make SessionContext members private [\#4698](https://github.com/apache/arrow-datafusion/pull/4698) ([tustvold](https://github.com/tustvold)) -- Make OptimizerConfig a trait \(\#4631\) \(\#4638\) [\#4645](https://github.com/apache/arrow-datafusion/pull/4645) ([tustvold](https://github.com/tustvold)) -- DataFrame owned SessionState [\#4633](https://github.com/apache/arrow-datafusion/pull/4633) ([tustvold](https://github.com/tustvold)) -- Make LogicalPlanBuilder consuming \(\#4622\) [\#4632](https://github.com/apache/arrow-datafusion/pull/4632) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) -- Make DataFrame API consuming \(\#4621\) 
[\#4624](https://github.com/apache/arrow-datafusion/pull/4624) ([tustvold](https://github.com/tustvold)) -- Make execute_stream functions sync [\#4608](https://github.com/apache/arrow-datafusion/pull/4608) ([tustvold](https://github.com/tustvold)) -- Remove ObjectStore from FileStream \(\#4533\) [\#4601](https://github.com/apache/arrow-datafusion/pull/4601) ([tustvold](https://github.com/tustvold)) -- Remove `AggregateState` wrapper [\#4582](https://github.com/apache/arrow-datafusion/pull/4582) ([alamb](https://github.com/alamb)) -- Fix querying and defining table / view names with period [\#4530](https://github.com/apache/arrow-datafusion/pull/4530) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- refactor code about `subquery_alias` and `expr-alias`. [\#4451](https://github.com/apache/arrow-datafusion/pull/4451) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) +- Enable PhysicalOptimizerRule lazily \(\#4806\) [\#4807](https://github.com/apache/datafusion/pull/4807) ([tustvold](https://github.com/tustvold)) +- Move ConfigOptions to core [\#4803](https://github.com/apache/datafusion/pull/4803) [[sql](https://github.com/apache/datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) +- remove Operator::{Like,NotLike,ILike,NotILike} [\#4792](https://github.com/apache/datafusion/pull/4792) ([unconsolable](https://github.com/unconsolable)) +- Move subquery alias assignment onto rules [\#4767](https://github.com/apache/datafusion/pull/4767) ([tustvold](https://github.com/tustvold)) +- Make SessionState members private [\#4764](https://github.com/apache/datafusion/pull/4764) ([tustvold](https://github.com/tustvold)) +- Deprecate SessionContext physical plan methods \(\#4617\) [\#4751](https://github.com/apache/datafusion/pull/4751) ([tustvold](https://github.com/tustvold)) +- Decouple physical optimizer from SessionConfig \(\#3887\) 
[\#4749](https://github.com/apache/datafusion/pull/4749) ([tustvold](https://github.com/tustvold)) +- Don't share ConfigOptions \(\#3886\) [\#4712](https://github.com/apache/datafusion/pull/4712) ([tustvold](https://github.com/tustvold)) +- Push SessionState into FileFormat \(\#4349\) [\#4699](https://github.com/apache/datafusion/pull/4699) ([tustvold](https://github.com/tustvold)) +- Make SessionContext members private [\#4698](https://github.com/apache/datafusion/pull/4698) ([tustvold](https://github.com/tustvold)) +- Make OptimizerConfig a trait \(\#4631\) \(\#4638\) [\#4645](https://github.com/apache/datafusion/pull/4645) ([tustvold](https://github.com/tustvold)) +- DataFrame owned SessionState [\#4633](https://github.com/apache/datafusion/pull/4633) ([tustvold](https://github.com/tustvold)) +- Make LogicalPlanBuilder consuming \(\#4622\) [\#4632](https://github.com/apache/datafusion/pull/4632) [[sql](https://github.com/apache/datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) +- Make DataFrame API consuming \(\#4621\) [\#4624](https://github.com/apache/datafusion/pull/4624) ([tustvold](https://github.com/tustvold)) +- Make execute_stream functions sync [\#4608](https://github.com/apache/datafusion/pull/4608) ([tustvold](https://github.com/tustvold)) +- Remove ObjectStore from FileStream \(\#4533\) [\#4601](https://github.com/apache/datafusion/pull/4601) ([tustvold](https://github.com/tustvold)) +- Remove `AggregateState` wrapper [\#4582](https://github.com/apache/datafusion/pull/4582) ([alamb](https://github.com/alamb)) +- Fix querying and defining table / view names with period [\#4530](https://github.com/apache/datafusion/pull/4530) [[sql](https://github.com/apache/datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- refactor code about `subquery_alias` and `expr-alias`. 
[\#4451](https://github.com/apache/datafusion/pull/4451) [[sql](https://github.com/apache/datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) **Implemented enhancements:** -- Move the ExtractEquijoinPredicate behind the SubqueryFilterToJoin [\#4759](https://github.com/apache/arrow-datafusion/issues/4759) -- Remove the config `datafusion.execution.coalesce_target_batch_size` [\#4756](https://github.com/apache/arrow-datafusion/issues/4756) -- SimplifyExpressions will fail when rebuild equijoin with alias [\#4754](https://github.com/apache/arrow-datafusion/issues/4754) -- Provide a constructor for the ConfigOptions with HashMap\ [\#4752](https://github.com/apache/arrow-datafusion/issues/4752) -- Non-deprecated support for planning SQL without DDL [\#4720](https://github.com/apache/arrow-datafusion/issues/4720) -- Add regression tests for planning TPC-DS queries [\#4718](https://github.com/apache/arrow-datafusion/issues/4718) -- Move the extracting join keys logic to optimizer [\#4710](https://github.com/apache/arrow-datafusion/issues/4710) -- Support compression in `IPCWriter` [\#4708](https://github.com/apache/arrow-datafusion/issues/4708) -- Support prepared statement parameter type inference [\#4700](https://github.com/apache/arrow-datafusion/issues/4700) -- PruningPredicate Use Physical not Logical Predicate [\#4695](https://github.com/apache/arrow-datafusion/issues/4695) -- Support for executing infinite files [\#4692](https://github.com/apache/arrow-datafusion/issues/4692) -- Add a sort rule to remove unnecessary SortExecs from physical plan [\#4686](https://github.com/apache/arrow-datafusion/issues/4686) -- Install `protoc` automatically when building `datafusion/proto` crate [\#4684](https://github.com/apache/arrow-datafusion/issues/4684) -- Make DfSchema wrap SchemaRef [\#4680](https://github.com/apache/arrow-datafusion/issues/4680) -- Reorder the physical plan optimizer rules [\#4678](https://github.com/apache/arrow-datafusion/issues/4678) -- 
Inconsistent behavior with PostgreSQL to decide Window Expressions ordering [\#4641](https://github.com/apache/arrow-datafusion/issues/4641) -- Returns error too late when parsing invalid file compression type. [\#4636](https://github.com/apache/arrow-datafusion/issues/4636) -- Make OptimizerConfig a Trait [\#4631](https://github.com/apache/arrow-datafusion/issues/4631) -- Move Optimize onto DataFrame [\#4626](https://github.com/apache/arrow-datafusion/issues/4626) -- Make LogicalPlanBuilder Consuming [\#4622](https://github.com/apache/arrow-datafusion/issues/4622) -- Make DataFrame Consuming [\#4621](https://github.com/apache/arrow-datafusion/issues/4621) -- rules don't need to recursion inside themselves [\#4613](https://github.com/apache/arrow-datafusion/issues/4613) -- \[window function\] support min max with self define sliding window. [\#4603](https://github.com/apache/arrow-datafusion/issues/4603) -- Add `try_optimize` for all_rules [\#4598](https://github.com/apache/arrow-datafusion/issues/4598) -- Refine the physical plan serialization and deserialization [\#4597](https://github.com/apache/arrow-datafusion/issues/4597) -- Normalize datafusion configuration names [\#4595](https://github.com/apache/arrow-datafusion/issues/4595) -- Add need_data_exchange in the ExecutionPlan to indicate whether a physical operator needs data exchange [\#4585](https://github.com/apache/arrow-datafusion/issues/4585) -- Bump Datafusion sql-parser dependency to 0.28 [\#4573](https://github.com/apache/arrow-datafusion/issues/4573) -- tpch test exist duplicated [\#4563](https://github.com/apache/arrow-datafusion/issues/4563) -- user-defined aggregate function as window function [\#4552](https://github.com/apache/arrow-datafusion/issues/4552) -- Convert a Prepare Logical Plan into a Logical Plan with all parameters replaced with values [\#4550](https://github.com/apache/arrow-datafusion/issues/4550) -- FileStream requires fake ObjectStore when ParquetFileReaderFactory is used 
[\#4533](https://github.com/apache/arrow-datafusion/issues/4533) -- Avoid reading the entire file in ChunkedStore [\#4524](https://github.com/apache/arrow-datafusion/issues/4524) -- Enrich filter statistics predictions with estimated column boundaries [\#4518](https://github.com/apache/arrow-datafusion/issues/4518) -- Show window frame info in physical plan [\#4509](https://github.com/apache/arrow-datafusion/issues/4509) -- Add sqllogictest auto labeler [\#4507](https://github.com/apache/arrow-datafusion/issues/4507) -- Optimize `is_distinct_from` / `is_not_distinct_from` [\#4482](https://github.com/apache/arrow-datafusion/issues/4482) -- Add window func related logic plan to proto ability. [\#4480](https://github.com/apache/arrow-datafusion/issues/4480) -- Make window function related struct public. [\#4479](https://github.com/apache/arrow-datafusion/issues/4479) -- Improve partition file explain plan display to show groupings [\#4466](https://github.com/apache/arrow-datafusion/issues/4466) -- Add support for non-column key for equijoin when eliminating cross join to inner join [\#4442](https://github.com/apache/arrow-datafusion/issues/4442) -- Remove the schema checking from `CrossJoinExec::try_new` [\#4431](https://github.com/apache/arrow-datafusion/issues/4431) -- Initial support for prepared statement [\#4426](https://github.com/apache/arrow-datafusion/issues/4426) -- Add support for NTILE built-in Window Function [\#4403](https://github.com/apache/arrow-datafusion/issues/4403) -- Add Support for MIN, MAX Aggregate Functions when run with custom window frames [\#4402](https://github.com/apache/arrow-datafusion/issues/4402) -- Support `INSERT INTO` statement [\#4397](https://github.com/apache/arrow-datafusion/issues/4397) -- Enhancement: split the SQL `planner` into smaller modules [\#4392](https://github.com/apache/arrow-datafusion/issues/4392) -- Proposal: Improve the join keys of logical plan [\#4389](https://github.com/apache/arrow-datafusion/issues/4389) 
-- Add `MergeSubqueryAlias` rule [\#4383](https://github.com/apache/arrow-datafusion/issues/4383) -- Optimizer rule support `subqueryAlias` [\#4381](https://github.com/apache/arrow-datafusion/issues/4381) -- Rewrite simple regex expressions [\#4370](https://github.com/apache/arrow-datafusion/issues/4370) -- Revisit get_statistics_with_limit\(\) method in datasource mod [\#4323](https://github.com/apache/arrow-datafusion/issues/4323) -- Support for type coercion for a \(Timestamp, Utf8\) pair [\#4311](https://github.com/apache/arrow-datafusion/issues/4311) -- replace the operation about decimal to the arrow-rs kernel [\#4289](https://github.com/apache/arrow-datafusion/issues/4289) -- change` date_part` return types to f64 [\#3997](https://github.com/apache/arrow-datafusion/issues/3997) -- Better api for setting `ConfigOptions` from SessionContext [\#3908](https://github.com/apache/arrow-datafusion/issues/3908) -- Make `ConfigOptions` easier to work with [\#3886](https://github.com/apache/arrow-datafusion/issues/3886) -- An asynchronous version of `CatalogList`/`CatalogProvider`/`SchemaProvider` [\#3777](https://github.com/apache/arrow-datafusion/issues/3777) -- Allow configs to be set with string values [\#3500](https://github.com/apache/arrow-datafusion/issues/3500) -- support scientific notation for SQL literals [\#3448](https://github.com/apache/arrow-datafusion/issues/3448) -- Adopt physical plan serde from arrow-ballista [\#3257](https://github.com/apache/arrow-datafusion/issues/3257) -- Improve codebase readability and error messages by and consistently handle downcasting [\#3152](https://github.com/apache/arrow-datafusion/issues/3152) -- Re-enable where_clauses_object_safety [\#3081](https://github.com/apache/arrow-datafusion/issues/3081) -- optimize/simplify the literal data type and remove unnecessary cast、try_cast [\#3031](https://github.com/apache/arrow-datafusion/issues/3031) -- Move `datafusion-substrait` crate into `arrow-datafusion` repo 
[\#2646](https://github.com/apache/arrow-datafusion/issues/2646) -- \[enhancement\] rules don't need to recursion inside themselves [\#2620](https://github.com/apache/arrow-datafusion/issues/2620) -- Add support for `GROUPING SETS` syntax in SQL planner [\#2469](https://github.com/apache/arrow-datafusion/issues/2469) -- Optimize EXISTS subquery expressions by rewriting as semi-join [\#2351](https://github.com/apache/arrow-datafusion/issues/2351) -- Add Delta Lake TableProvider [\#525](https://github.com/apache/arrow-datafusion/issues/525) -- Support window functions with window frame [\#361](https://github.com/apache/arrow-datafusion/issues/361) +- Move the ExtractEquijoinPredicate behind the SubqueryFilterToJoin [\#4759](https://github.com/apache/datafusion/issues/4759) +- Remove the config `datafusion.execution.coalesce_target_batch_size` [\#4756](https://github.com/apache/datafusion/issues/4756) +- SimplifyExpressions will fail when rebuild equijoin with alias [\#4754](https://github.com/apache/datafusion/issues/4754) +- Provide a constructor for the ConfigOptions with HashMap\ [\#4752](https://github.com/apache/datafusion/issues/4752) +- Non-deprecated support for planning SQL without DDL [\#4720](https://github.com/apache/datafusion/issues/4720) +- Add regression tests for planning TPC-DS queries [\#4718](https://github.com/apache/datafusion/issues/4718) +- Move the extracting join keys logic to optimizer [\#4710](https://github.com/apache/datafusion/issues/4710) +- Support compression in `IPCWriter` [\#4708](https://github.com/apache/datafusion/issues/4708) +- Support prepared statement parameter type inference [\#4700](https://github.com/apache/datafusion/issues/4700) +- PruningPredicate Use Physical not Logical Predicate [\#4695](https://github.com/apache/datafusion/issues/4695) +- Support for executing infinite files [\#4692](https://github.com/apache/datafusion/issues/4692) +- Add a sort rule to remove unnecessary SortExecs from physical plan 
[\#4686](https://github.com/apache/datafusion/issues/4686) +- Install `protoc` automatically when building `datafusion/proto` crate [\#4684](https://github.com/apache/datafusion/issues/4684) +- Make DfSchema wrap SchemaRef [\#4680](https://github.com/apache/datafusion/issues/4680) +- Reorder the physical plan optimizer rules [\#4678](https://github.com/apache/datafusion/issues/4678) +- Inconsistent behavior with PostgreSQL to decide Window Expressions ordering [\#4641](https://github.com/apache/datafusion/issues/4641) +- Returns error too late when parsing invalid file compression type. [\#4636](https://github.com/apache/datafusion/issues/4636) +- Make OptimizerConfig a Trait [\#4631](https://github.com/apache/datafusion/issues/4631) +- Move Optimize onto DataFrame [\#4626](https://github.com/apache/datafusion/issues/4626) +- Make LogicalPlanBuilder Consuming [\#4622](https://github.com/apache/datafusion/issues/4622) +- Make DataFrame Consuming [\#4621](https://github.com/apache/datafusion/issues/4621) +- rules don't need to recursion inside themselves [\#4613](https://github.com/apache/datafusion/issues/4613) +- \[window function\] support min max with self define sliding window. 
[\#4603](https://github.com/apache/datafusion/issues/4603) +- Add `try_optimize` for all_rules [\#4598](https://github.com/apache/datafusion/issues/4598) +- Refine the physical plan serialization and deserialization [\#4597](https://github.com/apache/datafusion/issues/4597) +- Normalize datafusion configuration names [\#4595](https://github.com/apache/datafusion/issues/4595) +- Add need_data_exchange in the ExecutionPlan to indicate whether a physical operator needs data exchange [\#4585](https://github.com/apache/datafusion/issues/4585) +- Bump Datafusion sql-parser dependency to 0.28 [\#4573](https://github.com/apache/datafusion/issues/4573) +- tpch test exist duplicated [\#4563](https://github.com/apache/datafusion/issues/4563) +- user-defined aggregate function as window function [\#4552](https://github.com/apache/datafusion/issues/4552) +- Convert a Prepare Logical Plan into a Logical Plan with all parameters replaced with values [\#4550](https://github.com/apache/datafusion/issues/4550) +- FileStream requires fake ObjectStore when ParquetFileReaderFactory is used [\#4533](https://github.com/apache/datafusion/issues/4533) +- Avoid reading the entire file in ChunkedStore [\#4524](https://github.com/apache/datafusion/issues/4524) +- Enrich filter statistics predictions with estimated column boundaries [\#4518](https://github.com/apache/datafusion/issues/4518) +- Show window frame info in physical plan [\#4509](https://github.com/apache/datafusion/issues/4509) +- Add sqllogictest auto labeler [\#4507](https://github.com/apache/datafusion/issues/4507) +- Optimize `is_distinct_from` / `is_not_distinct_from` [\#4482](https://github.com/apache/datafusion/issues/4482) +- Add window func related logic plan to proto ability. [\#4480](https://github.com/apache/datafusion/issues/4480) +- Make window function related struct public. 
[\#4479](https://github.com/apache/datafusion/issues/4479) +- Improve partition file explain plan display to show groupings [\#4466](https://github.com/apache/datafusion/issues/4466) +- Add support for non-column key for equijoin when eliminating cross join to inner join [\#4442](https://github.com/apache/datafusion/issues/4442) +- Remove the schema checking from `CrossJoinExec::try_new` [\#4431](https://github.com/apache/datafusion/issues/4431) +- Initial support for prepared statement [\#4426](https://github.com/apache/datafusion/issues/4426) +- Add support for NTILE built-in Window Function [\#4403](https://github.com/apache/datafusion/issues/4403) +- Add Support for MIN, MAX Aggregate Functions when run with custom window frames [\#4402](https://github.com/apache/datafusion/issues/4402) +- Support `INSERT INTO` statement [\#4397](https://github.com/apache/datafusion/issues/4397) +- Enhancement: split the SQL `planner` into smaller modules [\#4392](https://github.com/apache/datafusion/issues/4392) +- Proposal: Improve the join keys of logical plan [\#4389](https://github.com/apache/datafusion/issues/4389) +- Add `MergeSubqueryAlias` rule [\#4383](https://github.com/apache/datafusion/issues/4383) +- Optimizer rule support `subqueryAlias` [\#4381](https://github.com/apache/datafusion/issues/4381) +- Rewrite simple regex expressions [\#4370](https://github.com/apache/datafusion/issues/4370) +- Revisit get_statistics_with_limit\(\) method in datasource mod [\#4323](https://github.com/apache/datafusion/issues/4323) +- Support for type coercion for a \(Timestamp, Utf8\) pair [\#4311](https://github.com/apache/datafusion/issues/4311) +- replace the operation about decimal to the arrow-rs kernel [\#4289](https://github.com/apache/datafusion/issues/4289) +- change` date_part` return types to f64 [\#3997](https://github.com/apache/datafusion/issues/3997) +- Better api for setting `ConfigOptions` from SessionContext 
[\#3908](https://github.com/apache/datafusion/issues/3908) +- Make `ConfigOptions` easier to work with [\#3886](https://github.com/apache/datafusion/issues/3886) +- An asynchronous version of `CatalogList`/`CatalogProvider`/`SchemaProvider` [\#3777](https://github.com/apache/datafusion/issues/3777) +- Allow configs to be set with string values [\#3500](https://github.com/apache/datafusion/issues/3500) +- support scientific notation for SQL literals [\#3448](https://github.com/apache/datafusion/issues/3448) +- Adopt physical plan serde from arrow-ballista [\#3257](https://github.com/apache/datafusion/issues/3257) +- Improve codebase readability and error messages by and consistently handle downcasting [\#3152](https://github.com/apache/datafusion/issues/3152) +- Re-enable where_clauses_object_safety [\#3081](https://github.com/apache/datafusion/issues/3081) +- optimize/simplify the literal data type and remove unnecessary cast、try_cast [\#3031](https://github.com/apache/datafusion/issues/3031) +- Move `datafusion-substrait` crate into `arrow-datafusion` repo [\#2646](https://github.com/apache/datafusion/issues/2646) +- \[enhancement\] rules don't need to recursion inside themselves [\#2620](https://github.com/apache/datafusion/issues/2620) +- Add support for `GROUPING SETS` syntax in SQL planner [\#2469](https://github.com/apache/datafusion/issues/2469) +- Optimize EXISTS subquery expressions by rewriting as semi-join [\#2351](https://github.com/apache/datafusion/issues/2351) +- Add Delta Lake TableProvider [\#525](https://github.com/apache/datafusion/issues/525) +- Support window functions with window frame [\#361](https://github.com/apache/datafusion/issues/361) **Fixed bugs:** -- PushdownFilter rule exist bug will cause filter change wrong [\#4822](https://github.com/apache/arrow-datafusion/issues/4822) -- Unlimited memory consumption in `RepartitionExec` [\#4816](https://github.com/apache/arrow-datafusion/issues/4816) -- Physical Optimizer Config Mutation 
Doesn't Take Effect [\#4806](https://github.com/apache/arrow-datafusion/issues/4806) -- cargo test failed `error: linking with `cc` failed: exit status: 1` [\#4790](https://github.com/apache/arrow-datafusion/issues/4790) -- Parquet files generated by DataFusion cannot be read by Apache Spark [\#4782](https://github.com/apache/arrow-datafusion/issues/4782) -- datafusion-physical-expr doesn't compile when blake3/traits-preview is enabled [\#4781](https://github.com/apache/arrow-datafusion/issues/4781) -- Multiple ways to express `like` / `ilike` / `not like` / `not ilike` [\#4765](https://github.com/apache/arrow-datafusion/issues/4765) -- SessionState::optimize and SessionState::create_physical_plan Don't Update Query Start Time [\#4747](https://github.com/apache/arrow-datafusion/issues/4747) -- Page Filtering Incorrectly Handles Pages with Different Row Counts [\#4744](https://github.com/apache/arrow-datafusion/issues/4744) -- cargo test failing on master due to tpcds_logical_q41 stackoverflow [\#4728](https://github.com/apache/arrow-datafusion/issues/4728) -- PruningPredicate Different Evaluation Context from Query [\#4693](https://github.com/apache/arrow-datafusion/issues/4693) -- Skipping optimizer rule due to create_name not supporting wildcard [\#4681](https://github.com/apache/arrow-datafusion/issues/4681) -- Create physical plan bug: got Arrow schema with 1 and DataFusion schema with 0 [\#4677](https://github.com/apache/arrow-datafusion/issues/4677) -- Timestamp \<-\> Date32 compare doesn't work [\#4672](https://github.com/apache/arrow-datafusion/issues/4672) -- Wrongly use the function `clamp` [\#4654](https://github.com/apache/arrow-datafusion/issues/4654) -- Fix the clippy errors [\#4653](https://github.com/apache/arrow-datafusion/issues/4653) -- Filter Null Keys Update Not Taking Effect [\#4638](https://github.com/apache/arrow-datafusion/issues/4638) -- Should not generate duplicate sort keys from Window expr's partition by keys 
[\#4635](https://github.com/apache/arrow-datafusion/issues/4635) -- `common_sub_expression_eliminate` exists bug [\#4575](https://github.com/apache/arrow-datafusion/issues/4575) -- Confusing "Bare" in doesn't exist messages [\#4571](https://github.com/apache/arrow-datafusion/issues/4571) -- `having` shouldn't include alias in projection [\#4556](https://github.com/apache/arrow-datafusion/issues/4556) -- wrong comment about having [\#4554](https://github.com/apache/arrow-datafusion/issues/4554) -- `drop view t1, t2, ...` and `drop table t1, t2, ...` silently ignores arguments past the first [\#4531](https://github.com/apache/arrow-datafusion/issues/4531) -- Extract from timestamp doesn't support nanosecond [\#4528](https://github.com/apache/arrow-datafusion/issues/4528) -- `prepare_select_exprs` don't need `outer_query_schema` [\#4526](https://github.com/apache/arrow-datafusion/issues/4526) -- Table names with periods are not handled correctly [\#4513](https://github.com/apache/arrow-datafusion/issues/4513) -- `Push_down_projection` push redundant column. [\#4486](https://github.com/apache/arrow-datafusion/issues/4486) -- Planner don't generate `SubqueryAlias` [\#4483](https://github.com/apache/arrow-datafusion/issues/4483) -- Planner generate replicated `Projection` | `SubqueryAlias` [\#4481](https://github.com/apache/arrow-datafusion/issues/4481) -- `apply_table_alias` will ignore alias_name when columns is empty. 
[\#4454](https://github.com/apache/arrow-datafusion/issues/4454) -- Fix output_ordering of WindowAggExec [\#4438](https://github.com/apache/arrow-datafusion/issues/4438) -- Incorrect error for plus/minus operations over timestamps and dates [\#4420](https://github.com/apache/arrow-datafusion/issues/4420) -- Optimization rule `filter_push_down` causes `FieldNotFound` error [\#4401](https://github.com/apache/arrow-datafusion/issues/4401) -- Should not convert a normal non-inner join to Cross Join when there are non-equal Join conditions [\#4363](https://github.com/apache/arrow-datafusion/issues/4363) -- MemoryConsumer::try_grow Underflow [\#4328](https://github.com/apache/arrow-datafusion/issues/4328) -- Potential MemoryManager Deadlock [\#4325](https://github.com/apache/arrow-datafusion/issues/4325) -- `create external table` should fail to parse if syntax is incorrect [\#4262](https://github.com/apache/arrow-datafusion/issues/4262) -- Nullif func states support for Boolean type, but fails if this is attempted [\#4205](https://github.com/apache/arrow-datafusion/issues/4205) -- `ProjectionPushDown` rule don't consider the alias in projection. [\#4174](https://github.com/apache/arrow-datafusion/issues/4174) -- Stack overflow planning complex query [\#4065](https://github.com/apache/arrow-datafusion/issues/4065) -- Can not use `extract ` on the value of `now()` [\#3980](https://github.com/apache/arrow-datafusion/issues/3980) -- Bug with intervals and logical and/or [\#3944](https://github.com/apache/arrow-datafusion/issues/3944) -- CoalesceBatches doesn't provide correct elapsed_compute info in metrics [\#3894](https://github.com/apache/arrow-datafusion/issues/3894) -- Paniced at to_timestamp_micros function when the timestamp is too large. 
[\#3832](https://github.com/apache/arrow-datafusion/issues/3832) -- Optimizer casts decimals to different values on different platforms [\#3791](https://github.com/apache/arrow-datafusion/issues/3791) -- CSV inference reads in the whole file to memory, regardless of row limit [\#3658](https://github.com/apache/arrow-datafusion/issues/3658) -- after type coercion `CommonSubexprEliminate` will produce invalid projection [\#3635](https://github.com/apache/arrow-datafusion/issues/3635) -- panic at `attempt to multiply with overflow` when doing math on Decimal128 columns [\#3437](https://github.com/apache/arrow-datafusion/issues/3437) -- Precedence bug with date comparison to date plus interval [\#3408](https://github.com/apache/arrow-datafusion/issues/3408) -- Median aggregation using DataFrame panics: "AggregateState is not a scalar aggregate" [\#3105](https://github.com/apache/arrow-datafusion/issues/3105) -- `date_part` does't work for `now()` [\#3096](https://github.com/apache/arrow-datafusion/issues/3096) -- hash_join panics when join keys have different data types [\#2877](https://github.com/apache/arrow-datafusion/issues/2877) -- Memory manager triggers unnecessary spills [\#2829](https://github.com/apache/arrow-datafusion/issues/2829) -- Address performance/execution plan of TPCH query 9 [\#77](https://github.com/apache/arrow-datafusion/issues/77) +- PushdownFilter rule exist bug will cause filter change wrong [\#4822](https://github.com/apache/datafusion/issues/4822) +- Unlimited memory consumption in `RepartitionExec` [\#4816](https://github.com/apache/datafusion/issues/4816) +- Physical Optimizer Config Mutation Doesn't Take Effect [\#4806](https://github.com/apache/datafusion/issues/4806) +- cargo test failed `error: linking with `cc` failed: exit status: 1` [\#4790](https://github.com/apache/datafusion/issues/4790) +- Parquet files generated by DataFusion cannot be read by Apache Spark [\#4782](https://github.com/apache/datafusion/issues/4782) +- 
datafusion-physical-expr doesn't compile when blake3/traits-preview is enabled [\#4781](https://github.com/apache/datafusion/issues/4781) +- Multiple ways to express `like` / `ilike` / `not like` / `not ilike` [\#4765](https://github.com/apache/datafusion/issues/4765) +- SessionState::optimize and SessionState::create_physical_plan Don't Update Query Start Time [\#4747](https://github.com/apache/datafusion/issues/4747) +- Page Filtering Incorrectly Handles Pages with Different Row Counts [\#4744](https://github.com/apache/datafusion/issues/4744) +- cargo test failing on master due to tpcds_logical_q41 stackoverflow [\#4728](https://github.com/apache/datafusion/issues/4728) +- PruningPredicate Different Evaluation Context from Query [\#4693](https://github.com/apache/datafusion/issues/4693) +- Skipping optimizer rule due to create_name not supporting wildcard [\#4681](https://github.com/apache/datafusion/issues/4681) +- Create physical plan bug: got Arrow schema with 1 and DataFusion schema with 0 [\#4677](https://github.com/apache/datafusion/issues/4677) +- Timestamp \<-\> Date32 compare doesn't work [\#4672](https://github.com/apache/datafusion/issues/4672) +- Wrongly use the function `clamp` [\#4654](https://github.com/apache/datafusion/issues/4654) +- Fix the clippy errors [\#4653](https://github.com/apache/datafusion/issues/4653) +- Filter Null Keys Update Not Taking Effect [\#4638](https://github.com/apache/datafusion/issues/4638) +- Should not generate duplicate sort keys from Window expr's partition by keys [\#4635](https://github.com/apache/datafusion/issues/4635) +- `common_sub_expression_eliminate` exists bug [\#4575](https://github.com/apache/datafusion/issues/4575) +- Confusing "Bare" in doesn't exist messages [\#4571](https://github.com/apache/datafusion/issues/4571) +- `having` shouldn't include alias in projection [\#4556](https://github.com/apache/datafusion/issues/4556) +- wrong comment about having 
[\#4554](https://github.com/apache/datafusion/issues/4554) +- `drop view t1, t2, ...` and `drop table t1, t2, ...` silently ignores arguments past the first [\#4531](https://github.com/apache/datafusion/issues/4531) +- Extract from timestamp doesn't support nanosecond [\#4528](https://github.com/apache/datafusion/issues/4528) +- `prepare_select_exprs` don't need `outer_query_schema` [\#4526](https://github.com/apache/datafusion/issues/4526) +- Table names with periods are not handled correctly [\#4513](https://github.com/apache/datafusion/issues/4513) +- `Push_down_projection` push redundant column. [\#4486](https://github.com/apache/datafusion/issues/4486) +- Planner don't generate `SubqueryAlias` [\#4483](https://github.com/apache/datafusion/issues/4483) +- Planner generate replicated `Projection` | `SubqueryAlias` [\#4481](https://github.com/apache/datafusion/issues/4481) +- `apply_table_alias` will ignore alias_name when columns is empty. [\#4454](https://github.com/apache/datafusion/issues/4454) +- Fix output_ordering of WindowAggExec [\#4438](https://github.com/apache/datafusion/issues/4438) +- Incorrect error for plus/minus operations over timestamps and dates [\#4420](https://github.com/apache/datafusion/issues/4420) +- Optimization rule `filter_push_down` causes `FieldNotFound` error [\#4401](https://github.com/apache/datafusion/issues/4401) +- Should not convert a normal non-inner join to Cross Join when there are non-equal Join conditions [\#4363](https://github.com/apache/datafusion/issues/4363) +- MemoryConsumer::try_grow Underflow [\#4328](https://github.com/apache/datafusion/issues/4328) +- Potential MemoryManager Deadlock [\#4325](https://github.com/apache/datafusion/issues/4325) +- `create external table` should fail to parse if syntax is incorrect [\#4262](https://github.com/apache/datafusion/issues/4262) +- Nullif func states support for Boolean type, but fails if this is attempted [\#4205](https://github.com/apache/datafusion/issues/4205) +- 
`ProjectionPushDown` rule don't consider the alias in projection. [\#4174](https://github.com/apache/datafusion/issues/4174) +- Stack overflow planning complex query [\#4065](https://github.com/apache/datafusion/issues/4065) +- Can not use `extract ` on the value of `now()` [\#3980](https://github.com/apache/datafusion/issues/3980) +- Bug with intervals and logical and/or [\#3944](https://github.com/apache/datafusion/issues/3944) +- CoalesceBatches doesn't provide correct elapsed_compute info in metrics [\#3894](https://github.com/apache/datafusion/issues/3894) +- Paniced at to_timestamp_micros function when the timestamp is too large. [\#3832](https://github.com/apache/datafusion/issues/3832) +- Optimizer casts decimals to different values on different platforms [\#3791](https://github.com/apache/datafusion/issues/3791) +- CSV inference reads in the whole file to memory, regardless of row limit [\#3658](https://github.com/apache/datafusion/issues/3658) +- after type coercion `CommonSubexprEliminate` will produce invalid projection [\#3635](https://github.com/apache/datafusion/issues/3635) +- panic at `attempt to multiply with overflow` when doing math on Decimal128 columns [\#3437](https://github.com/apache/datafusion/issues/3437) +- Precedence bug with date comparison to date plus interval [\#3408](https://github.com/apache/datafusion/issues/3408) +- Median aggregation using DataFrame panics: "AggregateState is not a scalar aggregate" [\#3105](https://github.com/apache/datafusion/issues/3105) +- `date_part` does't work for `now()` [\#3096](https://github.com/apache/datafusion/issues/3096) +- hash_join panics when join keys have different data types [\#2877](https://github.com/apache/datafusion/issues/2877) +- Memory manager triggers unnecessary spills [\#2829](https://github.com/apache/datafusion/issues/2829) +- Address performance/execution plan of TPCH query 9 [\#77](https://github.com/apache/datafusion/issues/77) **Documentation updates:** -- Add a new open 
source project that is use DataFusion as query engine [\#4768](https://github.com/apache/arrow-datafusion/pull/4768) ([francis-du](https://github.com/francis-du)) +- Add a new open source project that is use DataFusion as query engine [\#4768](https://github.com/apache/datafusion/pull/4768) ([francis-du](https://github.com/francis-du)) **Closed issues:** -- move the tests in planner [\#4798](https://github.com/apache/arrow-datafusion/issues/4798) -- Make it easier to update sqltestlogic test expected output \("test script completion mode"\) [\#4570](https://github.com/apache/arrow-datafusion/issues/4570) -- Make ConfigOption names into an Enum [\#4517](https://github.com/apache/arrow-datafusion/issues/4517) -- Implement null / empty string handling for sqllogictest [\#4500](https://github.com/apache/arrow-datafusion/issues/4500) -- Write a blog about parquet predicate pushdown [\#3464](https://github.com/apache/arrow-datafusion/issues/3464) -- Ensure column names are equivalent with or without optimization [\#1123](https://github.com/apache/arrow-datafusion/issues/1123) +- move the tests in planner [\#4798](https://github.com/apache/datafusion/issues/4798) +- Make it easier to update sqltestlogic test expected output \("test script completion mode"\) [\#4570](https://github.com/apache/datafusion/issues/4570) +- Make ConfigOption names into an Enum [\#4517](https://github.com/apache/datafusion/issues/4517) +- Implement null / empty string handling for sqllogictest [\#4500](https://github.com/apache/datafusion/issues/4500) +- Write a blog about parquet predicate pushdown [\#3464](https://github.com/apache/datafusion/issues/3464) +- Ensure column names are equivalent with or without optimization [\#1123](https://github.com/apache/datafusion/issues/1123) **Merged pull requests:** -- Bump tokio from 1.23.0 to 1.23.1 in /datafusion-cli [\#4835](https://github.com/apache/arrow-datafusion/pull/4835) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Fix a few links 
in `roadmap.md` [\#4833](https://github.com/apache/arrow-datafusion/pull/4833) ([romanz](https://github.com/romanz)) -- DataFusion 16.0.0 release prep: Update version + add changelog [\#4831](https://github.com/apache/arrow-datafusion/pull/4831) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) -- feat: use arrow row format for hash-group-by [\#4830](https://github.com/apache/arrow-datafusion/pull/4830) ([crepererum](https://github.com/crepererum)) -- refactor: split relation of planner into one part. [\#4829](https://github.com/apache/arrow-datafusion/pull/4829) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) -- bugfix: remove cnf_rewrite in push_down_filter [\#4825](https://github.com/apache/arrow-datafusion/pull/4825) ([jackwener](https://github.com/jackwener)) -- minor: add some comments to row group pruning tests [\#4823](https://github.com/apache/arrow-datafusion/pull/4823) ([alamb](https://github.com/alamb)) -- Handle trailing tbl column in TPCH benchmarks [\#4821](https://github.com/apache/arrow-datafusion/pull/4821) ([tustvold](https://github.com/tustvold)) -- fix: account for memory in `RepartitionExec` [\#4820](https://github.com/apache/arrow-datafusion/pull/4820) ([crepererum](https://github.com/crepererum)) -- Fix clippy [\#4817](https://github.com/apache/arrow-datafusion/pull/4817) ([tustvold](https://github.com/tustvold)) -- Add test cases: row group filter with missing statistics for decimal data type [\#4810](https://github.com/apache/arrow-datafusion/pull/4810) ([liukun4515](https://github.com/liukun4515)) -- Move default catalog and schema onto ConfigOptions \(\#3887\) [\#4805](https://github.com/apache/arrow-datafusion/pull/4805) ([tustvold](https://github.com/tustvold)) -- remove duplicated test [\#4800](https://github.com/apache/arrow-datafusion/pull/4800) ([jackwener](https://github.com/jackwener)) -- Update sqlparser 
requirement from 0.29 to 0.30 [\#4799](https://github.com/apache/arrow-datafusion/pull/4799) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([dependabot[bot]](https://github.com/apps/dependabot)) -- rewrite the function `ensure_any_column_reference_is_unambiguous` [\#4797](https://github.com/apache/arrow-datafusion/pull/4797) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([HaoYang670](https://github.com/HaoYang670)) -- Uncomment nanoseconds tests after sql parser upgrade [\#4789](https://github.com/apache/arrow-datafusion/pull/4789) ([comphead](https://github.com/comphead)) -- fix: ListingSchemaProvider directory paths \(related: \#4204\) [\#4788](https://github.com/apache/arrow-datafusion/pull/4788) ([cfraz89](https://github.com/cfraz89)) -- Minimize stack space required to plan deeply nested binary expressions [\#4787](https://github.com/apache/arrow-datafusion/pull/4787) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Minor: Refactor some sql planning code into functions [\#4785](https://github.com/apache/arrow-datafusion/pull/4785) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Make datafusion-physical-expr compatible with blake3/traits-preview feature. [\#4784](https://github.com/apache/arrow-datafusion/pull/4784) ([BoredPerson](https://github.com/BoredPerson)) -- refactor: split expression pf planner into one part. 
[\#4783](https://github.com/apache/arrow-datafusion/pull/4783) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) -- Fix Stack overflow in sql planning in debug builds [\#4779](https://github.com/apache/arrow-datafusion/pull/4779) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Pipeline-friendly Bounded Memory Window Executor [\#4777](https://github.com/apache/arrow-datafusion/pull/4777) ([mustafasrepo](https://github.com/mustafasrepo)) -- Implement OptimizerConfig for SessionState [\#4775](https://github.com/apache/arrow-datafusion/pull/4775) ([tustvold](https://github.com/tustvold)) -- refactor: extract `parse_value` [\#4774](https://github.com/apache/arrow-datafusion/pull/4774) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) -- Structify ConfigOptions \(\#4517\) [\#4771](https://github.com/apache/arrow-datafusion/pull/4771) ([tustvold](https://github.com/tustvold)) -- Update sqlparser to `29.0.0` [\#4770](https://github.com/apache/arrow-datafusion/pull/4770) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Refactor extract_join_keys and move the ExtractEquijoinPredicate rule [\#4760](https://github.com/apache/arrow-datafusion/pull/4760) ([ygf11](https://github.com/ygf11)) -- Remove the config datafusion.execution.coalesce_target_batch_size and use datafusion.execution.batch_size instead [\#4757](https://github.com/apache/arrow-datafusion/pull/4757) ([yahoNanJing](https://github.com/yahoNanJing)) -- Add alias check for equijoin in from_plan [\#4755](https://github.com/apache/arrow-datafusion/pull/4755) ([ygf11](https://github.com/ygf11)) -- Take the top level `schema` into account when creating `UnionExec` [\#4753](https://github.com/apache/arrow-datafusion/pull/4753) ([HaoYang670](https://github.com/HaoYang670)) -- Set 
query_execution_start_time on snapshot from SessionContext \(\#4747\) [\#4750](https://github.com/apache/arrow-datafusion/pull/4750) ([tustvold](https://github.com/tustvold)) -- minor: Improve docstrings [\#4748](https://github.com/apache/arrow-datafusion/pull/4748) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Append generated column to the schema instead of prepending for WindowAggExec [\#4746](https://github.com/apache/arrow-datafusion/pull/4746) ([mustafasrepo](https://github.com/mustafasrepo)) -- Minor: comments about coercion in physical planner [\#4745](https://github.com/apache/arrow-datafusion/pull/4745) ([alamb](https://github.com/alamb)) -- Simplify parquet filter predicate test, fix Page Filtering Incorrectly Handles Pages with Different Row Counts [\#4743](https://github.com/apache/arrow-datafusion/pull/4743) ([tustvold](https://github.com/tustvold)) -- support byte array for decimal in parquet page and row group filters [\#4742](https://github.com/apache/arrow-datafusion/pull/4742) ([liukun4515](https://github.com/liukun4515)) -- revert some code for \#4726 / remove unnecessary coercion in physical plans [\#4741](https://github.com/apache/arrow-datafusion/pull/4741) ([liukun4515](https://github.com/liukun4515)) -- Cleanup InformationSchema plumbing [\#4740](https://github.com/apache/arrow-datafusion/pull/4740) ([tustvold](https://github.com/tustvold)) -- Minor: use a common method to check the validate of equijoin predicate [\#4739](https://github.com/apache/arrow-datafusion/pull/4739) ([ygf11](https://github.com/ygf11)) -- minor: Support more data type for `null_counts` in the `PruningStatistics` [\#4738](https://github.com/apache/arrow-datafusion/pull/4738) ([liukun4515](https://github.com/liukun4515)) -- Extended datatypes & signatures support for `NULLIF` function [\#4737](https://github.com/apache/arrow-datafusion/pull/4737) ([korowa](https://github.com/korowa)) -- minor: improve debug 
logging for pruning predicates [\#4736](https://github.com/apache/arrow-datafusion/pull/4736) ([alamb](https://github.com/alamb)) -- refactor: parallelize `parquet_exec` test case `single_file` [\#4735](https://github.com/apache/arrow-datafusion/pull/4735) ([waynexia](https://github.com/waynexia)) -- fix: add one more projection to recover output schema [\#4733](https://github.com/apache/arrow-datafusion/pull/4733) ([waynexia](https://github.com/waynexia)) -- remove `SubqueryFilterToJoin` [\#4731](https://github.com/apache/arrow-datafusion/pull/4731) ([jackwener](https://github.com/jackwener)) -- Create writer with `arrow::ipc::IPCWriteOptions` [\#4730](https://github.com/apache/arrow-datafusion/pull/4730) ([askoa](https://github.com/askoa)) -- Implement cast between Date and Timestamp [\#4726](https://github.com/apache/arrow-datafusion/pull/4726) ([comphead](https://github.com/comphead)) -- Dynamic information_schema configuration and port more tests [\#4722](https://github.com/apache/arrow-datafusion/pull/4722) ([alamb](https://github.com/alamb)) -- Add TPC-DS query planning regression tests [\#4719](https://github.com/apache/arrow-datafusion/pull/4719) ([andygrove](https://github.com/andygrove)) -- Minor: refactor streaming CSV inference code [\#4717](https://github.com/apache/arrow-datafusion/pull/4717) ([alamb](https://github.com/alamb)) -- Reorder the physical plan optimizer rules, extract `GlobalSortSelection`, make `Repartition` optional [\#4714](https://github.com/apache/arrow-datafusion/pull/4714) ([yahoNanJing](https://github.com/yahoNanJing)) -- Eagerly construct PagePruningPredicate [\#4713](https://github.com/apache/arrow-datafusion/pull/4713) ([tustvold](https://github.com/tustvold)) -- Move the extract_join_keys to optimizer [\#4711](https://github.com/apache/arrow-datafusion/pull/4711) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([ygf11](https://github.com/ygf11)) -- Avoid to bypass `try_new/new()` to build plan directly and 
cleanup filter [\#4702](https://github.com/apache/arrow-datafusion/pull/4702) ([jackwener](https://github.com/jackwener)) -- MINOR: Remove where_clause_object_safety clippy ignore \(\#3081\) [\#4696](https://github.com/apache/arrow-datafusion/pull/4696) ([tustvold](https://github.com/tustvold)) -- Support for executing infinite files and boundedness-aware join reordering rule [\#4694](https://github.com/apache/arrow-datafusion/pull/4694) ([metesynnada](https://github.com/metesynnada)) -- Unnecessary SortExec removal rule from Physical Plan [\#4691](https://github.com/apache/arrow-datafusion/pull/4691) ([mustafasrepo](https://github.com/mustafasrepo)) -- minor: rename the github actions [\#4689](https://github.com/apache/arrow-datafusion/pull/4689) ([jackwener](https://github.com/jackwener)) -- FOLLOWUP: remove more recursion in optimizer rules. [\#4687](https://github.com/apache/arrow-datafusion/pull/4687) ([jackwener](https://github.com/jackwener)) -- Add line that prevents display_name from being called on Wildcard [\#4682](https://github.com/apache/arrow-datafusion/pull/4682) ([andre-cc-natzka](https://github.com/andre-cc-natzka)) -- Deprecate SessionContext::create_logical_plan \(\#4617\) [\#4679](https://github.com/apache/arrow-datafusion/pull/4679) ([tustvold](https://github.com/tustvold)) -- Support `NTILE` window function [\#4676](https://github.com/apache/arrow-datafusion/pull/4676) ([berkaycpp](https://github.com/berkaycpp)) -- Support min max aggregates in window functions with sliding windows [\#4675](https://github.com/apache/arrow-datafusion/pull/4675) ([berkaycpp](https://github.com/berkaycpp)) -- Refactor Expr::AggregateFunction and Expr::WindowFunction to use struct [\#4671](https://github.com/apache/arrow-datafusion/pull/4671) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) -- Support type coercion for equijoin [\#4666](https://github.com/apache/arrow-datafusion/pull/4666) 
([ygf11](https://github.com/ygf11)) -- Add `--complete` auto completion mode to `sqllogictests` [\#4665](https://github.com/apache/arrow-datafusion/pull/4665) ([alamb](https://github.com/alamb)) -- Fix CoalesceBatches elasped_compute metric [\#4664](https://github.com/apache/arrow-datafusion/pull/4664) ([Jefffrey](https://github.com/Jefffrey)) -- Refactor Expr::Sort to use struct [\#4663](https://github.com/apache/arrow-datafusion/pull/4663) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) -- More descriptive error for plus/minus between timestamps/dates [\#4662](https://github.com/apache/arrow-datafusion/pull/4662) ([Jefffrey](https://github.com/Jefffrey)) -- Stream CSV file during schema inference [\#4661](https://github.com/apache/arrow-datafusion/pull/4661) ([Jefffrey](https://github.com/Jefffrey)) -- Refine the logical and physical plan serialization and deserialization [\#4659](https://github.com/apache/arrow-datafusion/pull/4659) ([yahoNanJing](https://github.com/yahoNanJing)) -- Use thiserror in sqllogictest erorr [\#4657](https://github.com/apache/arrow-datafusion/pull/4657) ([xudong963](https://github.com/xudong963)) -- fix `cargo clippy` warning [\#4652](https://github.com/apache/arrow-datafusion/pull/4652) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) -- Improve group by hash performance: avoid group-key/-state clones for hash-groupby [\#4651](https://github.com/apache/arrow-datafusion/pull/4651) ([crepererum](https://github.com/crepererum)) -- remove recursion in optimizer rules [\#4650](https://github.com/apache/arrow-datafusion/pull/4650) ([jackwener](https://github.com/jackwener)) -- replace the arithmetic op for decimal array op decimal array using arrow kernel [\#4648](https://github.com/apache/arrow-datafusion/pull/4648) ([liukun4515](https://github.com/liukun4515)) -- simplify regex expressions 
[\#4646](https://github.com/apache/arrow-datafusion/pull/4646) ([crepererum](https://github.com/crepererum)) -- Avoid generate duplicate sort Keys from Window Expressions, fix bug when decide Window Expressions ordering [\#4643](https://github.com/apache/arrow-datafusion/pull/4643) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([mingmwang](https://github.com/mingmwang)) -- Refactor Expr::TryCast to use a struct [\#4642](https://github.com/apache/arrow-datafusion/pull/4642) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([ygf11](https://github.com/ygf11)) -- add `ILIKE` support [\#4639](https://github.com/apache/arrow-datafusion/pull/4639) ([crepererum](https://github.com/crepererum)) -- Detect invalid \(unsupported\) compression types when parsing [\#4637](https://github.com/apache/arrow-datafusion/pull/4637) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([HaoYang670](https://github.com/HaoYang670)) -- unwrap_cast_in_comparison.rs: support unint \<-\> decimal [\#4634](https://github.com/apache/arrow-datafusion/pull/4634) ([liukun4515](https://github.com/liukun4515)) -- MINOR: Fix incorrect config definitions [\#4623](https://github.com/apache/arrow-datafusion/pull/4623) ([andygrove](https://github.com/andygrove)) -- FOLLOWUP: remove `optimize()` [\#4619](https://github.com/apache/arrow-datafusion/pull/4619) ([jackwener](https://github.com/jackwener)) -- Optimizer: avoid every rule must recursive children in optimizer [\#4618](https://github.com/apache/arrow-datafusion/pull/4618) ([jackwener](https://github.com/jackwener)) -- fix: run logical optimizer rules for `TableScan` expressions [\#4614](https://github.com/apache/arrow-datafusion/pull/4614) ([crepererum](https://github.com/crepererum)) -- refactor: relax the signature of register\_\* in SessionContext [\#4612](https://github.com/apache/arrow-datafusion/pull/4612) ([waynexia](https://github.com/waynexia)) -- Remove the function `consume_token` from the parser 
[\#4609](https://github.com/apache/arrow-datafusion/pull/4609) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([HaoYang670](https://github.com/HaoYang670)) -- Make SchemaProvider::table async [\#4607](https://github.com/apache/arrow-datafusion/pull/4607) ([tustvold](https://github.com/tustvold)) -- Lazy system tables [\#4606](https://github.com/apache/arrow-datafusion/pull/4606) ([tustvold](https://github.com/tustvold)) -- Refactor: Change equijoin keys from column to expression in logical join [\#4602](https://github.com/apache/arrow-datafusion/pull/4602) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([ygf11](https://github.com/ygf11)) -- refactor: extract `assert_optimized_plan_eq` from UT. [\#4600](https://github.com/apache/arrow-datafusion/pull/4600) ([jackwener](https://github.com/jackwener)) -- add `try_optimize()` for all rules. [\#4599](https://github.com/apache/arrow-datafusion/pull/4599) ([jackwener](https://github.com/jackwener)) -- Normalize datafusion configuration names [\#4596](https://github.com/apache/arrow-datafusion/pull/4596) ([yahoNanJing](https://github.com/yahoNanJing)) -- Fix the bugs in parsing `COMPRESSION TYPE` [\#4590](https://github.com/apache/arrow-datafusion/pull/4590) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([HaoYang670](https://github.com/HaoYang670)) -- Minor: Remove datafusion-core dev dependency from datafusion-sql [\#4589](https://github.com/apache/arrow-datafusion/pull/4589) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Improve error handling for array downcasting [\#4588](https://github.com/apache/arrow-datafusion/pull/4588) ([retikulum](https://github.com/retikulum)) -- Update to arrow v29 [\#4587](https://github.com/apache/arrow-datafusion/pull/4587) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) -- Add need_data_exchange in the ExecutionPlan to indicate 
whether a physical operator needs data exchange [\#4586](https://github.com/apache/arrow-datafusion/pull/4586) ([yahoNanJing](https://github.com/yahoNanJing)) -- Move subset of select tests to sqllogic [\#4583](https://github.com/apache/arrow-datafusion/pull/4583) ([ajayaa](https://github.com/ajayaa)) -- bugfix: just allow having use expr in `groupby` or `aggr` [\#4579](https://github.com/apache/arrow-datafusion/pull/4579) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) -- Output sqllogictests with arrow display rather than CSV writer [\#4578](https://github.com/apache/arrow-datafusion/pull/4578) ([alamb](https://github.com/alamb)) -- Minor: Add test case for reduce cross join [\#4577](https://github.com/apache/arrow-datafusion/pull/4577) ([ygf11](https://github.com/ygf11)) -- refactor: remove redundant `outer_query_schema` [\#4576](https://github.com/apache/arrow-datafusion/pull/4576) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) -- Preserve the TryCast expression in columnize_expr [\#4574](https://github.com/apache/arrow-datafusion/pull/4574) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([byteink](https://github.com/byteink)) -- Remove Confusing "Bare" in does not exist messages [\#4572](https://github.com/apache/arrow-datafusion/pull/4572) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Minor: Add tests for date interval predicate handling [\#4569](https://github.com/apache/arrow-datafusion/pull/4569) ([alamb](https://github.com/alamb)) -- Update sqlparser requirement from 0.27 to 0.28 [\#4568](https://github.com/apache/arrow-datafusion/pull/4568) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Avoid materializing local varaibles when creating `sortMergeJoinExec` 
[\#4566](https://github.com/apache/arrow-datafusion/pull/4566) ([HaoYang670](https://github.com/HaoYang670)) -- Minor: Fix logical conflict [\#4565](https://github.com/apache/arrow-datafusion/pull/4565) ([alamb](https://github.com/alamb)) -- feat: support nested loop join with the initial version [\#4562](https://github.com/apache/arrow-datafusion/pull/4562) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([liukun4515](https://github.com/liukun4515)) -- feat: prepare logical plan to logical plan without params/placeholders [\#4561](https://github.com/apache/arrow-datafusion/pull/4561) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([NGA-TRAN](https://github.com/NGA-TRAN)) -- Write faster kernel for is_distinct [\#4560](https://github.com/apache/arrow-datafusion/pull/4560) ([comphead](https://github.com/comphead)) -- refactor code about `query -> plan` for subqueries [\#4559](https://github.com/apache/arrow-datafusion/pull/4559) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) -- fix: remove wrong comment about `having` [\#4555](https://github.com/apache/arrow-datafusion/pull/4555) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) -- feat: user-defined aggregate function\(UDAF\) as window function [\#4553](https://github.com/apache/arrow-datafusion/pull/4553) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([MichaelScofield](https://github.com/MichaelScofield)) -- Fix date_part/extract functions to support now\(\) [\#4548](https://github.com/apache/arrow-datafusion/pull/4548) ([comphead](https://github.com/comphead)) -- bump sqllogictest to 0.9.0 [\#4547](https://github.com/apache/arrow-datafusion/pull/4547) ([xxchan](https://github.com/xxchan)) -- minor: Remove more clones from the planner [\#4546](https://github.com/apache/arrow-datafusion/pull/4546) 
[[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Add tests for coercion of timestamps to strings [\#4545](https://github.com/apache/arrow-datafusion/pull/4545) ([alamb](https://github.com/alamb)) -- MINOR: move sqllogictest to dev-dependencies [\#4544](https://github.com/apache/arrow-datafusion/pull/4544) ([alamb](https://github.com/alamb)) -- MINOR: add some comments about intended use of ChunkedStore [\#4541](https://github.com/apache/arrow-datafusion/pull/4541) ([alamb](https://github.com/alamb)) -- fix: remove TODOs linked to arrow\#3147 [\#4540](https://github.com/apache/arrow-datafusion/pull/4540) ([crepererum](https://github.com/crepererum)) -- refactor: remove redundant `build_join_schema()` [\#4538](https://github.com/apache/arrow-datafusion/pull/4538) ([jackwener](https://github.com/jackwener)) -- Move some create/drop tests to `ddl.slt` [\#4535](https://github.com/apache/arrow-datafusion/pull/4535) ([alamb](https://github.com/alamb)) -- Minor: Avoid cloning as many `Ident` during SQL planning [\#4534](https://github.com/apache/arrow-datafusion/pull/4534) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- shouldn't add `outer_query_schema` in `sql_select_to_rex` [\#4527](https://github.com/apache/arrow-datafusion/pull/4527) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) -- Avoid reading the entire file in ChunkedStore [\#4525](https://github.com/apache/arrow-datafusion/pull/4525) ([metesynnada](https://github.com/metesynnada)) -- Simplify MemoryManager [\#4522](https://github.com/apache/arrow-datafusion/pull/4522) ([tustvold](https://github.com/tustvold)) -- Fix limited statistic collection accross files with no stats [\#4521](https://github.com/apache/arrow-datafusion/pull/4521) ([isidentical](https://github.com/isidentical)) -- refactor: make Ctes a struct to also store data types provided by 
prepare stmt [\#4520](https://github.com/apache/arrow-datafusion/pull/4520) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([NGA-TRAN](https://github.com/NGA-TRAN)) -- Enrich filter statistics with known column boundaries [\#4519](https://github.com/apache/arrow-datafusion/pull/4519) ([isidentical](https://github.com/isidentical)) -- Remove Option from window frame [\#4516](https://github.com/apache/arrow-datafusion/pull/4516) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([mustafasrepo](https://github.com/mustafasrepo)) -- Make nightly clippy happy [\#4515](https://github.com/apache/arrow-datafusion/pull/4515) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) -- Remove interior mutability of `MemTable` [\#4514](https://github.com/apache/arrow-datafusion/pull/4514) ([xudong963](https://github.com/xudong963)) -- Make window function related struct public for ballista. [\#4511](https://github.com/apache/arrow-datafusion/pull/4511) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- minor: rename `push_down_limit` [\#4510](https://github.com/apache/arrow-datafusion/pull/4510) ([jackwener](https://github.com/jackwener)) -- Add get_window_frame in window_expr, show frame info in window_agg_exec [\#4508](https://github.com/apache/arrow-datafusion/pull/4508) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Add sqllogictest auto labeler [\#4506](https://github.com/apache/arrow-datafusion/pull/4506) ([mvanschellebeeck](https://github.com/mvanschellebeeck)) -- Add some more aggregate sqllogictests and remove rust tests [\#4505](https://github.com/apache/arrow-datafusion/pull/4505) ([mvanschellebeeck](https://github.com/mvanschellebeeck)) -- Remove sqllogictests CI run [\#4504](https://github.com/apache/arrow-datafusion/pull/4504) ([mvanschellebeeck](https://github.com/mvanschellebeeck)) -- Refactor code for `insert` in sqllogictest [\#4503](https://github.com/apache/arrow-datafusion/pull/4503) 
([xudong963](https://github.com/xudong963)) -- Add empty string normalization to sqllogictests [\#4501](https://github.com/apache/arrow-datafusion/pull/4501) ([alamb](https://github.com/alamb)) -- sqllogictest: A logging and command line filter [\#4497](https://github.com/apache/arrow-datafusion/pull/4497) ([alamb](https://github.com/alamb)) -- Support `insert into` statement in sqllogictest [\#4496](https://github.com/apache/arrow-datafusion/pull/4496) ([xudong963](https://github.com/xudong963)) -- Improve error handling for array downcasting [\#4493](https://github.com/apache/arrow-datafusion/pull/4493) ([retikulum](https://github.com/retikulum)) -- Unify most of `SessionConfig` settings into `ConfigOptions` [\#4492](https://github.com/apache/arrow-datafusion/pull/4492) ([alamb](https://github.com/alamb)) -- feat: support prepare statement [\#4490](https://github.com/apache/arrow-datafusion/pull/4490) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([NGA-TRAN](https://github.com/NGA-TRAN)) -- Minor: Update docstrings and comments to aggregate code [\#4489](https://github.com/apache/arrow-datafusion/pull/4489) ([alamb](https://github.com/alamb)) -- Fix panic in median "AggregateState is not a scalar aggregate" [\#4488](https://github.com/apache/arrow-datafusion/pull/4488) ([alamb](https://github.com/alamb)) -- fix `push_down_projection` push redundant columns. [\#4487](https://github.com/apache/arrow-datafusion/pull/4487) ([jackwener](https://github.com/jackwener)) -- Add window func related logic plan to proto ability. 
[\#4485](https://github.com/apache/arrow-datafusion/pull/4485) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- fix `Planner` don't generate `SubqueryAlias` and generate duplicated `SubqueryAlias` [\#4484](https://github.com/apache/arrow-datafusion/pull/4484) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) -- Improve parquet partition_file output display [\#4467](https://github.com/apache/arrow-datafusion/pull/4467) ([alamb](https://github.com/alamb)) -- minor: remove redundant `unwrap()` [\#4463](https://github.com/apache/arrow-datafusion/pull/4463) ([jackwener](https://github.com/jackwener)) -- Fix `Cte` in `from` clause with duplicated cte name [\#4461](https://github.com/apache/arrow-datafusion/pull/4461) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) -- Replace `&Option` with `Option<&T>` part 2 [\#4458](https://github.com/apache/arrow-datafusion/pull/4458) ([askoa](https://github.com/askoa)) -- Fix output_partitioning\(\), output_ordering\(\), equivalence_properties\(\) in WindowAggExec, shift the Column indexes [\#4455](https://github.com/apache/arrow-datafusion/pull/4455) ([mingmwang](https://github.com/mingmwang)) -- fix `push_down_filter` for pushing filters on grouping columns rather than aggregate columns [\#4447](https://github.com/apache/arrow-datafusion/pull/4447) ([jackwener](https://github.com/jackwener)) -- Add support for non-column key for equijoin when eliminating cross join to inner join [\#4443](https://github.com/apache/arrow-datafusion/pull/4443) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([ygf11](https://github.com/ygf11)) -- Remove the schema checking when creating `CrossJoinExec` [\#4432](https://github.com/apache/arrow-datafusion/pull/4432) ([HaoYang670](https://github.com/HaoYang670)) -- `date_part` support fractions of second [\#4385](https://github.com/apache/arrow-datafusion/pull/4385) 
([comphead](https://github.com/comphead)) -- Minor: use upstream RowSelection code from arrow `intersect_row_selection` [\#4340](https://github.com/apache/arrow-datafusion/pull/4340) ([alamb](https://github.com/alamb)) -- Support type coercion for timestamp and utf8 [\#4312](https://github.com/apache/arrow-datafusion/pull/4312) ([andre-cc-natzka](https://github.com/andre-cc-natzka)) +- Bump tokio from 1.23.0 to 1.23.1 in /datafusion-cli [\#4835](https://github.com/apache/datafusion/pull/4835) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Fix a few links in `roadmap.md` [\#4833](https://github.com/apache/datafusion/pull/4833) ([romanz](https://github.com/romanz)) +- DataFusion 16.0.0 release prep: Update version + add changelog [\#4831](https://github.com/apache/datafusion/pull/4831) [[sql](https://github.com/apache/datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- feat: use arrow row format for hash-group-by [\#4830](https://github.com/apache/datafusion/pull/4830) ([crepererum](https://github.com/crepererum)) +- refactor: split relation of planner into one part. 
[\#4829](https://github.com/apache/datafusion/pull/4829) [[sql](https://github.com/apache/datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) +- bugfix: remove cnf_rewrite in push_down_filter [\#4825](https://github.com/apache/datafusion/pull/4825) ([jackwener](https://github.com/jackwener)) +- minor: add some comments to row group pruning tests [\#4823](https://github.com/apache/datafusion/pull/4823) ([alamb](https://github.com/alamb)) +- Handle trailing tbl column in TPCH benchmarks [\#4821](https://github.com/apache/datafusion/pull/4821) ([tustvold](https://github.com/tustvold)) +- fix: account for memory in `RepartitionExec` [\#4820](https://github.com/apache/datafusion/pull/4820) ([crepererum](https://github.com/crepererum)) +- Fix clippy [\#4817](https://github.com/apache/datafusion/pull/4817) ([tustvold](https://github.com/tustvold)) +- Add test cases: row group filter with missing statistics for decimal data type [\#4810](https://github.com/apache/datafusion/pull/4810) ([liukun4515](https://github.com/liukun4515)) +- Move default catalog and schema onto ConfigOptions \(\#3887\) [\#4805](https://github.com/apache/datafusion/pull/4805) ([tustvold](https://github.com/tustvold)) +- remove duplicated test [\#4800](https://github.com/apache/datafusion/pull/4800) ([jackwener](https://github.com/jackwener)) +- Update sqlparser requirement from 0.29 to 0.30 [\#4799](https://github.com/apache/datafusion/pull/4799) [[sql](https://github.com/apache/datafusion/labels/sql)] ([dependabot[bot]](https://github.com/apps/dependabot)) +- rewrite the function `ensure_any_column_reference_is_unambiguous` [\#4797](https://github.com/apache/datafusion/pull/4797) [[sql](https://github.com/apache/datafusion/labels/sql)] ([HaoYang670](https://github.com/HaoYang670)) +- Uncomment nanoseconds tests after sql parser upgrade [\#4789](https://github.com/apache/datafusion/pull/4789) ([comphead](https://github.com/comphead)) +- fix: ListingSchemaProvider directory paths 
\(related: \#4204\) [\#4788](https://github.com/apache/datafusion/pull/4788) ([cfraz89](https://github.com/cfraz89)) +- Minimize stack space required to plan deeply nested binary expressions [\#4787](https://github.com/apache/datafusion/pull/4787) [[sql](https://github.com/apache/datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Minor: Refactor some sql planning code into functions [\#4785](https://github.com/apache/datafusion/pull/4785) [[sql](https://github.com/apache/datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Make datafusion-physical-expr compatible with blake3/traits-preview feature. [\#4784](https://github.com/apache/datafusion/pull/4784) ([BoredPerson](https://github.com/BoredPerson)) +- refactor: split expression pf planner into one part. [\#4783](https://github.com/apache/datafusion/pull/4783) [[sql](https://github.com/apache/datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) +- Fix Stack overflow in sql planning in debug builds [\#4779](https://github.com/apache/datafusion/pull/4779) [[sql](https://github.com/apache/datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Pipeline-friendly Bounded Memory Window Executor [\#4777](https://github.com/apache/datafusion/pull/4777) ([mustafasrepo](https://github.com/mustafasrepo)) +- Implement OptimizerConfig for SessionState [\#4775](https://github.com/apache/datafusion/pull/4775) ([tustvold](https://github.com/tustvold)) +- refactor: extract `parse_value` [\#4774](https://github.com/apache/datafusion/pull/4774) [[sql](https://github.com/apache/datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) +- Structify ConfigOptions \(\#4517\) [\#4771](https://github.com/apache/datafusion/pull/4771) ([tustvold](https://github.com/tustvold)) +- Update sqlparser to `29.0.0` [\#4770](https://github.com/apache/datafusion/pull/4770) [[sql](https://github.com/apache/datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Refactor extract_join_keys 
and move the ExtractEquijoinPredicate rule [\#4760](https://github.com/apache/datafusion/pull/4760) ([ygf11](https://github.com/ygf11)) +- Remove the config datafusion.execution.coalesce_target_batch_size and use datafusion.execution.batch_size instead [\#4757](https://github.com/apache/datafusion/pull/4757) ([yahoNanJing](https://github.com/yahoNanJing)) +- Add alias check for equijoin in from_plan [\#4755](https://github.com/apache/datafusion/pull/4755) ([ygf11](https://github.com/ygf11)) +- Take the top level `schema` into account when creating `UnionExec` [\#4753](https://github.com/apache/datafusion/pull/4753) ([HaoYang670](https://github.com/HaoYang670)) +- Set query_execution_start_time on snapshot from SessionContext \(\#4747\) [\#4750](https://github.com/apache/datafusion/pull/4750) ([tustvold](https://github.com/tustvold)) +- minor: Improve docstrings [\#4748](https://github.com/apache/datafusion/pull/4748) [[sql](https://github.com/apache/datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Append generated column to the schema instead of prepending for WindowAggExec [\#4746](https://github.com/apache/datafusion/pull/4746) ([mustafasrepo](https://github.com/mustafasrepo)) +- Minor: comments about coercion in physical planner [\#4745](https://github.com/apache/datafusion/pull/4745) ([alamb](https://github.com/alamb)) +- Simplify parquet filter predicate test, fix Page Filtering Incorrectly Handles Pages with Different Row Counts [\#4743](https://github.com/apache/datafusion/pull/4743) ([tustvold](https://github.com/tustvold)) +- support byte array for decimal in parquet page and row group filters [\#4742](https://github.com/apache/datafusion/pull/4742) ([liukun4515](https://github.com/liukun4515)) +- revert some code for \#4726 / remove unnecessary coercion in physical plans [\#4741](https://github.com/apache/datafusion/pull/4741) ([liukun4515](https://github.com/liukun4515)) +- Cleanup InformationSchema plumbing 
[\#4740](https://github.com/apache/datafusion/pull/4740) ([tustvold](https://github.com/tustvold)) +- Minor: use a common method to check the validate of equijoin predicate [\#4739](https://github.com/apache/datafusion/pull/4739) ([ygf11](https://github.com/ygf11)) +- minor: Support more data type for `null_counts` in the `PruningStatistics` [\#4738](https://github.com/apache/datafusion/pull/4738) ([liukun4515](https://github.com/liukun4515)) +- Extended datatypes & signatures support for `NULLIF` function [\#4737](https://github.com/apache/datafusion/pull/4737) ([korowa](https://github.com/korowa)) +- minor: improve debug logging for pruning predicates [\#4736](https://github.com/apache/datafusion/pull/4736) ([alamb](https://github.com/alamb)) +- refactor: parallelize `parquet_exec` test case `single_file` [\#4735](https://github.com/apache/datafusion/pull/4735) ([waynexia](https://github.com/waynexia)) +- fix: add one more projection to recover output schema [\#4733](https://github.com/apache/datafusion/pull/4733) ([waynexia](https://github.com/waynexia)) +- remove `SubqueryFilterToJoin` [\#4731](https://github.com/apache/datafusion/pull/4731) ([jackwener](https://github.com/jackwener)) +- Create writer with `arrow::ipc::IPCWriteOptions` [\#4730](https://github.com/apache/datafusion/pull/4730) ([askoa](https://github.com/askoa)) +- Implement cast between Date and Timestamp [\#4726](https://github.com/apache/datafusion/pull/4726) ([comphead](https://github.com/comphead)) +- Dynamic information_schema configuration and port more tests [\#4722](https://github.com/apache/datafusion/pull/4722) ([alamb](https://github.com/alamb)) +- Add TPC-DS query planning regression tests [\#4719](https://github.com/apache/datafusion/pull/4719) ([andygrove](https://github.com/andygrove)) +- Minor: refactor streaming CSV inference code [\#4717](https://github.com/apache/datafusion/pull/4717) ([alamb](https://github.com/alamb)) +- Reorder the physical plan optimizer rules, extract 
`GlobalSortSelection`, make `Repartition` optional [\#4714](https://github.com/apache/datafusion/pull/4714) ([yahoNanJing](https://github.com/yahoNanJing)) +- Eagerly construct PagePruningPredicate [\#4713](https://github.com/apache/datafusion/pull/4713) ([tustvold](https://github.com/tustvold)) +- Move the extract_join_keys to optimizer [\#4711](https://github.com/apache/datafusion/pull/4711) [[sql](https://github.com/apache/datafusion/labels/sql)] ([ygf11](https://github.com/ygf11)) +- Avoid to bypass `try_new/new()` to build plan directly and cleanup filter [\#4702](https://github.com/apache/datafusion/pull/4702) ([jackwener](https://github.com/jackwener)) +- MINOR: Remove where_clause_object_safety clippy ignore \(\#3081\) [\#4696](https://github.com/apache/datafusion/pull/4696) ([tustvold](https://github.com/tustvold)) +- Support for executing infinite files and boundedness-aware join reordering rule [\#4694](https://github.com/apache/datafusion/pull/4694) ([metesynnada](https://github.com/metesynnada)) +- Unnecessary SortExec removal rule from Physical Plan [\#4691](https://github.com/apache/datafusion/pull/4691) ([mustafasrepo](https://github.com/mustafasrepo)) +- minor: rename the github actions [\#4689](https://github.com/apache/datafusion/pull/4689) ([jackwener](https://github.com/jackwener)) +- FOLLOWUP: remove more recursion in optimizer rules. 
[\#4687](https://github.com/apache/datafusion/pull/4687) ([jackwener](https://github.com/jackwener)) +- Add line that prevents display_name from being called on Wildcard [\#4682](https://github.com/apache/datafusion/pull/4682) ([andre-cc-natzka](https://github.com/andre-cc-natzka)) +- Deprecate SessionContext::create_logical_plan \(\#4617\) [\#4679](https://github.com/apache/datafusion/pull/4679) ([tustvold](https://github.com/tustvold)) +- Support `NTILE` window function [\#4676](https://github.com/apache/datafusion/pull/4676) ([berkaycpp](https://github.com/berkaycpp)) +- Support min max aggregates in window functions with sliding windows [\#4675](https://github.com/apache/datafusion/pull/4675) ([berkaycpp](https://github.com/berkaycpp)) +- Refactor Expr::AggregateFunction and Expr::WindowFunction to use struct [\#4671](https://github.com/apache/datafusion/pull/4671) [[sql](https://github.com/apache/datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) +- Support type coercion for equijoin [\#4666](https://github.com/apache/datafusion/pull/4666) ([ygf11](https://github.com/ygf11)) +- Add `--complete` auto completion mode to `sqllogictests` [\#4665](https://github.com/apache/datafusion/pull/4665) ([alamb](https://github.com/alamb)) +- Fix CoalesceBatches elasped_compute metric [\#4664](https://github.com/apache/datafusion/pull/4664) ([Jefffrey](https://github.com/Jefffrey)) +- Refactor Expr::Sort to use struct [\#4663](https://github.com/apache/datafusion/pull/4663) [[sql](https://github.com/apache/datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) +- More descriptive error for plus/minus between timestamps/dates [\#4662](https://github.com/apache/datafusion/pull/4662) ([Jefffrey](https://github.com/Jefffrey)) +- Stream CSV file during schema inference [\#4661](https://github.com/apache/datafusion/pull/4661) ([Jefffrey](https://github.com/Jefffrey)) +- Refine the logical and physical plan serialization and deserialization 
[\#4659](https://github.com/apache/datafusion/pull/4659) ([yahoNanJing](https://github.com/yahoNanJing)) +- Use thiserror in sqllogictest erorr [\#4657](https://github.com/apache/datafusion/pull/4657) ([xudong963](https://github.com/xudong963)) +- fix `cargo clippy` warning [\#4652](https://github.com/apache/datafusion/pull/4652) [[sql](https://github.com/apache/datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) +- Improve group by hash performance: avoid group-key/-state clones for hash-groupby [\#4651](https://github.com/apache/datafusion/pull/4651) ([crepererum](https://github.com/crepererum)) +- remove recursion in optimizer rules [\#4650](https://github.com/apache/datafusion/pull/4650) ([jackwener](https://github.com/jackwener)) +- replace the arithmetic op for decimal array op decimal array using arrow kernel [\#4648](https://github.com/apache/datafusion/pull/4648) ([liukun4515](https://github.com/liukun4515)) +- simplify regex expressions [\#4646](https://github.com/apache/datafusion/pull/4646) ([crepererum](https://github.com/crepererum)) +- Avoid generate duplicate sort Keys from Window Expressions, fix bug when decide Window Expressions ordering [\#4643](https://github.com/apache/datafusion/pull/4643) [[sql](https://github.com/apache/datafusion/labels/sql)] ([mingmwang](https://github.com/mingmwang)) +- Refactor Expr::TryCast to use a struct [\#4642](https://github.com/apache/datafusion/pull/4642) [[sql](https://github.com/apache/datafusion/labels/sql)] ([ygf11](https://github.com/ygf11)) +- add `ILIKE` support [\#4639](https://github.com/apache/datafusion/pull/4639) ([crepererum](https://github.com/crepererum)) +- Detect invalid \(unsupported\) compression types when parsing [\#4637](https://github.com/apache/datafusion/pull/4637) [[sql](https://github.com/apache/datafusion/labels/sql)] ([HaoYang670](https://github.com/HaoYang670)) +- unwrap_cast_in_comparison.rs: support unint \<-\> decimal 
[\#4634](https://github.com/apache/datafusion/pull/4634) ([liukun4515](https://github.com/liukun4515)) +- MINOR: Fix incorrect config definitions [\#4623](https://github.com/apache/datafusion/pull/4623) ([andygrove](https://github.com/andygrove)) +- FOLLOWUP: remove `optimize()` [\#4619](https://github.com/apache/datafusion/pull/4619) ([jackwener](https://github.com/jackwener)) +- Optimizer: avoid every rule must recursive children in optimizer [\#4618](https://github.com/apache/datafusion/pull/4618) ([jackwener](https://github.com/jackwener)) +- fix: run logical optimizer rules for `TableScan` expressions [\#4614](https://github.com/apache/datafusion/pull/4614) ([crepererum](https://github.com/crepererum)) +- refactor: relax the signature of register\_\* in SessionContext [\#4612](https://github.com/apache/datafusion/pull/4612) ([waynexia](https://github.com/waynexia)) +- Remove the function `consume_token` from the parser [\#4609](https://github.com/apache/datafusion/pull/4609) [[sql](https://github.com/apache/datafusion/labels/sql)] ([HaoYang670](https://github.com/HaoYang670)) +- Make SchemaProvider::table async [\#4607](https://github.com/apache/datafusion/pull/4607) ([tustvold](https://github.com/tustvold)) +- Lazy system tables [\#4606](https://github.com/apache/datafusion/pull/4606) ([tustvold](https://github.com/tustvold)) +- Refactor: Change equijoin keys from column to expression in logical join [\#4602](https://github.com/apache/datafusion/pull/4602) [[sql](https://github.com/apache/datafusion/labels/sql)] ([ygf11](https://github.com/ygf11)) +- refactor: extract `assert_optimized_plan_eq` from UT. [\#4600](https://github.com/apache/datafusion/pull/4600) ([jackwener](https://github.com/jackwener)) +- add `try_optimize()` for all rules. 
[\#4599](https://github.com/apache/datafusion/pull/4599) ([jackwener](https://github.com/jackwener)) +- Normalize datafusion configuration names [\#4596](https://github.com/apache/datafusion/pull/4596) ([yahoNanJing](https://github.com/yahoNanJing)) +- Fix the bugs in parsing `COMPRESSION TYPE` [\#4590](https://github.com/apache/datafusion/pull/4590) [[sql](https://github.com/apache/datafusion/labels/sql)] ([HaoYang670](https://github.com/HaoYang670)) +- Minor: Remove datafusion-core dev dependency from datafusion-sql [\#4589](https://github.com/apache/datafusion/pull/4589) [[sql](https://github.com/apache/datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Improve error handling for array downcasting [\#4588](https://github.com/apache/datafusion/pull/4588) ([retikulum](https://github.com/retikulum)) +- Update to arrow v29 [\#4587](https://github.com/apache/datafusion/pull/4587) [[sql](https://github.com/apache/datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) +- Add need_data_exchange in the ExecutionPlan to indicate whether a physical operator needs data exchange [\#4586](https://github.com/apache/datafusion/pull/4586) ([yahoNanJing](https://github.com/yahoNanJing)) +- Move subset of select tests to sqllogic [\#4583](https://github.com/apache/datafusion/pull/4583) ([ajayaa](https://github.com/ajayaa)) +- bugfix: just allow having use expr in `groupby` or `aggr` [\#4579](https://github.com/apache/datafusion/pull/4579) [[sql](https://github.com/apache/datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) +- Output sqllogictests with arrow display rather than CSV writer [\#4578](https://github.com/apache/datafusion/pull/4578) ([alamb](https://github.com/alamb)) +- Minor: Add test case for reduce cross join [\#4577](https://github.com/apache/datafusion/pull/4577) ([ygf11](https://github.com/ygf11)) +- refactor: remove redundant `outer_query_schema` [\#4576](https://github.com/apache/datafusion/pull/4576) 
[[sql](https://github.com/apache/datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) +- Preserve the TryCast expression in columnize_expr [\#4574](https://github.com/apache/datafusion/pull/4574) [[sql](https://github.com/apache/datafusion/labels/sql)] ([byteink](https://github.com/byteink)) +- Remove Confusing "Bare" in does not exist messages [\#4572](https://github.com/apache/datafusion/pull/4572) [[sql](https://github.com/apache/datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Minor: Add tests for date interval predicate handling [\#4569](https://github.com/apache/datafusion/pull/4569) ([alamb](https://github.com/alamb)) +- Update sqlparser requirement from 0.27 to 0.28 [\#4568](https://github.com/apache/datafusion/pull/4568) [[sql](https://github.com/apache/datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Avoid materializing local varaibles when creating `sortMergeJoinExec` [\#4566](https://github.com/apache/datafusion/pull/4566) ([HaoYang670](https://github.com/HaoYang670)) +- Minor: Fix logical conflict [\#4565](https://github.com/apache/datafusion/pull/4565) ([alamb](https://github.com/alamb)) +- feat: support nested loop join with the initial version [\#4562](https://github.com/apache/datafusion/pull/4562) [[sql](https://github.com/apache/datafusion/labels/sql)] ([liukun4515](https://github.com/liukun4515)) +- feat: prepare logical plan to logical plan without params/placeholders [\#4561](https://github.com/apache/datafusion/pull/4561) [[sql](https://github.com/apache/datafusion/labels/sql)] ([NGA-TRAN](https://github.com/NGA-TRAN)) +- Write faster kernel for is_distinct [\#4560](https://github.com/apache/datafusion/pull/4560) ([comphead](https://github.com/comphead)) +- refactor code about `query -> plan` for subqueries [\#4559](https://github.com/apache/datafusion/pull/4559) [[sql](https://github.com/apache/datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) +- fix: remove wrong comment about 
`having` [\#4555](https://github.com/apache/datafusion/pull/4555) [[sql](https://github.com/apache/datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) +- feat: user-defined aggregate function\(UDAF\) as window function [\#4553](https://github.com/apache/datafusion/pull/4553) [[sql](https://github.com/apache/datafusion/labels/sql)] ([MichaelScofield](https://github.com/MichaelScofield)) +- Fix date_part/extract functions to support now\(\) [\#4548](https://github.com/apache/datafusion/pull/4548) ([comphead](https://github.com/comphead)) +- bump sqllogictest to 0.9.0 [\#4547](https://github.com/apache/datafusion/pull/4547) ([xxchan](https://github.com/xxchan)) +- minor: Remove more clones from the planner [\#4546](https://github.com/apache/datafusion/pull/4546) [[sql](https://github.com/apache/datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Add tests for coercion of timestamps to strings [\#4545](https://github.com/apache/datafusion/pull/4545) ([alamb](https://github.com/alamb)) +- MINOR: move sqllogictest to dev-dependencies [\#4544](https://github.com/apache/datafusion/pull/4544) ([alamb](https://github.com/alamb)) +- MINOR: add some comments about intended use of ChunkedStore [\#4541](https://github.com/apache/datafusion/pull/4541) ([alamb](https://github.com/alamb)) +- fix: remove TODOs linked to arrow\#3147 [\#4540](https://github.com/apache/datafusion/pull/4540) ([crepererum](https://github.com/crepererum)) +- refactor: remove redundant `build_join_schema()` [\#4538](https://github.com/apache/datafusion/pull/4538) ([jackwener](https://github.com/jackwener)) +- Move some create/drop tests to `ddl.slt` [\#4535](https://github.com/apache/datafusion/pull/4535) ([alamb](https://github.com/alamb)) +- Minor: Avoid cloning as many `Ident` during SQL planning [\#4534](https://github.com/apache/datafusion/pull/4534) [[sql](https://github.com/apache/datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- shouldn't add 
`outer_query_schema` in `sql_select_to_rex` [\#4527](https://github.com/apache/datafusion/pull/4527) [[sql](https://github.com/apache/datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) +- Avoid reading the entire file in ChunkedStore [\#4525](https://github.com/apache/datafusion/pull/4525) ([metesynnada](https://github.com/metesynnada)) +- Simplify MemoryManager [\#4522](https://github.com/apache/datafusion/pull/4522) ([tustvold](https://github.com/tustvold)) +- Fix limited statistic collection accross files with no stats [\#4521](https://github.com/apache/datafusion/pull/4521) ([isidentical](https://github.com/isidentical)) +- refactor: make Ctes a struct to also store data types provided by prepare stmt [\#4520](https://github.com/apache/datafusion/pull/4520) [[sql](https://github.com/apache/datafusion/labels/sql)] ([NGA-TRAN](https://github.com/NGA-TRAN)) +- Enrich filter statistics with known column boundaries [\#4519](https://github.com/apache/datafusion/pull/4519) ([isidentical](https://github.com/isidentical)) +- Remove Option from window frame [\#4516](https://github.com/apache/datafusion/pull/4516) [[sql](https://github.com/apache/datafusion/labels/sql)] ([mustafasrepo](https://github.com/mustafasrepo)) +- Make nightly clippy happy [\#4515](https://github.com/apache/datafusion/pull/4515) [[sql](https://github.com/apache/datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) +- Remove interior mutability of `MemTable` [\#4514](https://github.com/apache/datafusion/pull/4514) ([xudong963](https://github.com/xudong963)) +- Make window function related struct public for ballista. 
[\#4511](https://github.com/apache/datafusion/pull/4511) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- minor: rename `push_down_limit` [\#4510](https://github.com/apache/datafusion/pull/4510) ([jackwener](https://github.com/jackwener)) +- Add get_window_frame in window_expr, show frame info in window_agg_exec [\#4508](https://github.com/apache/datafusion/pull/4508) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Add sqllogictest auto labeler [\#4506](https://github.com/apache/datafusion/pull/4506) ([mvanschellebeeck](https://github.com/mvanschellebeeck)) +- Add some more aggregate sqllogictests and remove rust tests [\#4505](https://github.com/apache/datafusion/pull/4505) ([mvanschellebeeck](https://github.com/mvanschellebeeck)) +- Remove sqllogictests CI run [\#4504](https://github.com/apache/datafusion/pull/4504) ([mvanschellebeeck](https://github.com/mvanschellebeeck)) +- Refactor code for `insert` in sqllogictest [\#4503](https://github.com/apache/datafusion/pull/4503) ([xudong963](https://github.com/xudong963)) +- Add empty string normalization to sqllogictests [\#4501](https://github.com/apache/datafusion/pull/4501) ([alamb](https://github.com/alamb)) +- sqllogictest: A logging and command line filter [\#4497](https://github.com/apache/datafusion/pull/4497) ([alamb](https://github.com/alamb)) +- Support `insert into` statement in sqllogictest [\#4496](https://github.com/apache/datafusion/pull/4496) ([xudong963](https://github.com/xudong963)) +- Improve error handling for array downcasting [\#4493](https://github.com/apache/datafusion/pull/4493) ([retikulum](https://github.com/retikulum)) +- Unify most of `SessionConfig` settings into `ConfigOptions` [\#4492](https://github.com/apache/datafusion/pull/4492) ([alamb](https://github.com/alamb)) +- feat: support prepare statement [\#4490](https://github.com/apache/datafusion/pull/4490) [[sql](https://github.com/apache/datafusion/labels/sql)] ([NGA-TRAN](https://github.com/NGA-TRAN)) +- Minor: Update docstrings and 
comments to aggregate code [\#4489](https://github.com/apache/datafusion/pull/4489) ([alamb](https://github.com/alamb)) +- Fix panic in median "AggregateState is not a scalar aggregate" [\#4488](https://github.com/apache/datafusion/pull/4488) ([alamb](https://github.com/alamb)) +- fix `push_down_projection` push redundant columns. [\#4487](https://github.com/apache/datafusion/pull/4487) ([jackwener](https://github.com/jackwener)) +- Add window func related logic plan to proto ability. [\#4485](https://github.com/apache/datafusion/pull/4485) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- fix `Planner` don't generate `SubqueryAlias` and generate duplicated `SubqueryAlias` [\#4484](https://github.com/apache/datafusion/pull/4484) [[sql](https://github.com/apache/datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) +- Improve parquet partition_file output display [\#4467](https://github.com/apache/datafusion/pull/4467) ([alamb](https://github.com/alamb)) +- minor: remove redundant `unwrap()` [\#4463](https://github.com/apache/datafusion/pull/4463) ([jackwener](https://github.com/jackwener)) +- Fix `Cte` in `from` clause with duplicated cte name [\#4461](https://github.com/apache/datafusion/pull/4461) [[sql](https://github.com/apache/datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) +- Replace `&Option` with `Option<&T>` part 2 [\#4458](https://github.com/apache/datafusion/pull/4458) ([askoa](https://github.com/askoa)) +- Fix output_partitioning\(\), output_ordering\(\), equivalence_properties\(\) in WindowAggExec, shift the Column indexes [\#4455](https://github.com/apache/datafusion/pull/4455) ([mingmwang](https://github.com/mingmwang)) +- fix `push_down_filter` for pushing filters on grouping columns rather than aggregate columns [\#4447](https://github.com/apache/datafusion/pull/4447) ([jackwener](https://github.com/jackwener)) +- Add support for non-column key for equijoin when eliminating cross join to inner join 
[\#4443](https://github.com/apache/datafusion/pull/4443) [[sql](https://github.com/apache/datafusion/labels/sql)] ([ygf11](https://github.com/ygf11)) +- Remove the schema checking when creating `CrossJoinExec` [\#4432](https://github.com/apache/datafusion/pull/4432) ([HaoYang670](https://github.com/HaoYang670)) +- `date_part` support fractions of second [\#4385](https://github.com/apache/datafusion/pull/4385) ([comphead](https://github.com/comphead)) +- Minor: use upstream RowSelection code from arrow `intersect_row_selection` [\#4340](https://github.com/apache/datafusion/pull/4340) ([alamb](https://github.com/alamb)) +- Support type coercion for timestamp and utf8 [\#4312](https://github.com/apache/datafusion/pull/4312) ([andre-cc-natzka](https://github.com/andre-cc-natzka)) diff --git a/dev/changelog/16.1.0.md b/dev/changelog/16.1.0.md index 994800062b1c..66820a5a6979 100644 --- a/dev/changelog/16.1.0.md +++ b/dev/changelog/16.1.0.md @@ -17,11 +17,11 @@ under the License. --> -## [16.1.0](https://github.com/apache/arrow-datafusion/tree/16.1.0) (2023-01-19) +## [16.1.0](https://github.com/apache/datafusion/tree/16.1.0) (2023-01-19) -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/16.1.0-rc1...16.0.0) +[Full Changelog](https://github.com/apache/datafusion/compare/16.1.0-rc1...16.0.0) **Merged pull requests:** -- Fix column indices in EnforceDistribution optimizer in Partial AggregateMode \(\#4878\) [\#4959](https://github.com/apache/arrow-datafusion/pull/4959) -- Make it able to specify a session id for SessionState \(\#4933\) [\#4951](https://github.com/apache/arrow-datafusion/pull/4951) +- Fix column indices in EnforceDistribution optimizer in Partial AggregateMode \(\#4878\) [\#4959](https://github.com/apache/datafusion/pull/4959) +- Make it able to specify a session id for SessionState \(\#4933\) [\#4951](https://github.com/apache/datafusion/pull/4951) diff --git a/dev/changelog/17.0.0.md b/dev/changelog/17.0.0.md index 
7a35b52e3cdd..1eb32a28ec6d 100644 --- a/dev/changelog/17.0.0.md +++ b/dev/changelog/17.0.0.md @@ -17,174 +17,174 @@ under the License. --> -## [17.0.0](https://github.com/apache/arrow-datafusion/tree/17.0.0) (2023-01-27) +## [17.0.0](https://github.com/apache/datafusion/tree/17.0.0) (2023-01-27) -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/17.0.0-rc1...17.0.0) +[Full Changelog](https://github.com/apache/datafusion/compare/17.0.0-rc1...17.0.0) **Breaking changes:** -- Implemented a ReadOptions trait for cleaner code. [\#5025](https://github.com/apache/arrow-datafusion/pull/5025) ([saikrishna1-bidgely](https://github.com/saikrishna1-bidgely)) +- Implemented a ReadOptions trait for cleaner code. [\#5025](https://github.com/apache/datafusion/pull/5025) ([saikrishna1-bidgely](https://github.com/saikrishna1-bidgely)) **Implemented enhancements:** -- Add null-equals-null JOIN support in Substrait producer/consumer [\#5084](https://github.com/apache/arrow-datafusion/issues/5084) -- Cleaner code for Read Options in reader methdos. [\#5024](https://github.com/apache/arrow-datafusion/issues/5024) -- Substrait donation follow-on work [\#4897](https://github.com/apache/arrow-datafusion/issues/4897) -- Add `len` method to `DataFrame` [\#1926](https://github.com/apache/arrow-datafusion/issues/1926) +- Add null-equals-null JOIN support in Substrait producer/consumer [\#5084](https://github.com/apache/datafusion/issues/5084) +- Cleaner code for Read Options in reader methdos. 
[\#5024](https://github.com/apache/datafusion/issues/5024) +- Substrait donation follow-on work [\#4897](https://github.com/apache/datafusion/issues/4897) +- Add `len` method to `DataFrame` [\#1926](https://github.com/apache/datafusion/issues/1926) **Fixed bugs:** -- Clippy failures in master branch and in PRs \(due to new nightly Rust\) [\#5080](https://github.com/apache/arrow-datafusion/issues/5080) +- Clippy failures in master branch and in PRs \(due to new nightly Rust\) [\#5080](https://github.com/apache/datafusion/issues/5080) **Merged pull requests:** -- Add null-equals-null join support [\#5085](https://github.com/apache/arrow-datafusion/pull/5085) ([nseekhao](https://github.com/nseekhao)) -- Optimize returned plan in roundtrip_fill_na function [\#5083](https://github.com/apache/arrow-datafusion/pull/5083) ([nseekhao](https://github.com/nseekhao)) -- fix clippy failures [\#5081](https://github.com/apache/arrow-datafusion/pull/5081) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) -- Add NULL literal support for decimal and integers [\#5077](https://github.com/apache/arrow-datafusion/pull/5077) ([nseekhao](https://github.com/nseekhao)) -- DataFrame count method [\#5071](https://github.com/apache/arrow-datafusion/pull/5071) ([Jefffrey](https://github.com/Jefffrey)) -- \[sqllogictests\] Port orderby.rs to sqllogictests [\#5062](https://github.com/apache/arrow-datafusion/pull/5062) ([alamb](https://github.com/alamb)) +- Add null-equals-null join support [\#5085](https://github.com/apache/datafusion/pull/5085) ([nseekhao](https://github.com/nseekhao)) +- Optimize returned plan in roundtrip_fill_na function [\#5083](https://github.com/apache/datafusion/pull/5083) ([nseekhao](https://github.com/nseekhao)) +- fix clippy failures [\#5081](https://github.com/apache/datafusion/pull/5081) [[sql](https://github.com/apache/datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- Add NULL literal 
support for decimal and integers [\#5077](https://github.com/apache/datafusion/pull/5077) ([nseekhao](https://github.com/nseekhao)) +- DataFrame count method [\#5071](https://github.com/apache/datafusion/pull/5071) ([Jefffrey](https://github.com/Jefffrey)) +- \[sqllogictests\] Port orderby.rs to sqllogictests [\#5062](https://github.com/apache/datafusion/pull/5062) ([alamb](https://github.com/alamb)) -## [17.0.0-rc1](https://github.com/apache/arrow-datafusion/tree/17.0.0-rc1) (2023-01-26) +## [17.0.0-rc1](https://github.com/apache/datafusion/tree/17.0.0-rc1) (2023-01-26) -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/16.1.0...17.0.0-rc1) +[Full Changelog](https://github.com/apache/datafusion/compare/16.1.0...17.0.0-rc1) **Breaking changes:** -- Change ExecutionPlan::maintains_input_order to return vector \(to support multi children executors better\) [\#5035](https://github.com/apache/arrow-datafusion/pull/5035) ([mustafasrepo](https://github.com/mustafasrepo)) -- Allow overriding error type in DataFusion Result [\#5000](https://github.com/apache/arrow-datafusion/pull/5000) ([tustvold](https://github.com/tustvold)) -- Add dictionary_expresions feature \(\#4386\) [\#4999](https://github.com/apache/arrow-datafusion/pull/4999) ([tustvold](https://github.com/tustvold)) +- Change ExecutionPlan::maintains_input_order to return vector \(to support multi children executors better\) [\#5035](https://github.com/apache/datafusion/pull/5035) ([mustafasrepo](https://github.com/mustafasrepo)) +- Allow overriding error type in DataFusion Result [\#5000](https://github.com/apache/datafusion/pull/5000) ([tustvold](https://github.com/tustvold)) +- Add dictionary_expresions feature \(\#4386\) [\#4999](https://github.com/apache/datafusion/pull/4999) ([tustvold](https://github.com/tustvold)) **Implemented enhancements:** -- Retain the ordering of fields in the table schema when creating the projection for an update plan 
[\#5052](https://github.com/apache/arrow-datafusion/issues/5052) -- \[sqllogictest\] Remove `integration-tests` directory [\#5011](https://github.com/apache/arrow-datafusion/issues/5011) -- \[sqllogictest\] Consolidate normalization code for the postgres and non-postgres paths [\#5010](https://github.com/apache/arrow-datafusion/issues/5010) -- \[sqllogictest\] Don't orchestrate the postgres containers with rust / docker [\#5009](https://github.com/apache/arrow-datafusion/issues/5009) -- check external table exist before creating a table [\#4997](https://github.com/apache/arrow-datafusion/issues/4997) -- Implement `std::error::Error` for DataFusionError [\#4991](https://github.com/apache/arrow-datafusion/issues/4991) -- Return Vec\ instead of bool in ExecutionPlan::maintains_input_order [\#4980](https://github.com/apache/arrow-datafusion/issues/4980) -- Add support for linear range search [\#4979](https://github.com/apache/arrow-datafusion/issues/4979) -- Add support for bounded execution when window query involves UNBOUNDED PRECEDING [\#4978](https://github.com/apache/arrow-datafusion/issues/4978) -- Infer prepared statement parameter types for insert queries with values clauses [\#4976](https://github.com/apache/arrow-datafusion/issues/4976) -- The filter of outer table happens multiple time after optimizing in-subquery to join [\#4914](https://github.com/apache/arrow-datafusion/issues/4914) -- Support Describe FILE in datafusion-cli [\#4913](https://github.com/apache/arrow-datafusion/issues/4913) -- Release DataFusion 16 [\#4776](https://github.com/apache/arrow-datafusion/issues/4776) -- Support writing lists in the arrow csv writer [\#4502](https://github.com/apache/arrow-datafusion/issues/4502) -- Replace python based integration test with sqllogictest [\#4462](https://github.com/apache/arrow-datafusion/issues/4462) -- Support CREATE TABLE table_name\(...schema_fields\) [\#4396](https://github.com/apache/arrow-datafusion/issues/4396) -- Make Binary Dictionary 
Operations Optional [\#4386](https://github.com/apache/arrow-datafusion/issues/4386) -- Improve / Cleanup DataFusion CI [\#3045](https://github.com/apache/arrow-datafusion/issues/3045) -- More frequent DataFusion releases to crates.io \(discussion\) [\#2327](https://github.com/apache/arrow-datafusion/issues/2327) +- Retain the ordering of fields in the table schema when creating the projection for an update plan [\#5052](https://github.com/apache/datafusion/issues/5052) +- \[sqllogictest\] Remove `integration-tests` directory [\#5011](https://github.com/apache/datafusion/issues/5011) +- \[sqllogictest\] Consolidate normalization code for the postgres and non-postgres paths [\#5010](https://github.com/apache/datafusion/issues/5010) +- \[sqllogictest\] Don't orchestrate the postgres containers with rust / docker [\#5009](https://github.com/apache/datafusion/issues/5009) +- check external table exist before creating a table [\#4997](https://github.com/apache/datafusion/issues/4997) +- Implement `std::error::Error` for DataFusionError [\#4991](https://github.com/apache/datafusion/issues/4991) +- Return Vec\ instead of bool in ExecutionPlan::maintains_input_order [\#4980](https://github.com/apache/datafusion/issues/4980) +- Add support for linear range search [\#4979](https://github.com/apache/datafusion/issues/4979) +- Add support for bounded execution when window query involves UNBOUNDED PRECEDING [\#4978](https://github.com/apache/datafusion/issues/4978) +- Infer prepared statement parameter types for insert queries with values clauses [\#4976](https://github.com/apache/datafusion/issues/4976) +- The filter of outer table happens multiple time after optimizing in-subquery to join [\#4914](https://github.com/apache/datafusion/issues/4914) +- Support Describe FILE in datafusion-cli [\#4913](https://github.com/apache/datafusion/issues/4913) +- Release DataFusion 16 [\#4776](https://github.com/apache/datafusion/issues/4776) +- Support writing lists in the arrow csv 
writer [\#4502](https://github.com/apache/datafusion/issues/4502) +- Replace python based integration test with sqllogictest [\#4462](https://github.com/apache/datafusion/issues/4462) +- Support CREATE TABLE table_name\(...schema_fields\) [\#4396](https://github.com/apache/datafusion/issues/4396) +- Make Binary Dictionary Operations Optional [\#4386](https://github.com/apache/datafusion/issues/4386) +- Improve / Cleanup DataFusion CI [\#3045](https://github.com/apache/datafusion/issues/3045) +- More frequent DataFusion releases to crates.io \(discussion\) [\#2327](https://github.com/apache/datafusion/issues/2327) **Fixed bugs:** -- UPDATE statment for non existent column doesn't error out [\#5068](https://github.com/apache/arrow-datafusion/issues/5068) -- Limit doesn't drop on first batch when limit size == fetch size. [\#5064](https://github.com/apache/arrow-datafusion/issues/5064) -- Performance regressions since DataFusion 15.x [\#5060](https://github.com/apache/arrow-datafusion/issues/5060) -- Quoted schema and table names result in double-quoted names in logical plan. [\#5058](https://github.com/apache/arrow-datafusion/issues/5058) -- Homebrew release script has the amount of arguments being incorrect [\#5043](https://github.com/apache/arrow-datafusion/issues/5043) -- CI Failing with Out of Disk [\#5040](https://github.com/apache/arrow-datafusion/issues/5040) -- Doc links to LogicalPlan in the core package need updating. 
[\#5036](https://github.com/apache/arrow-datafusion/issues/5036) -- explain analyze can not see csvexec execution time metrics [\#5014](https://github.com/apache/arrow-datafusion/issues/5014) -- AVG\(nulls\) returns 0 rather than NULL [\#5007](https://github.com/apache/arrow-datafusion/issues/5007) -- Invalid Placeholders return internal error \(rather than Plan error\) [\#5005](https://github.com/apache/arrow-datafusion/issues/5005) -- select \* from csv error [\#4996](https://github.com/apache/arrow-datafusion/issues/4996) -- Incorrect nested error wrapped to `ArrowError:External` variant for joins [\#4981](https://github.com/apache/arrow-datafusion/issues/4981) +- UPDATE statment for non existent column doesn't error out [\#5068](https://github.com/apache/datafusion/issues/5068) +- Limit doesn't drop on first batch when limit size == fetch size. [\#5064](https://github.com/apache/datafusion/issues/5064) +- Performance regressions since DataFusion 15.x [\#5060](https://github.com/apache/datafusion/issues/5060) +- Quoted schema and table names result in double-quoted names in logical plan. [\#5058](https://github.com/apache/datafusion/issues/5058) +- Homebrew release script has the amount of arguments being incorrect [\#5043](https://github.com/apache/datafusion/issues/5043) +- CI Failing with Out of Disk [\#5040](https://github.com/apache/datafusion/issues/5040) +- Doc links to LogicalPlan in the core package need updating. 
[\#5036](https://github.com/apache/datafusion/issues/5036) +- explain analyze can not see csvexec execution time metrics [\#5014](https://github.com/apache/datafusion/issues/5014) +- AVG\(nulls\) returns 0 rather than NULL [\#5007](https://github.com/apache/datafusion/issues/5007) +- Invalid Placeholders return internal error \(rather than Plan error\) [\#5005](https://github.com/apache/datafusion/issues/5005) +- select \* from csv error [\#4996](https://github.com/apache/datafusion/issues/4996) +- Incorrect nested error wrapped to `ArrowError:External` variant for joins [\#4981](https://github.com/apache/datafusion/issues/4981) **Documentation updates:** -- MINOR: Add Substrait to feature list in README [\#4955](https://github.com/apache/arrow-datafusion/pull/4955) ([andygrove](https://github.com/andygrove)) -- Minor: comma engineering in Readme [\#4954](https://github.com/apache/arrow-datafusion/pull/4954) ([alamb](https://github.com/alamb)) -- Update main DataFusion README [\#4903](https://github.com/apache/arrow-datafusion/pull/4903) ([alamb](https://github.com/alamb)) -- Docs: Add known user - Kamu [\#4899](https://github.com/apache/arrow-datafusion/pull/4899) ([sergiimk](https://github.com/sergiimk)) +- MINOR: Add Substrait to feature list in README [\#4955](https://github.com/apache/datafusion/pull/4955) ([andygrove](https://github.com/andygrove)) +- Minor: comma engineering in Readme [\#4954](https://github.com/apache/datafusion/pull/4954) ([alamb](https://github.com/alamb)) +- Update main DataFusion README [\#4903](https://github.com/apache/datafusion/pull/4903) ([alamb](https://github.com/alamb)) +- Docs: Add known user - Kamu [\#4899](https://github.com/apache/datafusion/pull/4899) ([sergiimk](https://github.com/sergiimk)) **Closed issues:** -- Support sub directories in sqllogictest runner [\#4709](https://github.com/apache/arrow-datafusion/issues/4709) -- Bug displaying fractional seconds in `IntervalMonthDayNano` 
[\#4220](https://github.com/apache/arrow-datafusion/issues/4220) +- Support sub directories in sqllogictest runner [\#4709](https://github.com/apache/datafusion/issues/4709) +- Bug displaying fractional seconds in `IntervalMonthDayNano` [\#4220](https://github.com/apache/datafusion/issues/4220) **Merged pull requests:** -- Add `release-crates.sh` script [\#5070](https://github.com/apache/arrow-datafusion/pull/5070) ([iajoiner](https://github.com/iajoiner)) -- Validate assignment target column existence for UPDATE statements [\#5069](https://github.com/apache/arrow-datafusion/pull/5069) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([gruuya](https://github.com/gruuya)) -- Fix limit when size of batch to poll == skip/fetch value [\#5066](https://github.com/apache/arrow-datafusion/pull/5066) ([Dandandan](https://github.com/Dandandan)) -- Fix CREATE SCHEMA schema name double quoting issue. [\#5059](https://github.com/apache/arrow-datafusion/pull/5059) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([neumark](https://github.com/neumark)) -- Minor: Move some aggregate error tests to sqllogictests [\#5055](https://github.com/apache/arrow-datafusion/pull/5055) ([alamb](https://github.com/alamb)) -- Add decimal support to substrait serde [\#5054](https://github.com/apache/arrow-datafusion/pull/5054) ([andygrove](https://github.com/andygrove)) -- Retain schema order in projection [\#5053](https://github.com/apache/arrow-datafusion/pull/5053) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([avantgardnerio](https://github.com/avantgardnerio)) -- Improve join type support in substrait [\#5051](https://github.com/apache/arrow-datafusion/pull/5051) ([andygrove](https://github.com/andygrove)) -- \[Substrait\] ReadRel. 
Get column names from TableScan source [\#5050](https://github.com/apache/arrow-datafusion/pull/5050) ([andygrove](https://github.com/andygrove)) -- Ensure insert projections are of correct type [\#5049](https://github.com/apache/arrow-datafusion/pull/5049) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([avantgardnerio](https://github.com/avantgardnerio)) -- Remove unnecessary pyo3 dependency from datafusion crate [\#5048](https://github.com/apache/arrow-datafusion/pull/5048) ([tustvold](https://github.com/tustvold)) -- Cleanup CI \(\#5040\) [\#5047](https://github.com/apache/arrow-datafusion/pull/5047) ([tustvold](https://github.com/tustvold)) -- Fix homebrew publish script [\#5044](https://github.com/apache/arrow-datafusion/pull/5044) ([iajoiner](https://github.com/iajoiner)) -- Update docs links to logical plans module. [\#5037](https://github.com/apache/arrow-datafusion/pull/5037) ([vincev](https://github.com/vincev)) -- \[sqllogictest\] Read subdirectories in `test_files` [\#5033](https://github.com/apache/arrow-datafusion/pull/5033) ([melgenek](https://github.com/melgenek)) -- minor: Fix docs for create_default_catalog_and_schema [\#5032](https://github.com/apache/arrow-datafusion/pull/5032) ([alamb](https://github.com/alamb)) -- Remove python based posgres comparsion `integration-test` [\#5031](https://github.com/apache/arrow-datafusion/pull/5031) ([alamb](https://github.com/alamb)) -- \[sqllogictest\] Create empty tables [\#5026](https://github.com/apache/arrow-datafusion/pull/5026) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([melgenek](https://github.com/melgenek)) -- Simplify the `PushDownLimit`. 
[\#5021](https://github.com/apache/arrow-datafusion/pull/5021) ([HaoYang670](https://github.com/HaoYang670)) -- \[BugFix\] fix explain csv/json/avro exec can not see metrics bug [\#5018](https://github.com/apache/arrow-datafusion/pull/5018) ([xiaoyong-z](https://github.com/xiaoyong-z)) -- Check placeholder \_\_timeTo and return Datafusion::Plan error [\#5017](https://github.com/apache/arrow-datafusion/pull/5017) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([matthias-Q](https://github.com/matthias-Q)) -- \[sqllogictets\] Remove postgres container orchestration [\#5015](https://github.com/apache/arrow-datafusion/pull/5015) ([alamb](https://github.com/alamb)) -- Sqllogictest: use the same normalization for all tests [\#5013](https://github.com/apache/arrow-datafusion/pull/5013) ([melgenek](https://github.com/melgenek)) -- Minor: Remove invalid comments [\#5012](https://github.com/apache/arrow-datafusion/pull/5012) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) -- AVG\(null\) is NULL \(not zero\) [\#5008](https://github.com/apache/arrow-datafusion/pull/5008) ([alamb](https://github.com/alamb)) -- Minor: improve internal error message [\#5006](https://github.com/apache/arrow-datafusion/pull/5006) ([alamb](https://github.com/alamb)) -- Support for bounded execution when window frame involves UNBOUNDED PRECEDING [\#5003](https://github.com/apache/arrow-datafusion/pull/5003) ([mustafasrepo](https://github.com/mustafasrepo)) -- Bump sqllogictest to v0.11.1 [\#5002](https://github.com/apache/arrow-datafusion/pull/5002) ([xudong963](https://github.com/xudong963)) -- Minor: Document how to create `ListingTables` [\#5001](https://github.com/apache/arrow-datafusion/pull/5001) ([alamb](https://github.com/alamb)) -- \[Enhancement\] early check table exist before create [\#4998](https://github.com/apache/arrow-datafusion/pull/4998) ([xiaoyong-z](https://github.com/xiaoyong-z)) -- \[Feature\] support 
describe file [\#4995](https://github.com/apache/arrow-datafusion/pull/4995) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xiaoyong-z](https://github.com/xiaoyong-z)) -- Implement `std::error::Error::source()` for `DataFusionError`, make `DataFusionError::find_root` more generic [\#4992](https://github.com/apache/arrow-datafusion/pull/4992) ([alamb](https://github.com/alamb)) -- Add support for linear range calculation in WINDOW functions [\#4989](https://github.com/apache/arrow-datafusion/pull/4989) ([mustafasrepo](https://github.com/mustafasrepo)) -- re-export substrait crate [\#4988](https://github.com/apache/arrow-datafusion/pull/4988) ([jdye64](https://github.com/jdye64)) -- minor: Update data type support documentation [\#4984](https://github.com/apache/arrow-datafusion/pull/4984) ([alamb](https://github.com/alamb)) -- fix\(4981\): incorrect error wrapping in `OnceFut` [\#4983](https://github.com/apache/arrow-datafusion/pull/4983) ([DDtKey](https://github.com/DDtKey)) -- Infer values for inserts [\#4977](https://github.com/apache/arrow-datafusion/pull/4977) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([avantgardnerio](https://github.com/avantgardnerio)) -- Simplify GroupByHash implementation \(to prepare for more work\) [\#4972](https://github.com/apache/arrow-datafusion/pull/4972) ([alamb](https://github.com/alamb)) -- Add DataFusionError::Substrait variant to DataFusionError enum [\#4971](https://github.com/apache/arrow-datafusion/pull/4971) ([jdye64](https://github.com/jdye64)) -- refactor: display input partitions for `RepartitionExec` [\#4969](https://github.com/apache/arrow-datafusion/pull/4969) ([crepererum](https://github.com/crepererum)) -- Upgrade to Substrait 0.4.0 [\#4966](https://github.com/apache/arrow-datafusion/pull/4966) ([mbrobbel](https://github.com/mbrobbel)) -- Expose `sql_to_statement` and `statement_to_plan` on `SessionState` [\#4958](https://github.com/apache/arrow-datafusion/pull/4958) 
([avantgardnerio](https://github.com/avantgardnerio)) -- Minor: Make messages consistent for LogicalPlan::Dml [\#4953](https://github.com/apache/arrow-datafusion/pull/4953) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Do not resort inputs to `UnionExec` if they are already sorted [\#4946](https://github.com/apache/arrow-datafusion/pull/4946) ([alamb](https://github.com/alamb)) -- Minor: Reduce even more redundancy creating window_agg in sort_enforcement tests [\#4945](https://github.com/apache/arrow-datafusion/pull/4945) ([alamb](https://github.com/alamb)) -- Only add outer filter once when transforming exists/in subquery to join [\#4944](https://github.com/apache/arrow-datafusion/pull/4944) ([ygf11](https://github.com/ygf11)) -- fix: `FieldNotFound` error message without valid fields [\#4942](https://github.com/apache/arrow-datafusion/pull/4942) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([DDtKey](https://github.com/DDtKey)) -- Propagate planning error back to user [\#4940](https://github.com/apache/arrow-datafusion/pull/4940) ([fsdvh](https://github.com/fsdvh)) -- Make it able to specify a session id for SessionState [\#4933](https://github.com/apache/arrow-datafusion/pull/4933) ([yahoNanJing](https://github.com/yahoNanJing)) -- SUPPORT SEMI/ANTI JOIN SQL syntax in DataFusion [\#4932](https://github.com/apache/arrow-datafusion/pull/4932) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([mingmwang](https://github.com/mingmwang)) -- Support gs:// as GCS schema [\#4930](https://github.com/apache/arrow-datafusion/pull/4930) ([jychen7](https://github.com/jychen7)) -- Upgrade object_store from 0.5.0 to 0.5.3 [\#4929](https://github.com/apache/arrow-datafusion/pull/4929) ([jychen7](https://github.com/jychen7)) -- Reduce redundancy in sort_enforcement tests [\#4928](https://github.com/apache/arrow-datafusion/pull/4928) ([alamb](https://github.com/alamb)) -- Update to arrow 31 
[\#4927](https://github.com/apache/arrow-datafusion/pull/4927) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) -- Unify Row hash and hash implementation [\#4924](https://github.com/apache/arrow-datafusion/pull/4924) ([mustafasrepo](https://github.com/mustafasrepo)) -- Support join-filter pushdown for semi/anti join [\#4923](https://github.com/apache/arrow-datafusion/pull/4923) ([ygf11](https://github.com/ygf11)) -- Minor add ticket link to broken test [\#4919](https://github.com/apache/arrow-datafusion/pull/4919) ([alamb](https://github.com/alamb)) -- Improve documentation for ExprVisitor, port simple uses to new walking function [\#4916](https://github.com/apache/arrow-datafusion/pull/4916) ([alamb](https://github.com/alamb)) -- Add substrait label to PRs [\#4915](https://github.com/apache/arrow-datafusion/pull/4915) ([alamb](https://github.com/alamb)) -- Executing ProjectionExec with no column should not return an Err [\#4912](https://github.com/apache/arrow-datafusion/pull/4912) ([viirya](https://github.com/viirya)) -- Refactor: `Add LogicalPlan::observe_expressions` to walk expressions [\#4906](https://github.com/apache/arrow-datafusion/pull/4906) ([alamb](https://github.com/alamb)) -- Minor: Port information schema tests to sqllogictest [\#4905](https://github.com/apache/arrow-datafusion/pull/4905) ([alamb](https://github.com/alamb)) -- Add insert/update/delete to LogicalPlan and add SQL planner support [\#4902](https://github.com/apache/arrow-datafusion/pull/4902) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([avantgardnerio](https://github.com/avantgardnerio)) -- fix: Visit subqueries in `Expr::Alias` [\#4900](https://github.com/apache/arrow-datafusion/pull/4900) ([askoa](https://github.com/askoa)) -- \[Substrait\] Change API to return LogicalPlan instead of DataFrame [\#4896](https://github.com/apache/arrow-datafusion/pull/4896) ([andygrove](https://github.com/andygrove)) -- Upgrade 
to substrait 0.3 [\#4895](https://github.com/apache/arrow-datafusion/pull/4895) ([andygrove](https://github.com/andygrove)) -- Add datafusion-substrait crate to workspace [\#4893](https://github.com/apache/arrow-datafusion/pull/4893) ([andygrove](https://github.com/andygrove)) -- refactor and add simple function to deserialize and serialize proto b… [\#4892](https://github.com/apache/arrow-datafusion/pull/4892) ([jdye64](https://github.com/jdye64)) -- Update `optimize_children` to return `Result<Option<LogicalPlan>>` [\#4888](https://github.com/apache/arrow-datafusion/pull/4888) ([HaoYang670](https://github.com/HaoYang670)) -- Do not repartition inputs whose sort order is required [\#4885](https://github.com/apache/arrow-datafusion/pull/4885) ([alamb](https://github.com/alamb)) -- Minor: Add docstrings to UnionExec [\#4884](https://github.com/apache/arrow-datafusion/pull/4884) ([alamb](https://github.com/alamb)) -- Update datafusion-substrait crate to build against repo version of DataFusion [\#4879](https://github.com/apache/arrow-datafusion/pull/4879) ([andygrove](https://github.com/andygrove)) -- Fix column indices in EnforceDistribution optimizer in Partial AggregateMode [\#4878](https://github.com/apache/arrow-datafusion/pull/4878) ([jonmmease](https://github.com/jonmmease)) -- refactor: improve repartition buffering [\#4867](https://github.com/apache/arrow-datafusion/pull/4867) ([crepererum](https://github.com/crepererum)) -- Rewrite coerce_plan_expr_for_schema to fix union type coercion [\#4862](https://github.com/apache/arrow-datafusion/pull/4862) ([ygf11](https://github.com/ygf11)) -- \(\#4462\) Postgres compatibility tests using sqllogictest [\#4834](https://github.com/apache/arrow-datafusion/pull/4834) ([melgenek](https://github.com/melgenek)) -- Support non-tuple expression for in-subquery to join [\#4826](https://github.com/apache/arrow-datafusion/pull/4826) ([ygf11](https://github.com/ygf11)) -- Update to arrow `30.0.1`
[\#4818](https://github.com/apache/arrow-datafusion/pull/4818) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) -- Refine the statistics estimation for the limit and aggregate operator [\#4716](https://github.com/apache/arrow-datafusion/pull/4716) ([yahoNanJing](https://github.com/yahoNanJing)) -- Infer prepared statement parameter types [\#4701](https://github.com/apache/arrow-datafusion/pull/4701) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([avantgardnerio](https://github.com/avantgardnerio)) -- Add datafusion-substrait crate [\#4543](https://github.com/apache/arrow-datafusion/pull/4543) ([andygrove](https://github.com/andygrove)) -- Refactor loser tree code in SortPreservingMerge per PR comments [\#4407](https://github.com/apache/arrow-datafusion/pull/4407) ([alamb](https://github.com/alamb)) +- Add `release-crates.sh` script [\#5070](https://github.com/apache/datafusion/pull/5070) ([iajoiner](https://github.com/iajoiner)) +- Validate assignment target column existence for UPDATE statements [\#5069](https://github.com/apache/datafusion/pull/5069) [[sql](https://github.com/apache/datafusion/labels/sql)] ([gruuya](https://github.com/gruuya)) +- Fix limit when size of batch to poll == skip/fetch value [\#5066](https://github.com/apache/datafusion/pull/5066) ([Dandandan](https://github.com/Dandandan)) +- Fix CREATE SCHEMA schema name double quoting issue. 
[\#5059](https://github.com/apache/datafusion/pull/5059) [[sql](https://github.com/apache/datafusion/labels/sql)] ([neumark](https://github.com/neumark)) +- Minor: Move some aggregate error tests to sqllogictests [\#5055](https://github.com/apache/datafusion/pull/5055) ([alamb](https://github.com/alamb)) +- Add decimal support to substrait serde [\#5054](https://github.com/apache/datafusion/pull/5054) ([andygrove](https://github.com/andygrove)) +- Retain schema order in projection [\#5053](https://github.com/apache/datafusion/pull/5053) [[sql](https://github.com/apache/datafusion/labels/sql)] ([avantgardnerio](https://github.com/avantgardnerio)) +- Improve join type support in substrait [\#5051](https://github.com/apache/datafusion/pull/5051) ([andygrove](https://github.com/andygrove)) +- \[Substrait\] ReadRel. Get column names from TableScan source [\#5050](https://github.com/apache/datafusion/pull/5050) ([andygrove](https://github.com/andygrove)) +- Ensure insert projections are of correct type [\#5049](https://github.com/apache/datafusion/pull/5049) [[sql](https://github.com/apache/datafusion/labels/sql)] ([avantgardnerio](https://github.com/avantgardnerio)) +- Remove unnecessary pyo3 dependency from datafusion crate [\#5048](https://github.com/apache/datafusion/pull/5048) ([tustvold](https://github.com/tustvold)) +- Cleanup CI \(\#5040\) [\#5047](https://github.com/apache/datafusion/pull/5047) ([tustvold](https://github.com/tustvold)) +- Fix homebrew publish script [\#5044](https://github.com/apache/datafusion/pull/5044) ([iajoiner](https://github.com/iajoiner)) +- Update docs links to logical plans module. 
[\#5037](https://github.com/apache/datafusion/pull/5037) ([vincev](https://github.com/vincev)) +- \[sqllogictest\] Read subdirectories in `test_files` [\#5033](https://github.com/apache/datafusion/pull/5033) ([melgenek](https://github.com/melgenek)) +- minor: Fix docs for create_default_catalog_and_schema [\#5032](https://github.com/apache/datafusion/pull/5032) ([alamb](https://github.com/alamb)) +- Remove python based posgres comparsion `integration-test` [\#5031](https://github.com/apache/datafusion/pull/5031) ([alamb](https://github.com/alamb)) +- \[sqllogictest\] Create empty tables [\#5026](https://github.com/apache/datafusion/pull/5026) [[sql](https://github.com/apache/datafusion/labels/sql)] ([melgenek](https://github.com/melgenek)) +- Simplify the `PushDownLimit`. [\#5021](https://github.com/apache/datafusion/pull/5021) ([HaoYang670](https://github.com/HaoYang670)) +- \[BugFix\] fix explain csv/json/avro exec can not see metrics bug [\#5018](https://github.com/apache/datafusion/pull/5018) ([xiaoyong-z](https://github.com/xiaoyong-z)) +- Check placeholder \_\_timeTo and return Datafusion::Plan error [\#5017](https://github.com/apache/datafusion/pull/5017) [[sql](https://github.com/apache/datafusion/labels/sql)] ([matthias-Q](https://github.com/matthias-Q)) +- \[sqllogictets\] Remove postgres container orchestration [\#5015](https://github.com/apache/datafusion/pull/5015) ([alamb](https://github.com/alamb)) +- Sqllogictest: use the same normalization for all tests [\#5013](https://github.com/apache/datafusion/pull/5013) ([melgenek](https://github.com/melgenek)) +- Minor: Remove invalid comments [\#5012](https://github.com/apache/datafusion/pull/5012) [[sql](https://github.com/apache/datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) +- AVG\(null\) is NULL \(not zero\) [\#5008](https://github.com/apache/datafusion/pull/5008) ([alamb](https://github.com/alamb)) +- Minor: improve internal error message 
[\#5006](https://github.com/apache/datafusion/pull/5006) ([alamb](https://github.com/alamb)) +- Support for bounded execution when window frame involves UNBOUNDED PRECEDING [\#5003](https://github.com/apache/datafusion/pull/5003) ([mustafasrepo](https://github.com/mustafasrepo)) +- Bump sqllogictest to v0.11.1 [\#5002](https://github.com/apache/datafusion/pull/5002) ([xudong963](https://github.com/xudong963)) +- Minor: Document how to create `ListingTables` [\#5001](https://github.com/apache/datafusion/pull/5001) ([alamb](https://github.com/alamb)) +- \[Enhancement\] early check table exist before create [\#4998](https://github.com/apache/datafusion/pull/4998) ([xiaoyong-z](https://github.com/xiaoyong-z)) +- \[Feature\] support describe file [\#4995](https://github.com/apache/datafusion/pull/4995) [[sql](https://github.com/apache/datafusion/labels/sql)] ([xiaoyong-z](https://github.com/xiaoyong-z)) +- Implement `std::error::Error::source()` for `DataFusionError`, make `DataFusionError::find_root` more generic [\#4992](https://github.com/apache/datafusion/pull/4992) ([alamb](https://github.com/alamb)) +- Add support for linear range calculation in WINDOW functions [\#4989](https://github.com/apache/datafusion/pull/4989) ([mustafasrepo](https://github.com/mustafasrepo)) +- re-export substrait crate [\#4988](https://github.com/apache/datafusion/pull/4988) ([jdye64](https://github.com/jdye64)) +- minor: Update data type support documentation [\#4984](https://github.com/apache/datafusion/pull/4984) ([alamb](https://github.com/alamb)) +- fix\(4981\): incorrect error wrapping in `OnceFut` [\#4983](https://github.com/apache/datafusion/pull/4983) ([DDtKey](https://github.com/DDtKey)) +- Infer values for inserts [\#4977](https://github.com/apache/datafusion/pull/4977) [[sql](https://github.com/apache/datafusion/labels/sql)] ([avantgardnerio](https://github.com/avantgardnerio)) +- Simplify GroupByHash implementation \(to prepare for more work\) 
[\#4972](https://github.com/apache/datafusion/pull/4972) ([alamb](https://github.com/alamb)) +- Add DataFusionError::Substrait variant to DataFusionError enum [\#4971](https://github.com/apache/datafusion/pull/4971) ([jdye64](https://github.com/jdye64)) +- refactor: display input partitions for `RepartitionExec` [\#4969](https://github.com/apache/datafusion/pull/4969) ([crepererum](https://github.com/crepererum)) +- Upgrade to Substrait 0.4.0 [\#4966](https://github.com/apache/datafusion/pull/4966) ([mbrobbel](https://github.com/mbrobbel)) +- Expose `sql_to_statement` and `statement_to_plan` on `SessionState` [\#4958](https://github.com/apache/datafusion/pull/4958) ([avantgardnerio](https://github.com/avantgardnerio)) +- Minor: Make messages consistent for LogicalPlan::Dml [\#4953](https://github.com/apache/datafusion/pull/4953) [[sql](https://github.com/apache/datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Do not resort inputs to `UnionExec` if they are already sorted [\#4946](https://github.com/apache/datafusion/pull/4946) ([alamb](https://github.com/alamb)) +- Minor: Reduce even more redundancy creating window_agg in sort_enforcement tests [\#4945](https://github.com/apache/datafusion/pull/4945) ([alamb](https://github.com/alamb)) +- Only add outer filter once when transforming exists/in subquery to join [\#4944](https://github.com/apache/datafusion/pull/4944) ([ygf11](https://github.com/ygf11)) +- fix: `FieldNotFound` error message without valid fields [\#4942](https://github.com/apache/datafusion/pull/4942) [[sql](https://github.com/apache/datafusion/labels/sql)] ([DDtKey](https://github.com/DDtKey)) +- Propagate planning error back to user [\#4940](https://github.com/apache/datafusion/pull/4940) ([fsdvh](https://github.com/fsdvh)) +- Make it able to specify a session id for SessionState [\#4933](https://github.com/apache/datafusion/pull/4933) ([yahoNanJing](https://github.com/yahoNanJing)) +- SUPPORT SEMI/ANTI JOIN SQL syntax in DataFusion 
[\#4932](https://github.com/apache/datafusion/pull/4932) [[sql](https://github.com/apache/datafusion/labels/sql)] ([mingmwang](https://github.com/mingmwang)) +- Support gs:// as GCS schema [\#4930](https://github.com/apache/datafusion/pull/4930) ([jychen7](https://github.com/jychen7)) +- Upgrade object_store from 0.5.0 to 0.5.3 [\#4929](https://github.com/apache/datafusion/pull/4929) ([jychen7](https://github.com/jychen7)) +- Reduce redundancy in sort_enforcement tests [\#4928](https://github.com/apache/datafusion/pull/4928) ([alamb](https://github.com/alamb)) +- Update to arrow 31 [\#4927](https://github.com/apache/datafusion/pull/4927) [[sql](https://github.com/apache/datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) +- Unify Row hash and hash implementation [\#4924](https://github.com/apache/datafusion/pull/4924) ([mustafasrepo](https://github.com/mustafasrepo)) +- Support join-filter pushdown for semi/anti join [\#4923](https://github.com/apache/datafusion/pull/4923) ([ygf11](https://github.com/ygf11)) +- Minor add ticket link to broken test [\#4919](https://github.com/apache/datafusion/pull/4919) ([alamb](https://github.com/alamb)) +- Improve documentation for ExprVisitor, port simple uses to new walking function [\#4916](https://github.com/apache/datafusion/pull/4916) ([alamb](https://github.com/alamb)) +- Add substrait label to PRs [\#4915](https://github.com/apache/datafusion/pull/4915) ([alamb](https://github.com/alamb)) +- Executing ProjectionExec with no column should not return an Err [\#4912](https://github.com/apache/datafusion/pull/4912) ([viirya](https://github.com/viirya)) +- Refactor: `Add LogicalPlan::observe_expressions` to walk expressions [\#4906](https://github.com/apache/datafusion/pull/4906) ([alamb](https://github.com/alamb)) +- Minor: Port information schema tests to sqllogictest [\#4905](https://github.com/apache/datafusion/pull/4905) ([alamb](https://github.com/alamb)) +- Add insert/update/delete to LogicalPlan and add 
SQL planner support [\#4902](https://github.com/apache/datafusion/pull/4902) [[sql](https://github.com/apache/datafusion/labels/sql)] ([avantgardnerio](https://github.com/avantgardnerio)) +- fix: Visit subqueries in `Expr::Alias` [\#4900](https://github.com/apache/datafusion/pull/4900) ([askoa](https://github.com/askoa)) +- \[Substrait\] Change API to return LogicalPlan instead of DataFrame [\#4896](https://github.com/apache/datafusion/pull/4896) ([andygrove](https://github.com/andygrove)) +- Upgrade to substrait 0.3 [\#4895](https://github.com/apache/datafusion/pull/4895) ([andygrove](https://github.com/andygrove)) +- Add datafusion-substrait crate to workspace [\#4893](https://github.com/apache/datafusion/pull/4893) ([andygrove](https://github.com/andygrove)) +- refactor and add simple function to deserialize and serialize proto b… [\#4892](https://github.com/apache/datafusion/pull/4892) ([jdye64](https://github.com/jdye64)) +- Update `optimize_children` to return `Result<Option<LogicalPlan>>` [\#4888](https://github.com/apache/datafusion/pull/4888) ([HaoYang670](https://github.com/HaoYang670)) +- Do not repartition inputs whose sort order is required [\#4885](https://github.com/apache/datafusion/pull/4885) ([alamb](https://github.com/alamb)) +- Minor: Add docstrings to UnionExec [\#4884](https://github.com/apache/datafusion/pull/4884) ([alamb](https://github.com/alamb)) +- Update datafusion-substrait crate to build against repo version of DataFusion [\#4879](https://github.com/apache/datafusion/pull/4879) ([andygrove](https://github.com/andygrove)) +- Fix column indices in EnforceDistribution optimizer in Partial AggregateMode [\#4878](https://github.com/apache/datafusion/pull/4878) ([jonmmease](https://github.com/jonmmease)) +- refactor: improve repartition buffering [\#4867](https://github.com/apache/datafusion/pull/4867) ([crepererum](https://github.com/crepererum)) +- Rewrite coerce_plan_expr_for_schema to fix union type coercion
[\#4862](https://github.com/apache/datafusion/pull/4862) ([ygf11](https://github.com/ygf11)) +- \(\#4462\) Postgres compatibility tests using sqllogictest [\#4834](https://github.com/apache/datafusion/pull/4834) ([melgenek](https://github.com/melgenek)) +- Support non-tuple expression for in-subquery to join [\#4826](https://github.com/apache/datafusion/pull/4826) ([ygf11](https://github.com/ygf11)) +- Update to arrow `30.0.1` [\#4818](https://github.com/apache/datafusion/pull/4818) [[sql](https://github.com/apache/datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) +- Refine the statistics estimation for the limit and aggregate operator [\#4716](https://github.com/apache/datafusion/pull/4716) ([yahoNanJing](https://github.com/yahoNanJing)) +- Infer prepared statement parameter types [\#4701](https://github.com/apache/datafusion/pull/4701) [[sql](https://github.com/apache/datafusion/labels/sql)] ([avantgardnerio](https://github.com/avantgardnerio)) +- Add datafusion-substrait crate [\#4543](https://github.com/apache/datafusion/pull/4543) ([andygrove](https://github.com/andygrove)) +- Refactor loser tree code in SortPreservingMerge per PR comments [\#4407](https://github.com/apache/datafusion/pull/4407) ([alamb](https://github.com/alamb)) diff --git a/dev/changelog/18.0.0.md b/dev/changelog/18.0.0.md index f51bb947e781..f72e61467ec5 100644 --- a/dev/changelog/18.0.0.md +++ b/dev/changelog/18.0.0.md @@ -17,135 +17,135 @@ under the License. 
--> -## [18.0.0](https://github.com/apache/arrow-datafusion/tree/18.0.0) (2023-02-10) +## [18.0.0](https://github.com/apache/datafusion/tree/18.0.0) (2023-02-10) -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/17.0.0...18.0.0) +[Full Changelog](https://github.com/apache/datafusion/compare/17.0.0...18.0.0) **Breaking changes:** -- Use DataFusionError instead of ArrowError in SendableRecordBatchStream [\#5101](https://github.com/apache/arrow-datafusion/pull/5101) ([comphead](https://github.com/comphead)) -- Update to arrow 32 and Switch to RawDecoder for JSON [\#5056](https://github.com/apache/arrow-datafusion/pull/5056) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) +- Use DataFusionError instead of ArrowError in SendableRecordBatchStream [\#5101](https://github.com/apache/datafusion/pull/5101) ([comphead](https://github.com/comphead)) +- Update to arrow 32 and Switch to RawDecoder for JSON [\#5056](https://github.com/apache/datafusion/pull/5056) [[sql](https://github.com/apache/datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) **Implemented enhancements:** -- DiskManager to create a spill folder if doesn't exist [\#5186](https://github.com/apache/arrow-datafusion/issues/5186) -- cast expression may cause duplicate column name error [\#5174](https://github.com/apache/arrow-datafusion/issues/5174) -- Add type coercion from Dictionary to string for regular expressions [\#5154](https://github.com/apache/arrow-datafusion/issues/5154) -- Unnecessary `Filter` on Parquet datasources [\#5149](https://github.com/apache/arrow-datafusion/issues/5149) -- \[sqllogictest\] Support `pg_typeof` for Postgres compatibility tests [\#5147](https://github.com/apache/arrow-datafusion/issues/5147) -- Supporting Grafana global variables [\#5144](https://github.com/apache/arrow-datafusion/issues/5144) -- add example for standalone DataFusion server which supports Arrow Flight SQL JDBC driver 
[\#5139](https://github.com/apache/arrow-datafusion/issues/5139) -- Support for InList in datafusion-substrait [\#5134](https://github.com/apache/arrow-datafusion/issues/5134) -- Pipeline file opening in `FileStream` [\#5129](https://github.com/apache/arrow-datafusion/issues/5129) -- Make `parse_physical_expr` public [\#5107](https://github.com/apache/arrow-datafusion/issues/5107) -- Use DataFusionError in SendableRecordBatchStream [\#5039](https://github.com/apache/arrow-datafusion/issues/5039) -- Interval coercion:`date_bin('1 hour',...)` does not work but `date_bin(interval '1 hour', ...` does [\#4853](https://github.com/apache/arrow-datafusion/issues/4853) -- `Explain ` should not fail if meeting errors when optimizing the query [\#4766](https://github.com/apache/arrow-datafusion/issues/4766) -- Add option to determine whether to convert identifiers [\#4551](https://github.com/apache/arrow-datafusion/issues/4551) -- Replace `&Option<T>` with `Option<&T>`. [\#4424](https://github.com/apache/arrow-datafusion/issues/4424) -- Error type in `RecordBatchStream` [\#4172](https://github.com/apache/arrow-datafusion/issues/4172) -- Support non-equi join \(e.g.
`ON` clause\) in Dataframe API [\#1254](https://github.com/apache/arrow-datafusion/issues/1254) -- Allow ParquetExec to parallelize work based on row groups [\#137](https://github.com/apache/arrow-datafusion/issues/137) +- DiskManager to create a spill folder if doesn't exist [\#5186](https://github.com/apache/datafusion/issues/5186) +- cast expression may cause duplicate column name error [\#5174](https://github.com/apache/datafusion/issues/5174) +- Add type coercion from Dictionary to string for regular expressions [\#5154](https://github.com/apache/datafusion/issues/5154) +- Unnecessary `Filter` on Parquet datasources [\#5149](https://github.com/apache/datafusion/issues/5149) +- \[sqllogictest\] Support `pg_typeof` for Postgres compatibility tests [\#5147](https://github.com/apache/datafusion/issues/5147) +- Supporting Grafana global variables [\#5144](https://github.com/apache/datafusion/issues/5144) +- add example for standalone DataFusion server which supports Arrow Flight SQL JDBC driver [\#5139](https://github.com/apache/datafusion/issues/5139) +- Support for InList in datafusion-substrait [\#5134](https://github.com/apache/datafusion/issues/5134) +- Pipeline file opening in `FileStream` [\#5129](https://github.com/apache/datafusion/issues/5129) +- Make `parse_physical_expr` public [\#5107](https://github.com/apache/datafusion/issues/5107) +- Use DataFusionError in SendableRecordBatchStream [\#5039](https://github.com/apache/datafusion/issues/5039) +- Interval coercion:`date_bin('1 hour',...)` does not work but `date_bin(interval '1 hour', ...` does [\#4853](https://github.com/apache/datafusion/issues/4853) +- `Explain ` should not fail if meeting errors when optimizing the query [\#4766](https://github.com/apache/datafusion/issues/4766) +- Add option to determine whether to convert identifiers [\#4551](https://github.com/apache/datafusion/issues/4551) +- Replace `&Option<T>` with `Option<&T>`. 
[\#4424](https://github.com/apache/datafusion/issues/4424) +- Error type in `RecordBatchStream` [\#4172](https://github.com/apache/datafusion/issues/4172) +- Support non-equi join \(e.g. `ON` clause\) in Dataframe API [\#1254](https://github.com/apache/datafusion/issues/1254) +- Allow ParquetExec to parallelize work based on row groups [\#137](https://github.com/apache/datafusion/issues/137) **Fixed bugs:** -- Confusing schema errors when using window partition [\#5229](https://github.com/apache/arrow-datafusion/issues/5229) -- Propagating empty_relation generate an illegal plan [\#5218](https://github.com/apache/arrow-datafusion/issues/5218) -- The test `in_list_types_struct_literal` fails when setting `skip_failed_rules` as `false` [\#5217](https://github.com/apache/arrow-datafusion/issues/5217) -- Placeholder values are not replaced in ScalarSubqueries [\#5215](https://github.com/apache/arrow-datafusion/issues/5215) -- Querying against delta lake table does not seem to work [\#5202](https://github.com/apache/arrow-datafusion/issues/5202) -- Arithmetic operation doesn't work with DictionaryArray [\#5193](https://github.com/apache/arrow-datafusion/issues/5193) -- simplify_expr\(\) invoke nullable\(\) exist bug [\#5191](https://github.com/apache/arrow-datafusion/issues/5191) -- CI is currently broken on git diff: Not a git repository [\#5180](https://github.com/apache/arrow-datafusion/issues/5180) -- `write_csv/json/parquet` isn't cancel safe [\#5178](https://github.com/apache/arrow-datafusion/issues/5178) -- no hyperlink to blaze-rs \[doc: README-"Use Cases"\] [\#5175](https://github.com/apache/arrow-datafusion/issues/5175) -- Arithmetic scalar operation doesn't work with DictionaryArray [\#5150](https://github.com/apache/arrow-datafusion/issues/5150) -- Sort operator disappear in physical_plan [\#5100](https://github.com/apache/arrow-datafusion/issues/5100) -- Window function error: InvalidArgumentError\("number of columns\(27\) must match number of fields\(35\) 
in schema" [\#5090](https://github.com/apache/arrow-datafusion/issues/5090) -- `INSERT` statements without target column list are not working [\#5078](https://github.com/apache/arrow-datafusion/issues/5078) -- fix file stream time scanning metrics bug [\#5019](https://github.com/apache/arrow-datafusion/issues/5019) -- Date before `1678` causes panic [\#4875](https://github.com/apache/arrow-datafusion/issues/4875) -- Can not ORDER BY an aliased group column [\#4854](https://github.com/apache/arrow-datafusion/issues/4854) -- The `filters` expressions in `TableScan` may contain fields not included in `schema`. [\#4793](https://github.com/apache/arrow-datafusion/issues/4793) -- Comparing a `Timestamp` to a `Date32` fails [\#4644](https://github.com/apache/arrow-datafusion/issues/4644) -- String --\> TableReference parsing does not properly handle `"` and `.` [\#4532](https://github.com/apache/arrow-datafusion/issues/4532) -- can't compare NULL type with NULL type [\#4335](https://github.com/apache/arrow-datafusion/issues/4335) -- Add ambiguous check when generate selection plan [\#4196](https://github.com/apache/arrow-datafusion/issues/4196) -- Internal error in CAST from Timestamp\[us\] [\#3922](https://github.com/apache/arrow-datafusion/issues/3922) -- Run median expr on parquet file column got error [\#3805](https://github.com/apache/arrow-datafusion/issues/3805) -- aliasing a field renders it missing in the order by clause [\#669](https://github.com/apache/arrow-datafusion/issues/669) -- Querying datetime data in DataFusion with an embedded timezone always fails [\#153](https://github.com/apache/arrow-datafusion/issues/153) +- Confusing schema errors when using window partition [\#5229](https://github.com/apache/datafusion/issues/5229) +- Propagating empty_relation generate an illegal plan [\#5218](https://github.com/apache/datafusion/issues/5218) +- The test `in_list_types_struct_literal` fails when setting `skip_failed_rules` as `false` 
[\#5217](https://github.com/apache/datafusion/issues/5217) +- Placeholder values are not replaced in ScalarSubqueries [\#5215](https://github.com/apache/datafusion/issues/5215) +- Querying against delta lake table does not seem to work [\#5202](https://github.com/apache/datafusion/issues/5202) +- Arithmetic operation doesn't work with DictionaryArray [\#5193](https://github.com/apache/datafusion/issues/5193) +- simplify_expr\(\) invoke nullable\(\) exist bug [\#5191](https://github.com/apache/datafusion/issues/5191) +- CI is currently broken on git diff: Not a git repository [\#5180](https://github.com/apache/datafusion/issues/5180) +- `write_csv/json/parquet` isn't cancel safe [\#5178](https://github.com/apache/datafusion/issues/5178) +- no hyperlink to blaze-rs \[doc: README-"Use Cases"\] [\#5175](https://github.com/apache/datafusion/issues/5175) +- Arithmetic scalar operation doesn't work with DictionaryArray [\#5150](https://github.com/apache/datafusion/issues/5150) +- Sort operator disappear in physical_plan [\#5100](https://github.com/apache/datafusion/issues/5100) +- Window function error: InvalidArgumentError\("number of columns\(27\) must match number of fields\(35\) in schema" [\#5090](https://github.com/apache/datafusion/issues/5090) +- `INSERT` statements without target column list are not working [\#5078](https://github.com/apache/datafusion/issues/5078) +- fix file stream time scanning metrics bug [\#5019](https://github.com/apache/datafusion/issues/5019) +- Date before `1678` causes panic [\#4875](https://github.com/apache/datafusion/issues/4875) +- Can not ORDER BY an aliased group column [\#4854](https://github.com/apache/datafusion/issues/4854) +- The `filters` expressions in `TableScan` may contain fields not included in `schema`. 
[\#4793](https://github.com/apache/datafusion/issues/4793) +- Comparing a `Timestamp` to a `Date32` fails [\#4644](https://github.com/apache/datafusion/issues/4644) +- String --\> TableReference parsing does not properly handle `"` and `.` [\#4532](https://github.com/apache/datafusion/issues/4532) +- can't compare NULL type with NULL type [\#4335](https://github.com/apache/datafusion/issues/4335) +- Add ambiguous check when generate selection plan [\#4196](https://github.com/apache/datafusion/issues/4196) +- Internal error in CAST from Timestamp\[us\] [\#3922](https://github.com/apache/datafusion/issues/3922) +- Run median expr on parquet file column got error [\#3805](https://github.com/apache/datafusion/issues/3805) +- aliasing a field renders it missing in the order by clause [\#669](https://github.com/apache/datafusion/issues/669) +- Querying datetime data in DataFusion with an embedded timezone always fails [\#153](https://github.com/apache/datafusion/issues/153) **Documentation updates:** -- Update README.md fix \[welcoming community\] links [\#5232](https://github.com/apache/arrow-datafusion/pull/5232) ([jiangzhx](https://github.com/jiangzhx)) -- Update README.md update blaze-rs link to https://github.com/blaze-init/blaze [\#5190](https://github.com/apache/arrow-datafusion/pull/5190) ([jiangzhx](https://github.com/jiangzhx)) -- Typo of greptimedb [\#5103](https://github.com/apache/arrow-datafusion/pull/5103) ([fengjiachun](https://github.com/fengjiachun)) -- chore: change `DataBend` to `Databend` [\#5096](https://github.com/apache/arrow-datafusion/pull/5096) ([xudong963](https://github.com/xudong963)) +- Update README.md fix \[welcoming community\] links [\#5232](https://github.com/apache/datafusion/pull/5232) ([jiangzhx](https://github.com/jiangzhx)) +- Update README.md update blaze-rs link to https://github.com/blaze-init/blaze [\#5190](https://github.com/apache/datafusion/pull/5190) ([jiangzhx](https://github.com/jiangzhx)) +- Typo of greptimedb 
[\#5103](https://github.com/apache/datafusion/pull/5103) ([fengjiachun](https://github.com/fengjiachun)) +- chore: change `DataBend` to `Databend` [\#5096](https://github.com/apache/datafusion/pull/5096) ([xudong963](https://github.com/xudong963)) **Closed issues:** -- Change coerced type for comparison between timestamp with date to timestamp [\#4761](https://github.com/apache/arrow-datafusion/issues/4761) +- Change coerced type for comparison between timestamp with date to timestamp [\#4761](https://github.com/apache/datafusion/issues/4761) **Merged pull requests:** -- fix: correct expected error in test [\#5224](https://github.com/apache/arrow-datafusion/pull/5224) ([jackwener](https://github.com/jackwener)) -- bugfix: fix propagating empty_relation generates an illegal plan [\#5219](https://github.com/apache/arrow-datafusion/pull/5219) ([yukkit](https://github.com/yukkit)) -- Replace placeholders in ScalarSubqueries [\#5216](https://github.com/apache/arrow-datafusion/pull/5216) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([avantgardnerio](https://github.com/avantgardnerio)) -- Dataframe join_on method [\#5210](https://github.com/apache/arrow-datafusion/pull/5210) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) -- bugfix: fix eval `nullalbe()` in `simplify_exprs` [\#5208](https://github.com/apache/arrow-datafusion/pull/5208) ([jackwener](https://github.com/jackwener)) -- minor: remove unnecessary clone [\#5207](https://github.com/apache/arrow-datafusion/pull/5207) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- minor: extract `merge_schema()` function. 
[\#5203](https://github.com/apache/arrow-datafusion/pull/5203) ([jackwener](https://github.com/jackwener)) -- minor: remove unnecessary `continue` [\#5200](https://github.com/apache/arrow-datafusion/pull/5200) ([xiaoyong-z](https://github.com/xiaoyong-z)) -- fix\(MemTable\): make it cancel-safe and fix parallelism [\#5197](https://github.com/apache/arrow-datafusion/pull/5197) ([DDtKey](https://github.com/DDtKey)) -- fix: make `write_csv/json/parquet` cancel-safe [\#5196](https://github.com/apache/arrow-datafusion/pull/5196) ([DDtKey](https://github.com/DDtKey)) -- Support arithmetic operation on DictionaryArray [\#5194](https://github.com/apache/arrow-datafusion/pull/5194) ([viirya](https://github.com/viirya)) -- sqllogicaltest: add cleanup and use rowsort. [\#5189](https://github.com/apache/arrow-datafusion/pull/5189) ([jackwener](https://github.com/jackwener)) -- bugfix: fix `TableScan` may contain fields not included in `schema` [\#5188](https://github.com/apache/arrow-datafusion/pull/5188) ([jackwener](https://github.com/jackwener)) -- Create disk manager spill folder if doesn't exist [\#5185](https://github.com/apache/arrow-datafusion/pull/5185) ([comphead](https://github.com/comphead)) -- Parse identifiers properly for TableReferences [\#5183](https://github.com/apache/arrow-datafusion/pull/5183) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) -- Fix decimal scalar dyn kernels [\#5179](https://github.com/apache/arrow-datafusion/pull/5179) ([viirya](https://github.com/viirya)) -- Patch git Safe Paths in CI [\#5177](https://github.com/apache/arrow-datafusion/pull/5177) ([tustvold](https://github.com/tustvold)) -- Add initial support for serializing physical plans with Substrait [\#5176](https://github.com/apache/arrow-datafusion/pull/5176) ([andygrove](https://github.com/andygrove)) -- Bump tokio from 1.24.1 to 1.24.2 in /datafusion-cli [\#5172](https://github.com/apache/arrow-datafusion/pull/5172) 
([dependabot[bot]](https://github.com/apps/dependabot)) -- Make EnforceSorting global sort aware, fix sort mis-optimizations involving unions, support parallel sort + merge transformations [\#5171](https://github.com/apache/arrow-datafusion/pull/5171) ([mustafasrepo](https://github.com/mustafasrepo)) -- Update substrait README.md [\#5168](https://github.com/apache/arrow-datafusion/pull/5168) ([jiangzhx](https://github.com/jiangzhx)) -- Switch to use sum kernel from arrow-rs for Decimal128 [\#5167](https://github.com/apache/arrow-datafusion/pull/5167) ([sunchao](https://github.com/sunchao)) -- FileStream: Open next file in parallel while decoding [\#5161](https://github.com/apache/arrow-datafusion/pull/5161) ([thinkharderdev](https://github.com/thinkharderdev)) -- Fix FairSpillPool try_grow for non-spillable consumers [\#5160](https://github.com/apache/arrow-datafusion/pull/5160) ([tustvold](https://github.com/tustvold)) -- fix: treat unsupported SQL plans as "not implemented" [\#5159](https://github.com/apache/arrow-datafusion/pull/5159) ([crepererum](https://github.com/crepererum)) -- Compare NULL types [\#5158](https://github.com/apache/arrow-datafusion/pull/5158) ([melgenek](https://github.com/melgenek)) -- chore: add object_name_to_table_reference in SqlToRel [\#5155](https://github.com/apache/arrow-datafusion/pull/5155) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jiacai2050](https://github.com/jiacai2050)) -- Ambiguity check for where selection [\#5153](https://github.com/apache/arrow-datafusion/pull/5153) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) -- feat: Type coercion for Dictionary\(\_, \_\) to Utf8 for regex conditions [\#5152](https://github.com/apache/arrow-datafusion/pull/5152) ([stuartcarnie](https://github.com/stuartcarnie)) -- Support arithmetic scalar operation with DictionaryArray [\#5151](https://github.com/apache/arrow-datafusion/pull/5151) 
([viirya](https://github.com/viirya)) -- \[sqllogictest\] Support `pg_typeof` [\#5148](https://github.com/apache/arrow-datafusion/pull/5148) ([melgenek](https://github.com/melgenek)) -- Date to Timestamp cast [\#5140](https://github.com/apache/arrow-datafusion/pull/5140) ([comphead](https://github.com/comphead)) -- add example for Flight SQL server that supports JDBC driver [\#5138](https://github.com/apache/arrow-datafusion/pull/5138) ([kmitchener](https://github.com/kmitchener)) -- Add in-list test [\#5135](https://github.com/apache/arrow-datafusion/pull/5135) ([nseekhao](https://github.com/nseekhao)) -- Bug fix: Empty Record Batch handling [\#5131](https://github.com/apache/arrow-datafusion/pull/5131) ([mustafasrepo](https://github.com/mustafasrepo)) -- Add option to control whether to normalize ident [\#5124](https://github.com/apache/arrow-datafusion/pull/5124) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jiacai2050](https://github.com/jiacai2050)) -- Make `parse_physical_expr` public [\#5118](https://github.com/apache/arrow-datafusion/pull/5118) ([comphead](https://github.com/comphead)) -- Support coercing `utf8` to `interval` and `timestamp` \(including arguments to `date_bin`\) [\#5117](https://github.com/apache/arrow-datafusion/pull/5117) ([alamb](https://github.com/alamb)) -- Fix release issues [\#5116](https://github.com/apache/arrow-datafusion/pull/5116) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) -- minor: port date_bin tests to sqllogictests [\#5115](https://github.com/apache/arrow-datafusion/pull/5115) ([alamb](https://github.com/alamb)) -- Minor: reduce code duplication using `rewrite_expr` [\#5114](https://github.com/apache/arrow-datafusion/pull/5114) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Replace &Option\<T\> with Option\<&T\> [\#5113](https://github.com/apache/arrow-datafusion/pull/5113) 
([gaoxinge](https://github.com/gaoxinge)) -- Improve `get_meet_of_orderings` to check for common prefixes [\#5111](https://github.com/apache/arrow-datafusion/pull/5111) ([ozankabak](https://github.com/ozankabak)) -- \[sqllogictest\] Apply rowsort when there is no explicit order by [\#5110](https://github.com/apache/arrow-datafusion/pull/5110) ([melgenek](https://github.com/melgenek)) -- Add unnest_column to DataFrame [\#5106](https://github.com/apache/arrow-datafusion/pull/5106) ([vincev](https://github.com/vincev)) -- Minor: reduce indent level in page filter pruning code [\#5105](https://github.com/apache/arrow-datafusion/pull/5105) ([alamb](https://github.com/alamb)) -- Replace &Option\<T\> with Option\<&T\> [\#5102](https://github.com/apache/arrow-datafusion/pull/5102) ([gaoxinge](https://github.com/gaoxinge)) -- Minor: remove unused methods in datafusion/optimizer/src/utils.rs [\#5098](https://github.com/apache/arrow-datafusion/pull/5098) ([ygf11](https://github.com/ygf11)) -- ci: don't trigger rust ci for doc changes [\#5097](https://github.com/apache/arrow-datafusion/pull/5097) ([xudong963](https://github.com/xudong963)) -- sqllogicaltest: fix unstable slt case. 
[\#5095](https://github.com/apache/arrow-datafusion/pull/5095) ([jackwener](https://github.com/jackwener)) -- chore: update cranelift-module [\#5094](https://github.com/apache/arrow-datafusion/pull/5094) ([jackwener](https://github.com/jackwener)) -- refactor: Add `rewrite_expr` convenience method for rewriting `Expr`s [\#5092](https://github.com/apache/arrow-datafusion/pull/5092) ([alamb](https://github.com/alamb)) -- Minor: extract sort col rewrite into its own module, add unit tests [\#5088](https://github.com/apache/arrow-datafusion/pull/5088) ([alamb](https://github.com/alamb)) -- \[sqllogictest\] Move `decimal.rs` tests [\#5086](https://github.com/apache/arrow-datafusion/pull/5086) ([melgenek](https://github.com/melgenek)) -- Insert target columns empty fix [\#5079](https://github.com/apache/arrow-datafusion/pull/5079) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([gruuya](https://github.com/gruuya)) -- sqllogicaltest: move union.rs [\#5075](https://github.com/apache/arrow-datafusion/pull/5075) ([jackwener](https://github.com/jackwener)) -- Support ORDER BY an aliased column [\#5067](https://github.com/apache/arrow-datafusion/pull/5067) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Parquet parallel scan [\#5057](https://github.com/apache/arrow-datafusion/pull/5057) ([korowa](https://github.com/korowa)) -- \[BugFix\] fix file stream time scanning metrics bug [\#5020](https://github.com/apache/arrow-datafusion/pull/5020) ([xiaoyong-z](https://github.com/xiaoyong-z)) -- Show optimization errors in explain [\#4819](https://github.com/apache/arrow-datafusion/pull/4819) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) +- fix: correct expected error in test [\#5224](https://github.com/apache/datafusion/pull/5224) ([jackwener](https://github.com/jackwener)) +- bugfix: fix propagating empty_relation generates an illegal plan 
[\#5219](https://github.com/apache/datafusion/pull/5219) ([yukkit](https://github.com/yukkit)) +- Replace placeholders in ScalarSubqueries [\#5216](https://github.com/apache/datafusion/pull/5216) [[sql](https://github.com/apache/datafusion/labels/sql)] ([avantgardnerio](https://github.com/avantgardnerio)) +- Dataframe join_on method [\#5210](https://github.com/apache/datafusion/pull/5210) [[sql](https://github.com/apache/datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) +- bugfix: fix eval `nullalbe()` in `simplify_exprs` [\#5208](https://github.com/apache/datafusion/pull/5208) ([jackwener](https://github.com/jackwener)) +- minor: remove unnecessary clone [\#5207](https://github.com/apache/datafusion/pull/5207) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- minor: extract `merge_schema()` function. [\#5203](https://github.com/apache/datafusion/pull/5203) ([jackwener](https://github.com/jackwener)) +- minor: remove unnecessary `continue` [\#5200](https://github.com/apache/datafusion/pull/5200) ([xiaoyong-z](https://github.com/xiaoyong-z)) +- fix\(MemTable\): make it cancel-safe and fix parallelism [\#5197](https://github.com/apache/datafusion/pull/5197) ([DDtKey](https://github.com/DDtKey)) +- fix: make `write_csv/json/parquet` cancel-safe [\#5196](https://github.com/apache/datafusion/pull/5196) ([DDtKey](https://github.com/DDtKey)) +- Support arithmetic operation on DictionaryArray [\#5194](https://github.com/apache/datafusion/pull/5194) ([viirya](https://github.com/viirya)) +- sqllogicaltest: add cleanup and use rowsort. 
[\#5189](https://github.com/apache/datafusion/pull/5189) ([jackwener](https://github.com/jackwener)) +- bugfix: fix `TableScan` may contain fields not included in `schema` [\#5188](https://github.com/apache/datafusion/pull/5188) ([jackwener](https://github.com/jackwener)) +- Create disk manager spill folder if doesn't exist [\#5185](https://github.com/apache/datafusion/pull/5185) ([comphead](https://github.com/comphead)) +- Parse identifiers properly for TableReferences [\#5183](https://github.com/apache/datafusion/pull/5183) [[sql](https://github.com/apache/datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) +- Fix decimal scalar dyn kernels [\#5179](https://github.com/apache/datafusion/pull/5179) ([viirya](https://github.com/viirya)) +- Patch git Safe Paths in CI [\#5177](https://github.com/apache/datafusion/pull/5177) ([tustvold](https://github.com/tustvold)) +- Add initial support for serializing physical plans with Substrait [\#5176](https://github.com/apache/datafusion/pull/5176) ([andygrove](https://github.com/andygrove)) +- Bump tokio from 1.24.1 to 1.24.2 in /datafusion-cli [\#5172](https://github.com/apache/datafusion/pull/5172) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Make EnforceSorting global sort aware, fix sort mis-optimizations involving unions, support parallel sort + merge transformations [\#5171](https://github.com/apache/datafusion/pull/5171) ([mustafasrepo](https://github.com/mustafasrepo)) +- Update substrait README.md [\#5168](https://github.com/apache/datafusion/pull/5168) ([jiangzhx](https://github.com/jiangzhx)) +- Switch to use sum kernel from arrow-rs for Decimal128 [\#5167](https://github.com/apache/datafusion/pull/5167) ([sunchao](https://github.com/sunchao)) +- FileStream: Open next file in parallel while decoding [\#5161](https://github.com/apache/datafusion/pull/5161) ([thinkharderdev](https://github.com/thinkharderdev)) +- Fix FairSpillPool try_grow for non-spillable consumers 
[\#5160](https://github.com/apache/datafusion/pull/5160) ([tustvold](https://github.com/tustvold)) +- fix: treat unsupported SQL plans as "not implemented" [\#5159](https://github.com/apache/datafusion/pull/5159) ([crepererum](https://github.com/crepererum)) +- Compare NULL types [\#5158](https://github.com/apache/datafusion/pull/5158) ([melgenek](https://github.com/melgenek)) +- chore: add object_name_to_table_reference in SqlToRel [\#5155](https://github.com/apache/datafusion/pull/5155) [[sql](https://github.com/apache/datafusion/labels/sql)] ([jiacai2050](https://github.com/jiacai2050)) +- Ambiguity check for where selection [\#5153](https://github.com/apache/datafusion/pull/5153) [[sql](https://github.com/apache/datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) +- feat: Type coercion for Dictionary\(\_, \_\) to Utf8 for regex conditions [\#5152](https://github.com/apache/datafusion/pull/5152) ([stuartcarnie](https://github.com/stuartcarnie)) +- Support arithmetic scalar operation with DictionaryArray [\#5151](https://github.com/apache/datafusion/pull/5151) ([viirya](https://github.com/viirya)) +- \[sqllogictest\] Support `pg_typeof` [\#5148](https://github.com/apache/datafusion/pull/5148) ([melgenek](https://github.com/melgenek)) +- Date to Timestamp cast [\#5140](https://github.com/apache/datafusion/pull/5140) ([comphead](https://github.com/comphead)) +- add example for Flight SQL server that supports JDBC driver [\#5138](https://github.com/apache/datafusion/pull/5138) ([kmitchener](https://github.com/kmitchener)) +- Add in-list test [\#5135](https://github.com/apache/datafusion/pull/5135) ([nseekhao](https://github.com/nseekhao)) +- Bug fix: Empty Record Batch handling [\#5131](https://github.com/apache/datafusion/pull/5131) ([mustafasrepo](https://github.com/mustafasrepo)) +- Add option to control whether to normalize ident [\#5124](https://github.com/apache/datafusion/pull/5124) [[sql](https://github.com/apache/datafusion/labels/sql)] 
([jiacai2050](https://github.com/jiacai2050)) +- Make `parse_physical_expr` public [\#5118](https://github.com/apache/datafusion/pull/5118) ([comphead](https://github.com/comphead)) +- Support coercing `utf8` to `interval` and `timestamp` \(including arguments to `date_bin`\) [\#5117](https://github.com/apache/datafusion/pull/5117) ([alamb](https://github.com/alamb)) +- Fix release issues [\#5116](https://github.com/apache/datafusion/pull/5116) [[sql](https://github.com/apache/datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- minor: port date_bin tests to sqllogictests [\#5115](https://github.com/apache/datafusion/pull/5115) ([alamb](https://github.com/alamb)) +- Minor: reduce code duplication using `rewrite_expr` [\#5114](https://github.com/apache/datafusion/pull/5114) [[sql](https://github.com/apache/datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Replace &Option\<T\> with Option\<&T\> [\#5113](https://github.com/apache/datafusion/pull/5113) ([gaoxinge](https://github.com/gaoxinge)) +- Improve `get_meet_of_orderings` to check for common prefixes [\#5111](https://github.com/apache/datafusion/pull/5111) ([ozankabak](https://github.com/ozankabak)) +- \[sqllogictest\] Apply rowsort when there is no explicit order by [\#5110](https://github.com/apache/datafusion/pull/5110) ([melgenek](https://github.com/melgenek)) +- Add unnest_column to DataFrame [\#5106](https://github.com/apache/datafusion/pull/5106) ([vincev](https://github.com/vincev)) +- Minor: reduce indent level in page filter pruning code [\#5105](https://github.com/apache/datafusion/pull/5105) ([alamb](https://github.com/alamb)) +- Replace &Option\<T\> with Option\<&T\> [\#5102](https://github.com/apache/datafusion/pull/5102) ([gaoxinge](https://github.com/gaoxinge)) +- Minor: remove unused methods in datafusion/optimizer/src/utils.rs [\#5098](https://github.com/apache/datafusion/pull/5098) ([ygf11](https://github.com/ygf11)) +- ci: don't trigger rust ci for doc changes 
[\#5097](https://github.com/apache/datafusion/pull/5097) ([xudong963](https://github.com/xudong963)) +- sqllogicaltest: fix unstable slt case. [\#5095](https://github.com/apache/datafusion/pull/5095) ([jackwener](https://github.com/jackwener)) +- chore: update cranelift-module [\#5094](https://github.com/apache/datafusion/pull/5094) ([jackwener](https://github.com/jackwener)) +- refactor: Add `rewrite_expr` convenience method for rewriting `Expr`s [\#5092](https://github.com/apache/datafusion/pull/5092) ([alamb](https://github.com/alamb)) +- Minor: extract sort col rewrite into its own module, add unit tests [\#5088](https://github.com/apache/datafusion/pull/5088) ([alamb](https://github.com/alamb)) +- \[sqllogictest\] Move `decimal.rs` tests [\#5086](https://github.com/apache/datafusion/pull/5086) ([melgenek](https://github.com/melgenek)) +- Insert target columns empty fix [\#5079](https://github.com/apache/datafusion/pull/5079) [[sql](https://github.com/apache/datafusion/labels/sql)] ([gruuya](https://github.com/gruuya)) +- sqllogicaltest: move union.rs [\#5075](https://github.com/apache/datafusion/pull/5075) ([jackwener](https://github.com/jackwener)) +- Support ORDER BY an aliased column [\#5067](https://github.com/apache/datafusion/pull/5067) [[sql](https://github.com/apache/datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Parquet parallel scan [\#5057](https://github.com/apache/datafusion/pull/5057) ([korowa](https://github.com/korowa)) +- \[BugFix\] fix file stream time scanning metrics bug [\#5020](https://github.com/apache/datafusion/pull/5020) ([xiaoyong-z](https://github.com/xiaoyong-z)) +- Show optimization errors in explain [\#4819](https://github.com/apache/datafusion/pull/4819) [[sql](https://github.com/apache/datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) diff --git a/dev/changelog/19.0.0.md b/dev/changelog/19.0.0.md index 6e4abcf8ffba..8672f6d48d4e 100644 --- a/dev/changelog/19.0.0.md +++ b/dev/changelog/19.0.0.md 
@@ -17,189 +17,189 @@ under the License. --> -## [19.0.0](https://github.com/apache/arrow-datafusion/tree/19.0.0) (2023-02-24) +## [19.0.0](https://github.com/apache/datafusion/tree/19.0.0) (2023-02-24) -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/18.0.0...19.0.0) +[Full Changelog](https://github.com/apache/datafusion/compare/18.0.0...19.0.0) **Breaking changes:** -- Use DataFusionError instead of ArrowError in SendableRecordBatchStream [\#5101](https://github.com/apache/arrow-datafusion/pull/5101) ([comphead](https://github.com/comphead)) -- Update to arrow 32 and Switch to RawDecoder for JSON [\#5056](https://github.com/apache/arrow-datafusion/pull/5056) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) -- Allow `SessionContext::read_csv`, etc to read multiple files [\#4908](https://github.com/apache/arrow-datafusion/pull/4908) ([saikrishna1-bidgely](https://github.com/saikrishna1-bidgely)) +- Use DataFusionError instead of ArrowError in SendableRecordBatchStream [\#5101](https://github.com/apache/datafusion/pull/5101) ([comphead](https://github.com/comphead)) +- Update to arrow 32 and Switch to RawDecoder for JSON [\#5056](https://github.com/apache/datafusion/pull/5056) [[sql](https://github.com/apache/datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) +- Allow `SessionContext::read_csv`, etc to read multiple files [\#4908](https://github.com/apache/datafusion/pull/4908) ([saikrishna1-bidgely](https://github.com/saikrishna1-bidgely)) **Implemented enhancements:** -- Ignore Arrow in dependabot [\#5340](https://github.com/apache/arrow-datafusion/issues/5340) -- Provide access to internal fields of SessionContext [\#5317](https://github.com/apache/arrow-datafusion/issues/5317) -- Investigate performance drop for DISTINCT queries [\#5313](https://github.com/apache/arrow-datafusion/issues/5313) -- \[DOC\] Update math expression documentation 
[\#5312](https://github.com/apache/arrow-datafusion/issues/5312) -- Replace merge_batches with concat_batches [\#5297](https://github.com/apache/arrow-datafusion/issues/5297) -- Support for some of the window frame range queries [\#5275](https://github.com/apache/arrow-datafusion/issues/5275) -- Make `log` function to be in sync with PostgresSql [\#5259](https://github.com/apache/arrow-datafusion/issues/5259) -- \[SQLLogicTest\] Make schema validation ignore nullable and metadata attributes [\#5231](https://github.com/apache/arrow-datafusion/issues/5231) -- Add support for linear groups search [\#5213](https://github.com/apache/arrow-datafusion/issues/5213) -- Add SQL function overload `LOG(base, x)` for logarithm of x to base [\#5206](https://github.com/apache/arrow-datafusion/issues/5206) -- `all_schema()` will get schema of child of child of .... [\#5192](https://github.com/apache/arrow-datafusion/issues/5192) -- Enable parquet parallel scans by default [\#5125](https://github.com/apache/arrow-datafusion/issues/5125) -- Don't repartition ProjectionExec when it does not compute anything [\#4968](https://github.com/apache/arrow-datafusion/issues/4968) -- Support non-tuple expression for Exists Subquery to Join [\#4934](https://github.com/apache/arrow-datafusion/issues/4934) -- Read multiple files/folders using `read_csv` [\#4909](https://github.com/apache/arrow-datafusion/issues/4909) +- Ignore Arrow in dependabot [\#5340](https://github.com/apache/datafusion/issues/5340) +- Provide access to internal fields of SessionContext [\#5317](https://github.com/apache/datafusion/issues/5317) +- Investigate performance drop for DISTINCT queries [\#5313](https://github.com/apache/datafusion/issues/5313) +- \[DOC\] Update math expression documentation [\#5312](https://github.com/apache/datafusion/issues/5312) +- Replace merge_batches with concat_batches [\#5297](https://github.com/apache/datafusion/issues/5297) +- Support for some of the window frame range queries 
[\#5275](https://github.com/apache/datafusion/issues/5275) +- Make `log` function to be in sync with PostgresSql [\#5259](https://github.com/apache/datafusion/issues/5259) +- \[SQLLogicTest\] Make schema validation ignore nullable and metadata attributes [\#5231](https://github.com/apache/datafusion/issues/5231) +- Add support for linear groups search [\#5213](https://github.com/apache/datafusion/issues/5213) +- Add SQL function overload `LOG(base, x)` for logarithm of x to base [\#5206](https://github.com/apache/datafusion/issues/5206) +- `all_schema()` will get schema of child of child of .... [\#5192](https://github.com/apache/datafusion/issues/5192) +- Enable parquet parallel scans by default [\#5125](https://github.com/apache/datafusion/issues/5125) +- Don't repartition ProjectionExec when it does not compute anything [\#4968](https://github.com/apache/datafusion/issues/4968) +- Support non-tuple expression for Exists Subquery to Join [\#4934](https://github.com/apache/datafusion/issues/4934) +- Read multiple files/folders using `read_csv` [\#4909](https://github.com/apache/datafusion/issues/4909) **Fixed bugs:** -- Make inline_table_scan optimize whole plan during first optimization stage. [\#5364](https://github.com/apache/arrow-datafusion/issues/5364) -- tpcds_logical_q8 ambiguous name. 
[\#5334](https://github.com/apache/arrow-datafusion/issues/5334) -- Protobuf serialisation is missing for GetIndexedFieldExpr [\#5323](https://github.com/apache/arrow-datafusion/issues/5323) -- Indexing a nested list with 0 or an index larger than list size is not handled correctly [\#5310](https://github.com/apache/arrow-datafusion/issues/5310) -- Protobuf serialization drops `preserve_partitioning` from `SortExec` [\#5305](https://github.com/apache/arrow-datafusion/issues/5305) -- data file without suffix can't be read correctly [\#5301](https://github.com/apache/arrow-datafusion/issues/5301) -- Idk [\#5298](https://github.com/apache/arrow-datafusion/issues/5298) -- Error with query that has DISTINCT with ORDER BY and aliased select list [\#5293](https://github.com/apache/arrow-datafusion/issues/5293) -- Optimizer prunes UnnestExec on aggregate count [\#5281](https://github.com/apache/arrow-datafusion/issues/5281) -- Strange Behaviour on RepartitionExec with CoalescePartitionsExec. [\#5278](https://github.com/apache/arrow-datafusion/issues/5278) -- Error "For SELECT DISTINCT, ORDER BY expressions id must appear in select list" may be over eager [\#5255](https://github.com/apache/arrow-datafusion/issues/5255) -- SQL allows SORT BY keyword [\#5247](https://github.com/apache/arrow-datafusion/issues/5247) -- test `sort_on_window_null_string` failed after disable `skip_fail`. [\#5233](https://github.com/apache/arrow-datafusion/issues/5233) -- Dataframe API adds ?table? 
qualifier [\#5187](https://github.com/apache/arrow-datafusion/issues/5187) -- Re-ordering Projections in scan are not working anymore \(since DF15\) [\#5146](https://github.com/apache/arrow-datafusion/issues/5146) -- parquet page level skipping \(page index pruning\) doesn't work with evolved schemas [\#5104](https://github.com/apache/arrow-datafusion/issues/5104) -- Incorrect results on queries with `distinct` and orderby [\#5065](https://github.com/apache/arrow-datafusion/issues/5065) -- NestedLoopJoin will panic when right child contains RepartitionExec [\#5022](https://github.com/apache/arrow-datafusion/issues/5022) -- JSON projection only work when the index is in ascending order [\#4832](https://github.com/apache/arrow-datafusion/issues/4832) -- Stack overflows when planning tpcds 22 in debug mode [\#4786](https://github.com/apache/arrow-datafusion/issues/4786) -- Failed to create Left anti join physical plan due to SchemaError::FieldNotFound [\#4366](https://github.com/apache/arrow-datafusion/issues/4366) -- Filters/limit are not pushdown druing optimalization for table with alias [\#2270](https://github.com/apache/arrow-datafusion/issues/2270) +- Make inline_table_scan optimize whole plan during first optimization stage. [\#5364](https://github.com/apache/datafusion/issues/5364) +- tpcds_logical_q8 ambiguous name. 
[\#5334](https://github.com/apache/datafusion/issues/5334) +- Protobuf serialisation is missing for GetIndexedFieldExpr [\#5323](https://github.com/apache/datafusion/issues/5323) +- Indexing a nested list with 0 or an index larger than list size is not handled correctly [\#5310](https://github.com/apache/datafusion/issues/5310) +- Protobuf serialization drops `preserve_partitioning` from `SortExec` [\#5305](https://github.com/apache/datafusion/issues/5305) +- data file without suffix can't be read correctly [\#5301](https://github.com/apache/datafusion/issues/5301) +- Idk [\#5298](https://github.com/apache/datafusion/issues/5298) +- Error with query that has DISTINCT with ORDER BY and aliased select list [\#5293](https://github.com/apache/datafusion/issues/5293) +- Optimizer prunes UnnestExec on aggregate count [\#5281](https://github.com/apache/datafusion/issues/5281) +- Strange Behaviour on RepartitionExec with CoalescePartitionsExec. [\#5278](https://github.com/apache/datafusion/issues/5278) +- Error "For SELECT DISTINCT, ORDER BY expressions id must appear in select list" may be over eager [\#5255](https://github.com/apache/datafusion/issues/5255) +- SQL allows SORT BY keyword [\#5247](https://github.com/apache/datafusion/issues/5247) +- test `sort_on_window_null_string` failed after disable `skip_fail`. [\#5233](https://github.com/apache/datafusion/issues/5233) +- Dataframe API adds ?table? 
qualifier [\#5187](https://github.com/apache/datafusion/issues/5187) +- Re-ordering Projections in scan are not working anymore \(since DF15\) [\#5146](https://github.com/apache/datafusion/issues/5146) +- parquet page level skipping \(page index pruning\) doesn't work with evolved schemas [\#5104](https://github.com/apache/datafusion/issues/5104) +- Incorrect results on queries with `distinct` and orderby [\#5065](https://github.com/apache/datafusion/issues/5065) +- NestedLoopJoin will panic when right child contains RepartitionExec [\#5022](https://github.com/apache/datafusion/issues/5022) +- JSON projection only work when the index is in ascending order [\#4832](https://github.com/apache/datafusion/issues/4832) +- Stack overflows when planning tpcds 22 in debug mode [\#4786](https://github.com/apache/datafusion/issues/4786) +- Failed to create Left anti join physical plan due to SchemaError::FieldNotFound [\#4366](https://github.com/apache/datafusion/issues/4366) +- Filters/limit are not pushdown druing optimalization for table with alias [\#2270](https://github.com/apache/datafusion/issues/2270) **Documentation updates:** -- Update README.md fix \[welcoming community\] links [\#5232](https://github.com/apache/arrow-datafusion/pull/5232) ([jiangzhx](https://github.com/jiangzhx)) -- Update README.md update blaze-rs link to https://github.com/blaze-init/blaze [\#5190](https://github.com/apache/arrow-datafusion/pull/5190) ([jiangzhx](https://github.com/jiangzhx)) -- Typo of greptimedb [\#5103](https://github.com/apache/arrow-datafusion/pull/5103) ([fengjiachun](https://github.com/fengjiachun)) -- chore: change `DataBend` to `Databend` [\#5096](https://github.com/apache/arrow-datafusion/pull/5096) ([xudong963](https://github.com/xudong963)) +- Update README.md fix \[welcoming community\] links [\#5232](https://github.com/apache/datafusion/pull/5232) ([jiangzhx](https://github.com/jiangzhx)) +- Update README.md update blaze-rs link to 
https://github.com/blaze-init/blaze [\#5190](https://github.com/apache/datafusion/pull/5190) ([jiangzhx](https://github.com/jiangzhx)) +- Typo of greptimedb [\#5103](https://github.com/apache/datafusion/pull/5103) ([fengjiachun](https://github.com/fengjiachun)) +- chore: change `DataBend` to `Databend` [\#5096](https://github.com/apache/datafusion/pull/5096) ([xudong963](https://github.com/xudong963)) **Closed issues:** -- Implement column number / column type verification for sqllogictest [\#4499](https://github.com/apache/arrow-datafusion/issues/4499) +- Implement column number / column type verification for sqllogictest [\#4499](https://github.com/apache/datafusion/issues/4499) **Merged pull requests:** -- generate new projection plan in inline_table_scan instead of discarding [\#5371](https://github.com/apache/arrow-datafusion/pull/5371) ([jackwener](https://github.com/jackwener)) -- minor: fix rule name and comment. [\#5370](https://github.com/apache/arrow-datafusion/pull/5370) ([jackwener](https://github.com/jackwener)) -- minor: port limit tests to sqllogictests [\#5355](https://github.com/apache/arrow-datafusion/pull/5355) ([jackwener](https://github.com/jackwener)) -- feat: add rule to merge projection. [\#5349](https://github.com/apache/arrow-datafusion/pull/5349) ([jackwener](https://github.com/jackwener)) -- Ignore Arrow in dependabot [\#5341](https://github.com/apache/arrow-datafusion/pull/5341) ([iajoiner](https://github.com/iajoiner)) -- minor: remove useless `.get()` [\#5336](https://github.com/apache/arrow-datafusion/pull/5336) ([jackwener](https://github.com/jackwener)) -- bugfix: fix tpcds_logical_q8 ambiguous name. 
[\#5335](https://github.com/apache/arrow-datafusion/pull/5335) ([jackwener](https://github.com/jackwener)) -- minor: disable tpcds_logical_q10/q35 [\#5333](https://github.com/apache/arrow-datafusion/pull/5333) ([jackwener](https://github.com/jackwener)) -- minor: port intersection sql tests to sqllogictests [\#5331](https://github.com/apache/arrow-datafusion/pull/5331) ([alamb](https://github.com/alamb)) -- minor: port more window tests to sqllogictests [\#5330](https://github.com/apache/arrow-datafusion/pull/5330) ([alamb](https://github.com/alamb)) -- MINOR: nicer error messages for cli, use display format rather than debug [\#5329](https://github.com/apache/arrow-datafusion/pull/5329) ([kmitchener](https://github.com/kmitchener)) -- Add missing protobuf serialisation functionality GetIndexedFieldExpr. [\#5324](https://github.com/apache/arrow-datafusion/pull/5324) ([ahmedriza](https://github.com/ahmedriza)) -- chore: small typo in the example README [\#5319](https://github.com/apache/arrow-datafusion/pull/5319) ([gianarb](https://github.com/gianarb)) -- feat: add accessor to SessionContext fields for ContextProvider impl [\#5318](https://github.com/apache/arrow-datafusion/pull/5318) ([sunng87](https://github.com/sunng87)) -- \[DOC\] Update math expression documentation [\#5316](https://github.com/apache/arrow-datafusion/pull/5316) ([comphead](https://github.com/comphead)) -- Fix nested list indexing when the index is 0 or larger than the list size [\#5311](https://github.com/apache/arrow-datafusion/pull/5311) ([ahmedriza](https://github.com/ahmedriza)) -- Fix SortExec bench case and Add SortExec input cases to bench for SortPreservingMergeExec [\#5308](https://github.com/apache/arrow-datafusion/pull/5308) ([jaylmiller](https://github.com/jaylmiller)) -- Allow DISTINCT with ORDER BY and an aliased select list [\#5307](https://github.com/apache/arrow-datafusion/pull/5307) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] 
([alamb](https://github.com/alamb)) -- Serialize preserve_partitioning in SortExec [\#5306](https://github.com/apache/arrow-datafusion/pull/5306) ([thinkharderdev](https://github.com/thinkharderdev)) -- fix: correct plan builder when test `scalar_subquery_project_expr` [\#5304](https://github.com/apache/arrow-datafusion/pull/5304) ([jackwener](https://github.com/jackwener)) -- Make SQL query consistent with API syntax expression in code examples [\#5303](https://github.com/apache/arrow-datafusion/pull/5303) ([ongchi](https://github.com/ongchi)) -- enable tpcds-64 test [\#5302](https://github.com/apache/arrow-datafusion/pull/5302) ([jackwener](https://github.com/jackwener)) -- Feature/merge batches removal [\#5300](https://github.com/apache/arrow-datafusion/pull/5300) ([berkaysynnada](https://github.com/berkaysynnada)) -- fix: add yield point to `RepartitionExec` [\#5299](https://github.com/apache/arrow-datafusion/pull/5299) ([crepererum](https://github.com/crepererum)) -- `datafusion.optimizer.repartition_file_scans` enabled by default [\#5295](https://github.com/apache/arrow-datafusion/pull/5295) ([korowa](https://github.com/korowa)) -- minor: derive Ord/PartialOrd/Eq/PartialEq traits for `ObjectStoreUrl` [\#5288](https://github.com/apache/arrow-datafusion/pull/5288) ([crepererum](https://github.com/crepererum)) -- Fix the potential bug of check_all_column_from_schema [\#5287](https://github.com/apache/arrow-datafusion/pull/5287) ([ygf11](https://github.com/ygf11)) -- Linear search support for Window Group queries [\#5286](https://github.com/apache/arrow-datafusion/pull/5286) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([mustafasrepo](https://github.com/mustafasrepo)) -- Prevent optimizer from pruning UnnestExec. 
[\#5282](https://github.com/apache/arrow-datafusion/pull/5282) ([vincev](https://github.com/vincev)) -- Minor: Add fetch to SortExec display [\#5279](https://github.com/apache/arrow-datafusion/pull/5279) ([thinkharderdev](https://github.com/thinkharderdev)) -- Set `catalog_list` from outside for `SessionState`. [\#5277](https://github.com/apache/arrow-datafusion/pull/5277) ([MichaelScofield](https://github.com/MichaelScofield)) -- Support page skipping / page_index pushdown for evolved schemas [\#5268](https://github.com/apache/arrow-datafusion/pull/5268) ([alamb](https://github.com/alamb)) -- Use upstream newline_delimited_stream [\#5267](https://github.com/apache/arrow-datafusion/pull/5267) ([tustvold](https://github.com/tustvold)) -- Support non-tuple expression for exists-subquery to join [\#5264](https://github.com/apache/arrow-datafusion/pull/5264) ([ygf11](https://github.com/ygf11)) -- minor: Fix cargo fmt [\#5263](https://github.com/apache/arrow-datafusion/pull/5263) ([alamb](https://github.com/alamb)) -- minor: replace `unwrap()` with `?` [\#5262](https://github.com/apache/arrow-datafusion/pull/5262) ([jackwener](https://github.com/jackwener)) -- Preserve `TableScan.projection` order in `push_down_projection` optimizer rule [\#5261](https://github.com/apache/arrow-datafusion/pull/5261) ([korowa](https://github.com/korowa)) -- Minor: refactor ParquetExec roundtrip tests [\#5260](https://github.com/apache/arrow-datafusion/pull/5260) ([alamb](https://github.com/alamb)) -- \[fix\]\[plan\] relax the check for distinct, order by for dataframe [\#5258](https://github.com/apache/arrow-datafusion/pull/5258) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xiaoyong-z](https://github.com/xiaoyong-z)) -- enhance the checking of type errors in the test `window_frame_creation` [\#5257](https://github.com/apache/arrow-datafusion/pull/5257) ([HaoYang670](https://github.com/HaoYang670)) -- SQL planning benchmarks for very wide tables 
[\#5256](https://github.com/apache/arrow-datafusion/pull/5256) ([alamb](https://github.com/alamb)) -- Minor: Add negative test for SORT BY [\#5254](https://github.com/apache/arrow-datafusion/pull/5254) ([alamb](https://github.com/alamb)) -- \[sqllogictest\] Define output types and check them in tests [\#5253](https://github.com/apache/arrow-datafusion/pull/5253) ([melgenek](https://github.com/melgenek)) -- Minor: port some explain test to sqllogictest, add filename normalization [\#5252](https://github.com/apache/arrow-datafusion/pull/5252) ([alamb](https://github.com/alamb)) -- Disallow SORT BY in SQL [\#5249](https://github.com/apache/arrow-datafusion/pull/5249) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) -- \[SQLLogicTest\] Make schema validation ignore nullable and metadata attributes [\#5246](https://github.com/apache/arrow-datafusion/pull/5246) ([comphead](https://github.com/comphead)) -- Add SQL function overload LOG\(base, x\) for logarithm of x to base [\#5245](https://github.com/apache/arrow-datafusion/pull/5245) ([comphead](https://github.com/comphead)) -- Update sqllogictest requirement from 0.11.1 to 0.12.0 \#5237 [\#5244](https://github.com/apache/arrow-datafusion/pull/5244) ([alamb](https://github.com/alamb)) -- Test case for NDJsonExec with randomly ordered projection [\#5243](https://github.com/apache/arrow-datafusion/pull/5243) ([korowa](https://github.com/korowa)) -- Update to arrow `33.0.0` [\#5241](https://github.com/apache/arrow-datafusion/pull/5241) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) -- DataFusion 18.0.0 Release [\#5240](https://github.com/apache/arrow-datafusion/pull/5240) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) -- fix clippy in nightly [\#5238](https://github.com/apache/arrow-datafusion/pull/5238) ([jackwener](https://github.com/jackwener)) -- 
refactor: correct the implementation of `all_schemas()` [\#5236](https://github.com/apache/arrow-datafusion/pull/5236) ([jackwener](https://github.com/jackwener)) -- bugfix: fix error when `get_coerced_window_frame` meet `utf8` [\#5234](https://github.com/apache/arrow-datafusion/pull/5234) ([jackwener](https://github.com/jackwener)) -- Feature/sort enforcement refactor [\#5228](https://github.com/apache/arrow-datafusion/pull/5228) ([mustafasrepo](https://github.com/mustafasrepo)) -- Minor: Fix doc links and typos [\#5225](https://github.com/apache/arrow-datafusion/pull/5225) ([Jefffrey](https://github.com/Jefffrey)) -- fix: correct expected error in test [\#5224](https://github.com/apache/arrow-datafusion/pull/5224) ([jackwener](https://github.com/jackwener)) -- bugfix: fix propagating empty_relation generates an illegal plan [\#5219](https://github.com/apache/arrow-datafusion/pull/5219) ([yukkit](https://github.com/yukkit)) -- Replace placeholders in ScalarSubqueries [\#5216](https://github.com/apache/arrow-datafusion/pull/5216) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([avantgardnerio](https://github.com/avantgardnerio)) -- Dataframe join_on method [\#5210](https://github.com/apache/arrow-datafusion/pull/5210) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) -- bugfix: fix eval `nullalbe()` in `simplify_exprs` [\#5208](https://github.com/apache/arrow-datafusion/pull/5208) ([jackwener](https://github.com/jackwener)) -- minor: remove unnecessary clone [\#5207](https://github.com/apache/arrow-datafusion/pull/5207) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- minor: extract `merge_schema()` function. 
[\#5203](https://github.com/apache/arrow-datafusion/pull/5203) ([jackwener](https://github.com/jackwener)) -- minor: remove unnecessary `continue` [\#5200](https://github.com/apache/arrow-datafusion/pull/5200) ([xiaoyong-z](https://github.com/xiaoyong-z)) -- Minor: Begin porting some window tests to sqllogictests [\#5199](https://github.com/apache/arrow-datafusion/pull/5199) ([alamb](https://github.com/alamb)) -- fix\(MemTable\): make it cancel-safe and fix parallelism [\#5197](https://github.com/apache/arrow-datafusion/pull/5197) ([DDtKey](https://github.com/DDtKey)) -- fix: make `write_csv/json/parquet` cancel-safe [\#5196](https://github.com/apache/arrow-datafusion/pull/5196) ([DDtKey](https://github.com/DDtKey)) -- Support arithmetic operation on DictionaryArray [\#5194](https://github.com/apache/arrow-datafusion/pull/5194) ([viirya](https://github.com/viirya)) -- sqllogicaltest: add cleanup and use rowsort. [\#5189](https://github.com/apache/arrow-datafusion/pull/5189) ([jackwener](https://github.com/jackwener)) -- bugfix: fix `TableScan` may contain fields not included in `schema` [\#5188](https://github.com/apache/arrow-datafusion/pull/5188) ([jackwener](https://github.com/jackwener)) -- Create disk manager spill folder if doesn't exist [\#5185](https://github.com/apache/arrow-datafusion/pull/5185) ([comphead](https://github.com/comphead)) -- Parse identifiers properly for TableReferences [\#5183](https://github.com/apache/arrow-datafusion/pull/5183) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) -- Fix decimal scalar dyn kernels [\#5179](https://github.com/apache/arrow-datafusion/pull/5179) ([viirya](https://github.com/viirya)) -- Patch git Safe Paths in CI [\#5177](https://github.com/apache/arrow-datafusion/pull/5177) ([tustvold](https://github.com/tustvold)) -- Add initial support for serializing physical plans with Substrait [\#5176](https://github.com/apache/arrow-datafusion/pull/5176) 
([andygrove](https://github.com/andygrove)) -- Bump tokio from 1.24.1 to 1.24.2 in /datafusion-cli [\#5172](https://github.com/apache/arrow-datafusion/pull/5172) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Make EnforceSorting global sort aware, fix sort mis-optimizations involving unions, support parallel sort + merge transformations [\#5171](https://github.com/apache/arrow-datafusion/pull/5171) ([mustafasrepo](https://github.com/mustafasrepo)) -- Update substrait README.md [\#5168](https://github.com/apache/arrow-datafusion/pull/5168) ([jiangzhx](https://github.com/jiangzhx)) -- Switch to use sum kernel from arrow-rs for Decimal128 [\#5167](https://github.com/apache/arrow-datafusion/pull/5167) ([sunchao](https://github.com/sunchao)) -- FileStream: Open next file in parallel while decoding [\#5161](https://github.com/apache/arrow-datafusion/pull/5161) ([thinkharderdev](https://github.com/thinkharderdev)) -- Fix FairSpillPool try_grow for non-spillable consumers [\#5160](https://github.com/apache/arrow-datafusion/pull/5160) ([tustvold](https://github.com/tustvold)) -- fix: treat unsupported SQL plans as "not implemented" [\#5159](https://github.com/apache/arrow-datafusion/pull/5159) ([crepererum](https://github.com/crepererum)) -- Compare NULL types [\#5158](https://github.com/apache/arrow-datafusion/pull/5158) ([melgenek](https://github.com/melgenek)) -- Always wrapping OnceAsync for the inner table side in NestedLoopJoinExec [\#5156](https://github.com/apache/arrow-datafusion/pull/5156) ([ygf11](https://github.com/ygf11)) -- chore: add object_name_to_table_reference in SqlToRel [\#5155](https://github.com/apache/arrow-datafusion/pull/5155) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jiacai2050](https://github.com/jiacai2050)) -- Ambiguity check for where selection [\#5153](https://github.com/apache/arrow-datafusion/pull/5153) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) 
-- feat: Type coercion for Dictionary\(\_, \_\) to Utf8 for regex conditions [\#5152](https://github.com/apache/arrow-datafusion/pull/5152) ([stuartcarnie](https://github.com/stuartcarnie)) -- Support arithmetic scalar operation with DictionaryArray [\#5151](https://github.com/apache/arrow-datafusion/pull/5151) ([viirya](https://github.com/viirya)) -- \[sqllogictest\] Support `pg_typeof` [\#5148](https://github.com/apache/arrow-datafusion/pull/5148) ([melgenek](https://github.com/melgenek)) -- Date to Timestamp cast [\#5140](https://github.com/apache/arrow-datafusion/pull/5140) ([comphead](https://github.com/comphead)) -- add example for Flight SQL server that supports JDBC driver [\#5138](https://github.com/apache/arrow-datafusion/pull/5138) ([kmitchener](https://github.com/kmitchener)) -- Add in-list test [\#5135](https://github.com/apache/arrow-datafusion/pull/5135) ([nseekhao](https://github.com/nseekhao)) -- \[BugFix\] abort plan if order by column not in select list [\#5132](https://github.com/apache/arrow-datafusion/pull/5132) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xiaoyong-z](https://github.com/xiaoyong-z)) -- Bug fix: Empty Record Batch handling [\#5131](https://github.com/apache/arrow-datafusion/pull/5131) ([mustafasrepo](https://github.com/mustafasrepo)) -- Add option to control whether to normalize ident [\#5124](https://github.com/apache/arrow-datafusion/pull/5124) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jiacai2050](https://github.com/jiacai2050)) -- Make `parse_physical_expr` public [\#5118](https://github.com/apache/arrow-datafusion/pull/5118) ([comphead](https://github.com/comphead)) -- Support coercing `utf8` to `interval` and `timestamp` \(including arguments to `date_bin`\) [\#5117](https://github.com/apache/arrow-datafusion/pull/5117) ([alamb](https://github.com/alamb)) -- Fix release issues [\#5116](https://github.com/apache/arrow-datafusion/pull/5116) 
[[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) -- minor: port date_bin tests to sqllogictests [\#5115](https://github.com/apache/arrow-datafusion/pull/5115) ([alamb](https://github.com/alamb)) -- Minor: reduce code duplication using `rewrite_expr` [\#5114](https://github.com/apache/arrow-datafusion/pull/5114) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Replace &Option\<T\> with Option\<&T\> [\#5113](https://github.com/apache/arrow-datafusion/pull/5113) ([gaoxinge](https://github.com/gaoxinge)) -- Improve `get_meet_of_orderings` to check for common prefixes [\#5111](https://github.com/apache/arrow-datafusion/pull/5111) ([ozankabak](https://github.com/ozankabak)) -- \[sqllogictest\] Apply rowsort when there is no explicit order by [\#5110](https://github.com/apache/arrow-datafusion/pull/5110) ([melgenek](https://github.com/melgenek)) -- Add unnest_column to DataFrame [\#5106](https://github.com/apache/arrow-datafusion/pull/5106) ([vincev](https://github.com/vincev)) -- Minor: reduce indent level in page filter pruning code [\#5105](https://github.com/apache/arrow-datafusion/pull/5105) ([alamb](https://github.com/alamb)) -- Replace &Option\<T\> with Option\<&T\> [\#5102](https://github.com/apache/arrow-datafusion/pull/5102) ([gaoxinge](https://github.com/gaoxinge)) -- Minor: remove unused methods in datafusion/optimizer/src/utils.rs [\#5098](https://github.com/apache/arrow-datafusion/pull/5098) ([ygf11](https://github.com/ygf11)) -- ci: don't trigger rust ci for doc changes [\#5097](https://github.com/apache/arrow-datafusion/pull/5097) ([xudong963](https://github.com/xudong963)) -- sqllogicaltest: fix unstable slt case. 
[\#5095](https://github.com/apache/arrow-datafusion/pull/5095) ([jackwener](https://github.com/jackwener)) -- chore: update cranelift-module [\#5094](https://github.com/apache/arrow-datafusion/pull/5094) ([jackwener](https://github.com/jackwener)) -- refactor: Add `rewrite_expr` convenience method for rewriting `Expr`s [\#5092](https://github.com/apache/arrow-datafusion/pull/5092) ([alamb](https://github.com/alamb)) -- Minor: extract sort col rewrite into its own module, add unit tests [\#5088](https://github.com/apache/arrow-datafusion/pull/5088) ([alamb](https://github.com/alamb)) -- \[sqllogictest\] Move `decimal.rs` tests [\#5086](https://github.com/apache/arrow-datafusion/pull/5086) ([melgenek](https://github.com/melgenek)) -- Insert target columns empty fix [\#5079](https://github.com/apache/arrow-datafusion/pull/5079) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([gruuya](https://github.com/gruuya)) -- sqllogicaltest: move union.rs [\#5075](https://github.com/apache/arrow-datafusion/pull/5075) ([jackwener](https://github.com/jackwener)) -- \[Enhancement\] Don't repartition ProjectionExec when it does not compute anything [\#5074](https://github.com/apache/arrow-datafusion/pull/5074) ([xiaoyong-z](https://github.com/xiaoyong-z)) -- Support ORDER BY an aliased column [\#5067](https://github.com/apache/arrow-datafusion/pull/5067) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Parquet parallel scan [\#5057](https://github.com/apache/arrow-datafusion/pull/5057) ([korowa](https://github.com/korowa)) -- \[BugFix\] fix file stream time scanning metrics bug [\#5020](https://github.com/apache/arrow-datafusion/pull/5020) ([xiaoyong-z](https://github.com/xiaoyong-z)) -- Show optimization errors in explain [\#4819](https://github.com/apache/arrow-datafusion/pull/4819) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) +- generate new projection 
plan in inline_table_scan instead of discarding [\#5371](https://github.com/apache/datafusion/pull/5371) ([jackwener](https://github.com/jackwener)) +- minor: fix rule name and comment. [\#5370](https://github.com/apache/datafusion/pull/5370) ([jackwener](https://github.com/jackwener)) +- minor: port limit tests to sqllogictests [\#5355](https://github.com/apache/datafusion/pull/5355) ([jackwener](https://github.com/jackwener)) +- feat: add rule to merge projection. [\#5349](https://github.com/apache/datafusion/pull/5349) ([jackwener](https://github.com/jackwener)) +- Ignore Arrow in dependabot [\#5341](https://github.com/apache/datafusion/pull/5341) ([iajoiner](https://github.com/iajoiner)) +- minor: remove useless `.get()` [\#5336](https://github.com/apache/datafusion/pull/5336) ([jackwener](https://github.com/jackwener)) +- bugfix: fix tpcds_logical_q8 ambiguous name. [\#5335](https://github.com/apache/datafusion/pull/5335) ([jackwener](https://github.com/jackwener)) +- minor: disable tpcds_logical_q10/q35 [\#5333](https://github.com/apache/datafusion/pull/5333) ([jackwener](https://github.com/jackwener)) +- minor: port intersection sql tests to sqllogictests [\#5331](https://github.com/apache/datafusion/pull/5331) ([alamb](https://github.com/alamb)) +- minor: port more window tests to sqllogictests [\#5330](https://github.com/apache/datafusion/pull/5330) ([alamb](https://github.com/alamb)) +- MINOR: nicer error messages for cli, use display format rather than debug [\#5329](https://github.com/apache/datafusion/pull/5329) ([kmitchener](https://github.com/kmitchener)) +- Add missing protobuf serialisation functionality GetIndexedFieldExpr. 
[\#5324](https://github.com/apache/datafusion/pull/5324) ([ahmedriza](https://github.com/ahmedriza)) +- chore: small typo in the example README [\#5319](https://github.com/apache/datafusion/pull/5319) ([gianarb](https://github.com/gianarb)) +- feat: add accessor to SessionContext fields for ContextProvider impl [\#5318](https://github.com/apache/datafusion/pull/5318) ([sunng87](https://github.com/sunng87)) +- \[DOC\] Update math expression documentation [\#5316](https://github.com/apache/datafusion/pull/5316) ([comphead](https://github.com/comphead)) +- Fix nested list indexing when the index is 0 or larger than the list size [\#5311](https://github.com/apache/datafusion/pull/5311) ([ahmedriza](https://github.com/ahmedriza)) +- Fix SortExec bench case and Add SortExec input cases to bench for SortPreservingMergeExec [\#5308](https://github.com/apache/datafusion/pull/5308) ([jaylmiller](https://github.com/jaylmiller)) +- Allow DISTINCT with ORDER BY and an aliased select list [\#5307](https://github.com/apache/datafusion/pull/5307) [[sql](https://github.com/apache/datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Serialize preserve_partitioning in SortExec [\#5306](https://github.com/apache/datafusion/pull/5306) ([thinkharderdev](https://github.com/thinkharderdev)) +- fix: correct plan builder when test `scalar_subquery_project_expr` [\#5304](https://github.com/apache/datafusion/pull/5304) ([jackwener](https://github.com/jackwener)) +- Make SQL query consistent with API syntax expression in code examples [\#5303](https://github.com/apache/datafusion/pull/5303) ([ongchi](https://github.com/ongchi)) +- enable tpcds-64 test [\#5302](https://github.com/apache/datafusion/pull/5302) ([jackwener](https://github.com/jackwener)) +- Feature/merge batches removal [\#5300](https://github.com/apache/datafusion/pull/5300) ([berkaysynnada](https://github.com/berkaysynnada)) +- fix: add yield point to `RepartitionExec` 
[\#5299](https://github.com/apache/datafusion/pull/5299) ([crepererum](https://github.com/crepererum)) +- `datafusion.optimizer.repartition_file_scans` enabled by default [\#5295](https://github.com/apache/datafusion/pull/5295) ([korowa](https://github.com/korowa)) +- minor: derive Ord/PartialOrd/Eq/PartialEq traits for `ObjectStoreUrl` [\#5288](https://github.com/apache/datafusion/pull/5288) ([crepererum](https://github.com/crepererum)) +- Fix the potential bug of check_all_column_from_schema [\#5287](https://github.com/apache/datafusion/pull/5287) ([ygf11](https://github.com/ygf11)) +- Linear search support for Window Group queries [\#5286](https://github.com/apache/datafusion/pull/5286) [[sql](https://github.com/apache/datafusion/labels/sql)] ([mustafasrepo](https://github.com/mustafasrepo)) +- Prevent optimizer from pruning UnnestExec. [\#5282](https://github.com/apache/datafusion/pull/5282) ([vincev](https://github.com/vincev)) +- Minor: Add fetch to SortExec display [\#5279](https://github.com/apache/datafusion/pull/5279) ([thinkharderdev](https://github.com/thinkharderdev)) +- Set `catalog_list` from outside for `SessionState`. 
[\#5277](https://github.com/apache/datafusion/pull/5277) ([MichaelScofield](https://github.com/MichaelScofield)) +- Support page skipping / page_index pushdown for evolved schemas [\#5268](https://github.com/apache/datafusion/pull/5268) ([alamb](https://github.com/alamb)) +- Use upstream newline_delimited_stream [\#5267](https://github.com/apache/datafusion/pull/5267) ([tustvold](https://github.com/tustvold)) +- Support non-tuple expression for exists-subquery to join [\#5264](https://github.com/apache/datafusion/pull/5264) ([ygf11](https://github.com/ygf11)) +- minor: Fix cargo fmt [\#5263](https://github.com/apache/datafusion/pull/5263) ([alamb](https://github.com/alamb)) +- minor: replace `unwrap()` with `?` [\#5262](https://github.com/apache/datafusion/pull/5262) ([jackwener](https://github.com/jackwener)) +- Preserve `TableScan.projection` order in `push_down_projection` optimizer rule [\#5261](https://github.com/apache/datafusion/pull/5261) ([korowa](https://github.com/korowa)) +- Minor: refactor ParquetExec roundtrip tests [\#5260](https://github.com/apache/datafusion/pull/5260) ([alamb](https://github.com/alamb)) +- \[fix\]\[plan\] relax the check for distinct, order by for dataframe [\#5258](https://github.com/apache/datafusion/pull/5258) [[sql](https://github.com/apache/datafusion/labels/sql)] ([xiaoyong-z](https://github.com/xiaoyong-z)) +- enhance the checking of type errors in the test `window_frame_creation` [\#5257](https://github.com/apache/datafusion/pull/5257) ([HaoYang670](https://github.com/HaoYang670)) +- SQL planning benchmarks for very wide tables [\#5256](https://github.com/apache/datafusion/pull/5256) ([alamb](https://github.com/alamb)) +- Minor: Add negative test for SORT BY [\#5254](https://github.com/apache/datafusion/pull/5254) ([alamb](https://github.com/alamb)) +- \[sqllogictest\] Define output types and check them in tests [\#5253](https://github.com/apache/datafusion/pull/5253) ([melgenek](https://github.com/melgenek)) +- Minor: 
port some explain test to sqllogictest, add filename normalization [\#5252](https://github.com/apache/datafusion/pull/5252) ([alamb](https://github.com/alamb)) +- Disallow SORT BY in SQL [\#5249](https://github.com/apache/datafusion/pull/5249) [[sql](https://github.com/apache/datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) +- \[SQLLogicTest\] Make schema validation ignore nullable and metadata attributes [\#5246](https://github.com/apache/datafusion/pull/5246) ([comphead](https://github.com/comphead)) +- Add SQL function overload LOG\(base, x\) for logarithm of x to base [\#5245](https://github.com/apache/datafusion/pull/5245) ([comphead](https://github.com/comphead)) +- Update sqllogictest requirement from 0.11.1 to 0.12.0 \#5237 [\#5244](https://github.com/apache/datafusion/pull/5244) ([alamb](https://github.com/alamb)) +- Test case for NDJsonExec with randomly ordered projection [\#5243](https://github.com/apache/datafusion/pull/5243) ([korowa](https://github.com/korowa)) +- Update to arrow `33.0.0` [\#5241](https://github.com/apache/datafusion/pull/5241) [[sql](https://github.com/apache/datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) +- DataFusion 18.0.0 Release [\#5240](https://github.com/apache/datafusion/pull/5240) [[sql](https://github.com/apache/datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- fix clippy in nightly [\#5238](https://github.com/apache/datafusion/pull/5238) ([jackwener](https://github.com/jackwener)) +- refactor: correct the implementation of `all_schemas()` [\#5236](https://github.com/apache/datafusion/pull/5236) ([jackwener](https://github.com/jackwener)) +- bugfix: fix error when `get_coerced_window_frame` meet `utf8` [\#5234](https://github.com/apache/datafusion/pull/5234) ([jackwener](https://github.com/jackwener)) +- Feature/sort enforcement refactor [\#5228](https://github.com/apache/datafusion/pull/5228) ([mustafasrepo](https://github.com/mustafasrepo)) +- Minor: Fix doc links 
and typos [\#5225](https://github.com/apache/datafusion/pull/5225) ([Jefffrey](https://github.com/Jefffrey)) +- fix: correct expected error in test [\#5224](https://github.com/apache/datafusion/pull/5224) ([jackwener](https://github.com/jackwener)) +- bugfix: fix propagating empty_relation generates an illegal plan [\#5219](https://github.com/apache/datafusion/pull/5219) ([yukkit](https://github.com/yukkit)) +- Replace placeholders in ScalarSubqueries [\#5216](https://github.com/apache/datafusion/pull/5216) [[sql](https://github.com/apache/datafusion/labels/sql)] ([avantgardnerio](https://github.com/avantgardnerio)) +- Dataframe join_on method [\#5210](https://github.com/apache/datafusion/pull/5210) [[sql](https://github.com/apache/datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) +- bugfix: fix eval `nullalbe()` in `simplify_exprs` [\#5208](https://github.com/apache/datafusion/pull/5208) ([jackwener](https://github.com/jackwener)) +- minor: remove unnecessary clone [\#5207](https://github.com/apache/datafusion/pull/5207) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- minor: extract `merge_schema()` function. 
[\#5203](https://github.com/apache/datafusion/pull/5203) ([jackwener](https://github.com/jackwener)) +- minor: remove unnecessary `continue` [\#5200](https://github.com/apache/datafusion/pull/5200) ([xiaoyong-z](https://github.com/xiaoyong-z)) +- Minor: Begin porting some window tests to sqllogictests [\#5199](https://github.com/apache/datafusion/pull/5199) ([alamb](https://github.com/alamb)) +- fix\(MemTable\): make it cancel-safe and fix parallelism [\#5197](https://github.com/apache/datafusion/pull/5197) ([DDtKey](https://github.com/DDtKey)) +- fix: make `write_csv/json/parquet` cancel-safe [\#5196](https://github.com/apache/datafusion/pull/5196) ([DDtKey](https://github.com/DDtKey)) +- Support arithmetic operation on DictionaryArray [\#5194](https://github.com/apache/datafusion/pull/5194) ([viirya](https://github.com/viirya)) +- sqllogicaltest: add cleanup and use rowsort. [\#5189](https://github.com/apache/datafusion/pull/5189) ([jackwener](https://github.com/jackwener)) +- bugfix: fix `TableScan` may contain fields not included in `schema` [\#5188](https://github.com/apache/datafusion/pull/5188) ([jackwener](https://github.com/jackwener)) +- Create disk manager spill folder if doesn't exist [\#5185](https://github.com/apache/datafusion/pull/5185) ([comphead](https://github.com/comphead)) +- Parse identifiers properly for TableReferences [\#5183](https://github.com/apache/datafusion/pull/5183) [[sql](https://github.com/apache/datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) +- Fix decimal scalar dyn kernels [\#5179](https://github.com/apache/datafusion/pull/5179) ([viirya](https://github.com/viirya)) +- Patch git Safe Paths in CI [\#5177](https://github.com/apache/datafusion/pull/5177) ([tustvold](https://github.com/tustvold)) +- Add initial support for serializing physical plans with Substrait [\#5176](https://github.com/apache/datafusion/pull/5176) ([andygrove](https://github.com/andygrove)) +- Bump tokio from 1.24.1 to 1.24.2 in 
/datafusion-cli [\#5172](https://github.com/apache/datafusion/pull/5172) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Make EnforceSorting global sort aware, fix sort mis-optimizations involving unions, support parallel sort + merge transformations [\#5171](https://github.com/apache/datafusion/pull/5171) ([mustafasrepo](https://github.com/mustafasrepo)) +- Update substrait README.md [\#5168](https://github.com/apache/datafusion/pull/5168) ([jiangzhx](https://github.com/jiangzhx)) +- Switch to use sum kernel from arrow-rs for Decimal128 [\#5167](https://github.com/apache/datafusion/pull/5167) ([sunchao](https://github.com/sunchao)) +- FileStream: Open next file in parallel while decoding [\#5161](https://github.com/apache/datafusion/pull/5161) ([thinkharderdev](https://github.com/thinkharderdev)) +- Fix FairSpillPool try_grow for non-spillable consumers [\#5160](https://github.com/apache/datafusion/pull/5160) ([tustvold](https://github.com/tustvold)) +- fix: treat unsupported SQL plans as "not implemented" [\#5159](https://github.com/apache/datafusion/pull/5159) ([crepererum](https://github.com/crepererum)) +- Compare NULL types [\#5158](https://github.com/apache/datafusion/pull/5158) ([melgenek](https://github.com/melgenek)) +- Always wrapping OnceAsync for the inner table side in NestedLoopJoinExec [\#5156](https://github.com/apache/datafusion/pull/5156) ([ygf11](https://github.com/ygf11)) +- chore: add object_name_to_table_reference in SqlToRel [\#5155](https://github.com/apache/datafusion/pull/5155) [[sql](https://github.com/apache/datafusion/labels/sql)] ([jiacai2050](https://github.com/jiacai2050)) +- Ambiguity check for where selection [\#5153](https://github.com/apache/datafusion/pull/5153) [[sql](https://github.com/apache/datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) +- feat: Type coercion for Dictionary\(\_, \_\) to Utf8 for regex conditions [\#5152](https://github.com/apache/datafusion/pull/5152) 
([stuartcarnie](https://github.com/stuartcarnie)) +- Support arithmetic scalar operation with DictionaryArray [\#5151](https://github.com/apache/datafusion/pull/5151) ([viirya](https://github.com/viirya)) +- \[sqllogictest\] Support `pg_typeof` [\#5148](https://github.com/apache/datafusion/pull/5148) ([melgenek](https://github.com/melgenek)) +- Date to Timestamp cast [\#5140](https://github.com/apache/datafusion/pull/5140) ([comphead](https://github.com/comphead)) +- add example for Flight SQL server that supports JDBC driver [\#5138](https://github.com/apache/datafusion/pull/5138) ([kmitchener](https://github.com/kmitchener)) +- Add in-list test [\#5135](https://github.com/apache/datafusion/pull/5135) ([nseekhao](https://github.com/nseekhao)) +- \[BugFix\] abort plan if order by column not in select list [\#5132](https://github.com/apache/datafusion/pull/5132) [[sql](https://github.com/apache/datafusion/labels/sql)] ([xiaoyong-z](https://github.com/xiaoyong-z)) +- Bug fix: Empty Record Batch handling [\#5131](https://github.com/apache/datafusion/pull/5131) ([mustafasrepo](https://github.com/mustafasrepo)) +- Add option to control whether to normalize ident [\#5124](https://github.com/apache/datafusion/pull/5124) [[sql](https://github.com/apache/datafusion/labels/sql)] ([jiacai2050](https://github.com/jiacai2050)) +- Make `parse_physical_expr` public [\#5118](https://github.com/apache/datafusion/pull/5118) ([comphead](https://github.com/comphead)) +- Support coercing `utf8` to `interval` and `timestamp` \(including arguments to `date_bin`\) [\#5117](https://github.com/apache/datafusion/pull/5117) ([alamb](https://github.com/alamb)) +- Fix release issues [\#5116](https://github.com/apache/datafusion/pull/5116) [[sql](https://github.com/apache/datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- minor: port date_bin tests to sqllogictests [\#5115](https://github.com/apache/datafusion/pull/5115) ([alamb](https://github.com/alamb)) +- Minor: reduce 
code duplication using `rewrite_expr` [\#5114](https://github.com/apache/datafusion/pull/5114) [[sql](https://github.com/apache/datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Replace &Option\ with Option\<&T\> [\#5113](https://github.com/apache/datafusion/pull/5113) ([gaoxinge](https://github.com/gaoxinge)) +- Improve `get_meet_of_orderings` to check for common prefixes [\#5111](https://github.com/apache/datafusion/pull/5111) ([ozankabak](https://github.com/ozankabak)) +- \[sqllogictest\] Apply rowsort when there is no explicit order by [\#5110](https://github.com/apache/datafusion/pull/5110) ([melgenek](https://github.com/melgenek)) +- Add unnest_column to DataFrame [\#5106](https://github.com/apache/datafusion/pull/5106) ([vincev](https://github.com/vincev)) +- Minor: reduce indent level in page filter pruning code [\#5105](https://github.com/apache/datafusion/pull/5105) ([alamb](https://github.com/alamb)) +- Replace &Option\ with Option\<&T\> [\#5102](https://github.com/apache/datafusion/pull/5102) ([gaoxinge](https://github.com/gaoxinge)) +- Minor: remove unused methods in datafusion/optimizer/src/utils.rs [\#5098](https://github.com/apache/datafusion/pull/5098) ([ygf11](https://github.com/ygf11)) +- ci: don't trigger rust ci for doc changes [\#5097](https://github.com/apache/datafusion/pull/5097) ([xudong963](https://github.com/xudong963)) +- sqllogicaltest: fix unstable slt case. 
[\#5095](https://github.com/apache/datafusion/pull/5095) ([jackwener](https://github.com/jackwener)) +- chore: update cranelift-module [\#5094](https://github.com/apache/datafusion/pull/5094) ([jackwener](https://github.com/jackwener)) +- refactor: Add `rewrite_expr` convenience method for rewriting `Expr`s [\#5092](https://github.com/apache/datafusion/pull/5092) ([alamb](https://github.com/alamb)) +- Minor: extract sort col rewrite into its own module, add unit tests [\#5088](https://github.com/apache/datafusion/pull/5088) ([alamb](https://github.com/alamb)) +- \[sqllogictest\] Move `decimal.rs` tests [\#5086](https://github.com/apache/datafusion/pull/5086) ([melgenek](https://github.com/melgenek)) +- Insert target columns empty fix [\#5079](https://github.com/apache/datafusion/pull/5079) [[sql](https://github.com/apache/datafusion/labels/sql)] ([gruuya](https://github.com/gruuya)) +- sqllogicaltest: move union.rs [\#5075](https://github.com/apache/datafusion/pull/5075) ([jackwener](https://github.com/jackwener)) +- \[Enhancement\] Don't repartition ProjectionExec when it does not compute anything [\#5074](https://github.com/apache/datafusion/pull/5074) ([xiaoyong-z](https://github.com/xiaoyong-z)) +- Support ORDER BY an aliased column [\#5067](https://github.com/apache/datafusion/pull/5067) [[sql](https://github.com/apache/datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Parquet parallel scan [\#5057](https://github.com/apache/datafusion/pull/5057) ([korowa](https://github.com/korowa)) +- \[BugFix\] fix file stream time scanning metrics bug [\#5020](https://github.com/apache/datafusion/pull/5020) ([xiaoyong-z](https://github.com/xiaoyong-z)) +- Show optimization errors in explain [\#4819](https://github.com/apache/datafusion/pull/4819) [[sql](https://github.com/apache/datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) diff --git a/dev/changelog/20.0.0.md b/dev/changelog/20.0.0.md index 8f95d573801c..05e4ed8c4ab3 100644 --- 
a/dev/changelog/20.0.0.md +++ b/dev/changelog/20.0.0.md @@ -17,143 +17,143 @@ under the License. --> -## [20.0.0](https://github.com/apache/arrow-datafusion/tree/20.0.0) (2023-03-10) +## [20.0.0](https://github.com/apache/datafusion/tree/20.0.0) (2023-03-10) -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/19.0.0...20.0.0 +[Full Changelog](https://github.com/apache/datafusion/compare/19.0.0...20.0.0 **Breaking changes:** -- Minor: Move TableProviderFactories up out of `RuntimeEnv` and into `SessionState` [#5477](https://github.com/apache/arrow-datafusion/pull/5477) (alamb) -- chore: Remove references from SessionState from physical_plan [#5455](https://github.com/apache/arrow-datafusion/pull/5455) (alamb) -- Implement `Debug` for `ExecutionProps` and `VarProvider` [#5489](https://github.com/apache/arrow-datafusion/pull/5489) (alamb) +- Minor: Move TableProviderFactories up out of `RuntimeEnv` and into `SessionState` [#5477](https://github.com/apache/datafusion/pull/5477) (alamb) +- chore: Remove references from SessionState from physical_plan [#5455](https://github.com/apache/datafusion/pull/5455) (alamb) +- Implement `Debug` for `ExecutionProps` and `VarProvider` [#5489](https://github.com/apache/datafusion/pull/5489) (alamb) **Implemented enhancements:** -- Add UserDefinedLogicalNodeCore [#5521](https://github.com/apache/arrow-datafusion/pull/5521) (mslapek) -- feat: add `arrow_cast` function to support supports arbitrary arrow types [#5166](https://github.com/apache/arrow-datafusion/pull/5166) (alamb) -- feat: interval add timestamp [#5491](https://github.com/apache/arrow-datafusion/pull/5491) (Weijun-H) -- feat: `ParquetExec` predicate preservation [#5495](https://github.com/apache/arrow-datafusion/pull/5495) (crepererum) -- feat: add optimization rules for bitwise operations [#5423](https://github.com/apache/arrow-datafusion/pull/5423) (izveigor) -- feat: Support bitwise operations for unsigned integer types 
[#5476](https://github.com/apache/arrow-datafusion/pull/5476) (izveigor) -- feat: eliminate the duplicated sort keys in Order By clause [#5462](https://github.com/apache/arrow-datafusion/pull/5462) (jackwener) -- feat: add name() method to UserDefinedLogicalNode [#5450](https://github.com/apache/arrow-datafusion/pull/5450) (waynexia) -- feat: express unsigned literal in substrait [#5448](https://github.com/apache/arrow-datafusion/pull/5448) (waynexia) -- feat: `extensions_options` macro [#5442](https://github.com/apache/arrow-datafusion/pull/5442) (crepererum) -- [feat]:fast check has column [#5328](https://github.com/apache/arrow-datafusion/pull/5328) (suxiaogang223) -- feat: eliminate unnecessary projection. [#5366](https://github.com/apache/arrow-datafusion/pull/5366) (jackwener) +- Add UserDefinedLogicalNodeCore [#5521](https://github.com/apache/datafusion/pull/5521) (mslapek) +- feat: add `arrow_cast` function to support supports arbitrary arrow types [#5166](https://github.com/apache/datafusion/pull/5166) (alamb) +- feat: interval add timestamp [#5491](https://github.com/apache/datafusion/pull/5491) (Weijun-H) +- feat: `ParquetExec` predicate preservation [#5495](https://github.com/apache/datafusion/pull/5495) (crepererum) +- feat: add optimization rules for bitwise operations [#5423](https://github.com/apache/datafusion/pull/5423) (izveigor) +- feat: Support bitwise operations for unsigned integer types [#5476](https://github.com/apache/datafusion/pull/5476) (izveigor) +- feat: eliminate the duplicated sort keys in Order By clause [#5462](https://github.com/apache/datafusion/pull/5462) (jackwener) +- feat: add name() method to UserDefinedLogicalNode [#5450](https://github.com/apache/datafusion/pull/5450) (waynexia) +- feat: express unsigned literal in substrait [#5448](https://github.com/apache/datafusion/pull/5448) (waynexia) +- feat: `extensions_options` macro [#5442](https://github.com/apache/datafusion/pull/5442) (crepererum) +- [feat]:fast check has 
column [#5328](https://github.com/apache/datafusion/pull/5328) (suxiaogang223) +- feat: eliminate unnecessary projection. [#5366](https://github.com/apache/datafusion/pull/5366) (jackwener) **Fixed bugs:** -- revert accidently deleted size code in count_distinct [#5533](https://github.com/apache/arrow-datafusion/pull/5533) (comphead) -- fix: return schema of ExtensionPlan instead of its children's [#5514](https://github.com/apache/arrow-datafusion/pull/5514) (waynexia) -- fix: logical merge conflict -- hash_join tests with passing boolean by value [#5531](https://github.com/apache/arrow-datafusion/pull/5531) (korowa) -- fix: build union schema with child has same column name but qualifier… [#5452](https://github.com/apache/arrow-datafusion/pull/5452) (yukkit) -- Fix is_distinct from for float NaN values [#5446](https://github.com/apache/arrow-datafusion/pull/5446) (comphead) -- Bug/union wrong casting [#5342](https://github.com/apache/arrow-datafusion/pull/5342) (berkaysynnada) -- fix nested loop join with literal join filter [#5431](https://github.com/apache/arrow-datafusion/pull/5431) (ygf11) -- Fix filter pushdown for extension plans [#5425](https://github.com/apache/arrow-datafusion/pull/5425) (thinkharderdev) -- Bug fix: Window frame range value outside the type range [#5384](https://github.com/apache/arrow-datafusion/pull/5384) (mustafasrepo) -- fix: misc phys. 
expression display bugs [#5387](https://github.com/apache/arrow-datafusion/pull/5387) (crepererum) +- revert accidently deleted size code in count_distinct [#5533](https://github.com/apache/datafusion/pull/5533) (comphead) +- fix: return schema of ExtensionPlan instead of its children's [#5514](https://github.com/apache/datafusion/pull/5514) (waynexia) +- fix: logical merge conflict -- hash_join tests with passing boolean by value [#5531](https://github.com/apache/datafusion/pull/5531) (korowa) +- fix: build union schema with child has same column name but qualifier… [#5452](https://github.com/apache/datafusion/pull/5452) (yukkit) +- Fix is_distinct from for float NaN values [#5446](https://github.com/apache/datafusion/pull/5446) (comphead) +- Bug/union wrong casting [#5342](https://github.com/apache/datafusion/pull/5342) (berkaysynnada) +- fix nested loop join with literal join filter [#5431](https://github.com/apache/datafusion/pull/5431) (ygf11) +- Fix filter pushdown for extension plans [#5425](https://github.com/apache/datafusion/pull/5425) (thinkharderdev) +- Bug fix: Window frame range value outside the type range [#5384](https://github.com/apache/datafusion/pull/5384) (mustafasrepo) +- fix: misc phys. 
expression display bugs [#5387](https://github.com/apache/datafusion/pull/5387) (crepererum) **Documentation updates:** -- Minor: Improve docs for UserDefinedLogicalNode `dyn_eq` and `dyn_hash` [#5515](https://github.com/apache/arrow-datafusion/pull/5515) (alamb) -- chore: add known project ZincObserve [#5376](https://github.com/apache/arrow-datafusion/pull/5376) (hengfeiyang) -- docs: clarify spark [#5391](https://github.com/apache/arrow-datafusion/pull/5391) (hyoklee) +- Minor: Improve docs for UserDefinedLogicalNode `dyn_eq` and `dyn_hash` [#5515](https://github.com/apache/datafusion/pull/5515) (alamb) +- chore: add known project ZincObserve [#5376](https://github.com/apache/datafusion/pull/5376) (hengfeiyang) +- docs: clarify spark [#5391](https://github.com/apache/datafusion/pull/5391) (hyoklee) **Merged pull requests:** -- Manual changelog for 20.0.0 [#5551](https://github.com/apache/arrow-datafusion/pull/5551) (andygrove) -- Prepare for 20.0.0 release [Part 1] [#5539](https://github.com/apache/arrow-datafusion/pull/5539) (andygrove) -- chore: deduplicate workspace fields in Cargo.toml [#5519](https://github.com/apache/arrow-datafusion/pull/5519) (waynexia) -- Add necessary features to optimizer [#5540](https://github.com/apache/arrow-datafusion/pull/5540) (viirya) -- Minor: add the concise way for matching numerics [#5537](https://github.com/apache/arrow-datafusion/pull/5537) (izveigor) -- Add UserDefinedLogicalNodeCore [#5521](https://github.com/apache/arrow-datafusion/pull/5521) (mslapek) -- revert accidently deleted size code in count_distinct [#5533](https://github.com/apache/arrow-datafusion/pull/5533) (comphead) -- fix: return schema of ExtensionPlan instead of its children's [#5514](https://github.com/apache/arrow-datafusion/pull/5514) (waynexia) -- Minor: Move `ObjectStoreRegistry` to datafusion_execution crate [#5478](https://github.com/apache/arrow-datafusion/pull/5478) (alamb) -- Minor: Add db-benchmark URL to db-benchmark readme 
[#5503](https://github.com/apache/arrow-datafusion/pull/5503) (alamb) -- minor: fix clippy problem in new version. [#5532](https://github.com/apache/arrow-datafusion/pull/5532) (jackwener) -- fix: logical merge conflict -- hash_join tests with passing boolean by value [#5531](https://github.com/apache/arrow-datafusion/pull/5531) (korowa) -- Memory limited hash join [#5490](https://github.com/apache/arrow-datafusion/pull/5490) (korowa) -- minor: improve error style [#5510](https://github.com/apache/arrow-datafusion/pull/5510) (alamb) -- feat: add `arrow_cast` function to support supports arbitrary arrow types [#5166](https://github.com/apache/arrow-datafusion/pull/5166) (alamb) -- build(deps): update sqlparser requirement from 0.30 to 0.32 w/ API update [#5457](https://github.com/apache/arrow-datafusion/pull/5457) (alamb) -- Allow setting config extensions for TaskContext [#5497](https://github.com/apache/arrow-datafusion/pull/5497) (mpurins-coralogix) -- Minor: Improve docs for UserDefinedLogicalNode `dyn_eq` and `dyn_hash` [#5515](https://github.com/apache/arrow-datafusion/pull/5515) (alamb) -- feat: interval add timestamp [#5491](https://github.com/apache/arrow-datafusion/pull/5491) (Weijun-H) -- Pass booleans by value instead of by reference [#5487](https://github.com/apache/arrow-datafusion/pull/5487) (maxburke) -- Minor: Move TableProviderFactories up out of `RuntimeEnv` and into `SessionState` [#5477](https://github.com/apache/arrow-datafusion/pull/5477) (alamb) -- feat: `ParquetExec` predicate preservation [#5495](https://github.com/apache/arrow-datafusion/pull/5495) (crepererum) -- feat: add optimization rules for bitwise operations [#5423](https://github.com/apache/arrow-datafusion/pull/5423) (izveigor) -- chore: Remove references from SessionState from physical_plan [#5455](https://github.com/apache/arrow-datafusion/pull/5455) (alamb) -- Implement `Debug` for `ExecutionProps` and `VarProvider` [#5489](https://github.com/apache/arrow-datafusion/pull/5489) 
(alamb) -- feat: Support bitwise operations for unsigned integer types [#5476](https://github.com/apache/arrow-datafusion/pull/5476) (izveigor) -- Apply workaround for #5444 to `DataFrame::describe` [#5468](https://github.com/apache/arrow-datafusion/pull/5468) (jiangzhx) -- feat: eliminate the duplicated sort keys in Order By clause [#5462](https://github.com/apache/arrow-datafusion/pull/5462) (jackwener) -- Propagate timezone to created arrays [#5481](https://github.com/apache/arrow-datafusion/pull/5481) (maxburke) -- refactor: make GeometricMean not to have update and merge [#5469](https://github.com/apache/arrow-datafusion/pull/5469) (Weijun-H) -- feat: add name() method to UserDefinedLogicalNode [#5450](https://github.com/apache/arrow-datafusion/pull/5450) (waynexia) -- Comment out description text in issue templates [#5482](https://github.com/apache/arrow-datafusion/pull/5482) (Jefffrey) -- feat: express unsigned literal in substrait [#5448](https://github.com/apache/arrow-datafusion/pull/5448) (waynexia) -- fix: build union schema with child has same column name but qualifier… [#5452](https://github.com/apache/arrow-datafusion/pull/5452) (yukkit) -- refactor: make sum_distinct not to have update and merge [#5474](https://github.com/apache/arrow-datafusion/pull/5474) (Weijun-H) -- `compute_decimal_op_dyn_scalar` should not cast lhs array to decimal array [#5465](https://github.com/apache/arrow-datafusion/pull/5465) (viirya) -- feat: `extensions_options` macro [#5442](https://github.com/apache/arrow-datafusion/pull/5442) (crepererum) -- Enable hash joins on FixedSizeBinary columns [#5461](https://github.com/apache/arrow-datafusion/pull/5461) (maxburke) -- Fix is_distinct from for float NaN values [#5446](https://github.com/apache/arrow-datafusion/pull/5446) (comphead) -- Implement/fix Eq and Hash for Expr and LogicalPlan [#5421](https://github.com/apache/arrow-datafusion/pull/5421) (mslapek) -- [feat]:fast check has column 
[#5328](https://github.com/apache/arrow-datafusion/pull/5328) (suxiaogang223) -- Parquet sorting benchmark [#5433](https://github.com/apache/arrow-datafusion/pull/5433) (jaylmiller) -- refactor count_distinct to not to have update and merge [#5408](https://github.com/apache/arrow-datafusion/pull/5408) (Weijun-H) -- build(deps): update zstd requirement from 0.11 to 0.12 [#5458](https://github.com/apache/arrow-datafusion/pull/5458) (alamb) -- Upgrade bytes to 1.4 [#5460](https://github.com/apache/arrow-datafusion/pull/5460) (viirya) -- add std,median result to describe method [#5445](https://github.com/apache/arrow-datafusion/pull/5445) (jiangzhx) -- minor: Port more window tests to sqlogictests [#5434](https://github.com/apache/arrow-datafusion/pull/5434) (alamb) -- Use compute_op_dyn_scalar for datatime [#5315](https://github.com/apache/arrow-datafusion/pull/5315) (viirya) -- add a unit test that cover cast bug. [#5443](https://github.com/apache/arrow-datafusion/pull/5443) (jackwener) -- create new `datafusion-execution` crate, start splitting code out [#5432](https://github.com/apache/arrow-datafusion/pull/5432) (alamb) -- minor: fix clippy in nightly. [#5440](https://github.com/apache/arrow-datafusion/pull/5440) (jackwener) -- Support for Sliding Windows Joins with Symmetric Hash Join (SHJ) [#5322](https://github.com/apache/arrow-datafusion/pull/5322) (metesynnada) -- refactor: ParquetExec logical expr. => phys. expr. 
[#5419](https://github.com/apache/arrow-datafusion/pull/5419) (crepererum) -- Update README.md fix [DataFusion] links [#5438](https://github.com/apache/arrow-datafusion/pull/5438) (jiangzhx) -- add mean result for describe method [#5435](https://github.com/apache/arrow-datafusion/pull/5435) (jiangzhx) -- add expr_fn::median [#5437](https://github.com/apache/arrow-datafusion/pull/5437) (jiangzhx) -- Bug/union wrong casting [#5342](https://github.com/apache/arrow-datafusion/pull/5342) (berkaysynnada) -- reimplement `push_down_projection` and `prune_column`. [#4465](https://github.com/apache/arrow-datafusion/pull/4465) (jackwener) -- Add `expr_fn::stddev` [#5409](https://github.com/apache/arrow-datafusion/pull/5409) (jiangzhx) -- fix nested loop join with literal join filter [#5431](https://github.com/apache/arrow-datafusion/pull/5431) (ygf11) -- add a describe method on DataFrame like Polars [#5226](https://github.com/apache/arrow-datafusion/pull/5226) (jiangzhx) -- Memory reservation & metrics for cross join [#5339](https://github.com/apache/arrow-datafusion/pull/5339) (korowa) -- Optimize count_distinct.size [#5377](https://github.com/apache/arrow-datafusion/pull/5377) (comphead) -- Fix filter pushdown for extension plans [#5425](https://github.com/apache/arrow-datafusion/pull/5425) (thinkharderdev) -- Also push down all filters in TableProvider [#5420](https://github.com/apache/arrow-datafusion/pull/5420) (avantgardnerio) -- Update arrow 34 [#5375](https://github.com/apache/arrow-datafusion/pull/5375) (tustvold) -- Parquet limit pushdown (#5404) [#5416](https://github.com/apache/arrow-datafusion/pull/5416) (tustvold) -- Move file format config.rs to live with the rest of the datasource code [#5406](https://github.com/apache/arrow-datafusion/pull/5406) (alamb) -- Support Zstd compressed files [#5397](https://github.com/apache/arrow-datafusion/pull/5397) (dennybritz) -- Add example of catalog API usage (#5291) 
[#5326](https://github.com/apache/arrow-datafusion/pull/5326) (jaylmiller) -- Add support for protobuf serialisation of Arrow Map type [#5359](https://github.com/apache/arrow-datafusion/pull/5359) (ahmedriza) -- minor: port window tests to slt (part 2) [#5399](https://github.com/apache/arrow-datafusion/pull/5399) (alamb) -- fix(docs): fix typos [#5403](https://github.com/apache/arrow-datafusion/pull/5403) (WenyXu) -- Try to push down full filter before break-up [#5367](https://github.com/apache/arrow-datafusion/pull/5367) (avantgardnerio) -- enhance: remove more projection. [#5402](https://github.com/apache/arrow-datafusion/pull/5402) (jackwener) -- refactor `push_down_filter` to fix dead-loop and use optimizer_recurse. [#5337](https://github.com/apache/arrow-datafusion/pull/5337) (jackwener) -- feat: eliminate unnecessary projection. [#5366](https://github.com/apache/arrow-datafusion/pull/5366) (jackwener) -- minor: add forgotten large_utf8 [#5393](https://github.com/apache/arrow-datafusion/pull/5393) (jackwener) -- Minor: add tests for subquery to join [#5363](https://github.com/apache/arrow-datafusion/pull/5363) (ygf11) -- bugfix: fix master `bors` problem. 
[#5395](https://github.com/apache/arrow-datafusion/pull/5395) (jackwener) -- Rule ReplaceDistinctWithAggregate [#5354](https://github.com/apache/arrow-datafusion/pull/5354) (mingmwang) -- chore: add known project ZincObserve [#5376](https://github.com/apache/arrow-datafusion/pull/5376) (hengfeiyang) -- refactor: parquet pruning simplifications [#5386](https://github.com/apache/arrow-datafusion/pull/5386) (crepererum) -- Minor: intersect expressions optimization [#5388](https://github.com/apache/arrow-datafusion/pull/5388) (izveigor) -- docs: clarify spark [#5391](https://github.com/apache/arrow-datafusion/pull/5391) (hyoklee) -- UDF zero params #5378 [#5380](https://github.com/apache/arrow-datafusion/pull/5380) (jaylmiller) -- Minor: added some tests for coercion type [#5389](https://github.com/apache/arrow-datafusion/pull/5389) (izveigor) -- minor: make table resolution an independent function ... [#5373](https://github.com/apache/arrow-datafusion/pull/5373) (MichaelScofield) -- minor: port predicates tests to sqllogictests [#5374](https://github.com/apache/arrow-datafusion/pull/5374) (jackwener) -- Bug fix: Window frame range value outside the type range [#5384](https://github.com/apache/arrow-datafusion/pull/5384) (mustafasrepo) -- Fixed small typos in files of the optimizer [#5356](https://github.com/apache/arrow-datafusion/pull/5356) (izveigor) -- fix: misc phys. 
expression display bugs [#5387](https://github.com/apache/arrow-datafusion/pull/5387) (crepererum) -- Prepare for 19.0.0 release [#5381](https://github.com/apache/arrow-datafusion/pull/5381) (andygrove) -- minor: disable tpcds-q41 due to not support decorrelate disjunction subquery [#5369](https://github.com/apache/arrow-datafusion/pull/5369) (jackwener) +- Manual changelog for 20.0.0 [#5551](https://github.com/apache/datafusion/pull/5551) (andygrove) +- Prepare for 20.0.0 release [Part 1] [#5539](https://github.com/apache/datafusion/pull/5539) (andygrove) +- chore: deduplicate workspace fields in Cargo.toml [#5519](https://github.com/apache/datafusion/pull/5519) (waynexia) +- Add necessary features to optimizer [#5540](https://github.com/apache/datafusion/pull/5540) (viirya) +- Minor: add the concise way for matching numerics [#5537](https://github.com/apache/datafusion/pull/5537) (izveigor) +- Add UserDefinedLogicalNodeCore [#5521](https://github.com/apache/datafusion/pull/5521) (mslapek) +- revert accidently deleted size code in count_distinct [#5533](https://github.com/apache/datafusion/pull/5533) (comphead) +- fix: return schema of ExtensionPlan instead of its children's [#5514](https://github.com/apache/datafusion/pull/5514) (waynexia) +- Minor: Move `ObjectStoreRegistry` to datafusion_execution crate [#5478](https://github.com/apache/datafusion/pull/5478) (alamb) +- Minor: Add db-benchmark URL to db-benchmark readme [#5503](https://github.com/apache/datafusion/pull/5503) (alamb) +- minor: fix clippy problem in new version. 
[#5532](https://github.com/apache/datafusion/pull/5532) (jackwener) +- fix: logical merge conflict -- hash_join tests with passing boolean by value [#5531](https://github.com/apache/datafusion/pull/5531) (korowa) +- Memory limited hash join [#5490](https://github.com/apache/datafusion/pull/5490) (korowa) +- minor: improve error style [#5510](https://github.com/apache/datafusion/pull/5510) (alamb) +- feat: add `arrow_cast` function to support supports arbitrary arrow types [#5166](https://github.com/apache/datafusion/pull/5166) (alamb) +- build(deps): update sqlparser requirement from 0.30 to 0.32 w/ API update [#5457](https://github.com/apache/datafusion/pull/5457) (alamb) +- Allow setting config extensions for TaskContext [#5497](https://github.com/apache/datafusion/pull/5497) (mpurins-coralogix) +- Minor: Improve docs for UserDefinedLogicalNode `dyn_eq` and `dyn_hash` [#5515](https://github.com/apache/datafusion/pull/5515) (alamb) +- feat: interval add timestamp [#5491](https://github.com/apache/datafusion/pull/5491) (Weijun-H) +- Pass booleans by value instead of by reference [#5487](https://github.com/apache/datafusion/pull/5487) (maxburke) +- Minor: Move TableProviderFactories up out of `RuntimeEnv` and into `SessionState` [#5477](https://github.com/apache/datafusion/pull/5477) (alamb) +- feat: `ParquetExec` predicate preservation [#5495](https://github.com/apache/datafusion/pull/5495) (crepererum) +- feat: add optimization rules for bitwise operations [#5423](https://github.com/apache/datafusion/pull/5423) (izveigor) +- chore: Remove references from SessionState from physical_plan [#5455](https://github.com/apache/datafusion/pull/5455) (alamb) +- Implement `Debug` for `ExecutionProps` and `VarProvider` [#5489](https://github.com/apache/datafusion/pull/5489) (alamb) +- feat: Support bitwise operations for unsigned integer types [#5476](https://github.com/apache/datafusion/pull/5476) (izveigor) +- Apply workaround for #5444 to `DataFrame::describe` 
[#5468](https://github.com/apache/datafusion/pull/5468) (jiangzhx) +- feat: eliminate the duplicated sort keys in Order By clause [#5462](https://github.com/apache/datafusion/pull/5462) (jackwener) +- Propagate timezone to created arrays [#5481](https://github.com/apache/datafusion/pull/5481) (maxburke) +- refactor: make GeometricMean not to have update and merge [#5469](https://github.com/apache/datafusion/pull/5469) (Weijun-H) +- feat: add name() method to UserDefinedLogicalNode [#5450](https://github.com/apache/datafusion/pull/5450) (waynexia) +- Comment out description text in issue templates [#5482](https://github.com/apache/datafusion/pull/5482) (Jefffrey) +- feat: express unsigned literal in substrait [#5448](https://github.com/apache/datafusion/pull/5448) (waynexia) +- fix: build union schema with child has same column name but qualifier… [#5452](https://github.com/apache/datafusion/pull/5452) (yukkit) +- refactor: make sum_distinct not to have update and merge [#5474](https://github.com/apache/datafusion/pull/5474) (Weijun-H) +- `compute_decimal_op_dyn_scalar` should not cast lhs array to decimal array [#5465](https://github.com/apache/datafusion/pull/5465) (viirya) +- feat: `extensions_options` macro [#5442](https://github.com/apache/datafusion/pull/5442) (crepererum) +- Enable hash joins on FixedSizeBinary columns [#5461](https://github.com/apache/datafusion/pull/5461) (maxburke) +- Fix is_distinct from for float NaN values [#5446](https://github.com/apache/datafusion/pull/5446) (comphead) +- Implement/fix Eq and Hash for Expr and LogicalPlan [#5421](https://github.com/apache/datafusion/pull/5421) (mslapek) +- [feat]:fast check has column [#5328](https://github.com/apache/datafusion/pull/5328) (suxiaogang223) +- Parquet sorting benchmark [#5433](https://github.com/apache/datafusion/pull/5433) (jaylmiller) +- refactor count_distinct to not to have update and merge [#5408](https://github.com/apache/datafusion/pull/5408) (Weijun-H) +- build(deps): update 
zstd requirement from 0.11 to 0.12 [#5458](https://github.com/apache/datafusion/pull/5458) (alamb) +- Upgrade bytes to 1.4 [#5460](https://github.com/apache/datafusion/pull/5460) (viirya) +- add std,median result to describe method [#5445](https://github.com/apache/datafusion/pull/5445) (jiangzhx) +- minor: Port more window tests to sqlogictests [#5434](https://github.com/apache/datafusion/pull/5434) (alamb) +- Use compute_op_dyn_scalar for datatime [#5315](https://github.com/apache/datafusion/pull/5315) (viirya) +- add a unit test that cover cast bug. [#5443](https://github.com/apache/datafusion/pull/5443) (jackwener) +- create new `datafusion-execution` crate, start splitting code out [#5432](https://github.com/apache/datafusion/pull/5432) (alamb) +- minor: fix clippy in nightly. [#5440](https://github.com/apache/datafusion/pull/5440) (jackwener) +- Support for Sliding Windows Joins with Symmetric Hash Join (SHJ) [#5322](https://github.com/apache/datafusion/pull/5322) (metesynnada) +- refactor: ParquetExec logical expr. => phys. expr. [#5419](https://github.com/apache/datafusion/pull/5419) (crepererum) +- Update README.md fix [DataFusion] links [#5438](https://github.com/apache/datafusion/pull/5438) (jiangzhx) +- add mean result for describe method [#5435](https://github.com/apache/datafusion/pull/5435) (jiangzhx) +- add expr_fn::median [#5437](https://github.com/apache/datafusion/pull/5437) (jiangzhx) +- Bug/union wrong casting [#5342](https://github.com/apache/datafusion/pull/5342) (berkaysynnada) +- reimplement `push_down_projection` and `prune_column`. 
[#4465](https://github.com/apache/datafusion/pull/4465) (jackwener) +- Add `expr_fn::stddev` [#5409](https://github.com/apache/datafusion/pull/5409) (jiangzhx) +- fix nested loop join with literal join filter [#5431](https://github.com/apache/datafusion/pull/5431) (ygf11) +- add a describe method on DataFrame like Polars [#5226](https://github.com/apache/datafusion/pull/5226) (jiangzhx) +- Memory reservation & metrics for cross join [#5339](https://github.com/apache/datafusion/pull/5339) (korowa) +- Optimize count_distinct.size [#5377](https://github.com/apache/datafusion/pull/5377) (comphead) +- Fix filter pushdown for extension plans [#5425](https://github.com/apache/datafusion/pull/5425) (thinkharderdev) +- Also push down all filters in TableProvider [#5420](https://github.com/apache/datafusion/pull/5420) (avantgardnerio) +- Update arrow 34 [#5375](https://github.com/apache/datafusion/pull/5375) (tustvold) +- Parquet limit pushdown (#5404) [#5416](https://github.com/apache/datafusion/pull/5416) (tustvold) +- Move file format config.rs to live with the rest of the datasource code [#5406](https://github.com/apache/datafusion/pull/5406) (alamb) +- Support Zstd compressed files [#5397](https://github.com/apache/datafusion/pull/5397) (dennybritz) +- Add example of catalog API usage (#5291) [#5326](https://github.com/apache/datafusion/pull/5326) (jaylmiller) +- Add support for protobuf serialisation of Arrow Map type [#5359](https://github.com/apache/datafusion/pull/5359) (ahmedriza) +- minor: port window tests to slt (part 2) [#5399](https://github.com/apache/datafusion/pull/5399) (alamb) +- fix(docs): fix typos [#5403](https://github.com/apache/datafusion/pull/5403) (WenyXu) +- Try to push down full filter before break-up [#5367](https://github.com/apache/datafusion/pull/5367) (avantgardnerio) +- enhance: remove more projection. 
[#5402](https://github.com/apache/datafusion/pull/5402) (jackwener) +- refactor `push_down_filter` to fix dead-loop and use optimizer_recurse. [#5337](https://github.com/apache/datafusion/pull/5337) (jackwener) +- feat: eliminate unnecessary projection. [#5366](https://github.com/apache/datafusion/pull/5366) (jackwener) +- minor: add forgotten large_utf8 [#5393](https://github.com/apache/datafusion/pull/5393) (jackwener) +- Minor: add tests for subquery to join [#5363](https://github.com/apache/datafusion/pull/5363) (ygf11) +- bugfix: fix master `bors` problem. [#5395](https://github.com/apache/datafusion/pull/5395) (jackwener) +- Rule ReplaceDistinctWithAggregate [#5354](https://github.com/apache/datafusion/pull/5354) (mingmwang) +- chore: add known project ZincObserve [#5376](https://github.com/apache/datafusion/pull/5376) (hengfeiyang) +- refactor: parquet pruning simplifications [#5386](https://github.com/apache/datafusion/pull/5386) (crepererum) +- Minor: intersect expressions optimization [#5388](https://github.com/apache/datafusion/pull/5388) (izveigor) +- docs: clarify spark [#5391](https://github.com/apache/datafusion/pull/5391) (hyoklee) +- UDF zero params #5378 [#5380](https://github.com/apache/datafusion/pull/5380) (jaylmiller) +- Minor: added some tests for coercion type [#5389](https://github.com/apache/datafusion/pull/5389) (izveigor) +- minor: make table resolution an independent function ... [#5373](https://github.com/apache/datafusion/pull/5373) (MichaelScofield) +- minor: port predicates tests to sqllogictests [#5374](https://github.com/apache/datafusion/pull/5374) (jackwener) +- Bug fix: Window frame range value outside the type range [#5384](https://github.com/apache/datafusion/pull/5384) (mustafasrepo) +- Fixed small typos in files of the optimizer [#5356](https://github.com/apache/datafusion/pull/5356) (izveigor) +- fix: misc phys. 
expression display bugs [#5387](https://github.com/apache/datafusion/pull/5387) (crepererum) +- Prepare for 19.0.0 release [#5381](https://github.com/apache/datafusion/pull/5381) (andygrove) +- minor: disable tpcds-q41 due to not support decorrelate disjunction subquery [#5369](https://github.com/apache/datafusion/pull/5369) (jackwener) diff --git a/dev/changelog/21.0.0.md b/dev/changelog/21.0.0.md index 98d93b9597a3..0e1b59bc199c 100644 --- a/dev/changelog/21.0.0.md +++ b/dev/changelog/21.0.0.md @@ -17,119 +17,119 @@ under the License. --> -## [21.0.0](https://github.com/apache/arrow-datafusion/tree/21.0.0) (2023-03-24) +## [21.0.0](https://github.com/apache/datafusion/tree/21.0.0) (2023-03-24) -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/20.0.0...21.0.0) +[Full Changelog](https://github.com/apache/datafusion/compare/20.0.0...21.0.0) **Breaking changes:** -- Support arbitrary user defined partition column in `ListingTable` (rather than assuming they are always Dictionary encoded) [#5545](https://github.com/apache/arrow-datafusion/pull/5545) (crepererum) -- Use TableReference for TableScan [#5615](https://github.com/apache/arrow-datafusion/pull/5615) (alamb) -- Update the type of `param_values` to `&[ScalarValue]` in function `replace_params_with_values` [#5640](https://github.com/apache/arrow-datafusion/pull/5640) (HaoYang670) +- Support arbitrary user defined partition column in `ListingTable` (rather than assuming they are always Dictionary encoded) [#5545](https://github.com/apache/datafusion/pull/5545) (crepererum) +- Use TableReference for TableScan [#5615](https://github.com/apache/datafusion/pull/5615) (alamb) +- Update the type of `param_values` to `&[ScalarValue]` in function `replace_params_with_values` [#5640](https://github.com/apache/datafusion/pull/5640) (HaoYang670) **Implemented enhancements:** -- feat: extract (epoch from col) [#5555](https://github.com/apache/arrow-datafusion/pull/5555) (Weijun-H) -- INSERT INTO support 
for MemTable [#5520](https://github.com/apache/arrow-datafusion/pull/5520) (metesynnada) -- Memory limited nested-loop join [#5564](https://github.com/apache/arrow-datafusion/pull/5564) (korowa) -- Timestamp subtraction and interval operations for `ScalarValue` [#5603](https://github.com/apache/arrow-datafusion/pull/5603) (berkaysynnada) -- Substrait: Add cast expression with bool, integers and decimal128 support [#5137](https://github.com/apache/arrow-datafusion/pull/5137) (nseekhao) -- Support `date_bin` with 2 arguments [#5643](https://github.com/apache/arrow-datafusion/pull/5643) (Weijun-H) -- improve: support combining multiple grouping expressions [#5559](https://github.com/apache/arrow-datafusion/pull/5559) (yukkit) -- Substrait: Add support for WindowFunction [#5653](https://github.com/apache/arrow-datafusion/pull/5653) (nseekhao) -- feat: `date_bin` supports MonthDayNano, microsecond and nanosecond units [#5698](https://github.com/apache/arrow-datafusion/pull/5698) (stuartcarnie) -- Handle serialization of TryCast [#5692](https://github.com/apache/arrow-datafusion/pull/5692) (thinkharderdev) +- feat: extract (epoch from col) [#5555](https://github.com/apache/datafusion/pull/5555) (Weijun-H) +- INSERT INTO support for MemTable [#5520](https://github.com/apache/datafusion/pull/5520) (metesynnada) +- Memory limited nested-loop join [#5564](https://github.com/apache/datafusion/pull/5564) (korowa) +- Timestamp subtraction and interval operations for `ScalarValue` [#5603](https://github.com/apache/datafusion/pull/5603) (berkaysynnada) +- Substrait: Add cast expression with bool, integers and decimal128 support [#5137](https://github.com/apache/datafusion/pull/5137) (nseekhao) +- Support `date_bin` with 2 arguments [#5643](https://github.com/apache/datafusion/pull/5643) (Weijun-H) +- improve: support combining multiple grouping expressions [#5559](https://github.com/apache/datafusion/pull/5559) (yukkit) +- Substrait: Add support for WindowFunction 
[#5653](https://github.com/apache/datafusion/pull/5653) (nseekhao) +- feat: `date_bin` supports MonthDayNano, microsecond and nanosecond units [#5698](https://github.com/apache/datafusion/pull/5698) (stuartcarnie) +- Handle serialization of TryCast [#5692](https://github.com/apache/datafusion/pull/5692) (thinkharderdev) **Fixed bugs:** -- fix: failed to execute sql with subquery [#5542](https://github.com/apache/arrow-datafusion/pull/5542) (MichaelScofield) -- fix: cast literal to timestamp [#5517](https://github.com/apache/arrow-datafusion/pull/5517) (Weijun-H) -- fix dataframe only boolean/binary column got error on describe [#5585](https://github.com/apache/arrow-datafusion/pull/5585) (jiangzhx) -- Median returns null on empty input instead of error [#5624](https://github.com/apache/arrow-datafusion/pull/5624) (toppyy) -- add CountWildcardRule to fix error on Count(Expr:Wildcard) with DataFrame API [#5627](https://github.com/apache/arrow-datafusion/pull/5627) (jiangzhx) -- fix: correct CountWildcardRule and move analyzer into a new directory. [#5671](https://github.com/apache/arrow-datafusion/pull/5671) (jackwener) +- fix: failed to execute sql with subquery [#5542](https://github.com/apache/datafusion/pull/5542) (MichaelScofield) +- fix: cast literal to timestamp [#5517](https://github.com/apache/datafusion/pull/5517) (Weijun-H) +- fix dataframe only boolean/binary column got error on describe [#5585](https://github.com/apache/datafusion/pull/5585) (jiangzhx) +- Median returns null on empty input instead of error [#5624](https://github.com/apache/datafusion/pull/5624) (toppyy) +- add CountWildcardRule to fix error on Count(Expr:Wildcard) with DataFrame API [#5627](https://github.com/apache/datafusion/pull/5627) (jiangzhx) +- fix: correct CountWildcardRule and move analyzer into a new directory. 
[#5671](https://github.com/apache/datafusion/pull/5671) (jackwener) **Documentation updates:** -- Minor: improve docstrings for `ObjectStoreRegistry` and `ObjectStoreProvider` [#5577](https://github.com/apache/arrow-datafusion/pull/5577) (alamb) -- Clarify differences of DataFusion with other systems in README.md [#5578](https://github.com/apache/arrow-datafusion/pull/5578) (alamb) -- Minor: Document docs build process [#5687](https://github.com/apache/arrow-datafusion/pull/5687) (alamb) +- Minor: improve docstrings for `ObjectStoreRegistry` and `ObjectStoreProvider` [#5577](https://github.com/apache/datafusion/pull/5577) (alamb) +- Clarify differences of DataFusion with other systems in README.md [#5578](https://github.com/apache/datafusion/pull/5578) (alamb) +- Minor: Document docs build process [#5687](https://github.com/apache/datafusion/pull/5687) (alamb) **Merged pull requests:** -- Refactor DecorrelateWhereExists and add back Distinct if needs [#5345](https://github.com/apache/arrow-datafusion/pull/5345) (ygf11) -- Simplify simplify test cases, support `^`, `&`, `|`, `<<` and `>>` operators for building exprs [#5511](https://github.com/apache/arrow-datafusion/pull/5511) (alamb) -- minor: improve sqllogictest docs [#5553](https://github.com/apache/arrow-datafusion/pull/5553) (alamb) -- Remove unused dependencies found by cargo-machete [#5552](https://github.com/apache/arrow-datafusion/pull/5552) (Jefffrey) -- make AggregateStatistics return the same result whether optimizer disabled or enabled [#5485](https://github.com/apache/arrow-datafusion/pull/5485) (jiangzhx) -- Avoid circular(ish) dependency parquet-test-utils on datafusion, try 2 [#5536](https://github.com/apache/arrow-datafusion/pull/5536) (alamb) -- Enforce ambiguity check whilst normalizing columns [#5509](https://github.com/apache/arrow-datafusion/pull/5509) (Jefffrey) -- Generated changelog for 20.0.0 [#5563](https://github.com/apache/arrow-datafusion/pull/5563) (andygrove) -- fix: failed to 
execute sql with subquery [#5542](https://github.com/apache/arrow-datafusion/pull/5542) (MichaelScofield) -- Revert describe count() workaround [#5556](https://github.com/apache/arrow-datafusion/pull/5556) (Jefffrey) -- fix: cast literal to timestamp [#5517](https://github.com/apache/arrow-datafusion/pull/5517) (Weijun-H) -- feat: extract (epoch from col) [#5555](https://github.com/apache/arrow-datafusion/pull/5555) (Weijun-H) -- Minor: improve docstrings for `ObjectStoreRegistry` and `ObjectStoreProvider` [#5577](https://github.com/apache/arrow-datafusion/pull/5577) (alamb) -- Minor: Move RuntimeEnv to `datafusion_execution` [#5580](https://github.com/apache/arrow-datafusion/pull/5580) (alamb) -- INSERT INTO support for MemTable [#5520](https://github.com/apache/arrow-datafusion/pull/5520) (metesynnada) -- Minor: restore explicit match to help avoid subtle bugs in the future when new `Expr` variants are added [#5579](https://github.com/apache/arrow-datafusion/pull/5579) (alamb) -- refactor: add more error info when array is empty [#5560](https://github.com/apache/arrow-datafusion/pull/5560) (Weijun-H) -- Memory limited nested-loop join [#5564](https://github.com/apache/arrow-datafusion/pull/5564) (korowa) -- Support catalog.schema.table.column in SQL SELECT and WHERE [#5343](https://github.com/apache/arrow-datafusion/pull/5343) (Jefffrey) -- Minor: clean up aggregates.slt tests [#5599](https://github.com/apache/arrow-datafusion/pull/5599) (alamb) -- Minor: Port more aggregate tests to sqllogictests [#5574](https://github.com/apache/arrow-datafusion/pull/5574) (alamb) -- Add a utility function to get all of the PartitionedFile for an ExecutionPlan [#5572](https://github.com/apache/arrow-datafusion/pull/5572) (yahoNanJing) -- minor: port some join tests to sqllogictests [#5567](https://github.com/apache/arrow-datafusion/pull/5567) (ygf11) -- Support arbitrary user defined partition column in `ListingTable` (rather than assuming they are always Dictionary encoded) 
[#5545](https://github.com/apache/arrow-datafusion/pull/5545) (crepererum) -- feat: add the similar optimization function for bitwise negative [#5516](https://github.com/apache/arrow-datafusion/pull/5516) (izveigor) -- Clarify differences of DataFusion with other systems in README.md [#5578](https://github.com/apache/arrow-datafusion/pull/5578) (alamb) -- Minor: Add more documentation about table_partition_columns [#5576](https://github.com/apache/arrow-datafusion/pull/5576) (alamb) -- Add Analyzer phase to DataFusion , add basic validation logic to Subquery Plans and Expressions [#5570](https://github.com/apache/arrow-datafusion/pull/5570) (mingmwang) -- Use TableReference for TableScan [#5615](https://github.com/apache/arrow-datafusion/pull/5615) (alamb) -- Preserve casts in rewrite_sort_cols_by_aggs [#5611](https://github.com/apache/arrow-datafusion/pull/5611) (mpurins-coralogix) -- Miscellaneous ArrayData Cleanup [#5612](https://github.com/apache/arrow-datafusion/pull/5612) (tustvold) -- Update substrait requirement from 0.4 to 0.5 [#5620](https://github.com/apache/arrow-datafusion/pull/5620) (dependabot[bot]) -- Do not break pipeline for window queries with GROUPS [#5587](https://github.com/apache/arrow-datafusion/pull/5587) (mustafasrepo) -- fix dataframe only boolean/binary column got error on describe [#5585](https://github.com/apache/arrow-datafusion/pull/5585) (jiangzhx) -- Minor: Add Documentation and Examples to `TableReference` [#5616](https://github.com/apache/arrow-datafusion/pull/5616) (alamb) -- [FOLLOWUP] eliminate the duplicated sort keys in Order By clause [#5607](https://github.com/apache/arrow-datafusion/pull/5607) (mingmwang) -- Update default behaviour of compression algorithms (support multistreams) [#5629](https://github.com/apache/arrow-datafusion/pull/5629) (metesynnada) -- Timestamp subtraction and interval operations for `ScalarValue` [#5603](https://github.com/apache/arrow-datafusion/pull/5603) (berkaysynnada) -- Use modulus dyn 
kernels for arithmetic expressions [#5634](https://github.com/apache/arrow-datafusion/pull/5634) (viirya) -- Minor: reduce cloning in `infer_placeholder_types` [#5638](https://github.com/apache/arrow-datafusion/pull/5638) (alamb) -- Move `SessionConfig` to `datafusion_execution` [#5581](https://github.com/apache/arrow-datafusion/pull/5581) (alamb) -- Update the type of `param_values` to `&[ScalarValue]` in function `replace_params_with_values` [#5640](https://github.com/apache/arrow-datafusion/pull/5640) (HaoYang670) -- WITH ORDER support on CREATE EXTERNAL TABLE [#5618](https://github.com/apache/arrow-datafusion/pull/5618) (metesynnada) -- Median returns null on empty input instead of error [#5624](https://github.com/apache/arrow-datafusion/pull/5624) (toppyy) -- feat: Memory limited merge join [#5632](https://github.com/apache/arrow-datafusion/pull/5632) (korowa) -- Update rstest requirement from 0.16.0 to 0.17.0 [#5648](https://github.com/apache/arrow-datafusion/pull/5648) (dependabot[bot]) -- add CountWildcardRule to fix error on Count(Expr:Wildcard) with DataFrame API [#5627](https://github.com/apache/arrow-datafusion/pull/5627) (jiangzhx) -- Add OuterReferenceColumn to Expr to represent correlated expression [#5593](https://github.com/apache/arrow-datafusion/pull/5593) (mingmwang) -- Minor: Simplify `Result` [#5659](https://github.com/apache/arrow-datafusion/pull/5659) (comphead) -- minor: remove redundant `DataFusionError` and fix `clippy` [#5669](https://github.com/apache/arrow-datafusion/pull/5669) (jackwener) -- Substrait: Add cast expression with bool, integers and decimal128 support [#5137](https://github.com/apache/arrow-datafusion/pull/5137) (nseekhao) -- Support `date_bin` with 2 arguments [#5643](https://github.com/apache/arrow-datafusion/pull/5643) (Weijun-H) -- Add LogicalPlanSignature and use in the optimizer loop [#5623](https://github.com/apache/arrow-datafusion/pull/5623) (mslapek) -- fix: correct CountWildcardRule and move analyzer into a new 
directory. [#5671](https://github.com/apache/arrow-datafusion/pull/5671) (jackwener) -- refactoring: added tests and fixed comments in "math_expressions" [#5656](https://github.com/apache/arrow-datafusion/pull/5656) (izveigor) -- improve: support combining multiple grouping expressions [#5559](https://github.com/apache/arrow-datafusion/pull/5559) (yukkit) -- community: polish issue template [#5668](https://github.com/apache/arrow-datafusion/pull/5668) (jackwener) -- minor: correct issue template [#5679](https://github.com/apache/arrow-datafusion/pull/5679) (jackwener) -- Change ObjectStoreRegistry from struct to trait to provide polymorphism [#5543](https://github.com/apache/arrow-datafusion/pull/5543) (yahoNanJing) -- Minor: Add `Extensions::new()` [#5676](https://github.com/apache/arrow-datafusion/pull/5676) (alamb) -- minor: add with_plan for Subquery [#5680](https://github.com/apache/arrow-datafusion/pull/5680) (jackwener) -- minor: reduce replication in `date_bin` implementation [#5673](https://github.com/apache/arrow-datafusion/pull/5673) (alamb) -- Fixes #5500 - Add a GitHub Actions workflow that builds the docs [#5670](https://github.com/apache/arrow-datafusion/pull/5670) (martin-g) -- Minor: port some content to the docs [#5684](https://github.com/apache/arrow-datafusion/pull/5684) (alamb) -- Docs: Add logo back to sidebar [#5688](https://github.com/apache/arrow-datafusion/pull/5688) (alamb) -- Substrait: Add support for WindowFunction [#5653](https://github.com/apache/arrow-datafusion/pull/5653) (nseekhao) -- Add -o option to all e2e benches [#5658](https://github.com/apache/arrow-datafusion/pull/5658) (jaylmiller) -- create table default to null [#5606](https://github.com/apache/arrow-datafusion/pull/5606) (Weijun-H) -- Minor: Document docs build process [#5687](https://github.com/apache/arrow-datafusion/pull/5687) (alamb) -- Minor: change doc formatting to force a republish [#5702](https://github.com/apache/arrow-datafusion/pull/5702) (alamb) -- Move 
`TaskContext` to datafusion-execution [#5677](https://github.com/apache/arrow-datafusion/pull/5677) (alamb) -- feat: `date_bin` supports MonthDayNano, microsecond and nanosecond units [#5698](https://github.com/apache/arrow-datafusion/pull/5698) (stuartcarnie) -- Return plan error when adding utf8 and timestamp [#5696](https://github.com/apache/arrow-datafusion/pull/5696) (Weijun-H) -- Handle serialization of TryCast [#5692](https://github.com/apache/arrow-datafusion/pull/5692) (thinkharderdev) -- analyzer: move InlineTableScan into Analyzer. [#5683](https://github.com/apache/arrow-datafusion/pull/5683) (jackwener) -- minor: Add doc comments to clarify what Analyzer is for [#5705](https://github.com/apache/arrow-datafusion/pull/5705) (alamb) +- Refactor DecorrelateWhereExists and add back Distinct if needs [#5345](https://github.com/apache/datafusion/pull/5345) (ygf11) +- Simplify simplify test cases, support `^`, `&`, `|`, `<<` and `>>` operators for building exprs [#5511](https://github.com/apache/datafusion/pull/5511) (alamb) +- minor: improve sqllogictest docs [#5553](https://github.com/apache/datafusion/pull/5553) (alamb) +- Remove unused dependencies found by cargo-machete [#5552](https://github.com/apache/datafusion/pull/5552) (Jefffrey) +- make AggregateStatistics return the same result whether optimizer disabled or enabled [#5485](https://github.com/apache/datafusion/pull/5485) (jiangzhx) +- Avoid circular(ish) dependency parquet-test-utils on datafusion, try 2 [#5536](https://github.com/apache/datafusion/pull/5536) (alamb) +- Enforce ambiguity check whilst normalizing columns [#5509](https://github.com/apache/datafusion/pull/5509) (Jefffrey) +- Generated changelog for 20.0.0 [#5563](https://github.com/apache/datafusion/pull/5563) (andygrove) +- fix: failed to execute sql with subquery [#5542](https://github.com/apache/datafusion/pull/5542) (MichaelScofield) +- Revert describe count() workaround [#5556](https://github.com/apache/datafusion/pull/5556) 
(Jefffrey) +- fix: cast literal to timestamp [#5517](https://github.com/apache/datafusion/pull/5517) (Weijun-H) +- feat: extract (epoch from col) [#5555](https://github.com/apache/datafusion/pull/5555) (Weijun-H) +- Minor: improve docstrings for `ObjectStoreRegistry` and `ObjectStoreProvider` [#5577](https://github.com/apache/datafusion/pull/5577) (alamb) +- Minor: Move RuntimeEnv to `datafusion_execution` [#5580](https://github.com/apache/datafusion/pull/5580) (alamb) +- INSERT INTO support for MemTable [#5520](https://github.com/apache/datafusion/pull/5520) (metesynnada) +- Minor: restore explicit match to help avoid subtle bugs in the future when new `Expr` variants are added [#5579](https://github.com/apache/datafusion/pull/5579) (alamb) +- refactor: add more error info when array is empty [#5560](https://github.com/apache/datafusion/pull/5560) (Weijun-H) +- Memory limited nested-loop join [#5564](https://github.com/apache/datafusion/pull/5564) (korowa) +- Support catalog.schema.table.column in SQL SELECT and WHERE [#5343](https://github.com/apache/datafusion/pull/5343) (Jefffrey) +- Minor: clean up aggregates.slt tests [#5599](https://github.com/apache/datafusion/pull/5599) (alamb) +- Minor: Port more aggregate tests to sqllogictests [#5574](https://github.com/apache/datafusion/pull/5574) (alamb) +- Add a utility function to get all of the PartitionedFile for an ExecutionPlan [#5572](https://github.com/apache/datafusion/pull/5572) (yahoNanJing) +- minor: port some join tests to sqllogictests [#5567](https://github.com/apache/datafusion/pull/5567) (ygf11) +- Support arbitrary user defined partition column in `ListingTable` (rather than assuming they are always Dictionary encoded) [#5545](https://github.com/apache/datafusion/pull/5545) (crepererum) +- feat: add the similar optimization function for bitwise negative [#5516](https://github.com/apache/datafusion/pull/5516) (izveigor) +- Clarify differences of DataFusion with other systems in README.md 
[#5578](https://github.com/apache/datafusion/pull/5578) (alamb) +- Minor: Add more documentation about table_partition_columns [#5576](https://github.com/apache/datafusion/pull/5576) (alamb) +- Add Analyzer phase to DataFusion , add basic validation logic to Subquery Plans and Expressions [#5570](https://github.com/apache/datafusion/pull/5570) (mingmwang) +- Use TableReference for TableScan [#5615](https://github.com/apache/datafusion/pull/5615) (alamb) +- Preserve casts in rewrite_sort_cols_by_aggs [#5611](https://github.com/apache/datafusion/pull/5611) (mpurins-coralogix) +- Miscellaneous ArrayData Cleanup [#5612](https://github.com/apache/datafusion/pull/5612) (tustvold) +- Update substrait requirement from 0.4 to 0.5 [#5620](https://github.com/apache/datafusion/pull/5620) (dependabot[bot]) +- Do not break pipeline for window queries with GROUPS [#5587](https://github.com/apache/datafusion/pull/5587) (mustafasrepo) +- fix dataframe only boolean/binary column got error on describe [#5585](https://github.com/apache/datafusion/pull/5585) (jiangzhx) +- Minor: Add Documentation and Examples to `TableReference` [#5616](https://github.com/apache/datafusion/pull/5616) (alamb) +- [FOLLOWUP] eliminate the duplicated sort keys in Order By clause [#5607](https://github.com/apache/datafusion/pull/5607) (mingmwang) +- Update default behaviour of compression algorithms (support multistreams) [#5629](https://github.com/apache/datafusion/pull/5629) (metesynnada) +- Timestamp subtraction and interval operations for `ScalarValue` [#5603](https://github.com/apache/datafusion/pull/5603) (berkaysynnada) +- Use modulus dyn kernels for arithmetic expressions [#5634](https://github.com/apache/datafusion/pull/5634) (viirya) +- Minor: reduce cloning in `infer_placeholder_types` [#5638](https://github.com/apache/datafusion/pull/5638) (alamb) +- Move `SessionConfig` to `datafusion_execution` [#5581](https://github.com/apache/datafusion/pull/5581) (alamb) +- Update the type of `param_values` 
to `&[ScalarValue]` in function `replace_params_with_values` [#5640](https://github.com/apache/datafusion/pull/5640) (HaoYang670) +- WITH ORDER support on CREATE EXTERNAL TABLE [#5618](https://github.com/apache/datafusion/pull/5618) (metesynnada) +- Median returns null on empty input instead of error [#5624](https://github.com/apache/datafusion/pull/5624) (toppyy) +- feat: Memory limited merge join [#5632](https://github.com/apache/datafusion/pull/5632) (korowa) +- Update rstest requirement from 0.16.0 to 0.17.0 [#5648](https://github.com/apache/datafusion/pull/5648) (dependabot[bot]) +- add CountWildcardRule to fix error on Count(Expr:Wildcard) with DataFrame API [#5627](https://github.com/apache/datafusion/pull/5627) (jiangzhx) +- Add OuterReferenceColumn to Expr to represent correlated expression [#5593](https://github.com/apache/datafusion/pull/5593) (mingmwang) +- Minor: Simplify `Result` [#5659](https://github.com/apache/datafusion/pull/5659) (comphead) +- minor: remove redundant `DataFusionError` and fix `clippy` [#5669](https://github.com/apache/datafusion/pull/5669) (jackwener) +- Substrait: Add cast expression with bool, integers and decimal128 support [#5137](https://github.com/apache/datafusion/pull/5137) (nseekhao) +- Support `date_bin` with 2 arguments [#5643](https://github.com/apache/datafusion/pull/5643) (Weijun-H) +- Add LogicalPlanSignature and use in the optimizer loop [#5623](https://github.com/apache/datafusion/pull/5623) (mslapek) +- fix: correct CountWildcardRule and move analyzer into a new directory. 
[#5671](https://github.com/apache/datafusion/pull/5671) (jackwener) +- refactoring: added tests and fixed comments in "math_expressions" [#5656](https://github.com/apache/datafusion/pull/5656) (izveigor) +- improve: support combining multiple grouping expressions [#5559](https://github.com/apache/datafusion/pull/5559) (yukkit) +- community: polish issue template [#5668](https://github.com/apache/datafusion/pull/5668) (jackwener) +- minor: correct issue template [#5679](https://github.com/apache/datafusion/pull/5679) (jackwener) +- Change ObjectStoreRegistry from struct to trait to provide polymorphism [#5543](https://github.com/apache/datafusion/pull/5543) (yahoNanJing) +- Minor: Add `Extensions::new()` [#5676](https://github.com/apache/datafusion/pull/5676) (alamb) +- minor: add with_plan for Subquery [#5680](https://github.com/apache/datafusion/pull/5680) (jackwener) +- minor: reduce replication in `date_bin` implementation [#5673](https://github.com/apache/datafusion/pull/5673) (alamb) +- Fixes #5500 - Add a GitHub Actions workflow that builds the docs [#5670](https://github.com/apache/datafusion/pull/5670) (martin-g) +- Minor: port some content to the docs [#5684](https://github.com/apache/datafusion/pull/5684) (alamb) +- Docs: Add logo back to sidebar [#5688](https://github.com/apache/datafusion/pull/5688) (alamb) +- Substrait: Add support for WindowFunction [#5653](https://github.com/apache/datafusion/pull/5653) (nseekhao) +- Add -o option to all e2e benches [#5658](https://github.com/apache/datafusion/pull/5658) (jaylmiller) +- create table default to null [#5606](https://github.com/apache/datafusion/pull/5606) (Weijun-H) +- Minor: Document docs build process [#5687](https://github.com/apache/datafusion/pull/5687) (alamb) +- Minor: change doc formatting to force a republish [#5702](https://github.com/apache/datafusion/pull/5702) (alamb) +- Move `TaskContext` to datafusion-execution [#5677](https://github.com/apache/datafusion/pull/5677) (alamb) +- feat: 
`date_bin` supports MonthDayNano, microsecond and nanosecond units [#5698](https://github.com/apache/datafusion/pull/5698) (stuartcarnie) +- Return plan error when adding utf8 and timestamp [#5696](https://github.com/apache/datafusion/pull/5696) (Weijun-H) +- Handle serialization of TryCast [#5692](https://github.com/apache/datafusion/pull/5692) (thinkharderdev) +- analyzer: move InlineTableScan into Analyzer. [#5683](https://github.com/apache/datafusion/pull/5683) (jackwener) +- minor: Add doc comments to clarify what Analyzer is for [#5705](https://github.com/apache/datafusion/pull/5705) (alamb) diff --git a/dev/changelog/21.1.0.md b/dev/changelog/21.1.0.md index 6cd79c93b3f4..eae0372f09ef 100644 --- a/dev/changelog/21.1.0.md +++ b/dev/changelog/21.1.0.md @@ -17,10 +17,10 @@ under the License. --> -## [21.1.0](https://github.com/apache/arrow-datafusion/tree/21.1.0) (2023-03-24) +## [21.1.0](https://github.com/apache/datafusion/tree/21.1.0) (2023-03-24) -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/21.0.0...21.1.0) +[Full Changelog](https://github.com/apache/datafusion/compare/21.0.0...21.1.0) **Fixed bugs:** -- fix: Use consistent arrow version (do not use both `arrow 34.0.0` and `arrow-array 35.0.0`) [#5765](https://github.com/apache/arrow-datafusion/pull/5765) +- fix: Use consistent arrow version (do not use both `arrow 34.0.0` and `arrow-array 35.0.0`) [#5765](https://github.com/apache/datafusion/pull/5765) diff --git a/dev/changelog/22.0.0.md b/dev/changelog/22.0.0.md index 7e02fb5d5ab3..34d8dfccfaad 100644 --- a/dev/changelog/22.0.0.md +++ b/dev/changelog/22.0.0.md @@ -17,125 +17,125 @@ under the License. 
--> -## [22.0.0](https://github.com/apache/arrow-datafusion/tree/22.0.0) (2023-04-07) +## [22.0.0](https://github.com/apache/datafusion/tree/22.0.0) (2023-04-07) -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/21.1.0...22.0.0) +[Full Changelog](https://github.com/apache/datafusion/compare/21.1.0...22.0.0) **Breaking changes:** -- Introduce a common trait TreeNode for ExecutionPlan, PhysicalExpr, LogicalExpr, LogicalPlan [#5630](https://github.com/apache/arrow-datafusion/pull/5630) (yahoNanJing) -- Minor: Reduce clones in AnalyzerRule [#5728](https://github.com/apache/arrow-datafusion/pull/5728) (alamb) -- Change required input ordering physical plan API to allow any NULLS FIRST / LAST and ASC / DESC [#5772](https://github.com/apache/arrow-datafusion/pull/5772) (mustafasrepo) -- Remove batch_idx from SortKeyCursor [#5855](https://github.com/apache/arrow-datafusion/pull/5855) (tustvold) -- Top down `EnforceSorting`, Extended testbench for `EnforceSorting` rule to prepare for refactors, additional functionality such as pushdowns over unions [#5661](https://github.com/apache/arrow-datafusion/pull/5661) (mustafasrepo) -- Move `TransactionStart`/`TransactionEnd`/`SetVariable` into `LogicalPlan::Statement` [#5842](https://github.com/apache/arrow-datafusion/pull/5842) (alamb) +- Introduce a common trait TreeNode for ExecutionPlan, PhysicalExpr, LogicalExpr, LogicalPlan [#5630](https://github.com/apache/datafusion/pull/5630) (yahoNanJing) +- Minor: Reduce clones in AnalyzerRule [#5728](https://github.com/apache/datafusion/pull/5728) (alamb) +- Change required input ordering physical plan API to allow any NULLS FIRST / LAST and ASC / DESC [#5772](https://github.com/apache/datafusion/pull/5772) (mustafasrepo) +- Remove batch_idx from SortKeyCursor [#5855](https://github.com/apache/datafusion/pull/5855) (tustvold) +- Top down `EnforceSorting`, Extended testbench for `EnforceSorting` rule to prepare for refactors, additional functionality such as pushdowns 
over unions [#5661](https://github.com/apache/datafusion/pull/5661) (mustafasrepo) +- Move `TransactionStart`/`TransactionEnd`/`SetVariable` into `LogicalPlan::Statement` [#5842](https://github.com/apache/datafusion/pull/5842) (alamb) **Implemented enhancements:** -- feat: Simplify LOG and POWER functions [#5816](https://github.com/apache/arrow-datafusion/pull/5816) (izveigor) -- feat: Add expression rewrite rules for LIKE and ILIKE [#5819](https://github.com/apache/arrow-datafusion/pull/5819) (Weijun-H) -- feat: BuiltinScalarFunction::Cbrt [#5839](https://github.com/apache/arrow-datafusion/pull/5839) (izveigor) -- feat: Quote column names if required in error messages [#5778](https://github.com/apache/arrow-datafusion/pull/5778) (alamb) +- feat: Simplify LOG and POWER functions [#5816](https://github.com/apache/datafusion/pull/5816) (izveigor) +- feat: Add expression rewrite rules for LIKE and ILIKE [#5819](https://github.com/apache/datafusion/pull/5819) (Weijun-H) +- feat: BuiltinScalarFunction::Cbrt [#5839](https://github.com/apache/datafusion/pull/5839) (izveigor) +- feat: Quote column names if required in error messages [#5778](https://github.com/apache/datafusion/pull/5778) (alamb) **Fixed bugs:** -- Fix parquet pruning when column names have periods [#5710](https://github.com/apache/arrow-datafusion/pull/5710) (alamb) -- fix: parse table name into TableReference on converting substrait read [#5716](https://github.com/apache/arrow-datafusion/pull/5716) (waynexia) -- fix: Enhance case expression type coercion [#5820](https://github.com/apache/arrow-datafusion/pull/5820) (Jefffrey) -- fix: type_coercion support BinaryExpr ( interval , timestamp ). [#5845](https://github.com/apache/arrow-datafusion/pull/5845) (jackwener) -- fix: coerce type for InSubquery and fix timestamp minus timestamp. 
[#5853](https://github.com/apache/arrow-datafusion/pull/5853) (jackwener) +- Fix parquet pruning when column names have periods [#5710](https://github.com/apache/datafusion/pull/5710) (alamb) +- fix: parse table name into TableReference on converting substrait read [#5716](https://github.com/apache/datafusion/pull/5716) (waynexia) +- fix: Enhance case expression type coercion [#5820](https://github.com/apache/datafusion/pull/5820) (Jefffrey) +- fix: type_coercion support BinaryExpr ( interval , timestamp ). [#5845](https://github.com/apache/datafusion/pull/5845) (jackwener) +- fix: coerce type for InSubquery and fix timestamp minus timestamp. [#5853](https://github.com/apache/datafusion/pull/5853) (jackwener) **Documentation updates:** -- chore: update sql function documentation [#5780](https://github.com/apache/arrow-datafusion/pull/5780) (sanderson) -- Minor: fix docs build [#5795](https://github.com/apache/arrow-datafusion/pull/5795) (alamb) -- Move content from README.md to docs site [#5824](https://github.com/apache/arrow-datafusion/pull/5824) (alamb) -- Update docs/source/contributor-guide/index.md [#5872](https://github.com/apache/arrow-datafusion/pull/5872) (2010YOUY01) +- chore: update sql function documentation [#5780](https://github.com/apache/datafusion/pull/5780) (sanderson) +- Minor: fix docs build [#5795](https://github.com/apache/datafusion/pull/5795) (alamb) +- Move content from README.md to docs site [#5824](https://github.com/apache/datafusion/pull/5824) (alamb) +- Update docs/source/contributor-guide/index.md [#5872](https://github.com/apache/datafusion/pull/5872) (2010YOUY01) **Merged pull requests:** -- Fix parquet pruning when column names have periods [#5710](https://github.com/apache/arrow-datafusion/pull/5710) (alamb) -- Executing LocalLimitExec with no column should not return an Err [#5709](https://github.com/apache/arrow-datafusion/pull/5709) (kazuyukitanimura) -- Minor: Comments to .asf.yaml 
[#5703](https://github.com/apache/arrow-datafusion/pull/5703) (alamb) -- Exclude some .github files from rat license check [#5720](https://github.com/apache/arrow-datafusion/pull/5720) (andygrove) -- Minor: Trigger docs CI build on changes to asf.yaml [#5726](https://github.com/apache/arrow-datafusion/pull/5726) (alamb) -- Use consistent arrow version (do not use both arrrow 34 and arrow-array 35) [#5724](https://github.com/apache/arrow-datafusion/pull/5724) (tustvold) -- LIMIT edge cases [#5723](https://github.com/apache/arrow-datafusion/pull/5723) (comphead) -- Put the file "type_coercion" in the same named fold and rename the file "mod.rs" [#5736](https://github.com/apache/arrow-datafusion/pull/5736) (HaoYang670) -- fix: parse table name into TableReference on converting substrait read [#5716](https://github.com/apache/arrow-datafusion/pull/5716) (waynexia) -- Modify tests for TPCH explain plans to avoid regressions [#5741](https://github.com/apache/arrow-datafusion/pull/5741) (jiangyinzuo) -- Minor: port select tests to sqllogictests [#5740](https://github.com/apache/arrow-datafusion/pull/5740) (alamb) -- Introduce a common trait TreeNode for ExecutionPlan, PhysicalExpr, LogicalExpr, LogicalPlan [#5630](https://github.com/apache/arrow-datafusion/pull/5630) (yahoNanJing) -- Minor: Reduce clones in AnalyzerRule [#5728](https://github.com/apache/arrow-datafusion/pull/5728) (alamb) -- Upgrade to substrait 0.5.1 and set the version field of produced plans [#5707](https://github.com/apache/arrow-datafusion/pull/5707) (mbrobbel) -- excluding doctests for mac/win64 platform ,Make them consistent with amd64 [#5730](https://github.com/apache/arrow-datafusion/pull/5730) (jiangzhx) -- fix test of benchmarks warning [#5737](https://github.com/apache/arrow-datafusion/pull/5737) (r4ntix) -- Move protoc generation to binary crate (#5718) [#5742](https://github.com/apache/arrow-datafusion/pull/5742) (l0kr) -- Add compare.py to compare the output of multiple benchmarks 
[#5655](https://github.com/apache/arrow-datafusion/pull/5655) (alamb) -- Move and rename `expr_rewriter.rs` [#5743](https://github.com/apache/arrow-datafusion/pull/5743) (HaoYang670) -- Minor: port some decimal tests to sqllogictests [#5739](https://github.com/apache/arrow-datafusion/pull/5739) (alamb) -- Update to arrow 36 [#5685](https://github.com/apache/arrow-datafusion/pull/5685) (tustvold) -- Minor: Avoid an unecessary contruction in `map_children` some extra plan construction [#5761](https://github.com/apache/arrow-datafusion/pull/5761) (alamb) -- minor: fix typos in planner.rs error msg [#5776](https://github.com/apache/arrow-datafusion/pull/5776) (jiangzhx) -- minor: add timestampstz utf8 conversion test [#5777](https://github.com/apache/arrow-datafusion/pull/5777) (comphead) -- Update prost-build requirement from =0.11.7 to =0.11.8 [#5773](https://github.com/apache/arrow-datafusion/pull/5773) (dependabot[bot]) -- infer right side nullability for LEFT join [#5748](https://github.com/apache/arrow-datafusion/pull/5748) (comphead) -- MINOR: simplify sqllogic test schema check [#5769](https://github.com/apache/arrow-datafusion/pull/5769) (comphead) -- Change required input ordering physical plan API to allow any NULLS FIRST / LAST and ASC / DESC [#5772](https://github.com/apache/arrow-datafusion/pull/5772) (mustafasrepo) -- Support timestamp and interval arithmetic [#5764](https://github.com/apache/arrow-datafusion/pull/5764) (berkaysynnada) -- chore: update sql function documentation [#5780](https://github.com/apache/arrow-datafusion/pull/5780) (sanderson) -- Minor: fix docs build [#5795](https://github.com/apache/arrow-datafusion/pull/5795) (alamb) -- Minor: use workspace arrow-array rather than hard coded 34 [#5794](https://github.com/apache/arrow-datafusion/pull/5794) (alamb) -- Return an error for invalid placeholder `$0` instead of panicking [#5787](https://github.com/apache/arrow-datafusion/pull/5787) (kawadakk) -- Bump substrait version to 0.6.0 
[#5798](https://github.com/apache/arrow-datafusion/pull/5798) (jdye64) -- Support `INTERVAL` SQL Type [#5792](https://github.com/apache/arrow-datafusion/pull/5792) (alamb) -- Minor: fix flaking test [#5805](https://github.com/apache/arrow-datafusion/pull/5805) (alamb) -- Incorrect row comparison for tpch queries in benchmarks [#5784](https://github.com/apache/arrow-datafusion/pull/5784) (viirya) -- Update ctor requirement from 0.1.22 to 0.2.0 [#5752](https://github.com/apache/arrow-datafusion/pull/5752) (dependabot[bot]) -- Minor: Port some timestamp tests to sqllogictests [#5804](https://github.com/apache/arrow-datafusion/pull/5804) (alamb) -- Minor: remove typed_min_max_batch_decimal128 [#5809](https://github.com/apache/arrow-datafusion/pull/5809) (izveigor) -- Minor: Run rust workflow on changes to .github [#5758](https://github.com/apache/arrow-datafusion/pull/5758) (alamb) -- Minor: clean up timestamp arithmetic tests [#5803](https://github.com/apache/arrow-datafusion/pull/5803) (alamb) -- improve Filter pushdown to Join [#5770](https://github.com/apache/arrow-datafusion/pull/5770) (mingmwang) -- Support `round()` function with two parameters [#5807](https://github.com/apache/arrow-datafusion/pull/5807) (viirya) -- Fix datatype of case expression [#5734](https://github.com/apache/arrow-datafusion/pull/5734) (mslapek) -- Minor: Add ticket reference as comment [#5822](https://github.com/apache/arrow-datafusion/pull/5822) (alamb) -- Forward port version and Changelog for `21.1.0` [#5767](https://github.com/apache/arrow-datafusion/pull/5767) (alamb) -- Implement LogicalPlan support for transactions [#5827](https://github.com/apache/arrow-datafusion/pull/5827) (avantgardnerio) -- Minor: port more timestamp tests to sqllogictests [#5832](https://github.com/apache/arrow-datafusion/pull/5832) (alamb) -- feat: Simplify LOG and POWER functions [#5816](https://github.com/apache/arrow-datafusion/pull/5816) (izveigor) -- fix: Enhance case expression type coercion 
[#5820](https://github.com/apache/arrow-datafusion/pull/5820) (Jefffrey) -- feat: Add expression rewrite rules for LIKE and ILIKE [#5819](https://github.com/apache/arrow-datafusion/pull/5819) (Weijun-H) -- fix: type_coercion support BinaryExpr ( interval , timestamp ). [#5845](https://github.com/apache/arrow-datafusion/pull/5845) (jackwener) -- Add primary key information to CreateMemoryTable LogicalPlan node [#5835](https://github.com/apache/arrow-datafusion/pull/5835) (avantgardnerio) -- Expose substrait protoc feature [#5852](https://github.com/apache/arrow-datafusion/pull/5852) (andygrove) -- minor(sqlparser): encapsulate PlanerContext, reduce some clones [#5814](https://github.com/apache/arrow-datafusion/pull/5814) (alamb) -- Remove batch_idx from SortKeyCursor [#5855](https://github.com/apache/arrow-datafusion/pull/5855) (tustvold) -- Improving optimizer performance by eliminating unnecessary sort and distribution passes, add more SymmetricHashJoin improvements [#5754](https://github.com/apache/arrow-datafusion/pull/5754) (metesynnada) -- Poll next open file future while scanning current file [#5800](https://github.com/apache/arrow-datafusion/pull/5800) (nenorbot) -- Top down `EnforceSorting`, Extended testbench for `EnforceSorting` rule to prepare for refactors, additional functionality such as pushdowns over unions [#5661](https://github.com/apache/arrow-datafusion/pull/5661) (mustafasrepo) -- Move `TransactionStart`/`TransactionEnd`/`SetVariable` into `LogicalPlan::Statement` [#5842](https://github.com/apache/arrow-datafusion/pull/5842) (alamb) -- Move content from README.md to docs site [#5824](https://github.com/apache/arrow-datafusion/pull/5824) (alamb) -- Fix `interval` to use consistent units and arrow parser [#5806](https://github.com/apache/arrow-datafusion/pull/5806) (alamb) -- Enhance Asynchronous Performance of SHJ Implementation [#5864](https://github.com/apache/arrow-datafusion/pull/5864) (metesynnada) -- Prove timestamptz <=> timestamp now 
works [#5869](https://github.com/apache/arrow-datafusion/pull/5869) (comphead) -- Update docs/source/contributor-guide/index.md [#5872](https://github.com/apache/arrow-datafusion/pull/5872) (2010YOUY01) -- fix: coerce type for InSubquery and fix timestamp minus timestamp. [#5853](https://github.com/apache/arrow-datafusion/pull/5853) (jackwener) -- chore: update sqllogictest version 0.13.2. [#5875](https://github.com/apache/arrow-datafusion/pull/5875) (jackwener) -- Minor: Add crates.io / API links to website [#5871](https://github.com/apache/arrow-datafusion/pull/5871) (alamb) -- minor: made `information_schema` pub [#5862](https://github.com/apache/arrow-datafusion/pull/5862) (MichaelScofield) -- Update substrait requirement from 0.6.0 to 0.7.1 [#5876](https://github.com/apache/arrow-datafusion/pull/5876) (dependabot[bot]) -- refactor: move type_coercion to analyzer [#5831](https://github.com/apache/arrow-datafusion/pull/5831) (jackwener) -- feat: BuiltinScalarFunction::Cbrt [#5839](https://github.com/apache/arrow-datafusion/pull/5839) (izveigor) -- [Minor]: Update `architecture.md` to include April tech talks [#5865](https://github.com/apache/arrow-datafusion/pull/5865) (comphead) -- [sqllogictest] Run tests on Windows [#5870](https://github.com/apache/arrow-datafusion/pull/5870) (melgenek) -- Support create object store source tables without depending on environment variables [#5732](https://github.com/apache/arrow-datafusion/pull/5732) (r4ntix) -- feat: Quote column names if required in error messages [#5778](https://github.com/apache/arrow-datafusion/pull/5778) (alamb) -- [MINOR]: Refactor to increase readability [#5874](https://github.com/apache/arrow-datafusion/pull/5874) (mustafasrepo) -- More realistic sort benchmarks [#5881](https://github.com/apache/arrow-datafusion/pull/5881) (tustvold) -- Removal of arithmetic operations for temporal values to binary.rs [#5846](https://github.com/apache/arrow-datafusion/pull/5846) (berkaysynnada) -- Moving 
PipelineFixer above all rules to use ExecutionPlan APIs [#5880](https://github.com/apache/arrow-datafusion/pull/5880) (metesynnada) -- Add assert on hash children partition count [#5768](https://github.com/apache/arrow-datafusion/pull/5768) (duongcongtoai) -- Use ScalarValue for single input on math expression [#5891](https://github.com/apache/arrow-datafusion/pull/5891) (viirya) -- Generify SortPreservingMerge (#5882) (#5879) [#5886](https://github.com/apache/arrow-datafusion/pull/5886) (tustvold) -- Fix: allow arbitrary exprs in VALUES clause [#5813](https://github.com/apache/arrow-datafusion/pull/5813) (alamb) +- Fix parquet pruning when column names have periods [#5710](https://github.com/apache/datafusion/pull/5710) (alamb) +- Executing LocalLimitExec with no column should not return an Err [#5709](https://github.com/apache/datafusion/pull/5709) (kazuyukitanimura) +- Minor: Comments to .asf.yaml [#5703](https://github.com/apache/datafusion/pull/5703) (alamb) +- Exclude some .github files from rat license check [#5720](https://github.com/apache/datafusion/pull/5720) (andygrove) +- Minor: Trigger docs CI build on changes to asf.yaml [#5726](https://github.com/apache/datafusion/pull/5726) (alamb) +- Use consistent arrow version (do not use both arrrow 34 and arrow-array 35) [#5724](https://github.com/apache/datafusion/pull/5724) (tustvold) +- LIMIT edge cases [#5723](https://github.com/apache/datafusion/pull/5723) (comphead) +- Put the file "type_coercion" in the same named fold and rename the file "mod.rs" [#5736](https://github.com/apache/datafusion/pull/5736) (HaoYang670) +- fix: parse table name into TableReference on converting substrait read [#5716](https://github.com/apache/datafusion/pull/5716) (waynexia) +- Modify tests for TPCH explain plans to avoid regressions [#5741](https://github.com/apache/datafusion/pull/5741) (jiangyinzuo) +- Minor: port select tests to sqllogictests [#5740](https://github.com/apache/datafusion/pull/5740) (alamb) +- Introduce a 
common trait TreeNode for ExecutionPlan, PhysicalExpr, LogicalExpr, LogicalPlan [#5630](https://github.com/apache/datafusion/pull/5630) (yahoNanJing) +- Minor: Reduce clones in AnalyzerRule [#5728](https://github.com/apache/datafusion/pull/5728) (alamb) +- Upgrade to substrait 0.5.1 and set the version field of produced plans [#5707](https://github.com/apache/datafusion/pull/5707) (mbrobbel) +- excluding doctests for mac/win64 platform ,Make them consistent with amd64 [#5730](https://github.com/apache/datafusion/pull/5730) (jiangzhx) +- fix test of benchmarks warning [#5737](https://github.com/apache/datafusion/pull/5737) (r4ntix) +- Move protoc generation to binary crate (#5718) [#5742](https://github.com/apache/datafusion/pull/5742) (l0kr) +- Add compare.py to compare the output of multiple benchmarks [#5655](https://github.com/apache/datafusion/pull/5655) (alamb) +- Move and rename `expr_rewriter.rs` [#5743](https://github.com/apache/datafusion/pull/5743) (HaoYang670) +- Minor: port some decimal tests to sqllogictests [#5739](https://github.com/apache/datafusion/pull/5739) (alamb) +- Update to arrow 36 [#5685](https://github.com/apache/datafusion/pull/5685) (tustvold) +- Minor: Avoid an unecessary contruction in `map_children` some extra plan construction [#5761](https://github.com/apache/datafusion/pull/5761) (alamb) +- minor: fix typos in planner.rs error msg [#5776](https://github.com/apache/datafusion/pull/5776) (jiangzhx) +- minor: add timestampstz utf8 conversion test [#5777](https://github.com/apache/datafusion/pull/5777) (comphead) +- Update prost-build requirement from =0.11.7 to =0.11.8 [#5773](https://github.com/apache/datafusion/pull/5773) (dependabot[bot]) +- infer right side nullability for LEFT join [#5748](https://github.com/apache/datafusion/pull/5748) (comphead) +- MINOR: simplify sqllogic test schema check [#5769](https://github.com/apache/datafusion/pull/5769) (comphead) +- Change required input ordering physical plan API to allow any NULLS 
FIRST / LAST and ASC / DESC [#5772](https://github.com/apache/datafusion/pull/5772) (mustafasrepo) +- Support timestamp and interval arithmetic [#5764](https://github.com/apache/datafusion/pull/5764) (berkaysynnada) +- chore: update sql function documentation [#5780](https://github.com/apache/datafusion/pull/5780) (sanderson) +- Minor: fix docs build [#5795](https://github.com/apache/datafusion/pull/5795) (alamb) +- Minor: use workspace arrow-array rather than hard coded 34 [#5794](https://github.com/apache/datafusion/pull/5794) (alamb) +- Return an error for invalid placeholder `$0` instead of panicking [#5787](https://github.com/apache/datafusion/pull/5787) (kawadakk) +- Bump substrait version to 0.6.0 [#5798](https://github.com/apache/datafusion/pull/5798) (jdye64) +- Support `INTERVAL` SQL Type [#5792](https://github.com/apache/datafusion/pull/5792) (alamb) +- Minor: fix flaking test [#5805](https://github.com/apache/datafusion/pull/5805) (alamb) +- Incorrect row comparison for tpch queries in benchmarks [#5784](https://github.com/apache/datafusion/pull/5784) (viirya) +- Update ctor requirement from 0.1.22 to 0.2.0 [#5752](https://github.com/apache/datafusion/pull/5752) (dependabot[bot]) +- Minor: Port some timestamp tests to sqllogictests [#5804](https://github.com/apache/datafusion/pull/5804) (alamb) +- Minor: remove typed_min_max_batch_decimal128 [#5809](https://github.com/apache/datafusion/pull/5809) (izveigor) +- Minor: Run rust workflow on changes to .github [#5758](https://github.com/apache/datafusion/pull/5758) (alamb) +- Minor: clean up timestamp arithmetic tests [#5803](https://github.com/apache/datafusion/pull/5803) (alamb) +- improve Filter pushdown to Join [#5770](https://github.com/apache/datafusion/pull/5770) (mingmwang) +- Support `round()` function with two parameters [#5807](https://github.com/apache/datafusion/pull/5807) (viirya) +- Fix datatype of case expression [#5734](https://github.com/apache/datafusion/pull/5734) (mslapek) +- Minor: Add 
ticket reference as comment [#5822](https://github.com/apache/datafusion/pull/5822) (alamb) +- Forward port version and Changelog for `21.1.0` [#5767](https://github.com/apache/datafusion/pull/5767) (alamb) +- Implement LogicalPlan support for transactions [#5827](https://github.com/apache/datafusion/pull/5827) (avantgardnerio) +- Minor: port more timestamp tests to sqllogictests [#5832](https://github.com/apache/datafusion/pull/5832) (alamb) +- feat: Simplify LOG and POWER functions [#5816](https://github.com/apache/datafusion/pull/5816) (izveigor) +- fix: Enhance case expression type coercion [#5820](https://github.com/apache/datafusion/pull/5820) (Jefffrey) +- feat: Add expression rewrite rules for LIKE and ILIKE [#5819](https://github.com/apache/datafusion/pull/5819) (Weijun-H) +- fix: type_coercion support BinaryExpr ( interval , timestamp ). [#5845](https://github.com/apache/datafusion/pull/5845) (jackwener) +- Add primary key information to CreateMemoryTable LogicalPlan node [#5835](https://github.com/apache/datafusion/pull/5835) (avantgardnerio) +- Expose substrait protoc feature [#5852](https://github.com/apache/datafusion/pull/5852) (andygrove) +- minor(sqlparser): encapsulate PlanerContext, reduce some clones [#5814](https://github.com/apache/datafusion/pull/5814) (alamb) +- Remove batch_idx from SortKeyCursor [#5855](https://github.com/apache/datafusion/pull/5855) (tustvold) +- Improving optimizer performance by eliminating unnecessary sort and distribution passes, add more SymmetricHashJoin improvements [#5754](https://github.com/apache/datafusion/pull/5754) (metesynnada) +- Poll next open file future while scanning current file [#5800](https://github.com/apache/datafusion/pull/5800) (nenorbot) +- Top down `EnforceSorting`, Extended testbench for `EnforceSorting` rule to prepare for refactors, additional functionality such as pushdowns over unions [#5661](https://github.com/apache/datafusion/pull/5661) (mustafasrepo) +- Move 
`TransactionStart`/`TransactionEnd`/`SetVariable` into `LogicalPlan::Statement` [#5842](https://github.com/apache/datafusion/pull/5842) (alamb) +- Move content from README.md to docs site [#5824](https://github.com/apache/datafusion/pull/5824) (alamb) +- Fix `interval` to use consistent units and arrow parser [#5806](https://github.com/apache/datafusion/pull/5806) (alamb) +- Enhance Asynchronous Performance of SHJ Implementation [#5864](https://github.com/apache/datafusion/pull/5864) (metesynnada) +- Prove timestamptz <=> timestamp now works [#5869](https://github.com/apache/datafusion/pull/5869) (comphead) +- Update docs/source/contributor-guide/index.md [#5872](https://github.com/apache/datafusion/pull/5872) (2010YOUY01) +- fix: coerce type for InSubquery and fix timestamp minus timestamp. [#5853](https://github.com/apache/datafusion/pull/5853) (jackwener) +- chore: update sqllogictest version 0.13.2. [#5875](https://github.com/apache/datafusion/pull/5875) (jackwener) +- Minor: Add crates.io / API links to website [#5871](https://github.com/apache/datafusion/pull/5871) (alamb) +- minor: made `information_schema` pub [#5862](https://github.com/apache/datafusion/pull/5862) (MichaelScofield) +- Update substrait requirement from 0.6.0 to 0.7.1 [#5876](https://github.com/apache/datafusion/pull/5876) (dependabot[bot]) +- refactor: move type_coercion to analyzer [#5831](https://github.com/apache/datafusion/pull/5831) (jackwener) +- feat: BuiltinScalarFunction::Cbrt [#5839](https://github.com/apache/datafusion/pull/5839) (izveigor) +- [Minor]: Update `architecture.md` to include April tech talks [#5865](https://github.com/apache/datafusion/pull/5865) (comphead) +- [sqllogictest] Run tests on Windows [#5870](https://github.com/apache/datafusion/pull/5870) (melgenek) +- Support create object store source tables without depending on environment variables [#5732](https://github.com/apache/datafusion/pull/5732) (r4ntix) +- feat: Quote column names if required in error 
messages [#5778](https://github.com/apache/datafusion/pull/5778) (alamb) +- [MINOR]: Refactor to increase readability [#5874](https://github.com/apache/datafusion/pull/5874) (mustafasrepo) +- More realistic sort benchmarks [#5881](https://github.com/apache/datafusion/pull/5881) (tustvold) +- Removal of arithmetic operations for temporal values to binary.rs [#5846](https://github.com/apache/datafusion/pull/5846) (berkaysynnada) +- Moving PipelineFixer above all rules to use ExecutionPlan APIs [#5880](https://github.com/apache/datafusion/pull/5880) (metesynnada) +- Add assert on hash children partition count [#5768](https://github.com/apache/datafusion/pull/5768) (duongcongtoai) +- Use ScalarValue for single input on math expression [#5891](https://github.com/apache/datafusion/pull/5891) (viirya) +- Generify SortPreservingMerge (#5882) (#5879) [#5886](https://github.com/apache/datafusion/pull/5886) (tustvold) +- Fix: allow arbitrary exprs in VALUES clause [#5813](https://github.com/apache/datafusion/pull/5813) (alamb) diff --git a/dev/changelog/23.0.0.md b/dev/changelog/23.0.0.md index abd5f0a0887d..334ee7e4c93c 100644 --- a/dev/changelog/23.0.0.md +++ b/dev/changelog/23.0.0.md @@ -17,127 +17,127 @@ under the License. 
--> -## [23.0.0](https://github.com/apache/arrow-datafusion/tree/23.0.0) (2023-04-21) +## [23.0.0](https://github.com/apache/datafusion/tree/23.0.0) (2023-04-21) -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/22.0.0...23.0.0) +[Full Changelog](https://github.com/apache/datafusion/compare/22.0.0...23.0.0) **Breaking changes:** -- Add new physical rule CombinePartialFinalAggregate [#5837](https://github.com/apache/arrow-datafusion/pull/5837) (mingmwang) -- feat: Remove compact row since it's no longer used [#6021](https://github.com/apache/arrow-datafusion/pull/6021) (yjshen) +- Add new physical rule CombinePartialFinalAggregate [#5837](https://github.com/apache/datafusion/pull/5837) (mingmwang) +- feat: Remove compact row since it's no longer used [#6021](https://github.com/apache/datafusion/pull/6021) (yjshen) **Implemented enhancements:** -- feat: extend substrait type support, including type variations [#5775](https://github.com/apache/arrow-datafusion/pull/5775) (waynexia) -- feat: hyperbolic functions [#5884](https://github.com/apache/arrow-datafusion/pull/5884) (izveigor) -- feat: Support SQL filter clause for aggregate expressions, add SQL dialect support [#5868](https://github.com/apache/arrow-datafusion/pull/5868) (yjshen) -- feat: Implement the bitwise_not in NotExpr [#5902](https://github.com/apache/arrow-datafusion/pull/5902) (RTEnzyme) -- feat: implementation of the constant "Pi" [#5965](https://github.com/apache/arrow-datafusion/pull/5965) (izveigor) -- feat: allow the customization of analyzer rules [#5963](https://github.com/apache/arrow-datafusion/pull/5963) (waynexia) -- feat: extra sqllogictests for scalar functions [#5887](https://github.com/apache/arrow-datafusion/pull/5887) (izveigor) -- feat: degrees and radians [#6023](https://github.com/apache/arrow-datafusion/pull/6023) (izveigor) -- feat: allow TableScan without projection [#6032](https://github.com/apache/arrow-datafusion/pull/6032) (waynexia) -- feat: support month 
and year interval for date_bin on constant data [#5982](https://github.com/apache/arrow-datafusion/pull/5982) (NGA-TRAN) +- feat: extend substrait type support, including type variations [#5775](https://github.com/apache/datafusion/pull/5775) (waynexia) +- feat: hyperbolic functions [#5884](https://github.com/apache/datafusion/pull/5884) (izveigor) +- feat: Support SQL filter clause for aggregate expressions, add SQL dialect support [#5868](https://github.com/apache/datafusion/pull/5868) (yjshen) +- feat: Implement the bitwise_not in NotExpr [#5902](https://github.com/apache/datafusion/pull/5902) (RTEnzyme) +- feat: implementation of the constant "Pi" [#5965](https://github.com/apache/datafusion/pull/5965) (izveigor) +- feat: allow the customization of analyzer rules [#5963](https://github.com/apache/datafusion/pull/5963) (waynexia) +- feat: extra sqllogictests for scalar functions [#5887](https://github.com/apache/datafusion/pull/5887) (izveigor) +- feat: degrees and radians [#6023](https://github.com/apache/datafusion/pull/6023) (izveigor) +- feat: allow TableScan without projection [#6032](https://github.com/apache/datafusion/pull/6032) (waynexia) +- feat: support month and year interval for date_bin on constant data [#5982](https://github.com/apache/datafusion/pull/5982) (NGA-TRAN) **Fixed bugs:** -- fix: type coercion for expr/subquery in InSubquery [#5883](https://github.com/apache/arrow-datafusion/pull/5883) (jackwener) -- fix: binaryExpr not supported for LargeUtf8 [#5896](https://github.com/apache/arrow-datafusion/pull/5896) (jackwener) -- fix: match boolean null for substrait [#5947](https://github.com/apache/arrow-datafusion/pull/5947) (waynexia) -- fix: return `NotImplemented` when execute `SELECT INTO` syntax [#5945](https://github.com/apache/arrow-datafusion/pull/5945) (r4ntix) -- fix: support `date - timestamp` and `timestamp - date` [#5960](https://github.com/apache/arrow-datafusion/pull/5960) (jackwener) -- fix: do not depend on time 0.1 
[#5972](https://github.com/apache/arrow-datafusion/pull/5972) (crepererum) -- fix: largeUtf8 don't support `is distinct from` [#5993](https://github.com/apache/arrow-datafusion/pull/5993) (jackwener) -- fix: Do not panic on invalid placeholders [#5998](https://github.com/apache/arrow-datafusion/pull/5998) (Weijun-H) -- fix: Allow values in subqueries [#6018](https://github.com/apache/arrow-datafusion/pull/6018) (scsmithr) -- fix: split "union" and "interleave" [#6045](https://github.com/apache/arrow-datafusion/pull/6045) (crepererum) +- fix: type coercion for expr/subquery in InSubquery [#5883](https://github.com/apache/datafusion/pull/5883) (jackwener) +- fix: binaryExpr not supported for LargeUtf8 [#5896](https://github.com/apache/datafusion/pull/5896) (jackwener) +- fix: match boolean null for substrait [#5947](https://github.com/apache/datafusion/pull/5947) (waynexia) +- fix: return `NotImplemented` when execute `SELECT INTO` syntax [#5945](https://github.com/apache/datafusion/pull/5945) (r4ntix) +- fix: support `date - timestamp` and `timestamp - date` [#5960](https://github.com/apache/datafusion/pull/5960) (jackwener) +- fix: do not depend on time 0.1 [#5972](https://github.com/apache/datafusion/pull/5972) (crepererum) +- fix: largeUtf8 don't support `is distinct from` [#5993](https://github.com/apache/datafusion/pull/5993) (jackwener) +- fix: Do not panic on invalid placeholders [#5998](https://github.com/apache/datafusion/pull/5998) (Weijun-H) +- fix: Allow values in subqueries [#6018](https://github.com/apache/datafusion/pull/6018) (scsmithr) +- fix: split "union" and "interleave" [#6045](https://github.com/apache/datafusion/pull/6045) (crepererum) **Documentation updates:** -- docs: improve expressions.md [#5978](https://github.com/apache/arrow-datafusion/pull/5978) (izveigor) -- minor: add developer's guide link in README.md [#6055](https://github.com/apache/arrow-datafusion/pull/6055) (jackwener) +- docs: improve expressions.md 
[#5978](https://github.com/apache/datafusion/pull/5978) (izveigor) +- minor: add developer's guide link in README.md [#6055](https://github.com/apache/datafusion/pull/6055) (jackwener) **Merged pull requests:** -- fix: type coercion for expr/subquery in InSubquery [#5883](https://github.com/apache/arrow-datafusion/pull/5883) (jackwener) -- Minor: fix the architecture slide url [#5920](https://github.com/apache/arrow-datafusion/pull/5920) (gaoxinge) -- Count agg support multiple expressions [#5908](https://github.com/apache/arrow-datafusion/pull/5908) (allenma) -- feat: extend substrait type support, including type variations [#5775](https://github.com/apache/arrow-datafusion/pull/5775) (waynexia) -- fix: binaryExpr not supported for LargeUtf8 [#5896](https://github.com/apache/arrow-datafusion/pull/5896) (jackwener) -- Clean up SortExec creation and add doc comments [#5889](https://github.com/apache/arrow-datafusion/pull/5889) (alamb) -- Improve file scan time opening metric to include start_next_file [#5900](https://github.com/apache/arrow-datafusion/pull/5900) (alamb) -- Remove `unsafe` code (`transmute`) in datafusion-proto [#5946](https://github.com/apache/arrow-datafusion/pull/5946) (alamb) -- support `date_trunc` with `millisecond` and `microsecond` and keep the same type as the input [#5818](https://github.com/apache/arrow-datafusion/pull/5818) (Weijun-H) -- fix: match boolean null for substrait [#5947](https://github.com/apache/arrow-datafusion/pull/5947) (waynexia) -- feat: hyperbolic functions [#5884](https://github.com/apache/arrow-datafusion/pull/5884) (izveigor) -- Planner: normalize_ident only when enable_ident_normalization is enabled [#5785](https://github.com/apache/arrow-datafusion/pull/5785) (ayushdg) -- Improve contributor guide and main API landing page [#5921](https://github.com/apache/arrow-datafusion/pull/5921) (alamb) -- minor: Refactor row_hash implementation [#5936](https://github.com/apache/arrow-datafusion/pull/5936) (mustafasrepo) -- 
Specialize Primitive Cursor -- make sorts / merges on a single primitive column faster [#5897](https://github.com/apache/arrow-datafusion/pull/5897) (tustvold) -- just match for BinaryExpr Date/Time +/- Interval [#5932](https://github.com/apache/arrow-datafusion/pull/5932) (jackwener) -- Improve avg/sum Aggregator performance for Decimal [#5866](https://github.com/apache/arrow-datafusion/pull/5866) (mingmwang) -- fix: return `NotImplemented` when execute `SELECT INTO` syntax [#5945](https://github.com/apache/arrow-datafusion/pull/5945) (r4ntix) -- Update arrow 37 [#5782](https://github.com/apache/arrow-datafusion/pull/5782) (tustvold) -- Scalar arithmetic should return error when overflows. [#5811](https://github.com/apache/arrow-datafusion/pull/5811) (zhzy0077) -- feat: Support SQL filter clause for aggregate expressions, add SQL dialect support [#5868](https://github.com/apache/arrow-datafusion/pull/5868) (yjshen) -- test: add Between UT for type_coercion [#5929](https://github.com/apache/arrow-datafusion/pull/5929) (jackwener) -- feat: Implement the bitwise_not in NotExpr [#5902](https://github.com/apache/arrow-datafusion/pull/5902) (RTEnzyme) -- Minor: Add `DFField.with_nullable(bool)` [#5966](https://github.com/apache/arrow-datafusion/pull/5966) (comphead) -- Move error check from pipeline fixer to pipeline checker [#5938](https://github.com/apache/arrow-datafusion/pull/5938) (mustafasrepo) -- when inferring the schema of compressed CSV, decompress before newline-delimited chunking [#5860](https://github.com/apache/arrow-datafusion/pull/5860) (jiangzhx) -- Update sqlparser requirement from 0.32 to 0.33 [#5957](https://github.com/apache/arrow-datafusion/pull/5957) (dependabot[bot]) -- fix: support `date - timestamp` and `timestamp - date` [#5960](https://github.com/apache/arrow-datafusion/pull/5960) (jackwener) -- minor: Add `Expr::between` to clean up boilerplate [#5967](https://github.com/apache/arrow-datafusion/pull/5967) (alamb) -- Add new physical rule 
CombinePartialFinalAggregate [#5837](https://github.com/apache/arrow-datafusion/pull/5837) (mingmwang) -- Specialized Cursor for StringArray and BinaryArray [#5964](https://github.com/apache/arrow-datafusion/pull/5964) (tustvold) -- add an example of using DataFrame to create a subquery [#5961](https://github.com/apache/arrow-datafusion/pull/5961) (jiangzhx) -- feat: implementation of the constant "Pi" [#5965](https://github.com/apache/arrow-datafusion/pull/5965) (izveigor) -- fix: do not depend on time 0.1 [#5972](https://github.com/apache/arrow-datafusion/pull/5972) (crepererum) -- Minor: fix wrong code comment [#5979](https://github.com/apache/arrow-datafusion/pull/5979) (viirya) -- [DOCS]: consolidate doc site content simplify navbar [#5962](https://github.com/apache/arrow-datafusion/pull/5962) (alamb) -- minor: port some expr tests to sqllogictests, improve error message [#5968](https://github.com/apache/arrow-datafusion/pull/5968) (alamb) -- minor: fix doctest that runs accidentally [#5989](https://github.com/apache/arrow-datafusion/pull/5989) (yjshen) -- Row `AVG` accumulator support Decimal type [#5973](https://github.com/apache/arrow-datafusion/pull/5973) (mingmwang) -- Treat Partition by columns as set for window functions [#5951](https://github.com/apache/arrow-datafusion/pull/5951) (mustafasrepo) -- feat: allow the customization of analyzer rules [#5963](https://github.com/apache/arrow-datafusion/pull/5963) (waynexia) -- feat: extra sqllogictests for scalar functions [#5887](https://github.com/apache/arrow-datafusion/pull/5887) (izveigor) -- docs: improve expressions.md [#5978](https://github.com/apache/arrow-datafusion/pull/5978) (izveigor) -- Minor: Improve doc comments in FileStream [#5898](https://github.com/apache/arrow-datafusion/pull/5898) (alamb) -- Update prost-build requirement from =0.11.8 to =0.11.9 [#5987](https://github.com/apache/arrow-datafusion/pull/5987) (dependabot[bot]) -- Don't use parquet file offset for file range pruning 
[#5997](https://github.com/apache/arrow-datafusion/pull/5997) (tustvold) -- Streaming Memory Reservation in SHJ [#5937](https://github.com/apache/arrow-datafusion/pull/5937) (metesynnada) -- Temporal datatype support for interval arithmetic [#5971](https://github.com/apache/arrow-datafusion/pull/5971) (berkaysynnada) -- Remove optimize_children and replace with map_children [#5984](https://github.com/apache/arrow-datafusion/pull/5984) (2010YOUY01) -- doc: fix site sidebar logo position [#5990](https://github.com/apache/arrow-datafusion/pull/5990) (Jefffrey) -- fix: largeUtf8 don't support `is distinct from` [#5993](https://github.com/apache/arrow-datafusion/pull/5993) (jackwener) -- Proper resolution for old name in with_column_renamed [#5992](https://github.com/apache/arrow-datafusion/pull/5992) (Jefffrey) -- minor:update subquery example [#6011](https://github.com/apache/arrow-datafusion/pull/6011) (jiangzhx) -- fix: Do not panic on invalid placeholders [#5998](https://github.com/apache/arrow-datafusion/pull/5998) (Weijun-H) -- fix: Allow values in subqueries [#6018](https://github.com/apache/arrow-datafusion/pull/6018) (scsmithr) -- chore: make JsonOpener and CsvOpener public [#6004](https://github.com/apache/arrow-datafusion/pull/6004) (WenyXu) -- update count_wildcard_rule for more scenario [#6010](https://github.com/apache/arrow-datafusion/pull/6010) (jiangzhx) -- Add analyzer output to verbose explain [#6020](https://github.com/apache/arrow-datafusion/pull/6020) (Jefffrey) -- Minor: Fix compilation error [#6029](https://github.com/apache/arrow-datafusion/pull/6029) (viirya) -- Open/Closed bounds for interval arithmetic [#6007](https://github.com/apache/arrow-datafusion/pull/6007) (berkaysynnada) -- minor:move log_plan to utils [#6030](https://github.com/apache/arrow-datafusion/pull/6030) (jiangzhx) -- Use OwnedTableReference for subquery aliases [#6022](https://github.com/apache/arrow-datafusion/pull/6022) (scsmithr) -- Update sql doc 
[#6025](https://github.com/apache/arrow-datafusion/pull/6025) (Jefffrey) -- feat: Remove compact row since it's no longer used [#6021](https://github.com/apache/arrow-datafusion/pull/6021) (yjshen) -- Add test for handling precision overflow when casting from integer to decimal [#6041](https://github.com/apache/arrow-datafusion/pull/6041) (viirya) -- feat: degrees and radians [#6023](https://github.com/apache/arrow-datafusion/pull/6023) (izveigor) -- feat: allow TableScan without projection [#6032](https://github.com/apache/arrow-datafusion/pull/6032) (waynexia) -- refactor: make `FirstSelector` not to have `update` and `merge` [#6038](https://github.com/apache/arrow-datafusion/pull/6038) (Weijun-H) -- Port math.rs to sqllogictest [#6037](https://github.com/apache/arrow-datafusion/pull/6037) (2010YOUY01) -- Row accumulator support update Scalar values [#6003](https://github.com/apache/arrow-datafusion/pull/6003) (mingmwang) -- Minor: tweak docuement of sort enforcement optimizer rule [#6054](https://github.com/apache/arrow-datafusion/pull/6054) (waynexia) -- Decimal multiply kernel should not cause precision loss [#5980](https://github.com/apache/arrow-datafusion/pull/5980) (viirya) -- Clean up rustdoc and add doc lint [#6044](https://github.com/apache/arrow-datafusion/pull/6044) (alamb) -- Minor: port more create_drop table tests to sqllogictests [#6031](https://github.com/apache/arrow-datafusion/pull/6031) (jiangzhx) -- minor feat: impl FromStr for JoinType enum [#6033](https://github.com/apache/arrow-datafusion/pull/6033) (nkarpov) -- Float support on interval artihmetics [#6048](https://github.com/apache/arrow-datafusion/pull/6048) (metesynnada) -- minor: add developer's guide link in README.md [#6055](https://github.com/apache/arrow-datafusion/pull/6055) (jackwener) -- feat: support month and year interval for date_bin on constant data [#5982](https://github.com/apache/arrow-datafusion/pull/5982) (NGA-TRAN) -- Update rust_lint.sh to include newly added `cargo 
doc` check [#6070](https://github.com/apache/arrow-datafusion/pull/6070) (alamb) -- Add support for UDAF in physical plan serialization [#6063](https://github.com/apache/arrow-datafusion/pull/6063) (thinkharderdev) -- fix: split "union" and "interleave" [#6045](https://github.com/apache/arrow-datafusion/pull/6045) (crepererum) -- Minor: assorted schema handling debug improvements [#6076](https://github.com/apache/arrow-datafusion/pull/6076) (crepererum) +- fix: type coercion for expr/subquery in InSubquery [#5883](https://github.com/apache/datafusion/pull/5883) (jackwener) +- Minor: fix the architecture slide url [#5920](https://github.com/apache/datafusion/pull/5920) (gaoxinge) +- Count agg support multiple expressions [#5908](https://github.com/apache/datafusion/pull/5908) (allenma) +- feat: extend substrait type support, including type variations [#5775](https://github.com/apache/datafusion/pull/5775) (waynexia) +- fix: binaryExpr not supported for LargeUtf8 [#5896](https://github.com/apache/datafusion/pull/5896) (jackwener) +- Clean up SortExec creation and add doc comments [#5889](https://github.com/apache/datafusion/pull/5889) (alamb) +- Improve file scan time opening metric to include start_next_file [#5900](https://github.com/apache/datafusion/pull/5900) (alamb) +- Remove `unsafe` code (`transmute`) in datafusion-proto [#5946](https://github.com/apache/datafusion/pull/5946) (alamb) +- support `date_trunc` with `millisecond` and `microsecond` and keep the same type as the input [#5818](https://github.com/apache/datafusion/pull/5818) (Weijun-H) +- fix: match boolean null for substrait [#5947](https://github.com/apache/datafusion/pull/5947) (waynexia) +- feat: hyperbolic functions [#5884](https://github.com/apache/datafusion/pull/5884) (izveigor) +- Planner: normalize_ident only when enable_ident_normalization is enabled [#5785](https://github.com/apache/datafusion/pull/5785) (ayushdg) +- Improve contributor guide and main API landing page 
[#5921](https://github.com/apache/datafusion/pull/5921) (alamb) +- minor: Refactor row_hash implementation [#5936](https://github.com/apache/datafusion/pull/5936) (mustafasrepo) +- Specialize Primitive Cursor -- make sorts / merges on a single primitive column faster [#5897](https://github.com/apache/datafusion/pull/5897) (tustvold) +- just match for BinaryExpr Date/Time +/- Interval [#5932](https://github.com/apache/datafusion/pull/5932) (jackwener) +- Improve avg/sum Aggregator performance for Decimal [#5866](https://github.com/apache/datafusion/pull/5866) (mingmwang) +- fix: return `NotImplemented` when execute `SELECT INTO` syntax [#5945](https://github.com/apache/datafusion/pull/5945) (r4ntix) +- Update arrow 37 [#5782](https://github.com/apache/datafusion/pull/5782) (tustvold) +- Scalar arithmetic should return error when overflows. [#5811](https://github.com/apache/datafusion/pull/5811) (zhzy0077) +- feat: Support SQL filter clause for aggregate expressions, add SQL dialect support [#5868](https://github.com/apache/datafusion/pull/5868) (yjshen) +- test: add Between UT for type_coercion [#5929](https://github.com/apache/datafusion/pull/5929) (jackwener) +- feat: Implement the bitwise_not in NotExpr [#5902](https://github.com/apache/datafusion/pull/5902) (RTEnzyme) +- Minor: Add `DFField.with_nullable(bool)` [#5966](https://github.com/apache/datafusion/pull/5966) (comphead) +- Move error check from pipeline fixer to pipeline checker [#5938](https://github.com/apache/datafusion/pull/5938) (mustafasrepo) +- when inferring the schema of compressed CSV, decompress before newline-delimited chunking [#5860](https://github.com/apache/datafusion/pull/5860) (jiangzhx) +- Update sqlparser requirement from 0.32 to 0.33 [#5957](https://github.com/apache/datafusion/pull/5957) (dependabot[bot]) +- fix: support `date - timestamp` and `timestamp - date` [#5960](https://github.com/apache/datafusion/pull/5960) (jackwener) +- minor: Add `Expr::between` to clean up boilerplate 
[#5967](https://github.com/apache/datafusion/pull/5967) (alamb) +- Add new physical rule CombinePartialFinalAggregate [#5837](https://github.com/apache/datafusion/pull/5837) (mingmwang) +- Specialized Cursor for StringArray and BinaryArray [#5964](https://github.com/apache/datafusion/pull/5964) (tustvold) +- add an example of using DataFrame to create a subquery [#5961](https://github.com/apache/datafusion/pull/5961) (jiangzhx) +- feat: implementation of the constant "Pi" [#5965](https://github.com/apache/datafusion/pull/5965) (izveigor) +- fix: do not depend on time 0.1 [#5972](https://github.com/apache/datafusion/pull/5972) (crepererum) +- Minor: fix wrong code comment [#5979](https://github.com/apache/datafusion/pull/5979) (viirya) +- [DOCS]: consolidate doc site content simplify navbar [#5962](https://github.com/apache/datafusion/pull/5962) (alamb) +- minor: port some expr tests to sqllogictests, improve error message [#5968](https://github.com/apache/datafusion/pull/5968) (alamb) +- minor: fix doctest that runs accidentally [#5989](https://github.com/apache/datafusion/pull/5989) (yjshen) +- Row `AVG` accumulator support Decimal type [#5973](https://github.com/apache/datafusion/pull/5973) (mingmwang) +- Treat Partition by columns as set for window functions [#5951](https://github.com/apache/datafusion/pull/5951) (mustafasrepo) +- feat: allow the customization of analyzer rules [#5963](https://github.com/apache/datafusion/pull/5963) (waynexia) +- feat: extra sqllogictests for scalar functions [#5887](https://github.com/apache/datafusion/pull/5887) (izveigor) +- docs: improve expressions.md [#5978](https://github.com/apache/datafusion/pull/5978) (izveigor) +- Minor: Improve doc comments in FileStream [#5898](https://github.com/apache/datafusion/pull/5898) (alamb) +- Update prost-build requirement from =0.11.8 to =0.11.9 [#5987](https://github.com/apache/datafusion/pull/5987) (dependabot[bot]) +- Don't use parquet file offset for file range pruning 
[#5997](https://github.com/apache/datafusion/pull/5997) (tustvold) +- Streaming Memory Reservation in SHJ [#5937](https://github.com/apache/datafusion/pull/5937) (metesynnada) +- Temporal datatype support for interval arithmetic [#5971](https://github.com/apache/datafusion/pull/5971) (berkaysynnada) +- Remove optimize_children and replace with map_children [#5984](https://github.com/apache/datafusion/pull/5984) (2010YOUY01) +- doc: fix site sidebar logo position [#5990](https://github.com/apache/datafusion/pull/5990) (Jefffrey) +- fix: largeUtf8 don't support `is distinct from` [#5993](https://github.com/apache/datafusion/pull/5993) (jackwener) +- Proper resolution for old name in with_column_renamed [#5992](https://github.com/apache/datafusion/pull/5992) (Jefffrey) +- minor:update subquery example [#6011](https://github.com/apache/datafusion/pull/6011) (jiangzhx) +- fix: Do not panic on invalid placeholders [#5998](https://github.com/apache/datafusion/pull/5998) (Weijun-H) +- fix: Allow values in subqueries [#6018](https://github.com/apache/datafusion/pull/6018) (scsmithr) +- chore: make JsonOpener and CsvOpener public [#6004](https://github.com/apache/datafusion/pull/6004) (WenyXu) +- update count_wildcard_rule for more scenario [#6010](https://github.com/apache/datafusion/pull/6010) (jiangzhx) +- Add analyzer output to verbose explain [#6020](https://github.com/apache/datafusion/pull/6020) (Jefffrey) +- Minor: Fix compilation error [#6029](https://github.com/apache/datafusion/pull/6029) (viirya) +- Open/Closed bounds for interval arithmetic [#6007](https://github.com/apache/datafusion/pull/6007) (berkaysynnada) +- minor:move log_plan to utils [#6030](https://github.com/apache/datafusion/pull/6030) (jiangzhx) +- Use OwnedTableReference for subquery aliases [#6022](https://github.com/apache/datafusion/pull/6022) (scsmithr) +- Update sql doc [#6025](https://github.com/apache/datafusion/pull/6025) (Jefffrey) +- feat: Remove compact row since it's no longer used 
[#6021](https://github.com/apache/datafusion/pull/6021) (yjshen) +- Add test for handling precision overflow when casting from integer to decimal [#6041](https://github.com/apache/datafusion/pull/6041) (viirya) +- feat: degrees and radians [#6023](https://github.com/apache/datafusion/pull/6023) (izveigor) +- feat: allow TableScan without projection [#6032](https://github.com/apache/datafusion/pull/6032) (waynexia) +- refactor: make `FirstSelector` not to have `update` and `merge` [#6038](https://github.com/apache/datafusion/pull/6038) (Weijun-H) +- Port math.rs to sqllogictest [#6037](https://github.com/apache/datafusion/pull/6037) (2010YOUY01) +- Row accumulator support update Scalar values [#6003](https://github.com/apache/datafusion/pull/6003) (mingmwang) +- Minor: tweak docuement of sort enforcement optimizer rule [#6054](https://github.com/apache/datafusion/pull/6054) (waynexia) +- Decimal multiply kernel should not cause precision loss [#5980](https://github.com/apache/datafusion/pull/5980) (viirya) +- Clean up rustdoc and add doc lint [#6044](https://github.com/apache/datafusion/pull/6044) (alamb) +- Minor: port more create_drop table tests to sqllogictests [#6031](https://github.com/apache/datafusion/pull/6031) (jiangzhx) +- minor feat: impl FromStr for JoinType enum [#6033](https://github.com/apache/datafusion/pull/6033) (nkarpov) +- Float support on interval artihmetics [#6048](https://github.com/apache/datafusion/pull/6048) (metesynnada) +- minor: add developer's guide link in README.md [#6055](https://github.com/apache/datafusion/pull/6055) (jackwener) +- feat: support month and year interval for date_bin on constant data [#5982](https://github.com/apache/datafusion/pull/5982) (NGA-TRAN) +- Update rust_lint.sh to include newly added `cargo doc` check [#6070](https://github.com/apache/datafusion/pull/6070) (alamb) +- Add support for UDAF in physical plan serialization [#6063](https://github.com/apache/datafusion/pull/6063) (thinkharderdev) +- fix: split 
"union" and "interleave" [#6045](https://github.com/apache/datafusion/pull/6045) (crepererum) +- Minor: assorted schema handling debug improvements [#6076](https://github.com/apache/datafusion/pull/6076) (crepererum) diff --git a/dev/changelog/24.0.0.md b/dev/changelog/24.0.0.md index 454475e99752..c55a75840ff8 100644 --- a/dev/changelog/24.0.0.md +++ b/dev/changelog/24.0.0.md @@ -17,147 +17,147 @@ under the License. --> -## [24.0.0](https://github.com/apache/arrow-datafusion/tree/24.0.0) (2023-05-06) +## [24.0.0](https://github.com/apache/datafusion/tree/24.0.0) (2023-05-06) -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/23.0.0...24.0.0) +[Full Changelog](https://github.com/apache/datafusion/compare/23.0.0...24.0.0) **Breaking changes:** -- Extract `LogicalPlan::Create*` DDL related plan structures into `LogicalPlan::Ddl` [#6121](https://github.com/apache/arrow-datafusion/pull/6121) (alamb) -- Complete Extracting LogicalPlan::Drop\* DDL related plan structures into LogicalPlan::Ddl [#6144](https://github.com/apache/arrow-datafusion/pull/6144) (alamb) -- Remove JIT based on cranelift, and `jit` feature [#6164](https://github.com/apache/arrow-datafusion/pull/6164) (yjshen) -- Remove Rayon-based Scheduler [#6169](https://github.com/apache/arrow-datafusion/pull/6169) (tustvold) -- Cleanup join memory accounting in CrossJoin and NestedLoopsJoin [#6188](https://github.com/apache/arrow-datafusion/pull/6188) (tustvold) +- Extract `LogicalPlan::Create*` DDL related plan structures into `LogicalPlan::Ddl` [#6121](https://github.com/apache/datafusion/pull/6121) (alamb) +- Complete Extracting LogicalPlan::Drop\* DDL related plan structures into LogicalPlan::Ddl [#6144](https://github.com/apache/datafusion/pull/6144) (alamb) +- Remove JIT based on cranelift, and `jit` feature [#6164](https://github.com/apache/datafusion/pull/6164) (yjshen) +- Remove Rayon-based Scheduler [#6169](https://github.com/apache/datafusion/pull/6169) (tustvold) +- Cleanup join 
memory accounting in CrossJoin and NestedLoopsJoin [#6188](https://github.com/apache/datafusion/pull/6188) (tustvold) **Implemented enhancements:** -- feat: date_bin output type same with input type [#6053](https://github.com/apache/arrow-datafusion/pull/6053) (jiacai2050) -- feat: add version field to ExecutionContext [#6052](https://github.com/apache/arrow-datafusion/pull/6052) (Weijun-H) -- Implement Streaming Aggregation: Do not break pipeline in aggregation if group by columns are ordered (V2) [#6124](https://github.com/apache/arrow-datafusion/pull/6124) (mustafasrepo) -- Make decimal multiplication allow precision-loss in DataFusion [#6103](https://github.com/apache/arrow-datafusion/pull/6103) (viirya) -- Add support for scientific notation (remove old unimplemented() for scientific notation) [#6142](https://github.com/apache/arrow-datafusion/pull/6142) (2010YOUY01) -- Support DROP SCHEMA statements [#6096](https://github.com/apache/arrow-datafusion/pull/6096) (jaylmiller) -- feat: make threshold for using scalar update in aggregate configurable [#6166](https://github.com/apache/arrow-datafusion/pull/6166) (yjshen) -- Adaptive in-memory sort (~2x faster) (#5879) [#6163](https://github.com/apache/arrow-datafusion/pull/6163) (tustvold) -- Add Support for Ordering Equivalence [#6160](https://github.com/apache/arrow-datafusion/pull/6160) (mustafasrepo) -- Sum and Avg should be able to take inputs of type Dictionary [#6197](https://github.com/apache/arrow-datafusion/pull/6197) (viirya) -- CLI: Add support for AWS named profiles [#6161](https://github.com/apache/arrow-datafusion/pull/6161) (mr-brobot) -- feat: concurrent physical planning [#6138](https://github.com/apache/arrow-datafusion/pull/6138) (crepererum) -- feat: allow scalars to appear as the LHS arg in arithmetic operations on temporal values [#6196](https://github.com/apache/arrow-datafusion/pull/6196) (kyle-mccarthy) -- Select Into support [#6219](https://github.com/apache/arrow-datafusion/pull/6219) 
(berkaysynnada) +- feat: date_bin output type same with input type [#6053](https://github.com/apache/datafusion/pull/6053) (jiacai2050) +- feat: add version field to ExecutionContext [#6052](https://github.com/apache/datafusion/pull/6052) (Weijun-H) +- Implement Streaming Aggregation: Do not break pipeline in aggregation if group by columns are ordered (V2) [#6124](https://github.com/apache/datafusion/pull/6124) (mustafasrepo) +- Make decimal multiplication allow precision-loss in DataFusion [#6103](https://github.com/apache/datafusion/pull/6103) (viirya) +- Add support for scientific notation (remove old unimplemented() for scientific notation) [#6142](https://github.com/apache/datafusion/pull/6142) (2010YOUY01) +- Support DROP SCHEMA statements [#6096](https://github.com/apache/datafusion/pull/6096) (jaylmiller) +- feat: make threshold for using scalar update in aggregate configurable [#6166](https://github.com/apache/datafusion/pull/6166) (yjshen) +- Adaptive in-memory sort (~2x faster) (#5879) [#6163](https://github.com/apache/datafusion/pull/6163) (tustvold) +- Add Support for Ordering Equivalence [#6160](https://github.com/apache/datafusion/pull/6160) (mustafasrepo) +- Sum and Avg should be able to take inputs of type Dictionary [#6197](https://github.com/apache/datafusion/pull/6197) (viirya) +- CLI: Add support for AWS named profiles [#6161](https://github.com/apache/datafusion/pull/6161) (mr-brobot) +- feat: concurrent physical planning [#6138](https://github.com/apache/datafusion/pull/6138) (crepererum) +- feat: allow scalars to appear as the LHS arg in arithmetic operations on temporal values [#6196](https://github.com/apache/datafusion/pull/6196) (kyle-mccarthy) +- Select Into support [#6219](https://github.com/apache/datafusion/pull/6219) (berkaysynnada) **Fixed bugs:** -- fix: Use arrow bitwise_op as binary_kernels [#6093](https://github.com/apache/arrow-datafusion/pull/6093) (RTEnzyme) -- fix: allow group by same expr in Aggregate 
[#6091](https://github.com/apache/arrow-datafusion/pull/6091) (jackwener) -- fix: null handling of `ScalarValue::Struct` [#6085](https://github.com/apache/arrow-datafusion/pull/6085) (crepererum) -- fix: make simplify_expressions use a single schema for resolution [#6077](https://github.com/apache/arrow-datafusion/pull/6077) (wolffcm) -- fix: incorrect column pruning in sql with window operations [#6039](https://github.com/apache/arrow-datafusion/pull/6039) (yukkit) -- fix: allow group by same expr in Aggregate [#6097](https://github.com/apache/arrow-datafusion/pull/6097) (jackwener) -- fix: `reassign_predicate_columns` w/ in-list expr [#6114](https://github.com/apache/arrow-datafusion/pull/6114) (crepererum) -- fix: `common_subexpr_eliminate` and aggregates [#6129](https://github.com/apache/arrow-datafusion/pull/6129) (crepererum) -- fix: type coercion ignore prevent `Interval - Date` [#6177](https://github.com/apache/arrow-datafusion/pull/6177) (jackwener) -- fix: `common_subexpr_eliminate` w/ aggregates and relations [#6199](https://github.com/apache/arrow-datafusion/pull/6199) (crepererum) -- fix: slt fail due to bors problem [#6242](https://github.com/apache/arrow-datafusion/pull/6242) (jackwener) +- fix: Use arrow bitwise_op as binary_kernels [#6093](https://github.com/apache/datafusion/pull/6093) (RTEnzyme) +- fix: allow group by same expr in Aggregate [#6091](https://github.com/apache/datafusion/pull/6091) (jackwener) +- fix: null handling of `ScalarValue::Struct` [#6085](https://github.com/apache/datafusion/pull/6085) (crepererum) +- fix: make simplify_expressions use a single schema for resolution [#6077](https://github.com/apache/datafusion/pull/6077) (wolffcm) +- fix: incorrect column pruning in sql with window operations [#6039](https://github.com/apache/datafusion/pull/6039) (yukkit) +- fix: allow group by same expr in Aggregate [#6097](https://github.com/apache/datafusion/pull/6097) (jackwener) +- fix: `reassign_predicate_columns` w/ in-list expr 
[#6114](https://github.com/apache/datafusion/pull/6114) (crepererum) +- fix: `common_subexpr_eliminate` and aggregates [#6129](https://github.com/apache/datafusion/pull/6129) (crepererum) +- fix: type coercion ignore prevent `Interval - Date` [#6177](https://github.com/apache/datafusion/pull/6177) (jackwener) +- fix: `common_subexpr_eliminate` w/ aggregates and relations [#6199](https://github.com/apache/datafusion/pull/6199) (crepererum) +- fix: slt fail due to bors problem [#6242](https://github.com/apache/datafusion/pull/6242) (jackwener) **Documentation updates:** -- chore: Update api docs for `SessionContext`, `TaskContext`, etc [#6106](https://github.com/apache/arrow-datafusion/pull/6106) (alamb) -- docs: consolidate datafusion-cli docs [#6218](https://github.com/apache/arrow-datafusion/pull/6218) (alamb) +- chore: Update api docs for `SessionContext`, `TaskContext`, etc [#6106](https://github.com/apache/datafusion/pull/6106) (alamb) +- docs: consolidate datafusion-cli docs [#6218](https://github.com/apache/datafusion/pull/6218) (alamb) **Merged pull requests:** -- fix: Use arrow bitwise_op as binary_kernels [#6093](https://github.com/apache/arrow-datafusion/pull/6093) (RTEnzyme) -- fix: allow group by same expr in Aggregate [#6091](https://github.com/apache/arrow-datafusion/pull/6091) (jackwener) -- Update DataFusion architecture documentation [#6056](https://github.com/apache/arrow-datafusion/pull/6056) (alamb) -- feat: date_bin output type same with input type [#6053](https://github.com/apache/arrow-datafusion/pull/6053) (jiacai2050) -- fix: null handling of `ScalarValue::Struct` [#6085](https://github.com/apache/arrow-datafusion/pull/6085) (crepererum) -- fix: make simplify_expressions use a single schema for resolution [#6077](https://github.com/apache/arrow-datafusion/pull/6077) (wolffcm) -- fix: incorrect column pruning in sql with window operations [#6039](https://github.com/apache/arrow-datafusion/pull/6039) (yukkit) -- Substrait: Handle multiple 
select with the same table/columns in INTERSECT [#6059](https://github.com/apache/arrow-datafusion/pull/6059) (nseekhao) -- fix: allow group by same expr in Aggregate [#6097](https://github.com/apache/arrow-datafusion/pull/6097) (jackwener) -- chore(deps): update regex and regex-syntax requirement from 0.6.28 to 0.7.1 [#6095](https://github.com/apache/arrow-datafusion/pull/6095) (alamb) -- test: more sqllogicaltest for GROUPBY. [#6100](https://github.com/apache/arrow-datafusion/pull/6100) (jackwener) -- minor: replace unwrap() with Err [#6101](https://github.com/apache/arrow-datafusion/pull/6101) (jackwener) -- Remove temporal to kernels_arrow [#6069](https://github.com/apache/arrow-datafusion/pull/6069) (berkaysynnada) -- feat: add version field to ExecutionContext [#6052](https://github.com/apache/arrow-datafusion/pull/6052) (Weijun-H) -- chore(deps): update substrait requirement from 0.7.1 to 0.8.0 [#6105](https://github.com/apache/arrow-datafusion/pull/6105) (dependabot[bot]) -- refactor(sqllogictests): port group by test to sqllogic [#6088](https://github.com/apache/arrow-datafusion/pull/6088) (aprimadi) -- Use arrow kernels for bitwise operations [#6098](https://github.com/apache/arrow-datafusion/pull/6098) (alamb) -- Unordered PARTITION BY column implementation (to prevent pipeline breaking) [#6036](https://github.com/apache/arrow-datafusion/pull/6036) (mustafasrepo) -- Add from_string_hash_map() method for SessionConfig [#6111](https://github.com/apache/arrow-datafusion/pull/6111) (yahoNanJing) -- Parallelise collect_partitioned [#6109](https://github.com/apache/arrow-datafusion/pull/6109) (tustvold) -- Fix column mapping in output_ordering() and output_partitioning() for ProjectionExec and AggregateExec [#6113](https://github.com/apache/arrow-datafusion/pull/6113) (mingmwang) -- fix: `reassign_predicate_columns` w/ in-list expr [#6114](https://github.com/apache/arrow-datafusion/pull/6114) (crepererum) -- Update to arrow 38 
[#6115](https://github.com/apache/arrow-datafusion/pull/6115) (tustvold) -- Minor: Split `DmlStatement` into its own module [#6120](https://github.com/apache/arrow-datafusion/pull/6120) (alamb) -- Remove input schema from PhysicalExpr, move the validation logic to physical expression planner [#6122](https://github.com/apache/arrow-datafusion/pull/6122) (mingmwang) -- Move Scalar Subquery validation logic to the Analyzer [#6084](https://github.com/apache/arrow-datafusion/pull/6084) (mingmwang) -- chore: Update api docs for `SessionContext`, `TaskContext`, etc [#6106](https://github.com/apache/arrow-datafusion/pull/6106) (alamb) -- Implement Streaming Aggregation: Do not break pipeline in aggregation if group by columns are ordered (V2) [#6124](https://github.com/apache/arrow-datafusion/pull/6124) (mustafasrepo) -- Minor: Mark default_session_builder deprecated, add since markings [#6123](https://github.com/apache/arrow-datafusion/pull/6123) (alamb) -- [MINOR]: Add GroupByOrderMode to the AggregateExec display [#6141](https://github.com/apache/arrow-datafusion/pull/6141) (mustafasrepo) -- Extract `LogicalPlan::Create*` DDL related plan structures into `LogicalPlan::Ddl` [#6121](https://github.com/apache/arrow-datafusion/pull/6121) (alamb) -- Fix broken build due to merge conflict [#6143](https://github.com/apache/arrow-datafusion/pull/6143) (alamb) -- [Bug fix] Source projection output ordering [#6136](https://github.com/apache/arrow-datafusion/pull/6136) (mustafasrepo) -- Make decimal multiplication allow precision-loss in DataFusion [#6103](https://github.com/apache/arrow-datafusion/pull/6103) (viirya) -- MemoryExec INSERT INTO refactor to use ExecutionPlan [#6049](https://github.com/apache/arrow-datafusion/pull/6049) (metesynnada) -- Complete Extracting LogicalPlan::Drop\* DDL related plan structures into LogicalPlan::Ddl [#6144](https://github.com/apache/arrow-datafusion/pull/6144) (alamb) -- fix: `common_subexpr_eliminate` and aggregates 
[#6129](https://github.com/apache/arrow-datafusion/pull/6129) (crepererum) -- Add support for scientific notation (remove old unimplemented() for scientific notation) [#6142](https://github.com/apache/arrow-datafusion/pull/6142) (2010YOUY01) -- Use ParquetObjectReader [#6130](https://github.com/apache/arrow-datafusion/pull/6130) (tustvold) -- Add bench.sh script to automate benchmarking DataFusion against itself [#6131](https://github.com/apache/arrow-datafusion/pull/6131) (alamb) -- Port test in select.rs to sqllogic [#6158](https://github.com/apache/arrow-datafusion/pull/6158) (aprimadi) -- Support uint64 literals [#6146](https://github.com/apache/arrow-datafusion/pull/6146) (jackkleeman) -- Remove JIT based on cranelift, and `jit` feature [#6164](https://github.com/apache/arrow-datafusion/pull/6164) (yjshen) -- minor: add decimal roundtrip tests for the row format [#6165](https://github.com/apache/arrow-datafusion/pull/6165) (yjshen) -- Support DROP SCHEMA statements [#6096](https://github.com/apache/arrow-datafusion/pull/6096) (jaylmiller) -- Improve `compare.py` output to use `min` times and better column titles [#6134](https://github.com/apache/arrow-datafusion/pull/6134) (alamb) -- Fix GroupByOrderMode documentation [#6168](https://github.com/apache/arrow-datafusion/pull/6168) (aprimadi) -- Remove Rayon-based Scheduler [#6169](https://github.com/apache/arrow-datafusion/pull/6169) (tustvold) -- [Minor] Fix typo in SQL data types doc [#6178](https://github.com/apache/arrow-datafusion/pull/6178) (alamb) -- feat: make threshold for using scalar update in aggregate configurable [#6166](https://github.com/apache/arrow-datafusion/pull/6166) (yjshen) -- Handle ScalarValue::Dictionary in add_to_row and update_avg_to_row [#6175](https://github.com/apache/arrow-datafusion/pull/6175) (viirya) -- chore(datetime_expression): support uppercase granularity for `DATE_TRUNC` func [#6185](https://github.com/apache/arrow-datafusion/pull/6185) (appletreeisyellow) -- Simplify 
HashJoin memory accounting [#6170](https://github.com/apache/arrow-datafusion/pull/6170) (tustvold) -- Adaptive in-memory sort (~2x faster) (#5879) [#6163](https://github.com/apache/arrow-datafusion/pull/6163) (tustvold) -- refactor: optimizer shouldn't assume failed rules are internal error [#6184](https://github.com/apache/arrow-datafusion/pull/6184) (wolffcm) -- fix: type coercion ignore prevent `Interval - Date` [#6177](https://github.com/apache/arrow-datafusion/pull/6177) (jackwener) -- minor: remove default match for function signature [#6186](https://github.com/apache/arrow-datafusion/pull/6186) (alamb) -- Add Support for Ordering Equivalence [#6160](https://github.com/apache/arrow-datafusion/pull/6160) (mustafasrepo) -- Remove db-benchmark (and add them to arrow-datafusion-python repo instead) [#6204](https://github.com/apache/arrow-datafusion/pull/6204) (andygrove) -- fix: `common_subexpr_eliminate` w/ aggregates and relations [#6199](https://github.com/apache/arrow-datafusion/pull/6199) (crepererum) -- Add parquet filter and sort to bench.sh [#6172](https://github.com/apache/arrow-datafusion/pull/6172) (alamb) -- minor: remove unused code in binary.rs [#6203](https://github.com/apache/arrow-datafusion/pull/6203) (alamb) -- Sum and Avg should be able to take inputs of type Dictionary [#6197](https://github.com/apache/arrow-datafusion/pull/6197) (viirya) -- minor: fix tiny typo in table.rs [#6220](https://github.com/apache/arrow-datafusion/pull/6220) (okue) -- Lower some log levels from `debug` to `trace` during plan execution [#6193](https://github.com/apache/arrow-datafusion/pull/6193) (alamb) -- minor: fix typo in comments. 
[#6225](https://github.com/apache/arrow-datafusion/pull/6225) (jackwener) -- CLI: Add support for AWS named profiles [#6161](https://github.com/apache/arrow-datafusion/pull/6161) (mr-brobot) -- feat: concurrent physical planning [#6138](https://github.com/apache/arrow-datafusion/pull/6138) (crepererum) -- Cleanup join memory accounting in CrossJoin and NestedLoopsJoin [#6188](https://github.com/apache/arrow-datafusion/pull/6188) (tustvold) -- minor: Move shift_date tests to be with code [#6200](https://github.com/apache/arrow-datafusion/pull/6200) (alamb) -- Minor: rename variable for clarity [#6229](https://github.com/apache/arrow-datafusion/pull/6229) (alamb) -- feat: allow scalars to appear as the LHS arg in arithmetic operations on temporal values [#6196](https://github.com/apache/arrow-datafusion/pull/6196) (kyle-mccarthy) -- Supply consistent format output for FileScanConfig params [#6202](https://github.com/apache/arrow-datafusion/pull/6202) (tz70s) -- minor: refactor code to avoid same code. [#6222](https://github.com/apache/arrow-datafusion/pull/6222) (jackwener) -- minor: change error type for window expressions [#6231](https://github.com/apache/arrow-datafusion/pull/6231) (comphead) -- Add sqllogic test coverage for interval arithmetic [#6201](https://github.com/apache/arrow-datafusion/pull/6201) (alamb) -- Simplify MemoryWriteExec [#6154](https://github.com/apache/arrow-datafusion/pull/6154) (tustvold) -- refactor: separate get_result_type from `coerce_type` [#6221](https://github.com/apache/arrow-datafusion/pull/6221) (jackwener) -- fix: slt fail due to bors problem [#6242](https://github.com/apache/arrow-datafusion/pull/6242) (jackwener) -- Port tests in errors.rs to sqllogictest [#6239](https://github.com/apache/arrow-datafusion/pull/6239) (parkma99) -- Select Into support [#6219](https://github.com/apache/arrow-datafusion/pull/6219) (berkaysynnada) -- refactor: use get_result_type to replace binary_operator_data_type. 
[#6241](https://github.com/apache/arrow-datafusion/pull/6241) (jackwener) -- Port test in union.rs to sqllogic [#6224](https://github.com/apache/arrow-datafusion/pull/6224) (parkma99) -- Port tests in expr.rs to sqllogictest [#6240](https://github.com/apache/arrow-datafusion/pull/6240) (parkma99) -- Port tests in cast.rs to sqllogictest [#6244](https://github.com/apache/arrow-datafusion/pull/6244) (parkma99) -- Port tests in identifiers.rs to sqllogictest [#6245](https://github.com/apache/arrow-datafusion/pull/6245) (parkma99) -- Minor: allow creating infinite external tables via SQL (for testing) [#6235](https://github.com/apache/arrow-datafusion/pull/6235) (alamb) -- Minor: consolidate test data [#6217](https://github.com/apache/arrow-datafusion/pull/6217) (alamb) -- docs: consolidate datafusion-cli docs [#6218](https://github.com/apache/arrow-datafusion/pull/6218) (alamb) -- Port tests in wildcard.rs to sqllogictest [#6249](https://github.com/apache/arrow-datafusion/pull/6249) (masanobbb) -- Simplify and speed up MemoryExec insert [#6236](https://github.com/apache/arrow-datafusion/pull/6236) (alamb) -- minor: fix typo [#6253](https://github.com/apache/arrow-datafusion/pull/6253) (okue) -- refactor: separate get_common_type / get_result_type for temporal type [#6250](https://github.com/apache/arrow-datafusion/pull/6250) (jackwener) -- Port tests in set_variable.rs to sqllogictest [#6255](https://github.com/apache/arrow-datafusion/pull/6255) (parkma99) +- fix: Use arrow bitwise_op as binary_kernels [#6093](https://github.com/apache/datafusion/pull/6093) (RTEnzyme) +- fix: allow group by same expr in Aggregate [#6091](https://github.com/apache/datafusion/pull/6091) (jackwener) +- Update DataFusion architecture documentation [#6056](https://github.com/apache/datafusion/pull/6056) (alamb) +- feat: date_bin output type same with input type [#6053](https://github.com/apache/datafusion/pull/6053) (jiacai2050) +- fix: null handling of `ScalarValue::Struct` 
[#6085](https://github.com/apache/datafusion/pull/6085) (crepererum) +- fix: make simplify_expressions use a single schema for resolution [#6077](https://github.com/apache/datafusion/pull/6077) (wolffcm) +- fix: incorrect column pruning in sql with window operations [#6039](https://github.com/apache/datafusion/pull/6039) (yukkit) +- Substrait: Handle multiple select with the same table/columns in INTERSECT [#6059](https://github.com/apache/datafusion/pull/6059) (nseekhao) +- fix: allow group by same expr in Aggregate [#6097](https://github.com/apache/datafusion/pull/6097) (jackwener) +- chore(deps): update regex and regex-syntax requirement from 0.6.28 to 0.7.1 [#6095](https://github.com/apache/datafusion/pull/6095) (alamb) +- test: more sqllogicaltest for GROUPBY. [#6100](https://github.com/apache/datafusion/pull/6100) (jackwener) +- minor: replace unwrap() with Err [#6101](https://github.com/apache/datafusion/pull/6101) (jackwener) +- Remove temporal to kernels_arrow [#6069](https://github.com/apache/datafusion/pull/6069) (berkaysynnada) +- feat: add version field to ExecutionContext [#6052](https://github.com/apache/datafusion/pull/6052) (Weijun-H) +- chore(deps): update substrait requirement from 0.7.1 to 0.8.0 [#6105](https://github.com/apache/datafusion/pull/6105) (dependabot[bot]) +- refactor(sqllogictests): port group by test to sqllogic [#6088](https://github.com/apache/datafusion/pull/6088) (aprimadi) +- Use arrow kernels for bitwise operations [#6098](https://github.com/apache/datafusion/pull/6098) (alamb) +- Unordered PARTITION BY column implementation (to prevent pipeline breaking) [#6036](https://github.com/apache/datafusion/pull/6036) (mustafasrepo) +- Add from_string_hash_map() method for SessionConfig [#6111](https://github.com/apache/datafusion/pull/6111) (yahoNanJing) +- Parallelise collect_partitioned [#6109](https://github.com/apache/datafusion/pull/6109) (tustvold) +- Fix column mapping in output_ordering() and output_partitioning() for 
ProjectionExec and AggregateExec [#6113](https://github.com/apache/datafusion/pull/6113) (mingmwang) +- fix: `reassign_predicate_columns` w/ in-list expr [#6114](https://github.com/apache/datafusion/pull/6114) (crepererum) +- Update to arrow 38 [#6115](https://github.com/apache/datafusion/pull/6115) (tustvold) +- Minor: Split `DmlStatement` into its own module [#6120](https://github.com/apache/datafusion/pull/6120) (alamb) +- Remove input schema from PhysicalExpr, move the validation logic to physical expression planner [#6122](https://github.com/apache/datafusion/pull/6122) (mingmwang) +- Move Scalar Subquery validation logic to the Analyzer [#6084](https://github.com/apache/datafusion/pull/6084) (mingmwang) +- chore: Update api docs for `SessionContext`, `TaskContext`, etc [#6106](https://github.com/apache/datafusion/pull/6106) (alamb) +- Implement Streaming Aggregation: Do not break pipeline in aggregation if group by columns are ordered (V2) [#6124](https://github.com/apache/datafusion/pull/6124) (mustafasrepo) +- Minor: Mark default_session_builder deprecated, add since markings [#6123](https://github.com/apache/datafusion/pull/6123) (alamb) +- [MINOR]: Add GroupByOrderMode to the AggregateExec display [#6141](https://github.com/apache/datafusion/pull/6141) (mustafasrepo) +- Extract `LogicalPlan::Create*` DDL related plan structures into `LogicalPlan::Ddl` [#6121](https://github.com/apache/datafusion/pull/6121) (alamb) +- Fix broken build due to merge conflict [#6143](https://github.com/apache/datafusion/pull/6143) (alamb) +- [Bug fix] Source projection output ordering [#6136](https://github.com/apache/datafusion/pull/6136) (mustafasrepo) +- Make decimal multiplication allow precision-loss in DataFusion [#6103](https://github.com/apache/datafusion/pull/6103) (viirya) +- MemoryExec INSERT INTO refactor to use ExecutionPlan [#6049](https://github.com/apache/datafusion/pull/6049) (metesynnada) +- Complete Extracting LogicalPlan::Drop\* DDL related plan structures 
into LogicalPlan::Ddl [#6144](https://github.com/apache/datafusion/pull/6144) (alamb) +- fix: `common_subexpr_eliminate` and aggregates [#6129](https://github.com/apache/datafusion/pull/6129) (crepererum) +- Add support for scientific notation (remove old unimplemented() for scientific notation) [#6142](https://github.com/apache/datafusion/pull/6142) (2010YOUY01) +- Use ParquetObjectReader [#6130](https://github.com/apache/datafusion/pull/6130) (tustvold) +- Add bench.sh script to automate benchmarking DataFusion against itself [#6131](https://github.com/apache/datafusion/pull/6131) (alamb) +- Port test in select.rs to sqllogic [#6158](https://github.com/apache/datafusion/pull/6158) (aprimadi) +- Support uint64 literals [#6146](https://github.com/apache/datafusion/pull/6146) (jackkleeman) +- Remove JIT based on cranelift, and `jit` feature [#6164](https://github.com/apache/datafusion/pull/6164) (yjshen) +- minor: add decimal roundtrip tests for the row format [#6165](https://github.com/apache/datafusion/pull/6165) (yjshen) +- Support DROP SCHEMA statements [#6096](https://github.com/apache/datafusion/pull/6096) (jaylmiller) +- Improve `compare.py` output to use `min` times and better column titles [#6134](https://github.com/apache/datafusion/pull/6134) (alamb) +- Fix GroupByOrderMode documentation [#6168](https://github.com/apache/datafusion/pull/6168) (aprimadi) +- Remove Rayon-based Scheduler [#6169](https://github.com/apache/datafusion/pull/6169) (tustvold) +- [Minor] Fix typo in SQL data types doc [#6178](https://github.com/apache/datafusion/pull/6178) (alamb) +- feat: make threshold for using scalar update in aggregate configurable [#6166](https://github.com/apache/datafusion/pull/6166) (yjshen) +- Handle ScalarValue::Dictionary in add_to_row and update_avg_to_row [#6175](https://github.com/apache/datafusion/pull/6175) (viirya) +- chore(datetime_expression): support uppercase granularity for `DATE_TRUNC` func 
[#6185](https://github.com/apache/datafusion/pull/6185) (appletreeisyellow) +- Simplify HashJoin memory accounting [#6170](https://github.com/apache/datafusion/pull/6170) (tustvold) +- Adaptive in-memory sort (~2x faster) (#5879) [#6163](https://github.com/apache/datafusion/pull/6163) (tustvold) +- refactor: optimizer shouldn't assume failed rules are internal error [#6184](https://github.com/apache/datafusion/pull/6184) (wolffcm) +- fix: type coercion ignore prevent `Interval - Date` [#6177](https://github.com/apache/datafusion/pull/6177) (jackwener) +- minor: remove default match for function signature [#6186](https://github.com/apache/datafusion/pull/6186) (alamb) +- Add Support for Ordering Equivalence [#6160](https://github.com/apache/datafusion/pull/6160) (mustafasrepo) +- Remove db-benchmark (and add them to arrow-datafusion-python repo instead) [#6204](https://github.com/apache/datafusion/pull/6204) (andygrove) +- fix: `common_subexpr_eliminate` w/ aggregates and relations [#6199](https://github.com/apache/datafusion/pull/6199) (crepererum) +- Add parquet filter and sort to bench.sh [#6172](https://github.com/apache/datafusion/pull/6172) (alamb) +- minor: remove unused code in binary.rs [#6203](https://github.com/apache/datafusion/pull/6203) (alamb) +- Sum and Avg should be able to take inputs of type Dictionary [#6197](https://github.com/apache/datafusion/pull/6197) (viirya) +- minor: fix tiny typo in table.rs [#6220](https://github.com/apache/datafusion/pull/6220) (okue) +- Lower some log levels from `debug` to `trace` during plan execution [#6193](https://github.com/apache/datafusion/pull/6193) (alamb) +- minor: fix typo in comments. 
[#6225](https://github.com/apache/datafusion/pull/6225) (jackwener) +- CLI: Add support for AWS named profiles [#6161](https://github.com/apache/datafusion/pull/6161) (mr-brobot) +- feat: concurrent physical planning [#6138](https://github.com/apache/datafusion/pull/6138) (crepererum) +- Cleanup join memory accounting in CrossJoin and NestedLoopsJoin [#6188](https://github.com/apache/datafusion/pull/6188) (tustvold) +- minor: Move shift_date tests to be with code [#6200](https://github.com/apache/datafusion/pull/6200) (alamb) +- Minor: rename variable for clarity [#6229](https://github.com/apache/datafusion/pull/6229) (alamb) +- feat: allow scalars to appear as the LHS arg in arithmetic operations on temporal values [#6196](https://github.com/apache/datafusion/pull/6196) (kyle-mccarthy) +- Supply consistent format output for FileScanConfig params [#6202](https://github.com/apache/datafusion/pull/6202) (tz70s) +- minor: refactor code to avoid same code. [#6222](https://github.com/apache/datafusion/pull/6222) (jackwener) +- minor: change error type for window expressions [#6231](https://github.com/apache/datafusion/pull/6231) (comphead) +- Add sqllogic test coverage for interval arithmetic [#6201](https://github.com/apache/datafusion/pull/6201) (alamb) +- Simplify MemoryWriteExec [#6154](https://github.com/apache/datafusion/pull/6154) (tustvold) +- refactor: separate get_result_type from `coerce_type` [#6221](https://github.com/apache/datafusion/pull/6221) (jackwener) +- fix: slt fail due to bors problem [#6242](https://github.com/apache/datafusion/pull/6242) (jackwener) +- Port tests in errors.rs to sqllogictest [#6239](https://github.com/apache/datafusion/pull/6239) (parkma99) +- Select Into support [#6219](https://github.com/apache/datafusion/pull/6219) (berkaysynnada) +- refactor: use get_result_type to replace binary_operator_data_type. 
[#6241](https://github.com/apache/datafusion/pull/6241) (jackwener) +- Port test in union.rs to sqllogic [#6224](https://github.com/apache/datafusion/pull/6224) (parkma99) +- Port tests in expr.rs to sqllogictest [#6240](https://github.com/apache/datafusion/pull/6240) (parkma99) +- Port tests in cast.rs to sqllogictest [#6244](https://github.com/apache/datafusion/pull/6244) (parkma99) +- Port tests in identifiers.rs to sqllogictest [#6245](https://github.com/apache/datafusion/pull/6245) (parkma99) +- Minor: allow creating infinite external tables via SQL (for testing) [#6235](https://github.com/apache/datafusion/pull/6235) (alamb) +- Minor: consolidate test data [#6217](https://github.com/apache/datafusion/pull/6217) (alamb) +- docs: consolidate datafusion-cli docs [#6218](https://github.com/apache/datafusion/pull/6218) (alamb) +- Port tests in wildcard.rs to sqllogictest [#6249](https://github.com/apache/datafusion/pull/6249) (masanobbb) +- Simplify and speed up MemoryExec insert [#6236](https://github.com/apache/datafusion/pull/6236) (alamb) +- minor: fix typo [#6253](https://github.com/apache/datafusion/pull/6253) (okue) +- refactor: separate get_common_type / get_result_type for temporal type [#6250](https://github.com/apache/datafusion/pull/6250) (jackwener) +- Port tests in set_variable.rs to sqllogictest [#6255](https://github.com/apache/datafusion/pull/6255) (parkma99) diff --git a/dev/changelog/25.0.0.md b/dev/changelog/25.0.0.md index 199e65e59ed9..7bc860ecc967 100644 --- a/dev/changelog/25.0.0.md +++ b/dev/changelog/25.0.0.md @@ -17,101 +17,101 @@ under the License. 
--> -## [25.0.0](https://github.com/apache/arrow-datafusion/tree/25.0.0) (2023-05-19) +## [25.0.0](https://github.com/apache/datafusion/tree/25.0.0) (2023-05-19) -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/24.0.0...25.0.0) +[Full Changelog](https://github.com/apache/datafusion/compare/24.0.0...25.0.0) **Breaking changes:** -- refactor: Expr::ScalarFunction to use a struct [#6281](https://github.com/apache/arrow-datafusion/pull/6281) (jackwener) -- Remove SizedRecordBatchStream [#6309](https://github.com/apache/arrow-datafusion/pull/6309) (tustvold) -- Add support for ordering sensitive aggregation [#6332](https://github.com/apache/arrow-datafusion/pull/6332) (mustafasrepo) -- Cleanup ExternalSorter metrics (#5885) [#6364](https://github.com/apache/arrow-datafusion/pull/6364) (tustvold) +- refactor: Expr::ScalarFunction to use a struct [#6281](https://github.com/apache/datafusion/pull/6281) (jackwener) +- Remove SizedRecordBatchStream [#6309](https://github.com/apache/datafusion/pull/6309) (tustvold) +- Add support for ordering sensitive aggregation [#6332](https://github.com/apache/datafusion/pull/6332) (mustafasrepo) +- Cleanup ExternalSorter metrics (#5885) [#6364](https://github.com/apache/datafusion/pull/6364) (tustvold) **Implemented enhancements:** -- feat: LCM, GCD and Factorial [#6149](https://github.com/apache/arrow-datafusion/pull/6149) (izveigor) -- feat: negation of `Intervals` [#6312](https://github.com/apache/arrow-datafusion/pull/6312) (izveigor) -- feat: support bitwise and boolean aggregate functions [#6276](https://github.com/apache/arrow-datafusion/pull/6276) (izveigor) -- feat: min/max agg for bool [#6226](https://github.com/apache/arrow-datafusion/pull/6226) (crepererum) -- feat: add pattern for simplifying exprs like `str ~ '^foo$'` [#6369](https://github.com/apache/arrow-datafusion/pull/6369) (wolffcm) +- feat: LCM, GCD and Factorial [#6149](https://github.com/apache/datafusion/pull/6149) (izveigor) +- feat: negation 
of `Intervals` [#6312](https://github.com/apache/datafusion/pull/6312) (izveigor) +- feat: support bitwise and boolean aggregate functions [#6276](https://github.com/apache/datafusion/pull/6276) (izveigor) +- feat: min/max agg for bool [#6226](https://github.com/apache/datafusion/pull/6226) (crepererum) +- feat: add pattern for simplifying exprs like `str ~ '^foo$'` [#6369](https://github.com/apache/datafusion/pull/6369) (wolffcm) **Fixed bugs:** -- fix: `projection_push_down` don't consider VarProvider in columns. [#6254](https://github.com/apache/arrow-datafusion/pull/6254) (jackwener) +- fix: `projection_push_down` don't consider VarProvider in columns. [#6254](https://github.com/apache/datafusion/pull/6254) (jackwener) **Documentation updates:** -- Bump snmalloc version in the example usage doc [#6344](https://github.com/apache/arrow-datafusion/pull/6344) (qrilka) +- Bump snmalloc version in the example usage doc [#6344](https://github.com/apache/datafusion/pull/6344) (qrilka) **Merged pull requests:** -- doc: add more sql example in ddl.md [#6266](https://github.com/apache/arrow-datafusion/pull/6266) (jackwener) -- Port tests in functions.rs to sqllogictest [#6256](https://github.com/apache/arrow-datafusion/pull/6256) (parkma99) -- Prepare 24.0.0 release [#6262](https://github.com/apache/arrow-datafusion/pull/6262) (andygrove) -- fix: `projection_push_down` don't consider VarProvider in columns. 
[#6254](https://github.com/apache/arrow-datafusion/pull/6254) (jackwener) -- Minor: rename NestedLoopsJoin memory reservation for clarity [#6228](https://github.com/apache/arrow-datafusion/pull/6228) (alamb) -- refactor: Expr::ScalarFunction to use a struct [#6281](https://github.com/apache/arrow-datafusion/pull/6281) (jackwener) -- Minor: document syntax of CREATE EXTERNAL TABLE [#6247](https://github.com/apache/arrow-datafusion/pull/6247) (alamb) -- feat: LCM, GCD and Factorial [#6149](https://github.com/apache/arrow-datafusion/pull/6149) (izveigor) -- Port tests in unicode.rs to sqllogictest [#6259](https://github.com/apache/arrow-datafusion/pull/6259) (parkma99) -- minor: remove prefix in type_coercion [#6283](https://github.com/apache/arrow-datafusion/pull/6283) (jackwener) -- Enable parser to parse create external clauses in arbitrary order [#6257](https://github.com/apache/arrow-datafusion/pull/6257) (aprimadi) -- minor: Remove dead code for casting dictionaries [#6286](https://github.com/apache/arrow-datafusion/pull/6286) (alamb) -- refactor: Expr::ScalarUDF to use a struct [#6284](https://github.com/apache/arrow-datafusion/pull/6284) (jackwener) -- refactor: Expr::AggregateUDF to use a struct [#6294](https://github.com/apache/arrow-datafusion/pull/6294) (my-vegetable-has-exploded) -- refactor: Expr::Exists to use a struct. 
[#6292](https://github.com/apache/arrow-datafusion/pull/6292) (QuenKar) -- refactor: Expr::InList to use a struct [#6293](https://github.com/apache/arrow-datafusion/pull/6293) (jackwener) -- Fix `CREATE EXTERNAL TABLE` doesn't work with non-standard file ext [#6274](https://github.com/apache/arrow-datafusion/pull/6274) (aprimadi) -- Support `interval '1 month' + date/timestamp`: Handle binary op interval in logical AST builder [#6270](https://github.com/apache/arrow-datafusion/pull/6270) (aprimadi) -- chore(deps): update substrait requirement from 0.8.0 to 0.9.0 [#6296](https://github.com/apache/arrow-datafusion/pull/6296) (dependabot[bot]) -- Refactor: Expr::InSubquery to use a struct [#6295](https://github.com/apache/arrow-datafusion/pull/6295) (gitccl) -- Add more documentation to SortPreservingMergeStream [#6260](https://github.com/apache/arrow-datafusion/pull/6260) (aprimadi) -- refactor: Expr::PlaceHolder to use a struct [#6304](https://github.com/apache/arrow-datafusion/pull/6304) (jackwener) -- Fix inconsistent array type for binary numerical operators result between array and scalar [#6269](https://github.com/apache/arrow-datafusion/pull/6269) (viirya) -- Remove SizedRecordBatchStream [#6309](https://github.com/apache/arrow-datafusion/pull/6309) (tustvold) -- Improve error message for CREATE EXTERNAL TABLE [#6291](https://github.com/apache/arrow-datafusion/pull/6291) (parkma99) -- Remove the PhysicalSortExpr restriction on union get meet [#6273](https://github.com/apache/arrow-datafusion/pull/6273) (berkaysynnada) -- Disable `skip_failed_rules` optimizer config by default [#6265](https://github.com/apache/arrow-datafusion/pull/6265) (jackwener) -- Update Arrow 39 [#6252](https://github.com/apache/arrow-datafusion/pull/6252) (tustvold) -- Port tests in `json.rs` to sqllogictest [#6314](https://github.com/apache/arrow-datafusion/pull/6314) (gitccl) -- corrected order of cd command to git checkout [#6318](https://github.com/apache/arrow-datafusion/pull/6318) 
(sunny-jain-maersk) -- Port remainder of `window.rs` to sqllogictest [#6234](https://github.com/apache/arrow-datafusion/pull/6234) (alamb) -- feat: negation of `Intervals` [#6312](https://github.com/apache/arrow-datafusion/pull/6312) (izveigor) -- [parquet] Avoid read parquet index when there is no filter pushdown. [#6317](https://github.com/apache/arrow-datafusion/pull/6317) (Ted-Jiang) -- Parallel merge sort (#6162) [#6308](https://github.com/apache/arrow-datafusion/pull/6308) (tustvold) -- Port some tests in joins.rs to sqllogictest [#6306](https://github.com/apache/arrow-datafusion/pull/6306) (parkma99) -- Fix case evaluation with NULL [#6334](https://github.com/apache/arrow-datafusion/pull/6334) (byteink) -- Support null values in Avro string columns [#6307](https://github.com/apache/arrow-datafusion/pull/6307) (nenorbot) -- Add bdt to the list of known users [#6341](https://github.com/apache/arrow-datafusion/pull/6341) (qrilka) -- Port tests in errors.rs to context.rs [#6340](https://github.com/apache/arrow-datafusion/pull/6340) (masanobbb) -- Enable parquet page level skipping (page index pruning) by default [#5099](https://github.com/apache/arrow-datafusion/pull/5099) (alamb) -- Bump snmalloc version in the example usage doc [#6344](https://github.com/apache/arrow-datafusion/pull/6344) (qrilka) -- Port tests in explain.rs to sqllogictests [#6343](https://github.com/apache/arrow-datafusion/pull/6343) (my-vegetable-has-exploded) -- Improve parallelism of repartition operator with multiple cores [#6310](https://github.com/apache/arrow-datafusion/pull/6310) (alamb) -- Ordering satisfy consider ordering equivalence of different lengths [#6330](https://github.com/apache/arrow-datafusion/pull/6330) (berkaysynnada) -- Minor: Update documentation for `datafusion.execution.parquet.enable_page_index` [#6342](https://github.com/apache/arrow-datafusion/pull/6342) (alamb) -- refine decimal multiply, avoid cast to wider type 
[#6331](https://github.com/apache/arrow-datafusion/pull/6331) (mingmwang) -- Combine the two rules: DecorrelateWhereExists and DecorrelateWhereIn [#6271](https://github.com/apache/arrow-datafusion/pull/6271) (mingmwang) -- feat: support bitwise and boolean aggregate functions [#6276](https://github.com/apache/arrow-datafusion/pull/6276) (izveigor) -- Fix explain plan formatting in sqllogictest [#6329](https://github.com/apache/arrow-datafusion/pull/6329) (alamb) -- Fix expected output [#6353](https://github.com/apache/arrow-datafusion/pull/6353) (alamb) -- Simplify IsNotNull and IsNull expression [#6345](https://github.com/apache/arrow-datafusion/pull/6345) (byteink) -- Add support for ordering sensitive aggregation [#6332](https://github.com/apache/arrow-datafusion/pull/6332) (mustafasrepo) -- chore(deps): update substrait requirement from 0.9.0 to 0.10.0 [#6351](https://github.com/apache/arrow-datafusion/pull/6351) (dependabot[bot]) -- Minor: Update the testing section of contributor guide [#6357](https://github.com/apache/arrow-datafusion/pull/6357) (alamb) -- Add support for reading Arrow files [#6337](https://github.com/apache/arrow-datafusion/pull/6337) (jonmmease) -- Support CREATE TABLE via SQL for infinite streams [#6352](https://github.com/apache/arrow-datafusion/pull/6352) (aprimadi) -- feat: min/max agg for bool [#6226](https://github.com/apache/arrow-datafusion/pull/6226) (crepererum) -- Fix variable shadowing in test code [#6361](https://github.com/apache/arrow-datafusion/pull/6361) (lokax) -- Cleanup ExternalSorter metrics (#5885) [#6364](https://github.com/apache/arrow-datafusion/pull/6364) (tustvold) -- Fix nullability calculation for boolean expressions [#6365](https://github.com/apache/arrow-datafusion/pull/6365) (byteink) -- Faster ListingTable partition listing (#6182) [#6183](https://github.com/apache/arrow-datafusion/pull/6183) (tustvold) -- feat: add pattern for simplifying exprs like `str ~ '^foo$'` 
[#6369](https://github.com/apache/arrow-datafusion/pull/6369) (wolffcm) -- Minor: remove left over println [#6375](https://github.com/apache/arrow-datafusion/pull/6375) (alamb) -- Limit the number of partition files to be displayed for FileGroupsDisplay [#6359](https://github.com/apache/arrow-datafusion/pull/6359) (yahoNanJing) -- Switch to non-recursive on heap virtual stack when building logical plan from SQL expression [#6360](https://github.com/apache/arrow-datafusion/pull/6360) (aprimadi) -- Fix UNION ALL aliasing [#6373](https://github.com/apache/arrow-datafusion/pull/6373) (comphead) -- [sqllogictest] port tests in avro.rs to sqllogictest [#6362](https://github.com/apache/arrow-datafusion/pull/6362) (e1ijah1) -- Concurrent Parquet Schema Inference [#6366](https://github.com/apache/arrow-datafusion/pull/6366) (tustvold) -- Add` COPY .. TO ..` syntax support [#6355](https://github.com/apache/arrow-datafusion/pull/6355) (alamb) +- doc: add more sql example in ddl.md [#6266](https://github.com/apache/datafusion/pull/6266) (jackwener) +- Port tests in functions.rs to sqllogictest [#6256](https://github.com/apache/datafusion/pull/6256) (parkma99) +- Prepare 24.0.0 release [#6262](https://github.com/apache/datafusion/pull/6262) (andygrove) +- fix: `projection_push_down` don't consider VarProvider in columns. 
[#6254](https://github.com/apache/datafusion/pull/6254) (jackwener) +- Minor: rename NestedLoopsJoin memory reservation for clarity [#6228](https://github.com/apache/datafusion/pull/6228) (alamb) +- refactor: Expr::ScalarFunction to use a struct [#6281](https://github.com/apache/datafusion/pull/6281) (jackwener) +- Minor: document syntax of CREATE EXTERNAL TABLE [#6247](https://github.com/apache/datafusion/pull/6247) (alamb) +- feat: LCM, GCD and Factorial [#6149](https://github.com/apache/datafusion/pull/6149) (izveigor) +- Port tests in unicode.rs to sqllogictest [#6259](https://github.com/apache/datafusion/pull/6259) (parkma99) +- minor: remove prefix in type_coercion [#6283](https://github.com/apache/datafusion/pull/6283) (jackwener) +- Enable parser to parse create external clauses in arbitrary order [#6257](https://github.com/apache/datafusion/pull/6257) (aprimadi) +- minor: Remove dead code for casting dictionaries [#6286](https://github.com/apache/datafusion/pull/6286) (alamb) +- refactor: Expr::ScalarUDF to use a struct [#6284](https://github.com/apache/datafusion/pull/6284) (jackwener) +- refactor: Expr::AggregateUDF to use a struct [#6294](https://github.com/apache/datafusion/pull/6294) (my-vegetable-has-exploded) +- refactor: Expr::Exists to use a struct. 
[#6292](https://github.com/apache/datafusion/pull/6292) (QuenKar) +- refactor: Expr::InList to use a struct [#6293](https://github.com/apache/datafusion/pull/6293) (jackwener) +- Fix `CREATE EXTERNAL TABLE` doesn't work with non-standard file ext [#6274](https://github.com/apache/datafusion/pull/6274) (aprimadi) +- Support `interval '1 month' + date/timestamp`: Handle binary op interval in logical AST builder [#6270](https://github.com/apache/datafusion/pull/6270) (aprimadi) +- chore(deps): update substrait requirement from 0.8.0 to 0.9.0 [#6296](https://github.com/apache/datafusion/pull/6296) (dependabot[bot]) +- Refactor: Expr::InSubquery to use a struct [#6295](https://github.com/apache/datafusion/pull/6295) (gitccl) +- Add more documentation to SortPreservingMergeStream [#6260](https://github.com/apache/datafusion/pull/6260) (aprimadi) +- refactor: Expr::PlaceHolder to use a struct [#6304](https://github.com/apache/datafusion/pull/6304) (jackwener) +- Fix inconsistent array type for binary numerical operators result between array and scalar [#6269](https://github.com/apache/datafusion/pull/6269) (viirya) +- Remove SizedRecordBatchStream [#6309](https://github.com/apache/datafusion/pull/6309) (tustvold) +- Improve error message for CREATE EXTERNAL TABLE [#6291](https://github.com/apache/datafusion/pull/6291) (parkma99) +- Remove the PhysicalSortExpr restriction on union get meet [#6273](https://github.com/apache/datafusion/pull/6273) (berkaysynnada) +- Disable `skip_failed_rules` optimizer config by default [#6265](https://github.com/apache/datafusion/pull/6265) (jackwener) +- Update Arrow 39 [#6252](https://github.com/apache/datafusion/pull/6252) (tustvold) +- Port tests in `json.rs` to sqllogictest [#6314](https://github.com/apache/datafusion/pull/6314) (gitccl) +- corrected order of cd command to git checkout [#6318](https://github.com/apache/datafusion/pull/6318) (sunny-jain-maersk) +- Port remainder of `window.rs` to sqllogictest 
[#6234](https://github.com/apache/datafusion/pull/6234) (alamb) +- feat: negation of `Intervals` [#6312](https://github.com/apache/datafusion/pull/6312) (izveigor) +- [parquet] Avoid read parquet index when there is no filter pushdown. [#6317](https://github.com/apache/datafusion/pull/6317) (Ted-Jiang) +- Parallel merge sort (#6162) [#6308](https://github.com/apache/datafusion/pull/6308) (tustvold) +- Port some tests in joins.rs to sqllogictest [#6306](https://github.com/apache/datafusion/pull/6306) (parkma99) +- Fix case evaluation with NULL [#6334](https://github.com/apache/datafusion/pull/6334) (byteink) +- Support null values in Avro string columns [#6307](https://github.com/apache/datafusion/pull/6307) (nenorbot) +- Add bdt to the list of known users [#6341](https://github.com/apache/datafusion/pull/6341) (qrilka) +- Port tests in errors.rs to context.rs [#6340](https://github.com/apache/datafusion/pull/6340) (masanobbb) +- Enable parquet page level skipping (page index pruning) by default [#5099](https://github.com/apache/datafusion/pull/5099) (alamb) +- Bump snmalloc version in the example usage doc [#6344](https://github.com/apache/datafusion/pull/6344) (qrilka) +- Port tests in explain.rs to sqllogictests [#6343](https://github.com/apache/datafusion/pull/6343) (my-vegetable-has-exploded) +- Improve parallelism of repartition operator with multiple cores [#6310](https://github.com/apache/datafusion/pull/6310) (alamb) +- Ordering satisfy consider ordering equivalence of different lengths [#6330](https://github.com/apache/datafusion/pull/6330) (berkaysynnada) +- Minor: Update documentation for `datafusion.execution.parquet.enable_page_index` [#6342](https://github.com/apache/datafusion/pull/6342) (alamb) +- refine decimal multiply, avoid cast to wider type [#6331](https://github.com/apache/datafusion/pull/6331) (mingmwang) +- Combine the two rules: DecorrelateWhereExists and DecorrelateWhereIn [#6271](https://github.com/apache/datafusion/pull/6271) 
(mingmwang) +- feat: support bitwise and boolean aggregate functions [#6276](https://github.com/apache/datafusion/pull/6276) (izveigor) +- Fix explain plan formatting in sqllogictest [#6329](https://github.com/apache/datafusion/pull/6329) (alamb) +- Fix expected output [#6353](https://github.com/apache/datafusion/pull/6353) (alamb) +- Simplify IsNotNull and IsNull expression [#6345](https://github.com/apache/datafusion/pull/6345) (byteink) +- Add support for ordering sensitive aggregation [#6332](https://github.com/apache/datafusion/pull/6332) (mustafasrepo) +- chore(deps): update substrait requirement from 0.9.0 to 0.10.0 [#6351](https://github.com/apache/datafusion/pull/6351) (dependabot[bot]) +- Minor: Update the testing section of contributor guide [#6357](https://github.com/apache/datafusion/pull/6357) (alamb) +- Add support for reading Arrow files [#6337](https://github.com/apache/datafusion/pull/6337) (jonmmease) +- Support CREATE TABLE via SQL for infinite streams [#6352](https://github.com/apache/datafusion/pull/6352) (aprimadi) +- feat: min/max agg for bool [#6226](https://github.com/apache/datafusion/pull/6226) (crepererum) +- Fix variable shadowing in test code [#6361](https://github.com/apache/datafusion/pull/6361) (lokax) +- Cleanup ExternalSorter metrics (#5885) [#6364](https://github.com/apache/datafusion/pull/6364) (tustvold) +- Fix nullability calculation for boolean expressions [#6365](https://github.com/apache/datafusion/pull/6365) (byteink) +- Faster ListingTable partition listing (#6182) [#6183](https://github.com/apache/datafusion/pull/6183) (tustvold) +- feat: add pattern for simplifying exprs like `str ~ '^foo$'` [#6369](https://github.com/apache/datafusion/pull/6369) (wolffcm) +- Minor: remove left over println [#6375](https://github.com/apache/datafusion/pull/6375) (alamb) +- Limit the number of partition files to be displayed for FileGroupsDisplay [#6359](https://github.com/apache/datafusion/pull/6359) (yahoNanJing) +- Switch to 
non-recursive on heap virtual stack when building logical plan from SQL expression [#6360](https://github.com/apache/datafusion/pull/6360) (aprimadi) +- Fix UNION ALL aliasing [#6373](https://github.com/apache/datafusion/pull/6373) (comphead) +- [sqllogictest] port tests in avro.rs to sqllogictest [#6362](https://github.com/apache/datafusion/pull/6362) (e1ijah1) +- Concurrent Parquet Schema Inference [#6366](https://github.com/apache/datafusion/pull/6366) (tustvold) +- Add` COPY .. TO ..` syntax support [#6355](https://github.com/apache/datafusion/pull/6355) (alamb) diff --git a/dev/changelog/26.0.0.md b/dev/changelog/26.0.0.md index 3e5e210cdadc..7771e4e4636a 100644 --- a/dev/changelog/26.0.0.md +++ b/dev/changelog/26.0.0.md @@ -17,100 +17,100 @@ under the License. --> -## [26.0.0](https://github.com/apache/arrow-datafusion/tree/26.0.0) (2023-06-02) +## [26.0.0](https://github.com/apache/datafusion/tree/26.0.0) (2023-06-02) -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/25.0.0...26.0.0) +[Full Changelog](https://github.com/apache/datafusion/compare/25.0.0...26.0.0) **Breaking changes:** -- feat: implement serialize/deserialize for extension logical plan [#6378](https://github.com/apache/arrow-datafusion/pull/6378) (waynexia) -- Use `std::ops` traits for `Exprs` rather than custom function names [#6465](https://github.com/apache/arrow-datafusion/pull/6465) (LouisGariepy) -- Support Defining Ordering Equivalence at the Source [#6469](https://github.com/apache/arrow-datafusion/pull/6469) (berkaysynnada) +- feat: implement serialize/deserialize for extension logical plan [#6378](https://github.com/apache/datafusion/pull/6378) (waynexia) +- Use `std::ops` traits for `Exprs` rather than custom function names [#6465](https://github.com/apache/datafusion/pull/6465) (LouisGariepy) +- Support Defining Ordering Equivalence at the Source [#6469](https://github.com/apache/datafusion/pull/6469) (berkaysynnada) **Implemented enhancements:** -- feat: support 
type cast in SchemaAdapter [#6404](https://github.com/apache/arrow-datafusion/pull/6404) (e1ijah1) -- feat: eliminate useless join | convert inner to outer when condition is true [#6443](https://github.com/apache/arrow-datafusion/pull/6443) (jackwener) -- feat: datafusion-cli support executes sql with escaped characters [#6498](https://github.com/apache/arrow-datafusion/pull/6498) (r4ntix) -- feat: fix docs [#6534](https://github.com/apache/arrow-datafusion/pull/6534) (Folyd) +- feat: support type cast in SchemaAdapter [#6404](https://github.com/apache/datafusion/pull/6404) (e1ijah1) +- feat: eliminate useless join | convert inner to outer when condition is true [#6443](https://github.com/apache/datafusion/pull/6443) (jackwener) +- feat: datafusion-cli support executes sql with escaped characters [#6498](https://github.com/apache/datafusion/pull/6498) (r4ntix) +- feat: fix docs [#6534](https://github.com/apache/datafusion/pull/6534) (Folyd) **Fixed bugs:** -- fix: error instead of panic when date_bin interval is 0 [#6522](https://github.com/apache/arrow-datafusion/pull/6522) (NGA-TRAN) +- fix: error instead of panic when date_bin interval is 0 [#6522](https://github.com/apache/datafusion/pull/6522) (NGA-TRAN) **Documentation updates:** -- User guide lists window functions [#6402](https://github.com/apache/arrow-datafusion/pull/6402) (toppyy) -- Improve getting started guide, add note about compatible `arrow` versions [#6472](https://github.com/apache/arrow-datafusion/pull/6472) (alamb) -- Add link to Python Bindings in top-level README [#6532](https://github.com/apache/arrow-datafusion/pull/6532) (andygrove) +- User guide lists window functions [#6402](https://github.com/apache/datafusion/pull/6402) (toppyy) +- Improve getting started guide, add note about compatible `arrow` versions [#6472](https://github.com/apache/datafusion/pull/6472) (alamb) +- Add link to Python Bindings in top-level README [#6532](https://github.com/apache/datafusion/pull/6532) (andygrove) 
**Merged pull requests:** -- Minor: Update docs with extended file format support [#6356](https://github.com/apache/arrow-datafusion/pull/6356) (alamb) -- chore(deps): update async-compression requirement from 0.3.14 to 0.4.0 [#6336](https://github.com/apache/arrow-datafusion/pull/6336) (dependabot[bot]) -- INSERT returns number of rows written, add `InsertExec` to handle common case. [#6354](https://github.com/apache/arrow-datafusion/pull/6354) (alamb) -- Minor: used named constant for the schema inference concurrency limit [#6389](https://github.com/apache/arrow-datafusion/pull/6389) (alamb) -- Prepare for 25.0.0 Release [#6390](https://github.com/apache/arrow-datafusion/pull/6390) (andygrove) -- minor: Add Python script for generating changelog content [#6391](https://github.com/apache/arrow-datafusion/pull/6391) (andygrove) -- Support is [not] distinct from for binaryarray types [#6394](https://github.com/apache/arrow-datafusion/pull/6394) (Dandandan) -- Minor: Improve documentation of `MemoryPool` [#6388](https://github.com/apache/arrow-datafusion/pull/6388) (alamb) -- More scalar subqueries support [#6372](https://github.com/apache/arrow-datafusion/pull/6372) (mingmwang) -- feat: support type cast in SchemaAdapter [#6404](https://github.com/apache/arrow-datafusion/pull/6404) (e1ijah1) -- Update arrow 40 [#6412](https://github.com/apache/arrow-datafusion/pull/6412) (tustvold) -- feat: implement serialize/deserialize for extension logical plan [#6378](https://github.com/apache/arrow-datafusion/pull/6378) (waynexia) -- Do not run avro sqllogictests tests unless the avro feature is enabled [#6429](https://github.com/apache/arrow-datafusion/pull/6429) (alamb) -- chore(deps): update sqlparser requirement from 0.33 to 0.34 [#6416](https://github.com/apache/arrow-datafusion/pull/6416) (alamb) -- run tests with avro enabled when verifying a release candidate [#6401](https://github.com/apache/arrow-datafusion/pull/6401) (andygrove) -- Minor: remove unecessary cranelift 
dependency [#6430](https://github.com/apache/arrow-datafusion/pull/6430) (alamb) -- Improve error message for wrong built-in scalar function signatures. [#6415](https://github.com/apache/arrow-datafusion/pull/6415) (2010YOUY01) -- refactor: split `CommonSubexprEliminate::try_optimize` [#6348](https://github.com/apache/arrow-datafusion/pull/6348) (crepererum) -- chore(deps): update criterion requirement from 0.4 to 0.5 [#6434](https://github.com/apache/arrow-datafusion/pull/6434) (dependabot[bot]) -- Fix UNION ALL aliasing with more complex queries [#6417](https://github.com/apache/arrow-datafusion/pull/6417) (comphead) -- Reduce output when `sqllogictest` runs successfully, and run tests in parallel [#6393](https://github.com/apache/arrow-datafusion/pull/6393) (alamb) -- Named window support [#6419](https://github.com/apache/arrow-datafusion/pull/6419) (berkaysynnada) -- Lexicographical Ordering Equivalence Support [#6431](https://github.com/apache/arrow-datafusion/pull/6431) (mustafasrepo) -- Fix Select Into with Order By and Limit [#6442](https://github.com/apache/arrow-datafusion/pull/6442) (berkaysynnada) -- Fix typo in faq.md [#6451](https://github.com/apache/arrow-datafusion/pull/6451) (haoxins) -- Skip casting result array for binary numerical operators result between array and scalar if possible [#6438](https://github.com/apache/arrow-datafusion/pull/6438) (viirya) -- Minor fix for the Debug of FileScanConfig [#6454](https://github.com/apache/arrow-datafusion/pull/6454) (yahoNanJing) -- Improve Display for BuiltinScalarFunction [#6448](https://github.com/apache/arrow-datafusion/pull/6448) (2010YOUY01) -- Fix wrong benchmark column names in `compare.py` after the last refactor [#6459](https://github.com/apache/arrow-datafusion/pull/6459) (nvartolomei) -- Minor: remove dead code `sort_expr_list_eq_strict_order` [#6460](https://github.com/apache/arrow-datafusion/pull/6460) (alamb) -- feat: eliminate useless join | convert inner to outer when condition is true 
[#6443](https://github.com/apache/arrow-datafusion/pull/6443) (jackwener) -- Minor add more docs to equivalence class code [#6461](https://github.com/apache/arrow-datafusion/pull/6461) (alamb) -- Add support for FIRST_VALUE, LAST_VALUE Aggregate Functions [#6445](https://github.com/apache/arrow-datafusion/pull/6445) (mustafasrepo) -- Ordering Equivalence Builder [#6452](https://github.com/apache/arrow-datafusion/pull/6452) (mustafasrepo) -- Fix protobuf install command for Fedora Linux. [#6466](https://github.com/apache/arrow-datafusion/pull/6466) (LouisGariepy) -- Continue PR 4757 [#6456](https://github.com/apache/arrow-datafusion/pull/6456) (yahoNanJing) -- minor: Use parse_multipart_identifier from sqlparser [#6467](https://github.com/apache/arrow-datafusion/pull/6467) (Jefffrey) -- User guide lists window functions [#6402](https://github.com/apache/arrow-datafusion/pull/6402) (toppyy) -- minor: fix comment docs [#6473](https://github.com/apache/arrow-datafusion/pull/6473) (Folyd) -- Clippy lints [#6464](https://github.com/apache/arrow-datafusion/pull/6464) (LouisGariepy) -- Use `std::ops` traits for `Exprs` rather than custom function names [#6465](https://github.com/apache/arrow-datafusion/pull/6465) (LouisGariepy) -- Refactor temporal arithmetic [#6433](https://github.com/apache/arrow-datafusion/pull/6433) (berkaysynnada) -- Fix case evaluation with NULL [#6477](https://github.com/apache/arrow-datafusion/pull/6477) (byteink) -- Substrait: Support Expr::ScalarFunction [#6471](https://github.com/apache/arrow-datafusion/pull/6471) (jayzhan211) -- Support Defining Ordering Equivalence at the Source [#6469](https://github.com/apache/arrow-datafusion/pull/6469) (berkaysynnada) -- Improve getting started guide, add note about compatible `arrow` versions [#6472](https://github.com/apache/arrow-datafusion/pull/6472) (alamb) -- Add tpch test cases with data. 
[#6435](https://github.com/apache/arrow-datafusion/pull/6435) (liurenjie1024) -- Minor: Improve the tpch sqllogictest docs [#6493](https://github.com/apache/arrow-datafusion/pull/6493) (alamb) -- reduce search complexity for BuiltinScalarFunction [#6479](https://github.com/apache/arrow-datafusion/pull/6479) (comphead) -- Fix builds fail with error: symbol `init___rust_ctor___ctor` is already defined #6495 [#6505](https://github.com/apache/arrow-datafusion/pull/6505) (alamb) -- Don't optimize AnalyzeExec (#6379) (try 2) [#6494](https://github.com/apache/arrow-datafusion/pull/6494) (alamb) -- Rewrite large OR chains as IN lists [#6414](https://github.com/apache/arrow-datafusion/pull/6414) (aprimadi) -- Add SELECT _ EXCLUDE, SELECT _ EXCEPT support [#6481](https://github.com/apache/arrow-datafusion/pull/6481) (mustafasrepo) -- Bug fix, First accumulator multiple batch aware [#6503](https://github.com/apache/arrow-datafusion/pull/6503) (mustafasrepo) -- Make `FileStream` error handling configurable [#6491](https://github.com/apache/arrow-datafusion/pull/6491) (thinkharderdev) -- Minimize clone in OrInListSimplifier [#6509](https://github.com/apache/arrow-datafusion/pull/6509) (aprimadi) -- Add serde for `DROP VIEW` [#6499](https://github.com/apache/arrow-datafusion/pull/6499) (Dandandan) -- Support simplifying expressions like `~ ^(bar|foo)$` [#6487](https://github.com/apache/arrow-datafusion/pull/6487) (tanruixiang) -- minor: make window frame error messages more consistent [#6519](https://github.com/apache/arrow-datafusion/pull/6519) (comphead) -- Minor: Clean up `use`s to point at real crates [#6515](https://github.com/apache/arrow-datafusion/pull/6515) (alamb) -- Standardize `RUST_LOG` configuration test setup [#6506](https://github.com/apache/arrow-datafusion/pull/6506) (alamb) -- Fix new clippy lint [#6535](https://github.com/apache/arrow-datafusion/pull/6535) (alamb) -- feat: datafusion-cli support executes sql with escaped characters 
[#6498](https://github.com/apache/arrow-datafusion/pull/6498) (r4ntix) -- Minor: Add EXCEPT/EXCLUDE to SQL guide [#6512](https://github.com/apache/arrow-datafusion/pull/6512) (alamb) -- fix: error instead of panic when date_bin interval is 0 [#6522](https://github.com/apache/arrow-datafusion/pull/6522) (NGA-TRAN) -- Add link to Python Bindings in top-level README [#6532](https://github.com/apache/arrow-datafusion/pull/6532) (andygrove) -- feat: fix docs [#6534](https://github.com/apache/arrow-datafusion/pull/6534) (Folyd) -- Resolve contradictory requirements by conversion of ordering sensitive aggregators [#6482](https://github.com/apache/arrow-datafusion/pull/6482) (mustafasrepo) +- Minor: Update docs with extended file format support [#6356](https://github.com/apache/datafusion/pull/6356) (alamb) +- chore(deps): update async-compression requirement from 0.3.14 to 0.4.0 [#6336](https://github.com/apache/datafusion/pull/6336) (dependabot[bot]) +- INSERT returns number of rows written, add `InsertExec` to handle common case. 
[#6354](https://github.com/apache/datafusion/pull/6354) (alamb) +- Minor: used named constant for the schema inference concurrency limit [#6389](https://github.com/apache/datafusion/pull/6389) (alamb) +- Prepare for 25.0.0 Release [#6390](https://github.com/apache/datafusion/pull/6390) (andygrove) +- minor: Add Python script for generating changelog content [#6391](https://github.com/apache/datafusion/pull/6391) (andygrove) +- Support is [not] distinct from for binaryarray types [#6394](https://github.com/apache/datafusion/pull/6394) (Dandandan) +- Minor: Improve documentation of `MemoryPool` [#6388](https://github.com/apache/datafusion/pull/6388) (alamb) +- More scalar subqueries support [#6372](https://github.com/apache/datafusion/pull/6372) (mingmwang) +- feat: support type cast in SchemaAdapter [#6404](https://github.com/apache/datafusion/pull/6404) (e1ijah1) +- Update arrow 40 [#6412](https://github.com/apache/datafusion/pull/6412) (tustvold) +- feat: implement serialize/deserialize for extension logical plan [#6378](https://github.com/apache/datafusion/pull/6378) (waynexia) +- Do not run avro sqllogictests tests unless the avro feature is enabled [#6429](https://github.com/apache/datafusion/pull/6429) (alamb) +- chore(deps): update sqlparser requirement from 0.33 to 0.34 [#6416](https://github.com/apache/datafusion/pull/6416) (alamb) +- run tests with avro enabled when verifying a release candidate [#6401](https://github.com/apache/datafusion/pull/6401) (andygrove) +- Minor: remove unecessary cranelift dependency [#6430](https://github.com/apache/datafusion/pull/6430) (alamb) +- Improve error message for wrong built-in scalar function signatures. 
[#6415](https://github.com/apache/datafusion/pull/6415) (2010YOUY01) +- refactor: split `CommonSubexprEliminate::try_optimize` [#6348](https://github.com/apache/datafusion/pull/6348) (crepererum) +- chore(deps): update criterion requirement from 0.4 to 0.5 [#6434](https://github.com/apache/datafusion/pull/6434) (dependabot[bot]) +- Fix UNION ALL aliasing with more complex queries [#6417](https://github.com/apache/datafusion/pull/6417) (comphead) +- Reduce output when `sqllogictest` runs successfully, and run tests in parallel [#6393](https://github.com/apache/datafusion/pull/6393) (alamb) +- Named window support [#6419](https://github.com/apache/datafusion/pull/6419) (berkaysynnada) +- Lexicographical Ordering Equivalence Support [#6431](https://github.com/apache/datafusion/pull/6431) (mustafasrepo) +- Fix Select Into with Order By and Limit [#6442](https://github.com/apache/datafusion/pull/6442) (berkaysynnada) +- Fix typo in faq.md [#6451](https://github.com/apache/datafusion/pull/6451) (haoxins) +- Skip casting result array for binary numerical operators result between array and scalar if possible [#6438](https://github.com/apache/datafusion/pull/6438) (viirya) +- Minor fix for the Debug of FileScanConfig [#6454](https://github.com/apache/datafusion/pull/6454) (yahoNanJing) +- Improve Display for BuiltinScalarFunction [#6448](https://github.com/apache/datafusion/pull/6448) (2010YOUY01) +- Fix wrong benchmark column names in `compare.py` after the last refactor [#6459](https://github.com/apache/datafusion/pull/6459) (nvartolomei) +- Minor: remove dead code `sort_expr_list_eq_strict_order` [#6460](https://github.com/apache/datafusion/pull/6460) (alamb) +- feat: eliminate useless join | convert inner to outer when condition is true [#6443](https://github.com/apache/datafusion/pull/6443) (jackwener) +- Minor add more docs to equivalence class code [#6461](https://github.com/apache/datafusion/pull/6461) (alamb) +- Add support for FIRST_VALUE, LAST_VALUE Aggregate 
Functions [#6445](https://github.com/apache/datafusion/pull/6445) (mustafasrepo) +- Ordering Equivalence Builder [#6452](https://github.com/apache/datafusion/pull/6452) (mustafasrepo) +- Fix protobuf install command for Fedora Linux. [#6466](https://github.com/apache/datafusion/pull/6466) (LouisGariepy) +- Continue PR 4757 [#6456](https://github.com/apache/datafusion/pull/6456) (yahoNanJing) +- minor: Use parse_multipart_identifier from sqlparser [#6467](https://github.com/apache/datafusion/pull/6467) (Jefffrey) +- User guide lists window functions [#6402](https://github.com/apache/datafusion/pull/6402) (toppyy) +- minor: fix comment docs [#6473](https://github.com/apache/datafusion/pull/6473) (Folyd) +- Clippy lints [#6464](https://github.com/apache/datafusion/pull/6464) (LouisGariepy) +- Use `std::ops` traits for `Exprs` rather than custom function names [#6465](https://github.com/apache/datafusion/pull/6465) (LouisGariepy) +- Refactor temporal arithmetic [#6433](https://github.com/apache/datafusion/pull/6433) (berkaysynnada) +- Fix case evaluation with NULL [#6477](https://github.com/apache/datafusion/pull/6477) (byteink) +- Substrait: Support Expr::ScalarFunction [#6471](https://github.com/apache/datafusion/pull/6471) (jayzhan211) +- Support Defining Ordering Equivalence at the Source [#6469](https://github.com/apache/datafusion/pull/6469) (berkaysynnada) +- Improve getting started guide, add note about compatible `arrow` versions [#6472](https://github.com/apache/datafusion/pull/6472) (alamb) +- Add tpch test cases with data. 
[#6435](https://github.com/apache/datafusion/pull/6435) (liurenjie1024) +- Minor: Improve the tpch sqllogictest docs [#6493](https://github.com/apache/datafusion/pull/6493) (alamb) +- reduce search complexity for BuiltinScalarFunction [#6479](https://github.com/apache/datafusion/pull/6479) (comphead) +- Fix builds fail with error: symbol `init___rust_ctor___ctor` is already defined #6495 [#6505](https://github.com/apache/datafusion/pull/6505) (alamb) +- Don't optimize AnalyzeExec (#6379) (try 2) [#6494](https://github.com/apache/datafusion/pull/6494) (alamb) +- Rewrite large OR chains as IN lists [#6414](https://github.com/apache/datafusion/pull/6414) (aprimadi) +- Add SELECT _ EXCLUDE, SELECT _ EXCEPT support [#6481](https://github.com/apache/datafusion/pull/6481) (mustafasrepo) +- Bug fix, First accumulator multiple batch aware [#6503](https://github.com/apache/datafusion/pull/6503) (mustafasrepo) +- Make `FileStream` error handling configurable [#6491](https://github.com/apache/datafusion/pull/6491) (thinkharderdev) +- Minimize clone in OrInListSimplifier [#6509](https://github.com/apache/datafusion/pull/6509) (aprimadi) +- Add serde for `DROP VIEW` [#6499](https://github.com/apache/datafusion/pull/6499) (Dandandan) +- Support simplifying expressions like `~ ^(bar|foo)$` [#6487](https://github.com/apache/datafusion/pull/6487) (tanruixiang) +- minor: make window frame error messages more consistent [#6519](https://github.com/apache/datafusion/pull/6519) (comphead) +- Minor: Clean up `use`s to point at real crates [#6515](https://github.com/apache/datafusion/pull/6515) (alamb) +- Standardize `RUST_LOG` configuration test setup [#6506](https://github.com/apache/datafusion/pull/6506) (alamb) +- Fix new clippy lint [#6535](https://github.com/apache/datafusion/pull/6535) (alamb) +- feat: datafusion-cli support executes sql with escaped characters [#6498](https://github.com/apache/datafusion/pull/6498) (r4ntix) +- Minor: Add EXCEPT/EXCLUDE to SQL guide 
[#6512](https://github.com/apache/datafusion/pull/6512) (alamb) +- fix: error instead of panic when date_bin interval is 0 [#6522](https://github.com/apache/datafusion/pull/6522) (NGA-TRAN) +- Add link to Python Bindings in top-level README [#6532](https://github.com/apache/datafusion/pull/6532) (andygrove) +- feat: fix docs [#6534](https://github.com/apache/datafusion/pull/6534) (Folyd) +- Resolve contradictory requirements by conversion of ordering sensitive aggregators [#6482](https://github.com/apache/datafusion/pull/6482) (mustafasrepo) diff --git a/dev/changelog/27.0.0.md b/dev/changelog/27.0.0.md index 305e238b8861..71225968e447 100644 --- a/dev/changelog/27.0.0.md +++ b/dev/changelog/27.0.0.md @@ -17,187 +17,187 @@ under the License. --> -## [27.0.0](https://github.com/apache/arrow-datafusion/tree/27.0.0) (2023-06-26) +## [27.0.0](https://github.com/apache/datafusion/tree/27.0.0) (2023-06-26) -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/26.0.0...27.0.0) +[Full Changelog](https://github.com/apache/datafusion/compare/26.0.0...27.0.0) **Breaking changes:** -- Remove `avro_to_arrow::reader::Reader::next` in favor of `Iterator` implementation. 
[#6538](https://github.com/apache/arrow-datafusion/pull/6538) (LouisGariepy) -- Add support for appending data to external tables - CSV [#6526](https://github.com/apache/arrow-datafusion/pull/6526) (mustafasrepo) -- Move `physical_plan::file_format` to `datasource::plan` [#6516](https://github.com/apache/arrow-datafusion/pull/6516) (alamb) -- Remove `FromSlice` in favor of `From` impl in upstream arrow-rs code [#6587](https://github.com/apache/arrow-datafusion/pull/6587) (alamb) -- Improve main api doc page, move `avro_to_arrow` to `datasource` [#6564](https://github.com/apache/arrow-datafusion/pull/6564) (alamb) -- Fix Clippy module inception (unwrap `datasource::datasource` and `catalog::catalog` [#6640](https://github.com/apache/arrow-datafusion/pull/6640) (LouisGariepy) -- refactor: unify generic expr rewrite functions into the `datafusion_expr::expr_rewriter` [#6644](https://github.com/apache/arrow-datafusion/pull/6644) (r4ntix) -- Move `PhysicalPlanner` to `physical_planer` module [#6570](https://github.com/apache/arrow-datafusion/pull/6570) (alamb) -- Update documentation for creating User Defined Aggregates (AggregateUDF) [#6729](https://github.com/apache/arrow-datafusion/pull/6729) (alamb) -- Support User Defined Window Functions [#6703](https://github.com/apache/arrow-datafusion/pull/6703) (alamb) -- Minor: Move `PartitionStream` to physical_plan [#6756](https://github.com/apache/arrow-datafusion/pull/6756) (alamb) +- Remove `avro_to_arrow::reader::Reader::next` in favor of `Iterator` implementation. 
[#6538](https://github.com/apache/datafusion/pull/6538) (LouisGariepy) +- Add support for appending data to external tables - CSV [#6526](https://github.com/apache/datafusion/pull/6526) (mustafasrepo) +- Move `physical_plan::file_format` to `datasource::plan` [#6516](https://github.com/apache/datafusion/pull/6516) (alamb) +- Remove `FromSlice` in favor of `From` impl in upstream arrow-rs code [#6587](https://github.com/apache/datafusion/pull/6587) (alamb) +- Improve main api doc page, move `avro_to_arrow` to `datasource` [#6564](https://github.com/apache/datafusion/pull/6564) (alamb) +- Fix Clippy module inception (unwrap `datasource::datasource` and `catalog::catalog` [#6640](https://github.com/apache/datafusion/pull/6640) (LouisGariepy) +- refactor: unify generic expr rewrite functions into the `datafusion_expr::expr_rewriter` [#6644](https://github.com/apache/datafusion/pull/6644) (r4ntix) +- Move `PhysicalPlanner` to `physical_planer` module [#6570](https://github.com/apache/datafusion/pull/6570) (alamb) +- Update documentation for creating User Defined Aggregates (AggregateUDF) [#6729](https://github.com/apache/datafusion/pull/6729) (alamb) +- Support User Defined Window Functions [#6703](https://github.com/apache/datafusion/pull/6703) (alamb) +- Minor: Move `PartitionStream` to physical_plan [#6756](https://github.com/apache/datafusion/pull/6756) (alamb) **Implemented enhancements:** -- feat: support type coercion in Parquet Reader [#6458](https://github.com/apache/arrow-datafusion/pull/6458) (e1ijah1) -- feat: New functions and operations for working with arrays [#6384](https://github.com/apache/arrow-datafusion/pull/6384) (izveigor) -- feat: `DISTINCT` bitwise and boolean aggregate functions [#6581](https://github.com/apache/arrow-datafusion/pull/6581) (izveigor) -- feat: make_array support empty arguments [#6593](https://github.com/apache/arrow-datafusion/pull/6593) (parkma99) -- feat: encapsulate physical optimizer rules into a struct 
[#6645](https://github.com/apache/arrow-datafusion/pull/6645) (waynexia) -- feat: new concatenation operator for working with arrays [#6615](https://github.com/apache/arrow-datafusion/pull/6615) (izveigor) -- feat: add `-c option` to pass the SQL query directly as an argument on datafusion-cli [#6765](https://github.com/apache/arrow-datafusion/pull/6765) (r4ntix) +- feat: support type coercion in Parquet Reader [#6458](https://github.com/apache/datafusion/pull/6458) (e1ijah1) +- feat: New functions and operations for working with arrays [#6384](https://github.com/apache/datafusion/pull/6384) (izveigor) +- feat: `DISTINCT` bitwise and boolean aggregate functions [#6581](https://github.com/apache/datafusion/pull/6581) (izveigor) +- feat: make_array support empty arguments [#6593](https://github.com/apache/datafusion/pull/6593) (parkma99) +- feat: encapsulate physical optimizer rules into a struct [#6645](https://github.com/apache/datafusion/pull/6645) (waynexia) +- feat: new concatenation operator for working with arrays [#6615](https://github.com/apache/datafusion/pull/6615) (izveigor) +- feat: add `-c option` to pass the SQL query directly as an argument on datafusion-cli [#6765](https://github.com/apache/datafusion/pull/6765) (r4ntix) **Fixed bugs:** -- fix: ignore panics if racing against catalog/schema changes [#6536](https://github.com/apache/arrow-datafusion/pull/6536) (Weijun-H) -- fix: type coercion support date - date [#6578](https://github.com/apache/arrow-datafusion/pull/6578) (jackwener) -- fix: avoid panic in `list_files_for_scan` [#6605](https://github.com/apache/arrow-datafusion/pull/6605) (Folyd) -- fix: analyze/optimize plan in `CREATE TABLE AS SELECT` [#6610](https://github.com/apache/arrow-datafusion/pull/6610) (jackwener) -- fix: remove type coercion of case expression in Expr::Schema [#6614](https://github.com/apache/arrow-datafusion/pull/6614) (jackwener) -- fix: correct test timestamp_add_interval_months 
[#6622](https://github.com/apache/arrow-datafusion/pull/6622) (jackwener) -- fix: fix more panics in `ListingTable` [#6636](https://github.com/apache/arrow-datafusion/pull/6636) (Folyd) -- fix: median with even number of `Decimal128` not working [#6634](https://github.com/apache/arrow-datafusion/pull/6634) (izveigor) -- fix: port unstable subquery to sqllogicaltest [#6659](https://github.com/apache/arrow-datafusion/pull/6659) (jackwener) -- fix: correct wrong test [#6667](https://github.com/apache/arrow-datafusion/pull/6667) (jackwener) -- fix: from_plan shouldn't use original schema [#6595](https://github.com/apache/arrow-datafusion/pull/6595) (jackwener) -- fix: correct the error type [#6712](https://github.com/apache/arrow-datafusion/pull/6712) (jackwener) -- fix: parser for negative intervals [#6698](https://github.com/apache/arrow-datafusion/pull/6698) (izveigor) +- fix: ignore panics if racing against catalog/schema changes [#6536](https://github.com/apache/datafusion/pull/6536) (Weijun-H) +- fix: type coercion support date - date [#6578](https://github.com/apache/datafusion/pull/6578) (jackwener) +- fix: avoid panic in `list_files_for_scan` [#6605](https://github.com/apache/datafusion/pull/6605) (Folyd) +- fix: analyze/optimize plan in `CREATE TABLE AS SELECT` [#6610](https://github.com/apache/datafusion/pull/6610) (jackwener) +- fix: remove type coercion of case expression in Expr::Schema [#6614](https://github.com/apache/datafusion/pull/6614) (jackwener) +- fix: correct test timestamp_add_interval_months [#6622](https://github.com/apache/datafusion/pull/6622) (jackwener) +- fix: fix more panics in `ListingTable` [#6636](https://github.com/apache/datafusion/pull/6636) (Folyd) +- fix: median with even number of `Decimal128` not working [#6634](https://github.com/apache/datafusion/pull/6634) (izveigor) +- fix: port unstable subquery to sqllogicaltest [#6659](https://github.com/apache/datafusion/pull/6659) (jackwener) +- fix: correct wrong test 
[#6667](https://github.com/apache/datafusion/pull/6667) (jackwener) +- fix: from_plan shouldn't use original schema [#6595](https://github.com/apache/datafusion/pull/6595) (jackwener) +- fix: correct the error type [#6712](https://github.com/apache/datafusion/pull/6712) (jackwener) +- fix: parser for negative intervals [#6698](https://github.com/apache/datafusion/pull/6698) (izveigor) **Documentation updates:** -- Minor: Fix doc for round function [#6661](https://github.com/apache/arrow-datafusion/pull/6661) (viirya) -- Docs: Improve documentation for `struct` function` [#6754](https://github.com/apache/arrow-datafusion/pull/6754) (alamb) +- Minor: Fix doc for round function [#6661](https://github.com/apache/datafusion/pull/6661) (viirya) +- Docs: Improve documentation for `struct` function` [#6754](https://github.com/apache/datafusion/pull/6754) (alamb) **Merged pull requests:** -- fix: ignore panics if racing against catalog/schema changes [#6536](https://github.com/apache/arrow-datafusion/pull/6536) (Weijun-H) -- Remove `avro_to_arrow::reader::Reader::next` in favor of `Iterator` implementation. [#6538](https://github.com/apache/arrow-datafusion/pull/6538) (LouisGariepy) -- Support ordering analysis with expressions (not just columns) by Replace `OrderedColumn` with `PhysicalSortExpr` [#6501](https://github.com/apache/arrow-datafusion/pull/6501) (mustafasrepo) -- Prepare for 26.0.0 release [#6533](https://github.com/apache/arrow-datafusion/pull/6533) (andygrove) -- fix Incorrect function-name matching with disabled enable_ident_normalization [#6528](https://github.com/apache/arrow-datafusion/pull/6528) (parkma99) -- Improve error messages with function name suggestion. 
[#6520](https://github.com/apache/arrow-datafusion/pull/6520) (2010YOUY01) -- Docs: add more PR guidance in contributing guide (smaller PRs) [#6546](https://github.com/apache/arrow-datafusion/pull/6546) (alamb) -- feat: support type coercion in Parquet Reader [#6458](https://github.com/apache/arrow-datafusion/pull/6458) (e1ijah1) -- Update to object_store 0.6 and arrow 41 [#6374](https://github.com/apache/arrow-datafusion/pull/6374) (tustvold) -- feat: New functions and operations for working with arrays [#6384](https://github.com/apache/arrow-datafusion/pull/6384) (izveigor) -- Add support for appending data to external tables - CSV [#6526](https://github.com/apache/arrow-datafusion/pull/6526) (mustafasrepo) -- [Minor] Update hashbrown to 0.14 [#6562](https://github.com/apache/arrow-datafusion/pull/6562) (Dandandan) -- refactor: use bitwise and boolean compute functions [#6568](https://github.com/apache/arrow-datafusion/pull/6568) (izveigor) -- Fix panic propagation in `CoalescePartitions`, consolidates panic propagation into `RecordBatchReceiverStream` [#6507](https://github.com/apache/arrow-datafusion/pull/6507) (alamb) -- Move `physical_plan::file_format` to `datasource::plan` [#6516](https://github.com/apache/arrow-datafusion/pull/6516) (alamb) -- refactor: remove type_coercion in PhysicalExpr. 
[#6575](https://github.com/apache/arrow-datafusion/pull/6575) (jackwener) -- Minor: remove `tokio_stream` dependency [#6565](https://github.com/apache/arrow-datafusion/pull/6565) (alamb) -- minor: remove useless mut and borrow() [#6580](https://github.com/apache/arrow-datafusion/pull/6580) (jackwener) -- Add tests for object_store builders of datafusion-cli [#6576](https://github.com/apache/arrow-datafusion/pull/6576) (r4ntix) -- Avoid per-batch field lookups in SchemaMapping [#6563](https://github.com/apache/arrow-datafusion/pull/6563) (tustvold) -- Move `JoinType` and `JoinCondition` to `datafusion_common` [#6572](https://github.com/apache/arrow-datafusion/pull/6572) (alamb) -- chore(deps): update substrait requirement from 0.10.0 to 0.11.0 [#6579](https://github.com/apache/arrow-datafusion/pull/6579) (dependabot[bot]) -- refactor: bitwise kernel right and left shifts [#6585](https://github.com/apache/arrow-datafusion/pull/6585) (izveigor) -- fix: type coercion support date - date [#6578](https://github.com/apache/arrow-datafusion/pull/6578) (jackwener) -- make page filter public [#6523](https://github.com/apache/arrow-datafusion/pull/6523) (jiacai2050) -- Minor: Remove some `use crate::` uses in physical_plan [#6573](https://github.com/apache/arrow-datafusion/pull/6573) (alamb) -- feat: `DISTINCT` bitwise and boolean aggregate functions [#6581](https://github.com/apache/arrow-datafusion/pull/6581) (izveigor) -- Make the struct function return the correct data type. 
[#6594](https://github.com/apache/arrow-datafusion/pull/6594) (jiangzhx) -- fix: avoid panic in `list_files_for_scan` [#6605](https://github.com/apache/arrow-datafusion/pull/6605) (Folyd) -- fix: analyze/optimize plan in `CREATE TABLE AS SELECT` [#6610](https://github.com/apache/arrow-datafusion/pull/6610) (jackwener) -- Minor: Add additional docstrings to Window function implementations [#6592](https://github.com/apache/arrow-datafusion/pull/6592) (alamb) -- Remove `FromSlice` in favor of `From` impl in upstream arrow-rs code [#6587](https://github.com/apache/arrow-datafusion/pull/6587) (alamb) -- [Minor] Cleanup tpch benchmark [#6609](https://github.com/apache/arrow-datafusion/pull/6609) (Dandandan) -- Revert "feat: Implement the bitwise_not in NotExpr (#5902)" [#6599](https://github.com/apache/arrow-datafusion/pull/6599) (jackwener) -- Port remaining tests in functions.rs to sqllogictest [#6608](https://github.com/apache/arrow-datafusion/pull/6608) (jiangzhx) -- fix: remove type coercion of case expression in Expr::Schema [#6614](https://github.com/apache/arrow-datafusion/pull/6614) (jackwener) -- Minor: use upstream `dialect_from_str` [#6616](https://github.com/apache/arrow-datafusion/pull/6616) (alamb) -- Minor: Move `PlanType`, `StringifiedPlan` and `ToStringifiedPlan` `datafusion_common` [#6571](https://github.com/apache/arrow-datafusion/pull/6571) (alamb) -- fix: correct test timestamp_add_interval_months [#6622](https://github.com/apache/arrow-datafusion/pull/6622) (jackwener) -- Impl `Literal` trait for `NonZero*` types [#6627](https://github.com/apache/arrow-datafusion/pull/6627) (Folyd) -- style: make clippy happy and remove redundant prefix [#6624](https://github.com/apache/arrow-datafusion/pull/6624) (jackwener) -- Substrait: Fix incorrect join key fields (indices) when same table is being used more than once [#6135](https://github.com/apache/arrow-datafusion/pull/6135) (nseekhao) -- Minor: Add debug logging for schema mismatch errors 
[#6626](https://github.com/apache/arrow-datafusion/pull/6626) (alamb) -- Minor: Move functionality into `BuildInScalarFunction` [#6612](https://github.com/apache/arrow-datafusion/pull/6612) (alamb) -- Add datafusion-cli tests to the CI Job [#6600](https://github.com/apache/arrow-datafusion/pull/6600) (r4ntix) -- Refactor joins test to sqllogic [#6525](https://github.com/apache/arrow-datafusion/pull/6525) (aprimadi) -- fix: fix more panics in `ListingTable` [#6636](https://github.com/apache/arrow-datafusion/pull/6636) (Folyd) -- fix: median with even number of `Decimal128` not working [#6634](https://github.com/apache/arrow-datafusion/pull/6634) (izveigor) -- Unify formatting of both groups and files up to 5 elements [#6637](https://github.com/apache/arrow-datafusion/pull/6637) (qrilka) -- feat: make_array support empty arguments [#6593](https://github.com/apache/arrow-datafusion/pull/6593) (parkma99) -- Minor: cleanup the unnecessary CREATE TABLE aggregate_test_100 statement at aggregate.slt [#6641](https://github.com/apache/arrow-datafusion/pull/6641) (jiangzhx) -- chore(deps): update sqllogictest requirement from 0.13.2 to 0.14.0 [#6646](https://github.com/apache/arrow-datafusion/pull/6646) (dependabot[bot]) -- Improve main api doc page, move `avro_to_arrow` to `datasource` [#6564](https://github.com/apache/arrow-datafusion/pull/6564) (alamb) -- Minor: Move `include_rank` into `BuiltInWindowFunctionExpr` [#6620](https://github.com/apache/arrow-datafusion/pull/6620) (alamb) -- Prioritize UDF over scalar built-in function in case of function name… [#6601](https://github.com/apache/arrow-datafusion/pull/6601) (epsio-banay) -- feat: encapsulate physical optimizer rules into a struct [#6645](https://github.com/apache/arrow-datafusion/pull/6645) (waynexia) -- Fix date_trunc signature [#6632](https://github.com/apache/arrow-datafusion/pull/6632) (alamb) -- Return correct scalar types for date_trunc [#6638](https://github.com/apache/arrow-datafusion/pull/6638) (viirya) 
-- Insert supports specifying column names in any order [#6628](https://github.com/apache/arrow-datafusion/pull/6628) (jonahgao) -- Fix Clippy module inception (unwrap `datasource::datasource` and `catalog::catalog` [#6640](https://github.com/apache/arrow-datafusion/pull/6640) (LouisGariepy) -- Add hash support for PhysicalExpr and PhysicalSortExpr [#6625](https://github.com/apache/arrow-datafusion/pull/6625) (mustafasrepo) -- Port tests in joins.rs to sqllogictes [#6642](https://github.com/apache/arrow-datafusion/pull/6642) (jiangzhx) -- Minor: Add test for date_trunc schema on scalars [#6655](https://github.com/apache/arrow-datafusion/pull/6655) (alamb) -- Simplify and encapsulate window function state management [#6621](https://github.com/apache/arrow-datafusion/pull/6621) (alamb) -- Minor: Move get_equal_orderings into `BuiltInWindowFunctionExpr`, remove `BuiltInWindowFunctionExpr::as_any` [#6619](https://github.com/apache/arrow-datafusion/pull/6619) (alamb) -- minor: use sql to setup test data for joins.slt rather than rust [#6656](https://github.com/apache/arrow-datafusion/pull/6656) (alamb) -- Support wider range of Subquery, handle the Count bug [#6457](https://github.com/apache/arrow-datafusion/pull/6457) (mingmwang) -- fix: port unstable subquery to sqllogicaltest [#6659](https://github.com/apache/arrow-datafusion/pull/6659) (jackwener) -- Minor: Fix doc for round function [#6661](https://github.com/apache/arrow-datafusion/pull/6661) (viirya) -- refactor: unify generic expr rewrite functions into the `datafusion_expr::expr_rewriter` [#6644](https://github.com/apache/arrow-datafusion/pull/6644) (r4ntix) -- Minor: add test cases for coercion bitwise shifts [#6651](https://github.com/apache/arrow-datafusion/pull/6651) (izveigor) -- refactor: unify replace count(\*) analyzer by removing it in sql crate [#6660](https://github.com/apache/arrow-datafusion/pull/6660) (jackwener) -- Combine evaluate_stateful and evaluate_inside_range 
[#6665](https://github.com/apache/arrow-datafusion/pull/6665) (mustafasrepo) -- Support internal cast for BuiltinScalarFunction::MakeArray [#6607](https://github.com/apache/arrow-datafusion/pull/6607) (jayzhan211) -- minor: use sql to setup test data for aggregate.slt rather than rust [#6664](https://github.com/apache/arrow-datafusion/pull/6664) (jiangzhx) -- Minor: Add tests for User Defined Aggregate functions [#6669](https://github.com/apache/arrow-datafusion/pull/6669) (alamb) -- fix: correct wrong test [#6667](https://github.com/apache/arrow-datafusion/pull/6667) (jackwener) -- fix: from_plan shouldn't use original schema [#6595](https://github.com/apache/arrow-datafusion/pull/6595) (jackwener) -- feat: new concatenation operator for working with arrays [#6615](https://github.com/apache/arrow-datafusion/pull/6615) (izveigor) -- Minor: Add more doc strings to WindowExpr [#6663](https://github.com/apache/arrow-datafusion/pull/6663) (alamb) -- minor: `with_new_inputs` replace `from_plan` [#6680](https://github.com/apache/arrow-datafusion/pull/6680) (jackwener) -- Docs: Update roadmap to point at EPIC's, clarify project goals [#6639](https://github.com/apache/arrow-datafusion/pull/6639) (alamb) -- Disable incremental compilation on CI [#6688](https://github.com/apache/arrow-datafusion/pull/6688) (alamb) -- Allow `AggregateUDF` to define retractable batch , implement sliding window functions [#6671](https://github.com/apache/arrow-datafusion/pull/6671) (alamb) -- Minor: Update user guide [#6692](https://github.com/apache/arrow-datafusion/pull/6692) (comphead) -- Minor: consolidate repartition test into sql_integration to save builder space and build time [#6685](https://github.com/apache/arrow-datafusion/pull/6685) (alamb) -- Minor: combine `statistics`, `filter_pushdown` and `custom_sources provider` tests together to reduce CI disk space [#6683](https://github.com/apache/arrow-datafusion/pull/6683) (alamb) -- Move `PhysicalPlanner` to `physical_planer` module 
[#6570](https://github.com/apache/arrow-datafusion/pull/6570) (alamb) -- Rename integration tests to match crate they are defined in [#6687](https://github.com/apache/arrow-datafusion/pull/6687) (alamb) -- Minor: combine fuzz tests into a single binary to save builder space and build time [#6684](https://github.com/apache/arrow-datafusion/pull/6684) (alamb) -- Minor: consolidate datafusion_substrait tests into `substrait_integration` to save builder space and build time #6685 [#6686](https://github.com/apache/arrow-datafusion/pull/6686) (alamb) -- removed self.all_values.len() from inside reserve [#6689](https://github.com/apache/arrow-datafusion/pull/6689) (BryanEmond) -- Replace supports_bounded_execution with supports_retract_batch [#6695](https://github.com/apache/arrow-datafusion/pull/6695) (mustafasrepo) -- Move `dataframe` and `dataframe_functon` into `core_integration` test binary [#6697](https://github.com/apache/arrow-datafusion/pull/6697) (alamb) -- refactor: fix clippy allow too many arguments [#6705](https://github.com/apache/arrow-datafusion/pull/6705) (aprimadi) -- Fix documentation typo [#6704](https://github.com/apache/arrow-datafusion/pull/6704) (aprimadi) -- fix: correct the error type [#6712](https://github.com/apache/arrow-datafusion/pull/6712) (jackwener) -- Port test in subqueries.rs from rust to sqllogictest [#6675](https://github.com/apache/arrow-datafusion/pull/6675) (jiangzhx) -- Improve performance/memory usage of HashJoin datastructure (5-15% improvement on selected TPC-H queries) [#6679](https://github.com/apache/arrow-datafusion/pull/6679) (Dandandan) -- refactor: alias() should skip add alias for `Expr::Sort` [#6707](https://github.com/apache/arrow-datafusion/pull/6707) (jackwener) -- chore(deps): update strum/strum_macros requirement from 0.24 to 0.25 [#6717](https://github.com/apache/arrow-datafusion/pull/6717) (jackwener) -- Move alias generator to per-query execution props 
[#6706](https://github.com/apache/arrow-datafusion/pull/6706) (aprimadi) -- fix: parser for negative intervals [#6698](https://github.com/apache/arrow-datafusion/pull/6698) (izveigor) -- Minor: Improve UX for setting `ExecutionProps::query_execution_start_time` [#6719](https://github.com/apache/arrow-datafusion/pull/6719) (alamb) -- add Eq and PartialEq to ListingTableUrl [#6725](https://github.com/apache/arrow-datafusion/pull/6725) (fsdvh) -- Support Expr::InList to Substrait::RexType [#6604](https://github.com/apache/arrow-datafusion/pull/6604) (jayzhan211) -- MINOR: Add maintains input order flag to CoalesceBatches [#6730](https://github.com/apache/arrow-datafusion/pull/6730) (mustafasrepo) -- Minor: Update copyight date on website [#6727](https://github.com/apache/arrow-datafusion/pull/6727) (alamb) -- Display all partitions and files in EXPLAIN VERBOSE [#6711](https://github.com/apache/arrow-datafusion/pull/6711) (qrilka) -- Update `arrow`, `arrow-flight` and `parquet` to `42.0.0` [#6702](https://github.com/apache/arrow-datafusion/pull/6702) (alamb) -- Move `PartitionEvaluator` and window_state structures to `datafusion_expr` crate [#6690](https://github.com/apache/arrow-datafusion/pull/6690) (alamb) -- Hash Join Vectorized collision checking [#6724](https://github.com/apache/arrow-datafusion/pull/6724) (Dandandan) -- Return null for date_trunc(null) instead of panic [#6723](https://github.com/apache/arrow-datafusion/pull/6723) (BryanEmond) -- `derive(Debug)` for `Expr` [#6708](https://github.com/apache/arrow-datafusion/pull/6708) (parkma99) -- refactor: extract merge_projection common function. 
[#6735](https://github.com/apache/arrow-datafusion/pull/6735) (jackwener) -- Fix up some `DataFusionError::Internal` errors with correct type [#6721](https://github.com/apache/arrow-datafusion/pull/6721) (alamb) -- Minor: remove some uses of unwrap [#6738](https://github.com/apache/arrow-datafusion/pull/6738) (alamb) -- Minor: remove dead code with decimal datatypes from `in_list` [#6737](https://github.com/apache/arrow-datafusion/pull/6737) (izveigor) -- Update documentation for creating User Defined Aggregates (AggregateUDF) [#6729](https://github.com/apache/arrow-datafusion/pull/6729) (alamb) -- Support User Defined Window Functions [#6703](https://github.com/apache/arrow-datafusion/pull/6703) (alamb) -- MINOR: Aggregate ordering substrait support [#6745](https://github.com/apache/arrow-datafusion/pull/6745) (mustafasrepo) -- chore(deps): update itertools requirement from 0.10 to 0.11 [#6752](https://github.com/apache/arrow-datafusion/pull/6752) (jackwener) -- refactor: move some code in physical_plan/common.rs before tests module [#6749](https://github.com/apache/arrow-datafusion/pull/6749) (aprimadi) -- Add support for order-sensitive aggregation for multipartitions [#6734](https://github.com/apache/arrow-datafusion/pull/6734) (mustafasrepo) -- Update sqlparser-rs to version `0.35.0` [#6753](https://github.com/apache/arrow-datafusion/pull/6753) (alamb) -- Docs: Update SQL status page [#6736](https://github.com/apache/arrow-datafusion/pull/6736) (alamb) -- fix typo [#6761](https://github.com/apache/arrow-datafusion/pull/6761) (Weijun-H) -- Minor: Move `PartitionStream` to physical_plan [#6756](https://github.com/apache/arrow-datafusion/pull/6756) (alamb) -- Docs: Improve documentation for `struct` function` [#6754](https://github.com/apache/arrow-datafusion/pull/6754) (alamb) -- add UT to verify the fix on "issues/6606" [#6762](https://github.com/apache/arrow-datafusion/pull/6762) (mingmwang) -- Re-export modules individually to fix rustdocs 
[#6757](https://github.com/apache/arrow-datafusion/pull/6757) (alamb) -- Order Preserving RepartitionExec Implementation [#6742](https://github.com/apache/arrow-datafusion/pull/6742) (mustafasrepo) -- feat: add `-c option` to pass the SQL query directly as an argument on datafusion-cli [#6765](https://github.com/apache/arrow-datafusion/pull/6765) (r4ntix) +- fix: ignore panics if racing against catalog/schema changes [#6536](https://github.com/apache/datafusion/pull/6536) (Weijun-H) +- Remove `avro_to_arrow::reader::Reader::next` in favor of `Iterator` implementation. [#6538](https://github.com/apache/datafusion/pull/6538) (LouisGariepy) +- Support ordering analysis with expressions (not just columns) by Replace `OrderedColumn` with `PhysicalSortExpr` [#6501](https://github.com/apache/datafusion/pull/6501) (mustafasrepo) +- Prepare for 26.0.0 release [#6533](https://github.com/apache/datafusion/pull/6533) (andygrove) +- fix Incorrect function-name matching with disabled enable_ident_normalization [#6528](https://github.com/apache/datafusion/pull/6528) (parkma99) +- Improve error messages with function name suggestion. 
[#6520](https://github.com/apache/datafusion/pull/6520) (2010YOUY01) +- Docs: add more PR guidance in contributing guide (smaller PRs) [#6546](https://github.com/apache/datafusion/pull/6546) (alamb) +- feat: support type coercion in Parquet Reader [#6458](https://github.com/apache/datafusion/pull/6458) (e1ijah1) +- Update to object_store 0.6 and arrow 41 [#6374](https://github.com/apache/datafusion/pull/6374) (tustvold) +- feat: New functions and operations for working with arrays [#6384](https://github.com/apache/datafusion/pull/6384) (izveigor) +- Add support for appending data to external tables - CSV [#6526](https://github.com/apache/datafusion/pull/6526) (mustafasrepo) +- [Minor] Update hashbrown to 0.14 [#6562](https://github.com/apache/datafusion/pull/6562) (Dandandan) +- refactor: use bitwise and boolean compute functions [#6568](https://github.com/apache/datafusion/pull/6568) (izveigor) +- Fix panic propagation in `CoalescePartitions`, consolidates panic propagation into `RecordBatchReceiverStream` [#6507](https://github.com/apache/datafusion/pull/6507) (alamb) +- Move `physical_plan::file_format` to `datasource::plan` [#6516](https://github.com/apache/datafusion/pull/6516) (alamb) +- refactor: remove type_coercion in PhysicalExpr. 
[#6575](https://github.com/apache/datafusion/pull/6575) (jackwener) +- Minor: remove `tokio_stream` dependency [#6565](https://github.com/apache/datafusion/pull/6565) (alamb) +- minor: remove useless mut and borrow() [#6580](https://github.com/apache/datafusion/pull/6580) (jackwener) +- Add tests for object_store builders of datafusion-cli [#6576](https://github.com/apache/datafusion/pull/6576) (r4ntix) +- Avoid per-batch field lookups in SchemaMapping [#6563](https://github.com/apache/datafusion/pull/6563) (tustvold) +- Move `JoinType` and `JoinCondition` to `datafusion_common` [#6572](https://github.com/apache/datafusion/pull/6572) (alamb) +- chore(deps): update substrait requirement from 0.10.0 to 0.11.0 [#6579](https://github.com/apache/datafusion/pull/6579) (dependabot[bot]) +- refactor: bitwise kernel right and left shifts [#6585](https://github.com/apache/datafusion/pull/6585) (izveigor) +- fix: type coercion support date - date [#6578](https://github.com/apache/datafusion/pull/6578) (jackwener) +- make page filter public [#6523](https://github.com/apache/datafusion/pull/6523) (jiacai2050) +- Minor: Remove some `use crate::` uses in physical_plan [#6573](https://github.com/apache/datafusion/pull/6573) (alamb) +- feat: `DISTINCT` bitwise and boolean aggregate functions [#6581](https://github.com/apache/datafusion/pull/6581) (izveigor) +- Make the struct function return the correct data type. 
[#6594](https://github.com/apache/datafusion/pull/6594) (jiangzhx) +- fix: avoid panic in `list_files_for_scan` [#6605](https://github.com/apache/datafusion/pull/6605) (Folyd) +- fix: analyze/optimize plan in `CREATE TABLE AS SELECT` [#6610](https://github.com/apache/datafusion/pull/6610) (jackwener) +- Minor: Add additional docstrings to Window function implementations [#6592](https://github.com/apache/datafusion/pull/6592) (alamb) +- Remove `FromSlice` in favor of `From` impl in upstream arrow-rs code [#6587](https://github.com/apache/datafusion/pull/6587) (alamb) +- [Minor] Cleanup tpch benchmark [#6609](https://github.com/apache/datafusion/pull/6609) (Dandandan) +- Revert "feat: Implement the bitwise_not in NotExpr (#5902)" [#6599](https://github.com/apache/datafusion/pull/6599) (jackwener) +- Port remaining tests in functions.rs to sqllogictest [#6608](https://github.com/apache/datafusion/pull/6608) (jiangzhx) +- fix: remove type coercion of case expression in Expr::Schema [#6614](https://github.com/apache/datafusion/pull/6614) (jackwener) +- Minor: use upstream `dialect_from_str` [#6616](https://github.com/apache/datafusion/pull/6616) (alamb) +- Minor: Move `PlanType`, `StringifiedPlan` and `ToStringifiedPlan` `datafusion_common` [#6571](https://github.com/apache/datafusion/pull/6571) (alamb) +- fix: correct test timestamp_add_interval_months [#6622](https://github.com/apache/datafusion/pull/6622) (jackwener) +- Impl `Literal` trait for `NonZero*` types [#6627](https://github.com/apache/datafusion/pull/6627) (Folyd) +- style: make clippy happy and remove redundant prefix [#6624](https://github.com/apache/datafusion/pull/6624) (jackwener) +- Substrait: Fix incorrect join key fields (indices) when same table is being used more than once [#6135](https://github.com/apache/datafusion/pull/6135) (nseekhao) +- Minor: Add debug logging for schema mismatch errors [#6626](https://github.com/apache/datafusion/pull/6626) (alamb) +- Minor: Move functionality into 
`BuildInScalarFunction` [#6612](https://github.com/apache/datafusion/pull/6612) (alamb) +- Add datafusion-cli tests to the CI Job [#6600](https://github.com/apache/datafusion/pull/6600) (r4ntix) +- Refactor joins test to sqllogic [#6525](https://github.com/apache/datafusion/pull/6525) (aprimadi) +- fix: fix more panics in `ListingTable` [#6636](https://github.com/apache/datafusion/pull/6636) (Folyd) +- fix: median with even number of `Decimal128` not working [#6634](https://github.com/apache/datafusion/pull/6634) (izveigor) +- Unify formatting of both groups and files up to 5 elements [#6637](https://github.com/apache/datafusion/pull/6637) (qrilka) +- feat: make_array support empty arguments [#6593](https://github.com/apache/datafusion/pull/6593) (parkma99) +- Minor: cleanup the unnecessary CREATE TABLE aggregate_test_100 statement at aggregate.slt [#6641](https://github.com/apache/datafusion/pull/6641) (jiangzhx) +- chore(deps): update sqllogictest requirement from 0.13.2 to 0.14.0 [#6646](https://github.com/apache/datafusion/pull/6646) (dependabot[bot]) +- Improve main api doc page, move `avro_to_arrow` to `datasource` [#6564](https://github.com/apache/datafusion/pull/6564) (alamb) +- Minor: Move `include_rank` into `BuiltInWindowFunctionExpr` [#6620](https://github.com/apache/datafusion/pull/6620) (alamb) +- Prioritize UDF over scalar built-in function in case of function name… [#6601](https://github.com/apache/datafusion/pull/6601) (epsio-banay) +- feat: encapsulate physical optimizer rules into a struct [#6645](https://github.com/apache/datafusion/pull/6645) (waynexia) +- Fix date_trunc signature [#6632](https://github.com/apache/datafusion/pull/6632) (alamb) +- Return correct scalar types for date_trunc [#6638](https://github.com/apache/datafusion/pull/6638) (viirya) +- Insert supports specifying column names in any order [#6628](https://github.com/apache/datafusion/pull/6628) (jonahgao) +- Fix Clippy module inception (unwrap `datasource::datasource` and 
`catalog::catalog` [#6640](https://github.com/apache/datafusion/pull/6640) (LouisGariepy) +- Add hash support for PhysicalExpr and PhysicalSortExpr [#6625](https://github.com/apache/datafusion/pull/6625) (mustafasrepo) +- Port tests in joins.rs to sqllogictes [#6642](https://github.com/apache/datafusion/pull/6642) (jiangzhx) +- Minor: Add test for date_trunc schema on scalars [#6655](https://github.com/apache/datafusion/pull/6655) (alamb) +- Simplify and encapsulate window function state management [#6621](https://github.com/apache/datafusion/pull/6621) (alamb) +- Minor: Move get_equal_orderings into `BuiltInWindowFunctionExpr`, remove `BuiltInWindowFunctionExpr::as_any` [#6619](https://github.com/apache/datafusion/pull/6619) (alamb) +- minor: use sql to setup test data for joins.slt rather than rust [#6656](https://github.com/apache/datafusion/pull/6656) (alamb) +- Support wider range of Subquery, handle the Count bug [#6457](https://github.com/apache/datafusion/pull/6457) (mingmwang) +- fix: port unstable subquery to sqllogicaltest [#6659](https://github.com/apache/datafusion/pull/6659) (jackwener) +- Minor: Fix doc for round function [#6661](https://github.com/apache/datafusion/pull/6661) (viirya) +- refactor: unify generic expr rewrite functions into the `datafusion_expr::expr_rewriter` [#6644](https://github.com/apache/datafusion/pull/6644) (r4ntix) +- Minor: add test cases for coercion bitwise shifts [#6651](https://github.com/apache/datafusion/pull/6651) (izveigor) +- refactor: unify replace count(\*) analyzer by removing it in sql crate [#6660](https://github.com/apache/datafusion/pull/6660) (jackwener) +- Combine evaluate_stateful and evaluate_inside_range [#6665](https://github.com/apache/datafusion/pull/6665) (mustafasrepo) +- Support internal cast for BuiltinScalarFunction::MakeArray [#6607](https://github.com/apache/datafusion/pull/6607) (jayzhan211) +- minor: use sql to setup test data for aggregate.slt rather than rust 
[#6664](https://github.com/apache/datafusion/pull/6664) (jiangzhx) +- Minor: Add tests for User Defined Aggregate functions [#6669](https://github.com/apache/datafusion/pull/6669) (alamb) +- fix: correct wrong test [#6667](https://github.com/apache/datafusion/pull/6667) (jackwener) +- fix: from_plan shouldn't use original schema [#6595](https://github.com/apache/datafusion/pull/6595) (jackwener) +- feat: new concatenation operator for working with arrays [#6615](https://github.com/apache/datafusion/pull/6615) (izveigor) +- Minor: Add more doc strings to WindowExpr [#6663](https://github.com/apache/datafusion/pull/6663) (alamb) +- minor: `with_new_inputs` replace `from_plan` [#6680](https://github.com/apache/datafusion/pull/6680) (jackwener) +- Docs: Update roadmap to point at EPIC's, clarify project goals [#6639](https://github.com/apache/datafusion/pull/6639) (alamb) +- Disable incremental compilation on CI [#6688](https://github.com/apache/datafusion/pull/6688) (alamb) +- Allow `AggregateUDF` to define retractable batch , implement sliding window functions [#6671](https://github.com/apache/datafusion/pull/6671) (alamb) +- Minor: Update user guide [#6692](https://github.com/apache/datafusion/pull/6692) (comphead) +- Minor: consolidate repartition test into sql_integration to save builder space and build time [#6685](https://github.com/apache/datafusion/pull/6685) (alamb) +- Minor: combine `statistics`, `filter_pushdown` and `custom_sources provider` tests together to reduce CI disk space [#6683](https://github.com/apache/datafusion/pull/6683) (alamb) +- Move `PhysicalPlanner` to `physical_planer` module [#6570](https://github.com/apache/datafusion/pull/6570) (alamb) +- Rename integration tests to match crate they are defined in [#6687](https://github.com/apache/datafusion/pull/6687) (alamb) +- Minor: combine fuzz tests into a single binary to save builder space and build time [#6684](https://github.com/apache/datafusion/pull/6684) (alamb) +- Minor: consolidate 
datafusion_substrait tests into `substrait_integration` to save builder space and build time #6685 [#6686](https://github.com/apache/datafusion/pull/6686) (alamb) +- removed self.all_values.len() from inside reserve [#6689](https://github.com/apache/datafusion/pull/6689) (BryanEmond) +- Replace supports_bounded_execution with supports_retract_batch [#6695](https://github.com/apache/datafusion/pull/6695) (mustafasrepo) +- Move `dataframe` and `dataframe_functon` into `core_integration` test binary [#6697](https://github.com/apache/datafusion/pull/6697) (alamb) +- refactor: fix clippy allow too many arguments [#6705](https://github.com/apache/datafusion/pull/6705) (aprimadi) +- Fix documentation typo [#6704](https://github.com/apache/datafusion/pull/6704) (aprimadi) +- fix: correct the error type [#6712](https://github.com/apache/datafusion/pull/6712) (jackwener) +- Port test in subqueries.rs from rust to sqllogictest [#6675](https://github.com/apache/datafusion/pull/6675) (jiangzhx) +- Improve performance/memory usage of HashJoin datastructure (5-15% improvement on selected TPC-H queries) [#6679](https://github.com/apache/datafusion/pull/6679) (Dandandan) +- refactor: alias() should skip add alias for `Expr::Sort` [#6707](https://github.com/apache/datafusion/pull/6707) (jackwener) +- chore(deps): update strum/strum_macros requirement from 0.24 to 0.25 [#6717](https://github.com/apache/datafusion/pull/6717) (jackwener) +- Move alias generator to per-query execution props [#6706](https://github.com/apache/datafusion/pull/6706) (aprimadi) +- fix: parser for negative intervals [#6698](https://github.com/apache/datafusion/pull/6698) (izveigor) +- Minor: Improve UX for setting `ExecutionProps::query_execution_start_time` [#6719](https://github.com/apache/datafusion/pull/6719) (alamb) +- add Eq and PartialEq to ListingTableUrl [#6725](https://github.com/apache/datafusion/pull/6725) (fsdvh) +- Support Expr::InList to Substrait::RexType 
[#6604](https://github.com/apache/datafusion/pull/6604) (jayzhan211) +- MINOR: Add maintains input order flag to CoalesceBatches [#6730](https://github.com/apache/datafusion/pull/6730) (mustafasrepo) +- Minor: Update copyight date on website [#6727](https://github.com/apache/datafusion/pull/6727) (alamb) +- Display all partitions and files in EXPLAIN VERBOSE [#6711](https://github.com/apache/datafusion/pull/6711) (qrilka) +- Update `arrow`, `arrow-flight` and `parquet` to `42.0.0` [#6702](https://github.com/apache/datafusion/pull/6702) (alamb) +- Move `PartitionEvaluator` and window_state structures to `datafusion_expr` crate [#6690](https://github.com/apache/datafusion/pull/6690) (alamb) +- Hash Join Vectorized collision checking [#6724](https://github.com/apache/datafusion/pull/6724) (Dandandan) +- Return null for date_trunc(null) instead of panic [#6723](https://github.com/apache/datafusion/pull/6723) (BryanEmond) +- `derive(Debug)` for `Expr` [#6708](https://github.com/apache/datafusion/pull/6708) (parkma99) +- refactor: extract merge_projection common function. 
[#6735](https://github.com/apache/datafusion/pull/6735) (jackwener) +- Fix up some `DataFusionError::Internal` errors with correct type [#6721](https://github.com/apache/datafusion/pull/6721) (alamb) +- Minor: remove some uses of unwrap [#6738](https://github.com/apache/datafusion/pull/6738) (alamb) +- Minor: remove dead code with decimal datatypes from `in_list` [#6737](https://github.com/apache/datafusion/pull/6737) (izveigor) +- Update documentation for creating User Defined Aggregates (AggregateUDF) [#6729](https://github.com/apache/datafusion/pull/6729) (alamb) +- Support User Defined Window Functions [#6703](https://github.com/apache/datafusion/pull/6703) (alamb) +- MINOR: Aggregate ordering substrait support [#6745](https://github.com/apache/datafusion/pull/6745) (mustafasrepo) +- chore(deps): update itertools requirement from 0.10 to 0.11 [#6752](https://github.com/apache/datafusion/pull/6752) (jackwener) +- refactor: move some code in physical_plan/common.rs before tests module [#6749](https://github.com/apache/datafusion/pull/6749) (aprimadi) +- Add support for order-sensitive aggregation for multipartitions [#6734](https://github.com/apache/datafusion/pull/6734) (mustafasrepo) +- Update sqlparser-rs to version `0.35.0` [#6753](https://github.com/apache/datafusion/pull/6753) (alamb) +- Docs: Update SQL status page [#6736](https://github.com/apache/datafusion/pull/6736) (alamb) +- fix typo [#6761](https://github.com/apache/datafusion/pull/6761) (Weijun-H) +- Minor: Move `PartitionStream` to physical_plan [#6756](https://github.com/apache/datafusion/pull/6756) (alamb) +- Docs: Improve documentation for `struct` function` [#6754](https://github.com/apache/datafusion/pull/6754) (alamb) +- add UT to verify the fix on "issues/6606" [#6762](https://github.com/apache/datafusion/pull/6762) (mingmwang) +- Re-export modules individually to fix rustdocs [#6757](https://github.com/apache/datafusion/pull/6757) (alamb) +- Order Preserving RepartitionExec Implementation 
[#6742](https://github.com/apache/datafusion/pull/6742) (mustafasrepo) +- feat: add `-c option` to pass the SQL query directly as an argument on datafusion-cli [#6765](https://github.com/apache/datafusion/pull/6765) (r4ntix) diff --git a/dev/changelog/28.0.0.md b/dev/changelog/28.0.0.md index a51427be5c34..66f0986b3fab 100644 --- a/dev/changelog/28.0.0.md +++ b/dev/changelog/28.0.0.md @@ -17,178 +17,178 @@ under the License. --> -## [28.0.0](https://github.com/apache/arrow-datafusion/tree/28.0.0) (2023-07-21) +## [28.0.0](https://github.com/apache/datafusion/tree/28.0.0) (2023-07-21) -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/27.0.0...28.0.0) +[Full Changelog](https://github.com/apache/datafusion/compare/27.0.0...28.0.0) **Breaking changes:** -- Cleanup type coercion (#3419) [#6778](https://github.com/apache/arrow-datafusion/pull/6778) (tustvold) -- refactor: encapsulate Alias as a struct [#6795](https://github.com/apache/arrow-datafusion/pull/6795) (jackwener) -- Set `DisplayAs` to be a supertrait of `ExecutionPlan` [#6835](https://github.com/apache/arrow-datafusion/pull/6835) (qrilka) -- [MINOR] Remove unnecessary api from MemTable [#6861](https://github.com/apache/arrow-datafusion/pull/6861) (metesynnada) -- refactor: Merge Expr::Like and Expr::ILike [#7007](https://github.com/apache/arrow-datafusion/pull/7007) (waynexia) +- Cleanup type coercion (#3419) [#6778](https://github.com/apache/datafusion/pull/6778) (tustvold) +- refactor: encapsulate Alias as a struct [#6795](https://github.com/apache/datafusion/pull/6795) (jackwener) +- Set `DisplayAs` to be a supertrait of `ExecutionPlan` [#6835](https://github.com/apache/datafusion/pull/6835) (qrilka) +- [MINOR] Remove unnecessary api from MemTable [#6861](https://github.com/apache/datafusion/pull/6861) (metesynnada) +- refactor: Merge Expr::Like and Expr::ILike [#7007](https://github.com/apache/datafusion/pull/7007) (waynexia) **Implemented enhancements:** -- feat: `array_contains` 
[#6618](https://github.com/apache/arrow-datafusion/pull/6618) (izveigor) -- feat: support `NULL` in array functions [#6662](https://github.com/apache/arrow-datafusion/pull/6662) (izveigor) -- feat: implement posgres style `encode`/`decode` [#6821](https://github.com/apache/arrow-datafusion/pull/6821) (ozgrakkurt) -- feat: column support for `array_append`, `array_prepend`, `array_position` and `array_positions` [#6805](https://github.com/apache/arrow-datafusion/pull/6805) (izveigor) -- feat: preserve metadata for `Field` and `Schema` in proto [#6865](https://github.com/apache/arrow-datafusion/pull/6865) (jonahgao) -- feat: Add graphviz display format for execution plan. [#6726](https://github.com/apache/arrow-datafusion/pull/6726) (liurenjie1024) -- feat: implement substrait join filter support [#6868](https://github.com/apache/arrow-datafusion/pull/6868) (nseekhao) -- feat: column support for `array_dims`, `array_ndims`, `cardinality` and `array_length` [#6864](https://github.com/apache/arrow-datafusion/pull/6864) (izveigor) -- feat: support for `NestedLoopJoinExec` in datafusion-proto [#6902](https://github.com/apache/arrow-datafusion/pull/6902) (r4ntix) -- feat: add round trip test of physical plan in tpch unit tests [#6918](https://github.com/apache/arrow-datafusion/pull/6918) (r4ntix) -- feat: implement substrait for LIKE/ILIKE expr [#6840](https://github.com/apache/arrow-datafusion/pull/6840) (waynexia) -- feat: array functions treat an array as an element [#6986](https://github.com/apache/arrow-datafusion/pull/6986) (izveigor) +- feat: `array_contains` [#6618](https://github.com/apache/datafusion/pull/6618) (izveigor) +- feat: support `NULL` in array functions [#6662](https://github.com/apache/datafusion/pull/6662) (izveigor) +- feat: implement posgres style `encode`/`decode` [#6821](https://github.com/apache/datafusion/pull/6821) (ozgrakkurt) +- feat: column support for `array_append`, `array_prepend`, `array_position` and `array_positions` 
[#6805](https://github.com/apache/datafusion/pull/6805) (izveigor) +- feat: preserve metadata for `Field` and `Schema` in proto [#6865](https://github.com/apache/datafusion/pull/6865) (jonahgao) +- feat: Add graphviz display format for execution plan. [#6726](https://github.com/apache/datafusion/pull/6726) (liurenjie1024) +- feat: implement substrait join filter support [#6868](https://github.com/apache/datafusion/pull/6868) (nseekhao) +- feat: column support for `array_dims`, `array_ndims`, `cardinality` and `array_length` [#6864](https://github.com/apache/datafusion/pull/6864) (izveigor) +- feat: support for `NestedLoopJoinExec` in datafusion-proto [#6902](https://github.com/apache/datafusion/pull/6902) (r4ntix) +- feat: add round trip test of physical plan in tpch unit tests [#6918](https://github.com/apache/datafusion/pull/6918) (r4ntix) +- feat: implement substrait for LIKE/ILIKE expr [#6840](https://github.com/apache/datafusion/pull/6840) (waynexia) +- feat: array functions treat an array as an element [#6986](https://github.com/apache/datafusion/pull/6986) (izveigor) **Fixed bugs:** -- fix: incorrect nullability of `between` expr [#6786](https://github.com/apache/arrow-datafusion/pull/6786) (jonahgao) -- fix: incorrect nullability of `InList` expr [#6799](https://github.com/apache/arrow-datafusion/pull/6799) (jonahgao) -- fix: from_plan generate Agg can be with different schema. 
[#6820](https://github.com/apache/arrow-datafusion/pull/6820) (jackwener) -- fix: incorrect nullability of `Like` expressions [#6829](https://github.com/apache/arrow-datafusion/pull/6829) (jonahgao) -- fix: incorrect simplification of case expr [#7006](https://github.com/apache/arrow-datafusion/pull/7006) (jonahgao) -- fix: `array_concat` with arrays with different dimensions, add `_list*` aliases for `_array*` functions [#7008](https://github.com/apache/arrow-datafusion/pull/7008) (izveigor) +- fix: incorrect nullability of `between` expr [#6786](https://github.com/apache/datafusion/pull/6786) (jonahgao) +- fix: incorrect nullability of `InList` expr [#6799](https://github.com/apache/datafusion/pull/6799) (jonahgao) +- fix: from_plan generate Agg can be with different schema. [#6820](https://github.com/apache/datafusion/pull/6820) (jackwener) +- fix: incorrect nullability of `Like` expressions [#6829](https://github.com/apache/datafusion/pull/6829) (jonahgao) +- fix: incorrect simplification of case expr [#7006](https://github.com/apache/datafusion/pull/7006) (jonahgao) +- fix: `array_concat` with arrays with different dimensions, add `_list*` aliases for `_array*` functions [#7008](https://github.com/apache/datafusion/pull/7008) (izveigor) **Documentation updates:** -- docs: Add `encode` and `decode` to the user guide [#6856](https://github.com/apache/arrow-datafusion/pull/6856) (alamb) +- docs: Add `encode` and `decode` to the user guide [#6856](https://github.com/apache/datafusion/pull/6856) (alamb) **Merged pull requests:** -- chore(deps): update indexmap requirement from 1.9.2 to 2.0.0 [#6766](https://github.com/apache/arrow-datafusion/pull/6766) (dependabot[bot]) -- Support IsDistinctFrom and IsNotDistinctFrom on interval types [#6776](https://github.com/apache/arrow-datafusion/pull/6776) (joroKr21) -- Protect main branch [#6775](https://github.com/apache/arrow-datafusion/pull/6775) (tustvold) -- Prepare 27.0.0 release 
[#6773](https://github.com/apache/arrow-datafusion/pull/6773) (andygrove) -- Support hex string literal [#6767](https://github.com/apache/arrow-datafusion/pull/6767) (ShiKaiWi) -- feat: `array_contains` [#6618](https://github.com/apache/arrow-datafusion/pull/6618) (izveigor) -- Make 'date_trunc' returns the same type as its input [#6654](https://github.com/apache/arrow-datafusion/pull/6654) (Weijun-H) -- Fix inserting into a table with non-nullable columns [#6722](https://github.com/apache/arrow-datafusion/pull/6722) (jonahgao) -- Cleanup type coercion (#3419) [#6778](https://github.com/apache/arrow-datafusion/pull/6778) (tustvold) -- Properly project grouping set expressions [#6777](https://github.com/apache/arrow-datafusion/pull/6777) (fsdvh) -- Minor: Simplify `date_trunc` code and add comments [#6783](https://github.com/apache/arrow-datafusion/pull/6783) (alamb) -- Minor: Add array / array sqllogic tests for `array_contains` [#6771](https://github.com/apache/arrow-datafusion/pull/6771) (alamb) -- Minor: Make `date_trunc` code easier to understand [#6789](https://github.com/apache/arrow-datafusion/pull/6789) (alamb) -- feat: support `NULL` in array functions [#6662](https://github.com/apache/arrow-datafusion/pull/6662) (izveigor) -- fix: incorrect nullability of `between` expr [#6786](https://github.com/apache/arrow-datafusion/pull/6786) (jonahgao) -- Use checked division kernel [#6792](https://github.com/apache/arrow-datafusion/pull/6792) (tustvold) -- Minor: add sqllogictests for binary data type [#6770](https://github.com/apache/arrow-datafusion/pull/6770) (alamb) -- refactor: encapsulate Alias as a struct [#6795](https://github.com/apache/arrow-datafusion/pull/6795) (jackwener) -- chore(deps): bump actions/labeler from 4.1.0 to 4.2.0 [#6803](https://github.com/apache/arrow-datafusion/pull/6803) (dependabot[bot]) -- Consistently coerce dictionaries for arithmetic [#6785](https://github.com/apache/arrow-datafusion/pull/6785) (tustvold) -- Implement 
serialization for UDWF and UDAF in plan protobuf [#6769](https://github.com/apache/arrow-datafusion/pull/6769) (parkma99) -- fix: incorrect nullability of `InList` expr [#6799](https://github.com/apache/arrow-datafusion/pull/6799) (jonahgao) -- Fix timestamp_add_interval_months to pass any date [#6815](https://github.com/apache/arrow-datafusion/pull/6815) (jayzhan211) -- Minor: Log TPCH benchmark results [#6813](https://github.com/apache/arrow-datafusion/pull/6813) (alamb) -- Refactor Decimal128 averaging code to be vectorizable (and easier to read) [#6810](https://github.com/apache/arrow-datafusion/pull/6810) (alamb) -- Minor: Encapsulate `return_type` and `signature` in `AggregateFunction` and `WindowFunction` [#6748](https://github.com/apache/arrow-datafusion/pull/6748) (alamb) -- fix: from_plan generate Agg can be with different schema. [#6820](https://github.com/apache/arrow-datafusion/pull/6820) (jackwener) -- [MINOR] Improve performance of `create_hashes` [#6816](https://github.com/apache/arrow-datafusion/pull/6816) (Dandandan) -- Add fetch to `SortPreservingMergeExec` and `SortPreservingMergeStream` [#6811](https://github.com/apache/arrow-datafusion/pull/6811) (Dandandan) -- chore(deps): update substrait requirement from 0.11.0 to 0.12.0 [#6825](https://github.com/apache/arrow-datafusion/pull/6825) (dependabot[bot]) -- Upgrade arrow 43 [#6812](https://github.com/apache/arrow-datafusion/pull/6812) (tustvold) -- Fix cargo build warning [#6831](https://github.com/apache/arrow-datafusion/pull/6831) (viirya) -- Simplify `IsUnkown` and `IsNotUnkown` expression [#6830](https://github.com/apache/arrow-datafusion/pull/6830) (jonahgao) -- fix: incorrect nullability of `Like` expressions [#6829](https://github.com/apache/arrow-datafusion/pull/6829) (jonahgao) -- Minor: Add one more assert to `hash_array_primitive` [#6834](https://github.com/apache/arrow-datafusion/pull/6834) (alamb) -- revert #6595 #6820 [#6827](https://github.com/apache/arrow-datafusion/pull/6827) 
(jackwener) -- Add Duration to ScalarValue [#6838](https://github.com/apache/arrow-datafusion/pull/6838) (tustvold) -- Replace AbortOnDrop / AbortDropOnMany with tokio JoinSet [#6750](https://github.com/apache/arrow-datafusion/pull/6750) (aprimadi) -- Add clickbench queries to sqllogictest coverage [#6836](https://github.com/apache/arrow-datafusion/pull/6836) (alamb) -- feat: implement posgres style `encode`/`decode` [#6821](https://github.com/apache/arrow-datafusion/pull/6821) (ozgrakkurt) -- chore(deps): update rstest requirement from 0.17.0 to 0.18.0 [#6847](https://github.com/apache/arrow-datafusion/pull/6847) (dependabot[bot]) -- [minior] support serde for some function [#6846](https://github.com/apache/arrow-datafusion/pull/6846) (liukun4515) -- Support fixed_size_list for make_array [#6759](https://github.com/apache/arrow-datafusion/pull/6759) (jayzhan211) -- Improve median performance. [#6837](https://github.com/apache/arrow-datafusion/pull/6837) (vincev) -- Mismatch in MemTable of Select Into when projecting on aggregate window functions [#6566](https://github.com/apache/arrow-datafusion/pull/6566) (berkaysynnada) -- feat: column support for `array_append`, `array_prepend`, `array_position` and `array_positions` [#6805](https://github.com/apache/arrow-datafusion/pull/6805) (izveigor) -- MINOR: Fix ordering of the aggregate_source_with_order table [#6852](https://github.com/apache/arrow-datafusion/pull/6852) (mustafasrepo) -- Return error when internal multiplication overflowing in decimal division kernel [#6833](https://github.com/apache/arrow-datafusion/pull/6833) (viirya) -- Deprecate ScalarValue::and, ScalarValue::or (#6842) [#6844](https://github.com/apache/arrow-datafusion/pull/6844) (tustvold) -- chore(deps): update bigdecimal requirement from 0.3.0 to 0.4.0 [#6848](https://github.com/apache/arrow-datafusion/pull/6848) (dependabot[bot]) -- feat: preserve metadata for `Field` and `Schema` in proto 
[#6865](https://github.com/apache/arrow-datafusion/pull/6865) (jonahgao) -- Set `DisplayAs` to be a supertrait of `ExecutionPlan` [#6835](https://github.com/apache/arrow-datafusion/pull/6835) (qrilka) -- [MINOR] Remove unnecessary api from MemTable [#6861](https://github.com/apache/arrow-datafusion/pull/6861) (metesynnada) -- Adjustment of HashJoinExec APIs to Preserve Probe Side Order [#6858](https://github.com/apache/arrow-datafusion/pull/6858) (metesynnada) -- [MINOR] Adding order into StreamingTableExec [#6860](https://github.com/apache/arrow-datafusion/pull/6860) (metesynnada) -- Docs: try and clarify what `PartitionEvaluator` functions are called [#6869](https://github.com/apache/arrow-datafusion/pull/6869) (alamb) -- docs: Add `encode` and `decode` to the user guide [#6856](https://github.com/apache/arrow-datafusion/pull/6856) (alamb) -- Fix build on main due to logical conflict [#6875](https://github.com/apache/arrow-datafusion/pull/6875) (alamb) -- feat: Add graphviz display format for execution plan. 
[#6726](https://github.com/apache/arrow-datafusion/pull/6726) (liurenjie1024) -- Fix (another) logical conflict [#6882](https://github.com/apache/arrow-datafusion/pull/6882) (alamb) -- Minor: Consolidate display related traits [#6883](https://github.com/apache/arrow-datafusion/pull/6883) (alamb) -- test: parquet use the byte array as the physical type to store decimal [#6851](https://github.com/apache/arrow-datafusion/pull/6851) (smallzhongfeng) -- Make streaming_merge public [#6874](https://github.com/apache/arrow-datafusion/pull/6874) (kazuyukitanimura) -- Performance: Use a specialized sum accumulator for retractable aggregregates [#6888](https://github.com/apache/arrow-datafusion/pull/6888) (alamb) -- Support array concatenation for arrays with different dimensions [#6872](https://github.com/apache/arrow-datafusion/pull/6872) (jayzhan211) -- feat: implement substrait join filter support [#6868](https://github.com/apache/arrow-datafusion/pull/6868) (nseekhao) -- feat: column support for `array_dims`, `array_ndims`, `cardinality` and `array_length` [#6864](https://github.com/apache/arrow-datafusion/pull/6864) (izveigor) -- Add FixedSizeBinary support to binary_op_dyn_scalar [#6891](https://github.com/apache/arrow-datafusion/pull/6891) (maxburke) -- Minor: deleted duplicated substrait integration test [#6894](https://github.com/apache/arrow-datafusion/pull/6894) (alamb) -- Minor: add test cases with columns for math expressions [#6787](https://github.com/apache/arrow-datafusion/pull/6787) (izveigor) -- Minor: reduce redundant code [#6901](https://github.com/apache/arrow-datafusion/pull/6901) (smallzhongfeng) -- Minor: Add some more doc comments to `BoundedAggregateStream` [#6881](https://github.com/apache/arrow-datafusion/pull/6881) (alamb) -- feat: support for `NestedLoopJoinExec` in datafusion-proto [#6902](https://github.com/apache/arrow-datafusion/pull/6902) (r4ntix) -- Fix `make_array` null handling, update tests 
[#6900](https://github.com/apache/arrow-datafusion/pull/6900) (alamb) -- chore(deps): bump actions/labeler from 4.2.0 to 4.3.0 [#6911](https://github.com/apache/arrow-datafusion/pull/6911) (dependabot[bot]) -- Minor: Add TPCH scale factor 10 to bench.sh, use 10 iteration [#6893](https://github.com/apache/arrow-datafusion/pull/6893) (alamb) -- Minor: Add output to aggregrate_fuzz.rs on failure [#6905](https://github.com/apache/arrow-datafusion/pull/6905) (alamb) -- allow window UDF to return null [#6915](https://github.com/apache/arrow-datafusion/pull/6915) (mhilton) -- Minor: Add factory method to PartitionedFile to create File Scan [#6909](https://github.com/apache/arrow-datafusion/pull/6909) (comphead) -- [minor]fix doc to remove duplicate content [#6923](https://github.com/apache/arrow-datafusion/pull/6923) (liukun4515) -- Revert "chore(deps): update bigdecimal requirement from 0.3.0 to 0.4.0 (#6848)" [#6896](https://github.com/apache/arrow-datafusion/pull/6896) (alamb) -- [Minor] Make FileScanConfig::project pub [#6931](https://github.com/apache/arrow-datafusion/pull/6931) (Dandandan) -- feat: add round trip test of physical plan in tpch unit tests [#6918](https://github.com/apache/arrow-datafusion/pull/6918) (r4ntix) -- Minor: Use thiserror to implement the `From` trait for `DFSqlLogicTestError` [#6924](https://github.com/apache/arrow-datafusion/pull/6924) (jonahgao) -- parallel csv scan [#6801](https://github.com/apache/arrow-datafusion/pull/6801) (2010YOUY01) -- Add additional test coverage for aggregaes using dates/times/timestamps/decimals [#6939](https://github.com/apache/arrow-datafusion/pull/6939) (alamb) -- Replace repartition execs with sort preserving repartition execs [#6921](https://github.com/apache/arrow-datafusion/pull/6921) (mertak) -- Vectorized hash grouping [#6904](https://github.com/apache/arrow-datafusion/pull/6904) (alamb) -- Fix incorrect results in `BitAnd` GroupsAccumulator [#6957](https://github.com/apache/arrow-datafusion/pull/6957) 
(alamb) -- Fixes for clippy 1.71 [#6959](https://github.com/apache/arrow-datafusion/pull/6959) (alamb) -- Improve unnest_column performance [#6903](https://github.com/apache/arrow-datafusion/pull/6903) (vincev) -- Pass `schema_infer_max_records` to JsonFormat. [#6945](https://github.com/apache/arrow-datafusion/pull/6945) (vincev) -- deps: bump sqllogictest to 0.15.0 [#6941](https://github.com/apache/arrow-datafusion/pull/6941) (jonahgao) -- Preserve field metadata across expressions in logical plans [#6920](https://github.com/apache/arrow-datafusion/pull/6920) (dexterduck) -- Support equality and comparison between interval arrays and scalars [#6948](https://github.com/apache/arrow-datafusion/pull/6948) (joroKr21) -- chore(deps): update bigdecimal requirement from 0.3.0 to 0.4.1 [#6946](https://github.com/apache/arrow-datafusion/pull/6946) (dependabot[bot]) -- feat: implement substrait for LIKE/ILIKE expr [#6840](https://github.com/apache/arrow-datafusion/pull/6840) (waynexia) -- Minor: Add comments about initial value for `BitAnd` accumulator [#6964](https://github.com/apache/arrow-datafusion/pull/6964) (alamb) -- [Functions] Support Arithmetic function COT() [#6925](https://github.com/apache/arrow-datafusion/pull/6925) (Syleechan) -- Minor: remove duplication in Min/Max accumulator [#6960](https://github.com/apache/arrow-datafusion/pull/6960) (alamb) -- [MINOR]Add new tests [#6953](https://github.com/apache/arrow-datafusion/pull/6953) (mustafasrepo) -- Column support for array concat [#6879](https://github.com/apache/arrow-datafusion/pull/6879) (jayzhan211) -- Minor: Add FixedSizeBinaryTest [#6895](https://github.com/apache/arrow-datafusion/pull/6895) (alamb) -- [MINOR] Remove update state api from PartitionEvaluator [#6966](https://github.com/apache/arrow-datafusion/pull/6966) (mustafasrepo) -- Fix required partitioning of Single aggregation mode [#6950](https://github.com/apache/arrow-datafusion/pull/6950) (Dandandan) -- [MINOR] Remove global sort rule from 
planner [#6965](https://github.com/apache/arrow-datafusion/pull/6965) (mustafasrepo) -- Column support for array_to_string [#6940](https://github.com/apache/arrow-datafusion/pull/6940) (jayzhan211) -- chore: fix format [#6991](https://github.com/apache/arrow-datafusion/pull/6991) (Weijun-H) -- Extend Ordering Equivalence Support [#6956](https://github.com/apache/arrow-datafusion/pull/6956) (mustafasrepo) -- chore: break earlier in macro `contains!` [#6989](https://github.com/apache/arrow-datafusion/pull/6989) (Weijun-H) -- fix: incorrect simplification of case expr [#7006](https://github.com/apache/arrow-datafusion/pull/7006) (jonahgao) -- Minor: Add String/Binary aggregate tests [#6962](https://github.com/apache/arrow-datafusion/pull/6962) (alamb) -- [MINOR] Supporting repartition joins conf in SHJ [#6998](https://github.com/apache/arrow-datafusion/pull/6998) (metesynnada) -- [MINOR] Code refactor on hash join utils [#6999](https://github.com/apache/arrow-datafusion/pull/6999) (metesynnada) -- feat: array functions treat an array as an element [#6986](https://github.com/apache/arrow-datafusion/pull/6986) (izveigor) -- [MINOR] Moving some test utils from EnsureSorting to test_utils [#7009](https://github.com/apache/arrow-datafusion/pull/7009) (metesynnada) -- MINOR: Bug fix, Use correct ordering equivalence when window expr contains partition by [#7011](https://github.com/apache/arrow-datafusion/pull/7011) (mustafasrepo) -- refactor: Merge Expr::Like and Expr::ILike [#7007](https://github.com/apache/arrow-datafusion/pull/7007) (waynexia) -- Docs: Add docs to `RepartitionExec` and architecture guide [#7003](https://github.com/apache/arrow-datafusion/pull/7003) (alamb) -- Consolidate `BoundedAggregateStream` [#6932](https://github.com/apache/arrow-datafusion/pull/6932) (alamb) -- Minor: Improve aggregate test coverage more [#6952](https://github.com/apache/arrow-datafusion/pull/6952) (alamb) -- Don't store hashes in GroupOrdering 
[#7029](https://github.com/apache/arrow-datafusion/pull/7029) (tustvold) -- Extract GroupValues (#6969) [#7016](https://github.com/apache/arrow-datafusion/pull/7016) (tustvold) -- Refactor AnalysisContext and statistics() of FilterExec [#6982](https://github.com/apache/arrow-datafusion/pull/6982) (berkaysynnada) -- Fix `datafusion-cli/Dockerfile` to build successfully [#7031](https://github.com/apache/arrow-datafusion/pull/7031) (sarutak) -- functions: support trunc() function with one or two args [#6942](https://github.com/apache/arrow-datafusion/pull/6942) (Syleechan) -- Move the column aliases below the SubqueryAlias [#7035](https://github.com/apache/arrow-datafusion/pull/7035) (jonahgao) -- fix: `array_concat` with arrays with different dimensions, add `_list*` aliases for `_array*` functions [#7008](https://github.com/apache/arrow-datafusion/pull/7008) (izveigor) -- Add support for ClickBench in bench.sh [#7005](https://github.com/apache/arrow-datafusion/pull/7005) (alamb) -- Remove RowAccumulators and datafusion-row [#6968](https://github.com/apache/arrow-datafusion/pull/6968) (alamb) -- Decimal256 coercion [#7034](https://github.com/apache/arrow-datafusion/pull/7034) (jdye64) -- Double RawTable on grow instead of triple [#7041](https://github.com/apache/arrow-datafusion/pull/7041) (tustvold) -- Specialize single column primitive group values [#7043](https://github.com/apache/arrow-datafusion/pull/7043) (tustvold) +- chore(deps): update indexmap requirement from 1.9.2 to 2.0.0 [#6766](https://github.com/apache/datafusion/pull/6766) (dependabot[bot]) +- Support IsDistinctFrom and IsNotDistinctFrom on interval types [#6776](https://github.com/apache/datafusion/pull/6776) (joroKr21) +- Protect main branch [#6775](https://github.com/apache/datafusion/pull/6775) (tustvold) +- Prepare 27.0.0 release [#6773](https://github.com/apache/datafusion/pull/6773) (andygrove) +- Support hex string literal [#6767](https://github.com/apache/datafusion/pull/6767) (ShiKaiWi) +- 
feat: `array_contains` [#6618](https://github.com/apache/datafusion/pull/6618) (izveigor) +- Make 'date_trunc' returns the same type as its input [#6654](https://github.com/apache/datafusion/pull/6654) (Weijun-H) +- Fix inserting into a table with non-nullable columns [#6722](https://github.com/apache/datafusion/pull/6722) (jonahgao) +- Cleanup type coercion (#3419) [#6778](https://github.com/apache/datafusion/pull/6778) (tustvold) +- Properly project grouping set expressions [#6777](https://github.com/apache/datafusion/pull/6777) (fsdvh) +- Minor: Simplify `date_trunc` code and add comments [#6783](https://github.com/apache/datafusion/pull/6783) (alamb) +- Minor: Add array / array sqllogic tests for `array_contains` [#6771](https://github.com/apache/datafusion/pull/6771) (alamb) +- Minor: Make `date_trunc` code easier to understand [#6789](https://github.com/apache/datafusion/pull/6789) (alamb) +- feat: support `NULL` in array functions [#6662](https://github.com/apache/datafusion/pull/6662) (izveigor) +- fix: incorrect nullability of `between` expr [#6786](https://github.com/apache/datafusion/pull/6786) (jonahgao) +- Use checked division kernel [#6792](https://github.com/apache/datafusion/pull/6792) (tustvold) +- Minor: add sqllogictests for binary data type [#6770](https://github.com/apache/datafusion/pull/6770) (alamb) +- refactor: encapsulate Alias as a struct [#6795](https://github.com/apache/datafusion/pull/6795) (jackwener) +- chore(deps): bump actions/labeler from 4.1.0 to 4.2.0 [#6803](https://github.com/apache/datafusion/pull/6803) (dependabot[bot]) +- Consistently coerce dictionaries for arithmetic [#6785](https://github.com/apache/datafusion/pull/6785) (tustvold) +- Implement serialization for UDWF and UDAF in plan protobuf [#6769](https://github.com/apache/datafusion/pull/6769) (parkma99) +- fix: incorrect nullability of `InList` expr [#6799](https://github.com/apache/datafusion/pull/6799) (jonahgao) +- Fix timestamp_add_interval_months to pass any 
date [#6815](https://github.com/apache/datafusion/pull/6815) (jayzhan211) +- Minor: Log TPCH benchmark results [#6813](https://github.com/apache/datafusion/pull/6813) (alamb) +- Refactor Decimal128 averaging code to be vectorizable (and easier to read) [#6810](https://github.com/apache/datafusion/pull/6810) (alamb) +- Minor: Encapsulate `return_type` and `signature` in `AggregateFunction` and `WindowFunction` [#6748](https://github.com/apache/datafusion/pull/6748) (alamb) +- fix: from_plan generate Agg can be with different schema. [#6820](https://github.com/apache/datafusion/pull/6820) (jackwener) +- [MINOR] Improve performance of `create_hashes` [#6816](https://github.com/apache/datafusion/pull/6816) (Dandandan) +- Add fetch to `SortPreservingMergeExec` and `SortPreservingMergeStream` [#6811](https://github.com/apache/datafusion/pull/6811) (Dandandan) +- chore(deps): update substrait requirement from 0.11.0 to 0.12.0 [#6825](https://github.com/apache/datafusion/pull/6825) (dependabot[bot]) +- Upgrade arrow 43 [#6812](https://github.com/apache/datafusion/pull/6812) (tustvold) +- Fix cargo build warning [#6831](https://github.com/apache/datafusion/pull/6831) (viirya) +- Simplify `IsUnkown` and `IsNotUnkown` expression [#6830](https://github.com/apache/datafusion/pull/6830) (jonahgao) +- fix: incorrect nullability of `Like` expressions [#6829](https://github.com/apache/datafusion/pull/6829) (jonahgao) +- Minor: Add one more assert to `hash_array_primitive` [#6834](https://github.com/apache/datafusion/pull/6834) (alamb) +- revert #6595 #6820 [#6827](https://github.com/apache/datafusion/pull/6827) (jackwener) +- Add Duration to ScalarValue [#6838](https://github.com/apache/datafusion/pull/6838) (tustvold) +- Replace AbortOnDrop / AbortDropOnMany with tokio JoinSet [#6750](https://github.com/apache/datafusion/pull/6750) (aprimadi) +- Add clickbench queries to sqllogictest coverage [#6836](https://github.com/apache/datafusion/pull/6836) (alamb) +- feat: implement 
posgres style `encode`/`decode` [#6821](https://github.com/apache/datafusion/pull/6821) (ozgrakkurt) +- chore(deps): update rstest requirement from 0.17.0 to 0.18.0 [#6847](https://github.com/apache/datafusion/pull/6847) (dependabot[bot]) +- [minior] support serde for some function [#6846](https://github.com/apache/datafusion/pull/6846) (liukun4515) +- Support fixed_size_list for make_array [#6759](https://github.com/apache/datafusion/pull/6759) (jayzhan211) +- Improve median performance. [#6837](https://github.com/apache/datafusion/pull/6837) (vincev) +- Mismatch in MemTable of Select Into when projecting on aggregate window functions [#6566](https://github.com/apache/datafusion/pull/6566) (berkaysynnada) +- feat: column support for `array_append`, `array_prepend`, `array_position` and `array_positions` [#6805](https://github.com/apache/datafusion/pull/6805) (izveigor) +- MINOR: Fix ordering of the aggregate_source_with_order table [#6852](https://github.com/apache/datafusion/pull/6852) (mustafasrepo) +- Return error when internal multiplication overflowing in decimal division kernel [#6833](https://github.com/apache/datafusion/pull/6833) (viirya) +- Deprecate ScalarValue::and, ScalarValue::or (#6842) [#6844](https://github.com/apache/datafusion/pull/6844) (tustvold) +- chore(deps): update bigdecimal requirement from 0.3.0 to 0.4.0 [#6848](https://github.com/apache/datafusion/pull/6848) (dependabot[bot]) +- feat: preserve metadata for `Field` and `Schema` in proto [#6865](https://github.com/apache/datafusion/pull/6865) (jonahgao) +- Set `DisplayAs` to be a supertrait of `ExecutionPlan` [#6835](https://github.com/apache/datafusion/pull/6835) (qrilka) +- [MINOR] Remove unnecessary api from MemTable [#6861](https://github.com/apache/datafusion/pull/6861) (metesynnada) +- Adjustment of HashJoinExec APIs to Preserve Probe Side Order [#6858](https://github.com/apache/datafusion/pull/6858) (metesynnada) +- [MINOR] Adding order into StreamingTableExec 
[#6860](https://github.com/apache/datafusion/pull/6860) (metesynnada) +- Docs: try and clarify what `PartitionEvaluator` functions are called [#6869](https://github.com/apache/datafusion/pull/6869) (alamb) +- docs: Add `encode` and `decode` to the user guide [#6856](https://github.com/apache/datafusion/pull/6856) (alamb) +- Fix build on main due to logical conflict [#6875](https://github.com/apache/datafusion/pull/6875) (alamb) +- feat: Add graphviz display format for execution plan. [#6726](https://github.com/apache/datafusion/pull/6726) (liurenjie1024) +- Fix (another) logical conflict [#6882](https://github.com/apache/datafusion/pull/6882) (alamb) +- Minor: Consolidate display related traits [#6883](https://github.com/apache/datafusion/pull/6883) (alamb) +- test: parquet use the byte array as the physical type to store decimal [#6851](https://github.com/apache/datafusion/pull/6851) (smallzhongfeng) +- Make streaming_merge public [#6874](https://github.com/apache/datafusion/pull/6874) (kazuyukitanimura) +- Performance: Use a specialized sum accumulator for retractable aggregregates [#6888](https://github.com/apache/datafusion/pull/6888) (alamb) +- Support array concatenation for arrays with different dimensions [#6872](https://github.com/apache/datafusion/pull/6872) (jayzhan211) +- feat: implement substrait join filter support [#6868](https://github.com/apache/datafusion/pull/6868) (nseekhao) +- feat: column support for `array_dims`, `array_ndims`, `cardinality` and `array_length` [#6864](https://github.com/apache/datafusion/pull/6864) (izveigor) +- Add FixedSizeBinary support to binary_op_dyn_scalar [#6891](https://github.com/apache/datafusion/pull/6891) (maxburke) +- Minor: deleted duplicated substrait integration test [#6894](https://github.com/apache/datafusion/pull/6894) (alamb) +- Minor: add test cases with columns for math expressions [#6787](https://github.com/apache/datafusion/pull/6787) (izveigor) +- Minor: reduce redundant code 
[#6901](https://github.com/apache/datafusion/pull/6901) (smallzhongfeng) +- Minor: Add some more doc comments to `BoundedAggregateStream` [#6881](https://github.com/apache/datafusion/pull/6881) (alamb) +- feat: support for `NestedLoopJoinExec` in datafusion-proto [#6902](https://github.com/apache/datafusion/pull/6902) (r4ntix) +- Fix `make_array` null handling, update tests [#6900](https://github.com/apache/datafusion/pull/6900) (alamb) +- chore(deps): bump actions/labeler from 4.2.0 to 4.3.0 [#6911](https://github.com/apache/datafusion/pull/6911) (dependabot[bot]) +- Minor: Add TPCH scale factor 10 to bench.sh, use 10 iteration [#6893](https://github.com/apache/datafusion/pull/6893) (alamb) +- Minor: Add output to aggregrate_fuzz.rs on failure [#6905](https://github.com/apache/datafusion/pull/6905) (alamb) +- allow window UDF to return null [#6915](https://github.com/apache/datafusion/pull/6915) (mhilton) +- Minor: Add factory method to PartitionedFile to create File Scan [#6909](https://github.com/apache/datafusion/pull/6909) (comphead) +- [minor]fix doc to remove duplicate content [#6923](https://github.com/apache/datafusion/pull/6923) (liukun4515) +- Revert "chore(deps): update bigdecimal requirement from 0.3.0 to 0.4.0 (#6848)" [#6896](https://github.com/apache/datafusion/pull/6896) (alamb) +- [Minor] Make FileScanConfig::project pub [#6931](https://github.com/apache/datafusion/pull/6931) (Dandandan) +- feat: add round trip test of physical plan in tpch unit tests [#6918](https://github.com/apache/datafusion/pull/6918) (r4ntix) +- Minor: Use thiserror to implement the `From` trait for `DFSqlLogicTestError` [#6924](https://github.com/apache/datafusion/pull/6924) (jonahgao) +- parallel csv scan [#6801](https://github.com/apache/datafusion/pull/6801) (2010YOUY01) +- Add additional test coverage for aggregaes using dates/times/timestamps/decimals [#6939](https://github.com/apache/datafusion/pull/6939) (alamb) +- Replace repartition execs with sort preserving 
repartition execs [#6921](https://github.com/apache/datafusion/pull/6921) (mertak) +- Vectorized hash grouping [#6904](https://github.com/apache/datafusion/pull/6904) (alamb) +- Fix incorrect results in `BitAnd` GroupsAccumulator [#6957](https://github.com/apache/datafusion/pull/6957) (alamb) +- Fixes for clippy 1.71 [#6959](https://github.com/apache/datafusion/pull/6959) (alamb) +- Improve unnest_column performance [#6903](https://github.com/apache/datafusion/pull/6903) (vincev) +- Pass `schema_infer_max_records` to JsonFormat. [#6945](https://github.com/apache/datafusion/pull/6945) (vincev) +- deps: bump sqllogictest to 0.15.0 [#6941](https://github.com/apache/datafusion/pull/6941) (jonahgao) +- Preserve field metadata across expressions in logical plans [#6920](https://github.com/apache/datafusion/pull/6920) (dexterduck) +- Support equality and comparison between interval arrays and scalars [#6948](https://github.com/apache/datafusion/pull/6948) (joroKr21) +- chore(deps): update bigdecimal requirement from 0.3.0 to 0.4.1 [#6946](https://github.com/apache/datafusion/pull/6946) (dependabot[bot]) +- feat: implement substrait for LIKE/ILIKE expr [#6840](https://github.com/apache/datafusion/pull/6840) (waynexia) +- Minor: Add comments about initial value for `BitAnd` accumulator [#6964](https://github.com/apache/datafusion/pull/6964) (alamb) +- [Functions] Support Arithmetic function COT() [#6925](https://github.com/apache/datafusion/pull/6925) (Syleechan) +- Minor: remove duplication in Min/Max accumulator [#6960](https://github.com/apache/datafusion/pull/6960) (alamb) +- [MINOR]Add new tests [#6953](https://github.com/apache/datafusion/pull/6953) (mustafasrepo) +- Column support for array concat [#6879](https://github.com/apache/datafusion/pull/6879) (jayzhan211) +- Minor: Add FixedSizeBinaryTest [#6895](https://github.com/apache/datafusion/pull/6895) (alamb) +- [MINOR] Remove update state api from PartitionEvaluator 
[#6966](https://github.com/apache/datafusion/pull/6966) (mustafasrepo) +- Fix required partitioning of Single aggregation mode [#6950](https://github.com/apache/datafusion/pull/6950) (Dandandan) +- [MINOR] Remove global sort rule from planner [#6965](https://github.com/apache/datafusion/pull/6965) (mustafasrepo) +- Column support for array_to_string [#6940](https://github.com/apache/datafusion/pull/6940) (jayzhan211) +- chore: fix format [#6991](https://github.com/apache/datafusion/pull/6991) (Weijun-H) +- Extend Ordering Equivalence Support [#6956](https://github.com/apache/datafusion/pull/6956) (mustafasrepo) +- chore: break earlier in macro `contains!` [#6989](https://github.com/apache/datafusion/pull/6989) (Weijun-H) +- fix: incorrect simplification of case expr [#7006](https://github.com/apache/datafusion/pull/7006) (jonahgao) +- Minor: Add String/Binary aggregate tests [#6962](https://github.com/apache/datafusion/pull/6962) (alamb) +- [MINOR] Supporting repartition joins conf in SHJ [#6998](https://github.com/apache/datafusion/pull/6998) (metesynnada) +- [MINOR] Code refactor on hash join utils [#6999](https://github.com/apache/datafusion/pull/6999) (metesynnada) +- feat: array functions treat an array as an element [#6986](https://github.com/apache/datafusion/pull/6986) (izveigor) +- [MINOR] Moving some test utils from EnsureSorting to test_utils [#7009](https://github.com/apache/datafusion/pull/7009) (metesynnada) +- MINOR: Bug fix, Use correct ordering equivalence when window expr contains partition by [#7011](https://github.com/apache/datafusion/pull/7011) (mustafasrepo) +- refactor: Merge Expr::Like and Expr::ILike [#7007](https://github.com/apache/datafusion/pull/7007) (waynexia) +- Docs: Add docs to `RepartitionExec` and architecture guide [#7003](https://github.com/apache/datafusion/pull/7003) (alamb) +- Consolidate `BoundedAggregateStream` [#6932](https://github.com/apache/datafusion/pull/6932) (alamb) +- Minor: Improve aggregate test coverage more 
[#6952](https://github.com/apache/datafusion/pull/6952) (alamb) +- Don't store hashes in GroupOrdering [#7029](https://github.com/apache/datafusion/pull/7029) (tustvold) +- Extract GroupValues (#6969) [#7016](https://github.com/apache/datafusion/pull/7016) (tustvold) +- Refactor AnalysisContext and statistics() of FilterExec [#6982](https://github.com/apache/datafusion/pull/6982) (berkaysynnada) +- Fix `datafusion-cli/Dockerfile` to build successfully [#7031](https://github.com/apache/datafusion/pull/7031) (sarutak) +- functions: support trunc() function with one or two args [#6942](https://github.com/apache/datafusion/pull/6942) (Syleechan) +- Move the column aliases below the SubqueryAlias [#7035](https://github.com/apache/datafusion/pull/7035) (jonahgao) +- fix: `array_concat` with arrays with different dimensions, add `_list*` aliases for `_array*` functions [#7008](https://github.com/apache/datafusion/pull/7008) (izveigor) +- Add support for ClickBench in bench.sh [#7005](https://github.com/apache/datafusion/pull/7005) (alamb) +- Remove RowAccumulators and datafusion-row [#6968](https://github.com/apache/datafusion/pull/6968) (alamb) +- Decimal256 coercion [#7034](https://github.com/apache/datafusion/pull/7034) (jdye64) +- Double RawTable on grow instead of triple [#7041](https://github.com/apache/datafusion/pull/7041) (tustvold) +- Specialize single column primitive group values [#7043](https://github.com/apache/datafusion/pull/7043) (tustvold) diff --git a/dev/changelog/29.0.0.md b/dev/changelog/29.0.0.md index 6d946eb61cba..cf83a283f1d1 100644 --- a/dev/changelog/29.0.0.md +++ b/dev/changelog/29.0.0.md @@ -17,146 +17,146 @@ under the License. 
--> -## [29.0.0](https://github.com/apache/arrow-datafusion/tree/29.0.0) (2023-08-11) +## [29.0.0](https://github.com/apache/datafusion/tree/29.0.0) (2023-08-11) -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/28.0.0...29.0.0) +[Full Changelog](https://github.com/apache/datafusion/compare/28.0.0...29.0.0) **Breaking changes:** -- change the input_type parameter of the create_udaf function from DataType to Vec [#7096](https://github.com/apache/arrow-datafusion/pull/7096) (jiangzhx) -- Implement `array_slice` and `array_element`, remove `array_trim` [#6936](https://github.com/apache/arrow-datafusion/pull/6936) (izveigor) -- improve the ergonomics of creating field and list array accesses [#7215](https://github.com/apache/arrow-datafusion/pull/7215) (izveigor) -- Update Arrow 45.0.0 And Datum Arithmetic, change Decimal Division semantics [#6832](https://github.com/apache/arrow-datafusion/pull/6832) (tustvold) +- change the input_type parameter of the create_udaf function from DataType to Vec [#7096](https://github.com/apache/datafusion/pull/7096) (jiangzhx) +- Implement `array_slice` and `array_element`, remove `array_trim` [#6936](https://github.com/apache/datafusion/pull/6936) (izveigor) +- improve the ergonomics of creating field and list array accesses [#7215](https://github.com/apache/datafusion/pull/7215) (izveigor) +- Update Arrow 45.0.0 And Datum Arithmetic, change Decimal Division semantics [#6832](https://github.com/apache/datafusion/pull/6832) (tustvold) **Implemented enhancements:** -- feat: support SQL array replacement and removement functions [#7057](https://github.com/apache/arrow-datafusion/pull/7057) (izveigor) -- feat: array containment operator `@>` and `<@` [#6885](https://github.com/apache/arrow-datafusion/pull/6885) (izveigor) -- feat: add sqllogictests crate [#7134](https://github.com/apache/arrow-datafusion/pull/7134) (tshauck) -- feat: allow `datafusion-cli` to accept multiple statements 
[#7138](https://github.com/apache/arrow-datafusion/pull/7138) (NiwakaDev) -- feat: Add linear regression aggregate functions [#7211](https://github.com/apache/arrow-datafusion/pull/7211) (2010YOUY01) +- feat: support SQL array replacement and removement functions [#7057](https://github.com/apache/datafusion/pull/7057) (izveigor) +- feat: array containment operator `@>` and `<@` [#6885](https://github.com/apache/datafusion/pull/6885) (izveigor) +- feat: add sqllogictests crate [#7134](https://github.com/apache/datafusion/pull/7134) (tshauck) +- feat: allow `datafusion-cli` to accept multiple statements [#7138](https://github.com/apache/datafusion/pull/7138) (NiwakaDev) +- feat: Add linear regression aggregate functions [#7211](https://github.com/apache/datafusion/pull/7211) (2010YOUY01) **Fixed bugs:** -- fix: disallow interval - timestamp [#7086](https://github.com/apache/arrow-datafusion/pull/7086) (jackwener) -- fix: Projection columns_map remove name search [#7099](https://github.com/apache/arrow-datafusion/pull/7099) (mustafasrepo) -- fix: fix index bug and add test to check it [#7124](https://github.com/apache/arrow-datafusion/pull/7124) (mustafasrepo) -- fix: Fix panic in filter predicate [#7126](https://github.com/apache/arrow-datafusion/pull/7126) (alamb) -- fix: correct count(\*) alias [#7081](https://github.com/apache/arrow-datafusion/pull/7081) (jackwener) -- fix: skip compression tests on --no-default-features [#7172](https://github.com/apache/arrow-datafusion/pull/7172) (not-my-profile) -- fix: typo in substrait [#7224](https://github.com/apache/arrow-datafusion/pull/7224) (waynexia) +- fix: disallow interval - timestamp [#7086](https://github.com/apache/datafusion/pull/7086) (jackwener) +- fix: Projection columns_map remove name search [#7099](https://github.com/apache/datafusion/pull/7099) (mustafasrepo) +- fix: fix index bug and add test to check it [#7124](https://github.com/apache/datafusion/pull/7124) (mustafasrepo) +- fix: Fix panic in filter 
predicate [#7126](https://github.com/apache/datafusion/pull/7126) (alamb) +- fix: correct count(\*) alias [#7081](https://github.com/apache/datafusion/pull/7081) (jackwener) +- fix: skip compression tests on --no-default-features [#7172](https://github.com/apache/datafusion/pull/7172) (not-my-profile) +- fix: typo in substrait [#7224](https://github.com/apache/datafusion/pull/7224) (waynexia) **Documentation updates:** -- Add additional links to main README [#7102](https://github.com/apache/arrow-datafusion/pull/7102) (alamb) -- docs: fix broken link [#7177](https://github.com/apache/arrow-datafusion/pull/7177) (SteveLauC) +- Add additional links to main README [#7102](https://github.com/apache/datafusion/pull/7102) (alamb) +- docs: fix broken link [#7177](https://github.com/apache/datafusion/pull/7177) (SteveLauC) **Merged pull requests:** -- [Minor] Speedup to_array_of_size for Decimal128 [#7055](https://github.com/apache/arrow-datafusion/pull/7055) (Dandandan) -- Replace `array_contains` with SQL array functions: `array_has`, `array_has_any`, `array_has_all` [#6990](https://github.com/apache/arrow-datafusion/pull/6990) (jayzhan211) -- Add more Decimal256 type coercion [#7047](https://github.com/apache/arrow-datafusion/pull/7047) (viirya) -- Create `dfbench`, split up `tpch` benchmark runner into modules [#7054](https://github.com/apache/arrow-datafusion/pull/7054) (alamb) -- chore(deps): update sqlparser requirement from 0.35 to 0.36.1 [#7051](https://github.com/apache/arrow-datafusion/pull/7051) (alamb) -- use ObjectStore for dataframe writes [#6987](https://github.com/apache/arrow-datafusion/pull/6987) (devinjdangelo) -- Prepare 28.0.0 Release [#7056](https://github.com/apache/arrow-datafusion/pull/7056) (andygrove) -- refactor: with_inputs() can use original schema to avoid recompute schema. 
[#7069](https://github.com/apache/arrow-datafusion/pull/7069) (jackwener) -- Fix cli tests [#7083](https://github.com/apache/arrow-datafusion/pull/7083) (mustafasrepo) -- Ignore blank lines and comments at the end of query files for datafusion-cli [#7076](https://github.com/apache/arrow-datafusion/pull/7076) (sarutak) -- Support case sensitive column for `with_column_renamed` [#7063](https://github.com/apache/arrow-datafusion/pull/7063) (comphead) -- Add Decimal256 to `ScalarValue` [#7048](https://github.com/apache/arrow-datafusion/pull/7048) (viirya) -- Enrich CSV reader config: quote & escape [#6927](https://github.com/apache/arrow-datafusion/pull/6927) (parkma99) -- [Refactor] PipelineFixer physical optimizer rule removal [#7059](https://github.com/apache/arrow-datafusion/pull/7059) (metesynnada) -- fix: disallow interval - timestamp [#7086](https://github.com/apache/arrow-datafusion/pull/7086) (jackwener) -- Add Utf8->Binary type coercion for comparison [#7080](https://github.com/apache/arrow-datafusion/pull/7080) (jonahgao) -- Refactor Replace Repartition rule [#7090](https://github.com/apache/arrow-datafusion/pull/7090) (mustafasrepo) -- change the input_type parameter of the create_udaf function from DataType to Vec [#7096](https://github.com/apache/arrow-datafusion/pull/7096) (jiangzhx) -- fix: Projection columns_map remove name search [#7099](https://github.com/apache/arrow-datafusion/pull/7099) (mustafasrepo) -- Minor: Refine doc comments for BuiltinScalarFunction::return_dimension [#7045](https://github.com/apache/arrow-datafusion/pull/7045) (alamb) -- Relax check during aggregate partial mode. 
[#7101](https://github.com/apache/arrow-datafusion/pull/7101) (mustafasrepo) -- refactor byte_to_string and string_to_byte [#7091](https://github.com/apache/arrow-datafusion/pull/7091) (parkma99) -- Minor: add test + docs for 2 argument trunc with columns [#7042](https://github.com/apache/arrow-datafusion/pull/7042) (alamb) -- Move inactive projects to a different section [#7104](https://github.com/apache/arrow-datafusion/pull/7104) (alamb) -- Port remaining information_schema rust tests to sqllogictests [#7050](https://github.com/apache/arrow-datafusion/pull/7050) (palash25) -- Change `rust-version` in Cargo.toml to comply with MSRV [#7107](https://github.com/apache/arrow-datafusion/pull/7107) (sarutak) -- create all needed folders in advance for benchmarks [#7105](https://github.com/apache/arrow-datafusion/pull/7105) (smiklos) -- Initial support for functional dependencies handling primary key and unique constraints [#7040](https://github.com/apache/arrow-datafusion/pull/7040) (mustafasrepo) -- Add ClickBench queries to DataFusion benchmark runner [#7060](https://github.com/apache/arrow-datafusion/pull/7060) (alamb) -- feat: support SQL array replacement and removement functions [#7057](https://github.com/apache/arrow-datafusion/pull/7057) (izveigor) -- [doc], [minor]. Update docstring of group by rewrite. 
[#7111](https://github.com/apache/arrow-datafusion/pull/7111) (mustafasrepo) -- Add additional links to main README [#7102](https://github.com/apache/arrow-datafusion/pull/7102) (alamb) -- fix: fix index bug and add test to check it [#7124](https://github.com/apache/arrow-datafusion/pull/7124) (mustafasrepo) -- fix: Fix panic in filter predicate [#7126](https://github.com/apache/arrow-datafusion/pull/7126) (alamb) -- Add MSRV check as a GA job [#7123](https://github.com/apache/arrow-datafusion/pull/7123) (sarutak) -- Minor: move `AnalysisContext` out of physical_expr and into its own module [#7127](https://github.com/apache/arrow-datafusion/pull/7127) (alamb) -- fix: correct count(\*) alias [#7081](https://github.com/apache/arrow-datafusion/pull/7081) (jackwener) -- `make_array` with column of list [#7137](https://github.com/apache/arrow-datafusion/pull/7137) (jayzhan211) -- feat: array containment operator `@>` and `<@` [#6885](https://github.com/apache/arrow-datafusion/pull/6885) (izveigor) -- [MINOR]: Make memory exec partition number =1, in test utils [#7148](https://github.com/apache/arrow-datafusion/pull/7148) (mustafasrepo) -- Substrait union/union all [#7117](https://github.com/apache/arrow-datafusion/pull/7117) (nseekhao) -- minor: Remove mac m1 compilation for size_of_scalar test [#7151](https://github.com/apache/arrow-datafusion/pull/7151) (mustafasrepo) -- chore: add config option for allowing bounded use of sort-preserving operators [#7164](https://github.com/apache/arrow-datafusion/pull/7164) (wolffcm) -- chore: edition use workspace [#7140](https://github.com/apache/arrow-datafusion/pull/7140) (jackwener) -- [bug]: Fix multi partition wrong column requirement bug [#7129](https://github.com/apache/arrow-datafusion/pull/7129) (mustafasrepo) -- Refactor memory_limit tests to make them easier to extend [#7131](https://github.com/apache/arrow-datafusion/pull/7131) (alamb) -- Minor: show output ordering in MemoryExec 
[#7169](https://github.com/apache/arrow-datafusion/pull/7169) (alamb) -- Move ordering equivalence, and output ordering for joins to util functions [#7167](https://github.com/apache/arrow-datafusion/pull/7167) (mustafasrepo) -- Add regr_slope() aggregate function [#7135](https://github.com/apache/arrow-datafusion/pull/7135) (2010YOUY01) -- Add expression for array_agg [#7159](https://github.com/apache/arrow-datafusion/pull/7159) (willrnch) -- fix: skip compression tests on --no-default-features [#7172](https://github.com/apache/arrow-datafusion/pull/7172) (not-my-profile) -- HashJoin order fixing [#7155](https://github.com/apache/arrow-datafusion/pull/7155) (metesynnada) -- tweak: demote heading levels in PR template [#7176](https://github.com/apache/arrow-datafusion/pull/7176) (not-my-profile) -- feat: add sqllogictests crate [#7134](https://github.com/apache/arrow-datafusion/pull/7134) (tshauck) -- docs: fix broken link [#7177](https://github.com/apache/arrow-datafusion/pull/7177) (SteveLauC) -- Add nanvl builtin function [#7171](https://github.com/apache/arrow-datafusion/pull/7171) (sarutak) -- chore(deps): update apache-avro requirement from 0.14 to 0.15 [#7174](https://github.com/apache/arrow-datafusion/pull/7174) (jackwener) -- make dataframe.task_ctx public [#7183](https://github.com/apache/arrow-datafusion/pull/7183) (milenkovicm) -- feat: allow `datafusion-cli` to accept multiple statements [#7138](https://github.com/apache/arrow-datafusion/pull/7138) (NiwakaDev) -- Add `plan_err!` error macro [#7115](https://github.com/apache/arrow-datafusion/pull/7115) (comphead) -- refactor: add ExecutionPlan::file_scan_config to avoid downcasting [#7175](https://github.com/apache/arrow-datafusion/pull/7175) (not-my-profile) -- Minor: Add documentation + diagrams for ExternalSorter [#7179](https://github.com/apache/arrow-datafusion/pull/7179) (alamb) -- Support simplifying expressions such as `~ ^(ba_r|foo)$` , where the string includes underline 
[#7186](https://github.com/apache/arrow-datafusion/pull/7186) (tanruixiang) -- Add MemoryReservation::{split_off, take, new_empty} [#7184](https://github.com/apache/arrow-datafusion/pull/7184) (alamb) -- Update bench.sh to only run 5 iterations [#7189](https://github.com/apache/arrow-datafusion/pull/7189) (alamb) -- Implement `array_slice` and `array_element`, remove `array_trim` [#6936](https://github.com/apache/arrow-datafusion/pull/6936) (izveigor) -- Unify DataFrame and SQL (Insert Into) Write Methods [#7141](https://github.com/apache/arrow-datafusion/pull/7141) (devinjdangelo) -- Minor: Further Increase stack_size to prevent roundtrip_deeply_nested test stack overflow [#7208](https://github.com/apache/arrow-datafusion/pull/7208) (devinjdangelo) -- Don't track files generated by regen.sh [#7204](https://github.com/apache/arrow-datafusion/pull/7204) (sarutak) -- Update some docs/scripts to reflect the removed/added packages. [#7202](https://github.com/apache/arrow-datafusion/pull/7202) (sarutak) -- Implement `array_repeat`, remove `array_fill` [#7199](https://github.com/apache/arrow-datafusion/pull/7199) (izveigor) -- Use tokio only if running from a multi-thread tokio context [#7205](https://github.com/apache/arrow-datafusion/pull/7205) (viirya) -- Remove Outdated NY Taxi benchmark [#7210](https://github.com/apache/arrow-datafusion/pull/7210) (alamb) -- improve the ergonomics of creating field and list array accesses [#7215](https://github.com/apache/arrow-datafusion/pull/7215) (izveigor) -- [MINOR] Document refactor on NestedLoopJoin [#7217](https://github.com/apache/arrow-datafusion/pull/7217) (metesynnada) -- Docs: Add GlareDB to list of DataFusion users [#7223](https://github.com/apache/arrow-datafusion/pull/7223) (alamb) -- fix: typo in substrait [#7224](https://github.com/apache/arrow-datafusion/pull/7224) (waynexia) -- Minor: Add constructors to GetFieldAccessExpr and add docs [#7219](https://github.com/apache/arrow-datafusion/pull/7219) (alamb) -- 
chore: required at least 1 approve before merge [#7226](https://github.com/apache/arrow-datafusion/pull/7226) (jackwener) -- feat: Add linear regression aggregate functions [#7211](https://github.com/apache/arrow-datafusion/pull/7211) (2010YOUY01) -- Add `Expr::field`, `Expr::index`, and `Expr::slice`, add docs [#7218](https://github.com/apache/arrow-datafusion/pull/7218) (alamb) -- Extend insert into support to include Json backed tables [#7212](https://github.com/apache/arrow-datafusion/pull/7212) (devinjdangelo) -- Minor: rename `GetFieldAccessCharacteristic` and add docs [#7220](https://github.com/apache/arrow-datafusion/pull/7220) (alamb) -- Minor: Remove unecessary `clone_with_replacement` [#7232](https://github.com/apache/arrow-datafusion/pull/7232) (alamb) -- Update Arrow 45.0.0 And Datum Arithmetic, change Decimal Division semantics [#6832](https://github.com/apache/arrow-datafusion/pull/6832) (tustvold) -- Support `make_array` null handling in nested version [#7207](https://github.com/apache/arrow-datafusion/pull/7207) (jayzhan211) -- [Minor], Bug Fix: Add empty ordering check at the source. 
[#7230](https://github.com/apache/arrow-datafusion/pull/7230) (mustafasrepo) -- Minor: with preserve order now receives argument [#7231](https://github.com/apache/arrow-datafusion/pull/7231) (mustafasrepo) -- Minor: Remove [[example]] table from datafusion-examples/Cargo.toml [#7235](https://github.com/apache/arrow-datafusion/pull/7235) (sarutak) -- Remove additional cast from TPCH q8 [#7233](https://github.com/apache/arrow-datafusion/pull/7233) (viirya) -- Minor: Move `project_schema` to `datafusion_common` [#7237](https://github.com/apache/arrow-datafusion/pull/7237) (alamb) -- Minor: Extract ExecutionPlanVisitor to its own module [#7236](https://github.com/apache/arrow-datafusion/pull/7236) (alamb) -- Minor: Move streams out of `physical_plan` module [#7234](https://github.com/apache/arrow-datafusion/pull/7234) (alamb) -- doc: Add link to contributor's guide for new functions within the src [#7240](https://github.com/apache/arrow-datafusion/pull/7240) (2010YOUY01) -- Account for memory usage in SortPreservingMerge (#5885) [#7130](https://github.com/apache/arrow-datafusion/pull/7130) (alamb) -- Deprecate `batch_byte_size` [#7245](https://github.com/apache/arrow-datafusion/pull/7245) (alamb) -- Minor: Move `Partitioning` and`Distribution` to physical_expr [#7238](https://github.com/apache/arrow-datafusion/pull/7238) (alamb) -- Minor: remove duplication in `create_writer` [#7229](https://github.com/apache/arrow-datafusion/pull/7229) (alamb) -- Support array `flatten` sql function [#7239](https://github.com/apache/arrow-datafusion/pull/7239) (jayzhan211) -- Minor: fix clippy for memory_limit test [#7248](https://github.com/apache/arrow-datafusion/pull/7248) (yjshen) -- Update `physical_plan` tests to not use SessionContext [#7243](https://github.com/apache/arrow-datafusion/pull/7243) (alamb) -- Add API to make `unnest` consistent with DuckDB/ClickHouse, add option for preserve_nulls, update docs [#7168](https://github.com/apache/arrow-datafusion/pull/7168) (alamb) 
-- chore(sqllogictests-doc): add testing set up [#7258](https://github.com/apache/arrow-datafusion/pull/7258) (appletreeisyellow) -- Avoid to use TempDir::into_path for temporary dirs expected to be deleted automatically [#7252](https://github.com/apache/arrow-datafusion/pull/7252) (sarutak) -- [MINOR]: update benefits_from_input_partitioning implementation for projection and repartition [#7246](https://github.com/apache/arrow-datafusion/pull/7246) (mustafasrepo) -- Adding order equivalence support on MemoryExec [#7259](https://github.com/apache/arrow-datafusion/pull/7259) (metesynnada) -- chore(functions): fix function names typo [#7269](https://github.com/apache/arrow-datafusion/pull/7269) (appletreeisyellow) +- [Minor] Speedup to_array_of_size for Decimal128 [#7055](https://github.com/apache/datafusion/pull/7055) (Dandandan) +- Replace `array_contains` with SQL array functions: `array_has`, `array_has_any`, `array_has_all` [#6990](https://github.com/apache/datafusion/pull/6990) (jayzhan211) +- Add more Decimal256 type coercion [#7047](https://github.com/apache/datafusion/pull/7047) (viirya) +- Create `dfbench`, split up `tpch` benchmark runner into modules [#7054](https://github.com/apache/datafusion/pull/7054) (alamb) +- chore(deps): update sqlparser requirement from 0.35 to 0.36.1 [#7051](https://github.com/apache/datafusion/pull/7051) (alamb) +- use ObjectStore for dataframe writes [#6987](https://github.com/apache/datafusion/pull/6987) (devinjdangelo) +- Prepare 28.0.0 Release [#7056](https://github.com/apache/datafusion/pull/7056) (andygrove) +- refactor: with_inputs() can use original schema to avoid recompute schema. 
[#7069](https://github.com/apache/datafusion/pull/7069) (jackwener) +- Fix cli tests [#7083](https://github.com/apache/datafusion/pull/7083) (mustafasrepo) +- Ignore blank lines and comments at the end of query files for datafusion-cli [#7076](https://github.com/apache/datafusion/pull/7076) (sarutak) +- Support case sensitive column for `with_column_renamed` [#7063](https://github.com/apache/datafusion/pull/7063) (comphead) +- Add Decimal256 to `ScalarValue` [#7048](https://github.com/apache/datafusion/pull/7048) (viirya) +- Enrich CSV reader config: quote & escape [#6927](https://github.com/apache/datafusion/pull/6927) (parkma99) +- [Refactor] PipelineFixer physical optimizer rule removal [#7059](https://github.com/apache/datafusion/pull/7059) (metesynnada) +- fix: disallow interval - timestamp [#7086](https://github.com/apache/datafusion/pull/7086) (jackwener) +- Add Utf8->Binary type coercion for comparison [#7080](https://github.com/apache/datafusion/pull/7080) (jonahgao) +- Refactor Replace Repartition rule [#7090](https://github.com/apache/datafusion/pull/7090) (mustafasrepo) +- change the input_type parameter of the create_udaf function from DataType to Vec [#7096](https://github.com/apache/datafusion/pull/7096) (jiangzhx) +- fix: Projection columns_map remove name search [#7099](https://github.com/apache/datafusion/pull/7099) (mustafasrepo) +- Minor: Refine doc comments for BuiltinScalarFunction::return_dimension [#7045](https://github.com/apache/datafusion/pull/7045) (alamb) +- Relax check during aggregate partial mode. 
[#7101](https://github.com/apache/datafusion/pull/7101) (mustafasrepo) +- refactor byte_to_string and string_to_byte [#7091](https://github.com/apache/datafusion/pull/7091) (parkma99) +- Minor: add test + docs for 2 argument trunc with columns [#7042](https://github.com/apache/datafusion/pull/7042) (alamb) +- Move inactive projects to a different section [#7104](https://github.com/apache/datafusion/pull/7104) (alamb) +- Port remaining information_schema rust tests to sqllogictests [#7050](https://github.com/apache/datafusion/pull/7050) (palash25) +- Change `rust-version` in Cargo.toml to comply with MSRV [#7107](https://github.com/apache/datafusion/pull/7107) (sarutak) +- create all needed folders in advance for benchmarks [#7105](https://github.com/apache/datafusion/pull/7105) (smiklos) +- Initial support for functional dependencies handling primary key and unique constraints [#7040](https://github.com/apache/datafusion/pull/7040) (mustafasrepo) +- Add ClickBench queries to DataFusion benchmark runner [#7060](https://github.com/apache/datafusion/pull/7060) (alamb) +- feat: support SQL array replacement and removement functions [#7057](https://github.com/apache/datafusion/pull/7057) (izveigor) +- [doc], [minor]. Update docstring of group by rewrite. 
[#7111](https://github.com/apache/datafusion/pull/7111) (mustafasrepo) +- Add additional links to main README [#7102](https://github.com/apache/datafusion/pull/7102) (alamb) +- fix: fix index bug and add test to check it [#7124](https://github.com/apache/datafusion/pull/7124) (mustafasrepo) +- fix: Fix panic in filter predicate [#7126](https://github.com/apache/datafusion/pull/7126) (alamb) +- Add MSRV check as a GA job [#7123](https://github.com/apache/datafusion/pull/7123) (sarutak) +- Minor: move `AnalysisContext` out of physical_expr and into its own module [#7127](https://github.com/apache/datafusion/pull/7127) (alamb) +- fix: correct count(\*) alias [#7081](https://github.com/apache/datafusion/pull/7081) (jackwener) +- `make_array` with column of list [#7137](https://github.com/apache/datafusion/pull/7137) (jayzhan211) +- feat: array containment operator `@>` and `<@` [#6885](https://github.com/apache/datafusion/pull/6885) (izveigor) +- [MINOR]: Make memory exec partition number =1, in test utils [#7148](https://github.com/apache/datafusion/pull/7148) (mustafasrepo) +- Substrait union/union all [#7117](https://github.com/apache/datafusion/pull/7117) (nseekhao) +- minor: Remove mac m1 compilation for size_of_scalar test [#7151](https://github.com/apache/datafusion/pull/7151) (mustafasrepo) +- chore: add config option for allowing bounded use of sort-preserving operators [#7164](https://github.com/apache/datafusion/pull/7164) (wolffcm) +- chore: edition use workspace [#7140](https://github.com/apache/datafusion/pull/7140) (jackwener) +- [bug]: Fix multi partition wrong column requirement bug [#7129](https://github.com/apache/datafusion/pull/7129) (mustafasrepo) +- Refactor memory_limit tests to make them easier to extend [#7131](https://github.com/apache/datafusion/pull/7131) (alamb) +- Minor: show output ordering in MemoryExec [#7169](https://github.com/apache/datafusion/pull/7169) (alamb) +- Move ordering equivalence, and output ordering for joins to util 
functions [#7167](https://github.com/apache/datafusion/pull/7167) (mustafasrepo) +- Add regr_slope() aggregate function [#7135](https://github.com/apache/datafusion/pull/7135) (2010YOUY01) +- Add expression for array_agg [#7159](https://github.com/apache/datafusion/pull/7159) (willrnch) +- fix: skip compression tests on --no-default-features [#7172](https://github.com/apache/datafusion/pull/7172) (not-my-profile) +- HashJoin order fixing [#7155](https://github.com/apache/datafusion/pull/7155) (metesynnada) +- tweak: demote heading levels in PR template [#7176](https://github.com/apache/datafusion/pull/7176) (not-my-profile) +- feat: add sqllogictests crate [#7134](https://github.com/apache/datafusion/pull/7134) (tshauck) +- docs: fix broken link [#7177](https://github.com/apache/datafusion/pull/7177) (SteveLauC) +- Add nanvl builtin function [#7171](https://github.com/apache/datafusion/pull/7171) (sarutak) +- chore(deps): update apache-avro requirement from 0.14 to 0.15 [#7174](https://github.com/apache/datafusion/pull/7174) (jackwener) +- make dataframe.task_ctx public [#7183](https://github.com/apache/datafusion/pull/7183) (milenkovicm) +- feat: allow `datafusion-cli` to accept multiple statements [#7138](https://github.com/apache/datafusion/pull/7138) (NiwakaDev) +- Add `plan_err!` error macro [#7115](https://github.com/apache/datafusion/pull/7115) (comphead) +- refactor: add ExecutionPlan::file_scan_config to avoid downcasting [#7175](https://github.com/apache/datafusion/pull/7175) (not-my-profile) +- Minor: Add documentation + diagrams for ExternalSorter [#7179](https://github.com/apache/datafusion/pull/7179) (alamb) +- Support simplifying expressions such as `~ ^(ba_r|foo)$` , where the string includes underline [#7186](https://github.com/apache/datafusion/pull/7186) (tanruixiang) +- Add MemoryReservation::{split_off, take, new_empty} [#7184](https://github.com/apache/datafusion/pull/7184) (alamb) +- Update bench.sh to only run 5 iterations 
[#7189](https://github.com/apache/datafusion/pull/7189) (alamb) +- Implement `array_slice` and `array_element`, remove `array_trim` [#6936](https://github.com/apache/datafusion/pull/6936) (izveigor) +- Unify DataFrame and SQL (Insert Into) Write Methods [#7141](https://github.com/apache/datafusion/pull/7141) (devinjdangelo) +- Minor: Further Increase stack_size to prevent roundtrip_deeply_nested test stack overflow [#7208](https://github.com/apache/datafusion/pull/7208) (devinjdangelo) +- Don't track files generated by regen.sh [#7204](https://github.com/apache/datafusion/pull/7204) (sarutak) +- Update some docs/scripts to reflect the removed/added packages. [#7202](https://github.com/apache/datafusion/pull/7202) (sarutak) +- Implement `array_repeat`, remove `array_fill` [#7199](https://github.com/apache/datafusion/pull/7199) (izveigor) +- Use tokio only if running from a multi-thread tokio context [#7205](https://github.com/apache/datafusion/pull/7205) (viirya) +- Remove Outdated NY Taxi benchmark [#7210](https://github.com/apache/datafusion/pull/7210) (alamb) +- improve the ergonomics of creating field and list array accesses [#7215](https://github.com/apache/datafusion/pull/7215) (izveigor) +- [MINOR] Document refactor on NestedLoopJoin [#7217](https://github.com/apache/datafusion/pull/7217) (metesynnada) +- Docs: Add GlareDB to list of DataFusion users [#7223](https://github.com/apache/datafusion/pull/7223) (alamb) +- fix: typo in substrait [#7224](https://github.com/apache/datafusion/pull/7224) (waynexia) +- Minor: Add constructors to GetFieldAccessExpr and add docs [#7219](https://github.com/apache/datafusion/pull/7219) (alamb) +- chore: required at least 1 approve before merge [#7226](https://github.com/apache/datafusion/pull/7226) (jackwener) +- feat: Add linear regression aggregate functions [#7211](https://github.com/apache/datafusion/pull/7211) (2010YOUY01) +- Add `Expr::field`, `Expr::index`, and `Expr::slice`, add docs 
[#7218](https://github.com/apache/datafusion/pull/7218) (alamb) +- Extend insert into support to include Json backed tables [#7212](https://github.com/apache/datafusion/pull/7212) (devinjdangelo) +- Minor: rename `GetFieldAccessCharacteristic` and add docs [#7220](https://github.com/apache/datafusion/pull/7220) (alamb) +- Minor: Remove unecessary `clone_with_replacement` [#7232](https://github.com/apache/datafusion/pull/7232) (alamb) +- Update Arrow 45.0.0 And Datum Arithmetic, change Decimal Division semantics [#6832](https://github.com/apache/datafusion/pull/6832) (tustvold) +- Support `make_array` null handling in nested version [#7207](https://github.com/apache/datafusion/pull/7207) (jayzhan211) +- [Minor], Bug Fix: Add empty ordering check at the source. [#7230](https://github.com/apache/datafusion/pull/7230) (mustafasrepo) +- Minor: with preserve order now receives argument [#7231](https://github.com/apache/datafusion/pull/7231) (mustafasrepo) +- Minor: Remove [[example]] table from datafusion-examples/Cargo.toml [#7235](https://github.com/apache/datafusion/pull/7235) (sarutak) +- Remove additional cast from TPCH q8 [#7233](https://github.com/apache/datafusion/pull/7233) (viirya) +- Minor: Move `project_schema` to `datafusion_common` [#7237](https://github.com/apache/datafusion/pull/7237) (alamb) +- Minor: Extract ExecutionPlanVisitor to its own module [#7236](https://github.com/apache/datafusion/pull/7236) (alamb) +- Minor: Move streams out of `physical_plan` module [#7234](https://github.com/apache/datafusion/pull/7234) (alamb) +- doc: Add link to contributor's guide for new functions within the src [#7240](https://github.com/apache/datafusion/pull/7240) (2010YOUY01) +- Account for memory usage in SortPreservingMerge (#5885) [#7130](https://github.com/apache/datafusion/pull/7130) (alamb) +- Deprecate `batch_byte_size` [#7245](https://github.com/apache/datafusion/pull/7245) (alamb) +- Minor: Move `Partitioning` and`Distribution` to physical_expr 
[#7238](https://github.com/apache/datafusion/pull/7238) (alamb) +- Minor: remove duplication in `create_writer` [#7229](https://github.com/apache/datafusion/pull/7229) (alamb) +- Support array `flatten` sql function [#7239](https://github.com/apache/datafusion/pull/7239) (jayzhan211) +- Minor: fix clippy for memory_limit test [#7248](https://github.com/apache/datafusion/pull/7248) (yjshen) +- Update `physical_plan` tests to not use SessionContext [#7243](https://github.com/apache/datafusion/pull/7243) (alamb) +- Add API to make `unnest` consistent with DuckDB/ClickHouse, add option for preserve_nulls, update docs [#7168](https://github.com/apache/datafusion/pull/7168) (alamb) +- chore(sqllogictests-doc): add testing set up [#7258](https://github.com/apache/datafusion/pull/7258) (appletreeisyellow) +- Avoid to use TempDir::into_path for temporary dirs expected to be deleted automatically [#7252](https://github.com/apache/datafusion/pull/7252) (sarutak) +- [MINOR]: update benefits_from_input_partitioning implementation for projection and repartition [#7246](https://github.com/apache/datafusion/pull/7246) (mustafasrepo) +- Adding order equivalence support on MemoryExec [#7259](https://github.com/apache/datafusion/pull/7259) (metesynnada) +- chore(functions): fix function names typo [#7269](https://github.com/apache/datafusion/pull/7269) (appletreeisyellow) diff --git a/dev/changelog/30.0.0.md b/dev/changelog/30.0.0.md index e71355549744..ecd18360b3e8 100644 --- a/dev/changelog/30.0.0.md +++ b/dev/changelog/30.0.0.md @@ -17,67 +17,67 @@ under the License. 
--> -## [30.0.0](https://github.com/apache/arrow-datafusion/tree/30.0.0) (2023-08-22) +## [30.0.0](https://github.com/apache/datafusion/tree/30.0.0) (2023-08-22) -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/29.0.0...30.0.0) +[Full Changelog](https://github.com/apache/datafusion/compare/29.0.0...30.0.0) **Implemented enhancements:** -- feat: Add support for PostgreSQL bitwise XOR operator [#7256](https://github.com/apache/arrow-datafusion/pull/7256) (jonahgao) +- feat: Add support for PostgreSQL bitwise XOR operator [#7256](https://github.com/apache/datafusion/pull/7256) (jonahgao) **Fixed bugs:** -- fix(functions): support `Dictionary` for string and int functions [#7262](https://github.com/apache/arrow-datafusion/pull/7262) (appletreeisyellow) -- fix: CLI should support different sql dialects [#7263](https://github.com/apache/arrow-datafusion/pull/7263) (jonahgao) -- fix: build_timestamp_list data type mismatch [#7267](https://github.com/apache/arrow-datafusion/pull/7267) (yukkit) +- fix(functions): support `Dictionary` for string and int functions [#7262](https://github.com/apache/datafusion/pull/7262) (appletreeisyellow) +- fix: CLI should support different sql dialects [#7263](https://github.com/apache/datafusion/pull/7263) (jonahgao) +- fix: build_timestamp_list data type mismatch [#7267](https://github.com/apache/datafusion/pull/7267) (yukkit) **Documentation updates:** -- Minor: Remove stubbed out redundant Execution Plan section of library user guide [#7309](https://github.com/apache/arrow-datafusion/pull/7309) (alamb) +- Minor: Remove stubbed out redundant Execution Plan section of library user guide [#7309](https://github.com/apache/datafusion/pull/7309) (alamb) **Merged pull requests:** -- chore(functions): fix function names typo [#7269](https://github.com/apache/arrow-datafusion/pull/7269) (appletreeisyellow) -- fix(functions): support `Dictionary` for string and int functions 
[#7262](https://github.com/apache/arrow-datafusion/pull/7262) (appletreeisyellow) -- Change benefits_from_partitioning flag to vector [#7247](https://github.com/apache/arrow-datafusion/pull/7247) (mustafasrepo) -- fix: CLI should support different sql dialects [#7263](https://github.com/apache/arrow-datafusion/pull/7263) (jonahgao) -- fix: build_timestamp_list data type mismatch [#7267](https://github.com/apache/arrow-datafusion/pull/7267) (yukkit) -- feat: Add support for PostgreSQL bitwise XOR operator [#7256](https://github.com/apache/arrow-datafusion/pull/7256) (jonahgao) -- Improve error message for aggregate/window functions [#7265](https://github.com/apache/arrow-datafusion/pull/7265) (2010YOUY01) -- Extend insert into to support Parquet backed tables [#7244](https://github.com/apache/arrow-datafusion/pull/7244) (devinjdangelo) -- Operators documentation [#7264](https://github.com/apache/arrow-datafusion/pull/7264) (spaydar) -- Minor: Add upstream ticket reference in comments [#7275](https://github.com/apache/arrow-datafusion/pull/7275) (alamb) -- Add parquet-filter and sort benchmarks to dfbench [#7120](https://github.com/apache/arrow-datafusion/pull/7120) (alamb) -- Allow `skip_failed_rules` to skip buggy logical plan rules that have a schema mismatch [#7277](https://github.com/apache/arrow-datafusion/pull/7277) (smiklos) -- Enable creating and inserting to empty external tables via SQL [#7276](https://github.com/apache/arrow-datafusion/pull/7276) (devinjdangelo) -- Prepare 29.0.0 Release [#7270](https://github.com/apache/arrow-datafusion/pull/7270) (andygrove) -- Hotfix: Test in information_schema.slt fails [#7286](https://github.com/apache/arrow-datafusion/pull/7286) (sarutak) -- Move sqllogictests to sqllogictests crate to break cyclic dependency [#7284](https://github.com/apache/arrow-datafusion/pull/7284) (alamb) -- Add isnan and iszero [#7274](https://github.com/apache/arrow-datafusion/pull/7274) (sarutak) -- Add library guide for table provider and 
catalog providers [#7287](https://github.com/apache/arrow-datafusion/pull/7287) (tshauck) -- Implement Support for Copy To Logical and Physical plans [#7283](https://github.com/apache/arrow-datafusion/pull/7283) (devinjdangelo) -- Add `internal_err!` error macro [#7293](https://github.com/apache/arrow-datafusion/pull/7293) (comphead) -- refactor: data types in `array_expressions` [#7280](https://github.com/apache/arrow-datafusion/pull/7280) (izveigor) -- Fix Unnest for array aggregations. [#7300](https://github.com/apache/arrow-datafusion/pull/7300) (vincev) -- Minor: Followup tasks for `nanvl` [#7311](https://github.com/apache/arrow-datafusion/pull/7311) (sarutak) -- Minor: Remove stubbed out redundant Execution Plan section of library user guide [#7309](https://github.com/apache/arrow-datafusion/pull/7309) (alamb) -- Minor: fix some parquet writer session level defaults [#7295](https://github.com/apache/arrow-datafusion/pull/7295) (devinjdangelo) -- Add Sqllogictests for INSERT INTO external table [#7294](https://github.com/apache/arrow-datafusion/pull/7294) (devinjdangelo) -- Minor: Fix documentation typos for array expressions [#7314](https://github.com/apache/arrow-datafusion/pull/7314) (Weijun-H) -- Qualify filter fields in the update plan [#7316](https://github.com/apache/arrow-datafusion/pull/7316) (gruuya) -- chore(deps): update tokio requirement to 1.28 [#7324](https://github.com/apache/arrow-datafusion/pull/7324) (jonahgao) -- Bug-fix / Join Output Orderings [#7296](https://github.com/apache/arrow-datafusion/pull/7296) (berkaysynnada) -- Add `internal_err` error macros. 
Part 2 [#7321](https://github.com/apache/arrow-datafusion/pull/7321) (comphead) -- Minor: Improve doc comments to datafusion-sql [#7318](https://github.com/apache/arrow-datafusion/pull/7318) (alamb) -- Minor: make memory_limit tests more self describing [#7190](https://github.com/apache/arrow-datafusion/pull/7190) (alamb) -- Minor: Improve docstrings for `LogicalPlan` [#7331](https://github.com/apache/arrow-datafusion/pull/7331) (alamb) -- minor: fix doc/typo [#7341](https://github.com/apache/arrow-datafusion/pull/7341) (jackwener) -- Minor: Extract `FileScanConfig` into its own module [#7335](https://github.com/apache/arrow-datafusion/pull/7335) (alamb) -- Minor: Move shared testing code into datafusion_common [#7334](https://github.com/apache/arrow-datafusion/pull/7334) (alamb) -- refine: `substr` error [#7339](https://github.com/apache/arrow-datafusion/pull/7339) (Weijun-H) -- Add `not_impl_err` error macro [#7340](https://github.com/apache/arrow-datafusion/pull/7340) (comphead) -- chore: public sql_statement_to_plan_with_context() [#7268](https://github.com/apache/arrow-datafusion/pull/7268) (waynexia) -- Deprecate ScalarValue bitor, bitand, and bitxor (#6842) [#7351](https://github.com/apache/arrow-datafusion/pull/7351) (tustvold) -- feature: Support `EXPLAIN COPY` [#7291](https://github.com/apache/arrow-datafusion/pull/7291) (alamb) -- Add `SQLOptions` for controlling allowed SQL statements, update docs [#7333](https://github.com/apache/arrow-datafusion/pull/7333) (alamb) -- Refactor: Consolidate OutputFileFormat and FileType into datafusion_common [#7336](https://github.com/apache/arrow-datafusion/pull/7336) (devinjdangelo) +- chore(functions): fix function names typo [#7269](https://github.com/apache/datafusion/pull/7269) (appletreeisyellow) +- fix(functions): support `Dictionary` for string and int functions [#7262](https://github.com/apache/datafusion/pull/7262) (appletreeisyellow) +- Change benefits_from_partitioning flag to vector 
[#7247](https://github.com/apache/datafusion/pull/7247) (mustafasrepo) +- fix: CLI should support different sql dialects [#7263](https://github.com/apache/datafusion/pull/7263) (jonahgao) +- fix: build_timestamp_list data type mismatch [#7267](https://github.com/apache/datafusion/pull/7267) (yukkit) +- feat: Add support for PostgreSQL bitwise XOR operator [#7256](https://github.com/apache/datafusion/pull/7256) (jonahgao) +- Improve error message for aggregate/window functions [#7265](https://github.com/apache/datafusion/pull/7265) (2010YOUY01) +- Extend insert into to support Parquet backed tables [#7244](https://github.com/apache/datafusion/pull/7244) (devinjdangelo) +- Operators documentation [#7264](https://github.com/apache/datafusion/pull/7264) (spaydar) +- Minor: Add upstream ticket reference in comments [#7275](https://github.com/apache/datafusion/pull/7275) (alamb) +- Add parquet-filter and sort benchmarks to dfbench [#7120](https://github.com/apache/datafusion/pull/7120) (alamb) +- Allow `skip_failed_rules` to skip buggy logical plan rules that have a schema mismatch [#7277](https://github.com/apache/datafusion/pull/7277) (smiklos) +- Enable creating and inserting to empty external tables via SQL [#7276](https://github.com/apache/datafusion/pull/7276) (devinjdangelo) +- Prepare 29.0.0 Release [#7270](https://github.com/apache/datafusion/pull/7270) (andygrove) +- Hotfix: Test in information_schema.slt fails [#7286](https://github.com/apache/datafusion/pull/7286) (sarutak) +- Move sqllogictests to sqllogictests crate to break cyclic dependency [#7284](https://github.com/apache/datafusion/pull/7284) (alamb) +- Add isnan and iszero [#7274](https://github.com/apache/datafusion/pull/7274) (sarutak) +- Add library guide for table provider and catalog providers [#7287](https://github.com/apache/datafusion/pull/7287) (tshauck) +- Implement Support for Copy To Logical and Physical plans [#7283](https://github.com/apache/datafusion/pull/7283) (devinjdangelo) +- Add 
`internal_err!` error macro [#7293](https://github.com/apache/datafusion/pull/7293) (comphead) +- refactor: data types in `array_expressions` [#7280](https://github.com/apache/datafusion/pull/7280) (izveigor) +- Fix Unnest for array aggregations. [#7300](https://github.com/apache/datafusion/pull/7300) (vincev) +- Minor: Followup tasks for `nanvl` [#7311](https://github.com/apache/datafusion/pull/7311) (sarutak) +- Minor: Remove stubbed out redundant Execution Plan section of library user guide [#7309](https://github.com/apache/datafusion/pull/7309) (alamb) +- Minor: fix some parquet writer session level defaults [#7295](https://github.com/apache/datafusion/pull/7295) (devinjdangelo) +- Add Sqllogictests for INSERT INTO external table [#7294](https://github.com/apache/datafusion/pull/7294) (devinjdangelo) +- Minor: Fix documentation typos for array expressions [#7314](https://github.com/apache/datafusion/pull/7314) (Weijun-H) +- Qualify filter fields in the update plan [#7316](https://github.com/apache/datafusion/pull/7316) (gruuya) +- chore(deps): update tokio requirement to 1.28 [#7324](https://github.com/apache/datafusion/pull/7324) (jonahgao) +- Bug-fix / Join Output Orderings [#7296](https://github.com/apache/datafusion/pull/7296) (berkaysynnada) +- Add `internal_err` error macros. 
Part 2 [#7321](https://github.com/apache/datafusion/pull/7321) (comphead) +- Minor: Improve doc comments to datafusion-sql [#7318](https://github.com/apache/datafusion/pull/7318) (alamb) +- Minor: make memory_limit tests more self describing [#7190](https://github.com/apache/datafusion/pull/7190) (alamb) +- Minor: Improve docstrings for `LogicalPlan` [#7331](https://github.com/apache/datafusion/pull/7331) (alamb) +- minor: fix doc/typo [#7341](https://github.com/apache/datafusion/pull/7341) (jackwener) +- Minor: Extract `FileScanConfig` into its own module [#7335](https://github.com/apache/datafusion/pull/7335) (alamb) +- Minor: Move shared testing code into datafusion_common [#7334](https://github.com/apache/datafusion/pull/7334) (alamb) +- refine: `substr` error [#7339](https://github.com/apache/datafusion/pull/7339) (Weijun-H) +- Add `not_impl_err` error macro [#7340](https://github.com/apache/datafusion/pull/7340) (comphead) +- chore: public sql_statement_to_plan_with_context() [#7268](https://github.com/apache/datafusion/pull/7268) (waynexia) +- Deprecate ScalarValue bitor, bitand, and bitxor (#6842) [#7351](https://github.com/apache/datafusion/pull/7351) (tustvold) +- feature: Support `EXPLAIN COPY` [#7291](https://github.com/apache/datafusion/pull/7291) (alamb) +- Add `SQLOptions` for controlling allowed SQL statements, update docs [#7333](https://github.com/apache/datafusion/pull/7333) (alamb) +- Refactor: Consolidate OutputFileFormat and FileType into datafusion_common [#7336](https://github.com/apache/datafusion/pull/7336) (devinjdangelo) diff --git a/dev/changelog/31.0.0.md b/dev/changelog/31.0.0.md index 9f606ffd51e1..1bffe576d038 100644 --- a/dev/changelog/31.0.0.md +++ b/dev/changelog/31.0.0.md @@ -17,107 +17,107 @@ under the License. 
--> -## [31.0.0](https://github.com/apache/arrow-datafusion/tree/31.0.0) (2023-09-08) +## [31.0.0](https://github.com/apache/datafusion/tree/31.0.0) (2023-09-08) -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/30.0.0...31.0.0) +[Full Changelog](https://github.com/apache/datafusion/compare/30.0.0...31.0.0) **Breaking changes:** -- Specialize Avg and Sum accumulators (#6842) [#7358](https://github.com/apache/arrow-datafusion/pull/7358) (tustvold) -- Use datum arithmetic scalar value [#7375](https://github.com/apache/arrow-datafusion/pull/7375) (tustvold) +- Specialize Avg and Sum accumulators (#6842) [#7358](https://github.com/apache/datafusion/pull/7358) (tustvold) +- Use datum arithmetic scalar value [#7375](https://github.com/apache/datafusion/pull/7375) (tustvold) **Implemented enhancements:** -- feat: `array-empty` [#7313](https://github.com/apache/arrow-datafusion/pull/7313) (Weijun-H) -- Support `REPLACE` SQL alias syntax [#7368](https://github.com/apache/arrow-datafusion/pull/7368) (berkaysynnada) -- feat: support primary key alternate syntax [#7160](https://github.com/apache/arrow-datafusion/pull/7160) (parkma99) -- docs: Add `Expr` library developer page [#7359](https://github.com/apache/arrow-datafusion/pull/7359) (tshauck) -- feat: support Binary for `min/max` [#7397](https://github.com/apache/arrow-datafusion/pull/7397) (Weijun-H) -- feat: Add memory pool configuration to `datafusion-cli` [#7424](https://github.com/apache/arrow-datafusion/pull/7424) (Weijun-H) -- Support Configuring Arrow RecordBatch Writers via SQL Statement Options [#7390](https://github.com/apache/arrow-datafusion/pull/7390) (devinjdangelo) -- Add ROLLUP and GROUPING SETS substrait support [#7382](https://github.com/apache/arrow-datafusion/pull/7382) (nseekhao) -- feat: Allow creating a ValuesExec from record batches [#7444](https://github.com/apache/arrow-datafusion/pull/7444) (scsmithr) -- minor: Add ARROW to `CREATE EXTERNAL TABLE` docs and add example of 
`COMPRESSION TYPE` [#7489](https://github.com/apache/arrow-datafusion/pull/7489) (alamb) -- Support Configuring Parquet Column Specific Options via SQL Statement Options [#7466](https://github.com/apache/arrow-datafusion/pull/7466) (devinjdangelo) -- Write Multiple Parquet Files in Parallel [#7483](https://github.com/apache/arrow-datafusion/pull/7483) (devinjdangelo) -- feat: explain with statistics [#7459](https://github.com/apache/arrow-datafusion/pull/7459) (korowa) +- feat: `array-empty` [#7313](https://github.com/apache/datafusion/pull/7313) (Weijun-H) +- Support `REPLACE` SQL alias syntax [#7368](https://github.com/apache/datafusion/pull/7368) (berkaysynnada) +- feat: support primary key alternate syntax [#7160](https://github.com/apache/datafusion/pull/7160) (parkma99) +- docs: Add `Expr` library developer page [#7359](https://github.com/apache/datafusion/pull/7359) (tshauck) +- feat: support Binary for `min/max` [#7397](https://github.com/apache/datafusion/pull/7397) (Weijun-H) +- feat: Add memory pool configuration to `datafusion-cli` [#7424](https://github.com/apache/datafusion/pull/7424) (Weijun-H) +- Support Configuring Arrow RecordBatch Writers via SQL Statement Options [#7390](https://github.com/apache/datafusion/pull/7390) (devinjdangelo) +- Add ROLLUP and GROUPING SETS substrait support [#7382](https://github.com/apache/datafusion/pull/7382) (nseekhao) +- feat: Allow creating a ValuesExec from record batches [#7444](https://github.com/apache/datafusion/pull/7444) (scsmithr) +- minor: Add ARROW to `CREATE EXTERNAL TABLE` docs and add example of `COMPRESSION TYPE` [#7489](https://github.com/apache/datafusion/pull/7489) (alamb) +- Support Configuring Parquet Column Specific Options via SQL Statement Options [#7466](https://github.com/apache/datafusion/pull/7466) (devinjdangelo) +- Write Multiple Parquet Files in Parallel [#7483](https://github.com/apache/datafusion/pull/7483) (devinjdangelo) +- feat: explain with statistics 
[#7459](https://github.com/apache/datafusion/pull/7459) (korowa) **Fixed bugs:** -- fix: union_distinct shouldn't remove child distinct [#7346](https://github.com/apache/arrow-datafusion/pull/7346) (jackwener) -- fix: inconsistent scalar types in `DistinctArrayAggAccumulator` state [#7385](https://github.com/apache/arrow-datafusion/pull/7385) (korowa) -- fix: incorrect nullability calculation of `InListExpr` [#7496](https://github.com/apache/arrow-datafusion/pull/7496) (jonahgao) +- fix: union_distinct shouldn't remove child distinct [#7346](https://github.com/apache/datafusion/pull/7346) (jackwener) +- fix: inconsistent scalar types in `DistinctArrayAggAccumulator` state [#7385](https://github.com/apache/datafusion/pull/7385) (korowa) +- fix: incorrect nullability calculation of `InListExpr` [#7496](https://github.com/apache/datafusion/pull/7496) (jonahgao) **Merged pull requests:** -- Remove redundant type check in Avg [#7374](https://github.com/apache/arrow-datafusion/pull/7374) (viirya) -- feat: `array-empty` [#7313](https://github.com/apache/arrow-datafusion/pull/7313) (Weijun-H) -- Minor: add `WriteOp::name` and `DmlStatement::name` [#7329](https://github.com/apache/arrow-datafusion/pull/7329) (alamb) -- Specialize Median Accumulator [#7376](https://github.com/apache/arrow-datafusion/pull/7376) (tustvold) -- Specialize Avg and Sum accumulators (#6842) [#7358](https://github.com/apache/arrow-datafusion/pull/7358) (tustvold) -- Change error type of invalid argument to PlanError rather than InternalError, remove misleading comments [#7355](https://github.com/apache/arrow-datafusion/pull/7355) (alamb) -- Implement `array_pop_back` function [#7348](https://github.com/apache/arrow-datafusion/pull/7348) (tanruixiang) -- Add `exec_err!` error macro [#7361](https://github.com/apache/arrow-datafusion/pull/7361) (comphead) -- Update sqlparser requirement from 0.36.1 to 0.37.0 [#7387](https://github.com/apache/arrow-datafusion/pull/7387) (viirya) -- DML documentation 
[#7362](https://github.com/apache/arrow-datafusion/pull/7362) (spaydar) -- Support `REPLACE` SQL alias syntax [#7368](https://github.com/apache/arrow-datafusion/pull/7368) (berkaysynnada) -- Bug-fix/next_value() of Min/Max Scalar Values [#7384](https://github.com/apache/arrow-datafusion/pull/7384) (berkaysynnada) -- Prepare 30.0.0 release [#7372](https://github.com/apache/arrow-datafusion/pull/7372) (andygrove) -- fix: union_distinct shouldn't remove child distinct [#7346](https://github.com/apache/arrow-datafusion/pull/7346) (jackwener) -- feat: support primary key alternate syntax [#7160](https://github.com/apache/arrow-datafusion/pull/7160) (parkma99) -- Merge hash table implementations and remove leftover utilities [#7366](https://github.com/apache/arrow-datafusion/pull/7366) (metesynnada) -- Minor: remove stray `println!` from `array_expressions.rs` [#7389](https://github.com/apache/arrow-datafusion/pull/7389) (alamb) -- Projection Order Propagation [#7364](https://github.com/apache/arrow-datafusion/pull/7364) (berkaysynnada) -- Document and `scratch` directory for sqllogictest and make test specific [#7312](https://github.com/apache/arrow-datafusion/pull/7312) (alamb) -- Minor: Move test for `Distribution` and `Partitioning` with code [#7392](https://github.com/apache/arrow-datafusion/pull/7392) (alamb) -- Minor: move datasource statistics code into its own module [#7391](https://github.com/apache/arrow-datafusion/pull/7391) (alamb) -- Use datum arithmetic scalar value [#7375](https://github.com/apache/arrow-datafusion/pull/7375) (tustvold) -- Fix IN expr for NaN [#7378](https://github.com/apache/arrow-datafusion/pull/7378) (sarutak) -- Unnecessary to list all files during partition pruning [#7395](https://github.com/apache/arrow-datafusion/pull/7395) (smallzhongfeng) -- Optimize `Unnest` and implement `skip_nulls=true` if specified [#7371](https://github.com/apache/arrow-datafusion/pull/7371) (smiklos) -- Docs: Add query syntax to `COPY` docs 
[#7388](https://github.com/apache/arrow-datafusion/pull/7388) (alamb) -- Clean up clippy for Rust 1.72 release [#7399](https://github.com/apache/arrow-datafusion/pull/7399) (alamb) -- fix: inconsistent scalar types in `DistinctArrayAggAccumulator` state [#7385](https://github.com/apache/arrow-datafusion/pull/7385) (korowa) -- Fix python CI [#7416](https://github.com/apache/arrow-datafusion/pull/7416) (tustvold) -- docs: Add `Expr` library developer page [#7359](https://github.com/apache/arrow-datafusion/pull/7359) (tshauck) -- Update ObjectStore 0.7.0 and Arrow 46.0.0 [#7282](https://github.com/apache/arrow-datafusion/pull/7282) (tustvold) -- Fix Decimal256 scalar display string in sqllogictest [#7404](https://github.com/apache/arrow-datafusion/pull/7404) (viirya) -- feat: support Binary for `min/max` [#7397](https://github.com/apache/arrow-datafusion/pull/7397) (Weijun-H) -- Make sqllogictest distinguish NaN from -NaN [#7403](https://github.com/apache/arrow-datafusion/pull/7403) (sarutak) -- Replace lazy_static with OnceLock [#7409](https://github.com/apache/arrow-datafusion/pull/7409) (sarutak) -- Minor: Remove the unreached simplification rule for `0 / 0` [#7405](https://github.com/apache/arrow-datafusion/pull/7405) (jonahgao) -- feat: Add memory pool configuration to `datafusion-cli` [#7424](https://github.com/apache/arrow-datafusion/pull/7424) (Weijun-H) -- Minor: Debug log when FairPool is created [#7431](https://github.com/apache/arrow-datafusion/pull/7431) (alamb) -- Support Configuring Arrow RecordBatch Writers via SQL Statement Options [#7390](https://github.com/apache/arrow-datafusion/pull/7390) (devinjdangelo) -- Add ROLLUP and GROUPING SETS substrait support [#7382](https://github.com/apache/arrow-datafusion/pull/7382) (nseekhao) -- Refactor sort_fuzz test to clarify what is covered [#7439](https://github.com/apache/arrow-datafusion/pull/7439) (alamb) -- Use DateTime::from_naive_utc_and_offset instead of DateTime::from_utc 
[#7451](https://github.com/apache/arrow-datafusion/pull/7451) (sarutak) -- Update substrait requirement from 0.12.0 to 0.13.1 [#7443](https://github.com/apache/arrow-datafusion/pull/7443) (viirya) -- [minior fix]: adjust the projection statistics [#7428](https://github.com/apache/arrow-datafusion/pull/7428) (liukun4515) -- Add new known users: Arroyo and Restate [#7464](https://github.com/apache/arrow-datafusion/pull/7464) (jychen7) -- ScalarFunctionExpr Maintaining Order [#7417](https://github.com/apache/arrow-datafusion/pull/7417) (berkaysynnada) -- Bug-fix/find_orderings_of_exprs [#7457](https://github.com/apache/arrow-datafusion/pull/7457) (berkaysynnada) -- Update prost-derive requirement from 0.11 to 0.12 [#7468](https://github.com/apache/arrow-datafusion/pull/7468) (dependabot[bot]) -- Revert "Update prost-derive requirement from 0.11 to 0.12 (#7468)" [#7476](https://github.com/apache/arrow-datafusion/pull/7476) (viirya) -- Return error if spill file does not exist in ExternalSorter [#7479](https://github.com/apache/arrow-datafusion/pull/7479) (viirya) -- [minor fix]: Remove unused duplicate `file_type.rs` [#7478](https://github.com/apache/arrow-datafusion/pull/7478) (sarutak) -- Minor: more flexible pool size setting for datafusion-cli [#7454](https://github.com/apache/arrow-datafusion/pull/7454) (yjshen) -- Bump actions/checkout from 3 to 4 [#7480](https://github.com/apache/arrow-datafusion/pull/7480) (dependabot[bot]) -- Support Write Options in DataFrame::write\_\* methods [#7435](https://github.com/apache/arrow-datafusion/pull/7435) (devinjdangelo) -- cp_solver, Duration vs Interval cases [#7475](https://github.com/apache/arrow-datafusion/pull/7475) (berkaysynnada) -- feat: Allow creating a ValuesExec from record batches [#7444](https://github.com/apache/arrow-datafusion/pull/7444) (scsmithr) -- Make `LogicalPlan::with_new_exprs,` deprecate `from_plan` [#7396](https://github.com/apache/arrow-datafusion/pull/7396) (alamb) -- refactor: change file type 
logic for create table [#7477](https://github.com/apache/arrow-datafusion/pull/7477) (tshauck) -- minor: do not fail analyzer if subquery plan contains extension [#7455](https://github.com/apache/arrow-datafusion/pull/7455) (waynexia) -- Make IN expr work with multiple items [#7449](https://github.com/apache/arrow-datafusion/pull/7449) (sarutak) -- Minor: Add doc comments and example for `ScalarVaue::to_scalar` [#7491](https://github.com/apache/arrow-datafusion/pull/7491) (alamb) -- minor: Add ARROW to `CREATE EXTERNAL TABLE` docs and add example of `COMPRESSION TYPE` [#7489](https://github.com/apache/arrow-datafusion/pull/7489) (alamb) -- Add backtrace to error messages [#7434](https://github.com/apache/arrow-datafusion/pull/7434) (comphead) -- Make sqllogictest platform-independent for the sign of NaN [#7462](https://github.com/apache/arrow-datafusion/pull/7462) (sarutak) -- Support Configuring Parquet Column Specific Options via SQL Statement Options [#7466](https://github.com/apache/arrow-datafusion/pull/7466) (devinjdangelo) -- Minor: improve error message [#7498](https://github.com/apache/arrow-datafusion/pull/7498) (alamb) -- Write Multiple Parquet Files in Parallel [#7483](https://github.com/apache/arrow-datafusion/pull/7483) (devinjdangelo) -- `PrimitiveGroupsAccumulator` should propagate timestamp timezone information properly [#7494](https://github.com/apache/arrow-datafusion/pull/7494) (sunchao) -- Minor: Add `ScalarValue::data_type()` for consistency with other APIs [#7492](https://github.com/apache/arrow-datafusion/pull/7492) (alamb) -- feat: explain with statistics [#7459](https://github.com/apache/arrow-datafusion/pull/7459) (korowa) -- fix: incorrect nullability calculation of `InListExpr` [#7496](https://github.com/apache/arrow-datafusion/pull/7496) (jonahgao) +- Remove redundant type check in Avg [#7374](https://github.com/apache/datafusion/pull/7374) (viirya) +- feat: `array-empty` [#7313](https://github.com/apache/datafusion/pull/7313) 
(Weijun-H) +- Minor: add `WriteOp::name` and `DmlStatement::name` [#7329](https://github.com/apache/datafusion/pull/7329) (alamb) +- Specialize Median Accumulator [#7376](https://github.com/apache/datafusion/pull/7376) (tustvold) +- Specialize Avg and Sum accumulators (#6842) [#7358](https://github.com/apache/datafusion/pull/7358) (tustvold) +- Change error type of invalid argument to PlanError rather than InternalError, remove misleading comments [#7355](https://github.com/apache/datafusion/pull/7355) (alamb) +- Implement `array_pop_back` function [#7348](https://github.com/apache/datafusion/pull/7348) (tanruixiang) +- Add `exec_err!` error macro [#7361](https://github.com/apache/datafusion/pull/7361) (comphead) +- Update sqlparser requirement from 0.36.1 to 0.37.0 [#7387](https://github.com/apache/datafusion/pull/7387) (viirya) +- DML documentation [#7362](https://github.com/apache/datafusion/pull/7362) (spaydar) +- Support `REPLACE` SQL alias syntax [#7368](https://github.com/apache/datafusion/pull/7368) (berkaysynnada) +- Bug-fix/next_value() of Min/Max Scalar Values [#7384](https://github.com/apache/datafusion/pull/7384) (berkaysynnada) +- Prepare 30.0.0 release [#7372](https://github.com/apache/datafusion/pull/7372) (andygrove) +- fix: union_distinct shouldn't remove child distinct [#7346](https://github.com/apache/datafusion/pull/7346) (jackwener) +- feat: support primary key alternate syntax [#7160](https://github.com/apache/datafusion/pull/7160) (parkma99) +- Merge hash table implementations and remove leftover utilities [#7366](https://github.com/apache/datafusion/pull/7366) (metesynnada) +- Minor: remove stray `println!` from `array_expressions.rs` [#7389](https://github.com/apache/datafusion/pull/7389) (alamb) +- Projection Order Propagation [#7364](https://github.com/apache/datafusion/pull/7364) (berkaysynnada) +- Document and `scratch` directory for sqllogictest and make test specific [#7312](https://github.com/apache/datafusion/pull/7312) (alamb) +- 
Minor: Move test for `Distribution` and `Partitioning` with code [#7392](https://github.com/apache/datafusion/pull/7392) (alamb) +- Minor: move datasource statistics code into its own module [#7391](https://github.com/apache/datafusion/pull/7391) (alamb) +- Use datum arithmetic scalar value [#7375](https://github.com/apache/datafusion/pull/7375) (tustvold) +- Fix IN expr for NaN [#7378](https://github.com/apache/datafusion/pull/7378) (sarutak) +- Unnecessary to list all files during partition pruning [#7395](https://github.com/apache/datafusion/pull/7395) (smallzhongfeng) +- Optimize `Unnest` and implement `skip_nulls=true` if specified [#7371](https://github.com/apache/datafusion/pull/7371) (smiklos) +- Docs: Add query syntax to `COPY` docs [#7388](https://github.com/apache/datafusion/pull/7388) (alamb) +- Clean up clippy for Rust 1.72 release [#7399](https://github.com/apache/datafusion/pull/7399) (alamb) +- fix: inconsistent scalar types in `DistinctArrayAggAccumulator` state [#7385](https://github.com/apache/datafusion/pull/7385) (korowa) +- Fix python CI [#7416](https://github.com/apache/datafusion/pull/7416) (tustvold) +- docs: Add `Expr` library developer page [#7359](https://github.com/apache/datafusion/pull/7359) (tshauck) +- Update ObjectStore 0.7.0 and Arrow 46.0.0 [#7282](https://github.com/apache/datafusion/pull/7282) (tustvold) +- Fix Decimal256 scalar display string in sqllogictest [#7404](https://github.com/apache/datafusion/pull/7404) (viirya) +- feat: support Binary for `min/max` [#7397](https://github.com/apache/datafusion/pull/7397) (Weijun-H) +- Make sqllogictest distinguish NaN from -NaN [#7403](https://github.com/apache/datafusion/pull/7403) (sarutak) +- Replace lazy_static with OnceLock [#7409](https://github.com/apache/datafusion/pull/7409) (sarutak) +- Minor: Remove the unreached simplification rule for `0 / 0` [#7405](https://github.com/apache/datafusion/pull/7405) (jonahgao) +- feat: Add memory pool configuration to `datafusion-cli` 
[#7424](https://github.com/apache/datafusion/pull/7424) (Weijun-H) +- Minor: Debug log when FairPool is created [#7431](https://github.com/apache/datafusion/pull/7431) (alamb) +- Support Configuring Arrow RecordBatch Writers via SQL Statement Options [#7390](https://github.com/apache/datafusion/pull/7390) (devinjdangelo) +- Add ROLLUP and GROUPING SETS substrait support [#7382](https://github.com/apache/datafusion/pull/7382) (nseekhao) +- Refactor sort_fuzz test to clarify what is covered [#7439](https://github.com/apache/datafusion/pull/7439) (alamb) +- Use DateTime::from_naive_utc_and_offset instead of DateTime::from_utc [#7451](https://github.com/apache/datafusion/pull/7451) (sarutak) +- Update substrait requirement from 0.12.0 to 0.13.1 [#7443](https://github.com/apache/datafusion/pull/7443) (viirya) +- [minior fix]: adjust the projection statistics [#7428](https://github.com/apache/datafusion/pull/7428) (liukun4515) +- Add new known users: Arroyo and Restate [#7464](https://github.com/apache/datafusion/pull/7464) (jychen7) +- ScalarFunctionExpr Maintaining Order [#7417](https://github.com/apache/datafusion/pull/7417) (berkaysynnada) +- Bug-fix/find_orderings_of_exprs [#7457](https://github.com/apache/datafusion/pull/7457) (berkaysynnada) +- Update prost-derive requirement from 0.11 to 0.12 [#7468](https://github.com/apache/datafusion/pull/7468) (dependabot[bot]) +- Revert "Update prost-derive requirement from 0.11 to 0.12 (#7468)" [#7476](https://github.com/apache/datafusion/pull/7476) (viirya) +- Return error if spill file does not exist in ExternalSorter [#7479](https://github.com/apache/datafusion/pull/7479) (viirya) +- [minor fix]: Remove unused duplicate `file_type.rs` [#7478](https://github.com/apache/datafusion/pull/7478) (sarutak) +- Minor: more flexible pool size setting for datafusion-cli [#7454](https://github.com/apache/datafusion/pull/7454) (yjshen) +- Bump actions/checkout from 3 to 4 [#7480](https://github.com/apache/datafusion/pull/7480) 
(dependabot[bot]) +- Support Write Options in DataFrame::write\_\* methods [#7435](https://github.com/apache/datafusion/pull/7435) (devinjdangelo) +- cp_solver, Duration vs Interval cases [#7475](https://github.com/apache/datafusion/pull/7475) (berkaysynnada) +- feat: Allow creating a ValuesExec from record batches [#7444](https://github.com/apache/datafusion/pull/7444) (scsmithr) +- Make `LogicalPlan::with_new_exprs,` deprecate `from_plan` [#7396](https://github.com/apache/datafusion/pull/7396) (alamb) +- refactor: change file type logic for create table [#7477](https://github.com/apache/datafusion/pull/7477) (tshauck) +- minor: do not fail analyzer if subquery plan contains extension [#7455](https://github.com/apache/datafusion/pull/7455) (waynexia) +- Make IN expr work with multiple items [#7449](https://github.com/apache/datafusion/pull/7449) (sarutak) +- Minor: Add doc comments and example for `ScalarVaue::to_scalar` [#7491](https://github.com/apache/datafusion/pull/7491) (alamb) +- minor: Add ARROW to `CREATE EXTERNAL TABLE` docs and add example of `COMPRESSION TYPE` [#7489](https://github.com/apache/datafusion/pull/7489) (alamb) +- Add backtrace to error messages [#7434](https://github.com/apache/datafusion/pull/7434) (comphead) +- Make sqllogictest platform-independent for the sign of NaN [#7462](https://github.com/apache/datafusion/pull/7462) (sarutak) +- Support Configuring Parquet Column Specific Options via SQL Statement Options [#7466](https://github.com/apache/datafusion/pull/7466) (devinjdangelo) +- Minor: improve error message [#7498](https://github.com/apache/datafusion/pull/7498) (alamb) +- Write Multiple Parquet Files in Parallel [#7483](https://github.com/apache/datafusion/pull/7483) (devinjdangelo) +- `PrimitiveGroupsAccumulator` should propagate timestamp timezone information properly [#7494](https://github.com/apache/datafusion/pull/7494) (sunchao) +- Minor: Add `ScalarValue::data_type()` for consistency with other APIs 
[#7492](https://github.com/apache/datafusion/pull/7492) (alamb) +- feat: explain with statistics [#7459](https://github.com/apache/datafusion/pull/7459) (korowa) +- fix: incorrect nullability calculation of `InListExpr` [#7496](https://github.com/apache/datafusion/pull/7496) (jonahgao) diff --git a/dev/changelog/32.0.0.md b/dev/changelog/32.0.0.md index 781fd5001552..f5709f1e137c 100644 --- a/dev/changelog/32.0.0.md +++ b/dev/changelog/32.0.0.md @@ -17,179 +17,179 @@ under the License. --> -## [32.0.0](https://github.com/apache/arrow-datafusion/tree/32.0.0) (2023-10-07) +## [32.0.0](https://github.com/apache/datafusion/tree/32.0.0) (2023-10-07) -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/31.0.0...32.0.0) +[Full Changelog](https://github.com/apache/datafusion/compare/31.0.0...32.0.0) **Breaking changes:** -- Remove implicit interval type coercion from ScalarValue comparison [#7514](https://github.com/apache/arrow-datafusion/pull/7514) (tustvold) -- Remove get_scan_files and ExecutionPlan::file_scan_config (#7357) [#7487](https://github.com/apache/arrow-datafusion/pull/7487) (tustvold) -- Move `FileCompressionType` out of `common` and into `core` [#7596](https://github.com/apache/arrow-datafusion/pull/7596) (haohuaijin) -- Update arrow 47.0.0 in DataFusion [#7587](https://github.com/apache/arrow-datafusion/pull/7587) (tustvold) -- Rename `bounded_order_preserving_variants` config to `prefer_exising_sort` and update docs [#7723](https://github.com/apache/arrow-datafusion/pull/7723) (alamb) +- Remove implicit interval type coercion from ScalarValue comparison [#7514](https://github.com/apache/datafusion/pull/7514) (tustvold) +- Remove get_scan_files and ExecutionPlan::file_scan_config (#7357) [#7487](https://github.com/apache/datafusion/pull/7487) (tustvold) +- Move `FileCompressionType` out of `common` and into `core` [#7596](https://github.com/apache/datafusion/pull/7596) (haohuaijin) +- Update arrow 47.0.0 in DataFusion 
[#7587](https://github.com/apache/datafusion/pull/7587) (tustvold) +- Rename `bounded_order_preserving_variants` config to `prefer_exising_sort` and update docs [#7723](https://github.com/apache/datafusion/pull/7723) (alamb) **Implemented enhancements:** -- Parallelize Stateless (CSV/JSON) File Write Serialization [#7452](https://github.com/apache/arrow-datafusion/pull/7452) (devinjdangelo) -- Create a Priority Queue based Aggregation with `limit` [#7192](https://github.com/apache/arrow-datafusion/pull/7192) (avantgardnerio) -- feat: add guarantees to simplification [#7467](https://github.com/apache/arrow-datafusion/pull/7467) (wjones127) -- [Minor]: Produce better plan when group by contains all of the ordering requirements [#7542](https://github.com/apache/arrow-datafusion/pull/7542) (mustafasrepo) -- Make AvroArrowArrayReader possible to scan Avro backed table which contains nested records [#7525](https://github.com/apache/arrow-datafusion/pull/7525) (sarutak) -- feat: Support spilling for hash aggregation [#7400](https://github.com/apache/arrow-datafusion/pull/7400) (kazuyukitanimura) -- Parallelize Parquet Serialization [#7562](https://github.com/apache/arrow-datafusion/pull/7562) (devinjdangelo) -- feat: natively support more data types for the `abs` function. 
[#7568](https://github.com/apache/arrow-datafusion/pull/7568) (jonahgao) -- feat: Parallel collecting parquet files statistics #7573 [#7595](https://github.com/apache/arrow-datafusion/pull/7595) (hengfeiyang) -- Support hashing List columns [#7616](https://github.com/apache/arrow-datafusion/pull/7616) (jonmmease) -- feat: Better large output display in datafusion-cli with --maxrows option [#7617](https://github.com/apache/arrow-datafusion/pull/7617) (2010YOUY01) -- feat: make parse_float_as_decimal work on negative numbers [#7648](https://github.com/apache/arrow-datafusion/pull/7648) (jonahgao) -- Update Default Parquet Write Compression [#7692](https://github.com/apache/arrow-datafusion/pull/7692) (devinjdangelo) -- Support all the codecs supported by Avro [#7718](https://github.com/apache/arrow-datafusion/pull/7718) (sarutak) -- Optimize "ORDER BY + LIMIT" queries for speed / memory with special TopK operator [#7721](https://github.com/apache/arrow-datafusion/pull/7721) (Dandandan) +- Parallelize Stateless (CSV/JSON) File Write Serialization [#7452](https://github.com/apache/datafusion/pull/7452) (devinjdangelo) +- Create a Priority Queue based Aggregation with `limit` [#7192](https://github.com/apache/datafusion/pull/7192) (avantgardnerio) +- feat: add guarantees to simplification [#7467](https://github.com/apache/datafusion/pull/7467) (wjones127) +- [Minor]: Produce better plan when group by contains all of the ordering requirements [#7542](https://github.com/apache/datafusion/pull/7542) (mustafasrepo) +- Make AvroArrowArrayReader possible to scan Avro backed table which contains nested records [#7525](https://github.com/apache/datafusion/pull/7525) (sarutak) +- feat: Support spilling for hash aggregation [#7400](https://github.com/apache/datafusion/pull/7400) (kazuyukitanimura) +- Parallelize Parquet Serialization [#7562](https://github.com/apache/datafusion/pull/7562) (devinjdangelo) +- feat: natively support more data types for the `abs` function. 
[#7568](https://github.com/apache/datafusion/pull/7568) (jonahgao) +- feat: Parallel collecting parquet files statistics #7573 [#7595](https://github.com/apache/datafusion/pull/7595) (hengfeiyang) +- Support hashing List columns [#7616](https://github.com/apache/datafusion/pull/7616) (jonmmease) +- feat: Better large output display in datafusion-cli with --maxrows option [#7617](https://github.com/apache/datafusion/pull/7617) (2010YOUY01) +- feat: make parse_float_as_decimal work on negative numbers [#7648](https://github.com/apache/datafusion/pull/7648) (jonahgao) +- Update Default Parquet Write Compression [#7692](https://github.com/apache/datafusion/pull/7692) (devinjdangelo) +- Support all the codecs supported by Avro [#7718](https://github.com/apache/datafusion/pull/7718) (sarutak) +- Optimize "ORDER BY + LIMIT" queries for speed / memory with special TopK operator [#7721](https://github.com/apache/datafusion/pull/7721) (Dandandan) **Fixed bugs:** -- fix: inconsistent behaviors when dividing floating numbers by zero [#7503](https://github.com/apache/arrow-datafusion/pull/7503) (jonahgao) -- fix: skip EliminateCrossJoin rule if inner join with filter is found [#7529](https://github.com/apache/arrow-datafusion/pull/7529) (epsio-banay) -- fix: check for precision overflow when parsing float as decimal [#7627](https://github.com/apache/arrow-datafusion/pull/7627) (jonahgao) -- fix: substrait limit when fetch is None [#7669](https://github.com/apache/arrow-datafusion/pull/7669) (waynexia) -- fix: coerce text to timestamps with timezones [#7720](https://github.com/apache/arrow-datafusion/pull/7720) (mhilton) -- fix: avro_to_arrow: Handle avro nested nullable struct (union) [#7663](https://github.com/apache/arrow-datafusion/pull/7663) (Samrose-Ahmed) +- fix: inconsistent behaviors when dividing floating numbers by zero [#7503](https://github.com/apache/datafusion/pull/7503) (jonahgao) +- fix: skip EliminateCrossJoin rule if inner join with filter is found 
[#7529](https://github.com/apache/datafusion/pull/7529) (epsio-banay) +- fix: check for precision overflow when parsing float as decimal [#7627](https://github.com/apache/datafusion/pull/7627) (jonahgao) +- fix: substrait limit when fetch is None [#7669](https://github.com/apache/datafusion/pull/7669) (waynexia) +- fix: coerce text to timestamps with timezones [#7720](https://github.com/apache/datafusion/pull/7720) (mhilton) +- fix: avro_to_arrow: Handle avro nested nullable struct (union) [#7663](https://github.com/apache/datafusion/pull/7663) (Samrose-Ahmed) **Documentation updates:** -- Documentation Updates for New Write Related Features [#7520](https://github.com/apache/arrow-datafusion/pull/7520) (devinjdangelo) -- Create 2023 Q4 roadmap [#7551](https://github.com/apache/arrow-datafusion/pull/7551) (graydenshand) -- docs: add section on supports_filters_pushdown [#7680](https://github.com/apache/arrow-datafusion/pull/7680) (tshauck) -- Add LanceDB to the list of Known Users [#7716](https://github.com/apache/arrow-datafusion/pull/7716) (alamb) -- Document crate feature flags [#7713](https://github.com/apache/arrow-datafusion/pull/7713) (alamb) +- Documentation Updates for New Write Related Features [#7520](https://github.com/apache/datafusion/pull/7520) (devinjdangelo) +- Create 2023 Q4 roadmap [#7551](https://github.com/apache/datafusion/pull/7551) (graydenshand) +- docs: add section on supports_filters_pushdown [#7680](https://github.com/apache/datafusion/pull/7680) (tshauck) +- Add LanceDB to the list of Known Users [#7716](https://github.com/apache/datafusion/pull/7716) (alamb) +- Document crate feature flags [#7713](https://github.com/apache/datafusion/pull/7713) (alamb) **Merged pull requests:** -- Prepare 31.0.0 release [#7508](https://github.com/apache/arrow-datafusion/pull/7508) (andygrove) -- Minor(proto): Implement `TryFrom<&DFSchema>` for `protobuf::DfSchema` [#7505](https://github.com/apache/arrow-datafusion/pull/7505) (jonahgao) -- fix: 
inconsistent behaviors when dividing floating numbers by zero [#7503](https://github.com/apache/arrow-datafusion/pull/7503) (jonahgao) -- Parallelize Stateless (CSV/JSON) File Write Serialization [#7452](https://github.com/apache/arrow-datafusion/pull/7452) (devinjdangelo) -- Minor: Remove stray comment markings from encoding error message [#7512](https://github.com/apache/arrow-datafusion/pull/7512) (devinjdangelo) -- Remove implicit interval type coercion from ScalarValue comparison [#7514](https://github.com/apache/arrow-datafusion/pull/7514) (tustvold) -- Minor: deprecate ScalarValue::get_datatype() [#7507](https://github.com/apache/arrow-datafusion/pull/7507) (Weijun-H) -- Propagate error from spawned task reading spills [#7510](https://github.com/apache/arrow-datafusion/pull/7510) (viirya) -- Refactor the EnforceDistribution Rule [#7488](https://github.com/apache/arrow-datafusion/pull/7488) (mustafasrepo) -- Remove get_scan_files and ExecutionPlan::file_scan_config (#7357) [#7487](https://github.com/apache/arrow-datafusion/pull/7487) (tustvold) -- Simplify ScalarValue::distance (#7517) [#7519](https://github.com/apache/arrow-datafusion/pull/7519) (tustvold) -- typo: change `delimeter` to `delimiter` [#7521](https://github.com/apache/arrow-datafusion/pull/7521) (Weijun-H) -- Fix some simplification rules for floating-point arithmetic operations [#7515](https://github.com/apache/arrow-datafusion/pull/7515) (jonahgao) -- Documentation Updates for New Write Related Features [#7520](https://github.com/apache/arrow-datafusion/pull/7520) (devinjdangelo) -- [MINOR]: Move tests from repartition to enforce_distribution file [#7539](https://github.com/apache/arrow-datafusion/pull/7539) (mustafasrepo) -- Update the async-trait crate to resolve clippy bug [#7541](https://github.com/apache/arrow-datafusion/pull/7541) (metesynnada) -- Fix flaky `test_sort_fetch_memory_calculation` test [#7534](https://github.com/apache/arrow-datafusion/pull/7534) (viirya) -- Move common 
code to utils [#7545](https://github.com/apache/arrow-datafusion/pull/7545) (mustafasrepo) -- Minor: Add comments and clearer constructors to `Interval` [#7526](https://github.com/apache/arrow-datafusion/pull/7526) (alamb) -- fix: skip EliminateCrossJoin rule if inner join with filter is found [#7529](https://github.com/apache/arrow-datafusion/pull/7529) (epsio-banay) -- Create a Priority Queue based Aggregation with `limit` [#7192](https://github.com/apache/arrow-datafusion/pull/7192) (avantgardnerio) -- feat: add guarantees to simplification [#7467](https://github.com/apache/arrow-datafusion/pull/7467) (wjones127) -- [Minor]: Produce better plan when group by contains all of the ordering requirements [#7542](https://github.com/apache/arrow-datafusion/pull/7542) (mustafasrepo) -- Minor: beautify interval display [#7554](https://github.com/apache/arrow-datafusion/pull/7554) (Weijun-H) -- replace ptree with termtree [#7560](https://github.com/apache/arrow-datafusion/pull/7560) (avantgardnerio) -- Make AvroArrowArrayReader possible to scan Avro backed table which contains nested records [#7525](https://github.com/apache/arrow-datafusion/pull/7525) (sarutak) -- Fix a race condition issue on reading spilled file [#7538](https://github.com/apache/arrow-datafusion/pull/7538) (sarutak) -- [MINOR]: Add is single method [#7558](https://github.com/apache/arrow-datafusion/pull/7558) (mustafasrepo) -- Fix `describe ` to work without SessionContext [#7441](https://github.com/apache/arrow-datafusion/pull/7441) (alamb) -- Make the tests in SHJ faster [#7543](https://github.com/apache/arrow-datafusion/pull/7543) (metesynnada) -- feat: Support spilling for hash aggregation [#7400](https://github.com/apache/arrow-datafusion/pull/7400) (kazuyukitanimura) -- Make backtrace as a cargo feature [#7527](https://github.com/apache/arrow-datafusion/pull/7527) (comphead) -- Minor: Fix `clippy` by switching to `timestamp_nanos_opt` instead of (deprecated) `timestamp_nanos` 
[#7572](https://github.com/apache/arrow-datafusion/pull/7572) (alamb) -- Update sqllogictest requirement from 0.15.0 to 0.16.0 [#7569](https://github.com/apache/arrow-datafusion/pull/7569) (dependabot[bot]) -- extract `datafusion-physical-plan` to its own crate [#7432](https://github.com/apache/arrow-datafusion/pull/7432) (alamb) -- First and Last Accumulators should update with state row excluding is_set flag [#7565](https://github.com/apache/arrow-datafusion/pull/7565) (viirya) -- refactor: simplify code of eliminate_cross_join.rs [#7561](https://github.com/apache/arrow-datafusion/pull/7561) (jackwener) -- Update release instructions for datafusion-physical-plan crate [#7576](https://github.com/apache/arrow-datafusion/pull/7576) (alamb) -- Minor: Update chrono pin to `0.4.31` [#7575](https://github.com/apache/arrow-datafusion/pull/7575) (alamb) -- [feat] Introduce cacheManager in session ctx and make StatisticsCache share in session [#7570](https://github.com/apache/arrow-datafusion/pull/7570) (Ted-Jiang) -- Enhance/Refactor Ordering Equivalence Properties [#7566](https://github.com/apache/arrow-datafusion/pull/7566) (mustafasrepo) -- fix misplaced statements in sqllogictest [#7586](https://github.com/apache/arrow-datafusion/pull/7586) (jonahgao) -- Update substrait requirement from 0.13.1 to 0.14.0 [#7585](https://github.com/apache/arrow-datafusion/pull/7585) (dependabot[bot]) -- chore: use the `create_udwf` function in `simple_udwf`, consistent with `simple_udf` and `simple_udaf` [#7579](https://github.com/apache/arrow-datafusion/pull/7579) (tanruixiang) -- Implement protobuf serialization for AnalyzeExec [#7574](https://github.com/apache/arrow-datafusion/pull/7574) (adhish20) -- chore: fix catalog's usage docs error and add docs about `CatalogList` trait [#7582](https://github.com/apache/arrow-datafusion/pull/7582) (tanruixiang) -- Implement `CardinalityAwareRowConverter` while doing streaming merge [#7401](https://github.com/apache/arrow-datafusion/pull/7401) 
(JayjeetAtGithub) -- Parallelize Parquet Serialization [#7562](https://github.com/apache/arrow-datafusion/pull/7562) (devinjdangelo) -- feat: natively support more data types for the `abs` function. [#7568](https://github.com/apache/arrow-datafusion/pull/7568) (jonahgao) -- implement string_to_array [#7577](https://github.com/apache/arrow-datafusion/pull/7577) (casperhart) -- Create 2023 Q4 roadmap [#7551](https://github.com/apache/arrow-datafusion/pull/7551) (graydenshand) -- chore: reduce `physical-plan` dependencies [#7599](https://github.com/apache/arrow-datafusion/pull/7599) (crepererum) -- Minor: add githubs start/fork buttons to documentation page [#7588](https://github.com/apache/arrow-datafusion/pull/7588) (alamb) -- Minor: add more examples for `CREATE EXTERNAL TABLE` doc [#7594](https://github.com/apache/arrow-datafusion/pull/7594) (comphead) -- Update nix requirement from 0.26.1 to 0.27.1 [#7438](https://github.com/apache/arrow-datafusion/pull/7438) (dependabot[bot]) -- Update sqllogictest requirement from 0.16.0 to 0.17.0 [#7606](https://github.com/apache/arrow-datafusion/pull/7606) (dependabot[bot]) -- Fix panic in TopK [#7609](https://github.com/apache/arrow-datafusion/pull/7609) (avantgardnerio) -- Move `FileCompressionType` out of `common` and into `core` [#7596](https://github.com/apache/arrow-datafusion/pull/7596) (haohuaijin) -- Expose contents of Constraints [#7603](https://github.com/apache/arrow-datafusion/pull/7603) (tv42) -- Change the unbounded_output API default [#7605](https://github.com/apache/arrow-datafusion/pull/7605) (metesynnada) -- feat: Parallel collecting parquet files statistics #7573 [#7595](https://github.com/apache/arrow-datafusion/pull/7595) (hengfeiyang) -- Support hashing List columns [#7616](https://github.com/apache/arrow-datafusion/pull/7616) (jonmmease) -- [MINOR] Make the sink input aware of its plan [#7610](https://github.com/apache/arrow-datafusion/pull/7610) (metesynnada) -- [MINOR] Reduce complexity on SHJ 
[#7607](https://github.com/apache/arrow-datafusion/pull/7607) (metesynnada) -- feat: Better large output display in datafusion-cli with --maxrows option [#7617](https://github.com/apache/arrow-datafusion/pull/7617) (2010YOUY01) -- Minor: add examples for `arrow_cast` and `arrow_typeof` to user guide [#7615](https://github.com/apache/arrow-datafusion/pull/7615) (alamb) -- [MINOR]: Fix stack overflow bug for get field access expr [#7623](https://github.com/apache/arrow-datafusion/pull/7623) (mustafasrepo) -- Group By All [#7622](https://github.com/apache/arrow-datafusion/pull/7622) (berkaysynnada) -- Implement protobuf serialization for `(Bounded)WindowAggExec`. [#7557](https://github.com/apache/arrow-datafusion/pull/7557) (vrongmeal) -- Make it possible to compile datafusion-common without default features [#7625](https://github.com/apache/arrow-datafusion/pull/7625) (jonmmease) -- Minor: Adding backtrace documentation [#7628](https://github.com/apache/arrow-datafusion/pull/7628) (comphead) -- fix(5975/5976): timezone handling for timestamps and `date_trunc`, `date_part` and `date_bin` [#7614](https://github.com/apache/arrow-datafusion/pull/7614) (wiedld) -- Minor: remove unecessary `Arc`s in datetime_expressions [#7630](https://github.com/apache/arrow-datafusion/pull/7630) (alamb) -- fix: check for precision overflow when parsing float as decimal [#7627](https://github.com/apache/arrow-datafusion/pull/7627) (jonahgao) -- Update arrow 47.0.0 in DataFusion [#7587](https://github.com/apache/arrow-datafusion/pull/7587) (tustvold) -- Add test crate to compile DataFusion with wasm-pack [#7633](https://github.com/apache/arrow-datafusion/pull/7633) (jonmmease) -- Minor: Update documentation of case expression [#7646](https://github.com/apache/arrow-datafusion/pull/7646) (ongchi) -- Minor: improve docstrings on `SessionState` [#7654](https://github.com/apache/arrow-datafusion/pull/7654) (alamb) -- Update example in the DataFrame documentation. 
[#7650](https://github.com/apache/arrow-datafusion/pull/7650) (jsimpson-gro) -- Add HTTP object store example [#7602](https://github.com/apache/arrow-datafusion/pull/7602) (pka) -- feat: make parse_float_as_decimal work on negative numbers [#7648](https://github.com/apache/arrow-datafusion/pull/7648) (jonahgao) -- Minor: add doc comments to `ExtractEquijoinPredicate` [#7658](https://github.com/apache/arrow-datafusion/pull/7658) (alamb) -- [MINOR]: Do not add unnecessary hash repartition to the physical plan [#7667](https://github.com/apache/arrow-datafusion/pull/7667) (mustafasrepo) -- Minor: add ticket references to parallel parquet writing code [#7592](https://github.com/apache/arrow-datafusion/pull/7592) (alamb) -- Minor: Add ticket reference and add test comment [#7593](https://github.com/apache/arrow-datafusion/pull/7593) (alamb) -- Support Avro's Enum type and Fixed type [#7635](https://github.com/apache/arrow-datafusion/pull/7635) (sarutak) -- Minor: Migrate datafusion-proto tests into it own binary [#7668](https://github.com/apache/arrow-datafusion/pull/7668) (ongchi) -- Upgrade apache-avro to 0.16 [#7674](https://github.com/apache/arrow-datafusion/pull/7674) (sarutak) -- Move window analysis to the window method [#7672](https://github.com/apache/arrow-datafusion/pull/7672) (mustafasrepo) -- Don't add filters to projection in TableScan [#7670](https://github.com/apache/arrow-datafusion/pull/7670) (Dandandan) -- Minor: Improve `TableProviderFilterPushDown` docs [#7685](https://github.com/apache/arrow-datafusion/pull/7685) (alamb) -- FIX: Test timestamp with table [#7701](https://github.com/apache/arrow-datafusion/pull/7701) (jayzhan211) -- Fix bug in `SimplifyExpressions` [#7699](https://github.com/apache/arrow-datafusion/pull/7699) (Dandandan) -- Enhance Enforce Dist capabilities to fix, sub optimal bad plans [#7671](https://github.com/apache/arrow-datafusion/pull/7671) (mustafasrepo) -- docs: add section on supports_filters_pushdown 
[#7680](https://github.com/apache/arrow-datafusion/pull/7680) (tshauck) -- Improve cache usage in CI [#7678](https://github.com/apache/arrow-datafusion/pull/7678) (sarutak) -- fix: substrait limit when fetch is None [#7669](https://github.com/apache/arrow-datafusion/pull/7669) (waynexia) -- minor: revert parsing precedence between Aggr and UDAF [#7682](https://github.com/apache/arrow-datafusion/pull/7682) (waynexia) -- Minor: Move hash utils to common [#7684](https://github.com/apache/arrow-datafusion/pull/7684) (jayzhan211) -- Update Default Parquet Write Compression [#7692](https://github.com/apache/arrow-datafusion/pull/7692) (devinjdangelo) -- Stop using cache for the benchmark job [#7706](https://github.com/apache/arrow-datafusion/pull/7706) (sarutak) -- Change rust.yml to run benchmark [#7708](https://github.com/apache/arrow-datafusion/pull/7708) (sarutak) -- Extend infer_placeholder_types to support BETWEEN predicates [#7703](https://github.com/apache/arrow-datafusion/pull/7703) (andrelmartins) -- Minor: Add comment explaining why verify benchmark results uses release mode [#7712](https://github.com/apache/arrow-datafusion/pull/7712) (alamb) -- Support all the codecs supported by Avro [#7718](https://github.com/apache/arrow-datafusion/pull/7718) (sarutak) -- Update substrait requirement from 0.14.0 to 0.15.0 [#7719](https://github.com/apache/arrow-datafusion/pull/7719) (dependabot[bot]) -- fix: coerce text to timestamps with timezones [#7720](https://github.com/apache/arrow-datafusion/pull/7720) (mhilton) -- Add LanceDB to the list of Known Users [#7716](https://github.com/apache/arrow-datafusion/pull/7716) (alamb) -- Enable avro reading/writing in datafusion-cli [#7715](https://github.com/apache/arrow-datafusion/pull/7715) (alamb) -- Document crate feature flags [#7713](https://github.com/apache/arrow-datafusion/pull/7713) (alamb) -- Minor: Consolidate UDF tests [#7704](https://github.com/apache/arrow-datafusion/pull/7704) (alamb) -- Minor: fix CI failure 
due to Cargo.lock in datafusioncli [#7733](https://github.com/apache/arrow-datafusion/pull/7733) (yjshen) -- MINOR: change file to column index in page_filter trace log [#7730](https://github.com/apache/arrow-datafusion/pull/7730) (mapleFU) -- preserve array type / timezone in `date_bin` and `date_trunc` functions [#7729](https://github.com/apache/arrow-datafusion/pull/7729) (mhilton) -- Remove redundant is_numeric for DataType [#7734](https://github.com/apache/arrow-datafusion/pull/7734) (qrilka) -- fix: avro_to_arrow: Handle avro nested nullable struct (union) [#7663](https://github.com/apache/arrow-datafusion/pull/7663) (Samrose-Ahmed) -- Rename `SessionContext::with_config_rt` to `SessionContext::new_with_config_from_rt`, etc [#7631](https://github.com/apache/arrow-datafusion/pull/7631) (alamb) -- Rename `bounded_order_preserving_variants` config to `prefer_exising_sort` and update docs [#7723](https://github.com/apache/arrow-datafusion/pull/7723) (alamb) -- Optimize "ORDER BY + LIMIT" queries for speed / memory with special TopK operator [#7721](https://github.com/apache/arrow-datafusion/pull/7721) (Dandandan) -- Minor: Improve crate docs [#7740](https://github.com/apache/arrow-datafusion/pull/7740) (alamb) -- [MINOR]: Resolve linter errors in the main [#7753](https://github.com/apache/arrow-datafusion/pull/7753) (mustafasrepo) -- Minor: Build concat_internal() with ListArray construction instead of ArrayData [#7748](https://github.com/apache/arrow-datafusion/pull/7748) (jayzhan211) -- Minor: Add comment on input_schema from AggregateExec [#7727](https://github.com/apache/arrow-datafusion/pull/7727) (viirya) -- Fix column name for COUNT(\*) set by AggregateStatistics [#7757](https://github.com/apache/arrow-datafusion/pull/7757) (qrilka) -- Add documentation about type signatures, and export `TIMEZONE_WILDCARD` [#7726](https://github.com/apache/arrow-datafusion/pull/7726) (alamb) -- [feat] Support cache ListFiles result cache in session level 
[#7620](https://github.com/apache/arrow-datafusion/pull/7620) (Ted-Jiang) -- Support `SHOW ALL VERBOSE` to show settings description [#7735](https://github.com/apache/arrow-datafusion/pull/7735) (comphead) +- Prepare 31.0.0 release [#7508](https://github.com/apache/datafusion/pull/7508) (andygrove) +- Minor(proto): Implement `TryFrom<&DFSchema>` for `protobuf::DfSchema` [#7505](https://github.com/apache/datafusion/pull/7505) (jonahgao) +- fix: inconsistent behaviors when dividing floating numbers by zero [#7503](https://github.com/apache/datafusion/pull/7503) (jonahgao) +- Parallelize Stateless (CSV/JSON) File Write Serialization [#7452](https://github.com/apache/datafusion/pull/7452) (devinjdangelo) +- Minor: Remove stray comment markings from encoding error message [#7512](https://github.com/apache/datafusion/pull/7512) (devinjdangelo) +- Remove implicit interval type coercion from ScalarValue comparison [#7514](https://github.com/apache/datafusion/pull/7514) (tustvold) +- Minor: deprecate ScalarValue::get_datatype() [#7507](https://github.com/apache/datafusion/pull/7507) (Weijun-H) +- Propagate error from spawned task reading spills [#7510](https://github.com/apache/datafusion/pull/7510) (viirya) +- Refactor the EnforceDistribution Rule [#7488](https://github.com/apache/datafusion/pull/7488) (mustafasrepo) +- Remove get_scan_files and ExecutionPlan::file_scan_config (#7357) [#7487](https://github.com/apache/datafusion/pull/7487) (tustvold) +- Simplify ScalarValue::distance (#7517) [#7519](https://github.com/apache/datafusion/pull/7519) (tustvold) +- typo: change `delimeter` to `delimiter` [#7521](https://github.com/apache/datafusion/pull/7521) (Weijun-H) +- Fix some simplification rules for floating-point arithmetic operations [#7515](https://github.com/apache/datafusion/pull/7515) (jonahgao) +- Documentation Updates for New Write Related Features [#7520](https://github.com/apache/datafusion/pull/7520) (devinjdangelo) +- [MINOR]: Move tests from repartition to 
enforce_distribution file [#7539](https://github.com/apache/datafusion/pull/7539) (mustafasrepo) +- Update the async-trait crate to resolve clippy bug [#7541](https://github.com/apache/datafusion/pull/7541) (metesynnada) +- Fix flaky `test_sort_fetch_memory_calculation` test [#7534](https://github.com/apache/datafusion/pull/7534) (viirya) +- Move common code to utils [#7545](https://github.com/apache/datafusion/pull/7545) (mustafasrepo) +- Minor: Add comments and clearer constructors to `Interval` [#7526](https://github.com/apache/datafusion/pull/7526) (alamb) +- fix: skip EliminateCrossJoin rule if inner join with filter is found [#7529](https://github.com/apache/datafusion/pull/7529) (epsio-banay) +- Create a Priority Queue based Aggregation with `limit` [#7192](https://github.com/apache/datafusion/pull/7192) (avantgardnerio) +- feat: add guarantees to simplification [#7467](https://github.com/apache/datafusion/pull/7467) (wjones127) +- [Minor]: Produce better plan when group by contains all of the ordering requirements [#7542](https://github.com/apache/datafusion/pull/7542) (mustafasrepo) +- Minor: beautify interval display [#7554](https://github.com/apache/datafusion/pull/7554) (Weijun-H) +- replace ptree with termtree [#7560](https://github.com/apache/datafusion/pull/7560) (avantgardnerio) +- Make AvroArrowArrayReader possible to scan Avro backed table which contains nested records [#7525](https://github.com/apache/datafusion/pull/7525) (sarutak) +- Fix a race condition issue on reading spilled file [#7538](https://github.com/apache/datafusion/pull/7538) (sarutak) +- [MINOR]: Add is single method [#7558](https://github.com/apache/datafusion/pull/7558) (mustafasrepo) +- Fix `describe
` to work without SessionContext [#7441](https://github.com/apache/datafusion/pull/7441) (alamb) +- Make the tests in SHJ faster [#7543](https://github.com/apache/datafusion/pull/7543) (metesynnada) +- feat: Support spilling for hash aggregation [#7400](https://github.com/apache/datafusion/pull/7400) (kazuyukitanimura) +- Make backtrace as a cargo feature [#7527](https://github.com/apache/datafusion/pull/7527) (comphead) +- Minor: Fix `clippy` by switching to `timestamp_nanos_opt` instead of (deprecated) `timestamp_nanos` [#7572](https://github.com/apache/datafusion/pull/7572) (alamb) +- Update sqllogictest requirement from 0.15.0 to 0.16.0 [#7569](https://github.com/apache/datafusion/pull/7569) (dependabot[bot]) +- extract `datafusion-physical-plan` to its own crate [#7432](https://github.com/apache/datafusion/pull/7432) (alamb) +- First and Last Accumulators should update with state row excluding is_set flag [#7565](https://github.com/apache/datafusion/pull/7565) (viirya) +- refactor: simplify code of eliminate_cross_join.rs [#7561](https://github.com/apache/datafusion/pull/7561) (jackwener) +- Update release instructions for datafusion-physical-plan crate [#7576](https://github.com/apache/datafusion/pull/7576) (alamb) +- Minor: Update chrono pin to `0.4.31` [#7575](https://github.com/apache/datafusion/pull/7575) (alamb) +- [feat] Introduce cacheManager in session ctx and make StatisticsCache share in session [#7570](https://github.com/apache/datafusion/pull/7570) (Ted-Jiang) +- Enhance/Refactor Ordering Equivalence Properties [#7566](https://github.com/apache/datafusion/pull/7566) (mustafasrepo) +- fix misplaced statements in sqllogictest [#7586](https://github.com/apache/datafusion/pull/7586) (jonahgao) +- Update substrait requirement from 0.13.1 to 0.14.0 [#7585](https://github.com/apache/datafusion/pull/7585) (dependabot[bot]) +- chore: use the `create_udwf` function in `simple_udwf`, consistent with `simple_udf` and `simple_udaf` 
[#7579](https://github.com/apache/datafusion/pull/7579) (tanruixiang) +- Implement protobuf serialization for AnalyzeExec [#7574](https://github.com/apache/datafusion/pull/7574) (adhish20) +- chore: fix catalog's usage docs error and add docs about `CatalogList` trait [#7582](https://github.com/apache/datafusion/pull/7582) (tanruixiang) +- Implement `CardinalityAwareRowConverter` while doing streaming merge [#7401](https://github.com/apache/datafusion/pull/7401) (JayjeetAtGithub) +- Parallelize Parquet Serialization [#7562](https://github.com/apache/datafusion/pull/7562) (devinjdangelo) +- feat: natively support more data types for the `abs` function. [#7568](https://github.com/apache/datafusion/pull/7568) (jonahgao) +- implement string_to_array [#7577](https://github.com/apache/datafusion/pull/7577) (casperhart) +- Create 2023 Q4 roadmap [#7551](https://github.com/apache/datafusion/pull/7551) (graydenshand) +- chore: reduce `physical-plan` dependencies [#7599](https://github.com/apache/datafusion/pull/7599) (crepererum) +- Minor: add githubs start/fork buttons to documentation page [#7588](https://github.com/apache/datafusion/pull/7588) (alamb) +- Minor: add more examples for `CREATE EXTERNAL TABLE` doc [#7594](https://github.com/apache/datafusion/pull/7594) (comphead) +- Update nix requirement from 0.26.1 to 0.27.1 [#7438](https://github.com/apache/datafusion/pull/7438) (dependabot[bot]) +- Update sqllogictest requirement from 0.16.0 to 0.17.0 [#7606](https://github.com/apache/datafusion/pull/7606) (dependabot[bot]) +- Fix panic in TopK [#7609](https://github.com/apache/datafusion/pull/7609) (avantgardnerio) +- Move `FileCompressionType` out of `common` and into `core` [#7596](https://github.com/apache/datafusion/pull/7596) (haohuaijin) +- Expose contents of Constraints [#7603](https://github.com/apache/datafusion/pull/7603) (tv42) +- Change the unbounded_output API default [#7605](https://github.com/apache/datafusion/pull/7605) (metesynnada) +- feat: Parallel 
collecting parquet files statistics #7573 [#7595](https://github.com/apache/datafusion/pull/7595) (hengfeiyang) +- Support hashing List columns [#7616](https://github.com/apache/datafusion/pull/7616) (jonmmease) +- [MINOR] Make the sink input aware of its plan [#7610](https://github.com/apache/datafusion/pull/7610) (metesynnada) +- [MINOR] Reduce complexity on SHJ [#7607](https://github.com/apache/datafusion/pull/7607) (metesynnada) +- feat: Better large output display in datafusion-cli with --maxrows option [#7617](https://github.com/apache/datafusion/pull/7617) (2010YOUY01) +- Minor: add examples for `arrow_cast` and `arrow_typeof` to user guide [#7615](https://github.com/apache/datafusion/pull/7615) (alamb) +- [MINOR]: Fix stack overflow bug for get field access expr [#7623](https://github.com/apache/datafusion/pull/7623) (mustafasrepo) +- Group By All [#7622](https://github.com/apache/datafusion/pull/7622) (berkaysynnada) +- Implement protobuf serialization for `(Bounded)WindowAggExec`. 
[#7557](https://github.com/apache/datafusion/pull/7557) (vrongmeal) +- Make it possible to compile datafusion-common without default features [#7625](https://github.com/apache/datafusion/pull/7625) (jonmmease) +- Minor: Adding backtrace documentation [#7628](https://github.com/apache/datafusion/pull/7628) (comphead) +- fix(5975/5976): timezone handling for timestamps and `date_trunc`, `date_part` and `date_bin` [#7614](https://github.com/apache/datafusion/pull/7614) (wiedld) +- Minor: remove unecessary `Arc`s in datetime_expressions [#7630](https://github.com/apache/datafusion/pull/7630) (alamb) +- fix: check for precision overflow when parsing float as decimal [#7627](https://github.com/apache/datafusion/pull/7627) (jonahgao) +- Update arrow 47.0.0 in DataFusion [#7587](https://github.com/apache/datafusion/pull/7587) (tustvold) +- Add test crate to compile DataFusion with wasm-pack [#7633](https://github.com/apache/datafusion/pull/7633) (jonmmease) +- Minor: Update documentation of case expression [#7646](https://github.com/apache/datafusion/pull/7646) (ongchi) +- Minor: improve docstrings on `SessionState` [#7654](https://github.com/apache/datafusion/pull/7654) (alamb) +- Update example in the DataFrame documentation. 
[#7650](https://github.com/apache/datafusion/pull/7650) (jsimpson-gro) +- Add HTTP object store example [#7602](https://github.com/apache/datafusion/pull/7602) (pka) +- feat: make parse_float_as_decimal work on negative numbers [#7648](https://github.com/apache/datafusion/pull/7648) (jonahgao) +- Minor: add doc comments to `ExtractEquijoinPredicate` [#7658](https://github.com/apache/datafusion/pull/7658) (alamb) +- [MINOR]: Do not add unnecessary hash repartition to the physical plan [#7667](https://github.com/apache/datafusion/pull/7667) (mustafasrepo) +- Minor: add ticket references to parallel parquet writing code [#7592](https://github.com/apache/datafusion/pull/7592) (alamb) +- Minor: Add ticket reference and add test comment [#7593](https://github.com/apache/datafusion/pull/7593) (alamb) +- Support Avro's Enum type and Fixed type [#7635](https://github.com/apache/datafusion/pull/7635) (sarutak) +- Minor: Migrate datafusion-proto tests into it own binary [#7668](https://github.com/apache/datafusion/pull/7668) (ongchi) +- Upgrade apache-avro to 0.16 [#7674](https://github.com/apache/datafusion/pull/7674) (sarutak) +- Move window analysis to the window method [#7672](https://github.com/apache/datafusion/pull/7672) (mustafasrepo) +- Don't add filters to projection in TableScan [#7670](https://github.com/apache/datafusion/pull/7670) (Dandandan) +- Minor: Improve `TableProviderFilterPushDown` docs [#7685](https://github.com/apache/datafusion/pull/7685) (alamb) +- FIX: Test timestamp with table [#7701](https://github.com/apache/datafusion/pull/7701) (jayzhan211) +- Fix bug in `SimplifyExpressions` [#7699](https://github.com/apache/datafusion/pull/7699) (Dandandan) +- Enhance Enforce Dist capabilities to fix, sub optimal bad plans [#7671](https://github.com/apache/datafusion/pull/7671) (mustafasrepo) +- docs: add section on supports_filters_pushdown [#7680](https://github.com/apache/datafusion/pull/7680) (tshauck) +- Improve cache usage in CI 
[#7678](https://github.com/apache/datafusion/pull/7678) (sarutak) +- fix: substrait limit when fetch is None [#7669](https://github.com/apache/datafusion/pull/7669) (waynexia) +- minor: revert parsing precedence between Aggr and UDAF [#7682](https://github.com/apache/datafusion/pull/7682) (waynexia) +- Minor: Move hash utils to common [#7684](https://github.com/apache/datafusion/pull/7684) (jayzhan211) +- Update Default Parquet Write Compression [#7692](https://github.com/apache/datafusion/pull/7692) (devinjdangelo) +- Stop using cache for the benchmark job [#7706](https://github.com/apache/datafusion/pull/7706) (sarutak) +- Change rust.yml to run benchmark [#7708](https://github.com/apache/datafusion/pull/7708) (sarutak) +- Extend infer_placeholder_types to support BETWEEN predicates [#7703](https://github.com/apache/datafusion/pull/7703) (andrelmartins) +- Minor: Add comment explaining why verify benchmark results uses release mode [#7712](https://github.com/apache/datafusion/pull/7712) (alamb) +- Support all the codecs supported by Avro [#7718](https://github.com/apache/datafusion/pull/7718) (sarutak) +- Update substrait requirement from 0.14.0 to 0.15.0 [#7719](https://github.com/apache/datafusion/pull/7719) (dependabot[bot]) +- fix: coerce text to timestamps with timezones [#7720](https://github.com/apache/datafusion/pull/7720) (mhilton) +- Add LanceDB to the list of Known Users [#7716](https://github.com/apache/datafusion/pull/7716) (alamb) +- Enable avro reading/writing in datafusion-cli [#7715](https://github.com/apache/datafusion/pull/7715) (alamb) +- Document crate feature flags [#7713](https://github.com/apache/datafusion/pull/7713) (alamb) +- Minor: Consolidate UDF tests [#7704](https://github.com/apache/datafusion/pull/7704) (alamb) +- Minor: fix CI failure due to Cargo.lock in datafusioncli [#7733](https://github.com/apache/datafusion/pull/7733) (yjshen) +- MINOR: change file to column index in page_filter trace log 
[#7730](https://github.com/apache/datafusion/pull/7730) (mapleFU) +- preserve array type / timezone in `date_bin` and `date_trunc` functions [#7729](https://github.com/apache/datafusion/pull/7729) (mhilton) +- Remove redundant is_numeric for DataType [#7734](https://github.com/apache/datafusion/pull/7734) (qrilka) +- fix: avro_to_arrow: Handle avro nested nullable struct (union) [#7663](https://github.com/apache/datafusion/pull/7663) (Samrose-Ahmed) +- Rename `SessionContext::with_config_rt` to `SessionContext::new_with_config_from_rt`, etc [#7631](https://github.com/apache/datafusion/pull/7631) (alamb) +- Rename `bounded_order_preserving_variants` config to `prefer_exising_sort` and update docs [#7723](https://github.com/apache/datafusion/pull/7723) (alamb) +- Optimize "ORDER BY + LIMIT" queries for speed / memory with special TopK operator [#7721](https://github.com/apache/datafusion/pull/7721) (Dandandan) +- Minor: Improve crate docs [#7740](https://github.com/apache/datafusion/pull/7740) (alamb) +- [MINOR]: Resolve linter errors in the main [#7753](https://github.com/apache/datafusion/pull/7753) (mustafasrepo) +- Minor: Build concat_internal() with ListArray construction instead of ArrayData [#7748](https://github.com/apache/datafusion/pull/7748) (jayzhan211) +- Minor: Add comment on input_schema from AggregateExec [#7727](https://github.com/apache/datafusion/pull/7727) (viirya) +- Fix column name for COUNT(\*) set by AggregateStatistics [#7757](https://github.com/apache/datafusion/pull/7757) (qrilka) +- Add documentation about type signatures, and export `TIMEZONE_WILDCARD` [#7726](https://github.com/apache/datafusion/pull/7726) (alamb) +- [feat] Support cache ListFiles result cache in session level [#7620](https://github.com/apache/datafusion/pull/7620) (Ted-Jiang) +- Support `SHOW ALL VERBOSE` to show settings description [#7735](https://github.com/apache/datafusion/pull/7735) (comphead) diff --git a/dev/changelog/33.0.0.md b/dev/changelog/33.0.0.md index 
17862a64a951..af9a45d4e5cf 100644 --- a/dev/changelog/33.0.0.md +++ b/dev/changelog/33.0.0.md @@ -17,276 +17,276 @@ under the License. --> -## [33.0.0](https://github.com/apache/arrow-datafusion/tree/33.0.0) (2023-11-12) +## [33.0.0](https://github.com/apache/datafusion/tree/33.0.0) (2023-11-12) -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/32.0.0...33.0.0) +[Full Changelog](https://github.com/apache/datafusion/compare/32.0.0...33.0.0) **Breaking changes:** -- Refactor Statistics, introduce precision estimates (`Exact`, `Inexact`, `Absent`) [#7793](https://github.com/apache/arrow-datafusion/pull/7793) (berkaysynnada) -- Remove redundant unwrap in `ScalarValue::new_primitive`, return a `Result` [#7830](https://github.com/apache/arrow-datafusion/pull/7830) (maruschin) -- Add `parquet` feature flag, enabled by default, and make parquet conditional [#7745](https://github.com/apache/arrow-datafusion/pull/7745) (ongchi) -- Change input for `to_timestamp` function to be seconds rather than nanoseconds, add `to_timestamp_nanos` [#7844](https://github.com/apache/arrow-datafusion/pull/7844) (comphead) -- Percent Decode URL Paths (#8009) [#8012](https://github.com/apache/arrow-datafusion/pull/8012) (tustvold) -- chore: remove panics in datafusion-common::scalar by making more operations return `Result` [#7901](https://github.com/apache/arrow-datafusion/pull/7901) (junjunjd) -- Combine `Expr::Wildcard` and `Wxpr::QualifiedWildcard`, add `wildcard()` expr fn [#8105](https://github.com/apache/arrow-datafusion/pull/8105) (alamb) +- Refactor Statistics, introduce precision estimates (`Exact`, `Inexact`, `Absent`) [#7793](https://github.com/apache/datafusion/pull/7793) (berkaysynnada) +- Remove redundant unwrap in `ScalarValue::new_primitive`, return a `Result` [#7830](https://github.com/apache/datafusion/pull/7830) (maruschin) +- Add `parquet` feature flag, enabled by default, and make parquet conditional 
[#7745](https://github.com/apache/datafusion/pull/7745) (ongchi) +- Change input for `to_timestamp` function to be seconds rather than nanoseconds, add `to_timestamp_nanos` [#7844](https://github.com/apache/datafusion/pull/7844) (comphead) +- Percent Decode URL Paths (#8009) [#8012](https://github.com/apache/datafusion/pull/8012) (tustvold) +- chore: remove panics in datafusion-common::scalar by making more operations return `Result` [#7901](https://github.com/apache/datafusion/pull/7901) (junjunjd) +- Combine `Expr::Wildcard` and `Wxpr::QualifiedWildcard`, add `wildcard()` expr fn [#8105](https://github.com/apache/datafusion/pull/8105) (alamb) **Performance related:** -- Add distinct union optimization [#7788](https://github.com/apache/arrow-datafusion/pull/7788) (maruschin) -- Fix join order for TPCH Q17 & Q18 by improving FilterExec statistics [#8126](https://github.com/apache/arrow-datafusion/pull/8126) (andygrove) -- feat: add column statistics into explain [#8112](https://github.com/apache/arrow-datafusion/pull/8112) (NGA-TRAN) +- Add distinct union optimization [#7788](https://github.com/apache/datafusion/pull/7788) (maruschin) +- Fix join order for TPCH Q17 & Q18 by improving FilterExec statistics [#8126](https://github.com/apache/datafusion/pull/8126) (andygrove) +- feat: add column statistics into explain [#8112](https://github.com/apache/datafusion/pull/8112) (NGA-TRAN) **Implemented enhancements:** -- Support InsertInto Sorted ListingTable [#7743](https://github.com/apache/arrow-datafusion/pull/7743) (devinjdangelo) -- External Table Primary key support [#7755](https://github.com/apache/arrow-datafusion/pull/7755) (mustafasrepo) -- add interval arithmetic for timestamp types [#7758](https://github.com/apache/arrow-datafusion/pull/7758) (mhilton) -- Interval Arithmetic NegativeExpr Support [#7804](https://github.com/apache/arrow-datafusion/pull/7804) (berkaysynnada) -- Exactness Indicator of Parameters: Precision 
[#7809](https://github.com/apache/arrow-datafusion/pull/7809) (berkaysynnada) -- Implement GetIndexedField for map-typed columns [#7825](https://github.com/apache/arrow-datafusion/pull/7825) (swgillespie) -- Fix precision loss when coercing date_part utf8 argument [#7846](https://github.com/apache/arrow-datafusion/pull/7846) (Dandandan) -- Support `Binary`/`LargeBinary` --> `Utf8`/`LargeUtf8` in ilike and string functions [#7840](https://github.com/apache/arrow-datafusion/pull/7840) (alamb) -- Support Decimal256 on AVG aggregate expression [#7853](https://github.com/apache/arrow-datafusion/pull/7853) (viirya) -- Support Decimal256 column in create external table [#7866](https://github.com/apache/arrow-datafusion/pull/7866) (viirya) -- Support Decimal256 in Min/Max aggregate expressions [#7881](https://github.com/apache/arrow-datafusion/pull/7881) (viirya) -- Implement Hive-Style Partitioned Write Support [#7801](https://github.com/apache/arrow-datafusion/pull/7801) (devinjdangelo) -- feat: support `Decimal256` for the `abs` function [#7904](https://github.com/apache/arrow-datafusion/pull/7904) (jonahgao) -- Parallelize Serialization of Columns within Parquet RowGroups [#7655](https://github.com/apache/arrow-datafusion/pull/7655) (devinjdangelo) -- feat: Use bloom filter when reading parquet to skip row groups [#7821](https://github.com/apache/arrow-datafusion/pull/7821) (hengfeiyang) -- Support Partitioning Data by Dictionary Encoded String Array Types [#7896](https://github.com/apache/arrow-datafusion/pull/7896) (devinjdangelo) -- Read only enough bytes to infer Arrow IPC file schema via stream [#7962](https://github.com/apache/arrow-datafusion/pull/7962) (Jefffrey) -- feat: Support determining extensions from names like `foo.parquet.snappy` as well as `foo.parquet` [#7972](https://github.com/apache/arrow-datafusion/pull/7972) (Weijun-H) -- feat: Protobuf serde for Json file sink [#8062](https://github.com/apache/arrow-datafusion/pull/8062) (Jefffrey) -- feat: 
support target table alias in update statement [#8080](https://github.com/apache/arrow-datafusion/pull/8080) (jonahgao) -- feat: support UDAF in substrait producer/consumer [#8119](https://github.com/apache/arrow-datafusion/pull/8119) (waynexia) +- Support InsertInto Sorted ListingTable [#7743](https://github.com/apache/datafusion/pull/7743) (devinjdangelo) +- External Table Primary key support [#7755](https://github.com/apache/datafusion/pull/7755) (mustafasrepo) +- add interval arithmetic for timestamp types [#7758](https://github.com/apache/datafusion/pull/7758) (mhilton) +- Interval Arithmetic NegativeExpr Support [#7804](https://github.com/apache/datafusion/pull/7804) (berkaysynnada) +- Exactness Indicator of Parameters: Precision [#7809](https://github.com/apache/datafusion/pull/7809) (berkaysynnada) +- Implement GetIndexedField for map-typed columns [#7825](https://github.com/apache/datafusion/pull/7825) (swgillespie) +- Fix precision loss when coercing date_part utf8 argument [#7846](https://github.com/apache/datafusion/pull/7846) (Dandandan) +- Support `Binary`/`LargeBinary` --> `Utf8`/`LargeUtf8` in ilike and string functions [#7840](https://github.com/apache/datafusion/pull/7840) (alamb) +- Support Decimal256 on AVG aggregate expression [#7853](https://github.com/apache/datafusion/pull/7853) (viirya) +- Support Decimal256 column in create external table [#7866](https://github.com/apache/datafusion/pull/7866) (viirya) +- Support Decimal256 in Min/Max aggregate expressions [#7881](https://github.com/apache/datafusion/pull/7881) (viirya) +- Implement Hive-Style Partitioned Write Support [#7801](https://github.com/apache/datafusion/pull/7801) (devinjdangelo) +- feat: support `Decimal256` for the `abs` function [#7904](https://github.com/apache/datafusion/pull/7904) (jonahgao) +- Parallelize Serialization of Columns within Parquet RowGroups [#7655](https://github.com/apache/datafusion/pull/7655) (devinjdangelo) +- feat: Use bloom filter when reading parquet 
to skip row groups [#7821](https://github.com/apache/datafusion/pull/7821) (hengfeiyang) +- Support Partitioning Data by Dictionary Encoded String Array Types [#7896](https://github.com/apache/datafusion/pull/7896) (devinjdangelo) +- Read only enough bytes to infer Arrow IPC file schema via stream [#7962](https://github.com/apache/datafusion/pull/7962) (Jefffrey) +- feat: Support determining extensions from names like `foo.parquet.snappy` as well as `foo.parquet` [#7972](https://github.com/apache/datafusion/pull/7972) (Weijun-H) +- feat: Protobuf serde for Json file sink [#8062](https://github.com/apache/datafusion/pull/8062) (Jefffrey) +- feat: support target table alias in update statement [#8080](https://github.com/apache/datafusion/pull/8080) (jonahgao) +- feat: support UDAF in substrait producer/consumer [#8119](https://github.com/apache/datafusion/pull/8119) (waynexia) **Fixed bugs:** -- fix: preserve column qualifier for `DataFrame::with_column` [#7792](https://github.com/apache/arrow-datafusion/pull/7792) (jonahgao) -- fix: don't push down volatile predicates in projection [#7909](https://github.com/apache/arrow-datafusion/pull/7909) (haohuaijin) -- fix: generate logical plan for `UPDATE SET FROM` statement [#7984](https://github.com/apache/arrow-datafusion/pull/7984) (jonahgao) -- fix: single_distinct_aggretation_to_group_by fail [#7997](https://github.com/apache/arrow-datafusion/pull/7997) (haohuaijin) -- fix: clippy warnings from nightly rust 1.75 [#8025](https://github.com/apache/arrow-datafusion/pull/8025) (waynexia) -- fix: DataFusion suggests invalid functions [#8083](https://github.com/apache/arrow-datafusion/pull/8083) (jonahgao) -- fix: add encode/decode to protobuf encoding [#8089](https://github.com/apache/arrow-datafusion/pull/8089) (Syleechan) +- fix: preserve column qualifier for `DataFrame::with_column` [#7792](https://github.com/apache/datafusion/pull/7792) (jonahgao) +- fix: don't push down volatile predicates in projection 
[#7909](https://github.com/apache/datafusion/pull/7909) (haohuaijin) +- fix: generate logical plan for `UPDATE SET FROM` statement [#7984](https://github.com/apache/datafusion/pull/7984) (jonahgao) +- fix: single_distinct_aggretation_to_group_by fail [#7997](https://github.com/apache/datafusion/pull/7997) (haohuaijin) +- fix: clippy warnings from nightly rust 1.75 [#8025](https://github.com/apache/datafusion/pull/8025) (waynexia) +- fix: DataFusion suggests invalid functions [#8083](https://github.com/apache/datafusion/pull/8083) (jonahgao) +- fix: add encode/decode to protobuf encoding [#8089](https://github.com/apache/datafusion/pull/8089) (Syleechan) **Documentation updates:** -- Minor: Improve TableProvider document, and add ascii art [#7759](https://github.com/apache/arrow-datafusion/pull/7759) (alamb) -- Expose arrow-schema `serde` crate feature flag [#7829](https://github.com/apache/arrow-datafusion/pull/7829) (lewiszlw) -- doc: fix ExecutionContext to SessionContext in custom-table-providers.md [#7903](https://github.com/apache/arrow-datafusion/pull/7903) (ZENOTME) -- Minor: Document `parquet` crate feature [#7927](https://github.com/apache/arrow-datafusion/pull/7927) (alamb) -- Add some initial content about creating logical plans [#7952](https://github.com/apache/arrow-datafusion/pull/7952) (andygrove) -- Minor: Add implementation examples to ExecutionPlan::execute [#8013](https://github.com/apache/arrow-datafusion/pull/8013) (tustvold) -- Minor: Improve documentation for Filter Pushdown [#8023](https://github.com/apache/arrow-datafusion/pull/8023) (alamb) -- Minor: Improve `ExecutionPlan` documentation [#8019](https://github.com/apache/arrow-datafusion/pull/8019) (alamb) -- Improve comments for `PartitionSearchMode` struct [#8047](https://github.com/apache/arrow-datafusion/pull/8047) (ozankabak) -- Prepare 33.0.0 Release [#8057](https://github.com/apache/arrow-datafusion/pull/8057) (andygrove) -- Improve documentation for calculate_prune_length method in 
`SymmetricHashJoin` [#8125](https://github.com/apache/arrow-datafusion/pull/8125) (Asura7969) -- docs: show creation of DFSchema [#8132](https://github.com/apache/arrow-datafusion/pull/8132) (wjones127) -- Improve documentation site to make it easier to find communication on Slack/Discord [#8138](https://github.com/apache/arrow-datafusion/pull/8138) (alamb) +- Minor: Improve TableProvider document, and add ascii art [#7759](https://github.com/apache/datafusion/pull/7759) (alamb) +- Expose arrow-schema `serde` crate feature flag [#7829](https://github.com/apache/datafusion/pull/7829) (lewiszlw) +- doc: fix ExecutionContext to SessionContext in custom-table-providers.md [#7903](https://github.com/apache/datafusion/pull/7903) (ZENOTME) +- Minor: Document `parquet` crate feature [#7927](https://github.com/apache/datafusion/pull/7927) (alamb) +- Add some initial content about creating logical plans [#7952](https://github.com/apache/datafusion/pull/7952) (andygrove) +- Minor: Add implementation examples to ExecutionPlan::execute [#8013](https://github.com/apache/datafusion/pull/8013) (tustvold) +- Minor: Improve documentation for Filter Pushdown [#8023](https://github.com/apache/datafusion/pull/8023) (alamb) +- Minor: Improve `ExecutionPlan` documentation [#8019](https://github.com/apache/datafusion/pull/8019) (alamb) +- Improve comments for `PartitionSearchMode` struct [#8047](https://github.com/apache/datafusion/pull/8047) (ozankabak) +- Prepare 33.0.0 Release [#8057](https://github.com/apache/datafusion/pull/8057) (andygrove) +- Improve documentation for calculate_prune_length method in `SymmetricHashJoin` [#8125](https://github.com/apache/datafusion/pull/8125) (Asura7969) +- docs: show creation of DFSchema [#8132](https://github.com/apache/datafusion/pull/8132) (wjones127) +- Improve documentation site to make it easier to find communication on Slack/Discord [#8138](https://github.com/apache/datafusion/pull/8138) (alamb) **Merged pull requests:** -- Minor: Improve 
TableProvider document, and add ascii art [#7759](https://github.com/apache/arrow-datafusion/pull/7759) (alamb) -- Prepare 32.0.0 Release [#7769](https://github.com/apache/arrow-datafusion/pull/7769) (andygrove) -- Minor: Change all file links to GitHub in document [#7768](https://github.com/apache/arrow-datafusion/pull/7768) (ongchi) -- Minor: Improve `PruningPredicate` documentation [#7738](https://github.com/apache/arrow-datafusion/pull/7738) (alamb) -- Support InsertInto Sorted ListingTable [#7743](https://github.com/apache/arrow-datafusion/pull/7743) (devinjdangelo) -- Minor: improve documentation to `stagger_batch` [#7754](https://github.com/apache/arrow-datafusion/pull/7754) (alamb) -- External Table Primary key support [#7755](https://github.com/apache/arrow-datafusion/pull/7755) (mustafasrepo) -- Minor: Build array_array() with ListArray construction instead of ArrayData [#7780](https://github.com/apache/arrow-datafusion/pull/7780) (jayzhan211) -- Minor: Remove unnecessary `#[cfg(feature = "avro")]` [#7773](https://github.com/apache/arrow-datafusion/pull/7773) (sarutak) -- add interval arithmetic for timestamp types [#7758](https://github.com/apache/arrow-datafusion/pull/7758) (mhilton) -- Minor: make tests deterministic [#7771](https://github.com/apache/arrow-datafusion/pull/7771) (Weijun-H) -- Minor: Improve `Interval` Docs [#7782](https://github.com/apache/arrow-datafusion/pull/7782) (alamb) -- `DataSink` additions [#7778](https://github.com/apache/arrow-datafusion/pull/7778) (Dandandan) -- Update substrait requirement from 0.15.0 to 0.16.0 [#7783](https://github.com/apache/arrow-datafusion/pull/7783) (dependabot[bot]) -- Move nested union optimization from plan builder to logical optimizer [#7695](https://github.com/apache/arrow-datafusion/pull/7695) (maruschin) -- Minor: comments that explain the schema used in simply_expressions [#7747](https://github.com/apache/arrow-datafusion/pull/7747) (alamb) -- Update regex-syntax requirement from 0.7.1 to 
0.8.0 [#7784](https://github.com/apache/arrow-datafusion/pull/7784) (dependabot[bot]) -- Minor: Add sql test for `UNION` / `UNION ALL` + plans [#7787](https://github.com/apache/arrow-datafusion/pull/7787) (alamb) -- fix: preserve column qualifier for `DataFrame::with_column` [#7792](https://github.com/apache/arrow-datafusion/pull/7792) (jonahgao) -- Interval Arithmetic NegativeExpr Support [#7804](https://github.com/apache/arrow-datafusion/pull/7804) (berkaysynnada) -- Exactness Indicator of Parameters: Precision [#7809](https://github.com/apache/arrow-datafusion/pull/7809) (berkaysynnada) -- add `LogicalPlanBuilder::join_on` [#7805](https://github.com/apache/arrow-datafusion/pull/7805) (haohuaijin) -- Fix SortPreservingRepartition with no existing ordering. [#7811](https://github.com/apache/arrow-datafusion/pull/7811) (mustafasrepo) -- Update zstd requirement from 0.12 to 0.13 [#7806](https://github.com/apache/arrow-datafusion/pull/7806) (dependabot[bot]) -- [Minor]: Remove input_schema field from window executor [#7810](https://github.com/apache/arrow-datafusion/pull/7810) (mustafasrepo) -- refactor(7181): move streaming_merge() into separate mod from the merge node [#7799](https://github.com/apache/arrow-datafusion/pull/7799) (wiedld) -- Improve update error [#7777](https://github.com/apache/arrow-datafusion/pull/7777) (lewiszlw) -- Minor: Update LogicalPlan::join_on API, use it more [#7814](https://github.com/apache/arrow-datafusion/pull/7814) (alamb) -- Add distinct union optimization [#7788](https://github.com/apache/arrow-datafusion/pull/7788) (maruschin) -- Make CI fail on any occurrence of rust-tomlfmt failed [#7774](https://github.com/apache/arrow-datafusion/pull/7774) (ongchi) -- Encode all join conditions in a single expression field [#7612](https://github.com/apache/arrow-datafusion/pull/7612) (nseekhao) -- Update substrait requirement from 0.16.0 to 0.17.0 [#7808](https://github.com/apache/arrow-datafusion/pull/7808) (dependabot[bot]) -- Minor: 
include `sort` expressions in `SortPreservingRepartitionExec` explain plan [#7796](https://github.com/apache/arrow-datafusion/pull/7796) (alamb) -- minor: add more document to Wildcard expr [#7822](https://github.com/apache/arrow-datafusion/pull/7822) (waynexia) -- Minor: Move `Monotonicity` to `expr` crate [#7820](https://github.com/apache/arrow-datafusion/pull/7820) (2010YOUY01) -- Use code block for better formatting of rustdoc for PhysicalGroupBy [#7823](https://github.com/apache/arrow-datafusion/pull/7823) (qrilka) -- Update explain plan to show `TopK` operator [#7826](https://github.com/apache/arrow-datafusion/pull/7826) (haohuaijin) -- Extract ReceiverStreamBuilder [#7817](https://github.com/apache/arrow-datafusion/pull/7817) (tustvold) -- Extend backtrace coverage for `DatafusionError::Plan` errors errors [#7803](https://github.com/apache/arrow-datafusion/pull/7803) (comphead) -- Add documentation and usability for prepared parameters [#7785](https://github.com/apache/arrow-datafusion/pull/7785) (alamb) -- Implement GetIndexedField for map-typed columns [#7825](https://github.com/apache/arrow-datafusion/pull/7825) (swgillespie) -- Minor: Assert `streaming_merge` has non empty sort exprs [#7795](https://github.com/apache/arrow-datafusion/pull/7795) (alamb) -- Minor: Upgrade docs for `PhysicalExpr::{propagate_constraints, evaluate_bounds}` [#7812](https://github.com/apache/arrow-datafusion/pull/7812) (alamb) -- Change ScalarValue::List to store ArrayRef [#7629](https://github.com/apache/arrow-datafusion/pull/7629) (jayzhan211) -- [MINOR]:Do not introduce unnecessary repartition when row count is 1. 
[#7832](https://github.com/apache/arrow-datafusion/pull/7832) (mustafasrepo) -- Minor: Add tests for binary / utf8 coercion [#7839](https://github.com/apache/arrow-datafusion/pull/7839) (alamb) -- Avoid panics on error while encoding/decoding ListValue::Array as protobuf [#7837](https://github.com/apache/arrow-datafusion/pull/7837) (alamb) -- Refactor Statistics, introduce precision estimates (`Exact`, `Inexact`, `Absent`) [#7793](https://github.com/apache/arrow-datafusion/pull/7793) (berkaysynnada) -- Remove redundant unwrap in `ScalarValue::new_primitive`, return a `Result` [#7830](https://github.com/apache/arrow-datafusion/pull/7830) (maruschin) -- Fix precision loss when coercing date_part utf8 argument [#7846](https://github.com/apache/arrow-datafusion/pull/7846) (Dandandan) -- Add operator section to user guide, Add `std::ops` operations to `prelude`, and add `not()` expr_fn [#7732](https://github.com/apache/arrow-datafusion/pull/7732) (ongchi) -- Expose arrow-schema `serde` crate feature flag [#7829](https://github.com/apache/arrow-datafusion/pull/7829) (lewiszlw) -- Improve `ContextProvider` naming: rename` get_table_provider` --> `get_table_source`, deprecate `get_table_provider` [#7831](https://github.com/apache/arrow-datafusion/pull/7831) (lewiszlw) -- DataSink Dynamic Execution Time Demux [#7791](https://github.com/apache/arrow-datafusion/pull/7791) (devinjdangelo) -- Add small column on empty projection [#7833](https://github.com/apache/arrow-datafusion/pull/7833) (ch-sc) -- feat(7849): coerce TIMESTAMP to TIMESTAMPTZ [#7850](https://github.com/apache/arrow-datafusion/pull/7850) (mhilton) -- Support `Binary`/`LargeBinary` --> `Utf8`/`LargeUtf8` in ilike and string functions [#7840](https://github.com/apache/arrow-datafusion/pull/7840) (alamb) -- Minor: fix typo in comments [#7856](https://github.com/apache/arrow-datafusion/pull/7856) (haohuaijin) -- Minor: improve `join` / `join_on` docs [#7813](https://github.com/apache/arrow-datafusion/pull/7813) 
(alamb) -- Support Decimal256 on AVG aggregate expression [#7853](https://github.com/apache/arrow-datafusion/pull/7853) (viirya) -- Minor: fix typo in comments [#7861](https://github.com/apache/arrow-datafusion/pull/7861) (alamb) -- Minor: fix typo in GreedyMemoryPool documentation [#7864](https://github.com/apache/arrow-datafusion/pull/7864) (avh4) -- Minor: fix multiple typos [#7863](https://github.com/apache/arrow-datafusion/pull/7863) (Smoothieewastaken) -- Minor: Fix docstring typos [#7873](https://github.com/apache/arrow-datafusion/pull/7873) (alamb) -- Add CursorValues Decoupling Cursor Data from Cursor Position [#7855](https://github.com/apache/arrow-datafusion/pull/7855) (tustvold) -- Support Decimal256 column in create external table [#7866](https://github.com/apache/arrow-datafusion/pull/7866) (viirya) -- Support Decimal256 in Min/Max aggregate expressions [#7881](https://github.com/apache/arrow-datafusion/pull/7881) (viirya) -- Implement Hive-Style Partitioned Write Support [#7801](https://github.com/apache/arrow-datafusion/pull/7801) (devinjdangelo) -- Minor: fix config typo [#7874](https://github.com/apache/arrow-datafusion/pull/7874) (alamb) -- Add Decimal256 sqllogictests for SUM, MEDIAN and COUNT aggregate expressions [#7889](https://github.com/apache/arrow-datafusion/pull/7889) (viirya) -- [test] add fuzz test for topk [#7772](https://github.com/apache/arrow-datafusion/pull/7772) (Tangruilin) -- Allow Setting Minimum Parallelism with RowCount Based Demuxer [#7841](https://github.com/apache/arrow-datafusion/pull/7841) (devinjdangelo) -- Drop single quotes to make warnings for parquet options not confusing [#7902](https://github.com/apache/arrow-datafusion/pull/7902) (qrilka) -- Add multi-column topk fuzz tests [#7898](https://github.com/apache/arrow-datafusion/pull/7898) (alamb) -- Change `FileScanConfig.table_partition_cols` from `(String, DataType)` to `Field`s [#7890](https://github.com/apache/arrow-datafusion/pull/7890) (NGA-TRAN) -- Maintain 
time zone in `ScalarValue::new_list` [#7899](https://github.com/apache/arrow-datafusion/pull/7899) (Dandandan) -- [MINOR]: Move joinside struct to common [#7908](https://github.com/apache/arrow-datafusion/pull/7908) (mustafasrepo) -- doc: fix ExecutionContext to SessionContext in custom-table-providers.md [#7903](https://github.com/apache/arrow-datafusion/pull/7903) (ZENOTME) -- Update arrow 48.0.0 [#7854](https://github.com/apache/arrow-datafusion/pull/7854) (tustvold) -- feat: support `Decimal256` for the `abs` function [#7904](https://github.com/apache/arrow-datafusion/pull/7904) (jonahgao) -- [MINOR] Simplify Aggregate, and Projection output_partitioning implementation [#7907](https://github.com/apache/arrow-datafusion/pull/7907) (mustafasrepo) -- Bump actions/setup-node from 3 to 4 [#7915](https://github.com/apache/arrow-datafusion/pull/7915) (dependabot[bot]) -- [Bug Fix]: Fix bug, first last reverse [#7914](https://github.com/apache/arrow-datafusion/pull/7914) (mustafasrepo) -- Minor: provide default implementation for ExecutionPlan::statistics [#7911](https://github.com/apache/arrow-datafusion/pull/7911) (alamb) -- Update substrait requirement from 0.17.0 to 0.18.0 [#7916](https://github.com/apache/arrow-datafusion/pull/7916) (dependabot[bot]) -- Minor: Remove unnecessary clone in datafusion_proto [#7921](https://github.com/apache/arrow-datafusion/pull/7921) (ongchi) -- [MINOR]: Simplify code, change requirement from PhysicalSortExpr to PhysicalSortRequirement [#7913](https://github.com/apache/arrow-datafusion/pull/7913) (mustafasrepo) -- [Minor] Move combine_join util to under equivalence.rs [#7917](https://github.com/apache/arrow-datafusion/pull/7917) (mustafasrepo) -- support scan empty projection [#7920](https://github.com/apache/arrow-datafusion/pull/7920) (haohuaijin) -- Cleanup logical optimizer rules. 
[#7919](https://github.com/apache/arrow-datafusion/pull/7919) (mustafasrepo) -- Parallelize Serialization of Columns within Parquet RowGroups [#7655](https://github.com/apache/arrow-datafusion/pull/7655) (devinjdangelo) -- feat: Use bloom filter when reading parquet to skip row groups [#7821](https://github.com/apache/arrow-datafusion/pull/7821) (hengfeiyang) -- fix: don't push down volatile predicates in projection [#7909](https://github.com/apache/arrow-datafusion/pull/7909) (haohuaijin) -- Add `parquet` feature flag, enabled by default, and make parquet conditional [#7745](https://github.com/apache/arrow-datafusion/pull/7745) (ongchi) -- [MINOR]: Simplify enforce_distribution, minor changes [#7924](https://github.com/apache/arrow-datafusion/pull/7924) (mustafasrepo) -- Add simple window query to sqllogictest [#7928](https://github.com/apache/arrow-datafusion/pull/7928) (Jefffrey) -- ci: upgrade node to version 20 [#7918](https://github.com/apache/arrow-datafusion/pull/7918) (crepererum) -- Change input for `to_timestamp` function to be seconds rather than nanoseconds, add `to_timestamp_nanos` [#7844](https://github.com/apache/arrow-datafusion/pull/7844) (comphead) -- Minor: Document `parquet` crate feature [#7927](https://github.com/apache/arrow-datafusion/pull/7927) (alamb) -- Minor: reduce some `#cfg(feature = "parquet")` [#7929](https://github.com/apache/arrow-datafusion/pull/7929) (alamb) -- Minor: reduce use of `#cfg(feature = "parquet")` in tests [#7930](https://github.com/apache/arrow-datafusion/pull/7930) (alamb) -- Fix CI failures on `to_timestamp()` calls [#7941](https://github.com/apache/arrow-datafusion/pull/7941) (comphead) -- minor: add a datatype casting for the updated value [#7922](https://github.com/apache/arrow-datafusion/pull/7922) (jonahgao) -- Minor:add `avro` feature in datafusion-examples to make `avro_sql` run [#7946](https://github.com/apache/arrow-datafusion/pull/7946) (haohuaijin) -- Add simple exclude all columns test to sqllogictest 
[#7945](https://github.com/apache/arrow-datafusion/pull/7945) (Jefffrey) -- Support Partitioning Data by Dictionary Encoded String Array Types [#7896](https://github.com/apache/arrow-datafusion/pull/7896) (devinjdangelo) -- Minor: Remove array() in array_expression [#7961](https://github.com/apache/arrow-datafusion/pull/7961) (jayzhan211) -- Minor: simplify update code [#7943](https://github.com/apache/arrow-datafusion/pull/7943) (alamb) -- Add some initial content about creating logical plans [#7952](https://github.com/apache/arrow-datafusion/pull/7952) (andygrove) -- Minor: Change from `&mut SessionContext` to `&SessionContext` in substrait [#7965](https://github.com/apache/arrow-datafusion/pull/7965) (my-vegetable-has-exploded) -- Fix crate READMEs [#7964](https://github.com/apache/arrow-datafusion/pull/7964) (Jefffrey) -- Minor: Improve `HashJoinExec` documentation [#7953](https://github.com/apache/arrow-datafusion/pull/7953) (alamb) -- chore: clean useless clone baesd on clippy [#7973](https://github.com/apache/arrow-datafusion/pull/7973) (Weijun-H) -- Add README.md to `core`, `execution` and `physical-plan` crates [#7970](https://github.com/apache/arrow-datafusion/pull/7970) (alamb) -- Move source repartitioning into `ExecutionPlan::repartition` [#7936](https://github.com/apache/arrow-datafusion/pull/7936) (alamb) -- minor: fix broken links in README.md [#7986](https://github.com/apache/arrow-datafusion/pull/7986) (jonahgao) -- Minor: Upate the `sqllogictest` crate README [#7971](https://github.com/apache/arrow-datafusion/pull/7971) (alamb) -- Improve MemoryCatalogProvider default impl block placement [#7975](https://github.com/apache/arrow-datafusion/pull/7975) (lewiszlw) -- Fix `ScalarValue` handling of NULL values for ListArray [#7969](https://github.com/apache/arrow-datafusion/pull/7969) (viirya) -- Refactor of Ordering and Prunability Traversals and States [#7985](https://github.com/apache/arrow-datafusion/pull/7985) (berkaysynnada) -- Keep output as 
scalar for scalar function if all inputs are scalar [#7967](https://github.com/apache/arrow-datafusion/pull/7967) (viirya) -- Fix crate READMEs for core, execution, physical-plan [#7990](https://github.com/apache/arrow-datafusion/pull/7990) (Jefffrey) -- Update sqlparser requirement from 0.38.0 to 0.39.0 [#7983](https://github.com/apache/arrow-datafusion/pull/7983) (jackwener) -- Fix panic in multiple distinct aggregates by fixing `ScalarValue::new_list` [#7989](https://github.com/apache/arrow-datafusion/pull/7989) (alamb) -- Minor: Add `MemoryReservation::consumer` getter [#8000](https://github.com/apache/arrow-datafusion/pull/8000) (milenkovicm) -- fix: generate logical plan for `UPDATE SET FROM` statement [#7984](https://github.com/apache/arrow-datafusion/pull/7984) (jonahgao) -- Create temporary files for reading or writing [#8005](https://github.com/apache/arrow-datafusion/pull/8005) (smallzhongfeng) -- Minor: fix comment on SortExec::with_fetch method [#8011](https://github.com/apache/arrow-datafusion/pull/8011) (westonpace) -- Fix: dataframe_subquery example Optimizer rule `common_sub_expression_eliminate` failed [#8016](https://github.com/apache/arrow-datafusion/pull/8016) (smallzhongfeng) -- Percent Decode URL Paths (#8009) [#8012](https://github.com/apache/arrow-datafusion/pull/8012) (tustvold) -- Minor: Extract common deps into workspace [#7982](https://github.com/apache/arrow-datafusion/pull/7982) (lewiszlw) -- minor: change some plan_err to exec_err [#7996](https://github.com/apache/arrow-datafusion/pull/7996) (waynexia) -- Minor: error on unsupported RESPECT NULLs syntax [#7998](https://github.com/apache/arrow-datafusion/pull/7998) (alamb) -- Break GroupedHashAggregateStream spill batch into smaller chunks [#8004](https://github.com/apache/arrow-datafusion/pull/8004) (milenkovicm) -- Minor: Add implementation examples to ExecutionPlan::execute [#8013](https://github.com/apache/arrow-datafusion/pull/8013) (tustvold) -- Minor: Extend 
wrap_into_list_array to accept multiple args [#7993](https://github.com/apache/arrow-datafusion/pull/7993) (jayzhan211) -- GroupedHashAggregateStream should register spillable consumer [#8002](https://github.com/apache/arrow-datafusion/pull/8002) (milenkovicm) -- fix: single_distinct_aggretation_to_group_by fail [#7997](https://github.com/apache/arrow-datafusion/pull/7997) (haohuaijin) -- Read only enough bytes to infer Arrow IPC file schema via stream [#7962](https://github.com/apache/arrow-datafusion/pull/7962) (Jefffrey) -- Minor: remove a strange char [#8030](https://github.com/apache/arrow-datafusion/pull/8030) (haohuaijin) -- Minor: Improve documentation for Filter Pushdown [#8023](https://github.com/apache/arrow-datafusion/pull/8023) (alamb) -- Minor: Improve `ExecutionPlan` documentation [#8019](https://github.com/apache/arrow-datafusion/pull/8019) (alamb) -- fix: clippy warnings from nightly rust 1.75 [#8025](https://github.com/apache/arrow-datafusion/pull/8025) (waynexia) -- Minor: Avoid recomputing compute_array_ndims in align_array_dimensions [#7963](https://github.com/apache/arrow-datafusion/pull/7963) (jayzhan211) -- Minor: fix doc and fmt CI check [#8037](https://github.com/apache/arrow-datafusion/pull/8037) (alamb) -- Minor: remove uncessary #cfg test [#8036](https://github.com/apache/arrow-datafusion/pull/8036) (alamb) -- Minor: Improve documentation for `PartitionStream` and `StreamingTableExec` [#8035](https://github.com/apache/arrow-datafusion/pull/8035) (alamb) -- Combine Equivalence and Ordering equivalence to simplify state [#8006](https://github.com/apache/arrow-datafusion/pull/8006) (mustafasrepo) -- Encapsulate `ProjectionMapping` as a struct [#8033](https://github.com/apache/arrow-datafusion/pull/8033) (alamb) -- Minor: Fix bugs in docs for `to_timestamp`, `to_timestamp_seconds`, ... 
[#8040](https://github.com/apache/arrow-datafusion/pull/8040) (alamb) -- Improve comments for `PartitionSearchMode` struct [#8047](https://github.com/apache/arrow-datafusion/pull/8047) (ozankabak) -- General approach for Array replace [#8050](https://github.com/apache/arrow-datafusion/pull/8050) (jayzhan211) -- Minor: Remove the irrelevant note from the Expression API doc [#8053](https://github.com/apache/arrow-datafusion/pull/8053) (ongchi) -- Minor: Add more documentation about Partitioning [#8022](https://github.com/apache/arrow-datafusion/pull/8022) (alamb) -- Minor: improve documentation for IsNotNull, DISTINCT, etc [#8052](https://github.com/apache/arrow-datafusion/pull/8052) (alamb) -- Prepare 33.0.0 Release [#8057](https://github.com/apache/arrow-datafusion/pull/8057) (andygrove) -- Minor: improve error message by adding types to message [#8065](https://github.com/apache/arrow-datafusion/pull/8065) (alamb) -- Minor: Remove redundant BuiltinScalarFunction::supports_zero_argument() [#8059](https://github.com/apache/arrow-datafusion/pull/8059) (2010YOUY01) -- Add example to ci [#8060](https://github.com/apache/arrow-datafusion/pull/8060) (smallzhongfeng) -- Update substrait requirement from 0.18.0 to 0.19.0 [#8076](https://github.com/apache/arrow-datafusion/pull/8076) (dependabot[bot]) -- Fix incorrect results in COUNT(\*) queries with LIMIT [#8049](https://github.com/apache/arrow-datafusion/pull/8049) (msirek) -- feat: Support determining extensions from names like `foo.parquet.snappy` as well as `foo.parquet` [#7972](https://github.com/apache/arrow-datafusion/pull/7972) (Weijun-H) -- Use FairSpillPool for TaskContext with spillable config [#8072](https://github.com/apache/arrow-datafusion/pull/8072) (viirya) -- Minor: Improve HashJoinStream docstrings [#8070](https://github.com/apache/arrow-datafusion/pull/8070) (alamb) -- Fixing broken link [#8085](https://github.com/apache/arrow-datafusion/pull/8085) (edmondop) -- fix: DataFusion suggests invalid functions 
[#8083](https://github.com/apache/arrow-datafusion/pull/8083) (jonahgao) -- Replace macro with function for `array_repeat` [#8071](https://github.com/apache/arrow-datafusion/pull/8071) (jayzhan211) -- Minor: remove unnecessary projection in `single_distinct_to_group_by` rule [#8061](https://github.com/apache/arrow-datafusion/pull/8061) (haohuaijin) -- minor: Remove duplicate version numbers for arrow, object_store, and parquet dependencies [#8095](https://github.com/apache/arrow-datafusion/pull/8095) (andygrove) -- fix: add encode/decode to protobuf encoding [#8089](https://github.com/apache/arrow-datafusion/pull/8089) (Syleechan) -- feat: Protobuf serde for Json file sink [#8062](https://github.com/apache/arrow-datafusion/pull/8062) (Jefffrey) -- Minor: use `Expr::alias` in a few places to make the code more concise [#8097](https://github.com/apache/arrow-datafusion/pull/8097) (alamb) -- Minor: Cleanup BuiltinScalarFunction::return_type() [#8088](https://github.com/apache/arrow-datafusion/pull/8088) (2010YOUY01) -- Update sqllogictest requirement from 0.17.0 to 0.18.0 [#8102](https://github.com/apache/arrow-datafusion/pull/8102) (dependabot[bot]) -- Projection Pushdown in PhysicalPlan [#8073](https://github.com/apache/arrow-datafusion/pull/8073) (berkaysynnada) -- Push limit into aggregation for DISTINCT ... LIMIT queries [#8038](https://github.com/apache/arrow-datafusion/pull/8038) (msirek) -- Bug-fix in Filter and Limit statistics [#8094](https://github.com/apache/arrow-datafusion/pull/8094) (berkaysynnada) -- feat: support target table alias in update statement [#8080](https://github.com/apache/arrow-datafusion/pull/8080) (jonahgao) -- Minor: Simlify downcast functions in cast.rs. 
[#8103](https://github.com/apache/arrow-datafusion/pull/8103) (Weijun-H) -- Fix ArrayAgg schema mismatch issue [#8055](https://github.com/apache/arrow-datafusion/pull/8055) (jayzhan211) -- Minor: Support `nulls` in `array_replace`, avoid a copy [#8054](https://github.com/apache/arrow-datafusion/pull/8054) (alamb) -- Minor: Improve the document format of JoinHashMap [#8090](https://github.com/apache/arrow-datafusion/pull/8090) (Asura7969) -- Simplify ProjectionPushdown and make it more general [#8109](https://github.com/apache/arrow-datafusion/pull/8109) (alamb) -- Minor: clean up the code regarding clippy [#8122](https://github.com/apache/arrow-datafusion/pull/8122) (Weijun-H) -- Support remaining functions in protobuf serialization, add `expr_fn` for `StructFunction` [#8100](https://github.com/apache/arrow-datafusion/pull/8100) (JacobOgle) -- Minor: Cleanup BuiltinScalarFunction's phys-expr creation [#8114](https://github.com/apache/arrow-datafusion/pull/8114) (2010YOUY01) -- rewrite `array_append/array_prepend` to remove deplicate codes [#8108](https://github.com/apache/arrow-datafusion/pull/8108) (Veeupup) -- Implementation of `array_intersect` [#8081](https://github.com/apache/arrow-datafusion/pull/8081) (Veeupup) -- Minor: fix ci break [#8136](https://github.com/apache/arrow-datafusion/pull/8136) (haohuaijin) -- Improve documentation for calculate_prune_length method in `SymmetricHashJoin` [#8125](https://github.com/apache/arrow-datafusion/pull/8125) (Asura7969) -- Minor: remove duplicated `array_replace` tests [#8066](https://github.com/apache/arrow-datafusion/pull/8066) (alamb) -- Minor: Fix temporary files created but not deleted during testing [#8115](https://github.com/apache/arrow-datafusion/pull/8115) (2010YOUY01) -- chore: remove panics in datafusion-common::scalar by making more operations return `Result` [#7901](https://github.com/apache/arrow-datafusion/pull/7901) (junjunjd) -- Fix join order for TPCH Q17 & Q18 by improving FilterExec statistics 
[#8126](https://github.com/apache/arrow-datafusion/pull/8126) (andygrove) -- Fix: Do not try and preserve order when there is no order to preserve in RepartitionExec [#8127](https://github.com/apache/arrow-datafusion/pull/8127) (alamb) -- feat: add column statistics into explain [#8112](https://github.com/apache/arrow-datafusion/pull/8112) (NGA-TRAN) -- Add subtrait support for `IS NULL` and `IS NOT NULL` [#8093](https://github.com/apache/arrow-datafusion/pull/8093) (tgujar) -- Combine `Expr::Wildcard` and `Wxpr::QualifiedWildcard`, add `wildcard()` expr fn [#8105](https://github.com/apache/arrow-datafusion/pull/8105) (alamb) -- docs: show creation of DFSchema [#8132](https://github.com/apache/arrow-datafusion/pull/8132) (wjones127) -- feat: support UDAF in substrait producer/consumer [#8119](https://github.com/apache/arrow-datafusion/pull/8119) (waynexia) -- Improve documentation site to make it easier to find communication on Slack/Discord [#8138](https://github.com/apache/arrow-datafusion/pull/8138) (alamb) +- Minor: Improve TableProvider document, and add ascii art [#7759](https://github.com/apache/datafusion/pull/7759) (alamb) +- Prepare 32.0.0 Release [#7769](https://github.com/apache/datafusion/pull/7769) (andygrove) +- Minor: Change all file links to GitHub in document [#7768](https://github.com/apache/datafusion/pull/7768) (ongchi) +- Minor: Improve `PruningPredicate` documentation [#7738](https://github.com/apache/datafusion/pull/7738) (alamb) +- Support InsertInto Sorted ListingTable [#7743](https://github.com/apache/datafusion/pull/7743) (devinjdangelo) +- Minor: improve documentation to `stagger_batch` [#7754](https://github.com/apache/datafusion/pull/7754) (alamb) +- External Table Primary key support [#7755](https://github.com/apache/datafusion/pull/7755) (mustafasrepo) +- Minor: Build array_array() with ListArray construction instead of ArrayData [#7780](https://github.com/apache/datafusion/pull/7780) (jayzhan211) +- Minor: Remove unnecessary 
`#[cfg(feature = "avro")]` [#7773](https://github.com/apache/datafusion/pull/7773) (sarutak) +- add interval arithmetic for timestamp types [#7758](https://github.com/apache/datafusion/pull/7758) (mhilton) +- Minor: make tests deterministic [#7771](https://github.com/apache/datafusion/pull/7771) (Weijun-H) +- Minor: Improve `Interval` Docs [#7782](https://github.com/apache/datafusion/pull/7782) (alamb) +- `DataSink` additions [#7778](https://github.com/apache/datafusion/pull/7778) (Dandandan) +- Update substrait requirement from 0.15.0 to 0.16.0 [#7783](https://github.com/apache/datafusion/pull/7783) (dependabot[bot]) +- Move nested union optimization from plan builder to logical optimizer [#7695](https://github.com/apache/datafusion/pull/7695) (maruschin) +- Minor: comments that explain the schema used in simply_expressions [#7747](https://github.com/apache/datafusion/pull/7747) (alamb) +- Update regex-syntax requirement from 0.7.1 to 0.8.0 [#7784](https://github.com/apache/datafusion/pull/7784) (dependabot[bot]) +- Minor: Add sql test for `UNION` / `UNION ALL` + plans [#7787](https://github.com/apache/datafusion/pull/7787) (alamb) +- fix: preserve column qualifier for `DataFrame::with_column` [#7792](https://github.com/apache/datafusion/pull/7792) (jonahgao) +- Interval Arithmetic NegativeExpr Support [#7804](https://github.com/apache/datafusion/pull/7804) (berkaysynnada) +- Exactness Indicator of Parameters: Precision [#7809](https://github.com/apache/datafusion/pull/7809) (berkaysynnada) +- add `LogicalPlanBuilder::join_on` [#7805](https://github.com/apache/datafusion/pull/7805) (haohuaijin) +- Fix SortPreservingRepartition with no existing ordering. 
[#7811](https://github.com/apache/datafusion/pull/7811) (mustafasrepo) +- Update zstd requirement from 0.12 to 0.13 [#7806](https://github.com/apache/datafusion/pull/7806) (dependabot[bot]) +- [Minor]: Remove input_schema field from window executor [#7810](https://github.com/apache/datafusion/pull/7810) (mustafasrepo) +- refactor(7181): move streaming_merge() into separate mod from the merge node [#7799](https://github.com/apache/datafusion/pull/7799) (wiedld) +- Improve update error [#7777](https://github.com/apache/datafusion/pull/7777) (lewiszlw) +- Minor: Update LogicalPlan::join_on API, use it more [#7814](https://github.com/apache/datafusion/pull/7814) (alamb) +- Add distinct union optimization [#7788](https://github.com/apache/datafusion/pull/7788) (maruschin) +- Make CI fail on any occurrence of rust-tomlfmt failed [#7774](https://github.com/apache/datafusion/pull/7774) (ongchi) +- Encode all join conditions in a single expression field [#7612](https://github.com/apache/datafusion/pull/7612) (nseekhao) +- Update substrait requirement from 0.16.0 to 0.17.0 [#7808](https://github.com/apache/datafusion/pull/7808) (dependabot[bot]) +- Minor: include `sort` expressions in `SortPreservingRepartitionExec` explain plan [#7796](https://github.com/apache/datafusion/pull/7796) (alamb) +- minor: add more document to Wildcard expr [#7822](https://github.com/apache/datafusion/pull/7822) (waynexia) +- Minor: Move `Monotonicity` to `expr` crate [#7820](https://github.com/apache/datafusion/pull/7820) (2010YOUY01) +- Use code block for better formatting of rustdoc for PhysicalGroupBy [#7823](https://github.com/apache/datafusion/pull/7823) (qrilka) +- Update explain plan to show `TopK` operator [#7826](https://github.com/apache/datafusion/pull/7826) (haohuaijin) +- Extract ReceiverStreamBuilder [#7817](https://github.com/apache/datafusion/pull/7817) (tustvold) +- Extend backtrace coverage for `DatafusionError::Plan` errors errors 
[#7803](https://github.com/apache/datafusion/pull/7803) (comphead) +- Add documentation and usability for prepared parameters [#7785](https://github.com/apache/datafusion/pull/7785) (alamb) +- Implement GetIndexedField for map-typed columns [#7825](https://github.com/apache/datafusion/pull/7825) (swgillespie) +- Minor: Assert `streaming_merge` has non empty sort exprs [#7795](https://github.com/apache/datafusion/pull/7795) (alamb) +- Minor: Upgrade docs for `PhysicalExpr::{propagate_constraints, evaluate_bounds}` [#7812](https://github.com/apache/datafusion/pull/7812) (alamb) +- Change ScalarValue::List to store ArrayRef [#7629](https://github.com/apache/datafusion/pull/7629) (jayzhan211) +- [MINOR]:Do not introduce unnecessary repartition when row count is 1. [#7832](https://github.com/apache/datafusion/pull/7832) (mustafasrepo) +- Minor: Add tests for binary / utf8 coercion [#7839](https://github.com/apache/datafusion/pull/7839) (alamb) +- Avoid panics on error while encoding/decoding ListValue::Array as protobuf [#7837](https://github.com/apache/datafusion/pull/7837) (alamb) +- Refactor Statistics, introduce precision estimates (`Exact`, `Inexact`, `Absent`) [#7793](https://github.com/apache/datafusion/pull/7793) (berkaysynnada) +- Remove redundant unwrap in `ScalarValue::new_primitive`, return a `Result` [#7830](https://github.com/apache/datafusion/pull/7830) (maruschin) +- Fix precision loss when coercing date_part utf8 argument [#7846](https://github.com/apache/datafusion/pull/7846) (Dandandan) +- Add operator section to user guide, Add `std::ops` operations to `prelude`, and add `not()` expr_fn [#7732](https://github.com/apache/datafusion/pull/7732) (ongchi) +- Expose arrow-schema `serde` crate feature flag [#7829](https://github.com/apache/datafusion/pull/7829) (lewiszlw) +- Improve `ContextProvider` naming: rename` get_table_provider` --> `get_table_source`, deprecate `get_table_provider` [#7831](https://github.com/apache/datafusion/pull/7831) (lewiszlw) 
+- DataSink Dynamic Execution Time Demux [#7791](https://github.com/apache/datafusion/pull/7791) (devinjdangelo) +- Add small column on empty projection [#7833](https://github.com/apache/datafusion/pull/7833) (ch-sc) +- feat(7849): coerce TIMESTAMP to TIMESTAMPTZ [#7850](https://github.com/apache/datafusion/pull/7850) (mhilton) +- Support `Binary`/`LargeBinary` --> `Utf8`/`LargeUtf8` in ilike and string functions [#7840](https://github.com/apache/datafusion/pull/7840) (alamb) +- Minor: fix typo in comments [#7856](https://github.com/apache/datafusion/pull/7856) (haohuaijin) +- Minor: improve `join` / `join_on` docs [#7813](https://github.com/apache/datafusion/pull/7813) (alamb) +- Support Decimal256 on AVG aggregate expression [#7853](https://github.com/apache/datafusion/pull/7853) (viirya) +- Minor: fix typo in comments [#7861](https://github.com/apache/datafusion/pull/7861) (alamb) +- Minor: fix typo in GreedyMemoryPool documentation [#7864](https://github.com/apache/datafusion/pull/7864) (avh4) +- Minor: fix multiple typos [#7863](https://github.com/apache/datafusion/pull/7863) (Smoothieewastaken) +- Minor: Fix docstring typos [#7873](https://github.com/apache/datafusion/pull/7873) (alamb) +- Add CursorValues Decoupling Cursor Data from Cursor Position [#7855](https://github.com/apache/datafusion/pull/7855) (tustvold) +- Support Decimal256 column in create external table [#7866](https://github.com/apache/datafusion/pull/7866) (viirya) +- Support Decimal256 in Min/Max aggregate expressions [#7881](https://github.com/apache/datafusion/pull/7881) (viirya) +- Implement Hive-Style Partitioned Write Support [#7801](https://github.com/apache/datafusion/pull/7801) (devinjdangelo) +- Minor: fix config typo [#7874](https://github.com/apache/datafusion/pull/7874) (alamb) +- Add Decimal256 sqllogictests for SUM, MEDIAN and COUNT aggregate expressions [#7889](https://github.com/apache/datafusion/pull/7889) (viirya) +- [test] add fuzz test for topk 
[#7772](https://github.com/apache/datafusion/pull/7772) (Tangruilin) +- Allow Setting Minimum Parallelism with RowCount Based Demuxer [#7841](https://github.com/apache/datafusion/pull/7841) (devinjdangelo) +- Drop single quotes to make warnings for parquet options not confusing [#7902](https://github.com/apache/datafusion/pull/7902) (qrilka) +- Add multi-column topk fuzz tests [#7898](https://github.com/apache/datafusion/pull/7898) (alamb) +- Change `FileScanConfig.table_partition_cols` from `(String, DataType)` to `Field`s [#7890](https://github.com/apache/datafusion/pull/7890) (NGA-TRAN) +- Maintain time zone in `ScalarValue::new_list` [#7899](https://github.com/apache/datafusion/pull/7899) (Dandandan) +- [MINOR]: Move joinside struct to common [#7908](https://github.com/apache/datafusion/pull/7908) (mustafasrepo) +- doc: fix ExecutionContext to SessionContext in custom-table-providers.md [#7903](https://github.com/apache/datafusion/pull/7903) (ZENOTME) +- Update arrow 48.0.0 [#7854](https://github.com/apache/datafusion/pull/7854) (tustvold) +- feat: support `Decimal256` for the `abs` function [#7904](https://github.com/apache/datafusion/pull/7904) (jonahgao) +- [MINOR] Simplify Aggregate, and Projection output_partitioning implementation [#7907](https://github.com/apache/datafusion/pull/7907) (mustafasrepo) +- Bump actions/setup-node from 3 to 4 [#7915](https://github.com/apache/datafusion/pull/7915) (dependabot[bot]) +- [Bug Fix]: Fix bug, first last reverse [#7914](https://github.com/apache/datafusion/pull/7914) (mustafasrepo) +- Minor: provide default implementation for ExecutionPlan::statistics [#7911](https://github.com/apache/datafusion/pull/7911) (alamb) +- Update substrait requirement from 0.17.0 to 0.18.0 [#7916](https://github.com/apache/datafusion/pull/7916) (dependabot[bot]) +- Minor: Remove unnecessary clone in datafusion_proto [#7921](https://github.com/apache/datafusion/pull/7921) (ongchi) +- [MINOR]: Simplify code, change requirement from 
PhysicalSortExpr to PhysicalSortRequirement [#7913](https://github.com/apache/datafusion/pull/7913) (mustafasrepo) +- [Minor] Move combine_join util to under equivalence.rs [#7917](https://github.com/apache/datafusion/pull/7917) (mustafasrepo) +- support scan empty projection [#7920](https://github.com/apache/datafusion/pull/7920) (haohuaijin) +- Cleanup logical optimizer rules. [#7919](https://github.com/apache/datafusion/pull/7919) (mustafasrepo) +- Parallelize Serialization of Columns within Parquet RowGroups [#7655](https://github.com/apache/datafusion/pull/7655) (devinjdangelo) +- feat: Use bloom filter when reading parquet to skip row groups [#7821](https://github.com/apache/datafusion/pull/7821) (hengfeiyang) +- fix: don't push down volatile predicates in projection [#7909](https://github.com/apache/datafusion/pull/7909) (haohuaijin) +- Add `parquet` feature flag, enabled by default, and make parquet conditional [#7745](https://github.com/apache/datafusion/pull/7745) (ongchi) +- [MINOR]: Simplify enforce_distribution, minor changes [#7924](https://github.com/apache/datafusion/pull/7924) (mustafasrepo) +- Add simple window query to sqllogictest [#7928](https://github.com/apache/datafusion/pull/7928) (Jefffrey) +- ci: upgrade node to version 20 [#7918](https://github.com/apache/datafusion/pull/7918) (crepererum) +- Change input for `to_timestamp` function to be seconds rather than nanoseconds, add `to_timestamp_nanos` [#7844](https://github.com/apache/datafusion/pull/7844) (comphead) +- Minor: Document `parquet` crate feature [#7927](https://github.com/apache/datafusion/pull/7927) (alamb) +- Minor: reduce some `#cfg(feature = "parquet")` [#7929](https://github.com/apache/datafusion/pull/7929) (alamb) +- Minor: reduce use of `#cfg(feature = "parquet")` in tests [#7930](https://github.com/apache/datafusion/pull/7930) (alamb) +- Fix CI failures on `to_timestamp()` calls [#7941](https://github.com/apache/datafusion/pull/7941) (comphead) +- minor: add a datatype 
casting for the updated value [#7922](https://github.com/apache/datafusion/pull/7922) (jonahgao) +- Minor:add `avro` feature in datafusion-examples to make `avro_sql` run [#7946](https://github.com/apache/datafusion/pull/7946) (haohuaijin) +- Add simple exclude all columns test to sqllogictest [#7945](https://github.com/apache/datafusion/pull/7945) (Jefffrey) +- Support Partitioning Data by Dictionary Encoded String Array Types [#7896](https://github.com/apache/datafusion/pull/7896) (devinjdangelo) +- Minor: Remove array() in array_expression [#7961](https://github.com/apache/datafusion/pull/7961) (jayzhan211) +- Minor: simplify update code [#7943](https://github.com/apache/datafusion/pull/7943) (alamb) +- Add some initial content about creating logical plans [#7952](https://github.com/apache/datafusion/pull/7952) (andygrove) +- Minor: Change from `&mut SessionContext` to `&SessionContext` in substrait [#7965](https://github.com/apache/datafusion/pull/7965) (my-vegetable-has-exploded) +- Fix crate READMEs [#7964](https://github.com/apache/datafusion/pull/7964) (Jefffrey) +- Minor: Improve `HashJoinExec` documentation [#7953](https://github.com/apache/datafusion/pull/7953) (alamb) +- chore: clean useless clone baesd on clippy [#7973](https://github.com/apache/datafusion/pull/7973) (Weijun-H) +- Add README.md to `core`, `execution` and `physical-plan` crates [#7970](https://github.com/apache/datafusion/pull/7970) (alamb) +- Move source repartitioning into `ExecutionPlan::repartition` [#7936](https://github.com/apache/datafusion/pull/7936) (alamb) +- minor: fix broken links in README.md [#7986](https://github.com/apache/datafusion/pull/7986) (jonahgao) +- Minor: Upate the `sqllogictest` crate README [#7971](https://github.com/apache/datafusion/pull/7971) (alamb) +- Improve MemoryCatalogProvider default impl block placement [#7975](https://github.com/apache/datafusion/pull/7975) (lewiszlw) +- Fix `ScalarValue` handling of NULL values for ListArray 
[#7969](https://github.com/apache/datafusion/pull/7969) (viirya) +- Refactor of Ordering and Prunability Traversals and States [#7985](https://github.com/apache/datafusion/pull/7985) (berkaysynnada) +- Keep output as scalar for scalar function if all inputs are scalar [#7967](https://github.com/apache/datafusion/pull/7967) (viirya) +- Fix crate READMEs for core, execution, physical-plan [#7990](https://github.com/apache/datafusion/pull/7990) (Jefffrey) +- Update sqlparser requirement from 0.38.0 to 0.39.0 [#7983](https://github.com/apache/datafusion/pull/7983) (jackwener) +- Fix panic in multiple distinct aggregates by fixing `ScalarValue::new_list` [#7989](https://github.com/apache/datafusion/pull/7989) (alamb) +- Minor: Add `MemoryReservation::consumer` getter [#8000](https://github.com/apache/datafusion/pull/8000) (milenkovicm) +- fix: generate logical plan for `UPDATE SET FROM` statement [#7984](https://github.com/apache/datafusion/pull/7984) (jonahgao) +- Create temporary files for reading or writing [#8005](https://github.com/apache/datafusion/pull/8005) (smallzhongfeng) +- Minor: fix comment on SortExec::with_fetch method [#8011](https://github.com/apache/datafusion/pull/8011) (westonpace) +- Fix: dataframe_subquery example Optimizer rule `common_sub_expression_eliminate` failed [#8016](https://github.com/apache/datafusion/pull/8016) (smallzhongfeng) +- Percent Decode URL Paths (#8009) [#8012](https://github.com/apache/datafusion/pull/8012) (tustvold) +- Minor: Extract common deps into workspace [#7982](https://github.com/apache/datafusion/pull/7982) (lewiszlw) +- minor: change some plan_err to exec_err [#7996](https://github.com/apache/datafusion/pull/7996) (waynexia) +- Minor: error on unsupported RESPECT NULLs syntax [#7998](https://github.com/apache/datafusion/pull/7998) (alamb) +- Break GroupedHashAggregateStream spill batch into smaller chunks [#8004](https://github.com/apache/datafusion/pull/8004) (milenkovicm) +- Minor: Add implementation examples to 
ExecutionPlan::execute [#8013](https://github.com/apache/datafusion/pull/8013) (tustvold) +- Minor: Extend wrap_into_list_array to accept multiple args [#7993](https://github.com/apache/datafusion/pull/7993) (jayzhan211) +- GroupedHashAggregateStream should register spillable consumer [#8002](https://github.com/apache/datafusion/pull/8002) (milenkovicm) +- fix: single_distinct_aggretation_to_group_by fail [#7997](https://github.com/apache/datafusion/pull/7997) (haohuaijin) +- Read only enough bytes to infer Arrow IPC file schema via stream [#7962](https://github.com/apache/datafusion/pull/7962) (Jefffrey) +- Minor: remove a strange char [#8030](https://github.com/apache/datafusion/pull/8030) (haohuaijin) +- Minor: Improve documentation for Filter Pushdown [#8023](https://github.com/apache/datafusion/pull/8023) (alamb) +- Minor: Improve `ExecutionPlan` documentation [#8019](https://github.com/apache/datafusion/pull/8019) (alamb) +- fix: clippy warnings from nightly rust 1.75 [#8025](https://github.com/apache/datafusion/pull/8025) (waynexia) +- Minor: Avoid recomputing compute_array_ndims in align_array_dimensions [#7963](https://github.com/apache/datafusion/pull/7963) (jayzhan211) +- Minor: fix doc and fmt CI check [#8037](https://github.com/apache/datafusion/pull/8037) (alamb) +- Minor: remove uncessary #cfg test [#8036](https://github.com/apache/datafusion/pull/8036) (alamb) +- Minor: Improve documentation for `PartitionStream` and `StreamingTableExec` [#8035](https://github.com/apache/datafusion/pull/8035) (alamb) +- Combine Equivalence and Ordering equivalence to simplify state [#8006](https://github.com/apache/datafusion/pull/8006) (mustafasrepo) +- Encapsulate `ProjectionMapping` as a struct [#8033](https://github.com/apache/datafusion/pull/8033) (alamb) +- Minor: Fix bugs in docs for `to_timestamp`, `to_timestamp_seconds`, ... 
[#8040](https://github.com/apache/datafusion/pull/8040) (alamb) +- Improve comments for `PartitionSearchMode` struct [#8047](https://github.com/apache/datafusion/pull/8047) (ozankabak) +- General approach for Array replace [#8050](https://github.com/apache/datafusion/pull/8050) (jayzhan211) +- Minor: Remove the irrelevant note from the Expression API doc [#8053](https://github.com/apache/datafusion/pull/8053) (ongchi) +- Minor: Add more documentation about Partitioning [#8022](https://github.com/apache/datafusion/pull/8022) (alamb) +- Minor: improve documentation for IsNotNull, DISTINCT, etc [#8052](https://github.com/apache/datafusion/pull/8052) (alamb) +- Prepare 33.0.0 Release [#8057](https://github.com/apache/datafusion/pull/8057) (andygrove) +- Minor: improve error message by adding types to message [#8065](https://github.com/apache/datafusion/pull/8065) (alamb) +- Minor: Remove redundant BuiltinScalarFunction::supports_zero_argument() [#8059](https://github.com/apache/datafusion/pull/8059) (2010YOUY01) +- Add example to ci [#8060](https://github.com/apache/datafusion/pull/8060) (smallzhongfeng) +- Update substrait requirement from 0.18.0 to 0.19.0 [#8076](https://github.com/apache/datafusion/pull/8076) (dependabot[bot]) +- Fix incorrect results in COUNT(\*) queries with LIMIT [#8049](https://github.com/apache/datafusion/pull/8049) (msirek) +- feat: Support determining extensions from names like `foo.parquet.snappy` as well as `foo.parquet` [#7972](https://github.com/apache/datafusion/pull/7972) (Weijun-H) +- Use FairSpillPool for TaskContext with spillable config [#8072](https://github.com/apache/datafusion/pull/8072) (viirya) +- Minor: Improve HashJoinStream docstrings [#8070](https://github.com/apache/datafusion/pull/8070) (alamb) +- Fixing broken link [#8085](https://github.com/apache/datafusion/pull/8085) (edmondop) +- fix: DataFusion suggests invalid functions [#8083](https://github.com/apache/datafusion/pull/8083) (jonahgao) +- Replace macro with 
function for `array_repeat` [#8071](https://github.com/apache/datafusion/pull/8071) (jayzhan211) +- Minor: remove unnecessary projection in `single_distinct_to_group_by` rule [#8061](https://github.com/apache/datafusion/pull/8061) (haohuaijin) +- minor: Remove duplicate version numbers for arrow, object_store, and parquet dependencies [#8095](https://github.com/apache/datafusion/pull/8095) (andygrove) +- fix: add encode/decode to protobuf encoding [#8089](https://github.com/apache/datafusion/pull/8089) (Syleechan) +- feat: Protobuf serde for Json file sink [#8062](https://github.com/apache/datafusion/pull/8062) (Jefffrey) +- Minor: use `Expr::alias` in a few places to make the code more concise [#8097](https://github.com/apache/datafusion/pull/8097) (alamb) +- Minor: Cleanup BuiltinScalarFunction::return_type() [#8088](https://github.com/apache/datafusion/pull/8088) (2010YOUY01) +- Update sqllogictest requirement from 0.17.0 to 0.18.0 [#8102](https://github.com/apache/datafusion/pull/8102) (dependabot[bot]) +- Projection Pushdown in PhysicalPlan [#8073](https://github.com/apache/datafusion/pull/8073) (berkaysynnada) +- Push limit into aggregation for DISTINCT ... LIMIT queries [#8038](https://github.com/apache/datafusion/pull/8038) (msirek) +- Bug-fix in Filter and Limit statistics [#8094](https://github.com/apache/datafusion/pull/8094) (berkaysynnada) +- feat: support target table alias in update statement [#8080](https://github.com/apache/datafusion/pull/8080) (jonahgao) +- Minor: Simlify downcast functions in cast.rs. 
[#8103](https://github.com/apache/datafusion/pull/8103) (Weijun-H) +- Fix ArrayAgg schema mismatch issue [#8055](https://github.com/apache/datafusion/pull/8055) (jayzhan211) +- Minor: Support `nulls` in `array_replace`, avoid a copy [#8054](https://github.com/apache/datafusion/pull/8054) (alamb) +- Minor: Improve the document format of JoinHashMap [#8090](https://github.com/apache/datafusion/pull/8090) (Asura7969) +- Simplify ProjectionPushdown and make it more general [#8109](https://github.com/apache/datafusion/pull/8109) (alamb) +- Minor: clean up the code regarding clippy [#8122](https://github.com/apache/datafusion/pull/8122) (Weijun-H) +- Support remaining functions in protobuf serialization, add `expr_fn` for `StructFunction` [#8100](https://github.com/apache/datafusion/pull/8100) (JacobOgle) +- Minor: Cleanup BuiltinScalarFunction's phys-expr creation [#8114](https://github.com/apache/datafusion/pull/8114) (2010YOUY01) +- rewrite `array_append/array_prepend` to remove deplicate codes [#8108](https://github.com/apache/datafusion/pull/8108) (Veeupup) +- Implementation of `array_intersect` [#8081](https://github.com/apache/datafusion/pull/8081) (Veeupup) +- Minor: fix ci break [#8136](https://github.com/apache/datafusion/pull/8136) (haohuaijin) +- Improve documentation for calculate_prune_length method in `SymmetricHashJoin` [#8125](https://github.com/apache/datafusion/pull/8125) (Asura7969) +- Minor: remove duplicated `array_replace` tests [#8066](https://github.com/apache/datafusion/pull/8066) (alamb) +- Minor: Fix temporary files created but not deleted during testing [#8115](https://github.com/apache/datafusion/pull/8115) (2010YOUY01) +- chore: remove panics in datafusion-common::scalar by making more operations return `Result` [#7901](https://github.com/apache/datafusion/pull/7901) (junjunjd) +- Fix join order for TPCH Q17 & Q18 by improving FilterExec statistics [#8126](https://github.com/apache/datafusion/pull/8126) (andygrove) +- Fix: Do not try and 
preserve order when there is no order to preserve in RepartitionExec [#8127](https://github.com/apache/datafusion/pull/8127) (alamb) +- feat: add column statistics into explain [#8112](https://github.com/apache/datafusion/pull/8112) (NGA-TRAN) +- Add subtrait support for `IS NULL` and `IS NOT NULL` [#8093](https://github.com/apache/datafusion/pull/8093) (tgujar) +- Combine `Expr::Wildcard` and `Wxpr::QualifiedWildcard`, add `wildcard()` expr fn [#8105](https://github.com/apache/datafusion/pull/8105) (alamb) +- docs: show creation of DFSchema [#8132](https://github.com/apache/datafusion/pull/8132) (wjones127) +- feat: support UDAF in substrait producer/consumer [#8119](https://github.com/apache/datafusion/pull/8119) (waynexia) +- Improve documentation site to make it easier to find communication on Slack/Discord [#8138](https://github.com/apache/datafusion/pull/8138) (alamb) diff --git a/dev/changelog/34.0.0.md b/dev/changelog/34.0.0.md index c5526f60531c..68e3b24d84a7 100644 --- a/dev/changelog/34.0.0.md +++ b/dev/changelog/34.0.0.md @@ -17,257 +17,257 @@ under the License. 
--> -## [34.0.0](https://github.com/apache/arrow-datafusion/tree/34.0.0) (2023-12-11) +## [34.0.0](https://github.com/apache/datafusion/tree/34.0.0) (2023-12-11) -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/33.0.0...34.0.0) +[Full Changelog](https://github.com/apache/datafusion/compare/33.0.0...34.0.0) **Breaking changes:** -- Implement `DISTINCT ON` from Postgres [#7981](https://github.com/apache/arrow-datafusion/pull/7981) (gruuya) -- Encapsulate `EquivalenceClass` into a struct [#8034](https://github.com/apache/arrow-datafusion/pull/8034) (alamb) -- Make fields of `ScalarUDF` , `AggregateUDF` and `WindowUDF` non `pub` [#8079](https://github.com/apache/arrow-datafusion/pull/8079) (alamb) -- Implement StreamTable and StreamTableProvider (#7994) [#8021](https://github.com/apache/arrow-datafusion/pull/8021) (tustvold) -- feat: make FixedSizeList scalar also an ArrayRef [#8221](https://github.com/apache/arrow-datafusion/pull/8221) (wjones127) -- Remove FileWriterMode and ListingTableInsertMode (#7994) [#8017](https://github.com/apache/arrow-datafusion/pull/8017) (tustvold) -- Refactor: Unify `Expr::ScalarFunction` and `Expr::ScalarUDF`, introduce unresolved functions by name [#8258](https://github.com/apache/arrow-datafusion/pull/8258) (2010YOUY01) -- Refactor aggregate function handling [#8358](https://github.com/apache/arrow-datafusion/pull/8358) (Weijun-H) -- Move `PartitionSearchMode` into datafusion_physical_plan, rename to `InputOrderMode` [#8364](https://github.com/apache/arrow-datafusion/pull/8364) (alamb) -- Split `EmptyExec` into `PlaceholderRowExec` [#8446](https://github.com/apache/arrow-datafusion/pull/8446) (razeghi71) +- Implement `DISTINCT ON` from Postgres [#7981](https://github.com/apache/datafusion/pull/7981) (gruuya) +- Encapsulate `EquivalenceClass` into a struct [#8034](https://github.com/apache/datafusion/pull/8034) (alamb) +- Make fields of `ScalarUDF` , `AggregateUDF` and `WindowUDF` non `pub` 
[#8079](https://github.com/apache/datafusion/pull/8079) (alamb) +- Implement StreamTable and StreamTableProvider (#7994) [#8021](https://github.com/apache/datafusion/pull/8021) (tustvold) +- feat: make FixedSizeList scalar also an ArrayRef [#8221](https://github.com/apache/datafusion/pull/8221) (wjones127) +- Remove FileWriterMode and ListingTableInsertMode (#7994) [#8017](https://github.com/apache/datafusion/pull/8017) (tustvold) +- Refactor: Unify `Expr::ScalarFunction` and `Expr::ScalarUDF`, introduce unresolved functions by name [#8258](https://github.com/apache/datafusion/pull/8258) (2010YOUY01) +- Refactor aggregate function handling [#8358](https://github.com/apache/datafusion/pull/8358) (Weijun-H) +- Move `PartitionSearchMode` into datafusion_physical_plan, rename to `InputOrderMode` [#8364](https://github.com/apache/datafusion/pull/8364) (alamb) +- Split `EmptyExec` into `PlaceholderRowExec` [#8446](https://github.com/apache/datafusion/pull/8446) (razeghi71) **Implemented enhancements:** -- feat: show statistics in explain verbose [#8113](https://github.com/apache/arrow-datafusion/pull/8113) (NGA-TRAN) -- feat:implement postgres style 'overlay' string function [#8117](https://github.com/apache/arrow-datafusion/pull/8117) (Syleechan) -- feat: fill missing values with NULLs while inserting [#8146](https://github.com/apache/arrow-datafusion/pull/8146) (jonahgao) -- feat: to_array_of_size for ScalarValue::FixedSizeList [#8225](https://github.com/apache/arrow-datafusion/pull/8225) (wjones127) -- feat:implement calcite style 'levenshtein' string function [#8168](https://github.com/apache/arrow-datafusion/pull/8168) (Syleechan) -- feat: roundtrip FixedSizeList Scalar to protobuf [#8239](https://github.com/apache/arrow-datafusion/pull/8239) (wjones127) -- feat: impl the basic `string_agg` function [#8148](https://github.com/apache/arrow-datafusion/pull/8148) (haohuaijin) -- feat: support simplifying BinaryExpr with arbitrary guarantees in GuaranteeRewriter 
[#8256](https://github.com/apache/arrow-datafusion/pull/8256) (wjones127) -- feat: support customizing column default values for inserting [#8283](https://github.com/apache/arrow-datafusion/pull/8283) (jonahgao) -- feat:implement sql style 'substr_index' string function [#8272](https://github.com/apache/arrow-datafusion/pull/8272) (Syleechan) -- feat:implement sql style 'find_in_set' string function [#8328](https://github.com/apache/arrow-datafusion/pull/8328) (Syleechan) -- feat: support `LargeList` in `array_empty` [#8321](https://github.com/apache/arrow-datafusion/pull/8321) (Weijun-H) -- feat: support `LargeList` in `make_array` and `array_length` [#8121](https://github.com/apache/arrow-datafusion/pull/8121) (Weijun-H) -- feat: ScalarValue from String [#8411](https://github.com/apache/arrow-datafusion/pull/8411) (QuenKar) -- feat: support `LargeList` for `array_has`, `array_has_all` and `array_has_any` [#8322](https://github.com/apache/arrow-datafusion/pull/8322) (Weijun-H) -- feat: customize column default values for external tables [#8415](https://github.com/apache/arrow-datafusion/pull/8415) (jonahgao) -- feat: Support `array_sort`(`list_sort`) [#8279](https://github.com/apache/arrow-datafusion/pull/8279) (Asura7969) -- feat: support `InterleaveExecNode` in the proto [#8460](https://github.com/apache/arrow-datafusion/pull/8460) (liukun4515) -- feat: improve string statistics display in datafusion-cli `parquet_metadata` function [#8535](https://github.com/apache/arrow-datafusion/pull/8535) (asimsedhain) +- feat: show statistics in explain verbose [#8113](https://github.com/apache/datafusion/pull/8113) (NGA-TRAN) +- feat:implement postgres style 'overlay' string function [#8117](https://github.com/apache/datafusion/pull/8117) (Syleechan) +- feat: fill missing values with NULLs while inserting [#8146](https://github.com/apache/datafusion/pull/8146) (jonahgao) +- feat: to_array_of_size for ScalarValue::FixedSizeList 
[#8225](https://github.com/apache/datafusion/pull/8225) (wjones127) +- feat:implement calcite style 'levenshtein' string function [#8168](https://github.com/apache/datafusion/pull/8168) (Syleechan) +- feat: roundtrip FixedSizeList Scalar to protobuf [#8239](https://github.com/apache/datafusion/pull/8239) (wjones127) +- feat: impl the basic `string_agg` function [#8148](https://github.com/apache/datafusion/pull/8148) (haohuaijin) +- feat: support simplifying BinaryExpr with arbitrary guarantees in GuaranteeRewriter [#8256](https://github.com/apache/datafusion/pull/8256) (wjones127) +- feat: support customizing column default values for inserting [#8283](https://github.com/apache/datafusion/pull/8283) (jonahgao) +- feat:implement sql style 'substr_index' string function [#8272](https://github.com/apache/datafusion/pull/8272) (Syleechan) +- feat:implement sql style 'find_in_set' string function [#8328](https://github.com/apache/datafusion/pull/8328) (Syleechan) +- feat: support `LargeList` in `array_empty` [#8321](https://github.com/apache/datafusion/pull/8321) (Weijun-H) +- feat: support `LargeList` in `make_array` and `array_length` [#8121](https://github.com/apache/datafusion/pull/8121) (Weijun-H) +- feat: ScalarValue from String [#8411](https://github.com/apache/datafusion/pull/8411) (QuenKar) +- feat: support `LargeList` for `array_has`, `array_has_all` and `array_has_any` [#8322](https://github.com/apache/datafusion/pull/8322) (Weijun-H) +- feat: customize column default values for external tables [#8415](https://github.com/apache/datafusion/pull/8415) (jonahgao) +- feat: Support `array_sort`(`list_sort`) [#8279](https://github.com/apache/datafusion/pull/8279) (Asura7969) +- feat: support `InterleaveExecNode` in the proto [#8460](https://github.com/apache/datafusion/pull/8460) (liukun4515) +- feat: improve string statistics display in datafusion-cli `parquet_metadata` function [#8535](https://github.com/apache/datafusion/pull/8535) (asimsedhain) **Fixed bugs:** 
-- fix: Timestamp with timezone not considered `join on` [#8150](https://github.com/apache/arrow-datafusion/pull/8150) (ACking-you) -- fix: wrong result of range function [#8313](https://github.com/apache/arrow-datafusion/pull/8313) (smallzhongfeng) -- fix: make `ntile` work in some corner cases [#8371](https://github.com/apache/arrow-datafusion/pull/8371) (haohuaijin) -- fix: Changed labeler.yml to latest format [#8431](https://github.com/apache/arrow-datafusion/pull/8431) (viirya) -- fix: Literal in `ORDER BY` window definition should not be an ordinal referring to relation column [#8419](https://github.com/apache/arrow-datafusion/pull/8419) (viirya) -- fix: ORDER BY window definition should work on null literal [#8444](https://github.com/apache/arrow-datafusion/pull/8444) (viirya) -- fix: RANGE frame for corner cases with empty ORDER BY clause should be treated as constant sort [#8445](https://github.com/apache/arrow-datafusion/pull/8445) (viirya) -- fix: don't unifies projection if expr is non-trival [#8454](https://github.com/apache/arrow-datafusion/pull/8454) (haohuaijin) -- fix: support uppercase when parsing `Interval` [#8478](https://github.com/apache/arrow-datafusion/pull/8478) (QuenKar) -- fix: incorrect set preserve_partitioning in SortExec [#8485](https://github.com/apache/arrow-datafusion/pull/8485) (haohuaijin) -- fix: Pull stats in `IdentVisitor`/`GraphvizVisitor` only when requested [#8514](https://github.com/apache/arrow-datafusion/pull/8514) (vrongmeal) -- fix: volatile expressions should not be target of common subexpt elimination [#8520](https://github.com/apache/arrow-datafusion/pull/8520) (viirya) +- fix: Timestamp with timezone not considered `join on` [#8150](https://github.com/apache/datafusion/pull/8150) (ACking-you) +- fix: wrong result of range function [#8313](https://github.com/apache/datafusion/pull/8313) (smallzhongfeng) +- fix: make `ntile` work in some corner cases [#8371](https://github.com/apache/datafusion/pull/8371) 
(haohuaijin) +- fix: Changed labeler.yml to latest format [#8431](https://github.com/apache/datafusion/pull/8431) (viirya) +- fix: Literal in `ORDER BY` window definition should not be an ordinal referring to relation column [#8419](https://github.com/apache/datafusion/pull/8419) (viirya) +- fix: ORDER BY window definition should work on null literal [#8444](https://github.com/apache/datafusion/pull/8444) (viirya) +- fix: RANGE frame for corner cases with empty ORDER BY clause should be treated as constant sort [#8445](https://github.com/apache/datafusion/pull/8445) (viirya) +- fix: don't unifies projection if expr is non-trival [#8454](https://github.com/apache/datafusion/pull/8454) (haohuaijin) +- fix: support uppercase when parsing `Interval` [#8478](https://github.com/apache/datafusion/pull/8478) (QuenKar) +- fix: incorrect set preserve_partitioning in SortExec [#8485](https://github.com/apache/datafusion/pull/8485) (haohuaijin) +- fix: Pull stats in `IdentVisitor`/`GraphvizVisitor` only when requested [#8514](https://github.com/apache/datafusion/pull/8514) (vrongmeal) +- fix: volatile expressions should not be target of common subexpt elimination [#8520](https://github.com/apache/datafusion/pull/8520) (viirya) **Documentation updates:** -- Library Guide: Add Using the DataFrame API [#8319](https://github.com/apache/arrow-datafusion/pull/8319) (Veeupup) -- Minor: Add installation link to README.md [#8389](https://github.com/apache/arrow-datafusion/pull/8389) (Weijun-H) -- Prepare version 34.0.0 [#8508](https://github.com/apache/arrow-datafusion/pull/8508) (andygrove) +- Library Guide: Add Using the DataFrame API [#8319](https://github.com/apache/datafusion/pull/8319) (Veeupup) +- Minor: Add installation link to README.md [#8389](https://github.com/apache/datafusion/pull/8389) (Weijun-H) +- Prepare version 34.0.0 [#8508](https://github.com/apache/datafusion/pull/8508) (andygrove) **Merged pull requests:** -- Fix typo in partitioning.rs 
[#8134](https://github.com/apache/arrow-datafusion/pull/8134) (lewiszlw) -- Implement `DISTINCT ON` from Postgres [#7981](https://github.com/apache/arrow-datafusion/pull/7981) (gruuya) -- Prepare 33.0.0-rc2 [#8144](https://github.com/apache/arrow-datafusion/pull/8144) (andygrove) -- Avoid concat in `array_append` [#8137](https://github.com/apache/arrow-datafusion/pull/8137) (jayzhan211) -- Replace macro with function for array_remove [#8106](https://github.com/apache/arrow-datafusion/pull/8106) (jayzhan211) -- Implement `array_union` [#7897](https://github.com/apache/arrow-datafusion/pull/7897) (edmondop) -- Minor: Document `ExecutionPlan::equivalence_properties` more thoroughly [#8128](https://github.com/apache/arrow-datafusion/pull/8128) (alamb) -- feat: show statistics in explain verbose [#8113](https://github.com/apache/arrow-datafusion/pull/8113) (NGA-TRAN) -- feat:implement postgres style 'overlay' string function [#8117](https://github.com/apache/arrow-datafusion/pull/8117) (Syleechan) -- Minor: Encapsulate `LeftJoinData` into a struct (rather than anonymous enum) and add comments [#8153](https://github.com/apache/arrow-datafusion/pull/8153) (alamb) -- Update sqllogictest requirement from 0.18.0 to 0.19.0 [#8163](https://github.com/apache/arrow-datafusion/pull/8163) (dependabot[bot]) -- feat: fill missing values with NULLs while inserting [#8146](https://github.com/apache/arrow-datafusion/pull/8146) (jonahgao) -- Introduce return type for aggregate sum [#8141](https://github.com/apache/arrow-datafusion/pull/8141) (jayzhan211) -- implement range/generate_series func [#8140](https://github.com/apache/arrow-datafusion/pull/8140) (Veeupup) -- Encapsulate `EquivalenceClass` into a struct [#8034](https://github.com/apache/arrow-datafusion/pull/8034) (alamb) -- Revert "Minor: remove unnecessary projection in `single_distinct_to_g… [#8176](https://github.com/apache/arrow-datafusion/pull/8176) (NGA-TRAN) -- Preserve all of the valid orderings during merging. 
[#8169](https://github.com/apache/arrow-datafusion/pull/8169) (mustafasrepo) -- Make fields of `ScalarUDF` , `AggregateUDF` and `WindowUDF` non `pub` [#8079](https://github.com/apache/arrow-datafusion/pull/8079) (alamb) -- Fix logical conflicts [#8187](https://github.com/apache/arrow-datafusion/pull/8187) (tustvold) -- Minor: Update JoinHashMap comment example to make it clearer [#8154](https://github.com/apache/arrow-datafusion/pull/8154) (alamb) -- Implement StreamTable and StreamTableProvider (#7994) [#8021](https://github.com/apache/arrow-datafusion/pull/8021) (tustvold) -- [MINOR]: Remove unused Results [#8189](https://github.com/apache/arrow-datafusion/pull/8189) (mustafasrepo) -- Minor: clean up the code based on clippy [#8179](https://github.com/apache/arrow-datafusion/pull/8179) (Weijun-H) -- Minor: simplify filter statistics code [#8174](https://github.com/apache/arrow-datafusion/pull/8174) (alamb) -- Replace macro with function for `array_position` and `array_positions` [#8170](https://github.com/apache/arrow-datafusion/pull/8170) (jayzhan211) -- Add Library Guide for User Defined Functions: Window/Aggregate [#8171](https://github.com/apache/arrow-datafusion/pull/8171) (Veeupup) -- Add more stream docs [#8192](https://github.com/apache/arrow-datafusion/pull/8192) (tustvold) -- Implement func `array_pop_front` [#8142](https://github.com/apache/arrow-datafusion/pull/8142) (Veeupup) -- Moving arrow_files SQL tests to sqllogictest [#8217](https://github.com/apache/arrow-datafusion/pull/8217) (edmondop) -- fix regression in the use of name in ProjectionPushdown [#8219](https://github.com/apache/arrow-datafusion/pull/8219) (alamb) -- [MINOR]: Fix column indices in the planning tests [#8191](https://github.com/apache/arrow-datafusion/pull/8191) (mustafasrepo) -- Remove unnecessary reassignment [#8232](https://github.com/apache/arrow-datafusion/pull/8232) (qrilka) -- Update itertools requirement from 0.11 to 0.12 
[#8233](https://github.com/apache/arrow-datafusion/pull/8233) (crepererum) -- Port tests in subqueries.rs to sqllogictest [#8231](https://github.com/apache/arrow-datafusion/pull/8231) (PsiACE) -- feat: make FixedSizeList scalar also an ArrayRef [#8221](https://github.com/apache/arrow-datafusion/pull/8221) (wjones127) -- Add versions to datafusion dependencies [#8238](https://github.com/apache/arrow-datafusion/pull/8238) (andygrove) -- feat: to_array_of_size for ScalarValue::FixedSizeList [#8225](https://github.com/apache/arrow-datafusion/pull/8225) (wjones127) -- feat:implement calcite style 'levenshtein' string function [#8168](https://github.com/apache/arrow-datafusion/pull/8168) (Syleechan) -- feat: roundtrip FixedSizeList Scalar to protobuf [#8239](https://github.com/apache/arrow-datafusion/pull/8239) (wjones127) -- Update prost-build requirement from =0.12.1 to =0.12.2 [#8244](https://github.com/apache/arrow-datafusion/pull/8244) (dependabot[bot]) -- Minor: Port tests in `displayable.rs` to sqllogictest [#8246](https://github.com/apache/arrow-datafusion/pull/8246) (Weijun-H) -- Minor: add `with_estimated_selectivity ` to Precision [#8177](https://github.com/apache/arrow-datafusion/pull/8177) (alamb) -- fix: Timestamp with timezone not considered `join on` [#8150](https://github.com/apache/arrow-datafusion/pull/8150) (ACking-you) -- Replace macro in array_array to remove duplicate codes [#8252](https://github.com/apache/arrow-datafusion/pull/8252) (Veeupup) -- Port tests in projection.rs to sqllogictest [#8240](https://github.com/apache/arrow-datafusion/pull/8240) (PsiACE) -- Introduce `array_except` function [#8135](https://github.com/apache/arrow-datafusion/pull/8135) (jayzhan211) -- Port tests in `describe.rs` to sqllogictest [#8242](https://github.com/apache/arrow-datafusion/pull/8242) (Asura7969) -- Remove FileWriterMode and ListingTableInsertMode (#7994) [#8017](https://github.com/apache/arrow-datafusion/pull/8017) (tustvold) -- Minor: clean up the code 
based on Clippy [#8257](https://github.com/apache/arrow-datafusion/pull/8257) (Weijun-H) -- Update arrow 49.0.0 and object_store 0.8.0 [#8029](https://github.com/apache/arrow-datafusion/pull/8029) (tustvold) -- feat: impl the basic `string_agg` function [#8148](https://github.com/apache/arrow-datafusion/pull/8148) (haohuaijin) -- Minor: Make schema of grouping set columns nullable [#8248](https://github.com/apache/arrow-datafusion/pull/8248) (markusa380) -- feat: support simplifying BinaryExpr with arbitrary guarantees in GuaranteeRewriter [#8256](https://github.com/apache/arrow-datafusion/pull/8256) (wjones127) -- Making stream joins extensible: A new Trait implementation for SHJ [#8234](https://github.com/apache/arrow-datafusion/pull/8234) (metesynnada) -- Don't Canonicalize Filesystem Paths in ListingTableUrl / support new external tables for files that do not (yet) exist [#8014](https://github.com/apache/arrow-datafusion/pull/8014) (tustvold) -- Minor: Add sql level test for inserting into non-existent directory [#8278](https://github.com/apache/arrow-datafusion/pull/8278) (alamb) -- Replace `array_has/array_has_all/array_has_any` macro to remove duplicate code [#8263](https://github.com/apache/arrow-datafusion/pull/8263) (Veeupup) -- Fix bug in field level metadata matching code [#8286](https://github.com/apache/arrow-datafusion/pull/8286) (alamb) -- Refactor Interval Arithmetic Updates [#8276](https://github.com/apache/arrow-datafusion/pull/8276) (berkaysynnada) -- [MINOR]: Remove unecessary orderings from the final plan [#8289](https://github.com/apache/arrow-datafusion/pull/8289) (mustafasrepo) -- consistent logical & physical `NTILE` return types [#8270](https://github.com/apache/arrow-datafusion/pull/8270) (korowa) -- make `array_union`/`array_except`/`array_intersect` handle empty/null arrays rightly [#8269](https://github.com/apache/arrow-datafusion/pull/8269) (Veeupup) -- improve file path validation when reading parquet 
[#8267](https://github.com/apache/arrow-datafusion/pull/8267) (Weijun-H) -- [Benchmarks] Make `partitions` default to number of cores instead of 2 [#8292](https://github.com/apache/arrow-datafusion/pull/8292) (andygrove) -- Update prost-build requirement from =0.12.2 to =0.12.3 [#8298](https://github.com/apache/arrow-datafusion/pull/8298) (dependabot[bot]) -- Fix Display for List [#8261](https://github.com/apache/arrow-datafusion/pull/8261) (jayzhan211) -- feat: support customizing column default values for inserting [#8283](https://github.com/apache/arrow-datafusion/pull/8283) (jonahgao) -- support `LargeList` for `arrow_cast`, support `ScalarValue::LargeList` [#8290](https://github.com/apache/arrow-datafusion/pull/8290) (Weijun-H) -- Minor: remove useless clone based on Clippy [#8300](https://github.com/apache/arrow-datafusion/pull/8300) (Weijun-H) -- Calculate ordering equivalence for expressions (rather than just columns) [#8281](https://github.com/apache/arrow-datafusion/pull/8281) (mustafasrepo) -- Fix sqllogictests link in contributor-guide/index.md [#8314](https://github.com/apache/arrow-datafusion/pull/8314) (qrilka) -- Refactor: Unify `Expr::ScalarFunction` and `Expr::ScalarUDF`, introduce unresolved functions by name [#8258](https://github.com/apache/arrow-datafusion/pull/8258) (2010YOUY01) -- Support no distinct aggregate sum/min/max in `single_distinct_to_group_by` rule [#8266](https://github.com/apache/arrow-datafusion/pull/8266) (haohuaijin) -- feat:implement sql style 'substr_index' string function [#8272](https://github.com/apache/arrow-datafusion/pull/8272) (Syleechan) -- Fixing issues with for timestamp literals [#8193](https://github.com/apache/arrow-datafusion/pull/8193) (comphead) -- Projection Pushdown over StreamingTableExec [#8299](https://github.com/apache/arrow-datafusion/pull/8299) (berkaysynnada) -- minor: fix documentation [#8323](https://github.com/apache/arrow-datafusion/pull/8323) (comphead) -- fix: wrong result of range function 
[#8313](https://github.com/apache/arrow-datafusion/pull/8313) (smallzhongfeng) -- Minor: rename parquet.rs to parquet/mod.rs [#8301](https://github.com/apache/arrow-datafusion/pull/8301) (alamb) -- refactor: output ordering [#8304](https://github.com/apache/arrow-datafusion/pull/8304) (QuenKar) -- Update substrait requirement from 0.19.0 to 0.20.0 [#8339](https://github.com/apache/arrow-datafusion/pull/8339) (dependabot[bot]) -- Port tests in `aggregates.rs` to sqllogictest [#8316](https://github.com/apache/arrow-datafusion/pull/8316) (edmondop) -- Library Guide: Add Using the DataFrame API [#8319](https://github.com/apache/arrow-datafusion/pull/8319) (Veeupup) -- Port tests in limit.rs to sqllogictest [#8315](https://github.com/apache/arrow-datafusion/pull/8315) (zhangxffff) -- move array function unit_tests to sqllogictest [#8332](https://github.com/apache/arrow-datafusion/pull/8332) (Veeupup) -- NTH_VALUE reverse support [#8327](https://github.com/apache/arrow-datafusion/pull/8327) (mustafasrepo) -- Optimize Projections during Logical Plan [#8340](https://github.com/apache/arrow-datafusion/pull/8340) (mustafasrepo) -- [MINOR]: Move merge projections tests to under optimize projections [#8352](https://github.com/apache/arrow-datafusion/pull/8352) (mustafasrepo) -- Add `quote` and `escape` attributes to create csv external table [#8351](https://github.com/apache/arrow-datafusion/pull/8351) (Asura7969) -- Minor: Add DataFrame test [#8341](https://github.com/apache/arrow-datafusion/pull/8341) (alamb) -- Minor: clean up the code based on Clippy [#8359](https://github.com/apache/arrow-datafusion/pull/8359) (Weijun-H) -- Minor: Make it easier to work with Expr::ScalarFunction [#8350](https://github.com/apache/arrow-datafusion/pull/8350) (alamb) -- Minor: Move some datafusion-optimizer::utils down to datafusion-expr::utils [#8354](https://github.com/apache/arrow-datafusion/pull/8354) (Jesse-Bakker) -- Minor: Make `BuiltInScalarFunction::alias` a method 
[#8349](https://github.com/apache/arrow-datafusion/pull/8349) (alamb) -- Extract parquet statistics to its own module, add tests [#8294](https://github.com/apache/arrow-datafusion/pull/8294) (alamb) -- feat:implement sql style 'find_in_set' string function [#8328](https://github.com/apache/arrow-datafusion/pull/8328) (Syleechan) -- Support LargeUtf8 to Temporal Coercion [#8357](https://github.com/apache/arrow-datafusion/pull/8357) (jayzhan211) -- Refactor aggregate function handling [#8358](https://github.com/apache/arrow-datafusion/pull/8358) (Weijun-H) -- Implement Aliases for ScalarUDF [#8360](https://github.com/apache/arrow-datafusion/pull/8360) (Veeupup) -- Minor: Remove unnecessary name field in `ScalarFunctionDefintion` [#8365](https://github.com/apache/arrow-datafusion/pull/8365) (alamb) -- feat: support `LargeList` in `array_empty` [#8321](https://github.com/apache/arrow-datafusion/pull/8321) (Weijun-H) -- Double type argument for to_timestamp function [#8159](https://github.com/apache/arrow-datafusion/pull/8159) (spaydar) -- Support User Defined Table Function [#8306](https://github.com/apache/arrow-datafusion/pull/8306) (Veeupup) -- Document timestamp input limits [#8369](https://github.com/apache/arrow-datafusion/pull/8369) (comphead) -- fix: make `ntile` work in some corner cases [#8371](https://github.com/apache/arrow-datafusion/pull/8371) (haohuaijin) -- Minor: Refactor array_union function to use a generic union_arrays function [#8381](https://github.com/apache/arrow-datafusion/pull/8381) (Weijun-H) -- Minor: Refactor function argument handling in `ScalarFunctionDefinition` [#8387](https://github.com/apache/arrow-datafusion/pull/8387) (Weijun-H) -- Materialize dictionaries in group keys [#8291](https://github.com/apache/arrow-datafusion/pull/8291) (qrilka) -- Rewrite `array_ndims` to fix List(Null) handling [#8320](https://github.com/apache/arrow-datafusion/pull/8320) (jayzhan211) -- Docs: Improve the documentation on `ScalarValue` 
[#8378](https://github.com/apache/arrow-datafusion/pull/8378) (alamb) -- Avoid concat for `array_replace` [#8337](https://github.com/apache/arrow-datafusion/pull/8337) (jayzhan211) -- add a summary table to benchmark compare output [#8399](https://github.com/apache/arrow-datafusion/pull/8399) (razeghi71) -- Refactors on TreeNode Implementations [#8395](https://github.com/apache/arrow-datafusion/pull/8395) (berkaysynnada) -- feat: support `LargeList` in `make_array` and `array_length` [#8121](https://github.com/apache/arrow-datafusion/pull/8121) (Weijun-H) -- remove `unalias` TableScan filters when create Physical Filter [#8404](https://github.com/apache/arrow-datafusion/pull/8404) (jackwener) -- Update custom-table-providers.md [#8409](https://github.com/apache/arrow-datafusion/pull/8409) (nickpoorman) -- fix transforming `LogicalPlan::Explain` use `TreeNode::transform` fails [#8400](https://github.com/apache/arrow-datafusion/pull/8400) (haohuaijin) -- Docs: Fix `array_except` documentation example error [#8407](https://github.com/apache/arrow-datafusion/pull/8407) (Asura7969) -- Support named query parameters [#8384](https://github.com/apache/arrow-datafusion/pull/8384) (Asura7969) -- Minor: Add installation link to README.md [#8389](https://github.com/apache/arrow-datafusion/pull/8389) (Weijun-H) -- Update code comment for the cases of regularized RANGE frame and add tests for ORDER BY cases with RANGE frame [#8410](https://github.com/apache/arrow-datafusion/pull/8410) (viirya) -- Minor: Add example with parameters to LogicalPlan [#8418](https://github.com/apache/arrow-datafusion/pull/8418) (alamb) -- Minor: Improve `PruningPredicate` documentation [#8394](https://github.com/apache/arrow-datafusion/pull/8394) (alamb) -- feat: ScalarValue from String [#8411](https://github.com/apache/arrow-datafusion/pull/8411) (QuenKar) -- Bump actions/labeler from 4.3.0 to 5.0.0 [#8422](https://github.com/apache/arrow-datafusion/pull/8422) (dependabot[bot]) -- Update sqlparser 
requirement from 0.39.0 to 0.40.0 [#8338](https://github.com/apache/arrow-datafusion/pull/8338) (dependabot[bot]) -- feat: support `LargeList` for `array_has`, `array_has_all` and `array_has_any` [#8322](https://github.com/apache/arrow-datafusion/pull/8322) (Weijun-H) -- Union `schema` can't be a subset of the child schema [#8408](https://github.com/apache/arrow-datafusion/pull/8408) (jackwener) -- Move `PartitionSearchMode` into datafusion_physical_plan, rename to `InputOrderMode` [#8364](https://github.com/apache/arrow-datafusion/pull/8364) (alamb) -- Make filter selectivity for statistics configurable [#8243](https://github.com/apache/arrow-datafusion/pull/8243) (edmondop) -- fix: Changed labeler.yml to latest format [#8431](https://github.com/apache/arrow-datafusion/pull/8431) (viirya) -- Minor: Use `ScalarValue::from` impl for strings [#8429](https://github.com/apache/arrow-datafusion/pull/8429) (alamb) -- Support crossjoin in substrait. [#8427](https://github.com/apache/arrow-datafusion/pull/8427) (my-vegetable-has-exploded) -- Fix ambiguous reference when aliasing in combination with `ORDER BY` [#8425](https://github.com/apache/arrow-datafusion/pull/8425) (Asura7969) -- Minor: convert marcro `list-slice` and `slice` to function [#8424](https://github.com/apache/arrow-datafusion/pull/8424) (Weijun-H) -- Remove macro in iter_to_array for List [#8414](https://github.com/apache/arrow-datafusion/pull/8414) (jayzhan211) -- fix: Literal in `ORDER BY` window definition should not be an ordinal referring to relation column [#8419](https://github.com/apache/arrow-datafusion/pull/8419) (viirya) -- feat: customize column default values for external tables [#8415](https://github.com/apache/arrow-datafusion/pull/8415) (jonahgao) -- feat: Support `array_sort`(`list_sort`) [#8279](https://github.com/apache/arrow-datafusion/pull/8279) (Asura7969) -- Bugfix: Remove df-cli specific SQL statment options before executing with DataFusion 
[#8426](https://github.com/apache/arrow-datafusion/pull/8426) (devinjdangelo) -- Detect when filters on unique constraints make subqueries scalar [#8312](https://github.com/apache/arrow-datafusion/pull/8312) (Jesse-Bakker) -- Add alias check to optimize projections merge [#8438](https://github.com/apache/arrow-datafusion/pull/8438) (mustafasrepo) -- Fix PartialOrd for ScalarValue::List/FixSizeList/LargeList [#8253](https://github.com/apache/arrow-datafusion/pull/8253) (jayzhan211) -- Support parquet_metadata for datafusion-cli [#8413](https://github.com/apache/arrow-datafusion/pull/8413) (Veeupup) -- Fix bug in optimizing a nested count [#8459](https://github.com/apache/arrow-datafusion/pull/8459) (Dandandan) -- Bump actions/setup-python from 4 to 5 [#8449](https://github.com/apache/arrow-datafusion/pull/8449) (dependabot[bot]) -- fix: ORDER BY window definition should work on null literal [#8444](https://github.com/apache/arrow-datafusion/pull/8444) (viirya) -- flx clippy warnings [#8455](https://github.com/apache/arrow-datafusion/pull/8455) (waynexia) -- fix: RANGE frame for corner cases with empty ORDER BY clause should be treated as constant sort [#8445](https://github.com/apache/arrow-datafusion/pull/8445) (viirya) -- Preserve `dict_id` on `Field` during serde roundtrip [#8457](https://github.com/apache/arrow-datafusion/pull/8457) (avantgardnerio) -- feat: support `InterleaveExecNode` in the proto [#8460](https://github.com/apache/arrow-datafusion/pull/8460) (liukun4515) -- [BUG FIX]: Proper Empty Batch handling in window execution [#8466](https://github.com/apache/arrow-datafusion/pull/8466) (mustafasrepo) -- Minor: update `cast` [#8458](https://github.com/apache/arrow-datafusion/pull/8458) (Weijun-H) -- fix: don't unifies projection if expr is non-trival [#8454](https://github.com/apache/arrow-datafusion/pull/8454) (haohuaijin) -- Minor: Add new bloom filter predicate tests [#8433](https://github.com/apache/arrow-datafusion/pull/8433) (alamb) -- Add PRIMARY 
KEY Aggregate support to dataframe API [#8356](https://github.com/apache/arrow-datafusion/pull/8356) (mustafasrepo) -- Minor: refactor `data_trunc` to reduce duplicated code [#8430](https://github.com/apache/arrow-datafusion/pull/8430) (Weijun-H) -- Support array_distinct function. [#8268](https://github.com/apache/arrow-datafusion/pull/8268) (my-vegetable-has-exploded) -- Add primary key support to stream table [#8467](https://github.com/apache/arrow-datafusion/pull/8467) (mustafasrepo) -- Add `evaluate_demo` and `range_analysis_demo` to Expr examples [#8377](https://github.com/apache/arrow-datafusion/pull/8377) (alamb) -- Minor: fix function name typo [#8473](https://github.com/apache/arrow-datafusion/pull/8473) (Weijun-H) -- Minor: Fix comment typo in table.rs: s/indentical/identical/ [#8469](https://github.com/apache/arrow-datafusion/pull/8469) (KeunwooLee-at) -- Remove `define_array_slice` and reuse `array_slice` for `array_pop_front/back` [#8401](https://github.com/apache/arrow-datafusion/pull/8401) (jayzhan211) -- Minor: refactor `trim` to clean up duplicated code [#8434](https://github.com/apache/arrow-datafusion/pull/8434) (Weijun-H) -- Split `EmptyExec` into `PlaceholderRowExec` [#8446](https://github.com/apache/arrow-datafusion/pull/8446) (razeghi71) -- Enable non-uniform field type for structs created in DataFusion [#8463](https://github.com/apache/arrow-datafusion/pull/8463) (dlovell) -- Minor: Add multi ordering test for array agg order [#8439](https://github.com/apache/arrow-datafusion/pull/8439) (jayzhan211) -- Sort filenames when reading parquet to ensure consistent schema [#6629](https://github.com/apache/arrow-datafusion/pull/6629) (thomas-k-cameron) -- Minor: Improve comments in EnforceDistribution tests [#8474](https://github.com/apache/arrow-datafusion/pull/8474) (alamb) -- fix: support uppercase when parsing `Interval` [#8478](https://github.com/apache/arrow-datafusion/pull/8478) (QuenKar) -- Better Equivalence (ordering and exact 
equivalence) Propagation through ProjectionExec [#8484](https://github.com/apache/arrow-datafusion/pull/8484) (mustafasrepo) -- Add `today` alias for `current_date` [#8423](https://github.com/apache/arrow-datafusion/pull/8423) (smallzhongfeng) -- Minor: remove useless clone in `array_expression` [#8495](https://github.com/apache/arrow-datafusion/pull/8495) (Weijun-H) -- fix: incorrect set preserve_partitioning in SortExec [#8485](https://github.com/apache/arrow-datafusion/pull/8485) (haohuaijin) -- Explicitly mark parquet for tests in datafusion-common [#8497](https://github.com/apache/arrow-datafusion/pull/8497) (Dennis40816) -- Minor/Doc: Clarify DataFrame::write_table Documentation [#8519](https://github.com/apache/arrow-datafusion/pull/8519) (devinjdangelo) -- fix: Pull stats in `IdentVisitor`/`GraphvizVisitor` only when requested [#8514](https://github.com/apache/arrow-datafusion/pull/8514) (vrongmeal) -- Change display of RepartitionExec from SortPreservingRepartitionExec to RepartitionExec preserve_order=true [#8521](https://github.com/apache/arrow-datafusion/pull/8521) (JacobOgle) -- Fix `DataFrame::cache` errors with `Plan("Mismatch between schema and batches")` [#8510](https://github.com/apache/arrow-datafusion/pull/8510) (Asura7969) -- Minor: update pbjson_dependency [#8470](https://github.com/apache/arrow-datafusion/pull/8470) (alamb) -- Minor: Update prost-derive dependency [#8471](https://github.com/apache/arrow-datafusion/pull/8471) (alamb) -- Minor/Doc: Add DataFrame::write_table to DataFrame user guide [#8527](https://github.com/apache/arrow-datafusion/pull/8527) (devinjdangelo) -- Minor: Add repartition_file.slt end to end test for repartitioning files, and supporting tweaks [#8505](https://github.com/apache/arrow-datafusion/pull/8505) (alamb) -- Prepare version 34.0.0 [#8508](https://github.com/apache/arrow-datafusion/pull/8508) (andygrove) -- refactor: use ExprBuilder to consume substrait expr and use macro to generate error 
[#8515](https://github.com/apache/arrow-datafusion/pull/8515) (waynexia) -- [MINOR]: Make some slt tests deterministic [#8525](https://github.com/apache/arrow-datafusion/pull/8525) (mustafasrepo) -- fix: volatile expressions should not be target of common subexpt elimination [#8520](https://github.com/apache/arrow-datafusion/pull/8520) (viirya) -- Minor: Add LakeSoul to the list of Known Users [#8536](https://github.com/apache/arrow-datafusion/pull/8536) (xuchen-plus) -- Fix regression with Incorrect results when reading parquet files with different schemas and statistics [#8533](https://github.com/apache/arrow-datafusion/pull/8533) (alamb) -- feat: improve string statistics display in datafusion-cli `parquet_metadata` function [#8535](https://github.com/apache/arrow-datafusion/pull/8535) (asimsedhain) -- Defer file creation to write [#8539](https://github.com/apache/arrow-datafusion/pull/8539) (tustvold) -- Minor: Improve error handling in sqllogictest runner [#8544](https://github.com/apache/arrow-datafusion/pull/8544) (alamb) +- Fix typo in partitioning.rs [#8134](https://github.com/apache/datafusion/pull/8134) (lewiszlw) +- Implement `DISTINCT ON` from Postgres [#7981](https://github.com/apache/datafusion/pull/7981) (gruuya) +- Prepare 33.0.0-rc2 [#8144](https://github.com/apache/datafusion/pull/8144) (andygrove) +- Avoid concat in `array_append` [#8137](https://github.com/apache/datafusion/pull/8137) (jayzhan211) +- Replace macro with function for array_remove [#8106](https://github.com/apache/datafusion/pull/8106) (jayzhan211) +- Implement `array_union` [#7897](https://github.com/apache/datafusion/pull/7897) (edmondop) +- Minor: Document `ExecutionPlan::equivalence_properties` more thoroughly [#8128](https://github.com/apache/datafusion/pull/8128) (alamb) +- feat: show statistics in explain verbose [#8113](https://github.com/apache/datafusion/pull/8113) (NGA-TRAN) +- feat:implement postgres style 'overlay' string function 
[#8117](https://github.com/apache/datafusion/pull/8117) (Syleechan) +- Minor: Encapsulate `LeftJoinData` into a struct (rather than anonymous enum) and add comments [#8153](https://github.com/apache/datafusion/pull/8153) (alamb) +- Update sqllogictest requirement from 0.18.0 to 0.19.0 [#8163](https://github.com/apache/datafusion/pull/8163) (dependabot[bot]) +- feat: fill missing values with NULLs while inserting [#8146](https://github.com/apache/datafusion/pull/8146) (jonahgao) +- Introduce return type for aggregate sum [#8141](https://github.com/apache/datafusion/pull/8141) (jayzhan211) +- implement range/generate_series func [#8140](https://github.com/apache/datafusion/pull/8140) (Veeupup) +- Encapsulate `EquivalenceClass` into a struct [#8034](https://github.com/apache/datafusion/pull/8034) (alamb) +- Revert "Minor: remove unnecessary projection in `single_distinct_to_g… [#8176](https://github.com/apache/datafusion/pull/8176) (NGA-TRAN) +- Preserve all of the valid orderings during merging. 
[#8169](https://github.com/apache/datafusion/pull/8169) (mustafasrepo) +- Make fields of `ScalarUDF` , `AggregateUDF` and `WindowUDF` non `pub` [#8079](https://github.com/apache/datafusion/pull/8079) (alamb) +- Fix logical conflicts [#8187](https://github.com/apache/datafusion/pull/8187) (tustvold) +- Minor: Update JoinHashMap comment example to make it clearer [#8154](https://github.com/apache/datafusion/pull/8154) (alamb) +- Implement StreamTable and StreamTableProvider (#7994) [#8021](https://github.com/apache/datafusion/pull/8021) (tustvold) +- [MINOR]: Remove unused Results [#8189](https://github.com/apache/datafusion/pull/8189) (mustafasrepo) +- Minor: clean up the code based on clippy [#8179](https://github.com/apache/datafusion/pull/8179) (Weijun-H) +- Minor: simplify filter statistics code [#8174](https://github.com/apache/datafusion/pull/8174) (alamb) +- Replace macro with function for `array_position` and `array_positions` [#8170](https://github.com/apache/datafusion/pull/8170) (jayzhan211) +- Add Library Guide for User Defined Functions: Window/Aggregate [#8171](https://github.com/apache/datafusion/pull/8171) (Veeupup) +- Add more stream docs [#8192](https://github.com/apache/datafusion/pull/8192) (tustvold) +- Implement func `array_pop_front` [#8142](https://github.com/apache/datafusion/pull/8142) (Veeupup) +- Moving arrow_files SQL tests to sqllogictest [#8217](https://github.com/apache/datafusion/pull/8217) (edmondop) +- fix regression in the use of name in ProjectionPushdown [#8219](https://github.com/apache/datafusion/pull/8219) (alamb) +- [MINOR]: Fix column indices in the planning tests [#8191](https://github.com/apache/datafusion/pull/8191) (mustafasrepo) +- Remove unnecessary reassignment [#8232](https://github.com/apache/datafusion/pull/8232) (qrilka) +- Update itertools requirement from 0.11 to 0.12 [#8233](https://github.com/apache/datafusion/pull/8233) (crepererum) +- Port tests in subqueries.rs to sqllogictest 
[#8231](https://github.com/apache/datafusion/pull/8231) (PsiACE) +- feat: make FixedSizeList scalar also an ArrayRef [#8221](https://github.com/apache/datafusion/pull/8221) (wjones127) +- Add versions to datafusion dependencies [#8238](https://github.com/apache/datafusion/pull/8238) (andygrove) +- feat: to_array_of_size for ScalarValue::FixedSizeList [#8225](https://github.com/apache/datafusion/pull/8225) (wjones127) +- feat:implement calcite style 'levenshtein' string function [#8168](https://github.com/apache/datafusion/pull/8168) (Syleechan) +- feat: roundtrip FixedSizeList Scalar to protobuf [#8239](https://github.com/apache/datafusion/pull/8239) (wjones127) +- Update prost-build requirement from =0.12.1 to =0.12.2 [#8244](https://github.com/apache/datafusion/pull/8244) (dependabot[bot]) +- Minor: Port tests in `displayable.rs` to sqllogictest [#8246](https://github.com/apache/datafusion/pull/8246) (Weijun-H) +- Minor: add `with_estimated_selectivity ` to Precision [#8177](https://github.com/apache/datafusion/pull/8177) (alamb) +- fix: Timestamp with timezone not considered `join on` [#8150](https://github.com/apache/datafusion/pull/8150) (ACking-you) +- Replace macro in array_array to remove duplicate codes [#8252](https://github.com/apache/datafusion/pull/8252) (Veeupup) +- Port tests in projection.rs to sqllogictest [#8240](https://github.com/apache/datafusion/pull/8240) (PsiACE) +- Introduce `array_except` function [#8135](https://github.com/apache/datafusion/pull/8135) (jayzhan211) +- Port tests in `describe.rs` to sqllogictest [#8242](https://github.com/apache/datafusion/pull/8242) (Asura7969) +- Remove FileWriterMode and ListingTableInsertMode (#7994) [#8017](https://github.com/apache/datafusion/pull/8017) (tustvold) +- Minor: clean up the code based on Clippy [#8257](https://github.com/apache/datafusion/pull/8257) (Weijun-H) +- Update arrow 49.0.0 and object_store 0.8.0 [#8029](https://github.com/apache/datafusion/pull/8029) (tustvold) +- feat: impl the 
basic `string_agg` function [#8148](https://github.com/apache/datafusion/pull/8148) (haohuaijin) +- Minor: Make schema of grouping set columns nullable [#8248](https://github.com/apache/datafusion/pull/8248) (markusa380) +- feat: support simplifying BinaryExpr with arbitrary guarantees in GuaranteeRewriter [#8256](https://github.com/apache/datafusion/pull/8256) (wjones127) +- Making stream joins extensible: A new Trait implementation for SHJ [#8234](https://github.com/apache/datafusion/pull/8234) (metesynnada) +- Don't Canonicalize Filesystem Paths in ListingTableUrl / support new external tables for files that do not (yet) exist [#8014](https://github.com/apache/datafusion/pull/8014) (tustvold) +- Minor: Add sql level test for inserting into non-existent directory [#8278](https://github.com/apache/datafusion/pull/8278) (alamb) +- Replace `array_has/array_has_all/array_has_any` macro to remove duplicate code [#8263](https://github.com/apache/datafusion/pull/8263) (Veeupup) +- Fix bug in field level metadata matching code [#8286](https://github.com/apache/datafusion/pull/8286) (alamb) +- Refactor Interval Arithmetic Updates [#8276](https://github.com/apache/datafusion/pull/8276) (berkaysynnada) +- [MINOR]: Remove unecessary orderings from the final plan [#8289](https://github.com/apache/datafusion/pull/8289) (mustafasrepo) +- consistent logical & physical `NTILE` return types [#8270](https://github.com/apache/datafusion/pull/8270) (korowa) +- make `array_union`/`array_except`/`array_intersect` handle empty/null arrays rightly [#8269](https://github.com/apache/datafusion/pull/8269) (Veeupup) +- improve file path validation when reading parquet [#8267](https://github.com/apache/datafusion/pull/8267) (Weijun-H) +- [Benchmarks] Make `partitions` default to number of cores instead of 2 [#8292](https://github.com/apache/datafusion/pull/8292) (andygrove) +- Update prost-build requirement from =0.12.2 to =0.12.3 [#8298](https://github.com/apache/datafusion/pull/8298) 
(dependabot[bot]) +- Fix Display for List [#8261](https://github.com/apache/datafusion/pull/8261) (jayzhan211) +- feat: support customizing column default values for inserting [#8283](https://github.com/apache/datafusion/pull/8283) (jonahgao) +- support `LargeList` for `arrow_cast`, support `ScalarValue::LargeList` [#8290](https://github.com/apache/datafusion/pull/8290) (Weijun-H) +- Minor: remove useless clone based on Clippy [#8300](https://github.com/apache/datafusion/pull/8300) (Weijun-H) +- Calculate ordering equivalence for expressions (rather than just columns) [#8281](https://github.com/apache/datafusion/pull/8281) (mustafasrepo) +- Fix sqllogictests link in contributor-guide/index.md [#8314](https://github.com/apache/datafusion/pull/8314) (qrilka) +- Refactor: Unify `Expr::ScalarFunction` and `Expr::ScalarUDF`, introduce unresolved functions by name [#8258](https://github.com/apache/datafusion/pull/8258) (2010YOUY01) +- Support no distinct aggregate sum/min/max in `single_distinct_to_group_by` rule [#8266](https://github.com/apache/datafusion/pull/8266) (haohuaijin) +- feat:implement sql style 'substr_index' string function [#8272](https://github.com/apache/datafusion/pull/8272) (Syleechan) +- Fixing issues with for timestamp literals [#8193](https://github.com/apache/datafusion/pull/8193) (comphead) +- Projection Pushdown over StreamingTableExec [#8299](https://github.com/apache/datafusion/pull/8299) (berkaysynnada) +- minor: fix documentation [#8323](https://github.com/apache/datafusion/pull/8323) (comphead) +- fix: wrong result of range function [#8313](https://github.com/apache/datafusion/pull/8313) (smallzhongfeng) +- Minor: rename parquet.rs to parquet/mod.rs [#8301](https://github.com/apache/datafusion/pull/8301) (alamb) +- refactor: output ordering [#8304](https://github.com/apache/datafusion/pull/8304) (QuenKar) +- Update substrait requirement from 0.19.0 to 0.20.0 [#8339](https://github.com/apache/datafusion/pull/8339) (dependabot[bot]) +- Port 
tests in `aggregates.rs` to sqllogictest [#8316](https://github.com/apache/datafusion/pull/8316) (edmondop) +- Library Guide: Add Using the DataFrame API [#8319](https://github.com/apache/datafusion/pull/8319) (Veeupup) +- Port tests in limit.rs to sqllogictest [#8315](https://github.com/apache/datafusion/pull/8315) (zhangxffff) +- move array function unit_tests to sqllogictest [#8332](https://github.com/apache/datafusion/pull/8332) (Veeupup) +- NTH_VALUE reverse support [#8327](https://github.com/apache/datafusion/pull/8327) (mustafasrepo) +- Optimize Projections during Logical Plan [#8340](https://github.com/apache/datafusion/pull/8340) (mustafasrepo) +- [MINOR]: Move merge projections tests to under optimize projections [#8352](https://github.com/apache/datafusion/pull/8352) (mustafasrepo) +- Add `quote` and `escape` attributes to create csv external table [#8351](https://github.com/apache/datafusion/pull/8351) (Asura7969) +- Minor: Add DataFrame test [#8341](https://github.com/apache/datafusion/pull/8341) (alamb) +- Minor: clean up the code based on Clippy [#8359](https://github.com/apache/datafusion/pull/8359) (Weijun-H) +- Minor: Make it easier to work with Expr::ScalarFunction [#8350](https://github.com/apache/datafusion/pull/8350) (alamb) +- Minor: Move some datafusion-optimizer::utils down to datafusion-expr::utils [#8354](https://github.com/apache/datafusion/pull/8354) (Jesse-Bakker) +- Minor: Make `BuiltInScalarFunction::alias` a method [#8349](https://github.com/apache/datafusion/pull/8349) (alamb) +- Extract parquet statistics to its own module, add tests [#8294](https://github.com/apache/datafusion/pull/8294) (alamb) +- feat:implement sql style 'find_in_set' string function [#8328](https://github.com/apache/datafusion/pull/8328) (Syleechan) +- Support LargeUtf8 to Temporal Coercion [#8357](https://github.com/apache/datafusion/pull/8357) (jayzhan211) +- Refactor aggregate function handling [#8358](https://github.com/apache/datafusion/pull/8358) 
(Weijun-H) +- Implement Aliases for ScalarUDF [#8360](https://github.com/apache/datafusion/pull/8360) (Veeupup) +- Minor: Remove unnecessary name field in `ScalarFunctionDefintion` [#8365](https://github.com/apache/datafusion/pull/8365) (alamb) +- feat: support `LargeList` in `array_empty` [#8321](https://github.com/apache/datafusion/pull/8321) (Weijun-H) +- Double type argument for to_timestamp function [#8159](https://github.com/apache/datafusion/pull/8159) (spaydar) +- Support User Defined Table Function [#8306](https://github.com/apache/datafusion/pull/8306) (Veeupup) +- Document timestamp input limits [#8369](https://github.com/apache/datafusion/pull/8369) (comphead) +- fix: make `ntile` work in some corner cases [#8371](https://github.com/apache/datafusion/pull/8371) (haohuaijin) +- Minor: Refactor array_union function to use a generic union_arrays function [#8381](https://github.com/apache/datafusion/pull/8381) (Weijun-H) +- Minor: Refactor function argument handling in `ScalarFunctionDefinition` [#8387](https://github.com/apache/datafusion/pull/8387) (Weijun-H) +- Materialize dictionaries in group keys [#8291](https://github.com/apache/datafusion/pull/8291) (qrilka) +- Rewrite `array_ndims` to fix List(Null) handling [#8320](https://github.com/apache/datafusion/pull/8320) (jayzhan211) +- Docs: Improve the documentation on `ScalarValue` [#8378](https://github.com/apache/datafusion/pull/8378) (alamb) +- Avoid concat for `array_replace` [#8337](https://github.com/apache/datafusion/pull/8337) (jayzhan211) +- add a summary table to benchmark compare output [#8399](https://github.com/apache/datafusion/pull/8399) (razeghi71) +- Refactors on TreeNode Implementations [#8395](https://github.com/apache/datafusion/pull/8395) (berkaysynnada) +- feat: support `LargeList` in `make_array` and `array_length` [#8121](https://github.com/apache/datafusion/pull/8121) (Weijun-H) +- remove `unalias` TableScan filters when create Physical Filter 
[#8404](https://github.com/apache/datafusion/pull/8404) (jackwener) +- Update custom-table-providers.md [#8409](https://github.com/apache/datafusion/pull/8409) (nickpoorman) +- fix transforming `LogicalPlan::Explain` use `TreeNode::transform` fails [#8400](https://github.com/apache/datafusion/pull/8400) (haohuaijin) +- Docs: Fix `array_except` documentation example error [#8407](https://github.com/apache/datafusion/pull/8407) (Asura7969) +- Support named query parameters [#8384](https://github.com/apache/datafusion/pull/8384) (Asura7969) +- Minor: Add installation link to README.md [#8389](https://github.com/apache/datafusion/pull/8389) (Weijun-H) +- Update code comment for the cases of regularized RANGE frame and add tests for ORDER BY cases with RANGE frame [#8410](https://github.com/apache/datafusion/pull/8410) (viirya) +- Minor: Add example with parameters to LogicalPlan [#8418](https://github.com/apache/datafusion/pull/8418) (alamb) +- Minor: Improve `PruningPredicate` documentation [#8394](https://github.com/apache/datafusion/pull/8394) (alamb) +- feat: ScalarValue from String [#8411](https://github.com/apache/datafusion/pull/8411) (QuenKar) +- Bump actions/labeler from 4.3.0 to 5.0.0 [#8422](https://github.com/apache/datafusion/pull/8422) (dependabot[bot]) +- Update sqlparser requirement from 0.39.0 to 0.40.0 [#8338](https://github.com/apache/datafusion/pull/8338) (dependabot[bot]) +- feat: support `LargeList` for `array_has`, `array_has_all` and `array_has_any` [#8322](https://github.com/apache/datafusion/pull/8322) (Weijun-H) +- Union `schema` can't be a subset of the child schema [#8408](https://github.com/apache/datafusion/pull/8408) (jackwener) +- Move `PartitionSearchMode` into datafusion_physical_plan, rename to `InputOrderMode` [#8364](https://github.com/apache/datafusion/pull/8364) (alamb) +- Make filter selectivity for statistics configurable [#8243](https://github.com/apache/datafusion/pull/8243) (edmondop) +- fix: Changed labeler.yml to latest 
format [#8431](https://github.com/apache/datafusion/pull/8431) (viirya) +- Minor: Use `ScalarValue::from` impl for strings [#8429](https://github.com/apache/datafusion/pull/8429) (alamb) +- Support crossjoin in substrait. [#8427](https://github.com/apache/datafusion/pull/8427) (my-vegetable-has-exploded) +- Fix ambiguous reference when aliasing in combination with `ORDER BY` [#8425](https://github.com/apache/datafusion/pull/8425) (Asura7969) +- Minor: convert marcro `list-slice` and `slice` to function [#8424](https://github.com/apache/datafusion/pull/8424) (Weijun-H) +- Remove macro in iter_to_array for List [#8414](https://github.com/apache/datafusion/pull/8414) (jayzhan211) +- fix: Literal in `ORDER BY` window definition should not be an ordinal referring to relation column [#8419](https://github.com/apache/datafusion/pull/8419) (viirya) +- feat: customize column default values for external tables [#8415](https://github.com/apache/datafusion/pull/8415) (jonahgao) +- feat: Support `array_sort`(`list_sort`) [#8279](https://github.com/apache/datafusion/pull/8279) (Asura7969) +- Bugfix: Remove df-cli specific SQL statment options before executing with DataFusion [#8426](https://github.com/apache/datafusion/pull/8426) (devinjdangelo) +- Detect when filters on unique constraints make subqueries scalar [#8312](https://github.com/apache/datafusion/pull/8312) (Jesse-Bakker) +- Add alias check to optimize projections merge [#8438](https://github.com/apache/datafusion/pull/8438) (mustafasrepo) +- Fix PartialOrd for ScalarValue::List/FixSizeList/LargeList [#8253](https://github.com/apache/datafusion/pull/8253) (jayzhan211) +- Support parquet_metadata for datafusion-cli [#8413](https://github.com/apache/datafusion/pull/8413) (Veeupup) +- Fix bug in optimizing a nested count [#8459](https://github.com/apache/datafusion/pull/8459) (Dandandan) +- Bump actions/setup-python from 4 to 5 [#8449](https://github.com/apache/datafusion/pull/8449) (dependabot[bot]) +- fix: ORDER BY 
window definition should work on null literal [#8444](https://github.com/apache/datafusion/pull/8444) (viirya) +- flx clippy warnings [#8455](https://github.com/apache/datafusion/pull/8455) (waynexia) +- fix: RANGE frame for corner cases with empty ORDER BY clause should be treated as constant sort [#8445](https://github.com/apache/datafusion/pull/8445) (viirya) +- Preserve `dict_id` on `Field` during serde roundtrip [#8457](https://github.com/apache/datafusion/pull/8457) (avantgardnerio) +- feat: support `InterleaveExecNode` in the proto [#8460](https://github.com/apache/datafusion/pull/8460) (liukun4515) +- [BUG FIX]: Proper Empty Batch handling in window execution [#8466](https://github.com/apache/datafusion/pull/8466) (mustafasrepo) +- Minor: update `cast` [#8458](https://github.com/apache/datafusion/pull/8458) (Weijun-H) +- fix: don't unifies projection if expr is non-trival [#8454](https://github.com/apache/datafusion/pull/8454) (haohuaijin) +- Minor: Add new bloom filter predicate tests [#8433](https://github.com/apache/datafusion/pull/8433) (alamb) +- Add PRIMARY KEY Aggregate support to dataframe API [#8356](https://github.com/apache/datafusion/pull/8356) (mustafasrepo) +- Minor: refactor `data_trunc` to reduce duplicated code [#8430](https://github.com/apache/datafusion/pull/8430) (Weijun-H) +- Support array_distinct function. 
[#8268](https://github.com/apache/datafusion/pull/8268) (my-vegetable-has-exploded) +- Add primary key support to stream table [#8467](https://github.com/apache/datafusion/pull/8467) (mustafasrepo) +- Add `evaluate_demo` and `range_analysis_demo` to Expr examples [#8377](https://github.com/apache/datafusion/pull/8377) (alamb) +- Minor: fix function name typo [#8473](https://github.com/apache/datafusion/pull/8473) (Weijun-H) +- Minor: Fix comment typo in table.rs: s/indentical/identical/ [#8469](https://github.com/apache/datafusion/pull/8469) (KeunwooLee-at) +- Remove `define_array_slice` and reuse `array_slice` for `array_pop_front/back` [#8401](https://github.com/apache/datafusion/pull/8401) (jayzhan211) +- Minor: refactor `trim` to clean up duplicated code [#8434](https://github.com/apache/datafusion/pull/8434) (Weijun-H) +- Split `EmptyExec` into `PlaceholderRowExec` [#8446](https://github.com/apache/datafusion/pull/8446) (razeghi71) +- Enable non-uniform field type for structs created in DataFusion [#8463](https://github.com/apache/datafusion/pull/8463) (dlovell) +- Minor: Add multi ordering test for array agg order [#8439](https://github.com/apache/datafusion/pull/8439) (jayzhan211) +- Sort filenames when reading parquet to ensure consistent schema [#6629](https://github.com/apache/datafusion/pull/6629) (thomas-k-cameron) +- Minor: Improve comments in EnforceDistribution tests [#8474](https://github.com/apache/datafusion/pull/8474) (alamb) +- fix: support uppercase when parsing `Interval` [#8478](https://github.com/apache/datafusion/pull/8478) (QuenKar) +- Better Equivalence (ordering and exact equivalence) Propagation through ProjectionExec [#8484](https://github.com/apache/datafusion/pull/8484) (mustafasrepo) +- Add `today` alias for `current_date` [#8423](https://github.com/apache/datafusion/pull/8423) (smallzhongfeng) +- Minor: remove useless clone in `array_expression` [#8495](https://github.com/apache/datafusion/pull/8495) (Weijun-H) +- fix: incorrect 
set preserve_partitioning in SortExec [#8485](https://github.com/apache/datafusion/pull/8485) (haohuaijin) +- Explicitly mark parquet for tests in datafusion-common [#8497](https://github.com/apache/datafusion/pull/8497) (Dennis40816) +- Minor/Doc: Clarify DataFrame::write_table Documentation [#8519](https://github.com/apache/datafusion/pull/8519) (devinjdangelo) +- fix: Pull stats in `IdentVisitor`/`GraphvizVisitor` only when requested [#8514](https://github.com/apache/datafusion/pull/8514) (vrongmeal) +- Change display of RepartitionExec from SortPreservingRepartitionExec to RepartitionExec preserve_order=true [#8521](https://github.com/apache/datafusion/pull/8521) (JacobOgle) +- Fix `DataFrame::cache` errors with `Plan("Mismatch between schema and batches")` [#8510](https://github.com/apache/datafusion/pull/8510) (Asura7969) +- Minor: update pbjson_dependency [#8470](https://github.com/apache/datafusion/pull/8470) (alamb) +- Minor: Update prost-derive dependency [#8471](https://github.com/apache/datafusion/pull/8471) (alamb) +- Minor/Doc: Add DataFrame::write_table to DataFrame user guide [#8527](https://github.com/apache/datafusion/pull/8527) (devinjdangelo) +- Minor: Add repartition_file.slt end to end test for repartitioning files, and supporting tweaks [#8505](https://github.com/apache/datafusion/pull/8505) (alamb) +- Prepare version 34.0.0 [#8508](https://github.com/apache/datafusion/pull/8508) (andygrove) +- refactor: use ExprBuilder to consume substrait expr and use macro to generate error [#8515](https://github.com/apache/datafusion/pull/8515) (waynexia) +- [MINOR]: Make some slt tests deterministic [#8525](https://github.com/apache/datafusion/pull/8525) (mustafasrepo) +- fix: volatile expressions should not be target of common subexpt elimination [#8520](https://github.com/apache/datafusion/pull/8520) (viirya) +- Minor: Add LakeSoul to the list of Known Users [#8536](https://github.com/apache/datafusion/pull/8536) (xuchen-plus) +- Fix regression with 
Incorrect results when reading parquet files with different schemas and statistics [#8533](https://github.com/apache/datafusion/pull/8533) (alamb) +- feat: improve string statistics display in datafusion-cli `parquet_metadata` function [#8535](https://github.com/apache/datafusion/pull/8535) (asimsedhain) +- Defer file creation to write [#8539](https://github.com/apache/datafusion/pull/8539) (tustvold) +- Minor: Improve error handling in sqllogictest runner [#8544](https://github.com/apache/datafusion/pull/8544) (alamb) diff --git a/dev/changelog/35.0.0.md b/dev/changelog/35.0.0.md index b48b2b7aaa12..1b8fa38ca3d0 100644 --- a/dev/changelog/35.0.0.md +++ b/dev/changelog/35.0.0.md @@ -17,279 +17,279 @@ under the License. --> -## [35.0.0](https://github.com/apache/arrow-datafusion/tree/35.0.0) (2024-01-20) +## [35.0.0](https://github.com/apache/datafusion/tree/35.0.0) (2024-01-20) -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/34.0.0...35.0.0) +[Full Changelog](https://github.com/apache/datafusion/compare/34.0.0...35.0.0) **Breaking changes:** -- Minor: make SubqueryAlias::try_new take Arc [#8542](https://github.com/apache/arrow-datafusion/pull/8542) (sadboy) -- Remove ListingTable and FileScanConfig Unbounded (#8540) [#8573](https://github.com/apache/arrow-datafusion/pull/8573) (tustvold) -- Rename `ParamValues::{LIST -> List,MAP -> Map}` [#8611](https://github.com/apache/arrow-datafusion/pull/8611) (kawadakk) -- Rename `expr::window_function::WindowFunction` to `WindowFunctionDefinition`, make structure consistent with ScalarFunction [#8382](https://github.com/apache/arrow-datafusion/pull/8382) (edmondop) -- Implement `ScalarUDF` in terms of `ScalarUDFImpl` trait [#8713](https://github.com/apache/arrow-datafusion/pull/8713) (alamb) -- Change `ScalarValue::{List, LargeList, FixedSizedList}` to take specific types rather than `ArrayRef` [#8562](https://github.com/apache/arrow-datafusion/pull/8562) (rspears74) -- Remove unused array_expression.rs 
and `SUPPORTED_ARRAY_TYPES` [#8807](https://github.com/apache/arrow-datafusion/pull/8807) (alamb) -- Simplify physical expression creation API (not require schema) [#8823](https://github.com/apache/arrow-datafusion/pull/8823) (comphead) -- Determine causal window frames to produce early results. [#8842](https://github.com/apache/arrow-datafusion/pull/8842) (mustafasrepo) +- Minor: make SubqueryAlias::try_new take Arc [#8542](https://github.com/apache/datafusion/pull/8542) (sadboy) +- Remove ListingTable and FileScanConfig Unbounded (#8540) [#8573](https://github.com/apache/datafusion/pull/8573) (tustvold) +- Rename `ParamValues::{LIST -> List,MAP -> Map}` [#8611](https://github.com/apache/datafusion/pull/8611) (kawadakk) +- Rename `expr::window_function::WindowFunction` to `WindowFunctionDefinition`, make structure consistent with ScalarFunction [#8382](https://github.com/apache/datafusion/pull/8382) (edmondop) +- Implement `ScalarUDF` in terms of `ScalarUDFImpl` trait [#8713](https://github.com/apache/datafusion/pull/8713) (alamb) +- Change `ScalarValue::{List, LargeList, FixedSizedList}` to take specific types rather than `ArrayRef` [#8562](https://github.com/apache/datafusion/pull/8562) (rspears74) +- Remove unused array_expression.rs and `SUPPORTED_ARRAY_TYPES` [#8807](https://github.com/apache/datafusion/pull/8807) (alamb) +- Simplify physical expression creation API (not require schema) [#8823](https://github.com/apache/datafusion/pull/8823) (comphead) +- Determine causal window frames to produce early results. 
[#8842](https://github.com/apache/datafusion/pull/8842) (mustafasrepo) **Implemented enhancements:** -- feat: implement Unary Expr in substrait [#8534](https://github.com/apache/arrow-datafusion/pull/8534) (waynexia) -- feat: implement Repartition plan in substrait [#8526](https://github.com/apache/arrow-datafusion/pull/8526) (waynexia) -- feat: support largelist in array_slice [#8561](https://github.com/apache/arrow-datafusion/pull/8561) (Weijun-H) -- feat: support `LargeList` in `array_positions` [#8571](https://github.com/apache/arrow-datafusion/pull/8571) (Weijun-H) -- feat: support `LargeList` in `array_element` [#8570](https://github.com/apache/arrow-datafusion/pull/8570) (Weijun-H) -- feat: support `LargeList` in `array_dims` [#8592](https://github.com/apache/arrow-datafusion/pull/8592) (Weijun-H) -- feat: support `LargeList` in `array_remove` [#8595](https://github.com/apache/arrow-datafusion/pull/8595) (Weijun-H) -- feat: support inlist in LiteralGurantee for pruning [#8654](https://github.com/apache/arrow-datafusion/pull/8654) (my-vegetable-has-exploded) -- feat: support 'LargeList' in `array_pop_front` and `array_pop_back` [#8569](https://github.com/apache/arrow-datafusion/pull/8569) (Weijun-H) -- feat: support `LargeList` in `array_position` [#8714](https://github.com/apache/arrow-datafusion/pull/8714) (Weijun-H) -- feat: support `LargeList` in `array_ndims` [#8716](https://github.com/apache/arrow-datafusion/pull/8716) (Weijun-H) -- feat: remove filters with null constants [#8700](https://github.com/apache/arrow-datafusion/pull/8700) (asimsedhain) -- feat: support LargeList in array_repeat [#8725](https://github.com/apache/arrow-datafusion/pull/8725) (Weijun-H) -- feat: native types in `DistinctCountAccumulator` for primitive types [#8721](https://github.com/apache/arrow-datafusion/pull/8721) (korowa) -- feat: support `LargeList` in `cardinality` [#8726](https://github.com/apache/arrow-datafusion/pull/8726) (Weijun-H) -- feat: support `largelist` in 
`array_to_string` [#8729](https://github.com/apache/arrow-datafusion/pull/8729) (Weijun-H) -- feat: Add bloom filter metric to ParquetExec [#8772](https://github.com/apache/arrow-datafusion/pull/8772) (my-vegetable-has-exploded) -- feat: support `array_resize` [#8744](https://github.com/apache/arrow-datafusion/pull/8744) (Weijun-H) -- feat: add more components to the wasm-pack compatible list [#8843](https://github.com/apache/arrow-datafusion/pull/8843) (waynexia) +- feat: implement Unary Expr in substrait [#8534](https://github.com/apache/datafusion/pull/8534) (waynexia) +- feat: implement Repartition plan in substrait [#8526](https://github.com/apache/datafusion/pull/8526) (waynexia) +- feat: support largelist in array_slice [#8561](https://github.com/apache/datafusion/pull/8561) (Weijun-H) +- feat: support `LargeList` in `array_positions` [#8571](https://github.com/apache/datafusion/pull/8571) (Weijun-H) +- feat: support `LargeList` in `array_element` [#8570](https://github.com/apache/datafusion/pull/8570) (Weijun-H) +- feat: support `LargeList` in `array_dims` [#8592](https://github.com/apache/datafusion/pull/8592) (Weijun-H) +- feat: support `LargeList` in `array_remove` [#8595](https://github.com/apache/datafusion/pull/8595) (Weijun-H) +- feat: support inlist in LiteralGurantee for pruning [#8654](https://github.com/apache/datafusion/pull/8654) (my-vegetable-has-exploded) +- feat: support 'LargeList' in `array_pop_front` and `array_pop_back` [#8569](https://github.com/apache/datafusion/pull/8569) (Weijun-H) +- feat: support `LargeList` in `array_position` [#8714](https://github.com/apache/datafusion/pull/8714) (Weijun-H) +- feat: support `LargeList` in `array_ndims` [#8716](https://github.com/apache/datafusion/pull/8716) (Weijun-H) +- feat: remove filters with null constants [#8700](https://github.com/apache/datafusion/pull/8700) (asimsedhain) +- feat: support LargeList in array_repeat [#8725](https://github.com/apache/datafusion/pull/8725) (Weijun-H) +- 
feat: native types in `DistinctCountAccumulator` for primitive types [#8721](https://github.com/apache/datafusion/pull/8721) (korowa) +- feat: support `LargeList` in `cardinality` [#8726](https://github.com/apache/datafusion/pull/8726) (Weijun-H) +- feat: support `largelist` in `array_to_string` [#8729](https://github.com/apache/datafusion/pull/8729) (Weijun-H) +- feat: Add bloom filter metric to ParquetExec [#8772](https://github.com/apache/datafusion/pull/8772) (my-vegetable-has-exploded) +- feat: support `array_resize` [#8744](https://github.com/apache/datafusion/pull/8744) (Weijun-H) +- feat: add more components to the wasm-pack compatible list [#8843](https://github.com/apache/datafusion/pull/8843) (waynexia) **Fixed bugs:** -- fix: make sure CASE WHEN pick first true branch when WHEN clause is true [#8477](https://github.com/apache/arrow-datafusion/pull/8477) (haohuaijin) -- fix: `Antarctica/Vostok` tz offset changed in chrono-tz 0.8.5 [#8677](https://github.com/apache/arrow-datafusion/pull/8677) (korowa) -- fix: struct field don't push down to TableScan [#8774](https://github.com/apache/arrow-datafusion/pull/8774) (haohuaijin) -- fix: failed to create ValuesExec with non-nullable schema [#8776](https://github.com/apache/arrow-datafusion/pull/8776) (jonahgao) -- fix: fix markdown table in docs [#8812](https://github.com/apache/arrow-datafusion/pull/8812) (tshauck) -- fix: don't extract common sub expr in `CASE WHEN` clause [#8833](https://github.com/apache/arrow-datafusion/pull/8833) (haohuaijin) +- fix: make sure CASE WHEN pick first true branch when WHEN clause is true [#8477](https://github.com/apache/datafusion/pull/8477) (haohuaijin) +- fix: `Antarctica/Vostok` tz offset changed in chrono-tz 0.8.5 [#8677](https://github.com/apache/datafusion/pull/8677) (korowa) +- fix: struct field don't push down to TableScan [#8774](https://github.com/apache/datafusion/pull/8774) (haohuaijin) +- fix: failed to create ValuesExec with non-nullable schema 
[#8776](https://github.com/apache/datafusion/pull/8776) (jonahgao) +- fix: fix markdown table in docs [#8812](https://github.com/apache/datafusion/pull/8812) (tshauck) +- fix: don't extract common sub expr in `CASE WHEN` clause [#8833](https://github.com/apache/datafusion/pull/8833) (haohuaijin) **Documentation updates:** -- docs: update udf docs for udtf [#8546](https://github.com/apache/arrow-datafusion/pull/8546) (tshauck) -- Doc: Clarify When Limit is Pushed Down to TableProvider::Scan [#8686](https://github.com/apache/arrow-datafusion/pull/8686) (devinjdangelo) -- Minor: Improve `PruningPredicate` docstrings [#8748](https://github.com/apache/arrow-datafusion/pull/8748) (alamb) -- Minor: Add documentation about stream cancellation [#8747](https://github.com/apache/arrow-datafusion/pull/8747) (alamb) -- docs: add sudo for install commands [#8804](https://github.com/apache/arrow-datafusion/pull/8804) (caicancai) -- docs: document SessionConfig [#8771](https://github.com/apache/arrow-datafusion/pull/8771) (wjones127) -- Upgrade to object_store `0.9.0` and arrow `50.0.0` [#8758](https://github.com/apache/arrow-datafusion/pull/8758) (tustvold) -- docs: fix wrong pushdown name & a typo [#8875](https://github.com/apache/arrow-datafusion/pull/8875) (SteveLauC) -- docs: Update contributor guide with installation instructions [#8876](https://github.com/apache/arrow-datafusion/pull/8876) (caicancai) -- docs: fix wrong name in sub-crates' README [#8889](https://github.com/apache/arrow-datafusion/pull/8889) (SteveLauC) -- docs: add an example for RecordBatchReceiverStreamBuilder [#8888](https://github.com/apache/arrow-datafusion/pull/8888) (SteveLauC) +- docs: update udf docs for udtf [#8546](https://github.com/apache/datafusion/pull/8546) (tshauck) +- Doc: Clarify When Limit is Pushed Down to TableProvider::Scan [#8686](https://github.com/apache/datafusion/pull/8686) (devinjdangelo) +- Minor: Improve `PruningPredicate` docstrings 
[#8748](https://github.com/apache/datafusion/pull/8748) (alamb) +- Minor: Add documentation about stream cancellation [#8747](https://github.com/apache/datafusion/pull/8747) (alamb) +- docs: add sudo for install commands [#8804](https://github.com/apache/datafusion/pull/8804) (caicancai) +- docs: document SessionConfig [#8771](https://github.com/apache/datafusion/pull/8771) (wjones127) +- Upgrade to object_store `0.9.0` and arrow `50.0.0` [#8758](https://github.com/apache/datafusion/pull/8758) (tustvold) +- docs: fix wrong pushdown name & a typo [#8875](https://github.com/apache/datafusion/pull/8875) (SteveLauC) +- docs: Update contributor guide with installation instructions [#8876](https://github.com/apache/datafusion/pull/8876) (caicancai) +- docs: fix wrong name in sub-crates' README [#8889](https://github.com/apache/datafusion/pull/8889) (SteveLauC) +- docs: add an example for RecordBatchReceiverStreamBuilder [#8888](https://github.com/apache/datafusion/pull/8888) (SteveLauC) **Merged pull requests:** -- Remove order_bys from AggregateExec state [#8537](https://github.com/apache/arrow-datafusion/pull/8537) (mustafasrepo) -- Fix count(null) and count(distinct null) [#8511](https://github.com/apache/arrow-datafusion/pull/8511) (joroKr21) -- Minor: reduce code duplication in `date_bin_impl` [#8528](https://github.com/apache/arrow-datafusion/pull/8528) (Weijun-H) -- Add metrics for UnnestExec [#8482](https://github.com/apache/arrow-datafusion/pull/8482) (simonvandel) -- Prepare 34.0.0-rc3 [#8549](https://github.com/apache/arrow-datafusion/pull/8549) (andygrove) -- fix: make sure CASE WHEN pick first true branch when WHEN clause is true [#8477](https://github.com/apache/arrow-datafusion/pull/8477) (haohuaijin) -- Minor: make SubqueryAlias::try_new take Arc [#8542](https://github.com/apache/arrow-datafusion/pull/8542) (sadboy) -- Fallback on null empty value in ExprBoundaries::try_from_column [#8501](https://github.com/apache/arrow-datafusion/pull/8501) (razeghi71) 
-- Add test for DataFrame::write_table [#8531](https://github.com/apache/arrow-datafusion/pull/8531) (devinjdangelo) -- [MINOR]: Generate empty column at placeholder exec [#8553](https://github.com/apache/arrow-datafusion/pull/8553) (mustafasrepo) -- Minor: Remove now dead `SUPPORTED_STRUCT_TYPES` [#8480](https://github.com/apache/arrow-datafusion/pull/8480) (alamb) -- [MINOR]: Add getter methods to first and last value [#8555](https://github.com/apache/arrow-datafusion/pull/8555) (mustafasrepo) -- [MINOR]: Some code changes and a new empty batch guard for SHJ [#8557](https://github.com/apache/arrow-datafusion/pull/8557) (metesynnada) -- docs: update udf docs for udtf [#8546](https://github.com/apache/arrow-datafusion/pull/8546) (tshauck) -- feat: implement Unary Expr in substrait [#8534](https://github.com/apache/arrow-datafusion/pull/8534) (waynexia) -- Fix `compute_record_batch_statistics` wrong with `projection` [#8489](https://github.com/apache/arrow-datafusion/pull/8489) (Asura7969) -- Minor: Cleanup warning in scalar.rs test [#8563](https://github.com/apache/arrow-datafusion/pull/8563) (jayzhan211) -- Minor: move some invariants out of the loop [#8564](https://github.com/apache/arrow-datafusion/pull/8564) (haohuaijin) -- feat: implement Repartition plan in substrait [#8526](https://github.com/apache/arrow-datafusion/pull/8526) (waynexia) -- Fix sort order aware file group parallelization [#8517](https://github.com/apache/arrow-datafusion/pull/8517) (alamb) -- feat: support largelist in array_slice [#8561](https://github.com/apache/arrow-datafusion/pull/8561) (Weijun-H) -- minor: fix to support scalars [#8559](https://github.com/apache/arrow-datafusion/pull/8559) (comphead) -- refactor: `HashJoinStream` state machine [#8538](https://github.com/apache/arrow-datafusion/pull/8538) (korowa) -- Remove ListingTable and FileScanConfig Unbounded (#8540) [#8573](https://github.com/apache/arrow-datafusion/pull/8573) (tustvold) -- Update substrait requirement from 
0.20.0 to 0.21.0 [#8574](https://github.com/apache/arrow-datafusion/pull/8574) (dependabot[bot]) -- [minor]: Fix rank calculation bug when empty order by is seen [#8567](https://github.com/apache/arrow-datafusion/pull/8567) (mustafasrepo) -- Add `LiteralGuarantee` on columns to extract conditions required for `PhysicalExpr` expressions to evaluate to true [#8437](https://github.com/apache/arrow-datafusion/pull/8437) (alamb) -- [MINOR]: Parametrize sort-preservation tests to exercise all situations (unbounded/bounded sources and flag behavior) [#8575](https://github.com/apache/arrow-datafusion/pull/8575) (mustafasrepo) -- Minor: Add some comments to scalar_udf example [#8576](https://github.com/apache/arrow-datafusion/pull/8576) (alamb) -- Move Coercion for MakeArray to `coerce_arguments_for_signature` and introduce another one for ArrayAppend [#8317](https://github.com/apache/arrow-datafusion/pull/8317) (jayzhan211) -- feat: support `LargeList` in `array_positions` [#8571](https://github.com/apache/arrow-datafusion/pull/8571) (Weijun-H) -- feat: support `LargeList` in `array_element` [#8570](https://github.com/apache/arrow-datafusion/pull/8570) (Weijun-H) -- Increase test coverage for unbounded and bounded cases [#8581](https://github.com/apache/arrow-datafusion/pull/8581) (mustafasrepo) -- Port tests in `parquet.rs` to sqllogictest [#8560](https://github.com/apache/arrow-datafusion/pull/8560) (hiltontj) -- Minor: avoid a copy in Expr::unalias [#8588](https://github.com/apache/arrow-datafusion/pull/8588) (alamb) -- Minor: support complex expr as the arg in the ApproxPercentileCont function [#8580](https://github.com/apache/arrow-datafusion/pull/8580) (liukun4515) -- Bugfix: Add functional dependency check and aggregate try_new schema [#8584](https://github.com/apache/arrow-datafusion/pull/8584) (mustafasrepo) -- Remove GroupByOrderMode [#8593](https://github.com/apache/arrow-datafusion/pull/8593) (ozankabak) -- Minor: replace` not-impl-err` in `array_expression` 
[#8589](https://github.com/apache/arrow-datafusion/pull/8589) (Weijun-H) -- Substrait insubquery [#8363](https://github.com/apache/arrow-datafusion/pull/8363) (tgujar) -- Minor: port last test from parquet.rs [#8587](https://github.com/apache/arrow-datafusion/pull/8587) (alamb) -- Minor: consolidate map sqllogictest tests [#8550](https://github.com/apache/arrow-datafusion/pull/8550) (alamb) -- feat: support `LargeList` in `array_dims` [#8592](https://github.com/apache/arrow-datafusion/pull/8592) (Weijun-H) -- Fix regression in regenerating protobuf source [#8603](https://github.com/apache/arrow-datafusion/pull/8603) (andygrove) -- Remove unbounded_input from FileSinkOptions [#8605](https://github.com/apache/arrow-datafusion/pull/8605) (devinjdangelo) -- Add `arrow_err!` macros, optional backtrace to ArrowError [#8586](https://github.com/apache/arrow-datafusion/pull/8586) (comphead) -- Add examples of DataFrame::write\* methods without S3 dependency [#8606](https://github.com/apache/arrow-datafusion/pull/8606) (devinjdangelo) -- Implement logical plan serde for CopyTo [#8618](https://github.com/apache/arrow-datafusion/pull/8618) (andygrove) -- Fix InListExpr to return the correct number of rows [#8601](https://github.com/apache/arrow-datafusion/pull/8601) (alamb) -- Remove ListingTable single_file option [#8604](https://github.com/apache/arrow-datafusion/pull/8604) (devinjdangelo) -- feat: support `LargeList` in `array_remove` [#8595](https://github.com/apache/arrow-datafusion/pull/8595) (Weijun-H) -- Rename `ParamValues::{LIST -> List,MAP -> Map}` [#8611](https://github.com/apache/arrow-datafusion/pull/8611) (kawadakk) -- Support binary temporal coercion for Date64 and Timestamp types [#8616](https://github.com/apache/arrow-datafusion/pull/8616) (Asura7969) -- Add new configuration item `listing_table_ignore_subdirectory` [#8565](https://github.com/apache/arrow-datafusion/pull/8565) (Asura7969) -- Optimize the parameter types of `ParamValues`'s methods 
[#8613](https://github.com/apache/arrow-datafusion/pull/8613) (kawadakk) -- Do not panic on zero placeholders in `ParamValues::get_placeholders_with_values` [#8615](https://github.com/apache/arrow-datafusion/pull/8615) (kawadakk) -- Fix #8507: Non-null sub-field on nullable struct-field has wrong nullity [#8623](https://github.com/apache/arrow-datafusion/pull/8623) (marvinlanhenke) -- Implement `contained` API in PruningPredicate [#8440](https://github.com/apache/arrow-datafusion/pull/8440) (alamb) -- Add partial serde support for ParquetWriterOptions [#8627](https://github.com/apache/arrow-datafusion/pull/8627) (andygrove) -- Minor: add arguments length check in `array_expressions` [#8622](https://github.com/apache/arrow-datafusion/pull/8622) (Weijun-H) -- Minor: improve dataframe functional dependency tests [#8630](https://github.com/apache/arrow-datafusion/pull/8630) (alamb) -- Improve regexp_match performance by avoiding cloning Regex [#8631](https://github.com/apache/arrow-datafusion/pull/8631) (viirya) -- Minor: improve `listing_table_ignore_subdirectory` config documentation [#8634](https://github.com/apache/arrow-datafusion/pull/8634) (alamb) -- Support Writing Arrow files [#8608](https://github.com/apache/arrow-datafusion/pull/8608) (devinjdangelo) -- Filter pushdown into cross join [#8626](https://github.com/apache/arrow-datafusion/pull/8626) (mustafasrepo) -- [MINOR] Remove duplicate test utility and move one utility function for better organization [#8652](https://github.com/apache/arrow-datafusion/pull/8652) (metesynnada) -- [MINOR]: Add new test for filter pushdown into cross join [#8648](https://github.com/apache/arrow-datafusion/pull/8648) (mustafasrepo) -- Rewrite bloom filters to use contains API [#8442](https://github.com/apache/arrow-datafusion/pull/8442) (alamb) -- Split equivalence code into smaller modules. 
[#8649](https://github.com/apache/arrow-datafusion/pull/8649) (tushushu) -- Move parquet_schema.rs from sql to parquet tests [#8644](https://github.com/apache/arrow-datafusion/pull/8644) (alamb) -- Fix group by aliased expression in LogicalPLanBuilder::aggregate [#8629](https://github.com/apache/arrow-datafusion/pull/8629) (alamb) -- Refactor `array_union` and `array_intersect` functions to one general function [#8516](https://github.com/apache/arrow-datafusion/pull/8516) (Weijun-H) -- Minor: avoid extra clone in datafusion-proto::physical_plan [#8650](https://github.com/apache/arrow-datafusion/pull/8650) (ongchi) -- Minor: name some constant values in arrow writer, parquet writer [#8642](https://github.com/apache/arrow-datafusion/pull/8642) (alamb) -- TreeNode Refactor Part 2 [#8653](https://github.com/apache/arrow-datafusion/pull/8653) (berkaysynnada) -- feat: support inlist in LiteralGurantee for pruning [#8654](https://github.com/apache/arrow-datafusion/pull/8654) (my-vegetable-has-exploded) -- Streaming CLI support [#8651](https://github.com/apache/arrow-datafusion/pull/8651) (berkaysynnada) -- Add serde support for CSV FileTypeWriterOptions [#8641](https://github.com/apache/arrow-datafusion/pull/8641) (andygrove) -- Add trait based ScalarUDF API [#8578](https://github.com/apache/arrow-datafusion/pull/8578) (alamb) -- Handle ordering of first last aggregation inside aggregator [#8662](https://github.com/apache/arrow-datafusion/pull/8662) (mustafasrepo) -- feat: support 'LargeList' in `array_pop_front` and `array_pop_back` [#8569](https://github.com/apache/arrow-datafusion/pull/8569) (Weijun-H) -- chore: rename ceresdb to apache horaedb [#8674](https://github.com/apache/arrow-datafusion/pull/8674) (tanruixiang) -- Minor: clean up code [#8671](https://github.com/apache/arrow-datafusion/pull/8671) (Weijun-H) -- fix: `Antarctica/Vostok` tz offset changed in chrono-tz 0.8.5 [#8677](https://github.com/apache/arrow-datafusion/pull/8677) (korowa) -- Make the 
BatchSerializer behind Arc to avoid unnecessary struct creation [#8666](https://github.com/apache/arrow-datafusion/pull/8666) (metesynnada) -- Implement serde for CSV and Parquet FileSinkExec [#8646](https://github.com/apache/arrow-datafusion/pull/8646) (andygrove) -- [pruning] Add shortcut when all units have been pruned [#8675](https://github.com/apache/arrow-datafusion/pull/8675) (Ted-Jiang) -- Change first/last implementation to prevent redundant comparisons when data is already sorted [#8678](https://github.com/apache/arrow-datafusion/pull/8678) (mustafasrepo) -- minor: remove useless conversion [#8684](https://github.com/apache/arrow-datafusion/pull/8684) (comphead) -- refactor: modified `JoinHashMap` build order for `HashJoinStream` [#8658](https://github.com/apache/arrow-datafusion/pull/8658) (korowa) -- Start setting up tpch planning benchmarks [#8665](https://github.com/apache/arrow-datafusion/pull/8665) (matthewmturner) -- Doc: Clarify When Limit is Pushed Down to TableProvider::Scan [#8686](https://github.com/apache/arrow-datafusion/pull/8686) (devinjdangelo) -- Closes #8502: Parallel NDJSON file reading [#8659](https://github.com/apache/arrow-datafusion/pull/8659) (marvinlanhenke) -- Improve `array_prepend` signature for null and empty array [#8625](https://github.com/apache/arrow-datafusion/pull/8625) (jayzhan211) -- Cleanup TreeNode implementations [#8672](https://github.com/apache/arrow-datafusion/pull/8672) (viirya) -- Update sqlparser requirement from 0.40.0 to 0.41.0 [#8647](https://github.com/apache/arrow-datafusion/pull/8647) (dependabot[bot]) -- Update scalar functions doc for extract/datepart [#8682](https://github.com/apache/arrow-datafusion/pull/8682) (Jefffrey) -- Remove DescribeTableStmt in parser in favour of existing functionality from sqlparser-rs [#8703](https://github.com/apache/arrow-datafusion/pull/8703) (Jefffrey) -- Simplify `NULL [NOT] IN (..)` expressions [#8691](https://github.com/apache/arrow-datafusion/pull/8691) 
(asimsedhain) -- Rename `expr::window_function::WindowFunction` to `WindowFunctionDefinition`, make structure consistent with ScalarFunction [#8382](https://github.com/apache/arrow-datafusion/pull/8382) (edmondop) -- Deprecate duplicate function `LogicalPlan::with_new_inputs` [#8707](https://github.com/apache/arrow-datafusion/pull/8707) (viirya) -- Minor: refactor bloom filter tests to reduce duplication [#8435](https://github.com/apache/arrow-datafusion/pull/8435) (alamb) -- Minor: clean up code based on `Clippy` [#8715](https://github.com/apache/arrow-datafusion/pull/8715) (Weijun-H) -- Minor: Unbounded Output of AnalyzeExec [#8717](https://github.com/apache/arrow-datafusion/pull/8717) (berkaysynnada) -- feat: support `LargeList` in `array_position` [#8714](https://github.com/apache/arrow-datafusion/pull/8714) (Weijun-H) -- feat: support `LargeList` in `array_ndims` [#8716](https://github.com/apache/arrow-datafusion/pull/8716) (Weijun-H) -- feat: remove filters with null constants [#8700](https://github.com/apache/arrow-datafusion/pull/8700) (asimsedhain) -- support `LargeList` in `array_prepend` and `array_append` [#8679](https://github.com/apache/arrow-datafusion/pull/8679) (Weijun-H) -- Support for `extract(epoch from date)` for Date32 and Date64 [#8695](https://github.com/apache/arrow-datafusion/pull/8695) (Jefffrey) -- Implement trait based API for defining WindowUDF [#8719](https://github.com/apache/arrow-datafusion/pull/8719) (guojidan) -- Minor: Introduce utils::hash for StructArray [#8552](https://github.com/apache/arrow-datafusion/pull/8552) (jayzhan211) -- [CI] Improve windows machine CI test time [#8730](https://github.com/apache/arrow-datafusion/pull/8730) (comphead) -- fix guarantees in allways_true of PruningPredicate [#8732](https://github.com/apache/arrow-datafusion/pull/8732) (my-vegetable-has-exploded) -- Minor: Avoid memory copy in construct window exprs [#8718](https://github.com/apache/arrow-datafusion/pull/8718) (Ted-Jiang) -- feat: support 
LargeList in array_repeat [#8725](https://github.com/apache/arrow-datafusion/pull/8725) (Weijun-H) -- Minor: Ctrl+C Termination in CLI [#8739](https://github.com/apache/arrow-datafusion/pull/8739) (berkaysynnada) -- Add support for functional dependency for ROW_NUMBER window function. [#8737](https://github.com/apache/arrow-datafusion/pull/8737) (mustafasrepo) -- Minor: reduce code duplication in PruningPredicate test [#8441](https://github.com/apache/arrow-datafusion/pull/8441) (alamb) -- feat: native types in `DistinctCountAccumulator` for primitive types [#8721](https://github.com/apache/arrow-datafusion/pull/8721) (korowa) -- [MINOR]: Add a test case for when target partition is 1, no hash repartition is added to the plan. [#8757](https://github.com/apache/arrow-datafusion/pull/8757) (mustafasrepo) -- Minor: Improve `PruningPredicate` docstrings [#8748](https://github.com/apache/arrow-datafusion/pull/8748) (alamb) -- feat: support `LargeList` in `cardinality` [#8726](https://github.com/apache/arrow-datafusion/pull/8726) (Weijun-H) -- Add reproducer for #8738 [#8750](https://github.com/apache/arrow-datafusion/pull/8750) (alamb) -- Minor: Use faster check for column name in schema merge [#8765](https://github.com/apache/arrow-datafusion/pull/8765) (matthewmturner) -- Minor: Add documentation about stream cancellation [#8747](https://github.com/apache/arrow-datafusion/pull/8747) (alamb) -- Move `repartition_file_scans` out of `enable_round_robin` check in `EnforceDistribution` rule [#8731](https://github.com/apache/arrow-datafusion/pull/8731) (viirya) -- Clean internal implementation of WindowUDF [#8746](https://github.com/apache/arrow-datafusion/pull/8746) (guojidan) -- feat: support `largelist` in `array_to_string` [#8729](https://github.com/apache/arrow-datafusion/pull/8729) (Weijun-H) -- [MINOR] CLI error handling on streaming use cases [#8761](https://github.com/apache/arrow-datafusion/pull/8761) (metesynnada) -- Convert Binary Operator `StringConcat` to 
Function for `array_concat`, `array_append` and `array_prepend` [#8636](https://github.com/apache/arrow-datafusion/pull/8636) (jayzhan211) -- Minor: Fix incorrect indices for hashing struct [#8775](https://github.com/apache/arrow-datafusion/pull/8775) (jayzhan211) -- Minor: Improve library docs to mention TreeNode, ExprSimplifier, PruningPredicate and cp_solver [#8749](https://github.com/apache/arrow-datafusion/pull/8749) (alamb) -- [MINOR] Add logo source files [#8762](https://github.com/apache/arrow-datafusion/pull/8762) (andygrove) -- Add Apache attribution to site footer [#8760](https://github.com/apache/arrow-datafusion/pull/8760) (alamb) -- ci: speed up win64 test [#8728](https://github.com/apache/arrow-datafusion/pull/8728) (Jefffrey) -- Add `schema_err!` error macros with optional backtrace [#8620](https://github.com/apache/arrow-datafusion/pull/8620) (comphead) -- Fix regression by reverting Materialize dictionaries in group keys [#8740](https://github.com/apache/arrow-datafusion/pull/8740) (alamb) -- fix: struct field don't push down to TableScan [#8774](https://github.com/apache/arrow-datafusion/pull/8774) (haohuaijin) -- Implement `ScalarUDF` in terms of `ScalarUDFImpl` trait [#8713](https://github.com/apache/arrow-datafusion/pull/8713) (alamb) -- Minor: Fix error messages in array expressions [#8781](https://github.com/apache/arrow-datafusion/pull/8781) (Weijun-H) -- Move tests from `expr.rs` to sqllogictests. 
Part1 [#8773](https://github.com/apache/arrow-datafusion/pull/8773) (comphead) -- Permit running `sqllogictest` as a rust test in IDEs (+ use clap for sqllogicttest parsing, accept (and ignore) rust test harness arguments) [#8288](https://github.com/apache/arrow-datafusion/pull/8288) (alamb) -- Minor: Use standard tree walk in Projection Pushdown [#8787](https://github.com/apache/arrow-datafusion/pull/8787) (alamb) -- Implement trait based API for define AggregateUDF [#8733](https://github.com/apache/arrow-datafusion/pull/8733) (guojidan) -- Minor: Improve `DataFusionError` documentation [#8792](https://github.com/apache/arrow-datafusion/pull/8792) (alamb) -- fix: failed to create ValuesExec with non-nullable schema [#8776](https://github.com/apache/arrow-datafusion/pull/8776) (jonahgao) -- Update substrait requirement from 0.21.0 to 0.22.1 [#8796](https://github.com/apache/arrow-datafusion/pull/8796) (dependabot[bot]) -- Bump follow-redirects from 1.15.3 to 1.15.4 in /datafusion/wasmtest/datafusion-wasm-app [#8798](https://github.com/apache/arrow-datafusion/pull/8798) (dependabot[bot]) -- Minor: array_pop_first should be array_pop_front in documentation [#8797](https://github.com/apache/arrow-datafusion/pull/8797) (ongchi) -- feat: Add bloom filter metric to ParquetExec [#8772](https://github.com/apache/arrow-datafusion/pull/8772) (my-vegetable-has-exploded) -- Add note on using larger row group size [#8745](https://github.com/apache/arrow-datafusion/pull/8745) (twitu) -- Change `ScalarValue::{List, LargeList, FixedSizedList}` to take specific types rather than `ArrayRef` [#8562](https://github.com/apache/arrow-datafusion/pull/8562) (rspears74) -- fix: fix markdown table in docs [#8812](https://github.com/apache/arrow-datafusion/pull/8812) (tshauck) -- docs: add sudo for install commands [#8804](https://github.com/apache/arrow-datafusion/pull/8804) (caicancai) -- Standardize `CompressionTypeVariant` encoding in protobuf 
[#8785](https://github.com/apache/arrow-datafusion/pull/8785) (tushushu) -- Make benefits_from_input_partitioning Default in SHJ [#8801](https://github.com/apache/arrow-datafusion/pull/8801) (metesynnada) -- refactor: standardize exec_from funcs arg order [#8809](https://github.com/apache/arrow-datafusion/pull/8809) (tshauck) -- [Minor] extract const and add doc and more tests for in_list pruning [#8815](https://github.com/apache/arrow-datafusion/pull/8815) (Ted-Jiang) -- [MINOR]: Add size check for aggregate [#8813](https://github.com/apache/arrow-datafusion/pull/8813) (mustafasrepo) -- Minor: chores: Update clippy in pre-commit.sh [#8810](https://github.com/apache/arrow-datafusion/pull/8810) (my-vegetable-has-exploded) -- Cleanup the usage of round-robin repartitioning [#8794](https://github.com/apache/arrow-datafusion/pull/8794) (viirya) -- Implement monotonicity for ScalarUDF [#8799](https://github.com/apache/arrow-datafusion/pull/8799) (guojidan) -- Remove unused array_expression.rs and `SUPPORTED_ARRAY_TYPES` [#8807](https://github.com/apache/arrow-datafusion/pull/8807) (alamb) -- feat: support `array_resize` [#8744](https://github.com/apache/arrow-datafusion/pull/8744) (Weijun-H) -- Minor: typo in `arrays.slt` [#8831](https://github.com/apache/arrow-datafusion/pull/8831) (Weijun-H) -- docs: document SessionConfig [#8771](https://github.com/apache/arrow-datafusion/pull/8771) (wjones127) -- Minor: Improve `datafusion-proto` documentation [#8822](https://github.com/apache/arrow-datafusion/pull/8822) (alamb) -- [CI] Refactor CI builders [#8826](https://github.com/apache/arrow-datafusion/pull/8826) (comphead) -- Serialize function signature simplifications [#8802](https://github.com/apache/arrow-datafusion/pull/8802) (metesynnada) -- Port tests in `group_by.rs` to sqllogictest [#8834](https://github.com/apache/arrow-datafusion/pull/8834) (hiltontj) -- Simplify physical expression creation API (not require schema) 
[#8823](https://github.com/apache/arrow-datafusion/pull/8823) (comphead) -- feat: add more components to the wasm-pack compatible list [#8843](https://github.com/apache/arrow-datafusion/pull/8843) (waynexia) -- Port tests in timestamp.rs to sqllogictest. Part 1 [#8818](https://github.com/apache/arrow-datafusion/pull/8818) (caicancai) -- Upgrade to object_store `0.9.0` and arrow `50.0.0` [#8758](https://github.com/apache/arrow-datafusion/pull/8758) (tustvold) -- Fix ApproxPercentileCont signature [#8825](https://github.com/apache/arrow-datafusion/pull/8825) (joroKr21) -- Minor: Update `with_column_rename` method doc [#8858](https://github.com/apache/arrow-datafusion/pull/8858) (comphead) -- Minor: Document `parquet_metadata` function [#8852](https://github.com/apache/arrow-datafusion/pull/8852) (alamb) -- Speedup new_with_metadata by removing sort [#8855](https://github.com/apache/arrow-datafusion/pull/8855) (simonvandel) -- Minor: fix wrong function call [#8847](https://github.com/apache/arrow-datafusion/pull/8847) (Weijun-H) -- Add options of parquet bloom filter and page index in Session config [#8869](https://github.com/apache/arrow-datafusion/pull/8869) (Ted-Jiang) -- Port tests in timestamp.rs to sqllogictest [#8859](https://github.com/apache/arrow-datafusion/pull/8859) (caicancai) -- test: Port `order.rs` tests to sqllogictest [#8857](https://github.com/apache/arrow-datafusion/pull/8857) (simicd) -- Determine causal window frames to produce early results. 
[#8842](https://github.com/apache/arrow-datafusion/pull/8842) (mustafasrepo) -- docs: fix wrong pushdown name & a typo [#8875](https://github.com/apache/arrow-datafusion/pull/8875) (SteveLauC) -- fix: don't extract common sub expr in `CASE WHEN` clause [#8833](https://github.com/apache/arrow-datafusion/pull/8833) (haohuaijin) -- Add "Extended" clickbench queries [#8861](https://github.com/apache/arrow-datafusion/pull/8861) (alamb) -- Change cli to propagate error to exit code [#8856](https://github.com/apache/arrow-datafusion/pull/8856) (tshauck) -- test: Port tests in `predicates.rs` to sqllogictest [#8879](https://github.com/apache/arrow-datafusion/pull/8879) (simicd) -- docs: Update contributor guide with installation instructions [#8876](https://github.com/apache/arrow-datafusion/pull/8876) (caicancai) -- Minor: add tests for casts between nested `List` and `LargeList` [#8882](https://github.com/apache/arrow-datafusion/pull/8882) (Weijun-H) -- Disable Parallel Parquet Writer by Default, Improve Writing Test Coverage [#8854](https://github.com/apache/arrow-datafusion/pull/8854) (devinjdangelo) -- Support for order sensitive `NTH_VALUE` aggregation, make reverse `ARRAY_AGG` more efficient [#8841](https://github.com/apache/arrow-datafusion/pull/8841) (mustafasrepo) -- test: Port tests in `csv_files.rs` to sqllogictest [#8885](https://github.com/apache/arrow-datafusion/pull/8885) (simicd) -- test: Port tests in `references.rs` to sqllogictest [#8877](https://github.com/apache/arrow-datafusion/pull/8877) (simicd) -- fix bug with `to_timestamp` and `InitCap` logical serialization, add roundtrip test between expression and proto, [#8868](https://github.com/apache/arrow-datafusion/pull/8868) (Weijun-H) -- Support `LargeListArray` scalar values and `align_array_dimensions` [#8881](https://github.com/apache/arrow-datafusion/pull/8881) (Weijun-H) -- refactor: rename FileStream.file_reader to file_opener & update doc 
[#8883](https://github.com/apache/arrow-datafusion/pull/8883) (SteveLauC) -- docs: fix wrong name in sub-crates' README [#8889](https://github.com/apache/arrow-datafusion/pull/8889) (SteveLauC) -- Recursive CTEs: Stage 1 - add config flag [#8828](https://github.com/apache/arrow-datafusion/pull/8828) (matthewgapp) -- Support array literal with scalar function [#8884](https://github.com/apache/arrow-datafusion/pull/8884) (jayzhan211) -- Bump actions/cache from 3 to 4 [#8903](https://github.com/apache/arrow-datafusion/pull/8903) (dependabot[bot]) -- Fix `datafusion-cli` print output [#8895](https://github.com/apache/arrow-datafusion/pull/8895) (alamb) -- docs: add an example for RecordBatchReceiverStreamBuilder [#8888](https://github.com/apache/arrow-datafusion/pull/8888) (SteveLauC) -- Fix "Projection references non-aggregate values" by updating `rebase_expr` to use `transform_down` [#8890](https://github.com/apache/arrow-datafusion/pull/8890) (wizardxz) -- Add serde support for Arrow FileTypeWriterOptions [#8850](https://github.com/apache/arrow-datafusion/pull/8850) (tushushu) -- Improve `datafusion-cli` print format tests [#8896](https://github.com/apache/arrow-datafusion/pull/8896) (alamb) -- Recursive CTEs: Stage 2 - add support for sql -> logical plan generation [#8839](https://github.com/apache/arrow-datafusion/pull/8839) (matthewgapp) -- Minor: remove null in `array-append` and `array-prepend` [#8901](https://github.com/apache/arrow-datafusion/pull/8901) (Weijun-H) -- Add support for FixedSizeList type in `arrow_cast`, hashing [#8344](https://github.com/apache/arrow-datafusion/pull/8344) (Weijun-H) -- aggregate_statistics should only optimize MIN/MAX when relation is not empty [#8914](https://github.com/apache/arrow-datafusion/pull/8914) (viirya) -- support to_timestamp with optional chrono formats [#8886](https://github.com/apache/arrow-datafusion/pull/8886) (Omega359) -- Minor: Document third argument of `date_bin` as optional and default value 
[#8912](https://github.com/apache/arrow-datafusion/pull/8912) (alamb) -- Minor: distinguish parquet row group pruning type in unit test [#8921](https://github.com/apache/arrow-datafusion/pull/8921) (Ted-Jiang) +- Remove order_bys from AggregateExec state [#8537](https://github.com/apache/datafusion/pull/8537) (mustafasrepo) +- Fix count(null) and count(distinct null) [#8511](https://github.com/apache/datafusion/pull/8511) (joroKr21) +- Minor: reduce code duplication in `date_bin_impl` [#8528](https://github.com/apache/datafusion/pull/8528) (Weijun-H) +- Add metrics for UnnestExec [#8482](https://github.com/apache/datafusion/pull/8482) (simonvandel) +- Prepare 34.0.0-rc3 [#8549](https://github.com/apache/datafusion/pull/8549) (andygrove) +- fix: make sure CASE WHEN pick first true branch when WHEN clause is true [#8477](https://github.com/apache/datafusion/pull/8477) (haohuaijin) +- Minor: make SubqueryAlias::try_new take Arc [#8542](https://github.com/apache/datafusion/pull/8542) (sadboy) +- Fallback on null empty value in ExprBoundaries::try_from_column [#8501](https://github.com/apache/datafusion/pull/8501) (razeghi71) +- Add test for DataFrame::write_table [#8531](https://github.com/apache/datafusion/pull/8531) (devinjdangelo) +- [MINOR]: Generate empty column at placeholder exec [#8553](https://github.com/apache/datafusion/pull/8553) (mustafasrepo) +- Minor: Remove now dead `SUPPORTED_STRUCT_TYPES` [#8480](https://github.com/apache/datafusion/pull/8480) (alamb) +- [MINOR]: Add getter methods to first and last value [#8555](https://github.com/apache/datafusion/pull/8555) (mustafasrepo) +- [MINOR]: Some code changes and a new empty batch guard for SHJ [#8557](https://github.com/apache/datafusion/pull/8557) (metesynnada) +- docs: update udf docs for udtf [#8546](https://github.com/apache/datafusion/pull/8546) (tshauck) +- feat: implement Unary Expr in substrait [#8534](https://github.com/apache/datafusion/pull/8534) (waynexia) +- Fix 
`compute_record_batch_statistics` wrong with `projection` [#8489](https://github.com/apache/datafusion/pull/8489) (Asura7969) +- Minor: Cleanup warning in scalar.rs test [#8563](https://github.com/apache/datafusion/pull/8563) (jayzhan211) +- Minor: move some invariants out of the loop [#8564](https://github.com/apache/datafusion/pull/8564) (haohuaijin) +- feat: implement Repartition plan in substrait [#8526](https://github.com/apache/datafusion/pull/8526) (waynexia) +- Fix sort order aware file group parallelization [#8517](https://github.com/apache/datafusion/pull/8517) (alamb) +- feat: support largelist in array_slice [#8561](https://github.com/apache/datafusion/pull/8561) (Weijun-H) +- minor: fix to support scalars [#8559](https://github.com/apache/datafusion/pull/8559) (comphead) +- refactor: `HashJoinStream` state machine [#8538](https://github.com/apache/datafusion/pull/8538) (korowa) +- Remove ListingTable and FileScanConfig Unbounded (#8540) [#8573](https://github.com/apache/datafusion/pull/8573) (tustvold) +- Update substrait requirement from 0.20.0 to 0.21.0 [#8574](https://github.com/apache/datafusion/pull/8574) (dependabot[bot]) +- [minor]: Fix rank calculation bug when empty order by is seen [#8567](https://github.com/apache/datafusion/pull/8567) (mustafasrepo) +- Add `LiteralGuarantee` on columns to extract conditions required for `PhysicalExpr` expressions to evaluate to true [#8437](https://github.com/apache/datafusion/pull/8437) (alamb) +- [MINOR]: Parametrize sort-preservation tests to exercise all situations (unbounded/bounded sources and flag behavior) [#8575](https://github.com/apache/datafusion/pull/8575) (mustafasrepo) +- Minor: Add some comments to scalar_udf example [#8576](https://github.com/apache/datafusion/pull/8576) (alamb) +- Move Coercion for MakeArray to `coerce_arguments_for_signature` and introduce another one for ArrayAppend [#8317](https://github.com/apache/datafusion/pull/8317) (jayzhan211) +- feat: support `LargeList` in 
`array_positions` [#8571](https://github.com/apache/datafusion/pull/8571) (Weijun-H) +- feat: support `LargeList` in `array_element` [#8570](https://github.com/apache/datafusion/pull/8570) (Weijun-H) +- Increase test coverage for unbounded and bounded cases [#8581](https://github.com/apache/datafusion/pull/8581) (mustafasrepo) +- Port tests in `parquet.rs` to sqllogictest [#8560](https://github.com/apache/datafusion/pull/8560) (hiltontj) +- Minor: avoid a copy in Expr::unalias [#8588](https://github.com/apache/datafusion/pull/8588) (alamb) +- Minor: support complex expr as the arg in the ApproxPercentileCont function [#8580](https://github.com/apache/datafusion/pull/8580) (liukun4515) +- Bugfix: Add functional dependency check and aggregate try_new schema [#8584](https://github.com/apache/datafusion/pull/8584) (mustafasrepo) +- Remove GroupByOrderMode [#8593](https://github.com/apache/datafusion/pull/8593) (ozankabak) +- Minor: replace` not-impl-err` in `array_expression` [#8589](https://github.com/apache/datafusion/pull/8589) (Weijun-H) +- Substrait insubquery [#8363](https://github.com/apache/datafusion/pull/8363) (tgujar) +- Minor: port last test from parquet.rs [#8587](https://github.com/apache/datafusion/pull/8587) (alamb) +- Minor: consolidate map sqllogictest tests [#8550](https://github.com/apache/datafusion/pull/8550) (alamb) +- feat: support `LargeList` in `array_dims` [#8592](https://github.com/apache/datafusion/pull/8592) (Weijun-H) +- Fix regression in regenerating protobuf source [#8603](https://github.com/apache/datafusion/pull/8603) (andygrove) +- Remove unbounded_input from FileSinkOptions [#8605](https://github.com/apache/datafusion/pull/8605) (devinjdangelo) +- Add `arrow_err!` macros, optional backtrace to ArrowError [#8586](https://github.com/apache/datafusion/pull/8586) (comphead) +- Add examples of DataFrame::write\* methods without S3 dependency [#8606](https://github.com/apache/datafusion/pull/8606) (devinjdangelo) +- Implement logical plan 
serde for CopyTo [#8618](https://github.com/apache/datafusion/pull/8618) (andygrove) +- Fix InListExpr to return the correct number of rows [#8601](https://github.com/apache/datafusion/pull/8601) (alamb) +- Remove ListingTable single_file option [#8604](https://github.com/apache/datafusion/pull/8604) (devinjdangelo) +- feat: support `LargeList` in `array_remove` [#8595](https://github.com/apache/datafusion/pull/8595) (Weijun-H) +- Rename `ParamValues::{LIST -> List,MAP -> Map}` [#8611](https://github.com/apache/datafusion/pull/8611) (kawadakk) +- Support binary temporal coercion for Date64 and Timestamp types [#8616](https://github.com/apache/datafusion/pull/8616) (Asura7969) +- Add new configuration item `listing_table_ignore_subdirectory` [#8565](https://github.com/apache/datafusion/pull/8565) (Asura7969) +- Optimize the parameter types of `ParamValues`'s methods [#8613](https://github.com/apache/datafusion/pull/8613) (kawadakk) +- Do not panic on zero placeholders in `ParamValues::get_placeholders_with_values` [#8615](https://github.com/apache/datafusion/pull/8615) (kawadakk) +- Fix #8507: Non-null sub-field on nullable struct-field has wrong nullity [#8623](https://github.com/apache/datafusion/pull/8623) (marvinlanhenke) +- Implement `contained` API in PruningPredicate [#8440](https://github.com/apache/datafusion/pull/8440) (alamb) +- Add partial serde support for ParquetWriterOptions [#8627](https://github.com/apache/datafusion/pull/8627) (andygrove) +- Minor: add arguments length check in `array_expressions` [#8622](https://github.com/apache/datafusion/pull/8622) (Weijun-H) +- Minor: improve dataframe functional dependency tests [#8630](https://github.com/apache/datafusion/pull/8630) (alamb) +- Improve regexp_match performance by avoiding cloning Regex [#8631](https://github.com/apache/datafusion/pull/8631) (viirya) +- Minor: improve `listing_table_ignore_subdirectory` config documentation [#8634](https://github.com/apache/datafusion/pull/8634) (alamb) +- 
Support Writing Arrow files [#8608](https://github.com/apache/datafusion/pull/8608) (devinjdangelo) +- Filter pushdown into cross join [#8626](https://github.com/apache/datafusion/pull/8626) (mustafasrepo) +- [MINOR] Remove duplicate test utility and move one utility function for better organization [#8652](https://github.com/apache/datafusion/pull/8652) (metesynnada) +- [MINOR]: Add new test for filter pushdown into cross join [#8648](https://github.com/apache/datafusion/pull/8648) (mustafasrepo) +- Rewrite bloom filters to use contains API [#8442](https://github.com/apache/datafusion/pull/8442) (alamb) +- Split equivalence code into smaller modules. [#8649](https://github.com/apache/datafusion/pull/8649) (tushushu) +- Move parquet_schema.rs from sql to parquet tests [#8644](https://github.com/apache/datafusion/pull/8644) (alamb) +- Fix group by aliased expression in LogicalPLanBuilder::aggregate [#8629](https://github.com/apache/datafusion/pull/8629) (alamb) +- Refactor `array_union` and `array_intersect` functions to one general function [#8516](https://github.com/apache/datafusion/pull/8516) (Weijun-H) +- Minor: avoid extra clone in datafusion-proto::physical_plan [#8650](https://github.com/apache/datafusion/pull/8650) (ongchi) +- Minor: name some constant values in arrow writer, parquet writer [#8642](https://github.com/apache/datafusion/pull/8642) (alamb) +- TreeNode Refactor Part 2 [#8653](https://github.com/apache/datafusion/pull/8653) (berkaysynnada) +- feat: support inlist in LiteralGurantee for pruning [#8654](https://github.com/apache/datafusion/pull/8654) (my-vegetable-has-exploded) +- Streaming CLI support [#8651](https://github.com/apache/datafusion/pull/8651) (berkaysynnada) +- Add serde support for CSV FileTypeWriterOptions [#8641](https://github.com/apache/datafusion/pull/8641) (andygrove) +- Add trait based ScalarUDF API [#8578](https://github.com/apache/datafusion/pull/8578) (alamb) +- Handle ordering of first last aggregation inside aggregator 
[#8662](https://github.com/apache/datafusion/pull/8662) (mustafasrepo) +- feat: support 'LargeList' in `array_pop_front` and `array_pop_back` [#8569](https://github.com/apache/datafusion/pull/8569) (Weijun-H) +- chore: rename ceresdb to apache horaedb [#8674](https://github.com/apache/datafusion/pull/8674) (tanruixiang) +- Minor: clean up code [#8671](https://github.com/apache/datafusion/pull/8671) (Weijun-H) +- fix: `Antarctica/Vostok` tz offset changed in chrono-tz 0.8.5 [#8677](https://github.com/apache/datafusion/pull/8677) (korowa) +- Make the BatchSerializer behind Arc to avoid unnecessary struct creation [#8666](https://github.com/apache/datafusion/pull/8666) (metesynnada) +- Implement serde for CSV and Parquet FileSinkExec [#8646](https://github.com/apache/datafusion/pull/8646) (andygrove) +- [pruning] Add shortcut when all units have been pruned [#8675](https://github.com/apache/datafusion/pull/8675) (Ted-Jiang) +- Change first/last implementation to prevent redundant comparisons when data is already sorted [#8678](https://github.com/apache/datafusion/pull/8678) (mustafasrepo) +- minor: remove useless conversion [#8684](https://github.com/apache/datafusion/pull/8684) (comphead) +- refactor: modified `JoinHashMap` build order for `HashJoinStream` [#8658](https://github.com/apache/datafusion/pull/8658) (korowa) +- Start setting up tpch planning benchmarks [#8665](https://github.com/apache/datafusion/pull/8665) (matthewmturner) +- Doc: Clarify When Limit is Pushed Down to TableProvider::Scan [#8686](https://github.com/apache/datafusion/pull/8686) (devinjdangelo) +- Closes #8502: Parallel NDJSON file reading [#8659](https://github.com/apache/datafusion/pull/8659) (marvinlanhenke) +- Improve `array_prepend` signature for null and empty array [#8625](https://github.com/apache/datafusion/pull/8625) (jayzhan211) +- Cleanup TreeNode implementations [#8672](https://github.com/apache/datafusion/pull/8672) (viirya) +- Update sqlparser requirement from 0.40.0 to 0.41.0 
[#8647](https://github.com/apache/datafusion/pull/8647) (dependabot[bot]) +- Update scalar functions doc for extract/datepart [#8682](https://github.com/apache/datafusion/pull/8682) (Jefffrey) +- Remove DescribeTableStmt in parser in favour of existing functionality from sqlparser-rs [#8703](https://github.com/apache/datafusion/pull/8703) (Jefffrey) +- Simplify `NULL [NOT] IN (..)` expressions [#8691](https://github.com/apache/datafusion/pull/8691) (asimsedhain) +- Rename `expr::window_function::WindowFunction` to `WindowFunctionDefinition`, make structure consistent with ScalarFunction [#8382](https://github.com/apache/datafusion/pull/8382) (edmondop) +- Deprecate duplicate function `LogicalPlan::with_new_inputs` [#8707](https://github.com/apache/datafusion/pull/8707) (viirya) +- Minor: refactor bloom filter tests to reduce duplication [#8435](https://github.com/apache/datafusion/pull/8435) (alamb) +- Minor: clean up code based on `Clippy` [#8715](https://github.com/apache/datafusion/pull/8715) (Weijun-H) +- Minor: Unbounded Output of AnalyzeExec [#8717](https://github.com/apache/datafusion/pull/8717) (berkaysynnada) +- feat: support `LargeList` in `array_position` [#8714](https://github.com/apache/datafusion/pull/8714) (Weijun-H) +- feat: support `LargeList` in `array_ndims` [#8716](https://github.com/apache/datafusion/pull/8716) (Weijun-H) +- feat: remove filters with null constants [#8700](https://github.com/apache/datafusion/pull/8700) (asimsedhain) +- support `LargeList` in `array_prepend` and `array_append` [#8679](https://github.com/apache/datafusion/pull/8679) (Weijun-H) +- Support for `extract(epoch from date)` for Date32 and Date64 [#8695](https://github.com/apache/datafusion/pull/8695) (Jefffrey) +- Implement trait based API for defining WindowUDF [#8719](https://github.com/apache/datafusion/pull/8719) (guojidan) +- Minor: Introduce utils::hash for StructArray [#8552](https://github.com/apache/datafusion/pull/8552) (jayzhan211) +- [CI] Improve windows 
machine CI test time [#8730](https://github.com/apache/datafusion/pull/8730) (comphead) +- fix guarantees in allways_true of PruningPredicate [#8732](https://github.com/apache/datafusion/pull/8732) (my-vegetable-has-exploded) +- Minor: Avoid memory copy in construct window exprs [#8718](https://github.com/apache/datafusion/pull/8718) (Ted-Jiang) +- feat: support LargeList in array_repeat [#8725](https://github.com/apache/datafusion/pull/8725) (Weijun-H) +- Minor: Ctrl+C Termination in CLI [#8739](https://github.com/apache/datafusion/pull/8739) (berkaysynnada) +- Add support for functional dependency for ROW_NUMBER window function. [#8737](https://github.com/apache/datafusion/pull/8737) (mustafasrepo) +- Minor: reduce code duplication in PruningPredicate test [#8441](https://github.com/apache/datafusion/pull/8441) (alamb) +- feat: native types in `DistinctCountAccumulator` for primitive types [#8721](https://github.com/apache/datafusion/pull/8721) (korowa) +- [MINOR]: Add a test case for when target partition is 1, no hash repartition is added to the plan. 
[#8757](https://github.com/apache/datafusion/pull/8757) (mustafasrepo) +- Minor: Improve `PruningPredicate` docstrings [#8748](https://github.com/apache/datafusion/pull/8748) (alamb) +- feat: support `LargeList` in `cardinality` [#8726](https://github.com/apache/datafusion/pull/8726) (Weijun-H) +- Add reproducer for #8738 [#8750](https://github.com/apache/datafusion/pull/8750) (alamb) +- Minor: Use faster check for column name in schema merge [#8765](https://github.com/apache/datafusion/pull/8765) (matthewmturner) +- Minor: Add documentation about stream cancellation [#8747](https://github.com/apache/datafusion/pull/8747) (alamb) +- Move `repartition_file_scans` out of `enable_round_robin` check in `EnforceDistribution` rule [#8731](https://github.com/apache/datafusion/pull/8731) (viirya) +- Clean internal implementation of WindowUDF [#8746](https://github.com/apache/datafusion/pull/8746) (guojidan) +- feat: support `largelist` in `array_to_string` [#8729](https://github.com/apache/datafusion/pull/8729) (Weijun-H) +- [MINOR] CLI error handling on streaming use cases [#8761](https://github.com/apache/datafusion/pull/8761) (metesynnada) +- Convert Binary Operator `StringConcat` to Function for `array_concat`, `array_append` and `array_prepend` [#8636](https://github.com/apache/datafusion/pull/8636) (jayzhan211) +- Minor: Fix incorrect indices for hashing struct [#8775](https://github.com/apache/datafusion/pull/8775) (jayzhan211) +- Minor: Improve library docs to mention TreeNode, ExprSimplifier, PruningPredicate and cp_solver [#8749](https://github.com/apache/datafusion/pull/8749) (alamb) +- [MINOR] Add logo source files [#8762](https://github.com/apache/datafusion/pull/8762) (andygrove) +- Add Apache attribution to site footer [#8760](https://github.com/apache/datafusion/pull/8760) (alamb) +- ci: speed up win64 test [#8728](https://github.com/apache/datafusion/pull/8728) (Jefffrey) +- Add `schema_err!` error macros with optional backtrace 
[#8620](https://github.com/apache/datafusion/pull/8620) (comphead) +- Fix regression by reverting Materialize dictionaries in group keys [#8740](https://github.com/apache/datafusion/pull/8740) (alamb) +- fix: struct field don't push down to TableScan [#8774](https://github.com/apache/datafusion/pull/8774) (haohuaijin) +- Implement `ScalarUDF` in terms of `ScalarUDFImpl` trait [#8713](https://github.com/apache/datafusion/pull/8713) (alamb) +- Minor: Fix error messages in array expressions [#8781](https://github.com/apache/datafusion/pull/8781) (Weijun-H) +- Move tests from `expr.rs` to sqllogictests. Part1 [#8773](https://github.com/apache/datafusion/pull/8773) (comphead) +- Permit running `sqllogictest` as a rust test in IDEs (+ use clap for sqllogicttest parsing, accept (and ignore) rust test harness arguments) [#8288](https://github.com/apache/datafusion/pull/8288) (alamb) +- Minor: Use standard tree walk in Projection Pushdown [#8787](https://github.com/apache/datafusion/pull/8787) (alamb) +- Implement trait based API for define AggregateUDF [#8733](https://github.com/apache/datafusion/pull/8733) (guojidan) +- Minor: Improve `DataFusionError` documentation [#8792](https://github.com/apache/datafusion/pull/8792) (alamb) +- fix: failed to create ValuesExec with non-nullable schema [#8776](https://github.com/apache/datafusion/pull/8776) (jonahgao) +- Update substrait requirement from 0.21.0 to 0.22.1 [#8796](https://github.com/apache/datafusion/pull/8796) (dependabot[bot]) +- Bump follow-redirects from 1.15.3 to 1.15.4 in /datafusion/wasmtest/datafusion-wasm-app [#8798](https://github.com/apache/datafusion/pull/8798) (dependabot[bot]) +- Minor: array_pop_first should be array_pop_front in documentation [#8797](https://github.com/apache/datafusion/pull/8797) (ongchi) +- feat: Add bloom filter metric to ParquetExec [#8772](https://github.com/apache/datafusion/pull/8772) (my-vegetable-has-exploded) +- Add note on using larger row group size 
[#8745](https://github.com/apache/datafusion/pull/8745) (twitu) +- Change `ScalarValue::{List, LargeList, FixedSizedList}` to take specific types rather than `ArrayRef` [#8562](https://github.com/apache/datafusion/pull/8562) (rspears74) +- fix: fix markdown table in docs [#8812](https://github.com/apache/datafusion/pull/8812) (tshauck) +- docs: add sudo for install commands [#8804](https://github.com/apache/datafusion/pull/8804) (caicancai) +- Standardize `CompressionTypeVariant` encoding in protobuf [#8785](https://github.com/apache/datafusion/pull/8785) (tushushu) +- Make benefits_from_input_partitioning Default in SHJ [#8801](https://github.com/apache/datafusion/pull/8801) (metesynnada) +- refactor: standardize exec_from funcs arg order [#8809](https://github.com/apache/datafusion/pull/8809) (tshauck) +- [Minor] extract const and add doc and more tests for in_list pruning [#8815](https://github.com/apache/datafusion/pull/8815) (Ted-Jiang) +- [MINOR]: Add size check for aggregate [#8813](https://github.com/apache/datafusion/pull/8813) (mustafasrepo) +- Minor: chores: Update clippy in pre-commit.sh [#8810](https://github.com/apache/datafusion/pull/8810) (my-vegetable-has-exploded) +- Cleanup the usage of round-robin repartitioning [#8794](https://github.com/apache/datafusion/pull/8794) (viirya) +- Implement monotonicity for ScalarUDF [#8799](https://github.com/apache/datafusion/pull/8799) (guojidan) +- Remove unused array_expression.rs and `SUPPORTED_ARRAY_TYPES` [#8807](https://github.com/apache/datafusion/pull/8807) (alamb) +- feat: support `array_resize` [#8744](https://github.com/apache/datafusion/pull/8744) (Weijun-H) +- Minor: typo in `arrays.slt` [#8831](https://github.com/apache/datafusion/pull/8831) (Weijun-H) +- docs: document SessionConfig [#8771](https://github.com/apache/datafusion/pull/8771) (wjones127) +- Minor: Improve `datafusion-proto` documentation [#8822](https://github.com/apache/datafusion/pull/8822) (alamb) +- [CI] Refactor CI builders 
[#8826](https://github.com/apache/datafusion/pull/8826) (comphead) +- Serialize function signature simplifications [#8802](https://github.com/apache/datafusion/pull/8802) (metesynnada) +- Port tests in `group_by.rs` to sqllogictest [#8834](https://github.com/apache/datafusion/pull/8834) (hiltontj) +- Simplify physical expression creation API (not require schema) [#8823](https://github.com/apache/datafusion/pull/8823) (comphead) +- feat: add more components to the wasm-pack compatible list [#8843](https://github.com/apache/datafusion/pull/8843) (waynexia) +- Port tests in timestamp.rs to sqllogictest. Part 1 [#8818](https://github.com/apache/datafusion/pull/8818) (caicancai) +- Upgrade to object_store `0.9.0` and arrow `50.0.0` [#8758](https://github.com/apache/datafusion/pull/8758) (tustvold) +- Fix ApproxPercentileCont signature [#8825](https://github.com/apache/datafusion/pull/8825) (joroKr21) +- Minor: Update `with_column_rename` method doc [#8858](https://github.com/apache/datafusion/pull/8858) (comphead) +- Minor: Document `parquet_metadata` function [#8852](https://github.com/apache/datafusion/pull/8852) (alamb) +- Speedup new_with_metadata by removing sort [#8855](https://github.com/apache/datafusion/pull/8855) (simonvandel) +- Minor: fix wrong function call [#8847](https://github.com/apache/datafusion/pull/8847) (Weijun-H) +- Add options of parquet bloom filter and page index in Session config [#8869](https://github.com/apache/datafusion/pull/8869) (Ted-Jiang) +- Port tests in timestamp.rs to sqllogictest [#8859](https://github.com/apache/datafusion/pull/8859) (caicancai) +- test: Port `order.rs` tests to sqllogictest [#8857](https://github.com/apache/datafusion/pull/8857) (simicd) +- Determine causal window frames to produce early results. 
[#8842](https://github.com/apache/datafusion/pull/8842) (mustafasrepo) +- docs: fix wrong pushdown name & a typo [#8875](https://github.com/apache/datafusion/pull/8875) (SteveLauC) +- fix: don't extract common sub expr in `CASE WHEN` clause [#8833](https://github.com/apache/datafusion/pull/8833) (haohuaijin) +- Add "Extended" clickbench queries [#8861](https://github.com/apache/datafusion/pull/8861) (alamb) +- Change cli to propagate error to exit code [#8856](https://github.com/apache/datafusion/pull/8856) (tshauck) +- test: Port tests in `predicates.rs` to sqllogictest [#8879](https://github.com/apache/datafusion/pull/8879) (simicd) +- docs: Update contributor guide with installation instructions [#8876](https://github.com/apache/datafusion/pull/8876) (caicancai) +- Minor: add tests for casts between nested `List` and `LargeList` [#8882](https://github.com/apache/datafusion/pull/8882) (Weijun-H) +- Disable Parallel Parquet Writer by Default, Improve Writing Test Coverage [#8854](https://github.com/apache/datafusion/pull/8854) (devinjdangelo) +- Support for order sensitive `NTH_VALUE` aggregation, make reverse `ARRAY_AGG` more efficient [#8841](https://github.com/apache/datafusion/pull/8841) (mustafasrepo) +- test: Port tests in `csv_files.rs` to sqllogictest [#8885](https://github.com/apache/datafusion/pull/8885) (simicd) +- test: Port tests in `references.rs` to sqllogictest [#8877](https://github.com/apache/datafusion/pull/8877) (simicd) +- fix bug with `to_timestamp` and `InitCap` logical serialization, add roundtrip test between expression and proto, [#8868](https://github.com/apache/datafusion/pull/8868) (Weijun-H) +- Support `LargeListArray` scalar values and `align_array_dimensions` [#8881](https://github.com/apache/datafusion/pull/8881) (Weijun-H) +- refactor: rename FileStream.file_reader to file_opener & update doc [#8883](https://github.com/apache/datafusion/pull/8883) (SteveLauC) +- docs: fix wrong name in sub-crates' README 
[#8889](https://github.com/apache/datafusion/pull/8889) (SteveLauC) +- Recursive CTEs: Stage 1 - add config flag [#8828](https://github.com/apache/datafusion/pull/8828) (matthewgapp) +- Support array literal with scalar function [#8884](https://github.com/apache/datafusion/pull/8884) (jayzhan211) +- Bump actions/cache from 3 to 4 [#8903](https://github.com/apache/datafusion/pull/8903) (dependabot[bot]) +- Fix `datafusion-cli` print output [#8895](https://github.com/apache/datafusion/pull/8895) (alamb) +- docs: add an example for RecordBatchReceiverStreamBuilder [#8888](https://github.com/apache/datafusion/pull/8888) (SteveLauC) +- Fix "Projection references non-aggregate values" by updating `rebase_expr` to use `transform_down` [#8890](https://github.com/apache/datafusion/pull/8890) (wizardxz) +- Add serde support for Arrow FileTypeWriterOptions [#8850](https://github.com/apache/datafusion/pull/8850) (tushushu) +- Improve `datafusion-cli` print format tests [#8896](https://github.com/apache/datafusion/pull/8896) (alamb) +- Recursive CTEs: Stage 2 - add support for sql -> logical plan generation [#8839](https://github.com/apache/datafusion/pull/8839) (matthewgapp) +- Minor: remove null in `array-append` and `array-prepend` [#8901](https://github.com/apache/datafusion/pull/8901) (Weijun-H) +- Add support for FixedSizeList type in `arrow_cast`, hashing [#8344](https://github.com/apache/datafusion/pull/8344) (Weijun-H) +- aggregate_statistics should only optimize MIN/MAX when relation is not empty [#8914](https://github.com/apache/datafusion/pull/8914) (viirya) +- support to_timestamp with optional chrono formats [#8886](https://github.com/apache/datafusion/pull/8886) (Omega359) +- Minor: Document third argument of `date_bin` as optional and default value [#8912](https://github.com/apache/datafusion/pull/8912) (alamb) +- Minor: distinguish parquet row group pruning type in unit test [#8921](https://github.com/apache/datafusion/pull/8921) (Ted-Jiang) diff --git 
a/dev/changelog/36.0.0.md b/dev/changelog/36.0.0.md index 86f6a380ceb0..db03d9f5f7c1 100644 --- a/dev/changelog/36.0.0.md +++ b/dev/changelog/36.0.0.md @@ -17,248 +17,248 @@ under the License. --> -## [36.0.0](https://github.com/apache/arrow-datafusion/tree/36.0.0) (2024-02-16) +## [36.0.0](https://github.com/apache/datafusion/tree/36.0.0) (2024-02-16) -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/35.0.0...36.0.0) +[Full Changelog](https://github.com/apache/datafusion/compare/35.0.0...36.0.0) **Breaking changes:** -- Deprecate make_scalar_function [#8878](https://github.com/apache/arrow-datafusion/pull/8878) (viirya) -- Change `Accumulator::evaluate` and `Accumulator::state` to take `&mut self` [#8925](https://github.com/apache/arrow-datafusion/pull/8925) (alamb) -- Rename `CatalogList` to `CatalogProviderList` [#9002](https://github.com/apache/arrow-datafusion/pull/9002) (comphead) -- Remove some recursive cloning from logical planning [#9050](https://github.com/apache/arrow-datafusion/pull/9050) (ozankabak) -- Support `FixedSizeList` type coercion [#8902](https://github.com/apache/arrow-datafusion/pull/8902) (Weijun-H) -- Add `ColumnarValue::values_to_arrays`, deprecate `columnar_values_to_array` [#9114](https://github.com/apache/arrow-datafusion/pull/9114) (alamb) +- Deprecate make_scalar_function [#8878](https://github.com/apache/datafusion/pull/8878) (viirya) +- Change `Accumulator::evaluate` and `Accumulator::state` to take `&mut self` [#8925](https://github.com/apache/datafusion/pull/8925) (alamb) +- Rename `CatalogList` to `CatalogProviderList` [#9002](https://github.com/apache/datafusion/pull/9002) (comphead) +- Remove some recursive cloning from logical planning [#9050](https://github.com/apache/datafusion/pull/9050) (ozankabak) +- Support `FixedSizeList` type coercion [#8902](https://github.com/apache/datafusion/pull/8902) (Weijun-H) +- Add `ColumnarValue::values_to_arrays`, deprecate `columnar_values_to_array` 
[#9114](https://github.com/apache/datafusion/pull/9114) (alamb) **Performance related:** -- Minor: Add new Extended ClickBench benchmark queries [#8950](https://github.com/apache/arrow-datafusion/pull/8950) (alamb) +- Minor: Add new Extended ClickBench benchmark queries [#8950](https://github.com/apache/datafusion/pull/8950) (alamb) **Implemented enhancements:** -- feat: support `stride` in `array_slice`, change indexes to be`1` based [#8829](https://github.com/apache/arrow-datafusion/pull/8829) (Weijun-H) -- feat: emitting partial join results in `HashJoinStream` [#8020](https://github.com/apache/arrow-datafusion/pull/8020) (korowa) -- feat:implement sql style 'ends_with' and 'instr' string function [#8862](https://github.com/apache/arrow-datafusion/pull/8862) (zy-kkk) -- feat: Support parquet bloom filter pruning for decimal128 [#8930](https://github.com/apache/arrow-datafusion/pull/8930) (Ted-Jiang) -- feat: Disable client console highlight by default [#9013](https://github.com/apache/arrow-datafusion/pull/9013) (comphead) -- feat: support the ergonomics of getting list slice with stride [#8946](https://github.com/apache/arrow-datafusion/pull/8946) (Weijun-H) -- feat: Parallel Arrow file format reading [#8897](https://github.com/apache/arrow-datafusion/pull/8897) (my-vegetable-has-exploded) -- feat: support array_reverse [#9023](https://github.com/apache/arrow-datafusion/pull/9023) (Weijun-H) -- feat: issue #8969 adding position function [#8988](https://github.com/apache/arrow-datafusion/pull/8988) (Lordworms) -- feat: support `LargeList` in `flatten` [#9110](https://github.com/apache/arrow-datafusion/pull/9110) (Weijun-H) -- feat: improve `make_date` performance [#9112](https://github.com/apache/arrow-datafusion/pull/9112) (r3stl355) -- feat: add github action to self-assign the issue [#9132](https://github.com/apache/arrow-datafusion/pull/9132) (r3stl355) -- feat: add ability to query the remote http(s) location directly in datafusion-cli 
[#9150](https://github.com/apache/arrow-datafusion/pull/9150) (r3stl355) -- feat: implement select directly from s3 and gcs locations in datafusion-cli [#9199](https://github.com/apache/arrow-datafusion/pull/9199) (r3stl355) -- feat: support block gzip for streams [#9175](https://github.com/apache/arrow-datafusion/pull/9175) (tshauck) +- feat: support `stride` in `array_slice`, change indexes to be`1` based [#8829](https://github.com/apache/datafusion/pull/8829) (Weijun-H) +- feat: emitting partial join results in `HashJoinStream` [#8020](https://github.com/apache/datafusion/pull/8020) (korowa) +- feat:implement sql style 'ends_with' and 'instr' string function [#8862](https://github.com/apache/datafusion/pull/8862) (zy-kkk) +- feat: Support parquet bloom filter pruning for decimal128 [#8930](https://github.com/apache/datafusion/pull/8930) (Ted-Jiang) +- feat: Disable client console highlight by default [#9013](https://github.com/apache/datafusion/pull/9013) (comphead) +- feat: support the ergonomics of getting list slice with stride [#8946](https://github.com/apache/datafusion/pull/8946) (Weijun-H) +- feat: Parallel Arrow file format reading [#8897](https://github.com/apache/datafusion/pull/8897) (my-vegetable-has-exploded) +- feat: support array_reverse [#9023](https://github.com/apache/datafusion/pull/9023) (Weijun-H) +- feat: issue #8969 adding position function [#8988](https://github.com/apache/datafusion/pull/8988) (Lordworms) +- feat: support `LargeList` in `flatten` [#9110](https://github.com/apache/datafusion/pull/9110) (Weijun-H) +- feat: improve `make_date` performance [#9112](https://github.com/apache/datafusion/pull/9112) (r3stl355) +- feat: add github action to self-assign the issue [#9132](https://github.com/apache/datafusion/pull/9132) (r3stl355) +- feat: add ability to query the remote http(s) location directly in datafusion-cli [#9150](https://github.com/apache/datafusion/pull/9150) (r3stl355) +- feat: implement select directly from s3 and gcs 
locations in datafusion-cli [#9199](https://github.com/apache/datafusion/pull/9199) (r3stl355) +- feat: support block gzip for streams [#9175](https://github.com/apache/datafusion/pull/9175) (tshauck) **Fixed bugs:** -- fix: recursive initialize method [#8937](https://github.com/apache/arrow-datafusion/pull/8937) (waynexia) -- fix: common_subexpr_eliminate rule should not apply to short-circuit expression [#8928](https://github.com/apache/arrow-datafusion/pull/8928) (haohuaijin) -- fix: issue #8922 make row group test more readable [#8941](https://github.com/apache/arrow-datafusion/pull/8941) (Lordworms) -- fix: allow placeholders to be substituted when coercible [#8977](https://github.com/apache/arrow-datafusion/pull/8977) (kallisti-dev) -- fix: unambiguously truncate time in date_trunc function [#9068](https://github.com/apache/arrow-datafusion/pull/9068) (mhilton) -- fix: schema metadata retrieval when listing parquet table [#9134](https://github.com/apache/arrow-datafusion/pull/9134) (brayanjuls) +- fix: recursive initialize method [#8937](https://github.com/apache/datafusion/pull/8937) (waynexia) +- fix: common_subexpr_eliminate rule should not apply to short-circuit expression [#8928](https://github.com/apache/datafusion/pull/8928) (haohuaijin) +- fix: issue #8922 make row group test more readable [#8941](https://github.com/apache/datafusion/pull/8941) (Lordworms) +- fix: allow placeholders to be substituted when coercible [#8977](https://github.com/apache/datafusion/pull/8977) (kallisti-dev) +- fix: unambiguously truncate time in date_trunc function [#9068](https://github.com/apache/datafusion/pull/9068) (mhilton) +- fix: schema metadata retrieval when listing parquet table [#9134](https://github.com/apache/datafusion/pull/9134) (brayanjuls) **Documentation updates:** -- Prepare 35.0.0-rc1 [#8924](https://github.com/apache/arrow-datafusion/pull/8924) (andygrove) -- Update project links [#8954](https://github.com/apache/arrow-datafusion/pull/8954) (comphead) 
-- Document parallelism and thread scheduling in the architecture guide [#8986](https://github.com/apache/arrow-datafusion/pull/8986) (alamb) -- chore: fix license badge in README [#9008](https://github.com/apache/arrow-datafusion/pull/9008) (suyanhanx) -- docs: fix array_position docs [#9003](https://github.com/apache/arrow-datafusion/pull/9003) (tshauck) -- Docs: improve contributor guide to explain how to work with tickets [#8999](https://github.com/apache/arrow-datafusion/pull/8999) (alamb) -- Document minimum required rust version [#9071](https://github.com/apache/arrow-datafusion/pull/9071) (comphead) -- Minor: Add ParadeDB to the list of users [#9018](https://github.com/apache/arrow-datafusion/pull/9018) (alamb) -- Update minimum rust version to 1.72 [#8997](https://github.com/apache/arrow-datafusion/pull/8997) (alamb) -- docs: add docs and example showing how to get the expression data type [#9118](https://github.com/apache/arrow-datafusion/pull/9118) (r3stl355) -- chore: Fix incorrect comment in substrait consumer [#9123](https://github.com/apache/arrow-datafusion/pull/9123) (caicancai) -- Minor: Fix Self referential links in readme [#9119](https://github.com/apache/arrow-datafusion/pull/9119) (alamb) -- Examples link in catalogs.rs leads to a 404 [#9194](https://github.com/apache/arrow-datafusion/pull/9194) (Omega359) -- Create `datafusion-functions-array` crate and move `ArrayToString` function into it [#9113](https://github.com/apache/arrow-datafusion/pull/9113) (alamb) +- Prepare 35.0.0-rc1 [#8924](https://github.com/apache/datafusion/pull/8924) (andygrove) +- Update project links [#8954](https://github.com/apache/datafusion/pull/8954) (comphead) +- Document parallelism and thread scheduling in the architecture guide [#8986](https://github.com/apache/datafusion/pull/8986) (alamb) +- chore: fix license badge in README [#9008](https://github.com/apache/datafusion/pull/9008) (suyanhanx) +- docs: fix array_position docs 
[#9003](https://github.com/apache/datafusion/pull/9003) (tshauck) +- Docs: improve contributor guide to explain how to work with tickets [#8999](https://github.com/apache/datafusion/pull/8999) (alamb) +- Document minimum required rust version [#9071](https://github.com/apache/datafusion/pull/9071) (comphead) +- Minor: Add ParadeDB to the list of users [#9018](https://github.com/apache/datafusion/pull/9018) (alamb) +- Update minimum rust version to 1.72 [#8997](https://github.com/apache/datafusion/pull/8997) (alamb) +- docs: add docs and example showing how to get the expression data type [#9118](https://github.com/apache/datafusion/pull/9118) (r3stl355) +- chore: Fix incorrect comment in substrait consumer [#9123](https://github.com/apache/datafusion/pull/9123) (caicancai) +- Minor: Fix Self referential links in readme [#9119](https://github.com/apache/datafusion/pull/9119) (alamb) +- Examples link in catalogs.rs leads to a 404 [#9194](https://github.com/apache/datafusion/pull/9194) (Omega359) +- Create `datafusion-functions-array` crate and move `ArrayToString` function into it [#9113](https://github.com/apache/datafusion/pull/9113) (alamb) **Merged pull requests:** -- Add hash_join_single_partition_threshold_rows config [#8720](https://github.com/apache/arrow-datafusion/pull/8720) (maruschin) -- Prepare 35.0.0-rc1 [#8924](https://github.com/apache/arrow-datafusion/pull/8924) (andygrove) -- feat: support `stride` in `array_slice`, change indexes to be`1` based [#8829](https://github.com/apache/arrow-datafusion/pull/8829) (Weijun-H) -- fix: recursive initialize method [#8937](https://github.com/apache/arrow-datafusion/pull/8937) (waynexia) -- Fix expr partial ord test [#8908](https://github.com/apache/arrow-datafusion/pull/8908) (mustafasrepo) -- Simplify windows builtin functions return type [#8920](https://github.com/apache/arrow-datafusion/pull/8920) (comphead) -- Fix handling of nested leaf columns in parallel parquet writer 
[#8923](https://github.com/apache/arrow-datafusion/pull/8923) (devinjdangelo) -- feat: emitting partial join results in `HashJoinStream` [#8020](https://github.com/apache/arrow-datafusion/pull/8020) (korowa) -- fix: common_subexpr_eliminate rule should not apply to short-circuit expression [#8928](https://github.com/apache/arrow-datafusion/pull/8928) (haohuaijin) -- Support GroupsAccumulator accumulator for udaf [#8892](https://github.com/apache/arrow-datafusion/pull/8892) (guojidan) -- test: Port tests in `partitioned_csv.rs` to sqllogictest [#8919](https://github.com/apache/arrow-datafusion/pull/8919) (simicd) -- [CI] Fix RUSTFLAGS [#8929](https://github.com/apache/arrow-datafusion/pull/8929) (Jefffrey) -- Minor: Update datafusion-cli README to explain why it is not in the w… [#8938](https://github.com/apache/arrow-datafusion/pull/8938) (alamb) -- Add syntax highlight to datafusion-cli [#8918](https://github.com/apache/arrow-datafusion/pull/8918) (trungda) -- Update substrait requirement from 0.22.1 to 0.23.0 [#8943](https://github.com/apache/arrow-datafusion/pull/8943) (dependabot[bot]) -- Deprecate make_scalar_function [#8878](https://github.com/apache/arrow-datafusion/pull/8878) (viirya) -- Update project links [#8954](https://github.com/apache/arrow-datafusion/pull/8954) (comphead) -- fix: issue #8922 make row group test more readable [#8941](https://github.com/apache/arrow-datafusion/pull/8941) (Lordworms) -- feat:implement sql style 'ends_with' and 'instr' string function [#8862](https://github.com/apache/arrow-datafusion/pull/8862) (zy-kkk) -- [MINOR]: Extract aggregate topk function to `aggregate_topk.slt` [#8948](https://github.com/apache/arrow-datafusion/pull/8948) (mustafasrepo) -- Combine multiple `IN` lists in `ExprSimplifier` [#8949](https://github.com/apache/arrow-datafusion/pull/8949) (jayzhan211) -- Fix clippy failures: error: use of deprecated function `functions::make_scalar_function [#8972](https://github.com/apache/arrow-datafusion/pull/8972) 
(alamb) -- feat: Support parquet bloom filter pruning for decimal128 [#8930](https://github.com/apache/arrow-datafusion/pull/8930) (Ted-Jiang) -- [MINOR]: Update create_window_expr to refer only input schema [#8945](https://github.com/apache/arrow-datafusion/pull/8945) (mustafasrepo) -- Don't error in simplify_expressions rule [#8957](https://github.com/apache/arrow-datafusion/pull/8957) (haohuaijin) -- Use .zip to avoid unwrap [#8956](https://github.com/apache/arrow-datafusion/pull/8956) (Luv-Ray) -- Change `Accumulator::evaluate` and `Accumulator::state` to take `&mut self` [#8925](https://github.com/apache/arrow-datafusion/pull/8925) (alamb) -- Enhance simplifier by adding Canonicalize [#8780](https://github.com/apache/arrow-datafusion/pull/8780) (yyy1000) -- Find the correct fields when using page filter on `struct` fields in parquet [#8848](https://github.com/apache/arrow-datafusion/pull/8848) (manoj-inukolunu) -- fix: allow placeholders to be substituted when coercible [#8977](https://github.com/apache/arrow-datafusion/pull/8977) (kallisti-dev) -- Minor: improve CatalogProvider documentation with rationale and info about remote catalogs [#8968](https://github.com/apache/arrow-datafusion/pull/8968) (alamb) -- Improve to_timestamp docs [#8981](https://github.com/apache/arrow-datafusion/pull/8981) (Omega359) -- Add helper function for processing scalar function input [#8962](https://github.com/apache/arrow-datafusion/pull/8962) (viirya) -- Fix optimize projections bug [#8960](https://github.com/apache/arrow-datafusion/pull/8960) (mustafasrepo) -- NOT operator not return internal error when args are not boolean value [#8982](https://github.com/apache/arrow-datafusion/pull/8982) (guojidan) -- Minor: Add new Extended ClickBench benchmark queries [#8950](https://github.com/apache/arrow-datafusion/pull/8950) (alamb) -- Minor: Add comments to MSRV CI check to help if it fails [#8995](https://github.com/apache/arrow-datafusion/pull/8995) (alamb) -- Minor: Document 
memory management design on `MemoryPool` [#8966](https://github.com/apache/arrow-datafusion/pull/8966) (alamb) -- Fix LEAD/LAG window functions when default value null [#8989](https://github.com/apache/arrow-datafusion/pull/8989) (comphead) -- Optimize MIN/MAX when relation is empty [#8940](https://github.com/apache/arrow-datafusion/pull/8940) (viirya) -- [task #8203] Port tests in joins.rs to sqllogictest [#8996](https://github.com/apache/arrow-datafusion/pull/8996) (Tangruilin) -- [task #8213]Port tests in select.rs to sqllogictest [#8967](https://github.com/apache/arrow-datafusion/pull/8967) (Tangruilin) -- test: Port (last) `repartition.rs` query to sqllogictest [#8936](https://github.com/apache/arrow-datafusion/pull/8936) (simicd) -- Update to sqlparser `0.42.0` [#9000](https://github.com/apache/arrow-datafusion/pull/9000) (alamb) -- [MINOR]: Fix Optimize Projections Bug [#8992](https://github.com/apache/arrow-datafusion/pull/8992) (mustafasrepo) -- Make Topk aggregate tests deterministic [#8998](https://github.com/apache/arrow-datafusion/pull/8998) (mustafasrepo) -- Add support for Postgres LIKE operators [#8894](https://github.com/apache/arrow-datafusion/pull/8894) (gruuya) -- bug: Datafusion doesn't respect case sensitive table references [#8964](https://github.com/apache/arrow-datafusion/pull/8964) (xhwhis) -- Document parallelism and thread scheduling in the architecture guide [#8986](https://github.com/apache/arrow-datafusion/pull/8986) (alamb) -- Fix None Projections in Projection Pushdown [#9005](https://github.com/apache/arrow-datafusion/pull/9005) (berkaysynnada) -- Lead and Lag window functions should support default value with datatype other than Int64 [#9001](https://github.com/apache/arrow-datafusion/pull/9001) (viirya) -- chore: fix license badge in README [#9008](https://github.com/apache/arrow-datafusion/pull/9008) (suyanhanx) -- Minor: fix: #9010 - Optimizer schema change assert error is incorrect 
[#9012](https://github.com/apache/arrow-datafusion/pull/9012) (curtisleefulton) -- docs: fix array_position docs [#9003](https://github.com/apache/arrow-datafusion/pull/9003) (tshauck) -- Rename `CatalogList` to `CatalogProviderList` [#9002](https://github.com/apache/arrow-datafusion/pull/9002) (comphead) -- Safeguard against potential inexact row count being smaller than exact null count [#9007](https://github.com/apache/arrow-datafusion/pull/9007) (gruuya) -- Recursive CTEs: Stage 3 - add execution support [#8840](https://github.com/apache/arrow-datafusion/pull/8840) (matthewgapp) -- sqllogictest: move the creation of the nan_table from Rust to slt [#9022](https://github.com/apache/arrow-datafusion/pull/9022) (jonahgao) -- TreeNode refactor code deduplication: Part 3 [#8817](https://github.com/apache/arrow-datafusion/pull/8817) (ozankabak) -- feat: Disable client console highlight by default [#9013](https://github.com/apache/arrow-datafusion/pull/9013) (comphead) -- [task #8917] Implement information_schema.schemata [#8993](https://github.com/apache/arrow-datafusion/pull/8993) (Tangruilin) -- Properly encode STRING_AGG, NTH_VALUE in physical plan protobufs [#9027](https://github.com/apache/arrow-datafusion/pull/9027) (scsmithr) -- [task #8201] Port tests in expr.rs to sqllogictest, finish the left c… [#9014](https://github.com/apache/arrow-datafusion/pull/9014) (Tangruilin) -- Fix the clippy error of use of deprecated method [#9034](https://github.com/apache/arrow-datafusion/pull/9034) (viirya) -- feat: support the ergonomics of getting list slice with stride [#8946](https://github.com/apache/arrow-datafusion/pull/8946) (Weijun-H) -- Cache common referred expression at the window input [#9009](https://github.com/apache/arrow-datafusion/pull/9009) (mustafasrepo) -- Optimize `COUNT( DISTINCT ...)` for strings (up to 9x faster) [#8849](https://github.com/apache/arrow-datafusion/pull/8849) (jayzhan211) -- feat: Parallel Arrow file format reading 
[#8897](https://github.com/apache/arrow-datafusion/pull/8897) (my-vegetable-has-exploded) -- Change remove from swap to shift in index map [#9049](https://github.com/apache/arrow-datafusion/pull/9049) (mustafasrepo) -- Relax join keys constraint from Column to any physical expression for physical join operators [#8991](https://github.com/apache/arrow-datafusion/pull/8991) (viirya) -- Minor: Improve memory helper trait documentation [#9025](https://github.com/apache/arrow-datafusion/pull/9025) (alamb) -- Docs: improve contributor guide to explain how to work with tickets [#8999](https://github.com/apache/arrow-datafusion/pull/8999) (alamb) -- fix issue where upper and lower functions only work correctly on ascii character [#9054](https://github.com/apache/arrow-datafusion/pull/9054) (Omega359) -- Minor: small updates to bench.sh [#9035](https://github.com/apache/arrow-datafusion/pull/9035) (kmitchener) -- Chore: explicitly list out all Expr types in TypeCoercionRewriter::mutate [#9038](https://github.com/apache/arrow-datafusion/pull/9038) (guojidan) -- Minor: improve scalar functions document [#9029](https://github.com/apache/arrow-datafusion/pull/9029) (Weijun-H) -- [MINOR] Alter a SHJ test for relaxing "on" condition [#9065](https://github.com/apache/arrow-datafusion/pull/9065) (metesynnada) -- Remove some recursive cloning from logical planning [#9050](https://github.com/apache/arrow-datafusion/pull/9050) (ozankabak) -- minor: remove useless macro [#8979](https://github.com/apache/arrow-datafusion/pull/8979) (jackwener) -- Causality Analysis for Builtin Window Functions [#9048](https://github.com/apache/arrow-datafusion/pull/9048) (mustafasrepo) -- Minor: add doc examples for RawTableAllocExt [#9059](https://github.com/apache/arrow-datafusion/pull/9059) (alamb) -- Update substrait requirement from 0.23.0 to 0.24.0 [#9067](https://github.com/apache/arrow-datafusion/pull/9067) (dependabot[bot]) -- Remove single_file_output option from FileSinkConfig and Copy 
statement [#9041](https://github.com/apache/arrow-datafusion/pull/9041) (yyy1000) -- Add a make_date function [#9040](https://github.com/apache/arrow-datafusion/pull/9040) (Omega359) -- Speedup `DFSchema::merge` using HashSet indices [#9020](https://github.com/apache/arrow-datafusion/pull/9020) (simonvandel) -- Document minimum required rust version [#9071](https://github.com/apache/arrow-datafusion/pull/9071) (comphead) -- Return proper number of expressions for nth_value_agg [#9044](https://github.com/apache/arrow-datafusion/pull/9044) (mustafasrepo) -- ScalarUDF with zero arguments should be provided with one null array as parameter [#9031](https://github.com/apache/arrow-datafusion/pull/9031) (viirya) -- Update strum requirement from 0.25.0 to 0.26.1 [#9046](https://github.com/apache/arrow-datafusion/pull/9046) (dependabot[bot]) -- Create `datafusion-functions` crate, extract encode and decode to [#8705](https://github.com/apache/arrow-datafusion/pull/8705) (alamb) -- Add documentation for streaming usecase [#9070](https://github.com/apache/arrow-datafusion/pull/9070) (mustafasrepo) -- fix: unambiguously truncate time in date_trunc function [#9068](https://github.com/apache/arrow-datafusion/pull/9068) (mhilton) -- feat: support array_reverse [#9023](https://github.com/apache/arrow-datafusion/pull/9023) (Weijun-H) -- prettier to_timestamp_invoke [#9078](https://github.com/apache/arrow-datafusion/pull/9078) (Tangruilin) -- Handle invalid types for negation [#9066](https://github.com/apache/arrow-datafusion/pull/9066) (trungda) -- Minor: reduce unwraps in datetime_expressions.rs [#9072](https://github.com/apache/arrow-datafusion/pull/9072) (alamb) -- Remove custom doubling strategy + add examples to `VecAllocEx` [#9058](https://github.com/apache/arrow-datafusion/pull/9058) (alamb) -- Split physical_plan_tpch into separate benchmarks [#9043](https://github.com/apache/arrow-datafusion/pull/9043) (simonvandel) -- Minor: Add ParadeDB to the list of users 
[#9018](https://github.com/apache/arrow-datafusion/pull/9018) (alamb) -- [MINOR]: Add check for unnecessary projection [#9079](https://github.com/apache/arrow-datafusion/pull/9079) (mustafasrepo) -- chore(placeholder): update error message and add tests [#9073](https://github.com/apache/arrow-datafusion/pull/9073) (appletreeisyellow) -- refer to #8781, convert the internal_err! in datetime_expression.rs to exec_err! [#9083](https://github.com/apache/arrow-datafusion/pull/9083) (Tangruilin) -- Add benchmarks for to_timestamp and make_date functions [#9086](https://github.com/apache/arrow-datafusion/pull/9086) (Omega359) -- chore: Clarify ParadeDB branding [#9088](https://github.com/apache/arrow-datafusion/pull/9088) (philippemnoel) -- doc: Add example how to include latest datafusion [#9076](https://github.com/apache/arrow-datafusion/pull/9076) (comphead) -- Update minimum rust version to 1.72 [#8997](https://github.com/apache/arrow-datafusion/pull/8997) (alamb) -- Fix typo in an error message [#9099](https://github.com/apache/arrow-datafusion/pull/9099) (AdamGS) -- Update InfluxDB links in Known Users section of documentation [#9092](https://github.com/apache/arrow-datafusion/pull/9092) (alamb) -- Support `FixedSizeList` type coercion [#8902](https://github.com/apache/arrow-datafusion/pull/8902) (Weijun-H) -- Improve Canonicalize API [#8983](https://github.com/apache/arrow-datafusion/pull/8983) (alamb) -- Update env_logger requirement from 0.10 to 0.11 [#8944](https://github.com/apache/arrow-datafusion/pull/8944) (dependabot[bot]) -- Split count_distinct.rs into separate modules [#9087](https://github.com/apache/arrow-datafusion/pull/9087) (alamb) -- Fix update_expr for projection pushdown [#9096](https://github.com/apache/arrow-datafusion/pull/9096) (viirya) -- Improve `InListSImplifier` -- add test, commend and avoid clones [#8971](https://github.com/apache/arrow-datafusion/pull/8971) (alamb) -- feat: issue #8969 adding position function 
[#8988](https://github.com/apache/arrow-datafusion/pull/8988) (Lordworms) -- Cleanup regex_expressions.rs to remove \_regexp_match function [#9107](https://github.com/apache/arrow-datafusion/pull/9107) (Omega359) -- Unnest with single expression [#9069](https://github.com/apache/arrow-datafusion/pull/9069) (jayzhan211) -- Minor: improve GroupsAccumulator and Accumulator documentation [#8963](https://github.com/apache/arrow-datafusion/pull/8963) (alamb) -- move InList related simplify to one place [#9037](https://github.com/apache/arrow-datafusion/pull/9037) (guojidan) -- docs: add docs and example showing how to get the expression data type [#9118](https://github.com/apache/arrow-datafusion/pull/9118) (r3stl355) -- Add http(s) support to the command line [#8753](https://github.com/apache/arrow-datafusion/pull/8753) (kcolford) -- Remove External Table Backwards Compatibility Options [#9105](https://github.com/apache/arrow-datafusion/pull/9105) (yyy1000) -- feat: support `LargeList` in `flatten` [#9110](https://github.com/apache/arrow-datafusion/pull/9110) (Weijun-H) -- feat: improve `make_date` performance [#9112](https://github.com/apache/arrow-datafusion/pull/9112) (r3stl355) -- Refactor min/max value update in Parquet statistics [#9120](https://github.com/apache/arrow-datafusion/pull/9120) (Weijun-H) -- chore: Fix incorrect comment in substrait consumer [#9123](https://github.com/apache/arrow-datafusion/pull/9123) (caicancai) -- Minor: Fix Self referential links in readme [#9119](https://github.com/apache/arrow-datafusion/pull/9119) (alamb) -- Add `ColumnarValue::values_to_arrays`, deprecate `columnar_values_to_array` [#9114](https://github.com/apache/arrow-datafusion/pull/9114) (alamb) -- Support Copy with Remote Object Stores in datafusion-cli [#9064](https://github.com/apache/arrow-datafusion/pull/9064) (manoj-inukolunu) -- Fix Dockerfile min rust version to 1.72 [#9135](https://github.com/apache/arrow-datafusion/pull/9135) (alamb) -- fix: schema metadata 
retrieval when listing parquet table [#9134](https://github.com/apache/arrow-datafusion/pull/9134) (brayanjuls) -- Update parse_protobuf_file_scan_config to remove any partition columns from the file_schema in FileScanConfig [#9126](https://github.com/apache/arrow-datafusion/pull/9126) (bcmcmill) -- feat: add github action to self-assign the issue [#9132](https://github.com/apache/arrow-datafusion/pull/9132) (r3stl355) -- Fix NULL values in FixedSizeList creation [#9141](https://github.com/apache/arrow-datafusion/pull/9141) (Weijun-H) -- Add `FunctionRegistry::register_udaf` and `FunctionRegistry::register_udwf` [#9075](https://github.com/apache/arrow-datafusion/pull/9075) (alamb) -- Change ScalarValue::Struct to ArrayRef [#7893](https://github.com/apache/arrow-datafusion/pull/7893) (jayzhan211) -- Support join filter for `SortMergeJoin` [#9080](https://github.com/apache/arrow-datafusion/pull/9080) (viirya) -- Typo in docstring [#9149](https://github.com/apache/arrow-datafusion/pull/9149) (tv42) -- RecordBatchReceiverStreamBuilder: don't stringify errors [#9155](https://github.com/apache/arrow-datafusion/pull/9155) (tv42) -- port position test to scalar [#9128](https://github.com/apache/arrow-datafusion/pull/9128) (Lordworms) -- Minor: Improve `DataFrame` docs, add examples [#9159](https://github.com/apache/arrow-datafusion/pull/9159) (alamb) -- feat: add ability to query the remote http(s) location directly in datafusion-cli [#9150](https://github.com/apache/arrow-datafusion/pull/9150) (r3stl355) -- Add `regexp_like, improve docs and examples for `regexp_match` [#9137](https://github.com/apache/arrow-datafusion/pull/9137) (Omega359) -- Partial Sort Plan Implementation [#9125](https://github.com/apache/arrow-datafusion/pull/9125) (ahmetenis) -- Update tonic requirement from 0.10 to 0.11 [#9176](https://github.com/apache/arrow-datafusion/pull/9176) (dependabot[bot]) -- minor: fix error message function naming 
[#9168](https://github.com/apache/arrow-datafusion/pull/9168) (comphead) -- Minor: Update `DataFrame::write_table` docs [#9169](https://github.com/apache/arrow-datafusion/pull/9169) (alamb) -- Improve PhysicalExpr documentation [#9180](https://github.com/apache/arrow-datafusion/pull/9180) (alamb) -- Fix sphinx warnings [#9142](https://github.com/apache/arrow-datafusion/pull/9142) (ongchi) -- Use concat to simplify Nested Scalar creation [#9174](https://github.com/apache/arrow-datafusion/pull/9174) (jayzhan211) -- Minor: Remove unecessary map_err [#9186](https://github.com/apache/arrow-datafusion/pull/9186) (alamb) -- Add example of using `PruningPredicate` to datafusion-examples [#9183](https://github.com/apache/arrow-datafusion/pull/9183) (alamb) -- Use prep_null_mask_filter to handle nulls in selection mask [#9163](https://github.com/apache/arrow-datafusion/pull/9163) (viirya) -- [Document] Adding UDF by impl ScalarUDFImpl [#9172](https://github.com/apache/arrow-datafusion/pull/9172) (yyy1000) -- Docs: Extend `PruningPredicate` with background and implementation info [#9184](https://github.com/apache/arrow-datafusion/pull/9184) (alamb) -- chore: make tokio a workspace dependency [#9187](https://github.com/apache/arrow-datafusion/pull/9187) (PsiACE) -- Examples link in catalogs.rs leads to a 404 [#9194](https://github.com/apache/arrow-datafusion/pull/9194) (Omega359) -- Add test pipeline for Mac aarch64 [#9191](https://github.com/apache/arrow-datafusion/pull/9191) (viirya) -- Add string aggregate grouping fuzz test, add `MemTable::with_sort_exprs` [#9190](https://github.com/apache/arrow-datafusion/pull/9190) (alamb) -- Create `datafusion-functions-array` crate and move `ArrayToString` function into it [#9113](https://github.com/apache/arrow-datafusion/pull/9113) (alamb) -- Add constant expression support to equivalence properties [#9198](https://github.com/apache/arrow-datafusion/pull/9198) (mustafasrepo) -- chore: update tpch-docker docker repository 
[#9204](https://github.com/apache/arrow-datafusion/pull/9204) (pmcgleenon) -- feat: implement select directly from s3 and gcs locations in datafusion-cli [#9199](https://github.com/apache/arrow-datafusion/pull/9199) (r3stl355) -- MINOR: Add "fs" feature to "tokio", fix "features" typo. [#9210](https://github.com/apache/arrow-datafusion/pull/9210) (mustafasrepo) -- Add `to_char` function implementation using chrono formats [#9181](https://github.com/apache/arrow-datafusion/pull/9181) (Omega359) -- Add `SessionContext::read_batches` [#9197](https://github.com/apache/arrow-datafusion/pull/9197) (Lordworms) -- feat: support block gzip for streams [#9175](https://github.com/apache/arrow-datafusion/pull/9175) (tshauck) -- chore(pruning): Support `IS NOT NULL` predicates in `PruningPredicate` [#9208](https://github.com/apache/arrow-datafusion/pull/9208) (appletreeisyellow) -- Add cargo audit CI [#9182](https://github.com/apache/arrow-datafusion/pull/9182) (ongchi) -- Move `nullif` and `isnan` to datafusion-functions [#9216](https://github.com/apache/arrow-datafusion/pull/9216) (alamb) -- Bugfix - Projection Removal Conditions [#9215](https://github.com/apache/arrow-datafusion/pull/9215) (berkaysynnada) -- Partitioning fixes [#9207](https://github.com/apache/arrow-datafusion/pull/9207) (esheppa) -- Return an error when a column does not exist in window function [#9202](https://github.com/apache/arrow-datafusion/pull/9202) (PhVHoang) -- Revert "chore(pruning): Support `IS NOT NULL` predicates in `PruningPredicate` (#9208)" [#9232](https://github.com/apache/arrow-datafusion/pull/9232) (appletreeisyellow) -- Improve documentation on how to build `ScalarValue::Struct` and add `ScalarStructBuilder` [#9229](https://github.com/apache/arrow-datafusion/pull/9229) (alamb) -- Minor: improve Display of output ordering of `StreamTableExec` [#9225](https://github.com/apache/arrow-datafusion/pull/9225) (mustafasrepo) -- Support compute return types from argument values (not just their 
DataTypes) [#8985](https://github.com/apache/arrow-datafusion/pull/8985) (yyy1000) -- Dont call multiunzip when no stats [#9220](https://github.com/apache/arrow-datafusion/pull/9220) (matthewmturner) -- Use setup-macos-aarch64-builder for aarch64 CI pipeline [#9242](https://github.com/apache/arrow-datafusion/pull/9242) (viirya) -- GROUP-BY prioritizes input columns in case of ambiguity [#9228](https://github.com/apache/arrow-datafusion/pull/9228) (jonahgao) -- Minor: chore: improve catalog test in mod.rs [#9244](https://github.com/apache/arrow-datafusion/pull/9244) (caicancai) -- Add example for `ScalarStructBuilder::new_null`, fix display for `null` `ScalarValue::Struct` [#9238](https://github.com/apache/arrow-datafusion/pull/9238) (alamb) +- Add hash_join_single_partition_threshold_rows config [#8720](https://github.com/apache/datafusion/pull/8720) (maruschin) +- Prepare 35.0.0-rc1 [#8924](https://github.com/apache/datafusion/pull/8924) (andygrove) +- feat: support `stride` in `array_slice`, change indexes to be`1` based [#8829](https://github.com/apache/datafusion/pull/8829) (Weijun-H) +- fix: recursive initialize method [#8937](https://github.com/apache/datafusion/pull/8937) (waynexia) +- Fix expr partial ord test [#8908](https://github.com/apache/datafusion/pull/8908) (mustafasrepo) +- Simplify windows builtin functions return type [#8920](https://github.com/apache/datafusion/pull/8920) (comphead) +- Fix handling of nested leaf columns in parallel parquet writer [#8923](https://github.com/apache/datafusion/pull/8923) (devinjdangelo) +- feat: emitting partial join results in `HashJoinStream` [#8020](https://github.com/apache/datafusion/pull/8020) (korowa) +- fix: common_subexpr_eliminate rule should not apply to short-circuit expression [#8928](https://github.com/apache/datafusion/pull/8928) (haohuaijin) +- Support GroupsAccumulator accumulator for udaf [#8892](https://github.com/apache/datafusion/pull/8892) (guojidan) +- test: Port tests in 
`partitioned_csv.rs` to sqllogictest [#8919](https://github.com/apache/datafusion/pull/8919) (simicd) +- [CI] Fix RUSTFLAGS [#8929](https://github.com/apache/datafusion/pull/8929) (Jefffrey) +- Minor: Update datafusion-cli README to explain why it is not in the w… [#8938](https://github.com/apache/datafusion/pull/8938) (alamb) +- Add syntax highlight to datafusion-cli [#8918](https://github.com/apache/datafusion/pull/8918) (trungda) +- Update substrait requirement from 0.22.1 to 0.23.0 [#8943](https://github.com/apache/datafusion/pull/8943) (dependabot[bot]) +- Deprecate make_scalar_function [#8878](https://github.com/apache/datafusion/pull/8878) (viirya) +- Update project links [#8954](https://github.com/apache/datafusion/pull/8954) (comphead) +- fix: issue #8922 make row group test more readable [#8941](https://github.com/apache/datafusion/pull/8941) (Lordworms) +- feat:implement sql style 'ends_with' and 'instr' string function [#8862](https://github.com/apache/datafusion/pull/8862) (zy-kkk) +- [MINOR]: Extract aggregate topk function to `aggregate_topk.slt` [#8948](https://github.com/apache/datafusion/pull/8948) (mustafasrepo) +- Combine multiple `IN` lists in `ExprSimplifier` [#8949](https://github.com/apache/datafusion/pull/8949) (jayzhan211) +- Fix clippy failures: error: use of deprecated function `functions::make_scalar_function [#8972](https://github.com/apache/datafusion/pull/8972) (alamb) +- feat: Support parquet bloom filter pruning for decimal128 [#8930](https://github.com/apache/datafusion/pull/8930) (Ted-Jiang) +- [MINOR]: Update create_window_expr to refer only input schema [#8945](https://github.com/apache/datafusion/pull/8945) (mustafasrepo) +- Don't error in simplify_expressions rule [#8957](https://github.com/apache/datafusion/pull/8957) (haohuaijin) +- Use .zip to avoid unwrap [#8956](https://github.com/apache/datafusion/pull/8956) (Luv-Ray) +- Change `Accumulator::evaluate` and `Accumulator::state` to take `&mut self` 
[#8925](https://github.com/apache/datafusion/pull/8925) (alamb) +- Enhance simplifier by adding Canonicalize [#8780](https://github.com/apache/datafusion/pull/8780) (yyy1000) +- Find the correct fields when using page filter on `struct` fields in parquet [#8848](https://github.com/apache/datafusion/pull/8848) (manoj-inukolunu) +- fix: allow placeholders to be substituted when coercible [#8977](https://github.com/apache/datafusion/pull/8977) (kallisti-dev) +- Minor: improve CatalogProvider documentation with rationale and info about remote catalogs [#8968](https://github.com/apache/datafusion/pull/8968) (alamb) +- Improve to_timestamp docs [#8981](https://github.com/apache/datafusion/pull/8981) (Omega359) +- Add helper function for processing scalar function input [#8962](https://github.com/apache/datafusion/pull/8962) (viirya) +- Fix optimize projections bug [#8960](https://github.com/apache/datafusion/pull/8960) (mustafasrepo) +- NOT operator not return internal error when args are not boolean value [#8982](https://github.com/apache/datafusion/pull/8982) (guojidan) +- Minor: Add new Extended ClickBench benchmark queries [#8950](https://github.com/apache/datafusion/pull/8950) (alamb) +- Minor: Add comments to MSRV CI check to help if it fails [#8995](https://github.com/apache/datafusion/pull/8995) (alamb) +- Minor: Document memory management design on `MemoryPool` [#8966](https://github.com/apache/datafusion/pull/8966) (alamb) +- Fix LEAD/LAG window functions when default value null [#8989](https://github.com/apache/datafusion/pull/8989) (comphead) +- Optimize MIN/MAX when relation is empty [#8940](https://github.com/apache/datafusion/pull/8940) (viirya) +- [task #8203] Port tests in joins.rs to sqllogictest [#8996](https://github.com/apache/datafusion/pull/8996) (Tangruilin) +- [task #8213]Port tests in select.rs to sqllogictest [#8967](https://github.com/apache/datafusion/pull/8967) (Tangruilin) +- test: Port (last) `repartition.rs` query to sqllogictest 
[#8936](https://github.com/apache/datafusion/pull/8936) (simicd) +- Update to sqlparser `0.42.0` [#9000](https://github.com/apache/datafusion/pull/9000) (alamb) +- [MINOR]: Fix Optimize Projections Bug [#8992](https://github.com/apache/datafusion/pull/8992) (mustafasrepo) +- Make Topk aggregate tests deterministic [#8998](https://github.com/apache/datafusion/pull/8998) (mustafasrepo) +- Add support for Postgres LIKE operators [#8894](https://github.com/apache/datafusion/pull/8894) (gruuya) +- bug: Datafusion doesn't respect case sensitive table references [#8964](https://github.com/apache/datafusion/pull/8964) (xhwhis) +- Document parallelism and thread scheduling in the architecture guide [#8986](https://github.com/apache/datafusion/pull/8986) (alamb) +- Fix None Projections in Projection Pushdown [#9005](https://github.com/apache/datafusion/pull/9005) (berkaysynnada) +- Lead and Lag window functions should support default value with datatype other than Int64 [#9001](https://github.com/apache/datafusion/pull/9001) (viirya) +- chore: fix license badge in README [#9008](https://github.com/apache/datafusion/pull/9008) (suyanhanx) +- Minor: fix: #9010 - Optimizer schema change assert error is incorrect [#9012](https://github.com/apache/datafusion/pull/9012) (curtisleefulton) +- docs: fix array_position docs [#9003](https://github.com/apache/datafusion/pull/9003) (tshauck) +- Rename `CatalogList` to `CatalogProviderList` [#9002](https://github.com/apache/datafusion/pull/9002) (comphead) +- Safeguard against potential inexact row count being smaller than exact null count [#9007](https://github.com/apache/datafusion/pull/9007) (gruuya) +- Recursive CTEs: Stage 3 - add execution support [#8840](https://github.com/apache/datafusion/pull/8840) (matthewgapp) +- sqllogictest: move the creation of the nan_table from Rust to slt [#9022](https://github.com/apache/datafusion/pull/9022) (jonahgao) +- TreeNode refactor code deduplication: Part 3 
[#8817](https://github.com/apache/datafusion/pull/8817) (ozankabak) +- feat: Disable client console highlight by default [#9013](https://github.com/apache/datafusion/pull/9013) (comphead) +- [task #8917] Implement information_schema.schemata [#8993](https://github.com/apache/datafusion/pull/8993) (Tangruilin) +- Properly encode STRING_AGG, NTH_VALUE in physical plan protobufs [#9027](https://github.com/apache/datafusion/pull/9027) (scsmithr) +- [task #8201] Port tests in expr.rs to sqllogictest, finish the left c… [#9014](https://github.com/apache/datafusion/pull/9014) (Tangruilin) +- Fix the clippy error of use of deprecated method [#9034](https://github.com/apache/datafusion/pull/9034) (viirya) +- feat: support the ergonomics of getting list slice with stride [#8946](https://github.com/apache/datafusion/pull/8946) (Weijun-H) +- Cache common referred expression at the window input [#9009](https://github.com/apache/datafusion/pull/9009) (mustafasrepo) +- Optimize `COUNT( DISTINCT ...)` for strings (up to 9x faster) [#8849](https://github.com/apache/datafusion/pull/8849) (jayzhan211) +- feat: Parallel Arrow file format reading [#8897](https://github.com/apache/datafusion/pull/8897) (my-vegetable-has-exploded) +- Change remove from swap to shift in index map [#9049](https://github.com/apache/datafusion/pull/9049) (mustafasrepo) +- Relax join keys constraint from Column to any physical expression for physical join operators [#8991](https://github.com/apache/datafusion/pull/8991) (viirya) +- Minor: Improve memory helper trait documentation [#9025](https://github.com/apache/datafusion/pull/9025) (alamb) +- Docs: improve contributor guide to explain how to work with tickets [#8999](https://github.com/apache/datafusion/pull/8999) (alamb) +- fix issue where upper and lower functions only work correctly on ascii character [#9054](https://github.com/apache/datafusion/pull/9054) (Omega359) +- Minor: small updates to bench.sh 
[#9035](https://github.com/apache/datafusion/pull/9035) (kmitchener) +- Chore: explicitly list out all Expr types in TypeCoercionRewriter::mutate [#9038](https://github.com/apache/datafusion/pull/9038) (guojidan) +- Minor: improve scalar functions document [#9029](https://github.com/apache/datafusion/pull/9029) (Weijun-H) +- [MINOR] Alter a SHJ test for relaxing "on" condition [#9065](https://github.com/apache/datafusion/pull/9065) (metesynnada) +- Remove some recursive cloning from logical planning [#9050](https://github.com/apache/datafusion/pull/9050) (ozankabak) +- minor: remove useless macro [#8979](https://github.com/apache/datafusion/pull/8979) (jackwener) +- Causality Analysis for Builtin Window Functions [#9048](https://github.com/apache/datafusion/pull/9048) (mustafasrepo) +- Minor: add doc examples for RawTableAllocExt [#9059](https://github.com/apache/datafusion/pull/9059) (alamb) +- Update substrait requirement from 0.23.0 to 0.24.0 [#9067](https://github.com/apache/datafusion/pull/9067) (dependabot[bot]) +- Remove single_file_output option from FileSinkConfig and Copy statement [#9041](https://github.com/apache/datafusion/pull/9041) (yyy1000) +- Add a make_date function [#9040](https://github.com/apache/datafusion/pull/9040) (Omega359) +- Speedup `DFSchema::merge` using HashSet indices [#9020](https://github.com/apache/datafusion/pull/9020) (simonvandel) +- Document minimum required rust version [#9071](https://github.com/apache/datafusion/pull/9071) (comphead) +- Return proper number of expressions for nth_value_agg [#9044](https://github.com/apache/datafusion/pull/9044) (mustafasrepo) +- ScalarUDF with zero arguments should be provided with one null array as parameter [#9031](https://github.com/apache/datafusion/pull/9031) (viirya) +- Update strum requirement from 0.25.0 to 0.26.1 [#9046](https://github.com/apache/datafusion/pull/9046) (dependabot[bot]) +- Create `datafusion-functions` crate, extract encode and decode to 
[#8705](https://github.com/apache/datafusion/pull/8705) (alamb) +- Add documentation for streaming usecase [#9070](https://github.com/apache/datafusion/pull/9070) (mustafasrepo) +- fix: unambiguously truncate time in date_trunc function [#9068](https://github.com/apache/datafusion/pull/9068) (mhilton) +- feat: support array_reverse [#9023](https://github.com/apache/datafusion/pull/9023) (Weijun-H) +- prettier to_timestamp_invoke [#9078](https://github.com/apache/datafusion/pull/9078) (Tangruilin) +- Handle invalid types for negation [#9066](https://github.com/apache/datafusion/pull/9066) (trungda) +- Minor: reduce unwraps in datetime_expressions.rs [#9072](https://github.com/apache/datafusion/pull/9072) (alamb) +- Remove custom doubling strategy + add examples to `VecAllocEx` [#9058](https://github.com/apache/datafusion/pull/9058) (alamb) +- Split physical_plan_tpch into separate benchmarks [#9043](https://github.com/apache/datafusion/pull/9043) (simonvandel) +- Minor: Add ParadeDB to the list of users [#9018](https://github.com/apache/datafusion/pull/9018) (alamb) +- [MINOR]: Add check for unnecessary projection [#9079](https://github.com/apache/datafusion/pull/9079) (mustafasrepo) +- chore(placeholder): update error message and add tests [#9073](https://github.com/apache/datafusion/pull/9073) (appletreeisyellow) +- refer to #8781, convert the internal_err! in datetime_expression.rs to exec_err! 
[#9083](https://github.com/apache/datafusion/pull/9083) (Tangruilin) +- Add benchmarks for to_timestamp and make_date functions [#9086](https://github.com/apache/datafusion/pull/9086) (Omega359) +- chore: Clarify ParadeDB branding [#9088](https://github.com/apache/datafusion/pull/9088) (philippemnoel) +- doc: Add example how to include latest datafusion [#9076](https://github.com/apache/datafusion/pull/9076) (comphead) +- Update minimum rust version to 1.72 [#8997](https://github.com/apache/datafusion/pull/8997) (alamb) +- Fix typo in an error message [#9099](https://github.com/apache/datafusion/pull/9099) (AdamGS) +- Update InfluxDB links in Known Users section of documentation [#9092](https://github.com/apache/datafusion/pull/9092) (alamb) +- Support `FixedSizeList` type coercion [#8902](https://github.com/apache/datafusion/pull/8902) (Weijun-H) +- Improve Canonicalize API [#8983](https://github.com/apache/datafusion/pull/8983) (alamb) +- Update env_logger requirement from 0.10 to 0.11 [#8944](https://github.com/apache/datafusion/pull/8944) (dependabot[bot]) +- Split count_distinct.rs into separate modules [#9087](https://github.com/apache/datafusion/pull/9087) (alamb) +- Fix update_expr for projection pushdown [#9096](https://github.com/apache/datafusion/pull/9096) (viirya) +- Improve `InListSImplifier` -- add test, commend and avoid clones [#8971](https://github.com/apache/datafusion/pull/8971) (alamb) +- feat: issue #8969 adding position function [#8988](https://github.com/apache/datafusion/pull/8988) (Lordworms) +- Cleanup regex_expressions.rs to remove \_regexp_match function [#9107](https://github.com/apache/datafusion/pull/9107) (Omega359) +- Unnest with single expression [#9069](https://github.com/apache/datafusion/pull/9069) (jayzhan211) +- Minor: improve GroupsAccumulator and Accumulator documentation [#8963](https://github.com/apache/datafusion/pull/8963) (alamb) +- move InList related simplify to one place 
[#9037](https://github.com/apache/datafusion/pull/9037) (guojidan) +- docs: add docs and example showing how to get the expression data type [#9118](https://github.com/apache/datafusion/pull/9118) (r3stl355) +- Add http(s) support to the command line [#8753](https://github.com/apache/datafusion/pull/8753) (kcolford) +- Remove External Table Backwards Compatibility Options [#9105](https://github.com/apache/datafusion/pull/9105) (yyy1000) +- feat: support `LargeList` in `flatten` [#9110](https://github.com/apache/datafusion/pull/9110) (Weijun-H) +- feat: improve `make_date` performance [#9112](https://github.com/apache/datafusion/pull/9112) (r3stl355) +- Refactor min/max value update in Parquet statistics [#9120](https://github.com/apache/datafusion/pull/9120) (Weijun-H) +- chore: Fix incorrect comment in substrait consumer [#9123](https://github.com/apache/datafusion/pull/9123) (caicancai) +- Minor: Fix Self referential links in readme [#9119](https://github.com/apache/datafusion/pull/9119) (alamb) +- Add `ColumnarValue::values_to_arrays`, deprecate `columnar_values_to_array` [#9114](https://github.com/apache/datafusion/pull/9114) (alamb) +- Support Copy with Remote Object Stores in datafusion-cli [#9064](https://github.com/apache/datafusion/pull/9064) (manoj-inukolunu) +- Fix Dockerfile min rust version to 1.72 [#9135](https://github.com/apache/datafusion/pull/9135) (alamb) +- fix: schema metadata retrieval when listing parquet table [#9134](https://github.com/apache/datafusion/pull/9134) (brayanjuls) +- Update parse_protobuf_file_scan_config to remove any partition columns from the file_schema in FileScanConfig [#9126](https://github.com/apache/datafusion/pull/9126) (bcmcmill) +- feat: add github action to self-assign the issue [#9132](https://github.com/apache/datafusion/pull/9132) (r3stl355) +- Fix NULL values in FixedSizeList creation [#9141](https://github.com/apache/datafusion/pull/9141) (Weijun-H) +- Add `FunctionRegistry::register_udaf` and 
`FunctionRegistry::register_udwf` [#9075](https://github.com/apache/datafusion/pull/9075) (alamb) +- Change ScalarValue::Struct to ArrayRef [#7893](https://github.com/apache/datafusion/pull/7893) (jayzhan211) +- Support join filter for `SortMergeJoin` [#9080](https://github.com/apache/datafusion/pull/9080) (viirya) +- Typo in docstring [#9149](https://github.com/apache/datafusion/pull/9149) (tv42) +- RecordBatchReceiverStreamBuilder: don't stringify errors [#9155](https://github.com/apache/datafusion/pull/9155) (tv42) +- port position test to scalar [#9128](https://github.com/apache/datafusion/pull/9128) (Lordworms) +- Minor: Improve `DataFrame` docs, add examples [#9159](https://github.com/apache/datafusion/pull/9159) (alamb) +- feat: add ability to query the remote http(s) location directly in datafusion-cli [#9150](https://github.com/apache/datafusion/pull/9150) (r3stl355) +- Add `regexp_like`, improve docs and examples for `regexp_match` [#9137](https://github.com/apache/datafusion/pull/9137) (Omega359) +- Partial Sort Plan Implementation [#9125](https://github.com/apache/datafusion/pull/9125) (ahmetenis) +- Update tonic requirement from 0.10 to 0.11 [#9176](https://github.com/apache/datafusion/pull/9176) (dependabot[bot]) +- minor: fix error message function naming [#9168](https://github.com/apache/datafusion/pull/9168) (comphead) +- Minor: Update `DataFrame::write_table` docs [#9169](https://github.com/apache/datafusion/pull/9169) (alamb) +- Improve PhysicalExpr documentation [#9180](https://github.com/apache/datafusion/pull/9180) (alamb) +- Fix sphinx warnings [#9142](https://github.com/apache/datafusion/pull/9142) (ongchi) +- Use concat to simplify Nested Scalar creation [#9174](https://github.com/apache/datafusion/pull/9174) (jayzhan211) +- Minor: Remove unnecessary map_err [#9186](https://github.com/apache/datafusion/pull/9186) (alamb) +- Add example of using `PruningPredicate` to datafusion-examples [#9183](https://github.com/apache/datafusion/pull/9183)
(alamb) +- Use prep_null_mask_filter to handle nulls in selection mask [#9163](https://github.com/apache/datafusion/pull/9163) (viirya) +- [Document] Adding UDF by impl ScalarUDFImpl [#9172](https://github.com/apache/datafusion/pull/9172) (yyy1000) +- Docs: Extend `PruningPredicate` with background and implementation info [#9184](https://github.com/apache/datafusion/pull/9184) (alamb) +- chore: make tokio a workspace dependency [#9187](https://github.com/apache/datafusion/pull/9187) (PsiACE) +- Examples link in catalogs.rs leads to a 404 [#9194](https://github.com/apache/datafusion/pull/9194) (Omega359) +- Add test pipeline for Mac aarch64 [#9191](https://github.com/apache/datafusion/pull/9191) (viirya) +- Add string aggregate grouping fuzz test, add `MemTable::with_sort_exprs` [#9190](https://github.com/apache/datafusion/pull/9190) (alamb) +- Create `datafusion-functions-array` crate and move `ArrayToString` function into it [#9113](https://github.com/apache/datafusion/pull/9113) (alamb) +- Add constant expression support to equivalence properties [#9198](https://github.com/apache/datafusion/pull/9198) (mustafasrepo) +- chore: update tpch-docker docker repository [#9204](https://github.com/apache/datafusion/pull/9204) (pmcgleenon) +- feat: implement select directly from s3 and gcs locations in datafusion-cli [#9199](https://github.com/apache/datafusion/pull/9199) (r3stl355) +- MINOR: Add "fs" feature to "tokio", fix "features" typo. 
[#9210](https://github.com/apache/datafusion/pull/9210) (mustafasrepo) +- Add `to_char` function implementation using chrono formats [#9181](https://github.com/apache/datafusion/pull/9181) (Omega359) +- Add `SessionContext::read_batches` [#9197](https://github.com/apache/datafusion/pull/9197) (Lordworms) +- feat: support block gzip for streams [#9175](https://github.com/apache/datafusion/pull/9175) (tshauck) +- chore(pruning): Support `IS NOT NULL` predicates in `PruningPredicate` [#9208](https://github.com/apache/datafusion/pull/9208) (appletreeisyellow) +- Add cargo audit CI [#9182](https://github.com/apache/datafusion/pull/9182) (ongchi) +- Move `nullif` and `isnan` to datafusion-functions [#9216](https://github.com/apache/datafusion/pull/9216) (alamb) +- Bugfix - Projection Removal Conditions [#9215](https://github.com/apache/datafusion/pull/9215) (berkaysynnada) +- Partitioning fixes [#9207](https://github.com/apache/datafusion/pull/9207) (esheppa) +- Return an error when a column does not exist in window function [#9202](https://github.com/apache/datafusion/pull/9202) (PhVHoang) +- Revert "chore(pruning): Support `IS NOT NULL` predicates in `PruningPredicate` (#9208)" [#9232](https://github.com/apache/datafusion/pull/9232) (appletreeisyellow) +- Improve documentation on how to build `ScalarValue::Struct` and add `ScalarStructBuilder` [#9229](https://github.com/apache/datafusion/pull/9229) (alamb) +- Minor: improve Display of output ordering of `StreamTableExec` [#9225](https://github.com/apache/datafusion/pull/9225) (mustafasrepo) +- Support compute return types from argument values (not just their DataTypes) [#8985](https://github.com/apache/datafusion/pull/8985) (yyy1000) +- Dont call multiunzip when no stats [#9220](https://github.com/apache/datafusion/pull/9220) (matthewmturner) +- Use setup-macos-aarch64-builder for aarch64 CI pipeline [#9242](https://github.com/apache/datafusion/pull/9242) (viirya) +- GROUP-BY prioritizes input columns in case of 
ambiguity [#9228](https://github.com/apache/datafusion/pull/9228) (jonahgao) +- Minor: chore: improve catalog test in mod.rs [#9244](https://github.com/apache/datafusion/pull/9244) (caicancai) +- Add example for `ScalarStructBuilder::new_null`, fix display for `null` `ScalarValue::Struct` [#9238](https://github.com/apache/datafusion/pull/9238) (alamb) diff --git a/dev/changelog/37.0.0.md b/dev/changelog/37.0.0.md index b1fcd5fdf008..e72ae239b73d 100644 --- a/dev/changelog/37.0.0.md +++ b/dev/changelog/37.0.0.md @@ -17,331 +17,331 @@ under the License. --> -## [37.0.0](https://github.com/apache/arrow-datafusion/tree/37.0.0) (2024-03-28) +## [37.0.0](https://github.com/apache/datafusion/tree/37.0.0) (2024-03-28) -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/36.0.0...37.0.0) +[Full Changelog](https://github.com/apache/datafusion/compare/36.0.0...37.0.0) **Breaking changes:** -- refactor: Change `SchemaProvider::table` to return `Result` rather than `Option<..>` [#9307](https://github.com/apache/arrow-datafusion/pull/9307) (crepererum) -- feat: issue_9285: port builtin reg function into datafusion-function-\* (1/3 regexpmatch) [#9329](https://github.com/apache/arrow-datafusion/pull/9329) (Lordworms) -- Cache common plan properties to eliminate recursive calls in physical plan [#9346](https://github.com/apache/arrow-datafusion/pull/9346) (mustafasrepo) -- Consolidate `TreeNode` transform and rewrite APIs [#8891](https://github.com/apache/arrow-datafusion/pull/8891) (peter-toth) -- Extend argument types for udf `return_type_from_exprs` [#9522](https://github.com/apache/arrow-datafusion/pull/9522) (jayzhan211) -- Systematic Configuration in 'Create External Table' and 'Copy To' Options [#9382](https://github.com/apache/arrow-datafusion/pull/9382) (metesynnada) -- Move trim functions (btrim, ltrim, rtrim) to datafusion_functions, make expr_fn API consistent [#9730](https://github.com/apache/arrow-datafusion/pull/9730) (Omega359) +- refactor: Change 
`SchemaProvider::table` to return `Result` rather than `Option<..>` [#9307](https://github.com/apache/datafusion/pull/9307) (crepererum) +- feat: issue_9285: port builtin reg function into datafusion-function-\* (1/3 regexpmatch) [#9329](https://github.com/apache/datafusion/pull/9329) (Lordworms) +- Cache common plan properties to eliminate recursive calls in physical plan [#9346](https://github.com/apache/datafusion/pull/9346) (mustafasrepo) +- Consolidate `TreeNode` transform and rewrite APIs [#8891](https://github.com/apache/datafusion/pull/8891) (peter-toth) +- Extend argument types for udf `return_type_from_exprs` [#9522](https://github.com/apache/datafusion/pull/9522) (jayzhan211) +- Systematic Configuration in 'Create External Table' and 'Copy To' Options [#9382](https://github.com/apache/datafusion/pull/9382) (metesynnada) +- Move trim functions (btrim, ltrim, rtrim) to datafusion_functions, make expr_fn API consistent [#9730](https://github.com/apache/datafusion/pull/9730) (Omega359) **Performance related:** -- perf: improve to_field performance [#9722](https://github.com/apache/arrow-datafusion/pull/9722) (haohuaijin) +- perf: improve to_field performance [#9722](https://github.com/apache/datafusion/pull/9722) (haohuaijin) **Implemented enhancements:** -- feat: support for defining ARRAY columns in `CREATE TABLE` [#9381](https://github.com/apache/arrow-datafusion/pull/9381) (jonahgao) -- feat: support `unnest` in FROM clause [#9355](https://github.com/apache/arrow-datafusion/pull/9355) (jonahgao) -- feat: support nvl2 function [#9364](https://github.com/apache/arrow-datafusion/pull/9364) (guojidan) -- feat: issue #9224 substitute tlide in table path [#9259](https://github.com/apache/arrow-datafusion/pull/9259) (Lordworms) -- feat: replace std Instant with wasm-compatible wrapper [#9189](https://github.com/apache/arrow-datafusion/pull/9189) (waynexia) -- feat: support `unnest` with additional columns 
[#9400](https://github.com/apache/arrow-datafusion/pull/9400) (jonahgao) -- feat: Support `EscapedStringLiteral`, update sqlparser to `0.44.0` [#9268](https://github.com/apache/arrow-datafusion/pull/9268) (JasonLi-cn) -- feat: add support for fixed list wildcard in type signature [#9312](https://github.com/apache/arrow-datafusion/pull/9312) (universalmind303) -- feat: Add projection to HashJoinExec. [#9236](https://github.com/apache/arrow-datafusion/pull/9236) (my-vegetable-has-exploded) -- feat: function name hints for UDFs [#9407](https://github.com/apache/arrow-datafusion/pull/9407) (SteveLauC) -- feat: Introduce convert Expr to SQL string API and basic feature [#9517](https://github.com/apache/arrow-datafusion/pull/9517) (backkem) -- feat: implement more expr_to_sql functionality [#9578](https://github.com/apache/arrow-datafusion/pull/9578) (devinjdangelo) -- feat: implement aggregation and subquery plans to SQL [#9606](https://github.com/apache/arrow-datafusion/pull/9606) (devinjdangelo) -- feat: track memory usage for recursive CTE, enable recursive CTEs by default [#9619](https://github.com/apache/arrow-datafusion/pull/9619) (jonahgao) -- feat: Between expr to sql string [#9803](https://github.com/apache/arrow-datafusion/pull/9803) (sebastian2296) -- feat: Expose `array_empty` and `list_empty` functions as alias of `empty` function [#9807](https://github.com/apache/arrow-datafusion/pull/9807) (erenavsarogullari) -- feat: Not expr to string [#9802](https://github.com/apache/arrow-datafusion/pull/9802) (sebastian2296) -- feat: pass SessionState not SessionConfig to FunctionFactory::create [#9837](https://github.com/apache/arrow-datafusion/pull/9837) (tshauck) +- feat: support for defining ARRAY columns in `CREATE TABLE` [#9381](https://github.com/apache/datafusion/pull/9381) (jonahgao) +- feat: support `unnest` in FROM clause [#9355](https://github.com/apache/datafusion/pull/9355) (jonahgao) +- feat: support nvl2 function 
[#9364](https://github.com/apache/datafusion/pull/9364) (guojidan) +- feat: issue #9224 substitute tlide in table path [#9259](https://github.com/apache/datafusion/pull/9259) (Lordworms) +- feat: replace std Instant with wasm-compatible wrapper [#9189](https://github.com/apache/datafusion/pull/9189) (waynexia) +- feat: support `unnest` with additional columns [#9400](https://github.com/apache/datafusion/pull/9400) (jonahgao) +- feat: Support `EscapedStringLiteral`, update sqlparser to `0.44.0` [#9268](https://github.com/apache/datafusion/pull/9268) (JasonLi-cn) +- feat: add support for fixed list wildcard in type signature [#9312](https://github.com/apache/datafusion/pull/9312) (universalmind303) +- feat: Add projection to HashJoinExec. [#9236](https://github.com/apache/datafusion/pull/9236) (my-vegetable-has-exploded) +- feat: function name hints for UDFs [#9407](https://github.com/apache/datafusion/pull/9407) (SteveLauC) +- feat: Introduce convert Expr to SQL string API and basic feature [#9517](https://github.com/apache/datafusion/pull/9517) (backkem) +- feat: implement more expr_to_sql functionality [#9578](https://github.com/apache/datafusion/pull/9578) (devinjdangelo) +- feat: implement aggregation and subquery plans to SQL [#9606](https://github.com/apache/datafusion/pull/9606) (devinjdangelo) +- feat: track memory usage for recursive CTE, enable recursive CTEs by default [#9619](https://github.com/apache/datafusion/pull/9619) (jonahgao) +- feat: Between expr to sql string [#9803](https://github.com/apache/datafusion/pull/9803) (sebastian2296) +- feat: Expose `array_empty` and `list_empty` functions as alias of `empty` function [#9807](https://github.com/apache/datafusion/pull/9807) (erenavsarogullari) +- feat: Not expr to string [#9802](https://github.com/apache/datafusion/pull/9802) (sebastian2296) +- feat: pass SessionState not SessionConfig to FunctionFactory::create [#9837](https://github.com/apache/datafusion/pull/9837) (tshauck) **Fixed bugs:** -- 
fix: use `JoinSet` to make spawned tasks cancel-safe [#9318](https://github.com/apache/arrow-datafusion/pull/9318) (DDtKey) -- fix: nvl function's return type [#9357](https://github.com/apache/arrow-datafusion/pull/9357) (guojidan) -- fix: panic in isnan() when no args are given [#9377](https://github.com/apache/arrow-datafusion/pull/9377) (SteveLauC) -- fix: using test data sample for catalog example [#9372](https://github.com/apache/arrow-datafusion/pull/9372) (korowa) -- fix: sort_batch function unsupported mixed types with list [#9410](https://github.com/apache/arrow-datafusion/pull/9410) (JasonLi-cn) -- fix: casting to ARRAY types failed [#9441](https://github.com/apache/arrow-datafusion/pull/9441) (jonahgao) -- fix: reading from partitioned `json` & `arrow` tables [#9431](https://github.com/apache/arrow-datafusion/pull/9431) (korowa) -- fix: coalesce function should return correct data type [#9459](https://github.com/apache/arrow-datafusion/pull/9459) (viirya) -- fix: `generate_series` and `range` panic on edge cases [#9503](https://github.com/apache/arrow-datafusion/pull/9503) (jonahgao) -- fix: `substr_index` not handling negative occurrence correctly [#9475](https://github.com/apache/arrow-datafusion/pull/9475) (jonahgao) -- fix: support two argument TRIM [#9521](https://github.com/apache/arrow-datafusion/pull/9521) (tshauck) -- fix: incorrect null handling in `range` and `generate_series` [#9574](https://github.com/apache/arrow-datafusion/pull/9574) (jonahgao) -- fix: recursive cte hangs on joins [#9687](https://github.com/apache/arrow-datafusion/pull/9687) (jonahgao) -- fix: parallel parquet can underflow when max_record_batch_rows < execution.batch_size [#9737](https://github.com/apache/arrow-datafusion/pull/9737) (devinjdangelo) -- fix: change placeholder errors from Internal to Plan [#9745](https://github.com/apache/arrow-datafusion/pull/9745) (erratic-pattern) -- fix: ensure mutual compatibility of the two input schemas from recursive CTEs 
[#9795](https://github.com/apache/arrow-datafusion/pull/9795) (jonahgao) +- fix: use `JoinSet` to make spawned tasks cancel-safe [#9318](https://github.com/apache/datafusion/pull/9318) (DDtKey) +- fix: nvl function's return type [#9357](https://github.com/apache/datafusion/pull/9357) (guojidan) +- fix: panic in isnan() when no args are given [#9377](https://github.com/apache/datafusion/pull/9377) (SteveLauC) +- fix: using test data sample for catalog example [#9372](https://github.com/apache/datafusion/pull/9372) (korowa) +- fix: sort_batch function unsupported mixed types with list [#9410](https://github.com/apache/datafusion/pull/9410) (JasonLi-cn) +- fix: casting to ARRAY types failed [#9441](https://github.com/apache/datafusion/pull/9441) (jonahgao) +- fix: reading from partitioned `json` & `arrow` tables [#9431](https://github.com/apache/datafusion/pull/9431) (korowa) +- fix: coalesce function should return correct data type [#9459](https://github.com/apache/datafusion/pull/9459) (viirya) +- fix: `generate_series` and `range` panic on edge cases [#9503](https://github.com/apache/datafusion/pull/9503) (jonahgao) +- fix: `substr_index` not handling negative occurrence correctly [#9475](https://github.com/apache/datafusion/pull/9475) (jonahgao) +- fix: support two argument TRIM [#9521](https://github.com/apache/datafusion/pull/9521) (tshauck) +- fix: incorrect null handling in `range` and `generate_series` [#9574](https://github.com/apache/datafusion/pull/9574) (jonahgao) +- fix: recursive cte hangs on joins [#9687](https://github.com/apache/datafusion/pull/9687) (jonahgao) +- fix: parallel parquet can underflow when max_record_batch_rows < execution.batch_size [#9737](https://github.com/apache/datafusion/pull/9737) (devinjdangelo) +- fix: change placeholder errors from Internal to Plan [#9745](https://github.com/apache/datafusion/pull/9745) (erratic-pattern) +- fix: ensure mutual compatibility of the two input schemas from recursive CTEs 
[#9795](https://github.com/apache/datafusion/pull/9795) (jonahgao) **Documentation updates:** -- docs: put flatten in top fn list [#9376](https://github.com/apache/arrow-datafusion/pull/9376) (SteveLauC) -- Update documentation so list_to_string alias to point to array_to_string [#9374](https://github.com/apache/arrow-datafusion/pull/9374) (monkwire) -- Uplift keys/dependencies to use more workspace inheritance [#9293](https://github.com/apache/arrow-datafusion/pull/9293) (Jefffrey) -- docs: update contributor guide (migration to sqllogictest is done) [#9408](https://github.com/apache/arrow-datafusion/pull/9408) (SteveLauC) -- Move the to_timestamp\* functions to datafusion-functions [#9388](https://github.com/apache/arrow-datafusion/pull/9388) (Omega359) -- NEW Logo [#9385](https://github.com/apache/arrow-datafusion/pull/9385) (pinarbayata) -- Minor: docs: rm duplicate words. [#9449](https://github.com/apache/arrow-datafusion/pull/9449) (my-vegetable-has-exploded) -- Update contributor guide with updated scalar function howto [#9438](https://github.com/apache/arrow-datafusion/pull/9438) (Omega359) -- docs: fix extraneous char in array functions table of contents [#9560](https://github.com/apache/arrow-datafusion/pull/9560) (tshauck) -- doc: Add missing doc link [#9631](https://github.com/apache/arrow-datafusion/pull/9631) (Weijun-H) -- chore: remove repetitive word `the the` --> `the` in docs / comments [#9673](https://github.com/apache/arrow-datafusion/pull/9673) (InventiveCoder) -- Update example-usage.md to remove reference to simd and rust nightly. 
[#9677](https://github.com/apache/arrow-datafusion/pull/9677) (Omega359) -- Minor: Improve documentation for `LogicalPlan::expressions` [#9698](https://github.com/apache/arrow-datafusion/pull/9698) (alamb) -- Add Minimum Supported Rust Version policy to docs [#9681](https://github.com/apache/arrow-datafusion/pull/9681) (alamb) -- doc: Updated known users list and usage dependency description [#9718](https://github.com/apache/arrow-datafusion/pull/9718) (comphead) +- docs: put flatten in top fn list [#9376](https://github.com/apache/datafusion/pull/9376) (SteveLauC) +- Update documentation so list_to_string alias to point to array_to_string [#9374](https://github.com/apache/datafusion/pull/9374) (monkwire) +- Uplift keys/dependencies to use more workspace inheritance [#9293](https://github.com/apache/datafusion/pull/9293) (Jefffrey) +- docs: update contributor guide (migration to sqllogictest is done) [#9408](https://github.com/apache/datafusion/pull/9408) (SteveLauC) +- Move the to_timestamp\* functions to datafusion-functions [#9388](https://github.com/apache/datafusion/pull/9388) (Omega359) +- NEW Logo [#9385](https://github.com/apache/datafusion/pull/9385) (pinarbayata) +- Minor: docs: rm duplicate words. [#9449](https://github.com/apache/datafusion/pull/9449) (my-vegetable-has-exploded) +- Update contributor guide with updated scalar function howto [#9438](https://github.com/apache/datafusion/pull/9438) (Omega359) +- docs: fix extraneous char in array functions table of contents [#9560](https://github.com/apache/datafusion/pull/9560) (tshauck) +- doc: Add missing doc link [#9631](https://github.com/apache/datafusion/pull/9631) (Weijun-H) +- chore: remove repetitive word `the the` --> `the` in docs / comments [#9673](https://github.com/apache/datafusion/pull/9673) (InventiveCoder) +- Update example-usage.md to remove reference to simd and rust nightly. 
[#9677](https://github.com/apache/datafusion/pull/9677) (Omega359) +- Minor: Improve documentation for `LogicalPlan::expressions` [#9698](https://github.com/apache/datafusion/pull/9698) (alamb) +- Add Minimum Supported Rust Version policy to docs [#9681](https://github.com/apache/datafusion/pull/9681) (alamb) +- doc: Updated known users list and usage dependency description [#9718](https://github.com/apache/datafusion/pull/9718) (comphead) **Merged pull requests:** -- refactor: Change `SchemaProvider::table` to return `Result` rather than `Option<..>` [#9307](https://github.com/apache/arrow-datafusion/pull/9307) (crepererum) -- fix write_partitioned_parquet_results test case bug [#9360](https://github.com/apache/arrow-datafusion/pull/9360) (guojidan) -- fix: use `JoinSet` to make spawned tasks cancel-safe [#9318](https://github.com/apache/arrow-datafusion/pull/9318) (DDtKey) -- Update nix requirement from 0.27.1 to 0.28.0 [#9344](https://github.com/apache/arrow-datafusion/pull/9344) (dependabot[bot]) -- Replace usages of internal_err with exec_err where appropriate [#9241](https://github.com/apache/arrow-datafusion/pull/9241) (Omega359) -- feat : Support for deregistering user defined functions [#9239](https://github.com/apache/arrow-datafusion/pull/9239) (mobley-trent) -- fix: nvl function's return type [#9357](https://github.com/apache/arrow-datafusion/pull/9357) (guojidan) -- refactor: move acos() to function crate [#9297](https://github.com/apache/arrow-datafusion/pull/9297) (SteveLauC) -- docs: put flatten in top fn list [#9376](https://github.com/apache/arrow-datafusion/pull/9376) (SteveLauC) -- Update documentation so list_to_string alias to point to array_to_string [#9374](https://github.com/apache/arrow-datafusion/pull/9374) (monkwire) -- feat: issue_9285: port builtin reg function into datafusion-function-\* (1/3 regexpmatch) [#9329](https://github.com/apache/arrow-datafusion/pull/9329) (Lordworms) -- Add test to verify issue #9161 
[#9265](https://github.com/apache/arrow-datafusion/pull/9265) (jonahgao) -- refactor: fix error macros hygiene (always import `DataFusionError`) [#9366](https://github.com/apache/arrow-datafusion/pull/9366) (crepererum) -- feat: support for defining ARRAY columns in `CREATE TABLE` [#9381](https://github.com/apache/arrow-datafusion/pull/9381) (jonahgao) -- fix: panic in isnan() when no args are given [#9377](https://github.com/apache/arrow-datafusion/pull/9377) (SteveLauC) -- feat: support `unnest` in FROM clause [#9355](https://github.com/apache/arrow-datafusion/pull/9355) (jonahgao) -- feat: support nvl2 function [#9364](https://github.com/apache/arrow-datafusion/pull/9364) (guojidan) -- refactor: move asin() to function crate [#9379](https://github.com/apache/arrow-datafusion/pull/9379) (SteveLauC) -- fix: using test data sample for catalog example [#9372](https://github.com/apache/arrow-datafusion/pull/9372) (korowa) -- delete tail space, fix `error: unused import: DataFusionError` [#9386](https://github.com/apache/arrow-datafusion/pull/9386) (Tangruilin) -- Run cargo-fmt on `datafusion-functions/core` [#9367](https://github.com/apache/arrow-datafusion/pull/9367) (alamb) -- Cache common plan properties to eliminate recursive calls in physical plan [#9346](https://github.com/apache/arrow-datafusion/pull/9346) (mustafasrepo) -- Run cargo-fmt on all of `datafusion-functions` [#9390](https://github.com/apache/arrow-datafusion/pull/9390) (alamb) -- feat: issue #9224 substitute tlide in table path [#9259](https://github.com/apache/arrow-datafusion/pull/9259) (Lordworms) -- port range function and change gen_series logic [#9352](https://github.com/apache/arrow-datafusion/pull/9352) (Lordworms) -- [MINOR]: Generate physical plan, instead of logical plan in the bench test [#9383](https://github.com/apache/arrow-datafusion/pull/9383) (mustafasrepo) -- Add `to_date` function [#9019](https://github.com/apache/arrow-datafusion/pull/9019) (Tangruilin) -- Minor: clarify 
performance in docs for `ScalarUDF`, `ScalarUDAF` and `ScalarUDWF` [#9384](https://github.com/apache/arrow-datafusion/pull/9384) (alamb) -- feat: replace std Instant with wasm-compatible wrapper [#9189](https://github.com/apache/arrow-datafusion/pull/9189) (waynexia) -- Uplift keys/dependencies to use more workspace inheritance [#9293](https://github.com/apache/arrow-datafusion/pull/9293) (Jefffrey) -- Improve documentation for ExecutionPlanProperties, use consistent field name [#9389](https://github.com/apache/arrow-datafusion/pull/9389) (alamb) -- Doc: Workaround for Running cargo test locally without signficant memory [#9402](https://github.com/apache/arrow-datafusion/pull/9402) (devinjdangelo) -- feat: support `unnest` with additional columns [#9400](https://github.com/apache/arrow-datafusion/pull/9400) (jonahgao) -- Minor: improve the display name of `unnest` expressions [#9412](https://github.com/apache/arrow-datafusion/pull/9412) (jonahgao) -- Minor: Move function signature check to planning stage [#9401](https://github.com/apache/arrow-datafusion/pull/9401) (2010YOUY01) -- chore(deps): update substrait requirement from 0.24.0 to 0.25.1 [#9406](https://github.com/apache/arrow-datafusion/pull/9406) (dependabot[bot]) -- docs: update contributor guide (migration to sqllogictest is done) [#9408](https://github.com/apache/arrow-datafusion/pull/9408) (SteveLauC) -- Move the to_timestamp\* functions to datafusion-functions [#9388](https://github.com/apache/arrow-datafusion/pull/9388) (Omega359) -- Minor: Support LargeList List Range indexing and fix large list handling in ConstEvaluator [#9393](https://github.com/apache/arrow-datafusion/pull/9393) (jayzhan211) -- NEW Logo [#9385](https://github.com/apache/arrow-datafusion/pull/9385) (pinarbayata) -- Handle serde for ScalarUDF [#9395](https://github.com/apache/arrow-datafusion/pull/9395) (yyy1000) -- Minior: Add tests with `sqrt` with negative argument [#9426](https://github.com/apache/arrow-datafusion/pull/9426) 
(caicancai) -- Move SpawnedTask from datafusion_physical_plan to new `datafusion_common_runtime` crate [#9414](https://github.com/apache/arrow-datafusion/pull/9414) (mustafasrepo) -- Re-export datafusion-functions-array [#9433](https://github.com/apache/arrow-datafusion/pull/9433) (andygrove) -- Minor: Support LargeList for ListIndex [#9424](https://github.com/apache/arrow-datafusion/pull/9424) (PsiACE) -- move ArrayDims, ArrayNdims and Cardinality to datafusion-function-crate [#9425](https://github.com/apache/arrow-datafusion/pull/9425) (Weijun-H) -- refactor: make instr() an alias of strpos() [#9396](https://github.com/apache/arrow-datafusion/pull/9396) (SteveLauC) -- Add test case for invalid tz in timestamp literal [#9429](https://github.com/apache/arrow-datafusion/pull/9429) (MohamedAbdeen21) -- Minor: simplify call [#9434](https://github.com/apache/arrow-datafusion/pull/9434) (alamb) -- Support IGNORE NULLS for LEAD window function [#9419](https://github.com/apache/arrow-datafusion/pull/9419) (comphead) -- fix sqllogicaltest result [#9444](https://github.com/apache/arrow-datafusion/pull/9444) (jackwener) -- Minor: docs: rm duplicate words. [#9449](https://github.com/apache/arrow-datafusion/pull/9449) (my-vegetable-has-exploded) -- minor: fix cargo clippy some warning [#9442](https://github.com/apache/arrow-datafusion/pull/9442) (jackwener) -- port regexp_like function and port related tests [#9397](https://github.com/apache/arrow-datafusion/pull/9397) (Lordworms) -- fix: sort_batch function unsupported mixed types with list [#9410](https://github.com/apache/arrow-datafusion/pull/9410) (JasonLi-cn) -- refactor: add `join_unwind` to `SpawnedTask` [#9422](https://github.com/apache/arrow-datafusion/pull/9422) (DDtKey) -- Ignore null LEAD support for small batch sizes. 
[#9445](https://github.com/apache/arrow-datafusion/pull/9445) (mustafasrepo) -- fix: casting to ARRAY types failed [#9441](https://github.com/apache/arrow-datafusion/pull/9441) (jonahgao) -- fix: reading from partitioned `json` & `arrow` tables [#9431](https://github.com/apache/arrow-datafusion/pull/9431) (korowa) -- feat: Support `EscapedStringLiteral`, update sqlparser to `0.44.0` [#9268](https://github.com/apache/arrow-datafusion/pull/9268) (JasonLi-cn) -- Minor: fix LEAD test description [#9451](https://github.com/apache/arrow-datafusion/pull/9451) (comphead) -- Consolidate `TreeNode` transform and rewrite APIs [#8891](https://github.com/apache/arrow-datafusion/pull/8891) (peter-toth) -- Support `Date32` arguments for `generate_series` [#9420](https://github.com/apache/arrow-datafusion/pull/9420) (Lordworms) -- Minor: change doc for range [#9455](https://github.com/apache/arrow-datafusion/pull/9455) (Lordworms) -- doc: add missing function index in scalar_expression.md [#9462](https://github.com/apache/arrow-datafusion/pull/9462) (Weijun-H) -- build: Update bigdecimal version in `Cargo.toml` [#9471](https://github.com/apache/arrow-datafusion/pull/9471) (comphead) -- chore(deps): update base64 requirement from 0.21 to 0.22 [#9446](https://github.com/apache/arrow-datafusion/pull/9446) (dependabot[bot]) -- Port regexp_replace functions and related tests [#9454](https://github.com/apache/arrow-datafusion/pull/9454) (Lordworms) -- Update contributor guide with updated scalar function howto [#9438](https://github.com/apache/arrow-datafusion/pull/9438) (Omega359) -- feat: add support for fixed list wildcard in type signature [#9312](https://github.com/apache/arrow-datafusion/pull/9312) (universalmind303) -- Add a `ScalarUDFImpl::simplfy()` API, move `SimplifyInfo` et al to datafusion_expr [#9304](https://github.com/apache/arrow-datafusion/pull/9304) (jayzhan211) -- Implement IGNORE NULLS for FIRST_VALUE [#9411](https://github.com/apache/arrow-datafusion/pull/9411) 
(huaxingao) -- Add plugable handler for `CREATE FUNCTION` [#9333](https://github.com/apache/arrow-datafusion/pull/9333) (milenkovicm) -- Enable configurable display of partition sizes in the explain statement [#9474](https://github.com/apache/arrow-datafusion/pull/9474) (jayzhan211) -- Reduce casts for LEAD/LAG [#9468](https://github.com/apache/arrow-datafusion/pull/9468) (comphead) -- [CI build] fix chrono suggestions [#9486](https://github.com/apache/arrow-datafusion/pull/9486) (comphead) -- Make regex dependency optional in datafusion-functions, add CI checks for function packages [#9473](https://github.com/apache/arrow-datafusion/pull/9473) (alamb) -- fix: coalesce function should return correct data type [#9459](https://github.com/apache/arrow-datafusion/pull/9459) (viirya) -- LEAD/LAG calculate default value once [#9485](https://github.com/apache/arrow-datafusion/pull/9485) (comphead) -- chore: simplify the return type of `validate_data_types()` [#9491](https://github.com/apache/arrow-datafusion/pull/9491) (waynexia) -- minor: use arrow-rs casting from Float to Timestamp [#9500](https://github.com/apache/arrow-datafusion/pull/9500) (comphead) -- chore(deps): update substrait requirement from 0.25.1 to 0.27.0 [#9502](https://github.com/apache/arrow-datafusion/pull/9502) (dependabot[bot]) -- fix: `generate_series` and `range` panic on edge cases [#9503](https://github.com/apache/arrow-datafusion/pull/9503) (jonahgao) -- Fix undeterministic behaviour of schema nullability of lag window query [#9508](https://github.com/apache/arrow-datafusion/pull/9508) (mustafasrepo) -- Add `to_unixtime` function [#9077](https://github.com/apache/arrow-datafusion/pull/9077) (Tangruilin) -- Minor: fixed transformed state in UDF Simplify [#9484](https://github.com/apache/arrow-datafusion/pull/9484) (alamb) -- test: port strpos test in physical_expr/src/functions to sqllogictest [#9439](https://github.com/apache/arrow-datafusion/pull/9439) (SteveLauC) -- Port ArrayHas family to 
`functions-array` [#9496](https://github.com/apache/arrow-datafusion/pull/9496) (jayzhan211) -- port array_empty and array_length to datafusion-function-array crate [#9510](https://github.com/apache/arrow-datafusion/pull/9510) (Weijun-H) -- fix: `substr_index` not handling negative occurrence correctly [#9475](https://github.com/apache/arrow-datafusion/pull/9475) (jonahgao) -- [minor] extract collect file statistics method and add doc [#9490](https://github.com/apache/arrow-datafusion/pull/9490) (Ted-Jiang) -- test: sqllogictests for multiple tables join [#9480](https://github.com/apache/arrow-datafusion/pull/9480) (korowa) -- Add support for ignore nulls for LEAD, LAG in WindowAggExec [#9498](https://github.com/apache/arrow-datafusion/pull/9498) (Lordworms) -- Minior: Improve log expr description [#9516](https://github.com/apache/arrow-datafusion/pull/9516) (caicancai) -- port flatten to datafusion-function-array [#9523](https://github.com/apache/arrow-datafusion/pull/9523) (Weijun-H) -- feat: Add projection to HashJoinExec. 
[#9236](https://github.com/apache/arrow-datafusion/pull/9236) (my-vegetable-has-exploded) -- Add example for `FunctionFactory` [#9482](https://github.com/apache/arrow-datafusion/pull/9482) (milenkovicm) -- Move date_part, date_trunc, date_bin functions to datafusion-functions [#9435](https://github.com/apache/arrow-datafusion/pull/9435) (Omega359) -- fix: support two argument TRIM [#9521](https://github.com/apache/arrow-datafusion/pull/9521) (tshauck) -- Remove physical expr of ListIndex and ListRange, convert to `array_element` and `array_slice` functions [#9492](https://github.com/apache/arrow-datafusion/pull/9492) (jayzhan211) -- feat: function name hints for UDFs [#9407](https://github.com/apache/arrow-datafusion/pull/9407) (SteveLauC) -- Minor: Improve documentation for registering `AnalyzerRule` [#9520](https://github.com/apache/arrow-datafusion/pull/9520) (alamb) -- Extend argument types for udf `return_type_from_exprs` [#9522](https://github.com/apache/arrow-datafusion/pull/9522) (jayzhan211) -- move make_array array_append array_prepend array_concat function to datafusion-functions-array crate [#9504](https://github.com/apache/arrow-datafusion/pull/9504) (guojidan) -- Port `StringToArray` to `function-arrays` subcrate [#9543](https://github.com/apache/arrow-datafusion/pull/9543) (erenavsarogullari) -- Minor: remove `..` pattern matching in sql planner [#9531](https://github.com/apache/arrow-datafusion/pull/9531) (alamb) -- Minor: Fix document Interval syntax [#9542](https://github.com/apache/arrow-datafusion/pull/9542) (yyy1000) -- Port `struct` to datafusion-functions [#9546](https://github.com/apache/arrow-datafusion/pull/9546) (yyy1000) -- UDAF and UDWF support aliases [#9489](https://github.com/apache/arrow-datafusion/pull/9489) (lewiszlw) -- docs: fix extraneous char in array functions table of contents [#9560](https://github.com/apache/arrow-datafusion/pull/9560) (tshauck) -- [MINOR]: Fix undeterministic test 
[#9559](https://github.com/apache/arrow-datafusion/pull/9559) (mustafasrepo) -- Port `arrow_typeof` to datafusion-function [#9524](https://github.com/apache/arrow-datafusion/pull/9524) (yyy1000) -- feat: Introduce convert Expr to SQL string API and basic feature [#9517](https://github.com/apache/arrow-datafusion/pull/9517) (backkem) -- Port `ArraySort` to `function-arrays` subcrate [#9551](https://github.com/apache/arrow-datafusion/pull/9551) (erenavsarogullari) -- refactor: unify some plan optimization in CommonSubexprEliminate [#9556](https://github.com/apache/arrow-datafusion/pull/9556) (jackwener) -- Port `ArrayDistinct` to `functions-array` subcrate [#9549](https://github.com/apache/arrow-datafusion/pull/9549) (erenavsarogullari) -- Minor: add a sql_planner benchmarks to reflecte select many field on a huge table [#9536](https://github.com/apache/arrow-datafusion/pull/9536) (haohuaijin) -- Support IGNORE NULLS for FIRST/LAST window function [#9470](https://github.com/apache/arrow-datafusion/pull/9470) (huaxingao) -- Systematic Configuration in 'Create External Table' and 'Copy To' Options [#9382](https://github.com/apache/arrow-datafusion/pull/9382) (metesynnada) -- fix: incorrect null handling in `range` and `generate_series` [#9574](https://github.com/apache/arrow-datafusion/pull/9574) (jonahgao) -- Update README.md [#9572](https://github.com/apache/arrow-datafusion/pull/9572) (Abdullahsab3) -- Port tan, tanh to datafusion-functions [#9535](https://github.com/apache/arrow-datafusion/pull/9535) (ongchi) -- feat(9493): provide access to FileMetaData for files written with ParquetSink [#9548](https://github.com/apache/arrow-datafusion/pull/9548) (wiedld) -- Export datafusion-functions UDFs publically [#9585](https://github.com/apache/arrow-datafusion/pull/9585) (alamb) -- Update the comment and Add a check [#9571](https://github.com/apache/arrow-datafusion/pull/9571) (colommar) -- Port `ArrayRepeat` to `functions-array` subcrate 
[#9568](https://github.com/apache/arrow-datafusion/pull/9568) (erenavsarogullari) -- Fix ApproxPercentileAccumulator on zero values [#9582](https://github.com/apache/arrow-datafusion/pull/9582) (Dandandan) -- Add `FunctionRewrite` API, Move Array specific rewrites to `datafusion_functions_array` [#9583](https://github.com/apache/arrow-datafusion/pull/9583) (alamb) -- Move from_unixtime, now, current_date, current_time functions to datafusion-functions [#9537](https://github.com/apache/arrow-datafusion/pull/9537) (Omega359) -- minor: update Debug trait impl for WindowsFrame [#9587](https://github.com/apache/arrow-datafusion/pull/9587) (comphead) -- Initial support LogicalPlan to SQL String [#9596](https://github.com/apache/arrow-datafusion/pull/9596) (backkem) -- refactor: use a common macro to define math UDFs [#9598](https://github.com/apache/arrow-datafusion/pull/9598) (jonahgao) -- Move all `crypto` related functions to `datafusion-functions` [#9590](https://github.com/apache/arrow-datafusion/pull/9590) (Lordworms) -- Remove physical expr of NamedStructField, convert to `get_field` function call [#9563](https://github.com/apache/arrow-datafusion/pull/9563) (yyy1000) -- Add `/benchmark` github command to comparison benchmark between base and pr commit [#9461](https://github.com/apache/arrow-datafusion/pull/9461) (gruuya) -- support unnest as subexpression [#9592](https://github.com/apache/arrow-datafusion/pull/9592) (YjyJeff) -- feat: implement more expr_to_sql functionality [#9578](https://github.com/apache/arrow-datafusion/pull/9578) (devinjdangelo) -- Port `ArrayResize` to `functions-array` subcrate [#9570](https://github.com/apache/arrow-datafusion/pull/9570) (erenavsarogullari) -- Move make_date, to_char to datafusion-functions [#9601](https://github.com/apache/arrow-datafusion/pull/9601) (Omega359) -- Fix to_timestamp benchmark [#9608](https://github.com/apache/arrow-datafusion/pull/9608) (Omega359) -- feat: implement aggregation and subquery plans to SQL 
[#9606](https://github.com/apache/arrow-datafusion/pull/9606) (devinjdangelo) -- Port ArrayElem/Slice/PopFront/Back into `functions-array` [#9615](https://github.com/apache/arrow-datafusion/pull/9615) (jayzhan211) -- Minor: Remove datafusion-functions-array dependency from datafusion-optimizer [#9621](https://github.com/apache/arrow-datafusion/pull/9621) (alamb) -- Enable TTY during bench data generation [#9626](https://github.com/apache/arrow-datafusion/pull/9626) (gruuya) -- Remove constant expressions from SortExprs in the SortExec [#9618](https://github.com/apache/arrow-datafusion/pull/9618) (mustafasrepo) -- Try fixing missing results name in the benchmark step [#9632](https://github.com/apache/arrow-datafusion/pull/9632) (gruuya) -- feat: track memory usage for recursive CTE, enable recursive CTEs by default [#9619](https://github.com/apache/arrow-datafusion/pull/9619) (jonahgao) -- doc: Add missing doc link [#9631](https://github.com/apache/arrow-datafusion/pull/9631) (Weijun-H) -- Add explicit move of PR bench results if they were placed in HEAD dir [#9636](https://github.com/apache/arrow-datafusion/pull/9636) (gruuya) -- Add `array_reverse` function to datafusion-function-\* crate [#9630](https://github.com/apache/arrow-datafusion/pull/9630) (Weijun-H) -- Move parts of `InListSimplifier` simplify rules to `Simplifier` [#9628](https://github.com/apache/arrow-datafusion/pull/9628) (jayzhan211) -- Port Array Union and Intersect to `functions-array` [#9629](https://github.com/apache/arrow-datafusion/pull/9629) (jayzhan211) -- Port `ArrayPosition` and `ArrayPositions` to `functions-array` subcrate [#9617](https://github.com/apache/arrow-datafusion/pull/9617) (erenavsarogullari) -- Optimize make_date (#9089) [#9600](https://github.com/apache/arrow-datafusion/pull/9600) (vojtechtoman) -- Support AT TIME ZONE clause [#9647](https://github.com/apache/arrow-datafusion/pull/9647) (tinfoil-knight) -- Window Linear Mode use smaller buffers 
[#9597](https://github.com/apache/arrow-datafusion/pull/9597) (mustafasrepo) -- Port `ArrayExcept` to `functions-array` subcrate [#9634](https://github.com/apache/arrow-datafusion/pull/9634) (erenavsarogullari) -- chore: improve array expression doc and clean up array_expression.rs [#9650](https://github.com/apache/arrow-datafusion/pull/9650) (Weijun-H) -- Minor: remove clone in `exprlist_to_fields` [#9657](https://github.com/apache/arrow-datafusion/pull/9657) (jayzhan211) -- Port `ArrayRemove`, `ArrayRemoveN`, `ArrayRemoveAll` to `functions-array` subcrate [#9656](https://github.com/apache/arrow-datafusion/pull/9656) (erenavsarogullari) -- Minor: Remove redundant dependencies from `datafusion-functions/Cargo.toml` [#9622](https://github.com/apache/arrow-datafusion/pull/9622) (alamb) -- Support IGNORE NULLS for NTH_VALUE window function [#9625](https://github.com/apache/arrow-datafusion/pull/9625) (huaxingao) -- Improve Robustness of Unparser Testing and Implementation [#9623](https://github.com/apache/arrow-datafusion/pull/9623) (devinjdangelo) -- Adding Constant Check for FilterExec [#9649](https://github.com/apache/arrow-datafusion/pull/9649) (Lordworms) -- chore(deps-dev): bump follow-redirects from 1.15.4 to 1.15.6 in /datafusion/wasmtest/datafusion-wasm-app [#9609](https://github.com/apache/arrow-datafusion/pull/9609) (dependabot[bot]) -- move array_replace family functions to datafusion-function-array crate [#9651](https://github.com/apache/arrow-datafusion/pull/9651) (Weijun-H) -- chore: remove repetitive word `the the` --> `the` in docs / comments [#9673](https://github.com/apache/arrow-datafusion/pull/9673) (InventiveCoder) -- Update example-usage.md to remove reference to simd and rust nightly. 
[#9677](https://github.com/apache/arrow-datafusion/pull/9677) (Omega359) -- [MINOR]: Remove some `.unwrap`s from nth_value.rs file [#9674](https://github.com/apache/arrow-datafusion/pull/9674) (mustafasrepo) -- minor: Remove deprecated methods [#9627](https://github.com/apache/arrow-datafusion/pull/9627) (comphead) -- Migrate `arrow_cast` to a UDF [#9610](https://github.com/apache/arrow-datafusion/pull/9610) (alamb) -- parquet: Add row*groups_matched*{statistics,bloom_filter} statistics [#9640](https://github.com/apache/arrow-datafusion/pull/9640) (progval) -- Make COPY TO align with CREATE EXTERNAL TABLE [#9604](https://github.com/apache/arrow-datafusion/pull/9604) (metesynnada) -- Support "A column is known to be entirely NULL" in `PruningPredicate` [#9223](https://github.com/apache/arrow-datafusion/pull/9223) (appletreeisyellow) -- Suppress self update for windows CI runner [#9661](https://github.com/apache/arrow-datafusion/pull/9661) (jayzhan211) -- add schema to SQL ast builder [#9624](https://github.com/apache/arrow-datafusion/pull/9624) (sardination) -- core/tests/parquet/row_group_pruning.rs: Add tests for strings [#9642](https://github.com/apache/arrow-datafusion/pull/9642) (progval) -- Fix incorrect results with multiple `COUNT(DISTINCT..)` aggregates on dictionaries [#9679](https://github.com/apache/arrow-datafusion/pull/9679) (alamb) -- parquet: Add support for Bloom filters on binary columns [#9644](https://github.com/apache/arrow-datafusion/pull/9644) (progval) -- Update Arrow/Parquet to `51.0.0`, tonic to `0.11` [#9613](https://github.com/apache/arrow-datafusion/pull/9613) (tustvold) -- Move inlist rule to expr_simplifier [#9692](https://github.com/apache/arrow-datafusion/pull/9692) (jayzhan211) -- Support Serde for ScalarUDF in Physical Expressions [#9436](https://github.com/apache/arrow-datafusion/pull/9436) (yyy1000) -- Support Union types in `ScalarValue` [#9683](https://github.com/apache/arrow-datafusion/pull/9683) (avantgardnerio) -- parquet: 
Add support for row group pruning on FixedSizeBinary [#9646](https://github.com/apache/arrow-datafusion/pull/9646) (progval) -- Minor: Improve documentation for `LogicalPlan::expressions` [#9698](https://github.com/apache/arrow-datafusion/pull/9698) (alamb) -- Make builtin window function output datatype to be derived from schema [#9686](https://github.com/apache/arrow-datafusion/pull/9686) (comphead) -- refactor: Extract `array_to_string` and `string_to_array` from `functions-array` subcrate' s `kernels` and `udf` containers [#9704](https://github.com/apache/arrow-datafusion/pull/9704) (erenavsarogullari) -- Add Minimum Supported Rust Version policy to docs [#9681](https://github.com/apache/arrow-datafusion/pull/9681) (alamb) -- doc: Add DataFusion profiling documentation for MacOS [#9711](https://github.com/apache/arrow-datafusion/pull/9711) (comphead) -- Minor: add ticket reference to commented out test [#9715](https://github.com/apache/arrow-datafusion/pull/9715) (alamb) -- Minor: Rename path from `common_runtime` to `common-runtime` [#9717](https://github.com/apache/arrow-datafusion/pull/9717) (alamb) -- Use object_store:BufWriter to replace put_multipart [#9648](https://github.com/apache/arrow-datafusion/pull/9648) (yyy1000) -- Fix COPY TO failing on passing format options through CLI [#9709](https://github.com/apache/arrow-datafusion/pull/9709) (tinfoil-knight) -- fix: recursive cte hangs on joins [#9687](https://github.com/apache/arrow-datafusion/pull/9687) (jonahgao) -- Move `starts_with`, `to_hex`,` trim`, `upper` to datafusion-functions (and add string_expressions) [#9541](https://github.com/apache/arrow-datafusion/pull/9541) (Tangruilin) -- Support for `extract(x from time)` / `date_part` from time types [#8693](https://github.com/apache/arrow-datafusion/pull/8693) (Jefffrey) -- doc: Updated known users list and usage dependency description [#9718](https://github.com/apache/arrow-datafusion/pull/9718) (comphead) -- Minor: improve documentation for 
`CommonSubexprEliminate` [#9700](https://github.com/apache/arrow-datafusion/pull/9700) (alamb) -- build: modify code to comply with latest clippy requirement [#9725](https://github.com/apache/arrow-datafusion/pull/9725) (comphead) -- Minor: return internal error rather than panic on unexpected error in COUNT DISTINCT [#9712](https://github.com/apache/arrow-datafusion/pull/9712) (alamb) -- fix(9678): short circuiting prevented population of visited stack, for common subexpr elimination optimization [#9685](https://github.com/apache/arrow-datafusion/pull/9685) (wiedld) -- perf: improve to_field performance [#9722](https://github.com/apache/arrow-datafusion/pull/9722) (haohuaijin) -- Minor: Run ScalarValue size test on aarch again [#9728](https://github.com/apache/arrow-datafusion/pull/9728) (alamb) -- Move trim functions (btrim, ltrim, rtrim) to datafusion_functions, make expr_fn API consistent [#9730](https://github.com/apache/arrow-datafusion/pull/9730) (Omega359) -- make format prefix optional for format options in COPY [#9723](https://github.com/apache/arrow-datafusion/pull/9723) (tinfoil-knight) -- refactor: Extract `range` and `gen_series` functions from `functions-array` subcrate' s `kernels` and `udf` containers [#9720](https://github.com/apache/arrow-datafusion/pull/9720) (erenavsarogullari) -- Move ascii function to datafusion_functions [#9740](https://github.com/apache/arrow-datafusion/pull/9740) (PsiACE) -- adding expr to string for IsNotNull IsTrue IsFalse and IsUnkown [#9739](https://github.com/apache/arrow-datafusion/pull/9739) (Lordworms) -- fix: parallel parquet can underflow when max_record_batch_rows < execution.batch_size [#9737](https://github.com/apache/arrow-datafusion/pull/9737) (devinjdangelo) -- support format in options of COPY command [#9744](https://github.com/apache/arrow-datafusion/pull/9744) (tinfoil-knight) -- Move lower, octet_length to datafusion-functions [#9747](https://github.com/apache/arrow-datafusion/pull/9747) (Omega359) -- 
Fixed missing trim() in rust api [#9749](https://github.com/apache/arrow-datafusion/pull/9749) (Omega359) -- refactor: Extract `array_length`, `array_reverse` and `array_sort` functions from `functions-array` subcrate' s `kernels` and `udf` containers [#9751](https://github.com/apache/arrow-datafusion/pull/9751) (erenavsarogullari) -- refactor: Extract `array_empty` and `array_repeat` functions from `functions-array` subcrate' s `kernels` and `udf` containers [#9762](https://github.com/apache/arrow-datafusion/pull/9762) (erenavsarogullari) -- Minor: remove an outdated TODO in `TypeCoercion` [#9752](https://github.com/apache/arrow-datafusion/pull/9752) (jonahgao) -- refactor: Extract `array_resize` and `cardinality` functions from `functions-array` subcrate' s `kernels` and `udf` containers [#9766](https://github.com/apache/arrow-datafusion/pull/9766) (erenavsarogullari) -- fix: change placeholder errors from Internal to Plan [#9745](https://github.com/apache/arrow-datafusion/pull/9745) (erratic-pattern) -- Move levenshtein, uuid, overlay to datafusion-functions [#9760](https://github.com/apache/arrow-datafusion/pull/9760) (Omega359) -- improve null handling for to_char [#9689](https://github.com/apache/arrow-datafusion/pull/9689) (tinfoil-knight) -- Add Expr->String for ScalarFunction and InList [#9759](https://github.com/apache/arrow-datafusion/pull/9759) (yyy1000) -- Move repeat, replace, split_part to datafusion_functions [#9784](https://github.com/apache/arrow-datafusion/pull/9784) (Omega359) -- refactor: Extract `array_dims`, `array_ndims` and `flatten` functions from `functions-array` subcrate' s `kernels` and `udf` containers [#9786](https://github.com/apache/arrow-datafusion/pull/9786) (erenavsarogullari) -- Minor: Improve documentation about `ColumnarValues::values_to_array` [#9774](https://github.com/apache/arrow-datafusion/pull/9774) (alamb) -- Fix panic in `struct` function with mixed scalar/array arguments 
[#9775](https://github.com/apache/arrow-datafusion/pull/9775) (alamb) -- refactor: Apply minor refactorings to `functions-array` crate [#9788](https://github.com/apache/arrow-datafusion/pull/9788) (erenavsarogullari) -- Move bit_length and chr functions to datafusion_functions [#9782](https://github.com/apache/arrow-datafusion/pull/9782) (PsiACE) -- Support tencent cloud COS storage in `datafusion-cli` [#9734](https://github.com/apache/arrow-datafusion/pull/9734) (harveyyue) -- Make it easier to register configuration extension ... [#9781](https://github.com/apache/arrow-datafusion/pull/9781) (milenkovicm) -- Expr to Sql : Case [#9798](https://github.com/apache/arrow-datafusion/pull/9798) (yyy1000) -- feat: Between expr to sql string [#9803](https://github.com/apache/arrow-datafusion/pull/9803) (sebastian2296) -- feat: Expose `array_empty` and `list_empty` functions as alias of `empty` function [#9807](https://github.com/apache/arrow-datafusion/pull/9807) (erenavsarogullari) -- Support Expr `Like` to sql [#9805](https://github.com/apache/arrow-datafusion/pull/9805) (Weijun-H) -- feat: Not expr to string [#9802](https://github.com/apache/arrow-datafusion/pull/9802) (sebastian2296) -- [Minor]: Move some repetitive codes to functions(proto) [#9811](https://github.com/apache/arrow-datafusion/pull/9811) (mustafasrepo) -- Implement IGNORE NULLS for LAST_VALUE [#9801](https://github.com/apache/arrow-datafusion/pull/9801) (huaxingao) -- [MINOR]: Move some repetitive codes to functions [#9810](https://github.com/apache/arrow-datafusion/pull/9810) (mustafasrepo) -- fix: ensure mutual compatibility of the two input schemas from recursive CTEs [#9795](https://github.com/apache/arrow-datafusion/pull/9795) (jonahgao) -- Add support for constant expression evaluation in limit [#9790](https://github.com/apache/arrow-datafusion/pull/9790) (mustafasrepo) -- Projection Pushdown through user defined LogicalPlan nodes. 
[#9690](https://github.com/apache/arrow-datafusion/pull/9690) (mustafasrepo) -- chore(deps): update substrait requirement from 0.27.0 to 0.28.0 [#9809](https://github.com/apache/arrow-datafusion/pull/9809) (dependabot[bot]) -- Run TPC-H SF10 during PR benchmarks [#9822](https://github.com/apache/arrow-datafusion/pull/9822) (gruuya) -- Expose `parser` on DFParser to enable user controlled parsing [#9729](https://github.com/apache/arrow-datafusion/pull/9729) (tshauck) -- Disable parallel reading for gziped ndjson file [#9799](https://github.com/apache/arrow-datafusion/pull/9799) (Lordworms) -- Optimize to_timestamp (with format) (#9090) [#9833](https://github.com/apache/arrow-datafusion/pull/9833) (vojtechtoman) -- Create unicode module in datafusion/functions/src/unicode and unicode_expressions feature flag, move char_length function [#9825](https://github.com/apache/arrow-datafusion/pull/9825) (Omega359) -- [Minor] Update TCPDS tests, remove some #[ignore]d tests [#9829](https://github.com/apache/arrow-datafusion/pull/9829) (Dandandan) -- doc: Adding baseline benchmark example [#9827](https://github.com/apache/arrow-datafusion/pull/9827) (comphead) -- Add name method to execution plan [#9793](https://github.com/apache/arrow-datafusion/pull/9793) (matthewmturner) -- chore(deps-dev): bump express from 4.18.2 to 4.19.2 in /datafusion/wasmtest/datafusion-wasm-app [#9826](https://github.com/apache/arrow-datafusion/pull/9826) (dependabot[bot]) -- feat: pass SessionState not SessionConfig to FunctionFactory::create [#9837](https://github.com/apache/arrow-datafusion/pull/9837) (tshauck) +- refactor: Change `SchemaProvider::table` to return `Result` rather than `Option<..>` [#9307](https://github.com/apache/datafusion/pull/9307) (crepererum) +- fix write_partitioned_parquet_results test case bug [#9360](https://github.com/apache/datafusion/pull/9360) (guojidan) +- fix: use `JoinSet` to make spawned tasks cancel-safe [#9318](https://github.com/apache/datafusion/pull/9318) 
(DDtKey) +- Update nix requirement from 0.27.1 to 0.28.0 [#9344](https://github.com/apache/datafusion/pull/9344) (dependabot[bot]) +- Replace usages of internal_err with exec_err where appropriate [#9241](https://github.com/apache/datafusion/pull/9241) (Omega359) +- feat : Support for deregistering user defined functions [#9239](https://github.com/apache/datafusion/pull/9239) (mobley-trent) +- fix: nvl function's return type [#9357](https://github.com/apache/datafusion/pull/9357) (guojidan) +- refactor: move acos() to function crate [#9297](https://github.com/apache/datafusion/pull/9297) (SteveLauC) +- docs: put flatten in top fn list [#9376](https://github.com/apache/datafusion/pull/9376) (SteveLauC) +- Update documentation so list_to_string alias to point to array_to_string [#9374](https://github.com/apache/datafusion/pull/9374) (monkwire) +- feat: issue_9285: port builtin reg function into datafusion-function-\* (1/3 regexpmatch) [#9329](https://github.com/apache/datafusion/pull/9329) (Lordworms) +- Add test to verify issue #9161 [#9265](https://github.com/apache/datafusion/pull/9265) (jonahgao) +- refactor: fix error macros hygiene (always import `DataFusionError`) [#9366](https://github.com/apache/datafusion/pull/9366) (crepererum) +- feat: support for defining ARRAY columns in `CREATE TABLE` [#9381](https://github.com/apache/datafusion/pull/9381) (jonahgao) +- fix: panic in isnan() when no args are given [#9377](https://github.com/apache/datafusion/pull/9377) (SteveLauC) +- feat: support `unnest` in FROM clause [#9355](https://github.com/apache/datafusion/pull/9355) (jonahgao) +- feat: support nvl2 function [#9364](https://github.com/apache/datafusion/pull/9364) (guojidan) +- refactor: move asin() to function crate [#9379](https://github.com/apache/datafusion/pull/9379) (SteveLauC) +- fix: using test data sample for catalog example [#9372](https://github.com/apache/datafusion/pull/9372) (korowa) +- delete tail space, fix `error: unused import: 
DataFusionError` [#9386](https://github.com/apache/datafusion/pull/9386) (Tangruilin) +- Run cargo-fmt on `datafusion-functions/core` [#9367](https://github.com/apache/datafusion/pull/9367) (alamb) +- Cache common plan properties to eliminate recursive calls in physical plan [#9346](https://github.com/apache/datafusion/pull/9346) (mustafasrepo) +- Run cargo-fmt on all of `datafusion-functions` [#9390](https://github.com/apache/datafusion/pull/9390) (alamb) +- feat: issue #9224 substitute tlide in table path [#9259](https://github.com/apache/datafusion/pull/9259) (Lordworms) +- port range function and change gen_series logic [#9352](https://github.com/apache/datafusion/pull/9352) (Lordworms) +- [MINOR]: Generate physical plan, instead of logical plan in the bench test [#9383](https://github.com/apache/datafusion/pull/9383) (mustafasrepo) +- Add `to_date` function [#9019](https://github.com/apache/datafusion/pull/9019) (Tangruilin) +- Minor: clarify performance in docs for `ScalarUDF`, `ScalarUDAF` and `ScalarUDWF` [#9384](https://github.com/apache/datafusion/pull/9384) (alamb) +- feat: replace std Instant with wasm-compatible wrapper [#9189](https://github.com/apache/datafusion/pull/9189) (waynexia) +- Uplift keys/dependencies to use more workspace inheritance [#9293](https://github.com/apache/datafusion/pull/9293) (Jefffrey) +- Improve documentation for ExecutionPlanProperties, use consistent field name [#9389](https://github.com/apache/datafusion/pull/9389) (alamb) +- Doc: Workaround for Running cargo test locally without signficant memory [#9402](https://github.com/apache/datafusion/pull/9402) (devinjdangelo) +- feat: support `unnest` with additional columns [#9400](https://github.com/apache/datafusion/pull/9400) (jonahgao) +- Minor: improve the display name of `unnest` expressions [#9412](https://github.com/apache/datafusion/pull/9412) (jonahgao) +- Minor: Move function signature check to planning stage [#9401](https://github.com/apache/datafusion/pull/9401) 
(2010YOUY01) +- chore(deps): update substrait requirement from 0.24.0 to 0.25.1 [#9406](https://github.com/apache/datafusion/pull/9406) (dependabot[bot]) +- docs: update contributor guide (migration to sqllogictest is done) [#9408](https://github.com/apache/datafusion/pull/9408) (SteveLauC) +- Move the to_timestamp\* functions to datafusion-functions [#9388](https://github.com/apache/datafusion/pull/9388) (Omega359) +- Minor: Support LargeList List Range indexing and fix large list handling in ConstEvaluator [#9393](https://github.com/apache/datafusion/pull/9393) (jayzhan211) +- NEW Logo [#9385](https://github.com/apache/datafusion/pull/9385) (pinarbayata) +- Handle serde for ScalarUDF [#9395](https://github.com/apache/datafusion/pull/9395) (yyy1000) +- Minior: Add tests with `sqrt` with negative argument [#9426](https://github.com/apache/datafusion/pull/9426) (caicancai) +- Move SpawnedTask from datafusion_physical_plan to new `datafusion_common_runtime` crate [#9414](https://github.com/apache/datafusion/pull/9414) (mustafasrepo) +- Re-export datafusion-functions-array [#9433](https://github.com/apache/datafusion/pull/9433) (andygrove) +- Minor: Support LargeList for ListIndex [#9424](https://github.com/apache/datafusion/pull/9424) (PsiACE) +- move ArrayDims, ArrayNdims and Cardinality to datafusion-function-crate [#9425](https://github.com/apache/datafusion/pull/9425) (Weijun-H) +- refactor: make instr() an alias of strpos() [#9396](https://github.com/apache/datafusion/pull/9396) (SteveLauC) +- Add test case for invalid tz in timestamp literal [#9429](https://github.com/apache/datafusion/pull/9429) (MohamedAbdeen21) +- Minor: simplify call [#9434](https://github.com/apache/datafusion/pull/9434) (alamb) +- Support IGNORE NULLS for LEAD window function [#9419](https://github.com/apache/datafusion/pull/9419) (comphead) +- fix sqllogicaltest result [#9444](https://github.com/apache/datafusion/pull/9444) (jackwener) +- Minor: docs: rm duplicate words. 
[#9449](https://github.com/apache/datafusion/pull/9449) (my-vegetable-has-exploded) +- minor: fix cargo clippy some warning [#9442](https://github.com/apache/datafusion/pull/9442) (jackwener) +- port regexp_like function and port related tests [#9397](https://github.com/apache/datafusion/pull/9397) (Lordworms) +- fix: sort_batch function unsupported mixed types with list [#9410](https://github.com/apache/datafusion/pull/9410) (JasonLi-cn) +- refactor: add `join_unwind` to `SpawnedTask` [#9422](https://github.com/apache/datafusion/pull/9422) (DDtKey) +- Ignore null LEAD support for small batch sizes. [#9445](https://github.com/apache/datafusion/pull/9445) (mustafasrepo) +- fix: casting to ARRAY types failed [#9441](https://github.com/apache/datafusion/pull/9441) (jonahgao) +- fix: reading from partitioned `json` & `arrow` tables [#9431](https://github.com/apache/datafusion/pull/9431) (korowa) +- feat: Support `EscapedStringLiteral`, update sqlparser to `0.44.0` [#9268](https://github.com/apache/datafusion/pull/9268) (JasonLi-cn) +- Minor: fix LEAD test description [#9451](https://github.com/apache/datafusion/pull/9451) (comphead) +- Consolidate `TreeNode` transform and rewrite APIs [#8891](https://github.com/apache/datafusion/pull/8891) (peter-toth) +- Support `Date32` arguments for `generate_series` [#9420](https://github.com/apache/datafusion/pull/9420) (Lordworms) +- Minor: change doc for range [#9455](https://github.com/apache/datafusion/pull/9455) (Lordworms) +- doc: add missing function index in scalar_expression.md [#9462](https://github.com/apache/datafusion/pull/9462) (Weijun-H) +- build: Update bigdecimal version in `Cargo.toml` [#9471](https://github.com/apache/datafusion/pull/9471) (comphead) +- chore(deps): update base64 requirement from 0.21 to 0.22 [#9446](https://github.com/apache/datafusion/pull/9446) (dependabot[bot]) +- Port regexp_replace functions and related tests [#9454](https://github.com/apache/datafusion/pull/9454) (Lordworms) +- Update 
contributor guide with updated scalar function howto [#9438](https://github.com/apache/datafusion/pull/9438) (Omega359) +- feat: add support for fixed list wildcard in type signature [#9312](https://github.com/apache/datafusion/pull/9312) (universalmind303) +- Add a `ScalarUDFImpl::simplfy()` API, move `SimplifyInfo` et al to datafusion_expr [#9304](https://github.com/apache/datafusion/pull/9304) (jayzhan211) +- Implement IGNORE NULLS for FIRST_VALUE [#9411](https://github.com/apache/datafusion/pull/9411) (huaxingao) +- Add plugable handler for `CREATE FUNCTION` [#9333](https://github.com/apache/datafusion/pull/9333) (milenkovicm) +- Enable configurable display of partition sizes in the explain statement [#9474](https://github.com/apache/datafusion/pull/9474) (jayzhan211) +- Reduce casts for LEAD/LAG [#9468](https://github.com/apache/datafusion/pull/9468) (comphead) +- [CI build] fix chrono suggestions [#9486](https://github.com/apache/datafusion/pull/9486) (comphead) +- Make regex dependency optional in datafusion-functions, add CI checks for function packages [#9473](https://github.com/apache/datafusion/pull/9473) (alamb) +- fix: coalesce function should return correct data type [#9459](https://github.com/apache/datafusion/pull/9459) (viirya) +- LEAD/LAG calculate default value once [#9485](https://github.com/apache/datafusion/pull/9485) (comphead) +- chore: simplify the return type of `validate_data_types()` [#9491](https://github.com/apache/datafusion/pull/9491) (waynexia) +- minor: use arrow-rs casting from Float to Timestamp [#9500](https://github.com/apache/datafusion/pull/9500) (comphead) +- chore(deps): update substrait requirement from 0.25.1 to 0.27.0 [#9502](https://github.com/apache/datafusion/pull/9502) (dependabot[bot]) +- fix: `generate_series` and `range` panic on edge cases [#9503](https://github.com/apache/datafusion/pull/9503) (jonahgao) +- Fix undeterministic behaviour of schema nullability of lag window query 
[#9508](https://github.com/apache/datafusion/pull/9508) (mustafasrepo) +- Add `to_unixtime` function [#9077](https://github.com/apache/datafusion/pull/9077) (Tangruilin) +- Minor: fixed transformed state in UDF Simplify [#9484](https://github.com/apache/datafusion/pull/9484) (alamb) +- test: port strpos test in physical_expr/src/functions to sqllogictest [#9439](https://github.com/apache/datafusion/pull/9439) (SteveLauC) +- Port ArrayHas family to `functions-array` [#9496](https://github.com/apache/datafusion/pull/9496) (jayzhan211) +- port array_empty and array_length to datafusion-function-array crate [#9510](https://github.com/apache/datafusion/pull/9510) (Weijun-H) +- fix: `substr_index` not handling negative occurrence correctly [#9475](https://github.com/apache/datafusion/pull/9475) (jonahgao) +- [minor] extract collect file statistics method and add doc [#9490](https://github.com/apache/datafusion/pull/9490) (Ted-Jiang) +- test: sqllogictests for multiple tables join [#9480](https://github.com/apache/datafusion/pull/9480) (korowa) +- Add support for ignore nulls for LEAD, LAG in WindowAggExec [#9498](https://github.com/apache/datafusion/pull/9498) (Lordworms) +- Minior: Improve log expr description [#9516](https://github.com/apache/datafusion/pull/9516) (caicancai) +- port flatten to datafusion-function-array [#9523](https://github.com/apache/datafusion/pull/9523) (Weijun-H) +- feat: Add projection to HashJoinExec. 
[#9236](https://github.com/apache/datafusion/pull/9236) (my-vegetable-has-exploded) +- Add example for `FunctionFactory` [#9482](https://github.com/apache/datafusion/pull/9482) (milenkovicm) +- Move date_part, date_trunc, date_bin functions to datafusion-functions [#9435](https://github.com/apache/datafusion/pull/9435) (Omega359) +- fix: support two argument TRIM [#9521](https://github.com/apache/datafusion/pull/9521) (tshauck) +- Remove physical expr of ListIndex and ListRange, convert to `array_element` and `array_slice` functions [#9492](https://github.com/apache/datafusion/pull/9492) (jayzhan211) +- feat: function name hints for UDFs [#9407](https://github.com/apache/datafusion/pull/9407) (SteveLauC) +- Minor: Improve documentation for registering `AnalyzerRule` [#9520](https://github.com/apache/datafusion/pull/9520) (alamb) +- Extend argument types for udf `return_type_from_exprs` [#9522](https://github.com/apache/datafusion/pull/9522) (jayzhan211) +- move make_array array_append array_prepend array_concat function to datafusion-functions-array crate [#9504](https://github.com/apache/datafusion/pull/9504) (guojidan) +- Port `StringToArray` to `function-arrays` subcrate [#9543](https://github.com/apache/datafusion/pull/9543) (erenavsarogullari) +- Minor: remove `..` pattern matching in sql planner [#9531](https://github.com/apache/datafusion/pull/9531) (alamb) +- Minor: Fix document Interval syntax [#9542](https://github.com/apache/datafusion/pull/9542) (yyy1000) +- Port `struct` to datafusion-functions [#9546](https://github.com/apache/datafusion/pull/9546) (yyy1000) +- UDAF and UDWF support aliases [#9489](https://github.com/apache/datafusion/pull/9489) (lewiszlw) +- docs: fix extraneous char in array functions table of contents [#9560](https://github.com/apache/datafusion/pull/9560) (tshauck) +- [MINOR]: Fix undeterministic test [#9559](https://github.com/apache/datafusion/pull/9559) (mustafasrepo) +- Port `arrow_typeof` to datafusion-function 
[#9524](https://github.com/apache/datafusion/pull/9524) (yyy1000) +- feat: Introduce convert Expr to SQL string API and basic feature [#9517](https://github.com/apache/datafusion/pull/9517) (backkem) +- Port `ArraySort` to `function-arrays` subcrate [#9551](https://github.com/apache/datafusion/pull/9551) (erenavsarogullari) +- refactor: unify some plan optimization in CommonSubexprEliminate [#9556](https://github.com/apache/datafusion/pull/9556) (jackwener) +- Port `ArrayDistinct` to `functions-array` subcrate [#9549](https://github.com/apache/datafusion/pull/9549) (erenavsarogullari) +- Minor: add a sql_planner benchmarks to reflecte select many field on a huge table [#9536](https://github.com/apache/datafusion/pull/9536) (haohuaijin) +- Support IGNORE NULLS for FIRST/LAST window function [#9470](https://github.com/apache/datafusion/pull/9470) (huaxingao) +- Systematic Configuration in 'Create External Table' and 'Copy To' Options [#9382](https://github.com/apache/datafusion/pull/9382) (metesynnada) +- fix: incorrect null handling in `range` and `generate_series` [#9574](https://github.com/apache/datafusion/pull/9574) (jonahgao) +- Update README.md [#9572](https://github.com/apache/datafusion/pull/9572) (Abdullahsab3) +- Port tan, tanh to datafusion-functions [#9535](https://github.com/apache/datafusion/pull/9535) (ongchi) +- feat(9493): provide access to FileMetaData for files written with ParquetSink [#9548](https://github.com/apache/datafusion/pull/9548) (wiedld) +- Export datafusion-functions UDFs publically [#9585](https://github.com/apache/datafusion/pull/9585) (alamb) +- Update the comment and Add a check [#9571](https://github.com/apache/datafusion/pull/9571) (colommar) +- Port `ArrayRepeat` to `functions-array` subcrate [#9568](https://github.com/apache/datafusion/pull/9568) (erenavsarogullari) +- Fix ApproxPercentileAccumulator on zero values [#9582](https://github.com/apache/datafusion/pull/9582) (Dandandan) +- Add `FunctionRewrite` API, Move Array 
specific rewrites to `datafusion_functions_array` [#9583](https://github.com/apache/datafusion/pull/9583) (alamb) +- Move from_unixtime, now, current_date, current_time functions to datafusion-functions [#9537](https://github.com/apache/datafusion/pull/9537) (Omega359) +- minor: update Debug trait impl for WindowsFrame [#9587](https://github.com/apache/datafusion/pull/9587) (comphead) +- Initial support LogicalPlan to SQL String [#9596](https://github.com/apache/datafusion/pull/9596) (backkem) +- refactor: use a common macro to define math UDFs [#9598](https://github.com/apache/datafusion/pull/9598) (jonahgao) +- Move all `crypto` related functions to `datafusion-functions` [#9590](https://github.com/apache/datafusion/pull/9590) (Lordworms) +- Remove physical expr of NamedStructField, convert to `get_field` function call [#9563](https://github.com/apache/datafusion/pull/9563) (yyy1000) +- Add `/benchmark` github command to comparison benchmark between base and pr commit [#9461](https://github.com/apache/datafusion/pull/9461) (gruuya) +- support unnest as subexpression [#9592](https://github.com/apache/datafusion/pull/9592) (YjyJeff) +- feat: implement more expr_to_sql functionality [#9578](https://github.com/apache/datafusion/pull/9578) (devinjdangelo) +- Port `ArrayResize` to `functions-array` subcrate [#9570](https://github.com/apache/datafusion/pull/9570) (erenavsarogullari) +- Move make_date, to_char to datafusion-functions [#9601](https://github.com/apache/datafusion/pull/9601) (Omega359) +- Fix to_timestamp benchmark [#9608](https://github.com/apache/datafusion/pull/9608) (Omega359) +- feat: implement aggregation and subquery plans to SQL [#9606](https://github.com/apache/datafusion/pull/9606) (devinjdangelo) +- Port ArrayElem/Slice/PopFront/Back into `functions-array` [#9615](https://github.com/apache/datafusion/pull/9615) (jayzhan211) +- Minor: Remove datafusion-functions-array dependency from datafusion-optimizer 
[#9621](https://github.com/apache/datafusion/pull/9621) (alamb) +- Enable TTY during bench data generation [#9626](https://github.com/apache/datafusion/pull/9626) (gruuya) +- Remove constant expressions from SortExprs in the SortExec [#9618](https://github.com/apache/datafusion/pull/9618) (mustafasrepo) +- Try fixing missing results name in the benchmark step [#9632](https://github.com/apache/datafusion/pull/9632) (gruuya) +- feat: track memory usage for recursive CTE, enable recursive CTEs by default [#9619](https://github.com/apache/datafusion/pull/9619) (jonahgao) +- doc: Add missing doc link [#9631](https://github.com/apache/datafusion/pull/9631) (Weijun-H) +- Add explicit move of PR bench results if they were placed in HEAD dir [#9636](https://github.com/apache/datafusion/pull/9636) (gruuya) +- Add `array_reverse` function to datafusion-function-\* crate [#9630](https://github.com/apache/datafusion/pull/9630) (Weijun-H) +- Move parts of `InListSimplifier` simplify rules to `Simplifier` [#9628](https://github.com/apache/datafusion/pull/9628) (jayzhan211) +- Port Array Union and Intersect to `functions-array` [#9629](https://github.com/apache/datafusion/pull/9629) (jayzhan211) +- Port `ArrayPosition` and `ArrayPositions` to `functions-array` subcrate [#9617](https://github.com/apache/datafusion/pull/9617) (erenavsarogullari) +- Optimize make_date (#9089) [#9600](https://github.com/apache/datafusion/pull/9600) (vojtechtoman) +- Support AT TIME ZONE clause [#9647](https://github.com/apache/datafusion/pull/9647) (tinfoil-knight) +- Window Linear Mode use smaller buffers [#9597](https://github.com/apache/datafusion/pull/9597) (mustafasrepo) +- Port `ArrayExcept` to `functions-array` subcrate [#9634](https://github.com/apache/datafusion/pull/9634) (erenavsarogullari) +- chore: improve array expression doc and clean up array_expression.rs [#9650](https://github.com/apache/datafusion/pull/9650) (Weijun-H) +- Minor: remove clone in `exprlist_to_fields` 
[#9657](https://github.com/apache/datafusion/pull/9657) (jayzhan211) +- Port `ArrayRemove`, `ArrayRemoveN`, `ArrayRemoveAll` to `functions-array` subcrate [#9656](https://github.com/apache/datafusion/pull/9656) (erenavsarogullari) +- Minor: Remove redundant dependencies from `datafusion-functions/Cargo.toml` [#9622](https://github.com/apache/datafusion/pull/9622) (alamb) +- Support IGNORE NULLS for NTH_VALUE window function [#9625](https://github.com/apache/datafusion/pull/9625) (huaxingao) +- Improve Robustness of Unparser Testing and Implementation [#9623](https://github.com/apache/datafusion/pull/9623) (devinjdangelo) +- Adding Constant Check for FilterExec [#9649](https://github.com/apache/datafusion/pull/9649) (Lordworms) +- chore(deps-dev): bump follow-redirects from 1.15.4 to 1.15.6 in /datafusion/wasmtest/datafusion-wasm-app [#9609](https://github.com/apache/datafusion/pull/9609) (dependabot[bot]) +- move array_replace family functions to datafusion-function-array crate [#9651](https://github.com/apache/datafusion/pull/9651) (Weijun-H) +- chore: remove repetitive word `the the` --> `the` in docs / comments [#9673](https://github.com/apache/datafusion/pull/9673) (InventiveCoder) +- Update example-usage.md to remove reference to simd and rust nightly. 
[#9677](https://github.com/apache/datafusion/pull/9677) (Omega359) +- [MINOR]: Remove some `.unwrap`s from nth_value.rs file [#9674](https://github.com/apache/datafusion/pull/9674) (mustafasrepo) +- minor: Remove deprecated methods [#9627](https://github.com/apache/datafusion/pull/9627) (comphead) +- Migrate `arrow_cast` to a UDF [#9610](https://github.com/apache/datafusion/pull/9610) (alamb) +- parquet: Add row*groups_matched*{statistics,bloom_filter} statistics [#9640](https://github.com/apache/datafusion/pull/9640) (progval) +- Make COPY TO align with CREATE EXTERNAL TABLE [#9604](https://github.com/apache/datafusion/pull/9604) (metesynnada) +- Support "A column is known to be entirely NULL" in `PruningPredicate` [#9223](https://github.com/apache/datafusion/pull/9223) (appletreeisyellow) +- Suppress self update for windows CI runner [#9661](https://github.com/apache/datafusion/pull/9661) (jayzhan211) +- add schema to SQL ast builder [#9624](https://github.com/apache/datafusion/pull/9624) (sardination) +- core/tests/parquet/row_group_pruning.rs: Add tests for strings [#9642](https://github.com/apache/datafusion/pull/9642) (progval) +- Fix incorrect results with multiple `COUNT(DISTINCT..)` aggregates on dictionaries [#9679](https://github.com/apache/datafusion/pull/9679) (alamb) +- parquet: Add support for Bloom filters on binary columns [#9644](https://github.com/apache/datafusion/pull/9644) (progval) +- Update Arrow/Parquet to `51.0.0`, tonic to `0.11` [#9613](https://github.com/apache/datafusion/pull/9613) (tustvold) +- Move inlist rule to expr_simplifier [#9692](https://github.com/apache/datafusion/pull/9692) (jayzhan211) +- Support Serde for ScalarUDF in Physical Expressions [#9436](https://github.com/apache/datafusion/pull/9436) (yyy1000) +- Support Union types in `ScalarValue` [#9683](https://github.com/apache/datafusion/pull/9683) (avantgardnerio) +- parquet: Add support for row group pruning on FixedSizeBinary 
[#9646](https://github.com/apache/datafusion/pull/9646) (progval) +- Minor: Improve documentation for `LogicalPlan::expressions` [#9698](https://github.com/apache/datafusion/pull/9698) (alamb) +- Make builtin window function output datatype to be derived from schema [#9686](https://github.com/apache/datafusion/pull/9686) (comphead) +- refactor: Extract `array_to_string` and `string_to_array` from `functions-array` subcrate' s `kernels` and `udf` containers [#9704](https://github.com/apache/datafusion/pull/9704) (erenavsarogullari) +- Add Minimum Supported Rust Version policy to docs [#9681](https://github.com/apache/datafusion/pull/9681) (alamb) +- doc: Add DataFusion profiling documentation for MacOS [#9711](https://github.com/apache/datafusion/pull/9711) (comphead) +- Minor: add ticket reference to commented out test [#9715](https://github.com/apache/datafusion/pull/9715) (alamb) +- Minor: Rename path from `common_runtime` to `common-runtime` [#9717](https://github.com/apache/datafusion/pull/9717) (alamb) +- Use object_store:BufWriter to replace put_multipart [#9648](https://github.com/apache/datafusion/pull/9648) (yyy1000) +- Fix COPY TO failing on passing format options through CLI [#9709](https://github.com/apache/datafusion/pull/9709) (tinfoil-knight) +- fix: recursive cte hangs on joins [#9687](https://github.com/apache/datafusion/pull/9687) (jonahgao) +- Move `starts_with`, `to_hex`,` trim`, `upper` to datafusion-functions (and add string_expressions) [#9541](https://github.com/apache/datafusion/pull/9541) (Tangruilin) +- Support for `extract(x from time)` / `date_part` from time types [#8693](https://github.com/apache/datafusion/pull/8693) (Jefffrey) +- doc: Updated known users list and usage dependency description [#9718](https://github.com/apache/datafusion/pull/9718) (comphead) +- Minor: improve documentation for `CommonSubexprEliminate` [#9700](https://github.com/apache/datafusion/pull/9700) (alamb) +- build: modify code to comply with latest clippy 
requirement [#9725](https://github.com/apache/datafusion/pull/9725) (comphead) +- Minor: return internal error rather than panic on unexpected error in COUNT DISTINCT [#9712](https://github.com/apache/datafusion/pull/9712) (alamb) +- fix(9678): short circuiting prevented population of visited stack, for common subexpr elimination optimization [#9685](https://github.com/apache/datafusion/pull/9685) (wiedld) +- perf: improve to_field performance [#9722](https://github.com/apache/datafusion/pull/9722) (haohuaijin) +- Minor: Run ScalarValue size test on aarch again [#9728](https://github.com/apache/datafusion/pull/9728) (alamb) +- Move trim functions (btrim, ltrim, rtrim) to datafusion_functions, make expr_fn API consistent [#9730](https://github.com/apache/datafusion/pull/9730) (Omega359) +- make format prefix optional for format options in COPY [#9723](https://github.com/apache/datafusion/pull/9723) (tinfoil-knight) +- refactor: Extract `range` and `gen_series` functions from `functions-array` subcrate' s `kernels` and `udf` containers [#9720](https://github.com/apache/datafusion/pull/9720) (erenavsarogullari) +- Move ascii function to datafusion_functions [#9740](https://github.com/apache/datafusion/pull/9740) (PsiACE) +- adding expr to string for IsNotNull IsTrue IsFalse and IsUnkown [#9739](https://github.com/apache/datafusion/pull/9739) (Lordworms) +- fix: parallel parquet can underflow when max_record_batch_rows < execution.batch_size [#9737](https://github.com/apache/datafusion/pull/9737) (devinjdangelo) +- support format in options of COPY command [#9744](https://github.com/apache/datafusion/pull/9744) (tinfoil-knight) +- Move lower, octet_length to datafusion-functions [#9747](https://github.com/apache/datafusion/pull/9747) (Omega359) +- Fixed missing trim() in rust api [#9749](https://github.com/apache/datafusion/pull/9749) (Omega359) +- refactor: Extract `array_length`, `array_reverse` and `array_sort` functions from `functions-array` subcrate' s `kernels` 
and `udf` containers [#9751](https://github.com/apache/datafusion/pull/9751) (erenavsarogullari) +- refactor: Extract `array_empty` and `array_repeat` functions from `functions-array` subcrate' s `kernels` and `udf` containers [#9762](https://github.com/apache/datafusion/pull/9762) (erenavsarogullari) +- Minor: remove an outdated TODO in `TypeCoercion` [#9752](https://github.com/apache/datafusion/pull/9752) (jonahgao) +- refactor: Extract `array_resize` and `cardinality` functions from `functions-array` subcrate' s `kernels` and `udf` containers [#9766](https://github.com/apache/datafusion/pull/9766) (erenavsarogullari) +- fix: change placeholder errors from Internal to Plan [#9745](https://github.com/apache/datafusion/pull/9745) (erratic-pattern) +- Move levenshtein, uuid, overlay to datafusion-functions [#9760](https://github.com/apache/datafusion/pull/9760) (Omega359) +- improve null handling for to_char [#9689](https://github.com/apache/datafusion/pull/9689) (tinfoil-knight) +- Add Expr->String for ScalarFunction and InList [#9759](https://github.com/apache/datafusion/pull/9759) (yyy1000) +- Move repeat, replace, split_part to datafusion_functions [#9784](https://github.com/apache/datafusion/pull/9784) (Omega359) +- refactor: Extract `array_dims`, `array_ndims` and `flatten` functions from `functions-array` subcrate' s `kernels` and `udf` containers [#9786](https://github.com/apache/datafusion/pull/9786) (erenavsarogullari) +- Minor: Improve documentation about `ColumnarValues::values_to_array` [#9774](https://github.com/apache/datafusion/pull/9774) (alamb) +- Fix panic in `struct` function with mixed scalar/array arguments [#9775](https://github.com/apache/datafusion/pull/9775) (alamb) +- refactor: Apply minor refactorings to `functions-array` crate [#9788](https://github.com/apache/datafusion/pull/9788) (erenavsarogullari) +- Move bit_length and chr functions to datafusion_functions [#9782](https://github.com/apache/datafusion/pull/9782) (PsiACE) +- Support 
tencent cloud COS storage in `datafusion-cli` [#9734](https://github.com/apache/datafusion/pull/9734) (harveyyue) +- Make it easier to register configuration extension ... [#9781](https://github.com/apache/datafusion/pull/9781) (milenkovicm) +- Expr to Sql : Case [#9798](https://github.com/apache/datafusion/pull/9798) (yyy1000) +- feat: Between expr to sql string [#9803](https://github.com/apache/datafusion/pull/9803) (sebastian2296) +- feat: Expose `array_empty` and `list_empty` functions as alias of `empty` function [#9807](https://github.com/apache/datafusion/pull/9807) (erenavsarogullari) +- Support Expr `Like` to sql [#9805](https://github.com/apache/datafusion/pull/9805) (Weijun-H) +- feat: Not expr to string [#9802](https://github.com/apache/datafusion/pull/9802) (sebastian2296) +- [Minor]: Move some repetitive codes to functions(proto) [#9811](https://github.com/apache/datafusion/pull/9811) (mustafasrepo) +- Implement IGNORE NULLS for LAST_VALUE [#9801](https://github.com/apache/datafusion/pull/9801) (huaxingao) +- [MINOR]: Move some repetitive codes to functions [#9810](https://github.com/apache/datafusion/pull/9810) (mustafasrepo) +- fix: ensure mutual compatibility of the two input schemas from recursive CTEs [#9795](https://github.com/apache/datafusion/pull/9795) (jonahgao) +- Add support for constant expression evaluation in limit [#9790](https://github.com/apache/datafusion/pull/9790) (mustafasrepo) +- Projection Pushdown through user defined LogicalPlan nodes. 
[#9690](https://github.com/apache/datafusion/pull/9690) (mustafasrepo) +- chore(deps): update substrait requirement from 0.27.0 to 0.28.0 [#9809](https://github.com/apache/datafusion/pull/9809) (dependabot[bot]) +- Run TPC-H SF10 during PR benchmarks [#9822](https://github.com/apache/datafusion/pull/9822) (gruuya) +- Expose `parser` on DFParser to enable user controlled parsing [#9729](https://github.com/apache/datafusion/pull/9729) (tshauck) +- Disable parallel reading for gziped ndjson file [#9799](https://github.com/apache/datafusion/pull/9799) (Lordworms) +- Optimize to_timestamp (with format) (#9090) [#9833](https://github.com/apache/datafusion/pull/9833) (vojtechtoman) +- Create unicode module in datafusion/functions/src/unicode and unicode_expressions feature flag, move char_length function [#9825](https://github.com/apache/datafusion/pull/9825) (Omega359) +- [Minor] Update TCPDS tests, remove some #[ignore]d tests [#9829](https://github.com/apache/datafusion/pull/9829) (Dandandan) +- doc: Adding baseline benchmark example [#9827](https://github.com/apache/datafusion/pull/9827) (comphead) +- Add name method to execution plan [#9793](https://github.com/apache/datafusion/pull/9793) (matthewmturner) +- chore(deps-dev): bump express from 4.18.2 to 4.19.2 in /datafusion/wasmtest/datafusion-wasm-app [#9826](https://github.com/apache/datafusion/pull/9826) (dependabot[bot]) +- feat: pass SessionState not SessionConfig to FunctionFactory::create [#9837](https://github.com/apache/datafusion/pull/9837) (tshauck) diff --git a/dev/changelog/37.1.0.md b/dev/changelog/37.1.0.md index a8c6647e0eda..a4376d6cdbf4 100644 --- a/dev/changelog/37.1.0.md +++ b/dev/changelog/37.1.0.md @@ -17,14 +17,14 @@ under the License. 
--> -## [37.1.0](https://github.com/apache/arrow-datafusion/tree/37.1.0) (2024-04-18) +## [37.1.0](https://github.com/apache/datafusion/tree/37.1.0) (2024-04-18) -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/37.0.0...37.1.0) +[Full Changelog](https://github.com/apache/datafusion/compare/37.0.0...37.1.0) **Merged pull requests:** -- Backport to 37: fix `NamedStructField` should be rewritten in OperatorToFunction in subquery regression [#10103](https://github.com/apache/arrow-datafusion/pull/10103) (alamb) -- Backport to 37: fix Coercion stopped working for coalesce on a dictionary column [#10104](https://github.com/apache/arrow-datafusion/pull/10104) (alamb) -- Backport to 37: group by count distinct doesn't work for timestamps with time zone [#10105](https://github.com/apache/arrow-datafusion/pull/10105) (alamb) -- Backport to 37: make udf structs public [#10107](https://github.com/apache/arrow-datafusion/pull/10107) (alamb) -- Backport to 37: Reduce DataFrame stack size and fix large futures warnings [#10123](https://github.com/apache/arrow-datafusion/pull/10123) (sergiimk) +- Backport to 37: fix `NamedStructField` should be rewritten in OperatorToFunction in subquery regression [#10103](https://github.com/apache/datafusion/pull/10103) (alamb) +- Backport to 37: fix Coercion stopped working for coalesce on a dictionary column [#10104](https://github.com/apache/datafusion/pull/10104) (alamb) +- Backport to 37: group by count distinct doesn't work for timestamps with time zone [#10105](https://github.com/apache/datafusion/pull/10105) (alamb) +- Backport to 37: make udf structs public [#10107](https://github.com/apache/datafusion/pull/10107) (alamb) +- Backport to 37: Reduce DataFrame stack size and fix large futures warnings [#10123](https://github.com/apache/datafusion/pull/10123) (sergiimk) diff --git a/dev/changelog/5.0.0.md b/dev/changelog/5.0.0.md index fe8535929b34..2681d522c601 100644 --- a/dev/changelog/5.0.0.md +++ 
b/dev/changelog/5.0.0.md @@ -17,294 +17,294 @@ under the License. --> -## [5.0.0](https://github.com/apache/arrow-datafusion/tree/5.0.0) (2021-08-10) +## [5.0.0](https://github.com/apache/datafusion/tree/5.0.0) (2021-08-10) -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/4.0.0...5.0.0) +[Full Changelog](https://github.com/apache/datafusion/compare/4.0.0...5.0.0) **Breaking changes:** -- Box ScalarValue:Lists, reduce size by half size [\#788](https://github.com/apache/arrow-datafusion/pull/788) ([alamb](https://github.com/alamb)) -- JOIN conditions are order dependent [\#778](https://github.com/apache/arrow-datafusion/pull/778) ([seddonm1](https://github.com/seddonm1)) -- Show the result of all optimizer passes in EXPLAIN VERBOSE [\#759](https://github.com/apache/arrow-datafusion/pull/759) ([alamb](https://github.com/alamb)) -- \#723 Datafusion add option in ExecutionConfig to enable/disable parquet pruning [\#749](https://github.com/apache/arrow-datafusion/pull/749) ([lvheyang](https://github.com/lvheyang)) -- Update API for extension planning to include logical plan [\#643](https://github.com/apache/arrow-datafusion/pull/643) ([alamb](https://github.com/alamb)) -- Rename MergeExec to CoalescePartitionsExec [\#635](https://github.com/apache/arrow-datafusion/pull/635) ([andygrove](https://github.com/andygrove)) -- fix 593, reduce cloning by taking ownership in logical planner's `from` fn [\#610](https://github.com/apache/arrow-datafusion/pull/610) ([Jimexist](https://github.com/Jimexist)) -- fix join column handling logic for `On` and `Using` constraints [\#605](https://github.com/apache/arrow-datafusion/pull/605) ([houqp](https://github.com/houqp)) -- Rewrite pruning logic in terms of PruningStatistics using Array trait \(option 2\) [\#426](https://github.com/apache/arrow-datafusion/pull/426) ([alamb](https://github.com/alamb)) -- Support reading from NdJson formatted data sources [\#404](https://github.com/apache/arrow-datafusion/pull/404) 
([heymind](https://github.com/heymind)) -- Add metrics to RepartitionExec [\#398](https://github.com/apache/arrow-datafusion/pull/398) ([andygrove](https://github.com/andygrove)) -- Use 4.x arrow-rs from crates.io rather than git sha [\#395](https://github.com/apache/arrow-datafusion/pull/395) ([alamb](https://github.com/alamb)) -- Return Vec\ from PredicateBuilder rather than an `Fn` [\#370](https://github.com/apache/arrow-datafusion/pull/370) ([alamb](https://github.com/alamb)) -- Refactor: move RowGroupPredicateBuilder into its own module, rename to PruningPredicateBuilder [\#365](https://github.com/apache/arrow-datafusion/pull/365) ([alamb](https://github.com/alamb)) -- \[Datafusion\] NOW\(\) function support [\#288](https://github.com/apache/arrow-datafusion/pull/288) ([msathis](https://github.com/msathis)) -- Implement select distinct [\#262](https://github.com/apache/arrow-datafusion/pull/262) ([Dandandan](https://github.com/Dandandan)) -- Refactor datafusion/src/physical_plan/common.rs build_file_list to take less param and reuse code [\#253](https://github.com/apache/arrow-datafusion/pull/253) ([Jimexist](https://github.com/Jimexist)) -- Support qualified columns in queries [\#55](https://github.com/apache/arrow-datafusion/pull/55) ([houqp](https://github.com/houqp)) -- Read CSV format text from stdin or memory [\#54](https://github.com/apache/arrow-datafusion/pull/54) ([heymind](https://github.com/heymind)) -- Use atomics for SQLMetric implementation, remove unused name field [\#25](https://github.com/apache/arrow-datafusion/pull/25) ([returnString](https://github.com/returnString)) +- Box ScalarValue:Lists, reduce size by half size [\#788](https://github.com/apache/datafusion/pull/788) ([alamb](https://github.com/alamb)) +- JOIN conditions are order dependent [\#778](https://github.com/apache/datafusion/pull/778) ([seddonm1](https://github.com/seddonm1)) +- Show the result of all optimizer passes in EXPLAIN VERBOSE 
[\#759](https://github.com/apache/datafusion/pull/759) ([alamb](https://github.com/alamb)) +- \#723 Datafusion add option in ExecutionConfig to enable/disable parquet pruning [\#749](https://github.com/apache/datafusion/pull/749) ([lvheyang](https://github.com/lvheyang)) +- Update API for extension planning to include logical plan [\#643](https://github.com/apache/datafusion/pull/643) ([alamb](https://github.com/alamb)) +- Rename MergeExec to CoalescePartitionsExec [\#635](https://github.com/apache/datafusion/pull/635) ([andygrove](https://github.com/andygrove)) +- fix 593, reduce cloning by taking ownership in logical planner's `from` fn [\#610](https://github.com/apache/datafusion/pull/610) ([Jimexist](https://github.com/Jimexist)) +- fix join column handling logic for `On` and `Using` constraints [\#605](https://github.com/apache/datafusion/pull/605) ([houqp](https://github.com/houqp)) +- Rewrite pruning logic in terms of PruningStatistics using Array trait \(option 2\) [\#426](https://github.com/apache/datafusion/pull/426) ([alamb](https://github.com/alamb)) +- Support reading from NdJson formatted data sources [\#404](https://github.com/apache/datafusion/pull/404) ([heymind](https://github.com/heymind)) +- Add metrics to RepartitionExec [\#398](https://github.com/apache/datafusion/pull/398) ([andygrove](https://github.com/andygrove)) +- Use 4.x arrow-rs from crates.io rather than git sha [\#395](https://github.com/apache/datafusion/pull/395) ([alamb](https://github.com/alamb)) +- Return Vec\ from PredicateBuilder rather than an `Fn` [\#370](https://github.com/apache/datafusion/pull/370) ([alamb](https://github.com/alamb)) +- Refactor: move RowGroupPredicateBuilder into its own module, rename to PruningPredicateBuilder [\#365](https://github.com/apache/datafusion/pull/365) ([alamb](https://github.com/alamb)) +- \[Datafusion\] NOW\(\) function support [\#288](https://github.com/apache/datafusion/pull/288) ([msathis](https://github.com/msathis)) +- Implement 
select distinct [\#262](https://github.com/apache/datafusion/pull/262) ([Dandandan](https://github.com/Dandandan)) +- Refactor datafusion/src/physical_plan/common.rs build_file_list to take less param and reuse code [\#253](https://github.com/apache/datafusion/pull/253) ([Jimexist](https://github.com/Jimexist)) +- Support qualified columns in queries [\#55](https://github.com/apache/datafusion/pull/55) ([houqp](https://github.com/houqp)) +- Read CSV format text from stdin or memory [\#54](https://github.com/apache/datafusion/pull/54) ([heymind](https://github.com/heymind)) +- Use atomics for SQLMetric implementation, remove unused name field [\#25](https://github.com/apache/datafusion/pull/25) ([returnString](https://github.com/returnString)) **Implemented enhancements:** -- Allow extension nodes to correctly plan physical expressions with relations [\#642](https://github.com/apache/arrow-datafusion/issues/642) -- Filters aren't passed down to table scans in a union [\#557](https://github.com/apache/arrow-datafusion/issues/557) -- Support pruning for `boolean` columns [\#490](https://github.com/apache/arrow-datafusion/issues/490) -- Implement SQLMetrics for RepartitionExec [\#397](https://github.com/apache/arrow-datafusion/issues/397) -- DataFusion benchmarks should show executed plan with metrics after query completes [\#396](https://github.com/apache/arrow-datafusion/issues/396) -- Use published versions of arrow rather than github shas [\#393](https://github.com/apache/arrow-datafusion/issues/393) -- Add Compare to GroupByScalar [\#364](https://github.com/apache/arrow-datafusion/issues/364) -- Reusable "row group pruning" logic [\#363](https://github.com/apache/arrow-datafusion/issues/363) -- Add an Order Preserving merge operator [\#362](https://github.com/apache/arrow-datafusion/issues/362) -- Implement Postgres compatible `now()` function [\#251](https://github.com/apache/arrow-datafusion/issues/251) -- COUNT DISTINCT does not support dictionary types 
[\#249](https://github.com/apache/arrow-datafusion/issues/249) -- Use standard make_null_array for CASE [\#222](https://github.com/apache/arrow-datafusion/issues/222) -- Implement date_trunc\(\) function [\#203](https://github.com/apache/arrow-datafusion/issues/203) -- COUNT DISTINCT does not support for `Float64` [\#199](https://github.com/apache/arrow-datafusion/issues/199) -- Update SQLMetric to use atomics rather than a Mutex [\#30](https://github.com/apache/arrow-datafusion/issues/30) -- Implement PartialOrd for ScalarValue [\#838](https://github.com/apache/arrow-datafusion/pull/838) ([viirya](https://github.com/viirya)) -- Support date datatypes in max/min [\#820](https://github.com/apache/arrow-datafusion/pull/820) ([viirya](https://github.com/viirya)) -- Implement vectorized hashing for DictionaryArray types [\#812](https://github.com/apache/arrow-datafusion/pull/812) ([alamb](https://github.com/alamb)) -- Convert unsupported conditions in left right join to filters [\#796](https://github.com/apache/arrow-datafusion/pull/796) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Dandandan](https://github.com/Dandandan)) -- Implement streaming versions of Dataframe.collect methods [\#789](https://github.com/apache/arrow-datafusion/pull/789) ([andygrove](https://github.com/andygrove)) -- impl from str for column and scalar [\#762](https://github.com/apache/arrow-datafusion/pull/762) ([Jimexist](https://github.com/Jimexist)) -- impl fmt::Display for PlanType [\#752](https://github.com/apache/arrow-datafusion/pull/752) ([Jimexist](https://github.com/Jimexist)) -- Remove unnecessary projection in logical plan optimization phase [\#747](https://github.com/apache/arrow-datafusion/pull/747) ([waynexia](https://github.com/waynexia)) -- Support table columns alias [\#735](https://github.com/apache/arrow-datafusion/pull/735) ([Dandandan](https://github.com/Dandandan)) -- Derive PartialEq for datasource enums 
[\#734](https://github.com/apache/arrow-datafusion/pull/734) ([alamb](https://github.com/alamb)) -- Allow filetype to be lowercase, Implement FromStr for FileType [\#728](https://github.com/apache/arrow-datafusion/pull/728) ([Jimexist](https://github.com/Jimexist)) -- Update to use arrow 5.0 [\#721](https://github.com/apache/arrow-datafusion/pull/721) ([alamb](https://github.com/alamb)) -- \#554: Lead/lag window function with offset and default value arguments [\#687](https://github.com/apache/arrow-datafusion/pull/687) ([jgoday](https://github.com/jgoday)) -- dedup using join column in wildcard expansion [\#678](https://github.com/apache/arrow-datafusion/pull/678) ([houqp](https://github.com/houqp)) -- Implement metrics for HashJoinExec [\#664](https://github.com/apache/arrow-datafusion/pull/664) ([andygrove](https://github.com/andygrove)) -- Show physical plan with metrics in benchmark [\#662](https://github.com/apache/arrow-datafusion/pull/662) ([andygrove](https://github.com/andygrove)) -- Allow non-equijoin filters in join condition [\#660](https://github.com/apache/arrow-datafusion/pull/660) ([Dandandan](https://github.com/Dandandan)) -- Add End-to-end test for parquet pruning + metrics for ParquetExec [\#657](https://github.com/apache/arrow-datafusion/pull/657) ([alamb](https://github.com/alamb)) -- Add support for leading field in interval [\#647](https://github.com/apache/arrow-datafusion/pull/647) ([Dandandan](https://github.com/Dandandan)) -- Remove hard-coded PartitionMode from Ballista serde [\#637](https://github.com/apache/arrow-datafusion/pull/637) ([andygrove](https://github.com/andygrove)) -- Ballista: Implement scalable distributed joins [\#634](https://github.com/apache/arrow-datafusion/pull/634) ([andygrove](https://github.com/andygrove)) -- implement rank and dense_rank function and refactor built-in window function evaluation [\#631](https://github.com/apache/arrow-datafusion/pull/631) ([Jimexist](https://github.com/Jimexist)) -- Improve 
"field not found" error messages [\#625](https://github.com/apache/arrow-datafusion/pull/625) ([andygrove](https://github.com/andygrove)) -- Support modulus op [\#577](https://github.com/apache/arrow-datafusion/pull/577) ([gangliao](https://github.com/gangliao)) -- implement `std::default::Default` for execution config [\#570](https://github.com/apache/arrow-datafusion/pull/570) ([Jimexist](https://github.com/Jimexist)) -- `to_timestamp_millis()`, `to_timestamp_micros()`, `to_timestamp_seconds()` [\#567](https://github.com/apache/arrow-datafusion/pull/567) ([velvia](https://github.com/velvia)) -- Filter push down for Union [\#559](https://github.com/apache/arrow-datafusion/pull/559) ([Dandandan](https://github.com/Dandandan)) -- Implement window functions with `partition_by` clause [\#558](https://github.com/apache/arrow-datafusion/pull/558) ([Jimexist](https://github.com/Jimexist)) -- support table alias in join clause [\#547](https://github.com/apache/arrow-datafusion/pull/547) ([houqp](https://github.com/houqp)) -- Not equal predicate in physical_planning pruning [\#544](https://github.com/apache/arrow-datafusion/pull/544) ([jgoday](https://github.com/jgoday)) -- add error handling and boundary checking for window frames [\#530](https://github.com/apache/arrow-datafusion/pull/530) ([Jimexist](https://github.com/Jimexist)) -- Implement window functions with `order_by` clause [\#520](https://github.com/apache/arrow-datafusion/pull/520) ([Jimexist](https://github.com/Jimexist)) -- support group by column positions [\#519](https://github.com/apache/arrow-datafusion/pull/519) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jychen7](https://github.com/jychen7)) -- Implement constant folding for CAST [\#513](https://github.com/apache/arrow-datafusion/pull/513) ([msathis](https://github.com/msathis)) -- Add window frame constructs - alternative [\#506](https://github.com/apache/arrow-datafusion/pull/506) ([Jimexist](https://github.com/Jimexist)) -- Add 
`partition by` constructs in window functions and modify logical planning [\#501](https://github.com/apache/arrow-datafusion/pull/501) ([Jimexist](https://github.com/Jimexist)) -- Add support for boolean columns in pruning logic [\#500](https://github.com/apache/arrow-datafusion/pull/500) ([alamb](https://github.com/alamb)) -- \#215 resolve aliases for group by exprs [\#485](https://github.com/apache/arrow-datafusion/pull/485) ([jychen7](https://github.com/jychen7)) -- Support anti join [\#482](https://github.com/apache/arrow-datafusion/pull/482) ([Dandandan](https://github.com/Dandandan)) -- Support semi join [\#470](https://github.com/apache/arrow-datafusion/pull/470) ([Dandandan](https://github.com/Dandandan)) -- add `order by` construct in window function and logical plans [\#463](https://github.com/apache/arrow-datafusion/pull/463) ([Jimexist](https://github.com/Jimexist)) -- Remove reundant filters \(e.g. c\> 5 AND c\>5 --\> c\>5\) [\#436](https://github.com/apache/arrow-datafusion/pull/436) ([jgoday](https://github.com/jgoday)) -- fix: display the content of debug explain [\#434](https://github.com/apache/arrow-datafusion/pull/434) ([NGA-TRAN](https://github.com/NGA-TRAN)) -- implement lead and lag built-in window function [\#429](https://github.com/apache/arrow-datafusion/pull/429) ([Jimexist](https://github.com/Jimexist)) -- add support for ndjson for datafusion-cli [\#427](https://github.com/apache/arrow-datafusion/pull/427) ([Jimexist](https://github.com/Jimexist)) -- add `first_value`, `last_value`, and `nth_value` built-in window functions [\#403](https://github.com/apache/arrow-datafusion/pull/403) ([Jimexist](https://github.com/Jimexist)) -- export both `now` and `random` functions [\#389](https://github.com/apache/arrow-datafusion/pull/389) ([Jimexist](https://github.com/Jimexist)) -- Function to create `ArrayRef` from an iterator of ScalarValues [\#381](https://github.com/apache/arrow-datafusion/pull/381) ([alamb](https://github.com/alamb)) -- Sort 
preserving merge \(\#362\) [\#379](https://github.com/apache/arrow-datafusion/pull/379) ([tustvold](https://github.com/tustvold)) -- Add support for multiple partitions with SortExec \(\#362\) [\#378](https://github.com/apache/arrow-datafusion/pull/378) ([tustvold](https://github.com/tustvold)) -- add window expression stream, delegated window aggregation to aggregate functions, and implement `row_number` [\#375](https://github.com/apache/arrow-datafusion/pull/375) ([Jimexist](https://github.com/Jimexist)) -- Add PartialOrd and Ord to GroupByScalar \(\#364\) [\#368](https://github.com/apache/arrow-datafusion/pull/368) ([tustvold](https://github.com/tustvold)) -- Implement readable explain plans for physical plans [\#337](https://github.com/apache/arrow-datafusion/pull/337) ([alamb](https://github.com/alamb)) -- Add window expression part 1 - logical and physical planning, structure, to/from proto, and explain, for empty over clause only [\#334](https://github.com/apache/arrow-datafusion/pull/334) ([Jimexist](https://github.com/Jimexist)) -- Use NullArray to Pass row count to ScalarFunctions that take 0 arguments [\#328](https://github.com/apache/arrow-datafusion/pull/328) ([Jimexist](https://github.com/Jimexist)) -- add --quiet/-q flag and allow timing info to be turned on/off [\#323](https://github.com/apache/arrow-datafusion/pull/323) ([Jimexist](https://github.com/Jimexist)) -- Implement hash partitioned aggregation [\#320](https://github.com/apache/arrow-datafusion/pull/320) ([Dandandan](https://github.com/Dandandan)) -- Support COUNT\(DISTINCT timestamps\) [\#319](https://github.com/apache/arrow-datafusion/pull/319) ([charlibot](https://github.com/charlibot)) -- add random SQL function [\#303](https://github.com/apache/arrow-datafusion/pull/303) ([Jimexist](https://github.com/Jimexist)) -- allow datafusion cli to take -- comments [\#296](https://github.com/apache/arrow-datafusion/pull/296) ([Jimexist](https://github.com/Jimexist)) -- Add json print format mode 
to datafusion cli [\#295](https://github.com/apache/arrow-datafusion/pull/295) ([Jimexist](https://github.com/Jimexist)) -- Add print format param with support for tsv print format to datafusion cli [\#292](https://github.com/apache/arrow-datafusion/pull/292) ([Jimexist](https://github.com/Jimexist)) -- Add print format param and support for csv print format to datafusion cli [\#289](https://github.com/apache/arrow-datafusion/pull/289) ([Jimexist](https://github.com/Jimexist)) -- allow datafusion-cli to take a file param [\#285](https://github.com/apache/arrow-datafusion/pull/285) ([Jimexist](https://github.com/Jimexist)) -- add param validation for datafusion-cli [\#284](https://github.com/apache/arrow-datafusion/pull/284) ([Jimexist](https://github.com/Jimexist)) -- \[breaking change\] fix 265, log should be log10, and add ln [\#271](https://github.com/apache/arrow-datafusion/pull/271) ([Jimexist](https://github.com/Jimexist)) -- Implement count distinct for dictionary arrays [\#256](https://github.com/apache/arrow-datafusion/pull/256) ([alamb](https://github.com/alamb)) -- Count distinct floats [\#252](https://github.com/apache/arrow-datafusion/pull/252) ([pjmore](https://github.com/pjmore)) -- Add rule to eliminate `LIMIT 0` and replace it with an `EmptyRelation` [\#213](https://github.com/apache/arrow-datafusion/pull/213) ([Dandandan](https://github.com/Dandandan)) -- Allow table providers to indicate their type for catalog metadata [\#205](https://github.com/apache/arrow-datafusion/pull/205) ([returnString](https://github.com/returnString)) -- Use arrow eq kernels in CaseWhen expression evaluation [\#52](https://github.com/apache/arrow-datafusion/pull/52) ([Dandandan](https://github.com/Dandandan)) -- Re-export Arrow and Parquet crates from DataFusion [\#39](https://github.com/apache/arrow-datafusion/pull/39) ([returnString](https://github.com/returnString)) -- \[DataFusion\] Optimize hash join inner workings, null handling fix 
[\#24](https://github.com/apache/arrow-datafusion/pull/24) ([Dandandan](https://github.com/Dandandan)) -- \[ARROW-12441\] \[DataFusion\] Cross join implementation [\#11](https://github.com/apache/arrow-datafusion/pull/11) ([Dandandan](https://github.com/Dandandan)) +- Allow extension nodes to correctly plan physical expressions with relations [\#642](https://github.com/apache/datafusion/issues/642) +- Filters aren't passed down to table scans in a union [\#557](https://github.com/apache/datafusion/issues/557) +- Support pruning for `boolean` columns [\#490](https://github.com/apache/datafusion/issues/490) +- Implement SQLMetrics for RepartitionExec [\#397](https://github.com/apache/datafusion/issues/397) +- DataFusion benchmarks should show executed plan with metrics after query completes [\#396](https://github.com/apache/datafusion/issues/396) +- Use published versions of arrow rather than github shas [\#393](https://github.com/apache/datafusion/issues/393) +- Add Compare to GroupByScalar [\#364](https://github.com/apache/datafusion/issues/364) +- Reusable "row group pruning" logic [\#363](https://github.com/apache/datafusion/issues/363) +- Add an Order Preserving merge operator [\#362](https://github.com/apache/datafusion/issues/362) +- Implement Postgres compatible `now()` function [\#251](https://github.com/apache/datafusion/issues/251) +- COUNT DISTINCT does not support dictionary types [\#249](https://github.com/apache/datafusion/issues/249) +- Use standard make_null_array for CASE [\#222](https://github.com/apache/datafusion/issues/222) +- Implement date_trunc\(\) function [\#203](https://github.com/apache/datafusion/issues/203) +- COUNT DISTINCT does not support for `Float64` [\#199](https://github.com/apache/datafusion/issues/199) +- Update SQLMetric to use atomics rather than a Mutex [\#30](https://github.com/apache/datafusion/issues/30) +- Implement PartialOrd for ScalarValue [\#838](https://github.com/apache/datafusion/pull/838) 
([viirya](https://github.com/viirya)) +- Support date datatypes in max/min [\#820](https://github.com/apache/datafusion/pull/820) ([viirya](https://github.com/viirya)) +- Implement vectorized hashing for DictionaryArray types [\#812](https://github.com/apache/datafusion/pull/812) ([alamb](https://github.com/alamb)) +- Convert unsupported conditions in left right join to filters [\#796](https://github.com/apache/datafusion/pull/796) [[sql](https://github.com/apache/datafusion/labels/sql)] ([Dandandan](https://github.com/Dandandan)) +- Implement streaming versions of Dataframe.collect methods [\#789](https://github.com/apache/datafusion/pull/789) ([andygrove](https://github.com/andygrove)) +- impl from str for column and scalar [\#762](https://github.com/apache/datafusion/pull/762) ([Jimexist](https://github.com/Jimexist)) +- impl fmt::Display for PlanType [\#752](https://github.com/apache/datafusion/pull/752) ([Jimexist](https://github.com/Jimexist)) +- Remove unnecessary projection in logical plan optimization phase [\#747](https://github.com/apache/datafusion/pull/747) ([waynexia](https://github.com/waynexia)) +- Support table columns alias [\#735](https://github.com/apache/datafusion/pull/735) ([Dandandan](https://github.com/Dandandan)) +- Derive PartialEq for datasource enums [\#734](https://github.com/apache/datafusion/pull/734) ([alamb](https://github.com/alamb)) +- Allow filetype to be lowercase, Implement FromStr for FileType [\#728](https://github.com/apache/datafusion/pull/728) ([Jimexist](https://github.com/Jimexist)) +- Update to use arrow 5.0 [\#721](https://github.com/apache/datafusion/pull/721) ([alamb](https://github.com/alamb)) +- \#554: Lead/lag window function with offset and default value arguments [\#687](https://github.com/apache/datafusion/pull/687) ([jgoday](https://github.com/jgoday)) +- dedup using join column in wildcard expansion [\#678](https://github.com/apache/datafusion/pull/678) ([houqp](https://github.com/houqp)) +- Implement 
metrics for HashJoinExec [\#664](https://github.com/apache/datafusion/pull/664) ([andygrove](https://github.com/andygrove)) +- Show physical plan with metrics in benchmark [\#662](https://github.com/apache/datafusion/pull/662) ([andygrove](https://github.com/andygrove)) +- Allow non-equijoin filters in join condition [\#660](https://github.com/apache/datafusion/pull/660) ([Dandandan](https://github.com/Dandandan)) +- Add End-to-end test for parquet pruning + metrics for ParquetExec [\#657](https://github.com/apache/datafusion/pull/657) ([alamb](https://github.com/alamb)) +- Add support for leading field in interval [\#647](https://github.com/apache/datafusion/pull/647) ([Dandandan](https://github.com/Dandandan)) +- Remove hard-coded PartitionMode from Ballista serde [\#637](https://github.com/apache/datafusion/pull/637) ([andygrove](https://github.com/andygrove)) +- Ballista: Implement scalable distributed joins [\#634](https://github.com/apache/datafusion/pull/634) ([andygrove](https://github.com/andygrove)) +- implement rank and dense_rank function and refactor built-in window function evaluation [\#631](https://github.com/apache/datafusion/pull/631) ([Jimexist](https://github.com/Jimexist)) +- Improve "field not found" error messages [\#625](https://github.com/apache/datafusion/pull/625) ([andygrove](https://github.com/andygrove)) +- Support modulus op [\#577](https://github.com/apache/datafusion/pull/577) ([gangliao](https://github.com/gangliao)) +- implement `std::default::Default` for execution config [\#570](https://github.com/apache/datafusion/pull/570) ([Jimexist](https://github.com/Jimexist)) +- `to_timestamp_millis()`, `to_timestamp_micros()`, `to_timestamp_seconds()` [\#567](https://github.com/apache/datafusion/pull/567) ([velvia](https://github.com/velvia)) +- Filter push down for Union [\#559](https://github.com/apache/datafusion/pull/559) ([Dandandan](https://github.com/Dandandan)) +- Implement window functions with `partition_by` clause 
[\#558](https://github.com/apache/datafusion/pull/558) ([Jimexist](https://github.com/Jimexist)) +- support table alias in join clause [\#547](https://github.com/apache/datafusion/pull/547) ([houqp](https://github.com/houqp)) +- Not equal predicate in physical_planning pruning [\#544](https://github.com/apache/datafusion/pull/544) ([jgoday](https://github.com/jgoday)) +- add error handling and boundary checking for window frames [\#530](https://github.com/apache/datafusion/pull/530) ([Jimexist](https://github.com/Jimexist)) +- Implement window functions with `order_by` clause [\#520](https://github.com/apache/datafusion/pull/520) ([Jimexist](https://github.com/Jimexist)) +- support group by column positions [\#519](https://github.com/apache/datafusion/pull/519) [[sql](https://github.com/apache/datafusion/labels/sql)] ([jychen7](https://github.com/jychen7)) +- Implement constant folding for CAST [\#513](https://github.com/apache/datafusion/pull/513) ([msathis](https://github.com/msathis)) +- Add window frame constructs - alternative [\#506](https://github.com/apache/datafusion/pull/506) ([Jimexist](https://github.com/Jimexist)) +- Add `partition by` constructs in window functions and modify logical planning [\#501](https://github.com/apache/datafusion/pull/501) ([Jimexist](https://github.com/Jimexist)) +- Add support for boolean columns in pruning logic [\#500](https://github.com/apache/datafusion/pull/500) ([alamb](https://github.com/alamb)) +- \#215 resolve aliases for group by exprs [\#485](https://github.com/apache/datafusion/pull/485) ([jychen7](https://github.com/jychen7)) +- Support anti join [\#482](https://github.com/apache/datafusion/pull/482) ([Dandandan](https://github.com/Dandandan)) +- Support semi join [\#470](https://github.com/apache/datafusion/pull/470) ([Dandandan](https://github.com/Dandandan)) +- add `order by` construct in window function and logical plans [\#463](https://github.com/apache/datafusion/pull/463) 
([Jimexist](https://github.com/Jimexist)) +- Remove reundant filters \(e.g. c\> 5 AND c\>5 --\> c\>5\) [\#436](https://github.com/apache/datafusion/pull/436) ([jgoday](https://github.com/jgoday)) +- fix: display the content of debug explain [\#434](https://github.com/apache/datafusion/pull/434) ([NGA-TRAN](https://github.com/NGA-TRAN)) +- implement lead and lag built-in window function [\#429](https://github.com/apache/datafusion/pull/429) ([Jimexist](https://github.com/Jimexist)) +- add support for ndjson for datafusion-cli [\#427](https://github.com/apache/datafusion/pull/427) ([Jimexist](https://github.com/Jimexist)) +- add `first_value`, `last_value`, and `nth_value` built-in window functions [\#403](https://github.com/apache/datafusion/pull/403) ([Jimexist](https://github.com/Jimexist)) +- export both `now` and `random` functions [\#389](https://github.com/apache/datafusion/pull/389) ([Jimexist](https://github.com/Jimexist)) +- Function to create `ArrayRef` from an iterator of ScalarValues [\#381](https://github.com/apache/datafusion/pull/381) ([alamb](https://github.com/alamb)) +- Sort preserving merge \(\#362\) [\#379](https://github.com/apache/datafusion/pull/379) ([tustvold](https://github.com/tustvold)) +- Add support for multiple partitions with SortExec \(\#362\) [\#378](https://github.com/apache/datafusion/pull/378) ([tustvold](https://github.com/tustvold)) +- add window expression stream, delegated window aggregation to aggregate functions, and implement `row_number` [\#375](https://github.com/apache/datafusion/pull/375) ([Jimexist](https://github.com/Jimexist)) +- Add PartialOrd and Ord to GroupByScalar \(\#364\) [\#368](https://github.com/apache/datafusion/pull/368) ([tustvold](https://github.com/tustvold)) +- Implement readable explain plans for physical plans [\#337](https://github.com/apache/datafusion/pull/337) ([alamb](https://github.com/alamb)) +- Add window expression part 1 - logical and physical planning, structure, to/from proto, and 
explain, for empty over clause only [\#334](https://github.com/apache/datafusion/pull/334) ([Jimexist](https://github.com/Jimexist)) +- Use NullArray to Pass row count to ScalarFunctions that take 0 arguments [\#328](https://github.com/apache/datafusion/pull/328) ([Jimexist](https://github.com/Jimexist)) +- add --quiet/-q flag and allow timing info to be turned on/off [\#323](https://github.com/apache/datafusion/pull/323) ([Jimexist](https://github.com/Jimexist)) +- Implement hash partitioned aggregation [\#320](https://github.com/apache/datafusion/pull/320) ([Dandandan](https://github.com/Dandandan)) +- Support COUNT\(DISTINCT timestamps\) [\#319](https://github.com/apache/datafusion/pull/319) ([charlibot](https://github.com/charlibot)) +- add random SQL function [\#303](https://github.com/apache/datafusion/pull/303) ([Jimexist](https://github.com/Jimexist)) +- allow datafusion cli to take -- comments [\#296](https://github.com/apache/datafusion/pull/296) ([Jimexist](https://github.com/Jimexist)) +- Add json print format mode to datafusion cli [\#295](https://github.com/apache/datafusion/pull/295) ([Jimexist](https://github.com/Jimexist)) +- Add print format param with support for tsv print format to datafusion cli [\#292](https://github.com/apache/datafusion/pull/292) ([Jimexist](https://github.com/Jimexist)) +- Add print format param and support for csv print format to datafusion cli [\#289](https://github.com/apache/datafusion/pull/289) ([Jimexist](https://github.com/Jimexist)) +- allow datafusion-cli to take a file param [\#285](https://github.com/apache/datafusion/pull/285) ([Jimexist](https://github.com/Jimexist)) +- add param validation for datafusion-cli [\#284](https://github.com/apache/datafusion/pull/284) ([Jimexist](https://github.com/Jimexist)) +- \[breaking change\] fix 265, log should be log10, and add ln [\#271](https://github.com/apache/datafusion/pull/271) ([Jimexist](https://github.com/Jimexist)) +- Implement count distinct for dictionary arrays 
[\#256](https://github.com/apache/datafusion/pull/256) ([alamb](https://github.com/alamb)) +- Count distinct floats [\#252](https://github.com/apache/datafusion/pull/252) ([pjmore](https://github.com/pjmore)) +- Add rule to eliminate `LIMIT 0` and replace it with an `EmptyRelation` [\#213](https://github.com/apache/datafusion/pull/213) ([Dandandan](https://github.com/Dandandan)) +- Allow table providers to indicate their type for catalog metadata [\#205](https://github.com/apache/datafusion/pull/205) ([returnString](https://github.com/returnString)) +- Use arrow eq kernels in CaseWhen expression evaluation [\#52](https://github.com/apache/datafusion/pull/52) ([Dandandan](https://github.com/Dandandan)) +- Re-export Arrow and Parquet crates from DataFusion [\#39](https://github.com/apache/datafusion/pull/39) ([returnString](https://github.com/returnString)) +- \[DataFusion\] Optimize hash join inner workings, null handling fix [\#24](https://github.com/apache/datafusion/pull/24) ([Dandandan](https://github.com/Dandandan)) +- \[ARROW-12441\] \[DataFusion\] Cross join implementation [\#11](https://github.com/apache/datafusion/pull/11) ([Dandandan](https://github.com/Dandandan)) **Fixed bugs:** -- Projection pushdown removes unqualified column names even when they are used [\#617](https://github.com/apache/arrow-datafusion/issues/617) -- Panic while running join datatypes/schema.rs:165:10 [\#601](https://github.com/apache/arrow-datafusion/issues/601) -- Indentation is incorrect for joins in formatted physical plans [\#345](https://github.com/apache/arrow-datafusion/issues/345) -- Error while running `COUNT DISTINCT (timestamp)`: 'Unexpected DataType for list [\#314](https://github.com/apache/arrow-datafusion/issues/314) -- When joining two tables, get Error: Plan\("Schema contains duplicate unqualified field name \'xxx\'"\) [\#311](https://github.com/apache/arrow-datafusion/issues/311) -- Incorrect answers with SELECT DISTINCT queries 
[\#250](https://github.com/apache/arrow-datafusion/issues/250) -- Intermitent failure in CI join_with_hash_collision [\#227](https://github.com/apache/arrow-datafusion/issues/227) -- `Concat` from Dataframe API no longer accepts multiple expressions [\#226](https://github.com/apache/arrow-datafusion/issues/226) -- Fix right, full join handling when having multiple non-matching rows at the left side [\#845](https://github.com/apache/arrow-datafusion/pull/845) ([Dandandan](https://github.com/Dandandan)) -- Qualified field resolution too strict [\#810](https://github.com/apache/arrow-datafusion/pull/810) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([seddonm1](https://github.com/seddonm1)) -- Better join order resolution logic [\#797](https://github.com/apache/arrow-datafusion/pull/797) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([seddonm1](https://github.com/seddonm1)) -- Produce correct answers for Group BY NULL \(Option 1\) [\#793](https://github.com/apache/arrow-datafusion/pull/793) ([alamb](https://github.com/alamb)) -- Use consistent version of string_to_timestamp_nanos in DataFusion [\#767](https://github.com/apache/arrow-datafusion/pull/767) ([alamb](https://github.com/alamb)) -- \#723 limit pruning rule to simple expression [\#764](https://github.com/apache/arrow-datafusion/pull/764) ([lvheyang](https://github.com/lvheyang)) -- \#699 fix return type conflict when calling builtin math fuctions [\#716](https://github.com/apache/arrow-datafusion/pull/716) ([lvheyang](https://github.com/lvheyang)) -- Fix Date32 and Date64 parquet row group pruning [\#690](https://github.com/apache/arrow-datafusion/pull/690) ([alamb](https://github.com/alamb)) -- Remove qualifiers on pushed down predicates / Fix parquet pruning [\#689](https://github.com/apache/arrow-datafusion/pull/689) ([alamb](https://github.com/alamb)) -- use `Weak` ptr to break catalog list \<\> info schema cyclic reference 
[\#681](https://github.com/apache/arrow-datafusion/pull/681) ([crepererum](https://github.com/crepererum)) -- honor table name for csv/parquet scan in ballista plan serde [\#629](https://github.com/apache/arrow-datafusion/pull/629) ([houqp](https://github.com/houqp)) -- fix 621, where unnamed window functions shall be differentiated by partition and order by clause [\#622](https://github.com/apache/arrow-datafusion/pull/622) ([Jimexist](https://github.com/Jimexist)) -- RFC: Do not prune out unnecessary columns with unqualified references [\#619](https://github.com/apache/arrow-datafusion/pull/619) ([alamb](https://github.com/alamb)) -- \[fix\] select \* on empty table [\#613](https://github.com/apache/arrow-datafusion/pull/613) ([rdettai](https://github.com/rdettai)) -- fix 592, support alias in window functions [\#607](https://github.com/apache/arrow-datafusion/pull/607) ([Jimexist](https://github.com/Jimexist)) -- RepartitionExec should not error if output has hung up [\#576](https://github.com/apache/arrow-datafusion/pull/576) ([alamb](https://github.com/alamb)) -- Fix pruning on not equal predicate [\#561](https://github.com/apache/arrow-datafusion/pull/561) ([alamb](https://github.com/alamb)) -- hash float arrays using primitive usigned integer type [\#556](https://github.com/apache/arrow-datafusion/pull/556) ([houqp](https://github.com/houqp)) -- Return errors properly from RepartitionExec [\#521](https://github.com/apache/arrow-datafusion/pull/521) ([alamb](https://github.com/alamb)) -- refactor sort exec stream and combine batches [\#515](https://github.com/apache/arrow-datafusion/pull/515) ([Jimexist](https://github.com/Jimexist)) -- Fix display of execution time in datafusion-cli [\#514](https://github.com/apache/arrow-datafusion/pull/514) ([Dandandan](https://github.com/Dandandan)) -- Wrong aggregation arguments error. 
[\#505](https://github.com/apache/arrow-datafusion/pull/505) ([jgoday](https://github.com/jgoday)) -- fix window aggregation with alias and add integration test case [\#454](https://github.com/apache/arrow-datafusion/pull/454) ([Jimexist](https://github.com/Jimexist)) -- fix: don't duplicate existing filters [\#409](https://github.com/apache/arrow-datafusion/pull/409) ([e-dard](https://github.com/e-dard)) -- Fixed incorrect logical type in GroupByScalar. [\#391](https://github.com/apache/arrow-datafusion/pull/391) ([jorgecarleitao](https://github.com/jorgecarleitao)) -- Fix indented display for multi-child nodes [\#358](https://github.com/apache/arrow-datafusion/pull/358) ([alamb](https://github.com/alamb)) -- Fix SQL planner to support multibyte column names [\#357](https://github.com/apache/arrow-datafusion/pull/357) ([agatan](https://github.com/agatan)) -- Fix wrong projection 'optimization' [\#268](https://github.com/apache/arrow-datafusion/pull/268) ([Dandandan](https://github.com/Dandandan)) -- Fix Left join implementation is incorrect for 0 or multiple batches on the right side [\#238](https://github.com/apache/arrow-datafusion/pull/238) ([Dandandan](https://github.com/Dandandan)) -- Count distinct boolean [\#230](https://github.com/apache/arrow-datafusion/pull/230) ([pjmore](https://github.com/pjmore)) -- Fix Filter / where clause without column names is removed in optimization pass [\#225](https://github.com/apache/arrow-datafusion/pull/225) ([Dandandan](https://github.com/Dandandan)) +- Projection pushdown removes unqualified column names even when they are used [\#617](https://github.com/apache/datafusion/issues/617) +- Panic while running join datatypes/schema.rs:165:10 [\#601](https://github.com/apache/datafusion/issues/601) +- Indentation is incorrect for joins in formatted physical plans [\#345](https://github.com/apache/datafusion/issues/345) +- Error while running `COUNT DISTINCT (timestamp)`: 'Unexpected DataType for list 
[\#314](https://github.com/apache/datafusion/issues/314) +- When joining two tables, get Error: Plan\("Schema contains duplicate unqualified field name \'xxx\'"\) [\#311](https://github.com/apache/datafusion/issues/311) +- Incorrect answers with SELECT DISTINCT queries [\#250](https://github.com/apache/datafusion/issues/250) +- Intermitent failure in CI join_with_hash_collision [\#227](https://github.com/apache/datafusion/issues/227) +- `Concat` from Dataframe API no longer accepts multiple expressions [\#226](https://github.com/apache/datafusion/issues/226) +- Fix right, full join handling when having multiple non-matching rows at the left side [\#845](https://github.com/apache/datafusion/pull/845) ([Dandandan](https://github.com/Dandandan)) +- Qualified field resolution too strict [\#810](https://github.com/apache/datafusion/pull/810) [[sql](https://github.com/apache/datafusion/labels/sql)] ([seddonm1](https://github.com/seddonm1)) +- Better join order resolution logic [\#797](https://github.com/apache/datafusion/pull/797) [[sql](https://github.com/apache/datafusion/labels/sql)] ([seddonm1](https://github.com/seddonm1)) +- Produce correct answers for Group BY NULL \(Option 1\) [\#793](https://github.com/apache/datafusion/pull/793) ([alamb](https://github.com/alamb)) +- Use consistent version of string_to_timestamp_nanos in DataFusion [\#767](https://github.com/apache/datafusion/pull/767) ([alamb](https://github.com/alamb)) +- \#723 limit pruning rule to simple expression [\#764](https://github.com/apache/datafusion/pull/764) ([lvheyang](https://github.com/lvheyang)) +- \#699 fix return type conflict when calling builtin math fuctions [\#716](https://github.com/apache/datafusion/pull/716) ([lvheyang](https://github.com/lvheyang)) +- Fix Date32 and Date64 parquet row group pruning [\#690](https://github.com/apache/datafusion/pull/690) ([alamb](https://github.com/alamb)) +- Remove qualifiers on pushed down predicates / Fix parquet pruning 
[\#689](https://github.com/apache/datafusion/pull/689) ([alamb](https://github.com/alamb)) +- use `Weak` ptr to break catalog list \<\> info schema cyclic reference [\#681](https://github.com/apache/datafusion/pull/681) ([crepererum](https://github.com/crepererum)) +- honor table name for csv/parquet scan in ballista plan serde [\#629](https://github.com/apache/datafusion/pull/629) ([houqp](https://github.com/houqp)) +- fix 621, where unnamed window functions shall be differentiated by partition and order by clause [\#622](https://github.com/apache/datafusion/pull/622) ([Jimexist](https://github.com/Jimexist)) +- RFC: Do not prune out unnecessary columns with unqualified references [\#619](https://github.com/apache/datafusion/pull/619) ([alamb](https://github.com/alamb)) +- \[fix\] select \* on empty table [\#613](https://github.com/apache/datafusion/pull/613) ([rdettai](https://github.com/rdettai)) +- fix 592, support alias in window functions [\#607](https://github.com/apache/datafusion/pull/607) ([Jimexist](https://github.com/Jimexist)) +- RepartitionExec should not error if output has hung up [\#576](https://github.com/apache/datafusion/pull/576) ([alamb](https://github.com/alamb)) +- Fix pruning on not equal predicate [\#561](https://github.com/apache/datafusion/pull/561) ([alamb](https://github.com/alamb)) +- hash float arrays using primitive usigned integer type [\#556](https://github.com/apache/datafusion/pull/556) ([houqp](https://github.com/houqp)) +- Return errors properly from RepartitionExec [\#521](https://github.com/apache/datafusion/pull/521) ([alamb](https://github.com/alamb)) +- refactor sort exec stream and combine batches [\#515](https://github.com/apache/datafusion/pull/515) ([Jimexist](https://github.com/Jimexist)) +- Fix display of execution time in datafusion-cli [\#514](https://github.com/apache/datafusion/pull/514) ([Dandandan](https://github.com/Dandandan)) +- Wrong aggregation arguments error. 
[\#505](https://github.com/apache/datafusion/pull/505) ([jgoday](https://github.com/jgoday)) +- fix window aggregation with alias and add integration test case [\#454](https://github.com/apache/datafusion/pull/454) ([Jimexist](https://github.com/Jimexist)) +- fix: don't duplicate existing filters [\#409](https://github.com/apache/datafusion/pull/409) ([e-dard](https://github.com/e-dard)) +- Fixed incorrect logical type in GroupByScalar. [\#391](https://github.com/apache/datafusion/pull/391) ([jorgecarleitao](https://github.com/jorgecarleitao)) +- Fix indented display for multi-child nodes [\#358](https://github.com/apache/datafusion/pull/358) ([alamb](https://github.com/alamb)) +- Fix SQL planner to support multibyte column names [\#357](https://github.com/apache/datafusion/pull/357) ([agatan](https://github.com/agatan)) +- Fix wrong projection 'optimization' [\#268](https://github.com/apache/datafusion/pull/268) ([Dandandan](https://github.com/Dandandan)) +- Fix Left join implementation is incorrect for 0 or multiple batches on the right side [\#238](https://github.com/apache/datafusion/pull/238) ([Dandandan](https://github.com/Dandandan)) +- Count distinct boolean [\#230](https://github.com/apache/datafusion/pull/230) ([pjmore](https://github.com/pjmore)) +- Fix Filter / where clause without column names is removed in optimization pass [\#225](https://github.com/apache/datafusion/pull/225) ([Dandandan](https://github.com/Dandandan)) **Documentation updates:** -- No way to get to the examples from docs.rs [\#186](https://github.com/apache/arrow-datafusion/issues/186) -- Update docs to use vendored version of arrow [\#772](https://github.com/apache/arrow-datafusion/pull/772) ([alamb](https://github.com/alamb)) -- Fix typo in DEVELOPERS.md [\#692](https://github.com/apache/arrow-datafusion/pull/692) ([lvheyang](https://github.com/lvheyang)) -- update stale documentations related to window functions [\#598](https://github.com/apache/arrow-datafusion/pull/598) 
([Jimexist](https://github.com/Jimexist)) -- update readme to reflect work on window functions [\#471](https://github.com/apache/arrow-datafusion/pull/471) ([Jimexist](https://github.com/Jimexist)) -- Add examples section to datafusion crate doc [\#457](https://github.com/apache/arrow-datafusion/pull/457) ([mluts](https://github.com/mluts)) -- add invariants spec [\#443](https://github.com/apache/arrow-datafusion/pull/443) ([houqp](https://github.com/houqp)) -- add output field name rfc [\#422](https://github.com/apache/arrow-datafusion/pull/422) ([houqp](https://github.com/houqp)) -- Update more docs and also the developer.md doc [\#414](https://github.com/apache/arrow-datafusion/pull/414) ([Jimexist](https://github.com/Jimexist)) -- use prettier to format md files [\#367](https://github.com/apache/arrow-datafusion/pull/367) ([Jimexist](https://github.com/Jimexist)) -- Add new logo svg with white background [\#313](https://github.com/apache/arrow-datafusion/pull/313) ([parthsarthy](https://github.com/parthsarthy)) -- Add projects \(Squirtle and Tensorbase\) to list in readme [\#312](https://github.com/apache/arrow-datafusion/pull/312) ([parthsarthy](https://github.com/parthsarthy)) -- docs - fix the ballista link [\#274](https://github.com/apache/arrow-datafusion/pull/274) ([haoxins](https://github.com/haoxins)) -- misc\(README\): Replace Cube.js with Cube Store [\#248](https://github.com/apache/arrow-datafusion/pull/248) ([ovr](https://github.com/ovr)) -- Initial docs for SQL syntax [\#242](https://github.com/apache/arrow-datafusion/pull/242) ([Dandandan](https://github.com/Dandandan)) -- Deduplicate README.md [\#79](https://github.com/apache/arrow-datafusion/pull/79) ([msathis](https://github.com/msathis)) +- No way to get to the examples from docs.rs [\#186](https://github.com/apache/datafusion/issues/186) +- Update docs to use vendored version of arrow [\#772](https://github.com/apache/datafusion/pull/772) ([alamb](https://github.com/alamb)) +- Fix typo in 
DEVELOPERS.md [\#692](https://github.com/apache/datafusion/pull/692) ([lvheyang](https://github.com/lvheyang)) +- update stale documentations related to window functions [\#598](https://github.com/apache/datafusion/pull/598) ([Jimexist](https://github.com/Jimexist)) +- update readme to reflect work on window functions [\#471](https://github.com/apache/datafusion/pull/471) ([Jimexist](https://github.com/Jimexist)) +- Add examples section to datafusion crate doc [\#457](https://github.com/apache/datafusion/pull/457) ([mluts](https://github.com/mluts)) +- add invariants spec [\#443](https://github.com/apache/datafusion/pull/443) ([houqp](https://github.com/houqp)) +- add output field name rfc [\#422](https://github.com/apache/datafusion/pull/422) ([houqp](https://github.com/houqp)) +- Update more docs and also the developer.md doc [\#414](https://github.com/apache/datafusion/pull/414) ([Jimexist](https://github.com/Jimexist)) +- use prettier to format md files [\#367](https://github.com/apache/datafusion/pull/367) ([Jimexist](https://github.com/Jimexist)) +- Add new logo svg with white background [\#313](https://github.com/apache/datafusion/pull/313) ([parthsarthy](https://github.com/parthsarthy)) +- Add projects \(Squirtle and Tensorbase\) to list in readme [\#312](https://github.com/apache/datafusion/pull/312) ([parthsarthy](https://github.com/parthsarthy)) +- docs - fix the ballista link [\#274](https://github.com/apache/datafusion/pull/274) ([haoxins](https://github.com/haoxins)) +- misc\(README\): Replace Cube.js with Cube Store [\#248](https://github.com/apache/datafusion/pull/248) ([ovr](https://github.com/ovr)) +- Initial docs for SQL syntax [\#242](https://github.com/apache/datafusion/pull/242) ([Dandandan](https://github.com/Dandandan)) +- Deduplicate README.md [\#79](https://github.com/apache/datafusion/pull/79) ([msathis](https://github.com/msathis)) **Performance improvements:** -- Speed up inlist for strings and primitives 
[\#813](https://github.com/apache/arrow-datafusion/pull/813) ([Dandandan](https://github.com/Dandandan)) -- perf: improve performance of `SortPreservingMergeExec` operator [\#722](https://github.com/apache/arrow-datafusion/pull/722) ([e-dard](https://github.com/e-dard)) -- Optimize min/max queries with table statistics [\#719](https://github.com/apache/arrow-datafusion/pull/719) ([b41sh](https://github.com/b41sh)) -- perf: Improve materialisation performance of SortPreservingMergeExec [\#691](https://github.com/apache/arrow-datafusion/pull/691) ([e-dard](https://github.com/e-dard)) -- Optimize count\(\*\) with table statistics [\#620](https://github.com/apache/arrow-datafusion/pull/620) ([Dandandan](https://github.com/Dandandan)) -- optimize window function's `find_ranges_in_range` [\#595](https://github.com/apache/arrow-datafusion/pull/595) ([Jimexist](https://github.com/Jimexist)) -- Collapse sort into window expr and do sort within logical phase [\#571](https://github.com/apache/arrow-datafusion/pull/571) ([Jimexist](https://github.com/Jimexist)) -- Use repartition in window functions to speed up [\#569](https://github.com/apache/arrow-datafusion/pull/569) ([Jimexist](https://github.com/Jimexist)) -- Constant fold / optimize `to_timestamp` function during planning [\#387](https://github.com/apache/arrow-datafusion/pull/387) ([msathis](https://github.com/msathis)) -- Speed up `create_batch_from_map` [\#339](https://github.com/apache/arrow-datafusion/pull/339) ([Dandandan](https://github.com/Dandandan)) -- Simplify math expression code \(use unary kernel\) [\#309](https://github.com/apache/arrow-datafusion/pull/309) ([Dandandan](https://github.com/Dandandan)) +- Speed up inlist for strings and primitives [\#813](https://github.com/apache/datafusion/pull/813) ([Dandandan](https://github.com/Dandandan)) +- perf: improve performance of `SortPreservingMergeExec` operator [\#722](https://github.com/apache/datafusion/pull/722) ([e-dard](https://github.com/e-dard)) +- 
Optimize min/max queries with table statistics [\#719](https://github.com/apache/datafusion/pull/719) ([b41sh](https://github.com/b41sh)) +- perf: Improve materialisation performance of SortPreservingMergeExec [\#691](https://github.com/apache/datafusion/pull/691) ([e-dard](https://github.com/e-dard)) +- Optimize count\(\*\) with table statistics [\#620](https://github.com/apache/datafusion/pull/620) ([Dandandan](https://github.com/Dandandan)) +- optimize window function's `find_ranges_in_range` [\#595](https://github.com/apache/datafusion/pull/595) ([Jimexist](https://github.com/Jimexist)) +- Collapse sort into window expr and do sort within logical phase [\#571](https://github.com/apache/datafusion/pull/571) ([Jimexist](https://github.com/Jimexist)) +- Use repartition in window functions to speed up [\#569](https://github.com/apache/datafusion/pull/569) ([Jimexist](https://github.com/Jimexist)) +- Constant fold / optimize `to_timestamp` function during planning [\#387](https://github.com/apache/datafusion/pull/387) ([msathis](https://github.com/msathis)) +- Speed up `create_batch_from_map` [\#339](https://github.com/apache/datafusion/pull/339) ([Dandandan](https://github.com/Dandandan)) +- Simplify math expression code \(use unary kernel\) [\#309](https://github.com/apache/datafusion/pull/309) ([Dandandan](https://github.com/Dandandan)) **Closed issues:** -- Confirm git tagging strategy for releases [\#770](https://github.com/apache/arrow-datafusion/issues/770) -- arrow::util::pretty::pretty_format_batches missing [\#769](https://github.com/apache/arrow-datafusion/issues/769) -- move the `assert_batches_eq!` macros to a non part of datafusion [\#745](https://github.com/apache/arrow-datafusion/issues/745) -- fix an issue where aliases are not respected in generating downstream schemas in window expr [\#592](https://github.com/apache/arrow-datafusion/issues/592) -- make the planner to print more succinct and useful information in window function explain clause 
[\#526](https://github.com/apache/arrow-datafusion/issues/526) -- move window frame module to be in `logical_plan` [\#517](https://github.com/apache/arrow-datafusion/issues/517) -- use a more rust idiomatic way of handling nth_value [\#448](https://github.com/apache/arrow-datafusion/issues/448) -- create a test with more than one partition for window functions [\#435](https://github.com/apache/arrow-datafusion/issues/435) -- COUNT DISTINCT does not support for `Boolean` [\#202](https://github.com/apache/arrow-datafusion/issues/202) -- Read CSV format text from stdin or memory [\#198](https://github.com/apache/arrow-datafusion/issues/198) -- Fix null handling hash join [\#195](https://github.com/apache/arrow-datafusion/issues/195) -- Allow TableProviders to indicate their type for the information schema [\#191](https://github.com/apache/arrow-datafusion/issues/191) -- Make DataFrame extensible [\#190](https://github.com/apache/arrow-datafusion/issues/190) -- TPC-H Query 19 [\#170](https://github.com/apache/arrow-datafusion/issues/170) -- TPC-H Query 7 [\#161](https://github.com/apache/arrow-datafusion/issues/161) -- Upgrade hashbrown to 0.10 [\#151](https://github.com/apache/arrow-datafusion/issues/151) -- Implement vectorized hashing for hash aggregate [\#149](https://github.com/apache/arrow-datafusion/issues/149) -- More efficient LEFT join implementation [\#143](https://github.com/apache/arrow-datafusion/issues/143) -- Implement vectorized hashing [\#142](https://github.com/apache/arrow-datafusion/issues/142) -- RFC Roadmap for 2021 \(DataFusion\) [\#140](https://github.com/apache/arrow-datafusion/issues/140) -- Implement hash partitioning [\#131](https://github.com/apache/arrow-datafusion/issues/131) -- Grouping by column position [\#110](https://github.com/apache/arrow-datafusion/issues/110) -- \[Datafusion\] GROUP BY with a high cardinality doesn't seem to finish [\#107](https://github.com/apache/arrow-datafusion/issues/107) -- \[Rust\] Add support for JSON 
data sources [\#103](https://github.com/apache/arrow-datafusion/issues/103) -- \[Rust\] Implement metrics framework [\#95](https://github.com/apache/arrow-datafusion/issues/95) -- Publically export Arrow crate from datafusion [\#36](https://github.com/apache/arrow-datafusion/issues/36) -- Implement hash-partitioned hash aggregate [\#27](https://github.com/apache/arrow-datafusion/issues/27) -- Consider using GitHub pages for DataFusion/Ballista documentation [\#18](https://github.com/apache/arrow-datafusion/issues/18) -- Update "repository" in Cargo.toml [\#16](https://github.com/apache/arrow-datafusion/issues/16) +- Confirm git tagging strategy for releases [\#770](https://github.com/apache/datafusion/issues/770) +- arrow::util::pretty::pretty_format_batches missing [\#769](https://github.com/apache/datafusion/issues/769) +- move the `assert_batches_eq!` macros to a non part of datafusion [\#745](https://github.com/apache/datafusion/issues/745) +- fix an issue where aliases are not respected in generating downstream schemas in window expr [\#592](https://github.com/apache/datafusion/issues/592) +- make the planner to print more succinct and useful information in window function explain clause [\#526](https://github.com/apache/datafusion/issues/526) +- move window frame module to be in `logical_plan` [\#517](https://github.com/apache/datafusion/issues/517) +- use a more rust idiomatic way of handling nth_value [\#448](https://github.com/apache/datafusion/issues/448) +- create a test with more than one partition for window functions [\#435](https://github.com/apache/datafusion/issues/435) +- COUNT DISTINCT does not support for `Boolean` [\#202](https://github.com/apache/datafusion/issues/202) +- Read CSV format text from stdin or memory [\#198](https://github.com/apache/datafusion/issues/198) +- Fix null handling hash join [\#195](https://github.com/apache/datafusion/issues/195) +- Allow TableProviders to indicate their type for the information schema 
[\#191](https://github.com/apache/datafusion/issues/191) +- Make DataFrame extensible [\#190](https://github.com/apache/datafusion/issues/190) +- TPC-H Query 19 [\#170](https://github.com/apache/datafusion/issues/170) +- TPC-H Query 7 [\#161](https://github.com/apache/datafusion/issues/161) +- Upgrade hashbrown to 0.10 [\#151](https://github.com/apache/datafusion/issues/151) +- Implement vectorized hashing for hash aggregate [\#149](https://github.com/apache/datafusion/issues/149) +- More efficient LEFT join implementation [\#143](https://github.com/apache/datafusion/issues/143) +- Implement vectorized hashing [\#142](https://github.com/apache/datafusion/issues/142) +- RFC Roadmap for 2021 \(DataFusion\) [\#140](https://github.com/apache/datafusion/issues/140) +- Implement hash partitioning [\#131](https://github.com/apache/datafusion/issues/131) +- Grouping by column position [\#110](https://github.com/apache/datafusion/issues/110) +- \[Datafusion\] GROUP BY with a high cardinality doesn't seem to finish [\#107](https://github.com/apache/datafusion/issues/107) +- \[Rust\] Add support for JSON data sources [\#103](https://github.com/apache/datafusion/issues/103) +- \[Rust\] Implement metrics framework [\#95](https://github.com/apache/datafusion/issues/95) +- Publically export Arrow crate from datafusion [\#36](https://github.com/apache/datafusion/issues/36) +- Implement hash-partitioned hash aggregate [\#27](https://github.com/apache/datafusion/issues/27) +- Consider using GitHub pages for DataFusion/Ballista documentation [\#18](https://github.com/apache/datafusion/issues/18) +- Update "repository" in Cargo.toml [\#16](https://github.com/apache/datafusion/issues/16) **Merged pull requests:** -- Use `RawTable` API in hash join [\#827](https://github.com/apache/arrow-datafusion/pull/827) ([Dandandan](https://github.com/Dandandan)) -- Add test for window functions on dictionary [\#823](https://github.com/apache/arrow-datafusion/pull/823) 
([alamb](https://github.com/alamb)) -- Update dependencies: prost to 0.8 and tonic to 0.5 [\#818](https://github.com/apache/arrow-datafusion/pull/818) ([alamb](https://github.com/alamb)) -- Move `hash_array` into hash_utils.rs [\#807](https://github.com/apache/arrow-datafusion/pull/807) ([alamb](https://github.com/alamb)) -- Remove GroupByScalar and use ScalarValue in preparation for supporting null values in GroupBy [\#786](https://github.com/apache/arrow-datafusion/pull/786) ([alamb](https://github.com/alamb)) -- fix 226, make `concat`, `concat_ws`, and `random` work with `Python` crate [\#761](https://github.com/apache/arrow-datafusion/pull/761) ([Jimexist](https://github.com/Jimexist)) -- Test for parquet pruning disabling [\#754](https://github.com/apache/arrow-datafusion/pull/754) ([alamb](https://github.com/alamb)) -- Add explain verbose with limit push down [\#751](https://github.com/apache/arrow-datafusion/pull/751) ([Jimexist](https://github.com/Jimexist)) -- Move assert_batches_eq! 
macros to test_utils.rs [\#746](https://github.com/apache/arrow-datafusion/pull/746) ([alamb](https://github.com/alamb)) -- Show optimized physical and logical plans in EXPLAIN [\#744](https://github.com/apache/arrow-datafusion/pull/744) ([alamb](https://github.com/alamb)) -- update `python` crate to support latest pyo3 syntax and gil sematics [\#741](https://github.com/apache/arrow-datafusion/pull/741) ([Jimexist](https://github.com/Jimexist)) -- update `python` crate dependencies [\#740](https://github.com/apache/arrow-datafusion/pull/740) ([Jimexist](https://github.com/Jimexist)) -- provide more details on required .parquet file extension error message [\#729](https://github.com/apache/arrow-datafusion/pull/729) ([Jimexist](https://github.com/Jimexist)) -- split up windows functions into a dedicated module with separate files [\#724](https://github.com/apache/arrow-datafusion/pull/724) ([Jimexist](https://github.com/Jimexist)) -- Use pytest in integration test [\#715](https://github.com/apache/arrow-datafusion/pull/715) ([Jimexist](https://github.com/Jimexist)) -- replace once iter chain with array::IntoIter [\#704](https://github.com/apache/arrow-datafusion/pull/704) ([houqp](https://github.com/houqp)) -- avoid iterator materialization in column index lookup [\#703](https://github.com/apache/arrow-datafusion/pull/703) ([houqp](https://github.com/houqp)) -- Fix build with 1.52.1 [\#696](https://github.com/apache/arrow-datafusion/pull/696) ([alamb](https://github.com/alamb)) -- Fix test output due to logical merge conflict [\#694](https://github.com/apache/arrow-datafusion/pull/694) ([alamb](https://github.com/alamb)) -- add more integration tests [\#668](https://github.com/apache/arrow-datafusion/pull/668) ([Jimexist](https://github.com/Jimexist)) -- Bump arrow and parquet versions to 4.4 [\#654](https://github.com/apache/arrow-datafusion/pull/654) ([toddtreece](https://github.com/toddtreece)) -- Add query 15 to TPC-H queries 
[\#645](https://github.com/apache/arrow-datafusion/pull/645) ([Dandandan](https://github.com/Dandandan)) -- Improve error message and comments [\#641](https://github.com/apache/arrow-datafusion/pull/641) ([alamb](https://github.com/alamb)) -- add integration tests for rank, dense_rank, fix last_value evaluation with rank [\#638](https://github.com/apache/arrow-datafusion/pull/638) ([Jimexist](https://github.com/Jimexist)) -- round trip TPCH queries in tests [\#630](https://github.com/apache/arrow-datafusion/pull/630) ([houqp](https://github.com/houqp)) -- use Into\ as argument type wherever applicable [\#615](https://github.com/apache/arrow-datafusion/pull/615) ([houqp](https://github.com/houqp)) -- reuse alias map in aggregate logical planning and refactor position resolution [\#606](https://github.com/apache/arrow-datafusion/pull/606) ([Jimexist](https://github.com/Jimexist)) -- fix clippy warnings [\#581](https://github.com/apache/arrow-datafusion/pull/581) ([Jimexist](https://github.com/Jimexist)) -- Add benchmarks to window function queries [\#564](https://github.com/apache/arrow-datafusion/pull/564) ([Jimexist](https://github.com/Jimexist)) -- reuse code for now function expr creation [\#548](https://github.com/apache/arrow-datafusion/pull/548) ([houqp](https://github.com/houqp)) -- turn on clippy rule for needless borrow [\#545](https://github.com/apache/arrow-datafusion/pull/545) ([Jimexist](https://github.com/Jimexist)) -- Refactor hash aggregates's planner building code [\#539](https://github.com/apache/arrow-datafusion/pull/539) ([Jimexist](https://github.com/Jimexist)) -- Cleanup Repartition Exec code [\#538](https://github.com/apache/arrow-datafusion/pull/538) ([alamb](https://github.com/alamb)) -- reuse datafusion physical planner in ballista building from protobuf [\#532](https://github.com/apache/arrow-datafusion/pull/532) ([Jimexist](https://github.com/Jimexist)) -- remove redundant `into_iter()` calls 
[\#527](https://github.com/apache/arrow-datafusion/pull/527) ([Jimexist](https://github.com/Jimexist)) -- Fix 517 - move `window_frames` module to `logical_plan` [\#518](https://github.com/apache/arrow-datafusion/pull/518) ([Jimexist](https://github.com/Jimexist)) -- Refactor window aggregation, simplify batch processing logic [\#516](https://github.com/apache/arrow-datafusion/pull/516) ([Jimexist](https://github.com/Jimexist)) -- Add datafusion::test_util, resolve test data paths without env vars [\#498](https://github.com/apache/arrow-datafusion/pull/498) ([mluts](https://github.com/mluts)) -- Avoid warnings in tests when compiling without default features [\#489](https://github.com/apache/arrow-datafusion/pull/489) ([alamb](https://github.com/alamb)) -- update cargo.toml in python crate and fix unit test due to hash joins [\#483](https://github.com/apache/arrow-datafusion/pull/483) ([Jimexist](https://github.com/Jimexist)) -- use prettier check in CI [\#453](https://github.com/apache/arrow-datafusion/pull/453) ([Jimexist](https://github.com/Jimexist)) -- Optimize `nth_value`, remove `first_value`, `last_value` structs and use idiomatic rust style [\#452](https://github.com/apache/arrow-datafusion/pull/452) ([Jimexist](https://github.com/Jimexist)) -- Fixed typo / logical merge conflict [\#433](https://github.com/apache/arrow-datafusion/pull/433) ([jorgecarleitao](https://github.com/jorgecarleitao)) -- include test data and add aggregation tests in integration test [\#425](https://github.com/apache/arrow-datafusion/pull/425) ([Jimexist](https://github.com/Jimexist)) -- Add some padding around the logo [\#411](https://github.com/apache/arrow-datafusion/pull/411) ([parthsarthy](https://github.com/parthsarthy)) -- Benchmark subcommand to distinguish between DataFusion and Ballista [\#402](https://github.com/apache/arrow-datafusion/pull/402) ([jgoday](https://github.com/jgoday)) -- refactor datafusion/`scalar_value` to use more macro and avoid dup code 
[\#392](https://github.com/apache/arrow-datafusion/pull/392) ([Jimexist](https://github.com/Jimexist)) -- Update TPC-H benchmark to show physical plan when debug mode is enabled [\#386](https://github.com/apache/arrow-datafusion/pull/386) ([andygrove](https://github.com/andygrove)) -- Update arrow dependencies again [\#341](https://github.com/apache/arrow-datafusion/pull/341) ([alamb](https://github.com/alamb)) -- Update arrow-rs deps [\#317](https://github.com/apache/arrow-datafusion/pull/317) ([alamb](https://github.com/alamb)) -- Update PR template by commenting out instructions [\#315](https://github.com/apache/arrow-datafusion/pull/315) ([alamb](https://github.com/alamb)) -- fix clippy warning [\#286](https://github.com/apache/arrow-datafusion/pull/286) ([Jimexist](https://github.com/Jimexist)) -- add integration test to compare datafusion-cli against psql [\#281](https://github.com/apache/arrow-datafusion/pull/281) ([Jimexist](https://github.com/Jimexist)) -- Update arrow deps [\#269](https://github.com/apache/arrow-datafusion/pull/269) ([alamb](https://github.com/alamb)) -- Use multi-stage build dockerfile in datafusion-cli and reduce image size from 2.16GB to 89.9MB [\#266](https://github.com/apache/arrow-datafusion/pull/266) ([Jimexist](https://github.com/Jimexist)) -- Enable redundant_field_names clippy lint [\#261](https://github.com/apache/arrow-datafusion/pull/261) ([Dandandan](https://github.com/Dandandan)) -- fix clippy lint [\#259](https://github.com/apache/arrow-datafusion/pull/259) ([alamb](https://github.com/alamb)) -- Move datafusion-cli to new crate [\#231](https://github.com/apache/arrow-datafusion/pull/231) ([Dandandan](https://github.com/Dandandan)) -- Make test join_with_hash_collision deterministic [\#229](https://github.com/apache/arrow-datafusion/pull/229) ([Dandandan](https://github.com/Dandandan)) -- Update arrow-rs deps \(to fix build due to flatbuffers update\) [\#224](https://github.com/apache/arrow-datafusion/pull/224) 
([alamb](https://github.com/alamb)) -- Use standard make_null_array for CASE [\#223](https://github.com/apache/arrow-datafusion/pull/223) ([alamb](https://github.com/alamb)) -- update arrow-rs deps to latest master [\#216](https://github.com/apache/arrow-datafusion/pull/216) ([alamb](https://github.com/alamb)) -- MINOR: Remove empty rust dir [\#61](https://github.com/apache/arrow-datafusion/pull/61) ([andygrove](https://github.com/andygrove)) +- Use `RawTable` API in hash join [\#827](https://github.com/apache/datafusion/pull/827) ([Dandandan](https://github.com/Dandandan)) +- Add test for window functions on dictionary [\#823](https://github.com/apache/datafusion/pull/823) ([alamb](https://github.com/alamb)) +- Update dependencies: prost to 0.8 and tonic to 0.5 [\#818](https://github.com/apache/datafusion/pull/818) ([alamb](https://github.com/alamb)) +- Move `hash_array` into hash_utils.rs [\#807](https://github.com/apache/datafusion/pull/807) ([alamb](https://github.com/alamb)) +- Remove GroupByScalar and use ScalarValue in preparation for supporting null values in GroupBy [\#786](https://github.com/apache/datafusion/pull/786) ([alamb](https://github.com/alamb)) +- fix 226, make `concat`, `concat_ws`, and `random` work with `Python` crate [\#761](https://github.com/apache/datafusion/pull/761) ([Jimexist](https://github.com/Jimexist)) +- Test for parquet pruning disabling [\#754](https://github.com/apache/datafusion/pull/754) ([alamb](https://github.com/alamb)) +- Add explain verbose with limit push down [\#751](https://github.com/apache/datafusion/pull/751) ([Jimexist](https://github.com/Jimexist)) +- Move assert_batches_eq! 
macros to test_utils.rs [\#746](https://github.com/apache/datafusion/pull/746) ([alamb](https://github.com/alamb)) +- Show optimized physical and logical plans in EXPLAIN [\#744](https://github.com/apache/datafusion/pull/744) ([alamb](https://github.com/alamb)) +- update `python` crate to support latest pyo3 syntax and gil sematics [\#741](https://github.com/apache/datafusion/pull/741) ([Jimexist](https://github.com/Jimexist)) +- update `python` crate dependencies [\#740](https://github.com/apache/datafusion/pull/740) ([Jimexist](https://github.com/Jimexist)) +- provide more details on required .parquet file extension error message [\#729](https://github.com/apache/datafusion/pull/729) ([Jimexist](https://github.com/Jimexist)) +- split up windows functions into a dedicated module with separate files [\#724](https://github.com/apache/datafusion/pull/724) ([Jimexist](https://github.com/Jimexist)) +- Use pytest in integration test [\#715](https://github.com/apache/datafusion/pull/715) ([Jimexist](https://github.com/Jimexist)) +- replace once iter chain with array::IntoIter [\#704](https://github.com/apache/datafusion/pull/704) ([houqp](https://github.com/houqp)) +- avoid iterator materialization in column index lookup [\#703](https://github.com/apache/datafusion/pull/703) ([houqp](https://github.com/houqp)) +- Fix build with 1.52.1 [\#696](https://github.com/apache/datafusion/pull/696) ([alamb](https://github.com/alamb)) +- Fix test output due to logical merge conflict [\#694](https://github.com/apache/datafusion/pull/694) ([alamb](https://github.com/alamb)) +- add more integration tests [\#668](https://github.com/apache/datafusion/pull/668) ([Jimexist](https://github.com/Jimexist)) +- Bump arrow and parquet versions to 4.4 [\#654](https://github.com/apache/datafusion/pull/654) ([toddtreece](https://github.com/toddtreece)) +- Add query 15 to TPC-H queries [\#645](https://github.com/apache/datafusion/pull/645) ([Dandandan](https://github.com/Dandandan)) +- Improve 
error message and comments [\#641](https://github.com/apache/datafusion/pull/641) ([alamb](https://github.com/alamb)) +- add integration tests for rank, dense_rank, fix last_value evaluation with rank [\#638](https://github.com/apache/datafusion/pull/638) ([Jimexist](https://github.com/Jimexist)) +- round trip TPCH queries in tests [\#630](https://github.com/apache/datafusion/pull/630) ([houqp](https://github.com/houqp)) +- use Into\<Column\> as argument type wherever applicable [\#615](https://github.com/apache/datafusion/pull/615) ([houqp](https://github.com/houqp)) +- reuse alias map in aggregate logical planning and refactor position resolution [\#606](https://github.com/apache/datafusion/pull/606) ([Jimexist](https://github.com/Jimexist)) +- fix clippy warnings [\#581](https://github.com/apache/datafusion/pull/581) ([Jimexist](https://github.com/Jimexist)) +- Add benchmarks to window function queries [\#564](https://github.com/apache/datafusion/pull/564) ([Jimexist](https://github.com/Jimexist)) +- reuse code for now function expr creation [\#548](https://github.com/apache/datafusion/pull/548) ([houqp](https://github.com/houqp)) +- turn on clippy rule for needless borrow [\#545](https://github.com/apache/datafusion/pull/545) ([Jimexist](https://github.com/Jimexist)) +- Refactor hash aggregates's planner building code [\#539](https://github.com/apache/datafusion/pull/539) ([Jimexist](https://github.com/Jimexist)) +- Cleanup Repartition Exec code [\#538](https://github.com/apache/datafusion/pull/538) ([alamb](https://github.com/alamb)) +- reuse datafusion physical planner in ballista building from protobuf [\#532](https://github.com/apache/datafusion/pull/532) ([Jimexist](https://github.com/Jimexist)) +- remove redundant `into_iter()` calls [\#527](https://github.com/apache/datafusion/pull/527) ([Jimexist](https://github.com/Jimexist)) +- Fix 517 - move `window_frames` module to `logical_plan` [\#518](https://github.com/apache/datafusion/pull/518)
([Jimexist](https://github.com/Jimexist)) +- Refactor window aggregation, simplify batch processing logic [\#516](https://github.com/apache/datafusion/pull/516) ([Jimexist](https://github.com/Jimexist)) +- Add datafusion::test_util, resolve test data paths without env vars [\#498](https://github.com/apache/datafusion/pull/498) ([mluts](https://github.com/mluts)) +- Avoid warnings in tests when compiling without default features [\#489](https://github.com/apache/datafusion/pull/489) ([alamb](https://github.com/alamb)) +- update cargo.toml in python crate and fix unit test due to hash joins [\#483](https://github.com/apache/datafusion/pull/483) ([Jimexist](https://github.com/Jimexist)) +- use prettier check in CI [\#453](https://github.com/apache/datafusion/pull/453) ([Jimexist](https://github.com/Jimexist)) +- Optimize `nth_value`, remove `first_value`, `last_value` structs and use idiomatic rust style [\#452](https://github.com/apache/datafusion/pull/452) ([Jimexist](https://github.com/Jimexist)) +- Fixed typo / logical merge conflict [\#433](https://github.com/apache/datafusion/pull/433) ([jorgecarleitao](https://github.com/jorgecarleitao)) +- include test data and add aggregation tests in integration test [\#425](https://github.com/apache/datafusion/pull/425) ([Jimexist](https://github.com/Jimexist)) +- Add some padding around the logo [\#411](https://github.com/apache/datafusion/pull/411) ([parthsarthy](https://github.com/parthsarthy)) +- Benchmark subcommand to distinguish between DataFusion and Ballista [\#402](https://github.com/apache/datafusion/pull/402) ([jgoday](https://github.com/jgoday)) +- refactor datafusion/`scalar_value` to use more macro and avoid dup code [\#392](https://github.com/apache/datafusion/pull/392) ([Jimexist](https://github.com/Jimexist)) +- Update TPC-H benchmark to show physical plan when debug mode is enabled [\#386](https://github.com/apache/datafusion/pull/386) ([andygrove](https://github.com/andygrove)) +- Update arrow 
dependencies again [\#341](https://github.com/apache/datafusion/pull/341) ([alamb](https://github.com/alamb)) +- Update arrow-rs deps [\#317](https://github.com/apache/datafusion/pull/317) ([alamb](https://github.com/alamb)) +- Update PR template by commenting out instructions [\#315](https://github.com/apache/datafusion/pull/315) ([alamb](https://github.com/alamb)) +- fix clippy warning [\#286](https://github.com/apache/datafusion/pull/286) ([Jimexist](https://github.com/Jimexist)) +- add integration test to compare datafusion-cli against psql [\#281](https://github.com/apache/datafusion/pull/281) ([Jimexist](https://github.com/Jimexist)) +- Update arrow deps [\#269](https://github.com/apache/datafusion/pull/269) ([alamb](https://github.com/alamb)) +- Use multi-stage build dockerfile in datafusion-cli and reduce image size from 2.16GB to 89.9MB [\#266](https://github.com/apache/datafusion/pull/266) ([Jimexist](https://github.com/Jimexist)) +- Enable redundant_field_names clippy lint [\#261](https://github.com/apache/datafusion/pull/261) ([Dandandan](https://github.com/Dandandan)) +- fix clippy lint [\#259](https://github.com/apache/datafusion/pull/259) ([alamb](https://github.com/alamb)) +- Move datafusion-cli to new crate [\#231](https://github.com/apache/datafusion/pull/231) ([Dandandan](https://github.com/Dandandan)) +- Make test join_with_hash_collision deterministic [\#229](https://github.com/apache/datafusion/pull/229) ([Dandandan](https://github.com/Dandandan)) +- Update arrow-rs deps \(to fix build due to flatbuffers update\) [\#224](https://github.com/apache/datafusion/pull/224) ([alamb](https://github.com/alamb)) +- Use standard make_null_array for CASE [\#223](https://github.com/apache/datafusion/pull/223) ([alamb](https://github.com/alamb)) +- update arrow-rs deps to latest master [\#216](https://github.com/apache/datafusion/pull/216) ([alamb](https://github.com/alamb)) +- MINOR: Remove empty rust dir 
[\#61](https://github.com/apache/datafusion/pull/61) ([andygrove](https://github.com/andygrove)) diff --git a/dev/changelog/6.0.0.md b/dev/changelog/6.0.0.md index 68ce4802aab2..7d871dbffb39 100644 --- a/dev/changelog/6.0.0.md +++ b/dev/changelog/6.0.0.md @@ -17,188 +17,188 @@ under the License. --> -## [6.0.0](https://github.com/apache/arrow-datafusion/tree/6.0.0) (2021-11-13) +## [6.0.0](https://github.com/apache/datafusion/tree/6.0.0) (2021-11-13) -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/5.0.0...6.0.0) +[Full Changelog](https://github.com/apache/datafusion/compare/5.0.0...6.0.0) **Breaking changes:** -- Removed deprecated with_concurrency [\#1200](https://github.com/apache/arrow-datafusion/pull/1200) ([rdettai](https://github.com/rdettai)) -- File partitioning for ListingTable [\#1141](https://github.com/apache/arrow-datafusion/pull/1141) ([rdettai](https://github.com/rdettai)) -- Add function volatility to Signature [\#1071](https://github.com/apache/arrow-datafusion/pull/1071) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([pjmore](https://github.com/pjmore)) -- fix: allow duplicate field names in table join, fix output with duplicated names [\#1023](https://github.com/apache/arrow-datafusion/pull/1023) ([houqp](https://github.com/houqp)) -- Make TableProvider.scan\(\) and PhysicalPlanner::create_physical_plan\(\) async [\#1013](https://github.com/apache/arrow-datafusion/pull/1013) ([rdettai](https://github.com/rdettai)) -- Reorganize table providers by table format [\#1010](https://github.com/apache/arrow-datafusion/pull/1010) ([rdettai](https://github.com/rdettai)) -- Make Metrics::labels\(\) public [\#999](https://github.com/apache/arrow-datafusion/pull/999) ([alamb](https://github.com/alamb)) -- Rename NthValue::{first_value,last_value,nth_value} to satisfy clippy in Rust 1.55 [\#986](https://github.com/apache/arrow-datafusion/pull/986) ([alamb](https://github.com/alamb)) -- Move CBOs and Statistics to physical 
plan [\#965](https://github.com/apache/arrow-datafusion/pull/965) ([rdettai](https://github.com/rdettai)) -- Update to sqlparser v 0.10.0 [\#934](https://github.com/apache/arrow-datafusion/pull/934) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- FilePartition and PartitionedFile for scanning flexibility [\#932](https://github.com/apache/arrow-datafusion/pull/932) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([yjshen](https://github.com/yjshen)) -- Improve SQLMetric APIs, port existing metrics [\#908](https://github.com/apache/arrow-datafusion/pull/908) ([alamb](https://github.com/alamb)) -- Add support for EXPLAIN ANALYZE [\#858](https://github.com/apache/arrow-datafusion/pull/858) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Rename concurrency to target_partitions [\#706](https://github.com/apache/arrow-datafusion/pull/706) ([andygrove](https://github.com/andygrove)) +- Removed deprecated with_concurrency [\#1200](https://github.com/apache/datafusion/pull/1200) ([rdettai](https://github.com/rdettai)) +- File partitioning for ListingTable [\#1141](https://github.com/apache/datafusion/pull/1141) ([rdettai](https://github.com/rdettai)) +- Add function volatility to Signature [\#1071](https://github.com/apache/datafusion/pull/1071) [[sql](https://github.com/apache/datafusion/labels/sql)] ([pjmore](https://github.com/pjmore)) +- fix: allow duplicate field names in table join, fix output with duplicated names [\#1023](https://github.com/apache/datafusion/pull/1023) ([houqp](https://github.com/houqp)) +- Make TableProvider.scan\(\) and PhysicalPlanner::create_physical_plan\(\) async [\#1013](https://github.com/apache/datafusion/pull/1013) ([rdettai](https://github.com/rdettai)) +- Reorganize table providers by table format [\#1010](https://github.com/apache/datafusion/pull/1010) ([rdettai](https://github.com/rdettai)) +- Make 
Metrics::labels\(\) public [\#999](https://github.com/apache/datafusion/pull/999) ([alamb](https://github.com/alamb)) +- Rename NthValue::{first_value,last_value,nth_value} to satisfy clippy in Rust 1.55 [\#986](https://github.com/apache/datafusion/pull/986) ([alamb](https://github.com/alamb)) +- Move CBOs and Statistics to physical plan [\#965](https://github.com/apache/datafusion/pull/965) ([rdettai](https://github.com/rdettai)) +- Update to sqlparser v 0.10.0 [\#934](https://github.com/apache/datafusion/pull/934) [[sql](https://github.com/apache/datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- FilePartition and PartitionedFile for scanning flexibility [\#932](https://github.com/apache/datafusion/pull/932) [[sql](https://github.com/apache/datafusion/labels/sql)] ([yjshen](https://github.com/yjshen)) +- Improve SQLMetric APIs, port existing metrics [\#908](https://github.com/apache/datafusion/pull/908) ([alamb](https://github.com/alamb)) +- Add support for EXPLAIN ANALYZE [\#858](https://github.com/apache/datafusion/pull/858) [[sql](https://github.com/apache/datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Rename concurrency to target_partitions [\#706](https://github.com/apache/datafusion/pull/706) ([andygrove](https://github.com/andygrove)) **Implemented enhancements:** -- Add booleans support to the `CASE` statement [\#1156](https://github.com/apache/arrow-datafusion/issues/1156) -- Implement General Purpose Constant Folding with the Expression Evaluator [\#1070](https://github.com/apache/arrow-datafusion/issues/1070) -- Mark volatility categories of functions [\#1069](https://github.com/apache/arrow-datafusion/issues/1069) -- Add "show" support to DataFrame API [\#937](https://github.com/apache/arrow-datafusion/issues/937) -- Add support for TRIM BOTH/LEADING/TRAILING [\#935](https://github.com/apache/arrow-datafusion/issues/935) -- Add "baseline" metrics to all built in operators 
[\#866](https://github.com/apache/arrow-datafusion/issues/866) -- Add SQL support for referencing fields in structs [\#119](https://github.com/apache/arrow-datafusion/issues/119) -- add filename completer for create table statement [\#1278](https://github.com/apache/arrow-datafusion/pull/1278) ([Jimexist](https://github.com/Jimexist)) -- Add drop table support [\#1266](https://github.com/apache/arrow-datafusion/pull/1266) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([viirya](https://github.com/viirya)) -- Dataframe supports except and update readme [\#1261](https://github.com/apache/arrow-datafusion/pull/1261) ([xudong963](https://github.com/xudong963)) -- Implement EXCEPT & EXCEPT DISTINCT [\#1259](https://github.com/apache/arrow-datafusion/pull/1259) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) -- Add DataFrame support for `INTERSECT` and update readme [\#1258](https://github.com/apache/arrow-datafusion/pull/1258) ([xudong963](https://github.com/xudong963)) -- use arrow 6.1.0 [\#1255](https://github.com/apache/arrow-datafusion/pull/1255) ([Jimexist](https://github.com/Jimexist)) -- fix 1250, add editor support for datafusion cli with validation [\#1251](https://github.com/apache/arrow-datafusion/pull/1251) ([Jimexist](https://github.com/Jimexist)) -- Add support for `create table as` via MemTable [\#1243](https://github.com/apache/arrow-datafusion/pull/1243) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Dandandan](https://github.com/Dandandan)) -- Add cli show columns command to describe tables [\#1231](https://github.com/apache/arrow-datafusion/pull/1231) ([Jimexist](https://github.com/Jimexist)) -- datafusion-cli to add list table command [\#1229](https://github.com/apache/arrow-datafusion/pull/1229) ([Jimexist](https://github.com/Jimexist)) -- datafusion cli to handle EoF and interrupt signal [\#1225](https://github.com/apache/arrow-datafusion/pull/1225) 
([Jimexist](https://github.com/Jimexist)) -- add \q as quit command and add \? for help [\#1224](https://github.com/apache/arrow-datafusion/pull/1224) ([Jimexist](https://github.com/Jimexist)) -- Add algebraic simplifications to constant_folding [\#1208](https://github.com/apache/arrow-datafusion/pull/1208) ([matthewmturner](https://github.com/matthewmturner)) -- Improve GetIndexedFieldExpr adding utf8 key based access for struct v… [\#1204](https://github.com/apache/arrow-datafusion/pull/1204) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Igosuki](https://github.com/Igosuki)) -- Fix `between` in select query [\#1202](https://github.com/apache/arrow-datafusion/pull/1202) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([capkurmagati](https://github.com/capkurmagati)) -- Move code to fold Stable functions like `now()` from `Simplifier` to `ConstEvaluator` [\#1176](https://github.com/apache/arrow-datafusion/pull/1176) ([alamb](https://github.com/alamb)) -- DataFrame supports window function [\#1167](https://github.com/apache/arrow-datafusion/pull/1167) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) -- add values list expression [\#1165](https://github.com/apache/arrow-datafusion/pull/1165) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jimexist](https://github.com/Jimexist)) -- Add booleans support to the CASE statement [\#1161](https://github.com/apache/arrow-datafusion/pull/1161) ([xudong963](https://github.com/xudong963)) -- Improve error messages when operations are not supported [\#1158](https://github.com/apache/arrow-datafusion/pull/1158) ([alamb](https://github.com/alamb)) -- Generic constant expression evaluation [\#1153](https://github.com/apache/arrow-datafusion/pull/1153) ([alamb](https://github.com/alamb)) -- python `lit` function to support bool and byte vec [\#1152](https://github.com/apache/arrow-datafusion/pull/1152) 
([Jimexist](https://github.com/Jimexist)) -- \[nit\] simplify datafusion optimizer module codes [\#1146](https://github.com/apache/arrow-datafusion/pull/1146) ([panarch](https://github.com/panarch)) -- Add ScalarValue support for arbitrary list elements [\#1142](https://github.com/apache/arrow-datafusion/pull/1142) ([jonmmease](https://github.com/jonmmease)) -- Multiple files per partitions for CSV Avro Json [\#1138](https://github.com/apache/arrow-datafusion/pull/1138) ([rdettai](https://github.com/rdettai)) -- Implement INTERSECT & INTERSECT DISTINCT [\#1135](https://github.com/apache/arrow-datafusion/pull/1135) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) -- Simplify file struct abstractions [\#1120](https://github.com/apache/arrow-datafusion/pull/1120) ([rdettai](https://github.com/rdettai)) -- Implement `is [not] distinct from` [\#1117](https://github.com/apache/arrow-datafusion/pull/1117) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Dandandan](https://github.com/Dandandan)) -- Clean up spawned task on drop for `RepartitionExec`, `SortPreservingMergeExec`, `WindowAggExec` [\#1112](https://github.com/apache/arrow-datafusion/pull/1112) ([crepererum](https://github.com/crepererum)) -- add hyperloglog implementation \(`add` and `count`\) [\#1095](https://github.com/apache/arrow-datafusion/pull/1095) ([Jimexist](https://github.com/Jimexist)) -- Add ScalarValue::Struct variant [\#1091](https://github.com/apache/arrow-datafusion/pull/1091) ([jonmmease](https://github.com/jonmmease)) -- add digest\(utf8, method\) function and refactor all current hash digest functions [\#1090](https://github.com/apache/arrow-datafusion/pull/1090) ([Jimexist](https://github.com/Jimexist)) -- \[crypto\] add `blake3` algorithm to `digest` function [\#1086](https://github.com/apache/arrow-datafusion/pull/1086) ([Jimexist](https://github.com/Jimexist)) -- \[crypto\] add blake2b and blake2s functions 
[\#1081](https://github.com/apache/arrow-datafusion/pull/1081) ([Jimexist](https://github.com/Jimexist)) -- \[nit\] make schema qualifier error message in field lookup more readable [\#1079](https://github.com/apache/arrow-datafusion/pull/1079) ([Jimexist](https://github.com/Jimexist)) -- \[window function\] add `percent_rank` window function [\#1077](https://github.com/apache/arrow-datafusion/pull/1077) ([Jimexist](https://github.com/Jimexist)) -- \[window function\] add `cume_dist` implementation [\#1076](https://github.com/apache/arrow-datafusion/pull/1076) ([Jimexist](https://github.com/Jimexist)) -- Add a LogicalPlanBuilder::schema\(\) function [\#1075](https://github.com/apache/arrow-datafusion/pull/1075) ([alamb](https://github.com/alamb)) -- Add support for UNION \[DISTINCT\] sql [\#1068](https://github.com/apache/arrow-datafusion/pull/1068) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) -- fix: fix joins on Float32/Float64 columns bug [\#1054](https://github.com/apache/arrow-datafusion/pull/1054) ([francis-du](https://github.com/francis-du)) -- Update sqlparser-rs to 0.11 [\#1052](https://github.com/apache/arrow-datafusion/pull/1052) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Support querying CSV files without providing the schema [\#1050](https://github.com/apache/arrow-datafusion/pull/1050) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) -- remove hard coded partition count in ballista logicalplan deserialization [\#1044](https://github.com/apache/arrow-datafusion/pull/1044) ([xudong963](https://github.com/xudong963)) -- feat: add lit_timestamp_nanosecond [\#1030](https://github.com/apache/arrow-datafusion/pull/1030) ([NGA-TRAN](https://github.com/NGA-TRAN)) -- Ignore metadata on schema merge [\#1024](https://github.com/apache/arrow-datafusion/pull/1024) 
([Smurphy000](https://github.com/Smurphy000)) -- add ExecutionConfig.with_optimizer_rules [\#1022](https://github.com/apache/arrow-datafusion/pull/1022) ([seddonm1](https://github.com/seddonm1)) -- Add baseline execution stats to `WindowAggExec` and `UnionExec`, and fixup `CoalescePartitionsExec` [\#1018](https://github.com/apache/arrow-datafusion/pull/1018) ([alamb](https://github.com/alamb)) -- Derive PartialOrd for Expr [\#1015](https://github.com/apache/arrow-datafusion/pull/1015) ([alamb](https://github.com/alamb)) -- Indexed field access for List [\#1006](https://github.com/apache/arrow-datafusion/pull/1006) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Igosuki](https://github.com/Igosuki)) -- Add metrics for Limit and Projection, and CoalesceBatches [\#1004](https://github.com/apache/arrow-datafusion/pull/1004) ([alamb](https://github.com/alamb)) -- Update DataFusion to arrow 6.0 [\#984](https://github.com/apache/arrow-datafusion/pull/984) ([alamb](https://github.com/alamb)) -- Implement Display for Expr, improve operator display [\#971](https://github.com/apache/arrow-datafusion/pull/971) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([matthewmturner](https://github.com/matthewmturner)) -- Add metrics for FilterExec [\#960](https://github.com/apache/arrow-datafusion/pull/960) ([alamb](https://github.com/alamb)) -- Change compound column field name rules [\#952](https://github.com/apache/arrow-datafusion/pull/952) ([waynexia](https://github.com/waynexia)) -- ObjectStore API to read from remote storage systems [\#950](https://github.com/apache/arrow-datafusion/pull/950) ([yjshen](https://github.com/yjshen)) -- Add baseline metrics to `SortPreservingMergeExec` [\#948](https://github.com/apache/arrow-datafusion/pull/948) ([alamb](https://github.com/alamb)) -- Add support for TRIM LEADING/TRAILING/BOTH syntax [\#947](https://github.com/apache/arrow-datafusion/pull/947) 
[[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([adsharma](https://github.com/adsharma)) -- fixes \#933 replace placeholder fmt_as fr ExecutionPlan impls [\#939](https://github.com/apache/arrow-datafusion/pull/939) ([tiphaineruy](https://github.com/tiphaineruy)) -- Add metrics for SortExect + HashAggregateExec [\#938](https://github.com/apache/arrow-datafusion/pull/938) ([alamb](https://github.com/alamb)) -- Add some additional asserts in `utils::from_plan` [\#930](https://github.com/apache/arrow-datafusion/pull/930) ([alamb](https://github.com/alamb)) -- Avro Table Provider [\#910](https://github.com/apache/arrow-datafusion/pull/910) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Igosuki](https://github.com/Igosuki)) -- Add BaselineMetrics, Timestamp metrics, add for `CoalescePartitionsExec`, rename output_time -\> elapsed_compute [\#909](https://github.com/apache/arrow-datafusion/pull/909) ([alamb](https://github.com/alamb)) -- add cross join support to ballista [\#891](https://github.com/apache/arrow-datafusion/pull/891) ([houqp](https://github.com/houqp)) -- Add Ballista support to DataFusion CLI [\#889](https://github.com/apache/arrow-datafusion/pull/889) ([andygrove](https://github.com/andygrove)) -- support like on DictionaryArray [\#876](https://github.com/apache/arrow-datafusion/pull/876) ([b41sh](https://github.com/b41sh)) -- Register table based on known schema without file IO [\#872](https://github.com/apache/arrow-datafusion/pull/872) ([Dandandan](https://github.com/Dandandan)) -- Add support for PostgreSQL regex match [\#870](https://github.com/apache/arrow-datafusion/pull/870) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([b41sh](https://github.com/b41sh)) -- Include planning time in datafusion-cli printing [\#860](https://github.com/apache/arrow-datafusion/pull/860) ([Dandandan](https://github.com/Dandandan)) -- Implement basic common subexpression eliminate optimization 
[\#792](https://github.com/apache/arrow-datafusion/pull/792) ([waynexia](https://github.com/waynexia)) -- Impl `ops::Not` for `expr` [\#763](https://github.com/apache/arrow-datafusion/pull/763) ([Jimexist](https://github.com/Jimexist)) +- Add booleans support to the `CASE` statement [\#1156](https://github.com/apache/datafusion/issues/1156) +- Implement General Purpose Constant Folding with the Expression Evaluator [\#1070](https://github.com/apache/datafusion/issues/1070) +- Mark volatility categories of functions [\#1069](https://github.com/apache/datafusion/issues/1069) +- Add "show" support to DataFrame API [\#937](https://github.com/apache/datafusion/issues/937) +- Add support for TRIM BOTH/LEADING/TRAILING [\#935](https://github.com/apache/datafusion/issues/935) +- Add "baseline" metrics to all built in operators [\#866](https://github.com/apache/datafusion/issues/866) +- Add SQL support for referencing fields in structs [\#119](https://github.com/apache/datafusion/issues/119) +- add filename completer for create table statement [\#1278](https://github.com/apache/datafusion/pull/1278) ([Jimexist](https://github.com/Jimexist)) +- Add drop table support [\#1266](https://github.com/apache/datafusion/pull/1266) [[sql](https://github.com/apache/datafusion/labels/sql)] ([viirya](https://github.com/viirya)) +- Dataframe supports except and update readme [\#1261](https://github.com/apache/datafusion/pull/1261) ([xudong963](https://github.com/xudong963)) +- Implement EXCEPT & EXCEPT DISTINCT [\#1259](https://github.com/apache/datafusion/pull/1259) [[sql](https://github.com/apache/datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) +- Add DataFrame support for `INTERSECT` and update readme [\#1258](https://github.com/apache/datafusion/pull/1258) ([xudong963](https://github.com/xudong963)) +- use arrow 6.1.0 [\#1255](https://github.com/apache/datafusion/pull/1255) ([Jimexist](https://github.com/Jimexist)) +- fix 1250, add editor support for datafusion 
cli with validation [\#1251](https://github.com/apache/datafusion/pull/1251) ([Jimexist](https://github.com/Jimexist)) +- Add support for `create table as` via MemTable [\#1243](https://github.com/apache/datafusion/pull/1243) [[sql](https://github.com/apache/datafusion/labels/sql)] ([Dandandan](https://github.com/Dandandan)) +- Add cli show columns command to describe tables [\#1231](https://github.com/apache/datafusion/pull/1231) ([Jimexist](https://github.com/Jimexist)) +- datafusion-cli to add list table command [\#1229](https://github.com/apache/datafusion/pull/1229) ([Jimexist](https://github.com/Jimexist)) +- datafusion cli to handle EoF and interrupt signal [\#1225](https://github.com/apache/datafusion/pull/1225) ([Jimexist](https://github.com/Jimexist)) +- add \q as quit command and add \? for help [\#1224](https://github.com/apache/datafusion/pull/1224) ([Jimexist](https://github.com/Jimexist)) +- Add algebraic simplifications to constant_folding [\#1208](https://github.com/apache/datafusion/pull/1208) ([matthewmturner](https://github.com/matthewmturner)) +- Improve GetIndexedFieldExpr adding utf8 key based access for struct v… [\#1204](https://github.com/apache/datafusion/pull/1204) [[sql](https://github.com/apache/datafusion/labels/sql)] ([Igosuki](https://github.com/Igosuki)) +- Fix `between` in select query [\#1202](https://github.com/apache/datafusion/pull/1202) [[sql](https://github.com/apache/datafusion/labels/sql)] ([capkurmagati](https://github.com/capkurmagati)) +- Move code to fold Stable functions like `now()` from `Simplifier` to `ConstEvaluator` [\#1176](https://github.com/apache/datafusion/pull/1176) ([alamb](https://github.com/alamb)) +- DataFrame supports window function [\#1167](https://github.com/apache/datafusion/pull/1167) [[sql](https://github.com/apache/datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) +- add values list expression [\#1165](https://github.com/apache/datafusion/pull/1165) 
[[sql](https://github.com/apache/datafusion/labels/sql)] ([Jimexist](https://github.com/Jimexist)) +- Add booleans support to the CASE statement [\#1161](https://github.com/apache/datafusion/pull/1161) ([xudong963](https://github.com/xudong963)) +- Improve error messages when operations are not supported [\#1158](https://github.com/apache/datafusion/pull/1158) ([alamb](https://github.com/alamb)) +- Generic constant expression evaluation [\#1153](https://github.com/apache/datafusion/pull/1153) ([alamb](https://github.com/alamb)) +- python `lit` function to support bool and byte vec [\#1152](https://github.com/apache/datafusion/pull/1152) ([Jimexist](https://github.com/Jimexist)) +- \[nit\] simplify datafusion optimizer module codes [\#1146](https://github.com/apache/datafusion/pull/1146) ([panarch](https://github.com/panarch)) +- Add ScalarValue support for arbitrary list elements [\#1142](https://github.com/apache/datafusion/pull/1142) ([jonmmease](https://github.com/jonmmease)) +- Multiple files per partitions for CSV Avro Json [\#1138](https://github.com/apache/datafusion/pull/1138) ([rdettai](https://github.com/rdettai)) +- Implement INTERSECT & INTERSECT DISTINCT [\#1135](https://github.com/apache/datafusion/pull/1135) [[sql](https://github.com/apache/datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) +- Simplify file struct abstractions [\#1120](https://github.com/apache/datafusion/pull/1120) ([rdettai](https://github.com/rdettai)) +- Implement `is [not] distinct from` [\#1117](https://github.com/apache/datafusion/pull/1117) [[sql](https://github.com/apache/datafusion/labels/sql)] ([Dandandan](https://github.com/Dandandan)) +- Clean up spawned task on drop for `RepartitionExec`, `SortPreservingMergeExec`, `WindowAggExec` [\#1112](https://github.com/apache/datafusion/pull/1112) ([crepererum](https://github.com/crepererum)) +- add hyperloglog implementation \(`add` and `count`\) [\#1095](https://github.com/apache/datafusion/pull/1095) 
([Jimexist](https://github.com/Jimexist)) +- Add ScalarValue::Struct variant [\#1091](https://github.com/apache/datafusion/pull/1091) ([jonmmease](https://github.com/jonmmease)) +- add digest\(utf8, method\) function and refactor all current hash digest functions [\#1090](https://github.com/apache/datafusion/pull/1090) ([Jimexist](https://github.com/Jimexist)) +- \[crypto\] add `blake3` algorithm to `digest` function [\#1086](https://github.com/apache/datafusion/pull/1086) ([Jimexist](https://github.com/Jimexist)) +- \[crypto\] add blake2b and blake2s functions [\#1081](https://github.com/apache/datafusion/pull/1081) ([Jimexist](https://github.com/Jimexist)) +- \[nit\] make schema qualifier error message in field lookup more readable [\#1079](https://github.com/apache/datafusion/pull/1079) ([Jimexist](https://github.com/Jimexist)) +- \[window function\] add `percent_rank` window function [\#1077](https://github.com/apache/datafusion/pull/1077) ([Jimexist](https://github.com/Jimexist)) +- \[window function\] add `cume_dist` implementation [\#1076](https://github.com/apache/datafusion/pull/1076) ([Jimexist](https://github.com/Jimexist)) +- Add a LogicalPlanBuilder::schema\(\) function [\#1075](https://github.com/apache/datafusion/pull/1075) ([alamb](https://github.com/alamb)) +- Add support for UNION \[DISTINCT\] sql [\#1068](https://github.com/apache/datafusion/pull/1068) [[sql](https://github.com/apache/datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) +- fix: fix joins on Float32/Float64 columns bug [\#1054](https://github.com/apache/datafusion/pull/1054) ([francis-du](https://github.com/francis-du)) +- Update sqlparser-rs to 0.11 [\#1052](https://github.com/apache/datafusion/pull/1052) [[sql](https://github.com/apache/datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Support querying CSV files without providing the schema [\#1050](https://github.com/apache/datafusion/pull/1050) 
[[sql](https://github.com/apache/datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) +- remove hard coded partition count in ballista logicalplan deserialization [\#1044](https://github.com/apache/datafusion/pull/1044) ([xudong963](https://github.com/xudong963)) +- feat: add lit_timestamp_nanosecond [\#1030](https://github.com/apache/datafusion/pull/1030) ([NGA-TRAN](https://github.com/NGA-TRAN)) +- Ignore metadata on schema merge [\#1024](https://github.com/apache/datafusion/pull/1024) ([Smurphy000](https://github.com/Smurphy000)) +- add ExecutionConfig.with_optimizer_rules [\#1022](https://github.com/apache/datafusion/pull/1022) ([seddonm1](https://github.com/seddonm1)) +- Add baseline execution stats to `WindowAggExec` and `UnionExec`, and fixup `CoalescePartitionsExec` [\#1018](https://github.com/apache/datafusion/pull/1018) ([alamb](https://github.com/alamb)) +- Derive PartialOrd for Expr [\#1015](https://github.com/apache/datafusion/pull/1015) ([alamb](https://github.com/alamb)) +- Indexed field access for List [\#1006](https://github.com/apache/datafusion/pull/1006) [[sql](https://github.com/apache/datafusion/labels/sql)] ([Igosuki](https://github.com/Igosuki)) +- Add metrics for Limit and Projection, and CoalesceBatches [\#1004](https://github.com/apache/datafusion/pull/1004) ([alamb](https://github.com/alamb)) +- Update DataFusion to arrow 6.0 [\#984](https://github.com/apache/datafusion/pull/984) ([alamb](https://github.com/alamb)) +- Implement Display for Expr, improve operator display [\#971](https://github.com/apache/datafusion/pull/971) [[sql](https://github.com/apache/datafusion/labels/sql)] ([matthewmturner](https://github.com/matthewmturner)) +- Add metrics for FilterExec [\#960](https://github.com/apache/datafusion/pull/960) ([alamb](https://github.com/alamb)) +- Change compound column field name rules [\#952](https://github.com/apache/datafusion/pull/952) ([waynexia](https://github.com/waynexia)) +- ObjectStore API to read from 
remote storage systems [\#950](https://github.com/apache/datafusion/pull/950) ([yjshen](https://github.com/yjshen)) +- Add baseline metrics to `SortPreservingMergeExec` [\#948](https://github.com/apache/datafusion/pull/948) ([alamb](https://github.com/alamb)) +- Add support for TRIM LEADING/TRAILING/BOTH syntax [\#947](https://github.com/apache/datafusion/pull/947) [[sql](https://github.com/apache/datafusion/labels/sql)] ([adsharma](https://github.com/adsharma)) +- fixes \#933 replace placeholder fmt_as fr ExecutionPlan impls [\#939](https://github.com/apache/datafusion/pull/939) ([tiphaineruy](https://github.com/tiphaineruy)) +- Add metrics for SortExect + HashAggregateExec [\#938](https://github.com/apache/datafusion/pull/938) ([alamb](https://github.com/alamb)) +- Add some additional asserts in `utils::from_plan` [\#930](https://github.com/apache/datafusion/pull/930) ([alamb](https://github.com/alamb)) +- Avro Table Provider [\#910](https://github.com/apache/datafusion/pull/910) [[sql](https://github.com/apache/datafusion/labels/sql)] ([Igosuki](https://github.com/Igosuki)) +- Add BaselineMetrics, Timestamp metrics, add for `CoalescePartitionsExec`, rename output_time -\> elapsed_compute [\#909](https://github.com/apache/datafusion/pull/909) ([alamb](https://github.com/alamb)) +- add cross join support to ballista [\#891](https://github.com/apache/datafusion/pull/891) ([houqp](https://github.com/houqp)) +- Add Ballista support to DataFusion CLI [\#889](https://github.com/apache/datafusion/pull/889) ([andygrove](https://github.com/andygrove)) +- support like on DictionaryArray [\#876](https://github.com/apache/datafusion/pull/876) ([b41sh](https://github.com/b41sh)) +- Register table based on known schema without file IO [\#872](https://github.com/apache/datafusion/pull/872) ([Dandandan](https://github.com/Dandandan)) +- Add support for PostgreSQL regex match [\#870](https://github.com/apache/datafusion/pull/870) 
[[sql](https://github.com/apache/datafusion/labels/sql)] ([b41sh](https://github.com/b41sh)) +- Include planning time in datafusion-cli printing [\#860](https://github.com/apache/datafusion/pull/860) ([Dandandan](https://github.com/Dandandan)) +- Implement basic common subexpression eliminate optimization [\#792](https://github.com/apache/datafusion/pull/792) ([waynexia](https://github.com/waynexia)) +- Impl `ops::Not` for `expr` [\#763](https://github.com/apache/datafusion/pull/763) ([Jimexist](https://github.com/Jimexist)) **Fixed bugs:** -- Can not use `between` in the select list: [\#1196](https://github.com/apache/arrow-datafusion/issues/1196) -- ORDER BY does not work with literals: Sort operation is not applicable to scalar value 'foo' [\#1195](https://github.com/apache/arrow-datafusion/issues/1195) -- window functions with NULL literals in `partition by` and `order by` do not work: Internal\("Sort operation is not applicable to scalar value NULL"\) [\#1194](https://github.com/apache/arrow-datafusion/issues/1194) -- Operation name not included in internal errors -- Internal\("Data type Boolean not supported for binary operation on dyn arrays"\) [\#1157](https://github.com/apache/arrow-datafusion/issues/1157) -- Physical plan explain UNION query says "ExecutionPlan\(PlaceHolder\)" [\#933](https://github.com/apache/arrow-datafusion/issues/933) -- Can not use LIKE on DictionaryArray encoded strings [\#815](https://github.com/apache/arrow-datafusion/issues/815) -- physical_plan::repartition::tests::repartition_with_dropping_output_stream failing locally [\#614](https://github.com/apache/arrow-datafusion/issues/614) -- Fix some `BuiltinScalarFunction` panics with zero arguments [\#1249](https://github.com/apache/arrow-datafusion/pull/1249) ([capkurmagati](https://github.com/capkurmagati)) -- fix: not do boolean folding on NULL and/or expr [\#1245](https://github.com/apache/arrow-datafusion/pull/1245) ([NGA-TRAN](https://github.com/NGA-TRAN)) -- ignore case of 
`with header row` in sql when creating external table [\#1237](https://github.com/apache/arrow-datafusion/pull/1237) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([lichuan6](https://github.com/lichuan6)) -- fix: Min/Max aggregation data type should not be dictionary [\#1235](https://github.com/apache/arrow-datafusion/pull/1235) ([NGA-TRAN](https://github.com/NGA-TRAN)) -- Fix build with `--no-default-features` [\#1219](https://github.com/apache/arrow-datafusion/pull/1219) ([alamb](https://github.com/alamb)) -- Prevent "future cannot be sent between threads safely" compilation error [\#1155](https://github.com/apache/arrow-datafusion/pull/1155) ([jonmmease](https://github.com/jonmmease)) -- Clean up spawned task on drop for `AnalyzeExec`, `CoalescePartitionsExec`, `HashAggregateExec` [\#1121](https://github.com/apache/arrow-datafusion/pull/1121) ([crepererum](https://github.com/crepererum)) -- Clean up spawned task on `SortStream` drop [\#1105](https://github.com/apache/arrow-datafusion/pull/1105) ([crepererum](https://github.com/crepererum)) -- fix UNION ALL bug: thread 'main' panicked at 'index out of bounds: the len is 1 but the index is 1', ./src/datatypes/schema.rs:165:10 [\#1088](https://github.com/apache/arrow-datafusion/pull/1088) ([xudong963](https://github.com/xudong963)) -- python: fix generated table name in dataframe creation [\#1078](https://github.com/apache/arrow-datafusion/pull/1078) ([houqp](https://github.com/houqp)) -- fix subquery alias [\#1067](https://github.com/apache/arrow-datafusion/pull/1067) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) -- fix pattern handling in regexp_match function [\#1065](https://github.com/apache/arrow-datafusion/pull/1065) ([houqp](https://github.com/houqp)) -- fix: joins on Timestamp columns [\#1055](https://github.com/apache/arrow-datafusion/pull/1055) ([francis-du](https://github.com/francis-du)) -- Fix metric name typo 
[\#943](https://github.com/apache/arrow-datafusion/pull/943) ([alamb](https://github.com/alamb)) -- EXPLAIN ANALYZE should run all Optimizer passes [\#929](https://github.com/apache/arrow-datafusion/pull/929) ([alamb](https://github.com/alamb)) +- Can not use `between` in the select list: [\#1196](https://github.com/apache/datafusion/issues/1196) +- ORDER BY does not work with literals: Sort operation is not applicable to scalar value 'foo' [\#1195](https://github.com/apache/datafusion/issues/1195) +- window functions with NULL literals in `partition by` and `order by` do not work: Internal\("Sort operation is not applicable to scalar value NULL"\) [\#1194](https://github.com/apache/datafusion/issues/1194) +- Operation name not included in internal errors -- Internal\("Data type Boolean not supported for binary operation on dyn arrays"\) [\#1157](https://github.com/apache/datafusion/issues/1157) +- Physical plan explain UNION query says "ExecutionPlan\(PlaceHolder\)" [\#933](https://github.com/apache/datafusion/issues/933) +- Can not use LIKE on DictionaryArray encoded strings [\#815](https://github.com/apache/datafusion/issues/815) +- physical_plan::repartition::tests::repartition_with_dropping_output_stream failing locally [\#614](https://github.com/apache/datafusion/issues/614) +- Fix some `BuiltinScalarFunction` panics with zero arguments [\#1249](https://github.com/apache/datafusion/pull/1249) ([capkurmagati](https://github.com/capkurmagati)) +- fix: not do boolean folding on NULL and/or expr [\#1245](https://github.com/apache/datafusion/pull/1245) ([NGA-TRAN](https://github.com/NGA-TRAN)) +- ignore case of `with header row` in sql when creating external table [\#1237](https://github.com/apache/datafusion/pull/1237) [[sql](https://github.com/apache/datafusion/labels/sql)] ([lichuan6](https://github.com/lichuan6)) +- fix: Min/Max aggregation data type should not be dictionary [\#1235](https://github.com/apache/datafusion/pull/1235) 
([NGA-TRAN](https://github.com/NGA-TRAN)) +- Fix build with `--no-default-features` [\#1219](https://github.com/apache/datafusion/pull/1219) ([alamb](https://github.com/alamb)) +- Prevent "future cannot be sent between threads safely" compilation error [\#1155](https://github.com/apache/datafusion/pull/1155) ([jonmmease](https://github.com/jonmmease)) +- Clean up spawned task on drop for `AnalyzeExec`, `CoalescePartitionsExec`, `HashAggregateExec` [\#1121](https://github.com/apache/datafusion/pull/1121) ([crepererum](https://github.com/crepererum)) +- Clean up spawned task on `SortStream` drop [\#1105](https://github.com/apache/datafusion/pull/1105) ([crepererum](https://github.com/crepererum)) +- fix UNION ALL bug: thread 'main' panicked at 'index out of bounds: the len is 1 but the index is 1', ./src/datatypes/schema.rs:165:10 [\#1088](https://github.com/apache/datafusion/pull/1088) ([xudong963](https://github.com/xudong963)) +- python: fix generated table name in dataframe creation [\#1078](https://github.com/apache/datafusion/pull/1078) ([houqp](https://github.com/houqp)) +- fix subquery alias [\#1067](https://github.com/apache/datafusion/pull/1067) [[sql](https://github.com/apache/datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) +- fix pattern handling in regexp_match function [\#1065](https://github.com/apache/datafusion/pull/1065) ([houqp](https://github.com/houqp)) +- fix: joins on Timestamp columns [\#1055](https://github.com/apache/datafusion/pull/1055) ([francis-du](https://github.com/francis-du)) +- Fix metric name typo [\#943](https://github.com/apache/datafusion/pull/943) ([alamb](https://github.com/alamb)) +- EXPLAIN ANALYZE should run all Optimizer passes [\#929](https://github.com/apache/datafusion/pull/929) ([alamb](https://github.com/alamb)) **Documentation updates:** -- update docs to fix DataFusion User Guide link [\#1238](https://github.com/apache/arrow-datafusion/pull/1238) ([jiangzhx](https://github.com/jiangzhx)) -- 
\[docs\] datafusion cli run via homebrew [\#1198](https://github.com/apache/arrow-datafusion/pull/1198) ([Jimexist](https://github.com/Jimexist)) -- add support for unary and binary values in values list, update docs [\#1172](https://github.com/apache/arrow-datafusion/pull/1172) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jimexist](https://github.com/Jimexist)) -- Add additional docstring comments to `from_plan` [\#1168](https://github.com/apache/arrow-datafusion/pull/1168) ([alamb](https://github.com/alamb)) -- \[nit\] fix document issue for `approx_distinct` [\#1110](https://github.com/apache/arrow-datafusion/pull/1110) ([Jimexist](https://github.com/Jimexist)) -- implement `approx_distinct` function using HyperLogLog [\#1087](https://github.com/apache/arrow-datafusion/pull/1087) ([Jimexist](https://github.com/Jimexist)) -- Remove unused `use` statements from examples [\#1032](https://github.com/apache/arrow-datafusion/pull/1032) ([alamb](https://github.com/alamb)) -- consolidate datafusion docs with sphinx [\#993](https://github.com/apache/arrow-datafusion/pull/993) ([houqp](https://github.com/houqp)) -- Updated user-guide library docs with optimized config [\#976](https://github.com/apache/arrow-datafusion/pull/976) ([matthewmturner](https://github.com/matthewmturner)) -- Improve User Guide [\#954](https://github.com/apache/arrow-datafusion/pull/954) ([andygrove](https://github.com/andygrove)) -- \[MINOR\] Fix typos in doc comments [\#945](https://github.com/apache/arrow-datafusion/pull/945) ([alamb](https://github.com/alamb)) -- \[DataFusion\] - Add show and show_limit function for DataFrame [\#923](https://github.com/apache/arrow-datafusion/pull/923) ([francis-du](https://github.com/francis-du)) -- Typo fix in DataFusion crate documentation [\#914](https://github.com/apache/arrow-datafusion/pull/914) ([antoinewdg](https://github.com/antoinewdg)) +- update docs to fix DataFusion User Guide link 
[\#1238](https://github.com/apache/datafusion/pull/1238) ([jiangzhx](https://github.com/jiangzhx)) +- \[docs\] datafusion cli run via homebrew [\#1198](https://github.com/apache/datafusion/pull/1198) ([Jimexist](https://github.com/Jimexist)) +- add support for unary and binary values in values list, update docs [\#1172](https://github.com/apache/datafusion/pull/1172) [[sql](https://github.com/apache/datafusion/labels/sql)] ([Jimexist](https://github.com/Jimexist)) +- Add additional docstring comments to `from_plan` [\#1168](https://github.com/apache/datafusion/pull/1168) ([alamb](https://github.com/alamb)) +- \[nit\] fix document issue for `approx_distinct` [\#1110](https://github.com/apache/datafusion/pull/1110) ([Jimexist](https://github.com/Jimexist)) +- implement `approx_distinct` function using HyperLogLog [\#1087](https://github.com/apache/datafusion/pull/1087) ([Jimexist](https://github.com/Jimexist)) +- Remove unused `use` statements from examples [\#1032](https://github.com/apache/datafusion/pull/1032) ([alamb](https://github.com/alamb)) +- consolidate datafusion docs with sphinx [\#993](https://github.com/apache/datafusion/pull/993) ([houqp](https://github.com/houqp)) +- Updated user-guide library docs with optimized config [\#976](https://github.com/apache/datafusion/pull/976) ([matthewmturner](https://github.com/matthewmturner)) +- Improve User Guide [\#954](https://github.com/apache/datafusion/pull/954) ([andygrove](https://github.com/andygrove)) +- \[MINOR\] Fix typos in doc comments [\#945](https://github.com/apache/datafusion/pull/945) ([alamb](https://github.com/alamb)) +- \[DataFusion\] - Add show and show_limit function for DataFrame [\#923](https://github.com/apache/datafusion/pull/923) ([francis-du](https://github.com/francis-du)) +- Typo fix in DataFusion crate documentation [\#914](https://github.com/apache/datafusion/pull/914) ([antoinewdg](https://github.com/antoinewdg)) **Performance improvements:** -- Improve avro reader performance by 
avoiding some cloning on avro_rs::Value [\#1206](https://github.com/apache/arrow-datafusion/pull/1206) ([Igosuki](https://github.com/Igosuki)) -- optimize build profile for datafusion python binding, cli and ballista [\#1137](https://github.com/apache/arrow-datafusion/pull/1137) ([houqp](https://github.com/houqp)) -- Avoid stack overflow by reducing stack usage of `BinaryExpr::evaluate` in debug builds [\#1047](https://github.com/apache/arrow-datafusion/pull/1047) ([alamb](https://github.com/alamb)) -- Add ScalarValue::eq_array optimized comparison function [\#844](https://github.com/apache/arrow-datafusion/pull/844) ([alamb](https://github.com/alamb)) -- Rework GroupByHash to for faster performance and support grouping by nulls [\#808](https://github.com/apache/arrow-datafusion/pull/808) ([alamb](https://github.com/alamb)) +- Improve avro reader performance by avoiding some cloning on avro_rs::Value [\#1206](https://github.com/apache/datafusion/pull/1206) ([Igosuki](https://github.com/Igosuki)) +- optimize build profile for datafusion python binding, cli and ballista [\#1137](https://github.com/apache/datafusion/pull/1137) ([houqp](https://github.com/houqp)) +- Avoid stack overflow by reducing stack usage of `BinaryExpr::evaluate` in debug builds [\#1047](https://github.com/apache/datafusion/pull/1047) ([alamb](https://github.com/alamb)) +- Add ScalarValue::eq_array optimized comparison function [\#844](https://github.com/apache/datafusion/pull/844) ([alamb](https://github.com/alamb)) +- Rework GroupByHash to for faster performance and support grouping by nulls [\#808](https://github.com/apache/datafusion/pull/808) ([alamb](https://github.com/alamb)) **Closed issues:** -- InList expr with NULL literals do not work [\#1190](https://github.com/apache/arrow-datafusion/issues/1190) -- update the homepage README to include values, `approx_distinct`, etc. 
[\#1171](https://github.com/apache/arrow-datafusion/issues/1171) -- \[Python\]: Inconsistencies with Python package name [\#1011](https://github.com/apache/arrow-datafusion/issues/1011) -- Wanting to contribute to project where to start? [\#983](https://github.com/apache/arrow-datafusion/issues/983) -- delete redundant code [\#973](https://github.com/apache/arrow-datafusion/issues/973) -- How to build DataFusion python wheel [\#853](https://github.com/apache/arrow-datafusion/issues/853) -- Add support for partition pruning [\#204](https://github.com/apache/arrow-datafusion/issues/204) -- \[Datafusion\] Support joins on TimestampMillisecond columns [\#187](https://github.com/apache/arrow-datafusion/issues/187) -- TPC-H Query 21 [\#173](https://github.com/apache/arrow-datafusion/issues/173) -- TPC-H Query 13 [\#164](https://github.com/apache/arrow-datafusion/issues/164) -- TPC-H Query 8 [\#162](https://github.com/apache/arrow-datafusion/issues/162) -- implement split_part\(string, delimiter, position\) [\#157](https://github.com/apache/arrow-datafusion/issues/157) -- Join Statement: Schema contains duplicate unqualified field name [\#155](https://github.com/apache/arrow-datafusion/issues/155) -- ParquetTable should avoid scanning all files twice [\#136](https://github.com/apache/arrow-datafusion/issues/136) -- Add support for reading partitioned Parquet files [\#133](https://github.com/apache/arrow-datafusion/issues/133) -- Add support for Parquet schema merging [\#132](https://github.com/apache/arrow-datafusion/issues/132) -- Catalog abstraction [\#126](https://github.com/apache/arrow-datafusion/issues/126) -- Optimizer rules should work with qualified column names [\#125](https://github.com/apache/arrow-datafusion/issues/125) -- Add optional qualifier to Expr::Column [\#121](https://github.com/apache/arrow-datafusion/issues/121) -- Implement modulus expression [\#99](https://github.com/apache/arrow-datafusion/issues/99) -- \[Rust\] Add constant folding to 
expressions during logically planning [\#98](https://github.com/apache/arrow-datafusion/issues/98) -- \[Rust\] Implement pretty print for physical query plan [\#93](https://github.com/apache/arrow-datafusion/issues/93) -- Can not group by boolean columns \(add boolean to valid keys of groupBy\) [\#91](https://github.com/apache/arrow-datafusion/issues/91) -- improve performance of building literal arrays [\#90](https://github.com/apache/arrow-datafusion/issues/90) -- \[rust\]\[datafusion\] optimize count\(\*\) queries on parquet sources [\#89](https://github.com/apache/arrow-datafusion/issues/89) -- Produce a design for a metrics framework [\#21](https://github.com/apache/arrow-datafusion/issues/21) +- InList expr with NULL literals do not work [\#1190](https://github.com/apache/datafusion/issues/1190) +- update the homepage README to include values, `approx_distinct`, etc. [\#1171](https://github.com/apache/datafusion/issues/1171) +- \[Python\]: Inconsistencies with Python package name [\#1011](https://github.com/apache/datafusion/issues/1011) +- Wanting to contribute to project where to start? 
[\#983](https://github.com/apache/datafusion/issues/983) +- delete redundant code [\#973](https://github.com/apache/datafusion/issues/973) +- How to build DataFusion python wheel [\#853](https://github.com/apache/datafusion/issues/853) +- Add support for partition pruning [\#204](https://github.com/apache/datafusion/issues/204) +- \[Datafusion\] Support joins on TimestampMillisecond columns [\#187](https://github.com/apache/datafusion/issues/187) +- TPC-H Query 21 [\#173](https://github.com/apache/datafusion/issues/173) +- TPC-H Query 13 [\#164](https://github.com/apache/datafusion/issues/164) +- TPC-H Query 8 [\#162](https://github.com/apache/datafusion/issues/162) +- implement split_part\(string, delimiter, position\) [\#157](https://github.com/apache/datafusion/issues/157) +- Join Statement: Schema contains duplicate unqualified field name [\#155](https://github.com/apache/datafusion/issues/155) +- ParquetTable should avoid scanning all files twice [\#136](https://github.com/apache/datafusion/issues/136) +- Add support for reading partitioned Parquet files [\#133](https://github.com/apache/datafusion/issues/133) +- Add support for Parquet schema merging [\#132](https://github.com/apache/datafusion/issues/132) +- Catalog abstraction [\#126](https://github.com/apache/datafusion/issues/126) +- Optimizer rules should work with qualified column names [\#125](https://github.com/apache/datafusion/issues/125) +- Add optional qualifier to Expr::Column [\#121](https://github.com/apache/datafusion/issues/121) +- Implement modulus expression [\#99](https://github.com/apache/datafusion/issues/99) +- \[Rust\] Add constant folding to expressions during logically planning [\#98](https://github.com/apache/datafusion/issues/98) +- \[Rust\] Implement pretty print for physical query plan [\#93](https://github.com/apache/datafusion/issues/93) +- Can not group by boolean columns \(add boolean to valid keys of groupBy\) [\#91](https://github.com/apache/datafusion/issues/91) +- improve 
performance of building literal arrays [\#90](https://github.com/apache/datafusion/issues/90) +- \[rust\]\[datafusion\] optimize count\(\*\) queries on parquet sources [\#89](https://github.com/apache/datafusion/issues/89) +- Produce a design for a metrics framework [\#21](https://github.com/apache/datafusion/issues/21) **Merged pull requests:** -- Add timezome string to stablize test [\#1265](https://github.com/apache/arrow-datafusion/pull/1265) ([viirya](https://github.com/viirya)) -- numerical_coercion pattern match optimize [\#1256](https://github.com/apache/arrow-datafusion/pull/1256) ([Jimexist](https://github.com/Jimexist)) -- fix and update window function sql tests [\#1059](https://github.com/apache/arrow-datafusion/pull/1059) ([Jimexist](https://github.com/Jimexist)) -- reduce ScalarValue from trait boilerplate with macro [\#989](https://github.com/apache/arrow-datafusion/pull/989) ([houqp](https://github.com/houqp)) +- Add timezome string to stablize test [\#1265](https://github.com/apache/datafusion/pull/1265) ([viirya](https://github.com/viirya)) +- numerical_coercion pattern match optimize [\#1256](https://github.com/apache/datafusion/pull/1256) ([Jimexist](https://github.com/Jimexist)) +- fix and update window function sql tests [\#1059](https://github.com/apache/datafusion/pull/1059) ([Jimexist](https://github.com/Jimexist)) +- reduce ScalarValue from trait boilerplate with macro [\#989](https://github.com/apache/datafusion/pull/989) ([houqp](https://github.com/houqp)) diff --git a/dev/changelog/7.0.0.md b/dev/changelog/7.0.0.md index 4d2606d7bfbe..adaa22917074 100644 --- a/dev/changelog/7.0.0.md +++ b/dev/changelog/7.0.0.md @@ -17,294 +17,294 @@ under the License. 
--> -## [7.0.0](https://github.com/apache/arrow-datafusion/tree/7.0.0) (2022-02-14) +## [7.0.0](https://github.com/apache/datafusion/tree/7.0.0) (2022-02-14) -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/6.0.0...7.0.0) +[Full Changelog](https://github.com/apache/datafusion/compare/6.0.0...7.0.0) **Breaking changes:** -- Consolidate various configurations options, remove unrelated `batch_size` [\#1565](https://github.com/apache/arrow-datafusion/issues/1565) -- Extract logical plans in LogicalPlan as independent struct [\#1228](https://github.com/apache/arrow-datafusion/issues/1228) -- Update `ExecutionPlan` to know about sortedness and repartitioning optimizer pass respect the invariants [\#1776](https://github.com/apache/arrow-datafusion/pull/1776) ([alamb](https://github.com/alamb)) -- Update to `arrow 8.0.0` [\#1673](https://github.com/apache/arrow-datafusion/pull/1673) ([alamb](https://github.com/alamb)) -- Remove non idiomatic `DataFusionError::into_arrow_external_error` in favor of From conversion [\#1645](https://github.com/apache/arrow-datafusion/pull/1645) ([alamb](https://github.com/alamb)) -- Remove `Accumulator::update` and `Accumulator::merge` [\#1582](https://github.com/apache/arrow-datafusion/pull/1582) ([Jimexist](https://github.com/Jimexist)) -- implement `Hash` for various types and replace `PartialOrd` [\#1580](https://github.com/apache/arrow-datafusion/pull/1580) ([Jimexist](https://github.com/Jimexist)) -- Replace `DatafusionError` with `GenericError` in `ObjectStore` interface [\#1541](https://github.com/apache/arrow-datafusion/pull/1541) ([matthewmturner](https://github.com/matthewmturner)) -- Make `FLOAT` SQL type map to `Float32` rather than `Float64` [\#1423](https://github.com/apache/arrow-datafusion/pull/1423) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([liukun4515](https://github.com/liukun4515)) -- Map `REAL` SQL type to `Float32` rather than `Float64` to be consistent with pg 
[\#1390](https://github.com/apache/arrow-datafusion/pull/1390) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([hntd187](https://github.com/hntd187)) +- Consolidate various configurations options, remove unrelated `batch_size` [\#1565](https://github.com/apache/datafusion/issues/1565) +- Extract logical plans in LogicalPlan as independent struct [\#1228](https://github.com/apache/datafusion/issues/1228) +- Update `ExecutionPlan` to know about sortedness and repartitioning optimizer pass respect the invariants [\#1776](https://github.com/apache/datafusion/pull/1776) ([alamb](https://github.com/alamb)) +- Update to `arrow 8.0.0` [\#1673](https://github.com/apache/datafusion/pull/1673) ([alamb](https://github.com/alamb)) +- Remove non idiomatic `DataFusionError::into_arrow_external_error` in favor of From conversion [\#1645](https://github.com/apache/datafusion/pull/1645) ([alamb](https://github.com/alamb)) +- Remove `Accumulator::update` and `Accumulator::merge` [\#1582](https://github.com/apache/datafusion/pull/1582) ([Jimexist](https://github.com/Jimexist)) +- implement `Hash` for various types and replace `PartialOrd` [\#1580](https://github.com/apache/datafusion/pull/1580) ([Jimexist](https://github.com/Jimexist)) +- Replace `DatafusionError` with `GenericError` in `ObjectStore` interface [\#1541](https://github.com/apache/datafusion/pull/1541) ([matthewmturner](https://github.com/matthewmturner)) +- Make `FLOAT` SQL type map to `Float32` rather than `Float64` [\#1423](https://github.com/apache/datafusion/pull/1423) [[sql](https://github.com/apache/datafusion/labels/sql)] ([liukun4515](https://github.com/liukun4515)) +- Map `REAL` SQL type to `Float32` rather than `Float64` to be consistent with pg [\#1390](https://github.com/apache/datafusion/pull/1390) [[sql](https://github.com/apache/datafusion/labels/sql)] ([hntd187](https://github.com/hntd187)) **Implemented enhancements:** -- Create new `datafusion_expr` crate 
[\#1753](https://github.com/apache/arrow-datafusion/issues/1753) -- Create new `datafusion_common` crate [\#1752](https://github.com/apache/arrow-datafusion/issues/1752) -- API to get Expr's type and nullability without a `DFSchema` [\#1725](https://github.com/apache/arrow-datafusion/issues/1725) -- Cleaner API to create `Expr::ScalarFunction` programatically [\#1718](https://github.com/apache/arrow-datafusion/issues/1718) -- Introduce a `Vec` based row-wise representation for DataFusion [\#1708](https://github.com/apache/arrow-datafusion/issues/1708) -- Simplify creating new `ListingTable` [\#1705](https://github.com/apache/arrow-datafusion/issues/1705) -- Implement TableProvider for DataFrameImpl to allow registration of logical plans [\#1698](https://github.com/apache/arrow-datafusion/issues/1698) -- Public Expr simplification API [\#1694](https://github.com/apache/arrow-datafusion/issues/1694) -- Query Optimizer: Add OUTER --\> INNER join conversion [\#1670](https://github.com/apache/arrow-datafusion/issues/1670) -- Support reading from CSV, Avro and Json files that have mergeable/compatible, but not identical schemas [\#1669](https://github.com/apache/arrow-datafusion/issues/1669) -- Remove `DataFusionError::into_arrow_external_error` in favor of `From` conversion [\#1644](https://github.com/apache/arrow-datafusion/issues/1644) -- Include join type in display implementation for logical plan [\#1620](https://github.com/apache/arrow-datafusion/issues/1620) -- Switch datafusion to using `eq_dyn_scalar`, etc kernels [\#1610](https://github.com/apache/arrow-datafusion/issues/1610) -- Proposal: Remove `Accumulator::update` and `Accumulator::merge` [\#1549](https://github.com/apache/arrow-datafusion/issues/1549) -- Replace DataFusionError/Result with impl Error for ObjectStore and Reader [\#1540](https://github.com/apache/arrow-datafusion/issues/1540) -- Add `approx_quantile` support [\#1538](https://github.com/apache/arrow-datafusion/issues/1538) -- support sorting 
decimal data type [\#1522](https://github.com/apache/arrow-datafusion/issues/1522) -- Keep all datafusion's packages up to date with Dependabot [\#1472](https://github.com/apache/arrow-datafusion/issues/1472) -- ExecutionContext support init ExecutionContextState with `new(state: Arc>)` method [\#1439](https://github.com/apache/arrow-datafusion/issues/1439) -- support the decimal scalar value [\#1393](https://github.com/apache/arrow-datafusion/issues/1393) -- Documentation for using scalar functions with the DataFrame API [\#1364](https://github.com/apache/arrow-datafusion/issues/1364) -- Support `boolean == boolean` and `boolean != boolean` operators [\#1159](https://github.com/apache/arrow-datafusion/issues/1159) -- Support DataType::Decimal\(15, 2\) in TPC-H benchmark [\#174](https://github.com/apache/arrow-datafusion/issues/174) -- Make `MemoryStream` public [\#150](https://github.com/apache/arrow-datafusion/issues/150) -- Add support for Parquet schema merging [\#132](https://github.com/apache/arrow-datafusion/issues/132) -- Add SQL support for IN expression [\#118](https://github.com/apache/arrow-datafusion/issues/118) -- Add logging to datafusion-cli [\#1789](https://github.com/apache/arrow-datafusion/pull/1789) ([alamb](https://github.com/alamb)) -- Add `approx_median()` aggregate function [\#1729](https://github.com/apache/arrow-datafusion/pull/1729) ([realno](https://github.com/realno)) -- Add join type for logical plan display [\#1674](https://github.com/apache/arrow-datafusion/pull/1674) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) -- Fix null comparison for Parquet pruning predicate [\#1595](https://github.com/apache/arrow-datafusion/pull/1595) ([viirya](https://github.com/viirya)) -- Add `corr` aggregate function [\#1561](https://github.com/apache/arrow-datafusion/pull/1561) ([realno](https://github.com/realno)) -- Add `covar`, `covar_pop` and `covar_samp` aggregate functions 
[\#1551](https://github.com/apache/arrow-datafusion/pull/1551) ([realno](https://github.com/realno)) -- Add `approx_quantile()` aggregation function [\#1539](https://github.com/apache/arrow-datafusion/pull/1539) ([domodwyer](https://github.com/domodwyer)) -- Initial MemoryManager and DiskManager APIs for query execution + External Sort implementation [\#1526](https://github.com/apache/arrow-datafusion/pull/1526) ([yjshen](https://github.com/yjshen)) -- Add `stddev` and `variance` [\#1525](https://github.com/apache/arrow-datafusion/pull/1525) ([realno](https://github.com/realno)) -- Add `rem` operation for Expr [\#1467](https://github.com/apache/arrow-datafusion/pull/1467) ([liukun4515](https://github.com/liukun4515)) -- support decimal data type in create table [\#1431](https://github.com/apache/arrow-datafusion/pull/1431) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([liukun4515](https://github.com/liukun4515)) -- Ordering by index in select expression [\#1419](https://github.com/apache/arrow-datafusion/pull/1419) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([hntd187](https://github.com/hntd187)) -- Add support for `ORDER BY` on unprojected columns [\#1415](https://github.com/apache/arrow-datafusion/pull/1415) ([viirya](https://github.com/viirya)) -- Support decimal for `min` and `max` aggregate [\#1407](https://github.com/apache/arrow-datafusion/pull/1407) ([liukun4515](https://github.com/liukun4515)) -- Consolidate `ConstantFolding` and `SimplifyExpression` [\#1375](https://github.com/apache/arrow-datafusion/pull/1375) ([alamb](https://github.com/alamb)) -- Datafusion cli quiet mode command to contain option bool [\#1345](https://github.com/apache/arrow-datafusion/pull/1345) ([Jimexist](https://github.com/Jimexist)) -- Implement `array_agg` aggregate function [\#1300](https://github.com/apache/arrow-datafusion/pull/1300) ([viirya](https://github.com/viirya)) -- Add a command to switch output format in cli 
[\#1284](https://github.com/apache/arrow-datafusion/pull/1284) ([capkurmagati](https://github.com/capkurmagati)) -- Support `=`, `<`, `<=`, `>`, `>=`, `!=`, `is distinct from`, `is not distinct from` for `BooleanArray` [\#1163](https://github.com/apache/arrow-datafusion/pull/1163) ([alamb](https://github.com/alamb)) +- Create new `datafusion_expr` crate [\#1753](https://github.com/apache/datafusion/issues/1753) +- Create new `datafusion_common` crate [\#1752](https://github.com/apache/datafusion/issues/1752) +- API to get Expr's type and nullability without a `DFSchema` [\#1725](https://github.com/apache/datafusion/issues/1725) +- Cleaner API to create `Expr::ScalarFunction` programatically [\#1718](https://github.com/apache/datafusion/issues/1718) +- Introduce a `Vec` based row-wise representation for DataFusion [\#1708](https://github.com/apache/datafusion/issues/1708) +- Simplify creating new `ListingTable` [\#1705](https://github.com/apache/datafusion/issues/1705) +- Implement TableProvider for DataFrameImpl to allow registration of logical plans [\#1698](https://github.com/apache/datafusion/issues/1698) +- Public Expr simplification API [\#1694](https://github.com/apache/datafusion/issues/1694) +- Query Optimizer: Add OUTER --\> INNER join conversion [\#1670](https://github.com/apache/datafusion/issues/1670) +- Support reading from CSV, Avro and Json files that have mergeable/compatible, but not identical schemas [\#1669](https://github.com/apache/datafusion/issues/1669) +- Remove `DataFusionError::into_arrow_external_error` in favor of `From` conversion [\#1644](https://github.com/apache/datafusion/issues/1644) +- Include join type in display implementation for logical plan [\#1620](https://github.com/apache/datafusion/issues/1620) +- Switch datafusion to using `eq_dyn_scalar`, etc kernels [\#1610](https://github.com/apache/datafusion/issues/1610) +- Proposal: Remove `Accumulator::update` and `Accumulator::merge` 
[\#1549](https://github.com/apache/datafusion/issues/1549) +- Replace DataFusionError/Result with impl Error for ObjectStore and Reader [\#1540](https://github.com/apache/datafusion/issues/1540) +- Add `approx_quantile` support [\#1538](https://github.com/apache/datafusion/issues/1538) +- support sorting decimal data type [\#1522](https://github.com/apache/datafusion/issues/1522) +- Keep all datafusion's packages up to date with Dependabot [\#1472](https://github.com/apache/datafusion/issues/1472) +- ExecutionContext support init ExecutionContextState with `new(state: Arc>)` method [\#1439](https://github.com/apache/datafusion/issues/1439) +- support the decimal scalar value [\#1393](https://github.com/apache/datafusion/issues/1393) +- Documentation for using scalar functions with the DataFrame API [\#1364](https://github.com/apache/datafusion/issues/1364) +- Support `boolean == boolean` and `boolean != boolean` operators [\#1159](https://github.com/apache/datafusion/issues/1159) +- Support DataType::Decimal\(15, 2\) in TPC-H benchmark [\#174](https://github.com/apache/datafusion/issues/174) +- Make `MemoryStream` public [\#150](https://github.com/apache/datafusion/issues/150) +- Add support for Parquet schema merging [\#132](https://github.com/apache/datafusion/issues/132) +- Add SQL support for IN expression [\#118](https://github.com/apache/datafusion/issues/118) +- Add logging to datafusion-cli [\#1789](https://github.com/apache/datafusion/pull/1789) ([alamb](https://github.com/alamb)) +- Add `approx_median()` aggregate function [\#1729](https://github.com/apache/datafusion/pull/1729) ([realno](https://github.com/realno)) +- Add join type for logical plan display [\#1674](https://github.com/apache/datafusion/pull/1674) [[sql](https://github.com/apache/datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) +- Fix null comparison for Parquet pruning predicate [\#1595](https://github.com/apache/datafusion/pull/1595) 
([viirya](https://github.com/viirya)) +- Add `corr` aggregate function [\#1561](https://github.com/apache/datafusion/pull/1561) ([realno](https://github.com/realno)) +- Add `covar`, `covar_pop` and `covar_samp` aggregate functions [\#1551](https://github.com/apache/datafusion/pull/1551) ([realno](https://github.com/realno)) +- Add `approx_quantile()` aggregation function [\#1539](https://github.com/apache/datafusion/pull/1539) ([domodwyer](https://github.com/domodwyer)) +- Initial MemoryManager and DiskManager APIs for query execution + External Sort implementation [\#1526](https://github.com/apache/datafusion/pull/1526) ([yjshen](https://github.com/yjshen)) +- Add `stddev` and `variance` [\#1525](https://github.com/apache/datafusion/pull/1525) ([realno](https://github.com/realno)) +- Add `rem` operation for Expr [\#1467](https://github.com/apache/datafusion/pull/1467) ([liukun4515](https://github.com/liukun4515)) +- support decimal data type in create table [\#1431](https://github.com/apache/datafusion/pull/1431) [[sql](https://github.com/apache/datafusion/labels/sql)] ([liukun4515](https://github.com/liukun4515)) +- Ordering by index in select expression [\#1419](https://github.com/apache/datafusion/pull/1419) [[sql](https://github.com/apache/datafusion/labels/sql)] ([hntd187](https://github.com/hntd187)) +- Add support for `ORDER BY` on unprojected columns [\#1415](https://github.com/apache/datafusion/pull/1415) ([viirya](https://github.com/viirya)) +- Support decimal for `min` and `max` aggregate [\#1407](https://github.com/apache/datafusion/pull/1407) ([liukun4515](https://github.com/liukun4515)) +- Consolidate `ConstantFolding` and `SimplifyExpression` [\#1375](https://github.com/apache/datafusion/pull/1375) ([alamb](https://github.com/alamb)) +- Datafusion cli quiet mode command to contain option bool [\#1345](https://github.com/apache/datafusion/pull/1345) ([Jimexist](https://github.com/Jimexist)) +- Implement `array_agg` aggregate function 
[\#1300](https://github.com/apache/datafusion/pull/1300) ([viirya](https://github.com/viirya)) +- Add a command to switch output format in cli [\#1284](https://github.com/apache/datafusion/pull/1284) ([capkurmagati](https://github.com/capkurmagati)) +- Support `=`, `<`, `<=`, `>`, `>=`, `!=`, `is distinct from`, `is not distinct from` for `BooleanArray` [\#1163](https://github.com/apache/datafusion/pull/1163) ([alamb](https://github.com/alamb)) **Fixed bugs:** -- Unsupported data type in hasher: Timestamp\(Second, None\) [\#1768](https://github.com/apache/arrow-datafusion/issues/1768) -- SQL column identifiers should be converted to lowercase when unquoted [\#1746](https://github.com/apache/arrow-datafusion/issues/1746) -- Data type Dictionary\(Int32, Utf8\) not supported for binary operation 'eq' on dyn arrays [\#1605](https://github.com/apache/arrow-datafusion/issues/1605) -- datafusion doesn't process predicate pushdown correctly when there is outer join [\#1586](https://github.com/apache/arrow-datafusion/issues/1586) -- casting `Int64` to `Float64` unsuccessfully caused tpch8 to fail [\#1576](https://github.com/apache/arrow-datafusion/issues/1576) -- CTE/WITH .. UNION ALL confuses name resolution in WHERE [\#1509](https://github.com/apache/arrow-datafusion/issues/1509) -- ORDER BY min\(x\) results in error `Plan("No field named 'foo.x'. 
Valid fields are 'MIN(foo.x)'.")` [\#1479](https://github.com/apache/arrow-datafusion/issues/1479) -- Sort discards field metadata on the output schema [\#1476](https://github.com/apache/arrow-datafusion/issues/1476) -- Datafusion should not strip out timezone information from existing types [\#1454](https://github.com/apache/arrow-datafusion/issues/1454) -- Error on some queries: "column types must match schema types, expected XXX but found YYY" [\#1447](https://github.com/apache/arrow-datafusion/issues/1447) -- Query failing to return any results when filter is an equality check on strings \(bad statistics in parquet\) [\#1433](https://github.com/apache/arrow-datafusion/issues/1433) -- Field names containing period such as `f.c1` cannot be named in SQL query [\#1432](https://github.com/apache/arrow-datafusion/issues/1432) -- `Select *` returns an unexpected result [\#1412](https://github.com/apache/arrow-datafusion/issues/1412) -- Turn off unused default features of chrono and ahash [\#1398](https://github.com/apache/arrow-datafusion/issues/1398) -- real data type is float32 in PG database, but in the datafusion it is as float64 [\#1380](https://github.com/apache/arrow-datafusion/issues/1380) -- TPC-H q10 performance regression \(expression for filter with added alias is not pushed down\) [\#1367](https://github.com/apache/arrow-datafusion/issues/1367) -- ProjectionExec Loses Field Metadata [\#1361](https://github.com/apache/arrow-datafusion/issues/1361) -- Support Filter on unprojected columns [\#1351](https://github.com/apache/arrow-datafusion/issues/1351) -- NULLS ORDER is inconsistent with postgres [\#1343](https://github.com/apache/arrow-datafusion/issues/1343) -- Fix bug while merging `RecordBatch`, add `SortPreservingMerge` fuzz tester [\#1678](https://github.com/apache/arrow-datafusion/pull/1678) ([alamb](https://github.com/alamb)) -- fix a cte block with same name for many times [\#1639](https://github.com/apache/arrow-datafusion/pull/1639) 
[[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) -- fix: casting Int64 to Float64 unsuccessfully caused tpch8 to fail [\#1601](https://github.com/apache/arrow-datafusion/pull/1601) ([xudong963](https://github.com/xudong963)) -- Fix single_distinct_to_groupby for arbitrary expressions [\#1519](https://github.com/apache/arrow-datafusion/pull/1519) ([james727](https://github.com/james727)) -- Fix SortExec discards field metadata on the output schema [\#1477](https://github.com/apache/arrow-datafusion/pull/1477) ([alamb](https://github.com/alamb)) -- fix calculate in many_to_many_hash_partition test. [\#1463](https://github.com/apache/arrow-datafusion/pull/1463) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Add Timezone to Scalar::Time\* types, and better timezone awareness to Datafusion's time types [\#1455](https://github.com/apache/arrow-datafusion/pull/1455) ([maxburke](https://github.com/maxburke)) -- Support identifiers with `.` in them [\#1449](https://github.com/apache/arrow-datafusion/pull/1449) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Fixes for working with functions in dataframes, additional documentation [\#1430](https://github.com/apache/arrow-datafusion/pull/1430) ([tobyhede](https://github.com/tobyhede)) -- \[Minor\] Fix `send_time` metric for hash-repartition [\#1421](https://github.com/apache/arrow-datafusion/pull/1421) ([Dandandan](https://github.com/Dandandan)) -- fix: Select \* returns an unexpected result [\#1413](https://github.com/apache/arrow-datafusion/pull/1413) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) -- Make cli handle multiple whitespaces [\#1388](https://github.com/apache/arrow-datafusion/pull/1388) ([capkurmagati](https://github.com/capkurmagati)) -- Metadata is kept in projections for non-derived columns 
[\#1378](https://github.com/apache/arrow-datafusion/pull/1378) ([hntd187](https://github.com/hntd187)) -- Fix Predicate Pushdown: split_members should be able to split aliased predicate [\#1368](https://github.com/apache/arrow-datafusion/pull/1368) ([viirya](https://github.com/viirya)) -- Change the arg names and make parameters more meaningful [\#1357](https://github.com/apache/arrow-datafusion/pull/1357) ([liukun4515](https://github.com/liukun4515)) -- collect table stats by default for listing table [\#1347](https://github.com/apache/arrow-datafusion/pull/1347) ([houqp](https://github.com/houqp)) -- fix: make nulls-order consistent with postgres [\#1344](https://github.com/apache/arrow-datafusion/pull/1344) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) -- Avoid changing expression names during constant folding [\#1319](https://github.com/apache/arrow-datafusion/pull/1319) ([viirya](https://github.com/viirya)) -- improve error message for invalid create table statement [\#1294](https://github.com/apache/arrow-datafusion/pull/1294) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([houqp](https://github.com/houqp)) -- Forbid creating the table with the same name [\#1288](https://github.com/apache/arrow-datafusion/pull/1288) ([liukun4515](https://github.com/liukun4515)) +- Unsupported data type in hasher: Timestamp\(Second, None\) [\#1768](https://github.com/apache/datafusion/issues/1768) +- SQL column identifiers should be converted to lowercase when unquoted [\#1746](https://github.com/apache/datafusion/issues/1746) +- Data type Dictionary\(Int32, Utf8\) not supported for binary operation 'eq' on dyn arrays [\#1605](https://github.com/apache/datafusion/issues/1605) +- datafusion doesn't process predicate pushdown correctly when there is outer join [\#1586](https://github.com/apache/datafusion/issues/1586) +- casting `Int64` to `Float64` unsuccessfully caused tpch8 to fail 
[\#1576](https://github.com/apache/datafusion/issues/1576) +- CTE/WITH .. UNION ALL confuses name resolution in WHERE [\#1509](https://github.com/apache/datafusion/issues/1509) +- ORDER BY min\(x\) results in error `Plan("No field named 'foo.x'. Valid fields are 'MIN(foo.x)'.")` [\#1479](https://github.com/apache/datafusion/issues/1479) +- Sort discards field metadata on the output schema [\#1476](https://github.com/apache/datafusion/issues/1476) +- Datafusion should not strip out timezone information from existing types [\#1454](https://github.com/apache/datafusion/issues/1454) +- Error on some queries: "column types must match schema types, expected XXX but found YYY" [\#1447](https://github.com/apache/datafusion/issues/1447) +- Query failing to return any results when filter is an equality check on strings \(bad statistics in parquet\) [\#1433](https://github.com/apache/datafusion/issues/1433) +- Field names containing period such as `f.c1` cannot be named in SQL query [\#1432](https://github.com/apache/datafusion/issues/1432) +- `Select *` returns an unexpected result [\#1412](https://github.com/apache/datafusion/issues/1412) +- Turn off unused default features of chrono and ahash [\#1398](https://github.com/apache/datafusion/issues/1398) +- real data type is float32 in PG database, but in the datafusion it is as float64 [\#1380](https://github.com/apache/datafusion/issues/1380) +- TPC-H q10 performance regression \(expression for filter with added alias is not pushed down\) [\#1367](https://github.com/apache/datafusion/issues/1367) +- ProjectionExec Loses Field Metadata [\#1361](https://github.com/apache/datafusion/issues/1361) +- Support Filter on unprojected columns [\#1351](https://github.com/apache/datafusion/issues/1351) +- NULLS ORDER is inconsistent with postgres [\#1343](https://github.com/apache/datafusion/issues/1343) +- Fix bug while merging `RecordBatch`, add `SortPreservingMerge` fuzz tester [\#1678](https://github.com/apache/datafusion/pull/1678) 
([alamb](https://github.com/alamb)) +- fix a cte block with same name for many times [\#1639](https://github.com/apache/datafusion/pull/1639) [[sql](https://github.com/apache/datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) +- fix: casting Int64 to Float64 unsuccessfully caused tpch8 to fail [\#1601](https://github.com/apache/datafusion/pull/1601) ([xudong963](https://github.com/xudong963)) +- Fix single_distinct_to_groupby for arbitrary expressions [\#1519](https://github.com/apache/datafusion/pull/1519) ([james727](https://github.com/james727)) +- Fix SortExec discards field metadata on the output schema [\#1477](https://github.com/apache/datafusion/pull/1477) ([alamb](https://github.com/alamb)) +- fix calculate in many_to_many_hash_partition test. [\#1463](https://github.com/apache/datafusion/pull/1463) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Add Timezone to Scalar::Time\* types, and better timezone awareness to Datafusion's time types [\#1455](https://github.com/apache/datafusion/pull/1455) ([maxburke](https://github.com/maxburke)) +- Support identifiers with `.` in them [\#1449](https://github.com/apache/datafusion/pull/1449) [[sql](https://github.com/apache/datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Fixes for working with functions in dataframes, additional documentation [\#1430](https://github.com/apache/datafusion/pull/1430) ([tobyhede](https://github.com/tobyhede)) +- \[Minor\] Fix `send_time` metric for hash-repartition [\#1421](https://github.com/apache/datafusion/pull/1421) ([Dandandan](https://github.com/Dandandan)) +- fix: Select \* returns an unexpected result [\#1413](https://github.com/apache/datafusion/pull/1413) [[sql](https://github.com/apache/datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) +- Make cli handle multiple whitespaces [\#1388](https://github.com/apache/datafusion/pull/1388) ([capkurmagati](https://github.com/capkurmagati)) +- Metadata is kept in projections for 
non-derived columns [\#1378](https://github.com/apache/datafusion/pull/1378) ([hntd187](https://github.com/hntd187)) +- Fix Predicate Pushdown: split_members should be able to split aliased predicate [\#1368](https://github.com/apache/datafusion/pull/1368) ([viirya](https://github.com/viirya)) +- Change the arg names and make parameters more meaningful [\#1357](https://github.com/apache/datafusion/pull/1357) ([liukun4515](https://github.com/liukun4515)) +- collect table stats by default for listing table [\#1347](https://github.com/apache/datafusion/pull/1347) ([houqp](https://github.com/houqp)) +- fix: make nulls-order consistent with postgres [\#1344](https://github.com/apache/datafusion/pull/1344) [[sql](https://github.com/apache/datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) +- Avoid changing expression names during constant folding [\#1319](https://github.com/apache/datafusion/pull/1319) ([viirya](https://github.com/viirya)) +- improve error message for invalid create table statement [\#1294](https://github.com/apache/datafusion/pull/1294) [[sql](https://github.com/apache/datafusion/labels/sql)] ([houqp](https://github.com/houqp)) +- Forbid creating the table with the same name [\#1288](https://github.com/apache/datafusion/pull/1288) ([liukun4515](https://github.com/liukun4515)) **Documentation updates:** -- Clarify docs about `Accumulator::update` and `Accumulator::update_batch` [\#1542](https://github.com/apache/arrow-datafusion/pull/1542) ([alamb](https://github.com/alamb)) -- Fix duplicated `cargo run --example parquet_sql` [\#1482](https://github.com/apache/arrow-datafusion/pull/1482) ([sergey-melnychuk](https://github.com/sergey-melnychuk)) -- add documentation to Datafusion cli's new commands [\#1348](https://github.com/apache/arrow-datafusion/pull/1348) ([liukun4515](https://github.com/liukun4515)) -- fix some clippy warnings from nightly channel [\#1277](https://github.com/apache/arrow-datafusion/pull/1277) 
[[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jimexist](https://github.com/Jimexist)) +- Clarify docs about `Accumulator::update` and `Accumulator::update_batch` [\#1542](https://github.com/apache/datafusion/pull/1542) ([alamb](https://github.com/alamb)) +- Fix duplicated `cargo run --example parquet_sql` [\#1482](https://github.com/apache/datafusion/pull/1482) ([sergey-melnychuk](https://github.com/sergey-melnychuk)) +- add documentation to Datafusion cli's new commands [\#1348](https://github.com/apache/datafusion/pull/1348) ([liukun4515](https://github.com/liukun4515)) +- fix some clippy warnings from nightly channel [\#1277](https://github.com/apache/datafusion/pull/1277) [[sql](https://github.com/apache/datafusion/labels/sql)] ([Jimexist](https://github.com/Jimexist)) **Performance improvements:** -- Parquet pruning predicate for `IS NULL` [\#1591](https://github.com/apache/arrow-datafusion/issues/1591) -- Fix predicate pushdown for outer joins [\#1618](https://github.com/apache/arrow-datafusion/pull/1618) ([james727](https://github.com/james727)) -- fix: sql planner creates cross join instead of inner join from select predicates [\#1566](https://github.com/apache/arrow-datafusion/pull/1566) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) -- Split fetch_metadata into fetch_statistics and fetch_schema [\#1365](https://github.com/apache/arrow-datafusion/pull/1365) ([Dandandan](https://github.com/Dandandan)) -- Optimize the performance queries with a single distinct aggregate [\#1315](https://github.com/apache/arrow-datafusion/pull/1315) ([ic4y](https://github.com/ic4y)) -- Left join could use bitmap for left join instead of Vec\<bool\> [\#1291](https://github.com/apache/arrow-datafusion/pull/1291) ([boazberman](https://github.com/boazberman)) +- Parquet pruning predicate for `IS NULL` [\#1591](https://github.com/apache/datafusion/issues/1591) +- Fix predicate pushdown for outer joins
[\#1618](https://github.com/apache/datafusion/pull/1618) ([james727](https://github.com/james727)) +- fix: sql planner creates cross join instead of inner join from select predicates [\#1566](https://github.com/apache/datafusion/pull/1566) [[sql](https://github.com/apache/datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) +- Split fetch_metadata into fetch_statistics and fetch_schema [\#1365](https://github.com/apache/datafusion/pull/1365) ([Dandandan](https://github.com/Dandandan)) +- Optimize the performance queries with a single distinct aggregate [\#1315](https://github.com/apache/datafusion/pull/1315) ([ic4y](https://github.com/ic4y)) +- Left join could use bitmap for left join instead of Vec\<bool\> [\#1291](https://github.com/apache/datafusion/pull/1291) ([boazberman](https://github.com/boazberman)) **Closed issues:** -- Add `release compile` to CI [\#1728](https://github.com/apache/arrow-datafusion/issues/1728) -- DiskManager and TempFiles getting created several times per query [\#1690](https://github.com/apache/arrow-datafusion/issues/1690) -- Add a test for the `pyarrow` feature in CI [\#1635](https://github.com/apache/arrow-datafusion/issues/1635) -- SQL tests for when sorting exceeded available memory and had to spill to disk [\#1573](https://github.com/apache/arrow-datafusion/issues/1573) -- Consolidate the N-way merging code and `SortPreservingMergeStream` \(which has quite good tests of what is often quite tricky code, and it will be performance critical\) [\#1572](https://github.com/apache/arrow-datafusion/issues/1572) -- Consolidate the `SortExec` code \(so there is only a single sort operator that does in memory sorting if it has enough memory budget but then spills to disk if needed\).
[\#1571](https://github.com/apache/arrow-datafusion/issues/1571) -- Track memory usage in Non Limited Operators [\#1569](https://github.com/apache/arrow-datafusion/issues/1569) -- \[Question\] Why does ballista store tables in the client instead of in the SchedulerServer [\#1473](https://github.com/apache/arrow-datafusion/issues/1473) -- Consolidate Projection for Schema and RecordBatch [\#1425](https://github.com/apache/arrow-datafusion/issues/1425) -- Support Sort on unprojected columns [\#1372](https://github.com/apache/arrow-datafusion/issues/1372) -- Unused code in hash_aggregate [\#1362](https://github.com/apache/arrow-datafusion/issues/1362) -- Why use the expr types before coercion to get the result type? [\#1358](https://github.com/apache/arrow-datafusion/issues/1358) -- A problem about the projection_push_down optimizer gathers valid columns [\#1312](https://github.com/apache/arrow-datafusion/issues/1312) -- apply constant folding to `LogicalPlan::Values` [\#1170](https://github.com/apache/arrow-datafusion/issues/1170) -- reduce usage of `IntoIterator` in logical plan builder window fn [\#372](https://github.com/apache/arrow-datafusion/issues/372) -- Why does DataFusion throw a Tokio 0.2 runtime error? 
[\#176](https://github.com/apache/arrow-datafusion/issues/176) -- TPC-H Query 14 [\#165](https://github.com/apache/arrow-datafusion/issues/165) -- Length kernel returns bytes not character length [\#156](https://github.com/apache/arrow-datafusion/issues/156) -- Split the logical operators out into separate source files [\#115](https://github.com/apache/arrow-datafusion/issues/115) +- Add `release compile` to CI [\#1728](https://github.com/apache/datafusion/issues/1728) +- DiskManager and TempFiles getting created several times per query [\#1690](https://github.com/apache/datafusion/issues/1690) +- Add a test for the `pyarrow` feature in CI [\#1635](https://github.com/apache/datafusion/issues/1635) +- SQL tests for when sorting exceeded available memory and had to spill to disk [\#1573](https://github.com/apache/datafusion/issues/1573) +- Consolidate the N-way merging code and `SortPreservingMergeStream` \(which has quite good tests of what is often quite tricky code, and it will be performance critical\) [\#1572](https://github.com/apache/datafusion/issues/1572) +- Consolidate the `SortExec` code \(so there is only a single sort operator that does in memory sorting if it has enough memory budget but then spills to disk if needed\). [\#1571](https://github.com/apache/datafusion/issues/1571) +- Track memory usage in Non Limited Operators [\#1569](https://github.com/apache/datafusion/issues/1569) +- \[Question\] Why does ballista store tables in the client instead of in the SchedulerServer [\#1473](https://github.com/apache/datafusion/issues/1473) +- Consolidate Projection for Schema and RecordBatch [\#1425](https://github.com/apache/datafusion/issues/1425) +- Support Sort on unprojected columns [\#1372](https://github.com/apache/datafusion/issues/1372) +- Unused code in hash_aggregate [\#1362](https://github.com/apache/datafusion/issues/1362) +- Why use the expr types before coercion to get the result type? 
[\#1358](https://github.com/apache/datafusion/issues/1358) +- A problem about the projection_push_down optimizer gathers valid columns [\#1312](https://github.com/apache/datafusion/issues/1312) +- apply constant folding to `LogicalPlan::Values` [\#1170](https://github.com/apache/datafusion/issues/1170) +- reduce usage of `IntoIterator` in logical plan builder window fn [\#372](https://github.com/apache/datafusion/issues/372) +- Why does DataFusion throw a Tokio 0.2 runtime error? [\#176](https://github.com/apache/datafusion/issues/176) +- TPC-H Query 14 [\#165](https://github.com/apache/datafusion/issues/165) +- Length kernel returns bytes not character length [\#156](https://github.com/apache/datafusion/issues/156) +- Split the logical operators out into separate source files [\#115](https://github.com/apache/datafusion/issues/115) **Merged pull requests:** -- Fixup some doc warnings [\#1811](https://github.com/apache/arrow-datafusion/pull/1811) ([alamb](https://github.com/alamb)) -- Ensure most of links in docs are correct [\#1808](https://github.com/apache/arrow-datafusion/pull/1808) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([HaoYang670](https://github.com/HaoYang670)) -- Update CHANGELOG.md, update release scripts [\#1807](https://github.com/apache/arrow-datafusion/pull/1807) ([alamb](https://github.com/alamb)) -- Update versions for split crates [\#1803](https://github.com/apache/arrow-datafusion/pull/1803) ([matthewmturner](https://github.com/matthewmturner)) -- Improve the error message and UX of tpch benchmark program [\#1800](https://github.com/apache/arrow-datafusion/pull/1800) ([alamb](https://github.com/alamb)) -- rename references of expr in logical plan module after datafusion-expr split [\#1797](https://github.com/apache/arrow-datafusion/pull/1797) ([Jimexist](https://github.com/Jimexist)) -- Update to sqlparser 0.14 [\#1796](https://github.com/apache/arrow-datafusion/pull/1796) 
[[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- \[split/13\] move rest of expr to expr_fn in datafusion-expr module [\#1794](https://github.com/apache/arrow-datafusion/pull/1794) ([Jimexist](https://github.com/Jimexist)) -- Update datafusion versions [\#1793](https://github.com/apache/arrow-datafusion/pull/1793) ([matthewmturner](https://github.com/matthewmturner)) -- Less verbose plans in debug logging [\#1787](https://github.com/apache/arrow-datafusion/pull/1787) ([alamb](https://github.com/alamb)) -- \[split/11\] split expr type and null info to be expr-schemable [\#1784](https://github.com/apache/arrow-datafusion/pull/1784) ([Jimexist](https://github.com/Jimexist)) -- Introduce `Row` format backed by raw bytes [\#1782](https://github.com/apache/arrow-datafusion/pull/1782) ([yjshen](https://github.com/yjshen)) -- rewrite predicates before pushing to union inputs [\#1781](https://github.com/apache/arrow-datafusion/pull/1781) ([korowa](https://github.com/korowa)) -- Update datafusion to use arrow 9.0.0 [\#1775](https://github.com/apache/arrow-datafusion/pull/1775) ([alamb](https://github.com/alamb)) -- \[split/10\] split up expr for rewriting, visiting, and simplification traits [\#1774](https://github.com/apache/arrow-datafusion/pull/1774) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jimexist](https://github.com/Jimexist)) -- \#1768 Support TimeUnit::Second in hasher [\#1769](https://github.com/apache/arrow-datafusion/pull/1769) ([jychen7](https://github.com/jychen7)) -- TPC-H benchmark can optionally write JSON output file with benchmark summary [\#1766](https://github.com/apache/arrow-datafusion/pull/1766) ([andygrove](https://github.com/andygrove)) -- \[split/8\] move `Accumulator` and `ColumnarValue` to datafusion-expr [\#1765](https://github.com/apache/arrow-datafusion/pull/1765) ([Jimexist](https://github.com/Jimexist)) -- \[split/7\] move built-in scalar function to datafusion-expr 
[\#1764](https://github.com/apache/arrow-datafusion/pull/1764) ([Jimexist](https://github.com/Jimexist)) -- \[split/6\] move signature, type signature, volatility to datafusion-expr [\#1763](https://github.com/apache/arrow-datafusion/pull/1763) ([Jimexist](https://github.com/Jimexist)) -- \[split/9+12\] move udf, udaf, `Expr` to datafusion-expr module [\#1762](https://github.com/apache/arrow-datafusion/pull/1762) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jimexist](https://github.com/Jimexist)) -- \[split/5\] move window frame and operator to datafusion-expr module [\#1761](https://github.com/apache/arrow-datafusion/pull/1761) ([Jimexist](https://github.com/Jimexist)) -- \[split/4\] move scalar value to datafusion-common [\#1760](https://github.com/apache/arrow-datafusion/pull/1760) ([Jimexist](https://github.com/Jimexist)) -- \[split/3\] split datafusion expr module and move aggregate and window function expr [\#1759](https://github.com/apache/arrow-datafusion/pull/1759) ([Jimexist](https://github.com/Jimexist)) -- \[split/2\] move column and dfschema to datafusion-common module [\#1758](https://github.com/apache/arrow-datafusion/pull/1758) ([Jimexist](https://github.com/Jimexist)) -- Use ordered-float 2.10 [\#1756](https://github.com/apache/arrow-datafusion/pull/1756) ([andygrove](https://github.com/andygrove)) -- \[split/1\] split datafusion-common module [\#1751](https://github.com/apache/arrow-datafusion/pull/1751) ([Jimexist](https://github.com/Jimexist)) -- use clap 3 style args parsing for datafusion cli [\#1749](https://github.com/apache/arrow-datafusion/pull/1749) ([Jimexist](https://github.com/Jimexist)) -- fix: Case insensitive unquoted identifiers in SQL [\#1747](https://github.com/apache/arrow-datafusion/pull/1747) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([mkmik](https://github.com/mkmik)) -- Move more tests out of context.rs [\#1743](https://github.com/apache/arrow-datafusion/pull/1743) 
([alamb](https://github.com/alamb)) -- Move optimize test out of context.rs [\#1742](https://github.com/apache/arrow-datafusion/pull/1742) ([alamb](https://github.com/alamb)) -- Fix typos in crate documentation [\#1739](https://github.com/apache/arrow-datafusion/pull/1739) ([r4ntix](https://github.com/r4ntix)) -- add `cargo check --release` to ci [\#1737](https://github.com/apache/arrow-datafusion/pull/1737) ([xudong963](https://github.com/xudong963)) -- Update parking_lot requirement from 0.11 to 0.12 [\#1735](https://github.com/apache/arrow-datafusion/pull/1735) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Create built-in scalar functions programmatically [\#1734](https://github.com/apache/arrow-datafusion/pull/1734) ([HaoYang670](https://github.com/HaoYang670)) -- Prevent repartitioning of certain operator's direct children \(\#1731\) [\#1732](https://github.com/apache/arrow-datafusion/pull/1732) ([tustvold](https://github.com/tustvold)) -- API to get Expr's type and nullability without a `DFSchema` [\#1726](https://github.com/apache/arrow-datafusion/pull/1726) ([alamb](https://github.com/alamb)) -- minor: fix `cargo run --release` error [\#1723](https://github.com/apache/arrow-datafusion/pull/1723) ([xudong963](https://github.com/xudong963)) -- substitute `parking_lot::Mutex` for `std::sync::Mutex` [\#1720](https://github.com/apache/arrow-datafusion/pull/1720) ([xudong963](https://github.com/xudong963)) -- Convert boolean case expressions to boolean logic [\#1719](https://github.com/apache/arrow-datafusion/pull/1719) ([tustvold](https://github.com/tustvold)) -- Add Expression Simplification API [\#1717](https://github.com/apache/arrow-datafusion/pull/1717) ([alamb](https://github.com/alamb)) -- Create ListingTableConfig which includes file format and schema inference [\#1715](https://github.com/apache/arrow-datafusion/pull/1715) ([matthewmturner](https://github.com/matthewmturner)) -- make `select_to_plan` clearer 
[\#1714](https://github.com/apache/arrow-datafusion/pull/1714) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) -- Add upper bound for public function `signature` [\#1713](https://github.com/apache/arrow-datafusion/pull/1713) ([HaoYang670](https://github.com/HaoYang670)) -- Add tests and CI for optional pyarrow module [\#1711](https://github.com/apache/arrow-datafusion/pull/1711) ([wjones127](https://github.com/wjones127)) -- Create SchemaAdapter trait to map table schema to file schemas [\#1709](https://github.com/apache/arrow-datafusion/pull/1709) ([thinkharderdev](https://github.com/thinkharderdev)) -- refine test in repartition.rs & coalesce_batches.rs [\#1707](https://github.com/apache/arrow-datafusion/pull/1707) ([xudong963](https://github.com/xudong963)) -- Fuzz test for spillable sort [\#1706](https://github.com/apache/arrow-datafusion/pull/1706) ([yjshen](https://github.com/yjshen)) -- Support `create_physical_expr` and `ExecutionContextState` or `DefaultPhysicalPlanner` for faster speed [\#1700](https://github.com/apache/arrow-datafusion/pull/1700) ([alamb](https://github.com/alamb)) -- Implement TableProvider for DataFrameImpl [\#1699](https://github.com/apache/arrow-datafusion/pull/1699) ([cpcloud](https://github.com/cpcloud)) -- Move timestamp related tests out of context.rs and into sql integration test [\#1696](https://github.com/apache/arrow-datafusion/pull/1696) ([alamb](https://github.com/alamb)) -- Lazy TempDir creation in DiskManager [\#1695](https://github.com/apache/arrow-datafusion/pull/1695) ([alamb](https://github.com/alamb)) -- Add `MemTrackingMetrics` to ease memory tracking for non-limited memory consumers [\#1691](https://github.com/apache/arrow-datafusion/pull/1691) ([yjshen](https://github.com/yjshen)) -- \(minor\) Reduce memory manager and disk manager logs from `info!` to `debug!` [\#1689](https://github.com/apache/arrow-datafusion/pull/1689) 
([alamb](https://github.com/alamb)) -- Make `SortPreservingMergeStream` stable on input stream order [\#1687](https://github.com/apache/arrow-datafusion/pull/1687) ([alamb](https://github.com/alamb)) -- Incorporate dyn scalar kernels [\#1685](https://github.com/apache/arrow-datafusion/pull/1685) ([matthewmturner](https://github.com/matthewmturner)) -- Move `information_schema` tests out of execution/context.rs to `sql_integration` tests [\#1684](https://github.com/apache/arrow-datafusion/pull/1684) ([alamb](https://github.com/alamb)) -- Add a new metric type: `Gauge` + `CurrentMemoryUsage` to metrics [\#1682](https://github.com/apache/arrow-datafusion/pull/1682) ([yjshen](https://github.com/yjshen)) -- refactor array_agg to not to have `update` and `merge` [\#1681](https://github.com/apache/arrow-datafusion/pull/1681) ([Jimexist](https://github.com/Jimexist)) -- Use NamedTempFile rather than `String` in DiskManager [\#1680](https://github.com/apache/arrow-datafusion/pull/1680) ([alamb](https://github.com/alamb)) -- upgrade clap to version 3 [\#1672](https://github.com/apache/arrow-datafusion/pull/1672) ([Jimexist](https://github.com/Jimexist)) -- Improve configuration and resource use of `MemoryManager` and `DiskManager` [\#1668](https://github.com/apache/arrow-datafusion/pull/1668) ([alamb](https://github.com/alamb)) -- feat: Support quarter granularity in date_trunc function [\#1667](https://github.com/apache/arrow-datafusion/pull/1667) ([ovr](https://github.com/ovr)) -- Fix can not load parquet table form spark in datafusion-cli. 
[\#1665](https://github.com/apache/arrow-datafusion/pull/1665) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Make `MemoryManager` and `MemoryStream` public [\#1664](https://github.com/apache/arrow-datafusion/pull/1664) ([yjshen](https://github.com/yjshen)) -- \[Cleanup\] Move `AggregatedMetricsSet` to `metrics` for further reuse [\#1663](https://github.com/apache/arrow-datafusion/pull/1663) ([yjshen](https://github.com/yjshen)) -- fix: substr - correct behaivour with negative start pos [\#1660](https://github.com/apache/arrow-datafusion/pull/1660) ([ovr](https://github.com/ovr)) -- suppport bitwise and as an example [\#1653](https://github.com/apache/arrow-datafusion/pull/1653) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([liukun4515](https://github.com/liukun4515)) -- refine match pattern related code [\#1650](https://github.com/apache/arrow-datafusion/pull/1650) ([xudong963](https://github.com/xudong963)) -- update md-5, sha2, blake2 [\#1647](https://github.com/apache/arrow-datafusion/pull/1647) ([xudong963](https://github.com/xudong963)) -- Add `DataFusionError` -\> `ArrowError` conversion [\#1643](https://github.com/apache/arrow-datafusion/pull/1643) ([alamb](https://github.com/alamb)) -- Add `spill_count` and `spilled_bytes` to `BaselineMetrics`, test sort with spill [\#1641](https://github.com/apache/arrow-datafusion/pull/1641) ([yjshen](https://github.com/yjshen)) -- support hash decimal array and group by [\#1640](https://github.com/apache/arrow-datafusion/pull/1640) ([liukun4515](https://github.com/liukun4515)) -- Consolidate Schema and RecordBatch projection [\#1638](https://github.com/apache/arrow-datafusion/pull/1638) ([alamb](https://github.com/alamb)) -- Update hashbrown requirement from 0.11 to 0.12 [\#1631](https://github.com/apache/arrow-datafusion/pull/1631) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Update pyo3 requirement from 0.14 to 0.15 [\#1627](https://github.com/apache/arrow-datafusion/pull/1627) 
([dependabot[bot]](https://github.com/apps/dependabot)) -- Optimize `SortPreservingMergeStream` to avoid `SortKeyCursor` sharing [\#1624](https://github.com/apache/arrow-datafusion/pull/1624) ([yjshen](https://github.com/yjshen)) -- Handle merging of evolved schemas in ParquetExec [\#1622](https://github.com/apache/arrow-datafusion/pull/1622) ([thinkharderdev](https://github.com/thinkharderdev)) -- feat: Support Substring\(str \[from int\] \[for int\]\) [\#1621](https://github.com/apache/arrow-datafusion/pull/1621) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([ovr](https://github.com/ovr)) -- feat: Support complex interval via IntervalMonthDayNano [\#1615](https://github.com/apache/arrow-datafusion/pull/1615) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([ovr](https://github.com/ovr)) -- consolidate binary_expr coercion rule code into `binary_rule.rs` module [\#1607](https://github.com/apache/arrow-datafusion/pull/1607) ([alamb](https://github.com/alamb)) -- Fix comparison of dictionary arrays [\#1606](https://github.com/apache/arrow-datafusion/pull/1606) ([alamb](https://github.com/alamb)) -- add test for decimal to decimal [\#1603](https://github.com/apache/arrow-datafusion/pull/1603) ([liukun4515](https://github.com/liukun4515)) -- update nightly version [\#1597](https://github.com/apache/arrow-datafusion/pull/1597) ([Jimexist](https://github.com/Jimexist)) -- Consolidate sort and external_sort [\#1596](https://github.com/apache/arrow-datafusion/pull/1596) ([yjshen](https://github.com/yjshen)) -- support from_slice for binary, string, and boolean array types [\#1589](https://github.com/apache/arrow-datafusion/pull/1589) ([Jimexist](https://github.com/Jimexist)) -- add from_slice trait to ease arrow2 migration [\#1588](https://github.com/apache/arrow-datafusion/pull/1588) ([Jimexist](https://github.com/Jimexist)) -- Implement ARRAY_AGG\(DISTINCT ...\) [\#1579](https://github.com/apache/arrow-datafusion/pull/1579) 
([james727](https://github.com/james727)) -- Rename sql integration tests from `mod` to `sql_integration` [\#1575](https://github.com/apache/arrow-datafusion/pull/1575) ([alamb](https://github.com/alamb)) -- minor: improve the benchmark readme [\#1567](https://github.com/apache/arrow-datafusion/pull/1567) ([xudong963](https://github.com/xudong963)) -- Consolidate `batch_size` configuration in `ExecutionConfig`, `RuntimeConfig` and `PhysicalPlanConfig` [\#1562](https://github.com/apache/arrow-datafusion/pull/1562) ([yjshen](https://github.com/yjshen)) -- Update to rust 1.58 [\#1557](https://github.com/apache/arrow-datafusion/pull/1557) ([xudong963](https://github.com/xudong963)) -- support mathematics operation for decimal data type [\#1554](https://github.com/apache/arrow-datafusion/pull/1554) ([liukun4515](https://github.com/liukun4515)) -- Address clippy warnings [\#1553](https://github.com/apache/arrow-datafusion/pull/1553) ([sergey-melnychuk](https://github.com/sergey-melnychuk)) -- enhance arithmetic operation for array with scalar [\#1552](https://github.com/apache/arrow-datafusion/pull/1552) ([liukun4515](https://github.com/liukun4515)) -- Remove unused `update` and `merge` implementations from Aggregates and supporting `ScalarValue` arithmetic [\#1550](https://github.com/apache/arrow-datafusion/pull/1550) ([alamb](https://github.com/alamb)) -- Add batch operations to stddev [\#1547](https://github.com/apache/arrow-datafusion/pull/1547) ([realno](https://github.com/realno)) -- Mark ARRAY_AGG\(DISTINCT ...\) not implemented [\#1534](https://github.com/apache/arrow-datafusion/pull/1534) ([james727](https://github.com/james727)) -- Update to arrow-7.0.0 [\#1523](https://github.com/apache/arrow-datafusion/pull/1523) ([alamb](https://github.com/alamb)) -- Fix ORDER BY on aggregate [\#1506](https://github.com/apache/arrow-datafusion/pull/1506) ([viirya](https://github.com/viirya)) -- Add example on how to query multiple parquet files 
[\#1497](https://github.com/apache/arrow-datafusion/pull/1497) ([nitisht](https://github.com/nitisht)) -- Refactor testing modules [\#1491](https://github.com/apache/arrow-datafusion/pull/1491) ([hntd187](https://github.com/hntd187)) -- add rfcs for datafusion [\#1490](https://github.com/apache/arrow-datafusion/pull/1490) ([xudong963](https://github.com/xudong963)) -- support comparison for decimal data type and refactor the binary coercion rule [\#1483](https://github.com/apache/arrow-datafusion/pull/1483) ([liukun4515](https://github.com/liukun4515)) -- Minor: Rename `predicate_builder` --\> `pruning_predicate` for consistency [\#1481](https://github.com/apache/arrow-datafusion/pull/1481) ([alamb](https://github.com/alamb)) -- Tests for support try_cast/cast decimal to numeric [\#1465](https://github.com/apache/arrow-datafusion/pull/1465) ([liukun4515](https://github.com/liukun4515)) -- Avoid send empty batches for Hash partitioning. [\#1459](https://github.com/apache/arrow-datafusion/pull/1459) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Planner code cleanup [\#1450](https://github.com/apache/arrow-datafusion/pull/1450) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Fix bug in projection: "column types must match schema types, expected XXX but found YYY" [\#1448](https://github.com/apache/arrow-datafusion/pull/1448) ([alamb](https://github.com/alamb)) -- Update arrow-rs to 6.4.0 and replace boolean comparison in datafusion with arrow compute kernel [\#1446](https://github.com/apache/arrow-datafusion/pull/1446) ([xudong963](https://github.com/xudong963)) -- support cast/try_cast for decimal: signed numeric to decimal [\#1442](https://github.com/apache/arrow-datafusion/pull/1442) ([liukun4515](https://github.com/liukun4515)) -- Consolidate decimal error checking and improve error messages [\#1438](https://github.com/apache/arrow-datafusion/pull/1438) 
[[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- use 0.13 sql parser [\#1435](https://github.com/apache/arrow-datafusion/pull/1435) ([Jimexist](https://github.com/Jimexist)) -- Minor Code cleanups [\#1428](https://github.com/apache/arrow-datafusion/pull/1428) ([alamb](https://github.com/alamb)) -- Clarify communication on bi-weekly sync [\#1427](https://github.com/apache/arrow-datafusion/pull/1427) ([alamb](https://github.com/alamb)) -- support sum/avg agg for decimal, change sum\(float32\) --\> float64 [\#1408](https://github.com/apache/arrow-datafusion/pull/1408) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([liukun4515](https://github.com/liukun4515)) -- Fix bugs with nullability during rewrites: Combine `simplify` and `Simplifier` [\#1401](https://github.com/apache/arrow-datafusion/pull/1401) ([alamb](https://github.com/alamb)) -- Minimize features [\#1399](https://github.com/apache/arrow-datafusion/pull/1399) ([carols10cents](https://github.com/carols10cents)) -- Update rust vesion to 1.57 [\#1395](https://github.com/apache/arrow-datafusion/pull/1395) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) -- support decimal scalar value [\#1394](https://github.com/apache/arrow-datafusion/pull/1394) ([liukun4515](https://github.com/liukun4515)) -- Add coercion rules for AggregateFunctions [\#1387](https://github.com/apache/arrow-datafusion/pull/1387) ([liukun4515](https://github.com/liukun4515)) -- upgrade the arrow-rs version [\#1385](https://github.com/apache/arrow-datafusion/pull/1385) ([liukun4515](https://github.com/liukun4515)) -- add array agg name [\#1382](https://github.com/apache/arrow-datafusion/pull/1382) ([liukun4515](https://github.com/liukun4515)) -- Make tests for `simplify` and `Simplifer` consistent [\#1376](https://github.com/apache/arrow-datafusion/pull/1376) ([alamb](https://github.com/alamb)) -- Refactor: Consolidate 
expression simplification code in `simplify_expression.rs` [\#1374](https://github.com/apache/arrow-datafusion/pull/1374) ([alamb](https://github.com/alamb)) -- remove unused code in hash_aggregate [\#1370](https://github.com/apache/arrow-datafusion/pull/1370) ([ic4y](https://github.com/ic4y)) -- Use `BufReader` for LocalFileReader to revert performance regression in parquet reading [\#1366](https://github.com/apache/arrow-datafusion/pull/1366) ([Dandandan](https://github.com/Dandandan)) -- Add unit test for constant folding on values [\#1355](https://github.com/apache/arrow-datafusion/pull/1355) ([viirya](https://github.com/viirya)) -- Extract logical plan: rename the plan name \(follow up\) [\#1354](https://github.com/apache/arrow-datafusion/pull/1354) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([liukun4515](https://github.com/liukun4515)) -- Moved aggr_test_schema to test_utils [\#1338](https://github.com/apache/arrow-datafusion/pull/1338) ([rdettai](https://github.com/rdettai)) -- upgrade arrow-rs to 6.2.0 [\#1334](https://github.com/apache/arrow-datafusion/pull/1334) ([liukun4515](https://github.com/liukun4515)) -- Update release instructions [\#1331](https://github.com/apache/arrow-datafusion/pull/1331) ([alamb](https://github.com/alamb)) -- \#1268: allow datafusion-cli to toggle quiet flag within CLI [\#1330](https://github.com/apache/arrow-datafusion/pull/1330) ([jgoday](https://github.com/jgoday)) -- Extract Aggregate, Sort, and Join to struct from AggregatePlan [\#1326](https://github.com/apache/arrow-datafusion/pull/1326) ([matthewmturner](https://github.com/matthewmturner)) -- Extract `EmptyRelation`, `Limit`, `Values` from `LogicalPlan` [\#1325](https://github.com/apache/arrow-datafusion/pull/1325) ([liukun4515](https://github.com/liukun4515)) -- Extract CrossJoin, Repartition, Union in LogicalPlan [\#1322](https://github.com/apache/arrow-datafusion/pull/1322) ([liukun4515](https://github.com/liukun4515)) -- Fifth batch of updating 
sql tests to use assert_batches_eq [\#1318](https://github.com/apache/arrow-datafusion/pull/1318) ([matthewmturner](https://github.com/matthewmturner)) -- Extract Explain, Analyze, Extension in LogicalPlan as independent struct [\#1317](https://github.com/apache/arrow-datafusion/pull/1317) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) -- Extract CreateMemoryTable, DropTable, CreateExternalTable in LogicalPlan as independent struct [\#1311](https://github.com/apache/arrow-datafusion/pull/1311) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([liukun4515](https://github.com/liukun4515)) -- Extract Projection, Filter, Window in LogicalPlan as independent struct [\#1309](https://github.com/apache/arrow-datafusion/pull/1309) ([ic4y](https://github.com/ic4y)) -- Add PSQL comparison tests for except, intersect [\#1292](https://github.com/apache/arrow-datafusion/pull/1292) ([mrob95](https://github.com/mrob95)) -- Extract logical plans in LogicalPlan as independent struct: TableScan [\#1290](https://github.com/apache/arrow-datafusion/pull/1290) ([xudong963](https://github.com/xudong963)) -- Add statement helper command to cli [\#1285](https://github.com/apache/arrow-datafusion/pull/1285) ([matthewmturner](https://github.com/matthewmturner)) -- Python bindings for window functions [\#819](https://github.com/apache/arrow-datafusion/pull/819) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jgoday](https://github.com/jgoday)) +- Fixup some doc warnings [\#1811](https://github.com/apache/datafusion/pull/1811) ([alamb](https://github.com/alamb)) +- Ensure most of links in docs are correct [\#1808](https://github.com/apache/datafusion/pull/1808) [[sql](https://github.com/apache/datafusion/labels/sql)] ([HaoYang670](https://github.com/HaoYang670)) +- Update CHANGELOG.md, update release scripts [\#1807](https://github.com/apache/datafusion/pull/1807) ([alamb](https://github.com/alamb)) +- 
Update versions for split crates [\#1803](https://github.com/apache/datafusion/pull/1803) ([matthewmturner](https://github.com/matthewmturner)) +- Improve the error message and UX of tpch benchmark program [\#1800](https://github.com/apache/datafusion/pull/1800) ([alamb](https://github.com/alamb)) +- rename references of expr in logical plan module after datafusion-expr split [\#1797](https://github.com/apache/datafusion/pull/1797) ([Jimexist](https://github.com/Jimexist)) +- Update to sqlparser 0.14 [\#1796](https://github.com/apache/datafusion/pull/1796) [[sql](https://github.com/apache/datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- \[split/13\] move rest of expr to expr_fn in datafusion-expr module [\#1794](https://github.com/apache/datafusion/pull/1794) ([Jimexist](https://github.com/Jimexist)) +- Update datafusion versions [\#1793](https://github.com/apache/datafusion/pull/1793) ([matthewmturner](https://github.com/matthewmturner)) +- Less verbose plans in debug logging [\#1787](https://github.com/apache/datafusion/pull/1787) ([alamb](https://github.com/alamb)) +- \[split/11\] split expr type and null info to be expr-schemable [\#1784](https://github.com/apache/datafusion/pull/1784) ([Jimexist](https://github.com/Jimexist)) +- Introduce `Row` format backed by raw bytes [\#1782](https://github.com/apache/datafusion/pull/1782) ([yjshen](https://github.com/yjshen)) +- rewrite predicates before pushing to union inputs [\#1781](https://github.com/apache/datafusion/pull/1781) ([korowa](https://github.com/korowa)) +- Update datafusion to use arrow 9.0.0 [\#1775](https://github.com/apache/datafusion/pull/1775) ([alamb](https://github.com/alamb)) +- \[split/10\] split up expr for rewriting, visiting, and simplification traits [\#1774](https://github.com/apache/datafusion/pull/1774) [[sql](https://github.com/apache/datafusion/labels/sql)] ([Jimexist](https://github.com/Jimexist)) +- \#1768 Support TimeUnit::Second in hasher 
[\#1769](https://github.com/apache/datafusion/pull/1769) ([jychen7](https://github.com/jychen7)) +- TPC-H benchmark can optionally write JSON output file with benchmark summary [\#1766](https://github.com/apache/datafusion/pull/1766) ([andygrove](https://github.com/andygrove)) +- \[split/8\] move `Accumulator` and `ColumnarValue` to datafusion-expr [\#1765](https://github.com/apache/datafusion/pull/1765) ([Jimexist](https://github.com/Jimexist)) +- \[split/7\] move built-in scalar function to datafusion-expr [\#1764](https://github.com/apache/datafusion/pull/1764) ([Jimexist](https://github.com/Jimexist)) +- \[split/6\] move signature, type signature, volatility to datafusion-expr [\#1763](https://github.com/apache/datafusion/pull/1763) ([Jimexist](https://github.com/Jimexist)) +- \[split/9+12\] move udf, udaf, `Expr` to datafusion-expr module [\#1762](https://github.com/apache/datafusion/pull/1762) [[sql](https://github.com/apache/datafusion/labels/sql)] ([Jimexist](https://github.com/Jimexist)) +- \[split/5\] move window frame and operator to datafusion-expr module [\#1761](https://github.com/apache/datafusion/pull/1761) ([Jimexist](https://github.com/Jimexist)) +- \[split/4\] move scalar value to datafusion-common [\#1760](https://github.com/apache/datafusion/pull/1760) ([Jimexist](https://github.com/Jimexist)) +- \[split/3\] split datafusion expr module and move aggregate and window function expr [\#1759](https://github.com/apache/datafusion/pull/1759) ([Jimexist](https://github.com/Jimexist)) +- \[split/2\] move column and dfschema to datafusion-common module [\#1758](https://github.com/apache/datafusion/pull/1758) ([Jimexist](https://github.com/Jimexist)) +- Use ordered-float 2.10 [\#1756](https://github.com/apache/datafusion/pull/1756) ([andygrove](https://github.com/andygrove)) +- \[split/1\] split datafusion-common module [\#1751](https://github.com/apache/datafusion/pull/1751) ([Jimexist](https://github.com/Jimexist)) +- use clap 3 style args parsing for 
datafusion cli [\#1749](https://github.com/apache/datafusion/pull/1749) ([Jimexist](https://github.com/Jimexist)) +- fix: Case insensitive unquoted identifiers in SQL [\#1747](https://github.com/apache/datafusion/pull/1747) [[sql](https://github.com/apache/datafusion/labels/sql)] ([mkmik](https://github.com/mkmik)) +- Move more tests out of context.rs [\#1743](https://github.com/apache/datafusion/pull/1743) ([alamb](https://github.com/alamb)) +- Move optimize test out of context.rs [\#1742](https://github.com/apache/datafusion/pull/1742) ([alamb](https://github.com/alamb)) +- Fix typos in crate documentation [\#1739](https://github.com/apache/datafusion/pull/1739) ([r4ntix](https://github.com/r4ntix)) +- add `cargo check --release` to ci [\#1737](https://github.com/apache/datafusion/pull/1737) ([xudong963](https://github.com/xudong963)) +- Update parking_lot requirement from 0.11 to 0.12 [\#1735](https://github.com/apache/datafusion/pull/1735) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Create built-in scalar functions programmatically [\#1734](https://github.com/apache/datafusion/pull/1734) ([HaoYang670](https://github.com/HaoYang670)) +- Prevent repartitioning of certain operator's direct children \(\#1731\) [\#1732](https://github.com/apache/datafusion/pull/1732) ([tustvold](https://github.com/tustvold)) +- API to get Expr's type and nullability without a `DFSchema` [\#1726](https://github.com/apache/datafusion/pull/1726) ([alamb](https://github.com/alamb)) +- minor: fix `cargo run --release` error [\#1723](https://github.com/apache/datafusion/pull/1723) ([xudong963](https://github.com/xudong963)) +- substitute `parking_lot::Mutex` for `std::sync::Mutex` [\#1720](https://github.com/apache/datafusion/pull/1720) ([xudong963](https://github.com/xudong963)) +- Convert boolean case expressions to boolean logic [\#1719](https://github.com/apache/datafusion/pull/1719) ([tustvold](https://github.com/tustvold)) +- Add Expression Simplification API 
[\#1717](https://github.com/apache/datafusion/pull/1717) ([alamb](https://github.com/alamb)) +- Create ListingTableConfig which includes file format and schema inference [\#1715](https://github.com/apache/datafusion/pull/1715) ([matthewmturner](https://github.com/matthewmturner)) +- make `select_to_plan` clearer [\#1714](https://github.com/apache/datafusion/pull/1714) [[sql](https://github.com/apache/datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) +- Add upper bound for public function `signature` [\#1713](https://github.com/apache/datafusion/pull/1713) ([HaoYang670](https://github.com/HaoYang670)) +- Add tests and CI for optional pyarrow module [\#1711](https://github.com/apache/datafusion/pull/1711) ([wjones127](https://github.com/wjones127)) +- Create SchemaAdapter trait to map table schema to file schemas [\#1709](https://github.com/apache/datafusion/pull/1709) ([thinkharderdev](https://github.com/thinkharderdev)) +- refine test in repartition.rs & coalesce_batches.rs [\#1707](https://github.com/apache/datafusion/pull/1707) ([xudong963](https://github.com/xudong963)) +- Fuzz test for spillable sort [\#1706](https://github.com/apache/datafusion/pull/1706) ([yjshen](https://github.com/yjshen)) +- Support `create_physical_expr` and `ExecutionContextState` or `DefaultPhysicalPlanner` for faster speed [\#1700](https://github.com/apache/datafusion/pull/1700) ([alamb](https://github.com/alamb)) +- Implement TableProvider for DataFrameImpl [\#1699](https://github.com/apache/datafusion/pull/1699) ([cpcloud](https://github.com/cpcloud)) +- Move timestamp related tests out of context.rs and into sql integration test [\#1696](https://github.com/apache/datafusion/pull/1696) ([alamb](https://github.com/alamb)) +- Lazy TempDir creation in DiskManager [\#1695](https://github.com/apache/datafusion/pull/1695) ([alamb](https://github.com/alamb)) +- Add `MemTrackingMetrics` to ease memory tracking for non-limited memory consumers 
[\#1691](https://github.com/apache/datafusion/pull/1691) ([yjshen](https://github.com/yjshen)) +- \(minor\) Reduce memory manager and disk manager logs from `info!` to `debug!` [\#1689](https://github.com/apache/datafusion/pull/1689) ([alamb](https://github.com/alamb)) +- Make `SortPreservingMergeStream` stable on input stream order [\#1687](https://github.com/apache/datafusion/pull/1687) ([alamb](https://github.com/alamb)) +- Incorporate dyn scalar kernels [\#1685](https://github.com/apache/datafusion/pull/1685) ([matthewmturner](https://github.com/matthewmturner)) +- Move `information_schema` tests out of execution/context.rs to `sql_integration` tests [\#1684](https://github.com/apache/datafusion/pull/1684) ([alamb](https://github.com/alamb)) +- Add a new metric type: `Gauge` + `CurrentMemoryUsage` to metrics [\#1682](https://github.com/apache/datafusion/pull/1682) ([yjshen](https://github.com/yjshen)) +- refactor array_agg to not to have `update` and `merge` [\#1681](https://github.com/apache/datafusion/pull/1681) ([Jimexist](https://github.com/Jimexist)) +- Use NamedTempFile rather than `String` in DiskManager [\#1680](https://github.com/apache/datafusion/pull/1680) ([alamb](https://github.com/alamb)) +- upgrade clap to version 3 [\#1672](https://github.com/apache/datafusion/pull/1672) ([Jimexist](https://github.com/Jimexist)) +- Improve configuration and resource use of `MemoryManager` and `DiskManager` [\#1668](https://github.com/apache/datafusion/pull/1668) ([alamb](https://github.com/alamb)) +- feat: Support quarter granularity in date_trunc function [\#1667](https://github.com/apache/datafusion/pull/1667) ([ovr](https://github.com/ovr)) +- Fix can not load parquet table form spark in datafusion-cli. 
[\#1665](https://github.com/apache/datafusion/pull/1665) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Make `MemoryManager` and `MemoryStream` public [\#1664](https://github.com/apache/datafusion/pull/1664) ([yjshen](https://github.com/yjshen)) +- \[Cleanup\] Move `AggregatedMetricsSet` to `metrics` for further reuse [\#1663](https://github.com/apache/datafusion/pull/1663) ([yjshen](https://github.com/yjshen)) +- fix: substr - correct behaivour with negative start pos [\#1660](https://github.com/apache/datafusion/pull/1660) ([ovr](https://github.com/ovr)) +- suppport bitwise and as an example [\#1653](https://github.com/apache/datafusion/pull/1653) [[sql](https://github.com/apache/datafusion/labels/sql)] ([liukun4515](https://github.com/liukun4515)) +- refine match pattern related code [\#1650](https://github.com/apache/datafusion/pull/1650) ([xudong963](https://github.com/xudong963)) +- update md-5, sha2, blake2 [\#1647](https://github.com/apache/datafusion/pull/1647) ([xudong963](https://github.com/xudong963)) +- Add `DataFusionError` -\> `ArrowError` conversion [\#1643](https://github.com/apache/datafusion/pull/1643) ([alamb](https://github.com/alamb)) +- Add `spill_count` and `spilled_bytes` to `BaselineMetrics`, test sort with spill [\#1641](https://github.com/apache/datafusion/pull/1641) ([yjshen](https://github.com/yjshen)) +- support hash decimal array and group by [\#1640](https://github.com/apache/datafusion/pull/1640) ([liukun4515](https://github.com/liukun4515)) +- Consolidate Schema and RecordBatch projection [\#1638](https://github.com/apache/datafusion/pull/1638) ([alamb](https://github.com/alamb)) +- Update hashbrown requirement from 0.11 to 0.12 [\#1631](https://github.com/apache/datafusion/pull/1631) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Update pyo3 requirement from 0.14 to 0.15 [\#1627](https://github.com/apache/datafusion/pull/1627) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Optimize 
`SortPreservingMergeStream` to avoid `SortKeyCursor` sharing [\#1624](https://github.com/apache/datafusion/pull/1624) ([yjshen](https://github.com/yjshen)) +- Handle merging of evolved schemas in ParquetExec [\#1622](https://github.com/apache/datafusion/pull/1622) ([thinkharderdev](https://github.com/thinkharderdev)) +- feat: Support Substring\(str \[from int\] \[for int\]\) [\#1621](https://github.com/apache/datafusion/pull/1621) [[sql](https://github.com/apache/datafusion/labels/sql)] ([ovr](https://github.com/ovr)) +- feat: Support complex interval via IntervalMonthDayNano [\#1615](https://github.com/apache/datafusion/pull/1615) [[sql](https://github.com/apache/datafusion/labels/sql)] ([ovr](https://github.com/ovr)) +- consolidate binary_expr coercion rule code into `binary_rule.rs` module [\#1607](https://github.com/apache/datafusion/pull/1607) ([alamb](https://github.com/alamb)) +- Fix comparison of dictionary arrays [\#1606](https://github.com/apache/datafusion/pull/1606) ([alamb](https://github.com/alamb)) +- add test for decimal to decimal [\#1603](https://github.com/apache/datafusion/pull/1603) ([liukun4515](https://github.com/liukun4515)) +- update nightly version [\#1597](https://github.com/apache/datafusion/pull/1597) ([Jimexist](https://github.com/Jimexist)) +- Consolidate sort and external_sort [\#1596](https://github.com/apache/datafusion/pull/1596) ([yjshen](https://github.com/yjshen)) +- support from_slice for binary, string, and boolean array types [\#1589](https://github.com/apache/datafusion/pull/1589) ([Jimexist](https://github.com/Jimexist)) +- add from_slice trait to ease arrow2 migration [\#1588](https://github.com/apache/datafusion/pull/1588) ([Jimexist](https://github.com/Jimexist)) +- Implement ARRAY_AGG\(DISTINCT ...\) [\#1579](https://github.com/apache/datafusion/pull/1579) ([james727](https://github.com/james727)) +- Rename sql integration tests from `mod` to `sql_integration` [\#1575](https://github.com/apache/datafusion/pull/1575) 
([alamb](https://github.com/alamb)) +- minor: improve the benchmark readme [\#1567](https://github.com/apache/datafusion/pull/1567) ([xudong963](https://github.com/xudong963)) +- Consolidate `batch_size` configuration in `ExecutionConfig`, `RuntimeConfig` and `PhysicalPlanConfig` [\#1562](https://github.com/apache/datafusion/pull/1562) ([yjshen](https://github.com/yjshen)) +- Update to rust 1.58 [\#1557](https://github.com/apache/datafusion/pull/1557) ([xudong963](https://github.com/xudong963)) +- support mathematics operation for decimal data type [\#1554](https://github.com/apache/datafusion/pull/1554) ([liukun4515](https://github.com/liukun4515)) +- Address clippy warnings [\#1553](https://github.com/apache/datafusion/pull/1553) ([sergey-melnychuk](https://github.com/sergey-melnychuk)) +- enhance arithmetic operation for array with scalar [\#1552](https://github.com/apache/datafusion/pull/1552) ([liukun4515](https://github.com/liukun4515)) +- Remove unused `update` and `merge` implementations from Aggregates and supporting `ScalarValue` arithmetic [\#1550](https://github.com/apache/datafusion/pull/1550) ([alamb](https://github.com/alamb)) +- Add batch operations to stddev [\#1547](https://github.com/apache/datafusion/pull/1547) ([realno](https://github.com/realno)) +- Mark ARRAY_AGG\(DISTINCT ...\) not implemented [\#1534](https://github.com/apache/datafusion/pull/1534) ([james727](https://github.com/james727)) +- Update to arrow-7.0.0 [\#1523](https://github.com/apache/datafusion/pull/1523) ([alamb](https://github.com/alamb)) +- Fix ORDER BY on aggregate [\#1506](https://github.com/apache/datafusion/pull/1506) ([viirya](https://github.com/viirya)) +- Add example on how to query multiple parquet files [\#1497](https://github.com/apache/datafusion/pull/1497) ([nitisht](https://github.com/nitisht)) +- Refactor testing modules [\#1491](https://github.com/apache/datafusion/pull/1491) ([hntd187](https://github.com/hntd187)) +- add rfcs for datafusion 
[\#1490](https://github.com/apache/datafusion/pull/1490) ([xudong963](https://github.com/xudong963)) +- support comparison for decimal data type and refactor the binary coercion rule [\#1483](https://github.com/apache/datafusion/pull/1483) ([liukun4515](https://github.com/liukun4515)) +- Minor: Rename `predicate_builder` --\> `pruning_predicate` for consistency [\#1481](https://github.com/apache/datafusion/pull/1481) ([alamb](https://github.com/alamb)) +- Tests for support try_cast/cast decimal to numeric [\#1465](https://github.com/apache/datafusion/pull/1465) ([liukun4515](https://github.com/liukun4515)) +- Avoid send empty batches for Hash partitioning. [\#1459](https://github.com/apache/datafusion/pull/1459) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Planner code cleanup [\#1450](https://github.com/apache/datafusion/pull/1450) [[sql](https://github.com/apache/datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Fix bug in projection: "column types must match schema types, expected XXX but found YYY" [\#1448](https://github.com/apache/datafusion/pull/1448) ([alamb](https://github.com/alamb)) +- Update arrow-rs to 6.4.0 and replace boolean comparison in datafusion with arrow compute kernel [\#1446](https://github.com/apache/datafusion/pull/1446) ([xudong963](https://github.com/xudong963)) +- support cast/try_cast for decimal: signed numeric to decimal [\#1442](https://github.com/apache/datafusion/pull/1442) ([liukun4515](https://github.com/liukun4515)) +- Consolidate decimal error checking and improve error messages [\#1438](https://github.com/apache/datafusion/pull/1438) [[sql](https://github.com/apache/datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- use 0.13 sql parser [\#1435](https://github.com/apache/datafusion/pull/1435) ([Jimexist](https://github.com/Jimexist)) +- Minor Code cleanups [\#1428](https://github.com/apache/datafusion/pull/1428) ([alamb](https://github.com/alamb)) +- Clarify communication on bi-weekly sync 
[\#1427](https://github.com/apache/datafusion/pull/1427) ([alamb](https://github.com/alamb)) +- support sum/avg agg for decimal, change sum\(float32\) --\> float64 [\#1408](https://github.com/apache/datafusion/pull/1408) [[sql](https://github.com/apache/datafusion/labels/sql)] ([liukun4515](https://github.com/liukun4515)) +- Fix bugs with nullability during rewrites: Combine `simplify` and `Simplifier` [\#1401](https://github.com/apache/datafusion/pull/1401) ([alamb](https://github.com/alamb)) +- Minimize features [\#1399](https://github.com/apache/datafusion/pull/1399) ([carols10cents](https://github.com/carols10cents)) +- Update rust vesion to 1.57 [\#1395](https://github.com/apache/datafusion/pull/1395) [[sql](https://github.com/apache/datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) +- support decimal scalar value [\#1394](https://github.com/apache/datafusion/pull/1394) ([liukun4515](https://github.com/liukun4515)) +- Add coercion rules for AggregateFunctions [\#1387](https://github.com/apache/datafusion/pull/1387) ([liukun4515](https://github.com/liukun4515)) +- upgrade the arrow-rs version [\#1385](https://github.com/apache/datafusion/pull/1385) ([liukun4515](https://github.com/liukun4515)) +- add array agg name [\#1382](https://github.com/apache/datafusion/pull/1382) ([liukun4515](https://github.com/liukun4515)) +- Make tests for `simplify` and `Simplifer` consistent [\#1376](https://github.com/apache/datafusion/pull/1376) ([alamb](https://github.com/alamb)) +- Refactor: Consolidate expression simplification code in `simplify_expression.rs` [\#1374](https://github.com/apache/datafusion/pull/1374) ([alamb](https://github.com/alamb)) +- remove unused code in hash_aggregate [\#1370](https://github.com/apache/datafusion/pull/1370) ([ic4y](https://github.com/ic4y)) +- Use `BufReader` for LocalFileReader to revert performance regression in parquet reading [\#1366](https://github.com/apache/datafusion/pull/1366) 
([Dandandan](https://github.com/Dandandan)) +- Add unit test for constant folding on values [\#1355](https://github.com/apache/datafusion/pull/1355) ([viirya](https://github.com/viirya)) +- Extract logical plan: rename the plan name \(follow up\) [\#1354](https://github.com/apache/datafusion/pull/1354) [[sql](https://github.com/apache/datafusion/labels/sql)] ([liukun4515](https://github.com/liukun4515)) +- Moved aggr_test_schema to test_utils [\#1338](https://github.com/apache/datafusion/pull/1338) ([rdettai](https://github.com/rdettai)) +- upgrade arrow-rs to 6.2.0 [\#1334](https://github.com/apache/datafusion/pull/1334) ([liukun4515](https://github.com/liukun4515)) +- Update release instructions [\#1331](https://github.com/apache/datafusion/pull/1331) ([alamb](https://github.com/alamb)) +- \#1268: allow datafusion-cli to toggle quiet flag within CLI [\#1330](https://github.com/apache/datafusion/pull/1330) ([jgoday](https://github.com/jgoday)) +- Extract Aggregate, Sort, and Join to struct from AggregatePlan [\#1326](https://github.com/apache/datafusion/pull/1326) ([matthewmturner](https://github.com/matthewmturner)) +- Extract `EmptyRelation`, `Limit`, `Values` from `LogicalPlan` [\#1325](https://github.com/apache/datafusion/pull/1325) ([liukun4515](https://github.com/liukun4515)) +- Extract CrossJoin, Repartition, Union in LogicalPlan [\#1322](https://github.com/apache/datafusion/pull/1322) ([liukun4515](https://github.com/liukun4515)) +- Fifth batch of updating sql tests to use assert_batches_eq [\#1318](https://github.com/apache/datafusion/pull/1318) ([matthewmturner](https://github.com/matthewmturner)) +- Extract Explain, Analyze, Extension in LogicalPlan as independent struct [\#1317](https://github.com/apache/datafusion/pull/1317) [[sql](https://github.com/apache/datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) +- Extract CreateMemoryTable, DropTable, CreateExternalTable in LogicalPlan as independent struct 
[\#1311](https://github.com/apache/datafusion/pull/1311) [[sql](https://github.com/apache/datafusion/labels/sql)] ([liukun4515](https://github.com/liukun4515)) +- Extract Projection, Filter, Window in LogicalPlan as independent struct [\#1309](https://github.com/apache/datafusion/pull/1309) ([ic4y](https://github.com/ic4y)) +- Add PSQL comparison tests for except, intersect [\#1292](https://github.com/apache/datafusion/pull/1292) ([mrob95](https://github.com/mrob95)) +- Extract logical plans in LogicalPlan as independent struct: TableScan [\#1290](https://github.com/apache/datafusion/pull/1290) ([xudong963](https://github.com/xudong963)) +- Add statement helper command to cli [\#1285](https://github.com/apache/datafusion/pull/1285) ([matthewmturner](https://github.com/matthewmturner)) +- Python bindings for window functions [\#819](https://github.com/apache/datafusion/pull/819) [[sql](https://github.com/apache/datafusion/labels/sql)] ([jgoday](https://github.com/jgoday)) diff --git a/dev/changelog/7.1.0.md b/dev/changelog/7.1.0.md index 751023e32c56..697c484716c0 100644 --- a/dev/changelog/7.1.0.md +++ b/dev/changelog/7.1.0.md @@ -17,10 +17,10 @@ under the License. --> -## [7.1.0](https://github.com/apache/arrow-datafusion/tree/7.1.0) (2022-04-10) +## [7.1.0](https://github.com/apache/datafusion/tree/7.1.0) (2022-04-10) -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/7.0.0...7.1.0) +[Full Changelog](https://github.com/apache/datafusion/compare/7.0.0...7.1.0) **Fixed bugs:** -- By default, use only 1000 rows to infer the schema [\#2159](https://github.com/apache/arrow-datafusion/pull/2159) +- By default, use only 1000 rows to infer the schema [\#2159](https://github.com/apache/datafusion/pull/2159) diff --git a/dev/changelog/8.0.0.md b/dev/changelog/8.0.0.md index dcfb85f3ea21..b7d758eb7686 100644 --- a/dev/changelog/8.0.0.md +++ b/dev/changelog/8.0.0.md @@ -17,303 +17,303 @@ under the License. 
--> -## [8.0.0](https://github.com/apache/arrow-datafusion/tree/8.0.0) (2022-05-12) +## [8.0.0](https://github.com/apache/datafusion/tree/8.0.0) (2022-05-12) -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/7.1.0-rc1...8.0.0) +[Full Changelog](https://github.com/apache/datafusion/compare/7.1.0-rc1...8.0.0) **Breaking changes:** -- Add SQL planner support for `ROLLUP` and `CUBE` grouping set expressions [\#2446](https://github.com/apache/arrow-datafusion/pull/2446) ([andygrove](https://github.com/andygrove)) -- Make `ExecutionPlan::execute` Sync [\#2434](https://github.com/apache/arrow-datafusion/pull/2434) ([tustvold](https://github.com/tustvold)) -- Introduce new `DataFusionError::SchemaError` type [\#2371](https://github.com/apache/arrow-datafusion/pull/2371) ([andygrove](https://github.com/andygrove)) -- Add `Expr::InSubquery` and `Expr::ScalarSubquery` [\#2342](https://github.com/apache/arrow-datafusion/pull/2342) ([andygrove](https://github.com/andygrove)) -- Add `Expr::Exists` to represent EXISTS subquery expression [\#2339](https://github.com/apache/arrow-datafusion/pull/2339) ([andygrove](https://github.com/andygrove)) -- Move `LogicalPlan` enum to `datafusion-expr` crate [\#2294](https://github.com/apache/arrow-datafusion/pull/2294) ([andygrove](https://github.com/andygrove)) -- Remove dependency from `LogicalPlan::TableScan` to `ExecutionPlan` [\#2284](https://github.com/apache/arrow-datafusion/pull/2284) ([andygrove](https://github.com/andygrove)) -- Move logical expression type-coercion code from `physical-expr` crate to `expr` crate [\#2257](https://github.com/apache/arrow-datafusion/pull/2257) ([andygrove](https://github.com/andygrove)) -- feat: 2061 create external table ddl table partition cols [\#2099](https://github.com/apache/arrow-datafusion/pull/2099) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jychen7](https://github.com/jychen7)) -- Reorganize the project folders 
[\#2081](https://github.com/apache/arrow-datafusion/pull/2081) ([yahoNanJing](https://github.com/yahoNanJing)) -- Support more ScalarFunction in Ballista [\#2008](https://github.com/apache/arrow-datafusion/pull/2008) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Merge dataframe and dataframe imp [\#1998](https://github.com/apache/arrow-datafusion/pull/1998) ([vchag](https://github.com/vchag)) -- Rename `ExecutionContext` to `SessionContext`, `ExecutionContextState` to `SessionState`, add `TaskContext` to support multi-tenancy configurations - Part 1 [\#1987](https://github.com/apache/arrow-datafusion/pull/1987) ([mingmwang](https://github.com/mingmwang)) -- Add Coalesce function [\#1969](https://github.com/apache/arrow-datafusion/pull/1969) ([msathis](https://github.com/msathis)) -- Add Create Schema functionality in SQL [\#1959](https://github.com/apache/arrow-datafusion/pull/1959) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([matthewmturner](https://github.com/matthewmturner)) -- omit some clone when converting sql to logical plan [\#1945](https://github.com/apache/arrow-datafusion/pull/1945) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([doki23](https://github.com/doki23)) -- \[split/16\] move physical plan expressions folder to datafusion-physical-expr crate [\#1889](https://github.com/apache/arrow-datafusion/pull/1889) ([Jimexist](https://github.com/Jimexist)) -- remove sync constraint of SendableRecordBatchStream [\#1884](https://github.com/apache/arrow-datafusion/pull/1884) ([doki23](https://github.com/doki23)) -- \[split/15\] move built in window expr and partition evaluator [\#1865](https://github.com/apache/arrow-datafusion/pull/1865) ([Jimexist](https://github.com/Jimexist)) +- Add SQL planner support for `ROLLUP` and `CUBE` grouping set expressions [\#2446](https://github.com/apache/datafusion/pull/2446) ([andygrove](https://github.com/andygrove)) +- Make `ExecutionPlan::execute` Sync 
[\#2434](https://github.com/apache/datafusion/pull/2434) ([tustvold](https://github.com/tustvold)) +- Introduce new `DataFusionError::SchemaError` type [\#2371](https://github.com/apache/datafusion/pull/2371) ([andygrove](https://github.com/andygrove)) +- Add `Expr::InSubquery` and `Expr::ScalarSubquery` [\#2342](https://github.com/apache/datafusion/pull/2342) ([andygrove](https://github.com/andygrove)) +- Add `Expr::Exists` to represent EXISTS subquery expression [\#2339](https://github.com/apache/datafusion/pull/2339) ([andygrove](https://github.com/andygrove)) +- Move `LogicalPlan` enum to `datafusion-expr` crate [\#2294](https://github.com/apache/datafusion/pull/2294) ([andygrove](https://github.com/andygrove)) +- Remove dependency from `LogicalPlan::TableScan` to `ExecutionPlan` [\#2284](https://github.com/apache/datafusion/pull/2284) ([andygrove](https://github.com/andygrove)) +- Move logical expression type-coercion code from `physical-expr` crate to `expr` crate [\#2257](https://github.com/apache/datafusion/pull/2257) ([andygrove](https://github.com/andygrove)) +- feat: 2061 create external table ddl table partition cols [\#2099](https://github.com/apache/datafusion/pull/2099) [[sql](https://github.com/apache/datafusion/labels/sql)] ([jychen7](https://github.com/jychen7)) +- Reorganize the project folders [\#2081](https://github.com/apache/datafusion/pull/2081) ([yahoNanJing](https://github.com/yahoNanJing)) +- Support more ScalarFunction in Ballista [\#2008](https://github.com/apache/datafusion/pull/2008) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Merge dataframe and dataframe imp [\#1998](https://github.com/apache/datafusion/pull/1998) ([vchag](https://github.com/vchag)) +- Rename `ExecutionContext` to `SessionContext`, `ExecutionContextState` to `SessionState`, add `TaskContext` to support multi-tenancy configurations - Part 1 [\#1987](https://github.com/apache/datafusion/pull/1987) ([mingmwang](https://github.com/mingmwang)) +- Add Coalesce function 
[\#1969](https://github.com/apache/datafusion/pull/1969) ([msathis](https://github.com/msathis)) +- Add Create Schema functionality in SQL [\#1959](https://github.com/apache/datafusion/pull/1959) [[sql](https://github.com/apache/datafusion/labels/sql)] ([matthewmturner](https://github.com/matthewmturner)) +- omit some clone when converting sql to logical plan [\#1945](https://github.com/apache/datafusion/pull/1945) [[sql](https://github.com/apache/datafusion/labels/sql)] ([doki23](https://github.com/doki23)) +- \[split/16\] move physical plan expressions folder to datafusion-physical-expr crate [\#1889](https://github.com/apache/datafusion/pull/1889) ([Jimexist](https://github.com/Jimexist)) +- remove sync constraint of SendableRecordBatchStream [\#1884](https://github.com/apache/datafusion/pull/1884) ([doki23](https://github.com/doki23)) +- \[split/15\] move built in window expr and partition evaluator [\#1865](https://github.com/apache/datafusion/pull/1865) ([Jimexist](https://github.com/Jimexist)) **Implemented enhancements:** -- Include `Expr` to `datafusion::prelude` [\#2347](https://github.com/apache/arrow-datafusion/issues/2347) -- Implement `Serialization` API for DataFusion [\#2340](https://github.com/apache/arrow-datafusion/issues/2340) -- Implement `power` function [\#1493](https://github.com/apache/arrow-datafusion/issues/1493) -- allow `lit` python function to support `boolean` and other types [\#1136](https://github.com/apache/arrow-datafusion/issues/1136) -- Automate dependency updates [\#37](https://github.com/apache/arrow-datafusion/issues/37) -- Add `CREATE VIEW` [\#2279](https://github.com/apache/arrow-datafusion/pull/2279) ([matthewmturner](https://github.com/matthewmturner)) -- \[Ballista\] Support Union in ballista. 
[\#2098](https://github.com/apache/arrow-datafusion/pull/2098) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Change the DataFusion explain plans to make it clearer in the predicate/filter [\#2063](https://github.com/apache/arrow-datafusion/pull/2063) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Add `write_json`, `read_json`, `register_json`, and `JsonFormat` to `CREATE EXTERNAL TABLE` functionality [\#2023](https://github.com/apache/arrow-datafusion/pull/2023) ([matthewmturner](https://github.com/matthewmturner)) -- Qualified wildcard [\#2012](https://github.com/apache/arrow-datafusion/pull/2012) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([doki23](https://github.com/doki23)) -- support bitwise or/'|' operation [\#1876](https://github.com/apache/arrow-datafusion/pull/1876) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([liukun4515](https://github.com/liukun4515)) -- Introduce JIT code generation [\#1849](https://github.com/apache/arrow-datafusion/pull/1849) ([yjshen](https://github.com/yjshen)) +- Include `Expr` to `datafusion::prelude` [\#2347](https://github.com/apache/datafusion/issues/2347) +- Implement `Serialization` API for DataFusion [\#2340](https://github.com/apache/datafusion/issues/2340) +- Implement `power` function [\#1493](https://github.com/apache/datafusion/issues/1493) +- allow `lit` python function to support `boolean` and other types [\#1136](https://github.com/apache/datafusion/issues/1136) +- Automate dependency updates [\#37](https://github.com/apache/datafusion/issues/37) +- Add `CREATE VIEW` [\#2279](https://github.com/apache/datafusion/pull/2279) ([matthewmturner](https://github.com/matthewmturner)) +- \[Ballista\] Support Union in ballista. 
[\#2098](https://github.com/apache/datafusion/pull/2098) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Change the DataFusion explain plans to make it clearer in the predicate/filter [\#2063](https://github.com/apache/datafusion/pull/2063) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Add `write_json`, `read_json`, `register_json`, and `JsonFormat` to `CREATE EXTERNAL TABLE` functionality [\#2023](https://github.com/apache/datafusion/pull/2023) ([matthewmturner](https://github.com/matthewmturner)) +- Qualified wildcard [\#2012](https://github.com/apache/datafusion/pull/2012) [[sql](https://github.com/apache/datafusion/labels/sql)] ([doki23](https://github.com/doki23)) +- support bitwise or/'|' operation [\#1876](https://github.com/apache/datafusion/pull/1876) [[sql](https://github.com/apache/datafusion/labels/sql)] ([liukun4515](https://github.com/liukun4515)) +- Introduce JIT code generation [\#1849](https://github.com/apache/datafusion/pull/1849) ([yjshen](https://github.com/yjshen)) **Fixed bugs:** -- CASE expr with NULL literals panics `'WHEN expression did not return a BooleanArray'` [\#1189](https://github.com/apache/arrow-datafusion/issues/1189) -- Function calls with NULL literals do not work [\#1188](https://github.com/apache/arrow-datafusion/issues/1188) -- Add SQL planner support for calling `round` function with two arguments [\#2503](https://github.com/apache/arrow-datafusion/pull/2503) ([andygrove](https://github.com/andygrove)) -- nested query fix [\#2402](https://github.com/apache/arrow-datafusion/pull/2402) ([comphead](https://github.com/comphead)) -- fix issue\#2058 file_format/json.rs attempt to subtract with overflow [\#2066](https://github.com/apache/arrow-datafusion/pull/2066) ([silence-coding](https://github.com/silence-coding)) -- fix bug the optimizer rule filter push down [\#2039](https://github.com/apache/arrow-datafusion/pull/2039) ([jackwener](https://github.com/jackwener)) -- fix: replace `ExecutionContex` and `ExecutionConfig` with 
`SessionContext` and `SessionConfig` [\#2030](https://github.com/apache/arrow-datafusion/pull/2030) ([xudong963](https://github.com/xudong963)) -- Fixed parquet path partitioning when only selecting partitioned columns [\#2000](https://github.com/apache/arrow-datafusion/pull/2000) ([pjmore](https://github.com/pjmore)) -- Fix ambiguous reference error in filter plan [\#1925](https://github.com/apache/arrow-datafusion/pull/1925) ([jonmmease](https://github.com/jonmmease)) -- platform aware partition parsing [\#1867](https://github.com/apache/arrow-datafusion/pull/1867) ([korowa](https://github.com/korowa)) -- Fix incorrect aggregation in case that GROUP BY contains duplicate column names [\#1855](https://github.com/apache/arrow-datafusion/pull/1855) ([alex-natzka](https://github.com/alex-natzka)) +- CASE expr with NULL literals panics `'WHEN expression did not return a BooleanArray'` [\#1189](https://github.com/apache/datafusion/issues/1189) +- Function calls with NULL literals do not work [\#1188](https://github.com/apache/datafusion/issues/1188) +- Add SQL planner support for calling `round` function with two arguments [\#2503](https://github.com/apache/datafusion/pull/2503) ([andygrove](https://github.com/andygrove)) +- nested query fix [\#2402](https://github.com/apache/datafusion/pull/2402) ([comphead](https://github.com/comphead)) +- fix issue\#2058 file_format/json.rs attempt to subtract with overflow [\#2066](https://github.com/apache/datafusion/pull/2066) ([silence-coding](https://github.com/silence-coding)) +- fix bug the optimizer rule filter push down [\#2039](https://github.com/apache/datafusion/pull/2039) ([jackwener](https://github.com/jackwener)) +- fix: replace `ExecutionContex` and `ExecutionConfig` with `SessionContext` and `SessionConfig` [\#2030](https://github.com/apache/datafusion/pull/2030) ([xudong963](https://github.com/xudong963)) +- Fixed parquet path partitioning when only selecting partitioned columns 
[\#2000](https://github.com/apache/datafusion/pull/2000) ([pjmore](https://github.com/pjmore)) +- Fix ambiguous reference error in filter plan [\#1925](https://github.com/apache/datafusion/pull/1925) ([jonmmease](https://github.com/jonmmease)) +- platform aware partition parsing [\#1867](https://github.com/apache/datafusion/pull/1867) ([korowa](https://github.com/korowa)) +- Fix incorrect aggregation in case that GROUP BY contains duplicate column names [\#1855](https://github.com/apache/datafusion/pull/1855) ([alex-natzka](https://github.com/alex-natzka)) **Documentation updates:** -- MINOR: Make crate READMEs consistent [\#2437](https://github.com/apache/arrow-datafusion/pull/2437) ([andygrove](https://github.com/andygrove)) -- minor: Improve documentation for DFSchema join and merge functions [\#2367](https://github.com/apache/arrow-datafusion/pull/2367) ([andygrove](https://github.com/andygrove)) -- Change the code location and add annotation [\#2037](https://github.com/apache/arrow-datafusion/pull/2037) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) -- Fix typos \(Datafusion -\> DataFusion\) [\#1993](https://github.com/apache/arrow-datafusion/pull/1993) ([andygrove](https://github.com/andygrove)) -- Add examples to use MemTable and TableProvider \(\#1864\) [\#1946](https://github.com/apache/arrow-datafusion/pull/1946) ([PierreZ](https://github.com/PierreZ)) -- Add doc for building `datafusion-cli` when connect the ballista [\#1866](https://github.com/apache/arrow-datafusion/pull/1866) ([liukun4515](https://github.com/liukun4515)) -- Add benchmarks section to DEVELOPERS.md [\#1838](https://github.com/apache/arrow-datafusion/pull/1838) ([tustvold](https://github.com/tustvold)) +- MINOR: Make crate READMEs consistent [\#2437](https://github.com/apache/datafusion/pull/2437) ([andygrove](https://github.com/andygrove)) +- minor: Improve documentation for DFSchema join and merge functions 
[\#2367](https://github.com/apache/datafusion/pull/2367) ([andygrove](https://github.com/andygrove)) +- Change the code location and add annotation [\#2037](https://github.com/apache/datafusion/pull/2037) [[sql](https://github.com/apache/datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) +- Fix typos \(Datafusion -\> DataFusion\) [\#1993](https://github.com/apache/datafusion/pull/1993) ([andygrove](https://github.com/andygrove)) +- Add examples to use MemTable and TableProvider \(\#1864\) [\#1946](https://github.com/apache/datafusion/pull/1946) ([PierreZ](https://github.com/PierreZ)) +- Add doc for building `datafusion-cli` when connect the ballista [\#1866](https://github.com/apache/datafusion/pull/1866) ([liukun4515](https://github.com/liukun4515)) +- Add benchmarks section to DEVELOPERS.md [\#1838](https://github.com/apache/datafusion/pull/1838) ([tustvold](https://github.com/tustvold)) **Performance improvements:** -- Avoid an Arc::clone per row in benchmark [\#1975](https://github.com/apache/arrow-datafusion/pull/1975) ([jhorstmann](https://github.com/jhorstmann)) -- Update datafusion-cli allocator [\#1878](https://github.com/apache/arrow-datafusion/pull/1878) ([matthewmturner](https://github.com/matthewmturner)) +- Avoid an Arc::clone per row in benchmark [\#1975](https://github.com/apache/datafusion/pull/1975) ([jhorstmann](https://github.com/jhorstmann)) +- Update datafusion-cli allocator [\#1878](https://github.com/apache/datafusion/pull/1878) ([matthewmturner](https://github.com/matthewmturner)) **Closed issues:** -- Make expected result string in unit tests more readable [\#2412](https://github.com/apache/arrow-datafusion/issues/2412) -- remove duplicated `fn aggregate()` in aggregate expression tests [\#2399](https://github.com/apache/arrow-datafusion/issues/2399) -- split `distinct_expression.rs` into `count_distinct.rs` and `array_agg_distinct.rs` [\#2385](https://github.com/apache/arrow-datafusion/issues/2385) -- move sql tests in 
`context.rs` to corresponding test files in `datafustion/core/tests/sql` [\#2328](https://github.com/apache/arrow-datafusion/issues/2328) -- Date32/Date64 as join keys for merge join [\#2314](https://github.com/apache/arrow-datafusion/issues/2314) -- Error precision and scale for decimal coercion in logic comparison [\#2232](https://github.com/apache/arrow-datafusion/issues/2232) -- Support Multiple row layout [\#2188](https://github.com/apache/arrow-datafusion/issues/2188) -- TPC-H Query 18 [\#169](https://github.com/apache/arrow-datafusion/issues/169) -- TPC-H Query 16 [\#167](https://github.com/apache/arrow-datafusion/issues/167) -- Implement Sort-Merge Join [\#141](https://github.com/apache/arrow-datafusion/issues/141) -- Split logical expressions out into separate source files [\#114](https://github.com/apache/arrow-datafusion/issues/114) +- Make expected result string in unit tests more readable [\#2412](https://github.com/apache/datafusion/issues/2412) +- remove duplicated `fn aggregate()` in aggregate expression tests [\#2399](https://github.com/apache/datafusion/issues/2399) +- split `distinct_expression.rs` into `count_distinct.rs` and `array_agg_distinct.rs` [\#2385](https://github.com/apache/datafusion/issues/2385) +- move sql tests in `context.rs` to corresponding test files in `datafustion/core/tests/sql` [\#2328](https://github.com/apache/datafusion/issues/2328) +- Date32/Date64 as join keys for merge join [\#2314](https://github.com/apache/datafusion/issues/2314) +- Error precision and scale for decimal coercion in logic comparison [\#2232](https://github.com/apache/datafusion/issues/2232) +- Support Multiple row layout [\#2188](https://github.com/apache/datafusion/issues/2188) +- TPC-H Query 18 [\#169](https://github.com/apache/datafusion/issues/169) +- TPC-H Query 16 [\#167](https://github.com/apache/datafusion/issues/167) +- Implement Sort-Merge Join [\#141](https://github.com/apache/datafusion/issues/141) +- Split logical expressions out into 
separate source files [\#114](https://github.com/apache/datafusion/issues/114) **Merged pull requests:** -- Minor: remove code that is now included in arrow-rs [\#2511](https://github.com/apache/arrow-datafusion/pull/2511) ([alamb](https://github.com/alamb)) -- MINOR: Enable multi-statement benchmark queries [\#2507](https://github.com/apache/arrow-datafusion/pull/2507) ([andygrove](https://github.com/andygrove)) -- MINOR: Add ignored tests for all remaining benchmark queries [\#2506](https://github.com/apache/arrow-datafusion/pull/2506) ([andygrove](https://github.com/andygrove)) -- Update to `sqlparser` `0.17.0` [\#2500](https://github.com/apache/arrow-datafusion/pull/2500) ([alamb](https://github.com/alamb)) -- Add metrics for ParquetExec [\#2499](https://github.com/apache/arrow-datafusion/pull/2499) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Limit cpu cores used when generating changelog [\#2494](https://github.com/apache/arrow-datafusion/pull/2494) ([andygrove](https://github.com/andygrove)) -- Optimize MergeJoin by storing joined indices instead of creating small record batches for each match [\#2492](https://github.com/apache/arrow-datafusion/pull/2492) ([richox](https://github.com/richox)) -- Add SQL planner support for `grouping()` aggregate expressions [\#2486](https://github.com/apache/arrow-datafusion/pull/2486) ([andygrove](https://github.com/andygrove)) -- MINOR: Parameterize changelog script [\#2484](https://github.com/apache/arrow-datafusion/pull/2484) ([jychen7](https://github.com/jychen7)) -- Numeric, String, Boolean comparisons with literal `NULL` [\#2481](https://github.com/apache/arrow-datafusion/pull/2481) ([WinkerDu](https://github.com/WinkerDu)) -- Adds unit test cases of mathematical expressions working with `null` literal [\#2478](https://github.com/apache/arrow-datafusion/pull/2478) ([WinkerDu](https://github.com/WinkerDu)) -- Minor: Move test code from `context.rs` into `sql_integration` 
[\#2473](https://github.com/apache/arrow-datafusion/pull/2473) ([alamb](https://github.com/alamb)) -- Minor: Use ExprVisitor to find columns referenced by expr [\#2471](https://github.com/apache/arrow-datafusion/pull/2471) ([alamb](https://github.com/alamb)) -- minor: remove expr dependency from the row crate, update crate-deps.dot/svg [\#2470](https://github.com/apache/arrow-datafusion/pull/2470) ([yjshen](https://github.com/yjshen)) -- Fix `read_from_registered_table_with_glob_path` fails if path contains // \#2465 [\#2468](https://github.com/apache/arrow-datafusion/pull/2468) ([timvw](https://github.com/timvw)) -- Add support for list_dir\(\) on local fs [\#2467](https://github.com/apache/arrow-datafusion/pull/2467) ([wjones127](https://github.com/wjones127)) -- MINOR: Partial fix for SQL aggregate queries with aliases [\#2464](https://github.com/apache/arrow-datafusion/pull/2464) ([andygrove](https://github.com/andygrove)) -- minor: move struct definition out of `aggregate/mod.rs`, etc [\#2458](https://github.com/apache/arrow-datafusion/pull/2458) ([WinkerDu](https://github.com/WinkerDu)) -- Fix bugs in SQL planner with GROUP BY scalar function and alias [\#2457](https://github.com/apache/arrow-datafusion/pull/2457) ([andygrove](https://github.com/andygrove)) -- feat: Support CompoundIdentifier as GetIndexedField access [\#2454](https://github.com/apache/arrow-datafusion/pull/2454) ([ovr](https://github.com/ovr)) -- Table provider error propagation [\#2438](https://github.com/apache/arrow-datafusion/pull/2438) ([jdye64](https://github.com/jdye64)) -- MINOR: Improve error messages for GROUP BY / HAVING queries [\#2435](https://github.com/apache/arrow-datafusion/pull/2435) ([andygrove](https://github.com/andygrove)) -- minor: remove redundant code [\#2432](https://github.com/apache/arrow-datafusion/pull/2432) ([jackwener](https://github.com/jackwener)) -- minor: update versions and paths in changelog scripts 
[\#2429](https://github.com/apache/arrow-datafusion/pull/2429) ([andygrove](https://github.com/andygrove)) -- Fix Ballista executing during plan [\#2428](https://github.com/apache/arrow-datafusion/pull/2428) ([tustvold](https://github.com/tustvold)) -- minor: format table result vec & remove some unnecessary semicolons [\#2425](https://github.com/apache/arrow-datafusion/pull/2425) ([WinkerDu](https://github.com/WinkerDu)) -- Basic support for `IN` and `NOT IN` Subqueries by rewriting them to `SEMI` / `ANTI` Join [\#2421](https://github.com/apache/arrow-datafusion/pull/2421) ([korowa](https://github.com/korowa)) -- Allow subqueries without aliases [\#2418](https://github.com/apache/arrow-datafusion/pull/2418) ([andygrove](https://github.com/andygrove)) -- Fix bug in subquery join filters referencing outer query [\#2416](https://github.com/apache/arrow-datafusion/pull/2416) ([andygrove](https://github.com/andygrove)) -- MINOR: remove duplicated function `format_state_name()` [\#2414](https://github.com/apache/arrow-datafusion/pull/2414) ([WinkerDu](https://github.com/WinkerDu)) -- Make expected result string in unit tests more readable [\#2413](https://github.com/apache/arrow-datafusion/pull/2413) ([WinkerDu](https://github.com/WinkerDu)) -- `sum(distinct)` support [\#2405](https://github.com/apache/arrow-datafusion/pull/2405) ([WinkerDu](https://github.com/WinkerDu)) -- Update ordered-float requirement from 2.10 to 3.0 [\#2403](https://github.com/apache/arrow-datafusion/pull/2403) ([dependabot[bot]](https://github.com/apps/dependabot)) -- remove duplicated `fn aggregate()` in aggregate expression tests [\#2400](https://github.com/apache/arrow-datafusion/pull/2400) ([WinkerDu](https://github.com/WinkerDu)) -- Support type-coercion from Decimal to Float64 [\#2396](https://github.com/apache/arrow-datafusion/pull/2396) ([comphead](https://github.com/comphead)) -- minor: SchemaError code cleanup and improvements 
[\#2391](https://github.com/apache/arrow-datafusion/pull/2391) ([andygrove](https://github.com/andygrove)) -- Support struct_expr generate struct in sql [\#2389](https://github.com/apache/arrow-datafusion/pull/2389) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Re-organize and rename aggregates physical plan [\#2388](https://github.com/apache/arrow-datafusion/pull/2388) ([yjshen](https://github.com/yjshen)) -- refactor `distinct_expressions.rs` and split into `count_distinct.rs` and `array_agg_distinct.rs` [\#2386](https://github.com/apache/arrow-datafusion/pull/2386) ([WinkerDu](https://github.com/WinkerDu)) -- Allow CTEs to be referenced from subquery expressions [\#2384](https://github.com/apache/arrow-datafusion/pull/2384) ([andygrove](https://github.com/andygrove)) -- Upgrade to arrow 13 [\#2382](https://github.com/apache/arrow-datafusion/pull/2382) ([alamb](https://github.com/alamb)) -- Grouped Aggregate in row format [\#2375](https://github.com/apache/arrow-datafusion/pull/2375) ([yjshen](https://github.com/yjshen)) -- Fix bugs with CTE aliasing and normalize all identifiers in the SQL planner [\#2373](https://github.com/apache/arrow-datafusion/pull/2373) ([andygrove](https://github.com/andygrove)) -- Stop optimizing queries twice [\#2369](https://github.com/apache/arrow-datafusion/pull/2369) ([andygrove](https://github.com/andygrove)) -- feat: Support casting to arrays to primitive type [\#2366](https://github.com/apache/arrow-datafusion/pull/2366) ([ovr](https://github.com/ovr)) -- Add proper support for `null` literal by introducing `ScalarValue::Null` [\#2364](https://github.com/apache/arrow-datafusion/pull/2364) ([WinkerDu](https://github.com/WinkerDu)) -- minor: fix duplicate column bug in subquery support [\#2362](https://github.com/apache/arrow-datafusion/pull/2362) ([andygrove](https://github.com/andygrove)) -- Normalize subquery aliases [\#2359](https://github.com/apache/arrow-datafusion/pull/2359) ([andygrove](https://github.com/andygrove)) -- 
Implement physical planner support for DATE +/- INTERVAL [\#2357](https://github.com/apache/arrow-datafusion/pull/2357) ([andygrove](https://github.com/andygrove)) -- Add SQL query planner support for Scalar Subqueries [\#2354](https://github.com/apache/arrow-datafusion/pull/2354) ([andygrove](https://github.com/andygrove)) -- Add SQL query planner support for IN subqueries [\#2352](https://github.com/apache/arrow-datafusion/pull/2352) ([andygrove](https://github.com/andygrove)) -- Add `Expr` to prelude [\#2348](https://github.com/apache/arrow-datafusion/pull/2348) ([alamb](https://github.com/alamb)) -- Add SQL planner support for EXISTS subqueries [\#2344](https://github.com/apache/arrow-datafusion/pull/2344) ([andygrove](https://github.com/andygrove)) -- Add public Serialization/Deserialization API for `Expr` to/from bytes [\#2341](https://github.com/apache/arrow-datafusion/pull/2341) ([alamb](https://github.com/alamb)) -- Support for date32 and date64 in sort merge join [\#2336](https://github.com/apache/arrow-datafusion/pull/2336) ([hntd187](https://github.com/hntd187)) -- \[physical-expr\] move aggregate exprs and window exprs to their own modules [\#2335](https://github.com/apache/arrow-datafusion/pull/2335) ([yjshen](https://github.com/yjshen)) -- fix: union schema [\#2334](https://github.com/apache/arrow-datafusion/pull/2334) ([gandronchik](https://github.com/gandronchik)) -- Improve sql integration test organization [\#2333](https://github.com/apache/arrow-datafusion/pull/2333) ([alamb](https://github.com/alamb)) -- Support scalar values for func Array [\#2332](https://github.com/apache/arrow-datafusion/pull/2332) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- move sql tests from `context.rs` to corresponding test files in `tests/sql` [\#2329](https://github.com/apache/arrow-datafusion/pull/2329) ([WinkerDu](https://github.com/WinkerDu)) -- deprecate `index_of` and make `index_of_column_by_name` public 
[\#2320](https://github.com/apache/arrow-datafusion/pull/2320) ([jdye64](https://github.com/jdye64)) -- Fix HashJoin evaluating during plan [\#2317](https://github.com/apache/arrow-datafusion/pull/2317) ([tustvold](https://github.com/tustvold)) -- minor: remove two source files that only had re-exports [\#2313](https://github.com/apache/arrow-datafusion/pull/2313) ([andygrove](https://github.com/andygrove)) -- Don't sort batches during plan [\#2312](https://github.com/apache/arrow-datafusion/pull/2312) ([tustvold](https://github.com/tustvold)) -- Move case/when expressions to datafusion-expr crate [\#2311](https://github.com/apache/arrow-datafusion/pull/2311) ([andygrove](https://github.com/andygrove)) -- Fix CrossJoinExec evaluating during plan [\#2310](https://github.com/apache/arrow-datafusion/pull/2310) ([tustvold](https://github.com/tustvold)) -- Make SortPreservingMerge Usable Outside Tokio \(\#2201\) [\#2305](https://github.com/apache/arrow-datafusion/pull/2305) ([tustvold](https://github.com/tustvold)) -- chore: update cranelift to 0.83.0 [\#2304](https://github.com/apache/arrow-datafusion/pull/2304) ([yjshen](https://github.com/yjshen)) -- Always increment timer on record [\#2298](https://github.com/apache/arrow-datafusion/pull/2298) ([tustvold](https://github.com/tustvold)) -- Remove unnecessary env var for parquet_sql example [\#2297](https://github.com/apache/arrow-datafusion/pull/2297) ([sergey-melnychuk](https://github.com/sergey-melnychuk)) -- Simplify sort streams [\#2296](https://github.com/apache/arrow-datafusion/pull/2296) ([tustvold](https://github.com/tustvold)) -- MINOR: beautify code with neat idents [\#2295](https://github.com/apache/arrow-datafusion/pull/2295) ([WinkerDu](https://github.com/WinkerDu)) -- Move FileType enum from sql module to logical_plan module [\#2290](https://github.com/apache/arrow-datafusion/pull/2290) ([andygrove](https://github.com/andygrove)) -- Remove Parquet Empty Projection Workaround 
[\#2289](https://github.com/apache/arrow-datafusion/pull/2289) ([tustvold](https://github.com/tustvold)) -- Add BatchPartitioner \(\#2285\) [\#2287](https://github.com/apache/arrow-datafusion/pull/2287) ([tustvold](https://github.com/tustvold)) -- Make row its crate to make it accessible from physical-expr [\#2283](https://github.com/apache/arrow-datafusion/pull/2283) ([yjshen](https://github.com/yjshen)) -- Enable filter pushdown when using In_list on parquet [\#2282](https://github.com/apache/arrow-datafusion/pull/2282) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Update uuid requirement from 0.8 to 1.0 [\#2280](https://github.com/apache/arrow-datafusion/pull/2280) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Add bytes scanned metric to ParquetExec [\#2273](https://github.com/apache/arrow-datafusion/pull/2273) ([thinkharderdev](https://github.com/thinkharderdev)) -- Fix outer join output with all-null indices on empty batch [\#2272](https://github.com/apache/arrow-datafusion/pull/2272) ([yjshen](https://github.com/yjshen)) -- Re-export DataFusion crates [\#2264](https://github.com/apache/arrow-datafusion/pull/2264) ([andygrove](https://github.com/andygrove)) -- rewrite approx_median to approx_percentile_cont while planning phase [\#2262](https://github.com/apache/arrow-datafusion/pull/2262) ([korowa](https://github.com/korowa)) -- Introduce RowLayout to represent rows for different purposes [\#2261](https://github.com/apache/arrow-datafusion/pull/2261) ([yjshen](https://github.com/yjshen)) -- fix string coercion missing in Eq/NotEq operator [\#2258](https://github.com/apache/arrow-datafusion/pull/2258) ([WinkerDu](https://github.com/WinkerDu)) -- Update to Arrow 12.0.0, update tonic and prost [\#2253](https://github.com/apache/arrow-datafusion/pull/2253) ([alamb](https://github.com/alamb)) -- minor: move field_util from `physical-expr` crate to `expr` crate [\#2250](https://github.com/apache/arrow-datafusion/pull/2250) 
([andygrove](https://github.com/andygrove)) -- Move identifer case tests to `sql_integ`, add negative cases, Debug for `DataFrame` [\#2243](https://github.com/apache/arrow-datafusion/pull/2243) ([alamb](https://github.com/alamb)) -- Implement sort-merge join [\#2242](https://github.com/apache/arrow-datafusion/pull/2242) ([richox](https://github.com/richox)) -- fix: find the right wider decimal datatype for comparison operation [\#2241](https://github.com/apache/arrow-datafusion/pull/2241) ([liukun4515](https://github.com/liukun4515)) -- Fix join without constraints [\#2240](https://github.com/apache/arrow-datafusion/pull/2240) ([Dandandan](https://github.com/Dandandan)) -- Add type coercion rule for date + interval [\#2235](https://github.com/apache/arrow-datafusion/pull/2235) ([andygrove](https://github.com/andygrove)) -- support array with scalar arithmetic operation for decimal data type [\#2233](https://github.com/apache/arrow-datafusion/pull/2233) ([liukun4515](https://github.com/liukun4515)) -- chore: add `debug!` log in some execution operators [\#2231](https://github.com/apache/arrow-datafusion/pull/2231) ([NGA-TRAN](https://github.com/NGA-TRAN)) -- Introduce new optional scheduler, using Morsel-driven Parallelism + rayon \(\#2199\) [\#2226](https://github.com/apache/arrow-datafusion/pull/2226) ([tustvold](https://github.com/tustvold)) -- minor: add editor config file [\#2224](https://github.com/apache/arrow-datafusion/pull/2224) ([jackwener](https://github.com/jackwener)) -- minor: Refactor to avoid repeated code in replace_qualifier [\#2222](https://github.com/apache/arrow-datafusion/pull/2222) ([andygrove](https://github.com/andygrove)) -- update cli readme [\#2220](https://github.com/apache/arrow-datafusion/pull/2220) ([liukun4515](https://github.com/liukun4515)) -- Use `filter` \(filter_record_batch\) instead of `take` to avoid using indices [\#2218](https://github.com/apache/arrow-datafusion/pull/2218) ([Dandandan](https://github.com/Dandandan)) -- 
Add single line description of ExecutionPlan \(\#2216\) [\#2217](https://github.com/apache/arrow-datafusion/pull/2217) ([tustvold](https://github.com/tustvold)) -- Remove tokio::spawn from HashAggregateExec \(\#2201\) [\#2215](https://github.com/apache/arrow-datafusion/pull/2215) ([tustvold](https://github.com/tustvold)) -- Remove tokio::spawn from WindowAggExec \(\#2201\) [\#2203](https://github.com/apache/arrow-datafusion/pull/2203) ([tustvold](https://github.com/tustvold)) -- Make ParquetExec usable outside of a tokio runtime \(\#2201\) [\#2202](https://github.com/apache/arrow-datafusion/pull/2202) ([tustvold](https://github.com/tustvold)) -- add sql level test for decimal data type [\#2200](https://github.com/apache/arrow-datafusion/pull/2200) ([liukun4515](https://github.com/liukun4515)) -- `case when` supports `NULL` constant [\#2197](https://github.com/apache/arrow-datafusion/pull/2197) ([WinkerDu](https://github.com/WinkerDu)) -- feat: Support simple Arrays with Literals [\#2194](https://github.com/apache/arrow-datafusion/pull/2194) ([ovr](https://github.com/ovr)) -- \[Ballista\] Enable ApproxPercentileWithWeight in Ballista and fill UT [\#2192](https://github.com/apache/arrow-datafusion/pull/2192) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- refactor: simplify `prepare_select_exprs` [\#2190](https://github.com/apache/arrow-datafusion/pull/2190) ([jackwener](https://github.com/jackwener)) -- Multiple row-layout support, part-1: Restructure code for clearness [\#2189](https://github.com/apache/arrow-datafusion/pull/2189) ([yjshen](https://github.com/yjshen)) -- make nightly clippy happy [\#2186](https://github.com/apache/arrow-datafusion/pull/2186) ([xudong963](https://github.com/xudong963)) -- \[Ballista\]Make PhysicalAggregateExprNode has repeated PhysicalExprNode [\#2184](https://github.com/apache/arrow-datafusion/pull/2184) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- MINOR: handle `NULL` in advance to avoid value copy in `string_concat` 
[\#2183](https://github.com/apache/arrow-datafusion/pull/2183) ([WinkerDu](https://github.com/WinkerDu)) -- fix: Sort with a lot of repetition values [\#2182](https://github.com/apache/arrow-datafusion/pull/2182) ([yjshen](https://github.com/yjshen)) -- cli: update lockfile [\#2178](https://github.com/apache/arrow-datafusion/pull/2178) ([happysalada](https://github.com/happysalada)) -- Add LogicalPlan::SubqueryAlias [\#2172](https://github.com/apache/arrow-datafusion/pull/2172) ([andygrove](https://github.com/andygrove)) -- minor: Avoid per cell evaluation in Coalesce, use zip in CaseWhen [\#2171](https://github.com/apache/arrow-datafusion/pull/2171) ([yjshen](https://github.com/yjshen)) -- Handle merged schemas in parquet pruning [\#2170](https://github.com/apache/arrow-datafusion/pull/2170) ([thinkharderdev](https://github.com/thinkharderdev)) -- Implement fast path of with_new_children\(\) in ExecutionPlan [\#2168](https://github.com/apache/arrow-datafusion/pull/2168) ([mingmwang](https://github.com/mingmwang)) -- enable explain for ballista [\#2163](https://github.com/apache/arrow-datafusion/pull/2163) ([doki23](https://github.com/doki23)) -- Add delimiter for create external table [\#2162](https://github.com/apache/arrow-datafusion/pull/2162) ([matthewmturner](https://github.com/matthewmturner)) -- \[MINOR\] enable `EXTRACT week` and add test \(after sqlparser update to 0.16\) [\#2157](https://github.com/apache/arrow-datafusion/pull/2157) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Optimize the evaluation of `IN` for large lists using InSet [\#2156](https://github.com/apache/arrow-datafusion/pull/2156) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Update sqlparser requirement from 0.15 to 0.16 [\#2152](https://github.com/apache/arrow-datafusion/pull/2152) ([dependabot[bot]](https://github.com/apps/dependabot)) -- fix `not(null)` with constant `null` [\#2144](https://github.com/apache/arrow-datafusion/pull/2144) ([WinkerDu](https://github.com/WinkerDu)) -- 
Add IF NOT EXISTS to `CREATE TABLE` and `CREATE EXTERNAL TABLE` [\#2143](https://github.com/apache/arrow-datafusion/pull/2143) ([matthewmturner](https://github.com/matthewmturner)) -- implement 'StringConcat' operator to support sql like "select 'aa' || 'b' " [\#2142](https://github.com/apache/arrow-datafusion/pull/2142) ([WinkerDu](https://github.com/WinkerDu)) -- \#2109 By default, use only 1000 rows to infer the schema [\#2139](https://github.com/apache/arrow-datafusion/pull/2139) ([jychen7](https://github.com/jychen7)) -- \[CLI\] Add show tables in ballista for datafusion-cli [\#2137](https://github.com/apache/arrow-datafusion/pull/2137) ([gaojun2048](https://github.com/gaojun2048)) -- fix: incorrect memory usage track for sort [\#2135](https://github.com/apache/arrow-datafusion/pull/2135) ([yjshen](https://github.com/yjshen)) -- Update quarterly roadmap for Q2 [\#2133](https://github.com/apache/arrow-datafusion/pull/2133) ([matthewmturner](https://github.com/matthewmturner)) -- Reduce SortExec memory usage by void constructing single huge batch [\#2132](https://github.com/apache/arrow-datafusion/pull/2132) ([yjshen](https://github.com/yjshen)) -- MINOR: fix concat_ws corner bug [\#2128](https://github.com/apache/arrow-datafusion/pull/2128) ([WinkerDu](https://github.com/WinkerDu)) -- Minor add clarifying comment in parquet [\#2127](https://github.com/apache/arrow-datafusion/pull/2127) ([alamb](https://github.com/alamb)) -- Minor: make disk_manager public [\#2126](https://github.com/apache/arrow-datafusion/pull/2126) ([yjshen](https://github.com/yjshen)) -- JIT-compille DataFusion expression with column name [\#2124](https://github.com/apache/arrow-datafusion/pull/2124) ([Dandandan](https://github.com/Dandandan)) -- minor: replace array_equals in case evaluation with eq_dyn from arrow-rs [\#2121](https://github.com/apache/arrow-datafusion/pull/2121) ([alamb](https://github.com/alamb)) -- Serialize timezone in timestamp scalar values 
[\#2120](https://github.com/apache/arrow-datafusion/pull/2120) ([thinkharderdev](https://github.com/thinkharderdev)) -- minor: fix some clippy warnings from nightly rust [\#2119](https://github.com/apache/arrow-datafusion/pull/2119) ([alamb](https://github.com/alamb)) -- Fix case evaluation with NULLs [\#2118](https://github.com/apache/arrow-datafusion/pull/2118) ([alamb](https://github.com/alamb)) -- issue\#1967 ignore channel close [\#2113](https://github.com/apache/arrow-datafusion/pull/2113) ([silence-coding](https://github.com/silence-coding)) -- cli: add cargo.lock [\#2112](https://github.com/apache/arrow-datafusion/pull/2112) ([happysalada](https://github.com/happysalada)) -- doc: update release schedule [\#2110](https://github.com/apache/arrow-datafusion/pull/2110) ([jychen7](https://github.com/jychen7)) -- fix df union all bug [\#2108](https://github.com/apache/arrow-datafusion/pull/2108) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([WinkerDu](https://github.com/WinkerDu)) -- Reduce repetition in Decimal binary kernels, upgrade to arrow 11.1 [\#2107](https://github.com/apache/arrow-datafusion/pull/2107) ([alamb](https://github.com/alamb)) -- update zlib version to 1.2.12 [\#2106](https://github.com/apache/arrow-datafusion/pull/2106) ([waitingkuo](https://github.com/waitingkuo)) -- Create jit-expression from datafusion expression [\#2103](https://github.com/apache/arrow-datafusion/pull/2103) ([Dandandan](https://github.com/Dandandan)) -- Add CREATE DATABASE command to SQL [\#2094](https://github.com/apache/arrow-datafusion/pull/2094) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([matthewmturner](https://github.com/matthewmturner)) -- Refactor SessionContext, BallistaContext to support multi-tenancy configurations - Part 3 [\#2091](https://github.com/apache/arrow-datafusion/pull/2091) ([mingmwang](https://github.com/mingmwang)) -- minor: remove duplicate test [\#2089](https://github.com/apache/arrow-datafusion/pull/2089) 
([jackwener](https://github.com/jackwener)) -- minor: remove repeated test [\#2085](https://github.com/apache/arrow-datafusion/pull/2085) ([jackwener](https://github.com/jackwener)) -- Fix lost filters and projections in ParquetExec, CSVExec etc [\#2077](https://github.com/apache/arrow-datafusion/pull/2077) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Remove dependency of common for the storage crate [\#2076](https://github.com/apache/arrow-datafusion/pull/2076) ([yahoNanJing](https://github.com/yahoNanJing)) -- [MINOR] fix doc in `EXTRACT\(field FROM source\) [\#2074](https://github.com/apache/arrow-datafusion/pull/2074) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- \[Bug\]\[Datafusion\] fix TaskContext session_config bug [\#2070](https://github.com/apache/arrow-datafusion/pull/2070) ([gaojun2048](https://github.com/gaojun2048)) -- Short-circuit evaluation for `CaseWhen` [\#2068](https://github.com/apache/arrow-datafusion/pull/2068) ([yjshen](https://github.com/yjshen)) -- split datafusion-object-store module [\#2065](https://github.com/apache/arrow-datafusion/pull/2065) ([yahoNanJing](https://github.com/yahoNanJing)) -- Allow `CatalogProvider::register_catalog` to return an error [\#2052](https://github.com/apache/arrow-datafusion/pull/2052) ([alamb](https://github.com/alamb)) -- Add test in register_catalog and change to use named symbolic constants [\#2050](https://github.com/apache/arrow-datafusion/pull/2050) ([alamb](https://github.com/alamb)) -- Update to arrow/parquet 11.0 [\#2048](https://github.com/apache/arrow-datafusion/pull/2048) ([alamb](https://github.com/alamb)) -- minor: format comments \(`//` to `// `\) [\#2047](https://github.com/apache/arrow-datafusion/pull/2047) ([jackwener](https://github.com/jackwener)) -- use cargo-tomlfmt to check Cargo.toml formatting in CI [\#2033](https://github.com/apache/arrow-datafusion/pull/2033) ([WinkerDu](https://github.com/WinkerDu)) -- feat: \#2004 approx percentile with weight 
[\#2031](https://github.com/apache/arrow-datafusion/pull/2031) ([jychen7](https://github.com/jychen7)) -- Refactor SessionContext, SessionState and SessionConfig to support multi-tenancy configurations - Part 2 [\#2029](https://github.com/apache/arrow-datafusion/pull/2029) ([mingmwang](https://github.com/mingmwang)) -- Simplify prerequisites for running examples [\#2028](https://github.com/apache/arrow-datafusion/pull/2028) ([doki23](https://github.com/doki23)) -- Replace usage of `println!` with logger macros [\#2020](https://github.com/apache/arrow-datafusion/pull/2020) ([silence-coding](https://github.com/silence-coding)) -- Automatically test examples in user guide [\#2018](https://github.com/apache/arrow-datafusion/pull/2018) ([vchag](https://github.com/vchag)) -- return VecDeque for DFParser::parse_sql [\#2017](https://github.com/apache/arrow-datafusion/pull/2017) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([doki23](https://github.com/doki23)) -- Eliminate the scalar value filter [\#2002](https://github.com/apache/arrow-datafusion/pull/2002) ([jackwener](https://github.com/jackwener)) -- Fixing a typo in documentation [\#1997](https://github.com/apache/arrow-datafusion/pull/1997) ([psvri](https://github.com/psvri)) -- Correct documentation of ExprVisitor [\#1996](https://github.com/apache/arrow-datafusion/pull/1996) ([alamb](https://github.com/alamb)) -- Make it possible to only scan part of a parquet file in a partition [\#1990](https://github.com/apache/arrow-datafusion/pull/1990) ([yjshen](https://github.com/yjshen)) -- Update Dockerfile to fix integration tests [\#1982](https://github.com/apache/arrow-datafusion/pull/1982) ([andygrove](https://github.com/andygrove)) -- Remove some more unecessary cloning in sql_expr_to_logical_expr [\#1981](https://github.com/apache/arrow-datafusion/pull/1981) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Add ticket reference to clippy allow 
[\#1978](https://github.com/apache/arrow-datafusion/pull/1978) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Implement EXTRACT expression with week, month, day, hour [\#1974](https://github.com/apache/arrow-datafusion/pull/1974) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Address typo in ExprVisitable trait documentation [\#1970](https://github.com/apache/arrow-datafusion/pull/1970) ([jdye64](https://github.com/jdye64)) -- Update sqlparser requirement from 0.14 to 0.15 [\#1966](https://github.com/apache/arrow-datafusion/pull/1966) ([dependabot[bot]](https://github.com/apps/dependabot)) -- PruningPredicate should take owned Expr [\#1960](https://github.com/apache/arrow-datafusion/pull/1960) ([thinkharderdev](https://github.com/thinkharderdev)) -- Update to arrow 10.0.0, pyo3 0.16 [\#1957](https://github.com/apache/arrow-datafusion/pull/1957) ([alamb](https://github.com/alamb)) -- update jit-related dependencies [\#1953](https://github.com/apache/arrow-datafusion/pull/1953) ([xudong963](https://github.com/xudong963)) -- minor code refinement: `if_exists` name change, wildcard field for logical plan, etc. 
[\#1951](https://github.com/apache/arrow-datafusion/pull/1951) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) -- Allow different types of query variables \(`@@var`\) rather than just string [\#1943](https://github.com/apache/arrow-datafusion/pull/1943) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([maxburke](https://github.com/maxburke)) -- Pruning serialization [\#1941](https://github.com/apache/arrow-datafusion/pull/1941) ([thinkharderdev](https://github.com/thinkharderdev)) -- Add write_parquet to `DataFrame` [\#1940](https://github.com/apache/arrow-datafusion/pull/1940) ([matthewmturner](https://github.com/matthewmturner)) -- Fix select from EmptyExec always return 0 row after optimizer passes [\#1938](https://github.com/apache/arrow-datafusion/pull/1938) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Add debug log when waiting for spilling on other consumers [\#1933](https://github.com/apache/arrow-datafusion/pull/1933) ([viirya](https://github.com/viirya)) -- Add db benchmark script [\#1928](https://github.com/apache/arrow-datafusion/pull/1928) ([matthewmturner](https://github.com/matthewmturner)) -- Add write_csv to DataFrame [\#1922](https://github.com/apache/arrow-datafusion/pull/1922) ([matthewmturner](https://github.com/matthewmturner)) -- \[MINOR\] Update copyright year in Docs [\#1918](https://github.com/apache/arrow-datafusion/pull/1918) ([alamb](https://github.com/alamb)) -- add metadata to DFSchema, close \#1806. 
[\#1914](https://github.com/apache/arrow-datafusion/pull/1914) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jiacai2050](https://github.com/jiacai2050)) -- Clippy fix on nightly [\#1907](https://github.com/apache/arrow-datafusion/pull/1907) ([yjshen](https://github.com/yjshen)) -- Updated Rust version to 1.59 in all the files [\#1903](https://github.com/apache/arrow-datafusion/pull/1903) ([NaincyKumariKnoldus](https://github.com/NaincyKumariKnoldus)) -- support extract second and minute in expr. [\#1901](https://github.com/apache/arrow-datafusion/pull/1901) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Update crate descriptions [\#1899](https://github.com/apache/arrow-datafusion/pull/1899) ([alamb](https://github.com/alamb)) -- Remove uneeded Mutex in Ballista Client [\#1898](https://github.com/apache/arrow-datafusion/pull/1898) ([alamb](https://github.com/alamb)) -- \[split/17\] move the rest of physical expr to datafusion-physical-expr crate [\#1892](https://github.com/apache/arrow-datafusion/pull/1892) ([Jimexist](https://github.com/Jimexist)) -- Avoid unnecessary branching in row read/write if schema is null-free [\#1891](https://github.com/apache/arrow-datafusion/pull/1891) ([yjshen](https://github.com/yjshen)) -- Make parquet support optional for datafusion-common crate [\#1886](https://github.com/apache/arrow-datafusion/pull/1886) ([jonmmease](https://github.com/jonmmease)) -- Fix clippy lints [\#1885](https://github.com/apache/arrow-datafusion/pull/1885) ([HaoYang670](https://github.com/HaoYang670)) -- Add support for `~/.datafusionrc` and cli option for overriding it to datafusion-cli [\#1875](https://github.com/apache/arrow-datafusion/pull/1875) ([matthewmturner](https://github.com/matthewmturner)) -- \[Minor\] Clean up DecimalArray API Usage [\#1869](https://github.com/apache/arrow-datafusion/pull/1869) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Changes after went through 
"Datafusion as a library section" [\#1868](https://github.com/apache/arrow-datafusion/pull/1868) ([nonontb](https://github.com/nonontb)) -- Enhance MemorySchemaProvider to support `register_listing_table` [\#1863](https://github.com/apache/arrow-datafusion/pull/1863) ([matthewmturner](https://github.com/matthewmturner)) -- Increase default partition column type from Dict\(UInt8\) to Dict\(UInt16\) [\#1860](https://github.com/apache/arrow-datafusion/pull/1860) ([Igosuki](https://github.com/Igosuki)) -- Update to arrow 9.1.0 [\#1851](https://github.com/apache/arrow-datafusion/pull/1851) ([alamb](https://github.com/alamb)) -- move some tests out of context and into sql [\#1846](https://github.com/apache/arrow-datafusion/pull/1846) ([alamb](https://github.com/alamb)) -- \[split/14\] create `datafusion-physical-expr` module [\#1843](https://github.com/apache/arrow-datafusion/pull/1843) ([Jimexist](https://github.com/Jimexist)) -- Return `Error` when parquet reader fails rather than no data with `println!` [\#1837](https://github.com/apache/arrow-datafusion/pull/1837) ([alamb](https://github.com/alamb)) -- determine build side in hash join by `total_byte_size` instead of `num_rows` [\#1831](https://github.com/apache/arrow-datafusion/pull/1831) ([xudong963](https://github.com/xudong963)) -- Make ballista support an optional feature to datafusion-cli [\#1816](https://github.com/apache/arrow-datafusion/pull/1816) ([alamb](https://github.com/alamb)) -- Update documentation example for change in API [\#1812](https://github.com/apache/arrow-datafusion/pull/1812) ([alamb](https://github.com/alamb)) -- rename references of expr in physical plan module after datafusion-expr split [\#1798](https://github.com/apache/arrow-datafusion/pull/1798) ([Jimexist](https://github.com/Jimexist)) -- DataFusion + Conbench Integration [\#1791](https://github.com/apache/arrow-datafusion/pull/1791) ([dianaclarke](https://github.com/dianaclarke)) -- The returned path value of get_by_uri should be 
self-described with entire path [\#1779](https://github.com/apache/arrow-datafusion/pull/1779) ([yahoNanJing](https://github.com/yahoNanJing)) -- Use`eq_dyn`, `neq_dyn`, `lt_dyn`, `lt_eq_dyn`, `gt_dyn`, `gt_eq_dyn` kernels from arrow [\#1475](https://github.com/apache/arrow-datafusion/pull/1475) ([alamb](https://github.com/alamb)) +- Minor: remove code that is now included in arrow-rs [\#2511](https://github.com/apache/datafusion/pull/2511) ([alamb](https://github.com/alamb)) +- MINOR: Enable multi-statement benchmark queries [\#2507](https://github.com/apache/datafusion/pull/2507) ([andygrove](https://github.com/andygrove)) +- MINOR: Add ignored tests for all remaining benchmark queries [\#2506](https://github.com/apache/datafusion/pull/2506) ([andygrove](https://github.com/andygrove)) +- Update to `sqlparser` `0.17.0` [\#2500](https://github.com/apache/datafusion/pull/2500) ([alamb](https://github.com/alamb)) +- Add metrics for ParquetExec [\#2499](https://github.com/apache/datafusion/pull/2499) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Limit cpu cores used when generating changelog [\#2494](https://github.com/apache/datafusion/pull/2494) ([andygrove](https://github.com/andygrove)) +- Optimize MergeJoin by storing joined indices instead of creating small record batches for each match [\#2492](https://github.com/apache/datafusion/pull/2492) ([richox](https://github.com/richox)) +- Add SQL planner support for `grouping()` aggregate expressions [\#2486](https://github.com/apache/datafusion/pull/2486) ([andygrove](https://github.com/andygrove)) +- MINOR: Parameterize changelog script [\#2484](https://github.com/apache/datafusion/pull/2484) ([jychen7](https://github.com/jychen7)) +- Numeric, String, Boolean comparisons with literal `NULL` [\#2481](https://github.com/apache/datafusion/pull/2481) ([WinkerDu](https://github.com/WinkerDu)) +- Adds unit test cases of mathematical expressions working with `null` literal 
[\#2478](https://github.com/apache/datafusion/pull/2478) ([WinkerDu](https://github.com/WinkerDu)) +- Minor: Move test code from `context.rs` into `sql_integration` [\#2473](https://github.com/apache/datafusion/pull/2473) ([alamb](https://github.com/alamb)) +- Minor: Use ExprVisitor to find columns referenced by expr [\#2471](https://github.com/apache/datafusion/pull/2471) ([alamb](https://github.com/alamb)) +- minor: remove expr dependency from the row crate, update crate-deps.dot/svg [\#2470](https://github.com/apache/datafusion/pull/2470) ([yjshen](https://github.com/yjshen)) +- Fix `read_from_registered_table_with_glob_path` fails if path contains // \#2465 [\#2468](https://github.com/apache/datafusion/pull/2468) ([timvw](https://github.com/timvw)) +- Add support for list_dir\(\) on local fs [\#2467](https://github.com/apache/datafusion/pull/2467) ([wjones127](https://github.com/wjones127)) +- MINOR: Partial fix for SQL aggregate queries with aliases [\#2464](https://github.com/apache/datafusion/pull/2464) ([andygrove](https://github.com/andygrove)) +- minor: move struct definition out of `aggregate/mod.rs`, etc [\#2458](https://github.com/apache/datafusion/pull/2458) ([WinkerDu](https://github.com/WinkerDu)) +- Fix bugs in SQL planner with GROUP BY scalar function and alias [\#2457](https://github.com/apache/datafusion/pull/2457) ([andygrove](https://github.com/andygrove)) +- feat: Support CompoundIdentifier as GetIndexedField access [\#2454](https://github.com/apache/datafusion/pull/2454) ([ovr](https://github.com/ovr)) +- Table provider error propagation [\#2438](https://github.com/apache/datafusion/pull/2438) ([jdye64](https://github.com/jdye64)) +- MINOR: Improve error messages for GROUP BY / HAVING queries [\#2435](https://github.com/apache/datafusion/pull/2435) ([andygrove](https://github.com/andygrove)) +- minor: remove redundant code [\#2432](https://github.com/apache/datafusion/pull/2432) ([jackwener](https://github.com/jackwener)) +- minor: update 
versions and paths in changelog scripts [\#2429](https://github.com/apache/datafusion/pull/2429) ([andygrove](https://github.com/andygrove)) +- Fix Ballista executing during plan [\#2428](https://github.com/apache/datafusion/pull/2428) ([tustvold](https://github.com/tustvold)) +- minor: format table result vec & remove some unnecessary semicolons [\#2425](https://github.com/apache/datafusion/pull/2425) ([WinkerDu](https://github.com/WinkerDu)) +- Basic support for `IN` and `NOT IN` Subqueries by rewriting them to `SEMI` / `ANTI` Join [\#2421](https://github.com/apache/datafusion/pull/2421) ([korowa](https://github.com/korowa)) +- Allow subqueries without aliases [\#2418](https://github.com/apache/datafusion/pull/2418) ([andygrove](https://github.com/andygrove)) +- Fix bug in subquery join filters referencing outer query [\#2416](https://github.com/apache/datafusion/pull/2416) ([andygrove](https://github.com/andygrove)) +- MINOR: remove duplicated function `format_state_name()` [\#2414](https://github.com/apache/datafusion/pull/2414) ([WinkerDu](https://github.com/WinkerDu)) +- Make expected result string in unit tests more readable [\#2413](https://github.com/apache/datafusion/pull/2413) ([WinkerDu](https://github.com/WinkerDu)) +- `sum(distinct)` support [\#2405](https://github.com/apache/datafusion/pull/2405) ([WinkerDu](https://github.com/WinkerDu)) +- Update ordered-float requirement from 2.10 to 3.0 [\#2403](https://github.com/apache/datafusion/pull/2403) ([dependabot[bot]](https://github.com/apps/dependabot)) +- remove duplicated `fn aggregate()` in aggregate expression tests [\#2400](https://github.com/apache/datafusion/pull/2400) ([WinkerDu](https://github.com/WinkerDu)) +- Support type-coercion from Decimal to Float64 [\#2396](https://github.com/apache/datafusion/pull/2396) ([comphead](https://github.com/comphead)) +- minor: SchemaError code cleanup and improvements [\#2391](https://github.com/apache/datafusion/pull/2391) 
([andygrove](https://github.com/andygrove)) +- Support struct_expr generate struct in sql [\#2389](https://github.com/apache/datafusion/pull/2389) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Re-organize and rename aggregates physical plan [\#2388](https://github.com/apache/datafusion/pull/2388) ([yjshen](https://github.com/yjshen)) +- refactor `distinct_expressions.rs` and split into `count_distinct.rs` and `array_agg_distinct.rs` [\#2386](https://github.com/apache/datafusion/pull/2386) ([WinkerDu](https://github.com/WinkerDu)) +- Allow CTEs to be referenced from subquery expressions [\#2384](https://github.com/apache/datafusion/pull/2384) ([andygrove](https://github.com/andygrove)) +- Upgrade to arrow 13 [\#2382](https://github.com/apache/datafusion/pull/2382) ([alamb](https://github.com/alamb)) +- Grouped Aggregate in row format [\#2375](https://github.com/apache/datafusion/pull/2375) ([yjshen](https://github.com/yjshen)) +- Fix bugs with CTE aliasing and normalize all identifiers in the SQL planner [\#2373](https://github.com/apache/datafusion/pull/2373) ([andygrove](https://github.com/andygrove)) +- Stop optimizing queries twice [\#2369](https://github.com/apache/datafusion/pull/2369) ([andygrove](https://github.com/andygrove)) +- feat: Support casting to arrays to primitive type [\#2366](https://github.com/apache/datafusion/pull/2366) ([ovr](https://github.com/ovr)) +- Add proper support for `null` literal by introducing `ScalarValue::Null` [\#2364](https://github.com/apache/datafusion/pull/2364) ([WinkerDu](https://github.com/WinkerDu)) +- minor: fix duplicate column bug in subquery support [\#2362](https://github.com/apache/datafusion/pull/2362) ([andygrove](https://github.com/andygrove)) +- Normalize subquery aliases [\#2359](https://github.com/apache/datafusion/pull/2359) ([andygrove](https://github.com/andygrove)) +- Implement physical planner support for DATE +/- INTERVAL [\#2357](https://github.com/apache/datafusion/pull/2357) 
([andygrove](https://github.com/andygrove)) +- Add SQL query planner support for Scalar Subqueries [\#2354](https://github.com/apache/datafusion/pull/2354) ([andygrove](https://github.com/andygrove)) +- Add SQL query planner support for IN subqueries [\#2352](https://github.com/apache/datafusion/pull/2352) ([andygrove](https://github.com/andygrove)) +- Add `Expr` to prelude [\#2348](https://github.com/apache/datafusion/pull/2348) ([alamb](https://github.com/alamb)) +- Add SQL planner support for EXISTS subqueries [\#2344](https://github.com/apache/datafusion/pull/2344) ([andygrove](https://github.com/andygrove)) +- Add public Serialization/Deserialization API for `Expr` to/from bytes [\#2341](https://github.com/apache/datafusion/pull/2341) ([alamb](https://github.com/alamb)) +- Support for date32 and date64 in sort merge join [\#2336](https://github.com/apache/datafusion/pull/2336) ([hntd187](https://github.com/hntd187)) +- \[physical-expr\] move aggregate exprs and window exprs to their own modules [\#2335](https://github.com/apache/datafusion/pull/2335) ([yjshen](https://github.com/yjshen)) +- fix: union schema [\#2334](https://github.com/apache/datafusion/pull/2334) ([gandronchik](https://github.com/gandronchik)) +- Improve sql integration test organization [\#2333](https://github.com/apache/datafusion/pull/2333) ([alamb](https://github.com/alamb)) +- Support scalar values for func Array [\#2332](https://github.com/apache/datafusion/pull/2332) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- move sql tests from `context.rs` to corresponding test files in `tests/sql` [\#2329](https://github.com/apache/datafusion/pull/2329) ([WinkerDu](https://github.com/WinkerDu)) +- deprecate `index_of` and make `index_of_column_by_name` public [\#2320](https://github.com/apache/datafusion/pull/2320) ([jdye64](https://github.com/jdye64)) +- Fix HashJoin evaluating during plan [\#2317](https://github.com/apache/datafusion/pull/2317) ([tustvold](https://github.com/tustvold)) +- 
minor: remove two source files that only had re-exports [\#2313](https://github.com/apache/datafusion/pull/2313) ([andygrove](https://github.com/andygrove)) +- Don't sort batches during plan [\#2312](https://github.com/apache/datafusion/pull/2312) ([tustvold](https://github.com/tustvold)) +- Move case/when expressions to datafusion-expr crate [\#2311](https://github.com/apache/datafusion/pull/2311) ([andygrove](https://github.com/andygrove)) +- Fix CrossJoinExec evaluating during plan [\#2310](https://github.com/apache/datafusion/pull/2310) ([tustvold](https://github.com/tustvold)) +- Make SortPreservingMerge Usable Outside Tokio \(\#2201\) [\#2305](https://github.com/apache/datafusion/pull/2305) ([tustvold](https://github.com/tustvold)) +- chore: update cranelift to 0.83.0 [\#2304](https://github.com/apache/datafusion/pull/2304) ([yjshen](https://github.com/yjshen)) +- Always increment timer on record [\#2298](https://github.com/apache/datafusion/pull/2298) ([tustvold](https://github.com/tustvold)) +- Remove unnecessary env var for parquet_sql example [\#2297](https://github.com/apache/datafusion/pull/2297) ([sergey-melnychuk](https://github.com/sergey-melnychuk)) +- Simplify sort streams [\#2296](https://github.com/apache/datafusion/pull/2296) ([tustvold](https://github.com/tustvold)) +- MINOR: beautify code with neat idents [\#2295](https://github.com/apache/datafusion/pull/2295) ([WinkerDu](https://github.com/WinkerDu)) +- Move FileType enum from sql module to logical_plan module [\#2290](https://github.com/apache/datafusion/pull/2290) ([andygrove](https://github.com/andygrove)) +- Remove Parquet Empty Projection Workaround [\#2289](https://github.com/apache/datafusion/pull/2289) ([tustvold](https://github.com/tustvold)) +- Add BatchPartitioner \(\#2285\) [\#2287](https://github.com/apache/datafusion/pull/2287) ([tustvold](https://github.com/tustvold)) +- Make row its crate to make it accessible from physical-expr 
[\#2283](https://github.com/apache/datafusion/pull/2283) ([yjshen](https://github.com/yjshen)) +- Enable filter pushdown when using In_list on parquet [\#2282](https://github.com/apache/datafusion/pull/2282) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Update uuid requirement from 0.8 to 1.0 [\#2280](https://github.com/apache/datafusion/pull/2280) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Add bytes scanned metric to ParquetExec [\#2273](https://github.com/apache/datafusion/pull/2273) ([thinkharderdev](https://github.com/thinkharderdev)) +- Fix outer join output with all-null indices on empty batch [\#2272](https://github.com/apache/datafusion/pull/2272) ([yjshen](https://github.com/yjshen)) +- Re-export DataFusion crates [\#2264](https://github.com/apache/datafusion/pull/2264) ([andygrove](https://github.com/andygrove)) +- rewrite approx_median to approx_percentile_cont while planning phase [\#2262](https://github.com/apache/datafusion/pull/2262) ([korowa](https://github.com/korowa)) +- Introduce RowLayout to represent rows for different purposes [\#2261](https://github.com/apache/datafusion/pull/2261) ([yjshen](https://github.com/yjshen)) +- fix string coercion missing in Eq/NotEq operator [\#2258](https://github.com/apache/datafusion/pull/2258) ([WinkerDu](https://github.com/WinkerDu)) +- Update to Arrow 12.0.0, update tonic and prost [\#2253](https://github.com/apache/datafusion/pull/2253) ([alamb](https://github.com/alamb)) +- minor: move field_util from `physical-expr` crate to `expr` crate [\#2250](https://github.com/apache/datafusion/pull/2250) ([andygrove](https://github.com/andygrove)) +- Move identifer case tests to `sql_integ`, add negative cases, Debug for `DataFrame` [\#2243](https://github.com/apache/datafusion/pull/2243) ([alamb](https://github.com/alamb)) +- Implement sort-merge join [\#2242](https://github.com/apache/datafusion/pull/2242) ([richox](https://github.com/richox)) +- fix: find the right wider decimal datatype for 
comparison operation [\#2241](https://github.com/apache/datafusion/pull/2241) ([liukun4515](https://github.com/liukun4515)) +- Fix join without constraints [\#2240](https://github.com/apache/datafusion/pull/2240) ([Dandandan](https://github.com/Dandandan)) +- Add type coercion rule for date + interval [\#2235](https://github.com/apache/datafusion/pull/2235) ([andygrove](https://github.com/andygrove)) +- support array with scalar arithmetic operation for decimal data type [\#2233](https://github.com/apache/datafusion/pull/2233) ([liukun4515](https://github.com/liukun4515)) +- chore: add `debug!` log in some execution operators [\#2231](https://github.com/apache/datafusion/pull/2231) ([NGA-TRAN](https://github.com/NGA-TRAN)) +- Introduce new optional scheduler, using Morsel-driven Parallelism + rayon \(\#2199\) [\#2226](https://github.com/apache/datafusion/pull/2226) ([tustvold](https://github.com/tustvold)) +- minor: add editor config file [\#2224](https://github.com/apache/datafusion/pull/2224) ([jackwener](https://github.com/jackwener)) +- minor: Refactor to avoid repeated code in replace_qualifier [\#2222](https://github.com/apache/datafusion/pull/2222) ([andygrove](https://github.com/andygrove)) +- update cli readme [\#2220](https://github.com/apache/datafusion/pull/2220) ([liukun4515](https://github.com/liukun4515)) +- Use `filter` \(filter_record_batch\) instead of `take` to avoid using indices [\#2218](https://github.com/apache/datafusion/pull/2218) ([Dandandan](https://github.com/Dandandan)) +- Add single line description of ExecutionPlan \(\#2216\) [\#2217](https://github.com/apache/datafusion/pull/2217) ([tustvold](https://github.com/tustvold)) +- Remove tokio::spawn from HashAggregateExec \(\#2201\) [\#2215](https://github.com/apache/datafusion/pull/2215) ([tustvold](https://github.com/tustvold)) +- Remove tokio::spawn from WindowAggExec \(\#2201\) [\#2203](https://github.com/apache/datafusion/pull/2203) ([tustvold](https://github.com/tustvold)) +- Make 
ParquetExec usable outside of a tokio runtime \(\#2201\) [\#2202](https://github.com/apache/datafusion/pull/2202) ([tustvold](https://github.com/tustvold)) +- add sql level test for decimal data type [\#2200](https://github.com/apache/datafusion/pull/2200) ([liukun4515](https://github.com/liukun4515)) +- `case when` supports `NULL` constant [\#2197](https://github.com/apache/datafusion/pull/2197) ([WinkerDu](https://github.com/WinkerDu)) +- feat: Support simple Arrays with Literals [\#2194](https://github.com/apache/datafusion/pull/2194) ([ovr](https://github.com/ovr)) +- \[Ballista\] Enable ApproxPercentileWithWeight in Ballista and fill UT [\#2192](https://github.com/apache/datafusion/pull/2192) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- refactor: simplify `prepare_select_exprs` [\#2190](https://github.com/apache/datafusion/pull/2190) ([jackwener](https://github.com/jackwener)) +- Multiple row-layout support, part-1: Restructure code for clearness [\#2189](https://github.com/apache/datafusion/pull/2189) ([yjshen](https://github.com/yjshen)) +- make nightly clippy happy [\#2186](https://github.com/apache/datafusion/pull/2186) ([xudong963](https://github.com/xudong963)) +- \[Ballista\]Make PhysicalAggregateExprNode has repeated PhysicalExprNode [\#2184](https://github.com/apache/datafusion/pull/2184) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- MINOR: handle `NULL` in advance to avoid value copy in `string_concat` [\#2183](https://github.com/apache/datafusion/pull/2183) ([WinkerDu](https://github.com/WinkerDu)) +- fix: Sort with a lot of repetition values [\#2182](https://github.com/apache/datafusion/pull/2182) ([yjshen](https://github.com/yjshen)) +- cli: update lockfile [\#2178](https://github.com/apache/datafusion/pull/2178) ([happysalada](https://github.com/happysalada)) +- Add LogicalPlan::SubqueryAlias [\#2172](https://github.com/apache/datafusion/pull/2172) ([andygrove](https://github.com/andygrove)) +- minor: Avoid per cell evaluation in Coalesce, use 
zip in CaseWhen [\#2171](https://github.com/apache/datafusion/pull/2171) ([yjshen](https://github.com/yjshen)) +- Handle merged schemas in parquet pruning [\#2170](https://github.com/apache/datafusion/pull/2170) ([thinkharderdev](https://github.com/thinkharderdev)) +- Implement fast path of with_new_children\(\) in ExecutionPlan [\#2168](https://github.com/apache/datafusion/pull/2168) ([mingmwang](https://github.com/mingmwang)) +- enable explain for ballista [\#2163](https://github.com/apache/datafusion/pull/2163) ([doki23](https://github.com/doki23)) +- Add delimiter for create external table [\#2162](https://github.com/apache/datafusion/pull/2162) ([matthewmturner](https://github.com/matthewmturner)) +- \[MINOR\] enable `EXTRACT week` and add test \(after sqlparser update to 0.16\) [\#2157](https://github.com/apache/datafusion/pull/2157) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Optimize the evaluation of `IN` for large lists using InSet [\#2156](https://github.com/apache/datafusion/pull/2156) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Update sqlparser requirement from 0.15 to 0.16 [\#2152](https://github.com/apache/datafusion/pull/2152) ([dependabot[bot]](https://github.com/apps/dependabot)) +- fix `not(null)` with constant `null` [\#2144](https://github.com/apache/datafusion/pull/2144) ([WinkerDu](https://github.com/WinkerDu)) +- Add IF NOT EXISTS to `CREATE TABLE` and `CREATE EXTERNAL TABLE` [\#2143](https://github.com/apache/datafusion/pull/2143) ([matthewmturner](https://github.com/matthewmturner)) +- implement 'StringConcat' operator to support sql like "select 'aa' || 'b' " [\#2142](https://github.com/apache/datafusion/pull/2142) ([WinkerDu](https://github.com/WinkerDu)) +- \#2109 By default, use only 1000 rows to infer the schema [\#2139](https://github.com/apache/datafusion/pull/2139) ([jychen7](https://github.com/jychen7)) +- \[CLI\] Add show tables in ballista for datafusion-cli [\#2137](https://github.com/apache/datafusion/pull/2137) 
([gaojun2048](https://github.com/gaojun2048)) +- fix: incorrect memory usage track for sort [\#2135](https://github.com/apache/datafusion/pull/2135) ([yjshen](https://github.com/yjshen)) +- Update quarterly roadmap for Q2 [\#2133](https://github.com/apache/datafusion/pull/2133) ([matthewmturner](https://github.com/matthewmturner)) +- Reduce SortExec memory usage by void constructing single huge batch [\#2132](https://github.com/apache/datafusion/pull/2132) ([yjshen](https://github.com/yjshen)) +- MINOR: fix concat_ws corner bug [\#2128](https://github.com/apache/datafusion/pull/2128) ([WinkerDu](https://github.com/WinkerDu)) +- Minor add clarifying comment in parquet [\#2127](https://github.com/apache/datafusion/pull/2127) ([alamb](https://github.com/alamb)) +- Minor: make disk_manager public [\#2126](https://github.com/apache/datafusion/pull/2126) ([yjshen](https://github.com/yjshen)) +- JIT-compille DataFusion expression with column name [\#2124](https://github.com/apache/datafusion/pull/2124) ([Dandandan](https://github.com/Dandandan)) +- minor: replace array_equals in case evaluation with eq_dyn from arrow-rs [\#2121](https://github.com/apache/datafusion/pull/2121) ([alamb](https://github.com/alamb)) +- Serialize timezone in timestamp scalar values [\#2120](https://github.com/apache/datafusion/pull/2120) ([thinkharderdev](https://github.com/thinkharderdev)) +- minor: fix some clippy warnings from nightly rust [\#2119](https://github.com/apache/datafusion/pull/2119) ([alamb](https://github.com/alamb)) +- Fix case evaluation with NULLs [\#2118](https://github.com/apache/datafusion/pull/2118) ([alamb](https://github.com/alamb)) +- issue\#1967 ignore channel close [\#2113](https://github.com/apache/datafusion/pull/2113) ([silence-coding](https://github.com/silence-coding)) +- cli: add cargo.lock [\#2112](https://github.com/apache/datafusion/pull/2112) ([happysalada](https://github.com/happysalada)) +- doc: update release schedule 
[\#2110](https://github.com/apache/datafusion/pull/2110) ([jychen7](https://github.com/jychen7)) +- fix df union all bug [\#2108](https://github.com/apache/datafusion/pull/2108) [[sql](https://github.com/apache/datafusion/labels/sql)] ([WinkerDu](https://github.com/WinkerDu)) +- Reduce repetition in Decimal binary kernels, upgrade to arrow 11.1 [\#2107](https://github.com/apache/datafusion/pull/2107) ([alamb](https://github.com/alamb)) +- update zlib version to 1.2.12 [\#2106](https://github.com/apache/datafusion/pull/2106) ([waitingkuo](https://github.com/waitingkuo)) +- Create jit-expression from datafusion expression [\#2103](https://github.com/apache/datafusion/pull/2103) ([Dandandan](https://github.com/Dandandan)) +- Add CREATE DATABASE command to SQL [\#2094](https://github.com/apache/datafusion/pull/2094) [[sql](https://github.com/apache/datafusion/labels/sql)] ([matthewmturner](https://github.com/matthewmturner)) +- Refactor SessionContext, BallistaContext to support multi-tenancy configurations - Part 3 [\#2091](https://github.com/apache/datafusion/pull/2091) ([mingmwang](https://github.com/mingmwang)) +- minor: remove duplicate test [\#2089](https://github.com/apache/datafusion/pull/2089) ([jackwener](https://github.com/jackwener)) +- minor: remove repeated test [\#2085](https://github.com/apache/datafusion/pull/2085) ([jackwener](https://github.com/jackwener)) +- Fix lost filters and projections in ParquetExec, CSVExec etc [\#2077](https://github.com/apache/datafusion/pull/2077) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Remove dependency of common for the storage crate [\#2076](https://github.com/apache/datafusion/pull/2076) ([yahoNanJing](https://github.com/yahoNanJing)) +- [MINOR] fix doc in `EXTRACT\(field FROM source\) [\#2074](https://github.com/apache/datafusion/pull/2074) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- \[Bug\]\[Datafusion\] fix TaskContext session_config bug [\#2070](https://github.com/apache/datafusion/pull/2070) 
([gaojun2048](https://github.com/gaojun2048)) +- Short-circuit evaluation for `CaseWhen` [\#2068](https://github.com/apache/datafusion/pull/2068) ([yjshen](https://github.com/yjshen)) +- split datafusion-object-store module [\#2065](https://github.com/apache/datafusion/pull/2065) ([yahoNanJing](https://github.com/yahoNanJing)) +- Allow `CatalogProvider::register_catalog` to return an error [\#2052](https://github.com/apache/datafusion/pull/2052) ([alamb](https://github.com/alamb)) +- Add test in register_catalog and change to use named symbolic constants [\#2050](https://github.com/apache/datafusion/pull/2050) ([alamb](https://github.com/alamb)) +- Update to arrow/parquet 11.0 [\#2048](https://github.com/apache/datafusion/pull/2048) ([alamb](https://github.com/alamb)) +- minor: format comments \(`//` to `// `\) [\#2047](https://github.com/apache/datafusion/pull/2047) ([jackwener](https://github.com/jackwener)) +- use cargo-tomlfmt to check Cargo.toml formatting in CI [\#2033](https://github.com/apache/datafusion/pull/2033) ([WinkerDu](https://github.com/WinkerDu)) +- feat: \#2004 approx percentile with weight [\#2031](https://github.com/apache/datafusion/pull/2031) ([jychen7](https://github.com/jychen7)) +- Refactor SessionContext, SessionState and SessionConfig to support multi-tenancy configurations - Part 2 [\#2029](https://github.com/apache/datafusion/pull/2029) ([mingmwang](https://github.com/mingmwang)) +- Simplify prerequisites for running examples [\#2028](https://github.com/apache/datafusion/pull/2028) ([doki23](https://github.com/doki23)) +- Replace usage of `println!` with logger macros [\#2020](https://github.com/apache/datafusion/pull/2020) ([silence-coding](https://github.com/silence-coding)) +- Automatically test examples in user guide [\#2018](https://github.com/apache/datafusion/pull/2018) ([vchag](https://github.com/vchag)) +- return VecDeque for DFParser::parse_sql [\#2017](https://github.com/apache/datafusion/pull/2017) 
[[sql](https://github.com/apache/datafusion/labels/sql)] ([doki23](https://github.com/doki23)) +- Eliminate the scalar value filter [\#2002](https://github.com/apache/datafusion/pull/2002) ([jackwener](https://github.com/jackwener)) +- Fixing a typo in documentation [\#1997](https://github.com/apache/datafusion/pull/1997) ([psvri](https://github.com/psvri)) +- Correct documentation of ExprVisitor [\#1996](https://github.com/apache/datafusion/pull/1996) ([alamb](https://github.com/alamb)) +- Make it possible to only scan part of a parquet file in a partition [\#1990](https://github.com/apache/datafusion/pull/1990) ([yjshen](https://github.com/yjshen)) +- Update Dockerfile to fix integration tests [\#1982](https://github.com/apache/datafusion/pull/1982) ([andygrove](https://github.com/andygrove)) +- Remove some more unecessary cloning in sql_expr_to_logical_expr [\#1981](https://github.com/apache/datafusion/pull/1981) [[sql](https://github.com/apache/datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Add ticket reference to clippy allow [\#1978](https://github.com/apache/datafusion/pull/1978) [[sql](https://github.com/apache/datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Implement EXTRACT expression with week, month, day, hour [\#1974](https://github.com/apache/datafusion/pull/1974) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Address typo in ExprVisitable trait documentation [\#1970](https://github.com/apache/datafusion/pull/1970) ([jdye64](https://github.com/jdye64)) +- Update sqlparser requirement from 0.14 to 0.15 [\#1966](https://github.com/apache/datafusion/pull/1966) ([dependabot[bot]](https://github.com/apps/dependabot)) +- PruningPredicate should take owned Expr [\#1960](https://github.com/apache/datafusion/pull/1960) ([thinkharderdev](https://github.com/thinkharderdev)) +- Update to arrow 10.0.0, pyo3 0.16 [\#1957](https://github.com/apache/datafusion/pull/1957) ([alamb](https://github.com/alamb)) +- update jit-related 
dependencies [\#1953](https://github.com/apache/datafusion/pull/1953) ([xudong963](https://github.com/xudong963)) +- minor code refinement: `if_exists` name change, wildcard field for logical plan, etc. [\#1951](https://github.com/apache/datafusion/pull/1951) [[sql](https://github.com/apache/datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) +- Allow different types of query variables \(`@@var`\) rather than just string [\#1943](https://github.com/apache/datafusion/pull/1943) [[sql](https://github.com/apache/datafusion/labels/sql)] ([maxburke](https://github.com/maxburke)) +- Pruning serialization [\#1941](https://github.com/apache/datafusion/pull/1941) ([thinkharderdev](https://github.com/thinkharderdev)) +- Add write_parquet to `DataFrame` [\#1940](https://github.com/apache/datafusion/pull/1940) ([matthewmturner](https://github.com/matthewmturner)) +- Fix select from EmptyExec always return 0 row after optimizer passes [\#1938](https://github.com/apache/datafusion/pull/1938) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Add debug log when waiting for spilling on other consumers [\#1933](https://github.com/apache/datafusion/pull/1933) ([viirya](https://github.com/viirya)) +- Add db benchmark script [\#1928](https://github.com/apache/datafusion/pull/1928) ([matthewmturner](https://github.com/matthewmturner)) +- Add write_csv to DataFrame [\#1922](https://github.com/apache/datafusion/pull/1922) ([matthewmturner](https://github.com/matthewmturner)) +- \[MINOR\] Update copyright year in Docs [\#1918](https://github.com/apache/datafusion/pull/1918) ([alamb](https://github.com/alamb)) +- add metadata to DFSchema, close \#1806. 
[\#1914](https://github.com/apache/datafusion/pull/1914) [[sql](https://github.com/apache/datafusion/labels/sql)] ([jiacai2050](https://github.com/jiacai2050)) +- Clippy fix on nightly [\#1907](https://github.com/apache/datafusion/pull/1907) ([yjshen](https://github.com/yjshen)) +- Updated Rust version to 1.59 in all the files [\#1903](https://github.com/apache/datafusion/pull/1903) ([NaincyKumariKnoldus](https://github.com/NaincyKumariKnoldus)) +- support extract second and minute in expr. [\#1901](https://github.com/apache/datafusion/pull/1901) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Update crate descriptions [\#1899](https://github.com/apache/datafusion/pull/1899) ([alamb](https://github.com/alamb)) +- Remove uneeded Mutex in Ballista Client [\#1898](https://github.com/apache/datafusion/pull/1898) ([alamb](https://github.com/alamb)) +- \[split/17\] move the rest of physical expr to datafusion-physical-expr crate [\#1892](https://github.com/apache/datafusion/pull/1892) ([Jimexist](https://github.com/Jimexist)) +- Avoid unnecessary branching in row read/write if schema is null-free [\#1891](https://github.com/apache/datafusion/pull/1891) ([yjshen](https://github.com/yjshen)) +- Make parquet support optional for datafusion-common crate [\#1886](https://github.com/apache/datafusion/pull/1886) ([jonmmease](https://github.com/jonmmease)) +- Fix clippy lints [\#1885](https://github.com/apache/datafusion/pull/1885) ([HaoYang670](https://github.com/HaoYang670)) +- Add support for `~/.datafusionrc` and cli option for overriding it to datafusion-cli [\#1875](https://github.com/apache/datafusion/pull/1875) ([matthewmturner](https://github.com/matthewmturner)) +- \[Minor\] Clean up DecimalArray API Usage [\#1869](https://github.com/apache/datafusion/pull/1869) [[sql](https://github.com/apache/datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Changes after went through "Datafusion as a library section" 
[\#1868](https://github.com/apache/datafusion/pull/1868) ([nonontb](https://github.com/nonontb)) +- Enhance MemorySchemaProvider to support `register_listing_table` [\#1863](https://github.com/apache/datafusion/pull/1863) ([matthewmturner](https://github.com/matthewmturner)) +- Increase default partition column type from Dict\(UInt8\) to Dict\(UInt16\) [\#1860](https://github.com/apache/datafusion/pull/1860) ([Igosuki](https://github.com/Igosuki)) +- Update to arrow 9.1.0 [\#1851](https://github.com/apache/datafusion/pull/1851) ([alamb](https://github.com/alamb)) +- move some tests out of context and into sql [\#1846](https://github.com/apache/datafusion/pull/1846) ([alamb](https://github.com/alamb)) +- \[split/14\] create `datafusion-physical-expr` module [\#1843](https://github.com/apache/datafusion/pull/1843) ([Jimexist](https://github.com/Jimexist)) +- Return `Error` when parquet reader fails rather than no data with `println!` [\#1837](https://github.com/apache/datafusion/pull/1837) ([alamb](https://github.com/alamb)) +- determine build side in hash join by `total_byte_size` instead of `num_rows` [\#1831](https://github.com/apache/datafusion/pull/1831) ([xudong963](https://github.com/xudong963)) +- Make ballista support an optional feature to datafusion-cli [\#1816](https://github.com/apache/datafusion/pull/1816) ([alamb](https://github.com/alamb)) +- Update documentation example for change in API [\#1812](https://github.com/apache/datafusion/pull/1812) ([alamb](https://github.com/alamb)) +- rename references of expr in physical plan module after datafusion-expr split [\#1798](https://github.com/apache/datafusion/pull/1798) ([Jimexist](https://github.com/Jimexist)) +- DataFusion + Conbench Integration [\#1791](https://github.com/apache/datafusion/pull/1791) ([dianaclarke](https://github.com/dianaclarke)) +- The returned path value of get_by_uri should be self-described with entire path [\#1779](https://github.com/apache/datafusion/pull/1779) 
([yahoNanJing](https://github.com/yahoNanJing)) +- Use`eq_dyn`, `neq_dyn`, `lt_dyn`, `lt_eq_dyn`, `gt_dyn`, `gt_eq_dyn` kernels from arrow [\#1475](https://github.com/apache/datafusion/pull/1475) ([alamb](https://github.com/alamb)) diff --git a/dev/changelog/9.0.0.md b/dev/changelog/9.0.0.md index f47127aa5f4c..a74f56da2f19 100644 --- a/dev/changelog/9.0.0.md +++ b/dev/changelog/9.0.0.md @@ -17,162 +17,162 @@ under the License. --> -## [9.0.0](https://github.com/apache/arrow-datafusion/tree/9.0.0) (2022-06-10) +## [9.0.0](https://github.com/apache/datafusion/tree/9.0.0) (2022-06-10) -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/8.0.0-rc2...9.0.0) +[Full Changelog](https://github.com/apache/datafusion/compare/8.0.0-rc2...9.0.0) **Breaking changes:** -- MINOR: Move `simplify_expression` rule to `datafusion-optimizer` crate [\#2686](https://github.com/apache/arrow-datafusion/pull/2686) ([andygrove](https://github.com/andygrove)) -- Move physical expression planning to `datafusion-physical-expr` crate [\#2682](https://github.com/apache/arrow-datafusion/pull/2682) ([andygrove](https://github.com/andygrove)) -- Create new `datafusion-optimizer` crate for logical optimizer rules [\#2675](https://github.com/apache/arrow-datafusion/pull/2675) ([andygrove](https://github.com/andygrove)) -- Remove `ExecutionProps` dependency from `OptimizerRule` [\#2666](https://github.com/apache/arrow-datafusion/pull/2666) ([andygrove](https://github.com/andygrove)) -- Remove ObjectStoreSchemaProvider \(\#2656\) [\#2665](https://github.com/apache/arrow-datafusion/pull/2665) ([tustvold](https://github.com/tustvold)) -- Move `LogicalPlanBuilder` to `datafusion-expr` crate [\#2576](https://github.com/apache/arrow-datafusion/pull/2576) ([andygrove](https://github.com/andygrove)) -- `LogicalPlanBuilder` now uses `TableSource` instead of `TableProvider` [\#2569](https://github.com/apache/arrow-datafusion/pull/2569) ([andygrove](https://github.com/andygrove)) -- Remove 
`scan_empty` method from `LogicalPlanBuilder` [\#2568](https://github.com/apache/arrow-datafusion/pull/2568) ([andygrove](https://github.com/andygrove)) -- MINOR: Move expression utils from sql module to expr crate [\#2553](https://github.com/apache/arrow-datafusion/pull/2553) ([andygrove](https://github.com/andygrove)) -- Remove `scan_json` methods from `LogicalPlanBuilder` [\#2541](https://github.com/apache/arrow-datafusion/pull/2541) ([andygrove](https://github.com/andygrove)) -- Remove `scan_avro` methods from `LogicalPlanBuilder` [\#2540](https://github.com/apache/arrow-datafusion/pull/2540) ([andygrove](https://github.com/andygrove)) -- Remove `scan_parquet` methods from `LogicalPlanBuilder` [\#2539](https://github.com/apache/arrow-datafusion/pull/2539) ([andygrove](https://github.com/andygrove)) -- MINOR: Move `ExprVisitable` and `exprlist_to_columns` to datafusion-expr crate [\#2538](https://github.com/apache/arrow-datafusion/pull/2538) ([andygrove](https://github.com/andygrove)) -- Remove `scan_csv` methods from `LogicalPlanBuilder` [\#2537](https://github.com/apache/arrow-datafusion/pull/2537) ([andygrove](https://github.com/andygrove)) -- Fix Redundant ScalarValue Boxed Collection [\#2523](https://github.com/apache/arrow-datafusion/pull/2523) ([comphead](https://github.com/comphead)) -- Support for OFFSET in LogicalPlan [\#2521](https://github.com/apache/arrow-datafusion/pull/2521) ([jdye64](https://github.com/jdye64)) +- MINOR: Move `simplify_expression` rule to `datafusion-optimizer` crate [\#2686](https://github.com/apache/datafusion/pull/2686) ([andygrove](https://github.com/andygrove)) +- Move physical expression planning to `datafusion-physical-expr` crate [\#2682](https://github.com/apache/datafusion/pull/2682) ([andygrove](https://github.com/andygrove)) +- Create new `datafusion-optimizer` crate for logical optimizer rules [\#2675](https://github.com/apache/datafusion/pull/2675) ([andygrove](https://github.com/andygrove)) +- Remove 
`ExecutionProps` dependency from `OptimizerRule` [\#2666](https://github.com/apache/datafusion/pull/2666) ([andygrove](https://github.com/andygrove)) +- Remove ObjectStoreSchemaProvider \(\#2656\) [\#2665](https://github.com/apache/datafusion/pull/2665) ([tustvold](https://github.com/tustvold)) +- Move `LogicalPlanBuilder` to `datafusion-expr` crate [\#2576](https://github.com/apache/datafusion/pull/2576) ([andygrove](https://github.com/andygrove)) +- `LogicalPlanBuilder` now uses `TableSource` instead of `TableProvider` [\#2569](https://github.com/apache/datafusion/pull/2569) ([andygrove](https://github.com/andygrove)) +- Remove `scan_empty` method from `LogicalPlanBuilder` [\#2568](https://github.com/apache/datafusion/pull/2568) ([andygrove](https://github.com/andygrove)) +- MINOR: Move expression utils from sql module to expr crate [\#2553](https://github.com/apache/datafusion/pull/2553) ([andygrove](https://github.com/andygrove)) +- Remove `scan_json` methods from `LogicalPlanBuilder` [\#2541](https://github.com/apache/datafusion/pull/2541) ([andygrove](https://github.com/andygrove)) +- Remove `scan_avro` methods from `LogicalPlanBuilder` [\#2540](https://github.com/apache/datafusion/pull/2540) ([andygrove](https://github.com/andygrove)) +- Remove `scan_parquet` methods from `LogicalPlanBuilder` [\#2539](https://github.com/apache/datafusion/pull/2539) ([andygrove](https://github.com/andygrove)) +- MINOR: Move `ExprVisitable` and `exprlist_to_columns` to datafusion-expr crate [\#2538](https://github.com/apache/datafusion/pull/2538) ([andygrove](https://github.com/andygrove)) +- Remove `scan_csv` methods from `LogicalPlanBuilder` [\#2537](https://github.com/apache/datafusion/pull/2537) ([andygrove](https://github.com/andygrove)) +- Fix Redundant ScalarValue Boxed Collection [\#2523](https://github.com/apache/datafusion/pull/2523) ([comphead](https://github.com/comphead)) +- Support for OFFSET in LogicalPlan [\#2521](https://github.com/apache/datafusion/pull/2521) 
([jdye64](https://github.com/jdye64)) **Implemented enhancements:** -- \[EPIC\] JIT support for `DataFusion` [\#2703](https://github.com/apache/arrow-datafusion/issues/2703) -- Show column names instead of column indices in query plans [\#2689](https://github.com/apache/arrow-datafusion/issues/2689) -- Proposal: remove automated ballista CI checks from DataFusion [\#2679](https://github.com/apache/arrow-datafusion/issues/2679) -- Pass SessionState to TableProvider [\#2658](https://github.com/apache/arrow-datafusion/issues/2658) -- Is ObjectStoreSchemaProvider Still Needed? [\#2656](https://github.com/apache/arrow-datafusion/issues/2656) -- Add logical plan support to `datafusion-proto` [\#2630](https://github.com/apache/arrow-datafusion/issues/2630) -- Like, NotLike expressions work with literal `NULL` [\#2626](https://github.com/apache/arrow-datafusion/issues/2626) -- Move `JOIN ON` predicates push down logic from planner to optimizer [\#2619](https://github.com/apache/arrow-datafusion/issues/2619) -- Remove `ExecutionProps` from `OptimizerRule` trait [\#2614](https://github.com/apache/arrow-datafusion/issues/2614) -- Add, Minus, Multiply, divide, Modulo operator work with literal `NULL` [\#2609](https://github.com/apache/arrow-datafusion/issues/2609) -- Support `DESCRIBE
` to show table schemas [\#2606](https://github.com/apache/arrow-datafusion/issues/2606) -- Support `CREATE OR REPLACE TABLE` [\#2605](https://github.com/apache/arrow-datafusion/issues/2605) -- filter_push_down tests should not rely on TableProvider and ExecutionPlan [\#2600](https://github.com/apache/arrow-datafusion/issues/2600) -- Move logical optimizer rules out of the core datafusion crate [\#2599](https://github.com/apache/arrow-datafusion/issues/2599) -- Push Limit through outer Join [\#2579](https://github.com/apache/arrow-datafusion/issues/2579) -- `datafusion_proto` crate should have exhaustive match statements for handling `Expr` [\#2565](https://github.com/apache/arrow-datafusion/issues/2565) -- String representation of Expr variant [\#2563](https://github.com/apache/arrow-datafusion/issues/2563) -- File URI Scheme Interpretation [\#2562](https://github.com/apache/arrow-datafusion/issues/2562) -- Implement physical plan for OFFSET [\#2551](https://github.com/apache/arrow-datafusion/issues/2551) -- Update limit pushdown rule to support offsets [\#2550](https://github.com/apache/arrow-datafusion/issues/2550) -- Move `LogicalPlanBuilder` to `datafusion-expr` crate [\#2536](https://github.com/apache/arrow-datafusion/issues/2536) -- Logical optimizer rule "simplify expressions" should not depend on the core datafusion crate [\#2535](https://github.com/apache/arrow-datafusion/issues/2535) -- Support optional filter in Join [\#2509](https://github.com/apache/arrow-datafusion/issues/2509) -- Improve SQL planner & logical plan support for JOIN conditions [\#2496](https://github.com/apache/arrow-datafusion/issues/2496) -- Numeric, String, Boolean comparisons with literal `NULL` [\#2482](https://github.com/apache/arrow-datafusion/issues/2482) -- Redundant ScalarValue Boxed Collection [\#2449](https://github.com/apache/arrow-datafusion/issues/2449) -- ObjectStore Directory Semantics [\#2445](https://github.com/apache/arrow-datafusion/issues/2445) -- Add support for 
`OFFSET` in SQL query planner + logical plan [\#2377](https://github.com/apache/arrow-datafusion/issues/2377) -- SQL planner should use `TableSource` not `TableProvider` [\#2346](https://github.com/apache/arrow-datafusion/issues/2346) -- Move SQL query planning to new crate [\#2345](https://github.com/apache/arrow-datafusion/issues/2345) -- Update LogicalPlan rustdoc code to not use LogicalPlanBuilder [\#2308](https://github.com/apache/arrow-datafusion/issues/2308) -- \[Optimizer\] Refactor `convert join` [\#2256](https://github.com/apache/arrow-datafusion/issues/2256) -- \[Optimizer\] Infer is not null predicate from `where clause` [\#2254](https://github.com/apache/arrow-datafusion/issues/2254) -- Support ArrayIndex for ScalarValue\(List\) [\#2207](https://github.com/apache/arrow-datafusion/issues/2207) -- \[Ballista\] Fill functional gaps between datafusion and ballista [\#2062](https://github.com/apache/arrow-datafusion/issues/2062) -- \[Ballista\] support datafusion built_in UDAF work in ballista cluster [\#1985](https://github.com/apache/arrow-datafusion/issues/1985) -- Export C API [\#1113](https://github.com/apache/arrow-datafusion/issues/1113) +- \[EPIC\] JIT support for `DataFusion` [\#2703](https://github.com/apache/datafusion/issues/2703) +- Show column names instead of column indices in query plans [\#2689](https://github.com/apache/datafusion/issues/2689) +- Proposal: remove automated ballista CI checks from DataFusion [\#2679](https://github.com/apache/datafusion/issues/2679) +- Pass SessionState to TableProvider [\#2658](https://github.com/apache/datafusion/issues/2658) +- Is ObjectStoreSchemaProvider Still Needed? 
[\#2656](https://github.com/apache/datafusion/issues/2656) +- Add logical plan support to `datafusion-proto` [\#2630](https://github.com/apache/datafusion/issues/2630) +- Like, NotLike expressions work with literal `NULL` [\#2626](https://github.com/apache/datafusion/issues/2626) +- Move `JOIN ON` predicates push down logic from planner to optimizer [\#2619](https://github.com/apache/datafusion/issues/2619) +- Remove `ExecutionProps` from `OptimizerRule` trait [\#2614](https://github.com/apache/datafusion/issues/2614) +- Add, Minus, Multiply, divide, Modulo operator work with literal `NULL` [\#2609](https://github.com/apache/datafusion/issues/2609) +- Support `DESCRIBE
` to show table schemas [\#2606](https://github.com/apache/datafusion/issues/2606) +- Support `CREATE OR REPLACE TABLE` [\#2605](https://github.com/apache/datafusion/issues/2605) +- filter_push_down tests should not rely on TableProvider and ExecutionPlan [\#2600](https://github.com/apache/datafusion/issues/2600) +- Move logical optimizer rules out of the core datafusion crate [\#2599](https://github.com/apache/datafusion/issues/2599) +- Push Limit through outer Join [\#2579](https://github.com/apache/datafusion/issues/2579) +- `datafusion_proto` crate should have exhaustive match statements for handling `Expr` [\#2565](https://github.com/apache/datafusion/issues/2565) +- String representation of Expr variant [\#2563](https://github.com/apache/datafusion/issues/2563) +- File URI Scheme Interpretation [\#2562](https://github.com/apache/datafusion/issues/2562) +- Implement physical plan for OFFSET [\#2551](https://github.com/apache/datafusion/issues/2551) +- Update limit pushdown rule to support offsets [\#2550](https://github.com/apache/datafusion/issues/2550) +- Move `LogicalPlanBuilder` to `datafusion-expr` crate [\#2536](https://github.com/apache/datafusion/issues/2536) +- Logical optimizer rule "simplify expressions" should not depend on the core datafusion crate [\#2535](https://github.com/apache/datafusion/issues/2535) +- Support optional filter in Join [\#2509](https://github.com/apache/datafusion/issues/2509) +- Improve SQL planner & logical plan support for JOIN conditions [\#2496](https://github.com/apache/datafusion/issues/2496) +- Numeric, String, Boolean comparisons with literal `NULL` [\#2482](https://github.com/apache/datafusion/issues/2482) +- Redundant ScalarValue Boxed Collection [\#2449](https://github.com/apache/datafusion/issues/2449) +- ObjectStore Directory Semantics [\#2445](https://github.com/apache/datafusion/issues/2445) +- Add support for `OFFSET` in SQL query planner + logical plan 
[\#2377](https://github.com/apache/datafusion/issues/2377) +- SQL planner should use `TableSource` not `TableProvider` [\#2346](https://github.com/apache/datafusion/issues/2346) +- Move SQL query planning to new crate [\#2345](https://github.com/apache/datafusion/issues/2345) +- Update LogicalPlan rustdoc code to not use LogicalPlanBuilder [\#2308](https://github.com/apache/datafusion/issues/2308) +- \[Optimizer\] Refactor `convert join` [\#2256](https://github.com/apache/datafusion/issues/2256) +- \[Optimizer\] Infer is not null predicate from `where clause` [\#2254](https://github.com/apache/datafusion/issues/2254) +- Support ArrayIndex for ScalarValue\(List\) [\#2207](https://github.com/apache/datafusion/issues/2207) +- \[Ballista\] Fill functional gaps between datafusion and ballista [\#2062](https://github.com/apache/datafusion/issues/2062) +- \[Ballista\] support datafusion built_in UDAF work in ballista cluster [\#1985](https://github.com/apache/datafusion/issues/1985) +- Export C API [\#1113](https://github.com/apache/datafusion/issues/1113) **Fixed bugs:** -- Fix Typos in Docs [\#2695](https://github.com/apache/arrow-datafusion/issues/2695) -- Unable to build a docker image [\#2691](https://github.com/apache/arrow-datafusion/issues/2691) -- Optimization pass `AggregateStatistics` changes type of output from `Int64` to `UInt64` [\#2673](https://github.com/apache/arrow-datafusion/issues/2673) -- ViewTable Circular Reference [\#2657](https://github.com/apache/arrow-datafusion/issues/2657) -- `ScalarValue::to_array_of_size` panics computing statistics for nested parquet file [\#2653](https://github.com/apache/arrow-datafusion/issues/2653) -- The result type of count/count_distinct [\#2635](https://github.com/apache/arrow-datafusion/issues/2635) -- limit_push_down is not working properly with OFFSET [\#2624](https://github.com/apache/arrow-datafusion/issues/2624) -- Avro Tests Fail To Compile [\#2570](https://github.com/apache/arrow-datafusion/issues/2570) -- 
Unused Window functions experssion is wrongly removed from LogicalPlan during optimalization [\#2542](https://github.com/apache/arrow-datafusion/issues/2542) -- Bug: ObjectStoreRegistry get_by_uri does not return correct path when "scheme" is provided [\#2525](https://github.com/apache/arrow-datafusion/issues/2525) -- There are duplicate and inconsistent copies of `datafusion.proto` [\#2514](https://github.com/apache/arrow-datafusion/issues/2514) -- Projection pushdown produces incorrect results when column names are reused [\#2462](https://github.com/apache/arrow-datafusion/issues/2462) -- Incorrect Parquet Projection For Nested Types [\#2453](https://github.com/apache/arrow-datafusion/issues/2453) -- LogicalPlanBuilder::scan_csv creates scans with invalid table names [\#2278](https://github.com/apache/arrow-datafusion/issues/2278) -- Inner join incorrectly pushdown predicate with OR operation [\#2271](https://github.com/apache/arrow-datafusion/issues/2271) -- Ignored alias for columns with aggregate function and incorrect results when collecting statistics is enabled [\#2176](https://github.com/apache/arrow-datafusion/issues/2176) -- Join on path partitioned columns fails with error [\#2145](https://github.com/apache/arrow-datafusion/issues/2145) +- Fix Typos in Docs [\#2695](https://github.com/apache/datafusion/issues/2695) +- Unable to build a docker image [\#2691](https://github.com/apache/datafusion/issues/2691) +- Optimization pass `AggregateStatistics` changes type of output from `Int64` to `UInt64` [\#2673](https://github.com/apache/datafusion/issues/2673) +- ViewTable Circular Reference [\#2657](https://github.com/apache/datafusion/issues/2657) +- `ScalarValue::to_array_of_size` panics computing statistics for nested parquet file [\#2653](https://github.com/apache/datafusion/issues/2653) +- The result type of count/count_distinct [\#2635](https://github.com/apache/datafusion/issues/2635) +- limit_push_down is not working properly with OFFSET 
[\#2624](https://github.com/apache/datafusion/issues/2624) +- Avro Tests Fail To Compile [\#2570](https://github.com/apache/datafusion/issues/2570) +- Unused Window functions experssion is wrongly removed from LogicalPlan during optimalization [\#2542](https://github.com/apache/datafusion/issues/2542) +- Bug: ObjectStoreRegistry get_by_uri does not return correct path when "scheme" is provided [\#2525](https://github.com/apache/datafusion/issues/2525) +- There are duplicate and inconsistent copies of `datafusion.proto` [\#2514](https://github.com/apache/datafusion/issues/2514) +- Projection pushdown produces incorrect results when column names are reused [\#2462](https://github.com/apache/datafusion/issues/2462) +- Incorrect Parquet Projection For Nested Types [\#2453](https://github.com/apache/datafusion/issues/2453) +- LogicalPlanBuilder::scan_csv creates scans with invalid table names [\#2278](https://github.com/apache/datafusion/issues/2278) +- Inner join incorrectly pushdown predicate with OR operation [\#2271](https://github.com/apache/datafusion/issues/2271) +- Ignored alias for columns with aggregate function and incorrect results when collecting statistics is enabled [\#2176](https://github.com/apache/datafusion/issues/2176) +- Join on path partitioned columns fails with error [\#2145](https://github.com/apache/datafusion/issues/2145) **Documentation updates:** -- Fix Ballista link [\#2654](https://github.com/apache/arrow-datafusion/pull/2654) ([dsaxton](https://github.com/dsaxton)) -- MINOR: Add Blaze as a project using DataFusion [\#2618](https://github.com/apache/arrow-datafusion/pull/2618) ([yjshen](https://github.com/yjshen)) -- \[MINOR\] remove datafusion-cli's ballista feature from docs [\#2612](https://github.com/apache/arrow-datafusion/pull/2612) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- chore\(doc\) remove ballista from datafusion-cli readme [\#2604](https://github.com/apache/arrow-datafusion/pull/2604) 
([ming535](https://github.com/ming535)) +- Fix Ballista link [\#2654](https://github.com/apache/datafusion/pull/2654) ([dsaxton](https://github.com/dsaxton)) +- MINOR: Add Blaze as a project using DataFusion [\#2618](https://github.com/apache/datafusion/pull/2618) ([yjshen](https://github.com/yjshen)) +- \[MINOR\] remove datafusion-cli's ballista feature from docs [\#2612](https://github.com/apache/datafusion/pull/2612) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- chore\(doc\) remove ballista from datafusion-cli readme [\#2604](https://github.com/apache/datafusion/pull/2604) ([ming535](https://github.com/ming535)) **Closed issues:** -- \[Question\] Converting TableSource to custom TableProvider [\#2644](https://github.com/apache/arrow-datafusion/issues/2644) -- \[Question\] Why DataFusion is shipped with arrow version 9.1.0 on crates.io ? [\#2474](https://github.com/apache/arrow-datafusion/issues/2474) +- \[Question\] Converting TableSource to custom TableProvider [\#2644](https://github.com/apache/datafusion/issues/2644) +- \[Question\] Why DataFusion is shipped with arrow version 9.1.0 on crates.io ? 
[\#2474](https://github.com/apache/datafusion/issues/2474) **Merged pull requests:** -- Test optional features in CI [\#2708](https://github.com/apache/arrow-datafusion/pull/2708) ([tustvold](https://github.com/tustvold)) -- support indexed fields proto [\#2707](https://github.com/apache/arrow-datafusion/pull/2707) ([nl5887](https://github.com/nl5887)) -- Update sqlparser-rs to 0.18.0 [\#2705](https://github.com/apache/arrow-datafusion/pull/2705) ([alamb](https://github.com/alamb)) -- \[MINOR\]: Add documentation to `datafusion-row` modules [\#2704](https://github.com/apache/arrow-datafusion/pull/2704) ([alamb](https://github.com/alamb)) -- Make sure that the data types are supported in hashjoin before genera… [\#2702](https://github.com/apache/arrow-datafusion/pull/2702) ([AssHero](https://github.com/AssHero)) -- Move remaining code out of legacy `core/logical_plan` module [\#2701](https://github.com/apache/arrow-datafusion/pull/2701) ([andygrove](https://github.com/andygrove)) -- Move some tests from core to expr [\#2700](https://github.com/apache/arrow-datafusion/pull/2700) ([andygrove](https://github.com/andygrove)) -- MINOR: Improve Docs Readability [\#2696](https://github.com/apache/arrow-datafusion/pull/2696) ([ryanrussell](https://github.com/ryanrussell)) -- Combine limit and offset to `fetch` and `skip` and implement physical plan support [\#2694](https://github.com/apache/arrow-datafusion/pull/2694) ([ming535](https://github.com/ming535)) -- MINOR: Add datafusion-sql example [\#2693](https://github.com/apache/arrow-datafusion/pull/2693) ([andygrove](https://github.com/andygrove)) -- Remove Ballista related lines from Dockerfile [\#2692](https://github.com/apache/arrow-datafusion/pull/2692) ([mocknen](https://github.com/mocknen)) -- Show column names instead of indices in query plans [\#2690](https://github.com/apache/arrow-datafusion/pull/2690) ([andygrove](https://github.com/andygrove)) -- MINOR: Remove uses of TryClone for Parquet 
[\#2681](https://github.com/apache/arrow-datafusion/pull/2681) ([tustvold](https://github.com/tustvold)) -- Fix `AggregateStatistics` optimization so it doesn't change output type [\#2674](https://github.com/apache/arrow-datafusion/pull/2674) ([alamb](https://github.com/alamb)) -- If statistics of column Max/Min value does not exists in parquet file, sent Min/Max to None [\#2671](https://github.com/apache/arrow-datafusion/pull/2671) ([AssHero](https://github.com/AssHero)) -- MINOR: Move more expression code to `datafusion-expr` crate [\#2669](https://github.com/apache/arrow-datafusion/pull/2669) ([andygrove](https://github.com/andygrove)) -- MINOR: Rewrite imports in optimizer moduler [\#2667](https://github.com/apache/arrow-datafusion/pull/2667) ([andygrove](https://github.com/andygrove)) -- Update snmalloc-rs requirement from 0.2 to 0.3 [\#2663](https://github.com/apache/arrow-datafusion/pull/2663) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Add module doc for RuntimeEnv, SessionContext, TaskContext, etc... 
[\#2655](https://github.com/apache/arrow-datafusion/pull/2655) ([tustvold](https://github.com/tustvold)) -- Prune unused dependencies from datafusion-proto [\#2651](https://github.com/apache/arrow-datafusion/pull/2651) ([tustvold](https://github.com/tustvold)) -- MINOR: Implement serde for join filter [\#2649](https://github.com/apache/arrow-datafusion/pull/2649) ([andygrove](https://github.com/andygrove)) -- pushdown support for predicates in `ON` clause of joins [\#2647](https://github.com/apache/arrow-datafusion/pull/2647) ([korowa](https://github.com/korowa)) -- Move `SortKeyCursor` and `RowIndex` into modules, add `sort_key_cursor` test [\#2645](https://github.com/apache/arrow-datafusion/pull/2645) ([alamb](https://github.com/alamb)) -- Implement DESCRIBE \ [\#2642](https://github.com/apache/arrow-datafusion/pull/2642) ([LiuYuHui](https://github.com/LiuYuHui)) -- Implement `LogicalPlan` serde in `datafusion-proto` [\#2639](https://github.com/apache/arrow-datafusion/pull/2639) ([andygrove](https://github.com/andygrove)) -- Fix limit + offset pushdown [\#2638](https://github.com/apache/arrow-datafusion/pull/2638) ([ming535](https://github.com/ming535)) -- change result type of count/count_distinct from uint64 to int64 [\#2636](https://github.com/apache/arrow-datafusion/pull/2636) ([liukun4515](https://github.com/liukun4515)) -- if none columns in window expr are needed, remove the window exprs [\#2634](https://github.com/apache/arrow-datafusion/pull/2634) ([AssHero](https://github.com/AssHero)) -- Like, NotLike expressions work with literal `NULL` [\#2627](https://github.com/apache/arrow-datafusion/pull/2627) ([WinkerDu](https://github.com/WinkerDu)) -- MINOR: Refactor `datafusion-proto` dependencies and imports [\#2623](https://github.com/apache/arrow-datafusion/pull/2623) ([andygrove](https://github.com/andygrove)) -- MINOR: add optimizer struct [\#2616](https://github.com/apache/arrow-datafusion/pull/2616) ([jackwener](https://github.com/jackwener)) -- Remove 
FilterPushDown dependency on physical plan [\#2615](https://github.com/apache/arrow-datafusion/pull/2615) ([andygrove](https://github.com/andygrove)) -- Support CREATE OR REPLACE TABLE [\#2613](https://github.com/apache/arrow-datafusion/pull/2613) ([AssHero](https://github.com/AssHero)) -- Support binary mathematical operators work with `NULL` literals [\#2610](https://github.com/apache/arrow-datafusion/pull/2610) ([WinkerDu](https://github.com/WinkerDu)) -- chore: try fix CI coverage [\#2608](https://github.com/apache/arrow-datafusion/pull/2608) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- MINOR: Rename benchmark crate [\#2607](https://github.com/apache/arrow-datafusion/pull/2607) ([andygrove](https://github.com/andygrove)) -- chore\(dep\): bump cranelift to 0.84.0 [\#2598](https://github.com/apache/arrow-datafusion/pull/2598) ([waynexia](https://github.com/waynexia)) -- fix some typos [\#2597](https://github.com/apache/arrow-datafusion/pull/2597) ([ming535](https://github.com/ming535)) -- Support limit pushdown through left right outer join [\#2596](https://github.com/apache/arrow-datafusion/pull/2596) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Unignore rustdoc code examples in `datafusion-expr` crate [\#2590](https://github.com/apache/arrow-datafusion/pull/2590) ([andygrove](https://github.com/andygrove)) -- Evaluate JIT'd expression over arrays [\#2587](https://github.com/apache/arrow-datafusion/pull/2587) ([waynexia](https://github.com/waynexia)) -- \[minor\]Fix ci clippy for unused import [\#2586](https://github.com/apache/arrow-datafusion/pull/2586) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- \[Doc\]add doc for enable SIMD need `cargo nightly` [\#2577](https://github.com/apache/arrow-datafusion/pull/2577) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Add DataFrame `union_distinct` and fix documentation for `distinct` [\#2574](https://github.com/apache/arrow-datafusion/pull/2574) ([andygrove](https://github.com/andygrove)) -- Fix avro tests 
\(\#2570\) [\#2571](https://github.com/apache/arrow-datafusion/pull/2571) ([tustvold](https://github.com/tustvold)) -- Make datafusion-proto match exhaustive [\#2567](https://github.com/apache/arrow-datafusion/pull/2567) ([andygrove](https://github.com/andygrove)) -- Support limit push down for offset_plan [\#2566](https://github.com/apache/arrow-datafusion/pull/2566) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Introduce Expr.variant_name\(\) function [\#2564](https://github.com/apache/arrow-datafusion/pull/2564) ([jdye64](https://github.com/jdye64)) -- Fix some 404 links in the contribution guide [\#2561](https://github.com/apache/arrow-datafusion/pull/2561) ([hi-rustin](https://github.com/hi-rustin)) -- Update datafusion-cli readme cli version [\#2559](https://github.com/apache/arrow-datafusion/pull/2559) ([hi-rustin](https://github.com/hi-rustin)) -- MINOR: Move `expr_rewriter.rs` to `datafusion-expr` crate [\#2552](https://github.com/apache/arrow-datafusion/pull/2552) ([andygrove](https://github.com/andygrove)) -- Fix `JOIN`s with complex predicates in ON \(split ON expressions only by AND operator\) [\#2534](https://github.com/apache/arrow-datafusion/pull/2534) ([korowa](https://github.com/korowa)) -- Reduce duplication in file scan tests [\#2533](https://github.com/apache/arrow-datafusion/pull/2533) ([tustvold](https://github.com/tustvold)) -- Fix size_of_scalar test [\#2531](https://github.com/apache/arrow-datafusion/pull/2531) ([alamb](https://github.com/alamb)) -- Update to arrow-rs 14.0.0 [\#2528](https://github.com/apache/arrow-datafusion/pull/2528) ([alamb](https://github.com/alamb)) -- ObjectStoreRegistry get_by_uri now returns correct path when "scheme" is provided [\#2526](https://github.com/apache/arrow-datafusion/pull/2526) ([timvw](https://github.com/timvw)) -- MINOR: Add ORDER BY clause to test [\#2524](https://github.com/apache/arrow-datafusion/pull/2524) ([andygrove](https://github.com/andygrove)) -- Remove unused `binary_array_op_scalar!` 
in binary.rs [\#2512](https://github.com/apache/arrow-datafusion/pull/2512) ([alamb](https://github.com/alamb)) -- fix `NULL column` evaluation, tests for same [\#2510](https://github.com/apache/arrow-datafusion/pull/2510) ([alamb](https://github.com/alamb)) -- Fix projection pushdown produces incorrect results when column names are reused [\#2463](https://github.com/apache/arrow-datafusion/pull/2463) ([jonmmease](https://github.com/jonmmease)) -- Benchmark for sort preserving merge [\#2431](https://github.com/apache/arrow-datafusion/pull/2431) ([alamb](https://github.com/alamb)) -- Support GetIndexedFieldExpr for ScalarValue [\#2196](https://github.com/apache/arrow-datafusion/pull/2196) ([ovr](https://github.com/ovr)) +- Test optional features in CI [\#2708](https://github.com/apache/datafusion/pull/2708) ([tustvold](https://github.com/tustvold)) +- support indexed fields proto [\#2707](https://github.com/apache/datafusion/pull/2707) ([nl5887](https://github.com/nl5887)) +- Update sqlparser-rs to 0.18.0 [\#2705](https://github.com/apache/datafusion/pull/2705) ([alamb](https://github.com/alamb)) +- \[MINOR\]: Add documentation to `datafusion-row` modules [\#2704](https://github.com/apache/datafusion/pull/2704) ([alamb](https://github.com/alamb)) +- Make sure that the data types are supported in hashjoin before genera… [\#2702](https://github.com/apache/datafusion/pull/2702) ([AssHero](https://github.com/AssHero)) +- Move remaining code out of legacy `core/logical_plan` module [\#2701](https://github.com/apache/datafusion/pull/2701) ([andygrove](https://github.com/andygrove)) +- Move some tests from core to expr [\#2700](https://github.com/apache/datafusion/pull/2700) ([andygrove](https://github.com/andygrove)) +- MINOR: Improve Docs Readability [\#2696](https://github.com/apache/datafusion/pull/2696) ([ryanrussell](https://github.com/ryanrussell)) +- Combine limit and offset to `fetch` and `skip` and implement physical plan support 
[\#2694](https://github.com/apache/datafusion/pull/2694) ([ming535](https://github.com/ming535)) +- MINOR: Add datafusion-sql example [\#2693](https://github.com/apache/datafusion/pull/2693) ([andygrove](https://github.com/andygrove)) +- Remove Ballista related lines from Dockerfile [\#2692](https://github.com/apache/datafusion/pull/2692) ([mocknen](https://github.com/mocknen)) +- Show column names instead of indices in query plans [\#2690](https://github.com/apache/datafusion/pull/2690) ([andygrove](https://github.com/andygrove)) +- MINOR: Remove uses of TryClone for Parquet [\#2681](https://github.com/apache/datafusion/pull/2681) ([tustvold](https://github.com/tustvold)) +- Fix `AggregateStatistics` optimization so it doesn't change output type [\#2674](https://github.com/apache/datafusion/pull/2674) ([alamb](https://github.com/alamb)) +- If statistics of column Max/Min value does not exists in parquet file, sent Min/Max to None [\#2671](https://github.com/apache/datafusion/pull/2671) ([AssHero](https://github.com/AssHero)) +- MINOR: Move more expression code to `datafusion-expr` crate [\#2669](https://github.com/apache/datafusion/pull/2669) ([andygrove](https://github.com/andygrove)) +- MINOR: Rewrite imports in optimizer moduler [\#2667](https://github.com/apache/datafusion/pull/2667) ([andygrove](https://github.com/andygrove)) +- Update snmalloc-rs requirement from 0.2 to 0.3 [\#2663](https://github.com/apache/datafusion/pull/2663) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Add module doc for RuntimeEnv, SessionContext, TaskContext, etc... 
[\#2655](https://github.com/apache/datafusion/pull/2655) ([tustvold](https://github.com/tustvold)) +- Prune unused dependencies from datafusion-proto [\#2651](https://github.com/apache/datafusion/pull/2651) ([tustvold](https://github.com/tustvold)) +- MINOR: Implement serde for join filter [\#2649](https://github.com/apache/datafusion/pull/2649) ([andygrove](https://github.com/andygrove)) +- pushdown support for predicates in `ON` clause of joins [\#2647](https://github.com/apache/datafusion/pull/2647) ([korowa](https://github.com/korowa)) +- Move `SortKeyCursor` and `RowIndex` into modules, add `sort_key_cursor` test [\#2645](https://github.com/apache/datafusion/pull/2645) ([alamb](https://github.com/alamb)) +- Implement DESCRIBE \ [\#2642](https://github.com/apache/datafusion/pull/2642) ([LiuYuHui](https://github.com/LiuYuHui)) +- Implement `LogicalPlan` serde in `datafusion-proto` [\#2639](https://github.com/apache/datafusion/pull/2639) ([andygrove](https://github.com/andygrove)) +- Fix limit + offset pushdown [\#2638](https://github.com/apache/datafusion/pull/2638) ([ming535](https://github.com/ming535)) +- change result type of count/count_distinct from uint64 to int64 [\#2636](https://github.com/apache/datafusion/pull/2636) ([liukun4515](https://github.com/liukun4515)) +- if none columns in window expr are needed, remove the window exprs [\#2634](https://github.com/apache/datafusion/pull/2634) ([AssHero](https://github.com/AssHero)) +- Like, NotLike expressions work with literal `NULL` [\#2627](https://github.com/apache/datafusion/pull/2627) ([WinkerDu](https://github.com/WinkerDu)) +- MINOR: Refactor `datafusion-proto` dependencies and imports [\#2623](https://github.com/apache/datafusion/pull/2623) ([andygrove](https://github.com/andygrove)) +- MINOR: add optimizer struct [\#2616](https://github.com/apache/datafusion/pull/2616) ([jackwener](https://github.com/jackwener)) +- Remove FilterPushDown dependency on physical plan 
[\#2615](https://github.com/apache/datafusion/pull/2615) ([andygrove](https://github.com/andygrove)) +- Support CREATE OR REPLACE TABLE [\#2613](https://github.com/apache/datafusion/pull/2613) ([AssHero](https://github.com/AssHero)) +- Support binary mathematical operators work with `NULL` literals [\#2610](https://github.com/apache/datafusion/pull/2610) ([WinkerDu](https://github.com/WinkerDu)) +- chore: try fix CI coverage [\#2608](https://github.com/apache/datafusion/pull/2608) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- MINOR: Rename benchmark crate [\#2607](https://github.com/apache/datafusion/pull/2607) ([andygrove](https://github.com/andygrove)) +- chore\(dep\): bump cranelift to 0.84.0 [\#2598](https://github.com/apache/datafusion/pull/2598) ([waynexia](https://github.com/waynexia)) +- fix some typos [\#2597](https://github.com/apache/datafusion/pull/2597) ([ming535](https://github.com/ming535)) +- Support limit pushdown through left right outer join [\#2596](https://github.com/apache/datafusion/pull/2596) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Unignore rustdoc code examples in `datafusion-expr` crate [\#2590](https://github.com/apache/datafusion/pull/2590) ([andygrove](https://github.com/andygrove)) +- Evaluate JIT'd expression over arrays [\#2587](https://github.com/apache/datafusion/pull/2587) ([waynexia](https://github.com/waynexia)) +- \[minor\]Fix ci clippy for unused import [\#2586](https://github.com/apache/datafusion/pull/2586) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- \[Doc\]add doc for enable SIMD need `cargo nightly` [\#2577](https://github.com/apache/datafusion/pull/2577) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Add DataFrame `union_distinct` and fix documentation for `distinct` [\#2574](https://github.com/apache/datafusion/pull/2574) ([andygrove](https://github.com/andygrove)) +- Fix avro tests \(\#2570\) [\#2571](https://github.com/apache/datafusion/pull/2571) ([tustvold](https://github.com/tustvold)) +- Make 
datafusion-proto match exhaustive [\#2567](https://github.com/apache/datafusion/pull/2567) ([andygrove](https://github.com/andygrove)) +- Support limit push down for offset_plan [\#2566](https://github.com/apache/datafusion/pull/2566) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Introduce Expr.variant_name\(\) function [\#2564](https://github.com/apache/datafusion/pull/2564) ([jdye64](https://github.com/jdye64)) +- Fix some 404 links in the contribution guide [\#2561](https://github.com/apache/datafusion/pull/2561) ([hi-rustin](https://github.com/hi-rustin)) +- Update datafusion-cli readme cli version [\#2559](https://github.com/apache/datafusion/pull/2559) ([hi-rustin](https://github.com/hi-rustin)) +- MINOR: Move `expr_rewriter.rs` to `datafusion-expr` crate [\#2552](https://github.com/apache/datafusion/pull/2552) ([andygrove](https://github.com/andygrove)) +- Fix `JOIN`s with complex predicates in ON \(split ON expressions only by AND operator\) [\#2534](https://github.com/apache/datafusion/pull/2534) ([korowa](https://github.com/korowa)) +- Reduce duplication in file scan tests [\#2533](https://github.com/apache/datafusion/pull/2533) ([tustvold](https://github.com/tustvold)) +- Fix size_of_scalar test [\#2531](https://github.com/apache/datafusion/pull/2531) ([alamb](https://github.com/alamb)) +- Update to arrow-rs 14.0.0 [\#2528](https://github.com/apache/datafusion/pull/2528) ([alamb](https://github.com/alamb)) +- ObjectStoreRegistry get_by_uri now returns correct path when "scheme" is provided [\#2526](https://github.com/apache/datafusion/pull/2526) ([timvw](https://github.com/timvw)) +- MINOR: Add ORDER BY clause to test [\#2524](https://github.com/apache/datafusion/pull/2524) ([andygrove](https://github.com/andygrove)) +- Remove unused `binary_array_op_scalar!` in binary.rs [\#2512](https://github.com/apache/datafusion/pull/2512) ([alamb](https://github.com/alamb)) +- fix `NULL column` evaluation, tests for same 
[\#2510](https://github.com/apache/datafusion/pull/2510) ([alamb](https://github.com/alamb)) +- Fix projection pushdown produces incorrect results when column names are reused [\#2463](https://github.com/apache/datafusion/pull/2463) ([jonmmease](https://github.com/jonmmease)) +- Benchmark for sort preserving merge [\#2431](https://github.com/apache/datafusion/pull/2431) ([alamb](https://github.com/alamb)) +- Support GetIndexedFieldExpr for ScalarValue [\#2196](https://github.com/apache/datafusion/pull/2196) ([ovr](https://github.com/ovr)) diff --git a/dev/depcheck/README.md b/dev/depcheck/README.md index 4a628cdd88e9..4c1571051f22 100644 --- a/dev/depcheck/README.md +++ b/dev/depcheck/README.md @@ -23,4 +23,4 @@ in the DataFusion codebase. Specifically, it checks that no create's tests depend on another crate which depends on the first, which prevents publishing to crates.io, for example -[issue 9272]: https://github.com/apache/arrow-datafusion/issues/9277: +[issue 9272]: https://github.com/apache/datafusion/issues/9277: diff --git a/dev/depcheck/src/main.rs b/dev/depcheck/src/main.rs index b52074c9b1d3..1599fdd4188d 100644 --- a/dev/depcheck/src/main.rs +++ b/dev/depcheck/src/main.rs @@ -29,7 +29,7 @@ use cargo::util::config::Config; /// (which prevents publishing on crates.io) by parsing the Cargo.toml files and /// checking the dependency graph. /// -/// See https://github.com/apache/arrow-datafusion/issues/9278 for more details +/// See https://github.com/apache/datafusion/issues/9278 for more details fn main() -> CargoResult<()> { let config = Config::default()?; // This is the path for the depcheck binary diff --git a/dev/release/README.md b/dev/release/README.md index e1aaae2aee69..4d0ff0e3aea9 100644 --- a/dev/release/README.md +++ b/dev/release/README.md @@ -39,12 +39,12 @@ patch release: 1. Follow normal workflow to create PR to `main` branch and wait for its approval and merge. 1. 
After PR is squash merged to `main`, branch from most recent release branch (e.g. `branch-37`), cherry-pick the commit and create a PR targeting the release branch [example backport PR]. -[example release issue]: https://github.com/apache/arrow-datafusion/issues/9904 -[example backport pr]: https://github.com/apache/arrow-datafusion/pull/10123 +[example release issue]: https://github.com/apache/datafusion/issues/9904 +[example backport pr]: https://github.com/apache/datafusion/pull/10123 ## Release Prerequisite -- Have upstream git repo `git@github.com:apache/arrow-datafusion.git` add as git remote `apache`. +- Have upstream git repo `git@github.com:apache/datafusion.git` add as git remote `apache`. - Created a personal access token in GitHub for changelog automation script. - Github PAT should be created with `repo` access - Make sure your signing key is added to the following files in SVN: @@ -96,7 +96,7 @@ pip3 install PyGitHub Run the following command to generate the changelog content. 
```bash -$ GITHUB_TOKEN= ./dev/release/generate-changelog.py apache/arrow-datafusion 24.0.0 HEAD > dev/changelog/25.0.0.md +$ GITHUB_TOKEN= ./dev/release/generate-changelog.py apache/datafusion 24.0.0 HEAD > dev/changelog/25.0.0.md ``` This script creates a changelog from GitHub PRs based on the labels associated with them as well as looking for @@ -116,9 +116,9 @@ This process is not fully automated, so there are some additional manual steps: - Add the following content (copy from the previous version's changelog and update as appropriate: ``` -## [24.0.0](https://github.com/apache/arrow-datafusion/tree/24.0.0) (2023-05-06) +## [24.0.0](https://github.com/apache/datafusion/tree/24.0.0) (2023-05-06) -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/23.0.0...24.0.0) +[Full Changelog](https://github.com/apache/datafusion/compare/23.0.0...24.0.0) ``` ## Prepare release commits and PR @@ -126,7 +126,7 @@ This process is not fully automated, so there are some additional manual steps: Prepare a PR to update `CHANGELOG.md` and versions to reflect the planned release. -See [#9697](https://github.com/apache/arrow-datafusion/pull/9697) for an example. +See [#9697](https://github.com/apache/datafusion/pull/9697) for an example. Here are the commands that could be used to prepare the `5.1.0` release: @@ -222,9 +222,9 @@ Here is my vote: +1 -[1]: https://github.com/apache/arrow-datafusion/tree/a5dd428f57e62db20a945e8b1895de91405958c4 +[1]: https://github.com/apache/datafusion/tree/a5dd428f57e62db20a945e8b1895de91405958c4 [2]: https://dist.apache.org/repos/dist/dev/arrow/apache-arrow-datafusion-5.1.0 -[3]: https://github.com/apache/arrow-datafusion/blob/a5dd428f57e62db20a945e8b1895de91405958c4/CHANGELOG.md +[3]: https://github.com/apache/datafusion/blob/a5dd428f57e62db20a945e8b1895de91405958c4/CHANGELOG.md ``` For the release to become "official" it needs at least three PMC members to vote +1 on it. 
diff --git a/dev/release/create-tarball.sh b/dev/release/create-tarball.sh index 29ac5d47e9c0..e345773287cf 100755 --- a/dev/release/create-tarball.sh +++ b/dev/release/create-tarball.sh @@ -95,7 +95,7 @@ on the release. The vote will be open for at least 72 hours. Only votes from PMC members are binding, but all members of the community are encouraged to test the release and vote with "(non-binding)". -The standard verification procedure is documented at https://github.com/apache/arrow-datafusion/blob/main/dev/release/README.md#verifying-release-candidates. +The standard verification procedure is documented at https://github.com/apache/datafusion/blob/main/dev/release/README.md#verifying-release-candidates. [ ] +1 Release this as Apache DataFusion ${version} [ ] +0 @@ -105,9 +105,9 @@ Here is my vote: +1 -[1]: https://github.com/apache/arrow-datafusion/tree/${release_hash} +[1]: https://github.com/apache/datafusion/tree/${release_hash} [2]: ${url} -[3]: https://github.com/apache/arrow-datafusion/blob/${release_hash}/CHANGELOG.md +[3]: https://github.com/apache/datafusion/blob/${release_hash}/CHANGELOG.md MAIL echo "---------------------------------------------------------" diff --git a/dev/release/download-python-wheels.py b/dev/release/download-python-wheels.py index 043cb924a0c9..ca9789ec67ff 100644 --- a/dev/release/download-python-wheels.py +++ b/dev/release/download-python-wheels.py @@ -53,7 +53,7 @@ def main(): "Accept": "application/vnd.github.v3+json", "Authorization": f"token {ghp_token}", } - url = f"https://api.github.com/repos/apache/arrow-datafusion/actions/runs?branch={tag}" + url = f"https://api.github.com/repos/apache/datafusion/actions/runs?branch={tag}" resp = requests.get(url, headers=headers) resp.raise_for_status() diff --git a/dev/release/generate-changelog.py b/dev/release/generate-changelog.py index f419bdb3a1ac..74e77ce846e5 100755 --- a/dev/release/generate-changelog.py +++ b/dev/release/generate-changelog.py @@ -103,7 +103,7 @@ def 
cli(args=None): args = sys.argv[1:] parser = argparse.ArgumentParser() - parser.add_argument("project", help="The project name e.g. apache/arrow-datafusion") + parser.add_argument("project", help="The project name e.g. apache/datafusion") parser.add_argument("tag1", help="The previous release tag") parser.add_argument("tag2", help="The current release tag") args = parser.parse_args() diff --git a/docs/README.md b/docs/README.md index 8b55e8756e19..32fe98466ce7 100644 --- a/docs/README.md +++ b/docs/README.md @@ -58,12 +58,12 @@ automatically updated. This documentation is hosted at https://arrow.apache.org/datafusion/ When the PR is merged to the `main` branch of the DataFusion -repository, a [github workflow](https://github.com/apache/arrow-datafusion/blob/main/.github/workflows/docs.yaml) which: +repository, a [github workflow](https://github.com/apache/datafusion/blob/main/.github/workflows/docs.yaml) which: 1. Builds the html content -2. Pushes the html content to the [`asf-site`](https://github.com/apache/arrow-datafusion/tree/asf-site) branch in this repository. +2. Pushes the html content to the [`asf-site`](https://github.com/apache/datafusion/tree/asf-site) branch in this repository. The Apache Software Foundation provides https://arrow.apache.org/, which serves content based on the configuration in -[.asf.yaml](https://github.com/apache/arrow-datafusion/blob/main/.asf.yaml), +[.asf.yaml](https://github.com/apache/datafusion/blob/main/.asf.yaml), which specifies the target as https://arrow.apache.org/datafusion/. diff --git a/docs/source/contributor-guide/architecture.md b/docs/source/contributor-guide/architecture.md index 23232e347fe9..68541f877768 100644 --- a/docs/source/contributor-guide/architecture.md +++ b/docs/source/contributor-guide/architecture.md @@ -24,4 +24,4 @@ DataFusion's code structure and organization is described in the possible. You can find the most up to date version in the [source code]. 
[crates.io documentation]: https://docs.rs/datafusion/latest/datafusion/index.html#architecture -[source code]: https://github.com/apache/arrow-datafusion/blob/main/datafusion/core/src/lib.rs +[source code]: https://github.com/apache/datafusion/blob/main/datafusion/core/src/lib.rs diff --git a/docs/source/contributor-guide/communication.md b/docs/source/contributor-guide/communication.md index fcec6c36fd2f..6e8e28cee309 100644 --- a/docs/source/contributor-guide/communication.md +++ b/docs/source/contributor-guide/communication.md @@ -29,7 +29,7 @@ conduct](https://www.apache.org/foundation/policies/conduct.html). ## GitHub The vast majority of communication occurs in the open on our -[github repository](https://github.com/apache/arrow-datafusion) in the form of tickets, issues, discussions, and Pull Requests. +[github repository](https://github.com/apache/datafusion) in the form of tickets, issues, discussions, and Pull Requests. ## Slack and Discord diff --git a/docs/source/contributor-guide/index.md b/docs/source/contributor-guide/index.md index 9edc3fa2fb00..110fddd995f5 100644 --- a/docs/source/contributor-guide/index.md +++ b/docs/source/contributor-guide/index.md @@ -60,7 +60,7 @@ helps avoid wasted effort by determining early if the feature is a good fit for DataFusion before too much time is invested. It also often helps to discuss your ideas with the community to get feedback on implementation. -[good-first-issue]: https://github.com/apache/arrow-datafusion/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22 +[good-first-issue]: https://github.com/apache/datafusion/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22 # Developer's guide @@ -162,7 +162,7 @@ DataFusion is written in Rust and it uses a standard rust toolkit: - `cargo test` to test - etc. -Note that running `cargo test` requires significant memory resources, due to cargo running many tests in parallel by default. 
If you run into issues with slow tests or system lock ups, you can significantly reduce the memory required by instead running `cargo test -- --test-threads=1`. For more information see [this issue](https://github.com/apache/arrow-datafusion/issues/5347). +Note that running `cargo test` requires significant memory resources, due to cargo running many tests in parallel by default. If you run into issues with slow tests or system lock ups, you can significantly reduce the memory required by instead running `cargo test -- --test-threads=1`. For more information see [this issue](https://github.com/apache/datafusion/issues/5347). Testing setup: @@ -196,7 +196,7 @@ Tests for code in an individual module are defined in the same source file with ### sqllogictests Tests -DataFusion's SQL implementation is tested using [sqllogictest](https://github.com/apache/arrow-datafusion/tree/main/datafusion/sqllogictest) which are run like any other Rust test using `cargo test --test sqllogictests`. +DataFusion's SQL implementation is tested using [sqllogictest](https://github.com/apache/datafusion/tree/main/datafusion/sqllogictest) which are run like any other Rust test using `cargo test --test sqllogictests`. `sqllogictests` tests may be less convenient for new contributors who are familiar with writing `.rs` tests as they require learning another tool. However, `sqllogictest` based tests are much easier to develop and maintain as they 1) do not require a slow recompile/link cycle and 2) can be automatically updated via `cargo test --test sqllogictests -- --complete`. @@ -204,7 +204,7 @@ Like similar systems such as [DuckDB](https://duckdb.org/dev/testing), DataFusio ### Rust Integration Tests -There are several tests of the public interface of the DataFusion library in the [tests](https://github.com/apache/arrow-datafusion/tree/main/datafusion/core/tests) directory. 
+There are several tests of the public interface of the DataFusion library in the [tests](https://github.com/apache/datafusion/tree/main/datafusion/core/tests) directory. You can run these tests individually using `cargo` as normal command such as @@ -224,7 +224,7 @@ Criterion integrates with Cargo's built-in [benchmark support](https://doc.rust- cargo bench --bench BENCHMARK_NAME ``` -A full list of benchmarks can be found [here](https://github.com/apache/arrow-datafusion/tree/main/datafusion/core/benches). +A full list of benchmarks can be found [here](https://github.com/apache/datafusion/tree/main/datafusion/core/benches). _[cargo-criterion](https://github.com/bheisler/cargo-criterion) may also be used for more advanced reporting._ @@ -263,7 +263,7 @@ More information on [Baselines](https://bheisler.github.io/criterion.rs/book/use ### Upstream Benchmark Suites -Instructions and tooling for running upstream benchmark suites against DataFusion can be found in [benchmarks](https://github.com/apache/arrow-datafusion/tree/main/benchmarks). +Instructions and tooling for running upstream benchmark suites against DataFusion can be found in [benchmarks](https://github.com/apache/datafusion/tree/main/benchmarks). These are valuable for comparative evaluation against alternative Arrow implementations and query engines. 
diff --git a/docs/source/contributor-guide/quarterly_roadmap.md b/docs/source/contributor-guide/quarterly_roadmap.md index 26c503f7e506..ee82617225aa 100644 --- a/docs/source/contributor-guide/quarterly_roadmap.md +++ b/docs/source/contributor-guide/quarterly_roadmap.md @@ -23,9 +23,9 @@ A quarterly roadmap will be published to give the DataFusion community visibilit ## 2023 Q4 -- Improve data output (`COPY`, `INSERT` and DataFrame) output capability [#6569](https://github.com/apache/arrow-datafusion/issues/6569) -- Implementation of `ARRAY` types and related functions [#6980](https://github.com/apache/arrow-datafusion/issues/6980) -- Write an industrial paper about DataFusion for SIGMOD [#6782](https://github.com/apache/arrow-datafusion/issues/6782) +- Improve data output (`COPY`, `INSERT` and DataFrame) output capability [#6569](https://github.com/apache/datafusion/issues/6569) +- Implementation of `ARRAY` types and related functions [#6980](https://github.com/apache/datafusion/issues/6980) +- Write an industrial paper about DataFusion for SIGMOD [#6782](https://github.com/apache/datafusion/issues/6782) ## 2022 Q2 diff --git a/docs/source/contributor-guide/roadmap.md b/docs/source/contributor-guide/roadmap.md index a7e81555b77a..a6d78d9311aa 100644 --- a/docs/source/contributor-guide/roadmap.md +++ b/docs/source/contributor-guide/roadmap.md @@ -26,7 +26,7 @@ align to that vision. ## Planning `EPIC`s DataFusion uses [GitHub -issues](https://github.com/apache/arrow-datafusion/issues) to track +issues](https://github.com/apache/datafusion/issues) to track planned work. We collect related tickets using tracking issues labeled with `[EPIC]` which contain discussion and links to more detailed items. @@ -42,4 +42,4 @@ start a conversation using a github issue or the [dev@arrow.apache.org](mailto:dev@arrow.apache.org) mailing list to make review efficient and avoid surprises. 
-[The current list of `EPIC`s can be found here](https://github.com/apache/arrow-datafusion/issues?q=is%3Aissue+is%3Aopen+epic). +[The current list of `EPIC`s can be found here](https://github.com/apache/datafusion/issues?q=is%3Aissue+is%3Aopen+epic). diff --git a/docs/source/index.rst b/docs/source/index.rst index 257d09b7d417..41cd7d9aee2a 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -27,9 +27,9 @@ Apache DataFusion

- Star + Star - Fork + Fork

DataFusion is a very fast, extensible query engine for building high-quality data-centric systems in @@ -46,7 +46,7 @@ The `example usage`_ section in the user guide and the `datafusion-examples`_ co Please see the `developer’s guide`_ for contributing and `communication`_ for getting in touch with us. .. _example usage: user-guide/example-usage.html -.. _datafusion-examples: https://github.com/apache/arrow-datafusion/tree/master/datafusion-examples +.. _datafusion-examples: https://github.com/apache/datafusion/tree/master/datafusion-examples .. _developer’s guide: contributor-guide/index.html#developer-s-guide .. _communication: contributor-guide/communication.html @@ -55,10 +55,10 @@ Please see the `developer’s guide`_ for contributing and `communication`_ for :maxdepth: 1 :caption: Links - Github and Issue Tracker + Github and Issue Tracker crates.io API Docs - Code of conduct + Code of conduct .. _toc.guide: .. toctree:: diff --git a/docs/source/library-user-guide/adding-udfs.md b/docs/source/library-user-guide/adding-udfs.md index 653c1f9d3784..f805f0a99292 100644 --- a/docs/source/library-user-guide/adding-udfs.md +++ b/docs/source/library-user-guide/adding-udfs.md @@ -174,7 +174,7 @@ let udf = create_udf( [`scalarudf`]: https://docs.rs/datafusion/latest/datafusion/logical_expr/struct.ScalarUDF.html [`create_udf`]: https://docs.rs/datafusion/latest/datafusion/logical_expr/fn.create_udf.html [`process_scalar_func_inputs`]: https://docs.rs/datafusion/latest/datafusion/physical_expr/functions/fn.process_scalar_func_inputs.html -[`advanced_udf.rs`]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/advanced_udf.rs +[`advanced_udf.rs`]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/advanced_udf.rs A few things to note: @@ -297,7 +297,7 @@ let smooth_it = create_udwf( [`windowudf`]: https://docs.rs/datafusion/latest/datafusion/logical_expr/struct.WindowUDF.html [`create_udwf`]: 
https://docs.rs/datafusion/latest/datafusion/logical_expr/fn.create_udwf.html -[`advanced_udwf.rs`]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/advanced_udwf.rs +[`advanced_udwf.rs`]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/advanced_udwf.rs The `create_udwf` has five arguments to check: @@ -319,7 +319,7 @@ ctx.register_udwf(smooth_it); At this point, you can use the `smooth_it` function in your query: -For example, if we have a [`cars.csv`](https://github.com/apache/arrow-datafusion/blob/main/datafusion/core/tests/data/cars.csv) whose contents like +For example, if we have a [`cars.csv`](https://github.com/apache/datafusion/blob/main/datafusion/core/tests/data/cars.csv) whose contents like ``` car,speed,time @@ -500,7 +500,7 @@ let geometric_mean = create_udaf( [`aggregateudf`]: https://docs.rs/datafusion/latest/datafusion/logical_expr/struct.AggregateUDF.html [`create_udaf`]: https://docs.rs/datafusion/latest/datafusion/logical_expr/fn.create_udaf.html -[`advanced_udaf.rs`]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/advanced_udaf.rs +[`advanced_udaf.rs`]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/advanced_udaf.rs The `create_udaf` has six arguments to check: @@ -619,7 +619,7 @@ pretty::print_batches(&results)?; // +---+ ``` -[1]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/simple_udf.rs -[2]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/simple_udwf.rs -[3]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/simple_udaf.rs -[4]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/simple_udtf.rs +[1]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/simple_udf.rs +[2]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/simple_udwf.rs 
+[3]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/simple_udaf.rs +[4]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/simple_udtf.rs diff --git a/docs/source/library-user-guide/catalogs.md b/docs/source/library-user-guide/catalogs.md index d30e26f1964a..e9d157df5f2a 100644 --- a/docs/source/library-user-guide/catalogs.md +++ b/docs/source/library-user-guide/catalogs.md @@ -19,7 +19,7 @@ # Catalogs, Schemas, and Tables -This section describes how to create and manage catalogs, schemas, and tables in DataFusion. For those wanting to dive into the code quickly please see the [example](https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/catalog.rs). +This section describes how to create and manage catalogs, schemas, and tables in DataFusion. For those wanting to dive into the code quickly please see the [example](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/catalog.rs). ## General Concepts diff --git a/docs/source/library-user-guide/custom-table-providers.md b/docs/source/library-user-guide/custom-table-providers.md index 11024f77e0d0..f53ac6cfae97 100644 --- a/docs/source/library-user-guide/custom-table-providers.md +++ b/docs/source/library-user-guide/custom-table-providers.md @@ -176,6 +176,6 @@ More abstractly, see the following traits for more information on how to impleme - `FileFormat` - a trait for reading a file format - `ListingTableProvider` - a useful trait for implementing a `TableProvider` that lists files in a directory -[ex]: https://github.com/apache/arrow-datafusion/blob/a5e86fae3baadbd99f8fd0df83f45fde22f7b0c6/datafusion-examples/examples/custom_datasource.rs#L214C1-L276 -[csv]: https://github.com/apache/arrow-datafusion/blob/a5e86fae3baadbd99f8fd0df83f45fde22f7b0c6/datafusion/core/src/datasource/physical_plan/csv.rs#L57-L70 -[parquet]: 
https://github.com/apache/arrow-datafusion/blob/a5e86fae3baadbd99f8fd0df83f45fde22f7b0c6/datafusion/core/src/datasource/physical_plan/parquet.rs#L77-L104 +[ex]: https://github.com/apache/datafusion/blob/a5e86fae3baadbd99f8fd0df83f45fde22f7b0c6/datafusion-examples/examples/custom_datasource.rs#L214C1-L276 +[csv]: https://github.com/apache/datafusion/blob/a5e86fae3baadbd99f8fd0df83f45fde22f7b0c6/datafusion/core/src/datasource/physical_plan/csv.rs#L57-L70 +[parquet]: https://github.com/apache/datafusion/blob/a5e86fae3baadbd99f8fd0df83f45fde22f7b0c6/datafusion/core/src/datasource/physical_plan/parquet.rs#L77-L104 diff --git a/docs/source/library-user-guide/working-with-exprs.md b/docs/source/library-user-guide/working-with-exprs.md index a839420aa5b2..e0c9e69eb6ed 100644 --- a/docs/source/library-user-guide/working-with-exprs.md +++ b/docs/source/library-user-guide/working-with-exprs.md @@ -19,7 +19,7 @@ # Working with `Expr`s - + `Expr` is short for "expression". It is a core abstraction in DataFusion for representing a computation, and follows the standard "expression tree" abstraction found in most compilers and databases. @@ -52,7 +52,7 @@ As the writer of a library, you can use `Expr`s to represent computations that y ## Creating and Evaluating `Expr`s -Please see [expr_api.rs](https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/expr_api.rs) for well commented code for creating, evaluating, simplifying, and analyzing `Expr`s. +Please see [expr_api.rs](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/expr_api.rs) for well commented code for creating, evaluating, simplifying, and analyzing `Expr`s. ## A Scalar UDF Example @@ -80,7 +80,7 @@ If you'd like to learn more about `Expr`s, before we get into the details of cre ## Rewriting `Expr`s -[rewrite_expr.rs](https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/rewrite_expr.rs) contains example code for rewriting `Expr`s. 
+[rewrite_expr.rs](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/rewrite_expr.rs) contains example code for rewriting `Expr`s. Rewriting Expressions is the process of taking an `Expr` and transforming it into another `Expr`. This is useful for a number of reasons, including: diff --git a/docs/source/user-guide/cli/installation.md b/docs/source/user-guide/cli/installation.md index f7fcd290729d..3a71240783e5 100644 --- a/docs/source/user-guide/cli/installation.md +++ b/docs/source/user-guide/cli/installation.md @@ -55,7 +55,7 @@ that there is `.dockerignore` file in the root of the repository that may need t this to work. ```bash -git clone https://github.com/apache/arrow-datafusion -cd arrow-datafusion +git clone https://github.com/apache/datafusion +cd datafusion git checkout 12.0.0 docker build -f datafusion-cli/Dockerfile . --tag datafusion-cli diff --git a/docs/source/user-guide/example-usage.md b/docs/source/user-guide/example-usage.md index 6e4bf68fa018..25b398461f0e 100644 --- a/docs/source/user-guide/example-usage.md +++ b/docs/source/user-guide/example-usage.md @@ -19,9 +19,9 @@ # Example Usage -In this example some simple processing is performed on the [`example.csv`](https://github.com/apache/arrow-datafusion/blob/main/datafusion/core/tests/data/example.csv) file. +In this example some simple processing is performed on the [`example.csv`](https://github.com/apache/datafusion/blob/main/datafusion/core/tests/data/example.csv) file. -Even [`more code examples`](https://github.com/apache/arrow-datafusion/tree/main/datafusion-examples) attached to the project. +Even [`more code examples`](https://github.com/apache/datafusion/tree/main/datafusion-examples) attached to the project. 
## Add published DataFusion dependency @@ -35,23 +35,23 @@ tokio = "1.0" ## Add latest non published DataFusion dependency -DataFusion changes are published to `crates.io` according to [release schedule](https://github.com/apache/arrow-datafusion/blob/main/dev/release/README.md#release-process) +DataFusion changes are published to `crates.io` according to [release schedule](https://github.com/apache/datafusion/blob/main/dev/release/README.md#release-process) In case if it is required to test out DataFusion changes which are merged but yet to be published, Cargo supports adding dependency directly to Github branch ```toml -datafusion = { git = "https://github.com/apache/arrow-datafusion", branch = "main"} +datafusion = { git = "https://github.com/apache/datafusion", branch = "main"} ``` Also it works on the package level ```toml -datafusion-common = { git = "https://github.com/apache/arrow-datafusion", branch = "main", package = "datafusion-common"} +datafusion-common = { git = "https://github.com/apache/datafusion", branch = "main", package = "datafusion-common"} ``` And with features ```toml -datafusion = { git = "https://github.com/apache/arrow-datafusion", branch = "main", default-features = false, features = ["unicode_expressions"] } +datafusion = { git = "https://github.com/apache/datafusion", branch = "main", default-features = false, features = ["unicode_expressions"] } ``` More on [Cargo dependencies](https://doc.rust-lang.org/cargo/reference/specifying-dependencies.html#specifying-dependencies) @@ -279,4 +279,28 @@ backtrace: 0: std::backtrace_rs::backtrace::libunwind::trace ............ ``` +The backtraces are useful when debugging code. 
For example, given the following test in `datafusion/core/src/physical_planner.rs`: + +```rust +#[tokio::test] +async fn test_get_backtrace_for_failed_code() -> Result<()> { + let ctx = SessionContext::new(); + + let sql = " + select row_numer() over (partition by a order by a) from (select 1 a); + "; + + let _ = ctx.sql(sql).await?.collect().await?; + + Ok(()) +} +``` + +To obtain a backtrace: + +```bash +cargo build --features=backtrace +RUST_BACKTRACE=1 cargo test --features=backtrace --package datafusion --lib -- physical_planner::tests::test_get_backtrace_for_failed_code --exact --nocapture +``` + -Note: The backtrace wrapped into systems calls, so some steps on top of the backtrace can be ignored +Note: The backtrace is wrapped in system calls, so some steps at the top of the backtrace can be ignored diff --git a/docs/source/user-guide/introduction.md b/docs/source/user-guide/introduction.md index be15848407a2..c81cb00a26ea 100644 --- a/docs/source/user-guide/introduction.md +++ b/docs/source/user-guide/introduction.md @@ -27,7 +27,7 @@ project. DataFusion offers SQL and Dataframe APIs, excellent [performance](https://benchmark.clickhouse.com/), built-in support for CSV, Parquet, JSON, and Avro, [python bindings], extensive customization, a great community, and more. 
-[python bindings]: https://github.com/apache/arrow-datafusion-python +[python bindings]: https://github.com/apache/datafusion-python ## Project Goals @@ -96,7 +96,7 @@ Here are some active projects using DataFusion: - [Arroyo](https://github.com/ArroyoSystems/arroyo) Distributed stream processing engine in Rust - [Ballista](https://github.com/apache/arrow-ballista) Distributed SQL Query Engine -- [Comet](https://github.com/apache/arrow-datafusion-comet) Apache Spark native query execution plugin +- [Comet](https://github.com/apache/datafusion-comet) Apache Spark native query execution plugin - [CnosDB](https://github.com/cnosdb/cnosdb) Open Source Distributed Time Series Database - [Cube Store](https://github.com/cube-js/cube.js/tree/master/rust) - [Dask SQL](https://github.com/dask-contrib/dask-sql) Distributed SQL query engine in Python @@ -161,7 +161,7 @@ provide integrations with other systems, some of which are described below: ### Language Bindings - [datafusion-c](https://github.com/datafusion-contrib/datafusion-c) -- [datafusion-python](https://github.com/apache/arrow-datafusion-python) +- [datafusion-python](https://github.com/apache/datafusion-python) - [datafusion-ruby](https://github.com/datafusion-contrib/datafusion-ruby) - [datafusion-java](https://github.com/datafusion-contrib/datafusion-java) diff --git a/docs/source/user-guide/sql/data_types.md b/docs/source/user-guide/sql/data_types.md index bfbd3433f1cf..0e974550a84d 100644 --- a/docs/source/user-guide/sql/data_types.md +++ b/docs/source/user-guide/sql/data_types.md @@ -60,20 +60,20 @@ select arrow_cast(now(), 'Timestamp(Second, None)'); ## Numeric Types -| SQL DataType | Arrow DataType | Notes | -| ------------------------------------ | :----------------------------- | ----------------------------------------------------------------------------------------------------------- | -| `TINYINT` | `Int8` | | -| `SMALLINT` | `Int16` | | -| `INT` or `INTEGER` | `Int32` | | -| `BIGINT` | `Int64` | | -| 
`TINYINT UNSIGNED` | `UInt8` | | -| `SMALLINT UNSIGNED` | `UInt16` | | -| `INT UNSIGNED` or `INTEGER UNSIGNED` | `UInt32` | | -| `BIGINT UNSIGNED` | `UInt64` | | -| `FLOAT` | `Float32` | | -| `REAL` | `Float32` | | -| `DOUBLE` | `Float64` | | -| `DECIMAL(precision, scale)` | `Decimal128(precision, scale)` | Decimal support is currently experimental ([#3523](https://github.com/apache/arrow-datafusion/issues/3523)) | +| SQL DataType | Arrow DataType | Notes | +| ------------------------------------ | :----------------------------- | ----------------------------------------------------------------------------------------------------- | +| `TINYINT` | `Int8` | | +| `SMALLINT` | `Int16` | | +| `INT` or `INTEGER` | `Int32` | | +| `BIGINT` | `Int64` | | +| `TINYINT UNSIGNED` | `UInt8` | | +| `SMALLINT UNSIGNED` | `UInt16` | | +| `INT UNSIGNED` or `INTEGER UNSIGNED` | `UInt32` | | +| `BIGINT UNSIGNED` | `UInt64` | | +| `FLOAT` | `Float32` | | +| `REAL` | `Float32` | | +| `DOUBLE` | `Float64` | | +| `DECIMAL(precision, scale)` | `Decimal128(precision, scale)` | Decimal support is currently experimental ([#3523](https://github.com/apache/datafusion/issues/3523)) | ## Date/Time Types diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index 217bd5f05a86..624e86db3565 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -1342,7 +1342,7 @@ SELECT regexp_like('aBc', '(b|d)', 'i'); +--------------------------------------------------+ ``` -Additional examples can be found [here](https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/regexp.rs) +Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/regexp.rs) ### `regexp_match` @@ -1383,7 +1383,7 @@ SELECT regexp_match('aBc', '(b|d)', 'i'); +---------------------------------------------------+ ``` -Additional examples can be 
found [here](https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/regexp.rs) +Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/regexp.rs) ### `regexp_replace` @@ -1427,7 +1427,7 @@ SELECT regexp_replace('aBc', '(b|d)', 'Ab\\1a', 'i'); +-------------------------------------------------------------------+ ``` -Additional examples can be found [here](https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/regexp.rs) +Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/regexp.rs) ### `position` @@ -1664,7 +1664,7 @@ make_date(year, month, day) +-----------------------------------------------+ ``` -Additional examples can be found [here](https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/make_date.rs) +Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/make_date.rs) ### `to_char` @@ -1696,7 +1696,7 @@ to_char(expression, format) Additional examples can be found [here] -[here]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/to_char.rs +[here]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_char.rs #### Aliases @@ -1745,7 +1745,7 @@ to_timestamp(expression[, ..., format_n]) +--------------------------------------------------------------------------------------------------------+ ``` -Additional examples can be found [here](https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs) +Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs) ### `to_timestamp_millis` @@ -1784,7 +1784,7 @@ to_timestamp_millis(expression[, ..., format_n]) 
+---------------------------------------------------------------------------------------------------------------+ ``` -Additional examples can be found [here](https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs) +Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs) ### `to_timestamp_micros` @@ -1823,7 +1823,7 @@ to_timestamp_micros(expression[, ..., format_n]) +---------------------------------------------------------------------------------------------------------------+ ``` -Additional examples can be found [here](https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs) +Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs) ### `to_timestamp_nanos` @@ -1862,7 +1862,7 @@ to_timestamp_nanos(expression[, ..., format_n]) +---------------------------------------------------------------------------------------------------------------+ ``` -Additional examples can be found [here](https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs) +Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs) ### `to_timestamp_seconds` @@ -1901,7 +1901,7 @@ to_timestamp_seconds(expression[, ..., format_n]) +----------------------------------------------------------------------------------------------------------------+ ``` -Additional examples can be found [here](https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs) +Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs) ### `from_unixtime` diff --git a/docs/source/user-guide/sql/sql_status.md b/docs/source/user-guide/sql/sql_status.md index 
709534adf46e..cb9bc0bb67b3 100644 --- a/docs/source/user-guide/sql/sql_status.md +++ b/docs/source/user-guide/sql/sql_status.md @@ -53,14 +53,14 @@ - [x] `SHOW CREATE TABLE ` - [x] Basic SQL [Information Schema](./information_schema.md) (`TABLES`, `VIEWS`, `COLUMNS`) - [ ] Full SQL [Information Schema](./information_schema.md) support -- [ ] Support for nested types (`ARRAY`/`LIST` and `STRUCT`. See [#2326](https://github.com/apache/arrow-datafusion/issues/2326) for details) +- [ ] Support for nested types (`ARRAY`/`LIST` and `STRUCT`. See [#2326](https://github.com/apache/datafusion/issues/2326) for details) - [x] Read support - [x] Write support - [x] Field access (`col['field']` and [`col[1]`]) - [x] [Array Functions](./scalar_functions.md#array-functions) - [ ] [Struct Functions](./scalar_functions.md#struct-functions) - [x] `struct` - - [ ] [Postgres JSON operators](https://github.com/apache/arrow-datafusion/issues/6631) (`->`, `->>`, etc.) + - [ ] [Postgres JSON operators](https://github.com/apache/datafusion/issues/6631) (`->`, `->>`, etc.) - [x] Subqueries - [x] Common Table Expressions (CTE) - [x] Set Operations (`UNION [ALL]`, `INTERSECT [ALL]`, `EXCEPT[ALL]`) diff --git a/python/README.md b/python/README.md index bdaf62c49b1c..74e431b7d5a1 100644 --- a/python/README.md +++ b/python/README.md @@ -19,4 +19,4 @@ # DataFusion in Python -This directory is now moved to its [dedicated repository](https://github.com/apache/arrow-datafusion-python). +This directory is now moved to its [dedicated repository](https://github.com/apache/datafusion-python).