From 4d389c2590370d85bfe3af77f5243d5b40f5a222 Mon Sep 17 00:00:00 2001 From: Junhao Liu Date: Tue, 30 Jan 2024 13:46:57 -0600 Subject: [PATCH] Remove single_file_output option from FileSinkConfig and Copy statement (#9041) * initial: remove single file config in all related files * judge according to collections * feat: change test cases * fix test cases * test case and doc * format doc using prettier * regen proto files --- datafusion/core/src/dataframe/mod.rs | 2 -- datafusion/core/src/dataframe/parquet.rs | 1 - .../core/src/datasource/file_format/arrow.rs | 1 - .../src/datasource/file_format/parquet.rs | 1 - .../src/datasource/file_format/write/demux.rs | 2 +- .../file_format/write/orchestration.rs | 2 -- .../core/src/datasource/listing/table.rs | 1 - .../core/src/datasource/physical_plan/csv.rs | 4 +-- .../core/src/datasource/physical_plan/json.rs | 4 +-- .../core/src/datasource/physical_plan/mod.rs | 4 --- .../datasource/physical_plan/parquet/mod.rs | 4 +-- datafusion/core/src/physical_planner.rs | 2 -- datafusion/expr/src/logical_plan/builder.rs | 2 -- datafusion/expr/src/logical_plan/dml.rs | 4 --- datafusion/expr/src/logical_plan/plan.rs | 6 ++-- datafusion/proto/proto/datafusion.proto | 2 -- datafusion/proto/src/generated/pbjson.rs | 36 ------------------- datafusion/proto/src/generated/prost.rs | 4 --- datafusion/proto/src/logical_plan/mod.rs | 3 -- .../proto/src/physical_plan/from_proto.rs | 1 - .../proto/src/physical_plan/to_proto.rs | 1 - .../tests/cases/roundtrip_logical_plan.rs | 7 ---- .../tests/cases/roundtrip_physical_plan.rs | 3 -- datafusion/sql/src/statement.rs | 6 ---- datafusion/sql/tests/sql_integration.rs | 6 ++-- datafusion/sqllogictest/test_files/copy.slt | 34 +++++++++--------- .../sqllogictest/test_files/csv_files.slt | 4 +-- .../sqllogictest/test_files/group_by.slt | 8 ++--- .../sqllogictest/test_files/parquet.slt | 8 ++--- .../sqllogictest/test_files/repartition.slt | 2 +- .../test_files/repartition_scan.slt | 8 ++--- .../test_files/schema_evolution.slt | 8 ++--- docs/source/user-guide/sql/dml.md | 2 +- docs/source/user-guide/sql/write_options.md | 12 +++---- 34 files changed, 53 insertions(+), 142 deletions(-) diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index 285e761e2e6f..4fd543f0eab8 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -1075,7 +1075,6 @@ impl DataFrame { self.plan, path.into(), FileType::CSV, - options.single_file_output, copy_options, )? .build()?; @@ -1100,7 +1099,6 @@ impl DataFrame { self.plan, path.into(), FileType::JSON, - options.single_file_output, copy_options, )? .build()?; diff --git a/datafusion/core/src/dataframe/parquet.rs b/datafusion/core/src/dataframe/parquet.rs index 36ef90c987e3..7c0ec1edc2c0 100644 --- a/datafusion/core/src/dataframe/parquet.rs +++ b/datafusion/core/src/dataframe/parquet.rs @@ -53,7 +53,6 @@ impl DataFrame { self.plan, path.into(), FileType::PARQUET, - options.single_file_output, copy_options, )? 
.build()?; diff --git a/datafusion/core/src/datasource/file_format/arrow.rs b/datafusion/core/src/datasource/file_format/arrow.rs index 650f8c844eda..ead2db5a10c0 100644 --- a/datafusion/core/src/datasource/file_format/arrow.rs +++ b/datafusion/core/src/datasource/file_format/arrow.rs @@ -236,7 +236,6 @@ impl DataSink for ArrowFileSink { part_col, self.config.table_paths[0].clone(), "arrow".into(), - self.config.single_file_output, ); let mut file_write_tasks: JoinSet> = diff --git a/datafusion/core/src/datasource/file_format/parquet.rs b/datafusion/core/src/datasource/file_format/parquet.rs index fdf6277a5ed2..408233469ea2 100644 --- a/datafusion/core/src/datasource/file_format/parquet.rs +++ b/datafusion/core/src/datasource/file_format/parquet.rs @@ -749,7 +749,6 @@ impl DataSink for ParquetSink { part_col, self.config.table_paths[0].clone(), "parquet".into(), - self.config.single_file_output, ); let mut file_write_tasks: JoinSet> = diff --git a/datafusion/core/src/datasource/file_format/write/demux.rs b/datafusion/core/src/datasource/file_format/write/demux.rs index dbfeb67eaeb9..94d915827e4f 100644 --- a/datafusion/core/src/datasource/file_format/write/demux.rs +++ b/datafusion/core/src/datasource/file_format/write/demux.rs @@ -76,10 +76,10 @@ pub(crate) fn start_demuxer_task( partition_by: Option>, base_output_path: ListingTableUrl, file_extension: String, - single_file_output: bool, ) -> (JoinHandle>, DemuxedStreamReceiver) { let (tx, rx) = tokio::sync::mpsc::unbounded_channel(); let context = context.clone(); + let single_file_output = !base_output_path.is_collection(); let task: JoinHandle> = match partition_by { Some(parts) => { // There could be an arbitrarily large number of parallel hive style partitions being written to, so we cannot diff --git a/datafusion/core/src/datasource/file_format/write/orchestration.rs b/datafusion/core/src/datasource/file_format/write/orchestration.rs index 106b4e0d50e5..1a3042cbc00b 100644 --- a/datafusion/core/src/datasource/file_format/write/orchestration.rs +++ b/datafusion/core/src/datasource/file_format/write/orchestration.rs @@ -220,7 +220,6 @@ pub(crate) async fn stateless_multipart_put( .runtime_env() .object_store(&config.object_store_url)?; - let single_file_output = config.single_file_output; let base_output_path = &config.table_paths[0]; let part_cols = if !config.table_partition_cols.is_empty() { Some(config.table_partition_cols.clone()) @@ -234,7 +233,6 @@ pub(crate) async fn stateless_multipart_put( part_cols, base_output_path.clone(), file_extension, - single_file_output, ); let rb_buffer_size = &context diff --git a/datafusion/core/src/datasource/listing/table.rs b/datafusion/core/src/datasource/listing/table.rs index de207b6d9019..8ec16956b6a1 100644 --- a/datafusion/core/src/datasource/listing/table.rs +++ b/datafusion/core/src/datasource/listing/table.rs @@ -776,7 +776,6 @@ impl TableProvider for ListingTable { file_groups, output_schema: self.schema(), table_partition_cols: self.options.table_partition_cols.clone(), - single_file_output: false, overwrite, file_type_writer_options, }; diff --git a/datafusion/core/src/datasource/physical_plan/csv.rs b/datafusion/core/src/datasource/physical_plan/csv.rs index a818c572f7f5..04959c7904a9 100644 --- a/datafusion/core/src/datasource/physical_plan/csv.rs +++ b/datafusion/core/src/datasource/physical_plan/csv.rs @@ -1028,8 +1028,8 @@ mod tests { ctx.runtime_env().register_object_store(&local_url, local); // execute a simple query and write the results to CSV - let out_dir = 
tmp_dir.as_ref().to_str().unwrap().to_string() + "/out"; - let out_dir_url = "file://local/out"; + let out_dir = tmp_dir.as_ref().to_str().unwrap().to_string() + "/out/"; + let out_dir_url = "file://local/out/"; let df = ctx.sql("SELECT c1, c2 FROM test").await?; df.write_csv(out_dir_url, DataFrameWriteOptions::new(), None) .await?; diff --git a/datafusion/core/src/datasource/physical_plan/json.rs b/datafusion/core/src/datasource/physical_plan/json.rs index a8a371fed91e..c033c4b89891 100644 --- a/datafusion/core/src/datasource/physical_plan/json.rs +++ b/datafusion/core/src/datasource/physical_plan/json.rs @@ -742,8 +742,8 @@ mod tests { ctx.runtime_env().register_object_store(&local_url, local); // execute a simple query and write the results to CSV - let out_dir = tmp_dir.as_ref().to_str().unwrap().to_string() + "/out"; - let out_dir_url = "file://local/out"; + let out_dir = tmp_dir.as_ref().to_str().unwrap().to_string() + "/out/"; + let out_dir_url = "file://local/out/"; let df = ctx.sql("SELECT a, b FROM test").await?; df.write_json(out_dir_url, DataFrameWriteOptions::new()) .await?; diff --git a/datafusion/core/src/datasource/physical_plan/mod.rs b/datafusion/core/src/datasource/physical_plan/mod.rs index 24155d3fd167..11eb9e7867bb 100644 --- a/datafusion/core/src/datasource/physical_plan/mod.rs +++ b/datafusion/core/src/datasource/physical_plan/mod.rs @@ -91,10 +91,6 @@ pub struct FileSinkConfig { /// A vector of column names and their corresponding data types, /// representing the partitioning columns for the file pub table_partition_cols: Vec<(String, DataType)>, - /// If true, it is assumed there is a single table_path which is a file to which all data should be written - /// regardless of input partitioning. Otherwise, each table path is assumed to be a directory - /// to which each output partition is written to its own output file. - pub single_file_output: bool, /// Controls whether existing data should be overwritten by this sink pub overwrite: bool, /// Contains settings specific to writing a given FileType, e.g. 
parquet max_row_group_size diff --git a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs index 7215cdd60716..171f7cdc59a4 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs @@ -2089,8 +2089,8 @@ mod tests { ctx.runtime_env().register_object_store(&local_url, local); // execute a simple query and write the results to parquet - let out_dir = tmp_dir.as_ref().to_str().unwrap().to_string() + "/out"; - let out_dir_url = "file://local/out"; + let out_dir = tmp_dir.as_ref().to_str().unwrap().to_string() + "/out/"; + let out_dir_url = "file://local/out/"; let df = ctx.sql("SELECT c1, c2 FROM test").await?; df.write_parquet(out_dir_url, DataFrameWriteOptions::new(), None) .await?; diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index d4ef40493df3..6aa0c93fad0f 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -562,7 +562,6 @@ impl DefaultPhysicalPlanner { input, output_url, file_format, - single_file_output, copy_options, }) => { let input_exec = self.create_initial_plan(input, session_state).await?; @@ -588,7 +587,6 @@ impl DefaultPhysicalPlanner { file_groups: vec![], output_schema: Arc::new(schema), table_partition_cols: vec![], - single_file_output: *single_file_output, overwrite: false, file_type_writer_options }; diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index 6ae5d37f9e26..68499f09d22b 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -263,14 +263,12 @@ impl LogicalPlanBuilder { input: LogicalPlan, output_url: String, file_format: FileType, - single_file_output: bool, copy_options: CopyOptions, ) -> Result { Ok(Self::from(LogicalPlan::Copy(CopyTo { input: Arc::new(input), output_url, file_format, - single_file_output, copy_options, }))) } diff --git a/datafusion/expr/src/logical_plan/dml.rs b/datafusion/expr/src/logical_plan/dml.rs index 4cd56b89ac63..794c64998935 100644 --- a/datafusion/expr/src/logical_plan/dml.rs +++ b/datafusion/expr/src/logical_plan/dml.rs @@ -36,10 +36,6 @@ pub struct CopyTo { pub output_url: String, /// The file format to output (explicitly defined or inferred from file extension) pub file_format: FileType, - /// If false, it is assumed output_url is a file to which all data should be written - /// regardless of input partitioning. Otherwise, output_url is assumed to be a directory - /// to which each output partition is written to its own output file - pub single_file_output: bool, /// Arbitrary options as tuples pub copy_options: CopyOptions, } diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index b72dd7f5ec92..b7d75dc0ae80 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -615,12 +615,11 @@ impl LogicalPlan { output_url, file_format, copy_options, - single_file_output, }) => Ok(LogicalPlan::Copy(CopyTo { input: Arc::new(inputs.swap_remove(0)), output_url: output_url.clone(), file_format: file_format.clone(), - single_file_output: *single_file_output, + copy_options: copy_options.clone(), })), LogicalPlan::Values(Values { schema, .. 
}) => { @@ -1551,7 +1550,6 @@ impl LogicalPlan { input: _, output_url, file_format, - single_file_output, copy_options, }) => { let op_str = match copy_options { @@ -1565,7 +1563,7 @@ impl LogicalPlan { CopyOptions::WriterOptions(_) => "".into(), }; - write!(f, "CopyTo: format={file_format} output_url={output_url} single_file_output={single_file_output} options: ({op_str})") + write!(f, "CopyTo: format={file_format} output_url={output_url} options: ({op_str})") } LogicalPlan::Ddl(ddl) => { write!(f, "{}", ddl.display()) diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index 1d5ca5917140..b5fd202f513e 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -322,7 +322,6 @@ message DistinctOnNode { message CopyToNode { LogicalPlanNode input = 1; string output_url = 2; - bool single_file_output = 3; oneof CopyOptions { SQLOptions sql_options = 4; FileTypeWriterOptions writer_options = 5; @@ -1267,7 +1266,6 @@ message FileSinkConfig { repeated string table_paths = 3; Schema output_schema = 4; repeated PartitionColumn table_partition_cols = 5; - bool single_file_output = 7; bool overwrite = 8; FileTypeWriterOptions file_type_writer_options = 9; } diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index 47667fb68c43..599a8e5bb520 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -3792,9 +3792,6 @@ impl serde::Serialize for CopyToNode { if !self.output_url.is_empty() { len += 1; } - if self.single_file_output { - len += 1; - } if !self.file_type.is_empty() { len += 1; } @@ -3808,9 +3805,6 @@ impl serde::Serialize for CopyToNode { if !self.output_url.is_empty() { struct_ser.serialize_field("outputUrl", &self.output_url)?; } - if self.single_file_output { - struct_ser.serialize_field("singleFileOutput", &self.single_file_output)?; - } if !self.file_type.is_empty() { struct_ser.serialize_field("fileType", &self.file_type)?; } @@ -3837,8 +3831,6 @@ impl<'de> serde::Deserialize<'de> for CopyToNode { "input", "output_url", "outputUrl", - "single_file_output", - "singleFileOutput", "file_type", "fileType", "sql_options", @@ -3851,7 +3843,6 @@ impl<'de> serde::Deserialize<'de> for CopyToNode { enum GeneratedField { Input, OutputUrl, - SingleFileOutput, FileType, SqlOptions, WriterOptions, @@ -3878,7 +3869,6 @@ impl<'de> serde::Deserialize<'de> for CopyToNode { match value { "input" => Ok(GeneratedField::Input), "outputUrl" | "output_url" => Ok(GeneratedField::OutputUrl), - "singleFileOutput" | "single_file_output" => Ok(GeneratedField::SingleFileOutput), "fileType" | "file_type" => Ok(GeneratedField::FileType), "sqlOptions" | "sql_options" => Ok(GeneratedField::SqlOptions), "writerOptions" | "writer_options" => Ok(GeneratedField::WriterOptions), @@ -3903,7 +3893,6 @@ impl<'de> serde::Deserialize<'de> for CopyToNode { { let mut input__ = None; let mut output_url__ = None; - let mut single_file_output__ = None; let mut file_type__ = None; let mut copy_options__ = None; while let Some(k) = map_.next_key()? 
{ @@ -3920,12 +3909,6 @@ impl<'de> serde::Deserialize<'de> for CopyToNode { } output_url__ = Some(map_.next_value()?); } - GeneratedField::SingleFileOutput => { - if single_file_output__.is_some() { - return Err(serde::de::Error::duplicate_field("singleFileOutput")); - } - single_file_output__ = Some(map_.next_value()?); - } GeneratedField::FileType => { if file_type__.is_some() { return Err(serde::de::Error::duplicate_field("fileType")); @@ -3951,7 +3934,6 @@ impl<'de> serde::Deserialize<'de> for CopyToNode { Ok(CopyToNode { input: input__, output_url: output_url__.unwrap_or_default(), - single_file_output: single_file_output__.unwrap_or_default(), file_type: file_type__.unwrap_or_default(), copy_options: copy_options__, }) @@ -8208,9 +8190,6 @@ impl serde::Serialize for FileSinkConfig { if !self.table_partition_cols.is_empty() { len += 1; } - if self.single_file_output { - len += 1; - } if self.overwrite { len += 1; } @@ -8233,9 +8212,6 @@ impl serde::Serialize for FileSinkConfig { if !self.table_partition_cols.is_empty() { struct_ser.serialize_field("tablePartitionCols", &self.table_partition_cols)?; } - if self.single_file_output { - struct_ser.serialize_field("singleFileOutput", &self.single_file_output)?; - } if self.overwrite { struct_ser.serialize_field("overwrite", &self.overwrite)?; } @@ -8262,8 +8238,6 @@ impl<'de> serde::Deserialize<'de> for FileSinkConfig { "outputSchema", "table_partition_cols", "tablePartitionCols", - "single_file_output", - "singleFileOutput", "overwrite", "file_type_writer_options", "fileTypeWriterOptions", @@ -8276,7 +8250,6 @@ impl<'de> serde::Deserialize<'de> for FileSinkConfig { TablePaths, OutputSchema, TablePartitionCols, - SingleFileOutput, Overwrite, FileTypeWriterOptions, } @@ -8305,7 +8278,6 @@ impl<'de> serde::Deserialize<'de> for FileSinkConfig { "tablePaths" | "table_paths" => Ok(GeneratedField::TablePaths), "outputSchema" | "output_schema" => Ok(GeneratedField::OutputSchema), "tablePartitionCols" | "table_partition_cols" => Ok(GeneratedField::TablePartitionCols), - "singleFileOutput" | "single_file_output" => Ok(GeneratedField::SingleFileOutput), "overwrite" => Ok(GeneratedField::Overwrite), "fileTypeWriterOptions" | "file_type_writer_options" => Ok(GeneratedField::FileTypeWriterOptions), _ => Err(serde::de::Error::unknown_field(value, FIELDS)), @@ -8332,7 +8304,6 @@ impl<'de> serde::Deserialize<'de> for FileSinkConfig { let mut table_paths__ = None; let mut output_schema__ = None; let mut table_partition_cols__ = None; - let mut single_file_output__ = None; let mut overwrite__ = None; let mut file_type_writer_options__ = None; while let Some(k) = map_.next_key()? 
{ @@ -8367,12 +8338,6 @@ impl<'de> serde::Deserialize<'de> for FileSinkConfig { } table_partition_cols__ = Some(map_.next_value()?); } - GeneratedField::SingleFileOutput => { - if single_file_output__.is_some() { - return Err(serde::de::Error::duplicate_field("singleFileOutput")); - } - single_file_output__ = Some(map_.next_value()?); - } GeneratedField::Overwrite => { if overwrite__.is_some() { return Err(serde::de::Error::duplicate_field("overwrite")); @@ -8393,7 +8358,6 @@ impl<'de> serde::Deserialize<'de> for FileSinkConfig { table_paths: table_paths__.unwrap_or_default(), output_schema: output_schema__, table_partition_cols: table_partition_cols__.unwrap_or_default(), - single_file_output: single_file_output__.unwrap_or_default(), overwrite: overwrite__.unwrap_or_default(), file_type_writer_options: file_type_writer_options__, }) diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index 485dbd48b8c7..83d704ac142d 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -509,8 +509,6 @@ pub struct CopyToNode { pub input: ::core::option::Option<::prost::alloc::boxed::Box>, #[prost(string, tag = "2")] pub output_url: ::prost::alloc::string::String, - #[prost(bool, tag = "3")] - pub single_file_output: bool, #[prost(string, tag = "6")] pub file_type: ::prost::alloc::string::String, #[prost(oneof = "copy_to_node::CopyOptions", tags = "4, 5")] @@ -1740,8 +1738,6 @@ pub struct FileSinkConfig { pub output_schema: ::core::option::Option, #[prost(message, repeated, tag = "5")] pub table_partition_cols: ::prost::alloc::vec::Vec, - #[prost(bool, tag = "7")] - pub single_file_output: bool, #[prost(bool, tag = "8")] pub overwrite: bool, #[prost(message, optional, tag = "9")] diff --git a/datafusion/proto/src/logical_plan/mod.rs b/datafusion/proto/src/logical_plan/mod.rs index d95d69780301..7a6dab85de34 100644 --- a/datafusion/proto/src/logical_plan/mod.rs +++ b/datafusion/proto/src/logical_plan/mod.rs @@ -918,7 +918,6 @@ impl AsLogicalPlan for LogicalPlanNode { input: Arc::new(input), output_url: copy.output_url.clone(), file_format: FileType::from_str(©.file_type)?, - single_file_output: copy.single_file_output, copy_options, }, )) @@ -1640,7 +1639,6 @@ impl AsLogicalPlan for LogicalPlanNode { LogicalPlan::Copy(dml::CopyTo { input, output_url, - single_file_output, file_format, copy_options, }) => { @@ -1723,7 +1721,6 @@ impl AsLogicalPlan for LogicalPlanNode { logical_plan_type: Some(LogicalPlanType::CopyTo(Box::new( protobuf::CopyToNode { input: Some(Box::new(input)), - single_file_output: *single_file_output, output_url: output_url.to_string(), file_type: file_format.to_string(), copy_options: copy_options_proto, diff --git a/datafusion/proto/src/physical_plan/from_proto.rs b/datafusion/proto/src/physical_plan/from_proto.rs index 454f74dfd132..2f0b2c9e08b6 100644 --- a/datafusion/proto/src/physical_plan/from_proto.rs +++ b/datafusion/proto/src/physical_plan/from_proto.rs @@ -801,7 +801,6 @@ impl TryFrom<&protobuf::FileSinkConfig> for FileSinkConfig { table_paths, output_schema: Arc::new(convert_required!(conf.output_schema)?), table_partition_cols, - single_file_output: conf.single_file_output, overwrite: conf.overwrite, file_type_writer_options: convert_required!(conf.file_type_writer_options)?, }) diff --git a/datafusion/proto/src/physical_plan/to_proto.rs b/datafusion/proto/src/physical_plan/to_proto.rs index a67410da57f4..96d43e7e08ca 100644 --- a/datafusion/proto/src/physical_plan/to_proto.rs +++ 
b/datafusion/proto/src/physical_plan/to_proto.rs @@ -878,7 +878,6 @@ impl TryFrom<&FileSinkConfig> for protobuf::FileSinkConfig { table_paths, output_schema: Some(conf.output_schema.as_ref().try_into()?), table_partition_cols, - single_file_output: conf.single_file_output, overwrite: conf.overwrite, file_type_writer_options: Some(file_type_writer_options.try_into()?), }) diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index 17d47a65d8d1..1d935ebcd383 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -323,7 +323,6 @@ async fn roundtrip_logical_plan_copy_to_sql_options() -> Result<()> { input: Arc::new(input), output_url: "test.csv".to_string(), file_format: FileType::CSV, - single_file_output: true, copy_options: CopyOptions::SQLOptions(StatementOptions::from(&options)), }); @@ -354,7 +353,6 @@ async fn roundtrip_logical_plan_copy_to_writer_options() -> Result<()> { input: Arc::new(input), output_url: "test.parquet".to_string(), file_format: FileType::PARQUET, - single_file_output: true, copy_options: CopyOptions::WriterOptions(Box::new( FileTypeWriterOptions::Parquet(ParquetWriterOptions::new(writer_properties)), )), @@ -368,7 +366,6 @@ async fn roundtrip_logical_plan_copy_to_writer_options() -> Result<()> { LogicalPlan::Copy(copy_to) => { assert_eq!("test.parquet", copy_to.output_url); assert_eq!(FileType::PARQUET, copy_to.file_format); - assert!(copy_to.single_file_output); match ©_to.copy_options { CopyOptions::WriterOptions(y) => match y.as_ref() { FileTypeWriterOptions::Parquet(p) => { @@ -404,7 +401,6 @@ async fn roundtrip_logical_plan_copy_to_arrow() -> Result<()> { input: Arc::new(input), output_url: "test.arrow".to_string(), file_format: FileType::ARROW, - single_file_output: true, copy_options: CopyOptions::WriterOptions(Box::new(FileTypeWriterOptions::Arrow( ArrowWriterOptions::new(), ))), @@ -418,7 +414,6 @@ async fn roundtrip_logical_plan_copy_to_arrow() -> Result<()> { LogicalPlan::Copy(copy_to) => { assert_eq!("test.arrow", copy_to.output_url); assert_eq!(FileType::ARROW, copy_to.file_format); - assert!(copy_to.single_file_output); match ©_to.copy_options { CopyOptions::WriterOptions(y) => match y.as_ref() { FileTypeWriterOptions::Arrow(_) => {} @@ -451,7 +446,6 @@ async fn roundtrip_logical_plan_copy_to_csv() -> Result<()> { input: Arc::new(input), output_url: "test.csv".to_string(), file_format: FileType::CSV, - single_file_output: true, copy_options: CopyOptions::WriterOptions(Box::new(FileTypeWriterOptions::CSV( CsvWriterOptions::new( writer_properties, @@ -468,7 +462,6 @@ async fn roundtrip_logical_plan_copy_to_csv() -> Result<()> { LogicalPlan::Copy(copy_to) => { assert_eq!("test.csv", copy_to.output_url); assert_eq!(FileType::CSV, copy_to.file_format); - assert!(copy_to.single_file_output); match ©_to.copy_options { CopyOptions::WriterOptions(y) => match y.as_ref() { FileTypeWriterOptions::CSV(p) => { diff --git a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs index f2f1b0ea0d86..d72dc152f14d 100644 --- a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs @@ -792,7 +792,6 @@ fn roundtrip_json_sink() -> Result<()> { table_paths: vec![ListingTableUrl::parse("file:///")?], output_schema: schema.clone(), table_partition_cols: vec![("plan_type".to_string(), DataType::Utf8)], - 
single_file_output: true, overwrite: true, file_type_writer_options: FileTypeWriterOptions::JSON(JsonWriterOptions::new( CompressionTypeVariant::UNCOMPRESSED, @@ -828,7 +827,6 @@ fn roundtrip_csv_sink() -> Result<()> { table_paths: vec![ListingTableUrl::parse("file:///")?], output_schema: schema.clone(), table_partition_cols: vec![("plan_type".to_string(), DataType::Utf8)], - single_file_output: true, overwrite: true, file_type_writer_options: FileTypeWriterOptions::CSV(CsvWriterOptions::new( WriterBuilder::default(), @@ -887,7 +885,6 @@ fn roundtrip_parquet_sink() -> Result<()> { table_paths: vec![ListingTableUrl::parse("file:///")?], output_schema: schema.clone(), table_partition_cols: vec![("plan_type".to_string(), DataType::Utf8)], - single_file_output: true, overwrite: true, file_type_writer_options: FileTypeWriterOptions::Parquet( ParquetWriterOptions::new(WriterProperties::default()), diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index 84a394f324cf..47eca70ef3e2 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -718,11 +718,6 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let mut statement_options = StatementOptions::new(options); let file_format = statement_options.try_infer_file_type(&statement.target)?; - let single_file_output = - statement_options.take_bool_option("single_file_output")?; - - // COPY defaults to outputting a single file if not otherwise specified - let single_file_output = single_file_output.unwrap_or(true); let copy_options = CopyOptions::SQLOptions(statement_options); @@ -730,7 +725,6 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { input: Arc::new(input), output_url: statement.target, file_format, - single_file_output, copy_options, })) } diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index c88e2d1130ed..9e6d46a7b3ee 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -386,7 +386,7 @@ fn plan_rollback_transaction_chained() { fn plan_copy_to() { let sql = "COPY test_decimal to 'output.csv'"; let plan = r#" -CopyTo: format=csv output_url=output.csv single_file_output=true options: () +CopyTo: format=csv output_url=output.csv options: () TableScan: test_decimal "# .trim(); @@ -398,7 +398,7 @@ fn plan_explain_copy_to() { let sql = "EXPLAIN COPY test_decimal to 'output.csv'"; let plan = r#" Explain - CopyTo: format=csv output_url=output.csv single_file_output=true options: () + CopyTo: format=csv output_url=output.csv options: () TableScan: test_decimal "# .trim(); @@ -409,7 +409,7 @@ Explain fn plan_copy_to_query() { let sql = "COPY (select * from test_decimal limit 10) to 'output.csv'"; let plan = r#" -CopyTo: format=csv output_url=output.csv single_file_output=true options: () +CopyTo: format=csv output_url=output.csv options: () Limit: skip=0, fetch=10 Projection: test_decimal.id, test_decimal.price TableScan: test_decimal diff --git a/datafusion/sqllogictest/test_files/copy.slt b/datafusion/sqllogictest/test_files/copy.slt index c9b3bdfa338b..dd2ce16a632e 100644 --- a/datafusion/sqllogictest/test_files/copy.slt +++ b/datafusion/sqllogictest/test_files/copy.slt @@ -21,15 +21,15 @@ create table source_table(col1 integer, col2 varchar) as values (1, 'Foo'), (2, # Copy to directory as multiple files query IT -COPY source_table TO 'test_files/scratch/copy/table' (format parquet, single_file_output false, compression 'zstd(10)'); +COPY source_table TO 'test_files/scratch/copy/table/' (format 
parquet, compression 'zstd(10)'); ---- 2 query TT -EXPLAIN COPY source_table TO 'test_files/scratch/copy/table' (format parquet, single_file_output false, compression 'zstd(10)'); +EXPLAIN COPY source_table TO 'test_files/scratch/copy/table/' (format parquet, compression 'zstd(10)'); ---- logical_plan -CopyTo: format=parquet output_url=test_files/scratch/copy/table single_file_output=false options: (compression 'zstd(10)') +CopyTo: format=parquet output_url=test_files/scratch/copy/table/ options: (compression 'zstd(10)') --TableScan: source_table projection=[col1, col2] physical_plan FileSinkExec: sink=ParquetSink(file_groups=[]) @@ -37,16 +37,16 @@ FileSinkExec: sink=ParquetSink(file_groups=[]) # Error case query error DataFusion error: Invalid or Unsupported Configuration: Format not explicitly set and unable to get file extension! -EXPLAIN COPY source_table to 'test_files/scratch/copy/table' +EXPLAIN COPY source_table to 'test_files/scratch/copy/table/' query error DataFusion error: SQL error: ParserError\("Expected end of statement, found: query"\) -EXPLAIN COPY source_table to 'test_files/scratch/copy/table' (format parquet, single_file_output false) +EXPLAIN COPY source_table to 'test_files/scratch/copy/table/' (format parquet) query TT -EXPLAIN COPY source_table to 'test_files/scratch/copy/table' (format parquet, per_thread_output true) +EXPLAIN COPY source_table to 'test_files/scratch/copy/table/' (format parquet, per_thread_output true) # Copy more files to directory via query query IT -COPY (select * from source_table UNION ALL select * from source_table) to 'test_files/scratch/copy/table' (format parquet, single_file_output false); +COPY (select * from source_table UNION ALL select * from source_table) to 'test_files/scratch/copy/table/' (format parquet); ---- 4 @@ -67,7 +67,7 @@ select * from validate_parquet; query ? copy (values (struct(timestamp '2021-01-01 01:00:01', 1)), (struct(timestamp '2022-01-01 01:00:01', 2)), (struct(timestamp '2023-01-03 01:00:01', 3)), (struct(timestamp '2024-01-01 01:00:01', 4))) -to 'test_files/scratch/copy/table_nested2' (format parquet, single_file_output false); +to 'test_files/scratch/copy/table_nested2/' (format parquet); ---- 4 @@ -86,7 +86,7 @@ query ?? COPY (values (struct ('foo', (struct ('foo', make_array(struct('a',1), struct('b',2))))), make_array(timestamp '2023-01-01 01:00:01',timestamp '2023-01-01 01:00:01')), (struct('bar', (struct ('foo', make_array(struct('aa',10), struct('bb',20))))), make_array(timestamp '2024-01-01 01:00:01', timestamp '2024-01-01 01:00:01'))) -to 'test_files/scratch/copy/table_nested' (format parquet, single_file_output false); +to 'test_files/scratch/copy/table_nested/' (format parquet); ---- 2 @@ -103,7 +103,7 @@ select * from validate_parquet_nested; query ? copy (values ([struct('foo', 1), struct('bar', 2)])) to 'test_files/scratch/copy/array_of_struct/' -(format parquet, single_file_output false); +(format parquet); ---- 1 @@ -119,7 +119,7 @@ select * from validate_array_of_struct; query ? 
copy (values (struct('foo', [1,2,3], struct('bar', [2,3,4])))) to 'test_files/scratch/copy/struct_with_array/' -(format parquet, single_file_output false); +(format parquet); ---- 1 @@ -136,9 +136,8 @@ select * from validate_struct_with_array; # Copy parquet with all supported statment overrides query IT COPY source_table -TO 'test_files/scratch/copy/table_with_options' +TO 'test_files/scratch/copy/table_with_options/' (format parquet, -single_file_output false, compression snappy, 'compression::col1' 'zstd(5)', 'compression::col2' snappy, @@ -195,7 +194,7 @@ select * from validate_parquet_single; # copy from table to folder of compressed json files query IT -COPY source_table to 'test_files/scratch/copy/table_json_gz' (format json, single_file_output false, compression 'gzip'); +COPY source_table to 'test_files/scratch/copy/table_json_gz' (format json, compression 'gzip'); ---- 2 @@ -211,7 +210,7 @@ select * from validate_json_gz; # copy from table to folder of compressed csv files query IT -COPY source_table to 'test_files/scratch/copy/table_csv' (format csv, single_file_output false, header false, compression 'gzip'); +COPY source_table to 'test_files/scratch/copy/table_csv' (format csv, header false, compression 'gzip'); ---- 2 @@ -243,7 +242,7 @@ select * from validate_single_csv; # Copy from table to folder of json query IT -COPY source_table to 'test_files/scratch/copy/table_json' (format json, single_file_output false); +COPY source_table to 'test_files/scratch/copy/table_json' (format json); ---- 2 @@ -278,7 +277,6 @@ query IT COPY source_table to 'test_files/scratch/copy/table_csv_with_options' (format csv, -single_file_output false, header false, compression 'uncompressed', datetime_format '%FT%H:%M:%S.%9f', @@ -340,7 +338,7 @@ d bar # Copy from table to folder of json query IT -COPY source_table to 'test_files/scratch/copy/table_arrow' (format arrow, single_file_output false); +COPY source_table to 'test_files/scratch/copy/table_arrow' (format arrow); ---- 2 diff --git a/datafusion/sqllogictest/test_files/csv_files.slt b/datafusion/sqllogictest/test_files/csv_files.slt index 5393083e6c53..0e8171b5a870 100644 --- a/datafusion/sqllogictest/test_files/csv_files.slt +++ b/datafusion/sqllogictest/test_files/csv_files.slt @@ -92,14 +92,14 @@ CREATE TABLE src_table_2 ( query ITII COPY src_table_1 TO 'test_files/scratch/csv_files/csv_partitions/1.csv' -(FORMAT CSV, SINGLE_FILE_OUTPUT true); +(FORMAT CSV); ---- 4 query ITII COPY src_table_2 TO 'test_files/scratch/csv_files/csv_partitions/2.csv' -(FORMAT CSV, SINGLE_FILE_OUTPUT true); +(FORMAT CSV); ---- 4 diff --git a/datafusion/sqllogictest/test_files/group_by.slt b/datafusion/sqllogictest/test_files/group_by.slt index c75929049c18..decf40cfa1b8 100644 --- a/datafusion/sqllogictest/test_files/group_by.slt +++ b/datafusion/sqllogictest/test_files/group_by.slt @@ -4306,28 +4306,28 @@ CREATE TABLE src_table ( query PI COPY (SELECT * FROM src_table) TO 'test_files/scratch/group_by/timestamp_table/0.csv' -(FORMAT CSV, SINGLE_FILE_OUTPUT true); +(FORMAT CSV); ---- 10 query PI COPY (SELECT * FROM src_table) TO 'test_files/scratch/group_by/timestamp_table/1.csv' -(FORMAT CSV, SINGLE_FILE_OUTPUT true); +(FORMAT CSV); ---- 10 query PI COPY (SELECT * FROM src_table) TO 'test_files/scratch/group_by/timestamp_table/2.csv' -(FORMAT CSV, SINGLE_FILE_OUTPUT true); +(FORMAT CSV); ---- 10 query PI COPY (SELECT * FROM src_table) TO 'test_files/scratch/group_by/timestamp_table/3.csv' -(FORMAT CSV, SINGLE_FILE_OUTPUT true); +(FORMAT CSV); ---- 10 diff 
--git a/datafusion/sqllogictest/test_files/parquet.slt b/datafusion/sqllogictest/test_files/parquet.slt index 0f26c14f0017..b7cd1243cb0f 100644 --- a/datafusion/sqllogictest/test_files/parquet.slt +++ b/datafusion/sqllogictest/test_files/parquet.slt @@ -45,7 +45,7 @@ CREATE TABLE src_table ( query ITID COPY (SELECT * FROM src_table LIMIT 3) TO 'test_files/scratch/parquet/test_table/0.parquet' -(FORMAT PARQUET, SINGLE_FILE_OUTPUT true); +(FORMAT PARQUET); ---- 3 @@ -53,7 +53,7 @@ TO 'test_files/scratch/parquet/test_table/0.parquet' query ITID COPY (SELECT * FROM src_table WHERE int_col > 3 LIMIT 3) TO 'test_files/scratch/parquet/test_table/1.parquet' -(FORMAT PARQUET, SINGLE_FILE_OUTPUT true); +(FORMAT PARQUET); ---- 3 @@ -128,7 +128,7 @@ SortPreservingMergeExec: [string_col@1 ASC NULLS LAST,int_col@0 ASC NULLS LAST] query ITID COPY (SELECT * FROM src_table WHERE int_col > 6 LIMIT 3) TO 'test_files/scratch/parquet/test_table/2.parquet' -(FORMAT PARQUET, SINGLE_FILE_OUTPUT true); +(FORMAT PARQUET); ---- 3 @@ -281,7 +281,7 @@ LIMIT 10; query ITID COPY (SELECT * FROM src_table WHERE int_col > 6 LIMIT 3) TO 'test_files/scratch/parquet/test_table/subdir/3.parquet' -(FORMAT PARQUET, SINGLE_FILE_OUTPUT true); +(FORMAT PARQUET); ---- 3 diff --git a/datafusion/sqllogictest/test_files/repartition.slt b/datafusion/sqllogictest/test_files/repartition.slt index 7c141adf82b1..391a6739b060 100644 --- a/datafusion/sqllogictest/test_files/repartition.slt +++ b/datafusion/sqllogictest/test_files/repartition.slt @@ -25,7 +25,7 @@ set datafusion.execution.target_partitions = 4; statement ok COPY (VALUES (1, 2), (2, 5), (3, 2), (4, 5), (5, 0)) TO 'test_files/scratch/repartition/parquet_table/2.parquet' -(FORMAT PARQUET, SINGLE_FILE_OUTPUT true); +(FORMAT PARQUET); statement ok CREATE EXTERNAL TABLE parquet_table(column1 int, column2 int) diff --git a/datafusion/sqllogictest/test_files/repartition_scan.slt b/datafusion/sqllogictest/test_files/repartition_scan.slt index 08ad5e711ff6..9b4b449340b0 100644 --- a/datafusion/sqllogictest/test_files/repartition_scan.slt +++ b/datafusion/sqllogictest/test_files/repartition_scan.slt @@ -35,7 +35,7 @@ set datafusion.optimizer.repartition_file_min_size = 1; # Note filename 2.parquet to test sorting (on local file systems it is often listed before 1.parquet) statement ok COPY (VALUES (1), (2), (3), (4), (5)) TO 'test_files/scratch/repartition_scan/parquet_table/2.parquet' -(FORMAT PARQUET, SINGLE_FILE_OUTPUT true); +(FORMAT PARQUET); statement ok CREATE EXTERNAL TABLE parquet_table(column1 int) @@ -86,7 +86,7 @@ set datafusion.optimizer.enable_round_robin_repartition = true; # create a second parquet file statement ok COPY (VALUES (100), (200)) TO 'test_files/scratch/repartition_scan/parquet_table/1.parquet' -(FORMAT PARQUET, SINGLE_FILE_OUTPUT true); +(FORMAT PARQUET); ## Still expect to see the scan read the file as "4" groups with even sizes. One group should read ## parts of both files. 
@@ -158,7 +158,7 @@ DROP TABLE parquet_table_with_order; # create a single csv file statement ok COPY (VALUES (1), (2), (3), (4), (5)) TO 'test_files/scratch/repartition_scan/csv_table/1.csv' -(FORMAT csv, SINGLE_FILE_OUTPUT true, HEADER true); +(FORMAT csv, HEADER true); statement ok CREATE EXTERNAL TABLE csv_table(column1 int) @@ -202,7 +202,7 @@ DROP TABLE csv_table; # create a single json file statement ok COPY (VALUES (1), (2), (3), (4), (5)) TO 'test_files/scratch/repartition_scan/json_table/1.json' -(FORMAT json, SINGLE_FILE_OUTPUT true); +(FORMAT json); statement ok CREATE EXTERNAL TABLE json_table (column1 int) diff --git a/datafusion/sqllogictest/test_files/schema_evolution.slt b/datafusion/sqllogictest/test_files/schema_evolution.slt index 36d54159e24d..aee0e97edc1e 100644 --- a/datafusion/sqllogictest/test_files/schema_evolution.slt +++ b/datafusion/sqllogictest/test_files/schema_evolution.slt @@ -31,7 +31,7 @@ COPY ( SELECT column1 as a, column2 as b FROM ( VALUES ('foo', 1), ('foo', 2), ('foo', 3) ) ) TO 'test_files/scratch/schema_evolution/parquet_table/1.parquet' -(FORMAT PARQUET, SINGLE_FILE_OUTPUT true); +(FORMAT PARQUET); # File2 has only b @@ -40,7 +40,7 @@ COPY ( SELECT column1 as b FROM ( VALUES (10) ) ) TO 'test_files/scratch/schema_evolution/parquet_table/2.parquet' -(FORMAT PARQUET, SINGLE_FILE_OUTPUT true); +(FORMAT PARQUET); # File3 has a column from 'z' which does not appear in the table # but also values from a which do appear in the table @@ -49,7 +49,7 @@ COPY ( SELECT column1 as z, column2 as a FROM ( VALUES ('bar', 'foo'), ('blarg', 'foo') ) ) TO 'test_files/scratch/schema_evolution/parquet_table/3.parquet' -(FORMAT PARQUET, SINGLE_FILE_OUTPUT true); +(FORMAT PARQUET); # File4 has data for b and a (reversed) and d statement ok @@ -57,7 +57,7 @@ COPY ( SELECT column1 as b, column2 as a, column3 as c FROM ( VALUES (100, 'foo', 10.5), (200, 'foo', 12.6), (300, 'bzz', 13.7) ) ) TO 'test_files/scratch/schema_evolution/parquet_table/4.parquet' -(FORMAT PARQUET, SINGLE_FILE_OUTPUT true); +(FORMAT PARQUET); # The logical distribution of `a`, `b` and `c` in the files is like this: # diff --git a/docs/source/user-guide/sql/dml.md b/docs/source/user-guide/sql/dml.md index c3226936e7ac..79b1d6625e8f 100644 --- a/docs/source/user-guide/sql/dml.md +++ b/docs/source/user-guide/sql/dml.md @@ -49,7 +49,7 @@ Copy the contents of `source_table` to one or more Parquet formatted files in the `dir_name` directory: ```sql -> COPY source_table TO 'dir_name' (FORMAT parquet, SINGLE_FILE_OUTPUT false); +> COPY source_table TO 'dir_name' (FORMAT parquet); +-------+ | count | +-------+ diff --git a/docs/source/user-guide/sql/write_options.md b/docs/source/user-guide/sql/write_options.md index 75aa0d77b95c..150da7c53d1f 100644 --- a/docs/source/user-guide/sql/write_options.md +++ b/docs/source/user-guide/sql/write_options.md @@ -46,7 +46,7 @@ NULL_VALUE 'NAN' ); ``` -When running `INSERT INTO my_table ...`, the options from the `CREATE TABLE` will be respected (gzip compression, special delimiter, and header row included). Note that compression, header, and delimiter settings can also be specified within the `OPTIONS` tuple list. Dedicated syntax within the SQL statement always takes precedence over arbitrary option tuples, so if both are specified the `OPTIONS` setting will be ignored. NULL_VALUE is a CSV format specific option that determines how null values should be encoded within the CSV file. 
+When running `INSERT INTO my_table ...`, the options from the `CREATE TABLE` will be respected (gzip compression, special delimiter, and header row included). There will be a single output file if the output path does not end with a `/` (i.e. it is not in folder format); otherwise, one file is written to the folder for each output partition, as illustrated in the examples below. Note that compression, header, and delimiter settings can also be specified within the `OPTIONS` tuple list. Dedicated syntax within the SQL statement always takes precedence over arbitrary option tuples, so if both are specified the `OPTIONS` setting will be ignored. NULL_VALUE is a CSV format specific option that determines how null values should be encoded within the CSV file. Finally, options can be passed when running a `COPY` command. @@ -54,13 +54,12 @@ COPY source_table TO 'test/table_with_options' (format parquet, -single_file_output false, compression snappy, 'compression::col1' 'zstd(5)', ) ``` -In this example, we write the entirety of `source_table` out to a folder of parquet files. The option `single_file_output` set to false, indicates that the destination path should be interpreted as a folder to which the query will output multiple files. One parquet file will be written in parallel to the folder for each partition in the query. The next option `compression` set to `snappy` indicates that unless otherwise specified all columns should use the snappy compression codec. The option `compression::col1` sets an override, so that the column `col1` in the parquet file will use `ZSTD` compression codec with compression level `5`. In general, parquet options which support column specific settings can be specified with the syntax `OPTION::COLUMN.NESTED.PATH`. +In this example, we write the entirety of `source_table` out to a folder of parquet files. One parquet file will be written in parallel to the folder for each partition in the query. The next option `compression` set to `snappy` indicates that unless otherwise specified all columns should use the snappy compression codec. The option `compression::col1` sets an override, so that the column `col1` in the parquet file will use `ZSTD` compression codec with compression level `5`. In general, parquet options which support column specific settings can be specified with the syntax `OPTION::COLUMN.NESTED.PATH`. ## Available Options @@ -68,10 +67,9 @@ In this example, we write the entirety of `source_table` out to a folder of parq The following special options are specific to the `COPY` command. -| Option | Description | Default Value | -| ------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------- | -| SINGLE_FILE_OUTPUT | If true, COPY query will write output to a single file. Otherwise, multiple files will be written to a directory in parallel. | true | -| FORMAT | Specifies the file format COPY query will write out. If single_file_output is false or the format cannot be inferred from the file extension, then FORMAT must be specified. | N/A | +| Option | Description | Default Value | +| ------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------- | +| FORMAT | Specifies the file format COPY query will write out. 
If there is more than one output file or the format cannot be inferred from the file extension, then FORMAT must be specified. | N/A |
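
As a minimal usage sketch of the path-based behavior described above (assuming a table named `source_table` exists; the output paths are illustrative, not taken from the test suite):

```sql
-- Path ends with '/': treated as a folder; one file is written per output partition
COPY source_table TO 'my_output_dir/' (FORMAT parquet);

-- Path has no trailing '/': all data is written to this single file
COPY source_table TO 'my_output_file.parquet' (FORMAT parquet);
```

### JSON Format Specific Options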