From 23d7b6fce2dd0d273d38fbd597f2f84c76bc51dd Mon Sep 17 00:00:00 2001 From: Kyligence Git Date: Fri, 6 Sep 2024 11:53:44 -0500 Subject: [PATCH] [GLUTEN-1632][CH]Daily Update Clickhouse Version (20240906) (#7137) * [GLUTEN-1632][CH]Daily Update Clickhouse Version (20240906) * Fix build due to https://github.com/ClickHouse/ClickHouse/pull/65832 * Fix UT due to https://github.com/ClickHouse/ClickHouse/pull/65832 * Fix conflict with https://github.com/apache/incubator-gluten/pull/7122 * Fix conflict with https://github.com/apache/incubator-gluten/pull/7029 * Run GlutenClickHouseMergeTreeCacheDataSSuite locally --------- Co-authored-by: kyligence-git Co-authored-by: Chang Chen --- ...enClickHouseMergeTreeCacheDataSSuite.scala | 1 + cpp-ch/clickhouse.version | 4 ++-- .../Parser/MergeTreeRelParser.cpp | 3 +-- .../Storages/Cache/CacheManager.cpp | 3 +-- .../Storages/MergeTree/SparkMergeTreeMeta.cpp | 1 - .../MergeTree/SparkStorageMergeTree.h | 20 ++++++++++++++++++- 6 files changed, 24 insertions(+), 8 deletions(-) diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseMergeTreeCacheDataSSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseMergeTreeCacheDataSSuite.scala index 960c92178993..a55067185e68 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseMergeTreeCacheDataSSuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseMergeTreeCacheDataSSuite.scala @@ -58,6 +58,7 @@ class GlutenClickHouseMergeTreeCacheDataSSuite .set( "spark.gluten.sql.columnar.backend.ch.runtime_settings.mergetree.merge_after_insert", "false") + // .set("spark.gluten.sql.columnar.backend.ch.runtime_config.path", "/data") // for local test } override protected def beforeEach(): Unit = { diff --git a/cpp-ch/clickhouse.version b/cpp-ch/clickhouse.version index 93bf97d78cee..0b81f8afe85d 100644 --- a/cpp-ch/clickhouse.version +++ 
b/cpp-ch/clickhouse.version @@ -1,3 +1,3 @@ CH_ORG=Kyligence -CH_BRANCH=rebase_ch/20240830 -CH_COMMIT=5e2eaab52ac +CH_BRANCH=rebase_ch/20240906 +CH_COMMIT=4513a954b95 \ No newline at end of file diff --git a/cpp-ch/local-engine/Parser/MergeTreeRelParser.cpp b/cpp-ch/local-engine/Parser/MergeTreeRelParser.cpp index b85ed25e15dc..20340a09907c 100644 --- a/cpp-ch/local-engine/Parser/MergeTreeRelParser.cpp +++ b/cpp-ch/local-engine/Parser/MergeTreeRelParser.cpp @@ -96,8 +96,7 @@ DB::QueryPlanPtr MergeTreeRelParser::parseReadRel( auto read_step = storage->reader.readFromParts( selected_parts, - /* alter_conversions = */ - {}, + storage->getMutationsSnapshot({}), names_and_types_list.getNames(), storage_snapshot, *query_info, diff --git a/cpp-ch/local-engine/Storages/Cache/CacheManager.cpp b/cpp-ch/local-engine/Storages/Cache/CacheManager.cpp index e74a74ae0fa6..a2943f0b72d4 100644 --- a/cpp-ch/local-engine/Storages/Cache/CacheManager.cpp +++ b/cpp-ch/local-engine/Storages/Cache/CacheManager.cpp @@ -101,8 +101,7 @@ Task CacheManager::cachePart(const MergeTreeTableInstance & table, const MergeTr = StorageMergeTreeFactory::getDataPartsByNames(storage->getStorageID(), "", {job_detail.table.parts.front().name}); auto read_step = storage->reader.readFromParts( selected_parts, - /* alter_conversions = */ - {}, + storage->getMutationsSnapshot({}), names_and_types_list.getNames(), storage_snapshot, *query_info, diff --git a/cpp-ch/local-engine/Storages/MergeTree/SparkMergeTreeMeta.cpp b/cpp-ch/local-engine/Storages/MergeTree/SparkMergeTreeMeta.cpp index 788b1b39fc20..2921fc887f32 100644 --- a/cpp-ch/local-engine/Storages/MergeTree/SparkMergeTreeMeta.cpp +++ b/cpp-ch/local-engine/Storages/MergeTree/SparkMergeTreeMeta.cpp @@ -286,7 +286,6 @@ RangesInDataParts MergeTreeTableInstance::extractRange(DataPartsVector parts_vec ranges_in_data_part.data_part = name_index.at(part.name); ranges_in_data_part.part_index_in_query = 0; 
ranges_in_data_part.ranges.emplace_back(MarkRange(part.begin, part.end)); - ranges_in_data_part.alter_conversions = std::make_shared(); return ranges_in_data_part; }); return ranges_in_data_parts; diff --git a/cpp-ch/local-engine/Storages/MergeTree/SparkStorageMergeTree.h b/cpp-ch/local-engine/Storages/MergeTree/SparkStorageMergeTree.h index fbd19de643ea..7d51a731b77c 100644 --- a/cpp-ch/local-engine/Storages/MergeTree/SparkStorageMergeTree.h +++ b/cpp-ch/local-engine/Storages/MergeTree/SparkStorageMergeTree.h @@ -50,6 +50,19 @@ class SparkStorageMergeTree : public MergeTreeData { friend class MergeSparkMergeTreeTask; + struct SparkMutationsSnapshot : public IMutationsSnapshot + { + SparkMutationsSnapshot() = default; + + MutationCommands getAlterMutationCommandsForPart(const MergeTreeData::DataPartPtr & part) const override { return {}; } + std::shared_ptr cloneEmpty() const override + { + return std::make_shared(); + } + + NameSet getAllUpdatedColumns() const override { return {}; } + }; + public: static void wrapRangesInDataParts(DB::ReadFromMergeTree & source, const DB::RangesInDataParts & ranges); static void analysisPartsByRanges(DB::ReadFromMergeTree & source, const DB::RangesInDataParts & ranges_in_data_parts); @@ -94,8 +107,13 @@ class SparkStorageMergeTree : public MergeTreeData void replacePartitionFrom(const StoragePtr & source_table, const ASTPtr & partition, bool replace, ContextPtr context) override; void movePartitionToTable(const StoragePtr & dest_table, const ASTPtr & partition, ContextPtr context) override; bool partIsAssignedToBackgroundOperation(const DataPartPtr & part) const override; - MutationCommands getAlterMutationCommandsForPart(const DataPartPtr & /*part*/) const override { return {}; } void attachRestoredParts(MutableDataPartsVector && /*parts*/) override { throw std::runtime_error("not implement"); } + +public: + MutationsSnapshotPtr getMutationsSnapshot(const IMutationsSnapshot::Params & /*params*/) const override + { + return 
std::make_shared<SparkMutationsSnapshot>(); + }; }; class SparkWriteStorageMergeTree final : public SparkStorageMergeTree