This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch branch-2.1-lakehouse in repository https://gitbox.apache.org/repos/asf/doris.git
commit 5179917c7852b8bdda16c65788e928cc32ccadb0 Author: daidai <changyu...@selectdb.com> AuthorDate: Fri Jan 10 18:18:27 2025 +0800 [fix](hive)fix select count(*) hive full acid tb opt error. (#46732) ### What problem does this PR solve? Problem Summary: Follow-up to PR #44038. That PR optimized how splits are generated in the count(*) scenario, but it did not handle Hive ACID tables correctly. This PR fixes that and adds tests. In the count(*) scenario, reading a Hive full ACID table cannot use the optimization: the files still need to be split, because merge on read is required. A Hive insert-only ACID table does not need to be split. --- .../vec/exec/format/table/transactional_hive_reader.cpp | 1 + .../doris/datasource/hive/source/HiveScanNode.java | 4 ++-- .../external_table_p0/hive/test_transactional_hive.out | Bin 835 -> 925 bytes .../hive/test_hive_translation_insert_only.out | Bin 181 -> 235 bytes .../hive/test_transactional_hive.groovy | 12 ++++++++++++ .../hive/test_hive_translation_insert_only.groovy | 5 +++++ 6 files changed, 20 insertions(+), 2 deletions(-) diff --git a/be/src/vec/exec/format/table/transactional_hive_reader.cpp b/be/src/vec/exec/format/table/transactional_hive_reader.cpp index 18642ab1218..caf24270018 100644 --- a/be/src/vec/exec/format/table/transactional_hive_reader.cpp +++ b/be/src/vec/exec/format/table/transactional_hive_reader.cpp @@ -205,6 +205,7 @@ Status TransactionalHiveReader::init_row_filters(const TFileRangeDesc& range, ++num_delete_files; } if (num_delete_rows > 0) { + orc_reader->set_push_down_agg_type(TPushAggOp::NONE); orc_reader->set_delete_rows(&_delete_rows); COUNTER_UPDATE(_transactional_orc_profile.num_delete_files, num_delete_files); COUNTER_UPDATE(_transactional_orc_profile.num_delete_rows, num_delete_rows); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java index d66a4c08e9d..890f6147f33 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java @@ -296,12 +296,12 @@ public class HiveScanNode extends FileQueryScanNode { * we don't need to split the file because for parquet/orc format, only metadata is read. * If we split the file, we will read metadata of a file multiple times, which is not efficient. * - * - Hive Transactional Table may need merge on read, so do not apply this optimization. + * - Hive Full Acid Transactional Table may need merge on read, so do not apply this optimization. * - If the file format is not parquet/orc, eg, text, we need to split the file to increase the parallelism. */ boolean needSplit = true; if (getPushDownAggNoGroupingOp() == TPushAggOp.COUNT - && hiveTransaction != null) { + && !(hmsTable.isHiveTransactionalTable() && hmsTable.isFullAcidTable())) { int totalFileNum = 0; for (FileCacheValue fileCacheValue : fileCaches) { if (fileCacheValue.getFiles() != null) { diff --git a/regression-test/data/external_table_p0/hive/test_transactional_hive.out b/regression-test/data/external_table_p0/hive/test_transactional_hive.out index 060fa8c048e..94e32a43db7 100644 Binary files a/regression-test/data/external_table_p0/hive/test_transactional_hive.out and b/regression-test/data/external_table_p0/hive/test_transactional_hive.out differ diff --git a/regression-test/data/external_table_p2/hive/test_hive_translation_insert_only.out b/regression-test/data/external_table_p2/hive/test_hive_translation_insert_only.out index e4bdb3fe32d..f43a630f4a3 100644 Binary files a/regression-test/data/external_table_p2/hive/test_hive_translation_insert_only.out and b/regression-test/data/external_table_p2/hive/test_hive_translation_insert_only.out differ diff --git 
a/regression-test/suites/external_table_p0/hive/test_transactional_hive.groovy b/regression-test/suites/external_table_p0/hive/test_transactional_hive.groovy index 4f7008ec172..a12ab8a4f78 100644 --- a/regression-test/suites/external_table_p0/hive/test_transactional_hive.groovy +++ b/regression-test/suites/external_table_p0/hive/test_transactional_hive.groovy @@ -114,6 +114,14 @@ suite("test_transactional_hive", "p0,external,hive,external_docker,external_dock } } + + def test_acid_count = { + qt_count_1 """ select count(*) from orc_full_acid; """ // 3 + qt_count_2 """ select count(*) from orc_full_acid_par; """ // 6 + qt_count_3 """ select count(*) from orc_to_acid_compacted_tb; """ //4 + qt_count_4 """ select count(*) from orc_acid_minor; """ //3 + qt_count_5 """ select count(*) from orc_acid_major; """ //3 + } String enabled = context.config.otherConfigs.get("enableHiveTest") @@ -148,6 +156,10 @@ suite("test_transactional_hive", "p0,external,hive,external_docker,external_dock test_acid() test_acid_write() + + test_acid_count() + + sql """drop catalog if exists ${catalog_name}""" } finally { } diff --git a/regression-test/suites/external_table_p2/hive/test_hive_translation_insert_only.groovy b/regression-test/suites/external_table_p2/hive/test_hive_translation_insert_only.groovy index 758417c3237..f7135175152 100644 --- a/regression-test/suites/external_table_p2/hive/test_hive_translation_insert_only.groovy +++ b/regression-test/suites/external_table_p2/hive/test_hive_translation_insert_only.groovy @@ -45,6 +45,11 @@ suite("test_hive_translation_insert_only", "p2,external,hive,external_remote,ext qt_2 """ select * from parquet_insert_only_major order by id """ qt_3 """ select * from orc_insert_only_minor order by id """ + qt_count_1 """ select count(*) from text_insert_only """ //4 + qt_count_2 """ select count(*) from parquet_insert_only_major """ //5 + qt_count_3 """ select count(*) from orc_insert_only_minor """ //5 + + sql """drop catalog 
${hms_catalog_name};""" } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org