This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 1e64740f236 [fix](hive)fix select count(*) hive full acid tb opt error. (#46732) (#46805)
1e64740f236 is described below

commit 1e64740f236f1ada66b4148f75ba5b8446ff6673
Author: daidai <changyu...@selectdb.com>
AuthorDate: Sun Jan 12 08:54:45 2025 +0800

    [fix](hive)fix select count(*) hive full acid tb opt error. (#46732) (#46805)
    
    bp #46732
---
 .../vec/exec/format/table/transactional_hive_reader.cpp   |  1 +
 .../apache/doris/datasource/hive/source/HiveScanNode.java |  4 ++--
 .../external_table_p0/hive/test_transactional_hive.out    | 15 +++++++++++++++
 .../hive/test_hive_translation_insert_only.out            | 10 ++++++++++
 .../external_table_p0/hive/test_transactional_hive.groovy | 12 ++++++++++++
 .../hive/test_hive_translation_insert_only.groovy         |  5 +++++
 6 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/be/src/vec/exec/format/table/transactional_hive_reader.cpp b/be/src/vec/exec/format/table/transactional_hive_reader.cpp
index bc4262b7451..f840b0af252 100644
--- a/be/src/vec/exec/format/table/transactional_hive_reader.cpp
+++ b/be/src/vec/exec/format/table/transactional_hive_reader.cpp
@@ -180,6 +180,7 @@ Status TransactionalHiveReader::init_row_filters(const TFileRangeDesc& range,
         ++num_delete_files;
     }
     if (num_delete_rows > 0) {
+        orc_reader->set_push_down_agg_type(TPushAggOp::NONE);
         orc_reader->set_delete_rows(&_delete_rows);
         COUNTER_UPDATE(_transactional_orc_profile.num_delete_files, num_delete_files);
         COUNTER_UPDATE(_transactional_orc_profile.num_delete_rows, num_delete_rows);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
index 1e09fa6d909..b14dfbf02f4 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
@@ -292,12 +292,12 @@ public class HiveScanNode extends FileQueryScanNode {
          * we don't need to split the file because for parquet/orc format, only metadata is read.
          * If we split the file, we will read metadata of a file multiple times, which is not efficient.
          *
-         * - Hive Transactional Table may need merge on read, so do not apply this optimization.
+         * - Hive Full Acid Transactional Table may need merge on read, so do not apply this optimization.
          * - If the file format is not parquet/orc, eg, text, we need to split the file to increase the parallelism.
          */
         boolean needSplit = true;
         if (getPushDownAggNoGroupingOp() == TPushAggOp.COUNT
-                && hiveTransaction != null) {
+                && !(hmsTable.isHiveTransactionalTable() && hmsTable.isFullAcidTable())) {
             int totalFileNum = 0;
             for (FileCacheValue fileCacheValue : fileCaches) {
                 if (fileCacheValue.getFiles() != null) {
diff --git a/regression-test/data/external_table_p0/hive/test_transactional_hive.out b/regression-test/data/external_table_p0/hive/test_transactional_hive.out
index 060fa8c048e..94e32a43db7 100644
--- a/regression-test/data/external_table_p0/hive/test_transactional_hive.out
+++ b/regression-test/data/external_table_p0/hive/test_transactional_hive.out
@@ -122,3 +122,18 @@ F
 -- !16 --
 4      DD
 
+-- !count_1 --
+3
+
+-- !count_2 --
+6
+
+-- !count_3 --
+4
+
+-- !count_4 --
+3
+
+-- !count_5 --
+3
+
diff --git a/regression-test/data/external_table_p2/hive/test_hive_translation_insert_only.out b/regression-test/data/external_table_p2/hive/test_hive_translation_insert_only.out
index 04fccc9d4c0..f43a630f4a3 100644
--- a/regression-test/data/external_table_p2/hive/test_hive_translation_insert_only.out
+++ b/regression-test/data/external_table_p2/hive/test_hive_translation_insert_only.out
@@ -18,3 +18,13 @@
 3      C
 4      D
 5      E
+
+-- !count_1 --
+4
+
+-- !count_2 --
+5
+
+-- !count_3 --
+5
+
diff --git a/regression-test/suites/external_table_p0/hive/test_transactional_hive.groovy b/regression-test/suites/external_table_p0/hive/test_transactional_hive.groovy
index 81f2358e9da..8c092d40b66 100644
--- a/regression-test/suites/external_table_p0/hive/test_transactional_hive.groovy
+++ b/regression-test/suites/external_table_p0/hive/test_transactional_hive.groovy
@@ -115,6 +115,14 @@ suite("test_transactional_hive", "p0,external,hive,external_docker,external_dock
 
         }
     }
+    
+    def test_acid_count = {
+        qt_count_1 """ select count(*) from orc_full_acid; """ // 3 
+        qt_count_2 """ select count(*) from orc_full_acid_par; """  // 6
+        qt_count_3 """ select count(*) from orc_to_acid_compacted_tb; """ //4
+        qt_count_4 """ select count(*) from orc_acid_minor; """ //3
+        qt_count_5 """ select count(*) from orc_acid_major; """ //3
+    }
 
 
     String enabled = context.config.otherConfigs.get("enableHiveTest")
@@ -149,6 +157,10 @@ suite("test_transactional_hive", "p0,external,hive,external_docker,external_dock
             test_acid()
             test_acid_write()
 
+
+            test_acid_count()
+            
+            
             sql """drop catalog if exists ${catalog_name}"""
         } finally {
         }
diff --git a/regression-test/suites/external_table_p2/hive/test_hive_translation_insert_only.groovy b/regression-test/suites/external_table_p2/hive/test_hive_translation_insert_only.groovy
index 9b021e1dc81..900a031db7b 100644
--- a/regression-test/suites/external_table_p2/hive/test_hive_translation_insert_only.groovy
+++ b/regression-test/suites/external_table_p2/hive/test_hive_translation_insert_only.groovy
@@ -45,6 +45,11 @@ suite("test_hive_translation_insert_only", "p2,external,hive,external_remote,ext
     qt_2 """ select * from parquet_insert_only_major order by id """ 
     qt_3 """ select * from orc_insert_only_minor order by id """ 
 
+    qt_count_1 """ select count(*) from text_insert_only """ //4 
+    qt_count_2 """ select count(*) from parquet_insert_only_major """ //5 
+    qt_count_3 """ select count(*) from orc_insert_only_minor """ //5
+
+
     sql """drop catalog ${hms_catalog_name};"""
 }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to