This is an automated email from the ASF dual-hosted git repository.

airborne pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 4ac11ebacc2 [fix] (inverted index) remove tmp columns in block 
(#39369) (#39655)
4ac11ebacc2 is described below

commit 4ac11ebacc2ac646fe6e61b8d673f2c3a2447dc3
Author: Sun Chenyang <csun5...@gmail.com>
AuthorDate: Wed Aug 21 09:14:34 2024 +0800

    [fix] (inverted index) remove tmp columns in block (#39369) (#39655)
    
    ## Proposed changes
    
    pick from master #39369
---
 be/src/vec/core/block.cpp                             |  9 +++++++++
 be/src/vec/core/block.h                               |  5 +++++
 be/src/vec/exec/scan/vscan_node.cpp                   |  7 +------
 be/src/vec/exec/scan/vscanner.cpp                     |  9 +++------
 be/src/vec/olap/vcollect_iterator.cpp                 | 19 +++++--------------
 .../suites/inverted_index_p0/topn_clear_block.groovy  | 10 ++++++++++
 6 files changed, 33 insertions(+), 26 deletions(-)

diff --git a/be/src/vec/core/block.cpp b/be/src/vec/core/block.cpp
index f8f3d9b8883..a2768633d97 100644
--- a/be/src/vec/core/block.cpp
+++ b/be/src/vec/core/block.cpp
@@ -704,6 +704,15 @@ void Block::clear_column_data(int column_size) noexcept {
     row_same_bit.clear();
 }
 
+void Block::erase_tmp_columns() noexcept {
+    auto all_column_names = get_names();
+    for (auto& name : all_column_names) {
+        if (name.rfind(BeConsts::BLOCK_TEMP_COLUMN_PREFIX, 0) == 0) {
+            erase(name);
+        }
+    }
+}
+
 void Block::swap(Block& other) noexcept {
     data.swap(other.data);
     index_by_name.swap(other.index_by_name);
diff --git a/be/src/vec/core/block.h b/be/src/vec/core/block.h
index 939597d50b6..a2016851d7e 100644
--- a/be/src/vec/core/block.h
+++ b/be/src/vec/core/block.h
@@ -408,6 +408,11 @@ public:
     // for debug purpose.
     std::string print_use_count();
 
+    // remove tmp columns in block
+    // in inverted index apply logic, in order to optimize query performance,
+    // we built some temporary columns into block
+    void erase_tmp_columns() noexcept;
+
 private:
     void erase_impl(size_t position);
 };
diff --git a/be/src/vec/exec/scan/vscan_node.cpp 
b/be/src/vec/exec/scan/vscan_node.cpp
index fe6195d2e3c..580fb275293 100644
--- a/be/src/vec/exec/scan/vscan_node.cpp
+++ b/be/src/vec/exec/scan/vscan_node.cpp
@@ -228,12 +228,7 @@ Status VScanNode::get_next(RuntimeState* state, 
vectorized::Block* block, bool*
     // remove them when query leave scan node to avoid other nodes use 
block->columns() to make a wrong decision
     Defer drop_block_temp_column {[&]() {
         std::unique_lock l(_block_lock);
-        auto all_column_names = block->get_names();
-        for (auto& name : all_column_names) {
-            if (name.rfind(BeConsts::BLOCK_TEMP_COLUMN_PREFIX, 0) == 0) {
-                block->erase(name);
-            }
-        }
+        block->erase_tmp_columns();
     }};
 
     if (state->is_cancelled()) {
diff --git a/be/src/vec/exec/scan/vscanner.cpp 
b/be/src/vec/exec/scan/vscanner.cpp
index 2408f7ec709..5b7c8861a34 100644
--- a/be/src/vec/exec/scan/vscanner.cpp
+++ b/be/src/vec/exec/scan/vscanner.cpp
@@ -79,6 +79,8 @@ Status VScanner::get_block(RuntimeState* state, Block* block, 
bool* eof) {
                 RETURN_IF_ERROR(_get_block_impl(state, block, eof));
                 if (*eof) {
                     DCHECK(block->rows() == 0);
+                    // clear TEMP columns to avoid column align problem
+                    block->erase_tmp_columns();
                     break;
                 }
                 _num_rows_read += block->rows();
@@ -115,6 +117,7 @@ Status VScanner::get_block(RuntimeState* state, Block* 
block, bool* eof) {
 }
 
 Status VScanner::_filter_output_block(Block* block) {
+    Defer clear_tmp_block([&]() { block->erase_tmp_columns(); });
     if (block->has(BeConsts::BLOCK_TEMP_COLUMN_SCANNER_FILTERED)) {
         // scanner filter_block is already done (only by _topn_next 
currently), just skip it
         return Status::OK();
@@ -122,12 +125,6 @@ Status VScanner::_filter_output_block(Block* block) {
     auto old_rows = block->rows();
     Status st = VExprContext::filter_block(_conjuncts, block, 
block->columns());
     _counter.num_rows_unselected += old_rows - block->rows();
-    auto all_column_names = block->get_names();
-    for (auto& name : all_column_names) {
-        if (name.rfind(BeConsts::BLOCK_TEMP_COLUMN_PREFIX, 0) == 0) {
-            block->erase(name);
-        }
-    }
     return st;
 }
 
diff --git a/be/src/vec/olap/vcollect_iterator.cpp 
b/be/src/vec/olap/vcollect_iterator.cpp
index 5290fbcc3d5..899e011f0df 100644
--- a/be/src/vec/olap/vcollect_iterator.cpp
+++ b/be/src/vec/olap/vcollect_iterator.cpp
@@ -256,18 +256,7 @@ Status VCollectIterator::_topn_next(Block* block) {
     }
 
     // clear TEMP columns to avoid column align problem
-    auto clear_temp_columns = [](Block* block) {
-        auto all_column_names = block->get_names();
-        for (auto& name : all_column_names) {
-            if (name.rfind(BeConsts::BLOCK_TEMP_COLUMN_PREFIX, 0) == 0) {
-                // clear TEMP columns from block to prevent from storage 
engine merge with this
-                // fake column
-                block->erase(name);
-            }
-        }
-    };
-
-    clear_temp_columns(block);
+    block->erase_tmp_columns();
     auto clone_block = block->clone_empty();
     MutableBlock mutable_block = 
vectorized::MutableBlock::build_mutable_block(&clone_block);
 
@@ -304,7 +293,7 @@ Status VCollectIterator::_topn_next(Block* block) {
                     eof = true;
                     if (block->rows() == 0) {
                         // clear TEMP columns to avoid column align problem in 
segment iterator
-                        clear_temp_columns(block);
+                        block->erase_tmp_columns();
                         break;
                     }
                 } else {
@@ -318,7 +307,7 @@ Status VCollectIterator::_topn_next(Block* block) {
             RETURN_IF_ERROR(VExprContext::filter_block(
                     _reader->_reader_context.filter_block_conjuncts, block, 
block->columns()));
             // clear TMPE columns to avoid column align problem in 
mutable_block.add_rows bellow
-            clear_temp_columns(block);
+            block->erase_tmp_columns();
 
             // update read rows
             read_rows += block->rows();
@@ -860,6 +849,8 @@ Status 
VCollectIterator::Level1Iterator::_normal_next(Block* block) {
         if (!_children.empty()) {
             _cur_child = std::move(*(_children.begin()));
             _children.pop_front();
+            // clear TEMP columns to avoid column align problem
+            block->erase_tmp_columns();
             return _normal_next(block);
         } else {
             _cur_child.reset();
diff --git a/regression-test/suites/inverted_index_p0/topn_clear_block.groovy 
b/regression-test/suites/inverted_index_p0/topn_clear_block.groovy
index 7486a658d60..586173e0f3e 100644
--- a/regression-test/suites/inverted_index_p0/topn_clear_block.groovy
+++ b/regression-test/suites/inverted_index_p0/topn_clear_block.groovy
@@ -78,7 +78,17 @@ suite("test_clear_block") {
     sql """ delete from dup_httplogs where size = 24736; """
     sql """ delete from dup_httplogs where request = 'GET /images/hm_bg.jpg 
HTTP/1.0'; """
 
+    sql """ set enable_match_without_inverted_index = false """
     sql """ sync """
 
     qt_sql """ SELECT clientip from ${dupTableName} WHERE clientip NOT IN 
(NULL, '') or clientip IN ('17.0.0.0') ORDER BY id LIMIT 2 """
+        
+    def result1 = sql """ SELECT clientip from ${dupTableName} WHERE clientip 
NOT IN (NULL, '') or clientip IN ('17.0.0.0') ORDER BY id LIMIT 5000 """
+    def result2 = sql """ SELECT clientip from ${dupTableName} WHERE clientip 
NOT IN (NULL, '') or clientip IN ('17.0.0.0') ORDER BY id LIMIT 5000 """
+    if (result1 != result2) {
+        logger.info("result1 is: {}", result1)
+        logger.info("result2 is: {}", result2)
+        assertTrue(false)
+    }
+    
 }
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to