This is an automated email from the ASF dual-hosted git repository. airborne pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push: new 4ac11ebacc2 [fix] (inverted index) remove tmp columns in block (#39369) (#39655) 4ac11ebacc2 is described below commit 4ac11ebacc2ac646fe6e61b8d673f2c3a2447dc3 Author: Sun Chenyang <csun5...@gmail.com> AuthorDate: Wed Aug 21 09:14:34 2024 +0800 [fix] (inverted index) remove tmp columns in block (#39369) (#39655) ## Proposed changes pick from master #39369 --- be/src/vec/core/block.cpp | 9 +++++++++ be/src/vec/core/block.h | 5 +++++ be/src/vec/exec/scan/vscan_node.cpp | 7 +------ be/src/vec/exec/scan/vscanner.cpp | 9 +++------ be/src/vec/olap/vcollect_iterator.cpp | 19 +++++-------------- .../suites/inverted_index_p0/topn_clear_block.groovy | 10 ++++++++++ 6 files changed, 33 insertions(+), 26 deletions(-) diff --git a/be/src/vec/core/block.cpp b/be/src/vec/core/block.cpp index f8f3d9b8883..a2768633d97 100644 --- a/be/src/vec/core/block.cpp +++ b/be/src/vec/core/block.cpp @@ -704,6 +704,15 @@ void Block::clear_column_data(int column_size) noexcept { row_same_bit.clear(); } +void Block::erase_tmp_columns() noexcept { + auto all_column_names = get_names(); + for (auto& name : all_column_names) { + if (name.rfind(BeConsts::BLOCK_TEMP_COLUMN_PREFIX, 0) == 0) { + erase(name); + } + } +} + void Block::swap(Block& other) noexcept { data.swap(other.data); index_by_name.swap(other.index_by_name); diff --git a/be/src/vec/core/block.h b/be/src/vec/core/block.h index 939597d50b6..a2016851d7e 100644 --- a/be/src/vec/core/block.h +++ b/be/src/vec/core/block.h @@ -408,6 +408,11 @@ public: // for debug purpose. std::string print_use_count(); + // remove tmp columns in block + // in inverted index apply logic, in order to optimize query performance, + // we built some temporary columns into block + void erase_tmp_columns() noexcept; + private: void erase_impl(size_t position); }; diff --git a/be/src/vec/exec/scan/vscan_node.cpp b/be/src/vec/exec/scan/vscan_node.cpp index fe6195d2e3c..580fb275293 100644 --- a/be/src/vec/exec/scan/vscan_node.cpp +++ b/be/src/vec/exec/scan/vscan_node.cpp @@ -228,12 +228,7 @@ Status VScanNode::get_next(RuntimeState* state, vectorized::Block* block, bool* // remove them when query leave scan node to avoid other nodes use block->columns() to make a wrong decision Defer drop_block_temp_column {[&]() { std::unique_lock l(_block_lock); - auto all_column_names = block->get_names(); - for (auto& name : all_column_names) { - if (name.rfind(BeConsts::BLOCK_TEMP_COLUMN_PREFIX, 0) == 0) { - block->erase(name); - } - } + block->erase_tmp_columns(); }}; if (state->is_cancelled()) { diff --git a/be/src/vec/exec/scan/vscanner.cpp b/be/src/vec/exec/scan/vscanner.cpp index 2408f7ec709..5b7c8861a34 100644 --- a/be/src/vec/exec/scan/vscanner.cpp +++ b/be/src/vec/exec/scan/vscanner.cpp @@ -79,6 +79,8 @@ Status VScanner::get_block(RuntimeState* state, Block* block, bool* eof) { RETURN_IF_ERROR(_get_block_impl(state, block, eof)); if (*eof) { DCHECK(block->rows() == 0); + // clear TEMP columns to avoid column align problem + block->erase_tmp_columns(); break; } _num_rows_read += block->rows(); @@ -115,6 +117,7 @@ Status VScanner::get_block(RuntimeState* state, Block* block, bool* eof) { } Status VScanner::_filter_output_block(Block* block) { + Defer clear_tmp_block([&]() { block->erase_tmp_columns(); }); if (block->has(BeConsts::BLOCK_TEMP_COLUMN_SCANNER_FILTERED)) { // scanner filter_block is already done (only by _topn_next currently), just skip it return Status::OK(); @@ -122,12 +125,6 @@ Status VScanner::_filter_output_block(Block* block) { auto old_rows = block->rows(); Status st = VExprContext::filter_block(_conjuncts, block, block->columns()); _counter.num_rows_unselected += old_rows - block->rows(); - auto all_column_names = block->get_names(); - for (auto& name : all_column_names) { - if (name.rfind(BeConsts::BLOCK_TEMP_COLUMN_PREFIX, 0) == 0) { - block->erase(name); - } - } return st; } diff --git a/be/src/vec/olap/vcollect_iterator.cpp b/be/src/vec/olap/vcollect_iterator.cpp index 5290fbcc3d5..899e011f0df 100644 --- a/be/src/vec/olap/vcollect_iterator.cpp +++ b/be/src/vec/olap/vcollect_iterator.cpp @@ -256,18 +256,7 @@ Status VCollectIterator::_topn_next(Block* block) { } // clear TEMP columns to avoid column align problem - auto clear_temp_columns = [](Block* block) { - auto all_column_names = block->get_names(); - for (auto& name : all_column_names) { - if (name.rfind(BeConsts::BLOCK_TEMP_COLUMN_PREFIX, 0) == 0) { - // clear TEMP columns from block to prevent from storage engine merge with this - // fake column - block->erase(name); - } - } - }; - - clear_temp_columns(block); + block->erase_tmp_columns(); auto clone_block = block->clone_empty(); MutableBlock mutable_block = vectorized::MutableBlock::build_mutable_block(&clone_block); @@ -304,7 +293,7 @@ Status VCollectIterator::_topn_next(Block* block) { eof = true; if (block->rows() == 0) { // clear TEMP columns to avoid column align problem in segment iterator - clear_temp_columns(block); + block->erase_tmp_columns(); break; } } else { @@ -318,7 +307,7 @@ Status VCollectIterator::_topn_next(Block* block) { RETURN_IF_ERROR(VExprContext::filter_block( _reader->_reader_context.filter_block_conjuncts, block, block->columns())); // clear TMPE columns to avoid column align problem in mutable_block.add_rows bellow - clear_temp_columns(block); + block->erase_tmp_columns(); // update read rows read_rows += block->rows(); @@ -860,6 +849,8 @@ Status VCollectIterator::Level1Iterator::_normal_next(Block* block) { if (!_children.empty()) { _cur_child = std::move(*(_children.begin())); _children.pop_front(); + // clear TEMP columns to avoid column align problem + block->erase_tmp_columns(); return _normal_next(block); } else { _cur_child.reset(); diff --git a/regression-test/suites/inverted_index_p0/topn_clear_block.groovy b/regression-test/suites/inverted_index_p0/topn_clear_block.groovy index 7486a658d60..586173e0f3e 100644 --- a/regression-test/suites/inverted_index_p0/topn_clear_block.groovy +++ b/regression-test/suites/inverted_index_p0/topn_clear_block.groovy @@ -78,7 +78,17 @@ suite("test_clear_block") { sql """ delete from dup_httplogs where size = 24736; """ sql """ delete from dup_httplogs where request = 'GET /images/hm_bg.jpg HTTP/1.0'; """ + sql """ set enable_match_without_inverted_index = false """ sql """ sync """ qt_sql """ SELECT clientip from ${dupTableName} WHERE clientip NOT IN (NULL, '') or clientip IN ('17.0.0.0') ORDER BY id LIMIT 2 """ + + def result1 = sql """ SELECT clientip from ${dupTableName} WHERE clientip NOT IN (NULL, '') or clientip IN ('17.0.0.0') ORDER BY id LIMIT 5000 """ + def result2 = sql """ SELECT clientip from ${dupTableName} WHERE clientip NOT IN (NULL, '') or clientip IN ('17.0.0.0') ORDER BY id LIMIT 5000 """ + if (result1 != result2) { + logger.info("result1 is: {}", result1) + logger.info("result2 is: {}", result2) + assertTrue(false) + } + } \ No newline at end of file --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org