This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 9756be6bf0 [improvement](stream-load) use vector instead of skiplist when insert dup keys (#18686)
9756be6bf0 is described below

commit 9756be6bf04f4fbf6234885c7cd5223350af07d4
Author: huanghaibin <284824...@qq.com>
AuthorDate: Sun Apr 23 09:40:09 2023 +0800

    [improvement](stream-load) use vector instead of skiplist when insert dup keys (#18686)
---
 be/src/olap/memtable.cpp | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/be/src/olap/memtable.cpp b/be/src/olap/memtable.cpp
index 1b582b2064..b42d524ef5 100644
--- a/be/src/olap/memtable.cpp
+++ b/be/src/olap/memtable.cpp
@@ -214,8 +214,7 @@ void MemTable::_insert_one_row_from_block(RowInBlock* row_in_block) {
     _rows++;
     bool overwritten = false;
     if (_keys_type == KeysType::DUP_KEYS) {
-        // TODO: dup keys only need sort opertaion. Rethink skiplist is the beat way to sort columns?
-        _vec_skip_list->Insert(row_in_block, &overwritten);
+        // for dup keys, already store row_in_block in vector and will sort it on flush stage.
         DCHECK(!overwritten) << "Duplicate key model meet overwrite in SkipList";
         return;
     }
@@ -266,11 +265,23 @@ void MemTable::_collect_vskiplist_results() {
     VecTable::Iterator it(_vec_skip_list.get());
     vectorized::Block in_block = _input_mutable_block.to_block();
     if (_keys_type == KeysType::DUP_KEYS) {
+        vectorized::MutableBlock mutable_block =
+                vectorized::MutableBlock::build_mutable_block(&in_block);
+        _vec_row_comparator->set_block(&mutable_block);
+        std::sort(_row_in_blocks.begin(), _row_in_blocks.end(),
+                  [this](const RowInBlock* l, const RowInBlock* r) -> bool {
+                      auto value = (*(this->_vec_row_comparator))(l, r);
+                      if (value == 0) {
+                          return l->_row_pos > r->_row_pos;
+                      } else {
+                          return value < 0;
+                      }
+                  });
         std::vector<int> row_pos_vec;
         DCHECK(in_block.rows() <= std::numeric_limits<int>::max());
         row_pos_vec.reserve(in_block.rows());
-        for (it.SeekToFirst(); it.Valid(); it.Next()) {
-            row_pos_vec.emplace_back(it.key()->_row_pos);
+        for (int i = 0; i < _row_in_blocks.size(); i++) {
+            row_pos_vec.emplace_back(_row_in_blocks[i]->_row_pos);
         }
         _output_mutable_block.add_rows(&in_block, row_pos_vec.data(),
                                        row_pos_vec.data() + in_block.rows());


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to