This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git

The following commit(s) were added to refs/heads/master by this push:
     new 9756be6bf0 [improvement](stream-load) use vector instead of skiplist when insert dup keys (#18686)
9756be6bf0 is described below

commit 9756be6bf04f4fbf6234885c7cd5223350af07d4
Author: huanghaibin <284824...@qq.com>
AuthorDate: Sun Apr 23 09:40:09 2023 +0800

    [improvement](stream-load) use vector instead of skiplist when insert dup keys (#18686)
---
 be/src/olap/memtable.cpp | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/be/src/olap/memtable.cpp b/be/src/olap/memtable.cpp
index 1b582b2064..b42d524ef5 100644
--- a/be/src/olap/memtable.cpp
+++ b/be/src/olap/memtable.cpp
@@ -214,8 +214,7 @@ void MemTable::_insert_one_row_from_block(RowInBlock* row_in_block) {
     _rows++;
     bool overwritten = false;
     if (_keys_type == KeysType::DUP_KEYS) {
-        // TODO: dup keys only need sort opertaion. Rethink skiplist is the beat way to sort columns?
-        _vec_skip_list->Insert(row_in_block, &overwritten);
+        // for dup keys, already store row_in_block in vector and will sort it on flush stage.
         DCHECK(!overwritten) << "Duplicate key model meet overwrite in SkipList";
         return;
     }
@@ -266,11 +265,23 @@ void MemTable::_collect_vskiplist_results() {
     VecTable::Iterator it(_vec_skip_list.get());
     vectorized::Block in_block = _input_mutable_block.to_block();
     if (_keys_type == KeysType::DUP_KEYS) {
+        vectorized::MutableBlock mutable_block =
+                vectorized::MutableBlock::build_mutable_block(&in_block);
+        _vec_row_comparator->set_block(&mutable_block);
+        std::sort(_row_in_blocks.begin(), _row_in_blocks.end(),
+                  [this](const RowInBlock* l, const RowInBlock* r) -> bool {
+                      auto value = (*(this->_vec_row_comparator))(l, r);
+                      if (value == 0) {
+                          return l->_row_pos > r->_row_pos;
+                      } else {
+                          return value < 0;
+                      }
+                  });
         std::vector<int> row_pos_vec;
         DCHECK(in_block.rows() <= std::numeric_limits<int>::max());
         row_pos_vec.reserve(in_block.rows());
-        for (it.SeekToFirst(); it.Valid(); it.Next()) {
-            row_pos_vec.emplace_back(it.key()->_row_pos);
+        for (int i = 0; i < _row_in_blocks.size(); i++) {
+            row_pos_vec.emplace_back(_row_in_blocks[i]->_row_pos);
         }
         _output_mutable_block.add_rows(&in_block, row_pos_vec.data(),
                                        row_pos_vec.data() + in_block.rows());


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org