zhannngchen commented on code in PR #24788: URL: https://github.com/apache/doris/pull/24788#discussion_r1378535603
########## be/src/olap/rowset/segment_v2/segment.cpp: ########## @@ -284,13 +284,17 @@ Status Segment::load_index() { Status Segment::_load_index_impl() { return _load_index_once.call([this] { + bool load_short_key_index = _tablet_schema->keys_type() != UNIQUE_KEYS || Review Comment: Seems no need to change here? MoW table will not load short key index, and `CLUSTER BY` now only works on MoW table ########## be/src/olap/rowset/segment_v2/segment.cpp: ########## @@ -463,6 +475,25 @@ Status Segment::lookup_row_key(const Slice& key, bool with_seq_col, RowLocation* "key with higher sequence id exists"); } } + if (!has_seq_col && has_rowid) { + Slice sought_key_without_rowid = + Slice(sought_key.get_data(), sought_key.get_size() - rowid_length); + // compare key + if (key_without_seq.compare(sought_key_without_rowid) != 0) { + return Status::Error<ErrorCode::KEY_NOT_FOUND>("Can't find key in the segment"); + } + } + if (has_rowid) { Review Comment: Add comment here: ``` // found the key, use rowid in pk index if necessary. ``` ########## be/src/olap/rowset/segment_v2/segment_writer.cpp: ########## @@ -816,6 +904,33 @@ std::string SegmentWriter::_full_encode_keys( return encoded_keys; } +std::string SegmentWriter::_full_encode_keys( + std::vector<const KeyCoder*>& key_coders, Review Comment: should be a const reference type ########## be/src/olap/rowset/segment_v2/segment_writer.cpp: ########## @@ -816,6 +904,33 @@ std::string SegmentWriter::_full_encode_keys( return encoded_keys; } +std::string SegmentWriter::_full_encode_keys( Review Comment: you can rewrite the previous method in this way: ``` std::string SegmentWriter::_full_encode_keys( const std::vector<vectorized::IOlapColumnDataAccessor*>& key_columns, size_t pos, bool null_first) { _full_encode_keys(_key_coders, key_columns, pos, null_first); } ``` ########## be/src/olap/rowset/segment_v2/segment_iterator.cpp: ########## @@ -1212,6 +1213,12 @@ Status SegmentIterator::_lookup_ordinal_from_pk_index(const RowCursor& key, bool // The sequence column needs to be removed from primary key index when comparing key bool has_seq_col = _segment->_tablet_schema->has_sequence_col(); + bool has_rowid = !_segment->_tablet_schema->cluster_key_idxes().empty(); Review Comment: Is it possiable to add a new page type for IndexedColumnWrter that can write key:value into it, using the rowid as value. In this case, we don't need to process logics of rowid here. ########## be/src/olap/rowset/segment_v2/segment_iterator.cpp: ########## @@ -1228,13 +1235,31 @@ Status SegmentIterator::_lookup_ordinal_from_pk_index(const RowCursor& key, bool Slice sought_key = Slice(index_column->get_data_at(0).data, index_column->get_data_at(0).size); Slice sought_key_without_seq = - Slice(sought_key.get_data(), sought_key.get_size() - seq_col_length); + Slice(sought_key.get_data(), sought_key.get_size() - seq_col_length - rowid_length); // compare key if (Slice(index_key).compare(sought_key_without_seq) == 0) { exact_match = true; } } + if (!has_seq_col && has_rowid) { Review Comment: else if (has_rowid) ########## be/src/olap/rowset/segment_v2/segment.h: ########## @@ -95,16 +95,17 @@ class Segment : public std::enable_shared_from_this<Segment> { std::unique_ptr<InvertedIndexIterator>* iter); const ShortKeyIndexDecoder* get_short_key_index() const { - DCHECK(_load_index_once.has_called() && _load_index_once.stored_result().ok()); + // DCHECK(_load_index_once.has_called() && _load_index_once.stored_result().ok()); Review Comment: why remove the DCHECK? ########## be/src/olap/tablet.h: ########## @@ -423,7 +423,7 @@ class Tablet final : public BaseTablet { const std::vector<RowsetSharedPtr>& specified_rowsets, RowLocation* row_location, uint32_t version, std::vector<std::unique_ptr<SegmentCacheHandle>>& segment_caches, - RowsetSharedPtr* rowset = nullptr); + RowsetSharedPtr* rowset = nullptr, bool with_rowid = true); Review Comment: why default value is true? ########## be/src/olap/rowset/segment_v2/segment_writer.cpp: ########## @@ -745,22 +774,81 @@ Status SegmentWriter::append_block(const vectorized::Block* block, size_t row_po converted_result.second->get_data(), num_rows)); } if (_has_key) { - if (_tablet_schema->keys_type() == UNIQUE_KEYS && _opts.enable_unique_key_merge_on_write) { + bool need_primary_key_indexes = (_tablet_schema->keys_type() == UNIQUE_KEYS && + _opts.enable_unique_key_merge_on_write); + bool need_short_key_indexes = + !need_primary_key_indexes || + (need_primary_key_indexes && _tablet_schema->cluster_key_idxes().size() > 0); + if (need_primary_key_indexes) { // create primary indexes - std::string last_key; - for (size_t pos = 0; pos < num_rows; pos++) { - std::string key = _full_encode_keys(key_columns, pos); - if (_tablet_schema->has_sequence_col()) { - _encode_seq_column(seq_column, pos, &key); + if (!need_short_key_indexes) { Review Comment: Maybe you need to add some member function to make the logic here more clear. Such as: ``` _append_for_pk() _append_for_short_key() _append_for_cluster_key() ``` ########## be/src/olap/rowset/segment_v2/segment_writer.cpp: ########## @@ -745,22 +774,81 @@ Status SegmentWriter::append_block(const vectorized::Block* block, size_t row_po converted_result.second->get_data(), num_rows)); } if (_has_key) { - if (_tablet_schema->keys_type() == UNIQUE_KEYS && _opts.enable_unique_key_merge_on_write) { + bool need_primary_key_indexes = (_tablet_schema->keys_type() == UNIQUE_KEYS && + _opts.enable_unique_key_merge_on_write); Review Comment: Add comments here, for now we don't need to query short key index for `CLUSTER BY` feature, but we still write the index for future usage. ########## be/src/olap/memtable.cpp: ########## @@ -290,6 +290,56 @@ size_t MemTable::_sort() { return same_keys_num; } +void MemTable::_sort_by_cluster_keys() { + SCOPED_RAW_TIMER(&_stat.sort_ns); + _stat.sort_times++; + // sort all rows + vectorized::Block in_block = _output_mutable_block.to_block(); + auto cloneBlock = in_block.clone_without_columns(); + _output_mutable_block = vectorized::MutableBlock::build_mutable_block(&cloneBlock); + vectorized::MutableBlock mutable_block = + vectorized::MutableBlock::build_mutable_block(&in_block); + + std::vector<RowInBlock*> row_in_blocks; + std::unique_ptr<int, std::function<void(int*)>> row_in_blocks_deleter((int*)0x01, [&](int*) { + std::for_each(row_in_blocks.begin(), row_in_blocks.end(), + std::default_delete<RowInBlock>()); + }); + row_in_blocks.reserve(mutable_block.rows()); + for (size_t i = 0; i < mutable_block.rows(); i++) { + row_in_blocks.emplace_back(new RowInBlock {i}); + } + Tie tie = Tie(0, mutable_block.rows()); + + for (auto i : _tablet_schema->cluster_key_idxes()) { + auto cmp = [&](const RowInBlock* lhs, const RowInBlock* rhs) -> int { + return mutable_block.compare_one_column(lhs->_row_pos, rhs->_row_pos, i, -1); + }; + _sort_one_column(row_in_blocks, tie, cmp); + } + + // sort extra round by _row_pos to make the sort stable + auto iter = tie.iter(); + while (iter.next()) { + pdqsort(std::next(row_in_blocks.begin(), iter.left()), + std::next(row_in_blocks.begin(), iter.right()), + [](const RowInBlock* lhs, const RowInBlock* rhs) -> bool { + return lhs->_row_pos < rhs->_row_pos; + }); + } + + in_block = mutable_block.to_block(); Review Comment: It's confusing for the usage of `in_block` ``` vectorized::Block in_block = _output_mutable_block.to_block(); vectorized::MutableBlock mutable_block = vectorized::MutableBlock::build_mutable_block(&in_block); in_block = mutable_block.to_block(); ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org