This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch branch-1.2-lts in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-1.2-lts by this push: new f78ca00064 [Enchancement](merge-on-write) check the correctness of rowid conversion after compaction (#16689) (#17006) f78ca00064 is described below commit f78ca0006489a2e7d43eb8b9408debc705d34554 Author: Xin Liao <liaoxin...@126.com> AuthorDate: Wed Feb 22 19:15:52 2023 +0800 [Enchancement](merge-on-write) check the correctness of rowid conversion after compaction (#16689) (#17006) MoW updates the delete bitmap of the imported data during the compaction by rowid conversion. The correctness of rowid conversion is very important to the result of delete bitmap. So I add a rowid conversion result check. cherry-pick from: #16689 --- be/src/olap/compaction.cpp | 12 ++++-- be/src/olap/primary_key_index.cpp | 2 +- be/src/olap/rowset/segment_v2/segment.cpp | 16 ++++++++ be/src/olap/rowset/segment_v2/segment.h | 2 + be/src/olap/tablet.cpp | 65 ++++++++++++++++++++++++++++++- be/src/olap/tablet.h | 5 +++ 6 files changed, 96 insertions(+), 6 deletions(-) diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp index e56738b3e1..6441749536 100644 --- a/be/src/olap/compaction.cpp +++ b/be/src/olap/compaction.cpp @@ -439,22 +439,26 @@ Status Compaction::modify_rowsets() { _tablet->enable_unique_key_merge_on_write()) { Version version = _tablet->max_version(); DeleteBitmap output_rowset_delete_bitmap(_tablet->tablet_id()); + std::map<RowsetSharedPtr, std::list<std::pair<RowLocation, RowLocation>>> location_map; // Convert the delete bitmap of the input rowsets to output rowset. // New loads are not blocked, so some keys of input rowsets might // be deleted during the time. We need to deal with delete bitmap // of incremental data later. _tablet->calc_compaction_output_rowset_delete_bitmap(_input_rowsets, _rowid_conversion, 0, - version.second + 1, + version.second + 1, &location_map, &output_rowset_delete_bitmap); + RETURN_IF_ERROR(_tablet->check_rowid_conversion(_output_rowset, location_map)); + location_map.clear(); { std::lock_guard<std::mutex> wrlock_(_tablet->get_rowset_update_lock()); std::lock_guard<std::shared_mutex> wrlock(_tablet->get_header_lock()); // Convert the delete bitmap of the input rowsets to output rowset for // incremental data. - _tablet->calc_compaction_output_rowset_delete_bitmap(_input_rowsets, _rowid_conversion, - version.second, UINT64_MAX, - &output_rowset_delete_bitmap); + _tablet->calc_compaction_output_rowset_delete_bitmap( + _input_rowsets, _rowid_conversion, version.second, UINT64_MAX, &location_map, + &output_rowset_delete_bitmap); + RETURN_IF_ERROR(_tablet->check_rowid_conversion(_output_rowset, location_map)); _tablet->merge_delete_bitmap(output_rowset_delete_bitmap); RETURN_NOT_OK(_tablet->modify_rowsets(output_rowsets, _input_rowsets, true)); diff --git a/be/src/olap/primary_key_index.cpp b/be/src/olap/primary_key_index.cpp index 6e2c3d954d..7c2c5fe16a 100644 --- a/be/src/olap/primary_key_index.cpp +++ b/be/src/olap/primary_key_index.cpp @@ -27,7 +27,7 @@ Status PrimaryKeyIndexBuilder::init() { // TODO(liaoxin) using the column type directly if there's only one column in unique key columns const auto* type_info = get_scalar_type_info<OLAP_FIELD_TYPE_VARCHAR>(); segment_v2::IndexedColumnWriterOptions options; - options.write_ordinal_index = false; + options.write_ordinal_index = true; options.write_value_index = true; options.encoding = segment_v2::EncodingInfo::get_default_encoding(type_info, true); // TODO(liaoxin) test to confirm whether it needs to be compressed diff --git a/be/src/olap/rowset/segment_v2/segment.cpp b/be/src/olap/rowset/segment_v2/segment.cpp index 24af866655..d4442bc4b9 100644 --- a/be/src/olap/rowset/segment_v2/segment.cpp +++ b/be/src/olap/rowset/segment_v2/segment.cpp @@ -341,5 +341,21 @@ Status Segment::lookup_row_key(const Slice& key, RowLocation* row_location) { return Status::OK(); } +Status Segment::read_key_by_rowid(uint32_t row_id, std::string* key) { + RETURN_IF_ERROR(load_pk_index_and_bf()); + std::unique_ptr<segment_v2::IndexedColumnIterator> iter; + RETURN_IF_ERROR(_pk_index_reader->new_iterator(&iter)); + + auto index_type = vectorized::DataTypeFactory::instance().create_data_type( + _pk_index_reader->type_info()->type(), 1, 0); + auto index_column = index_type->create_column(); + RETURN_IF_ERROR(iter->seek_to_ordinal(row_id)); + size_t num_read = 1; + RETURN_IF_ERROR(iter->next_batch(&num_read, index_column)); + CHECK(num_read == 1); + *key = index_column->get_data_at(0).to_string(); + return Status::OK(); +} + } // namespace segment_v2 } // namespace doris diff --git a/be/src/olap/rowset/segment_v2/segment.h b/be/src/olap/rowset/segment_v2/segment.h index b0dea5aeb8..c2dd12508e 100644 --- a/be/src/olap/rowset/segment_v2/segment.h +++ b/be/src/olap/rowset/segment_v2/segment.h @@ -93,6 +93,8 @@ public: Status lookup_row_key(const Slice& key, RowLocation* row_location); + Status read_key_by_rowid(uint32_t row_id, std::string* key); + // only used by UT const SegmentFooterPB& footer() const { return _footer; } diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp index 691658bb49..196d8f7cdd 100644 --- a/be/src/olap/tablet.cpp +++ b/be/src/olap/tablet.cpp @@ -2213,7 +2213,9 @@ Status Tablet::update_delete_bitmap(const RowsetSharedPtr& rowset, DeleteBitmapP void Tablet::calc_compaction_output_rowset_delete_bitmap( const std::vector<RowsetSharedPtr>& input_rowsets, const RowIdConversion& rowid_conversion, - uint64_t start_version, uint64_t end_version, DeleteBitmap* output_rowset_delete_bitmap) { + uint64_t start_version, uint64_t end_version, + std::map<RowsetSharedPtr, std::list<std::pair<RowLocation, RowLocation>>>* location_map, + DeleteBitmap* output_rowset_delete_bitmap) { RowLocation src; RowLocation dst; for (auto& rowset : input_rowsets) { @@ -2242,6 +2244,7 @@ void Tablet::calc_compaction_output_rowset_delete_bitmap( << " src location: |" << src.rowset_id << "|" << src.segment_id << "|" << src.row_id << " start version: " << start_version << "end version" << end_version; + (*location_map)[rowset].emplace_back(src, dst); output_rowset_delete_bitmap->add({dst.rowset_id, dst.segment_id, cur_version}, dst.row_id); } @@ -2254,6 +2257,66 @@ void Tablet::merge_delete_bitmap(const DeleteBitmap& delete_bitmap) { _tablet_meta->delete_bitmap().merge(delete_bitmap); } +Status Tablet::check_rowid_conversion( + RowsetSharedPtr dst_rowset, + const std::map<RowsetSharedPtr, std::list<std::pair<RowLocation, RowLocation>>>& + location_map) { + if (location_map.empty()) { + VLOG_DEBUG << "check_rowid_conversion, location_map is empty"; + return Status::OK(); + } + std::vector<segment_v2::SegmentSharedPtr> dst_segments; + _load_rowset_segments(dst_rowset, &dst_segments); + std::unordered_map<RowsetId, std::vector<segment_v2::SegmentSharedPtr>, HashOfRowsetId> + input_rowsets_segment; + + VLOG_DEBUG << "check_rowid_conversion, dst_segments size: " << dst_segments.size(); + for (auto [src_rowset, locations] : location_map) { + std::vector<segment_v2::SegmentSharedPtr>& segments = + input_rowsets_segment[src_rowset->rowset_id()]; + if (segments.empty()) { + _load_rowset_segments(src_rowset, &segments); + } + for (auto& [src, dst] : locations) { + std::string src_key; + std::string dst_key; + Status s = segments[src.segment_id]->read_key_by_rowid(src.row_id, &src_key); + if (UNLIKELY(s.code() == TStatusCode::NOT_IMPLEMENTED_ERROR)) { + LOG(INFO) << "primary key index of old version does not " + "support reading key by rowid"; + break; + } + if (UNLIKELY(!s)) { + LOG(WARNING) << "failed to get src key: |" << src.rowset_id << "|" << src.segment_id + << "|" << src.row_id << " status: " << s; + DCHECK(false); + return s; + } + + s = dst_segments[dst.segment_id]->read_key_by_rowid(dst.row_id, &dst_key); + if (UNLIKELY(!s)) { + LOG(WARNING) << "failed to get dst key: |" << dst.rowset_id << "|" << dst.segment_id + << "|" << dst.row_id << " status: " << s; + DCHECK(false); + return s; + } + + VLOG_DEBUG << "check_rowid_conversion, src: |" << src.rowset_id << "|" << src.segment_id + << "|" << src.row_id << "|" << src_key << " dst: |" << dst.rowset_id << "|" + << dst.segment_id << "|" << dst.row_id << "|" << dst_key; + if (UNLIKELY(src_key.compare(dst_key) != 0)) { + LOG(WARNING) << "failed to check key, src key: |" << src.rowset_id << "|" + << src.segment_id << "|" << src.row_id << "|" << src_key + << " dst key: |" << dst.rowset_id << "|" << dst.segment_id << "|" + << dst.row_id << "|" << dst_key; + DCHECK(false); + return Status::InternalError("failed to check rowid conversion"); + } + } + } + return Status::OK(); +} + RowsetIdUnorderedSet Tablet::all_rs_id(int64_t max_version) const { RowsetIdUnorderedSet rowset_ids; for (const auto& rs_it : _rs_version_map) { diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h index a9293bfd54..e439427661 100644 --- a/be/src/olap/tablet.h +++ b/be/src/olap/tablet.h @@ -356,8 +356,13 @@ public: void calc_compaction_output_rowset_delete_bitmap( const std::vector<RowsetSharedPtr>& input_rowsets, const RowIdConversion& rowid_conversion, uint64_t start_version, uint64_t end_version, + std::map<RowsetSharedPtr, std::list<std::pair<RowLocation, RowLocation>>>* location_map, DeleteBitmap* output_rowset_delete_bitmap); void merge_delete_bitmap(const DeleteBitmap& delete_bitmap); + Status check_rowid_conversion( + RowsetSharedPtr dst_rowset, + const std::map<RowsetSharedPtr, std::list<std::pair<RowLocation, RowLocation>>>& + location_map); RowsetIdUnorderedSet all_rs_id(int64_t max_version) const; void remove_self_owned_remote_rowsets(); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org