This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-1.2-lts by this push:
     new f78ca00064 [Enchancement](merge-on-write) check the correctness of 
rowid conversion after compaction (#16689) (#17006)
f78ca00064 is described below

commit f78ca0006489a2e7d43eb8b9408debc705d34554
Author: Xin Liao <liaoxin...@126.com>
AuthorDate: Wed Feb 22 19:15:52 2023 +0800

    [Enchancement](merge-on-write) check the correctness of rowid conversion 
after compaction (#16689) (#17006)
    
    MoW updates the delete bitmap of the imported data during the compaction by 
rowid conversion.
    The correctness of rowid conversion is very important to the result of 
delete bitmap.
    So I add a rowid conversion result check.
    cherry-pick from: #16689
---
 be/src/olap/compaction.cpp                | 12 ++++--
 be/src/olap/primary_key_index.cpp         |  2 +-
 be/src/olap/rowset/segment_v2/segment.cpp | 16 ++++++++
 be/src/olap/rowset/segment_v2/segment.h   |  2 +
 be/src/olap/tablet.cpp                    | 65 ++++++++++++++++++++++++++++++-
 be/src/olap/tablet.h                      |  5 +++
 6 files changed, 96 insertions(+), 6 deletions(-)

diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp
index e56738b3e1..6441749536 100644
--- a/be/src/olap/compaction.cpp
+++ b/be/src/olap/compaction.cpp
@@ -439,22 +439,26 @@ Status Compaction::modify_rowsets() {
         _tablet->enable_unique_key_merge_on_write()) {
         Version version = _tablet->max_version();
         DeleteBitmap output_rowset_delete_bitmap(_tablet->tablet_id());
+        std::map<RowsetSharedPtr, std::list<std::pair<RowLocation, 
RowLocation>>> location_map;
         // Convert the delete bitmap of the input rowsets to output rowset.
         // New loads are not blocked, so some keys of input rowsets might
         // be deleted during the time. We need to deal with delete bitmap
         // of incremental data later.
         _tablet->calc_compaction_output_rowset_delete_bitmap(_input_rowsets, 
_rowid_conversion, 0,
-                                                             version.second + 
1,
+                                                             version.second + 
1, &location_map,
                                                              
&output_rowset_delete_bitmap);
+        RETURN_IF_ERROR(_tablet->check_rowid_conversion(_output_rowset, 
location_map));
+        location_map.clear();
         {
             std::lock_guard<std::mutex> 
wrlock_(_tablet->get_rowset_update_lock());
             std::lock_guard<std::shared_mutex> 
wrlock(_tablet->get_header_lock());
 
             // Convert the delete bitmap of the input rowsets to output rowset 
for
             // incremental data.
-            
_tablet->calc_compaction_output_rowset_delete_bitmap(_input_rowsets, 
_rowid_conversion,
-                                                                 
version.second, UINT64_MAX,
-                                                                 
&output_rowset_delete_bitmap);
+            _tablet->calc_compaction_output_rowset_delete_bitmap(
+                    _input_rowsets, _rowid_conversion, version.second, 
UINT64_MAX, &location_map,
+                    &output_rowset_delete_bitmap);
+            RETURN_IF_ERROR(_tablet->check_rowid_conversion(_output_rowset, 
location_map));
 
             _tablet->merge_delete_bitmap(output_rowset_delete_bitmap);
             RETURN_NOT_OK(_tablet->modify_rowsets(output_rowsets, 
_input_rowsets, true));
diff --git a/be/src/olap/primary_key_index.cpp 
b/be/src/olap/primary_key_index.cpp
index 6e2c3d954d..7c2c5fe16a 100644
--- a/be/src/olap/primary_key_index.cpp
+++ b/be/src/olap/primary_key_index.cpp
@@ -27,7 +27,7 @@ Status PrimaryKeyIndexBuilder::init() {
     // TODO(liaoxin) using the column type directly if there's only one column 
in unique key columns
     const auto* type_info = get_scalar_type_info<OLAP_FIELD_TYPE_VARCHAR>();
     segment_v2::IndexedColumnWriterOptions options;
-    options.write_ordinal_index = false;
+    options.write_ordinal_index = true;
     options.write_value_index = true;
     options.encoding = 
segment_v2::EncodingInfo::get_default_encoding(type_info, true);
     // TODO(liaoxin) test to confirm whether it needs to be compressed
diff --git a/be/src/olap/rowset/segment_v2/segment.cpp 
b/be/src/olap/rowset/segment_v2/segment.cpp
index 24af866655..d4442bc4b9 100644
--- a/be/src/olap/rowset/segment_v2/segment.cpp
+++ b/be/src/olap/rowset/segment_v2/segment.cpp
@@ -341,5 +341,21 @@ Status Segment::lookup_row_key(const Slice& key, 
RowLocation* row_location) {
     return Status::OK();
 }
 
+Status Segment::read_key_by_rowid(uint32_t row_id, std::string* key) {
+    RETURN_IF_ERROR(load_pk_index_and_bf());
+    std::unique_ptr<segment_v2::IndexedColumnIterator> iter;
+    RETURN_IF_ERROR(_pk_index_reader->new_iterator(&iter));
+
+    auto index_type = vectorized::DataTypeFactory::instance().create_data_type(
+            _pk_index_reader->type_info()->type(), 1, 0);
+    auto index_column = index_type->create_column();
+    RETURN_IF_ERROR(iter->seek_to_ordinal(row_id));
+    size_t num_read = 1;
+    RETURN_IF_ERROR(iter->next_batch(&num_read, index_column));
+    CHECK(num_read == 1);
+    *key = index_column->get_data_at(0).to_string();
+    return Status::OK();
+}
+
 } // namespace segment_v2
 } // namespace doris
diff --git a/be/src/olap/rowset/segment_v2/segment.h 
b/be/src/olap/rowset/segment_v2/segment.h
index b0dea5aeb8..c2dd12508e 100644
--- a/be/src/olap/rowset/segment_v2/segment.h
+++ b/be/src/olap/rowset/segment_v2/segment.h
@@ -93,6 +93,8 @@ public:
 
     Status lookup_row_key(const Slice& key, RowLocation* row_location);
 
+    Status read_key_by_rowid(uint32_t row_id, std::string* key);
+
     // only used by UT
     const SegmentFooterPB& footer() const { return _footer; }
 
diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index 691658bb49..196d8f7cdd 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -2213,7 +2213,9 @@ Status Tablet::update_delete_bitmap(const 
RowsetSharedPtr& rowset, DeleteBitmapP
 
 void Tablet::calc_compaction_output_rowset_delete_bitmap(
         const std::vector<RowsetSharedPtr>& input_rowsets, const 
RowIdConversion& rowid_conversion,
-        uint64_t start_version, uint64_t end_version, DeleteBitmap* 
output_rowset_delete_bitmap) {
+        uint64_t start_version, uint64_t end_version,
+        std::map<RowsetSharedPtr, std::list<std::pair<RowLocation, 
RowLocation>>>* location_map,
+        DeleteBitmap* output_rowset_delete_bitmap) {
     RowLocation src;
     RowLocation dst;
     for (auto& rowset : input_rowsets) {
@@ -2242,6 +2244,7 @@ void Tablet::calc_compaction_output_rowset_delete_bitmap(
                                << " src location: |" << src.rowset_id << "|" 
<< src.segment_id
                                << "|" << src.row_id << " start version: " << 
start_version
                                << "end version" << end_version;
+                    (*location_map)[rowset].emplace_back(src, dst);
                     output_rowset_delete_bitmap->add({dst.rowset_id, 
dst.segment_id, cur_version},
                                                      dst.row_id);
                 }
@@ -2254,6 +2257,66 @@ void Tablet::merge_delete_bitmap(const DeleteBitmap& 
delete_bitmap) {
     _tablet_meta->delete_bitmap().merge(delete_bitmap);
 }
 
+Status Tablet::check_rowid_conversion(
+        RowsetSharedPtr dst_rowset,
+        const std::map<RowsetSharedPtr, std::list<std::pair<RowLocation, 
RowLocation>>>&
+                location_map) {
+    if (location_map.empty()) {
+        VLOG_DEBUG << "check_rowid_conversion, location_map is empty";
+        return Status::OK();
+    }
+    std::vector<segment_v2::SegmentSharedPtr> dst_segments;
+    _load_rowset_segments(dst_rowset, &dst_segments);
+    std::unordered_map<RowsetId, std::vector<segment_v2::SegmentSharedPtr>, 
HashOfRowsetId>
+            input_rowsets_segment;
+
+    VLOG_DEBUG << "check_rowid_conversion, dst_segments size: " << 
dst_segments.size();
+    for (auto [src_rowset, locations] : location_map) {
+        std::vector<segment_v2::SegmentSharedPtr>& segments =
+                input_rowsets_segment[src_rowset->rowset_id()];
+        if (segments.empty()) {
+            _load_rowset_segments(src_rowset, &segments);
+        }
+        for (auto& [src, dst] : locations) {
+            std::string src_key;
+            std::string dst_key;
+            Status s = segments[src.segment_id]->read_key_by_rowid(src.row_id, 
&src_key);
+            if (UNLIKELY(s.code() == TStatusCode::NOT_IMPLEMENTED_ERROR)) {
+                LOG(INFO) << "primary key index of old version does not "
+                             "support reading key by rowid";
+                break;
+            }
+            if (UNLIKELY(!s)) {
+                LOG(WARNING) << "failed to get src key: |" << src.rowset_id << 
"|" << src.segment_id
+                             << "|" << src.row_id << " status: " << s;
+                DCHECK(false);
+                return s;
+            }
+
+            s = dst_segments[dst.segment_id]->read_key_by_rowid(dst.row_id, 
&dst_key);
+            if (UNLIKELY(!s)) {
+                LOG(WARNING) << "failed to get dst key: |" << dst.rowset_id << 
"|" << dst.segment_id
+                             << "|" << dst.row_id << " status: " << s;
+                DCHECK(false);
+                return s;
+            }
+
+            VLOG_DEBUG << "check_rowid_conversion, src: |" << src.rowset_id << 
"|" << src.segment_id
+                       << "|" << src.row_id << "|" << src_key << " dst: |" << 
dst.rowset_id << "|"
+                       << dst.segment_id << "|" << dst.row_id << "|" << 
dst_key;
+            if (UNLIKELY(src_key.compare(dst_key) != 0)) {
+                LOG(WARNING) << "failed to check key, src key: |" << 
src.rowset_id << "|"
+                             << src.segment_id << "|" << src.row_id << "|" << 
src_key
+                             << " dst key: |" << dst.rowset_id << "|" << 
dst.segment_id << "|"
+                             << dst.row_id << "|" << dst_key;
+                DCHECK(false);
+                return Status::InternalError("failed to check rowid 
conversion");
+            }
+        }
+    }
+    return Status::OK();
+}
+
 RowsetIdUnorderedSet Tablet::all_rs_id(int64_t max_version) const {
     RowsetIdUnorderedSet rowset_ids;
     for (const auto& rs_it : _rs_version_map) {
diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h
index a9293bfd54..e439427661 100644
--- a/be/src/olap/tablet.h
+++ b/be/src/olap/tablet.h
@@ -356,8 +356,13 @@ public:
     void calc_compaction_output_rowset_delete_bitmap(
             const std::vector<RowsetSharedPtr>& input_rowsets,
             const RowIdConversion& rowid_conversion, uint64_t start_version, 
uint64_t end_version,
+            std::map<RowsetSharedPtr, std::list<std::pair<RowLocation, 
RowLocation>>>* location_map,
             DeleteBitmap* output_rowset_delete_bitmap);
     void merge_delete_bitmap(const DeleteBitmap& delete_bitmap);
+    Status check_rowid_conversion(
+            RowsetSharedPtr dst_rowset,
+            const std::map<RowsetSharedPtr, std::list<std::pair<RowLocation, 
RowLocation>>>&
+                    location_map);
     RowsetIdUnorderedSet all_rs_id(int64_t max_version) const;
 
     void remove_self_owned_remote_rowsets();


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to