This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 26497c49a40394fec5ca664791784f75fba945b4
Author: abmdocrt <yukang.lian2...@gmail.com>
AuthorDate: Thu Jul 13 21:10:15 2023 +0800

    [Fix](MoW) Fix bug about calculating all committed rowsets delete bitmaps 
when doing compaction (#21760)
---
 be/src/olap/compaction.cpp | 46 +++++++++++++++++++++++++++++++++++-----------
 be/src/olap/compaction.h   |  3 +++
 2 files changed, 38 insertions(+), 11 deletions(-)

diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp
index ec4f6a6ff9..e4c0b59d8c 100644
--- a/be/src/olap/compaction.cpp
+++ b/be/src/olap/compaction.cpp
@@ -614,17 +614,26 @@ Status Compaction::modify_rowsets(const 
Merger::Statistics* stats) {
 
             // Step2: calculate all rowsets' delete bitmaps which are 
published during compaction.
             for (auto& it : commit_tablet_txn_info_vec) {
-                DeleteBitmap output_delete_bitmap(_tablet->tablet_id());
-                _tablet->calc_compaction_output_rowset_delete_bitmap(
-                        _input_rowsets, _rowid_conversion, 0, UINT64_MAX, 
&missed_rows,
-                        &location_map, *it.delete_bitmap.get(), 
&output_delete_bitmap);
-                it.delete_bitmap->merge(output_delete_bitmap);
-                // Step3: write back updated delete bitmap and tablet info.
-                it.rowset_ids.insert(_output_rowset->rowset_id());
-                
StorageEngine::instance()->txn_manager()->set_txn_related_delete_bitmap(
-                        it.partition_id, it.transaction_id, 
_tablet->tablet_id(),
-                        _tablet->schema_hash(), _tablet->tablet_uid(), true, 
it.delete_bitmap,
-                        it.rowset_ids);
+                if (!_check_if_includes_input_rowsets(it.rowset_ids)) {
+                    // When calculating the delete bitmap of all committed 
rowsets relative to the compaction,
+                    // there may be cases where the compacted rowsets are 
newer than the committed rowsets.
+                    // At this time, row number conversion cannot be 
performed, otherwise data will be missing.
+                    // Therefore, we need to check if every committed rowset 
has calculated delete bitmap for
+                    // all compaction input rowsets.
+                    continue;
+                } else {
+                    DeleteBitmap output_delete_bitmap(_tablet->tablet_id());
+                    _tablet->calc_compaction_output_rowset_delete_bitmap(
+                            _input_rowsets, _rowid_conversion, 0, UINT64_MAX, 
&missed_rows,
+                            &location_map, *it.delete_bitmap.get(), 
&output_delete_bitmap);
+                    it.delete_bitmap->merge(output_delete_bitmap);
+                    // Step3: write back updated delete bitmap and tablet info.
+                    it.rowset_ids.insert(_output_rowset->rowset_id());
+                    
StorageEngine::instance()->txn_manager()->set_txn_related_delete_bitmap(
+                            it.partition_id, it.transaction_id, 
_tablet->tablet_id(),
+                            _tablet->schema_hash(), _tablet->tablet_uid(), 
true, it.delete_bitmap,
+                            it.rowset_ids);
+                }
             }
 
             // Convert the delete bitmap of the input rowsets to output rowset 
for
@@ -668,6 +677,21 @@ Status Compaction::modify_rowsets(const 
Merger::Statistics* stats) {
     return Status::OK();
 }
 
+bool Compaction::_check_if_includes_input_rowsets(
+        const RowsetIdUnorderedSet& commit_rowset_ids_set) const {
+    std::vector<RowsetId> commit_rowset_ids {};
+    commit_rowset_ids.insert(commit_rowset_ids.end(), 
commit_rowset_ids_set.begin(),
+                             commit_rowset_ids_set.end());
+    std::sort(commit_rowset_ids.begin(), commit_rowset_ids.end());
+    std::vector<RowsetId> input_rowset_ids {};
+    for (const auto& rowset : _input_rowsets) {
+        input_rowset_ids.emplace_back(rowset->rowset_meta()->rowset_id());
+    }
+    std::sort(input_rowset_ids.begin(), input_rowset_ids.end());
+    return std::includes(commit_rowset_ids.begin(), commit_rowset_ids.end(),
+                         input_rowset_ids.begin(), input_rowset_ids.end());
+}
+
 void Compaction::gc_output_rowset() {
     if (_state != CompactionState::SUCCESS && _output_rowset != nullptr) {
         if (!_output_rowset->is_local()) {
diff --git a/be/src/olap/compaction.h b/be/src/olap/compaction.h
index bed9ae403f..853d970a48 100644
--- a/be/src/olap/compaction.h
+++ b/be/src/olap/compaction.h
@@ -94,6 +94,9 @@ protected:
 
     void init_profile(const std::string& label);
 
+private:
+    bool _check_if_includes_input_rowsets(const RowsetIdUnorderedSet& 
commit_rowset_ids_set) const;
+
 protected:
     // the root tracker for this compaction
     std::shared_ptr<MemTrackerLimiter> _mem_tracker;


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to