This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 0a5fd99d02 [feature-wip](unique-key-merge-on-write) speed up 
publish_txn (#11557)
0a5fd99d02 is described below

commit 0a5fd99d02a269d5709038ac493790060337553e
Author: yixiutt <102007456+yixi...@users.noreply.github.com>
AuthorDate: Mon Aug 8 18:57:55 2022 +0800

    [feature-wip](unique-key-merge-on-write) speed up publish_txn (#11557)
    
    In our original design, we calculate the delete bitmap in publish_txn. This
    operation costs too much time because it loads segment data and looks up
    row keys in previous rowsets and segments. Since publish version tasks must
    run in order, this leads to timeouts in publish_txn.
    
    In this PR, we separate the delete_bitmap calculation into two parts. One
    part is done when flushing the memtable, so this work can run in parallel.
    We then calculate the final delete_bitmap in publish_txn: we get the set of
    rowset ids that should be included and remove rowsets that have been
    compacted. The rowset difference between memtable flush and publish_txn is
    really small, so publish_txn becomes very fast. In our test, publish_txn
    costs about 10ms.
    
    Co-authored-by: yixiutt <yi...@selectdb.com>
---
 be/src/exec/olap_scanner.cpp              |   4 +-
 be/src/olap/delta_writer.cpp              |  15 ++-
 be/src/olap/delta_writer.h                |   4 +
 be/src/olap/memtable.cpp                  |  34 ++++-
 be/src/olap/memtable.h                    |   8 +-
 be/src/olap/olap_common.h                 |   2 +
 be/src/olap/rowset/beta_rowset.cpp        |  16 +++
 be/src/olap/rowset/beta_rowset.h          |   2 +
 be/src/olap/rowset/beta_rowset_writer.cpp |  53 +++++---
 be/src/olap/rowset/beta_rowset_writer.h   |   5 +
 be/src/olap/rowset/rowset_tree.cpp        |   9 +-
 be/src/olap/rowset/rowset_tree.h          |   4 +-
 be/src/olap/rowset/rowset_writer.h        |   5 +
 be/src/olap/tablet.cpp                    | 159 +++++++++++++++++++++++-
 be/src/olap/tablet.h                      |  32 ++++-
 be/src/olap/tablet_meta.h                 |   3 +-
 be/src/olap/txn_manager.cpp               | 198 +++++++++++-------------------
 be/src/olap/txn_manager.h                 |  29 ++++-
 be/test/olap/rowset/rowset_tree_test.cpp  |  35 ++++--
 be/test/olap/tablet_test.cpp              |  24 ++--
 20 files changed, 457 insertions(+), 184 deletions(-)

diff --git a/be/src/exec/olap_scanner.cpp b/be/src/exec/olap_scanner.cpp
index 161b1e0d3b..d440a8ceb7 100644
--- a/be/src/exec/olap_scanner.cpp
+++ b/be/src/exec/olap_scanner.cpp
@@ -260,7 +260,9 @@ Status OlapScanner::_init_tablet_reader_params(
         _tablet_reader_params.use_page_cache = true;
     }
 
-    _tablet_reader_params.delete_bitmap = 
&_tablet->tablet_meta()->delete_bitmap();
+    if (_tablet->enable_unique_key_merge_on_write()) {
+        _tablet_reader_params.delete_bitmap = 
&_tablet->tablet_meta()->delete_bitmap();
+    }
 
     return Status::OK();
 }
diff --git a/be/src/olap/delta_writer.cpp b/be/src/olap/delta_writer.cpp
index 58c914e6d4..1d1ec6c01d 100644
--- a/be/src/olap/delta_writer.cpp
+++ b/be/src/olap/delta_writer.cpp
@@ -138,6 +138,13 @@ Status DeltaWriter::init() {
     
RETURN_NOT_OK(_storage_engine->memtable_flush_executor()->create_flush_token(
             &_flush_token, _rowset_writer->type(), _req.is_high_priority));
 
+    // create delete bitmap and get rowset ids snapshot
+    if (_tablet->enable_unique_key_merge_on_write()) {
+        _delete_bitmap = std::make_shared<DeleteBitmap>(-1);
+        std::lock_guard<std::shared_mutex> lck(_tablet->get_header_lock());
+        _rowset_ids = _tablet->all_rs_id();
+    }
+
     _is_init = true;
     return Status::OK();
 }
@@ -283,7 +290,8 @@ Status DeltaWriter::wait_flush() {
 
 void DeltaWriter::_reset_mem_table() {
     _mem_table.reset(new MemTable(_tablet, _schema.get(), 
_tablet_schema.get(), _req.slots,
-                                  _req.tuple_desc, _rowset_writer.get(), 
_is_vec));
+                                  _req.tuple_desc, _rowset_writer.get(), 
_delete_bitmap,
+                                  _rowset_ids, _is_vec));
 }
 
 Status DeltaWriter::close() {
@@ -336,6 +344,11 @@ Status DeltaWriter::close_wait(const PSlaveTabletNodes& 
slave_tablet_nodes,
                      << " for rowset: " << _cur_rowset->rowset_id();
         return res;
     }
+    if (_tablet->enable_unique_key_merge_on_write()) {
+        _storage_engine->txn_manager()->set_txn_related_delete_bitmap(
+                _req.partition_id, _req.txn_id, _tablet->tablet_id(), 
_tablet->schema_hash(),
+                _tablet->tablet_uid(), true, _delete_bitmap, _rowset_ids);
+    }
 
     _delta_written_success = true;
 
diff --git a/be/src/olap/delta_writer.h b/be/src/olap/delta_writer.h
index f7194f8a4c..e433d8a668 100644
--- a/be/src/olap/delta_writer.h
+++ b/be/src/olap/delta_writer.h
@@ -162,6 +162,10 @@ private:
     std::unordered_set<int64_t> _unfinished_slave_node;
     PSuccessSlaveTabletNodeIds _success_slave_node_ids;
     std::shared_mutex _slave_node_lock;
+
+    DeleteBitmapPtr _delete_bitmap;
+    // current rowset_ids, used to do diff in publish_version
+    RowsetIdUnorderedSet _rowset_ids;
 };
 
 } // namespace doris
diff --git a/be/src/olap/memtable.cpp b/be/src/olap/memtable.cpp
index a9675fba7d..95b9277be3 100644
--- a/be/src/olap/memtable.cpp
+++ b/be/src/olap/memtable.cpp
@@ -19,6 +19,7 @@
 
 #include "common/logging.h"
 #include "olap/row.h"
+#include "olap/rowset/beta_rowset.h"
 #include "olap/rowset/rowset_writer.h"
 #include "olap/schema.h"
 #include "runtime/tuple.h"
@@ -31,7 +32,8 @@ namespace doris {
 
 MemTable::MemTable(TabletSharedPtr tablet, Schema* schema, const TabletSchema* 
tablet_schema,
                    const std::vector<SlotDescriptor*>* slot_descs, 
TupleDescriptor* tuple_desc,
-                   RowsetWriter* rowset_writer, bool support_vec)
+                   RowsetWriter* rowset_writer, DeleteBitmapPtr delete_bitmap,
+                   const RowsetIdUnorderedSet& rowset_ids, bool support_vec)
         : _tablet(std::move(tablet)),
           _schema(schema),
           _tablet_schema(tablet_schema),
@@ -46,7 +48,9 @@ MemTable::MemTable(TabletSharedPtr tablet, Schema* schema, 
const TabletSchema* t
           _agg_functions(schema->num_columns()),
           _offsets_of_aggregate_states(schema->num_columns()),
           _total_size_of_aggregate_states(0),
-          _mem_usage(0) {
+          _mem_usage(0),
+          _delete_bitmap(delete_bitmap),
+          _rowset_ids(rowset_ids) {
     if (support_vec) {
         _skip_list = nullptr;
         _vec_row_comparator = std::make_shared<RowInBlockComparator>(_schema);
@@ -293,10 +297,7 @@ void MemTable::_replace_row(const ContiguousRow& src_row, 
TableKey row_in_skipli
         auto dst_cell = dst_row.cell(cid);
         auto src_cell = src_row.cell(cid);
         auto column = _schema->column(cid);
-        // Dest cell already allocated memory, use dirct_copy rather than 
deep_copy(which will
-        // allocate memory for dst_cell). If dst_cell's size is smaller than 
src_cell, direct_copy
-        // will reallocate the memory to fit the src_cell's data.
-        column->direct_copy(&dst_cell, src_cell);
+        column->deep_copy(&dst_cell, src_cell, _table_mem_pool.get());
     }
 }
 
@@ -397,15 +398,36 @@ bool MemTable::need_to_agg() {
                                              : memory_usage() >= 
config::memtable_max_buffer_size;
 }
 
+Status MemTable::_generate_delete_bitmap() {
+    // generate delete bitmap, build a tmp rowset and load recent segment
+    if (_tablet->enable_unique_key_merge_on_write()) {
+        auto rowset = _rowset_writer->build_tmp();
+        auto beta_rowset = reinterpret_cast<BetaRowset*>(rowset.get());
+        std::vector<segment_v2::SegmentSharedPtr> segments;
+        segment_v2::SegmentSharedPtr segment;
+        if (beta_rowset->num_segments() == 0) {
+            return Status::OK();
+        }
+        RETURN_IF_ERROR(beta_rowset->load_segment(beta_rowset->num_segments() 
- 1, &segment));
+        segments.push_back(segment);
+        std::lock_guard<std::shared_mutex> 
meta_wrlock(_tablet->get_header_lock());
+        RETURN_IF_ERROR(_tablet->calc_delete_bitmap(beta_rowset->rowset_id(), 
segments,
+                                                    &_rowset_ids, 
_delete_bitmap));
+    }
+    return Status::OK();
+}
+
 Status MemTable::flush() {
     VLOG_CRITICAL << "begin to flush memtable for tablet: " << tablet_id()
                   << ", memsize: " << memory_usage() << ", rows: " << _rows;
     int64_t duration_ns = 0;
     RETURN_NOT_OK(_do_flush(duration_ns));
+    RETURN_NOT_OK(_generate_delete_bitmap());
     DorisMetrics::instance()->memtable_flush_total->increment(1);
     
DorisMetrics::instance()->memtable_flush_duration_us->increment(duration_ns / 
1000);
     VLOG_CRITICAL << "after flush memtable for tablet: " << tablet_id()
                   << ", flushsize: " << _flush_size;
+
     return Status::OK();
 }
 
diff --git a/be/src/olap/memtable.h b/be/src/olap/memtable.h
index 0a18f51753..8ab9d1614f 100644
--- a/be/src/olap/memtable.h
+++ b/be/src/olap/memtable.h
@@ -43,7 +43,8 @@ class MemTable {
 public:
     MemTable(TabletSharedPtr tablet, Schema* schema, const TabletSchema* 
tablet_schema,
              const std::vector<SlotDescriptor*>* slot_descs, TupleDescriptor* 
tuple_desc,
-             RowsetWriter* rowset_writer, bool support_vec = false);
+             RowsetWriter* rowset_writer, DeleteBitmapPtr delete_bitmap,
+             const RowsetIdUnorderedSet& rowset_ids, bool support_vec = false);
     ~MemTable();
 
     int64_t tablet_id() const { return _tablet->tablet_id(); }
@@ -141,6 +142,8 @@ private:
     void _insert_one_row_from_block(RowInBlock* row_in_block);
     void _aggregate_two_row_in_block(RowInBlock* new_row, RowInBlock* 
row_in_skiplist);
 
+    Status _generate_delete_bitmap();
+
 private:
     TabletSharedPtr _tablet;
     Schema* _schema;
@@ -203,6 +206,9 @@ private:
     size_t _total_size_of_aggregate_states;
     std::vector<RowInBlock*> _row_in_blocks;
     size_t _mem_usage;
+
+    DeleteBitmapPtr _delete_bitmap;
+    RowsetIdUnorderedSet _rowset_ids;
 }; // class MemTable
 
 inline std::ostream& operator<<(std::ostream& os, const MemTable& table) {
diff --git a/be/src/olap/olap_common.h b/be/src/olap/olap_common.h
index f665de32a4..160cf4121c 100644
--- a/be/src/olap/olap_common.h
+++ b/be/src/olap/olap_common.h
@@ -430,4 +430,6 @@ struct HashOfRowsetId {
     }
 };
 
+using RowsetIdUnorderedSet = std::unordered_set<RowsetId, HashOfRowsetId>;
+
 } // namespace doris
diff --git a/be/src/olap/rowset/beta_rowset.cpp 
b/be/src/olap/rowset/beta_rowset.cpp
index 07e321ed0e..24801e4c7f 100644
--- a/be/src/olap/rowset/beta_rowset.cpp
+++ b/be/src/olap/rowset/beta_rowset.cpp
@@ -95,6 +95,22 @@ Status 
BetaRowset::load_segments(std::vector<segment_v2::SegmentSharedPtr>* segm
     return Status::OK();
 }
 
+Status BetaRowset::load_segment(int64_t seg_id, segment_v2::SegmentSharedPtr* 
segment) {
+    DCHECK(seg_id >= 0);
+    auto fs = _rowset_meta->fs();
+    if (!fs || _schema == nullptr) {
+        return Status::OLAPInternalError(OLAP_ERR_INIT_FAILED);
+    }
+    auto seg_path = segment_file_path(seg_id);
+    auto s = segment_v2::Segment::open(fs, seg_path, seg_id, _schema, segment);
+    if (!s.ok()) {
+        LOG(WARNING) << "failed to open segment. " << seg_path << " under 
rowset " << unique_id()
+                     << " : " << s.to_string();
+        return Status::OLAPInternalError(OLAP_ERR_ROWSET_LOAD_FAILED);
+    }
+    return Status::OK();
+}
+
 Status BetaRowset::create_reader(RowsetReaderSharedPtr* result) {
     // NOTE: We use std::static_pointer_cast for performance
     result->reset(new 
BetaRowsetReader(std::static_pointer_cast<BetaRowset>(shared_from_this())));
diff --git a/be/src/olap/rowset/beta_rowset.h b/be/src/olap/rowset/beta_rowset.h
index cc098da20f..e36538a413 100644
--- a/be/src/olap/rowset/beta_rowset.h
+++ b/be/src/olap/rowset/beta_rowset.h
@@ -76,6 +76,8 @@ public:
 
     Status load_segments(std::vector<segment_v2::SegmentSharedPtr>* segments);
 
+    Status load_segment(int64_t seg_id, segment_v2::SegmentSharedPtr* segment);
+
 protected:
     BetaRowset(TabletSchemaSPtr schema, const std::string& tablet_path,
                RowsetMetaSharedPtr rowset_meta);
diff --git a/be/src/olap/rowset/beta_rowset_writer.cpp 
b/be/src/olap/rowset/beta_rowset_writer.cpp
index 491eed055d..851901adb0 100644
--- a/be/src/olap/rowset/beta_rowset_writer.cpp
+++ b/be/src/olap/rowset/beta_rowset_writer.cpp
@@ -235,23 +235,7 @@ RowsetSharedPtr BetaRowsetWriter::build() {
     // When building a rowset, we must ensure that the current _segment_writer 
has been
     // flushed, that is, the current _segment_writer is nullptr
     DCHECK(_segment_writer == nullptr) << "segment must be null when build 
rowset";
-    _rowset_meta->set_num_rows(_num_rows_written);
-    _rowset_meta->set_total_disk_size(_total_data_size);
-    _rowset_meta->set_data_disk_size(_total_data_size);
-    _rowset_meta->set_index_disk_size(_total_index_size);
-    // TODO write zonemap to meta
-    _rowset_meta->set_empty(_num_rows_written == 0);
-    _rowset_meta->set_creation_time(time(nullptr));
-    _rowset_meta->set_num_segments(_num_segment);
-    if (_num_segment <= 1) {
-        _rowset_meta->set_segments_overlap(NONOVERLAPPING);
-    }
-    if (_is_pending) {
-        _rowset_meta->set_rowset_state(COMMITTED);
-    } else {
-        _rowset_meta->set_rowset_state(VISIBLE);
-    }
-    _rowset_meta->set_segments_key_bounds(_segments_encoded_key_bounds);
+    _build_rowset_meta(_rowset_meta);
 
     if (_rowset_meta->oldest_write_timestamp() == -1) {
         _rowset_meta->set_oldest_write_timestamp(UnixSeconds());
@@ -272,6 +256,41 @@ RowsetSharedPtr BetaRowsetWriter::build() {
     return rowset;
 }
 
+void BetaRowsetWriter::_build_rowset_meta(std::shared_ptr<RowsetMeta> 
rowset_meta) {
+    rowset_meta->set_num_rows(_num_rows_written);
+    rowset_meta->set_total_disk_size(_total_data_size);
+    rowset_meta->set_data_disk_size(_total_data_size);
+    rowset_meta->set_index_disk_size(_total_index_size);
+    // TODO write zonemap to meta
+    rowset_meta->set_empty(_num_rows_written == 0);
+    rowset_meta->set_creation_time(time(nullptr));
+    rowset_meta->set_num_segments(_num_segment);
+    if (_num_segment <= 1) {
+        rowset_meta->set_segments_overlap(NONOVERLAPPING);
+    }
+    if (_is_pending) {
+        rowset_meta->set_rowset_state(COMMITTED);
+    } else {
+        rowset_meta->set_rowset_state(VISIBLE);
+    }
+    rowset_meta->set_segments_key_bounds(_segments_encoded_key_bounds);
+}
+
+RowsetSharedPtr BetaRowsetWriter::build_tmp() {
+    std::shared_ptr<RowsetMeta> rowset_meta_ = std::make_shared<RowsetMeta>();
+    *rowset_meta_ = *_rowset_meta;
+    _build_rowset_meta(rowset_meta_);
+
+    RowsetSharedPtr rowset;
+    auto status = RowsetFactory::create_rowset(_context.tablet_schema, 
_context.tablet_path,
+                                               rowset_meta_, &rowset);
+    if (!status.ok()) {
+        LOG(WARNING) << "rowset init failed when build new rowset, res=" << 
status;
+        return nullptr;
+    }
+    return rowset;
+}
+
 Status BetaRowsetWriter::_create_segment_writer(
         std::unique_ptr<segment_v2::SegmentWriter>* writer) {
     auto path = BetaRowset::local_segment_path(_context.tablet_path, 
_context.rowset_id,
diff --git a/be/src/olap/rowset/beta_rowset_writer.h 
b/be/src/olap/rowset/beta_rowset_writer.h
index e046f3d101..c3833f65d8 100644
--- a/be/src/olap/rowset/beta_rowset_writer.h
+++ b/be/src/olap/rowset/beta_rowset_writer.h
@@ -57,6 +57,10 @@ public:
 
     RowsetSharedPtr build() override;
 
+    // build a tmp rowset for load segment to calc delete_bitmap
+    // for this segment
+    RowsetSharedPtr build_tmp() override;
+
     Version version() override { return _context.version; }
 
     int64_t num_rows() const override { return _num_rows_written; }
@@ -79,6 +83,7 @@ private:
     Status _create_segment_writer(std::unique_ptr<segment_v2::SegmentWriter>* 
writer);
 
     Status _flush_segment_writer(std::unique_ptr<segment_v2::SegmentWriter>* 
writer);
+    void _build_rowset_meta(std::shared_ptr<RowsetMeta> rowset_meta);
 
 private:
     RowsetWriterContext _context;
diff --git a/be/src/olap/rowset/rowset_tree.cpp 
b/be/src/olap/rowset/rowset_tree.cpp
index d7cd879e28..d66a25e4f4 100644
--- a/be/src/olap/rowset/rowset_tree.cpp
+++ b/be/src/olap/rowset/rowset_tree.cpp
@@ -25,6 +25,7 @@
 
 #include <cstddef>
 #include <functional>
+#include <iterator>
 #include <memory>
 #include <ostream>
 #include <string>
@@ -196,8 +197,10 @@ void RowsetTree::FindRowsetsIntersectingInterval(
 }
 
 void RowsetTree::FindRowsetsWithKeyInRange(
-        const Slice& encoded_key, vector<std::pair<RowsetSharedPtr, int32_t>>* 
rowsets) const {
+        const Slice& encoded_key, const RowsetIdUnorderedSet* rowset_ids,
+        vector<std::pair<RowsetSharedPtr, int32_t>>* rowsets) const {
     DCHECK(initted_);
+    DCHECK(rowset_ids != nullptr);
 
     // Query the interval tree to efficiently find rowsets with known bounds
     // whose ranges overlap the probe key.
@@ -206,7 +209,9 @@ void RowsetTree::FindRowsetsWithKeyInRange(
     tree_->FindContainingPoint(encoded_key, &from_tree);
     rowsets->reserve(rowsets->size() + from_tree.size());
     for (RowsetWithBounds* rs : from_tree) {
-        rowsets->emplace_back(rs->rowset, rs->segment_id);
+        if (rowset_ids->find(rs->rowset->rowset_id()) != rowset_ids->end()) {
+            rowsets->emplace_back(rs->rowset, rs->segment_id);
+        }
     }
 }
 
diff --git a/be/src/olap/rowset/rowset_tree.h b/be/src/olap/rowset/rowset_tree.h
index 92503dbf50..8228bc26d7 100644
--- a/be/src/olap/rowset/rowset_tree.h
+++ b/be/src/olap/rowset/rowset_tree.h
@@ -72,11 +72,11 @@ public:
     Status Init(const RowsetVector& rowsets);
     ~RowsetTree();
 
-    // Return all Rowsets whose range may contain the given encoded key.
+    // Return Rowsets whose id in rowset_ids and range may contain the given 
encoded key.
     //
     // The returned pointers are guaranteed to be valid at least until this
     // RowsetTree object is Reset().
-    void FindRowsetsWithKeyInRange(const Slice& encoded_key,
+    void FindRowsetsWithKeyInRange(const Slice& encoded_key, const 
RowsetIdUnorderedSet* rowset_ids,
                                    vector<std::pair<RowsetSharedPtr, 
int32_t>>* rowsets) const;
 
     // Call 'cb(rowset, index)' for each (rowset, index) pair such that
diff --git a/be/src/olap/rowset/rowset_writer.h 
b/be/src/olap/rowset/rowset_writer.h
index 114baae9f8..325e996fe6 100644
--- a/be/src/olap/rowset/rowset_writer.h
+++ b/be/src/olap/rowset/rowset_writer.h
@@ -68,6 +68,11 @@ public:
     // return nullptr when failed
     virtual RowsetSharedPtr build() = 0;
 
+    // we have to load segment data to build delete_bitmap for current segment,
+    // so we  build a tmp rowset ptr to load segment data.
+    // real build will be called in DeltaWriter close_wait.
+    virtual RowsetSharedPtr build_tmp() = 0;
+
     virtual Version version() = 0;
 
     virtual int64_t num_rows() const = 0;
diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index 811998ea3f..86ff3dd2d9 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -1876,10 +1876,10 @@ TabletSchemaSPtr Tablet::tablet_schema() const {
     return rowset_meta->tablet_schema();
 }
 
-Status Tablet::lookup_row_key(const Slice& encoded_key, RowLocation* 
row_location,
-                              uint32_t version) {
+Status Tablet::lookup_row_key(const Slice& encoded_key, const 
RowsetIdUnorderedSet* rowset_ids,
+                              RowLocation* row_location, uint32_t version) {
     std::vector<std::pair<RowsetSharedPtr, int32_t>> selected_rs;
-    _rowset_tree->FindRowsetsWithKeyInRange(encoded_key, &selected_rs);
+    _rowset_tree->FindRowsetsWithKeyInRange(encoded_key, rowset_ids, 
&selected_rs);
     if (selected_rs.empty()) {
         return Status::NotFound("No rowsets contains the key in key range");
     }
@@ -1919,6 +1919,159 @@ Status Tablet::lookup_row_key(const Slice& encoded_key, 
RowLocation* row_locatio
     return Status::NotFound("can't find key in all rowsets");
 }
 
+// load segment may do io so it should out lock
+Status Tablet::_load_rowset_segments(const RowsetSharedPtr& rowset,
+                                     
std::vector<segment_v2::SegmentSharedPtr>* segments) {
+    auto beta_rowset = reinterpret_cast<BetaRowset*>(rowset.get());
+    RETURN_IF_ERROR(beta_rowset->load_segments(segments));
+    return Status::OK();
+}
+
+// caller should hold meta_lock
+Status Tablet::calc_delete_bitmap(RowsetId rowset_id,
+                                  const 
std::vector<segment_v2::SegmentSharedPtr>& segments,
+                                  const RowsetIdUnorderedSet* 
specified_rowset_ids,
+                                  DeleteBitmapPtr delete_bitmap, bool 
check_pre_segments) {
+    std::vector<segment_v2::SegmentSharedPtr> pre_segments;
+    OlapStopWatch watch;
+    int64_t end_version = max_version_unlocked().second;
+    Version dummy_version(end_version + 1, end_version + 1);
+    for (auto& seg : segments) {
+        seg->load_index(); // We need index blocks to iterate
+        auto pk_idx = seg->get_primary_key_index();
+        int cnt = 0;
+        int total = pk_idx->num_rows();
+        int32_t remaining = total;
+        bool exact_match = false;
+        std::string last_key;
+        int batch_size = 1024;
+        MemPool pool;
+        while (remaining > 0) {
+            std::unique_ptr<segment_v2::IndexedColumnIterator> iter;
+            RETURN_IF_ERROR(pk_idx->new_iterator(&iter));
+
+            size_t num_to_read = std::min(batch_size, remaining);
+            std::unique_ptr<ColumnVectorBatch> cvb;
+            RETURN_IF_ERROR(ColumnVectorBatch::create(num_to_read, false, 
pk_idx->type_info(),
+                                                      nullptr, &cvb));
+            ColumnBlock block(cvb.get(), &pool);
+            ColumnBlockView column_block_view(&block);
+            Slice last_key_slice(last_key);
+            RETURN_IF_ERROR(iter->seek_at_or_after(&last_key_slice, 
&exact_match));
+
+            size_t num_read = num_to_read;
+            RETURN_IF_ERROR(iter->next_batch(&num_read, &column_block_view));
+            DCHECK(num_to_read == num_read);
+            last_key = (reinterpret_cast<const Slice*>(cvb->cell_ptr(num_read 
- 1)))->to_string();
+
+            // exclude last_key, last_key will be read in next batch.
+            if (num_read == batch_size && num_read != remaining) {
+                num_read -= 1;
+            }
+            for (size_t i = 0; i < num_read; i++) {
+                const Slice* key = reinterpret_cast<const 
Slice*>(cvb->cell_ptr(i));
+                // first check if exist in pre segment
+                if (check_pre_segments) {
+                    bool find = _check_pk_in_pre_segments(pre_segments, *key, 
dummy_version,
+                                                          delete_bitmap);
+                    if (find) {
+                        cnt++;
+                        continue;
+                    }
+                }
+                RowLocation loc;
+                auto st = lookup_row_key(*key, specified_rowset_ids, &loc, 
dummy_version.first - 1);
+                CHECK(st.ok() || st.is_not_found());
+                if (st.is_not_found()) continue;
+                ++cnt;
+                delete_bitmap->add({loc.rowset_id, loc.segment_id, 
dummy_version.first},
+                                   loc.row_id);
+            }
+            remaining -= num_read;
+        }
+        if (check_pre_segments) {
+            pre_segments.emplace_back(seg);
+        }
+    }
+    LOG(INFO) << "construct delete bitmap tablet: " << tablet_id() << " 
rowset: " << rowset_id
+              << " dummy_version: " << dummy_version << " cost: " << 
watch.get_elapse_time_us()
+              << "(us)";
+    return Status::OK();
+}
+
+bool Tablet::_check_pk_in_pre_segments(
+        const std::vector<segment_v2::SegmentSharedPtr>& pre_segments, const 
Slice& key,
+        const Version& version, DeleteBitmapPtr delete_bitmap) {
+    for (auto it = pre_segments.rbegin(); it != pre_segments.rend(); ++it) {
+        RowLocation loc;
+        auto st = (*it)->lookup_row_key(key, &loc);
+        CHECK(st.ok() || st.is_not_found());
+        if (st.is_not_found()) {
+            continue;
+        }
+        delete_bitmap->add({loc.rowset_id, loc.segment_id, version.first}, 
loc.row_id);
+        return true;
+    }
+    return false;
+}
+
+void Tablet::_rowset_ids_difference(const RowsetIdUnorderedSet& cur,
+                                    const RowsetIdUnorderedSet& pre, 
RowsetIdUnorderedSet* to_add,
+                                    RowsetIdUnorderedSet* to_del) {
+    for (const auto& id : cur) {
+        if (pre.find(id) == pre.end()) {
+            to_add->insert(id);
+        }
+    }
+    for (const auto& id : pre) {
+        if (cur.find(id) == cur.end()) {
+            to_del->insert(id);
+        }
+    }
+}
+
+Status Tablet::update_delete_bitmap(const RowsetSharedPtr& rowset, 
DeleteBitmapPtr delete_bitmap,
+                                    const RowsetIdUnorderedSet& 
pre_rowset_ids) {
+    RowsetIdUnorderedSet cur_rowset_ids;
+    RowsetIdUnorderedSet rowset_ids_to_add;
+    RowsetIdUnorderedSet rowset_ids_to_del;
+    int64_t cur_version = rowset->start_version();
+
+    std::vector<segment_v2::SegmentSharedPtr> segments;
+    _load_rowset_segments(rowset, &segments);
+
+    std::lock_guard<std::shared_mutex> meta_wrlock(_meta_lock);
+    cur_rowset_ids = all_rs_id();
+    _rowset_ids_difference(cur_rowset_ids, pre_rowset_ids, &rowset_ids_to_add, 
&rowset_ids_to_del);
+    LOG(INFO) << "rowset_ids_to_add: " << rowset_ids_to_add.size()
+              << ", rowset_ids_to_del: " << rowset_ids_to_del.size();
+    for (const auto& to_del : rowset_ids_to_del) {
+        delete_bitmap->remove({to_del, 0, 0}, {to_del, UINT32_MAX, INT64_MAX});
+    }
+    if (!rowset_ids_to_add.empty()) {
+        RETURN_IF_ERROR(calc_delete_bitmap(rowset->rowset_id(), segments, 
&rowset_ids_to_add,
+                                           delete_bitmap, true));
+    }
+
+    // update version
+    for (auto iter = delete_bitmap->delete_bitmap.begin();
+         iter != delete_bitmap->delete_bitmap.end(); ++iter) {
+        int ret = _tablet_meta->delete_bitmap().set(
+                {std::get<0>(iter->first), std::get<1>(iter->first), 
cur_version}, iter->second);
+        DCHECK(ret == 1);
+    }
+
+    return Status::OK();
+}
+
+RowsetIdUnorderedSet Tablet::all_rs_id() const {
+    RowsetIdUnorderedSet rowset_ids;
+    for (const auto& rs_it : _rs_version_map) {
+        rowset_ids.insert(rs_it.second->rowset_id());
+    }
+    return rowset_ids;
+}
+
 void Tablet::remove_self_owned_remote_rowsets() {
     DCHECK(_state == TABLET_SHUTDOWN);
     for (const auto& rs : _self_owned_remote_rowsets) {
diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h
index 2fb368930d..5e814efee3 100644
--- a/be/src/olap/tablet.h
+++ b/be/src/olap/tablet.h
@@ -36,6 +36,7 @@
 #include "olap/rowset/rowset.h"
 #include "olap/rowset/rowset_reader.h"
 #include "olap/rowset/rowset_tree.h"
+#include "olap/rowset/segment_v2/segment.h"
 #include "olap/tablet_meta.h"
 #include "olap/tuple.h"
 #include "olap/utils.h"
@@ -93,6 +94,7 @@ public:
     size_t num_rows();
     int version_count() const;
     Version max_version() const;
+    Version max_version_unlocked() const;
     CumulativeCompactionPolicy* cumulative_compaction_policy();
     bool enable_unique_key_merge_on_write() const;
 
@@ -307,7 +309,23 @@ public:
     // Lookup the row location of `encoded_key`, the function sets 
`row_location` on success.
     // NOTE: the method only works in unique key model with primary key index, 
you will got a
     //       not supported error in other data model.
-    Status lookup_row_key(const Slice& encoded_key, RowLocation* row_location, 
uint32_t version);
+    Status lookup_row_key(const Slice& encoded_key, const 
RowsetIdUnorderedSet* rowset_ids,
+                          RowLocation* row_location, uint32_t version);
+
+    // calc delete bitmap when flush memtable, use a fake version to calc
+    // For example, cur max version is 5, and we use version 6 to calc but
+    // finally this rowset publish version with 8, we should make up data
+    // for rowset 6-7. Also, if a compaction happens between commit_txn and
+    // publish_txn, we should remove compaction input rowsets' delete_bitmap
+    // and build newly generated rowset's delete_bitmap
+    Status calc_delete_bitmap(RowsetId rowset_id,
+                              const std::vector<segment_v2::SegmentSharedPtr>& 
segments,
+                              const RowsetIdUnorderedSet* specified_rowset_ids,
+                              DeleteBitmapPtr delete_bitmap, bool 
check_pre_segments = false);
+
+    Status update_delete_bitmap(const RowsetSharedPtr& rowset, DeleteBitmapPtr 
delete_bitmap,
+                                const RowsetIdUnorderedSet& pre_rowset_ids);
+    RowsetIdUnorderedSet all_rs_id() const;
 
     void remove_self_owned_remote_rowsets();
 
@@ -347,6 +365,14 @@ private:
     bool _reconstruct_version_tracker_if_necessary();
     void _init_context_common_fields(RowsetWriterContext& context);
 
+    bool _check_pk_in_pre_segments(const 
std::vector<segment_v2::SegmentSharedPtr>& pre_segments,
+                                   const Slice& key, const Version& version,
+                                   DeleteBitmapPtr delete_bitmap);
+    void _rowset_ids_difference(const RowsetIdUnorderedSet& cur, const 
RowsetIdUnorderedSet& pre,
+                                RowsetIdUnorderedSet* to_add, 
RowsetIdUnorderedSet* to_del);
+    Status _load_rowset_segments(const RowsetSharedPtr& rowset,
+                                 std::vector<segment_v2::SegmentSharedPtr>* 
segments);
+
 public:
     static const int64_t K_INVALID_CUMULATIVE_POINT = -1;
 
@@ -495,6 +521,10 @@ inline Version Tablet::max_version() const {
     return _tablet_meta->max_version();
 }
 
+inline Version Tablet::max_version_unlocked() const {
+    return _tablet_meta->max_version();
+}
+
 inline KeysType Tablet::keys_type() const {
     return _schema->keys_type();
 }
diff --git a/be/src/olap/tablet_meta.h b/be/src/olap/tablet_meta.h
index 9e589e9fdd..22cd1d3254 100644
--- a/be/src/olap/tablet_meta.h
+++ b/be/src/olap/tablet_meta.h
@@ -70,6 +70,7 @@ class DataDir;
 class TabletMeta;
 class DeleteBitmap;
 using TabletMetaSharedPtr = std::shared_ptr<TabletMeta>;
+using DeleteBitmapPtr = std::shared_ptr<DeleteBitmap>;
 
 // Class encapsulates meta of tablet.
 // The concurrency control is handled in Tablet Class, not in this class.
@@ -337,7 +338,7 @@ public:
     /**
      * Sets the bitmap of specific segment, it's may be insertion or 
replacement
      *
-     * @return 0 if the insertion took place, 1 if the assignment took place
+     * @return 1 if the insertion took place, 0 if the assignment took place
      */
     int set(const BitmapKey& bmk, const roaring::Roaring& 
segment_delete_bitmap);
 
diff --git a/be/src/olap/txn_manager.cpp b/be/src/olap/txn_manager.cpp
index 252998446e..efb4185f46 100644
--- a/be/src/olap/txn_manager.cpp
+++ b/be/src/olap/txn_manager.cpp
@@ -168,6 +168,31 @@ Status TxnManager::prepare_txn(TPartitionId partition_id, TTransactionId transac
     return Status::OK();
 }
 
+void TxnManager::set_txn_related_delete_bitmap(TPartitionId partition_id,
+                                               TTransactionId transaction_id, TTabletId tablet_id,
+                                               SchemaHash schema_hash, TabletUid tablet_uid,
+                                               bool unique_key_merge_on_write,
+                                               DeleteBitmapPtr delete_bitmap,
+                                               const RowsetIdUnorderedSet& rowset_ids) {
+    pair<int64_t, int64_t> key(partition_id, transaction_id);
+    TabletInfo tablet_info(tablet_id, schema_hash, tablet_uid);
+
+    std::unique_lock<std::mutex> txn_lock(_get_txn_lock(transaction_id));
+    {
+        // get tx
+        std::shared_lock rdlock(_get_txn_map_lock(transaction_id));
+        txn_tablet_map_t& txn_tablet_map = _get_txn_tablet_map(transaction_id);
+        auto it = txn_tablet_map.find(key);
+        DCHECK(it != txn_tablet_map.end());
+        auto load_itr = it->second.find(tablet_info);
+        DCHECK(load_itr != it->second.end());
+        TabletTxnInfo& load_info = load_itr->second;
+        load_info.unique_key_merge_on_write = unique_key_merge_on_write;
+        load_info.delete_bitmap = delete_bitmap;
+        load_info.rowset_ids = rowset_ids;
+    }
+}
+
 Status TxnManager::commit_txn(OlapMeta* meta, TPartitionId partition_id,
                               TTransactionId transaction_id, TTabletId tablet_id,
                               SchemaHash schema_hash, TabletUid tablet_uid,
@@ -264,39 +289,59 @@ Status TxnManager::publish_txn(OlapMeta* meta, TPartitionId partition_id,
     pair<int64_t, int64_t> key(partition_id, transaction_id);
     TabletInfo tablet_info(tablet_id, schema_hash, tablet_uid);
     RowsetSharedPtr rowset_ptr = nullptr;
-    std::unique_lock<std::mutex> txn_lock(_get_txn_lock(transaction_id));
+    TabletTxnInfo* load_info = nullptr;
     {
-        std::shared_lock rlock(_get_txn_map_lock(transaction_id));
-        txn_tablet_map_t& txn_tablet_map = _get_txn_tablet_map(transaction_id);
-        auto it = txn_tablet_map.find(key);
-        if (it != txn_tablet_map.end()) {
-            auto load_itr = it->second.find(tablet_info);
-            if (load_itr != it->second.end()) {
-                // found load for txn,tablet
-                // case 1: user commit rowset, then the load id must be equal
-                TabletTxnInfo& load_info = load_itr->second;
-                rowset_ptr = load_info.rowset;
+        std::unique_lock<std::mutex> txn_lock(_get_txn_lock(transaction_id));
+        {
+            std::shared_lock rlock(_get_txn_map_lock(transaction_id));
+            txn_tablet_map_t& txn_tablet_map = _get_txn_tablet_map(transaction_id);
+            auto it = txn_tablet_map.find(key);
+            if (it != txn_tablet_map.end()) {
+                auto load_itr = it->second.find(tablet_info);
+                if (load_itr != it->second.end()) {
+                    // found load for txn,tablet
+                    // case 1: user commit rowset, then the load id must be equal
+                    load_info = &load_itr->second;
+                    rowset_ptr = load_info->rowset;
+                }
+            }
+        }
+        // save meta need access disk, it maybe very slow, so that it is not in global txn lock
+        // it is under a single txn lock
+        if (rowset_ptr != nullptr) {
+            // TODO(ygl): rowset is already set version here, memory is changed, if save failed
+            // it maybe a fatal error
+            rowset_ptr->make_visible(version);
+            Status save_status =
+                    RowsetMetaManager::save(meta, tablet_uid, rowset_ptr->rowset_id(),
+                                            rowset_ptr->rowset_meta()->get_rowset_pb());
+            if (save_status != Status::OK()) {
+                LOG(WARNING) << "save committed rowset failed. when publish txn rowset_id:"
+                             << rowset_ptr->rowset_id() << ", tablet id: " << tablet_id
+                             << ", txn id:" << transaction_id;
+                return Status::OLAPInternalError(OLAP_ERR_ROWSET_SAVE_FAILED);
             }
+        } else {
+            return Status::OLAPInternalError(OLAP_ERR_TRANSACTION_NOT_EXIST);
         }
     }
-    // save meta need access disk, it maybe very slow, so that it is not in global txn lock
-    // it is under a single txn lock
-    if (rowset_ptr != nullptr) {
-        // TODO(ygl): rowset is already set version here, memory is changed, if save failed
-        // it maybe a fatal error
-        rowset_ptr->make_visible(version);
-        Status save_status = RowsetMetaManager::save(meta, tablet_uid, rowset_ptr->rowset_id(),
-                                                     rowset_ptr->rowset_meta()->get_rowset_pb());
-        if (save_status != Status::OK()) {
-            LOG(WARNING) << "save committed rowset failed. when publish txn rowset_id:"
-                         << rowset_ptr->rowset_id() << ", tablet id: " << tablet_id
-                         << ", txn id:" << transaction_id;
-            return Status::OLAPInternalError(OLAP_ERR_ROWSET_SAVE_FAILED);
+    // update delete_bitmap
+    {
+        auto tablet = StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id);
+#ifdef BE_TEST
+        if (tablet == nullptr) {
+            return Status::OK();
+        }
+#endif
+        if (load_info != nullptr && load_info->unique_key_merge_on_write) {
+            RETURN_IF_ERROR(tablet->update_delete_bitmap(rowset_ptr, load_info->delete_bitmap,
+                                                         load_info->rowset_ids));
+            std::lock_guard<std::shared_mutex> wrlock(tablet->get_header_lock());
+            tablet->save_meta();
         }
-    } else {
-        return Status::OLAPInternalError(OLAP_ERR_TRANSACTION_NOT_EXIST);
     }
     {
+        std::unique_lock<std::mutex> txn_lock(_get_txn_lock(transaction_id));
         std::lock_guard<std::shared_mutex> wrlock(_get_txn_map_lock(transaction_id));
         txn_tablet_map_t& txn_tablet_map = _get_txn_tablet_map(transaction_id);
         auto it = txn_tablet_map.find(key);
@@ -313,110 +358,9 @@ Status TxnManager::publish_txn(OlapMeta* meta, TPartitionId partition_id,
             }
         }
     }
-    auto tablet = StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id);
-#ifdef BE_TEST
-    if (tablet == nullptr) {
-        return Status::OK();
-    }
-#endif
-    // Check if have to build extra delete bitmap for table of UNIQUE_KEY model
-    if (!tablet->enable_unique_key_merge_on_write() ||
-        tablet->tablet_meta()->preferred_rowset_type() != RowsetTypePB::BETA_ROWSET ||
-        rowset_ptr->keys_type() != KeysType::UNIQUE_KEYS) {
-        return Status::OK();
-    }
-    CHECK(version.first == version.second) << "impossible: " << version;
-
-    // For each key in current set, check if it overwrites any previously
-    // written keys
-    OlapStopWatch watch;
-    std::vector<segment_v2::SegmentSharedPtr> segments;
-    std::vector<segment_v2::SegmentSharedPtr> pre_segments;
-    auto beta_rowset = reinterpret_cast<BetaRowset*>(rowset_ptr.get());
-    Status st = beta_rowset->load_segments(&segments);
-    if (!st.ok()) return st;
-    // lock tablet meta to modify delete bitmap
-    std::lock_guard<std::shared_mutex> meta_wrlock(tablet->get_header_lock());
-    for (auto& seg : segments) {
-        seg->load_index(); // We need index blocks to iterate
-        auto pk_idx = seg->get_primary_key_index();
-        int cnt = 0;
-        int total = pk_idx->num_rows();
-        int32_t remaining = total;
-        bool exact_match = false;
-        std::string last_key;
-        int batch_size = 1024;
-        MemPool pool;
-        while (remaining > 0) {
-            std::unique_ptr<segment_v2::IndexedColumnIterator> iter;
-            RETURN_IF_ERROR(pk_idx->new_iterator(&iter));
-
-            size_t num_to_read = std::min(batch_size, remaining);
-            std::unique_ptr<ColumnVectorBatch> cvb;
-            RETURN_IF_ERROR(ColumnVectorBatch::create(num_to_read, false, pk_idx->type_info(),
-                                                      nullptr, &cvb));
-            ColumnBlock block(cvb.get(), &pool);
-            ColumnBlockView column_block_view(&block);
-            Slice last_key_slice(last_key);
-            RETURN_IF_ERROR(iter->seek_at_or_after(&last_key_slice, &exact_match));
-
-            size_t num_read = num_to_read;
-            RETURN_IF_ERROR(iter->next_batch(&num_read, &column_block_view));
-            DCHECK(num_to_read == num_read);
-            last_key = (reinterpret_cast<const Slice*>(cvb->cell_ptr(num_read - 1)))->to_string();
-
-            // exclude last_key, last_key will be read in next batch.
-            if (num_read == batch_size && num_read != remaining) {
-                num_read -= 1;
-            }
-            for (size_t i = 0; i < num_read; i++) {
-                const Slice* key = reinterpret_cast<const Slice*>(cvb->cell_ptr(i));
-                // first check if exist in pre segment
-                bool find = _check_pk_in_pre_segments(pre_segments, *key, tablet, version);
-                if (find) {
-                    cnt++;
-                    continue;
-                }
-                RowLocation loc;
-                st = tablet->lookup_row_key(*key, &loc, version.first - 1);
-                CHECK(st.ok() || st.is_not_found());
-                if (st.is_not_found()) continue;
-                ++cnt;
-                // TODO: we can just set a bitmap onece we are done while iteration
-                tablet->tablet_meta()->delete_bitmap().add(
-                        {loc.rowset_id, loc.segment_id, version.first}, loc.row_id);
-            }
-            remaining -= num_read;
-        }
-
-        LOG(INFO) << "construct delete bitmap tablet: " << tablet->tablet_id()
-                  << " rowset: " << beta_rowset->rowset_id() << " segment: " << seg->id()
-                  << " version: " << version << " delete: " << cnt << "/" << total;
-        pre_segments.emplace_back(seg);
-    }
-    tablet->save_meta();
-    LOG(INFO) << "finished to update delete bitmap, tablet: " << tablet->tablet_id()
-              << " version: " << version << ", elapse(us): " << watch.get_elapse_time_us();
     return Status::OK();
 }
 
-bool TxnManager::_check_pk_in_pre_segments(
-        const std::vector<segment_v2::SegmentSharedPtr>& pre_segments, const Slice& key,
-        TabletSharedPtr tablet, const Version& version) {
-    for (auto it = pre_segments.rbegin(); it != pre_segments.rend(); ++it) {
-        RowLocation loc;
-        auto st = (*it)->lookup_row_key(key, &loc);
-        CHECK(st.ok() || st.is_not_found());
-        if (st.is_not_found()) {
-            continue;
-        }
-        tablet->tablet_meta()->delete_bitmap().add({loc.rowset_id, loc.segment_id, version.first},
-                                                   loc.row_id);
-        return true;
-    }
-    return false;
-}
-
 // txn could be rollbacked if it does not have related rowset
 // if the txn has related rowset then could not rollback it, because it
 // may be committed in another thread and our current thread meets errors when writing to data file
diff --git a/be/src/olap/txn_manager.h b/be/src/olap/txn_manager.h
index db6af360dc..197307f589 100644
--- a/be/src/olap/txn_manager.h
+++ b/be/src/olap/txn_manager.h
@@ -44,6 +44,7 @@
 #include "olap/rowset/rowset_meta.h"
 #include "olap/rowset/segment_v2/segment.h"
 #include "olap/tablet.h"
+#include "olap/tablet_meta.h"
 #include "util/time.h"
 
 namespace doris {
@@ -52,10 +53,26 @@ class DeltaWriter;
 struct TabletTxnInfo {
     PUniqueId load_id;
     RowsetSharedPtr rowset;
+    bool unique_key_merge_on_write;
+    DeleteBitmapPtr delete_bitmap;
+    // records rowsets calc in commit txn
+    RowsetIdUnorderedSet rowset_ids;
     int64_t creation_time;
 
     TabletTxnInfo(PUniqueId load_id, RowsetSharedPtr rowset)
-            : load_id(load_id), rowset(rowset), creation_time(UnixSeconds()) {}
+            : load_id(load_id),
+              rowset(rowset),
+              unique_key_merge_on_write(false),
+              creation_time(UnixSeconds()) {}
+
+    TabletTxnInfo(PUniqueId load_id, RowsetSharedPtr rowset, bool merge_on_write,
+                  DeleteBitmapPtr delete_bitmap, const RowsetIdUnorderedSet& ids)
+            : load_id(load_id),
+              rowset(rowset),
+              unique_key_merge_on_write(merge_on_write),
+              delete_bitmap(delete_bitmap),
+              rowset_ids(ids),
+              creation_time(UnixSeconds()) {}
 
     TabletTxnInfo() {}
 };
@@ -146,6 +163,12 @@ public:
     void finish_slave_tablet_pull_rowset(int64_t transaction_id, int64_t tablet_id, int64_t node_id,
                                          bool is_succeed);
 
+    void set_txn_related_delete_bitmap(TPartitionId partition_id, TTransactionId transaction_id,
+                                       TTabletId tablet_id, SchemaHash schema_hash,
+                                       TabletUid tablet_uid, bool unique_key_merge_on_write,
+                                       DeleteBitmapPtr delete_bitmap,
+                                       const RowsetIdUnorderedSet& rowset_ids);
+
 private:
     using TxnKey = std::pair<int64_t, int64_t>; // partition_id, transaction_id;
 
@@ -188,10 +211,6 @@ private:
     void _insert_txn_partition_map_unlocked(int64_t transaction_id, int64_t partition_id);
     void _clear_txn_partition_map_unlocked(int64_t transaction_id, int64_t partition_id);
 
-    bool _check_pk_in_pre_segments(const std::vector<segment_v2::SegmentSharedPtr>& pre_segments,
-                                   const Slice& key, TabletSharedPtr tablet,
-                                   const Version& version);
-
 private:
     const int32_t _txn_map_shard_size;
 
diff --git a/be/test/olap/rowset/rowset_tree_test.cpp b/be/test/olap/rowset/rowset_tree_test.cpp
index de5dc45124..06ed7ea194 100644
--- a/be/test/olap/rowset/rowset_tree_test.cpp
+++ b/be/test/olap/rowset/rowset_tree_test.cpp
@@ -101,11 +101,23 @@ private:
 };
 
 TEST_F(TestRowsetTree, TestTree) {
+    RowsetIdUnorderedSet rowset_ids;
     RowsetVector vec;
-    vec.push_back(create_rowset("0", "5"));
-    vec.push_back(create_rowset("3", "5"));
-    vec.push_back(create_rowset("5", "9"));
-    vec.push_back(create_rowset("0", "0", true));
+    auto rowset1 = create_rowset("0", "5");
+    vec.push_back(rowset1);
+    rowset_ids.insert(rowset1->rowset_id());
+
+    auto rowset2 = create_rowset("3", "5");
+    vec.push_back(rowset2);
+    rowset_ids.insert(rowset2->rowset_id());
+
+    auto rowset3 = create_rowset("5", "9");
+    vec.push_back(rowset3);
+    rowset_ids.insert(rowset3->rowset_id());
+
+    auto rowset4 = create_rowset("0", "0", true);
+    vec.push_back(rowset4);
+    rowset_ids.insert(rowset4->rowset_id());
 
     RowsetTree tree;
     ASSERT_FALSE(tree.Init(vec).ok());
@@ -115,13 +127,13 @@ TEST_F(TestRowsetTree, TestTree) {
 
     // "2" overlaps 0-5
     vector<std::pair<RowsetSharedPtr, int32_t>> out;
-    tree.FindRowsetsWithKeyInRange("2", &out);
+    tree.FindRowsetsWithKeyInRange("2", &rowset_ids, &out);
     ASSERT_EQ(1, out.size());
     ASSERT_EQ(vec[0].get(), out[0].first.get());
 
     // "4" overlaps 0-5, 3-5
     out.clear();
-    tree.FindRowsetsWithKeyInRange("4", &out);
+    tree.FindRowsetsWithKeyInRange("4", &rowset_ids, &out);
     ASSERT_EQ(2, out.size());
     ASSERT_EQ(vec[0].get(), out[0].first.get());
     ASSERT_EQ(vec[1].get(), out[1].first.get());
@@ -149,14 +161,14 @@ TEST_F(TestRowsetTree, TestTree) {
 
     // "3" overlaps 0-5, 3-5
     out.clear();
-    tree.FindRowsetsWithKeyInRange("3", &out);
+    tree.FindRowsetsWithKeyInRange("3", &rowset_ids, &out);
     ASSERT_EQ(2, out.size());
     ASSERT_EQ(vec[0].get(), out[0].first.get());
     ASSERT_EQ(vec[1].get(), out[1].first.get());
 
     // "5" overlaps 0-5, 3-5, 5-9
     out.clear();
-    tree.FindRowsetsWithKeyInRange("5", &out);
+    tree.FindRowsetsWithKeyInRange("5", &rowset_ids, &out);
     ASSERT_EQ(3, out.size());
     ASSERT_EQ(vec[0].get(), out[0].first.get());
     ASSERT_EQ(vec[1].get(), out[1].first.get());
@@ -333,10 +345,15 @@ TEST_P(TestRowsetTreePerformance, TestPerformance) {
 
     MonotonicStopWatch one_at_time_timer;
     MonotonicStopWatch batch_timer;
+    RowsetIdUnorderedSet rowset_ids;
     for (int i = 0; i < kNumIterations; i++) {
+        rowset_ids.clear();
         // Create a bunch of rowsets, each of which spans about 10% of the "row space".
         // The row space here is 4-digit 0-padded numbers.
         RowsetVector vec = GenerateRandomRowsets(kNumRowsets);
+        for (auto rowset : vec) {
+            rowset_ids.insert(rowset->rowset_id());
+        }
 
         RowsetTree tree;
         ASSERT_TRUE(tree.Init(vec).ok());
@@ -353,7 +370,7 @@ TEST_P(TestRowsetTreePerformance, TestPerformance) {
             vector<std::pair<RowsetSharedPtr, int32_t>> out;
             for (const auto& q : queries) {
                 out.clear();
-                tree.FindRowsetsWithKeyInRange(Slice(q), &out);
+                tree.FindRowsetsWithKeyInRange(Slice(q), &rowset_ids, &out);
                 individual_matches += out.size();
             }
         }
diff --git a/be/test/olap/tablet_test.cpp b/be/test/olap/tablet_test.cpp
index 2156ca87cc..197a53666c 100644
--- a/be/test/olap/tablet_test.cpp
+++ b/be/test/olap/tablet_test.cpp
@@ -376,6 +376,7 @@ TEST_F(TestTablet, rowset_tree_update) {
     tschema.keys_type = TKeysType::UNIQUE_KEYS;
     TabletMetaSharedPtr tablet_meta = new_tablet_meta(tschema, true);
     TabletSharedPtr tablet(new Tablet(tablet_meta, nullptr));
+    RowsetIdUnorderedSet rowset_ids;
     tablet->init();
 
     RowsetMetaSharedPtr rsm1(new RowsetMeta());
@@ -386,6 +387,7 @@ TEST_F(TestTablet, rowset_tree_update) {
     RowsetSharedPtr rs_ptr1;
     MockRowset::create_rowset(tablet->tablet_schema(), "", rsm1, &rs_ptr1, false);
     tablet->add_inc_rowset(rs_ptr1);
+    rowset_ids.insert(id1);
 
     RowsetMetaSharedPtr rsm2(new RowsetMeta());
     init_rs_meta(rsm2, 8, 8, convert_key_bounds({{"500", "999"}}));
@@ -396,27 +398,33 @@ TEST_F(TestTablet, rowset_tree_update) {
     RowsetSharedPtr rs_ptr2;
     MockRowset::create_rowset(tablet->tablet_schema(), "", rsm2, &rs_ptr2, false);
     tablet->add_inc_rowset(rs_ptr2);
+    rowset_ids.insert(id2);
+
+    RowsetId id3;
+    id3.init(540081);
+    rowset_ids.insert(id3);
 
     RowLocation loc;
     // Key not in range.
-    ASSERT_TRUE(tablet->lookup_row_key("99", &loc, 7).is_not_found());
+    ASSERT_TRUE(tablet->lookup_row_key("99", &rowset_ids, &loc, 7).is_not_found());
     // Version too low.
-    ASSERT_TRUE(tablet->lookup_row_key("101", &loc, 3).is_not_found());
+    ASSERT_TRUE(tablet->lookup_row_key("101", &rowset_ids, &loc, 3).is_not_found());
     // Hit a segment, but since we don't have real data, return an internal error when loading the
     // segment.
-    ASSERT_TRUE(tablet->lookup_row_key("101", &loc, 7).precise_code() ==
+    LOG(INFO) << tablet->lookup_row_key("101", &rowset_ids, &loc, 7).to_string();
+    ASSERT_TRUE(tablet->lookup_row_key("101", &rowset_ids, &loc, 7).precise_code() ==
                 OLAP_ERR_ROWSET_LOAD_FAILED);
     // Key not in range.
-    ASSERT_TRUE(tablet->lookup_row_key("201", &loc, 7).is_not_found());
-    ASSERT_TRUE(tablet->lookup_row_key("300", &loc, 7).precise_code() ==
+    ASSERT_TRUE(tablet->lookup_row_key("201", &rowset_ids, &loc, 7).is_not_found());
+    ASSERT_TRUE(tablet->lookup_row_key("300", &rowset_ids, &loc, 7).precise_code() ==
                 OLAP_ERR_ROWSET_LOAD_FAILED);
     // Key not in range.
-    ASSERT_TRUE(tablet->lookup_row_key("499", &loc, 7).is_not_found());
+    ASSERT_TRUE(tablet->lookup_row_key("499", &rowset_ids, &loc, 7).is_not_found());
     // Version too low.
-    ASSERT_TRUE(tablet->lookup_row_key("500", &loc, 7).is_not_found());
+    ASSERT_TRUE(tablet->lookup_row_key("500", &rowset_ids, &loc, 7).is_not_found());
     // Hit a segment, but since we don't have real data, return an internal error when loading the
     // segment.
-    ASSERT_TRUE(tablet->lookup_row_key("500", &loc, 8).precise_code() ==
+    ASSERT_TRUE(tablet->lookup_row_key("500", &rowset_ids, &loc, 8).precise_code() ==
                 OLAP_ERR_ROWSET_LOAD_FAILED);
 }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to