zhannngchen commented on code in PR #22566:
URL: https://github.com/apache/doris/pull/22566#discussion_r1288028685


##########
be/src/olap/delete_bitmap_calculator.cpp:
##########
@@ -83,116 +91,209 @@ Status 
MergeIndexDeleteBitmapCalculatorContext::_next_batch(size_t row_id) {
     return Status::OK();
 }
 
-bool MergeIndexDeleteBitmapCalculatorContext::Comparator::operator()(
-        MergeIndexDeleteBitmapCalculatorContext* lhs,
-        MergeIndexDeleteBitmapCalculatorContext* rhs) const {
+bool MergedPKIndexDeleteBitmapCalculatorContext::Comparator::operator()(
+        MergedPKIndexDeleteBitmapCalculatorContext* lhs,
+        MergedPKIndexDeleteBitmapCalculatorContext* rhs) const {
     // std::proiroty_queue is a max heap, and function should return the 
result of `lhs < rhs`
     // so if the result of the function is true, rhs will be popped before lhs
     Slice key1, key2;
-    RETURN_IF_ERROR(lhs->get_current_key(key1));
-    RETURN_IF_ERROR(rhs->get_current_key(key2));
-    if (_sequence_length == 0) {
-        auto cmp_result = key1.compare(key2);
-        // when key1 is the same as key2,
-        // we want the one with greater segment id to be popped first
-        return cmp_result ? (cmp_result > 0) : (lhs->segment_id() < 
rhs->segment_id());
-    }
-    // smaller key popped first
-    auto key1_without_seq = Slice(key1.get_data(), key1.get_size() - 
_sequence_length);
-    auto key2_without_seq = Slice(key2.get_data(), key2.get_size() - 
_sequence_length);
-    auto cmp_result = key1_without_seq.compare(key2_without_seq);
-    if (cmp_result != 0) {
-        return cmp_result > 0;
-    }
-    // greater sequence value popped first
-    auto key1_sequence_val =
-            Slice(key1.get_data() + key1.get_size() - _sequence_length, 
_sequence_length);
-    auto key2_sequence_val =
-            Slice(key2.get_data() + key2.get_size() - _sequence_length, 
_sequence_length);
-    cmp_result = key1_sequence_val.compare(key2_sequence_val);
-    if (cmp_result != 0) {
-        return cmp_result < 0;
-    }
-    // greater segment id popped first
+    Status st;
+    st = lhs->get_current_key(&key1);
+    CHECK(LIKELY(st.ok())) << fmt::format("Reading key1 but get st = {}", st);
+    st = rhs->get_current_key(&key2);
+    CHECK(LIKELY(st.ok())) << fmt::format("Reading key2 but get st = {}", st);
+    int key_cmp_result = compare_key(key1, key2);
+    if (LIKELY(key_cmp_result != 0)) {
+        return key_cmp_result > 0;
+    }
+    int seq_cmp_result = compare_seq(key1, key2);
+    if (LIKELY(seq_cmp_result != 0)) {
+        return seq_cmp_result < 0;
+    }
+    if (LIKELY(lhs->end_version() != rhs->end_version())) {
+        return lhs->end_version() < rhs->end_version();
+    }
     return lhs->segment_id() < rhs->segment_id();
 }
 
-bool MergeIndexDeleteBitmapCalculatorContext::Comparator::is_key_same(Slice 
const& lhs,
-                                                                      Slice 
const& rhs) const {
+int MergedPKIndexDeleteBitmapCalculatorContext::Comparator::compare_key(Slice 
const& lhs,
+                                                                        Slice 
const& rhs) const {
     auto lhs_without_seq = Slice(lhs.get_data(), lhs.get_size() - 
_sequence_length);
     auto rhs_without_seq = Slice(rhs.get_data(), rhs.get_size() - 
_sequence_length);
-    return lhs_without_seq.compare(rhs_without_seq) == 0;
+    return lhs_without_seq.compare(rhs_without_seq);
+}
+
+int MergedPKIndexDeleteBitmapCalculatorContext::Comparator::compare_seq(Slice 
const& lhs,
+                                                                        Slice 
const& rhs) const {
+    if (UNLIKELY(_sequence_length == 0)) {
+        return 0;
+    }
+    auto lhs_seq = Slice(lhs.get_data() + lhs.get_size() - _sequence_length, 
_sequence_length);
+    auto rhs_seq = Slice(rhs.get_data() + rhs.get_size() - _sequence_length, 
_sequence_length);
+    return lhs_seq.compare(rhs_seq);
+}
+
+bool MergedPKIndexDeleteBitmapCalculatorContext::Comparator::is_key_same(Slice 
const& lhs,
+                                                                         Slice 
const& rhs) const {
+    return compare_key(lhs, rhs) == 0;
 }
 
-Status MergeIndexDeleteBitmapCalculator::init(RowsetId rowset_id,
-                                              std::vector<SegmentSharedPtr> 
const& segments,
-                                              size_t seq_col_length, size_t 
max_batch_size) {
-    _rowset_id = rowset_id;
+Status MergedPKIndexDeleteBitmapCalculator::init(std::vector<SegmentSharedPtr> 
const& segments,
+                                                 const std::vector<int64_t>* 
end_versions,
+                                                 size_t num_base_segments, 
size_t seq_col_length,
+                                                 size_t max_batch_size) {
     _seq_col_length = seq_col_length;
-    _comparator = 
MergeIndexDeleteBitmapCalculatorContext::Comparator(seq_col_length);
-    _contexts.reserve(segments.size());
+    _comparator = 
MergedPKIndexDeleteBitmapCalculatorContext::Comparator(seq_col_length);
+    size_t num_segments = segments.size();
+    _contexts.reserve(num_segments);
     _heap = std::make_unique<Heap>(_comparator);
 
-    for (auto& segment : segments) {
+    for (size_t i = 0; i < num_segments; ++i) {
+        auto&& segment = segments[i];
+        int64_t end_version = end_versions ? (*end_versions)[i] : 0;
+        segment->rowset_id();

Review Comment:
   useless?



##########
be/src/olap/delete_bitmap_calculator.cpp:
##########
@@ -83,116 +91,209 @@ Status 
MergeIndexDeleteBitmapCalculatorContext::_next_batch(size_t row_id) {
     return Status::OK();
 }
 
-bool MergeIndexDeleteBitmapCalculatorContext::Comparator::operator()(
-        MergeIndexDeleteBitmapCalculatorContext* lhs,
-        MergeIndexDeleteBitmapCalculatorContext* rhs) const {
+bool MergedPKIndexDeleteBitmapCalculatorContext::Comparator::operator()(
+        MergedPKIndexDeleteBitmapCalculatorContext* lhs,
+        MergedPKIndexDeleteBitmapCalculatorContext* rhs) const {
     // std::proiroty_queue is a max heap, and function should return the 
result of `lhs < rhs`
     // so if the result of the function is true, rhs will be popped before lhs
     Slice key1, key2;
-    RETURN_IF_ERROR(lhs->get_current_key(key1));
-    RETURN_IF_ERROR(rhs->get_current_key(key2));
-    if (_sequence_length == 0) {
-        auto cmp_result = key1.compare(key2);
-        // when key1 is the same as key2,
-        // we want the one with greater segment id to be popped first
-        return cmp_result ? (cmp_result > 0) : (lhs->segment_id() < 
rhs->segment_id());
-    }
-    // smaller key popped first
-    auto key1_without_seq = Slice(key1.get_data(), key1.get_size() - 
_sequence_length);
-    auto key2_without_seq = Slice(key2.get_data(), key2.get_size() - 
_sequence_length);
-    auto cmp_result = key1_without_seq.compare(key2_without_seq);
-    if (cmp_result != 0) {
-        return cmp_result > 0;
-    }
-    // greater sequence value popped first
-    auto key1_sequence_val =
-            Slice(key1.get_data() + key1.get_size() - _sequence_length, 
_sequence_length);
-    auto key2_sequence_val =
-            Slice(key2.get_data() + key2.get_size() - _sequence_length, 
_sequence_length);
-    cmp_result = key1_sequence_val.compare(key2_sequence_val);
-    if (cmp_result != 0) {
-        return cmp_result < 0;
-    }
-    // greater segment id popped first
+    Status st;
+    st = lhs->get_current_key(&key1);
+    CHECK(LIKELY(st.ok())) << fmt::format("Reading key1 but get st = {}", st);
+    st = rhs->get_current_key(&key2);
+    CHECK(LIKELY(st.ok())) << fmt::format("Reading key2 but get st = {}", st);
+    int key_cmp_result = compare_key(key1, key2);
+    if (LIKELY(key_cmp_result != 0)) {
+        return key_cmp_result > 0;
+    }
+    int seq_cmp_result = compare_seq(key1, key2);
+    if (LIKELY(seq_cmp_result != 0)) {
+        return seq_cmp_result < 0;
+    }
+    if (LIKELY(lhs->end_version() != rhs->end_version())) {
+        return lhs->end_version() < rhs->end_version();
+    }
     return lhs->segment_id() < rhs->segment_id();
 }
 
-bool MergeIndexDeleteBitmapCalculatorContext::Comparator::is_key_same(Slice 
const& lhs,
-                                                                      Slice 
const& rhs) const {
+int MergedPKIndexDeleteBitmapCalculatorContext::Comparator::compare_key(Slice 
const& lhs,
+                                                                        Slice 
const& rhs) const {
     auto lhs_without_seq = Slice(lhs.get_data(), lhs.get_size() - 
_sequence_length);
     auto rhs_without_seq = Slice(rhs.get_data(), rhs.get_size() - 
_sequence_length);
-    return lhs_without_seq.compare(rhs_without_seq) == 0;
+    return lhs_without_seq.compare(rhs_without_seq);
+}
+
+int MergedPKIndexDeleteBitmapCalculatorContext::Comparator::compare_seq(Slice 
const& lhs,
+                                                                        Slice 
const& rhs) const {
+    if (UNLIKELY(_sequence_length == 0)) {
+        return 0;
+    }
+    auto lhs_seq = Slice(lhs.get_data() + lhs.get_size() - _sequence_length, 
_sequence_length);
+    auto rhs_seq = Slice(rhs.get_data() + rhs.get_size() - _sequence_length, 
_sequence_length);
+    return lhs_seq.compare(rhs_seq);
+}
+
+bool MergedPKIndexDeleteBitmapCalculatorContext::Comparator::is_key_same(Slice 
const& lhs,
+                                                                         Slice 
const& rhs) const {
+    return compare_key(lhs, rhs) == 0;
 }
 
-Status MergeIndexDeleteBitmapCalculator::init(RowsetId rowset_id,
-                                              std::vector<SegmentSharedPtr> 
const& segments,
-                                              size_t seq_col_length, size_t 
max_batch_size) {
-    _rowset_id = rowset_id;
+Status MergedPKIndexDeleteBitmapCalculator::init(std::vector<SegmentSharedPtr> 
const& segments,
+                                                 const std::vector<int64_t>* 
end_versions,
+                                                 size_t num_base_segments, 
size_t seq_col_length,
+                                                 size_t max_batch_size) {
     _seq_col_length = seq_col_length;
-    _comparator = 
MergeIndexDeleteBitmapCalculatorContext::Comparator(seq_col_length);
-    _contexts.reserve(segments.size());
+    _comparator = 
MergedPKIndexDeleteBitmapCalculatorContext::Comparator(seq_col_length);
+    size_t num_segments = segments.size();
+    _contexts.reserve(num_segments);
     _heap = std::make_unique<Heap>(_comparator);
 
-    for (auto& segment : segments) {
+    for (size_t i = 0; i < num_segments; ++i) {
+        auto&& segment = segments[i];
+        int64_t end_version = end_versions ? (*end_versions)[i] : 0;
+        segment->rowset_id();
         RETURN_IF_ERROR(segment->load_index());
         auto pk_idx = segment->get_primary_key_index();
         std::unique_ptr<segment_v2::IndexedColumnIterator> index;
         RETURN_IF_ERROR(pk_idx->new_iterator(&index));
         auto index_type = 
vectorized::DataTypeFactory::instance().create_data_type(
                 pk_idx->type_info()->type(), 1, 0);
-        _contexts.emplace_back(std::move(index), index_type, segment->id(), 
pk_idx->num_rows());
+        bool is_base_segment = i < num_base_segments;
+        _contexts.emplace_back(std::move(index), index_type, 
segment->rowset_id(), end_version,
+                               segment->id(), is_base_segment, 
pk_idx->num_rows(), max_batch_size);
         _heap->push(&_contexts.back());
     }
     return Status::OK();
 }
 
-Status MergeIndexDeleteBitmapCalculator::calculate_one(RowLocation& loc) {
-    // get the location of a out-of-date row
-    while (!_heap->empty()) {
-        auto cur_ctx = _heap->top();
+Status MergedPKIndexDeleteBitmapCalculator::init(
+        std::vector<SegmentSharedPtr> const& segments,
+        const std::vector<RowsetSharedPtr>* specified_rowsets,
+        bool calc_delete_bitmap_between_segments, size_t seq_col_length, 
size_t max_batch_size) {

Review Comment:
   parameter `calc_delete_bitmap_between_segments` seems not necessary, you can 
use `specified_rowsets == nullptr` instead?



##########
be/src/olap/delete_bitmap_calculator.cpp:
##########
@@ -83,116 +91,209 @@ Status 
MergeIndexDeleteBitmapCalculatorContext::_next_batch(size_t row_id) {
     return Status::OK();
 }
 
-bool MergeIndexDeleteBitmapCalculatorContext::Comparator::operator()(
-        MergeIndexDeleteBitmapCalculatorContext* lhs,
-        MergeIndexDeleteBitmapCalculatorContext* rhs) const {
+bool MergedPKIndexDeleteBitmapCalculatorContext::Comparator::operator()(
+        MergedPKIndexDeleteBitmapCalculatorContext* lhs,
+        MergedPKIndexDeleteBitmapCalculatorContext* rhs) const {
     // std::proiroty_queue is a max heap, and function should return the 
result of `lhs < rhs`
     // so if the result of the function is true, rhs will be popped before lhs
     Slice key1, key2;
-    RETURN_IF_ERROR(lhs->get_current_key(key1));
-    RETURN_IF_ERROR(rhs->get_current_key(key2));
-    if (_sequence_length == 0) {
-        auto cmp_result = key1.compare(key2);
-        // when key1 is the same as key2,
-        // we want the one with greater segment id to be popped first
-        return cmp_result ? (cmp_result > 0) : (lhs->segment_id() < 
rhs->segment_id());
-    }
-    // smaller key popped first
-    auto key1_without_seq = Slice(key1.get_data(), key1.get_size() - 
_sequence_length);
-    auto key2_without_seq = Slice(key2.get_data(), key2.get_size() - 
_sequence_length);
-    auto cmp_result = key1_without_seq.compare(key2_without_seq);
-    if (cmp_result != 0) {
-        return cmp_result > 0;
-    }
-    // greater sequence value popped first
-    auto key1_sequence_val =
-            Slice(key1.get_data() + key1.get_size() - _sequence_length, 
_sequence_length);
-    auto key2_sequence_val =
-            Slice(key2.get_data() + key2.get_size() - _sequence_length, 
_sequence_length);
-    cmp_result = key1_sequence_val.compare(key2_sequence_val);
-    if (cmp_result != 0) {
-        return cmp_result < 0;
-    }
-    // greater segment id popped first
+    Status st;
+    st = lhs->get_current_key(&key1);
+    CHECK(LIKELY(st.ok())) << fmt::format("Reading key1 but get st = {}", st);
+    st = rhs->get_current_key(&key2);
+    CHECK(LIKELY(st.ok())) << fmt::format("Reading key2 but get st = {}", st);
+    int key_cmp_result = compare_key(key1, key2);
+    if (LIKELY(key_cmp_result != 0)) {
+        return key_cmp_result > 0;
+    }
+    int seq_cmp_result = compare_seq(key1, key2);
+    if (LIKELY(seq_cmp_result != 0)) {
+        return seq_cmp_result < 0;
+    }
+    if (LIKELY(lhs->end_version() != rhs->end_version())) {
+        return lhs->end_version() < rhs->end_version();
+    }
     return lhs->segment_id() < rhs->segment_id();
 }
 
-bool MergeIndexDeleteBitmapCalculatorContext::Comparator::is_key_same(Slice 
const& lhs,
-                                                                      Slice 
const& rhs) const {
+int MergedPKIndexDeleteBitmapCalculatorContext::Comparator::compare_key(Slice 
const& lhs,
+                                                                        Slice 
const& rhs) const {
     auto lhs_without_seq = Slice(lhs.get_data(), lhs.get_size() - 
_sequence_length);
     auto rhs_without_seq = Slice(rhs.get_data(), rhs.get_size() - 
_sequence_length);
-    return lhs_without_seq.compare(rhs_without_seq) == 0;
+    return lhs_without_seq.compare(rhs_without_seq);
+}
+
+int MergedPKIndexDeleteBitmapCalculatorContext::Comparator::compare_seq(Slice 
const& lhs,
+                                                                        Slice 
const& rhs) const {
+    if (UNLIKELY(_sequence_length == 0)) {
+        return 0;
+    }
+    auto lhs_seq = Slice(lhs.get_data() + lhs.get_size() - _sequence_length, 
_sequence_length);
+    auto rhs_seq = Slice(rhs.get_data() + rhs.get_size() - _sequence_length, 
_sequence_length);
+    return lhs_seq.compare(rhs_seq);
+}
+
+bool MergedPKIndexDeleteBitmapCalculatorContext::Comparator::is_key_same(Slice 
const& lhs,
+                                                                         Slice 
const& rhs) const {
+    return compare_key(lhs, rhs) == 0;
 }
 
-Status MergeIndexDeleteBitmapCalculator::init(RowsetId rowset_id,
-                                              std::vector<SegmentSharedPtr> 
const& segments,
-                                              size_t seq_col_length, size_t 
max_batch_size) {
-    _rowset_id = rowset_id;
+Status MergedPKIndexDeleteBitmapCalculator::init(std::vector<SegmentSharedPtr> 
const& segments,
+                                                 const std::vector<int64_t>* 
end_versions,
+                                                 size_t num_base_segments, 
size_t seq_col_length,
+                                                 size_t max_batch_size) {
     _seq_col_length = seq_col_length;
-    _comparator = 
MergeIndexDeleteBitmapCalculatorContext::Comparator(seq_col_length);
-    _contexts.reserve(segments.size());
+    _comparator = 
MergedPKIndexDeleteBitmapCalculatorContext::Comparator(seq_col_length);
+    size_t num_segments = segments.size();

Review Comment:
   Add a DCHECK_LT(num_base_segments, num_segments);



##########
be/src/olap/delete_bitmap_calculator.cpp:
##########
@@ -83,116 +91,209 @@ Status 
MergeIndexDeleteBitmapCalculatorContext::_next_batch(size_t row_id) {
     return Status::OK();
 }
 
-bool MergeIndexDeleteBitmapCalculatorContext::Comparator::operator()(
-        MergeIndexDeleteBitmapCalculatorContext* lhs,
-        MergeIndexDeleteBitmapCalculatorContext* rhs) const {
+bool MergedPKIndexDeleteBitmapCalculatorContext::Comparator::operator()(
+        MergedPKIndexDeleteBitmapCalculatorContext* lhs,
+        MergedPKIndexDeleteBitmapCalculatorContext* rhs) const {
     // std::proiroty_queue is a max heap, and function should return the 
result of `lhs < rhs`
     // so if the result of the function is true, rhs will be popped before lhs
     Slice key1, key2;
-    RETURN_IF_ERROR(lhs->get_current_key(key1));
-    RETURN_IF_ERROR(rhs->get_current_key(key2));
-    if (_sequence_length == 0) {
-        auto cmp_result = key1.compare(key2);
-        // when key1 is the same as key2,
-        // we want the one with greater segment id to be popped first
-        return cmp_result ? (cmp_result > 0) : (lhs->segment_id() < 
rhs->segment_id());
-    }
-    // smaller key popped first
-    auto key1_without_seq = Slice(key1.get_data(), key1.get_size() - 
_sequence_length);
-    auto key2_without_seq = Slice(key2.get_data(), key2.get_size() - 
_sequence_length);
-    auto cmp_result = key1_without_seq.compare(key2_without_seq);
-    if (cmp_result != 0) {
-        return cmp_result > 0;
-    }
-    // greater sequence value popped first
-    auto key1_sequence_val =
-            Slice(key1.get_data() + key1.get_size() - _sequence_length, 
_sequence_length);
-    auto key2_sequence_val =
-            Slice(key2.get_data() + key2.get_size() - _sequence_length, 
_sequence_length);
-    cmp_result = key1_sequence_val.compare(key2_sequence_val);
-    if (cmp_result != 0) {
-        return cmp_result < 0;
-    }
-    // greater segment id popped first
+    Status st;
+    st = lhs->get_current_key(&key1);
+    CHECK(LIKELY(st.ok())) << fmt::format("Reading key1 but get st = {}", st);

Review Comment:
   use DCHECK



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to