zhannngchen commented on code in PR #22566:
URL: https://github.com/apache/doris/pull/22566#discussion_r1289444110


##########
be/src/olap/delete_bitmap_calculator.cpp:
##########
@@ -83,116 +91,242 @@ Status 
MergeIndexDeleteBitmapCalculatorContext::_next_batch(size_t row_id) {
     return Status::OK();
 }
 
-bool MergeIndexDeleteBitmapCalculatorContext::Comparator::operator()(
-        MergeIndexDeleteBitmapCalculatorContext* lhs,
-        MergeIndexDeleteBitmapCalculatorContext* rhs) const {
+bool MergedPKIndexDeleteBitmapCalculatorContext::Comparator::operator()(
+        MergedPKIndexDeleteBitmapCalculatorContext* lhs,
+        MergedPKIndexDeleteBitmapCalculatorContext* rhs) const {
     // std::proiroty_queue is a max heap, and function should return the 
result of `lhs < rhs`
     // so if the result of the function is true, rhs will be popped before lhs
     Slice key1, key2;
-    RETURN_IF_ERROR(lhs->get_current_key(key1));
-    RETURN_IF_ERROR(rhs->get_current_key(key2));
-    if (_sequence_length == 0) {
-        auto cmp_result = key1.compare(key2);
-        // when key1 is the same as key2,
-        // we want the one with greater segment id to be popped first
-        return cmp_result ? (cmp_result > 0) : (lhs->segment_id() < 
rhs->segment_id());
-    }
-    // smaller key popped first
-    auto key1_without_seq = Slice(key1.get_data(), key1.get_size() - 
_sequence_length);
-    auto key2_without_seq = Slice(key2.get_data(), key2.get_size() - 
_sequence_length);
-    auto cmp_result = key1_without_seq.compare(key2_without_seq);
-    if (cmp_result != 0) {
-        return cmp_result > 0;
-    }
-    // greater sequence value popped first
-    auto key1_sequence_val =
-            Slice(key1.get_data() + key1.get_size() - _sequence_length, 
_sequence_length);
-    auto key2_sequence_val =
-            Slice(key2.get_data() + key2.get_size() - _sequence_length, 
_sequence_length);
-    cmp_result = key1_sequence_val.compare(key2_sequence_val);
-    if (cmp_result != 0) {
-        return cmp_result < 0;
-    }
-    // greater segment id popped first
+    lhs->get_current_key(&key1);
+    rhs->get_current_key(&key2);
+    // We start by comparing the key portions of the two records.
+    // If they are not equal, we want the record with the smaller key to be 
popped from the heap first.
+    // Therefore, we need to return the result of key1 > key2.
+    int key_cmp_result = compare_key(key1, key2);
+    if (LIKELY(key_cmp_result != 0)) {
+        return key_cmp_result > 0;
+    }
+    // If key portions of the two records are equal,
+    // we want the record with the larger sequence value to be popped from the 
heap first.
+    // Therefore, we need to return the result of seq1 < seq2.
+    int seq_cmp_result = compare_seq(key1, key2);
+    if (LIKELY(seq_cmp_result != 0)) {
+        return seq_cmp_result < 0;
+    }
+    // Otherwise, if they have the same key and sequence value,
+    // we want the record with the larger version value to be popped from the 
heap first.
+    // Therefore, we need to return the result of version1 < version2.
+    if (LIKELY(lhs->end_version() != rhs->end_version())) {
+        return lhs->end_version() < rhs->end_version();
+    }
+    // Lastly, if they have the same key, sequence value and version,
+    // we want the record with the larger segment id to be popped from the 
heap first.
+    // Therefore, we need to return the result of segid1 < segid2.
     return lhs->segment_id() < rhs->segment_id();
 }
 
-bool MergeIndexDeleteBitmapCalculatorContext::Comparator::is_key_same(Slice 
const& lhs,
-                                                                      Slice 
const& rhs) const {
+int MergedPKIndexDeleteBitmapCalculatorContext::Comparator::compare_key(Slice 
const& lhs,
+                                                                        Slice 
const& rhs) const {
     auto lhs_without_seq = Slice(lhs.get_data(), lhs.get_size() - 
_sequence_length);
     auto rhs_without_seq = Slice(rhs.get_data(), rhs.get_size() - 
_sequence_length);
-    return lhs_without_seq.compare(rhs_without_seq) == 0;
+    return lhs_without_seq.compare(rhs_without_seq);
+}
+
+int MergedPKIndexDeleteBitmapCalculatorContext::Comparator::compare_seq(Slice 
const& lhs,
+                                                                        Slice 
const& rhs) const {
+    if (LIKELY(_sequence_length == 0)) {
+        return 0;
+    }
+    auto lhs_seq = Slice(lhs.get_data() + lhs.get_size() - _sequence_length, 
_sequence_length);
+    auto rhs_seq = Slice(rhs.get_data() + rhs.get_size() - _sequence_length, 
_sequence_length);
+    return lhs_seq.compare(rhs_seq);
+}
+
+bool MergedPKIndexDeleteBitmapCalculatorContext::Comparator::is_key_same(Slice 
const& lhs,
+                                                                         Slice 
const& rhs) const {
+    return compare_key(lhs, rhs) == 0;
 }
 
-Status MergeIndexDeleteBitmapCalculator::init(RowsetId rowset_id,
-                                              std::vector<SegmentSharedPtr> 
const& segments,
-                                              size_t seq_col_length, size_t 
max_batch_size) {
-    _rowset_id = rowset_id;
+Status MergedPKIndexDeleteBitmapCalculator::init(std::vector<SegmentSharedPtr> 
const& segments,
+                                                 const std::vector<int64_t>* 
end_versions,
+                                                 size_t num_delta_segments, 
size_t seq_col_length,
+                                                 size_t max_batch_size) {
+    // The first `num_delta_segments` segments are the delta data, others are 
the base data.
+    DCHECK_LE(num_delta_segments, segments.size());
     _seq_col_length = seq_col_length;
-    _comparator = 
MergeIndexDeleteBitmapCalculatorContext::Comparator(seq_col_length);
-    _contexts.reserve(segments.size());
+    _comparator = 
MergedPKIndexDeleteBitmapCalculatorContext::Comparator(seq_col_length);
+    size_t num_segments = segments.size();
+    _contexts.reserve(num_segments);
     _heap = std::make_unique<Heap>(_comparator);
 
-    for (auto& segment : segments) {
+    for (size_t i = 0; i < num_segments; ++i) {
+        auto&& segment = segments[i];
+        int64_t end_version = end_versions ? (*end_versions)[i] : 0;
         RETURN_IF_ERROR(segment->load_index());
         auto pk_idx = segment->get_primary_key_index();
         std::unique_ptr<segment_v2::IndexedColumnIterator> index;
         RETURN_IF_ERROR(pk_idx->new_iterator(&index));
         auto index_type = 
vectorized::DataTypeFactory::instance().create_data_type(
                 pk_idx->type_info()->type(), 1, 0);
-        _contexts.emplace_back(std::move(index), index_type, segment->id(), 
pk_idx->num_rows());
+        bool is_delta_segment = i < num_delta_segments;
+        _contexts.emplace_back(std::move(index), index_type, 
segment->rowset_id(), end_version,
+                               segment->id(), is_delta_segment, 
pk_idx->num_rows(), max_batch_size);

Review Comment:
   in the ctor, the 6th parameter is `is_base_segment` rather than 
`is_delta_segment`, it's not match



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to