dataroaring commented on code in PR #41701:
URL: https://github.com/apache/doris/pull/41701#discussion_r1798677787


##########
be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:
##########
@@ -849,32 +906,44 @@ Status 
VerticalSegmentWriter::_merge_rows_for_sequence_column(
     RETURN_IF_ERROR(_generate_encoded_default_seq_value(
             *_tablet_schema, *_opts.rowset_ctx->partial_update_info, 
&encoded_default_seq_value));
 
-    for (size_t block_pos = data.row_pos; block_pos < data.row_pos + 
data.num_rows; block_pos++) {
-        size_t delta_pos = block_pos - data.row_pos;
-        auto& skip_bitmap = skip_bitmaps->at(block_pos);
-        std::string key = _full_encode_keys(key_columns, delta_pos);
-        bool row_has_sequence_col = (!skip_bitmap.contains(seq_col_unique_id));
-        Status st;
-        if (delta_pos > 0 && previous_key == key) {
-            DCHECK(previous_has_seq_col == !row_has_sequence_col);
-            ++duplicate_keys;
-            RowLocation loc;
-            RowsetSharedPtr rowset;
-            size_t rid_missing_seq {};
-            size_t rid_with_seq {};
-            if (row_has_sequence_col) {
-                rid_missing_seq = block_pos - 1;
-                rid_with_seq = block_pos;
+    // batched_rows store rows with the same key that have different pattern:
+    //   0 -> row without sequence col and with delete sign
+    //   1 -> row without sequence col and without delete sign
+    //   2 -> row has sequence col with delete sign
+    //   3 -> row has sequence col and without delete sign
+    std::array<int64_t, 4> batched_rows {};
+    batched_rows.fill(-1);
+    bool has_same_rows {false};
+    auto get_idx = [](bool with_seq_col, bool has_delete_sign) {
+        if (!with_seq_col) {
+            if (has_delete_sign) {
+                return 0;
+            } else {
+                return 1;
+            }

Review Comment:
   return has_delete_sign ? 0 : 1.



##########
be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:
##########
@@ -849,32 +906,44 @@ Status 
VerticalSegmentWriter::_merge_rows_for_sequence_column(
     RETURN_IF_ERROR(_generate_encoded_default_seq_value(
             *_tablet_schema, *_opts.rowset_ctx->partial_update_info, 
&encoded_default_seq_value));
 
-    for (size_t block_pos = data.row_pos; block_pos < data.row_pos + 
data.num_rows; block_pos++) {
-        size_t delta_pos = block_pos - data.row_pos;
-        auto& skip_bitmap = skip_bitmaps->at(block_pos);
-        std::string key = _full_encode_keys(key_columns, delta_pos);
-        bool row_has_sequence_col = (!skip_bitmap.contains(seq_col_unique_id));
-        Status st;
-        if (delta_pos > 0 && previous_key == key) {
-            DCHECK(previous_has_seq_col == !row_has_sequence_col);
-            ++duplicate_keys;
-            RowLocation loc;
-            RowsetSharedPtr rowset;
-            size_t rid_missing_seq {};
-            size_t rid_with_seq {};
-            if (row_has_sequence_col) {
-                rid_missing_seq = block_pos - 1;
-                rid_with_seq = block_pos;
+    // batched_rows store rows with the same key that have different pattern:
+    //   0 -> row without sequence col and with delete sign
+    //   1 -> row without sequence col and without delete sign
+    //   2 -> row has sequence col with delete sign
+    //   3 -> row has sequence col and without delete sign
+    std::array<int64_t, 4> batched_rows {};
+    batched_rows.fill(-1);
+    bool has_same_rows {false};
+    auto get_idx = [](bool with_seq_col, bool has_delete_sign) {
+        if (!with_seq_col) {
+            if (has_delete_sign) {
+                return 0;
+            } else {
+                return 1;
+            }
+        } else {
+            if (has_delete_sign) {
+                return 2;
             } else {
-                rid_missing_seq = block_pos;
-                rid_with_seq = block_pos - 1;
+                return 3;
             }
+        }
+    };
+
+    auto find_rows_to_filter = [&](const std::string& key) {
+        bool has_row_with_seq_col = (batched_rows[0] != -1 || batched_rows[1] 
!= -1);
+        bool has_row_without_seq_col = (batched_rows[2] != -1 || 
batched_rows[3] != -1);
+        if (has_row_with_seq_col && has_row_without_seq_col) {

Review Comment:
   add comment to explain when the if is false.



##########
be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:
##########
@@ -849,32 +906,44 @@ Status 
VerticalSegmentWriter::_merge_rows_for_sequence_column(
     RETURN_IF_ERROR(_generate_encoded_default_seq_value(
             *_tablet_schema, *_opts.rowset_ctx->partial_update_info, 
&encoded_default_seq_value));
 
-    for (size_t block_pos = data.row_pos; block_pos < data.row_pos + 
data.num_rows; block_pos++) {
-        size_t delta_pos = block_pos - data.row_pos;
-        auto& skip_bitmap = skip_bitmaps->at(block_pos);
-        std::string key = _full_encode_keys(key_columns, delta_pos);
-        bool row_has_sequence_col = (!skip_bitmap.contains(seq_col_unique_id));
-        Status st;
-        if (delta_pos > 0 && previous_key == key) {
-            DCHECK(previous_has_seq_col == !row_has_sequence_col);
-            ++duplicate_keys;
-            RowLocation loc;
-            RowsetSharedPtr rowset;
-            size_t rid_missing_seq {};
-            size_t rid_with_seq {};
-            if (row_has_sequence_col) {
-                rid_missing_seq = block_pos - 1;
-                rid_with_seq = block_pos;
+    // batched_rows store rows with the same key that have different pattern:
+    //   0 -> row without sequence col and with delete sign
+    //   1 -> row without sequence col and without delete sign
+    //   2 -> row has sequence col with delete sign
+    //   3 -> row has sequence col and without delete sign
+    std::array<int64_t, 4> batched_rows {};
+    batched_rows.fill(-1);
+    bool has_same_rows {false};
+    auto get_idx = [](bool with_seq_col, bool has_delete_sign) {
+        if (!with_seq_col) {
+            if (has_delete_sign) {
+                return 0;
+            } else {
+                return 1;
+            }
+        } else {
+            if (has_delete_sign) {
+                return 2;
             } else {
-                rid_missing_seq = block_pos;
-                rid_with_seq = block_pos - 1;
+                return 3;
             }

Review Comment:
   return has_delete_sign ? 2 : 3;



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to