dataroaring commented on code in PR #41701: URL: https://github.com/apache/doris/pull/41701#discussion_r1798677787
########## be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp: ########## @@ -849,32 +906,44 @@ Status VerticalSegmentWriter::_merge_rows_for_sequence_column( RETURN_IF_ERROR(_generate_encoded_default_seq_value( *_tablet_schema, *_opts.rowset_ctx->partial_update_info, &encoded_default_seq_value)); - for (size_t block_pos = data.row_pos; block_pos < data.row_pos + data.num_rows; block_pos++) { - size_t delta_pos = block_pos - data.row_pos; - auto& skip_bitmap = skip_bitmaps->at(block_pos); - std::string key = _full_encode_keys(key_columns, delta_pos); - bool row_has_sequence_col = (!skip_bitmap.contains(seq_col_unique_id)); - Status st; - if (delta_pos > 0 && previous_key == key) { - DCHECK(previous_has_seq_col == !row_has_sequence_col); - ++duplicate_keys; - RowLocation loc; - RowsetSharedPtr rowset; - size_t rid_missing_seq {}; - size_t rid_with_seq {}; - if (row_has_sequence_col) { - rid_missing_seq = block_pos - 1; - rid_with_seq = block_pos; + // batched_rows store rows with the same key that have different pattern: + // 0 -> row without sequence col and with delete sign + // 1 -> row without sequence col and without delete sign + // 2 -> row has sequence col with delete sign + // 3 -> row has sequence col and without delete sign + std::array<int64_t, 4> batched_rows {}; + batched_rows.fill(-1); + bool has_same_rows {false}; + auto get_idx = [](bool with_seq_col, bool has_delete_sign) { + if (!with_seq_col) { + if (has_delete_sign) { + return 0; + } else { + return 1; + } Review Comment: return has_delete_sign ? 0 : 1. ########## be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp: ########## @@ -849,32 +906,44 @@ Status VerticalSegmentWriter::_merge_rows_for_sequence_column( RETURN_IF_ERROR(_generate_encoded_default_seq_value( *_tablet_schema, *_opts.rowset_ctx->partial_update_info, &encoded_default_seq_value)); - for (size_t block_pos = data.row_pos; block_pos < data.row_pos + data.num_rows; block_pos++) { - size_t delta_pos = block_pos - data.row_pos; - auto& skip_bitmap = skip_bitmaps->at(block_pos); - std::string key = _full_encode_keys(key_columns, delta_pos); - bool row_has_sequence_col = (!skip_bitmap.contains(seq_col_unique_id)); - Status st; - if (delta_pos > 0 && previous_key == key) { - DCHECK(previous_has_seq_col == !row_has_sequence_col); - ++duplicate_keys; - RowLocation loc; - RowsetSharedPtr rowset; - size_t rid_missing_seq {}; - size_t rid_with_seq {}; - if (row_has_sequence_col) { - rid_missing_seq = block_pos - 1; - rid_with_seq = block_pos; + // batched_rows store rows with the same key that have different pattern: + // 0 -> row without sequence col and with delete sign + // 1 -> row without sequence col and without delete sign + // 2 -> row has sequence col with delete sign + // 3 -> row has sequence col and without delete sign + std::array<int64_t, 4> batched_rows {}; + batched_rows.fill(-1); + bool has_same_rows {false}; + auto get_idx = [](bool with_seq_col, bool has_delete_sign) { + if (!with_seq_col) { + if (has_delete_sign) { + return 0; + } else { + return 1; + } + } else { + if (has_delete_sign) { + return 2; } else { - rid_missing_seq = block_pos; - rid_with_seq = block_pos - 1; + return 3; } + } + }; + + auto find_rows_to_filter = [&](const std::string& key) { + bool has_row_with_seq_col = (batched_rows[0] != -1 || batched_rows[1] != -1); + bool has_row_without_seq_col = (batched_rows[2] != -1 || batched_rows[3] != -1); + if (has_row_with_seq_col && has_row_without_seq_col) { Review Comment: add comment to explain when the if is false. ########## be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp: ########## @@ -849,32 +906,44 @@ Status VerticalSegmentWriter::_merge_rows_for_sequence_column( RETURN_IF_ERROR(_generate_encoded_default_seq_value( *_tablet_schema, *_opts.rowset_ctx->partial_update_info, &encoded_default_seq_value)); - for (size_t block_pos = data.row_pos; block_pos < data.row_pos + data.num_rows; block_pos++) { - size_t delta_pos = block_pos - data.row_pos; - auto& skip_bitmap = skip_bitmaps->at(block_pos); - std::string key = _full_encode_keys(key_columns, delta_pos); - bool row_has_sequence_col = (!skip_bitmap.contains(seq_col_unique_id)); - Status st; - if (delta_pos > 0 && previous_key == key) { - DCHECK(previous_has_seq_col == !row_has_sequence_col); - ++duplicate_keys; - RowLocation loc; - RowsetSharedPtr rowset; - size_t rid_missing_seq {}; - size_t rid_with_seq {}; - if (row_has_sequence_col) { - rid_missing_seq = block_pos - 1; - rid_with_seq = block_pos; + // batched_rows store rows with the same key that have different pattern: + // 0 -> row without sequence col and with delete sign + // 1 -> row without sequence col and without delete sign + // 2 -> row has sequence col with delete sign + // 3 -> row has sequence col and without delete sign + std::array<int64_t, 4> batched_rows {}; + batched_rows.fill(-1); + bool has_same_rows {false}; + auto get_idx = [](bool with_seq_col, bool has_delete_sign) { + if (!with_seq_col) { + if (has_delete_sign) { + return 0; + } else { + return 1; + } + } else { + if (has_delete_sign) { + return 2; } else { - rid_missing_seq = block_pos; - rid_with_seq = block_pos - 1; + return 3; } Review Comment: return has_delete_sign ? 2 : 3; -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org