github-actions[bot] commented on code in PR #41701:
URL: https://github.com/apache/doris/pull/41701#discussion_r1805832638


##########
be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:
##########
@@ -558,6 +567,32 @@ Status VerticalSegmentWriter::_append_block_with_partial_content(RowsInBlock& da
     return Status::OK();
 }
 
+Status VerticalSegmentWriter::_filter_block(RowsInBlock& data,
+                                            vectorized::MutableColumnPtr filter_column,
+                                            int duplicate_rows, std::string col_name) {
+    auto num_cols = data.block->columns();
+    auto* block = const_cast<vectorized::Block*>(data.block);
+    block->insert(
+            {std::move(filter_column), 
std::make_shared<vectorized::DataTypeUInt8>(), col_name});
+    RETURN_IF_ERROR(vectorized::Block::filter_block(block, num_cols, 
num_cols));
+    DCHECK_EQ(num_cols, data.block->columns());
+    int merged_rows = data.num_rows - block->rows();
+    VLOG_DEBUG << fmt::format(
+            "filter_block_for_flexible_partial_update[{}] after filter: "
+            "data.block:{}\n",
+            col_name, data.block->dump_data());
+    if (duplicate_rows != merged_rows) {
+        auto msg = fmt::format(
+                "filter_block_for_flexible_partial_update {}: duplicate_rows != merged_rows, "
+                "duplicate_keys={}, merged_rows={}, num_rows={}, mutable_block->rows()={}",
+                col_name, duplicate_rows, merged_rows, data.num_rows, block->rows());
+        DCHECK(false) << msg;
+        return Status::InternalError<false>(msg);
+    }
+    data.num_rows = block->rows();
+    return Status::OK();
+}
+
 Status VerticalSegmentWriter::_append_block_with_flexible_partial_content(

Review Comment:
   warning: function '_append_block_with_flexible_partial_content' has cognitive complexity of 86 (threshold 50) [readability-function-cognitive-complexity]
   ```cpp
   Status VerticalSegmentWriter::_append_block_with_flexible_partial_content(
                                 ^
   ```
   <details>
   <summary>Additional context</summary>
   
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:613:** nesting level increased to 1
   ```cpp
       auto get_skip_bitmaps = [&skip_bitmap_col_idx](const vectorized::Block* block) {
                               ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:631:** +1, including nesting penalty of 0, nesting level increased to 1
   ```cpp
       DBUG_EXECUTE_IF("VerticalSegmentWriter._append_block_with_flexible_partial_content.sleep",
       ^
   ```
   **be/src/util/debug_points.h:36:** expanded from macro 'DBUG_EXECUTE_IF'
   ```cpp
       if (UNLIKELY(config::enable_debug_points)) {                             
 \
       ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:631:** +2, 
including nesting penalty of 1, nesting level increased to 2
   ```cpp
       
DBUG_EXECUTE_IF("VerticalSegmentWriter._append_block_with_flexible_partial_content.sleep",
       ^
   ```
   **be/src/util/debug_points.h:38:** expanded from macro 'DBUG_EXECUTE_IF'
   ```cpp
           if (dp) {                                                            
 \
           ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:640:** nesting 
level increased to 1
   ```cpp
               [&full_block, &data,
               ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:643:** +2, 
including nesting penalty of 1, nesting level increased to 2
   ```cpp
           for (std::size_t cid {0}; cid < _num_sort_key_columns; cid++) {
           ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:645:** +3, 
including nesting penalty of 2, nesting level increased to 3
   ```cpp
               
RETURN_IF_ERROR(_olap_data_convertor->set_source_content_with_specifid_column(
               ^
   ```
   **be/src/common/status.h:629:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
       do {                                \
       ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:645:** +4, 
including nesting penalty of 3, nesting level increased to 4
   ```cpp
               
RETURN_IF_ERROR(_olap_data_convertor->set_source_content_with_specifid_column(
               ^
   ```
   **be/src/common/status.h:631:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
           if (UNLIKELY(!_status_.ok())) { \
           ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:648:** +3, 
including nesting penalty of 2, nesting level increased to 3
   ```cpp
               if (!status.ok()) {
               ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:656:** nesting 
level increased to 1
   ```cpp
       auto encode_seq_column = [&data, &schema_has_sequence_col,
                                ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:659:** +2, 
including nesting penalty of 1, nesting level increased to 2
   ```cpp
           if (schema_has_sequence_col) {
           ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:661:** +3, 
including nesting penalty of 2, nesting level increased to 3
   ```cpp
               
RETURN_IF_ERROR(_olap_data_convertor->set_source_content_with_specifid_column(
               ^
   ```
   **be/src/common/status.h:629:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
       do {                                \
       ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:661:** +4, 
including nesting penalty of 3, nesting level increased to 4
   ```cpp
               
RETURN_IF_ERROR(_olap_data_convertor->set_source_content_with_specifid_column(
               ^
   ```
   **be/src/common/status.h:631:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
           if (UNLIKELY(!_status_.ok())) { \
           ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:665:** +3, 
including nesting penalty of 2, nesting level increased to 3
   ```cpp
               if (!status.ok()) {
               ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:676:** +1, 
including nesting penalty of 0, nesting level increased to 1
   ```cpp
       RETURN_IF_ERROR(encode_key_columns(key_columns));
       ^
   ```
   **be/src/common/status.h:629:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
       do {                                \
       ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:676:** +2, 
including nesting penalty of 1, nesting level increased to 2
   ```cpp
       RETURN_IF_ERROR(encode_key_columns(key_columns));
       ^
   ```
   **be/src/common/status.h:631:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
           if (UNLIKELY(!_status_.ok())) { \
           ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:683:** +1, 
including nesting penalty of 0, nesting level increased to 1
   ```cpp
       RETURN_IF_ERROR(encode_seq_column(seq_column));
       ^
   ```
   **be/src/common/status.h:629:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
       do {                                \
       ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:683:** +2, 
including nesting penalty of 1, nesting level increased to 2
   ```cpp
       RETURN_IF_ERROR(encode_seq_column(seq_column));
       ^
   ```
   **be/src/common/status.h:631:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
           if (UNLIKELY(!_status_.ok())) { \
           ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:690:** +1, 
including nesting penalty of 0, nesting level increased to 1
   ```cpp
       if (schema_has_sequence_col) {
       ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:691:** +2, 
including nesting penalty of 1, nesting level increased to 2
   ```cpp
           RETURN_IF_ERROR(_merge_rows_for_sequence_column(data, skip_bitmaps, 
key_columns, seq_column,
           ^
   ```
   **be/src/common/status.h:629:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
       do {                                \
       ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:691:** +3, 
including nesting penalty of 2, nesting level increased to 3
   ```cpp
           RETURN_IF_ERROR(_merge_rows_for_sequence_column(data, skip_bitmaps, 
key_columns, seq_column,
           ^
   ```
   **be/src/common/status.h:631:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
           if (UNLIKELY(!_status_.ok())) { \
           ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:694:** +2, 
including nesting penalty of 1, nesting level increased to 2
   ```cpp
           if (origin_rows != data.num_rows) {
           ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:697:** +3, 
including nesting penalty of 2, nesting level increased to 3
   ```cpp
               RETURN_IF_ERROR(encode_key_columns(key_columns));
               ^
   ```
   **be/src/common/status.h:629:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
       do {                                \
       ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:697:** +4, 
including nesting penalty of 3, nesting level increased to 4
   ```cpp
               RETURN_IF_ERROR(encode_key_columns(key_columns));
               ^
   ```
   **be/src/common/status.h:631:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
           if (UNLIKELY(!_status_.ok())) { \
           ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:698:** +3, 
including nesting penalty of 2, nesting level increased to 3
   ```cpp
               RETURN_IF_ERROR(encode_seq_column(seq_column));
               ^
   ```
   **be/src/common/status.h:629:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
       do {                                \
       ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:698:** +4, 
including nesting penalty of 3, nesting level increased to 4
   ```cpp
               RETURN_IF_ERROR(encode_seq_column(seq_column));
               ^
   ```
   **be/src/common/status.h:631:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
           if (UNLIKELY(!_status_.ok())) { \
           ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:707:** +1, 
including nesting penalty of 0, nesting level increased to 1
   ```cpp
       RETURN_IF_ERROR(_merge_rows_for_insert_after_delete(
       ^
   ```
   **be/src/common/status.h:629:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
       do {                                \
       ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:707:** +2, 
including nesting penalty of 1, nesting level increased to 2
   ```cpp
       RETURN_IF_ERROR(_merge_rows_for_insert_after_delete(
       ^
   ```
   **be/src/common/status.h:631:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
           if (UNLIKELY(!_status_.ok())) { \
           ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:709:** +1, 
including nesting penalty of 0, nesting level increased to 1
   ```cpp
       if (data.num_rows != origin_rows) {
       ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:712:** +2, 
including nesting penalty of 1, nesting level increased to 2
   ```cpp
           RETURN_IF_ERROR(encode_key_columns(key_columns));
           ^
   ```
   **be/src/common/status.h:629:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
       do {                                \
       ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:712:** +3, 
including nesting penalty of 2, nesting level increased to 3
   ```cpp
           RETURN_IF_ERROR(encode_key_columns(key_columns));
           ^
   ```
   **be/src/common/status.h:631:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
           if (UNLIKELY(!_status_.ok())) { \
           ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:713:** +2, 
including nesting penalty of 1, nesting level increased to 2
   ```cpp
           RETURN_IF_ERROR(encode_seq_column(seq_column));
           ^
   ```
   **be/src/common/status.h:629:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
       do {                                \
       ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:713:** +3, 
including nesting penalty of 2, nesting level increased to 3
   ```cpp
           RETURN_IF_ERROR(encode_seq_column(seq_column));
           ^
   ```
   **be/src/common/status.h:631:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
           if (UNLIKELY(!_status_.ok())) { \
           ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:720:** +1, 
including nesting penalty of 0, nesting level increased to 1
   ```cpp
       for (std::size_t cid {0}; cid < _num_sort_key_columns; cid++) {
       ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:723:** +2, 
including nesting penalty of 1, nesting level increased to 2
   ```cpp
           RETURN_IF_ERROR(_column_writers[cid]->append(column->get_nullmap(), 
column->get_data(),
           ^
   ```
   **be/src/common/status.h:629:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
       do {                                \
       ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:723:** +3, 
including nesting penalty of 2, nesting level increased to 3
   ```cpp
           RETURN_IF_ERROR(_column_writers[cid]->append(column->get_nullmap(), 
column->get_data(),
           ^
   ```
   **be/src/common/status.h:631:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
           if (UNLIKELY(!_status_.ok())) { \
           ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:731:** +1, 
including nesting penalty of 0, nesting level increased to 1
   ```cpp
       RETURN_IF_ERROR(_generate_flexible_read_plan(
       ^
   ```
   **be/src/common/status.h:629:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
       do {                                \
       ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:731:** +2, 
including nesting penalty of 1, nesting level increased to 2
   ```cpp
       RETURN_IF_ERROR(_generate_flexible_read_plan(
       ^
   ```
   **be/src/common/status.h:631:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
           if (UNLIKELY(!_status_.ok())) { \
           ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:737:** +1, 
including nesting penalty of 0, nesting level increased to 1
   ```cpp
       if (config::enable_merge_on_write_correctness_check) {
       ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:743:** +1, 
including nesting penalty of 0, nesting level increased to 1
   ```cpp
       RETURN_IF_ERROR(read_plan.fill_non_primary_key_columns(
       ^
   ```
   **be/src/common/status.h:629:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
       do {                                \
       ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:743:** +2, 
including nesting penalty of 1, nesting level increased to 2
   ```cpp
       RETURN_IF_ERROR(read_plan.fill_non_primary_key_columns(
       ^
   ```
   **be/src/common/status.h:631:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
           if (UNLIKELY(!_status_.ok())) { \
           ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:777:** +1, 
including nesting penalty of 0, nesting level increased to 1
   ```cpp
       if (_num_rows_written != data.row_pos ||
       ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:786:** +1, 
including nesting penalty of 0, nesting level increased to 1
   ```cpp
       RETURN_IF_ERROR(_generate_primary_key_index(_key_coders, key_columns, 
seq_column, data.num_rows,
       ^
   ```
   **be/src/common/status.h:629:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
       do {                                \
       ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:786:** +2, 
including nesting penalty of 1, nesting level increased to 2
   ```cpp
       RETURN_IF_ERROR(_generate_primary_key_index(_key_coders, key_columns, 
seq_column, data.num_rows,
       ^
   ```
   **be/src/common/status.h:631:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
           if (UNLIKELY(!_status_.ok())) { \
           ^
   ```
   
   </details>
   



##########
be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:
##########
@@ -831,98 +884,289 @@
 Status VerticalSegmentWriter::_merge_rows_for_sequence_column(

Review Comment:
   warning: function '_merge_rows_for_sequence_column' has cognitive complexity of 79 (threshold 50) [readability-function-cognitive-complexity]
   ```cpp
   Status VerticalSegmentWriter::_merge_rows_for_sequence_column(
                                 ^
   ```
   <details>
   <summary>Additional context</summary>
   
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:905:** nesting 
level increased to 1
   ```cpp
       auto append_row = [&](int64_t rid, BitmapValue& skip_bitmap, bool 
have_delete_sign) {
                         ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:906:** +2, 
including nesting penalty of 1, nesting level increased to 2
   ```cpp
           if (have_delete_sign) {
           ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:908:** +3, 
including nesting penalty of 2, nesting level increased to 3
   ```cpp
               if (cur_rows > 0) {
               ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:921:** +1, 
nesting level increased to 2
   ```cpp
           } else {
             ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:922:** +3, 
including nesting penalty of 2, nesting level increased to 3
   ```cpp
               if ((cur_rows == 1 && !have_rows_with_delete_sign) || cur_rows 
== 2) {
               ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:922:** +1
   ```cpp
               if ((cur_rows == 1 && !have_rows_with_delete_sign) || cur_rows 
== 2) {
                                                                  ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:922:** +1
   ```cpp
               if ((cur_rows == 1 && !have_rows_with_delete_sign) || cur_rows 
== 2) {
                                  ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:945:** +1, 
nesting level increased to 3
   ```cpp
               } else {
                 ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:956:** nesting 
level increased to 1
   ```cpp
       auto aggregate_rows = [&](std::string key, int64_t start, int64_t end) 
-> Status {
                             ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:958:** +2, 
including nesting penalty of 1, nesting level increased to 2
   ```cpp
           if (end - start == 1) {
           ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:973:** +1
   ```cpp
           DCHECK(st.is<KEY_NOT_FOUND>() || st.ok());
                                         ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:976:** +2, 
including nesting penalty of 1, nesting level increased to 2
   ```cpp
           if (st.ok()) {
           ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:977:** +3, 
including nesting penalty of 2, nesting level increased to 3
   ```cpp
               for (int64_t i {start}; i < end; i++) {
               ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:983:** +4, 
including nesting penalty of 3, nesting level increased to 4
   ```cpp
                   if (!row_has_sequence_col) {
                   ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:987:** +1, 
nesting level increased to 4
   ```cpp
                   } else {
                     ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:990:** +5, 
including nesting penalty of 4, nesting level increased to 5
   ```cpp
                       if (Slice {seq_val}.compare(Slice 
{previous_encoded_seq_value}) >= 0) {
                       ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:997:** +1, 
nesting level increased to 2
   ```cpp
           } else {
             ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:1001:** +3, 
including nesting penalty of 2, nesting level increased to 3
   ```cpp
               if (row_has_sequence_col) {
               ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:1006:** +1, 
nesting level increased to 3
   ```cpp
               } else {
                 ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:1008:** +4, 
including nesting penalty of 3, nesting level increased to 4
   ```cpp
                   RETURN_IF_ERROR(_generate_encoded_default_seq_value(
                   ^
   ```
   **be/src/common/status.h:629:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
       do {                                \
       ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:1008:** +5, 
including nesting penalty of 4, nesting level increased to 5
   ```cpp
                   RETURN_IF_ERROR(_generate_encoded_default_seq_value(
                   ^
   ```
   **be/src/common/status.h:631:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
           if (UNLIKELY(!_status_.ok())) { \
           ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:1015:** +2, 
including nesting penalty of 1, nesting level increased to 2
   ```cpp
           for (int64_t rid {pos}; rid < end; rid++) {
           ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:1019:** +1
   ```cpp
                       (!skip_bitmap.contains(delete_sign_col_unique_id) && 
delete_signs[rid] != 0);
                                                                         ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:1020:** +3, 
including nesting penalty of 2, nesting level increased to 3
   ```cpp
               if (!row_has_sequence_col) {
               ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:1022:** +1, 
nesting level increased to 3
   ```cpp
               } else {
                 ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:1025:** +4, 
including nesting penalty of 3, nesting level increased to 4
   ```cpp
                   if (Slice {seq_val}.compare(Slice {cur_seq_val}) >= 0) {
                   ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:1028:** +1, 
nesting level increased to 4
   ```cpp
                   } else {
                     ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:1039:** +1, 
including nesting penalty of 0, nesting level increased to 1
   ```cpp
       for (size_t block_pos {0}; block_pos < data.num_rows; block_pos++) {
       ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:1041:** +2, 
including nesting penalty of 1, nesting level increased to 2
   ```cpp
           if (block_pos > 0 && previous_key == key) {
           ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:1041:** +1
   ```cpp
           if (block_pos > 0 && previous_key == key) {
                             ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:1043:** +1, 
nesting level increased to 2
   ```cpp
           } else {
             ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:1044:** +3, 
including nesting penalty of 2, nesting level increased to 3
   ```cpp
               if (same_key_rows > 0) {
               ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:1045:** +4, 
including nesting penalty of 3, nesting level increased to 4
   ```cpp
                   RETURN_IF_ERROR(aggregate_rows(std::move(previous_key), 
block_pos - same_key_rows,
                   ^
   ```
   **be/src/common/status.h:629:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
       do {                                \
       ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:1045:** +5, 
including nesting penalty of 4, nesting level increased to 5
   ```cpp
                   RETURN_IF_ERROR(aggregate_rows(std::move(previous_key), 
block_pos - same_key_rows,
                   ^
   ```
   **be/src/common/status.h:631:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
           if (UNLIKELY(!_status_.ok())) { \
           ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:1052:** +1, 
including nesting penalty of 0, nesting level increased to 1
   ```cpp
       if (same_key_rows > 0) {
       ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:1053:** +2, 
including nesting penalty of 1, nesting level increased to 2
   ```cpp
           RETURN_IF_ERROR(aggregate_rows(std::move(previous_key), 
data.num_rows - same_key_rows,
           ^
   ```
   **be/src/common/status.h:629:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
       do {                                \
       ^
   ```
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:1053:** +3, 
including nesting penalty of 2, nesting level increased to 3
   ```cpp
           RETURN_IF_ERROR(aggregate_rows(std::move(previous_key), 
data.num_rows - same_key_rows,
           ^
   ```
   **be/src/common/status.h:631:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
           if (UNLIKELY(!_status_.ok())) { \
           ^
   ```
   
   </details>
   



##########
be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:
##########
@@ -831,98 +884,289 @@
 Status VerticalSegmentWriter::_merge_rows_for_sequence_column(
         RowsInBlock& data, std::vector<BitmapValue>* skip_bitmaps,
         const std::vector<vectorized::IOlapColumnDataAccessor*>& key_columns,
-        vectorized::IOlapColumnDataAccessor* seq_column,
+        vectorized::IOlapColumnDataAccessor* seq_column, const signed char* 
delete_signs,
         const std::vector<RowsetSharedPtr>& specified_rowsets,
         std::vector<std::unique_ptr<SegmentCacheHandle>>& segment_caches) {
     VLOG_DEBUG << fmt::format(
             "VerticalSegmentWriter::_merge_rows_for_sequence_column enter: 
data.block:{}\n",
             data.block->dump_data());
+    // the process logic here is the same as MemTable::_aggregate_for_flexible_partial_update_without_seq_col()
+    // after this function, there will be at most 2 rows for a specified key
+    auto* block = const_cast<vectorized::Block*>(data.block);
     auto seq_col_unique_id = 
_tablet_schema->column(_tablet_schema->sequence_col_idx()).unique_id();
+    auto delete_sign_col_unique_id =
+            
_tablet_schema->column(_tablet_schema->delete_sign_idx()).unique_id();
+
+    auto filtered_block = _tablet_schema->create_block();
+    vectorized::MutableBlock output_block =
+            vectorized::MutableBlock::build_mutable_block(&filtered_block);
+    int cur_rows {0};
+    bool have_rows_with_delete_sign {false};
+
+    auto append_row = [&](int64_t rid, BitmapValue& skip_bitmap, bool 
have_delete_sign) {
+        if (have_delete_sign) {
+            // remove all the previous batched rows
+            if (cur_rows > 0) {
+                for (size_t cid {0}; cid < _tablet_schema->num_columns(); 
cid++) {
+                    DCHECK_GE(output_block.mutable_columns()[cid]->size(), 
cur_rows);
+                    output_block.mutable_columns()[cid]->pop_back(cur_rows);
+                }
+            }
+            output_block.add_row(block, rid);
+            cur_rows = 1;
+            have_rows_with_delete_sign = true;
+            VLOG_DEBUG << fmt::format(
+                    "append a row with delete sign, after append, cur_rows={}, 
"
+                    "output_block.rows()={}",
+                    cur_rows, output_block.rows());
+        } else {
+            if ((cur_rows == 1 && !have_rows_with_delete_sign) || cur_rows == 
2) {
+                for (size_t cid {0}; cid < _tablet_schema->num_columns(); 
cid++) {
+                    if (cid == _tablet_schema->skip_bitmap_col_idx()) {
+                        auto& cur_skip_bitmap = 
assert_cast<vectorized::ColumnBitmap*>(
+                                                        
output_block.mutable_columns()[cid].get())
+                                                        ->get_data()
+                                                        .back();
+                        const auto& new_row_skip_bitmap =
+                                assert_cast<vectorized::ColumnBitmap*>(
+                                        
block->get_by_position(cid).column->assume_mutable().get())
+                                        ->get_data()[rid];
+                        cur_skip_bitmap &= new_row_skip_bitmap;
+                        continue;
+                    }
+                    if 
(!skip_bitmap.contains(_tablet_schema->column(cid).unique_id())) {
+                        output_block.mutable_columns()[cid]->pop_back(1);
+                        output_block.mutable_columns()[cid]->insert_from(
+                                *block->get_by_position(cid).column, rid);
+                    }
+                }
+                VLOG_DEBUG << fmt::format(
+                        "merge a row, after merge, cur_rows={}, 
output_block.rows()={}", cur_rows,
+                        output_block.rows());
+            } else {
+                output_block.add_row(block, rid);
+                cur_rows++;
+                VLOG_DEBUG << fmt::format(
+                        "append a new row, after append, cur_rows={}, 
output_block.rows()={}",
+                        cur_rows, output_block.rows());
+            }
+        }
+    };
+
+    // aggregate rows with same keys in block range [start, end)
+    auto aggregate_rows = [&](std::string key, int64_t start, int64_t end) -> 
Status {
+        VLOG_DEBUG << fmt::format("merge rows in range=({}-{})", start, end);
+        if (end - start == 1) {
+            output_block.add_row(block, start);
+            VLOG_DEBUG << fmt::format("append a row directly, rid={}", start);
+            return Status::OK();
+        }
+        cur_rows = 0;
+        have_rows_with_delete_sign = false;
+
+        RowLocation loc;
+        RowsetSharedPtr rowset;
+        std::string previous_encoded_seq_value {};
+        Status st = _tablet->lookup_row_key(key, _tablet_schema.get(), false, 
specified_rowsets,
+                                            &loc, _mow_context->max_version, 
segment_caches,
+                                            &rowset, true, 
&previous_encoded_seq_value);
+        int64_t pos = start;
+        DCHECK(st.is<KEY_NOT_FOUND>() || st.ok());
+
+        std::string cur_seq_val;
+        if (st.ok()) {
+            for (int64_t i {start}; i < end; i++) {
+                auto& skip_bitmap = skip_bitmaps->at(i);
+                bool row_has_sequence_col = 
(!skip_bitmap.contains(seq_col_unique_id));
+                // Find the first row that has larger sequence value than the existing row's
+                // or the first row that doesn't specify the sequence column.
+                // Discard all the rows before and begin to do aggregation from that row
+                if (!row_has_sequence_col) {
+                    cur_seq_val = std::move(previous_encoded_seq_value);
+                    pos = i;
+                    break;
+                } else {
+                    std::string seq_val {};
+                    _encode_seq_column(seq_column, i, &seq_val);
+                    if (Slice {seq_val}.compare(Slice 
{previous_encoded_seq_value}) >= 0) {
+                        cur_seq_val = std::move(seq_val);
+                        pos = i;
+                        break;
+                    }
+                }
+            }
+        } else {
+            pos = start;
+            auto& skip_bitmap = skip_bitmaps->at(pos);
+            bool row_has_sequence_col = 
(!skip_bitmap.contains(seq_col_unique_id));
+            if (row_has_sequence_col) {
+                std::string seq_val {};
+                // for rows that don't specify sequence col, seq_val will be 
encoded to minimal value
+                _encode_seq_column(seq_column, pos, &seq_val);
+                cur_seq_val = std::move(seq_val);
+            } else {
+                cur_seq_val.clear();
+                RETURN_IF_ERROR(_generate_encoded_default_seq_value(
+                        *_tablet_schema, 
*_opts.rowset_ctx->partial_update_info, &cur_seq_val));
+            }
+        }
+        cur_rows = 0;
+        VLOG_DEBUG << fmt::format("start pos={}, output_block.rows()={}", pos, 
output_block.rows());
+
+        for (int64_t rid {pos}; rid < end; rid++) {
+            auto& skip_bitmap = skip_bitmaps->at(rid);
+            bool row_has_sequence_col = 
(!skip_bitmap.contains(seq_col_unique_id));
+            bool have_delete_sign =
+                    (!skip_bitmap.contains(delete_sign_col_unique_id) && 
delete_signs[rid] != 0);
+            if (!row_has_sequence_col) {
+                append_row(rid, skip_bitmap, have_delete_sign);
+            } else {
+                std::string seq_val {};
+                _encode_seq_column(seq_column, rid, &seq_val);
+                if (Slice {seq_val}.compare(Slice {cur_seq_val}) >= 0) {
+                    append_row(rid, skip_bitmap, have_delete_sign);
+                    cur_seq_val = std::move(seq_val);
+                } else {
+                    VLOG_DEBUG << fmt::format(
+                            "skip rid={} becasue its seq value is lower than 
the previous", rid);
+                }
+            }
+        }
+        return Status::OK();
+    };
+
+    int same_key_rows {0};
     std::string previous_key {};
-    bool previous_has_seq_col {false};
-    int duplicate_keys {0};
+    for (size_t block_pos {0}; block_pos < data.num_rows; block_pos++) {
+        std::string key = _full_encode_keys(key_columns, block_pos);
+        if (block_pos > 0 && previous_key == key) {
+            same_key_rows++;
+        } else {
+            if (same_key_rows > 0) {
+                RETURN_IF_ERROR(aggregate_rows(std::move(previous_key), 
block_pos - same_key_rows,
+                                               block_pos));
+            }
+            same_key_rows = 1;
+        }
+        previous_key = std::move(key);
+    }
+    if (same_key_rows > 0) {
+        RETURN_IF_ERROR(aggregate_rows(std::move(previous_key), data.num_rows 
- same_key_rows,
+                                       data.num_rows));
+    }
 
+    block->swap(output_block.to_block());
+    data.num_rows = block->rows();
+    VLOG_DEBUG << fmt::format(
+            "VerticalSegmentWriter::_merge_rows_for_sequence_column after: 
data.block:{}\n",
+            data.block->dump_data());
+    return Status::OK();
+}
+
+Status VerticalSegmentWriter::_merge_rows_for_insert_after_delete(

Review Comment:
   warning: function '_merge_rows_for_insert_after_delete' exceeds recommended 
size/complexity thresholds [readability-function-size]
   ```cpp
   Status VerticalSegmentWriter::_merge_rows_for_insert_after_delete(
                                 ^
   ```
   <details>
   <summary>Additional context</summary>
   
   **be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:1065:** 101 
lines including whitespace and comments (threshold 80)
   ```cpp
   Status VerticalSegmentWriter::_merge_rows_for_insert_after_delete(
                                 ^
   ```
   
   </details>
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org


Reply via email to