This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 7d2e431856a [fix](compaction) fix mismatch between segment key and value column rows during compaction (#37960)
7d2e431856a is described below

commit 7d2e431856a495b6c573a74690d7a72b05462b46
Author: Luwei <814383...@qq.com>
AuthorDate: Mon Jul 22 17:09:32 2024 +0800

    [fix](compaction) fix mismatch between segment key and value column rows during compaction (#37960)
    
    When a block is split into 3 segments, the old code handles only 2 of
    them and the last one overflows.
---
 be/src/olap/rowset/vertical_beta_rowset_writer.cpp | 54 ++++++++++------------
 1 file changed, 24 insertions(+), 30 deletions(-)

diff --git a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp 
b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp
index 1de7d4f50dc..942ced616fc 100644
--- a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp
+++ b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp
@@ -83,36 +83,30 @@ Status VerticalBetaRowsetWriter<T>::add_columns(const 
vectorized::Block* block,
         RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->append_block(block, 
0, num_rows));
     } else {
         // value columns
-        uint32_t num_rows_written = 
_segment_writers[_cur_writer_idx]->num_rows_written();
-        VLOG_NOTICE << "num_rows_written: " << num_rows_written
-                    << ", _cur_writer_idx: " << _cur_writer_idx;
-        uint32_t num_rows_key_group = 
_segment_writers[_cur_writer_idx]->row_count();
-        // init if it's first value column write in current segment
-        if (_cur_writer_idx == 0 && num_rows_written == 0) {
-            VLOG_NOTICE << "init first value column segment writer";
-            RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->init(col_ids, 
is_key));
-        }
-        // when splitting segment, need to make rows align between key columns 
and value columns
-        size_t start_offset = 0;
-        size_t limit = num_rows;
-        if (num_rows_written + num_rows >= num_rows_key_group &&
-            _cur_writer_idx < _segment_writers.size() - 1) {
-            RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->append_block(
-                    block, 0, num_rows_key_group - num_rows_written));
-            
RETURN_IF_ERROR(_flush_columns(_segment_writers[_cur_writer_idx].get()));
-            start_offset = num_rows_key_group - num_rows_written;
-            limit = num_rows - start_offset;
-            ++_cur_writer_idx;
-            // switch to next writer
-            RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->init(col_ids, 
is_key));
-            num_rows_written = 0;
-            num_rows_key_group = 
_segment_writers[_cur_writer_idx]->row_count();
-        }
-        if (limit > 0) {
-            RETURN_IF_ERROR(
-                    _segment_writers[_cur_writer_idx]->append_block(block, 
start_offset, limit));
-            DCHECK(_segment_writers[_cur_writer_idx]->num_rows_written() <=
-                   _segment_writers[_cur_writer_idx]->row_count());
+        int64_t left = num_rows;
+        while (left > 0) {
+            uint32_t num_rows_written = 
_segment_writers[_cur_writer_idx]->num_rows_written();
+            VLOG_NOTICE << "num_rows_written: " << num_rows_written
+                        << ", _cur_writer_idx: " << _cur_writer_idx;
+            uint32_t num_rows_key_group = 
_segment_writers[_cur_writer_idx]->row_count();
+            CHECK_LE(num_rows_written, num_rows_key_group);
+            // init if it's first value column write in current segment
+            if (num_rows_written == 0) {
+                VLOG_NOTICE << "init first value column segment writer";
+                
RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->init(col_ids, is_key));
+            }
+
+            int64_t to_write = num_rows_written + left >= num_rows_key_group
+                                       ? num_rows_key_group - num_rows_written
+                                       : left;
+            
RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->append_block(block, num_rows 
- left,
+                                                                            
to_write));
+            left -= to_write;
+            CHECK_GE(left, 0);
+
+            if (left > 0) {
+                ++_cur_writer_idx;
+            }
         }
     }
     if (is_key) {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to