This is an automated email from the ASF dual-hosted git repository. dataroaring pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 7d2e431856a [fix](compaction) fix mismatch between segment key and value column rows during compaction (#37960) 7d2e431856a is described below commit 7d2e431856a495b6c573a74690d7a72b05462b46 Author: Luwei <814383...@qq.com> AuthorDate: Mon Jul 22 17:09:32 2024 +0800 [fix](compaction) fix mismatch between segment key and value column rows during compaction (#37960) When a block is split into 3 segments, the old code handles only 2 of them and the last one overflows. --- be/src/olap/rowset/vertical_beta_rowset_writer.cpp | 54 ++++++++++------------ 1 file changed, 24 insertions(+), 30 deletions(-) diff --git a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp index 1de7d4f50dc..942ced616fc 100644 --- a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp +++ b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp @@ -83,36 +83,30 @@ Status VerticalBetaRowsetWriter<T>::add_columns(const vectorized::Block* block, RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->append_block(block, 0, num_rows)); } else { // value columns - uint32_t num_rows_written = _segment_writers[_cur_writer_idx]->num_rows_written(); - VLOG_NOTICE << "num_rows_written: " << num_rows_written - << ", _cur_writer_idx: " << _cur_writer_idx; - uint32_t num_rows_key_group = _segment_writers[_cur_writer_idx]->row_count(); - // init if it's first value column write in current segment - if (_cur_writer_idx == 0 && num_rows_written == 0) { - VLOG_NOTICE << "init first value column segment writer"; - RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->init(col_ids, is_key)); - } - // when splitting segment, need to make rows align between key columns and value columns - size_t start_offset = 0; - size_t limit = num_rows; - if (num_rows_written + num_rows >= num_rows_key_group && - _cur_writer_idx < _segment_writers.size() - 1) { - RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->append_block( - 
block, 0, num_rows_key_group - num_rows_written)); - RETURN_IF_ERROR(_flush_columns(_segment_writers[_cur_writer_idx].get())); - start_offset = num_rows_key_group - num_rows_written; - limit = num_rows - start_offset; - ++_cur_writer_idx; - // switch to next writer - RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->init(col_ids, is_key)); - num_rows_written = 0; - num_rows_key_group = _segment_writers[_cur_writer_idx]->row_count(); - } - if (limit > 0) { - RETURN_IF_ERROR( - _segment_writers[_cur_writer_idx]->append_block(block, start_offset, limit)); - DCHECK(_segment_writers[_cur_writer_idx]->num_rows_written() <= - _segment_writers[_cur_writer_idx]->row_count()); + int64_t left = num_rows; + while (left > 0) { + uint32_t num_rows_written = _segment_writers[_cur_writer_idx]->num_rows_written(); + VLOG_NOTICE << "num_rows_written: " << num_rows_written + << ", _cur_writer_idx: " << _cur_writer_idx; + uint32_t num_rows_key_group = _segment_writers[_cur_writer_idx]->row_count(); + CHECK_LE(num_rows_written, num_rows_key_group); + // init if it's first value column write in current segment + if (num_rows_written == 0) { + VLOG_NOTICE << "init first value column segment writer"; + RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->init(col_ids, is_key)); + } + + int64_t to_write = num_rows_written + left >= num_rows_key_group + ? num_rows_key_group - num_rows_written + : left; + RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->append_block(block, num_rows - left, + to_write)); + left -= to_write; + CHECK_GE(left, 0); + + if (left > 0) { + ++_cur_writer_idx; + } } } if (is_key) { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org