xy720 commented on code in PR #18295:
URL: https://github.com/apache/doris/pull/18295#discussion_r1158571682
##########
be/src/vec/olap/olap_data_convertor.cpp:
##########
@@ -893,28 +893,44 @@ Status OlapBlockDataConvertor::OlapColumnDataConvertorMap::convert_to_olap(
     // offsets data
     auto& offsets = column_map->get_offsets();
-
-    // Now map column offsets data layout in memory is [3, 6, 9], and in disk should be [0, 3, 6, 9]
-    _offsets.clear();
-    _offsets.reserve(offsets.size() + 1);
-    _offsets.push_back(
-            _base_row); // _offsets must start with current map offsets which coming blocks in continue pages
-    for (auto it = offsets.begin(); it < offsets.end(); ++it) {
-        _offsets.push_back(*it + _base_row);
-    }
-
     int64_t start_index = _row_pos - 1;
     int64_t end_index = _row_pos + _num_rows - 1;
-    auto start = offsets[start_index];
-    auto size = offsets[end_index] - start;
+    auto start_offset = offsets[start_index];
+    auto size = offsets[end_index] - start_offset;
+
+    // NOTICE here are two situation:
+    // 1. Multi-SegmentWriter with different olap_convertor to convert same column_map(in memory which is from same block)
+    //    eg: Block(6 row): column_map offsets in memory: [10, 21, 33, 43, 54, 66]
+    //    After SegmentWriter1 with olap_convertor1 deal with first 3 rows: _offsets(pre-disk)=[0, 10, 21], _base_row=33
+    //    then Maybe SegmentWriter will flush data (see BetaRowsetWriter::_add_block(max_row_add < 1))

Review Comment:
```suggestion
    //    then SegmentWriter may flush data (see BetaRowsetWriter::_add_block(max_row_add < 1))
```

##########
be/src/vec/olap/olap_data_convertor.cpp:
##########
@@ -893,28 +893,44 @@ Status OlapBlockDataConvertor::OlapColumnDataConvertorMap::convert_to_olap(
     // offsets data
     auto& offsets = column_map->get_offsets();
-
-    // Now map column offsets data layout in memory is [3, 6, 9], and in disk should be [0, 3, 6, 9]
-    _offsets.clear();
-    _offsets.reserve(offsets.size() + 1);
-    _offsets.push_back(
-            _base_row); // _offsets must start with current map offsets which coming blocks in continue pages
-    for (auto it = offsets.begin(); it < offsets.end(); ++it) {
-        _offsets.push_back(*it + _base_row);
-    }
-
     int64_t start_index = _row_pos - 1;
     int64_t end_index = _row_pos + _num_rows - 1;
-    auto start = offsets[start_index];
-    auto size = offsets[end_index] - start;
+    auto start_offset = offsets[start_index];
+    auto size = offsets[end_index] - start_offset;
+
+    // NOTICE here are two situation:
+    // 1. Multi-SegmentWriter with different olap_convertor to convert same column_map(in memory which is from same block)
+    //    eg: Block(6 row): column_map offsets in memory: [10, 21, 33, 43, 54, 66]
+    //    After SegmentWriter1 with olap_convertor1 deal with first 3 rows: _offsets(pre-disk)=[0, 10, 21], _base_row=33
+    //    then Maybe SegmentWriter will flush data (see BetaRowsetWriter::_add_block(max_row_add < 1))
+    //    ColumnWriter will flush offset array to disk [0, 10, 21, 33]
+    //                                                  ---------  ----
+    //                                                  |--_offsets |--set_next_array_item_ordinal(_kv_writers[0]->get_next_rowid())
+    //    new SegmentWriter2 with olap_convertor2 deal with next map offsets [43, 54, 66]
+    //    but in disk here is new segment file offset should start with 0, so after convert:
+    //    _offsets(pre-disk)=[0, 10, 21], _base_row=33, After flush data finally in disk: [0, 10, 21, 33]
+    // 2. One-SegmentWriter with olap_convertor to convertor different blocks into one page
+    //    eg: Two blocks -> block1 [10, 21, 33] and block2 [1, 3, 6]
+    //    After first convert: _offsets_1(pre-disk)=[0, 10, 21], _base_row=33, without flush, just append to page,
+    //    then deal with coming block2, after current convert:
+    //    _offsets_2=[33, 34, 36], _base_row=39

Review Comment:
```suggestion
    //    _offsets_2=[33, 34, 36], _base_offset=39
```

##########
be/src/vec/olap/olap_data_convertor.cpp:
##########
@@ -893,28 +893,44 @@ Status OlapBlockDataConvertor::OlapColumnDataConvertorMap::convert_to_olap(
     // offsets data
     auto& offsets = column_map->get_offsets();
-
-    // Now map column offsets data layout in memory is [3, 6, 9], and in disk should be [0, 3, 6, 9]
-    _offsets.clear();
-    _offsets.reserve(offsets.size() + 1);
-    _offsets.push_back(
-            _base_row); // _offsets must start with current map offsets which coming blocks in continue pages
-    for (auto it = offsets.begin(); it < offsets.end(); ++it) {
-        _offsets.push_back(*it + _base_row);
-    }
-
     int64_t start_index = _row_pos - 1;
     int64_t end_index = _row_pos + _num_rows - 1;
-    auto start = offsets[start_index];
-    auto size = offsets[end_index] - start;
+    auto start_offset = offsets[start_index];
+    auto size = offsets[end_index] - start_offset;
+
+    // NOTICE here are two situation:
+    // 1. Multi-SegmentWriter with different olap_convertor to convert same column_map(in memory which is from same block)
+    //    eg: Block(6 row): column_map offsets in memory: [10, 21, 33, 43, 54, 66]
+    //    After SegmentWriter1 with olap_convertor1 deal with first 3 rows: _offsets(pre-disk)=[0, 10, 21], _base_row=33

Review Comment:
```suggestion
    //    After SegmentWriter1 with olap_convertor1 deal with first 3 rows: _offsets(pre-disk)=[0, 10, 21], _base_offset=33
```
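For readers following the offset bookkeeping described in the comments above, here is a minimal standalone sketch of the rebasing idea. It is not the actual Doris converter code: the names `rebase_offsets` and `base_offset` are invented for illustration, and it only mirrors the arithmetic of scenario 2 (block1 `[10, 21, 33]` followed by block2 `[1, 3, 6]` appended to the same page).

```cpp
// Illustration only: rebase a block's cumulative in-memory offsets to the
// start offsets written into a page, carrying a running base across blocks.
#include <cstdint>
#include <iostream>
#include <vector>

std::vector<std::uint64_t> rebase_offsets(const std::vector<std::uint64_t>& in_mem,
                                          std::uint64_t& base_offset) {
    std::vector<std::uint64_t> on_disk;
    on_disk.reserve(in_mem.size());
    std::uint64_t prev = 0;  // end offset of the previous row inside this block
    for (std::uint64_t end : in_mem) {
        on_disk.push_back(base_offset + prev);  // start offset of this row on disk
        prev = end;
    }
    base_offset += prev;  // carry the block's total element count into the next block
    return on_disk;
}

int main() {
    std::uint64_t base = 0;
    for (const auto& block : {std::vector<std::uint64_t>{10, 21, 33},
                              std::vector<std::uint64_t>{1, 3, 6}}) {
        for (std::uint64_t v : rebase_offsets(block, base)) std::cout << v << ' ';
        std::cout << "| next base=" << base << '\n';
    }
    // Prints: 0 10 21 | next base=33
    //         33 34 36 | next base=39
    return 0;
}
```

For scenario 1 the same arithmetic applies, except that each new SegmentWriter starts from a base of 0 for its own segment file, and the previous writer records the block's total element count (33 in the example) as the next array-item ordinal when it flushes the offset page.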