xy720 commented on code in PR #18295:
URL: https://github.com/apache/doris/pull/18295#discussion_r1158571682


##########
be/src/vec/olap/olap_data_convertor.cpp:
##########
@@ -893,28 +893,44 @@ Status OlapBlockDataConvertor::OlapColumnDataConvertorMap::convert_to_olap(
 
     // offsets data
     auto& offsets = column_map->get_offsets();
-
-    // Now map column offsets data layout in memory is [3, 6, 9], and in disk should be [0, 3, 6, 9]
-    _offsets.clear();
-    _offsets.reserve(offsets.size() + 1);
-    _offsets.push_back(
-            _base_row); // _offsets must start with current map offsets which coming blocks in continue pages
-    for (auto it = offsets.begin(); it < offsets.end(); ++it) {
-        _offsets.push_back(*it + _base_row);
-    }
-
     int64_t start_index = _row_pos - 1;
     int64_t end_index = _row_pos + _num_rows - 1;
-    auto start = offsets[start_index];
-    auto size = offsets[end_index] - start;
+    auto start_offset = offsets[start_index];
+    auto size = offsets[end_index] - start_offset;
+
+    // NOTICE here are two situation:
+    // 1. Multi-SegmentWriter with different olap_convertor to convert same column_map(in memory which is from same block)
+    //   eg: Block(6 row): column_map offsets in memory: [10, 21, 33, 43, 54, 66]
+    //   After SegmentWriter1 with olap_convertor1 deal with first 3 rows:  _offsets(pre-disk)=[0, 10, 21], _base_row=33
+    //   then Maybe SegmentWriter will flush data (see BetaRowsetWriter::_add_block(max_row_add < 1))
Review Comment:
   ```suggestion
    //   then SegmentWriter may flush data (see BetaRowsetWriter::_add_block(max_row_add < 1))
   ```
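For readers following the offset math in this hunk, here is a minimal, self-contained sketch of the rebasing that situation 1 describes. It is not the Doris implementation; `convert_offsets_for_segment`, `row_start`, and `mem_offsets` are hypothetical names, and the real convertor works on `ColumnMap` internals rather than a plain vector. Each SegmentWriter's convertor takes its slice of the block's cumulative in-memory offsets and rebases the row-start offsets so the new segment file begins at 0; the closing ordinal (33 in this example) is appended later at flush time, analogous to the `set_next_array_item_ordinal` call mentioned below.

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

// Hypothetical stand-in for the per-segment conversion in situation 1.
// mem_offsets holds the block's cumulative end offsets, e.g. a 6-row map
// column with offsets [10, 21, 33, 43, 54, 66].
std::vector<int64_t> convert_offsets_for_segment(const std::vector<int64_t>& mem_offsets,
                                                 size_t row_pos, size_t num_rows) {
    // Start offset of row i within the block (0 for the first row).
    auto row_start = [&](size_t i) { return i == 0 ? int64_t{0} : mem_offsets[i - 1]; };
    int64_t base = row_start(row_pos); // rebase so the new segment file starts at 0
    std::vector<int64_t> disk_offsets;  // analogue of _offsets (pre-disk)
    disk_offsets.reserve(num_rows);
    for (size_t i = row_pos; i < row_pos + num_rows; ++i) {
        disk_offsets.push_back(row_start(i) - base);
    }
    return disk_offsets;
}

int main() {
    const std::vector<int64_t> mem = {10, 21, 33, 43, 54, 66};
    // SegmentWriter1, rows [0, 3): prints 0 10 21; at flush the closing
    // ordinal 33 is appended, giving [0, 10, 21, 33] on disk.
    for (int64_t v : convert_offsets_for_segment(mem, 0, 3)) std::cout << v << ' ';
    std::cout << '\n';
    // SegmentWriter2, rows [3, 6): a new segment file, so offsets restart
    // at 0 and again print 0 10 21; flush appends 66 - 33 = 33.
    for (int64_t v : convert_offsets_for_segment(mem, 3, 3)) std::cout << v << ' ';
    std::cout << '\n';
}
```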



##########
be/src/vec/olap/olap_data_convertor.cpp:
##########
@@ -893,28 +893,44 @@ Status OlapBlockDataConvertor::OlapColumnDataConvertorMap::convert_to_olap(
 
     // offsets data
     auto& offsets = column_map->get_offsets();
-
-    // Now map column offsets data layout in memory is [3, 6, 9], and in disk should be [0, 3, 6, 9]
-    _offsets.clear();
-    _offsets.reserve(offsets.size() + 1);
-    _offsets.push_back(
-            _base_row); // _offsets must start with current map offsets which coming blocks in continue pages
-    for (auto it = offsets.begin(); it < offsets.end(); ++it) {
-        _offsets.push_back(*it + _base_row);
-    }
-
     int64_t start_index = _row_pos - 1;
     int64_t end_index = _row_pos + _num_rows - 1;
-    auto start = offsets[start_index];
-    auto size = offsets[end_index] - start;
+    auto start_offset = offsets[start_index];
+    auto size = offsets[end_index] - start_offset;
+
+    // NOTICE here are two situation:
+    // 1. Multi-SegmentWriter with different olap_convertor to convert same column_map(in memory which is from same block)
+    //   eg: Block(6 row): column_map offsets in memory: [10, 21, 33, 43, 54, 66]
+    //   After SegmentWriter1 with olap_convertor1 deal with first 3 rows:  _offsets(pre-disk)=[0, 10, 21], _base_row=33
+    //   then Maybe SegmentWriter will flush data (see BetaRowsetWriter::_add_block(max_row_add < 1))
+    //   ColumnWriter will flush offset array to disk [0, 10, 21, 33]
+    //                                                 ---------  ----
+    //                                                 |--_offsets  |--set_next_array_item_ordinal(_kv_writers[0]->get_next_rowid())
+    //   new SegmentWriter2 with olap_convertor2 deal with next map offsets [43, 54, 66]
+    //   but in disk here is new segment file offset should start with 0, so after convert:
+    //      _offsets(pre-disk)=[0, 10, 21], _base_row=33, After flush data finally in disk: [0, 10, 21, 33]
+    //2. One-SegmentWriter with olap_convertor to convertor different blocks into one page
+    //    eg: Two blocks -> block1 [10, 21, 33] and block2 [1, 3, 6]
+    //      After first convert: _offsets_1(pre-disk)=[0, 10, 21], _base_row=33, without flush, just append to page,
+    //      then deal with coming block2, after current convert:
+    //      _offsets_2=[33, 34, 36], _base_row=39

Review Comment:
   ```suggestion
       //      _offsets_2=[33, 34, 36], _base_offset=39
   ```
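And a matching sketch for situation 2 (again with hypothetical names, not the actual convertor): when one SegmentWriter appends several blocks to the same page, the converted offsets must keep growing from the accumulated base rather than restarting at 0, which is exactly what the member the suggestion renames to `_base_offset` tracks.

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

// Hypothetical stand-in for the convertor state in situation 2: one
// SegmentWriter appends several blocks to the same page, so converted
// offsets keep growing from base_offset instead of restarting at 0.
struct MapOffsetsConvertor {
    int64_t base_offset = 0; // analogue of _base_offset in the diff

    std::vector<int64_t> convert_block(const std::vector<int64_t>& mem_offsets) {
        std::vector<int64_t> page_offsets;
        page_offsets.reserve(mem_offsets.size());
        int64_t prev = 0; // start offset of the current row within this block
        for (int64_t end : mem_offsets) {
            page_offsets.push_back(base_offset + prev); // shift into page coordinates
            prev = end;
        }
        base_offset += mem_offsets.back(); // carry the total element count forward
        return page_offsets;
    }
};

int main() {
    MapOffsetsConvertor conv;
    // block1 [10, 21, 33] -> _offsets_1 analogue [0, 10, 21], base becomes 33
    for (int64_t v : conv.convert_block({10, 21, 33})) std::cout << v << ' ';
    std::cout << "| base=" << conv.base_offset << '\n'; // prints: 0 10 21 | base=33
    // block2 [1, 3, 6] -> _offsets_2 analogue [33, 34, 36], base becomes 39
    for (int64_t v : conv.convert_block({1, 3, 6})) std::cout << v << ' ';
    std::cout << "| base=" << conv.base_offset << '\n'; // prints: 33 34 36 | base=39
}
```

The two printed lines reproduce the `_offsets_1`/`_offsets_2` values and the base progression (33, then 39) from the comment in the diff above.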



##########
be/src/vec/olap/olap_data_convertor.cpp:
##########
@@ -893,28 @893,44 @@ Status OlapBlockDataConvertor::OlapColumnDataConvertorMap::convert_to_olap(
 
     // offsets data
     auto& offsets = column_map->get_offsets();
-
-    // Now map column offsets data layout in memory is [3, 6, 9], and in disk should be [0, 3, 6, 9]
-    _offsets.clear();
-    _offsets.reserve(offsets.size() + 1);
-    _offsets.push_back(
-            _base_row); // _offsets must start with current map offsets which coming blocks in continue pages
-    for (auto it = offsets.begin(); it < offsets.end(); ++it) {
-        _offsets.push_back(*it + _base_row);
-    }
-
     int64_t start_index = _row_pos - 1;
     int64_t end_index = _row_pos + _num_rows - 1;
-    auto start = offsets[start_index];
-    auto size = offsets[end_index] - start;
+    auto start_offset = offsets[start_index];
+    auto size = offsets[end_index] - start_offset;
+
+    // NOTICE here are two situation:
+    // 1. Multi-SegmentWriter with different olap_convertor to convert same column_map(in memory which is from same block)
+    //   eg: Block(6 row): column_map offsets in memory: [10, 21, 33, 43, 54, 66]
+    //   After SegmentWriter1 with olap_convertor1 deal with first 3 rows:  _offsets(pre-disk)=[0, 10, 21], _base_row=33

Review Comment:
   ```suggestion
    //   After SegmentWriter1 with olap_convertor1 deal with first 3 rows:  _offsets(pre-disk)=[0, 10, 21], _base_offset=33
   ```



--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment.
