Re: [PR] [Improvement](segment iterator) remove range in first read to save time [doris]

via GitHub Fri, 10 Nov 2023 19:16:24 -0800


xiaokang commented on code in PR #26689:
URL: https://github.com/apache/doris/pull/26689#discussion_r1390101855



##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -1614,53 +1647,27 @@ Status SegmentIterator::_read_columns_by_index(uint32_t 
nrows_read_limit, uint32
                                                bool set_block_rowid) {
     SCOPED_RAW_TIMER(&_opts.stats->first_read_ns);
 
-    do {
-        uint32_t range_from = 0;
-        uint32_t range_to = 0;
-        bool has_next_range =
-                _range_iter->next_range(nrows_read_limit - nrows_read, 
&range_from, &range_to);
-        if (!has_next_range) {
-            break;
-        }
-
-        size_t rows_to_read = range_to - range_from;
-        _cur_rowid = range_to;
-
-        if (set_block_rowid) {
-            // Here use std::iota is better performance than for-loop, maybe 
for-loop is not vectorized
-            auto start = _block_rowids.data() + nrows_read;
-            auto end = start + rows_to_read;
-            std::iota(start, end, range_from);
-            nrows_read += rows_to_read;
-        } else {
-            nrows_read += rows_to_read;
-        }
-
-        _split_row_ranges.emplace_back(std::pair {range_from, range_to});
-    } while (nrows_read < nrows_read_limit && !_opts.read_orderby_key_reverse);
+    nrows_read = _range_iter->read_batch_rowids(_block_rowids.data(), 
nrows_read_limit);
+    bool is_continuous = (nrows_read > 1) &&
+                         (_block_rowids[nrows_read - 1] - _block_rowids[0] == 
nrows_read - 1);
 
     for (auto cid : _first_read_column_ids) {
         auto& column = _current_return_columns[cid];
         if (_prune_column(cid, column, true, nrows_read)) {
             continue;
         }
-        for (auto& range : _split_row_ranges) {
-            size_t nrows = range.second - range.first;
-            {
-                _opts.stats->block_first_read_seek_num += 1;
-                if (_opts.runtime_state && 
_opts.runtime_state->enable_profile()) {
-                    SCOPED_RAW_TIMER(&_opts.stats->block_first_read_seek_ns);
-                    
RETURN_IF_ERROR(_column_iterators[cid]->seek_to_ordinal(range.first));
-                } else {
-                    
RETURN_IF_ERROR(_column_iterators[cid]->seek_to_ordinal(range.first));
-                }
-            }
-            size_t rows_read = nrows;
+
+        if (is_continuous) {
+            size_t rows_read = nrows_read;
+            
RETURN_IF_ERROR(_column_iterators[cid]->seek_to_ordinal(_block_rowids[0]));

Review Comment:
   add profile as before



##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -141,6 +140,9 @@ class SegmentIterator::BitmapRangeIterator {
         *to = *from + range_size;
         return true;
     }
+    virtual uint32_t read_batch_rowids(rowid_t* buf, uint32_t batch_size) {

Review Comment:
   add comment for the sematics



##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -141,6 +140,9 @@ class SegmentIterator::BitmapRangeIterator {
         *to = *from + range_size;
         return true;
     }
+    virtual uint32_t read_batch_rowids(rowid_t* buf, uint32_t batch_size) {

Review Comment:
   btw, add a blank line



##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -2138,22 +2143,21 @@ void 
SegmentIterator::_build_index_result_column(uint16_t* sel_rowid_idx, uint16
     size_t idx_in_row_range = 0;
     size_t idx_in_selected = 0;
     // _split_row_ranges store multiple ranges which split in function 
_read_columns_by_index(),
-    // index_result is a column predicate apply result in a whole segement,
-    // but a scanner thread one time can read max rows limit by block_row_max,
+    // index_result is a column predicate apply result in a whole segment,
+    // but a scanner thread can read max rows limit by block_row_max one time,
     // so split _row_bitmap by one time scan range, in order to match size of 
one scanner thread read rows.
-    for (auto origin_row_range : _split_row_ranges) {
-        for (size_t rowid = origin_row_range.first; rowid < 
origin_row_range.second; ++rowid) {
-            if (sel_rowid_idx == nullptr || (idx_in_selected < select_size &&
-                                             idx_in_row_range == 
sel_rowid_idx[idx_in_selected])) {
-                if (index_result.contains(rowid)) {
-                    vec_match_pred[idx_in_block++] = true;
-                } else {
-                    vec_match_pred[idx_in_block++] = false;
-                }
-                idx_in_selected++;
+    for (uint32_t idx = 0; idx < _current_batch_rows_read; idx++) {
+        auto rowid = _block_rowids[idx];
+        if (sel_rowid_idx == nullptr ||
+            (idx_in_selected < select_size && idx_in_row_range == 
sel_rowid_idx[idx_in_selected])) {

Review Comment:
   The original logic `idx_in_row_range == sel_rowid_idx[idx_in_selected])` 
indicate that data in sel_rowid_idx is relative index in read block. It need to 
be verified once more.



##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -1614,53 +1647,27 @@ Status SegmentIterator::_read_columns_by_index(uint32_t 
nrows_read_limit, uint32
                                                bool set_block_rowid) {
     SCOPED_RAW_TIMER(&_opts.stats->first_read_ns);
 
-    do {
-        uint32_t range_from = 0;
-        uint32_t range_to = 0;
-        bool has_next_range =
-                _range_iter->next_range(nrows_read_limit - nrows_read, 
&range_from, &range_to);
-        if (!has_next_range) {
-            break;
-        }
-
-        size_t rows_to_read = range_to - range_from;
-        _cur_rowid = range_to;
-
-        if (set_block_rowid) {
-            // Here use std::iota is better performance than for-loop, maybe 
for-loop is not vectorized
-            auto start = _block_rowids.data() + nrows_read;
-            auto end = start + rows_to_read;
-            std::iota(start, end, range_from);
-            nrows_read += rows_to_read;
-        } else {
-            nrows_read += rows_to_read;
-        }
-
-        _split_row_ranges.emplace_back(std::pair {range_from, range_to});
-    } while (nrows_read < nrows_read_limit && !_opts.read_orderby_key_reverse);
+    nrows_read = _range_iter->read_batch_rowids(_block_rowids.data(), 
nrows_read_limit);
+    bool is_continuous = (nrows_read > 1) &&
+                         (_block_rowids[nrows_read - 1] - _block_rowids[0] == 
nrows_read - 1);
 
     for (auto cid : _first_read_column_ids) {
         auto& column = _current_return_columns[cid];
         if (_prune_column(cid, column, true, nrows_read)) {
             continue;
         }
-        for (auto& range : _split_row_ranges) {
-            size_t nrows = range.second - range.first;
-            {
-                _opts.stats->block_first_read_seek_num += 1;
-                if (_opts.runtime_state && 
_opts.runtime_state->enable_profile()) {
-                    SCOPED_RAW_TIMER(&_opts.stats->block_first_read_seek_ns);
-                    
RETURN_IF_ERROR(_column_iterators[cid]->seek_to_ordinal(range.first));
-                } else {
-                    
RETURN_IF_ERROR(_column_iterators[cid]->seek_to_ordinal(range.first));
-                }
-            }
-            size_t rows_read = nrows;
+
+        if (is_continuous) {
+            size_t rows_read = nrows_read;
+            
RETURN_IF_ERROR(_column_iterators[cid]->seek_to_ordinal(_block_rowids[0]));
             RETURN_IF_ERROR(_column_iterators[cid]->next_batch(&rows_read, 
column));
-            if (rows_read != nrows) {
-                return Status::Error<ErrorCode::INTERNAL_ERROR>("nrows({}) != 
rows_read({})", nrows,
-                                                                rows_read);
+            if (rows_read != nrows_read) {
+                return Status::Error<ErrorCode::INTERNAL_ERROR>("nrows({}) != 
rows_read({})",
+                                                                nrows_read, 
rows_read);
             }
+        } else {
+            
RETURN_IF_ERROR(_column_iterators[cid]->read_by_rowids(_block_rowids.data(), 
nrows_read,

Review Comment:
   add profile



##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -2138,22 +2143,21 @@ void 
SegmentIterator::_build_index_result_column(uint16_t* sel_rowid_idx, uint16
     size_t idx_in_row_range = 0;
     size_t idx_in_selected = 0;
     // _split_row_ranges store multiple ranges which split in function 
_read_columns_by_index(),
-    // index_result is a column predicate apply result in a whole segement,
-    // but a scanner thread one time can read max rows limit by block_row_max,
+    // index_result is a column predicate apply result in a whole segment,
+    // but a scanner thread can read max rows limit by block_row_max one time,
     // so split _row_bitmap by one time scan range, in order to match size of 
one scanner thread read rows.
-    for (auto origin_row_range : _split_row_ranges) {
-        for (size_t rowid = origin_row_range.first; rowid < 
origin_row_range.second; ++rowid) {
-            if (sel_rowid_idx == nullptr || (idx_in_selected < select_size &&
-                                             idx_in_row_range == 
sel_rowid_idx[idx_in_selected])) {
-                if (index_result.contains(rowid)) {
-                    vec_match_pred[idx_in_block++] = true;
-                } else {
-                    vec_match_pred[idx_in_block++] = false;
-                }
-                idx_in_selected++;
+    for (uint32_t idx = 0; idx < _current_batch_rows_read; idx++) {

Review Comment:
   The original logic is complicated and there are too many idx. We can replace 
idx by i to reduce idx variables.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Re: [PR] [Improvement](segment iterator) remove range in first read to save time [doris]

Reply via email to