This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new b66a259e5ed [opt](inverted index) topn opt reads only limit number of records (#33163) b66a259e5ed is described below commit b66a259e5ed5fcac1ec11d5967a559ad300dc71b Author: zzzxl <33418555+zzzxl1...@users.noreply.github.com> AuthorDate: Thu Apr 11 15:07:20 2024 +0800 [opt](inverted index) topn opt reads only limit number of records (#33163) --- be/src/olap/iterators.h | 1 + be/src/olap/rowset/beta_rowset_reader.cpp | 1 + be/src/olap/rowset/beta_rowset_reader.h | 3 +++ be/src/olap/rowset/rowset_reader.h | 2 ++ be/src/olap/rowset/rowset_reader_context.h | 1 + be/src/olap/rowset/segment_v2/segment_iterator.cpp | 20 ++++++++++++++++++++ be/src/olap/rowset/segment_v2/segment_iterator.h | 2 ++ be/src/vec/olap/vcollect_iterator.cpp | 1 + 8 files changed, 31 insertions(+) diff --git a/be/src/olap/iterators.h b/be/src/olap/iterators.h index e4b62d157d1..95e905185ac 100644 --- a/be/src/olap/iterators.h +++ b/be/src/olap/iterators.h @@ -119,6 +119,7 @@ public: // slots that cast may be eliminated in storage layer std::map<std::string, PrimitiveType> target_cast_type_for_variants; RowRanges row_ranges; + size_t topn_limit = 0; }; class RowwiseIterator; diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp b/be/src/olap/rowset/beta_rowset_reader.cpp index 75fb6a8d4de..cca7765a610 100644 --- a/be/src/olap/rowset/beta_rowset_reader.cpp +++ b/be/src/olap/rowset/beta_rowset_reader.cpp @@ -101,6 +101,7 @@ Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context _read_options.rowset_id = _rowset->rowset_id(); _read_options.version = _rowset->version(); _read_options.tablet_id = _rowset->rowset_meta()->tablet_id(); + _read_options.topn_limit = _topn_limit; if (_read_context->lower_bound_keys != nullptr) { for (int i = 0; i < _read_context->lower_bound_keys->size(); ++i) { _read_options.key_ranges.emplace_back(&_read_context->lower_bound_keys->at(i), diff --git a/be/src/olap/rowset/beta_rowset_reader.h b/be/src/olap/rowset/beta_rowset_reader.h index fd275988164..ee23b2d9b9f 100644 --- a/be/src/olap/rowset/beta_rowset_reader.h +++ b/be/src/olap/rowset/beta_rowset_reader.h @@ -84,6 +84,8 @@ public: RowsetReaderSharedPtr clone() override; + void set_topn_limit(size_t topn_limit) override { _topn_limit = topn_limit; } + private: [[nodiscard]] Status _init_iterator_once(); [[nodiscard]] Status _init_iterator(); @@ -123,6 +125,7 @@ private: StorageReadOptions _read_options; bool _empty = false; + size_t _topn_limit = 0; }; } // namespace doris diff --git a/be/src/olap/rowset/rowset_reader.h b/be/src/olap/rowset/rowset_reader.h index 45449952431..9ada1706d89 100644 --- a/be/src/olap/rowset/rowset_reader.h +++ b/be/src/olap/rowset/rowset_reader.h @@ -88,6 +88,8 @@ public: virtual bool update_profile(RuntimeProfile* profile) = 0; virtual RowsetReaderSharedPtr clone() = 0; + + virtual void set_topn_limit(size_t topn_limit) = 0; }; } // namespace doris diff --git a/be/src/olap/rowset/rowset_reader_context.h b/be/src/olap/rowset/rowset_reader_context.h index 8bfdeda60a8..44cf8556412 100644 --- a/be/src/olap/rowset/rowset_reader_context.h +++ b/be/src/olap/rowset/rowset_reader_context.h @@ -84,6 +84,7 @@ struct RowsetReaderContext { // slots that cast may be eliminated in storage layer std::map<std::string, PrimitiveType> target_cast_type_for_variants; int64_t ttl_seconds = 0; + size_t topn_limit = 0; }; } // namespace doris diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index e69d707499e..dbfa9aeefb3 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -508,6 +508,7 @@ Status SegmentIterator::_get_row_ranges_by_column_conditions() { ++it; } } + _col_preds_except_leafnode_of_andnode.clear(); } _opts.stats->rows_inverted_index_filtered += (input_rows - _row_bitmap.cardinality()); } @@ -2218,6 +2219,9 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) { _current_batch_rows_read = 0; uint32_t nrows_read_limit = _opts.block_row_max; + if (_can_opt_topn_reads()) { + nrows_read_limit = std::min(static_cast<uint32_t>(_opts.topn_limit), nrows_read_limit); + } RETURN_IF_ERROR(_read_columns_by_index( nrows_read_limit, _current_batch_rows_read, _lazy_materialization_read || _opts.record_rowids || _is_need_expr_eval)); @@ -2677,5 +2681,21 @@ bool SegmentIterator::_has_delete_predicate(ColumnId cid) { return delete_columns_set.contains(cid); } +bool SegmentIterator::_can_opt_topn_reads() const { + if (_opts.topn_limit <= 0) { + return false; + } + + if (_opts.delete_condition_predicates->num_of_column_predicate() > 0) { + return false; + } + + if (!_col_predicates.empty() || !_col_preds_except_leafnode_of_andnode.empty()) { + return false; + } + + return true; +} + } // namespace segment_v2 } // namespace doris diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h b/be/src/olap/rowset/segment_v2/segment_iterator.h index 1bf7cf22c3d..84c10f3b8b2 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.h +++ b/be/src/olap/rowset/segment_v2/segment_iterator.h @@ -380,6 +380,8 @@ private: bool _has_delete_predicate(ColumnId cid); + bool _can_opt_topn_reads() const; + class BitmapRangeIterator; class BackwardBitmapRangeIterator; diff --git a/be/src/vec/olap/vcollect_iterator.cpp b/be/src/vec/olap/vcollect_iterator.cpp index 90a38eef51e..0a6bafca49c 100644 --- a/be/src/vec/olap/vcollect_iterator.cpp +++ b/be/src/vec/olap/vcollect_iterator.cpp @@ -95,6 +95,7 @@ void VCollectIterator::init(TabletReader* reader, bool ori_data_overlapping, boo Status VCollectIterator::add_child(const RowSetSplits& rs_splits) { if (use_topn_next()) { + rs_splits.rs_reader->set_topn_limit(_topn_limit); _rs_splits.push_back(rs_splits); return Status::OK(); } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org