This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new b66a259e5ed [opt](inverted index) topn opt reads only limit number of records (#33163)
b66a259e5ed is described below

commit b66a259e5ed5fcac1ec11d5967a559ad300dc71b
Author: zzzxl <33418555+zzzxl1...@users.noreply.github.com>
AuthorDate: Thu Apr 11 15:07:20 2024 +0800

    [opt](inverted index) topn opt reads only limit number of records (#33163)
---
 be/src/olap/iterators.h                            |  1 +
 be/src/olap/rowset/beta_rowset_reader.cpp          |  1 +
 be/src/olap/rowset/beta_rowset_reader.h            |  3 +++
 be/src/olap/rowset/rowset_reader.h                 |  2 ++
 be/src/olap/rowset/rowset_reader_context.h         |  1 +
 be/src/olap/rowset/segment_v2/segment_iterator.cpp | 20 ++++++++++++++++++++
 be/src/olap/rowset/segment_v2/segment_iterator.h   |  2 ++
 be/src/vec/olap/vcollect_iterator.cpp              |  1 +
 8 files changed, 31 insertions(+)

diff --git a/be/src/olap/iterators.h b/be/src/olap/iterators.h
index e4b62d157d1..95e905185ac 100644
--- a/be/src/olap/iterators.h
+++ b/be/src/olap/iterators.h
@@ -119,6 +119,7 @@ public:
     // slots that cast may be eliminated in storage layer
     std::map<std::string, PrimitiveType> target_cast_type_for_variants;
     RowRanges row_ranges;
+    size_t topn_limit = 0;
 };
 
 class RowwiseIterator;
diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp b/be/src/olap/rowset/beta_rowset_reader.cpp
index 75fb6a8d4de..cca7765a610 100644
--- a/be/src/olap/rowset/beta_rowset_reader.cpp
+++ b/be/src/olap/rowset/beta_rowset_reader.cpp
@@ -101,6 +101,7 @@ Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context
     _read_options.rowset_id = _rowset->rowset_id();
     _read_options.version = _rowset->version();
     _read_options.tablet_id = _rowset->rowset_meta()->tablet_id();
+    _read_options.topn_limit = _topn_limit;
     if (_read_context->lower_bound_keys != nullptr) {
         for (int i = 0; i < _read_context->lower_bound_keys->size(); ++i) {
             _read_options.key_ranges.emplace_back(&_read_context->lower_bound_keys->at(i),
diff --git a/be/src/olap/rowset/beta_rowset_reader.h b/be/src/olap/rowset/beta_rowset_reader.h
index fd275988164..ee23b2d9b9f 100644
--- a/be/src/olap/rowset/beta_rowset_reader.h
+++ b/be/src/olap/rowset/beta_rowset_reader.h
@@ -84,6 +84,8 @@ public:
 
     RowsetReaderSharedPtr clone() override;
 
+    void set_topn_limit(size_t topn_limit) override { _topn_limit = topn_limit; }
+
 private:
     [[nodiscard]] Status _init_iterator_once();
     [[nodiscard]] Status _init_iterator();
@@ -123,6 +125,7 @@ private:
     StorageReadOptions _read_options;
 
     bool _empty = false;
+    size_t _topn_limit = 0;
 };
 
 } // namespace doris
diff --git a/be/src/olap/rowset/rowset_reader.h b/be/src/olap/rowset/rowset_reader.h
index 45449952431..9ada1706d89 100644
--- a/be/src/olap/rowset/rowset_reader.h
+++ b/be/src/olap/rowset/rowset_reader.h
@@ -88,6 +88,8 @@ public:
     virtual bool update_profile(RuntimeProfile* profile) = 0;
 
     virtual RowsetReaderSharedPtr clone() = 0;
+
+    virtual void set_topn_limit(size_t topn_limit) = 0;
 };
 
 } // namespace doris
diff --git a/be/src/olap/rowset/rowset_reader_context.h b/be/src/olap/rowset/rowset_reader_context.h
index 8bfdeda60a8..44cf8556412 100644
--- a/be/src/olap/rowset/rowset_reader_context.h
+++ b/be/src/olap/rowset/rowset_reader_context.h
@@ -84,6 +84,7 @@ struct RowsetReaderContext {
     // slots that cast may be eliminated in storage layer
     std::map<std::string, PrimitiveType> target_cast_type_for_variants;
     int64_t ttl_seconds = 0;
+    size_t topn_limit = 0;
 };
 
 } // namespace doris
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index e69d707499e..dbfa9aeefb3 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -508,6 +508,7 @@ Status SegmentIterator::_get_row_ranges_by_column_conditions() {
                     ++it;
                 }
             }
+            _col_preds_except_leafnode_of_andnode.clear();
         }
         _opts.stats->rows_inverted_index_filtered += (input_rows - _row_bitmap.cardinality());
     }
@@ -2218,6 +2219,9 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) {
 
     _current_batch_rows_read = 0;
     uint32_t nrows_read_limit = _opts.block_row_max;
+    if (_can_opt_topn_reads()) {
+        nrows_read_limit = std::min(static_cast<uint32_t>(_opts.topn_limit), nrows_read_limit);
+    }
     RETURN_IF_ERROR(_read_columns_by_index(
             nrows_read_limit, _current_batch_rows_read,
             _lazy_materialization_read || _opts.record_rowids || _is_need_expr_eval));
@@ -2677,5 +2681,21 @@ bool SegmentIterator::_has_delete_predicate(ColumnId cid) {
     return delete_columns_set.contains(cid);
 }
 
+bool SegmentIterator::_can_opt_topn_reads() const {
+    if (_opts.topn_limit <= 0) {
+        return false;
+    }
+
+    if (_opts.delete_condition_predicates->num_of_column_predicate() > 0) {
+        return false;
+    }
+
+    if (!_col_predicates.empty() || !_col_preds_except_leafnode_of_andnode.empty()) {
+        return false;
+    }
+
+    return true;
+}
+
 } // namespace segment_v2
 } // namespace doris
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h b/be/src/olap/rowset/segment_v2/segment_iterator.h
index 1bf7cf22c3d..84c10f3b8b2 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.h
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.h
@@ -380,6 +380,8 @@ private:
 
     bool _has_delete_predicate(ColumnId cid);
 
+    bool _can_opt_topn_reads() const;
+
     class BitmapRangeIterator;
     class BackwardBitmapRangeIterator;
 
diff --git a/be/src/vec/olap/vcollect_iterator.cpp b/be/src/vec/olap/vcollect_iterator.cpp
index 90a38eef51e..0a6bafca49c 100644
--- a/be/src/vec/olap/vcollect_iterator.cpp
+++ b/be/src/vec/olap/vcollect_iterator.cpp
@@ -95,6 +95,7 @@ void VCollectIterator::init(TabletReader* reader, bool ori_data_overlapping, boo
 
 Status VCollectIterator::add_child(const RowSetSplits& rs_splits) {
     if (use_topn_next()) {
+        rs_splits.rs_reader->set_topn_limit(_topn_limit);
         _rs_splits.push_back(rs_splits);
         return Status::OK();
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to