This is an automated email from the ASF dual-hosted git repository.

airborne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new faee56f01a3 [Enhancement](inverted index) improve expr 
evaluate_inverted_index performace and remove useless code (#40600)
faee56f01a3 is described below

commit faee56f01a33aad792ba3ccdb745bffd026b8f3f
Author: airborne12 <airborn...@gmail.com>
AuthorDate: Wed Sep 11 14:13:14 2024 +0800

    [Enhancement](inverted index) improve expr evaluate_inverted_index 
performace and remove useless code (#40600)
    
    ## Proposed changes
    Pre-allocate vectors based on an estimated or known size in vexpr
    evaluated_inverted_index
---
 be/src/olap/rowset/segment_v2/segment_iterator.cpp | 30 ---------------
 be/src/olap/rowset/segment_v2/segment_iterator.h   | 16 --------
 be/src/vec/exprs/vexpr.cpp                         | 45 ++++++++++++++--------
 3 files changed, 28 insertions(+), 63 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp 
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index ff8a36d8788..1014f65b7ae 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -927,36 +927,6 @@ bool SegmentIterator::_need_read_data(ColumnId cid) {
     return true;
 }
 
-bool SegmentIterator::_is_target_expr_match_predicate(const 
vectorized::VExprSPtr& expr,
-                                                      const MatchPredicate* 
match_pred,
-                                                      const Schema* schema) {
-    if (!expr || expr->node_type() != TExprNodeType::MATCH_PRED) {
-        return false;
-    }
-
-    const auto& children = expr->children();
-    if (children.size() != 2 || !children[0]->is_slot_ref() || 
!children[1]->is_constant()) {
-        return false;
-    }
-
-    auto slot_ref = dynamic_cast<vectorized::VSlotRef*>(children[0].get());
-    if (!slot_ref) {
-        LOG(WARNING) << children[0]->debug_string() << " should be SlotRef";
-        return false;
-    }
-    std::shared_ptr<ColumnPtrWrapper> const_col_wrapper;
-    // children 1 is VLiteral, we do not need expr context.
-    auto res = children[1]->get_const_col(nullptr /* context */, 
&const_col_wrapper);
-    if (!res.ok() || !const_col_wrapper) {
-        return false;
-    }
-
-    const auto const_column =
-            
check_and_get_column<vectorized::ColumnConst>(const_col_wrapper->column_ptr);
-    return const_column && match_pred->column_id() == 
schema->column_id(slot_ref->column_id()) &&
-           StringRef(match_pred->get_value()) == const_column->get_data_at(0);
-}
-
 Status SegmentIterator::_apply_inverted_index() {
     std::vector<ColumnPredicate*> remaining_predicates;
     std::set<const ColumnPredicate*> no_need_to_pass_column_predicate_set;
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h 
b/be/src/olap/rowset/segment_v2/segment_iterator.h
index f5c133485aa..50876cd7c55 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.h
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.h
@@ -362,22 +362,6 @@ private:
         return 0;
     }
 
-    bool _is_match_predicate_and_not_remaining(
-            ColumnPredicate* pred, const std::vector<ColumnPredicate*>& 
remaining_predicates) {
-        return pred->type() == PredicateType::MATCH &&
-               std::find(remaining_predicates.begin(), 
remaining_predicates.end(), pred) ==
-                       remaining_predicates.end();
-    }
-
-    void _delete_expr_from_conjunct_roots(const vectorized::VExprSPtr& expr,
-                                          vectorized::VExprSPtrs& 
conjunct_roots) {
-        conjunct_roots.erase(std::remove(conjunct_roots.begin(), 
conjunct_roots.end(), expr),
-                             conjunct_roots.end());
-    }
-
-    bool _is_target_expr_match_predicate(const vectorized::VExprSPtr& expr,
-                                         const MatchPredicate* match_pred, 
const Schema* schema);
-
     Status _convert_to_expected_type(const std::vector<ColumnId>& col_ids);
 
     bool _no_need_read_key_data(ColumnId cid, vectorized::MutableColumnPtr& 
column,
diff --git a/be/src/vec/exprs/vexpr.cpp b/be/src/vec/exprs/vexpr.cpp
index 482a1a2e498..da496cdd9f0 100644
--- a/be/src/vec/exprs/vexpr.cpp
+++ b/be/src/vec/exprs/vexpr.cpp
@@ -613,15 +613,26 @@ Status VExpr::get_result_from_const(vectorized::Block* 
block, const std::string&
 
 Status VExpr::_evaluate_inverted_index(VExprContext* context, const 
FunctionBasePtr& function,
                                        uint32_t segment_num_rows) {
+    // Pre-allocate vectors based on an estimated or known size
     std::vector<segment_v2::InvertedIndexIterator*> iterators;
     std::vector<vectorized::IndexFieldNameAndTypePair> data_type_with_names;
     std::vector<int> column_ids;
     vectorized::ColumnsWithTypeAndName arguments;
     VExprSPtrs children_exprs;
-    for (auto child : children()) {
-        // if child is cast expr, we need to ensure target data type is the 
same with storage data type.
-        // or they are all string type
-        // and if data type is array, we need to get the nested data type to 
ensure that.
+
+    // Reserve space to avoid multiple reallocations
+    const size_t estimated_size = children().size();
+    iterators.reserve(estimated_size);
+    data_type_with_names.reserve(estimated_size);
+    column_ids.reserve(estimated_size);
+    children_exprs.reserve(estimated_size);
+
+    auto index_context = context->get_inverted_index_context();
+
+    // if child is cast expr, we need to ensure target data type is the same 
with storage data type.
+    // or they are all string type
+    // and if data type is array, we need to get the nested data type to 
ensure that.
+    for (const auto& child : children()) {
         if (child->node_type() == TExprNodeType::CAST_EXPR) {
             auto* cast_expr = assert_cast<VCastExpr*>(child.get());
             DCHECK_EQ(cast_expr->children().size(), 1);
@@ -663,7 +674,11 @@ Status VExpr::_evaluate_inverted_index(VExprContext* 
context, const FunctionBase
         }
     }
 
-    for (auto child : children_exprs) {
+    if (children_exprs.empty()) {
+        return Status::OK(); // Early exit if no children to process
+    }
+
+    for (const auto& child : children_exprs) {
         if (child->is_slot_ref()) {
             auto* column_slot_ref = assert_cast<VSlotRef*>(child.get());
             auto column_id = column_slot_ref->column_id();
@@ -694,25 +709,21 @@ Status VExpr::_evaluate_inverted_index(VExprContext* 
context, const FunctionBase
                                    column_literal->get_data_type(), 
column_literal->expr_name());
         }
     }
-    auto result_bitmap = segment_v2::InvertedIndexResultBitmap();
-    if (iterators.empty()) {
-        return Status::OK();
-    }
-    // If arguments are empty, it means the left value in the expression is 
not a literal.
-    if (arguments.empty()) {
-        return Status::OK();
+
+    if (iterators.empty() || arguments.empty()) {
+        return Status::OK(); // Nothing to evaluate or no literals to compare 
against
     }
+
+    auto result_bitmap = segment_v2::InvertedIndexResultBitmap();
     auto res = function->evaluate_inverted_index(arguments, 
data_type_with_names, iterators,
                                                  segment_num_rows, 
result_bitmap);
     if (!res.ok()) {
         return res;
     }
     if (!result_bitmap.is_empty()) {
-        
context->get_inverted_index_context()->set_inverted_index_result_for_expr(this,
-                                                                               
   result_bitmap);
-        for (auto column_id : column_ids) {
-            
context->get_inverted_index_context()->set_true_for_inverted_index_status(this,
-                                                                               
       column_id);
+        index_context->set_inverted_index_result_for_expr(this, result_bitmap);
+        for (int column_id : column_ids) {
+            index_context->set_true_for_inverted_index_status(this, column_id);
         }
         // set fast_execute when expr evaluated by inverted index correctly
         _can_fast_execute = true;


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to