xiaokang commented on code in PR #38908:
URL: https://github.com/apache/doris/pull/38908#discussion_r1726548055


##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -2836,125 +2425,47 @@ Status 
SegmentIterator::current_block_row_locations(std::vector<RowLocation>* bl
     return Status::OK();
 }
 
-void SegmentIterator::_calculate_pred_in_remaining_conjunct_root(
-        const vectorized::VExprSPtr& expr) {
-    if (expr == nullptr) {
-        return;
-    }
-
-    if (expr->fn().name.function_name == "multi_match") {
-        return;
-    }
-
-    auto& children = expr->children();
-    for (int i = 0; i < children.size(); ++i) {
-        _calculate_pred_in_remaining_conjunct_root(children[i]);
-    }
-
-    auto node_type = expr->node_type();
-    if (node_type == TExprNodeType::SLOT_REF) {
-        auto slot_expr = 
std::dynamic_pointer_cast<doris::vectorized::VSlotRef>(expr);
-        if (_column_predicate_info->column_name.empty()) {
-            _column_predicate_info->column_name = expr->expr_name();
-            _column_predicate_info->column_id = slot_expr->column_id();
-        } else {
-            // If column name already exists, create a new ColumnPredicateInfo
-            // if expr is columnA > columnB, then column name will exist, in 
this situation, we need to add it to _column_pred_in_remaining_vconjunct
-            auto new_column_pred_info = 
std::make_shared<ColumnPredicateInfo>();
-            new_column_pred_info->column_name = expr->expr_name();
-            new_column_pred_info->column_id = slot_expr->column_id();
-            
_column_pred_in_remaining_vconjunct[new_column_pred_info->column_name].push_back(
-                    *new_column_pred_info);
-        }
-    } else if (_is_literal_node(node_type)) {
-        auto v_literal_expr = static_cast<const 
doris::vectorized::VLiteral*>(expr.get());
-        _column_predicate_info->query_values.insert(v_literal_expr->value());
-    } else if (node_type == TExprNodeType::NULL_LITERAL) {
-        if (!_column_predicate_info->column_name.empty()) {
-            auto v_literal_expr = static_cast<const 
doris::vectorized::VLiteral*>(expr.get());
-            
_column_predicate_info->query_values.insert(v_literal_expr->value());
-        }
-    } else {
-        if (node_type == TExprNodeType::MATCH_PRED) {
-            _column_predicate_info->query_op = "match";
-        } else if (node_type == TExprNodeType::IN_PRED) {
-            if (expr->op() == TExprOpcode::type::FILTER_IN) {
-                _column_predicate_info->query_op = "in";
-            } else {
-                _column_predicate_info->query_op = "not_in";
-            }
-        } else if (node_type != TExprNodeType::COMPOUND_PRED) {
-            _column_predicate_info->query_op = expr->fn().name.function_name;
-        }
-
-        if (!_column_predicate_info->is_empty()) {
-            
_column_pred_in_remaining_vconjunct[_column_predicate_info->column_name].push_back(
-                    *_column_predicate_info);
-            _column_predicate_info.reset(new ColumnPredicateInfo());
-        }
+Status SegmentIterator::_construct_compound_expr_context() {
+    auto inverted_index_context = 
std::make_shared<vectorized::InvertedIndexContext>(
+            _schema->column_ids(), _inverted_index_iterators, 
_storage_name_and_type,
+            _common_expr_inverted_index_status);
+    for (const auto& expr_ctx : _opts.common_expr_ctxs_push_down) {
+        vectorized::VExprContextSPtr context;
+        RETURN_IF_ERROR(expr_ctx->clone(_opts.runtime_state, context));
+        context->set_inverted_index_context(inverted_index_context);
+        _common_expr_ctxs_push_down.emplace_back(context);
     }
+    return Status::OK();
 }
 
-void SegmentIterator::_calculate_func_in_remaining_conjunct_root() {
-    auto hash = [](const vectorized::VExprSPtr& expr) -> std::size_t {
-        return std::hash<std::string>()(expr->expr_name());
-    };
-    auto equal = [](const vectorized::VExprSPtr& lhs, const 
vectorized::VExprSPtr& rhs) -> bool {
-        return lhs->equals(*rhs);
-    };
-
-    uint32_t next_id = 0;
-    std::unordered_map<vectorized::VExprSPtr, uint32_t, decltype(hash), 
decltype(equal)> unique_map(
-            0, hash, equal);
-
-    auto gen_func_unique_id = [&unique_map, &next_id](const 
vectorized::VExprSPtr& expr) {
-        auto it = unique_map.find(expr);
-        if (it != unique_map.end()) {
-            return it->second;
-        } else {
-            unique_map[expr] = ++next_id;
-            return next_id;
-        }
-    };
-
+void SegmentIterator::_calculate_expr_in_remaining_conjunct_root() {
     for (const auto& root_expr_ctx : _common_expr_ctxs_push_down) {
         const auto& root_expr = root_expr_ctx->root();
         if (root_expr == nullptr) {
             continue;
         }
 
-        std::stack<std::pair<vectorized::VExprSPtr, bool>> stack;
-        stack.emplace(root_expr, false);
+        std::stack<vectorized::VExprSPtr> stack;
+        stack.emplace(root_expr);
 
         while (!stack.empty()) {
-            const auto& [expr, has_compound_pred] = stack.top();
+            const auto& expr = stack.top();
             stack.pop();
 
-            bool current_has_compound_pred =
-                    has_compound_pred || (expr->node_type() == 
TExprNodeType::COMPOUND_PRED);
-
-            if (expr->fn().name.function_name == "multi_match") {
-                expr->set_index_unique_id(gen_func_unique_id(expr));
-                if (current_has_compound_pred) {
-                    compound_func_exprs.emplace_back(expr);
-                } else {
-                    no_compound_func_exprs.emplace_back(expr);
-                }
-
-                for (int32_t i = expr->get_num_children() - 1; i >= 0; i--) {
-                    auto child_expr = expr->get_child(i);
-                    if (child_expr->node_type() == 
TExprNodeType::type::SLOT_REF) {
-                        std::string result_sign = 
BeConsts::BLOCK_TEMP_COLUMN_PREFIX +
-                                                  
std::to_string(expr->index_unique_id());
-                        
_func_name_to_result_sign[child_expr->expr_name()].push_back(result_sign);
+            if (vectorized::VExpr::is_directly_action_on_a_slot(*expr)) {
+                for (const auto& child : expr->children()) {
+                    if (child->is_slot_ref()) {

Review Comment:
   If multiple children are slot ref, many columns will be added to 
_common_expr_inverted_index_status. Is that right?



##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -2836,125 +2425,47 @@ Status 
SegmentIterator::current_block_row_locations(std::vector<RowLocation>* bl
     return Status::OK();
 }
 
-void SegmentIterator::_calculate_pred_in_remaining_conjunct_root(
-        const vectorized::VExprSPtr& expr) {
-    if (expr == nullptr) {
-        return;
-    }
-
-    if (expr->fn().name.function_name == "multi_match") {
-        return;
-    }
-
-    auto& children = expr->children();
-    for (int i = 0; i < children.size(); ++i) {
-        _calculate_pred_in_remaining_conjunct_root(children[i]);
-    }
-
-    auto node_type = expr->node_type();
-    if (node_type == TExprNodeType::SLOT_REF) {
-        auto slot_expr = 
std::dynamic_pointer_cast<doris::vectorized::VSlotRef>(expr);
-        if (_column_predicate_info->column_name.empty()) {
-            _column_predicate_info->column_name = expr->expr_name();
-            _column_predicate_info->column_id = slot_expr->column_id();
-        } else {
-            // If column name already exists, create a new ColumnPredicateInfo
-            // if expr is columnA > columnB, then column name will exist, in 
this situation, we need to add it to _column_pred_in_remaining_vconjunct
-            auto new_column_pred_info = 
std::make_shared<ColumnPredicateInfo>();
-            new_column_pred_info->column_name = expr->expr_name();
-            new_column_pred_info->column_id = slot_expr->column_id();
-            
_column_pred_in_remaining_vconjunct[new_column_pred_info->column_name].push_back(
-                    *new_column_pred_info);
-        }
-    } else if (_is_literal_node(node_type)) {
-        auto v_literal_expr = static_cast<const 
doris::vectorized::VLiteral*>(expr.get());
-        _column_predicate_info->query_values.insert(v_literal_expr->value());
-    } else if (node_type == TExprNodeType::NULL_LITERAL) {
-        if (!_column_predicate_info->column_name.empty()) {
-            auto v_literal_expr = static_cast<const 
doris::vectorized::VLiteral*>(expr.get());
-            
_column_predicate_info->query_values.insert(v_literal_expr->value());
-        }
-    } else {
-        if (node_type == TExprNodeType::MATCH_PRED) {
-            _column_predicate_info->query_op = "match";
-        } else if (node_type == TExprNodeType::IN_PRED) {
-            if (expr->op() == TExprOpcode::type::FILTER_IN) {
-                _column_predicate_info->query_op = "in";
-            } else {
-                _column_predicate_info->query_op = "not_in";
-            }
-        } else if (node_type != TExprNodeType::COMPOUND_PRED) {
-            _column_predicate_info->query_op = expr->fn().name.function_name;
-        }
-
-        if (!_column_predicate_info->is_empty()) {
-            
_column_pred_in_remaining_vconjunct[_column_predicate_info->column_name].push_back(
-                    *_column_predicate_info);
-            _column_predicate_info.reset(new ColumnPredicateInfo());
-        }
+Status SegmentIterator::_construct_compound_expr_context() {
+    auto inverted_index_context = 
std::make_shared<vectorized::InvertedIndexContext>(
+            _schema->column_ids(), _inverted_index_iterators, 
_storage_name_and_type,
+            _common_expr_inverted_index_status);
+    for (const auto& expr_ctx : _opts.common_expr_ctxs_push_down) {
+        vectorized::VExprContextSPtr context;
+        RETURN_IF_ERROR(expr_ctx->clone(_opts.runtime_state, context));
+        context->set_inverted_index_context(inverted_index_context);
+        _common_expr_ctxs_push_down.emplace_back(context);
     }
+    return Status::OK();
 }
 
-void SegmentIterator::_calculate_func_in_remaining_conjunct_root() {
-    auto hash = [](const vectorized::VExprSPtr& expr) -> std::size_t {
-        return std::hash<std::string>()(expr->expr_name());
-    };
-    auto equal = [](const vectorized::VExprSPtr& lhs, const 
vectorized::VExprSPtr& rhs) -> bool {
-        return lhs->equals(*rhs);
-    };
-
-    uint32_t next_id = 0;
-    std::unordered_map<vectorized::VExprSPtr, uint32_t, decltype(hash), 
decltype(equal)> unique_map(
-            0, hash, equal);
-
-    auto gen_func_unique_id = [&unique_map, &next_id](const 
vectorized::VExprSPtr& expr) {
-        auto it = unique_map.find(expr);
-        if (it != unique_map.end()) {
-            return it->second;
-        } else {
-            unique_map[expr] = ++next_id;
-            return next_id;
-        }
-    };
-
+void SegmentIterator::_calculate_expr_in_remaining_conjunct_root() {
     for (const auto& root_expr_ctx : _common_expr_ctxs_push_down) {
         const auto& root_expr = root_expr_ctx->root();
         if (root_expr == nullptr) {
             continue;
         }
 
-        std::stack<std::pair<vectorized::VExprSPtr, bool>> stack;
-        stack.emplace(root_expr, false);
+        std::stack<vectorized::VExprSPtr> stack;
+        stack.emplace(root_expr);
 
         while (!stack.empty()) {
-            const auto& [expr, has_compound_pred] = stack.top();
+            const auto& expr = stack.top();
             stack.pop();
 
-            bool current_has_compound_pred =
-                    has_compound_pred || (expr->node_type() == 
TExprNodeType::COMPOUND_PRED);
-
-            if (expr->fn().name.function_name == "multi_match") {
-                expr->set_index_unique_id(gen_func_unique_id(expr));
-                if (current_has_compound_pred) {
-                    compound_func_exprs.emplace_back(expr);
-                } else {
-                    no_compound_func_exprs.emplace_back(expr);
-                }
-
-                for (int32_t i = expr->get_num_children() - 1; i >= 0; i--) {
-                    auto child_expr = expr->get_child(i);
-                    if (child_expr->node_type() == 
TExprNodeType::type::SLOT_REF) {
-                        std::string result_sign = 
BeConsts::BLOCK_TEMP_COLUMN_PREFIX +
-                                                  
std::to_string(expr->index_unique_id());
-                        
_func_name_to_result_sign[child_expr->expr_name()].push_back(result_sign);
+            if (vectorized::VExpr::is_directly_action_on_a_slot(*expr)) {

Review Comment:
   Duplicate loop on children. You can just loop on children and check 
is_slot_ref().



##########
be/src/vec/exprs/vexpr.cpp:
##########
@@ -285,7 +286,11 @@ Status VExpr::create_expr(const TExprNode& expr_node, 
VExprSPtr& expr) {
         case TExprNodeType::NULL_AWARE_BINARY_PRED:
         case TExprNodeType::FUNCTION_CALL:
         case TExprNodeType::COMPUTE_FUNCTION_CALL: {
-            expr = VectorizedFnCall::create_shared(expr_node);
+            if (expr_node.fn.name.function_name == "multi_match") {

Review Comment:
   add comment for reason of hard code.



##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -362,42 +354,20 @@ Status SegmentIterator::_init_impl(const 
StorageReadOptions& opts) {
         }
     }
 
+    RETURN_IF_ERROR(_construct_compound_expr_context());

Review Comment:
   If user upgrade from an old version eg. 1.2.8, common expr may be disabled 
by default.



##########
be/src/vec/functions/match.cpp:
##########
@@ -24,85 +24,122 @@
 #include "util/debug_points.h"
 
 namespace doris::vectorized {
+Status FunctionMatchBase::evaluate_inverted_index(
+        const ColumnsWithTypeAndName& arguments,
+        const vectorized::IndexFieldNameAndTypePair& data_type_with_name,
+        segment_v2::InvertedIndexIterator* iter, uint32_t num_rows,
+        segment_v2::InvertedIndexResultBitmap& bitmap_result) const {
+    DCHECK(arguments.size() == 1);
+    if (iter == nullptr) {
+        return Status::OK();
+    }
+    if (get_name() == "match_phrase" || get_name() == "match_phrase_prefix" ||

Review Comment:
   define and use const



##########
be/src/vec/functions/functions_comparison.h:
##########
@@ -526,6 +528,69 @@ class FunctionComparison : public IFunction {
         return std::make_shared<DataTypeUInt8>();
     }
 
+    Status evaluate_inverted_index(
+            const ColumnsWithTypeAndName& arguments,
+            const vectorized::IndexFieldNameAndTypePair& data_type_with_name,
+            segment_v2::InvertedIndexIterator* iter, uint32_t num_rows,
+            segment_v2::InvertedIndexResultBitmap& bitmap_result) const 
override {
+        DCHECK(arguments.size() == 1);
+        if (iter == nullptr) {
+            return Status::OK();
+        }
+        if (iter->get_inverted_index_reader_type() ==
+            segment_v2::InvertedIndexReaderType::FULLTEXT) {
+            //NOT support comparison predicate when parser is FULLTEXT for 
expr inverted index evaluate.
+            return Status::OK();
+        }
+        std::string column_name = data_type_with_name.first;
+        Field param_value;
+        arguments[0].column->get(0, param_value);
+        auto param_type = 
arguments[0].type->get_type_as_type_descriptor().type;
+
+        std::unique_ptr<segment_v2::InvertedIndexQueryParamFactory> 
query_param = nullptr;
+        
RETURN_IF_ERROR(segment_v2::InvertedIndexQueryParamFactory::create_query_value(
+                param_type, &param_value, query_param));
+        segment_v2::InvertedIndexQueryType query_type =
+                segment_v2::InvertedIndexQueryType::UNKNOWN_QUERY;
+        if (name == "eq") {

Review Comment:
   use NameEquals::name



##########
be/src/vec/functions/array/function_array_index.h:
##########
@@ -115,19 +115,23 @@ class FunctionArrayIndex : public IFunction {
         return Status::OK();
     }
 
-    /**
-     * eval inverted index. we can filter array rows with inverted index iter
-     */
-    Status eval_inverted_index(FunctionContext* context,
-                               const vectorized::IndexFieldNameAndTypePair& 
data_type_with_name,
-                               segment_v2::InvertedIndexIterator* iter, 
uint32_t num_rows,
-                               roaring::Roaring* bitmap) const override {
+    Status evaluate_inverted_index(
+            const ColumnsWithTypeAndName& arguments,
+            const vectorized::IndexFieldNameAndTypePair& data_type_with_name,
+            segment_v2::InvertedIndexIterator* iter, uint32_t num_rows,
+            segment_v2::InvertedIndexResultBitmap& bitmap_result) const 
override {
+        DCHECK(arguments.size() == 1);
+        if (iter == nullptr) {
+            return Status::OK();
+        }
         std::shared_ptr<roaring::Roaring> roaring = 
std::make_shared<roaring::Roaring>();
-        auto* param_value = reinterpret_cast<ParamValue*>(
-                context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
-        if (param_value == nullptr || param_value->value.is_null()) {
-            return Status::Error<ErrorCode::INVERTED_INDEX_EVALUATE_SKIPPED>(
-                    "Inverted index evaluate skipped, param_value is nullptr 
or value is null");
+        Field param_value;
+        arguments[0].column->get(0, param_value);
+        auto param_type = 
arguments[0].type->get_type_as_type_descriptor().type;
+        if (param_value.is_null()) {
+            return Status::OK();

Review Comment:
   add comment



##########
be/src/vec/exprs/vexpr_context.h:
##########
@@ -73,17 +188,13 @@ class VExprContext {
     // execute expr with inverted index which column a, b has inverted indexes
     //  but some situation although column b has indexes, but apply index is 
not useful, we should
     //  skip this expr, just do not apply index anymore.
-    /**
-     * @param colid_to_inverted_index_iter contains all column id to inverted 
index iterator mapping from segmentIterator
-     * @param num_rows number of rows in one segment.
-     * @param bitmap roaring bitmap to store the result. 0 is present filed by 
index.
-     * @return status not ok means execute failed.
-     */
-    [[nodiscard]] Status eval_inverted_index(
-            const std::unordered_map<ColumnId, 
std::pair<vectorized::IndexFieldNameAndTypePair,
-                                                         
segment_v2::InvertedIndexIterator*>>&
-                    colid_to_inverted_index_iter,
-            uint32_t num_rows, roaring::Roaring* bitmap);
+
+    [[nodiscard]] static Status evaluate_inverted_index(const 
VExprContextSPtrs& conjuncts,

Review Comment:
   Where is this funciton called?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to