xiaokang commented on code in PR #38908: URL: https://github.com/apache/doris/pull/38908#discussion_r1726548055
########## be/src/olap/rowset/segment_v2/segment_iterator.cpp: ########## @@ -2836,125 +2425,47 @@ Status SegmentIterator::current_block_row_locations(std::vector<RowLocation>* bl return Status::OK(); } -void SegmentIterator::_calculate_pred_in_remaining_conjunct_root( - const vectorized::VExprSPtr& expr) { - if (expr == nullptr) { - return; - } - - if (expr->fn().name.function_name == "multi_match") { - return; - } - - auto& children = expr->children(); - for (int i = 0; i < children.size(); ++i) { - _calculate_pred_in_remaining_conjunct_root(children[i]); - } - - auto node_type = expr->node_type(); - if (node_type == TExprNodeType::SLOT_REF) { - auto slot_expr = std::dynamic_pointer_cast<doris::vectorized::VSlotRef>(expr); - if (_column_predicate_info->column_name.empty()) { - _column_predicate_info->column_name = expr->expr_name(); - _column_predicate_info->column_id = slot_expr->column_id(); - } else { - // If column name already exists, create a new ColumnPredicateInfo - // if expr is columnA > columnB, then column name will exist, in this situation, we need to add it to _column_pred_in_remaining_vconjunct - auto new_column_pred_info = std::make_shared<ColumnPredicateInfo>(); - new_column_pred_info->column_name = expr->expr_name(); - new_column_pred_info->column_id = slot_expr->column_id(); - _column_pred_in_remaining_vconjunct[new_column_pred_info->column_name].push_back( - *new_column_pred_info); - } - } else if (_is_literal_node(node_type)) { - auto v_literal_expr = static_cast<const doris::vectorized::VLiteral*>(expr.get()); - _column_predicate_info->query_values.insert(v_literal_expr->value()); - } else if (node_type == TExprNodeType::NULL_LITERAL) { - if (!_column_predicate_info->column_name.empty()) { - auto v_literal_expr = static_cast<const doris::vectorized::VLiteral*>(expr.get()); - _column_predicate_info->query_values.insert(v_literal_expr->value()); - } - } else { - if (node_type == TExprNodeType::MATCH_PRED) { - _column_predicate_info->query_op = "match"; - } else if (node_type == TExprNodeType::IN_PRED) { - if (expr->op() == TExprOpcode::type::FILTER_IN) { - _column_predicate_info->query_op = "in"; - } else { - _column_predicate_info->query_op = "not_in"; - } - } else if (node_type != TExprNodeType::COMPOUND_PRED) { - _column_predicate_info->query_op = expr->fn().name.function_name; - } - - if (!_column_predicate_info->is_empty()) { - _column_pred_in_remaining_vconjunct[_column_predicate_info->column_name].push_back( - *_column_predicate_info); - _column_predicate_info.reset(new ColumnPredicateInfo()); - } +Status SegmentIterator::_construct_compound_expr_context() { + auto inverted_index_context = std::make_shared<vectorized::InvertedIndexContext>( + _schema->column_ids(), _inverted_index_iterators, _storage_name_and_type, + _common_expr_inverted_index_status); + for (const auto& expr_ctx : _opts.common_expr_ctxs_push_down) { + vectorized::VExprContextSPtr context; + RETURN_IF_ERROR(expr_ctx->clone(_opts.runtime_state, context)); + context->set_inverted_index_context(inverted_index_context); + _common_expr_ctxs_push_down.emplace_back(context); } + return Status::OK(); } -void SegmentIterator::_calculate_func_in_remaining_conjunct_root() { - auto hash = [](const vectorized::VExprSPtr& expr) -> std::size_t { - return std::hash<std::string>()(expr->expr_name()); - }; - auto equal = [](const vectorized::VExprSPtr& lhs, const vectorized::VExprSPtr& rhs) -> bool { - return lhs->equals(*rhs); - }; - - uint32_t next_id = 0; - std::unordered_map<vectorized::VExprSPtr, uint32_t, decltype(hash), decltype(equal)> unique_map( - 0, hash, equal); - - auto gen_func_unique_id = [&unique_map, &next_id](const vectorized::VExprSPtr& expr) { - auto it = unique_map.find(expr); - if (it != unique_map.end()) { - return it->second; - } else { - unique_map[expr] = ++next_id; - return next_id; - } - }; - +void SegmentIterator::_calculate_expr_in_remaining_conjunct_root() { for (const auto& root_expr_ctx : _common_expr_ctxs_push_down) { const auto& root_expr = root_expr_ctx->root(); if (root_expr == nullptr) { continue; } - std::stack<std::pair<vectorized::VExprSPtr, bool>> stack; - stack.emplace(root_expr, false); + std::stack<vectorized::VExprSPtr> stack; + stack.emplace(root_expr); while (!stack.empty()) { - const auto& [expr, has_compound_pred] = stack.top(); + const auto& expr = stack.top(); stack.pop(); - bool current_has_compound_pred = - has_compound_pred || (expr->node_type() == TExprNodeType::COMPOUND_PRED); - - if (expr->fn().name.function_name == "multi_match") { - expr->set_index_unique_id(gen_func_unique_id(expr)); - if (current_has_compound_pred) { - compound_func_exprs.emplace_back(expr); - } else { - no_compound_func_exprs.emplace_back(expr); - } - - for (int32_t i = expr->get_num_children() - 1; i >= 0; i--) { - auto child_expr = expr->get_child(i); - if (child_expr->node_type() == TExprNodeType::type::SLOT_REF) { - std::string result_sign = BeConsts::BLOCK_TEMP_COLUMN_PREFIX + - std::to_string(expr->index_unique_id()); - _func_name_to_result_sign[child_expr->expr_name()].push_back(result_sign); + if (vectorized::VExpr::is_directly_action_on_a_slot(*expr)) { + for (const auto& child : expr->children()) { + if (child->is_slot_ref()) { Review Comment: If multiple children are slot ref, many columns will be added to _common_expr_inverted_index_status. Is that right? ########## be/src/olap/rowset/segment_v2/segment_iterator.cpp: ########## @@ -2836,125 +2425,47 @@ Status SegmentIterator::current_block_row_locations(std::vector<RowLocation>* bl return Status::OK(); } -void SegmentIterator::_calculate_pred_in_remaining_conjunct_root( - const vectorized::VExprSPtr& expr) { - if (expr == nullptr) { - return; - } - - if (expr->fn().name.function_name == "multi_match") { - return; - } - - auto& children = expr->children(); - for (int i = 0; i < children.size(); ++i) { - _calculate_pred_in_remaining_conjunct_root(children[i]); - } - - auto node_type = expr->node_type(); - if (node_type == TExprNodeType::SLOT_REF) { - auto slot_expr = std::dynamic_pointer_cast<doris::vectorized::VSlotRef>(expr); - if (_column_predicate_info->column_name.empty()) { - _column_predicate_info->column_name = expr->expr_name(); - _column_predicate_info->column_id = slot_expr->column_id(); - } else { - // If column name already exists, create a new ColumnPredicateInfo - // if expr is columnA > columnB, then column name will exist, in this situation, we need to add it to _column_pred_in_remaining_vconjunct - auto new_column_pred_info = std::make_shared<ColumnPredicateInfo>(); - new_column_pred_info->column_name = expr->expr_name(); - new_column_pred_info->column_id = slot_expr->column_id(); - _column_pred_in_remaining_vconjunct[new_column_pred_info->column_name].push_back( - *new_column_pred_info); - } - } else if (_is_literal_node(node_type)) { - auto v_literal_expr = static_cast<const doris::vectorized::VLiteral*>(expr.get()); - _column_predicate_info->query_values.insert(v_literal_expr->value()); - } else if (node_type == TExprNodeType::NULL_LITERAL) { - if (!_column_predicate_info->column_name.empty()) { - auto v_literal_expr = static_cast<const doris::vectorized::VLiteral*>(expr.get()); - _column_predicate_info->query_values.insert(v_literal_expr->value()); - } - } else { - if (node_type == TExprNodeType::MATCH_PRED) { - _column_predicate_info->query_op = "match"; - } else if (node_type == TExprNodeType::IN_PRED) { - if (expr->op() == TExprOpcode::type::FILTER_IN) { - _column_predicate_info->query_op = "in"; - } else { - _column_predicate_info->query_op = "not_in"; - } - } else if (node_type != TExprNodeType::COMPOUND_PRED) { - _column_predicate_info->query_op = expr->fn().name.function_name; - } - - if (!_column_predicate_info->is_empty()) { - _column_pred_in_remaining_vconjunct[_column_predicate_info->column_name].push_back( - *_column_predicate_info); - _column_predicate_info.reset(new ColumnPredicateInfo()); - } +Status SegmentIterator::_construct_compound_expr_context() { + auto inverted_index_context = std::make_shared<vectorized::InvertedIndexContext>( + _schema->column_ids(), _inverted_index_iterators, _storage_name_and_type, + _common_expr_inverted_index_status); + for (const auto& expr_ctx : _opts.common_expr_ctxs_push_down) { + vectorized::VExprContextSPtr context; + RETURN_IF_ERROR(expr_ctx->clone(_opts.runtime_state, context)); + context->set_inverted_index_context(inverted_index_context); + _common_expr_ctxs_push_down.emplace_back(context); } + return Status::OK(); } -void SegmentIterator::_calculate_func_in_remaining_conjunct_root() { - auto hash = [](const vectorized::VExprSPtr& expr) -> std::size_t { - return std::hash<std::string>()(expr->expr_name()); - }; - auto equal = [](const vectorized::VExprSPtr& lhs, const vectorized::VExprSPtr& rhs) -> bool { - return lhs->equals(*rhs); - }; - - uint32_t next_id = 0; - std::unordered_map<vectorized::VExprSPtr, uint32_t, decltype(hash), decltype(equal)> unique_map( - 0, hash, equal); - - auto gen_func_unique_id = [&unique_map, &next_id](const vectorized::VExprSPtr& expr) { - auto it = unique_map.find(expr); - if (it != unique_map.end()) { - return it->second; - } else { - unique_map[expr] = ++next_id; - return next_id; - } - }; - +void SegmentIterator::_calculate_expr_in_remaining_conjunct_root() { for (const auto& root_expr_ctx : _common_expr_ctxs_push_down) { const auto& root_expr = root_expr_ctx->root(); if (root_expr == nullptr) { continue; } - std::stack<std::pair<vectorized::VExprSPtr, bool>> stack; - stack.emplace(root_expr, false); + std::stack<vectorized::VExprSPtr> stack; + stack.emplace(root_expr); while (!stack.empty()) { - const auto& [expr, has_compound_pred] = stack.top(); + const auto& expr = stack.top(); stack.pop(); - bool current_has_compound_pred = - has_compound_pred || (expr->node_type() == TExprNodeType::COMPOUND_PRED); - - if (expr->fn().name.function_name == "multi_match") { - expr->set_index_unique_id(gen_func_unique_id(expr)); - if (current_has_compound_pred) { - compound_func_exprs.emplace_back(expr); - } else { - no_compound_func_exprs.emplace_back(expr); - } - - for (int32_t i = expr->get_num_children() - 1; i >= 0; i--) { - auto child_expr = expr->get_child(i); - if (child_expr->node_type() == TExprNodeType::type::SLOT_REF) { - std::string result_sign = BeConsts::BLOCK_TEMP_COLUMN_PREFIX + - std::to_string(expr->index_unique_id()); - _func_name_to_result_sign[child_expr->expr_name()].push_back(result_sign); + if (vectorized::VExpr::is_directly_action_on_a_slot(*expr)) { Review Comment: Duplicate loop on children. You can just loop on children and check is_slot_ref(). ########## be/src/vec/exprs/vexpr.cpp: ########## @@ -285,7 +286,11 @@ Status VExpr::create_expr(const TExprNode& expr_node, VExprSPtr& expr) { case TExprNodeType::NULL_AWARE_BINARY_PRED: case TExprNodeType::FUNCTION_CALL: case TExprNodeType::COMPUTE_FUNCTION_CALL: { - expr = VectorizedFnCall::create_shared(expr_node); + if (expr_node.fn.name.function_name == "multi_match") { Review Comment: add comment for reason of hard code. ########## be/src/olap/rowset/segment_v2/segment_iterator.cpp: ########## @@ -362,42 +354,20 @@ Status SegmentIterator::_init_impl(const StorageReadOptions& opts) { } } + RETURN_IF_ERROR(_construct_compound_expr_context()); Review Comment: If user upgrade from an old version eg. 1.2.8, common expr may be disabled by default. ########## be/src/vec/functions/match.cpp: ########## @@ -24,85 +24,122 @@ #include "util/debug_points.h" namespace doris::vectorized { +Status FunctionMatchBase::evaluate_inverted_index( + const ColumnsWithTypeAndName& arguments, + const vectorized::IndexFieldNameAndTypePair& data_type_with_name, + segment_v2::InvertedIndexIterator* iter, uint32_t num_rows, + segment_v2::InvertedIndexResultBitmap& bitmap_result) const { + DCHECK(arguments.size() == 1); + if (iter == nullptr) { + return Status::OK(); + } + if (get_name() == "match_phrase" || get_name() == "match_phrase_prefix" || Review Comment: define and use const ########## be/src/vec/functions/functions_comparison.h: ########## @@ -526,6 +528,69 @@ class FunctionComparison : public IFunction { return std::make_shared<DataTypeUInt8>(); } + Status evaluate_inverted_index( + const ColumnsWithTypeAndName& arguments, + const vectorized::IndexFieldNameAndTypePair& data_type_with_name, + segment_v2::InvertedIndexIterator* iter, uint32_t num_rows, + segment_v2::InvertedIndexResultBitmap& bitmap_result) const override { + DCHECK(arguments.size() == 1); + if (iter == nullptr) { + return Status::OK(); + } + if (iter->get_inverted_index_reader_type() == + segment_v2::InvertedIndexReaderType::FULLTEXT) { + //NOT support comparison predicate when parser is FULLTEXT for expr inverted index evaluate. + return Status::OK(); + } + std::string column_name = data_type_with_name.first; + Field param_value; + arguments[0].column->get(0, param_value); + auto param_type = arguments[0].type->get_type_as_type_descriptor().type; + + std::unique_ptr<segment_v2::InvertedIndexQueryParamFactory> query_param = nullptr; + RETURN_IF_ERROR(segment_v2::InvertedIndexQueryParamFactory::create_query_value( + param_type, ¶m_value, query_param)); + segment_v2::InvertedIndexQueryType query_type = + segment_v2::InvertedIndexQueryType::UNKNOWN_QUERY; + if (name == "eq") { Review Comment: use NameEquals::name ########## be/src/vec/functions/array/function_array_index.h: ########## @@ -115,19 +115,23 @@ class FunctionArrayIndex : public IFunction { return Status::OK(); } - /** - * eval inverted index. we can filter array rows with inverted index iter - */ - Status eval_inverted_index(FunctionContext* context, - const vectorized::IndexFieldNameAndTypePair& data_type_with_name, - segment_v2::InvertedIndexIterator* iter, uint32_t num_rows, - roaring::Roaring* bitmap) const override { + Status evaluate_inverted_index( + const ColumnsWithTypeAndName& arguments, + const vectorized::IndexFieldNameAndTypePair& data_type_with_name, + segment_v2::InvertedIndexIterator* iter, uint32_t num_rows, + segment_v2::InvertedIndexResultBitmap& bitmap_result) const override { + DCHECK(arguments.size() == 1); + if (iter == nullptr) { + return Status::OK(); + } std::shared_ptr<roaring::Roaring> roaring = std::make_shared<roaring::Roaring>(); - auto* param_value = reinterpret_cast<ParamValue*>( - context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); - if (param_value == nullptr || param_value->value.is_null()) { - return Status::Error<ErrorCode::INVERTED_INDEX_EVALUATE_SKIPPED>( - "Inverted index evaluate skipped, param_value is nullptr or value is null"); + Field param_value; + arguments[0].column->get(0, param_value); + auto param_type = arguments[0].type->get_type_as_type_descriptor().type; + if (param_value.is_null()) { + return Status::OK(); Review Comment: add comment ########## be/src/vec/exprs/vexpr_context.h: ########## @@ -73,17 +188,13 @@ class VExprContext { // execute expr with inverted index which column a, b has inverted indexes // but some situation although column b has indexes, but apply index is not useful, we should // skip this expr, just do not apply index anymore. - /** - * @param colid_to_inverted_index_iter contains all column id to inverted index iterator mapping from segmentIterator - * @param num_rows number of rows in one segment. - * @param bitmap roaring bitmap to store the result. 0 is present filed by index. - * @return status not ok means execute failed. - */ - [[nodiscard]] Status eval_inverted_index( - const std::unordered_map<ColumnId, std::pair<vectorized::IndexFieldNameAndTypePair, - segment_v2::InvertedIndexIterator*>>& - colid_to_inverted_index_iter, - uint32_t num_rows, roaring::Roaring* bitmap); + + [[nodiscard]] static Status evaluate_inverted_index(const VExprContextSPtrs& conjuncts, Review Comment: Where is this funciton called? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org