This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 4b60265744ae7909f7acf92ff46edaa5e8b7fe31 Author: Mryange <59914473+mrya...@users.noreply.github.com> AuthorDate: Tue Apr 9 15:57:49 2024 +0800 [feature](expr) add type check when expr prepare (#33330) --- be/src/exec/exec_node.cpp | 12 +++++----- be/src/pipeline/pipeline_x/operator.cpp | 5 ++++ be/src/pipeline/pipeline_x/operator.h | 2 ++ be/src/vec/core/columns_with_type_and_name.h | 2 +- be/src/vec/exec/scan/vscanner.cpp | 7 +----- be/src/vec/exprs/vexpr.cpp | 35 ++++++++++++++++++++++++++++ be/src/vec/exprs/vexpr.h | 3 +++ be/src/vec/utils/util.hpp | 19 ++++++++++++--- 8 files changed, 69 insertions(+), 16 deletions(-) diff --git a/be/src/exec/exec_node.cpp b/be/src/exec/exec_node.cpp index 63b88aa9de2..7fdda0c5c87 100644 --- a/be/src/exec/exec_node.cpp +++ b/be/src/exec/exec_node.cpp @@ -170,6 +170,11 @@ Status ExecNode::prepare(RuntimeState* state) { RETURN_IF_ERROR(vectorized::VExpr::prepare(_projections, state, projections_row_desc())); + if (has_output_row_descriptor()) { + RETURN_IF_ERROR( + vectorized::VExpr::check_expr_output_type(_projections, *_output_row_descriptor)); + } + for (auto& i : _children) { RETURN_IF_ERROR(i->prepare(state)); } @@ -582,12 +587,7 @@ Status ExecNode::do_projections(vectorized::Block* origin_block, vectorized::Blo auto& mutable_columns = mutable_block.mutable_columns(); - if (mutable_columns.size() != _projections.size()) { - return Status::InternalError( - "Logical error during processing {}, output of projections {} mismatches with " - "exec node output {}", - this->get_name(), _projections.size(), mutable_columns.size()); - } + DCHECK_EQ(mutable_columns.size(), _projections.size()); for (int i = 0; i < mutable_columns.size(); ++i) { auto result_column_id = -1; diff --git a/be/src/pipeline/pipeline_x/operator.cpp b/be/src/pipeline/pipeline_x/operator.cpp index 4a16cb65a01..08f0c4b73cc 100644 --- a/be/src/pipeline/pipeline_x/operator.cpp +++ b/be/src/pipeline/pipeline_x/operator.cpp @@ -152,6 +152,11 @@ Status OperatorXBase::prepare(RuntimeState* state) { } RETURN_IF_ERROR(vectorized::VExpr::prepare(_projections, state, projections_row_desc())); + if (has_output_row_desc()) { + RETURN_IF_ERROR( + vectorized::VExpr::check_expr_output_type(_projections, *_output_row_descriptor)); + } + if (_child_x && !is_source()) { RETURN_IF_ERROR(_child_x->prepare(state)); } diff --git a/be/src/pipeline/pipeline_x/operator.h b/be/src/pipeline/pipeline_x/operator.h index 45e42390bc5..7a0a5d12172 100644 --- a/be/src/pipeline/pipeline_x/operator.h +++ b/be/src/pipeline/pipeline_x/operator.h @@ -327,6 +327,8 @@ public: return _output_row_descriptor.get(); } + bool has_output_row_desc() const { return _output_row_descriptor != nullptr; } + [[nodiscard]] bool is_source() const override { return false; } [[nodiscard]] virtual Status get_block_after_projects(RuntimeState* state, diff --git a/be/src/vec/core/columns_with_type_and_name.h b/be/src/vec/core/columns_with_type_and_name.h index c70775fcaea..82eae3158ab 100644 --- a/be/src/vec/core/columns_with_type_and_name.h +++ b/be/src/vec/core/columns_with_type_and_name.h @@ -31,5 +31,5 @@ namespace doris::vectorized { using ColumnsWithTypeAndName = std::vector<ColumnWithTypeAndName>; using NameAndTypePair = std::pair<std::string, DataTypePtr>; - +using NameAndTypePairs = std::vector<NameAndTypePair>; } // namespace doris::vectorized diff --git a/be/src/vec/exec/scan/vscanner.cpp b/be/src/vec/exec/scan/vscanner.cpp index de0b6b45691..ed972badb88 100644 --- a/be/src/vec/exec/scan/vscanner.cpp +++ b/be/src/vec/exec/scan/vscanner.cpp @@ -212,12 +212,7 @@ Status VScanner::_do_projections(vectorized::Block* origin_block, vectorized::Bl auto& mutable_columns = mutable_block.mutable_columns(); - if (mutable_columns.size() != _projections.size()) { - return Status::InternalError( - "Logical error in scanner, output of projections {} mismatches with " - "scanner output {}", - _projections.size(), mutable_columns.size()); - } + DCHECK_EQ(mutable_columns.size(), _projections.size()); for (int i = 0; i < mutable_columns.size(); ++i) { auto result_column_id = -1; diff --git a/be/src/vec/exprs/vexpr.cpp b/be/src/vec/exprs/vexpr.cpp index ee811c65aba..1fc74deb8c9 100644 --- a/be/src/vec/exprs/vexpr.cpp +++ b/be/src/vec/exprs/vexpr.cpp @@ -19,6 +19,7 @@ #include <fmt/format.h> #include <gen_cpp/Exprs_types.h> +#include <gen_cpp/FrontendService_types.h> #include <thrift/protocol/TDebugProtocol.h> #include <algorithm> @@ -32,6 +33,7 @@ #include "vec/columns/column_vector.h" #include "vec/columns/columns_number.h" #include "vec/data_types/data_type_factory.hpp" +#include "vec/data_types/data_type_nullable.h" #include "vec/data_types/data_type_number.h" #include "vec/exprs/varray_literal.h" #include "vec/exprs/vcase_expr.h" @@ -407,6 +409,39 @@ Status VExpr::create_expr_trees(const std::vector<TExpr>& texprs, VExprContextSP return Status::OK(); } +Status VExpr::check_expr_output_type(const VExprContextSPtrs& ctxs, + const RowDescriptor& output_row_desc) { + if (ctxs.empty()) { + return Status::OK(); + } + auto name_and_types = VectorizedUtils::create_name_and_data_types(output_row_desc); + if (ctxs.size() != name_and_types.size()) { + return Status::InternalError( + "output type size not match expr size {} , expected output size {} ", ctxs.size(), + name_and_types.size()); + } + auto check_type_can_be_converted = [](DataTypePtr& from, DataTypePtr& to) -> bool { + if (to->equals(*from)) { + return true; + } + if (to->is_nullable() && !from->is_nullable()) { + return remove_nullable(to)->equals(*from); + } + return false; + }; + for (int i = 0; i < ctxs.size(); i++) { + auto real_expr_type = ctxs[i]->root()->data_type(); + auto&& [name, expected_type] = name_and_types[i]; + if (!check_type_can_be_converted(real_expr_type, expected_type)) { + return Status::InternalError( + "output type not match expr type , col name {} , expected type {} , real type " + "{}", + name, expected_type->get_name(), real_expr_type->get_name()); + } + } + return Status::OK(); +} + Status VExpr::prepare(const VExprContextSPtrs& ctxs, RuntimeState* state, const RowDescriptor& row_desc) { for (auto ctx : ctxs) { diff --git a/be/src/vec/exprs/vexpr.h b/be/src/vec/exprs/vexpr.h index 42a46d8a8f3..57bb4a1cf6d 100644 --- a/be/src/vec/exprs/vexpr.h +++ b/be/src/vec/exprs/vexpr.h @@ -162,6 +162,9 @@ public: static Status create_tree_from_thrift(const std::vector<TExprNode>& nodes, int* node_idx, VExprSPtr& root_expr, VExprContextSPtr& ctx); + + static Status check_expr_output_type(const VExprContextSPtrs& ctxs, + const RowDescriptor& output_row_desc); virtual const VExprSPtrs& children() const { return _children; } void set_children(const VExprSPtrs& children) { _children = children; } void set_children(VExprSPtrs&& children) { _children = std::move(children); } diff --git a/be/src/vec/utils/util.hpp b/be/src/vec/utils/util.hpp index 440bbff1538..30609799e7f 100644 --- a/be/src/vec/utils/util.hpp +++ b/be/src/vec/utils/util.hpp @@ -66,12 +66,12 @@ public: } return MutableBlock(block); } - static ColumnsWithTypeAndName create_columns_with_type_and_name( - const RowDescriptor& row_desc, bool ignore_trivial_slot = true) { + + static ColumnsWithTypeAndName create_columns_with_type_and_name(const RowDescriptor& row_desc) { ColumnsWithTypeAndName columns_with_type_and_name; for (const auto& tuple_desc : row_desc.tuple_descriptors()) { for (const auto& slot_desc : tuple_desc->slots()) { - if (ignore_trivial_slot && !slot_desc->need_materialize()) { + if (!slot_desc->need_materialize()) { continue; } columns_with_type_and_name.emplace_back(nullptr, slot_desc->get_data_type_ptr(), @@ -81,6 +81,19 @@ public: return columns_with_type_and_name; } + static NameAndTypePairs create_name_and_data_types(const RowDescriptor& row_desc) { + NameAndTypePairs name_with_types; + for (const auto& tuple_desc : row_desc.tuple_descriptors()) { + for (const auto& slot_desc : tuple_desc->slots()) { + if (!slot_desc->need_materialize()) { + continue; + } + name_with_types.emplace_back(slot_desc->col_name(), slot_desc->get_data_type_ptr()); + } + } + return name_with_types; + } + static ColumnsWithTypeAndName create_empty_block(const RowDescriptor& row_desc, bool ignore_trivial_slot = true) { ColumnsWithTypeAndName columns_with_type_and_name; --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org