This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 4b60265744ae7909f7acf92ff46edaa5e8b7fe31
Author: Mryange <59914473+mrya...@users.noreply.github.com>
AuthorDate: Tue Apr 9 15:57:49 2024 +0800

    [feature](expr) add type check when expr prepare  (#33330)
---
 be/src/exec/exec_node.cpp                    | 12 +++++-----
 be/src/pipeline/pipeline_x/operator.cpp      |  5 ++++
 be/src/pipeline/pipeline_x/operator.h        |  2 ++
 be/src/vec/core/columns_with_type_and_name.h |  2 +-
 be/src/vec/exec/scan/vscanner.cpp            |  7 +-----
 be/src/vec/exprs/vexpr.cpp                   | 35 ++++++++++++++++++++++++++++
 be/src/vec/exprs/vexpr.h                     |  3 +++
 be/src/vec/utils/util.hpp                    | 19 ++++++++++++---
 8 files changed, 69 insertions(+), 16 deletions(-)

diff --git a/be/src/exec/exec_node.cpp b/be/src/exec/exec_node.cpp
index 63b88aa9de2..7fdda0c5c87 100644
--- a/be/src/exec/exec_node.cpp
+++ b/be/src/exec/exec_node.cpp
@@ -170,6 +170,11 @@ Status ExecNode::prepare(RuntimeState* state) {
 
     RETURN_IF_ERROR(vectorized::VExpr::prepare(_projections, state, projections_row_desc()));
 
+    if (has_output_row_descriptor()) {
+        RETURN_IF_ERROR(
+                vectorized::VExpr::check_expr_output_type(_projections, *_output_row_descriptor));
+    }
+
     for (auto& i : _children) {
         RETURN_IF_ERROR(i->prepare(state));
     }
@@ -582,12 +587,7 @@ Status ExecNode::do_projections(vectorized::Block* origin_block, vectorized::Blo
 
     auto& mutable_columns = mutable_block.mutable_columns();
 
-    if (mutable_columns.size() != _projections.size()) {
-        return Status::InternalError(
-                "Logical error during processing {}, output of projections {} mismatches with "
-                "exec node output {}",
-                this->get_name(), _projections.size(), mutable_columns.size());
-    }
+    DCHECK_EQ(mutable_columns.size(), _projections.size());
 
     for (int i = 0; i < mutable_columns.size(); ++i) {
         auto result_column_id = -1;
diff --git a/be/src/pipeline/pipeline_x/operator.cpp b/be/src/pipeline/pipeline_x/operator.cpp
index 4a16cb65a01..08f0c4b73cc 100644
--- a/be/src/pipeline/pipeline_x/operator.cpp
+++ b/be/src/pipeline/pipeline_x/operator.cpp
@@ -152,6 +152,11 @@ Status OperatorXBase::prepare(RuntimeState* state) {
     }
     RETURN_IF_ERROR(vectorized::VExpr::prepare(_projections, state, projections_row_desc()));
 
+    if (has_output_row_desc()) {
+        RETURN_IF_ERROR(
+                vectorized::VExpr::check_expr_output_type(_projections, *_output_row_descriptor));
+    }
+
     if (_child_x && !is_source()) {
         RETURN_IF_ERROR(_child_x->prepare(state));
     }
diff --git a/be/src/pipeline/pipeline_x/operator.h b/be/src/pipeline/pipeline_x/operator.h
index 45e42390bc5..7a0a5d12172 100644
--- a/be/src/pipeline/pipeline_x/operator.h
+++ b/be/src/pipeline/pipeline_x/operator.h
@@ -327,6 +327,8 @@ public:
         return _output_row_descriptor.get();
     }
 
+    bool has_output_row_desc() const { return _output_row_descriptor != nullptr; }
+
     [[nodiscard]] bool is_source() const override { return false; }
 
     [[nodiscard]] virtual Status get_block_after_projects(RuntimeState* state,
diff --git a/be/src/vec/core/columns_with_type_and_name.h b/be/src/vec/core/columns_with_type_and_name.h
index c70775fcaea..82eae3158ab 100644
--- a/be/src/vec/core/columns_with_type_and_name.h
+++ b/be/src/vec/core/columns_with_type_and_name.h
@@ -31,5 +31,5 @@ namespace doris::vectorized {
 
 using ColumnsWithTypeAndName = std::vector<ColumnWithTypeAndName>;
 using NameAndTypePair = std::pair<std::string, DataTypePtr>;
-
+using NameAndTypePairs = std::vector<NameAndTypePair>;
 } // namespace doris::vectorized
diff --git a/be/src/vec/exec/scan/vscanner.cpp b/be/src/vec/exec/scan/vscanner.cpp
index de0b6b45691..ed972badb88 100644
--- a/be/src/vec/exec/scan/vscanner.cpp
+++ b/be/src/vec/exec/scan/vscanner.cpp
@@ -212,12 +212,7 @@ Status VScanner::_do_projections(vectorized::Block* origin_block, vectorized::Bl
 
     auto& mutable_columns = mutable_block.mutable_columns();
 
-    if (mutable_columns.size() != _projections.size()) {
-        return Status::InternalError(
-                "Logical error in scanner, output of projections {} mismatches with "
-                "scanner output {}",
-                _projections.size(), mutable_columns.size());
-    }
+    DCHECK_EQ(mutable_columns.size(), _projections.size());
 
     for (int i = 0; i < mutable_columns.size(); ++i) {
         auto result_column_id = -1;
diff --git a/be/src/vec/exprs/vexpr.cpp b/be/src/vec/exprs/vexpr.cpp
index ee811c65aba..1fc74deb8c9 100644
--- a/be/src/vec/exprs/vexpr.cpp
+++ b/be/src/vec/exprs/vexpr.cpp
@@ -19,6 +19,7 @@
 
 #include <fmt/format.h>
 #include <gen_cpp/Exprs_types.h>
+#include <gen_cpp/FrontendService_types.h>
 #include <thrift/protocol/TDebugProtocol.h>
 
 #include <algorithm>
@@ -32,6 +33,7 @@
 #include "vec/columns/column_vector.h"
 #include "vec/columns/columns_number.h"
 #include "vec/data_types/data_type_factory.hpp"
+#include "vec/data_types/data_type_nullable.h"
 #include "vec/data_types/data_type_number.h"
 #include "vec/exprs/varray_literal.h"
 #include "vec/exprs/vcase_expr.h"
@@ -407,6 +409,39 @@ Status VExpr::create_expr_trees(const std::vector<TExpr>& texprs, VExprContextSP
     return Status::OK();
 }
 
+Status VExpr::check_expr_output_type(const VExprContextSPtrs& ctxs,
+                                     const RowDescriptor& output_row_desc) {
+    if (ctxs.empty()) {
+        return Status::OK();
+    }
+    auto name_and_types = VectorizedUtils::create_name_and_data_types(output_row_desc);
+    if (ctxs.size() != name_and_types.size()) {
+        return Status::InternalError(
+                "output type size not match expr size {} , expected output size {} ", ctxs.size(),
+                name_and_types.size());
+    }
+    auto check_type_can_be_converted = [](DataTypePtr& from, DataTypePtr& to) -> bool {
+        if (to->equals(*from)) {
+            return true;
+        }
+        if (to->is_nullable() && !from->is_nullable()) {
+            return remove_nullable(to)->equals(*from);
+        }
+        return false;
+    };
+    for (int i = 0; i < ctxs.size(); i++) {
+        auto real_expr_type = ctxs[i]->root()->data_type();
+        auto&& [name, expected_type] = name_and_types[i];
+        if (!check_type_can_be_converted(real_expr_type, expected_type)) {
+            return Status::InternalError(
+                    "output type not match expr type  , col name {} , expected type {} , real type "
+                    "{}",
+                    name, expected_type->get_name(), real_expr_type->get_name());
+        }
+    }
+    return Status::OK();
+}
+
 Status VExpr::prepare(const VExprContextSPtrs& ctxs, RuntimeState* state,
                       const RowDescriptor& row_desc) {
     for (auto ctx : ctxs) {
diff --git a/be/src/vec/exprs/vexpr.h b/be/src/vec/exprs/vexpr.h
index 42a46d8a8f3..57bb4a1cf6d 100644
--- a/be/src/vec/exprs/vexpr.h
+++ b/be/src/vec/exprs/vexpr.h
@@ -162,6 +162,9 @@ public:
 
     static Status create_tree_from_thrift(const std::vector<TExprNode>& nodes, int* node_idx,
                                           VExprSPtr& root_expr, VExprContextSPtr& ctx);
+
+    static Status check_expr_output_type(const VExprContextSPtrs& ctxs,
+                                         const RowDescriptor& output_row_desc);
     virtual const VExprSPtrs& children() const { return _children; }
     void set_children(const VExprSPtrs& children) { _children = children; }
     void set_children(VExprSPtrs&& children) { _children = std::move(children); }
diff --git a/be/src/vec/utils/util.hpp b/be/src/vec/utils/util.hpp
index 440bbff1538..30609799e7f 100644
--- a/be/src/vec/utils/util.hpp
+++ b/be/src/vec/utils/util.hpp
@@ -66,12 +66,12 @@ public:
         }
         return MutableBlock(block);
     }
-    static ColumnsWithTypeAndName create_columns_with_type_and_name(
-            const RowDescriptor& row_desc, bool ignore_trivial_slot = true) {
+
+    static ColumnsWithTypeAndName create_columns_with_type_and_name(const RowDescriptor& row_desc) {
         ColumnsWithTypeAndName columns_with_type_and_name;
         for (const auto& tuple_desc : row_desc.tuple_descriptors()) {
             for (const auto& slot_desc : tuple_desc->slots()) {
-                if (ignore_trivial_slot && !slot_desc->need_materialize()) {
+                if (!slot_desc->need_materialize()) {
                     continue;
                 }
                 columns_with_type_and_name.emplace_back(nullptr, slot_desc->get_data_type_ptr(),
@@ -81,6 +81,19 @@ public:
         return columns_with_type_and_name;
     }
 
+    static NameAndTypePairs create_name_and_data_types(const RowDescriptor& row_desc) {
+        NameAndTypePairs name_with_types;
+        for (const auto& tuple_desc : row_desc.tuple_descriptors()) {
+            for (const auto& slot_desc : tuple_desc->slots()) {
+                if (!slot_desc->need_materialize()) {
+                    continue;
+                }
+                name_with_types.emplace_back(slot_desc->col_name(), slot_desc->get_data_type_ptr());
+            }
+        }
+        return name_with_types;
+    }
+
     static ColumnsWithTypeAndName create_empty_block(const RowDescriptor& row_desc,
                                                      bool ignore_trivial_slot = true) {
         ColumnsWithTypeAndName columns_with_type_and_name;


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to