This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new 53773ae6b77 [opt](join) check datatype of intermediate slots in hash join (#38556) (#38792)
53773ae6b77 is described below

commit 53773ae6b77350890ba5d370876f9030cbb36972
Author: Jerry Hu <mrh...@gmail.com>
AuthorDate: Mon Aug 5 09:03:21 2024 +0800

    [opt](join) check datatype of intermediate slots in hash join (#38556) (#38792)
    
    ## Proposed changes
    
    pick #38556
---
 be/src/pipeline/exec/hashjoin_probe_operator.cpp | 51 ++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/be/src/pipeline/exec/hashjoin_probe_operator.cpp 
b/be/src/pipeline/exec/hashjoin_probe_operator.cpp
index 002a79f2db2..b7dd0622fe3 100644
--- a/be/src/pipeline/exec/hashjoin_probe_operator.cpp
+++ b/be/src/pipeline/exec/hashjoin_probe_operator.cpp
@@ -21,6 +21,9 @@
 
 #include "common/logging.h"
 #include "pipeline/exec/operator.h"
+#include "runtime/descriptors.h"
+#include "vec/common/assert_cast.h"
+#include "vec/data_types/data_type_nullable.h"
 
 namespace doris {
 namespace pipeline {
@@ -637,6 +640,54 @@ Status HashJoinProbeOperatorX::prepare(RuntimeState* 
state) {
     _left_table_data_types = 
vectorized::VectorizedUtils::get_data_types(_child_x->row_desc());
     _right_table_column_names =
             
vectorized::VectorizedUtils::get_column_names(_build_side_child->row_desc());
+
+    std::vector<const SlotDescriptor*> slots_to_check;
+    for (const auto& tuple_descriptor : 
_intermediate_row_desc->tuple_descriptors()) {
+        for (const auto& slot : tuple_descriptor->slots()) {
+            slots_to_check.emplace_back(slot);
+        }
+    }
+
+    if (_is_mark_join) {
+        const auto* last_one = slots_to_check.back();
+        slots_to_check.pop_back();
+        auto data_type = last_one->get_data_type_ptr();
+        if (!data_type->is_nullable()) {
+            return Status::InternalError(
+                    "The last column for mark join should be Nullable(UInt8), 
not {}",
+                    data_type->get_name());
+        }
+
+        const auto& null_data_type = assert_cast<const 
vectorized::DataTypeNullable&>(*data_type);
+        if (null_data_type.get_nested_type()->get_type_id() != 
vectorized::TypeIndex::UInt8) {
+            return Status::InternalError(
+                    "The last column for mark join should be Nullable(UInt8), 
not {}",
+                    data_type->get_name());
+        }
+    }
+
+    const int right_col_idx =
+            (_is_right_semi_anti && !_have_other_join_conjunct) ? 0 : 
_left_table_data_types.size();
+    size_t idx = 0;
+    for (const auto* slot : slots_to_check) {
+        auto data_type = slot->get_data_type_ptr();
+        auto target_data_type = idx < right_col_idx ? 
_left_table_data_types[idx]
+                                                    : 
_right_table_data_types[idx - right_col_idx];
+        ++idx;
+        if (data_type->equals(*target_data_type)) {
+            continue;
+        }
+
+        auto data_type_non_nullable = vectorized::remove_nullable(data_type);
+        if (data_type_non_nullable->equals(*target_data_type)) {
+            continue;
+        }
+
+        return Status::InternalError("intermediate slot({}) data type not 
match: '{}' vs '{}'",
+                                     slot->id(), data_type->get_name(),
+                                     _left_table_data_types[idx]->get_name());
+    }
+
     _build_side_child.reset();
     return Status::OK();
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to