This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new 5a810122a22 [debug](load) check the column type when string column is 
invalid (#39337)
5a810122a22 is described below

commit 5a810122a22bbd4e600630ac3b66a84570e10123
Author: Jerry Hu <mrh...@gmail.com>
AuthorDate: Sat Aug 24 18:14:21 2024 +0800

    [debug](load) check the column type when string column is invalid (#39337)
    
    ## Proposed changes
    
    Issue Number: close #xxx
    
    <!--Describe your changes.-->
---
 be/src/vec/sink/vtablet_block_convertor.cpp | 28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/be/src/vec/sink/vtablet_block_convertor.cpp 
b/be/src/vec/sink/vtablet_block_convertor.cpp
index 4446e44f431..96de68f5976 100644
--- a/be/src/vec/sink/vtablet_block_convertor.cpp
+++ b/be/src/vec/sink/vtablet_block_convertor.cpp
@@ -202,10 +202,11 @@ Status 
OlapTableBlockConvertor::_validate_column(RuntimeState* state, const Type
         return ret;
     };
 
-    auto column_ptr = 
vectorized::check_and_get_column<vectorized::ColumnNullable>(*column);
-    auto& real_column_ptr = column_ptr == nullptr ? column : 
(column_ptr->get_nested_column_ptr());
-    auto null_map = column_ptr == nullptr ? nullptr : 
column_ptr->get_null_map_data().data();
-    auto need_to_validate = [&null_map, this](size_t j, size_t row) {
+    const auto* column_ptr = 
vectorized::check_and_get_column<vectorized::ColumnNullable>(*column);
+    const auto& real_column_ptr =
+            column_ptr == nullptr ? column : 
(column_ptr->get_nested_column_ptr());
+    const auto* null_map = column_ptr == nullptr ? nullptr : 
column_ptr->get_null_map_data().data();
+    const auto need_to_validate = [&null_map, this](size_t j, size_t row) {
         return !_filter_map[row] && (null_map == nullptr || null_map[j] == 0);
     };
 
@@ -213,7 +214,7 @@ Status 
OlapTableBlockConvertor::_validate_column(RuntimeState* state, const Type
     case TYPE_CHAR:
     case TYPE_VARCHAR:
     case TYPE_STRING: {
-        const auto column_string =
+        const auto* column_string =
                 assert_cast<const 
vectorized::ColumnString*>(real_column_ptr.get());
 
         size_t limit = config::string_type_length_soft_limit_bytes;
@@ -222,12 +223,22 @@ Status 
OlapTableBlockConvertor::_validate_column(RuntimeState* state, const Type
             limit = std::min(config::string_type_length_soft_limit_bytes, 
type.len);
         }
 
-        auto* __restrict offsets = column_string->get_offsets().data();
+        const auto* __restrict offsets = column_string->get_offsets().data();
         int invalid_count = 0;
         for (int j = 0; j < row_count; ++j) {
             invalid_count += (offsets[j] - offsets[j - 1]) > limit;
         }
 
+        auto check_column_type = [&]() {
+            const auto& real_column = *real_column_ptr;
+            if (nullptr == dynamic_cast<const 
vectorized::ColumnString*>(&real_column)) {
+                return Status::InternalError(
+                        "invalid column(#{}) type: {}, expect type: 
ColumnString, is nereids: {}",
+                        slot_index, demangle(typeid(real_column).name()), 
state->is_nereids());
+            }
+            return Status::OK();
+        };
+
         if (invalid_count) {
             for (size_t j = 0; j < row_count; ++j) {
                 auto row = rows ? (*rows)[j] : j;
@@ -235,6 +246,11 @@ Status 
OlapTableBlockConvertor::_validate_column(RuntimeState* state, const Type
                     auto str_val = column_string->get_data_at(j);
                     bool invalid = str_val.size > limit;
                     if (invalid) {
+                        auto st = check_column_type();
+                        if (!st.ok()) {
+                            LOG(WARNING) << "check column type failed: " << 
st.to_string();
+                            return st;
+                        }
                         if (str_val.size > type.len) {
                             fmt::format_to(error_msg, "{}",
                                            "the length of input is too long 
than schema. ");


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to