This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 41b5aeb1c5 [fix](concat) ColumnString::chars is resized with wrong 
size (#22610)
41b5aeb1c5 is described below

commit 41b5aeb1c5332e41dca42ac8bd6558ed5057b184
Author: TengJianPing <18241664+jackte...@users.noreply.github.com>
AuthorDate: Fri Aug 4 19:13:35 2023 +0800

    [fix](concat) ColumnString::chars is resized with wrong size (#22610)
    
    FunctionStringConcat::execute_impl resized the result with a size that 
included one null terminator per row, so ColumnString::chars.size() did not 
match ColumnString::offsets.back(). This causes problems for some string 
functions, e.g. like and regexp.
---
 be/src/vec/columns/column_string.cpp    | 17 ++++++++++
 be/src/vec/columns/column_string.h      |  2 ++
 be/src/vec/functions/function_string.h  |  2 --
 be/test/vec/core/column_string_test.cpp | 59 +++++++++++++++++++++++++++++++++
 4 files changed, 78 insertions(+), 2 deletions(-)

diff --git a/be/src/vec/columns/column_string.cpp 
b/be/src/vec/columns/column_string.cpp
index ed3cd28be9..5d2670acb7 100644
--- a/be/src/vec/columns/column_string.cpp
+++ b/be/src/vec/columns/column_string.cpp
@@ -35,6 +35,23 @@
 
 namespace doris::vectorized {
 
+void ColumnString::sanity_check() const { // Checks that offsets and chars agree; logs at FATAL on any mismatch.
+    auto count = offsets.size(); // number of offsets entries, reported as "row count" in the messages below
+    if (chars.size() != offsets[count - 1]) { // the last offset must equal the total number of bytes in chars
+        LOG(FATAL) << "row count: " << count << ", chars.size(): " << 
chars.size() << ", offset["
+                   << count - 1 << "]: " << offsets[count - 1]; // NOTE(review): count - 1 wraps when count == 0 — confirm offsets tolerates that index
+    }
+    if (offsets[-1] != 0) { // NOTE(review): presumably offsets has a readable element before index 0 that must be 0 — confirm
+        LOG(FATAL) << "wrong offsets[-1]: " << offsets[-1];
+    }
+    for (size_t i = 0; i < count; ++i) { // every offset must be >= the one before it
+        if (offsets[i] < offsets[i - 1]) { // NOTE(review): for i == 0, i - 1 wraps as size_t; presumably resolves to offsets[-1] — confirm
+            LOG(FATAL) << "row count: " << count << ", offsets[" << i << "]: " 
<< offsets[i]
+                       << ", offsets[" << i - 1 << "]: " << offsets[i - 1];
+        }
+    }
+}
+
 MutableColumnPtr ColumnString::clone_resized(size_t to_size) const {
     auto res = ColumnString::create();
     if (to_size == 0) {
diff --git a/be/src/vec/columns/column_string.h 
b/be/src/vec/columns/column_string.h
index 63ebeb4686..26a7093140 100644
--- a/be/src/vec/columns/column_string.h
+++ b/be/src/vec/columns/column_string.h
@@ -106,6 +106,8 @@ private:
               chars(src.chars.begin(), src.chars.end()) {}
 
 public:
+    void sanity_check() const;
+
     const char* get_family_name() const override { return "String"; }
 
     size_t size() const override { return offsets.size(); }
diff --git a/be/src/vec/functions/function_string.h 
b/be/src/vec/functions/function_string.h
index 83f98d726a..32e373ffa0 100644
--- a/be/src/vec/functions/function_string.h
+++ b/be/src/vec/functions/function_string.h
@@ -776,8 +776,6 @@ public:
         if ((UNLIKELY(UINT_MAX - input_rows_count < res_reserve_size))) {
             return Status::BufferAllocFailed("concat output is too large to 
allocate");
         }
-        // for each terminal zero
-        res_reserve_size += input_rows_count;
 
         res_data.resize(res_reserve_size);
 
diff --git a/be/test/vec/core/column_string_test.cpp 
b/be/test/vec/core/column_string_test.cpp
new file mode 100644
index 0000000000..81f41bd11c
--- /dev/null
+++ b/be/test/vec/core/column_string_test.cpp
@@ -0,0 +1,59 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/columns/column_string.h"
+
+#include <gtest/gtest.h>
+
+#include "vec/core/block.h"
+#include "vec/data_types/data_type_string.h"
+#include "vec/functions/function_string.h"
+
+namespace doris::vectorized {
+TEST(ColumnStringTest, TestConcat) { // Runs FunctionStringConcat on two 3-row string columns and sanity-checks the result column.
+    Block block;
+    vectorized::DataTypePtr str_type = 
std::make_shared<vectorized::DataTypeString>();
+
+    auto str_col0 = ColumnString::create();
+    std::vector<std::string> vals0 = {"aaa", "bb", "cccc"};
+    for (auto& v : vals0) {
+        str_col0->insert_data(v.data(), v.size());
+    }
+    block.insert({std::move(str_col0), str_type, "test_str_col0"}); // first insert; referenced as 0 in arguments below
+
+    auto str_col1 = ColumnString::create();
+    std::vector<std::string> vals1 = {"3", "2", "4"};
+    for (auto& v : vals1) {
+        str_col1->insert_data(v.data(), v.size());
+    }
+    block.insert({std::move(str_col1), str_type, "test_str_col1"}); // second insert; referenced as 1 in arguments below
+
+    auto str_col_res = ColumnString::create();
+    block.insert({std::move(str_col_res), str_type, "test_str_res"}); // third insert; read back via get_by_position(2)
+
+    ColumnNumbers arguments = {0, 1}; // the two input columns inserted above
+
+    FunctionStringConcat func_concat;
+    auto status = func_concat.execute_impl(nullptr, block, arguments, 2, 3); // NOTE(review): presumably 2 = result position, 3 = input row count — confirm
+    EXPECT_TRUE(status.ok());
+
+    auto actual_res_col = block.get_by_position(2).column;
+    EXPECT_EQ(actual_res_col->size(), 3); // same number of rows as each input column
+    auto actual_res_col_str = assert_cast<const 
ColumnString*>(actual_res_col.get());
+    actual_res_col_str->sanity_check(); // logs at FATAL if the result's chars and offsets are inconsistent
+}
+} // namespace doris::vectorized
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to