This is an automated email from the ASF dual-hosted git repository. lihaopeng pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push: new 13c1d20426 [Bug] [Vectorized] add padding when load char type data (#9734) 13c1d20426 is described below commit 13c1d204266abb3e817ee4c43bda57153b3b54e4 Author: Pxl <pxl...@qq.com> AuthorDate: Thu May 26 16:51:01 2022 +0800 [Bug] [Vectorized] add padding when load char type data (#9734) --- be/CMakeLists.txt | 2 +- be/src/olap/memtable.cpp | 2 +- be/src/vec/columns/column_string.h | 1 + be/src/vec/olap/olap_data_convertor.cpp | 6 +++ be/src/vec/olap/olap_data_convertor.h | 31 +++++++++++++ be/test/CMakeLists.txt | 1 + be/test/vec/exec/vtablet_sink_test.cpp | 2 +- be/test/vec/olap/char_type_padding_test.cpp | 68 +++++++++++++++++++++++++++++ 8 files changed, 110 insertions(+), 3 deletions(-) diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt index 7c53258326..6079cfa5fc 100644 --- a/be/CMakeLists.txt +++ b/be/CMakeLists.txt @@ -396,7 +396,7 @@ set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -DBOOST_SYSTEM_NO_DEPRECATED") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -DBRPC_ENABLE_CPU_PROFILER") if (USE_LLD) - set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -fuse-ld=lld") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=lld") endif () if (USE_LIBCPP AND COMPILER_CLANG) diff --git a/be/src/olap/memtable.cpp b/be/src/olap/memtable.cpp index cf58f7e4ba..77b0313004 100644 --- a/be/src/olap/memtable.cpp +++ b/be/src/olap/memtable.cpp @@ -282,7 +282,7 @@ void MemTable::_collect_vskiplist_results() { _input_mutable_block.swap(_output_mutable_block); //TODO(weixang):opt here. std::unique_ptr<vectorized::Block> empty_input_block = - std::move(in_block.create_same_struct_block(0)); + in_block.create_same_struct_block(0); _output_mutable_block = vectorized::MutableBlock::build_mutable_block(empty_input_block.get()); _output_mutable_block.clear_column_data(); diff --git a/be/src/vec/columns/column_string.h b/be/src/vec/columns/column_string.h index 47cb68c4fa..421255a77b 100644 --- a/be/src/vec/columns/column_string.h +++ b/be/src/vec/columns/column_string.h @@ -45,6 +45,7 @@ public: private: friend class COWHelper<IColumn, ColumnString>; + friend class OlapBlockDataConvertor; /// Maps i'th position to offset to i+1'th element. Last offset maps to the end of all chars (is the size of all chars). Offsets offsets; diff --git a/be/src/vec/olap/olap_data_convertor.cpp b/be/src/vec/olap/olap_data_convertor.cpp index 3c03cf5111..d1910919b5 100644 --- a/be/src/vec/olap/olap_data_convertor.cpp +++ b/be/src/vec/olap/olap_data_convertor.cpp @@ -358,6 +358,12 @@ Status OlapBlockDataConvertor::OlapColumnDataConvertorChar::convert_to_olap() { assert(column_string); + // If column_string is not padded to full, we should do padding here. + if (should_padding(column_string, _length)) { + _column = clone_and_padding(column_string, _length); + column_string = assert_cast<const vectorized::ColumnString*>(_column.get()); + } + const ColumnString::Char* char_data = column_string->get_chars().data(); const ColumnString::Offset* offset_cur = column_string->get_offsets().data() + _row_pos; const ColumnString::Offset* offset_end = offset_cur + _num_rows; diff --git a/be/src/vec/olap/olap_data_convertor.h b/be/src/vec/olap/olap_data_convertor.h index eb104b1414..d0f5d01525 100644 --- a/be/src/vec/olap/olap_data_convertor.h +++ b/be/src/vec/olap/olap_data_convertor.h @@ -17,6 +17,9 @@ #pragma once #include "olap/tablet_schema.h" +#include "vec/columns/column.h" +#include "vec/columns/column_string.h" +#include "vec/common/string_ref.h" #include "vec/core/block.h" namespace doris::vectorized { @@ -99,8 +102,36 @@ private: Status convert_to_olap() override; private: + static bool should_padding(const ColumnString* column, size_t padding_length) { + // Check sum of data length, including terminating zero. + return column->size() * (padding_length + 1) != column->chars.size(); + } + + static ColumnPtr clone_and_padding(const ColumnString* input, size_t padding_length) { + auto column = vectorized::ColumnString::create(); + auto padded_column = + assert_cast<vectorized::ColumnString*>(column->assume_mutable().get()); + + column->offsets.resize(input->size()); + column->chars.resize(input->size() * (padding_length + 1)); + memset(padded_column->chars.data(), 0, input->size() * (padding_length + 1)); + + for (size_t i = 0; i < input->size(); i++) { + column->offsets[i] = (i + 1) * (padding_length + 1); + + auto str = input->get_data_at(i); + if (str.size) { + memcpy(padded_column->chars.data() + i * (padding_length + 1), str.data, + str.size); + } + } + + return column; + } + size_t _length; PaddedPODArray<Slice> _slice; + ColumnPtr _column = nullptr; }; class OlapColumnDataConvertorVarChar : public OlapColumnDataConvertorBase { diff --git a/be/test/CMakeLists.txt b/be/test/CMakeLists.txt index 810fbd5279..20052e823e 100644 --- a/be/test/CMakeLists.txt +++ b/be/test/CMakeLists.txt @@ -357,6 +357,7 @@ set(VEC_TEST_FILES vec/function/table_function_test.cpp vec/runtime/vdata_stream_test.cpp vec/utils/arrow_column_to_doris_column_test.cpp + vec/olap/char_type_padding_test.cpp ) add_executable(doris_be_test diff --git a/be/test/vec/exec/vtablet_sink_test.cpp b/be/test/vec/exec/vtablet_sink_test.cpp index d8ab7da2c3..67ae97128d 100644 --- a/be/test/vec/exec/vtablet_sink_test.cpp +++ b/be/test/vec/exec/vtablet_sink_test.cpp @@ -115,7 +115,7 @@ public: void tablet_writer_add_block(google::protobuf::RpcController* controller, const PTabletWriterAddBlockRequest* request, PTabletWriterAddBlockResult* response, - google::protobuf::Closure* done) { + google::protobuf::Closure* done) override { brpc::ClosureGuard done_guard(done); { std::lock_guard<std::mutex> l(_lock); diff --git a/be/test/vec/olap/char_type_padding_test.cpp b/be/test/vec/olap/char_type_padding_test.cpp new file mode 100644 index 0000000000..fa7eaffc31 --- /dev/null +++ b/be/test/vec/olap/char_type_padding_test.cpp @@ -0,0 +1,68 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <gtest/gtest.h> + +#include "vec/columns/column_string.h" +#include "vec/olap/olap_data_convertor.h" + +namespace doris::vectorized { + +using ConvertorChar = OlapBlockDataConvertor::OlapColumnDataConvertorChar; + +TEST(CharTypePaddingTest, CharTypePaddingFullTest) { + auto input = ColumnString::create(); + + std::string str = "Allemande"; + size_t rows = 10; + + for (size_t i = 0; i < rows; i++) { + input->insert_data(str.data(), str.length()); + } + EXPECT_FALSE(ConvertorChar::should_padding(input, str.length())); + + input->insert_data(str.data(), str.length() - 1); + EXPECT_TRUE(ConvertorChar::should_padding(input, str.length())); +} + +TEST(CharTypePaddingTest, CharTypePaddingDataTest) { + auto input = ColumnString::create(); + + std::string str = "Allemande"; + + size_t rows = str.length(); + for (int i = 0; i < rows; i++) { + input->insert_data(str.data(), str.length() - i); + } + + auto output = ConvertorChar::clone_and_padding(input, str.length()); + + for (int i = 0; i < rows; i++) { + auto cell = output->get_data_at(i).to_string(); + EXPECT_EQ(cell.length(), str.length()); + + auto str_real = std::string(cell.data(), str.length() - i); + auto str_expect = str.substr(0, str.length() - i); + EXPECT_EQ(str_real, str_expect); + + for (int j = str.length() - i; j < str.length(); j++) { + EXPECT_EQ(cell[j], 0); + } + } +} + +} // namespace doris::vectorized --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org