This is an automated email from the ASF dual-hosted git repository. zhangstar333 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 77bec724630 [BE](ut) add UT about bitmap/hll/quantile_state datatype (#47845) 77bec724630 is described below commit 77bec72463044c371e044e1ad5b5b5166650f85d Author: zhangstar333 <zhangs...@selectdb.com> AuthorDate: Wed Feb 19 09:48:10 2025 +0800 [BE](ut) add UT about bitmap/hll/quantile_state datatype (#47845) ### What problem does this PR solve? Problem Summary: add some ut test about bitmap/hll/quantile_state/agg_state/fixed_length_object datatype --- be/src/vec/data_types/data_type.h | 6 + be/test/vec/data_types/common_data_type_test.h | 17 +- .../vec/data_types/data_type_agg_state_test.cpp | 257 +++++++++++++++++++++ be/test/vec/data_types/data_type_array_test.cpp | 22 +- be/test/vec/data_types/data_type_bitmap_test.cpp | 218 +++++++++++++++++ .../data_type_fixed_length_object_test.cpp | 153 ++++++++++++ be/test/vec/data_types/data_type_hll_test.cpp | 216 +++++++++++++++++ be/test/vec/data_types/data_type_ip_test.cpp | 4 +- .../data_types/data_type_quantile_state_test.cpp | 198 ++++++++++++++++ 9 files changed, 1073 insertions(+), 18 deletions(-) diff --git a/be/src/vec/data_types/data_type.h b/be/src/vec/data_types/data_type.h index 7f1ee0cd850..3cc32156d20 100644 --- a/be/src/vec/data_types/data_type.h +++ b/be/src/vec/data_types/data_type.h @@ -272,6 +272,7 @@ struct WhichDataType { bool is_struct() const { return idx == TypeIndex::Struct; } bool is_map() const { return idx == TypeIndex::Map; } bool is_set() const { return idx == TypeIndex::Set; } + bool is_fixed_length_object() const { return idx == TypeIndex::FixedLengthObject; } bool is_nothing() const { return idx == TypeIndex::Nothing; } bool is_nullable() const { return idx == TypeIndex::Nullable; } @@ -371,6 +372,11 @@ bool is_string_or_fixed_string(const T& data_type) { return WhichDataType(data_type).is_string_or_fixed_string(); } +template <typename T> +bool is_fixed_length_object(const T& data_type) { + return WhichDataType(data_type).is_fixed_length_object(); +} + inline bool is_not_decimal_but_comparable_to_decimal(const DataTypePtr& data_type) { WhichDataType which(data_type); return which.is_int() || which.is_uint(); diff --git a/be/test/vec/data_types/common_data_type_test.h b/be/test/vec/data_types/common_data_type_test.h index 643f8669fbe..0b85e3977c2 100644 --- a/be/test/vec/data_types/common_data_type_test.h +++ b/be/test/vec/data_types/common_data_type_test.h @@ -22,6 +22,8 @@ #include <fstream> #include <iostream> +#include "agent/be_exec_version_manager.h" +#include "olap/schema.h" #include "vec/columns/column.h" #include "vec/core/field.h" #include "vec/core/types.h" @@ -52,6 +54,10 @@ namespace doris::vectorized { static bool gen_check_data_in_assert = true; class CommonDataTypeTest : public ::testing::Test { +public: + CommonDataTypeTest() = default; + void TestBody() override {} + protected: // Helper function to load data from CSV, with index which splited by spliter and load to columns void load_data_from_csv(const DataTypeSerDeSPtrs serders, MutableColumns& columns, @@ -164,7 +170,8 @@ public: ASSERT_EQ(const_col->operator[](i), default_const_col->operator[](i)); } // get_uncompressed_serialized_bytes - ASSERT_EQ(data_type->get_uncompressed_serialized_bytes(*column, 0), + ASSERT_EQ(data_type->get_uncompressed_serialized_bytes( + *column, BeExecVersionManager::get_newest_version()), uncompressed_serialized_bytes); } @@ -249,7 +256,7 @@ public: } // nt be_exec_version, PBlock* pblock, size_t* uncompressed_bytes, // size_t* compressed_bytes, segment_v2::CompressionTypePB compression_type, - size_t be_exec_version = 2; + size_t be_exec_version = BeExecVersionManager::get_newest_version(); auto pblock = std::make_unique<PBlock>(); size_t uncompressed_bytes = 0; size_t compressed_bytes = 0; @@ -262,9 +269,9 @@ public: st = block_1->deserialize(*pblock); ASSERT_EQ(st.ok(), true); // check block_1 and block is same - for (int i = 0; i < block->rows(); ++i) { - auto& col = block->get_by_position(i); - auto& col_1 = block_1->get_by_position(i); + for (auto col_idx = 0; col_idx < block->columns(); ++col_idx) { + auto& col = block->get_by_position(col_idx); + auto& col_1 = block_1->get_by_position(col_idx); ASSERT_EQ(col.column->size(), col_1.column->size()); for (int j = 0; j < col.column->size(); ++j) { ASSERT_EQ(col.column->operator[](j), col_1.column->operator[](j)); diff --git a/be/test/vec/data_types/data_type_agg_state_test.cpp b/be/test/vec/data_types/data_type_agg_state_test.cpp new file mode 100644 index 00000000000..192573de4f3 --- /dev/null +++ b/be/test/vec/data_types/data_type_agg_state_test.cpp @@ -0,0 +1,257 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "vec/data_types/data_type_agg_state.h" + +#include <gtest/gtest-message.h> +#include <gtest/gtest-test-part.h> +#include <gtest/gtest.h> + +#include <iostream> +#include <memory> + +#include "agent/be_exec_version_manager.h" +#include "vec/columns/column.h" +#include "vec/columns/column_fixed_length_object.h" +#include "vec/columns/columns_number.h" +#include "vec/common/assert_cast.h" +#include "vec/common/schema_util.h" +#include "vec/core/field.h" +#include "vec/core/types.h" +#include "vec/data_types/common_data_type_serder_test.h" +#include "vec/data_types/common_data_type_test.h" +#include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_factory.hpp" +#include "vec/data_types/data_type_nullable.h" + +// 1. datatype meta info: +// get_type_id, get_type_as_type_descriptor, get_storage_field_type, have_subtypes, get_pdata_type (const IDataType *data_type), to_pb_column_meta (PColumnMeta *col_meta) +// get_family_name, get_is_parametric, should_align_right_in_pretty_formats +// text_can_contain_only_valid_utf8 +// have_maximum_size_of_value, get_maximum_size_of_value_in_memory, get_size_of_value_in_memory +// get_precision, get_scale +// is_null_literal, is_value_represented_by_number, is_value_unambiguously_represented_in_contiguous_memory_region +// 2. datatype creation with column : create_column, create_column_const (size_t size, const Field &field), create_column_const_with_default_value (size_t size), get_uncompressed_serialized_bytes (const IColumn &column, int be_exec_version) +// 3. serde related: get_serde (int nesting_level=1) +// to_string (const IColumn &column, size_t row_num, BufferWritable &ostr), to_string (const IColumn &column, size_t row_num), to_string_batch (const IColumn &column, ColumnString &column_to), from_string (ReadBuffer &rb, IColumn *column) +// 4. serialize/serialize_as_stream/deserialize/deserialize_as_stream +// serialize (const IColumn &column, char *buf, int be_exec_version), deserialize (const char *buf, MutableColumnPtr *column, int be_exec_version) + +namespace doris::vectorized { + +class DataTypeAggStateTest : public ::testing::TestWithParam<int> { +public: + void SetUp() override { + rows_value = GetParam(); + helper = std::make_unique<CommonDataTypeTest>(); + } + std::unique_ptr<CommonDataTypeTest> helper; + DataTypePtr sub_type = std::make_shared<DataTypeInt32>(); + DataTypes sub_types = {sub_type}; + // DataTypeAggState---> column_fixed_length_object + DataTypePtr datatype_agg_state_count = std::make_shared<DataTypeAggState>( + sub_types, false, "count", BeExecVersionManager::get_newest_version()); + // DataTypeAggState---> column_string + DataTypePtr datatype_agg_state_hll_union = std::make_shared<DataTypeAggState>( + sub_types, false, "hll_union", BeExecVersionManager::get_newest_version()); + int rows_value; +}; + +TEST_P(DataTypeAggStateTest, MetaInfoTest) { + TypeDescriptor agg_state_type_descriptor = {PrimitiveType::TYPE_AGG_STATE}; + auto col_meta = std::make_shared<PColumnMeta>(); + col_meta->set_type(PGenericType_TypeId_AGG_STATE); + CommonDataTypeTest::DataTypeMetaInfo agg_state_meta_info_to_assert = { + .type_id = TypeIndex::AggState, + .type_as_type_descriptor = &agg_state_type_descriptor, + .family_name = "AggState", + .has_subtypes = false, + .storage_field_type = doris::FieldType::OLAP_FIELD_TYPE_AGG_STATE, + .should_align_right_in_pretty_formats = false, + .text_can_contain_only_valid_utf8 = false, + .have_maximum_size_of_value = false, + .size_of_value_in_memory = size_t(-1), + .precision = size_t(-1), + .scale = size_t(-1), + .is_null_literal = false, + .is_value_represented_by_number = false, + .pColumnMeta = col_meta.get(), + .is_value_unambiguously_represented_in_contiguous_memory_region = true, + .default_field = Field(String()), + }; + helper->meta_info_assert(datatype_agg_state_count, agg_state_meta_info_to_assert); +} + +TEST_P(DataTypeAggStateTest, CreateColumnTest) { + Field default_field = Field(String()); + std::cout << "create_column_assert: " << datatype_agg_state_count->get_name() << std::endl; + auto column = (datatype_agg_state_count)->create_column(); + ASSERT_EQ(column->size(), 0); + column->insert_default(); + auto fixed_length_column = ColumnFixedLengthObject::create(8); + fixed_length_column->insert(default_field); + ASSERT_EQ(fixed_length_column->size(), 1); + + for (int i = 0; i < 1; ++i) { + ASSERT_EQ(fixed_length_column->operator[](i), column->operator[](i)); + } + // get_uncompressed_serialized_bytes + ASSERT_EQ(datatype_agg_state_count->get_uncompressed_serialized_bytes( + *column, BeExecVersionManager::get_newest_version()), + 25); +} + +void insert_data_agg_state(MutableColumns* agg_state_cols, DataTypePtr datatype_agg_state, + int rows_value, std::vector<std::string>* data_strs = nullptr) { + auto column_fixed = datatype_agg_state->create_column(); + agg_state_cols->push_back(column_fixed->get_ptr()); + std::cout << "insert_data_agg_state: " << datatype_agg_state->get_name() << " " + << column_fixed->get_name() << std::endl; + if (column_fixed->is_column_string()) { + ASSERT_TRUE(is_string(assert_cast<const DataTypeAggState*>(datatype_agg_state.get()) + ->get_serialized_type())); + auto* column = assert_cast<ColumnString*>((*agg_state_cols)[0].get()); + for (size_t i = 0; i != rows_value; ++i) { + auto val = std::to_string(i); + column->insert_data(val.c_str(), val.size()); + if (data_strs) { + data_strs->push_back(val); + } + // std::cout<<"insert_data_agg_state: "<<val<<" "<<val.size()<<" "<<column->get_data_at(i).to_string()<<std::endl; + } + } else { + assert_cast<ColumnFixedLengthObject*>((*agg_state_cols)[0].get())->set_item_size(8); + column_fixed->resize(rows_value); + ASSERT_TRUE(is_fixed_length_object( + assert_cast<const DataTypeAggState*>(datatype_agg_state.get()) + ->get_serialized_type())); + auto& data = assert_cast<ColumnFixedLengthObject*>((*agg_state_cols)[0].get())->get_data(); + for (size_t i = 0; i != rows_value; ++i) { + data[i] = i; + } + } + std::cout << "finish insert data" << std::endl; +} + +// // not support function: get_filed + +// test to_string | to_string_batch | from_string +TEST_P(DataTypeAggStateTest, FromAndToStringTest) { + MutableColumns agg_state_cols; + std::vector<std::string> data_strs; + insert_data_agg_state(&agg_state_cols, datatype_agg_state_hll_union, rows_value, &data_strs); + + { + // to_string_batch | from_string + auto col_to = ColumnString::create(); + datatype_agg_state_hll_union->to_string_batch(*agg_state_cols[0]->get_ptr(), *col_to); + ASSERT_EQ(col_to->size(), agg_state_cols[0]->get_ptr()->size()); + // from_string assert col_to to assert_column and check same with mutableColumn + auto assert_column = datatype_agg_state_hll_union->create_column(); + for (int i = 0; i < col_to->size(); ++i) { + std::string s = col_to->get_data_at(i).to_string(); + std::cout << "s: " << s << std::endl; + ReadBuffer rb(s.data(), s.size()); + ASSERT_EQ(Status::OK(), + datatype_agg_state_hll_union->from_string(rb, assert_column.get())); + ASSERT_EQ(assert_column->operator[](i), agg_state_cols[0]->get_ptr()->operator[](i)) + << "i: " << i << " s: " << s + << " datatype: " << datatype_agg_state_hll_union->get_name() + << " assert_column: " << assert_column->get_name() + << " mutableColumn:" << agg_state_cols[0]->get_ptr()->get_name() << std::endl; + } + std::cout << "finish to_string_batch | from_string test" << std::endl; + } + + { + // to_string | from_string + auto ser_col = ColumnString::create(); + ser_col->reserve(agg_state_cols[0]->get_ptr()->size()); + VectorBufferWriter buffer_writer(*ser_col.get()); + for (int i = 0; i < agg_state_cols[0]->get_ptr()->size(); ++i) { + datatype_agg_state_hll_union->to_string(*agg_state_cols[0]->get_ptr(), i, + buffer_writer); + std::string res = + datatype_agg_state_hll_union->to_string(*agg_state_cols[0]->get_ptr(), i); + buffer_writer.commit(); + EXPECT_EQ(data_strs[i], ser_col->get_data_at(i).to_string()); + } + // check ser_col to assert_column and check same with mutableColumn + auto assert_column_1 = datatype_agg_state_hll_union->create_column(); + for (int i = 0; i < ser_col->size(); ++i) { + std::string s = ser_col->get_data_at(i).to_string(); + ReadBuffer rb(s.data(), s.size()); + ASSERT_EQ(Status::OK(), + datatype_agg_state_hll_union->from_string(rb, assert_column_1.get())); + auto aaa = assert_column_1->operator[](i); + ASSERT_EQ(assert_column_1->operator[](i), agg_state_cols[0]->get_ptr()->operator[](i)); + } + std::cout << "finish to_string | from_string test" << std::endl; + } +} + +// // serialize / deserialize +TEST_P(DataTypeAggStateTest, SerializeDeserializeTest) { + MutableColumns agg_state_cols; + insert_data_agg_state(&agg_state_cols, datatype_agg_state_hll_union, rows_value); + + auto* column = assert_cast<ColumnString*>(agg_state_cols[0].get()); + auto size = datatype_agg_state_hll_union->get_uncompressed_serialized_bytes( + *column, BeExecVersionManager::get_newest_version()); + std::unique_ptr<char[]> buf = std::make_unique<char[]>(size); + auto* result = datatype_agg_state_hll_union->serialize( + *column, buf.get(), BeExecVersionManager::get_newest_version()); + ASSERT_EQ(result, buf.get() + size); + + auto column2 = datatype_agg_state_hll_union->create_column(); + datatype_agg_state_hll_union->deserialize(buf.get(), &column2, + BeExecVersionManager::get_newest_version()); + for (size_t i = 0; i != rows_value; ++i) { + auto* column_res = assert_cast<ColumnString*>(column2.get()); + ASSERT_EQ(column->get_data_at(i).to_string(), column_res->get_data_at(i).to_string()); + } + helper->serialize_deserialize_assert(agg_state_cols, {datatype_agg_state_hll_union}); + std::cout << "finish serialize deserialize test" << std::endl; +} + +// // serialize / deserialize +TEST_P(DataTypeAggStateTest, SerializeDeserializeTest2) { + MutableColumns agg_state_cols; + insert_data_agg_state(&agg_state_cols, datatype_agg_state_count, rows_value); + + auto* column = assert_cast<ColumnFixedLengthObject*>(agg_state_cols[0].get()); + auto size = datatype_agg_state_count->get_uncompressed_serialized_bytes( + *column, BeExecVersionManager::get_newest_version()); + std::unique_ptr<char[]> buf = std::make_unique<char[]>(size); + auto* result = datatype_agg_state_count->serialize(*column, buf.get(), + BeExecVersionManager::get_newest_version()); + ASSERT_EQ(result, buf.get() + size); + + auto column2 = datatype_agg_state_count->create_column(); + datatype_agg_state_count->deserialize(buf.get(), &column2, + BeExecVersionManager::get_newest_version()); + for (size_t i = 0; i != rows_value; ++i) { + auto* column_res = assert_cast<ColumnFixedLengthObject*>(column2.get()); + ASSERT_EQ(column->get_data_at(i).to_string(), column_res->get_data_at(i).to_string()); + } + helper->serialize_deserialize_assert(agg_state_cols, {datatype_agg_state_count}); + std::cout << "finish serialize deserialize test2" << std::endl; +} + +INSTANTIATE_TEST_SUITE_P(Params, DataTypeAggStateTest, ::testing::Values(0, 1, 31)); + +} // namespace doris::vectorized \ No newline at end of file diff --git a/be/test/vec/data_types/data_type_array_test.cpp b/be/test/vec/data_types/data_type_array_test.cpp index d50ae0be26b..1c0ee6f97ef 100644 --- a/be/test/vec/data_types/data_type_array_test.cpp +++ b/be/test/vec/data_types/data_type_array_test.cpp @@ -363,59 +363,59 @@ TEST_F(DataTypeArrayTest, CreateColumnTest) { auto type = remove_nullable(array_types[i]); // any different nested type in arr with same default array ? Field default_field_array = Array(); - create_column_assert(type, default_field_array, 16); + create_column_assert(type, default_field_array, 51); // 17 * 3 } { auto type = remove_nullable(array_types[13]); Field default_field_array = Array(); - create_column_assert(type, default_field_array, 24); + create_column_assert(type, default_field_array, 59); // add addtional sizeof(8) } // for decimal32/64/128/256 here uncompressed size is 16 // one scalar type for (int i = 14; i < 18; i++) { auto type = remove_nullable(array_types[i]); Field default_field_array = Array(); - create_column_assert(type, default_field_array, 16); + create_column_assert(type, default_field_array, 51); } // for array-array-scala for (int i = 18; i < 31; i++) { auto type = remove_nullable(array_types[i]); Field default_field_array = Array(); - create_column_assert(type, default_field_array, 28); + create_column_assert(type, default_field_array, 85); // 17 * 5 } { // string type auto type = remove_nullable(array_types[31]); Field default_field_array = Array(); - create_column_assert(type, default_field_array, 36); + create_column_assert(type, default_field_array, 93); // add addtional sizeof(8) } for (int i = 32; i < 36; i++) { auto type = remove_nullable(array_types[i]); Field default_field_array = Array(); - create_column_assert(type, default_field_array, 28); + create_column_assert(type, default_field_array, 85); // 17 * 5 } // for array-map { auto type = remove_nullable(array_types[36]); Field default_field_array = Array(); - create_column_assert(type, default_field_array, 44); + create_column_assert(type, default_field_array, 127); // 17 * 7 + 8 add addtional sizeof(8) type = remove_nullable(array_types[39]); default_field_array = Array(); - create_column_assert(type, default_field_array, 44); + create_column_assert(type, default_field_array, 127); } { auto type = remove_nullable(array_types[37]); Field default_field_array = Array(); - create_column_assert(type, default_field_array, 36); + create_column_assert(type, default_field_array, 119); type = remove_nullable(array_types[38]); default_field_array = Array(); - create_column_assert(type, default_field_array, 36); + create_column_assert(type, default_field_array, 119); // 17 * 7 } // for array-struct { auto type = remove_nullable(array_types[40]); Field default_field_array = Array(); - create_column_assert(type, default_field_array, 76); + create_column_assert(type, default_field_array, 297); // 17 * 17 } } diff --git a/be/test/vec/data_types/data_type_bitmap_test.cpp b/be/test/vec/data_types/data_type_bitmap_test.cpp new file mode 100644 index 00000000000..58291f06a79 --- /dev/null +++ b/be/test/vec/data_types/data_type_bitmap_test.cpp @@ -0,0 +1,218 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "vec/data_types/data_type_bitmap.h" + +#include <gtest/gtest-message.h> +#include <gtest/gtest-test-part.h> +#include <gtest/gtest.h> + +#include <iostream> + +#include "agent/be_exec_version_manager.h" +#include "util/bitmap_value.h" +#include "vec/columns/column.h" +#include "vec/common/assert_cast.h" +#include "vec/core/field.h" +#include "vec/core/types.h" +#include "vec/data_types/common_data_type_serder_test.h" +#include "vec/data_types/common_data_type_test.h" +#include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_factory.hpp" +#include "vec/data_types/data_type_nullable.h" + +// 1. datatype meta info: +// get_type_id, get_type_as_type_descriptor, get_storage_field_type, have_subtypes, get_pdata_type (const IDataType *data_type), to_pb_column_meta (PColumnMeta *col_meta) +// get_family_name, get_is_parametric, should_align_right_in_pretty_formats +// text_can_contain_only_valid_utf8 +// have_maximum_size_of_value, get_maximum_size_of_value_in_memory, get_size_of_value_in_memory +// get_precision, get_scale +// is_null_literal, is_value_represented_by_number, is_value_unambiguously_represented_in_contiguous_memory_region +// 2. datatype creation with column : create_column, create_column_const (size_t size, const Field &field), create_column_const_with_default_value (size_t size), get_uncompressed_serialized_bytes (const IColumn &column, int be_exec_version) +// 3. serde related: get_serde (int nesting_level=1) +// to_string (const IColumn &column, size_t row_num, BufferWritable &ostr), to_string (const IColumn &column, size_t row_num), to_string_batch (const IColumn &column, ColumnString &column_to), from_string (ReadBuffer &rb, IColumn *column) +// 4. serialize/serialize_as_stream/deserialize/deserialize_as_stream +// serialize (const IColumn &column, char *buf, int be_exec_version), deserialize (const char *buf, MutableColumnPtr *column, int be_exec_version) + +namespace doris::vectorized { + +class DataTypeBitMapTest : public ::testing::TestWithParam<int> { +public: + void SetUp() override { + rows_value = GetParam(); + helper = std::make_unique<CommonDataTypeTest>(); + } + std::unique_ptr<CommonDataTypeTest> helper; + DataTypePtr dt_bitmap = + DataTypeFactory::instance().create_data_type(FieldType::OLAP_FIELD_TYPE_OBJECT, 0, 0); + int rows_value; +}; + +TEST_P(DataTypeBitMapTest, MetaInfoTest) { + TypeDescriptor bitmap_type_descriptor = {PrimitiveType::TYPE_OBJECT}; + auto col_meta = std::make_shared<PColumnMeta>(); + col_meta->set_type(PGenericType_TypeId_BITMAP); + CommonDataTypeTest::DataTypeMetaInfo bitmap_meta_info_to_assert = { + .type_id = TypeIndex::BitMap, + .type_as_type_descriptor = &bitmap_type_descriptor, + .family_name = "BitMap", + .has_subtypes = false, + .storage_field_type = doris::FieldType::OLAP_FIELD_TYPE_OBJECT, + .should_align_right_in_pretty_formats = false, + .text_can_contain_only_valid_utf8 = true, + .have_maximum_size_of_value = false, + .size_of_value_in_memory = size_t(-1), + .precision = size_t(-1), + .scale = size_t(-1), + .is_null_literal = false, + .is_value_represented_by_number = false, + .pColumnMeta = col_meta.get(), + .is_value_unambiguously_represented_in_contiguous_memory_region = true, + .default_field = BitmapValue::empty_bitmap(), + }; + helper->meta_info_assert(dt_bitmap, bitmap_meta_info_to_assert); +} + +TEST_P(DataTypeBitMapTest, CreateColumnTest) { + Field default_field_bitmap = BitmapValue::empty_bitmap(); + helper->create_column_assert(dt_bitmap, default_field_bitmap, 17); +} + +void insert_data_bitmap(MutableColumns* bitmap_cols, DataTypePtr dt_bitmap, int rows_value, + std::vector<std::string>* data_strs = nullptr) { + auto serde_bitmap = dt_bitmap->get_serde(1); + auto column_bitmap = dt_bitmap->create_column(); + + bitmap_cols->push_back(column_bitmap->get_ptr()); + DataTypeSerDeSPtrs serde = {dt_bitmap->get_serde()}; + auto& data = assert_cast<ColumnBitmap*>((*bitmap_cols)[0].get())->get_data(); + for (size_t i = 0; i != rows_value; ++i) { + BitmapValue bitmap_value; + for (size_t j = 0; j <= i; ++j) { + bitmap_value.add(j); + } + if (data_strs) { + data_strs->push_back(bitmap_value.to_string()); + } + std::string memory_buffer(bitmap_value.getSizeInBytes(), '0'); + bitmap_value.write_to(memory_buffer.data()); + data.emplace_back(std::move(bitmap_value)); + } + std::cout << "finish insert data" << std::endl; +} + +// not support function: get_filed + +// test to_string | to_string_batch | from_string +TEST_P(DataTypeBitMapTest, FromAndToStringTest) { + MutableColumns bitmap_cols; + std::vector<std::string> data_strs; + insert_data_bitmap(&bitmap_cols, dt_bitmap, rows_value, &data_strs); + + { + // to_string_batch | from_string + auto col_to = ColumnString::create(); + dt_bitmap->to_string_batch(*bitmap_cols[0]->get_ptr(), *col_to); + ASSERT_EQ(col_to->size(), bitmap_cols[0]->get_ptr()->size()); + // from_string assert col_to to assert_column and check same with mutableColumn + auto assert_column = dt_bitmap->create_column(); + for (int i = 0; i < col_to->size(); ++i) { + std::string s = col_to->get_data_at(i).to_string(); + ReadBuffer rb(s.data(), s.size()); + ASSERT_EQ(Status::OK(), dt_bitmap->from_string(rb, assert_column.get())); + ASSERT_EQ(assert_column->operator[](i), bitmap_cols[0]->get_ptr()->operator[](i)) + << "i: " << i << " s: " << s << " datatype: " << dt_bitmap->get_name() + << " assert_column: " << assert_column->get_name() + << " mutableColumn:" << bitmap_cols[0]->get_ptr()->get_name() << std::endl; + } + std::cout << "finish to_string_batch | from_string test" << std::endl; + } + + { + // to_string | from_string + auto ser_col = ColumnString::create(); + ser_col->reserve(bitmap_cols[0]->get_ptr()->size()); + VectorBufferWriter buffer_writer(*ser_col.get()); + for (int i = 0; i < bitmap_cols[0]->get_ptr()->size(); ++i) { + dt_bitmap->to_string(*bitmap_cols[0]->get_ptr(), i, buffer_writer); + std::string res = dt_bitmap->to_string(*bitmap_cols[0]->get_ptr(), i); + buffer_writer.commit(); + EXPECT_EQ(res, data_strs[i]); + } + // check ser_col to assert_column and check same with mutableColumn + auto assert_column_1 = dt_bitmap->create_column(); + for (int i = 0; i < ser_col->size(); ++i) { + std::string s = ser_col->get_data_at(i).to_string(); + ReadBuffer rb(s.data(), s.size()); + ASSERT_EQ(Status::OK(), dt_bitmap->from_string(rb, assert_column_1.get())); + auto aaa = assert_column_1->operator[](i); + ASSERT_EQ(assert_column_1->operator[](i), bitmap_cols[0]->get_ptr()->operator[](i)); + } + std::cout << "finish to_string | from_string test" << std::endl; + } +} + +// serialize / deserialize +TEST_P(DataTypeBitMapTest, SerializeDeserializeTest) { + MutableColumns bitmap_cols; + insert_data_bitmap(&bitmap_cols, dt_bitmap, rows_value); + + auto* column = assert_cast<ColumnBitmap*>(bitmap_cols[0].get()); + auto size = dt_bitmap->get_uncompressed_serialized_bytes( + *column, BeExecVersionManager::get_newest_version()); + std::unique_ptr<char[]> buf = std::make_unique<char[]>(size); + auto* result = + dt_bitmap->serialize(*column, buf.get(), BeExecVersionManager::get_newest_version()); + ASSERT_EQ(result, buf.get() + size); + + auto column2 = dt_bitmap->create_column(); + dt_bitmap->deserialize(buf.get(), &column2, BeExecVersionManager::get_newest_version()); + for (size_t i = 0; i != rows_value; ++i) { + auto* column_res = assert_cast<ColumnBitmap*>(column2.get()); + ASSERT_EQ(column->get_data()[i].to_string(), column_res->get_data()[i].to_string()); + } + helper->serialize_deserialize_assert(bitmap_cols, {dt_bitmap}); + std::cout << "finish serialize deserialize test" << std::endl; +} + +// serialize / deserialize as stream +TEST_P(DataTypeBitMapTest, SerializeDeserializeAsStreamTest) { + MutableColumns bitmap_cols; + insert_data_bitmap(&bitmap_cols, dt_bitmap, rows_value); + + auto ser_col = ColumnString::create(); + VectorBufferWriter buffer_writer(*ser_col.get()); + auto* column_data = assert_cast<ColumnBitmap*>(bitmap_cols[0].get()); + auto c = dt_bitmap->create_column(); + auto* column_res = assert_cast<ColumnBitmap*>(c.get()); + column_res->resize(rows_value); + for (size_t i = 0; i != rows_value; ++i) { + doris::vectorized::DataTypeBitMap::serialize_as_stream(column_data->get_element(i), + buffer_writer); + buffer_writer.commit(); + BufferReadable buffer_readable(ser_col->get_data_at(i)); + doris::vectorized::DataTypeBitMap::deserialize_as_stream(column_res->get_element(i), + buffer_readable); + ASSERT_EQ(column_data->get_data()[i].to_string(), column_res->get_data()[i].to_string()); + } + std::cout << "finish serialize deserialize as stream test" << std::endl; +} +// sh run-be-ut.sh --run --filter=Params/DataTypeBitMapTest.* +// need rows_value to cover bitmap all type: empty/single/set/bitmap +INSTANTIATE_TEST_SUITE_P(Params, DataTypeBitMapTest, ::testing::Values(0, 1, 31, 1024)); + +} // namespace doris::vectorized \ No newline at end of file diff --git a/be/test/vec/data_types/data_type_fixed_length_object_test.cpp b/be/test/vec/data_types/data_type_fixed_length_object_test.cpp new file mode 100644 index 00000000000..09762819eb7 --- /dev/null +++ b/be/test/vec/data_types/data_type_fixed_length_object_test.cpp @@ -0,0 +1,153 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "vec/data_types/data_type_fixed_length_object.h" + +#include <gtest/gtest-message.h> +#include <gtest/gtest-test-part.h> +#include <gtest/gtest.h> + +#include <iostream> + +#include "agent/be_exec_version_manager.h" +#include "util/bitmap_value.h" +#include "vec/columns/column.h" +#include "vec/columns/column_fixed_length_object.h" +#include "vec/common/assert_cast.h" +#include "vec/core/field.h" +#include "vec/core/types.h" +#include "vec/data_types/common_data_type_serder_test.h" +#include "vec/data_types/common_data_type_test.h" +#include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_factory.hpp" +#include "vec/data_types/data_type_nullable.h" + +// 1. datatype meta info: +// get_type_id, get_type_as_type_descriptor, get_storage_field_type, have_subtypes, get_pdata_type (const IDataType *data_type), to_pb_column_meta (PColumnMeta *col_meta) +// get_family_name, get_is_parametric, should_align_right_in_pretty_formats +// text_can_contain_only_valid_utf8 +// have_maximum_size_of_value, get_maximum_size_of_value_in_memory, get_size_of_value_in_memory +// get_precision, get_scale +// is_null_literal, is_value_represented_by_number, is_value_unambiguously_represented_in_contiguous_memory_region +// 2. datatype creation with column : create_column, create_column_const (size_t size, const Field &field), create_column_const_with_default_value (size_t size), get_uncompressed_serialized_bytes (const IColumn &column, int be_exec_version) +// 3. serde related: get_serde (int nesting_level=1) +// to_string (const IColumn &column, size_t row_num, BufferWritable &ostr), to_string (const IColumn &column, size_t row_num), to_string_batch (const IColumn &column, ColumnString &column_to), from_string (ReadBuffer &rb, IColumn *column) +// 4. serialize/serialize_as_stream/deserialize/deserialize_as_stream +// serialize (const IColumn &column, char *buf, int be_exec_version), deserialize (const char *buf, MutableColumnPtr *column, int be_exec_version) + +namespace doris::vectorized { + +class DataTypeFixedLengthObjectTest : public ::testing::TestWithParam<int> { +public: + void SetUp() override { + rows_value = GetParam(); + helper = std::make_unique<CommonDataTypeTest>(); + } + std::unique_ptr<CommonDataTypeTest> helper; + int rows_value; + DataTypePtr datatype_fixed_length = std::make_shared<DataTypeFixedLengthObject>(); +}; + +TEST_P(DataTypeFixedLengthObjectTest, MetaInfoTest) { + TypeDescriptor bitmap_type_descriptor = {PrimitiveType::INVALID_TYPE}; + auto col_meta = std::make_shared<PColumnMeta>(); + col_meta->set_type(PGenericType_TypeId_FIXEDLENGTHOBJECT); + CommonDataTypeTest::DataTypeMetaInfo bitmap_meta_info_to_assert = { + .type_id = TypeIndex::FixedLengthObject, + .type_as_type_descriptor = &bitmap_type_descriptor, + .family_name = "DataTypeFixedLengthObject", + .has_subtypes = false, + .storage_field_type = doris::FieldType::OLAP_FIELD_TYPE_NONE, + .should_align_right_in_pretty_formats = false, + .text_can_contain_only_valid_utf8 = false, + .have_maximum_size_of_value = false, + .size_of_value_in_memory = size_t(-1), + .precision = size_t(-1), + .scale = size_t(-1), + .is_null_literal = false, + .is_value_represented_by_number = false, + .pColumnMeta = col_meta.get(), + .is_value_unambiguously_represented_in_contiguous_memory_region = false, + .default_field = Field(String()), + }; + helper->meta_info_assert(datatype_fixed_length, bitmap_meta_info_to_assert); +} + +TEST_P(DataTypeFixedLengthObjectTest, CreateColumnTest) { + Field default_field = Field(String()); + std::cout << "create_column_assert: " << datatype_fixed_length->get_name() << std::endl; + auto column = (datatype_fixed_length)->create_column(); + ASSERT_EQ(column->size(), 0); + auto fixed_length_column = ColumnFixedLengthObject::create(8); + fixed_length_column->insert(default_field); + ASSERT_EQ(fixed_length_column->size(), 1); + auto default_const_col = ColumnFixedLengthObject::create(8); + auto data = fixed_length_column->get_data_at(0); + default_const_col->insert_data(data.data, data.size); + for (int i = 0; i < 1; ++i) { + ASSERT_EQ(fixed_length_column->operator[](i), default_const_col->operator[](i)); + } + // get_uncompressed_serialized_bytes + ASSERT_EQ(datatype_fixed_length->get_uncompressed_serialized_bytes( + *column, BeExecVersionManager::get_newest_version()), + 17); +} + +void insert_data_fixed_length_data(MutableColumns* fixed_length_cols, + DataTypePtr datatype_fixed_length, int rows_value, + std::vector<std::string>* data_strs = nullptr) { + auto serde_fixed_length = datatype_fixed_length->get_serde(1); + auto column_fixed = ColumnFixedLengthObject::create(sizeof(size_t)); + column_fixed->resize(rows_value); + fixed_length_cols->push_back(column_fixed->get_ptr()); + DataTypeSerDeSPtrs serde = {datatype_fixed_length->get_serde()}; + auto& data = assert_cast<ColumnFixedLengthObject*>((*fixed_length_cols)[0].get())->get_data(); + for (size_t i = 0; i != rows_value; ++i) { + data[i] = i; + } + std::cout << "finish insert data" << std::endl; +} + +// not support function: get_filed to_string | to_string_batch | from_string + +// serialize / deserialize +TEST_P(DataTypeFixedLengthObjectTest, SerializeDeserializeTest) { + MutableColumns fixed_length_cols; + insert_data_fixed_length_data(&fixed_length_cols, datatype_fixed_length, rows_value); + + auto* column = assert_cast<ColumnFixedLengthObject*>(fixed_length_cols[0].get()); + auto size = datatype_fixed_length->get_uncompressed_serialized_bytes( + *column, BeExecVersionManager::get_newest_version()); + std::unique_ptr<char[]> buf = std::make_unique<char[]>(size); + auto* result = datatype_fixed_length->serialize(*column, buf.get(), + BeExecVersionManager::get_newest_version()); + ASSERT_EQ(result, buf.get() + size); + + auto column2 = datatype_fixed_length->create_column(); + datatype_fixed_length->deserialize(buf.get(), &column2, + BeExecVersionManager::get_newest_version()); + for (size_t i = 0; i != rows_value; ++i) { + auto* column_res = assert_cast<ColumnFixedLengthObject*>(column2.get()); + ASSERT_EQ(column->get_data()[i], column_res->get_data()[i]); + } + helper->serialize_deserialize_assert(fixed_length_cols, {datatype_fixed_length}); + std::cout << "finish serialize deserialize test" << std::endl; +} + +INSTANTIATE_TEST_SUITE_P(Params, DataTypeFixedLengthObjectTest, ::testing::Values(0, 1, 31, 1024)); + +} // namespace doris::vectorized \ No newline at end of file diff --git a/be/test/vec/data_types/data_type_hll_test.cpp b/be/test/vec/data_types/data_type_hll_test.cpp new file mode 100644 index 00000000000..e16f6045217 --- /dev/null +++ b/be/test/vec/data_types/data_type_hll_test.cpp @@ -0,0 +1,216 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "vec/data_types/data_type_hll.h" + +#include <gtest/gtest-message.h> +#include <gtest/gtest-test-part.h> +#include <gtest/gtest.h> + +#include <iostream> + +#include "agent/be_exec_version_manager.h" +#include "vec/columns/column.h" +#include "vec/core/field.h" +#include "vec/core/types.h" +#include "vec/data_types/common_data_type_serder_test.h" +#include "vec/data_types/common_data_type_test.h" +#include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_factory.hpp" +#include "vec/data_types/data_type_nullable.h" + +// this test is gonna to be a data type test template for all DataType which should make ut test to coverage the function defined +// for example DataTypeHLL should test this function: +// 1. datatype meta info: +// get_type_id, get_type_as_type_descriptor, get_storage_field_type, have_subtypes, get_pdata_type (const IDataType *data_type), to_pb_column_meta (PColumnMeta *col_meta) +// get_family_name, get_is_parametric, should_align_right_in_pretty_formats +// text_can_contain_only_valid_utf8 +// have_maximum_size_of_value, get_maximum_size_of_value_in_memory, get_size_of_value_in_memory +// get_precision, get_scale +// is_null_literal, is_value_represented_by_number, is_value_unambiguously_represented_in_contiguous_memory_region +// 2. datatype creation with column : create_column, create_column_const (size_t size, const Field &field), create_column_const_with_default_value (size_t size), get_uncompressed_serialized_bytes (const IColumn &column, int be_exec_version) +// 3. serde related: get_serde (int nesting_level=1) +// to_string (const IColumn &column, size_t row_num, BufferWritable &ostr), to_string (const IColumn &column, size_t row_num), to_string_batch (const IColumn &column, ColumnString &column_to), from_string (ReadBuffer &rb, IColumn *column) +// serialize (const IColumn &column, char *buf, int be_exec_version), deserialize (const char *buf, MutableColumnPtr *column, int be_exec_version) + +namespace doris::vectorized { + +class DataTypeHLLTest : public ::testing::TestWithParam<int> { +protected: + void SetUp() override { + rows_value = GetParam(); + helper = std::make_unique<CommonDataTypeTest>(); + } + +public: + std::unique_ptr<CommonDataTypeTest> helper; + int rows_value; + DataTypePtr dt_hll = + DataTypeFactory::instance().create_data_type(FieldType::OLAP_FIELD_TYPE_HLL, 0, 0); +}; + +TEST_P(DataTypeHLLTest, MetaInfoTest) { + TypeDescriptor hll_type_descriptor = {PrimitiveType::TYPE_HLL}; + auto col_meta = std::make_shared<PColumnMeta>(); + col_meta->set_type(PGenericType_TypeId_HLL); + CommonDataTypeTest::DataTypeMetaInfo hll_meta_info_to_assert = { + .type_id = TypeIndex::HLL, + .type_as_type_descriptor = &hll_type_descriptor, + .family_name = "HLL", + .has_subtypes = false, + .storage_field_type = doris::FieldType::OLAP_FIELD_TYPE_HLL, + .should_align_right_in_pretty_formats = false, + .text_can_contain_only_valid_utf8 = true, + .have_maximum_size_of_value = false, + .size_of_value_in_memory = size_t(-1), + .precision = size_t(-1), + .scale = size_t(-1), + .is_null_literal = false, + .is_value_represented_by_number = false, + .pColumnMeta = col_meta.get(), + .is_value_unambiguously_represented_in_contiguous_memory_region = true, + .default_field = HyperLogLog::empty(), + }; + helper->meta_info_assert(dt_hll, hll_meta_info_to_assert); +} + +TEST_P(DataTypeHLLTest, CreateColumnTest) { + Field default_field_hll = HyperLogLog::empty(); + helper->create_column_assert(dt_hll, default_field_hll, 17); +} + +void insert_data_hll(MutableColumns* hll_cols, DataTypePtr datetype_hll, int rows_value, + std::vector<std::string>* data_strs = nullptr) { + auto serde_hll = datetype_hll->get_serde(1); + auto column_hll = datetype_hll->create_column(); + + hll_cols->push_back(column_hll->get_ptr()); + DataTypeSerDeSPtrs serde = {datetype_hll->get_serde()}; + auto& data = assert_cast<ColumnHLL*>((*hll_cols)[0].get())->get_data(); + for (size_t i = 0; i != rows_value; ++i) { + HyperLogLog hll_value; + for (size_t j = 0; j <= i; ++j) { + hll_value.update(j); + } + if (data_strs) { + data_strs->push_back(hll_value.to_string()); + } + std::string memory_buffer(hll_value.max_serialized_size(), '0'); + hll_value.serialize(reinterpret_cast<uint8_t*>(memory_buffer.data())); + data.emplace_back(std::move(hll_value)); + } + std::cout << "finish insert data" << std::endl; +} + +// test to_string | to_string_batch | from_string +TEST_P(DataTypeHLLTest, FromAndToStringTest) { + MutableColumns hll_cols; + std::vector<std::string> data_strs; + insert_data_hll(&hll_cols, dt_hll, rows_value, &data_strs); + + { + // to_string_batch | from_string + auto col_to = ColumnString::create(); + dt_hll->to_string_batch(*hll_cols[0]->get_ptr(), *col_to); + ASSERT_EQ(col_to->size(), hll_cols[0]->get_ptr()->size()); + // from_string assert col_to to assert_column and check same with mutableColumn + auto assert_column = dt_hll->create_column(); + for (int i = 0; i < col_to->size(); ++i) { + std::string s = col_to->get_data_at(i).to_string(); + ReadBuffer rb(s.data(), s.size()); + ASSERT_EQ(Status::OK(), dt_hll->from_string(rb, assert_column.get())); + ASSERT_EQ(assert_column->operator[](i), hll_cols[0]->get_ptr()->operator[](i)) + << "i: " << i << " s: " << s << " datatype: " << dt_hll->get_name() + << " assert_column: " << assert_column->get_name() + << " mutableColumn:" << hll_cols[0]->get_ptr()->get_name() << std::endl; + } + std::cout << "finish to_string_batch | from_string test" << std::endl; + } + + { + // to_string | from_string + auto ser_col = ColumnString::create(); + ser_col->reserve(hll_cols[0]->get_ptr()->size()); + VectorBufferWriter buffer_writer(*ser_col.get()); + for (int i = 0; i < hll_cols[0]->get_ptr()->size(); ++i) { + dt_hll->to_string(*hll_cols[0]->get_ptr(), i, buffer_writer); + std::string res = dt_hll->to_string(*hll_cols[0]->get_ptr(), i); + buffer_writer.commit(); + EXPECT_EQ(res, "HLL()"); // HLL to_string is not implemented + } + // check ser_col to assert_column and check same with mutableColumn + auto assert_column_1 = dt_hll->create_column(); + for (int i = 0; i < ser_col->size(); ++i) { + std::string s = ser_col->get_data_at(i).to_string(); + ReadBuffer rb(s.data(), s.size()); + ASSERT_EQ(Status::OK(), dt_hll->from_string(rb, assert_column_1.get())); + auto aaa = assert_column_1->operator[](i); + ASSERT_EQ(assert_column_1->operator[](i), hll_cols[0]->get_ptr()->operator[](i)); + } + std::cout << "finish to_string | from_string test" << std::endl; + } +} + +// serialize / deserialize +TEST_P(DataTypeHLLTest, SerializeDeserializeTest) { + MutableColumns hll_cols; + insert_data_hll(&hll_cols, dt_hll, rows_value); + + auto* column = assert_cast<ColumnHLL*>(hll_cols[0].get()); + auto size = dt_hll->get_uncompressed_serialized_bytes( + *column, BeExecVersionManager::get_newest_version()); + std::unique_ptr<char[]> buf = std::make_unique<char[]>(size); + auto* result = + dt_hll->serialize(*column, buf.get(), BeExecVersionManager::get_newest_version()); + ASSERT_EQ(result, buf.get() + size); + + auto column2 = dt_hll->create_column(); + dt_hll->deserialize(buf.get(), &column2, BeExecVersionManager::get_newest_version()); + for (size_t i = 0; i != rows_value; ++i) { + auto* column_res = assert_cast<ColumnHLL*>(column2.get()); + ASSERT_EQ(column->get_data()[i].to_string(), column_res->get_data()[i].to_string()); + } + helper->serialize_deserialize_assert(hll_cols, {dt_hll}); + std::cout << "finish serialize deserialize test" << std::endl; +} + +// serialize / deserialize as stream +TEST_P(DataTypeHLLTest, SerializeDeserializeAsStreamTest) { + MutableColumns hll_cols; + insert_data_hll(&hll_cols, dt_hll, rows_value); + + auto ser_col = ColumnString::create(); + VectorBufferWriter buffer_writer(*ser_col.get()); + auto* column_data = assert_cast<ColumnHLL*>(hll_cols[0].get()); + auto c = dt_hll->create_column(); + auto* column_res = assert_cast<ColumnHLL*>(c.get()); + column_res->resize(rows_value); + for (size_t i = 0; i != rows_value; ++i) { + doris::vectorized::DataTypeHLL::serialize_as_stream(column_data->get_element(i), + buffer_writer); + buffer_writer.commit(); + BufferReadable buffer_readable(ser_col->get_data_at(i)); + doris::vectorized::DataTypeHLL::deserialize_as_stream(column_res->get_element(i), + buffer_readable); + ASSERT_EQ(column_data->get_data()[i].to_string(), column_res->get_data()[i].to_string()); + } + std::cout << "finish serialize deserialize as stream test" << std::endl; +} + +INSTANTIATE_TEST_SUITE_P(Params, DataTypeHLLTest, ::testing::Values(0, 1, 10, 100)); + +} // namespace doris::vectorized \ No newline at end of file diff --git a/be/test/vec/data_types/data_type_ip_test.cpp b/be/test/vec/data_types/data_type_ip_test.cpp index c500c7cf2dd..d26806b7dcb 100644 --- a/be/test/vec/data_types/data_type_ip_test.cpp +++ b/be/test/vec/data_types/data_type_ip_test.cpp @@ -126,8 +126,8 @@ TEST_F(DataTypeIPTest, MetaInfoTest) { TEST_F(DataTypeIPTest, CreateColumnTest) { Field default_field_ipv4 = IPv4(0); Field default_field_ipv6 = IPv6(0); - create_column_assert(dt_ipv4, default_field_ipv4, 4); - create_column_assert(dt_ipv6, default_field_ipv6, 4); + create_column_assert(dt_ipv4, default_field_ipv4, 17); + create_column_assert(dt_ipv6, default_field_ipv6, 17); } TEST_F(DataTypeIPTest, GetFieldTest) { diff --git a/be/test/vec/data_types/data_type_quantile_state_test.cpp b/be/test/vec/data_types/data_type_quantile_state_test.cpp new file mode 100644 index 00000000000..dcd8d58503c --- /dev/null +++ b/be/test/vec/data_types/data_type_quantile_state_test.cpp @@ -0,0 +1,198 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <gtest/gtest-message.h> +#include <gtest/gtest-test-part.h> +#include <gtest/gtest.h> + +#include <iostream> + +#include "agent/be_exec_version_manager.h" +#include "vec/columns/column.h" +#include "vec/core/field.h" +#include "vec/core/types.h" +#include "vec/data_types/common_data_type_serder_test.h" +#include "vec/data_types/common_data_type_test.h" +#include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_factory.hpp" +#include "vec/data_types/data_type_nullable.h" +#include "vec/data_types/data_type_quantilestate.h" + +// this test is gonna to be a data type test template for all DataType which should make ut test to coverage the function defined +// for example DataTypeQuantileState should test this function: +// 1. datatype meta info: +// get_type_id, get_type_as_type_descriptor, get_storage_field_type, have_subtypes, get_pdata_type (const IDataType *data_type), to_pb_column_meta (PColumnMeta *col_meta) +// get_family_name, get_is_parametric, should_align_right_in_pretty_formats +// text_can_contain_only_valid_utf8 +// have_maximum_size_of_value, get_maximum_size_of_value_in_memory, get_size_of_value_in_memory +// get_precision, get_scale +// is_null_literal, is_value_represented_by_number, is_value_unambiguously_represented_in_contiguous_memory_region +// 2. datatype creation with column : create_column, create_column_const (size_t size, const Field &field), create_column_const_with_default_value (size_t size), get_uncompressed_serialized_bytes (const IColumn &column, int be_exec_version) +// 3. serde related: get_serde (int nesting_level=1) +// to_string (const IColumn &column, size_t row_num, BufferWritable &ostr), to_string (const IColumn &column, size_t row_num), to_string_batch (const IColumn &column, ColumnString &column_to) +// serialize (const IColumn &column, char *buf, int be_exec_version), deserialize (const char *buf, MutableColumnPtr *column, int be_exec_version) + +namespace doris::vectorized { + +class DataTypeQuantileStateTest : public ::testing::TestWithParam<int> { +protected: + void SetUp() override { + rows_value = GetParam(); + helper = std::make_unique<CommonDataTypeTest>(); + } + +public: + std::unique_ptr<CommonDataTypeTest> helper; + int rows_value; + DataTypePtr datatype_quantile_state = DataTypeFactory::instance().create_data_type( + FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE, 0, 0); +}; + +TEST_P(DataTypeQuantileStateTest, MetaInfoTest) { + TypeDescriptor quantile_state_type_descriptor = {PrimitiveType::TYPE_QUANTILE_STATE}; + auto col_meta = std::make_shared<PColumnMeta>(); + col_meta->set_type(PGenericType_TypeId_QUANTILE_STATE); + CommonDataTypeTest::DataTypeMetaInfo quantile_state_meta_info_to_assert = { + .type_id = TypeIndex::QuantileState, + .type_as_type_descriptor = &quantile_state_type_descriptor, + .family_name = "QuantileState", + .has_subtypes = false, + .storage_field_type = doris::FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE, + .should_align_right_in_pretty_formats = false, + .text_can_contain_only_valid_utf8 = true, + .have_maximum_size_of_value = false, + .size_of_value_in_memory = size_t(-1), + .precision = size_t(-1), + .scale = size_t(-1), + .is_null_literal = false, + .is_value_represented_by_number = false, + .pColumnMeta = col_meta.get(), + .is_value_unambiguously_represented_in_contiguous_memory_region = true, + .default_field = QuantileState(), + }; + helper->meta_info_assert(datatype_quantile_state, quantile_state_meta_info_to_assert); +} + +TEST_P(DataTypeQuantileStateTest, CreateColumnTest) { + Field default_field_quantile_state = QuantileState(); + helper->create_column_assert(datatype_quantile_state, default_field_quantile_state, 17); +} + +void insert_data_quantile_state(MutableColumns* quantile_state_cols, + DataTypePtr datetype_quantile_state, int rows_value, + std::vector<std::string>* data_strs = nullptr) { + auto serde_quantile_state = datetype_quantile_state->get_serde(1); + auto column_quantile_state = datetype_quantile_state->create_column(); + + quantile_state_cols->push_back(column_quantile_state->get_ptr()); + DataTypeSerDeSPtrs serde = {datetype_quantile_state->get_serde()}; + auto& data = assert_cast<ColumnQuantileState*>((*quantile_state_cols)[0].get())->get_data(); + for (size_t i = 0; i != rows_value; ++i) { + QuantileState quantile_state_value; + for (size_t j = 0; j <= i; ++j) { + quantile_state_value.add_value(j); + } + std::string memory_buffer(quantile_state_value.get_serialized_size(), '0'); + quantile_state_value.serialize(reinterpret_cast<uint8_t*>(memory_buffer.data())); + data.emplace_back(std::move(quantile_state_value)); + } + std::cout << "finish insert data" << std::endl; +} + +// test to_string | to_string_batch | from_string +TEST_P(DataTypeQuantileStateTest, FromAndToStringTest) { + MutableColumns quantile_state_cols; + std::vector<std::string> data_strs; + insert_data_quantile_state(&quantile_state_cols, datatype_quantile_state, rows_value, + &data_strs); + + { + // to_string_batch | from_string + auto col_to = ColumnString::create(); + datatype_quantile_state->to_string_batch(*quantile_state_cols[0]->get_ptr(), *col_to); + ASSERT_EQ(col_to->size(), quantile_state_cols[0]->get_ptr()->size()); + std::cout << "finish to_string_batch | from_string not support test" << std::endl; + } + + { + // to_string | from_string + auto ser_col = ColumnString::create(); + ser_col->reserve(quantile_state_cols[0]->get_ptr()->size()); + VectorBufferWriter buffer_writer(*ser_col.get()); + for (int i = 0; i < quantile_state_cols[0]->get_ptr()->size(); ++i) { + datatype_quantile_state->to_string(*quantile_state_cols[0]->get_ptr(), i, + buffer_writer); + std::string res = + datatype_quantile_state->to_string(*quantile_state_cols[0]->get_ptr(), i); + buffer_writer.commit(); + EXPECT_EQ(res, "QuantileState()"); // QuantileState to_string is not implemented + } + std::cout << "finish to_string | from_string not support test" << std::endl; + } +} + +// serialize / deserialize +TEST_P(DataTypeQuantileStateTest, SerializeDeserializeTest) { + MutableColumns quantile_state_cols; + insert_data_quantile_state(&quantile_state_cols, datatype_quantile_state, rows_value); + + auto* column = assert_cast<ColumnQuantileState*>(quantile_state_cols[0].get()); + auto size = datatype_quantile_state->get_uncompressed_serialized_bytes( + *column, BeExecVersionManager::get_newest_version()); + std::unique_ptr<char[]> buf = std::make_unique<char[]>(size); + auto* result = datatype_quantile_state->serialize(*column, buf.get(), + BeExecVersionManager::get_newest_version()); + ASSERT_EQ(result, buf.get() + size); + + auto column2 = datatype_quantile_state->create_column(); + datatype_quantile_state->deserialize(buf.get(), &column2, + BeExecVersionManager::get_newest_version()); + for (size_t i = 0; i != rows_value; ++i) { + auto* column_res = assert_cast<ColumnQuantileState*>(column2.get()); + ASSERT_EQ(column->get_data()[i].get_serialized_size(), + column_res->get_data()[i].get_serialized_size()); + } + helper->serialize_deserialize_assert(quantile_state_cols, {datatype_quantile_state}); + std::cout << "finish serialize deserialize test" << std::endl; +} + +// serialize / deserialize as stream +TEST_P(DataTypeQuantileStateTest, SerializeDeserializeAsStreamTest) { + MutableColumns quantile_state_cols; + insert_data_quantile_state(&quantile_state_cols, datatype_quantile_state, rows_value); + + auto ser_col = ColumnString::create(); + VectorBufferWriter buffer_writer(*ser_col.get()); + auto* column_data = assert_cast<ColumnQuantileState*>(quantile_state_cols[0].get()); + auto c = datatype_quantile_state->create_column(); + auto* column_res = assert_cast<ColumnQuantileState*>(c.get()); + column_res->resize(rows_value); + for (size_t i = 0; i != rows_value; ++i) { + doris::vectorized::DataTypeQuantileState::serialize_as_stream(column_data->get_element(i), + buffer_writer); + buffer_writer.commit(); + BufferReadable buffer_readable(ser_col->get_data_at(i)); + doris::vectorized::DataTypeQuantileState::deserialize_as_stream(column_res->get_element(i), + buffer_readable); + ASSERT_EQ(column_data->get_data()[i].get_serialized_size(), + column_res->get_data()[i].get_serialized_size()); + } + std::cout << "finish serialize deserialize as stream test" << std::endl; +} + +INSTANTIATE_TEST_SUITE_P(Params, DataTypeQuantileStateTest, ::testing::Values(0, 1, 100, 1000)); +} // namespace doris::vectorized \ No newline at end of file --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org