This is an automated email from the ASF dual-hosted git repository. zhangstar333 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new af55267b0ee [BE](ut) add be ut for column complex test case (#48166) af55267b0ee is described below commit af55267b0ee68eb6a3f8bf0086a871f2c6273aa1 Author: zhangstar333 <zhangs...@selectdb.com> AuthorDate: Tue Feb 25 13:19:52 2025 +0800 [BE](ut) add be ut for column complex test case (#48166) ### What problem does this PR solve? add be ut for column complex test case --- be/src/vec/columns/column_complex.h | 2 +- be/src/vec/columns/column_fixed_length_object.h | 11 +- .../columns/column_fixed_length_object_test.cpp | 168 ++++++++ be/test/vec/core/column_complex_test.cpp | 477 +++++++++++++++++++++ 4 files changed, 654 insertions(+), 4 deletions(-) diff --git a/be/src/vec/columns/column_complex.h b/be/src/vec/columns/column_complex.h index ded4f96dc72..d439ed9e91c 100644 --- a/be/src/vec/columns/column_complex.h +++ b/be/src/vec/columns/column_complex.h @@ -87,7 +87,7 @@ public: if (UNLIKELY(num == 0)) { return; } - + // the offsets size should be num + 1 for (size_t i = 0; i != num; ++i) { insert_binary_data(data + offsets[i], offsets[i + 1] - offsets[i]); } diff --git a/be/src/vec/columns/column_fixed_length_object.h b/be/src/vec/columns/column_fixed_length_object.h index 0fd5b35e8d9..ad7cb07d635 100644 --- a/be/src/vec/columns/column_fixed_length_object.h +++ b/be/src/vec/columns/column_fixed_length_object.h @@ -187,14 +187,19 @@ public: ColumnPtr filter(const IColumn::Filter& filter, ssize_t result_size_hint) const override { column_match_filter_size(size(), filter.size()); auto res = create(_item_size); - res->resize(result_size_hint); - - for (size_t i = 0, pos = 0; i < filter.size(); i++) { + size_t column_size = size(); + if (result_size_hint > 0) { + res->reserve(result_size_hint); + } + res->resize(column_size); + size_t pos = 0; + for (size_t i = 0; i < filter.size(); i++) { if (filter[i]) { memcpy(&res->_data[pos * _item_size], &_data[i * _item_size], _item_size); pos++; } } + res->resize(pos); return res; } diff --git a/be/test/vec/columns/column_fixed_length_object_test.cpp b/be/test/vec/columns/column_fixed_length_object_test.cpp index fd0a3687d65..69f1edb2897 100644 --- a/be/test/vec/columns/column_fixed_length_object_test.cpp +++ b/be/test/vec/columns/column_fixed_length_object_test.cpp @@ -20,6 +20,7 @@ #include <gtest/gtest-message.h> #include <gtest/gtest-test-part.h> +#include <gtest/gtest.h> #include <stddef.h> #include <memory> @@ -27,6 +28,7 @@ #include "gtest/gtest_pred_impl.h" #include "vec/columns/column_vector.h" #include "vec/common/sip_hash.h" +#include "vec/common/string_ref.h" namespace doris::vectorized { @@ -85,4 +87,170 @@ TEST(ColumnFixedLenghtObjectTest, UpdateHashWithValue) { EXPECT_EQ(hash1.get64(), hash2.get64()); } + +TEST(ColumnFixedLenghtObjectTest, GetDataAtTest) { + auto column_fixed = ColumnFixedLengthObject::create(sizeof(int64_t)); + EXPECT_EQ(sizeof(int64_t), column_fixed->item_size()); + EXPECT_EQ(8, column_fixed->item_size()); + column_fixed->set_item_size(8); + ASSERT_EQ(column_fixed->get_name(), "ColumnFixedLengthObject"); + ASSERT_EQ(column_fixed->size(), 0); + ASSERT_EQ(column_fixed->get_data().size(), 0); + ASSERT_EQ(column_fixed->byte_size(), 0); + ASSERT_EQ(column_fixed->allocated_bytes(), 0); + std::cout << "1. test name item_size size success" << std::endl; + + column_fixed->insert_default(); + ASSERT_EQ(*reinterpret_cast<const int64_t*>(column_fixed->get_data_at(0).data), 0); + ASSERT_EQ(column_fixed->size(), 1); + ASSERT_EQ(column_fixed->byte_size(), 8); + ASSERT_EQ(column_fixed->allocated_bytes(), 64); + column_fixed->pop_back(1); + ASSERT_EQ(column_fixed->size(), 0); + ASSERT_EQ(column_fixed->byte_size(), 0); + ASSERT_EQ(column_fixed->allocated_bytes(), 64); + std::cout << "2. test byte_size allocated_bytes success" << std::endl; + + auto column_fixed2 = ColumnFixedLengthObject::create(sizeof(int64_t)); + column_fixed->resize(2); + ASSERT_TRUE(column_fixed->has_enough_capacity(*column_fixed2)); + *((int64_t*)column_fixed->get_data().data()) = 11; + *((int64_t*)&(column_fixed->get_data()[column_fixed->item_size()])) = 22; + ASSERT_EQ(column_fixed->size(), 2); + ASSERT_EQ(column_fixed->byte_size(), 16); + ASSERT_EQ(column_fixed->allocated_bytes(), 64); + std::cout << "3. test has_enough_capacity and value data success" << std::endl; + + Field res; + column_fixed->get(0, res); + column_fixed2->insert(res); + ASSERT_EQ(column_fixed2->size(), 1); + ASSERT_EQ(column_fixed->operator[](0), column_fixed2->operator[](0)); + column_fixed2->insert_from(*column_fixed, 1); + ASSERT_EQ(column_fixed2->size(), 2); + ASSERT_EQ(column_fixed->operator[](1), column_fixed2->operator[](1)); + //capacity and size is 32 16 16 + ASSERT_TRUE(column_fixed->has_enough_capacity(*column_fixed2)); + std::cout << "4. test get/insert/insert_from/has_enough_capacity data success" << std::endl; + + column_fixed2->clear(); + ASSERT_EQ(column_fixed2->size(), 0); + column_fixed2->insert_range_from(*column_fixed, 0, 2); + ASSERT_EQ(column_fixed2->size(), 2); + ASSERT_EQ(column_fixed->operator[](0), column_fixed2->operator[](0)); + ASSERT_EQ(column_fixed->operator[](1), column_fixed2->operator[](1)); + ASSERT_EQ(*reinterpret_cast<const int64_t*>(column_fixed2->get_data_at(0).data), 11); + ASSERT_EQ(*reinterpret_cast<const int64_t*>(column_fixed2->get_data_at(1).data), 22); + std::cout << "5. test clear/insert_range_from data success" << std::endl; + + int64_t val = 33; + column_fixed2->insert_data(reinterpret_cast<const char*>(&val), sizeof(val)); + ASSERT_EQ(column_fixed2->size(), 3); + auto value = column_fixed2->get_data_at(2); + ASSERT_EQ(*reinterpret_cast<const int64_t*>(value.data), 33); + std::cout << "6. test insert_data data success" << std::endl; + + auto column_fixed3 = ColumnFixedLengthObject::create(sizeof(int64_t)); + std::vector<uint32_t> indexs = {0, 1, 2}; + column_fixed3->insert_indices_from(*column_fixed2, indexs.data(), + indexs.data() + indexs.size()); + ASSERT_EQ(column_fixed3->size(), 3); + ASSERT_EQ(column_fixed2->operator[](0), column_fixed3->operator[](0)); + ASSERT_EQ(column_fixed2->operator[](1), column_fixed3->operator[](1)); + ASSERT_EQ(column_fixed2->operator[](2), column_fixed3->operator[](2)); + ASSERT_EQ(*reinterpret_cast<const int64_t*>(column_fixed3->get_data_at(2).data), 33); + std::cout << "7. test insert_indices_from data success" << std::endl; + + auto column_fixed4 = ColumnFixedLengthObject::create(sizeof(int64_t)); + std::vector<StringRef> strings; + for (int i = 0; i < 3; i++) { + strings.push_back(column_fixed2->get_data_at(i)); + } + column_fixed4->insert_many_strings(strings.data(), 3); + ASSERT_EQ(column_fixed4->size(), 3); + ASSERT_EQ(column_fixed2->operator[](0), column_fixed4->operator[](0)); + ASSERT_EQ(column_fixed2->operator[](1), column_fixed4->operator[](1)); + ASSERT_EQ(*reinterpret_cast<const int64_t*>(column_fixed4->get_data_at(0).data), 11); + ASSERT_EQ(*reinterpret_cast<const int64_t*>(column_fixed4->get_data_at(1).data), 22); + ASSERT_EQ(*reinterpret_cast<const int64_t*>(column_fixed2->get_data_at(2).data), 33); + // ASSERT_EQ(*reinterpret_cast<const int64_t*>(column_fixed4->get_data_at(2).data), 33); + // ASSERT_EQ(column_fixed2->get_data_at(2).to_string(), column_fixed4->get_data_at(2).to_string()); + std::cout << "8. test insert_many_strings data success" << std::endl; + + auto column_fixed5 = ColumnFixedLengthObject::create(sizeof(int64_t)); + std::string buffer; + std::vector<uint32_t> buffer_offsets(4, 0); + for (int i = 0; i < 3; ++i) { + buffer.append(strings[i]); + buffer_offsets[i + 1] = buffer_offsets[i] + strings[i].size; + } + column_fixed5->insert_many_continuous_binary_data(buffer.data(), buffer_offsets.data(), 3); + ASSERT_EQ(column_fixed5->size(), 3); + ASSERT_EQ(column_fixed2->operator[](0), column_fixed5->operator[](0)); + ASSERT_EQ(column_fixed2->operator[](1), column_fixed5->operator[](1)); + ASSERT_EQ(*reinterpret_cast<const int64_t*>(column_fixed5->get_data_at(0).data), 11); + ASSERT_EQ(*reinterpret_cast<const int64_t*>(column_fixed5->get_data_at(1).data), 22); + ASSERT_EQ(*reinterpret_cast<const int64_t*>(column_fixed5->get_data_at(2).data), 33); + std::cout << "9. test insert_many_continuous_binary_data data success" << std::endl; + + auto column_fixed6 = ColumnFixedLengthObject::create(sizeof(int64_t)); + column_fixed6->insert_range_from(*column_fixed5, 0, 3); + ASSERT_EQ(*reinterpret_cast<const int64_t*>(column_fixed6->get_data_at(0).data), 11); + ASSERT_EQ(*reinterpret_cast<const int64_t*>(column_fixed6->get_data_at(1).data), 22); + ASSERT_EQ(*reinterpret_cast<const int64_t*>(column_fixed6->get_data_at(2).data), 33); + column_fixed6->replace_column_data(*column_fixed5, 2, 0); + ASSERT_EQ(*reinterpret_cast<const int64_t*>(column_fixed6->get_data_at(0).data), 33); + ASSERT_EQ(*reinterpret_cast<const int64_t*>(column_fixed6->get_data_at(1).data), 22); + ASSERT_EQ(*reinterpret_cast<const int64_t*>(column_fixed6->get_data_at(2).data), 33); + std::cout << "10. test replace_column_data data success" << std::endl; + + vectorized::IColumn::Filter filter {0, 1, 0}; + ASSERT_EQ(column_fixed5->clone()->filter(filter), 1); + auto column_filter_res = column_fixed6->filter(filter, 0); + ASSERT_EQ(column_filter_res->size(), 1); + ASSERT_EQ(*reinterpret_cast<const int64_t*>(column_filter_res->get_data_at(0).data), 22); + std::cout << "11. test filter data success" << std::endl; + + IColumn::Permutation perm {2, 1, 0}; + auto column_permute = column_fixed6->permute(perm, 3); + ASSERT_EQ(column_permute->size(), 3); + ASSERT_EQ(*reinterpret_cast<const int64_t*>(column_permute->get_data_at(0).data), 33); + ASSERT_EQ(*reinterpret_cast<const int64_t*>(column_permute->get_data_at(1).data), 22); + ASSERT_EQ(*reinterpret_cast<const int64_t*>(column_permute->get_data_at(2).data), 33); + std::cout << "12. test permute data success" << std::endl; + + IColumn::Offsets offsets2 {2, 4, 6}; + std::vector<int> res_idx {0, 0, 1, 1, 2, 2}; + auto column_replicate_res = column_fixed6->replicate(offsets2); + ASSERT_EQ(column_replicate_res->size(), 6); + ASSERT_EQ(column_fixed6->size(), 3); + for (int i = 0; i < 6; ++i) { + ASSERT_EQ(column_fixed6->operator[](res_idx[i]), column_replicate_res->operator[](i)); + } + std::cout << "13. test more val data value and replicate success" << std::endl; + + Arena arena; + const char* pos = nullptr; + StringRef key(pos, 0); + for (int i = 0; i < 3; ++i) { + auto cur_ref = column_fixed6->serialize_value_into_arena(i, arena, pos); + key.data = cur_ref.data - key.size; + key.size += cur_ref.size; + } + ASSERT_EQ(key.size, 24); + ASSERT_EQ(*reinterpret_cast<const int64_t*>(key.data), 33); + std::cout << "14. test serialize_value_into_arena data success" << std::endl; + + auto column_fixed7 = ColumnFixedLengthObject::create(sizeof(int64_t)); + + const char* begin = key.data; + for (size_t i = 0; i < 3; ++i) { + begin = column_fixed7->deserialize_and_insert_from_arena(begin); + } + ASSERT_EQ(column_fixed7->size(), 3); + ASSERT_EQ(*reinterpret_cast<const int64_t*>(column_fixed7->get_data_at(0).data), 33); + ASSERT_EQ(*reinterpret_cast<const int64_t*>(column_fixed7->get_data_at(1).data), 22); + ASSERT_EQ(*reinterpret_cast<const int64_t*>(column_fixed7->get_data_at(2).data), 33); + std::cout << "15. test deserialize_and_insert_from_arena data success" << std::endl; +} } // namespace doris::vectorized diff --git a/be/test/vec/core/column_complex_test.cpp b/be/test/vec/core/column_complex_test.cpp index a0fbcccdd15..5427fd482c4 100644 --- a/be/test/vec/core/column_complex_test.cpp +++ b/be/test/vec/core/column_complex_test.cpp @@ -23,13 +23,20 @@ #include <gtest/gtest.h> #include <stddef.h> +#include <cstdlib> #include <memory> #include <string> +#include <tuple> +#include <vector> #include "agent/be_exec_version_manager.h" #include "gtest/gtest_pred_impl.h" #include "util/bitmap_value.h" +#include "vec/columns/column.h" +#include "vec/common/string_ref.h" +#include "vec/core/block.h" #include "vec/core/field.h" +#include "vec/core/types.h" #include "vec/data_types/data_type.h" #include "vec/data_types/data_type_bitmap.h" #include "vec/data_types/data_type_quantilestate.h" @@ -56,6 +63,476 @@ TEST(ColumnComplexTest, DataTypeBitmapTest) { std::make_shared<DataTypeBitMap>(); } +TEST(ColumnComplexTest, GetDataAtTest) { + auto column_bitmap = ColumnBitmap::create(); + auto column_hll = ColumnHLL::create(); + auto column_quantile_state = ColumnQuantileState::create(); + ASSERT_EQ(column_bitmap->byte_size(), 0); + + auto column_bitmap_verify = ColumnBitmap::create(); + auto column_hll_verify = ColumnHLL::create(); + auto column_quantile_state_verify = ColumnQuantileState::create(); + + column_bitmap->reserve(10); + // empty data value + column_bitmap->insert_value(BitmapValue::empty_bitmap()); + column_hll->insert_value(HyperLogLog()); + column_quantile_state->insert_value(QuantileState()); + + column_bitmap_verify->insert_default(); + column_hll_verify->insert_default(); + column_quantile_state_verify->insert_many_defaults(1); + ASSERT_EQ(column_bitmap_verify->byte_size(), 80); + ASSERT_EQ(column_hll_verify->byte_size(), 64); + ASSERT_EQ(column_quantile_state_verify->byte_size(), 64); + ASSERT_EQ(column_bitmap_verify->allocated_bytes(), 80); + ASSERT_EQ(column_hll_verify->allocated_bytes(), 64); + ASSERT_EQ(column_quantile_state_verify->allocated_bytes(), 64); + std::cout << "1. test byte_size/allocated_bytes empty success" << std::endl; + + //column_bitmap have reserve 10, but only insert 1, so the capacity is 10 + ASSERT_TRUE(column_bitmap->has_enough_capacity(*column_bitmap_verify)); + ASSERT_FALSE(column_hll->has_enough_capacity(*column_hll_verify)); + ASSERT_FALSE(column_quantile_state->has_enough_capacity(*column_quantile_state_verify)); + std::cout << "2. test has_enough_capacity success" << std::endl; + + ASSERT_EQ(column_bitmap->size(), 1); + ASSERT_EQ(column_bitmap_verify->size(), 1); + ASSERT_EQ(column_bitmap->get_data_at(0), column_bitmap_verify->get_data_at(0)); + ASSERT_EQ(column_hll->get_data_at(0), column_hll_verify->get_data_at(0)); + ASSERT_EQ(column_quantile_state->operator[](0), column_quantile_state_verify->operator[](0)); + std::cout << "3. test insert_default/insert_value empty success" << std::endl; + + std::srand(std::time(nullptr)); + auto lambda_function = []() -> std::tuple<UInt64, double> { + auto rand_val = random(); + auto uint64_val = UInt64(rand_val % std::numeric_limits<UInt64>::max()); + auto double_val = static_cast<double>(rand_val) / std::numeric_limits<int>::max(); + // std::cout << "val: " << rand_val << " " << rand_val << " " << double_val << std::endl; + return std::tuple {uint64_val, double_val}; + }; + UInt64 uint64_val = 0; + double double_val = 0; + std::tie(uint64_val, double_val) = lambda_function(); + // single data value + auto bitmap = BitmapValue(); + bitmap.add(uint64_val); + auto hll = HyperLogLog(); + hll.update(uint64_val); + auto quantile_state = QuantileState(); + quantile_state.add_value(double_val); + + // insert random value to idx 1 + column_bitmap->insert_value(bitmap); + column_hll->insert_value(hll); + column_quantile_state->insert_value(quantile_state); + + auto& bitmap_verify_data = column_bitmap_verify->get_data(); + auto& hll_verify_data = column_hll_verify->get_data(); + auto& quantile_state_verify_data = column_quantile_state_verify->get_data(); + // update verify data at idx 0 + bitmap_verify_data[0].add(uint64_val); + hll_verify_data[0].update(uint64_val); + quantile_state_verify_data[0].add_value(double_val); + ASSERT_EQ(column_bitmap->size(), 2); + ASSERT_EQ(column_bitmap_verify->size(), 1); + ASSERT_EQ(column_bitmap->get_element(1).to_string(), bitmap_verify_data[0].to_string()); + ASSERT_EQ(column_hll->get_element(1).to_string(), + column_hll_verify->get_element(0).to_string()); + ASSERT_EQ(column_quantile_state->get_data_at(1), column_quantile_state_verify->get_data_at(0)); + std::cout << "4. test insert/update/get_element data success" << std::endl; + + // insert data from column idx 1 + column_bitmap_verify->insert_from(*column_bitmap, 1); + column_hll_verify->insert_from(*column_hll, 1); + column_quantile_state_verify->insert_from(*column_quantile_state, 1); + ASSERT_EQ(column_bitmap->size(), 2); + ASSERT_EQ(column_bitmap_verify->size(), 2); + ASSERT_EQ(column_bitmap->get_element(1).to_string(), + column_bitmap_verify->get_element(1).to_string()); + ASSERT_EQ(column_hll->get_element(1).to_string(), + column_hll_verify->get_element(1).to_string()); + ASSERT_EQ(column_quantile_state->operator[](1), column_quantile_state_verify->operator[](1)); + ASSERT_EQ(column_bitmap->operator[](1), column_bitmap_verify->operator[](1)); + ASSERT_EQ(column_hll->operator[](1), column_hll_verify->operator[](1)); + std::cout << "5. test insert_from data success" << std::endl; + + Field field1, field2, field3; + column_bitmap->get(1, field1); + column_hll->get(1, field2); + column_quantile_state->get(1, field3); + + //pop_back data from column idx 1 + column_bitmap_verify->pop_back(1); + column_hll_verify->pop_back(1); + column_quantile_state_verify->pop_back(1); + //insert data from field + column_bitmap_verify->insert(field1); + column_hll_verify->insert(field2); + column_quantile_state_verify->insert(field3); + + ASSERT_EQ(column_bitmap->size(), 2); + ASSERT_EQ(column_bitmap_verify->size(), 2); + ASSERT_EQ(column_bitmap->get_element(1).to_string(), + column_bitmap_verify->get_element(1).to_string()); + ASSERT_EQ(column_hll->get_element(1).to_string(), + column_hll_verify->get_element(1).to_string()); + ASSERT_EQ(column_quantile_state->operator[](1), column_quantile_state_verify->operator[](1)); + ASSERT_EQ(column_bitmap->operator[](1), column_bitmap_verify->operator[](1)); + ASSERT_EQ(column_hll->operator[](1), column_hll_verify->operator[](1)); + std::cout << "6. test get/insert data and pop_back success" << std::endl; + + std::tie(uint64_val, double_val) = lambda_function(); + // two val in data value + bitmap.add(uint64_val); + hll.update(uint64_val); + quantile_state.add_value(double_val); + column_bitmap->insert_value(bitmap); + column_hll->insert_value(hll); + column_quantile_state->insert_value(quantile_state); + + column_bitmap_verify->insert_data(column_bitmap->get_data_at(2).data, + column_bitmap->get_data_at(2).size); + column_hll_verify->insert_data(column_hll->get_data_at(2).data, + column_hll->get_data_at(2).size); + column_quantile_state_verify->insert_data(column_quantile_state->get_data_at(2).data, + column_quantile_state->get_data_at(2).size); + ASSERT_EQ(column_bitmap->size(), 3); + ASSERT_EQ(column_bitmap_verify->size(), 3); + ASSERT_EQ(column_bitmap->get_element(2).to_string(), + column_bitmap_verify->get_element(2).to_string()); + ASSERT_EQ(column_hll->get_element(2).to_string(), + column_hll_verify->get_element(2).to_string()); + ASSERT_EQ(column_quantile_state->operator[](2), column_quantile_state_verify->operator[](2)); + ASSERT_EQ(column_bitmap->operator[](2), column_bitmap_verify->operator[](2)); + ASSERT_EQ(column_hll->operator[](2), column_hll_verify->operator[](2)); + std::cout << "7. test two val data value and insert_data success" << std::endl; + + // more val in data value + for (int range = 1; range < 100; ++range) { + std::tie(uint64_val, double_val) = lambda_function(); + bitmap.add(uint64_val); + hll.update(uint64_val); + quantile_state.add_value(double_val); + } + column_bitmap->insert_value(bitmap); + column_hll->insert_value(hll); + column_quantile_state->insert_value(quantile_state); + + std::string buffer; + buffer.resize(bitmap.getSizeInBytes(), '0'); + bitmap.write_to(const_cast<char*>(buffer.data())); + column_bitmap_verify->insert_binary_data(buffer.data(), buffer.size()); + + buffer.resize(hll.max_serialized_size(), '0'); + size_t actual_size = hll.serialize((uint8_t*)buffer.data()); + buffer.resize(actual_size); + column_hll_verify->insert_binary_data(buffer.data(), buffer.size()); + + buffer.resize(quantile_state.get_serialized_size()); + quantile_state.serialize(const_cast<uint8_t*>(reinterpret_cast<uint8_t*>(buffer.data()))); + column_quantile_state_verify->insert_binary_data(buffer.data(), buffer.size()); + + ASSERT_EQ(column_bitmap->size(), 4); + ASSERT_EQ(column_bitmap_verify->size(), 4); + ASSERT_EQ(column_bitmap->get_element(3).to_string(), + column_bitmap_verify->get_element(3).to_string()); + ASSERT_EQ(column_hll->get_element(3).to_string(), + column_hll_verify->get_element(3).to_string()); + ASSERT_EQ(column_quantile_state->operator[](3), column_quantile_state_verify->operator[](3)); + ASSERT_EQ(column_bitmap->operator[](3), column_bitmap_verify->operator[](3)); + ASSERT_EQ(column_hll->operator[](3), column_hll_verify->operator[](3)); + std::cout << "8. test more val data value and insert_binary_data success" << std::endl; + + column_bitmap_verify->clear(); + column_hll_verify->clear(); + column_quantile_state_verify->clear(); + ASSERT_EQ(column_bitmap_verify->size(), 0); + + std::vector<StringRef> bitmap_strings, hll_strings, quantile_state_strings; + auto rows = column_bitmap->size(); + std::vector<std::string> bitmap_buffers(rows), hll_buffers(rows), quantile_state_buffers(rows); + for (int i = 0; i < column_bitmap->size(); ++i) { + auto bitmap = column_bitmap->get_element(i); + bitmap_buffers[i].resize(bitmap.getSizeInBytes(), '0'); + bitmap.write_to(const_cast<char*>(bitmap_buffers[i].data())); + bitmap_strings.emplace_back(bitmap_buffers[i].data(), bitmap_buffers[i].size()); + + auto hll = column_hll->get_element(i); + hll_buffers[i].resize(hll.max_serialized_size(), '0'); + size_t actual_size = hll.serialize((uint8_t*)hll_buffers[i].data()); + hll_buffers[i].resize(actual_size); + hll_strings.emplace_back(hll_buffers[i].data(), hll_buffers[i].size()); + + auto quantile_state = column_quantile_state->get_element(i); + quantile_state_buffers[i].resize(quantile_state.get_serialized_size()); + quantile_state.serialize( + const_cast<uint8_t*>(reinterpret_cast<uint8_t*>(quantile_state_buffers[i].data()))); + quantile_state_strings.emplace_back(quantile_state_buffers[i].data(), + quantile_state_buffers[i].size()); + } + column_bitmap_verify->insert_many_strings(bitmap_strings.data(), column_bitmap->size()); + column_hll_verify->insert_many_strings(hll_strings.data(), column_hll->size()); + column_quantile_state_verify->insert_many_strings(quantile_state_strings.data(), + column_quantile_state->size()); + ASSERT_EQ(rows, column_bitmap_verify->size()); + ASSERT_EQ(rows, column_hll_verify->size()); + ASSERT_EQ(rows, column_quantile_state_verify->size()); + for (int i = 0; i < rows; ++i) { + ASSERT_EQ(column_bitmap->get_element(i).to_string(), + column_bitmap_verify->get_element(i).to_string()); + ASSERT_EQ(column_hll->get_element(i).to_string(), + column_hll_verify->get_element(i).to_string()); + ASSERT_EQ(column_quantile_state->operator[](i), + column_quantile_state_verify->operator[](i)); + ASSERT_EQ(column_bitmap->operator[](i), column_bitmap_verify->operator[](i)); + ASSERT_EQ(column_hll->operator[](i), column_hll_verify->operator[](i)); + } + std::cout << "9. test more val data value and insert_many_strings success" << std::endl; + + auto column_bitmap_verify2 = column_bitmap_verify->clone_empty(); + auto column_hll_verify2 = column_hll_verify->clone_resized(0); + auto column_quantile_state_verify2 = column_quantile_state_verify->clone_empty(); + ASSERT_EQ(column_bitmap_verify2->size(), 0); + ASSERT_EQ(column_hll_verify2->size(), 0); + ASSERT_EQ(column_quantile_state_verify2->size(), 0); + std::string bitmap_buffer, hll_buffer, quantile_state_buffer; + //the offset should be more than one value + std::vector<uint32_t> bitmap_offsets(rows + 1, 0), hll_offsets(rows + 1, 0), + quantile_state_offsets(rows + 1, 0); + for (int i = 0; i < rows; ++i) { + bitmap_buffer.append(bitmap_strings[i]); + hll_buffer.append(hll_buffers[i]); + quantile_state_buffer.append(quantile_state_buffers[i]); + bitmap_offsets[i + 1] = bitmap_offsets[i] + bitmap_strings[i].size; + hll_offsets[i + 1] = hll_offsets[i] + hll_strings[i].size; + quantile_state_offsets[i + 1] = quantile_state_offsets[i] + quantile_state_strings[i].size; + } + column_bitmap_verify2->insert_many_continuous_binary_data(bitmap_buffer.data(), + bitmap_offsets.data(), rows); + column_hll_verify2->insert_many_continuous_binary_data(hll_buffer.data(), hll_offsets.data(), + rows); + column_quantile_state_verify2->insert_many_continuous_binary_data( + quantile_state_buffer.data(), quantile_state_offsets.data(), rows); + ASSERT_EQ(rows, column_bitmap_verify2->size()); + ASSERT_EQ(rows, column_hll_verify2->size()); + ASSERT_EQ(rows, column_quantile_state_verify2->size()); + for (int i = 0; i < rows; ++i) { + ASSERT_EQ(column_quantile_state->operator[](i), + column_quantile_state_verify2->operator[](i)); + ASSERT_EQ(column_bitmap->operator[](i), column_bitmap_verify2->operator[](i)); + ASSERT_EQ(column_hll->operator[](i), column_hll_verify2->operator[](i)); + } + std::cout << "10. test more val data value and insert_many_continuous_binary_data success" + << std::endl; + + column_bitmap_verify->resize(rows); + column_hll_verify->resize(rows); + column_quantile_state_verify->resize(rows); + column_bitmap_verify->clear(); + column_hll_verify->clear(); + column_quantile_state_verify->clear(); + column_bitmap_verify->insert_range_from(*column_bitmap, 0, rows); + column_hll_verify->insert_range_from(*column_hll, 0, rows); + column_quantile_state_verify->insert_range_from(*column_quantile_state, 0, rows); + ASSERT_EQ(rows, column_bitmap_verify->size()); + ASSERT_EQ(rows, column_hll_verify->size()); + ASSERT_EQ(rows, column_quantile_state_verify->size()); + for (int i = 0; i < rows; ++i) { + ASSERT_EQ(column_quantile_state->operator[](i), + column_quantile_state_verify->operator[](i)); + ASSERT_EQ(column_bitmap->operator[](i), column_bitmap_verify->operator[](i)); + ASSERT_EQ(column_hll->operator[](i), column_hll_verify->operator[](i)); + } + std::cout << "11. test more val data value and insert_range_from success" << std::endl; + + column_bitmap_verify->clear(); + column_hll_verify->clear(); + column_quantile_state_verify->clear(); + std::vector<uint32_t> indices; + for (int i = 0; i < rows; ++i) { + indices.push_back(i); + } + column_bitmap_verify->insert_indices_from(*column_bitmap, indices.data(), + indices.data() + rows); + column_hll_verify->insert_indices_from(*column_hll, indices.data(), indices.data() + rows); + column_quantile_state_verify->insert_indices_from(*column_quantile_state, indices.data(), + indices.data() + rows); + ASSERT_EQ(rows, column_bitmap_verify->size()); + ASSERT_EQ(rows, column_hll_verify->size()); + ASSERT_EQ(rows, column_quantile_state_verify->size()); + for (int i = 0; i < rows; ++i) { + ASSERT_EQ(column_quantile_state->operator[](i), + column_quantile_state_verify->operator[](i)); + ASSERT_EQ(column_bitmap->operator[](i), column_bitmap_verify->operator[](i)); + ASSERT_EQ(column_hll->operator[](i), column_hll_verify->operator[](i)); + } + std::cout << "12. test more val data value and insert_indices_from success" << std::endl; + + column_bitmap->insert_default(); + column_hll->insert_default(); + column_quantile_state->insert_default(); + column_bitmap_verify->insert_many_from(*column_bitmap, rows, 1); + column_hll_verify->insert_many_from(*column_hll, rows, 1); + column_quantile_state_verify->insert_many_from(*column_quantile_state, rows, 1); + rows = rows + 1; + ASSERT_EQ(rows, column_bitmap_verify->size()); + ASSERT_EQ(rows, column_hll_verify->size()); + ASSERT_EQ(rows, column_quantile_state_verify->size()); + for (int i = 0; i < rows; ++i) { + ASSERT_EQ(column_quantile_state->operator[](i), + column_quantile_state_verify->operator[](i)); + ASSERT_EQ(column_bitmap->operator[](i), column_bitmap_verify->operator[](i)); + ASSERT_EQ(column_hll->operator[](i), column_hll_verify->operator[](i)); + } + std::cout << "13. test more val data value and insert_many_from success" << std::endl; + + column_bitmap->replace_column_data(*column_bitmap_verify, 0, 4); + column_hll->replace_column_data(*column_hll_verify, 0, 4); + column_quantile_state->replace_column_data(*column_quantile_state_verify, 0, 4); + for (int i = 0; i < rows; ++i) { + ASSERT_EQ(column_quantile_state->operator[](i), + column_quantile_state_verify->operator[](i)); + ASSERT_EQ(column_bitmap->operator[](i), column_bitmap_verify->operator[](i)); + ASSERT_EQ(column_hll->operator[](i), column_hll_verify->operator[](i)); + } + std::cout << "14. test more val data value and replace_column_data success" << std::endl; + + vectorized::IColumn::Filter filter; + for (int i = 0; i < rows; i++) { + filter.push_back(i % 2); + } + // filter data 0 1 0 1 0 + ASSERT_EQ(column_bitmap_verify->clone()->filter(filter), 2); + ASSERT_EQ(column_hll_verify->clone()->filter(filter), 2); + ASSERT_EQ(column_quantile_state_verify->clone()->filter(filter), 2); + auto column_filter_res_bitmap = column_bitmap->filter(filter, 0); + auto column_filter_res_hll = column_hll->filter(filter, 0); + auto column_filter_res_quantile_state = column_quantile_state->filter(filter, 0); + + ASSERT_EQ(column_filter_res_bitmap->size(), 2); + ASSERT_EQ(column_filter_res_hll->size(), 2); + ASSERT_EQ(column_filter_res_quantile_state->size(), 2); + for (int i = 0, j = 0; i < rows; ++i) { + if (i % 2) { + ASSERT_EQ(column_quantile_state->operator[](i), + column_filter_res_quantile_state->operator[](j)); + ASSERT_EQ(column_bitmap->operator[](i), column_filter_res_bitmap->operator[](j)); + ASSERT_EQ(column_hll->operator[](i), column_filter_res_hll->operator[](j)); + j++; + } + } + + filter.clear(); + filter.resize_fill(rows, 1); + // filter data 1 1 1 1 1 + ASSERT_EQ(column_bitmap_verify->clone()->filter(filter), rows); + ASSERT_EQ(column_hll_verify->clone()->filter(filter), rows); + ASSERT_EQ(column_quantile_state_verify->clone()->filter(filter), rows); + column_filter_res_bitmap = column_bitmap->filter(filter, 0); + column_filter_res_hll = column_hll->filter(filter, 0); + column_filter_res_quantile_state = column_quantile_state->filter(filter, 0); + ASSERT_EQ(column_filter_res_bitmap->size(), rows); + ASSERT_EQ(column_filter_res_hll->size(), rows); + ASSERT_EQ(column_filter_res_quantile_state->size(), rows); + for (int i = 0, j = 0; i < rows; ++i) { + if (i % 2) { + ASSERT_EQ(column_quantile_state->operator[](i), + column_filter_res_quantile_state->operator[](j)); + ASSERT_EQ(column_bitmap->operator[](i), column_filter_res_bitmap->operator[](j)); + ASSERT_EQ(column_hll->operator[](i), column_filter_res_hll->operator[](j)); + j++; + } + } + + filter.clear(); + filter.resize_fill(rows, 0); + // filter data 0 0 0 0 0 + ASSERT_EQ(column_bitmap_verify->clone()->filter(filter), 0); + ASSERT_EQ(column_hll_verify->clone()->filter(filter), 0); + ASSERT_EQ(column_quantile_state_verify->clone()->filter(filter), 0); + column_filter_res_bitmap = column_bitmap->filter(filter, 0); + column_filter_res_hll = column_hll->filter(filter, 0); + column_filter_res_quantile_state = column_quantile_state->filter(filter, 0); + ASSERT_EQ(column_filter_res_bitmap->size(), 0); + ASSERT_EQ(column_filter_res_hll->size(), 0); + ASSERT_EQ(column_filter_res_quantile_state->size(), 0); + std::cout << "15. test more val data value and filter success" << std::endl; + + IColumn::Permutation perm {4, 3, 2, 1, 0}; + auto column_bitmap_perm = column_bitmap->permute(perm, rows); + auto column_hll_perm = column_hll->permute(perm, rows); + auto column_quantile_state_perm = column_quantile_state->permute(perm, rows); + ASSERT_EQ(column_bitmap_perm->size(), 5); + ASSERT_EQ(column_hll_perm->size(), 5); + ASSERT_EQ(column_quantile_state_perm->size(), 5); + for (int i = 0, j = 4; i < rows; ++i, --j) { + ASSERT_EQ(column_quantile_state->operator[](i), column_quantile_state_perm->operator[](j)); + ASSERT_EQ(column_bitmap->operator[](i), column_bitmap_perm->operator[](j)); + ASSERT_EQ(column_hll->operator[](i), column_hll_perm->operator[](j)); + } + + IColumn::Permutation perm2 {0, 1, 2, 3, 4}; + auto column_bitmap_perm2 = column_bitmap->permute(perm, rows); + auto column_hll_perm2 = column_hll->permute(perm, rows); + auto column_quantile_state_perm2 = column_quantile_state->permute(perm, rows); + ASSERT_EQ(column_bitmap_perm2->size(), 5); + ASSERT_EQ(column_hll_perm2->size(), 5); + ASSERT_EQ(column_quantile_state_perm2->size(), 5); + for (int i = 0; i < rows; ++i) { + ASSERT_EQ(column_quantile_state->operator[](i), column_quantile_state_perm->operator[](i)); + ASSERT_EQ(column_bitmap->operator[](i), column_bitmap_perm->operator[](i)); + ASSERT_EQ(column_hll->operator[](i), column_hll_perm->operator[](i)); + } + + IColumn::Permutation perm3 {4, 2, 0, 1, 3}; + std::vector<int> permute_idx {2, 3, 1, 4, 0}; + auto column_bitmap_perm3 = column_bitmap->permute(perm, rows); + auto column_hll_perm3 = column_hll->permute(perm, rows); + auto column_quantile_state_perm3 = column_quantile_state->permute(perm, rows); + ASSERT_EQ(column_bitmap_perm3->size(), 5); + ASSERT_EQ(column_hll_perm3->size(), 5); + ASSERT_EQ(column_quantile_state_perm3->size(), 5); + for (int i = 0; i < rows; ++i) { + ASSERT_EQ(column_quantile_state->operator[](i), + column_quantile_state_perm3->operator[](permute_idx[i])); + ASSERT_EQ(column_bitmap->operator[](i), column_bitmap_perm3->operator[](permute_idx[i])); + ASSERT_EQ(column_hll->operator[](i), column_hll_perm3->operator[](permute_idx[i])); + } + std::cout << "16. test more val data value and permute success" << std::endl; + + IColumn::Offsets offsets {1, 2, 3, 4, 5}; + auto column_bitmap_replicate = column_bitmap->replicate(offsets); + auto column_hll_replicate = column_hll->replicate(offsets); + auto column_quantile_state_replicate = column_quantile_state->replicate(offsets); + ASSERT_EQ(column_bitmap_replicate->size(), 5); + ASSERT_EQ(column_hll_replicate->size(), 5); + ASSERT_EQ(column_quantile_state_replicate->size(), 5); + for (int i = 0; i < 5; ++i) { + ASSERT_EQ(column_quantile_state->operator[](i), + column_quantile_state_replicate->operator[](i)); + ASSERT_EQ(column_bitmap->operator[](i), column_bitmap_replicate->operator[](i)); + ASSERT_EQ(column_hll->operator[](i), column_hll_replicate->operator[](i)); + } + + IColumn::Offsets offsets2 {2, 4, 6, 8, 10}; + std::vector<int> res_idx {0, 0, 1, 1, 2, 2, 3, 3, 4, 4}; + auto column_bitmap_replicate2 = column_bitmap->replicate(offsets2); + auto column_hll_replicate2 = column_hll->replicate(offsets2); + auto column_quantile_state_replicate2 = column_quantile_state->replicate(offsets2); + ASSERT_EQ(column_bitmap_replicate2->size(), 10); + ASSERT_EQ(column_hll_replicate2->size(), 10); + ASSERT_EQ(column_quantile_state_replicate2->size(), 10); + ASSERT_EQ(column_quantile_state->size(), 5); + for (int i = 0; i < 10; ++i) { + ASSERT_EQ(column_quantile_state->operator[](res_idx[i]), + column_quantile_state_replicate2->operator[](i)); + ASSERT_EQ(column_bitmap->operator[](res_idx[i]), column_bitmap_replicate2->operator[](i)); + ASSERT_EQ(column_hll->operator[](res_idx[i]), column_hll_replicate2->operator[](i)); + } + std::cout << "17. test more val data value and replicate success" << std::endl; +} + class ColumnBitmapTest : public testing::Test { public: virtual void SetUp() override {} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org