amorynan commented on code in PR #42269: URL: https://github.com/apache/doris/pull/42269#discussion_r1870963099
########## be/test/vec/columns/common_column_test.h: ########## @@ -0,0 +1,766 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <gtest/gtest-message.h> +#include <gtest/gtest-test-part.h> +#include <gtest/gtest.h> + +#include "olap/schema.h" +#include "vec/columns/column.h" +#include "vec/columns/column_array.h" +#include "vec/columns/column_map.h" +#include "vec/columns/columns_number.h" +#include "vec/core/field.h" +#include "vec/core/sort_block.h" +#include "vec/core/sort_description.h" +#include "vec/core/types.h" +#include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_array.h" +#include "vec/data_types/data_type_map.h" + +// This is a common test template for all column types; each column's unit test should use it to cover the functions defined in column. +// For example, column_array should test these functions: +// size, reserve, resize, empty, byte_size, allocated_bytes, clone_resized, +// get_shrinked_column, filter, filter_by_selector, serialize_vec, deserialize_vec, get_max_row_byte_size +// +namespace doris::vectorized { + +class CommonColumnTest : public ::testing::Test { +public: + void SetUp() override { + col_str = ColumnString::create(); + col_str->insert_data("aaa", 3); 
+ col_str->insert_data("bb", 2); + col_str->insert_data("cccc", 4); + + col_int = ColumnInt64::create(); + col_int->insert_value(1); + col_int->insert_value(2); + col_int->insert_value(3); + + col_dcm = ColumnDecimal64::create(0, 3); + col_dcm->insert_value(1.23); + col_dcm->insert_value(4.56); + col_dcm->insert_value(7.89); + + col_arr = ColumnArray::create(ColumnInt64::create(), ColumnArray::ColumnOffsets::create()); + Array array1 = {1, 2, 3}; + Array array2 = {4}; + col_arr->insert(array1); + col_arr->insert(Array()); + col_arr->insert(array2); + + col_map = ColumnMap::create(ColumnString::create(), ColumnInt64::create(), + ColumnArray::ColumnOffsets::create()); + Array k1 = {"a", "b", "c"}; + Array v1 = {1, 2, 3}; + Array k2 = {"d"}; + Array v2 = {4}; + Array a = Array(); + Map map1, map2, map3; + map1.push_back(k1); + map1.push_back(v1); + col_map->insert(map1); + map3.push_back(a); + map3.push_back(a); + col_map->insert(map3); + map2.push_back(k2); + map2.push_back(v2); + col_map->insert(map2); + } + + ColumnString::MutablePtr col_str; + ColumnInt64::MutablePtr col_int; + ColumnDecimal64::MutablePtr col_dcm; + ColumnArray::MutablePtr col_arr; + ColumnMap::MutablePtr col_map; + + void checkColumn(const IColumn& col1, const IColumn& col2, const IDataType& dataType, + size_t column_size) { + if (WhichDataType(dataType).is_map()) { + auto map1 = check_and_get_column<ColumnMap>(col1); + auto map2 = check_and_get_column<ColumnMap>(col2); + const DataTypeMap& rhs_map = static_cast<const DataTypeMap&>(dataType); + checkColumn(map1->get_keys(), map2->get_keys(), *rhs_map.get_key_type(), + map1->get_keys().size()); + checkColumn(map2->get_values(), map2->get_values(), *rhs_map.get_value_type(), + map1->get_values().size()); + } else { + if (WhichDataType(dataType).is_int8()) { + auto c1 = check_and_get_column<ColumnInt8>(col1); + auto c2 = check_and_get_column<ColumnInt8>(col2); + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(c1->get_element(i), 
c2->get_element(i)); + } + } else if (WhichDataType(dataType).is_int16()) { + auto c1 = check_and_get_column<ColumnInt16>(col1); + auto c2 = check_and_get_column<ColumnInt16>(col2); + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(c1->get_element(i), c2->get_element(i)); + } + } else if (WhichDataType(dataType).is_int32()) { + auto c1 = check_and_get_column<ColumnInt32>(col1); + auto c2 = check_and_get_column<ColumnInt32>(col2); + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(c1->get_element(i), c2->get_element(i)); + } + } else if (WhichDataType(dataType).is_int64()) { + auto c1 = check_and_get_column<ColumnInt64>(col1); + auto c2 = check_and_get_column<ColumnInt64>(col2); + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(c1->get_element(i), c2->get_element(i)); + } + } else if (WhichDataType(dataType).is_int128()) { + auto c1 = check_and_get_column<ColumnInt128>(col1); + auto c2 = check_and_get_column<ColumnInt128>(col2); + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(c1->get_element(i), c2->get_element(i)); + } + } else if (WhichDataType(dataType).is_float32()) { + auto c1 = check_and_get_column<ColumnFloat32>(col1); + auto c2 = check_and_get_column<ColumnFloat32>(col2); + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(c1->get_element(i), c2->get_element(i)); + } + } else if (WhichDataType(dataType).is_float64()) { + auto c1 = check_and_get_column<ColumnFloat64>(col1); + auto c2 = check_and_get_column<ColumnFloat64>(col2); + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(c1->get_element(i), c2->get_element(i)); + } + } else if (WhichDataType(dataType).is_uint8()) { + auto c1 = check_and_get_column<ColumnUInt8>(col1); + auto c2 = check_and_get_column<ColumnUInt8>(col2); + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(c1->get_element(i), c2->get_element(i)); + } + } else if (WhichDataType(dataType).is_uint16()) { + auto c1 = check_and_get_column<ColumnUInt16>(col1); + auto c2 = 
check_and_get_column<ColumnUInt16>(col2); + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(c1->get_element(i), c2->get_element(i)); + } + } else if (WhichDataType(dataType).is_uint32()) { + auto c1 = check_and_get_column<ColumnUInt32>(col1); + auto c2 = check_and_get_column<ColumnUInt32>(col2); + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(c1->get_element(i), c2->get_element(i)); + } + } else if (WhichDataType(dataType).is_uint64()) { + auto c1 = check_and_get_column<ColumnUInt64>(col1); + auto c2 = check_and_get_column<ColumnUInt64>(col2); + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(c1->get_element(i), c2->get_element(i)); + } + } else if (WhichDataType(dataType).is_decimal()) { + auto c1 = check_and_get_column<ColumnDecimal64>(col1); + auto c2 = check_and_get_column<ColumnDecimal64>(col2); + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(c1->get_element(i), c2->get_element(i)); + } + } else if (WhichDataType(dataType).is_decimal32()) { + auto c1 = check_and_get_column<ColumnDecimal32>(col1); + auto c2 = check_and_get_column<ColumnDecimal32>(col2); + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(c1->get_element(i), c2->get_element(i)); + } + } else if (WhichDataType(dataType).is_decimal64()) { + auto c1 = check_and_get_column<ColumnDecimal64>(col1); + auto c2 = check_and_get_column<ColumnDecimal64>(col2); + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(c1->get_element(i), c2->get_element(i)); + } + } else if (WhichDataType(dataType).is_decimal128v2()) { + auto c1 = check_and_get_column<ColumnDecimal128V2>(col1); + auto c2 = check_and_get_column<ColumnDecimal128V2>(col2); + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(c1->get_element(i), c2->get_element(i)); + } + } else if (WhichDataType(dataType).is_decimal128v3()) { + auto c1 = check_and_get_column<ColumnDecimal128V3>(col1); + auto c2 = check_and_get_column<ColumnDecimal128V3>(col2); + for (size_t i = 0; i < column_size; ++i) { + 
EXPECT_EQ(c1->get_element(i), c2->get_element(i)); + } + } else if (WhichDataType(dataType).is_decimal256()) { + auto c1 = check_and_get_column<ColumnDecimal<Decimal256>>(col1); + auto c2 = check_and_get_column<ColumnDecimal<Decimal256>>(col1); + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(c1->get_element(i), c2->get_element(i)); + } + } else { + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(col1.get_data_at(i), col2.get_data_at(i)); + } + } + } + } + + void printColumn(const IColumn& column, const IDataType& dataType) { + std::cout << "column total size: " << column.size() << std::endl; + if (WhichDataType(dataType).is_map()) { + auto map = check_and_get_column<ColumnMap>(column); + std::cout << "map {keys, values}" << std::endl; + const DataTypeMap& rhs_map = static_cast<const DataTypeMap&>(dataType); + printColumn(map->get_keys(), *rhs_map.get_key_type()); + printColumn(map->get_values(), *rhs_map.get_value_type()); + } else if (WhichDataType(dataType).is_array()) { + auto array = check_and_get_column<ColumnArray>(column); + std::cout << "array: " << std::endl; + const auto& rhs_array = static_cast<const DataTypeArray&>(dataType); + printColumn(array->get_data(), *rhs_array.get_nested_type()); + } else { + size_t column_size = column.size(); + std::cout << column.get_name() << ": " << std::endl; + if (WhichDataType(dataType).is_int8()) { + auto col = check_and_get_column<ColumnInt8>(column); + for (size_t i = 0; i < column_size; ++i) { + std::cout << col->get_element(i) << " "; + } + } else if (WhichDataType(dataType).is_int16()) { + auto col = check_and_get_column<ColumnInt16>(column); + for (size_t i = 0; i < column_size; ++i) { + std::cout << col->get_element(i) << " "; + } + } else if (WhichDataType(dataType).is_int32()) { + auto col = check_and_get_column<ColumnInt32>(column); + for (size_t i = 0; i < column_size; ++i) { + std::cout << col->get_element(i) << " "; + } + } else if (WhichDataType(dataType).is_int64()) { + auto col = 
check_and_get_column<ColumnInt64>(column); + for (size_t i = 0; i < column_size; ++i) { + std::cout << col->get_element(i) << " "; + } + } else if (WhichDataType(dataType).is_int128()) { + auto col = check_and_get_column<ColumnInt128>(column); + for (size_t i = 0; i < column_size; ++i) { + std::cout << col->get_element(i) << " "; + } + } else if (WhichDataType(dataType).is_float32()) { + auto col = check_and_get_column<ColumnFloat32>(column); + for (size_t i = 0; i < column_size; ++i) { + std::cout << col->get_element(i) << " "; + } + } else if (WhichDataType(dataType).is_float64()) { + auto col = check_and_get_column<ColumnFloat64>(column); + for (size_t i = 0; i < column_size; ++i) { + std::cout << col->get_element(i) << " "; + } + } else if (WhichDataType(dataType).is_uint8()) { + auto col = check_and_get_column<ColumnUInt8>(column); + for (size_t i = 0; i < column_size; ++i) { + std::cout << col->get_element(i) << " "; + } + } else if (WhichDataType(dataType).is_uint16()) { + auto col = check_and_get_column<ColumnUInt16>(column); + for (size_t i = 0; i < column_size; ++i) { + std::cout << col->get_element(i) << " "; + } + } else if (WhichDataType(dataType).is_uint32()) { + auto col = check_and_get_column<ColumnUInt32>(column); + for (size_t i = 0; i < column_size; ++i) { + std::cout << col->get_element(i) << " "; + } + } else if (WhichDataType(dataType).is_uint64()) { + auto col = check_and_get_column<ColumnUInt64>(column); + for (size_t i = 0; i < column_size; ++i) { + std::cout << col->get_element(i) << " "; + } + } else if (WhichDataType(dataType).is_uint128()) { + auto col = check_and_get_column<ColumnUInt128>(column); + for (size_t i = 0; i < column_size; ++i) { + std::cout << col->get_data_at(i) << " "; + } + } else if (WhichDataType(dataType).is_decimal()) { + auto col = check_and_get_column<ColumnDecimal64>(column); + for (size_t i = 0; i < column_size; ++i) { + std::cout << col->get_element(i) << " "; + } + } else if 
(WhichDataType(dataType).is_decimal32()) { + auto col = check_and_get_column<ColumnDecimal32>(column); + for (size_t i = 0; i < column_size; ++i) { + std::cout << col->get_element(i) << " "; + } + } else if (WhichDataType(dataType).is_decimal64()) { + auto col = check_and_get_column<ColumnDecimal64>(column); + for (size_t i = 0; i < column_size; ++i) { + std::cout << col->get_element(i) << " "; + } + } else if (WhichDataType(dataType).is_decimal128v2()) { + auto col = check_and_get_column<ColumnDecimal128V2>(column); + for (size_t i = 0; i < column_size; ++i) { + std::cout << col->get_element(i) << " "; + } + } else if (WhichDataType(dataType).is_decimal128v3()) { + auto col = check_and_get_column<ColumnDecimal128V3>(column); + for (size_t i = 0; i < column_size; ++i) { + std::cout << col->get_element(i) << " "; + } + } else if (WhichDataType(dataType).is_decimal256()) { + auto col = check_and_get_column<ColumnDecimal<Decimal256>>(column); + for (size_t i = 0; i < column_size; ++i) { + std::cout << col->get_element(i) << " "; + } + } else if (WhichDataType(dataType).is_date()) { + auto col = check_and_get_column<ColumnDate>(column); + for (size_t i = 0; i < column_size; ++i) { + std::cout << col->get_data_at(i) << " "; + } + } else if (WhichDataType(dataType).is_date_time()) { + auto col = check_and_get_column<ColumnDateTime>(column); + for (size_t i = 0; i < column_size; ++i) { + std::cout << col->get_data_at(i) << " "; + } + } else if (WhichDataType(dataType).is_date_v2()) { + auto col = check_and_get_column<ColumnDateV2>(column); + for (size_t i = 0; i < column_size; ++i) { + std::cout << col->get_data_at(i) << " "; + } + } else if (WhichDataType(dataType).is_date_time_v2()) { + auto col = check_and_get_column<ColumnDateTimeV2>(column); + for (size_t i = 0; i < column_size; ++i) { + std::cout << col->get_data_at(i) << " "; + } + } else { + std::cout << "data type: " << dataType.get_name() << std::endl; + std::cout << "column type: " << column.get_name() << 
std::endl; + for (size_t i = 0; i < column_size; ++i) { + std::cout << column.get_data_at(i).to_string() << " "; + } + } + std::cout << std::endl; + } + } + // Functions that report or change the column size: + // size, reserve, resize, empty, byte_size, allocated_bytes, clone_resized, get_shrinked_column + // cut (LIMIT operation), shrink + void sizeAssert(MutableColumnPtr col, size_t expect_size) { + EXPECT_EQ(col->size(), expect_size); + } + + // By default empty() is implemented as size() == 0, so just check the size + void emptyAssert(MutableColumnPtr col) { EXPECT_EQ(col->size(), 0); } + + // reserve, resize, byte_size, allocated_bytes, clone_resized, get_shrinked_column + void reserveAssert(MutableColumnPtr col, size_t expect_size) { + col->reserve(expect_size); + EXPECT_EQ(col->allocated_bytes(), expect_size); + } + + // cut (LIMIT operation) cuts the column with the given from and to, and returns the new column. + // Note that the returned column is a clone of the original column. + void cutAssert(MutableColumnPtr col, size_t from, size_t to, size_t expect_size) { + auto ori = col->size(); + auto ptr = col->cut(from, to); + EXPECT_EQ(ptr->size(), expect_size); Review Comment: done -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org