This is an automated email from the ASF dual-hosted git repository. gabriellee pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 8a8d37c271c [refine](column) ColumnArray does not implement the insert data function. (#43926) 8a8d37c271c is described below commit 8a8d37c271c9ba2031ee6dcd024940fb669c04ef Author: Mryange <yanxuech...@selectdb.com> AuthorDate: Mon Nov 25 15:34:47 2024 +0800 [refine](column) ColumnArray does not implement the insert data function. (#43926) ColumnArray cannot determine how many elements it contains from a single block of memory. The original approach where the result of get data at cannot be inserted back into insert data. Therefore, this function is not implemented directly. --- be/src/vec/columns/column.h | 11 ------ be/src/vec/columns/column_array.cpp | 52 ++++----------------------- be/src/vec/columns/column_complex.h | 3 -- be/src/vec/columns/column_const.cpp | 33 ----------------- be/src/vec/columns/column_const.h | 10 ------ be/src/vec/columns/column_decimal.h | 2 -- be/src/vec/columns/column_dictionary.h | 4 --- be/src/vec/columns/column_nullable.h | 6 ---- be/src/vec/columns/column_object.h | 5 --- be/src/vec/columns/column_vector.h | 2 -- be/src/vec/columns/predicate_column.h | 3 -- be/test/vec/columns/column_hash_func_test.cpp | 6 ++-- 12 files changed, 9 insertions(+), 128 deletions(-) diff --git a/be/src/vec/columns/column.h b/be/src/vec/columns/column.h index 19462b182bd..96408579a84 100644 --- a/be/src/vec/columns/column.h +++ b/be/src/vec/columns/column.h @@ -601,23 +601,12 @@ public: * To avoid confusion between these cases, we don't have isContiguous method. */ - /// Values in column are represented as continuous memory segment of fixed size. Implies values_have_fixed_size. - virtual bool is_fixed_and_contiguous() const { return false; } - - /// If is_fixed_and_contiguous, returns the underlying data array, otherwise throws an exception. virtual StringRef get_raw_data() const { throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, "Column {} is not a contiguous block of memory", get_name()); return StringRef {}; } - /// If values_have_fixed_size, returns size of value, otherwise throw an exception. - virtual size_t size_of_value_if_fixed() const { - throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, - "Values of column {} are not fixed size.", get_name()); - return 0; - } - /// Returns ratio of values in column, that are equal to default value of column. /// Checks only @sample_ratio ratio of rows. virtual double get_ratio_of_default_rows(double sample_ratio = 1.0) const { return 0.0; } diff --git a/be/src/vec/columns/column_array.cpp b/be/src/vec/columns/column_array.cpp index bd4464e2caf..0c5a53cdb24 100644 --- a/be/src/vec/columns/column_array.cpp +++ b/be/src/vec/columns/column_array.cpp @@ -151,26 +151,13 @@ void ColumnArray::get(size_t n, Field& res) const { } StringRef ColumnArray::get_data_at(size_t n) const { - /** Returns the range of memory that covers all elements of the array. - * Works for arrays of fixed length values. - * For arrays of strings and arrays of arrays, the resulting chunk of memory may not be one-to-one correspondence with the elements, - * since it contains only the data laid in succession, but not the offsets. - */ - size_t offset_of_first_elem = offset_at(n); - StringRef first; - if (offset_of_first_elem < get_data().size()) { - first = get_data().get_data_at(offset_of_first_elem); - } - - size_t array_size = size_at(n); - if (array_size == 0) { - return StringRef(first.data, 0); - } - - size_t offset_of_last_elem = offset_at(n + 1) - 1; - StringRef last = get_data().get_data_at(offset_of_last_elem); + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "Method get_data_at is not supported for " + get_name()); +} - return StringRef(first.data, last.data + last.size - first.data); +void ColumnArray::insert_data(const char* pos, size_t length) { + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "Method insert_data is not supported for " + get_name()); } bool ColumnArray::is_default_at(size_t n) const { @@ -178,33 +165,6 @@ bool ColumnArray::is_default_at(size_t n) const { return offsets_data[n] == offsets_data[static_cast<ssize_t>(n) - 1]; } -void ColumnArray::insert_data(const char* pos, size_t length) { - /** Similarly - only for arrays of fixed length values. - */ - if (!data->is_fixed_and_contiguous()) { - throw doris::Exception(ErrorCode::INTERNAL_ERROR, - "Method insert_data should have_fixed_size, {} is not suitable", - get_name()); - } - - size_t field_size = data->size_of_value_if_fixed(); - - size_t elems = 0; - - if (length) { - const char* end = pos + length; - for (; pos + field_size <= end; pos += field_size, ++elems) - data->insert_data(pos, field_size); - - if (pos != end) - throw doris::Exception(ErrorCode::INTERNAL_ERROR, - "Incorrect length argument for method ColumnArray::insert_data"); - __builtin_unreachable(); - } - - get_offsets().push_back(get_offsets().back() + elems); -} - StringRef ColumnArray::serialize_value_into_arena(size_t n, Arena& arena, char const*& begin) const { size_t array_size = size_at(n); diff --git a/be/src/vec/columns/column_complex.h b/be/src/vec/columns/column_complex.h index 24b6b7ddbd7..14ae940c9d7 100644 --- a/be/src/vec/columns/column_complex.h +++ b/be/src/vec/columns/column_complex.h @@ -207,9 +207,6 @@ public: // TODO add hash function } - bool is_fixed_and_contiguous() const override { return true; } - size_t size_of_value_if_fixed() const override { return sizeof(T); } - StringRef get_raw_data() const override { return StringRef(reinterpret_cast<const char*>(data.data()), data.size()); } diff --git a/be/src/vec/columns/column_const.cpp b/be/src/vec/columns/column_const.cpp index a4b3127ad6c..f751f1d8d3e 100644 --- a/be/src/vec/columns/column_const.cpp +++ b/be/src/vec/columns/column_const.cpp @@ -110,39 +110,6 @@ ColumnPtr ColumnConst::permute(const Permutation& perm, size_t limit) const { return ColumnConst::create(data, limit); } -void ColumnConst::update_crcs_with_value(uint32_t* __restrict hashes, doris::PrimitiveType type, - uint32_t rows, uint32_t offset, - const uint8_t* __restrict null_data) const { - DCHECK(null_data == nullptr); - DCHECK(rows == size()); - auto real_data = data->get_data_at(0); - if (real_data.data == nullptr) { - for (int i = 0; i < rows; ++i) { - hashes[i] = HashUtil::zlib_crc_hash_null(hashes[i]); - } - } else { - for (int i = 0; i < rows; ++i) { - hashes[i] = RawValue::zlib_crc32(real_data.data, real_data.size, type, hashes[i]); - } - } -} - -void ColumnConst::update_hashes_with_value(uint64_t* __restrict hashes, - const uint8_t* __restrict null_data) const { - DCHECK(null_data == nullptr); - auto real_data = data->get_data_at(0); - auto real_size = size(); - if (real_data.data == nullptr) { - for (int i = 0; i < real_size; ++i) { - hashes[i] = HashUtil::xxHash64NullWithSeed(hashes[i]); - } - } else { - for (int i = 0; i < real_size; ++i) { - hashes[i] = HashUtil::xxHash64WithSeed(real_data.data, real_data.size, hashes[i]); - } - } -} - void ColumnConst::get_permutation(bool /*reverse*/, size_t /*limit*/, int /*nan_direction_hint*/, Permutation& res) const { res.resize(s); diff --git a/be/src/vec/columns/column_const.h b/be/src/vec/columns/column_const.h index 980d9d64148..ee3860f0635 100644 --- a/be/src/vec/columns/column_const.h +++ b/be/src/vec/columns/column_const.h @@ -208,14 +208,6 @@ public: data->update_hash_with_value(0, hash); } - // (TODO.Amory) here may not use column_const update hash, and PrimitiveType is not used. - void update_crcs_with_value(uint32_t* __restrict hashes, PrimitiveType type, uint32_t rows, - uint32_t offset = 0, - const uint8_t* __restrict null_data = nullptr) const override; - - void update_hashes_with_value(uint64_t* __restrict hashes, - const uint8_t* __restrict null_data) const override; - ColumnPtr filter(const Filter& filt, ssize_t result_size_hint) const override; size_t filter(const Filter& filter) override; @@ -263,8 +255,6 @@ public: bool is_concrete_nullable() const override { return is_column_nullable(*data); } bool only_null() const override { return data->is_null_at(0); } bool is_numeric() const override { return data->is_numeric(); } - bool is_fixed_and_contiguous() const override { return data->is_fixed_and_contiguous(); } - size_t size_of_value_if_fixed() const override { return data->size_of_value_if_fixed(); } StringRef get_raw_data() const override { return data->get_raw_data(); } /// Not part of the common interface. diff --git a/be/src/vec/columns/column_decimal.h b/be/src/vec/columns/column_decimal.h index d754831cc56..4c2f69d5ef3 100644 --- a/be/src/vec/columns/column_decimal.h +++ b/be/src/vec/columns/column_decimal.h @@ -106,8 +106,6 @@ public: bool is_numeric() const override { return false; } bool is_column_decimal() const override { return true; } - bool is_fixed_and_contiguous() const override { return true; } - size_t size_of_value_if_fixed() const override { return sizeof(T); } size_t size() const override { return data.size(); } size_t byte_size() const override { return data.size() * sizeof(data[0]); } diff --git a/be/src/vec/columns/column_dictionary.h b/be/src/vec/columns/column_dictionary.h index 69e04973af7..ae7d001a31d 100644 --- a/be/src/vec/columns/column_dictionary.h +++ b/be/src/vec/columns/column_dictionary.h @@ -158,10 +158,6 @@ public: __builtin_unreachable(); } - bool is_fixed_and_contiguous() const override { return true; } - - size_t size_of_value_if_fixed() const override { return sizeof(T); } - [[noreturn]] StringRef get_raw_data() const override { throw doris::Exception(ErrorCode::INTERNAL_ERROR, "get_raw_data not supported in ColumnDictionary"); diff --git a/be/src/vec/columns/column_nullable.h b/be/src/vec/columns/column_nullable.h index 2b87aa982ca..252144fbc5f 100644 --- a/be/src/vec/columns/column_nullable.h +++ b/be/src/vec/columns/column_nullable.h @@ -334,18 +334,12 @@ public: bool is_column_array() const override { return get_nested_column().is_column_array(); } bool is_column_map() const override { return get_nested_column().is_column_map(); } bool is_column_struct() const override { return get_nested_column().is_column_struct(); } - bool is_fixed_and_contiguous() const override { return false; } bool is_exclusive() const override { return IColumn::is_exclusive() && nested_column->is_exclusive() && get_null_map_column().is_exclusive(); } - size_t size_of_value_if_fixed() const override { - return get_null_map_column().size_of_value_if_fixed() + - nested_column->size_of_value_if_fixed(); - } - bool only_null() const override { return size() == 1 && is_null_at(0); } // used in schema change diff --git a/be/src/vec/columns/column_object.h b/be/src/vec/columns/column_object.h index 1c8f38056c9..21bb4469115 100644 --- a/be/src/vec/columns/column_object.h +++ b/be/src/vec/columns/column_object.h @@ -525,11 +525,6 @@ public: throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, "get_raw_data" + get_name()); } - size_t size_of_value_if_fixed() const override { - throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, - "size_of_value_if_fixed" + get_name()); - } - StringRef get_data_at(size_t) const override { throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, "get_data_at" + get_name()); } diff --git a/be/src/vec/columns/column_vector.h b/be/src/vec/columns/column_vector.h index 2676d6d3444..2cb320b6992 100644 --- a/be/src/vec/columns/column_vector.h +++ b/be/src/vec/columns/column_vector.h @@ -373,8 +373,6 @@ public: ColumnPtr replicate(const IColumn::Offsets& offsets) const override; - bool is_fixed_and_contiguous() const override { return true; } - size_t size_of_value_if_fixed() const override { return sizeof(T); } StringRef get_raw_data() const override { return StringRef(reinterpret_cast<const char*>(data.data()), data.size()); } diff --git a/be/src/vec/columns/predicate_column.h b/be/src/vec/columns/predicate_column.h index c2c6456d862..7e15656fe1d 100644 --- a/be/src/vec/columns/predicate_column.h +++ b/be/src/vec/columns/predicate_column.h @@ -376,9 +376,6 @@ public: __builtin_unreachable(); } - bool is_fixed_and_contiguous() const override { return true; } - size_t size_of_value_if_fixed() const override { return sizeof(T); } - [[noreturn]] StringRef get_raw_data() const override { throw doris::Exception(ErrorCode::INTERNAL_ERROR, "get_raw_data not supported in PredicateColumnType"); diff --git a/be/test/vec/columns/column_hash_func_test.cpp b/be/test/vec/columns/column_hash_func_test.cpp index c49f1e0a578..4db279b6bb2 100644 --- a/be/test/vec/columns/column_hash_func_test.cpp +++ b/be/test/vec/columns/column_hash_func_test.cpp @@ -71,11 +71,11 @@ TEST(HashFuncTest, ArrayTypeTest) { DataTypePtr a = std::make_shared<DataTypeArray>(d); ColumnPtr col_a = a->create_column_const_with_default_value(1); // xxHash - EXPECT_NO_FATAL_FAILURE(col_a->update_hashes_with_value(xx_hashes)); + EXPECT_NO_FATAL_FAILURE(unpack_if_const(col_a).first->update_hashes_with_value(xx_hashes)); std::cout << xx_hashes[0] << std::endl; // crcHash - EXPECT_NO_FATAL_FAILURE( - col_a->update_crcs_with_value(crc_hashes, PrimitiveType::TYPE_ARRAY, 1)); + EXPECT_NO_FATAL_FAILURE(unpack_if_const(col_a).first->update_crcs_with_value( + crc_hashes, PrimitiveType::TYPE_ARRAY, 1)); std::cout << crc_hashes[0] << std::endl; } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org