This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 92e1f687e40 [chore](columns) remove update_hashes_with_value for SipHash (#31224) 92e1f687e40 is described below commit 92e1f687e40d64775cb13565d976f10961d3e4bf Author: Uniqueyou <134280716+wyxxx...@users.noreply.github.com> AuthorDate: Wed Feb 21 23:56:50 2024 +0800 [chore](columns) remove update_hashes_with_value for SipHash (#31224) --- be/src/vec/columns/column.h | 22 ------------------ be/src/vec/columns/column_array.cpp | 5 ---- be/src/vec/columns/column_array.h | 3 --- be/src/vec/columns/column_complex.h | 11 ++------- be/src/vec/columns/column_const.cpp | 16 ------------- be/src/vec/columns/column_const.h | 3 --- be/src/vec/columns/column_decimal.cpp | 6 ----- be/src/vec/columns/column_decimal.h | 2 -- be/src/vec/columns/column_map.cpp | 5 ---- be/src/vec/columns/column_map.h | 3 --- be/src/vec/columns/column_nullable.cpp | 19 --------------- be/src/vec/columns/column_nullable.h | 2 -- be/src/vec/columns/column_string.h | 5 ---- be/src/vec/columns/column_struct.cpp | 5 ---- be/src/vec/columns/column_struct.h | 3 --- be/src/vec/columns/column_vector.cpp | 6 ----- be/src/vec/columns/column_vector.h | 3 --- be/test/vec/columns/column_hash_func_test.cpp | 33 --------------------------- 18 files changed, 2 insertions(+), 150 deletions(-) diff --git a/be/src/vec/columns/column.h b/be/src/vec/columns/column.h index 322456a8f77..bf869961544 100644 --- a/be/src/vec/columns/column.h +++ b/be/src/vec/columns/column.h @@ -46,19 +46,6 @@ class SipHash; -#define SIP_HASHES_FUNCTION_COLUMN_IMPL() \ - auto s = hashes.size(); \ - DCHECK(s == size()); \ - if (null_data == nullptr) { \ - for (size_t i = 0; i < s; i++) { \ - update_hash_with_value(i, hashes[i]); \ - } \ - } else { \ - for (size_t i = 0; i < s; i++) { \ - if (null_data[i] == 0) update_hash_with_value(i, hashes[i]); \ - } \ - } - #define DO_CRC_HASHES_FUNCTION_COLUMN_IMPL() \ if (null_data == nullptr) { \ for (size_t i = 0; i < s; i++) { \ @@ -362,15 +349,6 @@ public: LOG(FATAL) << get_name() << " update_hash_with_value siphash not supported"; } - /// Update state of hash function with value of n elements to avoid the virtual function call - /// null_data to mark whether need to do hash compute, null_data == nullptr - /// means all element need to do hash function, else only *null_data != 0 need to do hash func - /// do xxHash here, faster than other hash method - virtual void update_hashes_with_value(std::vector<SipHash>& hashes, - const uint8_t* __restrict null_data = nullptr) const { - LOG(FATAL) << get_name() << " update_hashes_with_value siphash not supported"; - } - /// Update state of hash function with value of n elements to avoid the virtual function call /// null_data to mark whether need to do hash compute, null_data == nullptr /// means all element need to do hash function, else only *null_data != 0 need to do hash func diff --git a/be/src/vec/columns/column_array.cpp b/be/src/vec/columns/column_array.cpp index 6d2914d8053..9abf13b9f3f 100644 --- a/be/src/vec/columns/column_array.cpp +++ b/be/src/vec/columns/column_array.cpp @@ -291,11 +291,6 @@ void ColumnArray::update_hash_with_value(size_t n, SipHash& hash) const { for (size_t i = 0; i < array_size; ++i) get_data().update_hash_with_value(offset + i, hash); } -void ColumnArray::update_hashes_with_value(std::vector<SipHash>& hashes, - const uint8_t* __restrict null_data) const { - SIP_HASHES_FUNCTION_COLUMN_IMPL(); -} - // for every array row calculate xxHash void ColumnArray::update_xxHash_with_value(size_t start, size_t end, uint64_t& hash, const uint8_t* __restrict null_data) const { diff --git a/be/src/vec/columns/column_array.h b/be/src/vec/columns/column_array.h index 046bc22ac48..17408bfa633 100644 --- a/be/src/vec/columns/column_array.h +++ b/be/src/vec/columns/column_array.h @@ -142,9 +142,6 @@ public: void update_crc_with_value(size_t start, size_t end, uint32_t& hash, const uint8_t* __restrict null_data) const override; - void update_hashes_with_value(std::vector<SipHash>& hashes, - const uint8_t* __restrict null_data) const override; - void update_hashes_with_value(uint64_t* __restrict hashes, const uint8_t* __restrict null_data = nullptr) const override; diff --git a/be/src/vec/columns/column_complex.h b/be/src/vec/columns/column_complex.h index 8e9686cb2a5..70d32da0b9b 100644 --- a/be/src/vec/columns/column_complex.h +++ b/be/src/vec/columns/column_complex.h @@ -220,15 +220,8 @@ public: // TODO add hash function } - virtual void update_hashes_with_value( - std::vector<SipHash>& hashes, - const uint8_t* __restrict null_data = nullptr) const override { - // TODO add hash function - } - - virtual void update_hashes_with_value( - uint64_t* __restrict hashes, - const uint8_t* __restrict null_data = nullptr) const override { + void update_hashes_with_value(uint64_t* __restrict hashes, + const uint8_t* __restrict null_data = nullptr) const override { // TODO add hash function } diff --git a/be/src/vec/columns/column_const.cpp b/be/src/vec/columns/column_const.cpp index f7efec1f72a..e06e53b4289 100644 --- a/be/src/vec/columns/column_const.cpp +++ b/be/src/vec/columns/column_const.cpp @@ -93,22 +93,6 @@ ColumnPtr ColumnConst::permute(const Permutation& perm, size_t limit) const { return ColumnConst::create(data, limit); } -void ColumnConst::update_hashes_with_value(std::vector<SipHash>& hashes, - const uint8_t* __restrict null_data) const { - DCHECK(null_data == nullptr); - DCHECK(hashes.size() == size()); - auto real_data = data->get_data_at(0); - if (real_data.data == nullptr) { - for (auto& hash : hashes) { - hash.update(0); - } - } else { - for (auto& hash : hashes) { - hash.update(real_data.data, real_data.size); - } - } -} - void ColumnConst::update_crcs_with_value(uint32_t* __restrict hashes, doris::PrimitiveType type, uint32_t rows, uint32_t offset, const uint8_t* __restrict null_data) const { diff --git a/be/src/vec/columns/column_const.h b/be/src/vec/columns/column_const.h index 5498fbf7c20..746cb00fd5d 100644 --- a/be/src/vec/columns/column_const.h +++ b/be/src/vec/columns/column_const.h @@ -177,9 +177,6 @@ public: data->update_hash_with_value(0, hash); } - void update_hashes_with_value(std::vector<SipHash>& hashes, - const uint8_t* __restrict null_data) const override; - // (TODO.Amory) here may not use column_const update hash, and PrimitiveType is not used. void update_crcs_with_value(uint32_t* __restrict hashes, PrimitiveType type, uint32_t rows, uint32_t offset = 0, diff --git a/be/src/vec/columns/column_decimal.cpp b/be/src/vec/columns/column_decimal.cpp index 95b247fc668..be81a7ad4cb 100644 --- a/be/src/vec/columns/column_decimal.cpp +++ b/be/src/vec/columns/column_decimal.cpp @@ -131,12 +131,6 @@ void ColumnDecimal<T>::update_hash_with_value(size_t n, SipHash& hash) const { hash.update(data[n]); } -template <typename T> -void ColumnDecimal<T>::update_hashes_with_value(std::vector<SipHash>& hashes, - const uint8_t* __restrict null_data) const { - SIP_HASHES_FUNCTION_COLUMN_IMPL(); -} - template <typename T> void ColumnDecimal<T>::update_crc_with_value(size_t start, size_t end, uint32_t& hash, const uint8_t* __restrict null_data) const { diff --git a/be/src/vec/columns/column_decimal.h b/be/src/vec/columns/column_decimal.h index 49b58ebaa4f..920f6ad8438 100644 --- a/be/src/vec/columns/column_decimal.h +++ b/be/src/vec/columns/column_decimal.h @@ -176,8 +176,6 @@ public: const uint8_t* null_map) override; void update_hash_with_value(size_t n, SipHash& hash) const override; - void update_hashes_with_value(std::vector<SipHash>& hashes, - const uint8_t* __restrict null_data) const override; void update_hashes_with_value(uint64_t* __restrict hashes, const uint8_t* __restrict null_data) const override; void update_crcs_with_value(uint32_t* __restrict hashes, PrimitiveType type, uint32_t rows, diff --git a/be/src/vec/columns/column_map.cpp b/be/src/vec/columns/column_map.cpp index dcfcdb46d70..4a1df8a6e26 100644 --- a/be/src/vec/columns/column_map.cpp +++ b/be/src/vec/columns/column_map.cpp @@ -271,11 +271,6 @@ void ColumnMap::update_hash_with_value(size_t n, SipHash& hash) const { } } -void ColumnMap::update_hashes_with_value(std::vector<SipHash>& hashes, - const uint8_t* __restrict null_data) const { - SIP_HASHES_FUNCTION_COLUMN_IMPL(); -} - void ColumnMap::update_xxHash_with_value(size_t start, size_t end, uint64_t& hash, const uint8_t* __restrict null_data) const { auto& offsets = get_offsets(); diff --git a/be/src/vec/columns/column_map.h b/be/src/vec/columns/column_map.h index 206660d6a06..0e6ad3c3d91 100644 --- a/be/src/vec/columns/column_map.h +++ b/be/src/vec/columns/column_map.h @@ -182,9 +182,6 @@ public: void update_crc_with_value(size_t start, size_t end, uint32_t& hash, const uint8_t* __restrict null_data) const override; - void update_hashes_with_value(std::vector<SipHash>& hashes, - const uint8_t* __restrict null_data) const override; - void update_hashes_with_value(uint64_t* __restrict hashes, const uint8_t* __restrict null_data = nullptr) const override; diff --git a/be/src/vec/columns/column_nullable.cpp b/be/src/vec/columns/column_nullable.cpp index 0cd671eb110..e20b87af826 100644 --- a/be/src/vec/columns/column_nullable.cpp +++ b/be/src/vec/columns/column_nullable.cpp @@ -93,25 +93,6 @@ void ColumnNullable::update_hash_with_value(size_t n, SipHash& hash) const { } } -void ColumnNullable::update_hashes_with_value(std::vector<SipHash>& hashes, - const uint8_t* __restrict null_data) const { - DCHECK(null_data == nullptr); - auto s = hashes.size(); - DCHECK(s == size()); - const auto* __restrict real_null_data = - assert_cast<const ColumnUInt8&>(*null_map).get_data().data(); - if (!has_null()) { - nested_column->update_hashes_with_value(hashes, nullptr); - } else { - for (int i = 0; i < s; ++i) { - if (real_null_data[i] != 0) { - hashes[i].update(0); - } - } - nested_column->update_hashes_with_value(hashes, real_null_data); - } -} - void ColumnNullable::update_crcs_with_value(uint32_t* __restrict hashes, doris::PrimitiveType type, uint32_t rows, uint32_t offset, const uint8_t* __restrict null_data) const { diff --git a/be/src/vec/columns/column_nullable.h b/be/src/vec/columns/column_nullable.h index eca4c57fceb..de01907650e 100644 --- a/be/src/vec/columns/column_nullable.h +++ b/be/src/vec/columns/column_nullable.h @@ -219,8 +219,6 @@ public: const uint8_t* __restrict null_data) const override; void update_hash_with_value(size_t n, SipHash& hash) const override; - void update_hashes_with_value(std::vector<SipHash>& hashes, - const uint8_t* __restrict null_data) const override; void update_crcs_with_value(uint32_t* __restrict hash, PrimitiveType type, uint32_t rows, uint32_t offset, const uint8_t* __restrict null_data) const override; diff --git a/be/src/vec/columns/column_string.h b/be/src/vec/columns/column_string.h index 5c50f5ed2f1..9bf43c9c627 100644 --- a/be/src/vec/columns/column_string.h +++ b/be/src/vec/columns/column_string.h @@ -451,11 +451,6 @@ public: hash.update(reinterpret_cast<const char*>(&chars[offset]), string_size); } - void update_hashes_with_value(std::vector<SipHash>& hashes, - const uint8_t* __restrict null_data) const override { - SIP_HASHES_FUNCTION_COLUMN_IMPL(); - } - void update_crcs_with_value(uint32_t* __restrict hashes, PrimitiveType type, uint32_t rows, uint32_t offset, const uint8_t* __restrict null_data) const override; diff --git a/be/src/vec/columns/column_struct.cpp b/be/src/vec/columns/column_struct.cpp index f6ab9c9604f..290452a1c8b 100644 --- a/be/src/vec/columns/column_struct.cpp +++ b/be/src/vec/columns/column_struct.cpp @@ -206,11 +206,6 @@ void ColumnStruct::update_hash_with_value(size_t n, SipHash& hash) const { } } -void ColumnStruct::update_hashes_with_value(std::vector<SipHash>& hashes, - const uint8_t* __restrict null_data) const { - SIP_HASHES_FUNCTION_COLUMN_IMPL(); -} - void ColumnStruct::update_xxHash_with_value(size_t start, size_t end, uint64_t& hash, const uint8_t* __restrict null_data) const { for (const auto& column : columns) { diff --git a/be/src/vec/columns/column_struct.h b/be/src/vec/columns/column_struct.h index 1b1daee4452..d91d3800648 100644 --- a/be/src/vec/columns/column_struct.h +++ b/be/src/vec/columns/column_struct.h @@ -113,9 +113,6 @@ public: void update_crc_with_value(size_t start, size_t end, uint32_t& hash, const uint8_t* __restrict null_data) const override; - void update_hashes_with_value(std::vector<SipHash>& hashes, - const uint8_t* __restrict null_data) const override; - void update_hashes_with_value(uint64_t* __restrict hashes, const uint8_t* __restrict null_data = nullptr) const override; diff --git a/be/src/vec/columns/column_vector.cpp b/be/src/vec/columns/column_vector.cpp index 71f55af0a79..acc6fc1d7f3 100644 --- a/be/src/vec/columns/column_vector.cpp +++ b/be/src/vec/columns/column_vector.cpp @@ -110,12 +110,6 @@ void ColumnVector<T>::update_hash_with_value(size_t n, SipHash& hash) const { hash.update(data[n]); } -template <typename T> -void ColumnVector<T>::update_hashes_with_value(std::vector<SipHash>& hashes, - const uint8_t* __restrict null_data) const { - SIP_HASHES_FUNCTION_COLUMN_IMPL(); -} - template <typename T> void ColumnVector<T>::update_hashes_with_value(uint64_t* __restrict hashes, const uint8_t* __restrict null_data) const { diff --git a/be/src/vec/columns/column_vector.h b/be/src/vec/columns/column_vector.h index dbc7524eaac..ff9197df357 100644 --- a/be/src/vec/columns/column_vector.h +++ b/be/src/vec/columns/column_vector.h @@ -321,9 +321,6 @@ public: } void update_hash_with_value(size_t n, SipHash& hash) const override; - void update_hashes_with_value(std::vector<SipHash>& hashes, - const uint8_t* __restrict null_data) const override; - void update_crcs_with_value(uint32_t* __restrict hashes, PrimitiveType type, uint32_t rows, uint32_t offset, const uint8_t* __restrict null_data) const override; diff --git a/be/test/vec/columns/column_hash_func_test.cpp b/be/test/vec/columns/column_hash_func_test.cpp index a95c7071af5..0a9471a1ab3 100644 --- a/be/test/vec/columns/column_hash_func_test.cpp +++ b/be/test/vec/columns/column_hash_func_test.cpp @@ -62,22 +62,14 @@ DataTypes create_scala_data_types() { TEST(HashFuncTest, ArrayTypeTest) { DataTypes dataTypes = create_scala_data_types(); - std::vector<uint64_t> sip_hash_vals(1); std::vector<uint64_t> xx_hash_vals(1); std::vector<uint32_t> crc_hash_vals(1); - auto* __restrict sip_hashes = sip_hash_vals.data(); auto* __restrict xx_hashes = xx_hash_vals.data(); auto* __restrict crc_hashes = crc_hash_vals.data(); for (auto d : dataTypes) { DataTypePtr a = std::make_shared<DataTypeArray>(d); ColumnPtr col_a = a->create_column_const_with_default_value(1); - // sipHash - std::vector<SipHash> siphashs(1); - col_a->update_hashes_with_value(siphashs); - EXPECT_NO_FATAL_FAILURE(col_a->update_hashes_with_value(siphashs)); - sip_hashes[0] = siphashs[0].get64(); - std::cout << sip_hashes[0] << std::endl; // xxHash EXPECT_NO_FATAL_FAILURE(col_a->update_hashes_with_value(xx_hashes)); std::cout << xx_hashes[0] << std::endl; @@ -184,22 +176,11 @@ TEST(HashFuncTest, ArrayCornerCaseTest) { EXPECT_EQ(array_mutable_col->size(), 3); - std::vector<uint64_t> sip_hash_vals(3); std::vector<uint64_t> xx_hash_vals(3); std::vector<uint32_t> crc_hash_vals(3); - auto* __restrict sip_hashes = sip_hash_vals.data(); auto* __restrict xx_hashes = xx_hash_vals.data(); auto* __restrict crc_hashes = crc_hash_vals.data(); - // sipHash - std::vector<SipHash> siphashs(3); - array_mutable_col->update_hashes_with_value(siphashs); - EXPECT_NO_FATAL_FAILURE(array_mutable_col->update_hashes_with_value(siphashs)); - sip_hashes[0] = siphashs[0].get64(); - sip_hashes[1] = siphashs[1].get64(); - sip_hashes[2] = siphashs[2].get64(); - EXPECT_EQ(sip_hashes[0], sip_hash_vals[1]); - EXPECT_TRUE(sip_hash_vals[0] != sip_hash_vals[2]); // xxHash EXPECT_NO_FATAL_FAILURE(array_mutable_col->update_hashes_with_value(xx_hashes)); EXPECT_EQ(xx_hashes[0], xx_hashes[1]); @@ -214,21 +195,14 @@ TEST(HashFuncTest, ArrayCornerCaseTest) { TEST(HashFuncTest, MapTypeTest) { DataTypes dataTypes = create_scala_data_types(); - std::vector<uint64_t> sip_hash_vals(1); std::vector<uint64_t> xx_hash_vals(1); std::vector<uint32_t> crc_hash_vals(1); - auto* __restrict sip_hashes = sip_hash_vals.data(); auto* __restrict xx_hashes = xx_hash_vals.data(); auto* __restrict crc_hashes = crc_hash_vals.data(); // data_type_map for (int i = 0; i < dataTypes.size() - 1; ++i) { DataTypePtr a = std::make_shared<DataTypeMap>(dataTypes[i], dataTypes[i + 1]); ColumnPtr col_a = a->create_column_const_with_default_value(1); - // sipHash - std::vector<SipHash> siphashs(1); - EXPECT_NO_FATAL_FAILURE(unpack_if_const(col_a).first->update_hashes_with_value(siphashs)); - sip_hashes[0] = siphashs[0].get64(); - std::cout << sip_hashes[0] << std::endl; // xxHash EXPECT_NO_FATAL_FAILURE(unpack_if_const(col_a).first->update_hashes_with_value(xx_hashes)); std::cout << xx_hashes[0] << std::endl; @@ -242,21 +216,14 @@ TEST(HashFuncTest, MapTypeTest) { TEST(HashFuncTest, StructTypeTest) { DataTypes dataTypes = create_scala_data_types(); - std::vector<uint64_t> sip_hash_vals(1); std::vector<uint64_t> xx_hash_vals(1); std::vector<uint32_t> crc_hash_vals(1); - auto* __restrict sip_hashes = sip_hash_vals.data(); auto* __restrict xx_hashes = xx_hash_vals.data(); auto* __restrict crc_hashes = crc_hash_vals.data(); // data_type_struct DataTypePtr a = std::make_shared<DataTypeStruct>(dataTypes); ColumnPtr col_a = a->create_column_const_with_default_value(1); - // sipHash - std::vector<SipHash> siphashs(1); - EXPECT_NO_FATAL_FAILURE(unpack_if_const(col_a).first->update_hashes_with_value(siphashs)); - sip_hashes[0] = siphashs[0].get64(); - std::cout << sip_hashes[0] << std::endl; // xxHash EXPECT_NO_FATAL_FAILURE(unpack_if_const(col_a).first->update_hashes_with_value(xx_hashes)); std::cout << xx_hashes[0] << std::endl; --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org