This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 92e1f687e40 [chore](columns) remove update_hashes_with_value for SipHash (#31224)
92e1f687e40 is described below

commit 92e1f687e40d64775cb13565d976f10961d3e4bf
Author: Uniqueyou <134280716+wyxxx...@users.noreply.github.com>
AuthorDate: Wed Feb 21 23:56:50 2024 +0800

    [chore](columns) remove update_hashes_with_value for SipHash (#31224)
---
 be/src/vec/columns/column.h                   | 22 ------------------
 be/src/vec/columns/column_array.cpp           |  5 ----
 be/src/vec/columns/column_array.h             |  3 ---
 be/src/vec/columns/column_complex.h           | 11 ++-------
 be/src/vec/columns/column_const.cpp           | 16 -------------
 be/src/vec/columns/column_const.h             |  3 ---
 be/src/vec/columns/column_decimal.cpp         |  6 -----
 be/src/vec/columns/column_decimal.h           |  2 --
 be/src/vec/columns/column_map.cpp             |  5 ----
 be/src/vec/columns/column_map.h               |  3 ---
 be/src/vec/columns/column_nullable.cpp        | 19 ---------------
 be/src/vec/columns/column_nullable.h          |  2 --
 be/src/vec/columns/column_string.h            |  5 ----
 be/src/vec/columns/column_struct.cpp          |  5 ----
 be/src/vec/columns/column_struct.h            |  3 ---
 be/src/vec/columns/column_vector.cpp          |  6 -----
 be/src/vec/columns/column_vector.h            |  3 ---
 be/test/vec/columns/column_hash_func_test.cpp | 33 ---------------------------
 18 files changed, 2 insertions(+), 150 deletions(-)

diff --git a/be/src/vec/columns/column.h b/be/src/vec/columns/column.h
index 322456a8f77..bf869961544 100644
--- a/be/src/vec/columns/column.h
+++ b/be/src/vec/columns/column.h
@@ -46,19 +46,6 @@
 
 class SipHash;
 
-#define SIP_HASHES_FUNCTION_COLUMN_IMPL()                                \
-    auto s = hashes.size();                                              \
-    DCHECK(s == size());                                                 \
-    if (null_data == nullptr) {                                          \
-        for (size_t i = 0; i < s; i++) {                                 \
-            update_hash_with_value(i, hashes[i]);                        \
-        }                                                                \
-    } else {                                                             \
-        for (size_t i = 0; i < s; i++) {                                 \
-            if (null_data[i] == 0) update_hash_with_value(i, hashes[i]); \
-        }                                                                \
-    }
-
 #define DO_CRC_HASHES_FUNCTION_COLUMN_IMPL()                                   
      \
     if (null_data == nullptr) {                                                
      \
         for (size_t i = 0; i < s; i++) {                                       
      \
@@ -362,15 +349,6 @@ public:
         LOG(FATAL) << get_name() << " update_hash_with_value siphash not 
supported";
     }
 
-    /// Update state of hash function with value of n elements to avoid the 
virtual function call
-    /// null_data to mark whether need to do hash compute, null_data == nullptr
-    /// means all element need to do hash function, else only *null_data != 0 
need to do hash func
-    /// do xxHash here, faster than other hash method
-    virtual void update_hashes_with_value(std::vector<SipHash>& hashes,
-                                          const uint8_t* __restrict null_data 
= nullptr) const {
-        LOG(FATAL) << get_name() << " update_hashes_with_value siphash not 
supported";
-    }
-
     /// Update state of hash function with value of n elements to avoid the 
virtual function call
     /// null_data to mark whether need to do hash compute, null_data == nullptr
     /// means all element need to do hash function, else only *null_data != 0 
need to do hash func
diff --git a/be/src/vec/columns/column_array.cpp b/be/src/vec/columns/column_array.cpp
index 6d2914d8053..9abf13b9f3f 100644
--- a/be/src/vec/columns/column_array.cpp
+++ b/be/src/vec/columns/column_array.cpp
@@ -291,11 +291,6 @@ void ColumnArray::update_hash_with_value(size_t n, 
SipHash& hash) const {
     for (size_t i = 0; i < array_size; ++i) 
get_data().update_hash_with_value(offset + i, hash);
 }
 
-void ColumnArray::update_hashes_with_value(std::vector<SipHash>& hashes,
-                                           const uint8_t* __restrict 
null_data) const {
-    SIP_HASHES_FUNCTION_COLUMN_IMPL();
-}
-
 // for every array row calculate xxHash
 void ColumnArray::update_xxHash_with_value(size_t start, size_t end, uint64_t& 
hash,
                                            const uint8_t* __restrict 
null_data) const {
diff --git a/be/src/vec/columns/column_array.h b/be/src/vec/columns/column_array.h
index 046bc22ac48..17408bfa633 100644
--- a/be/src/vec/columns/column_array.h
+++ b/be/src/vec/columns/column_array.h
@@ -142,9 +142,6 @@ public:
     void update_crc_with_value(size_t start, size_t end, uint32_t& hash,
                                const uint8_t* __restrict null_data) const 
override;
 
-    void update_hashes_with_value(std::vector<SipHash>& hashes,
-                                  const uint8_t* __restrict null_data) const 
override;
-
     void update_hashes_with_value(uint64_t* __restrict hashes,
                                   const uint8_t* __restrict null_data = 
nullptr) const override;
 
diff --git a/be/src/vec/columns/column_complex.h b/be/src/vec/columns/column_complex.h
index 8e9686cb2a5..70d32da0b9b 100644
--- a/be/src/vec/columns/column_complex.h
+++ b/be/src/vec/columns/column_complex.h
@@ -220,15 +220,8 @@ public:
         // TODO add hash function
     }
 
-    virtual void update_hashes_with_value(
-            std::vector<SipHash>& hashes,
-            const uint8_t* __restrict null_data = nullptr) const override {
-        // TODO add hash function
-    }
-
-    virtual void update_hashes_with_value(
-            uint64_t* __restrict hashes,
-            const uint8_t* __restrict null_data = nullptr) const override {
+    void update_hashes_with_value(uint64_t* __restrict hashes,
+                                  const uint8_t* __restrict null_data = 
nullptr) const override {
         // TODO add hash function
     }
 
diff --git a/be/src/vec/columns/column_const.cpp b/be/src/vec/columns/column_const.cpp
index f7efec1f72a..e06e53b4289 100644
--- a/be/src/vec/columns/column_const.cpp
+++ b/be/src/vec/columns/column_const.cpp
@@ -93,22 +93,6 @@ ColumnPtr ColumnConst::permute(const Permutation& perm, 
size_t limit) const {
     return ColumnConst::create(data, limit);
 }
 
-void ColumnConst::update_hashes_with_value(std::vector<SipHash>& hashes,
-                                           const uint8_t* __restrict 
null_data) const {
-    DCHECK(null_data == nullptr);
-    DCHECK(hashes.size() == size());
-    auto real_data = data->get_data_at(0);
-    if (real_data.data == nullptr) {
-        for (auto& hash : hashes) {
-            hash.update(0);
-        }
-    } else {
-        for (auto& hash : hashes) {
-            hash.update(real_data.data, real_data.size);
-        }
-    }
-}
-
 void ColumnConst::update_crcs_with_value(uint32_t* __restrict hashes, 
doris::PrimitiveType type,
                                          uint32_t rows, uint32_t offset,
                                          const uint8_t* __restrict null_data) 
const {
diff --git a/be/src/vec/columns/column_const.h b/be/src/vec/columns/column_const.h
index 5498fbf7c20..746cb00fd5d 100644
--- a/be/src/vec/columns/column_const.h
+++ b/be/src/vec/columns/column_const.h
@@ -177,9 +177,6 @@ public:
         data->update_hash_with_value(0, hash);
     }
 
-    void update_hashes_with_value(std::vector<SipHash>& hashes,
-                                  const uint8_t* __restrict null_data) const 
override;
-
     // (TODO.Amory) here may not use column_const update hash, and 
PrimitiveType is not used.
     void update_crcs_with_value(uint32_t* __restrict hashes, PrimitiveType 
type, uint32_t rows,
                                 uint32_t offset = 0,
diff --git a/be/src/vec/columns/column_decimal.cpp b/be/src/vec/columns/column_decimal.cpp
index 95b247fc668..be81a7ad4cb 100644
--- a/be/src/vec/columns/column_decimal.cpp
+++ b/be/src/vec/columns/column_decimal.cpp
@@ -131,12 +131,6 @@ void ColumnDecimal<T>::update_hash_with_value(size_t n, 
SipHash& hash) const {
     hash.update(data[n]);
 }
 
-template <typename T>
-void ColumnDecimal<T>::update_hashes_with_value(std::vector<SipHash>& hashes,
-                                                const uint8_t* __restrict 
null_data) const {
-    SIP_HASHES_FUNCTION_COLUMN_IMPL();
-}
-
 template <typename T>
 void ColumnDecimal<T>::update_crc_with_value(size_t start, size_t end, 
uint32_t& hash,
                                              const uint8_t* __restrict 
null_data) const {
diff --git a/be/src/vec/columns/column_decimal.h b/be/src/vec/columns/column_decimal.h
index 49b58ebaa4f..920f6ad8438 100644
--- a/be/src/vec/columns/column_decimal.h
+++ b/be/src/vec/columns/column_decimal.h
@@ -176,8 +176,6 @@ public:
                                        const uint8_t* null_map) override;
 
     void update_hash_with_value(size_t n, SipHash& hash) const override;
-    void update_hashes_with_value(std::vector<SipHash>& hashes,
-                                  const uint8_t* __restrict null_data) const 
override;
     void update_hashes_with_value(uint64_t* __restrict hashes,
                                   const uint8_t* __restrict null_data) const 
override;
     void update_crcs_with_value(uint32_t* __restrict hashes, PrimitiveType 
type, uint32_t rows,
diff --git a/be/src/vec/columns/column_map.cpp b/be/src/vec/columns/column_map.cpp
index dcfcdb46d70..4a1df8a6e26 100644
--- a/be/src/vec/columns/column_map.cpp
+++ b/be/src/vec/columns/column_map.cpp
@@ -271,11 +271,6 @@ void ColumnMap::update_hash_with_value(size_t n, SipHash& 
hash) const {
     }
 }
 
-void ColumnMap::update_hashes_with_value(std::vector<SipHash>& hashes,
-                                         const uint8_t* __restrict null_data) 
const {
-    SIP_HASHES_FUNCTION_COLUMN_IMPL();
-}
-
 void ColumnMap::update_xxHash_with_value(size_t start, size_t end, uint64_t& 
hash,
                                          const uint8_t* __restrict null_data) 
const {
     auto& offsets = get_offsets();
diff --git a/be/src/vec/columns/column_map.h b/be/src/vec/columns/column_map.h
index 206660d6a06..0e6ad3c3d91 100644
--- a/be/src/vec/columns/column_map.h
+++ b/be/src/vec/columns/column_map.h
@@ -182,9 +182,6 @@ public:
     void update_crc_with_value(size_t start, size_t end, uint32_t& hash,
                                const uint8_t* __restrict null_data) const 
override;
 
-    void update_hashes_with_value(std::vector<SipHash>& hashes,
-                                  const uint8_t* __restrict null_data) const 
override;
-
     void update_hashes_with_value(uint64_t* __restrict hashes,
                                   const uint8_t* __restrict null_data = 
nullptr) const override;
 
diff --git a/be/src/vec/columns/column_nullable.cpp b/be/src/vec/columns/column_nullable.cpp
index 0cd671eb110..e20b87af826 100644
--- a/be/src/vec/columns/column_nullable.cpp
+++ b/be/src/vec/columns/column_nullable.cpp
@@ -93,25 +93,6 @@ void ColumnNullable::update_hash_with_value(size_t n, 
SipHash& hash) const {
     }
 }
 
-void ColumnNullable::update_hashes_with_value(std::vector<SipHash>& hashes,
-                                              const uint8_t* __restrict 
null_data) const {
-    DCHECK(null_data == nullptr);
-    auto s = hashes.size();
-    DCHECK(s == size());
-    const auto* __restrict real_null_data =
-            assert_cast<const ColumnUInt8&>(*null_map).get_data().data();
-    if (!has_null()) {
-        nested_column->update_hashes_with_value(hashes, nullptr);
-    } else {
-        for (int i = 0; i < s; ++i) {
-            if (real_null_data[i] != 0) {
-                hashes[i].update(0);
-            }
-        }
-        nested_column->update_hashes_with_value(hashes, real_null_data);
-    }
-}
-
 void ColumnNullable::update_crcs_with_value(uint32_t* __restrict hashes, 
doris::PrimitiveType type,
                                             uint32_t rows, uint32_t offset,
                                             const uint8_t* __restrict 
null_data) const {
diff --git a/be/src/vec/columns/column_nullable.h b/be/src/vec/columns/column_nullable.h
index eca4c57fceb..de01907650e 100644
--- a/be/src/vec/columns/column_nullable.h
+++ b/be/src/vec/columns/column_nullable.h
@@ -219,8 +219,6 @@ public:
                                const uint8_t* __restrict null_data) const 
override;
 
     void update_hash_with_value(size_t n, SipHash& hash) const override;
-    void update_hashes_with_value(std::vector<SipHash>& hashes,
-                                  const uint8_t* __restrict null_data) const 
override;
     void update_crcs_with_value(uint32_t* __restrict hash, PrimitiveType type, 
uint32_t rows,
                                 uint32_t offset,
                                 const uint8_t* __restrict null_data) const 
override;
diff --git a/be/src/vec/columns/column_string.h b/be/src/vec/columns/column_string.h
index 5c50f5ed2f1..9bf43c9c627 100644
--- a/be/src/vec/columns/column_string.h
+++ b/be/src/vec/columns/column_string.h
@@ -451,11 +451,6 @@ public:
         hash.update(reinterpret_cast<const char*>(&chars[offset]), 
string_size);
     }
 
-    void update_hashes_with_value(std::vector<SipHash>& hashes,
-                                  const uint8_t* __restrict null_data) const 
override {
-        SIP_HASHES_FUNCTION_COLUMN_IMPL();
-    }
-
     void update_crcs_with_value(uint32_t* __restrict hashes, PrimitiveType 
type, uint32_t rows,
                                 uint32_t offset,
                                 const uint8_t* __restrict null_data) const 
override;
diff --git a/be/src/vec/columns/column_struct.cpp b/be/src/vec/columns/column_struct.cpp
index f6ab9c9604f..290452a1c8b 100644
--- a/be/src/vec/columns/column_struct.cpp
+++ b/be/src/vec/columns/column_struct.cpp
@@ -206,11 +206,6 @@ void ColumnStruct::update_hash_with_value(size_t n, 
SipHash& hash) const {
     }
 }
 
-void ColumnStruct::update_hashes_with_value(std::vector<SipHash>& hashes,
-                                            const uint8_t* __restrict 
null_data) const {
-    SIP_HASHES_FUNCTION_COLUMN_IMPL();
-}
-
 void ColumnStruct::update_xxHash_with_value(size_t start, size_t end, 
uint64_t& hash,
                                             const uint8_t* __restrict 
null_data) const {
     for (const auto& column : columns) {
diff --git a/be/src/vec/columns/column_struct.h b/be/src/vec/columns/column_struct.h
index 1b1daee4452..d91d3800648 100644
--- a/be/src/vec/columns/column_struct.h
+++ b/be/src/vec/columns/column_struct.h
@@ -113,9 +113,6 @@ public:
     void update_crc_with_value(size_t start, size_t end, uint32_t& hash,
                                const uint8_t* __restrict null_data) const 
override;
 
-    void update_hashes_with_value(std::vector<SipHash>& hashes,
-                                  const uint8_t* __restrict null_data) const 
override;
-
     void update_hashes_with_value(uint64_t* __restrict hashes,
                                   const uint8_t* __restrict null_data = 
nullptr) const override;
 
diff --git a/be/src/vec/columns/column_vector.cpp b/be/src/vec/columns/column_vector.cpp
index 71f55af0a79..acc6fc1d7f3 100644
--- a/be/src/vec/columns/column_vector.cpp
+++ b/be/src/vec/columns/column_vector.cpp
@@ -110,12 +110,6 @@ void ColumnVector<T>::update_hash_with_value(size_t n, 
SipHash& hash) const {
     hash.update(data[n]);
 }
 
-template <typename T>
-void ColumnVector<T>::update_hashes_with_value(std::vector<SipHash>& hashes,
-                                               const uint8_t* __restrict 
null_data) const {
-    SIP_HASHES_FUNCTION_COLUMN_IMPL();
-}
-
 template <typename T>
 void ColumnVector<T>::update_hashes_with_value(uint64_t* __restrict hashes,
                                                const uint8_t* __restrict 
null_data) const {
diff --git a/be/src/vec/columns/column_vector.h b/be/src/vec/columns/column_vector.h
index dbc7524eaac..ff9197df357 100644
--- a/be/src/vec/columns/column_vector.h
+++ b/be/src/vec/columns/column_vector.h
@@ -321,9 +321,6 @@ public:
     }
     void update_hash_with_value(size_t n, SipHash& hash) const override;
 
-    void update_hashes_with_value(std::vector<SipHash>& hashes,
-                                  const uint8_t* __restrict null_data) const 
override;
-
     void update_crcs_with_value(uint32_t* __restrict hashes, PrimitiveType 
type, uint32_t rows,
                                 uint32_t offset,
                                 const uint8_t* __restrict null_data) const 
override;
diff --git a/be/test/vec/columns/column_hash_func_test.cpp b/be/test/vec/columns/column_hash_func_test.cpp
index a95c7071af5..0a9471a1ab3 100644
--- a/be/test/vec/columns/column_hash_func_test.cpp
+++ b/be/test/vec/columns/column_hash_func_test.cpp
@@ -62,22 +62,14 @@ DataTypes create_scala_data_types() {
 TEST(HashFuncTest, ArrayTypeTest) {
     DataTypes dataTypes = create_scala_data_types();
 
-    std::vector<uint64_t> sip_hash_vals(1);
     std::vector<uint64_t> xx_hash_vals(1);
     std::vector<uint32_t> crc_hash_vals(1);
-    auto* __restrict sip_hashes = sip_hash_vals.data();
     auto* __restrict xx_hashes = xx_hash_vals.data();
     auto* __restrict crc_hashes = crc_hash_vals.data();
 
     for (auto d : dataTypes) {
         DataTypePtr a = std::make_shared<DataTypeArray>(d);
         ColumnPtr col_a = a->create_column_const_with_default_value(1);
-        // sipHash
-        std::vector<SipHash> siphashs(1);
-        col_a->update_hashes_with_value(siphashs);
-        EXPECT_NO_FATAL_FAILURE(col_a->update_hashes_with_value(siphashs));
-        sip_hashes[0] = siphashs[0].get64();
-        std::cout << sip_hashes[0] << std::endl;
         // xxHash
         EXPECT_NO_FATAL_FAILURE(col_a->update_hashes_with_value(xx_hashes));
         std::cout << xx_hashes[0] << std::endl;
@@ -184,22 +176,11 @@ TEST(HashFuncTest, ArrayCornerCaseTest) {
 
     EXPECT_EQ(array_mutable_col->size(), 3);
 
-    std::vector<uint64_t> sip_hash_vals(3);
     std::vector<uint64_t> xx_hash_vals(3);
     std::vector<uint32_t> crc_hash_vals(3);
-    auto* __restrict sip_hashes = sip_hash_vals.data();
     auto* __restrict xx_hashes = xx_hash_vals.data();
     auto* __restrict crc_hashes = crc_hash_vals.data();
 
-    // sipHash
-    std::vector<SipHash> siphashs(3);
-    array_mutable_col->update_hashes_with_value(siphashs);
-    
EXPECT_NO_FATAL_FAILURE(array_mutable_col->update_hashes_with_value(siphashs));
-    sip_hashes[0] = siphashs[0].get64();
-    sip_hashes[1] = siphashs[1].get64();
-    sip_hashes[2] = siphashs[2].get64();
-    EXPECT_EQ(sip_hashes[0], sip_hash_vals[1]);
-    EXPECT_TRUE(sip_hash_vals[0] != sip_hash_vals[2]);
     // xxHash
     
EXPECT_NO_FATAL_FAILURE(array_mutable_col->update_hashes_with_value(xx_hashes));
     EXPECT_EQ(xx_hashes[0], xx_hashes[1]);
@@ -214,21 +195,14 @@ TEST(HashFuncTest, ArrayCornerCaseTest) {
 TEST(HashFuncTest, MapTypeTest) {
     DataTypes dataTypes = create_scala_data_types();
 
-    std::vector<uint64_t> sip_hash_vals(1);
     std::vector<uint64_t> xx_hash_vals(1);
     std::vector<uint32_t> crc_hash_vals(1);
-    auto* __restrict sip_hashes = sip_hash_vals.data();
     auto* __restrict xx_hashes = xx_hash_vals.data();
     auto* __restrict crc_hashes = crc_hash_vals.data();
     // data_type_map
     for (int i = 0; i < dataTypes.size() - 1; ++i) {
         DataTypePtr a = std::make_shared<DataTypeMap>(dataTypes[i], 
dataTypes[i + 1]);
         ColumnPtr col_a = a->create_column_const_with_default_value(1);
-        // sipHash
-        std::vector<SipHash> siphashs(1);
-        
EXPECT_NO_FATAL_FAILURE(unpack_if_const(col_a).first->update_hashes_with_value(siphashs));
-        sip_hashes[0] = siphashs[0].get64();
-        std::cout << sip_hashes[0] << std::endl;
         // xxHash
         
EXPECT_NO_FATAL_FAILURE(unpack_if_const(col_a).first->update_hashes_with_value(xx_hashes));
         std::cout << xx_hashes[0] << std::endl;
@@ -242,21 +216,14 @@ TEST(HashFuncTest, MapTypeTest) {
 TEST(HashFuncTest, StructTypeTest) {
     DataTypes dataTypes = create_scala_data_types();
 
-    std::vector<uint64_t> sip_hash_vals(1);
     std::vector<uint64_t> xx_hash_vals(1);
     std::vector<uint32_t> crc_hash_vals(1);
-    auto* __restrict sip_hashes = sip_hash_vals.data();
     auto* __restrict xx_hashes = xx_hash_vals.data();
     auto* __restrict crc_hashes = crc_hash_vals.data();
 
     // data_type_struct
     DataTypePtr a = std::make_shared<DataTypeStruct>(dataTypes);
     ColumnPtr col_a = a->create_column_const_with_default_value(1);
-    // sipHash
-    std::vector<SipHash> siphashs(1);
-    
EXPECT_NO_FATAL_FAILURE(unpack_if_const(col_a).first->update_hashes_with_value(siphashs));
-    sip_hashes[0] = siphashs[0].get64();
-    std::cout << sip_hashes[0] << std::endl;
     // xxHash
     
EXPECT_NO_FATAL_FAILURE(unpack_if_const(col_a).first->update_hashes_with_value(xx_hashes));
     std::cout << xx_hashes[0] << std::endl;


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to