This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new f65103e2a6b [Chore](runtime-filter) unify interfaces of bloom filter and remove some unused code (#27822)
f65103e2a6b is described below

commit f65103e2a6bdade1d144b58044c66c64bf25927a
Author: Pxl <pxl...@qq.com>
AuthorDate: Sat Dec 2 07:42:55 2023 +0800

    [Chore](runtime-filter) unify interfaces of bloom filter and remove some unused code (#27822)
    
    * unify interfaces of bloom filter and remove some unused code
---
 be/src/exprs/bloom_filter_func.h              |  54 +++++-------
 be/src/exprs/runtime_filter.cpp               |  67 ---------------
 be/src/exprs/runtime_filter.h                 |   4 -
 be/src/olap/bloom_filter_predicate.h          |  17 +---
 be/src/vec/exprs/vbloom_predicate.cpp         |  11 +--
 be/test/exprs/bloom_filter_predicate_test.cpp | 117 --------------------------
 6 files changed, 24 insertions(+), 246 deletions(-)

diff --git a/be/src/exprs/bloom_filter_func.h b/be/src/exprs/bloom_filter_func.h
index 0323d44315f..6ea805ee2ee 100644
--- a/be/src/exprs/bloom_filter_func.h
+++ b/be/src/exprs/bloom_filter_func.h
@@ -20,6 +20,7 @@
 #include "exprs/block_bloom_filter.hpp"
 #include "exprs/runtime_filter.h"
 #include "olap/rowset/segment_v2/bloom_filter.h" // IWYU pragma: keep
+#include "vec/columns/column_dictionary.h"
 #include "vec/common/string_ref.h"
 
 namespace doris {
@@ -198,12 +199,6 @@ public:
 
     virtual void insert(const void* data) = 0;
 
-    virtual bool find(const void* data) const = 0;
-
-    virtual bool find_olap_engine(const void* data) const = 0;
-
-    virtual bool find_uint32_t(uint32_t data) const = 0;
-
     virtual void insert_fixed_len(const vectorized::ColumnPtr& column, size_t 
start) = 0;
 
     virtual void find_fixed_len(const vectorized::ColumnPtr& column, uint8_t* 
results) = 0;
@@ -336,16 +331,11 @@ struct CommonFindOp : BaseOp {
     }
 
     void insert(BloomFilterAdaptor& bloom_filter, const void* data) const {
-        bloom_filter.add_element(((T*)data)[0]);
-    }
-    bool find(const BloomFilterAdaptor& bloom_filter, const void* data) const {
-        return bloom_filter.test_element(((T*)data)[0]);
+        bloom_filter.add_element(*(T*)data);
     }
+
     bool find_olap_engine(const BloomFilterAdaptor& bloom_filter, const void* 
data) const override {
-        return find(bloom_filter, data);
-    }
-    bool find(const BloomFilterAdaptor& bloom_filter, uint32_t data) const {
-        return bloom_filter.test(data);
+        return bloom_filter.test_element(*(T*)data);
     }
 };
 
@@ -412,21 +402,10 @@ struct StringFindOp : public BaseOp {
         }
     }
 
-    static bool find(const BloomFilterAdaptor& bloom_filter, const void* data) 
{
+    bool find_olap_engine(const BloomFilterAdaptor& bloom_filter, const void* 
data) const override {
         const auto* value = reinterpret_cast<const StringRef*>(data);
-        if (value == nullptr) {
-            return false;
-        }
         return bloom_filter.test(*value);
     }
-
-    bool find_olap_engine(const BloomFilterAdaptor& bloom_filter, const void* 
data) const override {
-        return StringFindOp::find(bloom_filter, data);
-    }
-
-    static bool find(const BloomFilterAdaptor& bloom_filter, uint32_t data) {
-        return bloom_filter.test(data);
-    }
 };
 
 // We do not need to judge whether data is empty, because null will not appear
@@ -487,17 +466,22 @@ public:
         dummy.find_batch(*_bloom_filter, column, results);
     }
 
-    bool find(const void* data) const override {
-        DCHECK(_bloom_filter != nullptr);
-        return dummy.find(*_bloom_filter, data);
-    }
-
-    bool find_olap_engine(const void* data) const override {
-        return dummy.find_olap_engine(*_bloom_filter, data);
+    template <bool is_nullable>
+    uint16_t find_dict_olap_engine(const vectorized::ColumnDictI32* column, 
const uint8* nullmap,
+                                   uint16_t* offsets, int number) {
+        uint16_t new_size = 0;
+        for (uint16_t i = 0; i < number; i++) {
+            uint16_t idx = offsets[i];
+            offsets[new_size] = idx;
+            if constexpr (is_nullable) {
+                new_size += !nullmap[idx] && 
_bloom_filter->test(column->get_hash_value(idx));
+            } else {
+                new_size += _bloom_filter->test(column->get_hash_value(idx));
+            }
+        }
+        return new_size;
     }
 
-    bool find_uint32_t(uint32_t data) const override { return 
dummy.find(*_bloom_filter, data); }
-
     uint16_t find_fixed_len_olap_engine(const char* data, const uint8* 
nullmap, uint16_t* offsets,
                                         int number, bool is_parse_column) 
override {
         return dummy.find_batch_olap_engine(*_bloom_filter, data, nullmap, 
offsets, number,
diff --git a/be/src/exprs/runtime_filter.cpp b/be/src/exprs/runtime_filter.cpp
index 31bf2025988..8398311654f 100644
--- a/be/src/exprs/runtime_filter.cpp
+++ b/be/src/exprs/runtime_filter.cpp
@@ -388,43 +388,6 @@ public:
 
     BloomFilterFuncBase* get_bloomfilter() const { return 
_context.bloom_filter_func.get(); }
 
-    void insert(const void* data) {
-        switch (_filter_type) {
-        case RuntimeFilterType::IN_FILTER: {
-            if (_is_ignored_in_filter) {
-                break;
-            }
-            _context.hybrid_set->insert(data);
-            break;
-        }
-        case RuntimeFilterType::MIN_FILTER:
-        case RuntimeFilterType::MAX_FILTER:
-        case RuntimeFilterType::MINMAX_FILTER: {
-            _context.minmax_func->insert(data);
-            break;
-        }
-        case RuntimeFilterType::BLOOM_FILTER: {
-            _context.bloom_filter_func->insert(data);
-            break;
-        }
-        case RuntimeFilterType::IN_OR_BLOOM_FILTER: {
-            if (_is_bloomfilter) {
-                _context.bloom_filter_func->insert(data);
-            } else {
-                _context.hybrid_set->insert(data);
-            }
-            break;
-        }
-        case RuntimeFilterType::BITMAP_FILTER: {
-            _context.bitmap_filter_func->insert(data);
-            break;
-        }
-        default:
-            DCHECK(false);
-            break;
-        }
-    }
-
     void insert_fixed_len(const vectorized::ColumnPtr& column, size_t start) {
         switch (_filter_type) {
         case RuntimeFilterType::IN_FILTER: {
@@ -458,24 +421,6 @@ public:
         }
     }
 
-    void insert(const StringRef& value) {
-        switch (_column_return_type) {
-        case TYPE_CHAR:
-        case TYPE_VARCHAR:
-        case TYPE_HLL:
-        case TYPE_STRING: {
-            // StringRef->StringRef
-            StringRef data = StringRef(value.data, value.size);
-            insert(reinterpret_cast<const void*>(&data));
-            break;
-        }
-
-        default:
-            insert(reinterpret_cast<const void*>(value.data));
-            break;
-        }
-    }
-
     void insert_batch(const vectorized::ColumnPtr& column, size_t start) {
         if (get_real_type() == RuntimeFilterType::BITMAP_FILTER) {
             bitmap_filter_insert_batch(column, start);
@@ -1050,18 +995,6 @@ void IRuntimeFilter::copy_from_other(IRuntimeFilter* 
other) {
     _wrapper->_context = other->_wrapper->_context;
 }
 
-void IRuntimeFilter::insert(const void* data) {
-    DCHECK(is_producer());
-    if (!_is_ignored) {
-        _wrapper->insert(data);
-    }
-}
-
-void IRuntimeFilter::insert(const StringRef& value) {
-    DCHECK(is_producer());
-    _wrapper->insert(value);
-}
-
 void IRuntimeFilter::insert_batch(const vectorized::ColumnPtr column, size_t 
start) {
     DCHECK(is_producer());
     _wrapper->insert_batch(column, start);
diff --git a/be/src/exprs/runtime_filter.h b/be/src/exprs/runtime_filter.h
index 4b8c982fee3..187d0d757e9 100644
--- a/be/src/exprs/runtime_filter.h
+++ b/be/src/exprs/runtime_filter.h
@@ -244,10 +244,6 @@ public:
 
     void copy_from_other(IRuntimeFilter* other);
 
-    // insert data to build filter
-    // only used for producer
-    void insert(const void* data);
-    void insert(const StringRef& data);
     void insert_batch(vectorized::ColumnPtr column, size_t start);
 
     // publish filter
diff --git a/be/src/olap/bloom_filter_predicate.h 
b/be/src/olap/bloom_filter_predicate.h
index 51abd68a4b4..bacf61b8362 100644
--- a/be/src/olap/bloom_filter_predicate.h
+++ b/be/src/olap/bloom_filter_predicate.h
@@ -69,21 +69,12 @@ private:
 
         uint16_t new_size = 0;
         if (column.is_column_dictionary()) {
-            const auto* dict_col = reinterpret_cast<const 
vectorized::ColumnDictI32*>(&column);
-            for (uint16_t i = 0; i < size; i++) {
-                uint16_t idx = sel[i];
-                sel[new_size] = idx;
-                if constexpr (is_nullable) {
-                    new_size += !null_map[idx] &&
-                                
_specific_filter->find_uint32_t(dict_col->get_hash_value(idx));
-                } else {
-                    new_size += 
_specific_filter->find_uint32_t(dict_col->get_hash_value(idx));
-                }
-            }
+            const auto* dict_col = assert_cast<const 
vectorized::ColumnDictI32*>(&column);
+            new_size = _specific_filter->template 
find_dict_olap_engine<is_nullable>(
+                    dict_col, null_map, sel, size);
         } else {
             const auto& data =
-                    reinterpret_cast<
-                            const 
vectorized::PredicateColumnType<PredicateEvaluateType<T>>*>(
+                    assert_cast<const 
vectorized::PredicateColumnType<PredicateEvaluateType<T>>*>(
                             &column)
                             ->get_data();
             new_size = 
_specific_filter->find_fixed_len_olap_engine((char*)data.data(), null_map,
diff --git a/be/src/vec/exprs/vbloom_predicate.cpp 
b/be/src/vec/exprs/vbloom_predicate.cpp
index 176ecb219ce..f72657c528a 100644
--- a/be/src/vec/exprs/vbloom_predicate.cpp
+++ b/be/src/vec/exprs/vbloom_predicate.cpp
@@ -89,16 +89,7 @@ Status VBloomPredicate::execute(VExprContext* context, 
Block* block, int* result
     size_t sz = argument_column->size();
     res_data_column->resize(sz);
     auto* ptr = 
((ColumnVector<UInt8>*)res_data_column.get())->get_data().data();
-    auto type = 
WhichDataType(remove_nullable(block->get_by_position(arguments[0]).type));
-    if (type.is_string_or_fixed_string()) {
-        for (size_t i = 0; i < sz; i++) {
-            auto ele = argument_column->get_data_at(i);
-            const StringRef v(ele.data, ele.size);
-            ptr[i] = _filter->find(reinterpret_cast<const void*>(&v));
-        }
-    } else {
-        _filter->find_fixed_len(argument_column, ptr);
-    }
+    _filter->find_fixed_len(argument_column, ptr);
 
     if (_data_type->is_nullable()) {
         auto null_map = ColumnVector<UInt8>::create(block->rows(), 0);
diff --git a/be/test/exprs/bloom_filter_predicate_test.cpp 
b/be/test/exprs/bloom_filter_predicate_test.cpp
deleted file mode 100644
index 8c33ed13a6d..00000000000
--- a/be/test/exprs/bloom_filter_predicate_test.cpp
+++ /dev/null
@@ -1,117 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <gtest/gtest-message.h>
-#include <gtest/gtest-test-part.h>
-#include <string.h>
-
-#include <memory>
-#include <string>
-
-#include "common/object_pool.h"
-#include "common/status.h"
-#include "exprs/bloom_filter_func.h"
-#include "exprs/create_predicate_function.h"
-#include "gtest/gtest_pred_impl.h"
-#include "runtime/define_primitive_type.h"
-#include "vec/common/string_ref.h"
-
-namespace doris {
-class BloomFilterPredicateTest : public testing::Test {
-public:
-    BloomFilterPredicateTest() = default;
-    virtual void SetUp() {}
-    virtual void TearDown() {}
-};
-
-TEST_F(BloomFilterPredicateTest, bloom_filter_func_int_test) {
-    std::unique_ptr<BloomFilterFuncBase> 
func(create_bloom_filter(PrimitiveType::TYPE_INT));
-    EXPECT_TRUE(func->init(1024, 0.05).ok());
-    const int data_size = 1024;
-    int data[data_size];
-    for (int i = 0; i < data_size; i++) {
-        data[i] = i;
-        func->insert((const void*)&data[i]);
-    }
-    for (int i = 0; i < data_size; i++) {
-        EXPECT_TRUE(func->find((const void*)&data[i]));
-    }
-    // test not exist val
-    int not_exist_val = 0x3355ff;
-    EXPECT_FALSE(func->find((const void*)&not_exist_val));
-}
-
-TEST_F(BloomFilterPredicateTest, bloom_filter_func_stringval_test) {
-    std::unique_ptr<BloomFilterFuncBase> 
func(create_bloom_filter(PrimitiveType::TYPE_VARCHAR));
-    EXPECT_TRUE(func->init(1024, 0.05).ok());
-    ObjectPool obj_pool;
-    const int data_size = 1024;
-    StringRef data[data_size];
-    for (int i = 0; i < data_size; i++) {
-        auto str = obj_pool.add(new std::string(std::to_string(i)));
-        data[i] = StringRef(*str);
-        func->insert((const void*)&data[i]);
-    }
-    for (int i = 0; i < data_size; i++) {
-        EXPECT_TRUE(func->find((const void*)&data[i]));
-    }
-    // test not exist value
-    std::string not_exist_str = "0x3355ff";
-    StringRef not_exist_val(not_exist_str);
-    EXPECT_FALSE(func->find((const void*)&not_exist_val));
-
-    // test fixed char
-    func.reset(create_bloom_filter(PrimitiveType::TYPE_CHAR));
-    EXPECT_TRUE(func->init(1024, 0.05).ok());
-
-    auto varchar_true_str = obj_pool.add(new std::string("true"));
-    StringRef varchar_true(*varchar_true_str);
-    func->insert((const void*)&varchar_true);
-
-    auto varchar_false_str = obj_pool.add(new std::string("false"));
-    StringRef varchar_false(*varchar_false_str);
-    func->insert((const void*)&varchar_false);
-
-    StringRef fixed_char_true;
-    char true_buf[100] = "true";
-    memset(true_buf + strlen(true_buf), 0, 100 - strlen(true_buf));
-    fixed_char_true.data = true_buf;
-    fixed_char_true.size = 10;
-
-    StringRef fixed_char_false;
-    char false_buf[100] = "false";
-    memset(false_buf + strlen(false_buf), 0, 100 - strlen(false_buf));
-    fixed_char_false.data = false_buf;
-    fixed_char_false.size = 10;
-
-    EXPECT_TRUE(func->find_olap_engine((const void*)&fixed_char_true));
-    EXPECT_TRUE(func->find_olap_engine((const void*)&fixed_char_false));
-
-    func->find(nullptr);
-}
-
-TEST_F(BloomFilterPredicateTest, bloom_filter_size_test) {
-    std::unique_ptr<BloomFilterFuncBase> 
func(create_bloom_filter(PrimitiveType::TYPE_VARCHAR));
-    int length = 4096;
-    static_cast<void>(func->init_with_fixed_length(4096));
-    char* data = nullptr;
-    int len;
-    static_cast<void>(func->get_data(&data, &len));
-    EXPECT_EQ(length, len);
-}
-
-} // namespace doris


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to