This is an automated email from the ASF dual-hosted git repository. panxiaolei pushed a commit to branch refactor_rf in repository https://gitbox.apache.org/repos/asf/doris.git
commit 4b6720a403335854262f5ee6ac606f599b8cc066
Author: Hu Shenggang <hushengg...@selectdb.com>
AuthorDate: Fri Mar 7 11:54:30 2025 +0800

    [test](bloom_filter_func) add case about algorithm
---
 be/test/exprs/bloom_filter_func_test.cpp | 58 ++++++++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)

diff --git a/be/test/exprs/bloom_filter_func_test.cpp b/be/test/exprs/bloom_filter_func_test.cpp
index 5562132cc58..a99452ecda6 100644
--- a/be/test/exprs/bloom_filter_func_test.cpp
+++ b/be/test/exprs/bloom_filter_func_test.cpp
@@ -34,6 +34,7 @@
 #include "runtime/define_primitive_type.h"
 #include "runtime/primitive_type.h"
 #include "testutil/column_helper.h"
+#include "util/url_coding.h"
 #include "vec/columns/column_decimal.h"
 
 namespace doris {
@@ -314,6 +315,63 @@ TEST_F(BloomFilterFuncTest, Merge) {
     ASSERT_FALSE(st);
 }
 
+/// The purpose of this case is to detect changes after modifying the Bloom filter algorithm,
+/// to prevent compatibility issues.
+TEST_F(BloomFilterFuncTest, HashAlgorithm) {
+    // Base64 encoding of the filter contents expected after inserting the values below.
+    const std::string BloomFilterBinary =
+            "AAAAQAAAAIAACAAAAABAACAAAAAAQAAAAQAAAACAAAAAACAEAACAAgAACAQEAAAgAASAAAABAgAAEAAIDAAAAA"
+            "CQAQAAYgAAAAIBAiBAgAABAgBABAAICAAABCGAAIABBAAAAAAAIABAAAAACAAAAAAAABAAQAAAAAAAIBAAAAAA"
+            "AQAiABEAAQBAIgAAgBAQEEAAACACAQAABEgAAggAAQAAAUAQAAEQECCAAABAAIgHAAAACAEAAgAJQABAIAEAAA"
+            "gAAEAAAAAAEAAAAAAQAAABAAAQAAAAAEAAAAAEAACAEAAAAAUAAAAAIBAgCAAAQAAIAAAACBAIABAAAAAABg";
+    BloomFilterFunc<PrimitiveType::TYPE_INT> bloom_filter_func(false);
+    const size_t runtime_length = 1024;
+    RuntimeFilterParams params {1,
+                                RuntimeFilterType::BLOOM_FILTER,
+                                PrimitiveType::TYPE_INT,
+                                false,
+                                0,
+                                0,
+                                0,
+                                256,
+                                0,
+                                0,
+                                false,
+                                false};
+    bloom_filter_func.init_params(&params);
+
+    ASSERT_TRUE(bloom_filter_func.init_with_fixed_length(runtime_length));
+
+    auto column = vectorized::ColumnHelper::create_column<vectorized::DataTypeInt32>(
+            {1, 3, 5, 7, 9, 12, 14, 16, 2001, 2002, 2003, 4096, 4097, 4098, 4099, 4100});
+
+    bloom_filter_func.insert_fixed_len(column, 0);
+
+    char* data = nullptr;
+    int size = 0;
+    bloom_filter_func.get_data(&data, &size);
+
+    std::string encode_string;
+    base64_encode(std::string(data, size), &encode_string);
+    // ASSERT_STREQ prints both strings on mismatch, unlike a bare memcmp() == 0 check.
+    ASSERT_STREQ(BloomFilterBinary.c_str(), encode_string.c_str());
+
+    // Repeat with enable_fixed_len_to_uint32_v2 set; the same encoded bytes are expected.
+    params.enable_fixed_len_to_uint32_v2 = true;
+
+    BloomFilterFunc<PrimitiveType::TYPE_INT> bloom_filter_func2(false);
+    bloom_filter_func2.init_params(&params);
+    ASSERT_TRUE(bloom_filter_func2.init_with_fixed_length(runtime_length));
+
+    bloom_filter_func2.insert_fixed_len(column, 0);
+    // Read back from bloom_filter_func2: reading bloom_filter_func again would only
+    // re-check the first filter and leave the v2 configuration untested.
+    bloom_filter_func2.get_data(&data, &size);
+    base64_encode(std::string(data, size), &encode_string);
+
+    ASSERT_STREQ(BloomFilterBinary.c_str(), encode_string.c_str());
+}
+
 TEST_F(BloomFilterFuncTest, MergeLargeData) {
     BloomFilterFunc<PrimitiveType::TYPE_INT> bloom_filter_func(false);
     const size_t runtime_length = 1024;

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org