This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new ca244a4375f [fix](cache) fix concurrent read-write issue on shared roaring bitmap in inverted index (#59253) (#59411)
ca244a4375f is described below
commit ca244a4375f47b1702e7c73b0560af538706e67a
Author: zzzxl <[email protected]>
AuthorDate: Mon Dec 29 10:33:51 2025 +0800
[fix](cache) fix concurrent read-write issue on shared roaring bitmap in inverted index (#59253) (#59411)
https://github.com/apache/doris/pull/59253
---
be/src/vec/functions/function_ip.h | 31 ++++----
be/test/vec/function/function_ip_test.cpp | 121 ++++++++++++++++++++++++++++++
2 files changed, 135 insertions(+), 17 deletions(-)
diff --git a/be/src/vec/functions/function_ip.h b/be/src/vec/functions/function_ip.h
index 90bd34b00f2..82d5eaf8c75 100644
--- a/be/src/vec/functions/function_ip.h
+++ b/be/src/vec/functions/function_ip.h
@@ -713,14 +713,14 @@ public:
// >= min ip
RETURN_IF_ERROR(segment_v2::InvertedIndexQueryParamFactory::create_query_value(
param_type, &min_ip, query_param));
- segment_v2::InvertedIndexParam res_param;
- res_param.column_name = data_type_with_name.first;
- res_param.column_type = data_type_with_name.second;
- res_param.query_type = segment_v2::InvertedIndexQueryType::GREATER_EQUAL_QUERY;
- res_param.query_value = query_param->get_value();
- res_param.num_rows = num_rows;
- res_param.roaring = std::make_shared<roaring::Roaring>();
- RETURN_IF_ERROR(iter->read_from_index(&res_param));
+ segment_v2::InvertedIndexParam min_param;
+ min_param.column_name = data_type_with_name.first;
+ min_param.column_type = data_type_with_name.second;
+ min_param.query_type = segment_v2::InvertedIndexQueryType::GREATER_EQUAL_QUERY;
+ min_param.query_value = query_param->get_value();
+ min_param.num_rows = num_rows;
+ min_param.roaring = std::make_shared<roaring::Roaring>();
+ RETURN_IF_ERROR(iter->read_from_index(&min_param));
// <= max ip
RETURN_IF_ERROR(segment_v2::InvertedIndexQueryParamFactory::create_query_value(
@@ -734,21 +734,18 @@ public:
max_param.roaring = std::make_shared<roaring::Roaring>();
RETURN_IF_ERROR(iter->read_from_index(&max_param));
+ auto result_roaring = std::make_shared<roaring::Roaring>();
+ *result_roaring = *min_param.roaring & *max_param.roaring;
+
DBUG_EXECUTE_IF("ip.inverted_index_filtered", {
auto req_id = DebugPoints::instance()->get_debug_param_or_default<int32_t>(
"ip.inverted_index_filtered", "req_id", 0);
LOG(INFO) << "execute inverted index req_id: " << req_id
- << " min: " << res_param.roaring->cardinality();
- });
- *res_param.roaring &= *max_param.roaring;
- DBUG_EXECUTE_IF("ip.inverted_index_filtered", {
- auto req_id = DebugPoints::instance()->get_debug_param_or_default<int32_t>(
- "ip.inverted_index_filtered", "req_id", 0);
- LOG(INFO) << "execute inverted index req_id: " << req_id
+ << " min: " << min_param.roaring->cardinality()
<< " max: " << max_param.roaring->cardinality()
- << " result: " << res_param.roaring->cardinality();
+ << " result: " << result_roaring->cardinality();
});
- segment_v2::InvertedIndexResultBitmap result(res_param.roaring, null_bitmap);
+ segment_v2::InvertedIndexResultBitmap result(result_roaring, null_bitmap);
bitmap_result = result;
bitmap_result.mask_out_null();
return Status::OK();
diff --git a/be/test/vec/function/function_ip_test.cpp b/be/test/vec/function/function_ip_test.cpp
index bbeb396d58f..fdd9a773e62 100644
--- a/be/test/vec/function/function_ip_test.cpp
+++ b/be/test/vec/function/function_ip_test.cpp
@@ -15,8 +15,13 @@
// specific language governing permissions and limitations
// under the License.
+#include "vec/functions/function_ip.h"
+
#include "function_test_util.h"
#include "gtest/gtest_pred_impl.h"
+#include "olap/rowset/segment_v2/index_iterator.h"
+#include "olap/rowset/segment_v2/inverted_index_reader.h"
+#include "vec/columns/column_const.h"
#include "vec/core/types.h"
#include "vec/data_types/data_type_ipv6.h"
#include "vec/data_types/data_type_number.h"
@@ -158,4 +163,120 @@ TEST(FunctionIpTest, FunctionCutIPv6Test) {
static_cast<void>(check_function<DataTypeString, true>(func_name, input_types, data_set));
}
+class MockIndexReader : public segment_v2::InvertedIndexReader {
+public:
+ MockIndexReader(const TabletIndex& index_meta)
+ : segment_v2::InvertedIndexReader(&index_meta, nullptr) {}
+ ~MockIndexReader() override = default;
+ segment_v2::InvertedIndexReaderType type() override {
+ return segment_v2::InvertedIndexReaderType::BKD;
+ }
+ Status query(const segment_v2::IndexQueryContextPtr& context, const std::string& column_name,
+ const void* query_value, segment_v2::InvertedIndexQueryType query_type,
+ std::shared_ptr<roaring::Roaring>& bit_map) override {
+ return Status::OK();
+ }
+ Status try_query(const segment_v2::IndexQueryContextPtr& context,
+ const std::string& column_name, const void* query_value,
+ segment_v2::InvertedIndexQueryType query_type, size_t* count) override {
+ return Status::OK();
+ }
+ Status new_iterator(std::unique_ptr<segment_v2::IndexIterator>* iterator) override {
+ return Status::OK();
+ }
+};
+
+class MockIndexIterator : public segment_v2::IndexIterator {
+public:
+ MockIndexIterator(std::shared_ptr<MockIndexReader> reader) : _reader(reader) {}
+ ~MockIndexIterator() override = default;
+ segment_v2::IndexReaderPtr get_reader(segment_v2::IndexReaderType reader_type) const override {
+ if (std::holds_alternative<segment_v2::InvertedIndexReaderType>(reader_type)) {
+ if (std::get<segment_v2::InvertedIndexReaderType>(reader_type) ==
+ segment_v2::InvertedIndexReaderType::BKD) {
+ return _reader;
+ }
+ }
+ return nullptr;
+ }
+ Status read_from_index(const segment_v2::IndexParam& param) override {
+ auto* p = std::get<segment_v2::InvertedIndexParam*>(param);
+ if (p->query_type == segment_v2::InvertedIndexQueryType::GREATER_EQUAL_QUERY) {
+ p->roaring->addRange(10, 20);
+ } else if (p->query_type == segment_v2::InvertedIndexQueryType::LESS_EQUAL_QUERY) {
+ p->roaring->addRange(15, 25);
+ }
+ return Status::OK();
+ }
+ Status read_null_bitmap(segment_v2::InvertedIndexQueryCacheHandle* cache_handle) override {
+ return Status::OK();
+ }
+ Result<bool> has_null() override { return false; }
+
+private:
+ std::shared_ptr<MockIndexReader> _reader;
+};
+
+TEST(FunctionIpTest, evaluate_inverted_index) {
+ FunctionIsIPAddressInRange func;
+
+ // IPv4 test
+ {
+ auto cidr_col = ColumnString::create();
+ cidr_col->insert_data("127.0.0.0/8", 11);
+ auto const_cidr_col = ColumnConst::create(std::move(cidr_col), 1);
+
+ ColumnsWithTypeAndName arguments = {
+ {std::move(const_cidr_col), std::make_shared<DataTypeString>(), "cidr"}};
+
+ std::vector<IndexFieldNameAndTypePair> data_type_with_names = {
+ {"ip_addr", std::make_shared<DataTypeIPv4>()}};
+
+ TabletIndex index_meta;
+ auto reader = std::make_shared<MockIndexReader>(index_meta);
+ auto iter = std::make_unique<MockIndexIterator>(reader);
+ std::vector<segment_v2::IndexIterator*> iterators = {iter.get()};
+
+ segment_v2::InvertedIndexResultBitmap bitmap_result;
+ auto status = func.evaluate_inverted_index(arguments, data_type_with_names, iterators, 100,
+ bitmap_result);
+ ASSERT_TRUE(status.ok());
+
+ // min_param: [10, 20), max_param: [15, 25)
+ // intersection: [15, 20) -> 15, 16, 17, 18, 19
+ ASSERT_EQ(bitmap_result.get_data_bitmap()->cardinality(), 5);
+ for (int i = 15; i < 20; ++i) {
+ ASSERT_TRUE(bitmap_result.get_data_bitmap()->contains(i));
+ }
+ }
+
+ // IPv6 test
+ {
+ auto cidr_col = ColumnString::create();
+ cidr_col->insert_data("ffff::/16", 9);
+ auto const_cidr_col = ColumnConst::create(std::move(cidr_col), 1);
+
+ ColumnsWithTypeAndName arguments = {
+ {std::move(const_cidr_col), std::make_shared<DataTypeString>(), "cidr"}};
+
+ std::vector<IndexFieldNameAndTypePair> data_type_with_names = {
+ {"ip_addr", std::make_shared<DataTypeIPv6>()}};
+
+ TabletIndex index_meta;
+ auto reader = std::make_shared<MockIndexReader>(index_meta);
+ auto iter = std::make_unique<MockIndexIterator>(reader);
+ std::vector<segment_v2::IndexIterator*> iterators = {iter.get()};
+
+ segment_v2::InvertedIndexResultBitmap bitmap_result;
+ auto status = func.evaluate_inverted_index(arguments, data_type_with_names, iterators, 100,
+ bitmap_result);
+ ASSERT_TRUE(status.ok());
+
+ ASSERT_EQ(bitmap_result.get_data_bitmap()->cardinality(), 5);
+ for (int i = 15; i < 20; ++i) {
+ ASSERT_TRUE(bitmap_result.get_data_bitmap()->contains(i));
+ }
+ }
+}
+
} // namespace doris::vectorized
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]