This is an automated email from the ASF dual-hosted git repository.

airborne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 34a27a375b2 [fix](inverted index) Fix NULL bitmap handling in MATCH OR 
queries (#56699)
34a27a375b2 is described below

commit 34a27a375b21ca3d5030b363abd9e8e95deccf85
Author: Jack <[email protected]>
AuthorDate: Sat Oct 4 12:30:33 2025 +0800

    [fix](inverted index) Fix NULL bitmap handling in MATCH OR queries (#56699)
    
    Related PR: #56139
    
    Problem Summary:
    
    This PR fixes a bug in NULL bitmap handling for MATCH OR queries in
    inverted index query. The bug was causing incorrect boolean logic
    evaluation when combining TRUE and NULL values in OR operations.
---
 .../olap/rowset/segment_v2/inverted_index_reader.h |  10 +-
 .../segment_v2/inverted_index_reader_test.cpp      | 102 +++++++++++++
 .../test_match_or_null_semantics.groovy            | 160 +++++++++++++++++++++
 3 files changed, 271 insertions(+), 1 deletion(-)

diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.h 
b/be/src/olap/rowset/segment_v2/inverted_index_reader.h
index 9416b943d5a..6647173f5e8 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_reader.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.h
@@ -135,8 +135,16 @@ public:
     // Operator |=
     InvertedIndexResultBitmap& operator|=(const InvertedIndexResultBitmap& 
other) {
         if (_data_bitmap && _null_bitmap && other._data_bitmap && 
other._null_bitmap) {
-            auto new_null_bitmap = (*_null_bitmap | *other._null_bitmap) - 
*_data_bitmap;
+            // SQL three-valued logic for OR:
+            // - TRUE OR anything = TRUE (not NULL)
+            // - FALSE OR NULL = NULL
+            // - NULL OR NULL = NULL
+            // Result is NULL when the row is NULL on either side while the 
other side
+            // is not TRUE. Rows that become TRUE must be removed from the 
NULL bitmap.
             *_data_bitmap |= *other._data_bitmap;
+            auto new_null_bitmap =
+                    (*_null_bitmap - *other._data_bitmap) | 
(*other._null_bitmap - *_data_bitmap);
+            new_null_bitmap -= *_data_bitmap;
             *_null_bitmap = std::move(new_null_bitmap);
         }
         return *this;
diff --git a/be/test/olap/rowset/segment_v2/inverted_index_reader_test.cpp 
b/be/test/olap/rowset/segment_v2/inverted_index_reader_test.cpp
index ae85a05fb72..e73dc642f74 100644
--- a/be/test/olap/rowset/segment_v2/inverted_index_reader_test.cpp
+++ b/be/test/olap/rowset/segment_v2/inverted_index_reader_test.cpp
@@ -3546,4 +3546,106 @@ TEST_F(InvertedIndexReaderTest, UnsupportedDataTypes) {
     test_unsupported_data_types();
 }
 
+// Test InvertedIndexResultBitmap operator|= with NULL handling
+TEST_F(InvertedIndexReaderTest, ResultBitmapOrOperatorNullHandling) {
+    // Test SQL three-valued logic for OR:
+    // - TRUE OR NULL = TRUE (not NULL)
+    // - FALSE OR NULL = NULL
+    // - NULL OR NULL = NULL
+
+    // Case 1: TRUE OR NULL = TRUE
+    {
+        auto data_a = std::make_shared<roaring::Roaring>();
+        auto null_a = std::make_shared<roaring::Roaring>();
+        data_a->add(1); // row 1 is TRUE
+        // row 2 is FALSE (not in data_a, not in null_a)
+
+        auto data_b = std::make_shared<roaring::Roaring>();
+        auto null_b = std::make_shared<roaring::Roaring>();
+        null_b->add(1); // row 1 is NULL
+        data_b->add(2); // row 2 is TRUE
+
+        InvertedIndexResultBitmap bitmap_a(data_a, null_a);
+        InvertedIndexResultBitmap bitmap_b(data_b, null_b);
+
+        bitmap_a |= bitmap_b;
+
+        // Result: row 1 should be TRUE (TRUE OR NULL = TRUE)
+        //         row 2 should be TRUE (FALSE OR TRUE = TRUE)
+        EXPECT_TRUE(bitmap_a.get_data_bitmap()->contains(1));
+        EXPECT_TRUE(bitmap_a.get_data_bitmap()->contains(2));
+        EXPECT_FALSE(bitmap_a.get_null_bitmap()->contains(1)); // row 1 is not 
NULL
+        EXPECT_FALSE(bitmap_a.get_null_bitmap()->contains(2)); // row 2 is not 
NULL
+    }
+
+    // Case 2: FALSE OR NULL = NULL
+    {
+        auto data_a = std::make_shared<roaring::Roaring>();
+        auto null_a = std::make_shared<roaring::Roaring>();
+        // row 0 is FALSE
+
+        auto data_b = std::make_shared<roaring::Roaring>();
+        auto null_b = std::make_shared<roaring::Roaring>();
+        null_b->add(0); // row 0 is NULL
+
+        InvertedIndexResultBitmap bitmap_a(data_a, null_a);
+        InvertedIndexResultBitmap bitmap_b(data_b, null_b);
+
+        bitmap_a |= bitmap_b;
+
+        // Result: row 0 should be NULL (FALSE OR NULL = NULL)
+        EXPECT_FALSE(bitmap_a.get_data_bitmap()->contains(0));
+        EXPECT_TRUE(bitmap_a.get_null_bitmap()->contains(0));
+    }
+
+    // Case 3: NULL OR NULL = NULL
+    {
+        auto data_a = std::make_shared<roaring::Roaring>();
+        auto null_a = std::make_shared<roaring::Roaring>();
+        null_a->add(5); // row 5 is NULL
+
+        auto data_b = std::make_shared<roaring::Roaring>();
+        auto null_b = std::make_shared<roaring::Roaring>();
+        null_b->add(5); // row 5 is NULL
+
+        InvertedIndexResultBitmap bitmap_a(data_a, null_a);
+        InvertedIndexResultBitmap bitmap_b(data_b, null_b);
+
+        bitmap_a |= bitmap_b;
+
+        // Result: row 5 should be NULL (NULL OR NULL = NULL)
+        EXPECT_FALSE(bitmap_a.get_data_bitmap()->contains(5));
+        EXPECT_TRUE(bitmap_a.get_null_bitmap()->contains(5));
+    }
+
+    // Case 4: Complex scenario - cross-field OR with NULL
+    // Simulating: field1="value" OR field2="value" where field2 has NULL
+    {
+        auto data_field1 = std::make_shared<roaring::Roaring>();
+        auto null_field1 = std::make_shared<roaring::Roaring>();
+        data_field1->addRange(0, 15); // rows 0-14 match field1
+
+        auto data_field2 = std::make_shared<roaring::Roaring>();
+        auto null_field2 = std::make_shared<roaring::Roaring>();
+        null_field2->addRange(0, 15); // rows 0-14 have NULL in field2
+        data_field2->add(20);         // row 20 matches field2
+
+        InvertedIndexResultBitmap bitmap_field1(data_field1, null_field1);
+        InvertedIndexResultBitmap bitmap_field2(data_field2, null_field2);
+
+        bitmap_field1 |= bitmap_field2;
+
+        // Result: rows 0-14 should be TRUE (TRUE OR NULL = TRUE)
+        //         row 20 should be TRUE
+        for (uint32_t i = 0; i < 15; ++i) {
+            EXPECT_TRUE(bitmap_field1.get_data_bitmap()->contains(i))
+                    << "Row " << i << " should be TRUE";
+            EXPECT_FALSE(bitmap_field1.get_null_bitmap()->contains(i))
+                    << "Row " << i << " should not be NULL";
+        }
+        EXPECT_TRUE(bitmap_field1.get_data_bitmap()->contains(20));
+        EXPECT_FALSE(bitmap_field1.get_null_bitmap()->contains(20));
+    }
+}
+
 } // namespace doris::segment_v2
diff --git 
a/regression-test/suites/inverted_index_p0/test_match_or_null_semantics.groovy 
b/regression-test/suites/inverted_index_p0/test_match_or_null_semantics.groovy
new file mode 100644
index 00000000000..f869490bd35
--- /dev/null
+++ 
b/regression-test/suites/inverted_index_p0/test_match_or_null_semantics.groovy
@@ -0,0 +1,160 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_match_or_null_semantics") {
+    // This test verifies the fix for the bug in 
InvertedIndexResultBitmap::operator|=()
+    // in inverted_index_reader.h where NULL bitmaps were incorrectly combined 
using OR
+    // instead of AND for MATCH syntax queries with 
enable_common_expr_pushdown=true
+    //
+    // Bug location: be/src/olap/rowset/segment_v2/inverted_index_reader.h:138
+    // The bug caused rows with (TRUE OR NULL) to be incorrectly filtered out
+
+    def tableName = "test_match_or_null_table"
+
+    sql "DROP TABLE IF EXISTS ${tableName}"
+
+    sql """
+        CREATE TABLE ${tableName} (
+            id INT,
+            title TEXT,
+            content TEXT,
+            INDEX idx_title (title) USING INVERTED PROPERTIES("parser" = 
"english"),
+            INDEX idx_content (content) USING INVERTED PROPERTIES("parser" = 
"english")
+        ) ENGINE=OLAP
+        DUPLICATE KEY(id)
+        DISTRIBUTED BY HASH(id) BUCKETS 3
+        PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1"
+        )
+    """
+
+    // Insert test data
+    // Rows 1-15: title matches "Philosophy", content is NULL (TRUE OR NULL = 
TRUE)
+    // Row 16: title doesn't match, content matches "Disney+ Hotstar" (FALSE 
OR TRUE = TRUE)
+    // Rows 17-20: title doesn't match, content is NULL (FALSE OR NULL = NULL, 
excluded)
+    sql """ INSERT INTO ${tableName} VALUES
+        (1, 'Philosophy 101', NULL),
+        (2, 'Ancient Philosophy', NULL),
+        (3, 'Modern Philosophy', NULL),
+        (4, 'Eastern Philosophy', NULL),
+        (5, 'Western Philosophy', NULL),
+        (6, 'Philosophy of Mind', NULL),
+        (7, 'Philosophy of Science', NULL),
+        (8, 'Philosophy Basics', NULL),
+        (9, 'Greek Philosophy', NULL),
+        (10, 'Medieval Philosophy', NULL),
+        (11, 'Renaissance Philosophy', NULL),
+        (12, 'Contemporary Philosophy', NULL),
+        (13, 'Philosophy and Logic', NULL),
+        (14, 'Philosophy Fundamentals', NULL),
+        (15, 'Introduction to Philosophy', NULL),
+        (16, 'Science Today', 'Disney+ Hotstar streaming service'),
+        (17, 'Random Article', NULL),
+        (18, 'Another Topic', NULL),
+        (19, 'Sample Entry', NULL),
+        (20, 'Test Data', NULL)
+    """
+
+    // Enable pushdown to trigger the bug in 
InvertedIndexResultBitmap::operator|=
+    sql "SET enable_common_expr_pushdown = true"
+
+    // Test 1: Core bug scenario - cross-field OR with NULL
+    // Before fix: returned 1 row (only row 16, lost 15 rows with NULL content)
+    // After fix: returns 16 rows (rows 1-16)
+    def test1 = sql """
+        SELECT COUNT(*) FROM ${tableName}
+        WHERE title MATCH_ALL 'Philosophy' OR content MATCH_ALL 'Disney+ 
Hotstar'
+    """
+
+    assertEquals(16, test1[0][0], "MATCH should return 16 rows (15 with title 
match + 1 with content match)")
+    logger.info("Test 1 PASSED: Cross-field OR with NULL - 16 rows returned")
+
+    // Test 2: Verify the 15 critical rows with NULL content are included
+    def test2 = sql """
+        SELECT COUNT(*) FROM ${tableName}
+        WHERE title MATCH_ALL 'Philosophy'
+          AND content IS NULL
+    """
+
+    assertEquals(15, test2[0][0], "Should have 15 rows with title match and 
NULL content")
+    logger.info("Test 2 PASSED: 15 rows with NULL content correctly exist")
+
+    // Test 3: Verify these 15 rows are included in the OR query
+    def test3 = sql """
+        SELECT COUNT(*) FROM ${tableName}
+        WHERE title MATCH_ALL 'Philosophy'
+          AND content IS NULL
+          AND (title MATCH_ALL 'Philosophy' OR content MATCH_ALL 'Disney+ 
Hotstar')
+    """
+
+    assertEquals(15, test3[0][0], "The 15 NULL content rows should be included 
(TRUE OR NULL = TRUE)")
+    logger.info("Test 3 PASSED: TRUE OR NULL correctly returns TRUE")
+
+    // Test 4: Three-way OR with NULL
+    def test4 = sql """
+        SELECT COUNT(*) FROM ${tableName}
+        WHERE title MATCH_ALL 'Philosophy'
+           OR content MATCH_ALL 'Disney+ Hotstar'
+           OR content MATCH_ALL 'streaming'
+    """
+
+    assertEquals(16, test4[0][0], "Three-way OR should also return 16 rows")
+    logger.info("Test 4 PASSED: Three-way OR with NULL")
+
+    // Test 5: Nested OR with NULL
+    def test5 = sql """
+        SELECT COUNT(*) FROM ${tableName}
+        WHERE title MATCH_ALL 'Philosophy'
+           OR (content MATCH_ALL 'Disney+ Hotstar' OR content MATCH_ALL 
'streaming')
+    """
+
+    assertEquals(16, test5[0][0], "Nested OR should return 16 rows")
+    logger.info("Test 5 PASSED: Nested OR with NULL")
+
+    // Test 6: OR within AND with NULL
+    def test6 = sql """
+        SELECT COUNT(*) FROM ${tableName}
+        WHERE id <= 10
+          AND (title MATCH_ALL 'Philosophy' OR content MATCH_ALL 'Disney')
+    """
+
+    assertEquals(10, test6[0][0], "OR within AND should return 10 rows (rows 
1-10 all have title match)")
+    logger.info("Test 6 PASSED: OR within AND with NULL")
+
+    // Test 7: NOT with OR and NULL (SQL three-valued logic)
+    // Rows 1-16: OR = TRUE -> NOT TRUE = FALSE (excluded)
+    // Rows 17-20: OR = NULL -> NOT NULL = NULL (excluded)
+    def test7 = sql """
+        SELECT COUNT(*) FROM ${tableName}
+        WHERE NOT (title MATCH_ALL 'Philosophy' OR content MATCH_ALL 'Disney+ 
Hotstar')
+    """
+
+    assertEquals(0, test7[0][0], "NOT OR should exclude all rows due to NULL 
semantics")
+    logger.info("Test 7 PASSED: NOT OR with NULL correctly excludes all rows")
+
+    // Test 8: Verify behavior without pushdown (should still work correctly)
+    sql "SET enable_common_expr_pushdown = false"
+
+    def test8 = sql """
+        SELECT COUNT(*) FROM ${tableName}
+        WHERE title MATCH_ALL 'Philosophy' OR content MATCH_ALL 'Disney+ 
Hotstar'
+    """
+
+    assertEquals(16, test8[0][0], "Should return 16 rows even without 
pushdown")
+    logger.info("Test 8 PASSED: Query works correctly without pushdown")
+    logger.info("All tests PASSED: MATCH OR NULL semantics work correctly")
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to