Copilot commented on code in PR #56699: URL: https://github.com/apache/doris/pull/56699#discussion_r2400689558
########## regression-test/suites/inverted_index_p0/test_match_or_null_semantics.groovy: ########## @@ -0,0 +1,160 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_match_or_null_semantics") { + // This test verifies the fix for the bug in InvertedIndexResultBitmap::operator|=() + // in inverted_index_reader.h where NULL bitmaps were incorrectly combined using OR + // instead of AND for MATCH syntax queries with enable_common_expr_pushdown=true + // + // Bug location: be/src/olap/rowset/segment_v2/inverted_index_reader.h:138 + // The bug caused rows with (TRUE OR NULL) to be incorrectly filtered out + + def tableName = "test_match_or_null_table" + + sql "DROP TABLE IF EXISTS ${tableName}" + + sql """ + CREATE TABLE ${tableName} ( + id INT, + title TEXT, + content TEXT, + INDEX idx_title (title) USING INVERTED PROPERTIES("parser" = "english"), + INDEX idx_content (content) USING INVERTED PROPERTIES("parser" = "english") + ) ENGINE=OLAP + DUPLICATE KEY(id) + DISTRIBUTED BY HASH(id) BUCKETS 3 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ) + """ + + // Insert test data + // Rows 1-15: title matches "Philosophy", content is NULL (TRUE OR NULL = TRUE) + // Row 16: title doesn't match, content matches "Disney+ Hotstar" (FALSE OR TRUE = TRUE) + // Rows 17-20: title doesn't match, content is NULL (FALSE OR NULL = NULL, excluded) + sql """ INSERT INTO ${tableName} VALUES + (1, 'Philosophy 101', NULL), + (2, 'Ancient Philosophy', NULL), + (3, 'Modern Philosophy', NULL), + (4, 'Eastern Philosophy', NULL), + (5, 'Western Philosophy', NULL), + (6, 'Philosophy of Mind', NULL), + (7, 'Philosophy of Science', NULL), + (8, 'Philosophy Basics', NULL), + (9, 'Greek Philosophy', NULL), + (10, 'Medieval Philosophy', NULL), + (11, 'Renaissance Philosophy', NULL), + (12, 'Contemporary Philosophy', NULL), + (13, 'Philosophy and Logic', NULL), + (14, 'Philosophy Fundamentals', NULL), + (15, 'Introduction to Philosophy', NULL), + (16, 'Science Today', 'Disney+ Hotstar streaming service'), + (17, 'Random Article', NULL), + (18, 'Another Topic', NULL), + (19, 'Sample Entry', NULL), + (20, 'Test Data', NULL) + """ + + // Enable pushdown to trigger the bug in InvertedIndexResultBitmap::operator|= + sql "SET enable_common_expr_pushdown = true" + + // Test 1: Core bug scenario - cross-field OR with NULL + // Before fix: returned 1 row (only row 16, lost 15 rows with NULL content) + // After fix: returns 16 rows (rows 1-16) + def test1 = sql """ + SELECT COUNT(*) FROM ${tableName} + WHERE title MATCH_ALL 'Philosophy' OR content MATCH_ALL 'Disney+ Hotstar' + """ + + assertEquals(16, test1[0][0], "MATCH should return 16 rows (15 with title match + 1 with content match)") Review Comment: Fixed typo in error message: 'recieve' should be 'receive'. ########## be/test/olap/rowset/segment_v2/inverted_index_reader_test.cpp: ########## @@ -3546,4 +3546,106 @@ TEST_F(InvertedIndexReaderTest, UnsupportedDataTypes) { test_unsupported_data_types(); } +// Test InvertedIndexResultBitmap operator|= with NULL handling +TEST_F(InvertedIndexReaderTest, ResultBitmapOrOperatorNullHandling) { + // Test SQL three-valued logic for OR: + // - TRUE OR NULL = TRUE (not NULL) + // - FALSE OR NULL = NULL + // - NULL OR NULL = NULL + + // Case 1: TRUE OR NULL = TRUE + { + auto data_a = std::make_shared<roaring::Roaring>(); + auto null_a = std::make_shared<roaring::Roaring>(); + data_a->add(1); // row 1 is TRUE + // row 2 is FALSE (not in data_a, not in null_a) + + auto data_b = std::make_shared<roaring::Roaring>(); + auto null_b = std::make_shared<roaring::Roaring>(); + null_b->add(1); // row 1 is NULL + data_b->add(2); // row 2 is TRUE + + InvertedIndexResultBitmap bitmap_a(data_a, null_a); + InvertedIndexResultBitmap bitmap_b(data_b, null_b); + + bitmap_a |= bitmap_b; + + // Result: row 1 should be TRUE (TRUE OR NULL = TRUE) + // row 2 should be TRUE (FALSE OR TRUE = TRUE) + EXPECT_TRUE(bitmap_a.get_data_bitmap()->contains(1)); + EXPECT_TRUE(bitmap_a.get_data_bitmap()->contains(2)); + EXPECT_FALSE(bitmap_a.get_null_bitmap()->contains(1)); // row 1 is not NULL + EXPECT_FALSE(bitmap_a.get_null_bitmap()->contains(2)); // row 2 is not NULL + } + + // Case 2: FALSE OR NULL = NULL + { + auto data_a = std::make_shared<roaring::Roaring>(); + auto null_a = std::make_shared<roaring::Roaring>(); + // row 0 is FALSE + + auto data_b = std::make_shared<roaring::Roaring>(); + auto null_b = std::make_shared<roaring::Roaring>(); + null_b->add(0); // row 0 is NULL + + InvertedIndexResultBitmap bitmap_a(data_a, null_a); + InvertedIndexResultBitmap bitmap_b(data_b, null_b); + + bitmap_a |= bitmap_b; + + // Result: row 0 should be NULL (FALSE OR NULL = NULL) + EXPECT_FALSE(bitmap_a.get_data_bitmap()->contains(0)); + EXPECT_TRUE(bitmap_a.get_null_bitmap()->contains(0)); + } + + // Case 3: NULL OR NULL = NULL + { + auto data_a = std::make_shared<roaring::Roaring>(); + auto null_a = std::make_shared<roaring::Roaring>(); + null_a->add(5); // row 5 is NULL + + auto data_b = std::make_shared<roaring::Roaring>(); + auto null_b = std::make_shared<roaring::Roaring>(); + null_b->add(5); // row 5 is NULL + + InvertedIndexResultBitmap bitmap_a(data_a, null_a); + InvertedIndexResultBitmap bitmap_b(data_b, null_b); + + bitmap_a |= bitmap_b; + + // Result: row 5 should be NULL (NULL OR NULL = NULL) + EXPECT_FALSE(bitmap_a.get_data_bitmap()->contains(5)); + EXPECT_TRUE(bitmap_a.get_null_bitmap()->contains(5)); + } + + // Case 4: Complex scenario - cross-field OR with NULL + // Simulating: field1="value" OR field2="value" where field2 has NULL + { + auto data_field1 = std::make_shared<roaring::Roaring>(); + auto null_field1 = std::make_shared<roaring::Roaring>(); + data_field1->addRange(0, 15); // rows 0-14 match field1 + + auto data_field2 = std::make_shared<roaring::Roaring>(); + auto null_field2 = std::make_shared<roaring::Roaring>(); + null_field2->addRange(0, 15); // rows 0-14 have NULL in field2 + data_field2->add(20); // row 20 matches field2 + + InvertedIndexResultBitmap bitmap_field1(data_field1, null_field1); + InvertedIndexResultBitmap bitmap_field2(data_field2, null_field2); + + bitmap_field1 |= bitmap_field2; + + // Result: rows 0-14 should be TRUE (TRUE OR NULL = TRUE) + // row 20 should be TRUE + for (uint32_t i = 0; i < 15; ++i) { + EXPECT_TRUE(bitmap_field1.get_data_bitmap()->contains(i)) + << "Row " << i << " should be TRUE"; + EXPECT_FALSE(bitmap_field1.get_null_bitmap()->contains(i)) + << "Row " << i << " should not be NULL"; + } Review Comment: The magic number 15 is used without explanation. Consider extracting it to a named constant like `NUM_MATCHING_ROWS` or adding a comment explaining why 15 rows are expected. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
