This is an automated email from the ASF dual-hosted git repository.
airborne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 6d4e218a2e3 [fix](search) inject MATCH_ALL_DOCS for multi-MUST_NOT
queries in lucene mode (#60891)
6d4e218a2e3 is described below
commit 6d4e218a2e3a86051c2645b4774b358a572c80af
Author: Jack <[email protected]>
AuthorDate: Sun Mar 1 11:07:47 2026 +0800
[fix](search) inject MATCH_ALL_DOCS for multi-MUST_NOT queries in lucene
mode (#60891)
### What problem does this PR solve?
Related PR: #60814
Problem Summary:
In search() lucene mode, when every term in a boolean query is MUST_NOT
(e.g., `NOT a AND NOT b`, or `NOT a NOT b` with default_operator=AND),
the query incorrectly returns all documents; it should return all
documents EXCEPT those matching the negated terms.
Root cause: Lucene's BooleanQuery with only MUST_NOT clauses matches
nothing (by design). Elasticsearch (ES) handles this by injecting a
MatchAllDocsQuery with SHOULD occur. Doris handled only the single-term
MUST_NOT case, not queries in which multiple terms are all MUST_NOT.
Fix: After `applyLuceneBooleanLogic()`, detect whether ALL terms are
MUST_NOT and, if so, inject `MATCH_ALL_DOCS(SHOULD)` with
`minimum_should_match=1`.
---
.../functions/scalar/SearchDslParser.java | 17 +++++++
.../functions/scalar/SearchDslParserTest.java | 54 ++++++++++++++++++++++
2 files changed, 71 insertions(+)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java
index 86e850cd7ad..3f5072b46b6 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java
@@ -2164,6 +2164,23 @@ MATCH_ALL_DOCS, // Matches all documents (used for pure
NOT query rewriting)
// Apply Lucene boolean logic
applyLuceneBooleanLogic(terms);
+ // Check if ALL terms are MUST_NOT (pure negation query).
+ // In Lucene, a BooleanQuery with only MUST_NOT clauses matches
nothing,
+ // so we inject a MATCH_ALL_DOCS(SHOULD) node to ensure proper
semantics:
+ // match all docs EXCEPT those matching any MUST_NOT term.
+ boolean allMustNot = terms.stream().allMatch(t -> t.occur ==
QsOccur.MUST_NOT);
+ if (allMustNot) {
+ QsNode matchAllNode = new QsNode(QsClauseType.MATCH_ALL_DOCS,
(List<QsNode>) null);
+ matchAllNode.setOccur(QsOccur.SHOULD);
+ List<QsNode> children = new ArrayList<>();
+ children.add(matchAllNode);
+ for (TermWithOccur term : terms) {
+ term.node.setOccur(term.occur);
+ children.add(term.node);
+ }
+ return new QsNode(QsClauseType.OCCUR_BOOLEAN, children, 1);
+ }
+
// Determine minimum_should_match
// Only use explicit option at top level; nested clauses use
default logic
Integer minShouldMatch = (nestingLevel == 0) ?
options.getMinimumShouldMatch() : null;
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java
index 417f73eb4ed..359f3d86f8a 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java
@@ -781,6 +781,60 @@ public class SearchDslParserTest {
Assertions.assertEquals(QsOccur.MUST_NOT, termNode.getOccur());
}
+ @Test
+ public void testLuceneModeMultipleNotTermsInjectMatchAllDocs() {
+ // Test: "NOT a AND NOT b" should inject MATCH_ALL_DOCS(SHOULD) when
ALL terms are MUST_NOT
+ String dsl = "NOT field:a AND NOT field:b";
+ String options = "{\"mode\":\"lucene\"}";
+ QsPlan plan = SearchDslParser.parseDsl(dsl, options);
+
+ Assertions.assertNotNull(plan);
+ Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN,
plan.getRoot().getType());
+ // 3 children: MATCH_ALL_DOCS(SHOULD) + MUST_NOT(a) + MUST_NOT(b)
+ Assertions.assertEquals(3, plan.getRoot().getChildren().size());
+ Assertions.assertEquals(Integer.valueOf(1),
plan.getRoot().getMinimumShouldMatch());
+
+ QsNode matchAllNode = plan.getRoot().getChildren().get(0);
+ Assertions.assertEquals(QsClauseType.MATCH_ALL_DOCS,
matchAllNode.getType());
+ Assertions.assertEquals(QsOccur.SHOULD, matchAllNode.getOccur());
+
+ for (int i = 1; i < plan.getRoot().getChildren().size(); i++) {
+ Assertions.assertEquals(QsOccur.MUST_NOT,
plan.getRoot().getChildren().get(i).getOccur());
+ }
+ }
+
+ @Test
+ public void testLuceneModeMultipleNotImplicitConjunction() {
+ // Test: "NOT a NOT b" with default_operator=and
+ String dsl = "NOT field:a NOT field:b";
+ String options = "{\"mode\":\"lucene\",\"default_operator\":\"and\"}";
+ QsPlan plan = SearchDslParser.parseDsl(dsl, options);
+
+ Assertions.assertNotNull(plan);
+ Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN,
plan.getRoot().getType());
+ Assertions.assertEquals(3, plan.getRoot().getChildren().size());
+
+ QsNode matchAllNode = plan.getRoot().getChildren().get(0);
+ Assertions.assertEquals(QsClauseType.MATCH_ALL_DOCS,
matchAllNode.getType());
+ Assertions.assertEquals(QsOccur.SHOULD, matchAllNode.getOccur());
+ }
+
+ @Test
+ public void testLuceneModeNotAllMustNotNoInjection() {
+ // Test: "NOT a AND b" - mixed, should NOT inject MATCH_ALL_DOCS
+ String dsl = "NOT field:a AND field:b";
+ String options = "{\"mode\":\"lucene\"}";
+ QsPlan plan = SearchDslParser.parseDsl(dsl, options);
+
+ Assertions.assertNotNull(plan);
+ Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN,
plan.getRoot().getType());
+ Assertions.assertEquals(2, plan.getRoot().getChildren().size());
+
+ boolean hasMatchAll = plan.getRoot().getChildren().stream()
+ .anyMatch(c -> c.getType() == QsClauseType.MATCH_ALL_DOCS);
+ Assertions.assertFalse(hasMatchAll, "Mixed MUST/MUST_NOT should not
inject MATCH_ALL_DOCS");
+ }
+
@Test
public void testLuceneModeMinimumShouldMatchExplicit() {
// Test: explicit minimum_should_match=1 keeps SHOULD clauses
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]