This is an automated email from the ASF dual-hosted git repository.

jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 0e99bceffea Handle text search options param in TEXT_MATCH in text 
filter optimizer. (#16203)
0e99bceffea is described below

commit 0e99bceffea954a22b53b11c9545283bf584bf4b
Author: RAGHVENDRA KUMAR YADAV <raghavmn...@gmail.com>
AuthorDate: Thu Jun 26 21:22:27 2025 -0700

    Handle text search options param in TEXT_MATCH in text filter optimizer. 
(#16203)
---
 .../optimizer/filter/TextMatchFilterOptimizer.java | 35 +++++++++++---
 .../core/query/optimizer/QueryOptimizerTest.java   | 53 ++++++++++++++++++++++
 .../pinot/queries/TextSearchQueriesTest.java       | 47 +++++++++++++++++--
 3 files changed, 124 insertions(+), 11 deletions(-)

diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/TextMatchFilterOptimizer.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/TextMatchFilterOptimizer.java
index 87d8f7a0408..a3dde421687 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/TextMatchFilterOptimizer.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/TextMatchFilterOptimizer.java
@@ -37,6 +37,7 @@ import org.apache.pinot.sql.FilterKind;
  *
  * NOTE: This optimizer follows the {@link FlattenAndOrFilterOptimizer}, so 
all the AND/OR filters are already
  *       flattened.
+ * NOTE: This optimizer does not optimize TEXT_MATCH expressions that have 
options (third parameter)
  */
 public class TextMatchFilterOptimizer implements FilterOptimizer {
   private static final String SPACE = " ";
@@ -75,8 +76,13 @@ public class TextMatchFilterOptimizer implements 
FilterOptimizer {
         String childOperator = childFunction.getOperator();
         if (childOperator.equals(FilterKind.TEXT_MATCH.name())) {
           List<Expression> operands = childFunction.getOperands();
-          Expression identifier = operands.get(0);
-          textMatchMap.computeIfAbsent(identifier, k -> new 
ArrayList<>()).add(child);
+          // Don't optimize TEXT_MATCH expressions that have options (third 
parameter)
+          if (hasTextMatchOptions(operands)) {
+            newChildren.add(child);
+          } else {
+            Expression identifier = operands.get(0);
+            textMatchMap.computeIfAbsent(identifier, k -> new 
ArrayList<>()).add(child);
+          }
         } else if (childOperator.equals(FilterKind.NOT.name())) {
           assert childFunction.getOperands().size() == 1;
           Expression operand = childFunction.getOperands().get(0);
@@ -87,8 +93,13 @@ public class TextMatchFilterOptimizer implements 
FilterOptimizer {
           }
           if 
(notChildFunction.getOperator().equals(FilterKind.TEXT_MATCH.name())) {
             List<Expression> operands = notChildFunction.getOperands();
-            Expression identifier = operands.get(0);
-            textMatchMap.computeIfAbsent(identifier, k -> new 
ArrayList<>()).add(child);
+            // Don't optimize TEXT_MATCH expressions that have options (third 
parameter)
+            if (hasTextMatchOptions(operands)) {
+              newChildren.add(child);
+            } else {
+              Expression identifier = operands.get(0);
+              textMatchMap.computeIfAbsent(identifier, k -> new 
ArrayList<>()).add(child);
+            }
             continue;
           }
           newChildren.add(child);
@@ -118,10 +129,16 @@ public class TextMatchFilterOptimizer implements 
FilterOptimizer {
       Map<Expression, List<Expression>> textMatchMap) {
     // for each key in textMatchMap, build a TEXT_MATCH expression (merge list 
of filters)
     for (Map.Entry<Expression, List<Expression>> entry : 
textMatchMap.entrySet()) {
+      List<Expression> expressions = entry.getValue();
+      // If only one TEXT_MATCH for this column, add it as-is
+      if (expressions.size() == 1) {
+        newChildren.add(expressions.get(0));
+        continue;
+      }
       // special case: if all expressions are NOT, then wrap the merged text 
match inside a NOT. otherwise, push the
       // NOT down into the text match expression
       boolean allNot = true;
-      for (Expression expression : entry.getValue()) {
+      for (Expression expression : expressions) {
         if 
(!expression.getFunctionCall().getOperator().equals(FilterKind.NOT.name())) {
           allNot = false;
           break;
@@ -130,13 +147,13 @@ public class TextMatchFilterOptimizer implements 
FilterOptimizer {
 
       List<String> literals = new ArrayList<>();
       if (allNot) {
-        for (Expression expression : entry.getValue()) {
+        for (Expression expression : expressions) {
           Expression operand = 
expression.getFunctionCall().getOperands().get(0);
           literals.add(
               
wrapWithParentheses(operand.getFunctionCall().getOperands().get(1).getLiteral().getStringValue()));
         }
       } else {
-        for (Expression expression : entry.getValue()) {
+        for (Expression expression : expressions) {
           if 
(expression.getFunctionCall().getOperator().equals(FilterKind.NOT.name())) {
             Expression operand = 
expression.getFunctionCall().getOperands().get(0);
 
@@ -190,4 +207,8 @@ public class TextMatchFilterOptimizer implements 
FilterOptimizer {
   private String wrapWithParentheses(String expression) {
     return "(" + expression + ")";
   }
+
+  private boolean hasTextMatchOptions(List<Expression> operands) {
+    return operands.size() > 2;
+  }
 }
diff --git 
a/pinot-core/src/test/java/org/apache/pinot/core/query/optimizer/QueryOptimizerTest.java
 
b/pinot-core/src/test/java/org/apache/pinot/core/query/optimizer/QueryOptimizerTest.java
index 9637a9dea21..9ab3ede7505 100644
--- 
a/pinot-core/src/test/java/org/apache/pinot/core/query/optimizer/QueryOptimizerTest.java
+++ 
b/pinot-core/src/test/java/org/apache/pinot/core/query/optimizer/QueryOptimizerTest.java
@@ -310,6 +310,59 @@ public class QueryOptimizerTest {
         "select * from testTable where text_match(string, 'foo') AND 
text_match(string, 'bar OR baz')",
         "select * from testTable where text_match(string, '(foo) AND (bar OR 
baz)')"
     );
+
+    // Test cases for TEXT_MATCH with mixed options scenarios
+    // Case 1: Same column with multiple text match - some with options, some 
without options
+    // Text match without options should merge, text match with options should 
remain separate
+    testQuery("SELECT * FROM testTable WHERE TEXT_MATCH(string, 'foo') AND 
TEXT_MATCH(string, 'bar') AND "
+            + "TEXT_MATCH(string, 'baz', 'parser=CLASSIC')",
+        "SELECT * FROM testTable WHERE TEXT_MATCH(string, '(foo) AND (bar)') 
AND "
+            + "TEXT_MATCH(string, 'baz', 'parser=CLASSIC')");
+
+    testCannotOptimizeQuery("SELECT * FROM testTable WHERE TEXT_MATCH(string, 
'foo') AND "
+        + "TEXT_MATCH(string, 'bar', 'parser=STANDARD') AND TEXT_MATCH(string, 
'baz', 'parser=STANDARD')");
+
+    testQuery(
+        "SELECT * FROM testTable WHERE TEXT_MATCH(string, 'foo', 
'parser=CLASSIC') AND TEXT_MATCH(string, 'bar') AND "
+            + "TEXT_MATCH(string, 'baz')",
+        "SELECT * FROM testTable WHERE TEXT_MATCH(string, 'foo', 
'parser=CLASSIC') AND "
+            + "TEXT_MATCH(string, '(bar) AND (baz)')");
+
+    // Case 2: Different columns with or without text match - should not merge
+    testQuery("SELECT * FROM testTable WHERE TEXT_MATCH(string1, 'foo') AND 
TEXT_MATCH(string1, 'bar') AND "
+            + "TEXT_MATCH(string2, 'baz')",
+        "SELECT * FROM testTable WHERE TEXT_MATCH(string1, '(foo) AND (bar)') 
AND TEXT_MATCH(string2, 'baz')");
+
+    testQuery("SELECT * FROM testTable WHERE TEXT_MATCH(string1, 'foo') AND 
TEXT_MATCH(string2, 'bar') AND "
+            + "TEXT_MATCH(string2, 'baz')",
+        "SELECT * FROM testTable WHERE TEXT_MATCH(string1, 'foo') AND 
TEXT_MATCH(string2, '(bar) AND (baz)')");
+
+    testCannotOptimizeQuery(
+        "SELECT * FROM testTable WHERE TEXT_MATCH(string1, 'foo', 
'parser=CLASSIC') AND TEXT_MATCH(string1, 'bar') AND "
+            + "TEXT_MATCH(string2, 'baz', 'parser=STANDARD')");
+
+    // Case 3: Complex scenarios with OR operators
+    testQuery("SELECT * FROM testTable WHERE TEXT_MATCH(string, 'foo') OR 
TEXT_MATCH(string, 'bar') OR "
+            + "TEXT_MATCH(string, 'baz', 'parser=CLASSIC')",
+        "SELECT * FROM testTable WHERE TEXT_MATCH(string, '(foo) OR (bar)') OR 
"
+            + "TEXT_MATCH(string, 'baz', 'parser=CLASSIC')");
+
+    testQuery("SELECT * FROM testTable WHERE TEXT_MATCH(string1, 'foo') AND 
TEXT_MATCH(string1, 'bar') OR "
+            + "TEXT_MATCH(string2, 'baz') AND TEXT_MATCH(string2, 'qux')",
+        "SELECT * FROM testTable WHERE TEXT_MATCH(string1, '(foo) AND (bar)') 
OR "
+            + "TEXT_MATCH(string2, '(baz) AND (qux)')");
+
+    // Case 4: Mixed scenarios with NOT operators
+    testQuery("SELECT * FROM testTable WHERE TEXT_MATCH(string, 'foo') AND NOT 
TEXT_MATCH(string, 'bar') AND "
+            + "TEXT_MATCH(string, 'baz', 'parser=CLASSIC')",
+        "SELECT * FROM testTable WHERE TEXT_MATCH(string, '(foo) AND NOT 
(bar)') AND "
+            + "TEXT_MATCH(string, 'baz', 'parser=CLASSIC')");
+
+    testQuery("SELECT * FROM testTable WHERE NOT TEXT_MATCH(string, 'foo') AND 
TEXT_MATCH(string, 'bar') AND "
+            + "TEXT_MATCH(string, 'baz', 'parser=STANDARD')",
+        "SELECT * FROM testTable WHERE TEXT_MATCH(string, 'NOT (foo) AND 
(bar)') AND "
+            + "TEXT_MATCH(string, 'baz', 'parser=STANDARD')");
+
     testCannotOptimizeQuery("SELECT * FROM testTable WHERE TEXT_MATCH(string1, 
'foo') OR TEXT_MATCH(string2, 'bar')");
     testCannotOptimizeQuery(
         "SELECT * FROM testTable WHERE int = 1 AND TEXT_MATCH(string, 'foo') 
OR TEXT_MATCH(string, 'bar')");
diff --git 
a/pinot-core/src/test/java/org/apache/pinot/queries/TextSearchQueriesTest.java 
b/pinot-core/src/test/java/org/apache/pinot/queries/TextSearchQueriesTest.java
index d55409fe032..0852bba44ca 100644
--- 
a/pinot-core/src/test/java/org/apache/pinot/queries/TextSearchQueriesTest.java
+++ 
b/pinot-core/src/test/java/org/apache/pinot/queries/TextSearchQueriesTest.java
@@ -48,6 +48,8 @@ import org.apache.lucene.search.Query;
 import org.apache.lucene.search.SearcherManager;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
+import org.apache.pinot.common.response.broker.BrokerResponseNative;
+import org.apache.pinot.common.response.broker.QueryProcessingException;
 import org.apache.pinot.common.response.broker.ResultTable;
 import org.apache.pinot.common.utils.DataSchema;
 import org.apache.pinot.common.utils.DataSchema.ColumnDataType;
@@ -75,10 +77,7 @@ import org.testng.annotations.AfterClass;
 import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
 
-import static org.testng.Assert.assertEquals;
-import static org.testng.Assert.assertNotEquals;
-import static org.testng.Assert.assertNotNull;
-import static org.testng.Assert.assertTrue;
+import static org.testng.Assert.*;
 
 
 /**
@@ -2249,4 +2248,44 @@ public class TextSearchQueriesTest extends 
BaseQueriesTest {
     testTextSearchSelectQueryHelper(queryDifferentParsers, 
expectedDifferentColumnsAnd.size(), false,
         expectedDifferentColumnsAnd);
   }
+
+  @Test
+  public void testTextFilterOptimizerWithWildcardsSameOptions()
+      throws Exception {
+    // Test that optimizer works when all TEXT_MATCH expressions have the same 
options with wildcards
+    String query =
+        "SELECT INT_COL, SKILLS_TEXT_COL FROM " + TABLE_NAME + " WHERE " + 
"TEXT_MATCH(" + SKILLS_TEXT_COL_NAME
+            + ", '*CUDA*', 'parser=CLASSIC,allowLeadingWildcard=true') AND " + 
"TEXT_MATCH(" + SKILLS_TEXT_COL_NAME
+            + ", '*Python*', 'parser=CLASSIC,allowLeadingWildcard=true') LIMIT 
50000";
+
+    BrokerResponseNative brokerResponse = 
getBrokerResponseForOptimizedQuery(query, getTableConfig(), SCHEMA);
+    assertTrue(brokerResponse.getNumDocsScanned() > 0, "Query should scan some 
documents");
+  }
+
+  @Test
+  public void testTextFilterOptimizerWithWildcardsDifferentOptions()
+      throws Exception {
+    // This should pass: trailing wildcard is allowed by default
+    String queryTrailing =
+        "SELECT INT_COL, SKILLS_TEXT_COL FROM " + TABLE_NAME + " WHERE " + 
"TEXT_MATCH(" + SKILLS_TEXT_COL_NAME
+            + ", '*CUDA*', 'parser=CLASSIC,allowLeadingWildcard=true') AND " + 
"TEXT_MATCH(" + SKILLS_TEXT_COL_NAME
+            + ", 'Python*', 'parser=STANDARD') LIMIT 50000";
+    BrokerResponseNative responseTrailing = 
getBrokerResponseForOptimizedQuery(queryTrailing, getTableConfig(), SCHEMA);
+    assertTrue(responseTrailing.getNumDocsScanned() > 0, "Trailing wildcard 
should scan some documents");
+
+    // This should fail: leading wildcard is NOT allowed with parser=STANDARD
+    // The optimizer should NOT merge these expressions because they have 
options
+    String queryLeading =
+        "SELECT INT_COL, SKILLS_TEXT_COL FROM " + TABLE_NAME + " WHERE " + 
"TEXT_MATCH(" + SKILLS_TEXT_COL_NAME
+            + ", '*CUDA*', 'parser=CLASSIC,allowLeadingWildcard=true') AND " + 
"TEXT_MATCH(" + SKILLS_TEXT_COL_NAME
+            + ", '*Python*', 'parser=STANDARD') LIMIT 50000";
+
+    BrokerResponseNative responseLeading = 
getBrokerResponseForOptimizedQuery(queryLeading, getTableConfig(), SCHEMA);
+    List<QueryProcessingException> exceptions = 
responseLeading.getExceptions();
+    assertFalse(exceptions.isEmpty(), "Expected error for leading wildcard 
with parser=STANDARD");
+    String errorMsg = exceptions.toString();
+    assertTrue(errorMsg.contains("Leading wildcard is not allowed") || 
errorMsg.contains(
+            "Failed while searching the text index"),
+        "Expected error related to leading wildcard or text search failure, 
got: " + errorMsg);
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org

Reply via email to