This is an automated email from the ASF dual-hosted git repository. jackie pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push: new 0e99bceffea Handle text search options param in TEXT_MATCH in text filter optimizer. (#16203) 0e99bceffea is described below commit 0e99bceffea954a22b53b11c9545283bf584bf4b Author: RAGHVENDRA KUMAR YADAV <raghavmn...@gmail.com> AuthorDate: Thu Jun 26 21:22:27 2025 -0700 Handle text search options param in TEXT_MATCH in text filter optimizer. (#16203) --- .../optimizer/filter/TextMatchFilterOptimizer.java | 35 +++++++++++--- .../core/query/optimizer/QueryOptimizerTest.java | 53 ++++++++++++++++++++++ .../pinot/queries/TextSearchQueriesTest.java | 47 +++++++++++++++++-- 3 files changed, 124 insertions(+), 11 deletions(-) diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/TextMatchFilterOptimizer.java b/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/TextMatchFilterOptimizer.java index 87d8f7a0408..a3dde421687 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/TextMatchFilterOptimizer.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/TextMatchFilterOptimizer.java @@ -37,6 +37,7 @@ import org.apache.pinot.sql.FilterKind; * * NOTE: This optimizer follows the {@link FlattenAndOrFilterOptimizer}, so all the AND/OR filters are already * flattened. + * NOTE: This optimizer does not optimize TEXT_MATCH expressions that have options (third parameter) */ public class TextMatchFilterOptimizer implements FilterOptimizer { private static final String SPACE = " "; @@ -75,8 +76,13 @@ public class TextMatchFilterOptimizer implements FilterOptimizer { String childOperator = childFunction.getOperator(); if (childOperator.equals(FilterKind.TEXT_MATCH.name())) { List<Expression> operands = childFunction.getOperands(); - Expression identifier = operands.get(0); - textMatchMap.computeIfAbsent(identifier, k -> new ArrayList<>()).add(child); + // Don't optimize TEXT_MATCH expressions that have options (third parameter) + if (hasTextMatchOptions(operands)) { + newChildren.add(child); + } else { + Expression identifier = operands.get(0); + textMatchMap.computeIfAbsent(identifier, k -> new ArrayList<>()).add(child); + } } else if (childOperator.equals(FilterKind.NOT.name())) { assert childFunction.getOperands().size() == 1; Expression operand = childFunction.getOperands().get(0); @@ -87,8 +93,13 @@ public class TextMatchFilterOptimizer implements FilterOptimizer { } if (notChildFunction.getOperator().equals(FilterKind.TEXT_MATCH.name())) { List<Expression> operands = notChildFunction.getOperands(); - Expression identifier = operands.get(0); - textMatchMap.computeIfAbsent(identifier, k -> new ArrayList<>()).add(child); + // Don't optimize TEXT_MATCH expressions that have options (third parameter) + if (hasTextMatchOptions(operands)) { + newChildren.add(child); + } else { + Expression identifier = operands.get(0); + textMatchMap.computeIfAbsent(identifier, k -> new ArrayList<>()).add(child); + } continue; } newChildren.add(child); @@ -118,10 +129,16 @@ public class TextMatchFilterOptimizer implements FilterOptimizer { Map<Expression, List<Expression>> textMatchMap) { // for each key in textMatchMap, build a TEXT_MATCH expression (merge list of filters) for (Map.Entry<Expression, List<Expression>> entry : textMatchMap.entrySet()) { + List<Expression> expressions = entry.getValue(); + // If only one TEXT_MATCH for this column, add it as-is + if (expressions.size() == 1) { + newChildren.add(expressions.get(0)); + continue; + } // special case: if all expressions are NOT, then wrap the merged text match inside a NOT. otherwise, push the // NOT down into the text match expression boolean allNot = true; - for (Expression expression : entry.getValue()) { + for (Expression expression : expressions) { if (!expression.getFunctionCall().getOperator().equals(FilterKind.NOT.name())) { allNot = false; break; @@ -130,13 +147,13 @@ public class TextMatchFilterOptimizer implements FilterOptimizer { List<String> literals = new ArrayList<>(); if (allNot) { - for (Expression expression : entry.getValue()) { + for (Expression expression : expressions) { Expression operand = expression.getFunctionCall().getOperands().get(0); literals.add( wrapWithParentheses(operand.getFunctionCall().getOperands().get(1).getLiteral().getStringValue())); } } else { - for (Expression expression : entry.getValue()) { + for (Expression expression : expressions) { if (expression.getFunctionCall().getOperator().equals(FilterKind.NOT.name())) { Expression operand = expression.getFunctionCall().getOperands().get(0); @@ -190,4 +207,8 @@ public class TextMatchFilterOptimizer implements FilterOptimizer { private String wrapWithParentheses(String expression) { return "(" + expression + ")"; } + + private boolean hasTextMatchOptions(List<Expression> operands) { + return operands.size() > 2; + } } diff --git a/pinot-core/src/test/java/org/apache/pinot/core/query/optimizer/QueryOptimizerTest.java b/pinot-core/src/test/java/org/apache/pinot/core/query/optimizer/QueryOptimizerTest.java index 9637a9dea21..9ab3ede7505 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/query/optimizer/QueryOptimizerTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/query/optimizer/QueryOptimizerTest.java @@ -310,6 +310,59 @@ public class QueryOptimizerTest { "select * from testTable where text_match(string, 'foo') AND text_match(string, 'bar OR baz')", "select * from testTable where text_match(string, '(foo) AND (bar OR baz)')" ); + + // Test cases for TEXT_MATCH with mixed options scenarios + // Case 1: Same column with multiple text match - some with options, some without options + // Text match without options should merge, text match with options should remain separate + testQuery("SELECT * FROM testTable WHERE TEXT_MATCH(string, 'foo') AND TEXT_MATCH(string, 'bar') AND " + + "TEXT_MATCH(string, 'baz', 'parser=CLASSIC')", + "SELECT * FROM testTable WHERE TEXT_MATCH(string, '(foo) AND (bar)') AND " + + "TEXT_MATCH(string, 'baz', 'parser=CLASSIC')"); + + testCannotOptimizeQuery("SELECT * FROM testTable WHERE TEXT_MATCH(string, 'foo') AND " + + "TEXT_MATCH(string, 'bar', 'parser=STANDARD') AND TEXT_MATCH(string, 'baz', 'parser=STANDARD')"); + + testQuery( + "SELECT * FROM testTable WHERE TEXT_MATCH(string, 'foo', 'parser=CLASSIC') AND TEXT_MATCH(string, 'bar') AND " + + "TEXT_MATCH(string, 'baz')", + "SELECT * FROM testTable WHERE TEXT_MATCH(string, 'foo', 'parser=CLASSIC') AND " + + "TEXT_MATCH(string, '(bar) AND (baz)')"); + + // Case 2: Different columns with or without text match - should not merge + testQuery("SELECT * FROM testTable WHERE TEXT_MATCH(string1, 'foo') AND TEXT_MATCH(string1, 'bar') AND " + + "TEXT_MATCH(string2, 'baz')", + "SELECT * FROM testTable WHERE TEXT_MATCH(string1, '(foo) AND (bar)') AND TEXT_MATCH(string2, 'baz')"); + + testQuery("SELECT * FROM testTable WHERE TEXT_MATCH(string1, 'foo') AND TEXT_MATCH(string2, 'bar') AND " + + "TEXT_MATCH(string2, 'baz')", + "SELECT * FROM testTable WHERE TEXT_MATCH(string1, 'foo') AND TEXT_MATCH(string2, '(bar) AND (baz)')"); + + testCannotOptimizeQuery( + "SELECT * FROM testTable WHERE TEXT_MATCH(string1, 'foo', 'parser=CLASSIC') AND TEXT_MATCH(string1, 'bar') AND " + + "TEXT_MATCH(string2, 'baz', 'parser=STANDARD')"); + + // Case 3: Complex scenarios with OR operators + testQuery("SELECT * FROM testTable WHERE TEXT_MATCH(string, 'foo') OR TEXT_MATCH(string, 'bar') OR " + + "TEXT_MATCH(string, 'baz', 'parser=CLASSIC')", + "SELECT * FROM testTable WHERE TEXT_MATCH(string, '(foo) OR (bar)') OR " + + "TEXT_MATCH(string, 'baz', 'parser=CLASSIC')"); + + testQuery("SELECT * FROM testTable WHERE TEXT_MATCH(string1, 'foo') AND TEXT_MATCH(string1, 'bar') OR " + + "TEXT_MATCH(string2, 'baz') AND TEXT_MATCH(string2, 'qux')", + "SELECT * FROM testTable WHERE TEXT_MATCH(string1, '(foo) AND (bar)') OR " + + "TEXT_MATCH(string2, '(baz) AND (qux)')"); + + // Case 4: Mixed scenarios with NOT operators + testQuery("SELECT * FROM testTable WHERE TEXT_MATCH(string, 'foo') AND NOT TEXT_MATCH(string, 'bar') AND " + + "TEXT_MATCH(string, 'baz', 'parser=CLASSIC')", + "SELECT * FROM testTable WHERE TEXT_MATCH(string, '(foo) AND NOT (bar)') AND " + + "TEXT_MATCH(string, 'baz', 'parser=CLASSIC')"); + + testQuery("SELECT * FROM testTable WHERE NOT TEXT_MATCH(string, 'foo') AND TEXT_MATCH(string, 'bar') AND " + + "TEXT_MATCH(string, 'baz', 'parser=STANDARD')", + "SELECT * FROM testTable WHERE TEXT_MATCH(string, 'NOT (foo) AND (bar)') AND " + + "TEXT_MATCH(string, 'baz', 'parser=STANDARD')"); + testCannotOptimizeQuery("SELECT * FROM testTable WHERE TEXT_MATCH(string1, 'foo') OR TEXT_MATCH(string2, 'bar')"); testCannotOptimizeQuery( "SELECT * FROM testTable WHERE int = 1 AND TEXT_MATCH(string, 'foo') OR TEXT_MATCH(string, 'bar')"); diff --git a/pinot-core/src/test/java/org/apache/pinot/queries/TextSearchQueriesTest.java b/pinot-core/src/test/java/org/apache/pinot/queries/TextSearchQueriesTest.java index d55409fe032..0852bba44ca 100644 --- a/pinot-core/src/test/java/org/apache/pinot/queries/TextSearchQueriesTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/queries/TextSearchQueriesTest.java @@ -48,6 +48,8 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.SearcherManager; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; +import org.apache.pinot.common.response.broker.BrokerResponseNative; +import org.apache.pinot.common.response.broker.QueryProcessingException; import org.apache.pinot.common.response.broker.ResultTable; import org.apache.pinot.common.utils.DataSchema; import org.apache.pinot.common.utils.DataSchema.ColumnDataType; @@ -75,10 +77,7 @@ import org.testng.annotations.AfterClass; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; -import static org.testng.Assert.assertEquals; -import static org.testng.Assert.assertNotEquals; -import static org.testng.Assert.assertNotNull; -import static org.testng.Assert.assertTrue; +import static org.testng.Assert.*; /** @@ -2249,4 +2248,44 @@ public class TextSearchQueriesTest extends BaseQueriesTest { testTextSearchSelectQueryHelper(queryDifferentParsers, expectedDifferentColumnsAnd.size(), false, expectedDifferentColumnsAnd); } + + @Test + public void testTextFilterOptimizerWithWildcardsSameOptions() + throws Exception { + // Test that optimizer works when all TEXT_MATCH expressions have the same options with wildcards + String query = + "SELECT INT_COL, SKILLS_TEXT_COL FROM " + TABLE_NAME + " WHERE " + "TEXT_MATCH(" + SKILLS_TEXT_COL_NAME + + ", '*CUDA*', 'parser=CLASSIC,allowLeadingWildcard=true') AND " + "TEXT_MATCH(" + SKILLS_TEXT_COL_NAME + + ", '*Python*', 'parser=CLASSIC,allowLeadingWildcard=true') LIMIT 50000"; + + BrokerResponseNative brokerResponse = getBrokerResponseForOptimizedQuery(query, getTableConfig(), SCHEMA); + assertTrue(brokerResponse.getNumDocsScanned() > 0, "Query should scan some documents"); + } + + @Test + public void testTextFilterOptimizerWithWildcardsDifferentOptions() + throws Exception { + // This should pass: trailing wildcard is allowed by default + String queryTrailing = + "SELECT INT_COL, SKILLS_TEXT_COL FROM " + TABLE_NAME + " WHERE " + "TEXT_MATCH(" + SKILLS_TEXT_COL_NAME + + ", '*CUDA*', 'parser=CLASSIC,allowLeadingWildcard=true') AND " + "TEXT_MATCH(" + SKILLS_TEXT_COL_NAME + + ", 'Python*', 'parser=STANDARD') LIMIT 50000"; + BrokerResponseNative responseTrailing = getBrokerResponseForOptimizedQuery(queryTrailing, getTableConfig(), SCHEMA); + assertTrue(responseTrailing.getNumDocsScanned() > 0, "Trailing wildcard should scan some documents"); + + // This should fail: leading wildcard is NOT allowed with parser=STANDARD + // The optimizer should NOT merge these expressions because they have options + String queryLeading = + "SELECT INT_COL, SKILLS_TEXT_COL FROM " + TABLE_NAME + " WHERE " + "TEXT_MATCH(" + SKILLS_TEXT_COL_NAME + + ", '*CUDA*', 'parser=CLASSIC,allowLeadingWildcard=true') AND " + "TEXT_MATCH(" + SKILLS_TEXT_COL_NAME + + ", '*Python*', 'parser=STANDARD') LIMIT 50000"; + + BrokerResponseNative responseLeading = getBrokerResponseForOptimizedQuery(queryLeading, getTableConfig(), SCHEMA); + List<QueryProcessingException> exceptions = responseLeading.getExceptions(); + assertFalse(exceptions.isEmpty(), "Expected error for leading wildcard with parser=STANDARD"); + String errorMsg = exceptions.toString(); + assertTrue(errorMsg.contains("Leading wildcard is not allowed") || errorMsg.contains( + "Failed while searching the text index"), + "Expected error related to leading wildcard or text search failure, got: " + errorMsg); + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org