This is an automated email from the ASF dual-hosted git repository.
jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 1d534b41e3 Adding Match prefix phrase query lucene parser (#16476)
1d534b41e3 is described below
commit 1d534b41e3a4b26d0b4e39f154163074ef72822d
Author: RAGHVENDRA KUMAR YADAV <[email protected]>
AuthorDate: Thu Jul 31 23:17:56 2025 -0700
Adding Match prefix phrase query lucene parser (#16476)
---
.../pinot/queries/TextSearchQueriesTest.java | 54 +++-
.../lucene/parsers/PrefixPhraseQueryParser.java | 298 +++++++++++++++++++++
.../segment/local/utils/LuceneTextIndexUtils.java | 23 +-
.../local/utils/LuceneTextIndexUtilsTest.java | 133 +++++++++
4 files changed, 506 insertions(+), 2 deletions(-)
diff --git
a/pinot-core/src/test/java/org/apache/pinot/queries/TextSearchQueriesTest.java
b/pinot-core/src/test/java/org/apache/pinot/queries/TextSearchQueriesTest.java
index 0852bba44c..b88276963a 100644
---
a/pinot-core/src/test/java/org/apache/pinot/queries/TextSearchQueriesTest.java
+++
b/pinot-core/src/test/java/org/apache/pinot/queries/TextSearchQueriesTest.java
@@ -2028,7 +2028,11 @@ public class TextSearchQueriesTest extends
BaseQueriesTest {
});
String query = "SELECT INT_COL, SKILLS_TEXT_COL FROM " + TABLE_NAME + "
WHERE TEXT_MATCH(" + SKILLS_TEXT_COL_NAME
- + ", '*ealtime streaming system*',
'parser=CLASSIC,allowLeadingWildcard=true,defaultOperator=AND') LIMIT 50000";
+ + ", 'realtime streaming system', 'parser=MATCHPHRASE') LIMIT 50000";
+ testTextSearchSelectQueryHelper(query, 0, false, expected);
+
+ query = "SELECT INT_COL, SKILLS_TEXT_COL FROM " + TABLE_NAME + " WHERE
TEXT_MATCH(" + SKILLS_TEXT_COL_NAME
+ + ", 'realtime streaming system',
'parser=MATCHPHRASE,enablePrefixMatch=true') LIMIT 50000";
testTextSearchSelectQueryHelper(query, expected.size(), false, expected);
List<Object[]> expected1 = new ArrayList<>();
@@ -2082,6 +2086,54 @@ public class TextSearchQueriesTest extends
BaseQueriesTest {
testTextSearchSelectQueryHelper(query8, expected.size(), false, expected);
}
+ @Test
+ public void testMatchPhraseQueryParser()
+ throws Exception {
+ // Test case 1: "Tensor flow" - should match 3 documents
+ List<Object[]> expectedTensorFlow = new ArrayList<>();
+ expectedTensorFlow.add(new Object[]{
+ 1004, "Machine learning, Tensor flow, Java, Stanford university,"
+ });
+ expectedTensorFlow.add(new Object[]{
+ 1007, "C++, Python, Tensor flow, database kernel, storage, indexing
and transaction processing, building "
+ + "large scale systems, Machine learning"
+ });
+ expectedTensorFlow.add(new Object[]{
+ 1016, "CUDA, GPU processing, Tensor flow, Pandas, Python, Jupyter
notebook, spark, Machine learning, building"
+ + " high performance scalable systems"
+ });
+
+ // Test exact phrase "Tensor flow" with default settings (slop=0,
inOrder=true)
+ String queryExactPhrase =
+ "SELECT INT_COL, SKILLS_TEXT_COL FROM " + TABLE_NAME + " WHERE
TEXT_MATCH(" + SKILLS_TEXT_COL_NAME
+ + ", 'Tensor flow', 'parser=MATCHPHRASE,enablePrefixMatch=true')
LIMIT 50000";
+ testTextSearchSelectQueryHelper(queryExactPhrase, 3, false,
expectedTensorFlow);
+
+ // Test "Tensor database" with slop=1 (should allow one position gap)
+ List<Object[]> expectedTensorDatabase = new ArrayList<>();
+ expectedTensorDatabase.add(new Object[]{
+ 1007, "C++, Python, Tensor flow, database kernel, storage, indexing
and transaction processing, building "
+ + "large scale systems, Machine learning"
+ });
+
+ String querySlop1 =
+ "SELECT INT_COL, SKILLS_TEXT_COL FROM " + TABLE_NAME + " WHERE
TEXT_MATCH(" + SKILLS_TEXT_COL_NAME
+ + ", 'Tensor database',
'parser=MATCHPHRASE,enablePrefixMatch=true,slop=1') LIMIT 50000";
+ testTextSearchSelectQueryHelper(querySlop1, 1, false,
expectedTensorDatabase);
+
+ // Test "Tensor flow" with inOrder=false (should allow any order)
+ String queryInOrderFalse =
+ "SELECT INT_COL, SKILLS_TEXT_COL FROM " + TABLE_NAME + " WHERE
TEXT_MATCH(" + SKILLS_TEXT_COL_NAME
+ + ", 'Tensor flow',
'parser=MATCHPHRASE,enablePrefixMatch=true,inOrder=false') LIMIT 50000";
+ testTextSearchSelectQueryHelper(queryInOrderFalse, 3, false,
expectedTensorFlow);
+
+ // Test reversed term order "flow Tensor" with inOrder=false (slop left at its default of 0)
+ String querySlopAndInOrder =
+ "SELECT INT_COL, SKILLS_TEXT_COL FROM " + TABLE_NAME + " WHERE
TEXT_MATCH(" + SKILLS_TEXT_COL_NAME
+ + ", 'flow Tensor',
'parser=MATCHPHRASE,enablePrefixMatch=true,inOrder=false') LIMIT 50000";
+ testTextSearchSelectQueryHelper(querySlopAndInOrder, 3, false,
expectedTensorFlow);
+ }
+
// ===== TEST CASES FOR AND/OR FILTER OPERATORS =====
@Test
public void testTextSearchWithOptionsAndOrOperators()
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/text/lucene/parsers/PrefixPhraseQueryParser.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/text/lucene/parsers/PrefixPhraseQueryParser.java
new file mode 100644
index 0000000000..d6c5b74043
--- /dev/null
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/text/lucene/parsers/PrefixPhraseQueryParser.java
@@ -0,0 +1,298 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.segment.index.text.lucene.parsers;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.queries.spans.SpanMultiTermQueryWrapper;
+import org.apache.lucene.queries.spans.SpanNearQuery;
+import org.apache.lucene.queries.spans.SpanQuery;
+import org.apache.lucene.queries.spans.SpanTermQuery;
+import org.apache.lucene.queryparser.charstream.CharStream;
+import org.apache.lucene.queryparser.classic.ParseException;
+import org.apache.lucene.queryparser.classic.QueryParserBase;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.WildcardQuery;
+
+
+/**
+ * A custom query parser that creates prefix phrase queries.
+ * This parser tokenizes the input query and creates a SpanNearQuery where
+ * all terms except the last one are exact matches, and the last term can
optionally
+ * have a wildcard suffix based on the enablePrefixMatch setting.
+ *
+ * <p>This parser is designed to support both exact phrase matching and prefix
phrase matching:</p>
+ * <ul>
+ * <li><strong>Exact phrase matching (default):</strong> All terms are
matched exactly as they appear</li>
+ * <li><strong>Prefix phrase matching:</strong> The last term is treated as
a prefix with wildcard</li>
+ * </ul>
+ *
+ * <p><strong>Example usage:</strong></p>
+ * <ul>
+ * <li>Input: 'java realtime streaming' with enablePrefixMatch=false
(default)
+ * <br>Output: SpanNearQuery with exact matches for "java", "realtime",
and "streaming"</li>
+ * <li>Input: 'java realtime streaming' with enablePrefixMatch=true
+ * <br>Output: SpanNearQuery with exact matches for "java" and
"realtime",
+ * and wildcard match for "streaming*"</li>
+ * <li>Input: 'stream' with enablePrefixMatch=false (default)
+ * <br>Output: SpanTermQuery for exact match "stream"</li>
+ * <li>Input: 'stream' with enablePrefixMatch=true
+ * <br>Output: SpanMultiTermQueryWrapper for wildcard match
"stream*"</li>
+ * </ul>
+ *
+ * <p><strong>Behavior:</strong></p>
+ * <ul>
+ * <li>Single term queries: Returns SpanTermQuery (exact) or
SpanMultiTermQueryWrapper (prefix)</li>
+ * <li>Multiple term queries: Returns SpanNearQuery; adjacency and term order follow the slop (default 0) and inOrder (default true) settings</li>
+ * <li>Null/empty queries: Throws ParseException</li>
+ * <li>Whitespace-only queries: Throws ParseException</li>
+ * </ul>
+ *
+ * <p>This parser extends Lucene's QueryParserBase and implements the required
abstract methods.
+ * It uses the provided Analyzer for tokenization and creates appropriate
Lucene Span queries.</p>
+ */
+public class PrefixPhraseQueryParser extends QueryParserBase {
+ /** The field name to search in */
+ private final String _field;
+
+ /** The analyzer used for tokenizing the query */
+ private final Analyzer _analyzer;
+
+ /** Flag to control whether prefix matching is enabled on the last term */
+ private boolean _enablePrefixMatch = false;
+
+ /** The slop (distance) allowed between terms in the phrase query. Default is 0 (terms must be adjacent) */
+ private int _slop = 0;
+
+ /** Whether terms must appear in the specified order. Default is true (exact
order) */
+ private boolean _inOrder = true;
+
+ /**
+ * Constructs a new PrefixPhraseQueryParser with the specified field and
analyzer.
+ *
+ * @param field the field name to search in (expected to be non-null; not validated here)
+ * @param analyzer the analyzer to use for tokenizing queries (expected to be non-null; not validated here)
+ */
+ public PrefixPhraseQueryParser(String field, Analyzer analyzer) {
+ super();
+ _field = field;
+ _analyzer = analyzer;
+ }
+
+ /**
+ * Sets whether to enable prefix matching on the last term.
+ *
+ * <p>When enabled ({@code true}):
+ * <ul>
+ * <li>Single term queries: Returns a SpanMultiTermQueryWrapper with
wildcard (*)</li>
+ * <li>Multiple term queries: The last term gets a wildcard suffix (*)</li>
+ * </ul>
+ *
+ * <p>When disabled ({@code false}, default):
+ * <ul>
+ * <li>Single term queries: Returns a SpanTermQuery for exact match</li>
+ * <li>Multiple term queries: All terms are matched exactly</li>
+ * </ul>
+ *
+ * @param enablePrefixMatch true to enable prefix matching, false to disable
(default)
+ */
+ public void setEnablePrefixMatch(boolean enablePrefixMatch) {
+ _enablePrefixMatch = enablePrefixMatch;
+ }
+
+ /**
+ * Sets the slop (distance) allowed between terms in the phrase query.
+ *
+ * <p>The slop determines how many positions apart the terms can be while
still matching.
+ * For example:</p>
+ * <ul>
+ * <li>slop=0: Terms must be adjacent in exact order</li>
+ * <li>slop=1: Terms can be 1 position apart</li>
+ * <li>slop=2: Terms can be 2 positions apart</li>
+ * </ul>
+ *
+ * <p>This setting only affects multiple term queries that create
SpanNearQuery.</p>
+ *
+ * @param slop the number of positions allowed between terms (default is 0)
+ * @throws IllegalArgumentException if slop is negative
+ */
+ public void setSlop(int slop) {
+ if (slop < 0) {
+ throw new IllegalArgumentException("Slop cannot be negative: " + slop);
+ }
+ _slop = slop;
+ }
+
+ /**
+ * Sets whether terms must appear in the specified order.
+ *
+ * <p>When enabled ({@code true}, default):
+ * <ul>
+ * <li>Terms must appear in the exact order specified in the query</li>
+ * <li>Example: "java realtime" matches "java realtime streaming" but not
"realtime java streaming"</li>
+ * </ul>
+ *
+ * <p>When disabled ({@code false}):
+ * <ul>
+ * <li>Terms can appear in any order within the slop distance</li>
+ * <li>Example: "java realtime" matches both "java realtime streaming" and
"realtime java streaming"</li>
+ * </ul>
+ *
+ * <p>This setting only affects multiple term queries that create
SpanNearQuery.</p>
+ *
+ * @param inOrder true to require terms in exact order, false to allow any
order
+ */
+ public void setInOrder(boolean inOrder) {
+ _inOrder = inOrder;
+ }
+
+ /**
+ * Parses the given query string and returns an appropriate Lucene Query.
+ *
+ * <p>This method performs the following steps:</p>
+ * <ol>
+ * <li>Validates the input query (null, empty, whitespace-only)</li>
+ * <li>Tokenizes the query using the configured analyzer</li>
+ * <li>Creates appropriate Lucene queries based on the number of tokens
and enablePrefixMatch setting</li>
+ * </ol>
+ *
+ * <p><strong>Query Types Returned:</strong></p>
+ * <ul>
+ * <li><strong>Single term:</strong>
+ * <ul>
+ * <li>If enablePrefixMatch=false: SpanTermQuery for exact match</li>
+ * <li>If enablePrefixMatch=true: SpanMultiTermQueryWrapper with
wildcard</li>
+ * </ul>
+ * </li>
+ * <li><strong>Multiple terms:</strong> SpanNearQuery built with the configured slop and inOrder settings
+ * <ul>
+ * <li>All terms except the last: SpanTermQuery (exact match)</li>
+ * <li>Last term: SpanTermQuery (exact) or SpanMultiTermQueryWrapper
(wildcard)
+ * based on enablePrefixMatch</li>
+ * </ul>
+ * </li>
+ * </ul>
+ *
+ * @param query the query string to parse (must not be null or empty)
+ * @return a Lucene Query object representing the parsed query
+ * @throws ParseException if the query is null, empty, or contains no valid
tokens after tokenization
+ * @throws RuntimeException if tokenization fails due to an IOException
+ */
+ @Override
+ public Query parse(String query) throws ParseException {
+ if (query == null) {
+ throw new ParseException("Query cannot be null");
+ }
+
+ if (query.trim().isEmpty()) {
+ throw new ParseException("Query cannot be empty");
+ }
+
+ // Tokenize the query
+ List<String> tokens = new ArrayList<>();
+ try (TokenStream stream = _analyzer.tokenStream(_field, query)) {
+ stream.reset();
+ CharTermAttribute charTermAttribute =
stream.addAttribute(CharTermAttribute.class);
+
+ while (stream.incrementToken()) {
+ String token = charTermAttribute.toString();
+ if (!token.trim().isEmpty()) {
+ tokens.add(token);
+ }
+ }
+ stream.end();
+ } catch (IOException e) {
+ throw new RuntimeException("Failed to tokenize query: " + query, e);
+ }
+
+ // Check if we have any valid tokens after tokenization
+ if (tokens.isEmpty()) {
+ throw new ParseException("Query tokenization resulted in no valid
tokens");
+ }
+
+ // Handle single token case
+ if (tokens.size() == 1) {
+ String token = tokens.get(0);
+ if (_enablePrefixMatch) {
+ WildcardQuery wildcardQuery = new WildcardQuery(new Term(_field, token
+ "*"));
+ return new SpanMultiTermQueryWrapper<>(wildcardQuery);
+ } else {
+ return new SpanTermQuery(new Term(_field, token));
+ }
+ }
+
+ // Handle multiple tokens case
+ List<SpanQuery> spanQueries = new ArrayList<>();
+
+ // Add regular SpanTermQueries for all tokens except the last one
+ for (int i = 0; i < tokens.size() - 1; i++) {
+ spanQueries.add(new SpanTermQuery(new Term(_field, tokens.get(i))));
+ }
+
+ // Add query for the last token
+ String lastToken = tokens.get(tokens.size() - 1);
+ if (_enablePrefixMatch) {
+ WildcardQuery wildcardQuery = new WildcardQuery(new Term(_field,
lastToken + "*"));
+ spanQueries.add(new SpanMultiTermQueryWrapper<>(wildcardQuery));
+ } else {
+ spanQueries.add(new SpanTermQuery(new Term(_field, lastToken)));
+ }
+
+ // Create SpanNearQuery with configurable slop and inOrder settings
+ return new SpanNearQuery(spanQueries.toArray(new SpanQuery[0]), _slop,
_inOrder);
+ }
+
+ /**
+ * Reinitializes the parser with a new CharStream.
+ *
+ * <p>This method is required by QueryParserBase but is not used in this
implementation
+ * since we override the parse(String) method directly. The method is left
as a no-op.</p>
+ *
+ * @param input the CharStream to reinitialize with (ignored in this
implementation)
+ */
+ @Override
+ public void ReInit(CharStream input) {
+ // This method is required by QueryParserBase but not used in our
implementation
+ // since we override parse(String) directly
+ }
+
+ /**
+ * Creates a top-level query for the specified field.
+ *
+ * <p>This method is required by QueryParserBase but is not supported in
this implementation.
+ * Use the parse(String) method instead for query parsing.</p>
+ *
+ * @param field the field name (ignored in this implementation)
+ * @return never returns (always throws UnsupportedOperationException)
+ * @throws ParseException never thrown (method always throws
UnsupportedOperationException)
+ * @throws UnsupportedOperationException always thrown, indicating this
method is not supported
+ */
+ @Override
+ public Query TopLevelQuery(String field)
+ throws ParseException {
+ throw new UnsupportedOperationException(
+ "TopLevelQuery is not supported in PrefixPhraseQueryParser. Use
parse(String) method instead.");
+ }
+}
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/LuceneTextIndexUtils.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/LuceneTextIndexUtils.java
index dfa4c9f6fb..d366789d9c 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/LuceneTextIndexUtils.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/LuceneTextIndexUtils.java
@@ -48,6 +48,7 @@ public class LuceneTextIndexUtils {
public static final String PARSER_CLASSIC = "CLASSIC";
public static final String PARSER_STANDARD = "STANDARD";
public static final String PARSER_COMPLEX = "COMPLEX";
+ public static final String PARSER_MATCHPHRASE = "MATCHPHRASE";
// Default operator constants
public static final String DEFAULT_OPERATOR_AND = "AND";
@@ -76,6 +77,9 @@ public class LuceneTextIndexUtils {
public static final String TIME_ZONE = "timeZone";
public static final String PHRASE_SLOP = "phraseSlop";
public static final String MAX_DETERMINIZED_STATES =
"maxDeterminizedStates";
+ public static final String SLOP = "slop";
+ public static final String IN_ORDER = "inOrder";
+ public static final String ENABLE_PREFIX_MATCH = "enablePrefixMatch";
}
// Parser class names
@@ -84,6 +88,8 @@ public class LuceneTextIndexUtils {
public static final String COMPLEX_PHRASE_QUERY_PARSER_CLASS =
"org.apache.lucene.queryparser.complexPhrase.ComplexPhraseQueryParser";
public static final String CLASSIC_QUERY_PARSER =
"org.apache.lucene.queryparser.classic.QueryParser";
+ public static final String MATCHPHRASE_QUERY_PARSER_CLASS =
+
"org.apache.pinot.segment.local.segment.index.text.lucene.parsers.PrefixPhraseQueryParser";
private LuceneTextIndexUtils() {
}
@@ -147,6 +153,9 @@ public class LuceneTextIndexUtils {
case PARSER_COMPLEX:
parserClassName = COMPLEX_PHRASE_QUERY_PARSER_CLASS;
break;
+ case PARSER_MATCHPHRASE:
+ parserClassName = MATCHPHRASE_QUERY_PARSER_CLASS;
+ break;
default:
parserClassName = CLASSIC_QUERY_PARSER;
break;
@@ -224,7 +233,7 @@ public class LuceneTextIndexUtils {
Method parseMethod = parser.getClass().getMethod("parse",
String.class, String.class);
query = (Query) parseMethod.invoke(parser, actualQuery, column);
} else {
- // Other parsers use parse(String)
+ // Other parsers (CLASSIC, COMPLEX, MATCHPHRASE) use parse(String)
Method parseMethod = parser.getClass().getMethod("parse",
String.class);
query = (Query) parseMethod.invoke(parser, actualQuery);
}
@@ -332,6 +341,18 @@ public class LuceneTextIndexUtils {
public int getMaxDeterminizedStates() {
return
Integer.parseInt(_options.getOrDefault(OptionKey.MAX_DETERMINIZED_STATES,
"10000"));
}
+
+ public int getSlop() {
+ return Integer.parseInt(_options.getOrDefault(OptionKey.SLOP, "0"));
+ }
+
+ public boolean isInOrder() {
+ return Boolean.parseBoolean(_options.getOrDefault(OptionKey.IN_ORDER,
"true"));
+ }
+
+ public boolean isEnablePrefixMatch() {
+ return
Boolean.parseBoolean(_options.getOrDefault(OptionKey.ENABLE_PREFIX_MATCH,
"false"));
+ }
}
/**
diff --git
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/LuceneTextIndexUtilsTest.java
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/LuceneTextIndexUtilsTest.java
index 2308809584..b8a70caad6 100644
---
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/LuceneTextIndexUtilsTest.java
+++
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/LuceneTextIndexUtilsTest.java
@@ -18,7 +18,10 @@
*/
package org.apache.pinot.segment.local.utils;
+import java.lang.reflect.InvocationTargetException;
import java.util.Map;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.spans.SpanMultiTermQueryWrapper;
import org.apache.lucene.queries.spans.SpanNearQuery;
@@ -27,9 +30,11 @@ import org.apache.lucene.queries.spans.SpanTermQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.PrefixQuery;
+import org.apache.lucene.search.Query;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
+import
org.apache.pinot.segment.local.segment.index.text.lucene.parsers.PrefixPhraseQueryParser;
import org.testng.Assert;
import org.testng.annotations.Test;
@@ -202,4 +207,132 @@ public class LuceneTextIndexUtilsTest {
Assert.assertEquals(options.getPhraseSlop(), 2);
Assert.assertEquals(options.getMaxDeterminizedStates(), 5000);
}
+
+ @Test
+ public void testMatchPhraseQueryParser()
+ throws Exception {
+ // Test the new MATCHPHRASE parser functionality
+ String optionsString = "parser=MATCHPHRASE,enablePrefixMatch=true";
+ LuceneTextIndexUtils.LuceneTextIndexOptions options =
+ new LuceneTextIndexUtils.LuceneTextIndexOptions(optionsString);
+
+ // Create a simple analyzer for testing
+ Analyzer analyzer = new WhitespaceAnalyzer();
+ String column = "testColumn";
+
+ // Test positive case: "java realtime streaming"
+ String query = "java realtime streaming";
+
+ Query result = LuceneTextIndexUtils.createQueryParserWithOptions(query,
options, column, analyzer);
+ Assert.assertNotNull(result);
+ Assert.assertTrue(result instanceof SpanNearQuery);
+
+ // Test positive case: "realtime stream*"
+ query = "realtime stream*";
+ result = LuceneTextIndexUtils.createQueryParserWithOptions(query, options,
column, analyzer);
+ Assert.assertNotNull(result);
+ Assert.assertTrue(result instanceof SpanNearQuery);
+
+ // Test positive case: "stream*" - single term should return
SpanMultiTermQueryWrapper
+ query = "stream*";
+ result = LuceneTextIndexUtils.createQueryParserWithOptions(query, options,
column, analyzer);
+ Assert.assertNotNull(result);
+ Assert.assertTrue(result instanceof SpanMultiTermQueryWrapper);
+
+ // Test edge case: empty string ""
+ query = "";
+ try {
+ LuceneTextIndexUtils.createQueryParserWithOptions(query, options,
column, analyzer);
+ Assert.fail("Expected exception for empty query");
+ } catch (RuntimeException e) {
+ // The method wraps ParseException in RuntimeException via reflection
+ Assert.assertTrue(e.getCause() instanceof InvocationTargetException);
+ }
+
+ // Test edge case: null query
+ try {
+ LuceneTextIndexUtils.createQueryParserWithOptions(null, options, column,
analyzer);
+ Assert.fail("Expected exception for null query");
+ } catch (RuntimeException e) {
+ // The method wraps ParseException in RuntimeException via reflection
+ Assert.assertTrue(e.getCause() instanceof InvocationTargetException);
+ }
+
+ // Test that TopLevelQuery throws UnsupportedOperationException
+ try {
+ PrefixPhraseQueryParser parser = new PrefixPhraseQueryParser(column,
analyzer);
+ parser.TopLevelQuery(column);
+ Assert.fail("Expected UnsupportedOperationException for TopLevelQuery");
+ } catch (UnsupportedOperationException e) {
+ Assert.assertTrue(e.getMessage().contains("TopLevelQuery is not
supported"));
+ }
+
+ // Test slop and inOrder settings
+ PrefixPhraseQueryParser slopParser = new PrefixPhraseQueryParser(column,
analyzer);
+
+ // Test default slop and inOrder (0 slop, true inOrder)
+ Query defaultSlopQuery = slopParser.parse("java realtime streaming");
+ Assert.assertTrue(defaultSlopQuery instanceof SpanNearQuery);
+
+ // Test custom slop and inOrder
+ slopParser.setSlop(2);
+ slopParser.setInOrder(false);
+ Query customSlopQuery = slopParser.parse("java realtime streaming");
+ Assert.assertTrue(customSlopQuery instanceof SpanNearQuery);
+
+ // Test invalid slop (should throw exception)
+ try {
+ slopParser.setSlop(-1);
+ Assert.fail("Expected IllegalArgumentException for negative slop");
+ } catch (IllegalArgumentException e) {
+ Assert.assertTrue(e.getMessage().contains("Slop cannot be negative"));
+ }
+
+ // Test slop and inOrder with createQueryParserWithOptions
+ LuceneTextIndexUtils.LuceneTextIndexOptions slopOptions =
+
LuceneTextIndexUtils.createOptions("parser=MATCHPHRASE,enablePrefixMatch=true");
+
+ // Test default slop and inOrder behavior
+ Query defaultSlopResult =
LuceneTextIndexUtils.createQueryParserWithOptions(
+ "java realtime streaming", slopOptions, column, analyzer);
+ Assert.assertTrue(defaultSlopResult instanceof SpanNearQuery);
+
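+ // Test custom slop and inOrder settings. Note: customSlopOptions has no slop/inOrder keys and is not referenced again; the settings are applied directly on the parser below.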
+ LuceneTextIndexUtils.LuceneTextIndexOptions customSlopOptions =
+
LuceneTextIndexUtils.createOptions("parser=MATCHPHRASE,enablePrefixMatch=true");
+
+ // Create a parser instance to test slop and inOrder settings
+ PrefixPhraseQueryParser customParser = new PrefixPhraseQueryParser(column,
analyzer);
+ customParser.setEnablePrefixMatch(true);
+ customParser.setSlop(2);
+ customParser.setInOrder(false);
+
+ // Test that custom settings work correctly
+ Query customSlopResult = customParser.parse("java realtime streaming");
+ Assert.assertTrue(customSlopResult instanceof SpanNearQuery);
+
+ // Test that the parser can be configured with different slop values
+ customParser.setSlop(1);
+ Query slop1Result = customParser.parse("java realtime streaming");
+ Assert.assertTrue(slop1Result instanceof SpanNearQuery);
+
+ // Test that the parser can be configured with different inOrder values
+ customParser.setInOrder(true);
+ Query inOrderTrueResult = customParser.parse("java realtime streaming");
+ Assert.assertTrue(inOrderTrueResult instanceof SpanNearQuery);
+
+ // Test default behavior using createOptions
+ LuceneTextIndexUtils.LuceneTextIndexOptions defaultOptions =
+ LuceneTextIndexUtils.createOptions("parser=MATCHPHRASE");
+
+ // Test single term with default behavior (prefix match disabled)
+ Query defaultSingleTermQuery =
+ LuceneTextIndexUtils.createQueryParserWithOptions("stream",
defaultOptions, column, analyzer);
+ Assert.assertTrue(defaultSingleTermQuery instanceof SpanTermQuery);
+
+ // Test multiple terms with default behavior (prefix match disabled)
+ Query defaultMultiTermQuery =
+ LuceneTextIndexUtils.createQueryParserWithOptions("java realtime
streaming", defaultOptions, column, analyzer);
+ Assert.assertTrue(defaultMultiTermQuery instanceof SpanNearQuery);
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]