Jackie-Jiang commented on code in PR #16147: URL: https://github.com/apache/pinot/pull/16147#discussion_r2162623794
########## pinot-common/src/main/java/org/apache/pinot/common/request/context/predicate/TextMatchPredicate.java: ########## @@ -27,10 +27,18 @@ */ public class TextMatchPredicate extends BasePredicate { private final String _value; + private final String _options; public TextMatchPredicate(ExpressionContext lhs, String value) { super(lhs); _value = value; + _options = null; + } + + public TextMatchPredicate(ExpressionContext lhs, String value, String options) { + super(lhs); + _value = value; + _options = options; } Review Comment: ```suggestion public TextMatchPredicate(ExpressionContext lhs, String value) { this(lhs, value, null); } public TextMatchPredicate(ExpressionContext lhs, String value, @Nullable String options) { super(lhs); _value = value; _options = options; } ``` ########## pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/reader/TextIndexReader.java: ########## @@ -31,9 +31,23 @@ public interface TextIndexReader extends IndexReader { /** * Returns the matching document ids for the given search query. + * This is the legacy method for backward compatibility with native/FST text index readers. */ MutableRoaringBitmap getDocIds(String searchQuery); + /** + * Returns the matching document ids for the given search query with options string. + * This method allows passing options as a string parameter that will be parsed internally. + * Lucene-based text index readers should implement this method. + * @param searchQuery The search query string + * @param optionsString Options string in format "key1=value1,key2=value2", can be null + * @return Matching document ids + */ + default MutableRoaringBitmap getDocIds(String searchQuery, String optionsString) { Review Comment: ```suggestion default MutableRoaringBitmap getDocIds(String searchQuery, @Nullable String optionsString) { ``` ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/text/LuceneTextIndexReader.java: ########## @@ -159,8 +159,25 @@ public ImmutableRoaringBitmap getDictIds(String searchQuery) { throw new UnsupportedOperationException(""); } + @Deprecated @Override public MutableRoaringBitmap getDocIds(String searchQuery) { + return getDocIds(searchQuery, null); + } + + @Override + public MutableRoaringBitmap getDocIds(String searchQuery, String optionsString) { Review Comment: Annotate `optionsString` as `@Nullable` ########## pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/TextMatchTransformFunction.java: ########## @@ -75,6 +76,19 @@ public void init(List<TransformFunction> arguments, Map<String, ColumnContext> c } _predicate = ((LiteralTransformFunction) predicate).getStringLiteral(); + + // Handle optional third parameter for options + if (arguments.size() > 2) { + TransformFunction options = arguments.get(2); + if (!(options instanceof LiteralTransformFunction && options.getResultMetadata().isSingleValue())) { Review Comment: We can also verify the data type to be string ########## pinot-common/src/main/java/org/apache/pinot/common/request/context/predicate/TextMatchPredicate.java: ########## @@ -42,6 +50,10 @@ public String getValue() { return _value; } + public String getOptions() { Review Comment: Annotate return as `@Nullable` ########## pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/LuceneTextIndexUtilsTest.java: ########## @@ -97,4 +97,49 @@ public void testQueryIsNotRewritten() { RegexpQuery regexpQuery = new RegexpQuery(new Term("field", "\\d+")); Assert.assertEquals(regexpQuery, LuceneTextIndexUtils.convertToMultiTermSpanQuery(regexpQuery)); } + + @Test + public void testParseOptionsString() { + // Test null input + Map<String, String> result = LuceneTextIndexUtils.parseOptionsString(null); + Assert.assertTrue(result.isEmpty()); + + // Test empty string + result = LuceneTextIndexUtils.parseOptionsString(""); + Assert.assertTrue(result.isEmpty()); + + // Test whitespace-only string + result = LuceneTextIndexUtils.parseOptionsString(" "); + Assert.assertTrue(result.isEmpty()); + + // Test single option + result = LuceneTextIndexUtils.parseOptionsString("parser=CLASSIC"); + Assert.assertEquals(1, result.size()); + Assert.assertEquals("CLASSIC", result.get("parser")); + + // Test multiple options + result = LuceneTextIndexUtils.parseOptionsString("parser=CLASSIC,allowLeadingWildcard=true,DefaultOperator=AND"); Review Comment: Where is `DefaultOperator` being handled? Can we make it also camel case with first letter in lower case for consistency? ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/LuceneTextIndexUtils.java: ########## @@ -72,4 +78,148 @@ public static Query convertToMultiTermSpanQuery(Query query) { LOGGER.debug("The phrase query {} is re-written as {}", query, spanNearQuery); return spanNearQuery; } + + /** + * Creates a configured Lucene query with the specified options. + * This method can be used by both single-column and multi-column text index readers. + * + * @param actualQuery The cleaned query string without __OPTIONS + * @param options The parsed options map + * @param column The column name for the search + * @param analyzer The Lucene analyzer to use + * @return The parsed Lucene Query object + * @throws RuntimeException if parser creation or query parsing fails + */ + public static Query createQueryParserWithOptions(String actualQuery, Map<String, String> options, String column, + org.apache.lucene.analysis.Analyzer analyzer) { + // Get parser type from options + String parserType = options.getOrDefault("parser", "CLASSIC"); + String parserClassName; + + switch (parserType.toUpperCase()) { + case "STANDARD": + parserClassName = "org.apache.lucene.queryparser.flexible.standard.StandardQueryParser"; + break; + case "COMPLEX": + parserClassName = "org.apache.lucene.queryparser.complexPhrase.ComplexPhraseQueryParser"; + break; + default: + parserClassName = "org.apache.lucene.queryparser.classic.QueryParser"; + break; + } + + // Create parser instance and apply options + try { + Class<?> parserClass = Class.forName(parserClassName); + Object parser; + + if (parserClassName.equals("org.apache.lucene.queryparser.flexible.standard.StandardQueryParser")) { + java.lang.reflect.Constructor<?> constructor = parserClass.getConstructor(); + parser = constructor.newInstance(); + try { + java.lang.reflect.Method setAnalyzerMethod = + parserClass.getMethod("setAnalyzer", org.apache.lucene.analysis.Analyzer.class); + setAnalyzerMethod.invoke(parser, analyzer); + } catch (Exception e) { + LOGGER.warn("Failed to set analyzer on StandardQueryParser: {}", e.getMessage()); + } + } else { + // CLASSIC and COMPLEX parsers use the standard (String, Analyzer) constructor + java.lang.reflect.Constructor<?> constructor = + parserClass.getConstructor(String.class, org.apache.lucene.analysis.Analyzer.class); + parser = constructor.newInstance(column, analyzer); + } + + // Dynamically apply options using reflection + Class<?> clazz = parser.getClass(); + for (Map.Entry<String, String> entry : options.entrySet()) { + String key = entry.getKey(); + if (key.equals("parser")) { + continue; // Skip parser option as it's only used for initialization + } + String value = entry.getValue(); + String setterName = "set" + Character.toUpperCase(key.charAt(0)) + key.substring(1); + + boolean found = false; + for (java.lang.reflect.Method method : clazz.getMethods()) { + if (method.getName().equalsIgnoreCase(setterName) && method.getParameterCount() == 1) { + try { + Class<?> paramType = method.getParameterTypes()[0]; + Object paramValue; + + if (paramType == boolean.class || paramType == Boolean.class) { + paramValue = Boolean.valueOf(value); + } else if (paramType == int.class || paramType == Integer.class) { + paramValue = Integer.valueOf(value); + } else if (paramType == float.class || paramType == Float.class) { + paramValue = Float.valueOf(value); + } else if (paramType == double.class || paramType == Double.class) { + paramValue = Double.valueOf(value); + } else if (paramType.isEnum()) { + paramValue = java.lang.Enum.valueOf((Class<java.lang.Enum>) paramType, value); + } else { + paramValue = value; + } + + method.invoke(parser, paramValue); + found = true; + break; + } catch (Exception e) { + LOGGER.warn("Failed to apply option {}={}: {}", key, value, e.getMessage()); + } + } + } + + if (!found) { + LOGGER.warn("Failed to apply option: {}={} (setter not found)", key, value); + } + } + + // Parse the query using the configured parser + Query query; + if (parser.getClass().getName().equals("org.apache.lucene.queryparser.flexible.standard.StandardQueryParser")) { + // StandardQueryParser uses parse(String, String) where second parameter is default field + java.lang.reflect.Method parseMethod = parser.getClass().getMethod("parse", String.class, String.class); + query = (Query) parseMethod.invoke(parser, actualQuery, column); + } else { + // Other parsers use parse(String) + java.lang.reflect.Method parseMethod = parser.getClass().getMethod("parse", String.class); + query = (Query) parseMethod.invoke(parser, actualQuery); + } + + return query; + } catch (Exception e) { + String msg = "Failed to create or parse query: " + e.getMessage(); + throw new RuntimeException(msg, e); + } + } + + /** + * Parses options from a separate options string parameter. + * The options should be in the format: "key1=value1,key2=value2,key3=value3" + * + * @param optionsString The options string in key=value format + * @return A Map containing the parsed options, or empty map if optionsString is null/empty + */ + public static Map<String, String> parseOptionsString(String optionsString) { Review Comment: We can introduce a class to wrap the parsed options. Similar to `DistinctCountOffHeapAggregationFunction.Parameters` ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/LuceneTextIndexUtils.java: ########## @@ -72,4 +78,148 @@ public static Query convertToMultiTermSpanQuery(Query query) { LOGGER.debug("The phrase query {} is re-written as {}", query, spanNearQuery); return spanNearQuery; } + + /** + * Creates a configured Lucene query with the specified options. + * This method can be used by both single-column and multi-column text index readers. + * + * @param actualQuery The cleaned query string without __OPTIONS + * @param options The parsed options map + * @param column The column name for the search + * @param analyzer The Lucene analyzer to use + * @return The parsed Lucene Query object + * @throws RuntimeException if parser creation or query parsing fails + */ + public static Query createQueryParserWithOptions(String actualQuery, Map<String, String> options, String column, + org.apache.lucene.analysis.Analyzer analyzer) { + // Get parser type from options + String parserType = options.getOrDefault("parser", "CLASSIC"); Review Comment: Make keys and values constant -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org