raghavyadav01 commented on code in PR #16147: URL: https://github.com/apache/pinot/pull/16147#discussion_r2162721206
########## pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/LuceneTextIndexUtilsTest.java: ########## @@ -97,4 +97,49 @@ public void testQueryIsNotRewritten() { RegexpQuery regexpQuery = new RegexpQuery(new Term("field", "\\d+")); Assert.assertEquals(regexpQuery, LuceneTextIndexUtils.convertToMultiTermSpanQuery(regexpQuery)); } + + @Test + public void testParseOptionsString() { + // Test null input + Map<String, String> result = LuceneTextIndexUtils.parseOptionsString(null); + Assert.assertTrue(result.isEmpty()); + + // Test empty string + result = LuceneTextIndexUtils.parseOptionsString(""); + Assert.assertTrue(result.isEmpty()); + + // Test whitespace-only string + result = LuceneTextIndexUtils.parseOptionsString(" "); + Assert.assertTrue(result.isEmpty()); + + // Test single option + result = LuceneTextIndexUtils.parseOptionsString("parser=CLASSIC"); + Assert.assertEquals(1, result.size()); + Assert.assertEquals("CLASSIC", result.get("parser")); + + // Test multiple options + result = LuceneTextIndexUtils.parseOptionsString("parser=CLASSIC,allowLeadingWildcard=true,DefaultOperator=AND"); Review Comment: DefaultOperator, allowLeadingWildcard all these are methods for QueryParser and when they are provided in params we use reflection to call setter methods and set them. Made them consistent in the test. ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/LuceneTextIndexUtils.java: ########## @@ -72,4 +78,148 @@ public static Query convertToMultiTermSpanQuery(Query query) { LOGGER.debug("The phrase query {} is re-written as {}", query, spanNearQuery); return spanNearQuery; } + + /** + * Creates a configured Lucene query with the specified options. + * This method can be used by both single-column and multi-column text index readers. + * + * @param actualQuery The cleaned query string without __OPTIONS + * @param options The parsed options map + * @param column The column name for the search + * @param analyzer The Lucene analyzer to use + * @return The parsed Lucene Query object + * @throws RuntimeException if parser creation or query parsing fails + */ + public static Query createQueryParserWithOptions(String actualQuery, Map<String, String> options, String column, + org.apache.lucene.analysis.Analyzer analyzer) { + // Get parser type from options + String parserType = options.getOrDefault("parser", "CLASSIC"); + String parserClassName; + + switch (parserType.toUpperCase()) { + case "STANDARD": + parserClassName = "org.apache.lucene.queryparser.flexible.standard.StandardQueryParser"; + break; + case "COMPLEX": + parserClassName = "org.apache.lucene.queryparser.complexPhrase.ComplexPhraseQueryParser"; + break; + default: + parserClassName = "org.apache.lucene.queryparser.classic.QueryParser"; + break; + } + + // Create parser instance and apply options + try { + Class<?> parserClass = Class.forName(parserClassName); + Object parser; + + if (parserClassName.equals("org.apache.lucene.queryparser.flexible.standard.StandardQueryParser")) { + java.lang.reflect.Constructor<?> constructor = parserClass.getConstructor(); + parser = constructor.newInstance(); + try { + java.lang.reflect.Method setAnalyzerMethod = + parserClass.getMethod("setAnalyzer", org.apache.lucene.analysis.Analyzer.class); + setAnalyzerMethod.invoke(parser, analyzer); + } catch (Exception e) { + LOGGER.warn("Failed to set analyzer on StandardQueryParser: {}", e.getMessage()); + } + } else { + // CLASSIC and COMPLEX parsers use the standard (String, Analyzer) constructor + java.lang.reflect.Constructor<?> constructor = + parserClass.getConstructor(String.class, org.apache.lucene.analysis.Analyzer.class); + parser = constructor.newInstance(column, analyzer); + } + + // Dynamically apply options using reflection + Class<?> clazz = parser.getClass(); + for (Map.Entry<String, String> entry : options.entrySet()) { + String key = entry.getKey(); + if (key.equals("parser")) { + continue; // Skip parser option as it's only used for initialization + } + String value = entry.getValue(); + String setterName = "set" + Character.toUpperCase(key.charAt(0)) + key.substring(1); + + boolean found = false; + for (java.lang.reflect.Method method : clazz.getMethods()) { + if (method.getName().equalsIgnoreCase(setterName) && method.getParameterCount() == 1) { + try { + Class<?> paramType = method.getParameterTypes()[0]; + Object paramValue; + + if (paramType == boolean.class || paramType == Boolean.class) { + paramValue = Boolean.valueOf(value); + } else if (paramType == int.class || paramType == Integer.class) { + paramValue = Integer.valueOf(value); + } else if (paramType == float.class || paramType == Float.class) { + paramValue = Float.valueOf(value); + } else if (paramType == double.class || paramType == Double.class) { + paramValue = Double.valueOf(value); + } else if (paramType.isEnum()) { + paramValue = java.lang.Enum.valueOf((Class<java.lang.Enum>) paramType, value); + } else { + paramValue = value; + } + + method.invoke(parser, paramValue); + found = true; + break; + } catch (Exception e) { + LOGGER.warn("Failed to apply option {}={}: {}", key, value, e.getMessage()); + } + } + } + + if (!found) { + LOGGER.warn("Failed to apply option: {}={} (setter not found)", key, value); + } + } + + // Parse the query using the configured parser + Query query; + if (parser.getClass().getName().equals("org.apache.lucene.queryparser.flexible.standard.StandardQueryParser")) { + // StandardQueryParser uses parse(String, String) where second parameter is default field + java.lang.reflect.Method parseMethod = parser.getClass().getMethod("parse", String.class, String.class); + query = (Query) parseMethod.invoke(parser, actualQuery, column); + } else { + // Other parsers use parse(String) + java.lang.reflect.Method parseMethod = parser.getClass().getMethod("parse", String.class); + query = (Query) parseMethod.invoke(parser, actualQuery); + } + + return query; + } catch (Exception e) { + String msg = "Failed to create or parse query: " + e.getMessage(); + throw new RuntimeException(msg, e); + } + } + + /** + * Parses options from a separate options string parameter. + * The options should be in the format: "key1=value1,key2=value2,key3=value3" + * + * @param optionsString The options string in key=value format + * @return A Map containing the parsed options, or empty map if optionsString is null/empty + */ + public static Map<String, String> parseOptionsString(String optionsString) { Review Comment: Fixed. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org