Hi, earlier this week I started experimenting with getting wildcard queries to be analysed.
I've got some weird analysers doing stemming/lowercasing, and writing the same rules into a custom query parser didn't seem logical, given that I just want the analysers to apply as they do at index time. I came up with the hack below, which is just a modified version of LuceneQParserPlugin, i.e. the Solr default one, which creates a SolrQueryParser query parser. In the SolrQueryParser I override the "getWildcardQuery" function so that I insert a call to my method, "myWildcardQuery". The myWildcardQuery method converts the wildcard term into an analysed version, which it returns (and it at least lowercases the term if analysis fails for some reason). The myWildcardQuery method is just pulling in code from Lucene's QueryParser.getFieldQuery -- so all this code is a magical giant cut-and-paste job right now (which you'll see when you look at the Lucene/Solr classes involved!). You use this custom query parser in the usual way, i.e. by registering the query parser in the solrconfig.xml file: <queryParser name="ilexirQparser" class="com.ilexir.solr.search.ilexirQParserPlugin"/> then call that query parser in your request handler: <requestHandler name="ilexir" class="solr.SearchHandler" default="true"> <!-- default values for query parameters --> <lst name="defaults"> <str name="defType">ilexirQparser</str> <str name="echoParams">explicit</str> <int name="rows">10</int> <int name="start">0</int> <str name="fl">*,score</str> <str name="version">2.2</str> <str name="wt">standard</str> <str name="indent">on</str> </lst> <arr name="last-components"> <str>spellcheck</str> <str>tvComponent</str> </arr> </requestHandler> I enable leading wildcard queries using the reversed-wildcard filter as per my previous email, i.e. in the index-time analyser add: <filter class="solr.ReversedWildcardFilterFactory" /> (not at query time) -- then the Lucene query parser picks up the use of this filter and allows leading wildcard queries. 
Of course, none of this is going to sort out trying to match against the query "co?mput?r", because you've probably stemmed "computer" to "comput" or something at index time -- but if you add a copyField to an extra field that isn't stemmed, then at query time query both the original and the non-stemmed field (boost accordingly -- i.e. you might want to boost the original non-stemmed field higher!), you'll get the right match then :) I'd be interested to hear from Lucene/Solr contributors why wildcards aren't analysed in general anyway. Anyway, hope that helps :) bec ---------------------- import java.io.IOException; import java.io.StringReader; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.CachingTokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.reverse.ReverseStringFilter; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.Query; import org.apache.lucene.search.WildcardQuery; import org.apache.solr.analysis.ReversedWildcardFilterFactory; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.search.LuceneQParserPlugin; import org.apache.solr.search.QParser; import org.apache.solr.search.QueryParsing; import org.apache.solr.search.SolrQueryParser; /** * modifies the code from LuceneQParserPlugin i.e. the default query parser * plugin used by solr. 
* @author bec */ public class ilexirQParserPlugin extends LuceneQParserPlugin { public static String NAME = "lucene"; public void init(NamedList args) { } public QParser createParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) { return new ilexirQParser(qstr, localParams, params, req); } } class ilexirQParser extends QParser { String sortStr; SolrQueryParser lparser; public ilexirQParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) { super(qstr, localParams, params, req); } public Query parse() throws ParseException { String qstr = getString(); String defaultField = getParam(CommonParams.DF); if (defaultField == null) { defaultField = getReq().getSchema().getDefaultSearchFieldName(); } lparser = new SolrQueryParser(this, defaultField) { /** * adapted from lucene's QueryParser.getFieldQuery !! * * @param field * @param termStr */ private String myWildcardQuery(String field, String termStr) { System.out .println("ILEXIR: ORIGINAL WILDCARD QUERY:" + termStr); // get the corresponding analyser - this one is // from the schema file -- the query one!! // i.e. YAY!! 
Analyzer analyzer = this.getAnalyzer(); TokenStream source; try { source = analyzer.reusableTokenStream(field, new StringReader(termStr)); source.reset(); } catch (IOException e) { source = analyzer.tokenStream(field, new StringReader( termStr)); } CachingTokenFilter buffer = new CachingTokenFilter(source); TermAttribute termAtt = null; PositionIncrementAttribute posIncrAtt = null; boolean success = false; try { buffer.reset(); success = true; } catch (IOException e) { // success==false if we hit an exception } if (success) { if (buffer.hasAttribute(TermAttribute.class)) { termAtt = (TermAttribute) buffer .getAttribute(TermAttribute.class); } if (buffer.hasAttribute(PositionIncrementAttribute.class)) { posIncrAtt = (PositionIncrementAttribute) buffer .getAttribute(PositionIncrementAttribute.class); } } boolean hasMoreTokens = false; if (termAtt != null) { try { hasMoreTokens = buffer.incrementToken(); // should be a single analysed term!: System.out .println("ILEXIR: RETURNING ANALYSED WILDCARD QUERY TERM:" + termAtt.term()); return termAtt.term(); /* * while (hasMoreTokens) { System.out.println("TERM:" + * termAtt.term()); hasMoreTokens = * buffer.incrementToken(); } */ } catch (IOException e) { System.out .println("ILEXIR: ilexirQParserPlugin.myWildcardQuery error:" + e.getMessage()); e.printStackTrace(); } } termStr = termStr.toLowerCase(); // return original wildcard term if errors occurred! System.out .println("ILEXIR: RETURNING LOWERCASED WILDCARD QUERY TERM:" + termStr); return termStr; } @Override protected Query getWildcardQuery(String field, String termStr) throws ParseException { // *:* -> MatchAllDocsQuery if ("*".equals(field) && "*".equals(termStr)) { return newMatchAllDocsQuery(); } // switch wildcard term to the analysed version!! termStr = this.myWildcardQuery(field, termStr); // can we use reversed wildcards in this field? 
String type = schema.getFieldType(field).getTypeName(); ReversedWildcardFilterFactory factory = leadingWildcards .get(type); if (factory != null && factory.shouldReverse(termStr)) { termStr = ReverseStringFilter.reverse(termStr + factory.getMarkerChar()); } Query q = super.getWildcardQuery(field, termStr); if (q instanceof WildcardQuery) { // use a constant score query to avoid overflowing clauses WildcardQuery wildcardQuery = new WildcardQuery( ((WildcardQuery) q).getTerm()); return wildcardQuery; } return q; } }; // these could either be checked & set here, or in the SolrQueryParser // constructor String opParam = getParam(QueryParsing.OP); if (opParam != null) { lparser .setDefaultOperator("AND".equals(opParam) ? QueryParser.Operator.AND : QueryParser.Operator.OR); } else { // try to get default operator from schema QueryParser.Operator operator = getReq().getSchema() .getSolrQueryParser(null).getDefaultOperator(); lparser .setDefaultOperator(null == operator ? QueryParser.Operator.OR : operator); } return lparser.parse(qstr); } public String[] getDefaultHighlightFields() { return new String[] { lparser.getField() }; } }