Stemming filter analyzers... does anyone have any favorites for particular
search domains?  Just wondering what people are using.  I'm using the Lucid
K Stemmer and having issues: it seems to miss a lot of common stems.  We
switched to it because of excessively loose matches with
solr.PorterStemFilterFactory.


I understand K Stemmer is a dictionary-based stemmer.  It seems to me that
it is missing a lot of common stem reductions; e.g., "Bags" does not match
"Bag" in our searches.

Here is my analyzer stack:

                <!--
                  General-purpose "text" field type. The index-time and
                  query-time analyzers below apply the same tokenizer and the
                  same filters in the same order; the only difference is the
                  synonyms file each one reads.
                -->
                <fieldType name="text"
                           class="solr.TextField"
                           positionIncrementGap="100">

                    <!-- Index-time analysis chain (reads index_synonyms.txt). -->
                    <analyzer type="index">
                        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
                        <filter class="solr.SynonymFilterFactory"
                                synonyms="index_synonyms.txt"
                                ignoreCase="true"
                                expand="true"/>
                        <filter class="solr.StopFilterFactory"
                                ignoreCase="true"
                                words="stopwords.txt"/>
                        <!-- Every generate*/catenate* option and preserveOriginal is switched on. -->
                        <filter class="solr.WordDelimiterFilterFactory"
                                generateWordParts="1"
                                generateNumberParts="1"
                                catenateWords="1"
                                catenateNumbers="1"
                                catenateAll="1"
                                preserveOriginal="1"/>
                        <!-- Must stay ahead of the stemmer: LucidKStemmer currently
                             needs a lowercase filter somewhere before it. -->
                        <filter class="solr.LowerCaseFilterFactory"/>
                        <filter class="com.lucidimagination.solrworks.analysis.LucidKStemFilterFactory"
                                protected="protwords.txt"/>
                        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
                    </analyzer>

                    <!-- Query-time analysis chain (reads query_synonyms.txt). -->
                    <analyzer type="query">
                        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
                        <filter class="solr.SynonymFilterFactory"
                                synonyms="query_synonyms.txt"
                                ignoreCase="true"
                                expand="true"/>
                        <filter class="solr.StopFilterFactory"
                                ignoreCase="true"
                                words="stopwords.txt"/>
                        <!-- Every generate*/catenate* option and preserveOriginal is switched on. -->
                        <filter class="solr.WordDelimiterFilterFactory"
                                generateWordParts="1"
                                generateNumberParts="1"
                                catenateWords="1"
                                catenateNumbers="1"
                                catenateAll="1"
                                preserveOriginal="1"/>
                        <!-- Must stay ahead of the stemmer: LucidKStemmer currently
                             needs a lowercase filter somewhere before it. -->
                        <filter class="solr.LowerCaseFilterFactory"/>
                        <filter class="com.lucidimagination.solrworks.analysis.LucidKStemFilterFactory"
                                protected="protwords.txt"/>
                        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
                    </analyzer>
                </fieldType>

Reply via email to