> Suppose I have a product with a title='kMix Espresso maker'. If I tokenize
> this and put the result in product_tokens I should get
> '[kMix][Espresso][maker]'.
> 
> If now I try to search with facet.field='product_tokens' and
> facet.prefix='espresso' I should get only 'espresso' while I want 'kMix
> Espresso maker'.

Yes, you are probably right. I did use this approach at some point. Your remark
has made me check my code again.
I was using n_gram in the end.

(facet.prefix on tokenized fields might work in certain circumstances where you 
can get the actual value from the string field (or its facet) in parallel.)

This is the jQuery autocomplete plugin instantiation:

        // On DOM ready, attach an autocomplete widget to #qterm whose
        // suggestions come from Solr: the typed text is matched against the
        // n-gram field, and the values of the "title" facet are offered as
        // the suggestions.
        $(function() {
                $("#qterm").autocomplete({
                        minLength: 1,
                        source: function(request, response) {
                                // Escape backslashes and double quotes so the typed
                                // text cannot terminate the quoted phrase below.
                                var term = request.term.replace(/(["\\])/g, "\\$1");
                                jQuery.ajax({
                                        url: "/solr/select",
                                        dataType: "json",
                                        data: {
                                                q: "title_ngrams:\"" + term + "\"",
                                                rows: 0,
                                                facet: "true",
                                                "facet.field": "title",
                                                "facet.mincount": 1,
                                                "facet.sort": "index",
                                                "facet.limit": 10,
                                                fq: "end_date:[NOW TO *]",
                                                wt: "json"
                                        },
                                        success: function(data) {
                                                // The facet list is read as alternating
                                                // entries (value, count, value, count, ...);
                                                // only the values at even indexes are kept.
                                                var facets = data.facet_counts.facet_fields.title;
                                                var result = [];
                                                for (var i = 0; i < facets.length; i += 2) {
                                                        result.push(facets[i]);
                                                }
                                                response(result);
                                        },
                                        error: function() {
                                                // The widget expects response() to be called
                                                // even when the request fails.
                                                response([]);
                                        }
                                });
                        }
                });
        });

And here is the field type "ngram" used for "title_ngram". "title" is a string type field.

                <!-- Field type "ngram": matches parts of words without needing
                     wildcards in the query. Intended for search-term suggestions,
                     e.g. as the data source of an autocomplete widget. -->
                <fieldType name="ngram" class="solr.TextField">
                        <!-- Index side: the delimiter filter emits word/number parts,
                             the concatenation of all parts and the original token;
                             after lowercasing, front edge n-grams of 2 to 15
                             characters are generated and duplicates removed. -->
                        <analyzer type="index">
                                <tokenizer class="solr.KeywordTokenizerFactory"/>
                                <filter class="solr.LengthFilterFactory" max="500" min="1"/>
                                <filter class="solr.TrimFilterFactory"/>
                                <filter class="solr.ISOLatin1AccentFilterFactory"/>
                                <filter class="solr.WordDelimiterFilterFactory"
                                        generateWordParts="1"
                                        generateNumberParts="1"
                                        catenateAll="1"
                                        splitOnCaseChange="1"
                                        splitOnNumerics="1"
                                        stemEnglishPossessive="1"
                                        preserveOriginal="1"/>
                                <filter class="solr.LowerCaseFilterFactory"/>
                                <filter class="solr.EdgeNGramFilterFactory" side="front" minGramSize="2" maxGramSize="15"/>
                                <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
                        </analyzer>
                        <!-- Query side: same normalisation, but no length filter, no
                             concatenation of parts (catenateAll="0") and no n-gram
                             generation. -->
                        <analyzer type="query">
                                <tokenizer class="solr.KeywordTokenizerFactory"/>
                                <filter class="solr.TrimFilterFactory"/>
                                <filter class="solr.ISOLatin1AccentFilterFactory"/>
                                <filter class="solr.WordDelimiterFilterFactory"
                                        generateWordParts="1"
                                        generateNumberParts="1"
                                        catenateAll="0"
                                        splitOnCaseChange="1"
                                        splitOnNumerics="1"
                                        stemEnglishPossessive="1"
                                        preserveOriginal="1"/>
                                <filter class="solr.LowerCaseFilterFactory"/>
                        </analyzer>
                </fieldType>

Hope this one gets you going…
Chantal

Reply via email to