I am trying create suggester handler using solr 4.8, everything work fine but
when I try to get suggestion using different language Arabic, or Japanese
for example I got result in mixed language, but I am trying to search only
using Japanese, I got Arabic with that too. the following is my Schema.xml

<?xml version="1.0" encoding="UTF-8" ?>
<schema name="people_schema" version="1.5">
        <fields>
                <field name="_version_" type="long" indexed="true"
stored="true" />
                <field name="id" type="string" indexed="true" stored="true"
                        required="true" />
                <field name="first_name" type="txt_general" indexed="true"
                        stored="true" multiValued="false" />
                <field name="last_name" type="txt_general" indexed="true"
                        stored="true" multiValued="false" />
                <field name="about" type="text_general_edge_ngram"
indexed="true"
                        stored="true" multiValued="false" />
                <field name="year_birth" type="tint" indexed="true"
stored="true"
                        multiValued="false" />
                <field name="month_birth" type="tint" indexed="true"
stored="true"
                        multiValued="false" />
                <field name="day_birth" type="tint" indexed="true"
stored="true"
                        multiValued="false" />
                <field name="country" type="string" indexed="true"
stored="true"
                        required="false" multiValued="false" />
                <field name="country_tree" type="placetree" indexed="true"
                        stored="false" multiValued="false" />
                <field name="state" type="string" indexed="true"
stored="true"
                        required="false" multiValued="false" />
                <field name="state_tree" type="placetree" indexed="true"
stored="false"
                        multiValued="false" />
                <field name="city" type="string" indexed="true"
stored="true"
                        required="false" multiValued="false" />
                <field name="city_tree" type="placetree" indexed="true"
stored="false"
                        multiValued="false" />
                <field name="job" type="string" indexed="true" stored="true"
                        required="false" multiValued="false" />
                <field name="job_tree" type="txt_general" indexed="true"
stored="true"
                        multiValued="false" />
                <field name="company" type="string" indexed="true"
stored="true"
                        required="false" multiValued="false" />
                <field name="company_tree" type="companytree" indexed="true"
                        stored="false" multiValued="false" />

                <field name="full_name" type="txt_general" indexed="true"
                        stored="true" multiValued="false" />
                <field name="full_name_suggest" type="text_suggest"
indexed="true"
                        stored="true" multiValued="false" />
                <field name="full_name_edge" type="text_suggest_edge"
indexed="true"
                        stored="true" multiValued="false" />
                <field name="full_name_ngram" type="text_suggest_ngram"
indexed="true"
                        stored="true" multiValued="false" />
                <field name="full_name_sort" type="alphaNumericSort"
indexed="true"
                        stored="true" multiValued="false" />
                       
               
            <field name="job_suggest" type="text_suggest" indexed="true"
                        stored="true" multiValued="false" />
                <field name="job_edge" type="text_suggest_edge"
indexed="true"
                        stored="true" multiValued="false" />
                <field name="job_ngram" type="text_suggest_ngram"
indexed="true"
                        stored="true" multiValued="false" />
                <field name="job_sort" type="alphaNumericSort"
indexed="true"
                        stored="true" multiValued="false" />
               
               
                <copyField source="full_name" dest="full_name_suggest" />
                <copyField source="full_name" dest="full_name_edge" />
                <copyField source="full_name" dest="full_name_ngram" />
                <copyField source="full_name" dest="full_name_sort" />
               
                <copyField source="job_tree" dest="job_suggest" />
                <copyField source="job_tree" dest="job_edge" />
                <copyField source="job_tree" dest="job_ngram" />
                <copyField source="job_tree" dest="job_sort" />
               
        </fields>
        <uniqueKey>id</uniqueKey>
        <types>
               
                <fieldType name="string" class="solr.StrField"
                        sortMissingLast="true" />
               
                <fieldType name="boolean" class="solr.BoolField"
                        sortMissingLast="true" />
                <fieldType name="int" class="solr.TrieIntField"
                        precisionStep="0" positionIncrementGap="0" />
                <fieldType name="float" class="solr.TrieFloatField"
                        precisionStep="0" positionIncrementGap="0" />
                <fieldType name="long" class="solr.TrieLongField"
                        precisionStep="0" positionIncrementGap="0" />
                <fieldType name="double" class="solr.TrieDoubleField"
                        precisionStep="0" positionIncrementGap="0" />
                <fieldType name="tint" class="solr.TrieIntField"
                        precisionStep="8" positionIncrementGap="0" />
                <fieldType name="tfloat" class="solr.TrieFloatField"
                        precisionStep="8" positionIncrementGap="0" />
                <fieldType name="tlong" class="solr.TrieLongField"
                        precisionStep="8" positionIncrementGap="0" />
                <fieldType name="tdouble" class="solr.TrieDoubleField"
                        precisionStep="8" positionIncrementGap="0" />
                <fieldType name="date" class="solr.TrieDateField"
                        precisionStep="0" positionIncrementGap="0" />
                <fieldType name="tdate" class="solr.TrieDateField"
                        precisionStep="6" positionIncrementGap="0" />
                <fieldtype name="binary" class="solr.BinaryField" />

                <fieldType name="text_general_edge_ngram"
class="solr.TextField"
                        positionIncrementGap="100">
                        <analyzer type="index">
                                <tokenizer
class="solr.LowerCaseTokenizerFactory" />
                                <filter class="solr.EdgeNGramFilterFactory"
minGramSize="2"
                                        maxGramSize="15" side="front" />
                        </analyzer>
                        <analyzer type="query">
                                <tokenizer
class="solr.LowerCaseTokenizerFactory" />
                        </analyzer>
                </fieldType>



                <fieldType name="txt_general" class="solr.TextField"
                        positionIncrementGap="100">
                        <analyzer type="index">
                                <tokenizer
class="solr.StandardTokenizerFactory" />
                                <filter class="solr.LowerCaseFilterFactory"
/>
                        </analyzer>
                        <analyzer type="query">
                                <tokenizer
class="solr.StandardTokenizerFactory" />
                                <filter class="solr.SynonymFilterFactory"
synonyms="synonyms.txt"
                                        ignoreCase="true" expand="true" />
                                <filter class="solr.LowerCaseFilterFactory"
/>
                        </analyzer>
                </fieldType>
                <fieldtype name="name_phonetic" stored="false"
indexed="true"
                        class="solr.TextField">
                        <analyzer>
                                <tokenizer
class="solr.StandardTokenizerFactory" />
                                <filter
class="solr.DoubleMetaphoneFilterFactory" inject="false" />
                        </analyzer>
                </fieldtype>
               
                <fieldType name="placetree" class="solr.TextField"
                        positionIncrementGap="100">
                        <analyzer>
                                <tokenizer
class="solr.StandardTokenizerFactory" />
                                <filter class="solr.LowerCaseFilterFactory"
/>
                        </analyzer>
                </fieldType>
               
               
               
               
               
                <fieldType name="jobtree" class="solr.TextField"
                        positionIncrementGap="100">
                        <analyzer>
                                <tokenizer
class="solr.StandardTokenizerFactory" />
                                <filter class="solr.LowerCaseFilterFactory"
/>
                        </analyzer>
                </fieldType>
                <fieldType name="companytree" class="solr.TextField"
                        positionIncrementGap="100">
                        <analyzer>
                                <tokenizer
class="solr.StandardTokenizerFactory" />
                                <filter class="solr.LowerCaseFilterFactory"
/>
                        </analyzer>
                </fieldType>


               
               
               
               
               
               
               
               

               
               
               
               
               
               
               
               

        <fieldType name="text_suggest_ngram" class="solr.TextField">
                <analyzer type="index">
                        <charFilter class="solr.MappingCharFilterFactory"
mapping="mapping-ISOLatin1Accent.txt" />
                        <tokenizer class="solr.StandardTokenizerFactory" />
                        <filter class="solr.WordDelimiterFilterFactory"
                                generateWordParts="1"
generateNumberParts="1" catenateWords="0"
                                catenateNumbers="0" catenateAll="0"
splitOnCaseChange="1" />
                        <filter class="solr.LowerCaseFilterFactory" />
                        <filter class="solr.EdgeNGramFilterFactory"
maxGramSize="20"
                                minGramSize="1" />
                        <filter class="solr.PatternReplaceFilterFactory"
pattern="([^\w\d\*æøåÆØÅ ])"
                                replacement="" replace="all" />
                </analyzer>
                <analyzer type="query">
                        <charFilter class="solr.MappingCharFilterFactory"
mapping="mapping-ISOLatin1Accent.txt" />
                        <tokenizer class="solr.StandardTokenizerFactory" />
                        <filter class="solr.WordDelimiterFilterFactory"
                                generateWordParts="0"
generateNumberParts="0" catenateWords="0"
                                catenateNumbers="0" catenateAll="0"
splitOnCaseChange="0" />
                        <filter class="solr.LowerCaseFilterFactory" />
                        <filter class="solr.PatternReplaceFilterFactory"
pattern="([^\w\d\*æøåÆØÅ ])"
                                replacement="" replace="all" />
                        <filter class="solr.PatternReplaceFilterFactory"
pattern="^(.{20})(.*)?"
                                replacement="$1" replace="all" />
                </analyzer>
        </fieldType>

        <fieldType name="alphaNumericSort" class="solr.TextField"
                sortMissingLast="true" omitNorms="true">
                <analyzer>
                        <tokenizer class="solr.KeywordTokenizerFactory" />
                        <filter class="solr.LowerCaseFilterFactory" />
                        <filter class="solr.TrimFilterFactory" />
                        <filter class="solr.PatternReplaceFilterFactory"
pattern="^(a |the |les |la |le |l'|de la |du |des )"
                                replacement="" replace="all" />
                        <filter class="solr.PatternReplaceFilterFactory"
pattern="([^a-z0-9])"
                                replacement="" replace="all" />
                </analyzer>
        </fieldType>

        <fieldType name="text_suggest_edge" class="solr.TextField">
                <analyzer type="index">
                        <charFilter class="solr.MappingCharFilterFactory"
mapping="mapping-ISOLatin1Accent.txt" />
                        <tokenizer class="solr.KeywordTokenizerFactory" />
                        <filter class="solr.LowerCaseFilterFactory" />
                        <filter class="solr.PatternReplaceFilterFactory"
pattern="([\.,;:-_])"
                                replacement=" " replace="all" />
                        <filter class="solr.EdgeNGramFilterFactory"
maxGramSize="30"
                                minGramSize="1" />
                        <filter class="solr.PatternReplaceFilterFactory"
pattern="([^\w\d\*æøåÆØÅ ])"
                                replacement="" replace="all" />
                </analyzer>
                <analyzer type="query">
                        <charFilter class="solr.MappingCharFilterFactory"
mapping="mapping-ISOLatin1Accent.txt" />
                        <tokenizer class="solr.KeywordTokenizerFactory" />
                        <filter class="solr.LowerCaseFilterFactory" />
                        <filter class="solr.PatternReplaceFilterFactory"
pattern="([\.,;:-_])"
                                replacement=" " replace="all" />
                        <filter class="solr.PatternReplaceFilterFactory"
pattern="([^\w\d\*æøåÆØÅ ])"
                                replacement="" replace="all" />
                        <filter class="solr.PatternReplaceFilterFactory"
pattern="^(.{30})(.*)?"
                                replacement="$1" replace="all" />
                </analyzer>
        </fieldType>

        <fieldType name="text_suggest" class="solr.TextField"
                positionIncrementGap="100">
                <analyzer type="index">
                        <charFilter class="solr.MappingCharFilterFactory"
mapping="mapping-ISOLatin1Accent.txt" />
                        <tokenizer class="solr.StandardTokenizerFactory" />
                        <filter class="solr.WordDelimiterFilterFactory"
                                generateWordParts="1"
generateNumberParts="1" catenateWords="1"
                                catenateNumbers="1" catenateAll="1"
splitOnCaseChange="1"
                                splitOnNumerics="1" preserveOriginal="1" />
                        <filter class="solr.LowerCaseFilterFactory" />
                        <filter class="solr.PatternReplaceFilterFactory"
pattern="([^\w\d\*æøåÆØÅ ])"
                                replacement=" " replace="all" />
                </analyzer>
                <analyzer type="query">
                        <charFilter class="solr.MappingCharFilterFactory"
mapping="mapping-ISOLatin1Accent.txt" />
                        <tokenizer class="solr.StandardTokenizerFactory" />
                        <filter class="solr.WordDelimiterFilterFactory"
                                generateWordParts="0"
generateNumberParts="0" catenateWords="0"
                                catenateNumbers="0" catenateAll="0"
splitOnCaseChange="0"
                                splitOnNumerics="0" />
                        <filter class="solr.LowerCaseFilterFactory" />
                        <filter class="solr.PatternReplaceFilterFactory"
pattern="([^\w\d\*æøåÆØÅ ])"
                                replacement=" " replace="all" />
                </analyzer>
        </fieldType> 


               
        </types>
</schema>

and this is my SolrConfig

<?xml version="1.0" encoding="UTF-8" ?>



<config>
        <luceneMatchVersion>4.8</luceneMatchVersion>
       
        <directoryFactory name="DirectoryFactory"
               
class="${solr.directoryFactory:solr.StandardDirectoryFactory}" />

        <dataDir>${solr.core0.data.dir:}</dataDir>

       
        <schemaFactory class="ClassicIndexSchemaFactory" />

        <updateHandler class="solr.DirectUpdateHandler2">
                <updateLog>
                        <str name="dir">${solr.core0.data.dir:}</str>
                </updateLog>
        </updateHandler>

       
        <requestHandler name="/get" class="solr.RealTimeGetHandler">
                <lst name="defaults">
                        <str name="omitHeader">true</str>
                </lst>
        </requestHandler>
       
        <requestHandler name="/select" class="solr.SearchHandler">
   
     <lst name="defaults">
       <str name="echoParams">explicit</str>
       <int name="rows">10</int>
       <str name="df">id</str>
       </lst>
    </requestHandler>

        <requestHandler name="/suggest" class="solr.SearchHandler">
                <lst name="defaults">
                        <str name="echoParams">explicit</str>
                        <str name="defType">edismax</str>
                        <str name="rows">10</str>
                        <str name="fl">full_name,job_tree, company, city,
state, country, first_name, last_name, id</str>
                        <str name="qf">full_name_suggest^60
full_name_ngram^100.0 job_suggest^30 job_ngram^50.0 </str>
                        <str name="pf">full_name_edge^100.0
job_edge^50.0</str>
                        <str name="group">true</str>
                        <str name="group.field">full_name</str>
           

  <str name="sort">full_name asc</str>
  <str name="group.sort">full_name asc</str>
                </lst>
        </requestHandler>
       
       

        <requestHandler name="/replication" class="solr.ReplicationHandler"
                startup="lazy" />

        <requestDispatcher handleSelect="true">
                <requestParsers enableRemoteStreaming="false"
                        multipartUploadLimitInKB="2048"
formdataUploadLimitInKB="2048" />
        </requestDispatcher>

        <requestHandler name="standard" class="solr.StandardRequestHandler"
                default="true" />
        <requestHandler name="/analysis/field" startup="lazy"
                class="solr.FieldAnalysisRequestHandler" />
        <requestHandler name="/update" class="solr.UpdateRequestHandler" />
        <requestHandler name="/admin/"
                class="org.apache.solr.handler.admin.AdminHandlers" />

        <requestHandler name="/admin/ping" class="solr.PingRequestHandler">
                <lst name="invariants">
                        <str name="q">solrpingquery</str>
                </lst>
                <lst name="defaults">
                        <str name="echoParams">all</str>
                </lst>
        </requestHandler>

       
        <admin>
                <defaultQuery>solr</defaultQuery>
        </admin>

</config>
the following is the result for
(http://localhost:9090/solr/people/suggest?q=%E3%82%B7%E3%82%B9%E3%83%86%E3%83%A0%E3%82%A2%E3%83%8A%E3%83%AA%E3%82%B9%E3%83%88&wt=json&indent=true)

{
  "responseHeader":{
    "status":0,
    "QTime":8,
    "params":{
      "indent":"true",
      "q":"システムアナリスト",
      "wt":"json"}},
  "grouped":{
    "full_name":{
      "matches":2,
      "groups":[{
          "groupValue":"مسعود",
          "doclist":{"numFound":1,"start":0,"docs":[
              {
                "job_tree":"رسام كاريكاتور",
                "last_name":"النغش",
                "state":"Amman",
                "country":"Jordan",
                "city":"Amman",
                "id":"fa0a5f94-0497-49f6-9060-ec45c27c0d8e",
                "company":"شركة الفنون المتطورة",
                "full_name":"مسعود  النغش",
                "first_name":"مسعود "}]
          }},
        {
          "groupValue":"ね",
          "doclist":{"numFound":1,"start":0,"docs":[
              {
                "job_tree":"システムアナリスト",
                "last_name":"シャン",
                "state":"Tokyo",
                "country":"Japan",
                "city":"Tokyo",
                "id":"4fdce27b-3a9b-4045-85f3-2d5087d97b50",
                "company":"日立",
                "full_name":"すね シャン",
                "first_name":"すね"}]
          }}]}}}

I dont now why it bring the Arabic text with it the result is the same if I
try to search for the Arabic. Any help from you will be highly appreciated. 



--
View this message in context: 
http://lucene.472066.n3.nabble.com/Multi-Language-Suggester-Solr-Issue-tp4176075.html
Sent from the Solr - User mailing list archive at Nabble.com.

Reply via email to