I am trying create suggester handler using solr 4.8, everything work fine but when I try to get suggestion using different language Arabic, or Japanese for example I got result in mixed language, but I am trying to search only using Japanese, I got Arabic with that too. the following is my Schema.xml
<?xml version="1.0" encoding="UTF-8" ?> <schema name="people_schema" version="1.5"> <fields> <field name="_version_" type="long" indexed="true" stored="true" /> <field name="id" type="string" indexed="true" stored="true" required="true" /> <field name="first_name" type="txt_general" indexed="true" stored="true" multiValued="false" /> <field name="last_name" type="txt_general" indexed="true" stored="true" multiValued="false" /> <field name="about" type="text_general_edge_ngram" indexed="true" stored="true" multiValued="false" /> <field name="year_birth" type="tint" indexed="true" stored="true" multiValued="false" /> <field name="month_birth" type="tint" indexed="true" stored="true" multiValued="false" /> <field name="day_birth" type="tint" indexed="true" stored="true" multiValued="false" /> <field name="country" type="string" indexed="true" stored="true" required="false" multiValued="false" /> <field name="country_tree" type="placetree" indexed="true" stored="false" multiValued="false" /> <field name="state" type="string" indexed="true" stored="true" required="false" multiValued="false" /> <field name="state_tree" type="placetree" indexed="true" stored="false" multiValued="false" /> <field name="city" type="string" indexed="true" stored="true" required="false" multiValued="false" /> <field name="city_tree" type="placetree" indexed="true" stored="false" multiValued="false" /> <field name="job" type="string" indexed="true" stored="true" required="false" multiValued="false" /> <field name="job_tree" type="txt_general" indexed="true" stored="true" multiValued="false" /> <field name="company" type="string" indexed="true" stored="true" required="false" multiValued="false" /> <field name="company_tree" type="companytree" indexed="true" stored="false" multiValued="false" /> <field name="full_name" type="txt_general" indexed="true" stored="true" multiValued="false" /> <field name="full_name_suggest" type="text_suggest" indexed="true" stored="true" multiValued="false" /> <field name="full_name_edge" type="text_suggest_edge" indexed="true" stored="true" multiValued="false" /> <field name="full_name_ngram" type="text_suggest_ngram" indexed="true" stored="true" multiValued="false" /> <field name="full_name_sort" type="alphaNumericSort" indexed="true" stored="true" multiValued="false" /> <field name="job_suggest" type="text_suggest" indexed="true" stored="true" multiValued="false" /> <field name="job_edge" type="text_suggest_edge" indexed="true" stored="true" multiValued="false" /> <field name="job_ngram" type="text_suggest_ngram" indexed="true" stored="true" multiValued="false" /> <field name="job_sort" type="alphaNumericSort" indexed="true" stored="true" multiValued="false" /> <copyField source="full_name" dest="full_name_suggest" /> <copyField source="full_name" dest="full_name_edge" /> <copyField source="full_name" dest="full_name_ngram" /> <copyField source="full_name" dest="full_name_sort" /> <copyField source="job_tree" dest="job_suggest" /> <copyField source="job_tree" dest="job_edge" /> <copyField source="job_tree" dest="job_ngram" /> <copyField source="job_tree" dest="job_sort" /> </fields> <uniqueKey>id</uniqueKey> <types> <fieldType name="string" class="solr.StrField" sortMissingLast="true" /> <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" /> <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0" /> <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0" /> <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0" /> <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0" /> <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0" /> <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0" /> <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0" /> <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0" /> <fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0" /> <fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0" /> <fieldtype name="binary" class="solr.BinaryField" /> <fieldType name="text_general_edge_ngram" class="solr.TextField" positionIncrementGap="100"> <analyzer type="index"> <tokenizer class="solr.LowerCaseTokenizerFactory" /> <filter class="solr.EdgeNGramFilterFactory" minGramSize="2" maxGramSize="15" side="front" /> </analyzer> <analyzer type="query"> <tokenizer class="solr.LowerCaseTokenizerFactory" /> </analyzer> </fieldType> <fieldType name="txt_general" class="solr.TextField" positionIncrementGap="100"> <analyzer type="index"> <tokenizer class="solr.StandardTokenizerFactory" /> <filter class="solr.LowerCaseFilterFactory" /> </analyzer> <analyzer type="query"> <tokenizer class="solr.StandardTokenizerFactory" /> <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true" /> <filter class="solr.LowerCaseFilterFactory" /> </analyzer> </fieldType> <fieldtype name="name_phonetic" stored="false" indexed="true" class="solr.TextField"> <analyzer> <tokenizer class="solr.StandardTokenizerFactory" /> <filter class="solr.DoubleMetaphoneFilterFactory" inject="false" /> </analyzer> </fieldtype> <fieldType name="placetree" class="solr.TextField" positionIncrementGap="100"> <analyzer> <tokenizer class="solr.StandardTokenizerFactory" /> <filter class="solr.LowerCaseFilterFactory" /> </analyzer> </fieldType> <fieldType name="jobtree" class="solr.TextField" positionIncrementGap="100"> <analyzer> <tokenizer class="solr.StandardTokenizerFactory" /> <filter class="solr.LowerCaseFilterFactory" /> </analyzer> </fieldType> <fieldType name="companytree" class="solr.TextField" positionIncrementGap="100"> <analyzer> <tokenizer class="solr.StandardTokenizerFactory" /> <filter class="solr.LowerCaseFilterFactory" /> </analyzer> </fieldType> <fieldType name="text_suggest_ngram" class="solr.TextField"> <analyzer type="index"> <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt" /> <tokenizer class="solr.StandardTokenizerFactory" /> <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1" /> <filter class="solr.LowerCaseFilterFactory" /> <filter class="solr.EdgeNGramFilterFactory" maxGramSize="20" minGramSize="1" /> <filter class="solr.PatternReplaceFilterFactory" pattern="([^\w\d\*æøåÆØÅ ])" replacement="" replace="all" /> </analyzer> <analyzer type="query"> <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt" /> <tokenizer class="solr.StandardTokenizerFactory" /> <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" /> <filter class="solr.LowerCaseFilterFactory" /> <filter class="solr.PatternReplaceFilterFactory" pattern="([^\w\d\*æøåÆØÅ ])" replacement="" replace="all" /> <filter class="solr.PatternReplaceFilterFactory" pattern="^(.{20})(.*)?" replacement="$1" replace="all" /> </analyzer> </fieldType> <fieldType name="alphaNumericSort" class="solr.TextField" sortMissingLast="true" omitNorms="true"> <analyzer> <tokenizer class="solr.KeywordTokenizerFactory" /> <filter class="solr.LowerCaseFilterFactory" /> <filter class="solr.TrimFilterFactory" /> <filter class="solr.PatternReplaceFilterFactory" pattern="^(a |the |les |la |le |l'|de la |du |des )" replacement="" replace="all" /> <filter class="solr.PatternReplaceFilterFactory" pattern="([^a-z0-9])" replacement="" replace="all" /> </analyzer> </fieldType> <fieldType name="text_suggest_edge" class="solr.TextField"> <analyzer type="index"> <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt" /> <tokenizer class="solr.KeywordTokenizerFactory" /> <filter class="solr.LowerCaseFilterFactory" /> <filter class="solr.PatternReplaceFilterFactory" pattern="([\.,;:-_])" replacement=" " replace="all" /> <filter class="solr.EdgeNGramFilterFactory" maxGramSize="30" minGramSize="1" /> <filter class="solr.PatternReplaceFilterFactory" pattern="([^\w\d\*æøåÆØÅ ])" replacement="" replace="all" /> </analyzer> <analyzer type="query"> <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt" /> <tokenizer class="solr.KeywordTokenizerFactory" /> <filter class="solr.LowerCaseFilterFactory" /> <filter class="solr.PatternReplaceFilterFactory" pattern="([\.,;:-_])" replacement=" " replace="all" /> <filter class="solr.PatternReplaceFilterFactory" pattern="([^\w\d\*æøåÆØÅ ])" replacement="" replace="all" /> <filter class="solr.PatternReplaceFilterFactory" pattern="^(.{30})(.*)?" replacement="$1" replace="all" /> </analyzer> </fieldType> <fieldType name="text_suggest" class="solr.TextField" positionIncrementGap="100"> <analyzer type="index"> <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt" /> <tokenizer class="solr.StandardTokenizerFactory" /> <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="1" splitOnCaseChange="1" splitOnNumerics="1" preserveOriginal="1" /> <filter class="solr.LowerCaseFilterFactory" /> <filter class="solr.PatternReplaceFilterFactory" pattern="([^\w\d\*æøåÆØÅ ])" replacement=" " replace="all" /> </analyzer> <analyzer type="query"> <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt" /> <tokenizer class="solr.StandardTokenizerFactory" /> <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" splitOnNumerics="0" /> <filter class="solr.LowerCaseFilterFactory" /> <filter class="solr.PatternReplaceFilterFactory" pattern="([^\w\d\*æøåÆØÅ ])" replacement=" " replace="all" /> </analyzer> </fieldType> </types> </schema> and this is my SolrConfig <?xml version="1.0" encoding="UTF-8" ?> <config> <luceneMatchVersion>4.8</luceneMatchVersion> <directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.StandardDirectoryFactory}" /> <dataDir>${solr.core0.data.dir:}</dataDir> <schemaFactory class="ClassicIndexSchemaFactory" /> <updateHandler class="solr.DirectUpdateHandler2"> <updateLog> <str name="dir">${solr.core0.data.dir:}</str> </updateLog> </updateHandler> <requestHandler name="/get" class="solr.RealTimeGetHandler"> <lst name="defaults"> <str name="omitHeader">true</str> </lst> </requestHandler> <requestHandler name="/select" class="solr.SearchHandler"> <lst name="defaults"> <str name="echoParams">explicit</str> <int name="rows">10</int> <str name="df">id</str> </lst> </requestHandler> <requestHandler name="/suggest" class="solr.SearchHandler"> <lst name="defaults"> <str name="echoParams">explicit</str> <str name="defType">edismax</str> <str name="rows">10</str> <str name="fl">full_name,job_tree, company, city, state, country, first_name, last_name, id</str> <str name="qf">full_name_suggest^60 full_name_ngram^100.0 job_suggest^30 job_ngram^50.0 </str> <str name="pf">full_name_edge^100.0 job_edge^50.0</str> <str name="group">true</str> <str name="group.field">full_name</str> <str name="sort">full_name asc</str> <str name="group.sort">full_name asc</str> </lst> </requestHandler> <requestHandler name="/replication" class="solr.ReplicationHandler" startup="lazy" /> <requestDispatcher handleSelect="true"> <requestParsers enableRemoteStreaming="false" multipartUploadLimitInKB="2048" formdataUploadLimitInKB="2048" /> </requestDispatcher> <requestHandler name="standard" class="solr.StandardRequestHandler" default="true" /> <requestHandler name="/analysis/field" startup="lazy" class="solr.FieldAnalysisRequestHandler" /> <requestHandler name="/update" class="solr.UpdateRequestHandler" /> <requestHandler name="/admin/" class="org.apache.solr.handler.admin.AdminHandlers" /> <requestHandler name="/admin/ping" class="solr.PingRequestHandler"> <lst name="invariants"> <str name="q">solrpingquery</str> </lst> <lst name="defaults"> <str name="echoParams">all</str> </lst> </requestHandler> <admin> <defaultQuery>solr</defaultQuery> </admin> </config> the following is the result for (http://localhost:9090/solr/people/suggest?q=%E3%82%B7%E3%82%B9%E3%83%86%E3%83%A0%E3%82%A2%E3%83%8A%E3%83%AA%E3%82%B9%E3%83%88&wt=json&indent=true) { "responseHeader":{ "status":0, "QTime":8, "params":{ "indent":"true", "q":"システムアナリスト", "wt":"json"}}, "grouped":{ "full_name":{ "matches":2, "groups":[{ "groupValue":"مسعود", "doclist":{"numFound":1,"start":0,"docs":[ { "job_tree":"رسام كاريكاتور", "last_name":"النغش", "state":"Amman", "country":"Jordan", "city":"Amman", "id":"fa0a5f94-0497-49f6-9060-ec45c27c0d8e", "company":"شركة الفنون المتطورة", "full_name":"مسعود النغش", "first_name":"مسعود "}] }}, { "groupValue":"ね", "doclist":{"numFound":1,"start":0,"docs":[ { "job_tree":"システムアナリスト", "last_name":"シャン", "state":"Tokyo", "country":"Japan", "city":"Tokyo", "id":"4fdce27b-3a9b-4045-85f3-2d5087d97b50", "company":"日立", "full_name":"すね シャン", "first_name":"すね"}] }}]}}} I dont now why it bring the Arabic text with it the result is the same if I try to search for the Arabic. Any help from you will be highly appreciated. -- View this message in context: http://lucene.472066.n3.nabble.com/Multi-Language-Suggester-Solr-Issue-tp4176075.html Sent from the Solr - User mailing list archive at Nabble.com.