In hebrew words could contain the character *"*
ex: דו"ח

I would like to know how to configure my schema.xml to be able to index and
search correctly those types of words.

If I search this character *"* inside solr query tool I got this debug:

/"debug": {
    "rawquerystring": "\"",
    "querystring": "\"",
    "parsedquery": "(+())/no_coord",
    "parsedquery_toString": "+()",
/

So if I understand correctly solr remove the " when the query is parsed.


I'm using this schema:

<fieldType name="text" class="solr.TextField" positionIncrementGap="100">
      <analyzer type="index">

        <charFilter class="solr.MappingCharFilterFactory"
mapping="mapping-ISOLatin1Accent.txt"/>
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        
        
        
        <filter class="solr.StopFilterFactory"
                ignoreCase="true"
                words="stopwords.txt"
                enablePositionIncrements="true"
                />
        <filter class="solr.WordDelimiterFilterFactory"
                protected="protwords.txt"
                generateWordParts="1"
                generateNumberParts="1"
                catenateWords="1"
                catenateNumbers="1"
                catenateAll="0"
                splitOnCaseChange="1"
                preserveOriginal="1"/>
        <filter class="solr.LengthFilterFactory" min="2" max="100" />

        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.SnowballPorterFilterFactory" language="English"
protected="protwords.txt"/>
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
        <filter class="solr.EdgeNGramFilterFactory" minGramSize="2"
maxGramSize="25" />
      </analyzer>
      <analyzer type="query">
        <charFilter class="solr.MappingCharFilterFactory"
mapping="mapping-ISOLatin1Accent.txt"/>
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt"
ignoreCase="true" expand="true"/>
        <filter class="solr.StopFilterFactory"
                ignoreCase="true"
                words="stopwords.txt"
                enablePositionIncrements="true"
                />
        <filter class="solr.WordDelimiterFilterFactory"
                protected="protwords.txt"
                generateWordParts="1"
                generateNumberParts="1"
                catenateWords="0"
                catenateNumbers="0"
                catenateAll="0"
                splitOnCaseChange="1"
                preserveOriginal="1"/>
        <filter class="solr.LengthFilterFactory" min="2" max="100" />
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.SnowballPorterFilterFactory" language="English"
protected="protwords.txt"/>
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
      </analyzer>








--
View this message in context: 
http://lucene.472066.n3.nabble.com/Howto-Search-word-which-contains-the-character-tp4137083.html
Sent from the Solr - User mailing list archive at Nabble.com.

Reply via email to