It looks like it was escaped in the query, but the word delimiter filter
will remove it and treat it as if it were white space.
The "types" attribute for WDF can point to a file containing the types for
various characters, so you could map a quote to ALPHA.
The doc is sketchy, but there are some examples in my e-book that show how
to map @ and _ to ALPHA.
-- Jack Krupansky
-----Original Message-----
From: Ahmet Arslan
Sent: Tuesday, May 20, 2014 4:55 AM
To: solr-user@lucene.apache.org
Subject: Re: Howto Search word which contains the character "
Hi,
It is a special query parser character, so it needs to be escaped.
http://lucene.apache.org/core/2_9_4/queryparsersyntax.html#Escaping%20Special%20Characters
Ahmet
On Tuesday, May 20, 2014 10:57 AM, heyyo <lionel.enka...@gmail.com> wrote:
In Hebrew, words can contain the character *"*
ex: דו"ח
I would like to know how to configure my schema.xml to be able to index and
search those types of words correctly.
If I search for this character *"* in the Solr query tool, I get this debug output:
/"debug": {
"rawquerystring": "\"",
"querystring": "\"",
"parsedquery": "(+())/no_coord",
"parsedquery_toString": "+()",
/
So, if I understand correctly, Solr removes the " when the query is parsed.
I'm using this schema:
<fieldType name="text" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<charFilter class="solr.MappingCharFilterFactory"
mapping="mapping-ISOLatin1Accent.txt"/>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.StopFilterFactory"
ignoreCase="true"
words="stopwords.txt"
enablePositionIncrements="true"
/>
<filter class="solr.WordDelimiterFilterFactory"
protected="protwords.txt"
generateWordParts="1"
generateNumberParts="1"
catenateWords="1"
catenateNumbers="1"
catenateAll="0"
splitOnCaseChange="1"
preserveOriginal="1"/>
<filter class="solr.LengthFilterFactory" min="2" max="100" />
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.SnowballPorterFilterFactory" language="English"
protected="protwords.txt"/>
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
<filter class="solr.EdgeNGramFilterFactory" minGramSize="2"
maxGramSize="25" />
</analyzer>
<analyzer type="query">
<charFilter class="solr.MappingCharFilterFactory"
mapping="mapping-ISOLatin1Accent.txt"/>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt"
ignoreCase="true" expand="true"/>
<filter class="solr.StopFilterFactory"
ignoreCase="true"
words="stopwords.txt"
enablePositionIncrements="true"
/>
<filter class="solr.WordDelimiterFilterFactory"
protected="protwords.txt"
generateWordParts="1"
generateNumberParts="1"
catenateWords="0"
catenateNumbers="0"
catenateAll="0"
splitOnCaseChange="1"
preserveOriginal="1"/>
<filter class="solr.LengthFilterFactory" min="2" max="100" />
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.SnowballPorterFilterFactory" language="English"
protected="protwords.txt"/>
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
</analyzer>
</fieldType>
--
View this message in context:
http://lucene.472066.n3.nabble.com/Howto-Search-word-which-contains-the-character-tp4137083.html
Sent from the Solr - User mailing list archive at Nabble.com.