Thank you for sending the definitions. I thought you had defined n-gram based
fields for story and slug, but your definitions look fine to me.
I don't understand why you got such strange snippets.
I think you can open a JIRA issue for this problem (sorry, I cannot take it
at this moment). A test case that reproduces the problem would be very
helpful.
Koji
(11/04/24 11:50), Ramanathapuram, Rajesh wrote:
Hi Koji,
My apologies for misunderstanding the question ...
here is Fields ...
<fields>
<field name="storyid" type="string" indexed="true" stored="true"
required="true" />
<field name="slug" type="text" indexed="false" stored="true" />
<field name="author" type="string" indexed="true" stored="true" />
<field name="status" type="string" indexed="false" stored="true" />
<field name="docdate" type="tdate" indexed="true" stored="true" />
<field name="createdate" type="tdate" indexed="false" stored="true"
/>
<field name="modifyby" type="string" indexed="true" stored="true" />
<field name="story" type="text" indexed="false" stored="true" />
<field name="queue" type="lowercase" indexed="true"
stored="true" />
<field name="modifydate" type="tdate" indexed="false"
stored="true" />
<field name="endorser" type="string" indexed="false"
stored="true" />
<field name="slug_sort" type="lowercase" indexed="true"
stored="false" />
<field name="url" type="string" indexed="false" stored="true" />
<field name="showtitle" type="string" indexed="true"
stored="true" />
<field name="date_sort" type="pdate" indexed="true"
stored="false" sortMissingFirst="true" />
<field name="site" type="string" stored="true" indexed="true"/>
<field name="segment" type="string" stored="true"
indexed="false"/>
<field name="digest" type="string" stored="true" indexed="false"/>
<field name="boost" type="float" stored="true" indexed="false"/>
<field name="host" type="url" stored="false" indexed="true"/>
<field name="tstamp" type="long" stored="true" indexed="false"
/>
<field name="anchor" type="string" stored="true" indexed="true"
multiValued="true"/>
<field name="headline" type="string" indexed="true"
stored="true" />
<field name="highlight" type="string" indexed="true"
stored="true" />
<field name="guests" type="string" indexed="true" stored="true"
/>
<field name="transcriptnum" type="string" indexed="false"
stored="true" />
<field name="additionalinewsfields" type="text" indexed="false"
stored="true" />
<field name="all_text" type="text" indexed="true" stored="false"
multiValued="true"/>
<field name="timestamp" type="date" indexed="true" stored="true"
default="NOW" multiValued="false"/>
<dynamicField name="*_kstem" type="text_kstem" indexed="true"
stored="true" multiValued="true"/>
</fields>
<uniqueKey>storyid</uniqueKey>
And here is Types ...
<types>
<fieldType name="string" class="solr.StrField"
sortMissingLast="true" omitNorms="true"/>
<fieldType name="boolean" class="solr.BoolField"
sortMissingLast="true" omitNorms="true"/>
<fieldtype name="binary" class="solr.BinaryField"/>
<fieldType name="int" class="solr.TrieIntField" precisionStep="0"
omitNorms="true" positionIncrementGap="0"/>
<fieldType name="float" class="solr.TrieFloatField"
precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="long" class="solr.TrieLongField" precisionStep="0"
omitNorms="true" positionIncrementGap="0"/>
<fieldType name="double" class="solr.TrieDoubleField"
precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="tint" class="solr.TrieIntField" precisionStep="8"
omitNorms="true" positionIncrementGap="0"/>
<fieldType name="tfloat" class="solr.TrieFloatField"
precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="tlong" class="solr.TrieLongField" precisionStep="8"
omitNorms="true" positionIncrementGap="0"/>
<fieldType name="tdouble" class="solr.TrieDoubleField"
precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="date" class="solr.TrieDateField" omitNorms="true"
precisionStep="0" positionIncrementGap="0"/>
<fieldType name="tdate" class="solr.TrieDateField" omitNorms="true"
precisionStep="6" positionIncrementGap="0"/>
<fieldType name="pint" class="solr.IntField" omitNorms="true"/>
<fieldType name="plong" class="solr.LongField" omitNorms="true"/>
<fieldType name="pfloat" class="solr.FloatField" omitNorms="true"/>
<fieldType name="pdouble" class="solr.DoubleField"
omitNorms="true"/>
<fieldType name="pdate" class="solr.DateField"
sortMissingLast="true" omitNorms="true"/>
<fieldType name="sint" class="solr.SortableIntField"
sortMissingLast="true" omitNorms="true"/>
<fieldType name="slong" class="solr.SortableLongField"
sortMissingLast="true" omitNorms="true"/>
<fieldType name="sfloat" class="solr.SortableFloatField"
sortMissingLast="true" omitNorms="true"/>
<fieldType name="sdouble" class="solr.SortableDoubleField"
sortMissingLast="true" omitNorms="true"/>
<fieldType name="random" class="solr.RandomSortField" indexed="true"
/>
<!-- A text field that only splits on whitespace for exact matching
of words -->
<fieldType name="text_ws" class="solr.TextField"
positionIncrementGap="100">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
</analyzer>
</fieldType>
<fieldType name="text_kstem" class="solr.TextField"
positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true"
words="stopwords.txt" enablePositionIncrements="false" />
<filter class="solr.WordDelimiterFilterFactory"
generateWordParts="1" generateNumberParts="1" catenateWords="1"
catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter
class="com.lucidimagination.solrworks.analysis.LucidKStemFilterFactory"
protected="protwords.txt"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true"
words="stopwords.txt"/>
<filter class="solr.WordDelimiterFilterFactory"
generateWordParts="1" generateNumberParts="1" catenateWords="0"
catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter
class="com.lucidimagination.solrworks.analysis.LucidKStemFilterFactory"
protected="protwords.txt"/>
</analyzer>
</fieldType>
<fieldType name="text" class="solr.TextField"
positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.StopFilterFactory"
ignoreCase="true"
words="stopwords.txt"
enablePositionIncrements="true"
/>
<filter class="solr.WordDelimiterFilterFactory"
generateWordParts="1" generateNumberParts="1" catenateWords="1"
catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.SnowballPorterFilterFactory"
language="English" protected="protwords.txt"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory"
synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.StopFilterFactory"
ignoreCase="true"
words="stopwords.txt"
enablePositionIncrements="true"
/>
<filter class="solr.WordDelimiterFilterFactory"
generateWordParts="1" generateNumberParts="1" catenateWords="0"
catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.SnowballPorterFilterFactory"
language="English" protected="protwords.txt"/>
</analyzer>
</fieldType>
<fieldType name="textTight" class="solr.TextField"
positionIncrementGap="100">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory"
synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
<filter class="solr.StopFilterFactory" ignoreCase="true"
words="stopwords.txt"/>
<filter class="solr.WordDelimiterFilterFactory"
generateWordParts="0" generateNumberParts="0" catenateWords="1"
catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.SnowballPorterFilterFactory"
language="English" protected="protwords.txt"/>
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="textgen" class="solr.TextField"
positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true"
words="stopwords.txt" enablePositionIncrements="true" />
<filter class="solr.WordDelimiterFilterFactory"
generateWordParts="1" generateNumberParts="1" catenateWords="1"
catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory"
synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.StopFilterFactory"
ignoreCase="true"
words="stopwords.txt"
enablePositionIncrements="true"
/>
<filter class="solr.WordDelimiterFilterFactory"
generateWordParts="1" generateNumberParts="1" catenateWords="0"
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="text_rev" class="solr.TextField"
positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true"
words="stopwords.txt" enablePositionIncrements="true" />
<filter class="solr.WordDelimiterFilterFactory"
generateWordParts="1" generateNumberParts="1" catenateWords="1"
catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.ReversedWildcardFilterFactory"
withOriginal="true"
maxPosAsterisk="3" maxPosQuestion="2"
maxFractionAsterisk="0.33"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory"
synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.StopFilterFactory"
ignoreCase="true"
words="stopwords.txt"
enablePositionIncrements="true"
/>
<filter class="solr.WordDelimiterFilterFactory"
generateWordParts="1" generateNumberParts="1" catenateWords="0"
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="alphaOnlySort" class="solr.TextField"
sortMissingLast="true" omitNorms="true">
<analyzer>
<tokenizer class="solr.KeywordTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory" />
<filter class="solr.TrimFilterFactory" />
<filter class="solr.PatternReplaceFilterFactory"
pattern="([^a-z])" replacement="" replace="all"
/>
</analyzer>
</fieldType>
<fieldtype name="phonetic" stored="false" indexed="true"
class="solr.TextField">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.DoubleMetaphoneFilterFactory"
inject="false"/>
</analyzer>
</fieldtype>
<fieldtype name="payloads" stored="false" indexed="true"
class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.DelimitedPayloadTokenFilterFactory"
encoder="float"/>
</analyzer>
</fieldtype>
<fieldType name="lowercase" class="solr.TextField"
positionIncrementGap="100">
<analyzer>
<tokenizer class="solr.KeywordTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory" />
<filter class="solr.TrimFilterFactory" />
</analyzer>
</fieldType>
<fieldType name="url" class="solr.TextField"
positionIncrementGap="100">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.WordDelimiterFilterFactory"
generateWordParts="1" generateNumberParts="1"/>
<filter
class="solr.RemoveDuplicatesTokenFilterFactory"/>
</analyzer>
</fieldType>
<fieldtype name="ignored" stored="false" indexed="false"
multiValued="true" class="solr.StrField" />
</types>
thanks & regards,
Rajesh Ramana
-----Original Message-----
From: Koji Sekiguchi [mailto:k...@r.email.ne.jp]
Sent: Saturday, April 23, 2011 9:51 PM
To: solr-user@lucene.apache.org
Subject: Re: Solr - Multi Term highlighting issue
Hi Rajesh,
My question was how the story and slug fields are defined in schema.xml.
In other words, please show us your <fieldType/> and <field/> for those
fields.
Koji
--
http://www.rondhuit.com/en/
(11/04/24 10:18), Ramanathapuram, Rajesh wrote:
I don't have hl.fl defined in my schema.xml; I am passing it in as one of my
query parameters:
<str name="hl.fl">story, slug</str>
The elongated parameters are sent like this...
'hl' => 'on',
'hl.fragsize' => $fragsize,
'hl.maxAnalyzedChars' => $fragsize,
'hl.fl' => 'slug,story',
'hl.simple.pre' => '<span class="' .$className . '">',
'hl.simple.post' => '</span>',
Here are my query params in the response
-<response>
-<lst name="responseHeader">
<int name="status">0</int>
<int name="QTime">26</int>
-<lst name="params">
<str name="hl.fragsize">100000</str>
<str name="explainOther" />
<str name="indent">on</str>
<str name="hl.fl">story, slug</str>
<str name="wt">standard</str>
<str name="hl">on</str>
<str name="rows">10</str>
<str name="version">2.2</str>
<str name="hl.highlightMultiTerm">true</str>
<str name="fl">*</str>
<str name="start">0</str>
<str name="q">mec us chile</str>
<str name="qt">standard</str>
<str name="hl.usePhraseHighlighter">true</str>
<str name="fq">storyid="XXXX XXXX XXXXX"</str>
</lst>
</lst>
Please let me know.
thanks & regards,
Rajesh Ramana
-----Original Message-----
From: Koji Sekiguchi [mailto:k...@r.email.ne.jp]
Sent: Friday, April 22, 2011 8:38 PM
To: solr-user@lucene.apache.org
Subject: Re: Solr - Multi Term highlighting issue
How are your hl.fl fields defined in schema.xml?
Koji
--
http://www.rondhuit.com/en/
(11/04/23 1:23), Ramanathapuram, Rajesh wrote:
Does anybody have other suggestions?
thanks & regards,
Rajesh Ramana
Enterprise Applications, Turner Broadcasting System, Inc.
404.878.7474
-----Original Message-----
From: Ramanathapuram, Rajesh
[mailto:rajesh.ramanathapu...@turner.com]
Sent: Wednesday, April 20, 2011 2:51 PM
To: solr-user@lucene.apache.org
Subject: RE: Solr - Multi Term highlighting issue
Thanks Erick.
I tried your suggestion, the issue still exists.
http://localhost:8983/searchsolr/mainCore/select?indent=on&version=2.2&q=mec+us+chile&fq=storyid%3DXXXXXXX%22&start=0&rows=10&fl=*&qt=standard&wt=standard&explainOther=&hl=on&hl.fl=story%2C+slug&hl.fragsize=100000&hl.highlightMultiTerm=true&hl.usePhraseHighlighter=true&hl.mergeContiguous=false
-<lst name="params">
<str name="hl.fragsize">100000</str>
<str name="explainOther" />
<str name="indent">on</str>
<str name="hl.mergeContiguous">false</str> ....
... Corboba. (<em>MEC)</b></p><p></p><p><b>CHILE</em>/FOREST FIRES
...
thanks & regards,
Rajesh Ramana
-----Original Message-----
From: Erick Erickson [mailto:erickerick...@gmail.com]
Sent: Wednesday, April 20, 2011 11:59 AM
To: solr-user@lucene.apache.org
Subject: Re: Solr - Multi Term highlighting issue
Does your configuration have "hl.mergeContiguous" set to true by any
chance? And what happens if you explicitly set this to "false" on your
query?
Best
Erick
On Wed, Apr 20, 2011 at 9:43 AM, Ramanathapuram,
Rajesh<rajesh.ramanathapu...@turner.com> wrote:
Hello,
I am dealing with a highlighting issue in SOLR; I will try to explain
the issue.
When I search for a single term in solr, it wraps an <em> tag around the
words I want to highlight, and all works well.
But if I search for multiple terms, for the most part highlighting works well,
but for some of the terms the highlighter returns multiple terms in
a single <em> tag ...
<em>srchtrm1)<br><b><p>.... srchtrm2</em>
I expect solr to return highlighted terms like ...
<em>srchtrm1</em>)<br><b><p>... <em>srchtrm2</em>
When I search for 'US mec chile', here is how my result appears
... Corboba. (<em>MEC)</b></p><p></p><p><b>CHILE</em>/FOREST
FIRES:
We had ... with<em>US</em> and<em>Chile</em> ...,
(<em>MEC)</b></p><p></p><p><b>US</em> ....
This is what I was expecting it to be
... Corboba.
(<em>MEC</em>)</b></p><p></p><p><b><em>CHILE</em>/FOREST
FIRES: We had ... with<em>US</em> and<em>Chile</em> ...,
(<em>MEC</em>)</b></p><p></p><p><b><em>US</em> ....
Here is my query params
-<response>
-<lst name="responseHeader">
<int name="status">0</int>
<int name="QTime">26</int>
-<lst name="params">
<str name="hl.fragsize">100000</str>
<str name="explainOther" />
<str name="indent">on</str>
<str name="hl.fl">story, slug</str>
<str name="wt">standard</str>
<str name="hl">on</str>
<str name="rows">10</str>
<str name="version">2.2</str>
<str name="hl.highlightMultiTerm">true</str>
<str name="fl">*</str>
<str name="start">0</str>
<str name="q">mec us chile</str>
<str name="qt">standard</str>
<str name="hl.usePhraseHighlighter">true</str>
<str name="fq">storyid="XXXX XXXX XXXXX"</str>
</lst>
</lst>
Here are some other links I found in the forum, but with no real
conclusion:
http://www.lucidimagination.com/search/document/ac64e4f0abb6e4fc/solr_highlighting_question#78163c42a67cb533
I am going to try this patch, which also had no conclusive results
https://issues.apache.org/jira/browse/SOLR-1394
Has anyone come across this issue?
Any suggestions on how to fix this issue are much appreciated.
thanks & regards,
Rajesh Ramana
--
http://www.rondhuit.com/en/