Thanks *very much* for replying. (You're right: I missed the "zero or more,"
having focused only on the examples in the doc. Oops.)
New discovery: a search for kin*ase returns 0 hits. Below I show the debug
output and the pertinent parts of the schema. Maybe you can spot my problem?
{
"responseHeader":{
"status":0,
"QTime":2,
"params":{
"q":"kin*ase",
"defType":"edismax",
"debug":"all",
"qf":"TEXT__gene_product",
"fl":"id,document-type,TEXT__gene_product,score",
"stopwords":"true"}},
"response":{"numFound":0,"start":0,"maxScore":0.0,"docs":[]
},
"debug":{
"rawquerystring":"kin*ase",
"querystring":"kin*ase",
"parsedquery":"+DisjunctionMaxQuery((TEXT__gene_product:kin*ase))",
"parsedquery_toString":"+(TEXT__gene_product:kin*ase)",
"explain":{},
"QParser":"ExtendedDismaxQParser",
"altquerystring":null,
"boost_queries":null,
"parsed_boost_queries":[],
"boostfuncs":null,
"timing":{
"time":2.0,
"prepare":{
"time":1.0,
"query":{
"time":1.0},
"facet":{
"time":0.0},
"facet_module":{
"time":0.0},
"mlt":{
"time":0.0},
"highlight":{
"time":0.0},
"stats":{
"time":0.0},
"expand":{
"time":0.0},
"terms":{
"time":0.0},
"debug":{
"time":0.0}},
"process":{
"time":1.0,
"query":{
"time":0.0},
"facet":{
"time":0.0},
"facet_module":{
"time":0.0},
"mlt":{
"time":0.0},
"highlight":{
"time":0.0},
"stats":{
"time":0.0},
"expand":{
"time":0.0},
"terms":{
"time":0.0},
"debug":{
"time":0.0}}}}}
<!-- Dynamic field rule for field names matching the pattern TEXT__* (the
     queried field TEXT__gene_product matches it). Values use the field type
     "text_en_splitting" and are indexed and stored, with term vectors and
     offsets-with-positions enabled. -->
<dynamicField name="TEXT__*" type="text_en_splitting" indexed="true"
stored="true" storeOffsetsWithPositions="true" termVectors="true"/>
<!-- Field type "text_en_splitting_tight": a solr.TextField with separate
     index-time and query-time analyzer chains. Both chains list the same
     tokenizer and the same filters in the same order; the index chain
     additionally ends with FlattenGraphFilterFactory.
     NOTE(review): the TEXT__* dynamicField declares type="text_en_splitting",
     not "text_en_splitting_tight" - confirm that this is the field type
     actually applied to TEXT__gene_product. -->
<fieldType name="text_en_splitting_tight" class="solr.TextField"
positionIncrementGap="100" autoGeneratePhraseQueries="true">
<!-- Index-time chain: whitespace tokenization, then synonyms, stop words,
     word-delimiter handling, lowercasing, protected-word marking, minimal
     English stemming, duplicate removal and graph flattening. -->
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt"
ignoreCase="true" expand="false"/>
<filter class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_en.txt"/>
<!-- Word/number parts are not generated (0); only the catenated forms of
     words and of numbers are enabled (1), and catenateAll is off. -->
<filter class="solr.WordDelimiterGraphFilterFactory"
generateWordParts="0" generateNumberParts="0" catenateWords="1"
catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<!-- Terms listed in protwords.txt are marked as keywords ahead of the
     stemmer that follows. -->
<filter class="solr.KeywordMarkerFilterFactory"
protected="protwords.txt"/>
<!-- NOTE(review): stemming changes the indexed tokens; wildcard queries may
     not pass through the same filters at query time, which could affect
     matches for patterns such as kin*ase - verify with the Analysis screen. -->
<filter class="solr.EnglishMinimalStemFilterFactory"/>
<!-- this filter can remove any duplicate tokens that appear at the
same position - sometimes
possible with WordDelimiterGraphFilter in conjunction with
stemming. -->
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
<filter class="solr.FlattenGraphFilterFactory" />
</analyzer>
<!-- Query-time chain: same tokenizer and filters as the index chain, in the
     same order, but without the trailing FlattenGraphFilterFactory. -->
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt"
ignoreCase="true" expand="false"/>
<filter class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_en.txt"/>
<filter class="solr.WordDelimiterGraphFilterFactory"
generateWordParts="0" generateNumberParts="0" catenateWords="1"
catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.KeywordMarkerFilterFactory"
protected="protwords.txt"/>
<filter class="solr.EnglishMinimalStemFilterFactory"/>
<!-- this filter can remove any duplicate tokens that appear at the
same position - sometimes
possible with WordDelimiterGraphFilter in conjunction with
stemming. -->
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
</analyzer>
</fieldType>
-----Original Message-----
From: Walter Underwood <[email protected]>
Sent: Wednesday, February 12, 2020 12:31 AM
To: [email protected]
Subject: [External] Re: wildcards match end-of-word?
“kinase*” does match “kinase”. On the page you linked to, it defines “*” as
matching “Multiple characters (matches zero or more sequential characters)”.
If it is not matching, you may be using a stemmer on that field or doing some
other processing that changes the tokens.
wunder
Walter Underwood
[email protected]
http://observer.wunderwood.org/ (my blog)
> On Feb 11, 2020, at 6:24 PM, Fischer, Stephen
> <[email protected]> wrote:
>
> Hi,
>
> I am a solr newbie. I was surprised to discover that a search for kinase*
> returned fewer results than kinase.
>
> Then I read the wildcard
> documentation<https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html#TheStandardQueryParser-WildcardSearches>,
> and saw why. kinase* will not match the word "kinase".
>
> Our end-users won't expect this behavior. Presumably the solution would be
> for them (actually us, on their behalf), to use kinase* OR kinase.
>
> But that is kind of a hack.
>
> Is there a way we can configure solr to have wildcards match on end-of-word?
>
> Thanks,
> Steve