There are probably any number of changes between 3.x and 4.x to account for
query differences. This includes bug fixes and in some cases new bugs, in
areas such as the query parsers and various filters. The first step is to
isolate a couple of examples of both false positive queries and false
negative queries. Then look at the field types involved. Then use the Solr
Admin Analysis UI to see how an index or query term analyzes differently.
Post the details here and we can figure out what change is causing your
query discrepancies.
-- Jack Krupansky
-----Original Message-----
From: anarchos78
Sent: Wednesday, January 30, 2013 8:59 AM
To: solr-user@lucene.apache.org
Subject: CopyField issue on Solr4.1
Hello,
I am using Solr 3.6.1 and I am very satisfied with it. Now I want to move to
Solr 4.1, so I took “schema.xml” and “solrconfig.xml” (with minor changes)
and placed them under my new Solr 4.1 configuration. The indexing (via DIH)
was successful, but I have noticed an issue. In “schema.xml” I have
“copyField” directives in order to index the same fields using different
“types”. When I index using the same configuration on Solr 4.1, the
index is half the size of the index on Solr 3.6.1 (and when I query I
get different results). Has anything changed in Solr 4.1? I need a little help
with this.
*The solrconfig.xml:*
<?xml version="1.0" encoding="UTF-8" ?>
<config>
<abortOnConfigurationError>${solr.abortOnConfigurationError:true}</abortOnConfigurationError>
<luceneMatchVersion>LUCENE_41</luceneMatchVersion>
<dataDir>${solr.data.dir:}</dataDir>
<directoryFactory name="DirectoryFactory"
class="${solr.directoryFactory:solr.StandardDirectoryFactory}"/>
<indexConfig>
</indexConfig>
<jmx />
<updateHandler class="solr.DirectUpdateHandler2">
</updateHandler>
<query>
<maxBooleanClauses>2048</maxBooleanClauses>
<filterCache class="solr.FastLRUCache"
size="2048"
initialSize="1024"
autowarmCount="512"
cleanupThread="true" />
<queryResultCache class="solr.FastLRUCache"
size="2048"
initialSize="1024"
autowarmCount="512"
cleanupThread="true" />
<!-- Cache of stored documents. autowarmCount is 0 because the Solr
     documentation states that this cache cannot be autowarmed (internal
     document ids change between searchers), so a non-zero value only
     misleads the reader. -->
<documentCache class="solr.FastLRUCache"
               size="2048"
               initialSize="2048"
               autowarmCount="0" />
<fieldValueCache class="solr.FastLRUCache"
size="2048"
initialSize="512"
autowarmCount="512"
cleanupThread="true" />
<enableLazyFieldLoading>true</enableLazyFieldLoading>
<queryResultWindowSize>150</queryResultWindowSize>
<queryResultMaxDocsCached>200</queryResultMaxDocsCached>
<!-- Queries sent by QuerySenderListener on the newSearcher event.
     Every sort value is written on a single line so the parameter text
     contains no embedded line break. -->
<listener event="newSearcher" class="solr.QuerySenderListener">
  <arr name="queries">
    <lst>
      <str name="q">χρησικτησια νομη</str>
      <str name="fq">apofasi_taxonomy:ΠΟΛΙΤΙΚΕΣ</str>
      <str name="sort">apofasi_date asc,ida desc,apofasi_tmima desc</str>
      <str name="start">0</str>
      <str name="rows">150</str>
    </lst>
    <lst>
      <str name="q">νομη</str>
      <str name="fq">apofasi_taxonomy:ΠΟΛΙΤΙΚΕΣ</str>
      <str name="sort">apofasi_date asc,ida desc,apofasi_tmima desc</str>
      <str name="start">0</str>
      <str name="rows">150</str>
    </lst>
    <lst>
      <str name="q">χρησικτησια νομη</str>
      <str name="fq">apofasi_taxonomy:ΠΟΙΝΙΚΕΣ</str>
      <str name="sort">apofasi_date asc,ida desc,apofasi_tmima desc</str>
      <str name="start">0</str>
      <str name="rows">150</str>
    </lst>
  </arr>
</listener>
<!-- Queries sent by QuerySenderListener on the firstSearcher event; the
     list is the same as the one configured for newSearcher. Every sort
     value is written on a single line so the parameter text contains no
     embedded line break. -->
<listener event="firstSearcher" class="solr.QuerySenderListener">
  <arr name="queries">
    <lst>
      <str name="q">χρησικτησια νομη</str>
      <str name="fq">apofasi_taxonomy:ΠΟΛΙΤΙΚΕΣ</str>
      <str name="sort">apofasi_date asc,ida desc,apofasi_tmima desc</str>
      <str name="start">0</str>
      <str name="rows">150</str>
    </lst>
    <lst>
      <str name="q">νομη</str>
      <str name="fq">apofasi_taxonomy:ΠΟΛΙΤΙΚΕΣ</str>
      <str name="sort">apofasi_date asc,ida desc,apofasi_tmima desc</str>
      <str name="start">0</str>
      <str name="rows">150</str>
    </lst>
    <lst>
      <str name="q">χρησικτησια νομη</str>
      <str name="fq">apofasi_taxonomy:ΠΟΙΝΙΚΕΣ</str>
      <str name="sort">apofasi_date asc,ida desc,apofasi_tmima desc</str>
      <str name="start">0</str>
      <str name="rows">150</str>
    </lst>
  </arr>
</listener>
<useColdSearcher>false</useColdSearcher>
<maxWarmingSearchers>2</maxWarmingSearchers>
</query>
<!-- Request parsing and HTTP caching settings for incoming requests. -->
<requestDispatcher>
<!-- NOTE(review): enableRemoteStreaming="true" presumably lets a request
     make Solr read content from a client-supplied URL or file path;
     confirm this instance is not reachable by untrusted clients.
     The multipart upload limit is expressed in KB. -->
<requestParsers enableRemoteStreaming="true"
multipartUploadLimitInKB="2048000" />
<!-- NOTE(review): never304="true" presumably stops Solr from answering
     with 304 Not Modified; confirm against the Solr documentation. -->
<httpCaching never304="true" />
</requestDispatcher>
<requestHandler name="/dataimport"
class="org.apache.solr.handler.dataimport.DataImportHandler">
<lst name="defaults">
<str name="config">data-config.xml</str>
</lst>
</requestHandler>
<!-- Default search handler. Queries are parsed with edismax over the
     "content" and "contentS" fields, results are returned as JSON and
     highlighting is enabled on "content" and "title".
     Each default is declared exactly once; the earlier version listed
     defType, qf, pf and ps twice with identical values. -->
<requestHandler name="/select" class="solr.SearchHandler">
  <lst name="defaults">
    <!-- Query parser and field weighting -->
    <str name="defType">edismax</str>
    <str name="qf">content contentS^10</str>
    <str name="pf">content^10 contentS^100</str>
    <str name="ps">100</str>
    <!-- Response shape -->
    <str name="echoParams">explicit</str>
    <int name="rows">150</int>
    <str name="sort">score desc</str>
    <str name="wt">json</str>
    <str name="fl">solr_id,ida,type,model,keywordlist,title,apofasi_taxonomy,apofasi_tmima,apofasi_date,grid_title</str>
    <!-- Highlighting -->
    <str name="hl">true</str>
    <str name="hl.fl">content,title</str>
    <str name="f.content.hl.alternateField">content</str>
    <str name="hl.maxAlternateFieldLength">800</str>
    <str name="hl.fragsize">800</str>
  </lst>
</requestHandler>
<requestHandler name="/update"
class="solr.XmlUpdateRequestHandler">
</requestHandler>
<requestHandler name="/update/javabin"
class="solr.BinaryUpdateRequestHandler" />
<requestHandler name="/update/csv"
class="solr.CSVRequestHandler"
startup="lazy" />
<requestHandler name="/update/json"
class="solr.JsonUpdateRequestHandler"
startup="lazy" />
<requestHandler name="/update/extract"
startup="lazy"
class="solr.extraction.ExtractingRequestHandler" >
<lst name="defaults">
<str name="fmap.content">text</str>
<str name="lowernames">true</str>
<str name="uprefix">ignored_</str>
<str name="fmap.Last-Modified">last_modified</str>
<str name="captureAttr">true</str>
<str name="fmap.a">links</str>
<str name="fmap.div">ignored_</str>
</lst>
</requestHandler>
<requestHandler name="/update/xslt"
startup="lazy"
class="solr.XsltUpdateRequestHandler"/>
<requestHandler name="/analysis/field"
startup="lazy"
class="solr.FieldAnalysisRequestHandler" />
<requestHandler name="/analysis/document"
class="solr.DocumentAnalysisRequestHandler"
startup="lazy" />
<requestHandler name="/admin/"
class="solr.admin.AdminHandlers" />
<requestHandler name="/admin/ping" class="solr.PingRequestHandler">
<lst name="invariants">
<str name="q">solrpingquery</str>
</lst>
<lst name="defaults">
<str name="echoParams">all</str>
</lst>
</requestHandler>
<requestHandler name="/debug/dump" class="solr.DumpRequestHandler" >
<lst name="defaults">
<str name="echoParams">explicit</str>
<str name="echoHandler">true</str>
</lst>
</requestHandler>
<!-- Spell-check search component with a single spellchecker named "default".
     NOTE(review): the schema posted in this same message declares no field
     called "name" and no field type called "textSpell"; confirm whether
     this component is actually used with that schema. -->
<searchComponent name="spellcheck" class="solr.SpellCheckComponent">
<str name="queryAnalyzerFieldType">textSpell</str>
<lst name="spellchecker">
<str name="name">default</str>
<str name="field">name</str>
<str name="spellcheckIndexDir">spellchecker</str>
</lst>
</searchComponent>
<requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
<lst name="defaults">
<str name="df">text</str>
<str name="spellcheck.onlyMorePopular">false</str>
<str name="spellcheck.extendedResults">false</str>
<str name="spellcheck.count">1</str>
</lst>
<arr name="last-components">
<str>spellcheck</str>
</arr>
</requestHandler>
<searchComponent name="terms" class="solr.TermsComponent"/>
<requestHandler name="/terms" class="solr.SearchHandler" startup="lazy">
<lst name="defaults">
<bool name="terms">true</bool>
</lst>
<arr name="components">
<str>terms</str>
</arr>
</requestHandler>
<searchComponent name="elevator" class="solr.QueryElevationComponent" >
<str name="queryFieldType">string</str>
<str name="config-file">elevate.xml</str>
</searchComponent>
<requestHandler name="/elevate" class="solr.SearchHandler" startup="lazy">
<lst name="defaults">
<str name="echoParams">explicit</str>
<str name="df">text</str>
</lst>
<arr name="last-components">
<str>elevator</str>
</arr>
</requestHandler>
<searchComponent class="solr.HighlightComponent" name="highlight">
<highlighting>
<fragmenter name="gap"
default="true"
class="solr.highlight.GapFragmenter">
<lst name="defaults">
</lst>
</fragmenter>
<fragmenter name="regex"
class="solr.highlight.RegexFragmenter">
<lst name="defaults">
<int name="hl.fragsize">70</int>
<float name="hl.regex.slop">0.5</float>
<str name="hl.regex.pattern">[-\w ,/\n\"']{20,200}</str>
</lst>
</fragmenter>
<!-- Default highlight formatter. The pre/post markers are wrapped in CDATA
     so the literal tag text is character data; written as bare markup the
     <shl> start tag would be closed by </str> and the file would not be
     well-formed XML. -->
<formatter name="html"
           default="true"
           class="solr.highlight.HtmlFormatter">
  <lst name="defaults">
    <str name="hl.simple.pre"><![CDATA[<shl>]]></str>
    <str name="hl.simple.post"><![CDATA[</shl>]]></str>
  </lst>
</formatter>
<encoder name="html"
class="solr.highlight.HtmlEncoder" />
<fragListBuilder name="simple"
default="true"
class="solr.highlight.SimpleFragListBuilder"/>
<fragListBuilder name="single"
class="solr.highlight.SingleFragListBuilder"/>
<fragmentsBuilder name="default"
default="true"
class="solr.highlight.ScoreOrderFragmentsBuilder">
</fragmentsBuilder>
<!-- Non-default fragments builder named "colored".
     NOTE(review): hl.tag.pre and hl.tag.post are empty here; presumably
     the original values were markup that was lost when this file was
     pasted into the message. Confirm against the deployed file. -->
<fragmentsBuilder name="colored"
class="solr.highlight.ScoreOrderFragmentsBuilder">
<lst name="defaults">
<str name="hl.tag.pre"></str>
<str name="hl.tag.post"></str>
</lst>
</fragmentsBuilder>
<boundaryScanner name="default"
default="true"
class="solr.highlight.SimpleBoundaryScanner">
<lst name="defaults">
<str name="hl.bs.maxScan">10</str>
<str name="hl.bs.chars">.,!? 	 </str>
</lst>
</boundaryScanner>
<boundaryScanner name="breakIterator"
class="solr.highlight.BreakIteratorBoundaryScanner">
<lst name="defaults">
<str name="hl.bs.type">WORD</str>
<str name="hl.bs.language">en</str>
<str name="hl.bs.country">US</str>
</lst>
</boundaryScanner>
</highlighting>
</searchComponent>
<queryResponseWriter name="json" class="solr.JSONResponseWriter">
<str name="content-type">text/plain; charset=UTF-8</str>
</queryResponseWriter>
<queryResponseWriter name="velocity" class="solr.VelocityResponseWriter"
startup="lazy"/>
<queryResponseWriter name="xslt" class="solr.XSLTResponseWriter">
<int name="xsltCacheLifetimeSeconds">5</int>
</queryResponseWriter>
<admin>
<defaultQuery>*:*</defaultQuery>
</admin>
</config>
*The schema.xml:*
<?xml version="1.0" encoding="UTF-8" ?>
<schema name="areios_pagos" version="1.5">
<types>
<fieldType name="string" class="solr.StrField" sortMissingLast="true" />
<fieldType name="boolean" class="solr.BoolField"
sortMissingLast="true"/>
<!-- Field type backed by solr.BinaryField; the element name uses the same
     fieldType casing as the neighbouring declarations. -->
<fieldType name="binary" class="solr.BinaryField"/>
<fieldType name="int" class="solr.TrieIntField" precisionStep="0"
positionIncrementGap="0"/>
<fieldType name="float" class="solr.TrieFloatField" precisionStep="0"
positionIncrementGap="0"/>
<fieldType name="long" class="solr.TrieLongField" precisionStep="0"
positionIncrementGap="0"/>
<fieldType name="double" class="solr.TrieDoubleField" precisionStep="0"
positionIncrementGap="0"/>
<fieldType name="tint" class="solr.TrieIntField" precisionStep="8"
positionIncrementGap="0"/>
<fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8"
positionIncrementGap="0"/>
<fieldType name="tlong" class="solr.TrieLongField" precisionStep="8"
positionIncrementGap="0"/>
<fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8"
positionIncrementGap="0"/>
<fieldType name="date" class="solr.TrieDateField" precisionStep="0"
positionIncrementGap="0"/>
<fieldType name="tdate" class="solr.TrieDateField" precisionStep="6"
positionIncrementGap="0"/>
<fieldType name="pint" class="solr.IntField"/>
<fieldType name="plong" class="solr.LongField"/>
<fieldType name="pfloat" class="solr.FloatField"/>
<fieldType name="pdouble" class="solr.DoubleField"/>
<fieldType name="pdate" class="solr.DateField" sortMissingLast="true"/>
<fieldType name="sint" class="solr.SortableIntField"
sortMissingLast="true" omitNorms="true"/>
<fieldType name="slong" class="solr.SortableLongField"
sortMissingLast="true" omitNorms="true"/>
<fieldType name="sfloat" class="solr.SortableFloatField"
sortMissingLast="true" omitNorms="true"/>
<fieldType name="sdouble" class="solr.SortableDoubleField"
sortMissingLast="true" omitNorms="true"/>
<fieldType name="random" class="solr.RandomSortField" indexed="true" />
<fieldType name="text_el" class="solr.TextField"
positionIncrementGap="1000">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.GreekLowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="false"
words="lang/stopwords_el.txt" enablePositionIncrements="true"/>
<filter class="solr.GreekStemFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="text" class="solr.TextField" positionIncrementGap="1000">
<analyzer type="index">
<charFilter class="solr.HTMLStripCharFilterFactory"/>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.GreekLowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_el.txt" enablePositionIncrements="true"/>
<filter class="solr.GreekStemFilterFactory"/>
</analyzer>
<analyzer type="query">
<charFilter class="solr.HTMLStripCharFilterFactory"/>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.GreekLowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_el.txt" enablePositionIncrements="true"/>
<filter class="solr.GreekStemFilterFactory"/>
</analyzer>
</fieldType>
<!-- Text type used by the "contentS" field. Index and query analysis are
     configured identically: whitespace tokenization, a token-count limit
     of 20, then Greek lower-casing. -->
<fieldType name="text_areios_pagos_s"
           class="solr.TextField"
           positionIncrementGap="100">
  <analyzer type="index">
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.LimitTokenCountFilterFactory"
            maxTokenCount="20"/>
    <filter class="solr.GreekLowerCaseFilterFactory"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.LimitTokenCountFilterFactory"
            maxTokenCount="20"/>
    <filter class="solr.GreekLowerCaseFilterFactory"/>
  </analyzer>
</fieldType>
<!-- Main Greek text type used by most fields in this schema. Index and
     query analysis are configured identically: standard tokenization,
     Greek lower-casing, stop-word removal from lang/stopwords_el.txt,
     then Greek stemming. -->
<fieldType name="text_areios_pagos"
           class="solr.TextField"
           positionIncrementGap="100">
  <analyzer type="index">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.GreekLowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="lang/stopwords_el.txt"
            enablePositionIncrements="true"/>
    <filter class="solr.GreekStemFilterFactory"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.GreekLowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="lang/stopwords_el.txt"
            enablePositionIncrements="true"/>
    <filter class="solr.GreekStemFilterFactory"/>
  </analyzer>
</fieldType>
<!-- Field type that is neither stored nor indexed; the element name uses
     the same fieldType casing as the neighbouring declarations. -->
<fieldType name="ignored"
           class="solr.StrField"
           stored="false"
           indexed="false"
           multiValued="true" />
<fieldType name="point" class="solr.PointType" dimension="2"
subFieldSuffix="_d"/>
<fieldType name="location" class="solr.LatLonType"
subFieldSuffix="_coordinate"/>
<!-- Field type backed by solr.GeoHashField; the element name uses the same
     fieldType casing as the neighbouring declarations. -->
<fieldType name="geohash" class="solr.GeoHashField"/>
<fieldType name="currency" class="solr.CurrencyField" precisionStep="8"
defaultCurrency="USD" currencyConfig="currency.xml" />
</types>
<fields>
<field name="ida" type="string" indexed="true" stored="true"
multiValued="false"/>
<field name="solr_id" type="string" indexed="true" stored="true"
multiValued="false"/>
<field name="apofasi_number" type="text_areios_pagos" indexed="true"
stored="true" multiValued="true"/>
<field name="apofasi_date" type="text_areios_pagos" indexed="true"
stored="true"/>
<field name="apofasi_tmima" type="text_areios_pagos" indexed="true"
stored="true"/>
<field name="apofasi_taxonomy" type="text_areios_pagos" indexed="true"
stored="true"/>
<field name="content" type="text_areios_pagos" indexed="true"
stored="true" multiValued="true"/>
<field name="type" type="string" indexed="true" stored="true"/>
<field name="model" type="string" indexed="true" stored="true"
multiValued="false"/>
<field name="url" type="string" indexed="true" stored="true"/>
<field name="search_tag" type="text_areios_pagos" indexed="true"
stored="true"/>
<field name="contentbin" type="text" indexed="true" stored="true"
multiValued="true"/>
<field name="last_modified" type="string" indexed="true" stored="true"/>
<field name="title" type="text_areios_pagos" indexed="true" stored="true"
multiValued="true"/>
<field name="grid_title" type="text_areios_pagos" indexed="true"
stored="true"/>
<!-- Destination of the copyField whose source is "content". Declared
     multiValued because "content" is itself multiValued, so more than one
     value can be copied into this field for a single document.
     NOTE(review): changing this attribute requires a full re-index. -->
<field name="contentS" type="text_areios_pagos_s" indexed="true"
       stored="true" multiValued="true"/>
</fields>
<uniqueKey>solr_id</uniqueKey>
<defaultSearchField>content</defaultSearchField>
<solrQueryParser defaultOperator="AND"/>
<!-- Copy the individual metadata, title and body fields into "content",
     which is the default search field of this schema. -->
<copyField source="apofasi_number" dest="content" />
<copyField source="apofasi_date" dest="content" />
<copyField source="apofasi_tmima" dest="content" />
<copyField source="apofasi_taxonomy" dest="content" />
<copyField source="title" dest="content" />
<copyField source="search_tag" dest="content" />
<copyField source="contentbin" dest="content"/>
<!-- Copy "content" into "contentS" so the same text is also indexed with
     the text_areios_pagos_s type.
     NOTE(review): Solr is documented as not chaining copyField directives,
     so "contentS" presumably receives only values supplied directly to
     "content", not the values copied into "content" by the directives
     above. Confirm that this is the intended behaviour. -->
<copyField source="content" dest="contentS" />
</schema>
Regards,
Tom
--
View this message in context:
http://lucene.472066.n3.nabble.com/CopyField-issue-on-Solr4-1-tp4037373.html
Sent from the Solr - User mailing list archive at Nabble.com.