indexing a message on solr7.7.1 is failing with the following error. any
help is appreciated. attaching schema files.
2019-05-24 19:32:42.010 ERROR (qtp1115201599-17) [c:bn_sample s:shard1
r:core_node2 x:bn_sample_shard1_replica_n1] o.a.s.h.RequestHandlerBase
org.apache.solr.common.SolrException: Exception writing document id 1
to the index; possible analysis error: startOffset must be
non-negative, and endOffset must be >= startOffset, and offsets must
not go backwards startOffset=1,endOffset=3,lastStartOffset=6721 for
field 'message_text'
at
org.apache.solr.update.DirectUpdateHandler2.addDoc(DirectUpdateHandler2.java:243)
at
org.apache.solr.update.processor.RunUpdateProcessor.processAdd(RunUpdateProcessorFactory.java:67)
at
org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:55)
at
org.apache.solr.update.processor.DistributedUpdateProcessor.doLocalAdd(DistributedUpdateProcessor.java:1001)
at
org.apache.solr.update.processor.DistributedUpdateProcessor.versionAdd(DistributedUpdateProcessor.java:1222)
at
org.apache.solr.update.processor.DistributedUpdateProcessor.processAdd(DistributedUpdateProcessor.java:693)
at
org.apache.solr.update.processor.LogUpdateProcessorFactory$LogUpdateProcessor.processAdd(LogUpdateProcessorFactory.java:103)
at org.apache.solr.handler.loader.JavabinLoader$1.update(JavabinLoader.java:110)
at
org.apache.solr.client.solrj.request.JavaBinUpdateRequestCodec$StreamingCodec.readOuterMostDocIterator(JavaBinUpdateRequestCodec.java:327)
at
org.apache.solr.client.solrj.request.JavaBinUpdateRequestCodec$StreamingCodec.readIterator(JavaBinUpdateRequestCodec.java:280)
at org.apache.solr.common.util.JavaBinCodec.readObject(JavaBinCodec.java:333)
at org.apache.solr.common.util.JavaBinCodec.readVal(JavaBinCodec.java:278)
at
org.apache.solr.client.solrj.request.JavaBinUpdateRequestCodec$StreamingCodec.readNamedList(JavaBinUpdateRequestCodec.java:235)
at org.apache.solr.common.util.JavaBinCodec.readObject(JavaBinCodec.java:298)
at org.apache.solr.common.util.JavaBinCodec.readVal(JavaBinCodec.java:278)
at org.apache.solr.common.util.JavaBinCodec.unmarshal(JavaBinCodec.java:191)
at
org.apache.solr.client.solrj.request.JavaBinUpdateRequestCodec.unmarshal(JavaBinUpdateRequestCodec.java:126)
at
org.apache.solr.handler.loader.JavabinLoader.parseAndLoadDocs(JavabinLoader.java:123)
at org.apache.solr.handler.loader.JavabinLoader.load(JavabinLoader.java:70)
at
org.apache.solr.handler.UpdateRequestHandler$1.load(UpdateRequestHandler.java:97)
at
org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68)
at
org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:199)
at org.apache.solr.core.SolrCore.execute(SolrCore.java:2551)
at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:710)
at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:516)
at
org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:395)
at
org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:341)
at
org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1602)
at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:540)
at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:146)
at org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548)
at
org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
at
org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:257)
at
org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:1588)
at
org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:255)
at
org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1345)
at
org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:203)
at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:480)
at
org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:1557)
at
org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:201)
at
org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1247)
at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:144)
at
org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:220)
at
org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:126)
at
org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
at
org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335)
at
org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
at org.eclipse.jetty.server.Server.handle(Server.java:502)
at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:364)
at org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:260)
at
org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:305)
at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:103)
at org.eclipse.jetty.io.ChannelEndPoint$2.run(ChannelEndPoint.java:118)
at
org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.runTask(EatWhatYouKill.java:333)
at
org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.doProduce(EatWhatYouKill.java:310)
at
org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.tryProduce(EatWhatYouKill.java:168)
at
org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.run(EatWhatYouKill.java:126)
at
org.eclipse.jetty.util.thread.ReservedThreadExecutor$ReservedThread.run(ReservedThreadExecutor.java:366)
at
org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:765)
at
org.eclipse.jetty.util.thread.QueuedThreadPool$2.run(QueuedThreadPool.java:683)
at java.lang.Thread.run(Thread.java:745)
Caused by: java.lang.IllegalArgumentException: startOffset must be
non-negative, and endOffset must be >= startOffset, and offsets must
not go backwards startOffset=1,endOffset=3,lastStartOffset=6721 for
field 'message_text'
at
org.apache.lucene.index.DefaultIndexingChain$PerField.invert(DefaultIndexingChain.java:824)
at
org.apache.lucene.index.DefaultIndexingChain.processField(DefaultIndexingChain.java:430)
at
org.apache.lucene.index.DefaultIndexingChain.processDocument(DefaultIndexingChain.java:394)
at
org.apache.lucene.index.DocumentsWriterPerThread.updateDocument(DocumentsWriterPerThread.java:251)
at
org.apache.lucene.index.DocumentsWriter.updateDocument(DocumentsWriter.java:494)
at org.apache.lucene.index.IndexWriter.updateDocument(IndexWriter.java:1616)
at org.apache.lucene.index.IndexWriter.updateDocument(IndexWriter.java:1608)
at
org.apache.solr.update.DirectUpdateHandler2.updateDocOrDocValues(DirectUpdateHandler2.java:969)
at
org.apache.solr.update.DirectUpdateHandler2.doNormalUpdate(DirectUpdateHandler2.java:341)
at
org.apache.solr.update.DirectUpdateHandler2.addDoc0(DirectUpdateHandler2.java:288)
at
org.apache.solr.update.DirectUpdateHandler2.addDoc(DirectUpdateHandler2.java:235)
... 60 more
<https://jira.oraclecorp.com/jira/browse/SOCIALCLOUD-3515#>
Permalink
<https://jira.oraclecorp.com/jira/browse/SOCIALCLOUD-3515?focusedCommentId=45263454&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-45263454>
Edit
malink
Edit
<https://jira.oraclecorp.com/jira/secure/EditComment!default.jspa?id=8148094&commentId=45263454>
<schema name="ci-signal" version="2.0">
<types>
<fieldType class="solr.StrField" name="string" sortMissingLast="true"></fieldType>
<fieldtype class="solr.BinaryField" name="binary"></fieldtype>
<fieldType class="solr.TrieIntField" name="int" positionIncrementGap="0" precisionStep="0"></fieldType>
<fieldType class="solr.TrieLongField" name="long" positionIncrementGap="0" precisionStep="0"></fieldType>
<fieldType class="solr.TrieIntField" name="tint" positionIncrementGap="0" precisionStep="8"></fieldType>
<fieldType class="solr.TrieDateField" name="date" positionIncrementGap="0" precisionStep="0"></fieldType>
<fieldType class="solr.RandomSortField" indexed="true" name="random"></fieldType>
<fieldType autoGeneratePhraseQueries="true" class="solr.TextField" name="standard_text_legacy" omitNorms="true" omitTermFreqAndPositions="false" positionIncrementGap="100" termOffsets="false" termPositions="false" termVectors="false">
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"></tokenizer>
<filter class="solr.ICUNormalizer2FilterFactory" mode="compose" name="nfkc_cf"></filter>
<filter class="solr.HyphenatedWordsFilterFactory"></filter>
<filter catenateAll="0" catenateNumbers="0" catenateWords="0" class="solr.WordDelimiterFilterFactory" generateNumberParts="1" generateWordParts="1" preserveOriginal="1" splitOnCaseChange="0" splitOnNumerics="0" stemEnglishPossessive="1"></filter>
<filter class="solr.TruncateTokenFilterFactory" prefixLength="45"></filter>
</analyzer>
</fieldType>
<!--
<fieldType autoGeneratePhraseQueries="true" class="solr.TextField" name="standard_text" omitNorms="true" omitTermFreqAndPositions="false" positionIncrementGap="100" termOffsets="false" termPositions="false" termVectors="false">
<analyzer type="index">
<tokenizer class="com.ci.nlp.lucene.OltLuceneTokenizerFactory" languageCode="en" maxTextSize="600000" tokenTextType="ORIGINAL_TEXT"></tokenizer>
<filter class="solr.ICUNormalizer2FilterFactory" mode="compose" name="nfkc_cf"></filter>
<filter catenateAll="0" catenateNumbers="0" catenateWords="0" class="solr.WordDelimiterFilterFactory" generateNumberParts="1" generateWordParts="1" preserveOriginal="1" splitOnCaseChange="0" splitOnNumerics="0" stemEnglishPossessive="1"></filter>
<filter class="solr.TruncateTokenFilterFactory" prefixLength="45"></filter>
</analyzer>
</fieldType>
-->
<fieldType class="solr.TextField" name="simple_url" omitNorms="true" sortMissingLast="true">
<analyzer>
<filter class="solr.TrimFilterFactory"></filter>
<tokenizer class="solr.PatternTokenizerFactory" pattern="/"></tokenizer>
<filter class="solr.PatternReplaceCharFilterFactory" pattern="^http(s)?:[0-9]*" replace="all" replacement=""></filter>
<filter class="solr.PatternReplaceCharFilterFactory" pattern="(#.*$)" replace="all" replacement=""></filter>
<filter class="solr.PatternReplaceCharFilterFactory" pattern="(\?.*$)" replace="all" replacement=""></filter>
<filter catenateAll="0" catenateNumbers="0" catenateWords="0" class="solr.WordDelimiterFilterFactory" generateNumberParts="1" generateWordParts="1" preserveOriginal="1" splitOnCaseChange="1" stemEnglishPossessive="0"></filter>
<filter class="solr.LowerCaseFilterFactory"></filter>
<filter class="solr.RemoveDuplicatesTokenFilterFactory"></filter>
</analyzer>
</fieldType>
<fieldType class="solr.TextField" name="simple_text" omitNorms="true" sortMissingLast="true">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"></tokenizer>
<filter class="solr.LowerCaseFilterFactory"></filter>
<filter class="solr.TrimFilterFactory"></filter>
</analyzer>
</fieldType>
<fieldType class="solr.TextField" name="lower_string" omitNorms="true" sortMissingLast="true">
<analyzer>
<tokenizer class="solr.KeywordTokenizerFactory"></tokenizer>
<filter class="solr.LowerCaseFilterFactory"></filter>
<filter class="solr.TrimFilterFactory"></filter>
</analyzer>
</fieldType>
<fieldType class="solr.TextField" name="datasource" omitNorms="true" sortMissingLast="true">
<analyzer>
<tokenizer class="solr.PatternTokenizerFactory" pattern="_"></tokenizer>
<filter class="solr.LowerCaseFilterFactory"></filter>
</analyzer>
</fieldType>
<fieldType autoIndex="true" class="solr.SpatialRecursivePrefixTreeFieldType" geo="true" name="location_rpt" spatialContextFactory="com.spatial4j.core.context.jts.JtsSpatialContextFactory" distanceUnits="degrees"></fieldType>
<fieldtype class="solr.StrField" indexed="false" multiValued="true" name="ignored" stored="false"></fieldtype>
</types>
<fields>
<field indexed="true" name="message_key" required="true" stored="true" type="string"></field>
<field indexed="true" name="message_message_lookup_id" required="true" stored="true" type="string"></field>
<field indexed="true" name="message_source_generated_id" required="false" stored="true" type="string"></field>
<field indexed="true" name="message_url" required="true" stored="true" type="simple_url"></field>
<field indexed="false" name="message_subject" required="false" stored="true" type="string"></field>
<field indexed="false" name="message_body" required="false" stored="true" type="string"></field>
<field indexed="true" name="message_date" required="true" stored="true" type="date"></field>
<field indexed="false" name="message_language_code" required="true" stored="true" type="string"></field>
<field indexed="true" name="message_data_source_context" required="false" stored="true" type="string"></field>
<field indexed="true" name="message_data_source_context_id" required="true" stored="true" type="datasource"></field>
<field indexed="true" name="message_collection_strategy" required="true" stored="true" type="string"></field>
<field indexed="true" multiValued="true" name="message_text" required="true" stored="false" type="standard_text_legacy"></field>
<field indexed="true" name="message_location_display_name" required="false" stored="true" type="simple_text"></field>
<field indexed="true" name="message_latlon" required="false" stored="true" type="location_rpt"></field>
<field indexed="true" name="message_country" required="false" stored="true" type="string"></field>
<field indexed="true" name="message_region" required="false" stored="true" type="string"></field>
<field indexed="true" name="message_subregion" required="false" stored="true" type="string"></field>
<field indexed="true" name="message_locality" required="false" stored="true" type="simple_text"></field>
<field indexed="true" name="source_name" required="true" stored="true" type="lower_string"></field>
<field indexed="true" name="source_url" required="true" stored="true" type="simple_url"></field>
<field indexed="true" name="author_name" required="false" stored="true" type="lower_string"></field>
<field indexed="true" name="author_source_generated_id" required="false" stored="true" type="string"></field>
<field indexed="true" name="author_location_display_name" required="false" stored="true" type="simple_text"></field>
<field indexed="true" name="author_latlon" required="false" stored="true" type="location_rpt"></field>
<field indexed="true" name="author_country" required="false" stored="true" type="string"></field>
<field indexed="true" name="author_region" required="false" stored="true" type="string"></field>
<field indexed="true" name="author_subregion" required="false" stored="true" type="string"></field>
<field indexed="true" name="author_locality" required="false" stored="true" type="simple_text"></field>
<field indexed="true" name="compliance_update_timestamp" required="false" stored="true" type="date"></field>
<field indexed="true" name="compliance_action" required="false" stored="true" type="string"></field>
<field default="NOW" indexed="true" name="index_timestamp" required="true" stored="true" type="date"></field>
<field indexed="true" multiValued="false" name="_version_" stored="true" type="long"></field>
<dynamicField name="random_*" type="random"></dynamicField>
<dynamicField name="*" type="ignored"></dynamicField>
</fields>
<uniqueKey>message_key</uniqueKey>
<!-- <defaultSearchField>message_text</defaultSearchField> ERROR: Setting defaultSearchField in schema not supported since Solr 7 -->
<!-- <solrQueryParser defaultOperator="OR"></solrQueryParser> ERROR: Setting default operator in schema (solrQueryParser/@defaultOperator) not supported -->
<copyField dest="message_text" source="message_subject"></copyField>
<copyField dest="message_text" source="message_body"></copyField>
</schema>
<config>
<luceneMatchVersion>7.7.1</luceneMatchVersion>
<abortOnConfigurationError>${solr.abortOnConfigurationError:true}</abortOnConfigurationError>
<dataDir>${solr.data.dir:}</dataDir>
<directoryFactory class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}" name="DirectoryFactory"></directoryFactory>
<codecFactory class="solr.SchemaCodecFactory"></codecFactory>
<schemaFactory class="ClassicIndexSchemaFactory"></schemaFactory>
<indexConfig>
<filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="10000"></filter>
<ramBufferSizeMB>1024</ramBufferSizeMB>
<lockType>${solr.lock.type:native}</lockType>
</indexConfig>
<jmx></jmx>
<initParams path="/select">
<lst name="defaults">
<str name="df">message_text</str>
<str name="q.op">OR</str>
</lst>
</initParams>
<updateHandler class="solr.DirectUpdateHandler2">
<updateLog>
<str name="dir">${solr.ulog.dir:}</str>
</updateLog>
<autoCommit>
<maxTime>15000</maxTime>
<openSearcher>false</openSearcher>
</autoCommit>
<autoSoftCommit>
<maxTime>600000</maxTime>
</autoSoftCommit>
</updateHandler>
<query>
<maxBooleanClauses>10240</maxBooleanClauses>
<filterCache autowarmCount="1" class="solr.FastLRUCache" initialSize="1" size="5"></filterCache>
<queryResultCache autowarmCount="1" class="solr.LRUCache" initialSize="1" size="50"></queryResultCache>
<documentCache autowarmCount="10" class="solr.LRUCache" initialSize="1" size="50"></documentCache>
<enableLazyFieldLoading>true</enableLazyFieldLoading>
<useFilterForSortedQuery>true</useFilterForSortedQuery>
<queryResultWindowSize>1000</queryResultWindowSize>
<queryResultMaxDocsCached>2000</queryResultMaxDocsCached>
<listener class="solr.QuerySenderListener" event="newSearcher">
<arr name="queries">
<lst>
<str name="q">oracle</str><str name="fq">message_date_id:[20130101 TO 20170101]</str><str name="sort">random_51 desc</str>
</lst>
</arr>
</listener>
<listener class="solr.QuerySenderListener" event="firstSearcher">
<arr name="queries">
<lst>
<str name="q">oracle</str><str name="fq">message_date_id:[20130101 TO 20170101]</str><str name="sort">random_51 desc</str>
</lst>
</arr>
</listener>
<useColdSearcher>true</useColdSearcher>
<maxWarmingSearchers>2</maxWarmingSearchers>
</query>
<requestDispatcher handleSelect="true">
<requestParsers enableRemoteStreaming="true" formdataUploadLimitInKB="2048" multipartUploadLimitInKB="2048000"></requestParsers>
<httpCaching never304="true"></httpCaching>
</requestDispatcher>
<requestHandler class="solr.SearchHandler" name="/select">
<lst name="defaults">
<str name="echoParams">explicit</str>
<str name="df">message_text</str>
<int name="rows">10</int>
<str name="df">text</str>
</lst>
</requestHandler>
<requestHandler class="solr.StandardRequestHandler" name="/standard">
<lst name="defaults">
<str name="echoParams">explicit</str>
<str name="df">message_text</str>
<int name="rows">0</int>
</lst>
</requestHandler>
<requestHandler class="solr.SearchHandler" name="/query">
<lst name="defaults">
<str name="echoParams">explicit</str>
<str name="df">message_text</str>
<str name="wt">json</str>
<str name="indent">true</str>
<str name="df">text</str>
</lst>
</requestHandler>
<requestHandler class="solr.RealTimeGetHandler" name="/get">
<lst name="defaults">
<str name="omitHeader">true</str>
<str name="wt">json</str>
<str name="indent">true</str>
</lst>
</requestHandler>
<requestHandler class="solr.UpdateRequestHandler" name="/update">
</requestHandler>
<requestHandler class="solr.extraction.ExtractingRequestHandler" name="/update/extract" startup="lazy">
<lst name="defaults">
<str name="lowernames">true</str>
<str name="uprefix">ignored_</str>
<str name="captureAttr">true</str>
<str name="fmap.a">links</str>
<str name="fmap.div">ignored_</str>
</lst>
</requestHandler>
<requestHandler class="solr.FieldAnalysisRequestHandler" name="/analysis/field" startup="lazy"></requestHandler>
<requestHandler class="solr.DocumentAnalysisRequestHandler" name="/analysis/document" startup="lazy"></requestHandler>
<!-- <requestHandler class="solr.CoreAdminHandler" name="/admin/"></requestHandler> -->
<requestHandler class="solr.PingRequestHandler" name="/admin/ping">
<lst name="invariants">
<str name="q">solrpingquery</str>
</lst>
<lst name="defaults">
<str name="echoParams">all</str>
</lst>
</requestHandler>
<requestHandler class="solr.DumpRequestHandler" name="/debug/dump">
<lst name="defaults">
<str name="echoParams">explicit</str>
<str name="echoHandler">true</str>
</lst>
</requestHandler>
<requestHandler class="solr.ReplicationHandler" name="/replication">
</requestHandler>
<searchComponent class="solr.TermVectorComponent" name="tvComponent"></searchComponent>
<requestHandler class="solr.SearchHandler" name="/tvrh" startup="lazy">
<lst name="defaults">
<str name="df">text</str>
<bool name="tv">true</bool>
</lst>
<arr name="last-components">
<str>tvComponent</str>
</arr>
</requestHandler>
<searchComponent class="solr.TermsComponent" name="terms"></searchComponent>
<requestHandler class="solr.SearchHandler" name="/terms" startup="lazy">
<lst name="defaults">
<bool name="terms">true</bool>
<bool name="distrib">false</bool>
</lst>
<arr name="components">
<str>terms</str>
</arr>
</requestHandler>
<queryResponseWriter class="solr.JSONResponseWriter" name="json">
<str name="content-type">text/plain; charset=UTF-8</str>
</queryResponseWriter>
<queryResponseWriter class="solr.RubyResponseWriter" name="ruby"></queryResponseWriter>
<queryResponseWriter class="solr.CSVResponseWriter" name="csv"></queryResponseWriter>
<queryResponseWriter class="solr.XSLTResponseWriter" name="xslt">
<int name="xsltCacheLifetimeSeconds">5</int>
</queryResponseWriter>
<admin>
<defaultQuery>*:*</defaultQuery>
</admin>
</config>