Hi, I upgraded to Solr 7 today and I am seeing tons of the following errors for various fields.

o.a.s.h.RequestHandlerBase org.apache.solr.common.SolrException: Exception writing document id file_38810000549 to the index; possible analysis error: startOffset must be non-negative, and endOffset must be >= startOffset, and offsets must not go backwards startOffset=6,endOffset=8,lastStartOffset=9 for field 'name_combined'

We don't have a lot of custom code for analysis at indexing time, so my suspicion is on the schema definition. Can someone suggest how I should start debugging this?

Here are the relevant field definitions and their index-time analyzers:

<field name="file_content_en" type="text_stemming_en" indexed="true" stored="true" omitPositions="false"/>

<analyzer type="index">
  <charFilter class="org.apache.lucene.analysis.icu.ICUNormalizer2CharFilterFactory" name="nfkc" mode="compose"/>
  <tokenizer class="solr.WhitespaceTokenizerFactory"/>
  <filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" preserveOriginal="1" splitOnCaseChange="0" splitOnNumerics="0" stemEnglishPossessive="1"/>
  <filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" preserveOriginal="1" splitOnCaseChange="1" splitOnNumerics="1" stemEnglishPossessive="1"/>
  <filter class="solr.PatternReplaceFilterFactory" pattern="^(\p{Punct}*)(.*?)(\p{Punct}*)$" replacement="$2"/>
  <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
  <filter class="solr.LowerCaseFilterFactory"/>
  <filter class="solr.ASCIIFoldingFilterFactory"/>
  <filter class="solr.SnowballPorterFilterFactory" />
  <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  <filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="10000" consumeAllTokens="false"/>
  <filter class="solr.LengthFilterFactory" min="1" max="255"/>
</analyzer>

<field name="name_combined" type="text_ngram" indexed="true" stored="false" multiValued="true" omitPositions="true"/>

<analyzer type="index">
  <charFilter class="org.apache.lucene.analysis.icu.ICUNormalizer2CharFilterFactory" name="nfkc" mode="compose"/>
  <tokenizer class="solr.WhitespaceTokenizerFactory"/>
  <filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" preserveOriginal="1" splitOnCaseChange="0" splitOnNumerics="0" stemEnglishPossessive="1"/>
  <filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" preserveOriginal="1" splitOnCaseChange="1" splitOnNumerics="1" stemEnglishPossessive="1"/>
  <filter class="solr.PatternReplaceFilterFactory" pattern="^(\p{Punct}*)(.*?)(\p{Punct}*)$" replacement="$2"/>
  <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
  <filter class="solr.LowerCaseFilterFactory"/>
  <filter class="solr.ASCIIFoldingFilterFactory"/>
  <filter class="solr.EdgeNGramFilterFactory" minGramSize="1" maxGramSize="255"/>
  <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  <filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="10000" consumeAllTokens="false"/>
  <filter class="solr.LengthFilterFactory" min="1" max="255"/>
</analyzer>

Thanks,
Nawab