I was looking for a way to index only the content/text part of a PDF (without all the other fields Tika creates), and I found the "solution" of setting "uprefix" = ignored_ together with <dynamicField name="ignored_*" type="ignored" multiValued="true" indexed="false" stored="false" />.
The problem is that uprefix only works on fields that are not specified in the schema. In my schema I specified two fields (id and rmDocumentTitle), and these two fields are added to the content too (which I want to avoid). How can I prevent these two fields from being added to fullText? Here are my config files: schema.xml <?xml version="1.0" encoding="UTF-8" ?> <schema name="simple" version="1.1"> <types> <fieldtype name="string" class="solr.StrField" postingsFormat="SimpleText" /> <fieldtype name="ignored" class="solr.TextField" /> <fieldtype name="text" class="solr.TextField" postingsFormat="SimpleText"> <analyzer type="index"> <tokenizer class="solr.StandardTokenizerFactory"/> <!--<filter class="solr.ASCIIFoldingFilterFactory"/>--> <!--Converts alphabetic, numeric, and symbolic Unicode characters which are not in the first 127 ASCII characters into their ASCII equivalents, if one exists. --> <filter class="solr.LowerCaseFilterFactory" /> <!--Lowercases the letters in each token. Leaves non-letter tokens alone.--> <filter class="solr.TrimFilterFactory"/> <!--Trims whitespace at either end of a token. --> <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/> <!--Discards common words. 
--> <filter class="solr.PorterStemFilterFactory"/> <!--<filter class="solr.SnowballPorterFilterFactory" language="German2" /> --> <filter class="solr.RemoveDuplicatesTokenFilterFactory"/> </analyzer> <analyzer type="query"> <tokenizer class="solr.StandardTokenizerFactory"/> <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/> <filter class="solr.LowerCaseFilterFactory" /> <filter class="solr.TrimFilterFactory"/> <filter class="solr.PorterStemFilterFactory"/> <!--<filter class="solr.SnowballPorterFilterFactory" language="German2" /> --> <filter class="solr.RemoveDuplicatesTokenFilterFactory"/> </analyzer> </fieldtype> </types> <fields> <field name="signatureField" type="string" indexed="true" stored="true" multiValued="false" /> <dynamicField name="ignored_*" type="ignored" multiValued="true" indexed="false" stored="false" /> <field name="id" type="string" indexed="true" stored="true" multiValued="false" /> <field name="rmDocumentTitle" type="string" indexed="true" stored="true" multiValued="true"/> <field name="fullText" indexed="true" type="text" multiValued="true" /> </fields> <defaultSearchField>fullText</defaultSearchField> <solrQueryParser defaultOperator="OR" /> <uniqueKey>id</uniqueKey> </schema> solrconfig.xml <?xml version="1.0" encoding="UTF-8" ?> <config> ... 
<requestHandler name="/update/extract" class="solr.extraction.ExtractingRequestHandler"> <lst name="defaults"> <str name="captureAttr">true</str> <str name="lowernames">false</str> <str name="overwrite">false</str> <str name="captureAttr">true</str> <str name="literalsOverride">true</str> <str name="uprefix">ignored_</str> <str name="fmap.a">link</str> <str name="fmap.content">fullText</str> <!-- the configuration here could be useful for tests --> <str name="update.chain">deduplication</str> </lst> </requestHandler> <updateRequestProcessorChain name="deduplication"> <processor class="org.apache.solr.update.processor.SignatureUpdateProcessorFactory"> <bool name="overwriteDupes">false</bool> <str name="signatureField">signatureField</str> <bool name="enabled">true</bool> <str name="fields">content</str> <str name="minTokenLen">10</str> <str name="quantRate">.2</str> <str name="signatureClass">solr.update.processor.TextProfileSignature</str> </processor> <processor class="solr.LogUpdateProcessorFactory" /> <processor class="solr.RunUpdateProcessorFactory" /> </updateRequestProcessorChain> <requestHandler name="/admin/" class="org.apache.solr.handler.admin.AdminHandlers" /> <lockType>none</lockType> <admin> <defaultQuery>*:*</defaultQuery> </admin> </config> Thank you for any help. Francesco