You need to send the document to the /update/extract handler instead of /update. /update/extract is the handler that accepts “rich documents” such as your PDF, whereas /update only handles the content types listed in the error message.
Erik On Aug 20, 2014, at 9:34 AM, Croci Francesco Luigi (ID SWS) <fcr...@id.ethz.ch> wrote: > Hallo, > > I have solr 4.9.0 and I’m getting the above error if I try to index a pdf > document with the Solr Web-Interface. > > Here is my schema and solrconfig. Do I miss something? : > > <?xml version="1.0" encoding="UTF-8" ?> > <schema name="simple" version="1.1"> > <types> > <fieldtype name="string" class="solr.StrField" > postingsFormat="SimpleText" /> > <fieldtype name="ignored" > class="solr.TextField" /> > <fieldtype name="text" class="solr.TextField" > postingsFormat="SimpleText"> > <analyzer type="index"> > <tokenizer > class="solr.StandardTokenizerFactory"/> > <filter > class="solr.LowerCaseFilterFactory" /> <!--Lowercases the letters in each > token. Leaves non-letter tokens alone.--> > <filter > class="solr.TrimFilterFactory"/> <!--Trims whitespace at either end of a > token. --> > <filter > class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/> > <!--Discards common words. 
--> > <filter > class="solr.RemoveDuplicatesTokenFilterFactory"/> > </analyzer> > <analyzer type="query"> > <tokenizer > class="solr.StandardTokenizerFactory"/> > <filter > class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/> > <filter > class="solr.LowerCaseFilterFactory" /> > <filter > class="solr.TrimFilterFactory"/> > <filter > class="solr.RemoveDuplicatesTokenFilterFactory"/> > </analyzer> > </fieldtype> > </types> > > <fields> > <field name="signatureField" type="string" > indexed="true" stored="true" multiValued="false" /> > <dynamicField name="ignored_*" type="ignored" > multiValued="true" indexed="false" stored="false" /> > <field name="id" type="string" indexed="true" > stored="true" multiValued="false" /> > <field name="fullText" type="text" > indexed="true" multiValued="true" /> > </fields> > > <defaultSearchField>fullText</defaultSearchField> > > <solrQueryParser defaultOperator="OR" /> > <uniqueKey>id</uniqueKey> > </schema> > > > > <?xml version="1.0" encoding="UTF-8" ?> > <config> > <luceneMatchVersion>LUCENE_45</luceneMatchVersion> > <directoryFactory name='DirectoryFactory' > class='solr.MMapDirectoryFactory' /> > > <codecFactory name="CodecFactory" > class="solr.SchemaCodecFactory" /> > > <!-- <lib dir='${solr.core.instanceDir}/lib' /> --> > <lib dir="${solr.core.instanceDir}/dist/" > regex="solr-cell-\d.*\.jar" /> > <lib dir="${solr.core.instanceDir}/contrib/extraction/lib" > regex=".*\.jar" /> > <!-- <lib dir="${solr.core.instanceDir}/dist/" > regex="solr-langid-.*\.jar" /> > <lib dir="${solr.core.instanceDir}/contrib/langid/lib/" />--> > > <requestHandler name="standard" > class="solr.StandardRequestHandler" default="true" /> > > <requestHandler name="/admin/" > class="org.apache.solr.handler.admin.AdminHandlers" /> > > <requestHandler name="/admin/luke" > class="org.apache.solr.handler.admin.LukeRequestHandler" /> > > <requestHandler name="/update" > class="solr.UpdateRequestHandler"> > <lst name="defaults"> > <str > 
name="update.chain">deduplication</str> > </lst> > </requestHandler> > > <requestHandler name="/update/extract" > class="solr.extraction.ExtractingRequestHandler"> > <lst name="defaults"> > <str > name="captureAttr">true</str> > <str > name="lowernames">false</str> > <str > name="overwrite">false</str> > <str > name="literalsOverride">true</str> > <str > name="uprefix">ignored_</str> > <str name="fmap.a">link</str> > <str > name="fmap.content">fullText</str> > <!-- the configuration here > could be useful for tests --> > <str > name="update.chain">deduplication</str> > </lst> > </requestHandler> > > <updateRequestProcessorChain name="deduplication"> > <processor > class="org.apache.solr.update.processor.SignatureUpdateProcessorFactory"> > <bool > name="overwriteDupes">false</bool> > <str > name="signatureField">signatureField</str> > <bool name="enabled">true</bool> > <str name="fields">content</str> > <str name="minTokenLen">10</str> > <str name="quantRate">.2</str> > <str > name="signatureClass">solr.update.processor.TextProfileSignature</str> > </processor> > <processor > class="solr.LogUpdateProcessorFactory" /> > <processor > class="solr.RunUpdateProcessorFactory" /> > </updateRequestProcessorChain> > > <requestHandler name="/selectAdmin" class="solr.SearchHandler"> > <lst name="defaults"> > > </lst> > </requestHandler> > > <requestHandler name="/select" class="solr.SearchHandler"> > <str name="echoParams">explicit</str> > <int name="rows">10</int> > </requestHandler> > > <lockType>none</lockType> > > <admin> > <defaultQuery>*:*</defaultQuery> > </admin> > > </config>