You need to send the document to the /update/extract handler instead. That is
the handler that accepts “rich documents” (such as PDFs), whereas /update only
handles the content types listed in the error message.

        Erik

On Aug 20, 2014, at 9:34 AM, Croci Francesco Luigi (ID SWS) <fcr...@id.ethz.ch> 
wrote:

> Hallo,
> 
> I have solr 4.9.0 and I’m getting the above error if I try to index a pdf 
> document with the Solr Web-Interface.
> 
> Here is my schema and solrconfig. Do I miss something? :
> 
> <?xml version="1.0" encoding="UTF-8" ?>
> <schema name="simple" version="1.1">
>                <types>
>                               <fieldtype name="string" class="solr.StrField" 
> postingsFormat="SimpleText" />
>                               <fieldtype name="ignored" 
> class="solr.TextField" />
>                               <fieldtype name="text" class="solr.TextField" 
> postingsFormat="SimpleText">
>                                               <analyzer type="index">
>                                                               <tokenizer 
> class="solr.StandardTokenizerFactory"/>
>                                                               <filter 
> class="solr.LowerCaseFilterFactory" /> <!--Lowercases the letters in each 
> token. Leaves non-letter tokens alone.-->
>                                                               <filter 
> class="solr.TrimFilterFactory"/> <!--Trims whitespace at either end of a 
> token. -->
>                                                               <filter 
> class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/> 
> <!--Discards common words.  -->
>                                                               <filter 
> class="solr.RemoveDuplicatesTokenFilterFactory"/>
>                                               </analyzer>
>                                               <analyzer type="query">
>                                                               <tokenizer 
> class="solr.StandardTokenizerFactory"/>
>                                                               <filter 
> class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
>                                                               <filter 
> class="solr.LowerCaseFilterFactory" />
>                                                               <filter 
> class="solr.TrimFilterFactory"/>
>                                                               <filter 
> class="solr.RemoveDuplicatesTokenFilterFactory"/>
>                                               </analyzer>
>                               </fieldtype>
>                </types>
> 
>                <fields>
>                               <field name="signatureField" type="string" 
> indexed="true" stored="true" multiValued="false" />
>                               <dynamicField name="ignored_*" type="ignored" 
> multiValued="true" indexed="false" stored="false" />
>                               <field name="id" type="string" indexed="true" 
> stored="true" multiValued="false" />
>                               <field name="fullText" type="text" 
> indexed="true" multiValued="true" />
>                </fields>
> 
>                <defaultSearchField>fullText</defaultSearchField>
> 
>                <solrQueryParser defaultOperator="OR" />
>                <uniqueKey>id</uniqueKey>
> </schema>
> 
> 
> 
> <?xml version="1.0" encoding="UTF-8" ?>
> <config>
>                <luceneMatchVersion>LUCENE_45</luceneMatchVersion>
>                <directoryFactory name='DirectoryFactory' 
> class='solr.MMapDirectoryFactory' />
> 
>                <codecFactory name="CodecFactory" 
> class="solr.SchemaCodecFactory" />
> 
>                <!-- <lib dir='${solr.core.instanceDir}/lib' /> -->
>                <lib dir="${solr.core.instanceDir}/dist/" 
> regex="solr-cell-\d.*\.jar" />
>                <lib dir="${solr.core.instanceDir}/contrib/extraction/lib" 
> regex=".*\.jar" />
>                <!-- <lib dir="${solr.core.instanceDir}/dist/" 
> regex="solr-langid-.*\.jar" />
>                <lib dir="${solr.core.instanceDir}/contrib/langid/lib/" />-->
> 
>                <requestHandler name="standard" 
> class="solr.StandardRequestHandler" default="true" />
> 
>                <requestHandler name="/admin/" 
> class="org.apache.solr.handler.admin.AdminHandlers" />
> 
>                <requestHandler name="/admin/luke" 
> class="org.apache.solr.handler.admin.LukeRequestHandler" />
> 
>                <requestHandler name="/update" 
> class="solr.UpdateRequestHandler">
>                               <lst name="defaults">
>                                               <str 
> name="update.chain">deduplication</str>
>                               </lst>
>                </requestHandler>
> 
>                <requestHandler name="/update/extract" 
> class="solr.extraction.ExtractingRequestHandler">
>                               <lst name="defaults">
>                                               <str 
> name="captureAttr">true</str>
>                                               <str 
> name="lowernames">false</str>
>                                               <str 
> name="overwrite">false</str>
>                                               <str 
> name="literalsOverride">true</str>
>                                               <str 
> name="uprefix">ignored_</str>
>                                               <str name="fmap.a">link</str>
>                                               <str 
> name="fmap.content">fullText</str>
>                                               <!-- the configuration here 
> could be useful for tests -->
>                                               <str 
> name="update.chain">deduplication</str>
>                               </lst>
>                </requestHandler>
> 
>                <updateRequestProcessorChain name="deduplication">
>                               <processor 
> class="org.apache.solr.update.processor.SignatureUpdateProcessorFactory">
>                                               <bool 
> name="overwriteDupes">false</bool>
>                                               <str 
> name="signatureField">signatureField</str>
>                                               <bool name="enabled">true</bool>
>                                               <str name="fields">content</str>
>                                               <str name="minTokenLen">10</str>
>                                               <str name="quantRate">.2</str>
>                                               <str 
> name="signatureClass">solr.update.processor.TextProfileSignature</str>
>                               </processor>
>                               <processor 
> class="solr.LogUpdateProcessorFactory" />
>                               <processor 
> class="solr.RunUpdateProcessorFactory" />
>                </updateRequestProcessorChain>
> 
>                <requestHandler name="/selectAdmin" class="solr.SearchHandler">
>                               <lst name="defaults">
> 
>                   </lst>
>                </requestHandler>
> 
>                <requestHandler name="/select" class="solr.SearchHandler">
>                               <str name="echoParams">explicit</str>
>                               <int name="rows">10</int>
>                </requestHandler>
> 
>                <lockType>none</lockType>
> 
>                <admin>
>                               <defaultQuery>*:*</defaultQuery>
>                </admin>
> 
> </config>

Reply via email to