I've defined my update processors as:

<!-- Update chain "langid": a language-identification processor followed by
     the log and run-update processors. -->
<updateRequestProcessorChain name="langid">
   <processor
class="org.apache.solr.update.processor.LangDetectLanguageIdentifierUpdateProcessorFactory">
      <!-- Every langid.* setting is declared under "invariants". -->
      <lst name="invariants">
        <!-- langid.fl names the "content" field; presumably the field whose
             text is analysed (confirm against the Solr language-detection docs). -->
        <str name="langid.fl">content</str>
        <!-- Whitelist is en,tr; the fallback value configured below is "other". -->
        <str name="langid.whitelist">en,tr</str>
        <!-- langid.langField names the "language_code" field; presumably where
             the detected code is stored (confirm against the Solr docs). -->
        <str name="langid.langField">language_code</str>
        <str name="langid.fallback">other</str>
        <!-- Field mapping is switched on, with keepOrig also set to true. -->
        <bool name="langid.map">true</bool>
        <bool name="langid.map.keepOrig">true</bool>
      </lst>
    </processor>
   <processor class="solr.LogUpdateProcessorFactory" />
   <processor class="solr.RunUpdateProcessorFactory" />
 </updateRequestProcessorChain>

 <!-- Update chain "dedupe": a signature processor followed by the log and
      run-update processors. -->
 <updateRequestProcessorChain name="dedupe">
   <processor class="solr.processor.SignatureUpdateProcessorFactory">
     <bool name="enabled">true</bool>
     <!-- signatureField names the "signature" field; "fields" below names
          "content" (presumably the input to the signature; confirm against
          the Solr de-duplication docs). -->
     <str name="signatureField">signature</str>
     <!-- overwriteDupes is set to false. -->
     <bool name="overwriteDupes">false</bool>
     <str name="fields">content</str>
     <!-- minTokenLen is declared as a <str> with value 3. -->
     <str name="minTokenLen">3</str>
     <!-- The signature implementation is given by fully qualified class name. -->
     <str
name="signatureClass">org.apache.solr.update.processor.TextProfileSignature</str>
   </processor>
   <processor class="solr.LogUpdateProcessorFactory" />
   <processor class="solr.RunUpdateProcessorFactory" />
 </updateRequestProcessorChain>

 <!-- Update chain "ignore-commit-from-client": the only chain in this
      configuration marked default="true", and the only one that lists
      DistributedUpdateProcessorFactory. -->
 <updateRequestProcessorChain name="ignore-commit-from-client"
default="true">
   <processor class="solr.IgnoreCommitOptimizeUpdateProcessorFactory">
     <!-- statusCode is set to 200. -->
     <int name="statusCode">200</int>
   </processor>
   <processor class="solr.LogUpdateProcessorFactory" />
   <processor class="solr.DistributedUpdateProcessorFactory" />
   <processor class="solr.RunUpdateProcessorFactory" />
 </updateRequestProcessorChain>

My /update/extract request handler is as follows:

<!-- /update/extract handler, first variant: "dedupe" is listed first among
     the update.chain invariants. -->
<requestHandler name="/update/extract"
                startup="lazy"
                class="solr.extraction.ExtractingRequestHandler" >
  <!-- Defaults: lowernames and captureAttr are enabled; meta, div and a are
       mapped to the "ignored_" prefix and content to the "content" field. -->
  <lst name="defaults">
    <str name="lowernames">true</str>
    <str name="captureAttr">true</str>
    <str name="fmap.meta">ignored_</str>
    <str name="fmap.content">content</str>
    <str name="fmap.div">ignored_</str>
    <str name="fmap.a">ignored_</str>
  </lst>
  <!-- The same update.chain key is given three values. As reported in this
       message, with this ordering the signature field is populated but the
       langid processor is not triggered. -->
  <lst name="invariants">
    <str name="update.chain">dedupe</str>
    <str name="update.chain">langid</str>
    <str name="update.chain">ignore-commit-from-client</str>
 </lst>
</requestHandler>

The dedupe chain works and the signature field is populated, but the langid
processor is not triggered with this combination. When I swap their order:

<!-- /update/extract handler, second variant: identical to the first except
     that "langid" is listed before "dedupe" among the update.chain invariants. -->
<requestHandler name="/update/extract"
                startup="lazy"
                class="solr.extraction.ExtractingRequestHandler" >
  <!-- Defaults: lowernames and captureAttr are enabled; meta, div and a are
       mapped to the "ignored_" prefix and content to the "content" field. -->
  <lst name="defaults">
    <str name="lowernames">true</str>
    <str name="captureAttr">true</str>
    <str name="fmap.meta">ignored_</str>
    <str name="fmap.content">content</str>
    <str name="fmap.div">ignored_</str>
    <str name="fmap.a">ignored_</str>
  </lst>
  <!-- The same update.chain key is given three values. As reported in this
       message, with this ordering langid works but the signature field is
       no longer populated. -->
  <lst name="invariants">
    <str name="update.chain">langid</str>
    <str name="update.chain">dedupe</str>
    <str name="update.chain">ignore-commit-from-client</str>
 </lst>
</requestHandler>

langid works but dedupe is not activated (the signature field disappears).

I use Solr 6.3. How can I solve this problem?

Kind Regards,
Furkan KAMACI

Reply via email to