Hello all,

I’m working on a project with Solr.  I had 1.4.1 working OK using 
ExtractingRequestHandler except that it was crashing on some PDFs.  I noticed 
that Tika bundled with 1.4.1 was 0.4, which was kind of old.  I decided to try 
updating to 0.7 as per the directions here: 
http://wiki.apache.org/solr/ExtractingRequestHandler  but it was giving me 
errors (I forget what they were specifically).

Then I tried downloading Solr 3.1 from the source repository, which I noticed 
came with Tika 0.7.  I figured this would be an easier route to get working.  
Now I’m testing with 3.1 and 0.7 and I’m noticing my documents are going into 
Solr OK, but they all have blank content (no document text stored in Solr).  I 
did see that the default “text” field is not stored. Changing that to 
stored=true didn’t help.  Changing to 
fmap.content=attr_content&uprefix=attr_content didn’t help either.  I have 
attached all relevant info here.  Please let me know if someone sees something 
I don’t (it’s entirely possible as I’m relatively new to Solr).

Schema.xml:
<?xml version="1.0" encoding="UTF-8" ?>
<schema name="example" version="1.3">
  <types>
    <fieldType name="string" class="solr.StrField" sortMissingLast="true" 
omitNorms="true"/>
    <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" 
omitNorms="true"/>
    <fieldtype name="binary" class="solr.BinaryField"/>
    <fieldType name="int" class="solr.TrieIntField" precisionStep="0" 
omitNorms="true" positionIncrementGap="0"/>
    <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" 
omitNorms="true" positionIncrementGap="0"/>
    <fieldType name="long" class="solr.TrieLongField" precisionStep="0" 
omitNorms="true" positionIncrementGap="0"/>
    <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" 
omitNorms="true" positionIncrementGap="0"/>
    <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" 
omitNorms="true" positionIncrementGap="0"/>
    <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" 
omitNorms="true" positionIncrementGap="0"/>
    <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" 
omitNorms="true" positionIncrementGap="0"/>
    <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" 
omitNorms="true" positionIncrementGap="0"/>
    <fieldType name="date" class="solr.TrieDateField" omitNorms="true" 
precisionStep="0" positionIncrementGap="0"/>
    <fieldType name="tdate" class="solr.TrieDateField" omitNorms="true" 
precisionStep="6" positionIncrementGap="0"/>
    <fieldType name="pint" class="solr.IntField" omitNorms="true"/>
    <fieldType name="plong" class="solr.LongField" omitNorms="true"/>
    <fieldType name="pfloat" class="solr.FloatField" omitNorms="true"/>
    <fieldType name="pdouble" class="solr.DoubleField" omitNorms="true"/>
    <fieldType name="pdate" class="solr.DateField" sortMissingLast="true" 
omitNorms="true"/>
    <fieldType name="sint" class="solr.SortableIntField" sortMissingLast="true" 
omitNorms="true"/>
    <fieldType name="slong" class="solr.SortableLongField" 
sortMissingLast="true" omitNorms="true"/>
    <fieldType name="sfloat" class="solr.SortableFloatField" 
sortMissingLast="true" omitNorms="true"/>
    <fieldType name="sdouble" class="solr.SortableDoubleField" 
sortMissingLast="true" omitNorms="true"/>
    <fieldType name="random" class="solr.RandomSortField" indexed="true" />
    <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
      <analyzer>
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      </analyzer>
    </fieldType>
    <fieldType name="text" class="solr.TextField" positionIncrementGap="100" 
autoGeneratePhraseQueries="true">
      <analyzer type="index">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.StopFilterFactory"
                ignoreCase="true"
                words="stopwords.txt"
                enablePositionIncrements="true"
                />
        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" 
generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" 
splitOnCaseChange="1"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.KeywordMarkerFilterFactory" 
protected="protwords.txt"/>
        <filter class="solr.PorterStemFilterFactory"/>
      </analyzer>
      <analyzer type="query">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" 
ignoreCase="true" expand="true"/>
        <filter class="solr.StopFilterFactory"
                ignoreCase="true"
                words="stopwords.txt"
                enablePositionIncrements="true"
                />
        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" 
generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" 
splitOnCaseChange="1"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.KeywordMarkerFilterFactory" 
protected="protwords.txt"/>
        <filter class="solr.PorterStemFilterFactory"/>
      </analyzer>
    </fieldType>
    <fieldType name="textTight" class="solr.TextField" 
positionIncrementGap="100" >
      <analyzer>
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" 
ignoreCase="true" expand="false"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" 
words="stopwords.txt"/>
        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" 
generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.KeywordMarkerFilterFactory" 
protected="protwords.txt"/>
        <filter class="solr.PorterStemFilterFactory"/>
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
      </analyzer>
    </fieldType>
    <fieldType name="textgen" class="solr.TextField" positionIncrementGap="100">
      <analyzer type="index">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" 
words="stopwords.txt" enablePositionIncrements="true" />
        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" 
generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" 
splitOnCaseChange="0"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
      <analyzer type="query">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" 
ignoreCase="true" expand="true"/>
        <filter class="solr.StopFilterFactory"
                ignoreCase="true"
                words="stopwords.txt"
                enablePositionIncrements="true"
                />
        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" 
generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" 
splitOnCaseChange="0"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
    </fieldType>
    <fieldType name="text_rev" class="solr.TextField" 
positionIncrementGap="100">
      <analyzer type="index">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" 
words="stopwords.txt" enablePositionIncrements="true" />
        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" 
generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" 
splitOnCaseChange="0"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
           maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
      </analyzer>
      <analyzer type="query">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" 
ignoreCase="true" expand="true"/>
        <filter class="solr.StopFilterFactory"
                ignoreCase="true"
                words="stopwords.txt"
                enablePositionIncrements="true"
                />
        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" 
generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" 
splitOnCaseChange="0"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
    </fieldType>
    <fieldType name="alphaOnlySort" class="solr.TextField" 
sortMissingLast="true" omitNorms="true">
      <analyzer>
        <tokenizer class="solr.KeywordTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory" />
        <filter class="solr.TrimFilterFactory" />
        <filter class="solr.PatternReplaceFilterFactory"
                pattern="([^a-z])" replacement="" replace="all"
        />
      </analyzer>
    </fieldType>

    <fieldtype name="phonetic" stored="false" indexed="true" 
class="solr.TextField" >
      <analyzer>
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
      </analyzer>
    </fieldtype>

    <fieldtype name="payloads" stored="false" indexed="true" 
class="solr.TextField" >
      <analyzer>
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.DelimitedPayloadTokenFilterFactory" 
encoder="float"/>
      </analyzer>
    </fieldtype>
    <fieldType name="lowercase" class="solr.TextField" 
positionIncrementGap="100">
      <analyzer>
        <tokenizer class="solr.KeywordTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory" />
      </analyzer>
    </fieldType>
    <fieldtype name="ignored" stored="false" indexed="false" multiValued="true" 
class="solr.StrField" />
    <fieldType name="location" class="solr.PointType" dimension="2" 
subFieldSuffix="_d"/>
    <fieldtype name="geohash" class="solr.GeoHashField"/>
    <fieldType name="tile" class="solr.SpatialTileField" start="4" end="15" 
subFieldSuffix="_tiled"/>

 </types>


 <fields>
   <field name="id" type="string" indexed="true" stored="true" required="true" 
/>
   <field name="sku" type="textTight" indexed="true" stored="true" 
omitNorms="true"/>
   <field name="name" type="textgen" indexed="true" stored="true"/>
   <field name="alphaNameSort" type="alphaOnlySort" indexed="true" 
stored="false"/>
   <field name="manu" type="textgen" indexed="true" stored="true" 
omitNorms="true"/>
   <field name="cat" type="text_ws" indexed="true" stored="true" 
multiValued="true" omitNorms="true" />
   <field name="features" type="text" indexed="true" stored="true" 
multiValued="true"/>
   <field name="includes" type="text" indexed="true" stored="true" 
termVectors="true" termPositions="true" termOffsets="true" />

   <field name="weight" type="float" indexed="true" stored="true"/>
   <field name="price"  type="float" indexed="true" stored="true"/>
   <field name="popularity" type="int" indexed="true" stored="true" />
   <field name="inStock" type="boolean" indexed="true" stored="true" />

   <field name="store" type="location" indexed="true" stored="true"/>
   <field name="store_hash" type="geohash" indexed="true" stored="false"/>
   <field name="store_tiles" type="tile" indexed="true" stored="false"/>
   <field name="title" type="text" indexed="true" stored="true" 
multiValued="true"/>
   <field name="subject" type="text" indexed="true" stored="true"/>
   <field name="description" type="text" indexed="true" stored="true"/>
   <field name="comments" type="text" indexed="true" stored="true"/>
   <field name="author" type="textgen" indexed="true" stored="true"/>
   <field name="keywords" type="textgen" indexed="true" stored="true"/>
   <field name="category" type="textgen" indexed="true" stored="true"/>
   <field name="content_type" type="string" indexed="true" stored="true" 
multiValued="true"/>
   <field name="last_modified" type="date" indexed="true" stored="true"/>
   <field name="links" type="string" indexed="true" stored="true" 
multiValued="true"/>
   <field name="text" type="text" indexed="true" stored="false" 
multiValued="true"/>
   <field name="text_rev" type="text_rev" indexed="true" stored="false" 
multiValued="true"/>
   <field name="manu_exact" type="string" indexed="true" stored="false"/>

   <field name="payloads" type="payloads" indexed="true" stored="true"/>
   <dynamicField name="*_i"  type="int"    indexed="true"  stored="true"/>
   <dynamicField name="*_s"  type="string"  indexed="true"  stored="true"/>
   <dynamicField name="*_l"  type="long"   indexed="true"  stored="true"/>
   <dynamicField name="*_t"  type="text"    indexed="true"  stored="true"/>
   <dynamicField name="*_b"  type="boolean" indexed="true"  stored="true"/>
   <dynamicField name="*_f"  type="float"  indexed="true"  stored="true"/>
   <dynamicField name="*_d"  type="double" indexed="true"  stored="true"/>

   <dynamicField name="*_tiled"  type="double" indexed="true"  stored="false"/>

   <dynamicField name="*_dt" type="date"    indexed="true"  stored="true"/>
   <dynamicField name="*_p"  type="location" indexed="true" stored="true"/>

   <dynamicField name="*_ti" type="tint"    indexed="true"  stored="true"/>
   <dynamicField name="*_tl" type="tlong"   indexed="true"  stored="true"/>
   <dynamicField name="*_tf" type="tfloat"  indexed="true"  stored="true"/>
   <dynamicField name="*_td" type="tdouble" indexed="true"  stored="true"/>
   <dynamicField name="*_tdt" type="tdate"  indexed="true"  stored="true"/>

   <dynamicField name="*_pi"  type="pint"    indexed="true"  stored="true"/>

   <dynamicField name="ignored_*" type="ignored" multiValued="true"/>
   <dynamicField name="attr_*" type="textgen" indexed="true" stored="true" 
multiValued="true"/>

   <dynamicField name="random_*" type="random" />
 </fields>
 <uniqueKey>id</uniqueKey>

 <defaultSearchField>text</defaultSearchField>

 <solrQueryParser defaultOperator="OR"/>

   <copyField source="cat" dest="text"/>
   <copyField source="store" dest="store_hash"/>
   <copyField source="store" dest="store_tiles"/>
   <copyField source="name" dest="text"/>
   <copyField source="manu" dest="text"/>
   <copyField source="features" dest="text"/>
   <copyField source="includes" dest="text"/>
   <copyField source="manu" dest="manu_exact"/>

</schema>

Solrconfig.xml:
<?xml version="1.0" encoding="UTF-8" ?>
<config>
  
<abortOnConfigurationError>${solr.abortOnConfigurationError:true}</abortOnConfigurationError>

  <luceneMatchVersion>LUCENE_31</luceneMatchVersion>

  <lib dir="./contrib/extraction/lib" />
  <lib dir="./lib"/>
  <lib dir="./contrib/clustering/lib" />
  <dataDir>C:/Program Files/Apache Software Foundation/solr-3.1/data</dataDir>
  <directoryFactory name="DirectoryFactory" 
class="${solr.directoryFactory:solr.StandardDirectoryFactory}"/>
  <indexDefaults>
    <useCompoundFile>false</useCompoundFile>

    <mergeFactor>10</mergeFactor>
    <ramBufferSizeMB>32</ramBufferSizeMB>
    <maxFieldLength>10000</maxFieldLength>
    <writeLockTimeout>1000</writeLockTimeout>
    <commitLockTimeout>10000</commitLockTimeout>
    <lockType>native</lockType>

  </indexDefaults>

  <mainIndex>

    <useCompoundFile>false</useCompoundFile>
    <ramBufferSizeMB>32</ramBufferSizeMB>
    <mergeFactor>10</mergeFactor>

    <unlockOnStartup>false</unlockOnStartup>

    <reopenReaders>true</reopenReaders>


    <deletionPolicy class="solr.SolrDeletionPolicy">
      <str name="maxCommitsToKeep">1</str>
      <str name="maxOptimizedCommitsToKeep">0</str>

    </deletionPolicy>

     <infoStream file="INFOSTREAM.txt">false</infoStream>

  </mainIndex>

  <jmx />

   <updateHandler class="solr.DirectUpdateHandler2">

  </updateHandler>



  <query>
    <maxBooleanClauses>1024</maxBooleanClauses>
    <filterCache
      class="solr.FastLRUCache"
      size="512"
      initialSize="512"
      autowarmCount="0"/>
    <queryResultCache
      class="solr.LRUCache"
      size="512"
      initialSize="512"
      autowarmCount="0"/>
    <documentCache
      class="solr.LRUCache"
      size="512"
      initialSize="512"
      autowarmCount="0"/>
    <enableLazyFieldLoading>true</enableLazyFieldLoading>
    <queryResultWindowSize>20</queryResultWindowSize>
    <queryResultMaxDocsCached>200</queryResultMaxDocsCached>
    <listener event="newSearcher" class="solr.QuerySenderListener">
      <arr name="queries">
      </arr>
    </listener>
    <listener event="firstSearcher" class="solr.QuerySenderListener">
      <arr name="queries">
        <lst> <str name="q">solr rocks</str><str name="start">0</str><str 
name="rows">10</str></lst>
        <lst><str name="q">static firstSearcher warming query from 
solrconfig.xml</str></lst>
      </arr>
    </listener>
    <useColdSearcher>false</useColdSearcher>
    <maxWarmingSearchers>2</maxWarmingSearchers>

  </query>
  <requestDispatcher handleSelect="true" >
    <requestParsers enableRemoteStreaming="false" 
multipartUploadLimitInKB="2048000" />
    <httpCaching lastModifiedFrom="openTime"
                 etagSeed="Solr">
    </httpCaching>
  </requestDispatcher>
  <requestHandler name="standard" class="solr.SearchHandler" default="true">
    <!-- default values for query parameters -->
     <lst name="defaults">
       <str name="echoParams">explicit</str>
       <!--
       <int name="rows">10</int>
       <str name="fl">*</str>
       <str name="version">2.1</str>
        -->
     </lst>
  </requestHandler>
  <requestHandler name="/browse" class="solr.SearchHandler">
     <lst name="defaults">
       <str name="wt">velocity</str>

       <str name="v.template">browse</str>
       <str name="v.layout">layout</str>
       <str name="title">Solritas</str>

       <str name="defType">dismax</str>
       <str name="q.alt">*:*</str>
       <str name="rows">10</str>
       <str name="fl">*,score</str>

       <str name="facet">on</str>
       <str name="facet.field">cat</str>
       <str name="facet.field">manu_exact</str>
       <str name="facet.mincount">1</str>
       <str name="qf">
          text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
       </str>

       <str name="hl">on</str>
       <str name="hl.fl">text features name</str>
       <str name="f.name.hl.fragsize">0</str>
       <str name="f.name.hl.alternateField">name</str>
     </lst>
  </requestHandler>
  <requestHandler name="dismax" class="solr.SearchHandler" >
    <lst name="defaults">
     <str name="defType">dismax</str>
     <str name="echoParams">explicit</str>
     <float name="tie">0.01</float>
     <str name="qf">
        text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
     </str>
     <str name="pf">
        text^0.2 features^1.1 name^1.5 manu^1.4 manu_exact^1.9
     </str>
     <str name="bf">
        popularity^0.5 recip(price,1,1000,1000)^0.3
     </str>
     <str name="fl">
        id,name,price,score
     </str>
     <str name="mm">
        2&lt;-1 5&lt;-2 6&lt;90%
     </str>
     <int name="ps">100</int>
     <str name="q.alt">*:*</str>
     <!-- example highlighter config, enable per-query with hl=true -->
     <str name="hl.fl">text features name</str>
     <!-- for this field, we want no fragmenting, just highlighting -->
     <str name="f.name.hl.fragsize">0</str>
     <!-- instructs Solr to return the field itself if no query terms are
          found -->
     <str name="f.name.hl.alternateField">name</str>
     <str name="f.text.hl.fragmenter">regex</str> <!-- defined below -->
    </lst>
  </requestHandler>
  <requestHandler name="partitioned" class="solr.SearchHandler" >
    <lst name="defaults">
     <str name="defType">dismax</str>
     <str name="echoParams">explicit</str>
     <str name="qf">text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0</str>
     <str name="mm">2&lt;-1 5&lt;-2 6&lt;90%</str>
     <!-- This is an example of using Date Math to specify a constantly
          moving date range in a config...
       -->
     <str name="bq">incubationdate_dt:[* TO NOW/DAY-1MONTH]^2.2</str>
    </lst>
    <lst name="appends">
      <str name="fq">inStock:true</str>
    </lst>
    <lst name="invariants">
      <str name="facet.field">cat</str>
      <str name="facet.field">manu_exact</str>
      <str name="facet.query">price:[* TO 500]</str>
      <str name="facet.query">price:[500 TO *]</str>
    </lst>
  </requestHandler>
  <searchComponent name="spellcheck" class="solr.SpellCheckComponent">

    <str name="queryAnalyzerFieldType">textSpell</str>

    <lst name="spellchecker">
      <str name="name">default</str>
      <str name="field">name</str>
      <str name="spellcheckIndexDir">./spellchecker</str>
    </lst>
  </searchComponent>
  <requestHandler name="/spell" class="solr.SearchHandler" lazy="true">
    <lst name="defaults">
      <str name="spellcheck.onlyMorePopular">false</str>
      <str name="spellcheck.extendedResults">false</str>
      <str name="spellcheck.count">1</str>
    </lst>
    <arr name="last-components">
      <str>spellcheck</str>
    </arr>
  </requestHandler>

  <searchComponent name="tvComponent" 
class="org.apache.solr.handler.component.TermVectorComponent"/>
  <requestHandler name="tvrh" 
class="org.apache.solr.handler.component.SearchHandler">
    <lst name="defaults">
      <bool name="tv">true</bool>
    </lst>
    <arr name="last-components">
      <str>tvComponent</str>
    </arr>
  </requestHandler>
  <searchComponent
    name="clusteringComponent"
    enable="${solr.clustering.enabled:false}"
    class="org.apache.solr.handler.clustering.ClusteringComponent" >
    <lst name="engine">
      <!-- The name, only one can be named "default" -->
      <str name="name">default</str>
      <str 
name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str>
      <str name="LingoClusteringAlgorithm.desiredClusterCountBase">20</str>
    </lst>
    <lst name="engine">
      <str name="name">stc</str>
      <str 
name="carrot.algorithm">org.carrot2.clustering.stc.STCClusteringAlgorithm</str>
    </lst>
  </searchComponent>
  <requestHandler name="/clustering"
                  enable="${solr.clustering.enabled:false}"
                  class="solr.SearchHandler">
     <lst name="defaults">
       <bool name="clustering">true</bool>
       <str name="clustering.engine">default</str>
       <bool name="clustering.results">true</bool>
       <!-- The title field -->
       <str name="carrot.title">name</str>
       <str name="carrot.url">id</str>
       <!-- The field to cluster on -->
       <str name="carrot.snippet">features</str>
       <!-- produce summaries -->
       <bool name="carrot.produceSummary">true</bool>
       <!-- the maximum number of labels per cluster -->
       <!--<int name="carrot.numDescriptions">5</int>-->
       <!-- produce sub clusters -->
       <bool name="carrot.outputSubClusters">false</bool>
    </lst>
    <arr name="last-components">
      <str>clusteringComponent</str>
    </arr>
  </requestHandler>

  <requestHandler name="/update/extract" 
class="org.apache.solr.handler.extraction.ExtractingRequestHandler" 
startup="lazy">
    <lst name="defaults">
      <str name="fmap.content">text</str>
      <str name="lowernames">true</str>
      <str name="uprefix">ignored_</str>

      <!-- capture link hrefs but ignore div attributes -->
      <str name="captureAttr">true</str>
      <str name="fmap.a">links</str>
      <str name="fmap.div">ignored_</str>
    </lst>
  </requestHandler>


  <searchComponent name="termsComponent" 
class="org.apache.solr.handler.component.TermsComponent"/>

  <requestHandler name="/terms" 
class="org.apache.solr.handler.component.SearchHandler">
     <lst name="defaults">
      <bool name="terms">true</bool>
    </lst>
    <arr name="components">
      <str>termsComponent</str>
    </arr>
  </requestHandler>
  <searchComponent name="elevator" class="solr.QueryElevationComponent" >
    <!-- pick a fieldType to analyze queries -->
    <str name="queryFieldType">string</str>
    <str name="config-file">elevate.xml</str>
  </searchComponent>

  <!-- a request handler utilizing the elevator component -->
  <requestHandler name="/elevate" class="solr.SearchHandler" startup="lazy">
    <lst name="defaults">
      <str name="echoParams">explicit</str>
    </lst>
    <arr name="last-components">
      <str>elevator</str>
    </arr>
  </requestHandler>
  <requestHandler name="/update" class="solr.XmlUpdateRequestHandler" />


  <requestHandler name="/update/javabin" 
class="solr.BinaryUpdateRequestHandler" />

  <requestHandler name="/analysis/document" 
class="solr.DocumentAnalysisRequestHandler" />
  <requestHandler name="/analysis/field" 
class="solr.FieldAnalysisRequestHandler" />
  <requestHandler name="/update/csv" class="solr.CSVRequestHandler" 
startup="lazy" />
  <requestHandler name="/admin/" 
class="org.apache.solr.handler.admin.AdminHandlers" />
  <requestHandler name="/admin/ping" class="PingRequestHandler">
    <lst name="defaults">
      <str name="qt">standard</str>
      <str name="q">solrpingquery</str>
      <str name="echoParams">all</str>
    </lst>
  </requestHandler>

  <requestHandler name="/debug/dump" class="solr.DumpRequestHandler" >
    <lst name="defaults">
     <str name="echoParams">explicit</str> <!-- for all params (including the 
default etc) use: 'all' -->
     <str name="echoHandler">true</str>
    </lst>
  </requestHandler>
  <searchComponent class="solr.HighlightComponent" name="highlight">
  <highlighting>
   <fragmenter name="gap" class="org.apache.solr.highlight.GapFragmenter" 
default="true">
    <lst name="defaults">
     <int name="hl.fragsize">100</int>
    </lst>
   </fragmenter>

   <fragmenter name="regex" class="org.apache.solr.highlight.RegexFragmenter">
    <lst name="defaults">
      <!-- slightly smaller fragsizes work better because of slop -->
      <int name="hl.fragsize">70</int>
      <!-- allow 50% slop on fragment sizes -->
      <float name="hl.regex.slop">0.5</float>
      <!-- a basic sentence pattern -->
      <str name="hl.regex.pattern">[-\w ,/\n\&quot;&apos;]{20,200}</str>
    </lst>
   </fragmenter>

   <formatter name="html" class="org.apache.solr.highlight.HtmlFormatter" 
default="true">
    <lst name="defaults">
     <str name="hl.simple.pre"><![CDATA[<em>]]></str>
     <str name="hl.simple.post"><![CDATA[</em>]]></str>
    </lst>
   </formatter>

   <fragListBuilder name="simple" 
class="org.apache.solr.highlight.SimpleFragListBuilder" default="true"/>

   <fragListBuilder name="single" 
class="org.apache.solr.highlight.SingleFragListBuilder"/>

   <fragmentsBuilder name="colored" 
class="org.apache.solr.highlight.MultiColoredScoreOrderFragmentsBuilder" 
default="true"/>
  </highlighting>
  </searchComponent>
  <queryResponseWriter name="xslt" class="solr.XSLTResponseWriter">
    <int name="xsltCacheLifetimeSeconds">5</int>
  </queryResponseWriter>
  <admin>
    <defaultQuery>solr</defaultQuery>
  </admin>

</config>

Test1.txt document:
Asdf
Asdf
Asdf
Adsf

Upload command:
curl 
"http://localhost:8080/solr/update/extract?literal.id=123&uprefix=attr_&fmap.content=attr_content&commit=true";
 -F "myfi...@test1.txt”

RESULTS from an id:[* TO *] query:
<response>
−
<lst name="responseHeader">
<int name="status">0</int>
<int name="QTime">91</int>
−
<lst name="params">
<str name="explainOther"/>
<str name="fl">*,score</str>
<str name="indent">on</str>
<str name="start">0</str>
<str name="q">id:[* TO *]</str>
<str name="hl.fl"/>
<str name="qt">standard</str>
<str name="wt">standard</str>
<str name="fq"/>
<str name="rows">10</str>
<str name="version">2.2</str>
</lst>
</lst>
−
<result name="response" numFound="1" start="0" maxScore="1.0">
−
<doc>
<float name="score">1.0</float>
−
<arr name="attr_content">
<str>        </str>
</arr>
−
<arr name="attr_stream_content_type">
<str>text/plain</str>
</arr>
−
<arr name="attr_stream_name">
<str>test1.txt</str>
</arr>
−
<arr name="attr_stream_size">
<str>24</str>
</arr>
−
<arr name="attr_stream_source_info">
<str>myfile</str>
</arr>
−
<arr name="content_type">
<str>text/plain</str>
</arr>
<str name="id">123</str>
</doc>
</result>
</response>

Note that the attr_content section of the response is blank.  Any help & hints 
would be GREATLY appreciated…=)

Best,
Dave

Reply via email to