Hi All,

I am new to Solr and have started to look at our existing Solr implementation.
We have a Solr 3.6 implementation with an index size of 54 GB and a
configuration of 2 masters/3 slaves. Each machine has 32 GB of RAM. The
average document size is 100 KB, and on average 3 million documents are
indexed per day. We sort on one numeric field.

The issues we are facing are that Solr sometimes becomes unresponsive with an
OOM error and needs a restart, and that sometimes our queries fail to return
a result.
We plan to upgrade to the latest and greatest within a six-month to one-year
time frame, but until then we want to reduce the pain.

Would appreciate any suggestions.

Here is the Solr config, without comments:

<?xml version="1.0" encoding="UTF-8" ?>
<config>


<!-- Value is taken from the solr.abortOnConfigurationError property; "true" is the fallback. -->
<abortOnConfigurationError>${solr.abortOnConfigurationError:true}</abortOnConfigurationError>

  <!-- Extra jar locations, listed in their original order. -->

  <!-- Content-extraction contrib libraries -->
  <lib dir="../contrib/extraction/lib" />

  <!-- Jars from the dist directory, selected by file-name pattern -->
  <lib dir="../dist/" regex="apache-solr-\d.*\.jar" />
  <lib dir="../dist/" regex="apache-solr-clustering-\d.*\.jar" />
  <lib dir="../dist/" regex="apache-solr-velocity-\d.*\.jar" />
  <lib dir="../dist/" regex="apache-solr-dataimporthandler-\d.*\.jar" />
  <lib dir="../dist/" regex="apache-solr-dataimporthandler-extras-\d.*\.jar" />

  <!-- Libraries for the clustering and data-import contribs -->
  <lib dir="../contrib/clustering/lib/downloads/" />
  <lib dir="../contrib/clustering/lib/" />
  <lib dir="../contrib/dataimporthandler/lib/" />
  <lib dir="../example/example-DIH/solr/db/lib/" />

  <!-- Libraries for the velocity contrib -->
  <lib dir="../contrib/velocity/lib/" />

  <!-- Directory that holds the index data -->
  <dataDir>/data/pxs3_data</dataDir>

  <!-- Baseline index settings. The mainIndex section of this file sets its own
       values for useCompoundFile, ramBufferSizeMB and mergeFactor. -->
  <indexDefaults>
    <useCompoundFile>false</useCompoundFile>
    <mergeFactor>10</mergeFactor>
    <ramBufferSizeMB>32</ramBufferSizeMB>
    <!-- NOTE(review): presumably a cap on the number of tokens indexed per
         field. If documents really average ~100KB, text beyond the cap may
         not be searchable; confirm against the Solr 3.6 documentation. -->
    <maxFieldLength>10000</maxFieldLength>
    <writeLockTimeout>1000</writeLockTimeout>
    <commitLockTimeout>10000</commitLockTimeout>
    <lockType>native</lockType>
  </indexDefaults>

  <!-- Options specific to the main on-disk Lucene index. -->
  <mainIndex>
    <useCompoundFile>false</useCompoundFile>
    <ramBufferSizeMB>64</ramBufferSizeMB>
    <!-- NOTE(review): 2 is far below the indexDefaults value of 10 in this
         file; presumably this makes segment merges very frequent on any node
         that indexes. Confirm that this is intentional. -->
    <mergeFactor>2</mergeFactor>
    <unlockOnStartup>false</unlockOnStartup>

    <!-- When true, IndexReaders are reopened (often more efficient) rather
         than closed and then opened. -->
    <reopenReaders>true</reopenReaders>

    <deletionPolicy class="solr.SolrDeletionPolicy">
      <!-- How many commit points to keep -->
      <str name="maxCommitsToKeep">1</str>
      <!-- How many optimized commit points to keep -->
      <str name="maxOptimizedCommitsToKeep">0</str>
    </deletionPolicy>

    <!-- infoStream is set to false; the file attribute names INFOSTREAM.txt. -->
    <infoStream file="INFOSTREAM.txt">false</infoStream>
  </mainIndex>


  <!-- JMX element with no attributes. -->
  <jmx />

  <!-- Update handler. No child settings (for example autoCommit) are
       configured in this file. -->
  <updateHandler class="solr.DirectUpdateHandler2"></updateHandler>


  <!-- Query-side settings: clause limit, caches, and searcher warming. -->
  <query>
    <maxBooleanClauses>1024</maxBooleanClauses>

    <!-- All three caches are sized at 512 entries with autowarmCount="0". -->
    <filterCache class="solr.FastLRUCache" size="512" initialSize="512" autowarmCount="0"/>
    <queryResultCache class="solr.LRUCache" size="512" initialSize="512" autowarmCount="0"/>
    <documentCache class="solr.LRUCache" size="512" initialSize="512" autowarmCount="0"/>

    <enableLazyFieldLoading>true</enableLazyFieldLoading>
    <queryResultWindowSize>20</queryResultWindowSize>
    <queryResultMaxDocsCached>200</queryResultMaxDocsCached>

    <!-- newSearcher listener with an empty query list. -->
    <listener event="newSearcher" class="solr.QuerySenderListener">
      <arr name="queries">
      </arr>
    </listener>

    <!-- firstSearcher listener.
         NOTE(review): both queries look like stock placeholders rather than
         real traffic; consider replacing them with a representative query
         (including the numeric sort) after confirming the field names. -->
    <listener event="firstSearcher" class="solr.QuerySenderListener">
      <arr name="queries">
        <lst>
          <str name="q">solr rocks</str>
          <str name="start">0</str>
          <str name="rows">10</str>
        </lst>
        <lst><str name="q">static firstSearcher warming query from
solrconfig.xml</str></lst>
      </arr>
    </listener>

    <useColdSearcher>false</useColdSearcher>
    <maxWarmingSearchers>2</maxWarmingSearchers>
  </query>

  <!-- Request dispatching: request parsing limits and HTTP caching headers. -->
  <requestDispatcher handleSelect="true">
    <!-- Make sure your system has some authentication before enabling remote
         streaming!
         NOTE(review): remote streaming is enabled here and the multipart
         upload limit is 2048000 KB (about 2 GB); confirm both are really
         needed on these nodes. -->
    <requestParsers enableRemoteStreaming="true"
                    multipartUploadLimitInKB="2048000" />

    <httpCaching lastModifiedFrom="openTime" etagSeed="Solr"></httpCaching>
  </requestDispatcher>


  <!-- Search handler named "standard"; its only default is echoParams=explicit.
       The /admin/ping handler in this file selects it with qt=standard. -->
  <requestHandler name="standard" class="solr.SearchHandler">
    <lst name="defaults">
      <str name="echoParams">explicit</str>
    </lst>
  </requestHandler>

  <!-- Copy of the default request handler, labeled for DLAP. It carries
       default="true". -->
  <requestHandler name="dlap" class="solr.SearchHandler" default="true">
    <!-- Default values for query parameters -->
    <lst name="defaults">
      <str name="echoParams">explicit</str>
    </lst>
  </requestHandler>

<!-- Replication handler, slave side only: a master URL and a poll interval of
     00:05:00 are configured; no master section is present in this file. -->
<requestHandler name="/replication" class="solr.ReplicationHandler">
  <lst name="slave">
    <str name="masterUrl">http://hbpxsolrm03:8080/pxs3/replication</str>
    <str name="pollInterval">00:05:00</str>
  </lst>
</requestHandler>


  <!-- Search handler named "dismax": its defaults set defType=dismax together
       with boosts (qf, pf, bf), minimum-match (mm), phrase slop (ps), a
       fallback query (q.alt) and highlighting parameters.
       NOTE(review): the field names used below (text, features, name, sku,
       manu, cat, price, popularity) look like stock example fields; confirm
       they exist in this schema or whether this handler is unused. -->
  <requestHandler name="dismax" class="solr.SearchHandler" >
    <lst name="defaults">
     <str name="defType">dismax</str>
     <str name="echoParams">explicit</str>
     <float name="tie">0.01</float>
     <str name="qf">
        text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
     </str>
     <str name="pf">
        text^0.2 features^1.1 name^1.5 manu^1.4 manu_exact^1.9
     </str>
     <str name="bf">
        popularity^0.5 recip(price,1,1000,1000)^0.3
     </str>
     <str name="fl">
        id,name,price,score
     </str>
     <str name="mm">
        2&lt;-1 5&lt;-2 6&lt;90%
     </str>
     <int name="ps">100</int>
     <str name="q.alt">*:*</str>
     <!-- example highlighter config, enable per-query with hl=true -->
     <str name="hl.fl">text features name</str>
     <!-- for this field, we want no fragmenting, just highlighting -->
     <str name="f.name.hl.fragsize">0</str>
     <!-- instructs Solr to return the field itself if no query terms are
          found -->
     <str name="f.name.hl.alternateField">name</str>
     <!-- "regex" is the fragmenter declared in the highlighting section of this file -->
     <str name="f.text.hl.fragmenter">regex</str> <!-- defined below -->
    </lst>
  </requestHandler>


  <!-- Search handler named "partitioned": dismax defaults plus an "appends"
       list and an "invariants" list.
       NOTE(review): inStock, price, cat and manu_exact look like stock example
       fields; confirm they exist in this schema or whether this handler is
       unused. -->
  <requestHandler name="partitioned" class="solr.SearchHandler" >
    <!-- Parameter values supplied as defaults -->
    <lst name="defaults">
     <str name="defType">dismax</str>
     <str name="echoParams">explicit</str>
     <str name="qf">text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0</str>
     <str name="mm">2&lt;-1 5&lt;-2 6&lt;90%</str>

     <!-- Boost query on incubationdate_dt for the range [* TO NOW/DAY-1MONTH] -->
     <str name="bq">incubationdate_dt:[* TO NOW/DAY-1MONTH]^2.2</str>
    </lst>

    <!-- Presumably added to whatever the request itself specifies; verify
         against the Solr request handler documentation -->
    <lst name="appends">
      <str name="fq">inStock:true</str>
    </lst>

    <!-- Presumably fixed values that a request cannot override; verify
         against the Solr request handler documentation -->
    <lst name="invariants">
      <str name="facet.field">cat</str>
      <str name="facet.field">manu_exact</str>
      <str name="facet.query">price:[* TO 500]</str>
      <str name="facet.query">price:[500 TO *]</str>
    </lst>
  </requestHandler>


  <!-- Spell-check search component with a single spellchecker named
       "default", built from the "name" field. -->
  <searchComponent name="spellcheck" class="solr.SpellCheckComponent">
    <!-- Field type used to analyze the query text -->
    <str name="queryAnalyzerFieldType">textSpell</str>

    <lst name="spellchecker">
      <str name="name">default</str>
      <str name="field">name</str>
      <str name="spellcheckIndexDir">./spellchecker</str>
    </lst>
  </searchComponent>

  <!-- A request handler utilizing the spellcheck component.

       NOTE: This is purely an example. The whole purpose of the
       SpellCheckComponent is to hook it into the request handler that
       handles your normal queries (i.e. the standard or dismax
       SearchHandler) so that a separate request is not needed to get
       suggestions.

       IN OTHER WORDS, THERE IS A REALLY GOOD CHANCE THE SETUP BELOW IS NOT
       WHAT YOU WANT FOR YOUR PRODUCTION SYSTEM!

       The handler is marked for on-demand loading with startup="lazy", the
       same attribute the other lazily loaded handlers in this file use. The
       previous lazy="true" spelling is not the attribute used for that
       purpose.
  -->
  <requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
    <lst name="defaults">
      <!-- omp = Only More Popular -->
      <str name="spellcheck.onlyMorePopular">false</str>
      <!-- exr = Extended Results -->
      <str name="spellcheck.extendedResults">false</str>
      <!-- The number of suggestions to return -->
      <str name="spellcheck.count">1</str>
    </lst>
    <!-- Run the spellcheck component after the regular components -->
    <arr name="last-components">
      <str>spellcheck</str>
    </arr>
  </requestHandler>

  <!-- Term vector search component. -->
  <searchComponent name="tvComponent"
                   class="org.apache.solr.handler.component.TermVectorComponent"/>

  <!-- A request handler for working with the tvComponent. This is purely an
       example; you will likely want to add the component to your already
       specified request handlers. -->
  <requestHandler name="tvrh"
                  class="org.apache.solr.handler.component.SearchHandler">
    <lst name="defaults">
      <bool name="tv">true</bool>
    </lst>
    <arr name="last-components">
      <str>tvComponent</str>
    </arr>
  </requestHandler>


  <!-- Clustering search component declaring two engines: "default" (Lingo)
       and "stc" (STC). -->
  <searchComponent name="clusteringComponent"
                   class="org.apache.solr.handler.clustering.ClusteringComponent">
    <!-- Engine declaration; only one engine can be named "default" -->
    <lst name="engine">
      <str name="name">default</str>
      <str name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str>
      <str name="LingoClusteringAlgorithm.desiredClusterCountBase">20</str>
    </lst>
    <lst name="engine">
      <str name="name">stc</str>
      <str name="carrot.algorithm">org.carrot2.clustering.stc.STCClusteringAlgorithm</str>
    </lst>
  </searchComponent>
  <!-- Search handler that turns clustering on by default and runs the
       clusteringComponent as a last component. -->
  <requestHandler name="/clustering" class="solr.SearchHandler">
    <lst name="defaults">
      <bool name="clustering">true</bool>
      <str name="clustering.engine">default</str>
      <bool name="clustering.results">true</bool>
      <!-- Title field -->
      <str name="carrot.title">name</str>
      <str name="carrot.url">id</str>
      <!-- Field to cluster on -->
      <str name="carrot.snippet">features</str>
      <!-- Produce summaries -->
      <bool name="carrot.produceSummary">true</bool>
      <!-- Maximum number of labels per cluster (left disabled) -->
      <!--<int name="carrot.numDescriptions">5</int>-->
      <!-- Produce sub clusters -->
      <bool name="carrot.outputSubClusters">false</bool>
    </lst>
    <arr name="last-components">
      <str>clusteringComponent</str>
    </arr>
  </requestHandler>

  <!-- Solr Cell: http://wiki.apache.org/solr/ExtractingRequestHandler
       Loaded on demand (startup="lazy"). -->
  <requestHandler name="/update/extract"
                  class="org.apache.solr.handler.extraction.ExtractingRequestHandler"
                  startup="lazy">
    <lst name="defaults">
      <!-- All the main content goes into "text"; if you need to return the
           extracted text or do highlighting, use a stored field. -->
      <str name="fmap.content">text</str>
      <str name="lowernames">true</str>
      <str name="uprefix">ignored_</str>

      <!-- Capture link hrefs but ignore div attributes -->
      <str name="captureAttr">true</str>
      <str name="fmap.a">links</str>
      <str name="fmap.div">ignored_</str>
    </lst>
  </requestHandler>


  <!-- A component that returns terms and the document frequency of those
       terms. This component does not yet support distributed search. -->
  <searchComponent name="termsComponent"
                   class="org.apache.solr.handler.component.TermsComponent"/>

  <!-- Handler whose component list consists of termsComponent only. -->
  <requestHandler name="/terms"
                  class="org.apache.solr.handler.component.SearchHandler">
    <lst name="defaults">
      <bool name="terms">true</bool>
    </lst>
    <arr name="components">
      <str>termsComponent</str>
    </arr>
  </requestHandler>


  <!-- Search component that lets you configure the top results for a given
       query regardless of the normal Lucene scoring. -->
  <searchComponent name="elevator" class="solr.QueryElevationComponent">
    <!-- Field type used to analyze queries -->
    <str name="queryFieldType">string</str>
    <str name="config-file">elevate.xml</str>
  </searchComponent>

  <!-- Request handler that uses the elevator component; loaded on demand. -->
  <requestHandler name="/elevate" class="solr.SearchHandler" startup="lazy">
    <lst name="defaults">
      <str name="echoParams">explicit</str>
    </lst>
    <arr name="last-components">
      <str>elevator</str>
    </arr>
  </requestHandler>



  <!-- Update handlers: XML and javabin -->
  <requestHandler name="/update" class="solr.XmlUpdateRequestHandler" />
  <requestHandler name="/update/javabin" class="solr.BinaryUpdateRequestHandler" />

  <!-- Analysis handlers: per document and per field -->
  <requestHandler name="/analysis/document" class="solr.DocumentAnalysisRequestHandler" />
  <requestHandler name="/analysis/field" class="solr.FieldAnalysisRequestHandler" />

  <!-- CSV update handler, loaded on demand -->
  <requestHandler name="/update/csv" class="solr.CSVRequestHandler" startup="lazy" />

  <!-- Admin handlers registered under /admin/ -->
  <requestHandler name="/admin/" class="org.apache.solr.handler.admin.AdminHandlers" />

  <!-- Ping/healthcheck handler. It sends the query "solrpingquery" to the
       handler selected by qt=standard and echoes all parameters. The class
       uses the "solr." short-name prefix, consistent with the other handlers
       in this file. -->
  <requestHandler name="/admin/ping" class="solr.PingRequestHandler">
    <lst name="defaults">
      <str name="qt">standard</str>
      <str name="q">solrpingquery</str>
      <str name="echoParams">all</str>
    </lst>
  </requestHandler>

  <!-- Echoes the request contents back to the client. -->
  <requestHandler name="/debug/dump" class="solr.DumpRequestHandler">
    <lst name="defaults">
      <!-- Use 'all' instead of 'explicit' to echo every parameter, including
           the defaults -->
      <str name="echoParams">explicit</str>
      <str name="echoHandler">true</str>
    </lst>
  </requestHandler>

  <!-- Highlighting: two fragmenters ("gap" is the default, "regex" is
       optional) and one formatter ("html"). -->
  <highlighting>
    <!-- Standard fragmenter; could most likely be commented out in the
         "default" case -->
    <fragmenter name="gap"
                class="org.apache.solr.highlight.GapFragmenter"
                default="true">
      <lst name="defaults">
        <int name="hl.fragsize">100</int>
      </lst>
    </fragmenter>

    <!-- Regular-expression-based fragmenter (e.g. for sentence extraction) -->
    <fragmenter name="regex"
                class="org.apache.solr.highlight.RegexFragmenter">
      <lst name="defaults">
        <!-- Slightly smaller fragsizes work better because of slop -->
        <int name="hl.fragsize">70</int>
        <!-- Allow 50% slop on fragment sizes -->
        <float name="hl.regex.slop">0.5</float>
        <!-- A basic sentence pattern -->
        <str name="hl.regex.pattern">[-\w ,/\n\"']{20,200}</str>
      </lst>
    </fragmenter>

    <!-- Standard formatter: wraps matches in em tags -->
    <formatter name="html"
               class="org.apache.solr.highlight.HtmlFormatter"
               default="true">
      <lst name="defaults">
        <str name="hl.simple.pre"><![CDATA[<em>]]></str>
        <str name="hl.simple.post"><![CDATA[</em>]]></str>
      </lst>
    </formatter>
  </highlighting>


  <!-- XSLT response writer with xsltCacheLifetimeSeconds set to 5. -->
  <queryResponseWriter name="xslt" class="org.apache.solr.request.XSLTResponseWriter">
    <int name="xsltCacheLifetimeSeconds">5</int>
  </queryResponseWriter>

  <!-- Settings for the admin interface -->
  <admin>
    <defaultQuery>solr</defaultQuery>

    <!-- Healthcheck file for servers behind a load balancer (left disabled):
    <healthcheck type="file">server-enabled</healthcheck>
    -->
  </admin>

 <!-- Response writer named "velocity"; the /itas handler in this file selects
      it with wt=velocity. -->
 <queryResponseWriter name="velocity" class="solr.VelocityResponseWriter"/>
 <!-- Disabled alternative that spells out the full class name:
      <queryResponseWriter name="velocity" class="org.apache.solr.request.VelocityResponseWriter"/> -->

   <!-- /itas mapping for the Solritas view with some basic good defaults:
        velocity response writer, dismax, faceting on cat, and clustering.
        NOTE(review): the field names used below (text, features, name, sku,
        manu, cat) look like stock example fields; confirm they exist in this
        schema or whether this handler is unused. -->
   <requestHandler name="/itas" class="solr.SearchHandler">
      <lst name="defaults">
        <!-- VelocityResponseWriter settings -->
        <str name="wt">velocity</str>
        <str name="v.template">browse</str>
        <str name="v.layout">layout</str>
        <str name="title">Solritas</str>

        <!-- Page size and returned fields -->
        <str name="rows">10</str>
        <str name="fl">*,score</str>

        <!-- dismax -->
        <str name="defType">dismax</str>
        <str name="q.alt">*:*</str>
        <str name="qf">
           text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
        </str>

        <!-- faceting -->
        <str name="facet">on</str>
        <str name="facet.field">cat</str>
        <str name="facet.mincount">1</str>

        <!-- Clustering -->
        <bool name="clustering">true</bool>
        <str name="clustering.engine">default</str>
        <bool name="clustering.results">true</bool>
        <!-- The title field -->
        <str name="carrot.title">name</str>
        <str name="carrot.url">id</str>
        <!-- The field to cluster on -->
        <str name="carrot.snippet">features</str>
        <!-- produce summaries -->
        <bool name="carrot.produceSummary">true</bool>
        <!-- the maximum number of labels per cluster -->
        <!--<int name="carrot.numDescriptions">5</int>-->
        <!-- produce sub clusters -->
        <bool name="carrot.outputSubClusters">false</bool>

      </lst>

      <!-- clusteringComponent is listed as a last component -->
      <arr name="last-components">
        <str>clusteringComponent</str>
      </arr>
   </requestHandler>

 <!-- Data import handler; its configuration is read from
      ./dataimporthandler/data-config.xml. -->
 <requestHandler name="/dataimport"
                 class="org.apache.solr.handler.dataimport.DataImportHandler">
   <lst name="defaults">
     <str name="config">./dataimporthandler/data-config.xml</str>
   </lst>
 </requestHandler>

</config>

Reply via email to