Hi Tracy
Do you have autocommit enabled (or are you manually committing every
few thousand docs)?
If not, try that.
-Nick
On 5/10/08, Tracy Flynn <[EMAIL PROTECTED]> wrote:
> Hi,
>
>  I'm starting to see a significant slowdown in loading performance after I
> have loaded about 400K documents.  I go from a load rate of nearly 40 docs/sec
> to 20-25 docs/sec.
>
>  Am I correct in assuming that, during indexing operations, Lucene/SOLR
> tries to hold as much of the index in memory as possible? If so, does the
> slowdown indicate a need to increase JVM heap space?
>
>  Any ideas / help would be appreciated
>
>  Regards,
>
>  Tracy
>
> ---------------------------------------------------------------------------------------------------------------------
>
>  Details
>
>  Documents loaded as XML via POST command in batches of 1000, commit after
> each batch
>
>  Total current documents ~ 450,000
>  Avg document size: 4KB
>  One indexed text field contains 3KB or so. (body field below - standard
> type 'text')
>
>  Dual Xeon 3 GHz, 4 GB memory
>
>  SOLR JVM Startup options
>
>  java -Xms256m -Xmx1000m  -jar start.jar
>
>
>  Relevant portion of the schema follows
>
>
>    <field name="document_id" type="string" indexed="true" stored="true"
> required="true"/>
>    <field name="language" type="string" indexed="true" stored="true"
> required="false"/>
>    <field name="languages" type="string" indexed="true" stored="true"
> required="false"/>
>    <!-- The value specified for folding_id must be a field of type "integer"
> -
>         type "sint" does not work -->
>    <field name="folding_id" type="integer" indexed="true" stored="true"
> required="false" default="0"/>
>    <field name="document_type" type="string" indexed="true" stored="true"
> required="true"/>
>    <field name="title" type="text" indexed="true" stored="true"
> required="false"/>
>    <field name="body" type="text" indexed="true" stored="true"
> required="false" compressed="true"/>
>    <field name="teaser" type="text" indexed="no" stored="true"
> required="false"/>
>    <field name="articles_in_category" type="sint" indexed="true"
> stored="true" required="false" default="0"/>
>    <field name="pen_name" type="text" indexed="true" stored="true"
> required="false"/>
>    <field name="article_id" type="sint" indexed="true" stored="true"
> required="false" default="0"/>
>    <field name="article_status_id" type="sint" indexed="true" stored="true"
> required="false" default="0"/>
>    <field name="user_id" type="sint" indexed="true" stored="true"
> required="false" default="0"/>
>    <field name="user_name" type="text" indexed="true" stored="true"
> required="false"/>
>    <field name="user_email" type="text" indexed="true" stored="true"
> required="false"/>
>    <field name="channel_context" type="sint" indexed="true" stored="true"
> required="false" multiValued="true"/>
>    <field name="category_id" type="sint" indexed="true" stored="true"
> required="false" default="0"/>
>    <field name="category_status_id" type="sint" indexed="true" stored="true"
> required="false" default="0"/>
>    <field name="category_title" type="text" indexed="true" stored="true"
> required="false"/>
>    <field name="category_keywords" type="text" indexed="true" stored="true"
> required="false" multiValued="true"/>
>    <field name="category_type" type="text" indexed="true" stored="true"
> required="false"/>
>    <field name="channel_id" type="sint" indexed="true" stored="true"
> required="false" default="0"/>
>    <field name="channel_title" type="text" indexed="true" stored="true"
> required="false"/>
>    <field name="helium_rank" type="sint" indexed="false" stored="true"
> required="false" default="0"/>
>    <field name="helium_rank_percentile" type="sfloat" indexed="false"
> stored="true" required="false"/>
>    <field name="helium_scaled_rank_boost" type="sfloat" indexed="true"
> stored="true" required="false"/>
>    <field name="helium_scaled_rank_boost_string"
> type="string" indexed="true" stored="true" required="false"/>
>     <!--
>     <field name="title_popularity" type="sint" indexed="true" stored="true"
> default="0"/>
>     <field name="title_recent_popularity" type="sint" indexed="true"
> stored="true" default="0"/>
>     <field name="title_views_measure" type="sint" indexed="true"
> stored="true" default="0"/>
>     <field name="title_recent_earnings_measure" type="sint"
> indexed="true" stored="true" default="0"/>
>     <field name="title_earnings_measure" type="sint" indexed="true"
> stored="true" default="0"/>
>    -->
>    <field name="created_date" type="date" indexed="true" stored="true"
> required="false" />
>
>
>

Reply via email to