Hi Tracy
Do you have autocommit enabled (or are you manually committing every
few thousand docs)?
If not, try that.
-Nick
On 5/10/08, Tracy Flynn <[EMAIL PROTECTED]> wrote:
> Hi,
>
> I'm starting to see a significant slowdown in loading performance after I
> have loaded about 400K documents. I go from a load rate of nearly 40 docs/sec
> to 20-25 docs/sec.
>
> Am I correct in assuming that, during indexing operations, Lucene/SOLR
> tries to hold as much of the index in memory as possible? If so, does the
> slowdown indicate a need to increase JVM heap space?
>
> Any ideas / help would be appreciated
>
> Regards,
>
> Tracy
>
> ---------------------------------------------------------------------------------------------------------------------
>
> Details
>
> Documents loaded as XML via POST command in batches of 1000, commit after
> each batch
>
> Total current documents ~ 450,000
> Avg document size: 4KB
> One indexed text field contains 3KB or so. (body field below - standard
> type 'text')
>
> Dual XEON 3 GHZ 4 GB memory
>
> SOLR JVM Startup options
>
> java -Xms256m -Xmx1000m -jar start.jar
>
>
> Relevant portion of the schema follows
>
>
> <field name="document_id" type="string" indexed="true" stored="true"
> required="true"/>
> <field name="language" type="string" indexed="true" stored="true"
> required="false"/>
> <field name="languages" type="string" indexed="true" stored="true"
> required="false"/>
> <!-- The value specified for folding_id must be a field of type "integer"
> -
> type "sint" does not work -->
> <field name="folding_id" type="integer" indexed="true" stored="true"
> required="false" default="0"/>
> <field name="document_type" type="string" indexed="true" stored="true"
> required="true"/>
> <field name="title" type="text" indexed="true" stored="true"
> required="false"/>
> <field name="body" type="text" indexed="true" stored="true"
> required="false" compressed="true"/>
> <field name="teaser" type="text" indexed="no" stored="true"
> required="false"/>
> <field name="articles_in_category" type="sint" indexed="true"
> stored="true" required="false" default="0"/>
> <field name="pen_name" type="text" indexed="true" stored="true"
> required="false"/>
> <field name="article_id" type="sint" indexed="true" stored="true"
> required="false" default="0"/>
> <field name="article_status_id" type="sint" indexed="true" stored="true"
> required="false" default="0"/>
> <field name="user_id" type="sint" indexed="true" stored="true"
> required="false" default="0"/>
> <field name="user_name" type="text" indexed="true" stored="true"
> required="false"/>
> <field name="user_email" type="text" indexed="true" stored="true"
> required="false"/>
> <field name="channel_context" type="sint" indexed="true" stored="true"
> required="false" multiValued="true"/>
> <field name="category_id" type="sint" indexed="true" stored="true"
> required="false" default="0"/>
> <field name="category_status_id" type="sint" indexed="true" stored="true"
> required="false" default="0"/>
> <field name="category_title" type="text" indexed="true" stored="true"
> required="false"/>
> <field name="category_keywords" type="text" indexed="true" stored="true"
> required="false" multiValued="true"/>
> <field name="category_type" type="text" indexed="true" stored="true"
> required="false"/>
> <field name="channel_id" type="sint" indexed="true" stored="true"
> required="false" default="0"/>
> <field name="channel_title" type="text" indexed="true" stored="true"
> required="false"/>
> <field name="helium_rank" type="sint" indexed="false" stored="true"
> required="false" default="0"/>
> <field name="helium_rank_percentile" type="sfloat" indexed="false"
> stored="true" required="false"/>
> <field name="helium_scaled_rank_boost" type="sfloat" indexed="true"
> stored="true" required="false"/>
> <field name="helium_scaled_rank_boost_string"
> type="string" indexed="true" stored="true" required="false"/>
> <!--
> <field name="title_popularity" type="sint" indexed="true" stored="true"
> default="0"/>
> <field name="title_recent_popularity" type="sint" indexed="true"
> stored="true" default="0"/>
> <field name="title_views_measure" type="sint" indexed="true"
> stored="true" default="0"/>
> <field name="title_recent_earnings_measure" type="sint"
> indexed="true" stored="true" default="0"/>
> <field name="title_earnings_measure" type="sint" indexed="true"
> stored="true" default="0"/>
> -->
> <field name="created_date" type="date" indexed="true" stored="true"
> required="false" />
>
>
>