Here are some things I would try:

1. Make sure the parent entity is only returning 1 row per solr document.  If 
not, move the problems joins to child entities to their own queries and child 
entities.

2. For the child entites, use caching.  This prevents the "n+1" select problem. 
 The changes are:
 > remove the "pk" attribute (only the parent entity needs this, and only to 
 > support delta updates).
 > remove the "where" clause from the query
 > add cacheKey/cacheLookup to each child like this:  cacheKey='id' 
 > cacheLookup='item.shop_id'
 > add cacheImpl="SortedMapBackedCache" to each child.  This will cache 
 > in-memory.

3. If caching uses too much memory, see 
https://issues.apache.org/jira/browse/SOLR-2613 & 
https://issues.apache.org/jira/browse/SOLR-2948 .  These are disk-backed cache 
implementations that you can use as alternatives to SortedMapBackedCache.  Or 
you can write your own.

4. If it is still too slow, you can parallelize it by splitting the data into 
partitions then running multiple DIH handlers at once.  This is a somewhat 
complex solution but still might be easier than writing a multi-threaded import 
program yourself.  One way to partition SQL data like this is to add a where 
clause like "where mod(id, 4)=${dataimporter.request.partitionNumber}"

I will mention that I recently converted one of our applications to use its own 
solrj-based code to update instead of DIH.  We were using BerkleyBackedCache 
from SOLR-2613 to handle the child entites, and it worked well.  But the app 
dev team wanted something that was part of their codebase that they could 
maintain more easily, so we migrated off of DIH.  We do updates more frequently 
and batch the updates so everything can fit in-memory.  Doing it this way, the 
SolrJ code was very straightforward and quick & easy to write.

James Dyer
Ingram Content Group
(615) 213-4311


-----Original Message-----
From: xiaoqi [mailto:belivexia...@gmail.com] 
Sent: Friday, April 26, 2013 5:10 AM
To: solr-user@lucene.apache.org
Subject: Re: Using another way instead of DIH


below is my data-import.xml

any suggestion ? 

------------------------------------

<?xml version="1.0" encoding="UTF-8"?>
<dataConfig>
    <dataSource type="JdbcDataSource"
                driver="com.mysql.jdbc.Driver"
                url="jdbc:mysql://*****:3306/guang"
                user="guang"
                password="guang"/>

    <document name="products">

        <entity name="item" pk="id" query="SELECT a.*,d.* FROM item a LEFT
JOIN item_ctr_by_area d ON a.id=d.item_id LEFT JOIN shop b ON a.shop_id =
b.id WHERE a.status =1 AND b.status = 1 AND b.uctrac_status =0 AND
uctrac_adgroup_id IS NOT NULL">
            <field column="id" name="item_id" />
            <field column="title" name="item_title" />
            <field column="description" name="item_description" />
            <field column="price" name="item_price" />
            <field column="promotion" name="item_promotion" />
            <field column="pic_url" name="item_picurl" />
            <field column="local_pic_url" name="item_local_picurl" />
            <field column="detail_url" name="item_detailurl" />
            <field column="recommend_value" name="item_recommend_value"/>
            <field column="uctrac_adgroup_id" name="uctrac_adgroup_id"/>
            <field column="uctrac_price" name="uctrac_adgroup_price"/>
            <field column="uctrac_status" name="uctrac_adgroup_status"/>
            <field column="uctrac_creative_id" name="uctrac_creative_id"/>
            <field column="lctr" name="item_lctr"/>
            <field column="CTR_ALL" name="region_ctr_all"/>
            <field column="CTR_N" name="region_ctr_n"/>
            <field column="CTR_MN" name="region_ctr_mn"/>
            <field column="CTR_MS" name="region_ctr_ms"/>
            <field column="CTR_S" name="region_ctr_s"/>
            <field column="CTR_011100" name="region_ctr_0111"/>
            <field column="CTR_011300" name="region_ctr_0113"/>
            <field column="CTR_012100" name="region_ctr_0121"/>
            <field column="CTR_013100" name="region_ctr_0131"/>
            <field column="CTR_013200" name="region_ctr_0132"/>
            <field column="CTR_013300" name="region_ctr_0133"/>
            <field column="CTR_013400" name="region_ctr_0134"/>
            <field column="CTR_013500" name="region_ctr_0135"/>
            <field column="CTR_013700" name="region_ctr_0137"/>
            <field column="CTR_014100" name="region_ctr_0141"/>
            <field column="CTR_014200" name="region_ctr_0142"/>
            <field column="CTR_014300" name="region_ctr_0143"/>
            <field column="CTR_014400" name="region_ctr_0144"/>
            <field column="CTR_015100" name="region_ctr_0151"/>
            <field column="CTR_016100" name="region_ctr_0161"/>
            <field column="CTR_ALL_2" name="region_ctr_all_2"/>
            <field column="CTR_N_2" name="region_ctr_n_2"/>
            <field column="CTR_MN_2" name="region_ctr_mn_2"/>
            <field column="CTR_MS_2" name="region_ctr_ms_2"/>
            <field column="CTR_S_2" name="region_ctr_s_2"/>
            <field column="CTR_011100_2" name="region_ctr_0111_2"/>
            <field column="CTR_011300_2" name="region_ctr_0113_2"/>
            <field column="CTR_012100_2" name="region_ctr_0121_2"/>
            <field column="CTR_013100_2" name="region_ctr_0131_2"/>
            <field column="CTR_013200_2" name="region_ctr_0132_2"/>
            <field column="CTR_013300_2" name="region_ctr_0133_2"/>
            <field column="CTR_013400_2" name="region_ctr_0134_2"/>
            <field column="CTR_013500_2" name="region_ctr_0135_2"/>
            <field column="CTR_013700_2" name="region_ctr_0137_2"/>
            <field column="CTR_014100_2" name="region_ctr_0141_2"/>
            <field column="CTR_014200_2" name="region_ctr_0142_2"/>
            <field column="CTR_014300_2" name="region_ctr_0143_2"/>
            <field column="CTR_014400_2" name="region_ctr_0144_2"/>
            <field column="CTR_015100_2" name="region_ctr_0151_2"/>
            <field column="CTR_016100_2" name="region_ctr_0161_2"/>
            <field column="CTR_ALL_4" name="region_ctr_all_4"/>
            <field column="CTR_N_4" name="region_ctr_n_4"/>
            <field column="CTR_MN_4" name="region_ctr_mn_4"/>
            <field column="CTR_MS_4" name="region_ctr_ms_4"/>
            <field column="CTR_S_4" name="region_ctr_s_4"/>
            <field column="CTR_011100_4" name="region_ctr_0111_4"/>
            <field column="CTR_011300_4" name="region_ctr_0113_4"/>
            <field column="CTR_012100_4" name="region_ctr_0121_4"/>
            <field column="CTR_013100_4" name="region_ctr_0131_4"/>
            <field column="CTR_013200_4" name="region_ctr_0132_4"/>
            <field column="CTR_013300_4" name="region_ctr_0133_4"/>
            <field column="CTR_013400_4" name="region_ctr_0134_4"/>
            <field column="CTR_013500_4" name="region_ctr_0135_4"/>
            <field column="CTR_013700_4" name="region_ctr_0137_4"/>
            <field column="CTR_014100_4" name="region_ctr_0141_4"/>
            <field column="CTR_014200_4" name="region_ctr_0142_4"/>
            <field column="CTR_014300_4" name="region_ctr_0143_4"/>
            <field column="CTR_014400_4" name="region_ctr_0144_4"/>
            <field column="CTR_015100_4" name="region_ctr_0151_4"/>
            <field column="CTR_016100_4" name="region_ctr_0161_4"/>
            <field column="votescore" name="item_votescore"/>
            <field column="votescore_s1" name="item_votescores1"/>
            <field column="votescore_s2" name="item_votescores2"/>
            <field column="votescore_s3" name="item_votescores3"/>
            <field column="volume" name="item_volume"/>
            <field column="lshow" name="item_lshow"/>
            <field column="click" name="item_click"/>
            <field column="promoted" name="item_promoted"/>
            <field column="created" name="item_created"/>

            <entity name="shop" pk="id" query="select * from shop where
id='${item.shop_id}'">
                <field column="id" name="shop_id" />
                <field column="name" name="shop_name" />
                <field column="url" name="shop_url" />
                <field column="type" name="shop_type" />
                <field column="description" name="shop_description" />
                <field column="item_score" name="shop_item_score" />
                <field column="service_score" name="shop_service_score" />
                <field column="delivery_score" name="shop_delivery_score" />
                <field column="promoted_service"
name="shop_promoted_service" />
                <field column="is_prepay" name="shop_is_prepay" />
                <field column="freight_payer" name="shop_freight_payer" />
                <field column="uctrac_campaign_id" name="uctrac_campaign_id"
/>
                <field column="uctrac_status" name="uctrac_campaign_status"
/>
                <field column="uctrac_admember_id" name="uctrac_admember_id"
/>
                <field column="level" name="shop_level" />
            </entity>

            <entity name="term" pk="id" query="select
a.cid,a.parent_cid,a.name,a.level1_parent_id,a.level2_parent_id,a.level3_parent_id,b.promoted
from term a left join item_term b on a.cid = b.term_id where b.item_id
='${item.id}'" transformer="TemplateTransformer">
                <field column="cid" name="term_id"/>
                <field column="parent_cid" name="term_parent_cid"/>
                <field column="name" name="term_name"/>
            </entity>

            <entity name="itemproperties" pk="id" query="select
distinct(property_id) from item_properties where item_id = '${item.id}' and
manualtype!=2">
                <field column="property_id" name="guang_property_id"/>
            </entity>

            <entity name="itemtag" pk="id" query="select
tagid,concat(CAST(tagid AS CHAR),':', CAST(weight_value AS CHAR), ':',
CAST(weight_flag AS CHAR)) as tagmatch from item_tag where itemid =
'${item.id}'">
                <field column="tagid" name="guang_tag_id"/>
                <field column="tagmatch" name="guang_tag_match"/>
            </entity>

            <entity name="tagids" pk="id" query="SELECT tag_id FROM
product_tags WHERE item_id='${item.id}' AND status=2 ORDER BY tag_id"
transformer="TemplateTransformer">
                <field column="tag_id" name="tag_ids"/>
            </entity>

        </entity>

    </document>

</dataConfig>




--
View this message in context: 
http://lucene.472066.n3.nabble.com/Using-another-way-instead-of-DIH-tp4058937p4059127.html
Sent from the Solr - User mailing list archive at Nabble.com.


Reply via email to