I'm using Solr 1.3 and am trying to get a delta-import working with the DIH.
Recently the wiki, http://wiki.apache.org/solr/DataImportHandler, was
updated to explain that delta-import is now a 1.4 feature, but that it is
still possible to get a delta using the full-import example here,
http://wiki.apache.org/solr/DataImportHandlerFaq#fullimportdelta.  I
tried this, but each time I run DIH it reimports and updates all rows.

Below is my data-config.xml.  I set rootEntity to false and issued
command=full-import&clean=false&optimize=false through DIH.  Am I
doing something wrong here or is the DataImportHandlerFaq incorrect?

<dataConfig>
        <!-- JDBC connection used by every entity below. -->
        <dataSource driver="com.mysql.jdbc.Driver"
                    url="jdbc:mysql://pencil-somewhere.com:22222/SomeDB"
                    user="someUser"
                    password="somePassword"/>
        <document name="">
                <!--
                  Outer entity: selects the DId of every 2_Doc row whose
                  ModifiedDate is later than the last recorded index time.
                  rootEntity="false" is set so that, per the DIH documentation,
                  the nested entity is the one that produces Solr documents.
                  NOTE(review): the literal is 'Research Articles' here but
                  'Research articles' in the nested query; the two filters
                  agree only under a case-insensitive collation. Confirm which
                  spelling is intended.
                -->
                <entity name="item" rootEntity="false"
                        query="select DId from 2_Doc
                               where ModifiedDate &gt; '${dataimporter.last_index_time}'
                               and DocType != 'Research Articles'">
                        <!--
                          Nested entity: runs once per DId returned by "item" and
                          loads the full row plus its Industry from tmp_DocIndustry.
                          d.DocId is included in the SELECT list so that the DocId
                          field mapping below has a column to read.
                        -->
                        <entity name="feature" pk="DId"
                                transformer="RegexTransformer"
                                query="SELECT d.DId, d.SiteId, d.DocId,
                                       d.DocTitle, d.DocURL, d.DocDesc,
                                       d.DocType, d.Tags, d.Source,
                                       d.Last90DaysRFIsPercent,
                                       d.ModifiedDate, d.DocGuid, d.Author,
                                       i.Industry
                                       FROM 2_Doc d
                                       LEFT OUTER JOIN tmp_DocIndustry i
                                       ON (d.DocId=i.DocId AND d.SiteId=i.SiteId)
                                       where d.DocType != 'Research articles'
                                       and d.DId = '${item.DId}'
                                       and d.ModifiedDate &gt; '${dataimporter.last_index_time}'">
                                <field column="DId" name="did"/>
                                <field column="SiteId" name="SiteId"/>
                                <field column="DocId" name="DocId"/>
                                <field column="DocTitle" name="DocTitle"/>
                                <field column="DocURL" name="DocURL"/>
                                <field column="DocDesc" name="DocDesc"/>
                                <!-- Snippet is derived from DocDesc: the regex captures at most
                                     the first 800 characters, ending on a word boundary. -->
                                <field column="Snippet" regex="^(.{0,800})\b.*$" sourceColName="DocDesc"/>
                                <field column="DocType" name="DocType"/>
                                <!-- Tags is split on ';' by the RegexTransformer. -->
                                <field column="Tags" name="Tags" splitBy=";" sourceColName="Tags"/>
                                <field column="Source" name="Source"/>
                                <field column="Last90DaysRFIsPercent" name="Last90DaysRFIsPercent"/>
                                <field column="ModifiedDate" name="ModifiedDate"/>
                                <field column="DocGuid" name="DocGuid"/>
                                <field column="Author" name="Author"/>
                                <field column="Industry" name="Industry" sourceColName="Industry"/>
                        </entity>
                </entity>
        </document>
</dataConfig>

Thanks,
-Tim

Reply via email to