I am using Solr 6.5.1 and working on importing XML files using the 
DataImportHandler (DIH).  I want to get the files from a remote server, and I 
am dealing with multiple XML files in multiple folders.  I am using a nested 
entity in my dataConfig.  Below is an example of how I have my dataConfig set 
up.  I got most of this from an online reference.  In this example I am getting 
the XML files from a folder on the Solr server, but as I mentioned above, I want 
to get the files from a remote server.  I have looked at the different entity 
processors for the DIH, but have not found one that seems to work for this.  Is 
there a way to change the configuration below so that it reads from a remote server?


<dataConfig>

  <!-- File-based data source; the nested "xml" entity refers to it by name. -->
  <dataSource name="hbk" encoding="UTF-8" type="FileDataSource" />

  <document name="hbk">

    <!--
      pickupdir lists the files under baseDir (descending into sub-folders)
      whose names match the fileName regex, and passes each one to the nested
      entity, which parses the file contents.
      rootEntity="false" is set so that the nested entity, not this file
      listing, supplies the rows that become Solr documents.
    -->
    <entity name="pickupdir"
            processor="FileListEntityProcessor"
            rootEntity="false"
            dataSource="null"
            fileName="^[\w\d-]+\.xml$"
            baseDir="/var/solr/data/hbk/data/xml/"
            recursive="true">

      <!--
        xml reads the file whose absolute path pickupdir supplies through
        ${pickupdir.fileAbsolutePath}, with the stylesheet named in xsl
        configured as a preprocessing step, and produces rows for the nodes
        selected by forEach.

        The attribute is spelled dataSource (capital S) and names the "hbk"
        data source declared above. XML attribute names are case-sensitive;
        the earlier spelling "datasource" was not the attribute DIH reads,
        and its value named the parent entity instead of a data source.

        NOTE(review): RegexTransformer and TemplateTransformer are declared,
        but no field below uses a regex or template attribute. Confirm
        whether they are still needed.
      -->
      <entity name="xml"
              pk="itemId"
              processor="XPathEntityProcessor"
              transformer="RegexTransformer,TemplateTransformer"
              dataSource="hbk"
              stream="true"
              xsl="/var/solr/data/hbk/data/xsl/solr_timdex.xsl"
              url="${pickupdir.fileAbsolutePath}"
              forEach="/eflow/section | /eflow/section/item">

        <!-- Section-level attributes; commonField="true" shares them with the item rows. -->
        <field column="sectionId" xpath="/eflow/section/@id" commonField="true" />
        <field column="sectionTitle" xpath="/eflow/section/@title" commonField="true" />
        <field column="sectionNo" xpath="/eflow/section/@secno" commonField="true" />
        <field column="hbkNo" xpath="/eflow/section/@hbkno" commonField="true" />
        <field column="volumeNo" xpath="/eflow/section/@volno" commonField="true" />

        <!-- Item-level attributes; itemId is the entity's primary key (pk). -->
        <field column="itemId" xpath="/eflow/section/item/@id" />
        <field column="itemTitle" xpath="/eflow/section/item/@title" />
        <field column="itemNo" xpath="/eflow/section/item/@mit" />
        <field column="itemFile" xpath="/eflow/section/item/@file" />
        <field column="itemType" xpath="/eflow/section/item/@type" />
      </entity>
    </entity>
  </document>
</dataConfig>





~~~~~~~~~~~~~~~~~~~~~~~
William Kevin Miller
[ecsLogo]
ECS Federal, Inc.
USPS/MTSC
(405) 573-2158

Reply via email to