Hello guys,

I need to index 5 different kinds of XML files. They share a similar structure,
with slight differences between them.

example 1:
<!-- Sample manifest: book metadata plus a single audioresource entry. -->
<manifest>
  <metadata>
    <isbn>9780815341291</isbn>
    <title>Essential Cell Biology,Third Edition</title>
    <authors>
      <author>Alberts;Bruce</author>
      <author>Bray;Dennis</author>
    </authors>
    <categories>
      <category>SCABC</category>
      <category>SCDEF</category>
    </categories>
  </metadata>
  <resources>
    <audioresource>
      <uuid>123456789</uuid>
      <source>03_Mutations_Origin_Cancer.mp3</source>
      <mimetype>audio/mpeg</mimetype>
      <title>Part Three - Mutations and the Origin of 
Cancer</title>
      <description>123</description>
      <chapters>
        <chapter>1</chapter>
      </chapters>
    </audioresource>
  </resources>
</manifest>

example 2:
<!-- Sample manifest: book metadata plus a single glossaryresource entry. -->
<manifest>
  <metadata>
    <isbn>9780815341291</isbn>
    <title>Essential Cell Biology,Third Edition</title>
    <authors>
      <author>FN:Alberts;Bruce</author>
      <author>FN:Bray;Dennis</author>
    </authors>
    <categories>
      <category>SCABC</category>
      <category>SCGHI</category>
    </categories>
  </metadata>

  <resources>
    <glossaryresource>
      <uuid>123456789</uuid>
      <term>A subunit </term>
      <definition>The portion of a bacterial exotoxin that 
interferes with
normal host cell function. </definition>
      <chapters>
        <chapter>10</chapter>
      </chapters>
    </glossaryresource>
  </resources>
</manifest>

My dih-config.xml is as below:
<?xml version="1.0"?>

<!--
  DataImportHandler configuration.
  The outer entity lists every *.xml file under baseDir (recursively); the
  nested entity opens each listed file and extracts the book metadata and
  the audioresource fields with XPath.
-->
<dataConfig>
        <dataSource name="fileReader" type="FileDataSource" encoding="UTF-8"/>
        <document>
                <!--
                  File lister. The attribute is spelled "rootEntity"; the
                  previous spelling "rootEntry" is not a DIH attribute and was
                  ignored. rootEntity="false" tells DIH that rows of this
                  entity are not documents themselves, so the nested entity
                  supplies the documents. dataSource="null" because this
                  processor reads the file system directly.
                -->
                <entity name="dir"
                        rootEntity="false"
                        dataSource="null"
                        processor="FileListEntityProcessor"
                        fileName="^.*\.xml$"
                        recursive="true"
                        baseDir="X:/tmp/npr">
                        <!--
                          Per-file XPath extraction. Each expression in forEach
                          marks an element that starts a record.
                          NOTE(review): "/manifest/metadata/resources" does not
                          occur in the sample manifests (resources is a child of
                          manifest, not of metadata); confirm whether it is needed.
                          NOTE(review): no field below declares a dateTimeFormat,
                          so DateFormatTransformer appears to be unused; confirm.
                        -->
                        <entity name="audioresource"
                                rootEntity="true"
                                dataSource="fileReader"
                                url="${dir.fileAbsolutePath}"
                                stream="false"
                                logTemplate=" processing 
${dir.fileAbsolutePath}"
                                logLevel="debug"
                                processor="XPathEntityProcessor"
                                forEach="/manifest/metadata | 
/manifest/metadata/authors |
/manifest/metadata/categories | /manifest/metadata/resources |
/manifest/resources/audioresource |
/manifest/resources/audioresource/chapters"
                                transformer="DateFormatTransformer">

                                <!-- Book-level fields taken from /manifest/metadata. -->
                                <field column="category" xpath="/manifest/metadata/categories/category"/>
                                <field column="author" xpath="/manifest/metadata/authors/author"/>
                                <field column="book_title" xpath="/manifest/metadata/title"/>
                                <field column="isbn" xpath="/manifest/metadata/isbn"/>

                                <!-- Resource-level fields taken from /manifest/resources/audioresource. -->
                                <field column="id" xpath="/manifest/resources/audioresource/uuid"/>
                                <field column="mimetype" xpath="/manifest/resources/audioresource/mimetype"/>
                                <field column="title" xpath="/manifest/resources/audioresource/title"/>
                                <field column="description" xpath="/manifest/resources/audioresource/description"/>
                                <field column="chapter" xpath="/manifest/resources/audioresource/chapters/chapter"/>
                                <field column="source" xpath="/manifest/resources/audioresource/source"/>
                        </entity>
                </entity>
        </document>
</dataConfig>

I'm not very familiar with XPath. I can't use a wildcard in an element name,
can I? I tried it and it didn't work.

Many thanks in advance.
-- 
View this message in context: 
http://lucene.472066.n3.nabble.com/solr-index-different-type-of-xml-tp1529898p1529898.html
Sent from the Solr - User mailing list archive at Nabble.com.

Reply via email to