I am trying to index the contents of a database for the first time, and I am 
only getting the primary key of the table represented by the top level entity 
in my data-config.xml file to be indexed. The database I am starting with has 
three tables:

The table called docs has columns called doc_id, type and last_modified. The 
primary key is doc_id.
The table called codes has columns called id, doc_id, origin, type, code and 
last_modified. The primary key is id. doc_id is a foreign key to the doc_id 
column in the docs table.
The table called texts has columns called id, doc_id, origin, type, text and 
last_modified. The primary key is id. doc_id is a foreign key to the doc_id 
column in the docs table.

My data-config.xml file looks like this:

<dataConfig>
  <dataSource type="JdbcDataSource" driver="com.mysql.jdbc.Driver"
              url="jdbc:mysql://localhost:3306/bioscope" user="db_user" 
password=""/>
  <document name="bioscope">
    <entity name="docs" pk="doc_id" query="SELECT doc_id, type FROM 
bioscope.docs"
            deltaQuery="SELECT doc_id FROM bioscope.docs where last_modified > 
'${dataimporter.last_index_time}'">
      <field column="doc_id" name="DOC_ID"/>
      <field column="type" name="DOC_TYPE"/>
      <entity name="codes" pk="id" query="SELECT id, origin, type, code FROM 
bioscope.codes WHERE doc_id='${docs.doc_id}'"
              deltaQuery="SELECT doc_id FROM bioscope.codes WHERE last_modified 
> '${dataimporter.last_index_time}'"
              parentDeltaQuery="SELECT doc_id from bioscope.docs WHERE 
doc_id='${codes.doc_id}'">
        <field column="id" name="CODE_ID"/>
        <field column="doc_id" name="DOC_ID"/>
        <field column="origin" name="CODE_ORIGIN"/>
        <field column="type" name="CODE_TYPE"/>
        <field column="code" name="CODE_VALUE"/>
      </entity>
      <entity name="notes" pk="id" query="SELECT id, origin, type, text FROM 
bioscope.texts WHERE doc_id='${docs.doc_id}'"
              deltaQuery="SELECT doc_id FROM bioscope.texts WHERE last_modified 
> '${dataimporter.last_index_time}'"
              parentDeltaQuery="SELECT doc_id from bioscope.docs WHERE 
doc_id='${texts.doc_id}'">
        <field column="id" name="NOTE_ID"/>
        <field column="doc_id" name="DOC_ID"/>
        <field column="origin" name="NOTE_ORIGIN"/>
        <field column="type" name="NOTE_TYPE"/>
        <field column="text" name="NOTE_TEXT"/>
      </entity>
    </entity>
  </document>
</dataConfig>

I added these lines to the schema.xml file:

<field name="DOC_ID" type="string" indexed="true" omitNorms="true" 
stored="true"/>
<field name="DOC_TYPE" type="string" indexed="true" omitNorms="true" 
stored="true"/>

<field name="CODE_ID" type="string" indexed="true" omitNorms="true" 
stored="true"/>
<field name="CODE_ORIGIN" type="string" indexed="true" omitNorms="true" 
stored="true"/>
<field name="CODE_TYPE" type="string" indexed="true" omitNorms="true" 
stored="true"/>
<field name="CODE_VALUE" type="string" indexed="true" omitNorms="true" 
stored="true"/>

<field name="NOTE_ID" type="string" indexed="true" omitNorms="true" 
stored="true"/>
<field name="NOTE_ORIGIN" type="string" indexed="true" omitNorms="true" 
stored="true"/>
<field name="NOTE_TYPE" type="string" indexed="true" omitNorms="true" 
stored="true"/>
<field name="NOTE_TEXT" type="text_ws" indexed="true" omitNorms="true" 
stored="true"/>

...

<uniqueKey>DOC_ID</uniqueKey>
<defaultSearchField>NOTE_TEXT</defaultSearchField>

When I run the full-import operation, only the DOC_ID values are written to the 
index. When I run a program that dumps the index contents as an xml string, the 
output looks like this:

<?xml version="1.0" ?>
<documents>
  <document>
    <field name="DOC_ID" value="97634811">
    </field>
  </document>
  <document>
    <field name="DOC_ID" value="97634910">
    </field>
  </document>
...
</documents>

Since this is new to me, I am sure that I have simply left something out or 
specified something the wrong way, but I haven't been able to spot what I have 
been doing wrong when I have gone over the configuration files that I am using. 
Can anyone help me figure out why the other database contents are not being 
indexed?
Thanks,
Mike

Reply via email to