I am trying to index the contents of a database for the first time, and I am
only getting the primary key of the table represented by the top level entity
in my data-config.xml file to be indexed. The database I am starting with has
three tables:
The table called docs has columns called doc_id, type and last_modified. The
primary key is doc_id.
The table called codes has columns called id, doc_id, origin, type, code and
last_modified. The primary key is id. doc_id is a foreign key to the doc_id
column in the docs table.
The table called texts has columns called id, doc_id, origin, type, text and
last_modified. The primary key is id. doc_id is a foreign key to the doc_id
column in the docs table.
My data-config.xml file looks like this:
<dataConfig>
<dataSource type="JdbcDataSource" driver="com.mysql.jdbc.Driver"
url="jdbc:mysql://localhost:3306/bioscope" user="db_user"
password=""/>
<document name="bioscope">
<entity name="docs" pk="doc_id" query="SELECT doc_id, type FROM
bioscope.docs"
deltaQuery="SELECT doc_id FROM bioscope.docs where last_modified >
'${dataimporter.last_index_time}'">
<field column="doc_id" name="DOC_ID"/>
<field column="type" name="DOC_TYPE"/>
<entity name="codes" pk="id" query="SELECT id, origin, type, code FROM
bioscope.codes WHERE doc_id='${docs.doc_id}'"
deltaQuery="SELECT doc_id FROM bioscope.codes WHERE last_modified
> '${dataimporter.last_index_time}'"
parentDeltaQuery="SELECT doc_id from bioscope.docs WHERE
doc_id='${codes.doc_id}'">
<field column="id" name="CODE_ID"/>
<field column="doc_id" name="DOC_ID"/>
<field column="origin" name="CODE_ORIGIN"/>
<field column="type" name="CODE_TYPE"/>
<field column="code" name="CODE_VALUE"/>
</entity>
<entity name="notes" pk="id" query="SELECT id, origin, type, text FROM
bioscope.texts WHERE doc_id='${docs.doc_id}'"
deltaQuery="SELECT doc_id FROM bioscope.texts WHERE last_modified
> '${dataimporter.last_index_time}'"
parentDeltaQuery="SELECT doc_id from bioscope.docs WHERE
doc_id='${texts.doc_id}'">
<field column="id" name="NOTE_ID"/>
<field column="doc_id" name="DOC_ID"/>
<field column="origin" name="NOTE_ORIGIN"/>
<field column="type" name="NOTE_TYPE"/>
<field column="text" name="NOTE_TEXT"/>
</entity>
</entity>
</document>
</dataConfig>
I added these lines to the schema.xml file:
<field name="DOC_ID" type="string" indexed="true" omitNorms="true"
stored="true"/>
<field name="DOC_TYPE" type="string" indexed="true" omitNorms="true"
stored="true"/>
<field name="CODE_ID" type="string" indexed="true" omitNorms="true"
stored="true"/>
<field name="CODE_ORIGIN" type="string" indexed="true" omitNorms="true"
stored="true"/>
<field name="CODE_TYPE" type="string" indexed="true" omitNorms="true"
stored="true"/>
<field name="CODE_VALUE" type="string" indexed="true" omitNorms="true"
stored="true"/>
<field name="NOTE_ID" type="string" indexed="true" omitNorms="true"
stored="true"/>
<field name="NOTE_ORIGIN" type="string" indexed="true" omitNorms="true"
stored="true"/>
<field name="NOTE_TYPE" type="string" indexed="true" omitNorms="true"
stored="true"/>
<field name="NOTE_TEXT" type="text_ws" indexed="true" omitNorms="true"
stored="true"/>
...
<uniqueKey>DOC_ID</uniqueKey>
<defaultSearchField>NOTE_TEXT</defaultSearchField>
When I run the full-import operation, only the DOC_ID values are written to the
index. When I run a program that dumps the index contents as an xml string, the
output looks like this:
<?xml version="1.0" ?>
<documents>
<document>
<field name="DOC_ID" value="97634811">
</field>
</document>
<document>
<field name="DOC_ID" value="97634910">
</field>
</document>
...
</documents>
Since this is new to me, I am sure that I have simply left something out or
specified something the wrong way, but I haven't been able to spot what I have
been doing wrong when I have gone over the configuration files that I am using.
Can anyone help me figure out why the other database contents are not being
indexed?
Thanks,
Mike