Hello again,
I can index PDF files using:
*data-config.xml*
<?xml version="1.0" encoding="utf-8"?>
<!-- DataImportHandler configuration: list the PDF files under a directory
     and hand each one to Tika for text and metadata extraction. -->
<dataConfig>
  <!-- The only data source declared in this configuration. -->
  <dataSource name="binary" type="BinFileDataSource"/>

  <document>
    <!-- Outer entity: enumerates files whose name matches .*pdf below
         baseDir, descending into sub-directories (recursive="true"). -->
    <entity name="f"
            processor="FileListEntityProcessor"
            dataSource="binary"
            rootEntity="false"
            baseDir="../solr/docu/"
            fileName=".*pdf"
            recursive="true">

      <!-- Inner entity: parses the file whose absolute path is supplied by
           the outer entity "f" and emits its content as plain text. -->
      <entity name="tika"
              processor="TikaEntityProcessor"
              url="${f.fileAbsolutePath}"
              format="text">
        <!-- meta="true" columns are taken from the file's metadata. -->
        <field column="id" name="id" meta="true"/>
        <field column="fake_id" name="fake_id" meta="true"/>
        <field column="model" name="model" meta="true"/>
        <!-- The extracted body text is stored in the biog field. -->
        <field column="text" name="biog"/>
      </entity>
    </entity>
  </document>
</dataConfig>

I can also index a database using:
*data-config.xml*
<?xml version="1.0" encoding="utf-8"?>
<!-- DataImportHandler configuration: import five tables of the "rental"
     MySQL database. Every query builds fake_id by prefixing the row id with
     a per-table tag, and every entity runs the HTMLStripTransformer. -->
<dataConfig>

  <dataSource name="db"
              type="JdbcDataSource"
              driver="com.mysql.jdbc.Driver"
              url="jdbc:mysql://127.0.0.1:3306/rental"
              user="root"
              password="1a2b3c4d"/>

  <!-- Declared here but not referenced by any entity in this configuration. -->
  <dataSource name="binary" type="BinFileDataSource"/>

  <document>

    <!-- members table: fake_id is "m_" + id. -->
    <entity name="members"
            dataSource="db"
            transformer="HTMLStripTransformer"
            query="select CONCAT('m_',id) as fake_id, id, firstname, lastname, biog, model from members">
      <field column="id" name="id"/>
      <field column="fake_id" name="fake_id"/>
      <field column="firstname" name="firstname" stripHTML="true"/>
      <field column="lastname" name="lastname" stripHTML="true"/>
      <field column="biog" name="biog" stripHTML="true"/>
      <field column="model" name="model" stripHTML="true"/>
    </entity>

    <!-- new_members table: fake_id is "nm_" + id. -->
    <entity name="new_members"
            dataSource="db"
            transformer="HTMLStripTransformer"
            query="select CONCAT('nm_',id) as fake_id, id, firstname, lastname, biog, model from new_members">
      <field column="id" name="id"/>
      <field column="fake_id" name="fake_id"/>
      <field column="firstname" name="firstname" stripHTML="true"/>
      <field column="lastname" name="lastname" stripHTML="true"/>
      <field column="biog" name="biog" stripHTML="true"/>
      <field column="model" name="model" stripHTML="true"/>
    </entity>

    <!-- books table: fake_id is "b_" + id; description is indexed as biog. -->
    <entity name="books"
            dataSource="db"
            transformer="HTMLStripTransformer"
            query="select CONCAT('b_',id) as fake_id, id, title, description, model from books">
      <field column="id" name="id"/>
      <field column="fake_id" name="fake_id"/>
      <field column="title" name="title" stripHTML="true"/>
      <field column="description" name="biog" stripHTML="true"/>
      <field column="model" name="model" stripHTML="true"/>
    </entity>

    <!-- journals table: fake_id is "j_" + id; description is indexed as biog. -->
    <entity name="journals"
            dataSource="db"
            transformer="HTMLStripTransformer"
            query="select CONCAT('j_',id) as fake_id, id, title, description, model from journals">
      <field column="id" name="id"/>
      <field column="fake_id" name="fake_id"/>
      <field column="title" name="title" stripHTML="true"/>
      <field column="description" name="biog" stripHTML="true"/>
      <field column="model" name="model" stripHTML="true"/>
    </entity>

    <!-- cd table (entity name "cds"): fake_id is "c_" + id; description is indexed as biog. -->
    <entity name="cds"
            dataSource="db"
            transformer="HTMLStripTransformer"
            query="select CONCAT('c_',id) as fake_id, id, title, description, model from cd">
      <field column="id" name="id"/>
      <field column="fake_id" name="fake_id"/>
      <field column="title" name="title" stripHTML="true"/>
      <field column="description" name="biog" stripHTML="true"/>
      <field column="model" name="model" stripHTML="true"/>
    </entity>

  </document>
</dataConfig>

For the above I have:
*schema.xml(fields)*
<!-- schema.xml excerpt: the fields populated by the data-config.xml entities. -->
<fields>
  <!-- Identifiers are declared with the "string" type. -->
  <field name="id" type="string" indexed="true" stored="true"/>
  <field name="fake_id" type="string" indexed="true" stored="true"/>
  <!-- All remaining fields use the "text_en" type. -->
  <field name="model" type="text_en" indexed="true" stored="true"/>
  <field name="firstname" type="text_en" indexed="true" stored="true"/>
  <field name="lastname" type="text_en" indexed="true" stored="true"/>
  <field name="title" type="text_en" indexed="true" stored="true"/>
  <field name="biog" type="text_en" indexed="true" stored="true"/>
</fields>
<!-- Documents are keyed on fake_id rather than id. -->
<uniqueKey>fake_id</uniqueKey>
<!-- Queries that name no field are run against biog. -->
<defaultSearchField>biog</defaultSearchField>



But when I use the data-config.xml below, indexing fails:

*data-config.xml*

<?xml version="1.0" encoding="utf-8"?>
<!-- Combined configuration: the five database entities plus the PDF/Tika
     entity pair, all inside one <document>. This is the configuration the
     post reports as failing; it is kept unchanged as the reproduction case. -->

<dataConfig>

  <dataSource type="JdbcDataSource" 
              driver="com.mysql.jdbc.Driver"
              url="jdbc:mysql://127.0.0.1:3306/rental" 
              user="root" 
              password="1a2b3c4d"
                          name="db" />
                          
  <dataSource type="BinFileDataSource" name="binary" />                   
                          
  <document>
  
    <!-- Database entities: each one names dataSource="db" explicitly. -->
    <entity name="members" dataSource="db"
transformer="HTMLStripTransformer" query="select CONCAT('m_',id) as fake_id,
id, firstname, lastname, biog, model from members">
                <field column="id" name="id" /> 
                <field column="fake_id" name="fake_id" />
        <field column="firstname" name="firstname" stripHTML="true" />
        <field column="lastname" name="lastname" stripHTML="true" />
                <field column="biog" name="biog" stripHTML="true" />
                <field column="model" name="model" stripHTML="true"  />
    </entity>
        
        <entity name="new_members" dataSource="db"
transformer="HTMLStripTransformer" query="select CONCAT('nm_',id) as
fake_id, id, firstname, lastname, biog, model from new_members">
                <field column="id" name="id" />
                <field column="fake_id" name="fake_id" /> 
        <field column="firstname" name="firstname" stripHTML="true" />
        <field column="lastname" name="lastname" stripHTML="true" />
                <field column="biog" name="biog" stripHTML="true" />
                <field column="model" name="model" stripHTML="true" />
    </entity>
  
  
  <entity name="books" dataSource="db" transformer="HTMLStripTransformer"
query="select CONCAT('b_',id) as fake_id, id, title, description, model from
books">
                <field column="id" name="id" />
                <field column="fake_id" name="fake_id" /> 
        <field column="title" name="title" stripHTML="true" />
        <field column="description" name="biog" stripHTML="true" />
                <field column="model" name="model" stripHTML="true" />
    </entity>
  
  
  <entity name="journals" dataSource="db" transformer="HTMLStripTransformer"
query="select CONCAT('j_',id) as fake_id, id, title, description, model from
journals">
                <field column="id" name="id" />
                <field column="fake_id" name="fake_id" /> 
        <field column="title" name="title" stripHTML="true" />
        <field column="description" name="biog" stripHTML="true" />
                <field column="model" name="model" stripHTML="true" />
    </entity>
  
  
  <entity name="cds" dataSource="db" transformer="HTMLStripTransformer"
query="select CONCAT('c_',id) as fake_id, id, title, description, model from
cd">
                <field column="id" name="id" /> 
                <field column="fake_id" name="fake_id" />
        <field column="title" name="title" stripHTML="true" />
        <field column="description" name="biog" stripHTML="true" />
                <field column="model" name="model" stripHTML="true" />
    </entity>
        
        
        <!-- File entity: lists files matching .*pdf under baseDir and names
             dataSource="binary" explicitly. -->
        <entity name="f" dataSource="binary" rootEntity="false"
processor="FileListEntityProcessor" baseDir="../solr/docu/" fileName=".*pdf"
recursive="true">
                        <!-- NOTE(review): unlike every other entity in this file, the
                             "tika" entity below has no dataSource attribute. The stack
                             trace reported for this configuration shows
                             TikaEntityProcessor.nextRow calling JdbcDataSource.getData,
                             and MySQL rejecting the PDF path as SQL, so the entity is
                             evidently being served by the JDBC source "db" rather than
                             by "binary". Presumably adding dataSource="binary" to this
                             entity resolves the failure; confirm against the
                             DataImportHandler documentation for default data sources. -->
                        <entity name="tika" processor="TikaEntityProcessor" 
url="${f.fileAbsolutePath}" format="text">
                                <field column="id" name="id" meta="true" />
                                <field column="fake_id" name="fake_id" 
meta="true" />
                                <field column="model" name="model" meta="true" 
/>
                                <field column="text" name="biog" />
                        </entity>
                </entity>
        
  </document>   
</dataConfig>

*The log file is outputting:*

SEVERE: Exception while processing: f document :
null:org.apache.solr.handler.dataimport.DataImportHandlerException: Unable
to execute query: C:\solr\tomcat\..\solr\docu\dinos.pdf Processing Document
# 36
        at
org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow(DataImportHandlerException.java:72)
        at
org.apache.solr.handler.dataimport.JdbcDataSource$ResultSetIterator.<init>(JdbcDataSource.java:253)
        at
org.apache.solr.handler.dataimport.JdbcDataSource.getData(JdbcDataSource.java:210)
        at
org.apache.solr.handler.dataimport.JdbcDataSource.getData(JdbcDataSource.java:39)
        at
org.apache.solr.handler.dataimport.TikaEntityProcessor.nextRow(TikaEntityProcessor.java:103)
        at
org.apache.solr.handler.dataimport.EntityProcessorWrapper.pullRow(EntityProcessorWrapper.java:330)
        at
org.apache.solr.handler.dataimport.EntityProcessorWrapper.nextRow(EntityProcessorWrapper.java:296)
        at
org.apache.solr.handler.dataimport.DocBuilder.buildDocument(DocBuilder.java:683)
        at
org.apache.solr.handler.dataimport.DocBuilder.buildDocument(DocBuilder.java:709)
        at
org.apache.solr.handler.dataimport.DocBuilder.buildDocument(DocBuilder.java:619)
        at
org.apache.solr.handler.dataimport.DocBuilder.doFullDump(DocBuilder.java:327)
        at
org.apache.solr.handler.dataimport.DocBuilder.execute(DocBuilder.java:225)
        at
org.apache.solr.handler.dataimport.DataImporter.doFullImport(DataImporter.java:375)
        at
org.apache.solr.handler.dataimport.DataImporter.runCmd(DataImporter.java:445)
        at
org.apache.solr.handler.dataimport.DataImporter$1.run(DataImporter.java:426)
Caused by: com.mysql.jdbc.exceptions.jdbc4.MySQLSyntaxErrorException: You
have an error in your SQL syntax; check the manual that corresponds to your
MySQL server version for the right syntax to use near
'C:\solr\tomcat\..\solr\docu\dinos.pdf' at line 1
        at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
        at sun.reflect.NativeConstructorAccessorImpl.newInstance(Unknown Source)
        at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(Unknown
Source)
        at java.lang.reflect.Constructor.newInstance(Unknown Source)
        at com.mysql.jdbc.Util.handleNewInstance(Util.java:411)
        at com.mysql.jdbc.Util.getInstance(Util.java:386)
        at com.mysql.jdbc.SQLError.createSQLException(SQLError.java:1052)
        at com.mysql.jdbc.MysqlIO.checkErrorPacket(MysqlIO.java:4096)
        at com.mysql.jdbc.MysqlIO.checkErrorPacket(MysqlIO.java:4028)
        at com.mysql.jdbc.MysqlIO.sendCommand(MysqlIO.java:2490)
        at com.mysql.jdbc.MysqlIO.sqlQueryDirect(MysqlIO.java:2651)
        at com.mysql.jdbc.ConnectionImpl.execSQL(ConnectionImpl.java:2677)
        at com.mysql.jdbc.ConnectionImpl.execSQL(ConnectionImpl.java:2627)
        at com.mysql.jdbc.StatementImpl.execute(StatementImpl.java:841)
        at com.mysql.jdbc.StatementImpl.execute(StatementImpl.java:681)
        at
org.apache.solr.handler.dataimport.JdbcDataSource$ResultSetIterator.<init>(JdbcDataSource.java:246)
        ... 13 more

Is it possible to index PDF, DOC and RTF files along with a database into a
single document?

Thanks in advance,
Tom




--
View this message in context: 
http://lucene.472066.n3.nabble.com/Is-it-possible-to-index-pdfs-and-database-into-single-document-tp3980761.html
Sent from the Solr - User mailing list archive at Nabble.com.

Reply via email to