Hello again, I can index PDFs using: *data-config.xml* <?xml version="1.0" encoding="utf-8"?> <dataConfig> <dataSource type="BinFileDataSource" name="binary" /> <document> <entity name="f" dataSource="binary" rootEntity="false" processor="FileListEntityProcessor" baseDir="../solr/docu/" fileName=".*pdf" recursive="true"> <entity name="tika" processor="TikaEntityProcessor" url="${f.fileAbsolutePath}" format="text"> <field column="id" name="id" meta="true" /> <field column="fake_id" name="fake_id" meta="true" /> <field column="model" name="model" meta="true" /> <field column="text" name="biog" /> </entity> </entity> </document> </dataConfig>
I can also index a database using: *data-config.xml* <?xml version="1.0" encoding="utf-8"?> <dataConfig> <dataSource type="JdbcDataSource" driver="com.mysql.jdbc.Driver" url="jdbc:mysql://127.0.0.1:3306/rental" user="root" password="1a2b3c4d" name="db" /> <dataSource type="BinFileDataSource" name="binary" /> <document> <entity name="members" dataSource="db" transformer="HTMLStripTransformer" query="select CONCAT('m_',id) as fake_id, id, firstname, lastname, biog, model from members"> <field column="id" name="id" /> <field column="fake_id" name="fake_id" /> <field column="firstname" name="firstname" stripHTML="true" /> <field column="lastname" name="lastname" stripHTML="true" /> <field column="biog" name="biog" stripHTML="true" /> <field column="model" name="model" stripHTML="true" /> </entity> <entity name="new_members" dataSource="db" transformer="HTMLStripTransformer" query="select CONCAT('nm_',id) as fake_id, id, firstname, lastname, biog, model from new_members"> <field column="id" name="id" /> <field column="fake_id" name="fake_id" /> <field column="firstname" name="firstname" stripHTML="true" /> <field column="lastname" name="lastname" stripHTML="true" /> <field column="biog" name="biog" stripHTML="true" /> <field column="model" name="model" stripHTML="true" /> </entity> <entity name="books" dataSource="db" transformer="HTMLStripTransformer" query="select CONCAT('b_',id) as fake_id, id, title, description, model from books"> <field column="id" name="id" /> <field column="fake_id" name="fake_id" /> <field column="title" name="title" stripHTML="true" /> <field column="description" name="biog" stripHTML="true" /> <field column="model" name="model" stripHTML="true" /> </entity> <entity name="journals" dataSource="db" transformer="HTMLStripTransformer" query="select CONCAT('j_',id) as fake_id, id, title, description, model from journals"> <field column="id" name="id" /> <field column="fake_id" name="fake_id" /> <field column="title" name="title" stripHTML="true" 
/> <field column="description" name="biog" stripHTML="true" /> <field column="model" name="model" stripHTML="true" /> </entity> <entity name="cds" dataSource="db" transformer="HTMLStripTransformer" query="select CONCAT('c_',id) as fake_id, id, title, description, model from cd"> <field column="id" name="id" /> <field column="fake_id" name="fake_id" /> <field column="title" name="title" stripHTML="true" /> <field column="description" name="biog" stripHTML="true" /> <field column="model" name="model" stripHTML="true" /> </entity> </document> </dataConfig> For the above I have: *schema.xml(fields)* <fields> <field name="id" type="string" indexed="true" stored="true" /> <field name="fake_id" type="string" indexed="true" stored="true" /> <field name="model" type="text_en" indexed="true" stored="true" /> <field name="firstname" type="text_en" indexed="true" stored="true"/> <field name="lastname" type="text_en" indexed="true" stored="true"/> <field name="title" type="text_en" indexed="true" stored="true"/> <field name="biog" type="text_en" indexed="true" stored="true"/> </fields> <uniqueKey>fake_id</uniqueKey> <defaultSearchField>biog</defaultSearchField> But when I am using the below data-config.xml indexing fails: *data-config.xml* <?xml version="1.0" encoding="utf-8"?> <dataConfig> <dataSource type="JdbcDataSource" driver="com.mysql.jdbc.Driver" url="jdbc:mysql://127.0.0.1:3306/rental" user="root" password="1a2b3c4d" name="db" /> <dataSource type="BinFileDataSource" name="binary" /> <document> <entity name="members" dataSource="db" transformer="HTMLStripTransformer" query="select CONCAT('m_',id) as fake_id, id, firstname, lastname, biog, model from members"> <field column="id" name="id" /> <field column="fake_id" name="fake_id" /> <field column="firstname" name="firstname" stripHTML="true" /> <field column="lastname" name="lastname" stripHTML="true" /> <field column="biog" name="biog" stripHTML="true" /> <field column="model" name="model" stripHTML="true" /> </entity> 
<entity name="new_members" dataSource="db" transformer="HTMLStripTransformer" query="select CONCAT('nm_',id) as fake_id, id, firstname, lastname, biog, model from new_members"> <field column="id" name="id" /> <field column="fake_id" name="fake_id" /> <field column="firstname" name="firstname" stripHTML="true" /> <field column="lastname" name="lastname" stripHTML="true" /> <field column="biog" name="biog" stripHTML="true" /> <field column="model" name="model" stripHTML="true" /> </entity> <entity name="books" dataSource="db" transformer="HTMLStripTransformer" query="select CONCAT('b_',id) as fake_id, id, title, description, model from books"> <field column="id" name="id" /> <field column="fake_id" name="fake_id" /> <field column="title" name="title" stripHTML="true" /> <field column="description" name="biog" stripHTML="true" /> <field column="model" name="model" stripHTML="true" /> </entity> <entity name="journals" dataSource="db" transformer="HTMLStripTransformer" query="select CONCAT('j_',id) as fake_id, id, title, description, model from journals"> <field column="id" name="id" /> <field column="fake_id" name="fake_id" /> <field column="title" name="title" stripHTML="true" /> <field column="description" name="biog" stripHTML="true" /> <field column="model" name="model" stripHTML="true" /> </entity> <entity name="cds" dataSource="db" transformer="HTMLStripTransformer" query="select CONCAT('c_',id) as fake_id, id, title, description, model from cd"> <field column="id" name="id" /> <field column="fake_id" name="fake_id" /> <field column="title" name="title" stripHTML="true" /> <field column="description" name="biog" stripHTML="true" /> <field column="model" name="model" stripHTML="true" /> </entity> <entity name="f" dataSource="binary" rootEntity="false" processor="FileListEntityProcessor" baseDir="../solr/docu/" fileName=".*pdf" recursive="true"> <entity name="tika" processor="TikaEntityProcessor" url="${f.fileAbsolutePath}" format="text"> <field column="id" 
name="id" meta="true" /> <field column="fake_id" name="fake_id" meta="true" /> <field column="model" name="model" meta="true" /> <field column="text" name="biog" /> </entity> </entity> </document> </dataConfig> *The log file is outputting:* SEVERE: Exception while processing: f document : null:org.apache.solr.handler.dataimport.DataImportHandlerException: Unable to execute query: C:\solr\tomcat\..\solr\docu\dinos.pdf Processing Document # 36 at org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow(DataImportHandlerException.java:72) at org.apache.solr.handler.dataimport.JdbcDataSource$ResultSetIterator.<init>(JdbcDataSource.java:253) at org.apache.solr.handler.dataimport.JdbcDataSource.getData(JdbcDataSource.java:210) at org.apache.solr.handler.dataimport.JdbcDataSource.getData(JdbcDataSource.java:39) at org.apache.solr.handler.dataimport.TikaEntityProcessor.nextRow(TikaEntityProcessor.java:103) at org.apache.solr.handler.dataimport.EntityProcessorWrapper.pullRow(EntityProcessorWrapper.java:330) at org.apache.solr.handler.dataimport.EntityProcessorWrapper.nextRow(EntityProcessorWrapper.java:296) at org.apache.solr.handler.dataimport.DocBuilder.buildDocument(DocBuilder.java:683) at org.apache.solr.handler.dataimport.DocBuilder.buildDocument(DocBuilder.java:709) at org.apache.solr.handler.dataimport.DocBuilder.buildDocument(DocBuilder.java:619) at org.apache.solr.handler.dataimport.DocBuilder.doFullDump(DocBuilder.java:327) at org.apache.solr.handler.dataimport.DocBuilder.execute(DocBuilder.java:225) at org.apache.solr.handler.dataimport.DataImporter.doFullImport(DataImporter.java:375) at org.apache.solr.handler.dataimport.DataImporter.runCmd(DataImporter.java:445) at org.apache.solr.handler.dataimport.DataImporter$1.run(DataImporter.java:426) Caused by: com.mysql.jdbc.exceptions.jdbc4.MySQLSyntaxErrorException: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 
'C:\solr\tomcat\..\solr\docu\dinos.pdf' at line 1 at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method) at sun.reflect.NativeConstructorAccessorImpl.newInstance(Unknown Source) at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(Unknown Source) at java.lang.reflect.Constructor.newInstance(Unknown Source) at com.mysql.jdbc.Util.handleNewInstance(Util.java:411) at com.mysql.jdbc.Util.getInstance(Util.java:386) at com.mysql.jdbc.SQLError.createSQLException(SQLError.java:1052) at com.mysql.jdbc.MysqlIO.checkErrorPacket(MysqlIO.java:4096) at com.mysql.jdbc.MysqlIO.checkErrorPacket(MysqlIO.java:4028) at com.mysql.jdbc.MysqlIO.sendCommand(MysqlIO.java:2490) at com.mysql.jdbc.MysqlIO.sqlQueryDirect(MysqlIO.java:2651) at com.mysql.jdbc.ConnectionImpl.execSQL(ConnectionImpl.java:2677) at com.mysql.jdbc.ConnectionImpl.execSQL(ConnectionImpl.java:2627) at com.mysql.jdbc.StatementImpl.execute(StatementImpl.java:841) at com.mysql.jdbc.StatementImpl.execute(StatementImpl.java:681) at org.apache.solr.handler.dataimport.JdbcDataSource$ResultSetIterator.<init>(JdbcDataSource.java:246) ... 13 more Is it possible to index PDFs, DOCs, and RTFs along with a database in a single document? Thanks in advance, Tom -- View this message in context: http://lucene.472066.n3.nabble.com/Is-it-possible-to-index-pdfs-and-database-into-single-document-tp3980761.html Sent from the Solr - User mailing list archive at Nabble.com.