I am using SOLR 7.7.2 and trying to index binary data that is stored in postgresql's large object feature (OID type / lo module) and not directly in the database. Is this possible? If so are there any examples of others configuring SOLR in this way?
Attached are my db-data-config and managed-schema files for reference. This same file works against an oracle database with the same data.
<dataConfig> <dataSource type="JdbcDataSource" name="ISO17025V3" driver="org.postgresql.Driver" url="jdbc:postgresql://IS-Config-DB:5432/ISO17025V3" batchSize="0" user="postgres" password="xxxxx" /> <!-- always needed regardless of database --> <dataSource name="fieldReader" type="FieldStreamDataSource" /> <!-- document and field definitions - DB_DOCUMENT table --> <uniqueKey>file_name</uniqueKey> <document> <entity name="root" query="select d.file_name, dbf.file_contents, d.file_label, d.version, d.dir_num from db_document d inner join db_files dbf on (d.file_name = dbf.original_file_name and d.version = dbf.document_version and d.revision_no = dbf.revision_no ) where dbf.file_contents is not null and dbf.parent_file_name is null and d.version = (select max(version) from db_document d2 where d.file_name = d2.file_name) order by d.file_label" deltaImportQuery="select d.file_name, dbf.file_contents, d.file_label, d.version, d.dir_num from db_document d inner join db_files dbf on (d.file_name = dbf.original_file_name and d.version = dbf.document_version and d.revision_no = dbf.revision_no ) where dbf.file_contents is not null and dbf.parent_file_name is null and d.version = (select max(version) from db_document d2 where d.file_name = d2.file_name) and d.file_name = '${dih.delta.file_name}' order by d.file_label" deltaQuery="select d.file_name as file_name from db_document d inner join db_files dbf on (d.file_name = dbf.original_file_name and d.version = dbf.document_version and d.revision_no = dbf.revision_no ) where dbf.file_contents is not null and dbf.parent_file_name is null and d.version = (select max(version) from db_document d2 where d.file_name = d2.file_name) and dbf.changed_on > to_date('${dataimporter.last_index_time}', 'YYYY-MM-DD HH:MI:SS') order by d.file_label" transformer="TemplateTransformer" onError="skip" dataSource="ISO17025V3"> <field column="file_name" name="file_name" /> <entity name="blob2" dataSource="fieldReader" processor="TikaEntityProcessor" dataField="root.file_contents" format="text" onError="skip" extractEmbedded="true"> </entity> </entity> </document> </dataConfig>