I am using Solr 7.7.2 and trying to index binary data that is stored using
PostgreSQL's large object feature (OID type / lo module) rather than directly
in a table column.  Is this possible?  If so, are there any examples of others
configuring Solr in this way?

Attached are my db-data-config and managed-schema files for reference.
The same db-data-config works against an Oracle database holding the same data.
<dataConfig>

  <!--
    Solr DataImportHandler configuration: indexes the latest version of each
    document in db_document, joined to its binary contents in db_files, and
    passes the binary contents through Tika for text extraction.
  -->

  <!--
    JDBC connection to the PostgreSQL database that holds the document tables.
    NOTE(review): batchSize="0" is kept from the original. With the PostgreSQL
    JDBC driver a fetch size of 0 presumably loads the whole result set into
    memory, which may be costly now that each row carries the file bytes;
    confirm, and consider a positive batchSize with autoCommit="false".
  -->
  <dataSource
    type="JdbcDataSource"
    name="ISO17025V3"
    driver="org.postgresql.Driver"
    url="jdbc:postgresql://IS-Config-DB:5432/ISO17025V3"
    batchSize="0"
    user="postgres"
    password="xxxxx"
  />

  <!-- always needed regardless of database: exposes a binary column of the
       parent entity's row as a stream for the nested Tika entity -->
  <dataSource
    name="fieldReader"
    type="FieldStreamDataSource"
  />

  <!--
    document and field definitions (DB_DOCUMENT table).
    The unique key (file_name) is declared with <uniqueKey> in the
    managed-schema; that element is not part of the DIH configuration, so it
    is not repeated here.
  -->
  <document>
    <!--
      root: one row per document, restricted to its highest version and to
      top-level files (parent_file_name is null) that have contents.

      file_contents is a PostgreSQL large object reference (oid / lo), so the
      column itself only holds the object id. lo_get() reads the large object
      and returns it as bytea, which the JDBC driver delivers as a byte array
      that FieldStreamDataSource can stream. Assumes PostgreSQL 9.4 or later,
      where lo_get() is available (TODO confirm server version).

      deltaQuery compares changed_on with the last index time using
      to_timestamp and the 24-hour pattern HH24: to_date would drop the time
      of day, and the 12-hour pattern HH rejects hours 13 to 23.
    -->
    <entity
      name="root"
      query="select d.file_name, lo_get(dbf.file_contents) as file_contents, d.file_label, d.version, d.dir_num
             from db_document d
             inner join db_files dbf on (d.file_name = dbf.original_file_name and d.version = dbf.document_version and d.revision_no = dbf.revision_no)
             where dbf.file_contents is not null and dbf.parent_file_name is null
             and d.version = (select max(version) from db_document d2 where d.file_name = d2.file_name)
             order by d.file_label"
      deltaImportQuery="select d.file_name, lo_get(dbf.file_contents) as file_contents, d.file_label, d.version, d.dir_num
             from db_document d
             inner join db_files dbf on (d.file_name = dbf.original_file_name and d.version = dbf.document_version and d.revision_no = dbf.revision_no)
             where dbf.file_contents is not null and dbf.parent_file_name is null
             and d.version = (select max(version) from db_document d2 where d.file_name = d2.file_name) and d.file_name = '${dih.delta.file_name}'
             order by d.file_label"
      deltaQuery="select d.file_name as file_name
             from db_document d
             inner join db_files dbf on (d.file_name = dbf.original_file_name and d.version = dbf.document_version and d.revision_no = dbf.revision_no)
             where dbf.file_contents is not null and dbf.parent_file_name is null
             and d.version = (select max(version) from db_document d2 where d.file_name = d2.file_name) and dbf.changed_on &gt; to_timestamp('${dataimporter.last_index_time}', 'YYYY-MM-DD HH24:MI:SS')
             order by d.file_label"
      transformer="TemplateTransformer"
      onError="skip"
      dataSource="ISO17025V3">

      <field column="file_name" name="file_name" />

      <!--
        blob2: runs Tika over the bytes in root.file_contents, including
        embedded documents, and skips rows that fail to parse.
        NOTE(review): no <field> mapping is declared for Tika's output, so the
        extracted text is presumably only indexed if the managed-schema has a
        field whose name matches the Tika column (text); verify against the
        schema.
      -->
      <entity
        name="blob2"
        dataSource="fieldReader"
        processor="TikaEntityProcessor"
        dataField="root.file_contents"
        format="text"
        onError="skip"
        extractEmbedded="true">
      </entity>
    </entity>
  </document>

</dataConfig>

Reply via email to