I've added a unit test for the problem below. It feeds document field data into the XPathEntityProcessor via the FieldReaderDataSource, but the XPathEntityProcessor does not emit the unpacked fields.
Running this under the debugger, I can see the supplied StringReader, with the XML string, being piped into the XPath EP. But somehow the XPath EP does not pick it apart the right way. Here is the DIH configuration file separately.

<dataConfig>
  <dataSource type='FieldReaderDataSource' name='fc' />
  <dataSource type='MockDataSource' name='db' />
  <document>
    <entity name='db' query='select * from x' dataSource='db'>
      <field column='dbid' />
      <field column='tag' />
      <field column='blob' />
      <entity name='unpack' dataSource='fc' processor='XPathEntityProcessor' forEach='/names' dataField='db.blob'>
        <field column='name' xpath='/names/name' />
      </entity>
    </entity>
  </document>
</dataConfig>

Any ideas?

---------------------------------------------------------------------------------------

package org.apache.solr.handler.dataimport;

import static org.apache.solr.handler.dataimport.AbstractDataImportHandlerTest.createMap;

import junit.framework.TestCase;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;
import org.apache.solr.handler.dataimport.TestDocBuilder.SolrWriterImpl;
import org.junit.Test;

/**
 * Demonstrates a problem feeding an XPathEntityProcessor from a
 * FieldReaderDataSource.
 *
 * <p>A mocked "db" entity returns one row whose {@code blob} column holds a
 * small XML string. A nested "unpack" entity is configured to read that column
 * through the FieldReaderDataSource and to extract {@code /names/name} with the
 * XPathEntityProcessor. The test expects the extracted value to show up as a
 * {@code name} field on the indexed document.
 *
 * <p>NOTE(review): the class extends the JUnit 3 {@link TestCase} and also
 * carries a JUnit 4 {@code @Test} annotation. Both are kept so the test is
 * still picked up however it is currently run; the {@code test} name prefix is
 * what a JUnit 3 runner relies on.
 */
public class TestFieldReaderXPath extends TestCase {

  /** XML payload stored in the {@code blob} column of the mocked row. */
  static final String KISSINGER = "<names><name>Henry</name></names>";

  /**
   * Mocked database rows. Each row is a list of {column, value} pairs; only the
   * first two pairs of a row are used by {@link #addDBDocuments(List)}.
   */
  static final String[][][] DBDOCS = {
    {{"dbid", "1"}, {"blob", KISSINGER}},
  };

  /**
   * Runs a full-import over the mocked row and checks that the resulting
   * document carries the two database columns plus the {@code name} value
   * unpacked from the XML blob.
   *
   * <p>The setup (manually wiring the root entity's data source and marking it
   * as the document root) follows the approach used in TestDocBuilder.
   *
   * @throws Exception if loading the configuration or running the import fails
   */
  @Test
  public void testSolrEmbedded() throws Exception {
    try {
      DataImporter di = new DataImporter();
      di.loadDataConfig(dih_config_FR_into_XP);

      DataImporter.RequestParams rp = new DataImporter.RequestParams();
      rp.command = "full-import";
      rp.requestParams = new HashMap<String, Object>();

      // Feed the root entity from the mock: the rows are registered under the
      // exact query string used in the configuration.
      DataConfig cfg = di.getConfig();
      DataConfig.Entity entity = cfg.document.entities.get(0);
      List<Map<String, Object>> rows = new ArrayList<Map<String, Object>>();
      addDBDocuments(rows);
      MockDataSource.setIterator("select * from x", rows.iterator());
      entity.dataSrc = new MockDataSource();
      entity.isDocRoot = true;

      SolrWriterImpl swi = new SolrWriterImpl();
      di.runCmd(rp, swi);

      assertEquals(1, swi.docs.size());
      SolrInputDocument doc = swi.docs.get(0);
      assertFieldValue(doc, "dbid", "1");
      assertFieldValue(doc, "blob", KISSINGER);
      // This is the field the nested XPath entity is expected to produce.
      assertFieldValue(doc, "name", "Henry");
    } finally {
      MockDataSource.clearCache();
    }
  }

  /**
   * Asserts that {@code doc} has a field called {@code name} whose value, as a
   * string, equals {@code expected}.
   *
   * <p>The field is checked for presence first so that a missing field is
   * reported as a named assertion failure rather than a NullPointerException,
   * and the expected value is passed first so failure messages read correctly.
   */
  private static void assertFieldValue(SolrInputDocument doc, String name, String expected) {
    SolrInputField field = doc.getField(name);
    assertNotNull("document has no '" + name + "' field", field);
    assertEquals("value of field '" + name + "'", expected, field.getValue().toString());
  }

  /**
   * Converts every entry of {@link #DBDOCS} into a row map and appends it to
   * {@code l}.
   *
   * @param l list that receives one map per mocked row
   */
  private void addDBDocuments(List<Map<String, Object>> l) {
    for (String[][] dbdoc : DBDOCS) {
      l.add(createMap(dbdoc[0][0], dbdoc[0][1], dbdoc[1][0], dbdoc[1][1]));
    }
  }

  /**
   * DIH configuration under test: a "db" entity backed by the MockDataSource
   * with a nested "unpack" entity that reads {@code db.blob} through the
   * FieldReaderDataSource and applies the XPathEntityProcessor to it.
   */
  String dih_config_FR_into_XP = "<dataConfig>\r\n"
      + " <dataSource type='FieldReaderDataSource' name='fc' />\r\n"
      + " <dataSource type='MockDataSource' name='db' />\r\n"
      + " <document>\r\n"
      + " <entity name='db' query='select * from x' dataSource='db'>\r\n"
      + " <field column='dbid' />\r\n"
      + " <field column='tag' />\r\n"
      + " <field column='blob' />\r\n"
      + " <entity name='unpack' dataSource='fc' processor='XPathEntityProcessor'\r\n"
      + " forEach='/names' dataField='db.blob'>\r\n"
      + " <field column='name' xpath='/names/name' />\r\n"
      + " </entity>\r\n"
      + " </entity>\r\n"
      + " </document>\r\n"
      + "</dataConfig>\r\n";
}