Hi,
(The first version of this was rejected for spam).
I'm setting up a test instance of Solr, and keep running into the problem of
having Solr not work the way I think it should work. Specifically, the data I
want to go into the index isn't there after indexing. I'm extracting the data
from MSSQL via DataImportHandler, JDBC 4.0.
My data is set up that for every product ID there is one category
(hierarchical, but I'm not dealing with that ATM), a family, and a set of
attributes (which includes name, etc). After indexing, I get Category, Family,
and Product ID - but nothing from my attribute values (STRING_NAME, below) -
which is the most useful data.
Is there something wrong with my schema?
I thought it might be that the schema.xml file wasn't respecting the names I
assigned via the DataImportHandler; when I changed to the column names in the
schema.xml, I picked up Family and Category (previously, it was only product
ID).
I'm really banging my head against the wall at this point, so I'd appreciate
any help. My step will probably be to do a considerably more complicated
denormalization (in terms of the SQL), which would make the Solr end simpler
(but that has problems of its own).
Config information below.
Any help appreciated.
Thanks,
Michael
Data Config:
<dataConfig>
<dataSource driver="com.microsoft.sqlserver.jdbc.SQLServerDriver"
url="jdbc:sqlserver://localhost\DEVELOPMENT/Databases/data:1433" />
<document name="products">
<entity onError="continue" name="product" query="select
Product_ID,Category_ID from TB_Product">
<field column="PRODUCT_ID" name="pid" />
<field column="CATEGORY_ID" name="cid" />
<entity name="facets" query="select * from TB_PROD_SPECS where
PRODUCT_ID=${product.Product_ID}">
<field column="STRING_VALUE" />
<field column="NUMERIC_VALUE" />
<entity name="attributes" query="select
ATTRIBUTE_NAME,ATTRIBUTE_TYPE from TB_ATTRIBUTE where
ATTRIBUTE_ID=${facets.ATTRIBUTE_ID}">
<field column="Attribute_Name" name="Attribute Name" />
</entity>
</entity>
<entity name="category" query="select CATEGORY_NAME,PARENT_CATEGORY
from TB_CATEGORY where CATEGORY_ID='${product.Category_ID}'">
<field column="Category_Name" name="Category" />
<field column="Parent_Category" name="Parent Category" />
</entity>
<entity name="family_id" query="select FAMILY_ID from
TB_PROD_FAMILY where Product_ID = ${product.Product_ID}">
<entity name="family" query="select
FAMILY_Name,PARENT_FAMILY_ID,ROOT_FAMILY,CATEGORY_ID from TB_Family where
Family_ID = ${family_id.FAMILY_ID}">
<field column="FAMILY_NAME" name="Family" />
<field column="ROOT_FAMILY" name="Root Family" />
<field column="PARENT_FAMILY" name="Parent Family" />
<field column="Category_id" name="Category ID" />
</entity>
</entity>
</entity>
</document>
</dataConfig>
Schema:
<fields>
<field name="Product_ID" type="int" indexed="true" stored="true"
required="true" />
<field name="Family_NAME" type="textTight" indexed="true"
stored="false" multivalued="true"/>
<field name="Category_Name" type="textTight" indexed="true"
stored="true" multiValued="true" omitNorms="true" />
<field name="STRING_VALUE" type="textTight" indexed="true"
stored="false" multivalued="true"/>
<field name="ATTRIBUTE_NAME" type="textTight" indexed="true"
stored="false" multivalued="true"/>
<field name="text" type="text" indexed="true" stored="false"
multiValued="true"/>
<dynamicField name="*_i" type="string" indexed="true"
stored="true" multivalued="true"/>
</fields>
<uniqueKey>Product_ID</uniqueKey>
<defaultSearchField>text</defaultSearchField>
<solrQueryParser defaultOperator="OR"/>
<copyField source="*" dest="text"/>