I use the DataImportHandler (DIH) to index an Oracle table.
data table:tab_post_content
---id-----content(varchar2 4000)------: for example
0001      i have an ipad and Seagate hd 

industry word table: tab_iword
--code-----word-----: for example
01/00/00  computer
01/01/00  cpu
01/01/01  intel
01/01/02  amd
01/02/00  hard disk
01/02/01  Seagate
01/02/02  westwood
02/00/00  mobile
02/01/00  phone
02/01/01  samsung
02/01/02  iphone
02/02/00  pad
02/02/01  ipad
02/02/02  xpad


schema.xml snippet

<fieldType name="iword_tag" class="solr.TextField"> 
<analyzer type="index">
    <tokenizer class="solr.IwordTokenizerFactory"
tab_iword="tab_iword.txt"/>
</analyzer>
<analyzer type="query">
    <tokenizer class="solr.KeywordTokenizerFactory"/>
</analyzer>
</fieldType>
</types>

<field name="id" type="string" indexed="true" stored="true"
required="true"/>
<field name="content" type="text" indexed="true" stored="true"
required="false"/>
<field name="iword" type="iword_tag" indexed="true" stored="true"
required="false" multiValued="true"/>

dih-db-config.xml entity snippet
<entity name="test" query="select id,content,content as iword from
tab_post_content" transformer="RegexTransformer,..">
    <field column="id" name="id" />
    <field column="content" name="content" />
    <field column="iword" name="iword"/>
</entity>

How do I write the custom IwordTokenizerFactory code?
The Solr index should return a document like this:
<doc>
    <str name="id">0001</str>
    <str name="content">i have an ipad and Seagate hd</str>
    <arr name="iword">
      <str>02/02/01</str>
      <str>01/02/01</str>
    </arr>
</doc>

thanks.



--
View this message in context: 
http://lucene.472066.n3.nabble.com/industry-word-extract-tp4041069.html
Sent from the Solr - User mailing list archive at Nabble.com.

Reply via email to