Hi,
   I installed Tika, copied its jar files into the Solr home library directory,
and also gave the path to the Tika configuration file. But the error is the
same. The Tika config file is as follows:


<?xml version="1.0" encoding="UTF-8"?>
<properties>
<!-- MIME type definitions used for content-type detection.
     The resource attribute is looked up on the classpath (the tika-core jar
     ships this file), not on the filesystem, so it must be the in-jar
     location. The previous value pointed at a build directory under
     /opt/tika-0.7, which cannot be resolved as a classpath resource.
     magic="false" is kept from the original configuration. -->
<mimeTypeRepository resource="/org/apache/tika/mime/tika-mimetypes.xml"
                    magic="false"/>

<parsers>
     <!-- Generic XML documents (application/xml).
          Metadata fields are selected with XPath on dc:* elements; the
          namespace entry carries the Dublin Core element-set URI.
          "outLinks" collects URI-like strings: scheme, ':', a body that may
          contain %XX escapes, and an optional '#fragment'.
          The regexSelect text is left exactly as written because its
          surrounding whitespace is part of the element's character data. -->
     <parser name="text-xml"
             class="org.apache.tika.parser.xml.XMLParser">
       <namespace>http://purl.org/dc/elements/1.1/</namespace>
       <mime>application/xml</mime>
       <extract>
         <content name="title" xpathSelect="//dc:title"/>
         <content name="subject" xpathSelect="//dc:subject"/>
         <content name="creator" xpathSelect="//dc:creator"/>
         <content name="description" xpathSelect="//dc:description"/>
         <content name="publisher" xpathSelect="//dc:publisher"/>
         <content name="contributor" xpathSelect="//dc:contributor"/>
         <content name="type" xpathSelect="//dc:type"/>
         <content name="format" xpathSelect="//dc:format"/>
         <content name="identifier" xpathSelect="//dc:identifier"/>
         <content name="language" xpathSelect="//dc:language"/>
         <content name="rights" xpathSelect="//dc:rights"/>
         <content name="outLinks">
           <regexSelect>
     <![CDATA[

([A-Za-z][A-Za-z0-9+.-]{1,120}:[A-Za-z0-9/](([A-Za-z0-9$_.+!*,;/?:@&~=-])|%[A-Fa-f0-9]{2}){1,333}(#([a-zA-Z0-9][a-zA-Z0-9$_.+!*,;/?:@&~=%-]{0,1000}))?)
     ]]>
     </regexSelect>
         </content>
       </extract>
     </parser>
     <!-- Microsoft Word documents (application/msword).
          Extracts the full text and collects URI-like strings as "outLinks".
          NOTE(review): confirm that the class named below exists in the Tika
          jars actually deployed; later Tika releases appear to provide MS
          Office parsing through org.apache.tika.parser.microsoft.OfficeParser
          (to be verified against the installed version).
          The regexSelect text is left exactly as written because its
          surrounding whitespace is part of the element's character data. -->
     <parser name="parse-msword"
             class="org.apache.tika.parser.msword.MsWordParser">
       <mime>application/msword</mime>
       <extract>
         <content name="fullText" textSelect="fullText"/>
         <content name="outLinks">
           <regexSelect>
     <![CDATA[

([A-Za-z][A-Za-z0-9+.-]{1,120}:[A-Za-z0-9/](([A-Za-z0-9$_.+!*,;/?:@&~=-])|%[A-Fa-f0-9]{2}){1,333}(#([a-zA-Z0-9][a-zA-Z0-9$_.+!*,;/?:@&~=%-]{0,1000}))?)
     ]]>
     </regexSelect>
         </content>
       </extract>
     </parser>
     <!-- Microsoft Excel workbooks (application/vnd.ms-excel).
          Extracts the full text and collects URI-like strings as "outLinks".
          NOTE(review): confirm that the class named below exists in the Tika
          jars actually deployed; later Tika releases appear to provide MS
          Office parsing through org.apache.tika.parser.microsoft.OfficeParser
          (to be verified against the installed version).
          The regexSelect text is left exactly as written because its
          surrounding whitespace is part of the element's character data. -->
     <parser name="parse-msexcel"
             class="org.apache.tika.parser.msexcel.MsExcelParser">
       <mime>application/vnd.ms-excel</mime>
       <extract>
         <content name="fullText" textSelect="fullText"/>
         <content name="outLinks">
           <regexSelect>
     <![CDATA[

([A-Za-z][A-Za-z0-9+.-]{1,120}:[A-Za-z0-9/](([A-Za-z0-9$_.+!*,;/?:@&~=-])|%[A-Fa-f0-9]{2}){1,333}(#([a-zA-Z0-9][a-zA-Z0-9$_.+!*,;/?:@&~=%-]{0,1000}))?)
     ]]>
     </regexSelect>
         </content>
       </extract>
     </parser>
     <!-- Microsoft PowerPoint presentations (application/vnd.ms-powerpoint).
          Extracts full text plus title, author and subject, and collects
          URI-like strings as "outLinks".
          NOTE(review): confirm that the class named below exists in the Tika
          jars actually deployed; later Tika releases appear to provide MS
          Office parsing through org.apache.tika.parser.microsoft.OfficeParser
          (to be verified against the installed version).
          The regexSelect text is left exactly as written because its
          surrounding whitespace is part of the element's character data. -->
     <parser name="parse-mspowerpoint"
             class="org.apache.tika.parser.mspowerpoint.MsPowerPointParser">
       <mime>application/vnd.ms-powerpoint</mime>
       <extract>
         <content name="fullText" textSelect="fullText"/>
         <content name="title" textSelect="title"/>
         <content name="author" textSelect="author"/>
         <content name="subject" textSelect="subject"/>
         <content name="outLinks">
           <regexSelect>
     <![CDATA[

([A-Za-z][A-Za-z0-9+.-]{1,120}:[A-Za-z0-9/](([A-Za-z0-9$_.+!*,;/?:@&~=-])|%[A-Fa-f0-9]{2}){1,333}(#([a-zA-Z0-9][a-zA-Z0-9$_.+!*,;/?:@&~=%-]{0,1000}))?)
     ]]>
     </regexSelect>
         </content>
       </extract>
     </parser>
     <!-- HTML pages; also mapped to application/x-asp.
          Extracts the full text and the title, and collects URI-like strings
          as "outLinks".
          The regexSelect text is left exactly as written because its
          surrounding whitespace is part of the element's character data. -->
     <parser name="parse-html"
             class="org.apache.tika.parser.html.HtmlParser">
       <mime>text/html</mime>
       <mime>application/x-asp</mime>
       <extract>
         <content name="fullText" textSelect="fullText"/>
         <content name="title" textSelect="title"/>
         <content name="outLinks">
           <regexSelect>
     <![CDATA[

([A-Za-z][A-Za-z0-9+.-]{1,120}:[A-Za-z0-9/](([A-Za-z0-9$_.+!*,;/?:@&~=-])|%[A-Fa-f0-9]{2}){1,333}(#([a-zA-Z0-9][a-zA-Z0-9$_.+!*,;/?:@&~=%-]{0,1000}))?)
     ]]>
     </regexSelect>
         </content>
       </extract>
     </parser>
     <!--
     Disabled alternative definition for "parse-html". It names
     org.apache.tika.parser.html.NekoHtmlParser and uses xpathSelect instead of
     textSelect for fullText and title. It declares the same parser name and
     the same two MIME types as the active HtmlParser entry, so it is kept
     here for reference only.
     NOTE(review): the "....." line below is placeholder text, not an element;
     it would need to be removed before this definition could be re-enabled.

     <parser name="parse-html"
class="org.apache.tika.parser.html.NekoHtmlParser">

     <mime>text/html</mime>
     <mime>application/x-asp</mime>
     .....
     <extract>
     <content name="fullText" xpathSelect="//*"/>
     <content name="title" xpathSelect="//title"/>
     <content name="outLinks">
     <regexSelect>
     <![CDATA[

([A-Za-z][A-Za-z0-9+.-]{1,120}:[A-Za-z0-9/](([A-Za-z0-9$_.+!*,;/?:@&~=-])|%[A-Fa-f0-9]{2}){1,333}(#([a-zA-Z0-9][a-zA-Z0-9$_.+!*,;/?:@&~=%-]{0,1000}))?)
     ]]>
     </regexSelect>
     </content>
     </extract>

     </parser>

     -->
     <!-- RTF documents (application/rtf).
          Extracts the full text and collects URI-like strings as "outLinks".
          The attribute was previously misspelled as "mame", which left this
          parser without the name attribute that every sibling parser has.
          The regexSelect text is left exactly as written because its
          surrounding whitespace is part of the element's character data. -->
     <parser name="parse-rtf"
             class="org.apache.tika.parser.rtf.RTFParser">
       <mime>application/rtf</mime>
       <extract>
         <content name="fullText" textSelect="fullText"/>
         <content name="outLinks">
           <regexSelect>
     <![CDATA[

([A-Za-z][A-Za-z0-9+.-]{1,120}:[A-Za-z0-9/](([A-Za-z0-9$_.+!*,;/?:@&~=-])|%[A-Fa-f0-9]{2}){1,333}(#([a-zA-Z0-9][a-zA-Z0-9$_.+!*,;/?:@&~=%-]{0,1000}))?)
     ]]>
     </regexSelect>
         </content>
       </extract>
     </parser>
     <!-- PDF documents (application/pdf).
          Extracts the full text and the listed document-information fields
          (title, author, creator, summary, keywords, producer, subject,
          trapped, creation and modification dates), and collects URI-like
          strings as "outLinks".
          The regexSelect text is left exactly as written because its
          surrounding whitespace is part of the element's character data. -->
     <parser name="parse-pdf"
             class="org.apache.tika.parser.pdf.PDFParser">
       <mime>application/pdf</mime>
       <extract>
         <content name="fullText" textSelect="fullText"/>
         <content name="title" textSelect="title"/>
         <content name="author" textSelect="author"/>
         <content name="creator" textSelect="creator"/>
         <content name="summary" textSelect="summary"/>
         <content name="keywords" textSelect="keywords"/>
         <content name="producer" textSelect="producer"/>
         <content name="subject" textSelect="subject"/>
         <content name="trapped" textSelect="trapped"/>
         <content name="creationDate" textSelect="creationDate"/>
         <content name="modificationDate" textSelect="modificationDate"/>
         <content name="outLinks">
           <regexSelect><![CDATA[

([A-Za-z][A-Za-z0-9+.-]{1,120}:[A-Za-z0-9/](([A-Za-z0-9$_.+!*,;/?:@&~=-])|%[A-Fa-f0-9]{2}){1,333}(#([a-zA-Z0-9][a-zA-Z0-9$_.+!*,;/?:@&~=%-]{0,1000}))?)
     ]]></regexSelect>
         </content>
       </extract>
     </parser>
     <!-- Plain text files (text/plain).
          Extracts the full text and collects URI-like strings as "outLinks".
          The regexSelect text is left exactly as written because its
          surrounding whitespace is part of the element's character data. -->
     <parser name="parse-txt"
             class="org.apache.tika.parser.txt.TXTParser">
       <mime>text/plain</mime>
       <extract>
         <content name="fullText" textSelect="fullText"/>
         <content name="outLinks">
           <regexSelect>
     <![CDATA[

([A-Za-z][A-Za-z0-9+.-]{1,120}:[A-Za-z0-9/](([A-Za-z0-9$_.+!*,;/?:@&~=-])|%[A-Fa-f0-9]{2}){1,333}(#([a-zA-Z0-9][a-zA-Z0-9$_.+!*,;/?:@&~=%-]{0,1000}))?)
     ]]>
     </regexSelect>
         </content>
       </extract>
     </parser>
     <!-- OpenOffice / OpenDocument text files
          (application/vnd.sun.xml.writer, application/vnd.oasis.opendocument.text).
          Descriptive fields are selected with XPath on dc:* and meta:*
          elements; the nb* fields read the count attributes of
          meta:document-statistic; fullText selects everything under
          office:body. "outLinks" collects URI-like strings.
          The regexSelect text is left exactly as written because its
          surrounding whitespace is part of the element's character data. -->
     <parser name="parse-openoffice"
             class="org.apache.tika.parser.opendocument.OpenOfficeParser">
       <mime>application/vnd.sun.xml.writer</mime>
       <mime>application/vnd.oasis.opendocument.text</mime>
       <extract>
         <content name="title" xpathSelect="//dc:title"/>
         <content name="subject" xpathSelect="//dc:subject"/>
         <content name="keyword" xpathSelect="//meta:keyword"/>
         <content name="creator" xpathSelect="//dc:creator"/>
         <content name="description" xpathSelect="//dc:description"/>
         <content name="date" xpathSelect="//dc:date"/>
         <content name="language" xpathSelect="//dc:language"/>
         <content name="nbTab" xpathSelect="//meta:document-statistic/@meta:table-count"/>
         <content name="nbObject" xpathSelect="//meta:document-statistic/@meta:object-count"/>
         <content name="nbImg" xpathSelect="//meta:document-statistic/@meta:image-count"/>
         <content name="nbPage" xpathSelect="//meta:document-statistic/@meta:page-count"/>
         <content name="nbPara" xpathSelect="//meta:document-statistic/@meta:paragraph-count"/>
         <content name="nbWord" xpathSelect="//meta:document-statistic/@meta:word-count"/>
         <content name="nbcharacter" xpathSelect="//meta:document-statistic/@meta:character-count"/>
         <content name="fullText" xpathSelect="//office:body//*"/>
         <content name="outLinks">
           <regexSelect>
    <![CDATA[

([A-Za-z][A-Za-z0-9+.-]{1,120}:[A-Za-z0-9/](([A-Za-z0-9$_.+!*,;/?:@&~=-])|%[A-Fa-f0-9]{2}){1,333}(#([a-zA-Z0-9][a-zA-Z0-9$_.+!*,;/?:@&~=%-]{0,1000}))?)
     ]]>
     </regexSelect>
         </content>
       </extract>
     </parser>
     </parsers>
    </properties>


with regards,
swaroop

Reply via email to