I am trying to index a .docx file using SolrJ. I followed the example at this link:
http://wiki.apache.org/solr/ContentStreamUpdateRequestExample

My code is :
import java.io.File;
import java.io.IOException;

import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
            
            import org.apache.solr.client.solrj.request.AbstractUpdateRequest;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.*;
import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;
public class rich_index {
         
             public static void main(String[] args) {
               try {
                 //Solr cell can also index MS file (2003 version and 2007 
version)
types.
                 String fileName = 
"C:\\solr\\document\\src\\test1\\contract.docx"; 
                 //this will be unique Id used by Solr to index the file 
contents.
                String solrId = "contract.docx"; 
                
                indexFilesSolrCell(fileName, solrId);
                
              } catch (Exception ex) {
                System.out.println(ex.toString());
              }
            }
             
           public static void indexFilesSolrCell(String fileName, String 
solrId) 
               throws IOException, SolrServerException {
               
               String urlString = "http://localhost:8080/solr/document";; 
               SolrServer solr = new HttpSolrServer(urlString);
               
               ContentStreamUpdateRequest up  = new
ContentStreamUpdateRequest("/update/extract");
               
               up.addFile(new File(fileName), "text");
               
               
                up.setParam("literal.id", solrId);
               up.setParam("uprefix", "ignored_");
               up.setParam("fmap.content", "contents");
               
               up.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);
               
               solr.request(up);
               
               QueryResponse rsp = solr.query(new SolrQuery("*:*"));
               
               System.out.println(rsp);
             }  
}



These are my logs:
Dec 22, 2013 12:27:58 AM org.apache.solr.update.processor.LogUpdateProcessor
finish
INFO: [document] webapp=/solr path=/update/extract
params={fmap.content=contents&waitSearcher=true&commit=true&uprefix=ignored_&literal.id=contract.docx&wt=javabin&version=2&softCommit=false}
{} 0 0
Dec 22, 2013 12:27:58 AM org.apache.solr.common.SolrException log
SEVERE: null:java.lang.RuntimeException: java.lang.NoClassDefFoundError:
*org/apache/xml/serialize/BaseMarkupSerializer*
        at
org.apache.solr.servlet.SolrDispatchFilter.sendError(SolrDispatchFilter.java:651)
        at
org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:364)
        at
org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:141)
        at
org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:243)
        at
org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:210)
        at
org.apache.catalina.core.StandardWrapperValve.invoke(StandardWrapperValve.java:224)
        at
org.apache.catalina.core.StandardContextValve.invoke(StandardContextValve.java:169)
        at
org.apache.catalina.core.StandardHostValve.invoke(StandardHostValve.java:168)
        at
org.apache.catalina.valves.ErrorReportValve.invoke(ErrorReportValve.java:98)
        at
org.apache.catalina.valves.AccessLogValve.invoke(AccessLogValve.java:928)
        at
org.apache.catalina.core.StandardEngineValve.invoke(StandardEngineValve.java:118)
        at
org.apache.catalina.connector.CoyoteAdapter.service(CoyoteAdapter.java:407)
        at
org.apache.coyote.http11.AbstractHttp11Processor.process(AbstractHttp11Processor.java:987)
        at
org.apache.coyote.AbstractProtocol$AbstractConnectionHandler.process(AbstractProtocol.java:539)
        at
org.apache.tomcat.util.net.JIoEndpoint$SocketProcessor.run(JIoEndpoint.java:298)

To resolve this, I added xerces.jar to the build path. It contains the
org/apache/xml/serialize/BaseMarkupSerializer class, but the error is still
not resolved.
What is the problem?


*Solrconfig:*
<!-- Request handler registered under the path /update/extract, backed by
     solr.extraction.ExtractingRequestHandler. The "defaults" list below carries
     the parameter values configured for this handler. -->
<requestHandler name="/update/extract" 
class="solr.extraction.ExtractingRequestHandler" >
<lst name="defaults">
<str name="map.Last-Modified">last_modified</str>
<!-- Same fmap.content / uprefix values that the client also sends per request. -->
<str name="fmap.content">contents</str>
<str name="lowernames">true</str>
<str name="uprefix">ignored_</str>

</lst>
</requestHandler>

*schema:*
<!-- Field declarations for the "document" core. -->
<fields> 

<field name="doc_id" type="uuid" indexed="true" stored="true" default="NEW"
multiValued="false"/>
<!-- NOTE(review): id is declared as integer and required, but the logged request
     sends literal.id=contract.docx, which is not an integer; confirm the
     intended type of this field. -->
<field name="id" type="integer" indexed="true" stored="true" required="true"
multiValued="false"/>
<field name="contents" type="text" indexed="true" stored="true"
multiValued="false"/>
<field name="author" type="title_text" indexed="true" stored="true"
multiValued="true"/>
<field name="title" type="title_text" indexed="true" stored="true"/>
<!-- Attribute spelled multiValued, matching every other field declaration here. -->
<field name="date_modified" type="date" indexed="true" stored="true"
multiValued="true"/>
</fields>



--
View this message in context: 
http://lucene.472066.n3.nabble.com/indexing-docx-using-solrj-tp4107737.html
Sent from the Solr - User mailing list archive at Nabble.com.

Reply via email to