I am trying to index a .docx file using SolrJ. I referred to this link: http://wiki.apache.org/solr/ContentStreamUpdateRequestExample
My code is : import java.io.File; import java.io.IOException; import org.apache.solr.client.solrj.SolrServer; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.request.AbstractUpdateRequest; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.impl.*; import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest; public class rich_index { public static void main(String[] args) { try { //Solr cell can also index MS file (2003 version and 2007 version) types. String fileName = "C:\\solr\\document\\src\\test1\\contract.docx"; //this will be unique Id used by Solr to index the file contents. String solrId = "contract.docx"; indexFilesSolrCell(fileName, solrId); } catch (Exception ex) { System.out.println(ex.toString()); } } public static void indexFilesSolrCell(String fileName, String solrId) throws IOException, SolrServerException { String urlString = "http://localhost:8080/solr/document"; SolrServer solr = new HttpSolrServer(urlString); ContentStreamUpdateRequest up = new ContentStreamUpdateRequest("/update/extract"); up.addFile(new File(fileName), "text"); up.setParam("literal.id", solrId); up.setParam("uprefix", "ignored_"); up.setParam("fmap.content", "contents"); up.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true); solr.request(up); QueryResponse rsp = solr.query(new SolrQuery("*:*")); System.out.println(rsp); } } This is my logs: Dec 22, 2013 12:27:58 AM org.apache.solr.update.processor.LogUpdateProcessor finish INFO: [document] webapp=/solr path=/update/extract params={fmap.content=contents&waitSearcher=true&commit=true&uprefix=ignored_&literal.id=contract.docx&wt=javabin&version=2&softCommit=false} {} 0 0 Dec 22, 2013 12:27:58 AM org.apache.solr.common.SolrException log SEVERE: null:java.lang.RuntimeException: java.lang.NoClassDefFoundError: *org/apache/xml/serialize/BaseMarkupSerializer* at 
org.apache.solr.servlet.SolrDispatchFilter.sendError(SolrDispatchFilter.java:651) at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:364) at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:141) at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:243) at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:210) at org.apache.catalina.core.StandardWrapperValve.invoke(StandardWrapperValve.java:224) at org.apache.catalina.core.StandardContextValve.invoke(StandardContextValve.java:169) at org.apache.catalina.core.StandardHostValve.invoke(StandardHostValve.java:168) at org.apache.catalina.valves.ErrorReportValve.invoke(ErrorReportValve.java:98) at org.apache.catalina.valves.AccessLogValve.invoke(AccessLogValve.java:928) at org.apache.catalina.core.StandardEngineValve.invoke(StandardEngineValve.java:118) at org.apache.catalina.connector.CoyoteAdapter.service(CoyoteAdapter.java:407) at org.apache.coyote.http11.AbstractHttp11Processor.process(AbstractHttp11Processor.java:987) at org.apache.coyote.AbstractProtocol$AbstractConnectionHandler.process(AbstractProtocol.java:539) at org.apache.tomcat.util.net.JIoEndpoint$SocketProcessor.run(JIoEndpoint.java:298) To resolve this, I added xerces.jar to the build path; it contains the org/apache/xml/serialize/BaseMarkupSerializer class, but the error is not resolved. What is the problem?
*Solrconfig:* <requestHandler name="/update/extract" class="solr.extraction.ExtractingRequestHandler" > <lst name="defaults"> <str name="map.Last-Modified">last_modified</str> <str name="fmap.content">contents</str> <str name="lowernames">true</str> <str name="uprefix">ignored_</str> </lst> </requestHandler> *schema:* <fields> <field name="doc_id" type="uuid" indexed="true" stored="true" default="NEW" multiValued="false"/> <field name="id" type="integer" indexed="true" stored="true" required="true" multiValued="false"/> <field name="contents" type="text" indexed="true" stored="true" multiValued="false"/> <field name="author" type="title_text" indexed="true" stored="true" multiValued="true"/> <field name="title" type="title_text" indexed="true" stored="true"/> <field name="date_modified" type="date" indexed="true" stored="true" multivalued="true"/> </fields> -- View this message in context: http://lucene.472066.n3.nabble.com/indexing-docx-using-solrj-tp4107737.html Sent from the Solr - User mailing list archive at Nabble.com.