That class seems to be in the xercesImpl jar. It is probably a dependency of Tika, or a library required by the underlying parser used for that kind of document.
Andrea On 21 Dec 2013 20:07, "sweety" <sweetyshind...@yahoo.com> wrote: > i am trying to index .docx file using solrj, i referred this link: > http://wiki.apache.org/solr/ContentStreamUpdateRequestExample > > My code is : > import java.io.File; > import java.io.IOException; > > import org.apache.solr.client.solrj.SolrServer; > import org.apache.solr.client.solrj.SolrServerException; > > import > org.apache.solr.client.solrj.request.AbstractUpdateRequest; > import org.apache.solr.client.solrj.response.QueryResponse; > import org.apache.solr.client.solrj.SolrQuery; > import org.apache.solr.client.solrj.impl.*; > import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest; > public class rich_index { > > public static void main(String[] args) { > try { > //Solr cell can also index MS file (2003 version and 2007 > version) > types. > String fileName = > "C:\\solr\\document\\src\\test1\\contract.docx"; > //this will be unique Id used by Solr to index the file > contents. > String solrId = "contract.docx"; > > indexFilesSolrCell(fileName, solrId); > > } catch (Exception ex) { > System.out.println(ex.toString()); > } > } > > public static void indexFilesSolrCell(String fileName, String > solrId) > throws IOException, SolrServerException { > > String urlString = "http://localhost:8080/solr/document"; > SolrServer solr = new HttpSolrServer(urlString); > > ContentStreamUpdateRequest up = new > ContentStreamUpdateRequest("/update/extract"); > > up.addFile(new File(fileName), "text"); > > > up.setParam("literal.id", solrId); > up.setParam("uprefix", "ignored_"); > up.setParam("fmap.content", "contents"); > > up.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, > true); > > solr.request(up); > > QueryResponse rsp = solr.query(new SolrQuery("*:*")); > > System.out.println(rsp); > } > } > > > > This is my logs: > Dec 22, 2013 12:27:58 AM > org.apache.solr.update.processor.LogUpdateProcessor > finish > INFO: [document] webapp=/solr path=/update/extract > > 
params={fmap.content=contents&waitSearcher=true&commit=true&uprefix=ignored_& > literal.id=contract.docx&wt=javabin&version=2&softCommit=false} > {} 0 0 > Dec 22, 2013 12:27:58 AM org.apache.solr.common.SolrException log > SEVERE: null:java.lang.RuntimeException: java.lang.NoClassDefFoundError: > *org/apache/xml/serialize/BaseMarkupSerializer* > at > > org.apache.solr.servlet.SolrDispatchFilter.sendError(SolrDispatchFilter.java:651) > at > > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:364) > at > > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:141) > at > > org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:243) > at > > org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:210) > at > > org.apache.catalina.core.StandardWrapperValve.invoke(StandardWrapperValve.java:224) > at > > org.apache.catalina.core.StandardContextValve.invoke(StandardContextValve.java:169) > at > > org.apache.catalina.core.StandardHostValve.invoke(StandardHostValve.java:168) > at > > org.apache.catalina.valves.ErrorReportValve.invoke(ErrorReportValve.java:98) > at > org.apache.catalina.valves.AccessLogValve.invoke(AccessLogValve.java:928) > at > > org.apache.catalina.core.StandardEngineValve.invoke(StandardEngineValve.java:118) > at > org.apache.catalina.connector.CoyoteAdapter.service(CoyoteAdapter.java:407) > at > > org.apache.coyote.http11.AbstractHttp11Processor.process(AbstractHttp11Processor.java:987) > at > > org.apache.coyote.AbstractProtocol$AbstractConnectionHandler.process(AbstractProtocol.java:539) > at > > org.apache.tomcat.util.net.JIoEndpoint$SocketProcessor.run(JIoEndpoint.java:298) > > To resolve this i added xerces.jar in the build path,this has. > org/apache/xml/serialize/BaseMarkupSerializer class,but the error is not > resolved. > What is the problem?? 
> > > *Solrconfig:* > <requestHandler name="/update/extract" > class="solr.extraction.ExtractingRequestHandler" > > <lst name="defaults"> > <str name="map.Last-Modified">last_modified</str> > <str name="fmap.content">contents</str> > <str name="lowernames">true</str> > <str name="uprefix">ignored_</str> > > </lst> > </requestHandler> > > *schema:* > <fields> > > <field name="doc_id" type="uuid" indexed="true" stored="true" default="NEW" > multiValued="false"/> > <field name="id" type="integer" indexed="true" stored="true" > required="true" > multiValued="false"/> > <field name="contents" type="text" indexed="true" stored="true" > multiValued="false"/> > <field name="author" type="title_text" indexed="true" stored="true" > multiValued="true"/> > <field name="title" type="title_text" indexed="true" stored="true"/> > <field name="date_modified" type="date" indexed="true" stored="true" > multivalued="true"/> > </fields> > > > > -- > View this message in context: > http://lucene.472066.n3.nabble.com/indexing-docx-using-solrj-tp4107737.html > Sent from the Solr - User mailing list archive at Nabble.com. >