http://wiki.apache.org/solr/post.jar
-- Jan Høydahl, search solution architect Cominvent AS - www.cominvent.com Solr Training - www.solrtraining.com 26. apr. 2013 kl. 13:28 skrev Furkan KAMACI <furkankam...@gmail.com>: > Hi Raymond; > > Now I get this error: SimplePostTool: WARNING: IOException while reading > response: java.io.FileNotFoundException: > > 2013/4/26 Raymond Wiker <rwi...@gmail.com> > >> You could start by doing >> >> java -jar post.jar -help >> >> --- the 7th example shows exactly what you need to do to add a document id. >> >> On Fri, Apr 26, 2013 at 11:30 AM, Furkan KAMACI <furkankam...@gmail.com >>> wrote: >> >>> I use Solr 4.2.1 and these are my fields: >>> >>> <field name="id" type="string" indexed="true" stored="true" >> required="true" >>> multiValued="false" /> >>> <field name="text" type="text_general" indexed="true" stored="true"/> >>> >>> >>> <!-- Common metadata fields, named specifically to match up with >>> SolrCell metadata when parsing rich documents such as Word, PDF. >>> Some fields are multiValued only because Tika currently may return >>> multiple values for them. 
Some metadata is parsed from the documents, >>> but there are some which come from the client context: >>> "content_type": From the HTTP headers of incoming stream >>> "resourcename": From SolrCell request param resource.name >>> --> >>> <field name="title" type="text_general" indexed="true" stored="true" >>> multiValued="true"/> >>> <field name="subject" type="text_general" indexed="true" stored="true"/> >>> <field name="description" type="text_general" indexed="true" >>> stored="true"/> >>> <field name="comments" type="text_general" indexed="true" stored="true"/> >>> <field name="author" type="text_general" indexed="true" stored="true"/> >>> <field name="keywords" type="text_general" indexed="true" stored="true"/> >>> <field name="category" type="text_general" indexed="true" stored="true"/> >>> <field name="resourcename" type="text_general" indexed="true" >>> stored="true"/> >>> <field name="url" type="text_general" indexed="true" stored="true"/> >>> <field name="content_type" type="string" indexed="true" stored="true" >>> multiValued="true"/> >>> <field name="last_modified" type="date" indexed="true" stored="true"/> >>> <field name="links" type="string" indexed="true" stored="true" >>> multiValued="true"/> >>> >>> <!-- Main body of document extracted by SolrCell. >>> NOTE: This field is not indexed by default, since it is also copied to >>> "text" >>> using copyField below. This is to save space. Use this field for >> returning >>> and >>> highlighting document content. Use the "text" field to search the >> content. 
>>> --> >>> <field name="content" type="text_general" indexed="false" stored="true" >>> multiValued="true"/> >>> >>> >>> <!-- catchall field, containing all other searchable text fields >>> (implemented >>> via copyField further on in this schema --> >>> <!-- >>> <field name="text" type="text_general" indexed="true" stored="false" >>> multiValued="true"/> >>> --> >>> <!-- catchall text field that indexes tokens both normally and in reverse >>> for efficient >>> leading wildcard queries. --> >>> <field name="text_rev" type="text_general_rev" indexed="true" >>> stored="false" multiValued="true"/> >>> >>> <!-- non-tokenized version of manufacturer to make it easier to sort or >>> group >>> results by manufacturer. copied from "manu" via copyField --> >>> <field name="manu_exact" type="string" indexed="true" stored="false"/> >>> >>> <field name="payloads" type="payloads" indexed="true" stored="true"/> >>> >>> <field name="_version_" type="long" indexed="true" stored="true"/> >>> >>> I run that command: >>> >>> java -Durl=http://localhost:8983/solr/update/extract -jar post.jar >>> 523387.pdf >>> >>> However I get that error, any ideas? 
>>> >>> Apr 26, 2013 12:26:51 PM org.apache.solr.common.SolrException log >>> SEVERE: org.apache.solr.common.SolrException: Document is missing >> mandatory >>> uniqueKey field: id >>> at >>> >>> >> org.apache.solr.update.AddUpdateCommand.getIndexedId(AddUpdateCommand.java:88) >>> at >>> >>> >> org.apache.solr.update.processor.DistributedUpdateProcessor.versionAdd(DistributedUpdateProcessor.java:464) >>> at >>> >>> >> org.apache.solr.update.processor.DistributedUpdateProcessor.processAdd(DistributedUpdateProcessor.java:346) >>> at >>> >>> >> org.apache.solr.update.processor.LogUpdateProcessor.processAdd(LogUpdateProcessorFactory.java:100) >>> at >>> >>> >> org.apache.solr.handler.extraction.ExtractingDocumentLoader.doAdd(ExtractingDocumentLoader.java:121) >>> at >>> >>> >> org.apache.solr.handler.extraction.ExtractingDocumentLoader.addDoc(ExtractingDocumentLoader.java:126) >>> at >>> >>> >> org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228) >>> at >>> >>> >> org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:74) >>> at >>> >>> >> org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:135) >>> at org.apache.solr.core.SolrCore.execute(SolrCore.java:1817) >>> at >>> >>> >> org.apache.solr.servlet.SolrDispatchFilter.execute(SolrDispatchFilter.java:639) >>> at >>> >>> >> org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:345) >>> at >>> >>> >> org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:141) >>> at >>> >>> >> org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1307) >>> at >>> >> org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:453) >>> at >>> >>> >> org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:137) >>> at >>> >> org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:560) >>> at >>> >>> >> 
org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:231) >>> at >>> >>> >> org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1072) >>> at >>> org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:382) >>> at >>> >>> >> org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:193) >>> at >>> >>> >> org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1006) >>> at >>> >>> >> org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:135) >>> at >>> >>> >> org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:255) >>> at >>> >>> >> org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:154) >>> at >>> >>> >> org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:116) >>> at org.eclipse.jetty.server.Server.handle(Server.java:365) >>> at >>> >>> >> org.eclipse.jetty.server.AbstractHttpConnection.handleRequest(AbstractHttpConnection.java:485) >>> at >>> >>> >> org.eclipse.jetty.server.BlockingHttpConnection.handleRequest(BlockingHttpConnection.java:53) >>> at >>> >>> >> org.eclipse.jetty.server.AbstractHttpConnection.content(AbstractHttpConnection.java:937) >>> at >>> >>> >> org.eclipse.jetty.server.AbstractHttpConnection$RequestHandler.content(AbstractHttpConnection.java:998) >>> at org.eclipse.jetty.http.HttpParser.parseNext(HttpParser.java:856) >>> at org.eclipse.jetty.http.HttpParser.parseAvailable(HttpParser.java:240) >>> at >>> >>> >> org.eclipse.jetty.server.BlockingHttpConnection.handle(BlockingHttpConnection.java:72) >>> at >>> >>> >> org.eclipse.jetty.server.bio.SocketConnector$ConnectorEndPoint.run(SocketConnector.java:264) >>> at >>> >>> >> org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:608) >>> at >>> >>> >> org.eclipse.jetty.util.thread.QueuedThreadPool$3.run(QueuedThreadPool.java:543) >>> at 
java.lang.Thread.run(Thread.java:722) >>> >>