What are the performance characteristics and implications of the SolrServer class's queryAndStreamResponse method over large result sets (one hundred thousand, one million records, etc.)?
http://lucene.apache.org/solr/4_2_0/solr-solrj/org/apache/solr/client/solrj/SolrServer.html#queryAndStreamResponse%28org.apache.solr.common.params.SolrParams,%20org.apache.solr.client.solrj.StreamingResponseCallback%29 <http://lucene.apache.org/solr/4_2_0/solr-solrj/org/apache/solr/client/solrj/SolrServer.html#queryAndStreamResponse%28org.apache.solr.common.params.SolrParams,%20org.apache.solr.client.solrj.StreamingResponseCallback%29> Would it be better to use create a custom streaming caching client, such as the following SolrDocumentStreamingEnumeration class? > package test; > > import java.util.Collections; > import java.util.Enumeration; > import java.util.LinkedList; > import java.util.List; > > import org.apache.solr.client.solrj.SolrQuery; > import org.apache.solr.client.solrj.SolrServerException; > import org.apache.solr.client.solrj.impl.HttpSolrServer; > import org.apache.solr.client.solrj.response.QueryResponse; > import org.apache.solr.common.SolrDocument; > import org.apache.solr.common.SolrDocumentList; > > public class SolrDocumentStreamingEnumeration implements Enumeration > <SolrDocument> > { > > protected HttpSolrServer server; > > protected SolrQuery solrQuery; > protected int solrQueryPosition; > > private List > <SolrDocument> > cache; > protected int cacheSize; > > public SolrDocumentStreamingEnumeration(HttpSolrServer server, > SolrQuery solrQuery, int cacheSize) { > > this.server = server; > this.solrQuery = solrQuery; > this.cacheSize = cacheSize; > > reset(); > } > > public synchronized void reset() { > cache = Collections.synchronizedList(new LinkedList > <SolrDocument> > ()); > solrQueryPosition = 0; > } > > @Override > public synchronized boolean hasMoreElements() { > > manageCache(); > > if (cache.size() < 1) { > // end of stream reached > return false; > } else { > return true; > } > > > } > > @Override > public synchronized SolrDocument nextElement() { > > manageCache(); > > if(cache.size() < 1) { > return null; > } > > return 
cache.remove(0); > } > > protected synchronized boolean manageCache() throws RuntimeException { > > if (cache.size() > 1) { > return true; > } > > try { > return updateCache(); > } catch (SolrServerException e) { > throw new RuntimeException(e); > } > } > > protected synchronized boolean updateCache() throws > SolrServerException { > > solrQuery.setStart(solrQueryPosition); > solrQuery.setRows(cacheSize); > > QueryResponse queryResponse = server.query(solrQuery); > > if (queryResponse.getStatus() != 0) { > return false; > } > > SolrDocumentList currentDocumentList = queryResponse.getResults(); > > boolean success = cache.addAll(currentDocumentList); > if (!success) { > return false; > } > > // only move the position on success > solrQueryPosition = solrQueryPosition + cacheSize; > return true; > > } > > > } I have also included the below is an example test class for demonstration purposes. > package test; > > import java.util.ArrayList; > import java.util.HashSet; > import java.util.List; > import java.util.Set; > > import junit.framework.Assert; > > import org.apache.solr.client.solrj.SolrQuery; > import org.apache.solr.client.solrj.impl.HttpSolrServer; > import org.apache.solr.common.SolrDocument; > import org.apache.solr.common.SolrInputDocument; > import org.junit.After; > import org.junit.Before; > import org.junit.Test; > > import test.HttpSolrServerTestSupport; > > public class SolrDocumentStreamingEnumerationTest { > > private static HttpSolrServer server = > HttpSolrServerTestSupport.getInstance().getServer(); > > private List > <SolrInputDocument> > getDefaultSolrInputDocumentList() { > > List > <SolrInputDocument> > solrInputDocumentList = new ArrayList > <SolrInputDocument> > (); > > { > SolrInputDocument solrInputDocument = new SolrInputDocument(); > solrInputDocument.addField("id", "1"); > solrInputDocument.addField("contents_s", "ONE"); > solrInputDocumentList.add(solrInputDocument); > } > > { > SolrInputDocument solrInputDocument = new 
SolrInputDocument(); > solrInputDocument.addField("id", "2"); > solrInputDocument.addField("contents_s", "TWO"); > solrInputDocumentList.add(solrInputDocument); > } > > { > SolrInputDocument solrInputDocument = new SolrInputDocument(); > solrInputDocument.addField("id", "3"); > solrInputDocument.addField("contents_s", "THREE"); > solrInputDocumentList.add(solrInputDocument); > } > > return solrInputDocumentList; > > } > > private SolrQuery getDefaultSolrQuery() { > SolrQuery solrQuery = new SolrQuery(); > solrQuery.setQuery("*:*"); > solrQuery.addFilterQuery("id:1 OR id:2 OR id:3"); > return solrQuery; > } > > private void removeTestSolrDocumentsIfTheyExist() throws Exception { > server.deleteById("1"); > server.deleteById("2"); > server.deleteById("3"); > server.commit(); > } > > @Before > public void setUp() throws Exception { > removeTestSolrDocumentsIfTheyExist(); > server.add(getDefaultSolrInputDocumentList()); > server.commit(); > } > > > @After > public void tearDown() throws Exception { > removeTestSolrDocumentsIfTheyExist(); > } > > > private void testEnumeration( > SolrDocumentStreamingEnumeration beanEnumeration, Integer > maxElementCountToRetrieve) { > > Set > <String> > beanIds = new HashSet > <String> > (); > > int loopCount = 0; > while (beanEnumeration.hasMoreElements()) { > loopCount++; > SolrDocument doc = beanEnumeration.nextElement(); > beanIds.add(String.valueOf(doc.get("id"))); > > if (maxElementCountToRetrieve != null && > maxElementCountToRetrieve.intValue() == loopCount ){ > break; > } > } > > Assert.assertTrue(beanIds.size() > 0); > Assert.assertEquals(loopCount, beanIds.size()); > } > > private void testEnumerationAtEnd(SolrDocumentStreamingEnumeration > beanEnumeration ) { > > // Check that end of enumeration has been reached > for (int i = 0; i < 100; i++) { > Assert.assertFalse(beanEnumeration.hasMoreElements()); > Assert.assertNull(beanEnumeration.nextElement()); > } > } > > > @Test > public void testEnumeration() { > > SolrQuery 
solrQuery = getDefaultSolrQuery(); > final int cacheSize = 1; > > SolrDocumentStreamingEnumeration beanEnumeration = > new SolrDocumentStreamingEnumeration( > server, > solrQuery, cacheSize); > > testEnumeration(beanEnumeration, null); > > testEnumerationAtEnd(beanEnumeration); > > } > > @Test > public void testReset() { > > SolrQuery solrQuery = getDefaultSolrQuery(); > > final int cacheSize = 10; > > SolrDocumentStreamingEnumeration beanEnumeration = > new SolrDocumentStreamingEnumeration( > server, solrQuery, cacheSize); > > > testEnumeration(beanEnumeration, null); > > beanEnumeration.reset(); > > testEnumeration(beanEnumeration, null); > > Assert.assertFalse(beanEnumeration.hasMoreElements()); > Assert.assertNull(beanEnumeration.nextElement()); > > beanEnumeration.reset(); > > Assert.assertTrue(beanEnumeration.hasMoreElements()); > Assert.assertNotNull(beanEnumeration.nextElement()); > > } > > @Test > public void testLargeCache() { > > SolrQuery solrQuery = getDefaultSolrQuery(); > > final int cacheSize = 1000000000; // 1 Billion > > SolrDocumentStreamingEnumeration beanEnumeration = > new SolrDocumentStreamingEnumeration( > server, solrQuery, cacheSize); > > > testEnumeration(beanEnumeration, null); > > testEnumerationAtEnd(beanEnumeration); > > } > > > } -- View this message in context: http://lucene.472066.n3.nabble.com/Enumeration-SolrServer-queryAndStreamResponse-vs-custom-streaming-caching-client-tp4050743.html Sent from the Solr - User mailing list archive at Nabble.com.