jimczi commented on a change in pull request #1725:
URL: https://github.com/apache/lucene-solr/pull/1725#discussion_r483531827
########## File path: lucene/core/src/test/org/apache/lucene/search/TestFieldSortOptimizationSkipping.java
##########
@@ -290,5 +299,114 @@ public void testFloatSortOptimization() throws IOException {
     dir.close();
   }
 
+  public void testDocSortOptimizationWithAfter() throws IOException {
+    final Directory dir = newDirectory();
+    final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
+    final int numDocs = atLeast(1500);
+    for (int i = 0; i < numDocs; ++i) {
+      final Document doc = new Document();
+      writer.addDocument(doc);
+      if ((i > 0) && (i % 500 == 0)) {
+        writer.commit();
+      }
+    }
+    final IndexReader reader = DirectoryReader.open(writer);
+    IndexSearcher searcher = new IndexSearcher(reader);
+    final int numHits = 3;
+    final int totalHitsThreshold = 3;
+    final int searchAfter = 1400;
+
+    // sort by _doc with search after should trigger optimization
+    {
+      final Sort sort = new Sort(FIELD_DOC);
+      FieldDoc after = new FieldDoc(searchAfter, Float.NaN, new Integer[]{searchAfter});
+      final TopFieldCollector collector = TopFieldCollector.create(sort, numHits, after, totalHitsThreshold);
+      searcher.search(new MatchAllDocsQuery(), collector);
+      TopDocs topDocs = collector.topDocs();
+      assertEquals(topDocs.scoreDocs.length, numHits);
+      for (int i = 0; i < numHits; i++) {
+        int expectedDocID = searchAfter + 1 + i;
+        assertEquals(expectedDocID, topDocs.scoreDocs[i].doc);
+      }
+      assertTrue(collector.isEarlyTerminated());
+      // check that very few hits were collected, and most hits before searchAfter were skipped
+      assertTrue(topDocs.totalHits.value < (numDocs - searchAfter));
+    }
+
+    // sort by _doc + _score with search after should trigger optimization
+    {
+      final Sort sort = new Sort(FIELD_DOC, FIELD_SCORE);
+      FieldDoc after = new FieldDoc(searchAfter, Float.NaN, new Object[]{searchAfter, 1.0f});
+      final TopFieldCollector collector = TopFieldCollector.create(sort, numHits, after, totalHitsThreshold);
+      searcher.search(new MatchAllDocsQuery(), collector);
+      TopDocs topDocs = collector.topDocs();
+      assertEquals(topDocs.scoreDocs.length, numHits);
+      for (int i = 0; i < numHits; i++) {
+        int expectedDocID = searchAfter + 1 + i;
+        assertEquals(expectedDocID, topDocs.scoreDocs[i].doc);
+      }
+      assertTrue(collector.isEarlyTerminated());
+      // assert that very few hits were collected, and most hits before searchAfter were skipped
+      assertTrue(topDocs.totalHits.value < (numDocs - searchAfter));
+    }
+
+    // sort by _doc desc should not trigger optimization
+    {
+      final Sort sort = new Sort(new SortField(null, SortField.Type.DOC, true));
+      FieldDoc after = new FieldDoc(searchAfter, Float.NaN, new Integer[]{searchAfter});
+      final TopFieldCollector collector = TopFieldCollector.create(sort, numHits, after, totalHitsThreshold);
+      searcher.search(new MatchAllDocsQuery(), collector);
+      TopDocs topDocs = collector.topDocs();
+      for (int i = 0; i < numHits; i++) {
+        int expectedDocID = searchAfter - 1 - i;
+        assertEquals(expectedDocID, topDocs.scoreDocs[i].doc);
+      }
+      assertEquals(topDocs.scoreDocs.length, numHits);
+      // assert that many hits were collected including all hits before searchAfter
+      assertTrue(topDocs.totalHits.value > searchAfter);
+    }
+
+    writer.close();
+    reader.close();
+    dir.close();
+  }
+
+  public void testDocSortOptimization() throws IOException {
+    final Directory dir = newDirectory();
+    final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
+    final int numDocs = atLeast(1500);

Review comment:
   why do you need that many documents ? `100` should be enough, no ?
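For illustration only, a sketch of how the setup could be shrunk along those lines; the 100/25/10 constants below are arbitrary choices, not values from the patch, and the LuceneTestCase helpers (atLeast, newDirectory) plus the existing writer are assumed:

```java
// smaller index, still spread over several segments
final int numDocs = atLeast(100);
for (int i = 0; i < numDocs; ++i) {
  writer.addDocument(new Document());
  if ((i > 0) && (i % 25 == 0)) {
    writer.commit(); // force a few small segments instead of 500-doc ones
  }
}
// resume near the end of the index so that most documents can still be skipped
final int searchAfter = numDocs - 10;
```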
########## File path: lucene/core/src/test/org/apache/lucene/search/TestFieldSortOptimizationSkipping.java
##########
@@ -290,5 +299,114 @@ public void testFloatSortOptimization() throws IOException {
+  public void testDocSortOptimization() throws IOException {
+    final Directory dir = newDirectory();
+    final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
+    final int numDocs = atLeast(1500);
+    for (int i = 0; i < numDocs; ++i) {
+      final Document doc = new Document();
+      writer.addDocument(doc);
+      if ((i > 0) && (i % 500 == 0)) {
+        writer.commit();
+      }
+    }
+    final IndexReader reader = DirectoryReader.open(writer);
+    IndexSearcher searcher = new IndexSearcher(reader);
+    final int numHits = 3;
+    final int totalHitsThreshold = 3;
+
+    // sort by _doc should skip all non-competitive documents
+    {
+      final Sort sort = new Sort(FIELD_DOC);
+      final TopFieldCollector collector = TopFieldCollector.create(sort, numHits, null, totalHitsThreshold);
+      searcher.search(new MatchAllDocsQuery(), collector);
+      TopDocs topDocs = collector.topDocs();
+      assertEquals(topDocs.scoreDocs.length, numHits);
+      for (int i = 0; i < numHits; i++) {
+        assertEquals(i, topDocs.scoreDocs[i].doc);
+      }
+      assertTrue(collector.isEarlyTerminated());
+      // check that very few hits were collected
+      assertTrue(topDocs.totalHits.value < 5);
+    }

Review comment:
   Can you add a test with a boolean query or a simple filter ?
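A possible shape for such a test, strictly as a sketch: it assumes the usual imports (StringField, Field, Term, TermQuery, BooleanQuery, BooleanClause) and the helpers already used in this class; the field name "tag" and the even/odd split are invented for illustration:

```java
public void testDocSortOptimizationWithFilter() throws IOException {
  final Directory dir = newDirectory();
  final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
  final int numDocs = atLeast(100);
  for (int i = 0; i < numDocs; ++i) {
    final Document doc = new Document();
    // tag every other document so a filter query only matches half of them
    doc.add(new StringField("tag", (i % 2 == 0) ? "even" : "odd", Field.Store.NO));
    writer.addDocument(doc);
  }
  final IndexReader reader = DirectoryReader.open(writer);
  final IndexSearcher searcher = new IndexSearcher(reader);
  final int numHits = 3;
  final int totalHitsThreshold = 3;

  // wrap the filter in a boolean query; sorting by _doc should still allow skipping
  final BooleanQuery query = new BooleanQuery.Builder()
      .add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST)
      .add(new TermQuery(new Term("tag", "even")), BooleanClause.Occur.FILTER)
      .build();
  final Sort sort = new Sort(FIELD_DOC);
  final TopFieldCollector collector = TopFieldCollector.create(sort, numHits, null, totalHitsThreshold);
  searcher.search(query, collector);
  final TopDocs topDocs = collector.topDocs();
  assertEquals(numHits, topDocs.scoreDocs.length);
  for (int i = 0; i < numHits; i++) {
    // only even doc ids carry the "even" tag, so the first hits should be 0, 2, 4
    assertEquals(2 * i, topDocs.scoreDocs[i].doc);
  }
  // sanity check on the hit count; a tighter bound would assert that most docs were skipped
  assertTrue(topDocs.totalHits.value < numDocs);

  writer.close();
  reader.close();
  dir.close();
}
```

Whether an isEarlyTerminated() assertion holds here depends on how the skipping logic in this PR interacts with the filter clause, so that check is left out of the sketch.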
########## File path: lucene/core/src/test/org/apache/lucene/search/TestFieldSortOptimizationSkipping.java
##########
@@ -290,5 +299,114 @@ public void testFloatSortOptimization() throws IOException {
     dir.close();
   }
 
+  public void testDocSortOptimizationWithAfter() throws IOException {
+    final Directory dir = newDirectory();
+    final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
+    final int numDocs = atLeast(1500);

Review comment:
   `1500` seems big for a unit test ;)

########## File path: lucene/core/src/test/org/apache/lucene/search/TestFieldSortOptimizationSkipping.java
##########
@@ -290,5 +299,114 @@ public void testFloatSortOptimization() throws IOException {
     dir.close();
   }
 
+  public void testDocSortOptimizationWithAfter() throws IOException {
+    final Directory dir = newDirectory();
+    final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
+    final int numDocs = atLeast(1500);
+    for (int i = 0; i < numDocs; ++i) {
+      final Document doc = new Document();
+      writer.addDocument(doc);
+      if ((i > 0) && (i % 500 == 0)) {
+        writer.commit();
+      }
+    }
+    final IndexReader reader = DirectoryReader.open(writer);
+    IndexSearcher searcher = new IndexSearcher(reader);
+    final int numHits = 3;
+    final int totalHitsThreshold = 3;
+    final int searchAfter = 1400;
+
+    // sort by _doc with search after should trigger optimization

Review comment:
   you can try different values of search after in a loop to increase the coverage?
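A sketch of that idea, reusing the setup of testDocSortOptimizationWithAfter; TestUtil.nextInt and random() come from the Lucene test framework, and the iteration count and bounds are arbitrary:

```java
for (int iter = 0; iter < 10; iter++) {
  // keep at least numHits documents after the chosen doc id so the expected ids exist
  final int searchAfter = TestUtil.nextInt(random(), 0, numDocs - numHits - 1);
  final Sort sort = new Sort(FIELD_DOC);
  final FieldDoc after = new FieldDoc(searchAfter, Float.NaN, new Integer[] {searchAfter});
  final TopFieldCollector collector = TopFieldCollector.create(sort, numHits, after, totalHitsThreshold);
  searcher.search(new MatchAllDocsQuery(), collector);
  final TopDocs topDocs = collector.topDocs();
  assertEquals(numHits, topDocs.scoreDocs.length);
  for (int i = 0; i < numHits; i++) {
    // hits must resume right after the requested doc id
    assertEquals(searchAfter + 1 + i, topDocs.scoreDocs[i].doc);
  }
  // the early-termination and totalHits checks from the existing block could be repeated here
}
```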