Re: [PR] Remove scoreAll() optimization from DefaultBulkScorer. [lucene]

via GitHub Mon, 06 Jan 2025 10:05:27 -0800


gsmiller commented on code in PR #14039:
URL: https://github.com/apache/lucene/pull/14039#discussion_r1904467456



##########
lucene/core/src/java/org/apache/lucene/search/Weight.java:
##########
@@ -289,75 +262,108 @@ static int scoreRange(
         }
       }
 
-      int doc = iterator.docID();
-      if (doc < min) {
-        if (doc == min - 1) {
-          doc = iterator.nextDoc();
+      if (iterator.docID() < min) {
+        if (iterator.docID() == min - 1) {
+          iterator.nextDoc();
         } else {
-          doc = iterator.advance(min);
+          iterator.advance(min);
         }
       }
 
+      // These various specializations help save some null checks in a hot 
loop, but as importantly
+      // if not more importantly, they help reduce the polymorphism of calls 
sites to nextDoc() and
+      // collect() because only a subset of collectors produce a competitive 
iterator, and the set
+      // of implementing classes for two-phase approximations is smaller than 
the set of doc id set
+      // iterator implementations.
       if (twoPhase == null && competitiveIterator == null) {
         // Optimize simple iterators with collectors that can't skip
-        while (doc < max) {
-          if (acceptDocs == null || acceptDocs.get(doc)) {
-            collector.collect(doc);
-          }
-          doc = iterator.nextDoc();
-        }
+        scoreIterator(collector, acceptDocs, iterator, max);
+      } else if (competitiveIterator == null) {
+        scoreTwoPhaseIterator(collector, acceptDocs, iterator, twoPhase, max);
+      } else if (twoPhase == null) {
+        scoreCompetitiveIterator(collector, acceptDocs, iterator, 
competitiveIterator, max);
       } else {
-        while (doc < max) {
-          if (competitiveIterator != null) {
-            assert competitiveIterator.docID() <= doc;
-            if (competitiveIterator.docID() < doc) {
-              competitiveIterator.advance(doc);
-            }
-            if (competitiveIterator.docID() != doc) {
-              doc = iterator.advance(competitiveIterator.docID());
-              continue;
-            }
-          }
+        scoreTwoPhaseOrCompetitiveIterator(
+            collector, acceptDocs, iterator, twoPhase, competitiveIterator, 
max);
+      }
 
-          if ((acceptDocs == null || acceptDocs.get(doc))
-              && (twoPhase == null || twoPhase.matches())) {
-            collector.collect(doc);
-          }
-          doc = iterator.nextDoc();
+      return iterator.docID();
+    }
+
+    private static void scoreIterator(
+        LeafCollector collector, Bits acceptDocs, DocIdSetIterator iterator, 
int max)
+        throws IOException {
+      for (int doc = iterator.docID(); doc < max; doc = iterator.nextDoc()) {
+        if (acceptDocs == null || acceptDocs.get(doc)) {
+          collector.collect(doc);
         }
       }
-
-      return doc;
     }
 
-    /**
-     * Specialized method to bulk-score all hits; we separate this from {@link 
#scoreRange} to help
-     * out hotspot. See <a 
href="https://issues.apache.org/jira/browse/LUCENE-5487";>LUCENE-5487</a>
-     */
-    static void scoreAll(
+    private static void scoreTwoPhaseIterator(
         LeafCollector collector,
+        Bits acceptDocs,
         DocIdSetIterator iterator,
         TwoPhaseIterator twoPhase,
-        Bits acceptDocs)
+        int max)
         throws IOException {
-      if (twoPhase == null) {
-        for (int doc = iterator.nextDoc();
-            doc != DocIdSetIterator.NO_MORE_DOCS;
-            doc = iterator.nextDoc()) {
-          if (acceptDocs == null || acceptDocs.get(doc)) {
-            collector.collect(doc);
+      for (int doc = iterator.docID(); doc < max; ) {
+        if ((acceptDocs == null || acceptDocs.get(doc)) && twoPhase.matches()) 
{
+          collector.collect(doc);
+        }
+
+        doc = iterator.nextDoc();

Review Comment:
   nit: Any reason to not put this in the for-loop control statement itself? 
Seems slightly more idiomatic?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Re: [PR] Remove scoreAll() optimization from DefaultBulkScorer. [lucene]

Reply via email to