gf2121 commented on code in PR #14400: URL: https://github.com/apache/lucene/pull/14400#discussion_r2012455948
########## lucene/core/src/java/org/apache/lucene/search/DenseConjunctionBulkScorer.java: ########## @@ -171,37 +171,36 @@ private int scoreWindow( } } - if (acceptDocs == null) { - int minDocIDRunEnd = max; - for (DisiWrapper w : iterators) { - if (w.docID() > min) { - minDocIDRunEnd = min; - break; - } else { - minDocIDRunEnd = Math.min(minDocIDRunEnd, w.docIDRunEnd()); - } - } - - if (minDocIDRunEnd - min >= WINDOW_SIZE / 2) { - // We have a large range of doc IDs that all match. - rangeDocIdStream.from = min; - rangeDocIdStream.to = minDocIDRunEnd; - collector.collect(rangeDocIdStream); - return minDocIDRunEnd; - } - } - - int bitsetWindowMax = (int) Math.min(max, (long) min + WINDOW_SIZE); - + // Partition clauses of the conjunction into: + // - clauses that don't fully match the first half of the window and get evaluated via + // #loadIntoBitSet or leaf-frog, + // - other clauses that are used to compute the greatest possible window size that they fully + // match. + // This logic helps align scoring windows with the natural #docIDRunEnd() boundaries of the + // data, which helps evaluate fewer clauses per window - without allowing windows to become too + // small thanks to the WINDOW_SIZE/2 threshold. + int minDocIDRunEnd = max; for (DisiWrapper w : iterators) { - if (w.docID() > min || w.docIDRunEnd() < bitsetWindowMax) { + int docIdRunEnd = w.docIDRunEnd(); + if (w.docID() > min || (docIdRunEnd - min) < WINDOW_SIZE / 2) { windowApproximations.add(w.approximation()); if (w.twoPhase() != null) { windowTwoPhases.add(w.twoPhase()); } + } else { + minDocIDRunEnd = Math.min(minDocIDRunEnd, docIdRunEnd); } } + if (acceptDocs == null && windowApproximations.isEmpty()) { Review Comment: Out of curiosity and not related to this PR: Would it be worth dealing `acceptDocs != null` here as well so that we won't need to call `intoBitset`? ########## lucene/core/src/java/org/apache/lucene/search/DenseConjunctionBulkScorer.java: ########## @@ -171,37 +171,36 @@ private int scoreWindow( } } - if (acceptDocs == null) { - int minDocIDRunEnd = max; - for (DisiWrapper w : iterators) { - if (w.docID() > min) { - minDocIDRunEnd = min; - break; - } else { - minDocIDRunEnd = Math.min(minDocIDRunEnd, w.docIDRunEnd()); - } - } - - if (minDocIDRunEnd - min >= WINDOW_SIZE / 2) { - // We have a large range of doc IDs that all match. - rangeDocIdStream.from = min; - rangeDocIdStream.to = minDocIDRunEnd; - collector.collect(rangeDocIdStream); - return minDocIDRunEnd; - } - } - - int bitsetWindowMax = (int) Math.min(max, (long) min + WINDOW_SIZE); - + // Partition clauses of the conjunction into: + // - clauses that don't fully match the first half of the window and get evaluated via + // #loadIntoBitSet or leaf-frog, + // - other clauses that are used to compute the greatest possible window size that they fully + // match. + // This logic helps align scoring windows with the natural #docIDRunEnd() boundaries of the + // data, which helps evaluate fewer clauses per window - without allowing windows to become too + // small thanks to the WINDOW_SIZE/2 threshold. + int minDocIDRunEnd = max; for (DisiWrapper w : iterators) { - if (w.docID() > min || w.docIDRunEnd() < bitsetWindowMax) { + int docIdRunEnd = w.docIDRunEnd(); + if (w.docID() > min || (docIdRunEnd - min) < WINDOW_SIZE / 2) { Review Comment: > I believe that it only makes a difference when max-min < WINDOW_SIZE, where more clauses would now get evaluated Was this line making the difference and could be addressed by something like following code? I'm OK either way :) ``` int minRunEnd = max; final int minRunEndThreshold = Math.min(min + WINDOW_SIZE / 2, max); for (DisiWrapper w : iterators) { int docIdRunEnd = w.docIDRunEnd(); if (w.docID() > min || docIdRunEnd < minRunEndThreshold) { ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org