jpountz commented on code in PR #13697: URL: https://github.com/apache/lucene/pull/13697#discussion_r1742643399
########## lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java: ########## @@ -440,6 +477,99 @@ private String formatScoreExplanation(int matches, int start, int end, ScoreMode } } + private abstract static class BatchAwareLeafCollector extends FilterLeafCollector { + public BatchAwareLeafCollector(LeafCollector in) { + super(in); + } + + public void endBatch() throws IOException {} + } + + private static class BlockJoinBulkScorer extends BulkScorer { + private final BulkScorer childBulkScorer; + private final ScoreMode scoreMode; + private final BitSet parents; + private final int parentsLength; + + public BlockJoinBulkScorer(BulkScorer childBulkScorer, ScoreMode scoreMode, BitSet parents) { + this.childBulkScorer = childBulkScorer; + this.scoreMode = scoreMode; + this.parents = parents; + this.parentsLength = parents.length(); + } + + @Override + public int score(LeafCollector collector, Bits acceptDocs, int min, int max) + throws IOException { + // Subtract one because max is exclusive w.r.t. score but inclusive w.r.t prevSetBit + int lastParent = parents.prevSetBit(Math.min(parentsLength, max) - 1); + int prevParent = min == 0 ? -1 : parents.prevSetBit(min - 1); + if (lastParent == prevParent) { + // No parent docs in this range + return max; + } + + BatchAwareLeafCollector wrappedCollector = wrapCollector(collector); + childBulkScorer.score(wrappedCollector, acceptDocs, prevParent + 1, lastParent + 1); + wrappedCollector.endBatch(); + + // If we've scored the last parent in the bit set, return NO_MORE_DOCS to indicate we are done + // scoring + return lastParent + 1 >= parentsLength ? NO_MORE_DOCS : max; + } + + @Override + public long cost() { + return childBulkScorer.cost(); + } + + // TODO: Need to resolve parent doc IDs in multi-reader space? + private BatchAwareLeafCollector wrapCollector(LeafCollector collector) { + return new BatchAwareLeafCollector(collector) { + private final Score currentParentScore = new Score(scoreMode); Review Comment: You need to extend it to override `setMinCompetitiveScore` and delegate to `scorer` when appropriate, if you want dynamic pruning to work. ########## lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java: ########## @@ -440,6 +477,99 @@ private String formatScoreExplanation(int matches, int start, int end, ScoreMode } } + private abstract static class BatchAwareLeafCollector extends FilterLeafCollector { + public BatchAwareLeafCollector(LeafCollector in) { + super(in); + } + + public void endBatch() throws IOException {} + } + + private static class BlockJoinBulkScorer extends BulkScorer { + private final BulkScorer childBulkScorer; + private final ScoreMode scoreMode; + private final BitSet parents; + private final int parentsLength; + + public BlockJoinBulkScorer(BulkScorer childBulkScorer, ScoreMode scoreMode, BitSet parents) { + this.childBulkScorer = childBulkScorer; + this.scoreMode = scoreMode; + this.parents = parents; + this.parentsLength = parents.length(); + } + + @Override + public int score(LeafCollector collector, Bits acceptDocs, int min, int max) + throws IOException { + // Subtract one because max is exclusive w.r.t. score but inclusive w.r.t prevSetBit + int lastParent = parents.prevSetBit(Math.min(parentsLength, max) - 1); + int prevParent = min == 0 ? -1 : parents.prevSetBit(min - 1); + if (lastParent == prevParent) { + // No parent docs in this range + return max; + } + + BatchAwareLeafCollector wrappedCollector = wrapCollector(collector); + childBulkScorer.score(wrappedCollector, acceptDocs, prevParent + 1, lastParent + 1); + wrappedCollector.endBatch(); + + // If we've scored the last parent in the bit set, return NO_MORE_DOCS to indicate we are done + // scoring + return lastParent + 1 >= parentsLength ? NO_MORE_DOCS : max; + } + + @Override + public long cost() { + return childBulkScorer.cost(); + } + + // TODO: Need to resolve parent doc IDs in multi-reader space? + private BatchAwareLeafCollector wrapCollector(LeafCollector collector) { + return new BatchAwareLeafCollector(collector) { + private final Score currentParentScore = new Score(scoreMode); Review Comment: By the way, can you add a test that dynamic pruning works with your new bulk scorer? See e.g. `TestMaxScoreBulkScorer#testBasicsWithTwoDisjunctionClauses` for an example of a similar test. ########## lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java: ########## @@ -275,6 +286,53 @@ public float matchCost() { } } + private static class Score extends Scorable { + private final ScoreMode scoreMode; + private double score; + private int freq; + + public Score(ScoreMode scoreMode) { + this.scoreMode = scoreMode; + this.score = 0; + this.freq = 0; + } + + public void reset(float firstChildScore) { + score = firstChildScore; + freq = 1; + } + + public void addChildScore(float childScore) { Review Comment: I wonder if it should take a `Scorable` instead of a `float` so that we could encapsulate the logic of not computing the score if `scoreMode == NONE` in this class instead of forcing callers to care about it. ########## lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java: ########## @@ -440,6 +477,99 @@ private String formatScoreExplanation(int matches, int start, int end, ScoreMode } } + private abstract static class BatchAwareLeafCollector extends FilterLeafCollector { + public BatchAwareLeafCollector(LeafCollector in) { + super(in); + } + + public void endBatch() throws IOException {} + } + + private static class BlockJoinBulkScorer extends BulkScorer { + private final BulkScorer childBulkScorer; + private final ScoreMode scoreMode; + private final BitSet parents; + private final int parentsLength; + + public BlockJoinBulkScorer(BulkScorer childBulkScorer, ScoreMode scoreMode, BitSet parents) { + this.childBulkScorer = childBulkScorer; + this.scoreMode = scoreMode; + this.parents = parents; + this.parentsLength = parents.length(); + } + + @Override + public int score(LeafCollector collector, Bits acceptDocs, int min, int max) + throws IOException { + // Subtract one because max is exclusive w.r.t. score but inclusive w.r.t prevSetBit + int lastParent = parents.prevSetBit(Math.min(parentsLength, max) - 1); + int prevParent = min == 0 ? -1 : parents.prevSetBit(min - 1); + if (lastParent == prevParent) { + // No parent docs in this range + return max; Review Comment: Should it return `NO_MORE_DOCS` if `max >= parentsLength` like you're doing in the other return statement? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org