jpountz commented on code in PR #13697: URL: https://github.com/apache/lucene/pull/13697#discussion_r1739357851
########## lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java: ########## @@ -275,6 +286,52 @@ public float matchCost() { } } + private static class Score extends Scorable { + private final ScoreMode scoreMode; + private float score; + private int freq; + + public Score(ScoreMode scoreMode) { + this.scoreMode = scoreMode; + reset(); + } + + public void reset() { + score = 0; + freq = 0; + } + + public void addChildScore(float childScore) { + switch (scoreMode) { + case Total: + case Avg: + score += childScore; + break; + case Min: + score = freq == 0 ? childScore : Math.min(score, childScore); + break; + case Max: + score = Math.max(score, childScore); + break; + case None: + break; + default: + throw new AssertionError(); + } + + freq++; + } + + @Override + public float score() { + float score = this.score; + if (scoreMode == ScoreMode.Avg && freq > 0) { Review Comment: technically freq should always be greater than 0 right? should it be an assert then? ########## lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java: ########## @@ -440,6 +498,98 @@ private String formatScoreExplanation(int matches, int start, int end, ScoreMode } } + private abstract static class BatchAwareLeafCollector extends FilterLeafCollector { + public BatchAwareLeafCollector(LeafCollector in) { + super(in); + } + + public void endBatch() throws IOException {} + } + + private static class BlockJoinBulkScorer extends BulkScorer { + private final BulkScorer childBulkScorer; + private final ScoreMode scoreMode; + private final BitSet parents; + private final int parentsLength; + private int currentMin; + + public BlockJoinBulkScorer(BulkScorer childBulkScorer, ScoreMode scoreMode, BitSet parents) { + this.childBulkScorer = childBulkScorer; + this.scoreMode = scoreMode; + this.parents = parents; + this.parentsLength = parents.length(); + this.currentMin = -1; + } + + @Override + public int score(LeafCollector collector, Bits acceptDocs, int min, int max) + throws IOException { + // Subtract one because max is exclusive w.r.t. score but inclusive w.r.t prevSetBit + int lastParent = parents.prevSetBit(Math.min(parentsLength - 1, max - 1)); + if (lastParent < min) { + // No parent docs in this range. Save the passed-in min so we can score over this range once + // we find a parent doc. + currentMin = min; + return max; + } else if (currentMin == -1) { + currentMin = min; + } Review Comment: I would rather not track `currentMin` and find the previous parent, e.g. something like that: ```java int prevParent = min == 0 ? -1 : parents.prevSetBit(min - 1); ``` And then score childBulkScorer using `min = prevParent + 1` and `max = lastParent` (`lastParent + 1` is technically correct too, it shouldn't matter since `lastParent + 1` must not match the child query anyway). ########## lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java: ########## @@ -440,6 +498,98 @@ private String formatScoreExplanation(int matches, int start, int end, ScoreMode } } + private abstract static class BatchAwareLeafCollector extends FilterLeafCollector { + public BatchAwareLeafCollector(LeafCollector in) { + super(in); + } + + public void endBatch() throws IOException {} + } + + private static class BlockJoinBulkScorer extends BulkScorer { + private final BulkScorer childBulkScorer; + private final ScoreMode scoreMode; + private final BitSet parents; + private final int parentsLength; + private int currentMin; + + public BlockJoinBulkScorer(BulkScorer childBulkScorer, ScoreMode scoreMode, BitSet parents) { + this.childBulkScorer = childBulkScorer; + this.scoreMode = scoreMode; + this.parents = parents; + this.parentsLength = parents.length(); + this.currentMin = -1; + } + + @Override + public int score(LeafCollector collector, Bits acceptDocs, int min, int max) + throws IOException { + // Subtract one because max is exclusive w.r.t. score but inclusive w.r.t prevSetBit + int lastParent = parents.prevSetBit(Math.min(parentsLength - 1, max - 1)); Review Comment: nit: let's do the - 1 only once? ```suggestion int lastParent = parents.prevSetBit(Math.min(parentsLength , max) - 1); ``` ########## lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java: ########## @@ -275,6 +286,52 @@ public float matchCost() { } } + private static class Score extends Scorable { + private final ScoreMode scoreMode; + private float score; + private int freq; + + public Score(ScoreMode scoreMode) { + this.scoreMode = scoreMode; + reset(); + } + + public void reset() { + score = 0; + freq = 0; + } + + public void addChildScore(float childScore) { + switch (scoreMode) { + case Total: + case Avg: + score += childScore; + break; + case Min: + score = freq == 0 ? childScore : Math.min(score, childScore); Review Comment: I wonder if we could simplify this by making `reset()` take the first child score, and then `addChildScore` is only called on follow-up matching child docs? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org