javanna commented on code in PR #13542: URL: https://github.com/apache/lucene/pull/13542#discussion_r1731271557
########## lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java: ########## @@ -890,11 +945,70 @@ public static class LeafSlice { * * @lucene.experimental */ - public final LeafReaderContext[] leaves; + public final LeafReaderContextPartition[] leaves; - public LeafSlice(List<LeafReaderContext> leavesList) { - Collections.sort(leavesList, Comparator.comparingInt(l -> l.docBase)); - this.leaves = leavesList.toArray(new LeafReaderContext[0]); + public LeafSlice(List<LeafReaderContextPartition> leafReaderContextPartitions) { + leafReaderContextPartitions.sort(Comparator.comparingInt(l -> l.ctx.docBase)); + // TODO should we sort by minDocId too? + this.leaves = leafReaderContextPartitions.toArray(new LeafReaderContextPartition[0]); + } + + /** + * Returns the total number of docs that a slice targets, by summing the number of docs that + * each of its leaf context partitions targets. + */ + public int getNumDocs() { + return Arrays.stream(leaves) + .map(LeafReaderContextPartition::getNumDocs) + .reduce(Integer::sum) + .get(); + } + } + + /** + * Holds information about a specific leaf context and the corresponding range of doc ids to + * search within. + * + * @lucene.experimental + */ + public static final class LeafReaderContextPartition { + private final int minDocId; + private final int maxDocId; + private final int numDocs; + public final LeafReaderContext ctx; + + private LeafReaderContextPartition( + LeafReaderContext leafReaderContext, int minDocId, int maxDocId, int numDocs) { + this.ctx = leafReaderContext; + this.minDocId = minDocId; + this.maxDocId = maxDocId; + this.numDocs = numDocs; Review Comment: Good with the proposed validation. You both have a good feeling about numDocs hiding something here :) It is very subtle, but I found that it is better to use `DocIdSetIterator.NO_MORE_DOCS` (that is, `Integer.MAX_VALUE`) when targeting the entire segment, or the last partition, as opposed to using maxDoc. I ended up hitting weird issues in that case. 
I may spend more time trying to address this. It did not feel so important at the time, given that numDocs is only used in tests to introspect how many docs each partition holds. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org