original-brownbear commented on code in PR #13860: URL: https://github.com/apache/lucene/pull/13860#discussion_r1986157505
########## lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java: ########## @@ -360,102 +350,95 @@ public static LeafSlice[] slices( boolean allowSegmentPartitions) { // Make a copy so we can sort: - List<LeafReaderContext> sortedLeaves = new ArrayList<>(leaves); + LeafReaderContext[] sortedLeaves = leaves.toArray(new LeafReaderContext[0]); // Sort by maxDoc, descending: - sortedLeaves.sort(Collections.reverseOrder(Comparator.comparingInt(l -> l.reader().maxDoc()))); + Arrays.sort( + sortedLeaves, (l1, l2) -> Integer.compare(l2.reader().maxDoc(), l1.reader().maxDoc())); if (allowSegmentPartitions) { - final List<List<LeafReaderContextPartition>> groupedLeafPartitions = new ArrayList<>(); - int currentSliceNumDocs = 0; - List<LeafReaderContextPartition> group = null; - for (LeafReaderContext ctx : sortedLeaves) { - if (ctx.reader().maxDoc() > maxDocsPerSlice) { - assert group == null; - // if the segment does not fit in a single slice, we split it into maximum 5 partitions of - // equal size - int numSlices = Math.min(5, Math.ceilDiv(ctx.reader().maxDoc(), maxDocsPerSlice)); - int numDocs = ctx.reader().maxDoc() / numSlices; - int maxDocId = numDocs; - int minDocId = 0; - for (int i = 0; i < numSlices - 1; i++) { - groupedLeafPartitions.add( - Collections.singletonList( - LeafReaderContextPartition.createFromAndTo(ctx, minDocId, maxDocId))); - minDocId = maxDocId; - maxDocId += numDocs; - } - // the last slice gets all the remaining docs - groupedLeafPartitions.add( - Collections.singletonList( - LeafReaderContextPartition.createFromAndTo( - ctx, minDocId, ctx.reader().maxDoc()))); - } else { - if (group == null) { - group = new ArrayList<>(); - groupedLeafPartitions.add(group); - } - group.add(LeafReaderContextPartition.createForEntireSegment(ctx)); - - currentSliceNumDocs += ctx.reader().maxDoc(); - // We only split a segment when it does not fit entirely in a slice. We don't partition - // the - // segment that makes the current slice (which holds multiple segments) go over - // maxDocsPerSlice. This means that a slice either contains multiple entire segments, or a - // single partition of a segment. - if (group.size() >= maxSegmentsPerSlice || currentSliceNumDocs > maxDocsPerSlice) { - group = null; - currentSliceNumDocs = 0; - } - } - } + return slicesWithPartitionedSegments(maxDocsPerSlice, maxSegmentsPerSlice, sortedLeaves); + } - LeafSlice[] slices = new LeafSlice[groupedLeafPartitions.size()]; - int upto = 0; - for (List<LeafReaderContextPartition> currentGroup : groupedLeafPartitions) { - slices[upto] = new LeafSlice(currentGroup); - ++upto; + final List<LeafSlice> groupedLeaves = new ArrayList<>(); + long docSum = 0; + List<LeafReaderContextPartition> group = null; + for (LeafReaderContext ctx : sortedLeaves) { + final int maxDoc = ctx.reader().maxDoc(); + var partition = LeafReaderContextPartition.createForEntireSegment(ctx); + if (maxDoc > maxDocsPerSlice) { + assert group == null; + groupedLeaves.add(new LeafSlice(Collections.singletonList(partition))); + } else { + if (group == null) { + group = new ArrayList<>(); + } + group.add(partition); + docSum += maxDoc; + if (docSum > maxDocsPerSlice || group.size() >= maxSegmentsPerSlice) { + groupedLeaves.add(new LeafSlice(group)); + group = null; + docSum = 0; + } } - return slices; } + if (group != null) { + groupedLeaves.add(new LeafSlice(group)); + } + return groupedLeaves.toArray(LeafSlice.EMPTY_ARRAY); + } - final List<List<LeafReaderContext>> groupedLeaves = new ArrayList<>(); - long docSum = 0; - List<LeafReaderContext> group = null; + private static LeafSlice[] slicesWithPartitionedSegments( + int maxDocsPerSlice, int maxSegmentsPerSlice, LeafReaderContext[] sortedLeaves) { + final List<List<LeafReaderContextPartition>> groupedLeafPartitions = new ArrayList<>(); + int currentSliceNumDocs = 0; + List<LeafReaderContextPartition> group = null; for (LeafReaderContext ctx : sortedLeaves) { if (ctx.reader().maxDoc() > maxDocsPerSlice) { assert group == null; - groupedLeaves.add(Collections.singletonList(ctx)); + // if the segment does not fit in a single slice, we split it into maximum 5 partitions of + // equal size + int numSlices = Math.min(5, Math.ceilDiv(ctx.reader().maxDoc(), maxDocsPerSlice)); + int numDocs = ctx.reader().maxDoc() / numSlices; + int maxDocId = numDocs; + int minDocId = 0; + for (int i = 0; i < numSlices - 1; i++) { + groupedLeafPartitions.add( + Collections.singletonList( + LeafReaderContextPartition.createFromAndTo(ctx, minDocId, maxDocId))); + minDocId = maxDocId; + maxDocId += numDocs; + } + // the last slice gets all the remaining docs + groupedLeafPartitions.add( + Collections.singletonList( + LeafReaderContextPartition.createFromAndTo(ctx, minDocId, ctx.reader().maxDoc()))); } else { if (group == null) { group = new ArrayList<>(); - group.add(ctx); - - groupedLeaves.add(group); - } else { - group.add(ctx); + groupedLeafPartitions.add(group); } - - docSum += ctx.reader().maxDoc(); - if (group.size() >= maxSegmentsPerSlice || docSum > maxDocsPerSlice) { + group.add(LeafReaderContextPartition.createForEntireSegment(ctx)); + + currentSliceNumDocs += ctx.reader().maxDoc(); + // We only split a segment when it does not fit entirely in a slice. We don't partition + // the + // segment that makes the current slice (which holds multiple segments) go over + // maxDocsPerSlice. This means that a slice either contains multiple entire segments, or a + // single partition of a segment. + if (group.size() >= maxSegmentsPerSlice || currentSliceNumDocs > maxDocsPerSlice) { group = null; - docSum = 0; + currentSliceNumDocs = 0; } } } - LeafSlice[] slices = new LeafSlice[groupedLeaves.size()]; + LeafSlice[] slices = new LeafSlice[groupedLeafPartitions.size()]; int upto = 0; - for (List<LeafReaderContext> currentLeaf : groupedLeaves) { - slices[upto] = - new LeafSlice( - new ArrayList<>( - currentLeaf.stream() - .map(LeafReaderContextPartition::createForEntireSegment) - .toList())); + for (List<LeafReaderContextPartition> currentGroup : groupedLeafPartitions) { + slices[upto] = new LeafSlice(currentGroup); Review Comment: Done in https://github.com/apache/lucene/pull/14336 sorry for delay on this one :) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org