original-brownbear commented on code in PR #13860:
URL: https://github.com/apache/lucene/pull/13860#discussion_r1986157505
########## lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java: ##########
@@ -360,102 +350,95 @@ public static LeafSlice[] slices(
       boolean allowSegmentPartitions) {
     // Make a copy so we can sort:
-    List<LeafReaderContext> sortedLeaves = new ArrayList<>(leaves);
+    LeafReaderContext[] sortedLeaves = leaves.toArray(new LeafReaderContext[0]);
     // Sort by maxDoc, descending:
-    sortedLeaves.sort(Collections.reverseOrder(Comparator.comparingInt(l -> l.reader().maxDoc())));
+    Arrays.sort(
+        sortedLeaves, (l1, l2) -> Integer.compare(l2.reader().maxDoc(), l1.reader().maxDoc()));
 
     if (allowSegmentPartitions) {
-      final List<List<LeafReaderContextPartition>> groupedLeafPartitions = new ArrayList<>();
-      int currentSliceNumDocs = 0;
-      List<LeafReaderContextPartition> group = null;
-      for (LeafReaderContext ctx : sortedLeaves) {
-        if (ctx.reader().maxDoc() > maxDocsPerSlice) {
-          assert group == null;
-          // if the segment does not fit in a single slice, we split it into maximum 5 partitions of
-          // equal size
-          int numSlices = Math.min(5, Math.ceilDiv(ctx.reader().maxDoc(), maxDocsPerSlice));
-          int numDocs = ctx.reader().maxDoc() / numSlices;
-          int maxDocId = numDocs;
-          int minDocId = 0;
-          for (int i = 0; i < numSlices - 1; i++) {
-            groupedLeafPartitions.add(
-                Collections.singletonList(
-                    LeafReaderContextPartition.createFromAndTo(ctx, minDocId, maxDocId)));
-            minDocId = maxDocId;
-            maxDocId += numDocs;
-          }
-          // the last slice gets all the remaining docs
-          groupedLeafPartitions.add(
-              Collections.singletonList(
-                  LeafReaderContextPartition.createFromAndTo(
-                      ctx, minDocId, ctx.reader().maxDoc())));
-        } else {
-          if (group == null) {
-            group = new ArrayList<>();
-            groupedLeafPartitions.add(group);
-          }
-          group.add(LeafReaderContextPartition.createForEntireSegment(ctx));
-
-          currentSliceNumDocs += ctx.reader().maxDoc();
-          // We only split a segment when it does not fit entirely in a slice. We don't partition
-          // the
-          // segment that makes the current slice (which holds multiple segments) go over
-          // maxDocsPerSlice. This means that a slice either contains multiple entire segments, or a
-          // single partition of a segment.
-          if (group.size() >= maxSegmentsPerSlice || currentSliceNumDocs > maxDocsPerSlice) {
-            group = null;
-            currentSliceNumDocs = 0;
-          }
-        }
-      }
+      return slicesWithPartitionedSegments(maxDocsPerSlice, maxSegmentsPerSlice, sortedLeaves);
+    }
 
-      LeafSlice[] slices = new LeafSlice[groupedLeafPartitions.size()];
-      int upto = 0;
-      for (List<LeafReaderContextPartition> currentGroup : groupedLeafPartitions) {
-        slices[upto] = new LeafSlice(currentGroup);
-        ++upto;
+    final List<LeafSlice> groupedLeaves = new ArrayList<>();
+    long docSum = 0;
+    List<LeafReaderContextPartition> group = null;
+    for (LeafReaderContext ctx : sortedLeaves) {
+      final int maxDoc = ctx.reader().maxDoc();
+      var partition = LeafReaderContextPartition.createForEntireSegment(ctx);
+      if (maxDoc > maxDocsPerSlice) {
+        assert group == null;
+        groupedLeaves.add(new LeafSlice(Collections.singletonList(partition)));
+      } else {
+        if (group == null) {
+          group = new ArrayList<>();
+        }
+        group.add(partition);
+        docSum += maxDoc;
+        if (docSum > maxDocsPerSlice || group.size() >= maxSegmentsPerSlice) {
+          groupedLeaves.add(new LeafSlice(group));
+          group = null;
+          docSum = 0;
+        }
       }
-      return slices;
     }
+    if (group != null) {
+      groupedLeaves.add(new LeafSlice(group));
+    }
+    return groupedLeaves.toArray(LeafSlice.EMPTY_ARRAY);
+  }
 
-    final List<List<LeafReaderContext>> groupedLeaves = new ArrayList<>();
-    long docSum = 0;
-    List<LeafReaderContext> group = null;
+  private static LeafSlice[] slicesWithPartitionedSegments(
+      int maxDocsPerSlice, int maxSegmentsPerSlice, LeafReaderContext[] sortedLeaves) {
+    final List<List<LeafReaderContextPartition>> groupedLeafPartitions = new ArrayList<>();
+    int currentSliceNumDocs = 0;
+    List<LeafReaderContextPartition> group = null;
     for (LeafReaderContext ctx : sortedLeaves) {
       if (ctx.reader().maxDoc() > maxDocsPerSlice) {
         assert group == null;
-        groupedLeaves.add(Collections.singletonList(ctx));
+        // if the segment does not fit in a single slice, we split it into maximum 5 partitions of
+        // equal size
+        int numSlices = Math.min(5, Math.ceilDiv(ctx.reader().maxDoc(), maxDocsPerSlice));
+        int numDocs = ctx.reader().maxDoc() / numSlices;
+        int maxDocId = numDocs;
+        int minDocId = 0;
+        for (int i = 0; i < numSlices - 1; i++) {
+          groupedLeafPartitions.add(
+              Collections.singletonList(
+                  LeafReaderContextPartition.createFromAndTo(ctx, minDocId, maxDocId)));
+          minDocId = maxDocId;
+          maxDocId += numDocs;
+        }
+        // the last slice gets all the remaining docs
+        groupedLeafPartitions.add(
+            Collections.singletonList(
+                LeafReaderContextPartition.createFromAndTo(ctx, minDocId, ctx.reader().maxDoc())));
       } else {
         if (group == null) {
           group = new ArrayList<>();
-          group.add(ctx);
-
-          groupedLeaves.add(group);
-        } else {
-          group.add(ctx);
+          groupedLeafPartitions.add(group);
         }
-
-        docSum += ctx.reader().maxDoc();
-        if (group.size() >= maxSegmentsPerSlice || docSum > maxDocsPerSlice) {
+        group.add(LeafReaderContextPartition.createForEntireSegment(ctx));
+
+        currentSliceNumDocs += ctx.reader().maxDoc();
+        // We only split a segment when it does not fit entirely in a slice. We don't partition
+        // the
+        // segment that makes the current slice (which holds multiple segments) go over
+        // maxDocsPerSlice. This means that a slice either contains multiple entire segments, or a
+        // single partition of a segment.
+        if (group.size() >= maxSegmentsPerSlice || currentSliceNumDocs > maxDocsPerSlice) {
           group = null;
-          docSum = 0;
+          currentSliceNumDocs = 0;
         }
       }
     }
-    LeafSlice[] slices = new LeafSlice[groupedLeaves.size()];
+    LeafSlice[] slices = new LeafSlice[groupedLeafPartitions.size()];
     int upto = 0;
-    for (List<LeafReaderContext> currentLeaf : groupedLeaves) {
-      slices[upto] =
-          new LeafSlice(
-              new ArrayList<>(
-                  currentLeaf.stream()
-                      .map(LeafReaderContextPartition::createForEntireSegment)
-                      .toList()));
+    for (List<LeafReaderContextPartition> currentGroup : groupedLeafPartitions) {
+      slices[upto] = new LeafSlice(currentGroup);

Review Comment:
   Done in https://github.com/apache/lucene/pull/14336, sorry for the delay on this one :) After benchmarking a little more as of late, this is more valuable than ever for us. For large segment-count use cases especially, this logic tends to make up a non-trivial part of the total execution time of the query phase without this fix (up to on the order of 10%). With all the fixes in here it ends up close to an order of magnitude cheaper in ES use cases, seemingly due to instruction-cache/code-size issues as it turned out :)
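   To make the partitioning arithmetic concrete, here is a small self-contained sketch of just the range computation from `slicesWithPartitionedSegments` (a hypothetical `PartitionSplitSketch` class with made-up numbers, not the actual Lucene code): a segment larger than `maxDocsPerSlice` is cut into at most 5 doc-id ranges of roughly equal width, and the last range absorbs the remainder of the integer division.

```java
import java.util.ArrayList;
import java.util.List;

// Hypothetical illustration class, not part of Lucene.
public class PartitionSplitSketch {

  // Mirrors the range computation in the diff above: at most 5 partitions,
  // each numDocs wide, with the last one extended to cover the remainder.
  static List<int[]> split(int maxDoc, int maxDocsPerSlice) {
    int numSlices = Math.min(5, Math.ceilDiv(maxDoc, maxDocsPerSlice)); // Java 18+
    int numDocs = maxDoc / numSlices; // integer division; remainder handled below
    List<int[]> ranges = new ArrayList<>();
    int minDocId = 0;
    int maxDocId = numDocs;
    for (int i = 0; i < numSlices - 1; i++) {
      ranges.add(new int[] {minDocId, maxDocId}); // [min, max) doc-id range
      minDocId = maxDocId;
      maxDocId += numDocs;
    }
    // the last range gets all the remaining docs
    ranges.add(new int[] {minDocId, maxDoc});
    return ranges;
  }

  public static void main(String[] args) {
    // Made-up sizes: a 1,000,003-doc segment with maxDocsPerSlice = 250,000.
    for (int[] r : split(1_000_003, 250_000)) {
      System.out.println("[" + r[0] + ", " + r[1] + ")");
    }
  }
}
```

   With the made-up inputs above this prints `[0, 200000)`, `[200000, 400000)`, `[400000, 600000)`, `[600000, 800000)` and `[800000, 1000003)`: four even ranges plus a last one that also covers the 3 leftover docs.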