zacharymorn commented on a change in pull request #240: URL: https://github.com/apache/lucene/pull/240#discussion_r691795187
########## File path: lucene/core/src/java/org/apache/lucene/search/TopFieldCollectorManager.java ########## @@ -0,0 +1,141 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.search; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +/** + * Create a TopFieldCollectorManager which uses a shared hit counter to maintain number of hits and + * a shared {@link MaxScoreAccumulator} to propagate the minimum score across segments if the + * primary sort is by relevancy. + * + * <p>Note that a new collectorManager should be created for each search due to its internal states. 
+ */ +public class TopFieldCollectorManager implements CollectorManager<TopFieldCollector, TopFieldDocs> { + private final Sort sort; + private final int numHits; + private final FieldDoc after; + private final HitsThresholdChecker hitsThresholdChecker; + private final MaxScoreAccumulator minScoreAcc; + private final List<TopFieldCollector> collectors; + + public TopFieldCollectorManager(Sort sort, int numHits, FieldDoc after, int totalHitsThreshold) { + if (totalHitsThreshold < 0) { + throw new IllegalArgumentException( + "totalHitsThreshold must be >= 0, got " + totalHitsThreshold); + } + + this.sort = sort; + this.numHits = numHits; + this.after = after; + /* + nocommit + Should the following two be passed in instead? Possible custom initialization based on executor status and slices? + On the other hand, in a single-threaded environment, shared HitsThresholdChecker and MaxScoreAccumulator should be fast without lock contention anyway? Review comment: Hi @jpountz, for now I've implemented the above approach in https://github.com/apache/lucene/pull/240/commits/4af77405fcb914d497d82eb60f0011f93079ec7b, and the luceneutil benchmark shows there's no obvious impact to performance now: ``` Task QPS baseline StdDev QPS my_modified_version StdDev Pct diff p-value MedSpanNear 6.59 (5.6%) 6.30 (10.4%) -4.5% ( -19% - 12%) 0.091 BrowseDayOfYearTaxoFacets 0.45 (6.5%) 0.43 (10.0%) -4.3% ( -19% - 13%) 0.106 OrNotHighLow 321.23 (8.0%) 308.14 (7.5%) -4.1% ( -18% - 12%) 0.097 HighSpanNear 2.96 (6.4%) 2.84 (9.5%) -3.9% ( -18% - 12%) 0.129 BrowseDateTaxoFacets 0.45 (6.9%) 0.43 (9.6%) -3.9% ( -19% - 13%) 0.142 HighIntervalsOrdered 4.12 (4.7%) 3.97 (8.7%) -3.6% ( -16% - 10%) 0.102 BrowseMonthSSDVFacets 1.68 (6.6%) 1.63 (11.7%) -3.5% ( -20% - 15%) 0.239 HighSloppyPhrase 6.41 (8.6%) 6.18 (10.0%) -3.5% ( -20% - 16%) 0.233 IntNRQ 14.11 (6.2%) 13.62 (11.2%) -3.5% ( -19% - 14%) 0.225 Wildcard 14.90 (7.5%) 14.39 (8.2%) -3.4% ( -17% - 13%) 0.170 LowSpanNear 4.53 (5.7%) 4.38 (8.1%) -3.3% ( 
-16% - 11%) 0.135 OrHighLow 164.11 (6.2%) 158.84 (8.5%) -3.2% ( -16% - 12%) 0.174 BrowseMonthTaxoFacets 0.47 (6.1%) 0.45 (9.2%) -3.2% ( -17% - 12%) 0.193 LowIntervalsOrdered 3.02 (5.5%) 2.93 (9.1%) -3.2% ( -16% - 12%) 0.185 LowSloppyPhrase 8.80 (8.7%) 8.53 (9.7%) -3.0% ( -19% - 16%) 0.298 MedIntervalsOrdered 6.46 (5.8%) 6.26 (10.1%) -3.0% ( -17% - 13%) 0.248 AndHighMed 41.11 (6.6%) 39.91 (8.0%) -2.9% ( -16% - 12%) 0.205 Fuzzy1 30.54 (13.2%) 29.75 (14.1%) -2.6% ( -26% - 28%) 0.550 BrowseDayOfYearSSDVFacets 1.46 (6.1%) 1.43 (10.6%) -2.6% ( -18% - 15%) 0.350 HighPhrase 104.16 (5.5%) 101.53 (8.9%) -2.5% ( -16% - 12%) 0.284 OrHighNotLow 394.47 (6.8%) 385.27 (8.3%) -2.3% ( -16% - 13%) 0.329 Fuzzy2 25.00 (8.9%) 24.42 (9.8%) -2.3% ( -19% - 18%) 0.440 MedSloppyPhrase 9.24 (8.3%) 9.03 (9.5%) -2.3% ( -18% - 16%) 0.421 OrHighHigh 4.58 (5.9%) 4.47 (7.7%) -2.2% ( -14% - 12%) 0.304 OrNotHighMed 326.95 (6.6%) 319.95 (7.1%) -2.1% ( -14% - 12%) 0.322 LowPhrase 128.75 (5.9%) 126.06 (8.9%) -2.1% ( -15% - 13%) 0.382 MedPhrase 34.71 (5.7%) 34.01 (8.0%) -2.0% ( -14% - 12%) 0.354 OrNotHighHigh 303.59 (6.7%) 297.55 (8.0%) -2.0% ( -15% - 13%) 0.394 OrHighNotMed 304.72 (5.9%) 299.56 (7.5%) -1.7% ( -14% - 12%) 0.429 Prefix3 62.22 (5.1%) 61.22 (7.5%) -1.6% ( -13% - 11%) 0.431 OrHighMed 32.41 (6.8%) 32.02 (6.8%) -1.2% ( -13% - 13%) 0.579 HighTermTitleBDVSort 24.77 (20.2%) 24.53 (18.1%) -1.0% ( -32% - 46%) 0.871 Respell 20.64 (5.3%) 20.47 (7.1%) -0.8% ( -12% - 12%) 0.682 AndHighHigh 23.51 (6.0%) 23.34 (6.7%) -0.7% ( -12% - 12%) 0.727 LowTerm 724.06 (7.1%) 719.45 (5.6%) -0.6% ( -12% - 12%) 0.751 AndHighLow 272.87 (5.9%) 271.24 (7.3%) -0.6% ( -13% - 13%) 0.777 PKLookup 90.10 (5.5%) 89.65 (8.8%) -0.5% ( -14% - 14%) 0.831 HighTerm 876.85 (7.3%) 875.67 (7.8%) -0.1% ( -14% - 16%) 0.955 OrHighNotHigh 291.95 (8.8%) 291.77 (8.8%) -0.1% ( -16% - 19%) 0.982 HighTermDayOfYearSort 37.60 (16.6%) 37.65 (17.9%) 0.1% ( -29% - 41%) 0.981 HighTermMonthSort 18.43 (18.6%) 18.50 (20.3%) 0.4% ( -32% - 48%) 0.954 
TermDTSort 41.45 (15.4%) 41.99 (15.1%) 1.3% ( -25% - 37%) 0.786 MedTerm 700.62 (6.8%) 711.10 (6.0%) 1.5% ( -10% - 15%) 0.461 ``` Please let me know how this looks to you. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org