HoustonPutman commented on code in PR #13914: URL: https://github.com/apache/lucene/pull/13914#discussion_r1958610123
########## lucene/facet/src/java/org/apache/lucene/facet/range/DynamicRangeUtil.java: ########## @@ -202,66 +208,83 @@ public SegmentOutput(int hitsLength) { * is used to compute the equi-weight per bin. */ public static List<DynamicRangeInfo> computeDynamicNumericRanges( - long[] values, long[] weights, int len, long totalWeight, int topN) { + long[] values, long[] weights, int len, long totalValue, long totalWeight, int topN) { assert values.length == weights.length && len <= values.length && len >= 0; assert topN >= 0; List<DynamicRangeInfo> dynamicRangeResult = new ArrayList<>(); if (len == 0 || topN == 0) { return dynamicRangeResult; } - new InPlaceMergeSorter() { - @Override - protected int compare(int index1, int index2) { - int cmp = Long.compare(values[index1], values[index2]); - if (cmp == 0) { - // If the values are equal, sort based on the weights. - // Any weight order is correct as long as it's deterministic. - return Long.compare(weights[index1], weights[index2]); - } - return cmp; - } + double rangeWeightTarget = (double) totalWeight / topN; + double[] kWeights = new double[topN]; + for (int i = 0; i < topN; i++) { + kWeights[i] = (i == 0 ? 0 : kWeights[i - 1]) + rangeWeightTarget; + } - @Override - protected void swap(int index1, int index2) { - long tmp = values[index1]; - values[index1] = values[index2]; - values[index2] = tmp; - tmp = weights[index1]; - weights[index1] = weights[index2]; - weights[index2] = tmp; - } - }.sort(0, len); + WeightedSelector.WeightRangeInfo[] kIndexResults = + new WeightedSelector() { + private long pivotValue; + private long pivotWeight; - long accuWeight = 0; - long valueSum = 0; - int count = 0; - int minIdx = 0; + @Override + protected long getWeight(int i) { + return weights[i]; + } - double rangeWeightTarget = (double) totalWeight / Math.min(topN, len); + @Override + protected long getValue(int i) { + return values[i]; + } - for (int i = 0; i < len; i++) { - accuWeight += weights[i]; - valueSum += values[i]; - count++; + @Override + protected void swap(int index1, int index2) { + long tmp = values[index1]; + values[index1] = values[index2]; + values[index2] = tmp; + tmp = weights[index1]; + weights[index1] = weights[index2]; + weights[index2] = tmp; + } - if (accuWeight >= rangeWeightTarget) { + @Override + protected void setPivot(int i) { + pivotValue = values[i]; + pivotWeight = weights[i]; + } + + @Override + protected int comparePivot(int j) { + int cmp = Long.compare(pivotValue, values[j]); + if (cmp == 0) { + // If the values are equal, sort based on the weights. + // Any weight order is correct as long as it's deterministic. + return Long.compare(pivotWeight, weights[j]); + } + return cmp; + } + }.select(0, len, totalValue, 0, totalWeight, 0, kWeights); + + int lastIdx = -1; + long lastTotalValue = 0; + long lastTotalWeight = 0; + for (int kIdx = 0; kIdx < topN; kIdx++) { Review Comment: Sounds good! It's pretty trivial to switch over to the other implementation, so happy to test that out when the benchmark is available! -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org