rishabhmaurya commented on code in PR #13123: URL: https://github.com/apache/lucene/pull/13123#discussion_r1525325740
########## lucene/misc/src/java/org/apache/lucene/misc/index/BPIndexReorderer.java: ########## @@ -341,116 +344,94 @@ protected void compute() { */ private boolean shuffle( ForwardIndex forwardIndex, - IntsRef left, - IntsRef right, + IntsRef docIDs, + int midPoint, int[] leftDocFreqs, int[] rightDocFreqs, - float[] gains, + float[] biases, int iter) throws IOException { - assert left.ints == right.ints; - assert left.offset + left.length == right.offset; - // Computing gains is typically a bottleneck, because each iteration needs to iterate over all - // postings to recompute gains, and the total number of postings is usually one order of + // Computing biases is typically a bottleneck, because each iteration needs to iterate over + // all postings to recompute biases, and the total number of postings is usually one order of // magnitude or more larger than the number of docs. So we try to parallelize it. - ComputeGainsTask leftGainsTask = - new ComputeGainsTask( - left.ints, - gains, - left.offset, - left.offset + left.length, + new ComputeBiasTask( + docIDs.ints, + biases, + docIDs.offset, + docIDs.offset + docIDs.length, leftDocFreqs, rightDocFreqs, threadLocal, - depth); - ComputeGainsTask rightGainsTask = - new ComputeGainsTask( - right.ints, - gains, - right.offset, - right.offset + right.length, - rightDocFreqs, - leftDocFreqs, - threadLocal, - depth); - if (shouldFork(docIDs.length, docIDs.ints.length)) { - invokeAll(leftGainsTask, rightGainsTask); - } else { - leftGainsTask.compute(); - rightGainsTask.compute(); + depth) + .compute(); + + float maxLeftBias = Float.NEGATIVE_INFINITY; + for (int i = docIDs.offset; i < midPoint; ++i) { + maxLeftBias = Math.max(maxLeftBias, biases[i]); + } + float minRightBias = Float.POSITIVE_INFINITY; + for (int i = midPoint, end = docIDs.offset + docIDs.length; i < end; ++i) { + minRightBias = Math.min(minRightBias, biases[i]); + } + float gain = maxLeftBias - minRightBias; + // This uses the simulated annealing proposed by Mackenzie et al in "Tradeoff Options for + // Bipartite Graph Partitioning" by comparing the gain of swapping the doc from the left side + // that is most attracted to the right and the doc from the right side that is most attracted + // to the left against `iter` rather than zero. + if (gain <= iter) { + return false; } - class ByDescendingGainSorter extends IntroSorter { + new IntroSelector() { int pivotDoc; - float pivotGain; + float pivotBias; @Override protected void setPivot(int i) { - pivotDoc = left.ints[i]; - pivotGain = gains[i]; + pivotDoc = docIDs.ints[i]; + pivotBias = biases[i]; } @Override protected int comparePivot(int j) { - // Compare in reverse order to get a descending sort - int cmp = Float.compare(gains[j], pivotGain); + int cmp = Float.compare(pivotBias, biases[j]); Review Comment: @jpountz done https://github.com/apache/lucene/pull/13186. I wasn't sure if its worthy a PR, thus started discussion here. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org