This is an automated email from the ASF dual-hosted git repository. aherbert pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/commons-numbers.git
The following commit(s) were added to refs/heads/master by this push: new 690ae987 NUMBERS-206: Add selection for int arrays 690ae987 is described below commit 690ae98726ebde34288a9a1dddf31633d821be63 Author: Alex Herbert <aherb...@apache.org> AuthorDate: Mon Jun 17 17:27:42 2024 +0100 NUMBERS-206: Add selection for int arrays --- .../apache/commons/numbers/arrays/QuickSelect.java | 1261 ++++++++++++++++++++ .../apache/commons/numbers/arrays/Selection.java | 76 ++ .../org/apache/commons/numbers/arrays/Sorting.java | 228 ++++ .../commons/numbers/arrays/SelectionTest.java | 817 +++++++++++++ .../apache/commons/numbers/arrays/SortingTest.java | 340 +++++- .../examples/jmh/arrays/SelectionPerformance.java | 120 +- .../checkstyle/checkstyle-suppressions.xml | 1 + 7 files changed, 2828 insertions(+), 15 deletions(-) diff --git a/commons-numbers-arrays/src/main/java/org/apache/commons/numbers/arrays/QuickSelect.java b/commons-numbers-arrays/src/main/java/org/apache/commons/numbers/arrays/QuickSelect.java index f0b90795..f5ca7e48 100644 --- a/commons-numbers-arrays/src/main/java/org/apache/commons/numbers/arrays/QuickSelect.java +++ b/commons-numbers-arrays/src/main/java/org/apache/commons/numbers/arrays/QuickSelect.java @@ -1464,6 +1464,1267 @@ final class QuickSelect { return lower; } + /** + * Partition the elements between {@code ka} and {@code kb} using a heap select + * algorithm. It is assumed {@code left <= ka <= kb <= right}. + * + * @param a Data array to use to find out the K<sup>th</sup> value. + * @param left Lower bound (inclusive). + * @param right Upper bound (inclusive). + * @param ka Lower index to select. + * @param kb Upper index to select. + */ + static void heapSelect(int[] a, int left, int right, int ka, int kb) { + if (right <= left) { + return; + } + // Use the smallest heap + if (kb - left < right - ka) { + heapSelectLeft(a, left, right, ka, kb); + } else { + heapSelectRight(a, left, right, ka, kb); + } + } + + /** + * Partition the elements between {@code ka} and {@code kb} using a heap select + * algorithm. It is assumed {@code left <= ka <= kb <= right}. + * + * <p>For best performance this should be called with {@code k} in the lower + * half of the range. + * + * @param a Data array to use to find out the K<sup>th</sup> value. + * @param left Lower bound (inclusive). + * @param right Upper bound (inclusive). + * @param ka Lower index to select. + * @param kb Upper index to select. + */ + static void heapSelectLeft(int[] a, int left, int right, int ka, int kb) { + // Create a max heap in-place in [left, k], rooted at a[left] = max + // |l|-max-heap-|k|--------------| + // Build the heap using Floyd's heap-construction algorithm for heap size n. + // Start at parent of the last element in the heap (k), + // i.e. start = parent(n-1) : parent(c) = floor((c - 1) / 2) : c = k - left + int end = kb + 1; + for (int p = left + ((kb - left - 1) >> 1); p >= left; p--) { + maxHeapSiftDown(a, a[p], p, left, end); + } + // Scan the remaining data and insert + // Mitigate worst case performance on descending data by backward sweep + int max = a[left]; + for (int i = right + 1; --i > kb;) { + final int v = a[i]; + if (v < max) { + a[i] = max; + maxHeapSiftDown(a, v, left, left, end); + max = a[left]; + } + } + // Partition [ka, kb] + // |l|-max-heap-|k|--------------| + // | <-swap-> | then sift down reduced size heap + // Avoid sifting heap of size 1 + final int last = Math.max(left, ka - 1); + while (--end > last) { + maxHeapSiftDown(a, a[end], left, left, end); + a[end] = max; + max = a[left]; + } + } + + /** + * Sift the element down the max heap. + * + * <p>Assumes {@code root <= p < end}, i.e. the max heap is above root. + * + * @param a Heap data. + * @param v Value to sift. + * @param p Start position. + * @param root Root of the heap. + * @param end End of the heap (exclusive). + */ + private static void maxHeapSiftDown(int[] a, int v, int p, int root, int end) { + // child2 = root + 2 * (parent - root) + 2 + // = 2 * parent - root + 2 + while (true) { + // Right child + int c = (p << 1) - root + 2; + if (c > end) { + // No left child + break; + } + // Use the left child if right doesn't exist, or it is greater + if (c == end || a[c] < a[c - 1]) { + --c; + } + if (v >= a[c]) { + // Parent greater than largest child - done + break; + } + // Swap and descend + a[p] = a[c]; + p = c; + } + a[p] = v; + } + + /** + * Partition the elements between {@code ka} and {@code kb} using a heap select + * algorithm. It is assumed {@code left <= ka <= kb <= right}. + * + * <p>For best performance this should be called with {@code k} in the upper + * half of the range. + * + * @param a Data array to use to find out the K<sup>th</sup> value. + * @param left Lower bound (inclusive). + * @param right Upper bound (inclusive). + * @param ka Lower index to select. + * @param kb Upper index to select. + */ + static void heapSelectRight(int[] a, int left, int right, int ka, int kb) { + // Create a min heap in-place in [k, right], rooted at a[right] = min + // |--------------|k|-min-heap-|r| + // Build the heap using Floyd's heap-construction algorithm for heap size n. + // Start at parent of the last element in the heap (k), + // i.e. start = parent(n-1) : parent(c) = floor((c - 1) / 2) : c = right - k + int end = ka - 1; + for (int p = right - ((right - ka - 1) >> 1); p <= right; p++) { + minHeapSiftDown(a, a[p], p, right, end); + } + // Scan the remaining data and insert + // Mitigate worst case performance on descending data by backward sweep + int min = a[right]; + for (int i = left - 1; ++i < ka;) { + final int v = a[i]; + if (v > min) { + a[i] = min; + minHeapSiftDown(a, v, right, right, end); + min = a[right]; + } + } + // Partition [ka, kb] + // |--------------|k|-min-heap-|r| + // | <-swap-> | then sift down reduced size heap + // Avoid sifting heap of size 1 + final int last = Math.min(right, kb + 1); + while (++end < last) { + minHeapSiftDown(a, a[end], right, right, end); + a[end] = min; + min = a[right]; + } + } + + /** + * Sift the element down the min heap. + * + * <p>Assumes {@code root >= p > end}, i.e. the max heap is below root. + * + * @param a Heap data. + * @param v Value to sift. + * @param p Start position. + * @param root Root of the heap. + * @param end End of the heap (exclusive). + */ + private static void minHeapSiftDown(int[] a, int v, int p, int root, int end) { + // child2 = root - 2 * (root - parent) - 2 + // = 2 * parent - root - 2 + while (true) { + // Right child + int c = (p << 1) - root - 2; + if (c < end) { + // No left child + break; + } + // Use the left child if right doesn't exist, or it is less + if (c == end || a[c] > a[c + 1]) { + ++c; + } + if (v <= a[c]) { + // Parent less than smallest child - done + break; + } + // Swap and descend + a[p] = a[c]; + p = c; + } + a[p] = v; + } + + /** + * Partition the elements between {@code ka} and {@code kb} using a sort select + * algorithm. It is assumed {@code left <= ka <= kb <= right}. + * + * @param a Data array to use to find out the K<sup>th</sup> value. + * @param left Lower bound (inclusive). + * @param right Upper bound (inclusive). + * @param ka Lower index to select. + * @param kb Upper index to select. + */ + static void sortSelect(int[] a, int left, int right, int ka, int kb) { + // Combine the test for right <= left with + // avoiding the overhead of sort select on tiny data. + if (right - left <= MIN_SORTSELECT_SIZE) { + Sorting.sort(a, left, right); + return; + } + // Sort the smallest side + if (kb - left < right - ka) { + sortSelectLeft(a, left, right, kb); + } else { + sortSelectRight(a, left, right, ka); + } + } + + /** + * Partition the minimum {@code n} elements below {@code k} where + * {@code n = k - left + 1}. Uses an insertion sort algorithm. + * + * <p>Works with any {@code k} in the range {@code left <= k <= right} + * and performs a full sort of the range below {@code k}. + * + * <p>For best performance this should be called with + * {@code k - left < right - k}, i.e. + * to partition a value in the lower half of the range. + * + * @param a Data array to use to find out the K<sup>th</sup> value. + * @param left Lower bound (inclusive). + * @param right Upper bound (inclusive). + * @param k Index to select. + */ + static void sortSelectLeft(int[] a, int left, int right, int k) { + // Sort + for (int i = left; ++i <= k;) { + final int v = a[i]; + // Move preceding higher elements above (if required) + if (v < a[i - 1]) { + int j = i; + while (--j >= left && v < a[j]) { + a[j + 1] = a[j]; + } + a[j + 1] = v; + } + } + // Scan the remaining data and insert + // Mitigate worst case performance on descending data by backward sweep + int m = a[k]; + for (int i = right + 1; --i > k;) { + final int v = a[i]; + if (v < m) { + a[i] = m; + int j = k; + while (--j >= left && v < a[j]) { + a[j + 1] = a[j]; + } + a[j + 1] = v; + m = a[k]; + } + } + } + + /** + * Partition the maximum {@code n} elements above {@code k} where + * {@code n = right - k + 1}. Uses an insertion sort algorithm. + * + * <p>Works with any {@code k} in the range {@code left <= k <= right} + * and can be used to perform a full sort of the range above {@code k}. + * + * <p>For best performance this should be called with + * {@code k - left > right - k}, i.e. + * to partition a value in the upper half of the range. + * + * @param a Data array to use to find out the K<sup>th</sup> value. + * @param left Lower bound (inclusive). + * @param right Upper bound (inclusive). + * @param k Index to select. + */ + static void sortSelectRight(int[] a, int left, int right, int k) { + // Sort + for (int i = right; --i >= k;) { + final int v = a[i]; + // Move succeeding lower elements below (if required) + if (v > a[i + 1]) { + int j = i; + while (++j <= right && v > a[j]) { + a[j - 1] = a[j]; + } + a[j - 1] = v; + } + } + // Scan the remaining data and insert + // Mitigate worst case performance on descending data by backward sweep + int m = a[k]; + for (int i = left - 1; ++i < k;) { + final int v = a[i]; + if (v > m) { + a[i] = m; + int j = k; + while (++j <= right && v > a[j]) { + a[j - 1] = a[j]; + } + a[j - 1] = v; + m = a[k]; + } + } + } + + /** + * Partition the array such that index {@code k} corresponds to its correctly + * sorted value in the equivalent fully sorted array. + * + * <p>Assumes {@code k} is a valid index into [left, right]. + * + * @param a Values. + * @param left Lower bound of data (inclusive). + * @param right Upper bound of data (inclusive). + * @param k Index. + */ + static void select(int[] a, int left, int right, int k) { + quickSelectAdaptive(a, left, right, k, k, new int[1], MODE_FR_SAMPLING); + } + + /** + * Partition the array such that indices {@code k} correspond to their correctly + * sorted value in the equivalent fully sorted array. + * + * <p>The count of the number of used indices is returned. If the keys are sorted in-place, + * the count is returned as a negative. + * + * @param a Values. + * @param left Lower bound of data (inclusive). + * @param right Upper bound of data (inclusive). + * @param k Indices (may be destructively modified). + * @param n Count of indices. + * @throws IndexOutOfBoundsException if any index {@code k} is not within the + * sub-range {@code [left, right]} + */ + static void select(int[] a, int left, int right, int[] k, int n) { + if (n == 1) { + quickSelectAdaptive(a, left, right, k[0], k[0], new int[1], MODE_FR_SAMPLING); + return; + } + + // Interval creation validates the indices are in [left, right] + final UpdatingInterval keys = IndexSupport.createUpdatingInterval(left, right, k, n); + + // Note: If the keys are not separated then they are effectively a single key. + // Any split of keys separated by the sort select size + // will be finished on the next iteration. + final int k1 = keys.left(); + final int kn = keys.right(); + if (kn - k1 < DP_SORTSELECT_SIZE) { + quickSelectAdaptive(a, left, right, k1, kn, new int[1], MODE_FR_SAMPLING); + } else { + // Dual-pivot mode with small range sort length configured using index density + dualPivotQuickSelect(a, left, right, keys, dualPivotFlags(left, right, k1, kn)); + } + } + + /** + * Partition the array such that indices {@code k} correspond to their + * correctly sorted value in the equivalent fully sorted array. + * + * <p>For all indices {@code [ka, kb]} and any index {@code i}: + * + * <pre>{@code + * data[i < ka] <= data[ka] <= data[kb] <= data[kb < i] + * }</pre> + * + * <p>This function accepts indices {@code [ka, kb]} that define the + * range of indices to partition. It is expected that the range is small. + * + * <p>The {@code flags} are used to control the sampling mode and adaption of + * the index within the sample. + * + * <p>Returns the bounds containing {@code [ka, kb]}. These may be lower/higher + * than the keys if equal values are present in the data. + * + * @param a Values. + * @param left Lower bound of data (inclusive, assumed to be strictly positive). + * @param right Upper bound of data (inclusive, assumed to be strictly positive). + * @param ka First key of interest. + * @param kb Last key of interest. + * @param bounds Upper bound of the range containing {@code [ka, kb]} (inclusive). + * @param flags Adaption flags. + * @return Lower bound of the range containing {@code [ka, kb]} (inclusive). + */ + static int quickSelectAdaptive(int[] a, int left, int right, int ka, int kb, + int[] bounds, int flags) { + int l = left; + int r = right; + int m = flags; + while (true) { + // Select when ka and kb are close to the same end + // |l|-----|ka|kkkkkkkk|kb|------|r| + if (Math.min(kb - l, r - ka) < LINEAR_SORTSELECT_SIZE) { + sortSelect(a, l, r, ka, kb); + bounds[0] = kb; + return ka; + } + + // Only target ka; kb is assumed to be close + int p0; + final int n = r - l; + // f in [0, 1] + final double f = (double) (ka - l) / n; + // Record the larger margin (start at 1/4) to create the estimated size. + // step L R + // far left 1/12 1/3 (use 1/4 + 1/32 + 1/64 ~ 0.328) + // left 1/6 1/4 + // middle 2/9 2/9 (use 1/4 - 1/32 ~ 0.219) + int margin = n >> 2; + if (m < MODE_SAMPLING && r - l > FR_SAMPLING_SIZE) { + // Floyd-Rivest sample step uses the same margins + p0 = sampleStep(a, l, r, ka, bounds); + if (f <= STEP_FAR_LEFT || f >= STEP_FAR_RIGHT) { + margin += (n >> 5) + (n >> 6); + } else if (f > STEP_LEFT && f < STEP_RIGHT) { + margin -= n >> 5; + } + } else if (f <= STEP_LEFT) { + if (f <= STEP_FAR_LEFT) { + margin += (n >> 5) + (n >> 6); + p0 = repeatedStepFarLeft(a, l, r, ka, bounds, m); + } else { + p0 = repeatedStepLeft(a, l, r, ka, bounds, m); + } + } else if (f >= STEP_RIGHT) { + if (f >= STEP_FAR_RIGHT) { + margin += (n >> 5) + (n >> 6); + p0 = repeatedStepFarRight(a, l, r, ka, bounds, m); + } else { + p0 = repeatedStepRight(a, l, r, ka, bounds, m); + } + } else { + margin -= n >> 5; + p0 = repeatedStep(a, l, r, ka, bounds, m); + } + + // Note: Here we expect [ka, kb] to be small and splitting is unlikely. + // p0 p1 + // |l|--|ka|kkkk|kb|--|P|-------------------|r| + // |l|----------------|P|--|ka|kkk|kb|------|r| + // |l|-----------|ka|k|P|k|kb|--------------|r| + final int p1 = bounds[0]; + if (kb < p0) { + // Entirely on left side + r = p0 - 1; + } else if (ka > p1) { + // Entirely on right side + l = p1 + 1; + } else { + // Pivot splits [ka, kb]. Expect ends to be close to the pivot and finish. + // Here we set the bounds for use after median-of-medians pivot selection. + // In the event there are many equal values this allows collecting those + // known to be equal together when moving around the medians sample. + if (kb > p1) { + sortSelectLeft(a, p1 + 1, r, kb); + bounds[0] = kb; + } + if (ka < p0) { + sortSelectRight(a, l, p0 - 1, ka); + p0 = ka; + } + return p0; + } + // Update mode based on target partition size + if (r - l > n - margin) { + m++; + } + } + } + + /** + * Partition an array slice around a pivot. Partitioning exchanges array elements such + * that all elements smaller than pivot are before it and all elements larger than + * pivot are after it. + * + * <p>Partitions a Floyd-Rivest sample around a pivot offset so that the input {@code k} will + * fall in the smaller partition when the entire range is partitioned. + * + * <p>Assumes the range {@code r - l} is large. + * + * @param a Data array. + * @param l Lower bound (inclusive). + * @param r Upper bound (inclusive). + * @param k Target index. + * @param upper Upper bound (inclusive) of the pivot range. + * @return Lower bound (inclusive) of the pivot range. + */ + private static int sampleStep(int[] a, int l, int r, int k, int[] upper) { + // Floyd-Rivest: use SELECT recursively on a sample of size S to get an estimate + // for the (k-l+1)-th smallest element into a[k], biased slightly so that the + // (k-l+1)-th element is expected to lie in the smaller set after partitioning. + final int n = r - l + 1; + final int ith = k - l + 1; + final double z = Math.log(n); + // sample size = 0.5 * n^(2/3) + final double s = 0.5 * Math.exp(0.6666666666666666 * z); + final double sd = 0.5 * Math.sqrt(z * s * (n - s) / n) * Integer.signum(ith - (n >> 1)); + final int ll = Math.max(l, (int) (k - ith * s / n + sd)); + final int rr = Math.min(r, (int) (k + (n - ith) * s / n + sd)); + // Sample recursion restarts from [ll, rr] + final int p = quickSelectAdaptive(a, ll, rr, k, k, upper, MODE_FR_SAMPLING); + return expandPartition(a, l, r, ll, rr, p, upper[0], upper); + } + + /** + * Partition an array slice around a pivot. Partitioning exchanges array elements such + * that all elements smaller than pivot are before it and all elements larger than + * pivot are after it. + * + * <p>Assumes the range {@code r - l >= 8}; the caller is responsible for selection on a smaller + * range. If using a 12th-tile for sampling then assumes {@code r - l >= 11}. + * + * <p>Uses the Chen and Dumitrescu repeated step median-of-medians-of-medians algorithm + * with the median of 3 then median of 3; the final sample is placed in the + * 5th 9th-tile; the pivot chosen from the sample is adaptive using the input {@code k}. + * + * <p>Given a pivot in the middle of the sample this has margins of 2/9 and 2/9. + * + * @param a Data array. + * @param l Lower bound (inclusive). + * @param r Upper bound (inclusive). + * @param k Target index. + * @param upper Upper bound (inclusive) of the pivot range. + * @param flags Control flags. + * @return Lower bound (inclusive) of the pivot range. + */ + private static int repeatedStep(int[] a, int l, int r, int k, int[] upper, int flags) { + // Adapted from Alexandrescu (2016), algorithm 8. + int fp; + int s; + int p; + if (flags <= MODE_SAMPLING) { + // Median into a 12th-tile + fp = (r - l + 1) / 12; + // Position the sample around the target k + s = k - mapDistance(k - l, l, r, fp); + p = k; + } else { + // i in tertile [3f':6f') + fp = (r - l + 1) / 9; + final int f3 = 3 * fp; + final int end = l + (f3 << 1); + for (int i = l + f3; i < end; i++) { + Sorting.sort3(a, i - f3, i, i + f3); + } + // 5th 9th-tile: [4f':5f') + s = l + (fp << 2); + // No adaption uses the middle to enforce strict margins + p = s + (flags == MODE_ADAPTION ? mapDistance(k - l, l, r, fp) : (fp >>> 1)); + } + final int e = s + fp - 1; + for (int i = s; i <= e; i++) { + Sorting.sort3(a, i - fp, i, i + fp); + } + p = quickSelectAdaptive(a, s, e, p, p, upper, MODE_FR_SAMPLING); + return expandPartition(a, l, r, s, e, p, upper[0], upper); + } + + /** + * Partition an array slice around a pivot. Partitioning exchanges array elements such + * that all elements smaller than pivot are before it and all elements larger than + * pivot are after it. + * + * <p>Assumes the range {@code r - l >= 11}; the caller is responsible for selection on a smaller + * range. + * + * <p>Uses the Chen and Dumitrescu repeated step median-of-medians-of-medians algorithm + * with the lower median of 4 then either median of 3 with the final sample placed in the + * 5th 12th-tile, or min of 3 with the final sample in the 4th 12th-tile; + * the pivot chosen from the sample is adaptive using the input {@code k}. + * + * <p>Given a pivot in the middle of the sample this has margins of 1/6 and 1/4. + * + * @param a Data array. + * @param l Lower bound (inclusive). + * @param r Upper bound (inclusive). + * @param k Target index. + * @param upper Upper bound (inclusive) of the pivot range. + * @param flags Control flags. + * @return Lower bound (inclusive) of the pivot range. + */ + private static int repeatedStepLeft(int[] a, int l, int r, int k, int[] upper, int flags) { + // Adapted from Alexandrescu (2016), algorithm 9. + int fp; + int s; + int p; + if (flags <= MODE_SAMPLING) { + // Median into a 12th-tile + fp = (r - l + 1) / 12; + // Position the sample around the target k + // Avoid bounds error due to rounding as (k-l)/(r-l) -> 1/12 + s = Math.max(k - mapDistance(k - l, l, r, fp), l + fp); + p = k; + } else { + // i in 2nd quartile + final int f = (r - l + 1) >> 2; + final int f2 = f + f; + final int end = l + f2; + for (int i = l + f; i < end; i++) { + Sorting.lowerMedian4(a, i - f, i, i + f, i + f2); + } + // i in 5th 12th-tile + fp = f / 3; + s = l + f + fp; + // No adaption uses the middle to enforce strict margins + p = s + (flags == MODE_ADAPTION ? mapDistance(k - l, l, r, fp) : (fp >>> 1)); + } + final int e = s + fp - 1; + for (int i = s; i <= e; i++) { + Sorting.sort3(a, i - fp, i, i + fp); + } + p = quickSelectAdaptive(a, s, e, p, p, upper, MODE_FR_SAMPLING); + return expandPartition(a, l, r, s, e, p, upper[0], upper); + } + + /** + * Partition an array slice around a pivot. Partitioning exchanges array elements such + * that all elements smaller than pivot are before it and all elements larger than + * pivot are after it. + * + * <p>Assumes the range {@code r - l >= 11}; the caller is responsible for selection on a smaller + * range. + * + * <p>Uses the Chen and Dumitrescu repeated step median-of-medians-of-medians algorithm + * with the upper median of 4 then either median of 3 with the final sample placed in the + * 8th 12th-tile, or max of 3 with the final sample in the 9th 12th-tile; + * the pivot chosen from the sample is adaptive using the input {@code k}. + * + * <p>Given a pivot in the middle of the sample this has margins of 1/4 and 1/6. + * + * @param a Data array. + * @param l Lower bound (inclusive). + * @param r Upper bound (inclusive). + * @param k Target index. + * @param upper Upper bound (inclusive) of the pivot range. + * @param flags Control flags. + * @return Lower bound (inclusive) of the pivot range. + */ + private static int repeatedStepRight(int[] a, int l, int r, int k, int[] upper, int flags) { + // Mirror image repeatedStepLeft using upper median into 3rd quartile + int fp; + int e; + int p; + if (flags <= MODE_SAMPLING) { + // Median into a 12th-tile + fp = (r - l + 1) / 12; + // Position the sample around the target k + // Avoid bounds error due to rounding as (r-k)/(r-l) -> 11/12 + e = Math.min(k + mapDistance(r - k, l, r, fp), r - fp); + p = k; + } else { + // i in 3rd quartile + final int f = (r - l + 1) >> 2; + final int f2 = f + f; + final int end = r - f2; + for (int i = r - f; i > end; i--) { + Sorting.upperMedian4(a, i - f2, i - f, i, i + f); + } + // i in 8th 12th-tile + fp = f / 3; + e = r - f - fp; + // No adaption uses the middle to enforce strict margins + p = e - (flags == MODE_ADAPTION ? mapDistance(r - k, l, r, fp) : (fp >>> 1)); + } + final int s = e - fp + 1; + for (int i = s; i <= e; i++) { + Sorting.sort3(a, i - fp, i, i + fp); + } + p = quickSelectAdaptive(a, s, e, p, p, upper, MODE_FR_SAMPLING); + return expandPartition(a, l, r, s, e, p, upper[0], upper); + } + + /** + * Partition an array slice around a pivot. Partitioning exchanges array elements such + * that all elements smaller than pivot are before it and all elements larger than + * pivot are after it. + * + * <p>Assumes the range {@code r - l >= 11}; the caller is responsible for selection on a smaller + * range. + * + * <p>Uses the Chen and Dumitrescu repeated step median-of-medians-of-medians algorithm + * with the minimum of 4 then median of 3; the final sample is placed in the + * 2nd 12th-tile; the pivot chosen from the sample is adaptive using the input {@code k}. + * + * <p>Given a pivot in the middle of the sample this has margins of 1/12 and 1/3. + * + * @param a Data array. + * @param l Lower bound (inclusive). + * @param r Upper bound (inclusive). + * @param k Target index. + * @param upper Upper bound (inclusive) of the pivot range. + * @param flags Control flags. + * @return Lower bound (inclusive) of the pivot range. + */ + private static int repeatedStepFarLeft(int[] a, int l, int r, int k, int[] upper, int flags) { + // Far step has been changed from the Alexandrescu (2016) step of lower-median-of-4, min-of-3 + // into the 4th 12th-tile to a min-of-4, median-of-3 into the 2nd 12th-tile. + // The differences are: + // - The upper margin when not sampling is 8/24 vs. 9/24; the lower margin remains at 1/12. + // - The position of the sample is closer to the expected location of k < |A| / 12. + // - Sampling mode uses a median-of-3 with adaptive k, matching the other step methods. + // A min-of-3 sample can create a pivot too small if used with adaption of k leaving + // k in the larger parition and a wasted iteration. + // - Adaption is adjusted to force use of the lower margin when not sampling. + int fp; + int s; + int p; + if (flags <= MODE_SAMPLING) { + // 2nd 12th-tile + fp = (r - l + 1) / 12; + s = l + fp; + // Use adaption + p = s + mapDistance(k - l, l, r, fp); + } else { + // i in 2nd quartile; min into i-f (1st quartile) + final int f = (r - l + 1) >> 2; + final int f2 = f + f; + final int end = l + f2; + for (int i = l + f; i < end; i++) { + if (a[i + f] < a[i - f]) { + final int u = a[i + f]; + a[i + f] = a[i - f]; + a[i - f] = u; + } + if (a[i + f2] < a[i]) { + final int v = a[i + f2]; + a[i + f2] = a[i]; + a[i] = v; + } + if (a[i] < a[i - f]) { + final int u = a[i]; + a[i] = a[i - f]; + a[i - f] = u; + } + } + // 2nd 12th-tile + fp = f / 3; + s = l + fp; + // Lower margin has 2(d+1) elements; d == (position in sample) - s + // Force k into the lower margin + p = s + ((k - l) >>> 1); + } + final int e = s + fp - 1; + for (int i = s; i <= e; i++) { + Sorting.sort3(a, i - fp, i, i + fp); + } + p = quickSelectAdaptive(a, s, e, p, p, upper, MODE_FR_SAMPLING); + return expandPartition(a, l, r, s, e, p, upper[0], upper); + } + + /** + * Partition an array slice around a pivot. Partitioning exchanges array elements such + * that all elements smaller than pivot are before it and all elements larger than + * pivot are after it. + * + * <p>Assumes the range {@code r - l >= 11}; the caller is responsible for selection on a smaller + * range. + * + * <p>Uses the Chen and Dumitrescu repeated step median-of-medians-of-medians algorithm + * with the maximum of 4 then median of 3; the final sample is placed in the + * 11th 12th-tile; the pivot chosen from the sample is adaptive using the input {@code k}. + * + * <p>Given a pivot in the middle of the sample this has margins of 1/3 and 1/12. + * + * @param a Data array. + * @param l Lower bound (inclusive). + * @param r Upper bound (inclusive). + * @param k Target index. + * @param upper Upper bound (inclusive) of the pivot range. + * @param flags Control flags. + * @return Lower bound (inclusive) of the pivot range. + */ + private static int repeatedStepFarRight(int[] a, int l, int r, int k, int[] upper, int flags) { + // Mirror image repeatedStepFarLeft + int fp; + int e; + int p; + if (flags <= MODE_SAMPLING) { + // 11th 12th-tile + fp = (r - l + 1) / 12; + e = r - fp; + // Use adaption + p = e - mapDistance(r - k, l, r, fp); + } else { + // i in 3rd quartile; max into i+f (4th quartile) + final int f = (r - l + 1) >> 2; + final int f2 = f + f; + final int end = r - f2; + for (int i = r - f; i > end; i--) { + if (a[i - f] > a[i + f]) { + final int u = a[i - f]; + a[i - f] = a[i + f]; + a[i + f] = u; + } + if (a[i - f2] > a[i]) { + final int v = a[i - f2]; + a[i - f2] = a[i]; + a[i] = v; + } + if (a[i] > a[i + f]) { + final int u = a[i]; + a[i] = a[i + f]; + a[i + f] = u; + } + } + // 11th 12th-tile + fp = f / 3; + e = r - fp; + // Upper margin has 2(d+1) elements; d == e - (position in sample) + // Force k into the upper margin + p = e - ((r - k) >>> 1); + } + final int s = e - fp + 1; + for (int i = s; i <= e; i++) { + Sorting.sort3(a, i - fp, i, i + fp); + } + p = quickSelectAdaptive(a, s, e, p, p, upper, MODE_FR_SAMPLING); + return expandPartition(a, l, r, s, e, p, upper[0], upper); + } + + /** + * Expand a partition around a single pivot. Partitioning exchanges array + * elements such that all elements smaller than pivot are before it and all + * elements larger than pivot are after it. The central region is already + * partitioned. + * + * <pre>{@code + * |l |s |p0 p1| e| r| + * | ??? | <P | ==P | >P | ??? | + * }</pre> + * + * <p>This requires that {@code start != end}. However it handles + * {@code left == start} and/or {@code end == right}. + * + * @param a Data array. + * @param left Lower bound (inclusive). + * @param right Upper bound (inclusive). + * @param start Start of the partition range (inclusive). + * @param end End of the partitioned range (inclusive). + * @param pivot0 Lower pivot location (inclusive). + * @param pivot1 Upper pivot location (inclusive). + * @param upper Upper bound (inclusive) of the pivot range [k1]. + * @return Lower bound (inclusive) of the pivot range [k0]. + */ + // package-private for testing + static int expandPartition(int[] a, int left, int right, int start, int end, + int pivot0, int pivot1, int[] upper) { + // 3-way partition of the data using a pivot value into + // less-than, equal or greater-than. + // Based on Sedgewick's Bentley-McIroy partitioning: always swap i<->j then + // check for equal to the pivot and move again. + // + // Move sentinels from start and end to left and right. Scan towards the + // sentinels until >=,<=. Swap then move == to the pivot region. + // <-i j-> + // |l | | |p0 p1| | | r| + // |>=| ??? | < | == | > | ??? |<=| + // + // When either i or j reach the edge perform finishing loop. + // Finish loop for a[j] <= v replaces j with p1+1, optionally moves value + // to p0 for < and updates the pivot range p1 (and optionally p0): + // j-> + // |l |p0 p1| | | r| + // | < | == | > | ??? |<=| + + final int v = a[pivot0]; + // Use start/end as sentinels (requires start != end) + int vi = a[start]; + int vj = a[end]; + a[start] = a[left]; + a[end] = a[right]; + a[left] = vj; + a[right] = vi; + + int i = start + 1; + int j = end - 1; + + // Positioned for pre-in/decrement to write to pivot region + int p0 = pivot0 == start ? i : pivot0; + int p1 = pivot1 == end ? j : pivot1; + + while (true) { + do { + --i; + } while (a[i] < v); + do { + ++j; + } while (a[j] > v); + vj = a[i]; + vi = a[j]; + a[i] = vi; + a[j] = vj; + // Move the equal values to pivot region + if (vi == v) { + a[i] = a[--p0]; + a[p0] = v; + } + if (vj == v) { + a[j] = a[++p1]; + a[p1] = v; + } + // Termination check and finishing loops. + // Note: This works even if pivot region is zero length (p1 == p0-1 due to + // length 1 pivot region at either start/end) because we pre-inc/decrement + // one side and post-inc/decrement the other side. + if (i == left) { + while (j < right) { + do { + ++j; + } while (a[j] > v); + final int w = a[j]; + // Move upper bound of pivot region + a[j] = a[++p1]; + a[p1] = v; + // Move lower bound of pivot region + if (w != v) { + a[p0] = w; + p0++; + } + } + break; + } + if (j == right) { + while (i > left) { + do { + --i; + } while (a[i] < v); + final int w = a[i]; + // Move lower bound of pivot region + a[i] = a[--p0]; + a[p0] = v; + // Move upper bound of pivot region + if (w != v) { + a[p1] = w; + p1--; + } + } + break; + } + } + + upper[0] = p1; + return p0; + } + + /** + * Partition the array such that indices {@code k} correspond to their + * correctly sorted value in the equivalent fully sorted array. + * + * <p>For all indices {@code k} and any index {@code i}: + * + * <pre>{@code + * data[i < k] <= data[k] <= data[k < i] + * }</pre> + * + * <p>This function accepts a {@link UpdatingInterval} of indices {@code k} that define the + * range of indices to partition. The {@link UpdatingInterval} can be narrowed or split as + * partitioning divides the range. + * + * <p>Uses an introselect variant. The quickselect is a dual-pivot quicksort + * partition method by Vladimir Yaroslavskiy; the fall-back on poor convergence of + * the quickselect is a heapselect. + * + * <p>The {@code flags} contain the the current recursion count and the configured + * length threshold for {@code r - l} to perform sort select. The count is in the upper + * bits and the threshold is in the lower bits. + * + * @param a Values. + * @param left Lower bound of data (inclusive, assumed to be strictly positive). + * @param right Upper bound of data (inclusive, assumed to be strictly positive). + * @param k Interval of indices to partition (ordered). + * @param flags Control flags. + */ + // package-private for testing + static void dualPivotQuickSelect(int[] a, int left, int right, UpdatingInterval k, int flags) { + // If partitioning splits the interval then recursion is used for the left-most side(s) + // and the right-most side remains within this function. If partitioning does + // not split the interval then it remains within this function. + int l = left; + int r = right; + int f = flags; + int ka = k.left(); + int kb = k.right(); + final int[] upper = {0, 0, 0}; + while (true) { + // Select when ka and kb are close to the same end, + // or the entire range is small + // |l|-----|ka|--------|kb|------|r| + final int n = r - l; + if (Math.min(kb - l, r - ka) < DP_SORTSELECT_SIZE || + n < (f & SORTSELECT_MASK)) { + sortSelect(a, l, r, ka, kb); + return; + } + if (kb - ka < DP_SORTSELECT_SIZE) { + // Switch to single-pivot mode with Floyd-Rivest sub-sampling + quickSelectAdaptive(a, l, r, ka, kb, upper, MODE_FR_SAMPLING); + return; + } + if (f < 0) { + // Excess recursion, switch to heap select + heapSelect(a, l, r, ka, kb); + return; + } + + // Dual-pivot partitioning + final int p0 = partition(a, l, r, upper); + final int p1 = upper[0]; + + // Recursion to max depth + // Note: Here we possibly branch left, middle and right with multiple keys. + // It is possible that the partition has split the keys + // and the recursion proceeds with a reduced set in each region. + // p0 p1 p2 p3 + // |l|--|ka|--k----k--|P|------k--|kb|----|P|----|r| + // kb | ka + f += RECURSION_INCREMENT; + // Recurse left side if required + if (ka < p0) { + if (kb <= p1) { + // Entirely on left side + r = p0 - 1; + if (r < kb) { + kb = k.updateRight(r); + } + continue; + } + dualPivotQuickSelect(a, l, p0 - 1, k.splitLeft(p0, p1), f); + // Here we must process middle and/or right + ka = k.left(); + } else if (kb <= p1) { + // No middle/right side + return; + } else if (ka <= p1) { + // Advance lower bound + ka = k.updateLeft(p1 + 1); + } + // Recurse middle if required + final int p2 = upper[1]; + final int p3 = upper[2]; + if (ka < p2) { + l = p1 + 1; + if (kb <= p3) { + // Entirely in middle + r = p2 - 1; + if (r < kb) { + kb = k.updateRight(r); + } + continue; + } + dualPivotQuickSelect(a, l, p2 - 1, k.splitLeft(p2, p3), f); + ka = k.left(); + } else if (kb <= p3) { + // No right side + return; + } else if (ka <= p3) { + ka = k.updateLeft(p3 + 1); + } + // Continue right + l = p3 + 1; + } + } + + /** + * Partition an array slice around 2 pivots. Partitioning exchanges array elements + * such that all elements smaller than pivot are before it and all elements larger + * than pivot are after it. + * + * <p>This method returns 4 points describing the pivot ranges of equal values. + * + * <pre>{@code + * |k0 k1| |k2 k3| + * | <P | ==P1 | <P1 && <P2 | ==P2 | >P | + * }</pre> + * + * <ul> + * <li>k0: lower pivot1 point + * <li>k1: upper pivot1 point (inclusive) + * <li>k2: lower pivot2 point + * <li>k3: upper pivot2 point (inclusive) + * </ul> + * + * <p>Bounds are set so {@code i < k0}, {@code i > k3} and {@code k1 < i < k2} are + * unsorted. When the range {@code [k0, k3]} contains fully sorted elements the result + * is set to {@code k1 = k3; k2 == k0}. This can occur if + * {@code P1 == P2} or there are zero or one value between the pivots + * {@code P1 < v < P2}. Any sort/partition of ranges [left, k0-1], [k1+1, k2-1] and + * [k3+1, right] must check the length is {@code > 1}. + * + * @param a Data array. + * @param left Lower bound (inclusive). + * @param right Upper bound (inclusive). + * @param bounds Points [k1, k2, k3]. + * @return Lower bound (inclusive) of the pivot range [k0]. + */ + private static int partition(int[] a, int left, int right, int[] bounds) { + // Pick 2 pivots from 5 approximately uniform through the range. + // Spacing is ~ 1/7 made using shifts. Other strategies are equal or much + // worse. 1/7 = 5/35 ~ 1/8 + 1/64 : 0.1429 ~ 0.1406 + // Ensure the value is above zero to choose different points! + final int n = right - left; + final int step = 1 + (n >>> 3) + (n >>> 6); + final int i3 = left + (n >>> 1); + final int i2 = i3 - step; + final int i1 = i2 - step; + final int i4 = i3 + step; + final int i5 = i4 + step; + Sorting.sort5(a, i1, i2, i3, i4, i5); + + // Partition data using pivots P1 and P2 into less-than, greater-than or between. + // Pivot values P1 & P2 are placed at the end. If P1 < P2, P2 acts as a sentinel. + // k traverses the unknown region ??? and values moved if less-than or + // greater-than: + // + // left less k great right + // |P1| <P1 | P1 <= & <= P2 | ??? | >P2 |P2| + // + // <P1 (left, lt) + // P1 <= & <= P2 [lt, k) + // >P2 (gt, right) + // + // At the end pivots are swapped back to behind the less and great pointers. + // + // | <P1 |P1| P1<= & <= P2 |P2| >P2 | + + // Swap ends to the pivot locations. + final int v1 = a[i2]; + a[i2] = a[left]; + a[left] = v1; + final int v2 = a[i4]; + a[i4] = a[right]; + a[right] = v2; + + // pointers + int less = left; + int great = right; + + // Fast-forward ascending / descending runs to reduce swaps. + // Cannot overrun as end pivots (v1 <= v2) act as sentinels. + do { + ++less; + } while (a[less] < v1); + do { + --great; + } while (a[great] > v2); + + // a[less - 1] < P1 : a[great + 1] > P2 + // unvisited in [less, great] + SORTING: + for (int k = less - 1; ++k <= great;) { + final int v = a[k]; + if (v < v1) { + // swap(a, k, less++) + a[k] = a[less]; + a[less] = v; + less++; + } else if (v > v2) { + // while k < great and a[great] > v2: + // great-- + while (a[great] > v2) { + if (great-- == k) { + // Done + break SORTING; + } + } + // swap(a, k, great--) + // if a[k] < v1: + // swap(a, k, less++) + final int w = a[great]; + a[great] = v; + great--; + // delay a[k] = w + if (w < v1) { + a[k] = a[less]; + a[less] = w; + less++; + } else { + a[k] = w; + } + } + } + + // Change to inclusive ends : a[less] < P1 : a[great] > P2 + less--; + great++; + // Move the pivots to correct locations + a[left] = a[less]; + a[less] = v1; + a[right] = a[great]; + a[great] = v2; + + // Record the pivot locations + final int lower = less; + bounds[2] = great; + + // equal elements + // Original paper: If middle partition is bigger than a threshold + // then check for equal elements. + + // Note: This is extra work. When performing partitioning the region of interest + // may be entirely above or below the central region and this can be skipped. + + // Here we look for equal elements if the centre is more than 5/8 the length. + // 5/8 = 1/2 + 1/8. Pivots must be different. + if ((great - less) > (n >>> 1) + (n >>> 3) && v1 != v2) { + + // Fast-forward to reduce swaps. Changes inclusive ends to exclusive ends. + // Since v1 != v2 these act as sentinels to prevent overrun. + do { + ++less; + } while (a[less] == v1); + do { + --great; + } while (a[great] == v2); + + // This copies the logic in the sorting loop using == comparisons + EQUAL: + for (int k = less - 1; ++k <= great;) { + final int v = a[k]; + if (v == v1) { + a[k] = a[less]; + a[less] = v; + less++; + } else if (v == v2) { + while (a[great] == v2) { + if (great-- == k) { + // Done + break EQUAL; + } + } + final int w = a[great]; + a[great] = v; + great--; + if (w == v1) { + a[k] = a[less]; + a[less] = w; + less++; + } else { + a[k] = w; + } + } + } + + // Change to inclusive ends + less--; + great++; + } + + // Between pivots in (less, great) + if (v1 != v2 && less < great - 1) { + // Record the pivot end points + bounds[0] = less; + bounds[1] = great; + } else { + // No unsorted internal region (set k1 = k3; k2 = k0) + bounds[0] = bounds[2]; + bounds[1] = lower; + } + + return lower; + } + /** * Map the distance from the edge of {@code [l, r]} to a new distance in {@code [0, n)}. * diff --git a/commons-numbers-arrays/src/main/java/org/apache/commons/numbers/arrays/Selection.java b/commons-numbers-arrays/src/main/java/org/apache/commons/numbers/arrays/Selection.java index 60c7d73f..21c7dfcf 100644 --- a/commons-numbers-arrays/src/main/java/org/apache/commons/numbers/arrays/Selection.java +++ b/commons-numbers-arrays/src/main/java/org/apache/commons/numbers/arrays/Selection.java @@ -331,4 +331,80 @@ public final class Selection { } } } + + /** + * Partition the array such that index {@code k} corresponds to its correctly + * sorted value in the equivalent fully sorted array. + * + * @param a Values. + * @param k Index. + * @throws IndexOutOfBoundsException if index {@code k} is not within the + * sub-range {@code [0, a.length)} + */ + public static void select(int[] a, int k) { + IndexSupport.checkIndex(0, a.length, k); + if (a.length <= 1) { + return; + } + QuickSelect.select(a, 0, a.length - 1, k); + } + + /** + * Partition the array such that indices {@code k} correspond to their correctly + * sorted value in the equivalent fully sorted array. + * + * @param a Values. + * @param k Indices (may be destructively modified). + * @throws IndexOutOfBoundsException if any index {@code k} is not within the + * sub-range {@code [0, a.length)} + */ + public static void select(int[] a, int[] k) { + IndexSupport.checkIndices(0, a.length, k); + if (k.length == 0 || a.length <= 1) { + return; + } + QuickSelect.select(a, 0, a.length - 1, k, k.length); + } + + /** + * Partition the array such that index {@code k} corresponds to its correctly + * sorted value in the equivalent fully sorted array. + * + * @param a Values. + * @param fromIndex Index of the first element (inclusive). + * @param toIndex Index of the last element (exclusive). + * @param k Index. + * @throws IndexOutOfBoundsException if the sub-range {@code [fromIndex, toIndex)} is out of + * bounds of range {@code [0, a.length)}; or if index {@code k} is not within the + * sub-range {@code [fromIndex, toIndex)} + */ + public static void select(int[] a, int fromIndex, int toIndex, int k) { + IndexSupport.checkFromToIndex(fromIndex, toIndex, a.length); + IndexSupport.checkIndex(fromIndex, toIndex, k); + if (toIndex - fromIndex <= 1) { + return; + } + QuickSelect.select(a, fromIndex, toIndex - 1, k); + } + + /** + * Partition the array such that indices {@code k} correspond to their correctly + * sorted value in the equivalent fully sorted array. + * + * @param a Values. + * @param fromIndex Index of the first element (inclusive). + * @param toIndex Index of the last element (exclusive). + * @param k Indices (may be destructively modified). + * @throws IndexOutOfBoundsException if the sub-range {@code [fromIndex, toIndex)} is out of + * bounds of range {@code [0, a.length)}; or if any index {@code k} is not within the + * sub-range {@code [fromIndex, toIndex)} + */ + public static void select(int[] a, int fromIndex, int toIndex, int[] k) { + IndexSupport.checkFromToIndex(fromIndex, toIndex, a.length); + IndexSupport.checkIndices(fromIndex, toIndex, k); + if (k.length == 0 || toIndex - fromIndex <= 1) { + return; + } + QuickSelect.select(a, fromIndex, toIndex - 1, k, k.length); + } } diff --git a/commons-numbers-arrays/src/main/java/org/apache/commons/numbers/arrays/Sorting.java b/commons-numbers-arrays/src/main/java/org/apache/commons/numbers/arrays/Sorting.java index 114c7477..cf35c4a0 100644 --- a/commons-numbers-arrays/src/main/java/org/apache/commons/numbers/arrays/Sorting.java +++ b/commons-numbers-arrays/src/main/java/org/apache/commons/numbers/arrays/Sorting.java @@ -264,6 +264,234 @@ final class Sorting { } } + /** + * Sorts an array using an insertion sort. + * + * @param x Data array. + * @param left Lower bound (inclusive). + * @param right Upper bound (inclusive). + */ + static void sort(int[] x, int left, int right) { + for (int i = left; ++i <= right;) { + final int v = x[i]; + // Move preceding higher elements above (if required) + if (v < x[i - 1]) { + int j = i; + while (--j >= left && v < x[j]) { + x[j + 1] = x[j]; + } + x[j + 1] = v; + } + } + } + + /** + * Sorts the elements at the given distinct indices in an array. + * + * @param x Data array. + * @param a Index. + * @param b Index. + * @param c Index. + */ + static void sort3(int[] x, int a, int b, int c) { + // Decision tree avoiding swaps: + // Order [(0,2)] + // Move point 1 above point 2 or below point 0 + final int u = x[a]; + final int v = x[b]; + final int w = x[c]; + if (w < u) { + if (v < w) { + x[a] = v; + x[b] = w; + x[c] = u; + return; + } + if (u < v) { + x[a] = w; + x[b] = u; + x[c] = v; + return; + } + // w < v < u + x[a] = w; + x[c] = u; + return; + } + if (v < u) { + // v < u < w + x[a] = v; + x[b] = u; + return; + } + if (w < v) { + // u < w < v + x[b] = w; + x[c] = v; + } + // u < v < w + } + + /** + * Sorts the elements at the given distinct indices in an array. + * + * @param x Data array. + * @param a Index. + * @param b Index. + * @param c Index. + * @param d Index. + * @param e Index. + */ + static void sort5(int[] x, int a, int b, int c, int d, int e) { + // Uses an optimal sorting network from Knuth's Art of Computer Programming. + // 9 comparisons. + // Order pairs: + // [(0,3),(1,4)] + // [(0,2),(1,3)] + // [(0,1),(2,4)] + // [(1,2),(3,4)] + // [(2,3)] + if (x[e] < x[b]) { + final int u = x[e]; + x[e] = x[b]; + x[b] = u; + } + if (x[d] < x[a]) { + final int v = x[d]; + x[d] = x[a]; + x[a] = v; + } + + if (x[d] < x[b]) { + final int u = x[d]; + x[d] = x[b]; + x[b] = u; + } + if (x[c] < x[a]) { + final int v = x[c]; + x[c] = x[a]; + x[a] = v; + } + + if (x[e] < x[c]) { + final int u = x[e]; + x[e] = x[c]; + x[c] = u; + } + if (x[b] < x[a]) { + final int v = x[b]; + x[b] = x[a]; + x[a] = v; + } + + if (x[e] < x[d]) { + final int u = x[e]; + x[e] = x[d]; + x[d] = u; + } + if (x[c] < x[b]) { + final int v = x[c]; + x[c] = x[b]; + x[b] = v; + } + + if (x[d] < x[c]) { + final int u = x[d]; + x[d] = x[c]; + x[c] = u; + } + } + + /** + * Place the lower median of 4 elements in {@code b}; the smaller element in + * {@code a}; and the larger two elements in {@code c, d}. + * + * @param x Values + * @param a Index. + * @param b Index. + * @param c Index. + * @param d Index. + */ + static void lowerMedian4(int[] x, int a, int b, int c, int d) { + // 3 to 5 comparisons + if (x[d] < x[b]) { + final int u = x[d]; + x[d] = x[b]; + x[b] = u; + } + if (x[c] < x[a]) { + final int v = x[c]; + x[c] = x[a]; + x[a] = v; + } + // a--c + // b--d + if (x[c] < x[b]) { + final int u = x[c]; + x[c] = x[b]; + x[b] = u; + } else if (x[b] < x[a]) { + // a--c + // b--d + final int xb = x[a]; + x[a] = x[b]; + x[b] = xb; + // b--c + // a--d + if (x[d] < xb) { + x[b] = x[d]; + // Move a pair to maintain the sorted order + x[d] = x[c]; + x[c] = xb; + } + } + } + + /** + * Place the upper median of 4 elements in {@code c}; the smaller two elements in + * {@code a,b}; and the larger element in {@code d}. + * + * @param x Values + * @param a Index. + * @param b Index. + * @param c Index. + * @param d Index. + */ + static void upperMedian4(int[] x, int a, int b, int c, int d) { + // 3 to 5 comparisons + if (x[d] < x[b]) { + final int u = x[d]; + x[d] = x[b]; + x[b] = u; + } + if (x[c] < x[a]) { + final int v = x[c]; + x[c] = x[a]; + x[a] = v; + } + // a--c + // b--d + if (x[b] > x[c]) { + final int u = x[c]; + x[c] = x[b]; + x[b] = u; + } else if (x[c] > x[d]) { + // a--c + // b--d + final int xc = x[d]; + x[d] = x[c]; + x[c] = xc; + // a--d + // b--c + if (x[a] > xc) { + x[c] = x[a]; + // Move a pair to maintain the sorted order + x[a] = x[b]; + x[b] = xc; + } + } + } + /** * Sort the unique indices in-place to the start of the array. The number of * unique indices is returned. diff --git a/commons-numbers-arrays/src/test/java/org/apache/commons/numbers/arrays/SelectionTest.java b/commons-numbers-arrays/src/test/java/org/apache/commons/numbers/arrays/SelectionTest.java index 43118b92..efc5d346 100644 --- a/commons-numbers-arrays/src/test/java/org/apache/commons/numbers/arrays/SelectionTest.java +++ b/commons-numbers-arrays/src/test/java/org/apache/commons/numbers/arrays/SelectionTest.java @@ -988,6 +988,823 @@ class SelectionTest { return builder.build(); } + /** + * Partition function. Used to test different implementations. + */ + private interface IntRangePartitionFunction { + /** + * Partition the array such that range of indices {@code [ka, kb]} correspond to + * their correctly sorted value in the equivalent fully sorted array. For all + * indices {@code k} and any index {@code i}: + * + * <pre>{@code + * data[i < k] <= data[k] <= data[k < i] + * }</pre> + * + * @param a Data array to use to find out the K<sup>th</sup> value. + * @param left Lower bound (inclusive). + * @param right Upper bound (inclusive). + * @param ka Lower index to select. + * @param kb Upper index to select. + */ + void partition(int[] a, int left, int right, int ka, int kb); + } + + /** + * Partition function. Used to test different implementations. + */ + private interface IntPartitionFunction { + /** + * Partition the array such that indices {@code k} correspond to their correctly + * sorted value in the equivalent fully sorted array. For all indices {@code k} + * and any index {@code i}: + * + * <pre>{@code + * data[i < k] <= data[k] <= data[k < i] + * }</pre> + * + * <p>This method allows variable length indices using a count of the indices to + * process. + * + * @param a Values. + * @param k Indices. + * @param n Count of indices. + */ + void partition(int[] a, int[] k, int n); + } + + /** + * Return a sorted copy of the {@code values}. + * + * @param values Values. + * @return the copy + */ + private static int[] sort(int[] values) { + final int[] sorted = values.clone(); + Arrays.sort(sorted); + return sorted; + } + + /** + * Return a copy of the {@code values} sorted in the range {@code [from, to]}. + * + * @param values Values. + * @param from From (inclusive). + * @param to To (inclusive). + * @return the copy + */ + private static int[] sort(int[] values, int from, int to) { + final int[] sorted = values.clone(); + Arrays.sort(sorted, from, to + 1); + return sorted; + } + + /** + * Shuffles the entries of the given array. + * + * @param rng Source of randomness. + * @param array Array whose entries will be shuffled (in-place). + * @return Shuffled input array. + */ + // TODO - replace with Commons RNG 1.6: o.a.c.rng.sampling.ArraySampler + private static int[] shuffle(UniformRandomProvider rng, int[] array) { + for (int i = array.length; i > 1; i--) { + swap(array, i - 1, rng.nextInt(i)); + } + return array; + } + + /** + * Swaps the two specified elements in the array. + * + * @param array Array. + * @param i First index. + * @param j Second index. + */ + private static void swap(int[] array, int i, int j) { + final int tmp = array[i]; + array[i] = array[j]; + array[j] = tmp; + } + + @ParameterizedTest + @MethodSource(value = {"testIntHeapSelect", "testIntSelectMinMax", "testIntSelectMinMax2"}) + void testIntHeapSelectLeft(int[] values, int from, int to) { + final int[] sorted = sort(values, from, to); + + final int[] x = values.clone(); + final IntRangePartitionFunction fun = QuickSelect::heapSelectLeft; + + for (int k = from; k <= to; k++) { + assertPartitionRange(sorted, fun, x.clone(), from, to, k, k); + if (k > from) { + // Sort an extra 1 + assertPartitionRange(sorted, fun, x.clone(), from, to, k - 1, k); + if (k > from + 1) { + // Sort all + // Test clipping with k < from + assertPartitionRange(sorted, fun, x.clone(), from, to, from - 23, k); + } + } + } + } + + @ParameterizedTest + @MethodSource(value = {"testIntHeapSelect", "testIntSelectMinMax", "testIntSelectMinMax2"}) + void testIntHeapSelectRight(int[] values, int from, int to) { + final int[] sorted = sort(values, from, to); + + final int[] x = values.clone(); + final IntRangePartitionFunction fun = QuickSelect::heapSelectRight; + + for (int k = from; k <= to; k++) { + assertPartitionRange(sorted, fun, x.clone(), from, to, k, k); + if (k < to) { + // Sort an extra 1 + assertPartitionRange(sorted, fun, x.clone(), from, to, k, k + 1); + if (k < to - 1) { + // Sort all + // Test clipping with k > to + assertPartitionRange(sorted, fun, x.clone(), from, to, k, to + 23); + } + } + } + } + + static Stream<Arguments> testIntHeapSelect() { + final Stream.Builder<Arguments> builder = Stream.builder(); + builder.add(Arguments.of(new int[] {1}, 0, 0)); + builder.add(Arguments.of(new int[] {3, 2, 1}, 1, 1)); + builder.add(Arguments.of(new int[] {2, 1}, 0, 1)); + builder.add(Arguments.of(new int[] {4, 3, 2, 1}, 1, 2)); + builder.add(Arguments.of(new int[] {-2, 0, -1, -1, 2}, 0, 4)); + builder.add(Arguments.of(new int[] {-2, 0, -1, -1, 2}, 0, 2)); + builder.add(Arguments.of(new int[] {2, 0, -1, -1, -2}, 0, 4)); + builder.add(Arguments.of(new int[] {-1, 2, -3, 4, -4, 3, -2, 1}, 1, 6)); + return builder.build(); + } + + @ParameterizedTest + @MethodSource(value = {"testIntHeapSelectRange"}) + void testIntHeapSelectRange(int[] values, int from, int to, int k1, int k2) { + assertPartitionRange(sort(values, from, to), + QuickSelect::heapSelect, values, from, to, k1, k2); + } + + static Stream<Arguments> testIntHeapSelectRange() { + final Stream.Builder<Arguments> builder = Stream.builder(); + builder.add(Arguments.of(new int[] {-1, 2, -3, 4, -4, 3, -2, 1}, 0, 7, 1, 2)); + builder.add(Arguments.of(new int[] {-1, 2, -3, 4, -4, 3, -2, 1}, 0, 7, 2, 2)); + builder.add(Arguments.of(new int[] {-1, 2, -3, 4, -4, 3, -2, 1}, 0, 7, 5, 7)); + builder.add(Arguments.of(new int[] {-1, 2, -3, 4, -4, 3, -2, 1}, 0, 7, 1, 6)); + builder.add(Arguments.of(new int[] {-1, 2, -3, 4, -4, 3, -2, 1}, 0, 7, 0, 3)); + builder.add(Arguments.of(new int[] {-1, 2, -3, 4, -4, 3, -2, 1}, 0, 7, 4, 7)); + return builder.build(); + } + + static Stream<Arguments> testIntSelectMinMax() { + final Stream.Builder<Arguments> builder = Stream.builder(); + builder.add(Arguments.of(new int[] {1, 2, 3, 4, 5}, 0, 4)); + builder.add(Arguments.of(new int[] {5, 4, 3, 2, 1}, 0, 4)); + final UniformRandomProvider rng = RandomSource.XO_SHI_RO_128_PP.create(); + for (final int size : new int[] {5, 10}) { + final int[] values = rng.ints(size).toArray(); + builder.add(Arguments.of(values.clone(), 0, size - 1)); + builder.add(Arguments.of(values.clone(), size >>> 1, size - 1)); + builder.add(Arguments.of(values.clone(), 1, size >>> 1)); + } + builder.add(Arguments.of(new int[] {-1, 0}, 0, 1)); + builder.add(Arguments.of(new int[] {0, -1}, 0, 1)); + builder.add(Arguments.of(new int[] {-1, -1}, 0, 1)); + builder.add(Arguments.of(new int[] {0, 0}, 0, 1)); + builder.add(Arguments.of(new int[] {0, -1, 0, -1}, 0, 3)); + builder.add(Arguments.of(new int[] {-1, 0, -1, 0}, 0, 3)); + builder.add(Arguments.of(new int[] {0, -1, -1, 0}, 0, 3)); + builder.add(Arguments.of(new int[] {-1, 0, 0, -1}, 0, 3)); + return builder.build(); + } + + static Stream<Arguments> testIntSelectMinMax2() { + final Stream.Builder<Arguments> builder = Stream.builder(); + final int[] values = {-1, 0, 2}; + final int x = -123; + final int y = 42; + for (final int a : values) { + for (final int b : values) { + builder.add(Arguments.of(new int[] {a, b}, 0, 1)); + builder.add(Arguments.of(new int[] {x, a, b, y}, 1, 2)); + for (final int c : values) { + builder.add(Arguments.of(new int[] {a, b, c}, 0, 2)); + builder.add(Arguments.of(new int[] {x, a, b, c, y}, 1, 3)); + for (final int d : values) { + builder.add(Arguments.of(new int[] {a, b, c, d}, 0, 3)); + builder.add(Arguments.of(new int[] {x, a, b, c, d, y}, 1, 4)); + } + } + } + } + builder.add(Arguments.of(new int[] {-1, -1, -1, 4, 3, 2, 1, y}, 3, 6)); + builder.add(Arguments.of(new int[] {1, 2, 3, 4, 5}, 0, 4)); + builder.add(Arguments.of(new int[] {5, 4, 3, 2, 1}, 0, 4)); + final UniformRandomProvider rng = RandomSource.XO_SHI_RO_128_PP.create(); + for (final int size : new int[] {5, 10}) { + final int[] a = rng.ints(size).toArray(); + builder.add(Arguments.of(a.clone(), 0, size - 1)); + builder.add(Arguments.of(a.clone(), size >>> 1, size - 1)); + builder.add(Arguments.of(a.clone(), 1, size >>> 1)); + } + builder.add(Arguments.of(new int[] {-0, 1}, 0, 1)); + builder.add(Arguments.of(new int[] {1, -0}, 0, 1)); + builder.add(Arguments.of(new int[] {-0, -0}, 0, 1)); + builder.add(Arguments.of(new int[] {1, 1}, 0, 1)); + builder.add(Arguments.of(new int[] {1, -0, 1, -0}, 0, 3)); + builder.add(Arguments.of(new int[] {-0, 1, -0, 1}, 0, 3)); + builder.add(Arguments.of(new int[] {1, -0, -0, 1}, 0, 3)); + builder.add(Arguments.of(new int[] {-0, 1, 1, -0}, 0, 3)); + return builder.build(); + } + + @ParameterizedTest + @MethodSource(value = {"testIntHeapSelect", "testIntSelectMinMax", "testIntSelectMinMax2"}) + void testIntSortSelectLeft(int[] values, int from, int to) { + final int[] sorted = sort(values, from, to); + + final int[] x = values.clone(); + final IntRangePartitionFunction fun = (a, l, r, ka, kb) -> + QuickSelect.sortSelectLeft(a, l, r, kb); + + for (int k = from; k <= to; k++) { + assertPartitionRange(sorted, fun, x.clone(), from, to, from, k); + } + } + + @ParameterizedTest + @MethodSource(value = {"testIntHeapSelect", "testIntSelectMinMax", "testIntSelectMinMax2"}) + void testIntSortSelectRight(int[] values, int from, int to) { + final int[] sorted = sort(values, from, to); + + final int[] x = values.clone(); + final IntRangePartitionFunction fun = (a, l, r, ka, kb) -> + QuickSelect.sortSelectRight(a, l, r, ka); + + for (int k = from; k <= to; k++) { + assertPartitionRange(sorted, fun, x.clone(), from, to, k, to); + } + } + + @ParameterizedTest + @MethodSource(value = {"testIntHeapSelectRange"}) + void testIntSortSelectRange(int[] values, int from, int to, int k1, int k2) { + assertPartitionRange(sort(values, from, to), + QuickSelect::sortSelect, values, from, to, k1, k2); + } + + /** + * Assert the function correctly partitions the range. + * + * @param sorted Expected sort result. + * @param fun Partition function. + * @param values Values. + * @param from From (inclusive). + * @param to To (inclusive). + * @param ka Lower index to select. + * @param kb Upper index to select. + */ + private static void assertPartitionRange(int[] sorted, + IntRangePartitionFunction fun, + int[] values, int from, int to, int ka, int kb) { + Arrays.sort(sorted, from, to + 1); + fun.partition(values, from, to, ka, kb); + // Clip + ka = ka < from ? from : ka; + kb = kb > to ? to : kb; + for (int i = ka; i <= kb; i++) { + final int index = i; + Assertions.assertEquals(sorted[i], values[i], () -> "index: " + index); + } + // Check the data is the same + Arrays.sort(values, from, to + 1); + Assertions.assertArrayEquals(sorted, values, "Data destroyed"); + } + + @ParameterizedTest + @MethodSource + void testIntSelectThrows(int[] values, int[] indices, int from, int to) { + final int[] x = values.clone(); + final int[] k = indices.clone(); + if (from == IGNORE_FROM) { + Assertions.assertThrows(IndexOutOfBoundsException.class, () -> Selection.select(values, indices)); + } else { + Assertions.assertThrows(IndexOutOfBoundsException.class, () -> Selection.select(values, from, to, indices)); + } + Assertions.assertArrayEquals(x, values, "Data modified"); + Assertions.assertArrayEquals(k, indices, "Indices modified"); + if (k.length != 1) { + return; + } + if (from == IGNORE_FROM) { + Assertions.assertThrows(IndexOutOfBoundsException.class, () -> Selection.select(values, k[0])); + } else { + Assertions.assertThrows(IndexOutOfBoundsException.class, () -> Selection.select(values, from, to, k[0])); + } + Assertions.assertArrayEquals(x, values, "Data modified for single k"); + } + + static Stream<Arguments> testIntSelectThrows() { + final Stream.Builder<Arguments> builder = Stream.builder(); + final int[] a = {1, 2, 3, -123, 0, -1}; + // Invalid range + builder.add(Arguments.of(a.clone(), new int[] {0}, 0, a.length + 1)); + builder.add(Arguments.of(a.clone(), new int[] {0}, -1, a.length)); + builder.add(Arguments.of(a.clone(), new int[] {0}, 0, 0)); + builder.add(Arguments.of(a.clone(), new int[] {0}, a.length, 0)); + builder.add(Arguments.of(a.clone(), new int[] {1}, 3, 1)); + // Single k + // Full length + builder.add(Arguments.of(a.clone(), new int[] {-1}, IGNORE_FROM, 0)); + builder.add(Arguments.of(a.clone(), new int[] {10}, IGNORE_FROM, 0)); + // Range + builder.add(Arguments.of(a.clone(), new int[] {-1}, 0, 5)); + builder.add(Arguments.of(a.clone(), new int[] {1}, 2, 5)); + builder.add(Arguments.of(a.clone(), new int[] {10}, 2, 5)); + // Multiple k, some invalid + // Full length + builder.add(Arguments.of(a.clone(), new int[] {0, -1, 1, 2}, IGNORE_FROM, 0)); + builder.add(Arguments.of(a.clone(), new int[] {0, 2, 3, 10}, IGNORE_FROM, 0)); + // Range + builder.add(Arguments.of(a.clone(), new int[] {0, -1, 1, 2}, 0, 5)); + builder.add(Arguments.of(a.clone(), new int[] {2, 3, 1}, 2, 5)); + builder.add(Arguments.of(a.clone(), new int[] {2, 10, 3}, 2, 5)); + return builder.build(); + } + + @ParameterizedTest + @MethodSource(value = {"testIntPartition", "testIntPartitionBigData"}) + void testIntQuickSelectAdaptiveFRSampling(int[] values, int[] indices) { + assertQuickSelectAdaptive(values, indices, QuickSelect.MODE_FR_SAMPLING); + } + + @ParameterizedTest + @MethodSource(value = {"testIntPartition", "testIntPartitionBigData"}) + void testIntQuickSelectAdaptiveSampling(int[] values, int[] indices) { + assertQuickSelectAdaptive(values, indices, QuickSelect.MODE_SAMPLING); + } + + @ParameterizedTest + @MethodSource(value = {"testIntPartition", "testIntPartitionBigData"}) + void testIntQuickSelectAdaptiveAdaption(int[] values, int[] indices) { + assertQuickSelectAdaptive(values, indices, QuickSelect.MODE_ADAPTION); + } + + @ParameterizedTest + @MethodSource(value = {"testIntPartition", "testIntPartitionBigData"}) + void testIntQuickSelectAdaptiveStrict(int[] values, int[] indices) { + assertQuickSelectAdaptive(values, indices, QuickSelect.MODE_STRICT); + } + + private static void assertQuickSelectAdaptive(int[] values, int[] indices, int mode) { + Assumptions.assumeTrue(indices.length == 1 || + (indices.length == 2 && Math.abs(indices[1] - indices[0]) < 10)); + final int k1 = Math.min(indices[0], indices[indices.length - 1]); + final int kn = Math.max(indices[0], indices[indices.length - 1]); + assertPartition(values, indices, (a, k, n) -> + QuickSelect.quickSelectAdaptive(a, 0, a.length - 1, k1, kn, new int[1], mode), true); + } + + @ParameterizedTest + @MethodSource(value = {"testIntPartition", "testIntPartitionBigData"}) + void testIntDualPivotQuickSelectMaxRecursion(int[] values, int[] indices) { + assertPartition(values, indices, (a, k, n) -> { + final int right = a.length - 1; + if (right < 1 || k.length == 0) { + return; + } + QuickSelect.dualPivotQuickSelect(a, 0, right, + IndexSupport.createUpdatingInterval(0, right, k, k.length), + QuickSelect.dualPivotFlags(2, 5)); + }, false); + } + + @ParameterizedTest + @MethodSource(value = {"testIntPartition", "testIntPartitionBigData"}) + void testIntSelect(int[] values, int[] indices) { + assertPartition(values, indices, (a, k, n) -> { + int[] b = a; + if (n == 1) { + b = a.clone(); + Selection.select(b, k[0]); + } + Selection.select(a, Arrays.copyOf(k, n)); + if (n == 1) { + Assertions.assertArrayEquals(a, b, "single k mismatch"); + } + }, false); + } + + @ParameterizedTest + @MethodSource(value = {"testIntPartition", "testIntPartitionBigData"}) + void testIntSelectRange(int[] values, int[] indices) { + assertPartition(values, indices, (a, k, n) -> { + int[] b = a; + if (n == 1) { + b = a.clone(); + Selection.select(b, 0, b.length, k[0]); + } + Selection.select(a, 0, a.length, Arrays.copyOf(k, n)); + if (n == 1) { + Assertions.assertArrayEquals(a, b, "single k mismatch"); + } + }, false); + } + + static void assertPartition(int[] values, int[] indices, IntPartitionFunction function, + boolean sortedRange) { + final int[] data = values.clone(); + final int[] sorted = sort(values); + // Indices may be destructively modified + function.partition(data, indices.clone(), indices.length); + if (indices.length == 0) { + return; + } + for (final int k : indices) { + Assertions.assertEquals(sorted[k], data[k], () -> "k[" + k + "]"); + } + // Check partial ordering + Arrays.sort(indices); + int i = 0; + for (final int k : indices) { + final int value = sorted[k]; + while (i < k) { + final int j = i; + Assertions.assertTrue(Integer.compare(data[i], value) <= 0, + () -> j + " < " + k + " : " + data[j] + " < " + value); + i++; + } + } + final int k = indices[indices.length - 1]; + final int value = sorted[k]; + while (i < data.length) { + final int j = i; + Assertions.assertTrue(Integer.compare(data[i], value) >= 0, + () -> k + " < " + j); + i++; + } + if (sortedRange) { + final int[] a = Arrays.copyOfRange(sorted, indices[0], k + 1); + final int[] b = Arrays.copyOfRange(data, indices[0], k + 1); + Assertions.assertArrayEquals(a, b, "Entire range of indices is not sorted"); + } + Arrays.sort(data); + Assertions.assertArrayEquals(sorted, data, "Data destroyed"); + } + + static Stream<Arguments> testIntPartition() { + final Stream.Builder<Arguments> builder = Stream.builder(); + UniformRandomProvider rng = RandomSource.XO_SHI_RO_128_PP.create(123); + // Sizes above and below the threshold for partitioning. + // The largest size should trigger single-pivot sub-sampling for pivot selection. + for (final int size : new int[] {5, 47, SU + 10}) { + final int halfSize = size >>> 1; + final int from = -halfSize; + final int to = -halfSize + size; + final int[] values = IntStream.range(from, to).toArray(); + final int[] zeros = values.clone(); + final int quarterSize = size >>> 2; + Arrays.fill(zeros, quarterSize, halfSize + quarterSize, 0); + for (final int k : new int[] {1, 2, 3, size}) { + for (int i = 0; i < 15; i++) { + // Note: Duplicate indices do not matter + final int[] indices = rng.ints(k, 0, size).toArray(); + builder.add(Arguments.of( + shuffle(rng, values.clone()), + indices.clone())); + builder.add(Arguments.of( + shuffle(rng, zeros.clone()), + indices.clone())); + } + } + // Test sequential processing by creating potential ranges + // after an initial low point. This should be high enough + // so any range analysis that joins indices will leave the initial + // index as a single point. + final int limit = 50; + if (size > limit) { + for (int i = 0; i < 10; i++) { + final int[] indices = rng.ints(size - limit, limit, size).toArray(); + // This sets a low index + indices[rng.nextInt(indices.length)] = rng.nextInt(0, limit >>> 1); + builder.add(Arguments.of( + shuffle(rng, values.clone()), + indices.clone())); + } + } + // min; max; min/max + builder.add(Arguments.of(values.clone(), new int[] {0})); + builder.add(Arguments.of(values.clone(), new int[] {size - 1})); + builder.add(Arguments.of(values.clone(), new int[] {0, size - 1})); + builder.add(Arguments.of(zeros.clone(), new int[] {0})); + builder.add(Arguments.of(zeros.clone(), new int[] {size - 1})); + builder.add(Arguments.of(zeros.clone(), new int[] {0, size - 1})); + } + final int value = Integer.MIN_VALUE; + builder.add(Arguments.of(new int[] {}, new int[0])); + builder.add(Arguments.of(new int[] {value}, new int[] {0})); + builder.add(Arguments.of(new int[] {0, value}, new int[] {1})); + builder.add(Arguments.of(new int[] {value, value, value}, new int[] {2})); + builder.add(Arguments.of(new int[] {value, 0, 0, value}, new int[] {3})); + builder.add(Arguments.of(new int[] {value, 0, 0, value}, new int[] {1, 2})); + builder.add(Arguments.of(new int[] {value, 0, 1, 0, value}, new int[] {1, 3})); + builder.add(Arguments.of(new int[] {value, 0, 0}, new int[] {0, 2})); + builder.add(Arguments.of(new int[] {value, 123, 0, -456, 0, value}, new int[] {0, 1, 3})); + // Dual-pivot with a large middle region (> 5 / 8) requires equal elements loop + final int n = 128; + final int[] x = IntStream.range(0, n).toArray(); + // Put equal elements in the central region: + // 2/16 6/16 10/16 14/16 + // | <P1 | P1 | P1< & < P2 | P2 | >P2 | + final int sixteenth = n / 16; + final int i2 = 2 * sixteenth; + final int i6 = 6 * sixteenth; + final int p1 = x[i2]; + final int p2 = x[n - i2]; + // Lots of values equal to the pivots + Arrays.fill(x, i2, i6, p1); + Arrays.fill(x, n - i6, n - i2, p2); + // Equal value in between the pivots + Arrays.fill(x, i6, n - i6, (p1 + p2) / 2); + // Shuffle this and partition in the middle. + // Also partition with the pivots in P1 and P2 using thirds. + final int third = (int) (n / 3.0); + // Use a fix seed to ensure we hit coverage with only 5 loops. + rng = RandomSource.XO_SHI_RO_128_PP.create(-8111061151820577011L); + for (int i = 0; i < 5; i++) { + builder.add(Arguments.of(shuffle(rng, x.clone()), new int[] {n >> 1})); + builder.add(Arguments.of(shuffle(rng, x.clone()), + new int[] {third, 2 * third})); + } + // A single value smaller/greater than the pivot at the left/right/both ends + Arrays.fill(x, 1); + for (int i = 0; i <= 2; i++) { + for (int j = 0; j <= 2; j++) { + x[n - 1] = i; + x[0] = j; + builder.add(Arguments.of(x.clone(), new int[] {50})); + } + } + // Reverse data. Makes it simple to detect failed range selection. + final int[] a = IntStream.range(0, 50).toArray(); + for (int i = -1, j = a.length; ++i < --j;) { + final int v = a[i]; + a[i] = a[j]; + a[j] = v; + } + builder.add(Arguments.of(a, new int[] {1, 1})); + builder.add(Arguments.of(a, new int[] {1, 2})); + builder.add(Arguments.of(a, new int[] {10, 12})); + builder.add(Arguments.of(a, new int[] {10, 42})); + builder.add(Arguments.of(a, new int[] {1, 48})); + builder.add(Arguments.of(a, new int[] {48, 49})); + return builder.build(); + } + + static Stream<Arguments> testIntPartitionBigData() { + final Stream.Builder<Arguments> builder = Stream.builder(); + final UniformRandomProvider rng = RandomSource.XO_SHI_RO_128_PP.create(123); + // Sizes above the threshold (1200) for recursive partitioning + for (final int size : new int[] {1000, 5000, 10000}) { + final int[] a = IntStream.range(0, size).toArray(); + // With repeat elements + final int[] b = rng.ints(size, 0, size >> 3).toArray(); + for (int i = 0; i < 15; i++) { + builder.add(Arguments.of( + shuffle(rng, a.clone()), + new int[] {rng.nextInt(size)})); + builder.add(Arguments.of(b.clone(), + new int[] {rng.nextInt(size)})); + } + } + // Hit Floyd-Rivest sub-sampling conditions. + // Close to edge but outside edge select size. + final int n = 7000; + final int[] x = IntStream.range(0, n).toArray(); + builder.add(Arguments.of(x.clone(), new int[] {20})); + builder.add(Arguments.of(x.clone(), new int[] {n - 1 - 20})); + // Constant value when using FR partitioning + Arrays.fill(x, 123); + builder.add(Arguments.of(x, new int[] {x.length >>> 1})); + return builder.build(); + } + + @ParameterizedTest + @MethodSource + void testIntExpandPartition(int[] values, int start, int end, int pivot0, int pivot1) { + final int[] upper = new int[1]; + final int[] sorted = sort(values); + final int v = values[pivot0]; + final int p0 = QuickSelect.expandPartition(values, 0, values.length - 1, start, end, pivot0, pivot1, upper); + final int p1 = upper[0]; + for (int i = 0; i < p0; i++) { + final int index = i; + Assertions.assertTrue(values[i] < v, + () -> String.format("[%d] : %s < %s", index, values[index], v)); + } + for (int i = p0; i <= p1; i++) { + final int index = i; + Assertions.assertEquals(v, values[i], + () -> String.format("[%d] : %s == %s", index, values[index], v)); + } + for (int i = p1 + 1; i < values.length; i++) { + final int index = i; + Assertions.assertTrue(values[i] > v, + () -> String.format("[%d] : %s > %s", index, values[index], v)); + } + Arrays.sort(values); + Assertions.assertArrayEquals(sorted, values, "Data destroyed"); + } + + static Stream<Arguments> testIntExpandPartition() { + final Stream.Builder<Arguments> builder = Stream.builder(); + // Create data: + // |l |start |p0 p1| end| r| + // | ??? | < | == | > | ??? | + // Arguments: data, start, end, pivot0, pivot1 + + // Create the data with unique values 42 and 0 either side of + // [start, end] (e.g. region ???). These are permuted for -1 and 10 + // to create cases that may or not have to swap elements. + + // Single pivot + addExpandPartitionArguments(builder, new int[] {42, 1, 2, 3, 4, 0}, 1, 4, 2, 2); + // Pivot range + addExpandPartitionArguments(builder, new int[] {42, 1, 2, 2, 3, 0}, 1, 4, 2, 3); + // Single pivot at start/end + addExpandPartitionArguments(builder, new int[] {42, 1, 2, 3, 4, 0}, 1, 4, 1, 1); + addExpandPartitionArguments(builder, new int[] {42, 1, 2, 3, 4, 0}, 1, 4, 4, 4); + // Pivot range at start/end + addExpandPartitionArguments(builder, new int[] {42, 1, 1, 2, 3, 0}, 1, 4, 1, 2); + addExpandPartitionArguments(builder, new int[] {42, 1, 2, 3, 3, 0}, 1, 4, 3, 4); + addExpandPartitionArguments(builder, new int[] {42, 1, 2, 2, 2, 0}, 1, 4, 2, 4); + addExpandPartitionArguments(builder, new int[] {42, 1, 1, 1, 2, 0}, 1, 4, 1, 3); + addExpandPartitionArguments(builder, new int[] {42, 1, 1, 1, 1, 0}, 1, 4, 1, 4); + + // Single pivot at left/right + addExpandPartitionArguments(builder, new int[] {1, 2, 3, 4, 0}, 0, 3, 0, 0); + addExpandPartitionArguments(builder, new int[] {42, 1, 2, 3, 4}, 1, 4, 4, 4); + // Pivot range at left/right + addExpandPartitionArguments(builder, new int[] {1, 1, 2, 3, 4, 0}, 0, 4, 0, 1); + addExpandPartitionArguments(builder, new int[] {42, 1, 2, 3, 4, 4}, 1, 5, 4, 5); + addExpandPartitionArguments(builder, new int[] {1, 1, 1, 1, 2, 0}, 0, 4, 0, 3); + addExpandPartitionArguments(builder, new int[] {42, 3, 4, 4, 4, 4}, 1, 5, 2, 5); + addExpandPartitionArguments(builder, new int[] {1, 1, 1, 1, 1, 0}, 0, 4, 0, 4); + addExpandPartitionArguments(builder, new int[] {42, 4, 4, 4, 4, 4}, 1, 5, 1, 5); + + // Minimum range: [start, end] == length 2 + addExpandPartitionArguments(builder, new int[] {42, 1, 2, 0}, 1, 2, 1, 1); + addExpandPartitionArguments(builder, new int[] {42, 1, 2, 0}, 1, 2, 2, 2); + addExpandPartitionArguments(builder, new int[] {42, 1, 1, 0}, 1, 2, 1, 2); + addExpandPartitionArguments(builder, new int[] {42, 1, 2}, 1, 2, 1, 1); + addExpandPartitionArguments(builder, new int[] {42, 1, 2}, 1, 2, 2, 2); + addExpandPartitionArguments(builder, new int[] {42, 1, 1}, 1, 2, 1, 2); + addExpandPartitionArguments(builder, new int[] {1, 2, 0}, 0, 1, 0, 0); + addExpandPartitionArguments(builder, new int[] {1, 2, 0}, 0, 1, 1, 1); + addExpandPartitionArguments(builder, new int[] {1, 1, 0}, 0, 1, 0, 1); + addExpandPartitionArguments(builder, new int[] {1, 2}, 0, 1, 0, 0); + addExpandPartitionArguments(builder, new int[] {1, 2}, 0, 1, 1, 1); + addExpandPartitionArguments(builder, new int[] {1, 1}, 0, 1, 0, 1); + + return builder.build(); + } + + private static void addExpandPartitionArguments(Stream.Builder<Arguments> builder, + int[] a, int start, int end, int pivot0, int pivot1) { + builder.add(Arguments.of(a.clone(), start, end, pivot0, pivot1)); + final int[] b = a.clone(); + if (replace(a, 42, -1)) { + builder.add(Arguments.of(a.clone(), start, end, pivot0, pivot1)); + if (replace(a, 0, 10)) { + builder.add(Arguments.of(a, start, end, pivot0, pivot1)); + } + } + if (replace(b, 0, 10)) { + builder.add(Arguments.of(b, start, end, pivot0, pivot1)); + } + } + + private static boolean replace(int[] a, int x, int y) { + boolean updated = false; + for (int i = 0; i < a.length; i++) { + if (a[i] == x) { + a[i] = y; + updated = true; + } + } + return updated; + } + + @ParameterizedTest + @MethodSource(value = {"testIntSort"}) + void testIntSortUsingHeapSelect(int[] values) { + Assumptions.assumeTrue(values.length > 0); + assertSort(values, x -> { + final int right = x.length - 1; + // heapSelect is robust to right <= left + QuickSelect.heapSelect(x, 0, right, 0, right); + }); + } + + @ParameterizedTest + @MethodSource(value = {"testIntSort"}) + void testIntSortUsingHeapSelectLeft(int[] values) { + Assumptions.assumeTrue(values.length > 0); + assertSort(values, x -> { + final int right = x.length - 1; + if (right < 1) { + return; + } + QuickSelect.heapSelectLeft(x, 0, right, 0, right); + }); + } + + @ParameterizedTest + @MethodSource(value = {"testIntSort"}) + void testIntSortUsingHeapSelectRight(int[] values) { + Assumptions.assumeTrue(values.length > 0); + assertSort(values, x -> { + final int right = x.length - 1; + if (right < 1) { + return; + } + QuickSelect.heapSelectRight(x, 0, right, 0, right); + }); + } + + @ParameterizedTest + @MethodSource(value = {"testIntSort"}) + void testIntSortUsingSelection(int[] values) { + // This tests that the select function performs + // a full sort when the interval is saturated + assertSort(values, a -> { + final int right = a.length - 1; + if (right < 1) { + return; + } + QuickSelect.dualPivotQuickSelect(a, 0, right, new RangeInterval(0, right), + QuickSelect.dualPivotFlags(QuickSelect.dualPivotMaxDepth(right), 20)); + }); + } + + private static void assertSort(int[] values, Consumer<int[]> function) { + final int[] data = values.clone(); + final int[] sorted = sort(values); + function.accept(data); + Assertions.assertArrayEquals(sorted, data); + } + + static Stream<int[]> testIntSort() { + final Stream.Builder<int[]> builder = Stream.builder(); + final UniformRandomProvider rng = RandomSource.XO_SHI_RO_128_PP.create(123); + // Sizes above and below the threshold for partitioning + for (final int size : new int[] {5, 50}) { + int[] a = new int[size]; + Arrays.fill(a, 123); + builder.add(a.clone()); + for (int ii = 0; ii < size; ii++) { + a[ii] = ii; + } + builder.add(a.clone()); + for (int ii = 0; ii < size; ii++) { + a[ii] = size - ii; + } + builder.add(a.clone()); + for (int i = 0; i < 5; i++) { + a = rng.ints(size).toArray(); + builder.add(a.clone()); + final int j = rng.nextInt(size); + final int k = rng.nextInt(size); + a[j] = 0; + a[k] = 0; + builder.add(a.clone()); + for (int z = 0; z < size; z++) { + a[z] = rng.nextBoolean() ? 0 : 1; + } + builder.add(a.clone()); + a[j] = -rng.nextInt(); + a[k] = rng.nextInt(); + builder.add(a.clone()); + } + } + final int value = Integer.MIN_VALUE; + builder.add(new int[] {}); + builder.add(new int[] {value}); + builder.add(new int[] {0, value}); + builder.add(new int[] {value, value, value}); + builder.add(new int[] {value, 0, 0, value}); + builder.add(new int[] {value, 0, 0}); + builder.add(new int[] {value, 0, 1, 0}); + builder.add(new int[] {value, 123, 0, 456, 0, value}); + return builder.build(); + } + @Test void testDualPivotMaxDepth() { // Reasonable behaviour at small x diff --git a/commons-numbers-arrays/src/test/java/org/apache/commons/numbers/arrays/SortingTest.java b/commons-numbers-arrays/src/test/java/org/apache/commons/numbers/arrays/SortingTest.java index 310947a5..83bdd708 100644 --- a/commons-numbers-arrays/src/test/java/org/apache/commons/numbers/arrays/SortingTest.java +++ b/commons-numbers-arrays/src/test/java/org/apache/commons/numbers/arrays/SortingTest.java @@ -155,7 +155,7 @@ class SortingTest { } builder.add(a); for (int i = 0; i < 5; i++) { - builder.add(rng.doubles(size).toArray().clone()); + builder.add(rng.doubles(size).toArray()); } } return builder.build(); @@ -365,6 +365,334 @@ class SortingTest { return builder.build(); } + private static double[] extractIndices(double[] values, int[] indices) { + final double[] data = new double[indices.length]; + for (int i = 0; i < indices.length; i++) { + data[i] = values[indices[i]]; + } + return data; + } + + // int[] + + @ParameterizedTest + @MethodSource(value = {"testIntSort"}) + void testIntInsertionSort(int[] values) { + assertSort(values, x -> Sorting.sort(x, 0, x.length - 1)); + } + + @ParameterizedTest + @MethodSource(value = {"testIntSort", "testIntSort3"}) + void testIntSort3(int[] values) { + final int[] data = Arrays.copyOf(values, 3); + assertSort(data, x -> Sorting.sort3(x, 0, 1, 2)); + } + + @ParameterizedTest + @MethodSource(value = {"testIntSort", "testIntSort5"}) + void testIntSort5(int[] values) { + final int[] data = Arrays.copyOf(values, 5); + assertSort(data, x -> Sorting.sort5(x, 0, 1, 2, 3, 4)); + } + + @ParameterizedTest + @MethodSource(value = {"testIntSort3Internal"}) + void testIntSort3Internal(int[] values, int[] indices) { + final int a = indices[0]; + final int b = indices[1]; + final int c = indices[2]; + assertSortInternal(values, x -> Sorting.sort3(x, a, b, c), indices); + } + + @ParameterizedTest + @MethodSource(value = {"testIntSort5Internal"}) + void testIntSort5Internal(int[] values, int[] indices) { + final int a = indices[0]; + final int b = indices[1]; + final int c = indices[2]; + final int d = indices[3]; + final int e = indices[4]; + assertSortInternal(values, x -> Sorting.sort5(x, a, b, c, d, e), indices); + } + + /** + * Assert that the sort {@code function} computes the same result as + * {@link Arrays#sort(int[])}. + * + * @param values Data. + * @param function Sort function. + */ + private static void assertSort(int[] values, Consumer<int[]> function) { + final int[] expected = values.clone(); + Arrays.sort(expected); + final int[] actual = values.clone(); + function.accept(actual); + Assertions.assertArrayEquals(expected, actual, "Invalid sort"); + } + + /** + * Assert that the sort {@code function} computes the same result as + * {@link Arrays#sort(int[])} run on the provided {@code indices}. + * + * @param values Data. + * @param function Sort function. + * @param indices Indices. + */ + private static void assertSortInternal(int[] values, Consumer<int[]> function, int... indices) { + Assertions.assertFalse(containsDuplicates(indices), () -> "Duplicate indices: " + Arrays.toString(indices)); + // Pick out the data to sort + final int[] expected = extractIndices(values, indices); + Arrays.sort(expected); + final int[] data = values.clone(); + function.accept(data); + // Pick out the data that was sorted + final int[] actual = extractIndices(data, indices); + Assertions.assertArrayEquals(expected, actual, "Invalid sort"); + // Check outside the sorted indices + OUTSIDE: for (int i = 0; i < values.length; i++) { + for (final int ignore : indices) { + if (i == ignore) { + continue OUTSIDE; + } + } + Assertions.assertEquals(values[i], data[i]); + } + } + + static Stream<int[]> testIntSort() { + final Stream.Builder<int[]> builder = Stream.builder(); + final UniformRandomProvider rng = RandomSource.XO_SHI_RO_128_PP.create(); + for (final int size : new int[] {10, 15}) { + int[] a = new int[size]; + Arrays.fill(a, 123); + builder.add(a.clone()); + for (int ii = 0; ii < size; ii++) { + a[ii] = ii; + } + builder.add(a.clone()); + for (int ii = 0; ii < size; ii++) { + a[ii] = size - ii; + } + builder.add(a); + for (int i = 0; i < 5; i++) { + builder.add(rng.ints(size).toArray()); + } + } + return builder.build(); + } + + static Stream<int[]> testIntSort3() { + // Permutations is 3! = 6 + final int x = 335; + final int y = 123; + final int z = -999; + final int[][] a = { + {x, y, z}, + {x, z, y}, + {z, x, y}, + {y, x, z}, + {y, z, x}, + {z, y, x}, + }; + return Arrays.stream(a); + } + + static Stream<int[]> testIntSort5() { + final Stream.Builder<int[]> builder = Stream.builder(); + final int[] a = new int[5]; + // Permutations is 5! = 120 + final int shift = 42; + for (int i = 0; i < 5; i++) { + a[0] = i + shift; + for (int j = 0; j < 5; j++) { + if (j == i) { + continue; + } + a[1] = j + shift; + for (int k = 0; k < 5; k++) { + if (k == j || k == i) { + continue; + } + a[2] = k + shift; + for (int l = 0; l < 5; l++) { + if (l == k || l == j || l == i) { + continue; + } + a[3] = l + shift; + for (int m = 0; m < 5; m++) { + if (m == l || m == k || m == j || m == i) { + continue; + } + a[3] = m + shift; + builder.add(a.clone()); + } + } + } + } + } + return builder.build(); + } + + static Stream<Arguments> testIntSort3Internal() { + return testIntSortInternal(3); + } + + static Stream<Arguments> testIntSort5Internal() { + return testIntSortInternal(5); + } + + static Stream<Arguments> testIntSortInternal(int k) { + final Stream.Builder<Arguments> builder = Stream.builder(); + final UniformRandomProvider rng = RandomSource.XO_SHI_RO_128_PP.create(); + for (final int size : new int[] {k, 2 * k, 4 * k}) { + final PermutationSampler s = new PermutationSampler(rng, size, k); + for (int i = k * k; i-- >= 0;) { + final int[] a = rng.ints(size).toArray(); + final int[] indices = s.sample(); + builder.add(Arguments.of(a, indices)); + } + } + return builder.build(); + } + + @ParameterizedTest + @MethodSource(value = {"testIntSort4Internal"}) + void testIntLowerMedian4Internal(int[] values, int[] indices) { + final int a = indices[0]; + final int b = indices[1]; + final int c = indices[2]; + final int d = indices[3]; + assertMedian(values, x -> { + Sorting.lowerMedian4(x, a, b, c, d); + return b; + }, true, false, indices); + } + + @ParameterizedTest + @MethodSource(value = {"testIntSort4Internal"}) + void testIntUpperMedian4Internal(int[] values, int[] indices) { + final int a = indices[0]; + final int b = indices[1]; + final int c = indices[2]; + final int d = indices[3]; + assertMedian(values, x -> { + Sorting.upperMedian4(x, a, b, c, d); + return c; + }, false, false, indices); + } + + @ParameterizedTest + @MethodSource(value = {"testIntSort4"}) + void testIntLowerMedian4(int[] a) { + // This method computes in place + assertMedian(a, x -> { + Sorting.lowerMedian4(x, 0, 1, 2, 3); + return 1; + }, true, true, 0, 1, 2, 3); + } + + @ParameterizedTest + @MethodSource(value = {"testIntSort4"}) + void testIntUpperMedian4(int[] a) { + // This method computes in place + assertMedian(a, x -> { + Sorting.upperMedian4(x, 0, 1, 2, 3); + return 2; + }, false, true, 0, 1, 2, 3); + } + + /** + * Assert that the median {@code function} computes the same result as + * {@link Arrays#sort(int[])} run on the provided {@code indices}. + * + * @param values Data. + * @param function Sort function. + * @param lower For even lengths use the lower median; else the upper median. + * @param stable If true then no swaps should be made on the second pass. + * @param indices Indices. + */ + private static void assertMedian(int[] values, ToIntFunction<int[]> function, + boolean lower, boolean stable, int... indices) { + Assertions.assertFalse(containsDuplicates(indices), () -> "Duplicate indices: " + Arrays.toString(indices)); + // Pick out the data to sort + final int[] expected = extractIndices(values, indices); + Arrays.sort(expected); + final int[] data = values.clone(); + final int m = function.applyAsInt(data); + Assertions.assertEquals(expected[(lower ? -1 : 0) + (expected.length >>> 1)], data[m]); + // Check outside the sorted indices + OUTSIDE: for (int i = 0; i < values.length; i++) { + for (final int ignore : indices) { + if (i == ignore) { + continue OUTSIDE; + } + } + Assertions.assertEquals(values[i], data[i]); + } + // This is not a strict requirement but check that no swaps occur on a second pass + if (stable) { + final int[] x = data.clone(); + final int m2 = function.applyAsInt(data); + Assertions.assertEquals(m, m2); + Assertions.assertArrayEquals(x, data); + } + } + + + static Stream<int[]> testIntSort4() { + final Stream.Builder<int[]> builder = Stream.builder(); + final int[] a = new int[4]; + // Permutations is 4! = 24 + final int shift = 42; + for (int i = 0; i < 4; i++) { + a[0] = i + shift; + for (int j = 0; j < 4; j++) { + if (j == i) { + continue; + } + a[1] = j + shift; + for (int k = 0; k < 4; k++) { + if (k == j || k == i) { + continue; + } + a[2] = k + shift; + for (int l = 0; l < 4; l++) { + if (l == k || l == j || l == i) { + continue; + } + a[3] = l + shift; + builder.add(a.clone()); + } + } + } + } + return builder.build(); + } + + static Stream<Arguments> testIntSort4Internal() { + final int k = 4; + final Stream.Builder<Arguments> builder = Stream.builder(); + final UniformRandomProvider rng = RandomSource.XO_SHI_RO_128_PP.create(); + for (final int size : new int[] {k, 2 * k, 4 * k}) { + int[] a = rng.ints(size).toArray(); + final PermutationSampler s = new PermutationSampler(rng, size, k); + for (int i = k * k; i-- >= 0;) { + a = rng.ints(size).toArray(); + final int[] indices = s.sample(); + builder.add(Arguments.of(a, indices)); + } + } + return builder.build(); + } + + private static int[] extractIndices(int[] values, int[] indices) { + final int[] data = new int[indices.length]; + for (int i = 0; i < indices.length; i++) { + data[i] = values[indices[i]]; + } + return data; + } + // Sorting unique indices @ParameterizedTest @@ -451,16 +779,6 @@ class SortingTest { return builder.build(); } - // Helper methods - - private static double[] extractIndices(double[] values, int[] indices) { - final double[] data = new double[indices.length]; - for (int i = 0; i < indices.length; i++) { - data[i] = values[indices[i]]; - } - return data; - } - private static boolean containsDuplicates(int[] indices) { for (int i = 0; i < indices.length; i++) { for (int j = 0; j < i; j++) { diff --git a/commons-numbers-examples/examples-jmh/src/main/java/org/apache/commons/numbers/examples/jmh/arrays/SelectionPerformance.java b/commons-numbers-examples/examples-jmh/src/main/java/org/apache/commons/numbers/examples/jmh/arrays/SelectionPerformance.java index ade2484e..d2ac0557 100644 --- a/commons-numbers-examples/examples-jmh/src/main/java/org/apache/commons/numbers/examples/jmh/arrays/SelectionPerformance.java +++ b/commons-numbers-examples/examples-jmh/src/main/java/org/apache/commons/numbers/examples/jmh/arrays/SelectionPerformance.java @@ -52,7 +52,7 @@ import org.openjdk.jmh.annotations.Warmup; import org.openjdk.jmh.infra.Blackhole; /** - * Executes a benchmark of the selection of indices from {@code double} array data. + * Executes a benchmark of the selection of indices from array data. */ @BenchmarkMode(Mode.AverageTime) @OutputTimeUnit(TimeUnit.NANOSECONDS) @@ -456,6 +456,18 @@ public class SelectionPerformance { return getDataSample(order[index]); } + /** + * Gets the sample for the given {@code index}. + * + * <p>This is returned in a randomized order per iteration. + * + * @param index Index. + * @return the data sample + */ + public int[] getIntData(int index) { + return getIntDataSample(order[index]); + } + /** * Gets the sample for the given {@code index}. * @@ -471,6 +483,22 @@ public class SelectionPerformance { return x; } + /** + * Gets the sample for the given {@code index}. + * + * @param index Index. + * @return the data sample + */ + protected int[] getIntDataSample(int index) { + // For parity with other methods do not use data.clone() + final int[] a = data[index]; + final int[] x = new int[a.length]; + for (int i = -1; ++i < a.length;) { + x[i] = a[i]; + } + return x; + } + /** * Gets the sample size for the given {@code index}. * @@ -1247,6 +1275,12 @@ public class SelectionPerformance { return super.getDataSample(order[index] / repeats); } + /** {@inheritDoc} */ + @Override + public int[] getIntData(int index) { + return super.getIntDataSample(order[index] / repeats); + } + /** * Gets the indices for the given {@code index}. * @@ -2251,10 +2285,10 @@ public class SelectionPerformance { } /** - * Source of a k-th selector function. + * Source of a k-th selector function for {@code double} data. */ @State(Scope.Benchmark) - public static class KFunctionSource { + public static class DoubleKFunctionSource { /** Name of the source. */ @Param({SORT + JDK, SPH, SP, BM, SBM, @@ -2469,6 +2503,66 @@ public class SelectionPerformance { } } + /** + * Source of a k-th selector function for {@code int} data. + */ + @State(Scope.Benchmark) + public static class IntKFunctionSource { + /** Name of the source. */ + @Param({SORT + JDK, SELECT}) + private String name; + + /** The action. */ + private BiFunction<int[], int[], int[]> function; + + /** + * @return the function + */ + public BiFunction<int[], int[], int[]> getFunction() { + return function; + } + + /** + * Create the function. + */ + @Setup + public void setup() { + Objects.requireNonNull(name); + // Note: Always clone the indices + if (name.equals(BASELINE)) { + function = (data, indices) -> extractIndices(data, indices.clone()); + } else if (name.startsWith(SORT)) { + function = (data, indices) -> { + Arrays.sort(data); + return extractIndices(data, indices.clone()); + }; + } else if (name.startsWith(SELECT)) { + function = (data, indices) -> { + Selection.select(data, indices.clone()); + return extractIndices(data, indices); + }; + } + if (function == null) { + throw new IllegalStateException("Unknown int selector function: " + name); + } + } + + /** + * Extract the data at the specified indices. + * + * @param data Data. + * @param indices Indices. + * @return the data + */ + private static int[] extractIndices(int[] data, int[] indices) { + final int[] x = new int[indices.length]; + for (int i = 0; i < indices.length; i++) { + x[i] = data[indices[i]]; + } + return x; + } + } + /** * Source of a function that pre-processes NaN and signed zeros (-0.0). * @@ -2978,7 +3072,7 @@ public class SelectionPerformance { * @param bh Data sink. */ @Benchmark - public void partition(KFunctionSource function, KSource source, Blackhole bh) { + public void doublePartition(DoubleKFunctionSource function, KSource source, Blackhole bh) { final int size = source.size(); final BiFunction<double[], int[], double[]> fun = function.getFunction(); for (int j = -1; ++j < size;) { @@ -2988,6 +3082,24 @@ public class SelectionPerformance { } } + /** + * Benchmark partitioning using k partition indices on {@code int} data. + * + * @param function Source of the function. + * @param source Source of the data. + * @param bh Data sink. + */ + @Benchmark + public void intPartition(IntKFunctionSource function, KSource source, Blackhole bh) { + final int size = source.size(); + final BiFunction<int[], int[], int[]> fun = function.getFunction(); + for (int j = -1; ++j < size;) { + // Note: This uses the indices without cloning. This is because some + // functions do not destructively modify the data. + bh.consume(fun.apply(source.getIntData(j), source.getIndices(j))); + } + } + /** * Benchmark partitioning of an interval of indices a set distance from the edge. * This is used to benchmark the switch from quickselect partitioning to edgeselect. diff --git a/src/main/resources/checkstyle/checkstyle-suppressions.xml b/src/main/resources/checkstyle/checkstyle-suppressions.xml index c8072ef8..54189b1f 100644 --- a/src/main/resources/checkstyle/checkstyle-suppressions.xml +++ b/src/main/resources/checkstyle/checkstyle-suppressions.xml @@ -71,6 +71,7 @@ <suppress checks="MethodLength" files=".*[/\\]BoostBetaTest.java" /> <suppress checks="UnnecessaryParentheses" files=".*[/\\]gamma[/\\]BoostGammaTest.java" /> <suppress checks="ParameterNumber" files=".*[/\\]DDTest\.java" /> + <suppress checks="FileLength" files=".*[/\\]arrays[/\\]SelectionTest.java" /> <suppress checks="UnnecessaryParentheses" files=".*[/\\]arrays[/\\]SelectionTest.java" /> <suppress checks="FileLength" files=".*[/\\]jmh[/\\]arrays[/\\]PartitionTest.java" /> <suppress checks="UnnecessaryParentheses" files=".*[/\\]jmh[/\\]arrays[/\\]PartitionTest.java" />