houserjohn commented on PR #13914: URL: https://github.com/apache/lucene/pull/13914#issuecomment-2667735504
Here are the promised modified randomized unit tests. These should work with your API change, but you might need to modify them to suit the caveat you mentioned. Of course, add the correct imports: ``` public void testComputeDynamicNumericRangesWithSameWeightsShuffled() { List<DynamicRangeUtil.DynamicRangeInfo> expectedRangeInfoList = new ArrayList<>(); long[] values = new long[100]; long[] weights = new long[100]; for (int i = 0; i < 100; i++) { values[i] = i; weights[i] = 50; } // Shuffling the values and weights should not change the answer between runs // We expect that returned ranges should come in a strict, deterministic order // with the same values and weights shuffleValuesWeights(values, weights); expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(25, 1250L, 0L, 24L, 12.0D)); expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(25, 1250L, 25L, 49L, 37.0D)); expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(25, 1250L, 50L, 74L, 62.0D)); expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(25, 1250L, 75L, 99L, 87.0D)); assertDynamicNumericRangeResults(values, weights, 4, 4950L, 5000L, expectedRangeInfoList); } public void testComputeDynamicNumericRangesWithSameValuesShuffled() { List<DynamicRangeUtil.DynamicRangeInfo> expectedRangeInfoList = new ArrayList<>(); long totalWeight = 0; long[] values = new long[100]; long[] weights = new long[100]; for (int i = 0; i < 100; i++) { values[i] = 50; weights[i] = i; totalWeight += i; } // Shuffling the values and weights should not change the answer between runs // We expect that returned ranges should come in a strict, deterministic order // with the same values and weights shuffleValuesWeights(values, weights); expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(51, 1275L, 50L, 50L, 50D)); expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(21, 1281L, 50L, 50L, 50D)); expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(16, 1272L, 50L, 50L, 50D)); expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(12, 1122L, 50L, 50L, 50D)); assertDynamicNumericRangeResults(values, weights, 4, 5000L, totalWeight, expectedRangeInfoList); } public void testComputeDynamicNumericRangesWithRandomValues() { int arraySize = random().nextInt(100); long[] values = new long[arraySize]; long[] weights = new long[arraySize]; for (int i = 0; i < arraySize; i++) { values[i] = random().nextLong(1000); weights[i] = random().nextLong(1000); } int topN = random().nextInt(100); long totalWeight = 0; long totalValue = 0; for (int i = 0; i < arraySize; i++) { totalWeight += weights[i]; totalValue += values[i]; } assertDynamicNumericRangeValidProperties(values, weights, topN, totalValue, totalWeight); } private static void assertDynamicNumericRangeValidProperties( long[] values, long[] weights, int topN, long totalValue, long totalWeight) { List<WeightedPair> sortedPairs = new ArrayList<>(); for (int i = 0; i < values.length; i++) { long value = values[i]; long weight = weights[i]; WeightedPair pair = new WeightedPair(value, weight); sortedPairs.add(pair); } sortedPairs.sort( Comparator.comparingLong(WeightedPair::value).thenComparingLong(WeightedPair::weight)); int len = values.length; double rangeWeightTarget = (double) totalWeight / Math.min(topN, len); List<DynamicRangeUtil.DynamicRangeInfo> mockDynamicRangeResult = DynamicRangeUtil.computeDynamicNumericRanges( values, weights, values.length, totalValue, totalWeight, topN); // Zero requested ranges (TopN) should return a empty list of ranges regardless of inputs if (topN == 0) { assertTrue(mockDynamicRangeResult.size() == 0); return; // Early return; do not check anything else } // Adjacent ranges do not overlap - only adjacent max-min can overlap for (int i = 0; i < mockDynamicRangeResult.size() - 1; i++) { DynamicRangeUtil.DynamicRangeInfo rangeInfo = mockDynamicRangeResult.get(i); DynamicRangeUtil.DynamicRangeInfo nextRangeInfo = mockDynamicRangeResult.get(i + 1); assertTrue(rangeInfo.max() <= nextRangeInfo.min()); } // The count of every range sums to the number of values int accuCount = 0; for (int i = 0; i < mockDynamicRangeResult.size(); i++) { DynamicRangeUtil.DynamicRangeInfo rangeInfo = mockDynamicRangeResult.get(i); int count = rangeInfo.count(); accuCount += count; } assertTrue(accuCount == len); // The sum of every range weight equals the total weight long accuWeight = 0; for (int i = 0; i < mockDynamicRangeResult.size(); i++) { DynamicRangeUtil.DynamicRangeInfo rangeInfo = mockDynamicRangeResult.get(i); long weight = rangeInfo.weight(); accuWeight += weight; } assertTrue(accuWeight == totalWeight); // All values appear in atleast one range for (int pairOffset = 0, rangeIdx = 0; rangeIdx < mockDynamicRangeResult.size(); rangeIdx++) { DynamicRangeUtil.DynamicRangeInfo rangeInfo = mockDynamicRangeResult.get(rangeIdx); int count = rangeInfo.count(); for (int i = pairOffset; i < pairOffset + count; i++) { WeightedPair pair = sortedPairs.get(i); long value = pair.value(); assertTrue(rangeInfo.min() <= value && value <= rangeInfo.max()); } pairOffset += count; } // The minimum/maximum of each range is actually the smallest/largest value for (int pairOffset = 0, rangeIdx = 0; rangeIdx < mockDynamicRangeResult.size(); rangeIdx++) { DynamicRangeUtil.DynamicRangeInfo rangeInfo = mockDynamicRangeResult.get(rangeIdx); int count = rangeInfo.count(); WeightedPair minPair = sortedPairs.get(pairOffset); WeightedPair maxPair = sortedPairs.get(pairOffset + count - 1); long min = minPair.value(); long max = maxPair.value(); assertTrue(rangeInfo.min() == min); assertTrue(rangeInfo.max() == max); pairOffset += count; } // Weights of each range is over the rangeWeightTarget - exclude last range for (int i = 0; i < mockDynamicRangeResult.size() - 1; i++) { DynamicRangeUtil.DynamicRangeInfo rangeInfo = mockDynamicRangeResult.get(i); assertTrue(rangeInfo.weight() >= rangeWeightTarget); } // Removing the last weight from a range brings it under the rangeWeightTarget - exclude last // range for (int pairOffset = 0, rangeIdx = 0; rangeIdx < mockDynamicRangeResult.size() - 1; rangeIdx++) { DynamicRangeUtil.DynamicRangeInfo rangeInfo = mockDynamicRangeResult.get(rangeIdx); int count = rangeInfo.count(); WeightedPair lastPair = sortedPairs.get(pairOffset + count - 1); long lastWeight = lastPair.weight(); pairOffset += count; assertTrue(rangeInfo.weight() - lastWeight < rangeWeightTarget); } // Centroids for each range are correct for (int pairOffset = 0, rangeIdx = 0; rangeIdx < mockDynamicRangeResult.size(); rangeIdx++) { DynamicRangeUtil.DynamicRangeInfo rangeInfo = mockDynamicRangeResult.get(rangeIdx); int count = rangeInfo.count(); long accuValue = 0; for (int i = pairOffset; i < pairOffset + count; i++) { WeightedPair pair = sortedPairs.get(i); long value = pair.value(); accuValue += value; } pairOffset += count; assertTrue(rangeInfo.centroid() == ((double) accuValue / count)); } } /** Implementation of Durstenfeld's algorithm for shuffling values and weights */ private static void shuffleValuesWeights(long[] values, long[] weights) { for (int i = values.length - 1; i > 0; i--) { int rdmIdx = random().nextInt(i + 1); long tmpValue = values[i]; long tmpWeight = weights[i]; values[i] = values[rdmIdx]; weights[i] = weights[rdmIdx]; values[rdmIdx] = tmpValue; weights[rdmIdx] = tmpWeight; } } /** * Holds parameters of a weighted pair. * * @param value the value of the pair * @param weight the weight of the pair */ private record WeightedPair(long value, long weight) {} ``` Additionally, here is a command that you can run from the command line to search for bugs: First, `./gradlew clean` and `./gradlew build` ``` for i in {1..100}; do; echo $i; ./gradlew check; if [ $? -gt 0 ]; then; cat $path_to_test_output >> "../errors.txt"; fi; done; ``` `$path_to_test_output` should be a path ending with `OUTPUT-org.apache.lucene.facet.range.TestDynamicRangeUtil.txt`. This should be the same location that you go whenever you want to view the output from unit tests (which appears when a unit test fails after a build fails). After the command finishes, all of the found bugs should be in `../error.txt`. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org