This is an automated email from the ASF dual-hosted git repository. aherbert pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/commons-rng.git
commit 23f4f7c235de32c7bee9c7b113df60677a488488 Author: aherbert <aherb...@apache.org> AuthorDate: Tue Oct 8 13:16:20 2019 +0100 Added systematic failures report to stress test application. --- .../examples/stress/AlphaNumericComparator.java | 224 +++++++++++++++++++++ .../rng/examples/stress/ResultsCommand.java | 178 +++++++++++++--- 2 files changed, 375 insertions(+), 27 deletions(-) diff --git a/commons-rng-examples/examples-stress/src/main/java/org/apache/commons/rng/examples/stress/AlphaNumericComparator.java b/commons-rng-examples/examples-stress/src/main/java/org/apache/commons/rng/examples/stress/AlphaNumericComparator.java new file mode 100644 index 0000000..c920753 --- /dev/null +++ b/commons-rng-examples/examples-stress/src/main/java/org/apache/commons/rng/examples/stress/AlphaNumericComparator.java @@ -0,0 +1,224 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.rng.examples.stress; + +import java.io.Serializable; +import java.util.Comparator; + +/** + * Provides number sensitive sorting for character sequences. + * + * <p>Extracts sub-sequences of either numeric ({@code [0, 9]}) or non-numeric characters + * and compares them numerically or lexicographically. Leading zeros are ignored from + * numbers. Negative numbers are not supported. + * + * <pre> + * Traditional AlphaNumeric + * z0200.html z2.html + * z100.html z100.html + * z2.html z0200.html + * </pre> + * + * <p>This is based on ideas in the Alphanum algorithm by David Koelle.</p> + * + * <p>This implementation supports:</p> + * + * <ul> + * <li>{@link CharSequence} comparison + * <li>Direct use of input sequences for minimal memory consumption + * <li>Numbers with leading zeros + * </ul> + * + * <p>Any null sequences are ordered before non-null sequences.</p> + * + * <p>Note: The comparator is thread-safe so can be used in a parallel sort. + * + * @see <a href="http://www.DaveKoelle.com">Alphanum Algorithm</a> + */ +class AlphaNumericComparator implements Comparator<CharSequence>, Serializable { + /** + * An instance. + */ + public static final AlphaNumericComparator INSTANCE = new AlphaNumericComparator(); + + /** + * The serial version ID. + * Note: Comparators are recommended to be Serializable to allow serialization of + * collections constructed with a Comparator. + */ + private static final long serialVersionUID = 1L; + + @Override + public int compare(CharSequence seq1, CharSequence seq2) { + if (seq1 == seq2) { + return 0; + } + // Null is less + if (seq1 == null) { + return -1; + } + if (seq2 == null) { + return 1; + } + + int pos1 = 0; + int pos2 = 0; + final int length1 = seq1.length(); + final int length2 = seq2.length(); + + while (pos1 < length1 && pos2 < length2) { + final int end1 = nextSubSequenceEnd(seq1, pos1, length1); + final int end2 = nextSubSequenceEnd(seq2, pos2, length2); + + // If both sub-sequences contain numeric characters, sort them numerically + int result = 0; + if (isDigit(seq1.charAt(pos1)) && isDigit(seq2.charAt(pos2))) { + result = compareNumerically(seq1, pos1, end1, seq2, pos2, end2); + } else { + result = compareLexicographically(seq1, pos1, end1, seq2, pos2, end2); + } + + if (result != 0) { + return result; + } + + pos1 = end1; + pos2 = end2; + } + + return length1 - length2; + } + + /** + * Get the end position of the next sub-sequence of either digits or non-digit + * characters starting from the start position. + * + * <p>The end position is exclusive such that the sub-sequence is the interval + * {@code [start, end)}. + * + * @param seq the character sequence + * @param start the start position + * @param length the sequence length + * @return the sub-sequence end position (exclusive) + */ + private static int nextSubSequenceEnd(CharSequence seq, int start, int length) { + int pos = start; + // Set the sub-sequence type (digits or non-digits) + final boolean seqType = isDigit(seq.charAt(pos++)); + while (pos < length && seqType == isDigit(seq.charAt(pos))) { + // Extend the sub-sequence + pos++; + } + return pos; + } + + /** + * Checks if the character is a digit. + * + * @param ch the character + * @return true if a digit + */ + private static boolean isDigit(char ch) { + return ch >= 48 && ch <= 57; + } + + /** + * Compares two sub-sequences numerically. Ignores leading zeros. Assumes all + * characters are digits. + * + * @param seq1 the first sequence + * @param start1 the start of the first sub-sequence + * @param end1 the end of the first sub-sequence + * @param seq2 the second sequence + * @param start2 the start of the second sub-sequence + * @param end2 the end of the second sub-sequence sequence + * @return the value {@code 0} if the sub-sequences are equal; a value less than + * {@code 0} if sub-sequence 1 is numerically less than sub-sequence 2; and a value + * greater than {@code 0} if sub-sequence 1 is numerically greater than sub-sequence + * 2. + */ + private static int compareNumerically(CharSequence seq1, int start1, int end1, + CharSequence seq2, int start2, int end2) { + // Ignore leading zeros in numbers + int pos1 = advancePastLeadingZeros(seq1, start1, end1); + int pos2 = advancePastLeadingZeros(seq2, start2, end2); + + // Simple comparison by length + final int result = (end1 - pos1) - (end2 - pos2); + // If equal, the first different number counts. + if (result == 0) { + while (pos1 < end1) { + final char c1 = seq1.charAt(pos1++); + final char c2 = seq2.charAt(pos2++); + if (c1 != c2) { + return c1 - c2; + } + } + } + return result; + } + + /** + * Advances past leading zeros in the sub-sequence. Returns the index of the start + * character of the number. + * + * @param seq the sequence + * @param start the start of the sub-sequence + * @param end the end of the sub-sequence + * @return the start index of the number + */ + private static int advancePastLeadingZeros(CharSequence seq, int start, int end) { + int pos = start; + // Ignore zeros only when there are further characters + while (pos < end - 1 && seq.charAt(pos) == '0') { + pos++; + } + return pos; + } + + /** + * Compares two sub-sequences lexicographically. This matches the compare function in + * {@link String} using extracted sub-sequences. + * + * @param seq1 the first sequence + * @param start1 the start of the first sub-sequence + * @param end1 the end of the first sub-sequence + * @param seq2 the second sequence + * @param start2 the start of the second sub-sequence + * @param end2 the end of the second sub-sequence sequence + * @return the value {@code 0} if the sub-sequences are equal; a value less than + * {@code 0} if sub-sequence 1 is lexicographically less than sub-sequence 2; and a + * value greater than {@code 0} if sub-sequence 1 is lexicographically greater than + * sub-sequence 2. + * @see String#compareTo(String) + */ + private static int compareLexicographically(CharSequence seq1, int start1, int end1, + CharSequence seq2, int start2, int end2) { + final int len1 = end1 - start1; + final int len2 = end2 - start2; + final int limit = Math.min(len1, len2); + + for (int i = 0; i < limit; i++) { + final char c1 = seq1.charAt(i + start1); + final char c2 = seq2.charAt(i + start2); + if (c1 != c2) { + return c1 - c2; + } + } + return len1 - len2; + } +} diff --git a/commons-rng-examples/examples-stress/src/main/java/org/apache/commons/rng/examples/stress/ResultsCommand.java b/commons-rng-examples/examples-stress/src/main/java/org/apache/commons/rng/examples/stress/ResultsCommand.java index 2f20252..5f3eb9e 100644 --- a/commons-rng-examples/examples-stress/src/main/java/org/apache/commons/rng/examples/stress/ResultsCommand.java +++ b/commons-rng-examples/examples-stress/src/main/java/org/apache/commons/rng/examples/stress/ResultsCommand.java @@ -36,9 +36,11 @@ import java.util.ArrayList; import java.util.Collections; import java.util.EnumSet; import java.util.Formatter; +import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; +import java.util.Map.Entry; import java.util.concurrent.Callable; import java.util.function.Function; import java.util.regex.Matcher; @@ -84,6 +86,8 @@ class ResultsCommand implements Callable<Void> { private static final char BACK_SLASH = '\\'; /** Character '|'. */ private static final char PIPE = '|'; + /** The column name for the RNG. */ + private static final String COLUMN_RNG = "RNG"; /** The standard options. */ @Mixin @@ -108,7 +112,9 @@ class ResultsCommand implements Callable<Void> { /** The flag to show failed tests. */ @Option(names = {"--failed"}, - description = "Output failed tests (not all formats).") + description = {"Output failed tests (support varies by format).", + "CSV: List individual test failures.", + "APT: Count of systematic test failures."}) private boolean showFailedTests; /** The flag to include the Dieharder sums test. */ @@ -133,6 +139,8 @@ class ResultsCommand implements Callable<Void> { APT, /** Text table output. */ TXT, + /** Systematic failures output. */ + FAILURES, } /** @@ -310,6 +318,9 @@ class ResultsCommand implements Callable<Void> { case TXT: writeTXT(out, results); break; + case FAILURES: + writeFailures(out, results); + break; default: throw new ApplicationException("Unknown output format: " + outputFormat); } @@ -709,9 +720,14 @@ class ResultsCommand implements Callable<Void> { } for (final String testName : testNames) { final List<TestResult> testResults = getTestResults(results, randomSource, reversed, testName); - writeAPTColumn(output, testResults.stream() - .map(toAPTString) - .collect(Collectors.joining(", "))); + String text = testResults.stream() + .map(toAPTString) + .collect(Collectors.joining(", ")); + // Add systematic failures in brackets + if (showFailedTests) { + text += " (" + getSystematicFailures(testResults).size() + ")"; + writeAPTColumn(output, text); + } } output.newLine(); output.write(separator); @@ -861,16 +877,16 @@ class ResultsCommand implements Callable<Void> { } /** - * Write the column name to the output. + * Write the column text to the output. * * @param output Output. - * @param name Name. + * @param text Text. * @throws IOException Signals that an I/O exception has occurred. */ private static void writeAPTColumn(BufferedWriter output, - String name) throws IOException { + String text) throws IOException { output.write(' '); - output.write(name); + output.write(text); output.write(" |"); } @@ -899,6 +915,31 @@ class ResultsCommand implements Callable<Void> { } /** + * Gets the systematic failures (tests that fail in every test result). + * + * @param results Results. + * @return the systematic failures + */ + private static List<String> getSystematicFailures(List<TestResult> results) { + final HashMap<String, Integer> map = new HashMap<>(); + for (final TestResult result : results) { + // Some named tests can fail more than once on different statistics. + // For example TestU01 BigCrush LongestHeadRun can output in the summary: + // 86 LongestHeadRun, r = 0 eps + // 86 LongestHeadRun, r = 0 1 - eps1 + // This will be counted as 2 failed tests. For the purpose of systematic + // failures the name of the test is the same and should be counted once. + final HashSet<String> unique = new HashSet<>(result.getFailedTests()); + for (String test : unique) { + map.merge(test, 1, (i, j) -> i + j); + } + } + return map.entrySet().stream().filter(e -> e.getValue() == results.size()) + .map(Entry::getKey) + .collect(Collectors.toList()); + } + + /** * Write the results as a text table. * * @param out Output stream. @@ -917,7 +958,7 @@ class ResultsCommand implements Callable<Void> { // Make bit-reversed column optional if no generators are bit reversed. final boolean showBitReversedColumn = bitReversed.contains(Boolean.TRUE); - final List<List<String>> columns = createColumns(testNames, showBitReversedColumn); + final List<List<String>> columns = createTXTColumns(testNames, showBitReversedColumn); // Add all data // This will collate results for each combination of 'RandomSource + bitReversed' @@ -934,43 +975,31 @@ class ResultsCommand implements Callable<Void> { columns.get(i++).add(testResults.stream() .map(TestResult::getFailureCountString) .collect(Collectors.joining(","))); + columns.get(i++).add(String.valueOf(getSystematicFailures(testResults).size())); } } } - // Create format using the column widths - final String format = createTextFormatFromColumnWidths(columns); - - // Output - try (BufferedWriter output = new BufferedWriter(new OutputStreamWriter(out, StandardCharsets.UTF_8)); - Formatter formatter = new Formatter(output)) { - final int rows = columns.get(0).size(); - final Object[] args = new Object[columns.size()]; - for (int row = 0; row < rows; row++) { - for (int i = 0; i < args.length; i++) { - args[i] = columns.get(i).get(row); - } - formatter.format(format, args); - } - } + writeColumns(out, columns); } /** - * Creates the columns. + * Creates the columns for the text output. * * @param testNames the test names * @param showBitReversedColumn Set to true to show the bit reversed column * @return the list of columns */ - private static List<List<String>> createColumns(final List<String> testNames, + private static List<List<String>> createTXTColumns(final List<String> testNames, final boolean showBitReversedColumn) { final ArrayList<List<String>> columns = new ArrayList<>(); - columns.add(createColumn("RNG")); + columns.add(createColumn(COLUMN_RNG)); if (showBitReversedColumn) { columns.add(createColumn(BIT_REVERSED)); } for (final String testName : testNames) { columns.add(createColumn(testName)); + columns.add(createColumn("∩")); } return columns; } @@ -1021,4 +1050,99 @@ class ResultsCommand implements Callable<Void> { } return width; } + + /** + * Write the columns as fixed width text to the output. + * + * @param out Output stream. + * @param columns Columns + * @throws IOException Signals that an I/O exception has occurred. + */ + private static void writeColumns(OutputStream out, + List<List<String>> columns) throws IOException { + // Create format using the column widths + final String format = createTextFormatFromColumnWidths(columns); + + // Output + try (BufferedWriter output = new BufferedWriter(new OutputStreamWriter(out, StandardCharsets.UTF_8)); + Formatter formatter = new Formatter(output)) { + final int rows = columns.get(0).size(); + final Object[] args = new Object[columns.size()]; + for (int row = 0; row < rows; row++) { + for (int i = 0; i < args.length; i++) { + args[i] = columns.get(i).get(row); + } + formatter.format(format, args); + } + } + } + + /** + * Write the systematic failures as a text table. + * + * @param out Output stream. + * @param results Results. + * @throws IOException Signals that an I/O exception has occurred. + */ + private static void writeFailures(OutputStream out, + List<TestResult> results) throws IOException { + // Identify all: + // RandomSources, bit-reversed, test names, + final List<RandomSource> randomSources = getRandomSources(results); + final List<Boolean> bitReversed = getBitReversed(results); + final List<String> testNames = getTestNames(results); + + // Create columns for RandomSource, bit-reversed, each test name. + // Make bit-reversed column optional if no generators are bit reversed. + final boolean showBitReversedColumn = bitReversed.contains(Boolean.TRUE); + + final List<List<String>> columns = createFailuresColumns(testNames, showBitReversedColumn); + + final AlphaNumericComparator cmp = new AlphaNumericComparator(); + + // Add all data for each combination of 'RandomSource + bitReversed' + for (final RandomSource randomSource : randomSources) { + for (final boolean reversed : bitReversed) { + for (final String testName : testNames) { + final List<TestResult> testResults = getTestResults(results, randomSource, + reversed, testName); + final List<String> failures = getSystematicFailures(testResults); + if (failures.isEmpty()) { + continue; + } + Collections.sort(failures, cmp); + for (String failed : failures) { + int i = 0; + columns.get(i++).add(randomSource.toString()); + if (showBitReversedColumn) { + columns.get(i++).add(Boolean.toString(reversed)); + } + columns.get(i++).add(testName); + columns.get(i).add(failed); + } + } + } + } + + writeColumns(out, columns); + } + + /** + * Creates the columns for the failures output. + * + * @param testNames the test names + * @param showBitReversedColumn Set to true to show the bit reversed column + * @return the list of columns + */ + private static List<List<String>> createFailuresColumns(final List<String> testNames, + final boolean showBitReversedColumn) { + final ArrayList<List<String>> columns = new ArrayList<>(); + columns.add(createColumn(COLUMN_RNG)); + if (showBitReversedColumn) { + columns.add(createColumn(BIT_REVERSED)); + } + columns.add(createColumn("Test Suite")); + columns.add(createColumn("Test")); + return columns; + } }