This is an automated email from the ASF dual-hosted git repository. ggregory pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/commons-text.git
The following commit(s) were added to refs/heads/master by this push: new aa7e49d Check for null input, use a ternary expression, refactor a constant, Javadoc. aa7e49d is described below commit aa7e49dda44d442ef9dede756c42eb48a79c0ad1 Author: Gary Gregory <gardgreg...@gmail.com> AuthorDate: Sun Dec 20 13:58:50 2020 -0500 Check for null input, use a ternary expression, refactor a constant, Javadoc. Checkstyle: names, imports. --- .../text/translate/NumericEntityUnescaper.java | 30 +++---- .../apache/commons/text/AlphabetConverterTest.java | 95 ++++++++++++---------- .../apache/commons/text/StringEscapeUtilsTest.java | 1 - .../similarity/IntersectionSimilarityTest.java | 1 - .../text/translate/NumericEntityUnescaperTest.java | 4 +- 5 files changed, 69 insertions(+), 62 deletions(-) diff --git a/src/main/java/org/apache/commons/text/translate/NumericEntityUnescaper.java b/src/main/java/org/apache/commons/text/translate/NumericEntityUnescaper.java index f9050e7..b1abde5 100644 --- a/src/main/java/org/apache/commons/text/translate/NumericEntityUnescaper.java +++ b/src/main/java/org/apache/commons/text/translate/NumericEntityUnescaper.java @@ -22,43 +22,49 @@ import java.util.Arrays; import java.util.Collections; import java.util.EnumSet; +import org.apache.commons.lang3.ArrayUtils; + /** - * Translate XML numeric entities of the form &#[xX]?\d+;? to + * Translates XML numeric entities of the form &#[xX]?\d+;? to * the specific codepoint. * - * Note that the semi-colon is optional. + * Note that the semicolon is optional. * * @since 1.0 */ public class NumericEntityUnescaper extends CharSequenceTranslator { + /** Default options. */ + private static final EnumSet<OPTION> DEFAULT_OPTIONS = EnumSet + .copyOf(Collections.singletonList(OPTION.semiColonRequired)); + /** Enumerates NumericEntityUnescaper options for unescaping. */ public enum OPTION { /** - * Require a semicolon. + * Requires a semicolon. */ semiColonRequired, /** - * Do not require a semicolon. + * Does not require a semicolon. */ semiColonOptional, /** - * Throw an exception if a semi-colon is missing. + * Throws an exception if a semicolon is missing. */ errorIfNoSemiColon } - /** EnumSet of OPTIONS, given from the constructor. */ + /** EnumSet of OPTIONS, given from the constructor, read-only. */ private final EnumSet<OPTION> options; /** - * Create a UnicodeUnescaper. + * Creates a UnicodeUnescaper. * * The constructor takes a list of options, only one type of which is currently - * available (whether to allow, error or ignore the semi-colon on the end of a + * available (whether to allow, error or ignore the semicolon on the end of a * numeric entity to being missing). * * For example, to support numeric entities without a ';': @@ -71,15 +77,11 @@ public class NumericEntityUnescaper extends CharSequenceTranslator { * @param options to apply to this unescaper */ public NumericEntityUnescaper(final OPTION... options) { - if (options.length > 0) { - this.options = EnumSet.copyOf(Arrays.asList(options)); - } else { - this.options = EnumSet.copyOf(Collections.singletonList(OPTION.semiColonRequired)); - } + this.options = ArrayUtils.isEmpty(options) ? DEFAULT_OPTIONS : EnumSet.copyOf(Arrays.asList(options)); } /** - * Whether the passed in option is currently set. + * Tests whether the passed in option is currently set. * * @param option to check state of * @return whether the option is set diff --git a/src/test/java/org/apache/commons/text/AlphabetConverterTest.java b/src/test/java/org/apache/commons/text/AlphabetConverterTest.java index 664b3cf..fb10493 100644 --- a/src/test/java/org/apache/commons/text/AlphabetConverterTest.java +++ b/src/test/java/org/apache/commons/text/AlphabetConverterTest.java @@ -26,6 +26,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import org.apache.commons.lang3.ArrayUtils; import org.junit.jupiter.api.Test; /** @@ -33,33 +34,39 @@ import org.junit.jupiter.api.Test; */ public class AlphabetConverterTest { - private static final Character[] lowerCaseEnglish = {' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', - 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'}; - private static final Character[] englishAndNumbers = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', - 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', - 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', - 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', ' '}; - private static final Character[] lowerCaseEnglishAndNumbers = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', - 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', - 'w', 'x', 'y', 'z', ' '}; - private static final Character[] numbers = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9'}; - private static final Character[] binary = {'0', '1'}; - private static final Character[] hebrew = {'_', ' ', '\u05e7', '\u05e8', '\u05d0', '\u05d8', '\u05d5', '\u05df', '\u05dd', - '\u05e4', '\u05e9', '\u05d3', '\u05d2', '\u05db', '\u05e2', '\u05d9', '\u05d7', '\u05dc', '\u05da', - '\u05e3', '\u05d6', '\u05e1', '\u05d1', '\u05d4', '\u05e0', '\u05de', '\u05e6', '\u05ea', '\u05e5'}; - private static final Character[] empty = {}; - - private static final Integer[] unicode = {32, 35395, 35397, 36302, 36291, 35203, 35201, 35215, 35219, 35268, 97, 98, 99, - 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 1001, 1002, 1003, 1004, 1005}; - private static final Integer[] lowerCaseEnglishCodepoints = {32, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, - 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122}; - private static final Integer[] doNotEncodeCodepoints = {32, 97, 98, 99}; // space, a, b, c + private static final Character[] LOWER_CASE_ENGLISH = {' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', + 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'}; + + private static final Character[] ENGLISH_AND_NUMBERS = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', + 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', + 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', + 'U', 'V', 'W', 'X', 'Y', 'Z', ' '}; + + private static final Character[] LOWER_CASE_ENGLISH_AND_NUMBERS = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', + 'w', 'x', 'y', 'z', ' '}; + + private static final Character[] NUMBERS = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9'}; + + private static final Character[] BINARY = {'0', '1'}; + + private static final Character[] HEBREW = {'_', ' ', '\u05e7', '\u05e8', '\u05d0', '\u05d8', '\u05d5', '\u05df', + '\u05dd', '\u05e4', '\u05e9', '\u05d3', '\u05d2', '\u05db', '\u05e2', '\u05d9', '\u05d7', '\u05dc', '\u05da', + '\u05e3', '\u05d6', '\u05e1', '\u05d1', '\u05d4', '\u05e0', '\u05de', '\u05e6', '\u05ea', '\u05e5'}; + + private static final Integer[] UNICODE = {32, 35395, 35397, 36302, 36291, 35203, 35201, 35215, 35219, 35268, 97, 98, + 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 1001, 1002, 1003, 1004, 1005}; + + private static final Integer[] LOWER_CASE_ENGLISH_CODEPOINTS = {32, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, + 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122}; + + private static final Integer[] DO_NOT_ENCODE_CODEPOINTS = {32, 97, 98, 99}; // space, a, b, c @Test public void binaryTest() throws UnsupportedEncodingException { - test(binary, numbers, empty, "0", "1", "10", "11"); - test(numbers, binary, empty, "12345", "0"); - test(lowerCaseEnglish, binary, empty, "abc", "a"); + test(BINARY, NUMBERS, ArrayUtils.EMPTY_CHARACTER_OBJECT_ARRAY, "0", "1", "10", "11"); + test(NUMBERS, BINARY, ArrayUtils.EMPTY_CHARACTER_OBJECT_ARRAY, "12345", "0"); + test(LOWER_CASE_ENGLISH, BINARY, ArrayUtils.EMPTY_CHARACTER_OBJECT_ARRAY, "abc", "a"); } private AlphabetConverter createJavadocExample() { @@ -72,29 +79,29 @@ public class AlphabetConverterTest { @Test public void doNotEncodeTest() throws UnsupportedEncodingException { - test(englishAndNumbers, lowerCaseEnglishAndNumbers, lowerCaseEnglish, "1", "456", "abc", "ABC", + test(ENGLISH_AND_NUMBERS, LOWER_CASE_ENGLISH_AND_NUMBERS, LOWER_CASE_ENGLISH, "1", "456", "abc", "ABC", "this will not be converted but THIS WILL"); - test(englishAndNumbers, lowerCaseEnglishAndNumbers, numbers, "1", "456", "abc", "ABC", + test(ENGLISH_AND_NUMBERS, LOWER_CASE_ENGLISH_AND_NUMBERS, NUMBERS, "1", "456", "abc", "ABC", "this will be converted but 12345 and this will be"); } @Test public void encodeFailureTest() { assertThatThrownBy(() -> { - test(binary, numbers, empty, "3"); + test(BINARY, NUMBERS, ArrayUtils.EMPTY_CHARACTER_OBJECT_ARRAY, "3"); }).isInstanceOf(UnsupportedEncodingException.class).hasMessage("Couldn't find encoding for '3' in 3"); } @Test public void hebrewTest() throws UnsupportedEncodingException { - test(hebrew, binary, empty, "\u05d0", "\u05e2", - "\u05d0\u05dc\u05e3_\u05d0\u05d5\u05d4\u05d1\u05dc_\u05d1\u05d9\u05ea_\u05d6\u05d4_\u05d1\u05d9\u05ea_" + test(HEBREW, BINARY, ArrayUtils.EMPTY_CHARACTER_OBJECT_ARRAY, "\u05d0", "\u05e2", + "\u05d0\u05dc\u05e3_\u05d0\u05d5\u05d4\u05d1\u05dc_\u05d1\u05d9\u05ea_\u05d6\u05d4_\u05d1\u05d9\u05ea_" + "\u05d2\u05d9\u05de\u05dc_\u05d6\u05d4_\u05db\u05de\u05dc_\u05d2\u05d3\u05d5\u05dc"); - test(hebrew, numbers, empty, "\u05d0", "\u05e2", - "\u05d0\u05dc\u05e3_\u05d0\u05d5\u05d4\u05d1\u05dc_\u05d1\u05d9\u05ea_\u05d6\u05d4_\u05d1\u05d9\u05ea_" + test(HEBREW, NUMBERS, ArrayUtils.EMPTY_CHARACTER_OBJECT_ARRAY, "\u05d0", "\u05e2", + "\u05d0\u05dc\u05e3_\u05d0\u05d5\u05d4\u05d1\u05dc_\u05d1\u05d9\u05ea_\u05d6\u05d4_\u05d1\u05d9\u05ea_" + "\u05d2\u05d9\u05de\u05dc_\u05d6\u05d4_\u05db\u05de\u05dc_\u05d2\u05d3\u05d5\u05dc"); - test(numbers, hebrew, empty, "123456789", "1", "5"); - test(lowerCaseEnglish, hebrew, empty, "this is a test"); + test(NUMBERS, HEBREW, ArrayUtils.EMPTY_CHARACTER_OBJECT_ARRAY, "123456789", "1", "5"); + test(LOWER_CASE_ENGLISH, HEBREW, ArrayUtils.EMPTY_CHARACTER_OBJECT_ARRAY, "this is a test"); } /* @@ -114,7 +121,7 @@ public class AlphabetConverterTest { @Test public void missingDoNotEncodeLettersFromEncodingTest() { assertThatThrownBy(() -> { - AlphabetConverter.createConverterFromChars(englishAndNumbers, lowerCaseEnglish, numbers); + AlphabetConverter.createConverterFromChars(ENGLISH_AND_NUMBERS, LOWER_CASE_ENGLISH, NUMBERS); }).isInstanceOf(IllegalArgumentException.class).hasMessage( "Can not use 'do not encode' list because encoding alphabet does not contain '0'"); } @@ -122,7 +129,7 @@ public class AlphabetConverterTest { @Test public void missingDoNotEncodeLettersFromOriginalTest() { assertThatThrownBy(() -> { - AlphabetConverter.createConverterFromChars(lowerCaseEnglish, englishAndNumbers, numbers); + AlphabetConverter.createConverterFromChars(LOWER_CASE_ENGLISH, ENGLISH_AND_NUMBERS, NUMBERS); }).isInstanceOf(IllegalArgumentException.class).hasMessage( "Can not use 'do not encode' list because original alphabet does not contain '0'"); } @@ -130,20 +137,20 @@ public class AlphabetConverterTest { @Test public void noEncodingLettersTest() { assertThatThrownBy(() -> { - AlphabetConverter.createConverterFromChars(englishAndNumbers, numbers, numbers); + AlphabetConverter.createConverterFromChars(ENGLISH_AND_NUMBERS, NUMBERS, NUMBERS); }).isInstanceOf(IllegalArgumentException.class).hasMessage( - "Must have at least two encoding characters (excluding those in the 'do not encode' list), but has 0"); + "Must have at least two encoding characters (excluding those in the 'do not encode' list), but has 0"); } @Test public void onlyOneEncodingLettersTest() { assertThatThrownBy(() -> { - final Character[] numbersPlusUnderscore = Arrays.copyOf(numbers, numbers.length + 1); + final Character[] numbersPlusUnderscore = Arrays.copyOf(NUMBERS, NUMBERS.length + 1); numbersPlusUnderscore[numbersPlusUnderscore.length - 1] = '_'; - AlphabetConverter.createConverterFromChars(englishAndNumbers, numbersPlusUnderscore, numbers); + AlphabetConverter.createConverterFromChars(ENGLISH_AND_NUMBERS, numbersPlusUnderscore, NUMBERS); }).isInstanceOf(IllegalArgumentException.class).hasMessage( - "Must have at least two encoding characters (excluding those in the 'do not encode' list), but has 1"); + "Must have at least two encoding characters (excluding those in the 'do not encode' list), but has 1"); } private void test(final Character[] originalChars, final Character[] encodingChars, @@ -258,7 +265,7 @@ public class AlphabetConverterTest { public void testEqualsWithNull() { final Character[] characterArray = new Character[0]; final AlphabetConverter alphabetConverter = AlphabetConverter.createConverterFromChars(characterArray, null, - null); + null); assertThat(alphabetConverter.equals(null)).isFalse(); } @@ -295,13 +302,13 @@ public class AlphabetConverterTest { "Unexpected string without decoding (XX) in " + toDecode); } - /* + /** * Test constructor from code points */ @Test public void unicodeTest() throws UnsupportedEncodingException { - final AlphabetConverter ac = AlphabetConverter.createConverter(unicode, lowerCaseEnglishCodepoints, - doNotEncodeCodepoints); + final AlphabetConverter ac = AlphabetConverter.createConverter(UNICODE, LOWER_CASE_ENGLISH_CODEPOINTS, + DO_NOT_ENCODE_CODEPOINTS); assertThat(ac.getEncodedCharLength()).isEqualTo(2); diff --git a/src/test/java/org/apache/commons/text/StringEscapeUtilsTest.java b/src/test/java/org/apache/commons/text/StringEscapeUtilsTest.java index 3ea582a..6e68957 100644 --- a/src/test/java/org/apache/commons/text/StringEscapeUtilsTest.java +++ b/src/test/java/org/apache/commons/text/StringEscapeUtilsTest.java @@ -30,7 +30,6 @@ import java.io.IOException; import java.io.StringWriter; import java.lang.reflect.Constructor; import java.lang.reflect.Modifier; -import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Paths; diff --git a/src/test/java/org/apache/commons/text/similarity/IntersectionSimilarityTest.java b/src/test/java/org/apache/commons/text/similarity/IntersectionSimilarityTest.java index 3e26e74..5906da6 100644 --- a/src/test/java/org/apache/commons/text/similarity/IntersectionSimilarityTest.java +++ b/src/test/java/org/apache/commons/text/similarity/IntersectionSimilarityTest.java @@ -20,7 +20,6 @@ import static org.assertj.core.api.Assertions.assertThatIllegalArgumentException import static org.junit.jupiter.api.Assertions.assertEquals; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashMap; diff --git a/src/test/java/org/apache/commons/text/translate/NumericEntityUnescaperTest.java b/src/test/java/org/apache/commons/text/translate/NumericEntityUnescaperTest.java index a8bd26d..4770b05 100644 --- a/src/test/java/org/apache/commons/text/translate/NumericEntityUnescaperTest.java +++ b/src/test/java/org/apache/commons/text/translate/NumericEntityUnescaperTest.java @@ -55,7 +55,7 @@ public class NumericEntityUnescaperTest { String expected = "Test \u0030 not test"; String result = neu.translate(input); - assertThat(result).as("Failed to support unfinished entities (i.e. missing semi-colon)").isEqualTo(expected); + assertThat(result).as("Failed to support unfinished entities (i.e. missing semicolon)").isEqualTo(expected); // ignore it neu = new NumericEntityUnescaper(); @@ -63,7 +63,7 @@ public class NumericEntityUnescaperTest { expected = input; result = neu.translate(input); - assertThat(result).as("Failed to ignore unfinished entities (i.e. missing semi-colon)").isEqualTo(expected); + assertThat(result).as("Failed to ignore unfinished entities (i.e. missing semicolon)").isEqualTo(expected); // fail it neu = new NumericEntityUnescaper(NumericEntityUnescaper.OPTION.errorIfNoSemiColon);