This is an automated email from the ASF dual-hosted git repository.

ggregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-text.git
The following commit(s) were added to refs/heads/master by this push: new 8c61f12d [TEXT-175] Fix regression for determining whitespace in WordUtils (#519) 8c61f12d is described below commit 8c61f12d44f5ec744a11164d1d8a1cca58e190d3 Author: seanfabs <165280862+seanf...@users.noreply.github.com> AuthorDate: Fri Mar 29 12:22:51 2024 +0000 [TEXT-175] Fix regression for determining whitespace in WordUtils (#519) * Fix regression for determining whitespace * Declutter --------- Co-authored-by: sean.fabri <sean.fa...@crunch.co.uk> Co-authored-by: Gary Gregory <garydgreg...@users.noreply.github.com> --- .../java/org/apache/commons/text/WordUtils.java | 41 +++++++++++----------- .../org/apache/commons/text/WordUtilsTest.java | 4 +++ 2 files changed, 25 insertions(+), 20 deletions(-) diff --git a/src/main/java/org/apache/commons/text/WordUtils.java b/src/main/java/org/apache/commons/text/WordUtils.java index 306c68af..ac550b0d 100644 --- a/src/main/java/org/apache/commons/text/WordUtils.java +++ b/src/main/java/org/apache/commons/text/WordUtils.java @@ -18,6 +18,7 @@ package org.apache.commons.text; import java.util.HashSet; import java.util.Set; +import java.util.function.Predicate; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -170,7 +171,7 @@ public class WordUtils { if (StringUtils.isEmpty(str)) { return str; } - final Set<Integer> delimiterSet = generateDelimiterSet(delimiters); + final Predicate<Integer> isDelimiter = generateIsDelimiterFunction(delimiters); final int strLen = str.length(); final int[] newCodePoints = new int[strLen]; int outOffset = 0; @@ -179,7 +180,7 @@ public class WordUtils { for (int index = 0; index < strLen;) { final int codePoint = str.codePointAt(index); - if (delimiterSet.contains(codePoint)) { + if (isDelimiter.test(codePoint)) { capitalizeNext = true; newCodePoints[outOffset++] = codePoint; index += Character.charCount(codePoint); @@ -290,26 +291,26 @@ public class WordUtils { } /** - * Converts an array of delimiters 
to a hash set of code points. Code point of space(32) is added as the default - * value if delimiters is null. The generated hash set provides O(1) lookup time. + * Given the array of delimiters supplied; returns a function determining whether a character code point is a delimiter. + * The function provides O(1) lookup time. + * Whitespace is defined by {@link Character#isWhitespace(char)} and is used as the defaultvalue if delimiters is null. * - * @param delimiters set of characters to determine capitalization, null means whitespace - * @return Set<Integer> + * @param delimiters set of characters to determine delimiters, null means whitespace + * @return Predicate<Integer> taking a code point value as an argument and returning true if a delimiter. */ - private static Set<Integer> generateDelimiterSet(final char[] delimiters) { - final Set<Integer> delimiterHashSet = new HashSet<>(); + private static Predicate<Integer> generateIsDelimiterFunction(final char[] delimiters) { + final Predicate<Integer> isDelimiter; if (delimiters == null || delimiters.length == 0) { - if (delimiters == null) { - delimiterHashSet.add(Character.codePointAt(new char[] {' '}, 0)); + isDelimiter = delimiters == null ? 
Character::isWhitespace : c -> false; + } else { + Set<Integer> delimiterSet = new HashSet<>(); + for (int index = 0; index < delimiters.length; index++) { + delimiterSet.add(Character.codePointAt(delimiters, index)); } - - return delimiterHashSet; + isDelimiter = delimiterSet::contains; } - for (int index = 0; index < delimiters.length; index++) { - delimiterHashSet.add(Character.codePointAt(delimiters, index)); - } - return delimiterHashSet; + return isDelimiter; } /** @@ -368,7 +369,7 @@ public class WordUtils { if (delimiters != null && delimiters.length == 0) { return StringUtils.EMPTY; } - final Set<Integer> delimiterSet = generateDelimiterSet(delimiters); + final Predicate<Integer> isDelimiter = generateIsDelimiterFunction(delimiters); final int strLen = str.length(); final int[] newCodePoints = new int[strLen / 2 + 1]; int count = 0; @@ -376,7 +377,7 @@ public class WordUtils { for (int i = 0; i < strLen;) { final int codePoint = str.codePointAt(i); - if (delimiterSet.contains(codePoint) || delimiters == null && Character.isWhitespace(codePoint)) { + if (isDelimiter.test(codePoint)) { lastWasGap = true; } else if (lastWasGap) { newCodePoints[count++] = codePoint; @@ -534,7 +535,7 @@ public class WordUtils { if (StringUtils.isEmpty(str)) { return str; } - final Set<Integer> delimiterSet = generateDelimiterSet(delimiters); + final Predicate<Integer> isDelimiter = generateIsDelimiterFunction(delimiters); final int strLen = str.length(); final int[] newCodePoints = new int[strLen]; int outOffset = 0; @@ -543,7 +544,7 @@ public class WordUtils { for (int index = 0; index < strLen;) { final int codePoint = str.codePointAt(index); - if (delimiterSet.contains(codePoint)) { + if (isDelimiter.test(codePoint)) { uncapitalizeNext = true; newCodePoints[outOffset++] = codePoint; index += Character.charCount(codePoint); diff --git a/src/test/java/org/apache/commons/text/WordUtilsTest.java b/src/test/java/org/apache/commons/text/WordUtilsTest.java index 2a6f9d68..2a397d2e 
100644 --- a/src/test/java/org/apache/commons/text/WordUtilsTest.java +++ b/src/test/java/org/apache/commons/text/WordUtilsTest.java @@ -109,6 +109,8 @@ public class WordUtilsTest { assertThat(WordUtils.capitalizeFully("i am HERE 123")).isEqualTo("I Am Here 123"); assertThat(WordUtils.capitalizeFully("I AM HERE 123")).isEqualTo("I Am Here 123"); assertThat(WordUtils.capitalizeFully("alphabet")).isEqualTo("Alphabet"); // single word + assertThat(WordUtils.capitalizeFully("a\tb\nc d")).isEqualTo("A\tB\nC D"); + assertThat(WordUtils.capitalizeFully("and \tbut \ncleat dome")).isEqualTo("And \tBut \nCleat Dome"); } @Test @@ -368,6 +370,8 @@ public class WordUtilsTest { assertThat(WordUtils.uncapitalize("I Am Here 123")).isEqualTo("i am here 123"); assertThat(WordUtils.uncapitalize("i am HERE 123")).isEqualTo("i am hERE 123"); assertThat(WordUtils.uncapitalize("I AM HERE 123")).isEqualTo("i aM hERE 123"); + assertThat(WordUtils.uncapitalize("A\tB\nC D")).isEqualTo("a\tb\nc d"); + assertThat(WordUtils.uncapitalize("And \tBut \nCLEAT Dome")).isEqualTo("and \tbut \ncLEAT dome"); } @Test