This is an automated email from the ASF dual-hosted git repository.

ggregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-text.git


The following commit(s) were added to refs/heads/master by this push:
     new 8c61f12d [TEXT-175] Fix regression for determining whitespace in 
WordUtils (#519)
8c61f12d is described below

commit 8c61f12d44f5ec744a11164d1d8a1cca58e190d3
Author: seanfabs <165280862+seanf...@users.noreply.github.com>
AuthorDate: Fri Mar 29 12:22:51 2024 +0000

    [TEXT-175] Fix regression for determining whitespace in WordUtils (#519)
    
    * Fix regression for determining whitespace
    
    * Declutter
    
    ---------
    
    Co-authored-by: sean.fabri <sean.fa...@crunch.co.uk>
    Co-authored-by: Gary Gregory <garydgreg...@users.noreply.github.com>
---
 .../java/org/apache/commons/text/WordUtils.java    | 41 +++++++++++-----------
 .../org/apache/commons/text/WordUtilsTest.java     |  4 +++
 2 files changed, 25 insertions(+), 20 deletions(-)

diff --git a/src/main/java/org/apache/commons/text/WordUtils.java 
b/src/main/java/org/apache/commons/text/WordUtils.java
index 306c68af..ac550b0d 100644
--- a/src/main/java/org/apache/commons/text/WordUtils.java
+++ b/src/main/java/org/apache/commons/text/WordUtils.java
@@ -18,6 +18,7 @@ package org.apache.commons.text;
 
 import java.util.HashSet;
 import java.util.Set;
+import java.util.function.Predicate;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
@@ -170,7 +171,7 @@ public class WordUtils {
         if (StringUtils.isEmpty(str)) {
             return str;
         }
-        final Set<Integer> delimiterSet = generateDelimiterSet(delimiters);
+        final Predicate<Integer> isDelimiter = 
generateIsDelimiterFunction(delimiters);
         final int strLen = str.length();
         final int[] newCodePoints = new int[strLen];
         int outOffset = 0;
@@ -179,7 +180,7 @@ public class WordUtils {
         for (int index = 0; index < strLen;) {
             final int codePoint = str.codePointAt(index);
 
-            if (delimiterSet.contains(codePoint)) {
+            if (isDelimiter.test(codePoint)) {
                 capitalizeNext = true;
                 newCodePoints[outOffset++] = codePoint;
                 index += Character.charCount(codePoint);
@@ -290,26 +291,26 @@ public class WordUtils {
     }
 
     /**
-     * Converts an array of delimiters to a hash set of code points. Code 
point of space(32) is added as the default
-     * value if delimiters is null. The generated hash set provides O(1) 
lookup time.
+     * Given the supplied array of delimiters, returns a function determining 
whether a character code point is a delimiter.
+     * The function provides O(1) lookup time.
+     * Whitespace is defined by {@link Character#isWhitespace(int)} and is 
used as the default value if delimiters is null.
      *
-     * @param delimiters set of characters to determine capitalization, null 
means whitespace
-     * @return Set<Integer>
+     * @param delimiters set of characters to determine delimiters, null means 
whitespace
+     * @return {@code Predicate<Integer>} taking a code point value as an argument and 
returning true if a delimiter.
      */
-    private static Set<Integer> generateDelimiterSet(final char[] delimiters) {
-        final Set<Integer> delimiterHashSet = new HashSet<>();
+    private static Predicate<Integer> generateIsDelimiterFunction(final char[] 
delimiters) {
+        final Predicate<Integer> isDelimiter;
         if (delimiters == null || delimiters.length == 0) {
-            if (delimiters == null) {
-                delimiterHashSet.add(Character.codePointAt(new char[] {' '}, 
0));
+            isDelimiter = delimiters == null ? Character::isWhitespace : c -> 
false;
+        } else {
+            Set<Integer> delimiterSet = new HashSet<>();
+            for (int index = 0; index < delimiters.length; index++) {
+                delimiterSet.add(Character.codePointAt(delimiters, index));
             }
-
-            return delimiterHashSet;
+            isDelimiter = delimiterSet::contains;
         }
 
-        for (int index = 0; index < delimiters.length; index++) {
-            delimiterHashSet.add(Character.codePointAt(delimiters, index));
-        }
-        return delimiterHashSet;
+        return isDelimiter;
     }
 
     /**
@@ -368,7 +369,7 @@ public class WordUtils {
         if (delimiters != null && delimiters.length == 0) {
             return StringUtils.EMPTY;
         }
-        final Set<Integer> delimiterSet = generateDelimiterSet(delimiters);
+        final Predicate<Integer> isDelimiter = 
generateIsDelimiterFunction(delimiters);
         final int strLen = str.length();
         final int[] newCodePoints = new int[strLen / 2 + 1];
         int count = 0;
@@ -376,7 +377,7 @@ public class WordUtils {
         for (int i = 0; i < strLen;) {
             final int codePoint = str.codePointAt(i);
 
-            if (delimiterSet.contains(codePoint) || delimiters == null && 
Character.isWhitespace(codePoint)) {
+            if (isDelimiter.test(codePoint)) {
                 lastWasGap = true;
             } else if (lastWasGap) {
                 newCodePoints[count++] = codePoint;
@@ -534,7 +535,7 @@ public class WordUtils {
         if (StringUtils.isEmpty(str)) {
             return str;
         }
-        final Set<Integer> delimiterSet = generateDelimiterSet(delimiters);
+        final Predicate<Integer> isDelimiter = 
generateIsDelimiterFunction(delimiters);
         final int strLen = str.length();
         final int[] newCodePoints = new int[strLen];
         int outOffset = 0;
@@ -543,7 +544,7 @@ public class WordUtils {
         for (int index = 0; index < strLen;) {
             final int codePoint = str.codePointAt(index);
 
-            if (delimiterSet.contains(codePoint)) {
+            if (isDelimiter.test(codePoint)) {
                 uncapitalizeNext = true;
                 newCodePoints[outOffset++] = codePoint;
                 index += Character.charCount(codePoint);
diff --git a/src/test/java/org/apache/commons/text/WordUtilsTest.java 
b/src/test/java/org/apache/commons/text/WordUtilsTest.java
index 2a6f9d68..2a397d2e 100644
--- a/src/test/java/org/apache/commons/text/WordUtilsTest.java
+++ b/src/test/java/org/apache/commons/text/WordUtilsTest.java
@@ -109,6 +109,8 @@ public class WordUtilsTest {
         assertThat(WordUtils.capitalizeFully("i am HERE 123")).isEqualTo("I Am 
Here 123");
         assertThat(WordUtils.capitalizeFully("I AM HERE 123")).isEqualTo("I Am 
Here 123");
         
assertThat(WordUtils.capitalizeFully("alphabet")).isEqualTo("Alphabet"); // 
single word
+        assertThat(WordUtils.capitalizeFully("a\tb\nc d")).isEqualTo("A\tB\nC 
D");
+        assertThat(WordUtils.capitalizeFully("and \tbut \ncleat  
dome")).isEqualTo("And \tBut \nCleat  Dome");
     }
 
     @Test
@@ -368,6 +370,8 @@ public class WordUtilsTest {
         assertThat(WordUtils.uncapitalize("I Am Here 123")).isEqualTo("i am 
here 123");
         assertThat(WordUtils.uncapitalize("i am HERE 123")).isEqualTo("i am 
hERE 123");
         assertThat(WordUtils.uncapitalize("I AM HERE 123")).isEqualTo("i aM 
hERE 123");
+        assertThat(WordUtils.uncapitalize("A\tB\nC D")).isEqualTo("a\tb\nc d");
+        assertThat(WordUtils.uncapitalize("And \tBut \nCLEAT  
Dome")).isEqualTo("and \tbut \ncLEAT  dome");
     }
 
     @Test

Reply via email to