This is an automated email from the ASF dual-hosted git repository.

garydgregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-lang.git


The following commit(s) were added to refs/heads/master by this push:
     new 4c6abb128 Fold supplementary code points in CharSequenceUtils.regionMatches (#1725).
4c6abb128 is described below

commit 4c6abb128d952e7369fae8a18656bed9338d293c
Author: Gary Gregory <[email protected]>
AuthorDate: Wed Jun 24 11:55:06 2026 +0000

    Fold supplementary code points in CharSequenceUtils.regionMatches
    (#1725).
    
    - Sort members
    - Reduce vertical whitespace
---
 src/changes/changes.xml                            |  1 +
 .../apache/commons/lang3/CharSequenceUtils.java    | 52 +++++++++++-----------
 .../commons/lang3/CharSequenceUtilsTest.java       | 22 ++++-----
 3 files changed, 37 insertions(+), 38 deletions(-)

diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index ec2b36e2b..f1f89bb53 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -206,6 +206,7 @@ java.lang.NullPointerException: Cannot invoke
     <action                   type="fix" dev="ggregory" due-to="alhudz, Gary 
Gregory">Classify supplementary code points in StringUtils is* predicates 
(#1724).</action>
     <action                   type="fix" dev="ggregory" due-to="alhudz, Gary 
Gregory">Emit surrogate pair for supplementary code points in UnicodeEscaper 
(#1726).</action>
     <action                   type="fix" dev="ggregory" due-to="alhudz, Gary 
Gregory">Fix MethodUtils.getMatchingMethod false ambiguity on boxed arguments 
(#1727).</action>
+    <action                   type="fix" dev="ggregory" due-to="alhudz, Gary 
Gregory">Fold supplementary code points in CharSequenceUtils.regionMatches 
(#1725).</action>
     <!-- ADD -->
     <action                   type="add" dev="ggregory" due-to="Gary 
Gregory">Add JavaVersion.JAVA_27.</action>
     <action                   type="add" dev="ggregory" due-to="Gary 
Gregory">Add SystemUtils.IS_JAVA_27.</action>
diff --git a/src/main/java/org/apache/commons/lang3/CharSequenceUtils.java b/src/main/java/org/apache/commons/lang3/CharSequenceUtils.java
index 6009ba1d3..4a544eb78 100644
--- a/src/main/java/org/apache/commons/lang3/CharSequenceUtils.java
+++ b/src/main/java/org/apache/commons/lang3/CharSequenceUtils.java
@@ -28,15 +28,14 @@ public class CharSequenceUtils {
     private static final int NOT_FOUND = -1;
 
     /**
-     * Whether the running JDK folds a supplementary code point split across a 
surrogate pair when comparing case
-     * insensitively in {@link String#regionMatches(boolean, int, String, int, 
int)}. JDKs up to and including Java 11
-     * compare surrogate by surrogate and never match such a pair; later JDKs 
fold the whole code point. Probing what
-     * {@link String} actually does (rather than gating on a version constant) 
keeps every {@link CharSequence} type in
-     * step with {@link String} on whatever JDK is running. DESERET CAPITAL 
LETTER LONG I (U+10400) folds to its small
-     * form (U+10428).
+     * Whether the running JDK folds a supplementary code point split across a 
surrogate pair when comparing case insensitively in
+     * {@link String#regionMatches(boolean, int, String, int, int)}. JDKs up 
to and including Java 11 compare surrogate by surrogate and never match such a
+     * pair; later JDKs fold the whole code point. Probing what {@link String} 
actually does (rather than gating on a version constant) keeps every
+     * {@link CharSequence} type in step with {@link String} on whatever JDK 
is running. DESERET CAPITAL LETTER LONG I (U+10400) folds to its small form
+     * (U+10428).
      */
-    private static final boolean STRING_FOLDS_SUPPLEMENTARY_CASE =
-            new String(Character.toChars(0x10400)).regionMatches(true, 0, new 
String(Character.toChars(0x10428)), 0, 2);
+    private static final boolean STRING_FOLDS_SUPPLEMENTARY_CASE = new 
String(Character.toChars(0x10400)).regionMatches(true, 0,
+            new String(Character.toChars(0x10428)), 0, 2);
 
     static final int TO_STRING_LIMIT = 16;
 
@@ -49,6 +48,19 @@ private static boolean checkLaterThan1(final CharSequence 
cs, final CharSequence
         return true;
     }
 
+    /**
+     * Tests whether two code points are equal ignoring case, matching the 
folding used by {@link String#regionMatches(boolean, int, String, int, int)}.
+     *
+     * @param cp1 the first code point.
+     * @param cp2 the second code point.
+     * @return whether the code points are equal ignoring case.
+     */
+    private static boolean equalsIgnoreCase(final int cp1, final int cp2) {
+        final int u1 = Character.toUpperCase(cp1);
+        final int u2 = Character.toUpperCase(cp2);
+        return u1 == u2 || Character.toLowerCase(u1) == 
Character.toLowerCase(u2);
+    }
+
     /**
      * Used by the indexOf(CharSequence methods) as a green implementation of 
indexOf.
      *
@@ -289,12 +301,12 @@ static int lastIndexOf(final CharSequence cs, final int 
searchChar, int start) {
     /**
      * Tests if two string regions are equal.
      *
-     * @param cs the {@link CharSequence} to be processed.
+     * @param cs         the {@link CharSequence} to be processed.
      * @param ignoreCase whether or not to be case-insensitive.
-     * @param thisStart the index to start on the {@code cs} CharSequence.
-     * @param substring the {@link CharSequence} to be looked for.
-     * @param start the index to start on the {@code substring} CharSequence.
-     * @param length character length of the region.
+     * @param thisStart  the index to start on the {@code cs} CharSequence.
+     * @param substring  the {@link CharSequence} to be looked for.
+     * @param start      the index to start on the {@code substring} 
CharSequence.
+     * @param length     character length of the region.
      * @return whether the region matched.
      * @see String#regionMatches(boolean, int, String, int, int)
      */
@@ -365,20 +377,6 @@ static boolean regionMatches(final CharSequence cs, final 
boolean ignoreCase, fi
         return true;
     }
 
-    /**
-     * Tests whether two code points are equal ignoring case, matching the 
folding used by
-     * {@link String#regionMatches(boolean, int, String, int, int)}.
-     *
-     * @param cp1 the first code point.
-     * @param cp2 the second code point.
-     * @return whether the code points are equal ignoring case.
-     */
-    private static boolean equalsIgnoreCase(final int cp1, final int cp2) {
-        final int u1 = Character.toUpperCase(cp1);
-        final int u2 = Character.toUpperCase(cp2);
-        return u1 == u2 || Character.toLowerCase(u1) == 
Character.toLowerCase(u2);
-    }
-
     /**
      * Returns a new {@link CharSequence} that is a subsequence of this
      * sequence starting with the {@code char} value at the specified index.
diff --git a/src/test/java/org/apache/commons/lang3/CharSequenceUtilsTest.java b/src/test/java/org/apache/commons/lang3/CharSequenceUtilsTest.java
index 48bbf6b60..b45892533 100644
--- a/src/test/java/org/apache/commons/lang3/CharSequenceUtilsTest.java
+++ b/src/test/java/org/apache/commons/lang3/CharSequenceUtilsTest.java
@@ -165,6 +165,17 @@ public String toString() {
             // @formatter:on
     };
 
+    private static void assertRegionMatchesParity(final String source, final 
boolean ignoreCase, final int toffset, final String other,
+            final int ooffset, final int len) {
+        // String is the reference: whatever the running JDK does for String, 
every CharSequence type must match.
+        final boolean expected = source.regionMatches(ignoreCase, toffset, 
other, ooffset, len);
+        final CharSequence[] sources = {source, new StringBuilder(source), new 
StringBuffer(source), CharBuffer.wrap(source)};
+        for (final CharSequence cs : sources) {
+            assertEquals(expected, CharSequenceUtils.regionMatches(cs, 
ignoreCase, toffset, other, ooffset, len),
+                    cs.getClass().getSimpleName() + " differs from String for 
" + source + " vs " + other);
+        }
+    }
+
     static Stream<Arguments> lastIndexWithStandardCharSequence() {
         // @formatter:off
         return Stream.of(
@@ -277,17 +288,6 @@ boolean invoke() {
         }
     }
 
-    private static void assertRegionMatchesParity(final String source, final 
boolean ignoreCase, final int toffset, final String other,
-            final int ooffset, final int len) {
-        // String is the reference: whatever the running JDK does for String, 
every CharSequence type must match.
-        final boolean expected = source.regionMatches(ignoreCase, toffset, 
other, ooffset, len);
-        final CharSequence[] sources = {source, new StringBuilder(source), new 
StringBuffer(source), CharBuffer.wrap(source)};
-        for (final CharSequence cs : sources) {
-            assertEquals(expected, CharSequenceUtils.regionMatches(cs, 
ignoreCase, toffset, other, ooffset, len),
-                    cs.getClass().getSimpleName() + " differs from String for 
" + source + " vs " + other);
-        }
-    }
-
     /**
      * A supplementary code point split across a surrogate pair must fold the 
same way for every {@link CharSequence}
      * type that it does for {@link String} on the running JDK. {@link 
String#regionMatches(boolean, int, String, int, int)}

Reply via email to