This is an automated email from the ASF dual-hosted git repository.

garydgregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-lang.git


The following commit(s) were added to refs/heads/master by this push:
     new 53172785c fix indexOfDifference splitting a surrogate pair (#1713)
53172785c is described below

commit 53172785c29bb450479c5ba2c198102a5aa9a753
Author: alhuda <[email protected]>
AuthorDate: Thu Jun 18 00:34:40 2026 +0530

    fix indexOfDifference splitting a surrogate pair (#1713)
---
 .../java/org/apache/commons/lang3/StringUtils.java   | 12 ++++++++++++
 .../org/apache/commons/lang3/StringUtilsTest.java    | 20 ++++++++++++++++++++
 2 files changed, 32 insertions(+)

diff --git a/src/main/java/org/apache/commons/lang3/StringUtils.java b/src/main/java/org/apache/commons/lang3/StringUtils.java
index 4cc5ab75d..7dfc11ec4 100644
--- a/src/main/java/org/apache/commons/lang3/StringUtils.java
+++ b/src/main/java/org/apache/commons/lang3/StringUtils.java
@@ -3002,6 +3002,12 @@ public static int indexOfDifference(final CharSequence... css) {
                 break;
             }
         }
+        if (firstDiff > 0 && Character.isLowSurrogate(css[0].charAt(firstDiff))
+                && Character.isHighSurrogate(css[0].charAt(firstDiff - 1))) {
+            // the difference splits a surrogate pair whose high half is common; report the start of the
+            // pair so getCommonPrefix never slices it in half and leaves a stray high surrogate.
+            firstDiff--;
+        }
         if (firstDiff == -1 && shortestStrLen != longestStrLen) {
             // we compared all of the characters up to the length of the
             // shortest string and didn't find a match, but the string lengths
@@ -3048,6 +3054,12 @@ public static int indexOfDifference(final CharSequence cs1, final CharSequence c
                 break;
             }
         }
+        if (i > 0 && i < cs1.length() && i < cs2.length() && 
Character.isHighSurrogate(cs1.charAt(i - 1))
+                && (Character.isLowSurrogate(cs1.charAt(i)) || 
Character.isLowSurrogate(cs2.charAt(i)))) {
+            // the difference splits a surrogate pair whose high half is common; report the start of the
+            // pair so difference does not return a string that begins with a stray low surrogate.
+            i--;
+        }
         if (i < cs2.length() || i < cs1.length()) {
             return i;
         }
diff --git a/src/test/java/org/apache/commons/lang3/StringUtilsTest.java b/src/test/java/org/apache/commons/lang3/StringUtilsTest.java
index 9a32ce099..578d3117f 100644
--- a/src/test/java/org/apache/commons/lang3/StringUtilsTest.java
+++ b/src/test/java/org/apache/commons/lang3/StringUtilsTest.java
@@ -543,6 +543,11 @@ void testDifference_StringString() {
         assertEquals("robot", StringUtils.difference("i am a machine", "i am a 
robot"));
         assertEquals("", StringUtils.difference("abc", "abc"));
         assertEquals("you are a robot", StringUtils.difference("i am a robot", 
"you are a robot"));
+        // 0x10400 and 0x10401 share the same high surrogate; the difference must not begin with a lone low surrogate
+        final String cp10400 = new String(Character.toChars(0x10400));
+        final String cp10401 = new String(Character.toChars(0x10401));
+        assertEquals(cp10401, StringUtils.difference(cp10400, cp10401));
+        assertEquals("Y", StringUtils.difference(cp10400 + "X", cp10400 + 
"Y"));
     }
 
     @Test
@@ -564,6 +569,11 @@ void testDifferenceAt_StringArray() {
         assertEquals(0, StringUtils.indexOfDifference("abcde", "xyz"));
         assertEquals(0, StringUtils.indexOfDifference("xyz", "abcde"));
         assertEquals(7, StringUtils.indexOfDifference("i am a machine", "i am 
a robot"));
+        // a difference that falls inside a shared surrogate pair is reported at the start of the pair, not mid-pair
+        final String cp10400 = new String(Character.toChars(0x10400));
+        final String cp10401 = new String(Character.toChars(0x10401));
+        assertEquals(0, StringUtils.indexOfDifference(new String[] {cp10400, 
cp10401}));
+        assertEquals(2, StringUtils.indexOfDifference(new String[] {cp10400 + 
"X", cp10400 + "Y"}));
     }
 
     @Test
@@ -577,6 +587,11 @@ void testDifferenceAt_StringString() {
         assertEquals(7, StringUtils.indexOfDifference("i am a machine", "i am 
a robot"));
         assertEquals(-1, StringUtils.indexOfDifference("foo", "foo"));
         assertEquals(0, StringUtils.indexOfDifference("i am a robot", "you are 
a robot"));
+        // a difference that falls inside a shared surrogate pair is reported at the start of the pair, not mid-pair
+        final String cp10400 = new String(Character.toChars(0x10400));
+        final String cp10401 = new String(Character.toChars(0x10401));
+        assertEquals(0, StringUtils.indexOfDifference(cp10400, cp10401));
+        assertEquals(2, StringUtils.indexOfDifference(cp10400 + "X", cp10400 + 
"Y"));
     }
 
     /**
@@ -680,6 +695,11 @@ void testGetCommonPrefix_StringArray() {
         assertEquals("", StringUtils.getCommonPrefix("abcde", "xyz"));
         assertEquals("", StringUtils.getCommonPrefix("xyz", "abcde"));
         assertEquals("i am a ", StringUtils.getCommonPrefix("i am a machine", 
"i am a robot"));
+        // 0x10400 and 0x10401 share the high surrogate but differ; the common prefix must not be a lone high surrogate
+        final String cp10400 = new String(Character.toChars(0x10400));
+        final String cp10401 = new String(Character.toChars(0x10401));
+        assertEquals("", StringUtils.getCommonPrefix(cp10400, cp10401));
+        assertEquals(cp10400, StringUtils.getCommonPrefix(cp10400 + "X", 
cp10400 + "Y"));
     }
 
     @Test

Reply via email to