This is an automated email from the ASF dual-hosted git repository.
garydgregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-lang.git
The following commit(s) were added to refs/heads/master by this push:
new 53172785c fix indexOfDifference splitting a surrogate pair (#1713)
53172785c is described below
commit 53172785c29bb450479c5ba2c198102a5aa9a753
Author: alhuda <[email protected]>
AuthorDate: Thu Jun 18 00:34:40 2026 +0530
fix indexOfDifference splitting a surrogate pair (#1713)
---
.../java/org/apache/commons/lang3/StringUtils.java | 12 ++++++++++++
.../org/apache/commons/lang3/StringUtilsTest.java | 20 ++++++++++++++++++++
2 files changed, 32 insertions(+)
diff --git a/src/main/java/org/apache/commons/lang3/StringUtils.java b/src/main/java/org/apache/commons/lang3/StringUtils.java
index 4cc5ab75d..7dfc11ec4 100644
--- a/src/main/java/org/apache/commons/lang3/StringUtils.java
+++ b/src/main/java/org/apache/commons/lang3/StringUtils.java
@@ -3002,6 +3002,12 @@ public static int indexOfDifference(final CharSequence... css) {
break;
}
}
+ if (firstDiff > 0 && Character.isLowSurrogate(css[0].charAt(firstDiff))
+ && Character.isHighSurrogate(css[0].charAt(firstDiff - 1))) {
+ // the difference splits a surrogate pair whose high half is common; report the start of the
+ // pair so getCommonPrefix never slices it in half and leaves a stray high surrogate.
+ firstDiff--;
+ }
if (firstDiff == -1 && shortestStrLen != longestStrLen) {
// we compared all of the characters up to the length of the
// shortest string and didn't find a match, but the string lengths
@@ -3048,6 +3054,12 @@ public static int indexOfDifference(final CharSequence cs1, final CharSequence c
break;
}
}
+ if (i > 0 && i < cs1.length() && i < cs2.length() &&
Character.isHighSurrogate(cs1.charAt(i - 1))
+ && (Character.isLowSurrogate(cs1.charAt(i)) ||
Character.isLowSurrogate(cs2.charAt(i)))) {
+ // the difference splits a surrogate pair whose high half is common; report the start of the
+ // pair so difference does not return a string that begins with a stray low surrogate.
+ i--;
+ }
if (i < cs2.length() || i < cs1.length()) {
return i;
}
diff --git a/src/test/java/org/apache/commons/lang3/StringUtilsTest.java b/src/test/java/org/apache/commons/lang3/StringUtilsTest.java
index 9a32ce099..578d3117f 100644
--- a/src/test/java/org/apache/commons/lang3/StringUtilsTest.java
+++ b/src/test/java/org/apache/commons/lang3/StringUtilsTest.java
@@ -543,6 +543,11 @@ void testDifference_StringString() {
assertEquals("robot", StringUtils.difference("i am a machine", "i am a
robot"));
assertEquals("", StringUtils.difference("abc", "abc"));
assertEquals("you are a robot", StringUtils.difference("i am a robot",
"you are a robot"));
+ // 0x10400 and 0x10401 share the same high surrogate; the difference must not begin with a lone low surrogate
+ final String cp10400 = new String(Character.toChars(0x10400));
+ final String cp10401 = new String(Character.toChars(0x10401));
+ assertEquals(cp10401, StringUtils.difference(cp10400, cp10401));
+ assertEquals("Y", StringUtils.difference(cp10400 + "X", cp10400 +
"Y"));
}
@Test
@@ -564,6 +569,11 @@ void testDifferenceAt_StringArray() {
assertEquals(0, StringUtils.indexOfDifference("abcde", "xyz"));
assertEquals(0, StringUtils.indexOfDifference("xyz", "abcde"));
assertEquals(7, StringUtils.indexOfDifference("i am a machine", "i am
a robot"));
+ // a difference that falls inside a shared surrogate pair is reported at the start of the pair, not mid-pair
+ final String cp10400 = new String(Character.toChars(0x10400));
+ final String cp10401 = new String(Character.toChars(0x10401));
+ assertEquals(0, StringUtils.indexOfDifference(new String[] {cp10400,
cp10401}));
+ assertEquals(2, StringUtils.indexOfDifference(new String[] {cp10400 +
"X", cp10400 + "Y"}));
}
@Test
@@ -577,6 +587,11 @@ void testDifferenceAt_StringString() {
assertEquals(7, StringUtils.indexOfDifference("i am a machine", "i am
a robot"));
assertEquals(-1, StringUtils.indexOfDifference("foo", "foo"));
assertEquals(0, StringUtils.indexOfDifference("i am a robot", "you are
a robot"));
+ // a difference that falls inside a shared surrogate pair is reported at the start of the pair, not mid-pair
+ final String cp10400 = new String(Character.toChars(0x10400));
+ final String cp10401 = new String(Character.toChars(0x10401));
+ assertEquals(0, StringUtils.indexOfDifference(cp10400, cp10401));
+ assertEquals(2, StringUtils.indexOfDifference(cp10400 + "X", cp10400 +
"Y"));
}
/**
@@ -680,6 +695,11 @@ void testGetCommonPrefix_StringArray() {
assertEquals("", StringUtils.getCommonPrefix("abcde", "xyz"));
assertEquals("", StringUtils.getCommonPrefix("xyz", "abcde"));
assertEquals("i am a ", StringUtils.getCommonPrefix("i am a machine",
"i am a robot"));
+ // 0x10400 and 0x10401 share the high surrogate but differ; the common prefix must not be a lone high surrogate
+ final String cp10400 = new String(Character.toChars(0x10400));
+ final String cp10401 = new String(Character.toChars(0x10401));
+ assertEquals("", StringUtils.getCommonPrefix(cp10400, cp10401));
+ assertEquals(cp10400, StringUtils.getCommonPrefix(cp10400 + "X",
cp10400 + "Y"));
}
@Test