This is an automated email from the ASF dual-hosted git repository.
garydgregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-lang.git
The following commit(s) were added to refs/heads/master by this push:
new 68fc73ce5 Classify supplementary code points in StringUtils is* predicates (#1724)
68fc73ce5 is described below
commit 68fc73ce56557c45decc1a37fa94c00a4f0d7b8f
Author: alhuda <[email protected]>
AuthorDate: Tue Jun 23 16:13:16 2026 +0530
Classify supplementary code points in StringUtils is* predicates (#1724)
* classify supplementary code points in StringUtils is* predicates
* Move supplementary code point assertions into dedicated test methods
---
.../java/org/apache/commons/lang3/StringUtils.java | 60 +++++++++++++---------
.../apache/commons/lang3/StringUtilsIsTest.java | 45 ++++++++++++++++
.../org/apache/commons/lang3/StringUtilsTest.java | 33 ++++++++++++
3 files changed, 115 insertions(+), 23 deletions(-)
diff --git a/src/main/java/org/apache/commons/lang3/StringUtils.java b/src/main/java/org/apache/commons/lang3/StringUtils.java
index d96ba2c91..bbbcc52cc 100644
--- a/src/main/java/org/apache/commons/lang3/StringUtils.java
+++ b/src/main/java/org/apache/commons/lang3/StringUtils.java
@@ -3237,10 +3237,12 @@ public static boolean isAllLowerCase(final CharSequence cs) {
return false;
}
final int sz = cs.length();
- for (int i = 0; i < sz; i++) {
- if (!Character.isLowerCase(cs.charAt(i))) {
+ for (int i = 0; i < sz;) {
+ final int codePoint = Character.codePointAt(cs, i);
+ if (!Character.isLowerCase(codePoint)) {
return false;
}
+ i += Character.charCount(codePoint);
}
return true;
}
@@ -3272,10 +3274,12 @@ public static boolean isAllUpperCase(final CharSequence cs) {
return false;
}
final int sz = cs.length();
- for (int i = 0; i < sz; i++) {
- if (!Character.isUpperCase(cs.charAt(i))) {
+ for (int i = 0; i < sz;) {
+ final int codePoint = Character.codePointAt(cs, i);
+ if (!Character.isUpperCase(codePoint)) {
return false;
}
+ i += Character.charCount(codePoint);
}
return true;
}
@@ -3306,10 +3310,12 @@ public static boolean isAlpha(final CharSequence cs) {
return false;
}
final int sz = cs.length();
- for (int i = 0; i < sz; i++) {
- if (!Character.isLetter(cs.charAt(i))) {
+ for (int i = 0; i < sz;) {
+ final int codePoint = Character.codePointAt(cs, i);
+ if (!Character.isLetter(codePoint)) {
return false;
}
+ i += Character.charCount(codePoint);
}
return true;
}
@@ -3341,10 +3347,12 @@ public static boolean isAlphanumeric(final CharSequence cs) {
return false;
}
final int sz = cs.length();
- for (int i = 0; i < sz; i++) {
- if (!Character.isLetterOrDigit(cs.charAt(i))) {
+ for (int i = 0; i < sz;) {
+ final int codePoint = Character.codePointAt(cs, i);
+ if (!Character.isLetterOrDigit(codePoint)) {
return false;
}
+ i += Character.charCount(codePoint);
}
return true;
}
@@ -3375,11 +3383,12 @@ public static boolean isAlphanumericSpace(final CharSequence cs) {
return false;
}
final int sz = cs.length();
- for (int i = 0; i < sz; i++) {
- final char nowChar = cs.charAt(i);
- if (nowChar != ' ' && !Character.isLetterOrDigit(nowChar)) {
+ for (int i = 0; i < sz;) {
+ final int codePoint = Character.codePointAt(cs, i);
+ if (codePoint != ' ' && !Character.isLetterOrDigit(codePoint)) {
return false;
}
+ i += Character.charCount(codePoint);
}
return true;
}
@@ -3410,11 +3419,12 @@ public static boolean isAlphaSpace(final CharSequence cs) {
return false;
}
final int sz = cs.length();
- for (int i = 0; i < sz; i++) {
- final char nowChar = cs.charAt(i);
- if (nowChar != ' ' && !Character.isLetter(nowChar)) {
+ for (int i = 0; i < sz;) {
+ final int codePoint = Character.codePointAt(cs, i);
+ if (codePoint != ' ' && !Character.isLetter(codePoint)) {
return false;
}
+ i += Character.charCount(codePoint);
}
return true;
}
@@ -3608,16 +3618,17 @@ public static boolean isMixedCase(final CharSequence cs) {
boolean containsUppercase = false;
boolean containsLowercase = false;
final int sz = cs.length();
- for (int i = 0; i < sz; i++) {
- final char nowChar = cs.charAt(i);
- if (Character.isUpperCase(nowChar)) {
+ for (int i = 0; i < sz;) {
+ final int codePoint = Character.codePointAt(cs, i);
+ if (Character.isUpperCase(codePoint)) {
containsUppercase = true;
- } else if (Character.isLowerCase(nowChar)) {
+ } else if (Character.isLowerCase(codePoint)) {
containsLowercase = true;
}
if (containsUppercase && containsLowercase) {
return true;
}
+ i += Character.charCount(codePoint);
}
return false;
}
@@ -3755,10 +3766,12 @@ public static boolean isNumeric(final CharSequence cs) {
return false;
}
final int sz = cs.length();
- for (int i = 0; i < sz; i++) {
- if (!Character.isDigit(cs.charAt(i))) {
+ for (int i = 0; i < sz;) {
+ final int codePoint = Character.codePointAt(cs, i);
+ if (!Character.isDigit(codePoint)) {
return false;
}
+ i += Character.charCount(codePoint);
}
return true;
}
@@ -3792,11 +3805,12 @@ public static boolean isNumericSpace(final CharSequence cs) {
return false;
}
final int sz = cs.length();
- for (int i = 0; i < sz; i++) {
- final char nowChar = cs.charAt(i);
- if (nowChar != ' ' && !Character.isDigit(nowChar)) {
+ for (int i = 0; i < sz;) {
+ final int codePoint = Character.codePointAt(cs, i);
+ if (codePoint != ' ' && !Character.isDigit(codePoint)) {
return false;
}
+ i += Character.charCount(codePoint);
}
return true;
}
diff --git a/src/test/java/org/apache/commons/lang3/StringUtilsIsTest.java b/src/test/java/org/apache/commons/lang3/StringUtilsIsTest.java
index f94ebd50b..e0e496181 100644
--- a/src/test/java/org/apache/commons/lang3/StringUtilsIsTest.java
+++ b/src/test/java/org/apache/commons/lang3/StringUtilsIsTest.java
@@ -41,6 +41,14 @@ void testIsAlpha() {
assertFalse(StringUtils.isAlpha("hkHKHik*khbkuh"));
}
+ @Test
+ void testIsAlphaSupplementary() {
+ // U+10400 DESERET CAPITAL LETTER LONG I is a supplementary Unicode letter
+ assertTrue(StringUtils.isAlpha(new String(Character.toChars(0x10400))));
+ // U+1D7CE MATHEMATICAL BOLD DIGIT ZERO is a supplementary digit, not a letter
+ assertFalse(StringUtils.isAlpha(new String(Character.toChars(0x1D7CE))));
+ }
+
@Test
void testIsAlphanumeric() {
assertFalse(StringUtils.isAlphanumeric(null));
@@ -56,6 +64,13 @@ void testIsAlphanumeric() {
assertFalse(StringUtils.isAlphanumeric("hkHKHik*khbkuh"));
}
+ @Test
+ void testIsAlphanumericSupplementary() {
+ // both a supplementary letter and a supplementary digit are alphanumeric
+ assertTrue(StringUtils.isAlphanumeric(new String(Character.toChars(0x10400))));
+ assertTrue(StringUtils.isAlphanumeric(new String(Character.toChars(0x1D7CE))));
+ }
+
@Test
void testIsAlphanumericSpace() {
assertFalse(StringUtils.isAlphanumericSpace(null));
@@ -71,6 +86,12 @@ void testIsAlphanumericSpace() {
assertFalse(StringUtils.isAlphanumericSpace("hkHKHik*khbkuh"));
}
+ @Test
+ void testIsAlphanumericSpaceSupplementary() {
+ // a supplementary letter and digit separated by a space
+ assertTrue(StringUtils.isAlphanumericSpace(new String(Character.toChars(0x10400)) + " " + new String(Character.toChars(0x1D7CE))));
+ }
+
@Test
void testIsAlphaspace() {
assertFalse(StringUtils.isAlphaSpace(null));
@@ -86,6 +107,14 @@ void testIsAlphaspace() {
assertFalse(StringUtils.isAlphaSpace("hkHKHik*khbkuh"));
}
+ @Test
+ void testIsAlphaSpaceSupplementary() {
+ // a supplementary letter plus a space stays alpha-space
+ assertTrue(StringUtils.isAlphaSpace(new String(Character.toChars(0x10400)) + " a"));
+ // a supplementary digit is not a letter
+ assertFalse(StringUtils.isAlphaSpace(new String(Character.toChars(0x1D7CE))));
+ }
+
@Test
void testIsAsciiPrintable_String() {
assertFalse(StringUtils.isAsciiPrintable(null));
@@ -134,6 +163,14 @@ void testIsNumeric() {
assertFalse(StringUtils.isNumeric("-123"));
}
+ @Test
+ void testIsNumericSupplementary() {
+ // U+1D7CE MATHEMATICAL BOLD DIGIT ZERO is a supplementary Unicode digit
+ assertTrue(StringUtils.isNumeric(new String(Character.toChars(0x1D7CE))));
+ // U+10400 DESERET CAPITAL LETTER LONG I is a supplementary letter, not a digit
+ assertFalse(StringUtils.isNumeric(new String(Character.toChars(0x10400))));
+ }
+
@Test
void testIsNumericSpace() {
assertFalse(StringUtils.isNumericSpace(null));
@@ -154,6 +191,14 @@ void testIsNumericSpace() {
assertFalse(StringUtils.isNumericSpace("hkHKHik*khbkuh"));
}
+ @Test
+ void testIsNumericSpaceSupplementary() {
+ // two supplementary digits separated by a space
+ assertTrue(StringUtils.isNumericSpace(new String(Character.toChars(0x1D7CE)) + " " + new String(Character.toChars(0x1D7CE))));
+ // a supplementary letter is not a digit
+ assertFalse(StringUtils.isNumericSpace(new String(Character.toChars(0x10400))));
+ }
+
@Test
void testIsWhitespace() {
assertFalse(StringUtils.isWhitespace(null));
diff --git a/src/test/java/org/apache/commons/lang3/StringUtilsTest.java b/src/test/java/org/apache/commons/lang3/StringUtilsTest.java
index 369e9bb41..7d60d0f9e 100644
--- a/src/test/java/org/apache/commons/lang3/StringUtilsTest.java
+++ b/src/test/java/org/apache/commons/lang3/StringUtilsTest.java
@@ -948,6 +948,17 @@ void testIsAllLowerCase() {
assertFalse(StringUtils.isAllLowerCase("ab/c"));
}
+ /**
+ * Test for {@link StringUtils#isAllLowerCase(CharSequence)} with supplementary code points.
+ */
+ @Test
+ void testIsAllLowerCaseSupplementary() {
+ // U+10428 DESERET SMALL LETTER LONG I is a lowercase supplementary letter
+ assertTrue(StringUtils.isAllLowerCase(new String(Character.toChars(0x10428))));
+ // U+10400 DESERET CAPITAL LETTER LONG I is an uppercase supplementary letter
+ assertFalse(StringUtils.isAllLowerCase(new String(Character.toChars(0x10400))));
+ }
+
/**
* Test for {@link StringUtils#isAllUpperCase(CharSequence)}.
*/
@@ -965,6 +976,17 @@ void testIsAllUpperCase() {
assertFalse(StringUtils.isAllUpperCase("A/C"));
}
+ /**
+ * Test for {@link StringUtils#isAllUpperCase(CharSequence)} with supplementary code points.
+ */
+ @Test
+ void testIsAllUpperCaseSupplementary() {
+ // U+10400 DESERET CAPITAL LETTER LONG I is an uppercase supplementary letter
+ assertTrue(StringUtils.isAllUpperCase(new String(Character.toChars(0x10400))));
+ // U+10428 DESERET SMALL LETTER LONG I is a lowercase supplementary letter
+ assertFalse(StringUtils.isAllUpperCase(new String(Character.toChars(0x10428))));
+ }
+
/**
* Test for {@link StringUtils#isMixedCase(CharSequence)}.
*/
@@ -988,6 +1010,17 @@ void testIsMixedCase() {
assertTrue(StringUtils.isMixedCase("a/C"));
}
+ /**
+ * Test for {@link StringUtils#isMixedCase(CharSequence)} with supplementary code points.
+ */
+ @Test
+ void testIsMixedCaseSupplementary() {
+ // lowercase 'a' mixed with the uppercase supplementary letter U+10400
+ assertTrue(StringUtils.isMixedCase("a" + new String(Character.toChars(0x10400))));
+ // a single uppercase supplementary letter is not mixed case
+ assertFalse(StringUtils.isMixedCase(new String(Character.toChars(0x10400))));
+ }
+
@Test
void testJoin_ArrayCharSeparator() {
assertNull(StringUtils.join((Object[]) null, ','));