This is an automated email from the ASF dual-hosted git repository.
garydgregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-lang.git
The following commit(s) were added to refs/heads/master by this push:
new 14deed1bf NumericEntityUnescaper.translate() for entity values out of
range passed (#1673)
14deed1bf is described below
commit 14deed1bff705a46c485733451f462f549ac528d
Author: Gary Gregory <[email protected]>
AuthorDate: Sun May 24 15:35:31 2026 -0400
NumericEntityUnescaper.translate() for entity values out of range passed
(#1673)
to Character.toChars() throw IllegalArgumentException
---
.../text/translate/NumericEntityUnescaper.java | 11 +++-----
.../text/translate/NumericEntityEscaperTest.java | 29 +++++++++++++++++++---
2 files changed, 28 insertions(+), 12 deletions(-)
diff --git
a/src/main/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaper.java
b/src/main/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaper.java
index 805ceef49..00e94a024 100644
---
a/src/main/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaper.java
+++
b/src/main/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaper.java
@@ -110,26 +110,21 @@ public int translate(final CharSequence input, final int
index, final Writer out
if (input.charAt(index) == '&' && index < seqEnd - 2 &&
input.charAt(index + 1) == '#') {
int start = index + 2;
boolean isHex = false;
-
final char firstChar = input.charAt(start);
if (firstChar == 'x' || firstChar == 'X') {
start++;
isHex = true;
-
// Check there's more than just an x after the &#
if (start == seqEnd) {
return 0;
}
}
-
int end = start;
// Note that this supports character codes without a ; on the end
while (end < seqEnd && CharUtils.isHex(input.charAt(end))) {
end++;
}
-
final boolean semiNext = end != seqEnd && input.charAt(end) == ';';
-
if (!semiNext) {
if (isSet(OPTION.semiColonRequired)) {
return 0;
@@ -138,7 +133,6 @@ public int translate(final CharSequence input, final int
index, final Writer out
throw new IllegalArgumentException("Semi-colon required at
end of numeric entity");
}
}
-
final int entityValue;
try {
if (isHex) {
@@ -149,7 +143,9 @@ public int translate(final CharSequence input, final int
index, final Writer out
} catch (final NumberFormatException nfe) {
return 0;
}
-
+ if (entityValue < Character.MIN_CODE_POINT || entityValue >
Character.MAX_CODE_POINT) {
+ return 0;
+ }
if (entityValue > 0xFFFF) {
final char[] chars = Character.toChars(entityValue);
out.write(chars[0]);
@@ -157,7 +153,6 @@ public int translate(final CharSequence input, final int
index, final Writer out
} else {
out.write(entityValue);
}
-
return 2 + end - start + (isHex ? 1 : 0) + (semiNext ? 1 : 0);
}
return 0;
diff --git
a/src/test/java/org/apache/commons/lang3/text/translate/NumericEntityEscaperTest.java
b/src/test/java/org/apache/commons/lang3/text/translate/NumericEntityEscaperTest.java
index 22c5639c7..dab4c884a 100644
---
a/src/test/java/org/apache/commons/lang3/text/translate/NumericEntityEscaperTest.java
+++
b/src/test/java/org/apache/commons/lang3/text/translate/NumericEntityEscaperTest.java
@@ -19,6 +19,8 @@
import static org.junit.jupiter.api.Assertions.assertEquals;
+import java.io.StringWriter;
+
import org.apache.commons.lang3.AbstractLangTest;
import org.junit.jupiter.api.Test;
@@ -31,7 +33,6 @@ class NumericEntityEscaperTest extends AbstractLangTest {
@Test
void testAbove() {
final NumericEntityEscaper nee = NumericEntityEscaper.above('F');
-
final String input = "ADFGZ";
final String result = nee.translate(input);
assertEquals("ADF&#71;&#90;", result, "Failed to escape numeric
entities via the above method");
@@ -40,7 +41,6 @@ void testAbove() {
@Test
void testBelow() {
final NumericEntityEscaper nee = NumericEntityEscaper.below('F');
-
final String input = "ADFGZ";
final String result = nee.translate(input);
assertEquals("&#65;&#68;FGZ", result, "Failed to escape numeric
entities via the below method");
@@ -49,7 +49,6 @@ void testBelow() {
@Test
void testBetween() {
final NumericEntityEscaper nee = NumericEntityEscaper.between('F',
'L');
-
final String input = "ADFGZ";
final String result = nee.translate(input);
assertEquals("AD&#70;GZ", result, "Failed to escape numeric
entities via the between method");
@@ -61,10 +60,32 @@ void testSupplementary() {
final NumericEntityEscaper nee = new NumericEntityEscaper();
final String input = "\uD803\uDC22";
final String expected = "&#68642;";
-
final String result = nee.translate(input);
assertEquals(expected, result, "Failed to escape numeric entities
supplementary characters");
+ }
+ @Test
+ void testNumericEntityOverflow() throws Exception {
+ // cp = 1234567890 > Character.MAX_CODE_POINT (0x10FFFF = 1114111).
+ // Pre-patch: IAE escapes from Character.toChars.
+ // Post-patch: return 0, no write, no exception.
+ final NumericEntityUnescaper u = new NumericEntityUnescaper();
+ final StringWriter sw = new StringWriter();
+ int consumed = u.translate("&#1234567890;", 0, sw);
+ assertEquals(0, consumed);
+ assertEquals("", sw.toString());
+ consumed = u.translate("---&#1234567890;---", 0, sw);
+ assertEquals(0, consumed);
+ assertEquals("", sw.toString());
}
+ @Test
+ void testValidCodePoint() throws Exception {
+ // Negative control: '&#65;' = 'A' must translate successfully.
+ final NumericEntityUnescaper u = new NumericEntityUnescaper();
+ final StringWriter sw = new StringWriter();
+ final int consumed = u.translate("&#65;", 0, sw);
+ assertEquals("A", sw.toString());
+ assertEquals(5, consumed);
+ }
}