This is an automated email from the ASF dual-hosted git repository.

garydgregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-lang.git


The following commit(s) were added to refs/heads/master by this push:
     new 14deed1bf NumericEntityUnescaper.translate() for entity values out of range passed (#1673)
14deed1bf is described below

commit 14deed1bff705a46c485733451f462f549ac528d
Author: Gary Gregory <[email protected]>
AuthorDate: Sun May 24 15:35:31 2026 -0400

    NumericEntityUnescaper.translate() for entity values out of range passed (#1673)
    
    to Character.toChars() throw IllegalArgumentException
---
 .../text/translate/NumericEntityUnescaper.java     | 11 +++-----
 .../text/translate/NumericEntityEscaperTest.java   | 29 +++++++++++++++++++---
 2 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/src/main/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaper.java b/src/main/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaper.java
index 805ceef49..00e94a024 100644
--- a/src/main/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaper.java
+++ b/src/main/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaper.java
@@ -110,26 +110,21 @@ public int translate(final CharSequence input, final int index, final Writer out
         if (input.charAt(index) == '&' && index < seqEnd - 2 && input.charAt(index + 1) == '#') {
             int start = index + 2;
             boolean isHex = false;
-
             final char firstChar = input.charAt(start);
             if (firstChar == 'x' || firstChar == 'X') {
                 start++;
                 isHex = true;
-
                 // Check there's more than just an x after the &#
                 if (start == seqEnd) {
                     return 0;
                 }
             }
-
             int end = start;
             // Note that this supports character codes without a ; on the end
             while (end < seqEnd && CharUtils.isHex(input.charAt(end))) {
                 end++;
             }
-
             final boolean semiNext = end != seqEnd && input.charAt(end) == ';';
-
             if (!semiNext) {
                 if (isSet(OPTION.semiColonRequired)) {
                     return 0;
@@ -138,7 +133,6 @@ public int translate(final CharSequence input, final int index, final Writer out
                     throw new IllegalArgumentException("Semi-colon required at end of numeric entity");
                 }
             }
-
             final int entityValue;
             try {
                 if (isHex) {
@@ -149,7 +143,9 @@ public int translate(final CharSequence input, final int index, final Writer out
             } catch (final NumberFormatException nfe) {
                 return 0;
             }
-
+            if (entityValue < Character.MIN_CODE_POINT || entityValue > Character.MAX_CODE_POINT) {
+                return 0;
+            }
             if (entityValue > 0xFFFF) {
                 final char[] chars = Character.toChars(entityValue);
                 out.write(chars[0]);
@@ -157,7 +153,6 @@ public int translate(final CharSequence input, final int index, final Writer out
             } else {
                 out.write(entityValue);
             }
-
             return 2 + end - start + (isHex ? 1 : 0) + (semiNext ? 1 : 0);
         }
         return 0;
diff --git a/src/test/java/org/apache/commons/lang3/text/translate/NumericEntityEscaperTest.java b/src/test/java/org/apache/commons/lang3/text/translate/NumericEntityEscaperTest.java
index 22c5639c7..dab4c884a 100644
--- a/src/test/java/org/apache/commons/lang3/text/translate/NumericEntityEscaperTest.java
+++ b/src/test/java/org/apache/commons/lang3/text/translate/NumericEntityEscaperTest.java
@@ -19,6 +19,8 @@
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
 
+import java.io.StringWriter;
+
 import org.apache.commons.lang3.AbstractLangTest;
 import org.junit.jupiter.api.Test;
 
@@ -31,7 +33,6 @@ class NumericEntityEscaperTest extends AbstractLangTest {
     @Test
     void testAbove() {
         final NumericEntityEscaper nee = NumericEntityEscaper.above('F');
-
         final String input = "ADFGZ";
         final String result = nee.translate(input);
         assertEquals("ADF&#71;&#90;", result, "Failed to escape numeric entities via the above method");
@@ -40,7 +41,6 @@ void testAbove() {
     @Test
     void testBelow() {
         final NumericEntityEscaper nee = NumericEntityEscaper.below('F');
-
         final String input = "ADFGZ";
         final String result = nee.translate(input);
         assertEquals("&#65;&#68;FGZ", result, "Failed to escape numeric 
entities via the below method");
@@ -49,7 +49,6 @@ void testBelow() {
     @Test
     void testBetween() {
         final NumericEntityEscaper nee = NumericEntityEscaper.between('F', 'L');
-
         final String input = "ADFGZ";
         final String result = nee.translate(input);
         assertEquals("AD&#70;&#71;Z", result, "Failed to escape numeric entities via the between method");
@@ -61,10 +60,32 @@ void testSupplementary() {
         final NumericEntityEscaper nee = new NumericEntityEscaper();
         final String input = "\uD803\uDC22";
         final String expected = "&#68642;";
-
         final String result = nee.translate(input);
         assertEquals(expected, result, "Failed to escape numeric entities supplementary characters");
+    }
 
+    @Test
+    void testNumericEntityOverflow() throws Exception {
+        // cp = 1234567890 > Character.MAX_CODE_POINT (0x10FFFF = 1114111).
+        // Pre-patch: IAE escapes from Character.toChars.
+        // Post-patch: return 0, no write, no exception.
+        final NumericEntityUnescaper u = new NumericEntityUnescaper();
+        final StringWriter sw = new StringWriter();
+        int consumed = u.translate("&#1234567890;", 0, sw);
+        assertEquals(0, consumed);
+        assertEquals("", sw.toString());
+        consumed = u.translate("---&#1234567890;---", 0, sw);
+        assertEquals(0, consumed);
+        assertEquals("", sw.toString());
     }
 
+    @Test
+    void testValidCodePoint() throws Exception {
+        // Negative control: '&#65;' = 'A' must translate successfully.
+        final NumericEntityUnescaper u = new NumericEntityUnescaper();
+        final StringWriter sw = new StringWriter();
+        final int consumed = u.translate("&#65;", 0, sw);
+        assertEquals("A", sw.toString());
+        assertEquals(5, consumed);
+    }
 }

Reply via email to