This is an automated email from the ASF dual-hosted git repository.

garydgregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-codec.git


The following commit(s) were added to refs/heads/master by this push:
     new 92e18dea [CODEC-339] Escape URLCodec control characters in custom safe sets (#433)
92e18dea is described below

commit 92e18dea1b6bb18b32b8b1443acf38d8342ff827
Author: OldTruckDriver <[email protected]>
AuthorDate: Thu Jun 18 11:35:11 2026 +1000

    [CODEC-339] Escape URLCodec control characters in custom safe sets (#433)
    
    Keep '%' and '+' percent-encoded even when callers mark them safe in URLCodec.encodeUrl(BitSet, byte[]). decodeUrl() treats those bytes as encoding syntax, so emitting them literally can produce undecodable output or break round trips.
    
    Reviewed-by: OpenAI Codex
    Reviewed-by: Anthropic Claude Code
    
    Co-authored-by: Gary Gregory <[email protected]>
---
 src/changes/changes.xml                            |  1 +
 .../org/apache/commons/codec/net/URLCodec.java     | 12 +++++++----
 .../org/apache/commons/codec/net/URLCodecTest.java | 25 ++++++++++++++++++++++
 3 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index b0428fc4..6bfe8353 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -45,6 +45,7 @@ The <action> type attribute can be add,update,fix,remove.
   <body>
     <release version="1.22.1" date="YYYY-MM-DD" description="This is a feature and maintenance release. Java 8 or later is required.">
       <!-- FIX -->
+      <action type="fix" issue="CODEC-339" dev="ggregory" due-to="Ruiqi Dong, Gary Gregory">URLCodec.encodeUrl(BitSet, byte[]) allows custom safe sets to emit URL encoding control characters.</action>
       <action type="fix" issue="CODEC-338" dev="ggregory" due-to="Ruiqi Dong, Gary Gregory">PercentCodec loses literal '+' when plusForSpace is enabled.</action>
       <action type="add" issue="CODEC-337" dev="pkarwasz" due-to="Ruiqi Dong, Gary Gregory">Digest ALL reuses System.in, so only the first algorithm sees the real input (#431).</action>
       <!-- ADD -->
diff --git a/src/main/java/org/apache/commons/codec/net/URLCodec.java b/src/main/java/org/apache/commons/codec/net/URLCodec.java
index 20f25e25..462c8a69 100644
--- a/src/main/java/org/apache/commons/codec/net/URLCodec.java
+++ b/src/main/java/org/apache/commons/codec/net/URLCodec.java
@@ -53,6 +53,8 @@ public class URLCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, St
      */
     protected static final byte ESCAPE_CHAR = '%';
 
+    private static final byte PLUS_CHAR = '+';
+
     /**
      * BitSet of www-form-url safe characters.
      * This is a copy of the internal BitSet which is now used for the conversion.
@@ -107,7 +109,7 @@ public class URLCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, St
         final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
         for (int i = 0; i < bytes.length; i++) {
             final int b = bytes[i];
-            if (b == '+') {
+            if (b == PLUS_CHAR) {
                 buffer.write(' ');
             } else if (b == ESCAPE_CHAR) {
                 try {
@@ -126,9 +128,11 @@ public class URLCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, St
 
     /**
      * Encodes an array of bytes into an array of URL safe 7-bit characters. Unsafe characters are escaped.
+     * The characters {@code %} and {@code +} are always escaped because {@link #decodeUrl(byte[])}
+     * treats them as URL-encoding syntax.
      *
      * @param urlsafe
-     *            bitset of characters deemed URL safe.
+     *            bitset of characters deemed URL safe, except for {@code %} and {@code +}.
      * @param bytes
      *            array of bytes to convert to URL safe characters.
      * @return array of bytes containing URL safe characters.
@@ -147,9 +151,9 @@ public class URLCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, St
             if (b < 0) {
                 b = 256 + b;
             }
-            if (urlsafe.get(b)) {
+            if (urlsafe.get(b) && b != ESCAPE_CHAR && b != PLUS_CHAR) {
                 if (b == ' ') {
-                    b = '+';
+                    b = PLUS_CHAR;
                 }
                 buffer.write(b);
             } else {
diff --git a/src/test/java/org/apache/commons/codec/net/URLCodecTest.java b/src/test/java/org/apache/commons/codec/net/URLCodecTest.java
index b862803c..fa374e31 100644
--- a/src/test/java/org/apache/commons/codec/net/URLCodecTest.java
+++ b/src/test/java/org/apache/commons/codec/net/URLCodecTest.java
@@ -22,6 +22,7 @@ import static org.junit.jupiter.api.Assertions.assertNull;
 import static org.junit.jupiter.api.Assertions.assertThrows;
 
 import java.nio.charset.StandardCharsets;
+import java.util.BitSet;
 
 import org.apache.commons.codec.CharEncoding;
 import org.apache.commons.codec.DecoderException;
@@ -180,6 +181,30 @@ class URLCodecTest {
         validateState(urlCodec);
     }
 
+    @Test
+    void testEncodeUrlWithPercentMarkedSafeEscapesPercent() throws Exception {
+        final BitSet safe = new BitSet();
+        safe.set('%');
+        final String plain = "%";
+        final byte[] encoded = URLCodec.encodeUrl(safe, plain.getBytes(StandardCharsets.US_ASCII));
+        final String encodedS = new String(encoded, StandardCharsets.US_ASCII);
+        assertEquals("%25", encodedS, "URLCodec should escape percent even when marked safe");
+        final byte[] decoded = URLCodec.decodeUrl(encoded);
+        assertEquals(plain, new String(decoded, StandardCharsets.US_ASCII), "URLCodec percent decoding test");
+    }
+
+    @Test
+    void testEncodeUrlWithPlusMarkedSafeEscapesPlus() throws Exception {
+        final BitSet safe = new BitSet();
+        safe.set('+');
+        final String plain = "+";
+        final byte[] encoded = URLCodec.encodeUrl(safe, plain.getBytes(StandardCharsets.US_ASCII));
+        final String encodedS = new String(encoded, StandardCharsets.US_ASCII);
+        assertEquals("%2B", encodedS, "URLCodec should escape plus even when marked safe");
+        final byte[] decoded = URLCodec.decodeUrl(encoded);
+        assertEquals(plain, new String(decoded, StandardCharsets.US_ASCII), "URLCodec plus decoding test");
+    }
+
     @Test
     void testInvalidEncoding() {
         final URLCodec urlCodec = new URLCodec("NONSENSE");

Reply via email to