This is an automated email from the ASF dual-hosted git repository.
garydgregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-codec.git
The following commit(s) were added to refs/heads/master by this push:
new 92e18dea [CODEC-339] Escape URLCodec control characters in custom safe sets (#433)
92e18dea is described below
commit 92e18dea1b6bb18b32b8b1443acf38d8342ff827
Author: OldTruckDriver <[email protected]>
AuthorDate: Thu Jun 18 11:35:11 2026 +1000
[CODEC-339] Escape URLCodec control characters in custom safe sets (#433)
Keep '%' and '+' percent-encoded even when callers mark them safe in
URLCodec.encodeUrl(BitSet, byte[]). decodeUrl() treats those bytes as encoding
syntax, so emitting them literally can produce undecodable output or break
round trips.
Reviewed-by: OpenAI Codex
Reviewed-by: Anthropic Claude Code
Co-authored-by: Gary Gregory <[email protected]>
---
src/changes/changes.xml | 1 +
.../org/apache/commons/codec/net/URLCodec.java | 12 +++++++----
.../org/apache/commons/codec/net/URLCodecTest.java | 25 ++++++++++++++++++++++
3 files changed, 34 insertions(+), 4 deletions(-)
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index b0428fc4..6bfe8353 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -45,6 +45,7 @@ The <action> type attribute can be add,update,fix,remove.
<body>
<release version="1.22.1" date="YYYY-MM-DD" description="This is a feature
and maintenance release. Java 8 or later is required.">
<!-- FIX -->
+ <action type="fix" issue="CODEC-339" dev="ggregory" due-to="Ruiqi Dong,
Gary Gregory">URLCodec.encodeUrl(BitSet, byte[]) allows custom safe sets to
emit URL encoding control characters.</action>
<action type="fix" issue="CODEC-338" dev="ggregory" due-to="Ruiqi Dong,
Gary Gregory">PercentCodec loses literal '+' when plusForSpace is
enabled.</action>
<action type="add" issue="CODEC-337" dev="pkarwasz" due-to="Ruiqi Dong,
Gary Gregory">Digest ALL reuses System.in, so only the first algorithm sees the
real input (#431).</action>
<!-- ADD -->
diff --git a/src/main/java/org/apache/commons/codec/net/URLCodec.java b/src/main/java/org/apache/commons/codec/net/URLCodec.java
index 20f25e25..462c8a69 100644
--- a/src/main/java/org/apache/commons/codec/net/URLCodec.java
+++ b/src/main/java/org/apache/commons/codec/net/URLCodec.java
@@ -53,6 +53,8 @@ public class URLCodec implements BinaryEncoder,
BinaryDecoder, StringEncoder, St
*/
protected static final byte ESCAPE_CHAR = '%';
+ private static final byte PLUS_CHAR = '+';
+
/**
* BitSet of www-form-url safe characters.
* This is a copy of the internal BitSet which is now used for the
conversion.
@@ -107,7 +109,7 @@ public class URLCodec implements BinaryEncoder,
BinaryDecoder, StringEncoder, St
final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
for (int i = 0; i < bytes.length; i++) {
final int b = bytes[i];
- if (b == '+') {
+ if (b == PLUS_CHAR) {
buffer.write(' ');
} else if (b == ESCAPE_CHAR) {
try {
@@ -126,9 +128,11 @@ public class URLCodec implements BinaryEncoder,
BinaryDecoder, StringEncoder, St
/**
* Encodes an array of bytes into an array of URL safe 7-bit characters.
Unsafe characters are escaped.
+ * The characters {@code %} and {@code +} are always escaped because
{@link #decodeUrl(byte[])}
+ * treats them as URL-encoding syntax.
*
* @param urlsafe
- * bitset of characters deemed URL safe.
+ * bitset of characters deemed URL safe, except for {@code %}
and {@code +}.
* @param bytes
* array of bytes to convert to URL safe characters.
* @return array of bytes containing URL safe characters.
@@ -147,9 +151,9 @@ public class URLCodec implements BinaryEncoder,
BinaryDecoder, StringEncoder, St
if (b < 0) {
b = 256 + b;
}
- if (urlsafe.get(b)) {
+ if (urlsafe.get(b) && b != ESCAPE_CHAR && b != PLUS_CHAR) {
if (b == ' ') {
- b = '+';
+ b = PLUS_CHAR;
}
buffer.write(b);
} else {
diff --git a/src/test/java/org/apache/commons/codec/net/URLCodecTest.java b/src/test/java/org/apache/commons/codec/net/URLCodecTest.java
index b862803c..fa374e31 100644
--- a/src/test/java/org/apache/commons/codec/net/URLCodecTest.java
+++ b/src/test/java/org/apache/commons/codec/net/URLCodecTest.java
@@ -22,6 +22,7 @@ import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
import java.nio.charset.StandardCharsets;
+import java.util.BitSet;
import org.apache.commons.codec.CharEncoding;
import org.apache.commons.codec.DecoderException;
@@ -180,6 +181,30 @@ class URLCodecTest {
validateState(urlCodec);
}
+ @Test
+ void testEncodeUrlWithPercentMarkedSafeEscapesPercent() throws Exception {
+ final BitSet safe = new BitSet();
+ safe.set('%');
+ final String plain = "%";
+ final byte[] encoded = URLCodec.encodeUrl(safe,
plain.getBytes(StandardCharsets.US_ASCII));
+ final String encodedS = new String(encoded, StandardCharsets.US_ASCII);
+ assertEquals("%25", encodedS, "URLCodec should escape percent even
when marked safe");
+ final byte[] decoded = URLCodec.decodeUrl(encoded);
+ assertEquals(plain, new String(decoded, StandardCharsets.US_ASCII),
"URLCodec percent decoding test");
+ }
+
+ @Test
+ void testEncodeUrlWithPlusMarkedSafeEscapesPlus() throws Exception {
+ final BitSet safe = new BitSet();
+ safe.set('+');
+ final String plain = "+";
+ final byte[] encoded = URLCodec.encodeUrl(safe,
plain.getBytes(StandardCharsets.US_ASCII));
+ final String encodedS = new String(encoded, StandardCharsets.US_ASCII);
+ assertEquals("%2B", encodedS, "URLCodec should escape plus even when
marked safe");
+ final byte[] decoded = URLCodec.decodeUrl(encoded);
+ assertEquals(plain, new String(decoded, StandardCharsets.US_ASCII),
"URLCodec plus decoding test");
+ }
+
@Test
void testInvalidEncoding() {
final URLCodec urlCodec = new URLCodec("NONSENSE");