This is an automated email from the ASF dual-hosted git repository.
pkarwasz pushed a commit to branch 2.25.x
in repository https://gitbox.apache.org/repos/asf/logging-log4j2.git
The following commit(s) were added to refs/heads/2.25.x by this push:
new 55e0ece1dc MapMessage.asXml(): replace invalid XML characters with
U+FFFD (#4079)
55e0ece1dc is described below
commit 55e0ece1dcee854b81367fa1a8e8e64bab4f0363
Author: Piotr P. Karwasz <[email protected]>
AuthorDate: Tue Mar 24 23:58:00 2026 +0100
MapMessage.asXml(): replace invalid XML characters with U+FFFD (#4079)
This change sanitizes the output of `MapMessage.asXml()` by replacing
characters not permitted in XML 1.0 with the Unicode replacement character
(`U+FFFD`).
Although `MapMessage.asXml()` is not currently used by any layout, this
aligns its behavior with `XmlLayout` and `Log4j1XmlLayout`, ensuring consistent
and well-formed XML output across the codebase.
---
.../logging/log4j/message/MapMessageTest.java | 23 +++++++--
.../logging/log4j/util/StringBuildersTest.java | 41 +++++++++++++---
.../apache/logging/log4j/message/MapMessage.java | 10 ++--
.../apache/logging/log4j/util/StringBuilders.java | 55 ++++++++++++++++++++--
.../.2.x.x/4079_map-message-control-characters.xml | 12 +++++
5 files changed, 123 insertions(+), 18 deletions(-)
diff --git
a/log4j-api-test/src/test/java/org/apache/logging/log4j/message/MapMessageTest.java
b/log4j-api-test/src/test/java/org/apache/logging/log4j/message/MapMessageTest.java
index a36bb6b4ae..db29a18949 100644
---
a/log4j-api-test/src/test/java/org/apache/logging/log4j/message/MapMessageTest.java
+++
b/log4j-api-test/src/test/java/org/apache/logging/log4j/message/MapMessageTest.java
@@ -16,6 +16,7 @@
*/
package org.apache.logging.log4j.message;
+import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
@@ -72,12 +73,26 @@ class MapMessageTest {
@Test
void testXMLEscape() {
- final String testMsg = "Test message <foo>";
+ final String notBmp = new String(Character.toChars(0x10000));
+ final String invalid = "A\uD800B\uDE00C\0\1\2\3";
+ final String expectedInvalid =
"A\uFFFDB\uFFFDC\uFFFD\uFFFD\uFFFD\uFFFD";
+ final String key = "k<e&y> '\"\t\r\n" + notBmp + invalid;
+ final String value = "v>al<u& '\"\t\r\n" + notBmp + invalid;
final StringMapMessage msg = new StringMapMessage();
- msg.put("message", testMsg);
+ msg.put(key, value);
final String result = msg.getFormattedMessage(new String[] {"XML"});
- final String expected = "<Map>\n <Entry key=\"message\">Test message
<foo></Entry>\n" + "</Map>";
- assertEquals(expected, result);
+
+ assertThat(result)
+ .isEqualTo(
+ "<Map>\n" //
+ + " <Entry key=\"k<e&y>
'"\t\r\n"
+ + notBmp
+ + expectedInvalid
+ + "\">v>al<u& '"\t\r\n"
+ + notBmp
+ + expectedInvalid
+ + "</Entry>\n" //
+ + "</Map>");
}
@Test
diff --git
a/log4j-api-test/src/test/java/org/apache/logging/log4j/util/StringBuildersTest.java
b/log4j-api-test/src/test/java/org/apache/logging/log4j/util/StringBuildersTest.java
index 8722dd33e4..9c0a5caa61 100644
---
a/log4j-api-test/src/test/java/org/apache/logging/log4j/util/StringBuildersTest.java
+++
b/log4j-api-test/src/test/java/org/apache/logging/log4j/util/StringBuildersTest.java
@@ -16,10 +16,15 @@
*/
package org.apache.logging.log4j.util;
+import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
+import java.util.stream.Stream;
import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
/**
* Tests the StringBuilders class.
@@ -79,15 +84,37 @@ class StringBuildersTest {
assertEquals(jsonValueEscaped, sb.toString());
}
- @Test
- void escapeXMLCharactersCorrectly() {
- final String xmlValueNotEscaped = "<\"Salt&Peppa'\">";
- final String xmlValueEscaped =
"<"Salt&Peppa'">";
+ static Stream<Arguments> escapeXmlCharactersCorrectly() {
+ final char replacement = '\uFFFD';
+ return Stream.of(
+ // Empty
+ Arguments.of("", ""),
+ // characters that need to be escaped
+ Arguments.of("<\"Salt&Peppa'\">",
"<"Salt&Peppa'">"),
+ // control character replaced with U+FFFD
+ Arguments.of("A" + (char) 0x01 + "B", "A" + replacement + "B"),
+ // standalone low surrogate replaced with U+FFFD
+ Arguments.of("low" + Character.MIN_LOW_SURROGATE +
"surrogate", "low" + replacement + "surrogate"),
+ Arguments.of(Character.MIN_LOW_SURROGATE + "low", replacement
+ "low"),
+ // standalone high surrogate replaced with U+FFFD
+ Arguments.of("high" + Character.MIN_HIGH_SURROGATE +
"surrogate", "high" + replacement + "surrogate"),
+ Arguments.of(Character.MIN_HIGH_SURROGATE + "high",
replacement + "high"),
+ // FFFE and FFFF
+ Arguments.of("invalid\uFFFEchars", "invalid" + replacement +
"chars"),
+ Arguments.of("invalid\uFFFFchars", "invalid" + replacement +
"chars"),
+ // whitespace characters are preserved
+ Arguments.of("tab\tnewline\ncr\r", "tab\tnewline\ncr\r"),
+ // character beyond BMP (emoji) preserved as surrogate pair
+ Arguments.of("emoji " + "\uD83D\uDE00" + " end", "emoji " +
"\uD83D\uDE00" + " end"));
+ }
+ @ParameterizedTest
+ @MethodSource
+ void escapeXmlCharactersCorrectly(final String input, final String
expected) {
final StringBuilder sb = new StringBuilder();
- sb.append(xmlValueNotEscaped);
- assertEquals(xmlValueNotEscaped, sb.toString());
+ sb.append(input);
+ assertThat(sb.toString()).isEqualTo(input);
StringBuilders.escapeXml(sb, 0);
- assertEquals(xmlValueEscaped, sb.toString());
+ assertThat(sb.toString()).isEqualTo(expected);
}
}
diff --git
a/log4j-api/src/main/java/org/apache/logging/log4j/message/MapMessage.java
b/log4j-api/src/main/java/org/apache/logging/log4j/message/MapMessage.java
index f1144cee62..c78d59a0b5 100644
--- a/log4j-api/src/main/java/org/apache/logging/log4j/message/MapMessage.java
+++ b/log4j-api/src/main/java/org/apache/logging/log4j/message/MapMessage.java
@@ -358,10 +358,14 @@ public class MapMessage<M extends MapMessage<M, V>, V>
implements MultiFormatStr
public void asXml(final StringBuilder sb) {
sb.append("<Map>\n");
for (int i = 0; i < data.size(); i++) {
- sb.append(" <Entry
key=\"").append(data.getKeyAt(i)).append("\">");
- final int size = sb.length();
+ sb.append(" <Entry key=\"");
+ int start = sb.length();
+ sb.append(data.getKeyAt(i));
+ StringBuilders.escapeXml(sb, start);
+ sb.append("\">");
+ start = sb.length();
ParameterFormatter.recursiveDeepToString(data.getValueAt(i), sb);
- StringBuilders.escapeXml(sb, size);
+ StringBuilders.escapeXml(sb, start);
sb.append("</Entry>\n");
}
sb.append("</Map>");
diff --git
a/log4j-api/src/main/java/org/apache/logging/log4j/util/StringBuilders.java
b/log4j-api/src/main/java/org/apache/logging/log4j/util/StringBuilders.java
index 8f697d53f1..c835e17eeb 100644
--- a/log4j-api/src/main/java/org/apache/logging/log4j/util/StringBuilders.java
+++ b/log4j-api/src/main/java/org/apache/logging/log4j/util/StringBuilders.java
@@ -27,6 +27,8 @@ import java.util.Map.Entry;
@InternalApi
public final class StringBuilders {
+ private static final char REPLACEMENT_CHAR = '\uFFFD';
+
private static final Class<?> timeClass;
private static final Class<?> dateClass;
@@ -310,8 +312,8 @@ public final class StringBuilders {
*/
public static void escapeXml(final StringBuilder toAppendTo, final int
start) {
int escapeCount = 0;
- for (int i = start; i < toAppendTo.length(); i++) {
- final char c = toAppendTo.charAt(i);
+ for (int i = start; i < toAppendTo.length(); ) {
+ final int c = toAppendTo.codePointAt(i);
switch (c) {
case '&':
escapeCount += 4;
@@ -323,15 +325,36 @@ public final class StringBuilders {
case '"':
case '\'':
escapeCount += 5;
+ break;
+ default:
+ // All invalid XML 1.0 characters have the same length as
the replacement character
+ // Therefore no additional adjustment is needed
}
+ i += Character.charCount(c);
}
final int lastChar = toAppendTo.length() - 1;
+ if (lastChar < 0) {
+ return;
+ }
toAppendTo.setLength(toAppendTo.length() + escapeCount);
int lastPos = toAppendTo.length() - 1;
- for (int i = lastChar; lastPos > i; i--) {
+ for (int i = lastChar; lastPos >= start; i--) {
final char c = toAppendTo.charAt(i);
+ // Handle surrogate pairs and invalid low surrogates
+ if (i > 0 && Character.isLowSurrogate(c)) {
+ final char previous = toAppendTo.charAt(i - 1);
+ // Invalid low surrogate
+ if (!Character.isHighSurrogate(previous)) {
+ toAppendTo.setCharAt(lastPos--, REPLACEMENT_CHAR);
+ } else {
+ toAppendTo.setCharAt(lastPos--, c);
+ toAppendTo.setCharAt(lastPos--, previous);
+ i--;
+ }
+ continue;
+ }
switch (c) {
case '&':
toAppendTo.setCharAt(lastPos--, ';');
@@ -369,8 +392,32 @@ public final class StringBuilders {
toAppendTo.setCharAt(lastPos--, '&');
break;
default:
- toAppendTo.setCharAt(lastPos--, c);
+ toAppendTo.setCharAt(lastPos--, isValidXml10(c) ? c :
REPLACEMENT_CHAR);
}
}
}
+
+ /**
+ * Checks if a BMP {@code char} is a valid XML 1.0 character.
+ *
+ * <p>This method is restricted to characters in the BMP, i.e. represented
by one UTF-16 code unit.</p>
+ *
+ * @param ch a BMP {@code char} to validate
+ * @return {@code true} if it is a valid XML 1.0 character
+ */
+ private static boolean isValidXml10(final char ch) {
+ // XML 1.0 valid characters (Fifth Edition):
+ // #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] |
[#x10000-#x10FFFF]
+
+ // [#x20–#xD7FF] (placed early as a fast path for the most common case)
+ return (ch >= ' ' && ch < Character.MIN_SURROGATE)
+ // #x9
+ || ch == '\t'
+ // #xA
+ || ch == '\n'
+ // #xD
+ || ch == '\r'
+ // [#xE000-#xFFFD]
+ || (ch > Character.MAX_SURROGATE && ch <= 0xFFFD);
+ }
}
diff --git a/src/changelog/.2.x.x/4079_map-message-control-characters.xml
b/src/changelog/.2.x.x/4079_map-message-control-characters.xml
new file mode 100644
index 0000000000..1365e5f167
--- /dev/null
+++ b/src/changelog/.2.x.x/4079_map-message-control-characters.xml
@@ -0,0 +1,12 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<entry xmlns="https://logging.apache.org/xml/ns"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="
+ https://logging.apache.org/xml/ns
+ https://logging.apache.org/xml/ns/log4j-changelog-0.xsd"
+ type="fixed">
+ <issue id="4079"
link="https://github.com/apache/logging-log4j2/pull/4079"/>
+ <description format="asciidoc">
+ Replace invalid characters in MapMessage.asXml() output with the
Unicode replacement character (U+FFFD).
+ </description>
+</entry>