This is an automated email from the ASF dual-hosted git repository. pkarwasz pushed a commit to branch fix/2.25.x/map-message-control-characters in repository https://gitbox.apache.org/repos/asf/logging-log4j2.git
commit d65566954c15b4e46c4694e64dc935a786b474c0 Author: Piotr P. Karwasz <[email protected]> AuthorDate: Tue Mar 24 18:31:54 2026 +0100 MapMessage.asXml(): replace invalid XML characters with U+FFFD This change sanitizes the output of `MapMessage.asXml()` by replacing characters not permitted in XML 1.0 with the Unicode replacement character (`U+FFFD`). Although `MapMessage.asXml()` is not currently used by any layout, this aligns its behavior with `XmlLayout` and `Log4j1XmlLayout`, ensuring consistent and well-formed XML output across the codebase. --- .../logging/log4j/message/MapMessageTest.java | 23 +++++++-- .../logging/log4j/util/StringBuildersTest.java | 41 +++++++++++++--- .../apache/logging/log4j/message/MapMessage.java | 10 ++-- .../apache/logging/log4j/util/StringBuilders.java | 55 ++++++++++++++++++++-- .../.2.x.x/map-message-control-characters.xml | 11 +++++ 5 files changed, 122 insertions(+), 18 deletions(-) diff --git a/log4j-api-test/src/test/java/org/apache/logging/log4j/message/MapMessageTest.java b/log4j-api-test/src/test/java/org/apache/logging/log4j/message/MapMessageTest.java index a36bb6b4ae..db29a18949 100644 --- a/log4j-api-test/src/test/java/org/apache/logging/log4j/message/MapMessageTest.java +++ b/log4j-api-test/src/test/java/org/apache/logging/log4j/message/MapMessageTest.java @@ -16,6 +16,7 @@ */ package org.apache.logging.log4j.message; +import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; @@ -72,12 +73,26 @@ class MapMessageTest { @Test void testXMLEscape() { - final String testMsg = "Test message <foo>"; + final String notBmp = new String(Character.toChars(0x10000)); + final String invalid = "A\uD800B\uDE00C\0\1\2\3"; + final String expectedInvalid = "A\uFFFDB\uFFFDC\uFFFD\uFFFD\uFFFD\uFFFD"; + final String key = "k<e&y> '\"\t\r\n" + notBmp + invalid; + final String value = "v>al<u& '\"\t\r\n" + notBmp + invalid; final StringMapMessage msg = new StringMapMessage(); - msg.put("message", testMsg); + msg.put(key, value); final String result = msg.getFormattedMessage(new String[] {"XML"}); - final String expected = "<Map>\n <Entry key=\"message\">Test message <foo></Entry>\n" + "</Map>"; - assertEquals(expected, result); + + assertThat(result) + .isEqualTo( + "<Map>\n" // + + " <Entry key=\"k<e&y> '"\t\r\n" + + notBmp + + expectedInvalid + + "\">v>al<u& '"\t\r\n" + + notBmp + + expectedInvalid + + "</Entry>\n" // + + "</Map>"); } @Test diff --git a/log4j-api-test/src/test/java/org/apache/logging/log4j/util/StringBuildersTest.java b/log4j-api-test/src/test/java/org/apache/logging/log4j/util/StringBuildersTest.java index 8722dd33e4..1dee8b5dfa 100644 --- a/log4j-api-test/src/test/java/org/apache/logging/log4j/util/StringBuildersTest.java +++ b/log4j-api-test/src/test/java/org/apache/logging/log4j/util/StringBuildersTest.java @@ -16,10 +16,15 @@ */ package org.apache.logging.log4j.util; +import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; +import java.util.stream.Stream; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; /** * Tests the StringBuilders class. @@ -79,15 +84,37 @@ class StringBuildersTest { assertEquals(jsonValueEscaped, sb.toString()); } - @Test - void escapeXMLCharactersCorrectly() { - final String xmlValueNotEscaped = "<\"Salt&Peppa'\">"; - final String xmlValueEscaped = "<"Salt&Peppa'">"; + static Stream<Arguments> escapeXmlCharactersCorrectly() { + final char replacement = '\uFFFD'; + return Stream.of( + // Empty + Arguments.of("", ""), + // characters that need to be escaped + Arguments.of("<\"Salt&Peppa'\">", "<"Salt&Peppa'">"), + // control character replaced with U+FFFD + Arguments.of("A" + (char) 0x01 + "B", "A" + replacement + "B"), + // standalone low surrogate replaced with U+FFFD + Arguments.of("low" + Character.MIN_SURROGATE + "surrogate", "low" + replacement + "surrogate"), + Arguments.of(Character.MIN_SURROGATE + "low", replacement + "low"), + // standalone high surrogate replaced with U+FFFD + Arguments.of("high" + Character.MAX_SURROGATE + "surrogate", "high" + replacement + "surrogate"), + Arguments.of(Character.MAX_SURROGATE + "high", replacement + "high"), + // FFFE and FFFF + Arguments.of("invalid\uFFFEchars", "invalid" + replacement + "chars"), + Arguments.of("invalid\uFFFFchars", "invalid" + replacement + "chars"), + // whitespace characters are preserved + Arguments.of("tab\tnewline\ncr\r", "tab\tnewline\ncr\r"), + // character beyond BMP (emoji) preserved as surrogate pair + Arguments.of("emoji " + "\uD83D\uDE00" + " end", "emoji " + "\uD83D\uDE00" + " end")); + } + @ParameterizedTest + @MethodSource + void escapeXmlCharactersCorrectly(final String input, final String expected) { final StringBuilder sb = new StringBuilder(); - sb.append(xmlValueNotEscaped); - assertEquals(xmlValueNotEscaped, sb.toString()); + sb.append(input); + assertThat(sb.toString()).isEqualTo(input); StringBuilders.escapeXml(sb, 0); - assertEquals(xmlValueEscaped, sb.toString()); + assertThat(sb.toString()).isEqualTo(expected); } } diff --git a/log4j-api/src/main/java/org/apache/logging/log4j/message/MapMessage.java b/log4j-api/src/main/java/org/apache/logging/log4j/message/MapMessage.java index f1144cee62..c78d59a0b5 100644 --- a/log4j-api/src/main/java/org/apache/logging/log4j/message/MapMessage.java +++ b/log4j-api/src/main/java/org/apache/logging/log4j/message/MapMessage.java @@ -358,10 +358,14 @@ public class MapMessage<M extends MapMessage<M, V>, V> implements MultiFormatStr public void asXml(final StringBuilder sb) { sb.append("<Map>\n"); for (int i = 0; i < data.size(); i++) { - sb.append(" <Entry key=\"").append(data.getKeyAt(i)).append("\">"); - final int size = sb.length(); + sb.append(" <Entry key=\""); + int start = sb.length(); + sb.append(data.getKeyAt(i)); + StringBuilders.escapeXml(sb, start); + sb.append("\">"); + start = sb.length(); ParameterFormatter.recursiveDeepToString(data.getValueAt(i), sb); - StringBuilders.escapeXml(sb, size); + StringBuilders.escapeXml(sb, start); sb.append("</Entry>\n"); } sb.append("</Map>"); diff --git a/log4j-api/src/main/java/org/apache/logging/log4j/util/StringBuilders.java b/log4j-api/src/main/java/org/apache/logging/log4j/util/StringBuilders.java index 8f697d53f1..1ee2012cc7 100644 --- a/log4j-api/src/main/java/org/apache/logging/log4j/util/StringBuilders.java +++ b/log4j-api/src/main/java/org/apache/logging/log4j/util/StringBuilders.java @@ -27,6 +27,8 @@ import java.util.Map.Entry; @InternalApi public final class StringBuilders { + private static final char REPLACEMENT_CHAR = '\uFFFD'; + private static final Class<?> timeClass; private static final Class<?> dateClass; @@ -310,8 +312,8 @@ public final class StringBuilders { */ public static void escapeXml(final StringBuilder toAppendTo, final int start) { int escapeCount = 0; - for (int i = start; i < toAppendTo.length(); i++) { - final char c = toAppendTo.charAt(i); + for (int i = start; i < toAppendTo.length(); ) { + final int c = toAppendTo.codePointAt(i); switch (c) { case '&': escapeCount += 4; @@ -323,15 +325,36 @@ public final class StringBuilders { case '"': case '\'': escapeCount += 5; + break; + default: + // All invalid XML 1.0 characters have the same length as the replacement character + // Therefore no additional adjustment is needed } + i += Character.charCount(c); } final int lastChar = toAppendTo.length() - 1; + if (lastChar < 0) { + return; + } toAppendTo.setLength(toAppendTo.length() + escapeCount); int lastPos = toAppendTo.length() - 1; - for (int i = lastChar; lastPos > i; i--) { + for (int i = lastChar; lastPos >= start; i--) { final char c = toAppendTo.charAt(i); + // Handle surrogate pairs and invalid low surrogates + if (i > 0 && Character.isLowSurrogate(c)) { + final char previous = toAppendTo.charAt(i - 1); + // Invalid low surrogate + if (!Character.isHighSurrogate(previous)) { + toAppendTo.setCharAt(lastPos--, REPLACEMENT_CHAR); + } else { + toAppendTo.setCharAt(lastPos--, c); + toAppendTo.setCharAt(lastPos--, previous); + i--; + } + continue; + } switch (c) { case '&': toAppendTo.setCharAt(lastPos--, ';'); @@ -369,8 +392,32 @@ public final class StringBuilders { toAppendTo.setCharAt(lastPos--, '&'); break; default: - toAppendTo.setCharAt(lastPos--, c); + toAppendTo.setCharAt(lastPos--, isValidXml10(c) ? c : REPLACEMENT_CHAR); } } } + + /** + * Checks if a code point is a valid XML 1.0 character + * + * <p>This method is restricted to characters in the BMP, i.e. represented by one UTF-16 code unit.</p> + * + * @param codePoint a code point + * @return {@code true} if it is a valid XML 1.0 code point + */ + private static boolean isValidXml10(final char codePoint) { + // XML 1.0 valid characters (Fifth Edition): + // #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] + + // [#x20–#xD7FF] (placed early as a fast path for the most common case) + return (codePoint >= ' ' && codePoint < Character.MIN_SURROGATE) + // #x9 + || codePoint == '\t' + // #xA + || codePoint == '\n' + // #xD + || codePoint == '\r' + // [#xE000-#xFFFD] + || (codePoint > Character.MAX_SURROGATE && codePoint <= 0xFFFD); + } } diff --git a/src/changelog/.2.x.x/map-message-control-characters.xml b/src/changelog/.2.x.x/map-message-control-characters.xml new file mode 100644 index 0000000000..80715b70fc --- /dev/null +++ b/src/changelog/.2.x.x/map-message-control-characters.xml @@ -0,0 +1,11 @@ +<?xml version="1.0" encoding="UTF-8"?> +<entry xmlns="https://logging.apache.org/xml/ns" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation=" + https://logging.apache.org/xml/ns + https://logging.apache.org/xml/ns/log4j-changelog-0.xsd" + type="fixed"> + <description format="asciidoc"> + Replace invalid characters in MapMessage.asXml() output with the Unicode replacement character (U+FFFD). + </description> +</entry>
