This is an automated email from the ASF dual-hosted git repository.

pkarwasz pushed a commit to branch fix/2.25.x/map-message-control-characters
in repository https://gitbox.apache.org/repos/asf/logging-log4j2.git

commit d65566954c15b4e46c4694e64dc935a786b474c0
Author: Piotr P. Karwasz <[email protected]>
AuthorDate: Tue Mar 24 18:31:54 2026 +0100

    MapMessage.asXml(): replace invalid XML characters with U+FFFD
    
    This change sanitizes the output of `MapMessage.asXml()` by replacing 
characters not permitted in XML 1.0 with the Unicode replacement character 
(`U+FFFD`).
    
    Although `MapMessage.asXml()` is not currently used by any layout, this 
aligns its behavior with `XmlLayout` and `Log4j1XmlLayout`, ensuring consistent 
and well-formed XML output across the codebase.
---
 .../logging/log4j/message/MapMessageTest.java      | 23 +++++++--
 .../logging/log4j/util/StringBuildersTest.java     | 41 +++++++++++++---
 .../apache/logging/log4j/message/MapMessage.java   | 10 ++--
 .../apache/logging/log4j/util/StringBuilders.java  | 55 ++++++++++++++++++++--
 .../.2.x.x/map-message-control-characters.xml      | 11 +++++
 5 files changed, 122 insertions(+), 18 deletions(-)

diff --git 
a/log4j-api-test/src/test/java/org/apache/logging/log4j/message/MapMessageTest.java
 
b/log4j-api-test/src/test/java/org/apache/logging/log4j/message/MapMessageTest.java
index a36bb6b4ae..db29a18949 100644
--- 
a/log4j-api-test/src/test/java/org/apache/logging/log4j/message/MapMessageTest.java
+++ 
b/log4j-api-test/src/test/java/org/apache/logging/log4j/message/MapMessageTest.java
@@ -16,6 +16,7 @@
  */
 package org.apache.logging.log4j.message;
 
+import static org.assertj.core.api.Assertions.assertThat;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertThrows;
 
@@ -72,12 +73,26 @@ class MapMessageTest {
 
     @Test
     void testXMLEscape() {
-        final String testMsg = "Test message <foo>";
+        final String notBmp = new String(Character.toChars(0x10000));
+        final String invalid = "A\uD800B\uDE00C\0\1\2\3";
+        final String expectedInvalid = 
"A\uFFFDB\uFFFDC\uFFFD\uFFFD\uFFFD\uFFFD";
+        final String key = "k<e&y> '\"\t\r\n" + notBmp + invalid;
+        final String value = "v>al<u& '\"\t\r\n" + notBmp + invalid;
         final StringMapMessage msg = new StringMapMessage();
-        msg.put("message", testMsg);
+        msg.put(key, value);
         final String result = msg.getFormattedMessage(new String[] {"XML"});
-        final String expected = "<Map>\n  <Entry key=\"message\">Test message 
&lt;foo&gt;</Entry>\n" + "</Map>";
-        assertEquals(expected, result);
+
+        assertThat(result)
+                .isEqualTo(
+                        "<Map>\n" //
+                                + "  <Entry key=\"k&lt;e&amp;y&gt; 
&apos;&quot;\t\r\n"
+                                + notBmp
+                                + expectedInvalid
+                                + "\">v&gt;al&lt;u&amp; &apos;&quot;\t\r\n"
+                                + notBmp
+                                + expectedInvalid
+                                + "</Entry>\n" //
+                                + "</Map>");
     }
 
     @Test
diff --git 
a/log4j-api-test/src/test/java/org/apache/logging/log4j/util/StringBuildersTest.java
 
b/log4j-api-test/src/test/java/org/apache/logging/log4j/util/StringBuildersTest.java
index 8722dd33e4..1dee8b5dfa 100644
--- 
a/log4j-api-test/src/test/java/org/apache/logging/log4j/util/StringBuildersTest.java
+++ 
b/log4j-api-test/src/test/java/org/apache/logging/log4j/util/StringBuildersTest.java
@@ -16,10 +16,15 @@
  */
 package org.apache.logging.log4j.util;
 
+import static org.assertj.core.api.Assertions.assertThat;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
+import java.util.stream.Stream;
 import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
 
 /**
  * Tests the StringBuilders class.
@@ -79,15 +84,37 @@ class StringBuildersTest {
         assertEquals(jsonValueEscaped, sb.toString());
     }
 
-    @Test
-    void escapeXMLCharactersCorrectly() {
-        final String xmlValueNotEscaped = "<\"Salt&Peppa'\">";
-        final String xmlValueEscaped = 
"&lt;&quot;Salt&amp;Peppa&apos;&quot;&gt;";
+    static Stream<Arguments> escapeXmlCharactersCorrectly() {
+        final char replacement = '\uFFFD';
+        return Stream.of(
+                // Empty
+                Arguments.of("", ""),
+                // characters that need to be escaped
+                Arguments.of("<\"Salt&Peppa'\">", 
"&lt;&quot;Salt&amp;Peppa&apos;&quot;&gt;"),
+                // control character replaced with U+FFFD
+                Arguments.of("A" + (char) 0x01 + "B", "A" + replacement + "B"),
+                // standalone low surrogate replaced with U+FFFD
+                Arguments.of("low" + Character.MIN_SURROGATE + "surrogate", 
"low" + replacement + "surrogate"),
+                Arguments.of(Character.MIN_SURROGATE + "low", replacement + 
"low"),
+                // standalone high surrogate replaced with U+FFFD
+                Arguments.of("high" + Character.MAX_SURROGATE + "surrogate", 
"high" + replacement + "surrogate"),
+                Arguments.of(Character.MAX_SURROGATE + "high", replacement + 
"high"),
+                // FFFE and FFFF
+                Arguments.of("invalid\uFFFEchars", "invalid" + replacement + 
"chars"),
+                Arguments.of("invalid\uFFFFchars", "invalid" + replacement + 
"chars"),
+                // whitespace characters are preserved
+                Arguments.of("tab\tnewline\ncr\r", "tab\tnewline\ncr\r"),
+                // character beyond BMP (emoji) preserved as surrogate pair
+                Arguments.of("emoji " + "\uD83D\uDE00" + " end", "emoji " + 
"\uD83D\uDE00" + " end"));
+    }
 
+    @ParameterizedTest
+    @MethodSource
+    void escapeXmlCharactersCorrectly(final String input, final String 
expected) {
         final StringBuilder sb = new StringBuilder();
-        sb.append(xmlValueNotEscaped);
-        assertEquals(xmlValueNotEscaped, sb.toString());
+        sb.append(input);
+        assertThat(sb.toString()).isEqualTo(input);
         StringBuilders.escapeXml(sb, 0);
-        assertEquals(xmlValueEscaped, sb.toString());
+        assertThat(sb.toString()).isEqualTo(expected);
     }
 }
diff --git 
a/log4j-api/src/main/java/org/apache/logging/log4j/message/MapMessage.java 
b/log4j-api/src/main/java/org/apache/logging/log4j/message/MapMessage.java
index f1144cee62..c78d59a0b5 100644
--- a/log4j-api/src/main/java/org/apache/logging/log4j/message/MapMessage.java
+++ b/log4j-api/src/main/java/org/apache/logging/log4j/message/MapMessage.java
@@ -358,10 +358,14 @@ public class MapMessage<M extends MapMessage<M, V>, V> 
implements MultiFormatStr
     public void asXml(final StringBuilder sb) {
         sb.append("<Map>\n");
         for (int i = 0; i < data.size(); i++) {
-            sb.append("  <Entry 
key=\"").append(data.getKeyAt(i)).append("\">");
-            final int size = sb.length();
+            sb.append("  <Entry key=\"");
+            int start = sb.length();
+            sb.append(data.getKeyAt(i));
+            StringBuilders.escapeXml(sb, start);
+            sb.append("\">");
+            start = sb.length();
             ParameterFormatter.recursiveDeepToString(data.getValueAt(i), sb);
-            StringBuilders.escapeXml(sb, size);
+            StringBuilders.escapeXml(sb, start);
             sb.append("</Entry>\n");
         }
         sb.append("</Map>");
diff --git 
a/log4j-api/src/main/java/org/apache/logging/log4j/util/StringBuilders.java 
b/log4j-api/src/main/java/org/apache/logging/log4j/util/StringBuilders.java
index 8f697d53f1..1ee2012cc7 100644
--- a/log4j-api/src/main/java/org/apache/logging/log4j/util/StringBuilders.java
+++ b/log4j-api/src/main/java/org/apache/logging/log4j/util/StringBuilders.java
@@ -27,6 +27,8 @@ import java.util.Map.Entry;
 @InternalApi
 public final class StringBuilders {
 
+    private static final char REPLACEMENT_CHAR = '\uFFFD';
+
     private static final Class<?> timeClass;
     private static final Class<?> dateClass;
 
@@ -310,8 +312,8 @@ public final class StringBuilders {
      */
     public static void escapeXml(final StringBuilder toAppendTo, final int 
start) {
         int escapeCount = 0;
-        for (int i = start; i < toAppendTo.length(); i++) {
-            final char c = toAppendTo.charAt(i);
+        for (int i = start; i < toAppendTo.length(); ) {
+            final int c = toAppendTo.codePointAt(i);
             switch (c) {
                 case '&':
                     escapeCount += 4;
@@ -323,15 +325,36 @@ public final class StringBuilders {
                 case '"':
                 case '\'':
                     escapeCount += 5;
+                    break;
+                default:
+                    // All invalid XML 1.0 characters have the same length as 
the replacement character
+                    // Therefore no additional adjustment is needed
             }
+            i += Character.charCount(c);
         }
 
         final int lastChar = toAppendTo.length() - 1;
+        if (lastChar < 0) {
+            return;
+        }
         toAppendTo.setLength(toAppendTo.length() + escapeCount);
         int lastPos = toAppendTo.length() - 1;
 
-        for (int i = lastChar; lastPos > i; i--) {
+        for (int i = lastChar; lastPos >= start; i--) {
             final char c = toAppendTo.charAt(i);
+            // Handle surrogate pairs and invalid low surrogates
+            if (i > 0 && Character.isLowSurrogate(c)) {
+                final char previous = toAppendTo.charAt(i - 1);
+                // Invalid low surrogate
+                if (!Character.isHighSurrogate(previous)) {
+                    toAppendTo.setCharAt(lastPos--, REPLACEMENT_CHAR);
+                } else {
+                    toAppendTo.setCharAt(lastPos--, c);
+                    toAppendTo.setCharAt(lastPos--, previous);
+                    i--;
+                }
+                continue;
+            }
             switch (c) {
                 case '&':
                     toAppendTo.setCharAt(lastPos--, ';');
@@ -369,8 +392,32 @@ public final class StringBuilders {
                     toAppendTo.setCharAt(lastPos--, '&');
                     break;
                 default:
-                    toAppendTo.setCharAt(lastPos--, c);
+                    toAppendTo.setCharAt(lastPos--, isValidXml10(c) ? c : 
REPLACEMENT_CHAR);
             }
         }
     }
+
+    /**
+     * Checks if a code point is a valid XML 1.0 character
+     *
+     * <p>This method is restricted to characters in the BMP, i.e. represented 
by one UTF-16 code unit.</p>
+     *
+     * @param codePoint a code point
+     * @return {@code true} if it is a valid XML 1.0 code point
+     */
+    private static boolean isValidXml10(final char codePoint) {
+        // XML 1.0 valid characters (Fifth Edition):
+        //   #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | 
[#x10000-#x10FFFF]
+
+        // [#x20–#xD7FF] (placed early as a fast path for the most common case)
+        return (codePoint >= ' ' && codePoint < Character.MIN_SURROGATE)
+                // #x9
+                || codePoint == '\t'
+                // #xA
+                || codePoint == '\n'
+                // #xD
+                || codePoint == '\r'
+                // [#xE000-#xFFFD]
+                || (codePoint > Character.MAX_SURROGATE && codePoint <= 
0xFFFD);
+    }
 }
diff --git a/src/changelog/.2.x.x/map-message-control-characters.xml 
b/src/changelog/.2.x.x/map-message-control-characters.xml
new file mode 100644
index 0000000000..80715b70fc
--- /dev/null
+++ b/src/changelog/.2.x.x/map-message-control-characters.xml
@@ -0,0 +1,11 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<entry xmlns="https://logging.apache.org/xml/ns";
+       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance";
+       xsi:schemaLocation="
+           https://logging.apache.org/xml/ns
+           https://logging.apache.org/xml/ns/log4j-changelog-0.xsd";
+       type="fixed">
+    <description format="asciidoc">
+        Replace invalid characters in MapMessage.asXml() output with the 
Unicode replacement character (U+FFFD).
+    </description>
+</entry>

Reply via email to