This is an automated email from the ASF dual-hosted git repository.

pkarwasz pushed a commit to branch fix/2.25.x/xml-control-characters
in repository https://gitbox.apache.org/repos/asf/logging-log4j2.git

commit 7948eca29c4284edb56483d01141def6d3b0cf4e
Author: Piotr P. Karwasz <[email protected]>
AuthorDate: Tue Mar 10 21:11:20 2026 +0100

    XmlLayout: replace invalid XML characters with U+FFFD
    
    This change sanitizes the output of `XmlLayout` by replacing characters 
that are not permitted in XML 1.0 with the Unicode replacement character 
(`U+FFFD`).
    
    This guarantees that the generated log output is always well-formed XML and 
can be parsed by any XML 1.0–compliant parser, even when log data contains 
control characters or other invalid code points.
    
    Co-authored-by: Volkan Yazıcı <[email protected]>
---
 .../logging/log4j/core/layout/XmlLayoutTest.java   |  89 ++++++++++++
 log4j-core/pom.xml                                 |   7 +-
 .../log4j/core/jackson/Log4jXmlObjectMapper.java   | 149 ++++++++++++++++++++-
 log4j-parent/pom.xml                               |   7 +
 src/changelog/.2.x.x/xml-control-characters.xml    |  11 ++
 5 files changed, 261 insertions(+), 2 deletions(-)

diff --git 
a/log4j-core-test/src/test/java/org/apache/logging/log4j/core/layout/XmlLayoutTest.java
 
b/log4j-core-test/src/test/java/org/apache/logging/log4j/core/layout/XmlLayoutTest.java
index 0f246d2838..cda4ce851c 100644
--- 
a/log4j-core-test/src/test/java/org/apache/logging/log4j/core/layout/XmlLayoutTest.java
+++ 
b/log4j-core-test/src/test/java/org/apache/logging/log4j/core/layout/XmlLayoutTest.java
@@ -37,6 +37,7 @@ import org.apache.logging.log4j.core.Appender;
 import org.apache.logging.log4j.core.Logger;
 import org.apache.logging.log4j.core.LoggerContext;
 import org.apache.logging.log4j.core.config.ConfigurationFactory;
+import org.apache.logging.log4j.core.impl.ContextDataFactory;
 import org.apache.logging.log4j.core.impl.Log4jLogEvent;
 import org.apache.logging.log4j.core.jackson.Log4jXmlObjectMapper;
 import org.apache.logging.log4j.core.lookup.JavaLookup;
@@ -46,13 +47,18 @@ import 
org.apache.logging.log4j.core.test.categories.Layouts;
 import org.apache.logging.log4j.core.util.KeyValuePair;
 import org.apache.logging.log4j.message.SimpleMessage;
 import org.apache.logging.log4j.spi.AbstractLogger;
+import org.apache.logging.log4j.spi.DefaultThreadContextStack;
 import org.apache.logging.log4j.test.junit.ThreadContextRule;
+import org.apache.logging.log4j.util.StringMap;
+import org.assertj.core.api.Assertions;
 import org.junit.AfterClass;
 import org.junit.Assert;
 import org.junit.BeforeClass;
 import org.junit.Rule;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.ValueSource;
 
 /**
  * Tests {@link XmlLayout}.
@@ -397,4 +403,87 @@ public class XmlLayoutTest {
         final String str = 
layout.toSerializable(LogEventFixtures.createLogEvent());
         assertFalse(str.endsWith("\0"));
     }
+
+    // In Java 11 this can be replaced with Character.toString
+    private static String codePointToString(final int codePoint) {
+        return new String(Character.toChars(codePoint));
+    }
+
+    private static Log4jLogEvent createLogEventWithString(final String str) {
+        final Marker marker = MarkerManager.getMarker("marker" + str);
+
+        final RuntimeException thrown = new RuntimeException("thrown" + str);
+        thrown.addSuppressed(new IllegalStateException("suppressed" + str));
+
+        final StringMap contextData = ContextDataFactory.createContextData();
+        contextData.putValue("mdcKey" + str, "mdcValue" + str);
+
+        final DefaultThreadContextStack contextStack = new 
DefaultThreadContextStack();
+        contextStack.clear();
+        contextStack.push("contextStack" + str);
+
+        final StackTraceElement source =
+                new StackTraceElement("class" + str, "method" + str, "file" + 
str + ".java", 123);
+
+        return Log4jLogEvent.newBuilder()
+                .setLoggerName("logger" + str)
+                .setMarker(marker)
+                .setLoggerFqcn("fqcn" + str)
+                .setLevel(Level.DEBUG)
+                .setMessage(new SimpleMessage("message" + str))
+                .setThrown(thrown)
+                .setContextData(contextData)
+                .setContextStack(contextStack)
+                .setThreadName("thread" + str)
+                .setSource(source)
+                .setTimeMillis(1L)
+                .build();
+    }
+
+    @ParameterizedTest
+    @ValueSource(
+            strings = {
+                "\u0000",
+                "\u001F",
+                // hi surrogate
+                "\uD800",
+                // low surrogate
+                "\uDC00",
+                // invalid chars
+                "\uFFFE",
+                "\uFFFF"
+            })
+    void testInvalidXmlCharsAreSanitized(final String invalidXmlChars) {
+        final Log4jLogEvent event = createLogEventWithString(invalidXmlChars);
+        final AbstractJacksonLayout layout = XmlLayout.newBuilder()
+                .setCompact(true)
+                .setIncludeStacktrace(true)
+                .setLocationInfo(true)
+                .setProperties(true)
+                .build();
+        final String str = layout.toSerializable(event);
+        
Assertions.assertThat(str).doesNotContain(invalidXmlChars).contains("\uFFFD");
+    }
+
+    @ParameterizedTest
+    @ValueSource(
+            strings = {
+                " ",
+                "A",
+                // First character from supplementary plane
+                "\uD801\uDC00",
+                // Last character from supplementary plane
+                "\uDBFF\uDFFF"
+            })
+    void testValidXmlCharsAreKept(final String validXmlChars) {
+        final Log4jLogEvent event = createLogEventWithString(validXmlChars);
+        final AbstractJacksonLayout layout = XmlLayout.newBuilder()
+                .setCompact(true)
+                .setIncludeStacktrace(true)
+                .setLocationInfo(true)
+                .setProperties(true)
+                .build();
+        final String str = layout.toSerializable(event);
+        
Assertions.assertThat(str).contains(validXmlChars).doesNotContain("\uFFFD");
+    }
 }
diff --git a/log4j-core/pom.xml b/log4j-core/pom.xml
index 49293f47df..b2f34d4ab9 100644
--- a/log4j-core/pom.xml
+++ b/log4j-core/pom.xml
@@ -66,7 +66,7 @@
       org.apache.commons.compress.*;resolution:=optional,
       org.apache.commons.csv;resolution:=optional,
       org.apache.kafka.*;resolution:=optional,
-      org.codehaus.stax2;resolution:=optional,
+      org.codehaus.stax2.*;resolution:=optional,
       org.jctools.*;resolution:=optional,
       org.zeromq;resolution:=optional,
       javax.lang.model.*;resolution:=optional,
@@ -217,6 +217,11 @@
       <scope>runtime</scope>
       <optional>true</optional>
     </dependency>
+    <dependency>
+      <groupId>org.codehaus.woodstox</groupId>
+      <artifactId>stax2-api</artifactId>
+      <optional>true</optional>
+    </dependency>
   </dependencies>
 
   <build>
diff --git 
a/log4j-core/src/main/java/org/apache/logging/log4j/core/jackson/Log4jXmlObjectMapper.java
 
b/log4j-core/src/main/java/org/apache/logging/log4j/core/jackson/Log4jXmlObjectMapper.java
index fa36d1d425..086b2546d9 100644
--- 
a/log4j-core/src/main/java/org/apache/logging/log4j/core/jackson/Log4jXmlObjectMapper.java
+++ 
b/log4j-core/src/main/java/org/apache/logging/log4j/core/jackson/Log4jXmlObjectMapper.java
@@ -17,8 +17,18 @@
 package org.apache.logging.log4j.core.jackson;
 
 import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.core.io.IOContext;
 import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.dataformat.xml.XmlFactory;
 import com.fasterxml.jackson.dataformat.xml.XmlMapper;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.Writer;
+import javax.xml.stream.XMLStreamException;
+import javax.xml.stream.XMLStreamWriter;
+import org.codehaus.stax2.XMLStreamWriter2;
+import org.codehaus.stax2.ri.Stax2WriterAdapter;
+import org.codehaus.stax2.util.StreamWriter2Delegate;
 
 /**
  * A Jackson XML {@link ObjectMapper} initialized for Log4j.
@@ -41,7 +51,144 @@ public class Log4jXmlObjectMapper extends XmlMapper {
      * Create a new instance using the {@link Log4jXmlModule}.
      */
     public Log4jXmlObjectMapper(final boolean includeStacktrace, final boolean 
stacktraceAsString) {
-        super(new Log4jXmlModule(includeStacktrace, stacktraceAsString));
+        super(new SanitizingXmlFactory(), new 
Log4jXmlModule(includeStacktrace, stacktraceAsString));
         this.setSerializationInclusion(JsonInclude.Include.NON_EMPTY);
     }
+
+    /**
+     * Writer that sanitizes text to be valid XML 1.0 by replacing disallowed 
code points with the replacement character (U+FFFD).
+     */
+    private static final class SanitizingWriter extends StreamWriter2Delegate {
+
+        private static final char REPLACEMENT_CHAR = '\uFFFD';
+
+        SanitizingWriter(final XMLStreamWriter2 delegate) {
+            super(delegate);
+            setParent(delegate);
+        }
+
+        @Override
+        public void writeAttribute(final String localName, final String value) 
throws XMLStreamException {
+            super.writeAttribute(localName, sanitizeXml10(value));
+        }
+
+        @Override
+        public void writeAttribute(final String namespaceURI, final String 
localName, final String value)
+                throws XMLStreamException {
+            super.writeAttribute(namespaceURI, localName, 
sanitizeXml10(value));
+        }
+
+        @Override
+        public void writeAttribute(
+                final String prefix, final String namespaceURI, final String 
localName, final String value)
+                throws XMLStreamException {
+            super.writeAttribute(prefix, namespaceURI, localName, 
sanitizeXml10(value));
+        }
+
+        @Override
+        public void writeCData(String text) throws XMLStreamException {
+            super.writeCData(sanitizeXml10(text));
+        }
+
+        @Override
+        public void writeCData(char[] text, int start, int len) throws 
XMLStreamException {
+            super.writeCData(sanitizeXml10(text, start, len));
+        }
+
+        @Override
+        public void writeCharacters(final String text) throws 
XMLStreamException {
+            super.writeCharacters(sanitizeXml10(text));
+        }
+
+        @Override
+        public void writeCharacters(final char[] text, final int start, final 
int len) throws XMLStreamException {
+            super.writeCharacters(sanitizeXml10(text, start, len));
+        }
+
+        @Override
+        public void writeComment(String text) throws XMLStreamException {
+            super.writeComment(sanitizeXml10(text));
+        }
+
+        private static String sanitizeXml10(final String input) {
+            if (input == null) {
+                return null;
+            }
+            final int length = input.length();
+            // Only create a new string if we find an invalid code point.
+            // In the common case, this should avoid unnecessary allocations.
+            for (int i = 0; i < length; ) {
+                final int cp = input.codePointAt(i);
+                if (!isValidXml10(cp)) {
+                    final StringBuilder out = new StringBuilder(length);
+                    out.append(input, 0, i);
+                    appendSanitized(input, i, length, out);
+                    return out.toString();
+                }
+                i += Character.charCount(cp);
+            }
+            return input;
+        }
+
+        private static String sanitizeXml10(final char[] input, final int 
start, final int len) {
+            return sanitizeXml10(new String(input, start, len));
+        }
+
+        private static void appendSanitized(final String input, int i, final 
int length, final StringBuilder out) {
+            while (i < length) {
+                final int cp = input.codePointAt(i);
+                out.appendCodePoint(isValidXml10(cp) ? cp : REPLACEMENT_CHAR);
+                i += Character.charCount(cp);
+            }
+        }
+
+        /**
+         * Checks if a code point is valid
+         *
+         * @param codePoint a code point between {@code 0} and {@link 
Character#MAX_CODE_POINT}
+         * @return {@code true} if it is a valid XML 1.0 code point
+         */
+        private static boolean isValidXml10(final int codePoint) {
+            assert codePoint >= 0 && codePoint <= Character.MAX_CODE_POINT;
+            // XML 1.0 valid characters (Fifth Edition):
+            //   #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | 
[#x10000-#x10FFFF]
+
+            // [#x20–#xD7FF] (placed early as a fast path for the most common 
case)
+            return (codePoint >= ' ' && codePoint < Character.MIN_SURROGATE)
+                    // #x9
+                    || codePoint == '\t'
+                    // #xA
+                    || codePoint == '\n'
+                    // #xD
+                    || codePoint == '\r'
+                    // [#xE000-#xFFFD]
+                    || (codePoint > Character.MAX_SURROGATE && codePoint <= 
0xFFFD)
+                    // [#x10000-#x10FFFF]
+                    || codePoint >= Character.MIN_SUPPLEMENTARY_CODE_POINT;
+        }
+    }
+
+    /**
+     * Factory that creates {@link SanitizingWriter} instances to ensure that 
all text written to the XML output is valid XML 1.0.
+     */
+    private static final class SanitizingXmlFactory extends XmlFactory {
+
+        private static final long serialVersionUID = 1L;
+
+        @Override
+        protected XMLStreamWriter _createXmlWriter(final IOContext ctxt, final 
Writer w) throws IOException {
+            return new 
SanitizingWriter(Stax2WriterAdapter.wrapIfNecessary(super._createXmlWriter(ctxt,
 w)));
+        }
+
+        @Override
+        protected XMLStreamWriter _createXmlWriter(final IOContext ctxt, final 
OutputStream out) throws IOException {
+            return new 
SanitizingWriter(Stax2WriterAdapter.wrapIfNecessary(super._createXmlWriter(ctxt,
 out)));
+        }
+
+        @Override
+        public XmlFactory copy() {
+            _checkInvalidCopy(SanitizingXmlFactory.class);
+            return new SanitizingXmlFactory();
+        }
+    }
 }
diff --git a/log4j-parent/pom.xml b/log4j-parent/pom.xml
index e939009ee4..fea7d2293a 100644
--- a/log4j-parent/pom.xml
+++ b/log4j-parent/pom.xml
@@ -134,6 +134,7 @@
     <plexus-utils.version>3.6.0</plexus-utils.version>
     <spring-boot.version>2.7.18</spring-boot.version>
     <spring-framework.version>5.3.39</spring-framework.version>
+    <stax2-api.version>4.2.2</stax2-api.version>
     <system-stubs.version>2.0.3</system-stubs.version>
     <velocity.version>1.7</velocity.version>
     <wiremock.version>2.35.2</wiremock.version>
@@ -790,6 +791,12 @@
         </exclusions>
       </dependency>
 
+      <dependency>
+        <groupId>org.codehaus.woodstox</groupId>
+        <artifactId>stax2-api</artifactId>
+        <version>${stax2-api.version}</version>
+      </dependency>
+
       <dependency>
         <groupId>uk.org.webcompere</groupId>
         <artifactId>system-stubs-core</artifactId>
diff --git a/src/changelog/.2.x.x/xml-control-characters.xml 
b/src/changelog/.2.x.x/xml-control-characters.xml
new file mode 100644
index 0000000000..846b1182db
--- /dev/null
+++ b/src/changelog/.2.x.x/xml-control-characters.xml
@@ -0,0 +1,11 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<entry xmlns="https://logging.apache.org/xml/ns";
+       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance";
+       xsi:schemaLocation="
+           https://logging.apache.org/xml/ns
+           https://logging.apache.org/xml/ns/log4j-changelog-0.xsd";
+       type="fixed">
+    <description format="asciidoc">
+        Replace invalid characters in XmlLayout output with the Unicode 
replacement character (U+FFFD).
+    </description>
+</entry>

Reply via email to