This is an automated email from the ASF dual-hosted git repository. pkarwasz pushed a commit to branch fix/2.25.x/xml-control-characters in repository https://gitbox.apache.org/repos/asf/logging-log4j2.git
commit 7948eca29c4284edb56483d01141def6d3b0cf4e Author: Piotr P. Karwasz <[email protected]> AuthorDate: Tue Mar 10 21:11:20 2026 +0100 XmlLayout: replace invalid XML characters with U+FFFD This change sanitizes the output of `XmlLayout` by replacing characters that are not permitted in XML 1.0 with the Unicode replacement character (`U+FFFD`). This guarantees that the generated log output is always well-formed XML and can be parsed by any XML 1.0–compliant parser, even when log data contains control characters or other invalid code points. Co-authored-by: Volkan Yazıcı <[email protected]> --- .../logging/log4j/core/layout/XmlLayoutTest.java | 89 ++++++++++++ log4j-core/pom.xml | 7 +- .../log4j/core/jackson/Log4jXmlObjectMapper.java | 149 ++++++++++++++++++++- log4j-parent/pom.xml | 7 + src/changelog/.2.x.x/xml-control-characters.xml | 11 ++ 5 files changed, 261 insertions(+), 2 deletions(-) diff --git a/log4j-core-test/src/test/java/org/apache/logging/log4j/core/layout/XmlLayoutTest.java b/log4j-core-test/src/test/java/org/apache/logging/log4j/core/layout/XmlLayoutTest.java index 0f246d2838..cda4ce851c 100644 --- a/log4j-core-test/src/test/java/org/apache/logging/log4j/core/layout/XmlLayoutTest.java +++ b/log4j-core-test/src/test/java/org/apache/logging/log4j/core/layout/XmlLayoutTest.java @@ -37,6 +37,7 @@ import org.apache.logging.log4j.core.Appender; import org.apache.logging.log4j.core.Logger; import org.apache.logging.log4j.core.LoggerContext; import org.apache.logging.log4j.core.config.ConfigurationFactory; +import org.apache.logging.log4j.core.impl.ContextDataFactory; import org.apache.logging.log4j.core.impl.Log4jLogEvent; import org.apache.logging.log4j.core.jackson.Log4jXmlObjectMapper; import org.apache.logging.log4j.core.lookup.JavaLookup; @@ -46,13 +47,18 @@ import org.apache.logging.log4j.core.test.categories.Layouts; import org.apache.logging.log4j.core.util.KeyValuePair; import org.apache.logging.log4j.message.SimpleMessage; import org.apache.logging.log4j.spi.AbstractLogger; +import org.apache.logging.log4j.spi.DefaultThreadContextStack; import org.apache.logging.log4j.test.junit.ThreadContextRule; +import org.apache.logging.log4j.util.StringMap; +import org.assertj.core.api.Assertions; import org.junit.AfterClass; import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Rule; import org.junit.Test; import org.junit.experimental.categories.Category; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; /** * Tests {@link XmlLayout}. @@ -397,4 +403,87 @@ public class XmlLayoutTest { final String str = layout.toSerializable(LogEventFixtures.createLogEvent()); assertFalse(str.endsWith("\0")); } + + // In Java 11 this can be replaced with Character.toString + private static String codePointToString(final int codePoint) { + return new String(Character.toChars(codePoint)); + } + + private static Log4jLogEvent createLogEventWithString(final String str) { + final Marker marker = MarkerManager.getMarker("marker" + str); + + final RuntimeException thrown = new RuntimeException("thrown" + str); + thrown.addSuppressed(new IllegalStateException("suppressed" + str)); + + final StringMap contextData = ContextDataFactory.createContextData(); + contextData.putValue("mdcKey" + str, "mdcValue" + str); + + final DefaultThreadContextStack contextStack = new DefaultThreadContextStack(); + contextStack.clear(); + contextStack.push("contextStack" + str); + + final StackTraceElement source = + new StackTraceElement("class" + str, "method" + str, "file" + str + ".java", 123); + + return Log4jLogEvent.newBuilder() + .setLoggerName("logger" + str) + .setMarker(marker) + .setLoggerFqcn("fqcn" + str) + .setLevel(Level.DEBUG) + .setMessage(new SimpleMessage("message" + str)) + .setThrown(thrown) + .setContextData(contextData) + .setContextStack(contextStack) + .setThreadName("thread" + str) + .setSource(source) + .setTimeMillis(1L) + .build(); + } + + @ParameterizedTest + @ValueSource( + strings = { + "\u0000", + "\u001F", + // hi surrogate + "\uD800", + // low surrogate + "\uDC00", + // invalid chars + "\uFFFE", + "\uFFFF" + }) + void testInvalidXmlCharsAreSanitized(final String invalidXmlChars) { + final Log4jLogEvent event = createLogEventWithString(invalidXmlChars); + final AbstractJacksonLayout layout = XmlLayout.newBuilder() + .setCompact(true) + .setIncludeStacktrace(true) + .setLocationInfo(true) + .setProperties(true) + .build(); + final String str = layout.toSerializable(event); + Assertions.assertThat(str).doesNotContain(invalidXmlChars).contains("\uFFFD"); + } + + @ParameterizedTest + @ValueSource( + strings = { + " ", + "A", + // First character from supplementary plane + "\uD801\uDC00", + // Last character from supplementary plane + "\uDBFF\uDFFF" + }) + void testValidXmlCharsAreKept(final String validXmlChars) { + final Log4jLogEvent event = createLogEventWithString(validXmlChars); + final AbstractJacksonLayout layout = XmlLayout.newBuilder() + .setCompact(true) + .setIncludeStacktrace(true) + .setLocationInfo(true) + .setProperties(true) + .build(); + final String str = layout.toSerializable(event); + Assertions.assertThat(str).contains(validXmlChars).doesNotContain("\uFFFD"); + } } diff --git a/log4j-core/pom.xml b/log4j-core/pom.xml index 49293f47df..b2f34d4ab9 100644 --- a/log4j-core/pom.xml +++ b/log4j-core/pom.xml @@ -66,7 +66,7 @@ org.apache.commons.compress.*;resolution:=optional, org.apache.commons.csv;resolution:=optional, org.apache.kafka.*;resolution:=optional, - org.codehaus.stax2;resolution:=optional, + org.codehaus.stax2.*;resolution:=optional, org.jctools.*;resolution:=optional, org.zeromq;resolution:=optional, javax.lang.model.*;resolution:=optional, @@ -217,6 +217,11 @@ <scope>runtime</scope> <optional>true</optional> </dependency> + <dependency> + <groupId>org.codehaus.woodstox</groupId> + <artifactId>stax2-api</artifactId> + <optional>true</optional> + </dependency> </dependencies> <build> diff --git a/log4j-core/src/main/java/org/apache/logging/log4j/core/jackson/Log4jXmlObjectMapper.java b/log4j-core/src/main/java/org/apache/logging/log4j/core/jackson/Log4jXmlObjectMapper.java index fa36d1d425..086b2546d9 100644 --- a/log4j-core/src/main/java/org/apache/logging/log4j/core/jackson/Log4jXmlObjectMapper.java +++ b/log4j-core/src/main/java/org/apache/logging/log4j/core/jackson/Log4jXmlObjectMapper.java @@ -17,8 +17,18 @@ package org.apache.logging.log4j.core.jackson; import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.core.io.IOContext; import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.dataformat.xml.XmlFactory; import com.fasterxml.jackson.dataformat.xml.XmlMapper; +import java.io.IOException; +import java.io.OutputStream; +import java.io.Writer; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamWriter; +import org.codehaus.stax2.XMLStreamWriter2; +import org.codehaus.stax2.ri.Stax2WriterAdapter; +import org.codehaus.stax2.util.StreamWriter2Delegate; /** * A Jackson XML {@link ObjectMapper} initialized for Log4j. @@ -41,7 +51,144 @@ public class Log4jXmlObjectMapper extends XmlMapper { * Create a new instance using the {@link Log4jXmlModule}. */ public Log4jXmlObjectMapper(final boolean includeStacktrace, final boolean stacktraceAsString) { - super(new Log4jXmlModule(includeStacktrace, stacktraceAsString)); + super(new SanitizingXmlFactory(), new Log4jXmlModule(includeStacktrace, stacktraceAsString)); this.setSerializationInclusion(JsonInclude.Include.NON_EMPTY); } + + /** + * Writer that sanitizes text to be valid XML 1.0 by replacing disallowed code points with the replacement character (U+FFFD). + */ + private static final class SanitizingWriter extends StreamWriter2Delegate { + + private static final char REPLACEMENT_CHAR = '\uFFFD'; + + SanitizingWriter(final XMLStreamWriter2 delegate) { + super(delegate); + setParent(delegate); + } + + @Override + public void writeAttribute(final String localName, final String value) throws XMLStreamException { + super.writeAttribute(localName, sanitizeXml10(value)); + } + + @Override + public void writeAttribute(final String namespaceURI, final String localName, final String value) + throws XMLStreamException { + super.writeAttribute(namespaceURI, localName, sanitizeXml10(value)); + } + + @Override + public void writeAttribute( + final String prefix, final String namespaceURI, final String localName, final String value) + throws XMLStreamException { + super.writeAttribute(prefix, namespaceURI, localName, sanitizeXml10(value)); + } + + @Override + public void writeCData(String text) throws XMLStreamException { + super.writeCData(sanitizeXml10(text)); + } + + @Override + public void writeCData(char[] text, int start, int len) throws XMLStreamException { + super.writeCData(sanitizeXml10(text, start, len)); + } + + @Override + public void writeCharacters(final String text) throws XMLStreamException { + super.writeCharacters(sanitizeXml10(text)); + } + + @Override + public void writeCharacters(final char[] text, final int start, final int len) throws XMLStreamException { + super.writeCharacters(sanitizeXml10(text, start, len)); + } + + @Override + public void writeComment(String text) throws XMLStreamException { + super.writeComment(sanitizeXml10(text)); + } + + private static String sanitizeXml10(final String input) { + if (input == null) { + return null; + } + final int length = input.length(); + // Only create a new string if we find an invalid code point. + // In the common case, this should avoid unnecessary allocations. + for (int i = 0; i < length; ) { + final int cp = input.codePointAt(i); + if (!isValidXml10(cp)) { + final StringBuilder out = new StringBuilder(length); + out.append(input, 0, i); + appendSanitized(input, i, length, out); + return out.toString(); + } + i += Character.charCount(cp); + } + return input; + } + + private static String sanitizeXml10(final char[] input, final int start, final int len) { + return sanitizeXml10(new String(input, start, len)); + } + + private static void appendSanitized(final String input, int i, final int length, final StringBuilder out) { + while (i < length) { + final int cp = input.codePointAt(i); + out.appendCodePoint(isValidXml10(cp) ? cp : REPLACEMENT_CHAR); + i += Character.charCount(cp); + } + } + + /** + * Checks if a code point is valid + * + * @param codePoint a code point between {@code 0} and {@link Character#MAX_CODE_POINT} + * @return {@code true} if it is a valid XML 1.0 code point + */ + private static boolean isValidXml10(final int codePoint) { + assert codePoint >= 0 && codePoint <= Character.MAX_CODE_POINT; + // XML 1.0 valid characters (Fifth Edition): + // #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] + + // [#x20–#xD7FF] (placed early as a fast path for the most common case) + return (codePoint >= ' ' && codePoint < Character.MIN_SURROGATE) + // #x9 + || codePoint == '\t' + // #xA + || codePoint == '\n' + // #xD + || codePoint == '\r' + // [#xE000-#xFFFD] + || (codePoint > Character.MAX_SURROGATE && codePoint <= 0xFFFD) + // [#x10000-#x10FFFF] + || codePoint >= Character.MIN_SUPPLEMENTARY_CODE_POINT; + } + } + + /** + * Factory that creates {@link SanitizingWriter} instances to ensure that all text written to the XML output is valid XML 1.0. + */ + private static final class SanitizingXmlFactory extends XmlFactory { + + private static final long serialVersionUID = 1L; + + @Override + protected XMLStreamWriter _createXmlWriter(final IOContext ctxt, final Writer w) throws IOException { + return new SanitizingWriter(Stax2WriterAdapter.wrapIfNecessary(super._createXmlWriter(ctxt, w))); + } + + @Override + protected XMLStreamWriter _createXmlWriter(final IOContext ctxt, final OutputStream out) throws IOException { + return new SanitizingWriter(Stax2WriterAdapter.wrapIfNecessary(super._createXmlWriter(ctxt, out))); + } + + @Override + public XmlFactory copy() { + _checkInvalidCopy(SanitizingXmlFactory.class); + return new SanitizingXmlFactory(); + } + } } diff --git a/log4j-parent/pom.xml b/log4j-parent/pom.xml index e939009ee4..fea7d2293a 100644 --- a/log4j-parent/pom.xml +++ b/log4j-parent/pom.xml @@ -134,6 +134,7 @@ <plexus-utils.version>3.6.0</plexus-utils.version> <spring-boot.version>2.7.18</spring-boot.version> <spring-framework.version>5.3.39</spring-framework.version> + <stax2-api.version>4.2.2</stax2-api.version> <system-stubs.version>2.0.3</system-stubs.version> <velocity.version>1.7</velocity.version> <wiremock.version>2.35.2</wiremock.version> @@ -790,6 +791,12 @@ </exclusions> </dependency> + <dependency> + <groupId>org.codehaus.woodstox</groupId> + <artifactId>stax2-api</artifactId> + <version>${stax2-api.version}</version> + </dependency> + <dependency> <groupId>uk.org.webcompere</groupId> <artifactId>system-stubs-core</artifactId> diff --git a/src/changelog/.2.x.x/xml-control-characters.xml b/src/changelog/.2.x.x/xml-control-characters.xml new file mode 100644 index 0000000000..846b1182db --- /dev/null +++ b/src/changelog/.2.x.x/xml-control-characters.xml @@ -0,0 +1,11 @@ +<?xml version="1.0" encoding="UTF-8"?> +<entry xmlns="https://logging.apache.org/xml/ns" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation=" + https://logging.apache.org/xml/ns + https://logging.apache.org/xml/ns/log4j-changelog-0.xsd" + type="fixed"> + <description format="asciidoc"> + Replace invalid characters in XmlLayout output with the Unicode replacement character (U+FFFD). + </description> +</entry>
