This is an automated email from the ASF dual-hosted git repository.
pkarwasz pushed a commit to branch 2.25.x
in repository https://gitbox.apache.org/repos/asf/logging-log4j2.git
The following commit(s) were added to refs/heads/2.25.x by this push:
new 4f50142298 XmlLayout: replace invalid XML characters with U+FFFD
(#4077)
4f50142298 is described below
commit 4f5014229825d8be977662e0743205bb8a67f989
Author: Piotr P. Karwasz <[email protected]>
AuthorDate: Tue Mar 24 23:56:43 2026 +0100
XmlLayout: replace invalid XML characters with U+FFFD (#4077)
This change sanitizes the output of `XmlLayout` by replacing characters
that are not permitted in XML 1.0 with the Unicode replacement character
(`U+FFFD`).
This guarantees that the generated log output is always well-formed XML and
can be parsed by any XML 1.0–compliant parser, even when log data contains
control characters or other invalid code points.
Co-authored-by: Volkan Yazıcı <[email protected]>
---
.../log4j/core/layout/XmlLayoutJUnit5Test.java | 111 +++++++++++++
log4j-core/pom.xml | 7 +-
.../log4j/core/jackson/Log4jXmlObjectMapper.java | 175 ++++++++++++++++++++-
log4j-parent/pom.xml | 7 +
.../.2.x.x/4077_xml-control-characters.xml | 12 ++
5 files changed, 310 insertions(+), 2 deletions(-)
diff --git
a/log4j-core-test/src/test/java/org/apache/logging/log4j/core/layout/XmlLayoutJUnit5Test.java
b/log4j-core-test/src/test/java/org/apache/logging/log4j/core/layout/XmlLayoutJUnit5Test.java
new file mode 100644
index 0000000000..3dcf896c37
--- /dev/null
+++
b/log4j-core-test/src/test/java/org/apache/logging/log4j/core/layout/XmlLayoutJUnit5Test.java
@@ -0,0 +1,111 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.logging.log4j.core.layout;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+import org.apache.logging.log4j.Level;
+import org.apache.logging.log4j.Marker;
+import org.apache.logging.log4j.MarkerManager;
+import org.apache.logging.log4j.core.impl.ContextDataFactory;
+import org.apache.logging.log4j.core.impl.Log4jLogEvent;
+import org.apache.logging.log4j.message.SimpleMessage;
+import org.apache.logging.log4j.spi.DefaultThreadContextStack;
+import org.apache.logging.log4j.util.StringMap;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.ValueSource;
+
+class XmlLayoutJUnit5Test {
+
+ private static Log4jLogEvent createLogEventWithString(final String str) {
+ final Marker marker = MarkerManager.getMarker("marker" + str);
+
+ final RuntimeException thrown = new RuntimeException("thrown" + str);
+ thrown.addSuppressed(new IllegalStateException("suppressed" + str));
+
+ final StringMap contextData = ContextDataFactory.createContextData();
+ contextData.putValue("mdcKey" + str, "mdcValue" + str);
+
+ final DefaultThreadContextStack contextStack = new
DefaultThreadContextStack();
+ contextStack.clear();
+ contextStack.push("contextStack" + str);
+
+ final StackTraceElement source =
+ new StackTraceElement("class" + str, "method" + str, "file" +
str + ".java", 123);
+
+ return Log4jLogEvent.newBuilder()
+ .setLoggerName("logger" + str)
+ .setMarker(marker)
+ .setLoggerFqcn("fqcn" + str)
+ .setLevel(Level.DEBUG)
+ .setMessage(new SimpleMessage("message" + str))
+ .setThrown(thrown)
+ .setContextData(contextData)
+ .setContextStack(contextStack)
+ .setThreadName("thread" + str)
+ .setSource(source)
+ .setTimeMillis(1L)
+ .build();
+ }
+
+ @ParameterizedTest
+ @ValueSource(
+ strings = {
+ "\u0000",
+ "\u001F",
+ // hi surrogate
+ "\uD800",
+ // low surrogate
+ "\uDC00",
+ // invalid chars
+ "\uFFFE",
+ "\uFFFF"
+ })
+ void testInvalidXmlCharsAreSanitized(final String invalidXmlChars) {
+ final Log4jLogEvent event = createLogEventWithString(invalidXmlChars);
+ final AbstractJacksonLayout layout = XmlLayout.newBuilder()
+ .setCompact(true)
+ .setIncludeStacktrace(true)
+ .setLocationInfo(true)
+ .setProperties(true)
+ .build();
+ final String str = layout.toSerializable(event);
+ assertThat(str).doesNotContain(invalidXmlChars).contains("\uFFFD");
+ }
+
+ @ParameterizedTest
+ @ValueSource(
+ strings = {
+ " ",
+ "A",
+ // First character from supplementary plane
+ "\uD801\uDC00",
+ // Last character from supplementary plane
+ "\uDBFF\uDFFF"
+ })
+ void testValidXmlCharsAreKept(final String validXmlChars) {
+ final Log4jLogEvent event = createLogEventWithString(validXmlChars);
+ final AbstractJacksonLayout layout = XmlLayout.newBuilder()
+ .setCompact(true)
+ .setIncludeStacktrace(true)
+ .setLocationInfo(true)
+ .setProperties(true)
+ .build();
+ final String str = layout.toSerializable(event);
+ assertThat(str).contains(validXmlChars).doesNotContain("\uFFFD");
+ }
+}
diff --git a/log4j-core/pom.xml b/log4j-core/pom.xml
index 49293f47df..b2f34d4ab9 100644
--- a/log4j-core/pom.xml
+++ b/log4j-core/pom.xml
@@ -66,7 +66,7 @@
org.apache.commons.compress.*;resolution:=optional,
org.apache.commons.csv;resolution:=optional,
org.apache.kafka.*;resolution:=optional,
- org.codehaus.stax2;resolution:=optional,
+ org.codehaus.stax2.*;resolution:=optional,
org.jctools.*;resolution:=optional,
org.zeromq;resolution:=optional,
javax.lang.model.*;resolution:=optional,
@@ -217,6 +217,11 @@
<scope>runtime</scope>
<optional>true</optional>
</dependency>
+ <dependency>
+ <groupId>org.codehaus.woodstox</groupId>
+ <artifactId>stax2-api</artifactId>
+ <optional>true</optional>
+ </dependency>
</dependencies>
<build>
diff --git
a/log4j-core/src/main/java/org/apache/logging/log4j/core/jackson/Log4jXmlObjectMapper.java
b/log4j-core/src/main/java/org/apache/logging/log4j/core/jackson/Log4jXmlObjectMapper.java
index fa36d1d425..f1b0293a01 100644
---
a/log4j-core/src/main/java/org/apache/logging/log4j/core/jackson/Log4jXmlObjectMapper.java
+++
b/log4j-core/src/main/java/org/apache/logging/log4j/core/jackson/Log4jXmlObjectMapper.java
@@ -17,8 +17,22 @@
package org.apache.logging.log4j.core.jackson;
import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.core.ObjectCodec;
+import com.fasterxml.jackson.core.io.IOContext;
import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.dataformat.xml.XmlFactory;
import com.fasterxml.jackson.dataformat.xml.XmlMapper;
+import com.fasterxml.jackson.dataformat.xml.XmlNameProcessor;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.Writer;
+import javax.xml.stream.XMLInputFactory;
+import javax.xml.stream.XMLOutputFactory;
+import javax.xml.stream.XMLStreamException;
+import javax.xml.stream.XMLStreamWriter;
+import org.codehaus.stax2.XMLStreamWriter2;
+import org.codehaus.stax2.ri.Stax2WriterAdapter;
+import org.codehaus.stax2.util.StreamWriter2Delegate;
/**
* A Jackson XML {@link ObjectMapper} initialized for Log4j.
@@ -41,7 +55,166 @@ public class Log4jXmlObjectMapper extends XmlMapper {
* Create a new instance using the {@link Log4jXmlModule}.
*/
public Log4jXmlObjectMapper(final boolean includeStacktrace, final boolean
stacktraceAsString) {
- super(new Log4jXmlModule(includeStacktrace, stacktraceAsString));
+ super(new SanitizingXmlFactory(), new
Log4jXmlModule(includeStacktrace, stacktraceAsString));
this.setSerializationInclusion(JsonInclude.Include.NON_EMPTY);
}
+
+    /**
+     * Stream writer decorator that replaces every code point not permitted by XML 1.0 with the
+     * replacement character (U+FFFD) before the text reaches the wrapped writer.
+     * <p>
+     *   Sanitized content: attribute values, character data, CDATA sections and comments.
+     * </p>
+     */
+    private static final class SanitizingWriter extends StreamWriter2Delegate {
+
+        private static final char REPLACEMENT_CHAR = '\uFFFD';
+
+        SanitizingWriter(final XMLStreamWriter2 delegate) {
+            super(delegate);
+            // NOTE(review): presumably also registers the delegate as the StAX2-typed parent;
+            // confirm against StreamWriter2Delegate before removing this call.
+            setParent(delegate);
+        }
+
+        @Override
+        public void writeAttribute(final String localName, final String value) throws XMLStreamException {
+            final String safeValue = sanitizeXml10(value);
+            super.writeAttribute(localName, safeValue);
+        }
+
+        @Override
+        public void writeAttribute(final String namespaceURI, final String localName, final String value)
+                throws XMLStreamException {
+            final String safeValue = sanitizeXml10(value);
+            super.writeAttribute(namespaceURI, localName, safeValue);
+        }
+
+        @Override
+        public void writeAttribute(
+                final String prefix, final String namespaceURI, final String localName, final String value)
+                throws XMLStreamException {
+            final String safeValue = sanitizeXml10(value);
+            super.writeAttribute(prefix, namespaceURI, localName, safeValue);
+        }
+
+        @Override
+        public void writeCData(final String text) throws XMLStreamException {
+            final String safeText = sanitizeXml10(text);
+            super.writeCData(safeText);
+        }
+
+        @Override
+        public void writeCData(final char[] text, final int start, final int len) throws XMLStreamException {
+            // The sanitized slice is forwarded through the String overload of the delegate.
+            final String safeText = sanitizeXml10(text, start, len);
+            super.writeCData(safeText);
+        }
+
+        @Override
+        public void writeCharacters(final String text) throws XMLStreamException {
+            final String safeText = sanitizeXml10(text);
+            super.writeCharacters(safeText);
+        }
+
+        @Override
+        public void writeCharacters(final char[] text, final int start, final int len) throws XMLStreamException {
+            // The sanitized slice is forwarded through the String overload of the delegate.
+            final String safeText = sanitizeXml10(text, start, len);
+            super.writeCharacters(safeText);
+        }
+
+        @Override
+        public void writeComment(final String text) throws XMLStreamException {
+            final String safeText = sanitizeXml10(text);
+            super.writeComment(safeText);
+        }
+
+        /**
+         * Returns {@code input} with every code point that XML 1.0 forbids replaced by U+FFFD.
+         *
+         * @param input the text to sanitize, possibly {@code null}
+         * @return {@code null} for a {@code null} input, the same instance when nothing had to be replaced,
+         *         otherwise a new sanitized string
+         */
+        private static String sanitizeXml10(final String input) {
+            if (input == null) {
+                return null;
+            }
+            final int firstInvalid = indexOfFirstInvalid(input);
+            if (firstInvalid < 0) {
+                // Common case: nothing to replace, so no copy is made.
+                return input;
+            }
+            final int end = input.length();
+            final StringBuilder sanitized = new StringBuilder(end);
+            // Everything before the first offender is already known to be valid.
+            sanitized.append(input, 0, firstInvalid);
+            int pos = firstInvalid;
+            while (pos < end) {
+                final int codePoint = input.codePointAt(pos);
+                if (isValidXml10(codePoint)) {
+                    sanitized.appendCodePoint(codePoint);
+                } else {
+                    sanitized.append(REPLACEMENT_CHAR);
+                }
+                pos += Character.charCount(codePoint);
+            }
+            return sanitized.toString();
+        }
+
+        /**
+         * Sanitizes the {@code len} characters of {@code input} beginning at {@code start}.
+         *
+         * @param input the backing character array
+         * @param start index of the first character of the slice
+         * @param len number of characters in the slice
+         * @return the sanitized slice as a string
+         */
+        private static String sanitizeXml10(final char[] input, final int start, final int len) {
+            final String slice = new String(input, start, len);
+            return sanitizeXml10(slice);
+        }
+
+        /**
+         * Finds the position of the first code point that is not allowed in XML 1.0.
+         *
+         * @param text the text to scan, not {@code null}
+         * @return the char index at which the first invalid code point starts, or {@code -1} if there is none
+         */
+        private static int indexOfFirstInvalid(final String text) {
+            final int end = text.length();
+            int pos = 0;
+            while (pos < end) {
+                final int codePoint = text.codePointAt(pos);
+                if (!isValidXml10(codePoint)) {
+                    return pos;
+                }
+                pos += Character.charCount(codePoint);
+            }
+            return -1;
+        }
+
+        /**
+         * Checks whether a code point may appear in an XML 1.0 document.
+         * <p>
+         *   Allowed (XML 1.0, Fifth Edition):
+         *   {@code #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}
+         * </p>
+         *
+         * @param codePoint a code point between {@code 0} and {@link Character#MAX_CODE_POINT}
+         * @return {@code true} if it is a valid XML 1.0 code point
+         */
+        private static boolean isValidXml10(final int codePoint) {
+            assert codePoint >= 0 && codePoint <= Character.MAX_CODE_POINT;
+            if (codePoint < ' ') {
+                // Below #x20 only tab, line feed and carriage return are allowed.
+                return codePoint == '\t' || codePoint == '\n' || codePoint == '\r';
+            }
+            if (codePoint < Character.MIN_SURROGATE) {
+                // [#x20-#xD7FF]
+                return true;
+            }
+            if (codePoint <= Character.MAX_SURROGATE) {
+                // Surrogate code units are never valid on their own.
+                return false;
+            }
+            if (codePoint <= 0xFFFD) {
+                // [#xE000-#xFFFD]
+                return true;
+            }
+            // #xFFFE and #xFFFF are rejected; [#x10000-#x10FFFF] is accepted.
+            return codePoint >= Character.MIN_SUPPLEMENTARY_CODE_POINT;
+        }
+    }
+
+    /**
+     * {@link XmlFactory} that wraps every XML stream writer it creates in a {@link SanitizingWriter},
+     * so that text written through those writers is sanitized to valid XML 1.0.
+     */
+    private static final class SanitizingXmlFactory extends XmlFactory {
+
+        private static final long serialVersionUID = 1L;
+
+        public SanitizingXmlFactory() {}
+
+        /**
+         * Constructor used by {@link #copy()}; passes all settings straight to the superclass.
+         */
+        private SanitizingXmlFactory(
+                final ObjectCodec codec,
+                final int parserFeatures,
+                final int generatorFeatures,
+                final XMLInputFactory inputFactory,
+                final XMLOutputFactory outputFactory,
+                final String textElementName,
+                final XmlNameProcessor processor) {
+            super(
+                    codec,
+                    parserFeatures,
+                    generatorFeatures,
+                    inputFactory,
+                    outputFactory,
+                    textElementName,
+                    processor);
+        }
+
+        /**
+         * Creates a new factory of this same type carrying over the codec, parser and generator features,
+         * StAX factories, text element name and name processor of this instance.
+         */
+        @Override
+        public XmlFactory copy() {
+            _checkInvalidCopy(SanitizingXmlFactory.class);
+            return new SanitizingXmlFactory(
+                    _objectCodec,
+                    _xmlParserFeatures,
+                    _xmlGeneratorFeatures,
+                    _xmlInputFactory,
+                    _xmlOutputFactory,
+                    _cfgNameForTextElement,
+                    _nameProcessor);
+        }
+
+        @Override
+        protected XMLStreamWriter _createXmlWriter(final IOContext ctxt, final Writer w) throws IOException {
+            final XMLStreamWriter raw = super._createXmlWriter(ctxt, w);
+            return sanitizing(raw);
+        }
+
+        @Override
+        protected XMLStreamWriter _createXmlWriter(final IOContext ctxt, final OutputStream out) throws IOException {
+            final XMLStreamWriter raw = super._createXmlWriter(ctxt, out);
+            return sanitizing(raw);
+        }
+
+        /**
+         * Adapts {@code raw} to the StAX2 writer API if needed and wraps it in a {@link SanitizingWriter}.
+         */
+        private static XMLStreamWriter sanitizing(final XMLStreamWriter raw) {
+            return new SanitizingWriter(Stax2WriterAdapter.wrapIfNecessary(raw));
+        }
+    }
}
diff --git a/log4j-parent/pom.xml b/log4j-parent/pom.xml
index e939009ee4..fea7d2293a 100644
--- a/log4j-parent/pom.xml
+++ b/log4j-parent/pom.xml
@@ -134,6 +134,7 @@
<plexus-utils.version>3.6.0</plexus-utils.version>
<spring-boot.version>2.7.18</spring-boot.version>
<spring-framework.version>5.3.39</spring-framework.version>
+ <stax2-api.version>4.2.2</stax2-api.version>
<system-stubs.version>2.0.3</system-stubs.version>
<velocity.version>1.7</velocity.version>
<wiremock.version>2.35.2</wiremock.version>
@@ -790,6 +791,12 @@
</exclusions>
</dependency>
+ <dependency>
+ <groupId>org.codehaus.woodstox</groupId>
+ <artifactId>stax2-api</artifactId>
+ <version>${stax2-api.version}</version>
+ </dependency>
+
<dependency>
<groupId>uk.org.webcompere</groupId>
<artifactId>system-stubs-core</artifactId>
diff --git a/src/changelog/.2.x.x/4077_xml-control-characters.xml
b/src/changelog/.2.x.x/4077_xml-control-characters.xml
new file mode 100644
index 0000000000..ddae6cb481
--- /dev/null
+++ b/src/changelog/.2.x.x/4077_xml-control-characters.xml
@@ -0,0 +1,12 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<entry xmlns="https://logging.apache.org/xml/ns"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="
+ https://logging.apache.org/xml/ns
+ https://logging.apache.org/xml/ns/log4j-changelog-0.xsd"
+ type="fixed">
+ <issue id="4077"
link="https://github.com/apache/logging-log4j2/pull/4077"/>
+ <description format="asciidoc">
+ Replace invalid characters in XmlLayout output with the Unicode
replacement character (U+FFFD).
+ </description>
+</entry>