This is an automated email from the ASF dual-hosted git repository.
swebb2066 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/logging-log4cxx.git
The following commit(s) were added to refs/heads/master by this push:
new 4f3ba2be Restore support for multi-byte codepoints in JSON output (#615)
4f3ba2be is described below
commit 4f3ba2be829104280cfd0c75a2532537c707cb96
Author: Stephen Webb <[email protected]>
AuthorDate: Mon Mar 30 13:10:53 2026 +1100
Restore support for multi-byte codepoints in JSON output (#615)
---
src/main/cpp/jsonlayout.cpp | 34 +++++++++++++----------------
src/site/markdown/change-report-gh.md | 3 +++
src/test/cpp/jsonlayouttest.cpp | 40 +++++++++++++++++++++++++++++++++++
src/test/cpp/xml/xmllayouttest.cpp | 2 +-
4 files changed, 59 insertions(+), 20 deletions(-)
diff --git a/src/main/cpp/jsonlayout.cpp b/src/main/cpp/jsonlayout.cpp
index 54712359..d0f45f27 100644
--- a/src/main/cpp/jsonlayout.cpp
+++ b/src/main/cpp/jsonlayout.cpp
@@ -220,23 +220,24 @@ void JSONLayout::appendItem(const LogString& input,
LogString& buf)
/* add leading quote */
buf.push_back(0x22);
- size_t start = 0;
- size_t index = 0;
-
- for (int ch : input)
+ auto start = input.begin();
+ for (auto nextCodePoint = start; input.end() != nextCodePoint; )
{
- if (0x22 == ch || 0x5c == ch)
- ;
- else if (0x20 <= ch)
- {
- ++index;
- continue;
- }
- if (start < index)
+ auto lastCodePoint = nextCodePoint;
+ auto ch = Transcoder::decode(input, nextCodePoint);
+ if (nextCodePoint == lastCodePoint) // failed to decode input?
{
- buf.append(input, start, index - start);
+ nextCodePoint = input.end();
+ ch = 0xFFFD; // The Unicode replacement character
}
+ else if (0x22 == ch || 0x5c == ch) // double quote or backslash?
+ ;
+ else if (0x20 <= ch) // not a control character?
+ continue;
+ if (start != lastCodePoint)
+ buf.append(start, lastCodePoint);
+ start = nextCodePoint;
switch (ch)
{
case 0x08:
@@ -290,13 +291,8 @@ void JSONLayout::appendItem(const LogString& input,
LogString& buf)
buf.push_back(toHexDigit(ch & 0xF));
break;
}
- start = ++index;
- }
-
- if (start < input.size())
- {
- buf.append(input, start, input.size() - start);
}
+ buf.append(start, input.end());
/* add trailing quote */
buf.push_back(0x22);
diff --git a/src/site/markdown/change-report-gh.md b/src/site/markdown/change-report-gh.md
index 52bc740e..72b9ad70 100644
--- a/src/site/markdown/change-report-gh.md
+++ b/src/site/markdown/change-report-gh.md
@@ -68,6 +68,9 @@ Release 1.7.0 includes the following new features:
The following issues have been addressed:
+* Non-ASCII characters were incorrectly encoded in JSON output
+ \[[#615](https://github.com/apache/logging-log4cxx/pull/615)\]
+
* XML output could contain characters not allowed by the XML 1.0 specification
* [#609](https://github.com/apache/logging-log4cxx/pull/609)
, [#610](https://github.com/apache/logging-log4cxx/pull/610)
diff --git a/src/test/cpp/jsonlayouttest.cpp b/src/test/cpp/jsonlayouttest.cpp
index 75f17dc0..eeaafc4f 100644
--- a/src/test/cpp/jsonlayouttest.cpp
+++ b/src/test/cpp/jsonlayouttest.cpp
@@ -23,6 +23,7 @@
#include <iostream>
#include <log4cxx/helpers/stringhelper.h>
+#include <log4cxx/helpers/transcoder.h>
using namespace log4cxx;
@@ -59,6 +60,7 @@ LOGUNIT_CLASS(JSONLayoutTest), public JSONLayout
LOGUNIT_TEST(testFormat);
LOGUNIT_TEST(testFormatWithPrettyPrint);
LOGUNIT_TEST(testGetSetLocationInfo);
+ LOGUNIT_TEST(testAppendQuotedEscapedString);
LOGUNIT_TEST_SUITE_END();
@@ -492,6 +494,44 @@ public:
layout.setPrettyPrint(false);
LOGUNIT_ASSERT_EQUAL(false, layout.getPrettyPrint());
}
+
+ /**
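+ * Tests that control characters and double quotes are escaped,
+ * that multi-byte code points are copied to the output unchanged,
+ * and that a code point which cannot be converted to UTF-16 (0xD822)
+ * produces the output expected for the configured logchar type.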
+ */
+ void testAppendQuotedEscapedString()
+ {
+ std::string problemMessage = "'\001\"Hello \"\004'";
+ LogString expectedQuotedEscapedMessage =
LOG4CXX_STR("\"'\\u0001\\\"Hello \\\"\\u0004'\"");
+ LOG4CXX_DECODE_CHAR(problemMessageLS, problemMessage);
+ LogString quotedEscapedMessage;
+ appendQuotedEscapedString(quotedEscapedMessage,
problemMessageLS);
+ LOGUNIT_ASSERT_EQUAL(expectedQuotedEscapedMessage,
quotedEscapedMessage);
+
+ // '\"räksmörgås.josefsson.org\"'
+ std::string problemName =
"'\"\162\303\244\153\163\155\303\266\162\147\303\245\163\056\152\157\163\145\146\163\163\157\156\056\157\162\147\"'";
+ LOG4CXX_DECODE_CHAR(problemNameLS, problemName);
+ LogString expectedQuotedEscapedName = LOG4CXX_STR("\"") +
problemNameLS + LOG4CXX_STR("\"");
+ expectedQuotedEscapedName.insert(2, 1, 0x5c); // insert a
backslash before the first double quote
+
expectedQuotedEscapedName.insert(expectedQuotedEscapedName.size() - 3, 1,
0x5c); // insert a backslash before the last double quote
+ LogString quotedEscapedName;
+ appendQuotedEscapedString(quotedEscapedName, problemNameLS);
+ LOGUNIT_ASSERT_EQUAL(expectedQuotedEscapedName,
quotedEscapedName);
+
+ Transcoder::encode(0xD822, problemNameLS); // Add a character
that cannot be converted to UTF16
+#if LOG4CXX_LOGCHAR_IS_WCHAR && defined(__STDC_ISO_10646__)
+ expectedQuotedEscapedName[expectedQuotedEscapedName.size() - 1]
= 0xD822;
+ expectedQuotedEscapedName += 0x22; // Add a double quote at the
end
+#elif LOG4CXX_LOGCHAR_IS_WCHAR
+ // encodeUTF16 adds 0xD822, but decodeUTF16 cannot convert 0xD822
+
expectedQuotedEscapedName.insert(expectedQuotedEscapedName.size() - 1,
LOG4CXX_STR("\\ufffd")); // The Unicode replacement character
+#elif LOG4CXX_LOGCHAR_IS_UTF8
+ // 0xD822 is encoded in UTF-8 as 0xED 0xA0 0xA2
+
expectedQuotedEscapedName.insert(expectedQuotedEscapedName.size() - 1,
"\xED\xA0\xA2");
+#endif
+ LogString escapedQuoted0xD822Name;
+ appendQuotedEscapedString(escapedQuoted0xD822Name,
problemNameLS);
+ LOGUNIT_ASSERT_EQUAL(expectedQuotedEscapedName,
escapedQuoted0xD822Name);
+ }
};
diff --git a/src/test/cpp/xml/xmllayouttest.cpp b/src/test/cpp/xml/xmllayouttest.cpp
index 016d64c3..e8f185b8 100644
--- a/src/test/cpp/xml/xmllayouttest.cpp
+++ b/src/test/cpp/xml/xmllayouttest.cpp
@@ -373,7 +373,7 @@ public:
void testProblemCharacters()
{
// '\"<räksmörgås.josefsson.org>&\"'
- std::string problemName =
"'\"\162\303\244\153\163\155\303\266\162\147\303\245\163\056\152\157\163\145\146\163\163\157\156\056\157\162\147>&\"'";
+ std::string problemName =
"'\"<\162\303\244\153\163\155\303\266\162\147\303\245\163\056\152\157\163\145\146\163\163\157\156\056\157\162\147>&\"'";
LOG4CXX_DECODE_CHAR(problemNameLS, problemName);
auto loggerNameLS = problemNameLS;
auto levelNameLS = problemNameLS;