This is an automated email from the ASF dual-hosted git repository. swebb2066 pushed a commit to branch json_layout_unicode_support in repository https://gitbox.apache.org/repos/asf/logging-log4cxx.git
commit e41540bae8fe01447edd0cb37198d3938a4429de Author: Stephen Webb <[email protected]> AuthorDate: Sat Mar 28 15:14:01 2026 +1100 Restore support for multi-byte codepoints in JSON output --- src/main/cpp/jsonlayout.cpp | 34 +++++++++++++++------------------- src/test/cpp/jsonlayouttest.cpp | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 19 deletions(-) diff --git a/src/main/cpp/jsonlayout.cpp b/src/main/cpp/jsonlayout.cpp index 54712359..d0f45f27 100644 --- a/src/main/cpp/jsonlayout.cpp +++ b/src/main/cpp/jsonlayout.cpp @@ -220,23 +220,24 @@ void JSONLayout::appendItem(const LogString& input, LogString& buf) /* add leading quote */ buf.push_back(0x22); - size_t start = 0; - size_t index = 0; - - for (int ch : input) + auto start = input.begin(); + for (auto nextCodePoint = start; input.end() != nextCodePoint; ) { - if (0x22 == ch || 0x5c == ch) - ; - else if (0x20 <= ch) - { - ++index; - continue; - } - if (start < index) + auto lastCodePoint = nextCodePoint; + auto ch = Transcoder::decode(input, nextCodePoint); + if (nextCodePoint == lastCodePoint) // failed to decode input? { - buf.append(input, start, index - start); + nextCodePoint = input.end(); + ch = 0xFFFD; // The Unicode replacement character } + else if (0x22 == ch || 0x5c == ch) // double quote or backslash? + ; + else if (0x20 <= ch) // not a control character? + continue; + if (start != lastCodePoint) + buf.append(start, lastCodePoint); + start = nextCodePoint; switch (ch) { case 0x08: @@ -290,13 +291,8 @@ void JSONLayout::appendItem(const LogString& input, LogString& buf) buf.push_back(toHexDigit(ch & 0xF)); break; } - start = ++index; - } - - if (start < input.size()) - { - buf.append(input, start, input.size() - start); } + buf.append(start, input.end()); /* add trailing quote */ buf.push_back(0x22); diff --git a/src/test/cpp/jsonlayouttest.cpp b/src/test/cpp/jsonlayouttest.cpp index 75f17dc0..083cc481 100644 --- a/src/test/cpp/jsonlayouttest.cpp +++ b/src/test/cpp/jsonlayouttest.cpp @@ -59,6 +59,7 @@ LOGUNIT_CLASS(JSONLayoutTest), public JSONLayout LOGUNIT_TEST(testFormat); LOGUNIT_TEST(testFormatWithPrettyPrint); LOGUNIT_TEST(testGetSetLocationInfo); + LOGUNIT_TEST(testAppendQuotedEscapedString); LOGUNIT_TEST_SUITE_END(); @@ -492,6 +493,43 @@ public: layout.setPrettyPrint(false); LOGUNIT_ASSERT_EQUAL(false, layout.getPrettyPrint()); } + + /** + * Tests Unicode characters. + */ + void testAppendQuotedEscapedString() + { + std::string problemMessage = "'\001\"<Hello >\"\004'"; + LogString expectedQuotedEscapedMessage = LOG4CXX_STR("\"'\\u0001\\\"<Hello >\\\"\\u0004'\""); + LOG4CXX_DECODE_CHAR(problemMessageLS, problemMessage); + LogString quotedEscapedMessage; + appendQuotedEscapedString(quotedEscapedMessage, problemMessageLS); + LOGUNIT_ASSERT_EQUAL(expectedQuotedEscapedMessage, quotedEscapedMessage); + + // '\"<räksmörgås.josefsson.org>\"' + std::string problemName = "'\"\162\303\244\153\163\155\303\266\162\147\303\245\163\056\152\157\163\145\146\163\163\157\156\056\157\162\147>\"'"; + LOG4CXX_DECODE_CHAR(problemNameLS, problemName); + LogString expectedQuotedEscapedName = LOG4CXX_STR("\"'\\\"\162\303\244\153\163\155\303\266\162\147\303\245\163\056\152\157\163\145\146\163\163\157\156\056\157\162\147>\\\"'\""); + LogString quotedEscapedName; + appendQuotedEscapedString(quotedEscapedName, problemNameLS); + LOGUNIT_ASSERT_EQUAL(expectedQuotedEscapedName, quotedEscapedName); + + Transcoder::encode(0xD822, problemNameLS); // Add a character that cannot be converted to UTF16 +#if LOG4CXX_LOGCHAR_IS_WCHAR && defined(__STDC_ISO_10646__) + expectedQuotedEscapedName.replace(expectedQuotedEscapedName.size() - 1, 1, 0xD822); + expectedQuotedEscapedName += 0x22; // Add a double quote at the end +#elif LOG4CXX_LOGCHAR_IS_WCHAR + // encodeUTF16 adds 0xD822, but decodeUTF16 cannot convert 0xD822 + // The Unicode replacement character is the following utf-8 sequence + expectedQuotedEscapedName.replace(expectedQuotedEscapedName.size() - 1, 1, "\xEF\xBF\xBD\""); +#elif LOG4CXX_LOGCHAR_IS_UTF8 + // 0xD822 is encoded in UTF-8 as 0xED 0xA0 0xA2 + expectedQuotedEscapedName.replace(expectedQuotedEscapedName.size() - 1, 1, "\xED\xa0\xa2\""); +#endif + LogString escapedQuoted0xD822Name; + appendQuotedEscapedString(escapedQuoted0xD822Name, problemNameLS); + LOGUNIT_ASSERT_EQUAL(expectedQuotedEscapedName, escapedQuoted0xD822Name); + } };
