This is an automated email from the ASF dual-hosted git repository.

swebb2066 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/logging-log4cxx.git


The following commit(s) were added to refs/heads/master by this push:
     new 4f3ba2be Restore support for multi-byte codepoints in JSON output (#615)
4f3ba2be is described below

commit 4f3ba2be829104280cfd0c75a2532537c707cb96
Author: Stephen Webb <[email protected]>
AuthorDate: Mon Mar 30 13:10:53 2026 +1100

    Restore support for multi-byte codepoints in JSON output (#615)
---
 src/main/cpp/jsonlayout.cpp           | 34 +++++++++++++----------------
 src/site/markdown/change-report-gh.md |  3 +++
 src/test/cpp/jsonlayouttest.cpp       | 40 +++++++++++++++++++++++++++++++++++
 src/test/cpp/xml/xmllayouttest.cpp    |  2 +-
 4 files changed, 59 insertions(+), 20 deletions(-)

diff --git a/src/main/cpp/jsonlayout.cpp b/src/main/cpp/jsonlayout.cpp
index 54712359..d0f45f27 100644
--- a/src/main/cpp/jsonlayout.cpp
+++ b/src/main/cpp/jsonlayout.cpp
@@ -220,23 +220,24 @@ void JSONLayout::appendItem(const LogString& input, LogString& buf)
        /* add leading quote */
        buf.push_back(0x22);
 
-       size_t start = 0;
-       size_t index = 0;
-
-       for (int ch : input)
+       auto start = input.begin();
+       for (auto nextCodePoint = start; input.end() != nextCodePoint; )
        {
-               if (0x22 == ch || 0x5c == ch)
-                       ;
-               else if (0x20 <= ch)
-               {
-                       ++index;
-                       continue;
-               }
-               if (start < index)
+               auto lastCodePoint = nextCodePoint;
+               auto ch = Transcoder::decode(input, nextCodePoint);
+               if (nextCodePoint == lastCodePoint) // failed to decode input?
                {
-                       buf.append(input, start, index - start);
+                       nextCodePoint = input.end();
+                       ch = 0xFFFD; // The Unicode replacement character
                }
+               else if (0x22 == ch || 0x5c == ch) // double quote or backslash?
+                       ;
+               else if (0x20 <= ch) // not a control character?
+                       continue;
 
+               if (start != lastCodePoint)
+                       buf.append(start, lastCodePoint);
+               start = nextCodePoint;
                switch (ch)
                {
                        case 0x08:
@@ -290,13 +291,8 @@ void JSONLayout::appendItem(const LogString& input, LogString& buf)
                                buf.push_back(toHexDigit(ch & 0xF));
                                break;
                }
-               start = ++index;
-       }
-
-       if (start < input.size())
-       {
-               buf.append(input, start, input.size() - start);
        }
+       buf.append(start, input.end());
 
        /* add trailing quote */
        buf.push_back(0x22);
diff --git a/src/site/markdown/change-report-gh.md b/src/site/markdown/change-report-gh.md
index 52bc740e..72b9ad70 100644
--- a/src/site/markdown/change-report-gh.md
+++ b/src/site/markdown/change-report-gh.md
@@ -68,6 +68,9 @@ Release 1.7.0 includes the following new features:
 
 The following issues have been addressed:
 
+* Non-ascii characters incorrectly encoded in JSON output
+   \[[#615](https://github.com/apache/logging-log4cxx/pull/615)\]
+
 * XML output could contain characters not allowed by the XML 1.0 specification
    * [#609](https://github.com/apache/logging-log4cxx/pull/609)
    , [#610](https://github.com/apache/logging-log4cxx/pull/610)
diff --git a/src/test/cpp/jsonlayouttest.cpp b/src/test/cpp/jsonlayouttest.cpp
index 75f17dc0..eeaafc4f 100644
--- a/src/test/cpp/jsonlayouttest.cpp
+++ b/src/test/cpp/jsonlayouttest.cpp
@@ -23,6 +23,7 @@
 
 #include <iostream>
 #include <log4cxx/helpers/stringhelper.h>
+#include <log4cxx/helpers/transcoder.h>
 
 
 using namespace log4cxx;
@@ -59,6 +60,7 @@ LOGUNIT_CLASS(JSONLayoutTest), public JSONLayout
        LOGUNIT_TEST(testFormat);
        LOGUNIT_TEST(testFormatWithPrettyPrint);
        LOGUNIT_TEST(testGetSetLocationInfo);
+       LOGUNIT_TEST(testAppendQuotedEscapedString);
        LOGUNIT_TEST_SUITE_END();
 
 
@@ -492,6 +494,44 @@ public:
                layout.setPrettyPrint(false);
                LOGUNIT_ASSERT_EQUAL(false, layout.getPrettyPrint());
        }
+
+       /**
+        * Tests Unicode characters.
+        */
+       void testAppendQuotedEscapedString()
+       {
+               std::string problemMessage = "'\001\"Hello \"\004'";
+               LogString expectedQuotedEscapedMessage = LOG4CXX_STR("\"'\\u0001\\\"Hello \\\"\\u0004'\"");
+               LOG4CXX_DECODE_CHAR(problemMessageLS, problemMessage);
+               LogString quotedEscapedMessage;
+               appendQuotedEscapedString(quotedEscapedMessage, problemMessageLS);
+               LOGUNIT_ASSERT_EQUAL(expectedQuotedEscapedMessage, quotedEscapedMessage);
+
+               // '\"räksmörgås.josefsson.org\"'
+               std::string problemName = "'\"\162\303\244\153\163\155\303\266\162\147\303\245\163\056\152\157\163\145\146\163\163\157\156\056\157\162\147\"'";
+               LOG4CXX_DECODE_CHAR(problemNameLS, problemName);
+               LogString expectedQuotedEscapedName = LOG4CXX_STR("\"") + problemNameLS + LOG4CXX_STR("\"");
+               expectedQuotedEscapedName.insert(2, 1, 0x5c); // insert a backslash before the first double quote
+               expectedQuotedEscapedName.insert(expectedQuotedEscapedName.size() - 3, 1, 0x5c); // insert a backslash before the last double quote
+               LogString quotedEscapedName;
+               appendQuotedEscapedString(quotedEscapedName, problemNameLS);
+               LOGUNIT_ASSERT_EQUAL(expectedQuotedEscapedName, quotedEscapedName);
+
+               Transcoder::encode(0xD822, problemNameLS); // Add a character that cannot be converted to UTF16
+#if LOG4CXX_LOGCHAR_IS_WCHAR && defined(__STDC_ISO_10646__)
+               expectedQuotedEscapedName[expectedQuotedEscapedName.size() - 1] = 0xD822;
+               expectedQuotedEscapedName += 0x22; // Add a double quote at the end
+#elif LOG4CXX_LOGCHAR_IS_WCHAR
+               // encodeUTF16 adds 0xD822, but decodeUTF16 cannot convert 0xD822
+               expectedQuotedEscapedName.insert(expectedQuotedEscapedName.size() - 1, LOG4CXX_STR("\\ufffd")); // The Unicode replacement character
+#elif LOG4CXX_LOGCHAR_IS_UTF8
+               // 0xD822 is encoded in UTF-8 as 0xED 0xA0 0xA2
+               expectedQuotedEscapedName.insert(expectedQuotedEscapedName.size() - 1, "\xED\xA0\xA2");
+#endif
+               LogString escapedQuoted0xD822Name;
+               appendQuotedEscapedString(escapedQuoted0xD822Name, problemNameLS);
+               LOGUNIT_ASSERT_EQUAL(expectedQuotedEscapedName, escapedQuoted0xD822Name);
+       }
 };
 
 
diff --git a/src/test/cpp/xml/xmllayouttest.cpp b/src/test/cpp/xml/xmllayouttest.cpp
index 016d64c3..e8f185b8 100644
--- a/src/test/cpp/xml/xmllayouttest.cpp
+++ b/src/test/cpp/xml/xmllayouttest.cpp
@@ -373,7 +373,7 @@ public:
        void testProblemCharacters()
        {
                // '\"<räksmörgås.josefsson.org>&\"'
-               std::string problemName = "'\"\162\303\244\153\163\155\303\266\162\147\303\245\163\056\152\157\163\145\146\163\163\157\156\056\157\162\147>&\"'";
+               std::string problemName = "'\"<\162\303\244\153\163\155\303\266\162\147\303\245\163\056\152\157\163\145\146\163\163\157\156\056\157\162\147>&\"'";
                LOG4CXX_DECODE_CHAR(problemNameLS, problemName);
                auto loggerNameLS = problemNameLS;
                auto levelNameLS = problemNameLS;

Reply via email to