On Mon, Nov 17, 2025 at 01:08:49PM +0100, Jakub Jelinek wrote:
On Mon, Nov 17, 2025 at 04:45:44PM +0530, Jason Merrill wrote:
On 11/16/25 4:28 AM, Jakub Jelinek wrote:
I've tried to test a patch to switch -std=gnu++17 C++ default
to -std=gnu++20 (will post momentarily), but ran into various problems
during GCC bootstraps, our codebase isn't fully C++20 ready.
The most common problems are arithmetic or bitwise operations
between enumerators of different enum types (or between enumerator
and floating point in the testsuite), ambiguous overloaded
operator == because of forgotten const qualification of const inside
of the argument and then libcody being largely stuck in C++11 and incompatible
with C++20 which introduced char8_t type and uses it for u8 literals.
The following patch fixes various issues I've run into, for libcody
this patch just makes sure code including cody.hh can be compiled
with -std=gnu++20; for libcody itself I have a tweak in the other patch.
Frankly I'm not sure what the point of libcody's u8 dance is; UTF-8 for all
the ASCII characters that it uses S2C(u8"x") for is the same byte as the 'x'
plain character literal. I'd be inclined to strip that all out.
It isn't the same for -fexec-charset=IBM1047 (or other non-ASCII execution
charsets), so perhaps the intent is to communicate using UTF-8 (or ASCII?)
instead of using arbitrary other character set.
Detail::S2C is, from what I understand, just a workaround for C++11 and one can use
(char) u8'x' instead for C++14 and later (perhaps conditionally based on
feature test macros).
I don't know what other projects use libcody.
If it is just GCC, perhaps it is time to update it to minimum C++14 like
rest of GCC and the workarounds can go.
But for C++20 another thing is that I think we'd need to add some extra
overloads for const char8_t * etc. and just cast to const char *. Plus
there are some uses of std::string created from u8 literals, dunno if it
should use std::u8string in that case or have casts to const char * first.
The following builds with -std=c++11 and c++14 and c++17 and c++20 and c++23
and c++26.
I see the u8 string literals are mixed e.g. with strerror, so in
-fexec-charset=IBM1047 there will still be garbage, so I am not 100% sure if
the u8 literals everywhere are worth it either.
2025-11-17 Jakub Jelinek <[email protected]>
* cody.hh (S2C): For __cpp_char8_t >= 201811 use char8_t instead of
char in argument type.
(MessageBuffer::Space): Revert 2025-11-15 change.
(MessageBuffer::AppendWord): For __cpp_char8_t >= 201811 add overload
with char8_t const * type of first argument.
(Packet::Packet): Similarly for second argument.
* client.cc (CommunicationError, Client::ProcessResponse,
Client::Connect, ConnectResponse, PathnameResponse, OKResponse,
IncludeTranslateResponse): Cast u8 string literals to (const char *)
where needed.
* server.cc (Server::ProcessRequests, ConnectRequest): Likewise.
--- libcody/cody.hh.jj 2025-11-15 19:23:39.520293500 +0100
+++ libcody/cody.hh 2025-11-17 16:58:55.949656716 +0100
@@ -47,12 +47,21 @@ namespace Detail {
// C++11 doesn't have utf8 character literals :(
+#if __cpp_char8_t >= 201811
+template<unsigned I>
+constexpr char S2C (char8_t const (&s)[I])
+{
+ static_assert (I == 2, "only single octet strings may be converted");
+ return s[0];
+}
+#else
template<unsigned I>
constexpr char S2C (char const (&s)[I])
{
static_assert (I == 2, "only single octet strings may be converted");
return s[0];
}
+#endif
/// Internal buffering class. Used to concatenate outgoing messages
/// and Lex incoming ones.
@@ -110,11 +119,7 @@ public:
/// Add whitespace word separator. Multiple adjacent whitespace is fine.
void Space ()
{
-#if __cpp_unicode_characters >= 201411
- Append ((char) u8' ');
-#else
Append (Detail::S2C(u8" "));
-#endif
}
public:
@@ -127,6 +132,13 @@ public:
Space ();
Append (str, maybe_quote, len);
}
+#if __cpp_char8_t >= 201811
+ void AppendWord (char8_t const *str, bool maybe_quote = false,
+ size_t len = ~size_t (0))
+ {
+ AppendWord ((const char *) str, maybe_quote, len);
+ }
+#endif
/// Add a word as with AppendWord
/// @param str the string to append
/// @param maybe_quote string might need quoting, as for Append
@@ -268,6 +280,12 @@ public:
: string (s), cat (STRING), code (c)
{
}
+#if __cpp_char8_t >= 201811
+ Packet (unsigned c, const char8_t *s)
+ : string ((const char *) s), cat (STRING), code (c)
+ {
+ }
+#endif
Packet (unsigned c, std::vector<std::string> &&v)
: vector (std::move (v)), cat (VECTOR), code (c)
{
--- libcody/client.cc.jj 2021-01-05 00:13:58.118299183 +0100
+++ libcody/client.cc 2025-11-17 17:01:27.069591926 +0100
@@ -97,7 +97,7 @@ int Client::CommunicateWithServer ()
static Packet CommunicationError (int err)
{
- std::string e {u8"communication error:"};
+ std::string e {(const char *) u8"communication error:"};
e.append (strerror (err));
return Packet (Client::PC_ERROR, std::move (e));
@@ -110,33 +110,34 @@ Packet Client::ProcessResponse (std::vec
{
if (e == EINVAL)
{
- std::string msg (u8"malformed string '");
+ std::string msg ((const char *) u8"malformed string '");
msg.append (words[0]);
- msg.append (u8"'");
+ msg.append ((const char *) u8"'");
return Packet (Client::PC_ERROR, std::move (msg));
}
else
- return Packet (Client::PC_ERROR, u8"missing response");
+ return Packet (Client::PC_ERROR, (const char *) u8"missing response");
}
Assert (!words.empty ());
- if (words[0] == u8"ERROR")
+ if (words[0] == (const char *) u8"ERROR")
return Packet (Client::PC_ERROR,
- words.size () == 2 ? words[1]: u8"malformed error response");
+ words.size () == 2 ? words[1]
+ : (const char *) u8"malformed error response");
if (isLast && !read.IsAtEnd ())
return Packet (Client::PC_ERROR,
- std::string (u8"unexpected extra response"));
+ std::string ((const char *) u8"unexpected extra response"));
Assert (code < Detail::RC_HWM);
Packet result (responseTable[code] (words));
result.SetRequest (code);
if (result.GetCode () == Client::PC_ERROR && result.GetString ().empty ())
{
- std::string msg {u8"malformed response '"};
+ std::string msg {(const char *) u8"malformed response '"};
read.LexedLine (msg);
- msg.append (u8"'");
+ msg.append ((const char *) u8"'");
result.GetString () = std::move (msg);
}
else if (result.GetCode () == Client::PC_CONNECT)
@@ -199,7 +200,7 @@ Packet Client::Connect (char const *agen
size_t alen, size_t ilen)
{
write.BeginLine ();
- write.AppendWord (u8"HELLO");
+ write.AppendWord ((const char *) u8"HELLO");
write.AppendInteger (Version);
write.AppendWord (agent, true, alen);
write.AppendWord (ident, true, ilen);
@@ -211,7 +212,8 @@ Packet Client::Connect (char const *agen
// HELLO $version $agent [$flags]
Packet ConnectResponse (std::vector<std::string> &words)
{
- if (words[0] == u8"HELLO" && (words.size () == 3 || words.size () == 4))
+ if (words[0] == (const char *) u8"HELLO"
+ && (words.size () == 3 || words.size () == 4))
{
char *eptr;
unsigned long val = strtoul (words[1].c_str (), &eptr, 10);
@@ -247,7 +249,7 @@ Packet Client::ModuleRepo ()
// PATHNAME $dir | ERROR
Packet PathnameResponse (std::vector<std::string> &words)
{
- if (words[0] == u8"PATHNAME" && words.size () == 2)
+ if (words[0] == (const char *) u8"PATHNAME" && words.size () == 2)
return Packet (Client::PC_PATHNAME, std::move (words[1]));
return Packet (Client::PC_ERROR, u8"");
@@ -256,7 +258,7 @@ Packet PathnameResponse (std::vector<std
// OK or ERROR
Packet OKResponse (std::vector<std::string> &words)
{
- if (words[0] == u8"OK")
+ if (words[0] == (const char *) u8"OK")
return Packet (Client::PC_OK);
else
return Packet (Client::PC_ERROR,
@@ -319,11 +321,11 @@ Packet Client::IncludeTranslate (char co
// PATHNAME $cmifile
Packet IncludeTranslateResponse (std::vector<std::string> &words)
{
- if (words[0] == u8"BOOL" && words.size () == 2)
+ if (words[0] == (const char *) u8"BOOL" && words.size () == 2)
{
- if (words[1] == u8"FALSE")
- return Packet (Client::PC_BOOL, 0);
- else if (words[1] == u8"TRUE")
+ if (words[1] == (const char *) u8"FALSE")
+ return Packet (Client::PC_BOOL);
+ else if (words[1] == (const char *) u8"TRUE")
return Packet (Client::PC_BOOL, 1);
else
return Packet (Client::PC_ERROR, u8"");
--- libcody/server.cc.jj 2020-12-21 22:20:04.143490902 +0100
+++ libcody/server.cc 2025-11-17 17:04:10.535350717 +0100
@@ -36,12 +36,12 @@ static RequestPair
const requestTable[Detail::RC_HWM] =
{
// Same order as enum RequestCode
- RequestPair {u8"HELLO", nullptr},
- RequestPair {u8"MODULE-REPO", ModuleRepoRequest},
- RequestPair {u8"MODULE-EXPORT", ModuleExportRequest},
- RequestPair {u8"MODULE-IMPORT", ModuleImportRequest},
- RequestPair {u8"MODULE-COMPILED", ModuleCompiledRequest},
- RequestPair {u8"INCLUDE-TRANSLATE", IncludeTranslateRequest},
+ RequestPair {(const char *) u8"HELLO", nullptr},
+ RequestPair {(const char *) u8"MODULE-REPO", ModuleRepoRequest},
+ RequestPair {(const char *) u8"MODULE-EXPORT", ModuleExportRequest},
+ RequestPair {(const char *) u8"MODULE-IMPORT", ModuleImportRequest},
+ RequestPair {(const char *) u8"MODULE-COMPILED", ModuleCompiledRequest},
+ RequestPair {(const char *) u8"INCLUDE-TRANSLATE",
IncludeTranslateRequest},
};
}
@@ -135,21 +135,21 @@ void Server::ProcessRequests (void)
std::string msg;
if (err > 0)
- msg = u8"error processing '";
+ msg = (const char *) u8"error processing '";
else if (ix >= Detail::RC_HWM)
- msg = u8"unrecognized '";
+ msg = (const char *) u8"unrecognized '";
else if (IsConnected () && ix == Detail::RC_CONNECT)
- msg = u8"already connected '";
+ msg = (const char *) u8"already connected '";
else if (!IsConnected () && ix != Detail::RC_CONNECT)
- msg = u8"not connected '";
+ msg = (const char *) u8"not connected '";
else
- msg = u8"malformed '";
+ msg = (const char *) u8"malformed '";
read.LexedLine (msg);
- msg.append (u8"'");
+ msg.append ((const char *) u8"'");
if (err > 0)
{
- msg.append (u8" ");
+ msg.append ((const char *) u8" ");
msg.append (strerror (err));
}
resolver->ErrorResponse (this, std::move (msg));
@@ -176,7 +176,7 @@ Resolver *ConnectRequest (Server *s, Res
return nullptr;
if (words.size () == 3)
- words.emplace_back (u8"");
+ words.emplace_back ((const char *) u8"");
unsigned version = ParseUnsigned (words[1]);
if (version == ~0u)
return nullptr;
Jakub