Dear GCC,
This patch adds predefined macros to the preprocessor that tell users
which execution and wide execution charsets are in use, i.e. the
encodings used for "bark" and L"meow" literals in C-family
languages.
The goal is to let users of sufficiently capable languages, such as
C and C++, determine the encoding of string literals and,
hopefully, transcode those literals to other encodings. For example,
data stored in `char[]` on an IBM machine using EBCDIC can be
transcoded to UTF-8 (losslessly, or with a compile-time warning) at
constant-expression time in C++ with this patch, without having to rely
on a series of complicated architecture and platform definitions to
guess what the encoding of string literals and wide character
literals might be.
The names are meaningful because they are tied directly to iconv,
which means there is a strong reference between the name and the code
that encodes/decodes a proper sequence. Therefore, we just present the
names.
Does this sound useful?
Sincerely,
JeanHeyd
Patch notes: the strings passed to the charset creation routines are
all static and/or allocated at the very beginning of the program and
never deallocated until the end of the invocation, so it seems safe to
just store plain pointers to them.
2020-10-08 JeanHeyd "ThePhD" Meneide <[email protected]>
* gcc/c-family/c-cppbuiltin.c: Add predefined macro
definitions for charsets.
* gcc/doc/cpp.texi: Document new predefined macros.
* gcc/testsuite/c-c++-common/cpp/wide-narrow-predef-macros.c (new):
New test checking that the macro definitions always exist.
* libcpp/include/cpplib.h: Add function declarations for
retrieving charset names.
* libcpp/charset.c: Record the to/from charset names in the
converter descriptor.
* libcpp/directives.c: Add function definitions to retrieve charset
names.
* libcpp/internal.h: Add to/from name fields to the converter struct.
diff --git a/gcc/c-family/c-cppbuiltin.c b/gcc/c-family/c-cppbuiltin.c
index 74ecca8de8e..8de25786592 100644
--- a/gcc/c-family/c-cppbuiltin.c
+++ b/gcc/c-family/c-cppbuiltin.c
@@ -866,6 +866,13 @@ c_cpp_builtins (cpp_reader *pfile)
define_language_independent_builtin_macros (pfile);
+ /* Encoding definitions used by users and libraries.  */
+ builtin_define_with_value ("__GNUC_EXECUTION_CHARSET_NAME",
+ cpp_get_narrow_charset_name (pfile), 1);
+ builtin_define_with_value ("__GNUC_WIDE_EXECUTION_CHARSET_NAME",
+ cpp_get_wide_charset_name (pfile), 1);
+
+
if (c_dialect_cxx ())
{
int major;
diff --git a/gcc/doc/cpp.texi b/gcc/doc/cpp.texi
index 33f876ab706..55fa5739812 100644
--- a/gcc/doc/cpp.texi
+++ b/gcc/doc/cpp.texi
@@ -2451,6 +2451,13 @@ features are supported by GCC.
@item __NO_MATH_ERRNO__
This macro is defined if @option{-fno-math-errno} is used, or enabled
by another option such as @option{-ffast-math} or by default.
+
+@item __GNUC_EXECUTION_CHARSET_NAME
+@itemx __GNUC_WIDE_EXECUTION_CHARSET_NAME
+These macros are defined to the names of the narrow and wide compile-time
+execution character sets in use. They directly reflect the names passed to
+the options @option{-fexec-charset} and @option{-fwide-exec-charset},
+or the defaults documented for those options. @xref{Invocation}.
@end table
@node System-specific Predefined Macros
diff --git a/gcc/testsuite/c-c++-common/cpp/wide-narrow-predef-macros.c b/gcc/testsuite/c-c++-common/cpp/wide-narrow-predef-macros.c
new file mode 100644
index 00000000000..0f55e6915f4
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/cpp/wide-narrow-predef-macros.c
@@ -0,0 +1,10 @@
+/*
+ { dg-do compile }
+ */
+
+#if !defined(__GNUC_EXECUTION_CHARSET_NAME)
+#error "Required implementation macro for compile-time charset name is not present"
+#endif
+#if !defined(__GNUC_WIDE_EXECUTION_CHARSET_NAME)
+#error "Required implementation macro for wide compile-time charset name is not present"
+#endif
diff --git a/libcpp/charset.c b/libcpp/charset.c
index 28b81c9c864..3e5578b1390 100644
--- a/libcpp/charset.c
+++ b/libcpp/charset.c
@@ -638,6 +638,9 @@ init_iconv_desc (cpp_reader *pfile, const char *to, const char *from)
char *pair;
size_t i;
+ ret.to = to;
+ ret.from = from;
+
if (!strcasecmp (to, from))
{
ret.func = convert_no_conversion;
diff --git a/libcpp/directives.c b/libcpp/directives.c
index f59718708e4..ad540872581 100644
--- a/libcpp/directives.c
+++ b/libcpp/directives.c
@@ -2571,6 +2571,20 @@ cpp_set_callbacks (cpp_reader *pfile, cpp_callbacks *cb)
pfile->cb = *cb;
}
+/* Return the target charset name stored in PFILE's narrow converter.  */
+const char *
+cpp_get_narrow_charset_name (cpp_reader *pfile)
+{
+ return pfile->narrow_cset_desc.to;
+}
+
+/* Return the target charset name stored in PFILE's wide converter.  */
+const char *
+cpp_get_wide_charset_name (cpp_reader *pfile)
+{
+ return pfile->wide_cset_desc.to;
+}
+
/* The dependencies structure. (Creates one if it hasn't already been.) */
class mkdeps *
cpp_get_deps (cpp_reader *pfile)
diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
index 8e398863cf6..69a5042d0bf 100644
--- a/libcpp/include/cpplib.h
+++ b/libcpp/include/cpplib.h
@@ -971,6 +971,11 @@ extern cpp_callbacks *cpp_get_callbacks (cpp_reader *) ATTRIBUTE_PURE;
extern void cpp_set_callbacks (cpp_reader *, cpp_callbacks *);
extern class mkdeps *cpp_get_deps (cpp_reader *) ATTRIBUTE_PURE;
+/* Call these to get name data about the various compile-time
+ charsets. */
+extern const char *cpp_get_narrow_charset_name (cpp_reader *) ATTRIBUTE_PURE;
+extern const char *cpp_get_wide_charset_name (cpp_reader *) ATTRIBUTE_PURE;
+
/* This function reads the file, but does not start preprocessing. It
returns the name of the original file; this is the same as the
input file, except for preprocessed input. This will generate at
diff --git a/libcpp/internal.h b/libcpp/internal.h
index 4bafe1cf353..0a85766ff41 100644
--- a/libcpp/internal.h
+++ b/libcpp/internal.h
@@ -48,6 +48,8 @@ struct cset_converter
convert_f func;
iconv_t cd;
int width;
+ const char* from;
+ const char* to;
};
#define BITS_PER_CPPCHAR_T (CHAR_BIT * sizeof (cppchar_t))