@@ -249,6 +250,7 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin,
case '4': case '5': case '6': case '7': {
// Octal escapes.
--ThisTokBuf;
+Translate = false;
efriedma-quic wrote:
Also handle `\o` escapes?
https://github.com/llvm/
@@ -2191,6 +2243,16 @@ void StringLiteralParser::init(ArrayRef
StringToks){
if (CopyStringFragment(StringToks[i], ThisTokBegin, BeforeCRLF))
hadError = true;
+if (!hadError && Converter) {
+ assert(Kind != tok::wide_string_literal &&
+
@@ -0,0 +1,36 @@
+//===--- clang/Lex/LiteralConverter.h - Translator for Literals -*- C++
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apa
@@ -0,0 +1,36 @@
+//===--- clang/Lex/LiteralConverter.h - Translator for Literals -*- C++
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apa
@@ -0,0 +1,36 @@
+//===--- clang/Lex/LiteralConverter.h - Translator for Literals -*- C++
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apa
@@ -633,6 +633,9 @@ class LangOptions : public LangOptionsBase {
bool AtomicFineGrainedMemory = false;
bool AtomicIgnoreDenormalMode = false;
+ /// Name of the exec charset to convert the internal charset to.
+ std::string ExecCharset;
cor3ntin wrote:
T
@@ -0,0 +1,36 @@
+//===--- clang/Lex/LiteralConverter.h - Translator for Literals -*- C++
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apa
@@ -0,0 +1,36 @@
+//===--- clang/Lex/LiteralConverter.h - Translator for Literals -*- C++
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apa
@@ -633,6 +633,9 @@ class LangOptions : public LangOptionsBase {
bool AtomicFineGrainedMemory = false;
bool AtomicIgnoreDenormalMode = false;
+ /// Name of the exec charset to convert the internal charset to.
+ std::string ExecCharset;
abhina-sree wrote:
@@ -367,6 +370,15 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin,
HadError = true;
}
+ if (Translate && Converter) {
+// Invalid escapes are written as '?' and then translated.
+char ByteChar = Invalid ? '?' : ResultChar;
+SmallString<8> Resul
@@ -0,0 +1,36 @@
+//===--- clang/Lex/LiteralConverter.h - Translator for Literals -*- C++
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apa
@@ -491,6 +491,9 @@ class Triple {
/// For example, "fooos1.2.3" would return "1.2.3".
StringRef getEnvironmentVersionString() const;
+ /// getSystemCharset - Get the system charset of the triple.
+ StringRef getSystemCharset() const;
+
cor3ntin wrote:
@@ -0,0 +1,36 @@
+//===--- clang/Lex/LiteralConverter.h - Translator for Literals -*- C++
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apa
@@ -0,0 +1,36 @@
+//===--- clang/Lex/LiteralConverter.h - Translator for Literals -*- C++
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apa
@@ -416,8 +416,7 @@ Builtin Macros
``__clang_literal_encoding__``
Defined to a narrow string literal that represents the current encoding of
narrow string literals, e.g., ``"hello"``. This macro typically expands to
- "UTF-8" (but may change in the future if the
- ``-fexe
@@ -246,18 +249,19 @@ class StringLiteralParser {
StringLiteralEvalMethod EvalMethod;
public:
- StringLiteralParser(ArrayRef StringToks, Preprocessor &PP,
- StringLiteralEvalMethod StringMethod =
- StringLiteralEvalMethod::Evalu
@@ -1842,23 +1859,52 @@ CharLiteralParser::CharLiteralParser(const char *begin,
const char *end,
HadError = true;
PP.Diag(Loc, diag::err_character_too_large);
}
+ if (!HadError && Converter) {
+assert(Kind != tok::wide_cha
@@ -146,6 +144,8 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin,
// that would have been \", which would not have been the end of string.
unsigned ResultChar = *ThisTokBuf++;
char Escape = ResultChar;
+ bool Translate = true;
cor3ntin wrot
@@ -1842,23 +1859,52 @@ CharLiteralParser::CharLiteralParser(const char *begin,
const char *end,
HadError = true;
PP.Diag(Loc, diag::err_character_too_large);
}
+ if (!HadError && Converter) {
+assert(Kind != tok::wide_cha
@@ -633,6 +633,9 @@ class LangOptions : public LangOptionsBase {
bool AtomicFineGrainedMemory = false;
bool AtomicIgnoreDenormalMode = false;
+ /// Name of the exec charset to convert the internal charset to.
+ std::string ExecCharset;
cor3ntin wrote:
L
cor3ntin wrote:
Thanks for working on this.
I don't have time to do a full review yet, but this is going to need a lot more
tests:
- Constant evaluation tests
- Preprocessor string concatenation
- Tests that we emit diagnostics for non-encodable strings
- Tests for wide strings ( It's not okay t
llvmbot wrote:
@llvm/pr-subscribers-clang
Author: Abhina Sree (abhina-sree)
Changes
This patch enables the fexec-charset option to control the execution charset of
string literals. It sets the default internal charset, system charset, and
execution charset for z/OS and UTF-8 for all othe
https://github.com/abhina-sree created
https://github.com/llvm/llvm-project/pull/138895
This patch enables the fexec-charset option to control the execution charset of
string literals. It sets the default internal charset, system charset, and
execution charset for z/OS and UTF-8 for all other
23 matches
Mail list logo