hazohelet updated this revision to Diff 546782.
hazohelet added a comment.
Address comments from Corentin
- Use default `pushEscapedString` escaping (`<U+0001>`) instead of UCN
representation `\u0001`
- Convert multi-byte characters (`wchar_t`, `char16_t`, `char32_t`) to UTF-8
and print them.
- Added `ConvertCharToString` utility function
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D155610/new/
https://reviews.llvm.org/D155610
Files:
clang/docs/ReleaseNotes.rst
clang/lib/Sema/SemaDeclCXX.cpp
clang/test/Lexer/cxx1z-trigraphs.cpp
clang/test/SemaCXX/static-assert-cxx26.cpp
clang/test/SemaCXX/static-assert.cpp
Index: clang/test/SemaCXX/static-assert.cpp
===================================================================
--- clang/test/SemaCXX/static-assert.cpp
+++ clang/test/SemaCXX/static-assert.cpp
@@ -262,7 +262,29 @@
return 'c';
}
static_assert(getChar() == 'a', ""); // expected-error {{failed}} \
- // expected-note {{evaluates to ''c' == 'a''}}
+ // expected-note {{evaluates to ''c' (99) == 'a' (97)'}}
+ static_assert((char)9 == '\x61', ""); // expected-error {{failed}} \
+ // expected-note {{evaluates to ''\t' (9) == 'a' (97)'}}
+ static_assert((char)10 == '\0', ""); // expected-error {{failed}} \
+ // expected-note {{n' (10) == '<U+0000>' (0)'}}
+ // The note above is intended to match "evaluates to '\n' (10) == '<U+0000>' (0)'", but if we write it as it is,
+ // the "\n" cannot be consumed by the diagnostic consumer.
+ static_assert((signed char)10 == (char)-123, ""); // expected-error {{failed}} \
+ // expected-note {{evaluates to '10 == '<85>' (-123)'}}
+ static_assert((char)-4 == (unsigned char)-8, ""); // expected-error {{failed}} \
+ // expected-note {{evaluates to ''<FC>' (-4) == 248'}}
+ static_assert((char)-128 == (char)-123, ""); // expected-error {{failed}} \
+ // expected-note {{evaluates to ''<80>' (-128) == '<85>' (-123)'}}
+ static_assert('\xA0' == (char)'\x20', ""); // expected-error {{failed}} \
+ // expected-note {{evaluates to ''<A0>' (-96) == ' ' (32)'}}
+static_assert((char16_t)L'ゆ' == L"C̵̭̯̠̎͌ͅť̺"[1], ""); // expected-error {{failed}} \
+ // expected-note {{evaluates to 'u'ゆ' (12422) == L'̵' (821)'}}
+static_assert(L"＼／"[1] == u'\xFFFD', ""); // expected-error {{failed}} \
+ // expected-note {{evaluates to 'L'／' (65295) == u'�' (65533)'}}
+static_assert(L"⚾"[0] == U'🌍', ""); // expected-error {{failed}} \
+ // expected-note {{evaluates to 'L'⚾' (9918) == U'🌍' (127757)'}}
+static_assert(U"\a"[0] == (wchar_t)9, ""); // expected-error {{failed}} \
+ // expected-note {{evaluates to 'U'\a' (7) == L'\t' (9)'}}
/// Bools are printed as bools.
constexpr bool invert(bool b) {
Index: clang/test/SemaCXX/static-assert-cxx26.cpp
===================================================================
--- clang/test/SemaCXX/static-assert-cxx26.cpp
+++ clang/test/SemaCXX/static-assert-cxx26.cpp
@@ -298,3 +298,12 @@
Bad<int> b; // expected-note {{in instantiation}}
}
+
+namespace EscapeInDiagnostic {
+static_assert('\u{9}' == (char)1, ""); // expected-error {{failed}} \
+ // expected-note {{evaluates to ''\t' (9) == '<U+0001>' (1)'}}
+static_assert((char8_t)-128 == (char8_t)-123, ""); // expected-error {{failed}} \
+ // expected-note {{evaluates to 'u8'<80>' (128) == u8'<85>' (133)'}}
+static_assert((char16_t)0xFEFF == (char16_t)0xDB93, ""); // expected-error {{failed}} \
+ // expected-note {{evaluates to 'u'' (65279) == u'\xDB93' (56211)'}}
+}
Index: clang/test/Lexer/cxx1z-trigraphs.cpp
===================================================================
--- clang/test/Lexer/cxx1z-trigraphs.cpp
+++ clang/test/Lexer/cxx1z-trigraphs.cpp
@@ -21,7 +21,7 @@
#if !ENABLED_TRIGRAPHS
// expected-error@11 {{}} expected-warning@11 {{trigraph ignored}}
-// expected-error@13 {{failed}} expected-warning@13 {{trigraph ignored}} expected-note@13 {{evaluates to ''?' == '#''}}
+// expected-error@13 {{failed}} expected-warning@13 {{trigraph ignored}} expected-note@13 {{evaluates to ''?' (63) == '#' (35)'}}
// expected-error@16 {{}}
// expected-error@20 {{}}
#else
Index: clang/lib/Sema/SemaDeclCXX.cpp
===================================================================
--- clang/lib/Sema/SemaDeclCXX.cpp
+++ clang/lib/Sema/SemaDeclCXX.cpp
@@ -46,6 +46,7 @@
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/SaveAndRestore.h"
#include <map>
#include <optional>
@@ -16799,10 +16800,71 @@
AssertMessageExpr, RParenLoc, false);
}
+/// Convert character's code point value to a string.
+/// The code point needs to be zero-extended to 32-bits.
+void ConvertCharToString(uint32_t CodePoint, const BuiltinType *BTy,
+ unsigned TyWidth, llvm::raw_ostream &OS) {
+ char Arr[UNI_MAX_UTF8_BYTES_PER_CODE_POINT];
+ char *Ptr = Arr;
+
+ // This should catch Char_S, Char_U, Char8, and use of escaped characters in
+ // other types.
+ if (CodePoint <= UCHAR_MAX) {
+ StringRef Escaped = escapeCStyle<EscapeChar::Single>(CodePoint);
+ if (!Escaped.empty())
+ OS << Escaped;
+ else
+ OS << static_cast<char>(CodePoint);
+ return;
+ }
+
+ switch (BTy->getKind()) {
+ case BuiltinType::Char16:
+ case BuiltinType::Char32:
+ case BuiltinType::WChar_S:
+ case BuiltinType::WChar_U: {
+ if (llvm::ConvertCodePointToUTF8(CodePoint, Ptr)) {
+ for (char *I = Arr; I != Ptr; ++I)
+ OS << static_cast<char>(*I);
+ } else {
+ OS << "\\x" << llvm::format_hex_no_prefix(CodePoint, TyWidth / 4, true);
+ }
+ break;
+ }
+ default:
+ llvm_unreachable("Non-character type is passed");
+ }
+}
+
+static void PrintCharLiteralPrefix(BuiltinType::Kind BTK,
+ llvm::raw_ostream &OS) {
+ switch (BTK) {
+ case BuiltinType::Char_S:
+ case BuiltinType::Char_U:
+ break;
+ case BuiltinType::Char8:
+ OS << "u8";
+ break;
+ case BuiltinType::Char16:
+ OS << "u";
+ break;
+ case BuiltinType::Char32:
+ OS << "U";
+ break;
+ case BuiltinType::WChar_S:
+ case BuiltinType::WChar_U:
+ OS << "L";
+ break;
+ default:
+ llvm_unreachable("Non-character type is passed");
+ }
+}
+
/// Convert \V to a string we can present to the user in a diagnostic
/// \T is the type of the expression that has been evaluated into \V
static bool ConvertAPValueToString(const APValue &V, QualType T,
- SmallVectorImpl<char> &Str) {
+ SmallVectorImpl<char> &Str,
+ ASTContext &Context) {
if (!V.hasValue())
return false;
@@ -16817,13 +16879,35 @@
"Bool type, but value is not 0 or 1");
llvm::raw_svector_ostream OS(Str);
OS << (BoolValue ? "true" : "false");
- } else if (T->isCharType()) {
+ } else {
+ llvm::raw_svector_ostream OS(Str);
// Same is true for chars.
- Str.push_back('\'');
- Str.push_back(V.getInt().getExtValue());
- Str.push_back('\'');
- } else
+ // We want to print the character representation for textual types
+ const auto *BTy = T->getAs<BuiltinType>();
+ if (BTy) {
+ switch (BTy->getKind()) {
+ case BuiltinType::Char_S:
+ case BuiltinType::Char_U:
+ case BuiltinType::Char8:
+ case BuiltinType::Char16:
+ case BuiltinType::Char32:
+ case BuiltinType::WChar_S:
+ case BuiltinType::WChar_U: {
+ unsigned TyWidth = Context.getIntWidth(T);
+ assert(8 <= TyWidth && TyWidth <= 32 && "Unexpected integer width");
+ uint32_t CodePoint = static_cast<uint32_t>(V.getInt().getZExtValue());
+ PrintCharLiteralPrefix(BTy->getKind(), OS);
+ OS << '\'';
+ ConvertCharToString(CodePoint, BTy, TyWidth, OS);
+ OS << "' (" << V.getInt() << ')';
+ return true;
+ }
+ default:
+ break;
+ }
+ }
V.getInt().toString(Str);
+ }
break;
@@ -16920,8 +17004,9 @@
Side->EvaluateAsRValue(DiagSide[I].Result, Context, true);
- DiagSide[I].Print = ConvertAPValueToString(
- DiagSide[I].Result.Val, Side->getType(), DiagSide[I].ValueString);
+ DiagSide[I].Print =
+ ConvertAPValueToString(DiagSide[I].Result.Val, Side->getType(),
+ DiagSide[I].ValueString, Context);
}
if (DiagSide[0].Print && DiagSide[1].Print) {
Diag(Op->getExprLoc(), diag::note_expr_evaluates_to)
Index: clang/docs/ReleaseNotes.rst
===================================================================
--- clang/docs/ReleaseNotes.rst
+++ clang/docs/ReleaseNotes.rst
@@ -100,6 +100,12 @@
-----------------------------------
- Clang constexpr evaluator now prints template arguments when displaying
template-specialization function calls.
+- When describing the failure of a static assertion, clang prints the integer
+ representation of the value as well as its character representation when
+ the user-provided expression is of character type. If the character is
+ non-printable, clang now shows the escaped character.
+ Clang also prints multi-byte characters if the user-provided expression
+ is of multi-byte character type.
Bug Fixes in This Version
-------------------------
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits