https://github.com/js324 updated https://github.com/llvm/llvm-project/pull/86586
>From 3186b97752f6a6a9b065b5b63b78fc3025ed224b Mon Sep 17 00:00:00 2001 From: Jin S <jins...@gmail.com> Date: Mon, 25 Mar 2024 17:19:41 -0400 Subject: [PATCH 1/2] [BitInt] Expose a _BitInt literal suffix in C++ --- clang/docs/ReleaseNotes.rst | 1 + .../clang/Basic/DiagnosticCommonKinds.td | 3 + clang/include/clang/Basic/DiagnosticGroups.td | 2 + .../clang/Basic/DiagnosticParseKinds.td | 2 +- clang/include/clang/Lex/LiteralSupport.h | 3 +- clang/lib/Lex/LiteralSupport.cpp | 31 ++- clang/lib/Lex/PPExpressions.cpp | 8 +- clang/lib/Sema/SemaExpr.cpp | 12 +- clang/test/AST/bitint-suffix.cpp | 32 ++++ clang/test/Lexer/bitint-constants-compat.c | 11 +- clang/test/Lexer/bitint-constants.cpp | 177 ++++++++++++++++++ 11 files changed, 266 insertions(+), 16 deletions(-) create mode 100644 clang/test/AST/bitint-suffix.cpp create mode 100644 clang/test/Lexer/bitint-constants.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 7fbe2fec6ca065..d40c86a15ac2da 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -88,6 +88,7 @@ sections with improvements to Clang's support for those languages. C++ Language Changes -------------------- +- Implemented ``_BitInt`` literal suffixes ``__wb`` or ``__WB`` as a Clang extension with ``unsigned`` modifiers also allowed. (#GH85223). 
C++20 Feature Support ^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td b/clang/include/clang/Basic/DiagnosticCommonKinds.td index a52bf62e24202c..0738f43ca555c8 100644 --- a/clang/include/clang/Basic/DiagnosticCommonKinds.td +++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td @@ -234,6 +234,9 @@ def err_cxx23_size_t_suffix: Error< def err_size_t_literal_too_large: Error< "%select{signed |}0'size_t' literal is out of range of possible " "%select{signed |}0'size_t' values">; +def ext_cxx_bitint_suffix : Extension< + "'_BitInt' suffix for literals is a Clang extension">, + InGroup<BitIntExtension>; def ext_c23_bitint_suffix : ExtWarn< "'_BitInt' suffix for literals is a C23 extension">, InGroup<C23>; diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index 44035e2fd16f2e..38c0c6af949f63 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -1516,3 +1516,5 @@ def UnsafeBufferUsage : DiagGroup<"unsafe-buffer-usage", [UnsafeBufferUsageInCon // Warnings and notes InstallAPI verification. def InstallAPIViolation : DiagGroup<"installapi-violation">; +// Warnings related to _BitInt extension +def BitIntExtension : DiagGroup<"bit-int-extension">; diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td index 46a44418a3153b..6759f923564adf 100644 --- a/clang/include/clang/Basic/DiagnosticParseKinds.td +++ b/clang/include/clang/Basic/DiagnosticParseKinds.td @@ -1646,7 +1646,7 @@ def warn_ext_int_deprecated : Warning< "'_ExtInt' is deprecated; use '_BitInt' instead">, InGroup<DeprecatedType>; def ext_bit_int : Extension< "'_BitInt' in %select{C17 and earlier|C++}0 is a Clang extension">, - InGroup<DiagGroup<"bit-int-extension">>; + InGroup<BitIntExtension>; } // end of Parse Issue category. 
let CategoryName = "Modules Issue" in { diff --git a/clang/include/clang/Lex/LiteralSupport.h b/clang/include/clang/Lex/LiteralSupport.h index 643ddbdad8c87d..e7a2ccc9bb0bb3 100644 --- a/clang/include/clang/Lex/LiteralSupport.h +++ b/clang/include/clang/Lex/LiteralSupport.h @@ -80,7 +80,8 @@ class NumericLiteralParser { bool isFloat128 : 1; // 1.0q bool isFract : 1; // 1.0hr/r/lr/uhr/ur/ulr bool isAccum : 1; // 1.0hk/k/lk/uhk/uk/ulk - bool isBitInt : 1; // 1wb, 1uwb (C23) + bool isBitInt : 1; // 1wb, 1uwb (C23) or 1__wb, 1__uwb (Clang extension in C++ + // mode) uint8_t MicrosoftInteger; // Microsoft suffix extension i8, i16, i32, or i64. diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp index 438c6d772e6e04..050c7ab96b0b32 100644 --- a/clang/lib/Lex/LiteralSupport.cpp +++ b/clang/lib/Lex/LiteralSupport.cpp @@ -974,6 +974,7 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling, bool isFixedPointConstant = isFixedPointLiteral(); bool isFPConstant = isFloatingLiteral(); bool HasSize = false; + bool PossibleBitInt = false; // Loop over all of the characters of the suffix. If we see something bad, // we break out of the loop. @@ -1117,6 +1118,26 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling, if (isImaginary) break; // Cannot be repeated. isImaginary = true; continue; // Success. + case '_': + if (isFPConstant) + break; // Invalid for floats + if (HasSize) + break; + if (PossibleBitInt) + break; // Cannot be repeated. 
+ if (LangOpts.CPlusPlus && s[1] == '_') { + // Scan ahead to find possible rest of BitInt suffix + for (const char *c = s; c != ThisTokEnd; ++c) { + if (*c == 'w' || *c == 'W') { + PossibleBitInt = true; + ++s; // Skip both '_' (2nd '_' skipped on continue) + break; + } + } + if (PossibleBitInt) + continue; + } + break; case 'w': case 'W': if (isFPConstant) @@ -1127,9 +1148,9 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling, // wb and WB are allowed, but a mixture of cases like Wb or wB is not. We // explicitly do not support the suffix in C++ as an extension because a // library-based UDL that resolves to a library type may be more - // appropriate there. - if (!LangOpts.CPlusPlus && ((s[0] == 'w' && s[1] == 'b') || - (s[0] == 'W' && s[1] == 'B'))) { + // appropriate there. C++ accepts only __wb/__WB, with the same case rules. + if ((!LangOpts.CPlusPlus || PossibleBitInt) && + ((s[0] == 'w' && s[1] == 'b') || (s[0] == 'W' && s[1] == 'B'))) { isBitInt = true; HasSize = true; ++s; // Skip both characters (2nd char skipped on continue). @@ -1241,7 +1262,9 @@ bool NumericLiteralParser::isValidUDSuffix(const LangOptions &LangOpts, return false; // By C++11 [lex.ext]p10, ud-suffixes starting with an '_' are always valid. - if (Suffix[0] == '_') + // Suffixes starting with '__' (double underscore) are for use by + // the implementation. + if (Suffix.starts_with("_") && !Suffix.starts_with("__")) return true; // In C++11, there are no library suffixes. diff --git a/clang/lib/Lex/PPExpressions.cpp b/clang/lib/Lex/PPExpressions.cpp index 8f25c67ec9dfbe..f267efabd617fd 100644 --- a/clang/lib/Lex/PPExpressions.cpp +++ b/clang/lib/Lex/PPExpressions.cpp @@ -333,11 +333,11 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT, : diag::ext_cxx23_size_t_suffix : diag::err_cxx23_size_t_suffix); - // 'wb/uwb' literals are a C23 feature. 
We explicitly do not support the - // suffix in C++ as an extension because a library-based UDL that resolves - // to a library type may be more appropriate there. + // 'wb/uwb' literals are a C23 feature. + // '__wb/__uwb' are a C++ extension. if (Literal.isBitInt) - PP.Diag(PeekTok, PP.getLangOpts().C23 + PP.Diag(PeekTok, PP.getLangOpts().CPlusPlus ? diag::ext_cxx_bitint_suffix + : PP.getLangOpts().C23 ? diag::warn_c23_compat_bitint_suffix : diag::ext_c23_bitint_suffix); diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 5f03b981428251..ada4214b64ec37 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -4164,11 +4164,13 @@ ExprResult Sema::ActOnNumericConstant(const Token &Tok, Scope *UDLScope) { // 'wb/uwb' literals are a C23 feature. We support _BitInt as a type in C++, // but we do not currently support the suffix in C++ mode because it's not // entirely clear whether WG21 will prefer this suffix to return a library - // type such as std::bit_int instead of returning a _BitInt. - if (Literal.isBitInt && !getLangOpts().CPlusPlus) - PP.Diag(Tok.getLocation(), getLangOpts().C23 - ? diag::warn_c23_compat_bitint_suffix - : diag::ext_c23_bitint_suffix); + // type such as std::bit_int instead of returning a _BitInt. '__wb/__uwb' + // literals are a C++ extension. + if (Literal.isBitInt) + PP.Diag(Tok.getLocation(), + getLangOpts().CPlusPlus ? diag::ext_cxx_bitint_suffix + : getLangOpts().C23 ? diag::warn_c23_compat_bitint_suffix + : diag::ext_c23_bitint_suffix); // Get the value in the widest-possible width. What is "widest" depends on // whether the literal is a bit-precise integer or not. 
For a bit-precise diff --git a/clang/test/AST/bitint-suffix.cpp b/clang/test/AST/bitint-suffix.cpp new file mode 100644 index 00000000000000..dab2b16c74235d --- /dev/null +++ b/clang/test/AST/bitint-suffix.cpp @@ -0,0 +1,32 @@ +// RUN: %clang_cc1 -ast-dump -Wno-unused %s | FileCheck --strict-whitespace %s + +// CHECK: FunctionDecl 0x{{[^ ]*}} <{{.*}}:[[@LINE+1]]:1, line:{{[0-9]*}}:1> line:[[@LINE+1]]:6 func 'void ()' +void func() { + // Ensure that we calculate the correct type from the literal suffix. + + // Note: 0__wb should create an _BitInt(2) because a signed bit-precise + // integer requires one bit for the sign and one bit for the value, + // at a minimum. + // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:29> col:29 zero_wb 'typeof (0wb)':'_BitInt(2)' + typedef __typeof__(0__wb) zero_wb; + // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:30> col:30 neg_zero_wb 'typeof (-0wb)':'_BitInt(2)' + typedef __typeof__(-0__wb) neg_zero_wb; + // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:29> col:29 one_wb 'typeof (1wb)':'_BitInt(2)' + typedef __typeof__(1__wb) one_wb; + // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:30> col:30 neg_one_wb 'typeof (-1wb)':'_BitInt(2)' + typedef __typeof__(-1__wb) neg_one_wb; + + // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:30> col:30 zero_uwb 'typeof (0uwb)':'unsigned _BitInt(1)' + typedef __typeof__(0__uwb) zero_uwb; + // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:31> col:31 neg_zero_uwb 'typeof (-0uwb)':'unsigned _BitInt(1)' + typedef __typeof__(-0__uwb) neg_zero_uwb; + // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:30> col:30 one_uwb 'typeof (1uwb)':'unsigned _BitInt(1)' + typedef __typeof__(1__uwb) one_uwb; + + // Try a value that is too large to fit in [u]intmax_t. 
+ + // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:49> col:49 huge_uwb 'typeof (18446744073709551616uwb)':'unsigned _BitInt(65)' + typedef __typeof__(18446744073709551616__uwb) huge_uwb; + // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:48> col:48 huge_wb 'typeof (18446744073709551616wb)':'_BitInt(66)' + typedef __typeof__(18446744073709551616__wb) huge_wb; +} diff --git a/clang/test/Lexer/bitint-constants-compat.c b/clang/test/Lexer/bitint-constants-compat.c index 607ae88a6188bb..d8bff94ef88caa 100644 --- a/clang/test/Lexer/bitint-constants-compat.c +++ b/clang/test/Lexer/bitint-constants-compat.c @@ -1,14 +1,23 @@ // RUN: %clang_cc1 -std=c17 -fsyntax-only -verify=ext -Wno-unused %s // RUN: %clang_cc1 -std=c2x -fsyntax-only -verify=compat -Wpre-c2x-compat -Wno-unused %s -// RUN: %clang_cc1 -fsyntax-only -verify=cpp -Wno-unused -x c++ %s +// RUN: %clang_cc1 -fsyntax-only -verify=cpp -Wbit-int-extension -Wno-unused -x c++ %s #if 18446744073709551615uwb // ext-warning {{'_BitInt' suffix for literals is a C23 extension}} \ compat-warning {{'_BitInt' suffix for literals is incompatible with C standards before C23}} \ cpp-error {{invalid suffix 'uwb' on integer constant}} #endif +#if 18446744073709551615__uwb // ext-error {{invalid suffix '__uwb' on integer constant}} \ + compat-error {{invalid suffix '__uwb' on integer constant}} \ + cpp-warning {{'_BitInt' suffix for literals is a Clang extension}} +#endif + void func(void) { 18446744073709551615wb; // ext-warning {{'_BitInt' suffix for literals is a C23 extension}} \ compat-warning {{'_BitInt' suffix for literals is incompatible with C standards before C23}} \ cpp-error {{invalid suffix 'wb' on integer constant}} + + 18446744073709551615__wb; // ext-error {{invalid suffix '__wb' on integer constant}} \ + compat-error {{invalid suffix '__wb' on integer constant}} \ + cpp-warning {{'_BitInt' suffix for literals is a Clang extension}} } diff --git a/clang/test/Lexer/bitint-constants.cpp 
b/clang/test/Lexer/bitint-constants.cpp new file mode 100644 index 00000000000000..5a66024d26b0af --- /dev/null +++ b/clang/test/Lexer/bitint-constants.cpp @@ -0,0 +1,177 @@ +// RUN: %clang_cc1 -triple aarch64-unknown-unknown -fsyntax-only -verify -Wno-unused %s + +// Test that the preprocessor behavior makes sense. +#if 1__wb != 1 +#error "wb suffix must be recognized by preprocessor" +#endif +#if 1__uwb != 1 +#error "uwb suffix must be recognized by preprocessor" +#endif +#if !(-1__wb < 0) +#error "wb suffix must be interpreted as signed" +#endif +#if !(-1__uwb > 0) +#error "uwb suffix must be interpreted as unsigned" +#endif + +#if 18446744073709551615__uwb != 18446744073709551615ULL +#error "expected the max value for uintmax_t to compare equal" +#endif + +// Test that the preprocessor gives appropriate diagnostics when the +// literal value is larger than what can be stored in a [u]intmax_t. +#if 18446744073709551616__wb != 0ULL // expected-error {{integer literal is too large to be represented in any integer type}} +#error "never expected to get here due to error" +#endif +#if 18446744073709551616__uwb != 0ULL // expected-error {{integer literal is too large to be represented in any integer type}} +#error "never expected to get here due to error" +#endif + +// Despite using a bit-precise integer, this is expected to overflow +// because all preprocessor arithmetic is done in [u]intmax_t, so this +// should result in the value 0. +#if 18446744073709551615__uwb + 1 != 0ULL +#error "expected modulo arithmetic with uintmax_t width" +#endif + +// Because this bit-precise integer is signed, it will also overflow, +// but Clang handles that by converting to uintmax_t instead of +// intmax_t. 
+#if 18446744073709551615__wb + 1 != 0LL // expected-warning {{integer literal is too large to be represented in a signed integer type, interpreting as unsigned}} +#error "expected modulo arithmetic with uintmax_t width" +#endif + +// Test that just because the preprocessor can't figure out the bit +// width doesn't mean we can't form the constant, it just means we +// can't use the value in a preprocessor conditional. +unsigned _BitInt(65) Val = 18446744073709551616__uwb; +// UDL test to make sure underscore parsing is correct +unsigned operator ""_(const char *); + +void ValidSuffix(void) { + // Decimal literals. + 1__wb; + 1__WB; + -1__wb; + _Static_assert((int)1__wb == 1, "not 1?"); + _Static_assert((int)-1__wb == -1, "not -1?"); + + 1__uwb; + 1__uWB; + 1__Uwb; + 1__UWB; + 1u__wb; + 1__WBu; + 1U__WB; + _Static_assert((unsigned int)1__uwb == 1u, "not 1?"); + + 1'2__wb; + 1'2__uwb; + _Static_assert((int)1'2__wb == 12, "not 12?"); + _Static_assert((unsigned int)1'2__uwb == 12u, "not 12?"); + + // Hexadecimal literals. + 0x1__wb; + 0x1__uwb; + 0x0'1'2'3__wb; + 0xA'B'c'd__uwb; + _Static_assert((int)0x0'1'2'3__wb == 0x0123, "not 0x0123"); + _Static_assert((unsigned int)0xA'B'c'd__uwb == 0xABCDu, "not 0xABCD"); + + // Binary literals. + 0b1__wb; + 0b1__uwb; + 0b1'0'1'0'0'1__wb; + 0b0'1'0'1'1'0__uwb; + _Static_assert((int)0b1__wb == 1, "not 1?"); + _Static_assert((unsigned int)0b1__uwb == 1u, "not 1?"); + + // Octal literals. + 01__wb; + 01__uwb; + 0'6'0__wb; + 0'0'1__uwb; + 0__wbu; + 0__WBu; + 0U__wb; + 0U__WB; + 0__wb; + _Static_assert((int)0__wb == 0, "not 0?"); + _Static_assert((unsigned int)0__wbu == 0u, "not 0?"); + + // Imaginary or Complex. These are allowed because _Complex can work with any + // integer type, and that includes _BitInt. + 1__iwb; + 1i__wb; + 1__wbj; + + //UDL test as single underscore + unsigned i = 1.0_; +} + +void InvalidSuffix(void) { + // Can't mix the case of wb or WB, and can't rearrange the letters. 
+ 0__wB; // expected-error {{invalid suffix '__wB' on integer constant}} + 0__Wb; // expected-error {{invalid suffix '__Wb' on integer constant}} + 0__bw; // expected-error {{invalid suffix '__bw' on integer constant}} + 0__BW; // expected-error {{invalid suffix '__BW' on integer constant}} + + // Trailing digit separators should still diagnose. + 1'2'__wb; // expected-error {{digit separator cannot appear at end of digit sequence}} + 1'2'__uwb; // expected-error {{digit separator cannot appear at end of digit sequence}} + + // Long. + 1l__wb; // expected-error {{invalid suffix}} + 1__wbl; // expected-error {{invalid suffix}} + 1l__uwb; // expected-error {{invalid suffix}} + 1__l; // expected-error {{invalid suffix}} + 1ul__wb; // expected-error {{invalid suffix}} + + // Long long. + 1ll__wb; // expected-error {{invalid suffix}} + 1__uwbll; // expected-error {{invalid suffix}} + + // Floating point. + 0.1__wb; // expected-error {{invalid suffix}} + 0.1f__wb; // expected-error {{invalid suffix}} + + // Repetitive suffix. + 1__wb__wb; // expected-error {{invalid suffix}} + 1__uwbuwb; // expected-error {{invalid suffix}} + 1__wbuwb; // expected-error {{invalid suffix}} + 1__uwbwb; // expected-error {{invalid suffix}} + + // Missing or extra characters in suffix. + 1__; // expected-error {{invalid suffix}} + 1___; // expected-error {{invalid suffix}} + 1___WB; // expected-error {{invalid suffix}} + 1__wb__; // expected-error {{invalid suffix}} + 1__w; // expected-error {{invalid suffix}} + 1__b; // expected-error {{invalid suffix}} +} + +void ValidSuffixInvalidValue(void) { + // This is a valid suffix, but the value is larger than one that fits within + // the width of BITINT_MAXWIDTH. When this value changes in the future, the + // test cases should pick a new value that can't be represented by a _BitInt, + // but also add a test case that a 129-bit literal still behaves as-expected. 
+ _Static_assert(__BITINT_MAXWIDTH__ <= 128, + "Need to pick a bigger constant for the test case below."); + 0xFFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'1__wb; // expected-error {{integer literal is too large to be represented in any signed integer type}} + 0xFFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'1__uwb; // expected-error {{integer literal is too large to be represented in any integer type}} +} + +void TestTypes(void) { + // 2 value bits, one sign bit + _Static_assert(__is_same(decltype(3__wb), _BitInt(3))); + // 2 value bits, one sign bit + _Static_assert(__is_same(decltype(-3__wb), _BitInt(3))); + // 2 value bits, no sign bit + _Static_assert(__is_same(decltype(3__uwb), unsigned _BitInt(2))); + // 4 value bits, one sign bit + _Static_assert(__is_same(decltype(0xF__wb), _BitInt(5))); + // 4 value bits, one sign bit + _Static_assert(__is_same(decltype(-0xF__wb), _BitInt(5))); + // 4 value bits, no sign bit + _Static_assert(__is_same(decltype(0xF__uwb), unsigned _BitInt(4))); +} >From d40fd45aa86ed9b47d91f9ff84f47df350d37624 Mon Sep 17 00:00:00 2001 From: Jin S <jins...@gmail.com> Date: Thu, 28 Mar 2024 00:35:08 -0400 Subject: [PATCH 2/2] remove whitespace --- clang/include/clang/Basic/DiagnosticGroups.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index 38c0c6af949f63..37f56ed6289d27 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -1516,5 +1516,5 @@ def UnsafeBufferUsage : DiagGroup<"unsafe-buffer-usage", [UnsafeBufferUsageInCon // Warnings and notes InstallAPI verification. 
def InstallAPIViolation : DiagGroup<"installapi-violation">; -// Warnings related to _BitInt extension +// Warnings related to _BitInt extension def BitIntExtension : DiagGroup<"bit-int-extension">; _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits