[clang] Lex: add support for `i128` and `ui128` suffixes (PR #130993)

Saleem Abdulrasool via cfe-commits Thu, 13 Mar 2025 10:26:52 -0700

https://github.com/compnerd updated 
https://github.com/llvm/llvm-project/pull/130993


>From 1021d05a73cb990bcbdd1948fd372fdf4b1a21ec Mon Sep 17 00:00:00 2001
From: Saleem Abdulrasool <compn...@compnerd.org>
Date: Wed, 12 Mar 2025 09:52:58 -0700
Subject: [PATCH] Lex: add support for `i128` and `ui128` suffixes

Microsoft's compiler supports an extension for 128-bit literals. This is
referenced in `intsafe.h` which is included transitively. When building
with modules, the literal parsing causes a failure due to the missing
support for the extension. To alleviate this issue, support parsing this
literal, especially now that there is the BitInt extension.

Take the opportunity to tighten up the code slightly by ensuring that we
do not access out-of-bounds characters when lexing the token.
---
 clang/docs/ReleaseNotes.rst              |  4 ++++
 clang/include/clang/Lex/LiteralSupport.h |  4 ++--
 clang/lib/Lex/LiteralSupport.cpp         | 14 +++++++++-----
 clang/lib/Sema/SemaExpr.cpp              | 16 ++++++++++++----
 clang/test/Lexer/ms-extensions.c         | 18 ++++++++++++++++--
 5 files changed, 43 insertions(+), 13 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 8989124611e66..86495e80eb188 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -363,6 +363,10 @@ Windows Support
   which makes ``offsetof`` provided by Microsoft's ``<stddef.h>`` to be defined
   correctly. (#GH59689)
 
+- Clang now can process the `i128` and `ui128` integral suffixes when MSVC
+  extensions are enabled. This allows for properly processing ``intsafe.h`` in
+  the Windows SDK.
+
 LoongArch Support
 ^^^^^^^^^^^^^^^^^
 
diff --git a/clang/include/clang/Lex/LiteralSupport.h 
b/clang/include/clang/Lex/LiteralSupport.h
index 705021fcfa5b1..ea5f63bc20399 100644
--- a/clang/include/clang/Lex/LiteralSupport.h
+++ b/clang/include/clang/Lex/LiteralSupport.h
@@ -82,8 +82,8 @@ class NumericLiteralParser {
   bool isAccum : 1;         // 1.0hk/k/lk/uhk/uk/ulk
   bool isBitInt : 1;        // 1wb, 1uwb (C23) or 1__wb, 1__uwb (Clang 
extension in C++
                             // mode)
-  uint8_t MicrosoftInteger; // Microsoft suffix extension i8, i16, i32, or i64.
-
+  uint8_t MicrosoftInteger; // Microsoft suffix extension i8, i16, i32, i64, or
+                            // i128.
 
   bool isFixedPointLiteral() const {
     return (saw_period || saw_exponent) && saw_fixed_point_suffix;
diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp
index 225a6c2d15baa..69dc057d0df4b 100644
--- a/clang/lib/Lex/LiteralSupport.cpp
+++ b/clang/lib/Lex/LiteralSupport.cpp
@@ -1073,8 +1073,8 @@ NumericLiteralParser::NumericLiteralParser(StringRef 
TokSpelling,
       continue;
     case 'i':
     case 'I':
-      if (LangOpts.MicrosoftExt && !isFPConstant) {
-        // Allow i8, i16, i32, and i64. First, look ahead and check if
+      if (LangOpts.MicrosoftExt && s + 1 < ThisTokEnd && !isFPConstant) {
+        // Allow i8, i16, i32, i64, and i128. First, look ahead and check if
         // suffixes are Microsoft integers and not the imaginary unit.
         uint8_t Bits = 0;
         size_t ToSkip = 0;
@@ -1084,19 +1084,23 @@ NumericLiteralParser::NumericLiteralParser(StringRef 
TokSpelling,
           ToSkip = 2;
           break;
         case '1':
-          if (s[2] == '6') { // i16 suffix
+          if (s + 2 < ThisTokEnd && s[2] == '6') { // i16 suffix
             Bits = 16;
             ToSkip = 3;
+          } else if (s + 3 < ThisTokEnd && s[2] == '2' &&
+                     s[3] == '8') { // i128 suffix
+            Bits = 128;
+            ToSkip = 4;
           }
           break;
         case '3':
-          if (s[2] == '2') { // i32 suffix
+          if (s + 2 < ThisTokEnd && s[2] == '2') { // i32 suffix
             Bits = 32;
             ToSkip = 3;
           }
           break;
         case '6':
-          if (s[2] == '4') { // i64 suffix
+          if (s + 2 < ThisTokEnd && s[2] == '4') { // i64 suffix
             Bits = 64;
             ToSkip = 3;
           }
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 7d4d3cc24e539..e19136b394800 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -3924,10 +3924,18 @@ ExprResult Sema::ActOnNumericConstant(const Token &Tok, 
Scope *UDLScope) {
     // to get the integer value from an overly-wide APInt is *extremely*
     // expensive, so the naive approach of assuming
     // llvm::IntegerType::MAX_INT_BITS is a big performance hit.
-    unsigned BitsNeeded =
-        Literal.isBitInt ? llvm::APInt::getSufficientBitsNeeded(
-                               Literal.getLiteralDigits(), Literal.getRadix())
-                         : Context.getTargetInfo().getIntMaxTWidth();
+    unsigned BitsNeeded = Context.getTargetInfo().getIntMaxTWidth();
+    if (Literal.isBitInt)
+      BitsNeeded = llvm::APInt::getSufficientBitsNeeded(
+          Literal.getLiteralDigits(), Literal.getRadix());
+    if (Literal.MicrosoftInteger) {
+      if (Literal.MicrosoftInteger == 128 &&
+          !Context.getTargetInfo().hasInt128Type())
+        PP.Diag(Tok.getLocation(), diag::err_integer_literal_too_large)
+            << Literal.isUnsigned;
+      BitsNeeded = Literal.MicrosoftInteger;
+    }
+
     llvm::APInt ResultVal(BitsNeeded, 0);
 
     if (Literal.GetIntegerValue(ResultVal)) {
diff --git a/clang/test/Lexer/ms-extensions.c b/clang/test/Lexer/ms-extensions.c
index f1eed337b8737..d1885a5696bce 100644
--- a/clang/test/Lexer/ms-extensions.c
+++ b/clang/test/Lexer/ms-extensions.c
@@ -13,16 +13,30 @@ __int64 w = 0x43ui64;
 __int64 z = 9Li64;  // expected-error {{invalid suffix}}
 __int64 q = 10lli64;  // expected-error {{invalid suffix}}
 
-__complex double c1 = 1i;
-__complex double c2 = 1.0i;
+__complex double c1 = 1i; // GNU extension
+__complex double c2 = 1.0i; // GNU extension
 __complex float c3 = 1.0if;
 
+#define UINT128_MAX 0xffffffffffffffffffffffffffffffffui128
 #define ULLONG_MAX 0xffffffffffffffffui64
 #define UINT 0xffffffffui32
 #define USHORT 0xffffui16
 #define UCHAR 0xffui8
 
+#define INT128_MAX 170141183460469231731687303715884105727i128
+
 void a(void) {
+#if __SIZEOF_INT128__
+        __int128 j = UINT128_MAX;
+        __int128 k = INT128_MAX;
+#else
+        int j = UINT128_MAX;
+        // expected-warning@-1{{implicit conversion from 'unsigned __int128' 
to 'int' changes value from 340282366920938463463374607431768211455 to -1}}
+        // expected-error@-2{{integer literal is too large to be represented 
in any integer type}}
+        int k = INT128_MAX;
+        // expected-warning@-1{{implicit conversion from '__int128' to 'int' 
changes value from 170141183460469231731687303715884105727 to -1}}
+        // expected-error@-2{{integer literal is too large to be represented 
in any signed integer type}}
+#endif
        unsigned long long m = ULLONG_MAX;
        unsigned int n = UINT;
         unsigned short s = USHORT;

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] Lex: add support for `i128` and `ui128` suffixes (PR #130993)

Reply via email to