[clang] [Clang] [Lexer] Detect SSE4.2 availability at runtime in fastParseASCIIIdentifier (PR #171914)

Thibault Monnier via cfe-commits Sun, 28 Dec 2025 08:44:37 -0800

https://github.com/Thibault-Monnier updated 
https://github.com/llvm/llvm-project/pull/171914


>From 4fc9a07698e1a4627a050ba6fa9df3f1f8725451 Mon Sep 17 00:00:00 2001
From: Thibault-Monnier <[email protected]>
Date: Thu, 11 Dec 2025 22:02:35 +0100
Subject: [PATCH 1/6] Detect sse4.2 availability at runtime to use it on modern
 processors

---
 clang/lib/Lex/Lexer.cpp | 35 ++++++++++++++++++++++++++---------
 1 file changed, 26 insertions(+), 9 deletions(-)

diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index b282a600c0e56..3b8fa0b9b7f36 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -46,9 +46,7 @@
 #include <string>
 #include <tuple>
 
-#ifdef __SSE4_2__
 #include <nmmintrin.h>
-#endif
 
 using namespace clang;
 
@@ -1921,9 +1919,17 @@ bool Lexer::LexUnicodeIdentifierStart(Token &Result, uint32_t C,
 }
 
 static const char *
-fastParseASCIIIdentifier(const char *CurPtr,
-                         [[maybe_unused]] const char *BufferEnd) {
-#ifdef __SSE4_2__
+fastParseASCIIIdentifierScalar(const char *CurPtr,
+                               [[maybe_unused]] const char *BufferEnd) {
+  unsigned char C = *CurPtr;
+  while (isAsciiIdentifierContinue(C))
+    C = *++CurPtr;
+  return CurPtr;
+}
+
+__attribute__((target("sse4.2"))) static const char *
+fastParseASCIIIdentifierSSE42(const char *CurPtr,
+                              [[maybe_unused]] const char *BufferEnd) {
   alignas(16) static constexpr char AsciiIdentifierRange[16] = {
       '_', '_', 'A', 'Z', 'a', 'z', '0', '9',
   };
@@ -1943,12 +1949,23 @@ fastParseASCIIIdentifier(const char *CurPtr,
       continue;
     return CurPtr;
   }
+
+  return fastParseASCIIIdentifierScalar(CurPtr, BufferEnd);
+}
+
+static bool supportsSSE42() {
+  static bool SupportsSSE42 = __builtin_cpu_supports("sse4.2");
+  return SupportsSSE42;
+}
+
+static const char *fastParseASCIIIdentifier(const char *CurPtr,
+                                            const char *BufferEnd) {
+#ifndef __SSE4_2__
+  if (LLVM_UNLIKELY(!supportsSSE42()))
+    return fastParseASCIIIdentifierScalar(CurPtr, BufferEnd);
 #endif
 
-  unsigned char C = *CurPtr;
-  while (isAsciiIdentifierContinue(C))
-    C = *++CurPtr;
-  return CurPtr;
+  return fastParseASCIIIdentifierSSE42(CurPtr, BufferEnd);
 }
 
 bool Lexer::LexIdentifierContinue(Token &Result, const char *CurPtr) {

>From ce3bf515e7a60bd58ff5871352979999f5864b4b Mon Sep 17 00:00:00 2001
From: Thibault-Monnier <[email protected]>
Date: Thu, 11 Dec 2025 23:15:40 +0100
Subject: [PATCH 2/6] Only on x86

---
 clang/lib/Lex/Lexer.cpp | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index 3b8fa0b9b7f36..c195237dae1f4 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -46,7 +46,9 @@
 #include <string>
 #include <tuple>
 
+#if defined(__i386__) || defined(__x86_64__)
 #include <nmmintrin.h>
+#endif
 
 using namespace clang;
 
@@ -1927,6 +1929,8 @@ fastParseASCIIIdentifierScalar(const char *CurPtr,
   return CurPtr;
 }
 
+#if defined(__i386__) || defined(__x86_64__)
+
 __attribute__((target("sse4.2"))) static const char *
 fastParseASCIIIdentifierSSE42(const char *CurPtr,
                               [[maybe_unused]] const char *BufferEnd) {
@@ -1958,14 +1962,22 @@ static bool supportsSSE42() {
   return SupportsSSE42;
 }
 
+#endif
+
 static const char *fastParseASCIIIdentifier(const char *CurPtr,
                                             const char *BufferEnd) {
+#if !defined(__i386__) && !defined(__x86_64__)
+  return fastParseASCIIIdentifierScalar(CurPtr, BufferEnd);
+#else
+
 #ifndef __SSE4_2__
   if (LLVM_UNLIKELY(!supportsSSE42()))
     return fastParseASCIIIdentifierScalar(CurPtr, BufferEnd);
 #endif
 
   return fastParseASCIIIdentifierSSE42(CurPtr, BufferEnd);
+
+#endif
 }
 
 bool Lexer::LexIdentifierContinue(Token &Result, const char *CurPtr) {

>From 2109fdd371822ec77f870c5edbbdfccaaa7615be Mon Sep 17 00:00:00 2001
From: Thibault-Monnier <[email protected]>
Date: Sun, 14 Dec 2025 11:32:30 +0100
Subject: [PATCH 3/6] Not on windows

---
 clang/lib/Lex/Lexer.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index c195237dae1f4..86cfb47ca84d5 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -36,6 +36,7 @@
 #include "llvm/Support/NativeFormatting.h"
 #include "llvm/Support/Unicode.h"
 #include "llvm/Support/UnicodeCharRanges.h"
+
 #include <algorithm>
 #include <cassert>
 #include <cstddef>
@@ -1929,7 +1930,7 @@ fastParseASCIIIdentifierScalar(const char *CurPtr,
   return CurPtr;
 }
 
-#if defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__x86_64__) && !defined(_WIN32)
 
 __attribute__((target("sse4.2"))) static const char *
 fastParseASCIIIdentifierSSE42(const char *CurPtr,
@@ -1966,7 +1967,7 @@ static bool supportsSSE42() {
 
 static const char *fastParseASCIIIdentifier(const char *CurPtr,
                                             const char *BufferEnd) {
-#if !defined(__i386__) && !defined(__x86_64__)
+#if !defined(__i386__) && !defined(__x86_64__) || defined(_WIN32)
   return fastParseASCIIIdentifierScalar(CurPtr, BufferEnd);
 #else
 

>From d5485438edd460892bf210916827e0d92fc24065 Mon Sep 17 00:00:00 2001
From: Thibault-Monnier <[email protected]>
Date: Sun, 14 Dec 2025 14:37:43 +0100
Subject: [PATCH 4/6] Address comments

---
 clang/lib/Lex/Lexer.cpp | 26 ++++++++++----------------
 1 file changed, 10 insertions(+), 16 deletions(-)

diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index 86cfb47ca84d5..470579df233d1 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -1930,7 +1930,12 @@ fastParseASCIIIdentifierScalar(const char *CurPtr,
   return CurPtr;
 }
 
-#if defined(__i386__) || defined(__x86_64__) && !defined(_WIN32)
+// Fast path for lexing ASCII identifiers using SSE4.2 instructions.
+// Only enabled on x86/x86_64 when building with a compiler that supports
+// the 'target' attribute, which is used for runtime dispatch. Otherwise, we
+// fall back to the scalar implementation.
+#if (defined(__i386__) || defined(__x86_64__)) && defined(__has_attribute) &&  \
+    __has_attribute(target)
 
 __attribute__((target("sse4.2"))) static const char *
 fastParseASCIIIdentifierSSE42(const char *CurPtr,
@@ -1958,27 +1963,16 @@ fastParseASCIIIdentifierSSE42(const char *CurPtr,
   return fastParseASCIIIdentifierScalar(CurPtr, BufferEnd);
 }
 
-static bool supportsSSE42() {
-  static bool SupportsSSE42 = __builtin_cpu_supports("sse4.2");
-  return SupportsSSE42;
+__attribute__((target("sse4.2"))) static const char *
+fastParseASCIIIdentifier(const char *CurPtr, const char *BufferEnd) {
+  return fastParseASCIIIdentifierSSE42(CurPtr, BufferEnd);
 }
 
+__attribute__((target("default")))
 #endif
-
 static const char *fastParseASCIIIdentifier(const char *CurPtr,
                                             const char *BufferEnd) {
-#if !defined(__i386__) && !defined(__x86_64__) || defined(_WIN32)
   return fastParseASCIIIdentifierScalar(CurPtr, BufferEnd);
-#else
-
-#ifndef __SSE4_2__
-  if (LLVM_UNLIKELY(!supportsSSE42()))
-    return fastParseASCIIIdentifierScalar(CurPtr, BufferEnd);
-#endif
-
-  return fastParseASCIIIdentifierSSE42(CurPtr, BufferEnd);
-
-#endif
 }
 
 bool Lexer::LexIdentifierContinue(Token &Result, const char *CurPtr) {

>From 82cf41e460d2fa1105e9abbf925837fa9e9c7b45 Mon Sep 17 00:00:00 2001
From: Thibault-Monnier <[email protected]>
Date: Sun, 14 Dec 2025 19:32:29 +0100
Subject: [PATCH 5/6] Not on MSVC

---
 clang/lib/Lex/Lexer.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index 470579df233d1..58cd9348d3027 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -1935,7 +1935,7 @@ fastParseASCIIIdentifierScalar(const char *CurPtr,
 // the 'target' attribute, which is used for runtime dispatch. Otherwise, we
 // fall back to the scalar implementation.
 #if (defined(__i386__) || defined(__x86_64__)) && defined(__has_attribute) &&  \
-    __has_attribute(target)
+    __has_attribute(target) && !defined(_MSC_VER)
 
 __attribute__((target("sse4.2"))) static const char *
 fastParseASCIIIdentifierSSE42(const char *CurPtr,

>From 3e1428de60068729a17e5e3cca3942aa175ab975 Mon Sep 17 00:00:00 2001
From: Thibault-Monnier <[email protected]>
Date: Sun, 28 Dec 2025 17:42:50 +0100
Subject: [PATCH 6/6] Clean up

---
 clang/lib/Lex/Lexer.cpp | 720 +++++++++++++++++++++-------------------
 1 file changed, 383 insertions(+), 337 deletions(-)

diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index 58cd9348d3027..7d3731812cf68 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -106,6 +106,7 @@ bool Token::isSimpleTypeSpecifier(const LangOptions &LangOpts) const {
   case tok::kw__Sat:
 #define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) case tok::kw___##Trait:
 #include "clang/Basic/TransformTypeTraits.def"
+
   case tok::kw___auto_type:
   case tok::kw_char16_t:
   case tok::kw_char32_t:
@@ -142,8 +143,8 @@ void Lexer::InitLexer(const char *BufStart, const char *BufPtr,
     // Determine the size of the BOM.
     StringRef Buf(BufferStart, BufferEnd - BufferStart);
     size_t BOMLength = llvm::StringSwitch<size_t>(Buf)
-      .StartsWith("\xEF\xBB\xBF", 3) // UTF-8 BOM
-      .Default(0);
+                           .StartsWith("\xEF\xBB\xBF", 3) // UTF-8 BOM
+                           .Default(0);
 
     // Skip the BOM.
     BufferPtr += BOMLength;
@@ -257,14 +258,14 @@ Lexer *Lexer::Create_PragmaLexer(SourceLocation SpellingLoc,
   const char *StrData = SM.getCharacterData(SpellingLoc);
 
   L->BufferPtr = StrData;
-  L->BufferEnd = StrData+TokLen;
+  L->BufferEnd = StrData + TokLen;
   assert(L->BufferEnd[0] == 0 && "Buffer is not nul terminated!");
 
   // Set the SourceLocation with the remapping information.  This ensures that
   // GetMappedTokenLoc will remap the tokens as they are lexed.
-  L->FileLoc = SM.createExpansionLoc(SM.getLocForStartOfFile(SpellingFID),
-                                     ExpansionLocStart,
-                                     ExpansionLocEnd, TokLen);
+  L->FileLoc =
+      SM.createExpansionLoc(SM.getLocForStartOfFile(SpellingFID),
+                            ExpansionLocStart, ExpansionLocEnd, TokLen);
 
   // Ensure that the lexer thinks it is inside a directive, so that end \n will
   // return an EOD token.
@@ -343,12 +344,14 @@ static size_t getSpellingSlow(const Token &Tok, const char *BufPtr,
     // Raw string literals need special handling; trigraph expansion and line
     // splicing do not occur within their d-char-sequence nor within their
     // r-char-sequence.
-    if (Length >= 2 &&
-        Spelling[Length - 2] == 'R' && Spelling[Length - 1] == '"') {
+    if (Length >= 2 && Spelling[Length - 2] == 'R' &&
+        Spelling[Length - 1] == '"') {
       // Search backwards from the end of the token to find the matching closing
       // quote.
       const char *RawEnd = BufEnd;
-      do --RawEnd; while (*RawEnd != '"');
+      do
+        --RawEnd;
+      while (*RawEnd != '"');
       size_t RawLength = RawEnd - BufPtr + 1;
 
       // Everything between the quotes is included verbatim in the spelling.
@@ -376,11 +379,9 @@ static size_t getSpellingSlow(const Token &Tok, const char *BufPtr,
 /// after trigraph expansion and escaped-newline folding.  In particular, this
 /// wants to get the true, uncanonicalized, spelling of things like digraphs
 /// UCNs, etc.
-StringRef Lexer::getSpelling(SourceLocation loc,
-                             SmallVectorImpl<char> &buffer,
+StringRef Lexer::getSpelling(SourceLocation loc, SmallVectorImpl<char> &buffer,
                              const SourceManager &SM,
-                             const LangOptions &options,
-                             bool *invalid) {
+                             const LangOptions &options, bool *invalid) {
   // Break down the source location.
   FileIDAndOffset locInfo = SM.getDecomposedLoc(loc);
 
@@ -388,15 +389,16 @@ StringRef Lexer::getSpelling(SourceLocation loc,
   bool invalidTemp = false;
   StringRef file = SM.getBufferData(locInfo.first, &invalidTemp);
   if (invalidTemp) {
-    if (invalid) *invalid = true;
+    if (invalid)
+      *invalid = true;
     return {};
   }
 
   const char *tokenBegin = file.data() + locInfo.second;
 
   // Lex from the start of the given location.
-  Lexer lexer(SM.getLocForStartOfFile(locInfo.first), options,
-              file.begin(), tokenBegin, file.end());
+  Lexer lexer(SM.getLocForStartOfFile(locInfo.first), options, file.begin(),
+              tokenBegin, file.end());
   Token token;
   lexer.LexFromRawLexer(token);
 
@@ -422,8 +424,8 @@ std::string Lexer::getSpelling(const Token &Tok, const SourceManager &SourceMgr,
   assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
 
   bool CharDataInvalid = false;
-  const char *TokStart = SourceMgr.getCharacterData(Tok.getLocation(),
-                                                    &CharDataInvalid);
+  const char *TokStart =
+      SourceMgr.getCharacterData(Tok.getLocation(), &CharDataInvalid);
   if (Invalid)
     *Invalid = CharDataInvalid;
   if (CharDataInvalid)
@@ -489,15 +491,14 @@ unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer,
   }
 
   // Otherwise, hard case, relex the characters into the string.
-  return getSpellingSlow(Tok, TokStart, LangOpts, const_cast<char*>(Buffer));
+  return getSpellingSlow(Tok, TokStart, LangOpts, const_cast<char *>(Buffer));
 }
 
 /// MeasureTokenLength - Relex the token at the specified location and return
 /// its length in bytes in the input file.  If the token needs cleaning (e.g.
 /// includes a trigraph or an escaped newline) then this count includes bytes
 /// that are part of that.
-unsigned Lexer::MeasureTokenLength(SourceLocation Loc,
-                                   const SourceManager &SM,
+unsigned Lexer::MeasureTokenLength(SourceLocation Loc, const SourceManager &SM,
                                    const LangOptions &LangOpts) {
   Token TheTok;
   if (getRawToken(Loc, TheTok, SM, LangOpts))
@@ -508,8 +509,7 @@ unsigned Lexer::MeasureTokenLength(SourceLocation Loc,
 /// Relex the token at the specified location.
 /// \returns true if there was a failure, false on success.
 bool Lexer::getRawToken(SourceLocation Loc, Token &Result,
-                        const SourceManager &SM,
-                        const LangOptions &LangOpts,
+                        const SourceManager &SM, const LangOptions &LangOpts,
                         bool IgnoreWhiteSpace) {
   // TODO: this could be special cased for common tokens like identifiers, ')',
   // etc to make this faster, if it mattered.  Just look at StrData[0] to handle
@@ -526,7 +526,7 @@ bool Lexer::getRawToken(SourceLocation Loc, Token &Result,
   if (Invalid)
     return true;
 
-  const char *StrData = Buffer.data()+LocInfo.second;
+  const char *StrData = Buffer.data() + LocInfo.second;
 
   if (!IgnoreWhiteSpace && isWhitespace(SkipEscapedNewLines(StrData)[0]))
     return true;
@@ -626,10 +626,7 @@ SourceLocation Lexer::GetBeginningOfToken(SourceLocation Loc,
 
 namespace {
 
-enum PreambleDirectiveKind {
-  PDK_Skipped,
-  PDK_Unknown
-};
+enum PreambleDirectiveKind { PDK_Skipped, PDK_Unknown };
 
 } // namespace
 
@@ -713,31 +710,31 @@ PreambleBounds Lexer::ComputePreamble(StringRef Buffer,
       TheLexer.LexFromRawLexer(TheTok);
       if (TheTok.getKind() == tok::raw_identifier && !TheTok.needsCleaning()) {
         StringRef Keyword = TheTok.getRawIdentifier();
-        PreambleDirectiveKind PDK
-          = llvm::StringSwitch<PreambleDirectiveKind>(Keyword)
-              .Case("include", PDK_Skipped)
-              .Case("__include_macros", PDK_Skipped)
-              .Case("define", PDK_Skipped)
-              .Case("undef", PDK_Skipped)
-              .Case("line", PDK_Skipped)
-              .Case("error", PDK_Skipped)
-              .Case("pragma", PDK_Skipped)
-              .Case("import", PDK_Skipped)
-              .Case("include_next", PDK_Skipped)
-              .Case("warning", PDK_Skipped)
-              .Case("ident", PDK_Skipped)
-              .Case("sccs", PDK_Skipped)
-              .Case("assert", PDK_Skipped)
-              .Case("unassert", PDK_Skipped)
-              .Case("if", PDK_Skipped)
-              .Case("ifdef", PDK_Skipped)
-              .Case("ifndef", PDK_Skipped)
-              .Case("elif", PDK_Skipped)
-              .Case("elifdef", PDK_Skipped)
-              .Case("elifndef", PDK_Skipped)
-              .Case("else", PDK_Skipped)
-              .Case("endif", PDK_Skipped)
-              .Default(PDK_Unknown);
+        PreambleDirectiveKind PDK =
+            llvm::StringSwitch<PreambleDirectiveKind>(Keyword)
+                .Case("include", PDK_Skipped)
+                .Case("__include_macros", PDK_Skipped)
+                .Case("define", PDK_Skipped)
+                .Case("undef", PDK_Skipped)
+                .Case("line", PDK_Skipped)
+                .Case("error", PDK_Skipped)
+                .Case("pragma", PDK_Skipped)
+                .Case("import", PDK_Skipped)
+                .Case("include_next", PDK_Skipped)
+                .Case("warning", PDK_Skipped)
+                .Case("ident", PDK_Skipped)
+                .Case("sccs", PDK_Skipped)
+                .Case("assert", PDK_Skipped)
+                .Case("unassert", PDK_Skipped)
+                .Case("if", PDK_Skipped)
+                .Case("ifdef", PDK_Skipped)
+                .Case("ifndef", PDK_Skipped)
+                .Case("elif", PDK_Skipped)
+                .Case("elifdef", PDK_Skipped)
+                .Case("elifndef", PDK_Skipped)
+                .Case("else", PDK_Skipped)
+                .Case("endif", PDK_Skipped)
+                .Default(PDK_Unknown);
 
         switch (PDK) {
         case PDK_Skipped:
@@ -826,7 +823,7 @@ unsigned Lexer::getTokenPrefixLength(SourceLocation TokStart, unsigned CharNo,
   // advanced by 3 should return the location of b, not of \\.  One compounding
   // detail of this is that the escape may be made by a trigraph.
   if (!Lexer::isObviouslySimpleCharacter(*TokPtr))
-    PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr;
+    PhysOffset += Lexer::SkipEscapedNewLines(TokPtr) - TokPtr;
 
   return PhysOffset;
 }
@@ -890,8 +887,7 @@ bool Lexer::isAtStartOfMacroExpansion(SourceLocation loc,
 
 /// Returns true if the given MacroID location points at the last
 /// token of the macro expansion.
-bool Lexer::isAtEndOfMacroExpansion(SourceLocation loc,
-                                    const SourceManager &SM,
+bool Lexer::isAtEndOfMacroExpansion(SourceLocation loc, const SourceManager &SM,
                                     const LangOptions &LangOpts,
                                     SourceLocation *MacroEnd) {
   assert(loc.isValid() && loc.isMacroID() && "Expected a valid macro loc");
@@ -923,7 +919,7 @@ static CharSourceRange makeRangeFromFileLocs(CharSourceRange Range,
   SourceLocation End = Range.getEnd();
   assert(Begin.isFileID() && End.isFileID());
   if (Range.isTokenRange()) {
-    End = Lexer::getLocForEndOfToken(End, 0, SM,LangOpts);
+    End = Lexer::getLocForEndOfToken(End, 0, SM, LangOpts);
     if (End.isInvalid())
       return {};
   }
@@ -934,8 +930,7 @@ static CharSourceRange makeRangeFromFileLocs(CharSourceRange Range,
     return {};
 
   unsigned EndOffs;
-  if (!SM.isInFileID(End, FID, &EndOffs) ||
-      BeginOffs > EndOffs)
+  if (!SM.isInFileID(End, FID, &EndOffs) || BeginOffs > EndOffs)
     return {};
 
   return CharSourceRange::getCharRange(Begin, End);
@@ -982,10 +977,10 @@ CharSourceRange Lexer::makeFileCharRange(CharSourceRange Range,
   assert(Begin.isMacroID() && End.isMacroID());
   SourceLocation MacroBegin, MacroEnd;
   if (isAtStartOfMacroExpansion(Begin, SM, LangOpts, &MacroBegin) &&
-      ((Range.isTokenRange() && isAtEndOfMacroExpansion(End, SM, LangOpts,
-                                                        &MacroEnd)) ||
-       (Range.isCharRange() && isAtStartOfMacroExpansion(End, SM, LangOpts,
-                                                         &MacroEnd)))) {
+      ((Range.isTokenRange() &&
+        isAtEndOfMacroExpansion(End, SM, LangOpts, &MacroEnd)) ||
+       (Range.isCharRange() &&
+        isAtStartOfMacroExpansion(End, SM, LangOpts, &MacroEnd)))) {
     Range.setBegin(MacroBegin);
     Range.setEnd(MacroEnd);
     // Use the *original* `End`, not the expanded one in `MacroEnd`.
@@ -995,14 +990,14 @@ CharSourceRange Lexer::makeFileCharRange(CharSourceRange Range,
   }
 
   bool Invalid = false;
-  const SrcMgr::SLocEntry &BeginEntry = SM.getSLocEntry(SM.getFileID(Begin),
-                                                        &Invalid);
+  const SrcMgr::SLocEntry &BeginEntry =
+      SM.getSLocEntry(SM.getFileID(Begin), &Invalid);
   if (Invalid)
     return {};
 
   if (BeginEntry.getExpansion().isMacroArgExpansion()) {
-    const SrcMgr::SLocEntry &EndEntry = SM.getSLocEntry(SM.getFileID(End),
-                                                        &Invalid);
+    const SrcMgr::SLocEntry &EndEntry =
+        SM.getSLocEntry(SM.getFileID(End), &Invalid);
     if (Invalid)
       return {};
 
@@ -1018,27 +1013,28 @@ CharSourceRange Lexer::makeFileCharRange(CharSourceRange Range,
   return {};
 }
 
-StringRef Lexer::getSourceText(CharSourceRange Range,
-                               const SourceManager &SM,
-                               const LangOptions &LangOpts,
-                               bool *Invalid) {
+StringRef Lexer::getSourceText(CharSourceRange Range, const SourceManager &SM,
+                               const LangOptions &LangOpts, bool *Invalid) {
   Range = makeFileCharRange(Range, SM, LangOpts);
   if (Range.isInvalid()) {
-    if (Invalid) *Invalid = true;
+    if (Invalid)
+      *Invalid = true;
     return {};
   }
 
   // Break down the source location.
   FileIDAndOffset beginInfo = SM.getDecomposedLoc(Range.getBegin());
   if (beginInfo.first.isInvalid()) {
-    if (Invalid) *Invalid = true;
+    if (Invalid)
+      *Invalid = true;
     return {};
   }
 
   unsigned EndOffs;
   if (!SM.isInFileID(Range.getEnd(), beginInfo.first, &EndOffs) ||
       beginInfo.second > EndOffs) {
-    if (Invalid) *Invalid = true;
+    if (Invalid)
+      *Invalid = true;
     return {};
   }
 
@@ -1046,11 +1042,13 @@ StringRef Lexer::getSourceText(CharSourceRange Range,
   bool invalidTemp = false;
   StringRef file = SM.getBufferData(beginInfo.first, &invalidTemp);
   if (invalidTemp) {
-    if (Invalid) *Invalid = true;
+    if (Invalid)
+      *Invalid = true;
     return {};
   }
 
-  if (Invalid) *Invalid = false;
+  if (Invalid)
+    *Invalid = false;
   return file.substr(beginInfo.second, EndOffs - beginInfo.second);
 }
 
@@ -1184,8 +1182,8 @@ StringRef Lexer::getIndentationForLine(SourceLocation Loc,
 static LLVM_ATTRIBUTE_NOINLINE SourceLocation GetMappedTokenLoc(
     Preprocessor &PP, SourceLocation FileLoc, unsigned CharNo, unsigned TokLen);
 static SourceLocation GetMappedTokenLoc(Preprocessor &PP,
-                                        SourceLocation FileLoc,
-                                        unsigned CharNo, unsigned TokLen) {
+                                        SourceLocation FileLoc, unsigned CharNo,
+                                        unsigned TokLen) {
   assert(FileLoc.isMacroID() && "Must be a macro expansion");
 
   // Otherwise, we're lexing "mapped tokens".  This is used for things like
@@ -1214,7 +1212,7 @@ SourceLocation Lexer::getSourceLocation(const char *Loc,
 
   // In the normal case, we're just lexing from a simple file buffer, return
   // the file id from FileLoc with the offset specified.
-  unsigned CharNo = Loc-BufferStart;
+  unsigned CharNo = Loc - BufferStart;
   if (FileLoc.isFileID())
     return FileLoc.getLocWithOffset(CharNo);
 
@@ -1238,16 +1236,26 @@ DiagnosticBuilder Lexer::Diag(const char *Loc, unsigned DiagID) const {
 /// return the decoded trigraph letter it corresponds to, or '\0' if nothing.
 static char GetTrigraphCharForLetter(char Letter) {
   switch (Letter) {
-  default:   return 0;
-  case '=':  return '#';
-  case ')':  return ']';
-  case '(':  return '[';
-  case '!':  return '|';
-  case '\'': return '^';
-  case '>':  return '}';
-  case '/':  return '\\';
-  case '<':  return '{';
-  case '-':  return '~';
+  default:
+    return 0;
+  case '=':
+    return '#';
+  case ')':
+    return ']';
+  case '(':
+    return '[';
+  case '!':
+    return '|';
+  case '\'':
+    return '^';
+  case '>':
+    return '}';
+  case '/':
+    return '\\';
+  case '<':
+    return '{';
+  case '-':
+    return '~';
   }
 }
 
@@ -1262,12 +1270,12 @@ static char DecodeTrigraphChar(const char *CP, Lexer *L, bool Trigraphs) {
 
   if (!Trigraphs) {
     if (L && !L->isLexingRawMode())
-      L->Diag(CP-2, diag::trigraph_ignored);
+      L->Diag(CP - 2, diag::trigraph_ignored);
     return 0;
   }
 
   if (L && !L->isLexingRawMode())
-    L->Diag(CP-2, diag::trigraph_converted) << StringRef(&Res, 1);
+    L->Diag(CP - 2, diag::trigraph_converted) << StringRef(&Res, 1);
   return Res;
 }
 
@@ -1279,12 +1287,11 @@ unsigned Lexer::getEscapedNewLineSize(const char *Ptr) {
   while (isWhitespace(Ptr[Size])) {
     ++Size;
 
-    if (Ptr[Size-1] != '\n' && Ptr[Size-1] != '\r')
+    if (Ptr[Size - 1] != '\n' && Ptr[Size - 1] != '\r')
       continue;
 
     // If this is a \r\n or \n\r, skip the other half.
-    if ((Ptr[Size] == '\r' || Ptr[Size] == '\n') &&
-        Ptr[Size-1] != Ptr[Size])
+    if ((Ptr[Size] == '\r' || Ptr[Size] == '\n') && Ptr[Size - 1] != Ptr[Size])
       ++Size;
 
     return Size;
@@ -1301,21 +1308,22 @@ const char *Lexer::SkipEscapedNewLines(const char *P) {
   while (true) {
     const char *AfterEscape;
     if (*P == '\\') {
-      AfterEscape = P+1;
+      AfterEscape = P + 1;
     } else if (*P == '?') {
       // If not a trigraph for escape, bail out.
       if (P[1] != '?' || P[2] != '/')
         return P;
       // FIXME: Take LangOpts into account; the language might not
       // support trigraphs.
-      AfterEscape = P+3;
+      AfterEscape = P + 3;
     } else {
       return P;
     }
 
     unsigned NewLineSize = Lexer::getEscapedNewLineSize(AfterEscape);
-    if (NewLineSize == 0) return P;
-    P = AfterEscape+NewLineSize;
+    if (NewLineSize == 0)
+      return P;
+    P = AfterEscape + NewLineSize;
   }
 }
 
@@ -1342,7 +1350,7 @@ std::optional<Token> Lexer::findNextToken(SourceLocation Loc,
 
   // Lex from the start of the given location.
   Lexer lexer(SM.getLocForStartOfFile(LocInfo.first), LangOpts, File.begin(),
-                                      TokenBegin, File.end());
+              TokenBegin, File.end());
   lexer.SetCommentRetentionState(IncludeComments);
   // Find the token.
   Token Tok;
@@ -1427,7 +1435,7 @@ Lexer::SizedChar Lexer::getCharAndSizeSlow(const char *Ptr, Token *Tok) {
   if (Ptr[0] == '\\') {
     ++Size;
     ++Ptr;
-Slash:
+  Slash:
     // Common case, backslash-char where the char is not whitespace.
     if (!isWhitespace(Ptr[0]))
       return {'\\', Size};
@@ -1436,7 +1444,8 @@ Lexer::SizedChar Lexer::getCharAndSizeSlow(const char *Ptr, Token *Tok) {
     // newline.
     if (unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) {
       // Remember that this token needs to be cleaned.
-      if (Tok) Tok->setFlag(Token::NeedsCleaning);
+      if (Tok)
+        Tok->setFlag(Token::NeedsCleaning);
 
       // Warn if there was whitespace between the backslash and newline.
       if (Ptr[0] != '\n' && Ptr[0] != '\r' && Tok && !isLexingRawMode())
@@ -1444,7 +1453,7 @@ Lexer::SizedChar Lexer::getCharAndSizeSlow(const char *Ptr, Token *Tok) {
 
       // Found backslash<whitespace><newline>.  Parse the char after it.
       Size += EscapedNewLineSize;
-      Ptr  += EscapedNewLineSize;
+      Ptr += EscapedNewLineSize;
 
       // Use slow version to accumulate a correct size field.
       auto CharAndSize = getCharAndSizeSlow(Ptr, Tok);
@@ -1463,11 +1472,13 @@ Lexer::SizedChar Lexer::getCharAndSizeSlow(const char *Ptr, Token *Tok) {
     if (char C = DecodeTrigraphChar(Ptr + 2, Tok ? this : nullptr,
                                     LangOpts.Trigraphs)) {
       // Remember that this token needs to be cleaned.
-      if (Tok) Tok->setFlag(Token::NeedsCleaning);
+      if (Tok)
+        Tok->setFlag(Token::NeedsCleaning);
 
       Ptr += 3;
       Size += 3;
-      if (C == '\\') goto Slash;
+      if (C == '\\')
+        goto Slash;
       return {C, Size};
     }
   }
@@ -1490,7 +1501,7 @@ Lexer::SizedChar Lexer::getCharAndSizeSlowNoWarn(const char *Ptr,
   if (Ptr[0] == '\\') {
     ++Size;
     ++Ptr;
-Slash:
+  Slash:
     // Common case, backslash-char where the char is not whitespace.
     if (!isWhitespace(Ptr[0]))
       return {'\\', Size};
@@ -1499,7 +1510,7 @@ Lexer::SizedChar Lexer::getCharAndSizeSlowNoWarn(const char *Ptr,
     if (unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) {
       // Found backslash<whitespace><newline>.  Parse the char after it.
       Size += EscapedNewLineSize;
-      Ptr  += EscapedNewLineSize;
+      Ptr += EscapedNewLineSize;
 
       // Use slow version to accumulate a correct size field.
       auto CharAndSize = getCharAndSizeSlowNoWarn(Ptr, LangOpts);
@@ -1518,7 +1529,8 @@ Lexer::SizedChar Lexer::getCharAndSizeSlowNoWarn(const char *Ptr,
     if (char C = GetTrigraphCharForLetter(Ptr[2])) {
       Ptr += 3;
       Size += 3;
-      if (C == '\\') goto Slash;
+      if (C == '\\')
+        goto Slash;
       return {C, Size};
     }
   }
@@ -1656,10 +1668,7 @@ static void maybeDiagnoseIDCharCompat(DiagnosticsEngine &Diags, uint32_t C,
                                       CharSourceRange Range, bool IsFirst) {
   // Check C99 compatibility.
   if (!Diags.isIgnored(diag::warn_c99_compat_unicode_id, Range.getBegin())) {
-    enum {
-      CannotAppearInIdentifier = 0,
-      CannotStartIdentifier
-    };
+    enum { CannotAppearInIdentifier = 0, CannotStartIdentifier };
 
     static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
         C99AllowedIDCharRanges);
@@ -1667,12 +1676,10 @@ static void maybeDiagnoseIDCharCompat(DiagnosticsEngine &Diags, uint32_t C,
         C99DisallowedInitialIDCharRanges);
     if (!C99AllowedIDChars.contains(C)) {
       Diags.Report(Range.getBegin(), diag::warn_c99_compat_unicode_id)
-        << Range
-        << CannotAppearInIdentifier;
+          << Range << CannotAppearInIdentifier;
     } else if (IsFirst && C99DisallowedInitialIDChars.contains(C)) {
       Diags.Report(Range.getBegin(), diag::warn_c99_compat_unicode_id)
-        << Range
-        << CannotStartIdentifier;
+          << Range << CannotStartIdentifier;
     }
   }
 }
@@ -1690,57 +1697,56 @@ static void 
maybeDiagnoseUTF8Homoglyph(DiagnosticsEngine &Diags, uint32_t C,
     bool operator<(HomoglyphPair R) const { return Character < R.Character; }
   };
   static constexpr HomoglyphPair SortedHomoglyphs[] = {
-    {U'\u00ad', 0},   // SOFT HYPHEN
-    {U'\u01c3', '!'}, // LATIN LETTER RETROFLEX CLICK
-    {U'\u037e', ';'}, // GREEK QUESTION MARK
-    {U'\u200b', 0},   // ZERO WIDTH SPACE
-    {U'\u200c', 0},   // ZERO WIDTH NON-JOINER
-    {U'\u200d', 0},   // ZERO WIDTH JOINER
-    {U'\u2060', 0},   // WORD JOINER
-    {U'\u2061', 0},   // FUNCTION APPLICATION
-    {U'\u2062', 0},   // INVISIBLE TIMES
-    {U'\u2063', 0},   // INVISIBLE SEPARATOR
-    {U'\u2064', 0},   // INVISIBLE PLUS
-    {U'\u2212', '-'}, // MINUS SIGN
-    {U'\u2215', '/'}, // DIVISION SLASH
-    {U'\u2216', '\\'}, // SET MINUS
-    {U'\u2217', '*'}, // ASTERISK OPERATOR
-    {U'\u2223', '|'}, // DIVIDES
-    {U'\u2227', '^'}, // LOGICAL AND
-    {U'\u2236', ':'}, // RATIO
-    {U'\u223c', '~'}, // TILDE OPERATOR
-    {U'\ua789', ':'}, // MODIFIER LETTER COLON
-    {U'\ufeff', 0},   // ZERO WIDTH NO-BREAK SPACE
-    {U'\uff01', '!'}, // FULLWIDTH EXCLAMATION MARK
-    {U'\uff03', '#'}, // FULLWIDTH NUMBER SIGN
-    {U'\uff04', '$'}, // FULLWIDTH DOLLAR SIGN
-    {U'\uff05', '%'}, // FULLWIDTH PERCENT SIGN
-    {U'\uff06', '&'}, // FULLWIDTH AMPERSAND
-    {U'\uff08', '('}, // FULLWIDTH LEFT PARENTHESIS
-    {U'\uff09', ')'}, // FULLWIDTH RIGHT PARENTHESIS
-    {U'\uff0a', '*'}, // FULLWIDTH ASTERISK
-    {U'\uff0b', '+'}, // FULLWIDTH ASTERISK
-    {U'\uff0c', ','}, // FULLWIDTH COMMA
-    {U'\uff0d', '-'}, // FULLWIDTH HYPHEN-MINUS
-    {U'\uff0e', '.'}, // FULLWIDTH FULL STOP
-    {U'\uff0f', '/'}, // FULLWIDTH SOLIDUS
-    {U'\uff1a', ':'}, // FULLWIDTH COLON
-    {U'\uff1b', ';'}, // FULLWIDTH SEMICOLON
-    {U'\uff1c', '<'}, // FULLWIDTH LESS-THAN SIGN
-    {U'\uff1d', '='}, // FULLWIDTH EQUALS SIGN
-    {U'\uff1e', '>'}, // FULLWIDTH GREATER-THAN SIGN
-    {U'\uff1f', '?'}, // FULLWIDTH QUESTION MARK
-    {U'\uff20', '@'}, // FULLWIDTH COMMERCIAL AT
-    {U'\uff3b', '['}, // FULLWIDTH LEFT SQUARE BRACKET
-    {U'\uff3c', '\\'}, // FULLWIDTH REVERSE SOLIDUS
-    {U'\uff3d', ']'}, // FULLWIDTH RIGHT SQUARE BRACKET
-    {U'\uff3e', '^'}, // FULLWIDTH CIRCUMFLEX ACCENT
-    {U'\uff5b', '{'}, // FULLWIDTH LEFT CURLY BRACKET
-    {U'\uff5c', '|'}, // FULLWIDTH VERTICAL LINE
-    {U'\uff5d', '}'}, // FULLWIDTH RIGHT CURLY BRACKET
-    {U'\uff5e', '~'}, // FULLWIDTH TILDE
-    {0, 0}
-  };
+      {U'\u00ad', 0},    // SOFT HYPHEN
+      {U'\u01c3', '!'},  // LATIN LETTER RETROFLEX CLICK
+      {U'\u037e', ';'},  // GREEK QUESTION MARK
+      {U'\u200b', 0},    // ZERO WIDTH SPACE
+      {U'\u200c', 0},    // ZERO WIDTH NON-JOINER
+      {U'\u200d', 0},    // ZERO WIDTH JOINER
+      {U'\u2060', 0},    // WORD JOINER
+      {U'\u2061', 0},    // FUNCTION APPLICATION
+      {U'\u2062', 0},    // INVISIBLE TIMES
+      {U'\u2063', 0},    // INVISIBLE SEPARATOR
+      {U'\u2064', 0},    // INVISIBLE PLUS
+      {U'\u2212', '-'},  // MINUS SIGN
+      {U'\u2215', '/'},  // DIVISION SLASH
+      {U'\u2216', '\\'}, // SET MINUS
+      {U'\u2217', '*'},  // ASTERISK OPERATOR
+      {U'\u2223', '|'},  // DIVIDES
+      {U'\u2227', '^'},  // LOGICAL AND
+      {U'\u2236', ':'},  // RATIO
+      {U'\u223c', '~'},  // TILDE OPERATOR
+      {U'\ua789', ':'},  // MODIFIER LETTER COLON
+      {U'\ufeff', 0},    // ZERO WIDTH NO-BREAK SPACE
+      {U'\uff01', '!'},  // FULLWIDTH EXCLAMATION MARK
+      {U'\uff03', '#'},  // FULLWIDTH NUMBER SIGN
+      {U'\uff04', '$'},  // FULLWIDTH DOLLAR SIGN
+      {U'\uff05', '%'},  // FULLWIDTH PERCENT SIGN
+      {U'\uff06', '&'},  // FULLWIDTH AMPERSAND
+      {U'\uff08', '('},  // FULLWIDTH LEFT PARENTHESIS
+      {U'\uff09', ')'},  // FULLWIDTH RIGHT PARENTHESIS
+      {U'\uff0a', '*'},  // FULLWIDTH ASTERISK
+      {U'\uff0b', '+'},  // FULLWIDTH PLUS SIGN
+      {U'\uff0c', ','},  // FULLWIDTH COMMA
+      {U'\uff0d', '-'},  // FULLWIDTH HYPHEN-MINUS
+      {U'\uff0e', '.'},  // FULLWIDTH FULL STOP
+      {U'\uff0f', '/'},  // FULLWIDTH SOLIDUS
+      {U'\uff1a', ':'},  // FULLWIDTH COLON
+      {U'\uff1b', ';'},  // FULLWIDTH SEMICOLON
+      {U'\uff1c', '<'},  // FULLWIDTH LESS-THAN SIGN
+      {U'\uff1d', '='},  // FULLWIDTH EQUALS SIGN
+      {U'\uff1e', '>'},  // FULLWIDTH GREATER-THAN SIGN
+      {U'\uff1f', '?'},  // FULLWIDTH QUESTION MARK
+      {U'\uff20', '@'},  // FULLWIDTH COMMERCIAL AT
+      {U'\uff3b', '['},  // FULLWIDTH LEFT SQUARE BRACKET
+      {U'\uff3c', '\\'}, // FULLWIDTH REVERSE SOLIDUS
+      {U'\uff3d', ']'},  // FULLWIDTH RIGHT SQUARE BRACKET
+      {U'\uff3e', '^'},  // FULLWIDTH CIRCUMFLEX ACCENT
+      {U'\uff5b', '{'},  // FULLWIDTH LEFT CURLY BRACKET
+      {U'\uff5c', '|'},  // FULLWIDTH VERTICAL LINE
+      {U'\uff5d', '}'},  // FULLWIDTH RIGHT CURLY BRACKET
+      {U'\uff5e', '~'},  // FULLWIDTH TILDE
+      {0, 0}};
   auto Homoglyph =
       std::lower_bound(std::begin(SortedHomoglyphs),
                        std::end(SortedHomoglyphs) - 1, HomoglyphPair{C, '\0'});
@@ -1815,7 +1821,7 @@ bool Lexer::tryConsumeIdentifierUCN(const char *&CurPtr, 
unsigned Size,
   }
 
   Result.setFlag(Token::HasUCN);
-  if ((UCNPtr - CurPtr ==  6 && CurPtr[1] == 'u') ||
+  if ((UCNPtr - CurPtr == 6 && CurPtr[1] == 'u') ||
       (UCNPtr - CurPtr == 10 && CurPtr[1] == 'U'))
     CurPtr = UCNPtr;
   else
@@ -1921,9 +1927,7 @@ bool Lexer::LexUnicodeIdentifierStart(Token &Result, 
uint32_t C,
   return true;
 }
 
-static const char *
-fastParseASCIIIdentifierScalar(const char *CurPtr,
-                               [[maybe_unused]] const char *BufferEnd) {
+static const char *fastParseASCIIIdentifierScalar(const char *CurPtr) {
   unsigned char C = *CurPtr;
   while (isAsciiIdentifierContinue(C))
     C = *++CurPtr;
@@ -1936,10 +1940,8 @@ fastParseASCIIIdentifierScalar(const char *CurPtr,
 // fall back to the scalar implementation.
 #if (defined(__i386__) || defined(__x86_64__)) && defined(__has_attribute) &&  \
     __has_attribute(target) && !defined(_MSC_VER)
-
 __attribute__((target("sse4.2"))) static const char *
-fastParseASCIIIdentifierSSE42(const char *CurPtr,
-                              [[maybe_unused]] const char *BufferEnd) {
+fastParseASCIIIdentifierSSE42(const char *CurPtr, const char *BufferEnd) {
   alignas(16) static constexpr char AsciiIdentifierRange[16] = {
       '_', '_', 'A', 'Z', 'a', 'z', '0', '9',
   };
@@ -1960,7 +1962,7 @@ fastParseASCIIIdentifierSSE42(const char *CurPtr,
     return CurPtr;
   }
 
-  return fastParseASCIIIdentifierScalar(CurPtr, BufferEnd);
+  return fastParseASCIIIdentifierScalar(CurPtr);
 }
 
 __attribute__((target("sse4.2"))) static const char *
@@ -1972,7 +1974,7 @@ __attribute__((target("default")))
 #endif
 static const char *fastParseASCIIIdentifier(const char *CurPtr,
                                             const char *BufferEnd) {
-  return fastParseASCIIIdentifierScalar(CurPtr, BufferEnd);
+  return fastParseASCIIIdentifierScalar(CurPtr);
 }
 
 bool Lexer::LexIdentifierContinue(Token &Result, const char *CurPtr) {
@@ -2159,10 +2161,10 @@ const char *Lexer::LexUDSuffix(Token &Result, const 
char *CurPtr,
 
   if (!LangOpts.CPlusPlus11) {
     if (!isLexingRawMode())
-      Diag(CurPtr,
-           C == '_' ? diag::warn_cxx11_compat_user_defined_literal
-                    : diag::warn_cxx11_compat_reserved_user_defined_literal)
-        << FixItHint::CreateInsertion(getSourceLocation(CurPtr), " ");
+      Diag(CurPtr, C == '_'
+                       ? diag::warn_cxx11_compat_user_defined_literal
+                       : diag::warn_cxx11_compat_reserved_user_defined_literal)
+          << FixItHint::CreateInsertion(getSourceLocation(CurPtr), " ");
     return CurPtr;
   }
 
@@ -2180,7 +2182,7 @@ const char *Lexer::LexUDSuffix(Token &Result, const char 
*CurPtr,
       // valid suffix for a string literal or a numeric literal (this could be
       // the 'operator""if' defining a numeric literal operator).
       const unsigned MaxStandardSuffixLength = 3;
-      char Buffer[MaxStandardSuffixLength] = { C };
+      char Buffer[MaxStandardSuffixLength] = {C};
       unsigned Consumed = Size;
       unsigned Chars = 1;
       while (true) {
@@ -2238,8 +2240,7 @@ bool Lexer::LexStringLiteral(Token &Result, const char 
*CurPtr,
   const char *NulCharacter = nullptr;
 
   if (!isLexingRawMode() &&
-      (Kind == tok::utf8_string_literal ||
-       Kind == tok::utf16_string_literal ||
+      (Kind == tok::utf8_string_literal || Kind == tok::utf16_string_literal ||
        Kind == tok::utf32_string_literal))
     Diag(BufferPtr, LangOpts.CPlusPlus ? diag::warn_cxx98_compat_unicode_literal
                                        : diag::warn_c99_compat_unicode_literal);
@@ -2251,16 +2252,16 @@ bool Lexer::LexStringLiteral(Token &Result, const char 
*CurPtr,
     if (C == '\\')
       C = getAndAdvanceChar(CurPtr, Result);
 
-    if (C == '\n' || C == '\r' ||             // Newline.
-        (C == 0 && CurPtr-1 == BufferEnd)) {  // End of file.
+    if (C == '\n' || C == '\r' ||              // Newline.
+        (C == 0 && CurPtr - 1 == BufferEnd)) { // End of file.
       if (!isLexingRawMode() && !LangOpts.AsmPreprocessor)
         Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 1;
-      FormTokenWithChars(Result, CurPtr-1, tok::unknown);
+      FormTokenWithChars(Result, CurPtr - 1, tok::unknown);
       return true;
     }
 
     if (C == 0) {
-      if (isCodeCompletionPoint(CurPtr-1)) {
+      if (isCodeCompletionPoint(CurPtr - 1)) {
         if (ParsingFilename)
           codeCompleteIncludedFile(AfterQuote, CurPtr - 1, /*IsAngled=*/false);
         else
@@ -2270,7 +2271,7 @@ bool Lexer::LexStringLiteral(Token &Result, const char 
*CurPtr,
         return true;
       }
 
-      NulCharacter = CurPtr-1;
+      NulCharacter = CurPtr - 1;
     }
     C = getAndAdvanceChar(CurPtr, Result);
   }
@@ -2326,7 +2327,7 @@ bool Lexer::LexRawStringLiteral(Token &Result, const char 
*CurPtr,
         Diag(PrefixEnd, diag::err_invalid_newline_raw_delim);
       } else {
         Diag(PrefixEnd, diag::err_invalid_char_raw_delim)
-          << StringRef(PrefixEnd, 1);
+            << StringRef(PrefixEnd, 1);
       }
     }
 
@@ -2338,7 +2339,7 @@ bool Lexer::LexRawStringLiteral(Token &Result, const char 
*CurPtr,
 
       if (C == '"')
         break;
-      if (C == 0 && CurPtr-1 == BufferEnd) {
+      if (C == 0 && CurPtr - 1 == BufferEnd) {
         --CurPtr;
         break;
       }
@@ -2361,11 +2362,11 @@ bool Lexer::LexRawStringLiteral(Token &Result, const 
char *CurPtr,
         CurPtr += PrefixLen + 1; // skip over prefix and '"'
         break;
       }
-    } else if (C == 0 && CurPtr-1 == BufferEnd) { // End of file.
+    } else if (C == 0 && CurPtr - 1 == BufferEnd) { // End of file.
       if (!isLexingRawMode())
         Diag(BufferPtr, diag::err_unterminated_raw_string)
-          << StringRef(Prefix, PrefixLen);
-      FormTokenWithChars(Result, CurPtr-1, tok::unknown);
+            << StringRef(Prefix, PrefixLen);
+      FormTokenWithChars(Result, CurPtr - 1, tok::unknown);
       return true;
     }
   }
@@ -2409,7 +2410,7 @@ bool Lexer::LexAngledStringLiteral(Token &Result, const 
char *CurPtr) {
         FormTokenWithChars(Result, CurPtr - 1, tok::unknown);
         return true;
       }
-      NulCharacter = CurPtr-1;
+      NulCharacter = CurPtr - 1;
     }
     C = getAndAdvanceChar(CurPtr, Result);
   }
@@ -2489,23 +2490,23 @@ bool Lexer::LexCharConstant(Token &Result, const char 
*CurPtr,
     if (C == '\\')
       C = getAndAdvanceChar(CurPtr, Result);
 
-    if (C == '\n' || C == '\r' ||             // Newline.
-        (C == 0 && CurPtr-1 == BufferEnd)) {  // End of file.
+    if (C == '\n' || C == '\r' ||              // Newline.
+        (C == 0 && CurPtr - 1 == BufferEnd)) { // End of file.
       if (!isLexingRawMode() && !LangOpts.AsmPreprocessor)
         Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 0;
-      FormTokenWithChars(Result, CurPtr-1, tok::unknown);
+      FormTokenWithChars(Result, CurPtr - 1, tok::unknown);
       return true;
     }
 
     if (C == 0) {
-      if (isCodeCompletionPoint(CurPtr-1)) {
+      if (isCodeCompletionPoint(CurPtr - 1)) {
         PP->CodeCompleteNaturalLanguage();
-        FormTokenWithChars(Result, CurPtr-1, tok::unknown);
+        FormTokenWithChars(Result, CurPtr - 1, tok::unknown);
         cutOffLexing();
         return true;
       }
 
-      NulCharacter = CurPtr-1;
+      NulCharacter = CurPtr - 1;
     }
     C = getAndAdvanceChar(CurPtr, Result);
   }
@@ -2659,7 +2660,7 @@ bool Lexer::SkipLineComment(Token &Result, const char 
*CurPtr,
     const char *NextLine = CurPtr;
     if (C != 0) {
       // We found a newline, see if it's escaped.
-      const char *EscapePtr = CurPtr-1;
+      const char *EscapePtr = CurPtr - 1;
       bool HasSpace = false;
       while (isHorizontalWhitespace(*EscapePtr)) { // Skip whitespace.
         --EscapePtr;
@@ -2672,7 +2673,7 @@ bool Lexer::SkipLineComment(Token &Result, const char 
*CurPtr,
       else if (EscapePtr[0] == '/' && EscapePtr[-1] == '?' &&
                EscapePtr[-2] == '?' && LangOpts.Trigraphs)
         // Trigraph-escaped newline.
-        CurPtr = EscapePtr-2;
+        CurPtr = EscapePtr - 2;
       else
         break; // This is a newline, we're done.
 
@@ -2693,7 +2694,7 @@ bool Lexer::SkipLineComment(Token &Result, const char 
*CurPtr,
 
     // If we only read only one character, then no special handling is needed.
     // We're done and can skip forward to the newline.
-    if (C != 0 && CurPtr == OldPtr+1) {
+    if (C != 0 && CurPtr == OldPtr + 1) {
       CurPtr = NextLine;
       break;
     }
@@ -2709,14 +2710,14 @@ bool Lexer::SkipLineComment(Token &Result, const char 
*CurPtr,
           // line is also a // comment, but has spaces, don't emit a diagnostic.
           if (isWhitespace(C)) {
             const char *ForwardPtr = CurPtr;
-            while (isWhitespace(*ForwardPtr))  // Skip whitespace.
+            while (isWhitespace(*ForwardPtr)) // Skip whitespace.
               ++ForwardPtr;
             if (ForwardPtr[0] == '/' && ForwardPtr[1] == '/')
               break;
           }
 
           if (!isLexingRawMode())
-            Diag(OldPtr-1, diag::ext_multi_line_line_comment);
+            Diag(OldPtr - 1, diag::ext_multi_line_line_comment);
           break;
         }
     }
@@ -2726,7 +2727,7 @@ bool Lexer::SkipLineComment(Token &Result, const char 
*CurPtr,
       break;
     }
 
-    if (C == '\0' && isCodeCompletionPoint(CurPtr-1)) {
+    if (C == '\0' && isCodeCompletionPoint(CurPtr - 1)) {
       PP->CodeCompleteNaturalLanguage();
       cutOffLexing();
       return false;
@@ -2787,12 +2788,12 @@ bool Lexer::SaveLineComment(Token &Result, const char 
*CurPtr) {
     return true;
 
   assert(Spelling[0] == '/' && Spelling[1] == '/' && "Not line comment?");
-  Spelling[1] = '*';   // Change prefix to "/*".
-  Spelling += "*/";    // add suffix.
+  Spelling[1] = '*'; // Change prefix to "/*".
+  Spelling += "*/";  // add suffix.
 
   Result.setKind(tok::comment);
-  PP->CreateString(Spelling, Result,
-                   Result.getLocation(), Result.getLocation());
+  PP->CreateString(Spelling, Result, Result.getLocation(),
+                   Result.getLocation());
   return true;
 }
 
@@ -2900,7 +2901,7 @@ bool Lexer::SkipBlockComment(Token &Result, const char 
*CurPtr,
   unsigned CharSize;
   unsigned char C = getCharAndSize(CurPtr, CharSize);
   CurPtr += CharSize;
-  if (C == 0 && CurPtr == BufferEnd+1) {
+  if (C == 0 && CurPtr == BufferEnd + 1) {
     if (!isLexingRawMode())
       Diag(BufferPtr, diag::err_unterminated_block_comment);
     --CurPtr;
@@ -2940,7 +2941,8 @@ bool Lexer::SkipBlockComment(Token &Result, const char 
*CurPtr,
           goto MultiByteUTF8;
         C = *CurPtr++;
       }
-      if (C == '/') goto FoundSlash;
+      if (C == '/')
+        goto FoundSlash;
 
 #ifdef __SSE2__
       __m128i Slashes = _mm_set1_epi8('/');
@@ -2950,8 +2952,8 @@ bool Lexer::SkipBlockComment(Token &Result, const char 
*CurPtr,
           goto MultiByteUTF8;
         }
         // look for slashes
-        int cmp = _mm_movemask_epi8(_mm_cmpeq_epi8(*(const __m128i*)CurPtr,
-                                    Slashes));
+        int cmp = _mm_movemask_epi8(
+            _mm_cmpeq_epi8(*(const __m128i *)CurPtr, Slashes));
         if (cmp != 0) {
           // Adjust the pointer to point directly after the first slash. It's
           // not necessary to set C here, it will be overwritten at the end of
@@ -2965,10 +2967,8 @@ bool Lexer::SkipBlockComment(Token &Result, const char 
*CurPtr,
       __vector unsigned char LongUTF = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
                                         0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
                                         0x80, 0x80, 0x80, 0x80};
-      __vector unsigned char Slashes = {
-        '/', '/', '/', '/',  '/', '/', '/', '/',
-        '/', '/', '/', '/',  '/', '/', '/', '/'
-      };
+      __vector unsigned char Slashes = {'/', '/', '/', '/', '/', '/', '/', '/',
+                                        '/', '/', '/', '/', '/', '/', '/', '/'};
       while (CurPtr + 16 < BufferEnd) {
         if (LLVM_UNLIKELY(
                 vec_any_ge(*(const __vector unsigned char *)CurPtr, LongUTF)))
@@ -3027,8 +3027,8 @@ bool Lexer::SkipBlockComment(Token &Result, const char 
*CurPtr,
     }
 
     if (C == '/') {
-  FoundSlash:
-      if (CurPtr[-2] == '*')  // We found the final */.  We're done!
+    FoundSlash:
+      if (CurPtr[-2] == '*') // We found the final */.  We're done!
         break;
 
       if ((CurPtr[-2] == '\n' || CurPtr[-2] == '\r')) {
@@ -3044,9 +3044,9 @@ bool Lexer::SkipBlockComment(Token &Result, const char 
*CurPtr,
         // if this is a /*/, which will end the comment.  This misses cases with
         // embedded escaped newlines, but oh well.
         if (!isLexingRawMode())
-          Diag(CurPtr-1, diag::warn_nested_block_comment);
+          Diag(CurPtr - 1, diag::warn_nested_block_comment);
       }
-    } else if (C == 0 && CurPtr == BufferEnd+1) {
+    } else if (C == 0 && CurPtr == BufferEnd + 1) {
       if (!isLexingRawMode())
         Diag(BufferPtr, diag::err_unterminated_block_comment);
       // Note: the user probably forgot a */.  We could continue immediately
@@ -3063,7 +3063,7 @@ bool Lexer::SkipBlockComment(Token &Result, const char 
*CurPtr,
 
       BufferPtr = CurPtr;
       return false;
-    } else if (C == '\0' && isCodeCompletionPoint(CurPtr-1)) {
+    } else if (C == '\0' && isCodeCompletionPoint(CurPtr - 1)) {
       PP->CodeCompleteNaturalLanguage();
       cutOffLexing();
       return false;
@@ -3091,7 +3091,7 @@ bool Lexer::SkipBlockComment(Token &Result, const char 
*CurPtr,
   // efficiently now.  This is safe even in KeepWhitespaceMode because we would
   // have already returned above with the comment as a token.
   if (isHorizontalWhitespace(*CurPtr)) {
-    SkipWhitespace(Result, CurPtr+1, TokAtPhysicalStartOfLine);
+    SkipWhitespace(Result, CurPtr + 1, TokAtPhysicalStartOfLine);
     return false;
   }
 
@@ -3122,10 +3122,10 @@ void Lexer::ReadToEndOfLine(SmallVectorImpl<char> 
*Result) {
       if (Result)
         Result->push_back(Char);
       break;
-    case 0:  // Null.
+    case 0: // Null.
       // Found end of file?
-      if (CurPtr-1 != BufferEnd) {
-        if (isCodeCompletionPoint(CurPtr-1)) {
+      if (CurPtr - 1 != BufferEnd) {
+        if (isCodeCompletionPoint(CurPtr - 1)) {
           PP->CodeCompleteNaturalLanguage();
           cutOffLexing();
           return;
@@ -3142,7 +3142,7 @@ void Lexer::ReadToEndOfLine(SmallVectorImpl<char> 
*Result) {
     case '\n':
       // Okay, we found the end of the line. First, back up past the \0, \r, \n.
       assert(CurPtr[-1] == Char && "Trigraphs for newline?");
-      BufferPtr = CurPtr-1;
+      BufferPtr = CurPtr - 1;
 
       // Next, lex the character, which should handle the EOD transition.
       Lex(Tmp);
@@ -3176,7 +3176,7 @@ bool Lexer::LexEndOfFile(Token &Result, const char 
*CurPtr) {
     // Restore comment saving mode, in case it was disabled for directive.
     if (PP)
       resetExtendedTokenMode();
-    return true;  // Have a token.
+    return true; // Have a token.
   }
 
   // If we are in raw mode, return this event as an EOF token.  Let the caller
@@ -3276,11 +3276,11 @@ static const char *FindConflictEnd(const char *CurPtr, 
const char *BufferEnd,
     // Must occur at start of line.
     if (Pos == 0 ||
         (RestOfBuffer[Pos - 1] != '\r' && RestOfBuffer[Pos - 1] != '\n')) {
-      RestOfBuffer = RestOfBuffer.substr(Pos+TermLen);
+      RestOfBuffer = RestOfBuffer.substr(Pos + TermLen);
       Pos = RestOfBuffer.find(Terminator);
       continue;
     }
-    return RestOfBuffer.data()+Pos;
+    return RestOfBuffer.data() + Pos;
   }
   return nullptr;
 }
@@ -3291,8 +3291,7 @@ static const char *FindConflictEnd(const char *CurPtr, 
const char *BufferEnd,
 /// if not.
 bool Lexer::IsStartOfConflictMarker(const char *CurPtr) {
   // Only a conflict marker if it starts at the beginning of a line.
-  if (CurPtr != BufferStart &&
-      CurPtr[-1] != '\n' && CurPtr[-1] != '\r')
+  if (CurPtr != BufferStart && CurPtr[-1] != '\n' && CurPtr[-1] != '\r')
     return false;
 
   // Check to see if we have <<<<<<< or >>>>.
@@ -3335,8 +3334,7 @@ bool Lexer::IsStartOfConflictMarker(const char *CurPtr) {
 /// the line.  This returns true if it is a conflict marker and false if not.
 bool Lexer::HandleEndOfConflictMarker(const char *CurPtr) {
   // Only a conflict marker if it starts at the beginning of a line.
-  if (CurPtr != BufferStart &&
-      CurPtr[-1] != '\n' && CurPtr[-1] != '\r')
+  if (CurPtr != BufferStart && CurPtr[-1] != '\n' && CurPtr[-1] != '\r')
     return false;
 
   // If we have a situation where we don't care about conflict markers, ignore
@@ -3352,8 +3350,8 @@ bool Lexer::HandleEndOfConflictMarker(const char *CurPtr) 
{
   // If we do have it, search for the end of the conflict marker.  This could
   // fail if it got skipped with a '#if 0' or something.  Note that CurPtr might
   // be the end of conflict marker.
-  if (const char *End = FindConflictEnd(CurPtr, BufferEnd,
-                                        CurrentConflictMarkerState)) {
+  if (const char *End =
+          FindConflictEnd(CurPtr, BufferEnd, CurrentConflictMarkerState)) {
     CurPtr = End;
 
     // Skip ahead to the end of line.
@@ -3403,7 +3401,7 @@ bool Lexer::lexEditorPlaceholder(Token &Result, const 
char *CurPtr) {
 
 bool Lexer::isCodeCompletionPoint(const char *CurPtr) const {
   if (PP && PP->isCodeCompletionEnabled()) {
-    SourceLocation Loc = FileLoc.getLocWithOffset(CurPtr-BufferStart);
+    SourceLocation Loc = FileLoc.getLocWithOffset(CurPtr - BufferStart);
     return Loc == PP->getCodeCompletionLoc();
   }
 
@@ -3709,7 +3707,7 @@ bool Lexer::CheckUnicodeWhitespace(Token &Result, 
uint32_t C,
   if (!isLexingRawMode() && !PP->isPreprocessedOutput() &&
       isUnicodeWhitespace(C)) {
     Diag(BufferPtr, diag::ext_unicode_whitespace)
-      << makeCharRange(*this, BufferPtr, CurPtr);
+        << makeCharRange(*this, BufferPtr, CurPtr);
 
     Result.setFlag(Token::LeadingSpace);
     return true;
@@ -3749,7 +3747,7 @@ bool Lexer::Lex(Token &Result) {
   bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
   IsAtPhysicalStartOfLine = false;
   bool isRawLex = isLexingRawMode();
-  (void) isRawLex;
+  (void)isRawLex;
   bool returnedToken = LexTokenInternal(Result, atPhysicalStartOfLine);
   // (After the LexTokenInternal call, the lexer might be destroyed.)
   assert((returnedToken || !isRawLex) && "Raw lex must succeed");
@@ -3788,7 +3786,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool 
TokAtPhysicalStartOfLine) {
     Result.setFlag(Token::LeadingSpace);
   }
 
-  unsigned SizeTmp, SizeTmp2;   // Temporaries for use in cases below.
+  unsigned SizeTmp, SizeTmp2; // Temporaries for use in cases below.
 
   // Read a character, advancing over it.
   char Char = getAndAdvanceChar(CurPtr, Result);
@@ -3798,13 +3796,13 @@ bool Lexer::LexTokenInternal(Token &Result, bool 
TokAtPhysicalStartOfLine) {
     NewLinePtr = nullptr;
 
   switch (Char) {
-  case 0:  // Null.
+  case 0: // Null.
     // Found end of file?
-    if (CurPtr-1 == BufferEnd)
-      return LexEndOfFile(Result, CurPtr-1);
+    if (CurPtr - 1 == BufferEnd)
+      return LexEndOfFile(Result, CurPtr - 1);
 
     // Check if we are performing code completion.
-    if (isCodeCompletionPoint(CurPtr-1)) {
+    if (isCodeCompletionPoint(CurPtr - 1)) {
       // Return the code-completion token.
       Result.startToken();
       FormTokenWithChars(Result, CurPtr, tok::code_completion);
@@ -3812,7 +3810,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool 
TokAtPhysicalStartOfLine) {
     }
 
     if (!isLexingRawMode())
-      Diag(CurPtr-1, diag::null_in_file);
+      Diag(CurPtr - 1, diag::null_in_file);
     Result.setFlag(Token::LeadingSpace);
     if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
       return true; // KeepWhitespaceMode
@@ -3821,12 +3819,12 @@ bool Lexer::LexTokenInternal(Token &Result, bool 
TokAtPhysicalStartOfLine) {
     // (We manually eliminate the tail call to avoid recursion.)
     goto LexNextToken;
 
-  case 26:  // DOS & CP/M EOF: "^Z".
+  case 26: // DOS & CP/M EOF: "^Z".
     // If we're in Microsoft extensions mode, treat this as end of file.
     if (LangOpts.MicrosoftExt) {
       if (!isLexingRawMode())
-        Diag(CurPtr-1, diag::ext_ctrl_z_eof_microsoft);
-      return LexEndOfFile(Result, CurPtr-1);
+        Diag(CurPtr - 1, diag::ext_ctrl_z_eof_microsoft);
+      return LexEndOfFile(Result, CurPtr - 1);
     }
 
     // If Microsoft extensions are disabled, this is just random garbage.
@@ -3882,11 +3880,11 @@ bool Lexer::LexTokenInternal(Token &Result, bool 
TokAtPhysicalStartOfLine) {
     // too (without going through the big switch stmt).
     if (CurPtr[0] == '/' && CurPtr[1] == '/' && !inKeepCommentMode() &&
         LineComment && (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP)) {
-      if (SkipLineComment(Result, CurPtr+2, TokAtPhysicalStartOfLine))
+      if (SkipLineComment(Result, CurPtr + 2, TokAtPhysicalStartOfLine))
         return true; // There is a token to return.
       goto SkipIgnoredUnits;
     } else if (CurPtr[0] == '/' && CurPtr[1] == '*' && !inKeepCommentMode()) {
-      if (SkipBlockComment(Result, CurPtr+2, TokAtPhysicalStartOfLine))
+      if (SkipBlockComment(Result, CurPtr + 2, TokAtPhysicalStartOfLine))
         return true; // There is a token to return.
       goto SkipIgnoredUnits;
     } else if (isHorizontalWhitespace(*CurPtr)) {
@@ -3898,8 +3896,16 @@ bool Lexer::LexTokenInternal(Token &Result, bool 
TokAtPhysicalStartOfLine) {
 
   // C99 6.4.4.1: Integer Constants.
   // C99 6.4.4.2: Floating Constants.
-  case '0': case '1': case '2': case '3': case '4':
-  case '5': case '6': case '7': case '8': case '9':
+  case '0':
+  case '1':
+  case '2':
+  case '3':
+  case '4':
+  case '5':
+  case '6':
+  case '7':
+  case '8':
+  case '9':
     // Notify MIOpt that we read a non-whitespace/non-comment token.
     MIOpt.ReadToken();
     return LexNumericConstant(Result, CurPtr);
@@ -3927,24 +3933,26 @@ bool Lexer::LexTokenInternal(Token &Result, bool 
TokAtPhysicalStartOfLine) {
       // UTF-16 raw string literal
       if (Char == 'R' && LangOpts.RawStringLiterals &&
           getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
-        return LexRawStringLiteral(Result,
-                               ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
-                                           SizeTmp2, Result),
-                               tok::utf16_string_literal);
+        return LexRawStringLiteral(
+            Result,
+            ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2, Result),
+            tok::utf16_string_literal);
 
       if (Char == '8') {
         char Char2 = getCharAndSize(CurPtr + SizeTmp, SizeTmp2);
 
         // UTF-8 string literal
         if (Char2 == '"')
-          return LexStringLiteral(Result,
-                               ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
-                                           SizeTmp2, Result),
-                               tok::utf8_string_literal);
+          return LexStringLiteral(
+              Result,
+              ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2,
+                          Result),
+              tok::utf8_string_literal);
         if (Char2 == '\'' && (LangOpts.CPlusPlus17 || LangOpts.C23))
           return LexCharConstant(
-              Result, ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
-                                  SizeTmp2, Result),
+              Result,
+              ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2,
+                          Result),
               tok::utf8_char_constant);
 
         if (Char2 == 'R' && LangOpts.RawStringLiterals) {
@@ -3952,11 +3960,12 @@ bool Lexer::LexTokenInternal(Token &Result, bool 
TokAtPhysicalStartOfLine) {
           char Char3 = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
           // UTF-8 raw string literal
           if (Char3 == '"') {
-            return LexRawStringLiteral(Result,
-                   ConsumeChar(ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
-                                           SizeTmp2, Result),
-                               SizeTmp3, Result),
-                   tok::utf8_string_literal);
+            return LexRawStringLiteral(
+                Result,
+                ConsumeChar(ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+                                        SizeTmp2, Result),
+                            SizeTmp3, Result),
+                tok::utf8_string_literal);
           }
         }
       }
@@ -3985,10 +3994,10 @@ bool Lexer::LexTokenInternal(Token &Result, bool 
TokAtPhysicalStartOfLine) {
       // UTF-32 raw string literal
       if (Char == 'R' && LangOpts.RawStringLiterals &&
           getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
-        return LexRawStringLiteral(Result,
-                               ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
-                                           SizeTmp2, Result),
-                               tok::utf32_string_literal);
+        return LexRawStringLiteral(
+            Result,
+            ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2, Result),
+            tok::utf32_string_literal);
     }
 
     // treat U like the start of an identifier.
@@ -4002,15 +4011,14 @@ bool Lexer::LexTokenInternal(Token &Result, bool 
TokAtPhysicalStartOfLine) {
       Char = getCharAndSize(CurPtr, SizeTmp);
 
       if (Char == '"')
-        return LexRawStringLiteral(Result,
-                                   ConsumeChar(CurPtr, SizeTmp, Result),
+        return LexRawStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
                                    tok::string_literal);
     }
 
     // treat R like the start of an identifier.
     return LexIdentifierContinue(Result, CurPtr);
 
-  case 'L':   // Identifier (Loony) or wide literal (L'x' or L"xyz").
+  case 'L': // Identifier (Loony) or wide literal (L'x' or L"xyz").
     // Notify MIOpt that we read a non-whitespace/non-comment token.
     MIOpt.ReadToken();
     Char = getCharAndSize(CurPtr, SizeTmp);
@@ -4023,10 +4031,10 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
     // Wide raw string literal.
     if (LangOpts.RawStringLiterals && Char == 'R' &&
         getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
-      return LexRawStringLiteral(Result,
-                               ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
-                                           SizeTmp2, Result),
-                               tok::wide_string_literal);
+      return LexRawStringLiteral(
+          Result,
+          ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2, Result),
+          tok::wide_string_literal);
 
     // Wide character constant.
     if (Char == '\'')
@@ -4036,23 +4044,63 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
     [[fallthrough]];
 
   // C99 6.4.2: Identifiers.
-  case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
-  case 'H': case 'I': case 'J': case 'K':    /*'L'*/case 'M': case 'N':
-  case 'O': case 'P': case 'Q':    /*'R'*/case 'S': case 'T':    /*'U'*/
-  case 'V': case 'W': case 'X': case 'Y': case 'Z':
-  case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
-  case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
-  case 'o': case 'p': case 'q': case 'r': case 's': case 't':    /*'u'*/
-  case 'v': case 'w': case 'x': case 'y': case 'z':
+  case 'A':
+  case 'B':
+  case 'C':
+  case 'D':
+  case 'E':
+  case 'F':
+  case 'G':
+  case 'H':
+  case 'I':
+  case 'J':
+  case 'K': /*'L'*/
+  case 'M':
+  case 'N':
+  case 'O':
+  case 'P':
+  case 'Q': /*'R'*/
+  case 'S':
+  case 'T': /*'U'*/
+  case 'V':
+  case 'W':
+  case 'X':
+  case 'Y':
+  case 'Z':
+  case 'a':
+  case 'b':
+  case 'c':
+  case 'd':
+  case 'e':
+  case 'f':
+  case 'g':
+  case 'h':
+  case 'i':
+  case 'j':
+  case 'k':
+  case 'l':
+  case 'm':
+  case 'n':
+  case 'o':
+  case 'p':
+  case 'q':
+  case 'r':
+  case 's':
+  case 't': /*'u'*/
+  case 'v':
+  case 'w':
+  case 'x':
+  case 'y':
+  case 'z':
   case '_':
     // Notify MIOpt that we read a non-whitespace/non-comment token.
     MIOpt.ReadToken();
     return LexIdentifierContinue(Result, CurPtr);
 
-  case '$':   // $ in identifiers.
+  case '$': // $ in identifiers.
     if (LangOpts.DollarIdents) {
       if (!isLexingRawMode())
-        Diag(CurPtr-1, diag::ext_dollar_in_identifier);
+        Diag(CurPtr - 1, diag::ext_dollar_in_identifier);
       // Notify MIOpt that we read a non-whitespace/non-comment token.
       MIOpt.ReadToken();
       return LexIdentifierContinue(Result, CurPtr);
@@ -4108,10 +4156,10 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
       Kind = tok::periodstar;
       CurPtr += SizeTmp;
     } else if (Char == '.' &&
-               getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '.') {
+               getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '.') {
       Kind = tok::ellipsis;
-      CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
-                           SizeTmp2, Result);
+      CurPtr =
+          ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2, Result);
     } else {
       Kind = tok::period;
     }
@@ -4150,18 +4198,18 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
     break;
   case '-':
     Char = getCharAndSize(CurPtr, SizeTmp);
-    if (Char == '-') {      // --
+    if (Char == '-') { // --
       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
       Kind = tok::minusminus;
     } else if (Char == '>' && LangOpts.CPlusPlus &&
-               getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '*') {  // C++ ->*
-      CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
-                           SizeTmp2, Result);
+               getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '*') { // C++ ->*
+      CurPtr =
+          ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2, Result);
       Kind = tok::arrowstar;
-    } else if (Char == '>') {   // ->
+    } else if (Char == '>') { // ->
       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
       Kind = tok::arrow;
-    } else if (Char == '=') {   // -=
+    } else if (Char == '=') { // -=
       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
       Kind = tok::minusequal;
     } else {
@@ -4182,7 +4230,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
   case '/':
     // 6.4.9: Comments
     Char = getCharAndSize(CurPtr, SizeTmp);
-    if (Char == '/') {         // Line comment.
+    if (Char == '/') { // Line comment.
       // Even if Line comments are disabled (e.g. in C89 mode), we generally
       // want to lex this as a comment.  There is one problem with this though,
       // that in one particular corner case, this can change the behavior of the
@@ -4195,7 +4243,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
           LineComment && (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP);
       if (!TreatAsComment)
         if (!(PP && PP->isPreprocessedOutput()))
-          TreatAsComment = getCharAndSize(CurPtr+SizeTmp, SizeTmp2) != '*';
+          TreatAsComment = getCharAndSize(CurPtr + SizeTmp, SizeTmp2) != '*';
 
       if (TreatAsComment) {
         if (SkipLineComment(Result, ConsumeChar(CurPtr, SizeTmp, Result),
@@ -4209,7 +4257,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
       }
     }
 
-    if (Char == '*') {  // /**/ comment.
+    if (Char == '*') { // /**/ comment.
       if (SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result),
                            TokAtPhysicalStartOfLine))
         return true; // There is a token to return.
@@ -4232,21 +4280,21 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
       Kind = tok::percentequal;
       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
     } else if (LangOpts.Digraphs && Char == '>') {
-      Kind = tok::r_brace;                             // '%>' -> '}'
+      Kind = tok::r_brace; // '%>' -> '}'
       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
     } else if (LangOpts.Digraphs && Char == ':') {
       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
       Char = getCharAndSize(CurPtr, SizeTmp);
-      if (Char == '%' && getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == ':') {
-        Kind = tok::hashhash;                          // '%:%:' -> '##'
-        CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
-                             SizeTmp2, Result);
-      } else if (Char == '@' && LangOpts.MicrosoftExt) {// %:@ -> #@ -> Charize
+      if (Char == '%' && getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == ':') {
+        Kind = tok::hashhash; // '%:%:' -> '##'
+        CurPtr =
+            ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2, Result);
+      } else if (Char == '@' && LangOpts.MicrosoftExt) { // %:@ -> #@ -> Charize
         CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
         if (!isLexingRawMode())
           Diag(BufferPtr, diag::ext_charize_microsoft);
         Kind = tok::hashat;
-      } else {                                         // '%:' -> '#'
+      } else { // '%:' -> '#'
         // We parsed a # character.  If this occurs at the start of the line,
         // it's actually the start of a preprocessing directive.  Callback to
         // the preprocessor to handle it.
@@ -4265,35 +4313,35 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
     if (ParsingFilename) {
       return LexAngledStringLiteral(Result, CurPtr);
     } else if (Char == '<') {
-      char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
+      char After = getCharAndSize(CurPtr + SizeTmp, SizeTmp2);
       if (After == '=') {
         Kind = tok::lesslessequal;
-        CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
-                             SizeTmp2, Result);
-      } else if (After == '<' && IsStartOfConflictMarker(CurPtr-1)) {
+        CurPtr =
+            ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2, Result);
+      } else if (After == '<' && IsStartOfConflictMarker(CurPtr - 1)) {
         // If this is actually a '<<<<<<<' version control conflict marker,
         // recognize it as such and recover nicely.
         goto LexNextToken;
-      } else if (After == '<' && HandleEndOfConflictMarker(CurPtr-1)) {
+      } else if (After == '<' && HandleEndOfConflictMarker(CurPtr - 1)) {
         // If this is '<<<<' and we're in a Perforce-style conflict marker,
         // ignore it.
         goto LexNextToken;
       } else if (LangOpts.CUDA && After == '<') {
         Kind = tok::lesslessless;
-        CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
-                             SizeTmp2, Result);
+        CurPtr =
+            ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2, Result);
       } else {
         CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
         Kind = tok::lessless;
       }
     } else if (Char == '=') {
-      char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
+      char After = getCharAndSize(CurPtr + SizeTmp, SizeTmp2);
       if (After == '>') {
         if (LangOpts.CPlusPlus20) {
           if (!isLexingRawMode())
             Diag(BufferPtr, diag::warn_cxx17_compat_spaceship);
-          CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
-                               SizeTmp2, Result);
+          CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2,
+                               Result);
           Kind = tok::spaceship;
           break;
         }
@@ -4301,13 +4349,13 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
         // change in semantics if this turns up in C++ <=17 mode.
         if (LangOpts.CPlusPlus && !isLexingRawMode()) {
           Diag(BufferPtr, diag::warn_cxx20_compat_spaceship)
-            << FixItHint::CreateInsertion(
-                   getSourceLocation(CurPtr + SizeTmp, SizeTmp2), " ");
+              << FixItHint::CreateInsertion(
+                     getSourceLocation(CurPtr + SizeTmp, SizeTmp2), " ");
         }
       }
       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
       Kind = tok::lessequal;
-    } else if (LangOpts.Digraphs && Char == ':') {     // '<:' -> '['
+    } else if (LangOpts.Digraphs && Char == ':') { // '<:' -> '['
       if (LangOpts.CPlusPlus11 &&
           getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == ':') {
         // C++0x [lex.pptoken]p3:
@@ -4327,7 +4375,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
 
       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
       Kind = tok::l_square;
-    } else if (LangOpts.Digraphs && Char == '%') {     // '<%' -> '{'
+    } else if (LangOpts.Digraphs && Char == '%') { // '<%' -> '{'
       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
       Kind = tok::l_brace;
     } else if (Char == '#' && /*Not a trigraph*/ SizeTmp == 1 &&
@@ -4343,22 +4391,22 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
       Kind = tok::greaterequal;
     } else if (Char == '>') {
-      char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
+      char After = getCharAndSize(CurPtr + SizeTmp, SizeTmp2);
       if (After == '=') {
-        CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
-                             SizeTmp2, Result);
+        CurPtr =
+            ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2, Result);
         Kind = tok::greatergreaterequal;
-      } else if (After == '>' && IsStartOfConflictMarker(CurPtr-1)) {
+      } else if (After == '>' && IsStartOfConflictMarker(CurPtr - 1)) {
         // If this is actually a '>>>>' conflict marker, recognize it as such
         // and recover nicely.
         goto LexNextToken;
-      } else if (After == '>' && HandleEndOfConflictMarker(CurPtr-1)) {
+      } else if (After == '>' && HandleEndOfConflictMarker(CurPtr - 1)) {
         // If this is '>>>>>>>' and we're in a conflict marker, ignore it.
         goto LexNextToken;
       } else if (LangOpts.CUDA && After == '>') {
         Kind = tok::greatergreatergreater;
-        CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
-                             SizeTmp2, Result);
+        CurPtr =
+            ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2, Result);
       } else {
         CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
         Kind = tok::greatergreater;
@@ -4385,7 +4433,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
     } else if (Char == '|') {
       // If this is '|||||||' and we're in a conflict marker, ignore it.
-      if (CurPtr[1] == '|' && HandleEndOfConflictMarker(CurPtr-1))
+      if (CurPtr[1] == '|' && HandleEndOfConflictMarker(CurPtr - 1))
         goto LexNextToken;
       Kind = tok::pipepipe;
       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
@@ -4412,7 +4460,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
     Char = getCharAndSize(CurPtr, SizeTmp);
     if (Char == '=') {
       // If this is '====' and we're in a conflict marker, ignore it.
-      if (CurPtr[1] == '=' && HandleEndOfConflictMarker(CurPtr-1))
+      if (CurPtr[1] == '=' && HandleEndOfConflictMarker(CurPtr - 1))
         goto LexNextToken;
 
       Kind = tok::equalequal;
@@ -4429,7 +4477,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
     if (Char == '#') {
       Kind = tok::hashhash;
       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
-    } else if (Char == '@' && LangOpts.MicrosoftExt) {  // #@ -> Charize
+    } else if (Char == '@' && LangOpts.MicrosoftExt) { // #@ -> Charize
       Kind = tok::hashat;
       if (!isLexingRawMode())
         Diag(BufferPtr, diag::ext_charize_microsoft);
@@ -4485,11 +4533,9 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
     // We can't just reset CurPtr to BufferPtr because BufferPtr may point to
     // an escaped newline.
     --CurPtr;
-    llvm::ConversionResult Status =
-        llvm::convertUTF8Sequence((const llvm::UTF8 **)&CurPtr,
-                                  (const llvm::UTF8 *)BufferEnd,
-                                  &CodePoint,
-                                  llvm::strictConversion);
+    llvm::ConversionResult Status = llvm::convertUTF8Sequence(
+        (const llvm::UTF8 **)&CurPtr, (const llvm::UTF8 *)BufferEnd, &CodePoint,
+        llvm::strictConversion);
     if (Status == llvm::conversionOK) {
       if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
         if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
@@ -4514,7 +4560,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
     // just diagnose the invalid UTF-8, then drop the character.
     Diag(CurPtr, diag::err_invalid_utf8);
 
-    BufferPtr = CurPtr+1;
+    BufferPtr = CurPtr + 1;
     // We're pretending the character didn't exist, so just try again with
     // this lexer.
     // (We manually eliminate the tail call to avoid recursion.)

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [Clang] [Lexer] Detect SSE4.2 availability at runtime in fastParseASCIIIdentifier (PR #171914)

Reply via email to