https://github.com/Thibault-Monnier updated https://github.com/llvm/llvm-project/pull/171914
>From 4fc9a07698e1a4627a050ba6fa9df3f1f8725451 Mon Sep 17 00:00:00 2001 From: Thibault-Monnier <[email protected]> Date: Thu, 11 Dec 2025 22:02:35 +0100 Subject: [PATCH 1/6] Detect sse4.2 availability at runtime to use it on modern processors --- clang/lib/Lex/Lexer.cpp | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index b282a600c0e56..3b8fa0b9b7f36 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -46,9 +46,7 @@ #include <string> #include <tuple> -#ifdef __SSE4_2__ #include <nmmintrin.h> -#endif using namespace clang; @@ -1921,9 +1919,17 @@ bool Lexer::LexUnicodeIdentifierStart(Token &Result, uint32_t C, } static const char * -fastParseASCIIIdentifier(const char *CurPtr, - [[maybe_unused]] const char *BufferEnd) { -#ifdef __SSE4_2__ +fastParseASCIIIdentifierScalar(const char *CurPtr, + [[maybe_unused]] const char *BufferEnd) { + unsigned char C = *CurPtr; + while (isAsciiIdentifierContinue(C)) + C = *++CurPtr; + return CurPtr; +} + +__attribute__((target("sse4.2"))) static const char * +fastParseASCIIIdentifierSSE42(const char *CurPtr, + [[maybe_unused]] const char *BufferEnd) { alignas(16) static constexpr char AsciiIdentifierRange[16] = { '_', '_', 'A', 'Z', 'a', 'z', '0', '9', }; @@ -1943,12 +1949,23 @@ fastParseASCIIIdentifier(const char *CurPtr, continue; return CurPtr; } + + return fastParseASCIIIdentifierScalar(CurPtr, BufferEnd); +} + +static bool supportsSSE42() { + static bool SupportsSSE42 = __builtin_cpu_supports("sse4.2"); + return SupportsSSE42; +} + +static const char *fastParseASCIIIdentifier(const char *CurPtr, + const char *BufferEnd) { +#ifndef __SSE4_2__ + if (LLVM_UNLIKELY(!supportsSSE42())) + return fastParseASCIIIdentifierScalar(CurPtr, BufferEnd); #endif - unsigned char C = *CurPtr; - while (isAsciiIdentifierContinue(C)) - C = *++CurPtr; - return CurPtr; + return fastParseASCIIIdentifierSSE42(CurPtr, BufferEnd); } 
bool Lexer::LexIdentifierContinue(Token &Result, const char *CurPtr) { >From ce3bf515e7a60bd58ff5871352979999f5864b4b Mon Sep 17 00:00:00 2001 From: Thibault-Monnier <[email protected]> Date: Thu, 11 Dec 2025 23:15:40 +0100 Subject: [PATCH 2/6] Only on x86 --- clang/lib/Lex/Lexer.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index 3b8fa0b9b7f36..c195237dae1f4 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -46,7 +46,9 @@ #include <string> #include <tuple> +#if defined(__i386__) || defined(__x86_64__) #include <nmmintrin.h> +#endif using namespace clang; @@ -1927,6 +1929,8 @@ fastParseASCIIIdentifierScalar(const char *CurPtr, return CurPtr; } +#if defined(__i386__) || defined(__x86_64__) + __attribute__((target("sse4.2"))) static const char * fastParseASCIIIdentifierSSE42(const char *CurPtr, [[maybe_unused]] const char *BufferEnd) { @@ -1958,14 +1962,22 @@ static bool supportsSSE42() { return SupportsSSE42; } +#endif + static const char *fastParseASCIIIdentifier(const char *CurPtr, const char *BufferEnd) { +#if !defined(__i386__) && !defined(__x86_64__) + return fastParseASCIIIdentifierScalar(CurPtr, BufferEnd); +#else + #ifndef __SSE4_2__ if (LLVM_UNLIKELY(!supportsSSE42())) return fastParseASCIIIdentifierScalar(CurPtr, BufferEnd); #endif return fastParseASCIIIdentifierSSE42(CurPtr, BufferEnd); + +#endif } bool Lexer::LexIdentifierContinue(Token &Result, const char *CurPtr) { >From 2109fdd371822ec77f870c5edbbdfccaaa7615be Mon Sep 17 00:00:00 2001 From: Thibault-Monnier <[email protected]> Date: Sun, 14 Dec 2025 11:32:30 +0100 Subject: [PATCH 3/6] Not on windows --- clang/lib/Lex/Lexer.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index c195237dae1f4..86cfb47ca84d5 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -36,6 +36,7 @@ #include "llvm/Support/NativeFormatting.h" 
#include "llvm/Support/Unicode.h" #include "llvm/Support/UnicodeCharRanges.h" + #include <algorithm> #include <cassert> #include <cstddef> @@ -1929,7 +1930,7 @@ fastParseASCIIIdentifierScalar(const char *CurPtr, return CurPtr; } -#if defined(__i386__) || defined(__x86_64__) +#if defined(__i386__) || defined(__x86_64__) && !defined(_WIN32) __attribute__((target("sse4.2"))) static const char * fastParseASCIIIdentifierSSE42(const char *CurPtr, @@ -1966,7 +1967,7 @@ static bool supportsSSE42() { static const char *fastParseASCIIIdentifier(const char *CurPtr, const char *BufferEnd) { -#if !defined(__i386__) && !defined(__x86_64__) +#if !defined(__i386__) && !defined(__x86_64__) || defined(_WIN32) return fastParseASCIIIdentifierScalar(CurPtr, BufferEnd); #else >From d5485438edd460892bf210916827e0d92fc24065 Mon Sep 17 00:00:00 2001 From: Thibault-Monnier <[email protected]> Date: Sun, 14 Dec 2025 14:37:43 +0100 Subject: [PATCH 4/6] Address comments --- clang/lib/Lex/Lexer.cpp | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index 86cfb47ca84d5..470579df233d1 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -1930,7 +1930,12 @@ fastParseASCIIIdentifierScalar(const char *CurPtr, return CurPtr; } -#if defined(__i386__) || defined(__x86_64__) && !defined(_WIN32) +// Fast path for lexing ASCII identifiers using SSE4.2 instructions. +// Only enabled on x86/x86_64 when building with a compiler that supports +// the 'target' attribute, which is used for runtime dispatch. Otherwise, we +// fall back to the scalar implementation. 
+#if (defined(__i386__) || defined(__x86_64__)) && defined(__has_attribute) && \ + __has_attribute(target) __attribute__((target("sse4.2"))) static const char * fastParseASCIIIdentifierSSE42(const char *CurPtr, @@ -1958,27 +1963,16 @@ fastParseASCIIIdentifierSSE42(const char *CurPtr, return fastParseASCIIIdentifierScalar(CurPtr, BufferEnd); } -static bool supportsSSE42() { - static bool SupportsSSE42 = __builtin_cpu_supports("sse4.2"); - return SupportsSSE42; +__attribute__((target("sse4.2"))) static const char * +fastParseASCIIIdentifier(const char *CurPtr, const char *BufferEnd) { + return fastParseASCIIIdentifierSSE42(CurPtr, BufferEnd); } +__attribute__((target("default"))) #endif - static const char *fastParseASCIIIdentifier(const char *CurPtr, const char *BufferEnd) { -#if !defined(__i386__) && !defined(__x86_64__) || defined(_WIN32) return fastParseASCIIIdentifierScalar(CurPtr, BufferEnd); -#else - -#ifndef __SSE4_2__ - if (LLVM_UNLIKELY(!supportsSSE42())) - return fastParseASCIIIdentifierScalar(CurPtr, BufferEnd); -#endif - - return fastParseASCIIIdentifierSSE42(CurPtr, BufferEnd); - -#endif } bool Lexer::LexIdentifierContinue(Token &Result, const char *CurPtr) { >From 82cf41e460d2fa1105e9abbf925837fa9e9c7b45 Mon Sep 17 00:00:00 2001 From: Thibault-Monnier <[email protected]> Date: Sun, 14 Dec 2025 19:32:29 +0100 Subject: [PATCH 5/6] Not on MSVC --- clang/lib/Lex/Lexer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index 470579df233d1..58cd9348d3027 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -1935,7 +1935,7 @@ fastParseASCIIIdentifierScalar(const char *CurPtr, // the 'target' attribute, which is used for runtime dispatch. Otherwise, we // fall back to the scalar implementation. 
#if (defined(__i386__) || defined(__x86_64__)) && defined(__has_attribute) && \ - __has_attribute(target) + __has_attribute(target) && !defined(_MSC_VER) __attribute__((target("sse4.2"))) static const char * fastParseASCIIIdentifierSSE42(const char *CurPtr, >From 3e1428de60068729a17e5e3cca3942aa175ab975 Mon Sep 17 00:00:00 2001 From: Thibault-Monnier <[email protected]> Date: Sun, 28 Dec 2025 17:42:50 +0100 Subject: [PATCH 6/6] Clean up --- clang/lib/Lex/Lexer.cpp | 720 +++++++++++++++++++++------------------- 1 file changed, 383 insertions(+), 337 deletions(-) diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index 58cd9348d3027..7d3731812cf68 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -106,6 +106,7 @@ bool Token::isSimpleTypeSpecifier(const LangOptions &LangOpts) const { case tok::kw__Sat: #define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) case tok::kw___##Trait: #include "clang/Basic/TransformTypeTraits.def" + case tok::kw___auto_type: case tok::kw_char16_t: case tok::kw_char32_t: @@ -142,8 +143,8 @@ void Lexer::InitLexer(const char *BufStart, const char *BufPtr, // Determine the size of the BOM. StringRef Buf(BufferStart, BufferEnd - BufferStart); size_t BOMLength = llvm::StringSwitch<size_t>(Buf) - .StartsWith("\xEF\xBB\xBF", 3) // UTF-8 BOM - .Default(0); + .StartsWith("\xEF\xBB\xBF", 3) // UTF-8 BOM + .Default(0); // Skip the BOM. BufferPtr += BOMLength; @@ -257,14 +258,14 @@ Lexer *Lexer::Create_PragmaLexer(SourceLocation SpellingLoc, const char *StrData = SM.getCharacterData(SpellingLoc); L->BufferPtr = StrData; - L->BufferEnd = StrData+TokLen; + L->BufferEnd = StrData + TokLen; assert(L->BufferEnd[0] == 0 && "Buffer is not nul terminated!"); // Set the SourceLocation with the remapping information. This ensures that // GetMappedTokenLoc will remap the tokens as they are lexed. 
- L->FileLoc = SM.createExpansionLoc(SM.getLocForStartOfFile(SpellingFID), - ExpansionLocStart, - ExpansionLocEnd, TokLen); + L->FileLoc = + SM.createExpansionLoc(SM.getLocForStartOfFile(SpellingFID), + ExpansionLocStart, ExpansionLocEnd, TokLen); // Ensure that the lexer thinks it is inside a directive, so that end \n will // return an EOD token. @@ -343,12 +344,14 @@ static size_t getSpellingSlow(const Token &Tok, const char *BufPtr, // Raw string literals need special handling; trigraph expansion and line // splicing do not occur within their d-char-sequence nor within their // r-char-sequence. - if (Length >= 2 && - Spelling[Length - 2] == 'R' && Spelling[Length - 1] == '"') { + if (Length >= 2 && Spelling[Length - 2] == 'R' && + Spelling[Length - 1] == '"') { // Search backwards from the end of the token to find the matching closing // quote. const char *RawEnd = BufEnd; - do --RawEnd; while (*RawEnd != '"'); + do + --RawEnd; + while (*RawEnd != '"'); size_t RawLength = RawEnd - BufPtr + 1; // Everything between the quotes is included verbatim in the spelling. @@ -376,11 +379,9 @@ static size_t getSpellingSlow(const Token &Tok, const char *BufPtr, /// after trigraph expansion and escaped-newline folding. In particular, this /// wants to get the true, uncanonicalized, spelling of things like digraphs /// UCNs, etc. -StringRef Lexer::getSpelling(SourceLocation loc, - SmallVectorImpl<char> &buffer, +StringRef Lexer::getSpelling(SourceLocation loc, SmallVectorImpl<char> &buffer, const SourceManager &SM, - const LangOptions &options, - bool *invalid) { + const LangOptions &options, bool *invalid) { // Break down the source location. 
FileIDAndOffset locInfo = SM.getDecomposedLoc(loc); @@ -388,15 +389,16 @@ StringRef Lexer::getSpelling(SourceLocation loc, bool invalidTemp = false; StringRef file = SM.getBufferData(locInfo.first, &invalidTemp); if (invalidTemp) { - if (invalid) *invalid = true; + if (invalid) + *invalid = true; return {}; } const char *tokenBegin = file.data() + locInfo.second; // Lex from the start of the given location. - Lexer lexer(SM.getLocForStartOfFile(locInfo.first), options, - file.begin(), tokenBegin, file.end()); + Lexer lexer(SM.getLocForStartOfFile(locInfo.first), options, file.begin(), + tokenBegin, file.end()); Token token; lexer.LexFromRawLexer(token); @@ -422,8 +424,8 @@ std::string Lexer::getSpelling(const Token &Tok, const SourceManager &SourceMgr, assert((int)Tok.getLength() >= 0 && "Token character range is bogus!"); bool CharDataInvalid = false; - const char *TokStart = SourceMgr.getCharacterData(Tok.getLocation(), - &CharDataInvalid); + const char *TokStart = + SourceMgr.getCharacterData(Tok.getLocation(), &CharDataInvalid); if (Invalid) *Invalid = CharDataInvalid; if (CharDataInvalid) @@ -489,15 +491,14 @@ unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer, } // Otherwise, hard case, relex the characters into the string. - return getSpellingSlow(Tok, TokStart, LangOpts, const_cast<char*>(Buffer)); + return getSpellingSlow(Tok, TokStart, LangOpts, const_cast<char *>(Buffer)); } /// MeasureTokenLength - Relex the token at the specified location and return /// its length in bytes in the input file. If the token needs cleaning (e.g. /// includes a trigraph or an escaped newline) then this count includes bytes /// that are part of that. 
-unsigned Lexer::MeasureTokenLength(SourceLocation Loc, - const SourceManager &SM, +unsigned Lexer::MeasureTokenLength(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts) { Token TheTok; if (getRawToken(Loc, TheTok, SM, LangOpts)) @@ -508,8 +509,7 @@ unsigned Lexer::MeasureTokenLength(SourceLocation Loc, /// Relex the token at the specified location. /// \returns true if there was a failure, false on success. bool Lexer::getRawToken(SourceLocation Loc, Token &Result, - const SourceManager &SM, - const LangOptions &LangOpts, + const SourceManager &SM, const LangOptions &LangOpts, bool IgnoreWhiteSpace) { // TODO: this could be special cased for common tokens like identifiers, ')', // etc to make this faster, if it mattered. Just look at StrData[0] to handle @@ -526,7 +526,7 @@ bool Lexer::getRawToken(SourceLocation Loc, Token &Result, if (Invalid) return true; - const char *StrData = Buffer.data()+LocInfo.second; + const char *StrData = Buffer.data() + LocInfo.second; if (!IgnoreWhiteSpace && isWhitespace(SkipEscapedNewLines(StrData)[0])) return true; @@ -626,10 +626,7 @@ SourceLocation Lexer::GetBeginningOfToken(SourceLocation Loc, namespace { -enum PreambleDirectiveKind { - PDK_Skipped, - PDK_Unknown -}; +enum PreambleDirectiveKind { PDK_Skipped, PDK_Unknown }; } // namespace @@ -713,31 +710,31 @@ PreambleBounds Lexer::ComputePreamble(StringRef Buffer, TheLexer.LexFromRawLexer(TheTok); if (TheTok.getKind() == tok::raw_identifier && !TheTok.needsCleaning()) { StringRef Keyword = TheTok.getRawIdentifier(); - PreambleDirectiveKind PDK - = llvm::StringSwitch<PreambleDirectiveKind>(Keyword) - .Case("include", PDK_Skipped) - .Case("__include_macros", PDK_Skipped) - .Case("define", PDK_Skipped) - .Case("undef", PDK_Skipped) - .Case("line", PDK_Skipped) - .Case("error", PDK_Skipped) - .Case("pragma", PDK_Skipped) - .Case("import", PDK_Skipped) - .Case("include_next", PDK_Skipped) - .Case("warning", PDK_Skipped) - .Case("ident", PDK_Skipped) - 
.Case("sccs", PDK_Skipped) - .Case("assert", PDK_Skipped) - .Case("unassert", PDK_Skipped) - .Case("if", PDK_Skipped) - .Case("ifdef", PDK_Skipped) - .Case("ifndef", PDK_Skipped) - .Case("elif", PDK_Skipped) - .Case("elifdef", PDK_Skipped) - .Case("elifndef", PDK_Skipped) - .Case("else", PDK_Skipped) - .Case("endif", PDK_Skipped) - .Default(PDK_Unknown); + PreambleDirectiveKind PDK = + llvm::StringSwitch<PreambleDirectiveKind>(Keyword) + .Case("include", PDK_Skipped) + .Case("__include_macros", PDK_Skipped) + .Case("define", PDK_Skipped) + .Case("undef", PDK_Skipped) + .Case("line", PDK_Skipped) + .Case("error", PDK_Skipped) + .Case("pragma", PDK_Skipped) + .Case("import", PDK_Skipped) + .Case("include_next", PDK_Skipped) + .Case("warning", PDK_Skipped) + .Case("ident", PDK_Skipped) + .Case("sccs", PDK_Skipped) + .Case("assert", PDK_Skipped) + .Case("unassert", PDK_Skipped) + .Case("if", PDK_Skipped) + .Case("ifdef", PDK_Skipped) + .Case("ifndef", PDK_Skipped) + .Case("elif", PDK_Skipped) + .Case("elifdef", PDK_Skipped) + .Case("elifndef", PDK_Skipped) + .Case("else", PDK_Skipped) + .Case("endif", PDK_Skipped) + .Default(PDK_Unknown); switch (PDK) { case PDK_Skipped: @@ -826,7 +823,7 @@ unsigned Lexer::getTokenPrefixLength(SourceLocation TokStart, unsigned CharNo, // advanced by 3 should return the location of b, not of \\. One compounding // detail of this is that the escape may be made by a trigraph. if (!Lexer::isObviouslySimpleCharacter(*TokPtr)) - PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr; + PhysOffset += Lexer::SkipEscapedNewLines(TokPtr) - TokPtr; return PhysOffset; } @@ -890,8 +887,7 @@ bool Lexer::isAtStartOfMacroExpansion(SourceLocation loc, /// Returns true if the given MacroID location points at the last /// token of the macro expansion. 
-bool Lexer::isAtEndOfMacroExpansion(SourceLocation loc, - const SourceManager &SM, +bool Lexer::isAtEndOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroEnd) { assert(loc.isValid() && loc.isMacroID() && "Expected a valid macro loc"); @@ -923,7 +919,7 @@ static CharSourceRange makeRangeFromFileLocs(CharSourceRange Range, SourceLocation End = Range.getEnd(); assert(Begin.isFileID() && End.isFileID()); if (Range.isTokenRange()) { - End = Lexer::getLocForEndOfToken(End, 0, SM,LangOpts); + End = Lexer::getLocForEndOfToken(End, 0, SM, LangOpts); if (End.isInvalid()) return {}; } @@ -934,8 +930,7 @@ static CharSourceRange makeRangeFromFileLocs(CharSourceRange Range, return {}; unsigned EndOffs; - if (!SM.isInFileID(End, FID, &EndOffs) || - BeginOffs > EndOffs) + if (!SM.isInFileID(End, FID, &EndOffs) || BeginOffs > EndOffs) return {}; return CharSourceRange::getCharRange(Begin, End); @@ -982,10 +977,10 @@ CharSourceRange Lexer::makeFileCharRange(CharSourceRange Range, assert(Begin.isMacroID() && End.isMacroID()); SourceLocation MacroBegin, MacroEnd; if (isAtStartOfMacroExpansion(Begin, SM, LangOpts, &MacroBegin) && - ((Range.isTokenRange() && isAtEndOfMacroExpansion(End, SM, LangOpts, - &MacroEnd)) || - (Range.isCharRange() && isAtStartOfMacroExpansion(End, SM, LangOpts, - &MacroEnd)))) { + ((Range.isTokenRange() && + isAtEndOfMacroExpansion(End, SM, LangOpts, &MacroEnd)) || + (Range.isCharRange() && + isAtStartOfMacroExpansion(End, SM, LangOpts, &MacroEnd)))) { Range.setBegin(MacroBegin); Range.setEnd(MacroEnd); // Use the *original* `End`, not the expanded one in `MacroEnd`. 
@@ -995,14 +990,14 @@ CharSourceRange Lexer::makeFileCharRange(CharSourceRange Range, } bool Invalid = false; - const SrcMgr::SLocEntry &BeginEntry = SM.getSLocEntry(SM.getFileID(Begin), - &Invalid); + const SrcMgr::SLocEntry &BeginEntry = + SM.getSLocEntry(SM.getFileID(Begin), &Invalid); if (Invalid) return {}; if (BeginEntry.getExpansion().isMacroArgExpansion()) { - const SrcMgr::SLocEntry &EndEntry = SM.getSLocEntry(SM.getFileID(End), - &Invalid); + const SrcMgr::SLocEntry &EndEntry = + SM.getSLocEntry(SM.getFileID(End), &Invalid); if (Invalid) return {}; @@ -1018,27 +1013,28 @@ CharSourceRange Lexer::makeFileCharRange(CharSourceRange Range, return {}; } -StringRef Lexer::getSourceText(CharSourceRange Range, - const SourceManager &SM, - const LangOptions &LangOpts, - bool *Invalid) { +StringRef Lexer::getSourceText(CharSourceRange Range, const SourceManager &SM, + const LangOptions &LangOpts, bool *Invalid) { Range = makeFileCharRange(Range, SM, LangOpts); if (Range.isInvalid()) { - if (Invalid) *Invalid = true; + if (Invalid) + *Invalid = true; return {}; } // Break down the source location. 
FileIDAndOffset beginInfo = SM.getDecomposedLoc(Range.getBegin()); if (beginInfo.first.isInvalid()) { - if (Invalid) *Invalid = true; + if (Invalid) + *Invalid = true; return {}; } unsigned EndOffs; if (!SM.isInFileID(Range.getEnd(), beginInfo.first, &EndOffs) || beginInfo.second > EndOffs) { - if (Invalid) *Invalid = true; + if (Invalid) + *Invalid = true; return {}; } @@ -1046,11 +1042,13 @@ StringRef Lexer::getSourceText(CharSourceRange Range, bool invalidTemp = false; StringRef file = SM.getBufferData(beginInfo.first, &invalidTemp); if (invalidTemp) { - if (Invalid) *Invalid = true; + if (Invalid) + *Invalid = true; return {}; } - if (Invalid) *Invalid = false; + if (Invalid) + *Invalid = false; return file.substr(beginInfo.second, EndOffs - beginInfo.second); } @@ -1184,8 +1182,8 @@ StringRef Lexer::getIndentationForLine(SourceLocation Loc, static LLVM_ATTRIBUTE_NOINLINE SourceLocation GetMappedTokenLoc( Preprocessor &PP, SourceLocation FileLoc, unsigned CharNo, unsigned TokLen); static SourceLocation GetMappedTokenLoc(Preprocessor &PP, - SourceLocation FileLoc, - unsigned CharNo, unsigned TokLen) { + SourceLocation FileLoc, unsigned CharNo, + unsigned TokLen) { assert(FileLoc.isMacroID() && "Must be a macro expansion"); // Otherwise, we're lexing "mapped tokens". This is used for things like @@ -1214,7 +1212,7 @@ SourceLocation Lexer::getSourceLocation(const char *Loc, // In the normal case, we're just lexing from a simple file buffer, return // the file id from FileLoc with the offset specified. - unsigned CharNo = Loc-BufferStart; + unsigned CharNo = Loc - BufferStart; if (FileLoc.isFileID()) return FileLoc.getLocWithOffset(CharNo); @@ -1238,16 +1236,26 @@ DiagnosticBuilder Lexer::Diag(const char *Loc, unsigned DiagID) const { /// return the decoded trigraph letter it corresponds to, or '\0' if nothing. 
static char GetTrigraphCharForLetter(char Letter) { switch (Letter) { - default: return 0; - case '=': return '#'; - case ')': return ']'; - case '(': return '['; - case '!': return '|'; - case '\'': return '^'; - case '>': return '}'; - case '/': return '\\'; - case '<': return '{'; - case '-': return '~'; + default: + return 0; + case '=': + return '#'; + case ')': + return ']'; + case '(': + return '['; + case '!': + return '|'; + case '\'': + return '^'; + case '>': + return '}'; + case '/': + return '\\'; + case '<': + return '{'; + case '-': + return '~'; } } @@ -1262,12 +1270,12 @@ static char DecodeTrigraphChar(const char *CP, Lexer *L, bool Trigraphs) { if (!Trigraphs) { if (L && !L->isLexingRawMode()) - L->Diag(CP-2, diag::trigraph_ignored); + L->Diag(CP - 2, diag::trigraph_ignored); return 0; } if (L && !L->isLexingRawMode()) - L->Diag(CP-2, diag::trigraph_converted) << StringRef(&Res, 1); + L->Diag(CP - 2, diag::trigraph_converted) << StringRef(&Res, 1); return Res; } @@ -1279,12 +1287,11 @@ unsigned Lexer::getEscapedNewLineSize(const char *Ptr) { while (isWhitespace(Ptr[Size])) { ++Size; - if (Ptr[Size-1] != '\n' && Ptr[Size-1] != '\r') + if (Ptr[Size - 1] != '\n' && Ptr[Size - 1] != '\r') continue; // If this is a \r\n or \n\r, skip the other half. - if ((Ptr[Size] == '\r' || Ptr[Size] == '\n') && - Ptr[Size-1] != Ptr[Size]) + if ((Ptr[Size] == '\r' || Ptr[Size] == '\n') && Ptr[Size - 1] != Ptr[Size]) ++Size; return Size; @@ -1301,21 +1308,22 @@ const char *Lexer::SkipEscapedNewLines(const char *P) { while (true) { const char *AfterEscape; if (*P == '\\') { - AfterEscape = P+1; + AfterEscape = P + 1; } else if (*P == '?') { // If not a trigraph for escape, bail out. if (P[1] != '?' || P[2] != '/') return P; // FIXME: Take LangOpts into account; the language might not // support trigraphs. 
- AfterEscape = P+3; + AfterEscape = P + 3; } else { return P; } unsigned NewLineSize = Lexer::getEscapedNewLineSize(AfterEscape); - if (NewLineSize == 0) return P; - P = AfterEscape+NewLineSize; + if (NewLineSize == 0) + return P; + P = AfterEscape + NewLineSize; } } @@ -1342,7 +1350,7 @@ std::optional<Token> Lexer::findNextToken(SourceLocation Loc, // Lex from the start of the given location. Lexer lexer(SM.getLocForStartOfFile(LocInfo.first), LangOpts, File.begin(), - TokenBegin, File.end()); + TokenBegin, File.end()); lexer.SetCommentRetentionState(IncludeComments); // Find the token. Token Tok; @@ -1427,7 +1435,7 @@ Lexer::SizedChar Lexer::getCharAndSizeSlow(const char *Ptr, Token *Tok) { if (Ptr[0] == '\\') { ++Size; ++Ptr; -Slash: + Slash: // Common case, backslash-char where the char is not whitespace. if (!isWhitespace(Ptr[0])) return {'\\', Size}; @@ -1436,7 +1444,8 @@ Lexer::SizedChar Lexer::getCharAndSizeSlow(const char *Ptr, Token *Tok) { // newline. if (unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) { // Remember that this token needs to be cleaned. - if (Tok) Tok->setFlag(Token::NeedsCleaning); + if (Tok) + Tok->setFlag(Token::NeedsCleaning); // Warn if there was whitespace between the backslash and newline. if (Ptr[0] != '\n' && Ptr[0] != '\r' && Tok && !isLexingRawMode()) @@ -1444,7 +1453,7 @@ Lexer::SizedChar Lexer::getCharAndSizeSlow(const char *Ptr, Token *Tok) { // Found backslash<whitespace><newline>. Parse the char after it. Size += EscapedNewLineSize; - Ptr += EscapedNewLineSize; + Ptr += EscapedNewLineSize; // Use slow version to accumulate a correct size field. auto CharAndSize = getCharAndSizeSlow(Ptr, Tok); @@ -1463,11 +1472,13 @@ Lexer::SizedChar Lexer::getCharAndSizeSlow(const char *Ptr, Token *Tok) { if (char C = DecodeTrigraphChar(Ptr + 2, Tok ? this : nullptr, LangOpts.Trigraphs)) { // Remember that this token needs to be cleaned. 
- if (Tok) Tok->setFlag(Token::NeedsCleaning); + if (Tok) + Tok->setFlag(Token::NeedsCleaning); Ptr += 3; Size += 3; - if (C == '\\') goto Slash; + if (C == '\\') + goto Slash; return {C, Size}; } } @@ -1490,7 +1501,7 @@ Lexer::SizedChar Lexer::getCharAndSizeSlowNoWarn(const char *Ptr, if (Ptr[0] == '\\') { ++Size; ++Ptr; -Slash: + Slash: // Common case, backslash-char where the char is not whitespace. if (!isWhitespace(Ptr[0])) return {'\\', Size}; @@ -1499,7 +1510,7 @@ Lexer::SizedChar Lexer::getCharAndSizeSlowNoWarn(const char *Ptr, if (unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) { // Found backslash<whitespace><newline>. Parse the char after it. Size += EscapedNewLineSize; - Ptr += EscapedNewLineSize; + Ptr += EscapedNewLineSize; // Use slow version to accumulate a correct size field. auto CharAndSize = getCharAndSizeSlowNoWarn(Ptr, LangOpts); @@ -1518,7 +1529,8 @@ Lexer::SizedChar Lexer::getCharAndSizeSlowNoWarn(const char *Ptr, if (char C = GetTrigraphCharForLetter(Ptr[2])) { Ptr += 3; Size += 3; - if (C == '\\') goto Slash; + if (C == '\\') + goto Slash; return {C, Size}; } } @@ -1656,10 +1668,7 @@ static void maybeDiagnoseIDCharCompat(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range, bool IsFirst) { // Check C99 compatibility. 
if (!Diags.isIgnored(diag::warn_c99_compat_unicode_id, Range.getBegin())) { - enum { - CannotAppearInIdentifier = 0, - CannotStartIdentifier - }; + enum { CannotAppearInIdentifier = 0, CannotStartIdentifier }; static const llvm::sys::UnicodeCharSet C99AllowedIDChars( C99AllowedIDCharRanges); @@ -1667,12 +1676,10 @@ static void maybeDiagnoseIDCharCompat(DiagnosticsEngine &Diags, uint32_t C, C99DisallowedInitialIDCharRanges); if (!C99AllowedIDChars.contains(C)) { Diags.Report(Range.getBegin(), diag::warn_c99_compat_unicode_id) - << Range - << CannotAppearInIdentifier; + << Range << CannotAppearInIdentifier; } else if (IsFirst && C99DisallowedInitialIDChars.contains(C)) { Diags.Report(Range.getBegin(), diag::warn_c99_compat_unicode_id) - << Range - << CannotStartIdentifier; + << Range << CannotStartIdentifier; } } } @@ -1690,57 +1697,56 @@ static void maybeDiagnoseUTF8Homoglyph(DiagnosticsEngine &Diags, uint32_t C, bool operator<(HomoglyphPair R) const { return Character < R.Character; } }; static constexpr HomoglyphPair SortedHomoglyphs[] = { - {U'\u00ad', 0}, // SOFT HYPHEN - {U'\u01c3', '!'}, // LATIN LETTER RETROFLEX CLICK - {U'\u037e', ';'}, // GREEK QUESTION MARK - {U'\u200b', 0}, // ZERO WIDTH SPACE - {U'\u200c', 0}, // ZERO WIDTH NON-JOINER - {U'\u200d', 0}, // ZERO WIDTH JOINER - {U'\u2060', 0}, // WORD JOINER - {U'\u2061', 0}, // FUNCTION APPLICATION - {U'\u2062', 0}, // INVISIBLE TIMES - {U'\u2063', 0}, // INVISIBLE SEPARATOR - {U'\u2064', 0}, // INVISIBLE PLUS - {U'\u2212', '-'}, // MINUS SIGN - {U'\u2215', '/'}, // DIVISION SLASH - {U'\u2216', '\\'}, // SET MINUS - {U'\u2217', '*'}, // ASTERISK OPERATOR - {U'\u2223', '|'}, // DIVIDES - {U'\u2227', '^'}, // LOGICAL AND - {U'\u2236', ':'}, // RATIO - {U'\u223c', '~'}, // TILDE OPERATOR - {U'\ua789', ':'}, // MODIFIER LETTER COLON - {U'\ufeff', 0}, // ZERO WIDTH NO-BREAK SPACE - {U'\uff01', '!'}, // FULLWIDTH EXCLAMATION MARK - {U'\uff03', '#'}, // FULLWIDTH NUMBER SIGN - {U'\uff04', '$'}, // FULLWIDTH 
DOLLAR SIGN - {U'\uff05', '%'}, // FULLWIDTH PERCENT SIGN - {U'\uff06', '&'}, // FULLWIDTH AMPERSAND - {U'\uff08', '('}, // FULLWIDTH LEFT PARENTHESIS - {U'\uff09', ')'}, // FULLWIDTH RIGHT PARENTHESIS - {U'\uff0a', '*'}, // FULLWIDTH ASTERISK - {U'\uff0b', '+'}, // FULLWIDTH ASTERISK - {U'\uff0c', ','}, // FULLWIDTH COMMA - {U'\uff0d', '-'}, // FULLWIDTH HYPHEN-MINUS - {U'\uff0e', '.'}, // FULLWIDTH FULL STOP - {U'\uff0f', '/'}, // FULLWIDTH SOLIDUS - {U'\uff1a', ':'}, // FULLWIDTH COLON - {U'\uff1b', ';'}, // FULLWIDTH SEMICOLON - {U'\uff1c', '<'}, // FULLWIDTH LESS-THAN SIGN - {U'\uff1d', '='}, // FULLWIDTH EQUALS SIGN - {U'\uff1e', '>'}, // FULLWIDTH GREATER-THAN SIGN - {U'\uff1f', '?'}, // FULLWIDTH QUESTION MARK - {U'\uff20', '@'}, // FULLWIDTH COMMERCIAL AT - {U'\uff3b', '['}, // FULLWIDTH LEFT SQUARE BRACKET - {U'\uff3c', '\\'}, // FULLWIDTH REVERSE SOLIDUS - {U'\uff3d', ']'}, // FULLWIDTH RIGHT SQUARE BRACKET - {U'\uff3e', '^'}, // FULLWIDTH CIRCUMFLEX ACCENT - {U'\uff5b', '{'}, // FULLWIDTH LEFT CURLY BRACKET - {U'\uff5c', '|'}, // FULLWIDTH VERTICAL LINE - {U'\uff5d', '}'}, // FULLWIDTH RIGHT CURLY BRACKET - {U'\uff5e', '~'}, // FULLWIDTH TILDE - {0, 0} - }; + {U'\u00ad', 0}, // SOFT HYPHEN + {U'\u01c3', '!'}, // LATIN LETTER RETROFLEX CLICK + {U'\u037e', ';'}, // GREEK QUESTION MARK + {U'\u200b', 0}, // ZERO WIDTH SPACE + {U'\u200c', 0}, // ZERO WIDTH NON-JOINER + {U'\u200d', 0}, // ZERO WIDTH JOINER + {U'\u2060', 0}, // WORD JOINER + {U'\u2061', 0}, // FUNCTION APPLICATION + {U'\u2062', 0}, // INVISIBLE TIMES + {U'\u2063', 0}, // INVISIBLE SEPARATOR + {U'\u2064', 0}, // INVISIBLE PLUS + {U'\u2212', '-'}, // MINUS SIGN + {U'\u2215', '/'}, // DIVISION SLASH + {U'\u2216', '\\'}, // SET MINUS + {U'\u2217', '*'}, // ASTERISK OPERATOR + {U'\u2223', '|'}, // DIVIDES + {U'\u2227', '^'}, // LOGICAL AND + {U'\u2236', ':'}, // RATIO + {U'\u223c', '~'}, // TILDE OPERATOR + {U'\ua789', ':'}, // MODIFIER LETTER COLON + {U'\ufeff', 0}, // ZERO WIDTH NO-BREAK SPACE + 
{U'\uff01', '!'}, // FULLWIDTH EXCLAMATION MARK + {U'\uff03', '#'}, // FULLWIDTH NUMBER SIGN + {U'\uff04', '$'}, // FULLWIDTH DOLLAR SIGN + {U'\uff05', '%'}, // FULLWIDTH PERCENT SIGN + {U'\uff06', '&'}, // FULLWIDTH AMPERSAND + {U'\uff08', '('}, // FULLWIDTH LEFT PARENTHESIS + {U'\uff09', ')'}, // FULLWIDTH RIGHT PARENTHESIS + {U'\uff0a', '*'}, // FULLWIDTH ASTERISK + {U'\uff0b', '+'}, // FULLWIDTH PLUS SIGN + {U'\uff0c', ','}, // FULLWIDTH COMMA + {U'\uff0d', '-'}, // FULLWIDTH HYPHEN-MINUS + {U'\uff0e', '.'}, // FULLWIDTH FULL STOP + {U'\uff0f', '/'}, // FULLWIDTH SOLIDUS + {U'\uff1a', ':'}, // FULLWIDTH COLON + {U'\uff1b', ';'}, // FULLWIDTH SEMICOLON + {U'\uff1c', '<'}, // FULLWIDTH LESS-THAN SIGN + {U'\uff1d', '='}, // FULLWIDTH EQUALS SIGN + {U'\uff1e', '>'}, // FULLWIDTH GREATER-THAN SIGN + {U'\uff1f', '?'}, // FULLWIDTH QUESTION MARK + {U'\uff20', '@'}, // FULLWIDTH COMMERCIAL AT + {U'\uff3b', '['}, // FULLWIDTH LEFT SQUARE BRACKET + {U'\uff3c', '\\'}, // FULLWIDTH REVERSE SOLIDUS + {U'\uff3d', ']'}, // FULLWIDTH RIGHT SQUARE BRACKET + {U'\uff3e', '^'}, // FULLWIDTH CIRCUMFLEX ACCENT + {U'\uff5b', '{'}, // FULLWIDTH LEFT CURLY BRACKET + {U'\uff5c', '|'}, // FULLWIDTH VERTICAL LINE + {U'\uff5d', '}'}, // FULLWIDTH RIGHT CURLY BRACKET + {U'\uff5e', '~'}, // FULLWIDTH TILDE + {0, 0}}; auto Homoglyph = std::lower_bound(std::begin(SortedHomoglyphs), std::end(SortedHomoglyphs) - 1, HomoglyphPair{C, '\0'}); @@ -1815,7 +1821,7 @@ bool Lexer::tryConsumeIdentifierUCN(const char *&CurPtr, unsigned Size, } Result.setFlag(Token::HasUCN); - if ((UCNPtr - CurPtr == 6 && CurPtr[1] == 'u') || + if ((UCNPtr - CurPtr == 6 && CurPtr[1] == 'u') || (UCNPtr - CurPtr == 10 && CurPtr[1] == 'U')) CurPtr = UCNPtr; else @@ -1921,9 +1927,7 @@ bool Lexer::LexUnicodeIdentifierStart(Token &Result, uint32_t C, return true; } -static const char * -fastParseASCIIIdentifierScalar(const char *CurPtr, - [[maybe_unused]] const char *BufferEnd) { +static const char 
*fastParseASCIIIdentifierScalar(const char *CurPtr) { unsigned char C = *CurPtr; while (isAsciiIdentifierContinue(C)) C = *++CurPtr; @@ -1936,10 +1940,8 @@ fastParseASCIIIdentifierScalar(const char *CurPtr, // fall back to the scalar implementation. #if (defined(__i386__) || defined(__x86_64__)) && defined(__has_attribute) && \ __has_attribute(target) && !defined(_MSC_VER) - __attribute__((target("sse4.2"))) static const char * -fastParseASCIIIdentifierSSE42(const char *CurPtr, - [[maybe_unused]] const char *BufferEnd) { +fastParseASCIIIdentifierSSE42(const char *CurPtr, const char *BufferEnd) { alignas(16) static constexpr char AsciiIdentifierRange[16] = { '_', '_', 'A', 'Z', 'a', 'z', '0', '9', }; @@ -1960,7 +1962,7 @@ fastParseASCIIIdentifierSSE42(const char *CurPtr, return CurPtr; } - return fastParseASCIIIdentifierScalar(CurPtr, BufferEnd); + return fastParseASCIIIdentifierScalar(CurPtr); } __attribute__((target("sse4.2"))) static const char * @@ -1972,7 +1974,7 @@ __attribute__((target("default"))) #endif static const char *fastParseASCIIIdentifier(const char *CurPtr, const char *BufferEnd) { - return fastParseASCIIIdentifierScalar(CurPtr, BufferEnd); + return fastParseASCIIIdentifierScalar(CurPtr); } bool Lexer::LexIdentifierContinue(Token &Result, const char *CurPtr) { @@ -2159,10 +2161,10 @@ const char *Lexer::LexUDSuffix(Token &Result, const char *CurPtr, if (!LangOpts.CPlusPlus11) { if (!isLexingRawMode()) - Diag(CurPtr, - C == '_' ? diag::warn_cxx11_compat_user_defined_literal - : diag::warn_cxx11_compat_reserved_user_defined_literal) - << FixItHint::CreateInsertion(getSourceLocation(CurPtr), " "); + Diag(CurPtr, C == '_' + ? 
diag::warn_cxx11_compat_user_defined_literal + : diag::warn_cxx11_compat_reserved_user_defined_literal) + << FixItHint::CreateInsertion(getSourceLocation(CurPtr), " "); return CurPtr; } @@ -2180,7 +2182,7 @@ const char *Lexer::LexUDSuffix(Token &Result, const char *CurPtr, // valid suffix for a string literal or a numeric literal (this could be // the 'operator""if' defining a numeric literal operator). const unsigned MaxStandardSuffixLength = 3; - char Buffer[MaxStandardSuffixLength] = { C }; + char Buffer[MaxStandardSuffixLength] = {C}; unsigned Consumed = Size; unsigned Chars = 1; while (true) { @@ -2238,8 +2240,7 @@ bool Lexer::LexStringLiteral(Token &Result, const char *CurPtr, const char *NulCharacter = nullptr; if (!isLexingRawMode() && - (Kind == tok::utf8_string_literal || - Kind == tok::utf16_string_literal || + (Kind == tok::utf8_string_literal || Kind == tok::utf16_string_literal || Kind == tok::utf32_string_literal)) Diag(BufferPtr, LangOpts.CPlusPlus ? diag::warn_cxx98_compat_unicode_literal : diag::warn_c99_compat_unicode_literal); @@ -2251,16 +2252,16 @@ bool Lexer::LexStringLiteral(Token &Result, const char *CurPtr, if (C == '\\') C = getAndAdvanceChar(CurPtr, Result); - if (C == '\n' || C == '\r' || // Newline. - (C == 0 && CurPtr-1 == BufferEnd)) { // End of file. + if (C == '\n' || C == '\r' || // Newline. + (C == 0 && CurPtr - 1 == BufferEnd)) { // End of file. 
if (!isLexingRawMode() && !LangOpts.AsmPreprocessor) Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 1; - FormTokenWithChars(Result, CurPtr-1, tok::unknown); + FormTokenWithChars(Result, CurPtr - 1, tok::unknown); return true; } if (C == 0) { - if (isCodeCompletionPoint(CurPtr-1)) { + if (isCodeCompletionPoint(CurPtr - 1)) { if (ParsingFilename) codeCompleteIncludedFile(AfterQuote, CurPtr - 1, /*IsAngled=*/false); else @@ -2270,7 +2271,7 @@ bool Lexer::LexStringLiteral(Token &Result, const char *CurPtr, return true; } - NulCharacter = CurPtr-1; + NulCharacter = CurPtr - 1; } C = getAndAdvanceChar(CurPtr, Result); } @@ -2326,7 +2327,7 @@ bool Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr, Diag(PrefixEnd, diag::err_invalid_newline_raw_delim); } else { Diag(PrefixEnd, diag::err_invalid_char_raw_delim) - << StringRef(PrefixEnd, 1); + << StringRef(PrefixEnd, 1); } } @@ -2338,7 +2339,7 @@ bool Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr, if (C == '"') break; - if (C == 0 && CurPtr-1 == BufferEnd) { + if (C == 0 && CurPtr - 1 == BufferEnd) { --CurPtr; break; } @@ -2361,11 +2362,11 @@ bool Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr, CurPtr += PrefixLen + 1; // skip over prefix and '"' break; } - } else if (C == 0 && CurPtr-1 == BufferEnd) { // End of file. + } else if (C == 0 && CurPtr - 1 == BufferEnd) { // End of file. 
if (!isLexingRawMode()) Diag(BufferPtr, diag::err_unterminated_raw_string) - << StringRef(Prefix, PrefixLen); - FormTokenWithChars(Result, CurPtr-1, tok::unknown); + << StringRef(Prefix, PrefixLen); + FormTokenWithChars(Result, CurPtr - 1, tok::unknown); return true; } } @@ -2409,7 +2410,7 @@ bool Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) { FormTokenWithChars(Result, CurPtr - 1, tok::unknown); return true; } - NulCharacter = CurPtr-1; + NulCharacter = CurPtr - 1; } C = getAndAdvanceChar(CurPtr, Result); } @@ -2489,23 +2490,23 @@ bool Lexer::LexCharConstant(Token &Result, const char *CurPtr, if (C == '\\') C = getAndAdvanceChar(CurPtr, Result); - if (C == '\n' || C == '\r' || // Newline. - (C == 0 && CurPtr-1 == BufferEnd)) { // End of file. + if (C == '\n' || C == '\r' || // Newline. + (C == 0 && CurPtr - 1 == BufferEnd)) { // End of file. if (!isLexingRawMode() && !LangOpts.AsmPreprocessor) Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 0; - FormTokenWithChars(Result, CurPtr-1, tok::unknown); + FormTokenWithChars(Result, CurPtr - 1, tok::unknown); return true; } if (C == 0) { - if (isCodeCompletionPoint(CurPtr-1)) { + if (isCodeCompletionPoint(CurPtr - 1)) { PP->CodeCompleteNaturalLanguage(); - FormTokenWithChars(Result, CurPtr-1, tok::unknown); + FormTokenWithChars(Result, CurPtr - 1, tok::unknown); cutOffLexing(); return true; } - NulCharacter = CurPtr-1; + NulCharacter = CurPtr - 1; } C = getAndAdvanceChar(CurPtr, Result); } @@ -2659,7 +2660,7 @@ bool Lexer::SkipLineComment(Token &Result, const char *CurPtr, const char *NextLine = CurPtr; if (C != 0) { // We found a newline, see if it's escaped. - const char *EscapePtr = CurPtr-1; + const char *EscapePtr = CurPtr - 1; bool HasSpace = false; while (isHorizontalWhitespace(*EscapePtr)) { // Skip whitespace. --EscapePtr; @@ -2672,7 +2673,7 @@ bool Lexer::SkipLineComment(Token &Result, const char *CurPtr, else if (EscapePtr[0] == '/' && EscapePtr[-1] == '?' && EscapePtr[-2] == '?' 
&& LangOpts.Trigraphs) // Trigraph-escaped newline. - CurPtr = EscapePtr-2; + CurPtr = EscapePtr - 2; else break; // This is a newline, we're done. @@ -2693,7 +2694,7 @@ bool Lexer::SkipLineComment(Token &Result, const char *CurPtr, // If we only read only one character, then no special handling is needed. // We're done and can skip forward to the newline. - if (C != 0 && CurPtr == OldPtr+1) { + if (C != 0 && CurPtr == OldPtr + 1) { CurPtr = NextLine; break; } @@ -2709,14 +2710,14 @@ bool Lexer::SkipLineComment(Token &Result, const char *CurPtr, // line is also a // comment, but has spaces, don't emit a diagnostic. if (isWhitespace(C)) { const char *ForwardPtr = CurPtr; - while (isWhitespace(*ForwardPtr)) // Skip whitespace. + while (isWhitespace(*ForwardPtr)) // Skip whitespace. ++ForwardPtr; if (ForwardPtr[0] == '/' && ForwardPtr[1] == '/') break; } if (!isLexingRawMode()) - Diag(OldPtr-1, diag::ext_multi_line_line_comment); + Diag(OldPtr - 1, diag::ext_multi_line_line_comment); break; } } @@ -2726,7 +2727,7 @@ bool Lexer::SkipLineComment(Token &Result, const char *CurPtr, break; } - if (C == '\0' && isCodeCompletionPoint(CurPtr-1)) { + if (C == '\0' && isCodeCompletionPoint(CurPtr - 1)) { PP->CodeCompleteNaturalLanguage(); cutOffLexing(); return false; @@ -2787,12 +2788,12 @@ bool Lexer::SaveLineComment(Token &Result, const char *CurPtr) { return true; assert(Spelling[0] == '/' && Spelling[1] == '/' && "Not line comment?"); - Spelling[1] = '*'; // Change prefix to "/*". - Spelling += "*/"; // add suffix. + Spelling[1] = '*'; // Change prefix to "/*". + Spelling += "*/"; // add suffix. 
Result.setKind(tok::comment); - PP->CreateString(Spelling, Result, - Result.getLocation(), Result.getLocation()); + PP->CreateString(Spelling, Result, Result.getLocation(), + Result.getLocation()); return true; } @@ -2900,7 +2901,7 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr, unsigned CharSize; unsigned char C = getCharAndSize(CurPtr, CharSize); CurPtr += CharSize; - if (C == 0 && CurPtr == BufferEnd+1) { + if (C == 0 && CurPtr == BufferEnd + 1) { if (!isLexingRawMode()) Diag(BufferPtr, diag::err_unterminated_block_comment); --CurPtr; @@ -2940,7 +2941,8 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr, goto MultiByteUTF8; C = *CurPtr++; } - if (C == '/') goto FoundSlash; + if (C == '/') + goto FoundSlash; #ifdef __SSE2__ __m128i Slashes = _mm_set1_epi8('/'); @@ -2950,8 +2952,8 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr, goto MultiByteUTF8; } // look for slashes - int cmp = _mm_movemask_epi8(_mm_cmpeq_epi8(*(const __m128i*)CurPtr, - Slashes)); + int cmp = _mm_movemask_epi8( + _mm_cmpeq_epi8(*(const __m128i *)CurPtr, Slashes)); if (cmp != 0) { // Adjust the pointer to point directly after the first slash. 
It's // not necessary to set C here, it will be overwritten at the end of @@ -2965,10 +2967,8 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr, __vector unsigned char LongUTF = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}; - __vector unsigned char Slashes = { - '/', '/', '/', '/', '/', '/', '/', '/', - '/', '/', '/', '/', '/', '/', '/', '/' - }; + __vector unsigned char Slashes = {'/', '/', '/', '/', '/', '/', '/', '/', + '/', '/', '/', '/', '/', '/', '/', '/'}; while (CurPtr + 16 < BufferEnd) { if (LLVM_UNLIKELY( vec_any_ge(*(const __vector unsigned char *)CurPtr, LongUTF))) @@ -3027,8 +3027,8 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr, } if (C == '/') { - FoundSlash: - if (CurPtr[-2] == '*') // We found the final */. We're done! + FoundSlash: + if (CurPtr[-2] == '*') // We found the final */. We're done! break; if ((CurPtr[-2] == '\n' || CurPtr[-2] == '\r')) { @@ -3044,9 +3044,9 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr, // if this is a /*/, which will end the comment. This misses cases with // embedded escaped newlines, but oh well. if (!isLexingRawMode()) - Diag(CurPtr-1, diag::warn_nested_block_comment); + Diag(CurPtr - 1, diag::warn_nested_block_comment); } - } else if (C == 0 && CurPtr == BufferEnd+1) { + } else if (C == 0 && CurPtr == BufferEnd + 1) { if (!isLexingRawMode()) Diag(BufferPtr, diag::err_unterminated_block_comment); // Note: the user probably forgot a */. We could continue immediately @@ -3063,7 +3063,7 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr, BufferPtr = CurPtr; return false; - } else if (C == '\0' && isCodeCompletionPoint(CurPtr-1)) { + } else if (C == '\0' && isCodeCompletionPoint(CurPtr - 1)) { PP->CodeCompleteNaturalLanguage(); cutOffLexing(); return false; @@ -3091,7 +3091,7 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr, // efficiently now. 
This is safe even in KeepWhitespaceMode because we would // have already returned above with the comment as a token. if (isHorizontalWhitespace(*CurPtr)) { - SkipWhitespace(Result, CurPtr+1, TokAtPhysicalStartOfLine); + SkipWhitespace(Result, CurPtr + 1, TokAtPhysicalStartOfLine); return false; } @@ -3122,10 +3122,10 @@ void Lexer::ReadToEndOfLine(SmallVectorImpl<char> *Result) { if (Result) Result->push_back(Char); break; - case 0: // Null. + case 0: // Null. // Found end of file? - if (CurPtr-1 != BufferEnd) { - if (isCodeCompletionPoint(CurPtr-1)) { + if (CurPtr - 1 != BufferEnd) { + if (isCodeCompletionPoint(CurPtr - 1)) { PP->CodeCompleteNaturalLanguage(); cutOffLexing(); return; @@ -3142,7 +3142,7 @@ void Lexer::ReadToEndOfLine(SmallVectorImpl<char> *Result) { case '\n': // Okay, we found the end of the line. First, back up past the \0, \r, \n. assert(CurPtr[-1] == Char && "Trigraphs for newline?"); - BufferPtr = CurPtr-1; + BufferPtr = CurPtr - 1; // Next, lex the character, which should handle the EOD transition. Lex(Tmp); @@ -3176,7 +3176,7 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) { // Restore comment saving mode, in case it was disabled for directive. if (PP) resetExtendedTokenMode(); - return true; // Have a token. + return true; // Have a token. } // If we are in raw mode, return this event as an EOF token. Let the caller @@ -3276,11 +3276,11 @@ static const char *FindConflictEnd(const char *CurPtr, const char *BufferEnd, // Must occur at start of line. if (Pos == 0 || (RestOfBuffer[Pos - 1] != '\r' && RestOfBuffer[Pos - 1] != '\n')) { - RestOfBuffer = RestOfBuffer.substr(Pos+TermLen); + RestOfBuffer = RestOfBuffer.substr(Pos + TermLen); Pos = RestOfBuffer.find(Terminator); continue; } - return RestOfBuffer.data()+Pos; + return RestOfBuffer.data() + Pos; } return nullptr; } @@ -3291,8 +3291,7 @@ static const char *FindConflictEnd(const char *CurPtr, const char *BufferEnd, /// if not. 
bool Lexer::IsStartOfConflictMarker(const char *CurPtr) { // Only a conflict marker if it starts at the beginning of a line. - if (CurPtr != BufferStart && - CurPtr[-1] != '\n' && CurPtr[-1] != '\r') + if (CurPtr != BufferStart && CurPtr[-1] != '\n' && CurPtr[-1] != '\r') return false; // Check to see if we have <<<<<<< or >>>>. @@ -3335,8 +3334,7 @@ bool Lexer::IsStartOfConflictMarker(const char *CurPtr) { /// the line. This returns true if it is a conflict marker and false if not. bool Lexer::HandleEndOfConflictMarker(const char *CurPtr) { // Only a conflict marker if it starts at the beginning of a line. - if (CurPtr != BufferStart && - CurPtr[-1] != '\n' && CurPtr[-1] != '\r') + if (CurPtr != BufferStart && CurPtr[-1] != '\n' && CurPtr[-1] != '\r') return false; // If we have a situation where we don't care about conflict markers, ignore @@ -3352,8 +3350,8 @@ bool Lexer::HandleEndOfConflictMarker(const char *CurPtr) { // If we do have it, search for the end of the conflict marker. This could // fail if it got skipped with a '#if 0' or something. Note that CurPtr might // be the end of conflict marker. - if (const char *End = FindConflictEnd(CurPtr, BufferEnd, - CurrentConflictMarkerState)) { + if (const char *End = + FindConflictEnd(CurPtr, BufferEnd, CurrentConflictMarkerState)) { CurPtr = End; // Skip ahead to the end of line. 
@@ -3403,7 +3401,7 @@ bool Lexer::lexEditorPlaceholder(Token &Result, const char *CurPtr) { bool Lexer::isCodeCompletionPoint(const char *CurPtr) const { if (PP && PP->isCodeCompletionEnabled()) { - SourceLocation Loc = FileLoc.getLocWithOffset(CurPtr-BufferStart); + SourceLocation Loc = FileLoc.getLocWithOffset(CurPtr - BufferStart); return Loc == PP->getCodeCompletionLoc(); } @@ -3709,7 +3707,7 @@ bool Lexer::CheckUnicodeWhitespace(Token &Result, uint32_t C, if (!isLexingRawMode() && !PP->isPreprocessedOutput() && isUnicodeWhitespace(C)) { Diag(BufferPtr, diag::ext_unicode_whitespace) - << makeCharRange(*this, BufferPtr, CurPtr); + << makeCharRange(*this, BufferPtr, CurPtr); Result.setFlag(Token::LeadingSpace); return true; @@ -3749,7 +3747,7 @@ bool Lexer::Lex(Token &Result) { bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine; IsAtPhysicalStartOfLine = false; bool isRawLex = isLexingRawMode(); - (void) isRawLex; + (void)isRawLex; bool returnedToken = LexTokenInternal(Result, atPhysicalStartOfLine); // (After the LexTokenInternal call, the lexer might be destroyed.) assert((returnedToken || !isRawLex) && "Raw lex must succeed"); @@ -3788,7 +3786,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { Result.setFlag(Token::LeadingSpace); } - unsigned SizeTmp, SizeTmp2; // Temporaries for use in cases below. + unsigned SizeTmp, SizeTmp2; // Temporaries for use in cases below. // Read a character, advancing over it. char Char = getAndAdvanceChar(CurPtr, Result); @@ -3798,13 +3796,13 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { NewLinePtr = nullptr; switch (Char) { - case 0: // Null. + case 0: // Null. // Found end of file? - if (CurPtr-1 == BufferEnd) - return LexEndOfFile(Result, CurPtr-1); + if (CurPtr - 1 == BufferEnd) + return LexEndOfFile(Result, CurPtr - 1); // Check if we are performing code completion. 
- if (isCodeCompletionPoint(CurPtr-1)) { + if (isCodeCompletionPoint(CurPtr - 1)) { // Return the code-completion token. Result.startToken(); FormTokenWithChars(Result, CurPtr, tok::code_completion); @@ -3812,7 +3810,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { } if (!isLexingRawMode()) - Diag(CurPtr-1, diag::null_in_file); + Diag(CurPtr - 1, diag::null_in_file); Result.setFlag(Token::LeadingSpace); if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) return true; // KeepWhitespaceMode @@ -3821,12 +3819,12 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { // (We manually eliminate the tail call to avoid recursion.) goto LexNextToken; - case 26: // DOS & CP/M EOF: "^Z". + case 26: // DOS & CP/M EOF: "^Z". // If we're in Microsoft extensions mode, treat this as end of file. if (LangOpts.MicrosoftExt) { if (!isLexingRawMode()) - Diag(CurPtr-1, diag::ext_ctrl_z_eof_microsoft); - return LexEndOfFile(Result, CurPtr-1); + Diag(CurPtr - 1, diag::ext_ctrl_z_eof_microsoft); + return LexEndOfFile(Result, CurPtr - 1); } // If Microsoft extensions are disabled, this is just random garbage. @@ -3882,11 +3880,11 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { // too (without going through the big switch stmt). if (CurPtr[0] == '/' && CurPtr[1] == '/' && !inKeepCommentMode() && LineComment && (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP)) { - if (SkipLineComment(Result, CurPtr+2, TokAtPhysicalStartOfLine)) + if (SkipLineComment(Result, CurPtr + 2, TokAtPhysicalStartOfLine)) return true; // There is a token to return. goto SkipIgnoredUnits; } else if (CurPtr[0] == '/' && CurPtr[1] == '*' && !inKeepCommentMode()) { - if (SkipBlockComment(Result, CurPtr+2, TokAtPhysicalStartOfLine)) + if (SkipBlockComment(Result, CurPtr + 2, TokAtPhysicalStartOfLine)) return true; // There is a token to return. 
goto SkipIgnoredUnits; } else if (isHorizontalWhitespace(*CurPtr)) { @@ -3898,8 +3896,16 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { // C99 6.4.4.1: Integer Constants. // C99 6.4.4.2: Floating Constants. - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': // Notify MIOpt that we read a non-whitespace/non-comment token. MIOpt.ReadToken(); return LexNumericConstant(Result, CurPtr); @@ -3927,24 +3933,26 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { // UTF-16 raw string literal if (Char == 'R' && LangOpts.RawStringLiterals && getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"') - return LexRawStringLiteral(Result, - ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), - SizeTmp2, Result), - tok::utf16_string_literal); + return LexRawStringLiteral( + Result, + ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2, Result), + tok::utf16_string_literal); if (Char == '8') { char Char2 = getCharAndSize(CurPtr + SizeTmp, SizeTmp2); // UTF-8 string literal if (Char2 == '"') - return LexStringLiteral(Result, - ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), - SizeTmp2, Result), - tok::utf8_string_literal); + return LexStringLiteral( + Result, + ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2, + Result), + tok::utf8_string_literal); if (Char2 == '\'' && (LangOpts.CPlusPlus17 || LangOpts.C23)) return LexCharConstant( - Result, ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), - SizeTmp2, Result), + Result, + ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2, + Result), tok::utf8_char_constant); if (Char2 == 'R' && LangOpts.RawStringLiterals) { @@ -3952,11 +3960,12 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { char Char3 = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3); // UTF-8 
raw string literal if (Char3 == '"') { - return LexRawStringLiteral(Result, - ConsumeChar(ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), - SizeTmp2, Result), - SizeTmp3, Result), - tok::utf8_string_literal); + return LexRawStringLiteral( + Result, + ConsumeChar(ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), + SizeTmp2, Result), + SizeTmp3, Result), + tok::utf8_string_literal); } } } @@ -3985,10 +3994,10 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { // UTF-32 raw string literal if (Char == 'R' && LangOpts.RawStringLiterals && getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"') - return LexRawStringLiteral(Result, - ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), - SizeTmp2, Result), - tok::utf32_string_literal); + return LexRawStringLiteral( + Result, + ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2, Result), + tok::utf32_string_literal); } // treat U like the start of an identifier. @@ -4002,15 +4011,14 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { Char = getCharAndSize(CurPtr, SizeTmp); if (Char == '"') - return LexRawStringLiteral(Result, - ConsumeChar(CurPtr, SizeTmp, Result), + return LexRawStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result), tok::string_literal); } // treat R like the start of an identifier. return LexIdentifierContinue(Result, CurPtr); - case 'L': // Identifier (Loony) or wide literal (L'x' or L"xyz"). + case 'L': // Identifier (Loony) or wide literal (L'x' or L"xyz"). // Notify MIOpt that we read a non-whitespace/non-comment token. MIOpt.ReadToken(); Char = getCharAndSize(CurPtr, SizeTmp); @@ -4023,10 +4031,10 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { // Wide raw string literal. 
if (LangOpts.RawStringLiterals && Char == 'R' && getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"') - return LexRawStringLiteral(Result, - ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), - SizeTmp2, Result), - tok::wide_string_literal); + return LexRawStringLiteral( + Result, + ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2, Result), + tok::wide_string_literal); // Wide character constant. if (Char == '\'') @@ -4036,23 +4044,63 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { [[fallthrough]]; // C99 6.4.2: Identifiers. - case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': - case 'H': case 'I': case 'J': case 'K': /*'L'*/case 'M': case 'N': - case 'O': case 'P': case 'Q': /*'R'*/case 'S': case 'T': /*'U'*/ - case 'V': case 'W': case 'X': case 'Y': case 'Z': - case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': - case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': - case 'o': case 'p': case 'q': case 'r': case 's': case 't': /*'u'*/ - case 'v': case 'w': case 'x': case 'y': case 'z': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': /*'L'*/ + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': /*'R'*/ + case 'S': + case 'T': /*'U'*/ + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': /*'u'*/ + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': case '_': // Notify MIOpt that we read a non-whitespace/non-comment token. MIOpt.ReadToken(); return LexIdentifierContinue(Result, CurPtr); - case '$': // $ in identifiers. + case '$': // $ in identifiers. 
if (LangOpts.DollarIdents) { if (!isLexingRawMode()) - Diag(CurPtr-1, diag::ext_dollar_in_identifier); + Diag(CurPtr - 1, diag::ext_dollar_in_identifier); // Notify MIOpt that we read a non-whitespace/non-comment token. MIOpt.ReadToken(); return LexIdentifierContinue(Result, CurPtr); @@ -4108,10 +4156,10 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { Kind = tok::periodstar; CurPtr += SizeTmp; } else if (Char == '.' && - getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '.') { + getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '.') { Kind = tok::ellipsis; - CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), - SizeTmp2, Result); + CurPtr = + ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2, Result); } else { Kind = tok::period; } @@ -4150,18 +4198,18 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { break; case '-': Char = getCharAndSize(CurPtr, SizeTmp); - if (Char == '-') { // -- + if (Char == '-') { // -- CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); Kind = tok::minusminus; } else if (Char == '>' && LangOpts.CPlusPlus && - getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '*') { // C++ ->* - CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), - SizeTmp2, Result); + getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '*') { // C++ ->* + CurPtr = + ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2, Result); Kind = tok::arrowstar; - } else if (Char == '>') { // -> + } else if (Char == '>') { // -> CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); Kind = tok::arrow; - } else if (Char == '=') { // -= + } else if (Char == '=') { // -= CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); Kind = tok::minusequal; } else { @@ -4182,7 +4230,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { case '/': // 6.4.9: Comments Char = getCharAndSize(CurPtr, SizeTmp); - if (Char == '/') { // Line comment. + if (Char == '/') { // Line comment. // Even if Line comments are disabled (e.g. 
in C89 mode), we generally // want to lex this as a comment. There is one problem with this though, // that in one particular corner case, this can change the behavior of the @@ -4195,7 +4243,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { LineComment && (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP); if (!TreatAsComment) if (!(PP && PP->isPreprocessedOutput())) - TreatAsComment = getCharAndSize(CurPtr+SizeTmp, SizeTmp2) != '*'; + TreatAsComment = getCharAndSize(CurPtr + SizeTmp, SizeTmp2) != '*'; if (TreatAsComment) { if (SkipLineComment(Result, ConsumeChar(CurPtr, SizeTmp, Result), @@ -4209,7 +4257,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { } } - if (Char == '*') { // /**/ comment. + if (Char == '*') { // /**/ comment. if (SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result), TokAtPhysicalStartOfLine)) return true; // There is a token to return. @@ -4232,21 +4280,21 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { Kind = tok::percentequal; CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); } else if (LangOpts.Digraphs && Char == '>') { - Kind = tok::r_brace; // '%>' -> '}' + Kind = tok::r_brace; // '%>' -> '}' CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); } else if (LangOpts.Digraphs && Char == ':') { CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); Char = getCharAndSize(CurPtr, SizeTmp); - if (Char == '%' && getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == ':') { - Kind = tok::hashhash; // '%:%:' -> '##' - CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), - SizeTmp2, Result); - } else if (Char == '@' && LangOpts.MicrosoftExt) {// %:@ -> #@ -> Charize + if (Char == '%' && getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == ':') { + Kind = tok::hashhash; // '%:%:' -> '##' + CurPtr = + ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2, Result); + } else if (Char == '@' && LangOpts.MicrosoftExt) { // %:@ -> #@ -> Charize CurPtr = ConsumeChar(CurPtr, 
SizeTmp, Result); if (!isLexingRawMode()) Diag(BufferPtr, diag::ext_charize_microsoft); Kind = tok::hashat; - } else { // '%:' -> '#' + } else { // '%:' -> '#' // We parsed a # character. If this occurs at the start of the line, // it's actually the start of a preprocessing directive. Callback to // the preprocessor to handle it. @@ -4265,35 +4313,35 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { if (ParsingFilename) { return LexAngledStringLiteral(Result, CurPtr); } else if (Char == '<') { - char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2); + char After = getCharAndSize(CurPtr + SizeTmp, SizeTmp2); if (After == '=') { Kind = tok::lesslessequal; - CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), - SizeTmp2, Result); - } else if (After == '<' && IsStartOfConflictMarker(CurPtr-1)) { + CurPtr = + ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2, Result); + } else if (After == '<' && IsStartOfConflictMarker(CurPtr - 1)) { // If this is actually a '<<<<<<<' version control conflict marker, // recognize it as such and recover nicely. goto LexNextToken; - } else if (After == '<' && HandleEndOfConflictMarker(CurPtr-1)) { + } else if (After == '<' && HandleEndOfConflictMarker(CurPtr - 1)) { // If this is '<<<<' and we're in a Perforce-style conflict marker, // ignore it. 
goto LexNextToken; } else if (LangOpts.CUDA && After == '<') { Kind = tok::lesslessless; - CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), - SizeTmp2, Result); + CurPtr = + ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2, Result); } else { CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); Kind = tok::lessless; } } else if (Char == '=') { - char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2); + char After = getCharAndSize(CurPtr + SizeTmp, SizeTmp2); if (After == '>') { if (LangOpts.CPlusPlus20) { if (!isLexingRawMode()) Diag(BufferPtr, diag::warn_cxx17_compat_spaceship); - CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), - SizeTmp2, Result); + CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2, + Result); Kind = tok::spaceship; break; } @@ -4301,13 +4349,13 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { // change in semantics if this turns up in C++ <=17 mode. if (LangOpts.CPlusPlus && !isLexingRawMode()) { Diag(BufferPtr, diag::warn_cxx20_compat_spaceship) - << FixItHint::CreateInsertion( - getSourceLocation(CurPtr + SizeTmp, SizeTmp2), " "); + << FixItHint::CreateInsertion( + getSourceLocation(CurPtr + SizeTmp, SizeTmp2), " "); } } CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); Kind = tok::lessequal; - } else if (LangOpts.Digraphs && Char == ':') { // '<:' -> '[' + } else if (LangOpts.Digraphs && Char == ':') { // '<:' -> '[' if (LangOpts.CPlusPlus11 && getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == ':') { // C++0x [lex.pptoken]p3: @@ -4327,7 +4375,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); Kind = tok::l_square; - } else if (LangOpts.Digraphs && Char == '%') { // '<%' -> '{' + } else if (LangOpts.Digraphs && Char == '%') { // '<%' -> '{' CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); Kind = tok::l_brace; } else if (Char == '#' && /*Not a trigraph*/ SizeTmp == 1 && @@ -4343,22 +4391,22 @@ bool 
Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); Kind = tok::greaterequal; } else if (Char == '>') { - char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2); + char After = getCharAndSize(CurPtr + SizeTmp, SizeTmp2); if (After == '=') { - CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), - SizeTmp2, Result); + CurPtr = + ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2, Result); Kind = tok::greatergreaterequal; - } else if (After == '>' && IsStartOfConflictMarker(CurPtr-1)) { + } else if (After == '>' && IsStartOfConflictMarker(CurPtr - 1)) { // If this is actually a '>>>>' conflict marker, recognize it as such // and recover nicely. goto LexNextToken; - } else if (After == '>' && HandleEndOfConflictMarker(CurPtr-1)) { + } else if (After == '>' && HandleEndOfConflictMarker(CurPtr - 1)) { // If this is '>>>>>>>' and we're in a conflict marker, ignore it. goto LexNextToken; } else if (LangOpts.CUDA && After == '>') { Kind = tok::greatergreatergreater; - CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), - SizeTmp2, Result); + CurPtr = + ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2, Result); } else { CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); Kind = tok::greatergreater; @@ -4385,7 +4433,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); } else if (Char == '|') { // If this is '|||||||' and we're in a conflict marker, ignore it. - if (CurPtr[1] == '|' && HandleEndOfConflictMarker(CurPtr-1)) + if (CurPtr[1] == '|' && HandleEndOfConflictMarker(CurPtr - 1)) goto LexNextToken; Kind = tok::pipepipe; CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); @@ -4412,7 +4460,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { Char = getCharAndSize(CurPtr, SizeTmp); if (Char == '=') { // If this is '====' and we're in a conflict marker, ignore it. 
- if (CurPtr[1] == '=' && HandleEndOfConflictMarker(CurPtr-1)) + if (CurPtr[1] == '=' && HandleEndOfConflictMarker(CurPtr - 1)) goto LexNextToken; Kind = tok::equalequal; @@ -4429,7 +4477,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { if (Char == '#') { Kind = tok::hashhash; CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); - } else if (Char == '@' && LangOpts.MicrosoftExt) { // #@ -> Charize + } else if (Char == '@' && LangOpts.MicrosoftExt) { // #@ -> Charize Kind = tok::hashat; if (!isLexingRawMode()) Diag(BufferPtr, diag::ext_charize_microsoft); @@ -4485,11 +4533,9 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { // We can't just reset CurPtr to BufferPtr because BufferPtr may point to // an escaped newline. --CurPtr; - llvm::ConversionResult Status = - llvm::convertUTF8Sequence((const llvm::UTF8 **)&CurPtr, - (const llvm::UTF8 *)BufferEnd, - &CodePoint, - llvm::strictConversion); + llvm::ConversionResult Status = llvm::convertUTF8Sequence( + (const llvm::UTF8 **)&CurPtr, (const llvm::UTF8 *)BufferEnd, &CodePoint, + llvm::strictConversion); if (Status == llvm::conversionOK) { if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) { if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) @@ -4514,7 +4560,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { // just diagnose the invalid UTF-8, then drop the character. Diag(CurPtr, diag::err_invalid_utf8); - BufferPtr = CurPtr+1; + BufferPtr = CurPtr + 1; // We're pretending the character didn't exist, so just try again with // this lexer. // (We manually eliminate the tail call to avoid recursion.) _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
