Author: Thibault Monnier Date: 2026-01-07T20:29:22+01:00 New Revision: 961d52f3304700393b1b03a72c9ad5bb155e851b
URL: https://github.com/llvm/llvm-project/commit/961d52f3304700393b1b03a72c9ad5bb155e851b DIFF: https://github.com/llvm/llvm-project/commit/961d52f3304700393b1b03a72c9ad5bb155e851b.diff LOG: [Clang] [Lexer] Detect SSE4.2 availability at runtime in fastParseASCIIIdentifier (#171914) This change attempts to maximize usage of the SSE fast path in `fastParseASCIIIdentifier`. If compiling for x86, we compile both the SSE fast path and the scalar loop. At runtime, we check whether SSE4.2 is available and dispatch to the right function by using the `target` attribute. If it _is_ available, this yields a net performance improvement. Otherwise, there is a very slight, negligible regression, which I believe is perfectly reasonable for a processor that does not support SSE4.2. If we are not compiling for x86, then the behavior is exactly the same as before, ensuring we have no regressions. If the binary is compiled for x86 with SSE4.2 enabled, we still do a runtime check, but this has negligible impact; furthermore, the point of the PR is that binaries are rarely built with SSE4.2 enabled. The benchmark results are available at [llvm-compile-time-tracker](https://llvm-compile-time-tracker.com/compare.php?from=f88d060c4176d17df56587a083944637ca865cb3&to=d5485438edd460892bf210916827e0d92fc24065&stat=instructions%3Au). 
Added: Modified: clang/lib/Lex/Lexer.cpp Removed: ################################################################################ diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index afebef0974016..5e8ed5f815c7b 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -36,6 +36,7 @@ #include "llvm/Support/NativeFormatting.h" #include "llvm/Support/Unicode.h" #include "llvm/Support/UnicodeCharRanges.h" + #include <algorithm> #include <cassert> #include <cstddef> @@ -45,7 +46,7 @@ #include <optional> #include <string> -#ifdef __SSE4_2__ +#if defined(__i386__) || defined(__x86_64__) #include <nmmintrin.h> #endif @@ -1919,10 +1920,21 @@ bool Lexer::LexUnicodeIdentifierStart(Token &Result, uint32_t C, return true; } -static const char * -fastParseASCIIIdentifier(const char *CurPtr, - [[maybe_unused]] const char *BufferEnd) { -#ifdef __SSE4_2__ +static const char *fastParseASCIIIdentifierScalar(const char *CurPtr) { + unsigned char C = *CurPtr; + while (isAsciiIdentifierContinue(C)) + C = *++CurPtr; + return CurPtr; +} + +// Fast path for lexing ASCII identifiers using SSE4.2 instructions. +// Only enabled on x86/x86_64 when building with a compiler that supports +// the 'target' attribute, which is used for runtime dispatch. Otherwise, we +// fall back to the scalar implementation. 
+#if (defined(__i386__) || defined(__x86_64__)) && defined(__has_attribute) && \ + __has_attribute(target) && !defined(_MSC_VER) +__attribute__((target("sse4.2"))) static const char * +fastParseASCIIIdentifierSSE42(const char *CurPtr, const char *BufferEnd) { alignas(16) static constexpr char AsciiIdentifierRange[16] = { '_', '_', 'A', 'Z', 'a', 'z', '0', '9', }; @@ -1942,12 +1954,20 @@ fastParseASCIIIdentifier(const char *CurPtr, continue; return CurPtr; } -#endif - unsigned char C = *CurPtr; - while (isAsciiIdentifierContinue(C)) - C = *++CurPtr; - return CurPtr; + return fastParseASCIIIdentifierScalar(CurPtr); +} + +__attribute__((target("sse4.2"))) static const char * +fastParseASCIIIdentifier(const char *CurPtr, const char *BufferEnd) { + return fastParseASCIIIdentifierSSE42(CurPtr, BufferEnd); +} + +__attribute__((target("default"))) +#endif +static const char *fastParseASCIIIdentifier(const char *CurPtr, + const char *BufferEnd) { + return fastParseASCIIIdentifierScalar(CurPtr); } bool Lexer::LexIdentifierContinue(Token &Result, const char *CurPtr) { _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
