Author: Thibault Monnier Date: 2026-01-20T17:13:01+01:00 New Revision: 146a919360eb0e4f0ac012050ccc7c0f4593345e
URL: https://github.com/llvm/llvm-project/commit/146a919360eb0e4f0ac012050ccc7c0f4593345e DIFF: https://github.com/llvm/llvm-project/commit/146a919360eb0e4f0ac012050ccc7c0f4593345e.diff LOG: [Clang][Lexer] Reland "Detect SSE4.2 availability at runtime in fastParseASCIIIdentifier" (#175452) This PR reopens #171914 after it was merged and then reverted by #174946 because of compilation failures. This change attempts to maximize usage of the SSE fast path in `fastParseASCIIIdentifier`. If the binary is compiled with SSE4.2 enabled, or if we are not compiling for x86, then the behavior is exactly the same, ensuring we have no regressions. Otherwise, we compile both the SSE fast path and the scalar loop. At runtime, we check if SSE4.2 is available and dispatch to the right function by using the `target` attribute. If it _is_ available, this allows a net performance improvement. Otherwise, there's a very slight but negligible regression, which I believe is perfectly reasonable for a processor that does not support SSE4.2. I checked locally on an old x86 processor with QEMU to ensure this doesn't break compatibility. The benchmark results are available at [llvm-compile-time-tracker](https://llvm-compile-time-tracker.com/compare.php?from=f88d060c4176d17df56587a083944637ca865cb3&to=d5485438edd460892bf210916827e0d92fc24065&stat=instructions%3Au). 
Added: Modified: clang/lib/Lex/Lexer.cpp llvm/include/llvm/Support/Compiler.h Removed: ################################################################################ diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index 5e9d2743ba53f..2c4ba70551fab 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -36,6 +36,7 @@ #include "llvm/Support/NativeFormatting.h" #include "llvm/Support/Unicode.h" #include "llvm/Support/UnicodeCharRanges.h" + #include <algorithm> #include <cassert> #include <cstddef> @@ -45,7 +46,7 @@ #include <optional> #include <string> -#ifdef __SSE4_2__ +#if defined(__i386__) || defined(__x86_64__) #include <nmmintrin.h> #endif @@ -1930,10 +1931,21 @@ bool Lexer::LexUnicodeIdentifierStart(Token &Result, uint32_t C, return true; } -static const char * -fastParseASCIIIdentifier(const char *CurPtr, - [[maybe_unused]] const char *BufferEnd) { -#ifdef __SSE4_2__ +static const char *fastParseASCIIIdentifierScalar(const char *CurPtr) { + unsigned char C = *CurPtr; + while (isAsciiIdentifierContinue(C)) + C = *++CurPtr; + return CurPtr; +} + +// Fast path for lexing ASCII identifiers using SSE4.2 instructions. +// Only enabled on x86/x86_64 when building with __SSE4_2__ enabled, or with a +// compiler and platform that support runtime dispatch. +#if defined(__SSE4_2__) || LLVM_SUPPORTS_RUNTIME_SSE42_CHECK +// LLVM_ATTRIBUTE_USED is a hack to suppress a false-positive warning due to a +// bug in clang-18 and less. See PR175452. 
+LLVM_ATTRIBUTE_USED LLVM_TARGET_SSE42 static const char * +fastParseASCIIIdentifier(const char *CurPtr, const char *BufferEnd) { alignas(16) static constexpr char AsciiIdentifierRange[16] = { '_', '_', 'A', 'Z', 'a', 'z', '0', '9', }; @@ -1953,13 +1965,19 @@ fastParseASCIIIdentifier(const char *CurPtr, continue; return CurPtr; } + + return fastParseASCIIIdentifierScalar(CurPtr); +} #endif - unsigned char C = *CurPtr; - while (isAsciiIdentifierContinue(C)) - C = *++CurPtr; - return CurPtr; +#ifndef __SSE4_2__ +#if LLVM_SUPPORTS_RUNTIME_SSE42_CHECK +LLVM_TARGET_DEFAULT +#endif +static const char *fastParseASCIIIdentifier(const char *CurPtr, const char *) { + return fastParseASCIIIdentifierScalar(CurPtr); } +#endif bool Lexer::LexIdentifierContinue(Token &Result, const char *CurPtr) { // Match [_A-Za-z0-9]*, we have already matched an identifier start. diff --git a/llvm/include/llvm/Support/Compiler.h b/llvm/include/llvm/Support/Compiler.h index f4bd894021097..4115c3d6060e3 100644 --- a/llvm/include/llvm/Support/Compiler.h +++ b/llvm/include/llvm/Support/Compiler.h @@ -762,4 +762,30 @@ void AnnotateIgnoreWritesEnd(const char *file, int line); #endif // clang-format on +/// \macro LLVM_SUPPORTS_RUNTIME_SSE42_CHECK +/// Expands to true if runtime detection of SSE4.2 is supported. +/// This can be used to guard runtime checks for SSE4.2 support. +#if ((defined(__i386__) || defined(__x86_64__)) && defined(__has_attribute) && \ + __has_attribute(target) && !defined(_WIN32)) +#define LLVM_SUPPORTS_RUNTIME_SSE42_CHECK 1 +#else +#define LLVM_SUPPORTS_RUNTIME_SSE42_CHECK 0 +#endif + +/// \macro LLVM_TARGET_DEFAULT +/// Function attribute to compile a function with default target features. +#if defined(__has_attribute) && __has_attribute(target) +#define LLVM_TARGET_DEFAULT __attribute__((target("default"))) +#else +#define LLVM_TARGET_DEFAULT +#endif + +/// \macro LLVM_TARGET_SSE42 +/// Function attribute to compile a function with SSE4.2 enabled. 
+#if defined(__has_attribute) && __has_attribute(target) +#define LLVM_TARGET_SSE42 __attribute__((target("sse4.2"))) +#else +#define LLVM_TARGET_SSE42 +#endif + #endif _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
