https://github.com/wpcwzy updated https://github.com/llvm/llvm-project/pull/193881
From 8cdd44d8dd9e2ca33ce5021e48527fb9c580812d Mon Sep 17 00:00:00 2001 From: Pincheng Wang <[email protected]> Date: Wed, 22 Apr 2026 22:30:19 +0800 Subject: [PATCH 1/2] [Clang][OpenMP] declare-simd: Preserve libcalls --- clang/lib/CodeGen/CGBuiltin.cpp | 15 ++++ clang/lib/CodeGen/CGOpenMPRuntime.cpp | 73 +++++++++++++++++++ .../OpenMP/declare_simd_preserve_builtin.cpp | 14 ++++ 3 files changed, 102 insertions(+) create mode 100644 clang/test/OpenMP/declare_simd_preserve_builtin.cpp diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index da72a43643a54..b0abe0de56c19 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -28,6 +28,7 @@ #include "clang/AST/StmtVisitor.h" #include "clang/Basic/DiagnosticFrontend.h" #include "clang/Basic/TargetInfo.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Intrinsics.h" @@ -69,6 +70,16 @@ static bool shouldEmitBuiltinAsIR(unsigned BuiltinID, return false; } +static bool shouldPreserveLibCallForDeclareSimd(const FunctionDecl *FD, + const LangOptions &LangOpts) { + if (!FD || !LangOpts.OpenMP) + return false; + + return llvm::any_of(FD->redecls(), [](const FunctionDecl *Redecl) { + return Redecl->hasAttr<OMPDeclareSimdDeclAttr>(); + }); +} + static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, @@ -2647,6 +2658,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, BuiltinID, CGM.getTriple(), ErrnoOverriden, getLangOpts().MathErrno, OptNone, IsOptimizationEnabled); + if (GenerateFPMathIntrinsics && + shouldPreserveLibCallForDeclareSimd(FD, getLangOpts())) + GenerateFPMathIntrinsics = false; + if (GenerateFPMathIntrinsics) { switch (BuiltinIDIfNoAsmLabel) { case Builtin::BIacos: diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 
a8255ac74cfcf..0bbd463d9a0d1 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -35,6 +35,7 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/InstrTypes.h" +#include "llvm/IR/VFABIDemangler.h" #include "llvm/IR/Value.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/raw_ostream.h" @@ -11311,6 +11312,73 @@ emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, } } +static void emitDeclareSimdVariantMetadata(CodeGenModule &CGM, + llvm::Function *Fn) { + llvm::SmallVector<std::string, 8> VariantMappings; + for (llvm::Attribute Attr : Fn->getAttributes().getFnAttrs()) { + if (!Attr.isStringAttribute()) + continue; + + StringRef VariantName = Attr.getKindAsString(); + if (!VariantName.starts_with("_ZGV")) + continue; + + std::optional<llvm::VFInfo> Info = + llvm::VFABI::tryDemangleForVFABI(VariantName, Fn->getFunctionType()); + if (!Info) + continue; + + llvm::FunctionType *VectorTy = + llvm::VFABI::createFunctionType(*Info, Fn->getFunctionType()); + llvm::Function *VecFn = llvm::cast<llvm::Function>( + Fn->getParent() + ->getOrInsertFunction(Info->VectorName, VectorTy) + .getCallee()); + CGM.addCompilerUsedGlobal(VecFn); + VariantMappings.push_back(std::string(VariantName)); + } + + if (VariantMappings.empty()) + return; + + SmallString<256> Buffer; + llvm::raw_svector_ostream Out(Buffer); + for (const std::string &VariantName : VariantMappings) + Out << VariantName << ','; + Buffer.pop_back(); + + Fn->removeFnAttr(llvm::VFABI::MappingsAttrName); + Fn->addFnAttr(llvm::VFABI::MappingsAttrName, Buffer.str()); +} + +static bool shouldEmitDeclareSimdVariantMetadata(const FunctionDecl *FD, + const SourceManager &SM) { + if (!FD) + return false; + + for (const FunctionDecl *Redecl : FD->redecls()) { + if (!Redecl->hasAttr<OMPDeclareSimdDeclAttr>()) + continue; + + unsigned BuiltinID = Redecl->getBuiltinID(); + if (BuiltinID && + 
Redecl->getASTContext().BuiltinInfo.isLibFunction(BuiltinID)) + return true; + + SourceLocation Loc = SM.getExpansionLoc(Redecl->getLocation()); + if (!Loc.isValid()) + continue; + + // Keep support for library-style declarations coming from headers that are + // not marked as system headers (e.g. staged glibc build directories), but + // avoid enabling this for every non-main-file declaration. + if (!SM.isWrittenInMainFile(Loc) && Redecl->hasExternalFormalLinkage()) + return true; + } + + return false; +} + // This are the Functions that are needed to mangle the name of the // vector functions generated by the compiler, according to the rules // defined in the "Vector Function ABI specifications for AArch64", @@ -11577,6 +11645,7 @@ void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn) { ASTContext &C = CGM.getContext(); FD = FD->getMostRecentDecl(); + const FunctionDecl *MostRecentFD = FD; while (FD) { // Map params to their positions in function decl. 
llvm::DenseMap<const Decl *, unsigned> ParamPositions; @@ -11724,6 +11793,10 @@ void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, } FD = FD->getPreviousDecl(); } + + if (shouldEmitDeclareSimdVariantMetadata(MostRecentFD, + CGM.getContext().getSourceManager())) + emitDeclareSimdVariantMetadata(CGM, Fn); } namespace { diff --git a/clang/test/OpenMP/declare_simd_preserve_builtin.cpp b/clang/test/OpenMP/declare_simd_preserve_builtin.cpp new file mode 100644 index 0000000000000..7b6a808de13da --- /dev/null +++ b/clang/test/OpenMP/declare_simd_preserve_builtin.cpp @@ -0,0 +1,14 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -ffast-math -fopenmp -emit-llvm %s -o - | FileCheck %s + +double sqrt(double); + +#pragma omp declare simd notinbranch +double sqrt(double); + +double test(double x) { + return sqrt(x); +} + +// CHECK-LABEL: define{{.*}}@_Z4testd( +// CHECK: call{{.*}}@_Z4sqrtd( +// CHECK-NOT: llvm.sqrt From 1aa219fb354fc22c8128b3a89a658604698699de Mon Sep 17 00:00:00 2001 From: Pincheng Wang <[email protected]> Date: Fri, 12 Jun 2026 09:09:31 +0800 Subject: [PATCH 2/2] [Clang][OpenMP] declare-simd: Mark math calls memory(none) in SIMD loops When a libm math function that carries #pragma omp declare simd is called inside an OpenMP SIMD loop, mark the call site memory(none). This lets the loop vectorizer replace it with the function's VFABI vector variant instead of treating it as a dependence-blocking memory access. The tightening is applied per call site, on calls emitted while a parallel (OpenMP SIMD) loop is on the loop-info stack, rather than on the function declaration. This is what makes it correct under -fmath-errno: an ordinary scalar call to the same function keeps its errno / FP-environment side effects, while inside a SIMD loop the user has asserted that iterations are independent and these per-iteration side effects need not be preserved (the vector variant carries them per the vector ABI). 
It mirrors what GCC does for the same loops and matches glibc's libmvec ABI tests, which build with -fmath-errno and therefore cannot rely on fast-math making the function pure. Because glibc compiles those tests with -fno-builtin, which clears the identifier builtin ID so that FunctionDecl::getBuiltinID() returns 0, the libm function is classified by name via a new Builtin::Context::isConstWithoutErrnoAndExceptions(StringRef) overload that queries the builtin table directly. Functions that write results back through pointer parameters (sincos, modf, frexp, remquo, ...) are excluded so their effect on user-visible memory is preserved. Part of issue #120868. --- clang/include/clang/Basic/Builtins.h | 10 ++++ clang/lib/Basic/Builtins.cpp | 16 +++++ clang/lib/CodeGen/CGCall.cpp | 55 +++++++++++++++++ .../OpenMP/declare_simd_math_call_readnone.c | 60 +++++++++++++++++++ 4 files changed, 141 insertions(+) create mode 100644 clang/test/OpenMP/declare_simd_math_call_readnone.c diff --git a/clang/include/clang/Basic/Builtins.h b/clang/include/clang/Basic/Builtins.h index 324e0deb241ab..d523879139296 100644 --- a/clang/include/clang/Basic/Builtins.h +++ b/clang/include/clang/Basic/Builtins.h @@ -402,6 +402,16 @@ class Context { return strchr(getAttributesString(ID), 'e') != nullptr; } + /// Return true if the target-independent builtin named \p Name has no side + /// effects and doesn't read memory, except for possibly errno or raising FP + /// exceptions. + /// + /// Unlike the ID-based overload, this looks the function up by name directly + /// in the builtin table and therefore still classifies standard libm + /// functions even under -fno-builtin (which clears their identifier builtin + /// IDs, so FunctionDecl::getBuiltinID() would return 0). 
+ bool isConstWithoutErrnoAndExceptions(llvm::StringRef Name) const; + bool isConstWithoutExceptions(unsigned ID) const { return strchr(getAttributesString(ID), 'g') != nullptr; } diff --git a/clang/lib/Basic/Builtins.cpp b/clang/lib/Basic/Builtins.cpp index e477da34fd5e0..c7cd0e9346110 100644 --- a/clang/lib/Basic/Builtins.cpp +++ b/clang/lib/Basic/Builtins.cpp @@ -135,6 +135,22 @@ bool Builtin::Context::isBuiltinFunc(llvm::StringRef FuncName) { return false; } +bool Builtin::Context::isConstWithoutErrnoAndExceptions( + llvm::StringRef Name) const { + bool InStdNamespace = Name.consume_front("std-"); + for (const auto &Shard : {InfosShard{&BuiltinStrings, BuiltinInfos}}) + if (llvm::StringRef NameSuffix = Name; + NameSuffix.consume_front(Shard.NamePrefix)) + for (const auto &I : Shard.Infos) + if (NameSuffix == (*Shard.Strings)[I.Offsets.Name] && + (bool)strchr((*Shard.Strings)[I.Offsets.Attributes].data(), 'z') == + InStdNamespace) + return strchr((*Shard.Strings)[I.Offsets.Attributes].data(), 'e') != + nullptr; + + return false; +} + /// Is this builtin supported according to the given language options? static bool builtinIsSupported(const llvm::StringTable &Strings, const Builtin::Info &BuiltinInfo, diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index efacb3cc04c01..002c4c1ee74b7 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -5232,6 +5232,52 @@ static unsigned getMaxVectorWidth(const llvm::Type *Ty) { return MaxVectorWidth; } +// Returns true if a call to \p TargetDecl emitted inside the body of an OpenMP +// SIMD (parallel) loop may have its memory effects tightened to memory(none). +// +// This applies to standard libm math functions that carry a #pragma omp declare +// simd: their only side effects are on the floating-point environment (rounding +// mode, exceptions) and any implementation-defined errno reporting. 
Inside an +// OpenMP SIMD loop the user has asserted the iterations are independent and that +// these per-iteration FP-environment/errno side effects need not be preserved, +// so the call can be modelled as accessing no memory. This lets the loop +// vectorizer replace it with the function's VFABI vector variant instead of +// treating it as a dependence-blocking memory access. +// +// The classification is done by name so it still fires under -fno-builtin (which +// clears the identifier builtin ID, making FunctionDecl::getBuiltinID() == 0). +// Functions that write results back through pointer parameters (sincos, modf, +// frexp, remquo, ...) are excluded so their effect on user memory is preserved. +static bool isDeclareSimdConstMathCall(const Decl *TargetDecl, + CodeGenModule &CGM) { + const auto *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl); + if (!FD || !FD->getIdentifier()) + return false; + + bool HasDeclareSimd = false; + for (const FunctionDecl *Redecl : FD->redecls()) + if (Redecl->hasAttr<OMPDeclareSimdDeclAttr>()) { + HasDeclareSimd = true; + break; + } + if (!HasDeclareSimd) + return false; + + if (!CGM.getContext().BuiltinInfo.isConstWithoutErrnoAndExceptions( + FD->getName())) + return false; + + // sincos-style functions take output pointers; their effects on user-visible + // memory must not be dropped. 
+ if (!FD->getReturnType()->isFloatingType()) + return false; + for (const ParmVarDecl *P : FD->parameters()) + if (P->getType()->isAnyPointerType()) + return false; + + return true; +} + RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, const CGCallee &Callee, ReturnValueSlot ReturnValue, @@ -5973,6 +6019,15 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, CI->setAttributes(Attrs); CI->setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv)); + // Inside an OpenMP SIMD loop, model a declare-simd libm math call as not + // accessing memory so the loop vectorizer can replace it with its VFABI + // vector variant. See isDeclareSimdConstMathCall for the rationale and the + // safety conditions; the loop being parallel is the user's assertion that the + // per-iteration FP-environment/errno side effects need not be preserved. + if (LoopStack.getCurLoopParallel() && + isDeclareSimdConstMathCall(TargetDecl, CGM)) + CI->setMemoryEffects(llvm::MemoryEffects::none()); + // Apply various metadata. if (!CI->getType()->isVoidTy()) diff --git a/clang/test/OpenMP/declare_simd_math_call_readnone.c b/clang/test/OpenMP/declare_simd_math_call_readnone.c new file mode 100644 index 0000000000000..3ea22b55638ad --- /dev/null +++ b/clang/test/OpenMP/declare_simd_math_call_readnone.c @@ -0,0 +1,60 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O0 -fopenmp -fmath-errno \ +// RUN: -fno-builtin -disable-llvm-passes -emit-llvm %s -o - \ +// RUN: | FileCheck %s --check-prefixes=CHECK,ERRNO +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O0 -fopenmp -ffast-math \ +// RUN: -disable-llvm-passes -emit-llvm %s -o - \ +// RUN: | FileCheck %s --check-prefixes=CHECK,FAST + +// A libm math function that carries #pragma omp declare simd, when called inside +// an OpenMP SIMD loop, gets its call site marked memory(none). 
This lets the +// loop vectorizer replace it with its VFABI vector variant instead of treating +// it as a dependence-blocking memory access. The tightening is per-call-site: +// * it fires even under -fmath-errno -fno-builtin (the configuration glibc +// uses for its libmvec ABI tests), where the function declaration is left +// with default may-read/write memory effects and FunctionDecl::getBuiltinID +// returns 0, so the classification must be done by name; +// * under -fmath-errno the same call outside any SIMD loop is NOT tightened, +// so ordinary scalar callers keep the function's errno / FP-environment side +// effects (under fast-math the declaration is globally pure already, so the +// distinction is not observable there); +// * functions that write through output pointers (e.g. sincos) are NOT +// tightened, even inside a SIMD loop. + +#pragma omp declare simd notinbranch +double acosh(double); + +#pragma omp declare simd notinbranch linear(s) linear(c) +void sincos(double, double *s, double *c); + +#define N 128 +double a[N], b[N]; + +// CHECK-LABEL: define{{.*}}@test_simd( +// The acosh call inside the SIMD loop is memory(none) under both configs. +// CHECK: call{{.*}}double @acosh({{.*}}) [[SIMD_ATTR:#[0-9]+]] +void test_simd(void) { +#pragma omp simd + for (int i = 0; i < N; ++i) + a[i] = acosh(b[i]); +} + +// CHECK-LABEL: define{{.*}}@test_scalar( +// Under -fmath-errno the call outside any SIMD loop is NOT tightened. +// ERRNO: call{{.*}}double @acosh({{.*}}) [[SCALAR_ATTR:#[0-9]+]] +double test_scalar(double x) { return acosh(x); } + +// CHECK-LABEL: define{{.*}}@test_sincos_simd( +// sincos has output pointers, so it is NOT tightened even inside a SIMD loop. +// CHECK: call{{.*}}void @sincos({{.*}}ptr{{.*}}ptr{{.*}}) [[SINCOS_ATTR:#[0-9]+]] +void test_sincos_simd(double *s, double *c) { +#pragma omp simd + for (int i = 0; i < N; ++i) + sincos(b[i], &s[i], &c[i]); +} + +// The in-SIMD-loop math call site is memory(none). 
+// CHECK: attributes [[SIMD_ATTR]] = { {{.*}}memory(none){{.*}} } +// The sincos call site is never memory(none). +// CHECK-NOT: attributes [[SINCOS_ATTR]] = {{.*}}memory(none) +// Under -fmath-errno the plain scalar call site is not memory(none) either. +// ERRNO-NOT: attributes [[SCALAR_ATTR]] = {{.*}}memory(none) _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
