fpetrogalli created this revision. fpetrogalli added a reviewer: cfe-commits.
This patch generates a list of global external variables that are passed to the llvm::TargetLibraryInfo (TLI) to enable the vectorization of loops containing calls to functions that are marked with a #pragma omp declare simd. To do so, the global variables are generated as "global external", so that they get removed in the middle-end and generate no useless code. A new method of the TLI (provided in a separate LLVM patch) looks for such global variables and updates the lists of vectorizable functions that the vectorizer can use. This behavior enables the programmer to provide vector routines that are recognized as vectorizable by using the OpenMP directive "declare simd" as follows: $> clang -fopenmp -c -o whatever.o -O3 file.c where `file.c` is #pragma omp declare simd double f(double x); void aaa(double *x, double *y, int N) { for (int i = 0; i < N; ++i) { x[i] = f(y[i]); } } https://reviews.llvm.org/D27250 Files: lib/CodeGen/BackendUtil.cpp lib/CodeGen/CGCall.cpp lib/CodeGen/CGOpenMPRuntime.cpp lib/CodeGen/CGOpenMPRuntime.h test/OpenMP/declare_simd_no_definition.c test/OpenMP/declare_simd_no_definition.cpp
Index: test/OpenMP/declare_simd_no_definition.cpp =================================================================== --- /dev/null +++ test/OpenMP/declare_simd_no_definition.cpp @@ -0,0 +1,55 @@ +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - -femit-all-decls -verify| FileCheck %s +// RUN: %clang_cc1 -verify -triple aarch64-linux-gnu -fopenmp -x c++ -emit-llvm %s -o - -femit-all-decls -verify| FileCheck %s --check-prefix=AARCH64 +// expected-no-diagnostics + +#pragma omp declare simd +double f(double x); + +#pragma omp declare simd +float f(float x); + +void aaa(double *x, double *y, int N) { + for (int i = 0; i < N; ++i) { + x[i] = f(y[i]); + } +} + +void aaa(float *x, float *y, int N) { + for (int i = 0; i < N; ++i) { + x[i] = f(y[i]); + } +} + +// CHECK-LABEL: define void @_Z3aaaPdS_i +// CHECK-DAG: %call = call double @_Z1fd(double %{{[0-9]+}}) #[[attrD:[0-9]+]] + +// CHECK-LABEL: define void @_Z3aaaPfS_i +// CHECK-DAG: %call = call float @_Z1ff(float %{{[0-9]+}}) #[[attrF:[0-9]+]] + +// CHECK-DAG: attributes #[[attrD]] +// CHECK-DAG: _ZGVbM2v__Z1fd +// CHECK-DAG: _ZGVbN2v__Z1fd +// CHECK-DAG: _ZGVcM4v__Z1fd +// CHECK-DAG: _ZGVcN4v__Z1fd +// CHECK-DAG: _ZGVdM4v__Z1fd +// CHECK-DAG: _ZGVdN4v__Z1fd +// CHECK-DAG: _ZGVeM8v__Z1fd +// CHECK-DAG: _ZGVeN8v__Z1fd + +// CHECK-DAG: attributes #[[attrF]] +// CHECK-DAG: _ZGVbM4v__Z1ff +// CHECK-DAG: _ZGVbN4v__Z1ff +// CHECK-DAG: _ZGVcM8v__Z1ff +// CHECK-DAG: _ZGVcN8v__Z1ff +// CHECK-DAG: _ZGVdM8v__Z1ff +// CHECK-DAG: _ZGVdN8v__Z1ff +// CHECK-DAG: _ZGVeM16v__Z1ff +// CHECK-DAG: _ZGVeN16v__Z1ff + +// AARCH64-DAG: @vec_prefix__VA64QM2v__Z1fd_vec_midfix__Z1fd_vec_postfix = external global <2 x double> (<2 x i64>, <2 x double>) +// AARCH64-DAG: @vec_prefix__VA64QN2v__Z1fd_vec_midfix__Z1fd_vec_postfix = external global <2 x double> (<2 x double>) + +// AARCH64-DAG: @vec_prefix__VA64DM2v__Z1ff_vec_midfix__Z1ff_vec_postfix = external global <2 x float> (<2 x i32>, <2 x float>) +// 
AARCH64-DAG: @vec_prefix__VA64QM4v__Z1ff_vec_midfix__Z1ff_vec_postfix = external global <4 x float> (<4 x i32>, <4 x float>) +// AARCH64-DAG: @vec_prefix__VA64DN2v__Z1ff_vec_midfix__Z1ff_vec_postfix = external global <2 x float> (<2 x float>) +// AARCH64-DAG: @vec_prefix__VA64QN4v__Z1ff_vec_midfix__Z1ff_vec_postfix = external global <4 x float> (<4 x float>) Index: test/OpenMP/declare_simd_no_definition.c =================================================================== --- /dev/null +++ test/OpenMP/declare_simd_no_definition.c @@ -0,0 +1,45 @@ +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c -emit-llvm %s -o - -femit-all-decls -verify| FileCheck %s +// RUN: %clang_cc1 -verify -triple aarch64-linux-gnu -fopenmp -x c -emit-llvm %s -o - -femit-all-decls -verify| FileCheck %s --check-prefix=AARCH64 +// expected-no-diagnostics + +#pragma omp declare simd +double f(double x); + +void aaa(double *x, double *y, int N) { + for (int i = 0; i < N; ++i) { + x[i] = f(y[i]); + } +} + +#pragma omp declare simd notinbranch +#pragma omp declare simd uniform(y) +double xpow(double x, double y); + + +void bbb(double *x, double *y, int N) { + for (int i = 0; i < N; ++i) { + x[i] = xpow(y[i], N); + } +} + + +// CHECK-LABEL: define void @aaa +// CHECK-DAG: %call = call double @f(double %{{[0-9]+}}) #[[attr:[0-9]+]] +// CHECK: attributes #[[attr]] +// CHECK-DAG: _ZGVbM2v_f +// CHECK-DAG: _ZGVbN2v_f +// CHECK-DAG: _ZGVcM4v_f +// CHECK-DAG: _ZGVcN4v_f +// CHECK-DAG: _ZGVdM4v_f +// CHECK-DAG: _ZGVdN4v_f +// CHECK-DAG: _ZGVeM8v_f +// CHECK-DAG: _ZGVeN8v_f + + +// AARCH64-DAG: @vec_prefix__VA64QM2v_f_vec_midfix_f_vec_postfix = external global <2 x double> (<2 x i64>, <2 x double>) +// AARCH64-DAG: @vec_prefix__VA64QN2v_f_vec_midfix_f_vec_postfix = external global <2 x double> (<2 x double>) + +// AARCH64-DAG: @vec_prefix__VA64QM2vu_xpow_vec_midfix_xpow_vec_postfix = external global <2 x double> (<2 x i64>, <2 x double>, double) +// AARCH64-DAG: 
@vec_prefix__VA64QN2vu_xpow_vec_midfix_xpow_vec_postfix = external global <2 x double> (<2 x double>, double) +// AARCH64-DAG: @vec_prefix__VA64QN2vv_xpow_vec_midfix_xpow_vec_postfix = external global <2 x double> (<2 x double>, <2 x double>) + Index: lib/CodeGen/CGOpenMPRuntime.h =================================================================== --- lib/CodeGen/CGOpenMPRuntime.h +++ lib/CodeGen/CGOpenMPRuntime.h @@ -1070,6 +1070,11 @@ virtual void emitDeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn); + /// Provides all the names of the vector variants associated to a + /// function \param FD marked with "declare simd' + virtual std::vector<std::string> + listAvailableVectorSignatures(const FunctionDecl *FD); + /// Emit initialization for doacross loop nesting support. /// \param D Loop-based construct used in doacross nesting construct. virtual void emitDoacrossInit(CodeGenFunction &CGF, Index: lib/CodeGen/CGOpenMPRuntime.cpp =================================================================== --- lib/CodeGen/CGOpenMPRuntime.cpp +++ lib/CodeGen/CGOpenMPRuntime.cpp @@ -19,6 +19,7 @@ #include "clang/AST/Decl.h" #include "clang/AST/StmtOpenMP.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DerivedTypes.h" @@ -6515,11 +6516,13 @@ return C.getTypeSize(CDT); } -static void -emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, - const llvm::APSInt &VLENVal, - ArrayRef<ParamAttrTy> ParamAttrs, - OMPDeclareSimdDeclAttr::BranchStateTy State) { +static std::vector<std::string> +emitX86DeclareSimdFunction(const FunctionDecl *FD, + llvm::APSInt VLENVal, + ArrayRef<ParamAttrTy> ParamAttrs, + OMPDeclareSimdDeclAttr::BranchStateTy State, + const StringRef MangledName) { + std::vector<std::string> OutVec; struct ISADataTy { char ISA; unsigned VecRegSize; @@ -6581,14 +6584,146 @@ if (!!ParamAttr.Alignment) Out << 'a' << 
ParamAttr.Alignment; } - Out << '_' << Fn->getName(); - Fn->addFnAttr(Out.str()); + Out << '_' << MangledName; + OutVec.push_back(Out.str()); } } + return OutVec; +} + +static llvm::VectorType * getAArch64MaskTy(const char ISA, + const unsigned LaneSizeInBits, + const unsigned VLEN, + llvm::LLVMContext &C) { + + switch (LaneSizeInBits) { + case 64: + return llvm::VectorType::get(llvm::Type::getInt64Ty(C),VLEN); + case 32: + return llvm::VectorType::get(llvm::Type::getInt32Ty(C),VLEN); + case 16: + return llvm::VectorType::get(llvm::Type::getInt16Ty(C),VLEN); + case 8: + return llvm::VectorType::get(llvm::Type::getInt8Ty(C),VLEN); + default : + llvm_unreachable("Type is not supported"); + } + return nullptr; } +static void +emitAArch64DeclareSimdFunction(CodeGenModule & CGM, const FunctionDecl *FD, + const llvm::APSInt &UserVLEN, + ArrayRef<ParamAttrTy> ParamAttrs, + OMPDeclareSimdDeclAttr::BranchStateTy State, + const StringRef MangledName) { + struct ISADataTy { + char ISA; + unsigned VecRegSize; + }; + // make this depend on the size of + const ISADataTy ISAData[] = { + { 'Q', 128 }, // NEON 128 + { 'D', 64 }, // NEON 64 + }; + std::vector<char> Masked; + switch (State) { + case OMPDeclareSimdDeclAttr::BS_Undefined: + Masked = {'M', 'N'}; + break; + case OMPDeclareSimdDeclAttr::BS_Notinbranch: + Masked = {'N'}; + break; + case OMPDeclareSimdDeclAttr::BS_Inbranch: + Masked = {'M'}; + break; + } + + for (auto Mask : Masked) { + for (auto Data : ISAData) { + std::string Buffer; + llvm::raw_string_ostream Out(Buffer); + Out << "_VA64" << Data.ISA << Mask; + std::vector<llvm::Type *> Args; + auto VLEN = UserVLEN; + + // Compute VLEN if the user hasn't provided one in the pragma. 
+ if (!UserVLEN) { + auto CDTSize = evaluateCDTSize(FD, ParamAttrs); + VLEN = llvm::APSInt::getUnsigned(Data.VecRegSize / CDTSize); + } + // that's not a vector, skip + if (VLEN == 1) + continue; + + // Add extra paramter if the function is Masked + if (Mask == 'M') { + auto MaskTy = getAArch64MaskTy(Data.ISA, + Data.VecRegSize/ VLEN.getExtValue(), + VLEN.getExtValue(), CGM.getLLVMContext()); + Args.push_back(MaskTy); + } + + Out << VLEN; + unsigned Pos = 0; + for (auto &ParamAttr : ParamAttrs) { + switch (ParamAttr.Kind){ + case LinearWithVarStride: + Out << 's' << ParamAttr.StrideOrArg; + break; + case Linear: + Out << 'l'; + if (!!ParamAttr.StrideOrArg) + Out << ParamAttr.StrideOrArg; + break; + case Uniform: + Out << 'u'; + break; + case Vector: + Out << 'v'; + break; + } + + if (!!ParamAttr.Alignment) + Out << 'a' << ParamAttr.Alignment; + + llvm::Type *Ty = + CGM.getTypes().ConvertType(FD->getParamDecl(Pos)->getOriginalType()); + llvm::Type *VecTy = llvm::VectorType::get(Ty, VLEN.getExtValue()); + switch (ParamAttr.Kind){ + default: + Args.push_back(VecTy); + break; + case Uniform: + Args.push_back(Ty); + break; + } + + ++Pos; + } + Out << '_' << MangledName; + std::string GlobalName = + llvm::TargetLibraryInfoImpl::mangle(Out.str(), MangledName); + llvm::Type * RetTy = CGM.getTypes().ConvertType(FD->getReturnType()); + llvm::Type *VecRetTy = llvm::VectorType::get(RetTy, VLEN.getExtValue()); + llvm::FunctionType * FTy= llvm::FunctionType::get(VecRetTy, Args, false); + CGM.getModule().getOrInsertGlobal(GlobalName, FTy); + } + } + } + void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn) { + + auto FunList = listAvailableVectorSignatures(FD); + for (auto &VectorName: FunList) { + std::string name = VectorName; + Fn->addFnAttr(name); + } +} + +std::vector<std::string> +CGOpenMPRuntime::listAvailableVectorSignatures(const FunctionDecl *FD) { ASTContext &C = CGM.getContext(); FD = FD->getCanonicalDecl(); // Map params to their 
positions in function decl. @@ -6600,6 +6735,8 @@ ParamPositions.insert({P->getCanonicalDecl(), ParamPos}); ++ParamPos; } + std::vector<std::string> OutVec; + std::string MangledName = CGM.getMangledName(GlobalDecl(FD)); for (auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); // Mark uniform parameters. @@ -6672,9 +6809,19 @@ VLENVal = VLEN->EvaluateKnownConstInt(C); OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); if (CGM.getTriple().getArch() == llvm::Triple::x86 || - CGM.getTriple().getArch() == llvm::Triple::x86_64) - emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); + CGM.getTriple().getArch() == llvm::Triple::x86_64) { + const auto Out = + emitX86DeclareSimdFunction(FD, VLENVal, ParamAttrs, State, MangledName); + for (auto &VecName : Out) + OutVec.push_back(VecName); + } + if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { + emitAArch64DeclareSimdFunction(CGM, FD, VLENVal, + ParamAttrs, State, + MangledName); + } } + return OutVec; } namespace { Index: lib/CodeGen/CGCall.cpp =================================================================== --- lib/CodeGen/CGCall.cpp +++ lib/CodeGen/CGCall.cpp @@ -17,6 +17,7 @@ #include "CGBlocks.h" #include "CGCXXABI.h" #include "CGCleanup.h" +#include "CGOpenMPRuntime.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "TargetInfo.h" @@ -1663,6 +1664,15 @@ const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(Fn); if (Fn->isNoReturn() && !(AttrOnCallSite && MD && MD->isVirtual())) FuncAttrs.addAttribute(llvm::Attribute::NoReturn); + + // TODO: this should work also when using -fopenmp-simd + if (getLangOpts().OpenMP && Fn->hasAttr<OMPDeclareSimdDeclAttr>() + && !Fn->hasBody()) { + auto MangledNames = + CGOpenMPRuntime(*this).listAvailableVectorSignatures(Fn); + for (auto &MangledName : MangledNames) + FuncAttrs.addAttribute(MangledName); + } } // 'const', 'pure' and 'noalias' attributed 
functions are also nounwind. Index: lib/CodeGen/BackendUtil.cpp =================================================================== --- lib/CodeGen/BackendUtil.cpp +++ lib/CodeGen/BackendUtil.cpp @@ -306,6 +306,10 @@ std::unique_ptr<TargetLibraryInfoImpl> TLII( createTLII(TargetTriple, CodeGenOpts)); + if (LangOpts.OpenMP) { + TLII->addOpenMPVectorFunctions(TheModule); + } + switch (Inlining) { case CodeGenOptions::NoInlining: break;
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits