erichkeane created this revision. erichkeane added reviewers: thiagomacieira, aaron.ballman.
As discussed in the LWN article (https://lwn.net/Articles/691932/), GCC 6.0 adds target_clones multiversioning. This functionality is an odd cross between the cpu_dispatch and 'target' MV, but is compatible with neither. This attribute allows you to list all options, then emits a separately optimized version of the function for each option (similar to the cpu_specific attribute). It automatically generates a resolver, just like the other two. The mangling, however, is... ODD to say the least. The mangling format is: <normal_mangling>.<option string>.<option ordinal>. However, the 'option ordinal' is where it gets strange. When parsing the list, 'default' is moved to the end, so for "foo,default,bar", foo is 0, bar is 1, and default is 2. Otherwise, emission rules should be the same as 'target'. https://reviews.llvm.org/D51650 Files: include/clang/AST/Decl.h include/clang/Basic/Attr.td include/clang/Basic/AttrDocs.td include/clang/Basic/DiagnosticSemaKinds.td include/clang/Sema/Sema.h lib/AST/Decl.cpp lib/CodeGen/CodeGenFunction.cpp lib/CodeGen/CodeGenFunction.h lib/CodeGen/CodeGenModule.cpp lib/CodeGen/CodeGenModule.h lib/Sema/SemaDecl.cpp lib/Sema/SemaDeclAttr.cpp test/CodeGen/attr-cpuspecific.c test/CodeGen/attr-target-clones.c test/Misc/pragma-attribute-supported-attributes-list.test test/Sema/attr-target-clones.c
Index: lib/CodeGen/CodeGenModule.cpp =================================================================== --- lib/CodeGen/CodeGenModule.cpp +++ lib/CodeGen/CodeGenModule.cpp @@ -917,6 +917,19 @@ } } +static void AppendTargetClonesMangling(const CodeGenModule &CGM, + const TargetClonesAttr *Attr, + raw_ostream &Out) { + Out << '.'; + StringRef FeatureStr = Attr->getCurFeatureStr(); + if (FeatureStr.startswith("arch=")) + Out << "arch_" << FeatureStr.substr(sizeof("arch=") - 1); + else + Out << FeatureStr; + + Out << '.' << Attr->ActiveArgIndex; +} + static std::string getMangledNameImpl(const CodeGenModule &CGM, GlobalDecl GD, const NamedDecl *ND, bool OmitMultiVersionMangling = false) { @@ -950,6 +963,8 @@ if (FD->isCPUDispatchMultiVersion() || FD->isCPUSpecificMultiVersion()) AppendCPUSpecificCPUDispatchMangling( CGM, FD->getAttr<CPUSpecificAttr>(), Out); + else if (FD->isTargetClonesMultiVersion()) + AppendTargetClonesMangling(CGM, FD->getAttr<TargetClonesAttr>(), Out); else AppendTargetMangling(CGM, FD->getAttr<TargetAttr>(), Out); } @@ -1013,12 +1028,19 @@ // Since CPUSpecific can require multiple emits per decl, store the manglings // separately. if (FD && - (FD->isCPUDispatchMultiVersion() || FD->isCPUSpecificMultiVersion())) { + (FD->isCPUDispatchMultiVersion() || FD->isCPUSpecificMultiVersion() || + FD->isTargetClonesMultiVersion())) { const auto *SD = FD->getAttr<CPUSpecificAttr>(); + const auto *TC = FD->getAttr<TargetClonesAttr>(); - std::pair<GlobalDecl, unsigned> SpecCanonicalGD{ - CanonicalGD, - SD ? 
SD->ActiveArgIndex : std::numeric_limits<unsigned>::max()}; + unsigned VersionID = std::numeric_limits<unsigned>::max(); + + if (SD) + VersionID = SD->ActiveArgIndex; + else if (TC) + VersionID = TC->ActiveArgIndex; + + std::pair<GlobalDecl, unsigned> SpecCanonicalGD{CanonicalGD, VersionID}; auto FoundName = CPUSpecificMangledDeclNames.find(SpecCanonicalGD); if (FoundName != CPUSpecificMangledDeclNames.end()) @@ -1376,9 +1398,10 @@ const auto *FD = dyn_cast_or_null<FunctionDecl>(D); FD = FD ? FD->getMostRecentDecl() : FD; const auto *TD = FD ? FD->getAttr<TargetAttr>() : nullptr; - const auto *SD = FD ? FD->getAttr<CPUSpecificAttr>() : nullptr; bool AddedAttr = false; - if (TD || SD) { + + if (FD && (TD || FD->hasAttr<CPUSpecificAttr>() || + FD->hasAttr<TargetClonesAttr>())) { llvm::StringMap<bool> FeatureMap; getFunctionFeatureMap(FeatureMap, FD); @@ -2111,6 +2134,9 @@ if (Global->hasAttr<CPUDispatchAttr>()) return emitCPUDispatchDefinition(GD); + if (Global->hasAttr<CPUSpecificAttr>() || Global->hasAttr<TargetClonesAttr>()) + return EmitGlobalFunctionDefinition(GD, nullptr); + // If this is CUDA, be selective about which declarations we emit. if (LangOpts.CUDA) { if (LangOpts.CUDAIsDevice) { @@ -2365,6 +2391,7 @@ if (getFunctionLinkage(GD) != llvm::Function::AvailableExternallyLinkage) return true; const auto *F = cast<FunctionDecl>(GD.getDecl()); + if (CodeGenOpts.OptimizationLevel == 0 && !F->hasAttr<AlwaysInlineAttr>()) return false; @@ -2526,6 +2553,51 @@ CGF.EmitCPUDispatchMultiVersionResolver(ResolverFunc, Options); } +void CodeGenModule::EmitTargetClonesResolver(GlobalDecl GD) { + const auto *FD = cast<FunctionDecl>(GD.getDecl()); + assert(FD && "Not a FunctionDecl?"); + auto *ClonesAttr = FD->getAttr<TargetClonesAttr>(); + assert(ClonesAttr && "Not a target_clones Function?"); + llvm::Type *DeclTy = getTypes().ConvertTypeForMem(FD->getType()); + + // Force emission of the IFunc. 
+ GetOrCreateMultiVersionIFunc(GD, DeclTy, FD); + + StringRef MangledName = + getMangledNameImpl(*this, GD, FD, /*OmitMVMangling*/ true); + std::string ResolverName = (MangledName + ".resolver").str(); + llvm::Type *ResolverType = llvm::FunctionType::get( + llvm::PointerType::get(DeclTy, + Context.getTargetAddressSpace(FD->getType())), + false); + auto *ResolverFunc = cast<llvm::Function>( + GetOrCreateLLVMFunction(ResolverName, ResolverType, GlobalDecl{}, + /*ForVTable=*/false)); + + SmallVector<CodeGenFunction::TargetClonesMultiVersionResolverOption, 10> + Options; + const TargetInfo &Target = getTarget(); + + ClonesAttr->ActiveArgIndex = 0; + while (ClonesAttr->ActiveArgIndex < ClonesAttr->featuresStrs_size()) { + std::string MangledName = getMangledName(GD); + llvm::Constant *Func = GetOrCreateLLVMFunction( + MangledName, DeclTy, GD, /*ForVTable=*/false, /*DontDefer=*/false, + /*IsThunk=*/false, llvm::AttributeList(), ForDefinition); + + Options.emplace_back(Target, cast<llvm::Function>(Func), + ClonesAttr->getCurFeatureStr()); + ClonesAttr->AdvanceActiveArgIndex(); + } + ClonesAttr->ActiveArgIndex = 0; + + std::stable_sort( + Options.begin(), Options.end(), + std::greater<CodeGenFunction::TargetClonesMultiVersionResolverOption>()); + CodeGenFunction CGF(*this); + CGF.EmitTargetClonesMultiVersionResolver(ResolverFunc, Options); +} + /// If an ifunc for the specified mangled name is not in the module, create and /// return an llvm IFunc Function with the specified type. llvm::Constant * @@ -2540,7 +2612,8 @@ // Since this is the first time we've created this IFunc, make sure // that we put this multiversioned function into the list to be // replaced later if necessary (target multiversioning only). 
- if (!FD->isCPUDispatchMultiVersion() && !FD->isCPUSpecificMultiVersion()) + if (!FD->isCPUDispatchMultiVersion() && !FD->isCPUSpecificMultiVersion() && + !FD->isTargetClonesMultiVersion()) MultiVersionFuncs.push_back(GD); std::string ResolverName = MangledName + ".resolver"; @@ -3877,6 +3950,16 @@ ++Spec->ActiveArgIndex; EmitGlobalFunctionDefinition(GD, nullptr); } + } else if (D->isTargetClonesMultiVersion()) { + auto *Clone = D->getAttr<TargetClonesAttr>(); + // If there is another specific version we need to emit, do so here. Then, + // the last thing we do is emit the resolver. + Clone->AdvanceActiveArgIndex(); + if (Clone->ActiveArgIndex < Clone->featuresStrs_size()) + EmitGlobalFunctionDefinition(GD, nullptr); + else + EmitTargetClonesResolver(GD); + Clone->ActiveArgIndex = 0; } } @@ -5264,6 +5347,15 @@ FeaturesTmp); std::vector<std::string> Features(FeaturesTmp.begin(), FeaturesTmp.end()); Target.initFeatureMap(FeatureMap, getDiags(), TargetCPU, Features); + } else if (const auto *Clones = FD->getAttr<TargetClonesAttr>()) { + StringRef Cur = Clones->getCurFeatureStr(); + std::vector<std::string> Features; + + if (Cur.startswith("arch=")) + TargetCPU = Cur.substr(sizeof("arch=") - 1); + else if (Cur != "default") + Features.push_back((StringRef{"+"} + Cur).str()); + Target.initFeatureMap(FeatureMap, getDiags(), TargetCPU, Features); } else { Target.initFeatureMap(FeatureMap, getDiags(), TargetCPU, Target.getTargetOpts().Features); Index: lib/CodeGen/CodeGenFunction.h =================================================================== --- lib/CodeGen/CodeGenFunction.h +++ lib/CodeGen/CodeGenFunction.h @@ -4313,6 +4313,27 @@ ArrayRef<CPUDispatchMultiVersionResolverOption> Options); static uint32_t GetX86CpuSupportsMask(ArrayRef<StringRef> FeatureStrs); + struct TargetClonesMultiVersionResolverOption { + llvm::Function *Function; + StringRef FeatureStr; + unsigned Priority; + TargetClonesMultiVersionResolverOption(const TargetInfo &TargInfo, + llvm::Function 
*F, StringRef Feature) + : Function(F), FeatureStr(Feature), Priority(0u) { + if (FeatureStr.startswith("arch=")) + Priority = TargInfo.multiVersionSortPriority( + FeatureStr.drop_front(sizeof("arch=") - 1)); + else if (FeatureStr != "default") + Priority = TargInfo.multiVersionSortPriority(FeatureStr); + } + bool operator>(const TargetClonesMultiVersionResolverOption &Other) const { + return Priority > Other.Priority; + } + }; + void EmitTargetClonesMultiVersionResolver( + llvm::Function *Resolver, + ArrayRef<TargetClonesMultiVersionResolverOption> Options); + private: QualType getVarArgType(const Expr *Arg); @@ -4332,6 +4353,8 @@ llvm::Value *EmitX86CpuInit(); llvm::Value * FormResolverCondition(const TargetMultiVersionResolverOption &RO); + llvm::Value * + FormResolverCondition(const TargetClonesMultiVersionResolverOption &RO); }; inline DominatingLLVMValue::saved_type Index: lib/CodeGen/CodeGenModule.h =================================================================== --- lib/CodeGen/CodeGenModule.h +++ lib/CodeGen/CodeGenModule.h @@ -1314,6 +1314,7 @@ void EmitAliasDefinition(GlobalDecl GD); void emitIFuncDefinition(GlobalDecl GD); void emitCPUDispatchDefinition(GlobalDecl GD); + void EmitTargetClonesResolver(GlobalDecl GD); void EmitObjCPropertyImplementations(const ObjCImplementationDecl *D); void EmitObjCIvarInitializations(ObjCImplementationDecl *D); Index: lib/CodeGen/CodeGenFunction.cpp =================================================================== --- lib/CodeGen/CodeGenFunction.cpp +++ lib/CodeGen/CodeGenFunction.cpp @@ -2382,6 +2382,17 @@ return TrueCondition; } +llvm::Value *CodeGenFunction::FormResolverCondition( + const TargetClonesMultiVersionResolverOption &RO) { + if (RO.FeatureStr.startswith("arch=")) + return EmitX86CpuIs(RO.FeatureStr.drop_front(sizeof("arch=") - 1)); + + if (RO.FeatureStr == "default") + return nullptr; + + return EmitX86CpuSupports(RO.FeatureStr); +} + void CodeGenFunction::EmitTargetMultiVersionResolver( 
llvm::Function *Resolver, ArrayRef<TargetMultiVersionResolverOption> Options) { @@ -2456,6 +2467,36 @@ Builder.ClearInsertionPoint(); } +void CodeGenFunction::EmitTargetClonesMultiVersionResolver( + llvm::Function *Resolver, + ArrayRef<TargetClonesMultiVersionResolverOption> Options) { + assert((getContext().getTargetInfo().getTriple().getArch() == + llvm::Triple::x86 || + getContext().getTargetInfo().getTriple().getArch() == + llvm::Triple::x86_64) && + "Only implemented for x86 targets"); + llvm::BasicBlock *CurBlock = createBasicBlock("resolver_entry", Resolver); + Builder.SetInsertPoint(CurBlock); + EmitX86CpuInit(); + + for (const TargetClonesMultiVersionResolverOption &RO : Options) { + Builder.SetInsertPoint(CurBlock); + llvm::Value *TrueCondition = FormResolverCondition(RO); + + if (TrueCondition) { + llvm::BasicBlock *RetBlock = createBasicBlock("ro_ret", Resolver); + llvm::IRBuilder<> RetBuilder(RetBlock); + RetBuilder.CreateRet(RO.Function); + CurBlock = createBasicBlock("ro_else", Resolver); + Builder.CreateCondBr(TrueCondition, RetBlock, CurBlock); + } else { + // Emit the default version and end emission. 
+ Builder.CreateRet(RO.Function); + return; + } + } +} + llvm::DebugLoc CodeGenFunction::SourceLocToDebugLoc(SourceLocation Location) { if (CGDebugInfo *DI = getDebugInfo()) return DI->SourceLocToDebugLoc(Location); Index: lib/AST/Decl.cpp =================================================================== --- lib/AST/Decl.cpp +++ lib/AST/Decl.cpp @@ -2928,6 +2928,9 @@ bool FunctionDecl::isCPUSpecificMultiVersion() const { return isMultiVersion() && hasAttr<CPUSpecificAttr>(); } +bool FunctionDecl::isTargetClonesMultiVersion() const { + return isMultiVersion() && hasAttr<TargetClonesAttr>(); +} void FunctionDecl::setPreviousDeclaration(FunctionDecl *PrevDecl) { Index: lib/Sema/SemaDecl.cpp =================================================================== --- lib/Sema/SemaDecl.cpp +++ lib/Sema/SemaDecl.cpp @@ -9330,7 +9330,7 @@ } namespace MultiVersioning { -enum Type { None, Target, CPUSpecific, CPUDispatch}; +enum Type { None, Target, CPUSpecific, CPUDispatch, TargetClones }; } // MultiVersionType static MultiVersioning::Type @@ -9341,6 +9341,8 @@ return MultiVersioning::CPUDispatch; if (FD->hasAttr<CPUSpecificAttr>()) return MultiVersioning::CPUSpecific; + if (FD->hasAttr<TargetClonesAttr>()) + return MultiVersioning::TargetClones; return MultiVersioning::None; } /// Check the target attribute of the function for MultiVersion @@ -9402,10 +9404,6 @@ Linkage = 5 }; - bool IsCPUSpecificCPUDispatchMVType = - MVType == MultiVersioning::CPUDispatch || - MVType == MultiVersioning::CPUSpecific; - if (OldFD && !OldFD->getType()->getAs<FunctionProtoType>()) { S.Diag(OldFD->getLocation(), diag::err_multiversion_noproto); S.Diag(NewFD->getLocation(), diag::note_multiversioning_caused_here); @@ -9427,56 +9425,56 @@ if (CausesMV && OldFD && std::distance(OldFD->attr_begin(), OldFD->attr_end()) != 1) { S.Diag(OldFD->getLocation(), diag::err_multiversion_no_other_attrs) - << IsCPUSpecificCPUDispatchMVType; + << (MVType - 1); S.Diag(NewFD->getLocation(), 
diag::note_multiversioning_caused_here); return true; } - if (std::distance(NewFD->attr_begin(), NewFD->attr_end()) != 1) + if (std::distance(NewFD->attr_begin(), NewFD->attr_end()) > 1) return S.Diag(NewFD->getLocation(), diag::err_multiversion_no_other_attrs) - << IsCPUSpecificCPUDispatchMVType; + << (MVType - 1); if (NewFD->getTemplatedKind() == FunctionDecl::TK_FunctionTemplate) return S.Diag(NewFD->getLocation(), diag::err_multiversion_doesnt_support) - << IsCPUSpecificCPUDispatchMVType << FuncTemplates; + << (MVType - 1) << FuncTemplates; if (const auto *NewCXXFD = dyn_cast<CXXMethodDecl>(NewFD)) { if (NewCXXFD->isVirtual()) return S.Diag(NewCXXFD->getLocation(), diag::err_multiversion_doesnt_support) - << IsCPUSpecificCPUDispatchMVType << VirtFuncs; + << (MVType - 1) << VirtFuncs; if (const auto *NewCXXCtor = dyn_cast<CXXConstructorDecl>(NewFD)) return S.Diag(NewCXXCtor->getLocation(), diag::err_multiversion_doesnt_support) - << IsCPUSpecificCPUDispatchMVType << Constructors; + << (MVType - 1) << Constructors; if (const auto *NewCXXDtor = dyn_cast<CXXDestructorDecl>(NewFD)) return S.Diag(NewCXXDtor->getLocation(), diag::err_multiversion_doesnt_support) - << IsCPUSpecificCPUDispatchMVType << Destructors; + << (MVType - 1) << Destructors; } if (NewFD->isDeleted()) return S.Diag(NewFD->getLocation(), diag::err_multiversion_doesnt_support) - << IsCPUSpecificCPUDispatchMVType << DeletedFuncs; + << (MVType - 1) << DeletedFuncs; if (NewFD->isDefaulted()) return S.Diag(NewFD->getLocation(), diag::err_multiversion_doesnt_support) - << IsCPUSpecificCPUDispatchMVType << DefaultedFuncs; + << (MVType - 1) << DefaultedFuncs; if (NewFD->isConstexpr() && (MVType == MultiVersioning::CPUDispatch || MVType == MultiVersioning::CPUSpecific)) return S.Diag(NewFD->getLocation(), diag::err_multiversion_doesnt_support) - << IsCPUSpecificCPUDispatchMVType << ConstexprFuncs; + << (MVType - 1) << ConstexprFuncs; QualType NewQType = S.getASTContext().getCanonicalType(NewFD->getType()); 
const auto *NewType = cast<FunctionType>(NewQType); QualType NewReturnType = NewType->getReturnType(); if (NewReturnType->isUndeducedType()) return S.Diag(NewFD->getLocation(), diag::err_multiversion_doesnt_support) - << IsCPUSpecificCPUDispatchMVType << DeducedReturn; + << (MVType - 1) << DeducedReturn; // Only allow transition to MultiVersion if it hasn't been used. if (OldFD && CausesMV && OldFD->isUsed(false)) @@ -9631,15 +9629,18 @@ Sema &S, FunctionDecl *OldFD, FunctionDecl *NewFD, MultiVersioning::Type NewMVType, const TargetAttr *NewTA, const CPUDispatchAttr *NewCPUDisp, const CPUSpecificAttr *NewCPUSpec, - bool &Redeclaration, NamedDecl *&OldDecl, bool &MergeTypeWithPrevious, - LookupResult &Previous) { + const TargetClonesAttr *NewClones, bool &Redeclaration, NamedDecl *&OldDecl, + bool &MergeTypeWithPrevious, LookupResult &Previous) { MultiVersioning::Type OldMVType = getMultiVersionType(OldFD); + // Disallow mixing of multiversioning types. - if ((OldMVType == MultiVersioning::Target && - NewMVType != MultiVersioning::Target) || - (NewMVType == MultiVersioning::Target && - OldMVType != MultiVersioning::Target)) { + if (OldMVType != MultiVersioning::None && + NewMVType != MultiVersioning::None && OldMVType != NewMVType && + !(OldMVType == MultiVersioning::CPUDispatch && + NewMVType == MultiVersioning::CPUSpecific) && + !(OldMVType == MultiVersioning::CPUSpecific && + NewMVType == MultiVersioning::CPUDispatch)) { S.Diag(NewFD->getLocation(), diag::err_multiversion_types_mixed); S.Diag(OldFD->getLocation(), diag::note_previous_declaration); NewFD->setInvalidDecl(); @@ -9681,6 +9682,25 @@ NewFD->setInvalidDecl(); return true; } + } else if (NewMVType == MultiVersioning::TargetClones) { + const auto *CurClones = CurFD->getAttr<TargetClonesAttr>(); + Redeclaration = true; + OldDecl = CurFD; + MergeTypeWithPrevious = true; + NewFD->setIsMultiVersion(); + + if (CurClones && NewClones && + (CurClones->featuresStrs_size() != NewClones->featuresStrs_size() || + 
!std::equal(CurClones->featuresStrs_begin(), + CurClones->featuresStrs_end(), + NewClones->featuresStrs_begin()))) { + S.Diag(NewFD->getLocation(), diag::err_target_clone_doesnt_match); + S.Diag(CurFD->getLocation(), diag::note_previous_declaration); + NewFD->setInvalidDecl(); + return true; + } + + return false; } else { const auto *CurCPUSpec = CurFD->getAttr<CPUSpecificAttr>(); const auto *CurCPUDisp = CurFD->getAttr<CPUDispatchAttr>(); @@ -9777,10 +9797,12 @@ const auto *NewTA = NewFD->getAttr<TargetAttr>(); const auto *NewCPUDisp = NewFD->getAttr<CPUDispatchAttr>(); const auto *NewCPUSpec = NewFD->getAttr<CPUSpecificAttr>(); + const auto *NewTargetClones = NewFD->getAttr<TargetClonesAttr>(); // Mixing Multiversioning types is prohibited. - if ((NewTA && NewCPUDisp) || (NewTA && NewCPUSpec) || - (NewCPUDisp && NewCPUSpec)) { + if ((static_cast<bool>(NewTA) + static_cast<bool>(NewCPUDisp) + + static_cast<bool>(NewCPUSpec) + static_cast<bool>(NewTargetClones)) > + 1) { S.Diag(NewFD->getLocation(), diag::err_multiversion_types_mixed); NewFD->setInvalidDecl(); return true; @@ -9791,9 +9813,8 @@ // Main isn't allowed to become a multiversion function, however it IS // permitted to have 'main' be marked with the 'target' optimization hint. if (NewFD->isMain()) { - if ((MVType == MultiVersioning::Target && NewTA->isDefaultVersion()) || - MVType == MultiVersioning::CPUDispatch || - MVType == MultiVersioning::CPUSpecific) { + if (MVType != MultiVersioning::None && + (MVType != MultiVersioning::Target || NewTA->isDefaultVersion())) { S.Diag(NewFD->getLocation(), diag::err_multiversion_not_allowed_on_main); NewFD->setInvalidDecl(); return true; @@ -9817,7 +9838,10 @@ if (!OldFD->isMultiVersion() && MVType == MultiVersioning::None) return false; - if (OldFD->isMultiVersion() && MVType == MultiVersioning::None) { + // MultiVersioned redeclarations aren't allowed to omit the attribute except + // for target_clones. 
+ if (OldFD->isMultiVersion() && MVType == MultiVersioning::None && + getMultiVersionType(OldFD) != MultiVersioning::TargetClones) { S.Diag(NewFD->getLocation(), diag::err_multiversion_required_in_redecl) << (getMultiVersionType(OldFD) != MultiVersioning::Target); NewFD->setInvalidDecl(); @@ -9829,8 +9853,18 @@ return CheckTargetCausesMultiVersioning(S, OldFD, NewFD, NewTA, Redeclaration, OldDecl, MergeTypeWithPrevious, Previous); - // Previous declarations lack CPUDispatch/CPUSpecific. - if (!OldFD->isMultiVersion()) { + + if (!OldFD->isMultiVersion() && MVType == MultiVersioning::TargetClones) { + if (OldFD->isUsed(false)) { + NewFD->setInvalidDecl(); + return S.Diag(NewFD->getLocation(), diag::err_multiversion_after_used); + } + OldFD->setIsMultiVersion(); + } + + // Check if previous declarations lack CPUDispatch/CPUSpecific. + if (!OldFD->isMultiVersion() && (MVType == MultiVersioning::CPUDispatch || + MVType == MultiVersioning::CPUSpecific)) { S.Diag(OldFD->getLocation(), diag::err_multiversion_required_in_redecl) << 1; S.Diag(NewFD->getLocation(), diag::note_multiversioning_caused_here); @@ -9842,8 +9876,8 @@ // appropriate attribute in the current function decl. Resolve that these are // still compatible with previous declarations. return CheckMultiVersionAdditionalDecl( - S, OldFD, NewFD, MVType, NewTA, NewCPUDisp, NewCPUSpec, Redeclaration, - OldDecl, MergeTypeWithPrevious, Previous); + S, OldFD, NewFD, MVType, NewTA, NewCPUDisp, NewCPUSpec, NewTargetClones, + Redeclaration, OldDecl, MergeTypeWithPrevious, Previous); } /// Perform semantic checking of a new function declaration. 
Index: lib/Sema/SemaDeclAttr.cpp =================================================================== --- lib/Sema/SemaDeclAttr.cpp +++ lib/Sema/SemaDeclAttr.cpp @@ -2953,33 +2953,39 @@ bool Sema::checkTargetAttr(SourceLocation LiteralLoc, StringRef AttrStr) { enum FirstParam { Unsupported, Duplicate }; enum SecondParam { None, Architecture }; + enum FourthParam { Target, TargetClones }; for (auto Str : {"tune=", "fpmath="}) if (AttrStr.find(Str) != StringRef::npos) return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) - << Unsupported << None << Str; + << Unsupported << None << Str << Target; TargetAttr::ParsedTargetAttr ParsedAttrs = TargetAttr::parse(AttrStr); if (!ParsedAttrs.Architecture.empty() && !Context.getTargetInfo().isValidCPUName(ParsedAttrs.Architecture)) return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) - << Unsupported << Architecture << ParsedAttrs.Architecture; + << Unsupported << Architecture << ParsedAttrs.Architecture << Target; if (ParsedAttrs.DuplicateArchitecture) return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) - << Duplicate << None << "arch="; + << Duplicate << None << "arch=" << Target; for (const auto &Feature : ParsedAttrs.Features) { auto CurFeature = StringRef(Feature).drop_front(); // remove + or -. 
if (!Context.getTargetInfo().isValidFeatureName(CurFeature)) return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) - << Unsupported << None << CurFeature; + << Unsupported << None << CurFeature << Target; } return false; } static void handleTargetAttr(Sema &S, Decl *D, const ParsedAttr &AL) { + if (checkAttrMutualExclusion<TargetClonesAttr>(S, D, AL) || + checkAttrMutualExclusion<CPUDispatchAttr>(S, D, AL) || + checkAttrMutualExclusion<CPUSpecificAttr>(S, D, AL)) + return; + StringRef Str; SourceLocation LiteralLoc; if (!S.checkStringLiteralArgumentAttr(AL, 0, Str, &LiteralLoc) || @@ -2992,6 +2998,78 @@ D->addAttr(NewAttr); } +bool Sema::checkTargetClonesAttr(SourceLocation LiteralLoc, StringRef Str, + bool &HasDefault, + SmallVectorImpl<StringRef> &Strings) { + enum FirstParam { Unsupported, Duplicate }; + enum SecondParam { None, Architecture }; + enum FourthParam { Target, TargetClones }; + + // Warn on empty at the beginning of a string. + if (Str.size() == 0 || Str[0] == ',') + return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) + << Unsupported << None << "" << TargetClones; + + while (Str.size() != 0) { + // remove the comma we found last time through. 
+ if (Str[0] == ',') + Str = Str.substr(1); + + StringRef Cur{Str.data(), std::min(Str.find(','), Str.size())}; + Str = Str.substr(Cur.size()); + Cur = Cur.trim(); + + if (Cur.startswith("arch=")) { + if (!Context.getTargetInfo().isValidCPUName( + Cur.drop_front(sizeof("arch=") - 1))) + return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) + << Unsupported << Architecture + << Cur.drop_front(sizeof("arch=") - 1) << TargetClones; + } else if (Cur == "default") { + HasDefault = true; + continue; + } else if (!Context.getTargetInfo().isValidFeatureName(Cur)) + return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) + << Unsupported << None << Cur << TargetClones; + + Strings.push_back(Cur); + } + + return false; +} + +static void handleTargetClonesAttr(Sema &S, Decl *D, const ParsedAttr &AL) { + if (checkAttrMutualExclusion<TargetAttr>(S, D, AL) || + checkAttrMutualExclusion<CPUDispatchAttr>(S, D, AL) || + checkAttrMutualExclusion<CPUSpecificAttr>(S, D, AL)) + return; + + SmallVector<StringRef, 2> Strings; + bool HasDefault = false; + + for (unsigned I = 0, E = AL.getNumArgs(); I != E; ++I) { + StringRef CurStr; + SourceLocation LiteralLoc; + if (!S.checkStringLiteralArgumentAttr(AL, I, CurStr, &LiteralLoc) || + S.checkTargetClonesAttr(LiteralLoc, CurStr, HasDefault, Strings)) + return; + } + + if (!HasDefault) { + S.Diag(AL.getLoc(), diag::err_target_clone_must_have_default); + return; + } + + Strings.push_back("default"); + + FunctionDecl *FD = cast<FunctionDecl>(D); + FD->setIsMultiVersion(true); + unsigned Index = AL.getAttributeSpellingListIndex(); + TargetClonesAttr *NewAttr = ::new (S.Context) TargetClonesAttr( + AL.getRange(), S.Context, Strings.data(), Strings.size(), Index); + D->addAttr(NewAttr); +} + static void handleMinVectorWidthAttr(Sema &S, Decl *D, const ParsedAttr &AL) { Expr *E = AL.getArgAsExpr(0); uint32_t VecWidth; @@ -6292,6 +6370,9 @@ case ParsedAttr::AT_Target: handleTargetAttr(S, D, AL); break; + case 
ParsedAttr::AT_TargetClones: + handleTargetClonesAttr(S, D, AL); + break; case ParsedAttr::AT_MinVectorWidth: handleMinVectorWidthAttr(S, D, AL); break; Index: include/clang/AST/Decl.h =================================================================== --- include/clang/AST/Decl.h +++ include/clang/AST/Decl.h @@ -2225,6 +2225,9 @@ /// True if this function is a multiversioned processor specific function as a /// part of the cpu_specific/cpu_dispatch functionality. bool isCPUSpecificMultiVersion() const; + /// True if this function is a multiversioned function specified with the + /// attribute target_clones. + bool isTargetClonesMultiVersion() const; void setPreviousDeclaration(FunctionDecl * PrevDecl); Index: include/clang/Sema/Sema.h =================================================================== --- include/clang/Sema/Sema.h +++ include/clang/Sema/Sema.h @@ -3355,6 +3355,9 @@ SourceLocation *ArgLocation = nullptr); bool checkSectionName(SourceLocation LiteralLoc, StringRef Str); bool checkTargetAttr(SourceLocation LiteralLoc, StringRef Str); + bool checkTargetClonesAttr(SourceLocation LiteralLoc, StringRef Str, + bool &HasDefault, + SmallVectorImpl<StringRef> &Strings); bool checkMSInheritanceAttrOnDefinition( CXXRecordDecl *RD, SourceRange Range, bool BestCase, MSInheritanceAttr::Spelling SemanticSpelling); Index: include/clang/Basic/DiagnosticSemaKinds.td =================================================================== --- include/clang/Basic/DiagnosticSemaKinds.td +++ include/clang/Basic/DiagnosticSemaKinds.td @@ -2494,7 +2494,8 @@ "%0 attribute requires OpenCL version %1%select{| or above}2">; def warn_unsupported_target_attribute : Warning<"%select{unsupported|duplicate}0%select{| architecture}1 '%2' in" - " the 'target' attribute string; 'target' attribute ignored">, + " the '%select{target|target_clones}3' attribute string; " + "'%select{target|target_clones}3' attribute ignored">, InGroup<IgnoredAttributes>; def err_attribute_unsupported : 
Error<"%0 attribute is not supported for this target">; @@ -9413,8 +9414,8 @@ def err_multiversion_noproto : Error< "multiversioned function must have a prototype">; def err_multiversion_no_other_attrs : Error< - "attribute '%select{target|cpu_specific|cpu_dispatch}0' multiversioning cannot be combined" - " with other attributes">; + "attribute '%select{target|cpu_specific|cpu_dispatch|target_clones}0'" + " multiversioning cannot be combined with other attributes">; def err_multiversion_diff : Error< "multiversioned function declaration has a different %select{calling convention" "|return type|constexpr specification|inline specification|storage class|" @@ -9441,6 +9442,11 @@ "body of cpu_dispatch function will be ignored">, InGroup<FunctionMultiVersioning>; +def err_target_clone_must_have_default : Error < + "'target_clones' multiversioning requires a default target">; +def err_target_clone_doesnt_match : Error < + "'target_clones' attribute does not match previous declaration">; + // three-way comparison operator diagnostics def err_implied_comparison_category_type_not_found : Error< "cannot deduce return type of 'operator<=>' because type '%0' was not found; " Index: include/clang/Basic/AttrDocs.td =================================================================== --- include/clang/Basic/AttrDocs.td +++ include/clang/Basic/AttrDocs.td @@ -1594,6 +1594,32 @@ }]; } +def TargetClonesDocs : Documentation { + let Category = DocCatFunction; + let Content = [{ +Clang supports the GNU style ``__attribute__((target_clones("OPTIONS")))`` +attribute. This attribute may be attached to a function definition and causes +function multiversioning, where multiple versions of the function will be +emitted with different code generation options. Additionally, these versions +will be resolved at runtime based on the priority of their attribute options. +All ``target_clone`` functions are considered multiversioned functions. 
+ +All multiversioned functions must contain a ``default`` (fallback) +implementation, otherwise usages of the function are considered invalid. +Additionally, a function may not become multiversioned after its first use. + +Note that unlike the ``target`` syntax, every option listed creates a new +version, desregarding whether it is split on a comma inside or outside a string. +The following will emit 4 versions of the function. + + .. code-block:: c++ + + __attribute__((target_clones("arch=atom,avx2","arch=ivybridge","default"))) + void foo() {} + + }]; +} + def MinVectorWidthDocs : Documentation { let Category = DocCatFunction; let Content = [{ Index: include/clang/Basic/Attr.td =================================================================== --- include/clang/Basic/Attr.td +++ include/clang/Basic/Attr.td @@ -2099,6 +2099,31 @@ }]; } +def TargetClones : InheritableAttr { + let Spellings = [GCC<"target_clones">]; + let Args = [VariadicStringArgument<"featuresStrs">]; + let Documentation = [TargetClonesDocs]; + let Subjects = SubjectList<[Function], ErrorDiag>; + let AdditionalMembers = [{ + unsigned ActiveArgIndex = 0; + void AdvanceActiveArgIndex() { + ++ActiveArgIndex; + while(ActiveArgIndex < featuresStrs_size()) { + if (std::find(featuresStrs_begin(), + featuresStrs_begin() + ActiveArgIndex, + *(featuresStrs_begin() + ActiveArgIndex)) + == (featuresStrs_begin() + ActiveArgIndex)) + return; + ++ActiveArgIndex; + } + } + + StringRef getCurFeatureStr() const { + return *(featuresStrs_begin() + ActiveArgIndex); + } + }]; +} + def MinVectorWidth : InheritableAttr { let Spellings = [Clang<"min_vector_width">]; let Args = [UnsignedArgument<"VectorWidth">]; Index: test/CodeGen/attr-cpuspecific.c =================================================================== --- test/CodeGen/attr-cpuspecific.c +++ test/CodeGen/attr-cpuspecific.c @@ -11,8 +11,10 @@ void SingleVersion(void){} // CHECK: define void @SingleVersion.S() #[[S:[0-9]+]] 
-__attribute__((cpu_specific(ivybridge))) -void NotCalled(void){} +__attribute__((cpu_specific(ivybridge))) inline void InlineSingleVersion(void) {} +// CHECK: define available_externally void @InlineSingleVersion.S() #[[S:[0-9]+]] + +__attribute__((cpu_specific(ivybridge))) void NotCalled(void) {} // CHECK: define void @NotCalled.S() #[[S]] // Done before any of the implementations. Index: test/CodeGen/attr-target-clones.c =================================================================== --- test/CodeGen/attr-target-clones.c +++ test/CodeGen/attr-target-clones.c @@ -0,0 +1,62 @@ +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s + +// CHECK: @foo.ifunc = ifunc i32 (), i32 ()* ()* @foo.resolver +// CHECK: @foo_inline.ifunc = ifunc i32 (), i32 ()* ()* @foo_inline.resolver +// CHECK: @foo_decls.ifunc = ifunc void (), void ()* ()* @foo_decls.resolver +// CHECK: @unused.ifunc = ifunc void (), void ()* ()* @unused.resolver + +int __attribute__((target_clones("sse4.2, default"))) foo(void) { return 0; } +// CHECK: define i32 @foo.sse4.2.0() +// CHECK: define i32 @foo.default.1() +// CHECK: define i32 ()* @foo.resolver() +// CHECK: ret i32 ()* @foo.sse4.2.0 +// CHECK: ret i32 ()* @foo.default.1 + +int bar() { + // CHECK: define i32 @bar() + return foo(); + // CHECK: call i32 @foo.ifunc() +} + +inline int __attribute__((target_clones("arch=sandybridge,default,sse4.2"))) +foo_inline(void) { return 0; } +// CHECK: define available_externally i32 @foo_inline.arch_sandybridge.0() #[[SB:[0-9]+]] +// CHECK: define available_externally i32 @foo_inline.sse4.2.1() #[[SSE42:[0-9]+]] +// CHECK: define available_externally i32 @foo_inline.default.2() #[[DEF:[0-9]+]] +// CHECK: define i32 ()* @foo_inline.resolver() +// CHECK: ret i32 ()* @foo_inline.arch_sandybridge.0 +// CHECK: ret i32 ()* @foo_inline.sse4.2.1 +// CHECK: ret i32 ()* @foo_inline.default.2 + +int bar2() { + // CHECK: define i32 @bar2() + return foo_inline(); + // CHECK: call i32 
@foo_inline.ifunc() +} + +inline __attribute__((target_clones("default,default ,sse4.2"))) void foo_decls(void) {} +// CHECK: define available_externally void @foo_decls.sse4.2.0() +// CHECK: define available_externally void @foo_decls.default.1() +// CHECK: define void ()* @foo_decls.resolver() +// CHECK: ret void ()* @foo_decls.sse4.2.0 +// CHECK: ret void ()* @foo_decls.default.1 + +void bar3() { + // CHECK: define void @bar3() + foo_decls(); + // CHECK: call void @foo_decls.ifunc() +} + +void __attribute__((target_clones("default, arch=ivybridge"))) unused(void) {} +// CHECK: define void @unused.arch_ivybridge.0() +// CHECK: define void @unused.default.1() +// CHECK: define void ()* @unused.resolver() +// CHECK: ret void ()* @unused.arch_ivybridge.0 +// CHECK: ret void ()* @unused.default.1 + +// CHECK: attributes #[[SSE42]] = +// CHECK-SAME: "target-features"="+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" +// CHECK: attributes #[[DEF]] = +// CHECK-SAME: "target-features"="+mmx,+sse,+sse2,+x87" +// CHECK: attributes #[[SB]] = +// CHECK-SAME: "target-features"="+aes,+avx,+cx16,+fxsr,+mmx,+pclmul,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" Index: test/Misc/pragma-attribute-supported-attributes-list.test =================================================================== --- test/Misc/pragma-attribute-supported-attributes-list.test +++ test/Misc/pragma-attribute-supported-attributes-list.test @@ -2,7 +2,7 @@ // The number of supported attributes should never go down! 
-// CHECK: #pragma clang attribute supports 75 attributes: +// CHECK: #pragma clang attribute supports 76 attributes: // CHECK-NEXT: AMDGPUFlatWorkGroupSize (SubjectMatchRule_function) // CHECK-NEXT: AMDGPUNumSGPR (SubjectMatchRule_function) // CHECK-NEXT: AMDGPUNumVGPR (SubjectMatchRule_function) @@ -73,6 +73,7 @@ // CHECK-NEXT: SwiftIndirectResult (SubjectMatchRule_variable_is_parameter) // CHECK-NEXT: TLSModel (SubjectMatchRule_variable_is_thread_local) // CHECK-NEXT: Target (SubjectMatchRule_function) +// CHECK-NEXT: TargetClones (SubjectMatchRule_function) // CHECK-NEXT: TestTypestate (SubjectMatchRule_function_is_member) // CHECK-NEXT: TrivialABI (SubjectMatchRule_record) // CHECK-NEXT: WarnUnusedResult (SubjectMatchRule_objc_method, SubjectMatchRule_enum, SubjectMatchRule_record, SubjectMatchRule_hasType_functionType) Index: test/Sema/attr-target-clones.c =================================================================== --- test/Sema/attr-target-clones.c +++ test/Sema/attr-target-clones.c @@ -0,0 +1,72 @@ +// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -verify %s + +// expected-error@+1 {{'target_clones' multiversioning requires a default target}} +void __attribute__((target_clones("sse4.2", "arch=sandybridge"))) +no_default(void); + +// expected-error@+2 {{'target_clones' and 'target' attributes are not compatible}} +// expected-note@+1 {{conflicting attribute is here}} +void __attribute__((target("sse4.2"), target_clones("arch=sandybridge"))) +ignored_attr(void); +// expected-error@+2 {{'target' and 'target_clones' attributes are not compatible}} +// expected-note@+1 {{conflicting attribute is here}} +void __attribute__((target_clones("arch=sandybridge,default"), target("sse4.2"))) +ignored_attr2(void); + +int redecl(void); +int __attribute__((target_clones("sse4.2", "default"))) redecl(void) { return 1; } + +int __attribute__((target_clones("sse4.2", "default"))) redecl2(void); +int __attribute__((target_clones("sse4.2", "default"))) 
redecl2(void) { return 1; } + +int __attribute__((target_clones("sse4.2", "default"))) redecl3(void); +int redecl3(void); + +int __attribute__((target_clones("sse4.2", "arch=atom", "default"))) redecl4(void); +// expected-error@+3 {{'target_clones' attribute does not match previous declaration}} +// expected-note@-2 {{previous declaration is here}} +int __attribute__((target_clones("sse4.2", "arch=sandybridge", "default"))) +redecl4(void) { return 1; } + +int __attribute__((target("sse4.2"))) redef2(void) { return 1; } +// expected-error@+2 {{multiversioning attributes cannot be combined}} +// expected-note@-2 {{previous declaration is here}} +int __attribute__((target_clones("sse4.2", "default"))) redef2(void) { return 1; } + +int __attribute__((target_clones("sse4.2,default"))) redef3(void) { return 1; } +// expected-error@+2 {{redefinition of 'redef3'}} +// expected-note@-2 {{previous definition is here}} +int __attribute__((target_clones("sse4.2,default"))) redef3(void) { return 1; } + +int __attribute__((target_clones("sse4.2,default"))) redef4(void) { return 1; } +// expected-error@+2 {{redefinition of 'redef4'}} +// expected-note@-2 {{previous definition is here}} +int __attribute__((target_clones("sse4.2,default"))) redef4(void) { return 1; } + +// No error here... duplicates are allowed because they alter name mangling. 
+int __attribute__((target_clones("arch=atom,arch=atom", "arch=atom,default"))) +dupes(void) { return 1; } + +// expected-warning@+1 {{unsupported '' in the 'target_clones' attribute string;}} +void __attribute__((target_clones(""))) +empty_target_1(void); +// expected-warning@+1 {{unsupported '' in the 'target_clones' attribute string;}} +void __attribute__((target_clones(",default"))) +empty_target_2(void); +// expected-warning@+1 {{unsupported '' in the 'target_clones' attribute string;}} +void __attribute__((target_clones("default,"))) +empty_target_3(void); +// expected-warning@+1 {{unsupported '' in the 'target_clones' attribute string;}} +void __attribute__((target_clones("default, ,avx2"))) +empty_target_4(void); + +// expected-warning@+1 {{unsupported '' in the 'target_clones' attribute string;}} +void __attribute__((target_clones("default,avx2", ""))) +empty_target_5(void); + +int mv_after_use(void); +int useage() { + return mv_after_use(); +} +// expected-error@+1 {{function declaration cannot become a multiversioned function after first usage}} +int __attribute__((target_clones("sse4.2", "default"))) mv_after_use(void) { return 1; }
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits