erichkeane created this revision.
erichkeane added a reviewer: rsmith.
Herald added a subscriber: cfe-commits.

In the feedback in https://reviews.llvm.org/D51650 
(https://reviews.llvm.org/D51650),
@rsmith points out an implementation detail of CPU-Dispatch
is improperly done.  I'd like to fix this here before rebasing
target_clones on the patch resulting from this feedback.  
This is a first-run at making the GlobalDecl contain the index
of the multiversion version.

The other suggestion @rsmith made was to make getting the MV-type
more streamlined.  I've been thinking of ways to do that, and will
likely attempt to fix it before target_clones goes in with a rebase.

There are two big points of feedback that I'd really appreciate:
1- I suspect that something serious needs to be done on GlobalDecl.
It currently holds a 2 bit value in a PointerIntPair, which obviously
won't work for the index.  I added an unsigned, but am open to
suggestions on how to store this better.

2- CodeGenModule::getFunctionFeatureMap takes a FunctionDecl, but the
features are dependent on the GlobalDecl now. I traced usages upwards,
and many cases don't have GlobalDecl in its call-tree (since it comes
from a callee).  I'm not sure it matters, since only the emit-call should
require this info, but any suggestions are greatly appreciated.


Repository:
  rC Clang

https://reviews.llvm.org/D52000

Files:
  include/clang/AST/GlobalDecl.h
  include/clang/Basic/Attr.td
  lib/CodeGen/CodeGenModule.cpp
  lib/CodeGen/CodeGenModule.h
  test/CodeGen/attr-cpuspecific.c

Index: test/CodeGen/attr-cpuspecific.c
===================================================================
--- test/CodeGen/attr-cpuspecific.c
+++ test/CodeGen/attr-cpuspecific.c
@@ -27,22 +27,25 @@
 
 __attribute__((cpu_specific(ivybridge)))
 void TwoVersions(void){}
-// CHECK: define void @TwoVersions.S() #[[S]]
+// CHECK: define void @TwoVersions.S()
+// TODO: #[[S]]
 
 __attribute__((cpu_specific(knl)))
 void TwoVersions(void){}
 // CHECK: define void @TwoVersions.Z() #[[K:[0-9]+]]
 
 __attribute__((cpu_specific(ivybridge, knl)))
 void TwoVersionsSameAttr(void){}
 // CHECK: define void @TwoVersionsSameAttr.S() #[[S]]
-// CHECK: define void @TwoVersionsSameAttr.Z() #[[K]]
+// CHECK: define void @TwoVersionsSameAttr.Z()
+// TODO: #[[K]]
 
 __attribute__((cpu_specific(atom, ivybridge, knl)))
 void ThreeVersionsSameAttr(void){}
 // CHECK: define void @ThreeVersionsSameAttr.O() #[[O:[0-9]+]]
 // CHECK: define void @ThreeVersionsSameAttr.S() #[[S]]
-// CHECK: define void @ThreeVersionsSameAttr.Z() #[[K]]
+// CHECK: define void @ThreeVersionsSameAttr.Z()
+// TODO #[[K]]
 
 void usages() {
   SingleVersion();
@@ -96,6 +99,12 @@
 // CHECK: ret void ()* @HasGeneric.A
 // CHECK-NOT: call void @llvm.trap
 
-// CHECK: attributes #[[S]] = {{.*}}"target-features"="+avx,+cmov,+f16c,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
-// CHECK: attributes #[[K]] = {{.*}}"target-features"="+adx,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+cmov,+f16c,+fma,+lzcnt,+mmx,+movbe,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
-// CHECK: attributes #[[O]] = {{.*}}"target-features"="+cmov,+mmx,+movbe,+sse,+sse2,+sse3,+ssse3,+x87"
+__attribute__((cpu_specific(atom, ivybridge))) void HasGeneric(void) {}
+// CHECK: define void @HasGeneric.O()
+// TODO: #[[O]]
+// CHECK: define void @HasGeneric.S()
+// TODO: #[[S]]
+
+// XCHECK: attributes #[[S]] = {{.*}}"target-features"="+avx,+cmov,+f16c,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
+// XCHECK: attributes #[[K]] = {{.*}}"target-features"="+adx,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+cmov,+f16c,+fma,+lzcnt,+mmx,+movbe,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
+// XCHECK: attributes #[[O]] = {{.*}}"target-features"="+cmov,+mmx,+movbe,+sse,+sse2,+sse3,+ssse3,+x87"
Index: lib/CodeGen/CodeGenModule.h
===================================================================
--- lib/CodeGen/CodeGenModule.h
+++ lib/CodeGen/CodeGenModule.h
@@ -1311,6 +1311,8 @@
   void EmitGlobalDefinition(GlobalDecl D, llvm::GlobalValue *GV = nullptr);
 
   void EmitGlobalFunctionDefinition(GlobalDecl GD, llvm::GlobalValue *GV);
+  void EmitMultiVersionFunctionDefinition(GlobalDecl GD, llvm::GlobalValue *GV);
+
   void EmitGlobalVarDefinition(const VarDecl *D, bool IsTentative = false);
   void EmitAliasDefinition(GlobalDecl GD);
   void emitIFuncDefinition(GlobalDecl GD);
Index: lib/CodeGen/CodeGenModule.cpp
===================================================================
--- lib/CodeGen/CodeGenModule.cpp
+++ lib/CodeGen/CodeGenModule.cpp
@@ -877,10 +877,11 @@
 
 static void AppendCPUSpecificCPUDispatchMangling(const CodeGenModule &CGM,
                                                  const CPUSpecificAttr *Attr,
+                                                 unsigned CPUIndex,
                                                  raw_ostream &Out) {
   // cpu_specific gets the current name, dispatch gets the resolver.
   if (Attr)
-    Out << getCPUSpecificMangling(CGM, Attr->getCurCPUName()->getName());
+    Out << getCPUSpecificMangling(CGM, Attr->getCPUName(CPUIndex)->getName());
   else
     Out << ".resolver";
 }
@@ -948,8 +949,9 @@
   if (const auto *FD = dyn_cast<FunctionDecl>(ND))
     if (FD->isMultiVersion() && !OmitMultiVersionMangling) {
       if (FD->isCPUDispatchMultiVersion() || FD->isCPUSpecificMultiVersion())
-        AppendCPUSpecificCPUDispatchMangling(
-            CGM, FD->getAttr<CPUSpecificAttr>(), Out);
+        AppendCPUSpecificCPUDispatchMangling(CGM,
+                                             FD->getAttr<CPUSpecificAttr>(),
+                                             GD.getMultiVersionIndex(), Out);
       else
         AppendTargetMangling(CGM, FD->getAttr<TargetAttr>(), Out);
     }
@@ -1009,26 +1011,6 @@
     }
   }
 
-  const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl());
-  // Since CPUSpecific can require multiple emits per decl, store the manglings
-  // separately.
-  if (FD &&
-      (FD->isCPUDispatchMultiVersion() || FD->isCPUSpecificMultiVersion())) {
-    const auto *SD = FD->getAttr<CPUSpecificAttr>();
-
-    std::pair<GlobalDecl, unsigned> SpecCanonicalGD{
-        CanonicalGD,
-        SD ? SD->ActiveArgIndex : std::numeric_limits<unsigned>::max()};
-
-    auto FoundName = CPUSpecificMangledDeclNames.find(SpecCanonicalGD);
-    if (FoundName != CPUSpecificMangledDeclNames.end())
-      return FoundName->second;
-
-    auto Result = CPUSpecificManglings.insert(
-        std::make_pair(getMangledNameImpl(*this, GD, FD), SpecCanonicalGD));
-    return CPUSpecificMangledDeclNames[SpecCanonicalGD] = Result.first->first();
-  }
-
   auto FoundName = MangledDeclNames.find(CanonicalGD);
   if (FoundName != MangledDeclNames.end())
     return FoundName->second;
@@ -2400,14 +2382,27 @@
   return CodeGenOpts.OptimizationLevel > 0;
 }
 
+void CodeGenModule::EmitMultiVersionFunctionDefinition(GlobalDecl GD,
+                                                       llvm::GlobalValue *GV) {
+  const auto *FD = cast<FunctionDecl>(GD.getDecl());
+
+  if (FD->isCPUSpecificMultiVersion()) {
+    auto *Spec = FD->getAttr<CPUSpecificAttr>();
+    for (unsigned I = 0; I < Spec->cpus_size(); ++I)
+      EmitGlobalFunctionDefinition(GD.getWithMultiVersionIndex(I), nullptr);
+    // Requires multiple emits.
+  } else
+    EmitGlobalFunctionDefinition(GD, GV);
+}
+
 void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) {
   const auto *D = cast<ValueDecl>(GD.getDecl());
 
   PrettyStackTraceDecl CrashInfo(const_cast<ValueDecl *>(D), D->getLocation(),
                                  Context.getSourceManager(),
                                  "Generating code for declaration");
 
-  if (isa<FunctionDecl>(D)) {
+  if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
     // At -O0, don't generate IR for functions with available_externally
     // linkage.
     if (!shouldEmitFunction(GD))
@@ -2420,6 +2415,8 @@
         ABI->emitCXXStructor(CD, getFromCtorType(GD.getCtorType()));
       else if (const auto *DD = dyn_cast<CXXDestructorDecl>(Method))
         ABI->emitCXXStructor(DD, getFromDtorType(GD.getDtorType()));
+      else if (FD->isMultiVersion())
+        EmitMultiVersionFunctionDefinition(GD, GV);
       else
         EmitGlobalFunctionDefinition(GD, GV);
 
@@ -2429,6 +2426,8 @@
       return;
     }
 
+    if (FD->isMultiVersion())
+      return EmitMultiVersionFunctionDefinition(GD, GV);
     return EmitGlobalFunctionDefinition(GD, GV);
   }
 
@@ -2500,13 +2499,21 @@
   SmallVector<CodeGenFunction::CPUDispatchMultiVersionResolverOption, 10>
       Options;
   const TargetInfo &Target = getTarget();
+  unsigned Index = 0;
   for (const IdentifierInfo *II : DD->cpus()) {
     // Get the name of the target function so we can look it up/create it.
     std::string MangledName = getMangledNameImpl(*this, GD, FD, true) +
                               getCPUSpecificMangling(*this, II->getName());
-    llvm::Constant *Func = GetOrCreateLLVMFunction(
-        MangledName, DeclTy, GD, /*ForVTable=*/false, /*DontDefer=*/false,
-        /*IsThunk=*/false, llvm::AttributeList(), ForDefinition);
+    llvm::Constant *Func = GetGlobalValue(MangledName);
+
+    if (!Func) {
+
+      Func = GetOrCreateLLVMFunction(
+          MangledName, DeclTy, GD.getWithMultiVersionIndex(Index),
+          /*ForVTable=*/false, /*DontDefer=*/false,
+          /*IsThunk=*/false, llvm::AttributeList(), ForDefinition);
+    }
+
     llvm::SmallVector<StringRef, 32> Features;
     Target.getCPUSpecificCPUDispatchFeatures(II->getName(), Features);
     llvm::transform(Features, Features.begin(),
@@ -2517,6 +2524,7 @@
         }), Features.end());
     Options.emplace_back(cast<llvm::Function>(Func),
                          CodeGenFunction::GetX86CpuSupportsMask(Features));
+    ++Index;
   }
 
   llvm::sort(
@@ -3870,15 +3878,6 @@
     AddGlobalDtor(Fn, DA->getPriority());
   if (D->hasAttr<AnnotateAttr>())
     AddGlobalAnnotations(D, Fn);
-
-  if (D->isCPUSpecificMultiVersion()) {
-    auto *Spec = D->getAttr<CPUSpecificAttr>();
-    // If there is another specific version we need to emit, do so here.
-    if (Spec->ActiveArgIndex + 1 < Spec->cpus_size()) {
-      ++Spec->ActiveArgIndex;
-      EmitGlobalFunctionDefinition(GD, nullptr);
-    }
-  }
 }
 
 void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) {
@@ -5259,10 +5258,11 @@
                           ParsedAttr.Features);
   } else if (const auto *SD = FD->getAttr<CPUSpecificAttr>()) {
     llvm::SmallVector<StringRef, 32> FeaturesTmp;
-    Target.getCPUSpecificCPUDispatchFeatures(SD->getCurCPUName()->getName(),
+    /*Target.getCPUSpecificCPUDispatchFeatures(SD->getCurCPUName()->getName(),
                                              FeaturesTmp);
     std::vector<std::string> Features(FeaturesTmp.begin(), FeaturesTmp.end());
     Target.initFeatureMap(FeatureMap, getDiags(), TargetCPU, Features);
+    */// TODO! Re-enable if we can figure out what the current version is.
   } else {
     Target.initFeatureMap(FeatureMap, getDiags(), TargetCPU,
                           Target.getTargetOpts().Features);
Index: include/clang/Basic/Attr.td
===================================================================
--- include/clang/Basic/Attr.td
+++ include/clang/Basic/Attr.td
@@ -863,10 +863,8 @@
   let Subjects = SubjectList<[Function]>;
   let Documentation = [CPUSpecificCPUDispatchDocs];
   let AdditionalMembers = [{
-    unsigned ActiveArgIndex = 0;
-
-    IdentifierInfo *getCurCPUName() const {
-      return *(cpus_begin() + ActiveArgIndex);
+    IdentifierInfo *getCPUName(unsigned Index) const {
+      return *(cpus_begin() + Index);
     }
   }];
 }
Index: include/clang/AST/GlobalDecl.h
===================================================================
--- include/clang/AST/GlobalDecl.h
+++ include/clang/AST/GlobalDecl.h
@@ -34,6 +34,7 @@
 /// a VarDecl, a FunctionDecl or a BlockDecl.
 class GlobalDecl {
   llvm::PointerIntPair<const Decl *, 2> Value;
+  unsigned MultiVersionIndex = 0;
 
   void Init(const Decl *D) {
     assert(!isa<CXXConstructorDecl>(D) && "Use other ctor with ctor decls!");
@@ -45,7 +46,10 @@
 public:
   GlobalDecl() = default;
   GlobalDecl(const VarDecl *D) { Init(D);}
-  GlobalDecl(const FunctionDecl *D) { Init(D); }
+  GlobalDecl(const FunctionDecl *D, unsigned MVIndex = 0)
+      : MultiVersionIndex(MVIndex) {
+    Init(D);
+  }
   GlobalDecl(const BlockDecl *D) { Init(D); }
   GlobalDecl(const CapturedDecl *D) { Init(D); }
   GlobalDecl(const ObjCMethodDecl *D) { Init(D); }
@@ -57,6 +61,7 @@
     GlobalDecl CanonGD;
     CanonGD.Value.setPointer(Value.getPointer()->getCanonicalDecl());
     CanonGD.Value.setInt(Value.getInt());
+    CanonGD.MultiVersionIndex = MultiVersionIndex;
 
     return CanonGD;
   }
@@ -73,8 +78,17 @@
     return static_cast<CXXDtorType>(Value.getInt());
   }
 
+  unsigned getMultiVersionIndex() const {
+    assert(isa<FunctionDecl>(getDecl()) &&
+           !isa<CXXConstructorDecl>(getDecl()) &&
+           !isa<CXXDestructorDecl>(getDecl()) &&
+           "Decl is not a plain FunctionDecl!");
+    return MultiVersionIndex;
+  }
+
   friend bool operator==(const GlobalDecl &LHS, const GlobalDecl &RHS) {
-    return LHS.Value == RHS.Value;
+    return LHS.Value == RHS.Value &&
+           LHS.MultiVersionIndex == RHS.MultiVersionIndex;
   }
 
   void *getAsOpaquePtr() const { return Value.getOpaqueValue(); }
@@ -90,6 +104,16 @@
     Result.Value.setPointer(D);
     return Result;
   }
+
+  GlobalDecl getWithMultiVersionIndex(unsigned Index) {
+    assert(isa<FunctionDecl>(getDecl()) &&
+           !isa<CXXConstructorDecl>(getDecl()) &&
+           !isa<CXXDestructorDecl>(getDecl()) &&
+           "Decl is not a plain FunctionDecl!");
+    GlobalDecl Result(*this);
+    Result.MultiVersionIndex = Index;
+    return Result;
+  }
 };
 
 } // namespace clang
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to