d.zobnin.bugzilla updated this revision to Diff 53400.
d.zobnin.bugzilla added a comment.

Eric, thank you for the review!

I've renamed the test, but have also made it a .cpp file for the reasons 
described below.

You are right -- GCC looks at "target" attribute applied to the function in 
which inline asm exists. But GCC seems to take into account lambda's attributes 
as well, if asm statement is inside a lambda. To be correct, GCC seems to 
collect all target features from command line, function and lambda's 
attributes, so such code:

  void f8(void *x) __attribute__((__target__("no-avx")));
  void f8(void *x) {
    auto g1 = [](void *arg) __attribute__((__target__("avx2"))) {
      auto g2 = [&arg]() __attribute__((__target__("no-mmx"))) {
        int a = 10, b;
        __asm__ ("%vmovq (%%rbp), %%xmm0" : "=r"(b) : "r"(a) : "%eax");
      };
      g2();
    };
    g1(x);
  }

will produce "vmovq" instruction regardless of command line arguments because 
of g1's attribute.

I tried several approaches to implement this behavior and came up with this 
patch, please take a look!


http://reviews.llvm.org/D18700

Files:
  include/clang/AST/Stmt.h
  lib/AST/Stmt.cpp
  lib/Sema/SemaStmtAsm.cpp
  test/CodeGen/avx-v-modifier-inline-asm.cpp

Index: lib/AST/Stmt.cpp
===================================================================
--- lib/AST/Stmt.cpp
+++ lib/AST/Stmt.cpp
@@ -632,6 +632,14 @@
 
       CurPtr = NameEnd+1;
       continue;
+    } else if (*Begin == 'v') {
+      // GCC accepts code staring with "%v", e. g. "%vpcmpestri" and transforms
+      // it into "vpcmpestri" instruction if target processor supports AVX and
+      // into "pcmpestri" otherwise.
+      if (SupportsAVX)
+        CurStringPiece = "v" + CurStringPiece;
+      CurStringPiece += EscapedChar;
+      continue;
     }
 
     DiagOffs = CurPtr-StrStart-1;
@@ -682,13 +690,15 @@
 //===----------------------------------------------------------------------===//
 
 GCCAsmStmt::GCCAsmStmt(const ASTContext &C, SourceLocation asmloc,
-                       bool issimple, bool isvolatile, unsigned numoutputs,
-                       unsigned numinputs, IdentifierInfo **names,
-                       StringLiteral **constraints, Expr **exprs,
-                       StringLiteral *asmstr, unsigned numclobbers,
-                       StringLiteral **clobbers, SourceLocation rparenloc)
-  : AsmStmt(GCCAsmStmtClass, asmloc, issimple, isvolatile, numoutputs,
-            numinputs, numclobbers), RParenLoc(rparenloc), AsmStr(asmstr) {
+                       bool issimple, bool isvolatile, bool supportsavx,
+                       unsigned numoutputs, unsigned numinputs,
+                       IdentifierInfo **names, StringLiteral **constraints,
+                       Expr **exprs, StringLiteral *asmstr,
+                       unsigned numclobbers, StringLiteral **clobbers,
+                       SourceLocation rparenloc)
+    : AsmStmt(GCCAsmStmtClass, asmloc, issimple, isvolatile, numoutputs,
+              numinputs, numclobbers),
+      RParenLoc(rparenloc), AsmStr(asmstr), SupportsAVX(supportsavx) {
 
   unsigned NumExprs = NumOutputs + NumInputs;
 
Index: lib/Sema/SemaStmtAsm.cpp
===================================================================
--- lib/Sema/SemaStmtAsm.cpp
+++ lib/Sema/SemaStmtAsm.cpp
@@ -138,6 +138,75 @@
   return false;
 }
 
+// Collect all target features specified for the current context by the "target"
+// attribute. Start from current DeclContext and iterate its parents until we
+// find a function context.
+static void CollectTargetFeatures(Sema &S,
+                                  std::vector<std::string> &TargetFeatures,
+                                  StringRef &TargetCPU) {
+  DeclContext *DC = S.CurContext;
+
+  auto Collect = [&TargetFeatures, &TargetCPU](const FunctionDecl *FD) {
+    if (!FD)
+      return;
+    if (const auto *TA = FD->getAttr<TargetAttr>()) {
+      TargetAttr::ParsedTargetAttr ParsedAttr = TA->parse();
+      TargetFeatures.insert(TargetFeatures.begin(), ParsedAttr.first.begin(),
+                            ParsedAttr.first.end());
+      if (TargetCPU == "")
+        TargetCPU = ParsedAttr.second;
+    }
+  };
+
+  // Climb up the tree of nested DeclContext-s, collecting the target features.
+  while (true) {
+    if (isa<BlockDecl>(DC) || isa<EnumDecl>(DC) || isa<CapturedDecl>(DC)) {
+      DC = DC->getParent();
+    } else if (const auto *MD = dyn_cast<CXXMethodDecl>(DC)) {
+      // Check if it's a lambda and collect its target features.
+      if (MD->getOverloadedOperator() == OO_Call &&
+          cast<CXXRecordDecl>(DC->getParent())->isLambda()) {
+        Collect(MD);
+      }
+      DC = DC->getParent()->getParent();
+    } else {
+      // It's the nearest function, collect its features and break.
+      Collect(dyn_cast<FunctionDecl>(DC));
+      break;
+    }
+  }
+}
+
+// Check whether a particular target feature is enabled in current context,
+// either by command-line options or by __attribute__((__target__("..."))),
+// applied to any surrounding lambda or function.
+static bool TargetFeatureEnabled(Sema &S, StringRef Feature) {
+  const TargetInfo &TI = S.Context.getTargetInfo();
+  llvm::StringMap<bool> FeatureMap;
+  StringRef TargetCPU;
+  std::vector<std::string> TargetFeatures;
+
+  // Collect target features and CPU from surrounding declarations.
+  CollectTargetFeatures(S, TargetFeatures, TargetCPU);
+
+  // Don't forget to add command-line features and CPU (lowest priority).
+  TargetFeatures.insert(TargetFeatures.begin(),
+                        TI.getTargetOpts().FeaturesAsWritten.begin(),
+                        TI.getTargetOpts().FeaturesAsWritten.end());
+  if (TargetCPU == "")
+    TargetCPU = TI.getTargetOpts().CPU;
+
+  // Process the feature vector.
+  TI.initFeatureMap(FeatureMap, S.getDiagnostics(), TargetCPU, TargetFeatures);
+
+  // See if the requested feature is present and enabled.
+  for (const auto & F : FeatureMap)
+    if (F.getValue() && F.getKey().equals(Feature))
+      return true;
+
+  return false;
+}
+
 StmtResult Sema::ActOnGCCAsmStmt(SourceLocation AsmLoc, bool IsSimple,
                                  bool IsVolatile, unsigned NumOutputs,
                                  unsigned NumInputs, IdentifierInfo **Names,
@@ -150,17 +219,24 @@
   StringLiteral *AsmString = cast<StringLiteral>(asmString);
   StringLiteral **Clobbers = reinterpret_cast<StringLiteral**>(clobbers.data());
 
+  // Determine if current asm statement exists inside a function with enabled
+  // "AVX" target feature. Use this information to resolve GCC-style statements
+  // with "%v" prefix, transforming "%vfoo" into "vfoo" with enabled AVX and
+  // into "foo" otherwise.
+  bool SupportsAVX = TargetFeatureEnabled(*this, "avx");
+
   SmallVector<TargetInfo::ConstraintInfo, 4> OutputConstraintInfos;
 
   // The parser verifies that there is a string literal here.
   assert(AsmString->isAscii());
 
   // If we're compiling CUDA file and function attributes indicate that it's not
   // for this compilation side, skip all the checks.
   if (!DeclAttrsMatchCUDAMode(getLangOpts(), getCurFunctionDecl())) {
-    GCCAsmStmt *NS = new (Context) GCCAsmStmt(
-        Context, AsmLoc, IsSimple, IsVolatile, NumOutputs, NumInputs, Names,
-        Constraints, Exprs.data(), AsmString, NumClobbers, Clobbers, RParenLoc);
+    GCCAsmStmt *NS = new (Context)
+        GCCAsmStmt(Context, AsmLoc, IsSimple, IsVolatile, SupportsAVX,
+                   NumOutputs, NumInputs, Names, Constraints, Exprs.data(),
+                   AsmString, NumClobbers, Clobbers, RParenLoc);
     return NS;
   }
 
@@ -344,10 +420,10 @@
                   diag::err_asm_unknown_register_name) << Clobber);
   }
 
-  GCCAsmStmt *NS =
-    new (Context) GCCAsmStmt(Context, AsmLoc, IsSimple, IsVolatile, NumOutputs,
-                             NumInputs, Names, Constraints, Exprs.data(),
-                             AsmString, NumClobbers, Clobbers, RParenLoc);
+  GCCAsmStmt *NS = new (Context)
+      GCCAsmStmt(Context, AsmLoc, IsSimple, IsVolatile, SupportsAVX, NumOutputs,
+                 NumInputs, Names, Constraints, Exprs.data(), AsmString,
+                 NumClobbers, Clobbers, RParenLoc);
   // Validate the asm string, ensuring it makes sense given the operands we
   // have.
   SmallVector<GCCAsmStmt::AsmStringPiece, 8> Pieces;
Index: include/clang/AST/Stmt.h
===================================================================
--- include/clang/AST/Stmt.h
+++ include/clang/AST/Stmt.h
@@ -1550,18 +1550,24 @@
   StringLiteral **Clobbers;
   IdentifierInfo **Names;
 
+  // Indicates whether this statement exists inside a function with enabled AVX
+  // target feature (by attribute or invocation options).
+  bool SupportsAVX;
+
   friend class ASTStmtReader;
 
 public:
   GCCAsmStmt(const ASTContext &C, SourceLocation asmloc, bool issimple,
-             bool isvolatile, unsigned numoutputs, unsigned numinputs,
-             IdentifierInfo **names, StringLiteral **constraints, Expr **exprs,
-             StringLiteral *asmstr, unsigned numclobbers,
-             StringLiteral **clobbers, SourceLocation rparenloc);
+             bool isvolatile, bool supportsavx, unsigned numoutputs,
+             unsigned numinputs, IdentifierInfo **names,
+             StringLiteral **constraints, Expr **exprs, StringLiteral *asmstr,
+             unsigned numclobbers, StringLiteral **clobbers,
+             SourceLocation rparenloc);
 
   /// \brief Build an empty inline-assembly statement.
-  explicit GCCAsmStmt(EmptyShell Empty) : AsmStmt(GCCAsmStmtClass, Empty),
-    Constraints(nullptr), Clobbers(nullptr), Names(nullptr) { }
+  explicit GCCAsmStmt(EmptyShell Empty)
+      : AsmStmt(GCCAsmStmtClass, Empty), Constraints(nullptr),
+        Clobbers(nullptr), Names(nullptr), SupportsAVX(false) {}
 
   SourceLocation getRParenLoc() const { return RParenLoc; }
   void setRParenLoc(SourceLocation L) { RParenLoc = L; }
Index: test/CodeGen/avx-v-modifier-inline-asm.cpp
===================================================================
--- test/CodeGen/avx-v-modifier-inline-asm.cpp
+++ test/CodeGen/avx-v-modifier-inline-asm.cpp
@@ -0,0 +1,86 @@
+// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux -target-feature +avx -emit-llvm -S %s -o - | FileCheck %s -check-prefix=CHECK-AVX
+// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux -emit-llvm -S %s -o - | FileCheck %s
+
+void f1(void *arg) {
+  __asm__ ("%vpcmpestri $0, (%1), %2" : "=c"(arg) : "r"(arg), "x"(arg));
+  // CHECK-LABEL: f1
+  // CHECK: pcmpestri
+  // CHECK-AVX: vpcmpestri
+}
+
+// Regardless of compiler's invocation options, we should have "vpcmpestri".
+void f2(void *arg) __attribute__((__target__("avx")));
+void f2(void *arg) {
+  __asm__ ("%vpcmpestri $0, (%1), %2" : "=c"(arg) : "r"(arg), "x"(arg));
+  // CHECK-LABEL: f2
+  // CHECK: vpcmpestri
+  // CHECK-AVX: vpcmpestri
+}
+
+// avx2 enables avx, so always expect "vpcmpestri".
+void f3(void *arg) __attribute__((__target__("avx2")));
+void f3(void *arg) {
+  __asm__ ("%vpcmpestri $0, (%1), %2" : "=c"(arg) : "r"(arg), "x"(arg));
+  // CHECK-LABEL: f3
+  // CHECK: vpcmpestri
+  // CHECK-AVX: vpcmpestri
+}
+
+// Here we explicitly disable avx, so always expect "pcmpestri".
+void f4(void *arg) __attribute__((__target__("no-avx")));
+void f4(void *arg) {
+  __asm__ ("%vpcmpestri $0, (%1), %2" : "=c"(arg) : "r"(arg), "x"(arg));
+  // CHECK-LABEL: f4
+  // CHECK: pcmpestri
+  // CHECK-AVX: pcmpestri
+}
+
+// Check that enabling features other than avx don't affect the expected logic.
+void f5(void *arg) __attribute__((__target__("mmx"))) __attribute__((__target__("sse4.1")));
+void f5(void *arg) {
+  __asm__ ("%vpcmpestri $0, (%1), %2" : "=c"(arg) : "r"(arg), "x"(arg));
+  // CHECK-LABEL: f5
+  // CHECK: pcmpestri
+  // CHECK-AVX: vpcmpestri
+}
+
+void f6() {
+  int a = 10, b;
+  __asm__ ("%vmovq (%%rbp), %%xmm0" : "=r"(b) : "r"(a) : "%eax");
+  // CHECK-LABEL: f6
+  // CHECK: movq
+  // CHECK-AVX: vmovq
+}
+
+// Lambda's attributes also counts.
+void f7(void *x) {
+  auto f = [](void *arg) __attribute__((__target__("avx"))) {
+    __asm__("%vpcmpestri $0, (%1), %2" : "=c"(arg) : "r"(arg), "x"(arg));
+  };
+
+  // CHECK-LABEL: f7
+  // CHECK: vpcmpestri
+  // CHECK-AVX: vpcmpestri
+
+  f(x);
+}
+
+// Check influence of attributes of nested scopes. Here command line enables
+// avx, f8 disables, then g1 enables again, so always expect "vmovq".
+void f8(void *x) __attribute__((__target__("no-avx")));
+void f8(void *x) {
+  auto g1 = [](void *arg) __attribute__((__target__("avx2"))) {
+    auto g2 = [&arg]() __attribute__((__target__("no-mmx"))) {
+      int a = 10, b;
+      __asm__ ("%vmovq (%%rbp), %%xmm0" : "=r"(b) : "r"(a) : "%eax");
+
+      // CHECK-LABEL: f8
+      // CHECK: vmovq
+      // CHECK-AVX: vmovq
+    };
+
+    g2();
+  };
+
+  g1(x);
+}
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to