jlebar created this revision.
jlebar added a reviewer: tra.
jlebar added a subscriber: cfe-commits.

This is particularly important because some convergent CUDA intrinsics
(e.g. __shfl_down) are implemented in terms of inline asm.
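
For reference, here is a rough sketch of how such an intrinsic tends to be
written in the CUDA headers (simplified; the exact header code and the
encoding of the clamp operand shown here are illustrative, not authoritative):

  __device__ inline int __shfl_down(int var, unsigned int delta,
                                    int width = warpSize) {
    int ret;
    // Clamp/segment-mask operand for shfl.down (illustrative encoding).
    int c = ((warpSize - width) << 8) | 0x1f;
    // The shuffle is expressed as inline asm, so without this patch the
    // resulting asm call would not be marked convergent on the device side.
    asm volatile("shfl.down.b32 %0, %1, %2, %3;"
                 : "=r"(ret)
                 : "r"(var), "r"(delta), "r"(c));
    return ret;
  }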

http://reviews.llvm.org/D20836

Files:
  lib/CodeGen/CGStmt.cpp
  test/CodeGenCUDA/convergent.cu

Index: test/CodeGenCUDA/convergent.cu
===================================================================
--- test/CodeGenCUDA/convergent.cu
+++ test/CodeGenCUDA/convergent.cu
@@ -25,13 +25,19 @@
 __host__ __device__ void bar() {
   // DEVICE: call void @_Z3bazv() [[CALL_ATTR:#[0-9]+]]
   baz();
+  // DEVICE: call i32 asm "trap;", "=l"() [[ASM_ATTR:#[0-9]+]]
+  int x;
+  asm ("trap;" : "=l"(x));
+  // DEVICE: call void asm sideeffect "trap;", ""() [[ASM_ATTR:#[0-9]+]]
+  asm volatile ("trap;");
 }
 
 // DEVICE: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]]
 // DEVICE: attributes [[BAZ_ATTR]] = {
 // DEVICE-SAME: convergent
 // DEVICE-SAME: }
 // DEVICE: attributes [[CALL_ATTR]] = { convergent }
+// DEVICE: attributes [[ASM_ATTR]] = { convergent
 
 // HOST: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]]
 // HOST: attributes [[BAZ_ATTR]] = {
Index: lib/CodeGen/CGStmt.cpp
===================================================================
--- lib/CodeGen/CGStmt.cpp
+++ lib/CodeGen/CGStmt.cpp
@@ -2054,6 +2054,14 @@
                                           llvm::ConstantAsMetadata::get(Loc)));
   }
 
+  if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) {
+    // Conservatively, mark all inline asm blocks in CUDA as convergent
+    // (meaning, they may call an intrinsically convergent op, such as bar.sync,
+    // and so can't have certain optimizations applied around them).
+    Result->addAttribute(llvm::AttributeSet::FunctionIndex,
+                         llvm::Attribute::Convergent);
+  }
+
   // Extract all of the register value results from the asm.
   std::vector<llvm::Value*> RegResults;
   if (ResultRegTypes.size() == 1) {
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
