jlebar created this revision.
jlebar added a reviewer: tra.
jlebar added a subscriber: cfe-commits.
This is particularly important because some convergent CUDA intrinsics (e.g. __shfl_down) are implemented in terms of inline asm; a sketch of that pattern follows the diff below.

http://reviews.llvm.org/D20836

Files:
  lib/CodeGen/CGStmt.cpp
  test/CodeGenCUDA/convergent.cu

Index: test/CodeGenCUDA/convergent.cu
===================================================================
--- test/CodeGenCUDA/convergent.cu
+++ test/CodeGenCUDA/convergent.cu
@@ -25,13 +25,19 @@
 __host__ __device__ void bar() {
   // DEVICE: call void @_Z3bazv() [[CALL_ATTR:#[0-9]+]]
   baz();
+  // DEVICE: call i32 asm "trap;", "=l"() [[ASM_ATTR:#[0-9]+]]
+  int x;
+  asm ("trap;" : "=l"(x));
+  // DEVICE: call void asm sideeffect "trap;", ""() [[ASM_ATTR:#[0-9]+]]
+  asm volatile ("trap;");
 }
 
 // DEVICE: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]]
 // DEVICE: attributes [[BAZ_ATTR]] = {
 // DEVICE-SAME: convergent
 // DEVICE-SAME: }
 // DEVICE: attributes [[CALL_ATTR]] = { convergent }
+// DEVICE: attributes [[ASM_ATTR]] = { convergent
 
 // HOST: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]]
 // HOST: attributes [[BAZ_ATTR]] = {
Index: lib/CodeGen/CGStmt.cpp
===================================================================
--- lib/CodeGen/CGStmt.cpp
+++ lib/CodeGen/CGStmt.cpp
@@ -2054,6 +2054,14 @@
                                      llvm::ConstantAsMetadata::get(Loc)));
   }
 
+  if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) {
+    // Conservatively, mark all inline asm blocks in CUDA as convergent
+    // (meaning, they may call an intrinsically convergent op, such as bar.sync,
+    // and so can't have certain optimizations applied around them).
+    Result->addAttribute(llvm::AttributeSet::FunctionIndex,
+                         llvm::Attribute::Convergent);
+  }
+
   // Extract all of the register value results from the asm.
   std::vector<llvm::Value*> RegResults;
   if (ResultRegTypes.size() == 1) {
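For context, the sketch below shows roughly how a __shfl_down-style intrinsic tends to be written in terms of inline asm on the device side. It is an illustrative assumption (the function name and exact constraint strings are made up, not copied from the CUDA headers), but it shows why such asm must be treated as convergent: shfl.down.b32 exchanges data across the warp, so every active thread has to reach it.

  // Hypothetical sketch of a __shfl_down-style wrapper; the name and the
  // exact constraint strings are illustrative assumptions, not the real
  // CUDA headers.
  __device__ static inline int shfl_down_sketch(int var, unsigned int delta,
                                                int width = 32) {
    int ret;
    // Pack the clamp value and segment mask into the third operand.
    int c = ((32 - width) << 8) | 0x1f;
    // shfl.down.b32 exchanges data across the warp, so every thread must
    // reach this asm; the convergent attribute keeps optimizations from
    // moving it into or out of divergent control flow.
    asm volatile("shfl.down.b32 %0, %1, %2, %3;"
                 : "=r"(ret)
                 : "r"(var), "r"(delta), "r"(c));
    return ret;
  }

Without the convergent attribute, nothing stops a pass from sinking such a call under a divergent branch, which would break the warp-wide exchange.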