Author: Matt Arsenault
Date: 2023-01-30T15:03:14-04:00
New Revision: 00f6a7f02f9c8d542ce8ff1c9c037d9fdb421b88

URL: 
https://github.com/llvm/llvm-project/commit/00f6a7f02f9c8d542ce8ff1c9c037d9fdb421b88
DIFF: 
https://github.com/llvm/llvm-project/commit/00f6a7f02f9c8d542ce8ff1c9c037d9fdb421b88.diff

LOG: clang/OpenCL: Fix not setting convergent on block invoke kernels

Yet another example of how convergent not being the default is dangerous
and backwards.

Added: 
    

Modified: 
    clang/lib/CodeGen/TargetInfo.cpp
    clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
    clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/TargetInfo.cpp 
b/clang/lib/CodeGen/TargetInfo.cpp
index 1b80529e36a72..7e08d42e866ff 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -12451,6 +12451,7 @@ llvm::Value 
*TargetCodeGenInfo::createEnqueuedBlockKernel(
 
   // FIXME: Apply default attributes
   F->addFnAttr(llvm::Attribute::NoUnwind);
+  F->addFnAttr(llvm::Attribute::Convergent);
 
   Builder.CreateRetVoid();
   Builder.restoreIP(IP);
@@ -12504,6 +12505,7 @@ llvm::Value 
*AMDGPUTargetCodeGenInfo::createEnqueuedBlockKernel(
 
   // FIXME: Apply default attributes
   F->addFnAttr(llvm::Attribute::NoUnwind);
+  F->addFnAttr(llvm::Attribute::Convergent);
   F->addFnAttr("enqueued-block");
 
   auto IP = CGF.Builder.saveIP();

diff  --git a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl 
b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
index 4277dbbc20530..17c5fc6132856 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
@@ -182,7 +182,7 @@ kernel void test(global char *a, char b, global long *c, 
long d) {
 // CHECK-NEXT:    ret void
 //
 //
-// CHECK: Function Attrs: nounwind
+// CHECK: Function Attrs: convergent nounwind
 // CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_kernel
 // CHECK-SAME: (<{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0:%.*]]) 
#[[ATTR4:[0-9]+]] !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 
!kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 {
 // CHECK-NEXT:  entry:
@@ -216,7 +216,7 @@ kernel void test(global char *a, char b, global long *c, 
long d) {
 // CHECK-NEXT:    ret void
 //
 //
-// CHECK: Function Attrs: nounwind
+// CHECK: Function Attrs: convergent nounwind
 // CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_2_kernel
 // CHECK-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 
}> [[TMP0:%.*]]) #[[ATTR4]] !kernel_arg_addr_space !7 !kernel_arg_access_qual 
!8 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 {
 // CHECK-NEXT:  entry:
@@ -255,7 +255,7 @@ kernel void test(global char *a, char b, global long *c, 
long d) {
 // CHECK-NEXT:    ret void
 //
 //
-// CHECK: Function Attrs: nounwind
+// CHECK: Function Attrs: convergent nounwind
 // CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_3_kernel
 // CHECK-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 
}> [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) #[[ATTR4]] 
!kernel_arg_addr_space !11 !kernel_arg_access_qual !12 !kernel_arg_type !13 
!kernel_arg_base_type !13 !kernel_arg_type_qual !14 {
 // CHECK-NEXT:  entry:
@@ -282,7 +282,7 @@ kernel void test(global char *a, char b, global long *c, 
long d) {
 // CHECK-NEXT:    ret void
 //
 //
-// CHECK: Function Attrs: nounwind
+// CHECK: Function Attrs: convergent nounwind
 // CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_4_kernel
 // CHECK-SAME: (<{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0:%.*]]) 
#[[ATTR4]] !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 
!kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 {
 // CHECK-NEXT:  entry:
@@ -297,7 +297,7 @@ kernel void test(global char *a, char b, global long *c, 
long d) {
 // CHECK: attributes #1 = { convergent noinline norecurse nounwind optnone 
"amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" 
"stack-protector-buffer-size"="8" "target-cpu"="gfx900" 
"target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
 "uniform-work-group-size"="false" }
 // CHECK: attributes #2 = { nocallback nofree nounwind willreturn 
memory(argmem: readwrite) }
 // CHECK: attributes #3 = { convergent noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-cpu"="gfx900" 
"target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
 }
-// CHECK: attributes #4 = { nounwind "enqueued-block" }
+// CHECK: attributes #4 = { convergent nounwind "enqueued-block" }
 // CHECK: attributes #5 = { convergent nounwind }
 //.
 // CHECK: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}

diff  --git a/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl 
b/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
index 3cfb5f55e5d21..bce1a922668a1 100644
--- a/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
+++ b/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
@@ -297,7 +297,7 @@ kernel void device_side_enqueue(global int *a, global int 
*b, int i) {
   };
 
   // Uses global block literal [[BLG8]] and invoke function [[INVG8]].
-  // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(ptr 
addrspace(4) addrspacecast (ptr addrspace(1) [[BLG8]] to ptr addrspace(4)))
+  // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(ptr 
addrspace(4) addrspacecast (ptr addrspace(1) [[BLG8]] to ptr addrspace(4))) 
[[INVOKE_ATTR:#[0-9]+]]
   block_A();
 
   // Emits global block literal [[BLG8]] and block kernel [[INVGK8]]. 
[[INVGK8]] calls [[INVG8]].
@@ -393,7 +393,7 @@ kernel void device_side_enqueue(global int *a, global int 
*b, int i) {
 // COMMON:  ret void
 // COMMON: }
 // COMMON: define spir_kernel void [[INVLK2]](ptr addrspace(4){{.*}})
-// COMMON: define spir_kernel void [[INVGK1]](ptr addrspace(4){{.*}}, ptr 
addrspace(3){{.*}})
+// COMMON: define spir_kernel void [[INVGK1]](ptr addrspace(4){{.*}}, ptr 
addrspace(3){{.*}})  [[INVOKE_ATTR:#[0-9]+]]
 // COMMON: define spir_kernel void [[INVGK2]](ptr addrspace(4){{.*}}, ptr 
addrspace(3){{.*}})
 // COMMON: define spir_kernel void [[INVGK3]](ptr addrspace(4){{.*}}, ptr 
addrspace(3){{.*}})
 // COMMON: define spir_kernel void [[INVGK4]](ptr addrspace(4){{.*}}, ptr 
addrspace(3){{.*}})
@@ -412,3 +412,5 @@ kernel void device_side_enqueue(global int *a, global int 
*b, int i) {
 // COMMON: define spir_kernel void [[INVGK9]](ptr addrspace(4){{.*}}, ptr 
addrspace(3){{.*}})
 // COMMON: define spir_kernel void [[INVGK10]](ptr addrspace(4){{.*}})
 // COMMON: define spir_kernel void [[INVGK11]](ptr addrspace(4){{.*}})
+
+// COMMON: attributes [[INVOKE_ATTR]] = { convergent nounwind }


        
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to