llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Aniket Lal (lalaniket8)

<details>
<summary>Changes</summary>

The body of each OpenCL kernel is emitted as a stub function, and the kernel 
itself is emitted as a call to its respective stub 
(https://github.com/llvm/llvm-project/pull/115821).
The stub function should be marked alwaysinline, since a call to the stub can 
cause a performance drop.

---

Patch is 239.87 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/137769.diff


11 Files Affected:

- (modified) clang/lib/CodeGen/CodeGenModule.cpp (+8) 
- (modified) clang/test/CodeGenOpenCL/addr-space-struct-arg.cl (+349-82) 
- (modified) clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl (+66-10) 
- (modified) clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl (+7-7) 
- (modified) clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl (-4) 
- (modified) clang/test/CodeGenOpenCL/cl20-device-side-enqueue-attributes.cl 
(+83-21) 
- (modified) clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl (+9-15) 
- (modified) clang/test/CodeGenOpenCL/convergent.cl (+5-4) 
- (modified) clang/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl (+2-9) 
- (modified) clang/test/CodeGenOpenCL/opencl-kernel-call.cl (+445-114) 
- (modified) clang/test/CodeGenOpenCL/sampler.cl (-4) 


``````````diff
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp 
b/clang/lib/CodeGen/CodeGenModule.cpp
index e917f3c42da06..384c4f3627212 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -6174,6 +6174,14 @@ void 
CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD,
   CodeGenFunction(*this).GenerateCode(GD, Fn, FI);
 
   setNonAliasAttributes(GD, Fn);
+
+  if (D->hasAttr<OpenCLKernelAttr>())
+    if (GD.getKernelReferenceKind() == KernelReferenceKind::Stub &&
+        !Fn->hasFnAttribute(llvm::Attribute::NoInline) &&
+        !Fn->hasFnAttribute(llvm::Attribute::InlineHint) &&
+        !Fn->hasFnAttribute(llvm::Attribute::OptimizeNone))
+      Fn->addFnAttr(llvm::Attribute::AlwaysInline);
+
   SetLLVMFunctionAttributesForDefinition(D, Fn);
 
   if (const ConstructorAttr *CA = D->getAttr<ConstructorAttr>())
diff --git a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl 
b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
index 49604c6c5e61b..58c358672dd0e 100644
--- a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
+++ b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
@@ -128,18 +128,29 @@ kernel void KernelLargeTwoMember(struct 
LargeStructTwoMember u) {
 // X86-LABEL: define spir_kernel void @ker(
 // X86-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) 
noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space 
[[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type 
[[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual 
[[META7:![0-9]+]] {
 // X86-NEXT:  [[ENTRY:.*:]]
+// X86-NEXT:    [[IN_ADDR_I:%.*]] = alloca ptr addrspace(1), align 4
+// X86-NEXT:    [[OUT_ADDR_I:%.*]] = alloca ptr addrspace(1), align 4
+// X86-NEXT:    [[TMP_I:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4
+// X86-NEXT:    [[BYVAL_TEMP_I:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4
 // X86-NEXT:    [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 4
 // X86-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 4
 // X86-NEXT:    store ptr addrspace(1) [[IN]], ptr [[IN_ADDR]], align 4
 // X86-NEXT:    store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR]], align 4
 // X86-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4
 // X86-NEXT:    [[TMP1:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4
-// X86-NEXT:    call void @__clang_ocl_kern_imp_ker(ptr addrspace(1) noundef 
align 4 [[TMP0]], ptr addrspace(1) noundef align 4 [[TMP1]]) #[[ATTR3:[0-9]+]]
+// X86-NEXT:    store ptr addrspace(1) [[TMP0]], ptr [[IN_ADDR_I]], align 4
+// X86-NEXT:    store ptr addrspace(1) [[TMP1]], ptr [[OUT_ADDR_I]], align 4
+// X86-NEXT:    [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_I]], 
align 4
+// X86-NEXT:    [[TMP3:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR_I]], align 
4
+// X86-NEXT:    [[ARRAYIDX1_I:%.*]] = getelementptr inbounds 
[[STRUCT_MAT3X3]], ptr addrspace(1) [[TMP3]], i32 1
+// X86-NEXT:    call void @llvm.memcpy.p0.p1.i32(ptr align 4 [[BYVAL_TEMP_I]], 
ptr addrspace(1) align 4 [[ARRAYIDX1_I]], i32 36, i1 false)
+// X86-NEXT:    call void @foo(ptr dead_on_unwind writable 
sret([[STRUCT_MAT4X4]]) align 4 [[TMP_I]], ptr noundef byval([[STRUCT_MAT3X3]]) 
align 4 [[BYVAL_TEMP_I]]) #[[ATTR4:[0-9]+]]
+// X86-NEXT:    call void @llvm.memcpy.p1.p0.i32(ptr addrspace(1) align 4 
[[TMP2]], ptr align 4 [[TMP_I]], i32 64, i1 false)
 // X86-NEXT:    ret void
 //
 //
 // X86-LABEL: define void @__clang_ocl_kern_imp_ker(
-// X86-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) 
noundef align 4 [[OUT:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META4]] 
!kernel_arg_access_qual [[META5]] !kernel_arg_type [[META6]] 
!kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7]] {
+// X86-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) 
noundef align 4 [[OUT:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space [[META4]] 
!kernel_arg_access_qual [[META5]] !kernel_arg_type [[META6]] 
!kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7]] {
 // X86-NEXT:  [[ENTRY:.*:]]
 // X86-NEXT:    [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 4
 // X86-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 4
@@ -152,7 +163,7 @@ kernel void KernelLargeTwoMember(struct 
LargeStructTwoMember u) {
 // X86-NEXT:    [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4
 // X86-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3]], 
ptr addrspace(1) [[TMP1]], i32 1
 // X86-NEXT:    call void @llvm.memcpy.p0.p1.i32(ptr align 4 [[BYVAL_TEMP]], 
ptr addrspace(1) align 4 [[ARRAYIDX1]], i32 36, i1 false)
-// X86-NEXT:    call void @foo(ptr dead_on_unwind writable 
sret([[STRUCT_MAT4X4]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT3X3]]) 
align 4 [[BYVAL_TEMP]]) #[[ATTR3]]
+// X86-NEXT:    call void @foo(ptr dead_on_unwind writable 
sret([[STRUCT_MAT4X4]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT3X3]]) 
align 4 [[BYVAL_TEMP]]) #[[ATTR4]]
 // X86-NEXT:    call void @llvm.memcpy.p1.p0.i32(ptr addrspace(1) align 4 
[[ARRAYIDX]], ptr align 4 [[TMP]], i32 64, i1 false)
 // X86-NEXT:    ret void
 //
@@ -168,18 +179,29 @@ kernel void KernelLargeTwoMember(struct 
LargeStructTwoMember u) {
 // X86-LABEL: define spir_kernel void @ker_large(
 // X86-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) 
noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META4]] 
!kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8:![0-9]+]] 
!kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] {
 // X86-NEXT:  [[ENTRY:.*:]]
+// X86-NEXT:    [[IN_ADDR_I:%.*]] = alloca ptr addrspace(1), align 4
+// X86-NEXT:    [[OUT_ADDR_I:%.*]] = alloca ptr addrspace(1), align 4
+// X86-NEXT:    [[TMP_I:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4
+// X86-NEXT:    [[BYVAL_TEMP_I:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4
 // X86-NEXT:    [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 4
 // X86-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 4
 // X86-NEXT:    store ptr addrspace(1) [[IN]], ptr [[IN_ADDR]], align 4
 // X86-NEXT:    store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR]], align 4
 // X86-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4
 // X86-NEXT:    [[TMP1:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4
-// X86-NEXT:    call void @__clang_ocl_kern_imp_ker_large(ptr addrspace(1) 
noundef align 4 [[TMP0]], ptr addrspace(1) noundef align 4 [[TMP1]]) #[[ATTR3]]
+// X86-NEXT:    store ptr addrspace(1) [[TMP0]], ptr [[IN_ADDR_I]], align 4
+// X86-NEXT:    store ptr addrspace(1) [[TMP1]], ptr [[OUT_ADDR_I]], align 4
+// X86-NEXT:    [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_I]], 
align 4
+// X86-NEXT:    [[TMP3:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR_I]], align 
4
+// X86-NEXT:    [[ARRAYIDX1_I:%.*]] = getelementptr inbounds 
[[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP3]], i32 1
+// X86-NEXT:    call void @llvm.memcpy.p0.p1.i32(ptr align 4 [[BYVAL_TEMP_I]], 
ptr addrspace(1) align 4 [[ARRAYIDX1_I]], i32 4096, i1 false)
+// X86-NEXT:    call void @foo_large(ptr dead_on_unwind writable 
sret([[STRUCT_MAT64X64]]) align 4 [[TMP_I]], ptr noundef 
byval([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP_I]]) #[[ATTR4]]
+// X86-NEXT:    call void @llvm.memcpy.p1.p0.i32(ptr addrspace(1) align 4 
[[TMP2]], ptr align 4 [[TMP_I]], i32 16384, i1 false)
 // X86-NEXT:    ret void
 //
 //
 // X86-LABEL: define void @__clang_ocl_kern_imp_ker_large(
-// X86-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) 
noundef align 4 [[OUT:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META4]] 
!kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8]] 
!kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] {
+// X86-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) 
noundef align 4 [[OUT:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META4]] 
!kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8]] 
!kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] {
 // X86-NEXT:  [[ENTRY:.*:]]
 // X86-NEXT:    [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 4
 // X86-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 4
@@ -192,7 +214,7 @@ kernel void KernelLargeTwoMember(struct 
LargeStructTwoMember u) {
 // X86-NEXT:    [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4
 // X86-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds 
[[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i32 1
 // X86-NEXT:    call void @llvm.memcpy.p0.p1.i32(ptr align 4 [[BYVAL_TEMP]], 
ptr addrspace(1) align 4 [[ARRAYIDX1]], i32 4096, i1 false)
-// X86-NEXT:    call void @foo_large(ptr dead_on_unwind writable 
sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr noundef 
byval([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]]
+// X86-NEXT:    call void @foo_large(ptr dead_on_unwind writable 
sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr noundef 
byval([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR4]]
 // X86-NEXT:    call void @llvm.memcpy.p1.p0.i32(ptr addrspace(1) align 4 
[[ARRAYIDX]], ptr align 4 [[TMP]], i32 16384, i1 false)
 // X86-NEXT:    ret void
 //
@@ -227,16 +249,18 @@ kernel void KernelLargeTwoMember(struct 
LargeStructTwoMember u) {
 // X86-LABEL: define spir_kernel void @test_indirect_arg_local(
 // X86-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] 
!kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] 
!kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] {
 // X86-NEXT:  [[ENTRY:.*:]]
-// X86-NEXT:    call void @__clang_ocl_kern_imp_test_indirect_arg_local() 
#[[ATTR3]]
+// X86-NEXT:    [[BYVAL_TEMP_I:%.*]] = alloca 
[[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 4
+// X86-NEXT:    call void @llvm.memcpy.p0.p3.i32(ptr align 4 [[BYVAL_TEMP_I]], 
ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i32 800, i1 false)
+// X86-NEXT:    call void @FuncOneLargeMember(ptr noundef 
byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[BYVAL_TEMP_I]]) #[[ATTR4]]
 // X86-NEXT:    ret void
 //
 //
 // X86-LABEL: define void @__clang_ocl_kern_imp_test_indirect_arg_local(
-// X86-SAME: ) #[[ATTR0]] !kernel_arg_addr_space [[META9]] 
!kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] 
!kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] {
+// X86-SAME: ) #[[ATTR2]] !kernel_arg_addr_space [[META9]] 
!kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] 
!kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] {
 // X86-NEXT:  [[ENTRY:.*:]]
 // X86-NEXT:    [[BYVAL_TEMP:%.*]] = alloca 
[[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 4
 // X86-NEXT:    call void @llvm.memcpy.p0.p3.i32(ptr align 4 [[BYVAL_TEMP]], 
ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i32 800, i1 false)
-// X86-NEXT:    call void @FuncOneLargeMember(ptr noundef 
byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]]
+// X86-NEXT:    call void @FuncOneLargeMember(ptr noundef 
byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[BYVAL_TEMP]]) #[[ATTR4]]
 // X86-NEXT:    ret void
 //
 //
@@ -244,61 +268,74 @@ kernel void KernelLargeTwoMember(struct 
LargeStructTwoMember u) {
 // X86-SAME: ) #[[ATTR0]] {
 // X86-NEXT:  [[ENTRY:.*:]]
 // X86-NEXT:    [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], 
align 8
-// X86-NEXT:    call void @FuncOneLargeMember(ptr noundef 
byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[P_S]]) #[[ATTR3]]
+// X86-NEXT:    call void @FuncOneLargeMember(ptr noundef 
byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[P_S]]) #[[ATTR4]]
 // X86-NEXT:    ret void
 //
 //
 // X86-LABEL: define spir_kernel void @KernelOneMember(
 // X86-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 8 
[[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] 
!kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] 
!kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] {
 // X86-NEXT:  [[ENTRY:.*:]]
-// X86-NEXT:    call void @__clang_ocl_kern_imp_KernelOneMember(ptr noundef 
byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR3]]
+// X86-NEXT:    [[U_I:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER]], align 8
+// X86-NEXT:    [[U1:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER]], align 8
+// X86-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[U1]], ptr align 
4 [[U]], i64 8, i1 false)
+// X86-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U_I]], ptr 
align 4 [[U1]], i32 8, i1 false)
+// X86-NEXT:    call void @FuncOneMember(ptr noundef 
byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[U_I]]) #[[ATTR4]]
 // X86-NEXT:    ret void
 //
 //
 // X86-LABEL: define void @__clang_ocl_kern_imp_KernelOneMember(
-// X86-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 4 
[[TMP0:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META10]] 
!kernel_arg_access_qual [[META11]] !kernel_arg_type [[META12]] 
!kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13]] {
+// X86-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 4 
[[TMP0:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META10]] 
!kernel_arg_access_qual [[META11]] !kernel_arg_type [[META12]] 
!kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13]] {
 // X86-NEXT:  [[ENTRY:.*:]]
 // X86-NEXT:    [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER]], align 8
 // X86-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 
4 [[TMP0]], i32 8, i1 false)
-// X86-NEXT:    call void @FuncOneMember(ptr noundef 
byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR3]]
+// X86-NEXT:    call void @FuncOneMember(ptr noundef 
byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR4]]
 // X86-NEXT:    ret void
 //
 //
 // X86-LABEL: define spir_kernel void @KernelOneMemberSpir(
 // X86-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] 
!kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] 
!kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] 
!kernel_arg_type_qual [[META13]] {
 // X86-NEXT:  [[ENTRY:.*:]]
+// X86-NEXT:    [[U_ADDR_I:%.*]] = alloca ptr addrspace(1), align 4
+// X86-NEXT:    [[BYVAL_TEMP_I:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], 
align 4
 // X86-NEXT:    [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 4
 // X86-NEXT:    store ptr addrspace(1) [[U]], ptr [[U_ADDR]], align 4
 // X86-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr [[U_ADDR]], align 4
-// X86-NEXT:    call void @__clang_ocl_kern_imp_KernelOneMemberSpir(ptr 
addrspace(1) noundef align 8 [[TMP0]]) #[[ATTR3]]
+// X86-NEXT:    store ptr addrspace(1) [[TMP0]], ptr [[U_ADDR_I]], align 4
+// X86-NEXT:    [[TMP1:%.*]] = load ptr addrspace(1), ptr [[U_ADDR_I]], align 4
+// X86-NEXT:    call void @llvm.memcpy.p0.p1.i32(ptr align 4 [[BYVAL_TEMP_I]], 
ptr addrspace(1) align 8 [[TMP1]], i32 8, i1 false)
+// X86-NEXT:    call void @FuncOneMember(ptr noundef 
byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[BYVAL_TEMP_I]]) #[[ATTR4]]
 // X86-NEXT:    ret void
 //
 //
 // X86-LABEL: define void @__clang_ocl_kern_imp_KernelOneMemberSpir(
-// X86-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR0]] 
!kernel_arg_addr_space [[META14]] !kernel_arg_access_qual [[META11]] 
!kernel_arg_type [[META15]] !kernel_arg_base_type [[META15]] 
!kernel_arg_type_qual [[META13]] {
+// X86-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR2]] 
!kernel_arg_addr_space [[META14]] !kernel_arg_access_qual [[META11]] 
!kernel_arg_type [[META15]] !kernel_arg_base_type [[META15]] 
!kernel_arg_type_qual [[META13]] {
 // X86-NEXT:  [[ENTRY:.*:]]
 // X86-NEXT:    [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 4
 // X86-NEXT:    [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], 
align 4
 // X86-NEXT:    store ptr addrspace(1) [[U]], ptr [[U_ADDR]], align 4
 // X86-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr [[U_ADDR]], align 4
 // X86-NEXT:    call void @llvm.memcpy.p0.p1.i32(ptr align 4 [[BYVAL_TEMP]], 
ptr addrspace(1) align 8 [[TMP0]], i32 8, i1 false)
-// X86-NEXT:    call void @FuncOneMember(ptr noundef 
byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]]
+// X86-NEXT:    call void @FuncOneMember(ptr noundef 
byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[BYVAL_TEMP]]) #[[ATTR4]]
 // X86-NEXT:    ret void
 //
 //
 // X86-LABEL: define spir_kernel void @KernelLargeOneMember(
 // X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 
[[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual 
[[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] 
!kernel_arg_type_qual [[META13]] {
 // X86-NEXT:  [[ENTRY:.*:]]
-// X86-NEXT:    call void @__clang_ocl_kern_imp_KernelLargeOneMember(ptr 
noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR3]]
+// X86-NEXT:    [[U_I:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8
+// X86-NEXT:    [[U1:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8
+// X86-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[U1]], ptr align 
4 [[U]], i64 800, i1 false)
+// X86-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U_I]], ptr 
align 4 [[U1]], i32 800, i1 false)
+// X86-NEXT:    call void @FuncOneLargeMember(ptr noundef 
byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[U_I]]) #[[ATTR4]]
 // X86-NEXT:    ret void
 //
 //
 // X86-LABEL: define void @__clang_ocl_kern_imp_KernelLargeOneMember(
-// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 4 
[[TMP0:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META10]] 
!kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16]] 
!kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] {
+// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 4 
[[TMP0:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META10]] 
!kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16]] 
!kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] {
 // X86-NEXT:  [[ENTRY:.*:]]
 // X86-NEXT:    [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8
 // X86-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 
4 [[TMP0]], i32 800, i1 false)
-// X86-NEXT:    call void @FuncOneLargeMember(ptr noundef 
byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR3]]
+// X86-NEXT:    call void @FuncOneLargeMember(ptr noundef 
byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR4]]
 // X86-NEXT:    ret void
 //
 //
@@ -332,32 +369,40 @@ kernel void KernelLargeTwoMember(struct 
LargeStructTwoMember u) {
 // X86-LABEL: define spir_kernel void @KernelTwoMember(
 // X86-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 
[[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual 
[[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] 
!kernel_arg_type_qual [[META13]] {
 // X86-NEXT:  [[ENTRY:.*:]]
-// X86-NEXT:    call void @__clang_ocl_kern_imp_KernelTwoMember(ptr noundef 
byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR3]]
+// X86-NEXT:    [[U_I:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8
+// X86-NEXT:    [[U1:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8
+// X86-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[U1]], ptr align 
4 [[U]], i64 16, i1 false)
+// X86-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U_I]], ptr 
align 4 [[U1]], i32 16, i1 false)
+// X86-NEXT:    call void @FuncTwoMember(ptr noundef 
byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[U_I]]) #[[ATTR4]]
 // X86-NEXT:    ret void
 //
 //
 // X86-LABEL: define void @__clang_ocl_kern_imp_KernelTwoMember(
-// X86-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 4 
[[TMP0:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META10]] 
!kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17]] 
!kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] {
+// X86-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 4 
[[TMP0:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META10]] 
!kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17]] 
!kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] {
 // X86-NEXT:  [[ENTRY:.*:]]
 // X86-NEXT:    [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8
 // X86-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 
4 [[TMP0]], i32 16, i1 false)
-// X86-NEXT:    call void @FuncTwoMember(ptr noundef 
byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR3]]
+// X86-NEXT:    call void @FuncTwoMember(ptr noundef 
byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR4]]
 // X86-NEXT:    ret void
 //
 //
 // X86-LABEL...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/137769
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to