llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-amdgpu Author: Aniket Lal (lalaniket8) <details> <summary>Changes</summary> The body of each OpenCL kernel is emitted as a stub function, and the kernel itself is emitted as a call to its respective stub (https://github.com/llvm/llvm-project/pull/115821). The stub function should be marked alwaysinline, since a call to the stub can cause a performance drop. --- Patch is 239.87 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/137769.diff 11 Files Affected: - (modified) clang/lib/CodeGen/CodeGenModule.cpp (+8) - (modified) clang/test/CodeGenOpenCL/addr-space-struct-arg.cl (+349-82) - (modified) clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl (+66-10) - (modified) clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl (+7-7) - (modified) clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl (-4) - (modified) clang/test/CodeGenOpenCL/cl20-device-side-enqueue-attributes.cl (+83-21) - (modified) clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl (+9-15) - (modified) clang/test/CodeGenOpenCL/convergent.cl (+5-4) - (modified) clang/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl (+2-9) - (modified) clang/test/CodeGenOpenCL/opencl-kernel-call.cl (+445-114) - (modified) clang/test/CodeGenOpenCL/sampler.cl (-4) ``````````diff diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index e917f3c42da06..384c4f3627212 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -6174,6 +6174,14 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD, CodeGenFunction(*this).GenerateCode(GD, Fn, FI); setNonAliasAttributes(GD, Fn); + + if (D->hasAttr<OpenCLKernelAttr>()) + if (GD.getKernelReferenceKind() == KernelReferenceKind::Stub && + !Fn->hasFnAttribute(llvm::Attribute::NoInline) && + !Fn->hasFnAttribute(llvm::Attribute::InlineHint) && + !Fn->hasFnAttribute(llvm::Attribute::OptimizeNone)) + Fn->addFnAttr(llvm::Attribute::AlwaysInline); + 
SetLLVMFunctionAttributesForDefinition(D, Fn); if (const ConstructorAttr *CA = D->getAttr<ConstructorAttr>()) diff --git a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl index 49604c6c5e61b..58c358672dd0e 100644 --- a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl +++ b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl @@ -128,18 +128,29 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) { // X86-LABEL: define spir_kernel void @ker( // X86-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] { // X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: [[IN_ADDR_I:%.*]] = alloca ptr addrspace(1), align 4 +// X86-NEXT: [[OUT_ADDR_I:%.*]] = alloca ptr addrspace(1), align 4 +// X86-NEXT: [[TMP_I:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4 +// X86-NEXT: [[BYVAL_TEMP_I:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4 // X86-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 4 // X86-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 4 // X86-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR]], align 4 // X86-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR]], align 4 // X86-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 // X86-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4 -// X86-NEXT: call void @__clang_ocl_kern_imp_ker(ptr addrspace(1) noundef align 4 [[TMP0]], ptr addrspace(1) noundef align 4 [[TMP1]]) #[[ATTR3:[0-9]+]] +// X86-NEXT: store ptr addrspace(1) [[TMP0]], ptr [[IN_ADDR_I]], align 4 +// X86-NEXT: store ptr addrspace(1) [[TMP1]], ptr [[OUT_ADDR_I]], align 4 +// X86-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_I]], align 4 +// X86-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr 
[[IN_ADDR_I]], align 4 +// X86-NEXT: [[ARRAYIDX1_I:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3]], ptr addrspace(1) [[TMP3]], i32 1 +// X86-NEXT: call void @llvm.memcpy.p0.p1.i32(ptr align 4 [[BYVAL_TEMP_I]], ptr addrspace(1) align 4 [[ARRAYIDX1_I]], i32 36, i1 false) +// X86-NEXT: call void @foo(ptr dead_on_unwind writable sret([[STRUCT_MAT4X4]]) align 4 [[TMP_I]], ptr noundef byval([[STRUCT_MAT3X3]]) align 4 [[BYVAL_TEMP_I]]) #[[ATTR4:[0-9]+]] +// X86-NEXT: call void @llvm.memcpy.p1.p0.i32(ptr addrspace(1) align 4 [[TMP2]], ptr align 4 [[TMP_I]], i32 64, i1 false) // X86-NEXT: ret void // // // X86-LABEL: define void @__clang_ocl_kern_imp_ker( -// X86-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META6]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7]] { +// X86-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META6]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7]] { // X86-NEXT: [[ENTRY:.*:]] // X86-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 4 // X86-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 4 @@ -152,7 +163,7 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) { // X86-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 // X86-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3]], ptr addrspace(1) [[TMP1]], i32 1 // X86-NEXT: call void @llvm.memcpy.p0.p1.i32(ptr align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i32 36, i1 false) -// X86-NEXT: call void @foo(ptr dead_on_unwind writable sret([[STRUCT_MAT4X4]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT3X3]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] +// X86-NEXT: call void @foo(ptr dead_on_unwind writable 
sret([[STRUCT_MAT4X4]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT3X3]]) align 4 [[BYVAL_TEMP]]) #[[ATTR4]] // X86-NEXT: call void @llvm.memcpy.p1.p0.i32(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP]], i32 64, i1 false) // X86-NEXT: ret void // @@ -168,18 +179,29 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) { // X86-LABEL: define spir_kernel void @ker_large( // X86-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] { // X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: [[IN_ADDR_I:%.*]] = alloca ptr addrspace(1), align 4 +// X86-NEXT: [[OUT_ADDR_I:%.*]] = alloca ptr addrspace(1), align 4 +// X86-NEXT: [[TMP_I:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4 +// X86-NEXT: [[BYVAL_TEMP_I:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4 // X86-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 4 // X86-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 4 // X86-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR]], align 4 // X86-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR]], align 4 // X86-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 // X86-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4 -// X86-NEXT: call void @__clang_ocl_kern_imp_ker_large(ptr addrspace(1) noundef align 4 [[TMP0]], ptr addrspace(1) noundef align 4 [[TMP1]]) #[[ATTR3]] +// X86-NEXT: store ptr addrspace(1) [[TMP0]], ptr [[IN_ADDR_I]], align 4 +// X86-NEXT: store ptr addrspace(1) [[TMP1]], ptr [[OUT_ADDR_I]], align 4 +// X86-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_I]], align 4 +// X86-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR_I]], align 4 +// X86-NEXT: [[ARRAYIDX1_I:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP3]], i32 1 +// 
X86-NEXT: call void @llvm.memcpy.p0.p1.i32(ptr align 4 [[BYVAL_TEMP_I]], ptr addrspace(1) align 4 [[ARRAYIDX1_I]], i32 4096, i1 false) +// X86-NEXT: call void @foo_large(ptr dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP_I]], ptr noundef byval([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP_I]]) #[[ATTR4]] +// X86-NEXT: call void @llvm.memcpy.p1.p0.i32(ptr addrspace(1) align 4 [[TMP2]], ptr align 4 [[TMP_I]], i32 16384, i1 false) // X86-NEXT: ret void // // // X86-LABEL: define void @__clang_ocl_kern_imp_ker_large( -// X86-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] { +// X86-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] { // X86-NEXT: [[ENTRY:.*:]] // X86-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 4 // X86-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 4 @@ -192,7 +214,7 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) { // X86-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 // X86-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i32 1 // X86-NEXT: call void @llvm.memcpy.p0.p1.i32(ptr align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i32 4096, i1 false) -// X86-NEXT: call void @foo_large(ptr dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] +// X86-NEXT: call void @foo_large(ptr dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) 
#[[ATTR4]] // X86-NEXT: call void @llvm.memcpy.p1.p0.i32(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP]], i32 16384, i1 false) // X86-NEXT: ret void // @@ -227,16 +249,18 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) { // X86-LABEL: define spir_kernel void @test_indirect_arg_local( // X86-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] { // X86-NEXT: [[ENTRY:.*:]] -// X86-NEXT: call void @__clang_ocl_kern_imp_test_indirect_arg_local() #[[ATTR3]] +// X86-NEXT: [[BYVAL_TEMP_I:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 4 +// X86-NEXT: call void @llvm.memcpy.p0.p3.i32(ptr align 4 [[BYVAL_TEMP_I]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i32 800, i1 false) +// X86-NEXT: call void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[BYVAL_TEMP_I]]) #[[ATTR4]] // X86-NEXT: ret void // // // X86-LABEL: define void @__clang_ocl_kern_imp_test_indirect_arg_local( -// X86-SAME: ) #[[ATTR0]] !kernel_arg_addr_space [[META9]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] { +// X86-SAME: ) #[[ATTR2]] !kernel_arg_addr_space [[META9]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] { // X86-NEXT: [[ENTRY:.*:]] // X86-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 4 // X86-NEXT: call void @llvm.memcpy.p0.p3.i32(ptr align 4 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i32 800, i1 false) -// X86-NEXT: call void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] +// X86-NEXT: call void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[BYVAL_TEMP]]) #[[ATTR4]] // X86-NEXT: 
ret void // // @@ -244,61 +268,74 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) { // X86-SAME: ) #[[ATTR0]] { // X86-NEXT: [[ENTRY:.*:]] // X86-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8 -// X86-NEXT: call void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[P_S]]) #[[ATTR3]] +// X86-NEXT: call void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[P_S]]) #[[ATTR4]] // X86-NEXT: ret void // // // X86-LABEL: define spir_kernel void @KernelOneMember( // X86-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] { // X86-NEXT: [[ENTRY:.*:]] -// X86-NEXT: call void @__clang_ocl_kern_imp_KernelOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR3]] +// X86-NEXT: [[U_I:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER]], align 8 +// X86-NEXT: [[U1:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER]], align 8 +// X86-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[U1]], ptr align 4 [[U]], i64 8, i1 false) +// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U_I]], ptr align 4 [[U1]], i32 8, i1 false) +// X86-NEXT: call void @FuncOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[U_I]]) #[[ATTR4]] // X86-NEXT: ret void // // // X86-LABEL: define void @__clang_ocl_kern_imp_KernelOneMember( -// X86-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META12]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13]] { +// X86-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual 
[[META11]] !kernel_arg_type [[META12]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13]] { // X86-NEXT: [[ENTRY:.*:]] // X86-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER]], align 8 // X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 8, i1 false) -// X86-NEXT: call void @FuncOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR3]] +// X86-NEXT: call void @FuncOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR4]] // X86-NEXT: ret void // // // X86-LABEL: define spir_kernel void @KernelOneMemberSpir( // X86-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] { // X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: [[U_ADDR_I:%.*]] = alloca ptr addrspace(1), align 4 +// X86-NEXT: [[BYVAL_TEMP_I:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 4 // X86-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 4 // X86-NEXT: store ptr addrspace(1) [[U]], ptr [[U_ADDR]], align 4 // X86-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[U_ADDR]], align 4 -// X86-NEXT: call void @__clang_ocl_kern_imp_KernelOneMemberSpir(ptr addrspace(1) noundef align 8 [[TMP0]]) #[[ATTR3]] +// X86-NEXT: store ptr addrspace(1) [[TMP0]], ptr [[U_ADDR_I]], align 4 +// X86-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[U_ADDR_I]], align 4 +// X86-NEXT: call void @llvm.memcpy.p0.p1.i32(ptr align 4 [[BYVAL_TEMP_I]], ptr addrspace(1) align 8 [[TMP1]], i32 8, i1 false) +// X86-NEXT: call void @FuncOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[BYVAL_TEMP_I]]) #[[ATTR4]] // X86-NEXT: ret void // // // X86-LABEL: define void @__clang_ocl_kern_imp_KernelOneMemberSpir( -// X86-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META14]] !kernel_arg_access_qual 
[[META11]] !kernel_arg_type [[META15]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] { +// X86-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META14]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] { // X86-NEXT: [[ENTRY:.*:]] // X86-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 4 // X86-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 4 // X86-NEXT: store ptr addrspace(1) [[U]], ptr [[U_ADDR]], align 4 // X86-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[U_ADDR]], align 4 // X86-NEXT: call void @llvm.memcpy.p0.p1.i32(ptr align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 8 [[TMP0]], i32 8, i1 false) -// X86-NEXT: call void @FuncOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] +// X86-NEXT: call void @FuncOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[BYVAL_TEMP]]) #[[ATTR4]] // X86-NEXT: ret void // // // X86-LABEL: define spir_kernel void @KernelLargeOneMember( // X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] { // X86-NEXT: [[ENTRY:.*:]] -// X86-NEXT: call void @__clang_ocl_kern_imp_KernelLargeOneMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR3]] +// X86-NEXT: [[U_I:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8 +// X86-NEXT: [[U1:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8 +// X86-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[U1]], ptr align 4 [[U]], i64 800, i1 false) +// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U_I]], ptr align 4 [[U1]], i32 800, i1 false) +// X86-NEXT: call void @FuncOneLargeMember(ptr noundef 
byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[U_I]]) #[[ATTR4]] // X86-NEXT: ret void // // // X86-LABEL: define void @__clang_ocl_kern_imp_KernelLargeOneMember( -// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] { +// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] { // X86-NEXT: [[ENTRY:.*:]] // X86-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8 // X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 800, i1 false) -// X86-NEXT: call void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR3]] +// X86-NEXT: call void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR4]] // X86-NEXT: ret void // // @@ -332,32 +369,40 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) { // X86-LABEL: define spir_kernel void @KernelTwoMember( // X86-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] { // X86-NEXT: [[ENTRY:.*:]] -// X86-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR3]] +// X86-NEXT: [[U_I:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8 +// X86-NEXT: [[U1:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8 +// X86-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[U1]], ptr align 4 [[U]], i64 16, i1 false) +// 
X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U_I]], ptr align 4 [[U1]], i32 16, i1 false) +// X86-NEXT: call void @FuncTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[U_I]]) #[[ATTR4]] // X86-NEXT: ret void // // // X86-LABEL: define void @__clang_ocl_kern_imp_KernelTwoMember( -// X86-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] { +// X86-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] { // X86-NEXT: [[ENTRY:.*:]] // X86-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8 // X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 16, i1 false) -// X86-NEXT: call void @FuncTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR3]] +// X86-NEXT: call void @FuncTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR4]] // X86-NEXT: ret void // // // X86-LABEL... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/137769 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits