llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang-codegen Author: Matt Arsenault (arsenm) <details> <summary>Changes</summary> This reverts commit 6e0b0038cd65ce726ce404305a06e1cf33e36cca. This breaks the rocm-device-libs build, so it should not ship in the release. --- Patch is 214.51 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/127771.diff 21 Files Affected: - (modified) clang/lib/Basic/Targets/AMDGPU.cpp (+3-3) - (modified) clang/lib/CodeGen/CGBlocks.cpp (+1-2) - (modified) clang/lib/CodeGen/CGBuiltin.cpp (+2-9) - (modified) clang/test/CodeGen/scoped-fence-ops.c (+120-61) - (modified) clang/test/CodeGenOpenCL/addr-space-struct-arg.cl (+70-99) - (modified) clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl (+16-20) - (modified) clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl (+118-164) - (modified) clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl (+220-275) - (modified) clang/test/CodeGenOpenCL/amdgpu-nullptr.cl (+14-14) - (modified) clang/test/CodeGenOpenCL/atomic-ops.cl (+4-7) - (modified) clang/test/CodeGenOpenCL/atomics-unsafe-hw-remarks-gfx90a.cl (+3-3) - (modified) clang/test/CodeGenOpenCL/blocks.cl (+12-11) - (modified) clang/test/CodeGenOpenCL/builtins-alloca.cl (+4-428) - (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl (+56-87) - (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-gfx940.cl (+12-18) - (modified) clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl (+2-2) - (modified) clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx8.cl (+1-1) - (modified) clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl (+1-1) - (modified) clang/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl (+1-1) - (modified) clang/test/CodeGenOpenCL/opencl_types.cl (+1-1) - (modified) clang/test/Index/pipe-size.cl (+2-2) ``````````diff diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index 0d308cb6af969..9ea366af56a52 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -261,9 +261,9 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) { TargetInfo::adjust(Diags, Opts); // ToDo: There are still a few places using default address space as private - // address space in OpenCL, which needs to be cleaned up, then the references - // to OpenCL can be removed from the following line. - setAddressSpaceMap((Opts.OpenCL && !Opts.OpenCLGenericAddressSpace) || + // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL + // can be removed from the following line. + setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL || !isAMDGCN(getTriple())); } diff --git a/clang/lib/CodeGen/CGBlocks.cpp b/clang/lib/CodeGen/CGBlocks.cpp index a7584a95c8ca7..f38f86c792f69 100644 --- a/clang/lib/CodeGen/CGBlocks.cpp +++ b/clang/lib/CodeGen/CGBlocks.cpp @@ -1396,8 +1396,7 @@ void CodeGenFunction::setBlockContextParameter(const ImplicitParamDecl *D, DI->setLocation(D->getLocation()); DI->EmitDeclareOfBlockLiteralArgVariable( *BlockInfo, D->getName(), argNum, - cast<llvm::AllocaInst>(alloc.getPointer()->stripPointerCasts()), - Builder); + cast<llvm::AllocaInst>(alloc.getPointer()), Builder); } } diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 7ec9d59bfed5c..5237533364294 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -6092,13 +6092,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, /*IndexTypeQuals=*/0); auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes"); llvm::Value *TmpPtr = Tmp.getPointer(); - // The EmitLifetime* pair expect a naked Alloca as their last argument, - // however for cases where the default AS is not the Alloca AS, Tmp is - // actually the Alloca ascasted to the default AS, hence the - // stripPointerCasts() - llvm::Value *Alloca = TmpPtr->stripPointerCasts(); llvm::Value *TmpSize = EmitLifetimeStart( - CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), Alloca); + CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), TmpPtr); llvm::Value *ElemPtr; // Each of the following arguments specifies the size of the corresponding // argument passed to the enqueued block. @@ -6114,9 +6109,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Builder.CreateAlignedStore( V, GEP, CGM.getDataLayout().getPrefTypeAlign(SizeTy)); } - // Return the Alloca itself rather than a potential ascast as this is only - // used by the paired EmitLifetimeEnd. - return std::tie(ElemPtr, TmpSize, Alloca); + return std::tie(ElemPtr, TmpSize, TmpPtr); }; // Could have events and/or varargs. diff --git a/clang/test/CodeGen/scoped-fence-ops.c b/clang/test/CodeGen/scoped-fence-ops.c index d83ae05b0aea2..20cbb511a1758 100644 --- a/clang/test/CodeGen/scoped-fence-ops.c +++ b/clang/test/CodeGen/scoped-fence-ops.c @@ -1,8 +1,8 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 // RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa -ffreestanding \ -// RUN: -fvisibility=hidden | FileCheck --check-prefix=AMDGCN %s +// RUN: -fvisibility=hidden | FileCheck --check-prefixes=AMDGCN,AMDGCN-CL12 %s // RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa -ffreestanding \ -// RUN: -cl-std=CL2.0 -fvisibility=hidden | FileCheck --check-prefix=AMDGCN %s +// RUN: -cl-std=CL2.0 -fvisibility=hidden | FileCheck --check-prefixes=AMDGCN,AMDGCN-CL20 %s // RUN: %clang_cc1 %s -emit-llvm -o - -triple=spirv64-unknown-unknown -ffreestanding \ // RUN: -fvisibility=hidden | FileCheck --check-prefix=SPIRV %s // RUN: %clang_cc1 %s -emit-llvm -o - -triple=x86_64-unknown-linux-gnu -ffreestanding \ @@ -30,34 +30,62 @@ void fe1a() { __scoped_atomic_thread_fence(__ATOMIC_RELEASE, __MEMORY_SCOPE_WRKGRP); } -// AMDGCN-LABEL: define hidden void @fe1b( -// AMDGCN-SAME: i32 noundef [[ORD:%.*]]) #[[ATTR0]] { -// AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[ORD_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// AMDGCN-NEXT: [[ORD_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ORD_ADDR]] to ptr -// AMDGCN-NEXT: store i32 [[ORD]], ptr [[ORD_ADDR_ASCAST]], align 4 -// AMDGCN-NEXT: [[TMP0:%.*]] = load i32, ptr [[ORD_ADDR_ASCAST]], align 4 -// AMDGCN-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [ -// AMDGCN-NEXT: i32 1, label %[[ACQUIRE:.*]] -// AMDGCN-NEXT: i32 2, label %[[ACQUIRE]] -// AMDGCN-NEXT: i32 3, label %[[RELEASE:.*]] -// AMDGCN-NEXT: i32 4, label %[[ACQREL:.*]] -// AMDGCN-NEXT: i32 5, label %[[SEQCST:.*]] -// AMDGCN-NEXT: ] -// AMDGCN: [[ATOMIC_SCOPE_CONTINUE]]: -// AMDGCN-NEXT: ret void -// AMDGCN: [[ACQUIRE]]: -// AMDGCN-NEXT: fence syncscope("workgroup") acquire -// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] -// AMDGCN: [[RELEASE]]: -// AMDGCN-NEXT: fence syncscope("workgroup") release -// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] -// AMDGCN: [[ACQREL]]: -// AMDGCN-NEXT: fence syncscope("workgroup") acq_rel -// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] -// AMDGCN: [[SEQCST]]: -// AMDGCN-NEXT: fence syncscope("workgroup") seq_cst -// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// AMDGCN-CL12-LABEL: define hidden void @fe1b( +// AMDGCN-CL12-SAME: i32 noundef [[ORD:%.*]]) #[[ATTR0]] { +// AMDGCN-CL12-NEXT: [[ENTRY:.*:]] +// AMDGCN-CL12-NEXT: [[ORD_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// AMDGCN-CL12-NEXT: [[ORD_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ORD_ADDR]] to ptr +// AMDGCN-CL12-NEXT: store i32 [[ORD]], ptr [[ORD_ADDR_ASCAST]], align 4 +// AMDGCN-CL12-NEXT: [[TMP0:%.*]] = load i32, ptr [[ORD_ADDR_ASCAST]], align 4 +// AMDGCN-CL12-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [ +// AMDGCN-CL12-NEXT: i32 1, label %[[ACQUIRE:.*]] +// AMDGCN-CL12-NEXT: i32 2, label %[[ACQUIRE]] +// AMDGCN-CL12-NEXT: i32 3, label %[[RELEASE:.*]] +// AMDGCN-CL12-NEXT: i32 4, label %[[ACQREL:.*]] +// AMDGCN-CL12-NEXT: i32 5, label %[[SEQCST:.*]] +// AMDGCN-CL12-NEXT: ] +// AMDGCN-CL12: [[ATOMIC_SCOPE_CONTINUE]]: +// AMDGCN-CL12-NEXT: ret void +// AMDGCN-CL12: [[ACQUIRE]]: +// AMDGCN-CL12-NEXT: fence syncscope("workgroup") acquire +// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// AMDGCN-CL12: [[RELEASE]]: +// AMDGCN-CL12-NEXT: fence syncscope("workgroup") release +// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// AMDGCN-CL12: [[ACQREL]]: +// AMDGCN-CL12-NEXT: fence syncscope("workgroup") acq_rel +// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// AMDGCN-CL12: [[SEQCST]]: +// AMDGCN-CL12-NEXT: fence syncscope("workgroup") seq_cst +// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// +// AMDGCN-CL20-LABEL: define hidden void @fe1b( +// AMDGCN-CL20-SAME: i32 noundef [[ORD:%.*]]) #[[ATTR0]] { +// AMDGCN-CL20-NEXT: [[ENTRY:.*:]] +// AMDGCN-CL20-NEXT: [[ORD_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// AMDGCN-CL20-NEXT: store i32 [[ORD]], ptr addrspace(5) [[ORD_ADDR]], align 4 +// AMDGCN-CL20-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[ORD_ADDR]], align 4 +// AMDGCN-CL20-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [ +// AMDGCN-CL20-NEXT: i32 1, label %[[ACQUIRE:.*]] +// AMDGCN-CL20-NEXT: i32 2, label %[[ACQUIRE]] +// AMDGCN-CL20-NEXT: i32 3, label %[[RELEASE:.*]] +// AMDGCN-CL20-NEXT: i32 4, label %[[ACQREL:.*]] +// AMDGCN-CL20-NEXT: i32 5, label %[[SEQCST:.*]] +// AMDGCN-CL20-NEXT: ] +// AMDGCN-CL20: [[ATOMIC_SCOPE_CONTINUE]]: +// AMDGCN-CL20-NEXT: ret void +// AMDGCN-CL20: [[ACQUIRE]]: +// AMDGCN-CL20-NEXT: fence syncscope("workgroup") acquire +// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// AMDGCN-CL20: [[RELEASE]]: +// AMDGCN-CL20-NEXT: fence syncscope("workgroup") release +// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// AMDGCN-CL20: [[ACQREL]]: +// AMDGCN-CL20-NEXT: fence syncscope("workgroup") acq_rel +// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// AMDGCN-CL20: [[SEQCST]]: +// AMDGCN-CL20-NEXT: fence syncscope("workgroup") seq_cst +// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] // // SPIRV-LABEL: define hidden spir_func void @fe1b( // SPIRV-SAME: i32 noundef [[ORD:%.*]]) #[[ATTR0]] { @@ -119,37 +147,68 @@ void fe1b(int ord) { __scoped_atomic_thread_fence(ord, __MEMORY_SCOPE_WRKGRP); } -// AMDGCN-LABEL: define hidden void @fe1c( -// AMDGCN-SAME: i32 noundef [[SCOPE:%.*]]) #[[ATTR0]] { -// AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[SCOPE_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// AMDGCN-NEXT: [[SCOPE_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SCOPE_ADDR]] to ptr -// AMDGCN-NEXT: store i32 [[SCOPE]], ptr [[SCOPE_ADDR_ASCAST]], align 4 -// AMDGCN-NEXT: [[TMP0:%.*]] = load i32, ptr [[SCOPE_ADDR_ASCAST]], align 4 -// AMDGCN-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [ -// AMDGCN-NEXT: i32 1, label %[[DEVICE_SCOPE:.*]] -// AMDGCN-NEXT: i32 0, label %[[SYSTEM_SCOPE:.*]] -// AMDGCN-NEXT: i32 2, label %[[WORKGROUP_SCOPE:.*]] -// AMDGCN-NEXT: i32 3, label %[[WAVEFRONT_SCOPE:.*]] -// AMDGCN-NEXT: i32 4, label %[[SINGLE_SCOPE:.*]] -// AMDGCN-NEXT: ] -// AMDGCN: [[ATOMIC_SCOPE_CONTINUE]]: -// AMDGCN-NEXT: ret void -// AMDGCN: [[DEVICE_SCOPE]]: -// AMDGCN-NEXT: fence syncscope("agent") release -// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] -// AMDGCN: [[SYSTEM_SCOPE]]: -// AMDGCN-NEXT: fence release -// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] -// AMDGCN: [[WORKGROUP_SCOPE]]: -// AMDGCN-NEXT: fence syncscope("workgroup") release -// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] -// AMDGCN: [[WAVEFRONT_SCOPE]]: -// AMDGCN-NEXT: fence syncscope("wavefront") release -// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] -// AMDGCN: [[SINGLE_SCOPE]]: -// AMDGCN-NEXT: fence syncscope("singlethread") release -// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// AMDGCN-CL12-LABEL: define hidden void @fe1c( +// AMDGCN-CL12-SAME: i32 noundef [[SCOPE:%.*]]) #[[ATTR0]] { +// AMDGCN-CL12-NEXT: [[ENTRY:.*:]] +// AMDGCN-CL12-NEXT: [[SCOPE_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// AMDGCN-CL12-NEXT: [[SCOPE_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SCOPE_ADDR]] to ptr +// AMDGCN-CL12-NEXT: store i32 [[SCOPE]], ptr [[SCOPE_ADDR_ASCAST]], align 4 +// AMDGCN-CL12-NEXT: [[TMP0:%.*]] = load i32, ptr [[SCOPE_ADDR_ASCAST]], align 4 +// AMDGCN-CL12-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [ +// AMDGCN-CL12-NEXT: i32 1, label %[[DEVICE_SCOPE:.*]] +// AMDGCN-CL12-NEXT: i32 0, label %[[SYSTEM_SCOPE:.*]] +// AMDGCN-CL12-NEXT: i32 2, label %[[WORKGROUP_SCOPE:.*]] +// AMDGCN-CL12-NEXT: i32 3, label %[[WAVEFRONT_SCOPE:.*]] +// AMDGCN-CL12-NEXT: i32 4, label %[[SINGLE_SCOPE:.*]] +// AMDGCN-CL12-NEXT: ] +// AMDGCN-CL12: [[ATOMIC_SCOPE_CONTINUE]]: +// AMDGCN-CL12-NEXT: ret void +// AMDGCN-CL12: [[DEVICE_SCOPE]]: +// AMDGCN-CL12-NEXT: fence syncscope("agent") release +// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// AMDGCN-CL12: [[SYSTEM_SCOPE]]: +// AMDGCN-CL12-NEXT: fence release +// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// AMDGCN-CL12: [[WORKGROUP_SCOPE]]: +// AMDGCN-CL12-NEXT: fence syncscope("workgroup") release +// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// AMDGCN-CL12: [[WAVEFRONT_SCOPE]]: +// AMDGCN-CL12-NEXT: fence syncscope("wavefront") release +// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// AMDGCN-CL12: [[SINGLE_SCOPE]]: +// AMDGCN-CL12-NEXT: fence syncscope("singlethread") release +// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// +// AMDGCN-CL20-LABEL: define hidden void @fe1c( +// AMDGCN-CL20-SAME: i32 noundef [[SCOPE:%.*]]) #[[ATTR0]] { +// AMDGCN-CL20-NEXT: [[ENTRY:.*:]] +// AMDGCN-CL20-NEXT: [[SCOPE_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// AMDGCN-CL20-NEXT: store i32 [[SCOPE]], ptr addrspace(5) [[SCOPE_ADDR]], align 4 +// AMDGCN-CL20-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[SCOPE_ADDR]], align 4 +// AMDGCN-CL20-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [ +// AMDGCN-CL20-NEXT: i32 1, label %[[DEVICE_SCOPE:.*]] +// AMDGCN-CL20-NEXT: i32 0, label %[[SYSTEM_SCOPE:.*]] +// AMDGCN-CL20-NEXT: i32 2, label %[[WORKGROUP_SCOPE:.*]] +// AMDGCN-CL20-NEXT: i32 3, label %[[WAVEFRONT_SCOPE:.*]] +// AMDGCN-CL20-NEXT: i32 4, label %[[SINGLE_SCOPE:.*]] +// AMDGCN-CL20-NEXT: ] +// AMDGCN-CL20: [[ATOMIC_SCOPE_CONTINUE]]: +// AMDGCN-CL20-NEXT: ret void +// AMDGCN-CL20: [[DEVICE_SCOPE]]: +// AMDGCN-CL20-NEXT: fence syncscope("agent") release +// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// AMDGCN-CL20: [[SYSTEM_SCOPE]]: +// AMDGCN-CL20-NEXT: fence release +// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// AMDGCN-CL20: [[WORKGROUP_SCOPE]]: +// AMDGCN-CL20-NEXT: fence syncscope("workgroup") release +// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// AMDGCN-CL20: [[WAVEFRONT_SCOPE]]: +// AMDGCN-CL20-NEXT: fence syncscope("wavefront") release +// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// AMDGCN-CL20: [[SINGLE_SCOPE]]: +// AMDGCN-CL20-NEXT: fence syncscope("singlethread") release +// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] // // SPIRV-LABEL: define hidden spir_func void @fe1c( // SPIRV-SAME: i32 noundef [[SCOPE:%.*]]) #[[ATTR0]] { diff --git a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl index 57d056b0ff9d5..7377b5bcbc347 100644 --- a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl +++ b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl @@ -69,11 +69,9 @@ struct LargeStructOneMember g_s; // AMDGCN20-NEXT: [[ENTRY:.*:]] // AMDGCN20-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) // AMDGCN20-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5) -// AMDGCN20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// AMDGCN20-NEXT: [[IN1:%.*]] = addrspacecast ptr addrspace(5) [[IN]] to ptr -// AMDGCN20-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr [[IN1]], i32 0, i32 0 -// AMDGCN20-NEXT: store [9 x i32] [[IN_COERCE]], ptr [[COERCE_DIVE]], align 4 -// AMDGCN20-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr [[RETVAL_ASCAST]], align 4 +// AMDGCN20-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(5) [[IN]], i32 0, i32 0 +// AMDGCN20-NEXT: store [9 x i32] [[IN_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 4 +// AMDGCN20-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr addrspace(5) [[RETVAL]], align 4 // AMDGCN20-NEXT: ret [[STRUCT_MAT4X4]] [[TMP0]] // // SPIR-LABEL: define dso_local spir_func void @foo( @@ -152,22 +150,19 @@ Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) { // AMDGCN20-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN20-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN20-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) -// AMDGCN20-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr -// AMDGCN20-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr -// AMDGCN20-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr -// AMDGCN20-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR_ASCAST]], align 8 -// AMDGCN20-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8 -// AMDGCN20-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// AMDGCN20-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN20-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN20-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 // AMDGCN20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i64 0 -// AMDGCN20-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR_ASCAST]], align 8 +// AMDGCN20-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 // AMDGCN20-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr addrspace(1) [[TMP1]], i64 1 // AMDGCN20-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0 // AMDGCN20-NEXT: [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4 // AMDGCN20-NEXT: [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR3:[0-9]+]] -// AMDGCN20-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr [[TMP_ASCAST]], i32 0, i32 0 +// AMDGCN20-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr addrspace(5) [[TMP]], i32 0, i32 0 // AMDGCN20-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0 -// AMDGCN20-NEXT: store [16 x i32] [[TMP5]], ptr [[TMP4]], align 4 -// AMDGCN20-NEXT: call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP_ASCAST]], i64 64, i1 false) +// AMDGCN20-NEXT: store [16 x i32] [[TMP5]], ptr addrspace(5) [[TMP4]], align 4 +// AMDGCN20-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 64, i1 false) // AMDGCN20-NEXT: ret void // // SPIR-LABEL: define dso_local spir_kernel void @ker( @@ -250,11 +245,10 @@ kernel void ker(global Mat3X3 *in, global Mat4X4 *out) { // AMDGCN-NEXT: ret void // // AMDGCN20-LABEL: define dso_local void @foo_large( -// AMDGCN20-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { +// AMDGCN20-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { // AMDGCN20-NEXT: [[ENTRY:.*:]] -// AMDGCN20-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5) -// AMDGCN20-NEXT: [[IN:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr -// AMDGCN20-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false) +// AMDGCN20-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5) +// AMDGCN20-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false) // AMDGCN20-NEXT: ret void // // SPIR-LABEL: define dso_local spir_func void @foo_large( @@ -325,18 +319,15 @@ Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) { // AMDGCN20-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN20-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4, addrspace(5) // AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4, addrspace(5) -// AMDGCN20-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr -// AMDGCN20-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr -// AMDGCN20-NEXT: [[TM... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/127771 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits