Author: Matt Arsenault Date: 2026-03-12T07:28:39+01:00 New Revision: 7cb3005ba22899b8ab7efe4fe43cba09cb7e12d4
URL: https://github.com/llvm/llvm-project/commit/7cb3005ba22899b8ab7efe4fe43cba09cb7e12d4 DIFF: https://github.com/llvm/llvm-project/commit/7cb3005ba22899b8ab7efe4fe43cba09cb7e12d4.diff LOG: AMDGPU: Add dereferenceable attribute to dispatch ptr intrinsic (#185955) Stop manually setting it on the callsite in clang. Added: Modified: clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp clang/test/CodeGen/amdgpu-abi-version.c clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu clang/test/CodeGenCUDA/builtins-amdgcn.cu clang/test/CodeGenCUDA/builtins-spirv-amdgcn.cu clang/test/CodeGenOpenCL/builtins-amdgcn-workgroup-size.cl clang/test/CodeGenOpenCL/builtins-amdgcn.cl clang/test/Headers/gpuintrin.c llvm/include/llvm/IR/IntrinsicsAMDGPU.td llvm/test/Assembler/amdgcn-intrinsic-attributes.ll llvm/test/CodeGen/AMDGPU/implicit-arg-block-count.ll Removed: ################################################################################ diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp index 4258bfeea1c35..0d572d37ab972 100644 --- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp @@ -56,9 +56,6 @@ Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF, const CallExpr *E = nullptr) { auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr); auto *Call = CGF.Builder.CreateCall(F); - Call->addRetAttr( - Attribute::getWithDereferenceableBytes(Call->getContext(), 64)); - Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(4))); if (!E) return Call; QualType BuiltinRetType = E->getType(); diff --git a/clang/test/CodeGen/amdgpu-abi-version.c b/clang/test/CodeGen/amdgpu-abi-version.c index 2cfab3e8e3e0c..ae67aa405f4bc 100644 --- a/clang/test/CodeGen/amdgpu-abi-version.c +++ b/clang/test/CodeGen/amdgpu-abi-version.c @@ -19,7 +19,7 @@ // LLVM-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[TMP2]], i32 [[TMP7]] // LLVM-NEXT: [[TMP9:%.*]] = load i16, ptr addrspace(4) [[TMP8]], align 2, !range [[RNG2:![0-9]+]], !invariant.load [[META1]], !noundef [[META1]] // LLVM-NEXT: [[TMP10:%.*]] = zext i16 [[TMP9]] to i32 -// LLVM-NEXT: [[TMP11:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// LLVM-NEXT: [[TMP11:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() // LLVM-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[TMP11]], i64 4 // LLVM-NEXT: [[TMP13:%.*]] = load i16, ptr addrspace(4) [[TMP12]], align 2, !range [[RNG2]], !invariant.load [[META1]], !noundef [[META1]] // LLVM-NEXT: [[TMP14:%.*]] = zext i16 [[TMP13]] to i32 diff --git a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu index b7f597b989242..782728c1e0ae0 100644 --- a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu +++ b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu @@ -31,7 +31,7 @@ // PRECOV5-NEXT: i32 2, label %[[SW_BB2:.*]] // PRECOV5-NEXT: ] // PRECOV5: [[SW_BB]]: -// PRECOV5-NEXT: [[TMP1:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// PRECOV5-NEXT: [[TMP1:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() // PRECOV5-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[TMP1]], i64 4 // PRECOV5-NEXT: [[TMP3:%.*]] = load i16, ptr addrspace(4) [[TMP2]], align 2, !range [[RNG3:![0-9]+]], !invariant.load [[META4:![0-9]+]], !noundef [[META4]] // PRECOV5-NEXT: [[TMP4:%.*]] = zext i16 [[TMP3]] to i32 @@ -39,7 +39,7 @@ // PRECOV5-NEXT: store i32 [[TMP4]], ptr [[TMP5]], align 4 // PRECOV5-NEXT: br label %[[SW_EPILOG:.*]] // PRECOV5: [[SW_BB1]]: -// PRECOV5-NEXT: [[TMP6:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// PRECOV5-NEXT: [[TMP6:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() // PRECOV5-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[TMP6]], i64 6 // PRECOV5-NEXT: [[TMP8:%.*]] = load i16, ptr addrspace(4) [[TMP7]], align 2, !range [[RNG3]], !invariant.load [[META4]], !noundef [[META4]] // PRECOV5-NEXT: [[TMP9:%.*]] = zext i16 [[TMP8]] to i32 @@ -47,7 +47,7 @@ // PRECOV5-NEXT: store i32 [[TMP9]], ptr [[TMP10]], align 4 // PRECOV5-NEXT: br label %[[SW_EPILOG]] // PRECOV5: [[SW_BB2]]: -// PRECOV5-NEXT: [[TMP11:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// PRECOV5-NEXT: [[TMP11:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() // PRECOV5-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[TMP11]], i64 8 // PRECOV5-NEXT: [[TMP13:%.*]] = load i16, ptr addrspace(4) [[TMP12]], align 2, !range [[RNG3]], !invariant.load [[META4]], !noundef [[META4]] // PRECOV5-NEXT: [[TMP14:%.*]] = zext i16 [[TMP13]] to i32 diff --git a/clang/test/CodeGenCUDA/builtins-amdgcn.cu b/clang/test/CodeGenCUDA/builtins-amdgcn.cu index 4bf23e529c7a5..7edf64db91f2e 100644 --- a/clang/test/CodeGenCUDA/builtins-amdgcn.cu +++ b/clang/test/CodeGenCUDA/builtins-amdgcn.cu @@ -20,7 +20,7 @@ // CHECK-NEXT: store ptr addrspace(1) [[OUT_COERCE:%.*]], ptr [[OUT_ASCAST]], align 8 // CHECK-NEXT: [[OUT1:%.*]] = load ptr, ptr [[OUT_ASCAST]], align 8 // CHECK-NEXT: store ptr [[OUT1]], ptr [[OUT_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() // CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[TMP0]] to ptr // CHECK-NEXT: store ptr [[TMP1]], ptr [[DISPATCH_PTR_ASCAST]], align 8 // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DISPATCH_PTR_ASCAST]], align 8 @@ -150,7 +150,7 @@ __global__ void test_ds_fmin(float src, float *shared) { // CHECK-NEXT: entry: // CHECK-NEXT: [[X:%.*]] = alloca ptr, align 8, addrspace(5) // CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr -// CHECK-NEXT: [[TMP0:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() // CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[TMP0]] to ptr // CHECK-NEXT: store ptr [[TMP1]], ptr [[X_ASCAST]], align 8 // CHECK-NEXT: ret void @@ -241,7 +241,7 @@ __device__ void func(float *x); // CHECK-NEXT: [[TMP3:%.*]] = atomicrmw fmin ptr addrspace(3) [[TMP1]], float [[TMP2]] monotonic, align 4 // CHECK-NEXT: store volatile float [[TMP3]], ptr [[X_ASCAST]], align 4 // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SHARED_ADDR_ASCAST]], align 8 -// CHECK-NEXT: call void @_Z4funcPf(ptr noundef [[TMP4]]) #[[ATTR7:[0-9]+]] +// CHECK-NEXT: call void @_Z4funcPf(ptr noundef [[TMP4]]) #[[ATTR8:[0-9]+]] // CHECK-NEXT: ret void // __global__ void test_ds_fmin_func(float src, float *__restrict shared) { diff --git a/clang/test/CodeGenCUDA/builtins-spirv-amdgcn.cu b/clang/test/CodeGenCUDA/builtins-spirv-amdgcn.cu index 1cbe358910b85..677fcd761760d 100644 --- a/clang/test/CodeGenCUDA/builtins-spirv-amdgcn.cu +++ b/clang/test/CodeGenCUDA/builtins-spirv-amdgcn.cu @@ -20,7 +20,7 @@ // CHECK-NEXT: store ptr addrspace(1) [[OUT_COERCE:%.*]], ptr addrspace(4) [[OUT_ASCAST]], align 8 // CHECK-NEXT: [[OUT1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ASCAST]], align 8 // CHECK-NEXT: store ptr addrspace(4) [[OUT1]], ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = call align 4 dereferenceable(64) addrspace(4) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// CHECK-NEXT: [[TMP0:%.*]] = call addrspace(4) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() // CHECK-NEXT: store ptr addrspace(4) [[TMP0]], ptr addrspace(4) [[DISPATCH_PTR_ASCAST]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[DISPATCH_PTR_ASCAST]], align 8 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4 @@ -232,7 +232,7 @@ __device__ void func(float *x); // CHECK-NEXT: [[TMP3:%.*]] = atomicrmw fmin ptr addrspace(3) [[TMP1]], float [[TMP2]] monotonic, align 4 // CHECK-NEXT: store volatile float [[TMP3]], ptr addrspace(4) [[X_ASCAST]], align 4 // CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[SHARED_ADDR_ASCAST]], align 8 -// CHECK-NEXT: call spir_func addrspace(4) void @_Z4funcPf(ptr addrspace(4) noundef [[TMP4]]) #[[ATTR6:[0-9]+]] +// CHECK-NEXT: call spir_func addrspace(4) void @_Z4funcPf(ptr addrspace(4) noundef [[TMP4]]) #[[ATTR7:[0-9]+]] // CHECK-NEXT: ret void // __global__ void test_ds_fmin_func(float src, float *__restrict shared) { diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-workgroup-size.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-workgroup-size.cl index 9b4cdfa08176f..4e64f1127a912 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-workgroup-size.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-workgroup-size.cl @@ -33,7 +33,7 @@ // NONUNIFORM-V4-LABEL: define dso_local range(i32 0, 1025) i32 @test_get_workgroup_size_x( // NONUNIFORM-V4-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] { // NONUNIFORM-V4-NEXT: [[ENTRY:.*:]] -// NONUNIFORM-V4-NEXT: [[TMP0:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// NONUNIFORM-V4-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() // NONUNIFORM-V4-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP0]], i64 4 // NONUNIFORM-V4-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(4) [[TMP1]], align 4, !range [[RNG7:![0-9]+]], !invariant.load [[META8:![0-9]+]], !noundef [[META8]] // NONUNIFORM-V4-NEXT: [[TMP3:%.*]] = zext nneg i16 [[TMP2]] to i32 @@ -48,7 +48,7 @@ // UNIFORM-V4-LABEL: define dso_local range(i32 1, 1025) i32 @test_get_workgroup_size_x( // UNIFORM-V4-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] { // UNIFORM-V4-NEXT: [[ENTRY:.*:]] -// UNIFORM-V4-NEXT: [[TMP0:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// UNIFORM-V4-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() // UNIFORM-V4-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP0]], i64 4 // UNIFORM-V4-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(4) [[TMP1]], align 4, !range [[RNG7:![0-9]+]], !invariant.load [[META8:![0-9]+]], !noundef [[META8]] // UNIFORM-V4-NEXT: [[TMP3:%.*]] = zext nneg i16 [[TMP2]] to i32 @@ -67,7 +67,7 @@ // NONUNIFORM-UNKNOWN-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP2]], i64 [[TMP6]] // NONUNIFORM-UNKNOWN-NEXT: [[TMP8:%.*]] = load i16, ptr addrspace(4) [[TMP7]], align 2, !range [[RNG7:![0-9]+]], !invariant.load [[META6]], !noundef [[META6]] // NONUNIFORM-UNKNOWN-NEXT: [[TMP9:%.*]] = zext nneg i16 [[TMP8]] to i32 -// NONUNIFORM-UNKNOWN-NEXT: [[TMP10:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// NONUNIFORM-UNKNOWN-NEXT: [[TMP10:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() // NONUNIFORM-UNKNOWN-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP10]], i64 4 // NONUNIFORM-UNKNOWN-NEXT: [[TMP12:%.*]] = load i16, ptr addrspace(4) [[TMP11]], align 4, !range [[RNG7]], !invariant.load [[META6]], !noundef [[META6]] // NONUNIFORM-UNKNOWN-NEXT: [[TMP13:%.*]] = zext nneg i16 [[TMP12]] to i32 @@ -87,7 +87,7 @@ // UNIFORM-UNKNOWN-NEXT: [[TMP2:%.*]] = tail call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() // UNIFORM-UNKNOWN-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP2]], i64 12 // UNIFORM-UNKNOWN-NEXT: [[TMP4:%.*]] = load i16, ptr addrspace(4) [[TMP3]], align 4, !range [[RNG6:![0-9]+]], !invariant.load [[META7:![0-9]+]], !noundef [[META7]] -// UNIFORM-UNKNOWN-NEXT: [[TMP5:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// UNIFORM-UNKNOWN-NEXT: [[TMP5:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() // UNIFORM-UNKNOWN-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP5]], i64 4 // UNIFORM-UNKNOWN-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(4) [[TMP6]], align 4, !range [[RNG6]], !invariant.load [[META7]], !noundef [[META7]] // UNIFORM-UNKNOWN-NEXT: [[DOTV:%.*]] = select i1 [[TMP1]], i16 [[TMP4]], i16 [[TMP7]] @@ -125,7 +125,7 @@ unsigned int test_get_workgroup_size_x() // NONUNIFORM-V4-LABEL: define dso_local range(i32 0, 1025) i32 @test_get_workgroup_size_y( // NONUNIFORM-V4-SAME: ) local_unnamed_addr #[[ATTR2:[0-9]+]] { // NONUNIFORM-V4-NEXT: [[ENTRY:.*:]] -// NONUNIFORM-V4-NEXT: [[TMP0:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// NONUNIFORM-V4-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() // NONUNIFORM-V4-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP0]], i64 6 // NONUNIFORM-V4-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(4) [[TMP1]], align 2, !range [[RNG7]], !invariant.load [[META8]], !noundef [[META8]] // NONUNIFORM-V4-NEXT: [[TMP3:%.*]] = zext nneg i16 [[TMP2]] to i32 @@ -140,7 +140,7 @@ unsigned int test_get_workgroup_size_x() // UNIFORM-V4-LABEL: define dso_local range(i32 1, 1025) i32 @test_get_workgroup_size_y( // UNIFORM-V4-SAME: ) local_unnamed_addr #[[ATTR0]] { // UNIFORM-V4-NEXT: [[ENTRY:.*:]] -// UNIFORM-V4-NEXT: [[TMP0:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// UNIFORM-V4-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() // UNIFORM-V4-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP0]], i64 6 // UNIFORM-V4-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(4) [[TMP1]], align 2, !range [[RNG7]], !invariant.load [[META8]], !noundef [[META8]] // UNIFORM-V4-NEXT: [[TMP3:%.*]] = zext nneg i16 [[TMP2]] to i32 @@ -160,7 +160,7 @@ unsigned int test_get_workgroup_size_x() // NONUNIFORM-UNKNOWN-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP2]], i64 [[TMP7]] // NONUNIFORM-UNKNOWN-NEXT: [[TMP9:%.*]] = load i16, ptr addrspace(4) [[TMP8]], align 2, !range [[RNG7]], !invariant.load [[META6]], !noundef [[META6]] // NONUNIFORM-UNKNOWN-NEXT: [[TMP10:%.*]] = zext nneg i16 [[TMP9]] to i32 -// NONUNIFORM-UNKNOWN-NEXT: [[TMP11:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// NONUNIFORM-UNKNOWN-NEXT: [[TMP11:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() // NONUNIFORM-UNKNOWN-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP11]], i64 6 // NONUNIFORM-UNKNOWN-NEXT: [[TMP13:%.*]] = load i16, ptr addrspace(4) [[TMP12]], align 2, !range [[RNG7]], !invariant.load [[META6]], !noundef [[META6]] // NONUNIFORM-UNKNOWN-NEXT: [[TMP14:%.*]] = zext nneg i16 [[TMP13]] to i32 @@ -180,7 +180,7 @@ unsigned int test_get_workgroup_size_x() // UNIFORM-UNKNOWN-NEXT: [[TMP2:%.*]] = tail call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() // UNIFORM-UNKNOWN-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP2]], i64 14 // UNIFORM-UNKNOWN-NEXT: [[TMP4:%.*]] = load i16, ptr addrspace(4) [[TMP3]], align 2, !range [[RNG6]], !invariant.load [[META7]], !noundef [[META7]] -// UNIFORM-UNKNOWN-NEXT: [[TMP5:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// UNIFORM-UNKNOWN-NEXT: [[TMP5:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() // UNIFORM-UNKNOWN-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP5]], i64 6 // UNIFORM-UNKNOWN-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(4) [[TMP6]], align 2, !range [[RNG6]], !invariant.load [[META7]], !noundef [[META7]] // UNIFORM-UNKNOWN-NEXT: [[DOTV:%.*]] = select i1 [[TMP1]], i16 [[TMP4]], i16 [[TMP7]] @@ -218,7 +218,7 @@ unsigned int test_get_workgroup_size_y() // NONUNIFORM-V4-LABEL: define dso_local range(i32 0, 1025) i32 @test_get_workgroup_size_z( // NONUNIFORM-V4-SAME: ) local_unnamed_addr #[[ATTR3:[0-9]+]] { // NONUNIFORM-V4-NEXT: [[ENTRY:.*:]] -// NONUNIFORM-V4-NEXT: [[TMP0:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// NONUNIFORM-V4-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() // NONUNIFORM-V4-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP0]], i64 8 // NONUNIFORM-V4-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(4) [[TMP1]], align 4, !range [[RNG7]], !invariant.load [[META8]], !noundef [[META8]] // NONUNIFORM-V4-NEXT: [[TMP3:%.*]] = zext nneg i16 [[TMP2]] to i32 @@ -233,7 +233,7 @@ unsigned int test_get_workgroup_size_y() // UNIFORM-V4-LABEL: define dso_local range(i32 1, 1025) i32 @test_get_workgroup_size_z( // UNIFORM-V4-SAME: ) local_unnamed_addr #[[ATTR0]] { // UNIFORM-V4-NEXT: [[ENTRY:.*:]] -// UNIFORM-V4-NEXT: [[TMP0:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// UNIFORM-V4-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() // UNIFORM-V4-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP0]], i64 8 // UNIFORM-V4-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(4) [[TMP1]], align 4, !range [[RNG7]], !invariant.load [[META8]], !noundef [[META8]] // UNIFORM-V4-NEXT: [[TMP3:%.*]] = zext nneg i16 [[TMP2]] to i32 @@ -253,7 +253,7 @@ unsigned int test_get_workgroup_size_y() // NONUNIFORM-UNKNOWN-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP2]], i64 [[TMP7]] // NONUNIFORM-UNKNOWN-NEXT: [[TMP9:%.*]] = load i16, ptr addrspace(4) [[TMP8]], align 2, !range [[RNG7]], !invariant.load [[META6]], !noundef [[META6]] // NONUNIFORM-UNKNOWN-NEXT: [[TMP10:%.*]] = zext nneg i16 [[TMP9]] to i32 -// NONUNIFORM-UNKNOWN-NEXT: [[TMP11:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// NONUNIFORM-UNKNOWN-NEXT: [[TMP11:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() // NONUNIFORM-UNKNOWN-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP11]], i64 8 // NONUNIFORM-UNKNOWN-NEXT: [[TMP13:%.*]] = load i16, ptr addrspace(4) [[TMP12]], align 4, !range [[RNG7]], !invariant.load [[META6]], !noundef [[META6]] // NONUNIFORM-UNKNOWN-NEXT: [[TMP14:%.*]] = zext nneg i16 [[TMP13]] to i32 @@ -273,7 +273,7 @@ unsigned int test_get_workgroup_size_y() // UNIFORM-UNKNOWN-NEXT: [[TMP2:%.*]] = tail call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() // UNIFORM-UNKNOWN-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP2]], i64 16 // UNIFORM-UNKNOWN-NEXT: [[TMP4:%.*]] = load i16, ptr addrspace(4) [[TMP3]], align 8, !range [[RNG6]], !invariant.load [[META7]], !noundef [[META7]] -// UNIFORM-UNKNOWN-NEXT: [[TMP5:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// UNIFORM-UNKNOWN-NEXT: [[TMP5:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() // UNIFORM-UNKNOWN-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP5]], i64 8 // UNIFORM-UNKNOWN-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(4) [[TMP6]], align 4, !range [[RNG6]], !invariant.load [[META7]], !noundef [[META7]] // UNIFORM-UNKNOWN-NEXT: [[DOTV:%.*]] = select i1 [[TMP1]], i16 [[TMP4]], i16 [[TMP7]] @@ -368,7 +368,7 @@ unsigned int test_get_workgroup_size_z() // NONUNIFORM-V4-NEXT: i32 2, label %[[SW_BB2:.*]] // NONUNIFORM-V4-NEXT: ] // NONUNIFORM-V4: [[SW_BB]]: -// NONUNIFORM-V4-NEXT: [[TMP0:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// NONUNIFORM-V4-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() // NONUNIFORM-V4-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP0]], i64 4 // NONUNIFORM-V4-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(4) [[TMP1]], align 4, !range [[RNG7]], !invariant.load [[META8]], !noundef [[META8]] // NONUNIFORM-V4-NEXT: [[TMP3:%.*]] = zext nneg i16 [[TMP2]] to i32 @@ -381,7 +381,7 @@ unsigned int test_get_workgroup_size_z() // NONUNIFORM-V4-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[TMP9]], 1 // NONUNIFORM-V4-NEXT: br label %[[SW_EPILOG]] // NONUNIFORM-V4: [[SW_BB1]]: -// NONUNIFORM-V4-NEXT: [[TMP10:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// NONUNIFORM-V4-NEXT: [[TMP10:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() // NONUNIFORM-V4-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP10]], i64 6 // NONUNIFORM-V4-NEXT: [[TMP12:%.*]] = load i16, ptr addrspace(4) [[TMP11]], align 2, !range [[RNG7]], !invariant.load [[META8]], !noundef [[META8]] // NONUNIFORM-V4-NEXT: [[TMP13:%.*]] = zext nneg i16 [[TMP12]] to i32 @@ -393,7 +393,7 @@ unsigned int test_get_workgroup_size_z() // NONUNIFORM-V4-NEXT: [[TMP19:%.*]] = tail call i32 @llvm.umin.i32(i32 [[TMP18]], i32 [[TMP13]]) // NONUNIFORM-V4-NEXT: br label %[[SW_EPILOG]] // NONUNIFORM-V4: [[SW_BB2]]: -// NONUNIFORM-V4-NEXT: [[TMP20:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// NONUNIFORM-V4-NEXT: [[TMP20:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() // NONUNIFORM-V4-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP20]], i64 8 // NONUNIFORM-V4-NEXT: [[TMP22:%.*]] = load i16, ptr addrspace(4) [[TMP21]], align 4, !range [[RNG7]], !invariant.load [[META8]], !noundef [[META8]] // NONUNIFORM-V4-NEXT: [[TMP23:%.*]] = zext nneg i16 [[TMP22]] to i32 @@ -418,18 +418,18 @@ unsigned int test_get_workgroup_size_z() // UNIFORM-V4-NEXT: i32 2, label %[[SW_BB2:.*]] // UNIFORM-V4-NEXT: ] // UNIFORM-V4: [[SW_BB]]: -// UNIFORM-V4-NEXT: [[TMP0:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// UNIFORM-V4-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() // UNIFORM-V4-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP0]], i64 4 // UNIFORM-V4-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(4) [[TMP1]], align 4, !range [[RNG7]], !invariant.load [[META8]], !noundef [[META8]] // UNIFORM-V4-NEXT: [[NARROW:%.*]] = add nuw nsw i16 [[TMP2]], 1 // UNIFORM-V4-NEXT: br label %[[SW_EPILOG]] // UNIFORM-V4: [[SW_BB1]]: -// UNIFORM-V4-NEXT: [[TMP3:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// UNIFORM-V4-NEXT: [[TMP3:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() // UNIFORM-V4-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP3]], i64 6 // UNIFORM-V4-NEXT: [[TMP5:%.*]] = load i16, ptr addrspace(4) [[TMP4]], align 2, !range [[RNG7]], !invariant.load [[META8]], !noundef [[META8]] // UNIFORM-V4-NEXT: br label %[[SW_EPILOG]] // UNIFORM-V4: [[SW_BB2]]: -// UNIFORM-V4-NEXT: [[TMP6:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// UNIFORM-V4-NEXT: [[TMP6:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() // UNIFORM-V4-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP6]], i64 8 // UNIFORM-V4-NEXT: [[TMP8:%.*]] = load i16, ptr addrspace(4) [[TMP7]], align 4, !range [[RNG7]], !invariant.load [[META8]], !noundef [[META8]] // UNIFORM-V4-NEXT: br label %[[SW_EPILOG]] @@ -458,7 +458,7 @@ unsigned int test_get_workgroup_size_z() // NONUNIFORM-UNKNOWN-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP2]], i64 [[TMP6]] // NONUNIFORM-UNKNOWN-NEXT: [[TMP8:%.*]] = load i16, ptr addrspace(4) [[TMP7]], align 2, !range [[RNG7]], !invariant.load [[META6]], !noundef [[META6]] // NONUNIFORM-UNKNOWN-NEXT: [[TMP9:%.*]] = zext nneg i16 [[TMP8]] to i32 -// NONUNIFORM-UNKNOWN-NEXT: [[TMP10:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// NONUNIFORM-UNKNOWN-NEXT: [[TMP10:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() // NONUNIFORM-UNKNOWN-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP10]], i64 4 // NONUNIFORM-UNKNOWN-NEXT: [[TMP12:%.*]] = load i16, ptr addrspace(4) [[TMP11]], align 4, !range [[RNG7]], !invariant.load [[META6]], !noundef [[META6]] // NONUNIFORM-UNKNOWN-NEXT: [[TMP13:%.*]] = zext nneg i16 [[TMP12]] to i32 @@ -482,7 +482,7 @@ unsigned int test_get_workgroup_size_z() // NONUNIFORM-UNKNOWN-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP22]], i64 [[TMP27]] // NONUNIFORM-UNKNOWN-NEXT: [[TMP29:%.*]] = load i16, ptr addrspace(4) [[TMP28]], align 2, !range [[RNG7]], !invariant.load [[META6]], !noundef [[META6]] // NONUNIFORM-UNKNOWN-NEXT: [[TMP30:%.*]] = zext nneg i16 [[TMP29]] to i32 -// NONUNIFORM-UNKNOWN-NEXT: [[TMP31:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// NONUNIFORM-UNKNOWN-NEXT: [[TMP31:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() // NONUNIFORM-UNKNOWN-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP31]], i64 6 // NONUNIFORM-UNKNOWN-NEXT: [[TMP33:%.*]] = load i16, ptr addrspace(4) [[TMP32]], align 2, !range [[RNG7]], !invariant.load [[META6]], !noundef [[META6]] // NONUNIFORM-UNKNOWN-NEXT: [[TMP34:%.*]] = zext nneg i16 [[TMP33]] to i32 @@ -505,7 +505,7 @@ unsigned int test_get_workgroup_size_z() // NONUNIFORM-UNKNOWN-NEXT: [[TMP49:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP43]], i64 [[TMP48]] // NONUNIFORM-UNKNOWN-NEXT: [[TMP50:%.*]] = load i16, ptr addrspace(4) [[TMP49]], align 2, !range [[RNG7]], !invariant.load [[META6]], !noundef [[META6]] // NONUNIFORM-UNKNOWN-NEXT: [[TMP51:%.*]] = zext nneg i16 [[TMP50]] to i32 -// NONUNIFORM-UNKNOWN-NEXT: [[TMP52:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// NONUNIFORM-UNKNOWN-NEXT: [[TMP52:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() // NONUNIFORM-UNKNOWN-NEXT: [[TMP53:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP52]], i64 8 // NONUNIFORM-UNKNOWN-NEXT: [[TMP54:%.*]] = load i16, ptr addrspace(4) [[TMP53]], align 4, !range [[RNG7]], !invariant.load [[META6]], !noundef [[META6]] // NONUNIFORM-UNKNOWN-NEXT: [[TMP55:%.*]] = zext nneg i16 [[TMP54]] to i32 @@ -535,7 +535,7 @@ unsigned int test_get_workgroup_size_z() // UNIFORM-UNKNOWN-NEXT: [[TMP2:%.*]] = tail call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() // UNIFORM-UNKNOWN-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP2]], i64 12 // UNIFORM-UNKNOWN-NEXT: [[TMP4:%.*]] = load i16, ptr addrspace(4) [[TMP3]], align 4, !range [[RNG6]], !invariant.load [[META7]], !noundef [[META7]] -// UNIFORM-UNKNOWN-NEXT: [[TMP5:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// UNIFORM-UNKNOWN-NEXT: [[TMP5:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() // UNIFORM-UNKNOWN-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP5]], i64 4 // UNIFORM-UNKNOWN-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(4) [[TMP6]], align 4, !range [[RNG6]], !invariant.load [[META7]], !noundef [[META7]] // UNIFORM-UNKNOWN-NEXT: [[DOTV7:%.*]] = select i1 [[TMP1]], i16 [[TMP4]], i16 [[TMP7]] @@ -547,7 +547,7 @@ unsigned int test_get_workgroup_size_z() // UNIFORM-UNKNOWN-NEXT: [[TMP10:%.*]] = tail call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() // UNIFORM-UNKNOWN-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP10]], i64 14 // UNIFORM-UNKNOWN-NEXT: [[TMP12:%.*]] = load i16, ptr addrspace(4) [[TMP11]], align 2, !range [[RNG6]], !invariant.load [[META7]], !noundef [[META7]] -// UNIFORM-UNKNOWN-NEXT: [[TMP13:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// UNIFORM-UNKNOWN-NEXT: [[TMP13:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() // UNIFORM-UNKNOWN-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP13]], i64 6 // UNIFORM-UNKNOWN-NEXT: [[TMP15:%.*]] = load i16, ptr addrspace(4) [[TMP14]], align 2, !range [[RNG6]], !invariant.load [[META7]], !noundef [[META7]] // UNIFORM-UNKNOWN-NEXT: [[DOTV6:%.*]] = select i1 [[TMP9]], i16 [[TMP12]], i16 [[TMP15]] @@ -558,7 +558,7 @@ unsigned int test_get_workgroup_size_z() // UNIFORM-UNKNOWN-NEXT: [[TMP18:%.*]] = tail call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() // UNIFORM-UNKNOWN-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP18]], i64 16 // UNIFORM-UNKNOWN-NEXT: [[TMP20:%.*]] = load i16, ptr addrspace(4) [[TMP19]], align 8, !range [[RNG6]], !invariant.load [[META7]], !noundef [[META7]] -// UNIFORM-UNKNOWN-NEXT: [[TMP21:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// UNIFORM-UNKNOWN-NEXT: [[TMP21:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() // UNIFORM-UNKNOWN-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP21]], i64 8 // UNIFORM-UNKNOWN-NEXT: [[TMP23:%.*]] = load i16, ptr addrspace(4) [[TMP22]], align 4, !range [[RNG6]], !invariant.load [[META7]], !noundef [[META7]] // UNIFORM-UNKNOWN-NEXT: [[DOTV:%.*]] = select i1 [[TMP17]], i16 [[TMP20]], i16 [[TMP23]] diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl index dc5333c92d439..f4e2676212f3d 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -1051,7 +1051,7 @@ void test_read_exec_hi(global uint* out) { } // CHECK-LABEL: @test_dispatch_ptr -// CHECK: {{.*}}call align 4 dereferenceable(64){{.*}} ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// CHECK: {{.*}}call{{.*}} ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #if !defined(__SPIRV__) void test_dispatch_ptr(__constant unsigned char ** out) #else @@ -1138,7 +1138,7 @@ void test_get_local_id(int d, global int *out) // CHECK: declare noundef range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.z() // CHECK-LABEL: @test_get_grid_size( -// CHECK: {{.*}}call align 4 dereferenceable(64){{.*}} ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// CHECK: {{.*}}call{{.*}}ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() // CHECK: getelementptr inbounds nuw i8, ptr addrspace(4) %{{.*}}, i64 %{{.+}} // CHECK: load i32, ptr addrspace(4) %{{.*}}, align 4, !range [[$GRID_RANGE:![0-9]+]], !invariant.load void test_get_grid_size(int d, global int *out) diff --git a/clang/test/Headers/gpuintrin.c b/clang/test/Headers/gpuintrin.c index c6a20dec210bb..3c4fcfc2bd43d 100644 --- a/clang/test/Headers/gpuintrin.c +++ b/clang/test/Headers/gpuintrin.c @@ -84,7 +84,7 @@ __gpu_kernel void foo() { // AMDGPU-LABEL: define internal i32 @__gpu_num_blocks_x( // AMDGPU-SAME: ) #[[ATTR0]] { // AMDGPU-NEXT: [[ENTRY:.*:]] -// AMDGPU-NEXT: [[TMP0:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// AMDGPU-NEXT: [[TMP0:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() // AMDGPU-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP0]], i32 12 // AMDGPU-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4, !range [[RNG2:![0-9]+]], !invariant.load [[META3:![0-9]+]] // AMDGPU-NEXT: [[TMP3:%.*]] = call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() @@ -103,7 +103,7 @@ __gpu_kernel void foo() { // AMDGPU-LABEL: define internal i32 @__gpu_num_blocks_y( // AMDGPU-SAME: ) #[[ATTR0]] { // AMDGPU-NEXT: [[ENTRY:.*:]] -// AMDGPU-NEXT: [[TMP0:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// AMDGPU-NEXT: [[TMP0:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() // AMDGPU-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP0]], i32 16 // AMDGPU-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4, !range [[RNG2]], !invariant.load [[META3]] // AMDGPU-NEXT: [[TMP3:%.*]] = call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() @@ -122,7 +122,7 @@ __gpu_kernel void foo() { // AMDGPU-LABEL: define internal i32 @__gpu_num_blocks_z( // AMDGPU-SAME: ) #[[ATTR0]] { // AMDGPU-NEXT: [[ENTRY:.*:]] -// AMDGPU-NEXT: [[TMP0:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// AMDGPU-NEXT: [[TMP0:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() // AMDGPU-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP0]], i32 20 // AMDGPU-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4, !range [[RNG2]], !invariant.load [[META3]] // AMDGPU-NEXT: [[TMP3:%.*]] = call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 9101666c2a49c..3331072a1cb2a 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -180,7 +180,7 @@ def int_amdgcn_cluster_workgroup_max_flat_id: def int_amdgcn_dispatch_ptr : DefaultAttrsIntrinsic<[LLVMQualPointerType<4>], [], - [Align<RetIndex, 4>, NoUndef<RetIndex>, NonNull<RetIndex>, IntrNoMem, IntrSpeculatable]>; + [Align<RetIndex, 4>, Dereferenceable<RetIndex, 64>, NoUndef<RetIndex>, NonNull<RetIndex>, IntrNoMem, IntrSpeculatable]>; def int_amdgcn_queue_ptr : ClangBuiltin<"__builtin_amdgcn_queue_ptr">, diff --git a/llvm/test/Assembler/amdgcn-intrinsic-attributes.ll b/llvm/test/Assembler/amdgcn-intrinsic-attributes.ll index 63d0381ad1fd1..d04d591943023 100644 --- a/llvm/test/Assembler/amdgcn-intrinsic-attributes.ll +++ b/llvm/test/Assembler/amdgcn-intrinsic-attributes.ll @@ -2,41 +2,47 @@ ; RUN: llvm-as < %s | llvm-dis | FileCheck %s + ; Test assumed alignment parameter +; CHECK: declare noundef nonnull align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() -; CHECK: declare i32 @llvm.amdgcn.ds.append.p3(ptr addrspace(3) align 4 captures(none), i1 immarg) #0 +define ptr addrspace(4) @dispatch_ptr() { + %ptr = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() + ret ptr addrspace(4) %ptr +} +; CHECK: declare i32 @llvm.amdgcn.ds.append.p3(ptr addrspace(3) align 4 captures(none), i1 immarg) #1 define i32 @ds_append(ptr addrspace(3) %ptr) { %ret = call i32 @llvm.amdgcn.ds.append.p3(ptr addrspace(3) %ptr, i1 false) ret i32 %ret } ; Test assumed alignment parameter -; CHECK: declare i32 @llvm.amdgcn.ds.consume.p3(ptr addrspace(3) align 4 captures(none), i1 immarg) #0 +; CHECK: declare i32 @llvm.amdgcn.ds.consume.p3(ptr addrspace(3) align 4 captures(none), i1 immarg) #1 define i32 @ds_consume(ptr addrspace(3) %ptr) { %ret = call i32 @llvm.amdgcn.ds.consume.p3(ptr addrspace(3) %ptr, i1 false) ret i32 %ret } -; CHECK: declare void @llvm.amdgcn.s.wait.event(i16 immarg) #1 +; CHECK: declare void @llvm.amdgcn.s.wait.event(i16 immarg) #2 define void @s_wait_event() { call void @llvm.amdgcn.s.wait.event(i16 0) ret void } -; CHECK: declare void @llvm.amdgcn.s.wait.event.export.ready() #1 +; CHECK: declare void @llvm.amdgcn.s.wait.event.export.ready() #2 define void @s_wait_event_export_ready() { call void @llvm.amdgcn.s.wait.event.export.ready() ret void } ; Test assumed range -; CHECK: declare noundef range(i32 32, 65) i32 @llvm.amdgcn.wavefrontsize() #2 +; CHECK: declare noundef range(i32 32, 65) i32 @llvm.amdgcn.wavefrontsize() #0 define i32 @wavefrontsize() { %ret = call i32 @llvm.amdgcn.wavefrontsize() ret i32 %ret } -; CHECK: attributes #0 = { convergent nocallback nofree nounwind willreturn memory(argmem: readwrite) } -; CHECK: attributes #1 = { nocallback nofree nounwind willreturn } -; CHNCK: attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +; CHECK: attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +; CHECK: attributes #1 = { convergent nocallback nofree nounwind willreturn memory(argmem: readwrite) } +; CHECK: attributes #2 = { nocallback nofree nounwind willreturn } diff --git a/llvm/test/CodeGen/AMDGPU/implicit-arg-block-count.ll b/llvm/test/CodeGen/AMDGPU/implicit-arg-block-count.ll index 83ee7cba567d5..36945665ecfd1 100644 --- a/llvm/test/CodeGen/AMDGPU/implicit-arg-block-count.ll +++ b/llvm/test/CodeGen/AMDGPU/implicit-arg-block-count.ll @@ -155,7 +155,7 @@ define i32 @bad_offset() { ; CHECK-LABEL: define i32 @bad_offset() { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[DISPATCH:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() -; CHECK-NEXT: [[D_GEP_Y:%.*]] = getelementptr i8, ptr addrspace(4) [[DISPATCH]], i64 16 +; CHECK-NEXT: [[D_GEP_Y:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[DISPATCH]], i64 16 ; CHECK-NEXT: [[GRID_SIZE_Y:%.*]] = load i32, ptr addrspace(4) [[D_GEP_Y]], align 4 ; CHECK-NEXT: [[IMPLICITARG:%.*]] = call dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() ; CHECK-NEXT: [[I_GEP_X:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[IMPLICITARG]], i64 12 @@ -180,7 +180,7 @@ define i32 @dangling() { ; CHECK-LABEL: define i32 @dangling() { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[DISPATCH:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() -; CHECK-NEXT: [[D_GEP_X:%.*]] = getelementptr i8, ptr addrspace(4) [[DISPATCH]], i64 12 +; CHECK-NEXT: [[D_GEP_X:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[DISPATCH]], i64 12 ; CHECK-NEXT: [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[D_GEP_X]], align 4 ; CHECK-NEXT: ret i32 [[GRID_SIZE_X]] ; @@ -199,7 +199,7 @@ define i32 @wrong_cast() { ; CHECK-LABEL: define i32 @wrong_cast() { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[DISPATCH:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() -; CHECK-NEXT: [[D_GEP_X:%.*]] = getelementptr i8, ptr addrspace(4) [[DISPATCH]], i64 12 +; CHECK-NEXT: [[D_GEP_X:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[DISPATCH]], i64 12 ; CHECK-NEXT: [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[D_GEP_X]], align 4 ; CHECK-NEXT: [[IMPLICITARG:%.*]] = call dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() ; CHECK-NEXT: [[I_GEP_X:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[IMPLICITARG]], i64 12 @@ -224,7 +224,7 @@ define i32 @wrong_size() { ; CHECK-LABEL: define i32 @wrong_size() { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[DISPATCH:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() -; CHECK-NEXT: [[D_GEP_X:%.*]] = getelementptr i8, ptr addrspace(4) [[DISPATCH]], i64 12 +; CHECK-NEXT: [[D_GEP_X:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[DISPATCH]], i64 12 ; CHECK-NEXT: [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[D_GEP_X]], align 4 ; CHECK-NEXT: [[IMPLICITARG:%.*]] = call dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() ; CHECK-NEXT: [[I_GEP_X:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[IMPLICITARG]], i64 12 @@ -274,7 +274,7 @@ define i16 @empty_use() { ; CHECK-LABEL: define i16 @empty_use() { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[DISPATCH:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() -; CHECK-NEXT: [[D_GEP_X:%.*]] = getelementptr i8, ptr addrspace(4) [[DISPATCH]], i64 12 +; CHECK-NEXT: [[D_GEP_X:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[DISPATCH]], i64 12 ; CHECK-NEXT: [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[D_GEP_X]], align 4 ; CHECK-NEXT: [[TRUNC_X:%.*]] = trunc i32 [[GRID_SIZE_X]] to i16 ; CHECK-NEXT: [[IMPLICITARG:%.*]] = call dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
