https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/130904
>From fa3c82be14f0e94ea7e1a33c167968c7379f2563 Mon Sep 17 00:00:00 2001 From: Matt Arsenault <matthew.arsena...@amd.com> Date: Wed, 12 Mar 2025 13:24:50 +0700 Subject: [PATCH] AMDGPU: Replace ptr addrspace(8) undef uses with poison --- llvm/test/CodeGen/AMDGPU/amdpal.ll | 2 +- .../CodeGen/AMDGPU/combine-add-zext-xor.ll | 12 +- llvm/test/CodeGen/AMDGPU/else.ll | 2 +- .../AMDGPU/extract_subvector_vec4_vec3.ll | 8 +- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll | 2 +- .../llvm.amdgcn.raw.ptr.buffer.atomic.ll | 2 +- .../llvm.amdgcn.struct.ptr.buffer.atomic.ll | 2 +- .../test/CodeGen/AMDGPU/loop_exit_with_xor.ll | 6 +- .../lower-work-group-id-intrinsics-hsa.ll | 2 +- .../lower-work-group-id-intrinsics-pal.ll | 2 +- llvm/test/CodeGen/AMDGPU/merge-store-crash.ll | 2 +- .../test/CodeGen/AMDGPU/merge-store-usedef.ll | 2 +- .../AMDGPU/required-export-priority.ll | 2 +- .../AMDGPU/si-triv-disjoint-mem-access.ll | 2 +- llvm/test/CodeGen/AMDGPU/wave32.ll | 8 +- llvm/test/CodeGen/AMDGPU/wqm.ll | 110 +++++++++--------- 16 files changed, 83 insertions(+), 83 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/amdpal.ll b/llvm/test/CodeGen/AMDGPU/amdpal.ll index 171df029615ed..fd9227d2f4319 100644 --- a/llvm/test/CodeGen/AMDGPU/amdpal.ll +++ b/llvm/test/CodeGen/AMDGPU/amdpal.ll @@ -72,7 +72,7 @@ entry: %e = getelementptr [2 x i32], ptr addrspace(5) %v1, i32 0, i32 %idx %x = load i32, ptr addrspace(5) %e %xf = bitcast i32 %x to float - call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %xf, ptr addrspace(8) undef, i32 0, i32 0, i32 0) + call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %xf, ptr addrspace(8) poison, i32 0, i32 0, i32 0) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll b/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll index b42542db6dbd8..f8227f0039af7 100644 --- a/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll +++ b/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll @@ -66,7 +66,7 @@ define i32 @combine_add_zext_xor() { br i1 undef, label %bb9, label %bb bb: ; preds = %.a - %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) undef, i32 %.2, i32 64, i32 1) + %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) poison, i32 %.2, i32 64, i32 1) %i5 = icmp eq i32 %.i3, 0 br label %bb9 @@ -146,7 +146,7 @@ define i32 @combine_sub_zext_xor() { br i1 undef, label %bb9, label %bb bb: ; preds = %.a - %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) undef, i32 %.2, i32 64, i32 1) + %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) poison, i32 %.2, i32 64, i32 1) %i5 = icmp eq i32 %.i3, 0 br label %bb9 @@ -229,7 +229,7 @@ define i32 @combine_add_zext_or() { br i1 undef, label %bb9, label %bb bb: ; preds = %.a - %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) undef, i32 %.2, i32 64, i32 1) + %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) poison, i32 %.2, i32 64, i32 1) %i5 = icmp eq i32 %.i3, 0 br label %bb9 @@ -313,7 +313,7 @@ define i32 @combine_sub_zext_or() { br i1 undef, label %bb9, label %bb bb: ; preds = %.a - %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) undef, i32 %.2, i32 64, i32 1) + %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) poison, i32 %.2, i32 64, i32 1) %i5 = icmp eq i32 %.i3, 0 br label %bb9 @@ -392,7 +392,7 @@ define i32 @combine_add_zext_and() { br i1 undef, label %bb9, label %bb bb: ; preds = %.a - %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) undef, i32 %.2, i32 64, i32 1) + %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) poison, i32 %.2, i32 64, i32 1) %i5 = icmp eq i32 %.i3, 0 br label %bb9 @@ -471,7 +471,7 @@ define i32 @combine_sub_zext_and() { br i1 undef, label %bb9, label %bb bb: ; preds = %.a - %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) undef, i32 %.2, i32 64, i32 1) + %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) poison, i32 %.2, i32 64, i32 1) %i5 = icmp eq i32 %.i3, 0 br label %bb9 diff --git a/llvm/test/CodeGen/AMDGPU/else.ll b/llvm/test/CodeGen/AMDGPU/else.ll index 4a3018e67b17d..884f5305407a1 100644 --- a/llvm/test/CodeGen/AMDGPU/else.ll +++ b/llvm/test/CodeGen/AMDGPU/else.ll @@ -47,7 +47,7 @@ else: end: %r = phi float [ %v.if, %if ], [ %v.else, %else ] - call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %r, ptr addrspace(8) undef, i32 0, i32 0, i32 0) + call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %r, ptr addrspace(8) poison, i32 0, i32 0, i32 0) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll b/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll index a44231f39ae6f..37a2e9e24dbd6 100644 --- a/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll +++ b/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll @@ -13,7 +13,7 @@ define amdgpu_hs void @main(ptr addrspace(6) inreg %arg) { ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF - ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[DEF1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from `ptr addrspace(8) undef`, align 1, addrspace 8) + ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[DEF1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from `ptr addrspace(8) poison`, align 1, addrspace 8) ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 @@ -22,14 +22,14 @@ define amdgpu_hs void @main(ptr addrspace(6) inreg %arg) { ; GCN-NEXT: [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF ; GCN-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[DEF2]] ; GCN-NEXT: [[DEF3:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF - ; GCN-NEXT: BUFFER_STORE_DWORDX3_OFFEN_exact killed [[COPY4]], [[COPY5]], [[DEF3]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s96) into `ptr addrspace(8) undef`, align 1, addrspace 8) + ; GCN-NEXT: BUFFER_STORE_DWORDX3_OFFEN_exact killed [[COPY4]], [[COPY5]], [[DEF3]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s96) into `ptr addrspace(8) poison`, align 1, addrspace 8) ; GCN-NEXT: S_ENDPGM 0 main_body: - %tmp25 = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) undef, i32 undef, i32 0, i32 0) + %tmp25 = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) poison, i32 undef, i32 0, i32 0) %tmp27 = bitcast <4 x float> %tmp25 to <16 x i8> %tmp28 = shufflevector <16 x i8> %tmp27, <16 x i8> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> %tmp29 = bitcast <12 x i8> %tmp28 to <3 x i32> - call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %tmp29, ptr addrspace(8) undef, i32 undef, i32 0, i32 0) #3 + call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %tmp29, ptr addrspace(8) poison, i32 undef, i32 0, i32 0) #3 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll index c120c58ce6f33..c506e08855470 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll @@ -623,7 +623,7 @@ define amdgpu_kernel void @test_export_pos_before_param_ordered(float %x, float define amdgpu_kernel void @test_export_pos_before_param_across_load(i32 %idx) #0 { call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float 1.0, i1 false, i1 false) call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float 1.0, float 1.0, float 1.0, float 0.5, i1 false, i1 false) - %load = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx, i32 0, i32 0) + %load = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx, i32 0, i32 0) call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float %load, i1 true, i1 false) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.ll index 9c9cf36baecf1..843ad563df69e 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.ll @@ -103,7 +103,7 @@ main_body: ;CHECK: buffer_atomic_add v0, define amdgpu_ps float @test4() { main_body: - %v = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 1, ptr addrspace(8) undef, i32 4, i32 0, i32 0) + %v = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 1, ptr addrspace(8) poison, i32 4, i32 0, i32 0) %v.float = bitcast i32 %v to float ret float %v.float } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.ll index fb44515595a55..607f6001f99b4 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.ll @@ -115,7 +115,7 @@ main_body: ;CHECK: buffer_atomic_add v0, define amdgpu_ps float @test4() { main_body: - %v = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.add.i32(i32 1, ptr addrspace(8) undef, i32 0, i32 4, i32 0, i32 0) + %v = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.add.i32(i32 1, ptr addrspace(8) poison, i32 0, i32 4, i32 0, i32 0) %v.float = bitcast i32 %v to float ret float %v.float } diff --git a/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll b/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll index a407cd20bf762..2ce612b176587 100644 --- a/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll +++ b/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll @@ -46,7 +46,7 @@ loop: br i1 %tmp27, label %then, label %endif then: ; preds = %bb - call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float undef, ptr addrspace(8) undef, i32 0, i32 undef, i32 0) + call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float undef, ptr addrspace(8) poison, i32 0, i32 undef, i32 0) br label %endif endif: ; preds = %bb28, %bb @@ -85,7 +85,7 @@ loop: %tmp23phi = phi i32 [ %tmp23, %loop ], [ 0, %entry ] %tmp23 = add nuw i32 %tmp23phi, 1 %tmp27 = icmp ult i32 %arg, %tmp23 - call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float undef, ptr addrspace(8) undef, i32 0, i32 undef, i32 0) + call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float undef, ptr addrspace(8) poison, i32 0, i32 undef, i32 0) br i1 %tmp27, label %loop, label %loopexit loopexit: @@ -136,7 +136,7 @@ loop: br i1 %tmp27, label %then, label %endif then: ; preds = %bb - call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float undef, ptr addrspace(8) undef, i32 0, i32 undef, i32 0) + call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float undef, ptr addrspace(8) poison, i32 0, i32 undef, i32 0) br label %endif endif: ; preds = %bb28, %bb diff --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll index 018cdee038b91..a62427b2ce922 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll @@ -61,7 +61,7 @@ define amdgpu_kernel void @workgroup_ids_kernel() { %ielemx = insertelement <3 x i32> poison, i32 %idx, i64 0 %ielemy = insertelement <3 x i32> %ielemx, i32 %idy, i64 1 %ielemz = insertelement <3 x i32> %ielemy, i32 %idz, i64 2 - call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %ielemz, ptr addrspace(8) undef, i32 0, i32 0, i32 0) + call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %ielemz, ptr addrspace(8) poison, i32 0, i32 0, i32 0) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll index dcec7288686b1..52b1d5e6c5b17 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll @@ -68,7 +68,7 @@ define amdgpu_cs void @_amdgpu_cs_main() { %ielemx = insertelement <3 x i32> poison, i32 %idx, i64 0 %ielemy = insertelement <3 x i32> %ielemx, i32 %idy, i64 1 %ielemz = insertelement <3 x i32> %ielemy, i32 %idz, i64 2 - call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %ielemz, ptr addrspace(8) undef, i32 0, i32 0, i32 0) + call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %ielemz, ptr addrspace(8) poison, i32 0, i32 0, i32 0) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/merge-store-crash.ll b/llvm/test/CodeGen/AMDGPU/merge-store-crash.ll index 0809891aec72b..66a217cb7be45 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-store-crash.ll +++ b/llvm/test/CodeGen/AMDGPU/merge-store-crash.ll @@ -26,7 +26,7 @@ main_body: %tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp7, i32 1 %tmp10 = insertelement <4 x i32> %tmp9, i32 poison, i32 2 %tmp11 = insertelement <4 x i32> %tmp10, i32 poison, i32 3 - call void @llvm.amdgcn.struct.ptr.tbuffer.store.v4i32(<4 x i32> %tmp11, ptr addrspace(8) undef, i32 0, i32 0, i32 %arg, i32 78, i32 3) #2 + call void @llvm.amdgcn.struct.ptr.tbuffer.store.v4i32(<4 x i32> %tmp11, ptr addrspace(8) poison, i32 0, i32 0, i32 %arg, i32 78, i32 3) #2 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/merge-store-usedef.ll b/llvm/test/CodeGen/AMDGPU/merge-store-usedef.ll index 88cc5334d5c73..ae4fd6674f466 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-store-usedef.ll +++ b/llvm/test/CodeGen/AMDGPU/merge-store-usedef.ll @@ -10,7 +10,7 @@ define amdgpu_vs void @test1(i32 %v) #0 { store i32 %v, ptr addrspace(3) null - call void @llvm.amdgcn.raw.ptr.tbuffer.store.i32(i32 %v, ptr addrspace(8) undef, i32 0, i32 0, i32 68, i32 1) + call void @llvm.amdgcn.raw.ptr.tbuffer.store.i32(i32 %v, ptr addrspace(8) poison, i32 0, i32 0, i32 68, i32 1) %w = load i32, ptr addrspace(3) null store i32 %w, ptr addrspace(3) %p1 diff --git a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll index a37e2bf4eb294..f14cd4488ef1e 100644 --- a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll +++ b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll @@ -254,7 +254,7 @@ define amdgpu_ps void @test_export_pos_before_param_across_load(i32 %idx) #0 { ; GCN-NEXT: s_endpgm call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float 1.0, i1 false, i1 false) call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float 1.0, float 1.0, float 1.0, float 0.5, i1 false, i1 false) - %load = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx, i32 0, i32 0) + %load = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx, i32 0, i32 0) call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float %load, i1 true, i1 false) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll b/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll index 096dfcc2c7667..4e6aa03d91876 100644 --- a/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll +++ b/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll @@ -318,7 +318,7 @@ define amdgpu_vs void @reorder_local_load_tbuffer_store_local_load(ptr addrspace %vdata = insertelement <4 x i32> poison, i32 %a1, i32 0 %vaddr.add = add i32 %vaddr, 32 - call void @llvm.amdgcn.struct.ptr.tbuffer.store.v4i32(<4 x i32> %vdata, ptr addrspace(8) undef, i32 %vaddr.add, i32 0, i32 0, i32 228, i32 3) + call void @llvm.amdgcn.struct.ptr.tbuffer.store.v4i32(<4 x i32> %vdata, ptr addrspace(8) poison, i32 %vaddr.add, i32 0, i32 0, i32 228, i32 3) %tmp2 = load i32, ptr addrspace(3) %ptr2, align 4 diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll index 94c4eebe32281..846e347a4947e 100644 --- a/llvm/test/CodeGen/AMDGPU/wave32.ll +++ b/llvm/test/CodeGen/AMDGPU/wave32.ll @@ -1959,7 +1959,7 @@ main_body: br i1 %cc, label %endif, label %if if: - %src = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx, i32 0, i32 0, i32 0) + %src = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx, i32 0, i32 0, i32 0) %out = fadd float %src, %src %out.0 = call float @llvm.amdgcn.wwm.f32(float %out) %out.1 = fadd float %src, %out.0 @@ -2046,7 +2046,7 @@ main_body: br i1 %cc, label %endif, label %if if: - %src = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx, i32 0, i32 0, i32 0) + %src = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx, i32 0, i32 0, i32 0) %out = fadd float %src, %src %out.0 = call float @llvm.amdgcn.strict.wwm.f32(float %out) %out.1 = fadd float %src, %out.0 @@ -2128,8 +2128,8 @@ define amdgpu_ps float @test_wqm2(i32 inreg %idx0, i32 inreg %idx1) #0 { ; GFX1064-NEXT: s_and_b64 exec, exec, s[2:3] ; GFX1064-NEXT: ; return to shader part epilog main_body: - %src0 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx0, i32 0, i32 0, i32 0) - %src1 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx1, i32 0, i32 0, i32 0) + %src0 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx0, i32 0, i32 0, i32 0) + %src1 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx1, i32 0, i32 0, i32 0) %out = fadd float %src0, %src1 %out.0 = bitcast float %out to i32 %out.1 = call i32 @llvm.amdgcn.wqm.i32(i32 %out.0) diff --git a/llvm/test/CodeGen/AMDGPU/wqm.ll b/llvm/test/CodeGen/AMDGPU/wqm.ll index 3845db2edf73f..abf690048bae3 100644 --- a/llvm/test/CodeGen/AMDGPU/wqm.ll +++ b/llvm/test/CodeGen/AMDGPU/wqm.ll @@ -125,7 +125,7 @@ main_body: %tex.1 = bitcast <4 x float> %tex to <4 x i32> %tex.2 = extractelement <4 x i32> %tex.1, i32 0 - call void @llvm.amdgcn.struct.ptr.buffer.store.v4f32(<4 x float> %tex, ptr addrspace(8) undef, i32 %tex.2, i32 0, i32 0, i32 0) + call void @llvm.amdgcn.struct.ptr.buffer.store.v4f32(<4 x float> %tex, ptr addrspace(8) poison, i32 %tex.2, i32 0, i32 0, i32 0) ret <4 x float> %tex } @@ -247,7 +247,7 @@ define amdgpu_ps <4 x float> @test4_ptr_buf(<8 x i32> inreg %rsrc, <4 x i32> inr main_body: %c.1 = mul i32 %c, %d - call void @llvm.amdgcn.struct.ptr.buffer.store.v4f32(<4 x float> undef, ptr addrspace(8) undef, i32 %c.1, i32 0, i32 0, i32 0) + call void @llvm.amdgcn.struct.ptr.buffer.store.v4f32(<4 x float> undef, ptr addrspace(8) poison, i32 %c.1, i32 0, i32 0, i32 0) %c.1.bc = bitcast i32 %c.1 to float %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.1.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i1 false, i32 0, i32 0) #0 %tex0 = extractelement <4 x float> %tex, i32 0 @@ -327,8 +327,8 @@ define amdgpu_ps float @test5_ptr_buf(i32 inreg %idx0, i32 inreg %idx1) { ; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s2 ; GFX10-W32-NEXT: ; return to shader part epilog main_body: - %src0 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx0, i32 0, i32 0, i32 0) - %src1 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx1, i32 0, i32 0, i32 0) + %src0 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx0, i32 0, i32 0, i32 0) + %src1 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx1, i32 0, i32 0, i32 0) %out = fadd float %src0, %src1 %out.0 = call float @llvm.amdgcn.wqm.f32(float %out) ret float %out.0 @@ -406,8 +406,8 @@ define amdgpu_ps float @test6_ptr_buf(i32 inreg %idx0, i32 inreg %idx1) { ; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s2 ; GFX10-W32-NEXT: ; return to shader part epilog main_body: - %src0 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx0, i32 0, i32 0, i32 0) - %src1 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx1, i32 0, i32 0, i32 0) + %src0 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx0, i32 0, i32 0, i32 0) + %src1 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx1, i32 0, i32 0, i32 0) %out = fadd float %src0, %src1 %out.0 = bitcast float %out to i32 %out.1 = call i32 @llvm.amdgcn.wqm.i32(i32 %out.0) @@ -447,8 +447,8 @@ define amdgpu_ps float @test_wwm1(i32 inreg %idx0, i32 inreg %idx1) { ; GFX10-W32-NEXT: v_mov_b32_e32 v0, v1 ; GFX10-W32-NEXT: ; return to shader part epilog main_body: - %src0 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx0, i32 0, i32 0, i32 0) - %src1 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx1, i32 0, i32 0, i32 0) + %src0 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx0, i32 0, i32 0, i32 0) + %src1 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx1, i32 0, i32 0, i32 0) %out = fadd float %src0, %src1 %out.0 = call float @llvm.amdgcn.wwm.f32(float %out) ret float %out.0 @@ -484,8 +484,8 @@ define amdgpu_ps float @test_wwm2(i32 inreg %idx0, i32 inreg %idx1) { ; GFX10-W32-NEXT: v_mov_b32_e32 v0, v1 ; GFX10-W32-NEXT: ; return to shader part epilog main_body: - %src0 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx0, i32 0, i32 0, i32 0) - %src1 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx1, i32 0, i32 0, i32 0) + %src0 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx0, i32 0, i32 0, i32 0) + %src1 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx1, i32 0, i32 0, i32 0) %src0.0 = bitcast float %src0 to i32 %src1.0 = bitcast float %src1 to i32 %out = add i32 %src0.0, %src1.0 @@ -549,7 +549,7 @@ main_body: br i1 %cc, label %endif, label %if if: - %src = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx, i32 0, i32 0, i32 0) + %src = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx, i32 0, i32 0, i32 0) %out = fadd float %src, %src %out.0 = call float @llvm.amdgcn.wwm.f32(float %out) %out.1 = fadd float %src, %out.0 @@ -612,7 +612,7 @@ main_body: br i1 %cc, label %endif, label %if if: - %src = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx, i32 0, i32 0, i32 0) + %src = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx, i32 0, i32 0, i32 0) %out = fadd float %src, %src %out.0 = call float @llvm.amdgcn.wwm.f32(float %out) br label %endif @@ -666,9 +666,9 @@ define amdgpu_ps float @test_wwm5(i32 inreg %idx0, i32 inreg %idx1) { ; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s2 ; GFX10-W32-NEXT: ; return to shader part epilog main_body: - %src0 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx0, i32 0, i32 0, i32 0) - call void @llvm.amdgcn.struct.ptr.buffer.store.f32(float %src0, ptr addrspace(8) undef, i32 %idx0, i32 0, i32 0, i32 0) - %src1 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx1, i32 0, i32 0, i32 0) + %src0 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx0, i32 0, i32 0, i32 0) + call void @llvm.amdgcn.struct.ptr.buffer.store.f32(float %src0, ptr addrspace(8) poison, i32 %idx0, i32 0, i32 0, i32 0) + %src1 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx1, i32 0, i32 0, i32 0) %temp = fadd float %src1, %src1 %temp.0 = call float @llvm.amdgcn.wwm.f32(float %temp) %out = fadd float %temp.0, %temp.0 @@ -857,13 +857,13 @@ define amdgpu_ps void @test_wwm_set_inactive1(i32 inreg %idx) { ; GFX10-W32-NEXT: buffer_store_dword v2, v1, s[0:3], 0 idxen ; GFX10-W32-NEXT: s_endpgm main_body: - %src = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx, i32 0, i32 0, i32 0) + %src = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx, i32 0, i32 0, i32 0) %src.0 = bitcast float %src to i32 %src.1 = call i32 @llvm.amdgcn.set.inactive.i32(i32 %src.0, i32 0) %out = add i32 %src.1, %src.1 %out.0 = call i32 @llvm.amdgcn.wwm.i32(i32 %out) %out.1 = bitcast i32 %out.0 to float - call void @llvm.amdgcn.struct.ptr.buffer.store.f32(float %out.1, ptr addrspace(8) undef, i32 %idx, i32 0, i32 0, i32 0) + call void @llvm.amdgcn.struct.ptr.buffer.store.f32(float %out.1, ptr addrspace(8) poison, i32 %idx, i32 0, i32 0, i32 0) ret void } @@ -899,8 +899,8 @@ define amdgpu_ps float @test_strict_wqm1(i32 inreg %idx0, i32 inreg %idx1) { ; GFX10-W32-NEXT: v_mov_b32_e32 v0, v1 ; GFX10-W32-NEXT: ; return to shader part epilog main_body: - %src0 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx0, i32 0, i32 0, i32 0) - %src1 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx1, i32 0, i32 0, i32 0) + %src0 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx0, i32 0, i32 0, i32 0) + %src1 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx1, i32 0, i32 0, i32 0) %out = fadd float %src0, %src1 %out.0 = call float @llvm.amdgcn.strict.wqm.f32(float %out) ret float %out.0 @@ -938,8 +938,8 @@ define amdgpu_ps float @test_strict_wqm2(i32 inreg %idx0, i32 inreg %idx1) { ; GFX10-W32-NEXT: v_mov_b32_e32 v0, v1 ; GFX10-W32-NEXT: ; return to shader part epilog main_body: - %src0 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx0, i32 0, i32 0, i32 0) - %src1 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx1, i32 0, i32 0, i32 0) + %src0 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx0, i32 0, i32 0, i32 0) + %src1 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx1, i32 0, i32 0, i32 0) %src0.0 = bitcast float %src0 to i32 %src1.0 = bitcast float %src1 to i32 %out = add i32 %src0.0, %src1.0 @@ -1005,7 +1005,7 @@ main_body: br i1 %cc, label %endif, label %if if: - %src = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx, i32 0, i32 0, i32 0) + %src = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx, i32 0, i32 0, i32 0) %out = fadd float %src, %src %out.0 = call float @llvm.amdgcn.strict.wqm.f32(float %out) %out.1 = fadd float %src, %out.0 @@ -1070,7 +1070,7 @@ main_body: br i1 %cc, label %endif, label %if if: - %src = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx, i32 0, i32 0, i32 0) + %src = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx, i32 0, i32 0, i32 0) %out = fadd float %src, %src %out.0 = call float @llvm.amdgcn.strict.wqm.f32(float %out) br label %endif @@ -1127,9 +1127,9 @@ define amdgpu_ps float @test_strict_wqm5(i32 inreg %idx0, i32 inreg %idx1) { ; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s2 ; GFX10-W32-NEXT: ; return to shader part epilog main_body: - %src0 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx0, i32 0, i32 0, i32 0) - call void @llvm.amdgcn.struct.ptr.buffer.store.f32(float %src0, ptr addrspace(8) undef, i32 %idx0, i32 0, i32 0, i32 0) - %src1 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx1, i32 0, i32 0, i32 0) + %src0 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx0, i32 0, i32 0, i32 0) + call void @llvm.amdgcn.struct.ptr.buffer.store.f32(float %src0, ptr addrspace(8) poison, i32 %idx0, i32 0, i32 0, i32 0) + %src1 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx1, i32 0, i32 0, i32 0) %temp = fadd float %src1, %src1 %temp.0 = call float @llvm.amdgcn.strict.wqm.f32(float %temp) %out = fadd float %temp.0, %temp.0 @@ -1336,15 +1336,15 @@ define amdgpu_ps void @test_set_inactive2(i32 inreg %idx0, i32 inreg %idx1) { ; GFX10-W32-NEXT: buffer_store_dword v1, v0, s[0:3], 0 idxen ; GFX10-W32-NEXT: s_endpgm main_body: - %src1 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx1, i32 0, i32 0, i32 0) + %src1 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx1, i32 0, i32 0, i32 0) %src1.0 = bitcast float %src1 to i32 %src1.1 = call i32 @llvm.amdgcn.set.inactive.i32(i32 %src1.0, i32 undef) - %src0 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx0, i32 0, i32 0, i32 0) + %src0 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx0, i32 0, i32 0, i32 0) %src0.0 = bitcast float %src0 to i32 %src0.1 = call i32 @llvm.amdgcn.wqm.i32(i32 %src0.0) %out = add i32 %src0.1, %src1.1 %out.0 = bitcast i32 %out to float - call void @llvm.amdgcn.struct.ptr.buffer.store.f32(float %out.0, ptr addrspace(8) undef, i32 %idx1, i32 0, i32 0, i32 0) + call void @llvm.amdgcn.struct.ptr.buffer.store.f32(float %out.0, ptr addrspace(8) poison, i32 %idx1, i32 0, i32 0, i32 0) ret void } @@ -1420,7 +1420,7 @@ IF: br label %END ELSE: - call void @llvm.amdgcn.struct.ptr.buffer.store.f32(float %data, ptr addrspace(8) undef, i32 %c, i32 0, i32 0, i32 0) + call void @llvm.amdgcn.struct.ptr.buffer.store.f32(float %data, ptr addrspace(8) poison, i32 %c, i32 0, i32 0, i32 0) br label %END END: @@ -1498,7 +1498,7 @@ IF: br label %END ELSE: - call void @llvm.amdgcn.struct.ptr.buffer.store.f32(float %data, ptr addrspace(8) undef, i32 %c, i32 0, i32 0, i32 0) + call void @llvm.amdgcn.struct.ptr.buffer.store.f32(float %data, ptr addrspace(8) poison, i32 %c, i32 0, i32 0, i32 0) br label %END END: @@ -1570,16 +1570,16 @@ define amdgpu_ps <4 x float> @test_control_flow_2(<8 x i32> inreg %rsrc, <4 x i3 main_body: %idx.1 = extractelement <3 x i32> %idx, i32 0 %data.1 = extractelement <2 x float> %data, i32 0 - call void @llvm.amdgcn.struct.ptr.buffer.store.f32(float %data.1, ptr addrspace(8) undef, i32 %idx.1, i32 0, i32 0, i32 0) + call void @llvm.amdgcn.struct.ptr.buffer.store.f32(float %data.1, ptr addrspace(8) poison, i32 %idx.1, i32 0, i32 0, i32 0) ; The load that determines the branch (and should therefore be WQM) is ; surrounded by stores that require disabled WQM. %idx.2 = extractelement <3 x i32> %idx, i32 1 - %z = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx.2, i32 0, i32 0, i32 0) + %z = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx.2, i32 0, i32 0, i32 0) %idx.3 = extractelement <3 x i32> %idx, i32 2 %data.3 = extractelement <2 x float> %data, i32 1 - call void @llvm.amdgcn.struct.ptr.buffer.store.f32(float %data.3, ptr addrspace(8) undef, i32 %idx.3, i32 0, i32 0, i32 0) + call void @llvm.amdgcn.struct.ptr.buffer.store.f32(float %data.3, ptr addrspace(8) poison, i32 %idx.3, i32 0, i32 0, i32 0) %cc = fcmp ogt float %z, 0.0 br i1 %cc, label %IF, label %ELSE @@ -1671,7 +1671,7 @@ main_body: %tex0 = extractelement <4 x float> %tex, i32 0 %dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 false, i32 0, i32 0) #0 %dtex.1 = extractelement <4 x float> %dtex, i32 0 - call void @llvm.amdgcn.struct.ptr.buffer.store.f32(float %dtex.1, ptr addrspace(8) undef, i32 %idx, i32 0, i32 0, i32 0) + call void @llvm.amdgcn.struct.ptr.buffer.store.f32(float %dtex.1, ptr addrspace(8) poison, i32 %idx, i32 0, i32 0, i32 0) %cc = fcmp ogt float %dtex.1, 0.0 br i1 %cc, label %IF, label %ELSE @@ -1741,8 +1741,8 @@ main_body: br i1 %cond, label %IF, label %END IF: - %data = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 0, i32 0, i32 0) - call void @llvm.amdgcn.struct.ptr.buffer.store.f32(float %data, ptr addrspace(8) undef, i32 1, i32 0, i32 0, i32 0) + %data = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 0, i32 0, i32 0) + call void @llvm.amdgcn.struct.ptr.buffer.store.f32(float %data, ptr addrspace(8) poison, i32 1, i32 0, i32 0, i32 0) br label %END END: @@ -1821,14 +1821,14 @@ main_body: %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i1 false, i32 0, i32 0) #0 %idx.0 = extractelement <2 x i32> %idx, i32 0 %data.0 = extractelement <2 x float> %data, i32 0 - call void @llvm.amdgcn.struct.ptr.buffer.store.f32(float %data.0, ptr addrspace(8) undef, i32 %idx.0, i32 0, i32 0, i32 0) + call void @llvm.amdgcn.struct.ptr.buffer.store.f32(float %data.0, ptr addrspace(8) poison, i32 %idx.0, i32 0, i32 0, i32 0) %z.cmp = fcmp olt float %z, 0.0 call void @llvm.amdgcn.kill(i1 %z.cmp) %idx.1 = extractelement <2 x i32> %idx, i32 1 %data.1 = extractelement <2 x float> %data, i32 1 - call void @llvm.amdgcn.struct.ptr.buffer.store.f32(float %data.1, ptr addrspace(8) undef, i32 %idx.1, i32 0, i32 0, i32 0) + call void @llvm.amdgcn.struct.ptr.buffer.store.f32(float %data.1, ptr addrspace(8) poison, i32 %idx.1, i32 0, i32 0, i32 0) %tex2 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord2, <8 x i32> %rsrc, <4 x i32> %sampler, i1 false, i32 0, i32 0) #0 %tex2.0 = extractelement <4 x float> %tex2, i32 0 %dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex2.0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 false, i32 0, i32 0) #0 @@ -1891,7 +1891,7 @@ main_body: %tex0 = extractelement <4 x float> %tex, i32 0 %dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 false, i32 0, i32 0) #0 - call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %data, ptr addrspace(8) undef, i32 0, i32 0, i32 0) + call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %data, ptr addrspace(8) poison, i32 0, i32 0, i32 0) %z.cmp = fcmp olt float %z, 0.0 call void @llvm.amdgcn.kill(i1 %z.cmp) @@ -2071,17 +2071,17 @@ define amdgpu_ps void @test_alloca(float %data, i32 %a, i32 %idx) nounwind { entry: %array = alloca [32 x i32], align 4, addrspace(5) - call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %data, ptr addrspace(8) undef, i32 0, i32 0, i32 0) + call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %data, ptr addrspace(8) poison, i32 0, i32 0, i32 0) store volatile i32 %a, ptr addrspace(5) %array, align 4 - call void @llvm.amdgcn.struct.ptr.buffer.store.f32(float %data, ptr addrspace(8) undef, i32 1, i32 0, i32 0, i32 0) + call void @llvm.amdgcn.struct.ptr.buffer.store.f32(float %data, ptr addrspace(8) poison, i32 1, i32 0, i32 0, i32 0) %c.gep = getelementptr [32 x i32], ptr addrspace(5) %array, i32 0, i32 %idx %c = load i32, ptr addrspace(5) %c.gep, align 4 %c.bc = bitcast i32 %c to float %t = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.bc, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0) #0 - call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> %t, ptr addrspace(8) undef, i32 0, i32 0, i32 0) + call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> %t, ptr addrspace(8) poison, i32 0, i32 0, i32 0) ret void } @@ -2236,7 +2236,7 @@ else: end: %r = phi <4 x float> [ %r.if, %if ], [ %r.else, %else ] - call void @llvm.amdgcn.struct.ptr.buffer.store.f32(float 1.0, ptr addrspace(8) undef, i32 %idx, i32 0, i32 0, i32 0) + call void @llvm.amdgcn.struct.ptr.buffer.store.f32(float 1.0, ptr addrspace(8) poison, i32 %idx, i32 0, i32 0, i32 0) ret <4 x float> %r } @@ -2348,8 +2348,8 @@ define amdgpu_ps float @test_strict_wwm1(i32 inreg %idx0, i32 inreg %idx1) { ; GFX10-W32-NEXT: v_mov_b32_e32 v0, v1 ; GFX10-W32-NEXT: ; return to shader part epilog main_body: - %src0 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx0, i32 0, i32 0, i32 0) - %src1 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx1, i32 0, i32 0, i32 0) + %src0 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx0, i32 0, i32 0, i32 0) + %src1 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx1, i32 0, i32 0, i32 0) %out = fadd float %src0, %src1 %out.0 = call float @llvm.amdgcn.strict.wwm.f32(float %out) ret float %out.0 @@ -2385,8 +2385,8 @@ define amdgpu_ps float @test_strict_wwm2(i32 inreg %idx0, i32 inreg %idx1) { ; GFX10-W32-NEXT: v_mov_b32_e32 v0, v1 ; GFX10-W32-NEXT: ; return to shader part epilog main_body: - %src0 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx0, i32 0, i32 0, i32 0) - %src1 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx1, i32 0, i32 0, i32 0) + %src0 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx0, i32 0, i32 0, i32 0) + %src1 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx1, i32 0, i32 0, i32 0) %src0.0 = bitcast float %src0 to i32 %src1.0 = bitcast float %src1 to i32 %out = add i32 %src0.0, %src1.0 @@ -2450,7 +2450,7 @@ main_body: br i1 %cc, label %endif, label %if if: - %src = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx, i32 0, i32 0, i32 0) + %src = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx, i32 0, i32 0, i32 0) %out = fadd float %src, %src %out.0 = call float @llvm.amdgcn.strict.wwm.f32(float %out) %out.1 = fadd float %src, %out.0 @@ -2513,7 +2513,7 @@ main_body: br i1 %cc, label %endif, label %if if: - %src = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx, i32 0, i32 0, i32 0) + %src = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx, i32 0, i32 0, i32 0) %out = fadd float %src, %src %out.0 = call float @llvm.amdgcn.strict.wwm.f32(float %out) br label %endif @@ -2567,9 +2567,9 @@ define amdgpu_ps float @test_strict_wwm5(i32 inreg %idx0, i32 inreg %idx1) { ; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s2 ; GFX10-W32-NEXT: ; return to shader part epilog main_body: - %src0 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx0, i32 0, i32 0, i32 0) - call void @llvm.amdgcn.struct.ptr.buffer.store.f32(float %src0, ptr addrspace(8) undef, i32 %idx0, i32 0, i32 0, i32 0) - %src1 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx1, i32 0, i32 0, i32 0) + %src0 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx0, i32 0, i32 0, i32 0) + call void @llvm.amdgcn.struct.ptr.buffer.store.f32(float %src0, ptr addrspace(8) poison, i32 %idx0, i32 0, i32 0, i32 0) + %src1 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx1, i32 0, i32 0, i32 0) %temp = fadd float %src1, %src1 %temp.0 = call float @llvm.amdgcn.strict.wwm.f32(float %temp) %out = fadd float %temp.0, %temp.0 @@ -2754,13 +2754,13 @@ define amdgpu_ps void @test_strict_wwm_set_inactive1(i32 inreg %idx) { ; GFX10-W32-NEXT: buffer_store_dword v2, v1, s[0:3], 0 idxen ; GFX10-W32-NEXT: s_endpgm main_body: - %src = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx, i32 0, i32 0, i32 0) + %src = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx, i32 0, i32 0, i32 0) %src.0 = bitcast float %src to i32 %src.1 = call i32 @llvm.amdgcn.set.inactive.i32(i32 %src.0, i32 0) %out = add i32 %src.1, %src.1 %out.0 = call i32 @llvm.amdgcn.strict.wwm.i32(i32 %out) %out.1 = bitcast i32 %out.0 to float - call void @llvm.amdgcn.struct.ptr.buffer.store.f32(float %out.1, ptr addrspace(8) undef, i32 %idx, i32 0, i32 0, i32 0) + call void @llvm.amdgcn.struct.ptr.buffer.store.f32(float %out.1, ptr addrspace(8) poison, i32 %idx, i32 0, i32 0, i32 0) ret void } _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits