https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/130903
>From aeb2f61c061c99871f66e2b1173ea7c25a23c0d5 Mon Sep 17 00:00:00 2001 From: Matt Arsenault <matthew.arsena...@amd.com> Date: Wed, 12 Mar 2025 13:23:32 +0700 Subject: [PATCH] AMDGPU: Replace <8 x i32> undef uses in tests with poison --- .../AMDGPU/adjust-writemask-invalid-copy.ll | 10 +++++----- llvm/test/CodeGen/AMDGPU/commute-shifts.ll | 2 +- .../test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll | 14 +++++++------- llvm/test/CodeGen/AMDGPU/else.ll | 2 +- llvm/test/CodeGen/AMDGPU/img-nouse-adjust.ll | 2 +- llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll | 2 +- .../test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll | 4 ++-- .../test/CodeGen/AMDGPU/mixed-wave32-wave64.ll | 2 +- llvm/test/CodeGen/AMDGPU/sgpr-copy.ll | 2 +- llvm/test/CodeGen/AMDGPU/skip-if-dead.ll | 4 ++-- .../CodeGen/AMDGPU/subreg-coalescer-crash.ll | 2 +- .../AMDGPU/undefined-subreg-liverange.ll | 2 +- .../CodeGen/AMDGPU/unigine-liveness-crash.ll | 12 ++++++------ .../CodeGen/AMDGPU/vgpr-tuple-allocation.ll | 6 +++--- llvm/test/CodeGen/AMDGPU/wave32.ll | 2 +- llvm/test/CodeGen/AMDGPU/wqm.ll | 18 +++++++++--------- 16 files changed, 43 insertions(+), 43 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll b/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll index b913b5c3ab746..dd85edf59b18f 100644 --- a/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll +++ b/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll @@ -7,7 +7,7 @@ ; GCN: buffer_store_dword v0 define amdgpu_ps void @adjust_writemask_crash_0_nochain() #0 { main_body: - %tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0) + %tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float undef, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0) %tmp1 = bitcast <2 x float> %tmp to <2 x i32> %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison> %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float> @@ -23,7 +23,7 @@ main_body: ; GCN: buffer_store_dword v0 define amdgpu_ps void @adjust_writemask_crash_1_nochain() #0 { main_body: - %tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0) + %tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float undef, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0) %tmp1 = bitcast <2 x float> %tmp to <2 x i32> %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 poison, i32 poison> %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float> @@ -39,7 +39,7 @@ main_body: ; GCN: buffer_store_dword v0 define amdgpu_ps void @adjust_writemask_crash_0_chain() #0 { main_body: - %tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0) + %tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float undef, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0) %tmp1 = bitcast <2 x float> %tmp to <2 x i32> %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison> %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float> @@ -55,7 +55,7 @@ main_body: ; GCN: buffer_store_dword v0 define amdgpu_ps void @adjust_writemask_crash_1_chain() #0 { main_body: - %tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0) + %tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float undef, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0) %tmp1 = bitcast <2 x float> %tmp to <2 x i32> %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 poison, i32 poison> %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float> @@ -66,7 +66,7 @@ main_body: define amdgpu_ps void @adjust_writemask_crash_0_v4() #0 { main_body: - %tmp = call <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32 5, float undef, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0) + %tmp = call <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32 5, float undef, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0) %tmp1 = bitcast <4 x float> %tmp to <4 x i32> %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison> %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float> diff --git a/llvm/test/CodeGen/AMDGPU/commute-shifts.ll b/llvm/test/CodeGen/AMDGPU/commute-shifts.ll index 849348a7be53d..1e40b4c9f04cf 100644 --- a/llvm/test/CodeGen/AMDGPU/commute-shifts.ll +++ b/llvm/test/CodeGen/AMDGPU/commute-shifts.ll @@ -30,7 +30,7 @@ define amdgpu_ps float @main(float %arg0, float %arg1) #0 { ; VI-NEXT: ; return to shader part epilog bb: %tmp = fptosi float %arg0 to i32 - %tmp1 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 undef, <8 x i32> undef, i32 0, i32 0) + %tmp1 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 undef, <8 x i32> poison, i32 0, i32 0) %tmp2.f = extractelement <4 x float> %tmp1, i32 0 %tmp2 = bitcast float %tmp2.f to i32 %tmp3 = and i32 %tmp, 7 diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll index 646ea8a584f2b..4da5d727acb1b 100644 --- a/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll +++ b/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll @@ -143,7 +143,7 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 { ; GFX11-NEXT: v_max_f32_e32 v0, 0, v1 ; GFX11-NEXT: ; return to shader part epilog .entry: - %0 = call <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 7, float undef, float undef, <8 x i32> undef, <4 x i32> poison, i1 false, i32 0, i32 0) + %0 = call <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 7, float undef, float undef, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0) %.i2243 = extractelement <3 x float> %0, i32 2 %1 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> poison, i32 0, i32 0) %2 = shufflevector <3 x i32> %1, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> @@ -151,13 +151,13 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 { %.i2248 = extractelement <4 x float> %3, i32 2 %.i2249 = fmul reassoc nnan nsz arcp contract afn float %.i2243, %.i2248 %4 = call reassoc nnan nsz arcp contract afn float @llvm.amdgcn.fmed3.f32(float undef, float 0.000000e+00, float 1.000000e+00) - %5 = call <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 7, float undef, float undef, <8 x i32> undef, <4 x i32> poison, i1 false, i32 0, i32 0) + %5 = call <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 7, float undef, float undef, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0) %.i2333 = extractelement <3 x float> %5, i32 2 %6 = call reassoc nnan nsz arcp contract afn float @llvm.amdgcn.fmed3.f32(float undef, float 0.000000e+00, float 1.000000e+00) - %7 = call <2 x float> @llvm.amdgcn.image.sample.2d.v2f32.f32(i32 3, float undef, float undef, <8 x i32> undef, <4 x i32> poison, i1 false, i32 0, i32 0) + %7 = call <2 x float> @llvm.amdgcn.image.sample.2d.v2f32.f32(i32 3, float undef, float undef, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0) %.i1408 = extractelement <2 x float> %7, i32 1 %.i0364 = extractelement <2 x float> %7, i32 0 - %8 = call float @llvm.amdgcn.image.sample.2d.f32.f32(i32 1, float undef, float undef, <8 x i32> undef, <4 x i32> poison, i1 false, i32 0, i32 0) + %8 = call float @llvm.amdgcn.image.sample.2d.f32.f32(i32 1, float undef, float undef, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0) %9 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> poison, i32 112, i32 0) %10 = shufflevector <3 x i32> %9, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> %11 = bitcast <4 x i32> %10 to <4 x float> @@ -204,10 +204,10 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 { %40 = fmul reassoc nnan nsz arcp contract afn float %39, 0x3F847AE140000000 %41 = fadd reassoc nnan nsz arcp contract afn float %40, 0x3F947AE140000000 %.i2415 = fmul reassoc nnan nsz arcp contract afn float %.i2407, %41 - %42 = call <3 x float> @llvm.amdgcn.image.load.mip.2d.v3f32.i32(i32 7, i32 undef, i32 undef, i32 0, <8 x i32> undef, i32 0, i32 0) + %42 = call <3 x float> @llvm.amdgcn.image.load.mip.2d.v3f32.i32(i32 7, i32 undef, i32 undef, i32 0, <8 x i32> poison, i32 0, i32 0) %.i2521 = extractelement <3 x float> %42, i32 2 %43 = call reassoc nnan nsz arcp contract afn float @llvm.amdgcn.fmed3.f32(float undef, float 0.000000e+00, float 1.000000e+00) - %44 = call <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 7, float undef, float undef, <8 x i32> undef, <4 x i32> poison, i1 false, i32 0, i32 0) + %44 = call <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 7, float undef, float undef, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0) %.i2465 = extractelement <3 x float> %44, i32 2 %.i2466 = fmul reassoc nnan nsz arcp contract afn float %.i2465, %43 %.i2469 = fmul reassoc nnan nsz arcp contract afn float %.i2415, %.i2466 @@ -224,7 +224,7 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 { %.i2488 = fmul reassoc nnan nsz arcp contract afn float %.i2249, %18 %.i2491 = fmul reassoc nnan nsz arcp contract afn float %.i2485, %4 %.i2494 = fadd reassoc nnan nsz arcp contract afn float %.i2479, %.i2491 - %51 = call <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 7, float undef, float undef, <8 x i32> undef, <4 x i32> poison, i1 false, i32 0, i32 0) + %51 = call <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 7, float undef, float undef, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0) %.i2515 = extractelement <3 x float> %51, i32 2 %.i2516 = fadd reassoc nnan nsz arcp contract afn float %.i2515, %.i2494 %.i2522 = fadd reassoc nnan nsz arcp contract afn float %.i2521, %.i2516 diff --git a/llvm/test/CodeGen/AMDGPU/else.ll b/llvm/test/CodeGen/AMDGPU/else.ll index aa9bd0fa4d618..4a3018e67b17d 100644 --- a/llvm/test/CodeGen/AMDGPU/else.ll +++ b/llvm/test/CodeGen/AMDGPU/else.ll @@ -41,7 +41,7 @@ if: else: %c = fmul float %v, 3.0 - %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0) + %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0) %v.else = extractelement <4 x float> %tex, i32 0 br label %end diff --git a/llvm/test/CodeGen/AMDGPU/img-nouse-adjust.ll b/llvm/test/CodeGen/AMDGPU/img-nouse-adjust.ll index 6d1adb9c1f87d..cd410ab8bc143 100644 --- a/llvm/test/CodeGen/AMDGPU/img-nouse-adjust.ll +++ b/llvm/test/CodeGen/AMDGPU/img-nouse-adjust.ll @@ -11,7 +11,7 @@ ; GCN: IMAGE_LOAD_V4_V2 define amdgpu_cs void @_amdgpu_cs_main(i32 %dummy) local_unnamed_addr #0 { .entry: - %unused.result = tail call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 undef, i32 undef, <8 x i32> undef, i32 0, i32 0) #3 + %unused.result = tail call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 undef, i32 undef, <8 x i32> poison, i32 0, i32 0) #3 call void asm sideeffect ";", "" () #0 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll index 8720fda9646e2..95447736611f5 100644 --- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll +++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll @@ -390,7 +390,7 @@ define <4 x float> @insertelement_to_sgpr() nounwind { ; GCN-NEXT: s_setpc_b64 s[30:31] %tmp = load <4 x i32>, ptr addrspace(4) undef %tmp1 = insertelement <4 x i32> %tmp, i32 0, i32 0 - %tmp2 = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 1, float undef, float undef, <8 x i32> undef, <4 x i32> %tmp1, i1 0, i32 0, i32 0) + %tmp2 = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 1, float undef, float undef, <8 x i32> poison, <4 x i32> %tmp1, i1 0, i32 0, i32 0) ret <4 x float> %tmp2 } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll index 107e0a5450a4c..14109391e141a 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll @@ -28,7 +28,7 @@ define amdgpu_ps float @test2() #0 { %live = call i1 @llvm.amdgcn.ps.live() %live.32 = zext i1 %live to i32 %live.32.bc = bitcast i32 %live.32 to float - %t = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %live.32.bc, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0) + %t = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %live.32.bc, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0) %r = extractelement <4 x float> %t, i32 0 ret float %r } @@ -51,7 +51,7 @@ dead: end: %tc = phi i32 [ %in, %entry ], [ %tc.dead, %dead ] %tc.bc = bitcast i32 %tc to float - %t = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tc.bc, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0) #0 + %t = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tc.bc, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0) #0 %r = extractelement <4 x float> %t, i32 0 ret float %r } diff --git a/llvm/test/CodeGen/AMDGPU/mixed-wave32-wave64.ll b/llvm/test/CodeGen/AMDGPU/mixed-wave32-wave64.ll index a59d1d2b6ae97..c55bdf2c79ef6 100644 --- a/llvm/test/CodeGen/AMDGPU/mixed-wave32-wave64.ll +++ b/llvm/test/CodeGen/AMDGPU/mixed-wave32-wave64.ll @@ -13,7 +13,7 @@ define amdgpu_hs void @_amdgpu_hs_main() #0 { define amdgpu_ps void @_amdgpu_ps_main(i32 %arg) local_unnamed_addr #1 { .entry: %tmp = tail call float @llvm.amdgcn.interp.p2(float undef, float undef, i32 1, i32 0, i32 %arg) #2 - %tmp1 = tail call float @llvm.amdgcn.image.sample.2d.f32.f32(i32 1, float undef, float %tmp, <8 x i32> undef, <4 x i32> poison, i1 false, i32 0, i32 0) + %tmp1 = tail call float @llvm.amdgcn.image.sample.2d.f32.f32(i32 1, float undef, float %tmp, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0) %tmp2 = fcmp olt float %tmp1, 5.000000e-01 br i1 %tmp2, label %bb, label %l diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-copy.ll b/llvm/test/CodeGen/AMDGPU/sgpr-copy.ll index f99f85a718253..8c10353b11259 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-copy.ll +++ b/llvm/test/CodeGen/AMDGPU/sgpr-copy.ll @@ -383,7 +383,7 @@ bb: %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 %tmp7 = getelementptr [17 x <4 x i32>], ptr addrspace(4) %arg, i32 0, i32 %tid %tmp8 = load <4 x i32>, ptr addrspace(4) %tmp7, align 16, !tbaa !0 - %tmp = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 7.500000e-01, float 2.500000e-01, <8 x i32> undef, <4 x i32> %tmp8, i1 0, i32 0, i32 0) + %tmp = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 7.500000e-01, float 2.500000e-01, <8 x i32> poison, <4 x i32> %tmp8, i1 0, i32 0, i32 0) %tmp10 = extractelement <4 x float> %tmp, i32 0 %tmp12 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %tmp10, float undef) call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp12, <2 x half> undef, i1 true, i1 true) #0 diff --git a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll index d624d6d7d9190..434f266e91d25 100644 --- a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll +++ b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll @@ -1532,7 +1532,7 @@ bb3: ; preds = %bb br label %bb4 bb4: ; preds = %bb3, %bb - %tmp5 = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 16, float %arg2, float %arg3, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0) + %tmp5 = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 16, float %arg2, float %arg3, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0) %tmp6 = extractelement <4 x float> %tmp5, i32 0 %tmp7 = fcmp une float %tmp6, 0.000000e+00 br i1 %tmp7, label %bb8, label %bb9 @@ -1677,7 +1677,7 @@ define amdgpu_ps void @cbranch_kill(i32 inreg %0, float %val0, float %val1) { ; GFX11-NEXT: exp mrt0 off, off, off, off done ; GFX11-NEXT: s_endpgm .entry: - %sample = call float @llvm.amdgcn.image.sample.l.2darray.f32.f32(i32 1, float %val1, float %val1, float %val1, float 0.000000e+00, <8 x i32> undef, <4 x i32> poison, i1 false, i32 0, i32 0) + %sample = call float @llvm.amdgcn.image.sample.l.2darray.f32.f32(i32 1, float %val1, float %val1, float %val1, float 0.000000e+00, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0) %cond0 = fcmp ugt float %sample, 0.000000e+00 br i1 %cond0, label %live, label %kill diff --git a/llvm/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll b/llvm/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll index 30af34c2ce807..07cbbdaabcb6c 100644 --- a/llvm/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll +++ b/llvm/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll @@ -65,7 +65,7 @@ bb7: ; preds = %bb6 br label %bb4 bb9: ; preds = %bb2 - %tmp10 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float undef, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0) + %tmp10 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float undef, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0) %tmp11 = extractelement <4 x float> %tmp10, i32 1 %tmp12 = extractelement <4 x float> %tmp10, i32 3 br label %bb14 diff --git a/llvm/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll b/llvm/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll index 33f59142f1913..5496a95d2b737 100644 --- a/llvm/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll +++ b/llvm/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll @@ -71,7 +71,7 @@ bb: %tmp1 = load volatile i32, ptr addrspace(1) poison, align 4 %tmp2 = insertelement <4 x i32> poison, i32 %tmp1, i32 0 %tmp3 = bitcast i32 %tmp1 to float - %tmp4 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %tmp3, float %tmp3, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0) + %tmp4 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %tmp3, float %tmp3, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0) %tmp5 = extractelement <4 x float> %tmp4, i32 0 %tmp6 = fmul float %tmp5, undef %tmp7 = fadd float %tmp6, %tmp6 diff --git a/llvm/test/CodeGen/AMDGPU/unigine-liveness-crash.ll b/llvm/test/CodeGen/AMDGPU/unigine-liveness-crash.ll index e4bff8549aa57..f5e9f706a849b 100644 --- a/llvm/test/CodeGen/AMDGPU/unigine-liveness-crash.ll +++ b/llvm/test/CodeGen/AMDGPU/unigine-liveness-crash.ll @@ -17,20 +17,20 @@ main_body: %j.f.i = bitcast i32 %j.i to float %p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 3, i32 4, i32 %arg6) #2 %p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 3, i32 4, i32 %arg6) #2 - %tmp23 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float undef, float undef, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0) + %tmp23 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float undef, float undef, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0) %tmp24 = extractelement <4 x float> %tmp23, i32 3 %tmp25 = fmul float %tmp24, %tmp24 %tmp26 = fmul float %p2.i, %p2.i %tmp27 = fadd float %tmp26, %tmp26 - %tmp32 = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float %tmp27, float 0.0, float undef, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0) + %tmp32 = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float %tmp27, float 0.0, float undef, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0) %tmp33 = extractelement <4 x float> %tmp32, i32 0 %tmp34 = fadd float %tmp33, %tmp33 %tmp35 = fadd float %tmp34, %tmp34 %tmp36 = fadd float %tmp35, %tmp35 %tmp37 = fadd float %tmp36, %tmp36 %tmp38 = fadd float %tmp37, %tmp37 - %tmp39 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float undef, float undef, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0) + %tmp39 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float undef, float undef, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0) %tmp40 = extractelement <4 x float> %tmp39, i32 0 %tmp41 = extractelement <4 x float> %tmp39, i32 1 %tmp42 = extractelement <4 x float> %tmp39, i32 2 @@ -48,12 +48,12 @@ main_body: %tmp54 = insertelement <4 x i32> %tmp53, i32 %tmp51, i32 1 %tmp55 = insertelement <4 x i32> %tmp54, i32 %tmp52, i32 2 %tmp55.cast = bitcast <4 x i32> %tmp55 to <4 x float> - %tmp56 = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float %tmp27, float %tmp48, float %tmp49, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0) + %tmp56 = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float %tmp27, float %tmp48, float %tmp49, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0) %tmp57 = extractelement <4 x float> %tmp56, i32 0 %tmp58 = fadd float %tmp38, %tmp57 %tmp59 = fadd float %tmp46, %tmp46 %tmp60 = fadd float %tmp47, %tmp47 - %tmp65 = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float undef, float %tmp59, float %tmp60, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0) + %tmp65 = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float undef, float %tmp59, float %tmp60, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0) %tmp66 = extractelement <4 x float> %tmp65, i32 0 %tmp67 = fadd float %tmp58, %tmp66 %tmp68 = fmul float %tmp67, 1.250000e-01 @@ -91,7 +91,7 @@ IF29: ; preds = %LOOP br label %ENDIF25 ENDIF28: ; preds = %LOOP - %tmp87 = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float %tmp27, float undef, float undef, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0) + %tmp87 = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float %tmp27, float undef, float undef, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0) %tmp88 = extractelement <4 x float> %tmp87, i32 0 %tmp89 = fadd float %tmp88, %tmp88 br label %LOOP diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll b/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll index af14cb00c323f..ebf6bd1d7f7f4 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll +++ b/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll @@ -188,7 +188,7 @@ main_body: call void asm sideeffect "", "~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15}"() #0 call void asm sideeffect "", "~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23}"() #0 call void asm sideeffect "", "~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() #0 - %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32 1, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> undef, <4 x i32> poison, i1 false, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32 1, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0) call void @extern_func() ret <4 x float> %v } @@ -353,10 +353,10 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, main_body: - %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32 1, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> undef, <4 x i32> poison, i1 false, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32 1, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) poison call void @extern_func() - %v1 = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32 1, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> undef, <4 x i32> poison, i1 false, i32 0, i32 0) + %v1 = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32 1, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0) ret <4 x float> %v1 } diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll index e695ad0e902f7..94c4eebe32281 100644 --- a/llvm/test/CodeGen/AMDGPU/wave32.ll +++ b/llvm/test/CodeGen/AMDGPU/wave32.ll @@ -1875,7 +1875,7 @@ loop: body: %c.iv0 = extractelement <4 x float> %c.iv, i32 0 - %c.next = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.iv0, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0) + %c.next = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.iv0, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0) %ctr.next = fadd float %ctr.iv, 2.0 br label %loop diff --git a/llvm/test/CodeGen/AMDGPU/wqm.ll b/llvm/test/CodeGen/AMDGPU/wqm.ll index a7ae7e1732967..3845db2edf73f 100644 --- a/llvm/test/CodeGen/AMDGPU/wqm.ll +++ b/llvm/test/CodeGen/AMDGPU/wqm.ll @@ -1994,7 +1994,7 @@ define amdgpu_ps <4 x float> @test_loop_vcc(<4 x float> %in) nounwind { ; GFX10-W32-NEXT: v_mov_b32_e32 v3, v7 ; GFX10-W32-NEXT: ; return to shader part epilog entry: - call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %in, i32 15, i32 undef, <8 x i32> undef, i32 0, i32 0) + call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %in, i32 15, i32 undef, <8 x i32> poison, i32 0, i32 0) br label %loop loop: @@ -2005,7 +2005,7 @@ loop: body: %c.iv0 = extractelement <4 x float> %c.iv, i32 0 - %c.next = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.iv0, <8 x i32> undef, <4 x i32> poison, i1 false, i32 0, i32 0) #0 + %c.next = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.iv0, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0) #0 %ctr.next = fadd float %ctr.iv, 2.0 br label %loop @@ -2080,7 +2080,7 @@ entry: %c.gep = getelementptr [32 x i32], ptr addrspace(5) %array, i32 0, i32 %idx %c = load i32, ptr addrspace(5) %c.gep, align 4 %c.bc = bitcast i32 %c to float - %t = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.bc, <8 x i32> undef, <4 x i32> poison, i1 false, i32 0, i32 0) #0 + %t = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.bc, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0) #0 call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> %t, ptr addrspace(8) undef, i32 0, i32 0, i32 0) ret void @@ -2112,9 +2112,9 @@ define amdgpu_ps <4 x float> @test_nonvoid_return() nounwind { ; GFX10-W32-NEXT: image_sample v[0:3], v0, s[0:7], s[0:3] dmask:0xf dim:SQ_RSRC_IMG_1D ; GFX10-W32-NEXT: s_waitcnt vmcnt(0) ; GFX10-W32-NEXT: ; return to shader part epilog - %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float undef, <8 x i32> undef, <4 x i32> poison, i1 false, i32 0, i32 0) #0 + %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float undef, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0) #0 %tex0 = extractelement <4 x float> %tex, i32 0 - %dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> undef, <4 x i32> poison, i1 false, i32 0, i32 0) #0 + %dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0) #0 ret <4 x float> %dtex } @@ -2155,9 +2155,9 @@ define amdgpu_ps <4 x float> @test_nonvoid_return_unreachable(i32 inreg %c) noun ; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-W32-NEXT: .LBB38_3: entry: - %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float undef, <8 x i32> undef, <4 x i32> poison, i1 false, i32 0, i32 0) #0 + %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float undef, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0) #0 %tex0 = extractelement <4 x float> %tex, i32 0 - %dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> undef, <4 x i32> poison, i1 false, i32 0, i32 0) #0 + %dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0) #0 %cc = icmp sgt i32 %c, 0 br i1 %cc, label %if, label %else @@ -2227,11 +2227,11 @@ main_body: br i1 %cc, label %if, label %else if: - %r.if = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float 0.0, <8 x i32> undef, <4 x i32> poison, i1 false, i32 0, i32 0) #0 + %r.if = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float 0.0, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0) #0 br label %end else: - %r.else = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 0.0, float bitcast (i32 1 to float), <8 x i32> undef, <4 x i32> poison, i1 false, i32 0, i32 0) #0 + %r.else = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 0.0, float bitcast (i32 1 to float), <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0) #0 br label %end end: _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits