[llvm-branch-commits] [llvm] AMDGPU: Replace <8 x i32> undef uses in tests with poison (PR #130903)

Matt Arsenault via llvm-branch-commits Wed, 12 Mar 2025 06:49:21 -0700

https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/130903


From aeb2f61c061c99871f66e2b1173ea7c25a23c0d5 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <matthew.arsena...@amd.com>
Date: Wed, 12 Mar 2025 13:23:32 +0700
Subject: [PATCH] AMDGPU: Replace <8 x i32> undef uses in tests with poison

---
 .../AMDGPU/adjust-writemask-invalid-copy.ll    | 10 +++++-----
 llvm/test/CodeGen/AMDGPU/commute-shifts.ll     |  2 +-
 .../test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll | 14 +++++++-------
 llvm/test/CodeGen/AMDGPU/else.ll               |  2 +-
 llvm/test/CodeGen/AMDGPU/img-nouse-adjust.ll   |  2 +-
 llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll  |  2 +-
 .../test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll |  4 ++--
 .../test/CodeGen/AMDGPU/mixed-wave32-wave64.ll |  2 +-
 llvm/test/CodeGen/AMDGPU/sgpr-copy.ll          |  2 +-
 llvm/test/CodeGen/AMDGPU/skip-if-dead.ll       |  4 ++--
 .../CodeGen/AMDGPU/subreg-coalescer-crash.ll   |  2 +-
 .../AMDGPU/undefined-subreg-liverange.ll       |  2 +-
 .../CodeGen/AMDGPU/unigine-liveness-crash.ll   | 12 ++++++------
 .../CodeGen/AMDGPU/vgpr-tuple-allocation.ll    |  6 +++---
 llvm/test/CodeGen/AMDGPU/wave32.ll             |  2 +-
 llvm/test/CodeGen/AMDGPU/wqm.ll                | 18 +++++++++---------
 16 files changed, 43 insertions(+), 43 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll b/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll
index b913b5c3ab746..dd85edf59b18f 100644
--- a/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll
@@ -7,7 +7,7 @@
 ; GCN: buffer_store_dword v0
 define amdgpu_ps void @adjust_writemask_crash_0_nochain() #0 {
 main_body:
-  %tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0)
+  %tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
   %tmp1 = bitcast <2 x float> %tmp to <2 x i32>
   %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> <i32 1, 
i32 poison, i32 poison, i32 poison>
   %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
@@ -23,7 +23,7 @@ main_body:
 ; GCN: buffer_store_dword v0
 define amdgpu_ps void @adjust_writemask_crash_1_nochain() #0 {
 main_body:
-  %tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0)
+  %tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
   %tmp1 = bitcast <2 x float> %tmp to <2 x i32>
   %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> <i32 1, 
i32 0, i32 poison, i32 poison>
   %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
@@ -39,7 +39,7 @@ main_body:
 ; GCN: buffer_store_dword v0
 define amdgpu_ps void @adjust_writemask_crash_0_chain() #0 {
 main_body:
-  %tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0)
+  %tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
   %tmp1 = bitcast <2 x float> %tmp to <2 x i32>
   %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> <i32 1, 
i32 poison, i32 poison, i32 poison>
   %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
@@ -55,7 +55,7 @@ main_body:
 ; GCN: buffer_store_dword v0
 define amdgpu_ps void @adjust_writemask_crash_1_chain() #0 {
 main_body:
-  %tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0)
+  %tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
   %tmp1 = bitcast <2 x float> %tmp to <2 x i32>
   %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> <i32 1, 
i32 0, i32 poison, i32 poison>
   %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
@@ -66,7 +66,7 @@ main_body:
 
 define amdgpu_ps void @adjust_writemask_crash_0_v4() #0 {
 main_body:
-  %tmp = call <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32 5, float 
undef, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0)
+  %tmp = call <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32 5, float 
undef, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
   %tmp1 = bitcast <4 x float> %tmp to <4 x i32>
   %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> poison, <4 x i32> <i32 1, 
i32 poison, i32 poison, i32 poison>
   %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
diff --git a/llvm/test/CodeGen/AMDGPU/commute-shifts.ll b/llvm/test/CodeGen/AMDGPU/commute-shifts.ll
index 849348a7be53d..1e40b4c9f04cf 100644
--- a/llvm/test/CodeGen/AMDGPU/commute-shifts.ll
+++ b/llvm/test/CodeGen/AMDGPU/commute-shifts.ll
@@ -30,7 +30,7 @@ define amdgpu_ps float @main(float %arg0, float %arg1) #0 {
 ; VI-NEXT:    ; return to shader part epilog
 bb:
   %tmp = fptosi float %arg0 to i32
-  %tmp1 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 
undef, <8 x i32> undef, i32 0, i32 0)
+  %tmp1 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 
undef, <8 x i32> poison, i32 0, i32 0)
   %tmp2.f = extractelement <4 x float> %tmp1, i32 0
   %tmp2 = bitcast float %tmp2.f to i32
   %tmp3 = and i32 %tmp, 7
diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll
index 646ea8a584f2b..4da5d727acb1b 100644
--- a/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll
+++ b/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll
@@ -143,7 +143,7 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 {
 ; GFX11-NEXT:    v_max_f32_e32 v0, 0, v1
 ; GFX11-NEXT:    ; return to shader part epilog
 .entry:
-  %0 = call <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 7, float 
undef, float undef, <8 x i32> undef, <4 x i32> poison, i1 false, i32 0, i32 0)
+  %0 = call <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 7, float 
undef, float undef, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
   %.i2243 = extractelement <3 x float> %0, i32 2
   %1 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> poison, i32 
0, i32 0)
   %2 = shufflevector <3 x i32> %1, <3 x i32> poison, <4 x i32> <i32 0, i32 1, 
i32 2, i32 poison>
@@ -151,13 +151,13 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 {
   %.i2248 = extractelement <4 x float> %3, i32 2
   %.i2249 = fmul reassoc nnan nsz arcp contract afn float %.i2243, %.i2248
   %4 = call reassoc nnan nsz arcp contract afn float 
@llvm.amdgcn.fmed3.f32(float undef, float 0.000000e+00, float 1.000000e+00)
-  %5 = call <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 7, float 
undef, float undef, <8 x i32> undef, <4 x i32> poison, i1 false, i32 0, i32 0)
+  %5 = call <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 7, float 
undef, float undef, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
   %.i2333 = extractelement <3 x float> %5, i32 2
   %6 = call reassoc nnan nsz arcp contract afn float 
@llvm.amdgcn.fmed3.f32(float undef, float 0.000000e+00, float 1.000000e+00)
-  %7 = call <2 x float> @llvm.amdgcn.image.sample.2d.v2f32.f32(i32 3, float 
undef, float undef, <8 x i32> undef, <4 x i32> poison, i1 false, i32 0, i32 0)
+  %7 = call <2 x float> @llvm.amdgcn.image.sample.2d.v2f32.f32(i32 3, float 
undef, float undef, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
   %.i1408 = extractelement <2 x float> %7, i32 1
   %.i0364 = extractelement <2 x float> %7, i32 0
-  %8 = call float @llvm.amdgcn.image.sample.2d.f32.f32(i32 1, float undef, 
float undef, <8 x i32> undef, <4 x i32> poison, i1 false, i32 0, i32 0)
+  %8 = call float @llvm.amdgcn.image.sample.2d.f32.f32(i32 1, float undef, 
float undef, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
   %9 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> poison, i32 
112, i32 0)
   %10 = shufflevector <3 x i32> %9, <3 x i32> poison, <4 x i32> <i32 0, i32 1, 
i32 2, i32 poison>
   %11 = bitcast <4 x i32> %10 to <4 x float>
@@ -204,10 +204,10 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 {
   %40 = fmul reassoc nnan nsz arcp contract afn float %39, 0x3F847AE140000000
   %41 = fadd reassoc nnan nsz arcp contract afn float %40, 0x3F947AE140000000
   %.i2415 = fmul reassoc nnan nsz arcp contract afn float %.i2407, %41
-  %42 = call <3 x float> @llvm.amdgcn.image.load.mip.2d.v3f32.i32(i32 7, i32 
undef, i32 undef, i32 0, <8 x i32> undef, i32 0, i32 0)
+  %42 = call <3 x float> @llvm.amdgcn.image.load.mip.2d.v3f32.i32(i32 7, i32 
undef, i32 undef, i32 0, <8 x i32> poison, i32 0, i32 0)
   %.i2521 = extractelement <3 x float> %42, i32 2
   %43 = call reassoc nnan nsz arcp contract afn float 
@llvm.amdgcn.fmed3.f32(float undef, float 0.000000e+00, float 1.000000e+00)
-  %44 = call <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 7, float 
undef, float undef, <8 x i32> undef, <4 x i32> poison, i1 false, i32 0, i32 0)
+  %44 = call <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 7, float 
undef, float undef, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
   %.i2465 = extractelement <3 x float> %44, i32 2
   %.i2466 = fmul reassoc nnan nsz arcp contract afn float %.i2465, %43
   %.i2469 = fmul reassoc nnan nsz arcp contract afn float %.i2415, %.i2466
@@ -224,7 +224,7 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 {
   %.i2488 = fmul reassoc nnan nsz arcp contract afn float %.i2249, %18
   %.i2491 = fmul reassoc nnan nsz arcp contract afn float %.i2485, %4
   %.i2494 = fadd reassoc nnan nsz arcp contract afn float %.i2479, %.i2491
-  %51 = call <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 7, float 
undef, float undef, <8 x i32> undef, <4 x i32> poison, i1 false, i32 0, i32 0)
+  %51 = call <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 7, float 
undef, float undef, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
   %.i2515 = extractelement <3 x float> %51, i32 2
   %.i2516 = fadd reassoc nnan nsz arcp contract afn float %.i2515, %.i2494
   %.i2522 = fadd reassoc nnan nsz arcp contract afn float %.i2521, %.i2516
diff --git a/llvm/test/CodeGen/AMDGPU/else.ll b/llvm/test/CodeGen/AMDGPU/else.ll
index aa9bd0fa4d618..4a3018e67b17d 100644
--- a/llvm/test/CodeGen/AMDGPU/else.ll
+++ b/llvm/test/CodeGen/AMDGPU/else.ll
@@ -41,7 +41,7 @@ if:
 
 else:
   %c = fmul float %v, 3.0
-  %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float 
%c, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0)
+  %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float 
%c, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
   %v.else = extractelement <4 x float> %tex, i32 0
   br label %end
 
diff --git a/llvm/test/CodeGen/AMDGPU/img-nouse-adjust.ll b/llvm/test/CodeGen/AMDGPU/img-nouse-adjust.ll
index 6d1adb9c1f87d..cd410ab8bc143 100644
--- a/llvm/test/CodeGen/AMDGPU/img-nouse-adjust.ll
+++ b/llvm/test/CodeGen/AMDGPU/img-nouse-adjust.ll
@@ -11,7 +11,7 @@
 ; GCN: IMAGE_LOAD_V4_V2
 define amdgpu_cs void @_amdgpu_cs_main(i32 %dummy) local_unnamed_addr #0 {
 .entry:
-  %unused.result = tail call <4 x float> 
@llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 undef, i32 undef, <8 x i32> 
undef, i32 0, i32 0) #3
+  %unused.result = tail call <4 x float> 
@llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 undef, i32 undef, <8 x i32> 
poison, i32 0, i32 0) #3
   call void asm sideeffect ";", "" () #0
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
index 8720fda9646e2..95447736611f5 100644
--- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
@@ -390,7 +390,7 @@ define <4 x float> @insertelement_to_sgpr() nounwind {
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %tmp = load <4 x i32>, ptr addrspace(4) undef
   %tmp1 = insertelement <4 x i32> %tmp, i32 0, i32 0
-  %tmp2 = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 1, 
float undef, float undef, <8 x i32> undef, <4 x i32> %tmp1, i1 0, i32 0, i32 0)
+  %tmp2 = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 1, 
float undef, float undef, <8 x i32> poison, <4 x i32> %tmp1, i1 0, i32 0, i32 0)
   ret <4 x float> %tmp2
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll
index 107e0a5450a4c..14109391e141a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll
@@ -28,7 +28,7 @@ define amdgpu_ps float @test2() #0 {
   %live = call i1 @llvm.amdgcn.ps.live()
   %live.32 = zext i1 %live to i32
   %live.32.bc = bitcast i32 %live.32 to float
-  %t = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float 
%live.32.bc, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0)
+  %t = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float 
%live.32.bc, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
   %r = extractelement <4 x float> %t, i32 0
   ret float %r
 }
@@ -51,7 +51,7 @@ dead:
 end:
   %tc = phi i32 [ %in, %entry ], [ %tc.dead, %dead ]
   %tc.bc = bitcast i32 %tc to float
-  %t = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float 
%tc.bc, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0) #0
+  %t = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float 
%tc.bc, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0) #0
   %r = extractelement <4 x float> %t, i32 0
   ret float %r
 }
diff --git a/llvm/test/CodeGen/AMDGPU/mixed-wave32-wave64.ll b/llvm/test/CodeGen/AMDGPU/mixed-wave32-wave64.ll
index a59d1d2b6ae97..c55bdf2c79ef6 100644
--- a/llvm/test/CodeGen/AMDGPU/mixed-wave32-wave64.ll
+++ b/llvm/test/CodeGen/AMDGPU/mixed-wave32-wave64.ll
@@ -13,7 +13,7 @@ define amdgpu_hs void @_amdgpu_hs_main() #0 {
 define amdgpu_ps void @_amdgpu_ps_main(i32 %arg) local_unnamed_addr #1 {
 .entry:
   %tmp = tail call float @llvm.amdgcn.interp.p2(float undef, float undef, i32 
1, i32 0, i32 %arg) #2
-  %tmp1 = tail call float @llvm.amdgcn.image.sample.2d.f32.f32(i32 1, float 
undef, float %tmp, <8 x i32> undef, <4 x i32> poison, i1 false, i32 0, i32 0)
+  %tmp1 = tail call float @llvm.amdgcn.image.sample.2d.f32.f32(i32 1, float 
undef, float %tmp, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
   %tmp2 = fcmp olt float %tmp1, 5.000000e-01
   br i1 %tmp2, label %bb, label %l
 
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-copy.ll b/llvm/test/CodeGen/AMDGPU/sgpr-copy.ll
index f99f85a718253..8c10353b11259 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-copy.ll
@@ -383,7 +383,7 @@ bb:
   %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
   %tmp7 = getelementptr [17 x <4 x i32>], ptr addrspace(4) %arg, i32 0, i32 
%tid
   %tmp8 = load <4 x i32>, ptr addrspace(4) %tmp7, align 16, !tbaa !0
-  %tmp = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 
7.500000e-01, float 2.500000e-01, <8 x i32> undef, <4 x i32> %tmp8, i1 0, i32 
0, i32 0)
+  %tmp = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 
7.500000e-01, float 2.500000e-01, <8 x i32> poison, <4 x i32> %tmp8, i1 0, i32 
0, i32 0)
   %tmp10 = extractelement <4 x float> %tmp, i32 0
   %tmp12 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %tmp10, float undef)
   call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp12, <2 
x half> undef, i1 true, i1 true) #0
diff --git a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
index d624d6d7d9190..434f266e91d25 100644
--- a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
+++ b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
@@ -1532,7 +1532,7 @@ bb3:                                              ; preds = %bb
   br label %bb4
 
 bb4:                                              ; preds = %bb3, %bb
-  %tmp5 = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 16, 
float %arg2, float %arg3, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0)
+  %tmp5 = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 16, 
float %arg2, float %arg3, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 
0)
   %tmp6 = extractelement <4 x float> %tmp5, i32 0
   %tmp7 = fcmp une float %tmp6, 0.000000e+00
   br i1 %tmp7, label %bb8, label %bb9
@@ -1677,7 +1677,7 @@ define amdgpu_ps void @cbranch_kill(i32 inreg %0, float %val0, float %val1) {
 ; GFX11-NEXT:    exp mrt0 off, off, off, off done
 ; GFX11-NEXT:    s_endpgm
 .entry:
-  %sample = call float @llvm.amdgcn.image.sample.l.2darray.f32.f32(i32 1, 
float %val1, float %val1, float %val1, float 0.000000e+00, <8 x i32> undef, <4 
x i32> poison, i1 false, i32 0, i32 0)
+  %sample = call float @llvm.amdgcn.image.sample.l.2darray.f32.f32(i32 1, 
float %val1, float %val1, float %val1, float 0.000000e+00, <8 x i32> poison, <4 
x i32> poison, i1 false, i32 0, i32 0)
   %cond0 = fcmp ugt float %sample, 0.000000e+00
   br i1 %cond0, label %live, label %kill
 
diff --git a/llvm/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll b/llvm/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll
index 30af34c2ce807..07cbbdaabcb6c 100644
--- a/llvm/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll
@@ -65,7 +65,7 @@ bb7:                                              ; preds = %bb6
   br label %bb4
 
 bb9:                                              ; preds = %bb2
-  %tmp10 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, 
float undef, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0)
+  %tmp10 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, 
float undef, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
   %tmp11 = extractelement <4 x float> %tmp10, i32 1
   %tmp12 = extractelement <4 x float> %tmp10, i32 3
   br label %bb14
diff --git a/llvm/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll b/llvm/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll
index 33f59142f1913..5496a95d2b737 100644
--- a/llvm/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll
+++ b/llvm/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll
@@ -71,7 +71,7 @@ bb:
   %tmp1 = load volatile i32, ptr addrspace(1) poison, align 4
   %tmp2 = insertelement <4 x i32> poison, i32 %tmp1, i32 0
   %tmp3 = bitcast i32 %tmp1 to float
-  %tmp4 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, 
float %tmp3, float %tmp3, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0)
+  %tmp4 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, 
float %tmp3, float %tmp3, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 
0)
   %tmp5 = extractelement <4 x float> %tmp4, i32 0
   %tmp6 = fmul float %tmp5, undef
   %tmp7 = fadd float %tmp6, %tmp6
diff --git a/llvm/test/CodeGen/AMDGPU/unigine-liveness-crash.ll b/llvm/test/CodeGen/AMDGPU/unigine-liveness-crash.ll
index e4bff8549aa57..f5e9f706a849b 100644
--- a/llvm/test/CodeGen/AMDGPU/unigine-liveness-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/unigine-liveness-crash.ll
@@ -17,20 +17,20 @@ main_body:
   %j.f.i = bitcast i32 %j.i to float
   %p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 3, i32 4, i32 
%arg6) #2
   %p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 3, 
i32 4, i32 %arg6) #2
-  %tmp23 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, 
float undef, float undef, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0)
+  %tmp23 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, 
float undef, float undef, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 
0)
 
   %tmp24 = extractelement <4 x float> %tmp23, i32 3
   %tmp25 = fmul float %tmp24, %tmp24
   %tmp26 = fmul float %p2.i, %p2.i
   %tmp27 = fadd float %tmp26, %tmp26
-  %tmp32 = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, 
float %tmp27, float 0.0, float undef, <8 x i32> undef, <4 x i32> poison, i1 0, 
i32 0, i32 0)
+  %tmp32 = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, 
float %tmp27, float 0.0, float undef, <8 x i32> poison, <4 x i32> poison, i1 0, 
i32 0, i32 0)
   %tmp33 = extractelement <4 x float> %tmp32, i32 0
   %tmp34 = fadd float %tmp33, %tmp33
   %tmp35 = fadd float %tmp34, %tmp34
   %tmp36 = fadd float %tmp35, %tmp35
   %tmp37 = fadd float %tmp36, %tmp36
   %tmp38 = fadd float %tmp37, %tmp37
-  %tmp39 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, 
float undef, float undef, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0)
+  %tmp39 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, 
float undef, float undef, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 
0)
   %tmp40 = extractelement <4 x float> %tmp39, i32 0
   %tmp41 = extractelement <4 x float> %tmp39, i32 1
   %tmp42 = extractelement <4 x float> %tmp39, i32 2
@@ -48,12 +48,12 @@ main_body:
   %tmp54 = insertelement <4 x i32> %tmp53, i32 %tmp51, i32 1
   %tmp55 = insertelement <4 x i32> %tmp54, i32 %tmp52, i32 2
   %tmp55.cast = bitcast <4 x i32> %tmp55 to <4 x float>
-  %tmp56 = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, 
float %tmp27, float %tmp48, float %tmp49, <8 x i32> undef, <4 x i32> poison, i1 
0, i32 0, i32 0)
+  %tmp56 = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, 
float %tmp27, float %tmp48, float %tmp49, <8 x i32> poison, <4 x i32> poison, 
i1 0, i32 0, i32 0)
   %tmp57 = extractelement <4 x float> %tmp56, i32 0
   %tmp58 = fadd float %tmp38, %tmp57
   %tmp59 = fadd float %tmp46, %tmp46
   %tmp60 = fadd float %tmp47, %tmp47
-  %tmp65 = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, 
float undef, float %tmp59, float %tmp60, <8 x i32> undef, <4 x i32> poison, i1 
0, i32 0, i32 0)
+  %tmp65 = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, 
float undef, float %tmp59, float %tmp60, <8 x i32> poison, <4 x i32> poison, i1 
0, i32 0, i32 0)
   %tmp66 = extractelement <4 x float> %tmp65, i32 0
   %tmp67 = fadd float %tmp58, %tmp66
   %tmp68 = fmul float %tmp67, 1.250000e-01
@@ -91,7 +91,7 @@ IF29:                                             ; preds = %LOOP
   br label %ENDIF25
 
 ENDIF28:                                          ; preds = %LOOP
-  %tmp87 = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, 
float %tmp27, float undef, float undef, <8 x i32> undef, <4 x i32> poison, i1 
0, i32 0, i32 0)
+  %tmp87 = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, 
float %tmp27, float undef, float undef, <8 x i32> poison, <4 x i32> poison, i1 
0, i32 0, i32 0)
   %tmp88 = extractelement <4 x float> %tmp87, i32 0
   %tmp89 = fadd float %tmp88, %tmp88
   br label %LOOP
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll b/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll
index af14cb00c323f..ebf6bd1d7f7f4 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll
@@ -188,7 +188,7 @@ main_body:
   call void asm sideeffect "", 
"~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15}"() #0
   call void asm sideeffect "", 
"~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23}"() #0
   call void asm sideeffect "", 
"~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() #0
-  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32 
1, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> 
undef, <4 x i32> poison, i1 false, i32 0, i32 0)
+  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32 
1, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> 
poison, <4 x i32> poison, i1 false, i32 0, i32 0)
   call void @extern_func()
   ret <4 x float> %v
 }
@@ -353,10 +353,10 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp,
 
 
 main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32 
1, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> 
undef, <4 x i32> poison, i1 false, i32 0, i32 0)
+  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32 
1, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> 
poison, <4 x i32> poison, i1 false, i32 0, i32 0)
   store <4 x float> %v, ptr addrspace(1) poison
   call void @extern_func()
-  %v1 = call <4 x float> 
@llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32 1, float %bias, float 
%zcompare, float %s, float %t, float %clamp, <8 x i32> undef, <4 x i32> poison, 
i1 false, i32 0, i32 0)
+  %v1 = call <4 x float> 
@llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32 1, float %bias, float 
%zcompare, float %s, float %t, float %clamp, <8 x i32> poison, <4 x i32> 
poison, i1 false, i32 0, i32 0)
   ret <4 x float> %v1
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll
index e695ad0e902f7..94c4eebe32281 100644
--- a/llvm/test/CodeGen/AMDGPU/wave32.ll
+++ b/llvm/test/CodeGen/AMDGPU/wave32.ll
@@ -1875,7 +1875,7 @@ loop:
 
 body:
   %c.iv0 = extractelement <4 x float> %c.iv, i32 0
-  %c.next = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, 
float %c.iv0, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0)
+  %c.next = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, 
float %c.iv0, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
   %ctr.next = fadd float %ctr.iv, 2.0
   br label %loop
 
diff --git a/llvm/test/CodeGen/AMDGPU/wqm.ll b/llvm/test/CodeGen/AMDGPU/wqm.ll
index a7ae7e1732967..3845db2edf73f 100644
--- a/llvm/test/CodeGen/AMDGPU/wqm.ll
+++ b/llvm/test/CodeGen/AMDGPU/wqm.ll
@@ -1994,7 +1994,7 @@ define amdgpu_ps <4 x float> @test_loop_vcc(<4 x float> %in) nounwind {
 ; GFX10-W32-NEXT:    v_mov_b32_e32 v3, v7
 ; GFX10-W32-NEXT:    ; return to shader part epilog
 entry:
-  call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %in, i32 15, i32 
undef, <8 x i32> undef, i32 0, i32 0)
+  call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %in, i32 15, i32 
undef, <8 x i32> poison, i32 0, i32 0)
   br label %loop
 
 loop:
@@ -2005,7 +2005,7 @@ loop:
 
 body:
   %c.iv0 = extractelement <4 x float> %c.iv, i32 0
-  %c.next = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, 
float %c.iv0, <8 x i32> undef, <4 x i32> poison, i1 false, i32 0, i32 0) #0
+  %c.next = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, 
float %c.iv0, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0) #0
   %ctr.next = fadd float %ctr.iv, 2.0
   br label %loop
 
@@ -2080,7 +2080,7 @@ entry:
   %c.gep = getelementptr [32 x i32], ptr addrspace(5) %array, i32 0, i32 %idx
   %c = load i32, ptr addrspace(5) %c.gep, align 4
   %c.bc = bitcast i32 %c to float
-  %t = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float 
%c.bc, <8 x i32> undef, <4 x i32> poison, i1 false, i32 0, i32 0) #0
+  %t = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float 
%c.bc, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0) #0
   call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> %t, ptr 
addrspace(8) undef, i32 0, i32 0, i32 0)
 
   ret void
@@ -2112,9 +2112,9 @@ define amdgpu_ps <4 x float> @test_nonvoid_return() nounwind {
 ; GFX10-W32-NEXT:    image_sample v[0:3], v0, s[0:7], s[0:3] dmask:0xf 
dim:SQ_RSRC_IMG_1D
 ; GFX10-W32-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-W32-NEXT:    ; return to shader part epilog
-  %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float 
undef, <8 x i32> undef, <4 x i32> poison, i1 false, i32 0, i32 0) #0
+  %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float 
undef, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0) #0
   %tex0 = extractelement <4 x float> %tex, i32 0
-  %dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, 
float %tex0, <8 x i32> undef, <4 x i32> poison, i1 false, i32 0, i32 0) #0
+  %dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, 
float %tex0, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0) #0
   ret <4 x float> %dtex
 }
 
@@ -2155,9 +2155,9 @@ define amdgpu_ps <4 x float> @test_nonvoid_return_unreachable(i32 inreg %c) noun
 ; GFX10-W32-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-W32-NEXT:  .LBB38_3:
 entry:
-  %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float 
undef, <8 x i32> undef, <4 x i32> poison, i1 false, i32 0, i32 0) #0
+  %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float 
undef, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0) #0
   %tex0 = extractelement <4 x float> %tex, i32 0
-  %dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, 
float %tex0, <8 x i32> undef, <4 x i32> poison, i1 false, i32 0, i32 0) #0
+  %dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, 
float %tex0, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0) #0
   %cc = icmp sgt i32 %c, 0
   br i1 %cc, label %if, label %else
 
@@ -2227,11 +2227,11 @@ main_body:
   br i1 %cc, label %if, label %else
 
 if:
-  %r.if = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, 
float 0.0, <8 x i32> undef, <4 x i32> poison, i1 false, i32 0, i32 0) #0
+  %r.if = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, 
float 0.0, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0) #0
   br label %end
 
 else:
-  %r.else = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, 
float 0.0, float bitcast (i32 1 to float), <8 x i32> undef, <4 x i32> poison, 
i1 false, i32 0, i32 0) #0
+  %r.else = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, 
float 0.0, float bitcast (i32 1 to float), <8 x i32> poison, <4 x i32> poison, 
i1 false, i32 0, i32 0) #0
   br label %end
 
 end:

_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] AMDGPU: Replace <8 x i32> undef uses in tests with poison (PR #130903)

Reply via email to