https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/130898
I had to manually intervene in a few tests. fcanonicalize.f16.ll is directly sensitive to undef vs. poison. From 3a308651bb8da4521e8769651f860325a05eb4cf Mon Sep 17 00:00:00 2001 From: Matt Arsenault <matthew.arsena...@amd.com> Date: Wed, 12 Mar 2025 12:32:31 +0700 Subject: [PATCH] AMDGPU: Replace insertelement undef with poison in cases with manual updates I had to manually intervene in a few tests. fcanonicalize.f16.ll is directly sensitive to undef vs. poison. --- llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll | 16 +++++++------- .../AMDGPU/promote-alloca-array-aggregate.ll | 6 ++--- .../AMDGPU/promote-alloca-loadstores.ll | 22 +++++++++---------- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll index e72f3d3ce993a..d48b75a666db7 100644 --- a/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll @@ -238,7 +238,7 @@ define <2 x half> @v_test_canonicalize_build_vector_v2f16(half %lo, half %hi) #1 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-FAKE16-NEXT: v_pk_max_f16 v0, v0, v0 ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] - %ins0 = insertelement <2 x half> undef, half %lo, i32 0 + %ins0 = insertelement <2 x half> poison, half %lo, i32 0 %ins1 = insertelement <2 x half> %ins0, half %hi, i32 1 %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %ins1) ret <2 x half> %canonicalized @@ -2581,7 +2581,7 @@ define <2 x half> @v_test_canonicalize_reg_undef_v2f16(half %val) #1 { ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, 0 ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] - %vec = insertelement <2 x half> undef, half %val, i32 0 + %vec = insertelement <2 x half> poison, half %val, i32 0 %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %vec) ret <2 x half> %canonicalized } @@ -2622,7 +2622,7 @@ define <2 x half> 
@v_test_canonicalize_undef_reg_v2f16(half %val) #1 { ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] - %vec = insertelement <2 x half> undef, half %val, i32 1 + %vec = insertelement <2 x half> poison, half %val, i32 1 %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %vec) ret <2 x half> %canonicalized } @@ -2785,7 +2785,7 @@ define <2 x half> @v_test_canonicalize_reg_k_v2f16(half %val) #1 { ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, 2.0 ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] - %vec0 = insertelement <2 x half> undef, half %val, i32 0 + %vec0 = insertelement <2 x half> poison, half %val, i32 0 %vec1 = insertelement <2 x half> %vec0, half 2.0, i32 1 %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %vec1) ret <2 x half> %canonicalized @@ -2829,7 +2829,7 @@ define <2 x half> @v_test_canonicalize_k_reg_v2f16(half %val) #1 { ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, 2.0, v0 ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] - %vec0 = insertelement <2 x half> undef, half 2.0, i32 0 + %vec0 = insertelement <2 x half> poison, half 2.0, i32 0 %vec1 = insertelement <2 x half> %vec0, half %val, i32 1 %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %vec1) ret <2 x half> %canonicalized @@ -2925,7 +2925,7 @@ define <4 x half> @v_test_canonicalize_reg_undef_undef_undef_v4f16(half %val) #1 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, 0 ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] - %vec = insertelement <4 x half> undef, half %val, i32 0 + %vec = insertelement <4 x half> poison, half %val, i32 0 %canonicalized = call <4 x half> @llvm.canonicalize.v4f16(<4 x half> %vec) ret <4 x half> %canonicalized } @@ -2977,7 +2977,7 @@ define <4 x half> 
@v_test_canonicalize_reg_reg_undef_undef_v4f16(half %val0, hal ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-FAKE16-NEXT: v_pk_max_f16 v0, v0, v0 ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] - %vec0 = insertelement <4 x half> undef, half %val0, i32 0 + %vec0 = insertelement <4 x half> poison, half %val0, i32 0 %vec1 = insertelement <4 x half> %vec0, half %val1, i32 1 %canonicalized = call <4 x half> @llvm.canonicalize.v4f16(<4 x half> %vec1) ret <4 x half> %canonicalized @@ -3035,7 +3035,7 @@ define <4 x half> @v_test_canonicalize_reg_undef_reg_reg_v4f16(half %val0, half ; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, 0 ; GFX11-FAKE16-NEXT: v_pk_max_f16 v1, v1, v1 ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] - %vec0 = insertelement <4 x half> undef, half %val0, i32 0 + %vec0 = insertelement <4 x half> poison, half %val0, i32 0 %vec1 = insertelement <4 x half> %vec0, half %val1, i32 2 %vec2 = insertelement <4 x half> %vec1, half %val2, i32 3 %canonicalized = call <4 x half> @llvm.canonicalize.v4f16(<4 x half> %vec2) diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll index 2c3cb1e6a5e6e..a4a8a985df0bf 100644 --- a/llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll +++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll @@ -134,7 +134,7 @@ define amdgpu_vs void @promote_load_from_store_aggr_varoff(<4 x i32> %input) { ; CHECK-NEXT: [[FOO3_UNPACK2:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @block4, i64 8), align 4 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <3 x i32> undef, i32 [[FOO3_UNPACK2]], i32 2 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x i32> [[TMP1]], i32 [[FOO3_UNPACK2]] -; CHECK-NEXT: [[FOO12:%.*]] = insertelement <4 x i32> %input, i32 [[TMP2]], i64 3 +; CHECK-NEXT: [[FOO12:%.*]] = insertelement <4 x i32> [[INPUT:%.*]], i32 [[TMP2]], i64 3 ; CHECK-NEXT: store <4 x i32> [[FOO12]], ptr addrspace(1) @pv1, align 16 ; 
CHECK-NEXT: ret void ; @@ -344,7 +344,7 @@ define amdgpu_ps void @promote_double_aggr() #0 { ; CHECK-NEXT: [[FOO10:%.*]] = fadd double [[FOO5_FCA_1_EXTRACT]], [[FOO5_FCA_1_EXTRACT]] ; CHECK-NEXT: [[FOO16:%.*]] = fadd double [[FOO10]], [[FOO5_FCA_1_EXTRACT]] ; CHECK-NEXT: [[FOO17:%.*]] = fptrunc double [[FOO16]] to float -; CHECK-NEXT: [[FOO18:%.*]] = insertelement <4 x float> undef, float [[FOO17]], i32 0 +; CHECK-NEXT: [[FOO18:%.*]] = insertelement <4 x float> poison, float [[FOO17]], i32 0 ; CHECK-NEXT: [[FOO19:%.*]] = insertelement <4 x float> [[FOO18]], float [[FOO17]], i32 1 ; CHECK-NEXT: [[FOO20:%.*]] = insertelement <4 x float> [[FOO19]], float [[FOO17]], i32 2 ; CHECK-NEXT: [[FOO21:%.*]] = insertelement <4 x float> [[FOO20]], float [[FOO17]], i32 3 @@ -370,7 +370,7 @@ define amdgpu_ps void @promote_double_aggr() #0 { %foo15 = load double, ptr addrspace(5) %foo14 %foo16 = fadd double %foo13, %foo15 %foo17 = fptrunc double %foo16 to float - %foo18 = insertelement <4 x float> undef, float %foo17, i32 0 + %foo18 = insertelement <4 x float> poison, float %foo17, i32 0 %foo19 = insertelement <4 x float> %foo18, float %foo17, i32 1 %foo20 = insertelement <4 x float> %foo19, float %foo17, i32 2 %foo21 = insertelement <4 x float> %foo20, float %foo17, i32 3 diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-loadstores.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-loadstores.ll index 1e49500a243e1..119d3611e1007 100644 --- a/llvm/test/CodeGen/AMDGPU/promote-alloca-loadstores.ll +++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-loadstores.ll @@ -9,15 +9,15 @@ define amdgpu_kernel void @test_overwrite(i64 %val, i1 %cond) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[COND]], label [[LOOP:%.*]], label [[END:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[PROMOTEALLOCA:%.*]] = phi <3 x i64> [ [[TMP2:%.*]], [[LOOP]] ], [ <i64 43, i64 undef, i64 undef>, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[TMP0:%.*]] = extractelement <3 x i64> [[PROMOTEALLOCA]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = 
insertelement <3 x i64> [[PROMOTEALLOCA]], i64 68, i32 0 +; CHECK-NEXT: [[PROMOTEALLOCA1:%.*]] = phi <3 x i64> [ [[TMP2:%.*]], [[LOOP]] ], [ <i64 43, i64 undef, i64 undef>, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <3 x i64> [[PROMOTEALLOCA1]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <3 x i64> [[PROMOTEALLOCA1]], i64 68, i32 0 ; CHECK-NEXT: [[TMP2]] = insertelement <3 x i64> [[TMP1]], i64 32, i32 0 ; CHECK-NEXT: [[LOOP_CC:%.*]] = icmp ne i64 [[TMP0]], 68 ; CHECK-NEXT: br i1 [[LOOP_CC]], label [[LOOP]], label [[END]] ; CHECK: end: -; CHECK-NEXT: [[PROMOTEALLOCA1:%.*]] = phi <3 x i64> [ [[TMP2]], [[LOOP]] ], [ <i64 43, i64 undef, i64 undef>, [[ENTRY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <3 x i64> [[PROMOTEALLOCA1]], i32 0 +; CHECK-NEXT: [[PROMOTEALLOCA:%.*]] = phi <3 x i64> [ [[TMP2]], [[LOOP]] ], [ <i64 43, i64 undef, i64 undef>, [[ENTRY]] ] +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <3 x i64> [[PROMOTEALLOCA]], i32 0 ; CHECK-NEXT: ret void ; entry: @@ -64,15 +64,15 @@ define amdgpu_kernel void @test_no_overwrite(i64 %val, i1 %cond) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[COND]], label [[LOOP:%.*]], label [[END:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[PROMOTEALLOCA:%.*]] = phi <3 x i64> [ [[TMP1:%.*]], [[LOOP]] ], [ <i64 43, i64 undef, i64 undef>, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[TMP0:%.*]] = extractelement <3 x i64> [[PROMOTEALLOCA]], i32 0 -; CHECK-NEXT: [[TMP1]] = insertelement <3 x i64> [[PROMOTEALLOCA]], i64 32, i32 1 +; CHECK-NEXT: [[PROMOTEALLOCA1:%.*]] = phi <3 x i64> [ [[TMP1:%.*]], [[LOOP]] ], [ <i64 43, i64 undef, i64 undef>, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <3 x i64> [[PROMOTEALLOCA1]], i32 0 +; CHECK-NEXT: [[TMP1]] = insertelement <3 x i64> [[PROMOTEALLOCA1]], i64 32, i32 1 ; CHECK-NEXT: [[LOOP_CC:%.*]] = icmp ne i64 [[TMP0]], 32 ; CHECK-NEXT: br i1 [[LOOP_CC]], label [[LOOP]], label [[END]] ; CHECK: end: -; CHECK-NEXT: [[PROMOTEALLOCA1:%.*]] = phi <3 x i64> [ [[TMP1]], [[LOOP]] ], 
[ <i64 43, i64 undef, i64 undef>, [[ENTRY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x i64> [[PROMOTEALLOCA1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <3 x i64> [[PROMOTEALLOCA1]], i32 1 +; CHECK-NEXT: [[PROMOTEALLOCA:%.*]] = phi <3 x i64> [ [[TMP1]], [[LOOP]] ], [ <i64 43, i64 undef, i64 undef>, [[ENTRY]] ] +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x i64> [[PROMOTEALLOCA]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <3 x i64> [[PROMOTEALLOCA]], i32 1 ; CHECK-NEXT: ret void ; entry: _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits