llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) <details> <summary>Changes</summary> --- Patch is 161.48 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/130899.diff 63 Files Affected: - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll (+40-40) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot2.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot2.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/trunc.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll (+5-5) - (modified) llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis-heuristics.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/amdpal_scratch_mergedshader.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/buffer-rsrc-ptr-ops.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/bug-deadlanes.ll (+3-3) - (modified) llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll (+4-4) - (modified) llvm/test/CodeGen/AMDGPU/call-argument-types.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/clamp-modifier.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/clamp.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/coalesce-vgpr-alignment.ll (+4-4) - (modified) llvm/test/CodeGen/AMDGPU/combine_vloads.ll (+4-4) - (modified) llvm/test/CodeGen/AMDGPU/computeKnownBits-scalar-to-vector-crash.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/concat_vectors.ll (+4-4) - (modified) llvm/test/CodeGen/AMDGPU/dagcomb-shuffle-vecextend-non2.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll (+7-7) - (modified) llvm/test/CodeGen/AMDGPU/dagcombiner-bug-illegal-vec4-int-to-fp.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/debug-value.ll (+3-3) - (modified) llvm/test/CodeGen/AMDGPU/debug-value2.ll (+8-8) - (modified) llvm/test/CodeGen/AMDGPU/early-if-convert.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll (+10-10) - (modified) llvm/test/CodeGen/AMDGPU/extract-subvector-equal-length.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/extract-subvector.ll (+7-7) - (modified) llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/fmac.sdwa.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/greedy-reverse-local-assignment.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll (+3-3) - (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll (+8-8) - (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.store.format.d16.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.format.d16.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.tbuffer.store.d16.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.d16.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.store.format.d16.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.store.format.d16.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.tbuffer.store.d16.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.d16.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/load-local-redundant-copies.ll (+7-7) - (modified) llvm/test/CodeGen/AMDGPU/loop-live-out-copy-undef-subrange.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/mad-mix.ll (+3-3) - (modified) llvm/test/CodeGen/AMDGPU/mul_int24.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/packed-fp32.ll (+4-4) - (modified) llvm/test/CodeGen/AMDGPU/packed-op-sel.ll (+11-11) - (modified) llvm/test/CodeGen/AMDGPU/permute_i8.ll (+13-13) - (modified) llvm/test/CodeGen/AMDGPU/reduction.ll (+41-41) - (modified) llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll (+5-5) - (modified) llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll (+3-3) - (modified) llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/trunc-store-vec-i16-to-i8.ll (+8-8) - (modified) llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll (+51-51) - (modified) llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/wmma_modifiers.ll (+2-2) ``````````diff diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.ll index 39423ea22cbb4..d317a3ef54162 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.ll @@ -12,7 +12,7 @@ define void @value_finder_bug(ptr addrspace(5) %store_ptr, ptr addrspace(4) %ptr ; GFX10-NEXT: s_setpc_b64 s[30:31] %vec = load <4 x float>, ptr addrspace(4) %ptr, align 4 %vec.3 = extractelement <4 x float> %vec, i32 3 - %shuffle = shufflevector <4 x float> %vec, <4 x float> undef, <2 x i32> <i32 2, i32 undef> + %shuffle = shufflevector <4 x float> %vec, <4 x float> poison, <2 x i32> <i32 2, i32 poison> %new_vec = insertelement <2 x float> %shuffle, float %vec.3, i32 1 store <2 x float> %new_vec, ptr addrspace(5) %store_ptr, align 8 ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll index 298dfcf048fc4..ece08e40ae453 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll @@ -870,10 +870,10 @@ define void @dyn_insertelement_v8f64_const_s_v_v(double %val, i32 %idx) { ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: %insert = insertelement <8 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0>, double %val, i32 %idx - %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1> - %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3> - %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5> - %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7> + %vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 0, i32 1> + %vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 2, i32 3> + %vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 4, i32 5> + %vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 6, i32 7> store volatile <2 x double> %vec.0, ptr addrspace(1) undef store volatile <2 x double> %vec.1, ptr addrspace(1) undef store volatile <2 x double> %vec.2, ptr addrspace(1) undef @@ -1081,10 +1081,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_s_s_v(<8 x double> inreg %vec, do ; GFX11-NEXT: s_endpgm entry: %insert = insertelement <8 x double> %vec, double %val, i32 %idx - %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1> - %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3> - %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5> - %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7> + %vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 0, i32 1> + %vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 2, i32 3> + %vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 4, i32 5> + %vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 6, i32 7> store volatile <2 x double> %vec.0, ptr addrspace(1) undef store volatile <2 x double> %vec.1, ptr addrspace(1) undef store volatile <2 x double> %vec.2, ptr addrspace(1) undef @@ -1229,10 +1229,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_s_v_s(<8 x double> inreg %vec, do ; GFX11-NEXT: s_endpgm entry: %insert = insertelement <8 x double> %vec, double %val, i32 %idx - %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1> - %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3> - %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5> - %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7> + %vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 0, i32 1> + %vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 2, i32 3> + %vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 4, i32 5> + %vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 6, i32 7> store volatile <2 x double> %vec.0, ptr addrspace(1) undef store volatile <2 x double> %vec.1, ptr addrspace(1) undef store volatile <2 x double> %vec.2, ptr addrspace(1) undef @@ -1289,10 +1289,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_v_s_s(<8 x double> %vec, double i ; GFX11-NEXT: s_endpgm entry: %insert = insertelement <8 x double> %vec, double %val, i32 %idx - %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1> - %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3> - %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5> - %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7> + %vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 0, i32 1> + %vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 2, i32 3> + %vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 4, i32 5> + %vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 6, i32 7> store volatile <2 x double> %vec.0, ptr addrspace(1) undef store volatile <2 x double> %vec.1, ptr addrspace(1) undef store volatile <2 x double> %vec.2, ptr addrspace(1) undef @@ -1494,10 +1494,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_s_v_v(<8 x double> inreg %vec, do ; GFX11-NEXT: s_endpgm entry: %insert = insertelement <8 x double> %vec, double %val, i32 %idx - %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1> - %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3> - %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5> - %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7> + %vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 0, i32 1> + %vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 2, i32 3> + %vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 4, i32 5> + %vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 6, i32 7> store volatile <2 x double> %vec.0, ptr addrspace(1) undef store volatile <2 x double> %vec.1, ptr addrspace(1) undef store volatile <2 x double> %vec.2, ptr addrspace(1) undef @@ -1617,10 +1617,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_v_s_v(<8 x double> %vec, double i ; GFX11-NEXT: s_endpgm entry: %insert = insertelement <8 x double> %vec, double %val, i32 %idx - %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1> - %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3> - %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5> - %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7> + %vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 0, i32 1> + %vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 2, i32 3> + %vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 4, i32 5> + %vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 6, i32 7> store volatile <2 x double> %vec.0, ptr addrspace(1) undef store volatile <2 x double> %vec.1, ptr addrspace(1) undef store volatile <2 x double> %vec.2, ptr addrspace(1) undef @@ -1677,10 +1677,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_v_v_s(<8 x double> %vec, double % ; GFX11-NEXT: s_endpgm entry: %insert = insertelement <8 x double> %vec, double %val, i32 %idx - %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1> - %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3> - %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5> - %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7> + %vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 0, i32 1> + %vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 2, i32 3> + %vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 4, i32 5> + %vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 6, i32 7> store volatile <2 x double> %vec.0, ptr addrspace(1) undef store volatile <2 x double> %vec.1, ptr addrspace(1) undef store volatile <2 x double> %vec.2, ptr addrspace(1) undef @@ -1794,10 +1794,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_v_v_v(<8 x double> %vec, double % ; GFX11-NEXT: s_endpgm entry: %insert = insertelement <8 x double> %vec, double %val, i32 %idx - %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1> - %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3> - %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5> - %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7> + %vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 0, i32 1> + %vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 2, i32 3> + %vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 4, i32 5> + %vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 6, i32 7> store volatile <2 x double> %vec.0, ptr addrspace(1) undef store volatile <2 x double> %vec.1, ptr addrspace(1) undef store volatile <2 x double> %vec.2, ptr addrspace(1) undef @@ -2401,10 +2401,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_s_s_s_add_1(<8 x double> inreg %v entry: %idx.add = add i32 %idx, 1 %insert = insertelement <8 x double> %vec, double %val, i32 %idx.add - %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1> - %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3> - %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5> - %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7> + %vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 0, i32 1> + %vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 2, i32 3> + %vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 4, i32 5> + %vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 6, i32 7> store volatile <2 x double> %vec.0, ptr addrspace(1) undef store volatile <2 x double> %vec.1, ptr addrspace(1) undef store volatile <2 x double> %vec.2, ptr addrspace(1) undef @@ -2525,10 +2525,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_v_v_v_add_1(<8 x double> %vec, do entry: %idx.add = add i32 %idx, 1 %insert = insertelement <8 x double> %vec, double %val, i32 %idx.add - %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1> - %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3> - %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5> - %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7> + %vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 0, i32 1> + %vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 2, i32 3> + %vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 4, i32 5> + %vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 6, i32 7> store volatile <2 x double> %vec.0, ptr addrspace(1) undef store volatile <2 x double> %vec.1, ptr addrspace(1) undef store volatile <2 x double> %vec.2, ptr addrspace(1) undef diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll index 78f33a174980d..a7b063427bc8d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll @@ -1519,7 +1519,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 { ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 %load = load <2 x i64>, ptr addrspace(1) null - %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 undef>, <3 x i32> <i32 0, i32 1, i32 2> + %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 poison>, <3 x i32> <i32 0, i32 1, i32 2> call void @external_void_func_v3i64(<3 x i64> %val) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot2.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot2.ll index e2dab03e410aa..744a5b7feb48d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot2.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot2.ll @@ -322,7 +322,7 @@ define i32 @v_sdot2_shuffle10_a(<2 x i16> %a, <2 x i16> %b, i32 %c) { ; GFX10-NEXT: v_alignbit_b32 v0, v0, v0, 16 ; GFX10-NEXT: v_dot2_i32_i16 v0, v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] - %shuf.a = shufflevector <2 x i16> %a, <2 x i16> undef, <2 x i32> <i32 1, i32 0> + %shuf.a = shufflevector <2 x i16> %a, <2 x i16> poison, <2 x i32> <i32 1, i32 0> %r = call i32 @llvm.amdgcn.sdot2(<2 x i16> %shuf.a, <2 x i16> %b, i32 %c, i1 false) ret i32 %r } @@ -349,7 +349,7 @@ define i32 @v_sdot2_shuffle10_b(<2 x i16> %a, <2 x i16> %b, i32 %c) { ; GFX10-NEXT: v_alignbit_b32 v1, v1, v1, 16 ; GFX10-NEXT: v_dot2_i32_i16 v0, v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] - %shuf.b = shufflevector <2 x i16> %b, <2 x i16> undef, <2 x i32> <i32 1, i32 0> + %shuf.b = shufflevector <2 x i16> %b, <2 x i16> poison, <2 x i32> <i32 1, i32 0> %r = call i32 @llvm.amdgcn.sdot2(<2 x i16> %a, <2 x i16> %shuf.b, i32 %c, i1 false) ret i32 %r } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot2.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot2.ll index 3acff52874dd9..9e623494a5a04 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot2.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot2.ll @@ -306,7 +306,7 @@ define i32 @v_udot2_shuffle10_a(<2 x i16> %a, <2 x i16> %b, i32 %c) { ; GFX10-NEXT: v_alignbit_b32 v0, v0, v0, 16 ; GFX10-NEXT: v_dot2_u32_u16 v0, v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] - %shuf.a = shufflevector <2 x i16> %a, <2 x i16> undef, <2 x i32> <i32 1, i32 0> + %shuf.a = shufflevector <2 x i16> %a, <2 x i16> poison, <2 x i32> <i32 1, i32 0> %r = call i32 @llvm.amdgcn.udot2(<2 x i16> %shuf.a, <2 x i16> %b, i32 %c, i1 false) ret i32 %r } @@ -332,7 +332,7 @@ define i32 @v_udot2_shuffle10_b(<2 x i16> %a, <2 x i16> %b, i32 %c) { ; GFX10-NEXT: v_alignbit_b32 v1, v1, v1, 16 ; GFX10-NEXT: v_dot2_u32_u16 v0, v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] - %shuf.b = shufflevector <2 x i16> %b, <2 x i16> undef, <2 x i32> <i32 1, i32 0> + %shuf.b = shufflevector <2 x i16> %b, <2 x i16> poison, <2 x i32> <i32 1, i32 0> %r = call i32 @llvm.amdgcn.udot2(<2 x i16> %a, <2 x i16> %shuf.b, i32 %c, i1 false) ret i32 %r } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/trunc.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/trunc.ll index 0cb346c35552c..569ed35b150d8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/trunc.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/trunc.ll @@ -121,7 +121,7 @@ define amdgpu_ps i32 @s_trunc_v2i32_to_v2i16(<2 x i32> inreg %src) { ; ; FIXME: G_INSERT mishandled ; define <2 x i32> @v_trunc_v3i32_to_v3i16(<3 x i32> %src) { ; %trunc = trunc <3 x i32> %src to <3 x i16> -; %ext = shufflevector <3 x i16> %trunc, <3 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; %ext = shufflevector <3 x i16> %trunc, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; %cast = bitcast <4 x i16> %ext to <2 x i32> ; ret <2 x i32> %cast ; } @@ -129,7 +129,7 @@ define amdgpu_ps i32 @s_trunc_v2i32_to_v2i16(<2 x i32> inreg %src) { ; ; FIXME: G_INSERT mishandled ; define amdgpu_ps <2 x i32> @s_trunc_v3i32_to_v3i16(<3 x i32> inreg %src) { ; %trunc = trunc <3 x i32> %src to <3 x i16> -; %ext = shufflevector <3 x i16> %trunc, <3 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; %ext = shufflevector <3 x i16> %trunc, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; %cast = bitcast <4 x i16> %ext to <2 x i32> ; ret <2 x i32> %cast ; } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll index 9cf9839e69d5d..e12ac25867adb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll @@ -426,7 +426,7 @@ define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negC_pack(<8 x half> %A, <8 x ; GFX12-NEXT: s_endpgm bb: %C = load <16 x half>, ptr %Caddr - %C_shuffle = shufflevector <16 x half> %C, <16 x half> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> + %C_shuffle = shufflevector <16 x half> %C, <16 x half> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> %fneg.C_shuffle = fneg <8 x half> %C_shuffle %res = call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<8 x half> %A, <8 x half> %B, <8 x half> %fneg.C_shuffle , i1 0) store <8 x half> %res, ptr addrspace(1) %out diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll index be1761227f802..b871ac7976222 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll @@ -381,7 +381,7 @@ define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negC_pack(<4 x half> %A, <4 x ; GFX12-NEXT: s_endpgm bb: %C = load <8 x half>, ptr %Caddr - %C_shuffle = shufflevector <8 x half> %C, <8 x half> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %C_shuffle = shufflevector <8 x half> %C, <8 x half> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> %fneg.C_shuffle = fneg <4 x half> %C_shuffle %res = call <4 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<4 x half> %A, <4 x half> %B, <4 x half> %fneg.C_shuffle , i1 0) store <4 x half> %res, ptr addrspace(1) %out diff --git a/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll b/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll index ec4e2dda10d3a..d0c7a803ed6b5 100644 --- a/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll +++ b/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll @@ -9,7 +9,7 @@ define amdgpu_ps void @adjust_writemask_crash_0_nochain() #0 { main_body: %tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) %tmp1 = bitcast <2 x float> %tmp to <2 x i32> - %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> + %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison> %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float> %tmp4 = extractelement <4 x float> %tmp3, i32 0 store volatile float %tmp4, ptr add... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/130899 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits