Author: Simon Pilgrim
Date: 2021-01-18T15:55:00Z
New Revision: ce06475da94f1040d17d46d471dd48478576a76f
URL: https://github.com/llvm/llvm-project/commit/ce06475da94f1040d17d46d471dd48478576a76f DIFF: https://github.com/llvm/llvm-project/commit/ce06475da94f1040d17d46d471dd48478576a76f.diff LOG: [X86][AVX] IsElementEquivalent - add matchShuffleWithUNPCK + VBROADCAST/VBROADCAST_LOAD handling Specify LHS/RHS operands in matchShuffleWithUNPCK's calls to isTargetShuffleEquivalent, and handle VBROADCAST/VBROADCAST_LOAD matching in IsElementEquivalent Added: Modified: llvm/lib/Target/X86/X86ISelLowering.cpp llvm/test/CodeGen/X86/avg.ll llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll Removed: ################################################################################ diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 78a5d4a6dfbf8..60a2fd233d5cb 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -10960,6 +10960,11 @@ static bool IsElementEquivalent(int MaskSize, SDValue Op, SDValue ExpectedOp, MaskSize == (int)ExpectedOp.getNumOperands()) return Op.getOperand(Idx) == ExpectedOp.getOperand(ExpectedIdx); break; + case X86ISD::VBROADCAST: + case X86ISD::VBROADCAST_LOAD: + // TODO: Handle MaskSize != Op.getValueType().getVectorNumElements()? + return (Op == ExpectedOp && + Op.getValueType().getVectorNumElements() == MaskSize); case X86ISD::HADD: case X86ISD::HSUB: case X86ISD::FHADD: @@ -11321,7 +11326,8 @@ static bool matchShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2, // Attempt to match the target mask against the unpack lo/hi mask patterns. SmallVector<int, 64> Unpckl, Unpckh; createUnpackShuffleMask(VT, Unpckl, /* Lo = */ true, IsUnary); - if (isTargetShuffleEquivalent(VT, TargetMask, Unpckl)) { + if (isTargetShuffleEquivalent(VT, TargetMask, Unpckl, V1, + (IsUnary ? V1 : V2))) { UnpackOpcode = X86ISD::UNPCKL; V2 = (Undef2 ? DAG.getUNDEF(VT) : (IsUnary ? V1 : V2)); V1 = (Undef1 ? 
DAG.getUNDEF(VT) : V1); @@ -11329,7 +11335,8 @@ static bool matchShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2, } createUnpackShuffleMask(VT, Unpckh, /* Lo = */ false, IsUnary); - if (isTargetShuffleEquivalent(VT, TargetMask, Unpckh)) { + if (isTargetShuffleEquivalent(VT, TargetMask, Unpckh, V1, + (IsUnary ? V1 : V2))) { UnpackOpcode = X86ISD::UNPCKH; V2 = (Undef2 ? DAG.getUNDEF(VT) : (IsUnary ? V1 : V2)); V1 = (Undef1 ? DAG.getUNDEF(VT) : V1); diff --git a/llvm/test/CodeGen/X86/avg.ll b/llvm/test/CodeGen/X86/avg.ll index e2139fd20d32c..23fa7e033db9e 100644 --- a/llvm/test/CodeGen/X86/avg.ll +++ b/llvm/test/CodeGen/X86/avg.ll @@ -2245,7 +2245,7 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind ; AVX2-NEXT: vpunpcklbw {{.*#+}} xmm9 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] ; AVX2-NEXT: vpbroadcastw %xmm8, %xmm8 ; AVX2-NEXT: vpbroadcastw %xmm9, %xmm0 -; AVX2-NEXT: vpblendw {{.*#+}} xmm8 = xmm0[0,1,2,3,4,5,6],xmm8[7] +; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm8 = xmm0[0],xmm8[0],xmm0[1],xmm8[1],xmm0[2],xmm8[2],xmm0[3],xmm8[3] ; AVX2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm13[0],xmm12[0],xmm13[1],xmm12[1],xmm13[2],xmm12[2],xmm13[3],xmm12[3],xmm13[4],xmm12[4],xmm13[5],xmm12[5],xmm13[6],xmm12[6],xmm13[7],xmm12[7] ; AVX2-NEXT: vpunpcklbw {{.*#+}} xmm9 = xmm15[0],xmm14[0],xmm15[1],xmm14[1],xmm15[2],xmm14[2],xmm15[3],xmm14[3],xmm15[4],xmm14[4],xmm15[5],xmm14[5],xmm15[6],xmm14[6],xmm15[7],xmm14[7] ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0 diff --git a/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll b/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll index 29ea4d3bf55d3..4c86242a1d302 100644 --- a/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll +++ b/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll @@ -4230,11 +4230,10 @@ define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mem_mask6(<8 x doub define <4 x double> 
@test_masked_8xdouble_to_4xdouble_perm_mem_mask7(<8 x double>* %vp, <4 x double> %vec2, <4 x double> %mask) { ; CHECK-LABEL: test_masked_8xdouble_to_4xdouble_perm_mem_mask7: ; CHECK: # %bb.0: -; CHECK-NEXT: vbroadcastsd 40(%rdi), %ymm2 -; CHECK-NEXT: vblendpd $5, (%rdi), %ymm2, %ymm2 # ymm2 = mem[0],ymm2[1],mem[2],ymm2[3] +; CHECK-NEXT: vmovapd (%rdi), %ymm2 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm1, %k1 -; CHECK-NEXT: vmovapd %ymm2, %ymm0 {%k1} +; CHECK-NEXT: vunpcklpd 40(%rdi){1to4}, %ymm2, %ymm0 {%k1} ; CHECK-NEXT: retq %vec = load <8 x double>, <8 x double>* %vp %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 5> @@ -4246,11 +4245,10 @@ define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mem_mask7(<8 x double define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mem_mask7(<8 x double>* %vp, <4 x double> %mask) { ; CHECK-LABEL: test_masked_z_8xdouble_to_4xdouble_perm_mem_mask7: ; CHECK: # %bb.0: -; CHECK-NEXT: vbroadcastsd 40(%rdi), %ymm1 -; CHECK-NEXT: vblendpd $5, (%rdi), %ymm1, %ymm1 # ymm1 = mem[0],ymm1[1],mem[2],ymm1[3] +; CHECK-NEXT: vmovapd (%rdi), %ymm1 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm0, %k1 -; CHECK-NEXT: vmovapd %ymm1, %ymm0 {%k1} {z} +; CHECK-NEXT: vunpcklpd 40(%rdi){1to4}, %ymm1, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %vec = load <8 x double>, <8 x double>* %vp %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 5> _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits