Author: Simon Pilgrim Date: 2021-01-22T11:31:38Z New Revision: ffe72f987f4866c46c18174cdb750dea88bedba3
URL: https://github.com/llvm/llvm-project/commit/ffe72f987f4866c46c18174cdb750dea88bedba3 DIFF: https://github.com/llvm/llvm-project/commit/ffe72f987f4866c46c18174cdb750dea88bedba3.diff LOG: [X86][SSE] Don't fold shuffle(binop(),binop()) -> binop(shuffle(),shuffle()) if the shuffles are splats rGbe69e66b1cd8 added the fold, but DAGCombiner.visitVECTOR_SHUFFLE doesn't merge shuffles if the inner shuffle is a splat, so we need to bail. The non-fast-horiz-ops paths see some minor regressions; we might be able to improve on this after lowering to target shuffles. Fix PR48823 Added: Modified: llvm/lib/Target/X86/X86ISelLowering.cpp llvm/test/CodeGen/X86/haddsub-3.ll Removed: ################################################################################ diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index c5cc23f6236e..895a02e5c98e 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -37964,23 +37964,24 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG, return HAddSub; // Merge shuffles through binops if its likely we'll be able to merge it - // with other shuffles. + // with other shuffles (as long as they aren't splats). // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d))) // TODO: We might be able to move this to DAGCombiner::visitVECTOR_SHUFFLE. 
if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(N)) { unsigned SrcOpcode = N->getOperand(0).getOpcode(); if (SrcOpcode == N->getOperand(1).getOpcode() && TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N->getOperand(0).getNode()) && - N->isOnlyUserOf(N->getOperand(1).getNode()) && - VT.getScalarSizeInBits() >= 32) { + N->isOnlyUserOf(N->getOperand(1).getNode())) { SDValue Op00 = N->getOperand(0).getOperand(0); SDValue Op10 = N->getOperand(1).getOperand(0); SDValue Op01 = N->getOperand(0).getOperand(1); SDValue Op11 = N->getOperand(1).getOperand(1); - if ((Op00.getOpcode() == ISD::VECTOR_SHUFFLE || - Op10.getOpcode() == ISD::VECTOR_SHUFFLE) && - (Op01.getOpcode() == ISD::VECTOR_SHUFFLE || - Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) { + auto *SVN00 = dyn_cast<ShuffleVectorSDNode>(Op00); + auto *SVN10 = dyn_cast<ShuffleVectorSDNode>(Op10); + auto *SVN01 = dyn_cast<ShuffleVectorSDNode>(Op01); + auto *SVN11 = dyn_cast<ShuffleVectorSDNode>(Op11); + if (((SVN00 && !SVN00->isSplat()) || (SVN10 && !SVN10->isSplat())) && + ((SVN01 && !SVN01->isSplat()) || (SVN11 && !SVN11->isSplat()))) { SDLoc DL(N); ArrayRef<int> Mask = SVN->getMask(); SDValue LHS = DAG.getVectorShuffle(VT, DL, Op00, Op10, Mask); diff --git a/llvm/test/CodeGen/X86/haddsub-3.ll b/llvm/test/CodeGen/X86/haddsub-3.ll index 651ab4ef3935..48d4fe556555 100644 --- a/llvm/test/CodeGen/X86/haddsub-3.ll +++ b/llvm/test/CodeGen/X86/haddsub-3.ll @@ -161,46 +161,49 @@ define <4 x float> @PR48823(<4 x float> %0, <4 x float> %1) { ; SSE2-LABEL: PR48823: ; SSE2: # %bb.0: ; SSE2-NEXT: movaps %xmm0, %xmm2 -; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3] -; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,2] +; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[1,1] ; SSE2-NEXT: subps %xmm2, %xmm0 +; SSE2-NEXT: movaps %xmm1, %xmm2 +; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[2,2],xmm1[2,2] +; SSE2-NEXT: subps %xmm1, %xmm2 +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3] ; SSE2-NEXT: retq ; ; SSSE3-SLOW-LABEL: PR48823: ; 
SSSE3-SLOW: # %bb.0: -; SSSE3-SLOW-NEXT: movaps %xmm0, %xmm2 -; SSSE3-SLOW-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3] -; SSSE3-SLOW-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,2] +; SSSE3-SLOW-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] ; SSSE3-SLOW-NEXT: subps %xmm2, %xmm0 +; SSSE3-SLOW-NEXT: movsldup {{.*#+}} xmm2 = xmm1[0,0,2,2] +; SSSE3-SLOW-NEXT: subps %xmm1, %xmm2 +; SSSE3-SLOW-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3] ; SSSE3-SLOW-NEXT: retq ; ; SSSE3-FAST-LABEL: PR48823: ; SSSE3-FAST: # %bb.0: -; SSSE3-FAST-NEXT: movaps %xmm0, %xmm2 -; SSSE3-FAST-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3] -; SSSE3-FAST-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,2] -; SSSE3-FAST-NEXT: subps %xmm2, %xmm0 +; SSSE3-FAST-NEXT: hsubps %xmm1, %xmm0 ; SSSE3-FAST-NEXT: retq ; ; AVX1-SLOW-LABEL: PR48823: ; AVX1-SLOW: # %bb.0: -; AVX1-SLOW-NEXT: vshufps {{.*#+}} xmm2 = xmm0[1,1],xmm1[2,3] -; AVX1-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,2] +; AVX1-SLOW-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] ; AVX1-SLOW-NEXT: vsubps %xmm2, %xmm0, %xmm0 +; AVX1-SLOW-NEXT: vmovsldup {{.*#+}} xmm2 = xmm1[0,0,2,2] +; AVX1-SLOW-NEXT: vsubps %xmm1, %xmm2, %xmm1 +; AVX1-SLOW-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; AVX1-SLOW-NEXT: retq ; ; AVX1-FAST-LABEL: PR48823: ; AVX1-FAST: # %bb.0: -; AVX1-FAST-NEXT: vshufps {{.*#+}} xmm2 = xmm0[1,1],xmm1[2,3] -; AVX1-FAST-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,2] -; AVX1-FAST-NEXT: vsubps %xmm2, %xmm0, %xmm0 +; AVX1-FAST-NEXT: vhsubps %xmm1, %xmm0, %xmm0 ; AVX1-FAST-NEXT: retq ; ; AVX2-LABEL: PR48823: ; AVX2: # %bb.0: -; AVX2-NEXT: vshufps {{.*#+}} xmm2 = xmm0[1,1],xmm1[2,3] -; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,2] +; AVX2-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] ; AVX2-NEXT: vsubps %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vmovsldup {{.*#+}} xmm2 = xmm1[0,0,2,2] +; AVX2-NEXT: vsubps %xmm1, %xmm2, %xmm1 +; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; AVX2-NEXT: retq %3 = 
shufflevector <4 x float> %0, <4 x float> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> %4 = fsub <4 x float> %0, %3 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits