Author: Walter Lee
Date: 2025-03-27T19:13:12-04:00
New Revision: af9ec23840d3355ab78f4d82e558732337d382cb
URL: https://github.com/llvm/llvm-project/commit/af9ec23840d3355ab78f4d82e558732337d382cb
DIFF: https://github.com/llvm/llvm-project/commit/af9ec23840d3355ab78f4d82e558732337d382cb.diff

LOG: Revert "[X86] combineINSERT_SUBVECTOR - fold insert_subvector(base,extract_su…"

This reverts commit 491d3dfc761e3a03c6bd187533f4684d6864a8cb.

Added: 


Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll
    llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll
    llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-5.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 81c75a2f98079..72977923bac2b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -58823,8 +58823,6 @@ static SDValue combineINSERT_SUBVECTOR(SDNode *N, SelectionDAG &DAG,
   uint64_t IdxVal = N->getConstantOperandVal(2);
   MVT SubVecVT = SubVec.getSimpleValueType();
-  int VecNumElts = OpVT.getVectorNumElements();
-  int SubVecNumElts = SubVecVT.getVectorNumElements();
 
   if (Vec.isUndef() && SubVec.isUndef())
     return DAG.getUNDEF(OpVT);
 
@@ -58884,9 +58882,10 @@ static SDValue combineINSERT_SUBVECTOR(SDNode *N, SelectionDAG &DAG,
       SubVec.getOperand(0).getSimpleValueType() == OpVT &&
       (IdxVal != 0 ||
        !(Vec.isUndef() || ISD::isBuildVectorAllZeros(Vec.getNode())))) {
-    SDValue ExtSrc = SubVec.getOperand(0);
     int ExtIdxVal = SubVec.getConstantOperandVal(1);
     if (ExtIdxVal != 0) {
+      int VecNumElts = OpVT.getVectorNumElements();
+      int SubVecNumElts = SubVecVT.getVectorNumElements();
       SmallVector<int, 64> Mask(VecNumElts);
       // First create an identity shuffle mask.
       for (int i = 0; i != VecNumElts; ++i)
@@ -58894,24 +58893,8 @@ static SDValue combineINSERT_SUBVECTOR(SDNode *N, SelectionDAG &DAG,
       // Now insert the extracted portion.
       for (int i = 0; i != SubVecNumElts; ++i)
         Mask[i + IdxVal] = i + ExtIdxVal + VecNumElts;
-      return DAG.getVectorShuffle(OpVT, dl, Vec, ExtSrc, Mask);
-    }
-    // If we're broadcasting, see if we can use a blend instead of
-    // extract/insert pair. For subvector broadcasts, we must ensure that the
-    // subvector is aligned with the insertion/extractions.
-    if (ExtSrc.getOpcode() == X86ISD::VBROADCAST ||
-        ExtSrc.getOpcode() == X86ISD::VBROADCAST_LOAD ||
-        (ExtSrc.getOpcode() == X86ISD::SUBV_BROADCAST_LOAD &&
-         (ExtIdxVal % SubVecNumElts) == 0 && (IdxVal % SubVecNumElts) == 0 &&
-         cast<MemIntrinsicSDNode>(ExtSrc)->getMemoryVT() == SubVecVT)) {
-      SmallVector<int, 64> Mask(VecNumElts);
-      // First create an identity shuffle mask.
-      for (int i = 0; i != VecNumElts; ++i)
-        Mask[i] = i;
-      // Now blend the broadcast.
-      for (int i = 0; i != SubVecNumElts; ++i)
-        Mask[i + IdxVal] = i + IdxVal + VecNumElts;
-      return DAG.getVectorShuffle(OpVT, dl, Vec, ExtSrc, Mask);
+
+      return DAG.getVectorShuffle(OpVT, dl, Vec, SubVec.getOperand(0), Mask);
     }
   }
 
@@ -58959,7 +58942,7 @@ static SDValue combineINSERT_SUBVECTOR(SDNode *N, SelectionDAG &DAG,
   // If we're splatting the lower half subvector of a full vector load into the
   // upper half, attempt to create a subvector broadcast.
   // TODO: Drop hasOneUse checks.
-  if (IdxVal == (VecNumElts / 2) &&
+  if (IdxVal == (OpVT.getVectorNumElements() / 2) &&
       Vec.getValueSizeInBits() == (2 * SubVec.getValueSizeInBits()) &&
       (Vec.hasOneUse() || SubVec.hasOneUse())) {
     auto *VecLd = dyn_cast<LoadSDNode>(Vec);
diff --git a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll
index ce62d7a278b1a..2dfa515d9f05c 100644
--- a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll
+++ b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll
@@ -2837,7 +2837,8 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.v
 ; AVX512F-NEXT:    vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
 ; AVX512F-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
 ; AVX512F-NEXT:    vpbroadcastb %xmm0, %ymm0
-; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
+; AVX512F-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm1
 ; AVX512F-NEXT:    vpaddb (%rdx), %ymm1, %ymm1
 ; AVX512F-NEXT:    vpaddb 32(%rdx), %ymm0, %ymm0
 ; AVX512F-NEXT:    vmovdqa %ymm0, 32(%rcx)
@@ -2854,7 +2855,8 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.v
 ; AVX512DQ-NEXT:    vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
 ; AVX512DQ-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
 ; AVX512DQ-NEXT:    vpbroadcastb %xmm0, %ymm0
-; AVX512DQ-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
+; AVX512DQ-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm1
 ; AVX512DQ-NEXT:    vpaddb (%rdx), %ymm1, %ymm1
 ; AVX512DQ-NEXT:    vpaddb 32(%rdx), %ymm0, %ymm0
 ; AVX512DQ-NEXT:    vmovdqa %ymm0, 32(%rcx)
@@ -2870,7 +2872,7 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.v
 ; AVX512BW-NEXT:    vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
 ; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
 ; AVX512BW-NEXT:    vpbroadcastb %xmm0, %ymm0
-; AVX512BW-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; AVX512BW-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm1
 ; AVX512BW-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; AVX512BW-NEXT:    vpaddb (%rdx), %zmm0, %zmm0
 ; AVX512BW-NEXT:    vmovdqa64 %zmm0, (%rcx)
@@ -3098,7 +3100,8 @@ define void @vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4(ptr %in.
 ; AVX512F-NEXT:    vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
 ; AVX512F-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
 ; AVX512F-NEXT:    vpbroadcastb %xmm0, %ymm0
-; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
+; AVX512F-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm1
 ; AVX512F-NEXT:    vpaddb (%rdx), %ymm1, %ymm1
 ; AVX512F-NEXT:    vpaddb 32(%rdx), %ymm0, %ymm0
 ; AVX512F-NEXT:    vmovdqa %ymm0, 32(%rcx)
@@ -3115,7 +3118,8 @@ define void @vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4(ptr %in.
 ; AVX512DQ-NEXT:    vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
 ; AVX512DQ-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
 ; AVX512DQ-NEXT:    vpbroadcastb %xmm0, %ymm0
-; AVX512DQ-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
+; AVX512DQ-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm1
 ; AVX512DQ-NEXT:    vpaddb (%rdx), %ymm1, %ymm1
 ; AVX512DQ-NEXT:    vpaddb 32(%rdx), %ymm0, %ymm0
 ; AVX512DQ-NEXT:    vmovdqa %ymm0, 32(%rcx)
@@ -3131,7 +3135,7 @@ define void @vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4(ptr %in.
 ; AVX512BW-NEXT:    vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
 ; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
 ; AVX512BW-NEXT:    vpbroadcastb %xmm0, %ymm0
-; AVX512BW-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; AVX512BW-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm1
 ; AVX512BW-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; AVX512BW-NEXT:    vpaddb (%rdx), %zmm0, %zmm0
 ; AVX512BW-NEXT:    vmovdqa64 %zmm0, (%rcx)
@@ -3864,11 +3868,12 @@ define void @vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4(ptr %in.
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vmovdqa (%rdi), %xmm0
 ; AVX512F-NEXT:    vmovdqa 48(%rdi), %xmm1
-; AVX512F-NEXT:    vpaddb 48(%rsi), %xmm1, %xmm1
 ; AVX512F-NEXT:    vpaddb (%rsi), %xmm0, %xmm0
 ; AVX512F-NEXT:    vpbroadcastw %xmm0, %ymm0
+; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
+; AVX512F-NEXT:    vpaddb 48(%rsi), %xmm1, %xmm1
 ; AVX512F-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5],xmm0[6],xmm1[7]
-; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; AVX512F-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm1
 ; AVX512F-NEXT:    vpaddb (%rdx), %ymm1, %ymm1
 ; AVX512F-NEXT:    vpaddb 32(%rdx), %ymm0, %ymm0
 ; AVX512F-NEXT:    vmovdqa %ymm0, 32(%rcx)
@@ -3880,11 +3885,12 @@ define void @vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4(ptr %in.
 ; AVX512DQ:       # %bb.0:
 ; AVX512DQ-NEXT:    vmovdqa (%rdi), %xmm0
 ; AVX512DQ-NEXT:    vmovdqa 48(%rdi), %xmm1
-; AVX512DQ-NEXT:    vpaddb 48(%rsi), %xmm1, %xmm1
 ; AVX512DQ-NEXT:    vpaddb (%rsi), %xmm0, %xmm0
 ; AVX512DQ-NEXT:    vpbroadcastw %xmm0, %ymm0
+; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
+; AVX512DQ-NEXT:    vpaddb 48(%rsi), %xmm1, %xmm1
 ; AVX512DQ-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5],xmm0[6],xmm1[7]
-; AVX512DQ-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; AVX512DQ-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm1
 ; AVX512DQ-NEXT:    vpaddb (%rdx), %ymm1, %ymm1
 ; AVX512DQ-NEXT:    vpaddb 32(%rdx), %ymm0, %ymm0
 ; AVX512DQ-NEXT:    vmovdqa %ymm0, 32(%rcx)
diff --git a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll
index 52f856befa130..305509ca7fc3f 100644
--- a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll
+++ b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll
@@ -2239,7 +2239,7 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.e
 ; AVX512F-NEXT:    vpalignr {{.*#+}} xmm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
 ; AVX512F-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
 ; AVX512F-NEXT:    vpbroadcastb (%rdi), %ymm1
-; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpaddb (%rsi), %ymm0, %ymm0
 ; AVX512F-NEXT:    vpaddb 32(%rsi), %ymm1, %ymm1
 ; AVX512F-NEXT:    vmovdqa %ymm1, 32(%rdx)
@@ -2253,7 +2253,7 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.e
 ; AVX512DQ-NEXT:    vpalignr {{.*#+}} xmm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
 ; AVX512DQ-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
 ; AVX512DQ-NEXT:    vpbroadcastb (%rdi), %ymm1
-; AVX512DQ-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; AVX512DQ-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512DQ-NEXT:    vpaddb (%rsi), %ymm0, %ymm0
 ; AVX512DQ-NEXT:    vpaddb 32(%rsi), %ymm1, %ymm1
 ; AVX512DQ-NEXT:    vmovdqa %ymm1, 32(%rdx)
@@ -2267,7 +2267,7 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.e
 ; AVX512BW-NEXT:    vpalignr {{.*#+}} xmm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
 ; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
 ; AVX512BW-NEXT:    vpbroadcastb (%rdi), %ymm1
-; AVX512BW-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; AVX512BW-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512BW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpaddb (%rsi), %zmm0, %zmm0
 ; AVX512BW-NEXT:    vmovdqa64 %zmm0, (%rdx)
@@ -2458,7 +2458,7 @@ define void @vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4(ptr %in.
 ; AVX512F-NEXT:    vpalignr {{.*#+}} xmm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
 ; AVX512F-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
 ; AVX512F-NEXT:    vpbroadcastb (%rdi), %ymm1
-; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpaddb (%rsi), %ymm0, %ymm0
 ; AVX512F-NEXT:    vpaddb 32(%rsi), %ymm1, %ymm1
 ; AVX512F-NEXT:    vmovdqa %ymm1, 32(%rdx)
@@ -2472,7 +2472,7 @@ define void @vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4(ptr %in.
 ; AVX512DQ-NEXT:    vpalignr {{.*#+}} xmm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
 ; AVX512DQ-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
 ; AVX512DQ-NEXT:    vpbroadcastb (%rdi), %ymm1
-; AVX512DQ-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; AVX512DQ-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512DQ-NEXT:    vpaddb (%rsi), %ymm0, %ymm0
 ; AVX512DQ-NEXT:    vpaddb 32(%rsi), %ymm1, %ymm1
 ; AVX512DQ-NEXT:    vmovdqa %ymm1, 32(%rdx)
@@ -2486,7 +2486,7 @@ define void @vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4(ptr %in.
 ; AVX512BW-NEXT:    vpalignr {{.*#+}} xmm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
 ; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
 ; AVX512BW-NEXT:    vpbroadcastb (%rdi), %ymm1
-; AVX512BW-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; AVX512BW-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512BW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpaddb (%rsi), %zmm0, %zmm0
 ; AVX512BW-NEXT:    vmovdqa64 %zmm0, (%rdx)
@@ -3095,7 +3095,7 @@ define void @vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4(ptr %in.
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vpbroadcastw (%rdi), %ymm0
 ; AVX512F-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1,2,3,4,5],xmm0[6],mem[7]
-; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; AVX512F-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm1
 ; AVX512F-NEXT:    vpaddb (%rsi), %ymm1, %ymm1
 ; AVX512F-NEXT:    vpaddb 32(%rsi), %ymm0, %ymm0
 ; AVX512F-NEXT:    vmovdqa %ymm0, 32(%rdx)
@@ -3107,7 +3107,7 @@ define void @vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4(ptr %in.
 ; AVX512DQ:       # %bb.0:
 ; AVX512DQ-NEXT:    vpbroadcastw (%rdi), %ymm0
 ; AVX512DQ-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1,2,3,4,5],xmm0[6],mem[7]
-; AVX512DQ-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; AVX512DQ-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm1
 ; AVX512DQ-NEXT:    vpaddb (%rsi), %ymm1, %ymm1
 ; AVX512DQ-NEXT:    vpaddb 32(%rsi), %ymm0, %ymm0
 ; AVX512DQ-NEXT:    vmovdqa %ymm0, 32(%rdx)
diff --git a/llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-5.ll b/llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-5.ll
index 89330122fa239..e88a651d29cef 100644
--- a/llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-5.ll
+++ b/llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-5.ll
@@ -350,7 +350,7 @@ define void @store_i32_stride5_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX-NEXT:    vshufpd {{.*#+}} ymm4 = ymm4[0,0,3,3]
 ; AVX-NEXT:    vblendps {{.*#+}} ymm4 = ymm4[0,1],ymm5[2,3],ymm4[4,5,6],ymm5[7]
 ; AVX-NEXT:    vbroadcastf128 {{.*#+}} ymm5 = mem[0,1,0,1]
-; AVX-NEXT:    vblendps {{.*#+}} ymm7 = ymm0[0,1,2,3],ymm5[4,5,6,7]
+; AVX-NEXT:    vinsertf128 $1, %xmm5, %ymm0, %ymm7
 ; AVX-NEXT:    vblendps {{.*#+}} ymm4 = ymm7[0],ymm4[1,2,3],ymm7[4],ymm4[5,6,7]
 ; AVX-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[u,u,u,2,u,u,u,7]
 ; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3,4,5,6,7]

_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
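P.S. For readers following the reverted combine: the removed code turned
insert_subvector(base, extract_subvector(broadcast), idx) into a single
shuffle whose mask is the identity except over the inserted lanes, which is
what produced the lone vpblendd in the test checks above; reverting it is
what swaps those blends back to vinserti128/vinserti64x4 sequences. Below is
a minimal standalone sketch of just that mask construction. The element
counts and insertion index (VecNumElts = 8, SubVecNumElts = 4, IdxVal = 4)
are hypothetical values chosen for illustration, not taken from the commit;
the real code derives them from OpVT and SubVecVT.

    // Sketch of the blend-mask construction from the reverted fold.
    // VecNumElts, SubVecNumElts and IdxVal are example values only.
    #include <cstdio>
    #include <vector>

    int main() {
      const int VecNumElts = 8;    // lanes in the destination vector
      const int SubVecNumElts = 4; // lanes in the inserted subvector
      const int IdxVal = 4;        // insertion index (upper half)

      // First create an identity shuffle mask: lane i reads operand 0, lane i.
      std::vector<int> Mask(VecNumElts);
      for (int i = 0; i != VecNumElts; ++i)
        Mask[i] = i;

      // Now blend the broadcast: mask indices >= VecNumElts select lanes from
      // the second shuffle operand, so the inserted range reads the broadcast.
      for (int i = 0; i != SubVecNumElts; ++i)
        Mask[i + IdxVal] = i + IdxVal + VecNumElts;

      for (int M : Mask)
        std::printf("%d ", M); // prints: 0 1 2 3 12 13 14 15
      std::printf("\n");
      return 0;
    }

A mask of that shape (identity below IdxVal, second-operand lanes above) is
exactly what a lane blend can implement in one instruction, which is why the
folded form needed no separate extract/insert pair.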