On 2/10/19, H.J. Lu <hjl.to...@gmail.com> wrote: > Emulate MMX pshufw with SSE. Only SSE register source operand is allowed. > > PR target/89021 > * config/i386/mmx.md (mmx_pshufw_1): Add SSE emulation. > (*vec_dupv4hi): Likewise. > emulation. > --- > gcc/config/i386/mmx.md | 33 +++++++++++++++++++++------------ > 1 file changed, 21 insertions(+), 12 deletions(-) > > diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md > index 1ee51c5deb7..dc81d7f45df 100644 > --- a/gcc/config/i386/mmx.md > +++ b/gcc/config/i386/mmx.md > @@ -1364,7 +1364,8 @@ > [(match_operand:V4HI 0 "register_operand") > (match_operand:V4HI 1 "nonimmediate_operand") > (match_operand:SI 2 "const_int_operand")] > - "TARGET_SSE || TARGET_3DNOW_A" > + "((TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE) > + || TARGET_3DNOW_A"
I think that the above condition should read "(TARGET_MMX || TARGET_MMX_WITH_SSE) && (TARGET_SSE || TARGET_3DNOW_A)". With TARGET_MMX_WITH_SSE (which implies SSE2) we always use XMM registers. Without SSE2, we use MMX registers, as before. > { > int mask = INTVAL (operands[2]); > emit_insn (gen_mmx_pshufw_1 (operands[0], operands[1], > @@ -1376,14 +1377,15 @@ > }) > > (define_insn "mmx_pshufw_1" > - [(set (match_operand:V4HI 0 "register_operand" "=y") > + [(set (match_operand:V4HI 0 "register_operand" "=y,Yv") > (vec_select:V4HI > - (match_operand:V4HI 1 "nonimmediate_operand" "ym") > + (match_operand:V4HI 1 "nonimmediate_operand" "ym,Yv") > (parallel [(match_operand 2 "const_0_to_3_operand") > (match_operand 3 "const_0_to_3_operand") > (match_operand 4 "const_0_to_3_operand") > (match_operand 5 "const_0_to_3_operand")])))] > - "TARGET_SSE || TARGET_3DNOW_A" > + "((TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE) > + || TARGET_3DNOW_A" > { > int mask = 0; > mask |= INTVAL (operands[2]) << 0; > @@ -1392,11 +1394,15 @@ > mask |= INTVAL (operands[5]) << 6; > operands[2] = GEN_INT (mask); > > - return "pshufw\t{%2, %1, %0|%0, %1, %2}"; > + if (TARGET_MMX_WITH_SSE) > + return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}"; > + else > + return "pshufw\t{%2, %1, %0|%0, %1, %2}"; The above should be implemented as a multi-output template. 
> } > - [(set_attr "type" "mmxcvt") > + [(set_attr "mmx_isa" "native,x64") > + (set_attr "type" "mmxcvt,sselog") > (set_attr "length_immediate" "1") > - (set_attr "mode" "DI")]) > + (set_attr "mode" "DI,TI")]) > > (define_insn "mmx_pswapdv2si2" > [(set (match_operand:V2SI 0 "register_operand" "=y") > @@ -1410,15 +1416,18 @@ > (set_attr "mode" "DI")]) > > (define_insn "*vec_dupv4hi" > - [(set (match_operand:V4HI 0 "register_operand" "=y") > + [(set (match_operand:V4HI 0 "register_operand" "=y,Yv") > (vec_duplicate:V4HI > (truncate:HI > - (match_operand:SI 1 "register_operand" "0"))))] > + (match_operand:SI 1 "register_operand" "0,Yv"))))] > "TARGET_SSE || TARGET_3DNOW_A" Here we also need "(TARGET_MMX || TARGET_MMX_WITH_SSE) &&" in front of the existing condition. Uros. > - "pshufw\t{$0, %0, %0|%0, %0, 0}" > - [(set_attr "type" "mmxcvt") > + "@ > + pshufw\t{$0, %0, %0|%0, %0, 0} > + %vpshuflw\t{$0, %1, %0|%0, %1, 0}" > + [(set_attr "mmx_isa" "native,x64") > + (set_attr "type" "mmxcvt,sselog1") > (set_attr "length_immediate" "1") > - (set_attr "mode" "DI")]) > + (set_attr "mode" "DI,TI")]) > > (define_insn_and_split "*vec_dupv2si" > [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv") > -- > 2.20.1 > >