On Mon, Feb 4, 2019 at 4:01 AM Uros Bizjak <ubiz...@gmail.com> wrote:
>
> On Fri, Feb 1, 2019 at 10:18 PM H.J. Lu <hjl.to...@gmail.com> wrote:
> >
> > Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX.  For MMX punpckhXX,
> > move bits 64:127 to bits 0:63 in SSE register.  Only SSE register source
> > operand is allowed.
> >
> >         PR target/89021
> >         * config/i386/i386-protos.h (ix86_split_mmx_punpck): New
> >         prototype.
> >         * config/i386/i386.c (ix86_split_mmx_punpck): New function.
> > >         * config/i386/mmx.md (mmx_punpckhbw): Changed to
> >         define_insn_and_split to support SSE emulation.
> >         (mmx_punpcklbw): Likewise.
> >         (mmx_punpckhwd): Likewise.
> >         (mmx_punpcklwd): Likewise.
> >         (mmx_punpckhdq): Likewise.
> >         (mmx_punpckldq): Likewise.
> > ---
> >  gcc/config/i386/i386-protos.h |   1 +
> >  gcc/config/i386/i386.c        |  77 ++++++++++++++++++++++++
> >  gcc/config/i386/mmx.md        | 108 +++++++++++++++++++++-------------
> >  3 files changed, 144 insertions(+), 42 deletions(-)
> >
> > diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
> > index bb96a420a85..dc7fc38d8e4 100644
> > --- a/gcc/config/i386/i386-protos.h
> > +++ b/gcc/config/i386/i386-protos.h
> > @@ -202,6 +202,7 @@ extern rtx ix86_split_stack_guard (void);
> >
> >  extern void ix86_move_vector_high_sse_to_mmx (rtx);
> >  extern void ix86_split_mmx_pack (rtx[], enum rtx_code);
> > +extern void ix86_split_mmx_punpck (rtx[], bool);
> >
> >  #ifdef TREE_CODE
> >  extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int);
> > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> > index fde32983fa2..d795af1dd93 100644
> > --- a/gcc/config/i386/i386.c
> > +++ b/gcc/config/i386/i386.c
> > @@ -20006,6 +20006,83 @@ ix86_split_mmx_pack (rtx operands[], enum rtx_code 
> > code)
> >    ix86_move_vector_high_sse_to_mmx (op0);
> >  }
> >
> > +/* Split MMX punpcklXX/punpckhXX with SSE punpcklXX.  */
> > +
> > +void
> > +ix86_split_mmx_punpck (rtx operands[], bool high_p)
> > +{
> > +  rtx op0 = operands[0];
> > +  rtx op1 = operands[1];
> > +  rtx op2 = operands[2];
> > +  machine_mode mode = GET_MODE (op0);
> > +  rtx mask;
> > +  /* The corresponding SSE mode.  */
> > +  machine_mode sse_mode, double_sse_mode;
> > +
> > +  switch (mode)
> > +    {
> > +    case E_V8QImode:
> > +      sse_mode = V16QImode;
> > +      double_sse_mode = V32QImode;
> > +      mask = gen_rtx_PARALLEL (VOIDmode,
> > +                              gen_rtvec (16,
> > +                                         GEN_INT (0), GEN_INT (16),
> > +                                         GEN_INT (1), GEN_INT (17),
> > +                                         GEN_INT (2), GEN_INT (18),
> > +                                         GEN_INT (3), GEN_INT (19),
> > +                                         GEN_INT (4), GEN_INT (20),
> > +                                         GEN_INT (5), GEN_INT (21),
> > +                                         GEN_INT (6), GEN_INT (22),
> > +                                         GEN_INT (7), GEN_INT (23)));
> > +      break;
> > +
> > +    case E_V4HImode:
> > +      sse_mode = V8HImode;
> > +      double_sse_mode = V16HImode;
> > +      mask = gen_rtx_PARALLEL (VOIDmode,
> > +                              gen_rtvec (8,
> > +                                         GEN_INT (0), GEN_INT (8),
> > +                                         GEN_INT (1), GEN_INT (9),
> > +                                         GEN_INT (2), GEN_INT (10),
> > +                                         GEN_INT (3), GEN_INT (11)));
> > +      break;
> > +
> > +    case E_V2SImode:
> > +      sse_mode = V4SImode;
> > +      double_sse_mode = V8SImode;
> > +      mask = gen_rtx_PARALLEL (VOIDmode,
> > +                              gen_rtvec (4,
> > +                                         GEN_INT (0), GEN_INT (4),
> > +                                         GEN_INT (1), GEN_INT (5)));
> > +      break;
> > +
> > +    default:
> > +      gcc_unreachable ();
> > +    }
> > +
> > +  /* Generate SSE punpcklXX.  */
> > +  rtx dest = gen_rtx_REG (sse_mode, REGNO (op0));
> > +  op1 = gen_rtx_REG (sse_mode, REGNO (op1));
> > +  op2 = gen_rtx_REG (sse_mode, REGNO (op2));
> > +
> > +  op1 = gen_rtx_VEC_CONCAT (double_sse_mode, op1, op2);
> > +  op2 = gen_rtx_VEC_SELECT (sse_mode, op1, mask);
> > +  rtx insn = gen_rtx_SET (dest, op2);
> > +  emit_insn (insn);
> > +
> > +  if (high_p)
> > +    {
> > +      /* Move bits 64:127 to bits 0:63.  */
> > +      mask = gen_rtx_PARALLEL (VOIDmode,
> > +                              gen_rtvec (4, GEN_INT (2), GEN_INT (3),
> > +                                         GEN_INT (0), GEN_INT (0)));
> > +      dest = gen_rtx_REG (V4SImode, REGNO (dest));
> > +      op1 = gen_rtx_VEC_SELECT (V4SImode, dest, mask);
> > +      insn = gen_rtx_SET (dest, op1);
> > +      emit_insn (insn);
> > +    }
> > +}
> > +
> >  /* Helper function of ix86_fixup_binary_operands to canonicalize
> >     operand order.  Returns true if the operands should be swapped.  */
> >
> > diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
> > index c183f949a7c..fbd341109d6 100644
> > --- a/gcc/config/i386/mmx.md
> > +++ b/gcc/config/i386/mmx.md
> > @@ -1083,87 +1083,111 @@
> >     (set_attr "type" "mmxshft,sselog,sselog")
> >     (set_attr "mode" "DI,TI,TI")])
> >
> > -(define_insn "mmx_punpckhbw"
> > -  [(set (match_operand:V8QI 0 "register_operand" "=y")
> > +(define_insn_and_split "mmx_punpckhbw"
> > +  [(set (match_operand:V8QI 0 "register_operand" "=y,Yx,Yy")
> >         (vec_select:V8QI
> >           (vec_concat:V16QI
> > -           (match_operand:V8QI 1 "register_operand" "0")
> > -           (match_operand:V8QI 2 "nonimmediate_operand" "ym"))
> > +           (match_operand:V8QI 1 "register_operand" "0,0,Yy")
> > +           (match_operand:V8QI 2 "nonimmediate_operand" "ym,Yx,Yy"))
> >            (parallel [(const_int 4) (const_int 12)
> >                       (const_int 5) (const_int 13)
> >                       (const_int 6) (const_int 14)
> >                       (const_int 7) (const_int 15)])))]
> > -  "TARGET_MMX"
> > +  "TARGET_MMX_INSNS"
> >    "punpckhbw\t{%2, %0|%0, %2}"
>
> Please add "#" for alternatives that have to be split.
>

Did you mean

(define_insn_and_split "mmx_punpckhbw"
  [(set (match_operand:V8QI 0 "register_operand" "=y,Yx,Yy")
        (vec_select:V8QI
          (vec_concat:V16QI
            (match_operand:V8QI 1 "register_operand" "0,0,Yy")
            (match_operand:V8QI 2 "nonimmediate_operand" "ym,Yx,Yy"))
          (parallel [(const_int 4) (const_int 12)
                     (const_int 5) (const_int 13)
                     (const_int 6) (const_int 14)
                     (const_int 7) (const_int 15)])))]
  "TARGET_MMX_INSNS"
  "#"
  "&& reload_completed && TARGET_MMX_WITH_SSE"
  [(const_int 0)]
  "ix86_split_mmx_punpck (operands, true);"
  [(set_attr "isa" "*,noavx,avx")
   (set_attr "type" "mmxcvt,sselog,sselog")
   (set_attr "mode" "DI,TI,TI")])

(define_insn "*mmx_punpckhbw"
  [(set (match_operand:V8QI 0 "register_operand" "=y")
        (vec_select:V8QI
          (vec_concat:V16QI
            (match_operand:V8QI 1 "register_operand" "0")
            (match_operand:V8QI 2 "nonimmediate_operand" "ym"))
          (parallel [(const_int 4) (const_int 12)
                     (const_int 5) (const_int 13)
                     (const_int 6) (const_int 14)
                     (const_int 7) (const_int 15)])))]
  "TARGET_MMX"
  "punpckhbw\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmxcvt")
   (set_attr "mode" "DI")])

What is the advantage of an extra pattern?

-- 
H.J.

Reply via email to