On Mon, Feb 4, 2019 at 4:01 AM Uros Bizjak <ubiz...@gmail.com> wrote: > > On Fri, Feb 1, 2019 at 10:18 PM H.J. Lu <hjl.to...@gmail.com> wrote: > > > > Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX. For MMX punpckhXX, > > move bits 64:127 to bits 0:63 in SSE register. Only SSE register source > > operand is allowed. > > > > PR target/89021 > > * config/i386/i386-protos.h (ix86_split_mmx_punpck): New > > prototype. > > * config/i386/i386.c (ix86_split_mmx_punpck): New function. > > * config/i386/mmx.md (mmx_punpckhbw): Changed to > > define_insn_and_split to support SSE emulation. > > (mmx_punpcklbw): Likewise. > > (mmx_punpckhwd): Likewise. > > (mmx_punpcklwd): Likewise. > > (mmx_punpckhdq): Likewise. > > (mmx_punpckldq): Likewise. > > --- > > gcc/config/i386/i386-protos.h | 1 + > > gcc/config/i386/i386.c | 77 ++++++++++++++++++++++++ > > gcc/config/i386/mmx.md | 108 +++++++++++++++++++++------------- > > 3 files changed, 144 insertions(+), 42 deletions(-) > > > > diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h > > index bb96a420a85..dc7fc38d8e4 100644 > > --- a/gcc/config/i386/i386-protos.h > > +++ b/gcc/config/i386/i386-protos.h > > @@ -202,6 +202,7 @@ extern rtx ix86_split_stack_guard (void); > > > > extern void ix86_move_vector_high_sse_to_mmx (rtx); > > extern void ix86_split_mmx_pack (rtx[], enum rtx_code); > > +extern void ix86_split_mmx_punpck (rtx[], bool); > > > > #ifdef TREE_CODE > > extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int); > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c > > index fde32983fa2..d795af1dd93 100644 > > --- a/gcc/config/i386/i386.c > > +++ b/gcc/config/i386/i386.c > > @@ -20006,6 +20006,83 @@ ix86_split_mmx_pack (rtx operands[], enum rtx_code > > code) > > ix86_move_vector_high_sse_to_mmx (op0); > > } > > > > +/* Split MMX punpcklXX/punpckhXX with SSE punpcklXX. 
*/ > > + > > +void > > +ix86_split_mmx_punpck (rtx operands[], bool high_p) > > +{ > > + rtx op0 = operands[0]; > > + rtx op1 = operands[1]; > > + rtx op2 = operands[2]; > > + machine_mode mode = GET_MODE (op0); > > + rtx mask; > > + /* The corresponding SSE mode. */ > > + machine_mode sse_mode, double_sse_mode; > > + > > + switch (mode) > > + { > > + case E_V8QImode: > > + sse_mode = V16QImode; > > + double_sse_mode = V32QImode; > > + mask = gen_rtx_PARALLEL (VOIDmode, > > + gen_rtvec (16, > > + GEN_INT (0), GEN_INT (16), > > + GEN_INT (1), GEN_INT (17), > > + GEN_INT (2), GEN_INT (18), > > + GEN_INT (3), GEN_INT (19), > > + GEN_INT (4), GEN_INT (20), > > + GEN_INT (5), GEN_INT (21), > > + GEN_INT (6), GEN_INT (22), > > + GEN_INT (7), GEN_INT (23))); > > + break; > > + > > + case E_V4HImode: > > + sse_mode = V8HImode; > > + double_sse_mode = V16HImode; > > + mask = gen_rtx_PARALLEL (VOIDmode, > > + gen_rtvec (8, > > + GEN_INT (0), GEN_INT (8), > > + GEN_INT (1), GEN_INT (9), > > + GEN_INT (2), GEN_INT (10), > > + GEN_INT (3), GEN_INT (11))); > > + break; > > + > > + case E_V2SImode: > > + sse_mode = V4SImode; > > + double_sse_mode = V8SImode; > > + mask = gen_rtx_PARALLEL (VOIDmode, > > + gen_rtvec (4, > > + GEN_INT (0), GEN_INT (4), > > + GEN_INT (1), GEN_INT (5))); > > + break; > > + > > + default: > > + gcc_unreachable (); > > + } > > + > > + /* Generate SSE punpcklXX. */ > > + rtx dest = gen_rtx_REG (sse_mode, REGNO (op0)); > > + op1 = gen_rtx_REG (sse_mode, REGNO (op1)); > > + op2 = gen_rtx_REG (sse_mode, REGNO (op2)); > > + > > + op1 = gen_rtx_VEC_CONCAT (double_sse_mode, op1, op2); > > + op2 = gen_rtx_VEC_SELECT (sse_mode, op1, mask); > > + rtx insn = gen_rtx_SET (dest, op2); > > + emit_insn (insn); > > + > > + if (high_p) > > + { > > + /* Move bits 64:127 to bits 0:63. 
*/ > > + mask = gen_rtx_PARALLEL (VOIDmode, > > + gen_rtvec (4, GEN_INT (2), GEN_INT (3), > > + GEN_INT (0), GEN_INT (0))); > > + dest = gen_rtx_REG (V4SImode, REGNO (dest)); > > + op1 = gen_rtx_VEC_SELECT (V4SImode, dest, mask); > > + insn = gen_rtx_SET (dest, op1); > > + emit_insn (insn); > > + } > > +} > > + > > /* Helper function of ix86_fixup_binary_operands to canonicalize > > operand order. Returns true if the operands should be swapped. */ > > > > diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md > > index c183f949a7c..fbd341109d6 100644 > > --- a/gcc/config/i386/mmx.md > > +++ b/gcc/config/i386/mmx.md > > @@ -1083,87 +1083,111 @@ > > (set_attr "type" "mmxshft,sselog,sselog") > > (set_attr "mode" "DI,TI,TI")]) > > > > -(define_insn "mmx_punpckhbw" > > - [(set (match_operand:V8QI 0 "register_operand" "=y") > > +(define_insn_and_split "mmx_punpckhbw" > > + [(set (match_operand:V8QI 0 "register_operand" "=y,Yx,Yy") > > (vec_select:V8QI > > (vec_concat:V16QI > > - (match_operand:V8QI 1 "register_operand" "0") > > - (match_operand:V8QI 2 "nonimmediate_operand" "ym")) > > + (match_operand:V8QI 1 "register_operand" "0,0,Yy") > > + (match_operand:V8QI 2 "nonimmediate_operand" "ym,Yx,Yy")) > > (parallel [(const_int 4) (const_int 12) > > (const_int 5) (const_int 13) > > (const_int 6) (const_int 14) > > (const_int 7) (const_int 15)])))] > > - "TARGET_MMX" > > + "TARGET_MMX_INSNS" > > "punpckhbw\t{%2, %0|%0, %2}" > > Please add "#" for alternatives that have to be split. >
Did you mean

(define_insn_and_split "mmx_punpckhbw"
  [(set (match_operand:V8QI 0 "register_operand" "=y,Yx,Yy")
        (vec_select:V8QI
          (vec_concat:V16QI
            (match_operand:V8QI 1 "register_operand" "0,0,Yy")
            (match_operand:V8QI 2 "nonimmediate_operand" "ym,Yx,Yy"))
          (parallel [(const_int 4) (const_int 12)
                     (const_int 5) (const_int 13)
                     (const_int 6) (const_int 14)
                     (const_int 7) (const_int 15)])))]
  "TARGET_MMX_INSNS"
  "#"
  "&& reload_completed && TARGET_MMX_WITH_SSE"
  [(const_int 0)]
  "ix86_split_mmx_punpck (operands, true);"
  [(set_attr "isa" "*,noavx,avx")
   (set_attr "type" "mmxcvt,sselog,sselog")
   (set_attr "mode" "DI,TI,TI")])

(define_insn "*mmx_punpckhbw"
  [(set (match_operand:V8QI 0 "register_operand" "=y")
        (vec_select:V8QI
          (vec_concat:V16QI
            (match_operand:V8QI 1 "register_operand" "0")
            (match_operand:V8QI 2 "nonimmediate_operand" "ym"))
          (parallel [(const_int 4) (const_int 12)
                     (const_int 5) (const_int 13)
                     (const_int 6) (const_int 14)
                     (const_int 7) (const_int 15)])))]
  "TARGET_MMX"
  "punpckhbw\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmxcvt")
   (set_attr "mode" "DI")])

What is the advantage of an extra pattern?

-- 
H.J.