On 2/10/19, Uros Bizjak <ubiz...@gmail.com> wrote:
> On 2/10/19, H.J. Lu <hjl.to...@gmail.com> wrote:
>> Emulate MMX packsswb/packssdw/packuswb with SSE
>> packsswb/packssdw/packuswb
>> plus moving bits 64:95 to bits 32:63 in SSE register.  Only SSE register
>> source operand is allowed.
>>
>> 2019-02-08  H.J. Lu  <hongjiu...@intel.com>
>>          Uros Bizjak  <ubiz...@gmail.com>
>>
>>      PR target/89021
>>      * config/i386/i386-protos.h (ix86_move_vector_high_sse_to_mmx):
>>      New prototype.
>>      (ix86_split_mmx_pack): Likewise.
>>      * config/i386/i386.c (ix86_move_vector_high_sse_to_mmx): New
>>      function.
>>      (ix86_split_mmx_pack): Likewise.
>>      * config/i386/i386.md (mmx_isa): New.
>>      (enabled): Also check mmx_isa.
>>      * config/i386/mmx.md (any_s_truncate): New code iterator.
>>      (s_trunsuffix): New code attr.
>>      (mmx_packsswb): Removed.
>>      (mmx_packssdw): Likewise.
>>      (mmx_packuswb): Likewise.
>>      (mmx_pack<s_trunsuffix>swb): New define_insn_and_split to emulate
>>      MMX packsswb/packuswb with SSE2.
>>      (mmx_packssdw): Likewise.
>
> LGTM, with a couple of nits below.

Oh, you also need "DONE;" at the end of the preparation statements,
otherwise the splitters will inject (const_int 0) into the insn stream.

Uros.

>> ---
>>  gcc/config/i386/i386-protos.h |  3 ++
>>  gcc/config/i386/i386.c        | 54 ++++++++++++++++++++++++++++
>>  gcc/config/i386/i386.md       | 12 +++++++
>>  gcc/config/i386/mmx.md        | 67 +++++++++++++++++++----------------
>>  4 files changed, 106 insertions(+), 30 deletions(-)
>>
>> diff --git a/gcc/config/i386/i386-protos.h
>> b/gcc/config/i386/i386-protos.h
>> index 2d600173917..bb96a420a85 100644
>> --- a/gcc/config/i386/i386-protos.h
>> +++ b/gcc/config/i386/i386-protos.h
>> @@ -200,6 +200,9 @@ extern void ix86_expand_vecop_qihi (enum rtx_code,
>> rtx,
>> rtx, rtx);
>>
>>  extern rtx ix86_split_stack_guard (void);
>>
>> +extern void ix86_move_vector_high_sse_to_mmx (rtx);
>> +extern void ix86_split_mmx_pack (rtx[], enum rtx_code);
>> +
>>  #ifdef TREE_CODE
>>  extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree,
>> int);
>>  #endif      /* TREE_CODE  */
>> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
>> index ba02c26c8b2..2af7f891350 100644
>> --- a/gcc/config/i386/i386.c
>> +++ b/gcc/config/i386/i386.c
>> @@ -19955,6 +19955,60 @@ ix86_expand_vector_move_misalign (machine_mode
>> mode, rtx operands[])
>>      gcc_unreachable ();
>>  }
>>
>> +/* Move bits 64:95 to bits 32:63.  */
>> +
>> +void
>> +ix86_move_vector_high_sse_to_mmx (rtx op)
>> +{
>> +  rtx mask = gen_rtx_PARALLEL (VOIDmode,
>> +                           gen_rtvec (4, GEN_INT (0), GEN_INT (2),
>> +                                      GEN_INT (0), GEN_INT (0)));
>> +  rtx dest = gen_rtx_REG (V4SImode, REGNO (op));
>> +  op = gen_rtx_VEC_SELECT (V4SImode, dest, mask);
>> +  rtx insn = gen_rtx_SET (dest, op);
>> +  emit_insn (insn);
>> +}
>> +
>> +/* Split MMX pack with signed/unsigned saturation with SSE/SSE2.  */
>> +
>> +void
>> +ix86_split_mmx_pack (rtx operands[], enum rtx_code code)
>> +{
>> +  rtx op0 = operands[0];
>> +  rtx op1 = operands[1];
>> +  rtx op2 = operands[2];
>> +
>> +  machine_mode dmode = GET_MODE (op0);
>> +  machine_mode smode = GET_MODE (op1);
>> +  machine_mode inner_dmode = GET_MODE_INNER (dmode);
>> +  machine_mode inner_smode = GET_MODE_INNER (smode);
>> +
>> +  /* Get the corresponding SSE mode for destination.  */
>> +  int nunits = 16 / GET_MODE_SIZE (inner_dmode);
>> +  machine_mode sse_dmode = mode_for_vector (GET_MODE_INNER (dmode),
>> +                                        nunits).require ();
>> +  machine_mode sse_half_dmode = mode_for_vector (GET_MODE_INNER (dmode),
>> +                                             nunits / 2).require ();
>> +
>> +  /* Get the corresponding SSE mode for source.  */
>> +  nunits = 16 / GET_MODE_SIZE (inner_smode);
>> +  machine_mode sse_smode = mode_for_vector (GET_MODE_INNER (smode),
>> +                                        nunits).require ();
>> +
>> +  /* Generate SSE pack with signed/unsigned saturation.  */
>> +  rtx dest = gen_rtx_REG (sse_dmode, REGNO (op0));
>> +  op1 = gen_rtx_REG (sse_smode, REGNO (op1));
>> +  op2 = gen_rtx_REG (sse_smode, REGNO (op2));
>> +
>> +  op1 = gen_rtx_fmt_e (code, sse_half_dmode, op1);
>> +  op2 = gen_rtx_fmt_e (code, sse_half_dmode, op2);
>> +  rtx insn = gen_rtx_SET (dest, gen_rtx_VEC_CONCAT (sse_dmode,
>> +                                                op1, op2));
>> +  emit_insn (insn);
>> +
>> +  ix86_move_vector_high_sse_to_mmx (op0);
>> +}
>> +
>>  /* Helper function of ix86_fixup_binary_operands to canonicalize
>>     operand order.  Returns true if the operands should be swapped.  */
>>
>> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
>> index 4a32144a71a..72685107fc0 100644
>> --- a/gcc/config/i386/i386.md
>> +++ b/gcc/config/i386/i386.md
>> @@ -792,6 +792,9 @@
>>                  avx512vl,noavx512vl,x64_avx512dq,x64_avx512bw"
>>    (const_string "base"))
>>
>> +;; Define instruction set of MMX instructions
>> +(define_attr "mmx_isa" "base,native,x64,x64_noavx,x64_avx" (const_string
>> "base"))
>> +
>>  (define_attr "enabled" ""
>>    (cond [(eq_attr "isa" "x64") (symbol_ref "TARGET_64BIT")
>>       (eq_attr "isa" "x64_sse2")
>> @@ -830,6 +833,15 @@
>>       (eq_attr "isa" "noavx512dq") (symbol_ref "!TARGET_AVX512DQ")
>>       (eq_attr "isa" "avx512vl") (symbol_ref "TARGET_AVX512VL")
>>       (eq_attr "isa" "noavx512vl") (symbol_ref "!TARGET_AVX512VL")
>> +
>> +     (eq_attr "mmx_isa" "native")
>> +       (symbol_ref "!TARGET_MMX_WITH_SSE")
>> +     (eq_attr "mmx_isa" "x64")
>> +       (symbol_ref "TARGET_MMX_WITH_SSE")
>> +     (eq_attr "mmx_isa" "x64_avx")
>> +       (symbol_ref "TARGET_MMX_WITH_SSE && TARGET_AVX")
>> +     (eq_attr "mmx_isa" "x64_noavx")
>> +       (symbol_ref "TARGET_MMX_WITH_SSE && !TARGET_AVX")
>>      ]
>>      (const_int 1)))
>>
>> diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
>> index c1e0f2c411e..10096f7cab7 100644
>> --- a/gcc/config/i386/mmx.md
>> +++ b/gcc/config/i386/mmx.md
>> @@ -58,6 +58,11 @@
>>  ;; Mapping from integer vector mode to mnemonic suffix
>>  (define_mode_attr mmxvecsize [(V8QI "b") (V4HI "w") (V2SI "d") (V1DI
>> "q")])
>>
>> +;; Used in signed and unsigned truncations with saturation.
>> +(define_code_iterator any_s_truncate [ss_truncate us_truncate])
>> +;; Instruction suffix for truncations with saturation.
>> +(define_code_attr s_trunsuffix [(ss_truncate "s") (us_truncate "u")])
>
> Please move definitions that have a single use near their usage site.
>
>> +
>>  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
>>  ;;
>>  ;; Move patterns
>> @@ -1046,41 +1051,43 @@
>>  ;;
>>  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
>>
>> -(define_insn "mmx_packsswb"
>> -  [(set (match_operand:V8QI 0 "register_operand" "=y")
>> +(define_insn_and_split "mmx_pack<s_trunsuffix>swb"
>> +  [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
>>      (vec_concat:V8QI
>> -      (ss_truncate:V4QI
>> -        (match_operand:V4HI 1 "register_operand" "0"))
>> -      (ss_truncate:V4QI
>> -        (match_operand:V4HI 2 "nonimmediate_operand" "ym"))))]
>> -  "TARGET_MMX"
>> -  "packsswb\t{%2, %0|%0, %2}"
>> -  [(set_attr "type" "mmxshft")
>> -   (set_attr "mode" "DI")])
>> +      (any_s_truncate:V4QI
>> +        (match_operand:V4HI 1 "register_operand" "0,0,Yv"))
>> +      (any_s_truncate:V4QI
>> +        (match_operand:V4HI 2 "nonimmediate_operand" "ym,x,Yv"))))]
>> +  "TARGET_MMX || TARGET_MMX_WITH_SSE"
>> +  "@
>> +   pack<s_trunsuffix>swb\t{%2, %0|%0, %2}
>> +   #
>> +   #"
>> +  "&& reload_completed && TARGET_MMX_WITH_SSE"
>
> The above should be without the leading &&, with "reload_completed" last. In
> effect, the condition of the separate split pattern would read as:
>
> "TARGET_MMX_WITH_SSE && reload_completed".
>
>> +  [(const_int 0)]
>> +  "ix86_split_mmx_pack (operands, <any_s_truncate:CODE>);"

Missing DONE; above.

>> +  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
>> +   (set_attr "type" "mmxshft,sselog,sselog")
>> +   (set_attr "mode" "DI,TI,TI")])
>>
>> -(define_insn "mmx_packssdw"
>> -  [(set (match_operand:V4HI 0 "register_operand" "=y")
>> +(define_insn_and_split "mmx_packssdw"
>> +  [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
>>      (vec_concat:V4HI
>>        (ss_truncate:V2HI
>> -        (match_operand:V2SI 1 "register_operand" "0"))
>> +        (match_operand:V2SI 1 "register_operand" "0,0,Yv"))
>>        (ss_truncate:V2HI
>> -        (match_operand:V2SI 2 "nonimmediate_operand" "ym"))))]
>> -  "TARGET_MMX"
>> -  "packssdw\t{%2, %0|%0, %2}"
>> -  [(set_attr "type" "mmxshft")
>> -   (set_attr "mode" "DI")])
>> -
>> -(define_insn "mmx_packuswb"
>> -  [(set (match_operand:V8QI 0 "register_operand" "=y")
>> -    (vec_concat:V8QI
>> -      (us_truncate:V4QI
>> -        (match_operand:V4HI 1 "register_operand" "0"))
>> -      (us_truncate:V4QI
>> -        (match_operand:V4HI 2 "nonimmediate_operand" "ym"))))]
>> -  "TARGET_MMX"
>> -  "packuswb\t{%2, %0|%0, %2}"
>> -  [(set_attr "type" "mmxshft")
>> -   (set_attr "mode" "DI")])
>> +        (match_operand:V2SI 2 "nonimmediate_operand" "ym,x,Yv"))))]
>> +  "TARGET_MMX || TARGET_MMX_WITH_SSE"
>> +  "@
>> +   packssdw\t{%2, %0|%0, %2}
>> +   #
>> +   #"
>> +  "&& reload_completed && TARGET_MMX_WITH_SSE"
>
> Also here.
>
>> +  [(const_int 0)]
>> +  "ix86_split_mmx_pack (operands, SS_TRUNCATE);"

And here.

>> +  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
>> +   (set_attr "type" "mmxshft,sselog,sselog")
>> +   (set_attr "mode" "DI,TI,TI")])
>>
>>  (define_insn "mmx_punpckhbw"
>>    [(set (match_operand:V8QI 0 "register_operand" "=y")
>> --
>> 2.20.1
>>
>>
>

Reply via email to