mul with SSE

Uros Bizjak Sun, 10 Feb 2019 04:44:00 -0800

On 2/10/19, H.J. Lu <[email protected]> wrote:
> In 64-bit mode, implement V2SF add/sub/mul with SSE.  Only SSE register
> source operands are allowed.
>
> gcc/
>
>       PR target/89028
>       * config/i386/i386.md (comm): Handle mult.
>       * config/i386/mmx.md (plusminusmult): New.
>       (plusminusmult_insn): Likewise.
>       (plusminusmult_mnemonic): Likewise.
>       (plusminusmult_type): Likewise.
>       (mmx_addv2sf3): Add "&& !TARGET_MMX_WITH_SSE".
>       (*mmx_addv2sf3): Likewise.
>       (mmx_subv2sf3): Likewise.
>       (mmx_subrv2sf3): Likewise.
>       (*mmx_subv2sf3): Likewise.
>       (mmx_mulv2sf3): Likewise.
>       (*mmx_mulv2sf3): Likewise.
>       (<plusminusmult_insn>v2sf3): New.
>       (*sse_<plusminusmult_insn>v2sf3): Likewise.


No. There is no native support for V2SF in SSE, so we'll leave these out.

Uros.

>
> gcc/testsuite/
>
>       PR target/89028
>       * gcc.target/i386/pr89028-2.c: New test.
>       * gcc.target/i386/pr89028-3.c: Likewise.
>       * gcc.target/i386/pr89028-4.c: Likewise.
>       * gcc.target/i386/pr89028-5.c: Likewise.
>       * gcc.target/i386/pr89028-6.c: Likewise.
>       * gcc.target/i386/pr89028-7.c: Likewise.
> ---
>  gcc/config/i386/i386.md                   |  3 +-
>  gcc/config/i386/mmx.md                    | 56 ++++++++++++++++++++---
>  gcc/testsuite/gcc.target/i386/pr89028-2.c | 11 +++++
>  gcc/testsuite/gcc.target/i386/pr89028-3.c | 14 ++++++
>  gcc/testsuite/gcc.target/i386/pr89028-4.c | 14 ++++++
>  gcc/testsuite/gcc.target/i386/pr89028-5.c | 11 +++++
>  gcc/testsuite/gcc.target/i386/pr89028-6.c | 14 ++++++
>  gcc/testsuite/gcc.target/i386/pr89028-7.c | 14 ++++++
>  8 files changed, 129 insertions(+), 8 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-3.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-4.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-5.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-6.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-7.c
>
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index 72685107fc0..cda973c0fbf 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -873,7 +873,8 @@
>
>  ;; Mark commutative operators as such in constraints.
>  (define_code_attr comm [(plus "%") (ss_plus "%") (us_plus "%")
> -                     (minus "") (ss_minus "") (us_minus "")])
> +                     (minus "") (ss_minus "") (us_minus "")
> +                     (mult "%")])
>
>  ;; Mapping of max and min
>  (define_code_iterator maxmin [smax smin umax umin])
> diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
> index e56d2e71168..88c1ecd9ae6 100644
> --- a/gcc/config/i386/mmx.md
> +++ b/gcc/config/i386/mmx.md
> @@ -63,6 +63,20 @@
>  ;; Instruction suffix for truncations with saturation.
>  (define_code_attr s_trunsuffix [(ss_truncate "s") (us_truncate "u")])
>
> +(define_code_iterator plusminusmult [plus minus mult])
> +
> +;; Base name for define_insn
> +(define_code_attr plusminusmult_insn
> +  [(plus "add") (minus "sub") (mult "mul")])
> +
> +;; Base name for insn mnemonic.
> +(define_code_attr plusminusmult_mnemonic
> +  [(plus "add") (minus "sub") (mult "mul")])
> +
> +;; Insn type name suffix for the "type" attribute.
> +(define_code_attr plusminusmult_type
> +  [(plus "add") (minus "add") (mult "mul")])
> +
>  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
>  ;;
>  ;; Move patterns
> @@ -279,14 +293,16 @@
>       (plus:V2SF
>         (match_operand:V2SF 1 "nonimmediate_operand")
>         (match_operand:V2SF 2 "nonimmediate_operand")))]
> -  "TARGET_3DNOW"
> +  "TARGET_3DNOW && !TARGET_MMX_WITH_SSE"
>    "ix86_fixup_binary_operands_no_copy (PLUS, V2SFmode, operands);")
>
>  (define_insn "*mmx_addv2sf3"
>    [(set (match_operand:V2SF 0 "register_operand" "=y")
>       (plus:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "%0")
>                  (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
> -  "TARGET_3DNOW && ix86_binary_operator_ok (PLUS, V2SFmode, operands)"
> +  "TARGET_3DNOW
> +   && !TARGET_MMX_WITH_SSE
> +   && ix86_binary_operator_ok (PLUS, V2SFmode, operands)"
>    "pfadd\t{%2, %0|%0, %2}"
>    [(set_attr "type" "mmxadd")
>     (set_attr "prefix_extra" "1")
> @@ -296,19 +312,21 @@
>    [(set (match_operand:V2SF 0 "register_operand")
>          (minus:V2SF (match_operand:V2SF 1 "register_operand")
>                   (match_operand:V2SF 2 "nonimmediate_operand")))]
> -  "TARGET_3DNOW")
> +  "TARGET_3DNOW && !TARGET_MMX_WITH_SSE")
>
>  (define_expand "mmx_subrv2sf3"
>    [(set (match_operand:V2SF 0 "register_operand")
>          (minus:V2SF (match_operand:V2SF 2 "register_operand")
>                   (match_operand:V2SF 1 "nonimmediate_operand")))]
> -  "TARGET_3DNOW")
> +  "TARGET_3DNOW && !TARGET_MMX_WITH_SSE")
>
>  (define_insn "*mmx_subv2sf3"
>    [(set (match_operand:V2SF 0 "register_operand" "=y,y")
>          (minus:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "0,ym")
>                   (match_operand:V2SF 2 "nonimmediate_operand" "ym,0")))]
> -  "TARGET_3DNOW && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
> +  "TARGET_3DNOW
> +   && !TARGET_MMX_WITH_SSE
> +   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
>    "@
>     pfsub\t{%2, %0|%0, %2}
>     pfsubr\t{%1, %0|%0, %1}"
> @@ -320,19 +338,43 @@
>    [(set (match_operand:V2SF 0 "register_operand")
>       (mult:V2SF (match_operand:V2SF 1 "nonimmediate_operand")
>                  (match_operand:V2SF 2 "nonimmediate_operand")))]
> -  "TARGET_3DNOW"
> +  "TARGET_3DNOW && !TARGET_MMX_WITH_SSE"
>    "ix86_fixup_binary_operands_no_copy (MULT, V2SFmode, operands);")
>
>  (define_insn "*mmx_mulv2sf3"
>    [(set (match_operand:V2SF 0 "register_operand" "=y")
>       (mult:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "%0")
>                  (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
> -  "TARGET_3DNOW && ix86_binary_operator_ok (MULT, V2SFmode, operands)"
> +  "TARGET_3DNOW
> +   && !TARGET_MMX_WITH_SSE
> +   && ix86_binary_operator_ok (MULT, V2SFmode, operands)"
>    "pfmul\t{%2, %0|%0, %2}"
>    [(set_attr "type" "mmxmul")
>     (set_attr "prefix_extra" "1")
>     (set_attr "mode" "V2SF")])
>
> +(define_expand "<plusminusmult_insn>v2sf3"
> +  [(set (match_operand:V2SF 0 "register_operand")
> +     (plusminusmult:V2SF
> +       (match_operand:V2SF 1 "nonimmediate_operand")
> +       (match_operand:V2SF 2 "nonimmediate_operand")))]
> +  "TARGET_MMX_WITH_SSE"
> +  "ix86_fixup_binary_operands_no_copy (<CODE>, V2SFmode, operands);")
> +
> +(define_insn "*sse_<plusminusmult_insn>v2sf3"
> +  [(set (match_operand:V2SF 0 "register_operand" "=x,Yv")
> +        (plusminusmult:V2SF
> +       (match_operand:V2SF 1 "nonimmediate_operand" "<comm>0,Yv")
> +       (match_operand:V2SF 2 "nonimmediate_operand" "x,Yv")))]
> +  "TARGET_MMX_WITH_SSE
> +   && ix86_binary_operator_ok (<CODE>, V4SFmode, operands)"
> +  "@
> +   <plusminusmult_mnemonic>ps\t{%2, %0|%0, %2}
> +   v<plusminusmult_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
> +  [(set_attr "isa" "noavx,avx")
> +   (set_attr "type" "sse<plusminusmult_type>")
> +   (set_attr "mode" "V4SF")])
> +
>  (define_expand "mmx_<code>v2sf3"
>    [(set (match_operand:V2SF 0 "register_operand")
>          (smaxmin:V2SF
> diff --git a/gcc/testsuite/gcc.target/i386/pr89028-2.c
> b/gcc/testsuite/gcc.target/i386/pr89028-2.c
> new file mode 100644
> index 00000000000..d096b0b6863
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr89028-2.c
> @@ -0,0 +1,11 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -msse2 -mno-mmx" } */
> +/* { dg-final { scan-assembler-times "addps" 1 } } */
> +
> +typedef float __v2sf __attribute__ ((__vector_size__ (8)));
> +
> +__v2sf
> +foo1 (__v2sf x, __v2sf y)
> +{
> +  return x + y;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr89028-3.c
> b/gcc/testsuite/gcc.target/i386/pr89028-3.c
> new file mode 100644
> index 00000000000..0fa187aaf72
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr89028-3.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -msse2 -mno-mmx" } */
> +/* { dg-final { scan-assembler-times "movlps" 2 } } */
> +/* { dg-final { scan-assembler-times "addps" 1 } } */
> +
> +typedef float __v2sf __attribute__ ((__vector_size__ (8)));
> +
> +extern __v2sf x, y, z;
> +
> +__v2sf
> +foo2 (void)
> +{
> +  return x + y;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr89028-4.c
> b/gcc/testsuite/gcc.target/i386/pr89028-4.c
> new file mode 100644
> index 00000000000..b25f67632cb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr89028-4.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -msse2 -mno-mmx" } */
> +/* { dg-final { scan-assembler-times "movlps" 1 } } */
> +/* { dg-final { scan-assembler-times "addps" 1 } } */
> +
> +typedef float __v2sf __attribute__ ((__vector_size__ (8)));
> +
> +extern __v2sf x, y, z;
> +
> +void
> +foo3 (__v2sf x, __v2sf y)
> +{
> +  z = x + y;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr89028-5.c
> b/gcc/testsuite/gcc.target/i386/pr89028-5.c
> new file mode 100644
> index 00000000000..4ead7187605
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr89028-5.c
> @@ -0,0 +1,11 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -msse2 -mno-mmx" } */
> +/* { dg-final { scan-assembler-times "mulps" 1 } } */
> +
> +typedef float __v2sf __attribute__ ((__vector_size__ (8)));
> +
> +__v2sf
> +foo1 (__v2sf x, __v2sf y)
> +{
> +  return x * y;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr89028-6.c
> b/gcc/testsuite/gcc.target/i386/pr89028-6.c
> new file mode 100644
> index 00000000000..9277c848c6c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr89028-6.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -msse2 -mno-mmx" } */
> +/* { dg-final { scan-assembler-times "movlps" 2 } } */
> +/* { dg-final { scan-assembler-times "mulps" 1 } } */
> +
> +typedef float __v2sf __attribute__ ((__vector_size__ (8)));
> +
> +extern __v2sf x, y, z;
> +
> +__v2sf
> +foo2 (void)
> +{
> +  return x * y;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr89028-7.c
> b/gcc/testsuite/gcc.target/i386/pr89028-7.c
> new file mode 100644
> index 00000000000..c8af7b2a4e9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr89028-7.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -msse2 -mno-mmx" } */
> +/* { dg-final { scan-assembler-times "movlps" 1 } } */
> +/* { dg-final { scan-assembler-times "mulps" 1 } } */
> +
> +typedef float __v2sf __attribute__ ((__vector_size__ (8)));
> +
> +extern __v2sf x, y, z;
> +
> +void
> +foo3 (__v2sf x, __v2sf y)
> +{
> +  z = x * y;
> +}
> --
> 2.20.1
>
>

Re: [PATCH 41/43] i386: Implement V2SF add/sub/mul with SSE

Reply via email to