On 2/10/19, H.J. Lu <[email protected]> wrote:
> In 64-bit mode, implement V2SF add/sub/mul with SSE. Only SSE register
> source operands are allowed.
>
> gcc/
>
> PR target/89028
> * config/i386/i386.md (comm): Handle mult.
> * config/i386/mmx.md (plusminusmult): New.
> (plusminusmult_insn): Likewise.
> (plusminusmult_mnemonic): Likewise.
> (plusminusmult_type): Likewise.
> (mmx_addv2sf3): Add "&& !TARGET_MMX_WITH_SSE".
> (*mmx_addv2sf3): Likewise.
> (mmx_subv2sf3): Likewise.
> (mmx_subrv2sf3): Likewise.
> (*mmx_subv2sf3): Likewise.
> (mmx_mulv2sf3): Likewise.
> (*mmx_mulv2sf3): Likewise.
> (<plusminusmult_insn>v2sf3): New.
> (*sse_<plusminusmult_insn>v2sf3): Likewise.
No. There is no native support for V2SF in SSE, so we'll leave these out.
Uros.
>
> gcc/testsuite/
>
> PR target/89028
> * gcc.target/i386/pr89028-2.c: New test.
> * gcc.target/i386/pr89028-3.c: Likewise.
> * gcc.target/i386/pr89028-4.c: Likewise.
> * gcc.target/i386/pr89028-5.c: Likewise.
> * gcc.target/i386/pr89028-6.c: Likewise.
> * gcc.target/i386/pr89028-7.c: Likewise.
> ---
> gcc/config/i386/i386.md | 3 +-
> gcc/config/i386/mmx.md | 56 ++++++++++++++++++++---
> gcc/testsuite/gcc.target/i386/pr89028-2.c | 11 +++++
> gcc/testsuite/gcc.target/i386/pr89028-3.c | 14 ++++++
> gcc/testsuite/gcc.target/i386/pr89028-4.c | 14 ++++++
> gcc/testsuite/gcc.target/i386/pr89028-5.c | 11 +++++
> gcc/testsuite/gcc.target/i386/pr89028-6.c | 14 ++++++
> gcc/testsuite/gcc.target/i386/pr89028-7.c | 14 ++++++
> 8 files changed, 129 insertions(+), 8 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-2.c
> create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-3.c
> create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-4.c
> create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-5.c
> create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-6.c
> create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-7.c
>
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index 72685107fc0..cda973c0fbf 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -873,7 +873,8 @@
>
> ;; Mark commutative operators as such in constraints.
> (define_code_attr comm [(plus "%") (ss_plus "%") (us_plus "%")
> - (minus "") (ss_minus "") (us_minus "")])
> + (minus "") (ss_minus "") (us_minus "")
> + (mult "%")])
>
> ;; Mapping of max and min
> (define_code_iterator maxmin [smax smin umax umin])
> diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
> index e56d2e71168..88c1ecd9ae6 100644
> --- a/gcc/config/i386/mmx.md
> +++ b/gcc/config/i386/mmx.md
> @@ -63,6 +63,20 @@
> ;; Instruction suffix for truncations with saturation.
> (define_code_attr s_trunsuffix [(ss_truncate "s") (us_truncate "u")])
>
> +(define_code_iterator plusminusmult [plus minus mult])
> +
> +;; Base name for define_insn
> +(define_code_attr plusminusmult_insn
> + [(plus "add") (minus "sub") (mult "mul")])
> +
> +;; Base name for insn mnemonic.
> +(define_code_attr plusminusmult_mnemonic
> + [(plus "add") (minus "sub") (mult "mul")])
> +
> +;; Suffix of the insn "type" attribute (used as sse<plusminusmult_type>).
> +(define_code_attr plusminusmult_type
> + [(plus "add") (minus "add") (mult "mul")])
> +
> ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
> ;;
> ;; Move patterns
> @@ -279,14 +293,16 @@
> (plus:V2SF
> (match_operand:V2SF 1 "nonimmediate_operand")
> (match_operand:V2SF 2 "nonimmediate_operand")))]
> - "TARGET_3DNOW"
> + "TARGET_3DNOW && !TARGET_MMX_WITH_SSE"
> "ix86_fixup_binary_operands_no_copy (PLUS, V2SFmode, operands);")
>
> (define_insn "*mmx_addv2sf3"
> [(set (match_operand:V2SF 0 "register_operand" "=y")
> (plus:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "%0")
> (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
> - "TARGET_3DNOW && ix86_binary_operator_ok (PLUS, V2SFmode, operands)"
> + "TARGET_3DNOW
> + && !TARGET_MMX_WITH_SSE
> + && ix86_binary_operator_ok (PLUS, V2SFmode, operands)"
> "pfadd\t{%2, %0|%0, %2}"
> [(set_attr "type" "mmxadd")
> (set_attr "prefix_extra" "1")
> @@ -296,19 +312,21 @@
> [(set (match_operand:V2SF 0 "register_operand")
> (minus:V2SF (match_operand:V2SF 1 "register_operand")
> (match_operand:V2SF 2 "nonimmediate_operand")))]
> - "TARGET_3DNOW")
> + "TARGET_3DNOW && !TARGET_MMX_WITH_SSE")
>
> (define_expand "mmx_subrv2sf3"
> [(set (match_operand:V2SF 0 "register_operand")
> (minus:V2SF (match_operand:V2SF 2 "register_operand")
> (match_operand:V2SF 1 "nonimmediate_operand")))]
> - "TARGET_3DNOW")
> + "TARGET_3DNOW && !TARGET_MMX_WITH_SSE")
>
> (define_insn "*mmx_subv2sf3"
> [(set (match_operand:V2SF 0 "register_operand" "=y,y")
> (minus:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "0,ym")
> (match_operand:V2SF 2 "nonimmediate_operand" "ym,0")))]
> - "TARGET_3DNOW && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
> + "TARGET_3DNOW
> + && !TARGET_MMX_WITH_SSE
> + && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
> "@
> pfsub\t{%2, %0|%0, %2}
> pfsubr\t{%1, %0|%0, %1}"
> @@ -320,19 +338,43 @@
> [(set (match_operand:V2SF 0 "register_operand")
> (mult:V2SF (match_operand:V2SF 1 "nonimmediate_operand")
> (match_operand:V2SF 2 "nonimmediate_operand")))]
> - "TARGET_3DNOW"
> + "TARGET_3DNOW && !TARGET_MMX_WITH_SSE"
> "ix86_fixup_binary_operands_no_copy (MULT, V2SFmode, operands);")
>
> (define_insn "*mmx_mulv2sf3"
> [(set (match_operand:V2SF 0 "register_operand" "=y")
> (mult:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "%0")
> (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
> - "TARGET_3DNOW && ix86_binary_operator_ok (MULT, V2SFmode, operands)"
> + "TARGET_3DNOW
> + && !TARGET_MMX_WITH_SSE
> + && ix86_binary_operator_ok (MULT, V2SFmode, operands)"
> "pfmul\t{%2, %0|%0, %2}"
> [(set_attr "type" "mmxmul")
> (set_attr "prefix_extra" "1")
> (set_attr "mode" "V2SF")])
>
> +(define_expand "<plusminusmult_insn>v2sf3"
> + [(set (match_operand:V2SF 0 "register_operand")
> + (plusminusmult:V2SF
> + (match_operand:V2SF 1 "nonimmediate_operand")
> + (match_operand:V2SF 2 "nonimmediate_operand")))]
> + "TARGET_MMX_WITH_SSE"
> + "ix86_fixup_binary_operands_no_copy (<CODE>, V2SFmode, operands);")
> +
> +(define_insn "*sse_<plusminusmult_insn>v2sf3"
> + [(set (match_operand:V2SF 0 "register_operand" "=x,Yv")
> + (plusminusmult:V2SF
> + (match_operand:V2SF 1 "nonimmediate_operand" "<comm>0,Yv")
> + (match_operand:V2SF 2 "nonimmediate_operand" "x,Yv")))]
> + "TARGET_MMX_WITH_SSE
> + && ix86_binary_operator_ok (<CODE>, V4SFmode, operands)"
> + "@
> + <plusminusmult_mnemonic>ps\t{%2, %0|%0, %2}
> + v<plusminusmult_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
> + [(set_attr "isa" "noavx,avx")
> + (set_attr "type" "sse<plusminusmult_type>")
> + (set_attr "mode" "V4SF")])
> +
> (define_expand "mmx_<code>v2sf3"
> [(set (match_operand:V2SF 0 "register_operand")
> (smaxmin:V2SF
> diff --git a/gcc/testsuite/gcc.target/i386/pr89028-2.c
> b/gcc/testsuite/gcc.target/i386/pr89028-2.c
> new file mode 100644
> index 00000000000..d096b0b6863
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr89028-2.c
> @@ -0,0 +1,11 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -msse2 -mno-mmx" } */
> +/* { dg-final { scan-assembler-times "addps" 1 } } */
> +
> +typedef float __v2sf __attribute__ ((__vector_size__ (8)));
> +
> +__v2sf
> +foo1 (__v2sf x, __v2sf y)
> +{
> + return x + y;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr89028-3.c
> b/gcc/testsuite/gcc.target/i386/pr89028-3.c
> new file mode 100644
> index 00000000000..0fa187aaf72
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr89028-3.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -msse2 -mno-mmx" } */
> +/* { dg-final { scan-assembler-times "movlps" 2 } } */
> +/* { dg-final { scan-assembler-times "addps" 1 } } */
> +
> +typedef float __v2sf __attribute__ ((__vector_size__ (8)));
> +
> +extern __v2sf x, y, z;
> +
> +__v2sf
> +foo2 (void)
> +{
> + return x + y;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr89028-4.c
> b/gcc/testsuite/gcc.target/i386/pr89028-4.c
> new file mode 100644
> index 00000000000..b25f67632cb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr89028-4.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -msse2 -mno-mmx" } */
> +/* { dg-final { scan-assembler-times "movlps" 1 } } */
> +/* { dg-final { scan-assembler-times "addps" 1 } } */
> +
> +typedef float __v2sf __attribute__ ((__vector_size__ (8)));
> +
> +extern __v2sf x, y, z;
> +
> +void
> +foo3 (__v2sf x, __v2sf y)
> +{
> + z = x + y;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr89028-5.c
> b/gcc/testsuite/gcc.target/i386/pr89028-5.c
> new file mode 100644
> index 00000000000..4ead7187605
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr89028-5.c
> @@ -0,0 +1,11 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -msse2 -mno-mmx" } */
> +/* { dg-final { scan-assembler-times "mulps" 1 } } */
> +
> +typedef float __v2sf __attribute__ ((__vector_size__ (8)));
> +
> +__v2sf
> +foo1 (__v2sf x, __v2sf y)
> +{
> + return x * y;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr89028-6.c
> b/gcc/testsuite/gcc.target/i386/pr89028-6.c
> new file mode 100644
> index 00000000000..9277c848c6c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr89028-6.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -msse2 -mno-mmx" } */
> +/* { dg-final { scan-assembler-times "movlps" 2 } } */
> +/* { dg-final { scan-assembler-times "mulps" 1 } } */
> +
> +typedef float __v2sf __attribute__ ((__vector_size__ (8)));
> +
> +extern __v2sf x, y, z;
> +
> +__v2sf
> +foo2 (void)
> +{
> + return x * y;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr89028-7.c
> b/gcc/testsuite/gcc.target/i386/pr89028-7.c
> new file mode 100644
> index 00000000000..c8af7b2a4e9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr89028-7.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -msse2 -mno-mmx" } */
> +/* { dg-final { scan-assembler-times "movlps" 1 } } */
> +/* { dg-final { scan-assembler-times "mulps" 1 } } */
> +
> +typedef float __v2sf __attribute__ ((__vector_size__ (8)));
> +
> +extern __v2sf x, y, z;
> +
> +void
> +foo3 (__v2sf x, __v2sf y)
> +{
> + z = x * y;
> +}
> --
> 2.20.1
>
>