On Sat, Dec 28, 2019 at 10:33 AM Jakub Jelinek <ja...@redhat.com> wrote:
>
> Hi!
>
> In i386.md, we have nearbyint<mode>2 and rint<mode>2 patterns that expand
> SF/DF/XF mode patterns to rounding instructions.  For pre-sse4.1 that is
> done using XFmode and so inappropriate for vectorization, but for sse4.1
> and later we can just use the {,v}{round,rndscale}p{s,d} instructions
> when we emit {,v}rounds{s,d} for SF/DF mode.

In i386-builtins.c, ix86_builtin_vectorized_function, we already have:

--cut here--
    CASE_CFN_RINT:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_SSE4_1)
        break;

      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_get_builtin (IX86_BUILTIN_RINTPD);
          else if (out_n == 4 && in_n == 4)
            return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
        }
      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_get_builtin (IX86_BUILTIN_RINTPS);
          else if (out_n == 8 && in_n == 8)
            return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
        }
      break;
--cut here--

which converts the rint functions to the corresponding x86 builtins. If
we want to go through the generic path, then the above code is probably
redundant and should be removed together with the corresponding builtins.
OTOH, the existing code also bails out for flag_trapping_math, so this
condition should also be considered in the named expanders.

Uros.

> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2019-12-28  Jakub Jelinek  <ja...@redhat.com>
>
>         PR target/93078
>         * config/i386/sse.md (nearbyint<mode>2, rint<mode>2): New expanders
>         with VF iterator.
>
>         * gcc.target/i386/sse4_1-pr93078.c: New test.
>         * gcc.target/i386/avx-pr93078.c: New test.
>         * gcc.target/i386/avx512f-pr93078.c: New test.
>
> --- gcc/config/i386/sse.md.jj   2019-12-21 00:12:54.000000000 +0100
> +++ gcc/config/i386/sse.md      2019-12-27 18:16:48.146431083 +0100
> @@ -17977,6 +17977,24 @@ (define_insn "ptesttf2"
>     (set_attr "prefix" "orig,orig,vex")
>     (set_attr "mode" "TI")])
>
> +(define_expand "nearbyint<mode>2"
> +  [(set (match_operand:VF 0 "register_operand")
> +       (unspec:VF
> +         [(match_operand:VF 1 "vector_operand")
> +          (match_dup 2)]
> +         UNSPEC_ROUND))]
> +  "TARGET_SSE4_1"
> +  "operands[2] = GEN_INT (ROUND_MXCSR | ROUND_NO_EXC);")
> +
> +(define_expand "rint<mode>2"
> +  [(set (match_operand:VF 0 "register_operand")
> +       (unspec:VF
> +         [(match_operand:VF 1 "vector_operand")
> +          (match_dup 2)]
> +         UNSPEC_ROUND))]
> +  "TARGET_SSE4_1"
> +  "operands[2] = GEN_INT (ROUND_MXCSR);")
> +
>  (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
>    [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
>         (unspec:VF_128_256
> --- gcc/testsuite/gcc.target/i386/sse4_1-pr93078.c.jj   2019-12-27 18:26:05.436970472 +0100
> +++ gcc/testsuite/gcc.target/i386/sse4_1-pr93078.c      2019-12-27 18:32:29.107147604 +0100
> @@ -0,0 +1,42 @@
> +/* PR target/93078 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -ftree-vectorize -msse4.1 -mno-sse4.2 -masm=att" } */
> +/* { dg-final { scan-assembler "roundps\[ \t]\+\\\$12," } } */
> +/* { dg-final { scan-assembler "roundps\[ \t]\+\\\$4," } } */
> +/* { dg-final { scan-assembler "roundpd\[ \t]\+\\\$12," } } */
> +/* { dg-final { scan-assembler "roundpd\[ \t]\+\\\$4," } } */
> +
> +float a[16], b[16];
> +double c[8], d[8];
> +
> +void
> +foo (void)
> +{
> +  int i;
> +  for (i = 0; i < 16; ++i)
> +    b[i] = __builtin_nearbyintf (a[i]);
> +}
> +
> +void
> +bar (void)
> +{
> +  int i;
> +  for (i = 0; i < 16; ++i)
> +    b[i] = __builtin_rintf (a[i]);
> +}
> +
> +void
> +baz (void)
> +{
> +  int i;
> +  for (i = 0; i < 8; ++i)
> +    d[i] = __builtin_nearbyint (c[i]);
> +}
> +
> +void
> +qux (void)
> +{
> +  int i;
> +  for (i = 0; i < 8; ++i)
> +    d[i] = __builtin_rint (c[i]);
> +}
> --- gcc/testsuite/gcc.target/i386/avx-pr93078.c.jj      2019-12-27 18:32:47.567867421 +0100
> +++ gcc/testsuite/gcc.target/i386/avx-pr93078.c 2019-12-27 18:34:41.527137818 +0100
> @@ -0,0 +1,9 @@
> +/* PR target/93078 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -ftree-vectorize -mavx -mno-avx2 -mprefer-vector-width=256 -masm=att" } */
> +/* { dg-final { scan-assembler "vroundps\[ \t]\+\\\$12,\[^\n\r]*%y" } } */
> +/* { dg-final { scan-assembler "vroundps\[ \t]\+\\\$4,\[^\n\r]*%y" } } */
> +/* { dg-final { scan-assembler "vroundpd\[ \t]\+\\\$12,\[^\n\r]*%y" } } */
> +/* { dg-final { scan-assembler "vroundpd\[ \t]\+\\\$4,\[^\n\r]*%y" } } */
> +
> +#include "sse4_1-pr93078.c"
> --- gcc/testsuite/gcc.target/i386/avx512f-pr93078.c.jj  2019-12-27 18:34:56.632908546 +0100
> +++ gcc/testsuite/gcc.target/i386/avx512f-pr93078.c     2019-12-27 18:35:38.650270831 +0100
> @@ -0,0 +1,9 @@
> +/* PR target/93078 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -ftree-vectorize -mavx512f -mprefer-vector-width=512 -masm=att" } */
> +/* { dg-final { scan-assembler "vrndscaleps\[ \t]\+\\\$12,\[^\n\r]*%z" } } */
> +/* { dg-final { scan-assembler "vrndscaleps\[ \t]\+\\\$4,\[^\n\r]*%z" } } */
> +/* { dg-final { scan-assembler "vrndscalepd\[ \t]\+\\\$12,\[^\n\r]*%z" } } */
> +/* { dg-final { scan-assembler "vrndscalepd\[ \t]\+\\\$4,\[^\n\r]*%z" } } */
> +
> +#include "sse4_1-pr93078.c"
>
>         Jakub
>

Reply via email to