[PATCH] Allow {nearby,r}int{,f} vectorization on x86 with sse4.1 and later (PR target/93078)

Jakub Jelinek Sat, 28 Dec 2019 01:34:21 -0800

Hi!

In i386.md, we have nearbyint<mode>2 and rint<mode>2 expanders that expand
SF/DF/XF mode operations to rounding instructions.  For pre-sse4.1 that is
done using XFmode and so is inappropriate for vectorization, but for sse4.1
and later we can just use the packed {,v}{round,rndscale}p{s,d} instructions,
just as we already emit the scalar {,v}rounds{s,d} for SF/DF mode.


Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2019-12-28  Jakub Jelinek  <ja...@redhat.com>

        PR target/93078
        * config/i386/sse.md (nearbyint<mode>2, rint<mode>2): New expanders
        with VF iterator.

        * gcc.target/i386/sse4_1-pr93078.c: New test.
        * gcc.target/i386/avx-pr93078.c: New test.
        * gcc.target/i386/avx512f-pr93078.c: New test.

--- gcc/config/i386/sse.md.jj   2019-12-21 00:12:54.000000000 +0100
+++ gcc/config/i386/sse.md      2019-12-27 18:16:48.146431083 +0100
@@ -17977,6 +17977,24 @@ (define_insn "ptesttf2"
    (set_attr "prefix" "orig,orig,vex")
    (set_attr "mode" "TI")])
 
+(define_expand "nearbyint<mode>2"  ;; Vector nearbyint expander, instantiated for each mode of the VF iterator.
+  [(set (match_operand:VF 0 "register_operand")
+       (unspec:VF
+         [(match_operand:VF 1 "vector_operand")
+          (match_dup 2)]  ;; Operand 2 is not matched from the caller; the preparation code below creates it.
+         UNSPEC_ROUND))]
+  "TARGET_SSE4_1"  ;; Only available when SSE4.1 (or later) is enabled.
+  "operands[2] = GEN_INT (ROUND_MXCSR | ROUND_NO_EXC);")  ;; Rounding immediate; presumably "use MXCSR rounding mode, suppress inexact" -- confirm against the ROUND_* definitions.
+
+(define_expand "rint<mode>2"  ;; Vector rint expander, instantiated for each mode of the VF iterator.
+  [(set (match_operand:VF 0 "register_operand")
+       (unspec:VF
+         [(match_operand:VF 1 "vector_operand")
+          (match_dup 2)]  ;; Operand 2 is not matched from the caller; the preparation code below creates it.
+         UNSPEC_ROUND))]
+  "TARGET_SSE4_1"  ;; Only available when SSE4.1 (or later) is enabled.
+  "operands[2] = GEN_INT (ROUND_MXCSR);")  ;; Rounding immediate without ROUND_NO_EXC; presumably "use MXCSR rounding mode" -- confirm against the ROUND_* definitions.
+
 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
   [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
        (unspec:VF_128_256
--- gcc/testsuite/gcc.target/i386/sse4_1-pr93078.c.jj   2019-12-27 18:26:05.436970472 +0100
+++ gcc/testsuite/gcc.target/i386/sse4_1-pr93078.c      2019-12-27 18:32:29.107147604 +0100
@@ -0,0 +1,42 @@
+/* PR target/93078 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msse4.1 -mno-sse4.2 -masm=att" } */
+/* { dg-final { scan-assembler "roundps\[ \t]\+\\\$12," } } */
+/* { dg-final { scan-assembler "roundps\[ \t]\+\\\$4," } } */
+/* { dg-final { scan-assembler "roundpd\[ \t]\+\\\$12," } } */
+/* { dg-final { scan-assembler "roundpd\[ \t]\+\\\$4," } } */
+
+float a[16], b[16];
+double c[8], d[8];
+
+void
+foo (void)  /* Float nearbyint loop: b[i] = nearbyintf (a[i]) for all 16 elements.  */
+{
+  int i;
+  for (i = 0; i < 16; ++i)
+    b[i] = __builtin_nearbyintf (a[i]);  /* Expected to vectorize; presumably the "$12" packed-single pattern -- confirm.  */
+}
+
+void
+bar (void)  /* Float rint loop: b[i] = rintf (a[i]) for all 16 elements.  */
+{
+  int i;
+  for (i = 0; i < 16; ++i)
+    b[i] = __builtin_rintf (a[i]);  /* Expected to vectorize; presumably the "$4" packed-single pattern -- confirm.  */
+}
+
+void
+baz (void)  /* Double nearbyint loop: d[i] = nearbyint (c[i]) for all 8 elements.  */
+{
+  int i;
+  for (i = 0; i < 8; ++i)
+    d[i] = __builtin_nearbyint (c[i]);  /* Expected to vectorize; presumably the "$12" packed-double pattern -- confirm.  */
+}
+
+void
+qux (void)  /* Double rint loop: d[i] = rint (c[i]) for all 8 elements.  */
+{
+  int i;
+  for (i = 0; i < 8; ++i)
+    d[i] = __builtin_rint (c[i]);  /* Expected to vectorize; presumably the "$4" packed-double pattern -- confirm.  */
+}
--- gcc/testsuite/gcc.target/i386/avx-pr93078.c.jj      2019-12-27 18:32:47.567867421 +0100
+++ gcc/testsuite/gcc.target/i386/avx-pr93078.c 2019-12-27 18:34:41.527137818 +0100
@@ -0,0 +1,9 @@
+/* PR target/93078 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -mavx -mno-avx2 -mprefer-vector-width=256 -masm=att" } */
+/* { dg-final { scan-assembler "vroundps\[ \t]\+\\\$12,\[^\n\r]*%y" } } */
+/* { dg-final { scan-assembler "vroundps\[ \t]\+\\\$4,\[^\n\r]*%y" } } */
+/* { dg-final { scan-assembler "vroundpd\[ \t]\+\\\$12,\[^\n\r]*%y" } } */
+/* { dg-final { scan-assembler "vroundpd\[ \t]\+\\\$4,\[^\n\r]*%y" } } */
+
+#include "sse4_1-pr93078.c"
--- gcc/testsuite/gcc.target/i386/avx512f-pr93078.c.jj  2019-12-27 18:34:56.632908546 +0100
+++ gcc/testsuite/gcc.target/i386/avx512f-pr93078.c     2019-12-27 18:35:38.650270831 +0100
@@ -0,0 +1,9 @@
+/* PR target/93078 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -mavx512f -mprefer-vector-width=512 -masm=att" } */
+/* { dg-final { scan-assembler "vrndscaleps\[ \t]\+\\\$12,\[^\n\r]*%z" } } */
+/* { dg-final { scan-assembler "vrndscaleps\[ \t]\+\\\$4,\[^\n\r]*%z" } } */
+/* { dg-final { scan-assembler "vrndscalepd\[ \t]\+\\\$12,\[^\n\r]*%z" } } */
+/* { dg-final { scan-assembler "vrndscalepd\[ \t]\+\\\$4,\[^\n\r]*%z" } } */
+
+#include "sse4_1-pr93078.c"

        Jakub

[PATCH] Allow {nearby,r}int{,f} vectorization on x86 with sse4.1 and later (PR target/93078)

Reply via email to