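The autovectorizer recognizes some idioms, namely the rounding (ceiling)
average and the high part of a multiplication, that s390 can implement
directly with vector instructions.  Add expanders for these standard
pattern names so that autovectorization of these idioms works on s390.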

Bootstrapped and regtested on s390.  Ok for trunk?

gcc/ChangeLog:

        * config/s390/vector.md (avg<mode>3_ceil): New pattern.
        (uavg<mode>3_ceil): New pattern.
        (smul<mode>3_highpart): New pattern.
        (umul<mode>3_highpart): New pattern.

gcc/testsuite/ChangeLog:

        * gcc.target/s390/vector/pattern-avg-1.c: New test.
        * gcc.target/s390/vector/pattern-mulh-1.c: New test.

Signed-off-by: Juergen Christ <jchr...@linux.ibm.com>
---
 gcc/config/s390/vector.md                     | 28 ++++++++++++++++++
 .../gcc.target/s390/vector/pattern-avg-1.c    | 26 +++++++++++++++++
 .../gcc.target/s390/vector/pattern-mulh-1.c   | 29 +++++++++++++++++++
 3 files changed, 83 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c

diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index 6f4e1929eb80..16f4b8116432 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -3576,3 +3576,31 @@
 ; vec_unpacks_float_lo
 ; vec_unpacku_float_hi
 ; vec_unpacku_float_lo
+
+(define_expand "avg<mode>3_ceil"
+  [(set (match_operand:VIT_HW_VXE3_T                        0 "register_operand" "=v")
+       (unspec:VIT_HW_VXE3_T [(match_operand:VIT_HW_VXE3_T 1 "register_operand"  "v")
+                              (match_operand:VIT_HW_VXE3_T 2 "register_operand"  "v")]
+                             UNSPEC_VEC_AVG))]
+  "TARGET_VX")
+
+(define_expand "uavg<mode>3_ceil"
+  [(set (match_operand:VIT_HW_VXE3_T                        0 "register_operand" "=v")
+       (unspec:VIT_HW_VXE3_T [(match_operand:VIT_HW_VXE3_T 1 "register_operand"  "v")
+                              (match_operand:VIT_HW_VXE3_T 2 "register_operand"  "v")]
+                             UNSPEC_VEC_AVGU))]
+  "TARGET_VX")
+
+(define_expand "smul<mode>3_highpart"
+  [(set (match_operand:VIT_HW_VXE3_DT 0 "register_operand"                       "=v")
+       (unspec:VIT_HW_VXE3_DT [(match_operand:VIT_HW_VXE3_DT 1 "register_operand" "v")
+                               (match_operand:VIT_HW_VXE3_DT 2 "register_operand" "v")]
+                              UNSPEC_VEC_SMULT_HI))]
+  "TARGET_VX")
+
+(define_expand "umul<mode>3_highpart"
+  [(set (match_operand:VIT_HW_VXE3_DT 0 "register_operand"                       "=v")
+       (unspec:VIT_HW_VXE3_DT [(match_operand:VIT_HW_VXE3_DT 1 "register_operand" "v")
+                               (match_operand:VIT_HW_VXE3_DT 2 "register_operand" "v")]
+                              UNSPEC_VEC_UMULT_HI))]
+  "TARGET_VX")
diff --git a/gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c b/gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c
new file mode 100644
index 000000000000..a15301aabe54
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z16 -ftree-vectorize -fdump-tree-optimized" } */
+
+#define TEST(T1,T2,N)                                                   \
+  void                                                                  \
+  avg##T1 (signed T1 *__restrict res, signed T1 *__restrict a,          \
+           signed T1 *__restrict b)                                     \
+  {                                                                     \
+    for (int i = 0; i < N; ++i)                                         \
+      res[i] = ((signed T2)a[i] + b[i] + 1) >> 1;                       \
+  }                                                                     \
+                                                                        \
+  void                                                                  \
+  uavg##T1 (unsigned T1 *__restrict res, unsigned T1 *__restrict a,     \
+            unsigned T1 *__restrict b)                                  \
+  {                                                                     \
+    for (int i = 0; i < N; ++i)                                         \
+      res[i] = ((unsigned T2)a[i] + b[i] + 1) >> 1;                     \
+  }
+
+TEST(char,short,16)
+TEST(short,int,8)
+TEST(int,long,4)
+TEST(long,__int128,2)
+
+/* { dg-final { scan-tree-dump-times "\.AVG_CEIL" 8 "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c b/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c
new file mode 100644
index 000000000000..cd8e4e7d7a09
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=arch15 -ftree-vectorize -fdump-tree-optimized" } */
+
+#define TEST(T1,T2,N,S)                                                 \
+  void                                                                  \
+  mulh##T1 (signed T1 *__restrict res,                                  \
+            signed T1 *__restrict l,                                    \
+            signed T1 *__restrict r)                                    \
+  {                                                                     \
+    for (int i = 0; i < N; ++i)                                         \
+      res[i] = (signed T1) (((signed T2)l[i] * (signed T2)r[i]) >> S);  \
+  }                                                                     \
+                                                                        \
+  void                                                                  \
+  umulh##T1 (unsigned T1 *__restrict res,                               \
+             unsigned T1 *__restrict l,                                 \
+             unsigned T1 *__restrict r)                                 \
+  {                                                                     \
+    for (int i = 0; i < N; ++i)                                         \
+      res[i] = (unsigned T1)                                            \
+        (((unsigned T2)l[i] * (unsigned T2)r[i]) >> S);                 \
+  }
+
+TEST(char,short,16,8)
+TEST(short,int,8,16)
+TEST(int,long,4,32)
+TEST(long,__int128,2,64)
+
+/* { dg-final { scan-tree-dump-times "\.MULH" 8 "optimized" } } */
-- 
2.43.5

Reply via email to