https://gcc.gnu.org/g:169341f0893a009736f9715db969909880d0e876

commit r15-2427-g169341f0893a009736f9715db969909880d0e876
Author: Kewen Lin <li...@linux.ibm.com>
Date:   Tue Jul 30 21:20:51 2024 -0500

    rs6000: Use standard name uabd for absdu insns
    
    r14-1832 added a recognition pattern, ifn and optab for ABD
    (ABsolute Difference).  We have had some vector absolute
    difference unsigned instructions since ISA 3.0, but as the
    associated test cases show, they are not exploited well
    because we don't define them with a standard name.  So this
    patch renames the pattern to the standard name first.  It
    also merges the define_expand and define_insn, since a
    separate define_expand isn't needed.  Besides, it adjusts
    the RTL pattern to use generic umax and umin rather than
    UNSPEC_VADU, which is more meaningful and can catch
    umin/umax opportunities.
    
    gcc/ChangeLog:
    
            * config/rs6000/altivec.md (p9_vadu<mode>3): Rename to ...
            (uabd<mode>3): ... this.  Update RTL pattern with umin and umax rather
            than UNSPEC_VADU.
            (vadu<mode>3): Remove.
            (UNSPEC_VADU): Remove.
            (usadv16qi): Replace gen_p9_vaduv16qi3 with gen_uabdv16qi3.
            (usadv8hi): Replace gen_p9_vaduv8hi3 with gen_uabdv8hi3.
            * config/rs6000/rs6000-builtins.def (__builtin_altivec_vadub): Replace
            expander with uabdv16qi3.
            (__builtin_altivec_vaduh): Adjust expander with uabdv8hi3.
            (__builtin_altivec_vaduw): Adjust expander with uabdv4si3.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/powerpc/abd-vectorize-1.c: New test.
            * gcc.target/powerpc/abd-vectorize-2.c: New test.

Diff:
---
 gcc/config/rs6000/altivec.md                       | 25 ++++++---------
 gcc/config/rs6000/rs6000-builtins.def              |  6 ++--
 gcc/testsuite/gcc.target/powerpc/abd-vectorize-1.c | 27 ++++++++++++++++
 gcc/testsuite/gcc.target/powerpc/abd-vectorize-2.c | 37 ++++++++++++++++++++++
 4 files changed, 77 insertions(+), 18 deletions(-)

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 5af9bf920a2e..aa9d8fffc901 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -119,7 +119,6 @@
    UNSPEC_STVLXL
    UNSPEC_STVRX
    UNSPEC_STVRXL
-   UNSPEC_VADU
    UNSPEC_VSLV
    UNSPEC_VSRV
    UNSPEC_VMULWHUB
@@ -4323,19 +4322,15 @@
   [(set_attr "type" "vecsimple")])
 
 ;; Vector absolute difference unsigned
-(define_expand "vadu<mode>3"
-  [(set (match_operand:VI 0 "register_operand")
-        (unspec:VI [(match_operand:VI 1 "register_operand")
-                   (match_operand:VI 2 "register_operand")]
-         UNSPEC_VADU))]
-  "TARGET_P9_VECTOR")
-
-;; Vector absolute difference unsigned
-(define_insn "p9_vadu<mode>3"
+(define_insn "uabd<mode>3"
   [(set (match_operand:VI 0 "register_operand" "=v")
-        (unspec:VI [(match_operand:VI 1 "register_operand" "v")
-                   (match_operand:VI 2 "register_operand" "v")]
-         UNSPEC_VADU))]
+       (minus:VI
+         (umax:VI
+           (match_operand:VI 1 "register_operand" "v")
+           (match_operand:VI 2 "register_operand" "v"))
+         (umin:VI
+           (match_dup 1)
+           (match_dup 2))))]
   "TARGET_P9_VECTOR"
   "vabsdu<wd> %0,%1,%2"
   [(set_attr "type" "vecsimple")])
@@ -4500,7 +4495,7 @@
   rtx zero = gen_reg_rtx (V4SImode);
   rtx psum = gen_reg_rtx (V4SImode);
 
-  emit_insn (gen_p9_vaduv16qi3 (absd, operands[1], operands[2]));
+  emit_insn (gen_uabdv16qi3 (absd, operands[1], operands[2]));
   emit_insn (gen_altivec_vspltisw (zero, const0_rtx));
   emit_insn (gen_altivec_vsum4ubs (psum, absd, zero));
   emit_insn (gen_addv4si3 (operands[0], psum, operands[3]));
@@ -4521,7 +4516,7 @@
   rtx zero = gen_reg_rtx (V4SImode);
   rtx psum = gen_reg_rtx (V4SImode);
 
-  emit_insn (gen_p9_vaduv8hi3 (absd, operands[1], operands[2]));
+  emit_insn (gen_uabdv8hi3 (absd, operands[1], operands[2]));
   emit_insn (gen_altivec_vspltisw (zero, const0_rtx));
   emit_insn (gen_altivec_vsum4shs (psum, absd, zero));
   emit_insn (gen_addv4si3 (operands[0], psum, operands[3]));
diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def
index 12d131d016d6..0c3c884c1104 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -2345,13 +2345,13 @@
     VFIRSTMISMATCHOREOSINDEX_V4SI first_mismatch_or_eos_index_v4si {}
 
   const vsc __builtin_altivec_vadub (vsc, vsc);
-    VADUB vaduv16qi3 {}
+    VADUB uabdv16qi3 {}
 
   const vss __builtin_altivec_vaduh (vss, vss);
-    VADUH vaduv8hi3 {}
+    VADUH uabdv8hi3 {}
 
   const vsi __builtin_altivec_vaduw (vsi, vsi);
-    VADUW vaduv4si3 {}
+    VADUW uabdv4si3 {}
 
   const vsll __builtin_altivec_vbpermd (vsll, vsc);
     VBPERMD altivec_vbpermd {}
diff --git a/gcc/testsuite/gcc.target/powerpc/abd-vectorize-1.c b/gcc/testsuite/gcc.target/powerpc/abd-vectorize-1.c
new file mode 100644
index 000000000000..d63b887b4b8f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/abd-vectorize-1.c
@@ -0,0 +1,27 @@
+/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
+/* { dg-require-effective-target powerpc_vsx } */
+
+/* Exploit vector absolute difference unsigned.  */
+
+#define MAX(x, y) ((x) > (y) ? (x) : (y))
+#define MIN(x, y) ((x) < (y) ? (x) : (y))
+#define N 128
+#define PRAGMA(X) _Pragma (#X)
+#define UNROLL0 PRAGMA (GCC unroll 0)
+
+#define TEST(T)                                                                \
+  void uabd_##T (unsigned T *restrict a, unsigned T *restrict b,               \
+                unsigned T *restrict out)                                     \
+  {                                                                            \
+    UNROLL0                                                                    \
+    for (int i = 0; i < N; i++)                                                \
+      out[i] = MAX (a[i], b[i]) - MIN (a[i], b[i]);                            \
+  }
+
+TEST(char)
+TEST(short)
+TEST(int)
+
+/* { dg-final { scan-assembler-times {\mvabsdub\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvabsduh\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvabsduw\M} 1 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/abd-vectorize-2.c b/gcc/testsuite/gcc.target/powerpc/abd-vectorize-2.c
new file mode 100644
index 000000000000..f5a80d8fbd94
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/abd-vectorize-2.c
@@ -0,0 +1,37 @@
+/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
+/* { dg-require-effective-target powerpc_vsx } */
+
+/* Exploit vector absolute difference unsigned.  */
+
+#define N 128
+#define PRAGMA(X) _Pragma (#X)
+#define UNROLL0 PRAGMA (GCC unroll 0)
+
+#define TEST1(TYPE)                                                            \
+  void test1_##TYPE (unsigned TYPE *restrict a, unsigned TYPE *restrict b,     \
+                    unsigned TYPE *restrict out)                              \
+  {                                                                            \
+    UNROLL0                                                                    \
+    for (int i = 0; i < N; i++)                                                \
+      out[i] = __builtin_abs (a[i] - b[i]);                                    \
+  }
+
+TEST1(char)
+TEST1(short)
+
+#define TEST2(TYPE1, TYPE2, FUNC)                                              \
+  void test2_##TYPE1 (unsigned TYPE1 *restrict a, unsigned TYPE1 *restrict b,  \
+                     unsigned TYPE1 *restrict out)                            \
+  {                                                                            \
+    UNROLL0                                                                    \
+    for (int i = 0; i < N; i++)                                                \
+      out[i] = __builtin_##FUNC ((TYPE2) a[i] - (TYPE2) b[i]);                 \
+  }
+
+TEST2(char, int, abs)
+TEST2(short, int, abs)
+TEST2(int, long long, llabs)
+
+/* { dg-final { scan-assembler-times {\mvabsdub\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mvabsduh\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mvabsduw\M} 1 } } */

Reply via email to