https://gcc.gnu.org/g:169341f0893a009736f9715db969909880d0e876
commit r15-2427-g169341f0893a009736f9715db969909880d0e876 Author: Kewen Lin <li...@linux.ibm.com> Date: Tue Jul 30 21:20:51 2024 -0500 rs6000: Use standard name uabd for absdu insns r14-1832 added a recognition pattern, an ifn and an optab for ABD (ABsolute Difference). We have had some vector absolute difference unsigned instructions since ISA 3.0 but, as the associated test cases show, they are not exploited well because we don't define them with a standard name. So this patch first renames the pattern to the standard name. It also merges the define_expand and the define_insn, as a separate define_expand isn't needed. Besides, it adjusts the RTL pattern to use generic umax and umin rather than UNSPEC_VADU, which is more meaningful and can catch umin/umax opportunities. gcc/ChangeLog: * config/rs6000/altivec.md (p9_vadu<mode>3): Rename to ... (uabd<mode>3): ... this. Update RTL pattern with umin and umax rather than UNSPEC_VADU. (vadu<mode>3): Remove. (UNSPEC_VADU): Remove. (usadv16qi): Replace gen_p9_vaduv16qi3 with gen_uabdv16qi3. (usadv8hi): Replace gen_p9_vaduv8hi3 with gen_uabdv8hi3. * config/rs6000/rs6000-builtins.def (__builtin_altivec_vadub): Replace expander with uabdv16qi3. (__builtin_altivec_vaduh): Adjust expander with uabdv8hi3. (__builtin_altivec_vaduw): Adjust expander with uabdv4si3. gcc/testsuite/ChangeLog: * gcc.target/powerpc/abd-vectorize-1.c: New test. * gcc.target/powerpc/abd-vectorize-2.c: New test. 
Diff: --- gcc/config/rs6000/altivec.md | 25 ++++++--------- gcc/config/rs6000/rs6000-builtins.def | 6 ++-- gcc/testsuite/gcc.target/powerpc/abd-vectorize-1.c | 27 ++++++++++++++++ gcc/testsuite/gcc.target/powerpc/abd-vectorize-2.c | 37 ++++++++++++++++++++++ 4 files changed, 77 insertions(+), 18 deletions(-) diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 5af9bf920a2e..aa9d8fffc901 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -119,7 +119,6 @@ UNSPEC_STVLXL UNSPEC_STVRX UNSPEC_STVRXL - UNSPEC_VADU UNSPEC_VSLV UNSPEC_VSRV UNSPEC_VMULWHUB @@ -4323,19 +4322,15 @@ [(set_attr "type" "vecsimple")]) ;; Vector absolute difference unsigned -(define_expand "vadu<mode>3" - [(set (match_operand:VI 0 "register_operand") - (unspec:VI [(match_operand:VI 1 "register_operand") - (match_operand:VI 2 "register_operand")] - UNSPEC_VADU))] - "TARGET_P9_VECTOR") - -;; Vector absolute difference unsigned -(define_insn "p9_vadu<mode>3" +(define_insn "uabd<mode>3" [(set (match_operand:VI 0 "register_operand" "=v") - (unspec:VI [(match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")] - UNSPEC_VADU))] + (minus:VI + (umax:VI + (match_operand:VI 1 "register_operand" "v") + (match_operand:VI 2 "register_operand" "v")) + (umin:VI + (match_dup 1) + (match_dup 2))))] "TARGET_P9_VECTOR" "vabsdu<wd> %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -4500,7 +4495,7 @@ rtx zero = gen_reg_rtx (V4SImode); rtx psum = gen_reg_rtx (V4SImode); - emit_insn (gen_p9_vaduv16qi3 (absd, operands[1], operands[2])); + emit_insn (gen_uabdv16qi3 (absd, operands[1], operands[2])); emit_insn (gen_altivec_vspltisw (zero, const0_rtx)); emit_insn (gen_altivec_vsum4ubs (psum, absd, zero)); emit_insn (gen_addv4si3 (operands[0], psum, operands[3])); @@ -4521,7 +4516,7 @@ rtx zero = gen_reg_rtx (V4SImode); rtx psum = gen_reg_rtx (V4SImode); - emit_insn (gen_p9_vaduv8hi3 (absd, operands[1], operands[2])); + emit_insn (gen_uabdv8hi3 
(absd, operands[1], operands[2])); emit_insn (gen_altivec_vspltisw (zero, const0_rtx)); emit_insn (gen_altivec_vsum4shs (psum, absd, zero)); emit_insn (gen_addv4si3 (operands[0], psum, operands[3])); diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def index 12d131d016d6..0c3c884c1104 100644 --- a/gcc/config/rs6000/rs6000-builtins.def +++ b/gcc/config/rs6000/rs6000-builtins.def @@ -2345,13 +2345,13 @@ VFIRSTMISMATCHOREOSINDEX_V4SI first_mismatch_or_eos_index_v4si {} const vsc __builtin_altivec_vadub (vsc, vsc); - VADUB vaduv16qi3 {} + VADUB uabdv16qi3 {} const vss __builtin_altivec_vaduh (vss, vss); - VADUH vaduv8hi3 {} + VADUH uabdv8hi3 {} const vsi __builtin_altivec_vaduw (vsi, vsi); - VADUW vaduv4si3 {} + VADUW uabdv4si3 {} const vsll __builtin_altivec_vbpermd (vsll, vsc); VBPERMD altivec_vbpermd {} diff --git a/gcc/testsuite/gcc.target/powerpc/abd-vectorize-1.c b/gcc/testsuite/gcc.target/powerpc/abd-vectorize-1.c new file mode 100644 index 000000000000..d63b887b4b8f --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/abd-vectorize-1.c @@ -0,0 +1,27 @@ +/* { dg-options "-mdejagnu-cpu=power9 -O2" } */ +/* { dg-require-effective-target powerpc_vsx } */ + +/* Expliot vector absolute difference unsigned. */ + +#define MAX(x, y) ((x) > (y) ? (x) : (y)) +#define MIN(x, y) ((x) < (y) ? 
(x) : (y)) +#define N 128 +#define PRAGMA(X) _Pragma (#X) +#define UNROLL0 PRAGMA (GCC unroll 0) + +#define TEST(T) \ + void uabd_##T (unsigned T *restrict a, unsigned T *restrict b, \ + unsigned T *restrict out) \ + { \ + UNROLL0 \ + for (int i = 0; i < N; i++) \ + out[i] = MAX (a[i], b[i]) - MIN (a[i], b[i]); \ + } + +TEST(char) +TEST(short) +TEST(int) + +/* { dg-final { scan-assembler-times {\mvabsdub\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mvabsduh\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mvabsduw\M} 1 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/abd-vectorize-2.c b/gcc/testsuite/gcc.target/powerpc/abd-vectorize-2.c new file mode 100644 index 000000000000..f5a80d8fbd94 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/abd-vectorize-2.c @@ -0,0 +1,37 @@ +/* { dg-options "-mdejagnu-cpu=power9 -O2" } */ +/* { dg-require-effective-target powerpc_vsx } */ + +/* Expliot vector absolute difference unsigned. */ + +#define N 128 +#define PRAGMA(X) _Pragma (#X) +#define UNROLL0 PRAGMA (GCC unroll 0) + +#define TEST1(TYPE) \ + void test1_##TYPE (unsigned TYPE *restrict a, unsigned TYPE *restrict b, \ + unsigned TYPE *restrict out) \ + { \ + UNROLL0 \ + for (int i = 0; i < N; i++) \ + out[i] = __builtin_abs (a[i] - b[i]); \ + } + +TEST1(char) +TEST1(short) + +#define TEST2(TYPE1, TYPE2, FUNC) \ + void test2_##TYPE1 (unsigned TYPE1 *restrict a, unsigned TYPE1 *restrict b, \ + unsigned TYPE1 *restrict out) \ + { \ + UNROLL0 \ + for (int i = 0; i < N; i++) \ + out[i] = __builtin_##FUNC ((TYPE2) a[i] - (TYPE2) b[i]); \ + } + +TEST2(char, int, abs) +TEST2(short, int, abs) +TEST2(int, long long, llabs) + +/* { dg-final { scan-assembler-times {\mvabsdub\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mvabsduh\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mvabsduw\M} 1 } } */