The AArch64 FEAT_FAMINMAX extension is optional from Armv9.2-a and mandatory from Armv9.5-a. It introduces instructions for computing the floating point absolute maximum and minimum of the two vectors element-wise.
This patch adds code generation for famax and famin in terms of existing unspecs. With this patch: 1. famax can be expressed as taking fmax/fmaxnm of the two operands and then taking absolute value of their result. 2. famin can be expressed as taking fmin/fminnm of the two operands and then taking absolute value of their result. This fusion of operators is only possible when -march=armv9-a+faminmax+sve flags are passed. This code generation is only available on -O2 or -O3 as that is when auto-vectorization is enabled. gcc/ChangeLog: * config/aarch64/aarch64-sve.md (*aarch64_pred_faminmax_fused): Instruction pattern for faminmax codegen. * config/aarch64/iterators.md: Attribute for faminmax codegen. gcc/testsuite/ChangeLog: * gcc.target/aarch64/sve/faminmax.c: New test. --- gcc/config/aarch64/aarch64-sve.md | 29 +++++++ gcc/config/aarch64/iterators.md | 6 ++ .../gcc.target/aarch64/sve/faminmax.c | 85 +++++++++++++++++++ 3 files changed, 120 insertions(+) create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/faminmax.c
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index a5cd42be9d5..feb6438efde 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -11111,3 +11111,32 @@ return "sel\t%0.<Vetype>, %3, %2.<Vetype>, %1.<Vetype>"; } ) + +;; ------------------------------------------------------------------------- +;; -- [FP] Absolute maximum and minimum +;; ------------------------------------------------------------------------- +;; Includes: +;; - FAMAX +;; - FAMIN +;; ------------------------------------------------------------------------- + +;; Predicated floating-point absolute maximum and minimum. +(define_insn "*aarch64_pred_faminmax_fused" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w") + (unspec:SVE_FULL_F + [(match_operand:<VPRED> 1 "register_operand" "Upl") + (match_operand:SI 4 "aarch64_sve_gp_strictness" "w") + (unspec:SVE_FULL_F + [(match_operand 5) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w")] + UNSPEC_COND_FABS) + (unspec:SVE_FULL_F + [(match_operand 6) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_FULL_F 3 "register_operand" "w")] + UNSPEC_COND_FABS)] + SVE_COND_FP_MAXMIN))] + "TARGET_SVE_FAMINMAX" + "<faminmax_cond_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" +) diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index b993ac9a7f6..5bdf1970f92 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -4489,5 +4489,11 @@ (define_int_attr faminmax_uns_op [(UNSPEC_FAMAX "famax") (UNSPEC_FAMIN "famin")]) +(define_int_attr faminmax_cond_uns_op + [(UNSPEC_COND_FMAX "famax") + (UNSPEC_COND_FMAXNM "famax") + (UNSPEC_COND_FMIN "famin") + (UNSPEC_COND_FMINNM "famin")]) + (define_code_attr faminmax_op [(smax "famax") (smin "famin")]) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/faminmax.c b/gcc/testsuite/gcc.target/aarch64/sve/faminmax.c new file mode 100644 index 00000000000..b70e19fa276 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/faminmax.c @@ -0,0 +1,85 @@ +/* { dg-do assemble} */ +/* { dg-additional-options "-O3 -ffast-math -march=armv9-a+sve+faminmax" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "arm_sve.h" + +#pragma GCC target "+sve" + +#define TEST_FAMAX(TYPE) \ + void fn_famax_##TYPE (TYPE * restrict a, \ + TYPE * restrict b, \ + TYPE * restrict c, \ + int n) { \ + for (int i = 0; i < n; i++) { \ + TYPE temp1 = __builtin_fabs (a[i]); \ + TYPE temp2 = __builtin_fabs (b[i]); \ + c[i] = __builtin_fmax (temp1, temp2); \ + } \ + } \ + +#define TEST_FAMIN(TYPE) \ + void fn_famin_##TYPE (TYPE * restrict a, \ + TYPE * restrict b, \ + TYPE * restrict c, \ + int n) { \ + for (int i = 0; i < n; i++) { \ + TYPE temp1 = __builtin_fabs (a[i]); \ + TYPE temp2 = __builtin_fabs (b[i]); \ + c[i] = __builtin_fmin (temp1, temp2); \ + } \ + } \ + +/* +** fn_famax_float16_t: +** ... +** famax z31.h, p6/m, z31.h, z30.h +** ... +** ret +*/ +TEST_FAMAX (float16_t) + +/* +** fn_famax_float32_t: +** ... +** famax z31.s, p6/m, z31.s, z30.s +** ... +** ret +*/ +TEST_FAMAX (float32_t) + +/* +** fn_famax_float64_t: +** ... +** famax z31.d, p6/m, z31.d, z30.d +** ... +** ret +*/ +TEST_FAMAX (float64_t) + +/* +** fn_famin_float16_t: +** ... +** famin z31.h, p6/m, z31.h, z30.h +** ... +** ret +*/ +TEST_FAMIN (float16_t) + +/* +** fn_famin_float32_t: +** ... +** famin z31.s, p6/m, z31.s, z30.s +** ... +** ret +*/ +TEST_FAMIN (float32_t) + +/* +** fn_famin_float64_t: +** ... +** famin z31.d, p6/m, z31.d, z30.d +** ... +** ret +*/ +TEST_FAMIN (float64_t)