The AArch64 FEAT_FAMINMAX extension introduces instructions for
computing the element-wise floating-point absolute maximum and
minimum of two vectors.
This patch adds code generation for famax and famin in terms of existing
unspecs. With this patch:
1. famax can be expressed as taking the absolute value of each of the
two operands and then taking UNSPEC_COND_SMAX of the results.
2. famin can be expressed as taking the absolute value of each of the
two operands and then taking UNSPEC_COND_SMIN of the results.
This fusion of operators is only possible when the
-march=armv9-a+faminmax+sve flag is passed. The -ffast-math flag is
also required; this is what enables the compiler to use
UNSPEC_COND_SMAX and UNSPEC_COND_SMIN.
This code generation is only available at -O2 or -O3, as that is when
auto-vectorization is enabled.
gcc/ChangeLog:
* config/aarch64/aarch64-sve2.md
(*aarch64_pred_faminmax_fused): Instruction pattern for faminmax
codegen.
* config/aarch64/iterators.md (SVE_COND_FP_SMAXMIN): New int iterator
for faminmax codegen.
(faminmax_cond_uns_op): New int attribute for faminmax codegen.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/sve/faminmax_1.c: New test.
* gcc.target/aarch64/sve/faminmax_2.c: New test.
---
gcc/config/aarch64/aarch64-sve2.md | 31 ++++
gcc/config/aarch64/iterators.md | 6 +
.../gcc.target/aarch64/sve/faminmax_1.c | 85 ++++++++++
.../gcc.target/aarch64/sve/faminmax_2.c | 154 ++++++++++++++++++
4 files changed, 276 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/faminmax_1.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/faminmax_2.c
diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
index 972b03a4fef..6a8e940e16d 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -2467,6 +2467,49 @@
[(set_attr "movprfx" "yes")]
)
+;; -------------------------------------------------------------------------
+;; -- [FP] Absolute maximum and minimum
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FAMAX
+;; - FAMIN
+;; -------------------------------------------------------------------------
+;; Predicated floating-point absolute maximum and minimum.
+;;
+;; Matches a predicated SMAX/SMIN whose two inputs are relaxed predicated
+;; FABS operations and emits a single FAMAX/FAMIN governed by operand 1.
+;; Operands 5 and 6 (the FABS predicates) are not referenced by the output
+;; templates, so once the pattern has matched they are rewritten to
+;; operand 1.
+(define_insn_and_rewrite "*aarch64_pred_faminmax_fused"
+  [(set (match_operand:SVE_FULL_F 0 "register_operand")
+	(unspec:SVE_FULL_F
+	  [(match_operand:<VPRED> 1 "register_operand")
+	   (match_operand:SI 4 "aarch64_sve_gp_strictness")
+	   (unspec:SVE_FULL_F
+	     [(match_operand 5)
+	      (const_int SVE_RELAXED_GP)
+	      (match_operand:SVE_FULL_F 2 "register_operand")]
+	     UNSPEC_COND_FABS)
+	   (unspec:SVE_FULL_F
+	     [(match_operand 6)
+	      (const_int SVE_RELAXED_GP)
+	      (match_operand:SVE_FULL_F 3 "register_operand")]
+	     UNSPEC_COND_FABS)]
+	  SVE_COND_FP_SMAXMIN))]
+  "TARGET_SVE_FAMINMAX"
+  {@ [ cons: =0 , 1   , 2  , 3 ; attrs: movprfx ]
+     [ w        , Upl , %0 , w ; *              ] <faminmax_cond_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+     [ ?&w      , Upl , w  , w ; yes            ] movprfx\t%0, %2\;<faminmax_cond_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+  }
+  "&& (!rtx_equal_p (operands[1], operands[5])
+       || !rtx_equal_p (operands[1], operands[6]))"
+  {
+    operands[5] = copy_rtx (operands[1]);
+    operands[6] = copy_rtx (operands[1]);
+  }
+)
+
;; =========================================================================
;; == Complex arithmetic
;; =========================================================================
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index d3a457fc6d9..e9adb4209da 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -3143,6 +3143,9 @@
UNSPEC_COND_FMIN
UNSPEC_COND_FMINNM])
+(define_int_iterator SVE_COND_FP_SMAXMIN [UNSPEC_COND_SMAX ; Outer unspecs matched by *aarch64_pred_faminmax_fused.
+ UNSPEC_COND_SMIN])
+
(define_int_iterator SVE_COND_FP_TERNARY [UNSPEC_COND_FMLA
UNSPEC_COND_FMLS
UNSPEC_COND_FNMLA
@@ -4503,6 +4506,9 @@
(define_int_iterator FAMINMAX_UNS [UNSPEC_FAMAX UNSPEC_FAMIN])
+(define_int_attr faminmax_cond_uns_op ; Mnemonic printed for each SVE_COND_FP_SMAXMIN unspec.
+ [(UNSPEC_COND_SMAX "famax") (UNSPEC_COND_SMIN "famin")])
+
(define_int_attr faminmax_uns_op
[(UNSPEC_FAMAX "famax") (UNSPEC_FAMIN "famin")])
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/faminmax_1.c b/gcc/testsuite/gcc.target/aarch64/sve/faminmax_1.c
new file mode 100644
index 00000000000..bdf077ab2f7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/faminmax_1.c
@@ -0,0 +1,85 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -ffast-math" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_sve.h"
+
+#pragma GCC target "+sve+faminmax"
+
+#define TEST_FAMAX(TYPE) /* Defines fn_famax_TYPE: c[i] = fmax (fabs (a[i]), fabs (b[i])) for i in [0, n).  */ \
+ void fn_famax_##TYPE (TYPE * restrict a, \
+ TYPE * restrict b, \
+ TYPE * restrict c, \
+ int n) { \
+ for (int i = 0; i < n; i++) { \
+ TYPE temp1 = __builtin_fabs (a[i]); \
+ TYPE temp2 = __builtin_fabs (b[i]); \
+ c[i] = __builtin_fmax (temp1, temp2); \
+ } \
+ } \
+
+#define TEST_FAMIN(TYPE) /* Defines fn_famin_TYPE: c[i] = fmin (fabs (a[i]), fabs (b[i])) for i in [0, n).  */ \
+ void fn_famin_##TYPE (TYPE * restrict a, \
+ TYPE * restrict b, \
+ TYPE * restrict c, \
+ int n) { \
+ for (int i = 0; i < n; i++) { \
+ TYPE temp1 = __builtin_fabs (a[i]); \
+ TYPE temp2 = __builtin_fabs (b[i]); \
+ c[i] = __builtin_fmin (temp1, temp2); \
+ } \
+ } \
+
+/* Expect FAMAX on .h elements; register numbers are matched by regex.
+** fn_famax_float16_t:
+**	...
+**	famax	(z[0-9]+)\.h, p[0-7]/m, \1\.h, z[0-9]+\.h
+**	...
+**	ret
+*/
+TEST_FAMAX (float16_t)
+
+/* Expect FAMAX on .s elements; register numbers are matched by regex.
+** fn_famax_float32_t:
+**	...
+**	famax	(z[0-9]+)\.s, p[0-7]/m, \1\.s, z[0-9]+\.s
+**	...
+**	ret
+*/
+TEST_FAMAX (float32_t)
+
+/* Expect FAMAX on .d elements; register numbers are matched by regex.
+** fn_famax_float64_t:
+**	...
+**	famax	(z[0-9]+)\.d, p[0-7]/m, \1\.d, z[0-9]+\.d
+**	...
+**	ret
+*/
+TEST_FAMAX (float64_t)
+
+/* Expect FAMIN on .h elements; register numbers are matched by regex.
+** fn_famin_float16_t:
+**	...
+**	famin	(z[0-9]+)\.h, p[0-7]/m, \1\.h, z[0-9]+\.h
+**	...
+**	ret
+*/
+TEST_FAMIN (float16_t)
+
+/* Expect FAMIN on .s elements; register numbers are matched by regex.
+** fn_famin_float32_t:
+**	...
+**	famin	(z[0-9]+)\.s, p[0-7]/m, \1\.s, z[0-9]+\.s
+**	...
+**	ret
+*/
+TEST_FAMIN (float32_t)
+
+/* Expect FAMIN on .d elements; register numbers are matched by regex.
+** fn_famin_float64_t:
+**	...
+**	famin	(z[0-9]+)\.d, p[0-7]/m, \1\.d, z[0-9]+\.d
+**	...
+**	ret
+*/
+TEST_FAMIN (float64_t)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/faminmax_2.c b/gcc/testsuite/gcc.target/aarch64/sve/faminmax_2.c
new file mode 100644
index 00000000000..26396979389
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/faminmax_2.c
@@ -0,0 +1,154 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -ffast-math" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_sve.h"
+
+#pragma GCC target "+sve+faminmax"
+
+#define TEST_WITH_SVMAX(TYPE) /* Defines fn_fmax_TYPE: svmax_x of svabs_x (x) and svabs_x (y), all under svptrue_b8 ().  */ \
+ TYPE fn_fmax_##TYPE (TYPE x, TYPE y) { \
+ svbool_t pg = svptrue_b8(); \
+ return svmax_x(pg, svabs_x(pg, x), svabs_x(pg, y)); \
+ } \
+
+#define TEST_WITH_SVMAXNM(TYPE) /* Defines fn_fmaxnm_TYPE: svmaxnm_x of svabs_x (x) and svabs_x (y), all under svptrue_b8 ().  */ \
+ TYPE fn_fmaxnm_##TYPE (TYPE x, TYPE y) { \
+ svbool_t pg = svptrue_b8(); \
+ return svmaxnm_x(pg, svabs_x(pg, x), svabs_x(pg, y)); \
+ } \
+
+#define TEST_WITH_SVMIN(TYPE) /* Defines fn_fmin_TYPE: svmin_x of svabs_x (x) and svabs_x (y), all under svptrue_b8 ().  */ \
+ TYPE fn_fmin_##TYPE (TYPE x, TYPE y) { \
+ svbool_t pg = svptrue_b8(); \
+ return svmin_x(pg, svabs_x(pg, x), svabs_x(pg, y)); \
+ } \
+
+#define TEST_WITH_SVMINNM(TYPE) /* Defines fn_fminnm_TYPE: svminnm_x of svabs_x (x) and svabs_x (y), all under svptrue_b8 ().  */ \
+ TYPE fn_fminnm_##TYPE (TYPE x, TYPE y) { \
+ svbool_t pg = svptrue_b8(); \
+ return svminnm_x(pg, svabs_x(pg, x), svabs_x(pg, y)); \
+ } \
+
+/* Expect unfused FABS + FMAX; the predicate register is captured by regex.
+** fn_fmax_svfloat16_t:
+**	ptrue	(p[0-7])\.b, all
+**	fabs	z0\.h, \1/m, z0\.h
+**	fabs	z1\.h, \1/m, z1\.h
+**	fmax	z0\.h, \1/m, z0\.h, z1\.h
+**	ret
+*/
+TEST_WITH_SVMAX (svfloat16_t)
+
+/* Expect unfused FABS + FMAX; the predicate register is captured by regex.
+** fn_fmax_svfloat32_t:
+**	ptrue	(p[0-7])\.b, all
+**	fabs	z0\.s, \1/m, z0\.s
+**	fabs	z1\.s, \1/m, z1\.s
+**	fmax	z0\.s, \1/m, z0\.s, z1\.s
+**	ret
+*/
+TEST_WITH_SVMAX (svfloat32_t)
+
+/* Expect unfused FABS + FMAX; the predicate register is captured by regex.
+** fn_fmax_svfloat64_t:
+**	ptrue	(p[0-7])\.b, all
+**	fabs	z0\.d, \1/m, z0\.d
+**	fabs	z1\.d, \1/m, z1\.d
+**	fmax	z0\.d, \1/m, z0\.d, z1\.d
+**	ret
+*/
+TEST_WITH_SVMAX (svfloat64_t)
+
+/* Expect unfused FABS + FMAXNM; the predicate register is captured by regex.
+** fn_fmaxnm_svfloat16_t:
+**	ptrue	(p[0-7])\.b, all
+**	fabs	z0\.h, \1/m, z0\.h
+**	fabs	z1\.h, \1/m, z1\.h
+**	fmaxnm	z0\.h, \1/m, z0\.h, z1\.h
+**	ret
+*/
+TEST_WITH_SVMAXNM (svfloat16_t)
+
+/* Expect unfused FABS + FMAXNM; the predicate register is captured by regex.
+** fn_fmaxnm_svfloat32_t:
+**	ptrue	(p[0-7])\.b, all
+**	fabs	z0\.s, \1/m, z0\.s
+**	fabs	z1\.s, \1/m, z1\.s
+**	fmaxnm	z0\.s, \1/m, z0\.s, z1\.s
+**	ret
+*/
+TEST_WITH_SVMAXNM (svfloat32_t)
+
+/* Expect unfused FABS + FMAXNM; the predicate register is captured by regex.
+** fn_fmaxnm_svfloat64_t:
+**	ptrue	(p[0-7])\.b, all
+**	fabs	z0\.d, \1/m, z0\.d
+**	fabs	z1\.d, \1/m, z1\.d
+**	fmaxnm	z0\.d, \1/m, z0\.d, z1\.d
+**	ret
+*/
+TEST_WITH_SVMAXNM (svfloat64_t)
+
+/* Expect unfused FABS + FMIN; the predicate register is captured by regex.
+** fn_fmin_svfloat16_t:
+**	ptrue	(p[0-7])\.b, all
+**	fabs	z0\.h, \1/m, z0\.h
+**	fabs	z1\.h, \1/m, z1\.h
+**	fmin	z0\.h, \1/m, z0\.h, z1\.h
+**	ret
+*/
+TEST_WITH_SVMIN (svfloat16_t)
+
+/* Expect unfused FABS + FMIN; the predicate register is captured by regex.
+** fn_fmin_svfloat32_t:
+**	ptrue	(p[0-7])\.b, all
+**	fabs	z0\.s, \1/m, z0\.s
+**	fabs	z1\.s, \1/m, z1\.s
+**	fmin	z0\.s, \1/m, z0\.s, z1\.s
+**	ret
+*/
+TEST_WITH_SVMIN (svfloat32_t)
+
+/* Expect unfused FABS + FMIN; the predicate register is captured by regex.
+** fn_fmin_svfloat64_t:
+**	ptrue	(p[0-7])\.b, all
+**	fabs	z0\.d, \1/m, z0\.d
+**	fabs	z1\.d, \1/m, z1\.d
+**	fmin	z0\.d, \1/m, z0\.d, z1\.d
+**	ret
+*/
+TEST_WITH_SVMIN (svfloat64_t)
+
+/* Expect unfused FABS + FMINNM; the predicate register is captured by regex.
+** fn_fminnm_svfloat16_t:
+**	ptrue	(p[0-7])\.b, all
+**	fabs	z0\.h, \1/m, z0\.h
+**	fabs	z1\.h, \1/m, z1\.h
+**	fminnm	z0\.h, \1/m, z0\.h, z1\.h
+**	ret
+*/
+TEST_WITH_SVMINNM (svfloat16_t)
+
+/* Expect unfused FABS + FMINNM; the predicate register is captured by regex.
+** fn_fminnm_svfloat32_t:
+**	ptrue	(p[0-7])\.b, all
+**	fabs	z0\.s, \1/m, z0\.s
+**	fabs	z1\.s, \1/m, z1\.s
+**	fminnm	z0\.s, \1/m, z0\.s, z1\.s
+**	ret
+*/
+TEST_WITH_SVMINNM (svfloat32_t)
+
+/* Expect unfused FABS + FMINNM; the predicate register is captured by regex.
+** fn_fminnm_svfloat64_t:
+**	ptrue	(p[0-7])\.b, all
+**	fabs	z0\.d, \1/m, z0\.d
+**	fabs	z1\.d, \1/m, z1\.d
+**	fminnm	z0\.d, \1/m, z0\.d, z1\.d
+**	ret
+*/
+TEST_WITH_SVMINNM (svfloat64_t)
+
+/* { dg-final { scan-assembler-not {\tfamax\t} } } */
+/* { dg-final { scan-assembler-not {\tfamin\t} } } */