[PATCH 2/2] aarch64: Add codegen support for SVE2 faminmax

saurabh.jha Fri, 13 Sep 2024 02:09:09 -0700

The AArch64 FEAT_FAMINMAX extension is optional from Armv9.2-a and
mandatory from Armv9.5-a. It introduces instructions for computing the
floating point absolute maximum and minimum of the two vectors
element-wise.


This patch adds code generation for famax and famin in terms of existing
unspecs. With this patch:
1. famax can be expressed as taking fmax/fmaxnm of the absolute values
   of the two operands.
2. famin can be expressed as taking fmin/fminnm of the absolute values
   of the two operands.

This fusion of operators is only possible when both the faminmax and
sve extensions are enabled, e.g. with -march=armv9-a+faminmax+sve.

This code generation is only available on -O2 or -O3 as that is when
auto-vectorization is enabled.

gcc/ChangeLog:

        * config/aarch64/aarch64-sve.md
        (*aarch64_pred_faminmax_fused): Instruction pattern for faminmax
        codegen.
        * config/aarch64/iterators.md (faminmax_cond_uns_op): New int
        attribute for faminmax codegen.

gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/sve/faminmax.c: New test.
---
 gcc/config/aarch64/aarch64-sve.md             | 29 +++++++
 gcc/config/aarch64/iterators.md               |  6 ++
 .../gcc.target/aarch64/sve/faminmax.c         | 85 +++++++++++++++++++
 3 files changed, 120 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/faminmax.c

diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index a5cd42be9d5..feb6438efde 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -11111,3 +11111,46 @@
     return "sel\t%0.<Vetype>, %3, %2.<Vetype>, %1.<Vetype>";
   }
 )
+
+;; -------------------------------------------------------------------------
+;; -- [FP] Absolute maximum and minimum
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FAMAX
+;; - FAMIN
+;; -------------------------------------------------------------------------
+
+;; Predicated floating-point absolute maximum and minimum.
+;;
+;; Matches a predicated max/min (operand 1 is its governing predicate,
+;; operand 4 its strictness) whose two inputs are both UNSPEC_COND_FABS
+;; results computed with SVE_RELAXED_GP, and emits a single FAMAX/FAMIN.
+;;
+;; The output template uses %0 as the first source register, so operand 2
+;; must be tied to operand 0.  The second alternative uses MOVPRFX to copy
+;; operand 2 into the destination when the two cannot share a register.
+;;
+;; NOTE(review): SVE_COND_FP_MAXMIN presumably includes the FMAXNM/FMINNM
+;; forms (faminmax_cond_uns_op maps them too); confirm that the NaN
+;; behaviour of FAMAX/FAMIN is acceptable for those cases.
+(define_insn "*aarch64_pred_faminmax_fused"
+  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
+	(unspec:SVE_FULL_F
+	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+	   (match_operand:SI 4 "aarch64_sve_gp_strictness")
+	   (unspec:SVE_FULL_F
+	     [(match_operand 5)
+	      (const_int SVE_RELAXED_GP)
+	      (match_operand:SVE_FULL_F 2 "register_operand" "0, w")]
+	     UNSPEC_COND_FABS)
+	   (unspec:SVE_FULL_F
+	     [(match_operand 6)
+	      (const_int SVE_RELAXED_GP)
+	      (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
+	     UNSPEC_COND_FABS)]
+	  SVE_COND_FP_MAXMIN))]
+  "TARGET_SVE_FAMINMAX"
+  "@
+   <faminmax_cond_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+   movprfx\t%0, %2\;<faminmax_cond_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+  [(set_attr "movprfx" "*,yes")]
+)
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index b993ac9a7f6..5bdf1970f92 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -4489,5 +4489,13 @@
 (define_int_attr faminmax_uns_op
   [(UNSPEC_FAMAX "famax") (UNSPEC_FAMIN "famin")])
 
+;; Maps each predicated max/min unspec to the FAMAX/FAMIN mnemonic emitted
+;; by *aarch64_pred_faminmax_fused.
+(define_int_attr faminmax_cond_uns_op
+  [(UNSPEC_COND_FMAX "famax")
+   (UNSPEC_COND_FMAXNM "famax")
+   (UNSPEC_COND_FMIN "famin")
+   (UNSPEC_COND_FMINNM "famin")])
+
 (define_code_attr faminmax_op
   [(smax "famax") (smin "famin")])
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/faminmax.c b/gcc/testsuite/gcc.target/aarch64/sve/faminmax.c
new file mode 100644
index 00000000000..b70e19fa276
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/faminmax.c
@@ -0,0 +1,89 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -ffast-math -march=armv9-a+sve+faminmax" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_sve.h"
+
+#pragma GCC target "+sve"
+
+/* Define fn_famax_TYPE, which stores the maximum of the absolute values
+   of a[i] and b[i] in c[i] for each i in [0, n).  */
+#define TEST_FAMAX(TYPE)						\
+  void fn_famax_##TYPE (TYPE * restrict a,				\
+			TYPE * restrict b,				\
+			TYPE * restrict c,				\
+			int n) {					\
+    for (int i = 0; i < n; i++) {					\
+      TYPE temp1 = __builtin_fabs (a[i]);				\
+      TYPE temp2 = __builtin_fabs (b[i]);				\
+      c[i] = __builtin_fmax (temp1, temp2);				\
+    }									\
+  }
+
+/* Define fn_famin_TYPE, which stores the minimum of the absolute values
+   of a[i] and b[i] in c[i] for each i in [0, n).  */
+#define TEST_FAMIN(TYPE)						\
+  void fn_famin_##TYPE (TYPE * restrict a,				\
+			TYPE * restrict b,				\
+			TYPE * restrict c,				\
+			int n) {					\
+    for (int i = 0; i < n; i++) {					\
+      TYPE temp1 = __builtin_fabs (a[i]);				\
+      TYPE temp2 = __builtin_fabs (b[i]);				\
+      c[i] = __builtin_fmin (temp1, temp2);				\
+    }									\
+  }
+
+/*
+** fn_famax_float16_t:
+**	...
+**	famax	(z[0-9]+)\.h, p[0-7]/m, \1\.h, z[0-9]+\.h
+**	...
+**	ret
+*/
+TEST_FAMAX (float16_t)
+
+/*
+** fn_famax_float32_t:
+**	...
+**	famax	(z[0-9]+)\.s, p[0-7]/m, \1\.s, z[0-9]+\.s
+**	...
+**	ret
+*/
+TEST_FAMAX (float32_t)
+
+/*
+** fn_famax_float64_t:
+**	...
+**	famax	(z[0-9]+)\.d, p[0-7]/m, \1\.d, z[0-9]+\.d
+**	...
+**	ret
+*/
+TEST_FAMAX (float64_t)
+
+/*
+** fn_famin_float16_t:
+**	...
+**	famin	(z[0-9]+)\.h, p[0-7]/m, \1\.h, z[0-9]+\.h
+**	...
+**	ret
+*/
+TEST_FAMIN (float16_t)
+
+/*
+** fn_famin_float32_t:
+**	...
+**	famin	(z[0-9]+)\.s, p[0-7]/m, \1\.s, z[0-9]+\.s
+**	...
+**	ret
+*/
+TEST_FAMIN (float32_t)
+
+/*
+** fn_famin_float64_t:
+**	...
+**	famin	(z[0-9]+)\.d, p[0-7]/m, \1\.d, z[0-9]+\.d
+**	...
+**	ret
+*/
+TEST_FAMIN (float64_t)

[PATCH 2/2] aarch64: Add codegen support for SVE2 faminmax

Reply via email to