[gcc r15-3811] aarch64: Add codegen support for AdvSIMD faminmax

Saurabh Jha via Gcc-cvs Mon, 23 Sep 2024 09:33:48 -0700

https://gcc.gnu.org/g:c1fb78fb03caede01b02a1ebb3275ac98343d468


commit r15-3811-gc1fb78fb03caede01b02a1ebb3275ac98343d468
Author: Saurabh Jha <[email protected]>
Date:   Wed Aug 7 12:34:20 2024 +0100

    aarch64: Add codegen support for AdvSIMD faminmax
    
    The AArch64 FEAT_FAMINMAX extension is optional from Armv9.2-a and
    mandatory from Armv9.5-a. It introduces instructions for computing the
    element-wise floating-point absolute maximum and minimum of two
    vectors.
    
    This patch adds code generation support for famax and famin in terms of
    existing RTL operators.
    
    famax/famin is equivalent to first taking abs of the operands and then
    taking smax/smin on the results of abs.
    
            famax/famin (a, b) = smax/smin (abs (a), abs (b))
    
    This fusion of operators is only possible when the
    -march=armv9-a+faminmax flag is passed. We also need to pass the
    -ffast-math flag; if we don't, then a statement like
    
            c[i] = __builtin_fmaxf16 (a[i], b[i]);
    
    is RTL expanded to UNSPEC_FMAXNM instead of smax (likewise for smin).
    
    This code generation is only available at -O2 or -O3, as that is when
    auto-vectorization is enabled.
    
    gcc/ChangeLog:
    
            * config/aarch64/aarch64-simd.md
            (*aarch64_faminmax_fused): Instruction pattern for faminmax
            codegen.
            * config/aarch64/iterators.md: Attribute for faminmax codegen.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/aarch64/simd/faminmax-codegen-no-flag.c: New test.
            * gcc.target/aarch64/simd/faminmax-codegen.c: New test.
            * gcc.target/aarch64/simd/faminmax-no-codegen.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64-simd.md                 |   9 +
 gcc/config/aarch64/iterators.md                    |   3 +
 .../aarch64/simd/faminmax-codegen-no-flag.c        | 217 +++++++++++++++++
 .../gcc.target/aarch64/simd/faminmax-codegen.c     | 197 +++++++++++++++
 .../gcc.target/aarch64/simd/faminmax-no-codegen.c  | 267 +++++++++++++++++++++
 5 files changed, 693 insertions(+)

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 67f0fe26f938..2a44aa3fcc33 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -9920,3 +9920,12 @@
   "TARGET_FAMINMAX"
   "<faminmax_uns_op>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 )
+
+(define_insn "*aarch64_faminmax_fused"
+  [(set (match_operand:VHSDF 0 "register_operand" "=w")
+       (FMAXMIN:VHSDF
+         (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
+         (abs:VHSDF (match_operand:VHSDF 2 "register_operand" "w"))))]
+  "TARGET_FAMINMAX"
+  "<faminmax_op>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
+)
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 17ac5e073aa1..c2fcd18306e4 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -4472,3 +4472,6 @@
 
 (define_int_attr faminmax_uns_op
   [(UNSPEC_FAMAX "famax") (UNSPEC_FAMIN "famin")])
+
+(define_code_attr faminmax_op
+  [(smax "famax") (smin "famin")])
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/faminmax-codegen-no-flag.c b/gcc/testsuite/gcc.target/aarch64/simd/faminmax-codegen-no-flag.c
new file mode 100644
index 000000000000..6688a7883b7d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/faminmax-codegen-no-flag.c
@@ -0,0 +1,217 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -ffast-math -march=armv9-a" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_neon.h"
+
+#pragma GCC target "+nosve"
+
+/*
+** test_vamax_f16:
+**     fabs    v1.4h, v1.4h
+**     fabs    v0.4h, v0.4h
+**     fmaxnm  v0.4h, v0.4h, v1.4h
+**     ret
+*/
+float16x4_t
+test_vamax_f16 (float16x4_t a, float16x4_t b)
+{
+  int i;
+  float16x4_t c;
+
+  for (i = 0; i < 4; ++i) {
+    a[i] = __builtin_fabsf16 (a[i]);
+    b[i] = __builtin_fabsf16 (b[i]);
+    c[i] = __builtin_fmaxf16 (a[i], b[i]);
+  }
+  return c;
+}
+
+/*
+** test_vamaxq_f16:
+**     fabs    v1.8h, v1.8h
+**     fabs    v0.8h, v0.8h
+**     fmaxnm  v0.8h, v0.8h, v1.8h
+**     ret
+*/
+float16x8_t
+test_vamaxq_f16 (float16x8_t a, float16x8_t b)
+{
+  int i;
+  float16x8_t c;
+
+  for (i = 0; i < 8; ++i) {
+    a[i] = __builtin_fabsf16 (a[i]);
+    b[i] = __builtin_fabsf16 (b[i]);
+    c[i] = __builtin_fmaxf16 (a[i], b[i]);
+  }
+  return c;
+}
+
+/*
+** test_vamax_f32:
+**     fabs    v1.2s, v1.2s
+**     fabs    v0.2s, v0.2s
+**     fmaxnm  v0.2s, v0.2s, v1.2s
+**     ret
+*/
+float32x2_t
+test_vamax_f32 (float32x2_t a, float32x2_t b)
+{
+  int i;
+  float32x2_t c;
+
+  for (i = 0; i < 2; ++i) {
+    a[i] = __builtin_fabsf32 (a[i]);
+    b[i] = __builtin_fabsf32 (b[i]);
+    c[i] = __builtin_fmaxf32 (a[i], b[i]);
+  }
+  return c;
+}
+
+/*
+** test_vamaxq_f32:
+**     fabs    v1.4s, v1.4s
+**     fabs    v0.4s, v0.4s
+**     fmaxnm  v0.4s, v0.4s, v1.4s
+**     ret
+*/
+float32x4_t
+test_vamaxq_f32 (float32x4_t a, float32x4_t b)
+{
+  int i;
+  float32x4_t c;
+
+  for (i = 0; i < 4; ++i) {
+    a[i] = __builtin_fabsf32 (a[i]);
+    b[i] = __builtin_fabsf32 (b[i]);
+    c[i] = __builtin_fmaxf32 (a[i], b[i]);
+  }
+  return c;
+}
+
+/*
+** test_vamaxq_f64:
+**     fabs    v1.2d, v1.2d
+**     fabs    v0.2d, v0.2d
+**     fmaxnm  v0.2d, v0.2d, v1.2d
+**     ret
+*/
+float64x2_t
+test_vamaxq_f64 (float64x2_t a, float64x2_t b)
+{
+  int i;
+  float64x2_t c;
+
+  for (i = 0; i < 2; ++i) {
+    a[i] = __builtin_fabsf64 (a[i]);
+    b[i] = __builtin_fabsf64 (b[i]);
+    c[i] = __builtin_fmaxf64 (a[i], b[i]);
+  }
+  return c;
+}
+
+/*
+** test_vamin_f16:
+**     fabs    v1.4h, v1.4h
+**     fabs    v0.4h, v0.4h
+**     fminnm  v0.4h, v0.4h, v1.4h
+**     ret
+*/
+float16x4_t
+test_vamin_f16 (float16x4_t a, float16x4_t b)
+{
+  int i;
+  float16x4_t c;
+
+  for (i = 0; i < 4; ++i) {
+    a[i] = __builtin_fabsf16 (a[i]);
+    b[i] = __builtin_fabsf16 (b[i]);
+    c[i] = __builtin_fminf16 (a[i], b[i]);
+  }
+  return c;
+}
+
+/*
+** test_vaminq_f16:
+**     fabs    v1.8h, v1.8h
+**     fabs    v0.8h, v0.8h
+**     fminnm  v0.8h, v0.8h, v1.8h
+**     ret
+*/
+float16x8_t
+test_vaminq_f16 (float16x8_t a, float16x8_t b)
+{
+  int i;
+  float16x8_t c;
+
+  for (i = 0; i < 8; ++i) {
+    a[i] = __builtin_fabsf16 (a[i]);
+    b[i] = __builtin_fabsf16 (b[i]);
+    c[i] = __builtin_fminf16 (a[i], b[i]);
+  }
+  return c;
+}
+
+/*
+** test_vamin_f32:
+**     fabs    v1.2s, v1.2s
+**     fabs    v0.2s, v0.2s
+**     fminnm  v0.2s, v0.2s, v1.2s
+**     ret
+*/
+float32x2_t
+test_vamin_f32 (float32x2_t a, float32x2_t b)
+{
+  int i;
+  float32x2_t c;
+
+  for (i = 0; i < 2; ++i) {
+    a[i] = __builtin_fabsf32 (a[i]);
+    b[i] = __builtin_fabsf32 (b[i]);
+    c[i] = __builtin_fminf32 (a[i], b[i]);
+  }
+  return c;
+}
+
+/*
+** test_vaminq_f32:
+**     fabs    v1.4s, v1.4s
+**     fabs    v0.4s, v0.4s
+**     fminnm  v0.4s, v0.4s, v1.4s
+**     ret
+*/
+float32x4_t
+test_vaminq_f32 (float32x4_t a, float32x4_t b)
+{
+  int i;
+  float32x4_t c;
+
+  for (i = 0; i < 4; ++i) {
+    a[i] = __builtin_fabsf32 (a[i]);
+    b[i] = __builtin_fabsf32 (b[i]);
+    c[i] = __builtin_fminf32 (a[i], b[i]);
+  }
+  return c;
+}
+
+/*
+** test_vaminq_f64:
+**     fabs    v1.2d, v1.2d
+**     fabs    v0.2d, v0.2d
+**     fminnm  v0.2d, v0.2d, v1.2d
+**     ret
+*/
+float64x2_t
+test_vaminq_f64 (float64x2_t a, float64x2_t b)
+{
+  int i;
+  float64x2_t c;
+
+  for (i = 0; i < 2; ++i) {
+    a[i] = __builtin_fabsf64 (a[i]);
+    b[i] = __builtin_fabsf64 (b[i]);
+    c[i] = __builtin_fminf64 (a[i], b[i]);
+  }
+  return c;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/faminmax-codegen.c b/gcc/testsuite/gcc.target/aarch64/simd/faminmax-codegen.c
new file mode 100644
index 000000000000..d77bd9052308
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/faminmax-codegen.c
@@ -0,0 +1,197 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -ffast-math -march=armv9-a+faminmax" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_neon.h"
+
+#pragma GCC target "+nosve"
+
+/*
+** test_vamax_f16:
+**     famax   v0.4h, v1.4h, v0.4h
+**     ret
+*/
+float16x4_t
+test_vamax_f16 (float16x4_t a, float16x4_t b)
+{
+  int i;
+  float16x4_t c;
+
+  for (i = 0; i < 4; ++i) {
+    a[i] = __builtin_fabsf16 (a[i]);
+    b[i] = __builtin_fabsf16 (b[i]);
+    c[i] = __builtin_fmaxf16 (a[i], b[i]);
+  }
+  return c;
+}
+
+/*
+** test_vamaxq_f16:
+**     famax   v0.8h, v1.8h, v0.8h
+**     ret
+*/
+float16x8_t
+test_vamaxq_f16 (float16x8_t a, float16x8_t b)
+{
+  int i;
+  float16x8_t c;
+
+  for (i = 0; i < 8; ++i) {
+    a[i] = __builtin_fabsf16 (a[i]);
+    b[i] = __builtin_fabsf16 (b[i]);
+    c[i] = __builtin_fmaxf16 (a[i], b[i]);
+  }
+  return c;
+}
+
+/*
+** test_vamax_f32:
+**     famax   v0.2s, v1.2s, v0.2s
+**     ret
+*/
+float32x2_t
+test_vamax_f32 (float32x2_t a, float32x2_t b)
+{
+  int i;
+  float32x2_t c;
+
+  for (i = 0; i < 2; ++i) {
+    a[i] = __builtin_fabsf32 (a[i]);
+    b[i] = __builtin_fabsf32 (b[i]);
+    c[i] = __builtin_fmaxf32 (a[i], b[i]);
+  }
+  return c;
+}
+
+/*
+** test_vamaxq_f32:
+**     famax   v0.4s, v1.4s, v0.4s
+**     ret
+*/
+float32x4_t
+test_vamaxq_f32 (float32x4_t a, float32x4_t b)
+{
+  int i;
+  float32x4_t c;
+
+  for (i = 0; i < 4; ++i) {
+    a[i] = __builtin_fabsf32 (a[i]);
+    b[i] = __builtin_fabsf32 (b[i]);
+    c[i] = __builtin_fmaxf32 (a[i], b[i]);
+  }
+  return c;
+}
+
+/*
+** test_vamaxq_f64:
+**     famax   v0.2d, v1.2d, v0.2d
+**     ret
+*/
+float64x2_t
+test_vamaxq_f64 (float64x2_t a, float64x2_t b)
+{
+  int i;
+  float64x2_t c;
+
+  for (i = 0; i < 2; ++i) {
+    a[i] = __builtin_fabsf64 (a[i]);
+    b[i] = __builtin_fabsf64 (b[i]);
+    c[i] = __builtin_fmaxf64 (a[i], b[i]);
+  }
+  return c;
+}
+
+/*
+** test_vamin_f16:
+**     famin   v0.4h, v1.4h, v0.4h
+**     ret
+*/
+float16x4_t
+test_vamin_f16 (float16x4_t a, float16x4_t b)
+{
+  int i;
+  float16x4_t c;
+
+  for (i = 0; i < 4; ++i) {
+    a[i] = __builtin_fabsf16 (a[i]);
+    b[i] = __builtin_fabsf16 (b[i]);
+    c[i] = __builtin_fminf16 (a[i], b[i]);
+  }
+  return c;
+}
+
+/*
+** test_vaminq_f16:
+**     famin   v0.8h, v1.8h, v0.8h
+**     ret
+*/
+float16x8_t
+test_vaminq_f16 (float16x8_t a, float16x8_t b)
+{
+  int i;
+  float16x8_t c;
+
+  for (i = 0; i < 8; ++i) {
+    a[i] = __builtin_fabsf16 (a[i]);
+    b[i] = __builtin_fabsf16 (b[i]);
+    c[i] = __builtin_fminf16 (a[i], b[i]);
+  }
+  return c;
+}
+
+/*
+** test_vamin_f32:
+**     famin   v0.2s, v1.2s, v0.2s
+**     ret
+*/
+float32x2_t
+test_vamin_f32 (float32x2_t a, float32x2_t b)
+{
+  int i;
+  float32x2_t c;
+
+  for (i = 0; i < 2; ++i) {
+    a[i] = __builtin_fabsf32 (a[i]);
+    b[i] = __builtin_fabsf32 (b[i]);
+    c[i] = __builtin_fminf32 (a[i], b[i]);
+  }
+  return c;
+}
+
+/*
+** test_vaminq_f32:
+**     famin   v0.4s, v1.4s, v0.4s
+**     ret
+*/
+float32x4_t
+test_vaminq_f32 (float32x4_t a, float32x4_t b)
+{
+  int i;
+  float32x4_t c;
+
+  for (i = 0; i < 4; ++i) {
+    a[i] = __builtin_fabsf32 (a[i]);
+    b[i] = __builtin_fabsf32 (b[i]);
+    c[i] = __builtin_fminf32 (a[i], b[i]);
+  }
+  return c;
+}
+
+/*
+** test_vaminq_f64:
+**     famin   v0.2d, v1.2d, v0.2d
+**     ret
+*/
+float64x2_t
+test_vaminq_f64 (float64x2_t a, float64x2_t b)
+{
+  int i;
+  float64x2_t c;
+
+  for (i = 0; i < 2; ++i) {
+    a[i] = __builtin_fabsf64 (a[i]);
+    b[i] = __builtin_fabsf64 (b[i]);
+    c[i] = __builtin_fminf64 (a[i], b[i]);
+  }
+  return c;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/faminmax-no-codegen.c b/gcc/testsuite/gcc.target/aarch64/simd/faminmax-no-codegen.c
new file mode 100644
index 000000000000..3ec0a52b5224
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/faminmax-no-codegen.c
@@ -0,0 +1,267 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -ffast-math -march=armv9-a+faminmax" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_neon.h"
+
+#pragma GCC target "+nosve"
+
+/*
+** test_abs_max_f16:
+**     fabs    v1.4h, v1.4h
+**     fabs    v0.4h, v0.4h
+**     fmax    v0.4h, v0.4h, v1.4h
+**     ret
+*/
+float16x4_t
+test_abs_max_f16 (float16x4_t a, float16x4_t b)
+{
+  return vmax_f16 (vabs_f16 (a), vabs_f16 (b));
+}
+
+/*
+** test_abs_maxnm_f16:
+**     fabs    v1.4h, v1.4h
+**     fabs    v0.4h, v0.4h
+**     fmaxnm  v0.4h, v0.4h, v1.4h
+**     ret
+*/
+float16x4_t
+test_abs_maxnm_f16 (float16x4_t a, float16x4_t b)
+{
+  return vmaxnm_f16 (vabs_f16 (a), vabs_f16 (b));
+}
+
+/*
+** test_abs_maxq_f16:
+**     fabs    v1.8h, v1.8h
+**     fabs    v0.8h, v0.8h
+**     fmax    v0.8h, v0.8h, v1.8h
+**     ret
+*/
+float16x8_t
+test_abs_maxq_f16 (float16x8_t a, float16x8_t b)
+{
+  return vmaxq_f16 (vabsq_f16 (a), vabsq_f16 (b));
+}
+
+/*
+** test_abs_maxnmq_f16:
+**     fabs    v1.8h, v1.8h
+**     fabs    v0.8h, v0.8h
+**     fmaxnm  v0.8h, v0.8h, v1.8h
+**     ret
+*/
+float16x8_t
+test_abs_maxnmq_f16 (float16x8_t a, float16x8_t b)
+{
+  return vmaxnmq_f16 (vabsq_f16 (a), vabsq_f16 (b));
+}
+
+/*
+** test_abs_max_f32:
+**     fabs    v1.2s, v1.2s
+**     fabs    v0.2s, v0.2s
+**     fmax    v0.2s, v0.2s, v1.2s
+**     ret
+*/
+float32x2_t
+test_abs_max_f32 (float32x2_t a, float32x2_t b)
+{
+  return vmax_f32 (vabs_f32 (a), vabs_f32 (b));
+}
+
+/*
+** test_abs_maxnm_f32:
+**     fabs    v1.2s, v1.2s
+**     fabs    v0.2s, v0.2s
+**     fmaxnm  v0.2s, v0.2s, v1.2s
+**     ret
+*/
+float32x2_t
+test_abs_maxnm_f32 (float32x2_t a, float32x2_t b)
+{
+  return vmaxnm_f32 (vabs_f32 (a), vabs_f32 (b));
+}
+
+/*
+** test_abs_maxq_f32:
+**     fabs    v1.4s, v1.4s
+**     fabs    v0.4s, v0.4s
+**     fmax    v0.4s, v0.4s, v1.4s
+**     ret
+*/
+float32x4_t
+test_abs_maxq_f32 (float32x4_t a, float32x4_t b)
+{
+  return vmaxq_f32 (vabsq_f32 (a), vabsq_f32 (b));
+}
+
+/*
+** test_abs_maxnmq_f32:
+**     fabs    v1.4s, v1.4s
+**     fabs    v0.4s, v0.4s
+**     fmaxnm  v0.4s, v0.4s, v1.4s
+**     ret
+*/
+float32x4_t
+test_abs_maxnmq_f32 (float32x4_t a, float32x4_t b)
+{
+  return vmaxnmq_f32 (vabsq_f32 (a), vabsq_f32 (b));
+}
+
+/*
+** test_abs_maxq_f64:
+**     fabs    v1.2d, v1.2d
+**     fabs    v0.2d, v0.2d
+**     fmax    v0.2d, v0.2d, v1.2d
+**     ret
+*/
+float64x2_t
+test_abs_maxq_f64 (float64x2_t a, float64x2_t b)
+{
+  return vmaxq_f64 (vabsq_f64 (a), vabsq_f64 (b));
+}
+
+/*
+** test_abs_maxnmq_f64:
+**     fabs    v1.2d, v1.2d
+**     fabs    v0.2d, v0.2d
+**     fmaxnm  v0.2d, v0.2d, v1.2d
+**     ret
+*/
+float64x2_t
+test_abs_maxnmq_f64 (float64x2_t a, float64x2_t b)
+{
+  return vmaxnmq_f64 (vabsq_f64 (a), vabsq_f64 (b));
+}
+
+/*
+** test_abs_min_f16:
+**     fabs    v1.4h, v1.4h
+**     fabs    v0.4h, v0.4h
+**     fmin    v0.4h, v0.4h, v1.4h
+**     ret
+*/
+float16x4_t
+test_abs_min_f16 (float16x4_t a, float16x4_t b)
+{
+  return vmin_f16 (vabs_f16 (a), vabs_f16 (b));
+}
+
+/*
+** test_abs_minnm_f16:
+**     fabs    v1.4h, v1.4h
+**     fabs    v0.4h, v0.4h
+**     fminnm  v0.4h, v0.4h, v1.4h
+**     ret
+*/
+float16x4_t
+test_abs_minnm_f16 (float16x4_t a, float16x4_t b)
+{
+  return vminnm_f16 (vabs_f16 (a), vabs_f16 (b));
+}
+
+/*
+** test_abs_minq_f16:
+**     fabs    v1.8h, v1.8h
+**     fabs    v0.8h, v0.8h
+**     fmin    v0.8h, v0.8h, v1.8h
+**     ret
+*/
+float16x8_t
+test_abs_minq_f16 (float16x8_t a, float16x8_t b)
+{
+  return vminq_f16 (vabsq_f16 (a), vabsq_f16 (b));
+}
+
+/*
+** test_abs_minnmq_f16:
+**     fabs    v1.8h, v1.8h
+**     fabs    v0.8h, v0.8h
+**     fminnm  v0.8h, v0.8h, v1.8h
+**     ret
+*/
+float16x8_t
+test_abs_minnmq_f16 (float16x8_t a, float16x8_t b)
+{
+  return vminnmq_f16 (vabsq_f16 (a), vabsq_f16 (b));
+}
+
+/*
+** test_abs_min_f32:
+**     fabs    v1.2s, v1.2s
+**     fabs    v0.2s, v0.2s
+**     fmin    v0.2s, v0.2s, v1.2s
+**     ret
+*/
+float32x2_t
+test_abs_min_f32 (float32x2_t a, float32x2_t b)
+{
+  return vmin_f32 (vabs_f32 (a), vabs_f32 (b));
+}
+
+/*
+** test_abs_minnm_f32:
+**     fabs    v1.2s, v1.2s
+**     fabs    v0.2s, v0.2s
+**     fminnm  v0.2s, v0.2s, v1.2s
+**     ret
+*/
+float32x2_t
+test_abs_minnm_f32 (float32x2_t a, float32x2_t b)
+{
+  return vminnm_f32 (vabs_f32 (a), vabs_f32 (b));
+}
+
+/*
+** test_abs_minq_f32:
+**     fabs    v1.4s, v1.4s
+**     fabs    v0.4s, v0.4s
+**     fmin    v0.4s, v0.4s, v1.4s
+**     ret
+*/
+float32x4_t
+test_abs_minq_f32 (float32x4_t a, float32x4_t b)
+{
+  return vminq_f32 (vabsq_f32 (a), vabsq_f32 (b));
+}
+
+/*
+** test_abs_minnmq_f32:
+**     fabs    v1.4s, v1.4s
+**     fabs    v0.4s, v0.4s
+**     fminnm  v0.4s, v0.4s, v1.4s
+**     ret
+*/
+float32x4_t
+test_abs_minnmq_f32 (float32x4_t a, float32x4_t b)
+{
+  return vminnmq_f32 (vabsq_f32 (a), vabsq_f32 (b));
+}
+
+/*
+** test_abs_minq_f64:
+**     fabs    v1.2d, v1.2d
+**     fabs    v0.2d, v0.2d
+**     fmin    v0.2d, v0.2d, v1.2d
+**     ret
+*/
+float64x2_t
+test_abs_minq_f64 (float64x2_t a, float64x2_t b)
+{
+  return vminq_f64 (vabsq_f64 (a), vabsq_f64 (b));
+}
+
+/*
+** test_abs_minnmq_f64:
+**     fabs    v1.2d, v1.2d
+**     fabs    v0.2d, v0.2d
+**     fminnm  v0.2d, v0.2d, v1.2d
+**     ret
+*/
+float64x2_t
+test_abs_minnmq_f64 (float64x2_t a, float64x2_t b)
+{
+  return vminnmq_f64 (vabsq_f64 (a), vabsq_f64 (b));
+}

[gcc r15-3811] aarch64: Add codegen support for AdvSIMD faminmax

Reply via email to