[gcc r15-4261] aarch64: Add SVE2 faminmax intrinsics

2024-10-11 Thread Saurabh Jha via Gcc-cvs
https://gcc.gnu.org/g:1a6a8cb1a72b282c418cd143b132de6e67b5d62b

commit r15-4261-g1a6a8cb1a72b282c418cd143b132de6e67b5d62b
Author: Saurabh Jha 
Date:   Wed Sep 25 22:08:33 2024 +

aarch64: Add SVE2 faminmax intrinsics

The AArch64 FEAT_FAMINMAX extension introduces instructions for
computing the floating point absolute maximum and minimum of the
two vectors element-wise.

This patch introduces SVE2 faminmax intrinsics. The intrinsics of this
extension are implemented as the following builtin functions:
* sva[max|min]_[m|x|z]
* sva[max|min]_[f16|f32|f64]_[m|x|z]
* sva[max|min]_n_[f16|f32|f64]_[m|x|z]

gcc/ChangeLog:

* config/aarch64/aarch64-sve-builtins-base.cc
(svamax): Absolute maximum declaration.
(svamin): Absolute minimum declaration.
* config/aarch64/aarch64-sve-builtins-base.def
(REQUIRED_EXTENSIONS): Add faminmax intrinsics behind a flag.
(svamax): Absolute maximum declaration.
(svamin): Absolute minimum declaration.
* config/aarch64/aarch64-sve-builtins-base.h: Declaring function
bases for the new intrinsics.
* config/aarch64/aarch64.h
(TARGET_SVE_FAMINMAX): New flag for SVE2 faminmax.
* config/aarch64/iterators.md: New unspecs, iterators, and attrs
for the new intrinsics.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/sve2/acle/asm/amax_f16.c: New test.
* gcc.target/aarch64/sve2/acle/asm/amax_f32.c: New test.
* gcc.target/aarch64/sve2/acle/asm/amax_f64.c: New test.
* gcc.target/aarch64/sve2/acle/asm/amin_f16.c: New test.
* gcc.target/aarch64/sve2/acle/asm/amin_f32.c: New test.
* gcc.target/aarch64/sve2/acle/asm/amin_f64.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64-sve-builtins-base.cc|   4 +
 gcc/config/aarch64/aarch64-sve-builtins-base.def   |   5 +
 gcc/config/aarch64/aarch64-sve-builtins-base.h |   2 +
 gcc/config/aarch64/aarch64.h   |   1 +
 gcc/config/aarch64/iterators.md|  18 +-
 .../gcc.target/aarch64/sve2/acle/asm/amax_f16.c| 431 +
 .../gcc.target/aarch64/sve2/acle/asm/amax_f32.c| 431 +
 .../gcc.target/aarch64/sve2/acle/asm/amax_f64.c| 431 +
 .../gcc.target/aarch64/sve2/acle/asm/amin_f16.c| 431 +
 .../gcc.target/aarch64/sve2/acle/asm/amin_f32.c| 431 +
 .../gcc.target/aarch64/sve2/acle/asm/amin_f64.c| 431 +
 11 files changed, 2615 insertions(+), 1 deletion(-)

diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc 
b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index 4b33585d9814..b189818d6430 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -3071,6 +3071,10 @@ FUNCTION (svadrb, svadr_bhwd_impl, (0))
 FUNCTION (svadrd, svadr_bhwd_impl, (3))
 FUNCTION (svadrh, svadr_bhwd_impl, (1))
 FUNCTION (svadrw, svadr_bhwd_impl, (2))
+FUNCTION (svamax, cond_or_uncond_unspec_function,
+ (UNSPEC_COND_FAMAX, UNSPEC_FAMAX))
+FUNCTION (svamin, cond_or_uncond_unspec_function,
+ (UNSPEC_COND_FAMIN, UNSPEC_FAMIN))
 FUNCTION (svand, rtx_code_function, (AND, AND))
 FUNCTION (svandv, reduction, (UNSPEC_ANDV))
 FUNCTION (svasr, rtx_code_function, (ASHIFTRT, ASHIFTRT))
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.def 
b/gcc/config/aarch64/aarch64-sve-builtins-base.def
index 65fcba915866..95e04e4393d2 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.def
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.def
@@ -379,3 +379,8 @@ DEF_SVE_FUNCTION (svzip2q, binary, all_data, none)
 DEF_SVE_FUNCTION (svld1ro, load_replicate, all_data, implicit)
 DEF_SVE_FUNCTION (svmmla, mmla, d_float, none)
 #undef REQUIRED_EXTENSIONS
+
+#define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_FAMINMAX
+DEF_SVE_FUNCTION (svamax, binary_opt_single_n, all_float, mxz)
+DEF_SVE_FUNCTION (svamin, binary_opt_single_n, all_float, mxz)
+#undef REQUIRED_EXTENSIONS
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.h 
b/gcc/config/aarch64/aarch64-sve-builtins-base.h
index 5bbf3569c4b4..978cf7013f92 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.h
@@ -37,6 +37,8 @@ namespace aarch64_sve
 extern const function_base *const svadrd;
 extern const function_base *const svadrh;
 extern const function_base *const svadrw;
+extern const function_base *const svamax;
+extern const function_base *const svamin;
 extern const function_base *const svand;
 extern const function_base *const svandv;
 extern const function_base *const svasr;
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 030cffb17606..593319fd4723 100644
--- a/gcc/c

[gcc r15-4262] aarch64: Add codegen support for SVE2 faminmax

2024-10-11 Thread Saurabh Jha via Gcc-cvs
https://gcc.gnu.org/g:914f4f86e6cb1e570a1928fccde1dbbfc362430b

commit r15-4262-g914f4f86e6cb1e570a1928fccde1dbbfc362430b
Author: Saurabh Jha 
Date:   Mon Sep 30 14:38:32 2024 +

aarch64: Add codegen support for SVE2 faminmax

The AArch64 FEAT_FAMINMAX extension introduces instructions for
computing the floating point absolute maximum and minimum of the
two vectors element-wise.

This patch adds code generation for famax and famin in terms of existing
unspecs. With this patch:
1. famax can be expressed as taking the absolute value of each of the
   two operands and then taking UNSPEC_COND_SMAX of the results.
2. famin can be expressed as taking the absolute value of each of the
   two operands and then taking UNSPEC_COND_SMIN of the results.

This fusion of operators is only possible when the
-march=armv9-a+faminmax+sve flags are passed. We also need to pass the
-ffast-math flag; this is what enables the compiler to use
UNSPEC_COND_SMAX and UNSPEC_COND_SMIN.

This code generation is only available on -O2 or -O3 as that is when
auto-vectorization is enabled.

gcc/ChangeLog:

* config/aarch64/aarch64-sve2.md
(*aarch64_pred_faminmax_fused): Instruction pattern for faminmax
codegen.
* config/aarch64/iterators.md: Iterator and attribute for
faminmax codegen.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/sve/faminmax_1.c: New test.
* gcc.target/aarch64/sve/faminmax_2.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64-sve2.md| 37 ++
 gcc/config/aarch64/iterators.md   |  6 +++
 gcc/testsuite/gcc.target/aarch64/sve/faminmax_1.c | 44 +
 gcc/testsuite/gcc.target/aarch64/sve/faminmax_2.c | 60 +++
 4 files changed, 147 insertions(+)

diff --git a/gcc/config/aarch64/aarch64-sve2.md 
b/gcc/config/aarch64/aarch64-sve2.md
index 725092cc95f0..5f2697c31797 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -2467,6 +2467,43 @@
   [(set_attr "movprfx" "yes")]
 )
 
+;; -
+;; -- [FP] Absolute maximum and minimum
+;; -
+;; Includes:
+;; - FAMAX
+;; - FAMIN
+;; -
+;; Predicated floating-point absolute maximum and minimum.
+(define_insn_and_rewrite "*aarch64_pred_faminmax_fused"
+  [(set (match_operand:SVE_FULL_F 0 "register_operand")
+   (unspec:SVE_FULL_F
+ [(match_operand: 1 "register_operand")
+  (match_operand:SI 4 "aarch64_sve_gp_strictness")
+  (unspec:SVE_FULL_F
+[(match_operand 5)
+ (const_int SVE_RELAXED_GP)
+ (match_operand:SVE_FULL_F 2 "register_operand")]
+UNSPEC_COND_FABS)
+  (unspec:SVE_FULL_F
+[(match_operand 6)
+ (const_int SVE_RELAXED_GP)
+ (match_operand:SVE_FULL_F 3 "register_operand")]
+UNSPEC_COND_FABS)]
+ SVE_COND_SMAXMIN))]
+  "TARGET_SVE_FAMINMAX"
+  {@ [ cons: =0 , 1   , 2  , 3 ; attrs: movprfx ]
+ [ w, Upl , %0 , w ; *  ] 
\t%0., %1/m, %0., %3.
+ [ ?&w  , Upl , w  , w ; yes] movprfx\t%0, 
%2\;\t%0., %1/m, %0., %3.
+  }
+  "&& (!rtx_equal_p (operands[1], operands[5])
+   || !rtx_equal_p (operands[1], operands[6]))"
+  {
+operands[5] = copy_rtx (operands[1]);
+operands[6] = copy_rtx (operands[1]);
+  }
+)
+
 ;; =
 ;; == Complex arithmetic
 ;; =
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index a04f9f9eb3f9..efba78375c26 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -3142,6 +3142,9 @@
 UNSPEC_COND_SMAX
 UNSPEC_COND_SMIN])
 
+(define_int_iterator SVE_COND_SMAXMIN [UNSPEC_COND_SMAX
+  UNSPEC_COND_SMIN])
+
 (define_int_iterator SVE_COND_FP_TERNARY [UNSPEC_COND_FMLA
  UNSPEC_COND_FMLS
  UNSPEC_COND_FNMLA
@@ -4502,6 +4505,9 @@
 
 (define_int_iterator FAMINMAX_UNS [UNSPEC_FAMAX UNSPEC_FAMIN])
 
+(define_int_attr faminmax_cond_uns_op
+  [(UNSPEC_COND_SMAX "famax") (UNSPEC_COND_SMIN "famin")])
+
 (define_int_attr faminmax_uns_op
   [(UNSPEC_FAMAX "famax") (UNSPEC_FAMIN "famin")])
 
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/faminmax_1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/faminmax_1.c
new file mode 100644
index ..3b65ccea0656
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/faminmax_1.c
@@ -0,0 +1,

[gcc r15-4079] aarch64: Fix bug with max/min (PR116934)

2024-10-04 Thread Saurabh Jha via Gcc-cvs
https://gcc.gnu.org/g:20ce363c557d6458ec3193ab4e7df760fbe34976

commit r15-4079-g20ce363c557d6458ec3193ab4e7df760fbe34976
Author: Saurabh Jha 
Date:   Thu Oct 3 13:16:31 2024 +

aarch64: Fix bug with max/min (PR116934)

In ac4cdf5cb43c0b09e81760e2a1902ceebcf1a135, I introduced a bug where
I put the new unspecs, UNSPEC_COND_SMAX and UNSPEC_COND_SMIN, into the
wrong iterator.

I should have put the new unspecs in SVE_COND_FP_MAXMIN but I put them in
SVE_COND_FP_BINARY_REG instead. That was incorrect because the
SVE_COND_FP_MAXMIN iterator is being used for predicated floating-point
maximum/minimum, not SVE_COND_FP_BINARY_REG.

Also added a testcase to validate the new change.

Regression tested on aarch64-unknown-linux-gnu and found no regressions.
There are some test cases with "libitm" in their directory names which
appear in compare_tests output as changed tests but it looks like they
are in the output just because of changed build directories, like from
build-patched/aarch64-unknown-linux-gnu/./libitm/* to
build-pristine/aarch64-unknown-linux-gnu/./libitm/*. I didn't think it
was a cause of concern and have pushed this for review.

gcc/ChangeLog:

PR target/116934
* config/aarch64/iterators.md: Move UNSPEC_COND_SMAX and
UNSPEC_COND_SMIN to correct iterators.

gcc/testsuite/ChangeLog:

PR target/116934
* gcc.target/aarch64/sve2/pr116934.c: New test.

Diff:
---
 gcc/config/aarch64/iterators.md  |  8 
 gcc/testsuite/gcc.target/aarch64/sve2/pr116934.c | 13 +
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 0836dee61c9f..fcad236eee9f 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -3125,9 +3125,7 @@
 
 (define_int_iterator SVE_COND_FP_BINARY_REG
   [UNSPEC_COND_FDIV
-   UNSPEC_COND_FMULX
-   UNSPEC_COND_SMAX
-   UNSPEC_COND_SMIN])
+   UNSPEC_COND_FMULX])
 
 (define_int_iterator SVE_COND_FCADD [UNSPEC_COND_FCADD90
 UNSPEC_COND_FCADD270])
@@ -3135,7 +3133,9 @@
 (define_int_iterator SVE_COND_FP_MAXMIN [UNSPEC_COND_FMAX
 UNSPEC_COND_FMAXNM
 UNSPEC_COND_FMIN
-UNSPEC_COND_FMINNM])
+UNSPEC_COND_FMINNM
+UNSPEC_COND_SMAX
+UNSPEC_COND_SMIN])
 
 (define_int_iterator SVE_COND_FP_TERNARY [UNSPEC_COND_FMLA
  UNSPEC_COND_FMLS
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/pr116934.c 
b/gcc/testsuite/gcc.target/aarch64/sve2/pr116934.c
new file mode 100644
index ..94fb96ffa7db
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/pr116934.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-Ofast -mcpu=neoverse-v2" } */
+
+int a;
+float *b;
+
+void foo() {
+  for (; a; a--, b += 4) {
+b[0] = b[1] = b[2] = b[2] > 0 ?: 0;
+if (b[3] < 0)
+  b[3] = 0;
+  }
+}


[gcc r15-3795] Add myself to write after approval

2024-09-23 Thread Saurabh Jha via Gcc-cvs
https://gcc.gnu.org/g:346f767fff859dd7fdd79b7f5e150d344e0f288c

commit r15-3795-g346f767fff859dd7fdd79b7f5e150d344e0f288c
Author: Saurabh Jha 
Date:   Mon Sep 23 12:30:50 2024 +0100

Add myself to write after approval

ChangeLog:

* MAINTAINERS: Add myself to write after approval.

Diff:
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index e9fafaf45a7e..0ea4db20f882 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -557,6 +557,7 @@ Andrew Jenner   andrewjenner

 Haochen Jiang   -   
 Qian Jianhua-   
 Michal Jiresmjires  
+Saurabh Jha -   
 Janis Johnson   janis   
 Teresa Johnson  tejohnson   
 Kean Johnston   -   


[gcc r15-3809] [MAINTAINERS] Fix myself in order and add username

2024-09-23 Thread Saurabh Jha via Gcc-cvs
https://gcc.gnu.org/g:6141d0c98a518148a8a8c35dabd8ba053fbebf18

commit r15-3809-g6141d0c98a518148a8a8c35dabd8ba053fbebf18
Author: Saurabh Jha 
Date:   Mon Sep 23 16:17:47 2024 +0100

[MAINTAINERS] Fix myself in order and add username

ChangeLog:

* MAINTAINERS: Fix sort order and add username.

Diff:
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 0ea4db20f882..3b4cf9d20d80 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -554,10 +554,10 @@ Sam James   sjames  

 Surya Kumari Jangalajskumari
 Jakub Jelinek   jakub   
 Andrew Jenner   andrewjenner
+Saurabh Jha saurabhjha  
 Haochen Jiang   -   
 Qian Jianhua-   
 Michal Jiresmjires  
-Saurabh Jha -   
 Janis Johnson   janis   
 Teresa Johnson  tejohnson   
 Kean Johnston   -   


[gcc r15-3810] aarch64: Add AdvSIMD faminmax intrinsics

2024-09-23 Thread Saurabh Jha via Gcc-cvs
https://gcc.gnu.org/g:bfefed6c5bb62648cf0303d377c06cb45ab1f24a

commit r15-3810-gbfefed6c5bb62648cf0303d377c06cb45ab1f24a
Author: Saurabh Jha 
Date:   Tue Aug 6 16:34:49 2024 +0100

aarch64: Add AdvSIMD faminmax intrinsics

The AArch64 FEAT_FAMINMAX extension is optional from Armv9.2-a and
mandatory from Armv9.5-a. It introduces instructions for computing the
floating point absolute maximum and minimum of the two vectors element-wise.

This patch introduces AdvSIMD faminmax intrinsics. The intrinsics of
this extension are implemented as the following builtin functions:
* vamax_f16
* vamaxq_f16
* vamax_f32
* vamaxq_f32
* vamaxq_f64
* vamin_f16
* vaminq_f16
* vamin_f32
* vaminq_f32
* vaminq_f64

We are defining a new way to add AArch64 AdvSIMD intrinsics by listing
all the intrinsics in a .def file and then using that .def file to
initialise various data structures. This would lead to more concise code
and easier addition of the new AdvSIMD intrinsics in future.

The faminmax intrinsics are defined using the new approach.

gcc/ChangeLog:

* config/aarch64/aarch64-builtins.cc
(ENTRY): Macro to parse the contents of
aarch64-simd-pragma-builtins.def.
(ENTRY_VHSDF): Macro to parse the contents of
aarch64-simd-pragma-builtins.def.
(enum aarch64_builtins): New enum values for faminmax builtins
via aarch64-simd-pragma-builtins.def.
(enum class aarch64_builtin_signatures): Enum class to specify
the number of operands a builtin will take.
(struct aarch64_pragma_builtins_data): Struct to hold data from
aarch64-simd-pragma-builtins.def.
(aarch64_fntype): New function to define function types of
intrinsics given an object of type aarch64_pragma_builtins_data.
(aarch64_init_pragma_builtins): New function to define pragma
builtins.
(aarch64_get_pragma_builtin): New function to get a row of
aarch64_pragma_builtins, given code.
(handle_arm_neon_h): Modify to call
aarch64_init_pragma_builtins.
(aarch64_general_check_builtin_call): Modify to check whether
required flag is being used for pragma builtins.
(aarch64_expand_pragma_builtin): New function to emit
instructions of pragma_builtin.
(aarch64_general_expand_builtin): Modify to call
aarch64_expand_pragma_builtin.
* config/aarch64/aarch64-option-extensions.def
(AARCH64_OPT_EXTENSION): Introduce new flag for this extension.
* config/aarch64/aarch64-simd.md
(@aarch64_): Instruction pattern for
faminmax intrinsics.
* config/aarch64/aarch64.h
(TARGET_FAMINMAX): Introduce new flag for this extension.
* config/aarch64/iterators.md: New iterators and unspecs.
* doc/invoke.texi: Document extension in AArch64 Options.
* config/aarch64/aarch64-simd-pragma-builtins.def: New file to
list pragma builtins.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/simd/faminmax-builtins-no-flag.c: New test.
* gcc.target/aarch64/simd/faminmax-builtins.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64-builtins.cc | 119 +
 gcc/config/aarch64/aarch64-option-extensions.def   |   2 +
 .../aarch64/aarch64-simd-pragma-builtins.def   |  23 
 gcc/config/aarch64/aarch64-simd.md |  10 ++
 gcc/config/aarch64/aarch64.h   |   4 +
 gcc/config/aarch64/iterators.md|   9 ++
 gcc/doc/invoke.texi|   2 +
 .../aarch64/simd/faminmax-builtins-no-flag.c   |  10 ++
 .../gcc.target/aarch64/simd/faminmax-builtins.c| 115 
 9 files changed, 294 insertions(+)

diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
b/gcc/config/aarch64/aarch64-builtins.cc
index eb878b933fe5..6266bea3b39c 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -757,6 +757,18 @@ typedef struct
 #define VAR1(T, N, MAP, FLAG, A) \
   AARCH64_SIMD_BUILTIN_##T##_##N##A,
 
+#undef ENTRY
+#define ENTRY(N, S, M, U, F) \
+  AARCH64_##N,
+
+#undef ENTRY_VHSDF
+#define ENTRY_VHSDF(NAME, SIGNATURE, UNSPEC, EXTENSIONS) \
+  AARCH64_##NAME##_f16, \
+  AARCH64_##NAME##q_f16, \
+  AARCH64_##NAME##_f32, \
+  AARCH64_##NAME##q_f32, \
+  AARCH64_##NAME##q_f64,
+
 enum aarch64_builtins
 {
   AARCH64_BUILTIN_MIN,
@@ -829,6 +841,10 @@ enum aarch64_builtins
   AARCH64_RBIT,
   AARCH64_RBITL,
   AARCH64_RBITLL,
+  /* Pragma builtins.  */
+  AARCH64_PRAGMA_BUILTIN_START,
+#include "aarch64-simd-pragma-builtins.def"
+  AARCH64_PRAGMA_BUILTIN_END,
   /* System register builtins.  */
   AARCH64_RSR,
   AARCH64_

[gcc r15-3811] aarch64: Add codegen support for AdvSIMD faminmax

2024-09-23 Thread Saurabh Jha via Gcc-cvs
https://gcc.gnu.org/g:c1fb78fb03caede01b02a1ebb3275ac98343d468

commit r15-3811-gc1fb78fb03caede01b02a1ebb3275ac98343d468
Author: Saurabh Jha 
Date:   Wed Aug 7 12:34:20 2024 +0100

aarch64: Add codegen support for AdvSIMD faminmax

The AArch64 FEAT_FAMINMAX extension is optional from Armv9.2-a and
mandatory from Armv9.5-a. It introduces instructions for computing the
floating point absolute maximum and minimum of the two vectors
element-wise.

This patch adds code generation support for famax and famin in terms of
existing RTL operators.

famax/famin is equivalent to first taking abs of the operands and then
taking smax/smin on the results of abs.

famax/famin (a, b) = smax/smin (abs (a), abs (b))

This fusion of operators is only possible when -march=armv9-a+faminmax
flags are passed. We also need to pass -ffast-math flag; if we don't,
then a statement like

c[i] = __builtin_fmaxf16 (a[i], b[i]);

is RTL expanded to UNSPEC_FMAXNM instead of smax (likewise for smin).

This code generation is only available on -O2 or -O3 as that is when
auto-vectorization is enabled.

gcc/ChangeLog:

* config/aarch64/aarch64-simd.md
(*aarch64_faminmax_fused): Instruction pattern for faminmax
codegen.
* config/aarch64/iterators.md: Attribute for faminmax codegen.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/simd/faminmax-codegen-no-flag.c: New test.
* gcc.target/aarch64/simd/faminmax-codegen.c: New test.
* gcc.target/aarch64/simd/faminmax-no-codegen.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64-simd.md |   9 +
 gcc/config/aarch64/iterators.md|   3 +
 .../aarch64/simd/faminmax-codegen-no-flag.c| 217 +
 .../gcc.target/aarch64/simd/faminmax-codegen.c | 197 +++
 .../gcc.target/aarch64/simd/faminmax-no-codegen.c  | 267 +
 5 files changed, 693 insertions(+)

diff --git a/gcc/config/aarch64/aarch64-simd.md 
b/gcc/config/aarch64/aarch64-simd.md
index 67f0fe26f938..2a44aa3fcc33 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -9920,3 +9920,12 @@
   "TARGET_FAMINMAX"
   "\t%0., %1., %2."
 )
+
+(define_insn "*aarch64_faminmax_fused"
+  [(set (match_operand:VHSDF 0 "register_operand" "=w")
+   (FMAXMIN:VHSDF
+ (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
+ (abs:VHSDF (match_operand:VHSDF 2 "register_operand" "w"]
+  "TARGET_FAMINMAX"
+  "\t%0., %1., %2."
+)
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 17ac5e073aa1..c2fcd18306e4 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -4472,3 +4472,6 @@
 
 (define_int_attr faminmax_uns_op
   [(UNSPEC_FAMAX "famax") (UNSPEC_FAMIN "famin")])
+
+(define_code_attr faminmax_op
+  [(smax "famax") (smin "famin")])
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/faminmax-codegen-no-flag.c 
b/gcc/testsuite/gcc.target/aarch64/simd/faminmax-codegen-no-flag.c
new file mode 100644
index ..6688a7883b7d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/faminmax-codegen-no-flag.c
@@ -0,0 +1,217 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -ffast-math -march=armv9-a" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_neon.h"
+
+#pragma GCC target "+nosve"
+
+/*
+** test_vamax_f16:
+** fabsv1.4h, v1.4h
+** fabsv0.4h, v0.4h
+** fmaxnm  v0.4h, v0.4h, v1.4h
+** ret
+*/
+float16x4_t
+test_vamax_f16 (float16x4_t a, float16x4_t b)
+{
+  int i;
+  float16x4_t c;
+
+  for (i = 0; i < 4; ++i) {
+a[i] = __builtin_fabsf16 (a[i]);
+b[i] = __builtin_fabsf16 (b[i]);
+c[i] = __builtin_fmaxf16 (a[i], b[i]);
+  }
+  return c;
+}
+
+/*
+** test_vamaxq_f16:
+** fabsv1.8h, v1.8h
+** fabsv0.8h, v0.8h
+** fmaxnm  v0.8h, v0.8h, v1.8h
+** ret
+*/
+float16x8_t
+test_vamaxq_f16 (float16x8_t a, float16x8_t b)
+{
+  int i;
+  float16x8_t c;
+
+  for (i = 0; i < 8; ++i) {
+a[i] = __builtin_fabsf16 (a[i]);
+b[i] = __builtin_fabsf16 (b[i]);
+c[i] = __builtin_fmaxf16 (a[i], b[i]);
+  }
+  return c;
+}
+
+/*
+** test_vamax_f32:
+** fabsv1.2s, v1.2s
+** fabsv0.2s, v0.2s
+** fmaxnm  v0.2s, v0.2s, v1.2s
+** ret
+*/
+float32x2_t
+test_vamax_f32 (float32x2_t a, float32x2_t b)
+{
+  int i;
+  float32x2_t c;
+
+  for (i = 0; i < 2; ++i) {
+a[i] = __builtin_fabsf32 (a[i]);
+b[i] = __builtin_fabsf32 (b[i]);
+c[i] = __builtin_fmaxf32 (a[i], b[i]);
+  }
+  return c;
+}
+
+/*
+** test_vamaxq_f32:
+** fabsv1.4s, v1.4s
+** fabsv0.4s, v0.4s
+** fmaxnm  v0.4s, v0.4s, v1.4s
+** ret
+*/
+float32x4_t
+test_vamaxq_f32 (float32x4_t a, float32x4_t b)
+{
+  int i;
+  float32x4_t c;
+
+  for (i = 0

[gcc r15-3997] aarch64: Introduce new unspecs for smax/smin

2024-10-01 Thread Saurabh Jha via Gcc-cvs
https://gcc.gnu.org/g:ac4cdf5cb43c0b09e81760e2a1902ceebcf1a135

commit r15-3997-gac4cdf5cb43c0b09e81760e2a1902ceebcf1a135
Author: Saurabh Jha 
Date:   Mon Sep 30 10:37:16 2024 +

aarch64: Introduce new unspecs for smax/smin

Introduce two new unspecs, UNSPEC_COND_SMAX and UNSPEC_COND_SMIN,
corresponding to rtl operators smax and smin. UNSPEC_COND_SMAX is used
to generate fmaxnm instruction and UNSPEC_COND_SMIN is used to generate
fminnm instruction.

With these new unspecs, we can generate SVE2 max/min instructions using
existing generic unpredicated and predicated instruction patterns that
use optab attribute. Thus, we have removed specialised instruction
patterns for max/min instructions that were using
SVE_COND_FP_MAXMIN_PUBLIC iterator.

No new test cases as the existing test cases should be enough to test
this refactoring.

gcc/ChangeLog:

* config/aarch64/aarch64-sve.md
(3): Remove this instruction pattern.
(cond_): Remove this instruction pattern.
* config/aarch64/iterators.md: New unspecs and changes to
iterators and attrs to use the new unspecs.

Diff:
---
 gcc/config/aarch64/aarch64-sve.md | 33 --
 gcc/config/aarch64/iterators.md   | 73 ---
 2 files changed, 45 insertions(+), 61 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-sve.md 
b/gcc/config/aarch64/aarch64-sve.md
index f6c7c2f4cb31..ec1d059a2b1b 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -6600,39 +6600,6 @@
 ;; - FMINNM
 ;; -
 
-;; Unpredicated fmax/fmin (the libm functions).  The optabs for the
-;; smax/smin rtx codes are handled in the generic section above.
-(define_expand "3"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-   (unspec:SVE_FULL_F
- [(match_dup 3)
-  (const_int SVE_RELAXED_GP)
-  (match_operand:SVE_FULL_F 1 "register_operand")
-  (match_operand:SVE_FULL_F 2 "aarch64_sve_float_maxmin_operand")]
- SVE_COND_FP_MAXMIN_PUBLIC))]
-  "TARGET_SVE"
-  {
-operands[3] = aarch64_ptrue_reg (mode);
-  }
-)
-
-;; Predicated fmax/fmin (the libm functions).  The optabs for the
-;; smax/smin rtx codes are handled in the generic section above.
-(define_expand "cond_"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-   (unspec:SVE_FULL_F
- [(match_operand: 1 "register_operand")
-  (unspec:SVE_FULL_F
-[(match_dup 1)
- (const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F 2 "register_operand")
- (match_operand:SVE_FULL_F 3 "aarch64_sve_float_maxmin_operand")]
-SVE_COND_FP_MAXMIN_PUBLIC)
-  (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
- UNSPEC_SEL))]
-  "TARGET_SVE"
-)
-
 ;; Predicated floating-point maximum/minimum.
 (define_insn "@aarch64_pred_"
   [(set (match_operand:SVE_FULL_F 0 "register_operand")
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index c2fcd18306e4..0836dee61c9f 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -881,6 +881,8 @@
 UNSPEC_COND_FSQRT  ; Used in aarch64-sve.md.
 UNSPEC_COND_FSUB   ; Used in aarch64-sve.md.
 UNSPEC_COND_SCVTF  ; Used in aarch64-sve.md.
+UNSPEC_COND_SMAX   ; Used in aarch64-sve.md.
+UNSPEC_COND_SMIN   ; Used in aarch64-sve.md.
 UNSPEC_COND_UCVTF  ; Used in aarch64-sve.md.
 UNSPEC_LASTA   ; Used in aarch64-sve.md.
 UNSPEC_LASTB   ; Used in aarch64-sve.md.
@@ -3081,15 +3083,18 @@
 (define_int_iterator SVE_COND_FCVTI [UNSPEC_COND_FCVTZS UNSPEC_COND_FCVTZU])
 (define_int_iterator SVE_COND_ICVTF [UNSPEC_COND_SCVTF UNSPEC_COND_UCVTF])
 
-(define_int_iterator SVE_COND_FP_BINARY [UNSPEC_COND_FADD
-UNSPEC_COND_FDIV
-UNSPEC_COND_FMAX
-UNSPEC_COND_FMAXNM
-UNSPEC_COND_FMIN
-UNSPEC_COND_FMINNM
-UNSPEC_COND_FMUL
-UNSPEC_COND_FMULX
-UNSPEC_COND_FSUB])
+(define_int_iterator SVE_COND_FP_BINARY
+  [UNSPEC_COND_FADD
+   UNSPEC_COND_FDIV
+   UNSPEC_COND_FMAX
+   UNSPEC_COND_FMAXNM
+   UNSPEC_COND_FMIN
+   UNSPEC_COND_FMINNM
+   UNSPEC_COND_FMUL
+   UNSPEC_COND_FMULX
+   UNSPEC_COND_FSUB
+   UNSPEC_COND_SMAX
+   UNSPEC_COND_SMIN])
 
 ;; Same as SVE_COND_FP_BINARY, but without codes that have a dedicated
 ;; 3 expander.
@@ -3100,7 +3105,9 @@
   UNSPEC_COND_FMINNM
   UNSPEC_COND_FMUL
   UNSPEC_

[gcc r15-3989] aarch64: Add fp8 scalar types

2024-10-01 Thread Saurabh Jha via Gcc-cvs
https://gcc.gnu.org/g:35dd5cfbfd7f33b5f22ae209635f40af4ab6863c

commit r15-3989-g35dd5cfbfd7f33b5f22ae209635f40af4ab6863c
Author: Claudio Bantaloukas 
Date:   Tue Oct 1 12:45:11 2024 +

aarch64: Add fp8 scalar types

The ACLE defines a new scalar type, __mfp8. This is an opaque 8-bit type that
can only be used by fp8 intrinsics. Additionally, the mfloat8_t type is made
available in arm_neon.h and arm_sve.h as an alias of the same.

This implementation uses an unsigned INTEGER_TYPE, with precision 8 to
represent __mfp8. Conversions to int and other types are disabled via the
TARGET_INVALID_CONVERSION hook.
Additionally, operations that are typically available to integer types are
disabled via TARGET_INVALID_UNARY_OP and TARGET_INVALID_BINARY_OP hooks.

gcc/ChangeLog:

* config/aarch64/aarch64-builtins.cc (aarch64_mfp8_type_node): Add 
node
for __mfp8 type.
(aarch64_mfp8_ptr_type_node): Add node for __mfp8 pointer type.
(aarch64_init_fp8_types): New function to initialise fp8 types and
register with language backends.
* config/aarch64/aarch64.cc (aarch64_mangle_type): Add ABI mangling 
for
new type.
(aarch64_invalid_conversion): Add function implementing
TARGET_INVALID_CONVERSION hook that blocks conversion to and from 
the
__mfp8 type.
(aarch64_invalid_unary_op): Add function implementing
TARGET_INVALID_UNARY_OP hook that blocks operations on __mfp8 other
than &.
(aarch64_invalid_binary_op): Extend TARGET_INVALID_BINARY_OP hook to
disallow operations on __mfp8 type.
(TARGET_INVALID_CONVERSION): Add define.
(TARGET_INVALID_UNARY_OP): Likewise.
* config/aarch64/aarch64.h (aarch64_mfp8_type_node): Add node for 
__mfp8
type.
(aarch64_mfp8_ptr_type_node): Add node for __mfp8 pointer type.
* config/aarch64/arm_private_fp8.h (mfloat8_t): Add typedef.

gcc/testsuite/ChangeLog:

* g++.target/aarch64/fp8_mangling.C: New tests exercising mangling.
* g++.target/aarch64/fp8_scalar_typecheck_2.C: New tests in C++.
* gcc.target/aarch64/fp8_scalar_1.c: New tests in C.
* gcc.target/aarch64/fp8_scalar_typecheck_1.c: Likewise.

Diff:
---
 gcc/config/aarch64/aarch64-builtins.cc |  20 ++
 gcc/config/aarch64/aarch64.cc  |  54 ++-
 gcc/config/aarch64/aarch64.h   |   5 +
 gcc/config/aarch64/arm_private_fp8.h   |   2 +
 gcc/testsuite/g++.target/aarch64/fp8_mangling.C|  44 +++
 .../g++.target/aarch64/fp8_scalar_typecheck_2.C| 381 +
 gcc/testsuite/gcc.target/aarch64/fp8_scalar_1.c| 134 
 .../gcc.target/aarch64/fp8_scalar_typecheck_1.c| 356 +++
 8 files changed, 994 insertions(+), 2 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
b/gcc/config/aarch64/aarch64-builtins.cc
index 38b860c176a4..7d737877e0bf 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -991,6 +991,11 @@ static GTY(()) tree aarch64_simd_intOI_type_node = 
NULL_TREE;
 static GTY(()) tree aarch64_simd_intCI_type_node = NULL_TREE;
 static GTY(()) tree aarch64_simd_intXI_type_node = NULL_TREE;
 
+/* The user-visible __mfp8 type, and a pointer to that type.  Used
+   across the back-end.  */
+tree aarch64_mfp8_type_node = NULL_TREE;
+tree aarch64_mfp8_ptr_type_node = NULL_TREE;
+
 /* The user-visible __fp16 type, and a pointer to that type.  Used
across the back-end.  */
 tree aarch64_fp16_type_node = NULL_TREE;
@@ -1824,6 +1829,19 @@ aarch64_init_builtin_rsqrt (void)
   }
 }
 
+/* Initialize the backend type that supports the user-visible __mfp8
+   type and its relative pointer type.  */
+
+static void
+aarch64_init_fp8_types (void)
+{
+  aarch64_mfp8_type_node = make_unsigned_type (8);
+  SET_TYPE_MODE (aarch64_mfp8_type_node, QImode);
+
+  lang_hooks.types.register_builtin_type (aarch64_mfp8_type_node, "__mfp8");
+  aarch64_mfp8_ptr_type_node = build_pointer_type (aarch64_mfp8_type_node);
+}
+
 /* Initialize the backend types that support the user-visible __fp16
type, also initialize a pointer to that type, to be used when
forming HFAs.  */
@@ -2228,6 +2246,8 @@ aarch64_general_init_builtins (void)
 {
   aarch64_init_fpsr_fpcr_builtins ();
 
+  aarch64_init_fp8_types ();
+
   aarch64_init_fp16_types ();
 
   aarch64_init_bf16_types ();
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 4131d2fe65b0..e7bb3278a27e 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -22507,6 +22507,10 @@ aarch64_mangle_type (const_tree type)
return "Dh";
 }
 
+  /* Modal 8 bit floating point types.  */
+  if (TYPE_MAIN_VARIANT (type) == aarch64_mfp8_type_node)
+return "u6__m

[gcc] Created branch 'devel/existing-fp8'

2024-11-20 Thread Saurabh Jha via Gcc-cvs
The branch 'devel/existing-fp8' was created pointing to:

 d54a66c1d81c... Work in progress for refactoring simd intrinsic


[gcc/devel/existing-fp8] aarch64: Add support for fp8dot2 and fp8dot4

2024-11-20 Thread Saurabh Jha via Gcc-cvs
https://gcc.gnu.org/g:ee10846d0216e4dae4e99b20054595e668575c47

commit ee10846d0216e4dae4e99b20054595e668575c47
Author: Saurabh Jha 
Date:   Wed Nov 13 19:48:26 2024 +

aarch64: Add support for fp8dot2 and fp8dot4

The AArch64 FEAT_FP8DOT2 and FEAT_FP8DOT4 extensions introduce
instructions for dot product of vectors.

This patch introduces the following intrinsics:
1. vdot{q}_{fp16|fp32}_mf8_fpm.
2. vdot{q}_lane{q}_{fp16|fp32}_mf8_fpm.

It introduces two flags: fp8dot2 and fp8dot4.

We had to add space for another type in aarch64_pragma_builtins_data
struct. The macros were updated to reflect that.

We added a new aarch64_builtin_signature variant, quaternary, and added
support for it in the functions aarch64_fntype and
aarch64_expand_pragma_builtin.

We added a new namespace, function_checker, to implement range checks
for functions defined using the new pragma approach. The old intrinsic
range checks will continue to work. All the new AdvSIMD intrinsics we
define that need lane checks should be using the function in this
namespace to implement the checks.

gcc/ChangeLog:

* config/aarch64/aarch64-builtins.cc
(ENTRY): Change to handle extra type.
(enum class): Added new variant.
(struct aarch64_pragma_builtins_data): Add support for another
type.
(aarch64_get_number_of_args): Handle new signature.
(require_integer_constant): New function to check whether the
operand is an integer constant.
(require_immediate_range): New function to validate index
ranges.
(check_simd_lane_bounds): New function to validate index
operands.
(aarch64_general_check_builtin_call): Call
function_checker::check_simd_lane_bounds.
(aarch64_expand_pragma_builtin): Handle new signature.
* config/aarch64/aarch64-c.cc
(aarch64_update_cpp_builtins): New flags.
* config/aarch64/aarch64-option-extensions.def
(AARCH64_OPT_EXTENSION): New flags.
* config/aarch64/aarch64-simd-pragma-builtins.def
(ENTRY_BINARY): Change to handle extra type.
(ENTRY_BINARY_FPM): Change to handle extra type.
(ENTRY_UNARY_FPM): Change to handle extra type.
(ENTRY_TERNARY_FPM_LANE): Macro to declare fpm ternary with
lane intrinsics.
(ENTRY_VDOT_FPM): Macro to declare vdot intrinsics.
(REQUIRED_EXTENSIONS): Define to declare functions behind
command line flags.
* config/aarch64/aarch64-simd.md:
(@aarch64_):
Instruction pattern for vdot2 intrinsics.

(@aarch64_):
Instruction pattern for vdot2 intrinsics with lane.
(@aarch64_):
Instruction pattern for vdot4 intrinsics.

(@aarch64_):
Instruction pattern for vdot4 intrinsics with lane.
* config/aarch64/aarch64.h
(TARGET_FP8DOT2): New flag for fp8dot2 instructions.
(TARGET_FP8DOT4): New flag for fp8dot4 instructions.
* config/aarch64/iterators.md: New attributes and iterators.
* doc/invoke.texi: New flag for fp8dot2 and fp8dot4
instructions.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/simd/vdot2_fpmdot.c: New test.
* gcc.target/aarch64/simd/vdot4_fpmdot.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64-builtins.cc | 107 +++--
 gcc/config/aarch64/aarch64-c.cc|   4 +
 gcc/config/aarch64/aarch64-option-extensions.def   |   4 +
 .../aarch64/aarch64-simd-pragma-builtins.def   |  39 ++--
 gcc/config/aarch64/aarch64-simd.md |  58 +++
 gcc/config/aarch64/aarch64.h   |   6 ++
 gcc/config/aarch64/iterators.md|  19 +++-
 gcc/doc/invoke.texi|   4 +
 .../gcc.target/aarch64/simd/vdot2_fpmdot.c |  77 +++
 .../gcc.target/aarch64/simd/vdot4_fpmdot.c |  77 +++
 10 files changed, 380 insertions(+), 15 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
b/gcc/config/aarch64/aarch64-builtins.cc
index 9b7280a30d07..a71c8c9a64e9 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -780,7 +780,7 @@ typedef struct
   AARCH64_SIMD_BUILTIN_##T##_##N##A,
 
 #undef ENTRY
-#define ENTRY(N, S, M0, M1, M2, M3, USES_FPMR, U)  \
+#define ENTRY(N, S, M0, M1, M2, M3, M4, USES_FPMR, U)  \
   AARCH64_##N,
 
 enum aarch64_builtins
@@ -1590,9 +1590,10 @@ aarch64_init_simd_builtin_functions (bool 
called_from_pragma)
 
 enum class aarch64_builtin_signatures
 {
+  unary,
   binary,
   ternary,
-  unary,
+  quaternary,
 };
 
 namespace {
@@ -1617,6 +1618,7 @

[gcc/devel/existing-fp8] Work in progress for refactoring simd intrinsic

2024-11-20 Thread Saurabh Jha via Gcc-cvs
https://gcc.gnu.org/g:d54a66c1d81ca3874be4c086652f205b1d6ebe49

commit d54a66c1d81ca3874be4c086652f205b1d6ebe49
Author: Saurabh Jha 
Date:   Tue Nov 19 22:38:51 2024 +

Work in progress for refactoring simd intrinsic

Diff:
---
 gcc/config/aarch64/aarch64-builtins.cc |  138 ++-
 .../aarch64/aarch64-simd-pragma-builtins.def   |  156 +++
 gcc/config/aarch64/aarch64-simd.md |   21 +-
 gcc/config/aarch64/arm_neon.h  | 1183 
 gcc/config/aarch64/iterators.md|5 +
 5 files changed, 518 insertions(+), 985 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
b/gcc/config/aarch64/aarch64-builtins.cc
index 7b2decf671fa..62adc62976c8 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -696,6 +696,7 @@ static aarch64_simd_builtin_datum 
aarch64_simd_builtin_data[] = {
   VREINTERPRET_BUILTINS \
   VREINTERPRETQ_BUILTINS
 
+/* Add fp8 here and in high */
 #define AARCH64_SIMD_VGET_LOW_BUILTINS \
   VGET_LOW_BUILTIN(f16) \
   VGET_LOW_BUILTIN(f32) \
@@ -1608,31 +1609,85 @@ namespace simd_types {
   constexpr simd_type f8 { V8QImode, qualifier_modal_float };
   constexpr simd_type f8q { V16QImode, qualifier_modal_float };
 
+  constexpr simd_type s8_scalar_const_ptr
+{ QImode, qualifier_const_pointer_map_mode };
+  constexpr simd_type s8_scalar { QImode, qualifier_none };
   constexpr simd_type s8 { V8QImode, qualifier_none };
-  constexpr simd_type u8 { V8QImode, qualifier_unsigned };
   constexpr simd_type s8q { V16QImode, qualifier_none };
+  constexpr simd_type u8_scalar_const_ptr
+{ QImode, qualifier_const_pointer_map_mode };
+  constexpr simd_type u8_scalar { QImode, qualifier_unsigned };
+  constexpr simd_type u8 { V8QImode, qualifier_unsigned };
   constexpr simd_type u8q { V16QImode, qualifier_unsigned };
 
+  constexpr simd_type s16_scalar_const_ptr
+{ HImode, qualifier_const_pointer_map_mode };
+  constexpr simd_type s16_scalar { HImode, qualifier_none };
   constexpr simd_type s16 { V4HImode, qualifier_none };
+  constexpr simd_type u16_scalar_const_ptr
+{ HImode, qualifier_const_pointer_map_mode };
+  constexpr simd_type u16_scalar { HImode, qualifier_unsigned };
   constexpr simd_type u16 { V4HImode, qualifier_unsigned };
   constexpr simd_type s16q { V8HImode, qualifier_none };
   constexpr simd_type u16q { V8HImode, qualifier_unsigned };
 
+  constexpr simd_type s32_scalar_const_ptr
+{ SImode, qualifier_const_pointer_map_mode };
   constexpr simd_type s32_index { SImode, qualifier_lane_index };
+  constexpr simd_type s32_scalar { SImode, qualifier_none };
   constexpr simd_type s32 { V2SImode, qualifier_none };
+  constexpr simd_type u32_scalar_const_ptr
+{ SImode, qualifier_const_pointer_map_mode };
+  constexpr simd_type u32_scalar { SImode, qualifier_unsigned };
+  constexpr simd_type u32 { V2SImode, qualifier_unsigned };
   constexpr simd_type s32q { V4SImode, qualifier_none };
-
+  constexpr simd_type u32q { V4SImode, qualifier_unsigned };
+
+  constexpr simd_type s64_scalar_const_ptr
+{ DImode, qualifier_const_pointer_map_mode };
+  constexpr simd_type s64_scalar { DImode, qualifier_none };
+  constexpr simd_type s64 { V1DImode, qualifier_none };
+  constexpr simd_type u64_scalar_const_ptr
+{ DImode, qualifier_const_pointer_map_mode };
+  constexpr simd_type u64_scalar { DImode, qualifier_unsigned };
+  constexpr simd_type u64 { V1DImode, qualifier_unsigned };
   constexpr simd_type s64q { V2DImode, qualifier_none };
+  constexpr simd_type u64q { V2DImode, qualifier_unsigned };
 
+  constexpr simd_type p8_scalar_const_ptr
+{ QImode, qualifier_const_pointer_map_mode };
+  constexpr simd_type p8_scalar { QImode, qualifier_poly };
   constexpr simd_type p8 { V8QImode, qualifier_poly };
   constexpr simd_type p8q { V16QImode, qualifier_poly };
+
+  constexpr simd_type p16_scalar_const_ptr
+{ HImode, qualifier_const_pointer_map_mode };
+  constexpr simd_type p16_scalar { HImode, qualifier_poly };
   constexpr simd_type p16 { V4HImode, qualifier_poly };
   constexpr simd_type p16q { V8HImode, qualifier_poly };
 
+  constexpr simd_type p64_scalar_const_ptr
+{ DImode, qualifier_const_pointer_map_mode };
+  constexpr simd_type p64_scalar { DImode, qualifier_poly };
+  constexpr simd_type p64 { V1DImode, qualifier_poly };
+  constexpr simd_type p64q { V2DImode, qualifier_poly };
+
+  constexpr simd_type f16_scalar_const_ptr
+{ HFmode, qualifier_const_pointer_map_mode };
+  constexpr simd_type f16_scalar { HFmode, qualifier_none };
   constexpr simd_type f16 { V4HFmode, qualifier_none };
   constexpr simd_type f16q { V8HFmode, qualifier_none };
+
+  constexpr simd_type f32_scalar_const_ptr
+{ SFmode, qualifier_const_pointer_map_mode };
+  constexpr simd_type f32_scalar { SFmode, qualifier_none };
   constexpr simd_type f32 { V2SFmode, qualifier_none };
   constexpr simd_type f32q { V4SFmode, qualif

[gcc/devel/existing-fp8] aarch64: Add support for fp8 convert and scale

2024-11-20 Thread Saurabh Jha via Gcc-cvs
https://gcc.gnu.org/g:3103441079fa30dc9f75a75bda38c39f1ffd708e

commit 3103441079fa30dc9f75a75bda38c39f1ffd708e
Author: Saurabh Jha 
Date:   Mon Nov 4 09:11:33 2024 +

aarch64: Add support for fp8 convert and scale

The AArch64 FEAT_FP8 extension introduces instructions for conversion
and scaling.

This patch introduces the following intrinsics:
1. vcvt{1|2}_{bf16|high_bf16|low_bf16}_mf8_fpm.
2. vcvt{q}_mf8_f16_fpm.
3. vcvt_{high}_mf8_f32_fpm.
4. vscale{q}_{f16|f32|f64}.

We introduced two aarch64_builtin_signatures enum variants, unary and
ternary, and added support for these variants in the functions
aarch64_fntype and aarch64_expand_pragma_builtin.

We added new simd_types for integers (s32, s32q, and s64q) and for
floating points (f8 and f8q).

Because we added support for fp8 intrinsics here, we modified the check
in acle/fp8.c that was checking that __ARM_FEATURE_FP8 macro is not
defined.

gcc/ChangeLog:

* config/aarch64/aarch64-builtins.cc
(ENTRY): Modified to support uses_fpmr flag.
(enum class): New variants to support new signatures.
(struct aarch64_pragma_builtins_data): Add a new boolean field,
uses_fpmr.
(aarch64_get_number_of_args): Helper function used in
aarch64_fntype and aarch64_expand_pragma_builtin.
(aarch64_fntype): Handle new signatures.
(aarch64_expand_pragma_builtin): Handle new signatures.
* config/aarch64/aarch64-c.cc
(aarch64_update_cpp_builtins): New flag for FP8.
* config/aarch64/aarch64-simd-pragma-builtins.def
(ENTRY_BINARY): Macro to declare binary intrinsics.
(ENTRY_TERNARY): Macro to declare ternary intrinsics.
(ENTRY_UNARY): Macro to declare unary intrinsics.
(ENTRY_VHSDF): Macro to declare binary intrinsics.
(ENTRY_VHSDF_VHSDI): Macro to declare binary intrinsics.
(REQUIRED_EXTENSIONS): Define to declare functions behind
command line flags.
* config/aarch64/aarch64-simd.md
(@aarch64_): Unary
pattern.
(@aarch64_): Unary
pattern.

(@aarch64_lower_):
Unary pattern.

(@aarch64_lower_):
Unary pattern.
(@aarch64):
Binary pattern.

(@aarch64_):
Unary pattern.
(@aarch64_): Binary pattern.
* config/aarch64/iterators.md: New attributes and iterators.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/acle/fp8.c: Remove check that fp8 feature
macro doesn't exist.
* gcc.target/aarch64/simd/scale_fpm.c: New test.
* gcc.target/aarch64/simd/vcvt_fpm.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64-builtins.cc | 137 +++---
 gcc/config/aarch64/aarch64-c.cc|   2 +
 .../aarch64/aarch64-simd-pragma-builtins.def   |  67 +--
 gcc/config/aarch64/aarch64-simd.md |  98 ++
 gcc/config/aarch64/iterators.md|  65 +++
 gcc/testsuite/gcc.target/aarch64/acle/fp8.c|  10 --
 gcc/testsuite/gcc.target/aarch64/simd/scale_fpm.c  |  60 +++
 gcc/testsuite/gcc.target/aarch64/simd/vcvt_fpm.c   | 197 +
 8 files changed, 587 insertions(+), 49 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
b/gcc/config/aarch64/aarch64-builtins.cc
index ad82c680c6a0..9b7280a30d07 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -780,7 +780,7 @@ typedef struct
   AARCH64_SIMD_BUILTIN_##T##_##N##A,
 
 #undef ENTRY
-#define ENTRY(N, S, M0, M1, M2, M3, U) \
+#define ENTRY(N, S, M0, M1, M2, M3, USES_FPMR, U)  \
   AARCH64_##N,
 
 enum aarch64_builtins
@@ -1591,6 +1591,8 @@ aarch64_init_simd_builtin_functions (bool 
called_from_pragma)
 enum class aarch64_builtin_signatures
 {
   binary,
+  ternary,
+  unary,
 };
 
 namespace {
@@ -1602,6 +1604,9 @@ struct simd_type {
 
 namespace simd_types {
 
+  constexpr simd_type f8 { V8QImode, qualifier_modal_float };
+  constexpr simd_type f8q { V16QImode, qualifier_modal_float };
+
   constexpr simd_type s8 { V8QImode, qualifier_none };
   constexpr simd_type u8 { V8QImode, qualifier_unsigned };
   constexpr simd_type s8q { V16QImode, qualifier_none };
@@ -1612,6 +1617,11 @@ namespace simd_types {
   constexpr simd_type s16q { V8HImode, qualifier_none };
   constexpr simd_type u16q { V8HImode, qualifier_unsigned };
 
+  constexpr simd_type s32 { V2SImode, qualifier_none };
+  constexpr simd_type s32q { V4SImode, qualifier_none };
+
+  constexpr simd_type s64q { V2DImode, qualifier_none };
+
   constexpr simd_type p8 { V8QImode, qualifier_poly };
   constexpr simd_type p8q { V16QImode, qualifier_poly };
   constexpr simd_type p16 { V4HImode,

[gcc/devel/existing-fp8] aarch64: Add support for fp8fma instructions

2024-11-20 Thread Saurabh Jha via Gcc-cvs
https://gcc.gnu.org/g:8e45a01d0fd36d21c9743f30a25e277b67e79f0e

commit 8e45a01d0fd36d21c9743f30a25e277b67e79f0e
Author: Saurabh Jha 
Date:   Wed Nov 13 17:16:37 2024 +

aarch64: Add support for fp8fma instructions

The AArch64 FEAT_FP8FMA extension introduces instructions for
multiply-add of vectors.

This patch introduces the following intrinsics:
1. {vmlalbq|vmlaltq}_f16_mf8_fpm.
2. {vmlalbq|vmlaltq}_lane{q}_f16_mf8_fpm.
3. {vmlallbbq|vmlallbtq|vmlalltbq|vmlallttq}_f32_mf8_fpm.
4. {vmlallbbq|vmlallbtq|vmlalltbq|vmlallttq}_lane{q}_f32_mf8_fpm.

It introduces the fp8fma flag.

gcc/ChangeLog:

* config/aarch64/aarch64-builtins.cc
(check_simd_lane_bounds): Add support for new unspecs.
(aarch64_expand_pragma_builtin): Add support for new unspecs.
* config/aarch64/aarch64-c.cc
(aarch64_update_cpp_builtins): New flags.
* config/aarch64/aarch64-option-extensions.def
(AARCH64_OPT_EXTENSION): New flags.
* config/aarch64/aarch64-simd-pragma-builtins.def
(ENTRY_FMA_FPM): Macro to declare fma intrinsics.
(REQUIRED_EXTENSIONS): Define to declare functions behind
command line flags.
* config/aarch64/aarch64-simd.md:

(@aarch64_unspec == UNSPEC_VDOT2
- ? vector_to_index_mode_size / 2 - 1
- : vector_to_index_mode_size / 4 - 1;
+   int high;
+   switch (builtin_data->unspec)
+ {
+ case UNSPEC_VDOT2:
+   high = vector_to_index_mode_size / 2 - 1;
+   break;
+ case UNSPEC_VDOT4:
+   high = vector_to_index_mode_size / 4 - 1;
+   break;
+ case UNSPEC_FMLALB:
+ case UNSPEC_FMLALT:
+ case UNSPEC_FMLALLBB:
+ case UNSPEC_FMLALLBT:
+ case UNSPEC_FMLALLTB:
+ case UNSPEC_FMLALLTT:
+   high = vector_to_index_mode_size - 1;
+   break;
+ default:
+   gcc_unreachable ();
+ }
require_immediate_range (location, index_arg, low, high);
break;
   }
@@ -3552,6 +3568,12 @@ aarch64_expand_pragma_builtin (tree exp, rtx target,
 
 case UNSPEC_VDOT2:
 case UNSPEC_VDOT4:
+case UNSPEC_FMLALB:
+case UNSPEC_FMLALT:
+case UNSPEC_FMLALLBB:
+case UNSPEC_FMLALLBT:
+case UNSPEC_FMLALLTB:
+case UNSPEC_FMLALLTT:
   if (builtin_data->signature == aarch64_builtin_signatures::ternary)
icode = code_for_aarch64 (builtin_data->unspec,
  builtin_data->types[0].mode,
diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
index ae1472e0fcf2..03f912cde077 100644
--- a/gcc/config/aarch64/aarch64-c.cc
+++ b/gcc/config/aarch64/aarch64-c.cc
@@ -264,6 +264,8 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
 
   aarch64_def_or_undef (TARGET_FP8DOT4, "__ARM_FEATURE_FP8DOT4", pfile);
 
+  aarch64_def_or_undef (TARGET_FP8FMA, "__ARM_FEATURE_FP8FMA", pfile);
+
   aarch64_def_or_undef (TARGET_LS64,
"__ARM_FEATURE_LS64", pfile);
   aarch64_def_or_undef (TARGET_RCPC, "__ARM_FEATURE_RCPC", pfile);
diff --git a/gcc/config/aarch64/aarch64-option-extensions.def 
b/gcc/config/aarch64/aarch64-option-extensions.def
index 44d2e18d46bd..8446d1bcd5dc 100644
--- a/gcc/config/aarch64/aarch64-option-extensions.def
+++ b/gcc/config/aarch64/aarch64-option-extensions.def
@@ -240,6 +240,8 @@ AARCH64_OPT_EXTENSION("fp8dot2", FP8DOT2, (SIMD), (), (), 
"fp8dot2")
 
 AARCH64_OPT_EXTENSION("fp8dot4", FP8DOT4, (SIMD), (), (), "fp8dot4")
 
+AARCH64_OPT_EXTENSION("fp8fma", FP8FMA, (SIMD), (), (), "fp8fma")
+
 AARCH64_OPT_EXTENSION("faminmax", FAMINMAX, (SIMD), (), (), "faminmax")
 
 #undef AARCH64_OPT_FMV_EXTENSION
diff --git a/gcc/config/aarch64/aarch64-simd-pragma-builtins.def 
b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def
index 4a94a6613f08..c7857123ca03 100644
--- a/gcc/config/aarch64/aarch64-simd-pragma-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def
@@ -48,6 +48,12 @@
   ENTRY_TERNARY_FPM_LANE (vdotq_lane_##T##_mf8_fpm, T##q, T##q, f8q, f8, U) \
   ENTRY_TERNARY_FPM_LANE (vdotq_laneq_##T##_mf8_fpm, T##q, T##q, f8q, f8q, U)
 
+#undef ENTRY_FMA_FPM
+#define ENTRY_FMA_FPM(N, T, U) \
+  ENTRY_TERNARY_FPM (N##_##T##_mf8_fpm, T##q, T##q, f8q, f8q, U)   \
+  ENTRY_TERNARY_FPM_LANE (N##_lane_##T##_mf8_fpm, T##q, T##q, f8q, f8, U) \
+  ENTRY_TERNARY_FPM_LANE (N##_laneq_##T##_mf8_fpm, T##q, T##q, f8q, f8q, U)
+
 #undef ENTRY_VHSDF
 #define ENTRY_VHSDF(NAME, UNSPEC) \
   ENTRY_BINARY (NAME##_f16, f16, f16, f16, UNSPEC) \
@@ -106,3 +112,13 @@ ENTRY_VDOT_FPM (f16, UNSPEC_VDOT2)
 #define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FP8DOT4)
 ENTRY_VDOT_FPM (f32, UNSPEC_VDOT4)
 #undef REQUIRED_EXTENSIONS
+
+// fp8 multiply-add
+#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FP8FMA)
+ENTR

[gcc/devel/existing-fp8] aarch64: Refactor infrastructure for advsimd intrinsics

2024-11-20 Thread Saurabh Jha via Gcc-cvs
https://gcc.gnu.org/g:1b6b028e272228c54801d7e038ec0536f92b22bb

commit 1b6b028e272228c54801d7e038ec0536f92b22bb
Author: Vladimir Miloserdov 
Date:   Fri Nov 1 12:35:59 2024 +

aarch64: Refactor infrastructure for advsimd intrinsics

This patch refactors the infrastructure for defining advsimd pragma
intrinsics, adding support for more flexible type and signature
handling in future SIMD extensions.

A new simd_type structure is introduced, which allows for consistent
mode and qualifier management across various advsimd operations.

gcc/ChangeLog:

* config/aarch64/aarch64-builtins.cc (ENTRY): Modify to
include modes and qualifiers for simd_type structure.
(ENTRY_VHSDF): Move to aarch64-simd-pragma-builtins.def to decouple.
(struct simd_type): New structure for managing mode and
qualifier combinations for SIMD types.
(struct aarch64_pragma_builtins_data): Replace mode with
simd_type to support multiple argument types for intrinsics.
(aarch64_fntype): Modify to handle different shape types.
(aarch64_expand_pragma_builtin): Modify to handle different
shape types.

* config/aarch64/aarch64-simd-pragma-builtins.def (ENTRY_BINARY):
Move from aarch64-builtins.cc.
(ENTRY_VHSDF): Move from aarch64-builtins.cc.
(REQUIRED_EXTENSIONS): New macro.

Diff:
---
 gcc/config/aarch64/aarch64-builtins.cc | 81 ++
 .../aarch64/aarch64-simd-pragma-builtins.def   | 15 ++--
 2 files changed, 77 insertions(+), 19 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
b/gcc/config/aarch64/aarch64-builtins.cc
index 97bde7c15d3b..ad82c680c6a0 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -780,7 +780,7 @@ typedef struct
   AARCH64_SIMD_BUILTIN_##T##_##N##A,
 
 #undef ENTRY
-#define ENTRY(N, S, M, U) \
+#define ENTRY(N, S, M0, M1, M2, M3, U) \
   AARCH64_##N,
 
 enum aarch64_builtins
@@ -1593,10 +1593,49 @@ enum class aarch64_builtin_signatures
   binary,
 };
 
+namespace {
+
+struct simd_type {
+  machine_mode mode;
+  aarch64_type_qualifiers qualifiers;
+};
+
+namespace simd_types {
+
+  constexpr simd_type s8 { V8QImode, qualifier_none };
+  constexpr simd_type u8 { V8QImode, qualifier_unsigned };
+  constexpr simd_type s8q { V16QImode, qualifier_none };
+  constexpr simd_type u8q { V16QImode, qualifier_unsigned };
+
+  constexpr simd_type s16 { V4HImode, qualifier_none };
+  constexpr simd_type u16 { V4HImode, qualifier_unsigned };
+  constexpr simd_type s16q { V8HImode, qualifier_none };
+  constexpr simd_type u16q { V8HImode, qualifier_unsigned };
+
+  constexpr simd_type p8 { V8QImode, qualifier_poly };
+  constexpr simd_type p8q { V16QImode, qualifier_poly };
+  constexpr simd_type p16 { V4HImode, qualifier_poly };
+  constexpr simd_type p16q { V8HImode, qualifier_poly };
+
+  constexpr simd_type f16 { V4HFmode, qualifier_none };
+  constexpr simd_type f16q { V8HFmode, qualifier_none };
+  constexpr simd_type f32 { V2SFmode, qualifier_none };
+  constexpr simd_type f32q { V4SFmode, qualifier_none };
+  constexpr simd_type f64q { V2DFmode, qualifier_none };
+
+  constexpr simd_type bf16 { V4BFmode, qualifier_none };
+  constexpr simd_type bf16q { V8BFmode, qualifier_none };
+
+  constexpr simd_type none { VOIDmode, qualifier_none };
+}
+
+}
+
 #undef ENTRY
-#define ENTRY(N, S, M, U) \
-  {#N, aarch64_builtin_signatures::S, E_##M##mode, U, \
-   aarch64_required_extensions::REQUIRED_EXTENSIONS},
+#define ENTRY(N, S, T0, T1, T2, T3, U) \
+  {#N, aarch64_builtin_signatures::S, simd_types::T0, simd_types::T1, \
+simd_types::T2, simd_types::T3, U, \
+aarch64_required_extensions::REQUIRED_EXTENSIONS},
 
 /* Initialize pragma builtins.  */
 
@@ -1604,7 +1643,7 @@ struct aarch64_pragma_builtins_data
 {
   const char *name;
   aarch64_builtin_signatures signature;
-  machine_mode mode;
+  simd_type types[4];
   int unspec;
   aarch64_required_extensions required_extensions;
 };
@@ -1616,11 +1655,19 @@ static aarch64_pragma_builtins_data 
aarch64_pragma_builtins[] = {
 static tree
 aarch64_fntype (const aarch64_pragma_builtins_data &builtin_data)
 {
-  auto type = aarch64_simd_builtin_type (builtin_data.mode, qualifier_none);
+  tree type0, type1, type2;
+
   switch (builtin_data.signature)
 {
 case aarch64_builtin_signatures::binary:
-  return build_function_type_list (type, type, type, NULL_TREE);
+  type0 = aarch64_simd_builtin_type (builtin_data.types[0].mode,
+   builtin_data.types[0].qualifiers);
+  type1 = aarch64_simd_builtin_type (builtin_data.types[1].mode,
+   builtin_data.types[1].qualifiers);
+  type2 = aarch64_simd_builtin_type (builtin_data.types[2].mode,
+   builtin_data.types[2].qualifiers);
+  return build_function_type_list (type0, type1, type2, NULL_TREE);
+
 default

[gcc r15-7097] AArch64: Add LUTI ACLE for SVE2

2025-01-21 Thread Saurabh Jha via Gcc-cvs
https://gcc.gnu.org/g:eb0b551c5570d98dd7cf21fa1bd0240a0c9d875f

commit r15-7097-geb0b551c5570d98dd7cf21fa1bd0240a0c9d875f
Author: Vladimir Miloserdov 
Date:   Fri May 31 16:26:11 2024 +

AArch64: Add LUTI ACLE for SVE2

This patch introduces support for LUTI2/LUTI4 ACLE for SVE2.

LUTI instructions are used for efficient table lookups with 2-bit
or 4-bit indices. LUTI2 reads indexed 8-bit or 16-bit elements from
the low 128 bits of the table vector using packed 2-bit indices,
while LUTI4 can read from the low 128 or 256 bits of the table
vector or from two table vectors using packed 4-bit indices.
These instructions fill the destination vector by copying elements
indexed by segments of the source vector, selected by the vector
segment index.

The changes include the addition of a new AArch64 option
extension "lut", __ARM_FEATURE_LUT preprocessor macro, definitions
for the new LUTI instruction shapes, and implementations of the
svluti2 and svluti4 builtins.

gcc/ChangeLog:

* config/aarch64/aarch64-c.cc
(aarch64_update_cpp_builtins): Add new flag TARGET_LUT.
* config/aarch64/aarch64-sve-builtins-shapes.cc
(struct luti_base): Shape for lut intrinsics.
(SHAPE): Specializations for lut shapes for luti2 and luti4.
* config/aarch64/aarch64-sve-builtins-shapes.h: Declare lut
intrinsics.
* config/aarch64/aarch64-sve-builtins-sve2.cc
(class svluti_lane_impl): Define expand for lut intrinsics.
(FUNCTION): Define expand for lut intrinsics.
* config/aarch64/aarch64-sve-builtins-sve2.def
(REQUIRED_EXTENSIONS): Declare lut intrinsics behind lut flag.
(svluti2_lane): Define intrinsic behind flag.
(svluti4_lane): Define intrinsic behind flag.
* config/aarch64/aarch64-sve-builtins-sve2.h: Declare lut
intrinsics.
* config/aarch64/aarch64-sve-builtins.cc
(TYPES_bh_data): New type for byte and halfword.
(bh_data): Type array for byte and halfword.
(h_data): Type array for halfword.
* config/aarch64/aarch64-sve2.md
(@aarch64_sve_luti): Instruction patterns for
lut intrinsics.
* config/aarch64/iterators.md: Iterators and attributes for lut
intrinsics.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/sve/acle/asm/test_sve_acle.h: New test
macro.
* lib/target-supports.exp: Add lut flag to the for loop.
* gcc.target/aarch64/sve/acle/general-c/lut_1.c: New test.
* gcc.target/aarch64/sve/acle/general-c/lut_2.c: New test.
* gcc.target/aarch64/sve/acle/general-c/lut_3.c: New test.
* gcc.target/aarch64/sve/acle/general-c/lut_4.c: New test.
* gcc.target/aarch64/sve2/acle/asm/luti2_bf16.c: New test.
* gcc.target/aarch64/sve2/acle/asm/luti2_f16.c: New test.
* gcc.target/aarch64/sve2/acle/asm/luti2_s16.c: New test.
* gcc.target/aarch64/sve2/acle/asm/luti2_s8.c: New test.
* gcc.target/aarch64/sve2/acle/asm/luti2_u16.c: New test.
* gcc.target/aarch64/sve2/acle/asm/luti2_u8.c: New test.
* gcc.target/aarch64/sve2/acle/asm/luti4_bf16.c: New test.
* gcc.target/aarch64/sve2/acle/asm/luti4_bf16_x2.c: New test.
* gcc.target/aarch64/sve2/acle/asm/luti4_f16.c: New test.
* gcc.target/aarch64/sve2/acle/asm/luti4_f16_x2.c: New test.
* gcc.target/aarch64/sve2/acle/asm/luti4_s16.c: New test.
* gcc.target/aarch64/sve2/acle/asm/luti4_s16_x2.c: New test.
* gcc.target/aarch64/sve2/acle/asm/luti4_s8.c: New test.
* gcc.target/aarch64/sve2/acle/asm/luti4_u16.c: New test.
* gcc.target/aarch64/sve2/acle/asm/luti4_u16_x2.c: New test.
* gcc.target/aarch64/sve2/acle/asm/luti4_u8.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64-c.cc|   2 +
 gcc/config/aarch64/aarch64-sve-builtins-shapes.cc  |  47 
 gcc/config/aarch64/aarch64-sve-builtins-shapes.h   |   2 +
 gcc/config/aarch64/aarch64-sve-builtins-sve2.cc|  17 ++
 gcc/config/aarch64/aarch64-sve-builtins-sve2.def   |   8 +
 gcc/config/aarch64/aarch64-sve-builtins-sve2.h |   2 +
 gcc/config/aarch64/aarch64-sve-builtins.cc |   8 +-
 gcc/config/aarch64/aarch64-sve2.md |  33 +++
 gcc/config/aarch64/iterators.md|   7 +
 .../aarch64/sve/acle/asm/test_sve_acle.h   |  16 ++
 .../gcc.target/aarch64/sve/acle/general-c/lut_1.c  |  34 +++
 .../gcc.target/aarch64/sve/acle/general-c/lut_2.c  |  11 +
 .../gcc.target/aarch64/sve/acle/general-c/lut_3.c  |  92 
 .../gcc.target/aarch64/sve/acle/general-c/lut_4.c  | 262 +
 .../gcc.target/aarch64/sve2/acle/asm/luti2_bf

[gcc r15-7174] Fix command flags for SVE2 faminmax

2025-01-24 Thread Saurabh Jha via Gcc-cvs
https://gcc.gnu.org/g:8bdf10fc2e9ac16a296f76a442c068216469b3a3

commit r15-7174-g8bdf10fc2e9ac16a296f76a442c068216469b3a3
Author: Saurabh Jha 
Date:   Tue Jan 21 15:59:39 2025 +

Fix command flags for SVE2 faminmax

Earlier, we were gating SVE2 faminmax behind sve+faminmax. This was
incorrect and this patch changes it so that it is gated behind
sve2+faminmax.

gcc/ChangeLog:

* config/aarch64/aarch64-sve2.md:
(*aarch64_pred_faminmax_fused): Fix to use the correct flags.
* config/aarch64/aarch64.h
(TARGET_SVE_FAMINMAX): Remove.
* config/aarch64/iterators.md: Fix iterators so that famax and
famin use correct flags.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/sve/faminmax_1.c: Fix test to use the
correct flags.
* gcc.target/aarch64/sve/faminmax_2.c: Fix test to use the
correct flags.
* gcc.target/aarch64/sve/faminmax_3.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64-sve2.md|  2 +-
 gcc/config/aarch64/aarch64.h  |  1 -
 gcc/config/aarch64/iterators.md   |  8 
 gcc/testsuite/gcc.target/aarch64/sve/faminmax_1.c |  2 +-
 gcc/testsuite/gcc.target/aarch64/sve/faminmax_2.c |  2 +-
 gcc/testsuite/gcc.target/aarch64/sve/faminmax_3.c | 11 +++
 6 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-sve2.md 
b/gcc/config/aarch64/aarch64-sve2.md
index 60bc03b2650c..3e08e092cd04 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -2950,7 +2950,7 @@
  (match_operand:SVE_FULL_F 3 "register_operand")]
 UNSPEC_COND_FABS)]
  SVE_COND_SMAXMIN))]
-  "TARGET_SVE_FAMINMAX"
+  "TARGET_FAMINMAX && TARGET_SVE2_OR_SME2"
   {@ [ cons: =0 , 1   , 2  , 3 ; attrs: movprfx ]
  [ w, Upl , %0 , w ; *  ] 
\t%0., %1/m, %0., %3.
  [ ?&w  , Upl , w  , w ; yes] movprfx\t%0, 
%2\;\t%0., %1/m, %0., %3.
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 5cbf442130bc..1a19b27fd934 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -472,7 +472,6 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED
 /* Floating Point Absolute Maximum/Minimum extension instructions are
enabled through +faminmax.  */
 #define TARGET_FAMINMAX AARCH64_HAVE_ISA (FAMINMAX)
-#define TARGET_SVE_FAMINMAX (TARGET_SVE && TARGET_FAMINMAX)
 
 /* Lookup table (LUTI) extension instructions are enabled through +lut.  */
 #define TARGET_LUT AARCH64_HAVE_ISA (LUT)
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index e843c66cf268..9fbd74939882 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -3340,8 +3340,8 @@
 
 (define_int_iterator SVE_COND_FP_BINARY
   [UNSPEC_COND_FADD
-   (UNSPEC_COND_FAMAX "TARGET_SVE_FAMINMAX")
-   (UNSPEC_COND_FAMIN "TARGET_SVE_FAMINMAX")
+   (UNSPEC_COND_FAMAX "TARGET_FAMINMAX && TARGET_SVE2_OR_SME2")
+   (UNSPEC_COND_FAMIN "TARGET_FAMINMAX && TARGET_SVE2_OR_SME2")
UNSPEC_COND_FDIV
UNSPEC_COND_FMAX
UNSPEC_COND_FMAXNM
@@ -3381,8 +3381,8 @@
UNSPEC_COND_SMIN])
 
 (define_int_iterator SVE_COND_FP_BINARY_REG
-  [(UNSPEC_COND_FAMAX "TARGET_SVE_FAMINMAX")
-   (UNSPEC_COND_FAMIN "TARGET_SVE_FAMINMAX")
+  [(UNSPEC_COND_FAMAX "TARGET_FAMINMAX && TARGET_SVE2_OR_SME2")
+   (UNSPEC_COND_FAMIN "TARGET_FAMINMAX && TARGET_SVE2_OR_SME2")
UNSPEC_COND_FDIV
UNSPEC_COND_FMULX])
 
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/faminmax_1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/faminmax_1.c
index 3b65ccea0656..154dbd9de846 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/faminmax_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/faminmax_1.c
@@ -3,7 +3,7 @@
 
 #include "arm_sve.h"
 
-#pragma GCC target "+sve+faminmax"
+#pragma GCC target "+sve2+faminmax"
 
 #define TEST_FAMAX(TYPE)   \
   void fn_famax_##TYPE (TYPE * restrict a, \
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/faminmax_2.c 
b/gcc/testsuite/gcc.target/aarch64/sve/faminmax_2.c
index d80f6eca8f82..44ecef1e0878 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/faminmax_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/faminmax_2.c
@@ -3,7 +3,7 @@
 
 #include "arm_sve.h"
 
-#pragma GCC target "+sve+faminmax"
+#pragma GCC target "+sve2+faminmax"
 
 #define TEST_WITH_SVMAX(TYPE)  \
   TYPE fn_fmax_##TYPE (TYPE x, TYPE y) {   \
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/faminmax_3.c 
b/gcc/testsuite/gcc.target/aarch64/sve/faminmax_3.c
new file mode 100644
index ..2b01fa48b8e9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/faminmax_3.c
@@ -0,0 +1,11 @@
+/* {