[gcc r15-4261] aarch64: Add SVE2 faminmax intrinsics
https://gcc.gnu.org/g:1a6a8cb1a72b282c418cd143b132de6e67b5d62b commit r15-4261-g1a6a8cb1a72b282c418cd143b132de6e67b5d62b Author: Saurabh Jha Date: Wed Sep 25 22:08:33 2024 + aarch64: Add SVE2 faminmax intrinsics The AArch64 FEAT_FAMINMAX extension introduces instructions for computing the floating point absolute maximum and minimum of the two vectors element-wise. This patch introduces SVE2 faminmax intrinsics. The intrinsics of this extension are implemented as the following builtin functions: * sva[max|min]_[m|x|z] * sva[max|min]_[f16|f32|f64]_[m|x|z] * sva[max|min]_n_[f16|f32|f64]_[m|x|z] gcc/ChangeLog: * config/aarch64/aarch64-sve-builtins-base.cc (svamax): Absolute maximum declaration. (svamin): Absolute minimum declaration. * config/aarch64/aarch64-sve-builtins-base.def (REQUIRED_EXTENSIONS): Add faminmax intrinsics behind a flag. (svamax): Absolute maximum declaration. (svamin): Absolute minimum declaration. * config/aarch64/aarch64-sve-builtins-base.h: Declaring function bases for the new intrinsics. * config/aarch64/aarch64.h (TARGET_SVE_FAMINMAX): New flag for SVE2 faminmax. * config/aarch64/iterators.md: New unspecs, iterators, and attrs for the new intrinsics. gcc/testsuite/ChangeLog: * gcc.target/aarch64/sve2/acle/asm/amax_f16.c: New test. * gcc.target/aarch64/sve2/acle/asm/amax_f32.c: New test. * gcc.target/aarch64/sve2/acle/asm/amax_f64.c: New test. * gcc.target/aarch64/sve2/acle/asm/amin_f16.c: New test. * gcc.target/aarch64/sve2/acle/asm/amin_f32.c: New test. * gcc.target/aarch64/sve2/acle/asm/amin_f64.c: New test. Diff: --- gcc/config/aarch64/aarch64-sve-builtins-base.cc| 4 + gcc/config/aarch64/aarch64-sve-builtins-base.def | 5 + gcc/config/aarch64/aarch64-sve-builtins-base.h | 2 + gcc/config/aarch64/aarch64.h | 1 + gcc/config/aarch64/iterators.md| 18 +- .../gcc.target/aarch64/sve2/acle/asm/amax_f16.c| 431 + .../gcc.target/aarch64/sve2/acle/asm/amax_f32.c| 431 + .../gcc.target/aarch64/sve2/acle/asm/amax_f64.c| 431 + .../gcc.target/aarch64/sve2/acle/asm/amin_f16.c| 431 + .../gcc.target/aarch64/sve2/acle/asm/amin_f32.c| 431 + .../gcc.target/aarch64/sve2/acle/asm/amin_f64.c| 431 + 11 files changed, 2615 insertions(+), 1 deletion(-) diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc index 4b33585d9814..b189818d6430 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc @@ -3071,6 +3071,10 @@ FUNCTION (svadrb, svadr_bhwd_impl, (0)) FUNCTION (svadrd, svadr_bhwd_impl, (3)) FUNCTION (svadrh, svadr_bhwd_impl, (1)) FUNCTION (svadrw, svadr_bhwd_impl, (2)) +FUNCTION (svamax, cond_or_uncond_unspec_function, + (UNSPEC_COND_FAMAX, UNSPEC_FAMAX)) +FUNCTION (svamin, cond_or_uncond_unspec_function, + (UNSPEC_COND_FAMIN, UNSPEC_FAMIN)) FUNCTION (svand, rtx_code_function, (AND, AND)) FUNCTION (svandv, reduction, (UNSPEC_ANDV)) FUNCTION (svasr, rtx_code_function, (ASHIFTRT, ASHIFTRT)) diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.def b/gcc/config/aarch64/aarch64-sve-builtins-base.def index 65fcba915866..95e04e4393d2 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-base.def +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.def @@ -379,3 +379,8 @@ DEF_SVE_FUNCTION (svzip2q, binary, all_data, none) DEF_SVE_FUNCTION (svld1ro, load_replicate, all_data, implicit) DEF_SVE_FUNCTION (svmmla, mmla, d_float, none) #undef REQUIRED_EXTENSIONS + +#define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_FAMINMAX +DEF_SVE_FUNCTION (svamax, binary_opt_single_n, all_float, 
mxz) +DEF_SVE_FUNCTION (svamin, binary_opt_single_n, all_float, mxz) +#undef REQUIRED_EXTENSIONS diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.h b/gcc/config/aarch64/aarch64-sve-builtins-base.h index 5bbf3569c4b4..978cf7013f92 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-base.h +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.h @@ -37,6 +37,8 @@ namespace aarch64_sve extern const function_base *const svadrd; extern const function_base *const svadrh; extern const function_base *const svadrw; +extern const function_base *const svamax; +extern const function_base *const svamin; extern const function_base *const svand; extern const function_base *const svandv; extern const function_base *const svasr; diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index 030cffb17606..593319fd4723 100644 --- a/gcc/c
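Illustrative usage sketch (not from the patch), assuming the usual SVE ACLE load/store and predication conventions; requires an -march with SVE2 and the faminmax extension enabled:

#include <arm_sve.h>

void
abs_max_f32 (float *restrict dst, const float *a, const float *b, int n)
{
  for (int i = 0; i < n; i += svcntw ())
    {
      svbool_t pg = svwhilelt_b32 (i, n);
      svfloat32_t va = svld1_f32 (pg, a + i);
      svfloat32_t vb = svld1_f32 (pg, b + i);
      /* svamax_f32_x computes max (|va|, |vb|) element-wise (FAMAX);
         svamin_f32_x is the FAMIN counterpart.  */
      svst1_f32 (pg, dst + i, svamax_f32_x (pg, va, vb));
    }
}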
[gcc r15-4262] aarch64: Add codegen support for SVE2 faminmax
https://gcc.gnu.org/g:914f4f86e6cb1e570a1928fccde1dbbfc362430b commit r15-4262-g914f4f86e6cb1e570a1928fccde1dbbfc362430b Author: Saurabh Jha Date: Mon Sep 30 14:38:32 2024 + aarch64: Add codegen support for SVE2 faminmax The AArch64 FEAT_FAMINMAX extension introduces instructions for computing the floating point absolute maximum and minimum of the two vectors element-wise. This patch adds code generation for famax and famin in terms of existing unspecs. With this patch: 1. famax can be expressed as taking UNSPEC_COND_SMAX of the two operands and then taking absolute value of their result. 2. famin can be expressed as taking UNSPEC_COND_SMIN of the two operands and then taking absolute value of their result. This fusion of operators is only possible when -march=armv9-a+faminmax+sve flags are passed. We also need to pass -ffast-math flag; this is what enables compiler to use UNSPEC_COND_SMAX and UNSPEC_COND_SMIN. This code generation is only available on -O2 or -O3 as that is when auto-vectorization is enabled. gcc/ChangeLog: * config/aarch64/aarch64-sve2.md (*aarch64_pred_faminmax_fused): Instruction pattern for faminmax codegen. * config/aarch64/iterators.md: Iterator and attribute for faminmax codegen. gcc/testsuite/ChangeLog: * gcc.target/aarch64/sve/faminmax_1.c: New test. * gcc.target/aarch64/sve/faminmax_2.c: New test. Diff: --- gcc/config/aarch64/aarch64-sve2.md| 37 ++ gcc/config/aarch64/iterators.md | 6 +++ gcc/testsuite/gcc.target/aarch64/sve/faminmax_1.c | 44 + gcc/testsuite/gcc.target/aarch64/sve/faminmax_2.c | 60 +++ 4 files changed, 147 insertions(+) diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md index 725092cc95f0..5f2697c31797 100644 --- a/gcc/config/aarch64/aarch64-sve2.md +++ b/gcc/config/aarch64/aarch64-sve2.md @@ -2467,6 +2467,43 @@ [(set_attr "movprfx" "yes")] ) +;; - +;; -- [FP] Absolute maximum and minimum +;; - +;; Includes: +;; - FAMAX +;; - FAMIN +;; - +;; Predicated floating-point absolute maximum and minimum. +(define_insn_and_rewrite "*aarch64_pred_faminmax_fused" + [(set (match_operand:SVE_FULL_F 0 "register_operand") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand") + (match_operand:SI 4 "aarch64_sve_gp_strictness") + (unspec:SVE_FULL_F +[(match_operand 5) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_FULL_F 2 "register_operand")] +UNSPEC_COND_FABS) + (unspec:SVE_FULL_F +[(match_operand 6) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_FULL_F 3 "register_operand")] +UNSPEC_COND_FABS)] + SVE_COND_SMAXMIN))] + "TARGET_SVE_FAMINMAX" + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] + [ w, Upl , %0 , w ; * ] \t%0., %1/m, %0., %3. + [ ?&w , Upl , w , w ; yes] movprfx\t%0, %2\;\t%0., %1/m, %0., %3. 
+ } + "&& (!rtx_equal_p (operands[1], operands[5]) + || !rtx_equal_p (operands[1], operands[6]))" + { +operands[5] = copy_rtx (operands[1]); +operands[6] = copy_rtx (operands[1]); + } +) + ;; = ;; == Complex arithmetic ;; = diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index a04f9f9eb3f9..efba78375c26 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -3142,6 +3142,9 @@ UNSPEC_COND_SMAX UNSPEC_COND_SMIN]) +(define_int_iterator SVE_COND_SMAXMIN [UNSPEC_COND_SMAX + UNSPEC_COND_SMIN]) + (define_int_iterator SVE_COND_FP_TERNARY [UNSPEC_COND_FMLA UNSPEC_COND_FMLS UNSPEC_COND_FNMLA @@ -4502,6 +4505,9 @@ (define_int_iterator FAMINMAX_UNS [UNSPEC_FAMAX UNSPEC_FAMIN]) +(define_int_attr faminmax_cond_uns_op + [(UNSPEC_COND_SMAX "famax") (UNSPEC_COND_SMIN "famin")]) + (define_int_attr faminmax_uns_op [(UNSPEC_FAMAX "famax") (UNSPEC_FAMIN "famin")]) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/faminmax_1.c b/gcc/testsuite/gcc.target/aarch64/sve/faminmax_1.c new file mode 100644 index ..3b65ccea0656 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/faminmax_1.c @@ -0,0 +1,
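The kind of source loop this fused pattern targets, as described above (illustrative, not copied from the testsuite); compile with -O2/-O3, -ffast-math and -march=armv9-a+faminmax+sve:

#include <math.h>

void
fn_famax (float *restrict out, const float *restrict a,
          const float *restrict b, int n)
{
  for (int i = 0; i < n; i++)
    /* FABS feeding a predicated smax, which the pattern above rewrites
       into a single FAMAX.  */
    out[i] = fmaxf (fabsf (a[i]), fabsf (b[i]));
}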
[gcc r15-4079] aarch64: Fix bug with max/min (PR116934)
https://gcc.gnu.org/g:20ce363c557d6458ec3193ab4e7df760fbe34976 commit r15-4079-g20ce363c557d6458ec3193ab4e7df760fbe34976 Author: Saurabh Jha Date: Thu Oct 3 13:16:31 2024 + aarch64: Fix bug with max/min (PR116934) In ac4cdf5cb43c0b09e81760e2a1902ceebcf1a135, I introduced a bug where I put the new unspecs, UNSPEC_COND_SMAX and UNSPEC_COND_SMIN, into the wrong iterator. I should have put new unspecs in SVE_COND_FP_MAXMIN but I put it in SVE_COND_FP_BINARY_REG instead. That was incorrect because the SVE_COND_FP_MAXMIN iterator is being used for predicated floating-point maximum/minimum, not SVE_COND_FP_BINARY_REG. Also added a testcase to validate the new change. Regression tested on aarch64-unknown-linux-gnu and found no regressions. There are some test cases with "libitm" in their directory names which appear in compare_tests output as changed tests but it looks like they are in the output just because of changed build directories, like from build-patched/aarch64-unknown-linux-gnu/./libitm/* to build-pristine/aarch64-unknown-linux-gnu/./libitm/*. I didn't think it was a cause of concern and have pushed this for review. gcc/ChangeLog: PR target/116934 * config/aarch64/iterators.md: Move UNSPEC_COND_SMAX and UNSPEC_COND_SMIN to correct iterators. gcc/testsuite/ChangeLog: PR target/116934 * gcc.target/aarch64/sve2/pr116934.c: New test. Diff: --- gcc/config/aarch64/iterators.md | 8 gcc/testsuite/gcc.target/aarch64/sve2/pr116934.c | 13 + 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 0836dee61c9f..fcad236eee9f 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -3125,9 +3125,7 @@ (define_int_iterator SVE_COND_FP_BINARY_REG [UNSPEC_COND_FDIV - UNSPEC_COND_FMULX - UNSPEC_COND_SMAX - UNSPEC_COND_SMIN]) + UNSPEC_COND_FMULX]) (define_int_iterator SVE_COND_FCADD [UNSPEC_COND_FCADD90 UNSPEC_COND_FCADD270]) @@ -3135,7 +3133,9 @@ (define_int_iterator SVE_COND_FP_MAXMIN [UNSPEC_COND_FMAX UNSPEC_COND_FMAXNM UNSPEC_COND_FMIN -UNSPEC_COND_FMINNM]) +UNSPEC_COND_FMINNM +UNSPEC_COND_SMAX +UNSPEC_COND_SMIN]) (define_int_iterator SVE_COND_FP_TERNARY [UNSPEC_COND_FMLA UNSPEC_COND_FMLS diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/pr116934.c b/gcc/testsuite/gcc.target/aarch64/sve2/pr116934.c new file mode 100644 index ..94fb96ffa7db --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/pr116934.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-Ofast -mcpu=neoverse-v2" } */ + +int a; +float *b; + +void foo() { + for (; a; a--, b += 4) { +b[0] = b[1] = b[2] = b[2] > 0 ?: 0; +if (b[3] < 0) + b[3] = 0; + } +}
[gcc r15-3795] Add myself to write after approval
https://gcc.gnu.org/g:346f767fff859dd7fdd79b7f5e150d344e0f288c

commit r15-3795-g346f767fff859dd7fdd79b7f5e150d344e0f288c
Author: Saurabh Jha
Date:   Mon Sep 23 12:30:50 2024 +0100

    Add myself to write after approval

    ChangeLog:

            * MAINTAINERS: Add myself to write after approval.

Diff:
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index e9fafaf45a7e..0ea4db20f882 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -557,6 +557,7 @@ Andrew Jenner                       andrewjenner
 Haochen Jiang                       -
 Qian Jianhua                        -
 Michal Jires                        mjires
+Saurabh Jha                         -
 Janis Johnson                       janis
 Teresa Johnson                      tejohnson
 Kean Johnston                       -
[gcc r15-3809] [MAINTAINERS] Fix myself in order and add username
https://gcc.gnu.org/g:6141d0c98a518148a8a8c35dabd8ba053fbebf18

commit r15-3809-g6141d0c98a518148a8a8c35dabd8ba053fbebf18
Author: Saurabh Jha
Date:   Mon Sep 23 16:17:47 2024 +0100

    [MAINTAINERS] Fix myself in order and add username

    ChangeLog:

            * MAINTAINERS: Fix sort order and add username.

Diff:
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 0ea4db20f882..3b4cf9d20d80 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -554,10 +554,10 @@ Sam James                          sjames
 Surya Kumari Jangala                jskumari
 Jakub Jelinek                       jakub
 Andrew Jenner                       andrewjenner
+Saurabh Jha                         saurabhjha
 Haochen Jiang                       -
 Qian Jianhua                        -
 Michal Jires                        mjires
-Saurabh Jha                         -
 Janis Johnson                       janis
 Teresa Johnson                      tejohnson
 Kean Johnston                       -
[gcc r15-3810] aarch64: Add AdvSIMD faminmax intrinsics
https://gcc.gnu.org/g:bfefed6c5bb62648cf0303d377c06cb45ab1f24a commit r15-3810-gbfefed6c5bb62648cf0303d377c06cb45ab1f24a Author: Saurabh Jha Date: Tue Aug 6 16:34:49 2024 +0100 aarch64: Add AdvSIMD faminmax intrinsics The AArch64 FEAT_FAMINMAX extension is optional from Armv9.2-a and mandatory from Armv9.5-a. It introduces instructions for computing the floating point absolute maximum and minimum of the two vectors element-wise. This patch introduces AdvSIMD faminmax intrinsics. The intrinsics of this extension are implemented as the following builtin functions: * vamax_f16 * vamaxq_f16 * vamax_f32 * vamaxq_f32 * vamaxq_f64 * vamin_f16 * vaminq_f16 * vamin_f32 * vaminq_f32 * vaminq_f64 We are defining a new way to add AArch64 AdvSIMD intrinsics by listing all the intrinsics in a .def file and then using that .def file to initialise various data structures. This would lead to more concise code and easier addition of the new AdvSIMD intrinsics in future. The faminmax intrinsics are defined using the new approach. gcc/ChangeLog: * config/aarch64/aarch64-builtins.cc (ENTRY): Macro to parse the contents of aarch64-simd-pragma-builtins.def. (ENTRY_VHSDF): Macro to parse the contents of aarch64-simd-pragma-builtins.def. (enum aarch64_builtins): New enum values for faminmax builtins via aarch64-simd-pragma-builtins.def. (enum class aarch64_builtin_signatures): Enum class to specify the number of operands a builtin will take. (struct aarch64_pragma_builtins_data): Struct to hold data from aarch64-simd-pragma-builtins.def. (aarch64_fntype): New function to define function types of intrinsics given an object of type aarch64_pragma_builtins_data. (aarch64_init_pragma_builtins): New function to define pragma builtins. (aarch64_get_pragma_builtin): New function to get a row of aarch64_pragma_builtins, given code. (handle_arm_neon_h): Modify to call aarch64_init_pragma_builtins. (aarch64_general_check_builtin_call): Modify to check whether required flag is being used for pragma builtins. (aarch64_expand_pragma_builtin): New function to emit instructions of pragma_builtin. (aarch64_general_expand_builtin): Modify to call aarch64_expand_pragma_builtin. * config/aarch64/aarch64-option-extensions.def (AARCH64_OPT_EXTENSION): Introduce new flag for this extension. * config/aarch64/aarch64-simd.md (@aarch64_): Instruction pattern for faminmax intrinsics. * config/aarch64/aarch64.h (TARGET_FAMINMAX): Introduce new flag for this extension. * config/aarch64/iterators.md: New iterators and unspecs. * doc/invoke.texi: Document extension in AArch64 Options. * config/aarch64/aarch64-simd-pragma-builtins.def: New file to list pragma builtins. gcc/testsuite/ChangeLog: * gcc.target/aarch64/simd/faminmax-builtins-no-flag.c: New test. * gcc.target/aarch64/simd/faminmax-builtins.c: New test. 
Diff: --- gcc/config/aarch64/aarch64-builtins.cc | 119 + gcc/config/aarch64/aarch64-option-extensions.def | 2 + .../aarch64/aarch64-simd-pragma-builtins.def | 23 gcc/config/aarch64/aarch64-simd.md | 10 ++ gcc/config/aarch64/aarch64.h | 4 + gcc/config/aarch64/iterators.md| 9 ++ gcc/doc/invoke.texi| 2 + .../aarch64/simd/faminmax-builtins-no-flag.c | 10 ++ .../gcc.target/aarch64/simd/faminmax-builtins.c| 115 9 files changed, 294 insertions(+) diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc index eb878b933fe5..6266bea3b39c 100644 --- a/gcc/config/aarch64/aarch64-builtins.cc +++ b/gcc/config/aarch64/aarch64-builtins.cc @@ -757,6 +757,18 @@ typedef struct #define VAR1(T, N, MAP, FLAG, A) \ AARCH64_SIMD_BUILTIN_##T##_##N##A, +#undef ENTRY +#define ENTRY(N, S, M, U, F) \ + AARCH64_##N, + +#undef ENTRY_VHSDF +#define ENTRY_VHSDF(NAME, SIGNATURE, UNSPEC, EXTENSIONS) \ + AARCH64_##NAME##_f16, \ + AARCH64_##NAME##q_f16, \ + AARCH64_##NAME##_f32, \ + AARCH64_##NAME##q_f32, \ + AARCH64_##NAME##q_f64, + enum aarch64_builtins { AARCH64_BUILTIN_MIN, @@ -829,6 +841,10 @@ enum aarch64_builtins AARCH64_RBIT, AARCH64_RBITL, AARCH64_RBITLL, + /* Pragma builtins. */ + AARCH64_PRAGMA_BUILTIN_START, +#include "aarch64-simd-pragma-builtins.def" + AARCH64_PRAGMA_BUILTIN_END, /* System register builtins. */ AARCH64_RSR, AARCH64_
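Illustrative usage of the new AdvSIMD intrinsics (signatures follow the listing above; compile with the faminmax extension enabled, e.g. -march=armv9-a+faminmax):

#include <arm_neon.h>

float32x4_t
abs_max_q (float32x4_t a, float32x4_t b)
{
  return vamaxq_f32 (a, b);	/* FAMAX Vd.4S, Vn.4S, Vm.4S */
}

float16x4_t
abs_min (float16x4_t a, float16x4_t b)
{
  return vamin_f16 (a, b);	/* FAMIN Vd.4H, Vn.4H, Vm.4H */
}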
[gcc r15-3811] aarch64: Add codegen support for AdvSIMD faminmax
https://gcc.gnu.org/g:c1fb78fb03caede01b02a1ebb3275ac98343d468 commit r15-3811-gc1fb78fb03caede01b02a1ebb3275ac98343d468 Author: Saurabh Jha Date: Wed Aug 7 12:34:20 2024 +0100 aarch64: Add codegen support for AdvSIMD faminmax The AArch64 FEAT_FAMINMAX extension is optional from Armv9.2-a and mandatory from Armv9.5-a. It introduces instructions for computing the floating point absolute maximum and minimum of the two vectors element-wise. This patch adds code generation support for famax and famin in terms of existing RTL operators. famax/famin is equivalent to first taking abs of the operands and then taking smax/smin on the results of abs. famax/famin (a, b) = smax/smin (abs (a), abs (b)) This fusion of operators is only possible when -march=armv9-a+faminmax flags are passed. We also need to pass -ffast-math flag; if we don't, then a statement like c[i] = __builtin_fmaxf16 (a[i], b[i]); is RTL expanded to UNSPEC_FMAXNM instead of smax (likewise for smin). This code generation is only available on -O2 or -O3 as that is when auto-vectorization is enabled. gcc/ChangeLog: * config/aarch64/aarch64-simd.md (*aarch64_faminmax_fused): Instruction pattern for faminmax codegen. * config/aarch64/iterators.md: Attribute for faminmax codegen. gcc/testsuite/ChangeLog: * gcc.target/aarch64/simd/faminmax-codegen-no-flag.c: New test. * gcc.target/aarch64/simd/faminmax-codegen.c: New test. * gcc.target/aarch64/simd/faminmax-no-codegen.c: New test. Diff: --- gcc/config/aarch64/aarch64-simd.md | 9 + gcc/config/aarch64/iterators.md| 3 + .../aarch64/simd/faminmax-codegen-no-flag.c| 217 + .../gcc.target/aarch64/simd/faminmax-codegen.c | 197 +++ .../gcc.target/aarch64/simd/faminmax-no-codegen.c | 267 + 5 files changed, 693 insertions(+) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 67f0fe26f938..2a44aa3fcc33 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -9920,3 +9920,12 @@ "TARGET_FAMINMAX" "\t%0., %1., %2." ) + +(define_insn "*aarch64_faminmax_fused" + [(set (match_operand:VHSDF 0 "register_operand" "=w") + (FMAXMIN:VHSDF + (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")) + (abs:VHSDF (match_operand:VHSDF 2 "register_operand" "w"] + "TARGET_FAMINMAX" + "\t%0., %1., %2." 
+) diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 17ac5e073aa1..c2fcd18306e4 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -4472,3 +4472,6 @@ (define_int_attr faminmax_uns_op [(UNSPEC_FAMAX "famax") (UNSPEC_FAMIN "famin")]) + +(define_code_attr faminmax_op + [(smax "famax") (smin "famin")]) diff --git a/gcc/testsuite/gcc.target/aarch64/simd/faminmax-codegen-no-flag.c b/gcc/testsuite/gcc.target/aarch64/simd/faminmax-codegen-no-flag.c new file mode 100644 index ..6688a7883b7d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/faminmax-codegen-no-flag.c @@ -0,0 +1,217 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-O3 -ffast-math -march=armv9-a" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "arm_neon.h" + +#pragma GCC target "+nosve" + +/* +** test_vamax_f16: +** fabsv1.4h, v1.4h +** fabsv0.4h, v0.4h +** fmaxnm v0.4h, v0.4h, v1.4h +** ret +*/ +float16x4_t +test_vamax_f16 (float16x4_t a, float16x4_t b) +{ + int i; + float16x4_t c; + + for (i = 0; i < 4; ++i) { +a[i] = __builtin_fabsf16 (a[i]); +b[i] = __builtin_fabsf16 (b[i]); +c[i] = __builtin_fmaxf16 (a[i], b[i]); + } + return c; +} + +/* +** test_vamaxq_f16: +** fabsv1.8h, v1.8h +** fabsv0.8h, v0.8h +** fmaxnm v0.8h, v0.8h, v1.8h +** ret +*/ +float16x8_t +test_vamaxq_f16 (float16x8_t a, float16x8_t b) +{ + int i; + float16x8_t c; + + for (i = 0; i < 8; ++i) { +a[i] = __builtin_fabsf16 (a[i]); +b[i] = __builtin_fabsf16 (b[i]); +c[i] = __builtin_fmaxf16 (a[i], b[i]); + } + return c; +} + +/* +** test_vamax_f32: +** fabsv1.2s, v1.2s +** fabsv0.2s, v0.2s +** fmaxnm v0.2s, v0.2s, v1.2s +** ret +*/ +float32x2_t +test_vamax_f32 (float32x2_t a, float32x2_t b) +{ + int i; + float32x2_t c; + + for (i = 0; i < 2; ++i) { +a[i] = __builtin_fabsf32 (a[i]); +b[i] = __builtin_fabsf32 (b[i]); +c[i] = __builtin_fmaxf32 (a[i], b[i]); + } + return c; +} + +/* +** test_vamaxq_f32: +** fabsv1.4s, v1.4s +** fabsv0.4s, v0.4s +** fmaxnm v0.4s, v0.4s, v1.4s +** ret +*/ +float32x4_t +test_vamaxq_f32 (float32x4_t a, float32x4_t b) +{ + int i; + float32x4_t c; + + for (i = 0
[gcc r15-3997] aarch64: Introduce new unspecs for smax/smin
https://gcc.gnu.org/g:ac4cdf5cb43c0b09e81760e2a1902ceebcf1a135 commit r15-3997-gac4cdf5cb43c0b09e81760e2a1902ceebcf1a135 Author: Saurabh Jha Date: Mon Sep 30 10:37:16 2024 + aarch64: Introduce new unspecs for smax/smin Introduce two new unspecs, UNSPEC_COND_SMAX and UNSPEC_COND_SMIN, corresponding to rtl operators smax and smin. UNSPEC_COND_SMAX is used to generate fmaxnm instruction and UNSPEC_COND_SMIN is used to generate fminnm instruction. With these new unspecs, we can generate SVE2 max/min instructions using existing generic unpredicated and predicated instruction patterns that use optab attribute. Thus, we have removed specialised instruction patterns for max/min instructions that were using SVE_COND_FP_MAXMIN_PUBLIC iterator. No new test cases as the existing test cases should be enough to test this refactoring. gcc/ChangeLog: * config/aarch64/aarch64-sve.md (3): Remove this instruction pattern. (cond_): Remove this instruction pattern. * config/aarch64/iterators.md: New unspecs and changes to iterators and attrs to use the new unspecs Diff: --- gcc/config/aarch64/aarch64-sve.md | 33 -- gcc/config/aarch64/iterators.md | 73 --- 2 files changed, 45 insertions(+), 61 deletions(-) diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index f6c7c2f4cb31..ec1d059a2b1b 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -6600,39 +6600,6 @@ ;; - FMINNM ;; - -;; Unpredicated fmax/fmin (the libm functions). The optabs for the -;; smax/smin rtx codes are handled in the generic section above. -(define_expand "3" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F - [(match_dup 3) - (const_int SVE_RELAXED_GP) - (match_operand:SVE_FULL_F 1 "register_operand") - (match_operand:SVE_FULL_F 2 "aarch64_sve_float_maxmin_operand")] - SVE_COND_FP_MAXMIN_PUBLIC))] - "TARGET_SVE" - { -operands[3] = aarch64_ptrue_reg (mode); - } -) - -;; Predicated fmax/fmin (the libm functions). The optabs for the -;; smax/smin rtx codes are handled in the generic section above. -(define_expand "cond_" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F - [(match_operand: 1 "register_operand") - (unspec:SVE_FULL_F -[(match_dup 1) - (const_int SVE_RELAXED_GP) - (match_operand:SVE_FULL_F 2 "register_operand") - (match_operand:SVE_FULL_F 3 "aarch64_sve_float_maxmin_operand")] -SVE_COND_FP_MAXMIN_PUBLIC) - (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")] - UNSPEC_SEL))] - "TARGET_SVE" -) - ;; Predicated floating-point maximum/minimum. (define_insn "@aarch64_pred_" [(set (match_operand:SVE_FULL_F 0 "register_operand") diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index c2fcd18306e4..0836dee61c9f 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -881,6 +881,8 @@ UNSPEC_COND_FSQRT ; Used in aarch64-sve.md. UNSPEC_COND_FSUB ; Used in aarch64-sve.md. UNSPEC_COND_SCVTF ; Used in aarch64-sve.md. +UNSPEC_COND_SMAX ; Used in aarch64-sve.md. +UNSPEC_COND_SMIN ; Used in aarch64-sve.md. UNSPEC_COND_UCVTF ; Used in aarch64-sve.md. UNSPEC_LASTA ; Used in aarch64-sve.md. UNSPEC_LASTB ; Used in aarch64-sve.md. 
@@ -3081,15 +3083,18 @@ (define_int_iterator SVE_COND_FCVTI [UNSPEC_COND_FCVTZS UNSPEC_COND_FCVTZU]) (define_int_iterator SVE_COND_ICVTF [UNSPEC_COND_SCVTF UNSPEC_COND_UCVTF]) -(define_int_iterator SVE_COND_FP_BINARY [UNSPEC_COND_FADD -UNSPEC_COND_FDIV -UNSPEC_COND_FMAX -UNSPEC_COND_FMAXNM -UNSPEC_COND_FMIN -UNSPEC_COND_FMINNM -UNSPEC_COND_FMUL -UNSPEC_COND_FMULX -UNSPEC_COND_FSUB]) +(define_int_iterator SVE_COND_FP_BINARY + [UNSPEC_COND_FADD + UNSPEC_COND_FDIV + UNSPEC_COND_FMAX + UNSPEC_COND_FMAXNM + UNSPEC_COND_FMIN + UNSPEC_COND_FMINNM + UNSPEC_COND_FMUL + UNSPEC_COND_FMULX + UNSPEC_COND_FSUB + UNSPEC_COND_SMAX + UNSPEC_COND_SMIN]) ;; Same as SVE_COND_FP_BINARY, but without codes that have a dedicated ;; 3 expander. @@ -3100,7 +3105,9 @@ UNSPEC_COND_FMINNM UNSPEC_COND_FMUL UNSPEC_
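For reference, the kind of C that now flows through the new unspecs: with SVE auto-vectorization (-O2/-O3) and -ffast-math, a select-based maximum is represented with the smax rtx code and expands via UNSPEC_COND_SMAX to FMAXNM (illustrative sketch only):

void
vec_max (float *restrict out, const float *a, const float *b, int n)
{
  for (int i = 0; i < n; i++)
    out[i] = a[i] > b[i] ? a[i] : b[i];	/* MAX_EXPR -> smax -> FMAXNM */
}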
[gcc r15-3989] aarch64: Add fp8 scalar types
https://gcc.gnu.org/g:35dd5cfbfd7f33b5f22ae209635f40af4ab6863c commit r15-3989-g35dd5cfbfd7f33b5f22ae209635f40af4ab6863c Author: Claudio Bantaloukas Date: Tue Oct 1 12:45:11 2024 + aarch64: Add fp8 scalar types The ACLE defines a new scalar type, __mfp8. This is an opaque 8bit types that can only be used by fp8 intrinsics. Additionally, the mfloat8_t type is made available in arm_neon.h and arm_sve.h as an alias of the same. This implementation uses an unsigned INTEGER_TYPE, with precision 8 to represent __mfp8. Conversions to int and other types are disabled via the TARGET_INVALID_CONVERSION hook. Additionally, operations that are typically available to integer types are disabled via TARGET_INVALID_UNARY_OP and TARGET_INVALID_BINARY_OP hooks. gcc/ChangeLog: * config/aarch64/aarch64-builtins.cc (aarch64_mfp8_type_node): Add node for __mfp8 type. (aarch64_mfp8_ptr_type_node): Add node for __mfp8 pointer type. (aarch64_init_fp8_types): New function to initialise fp8 types and register with language backends. * config/aarch64/aarch64.cc (aarch64_mangle_type): Add ABI mangling for new type. (aarch64_invalid_conversion): Add function implementing TARGET_INVALID_CONVERSION hook that blocks conversion to and from the __mfp8 type. (aarch64_invalid_unary_op): Add function implementing TARGET_UNARY_OP hook that blocks operations on __mfp8 other than &. (aarch64_invalid_binary_op): Extend TARGET_BINARY_OP hook to disallow operations on __mfp8 type. (TARGET_INVALID_CONVERSION): Add define. (TARGET_INVALID_UNARY_OP): Likewise. * config/aarch64/aarch64.h (aarch64_mfp8_type_node): Add node for __mfp8 type. (aarch64_mfp8_ptr_type_node): Add node for __mfp8 pointer type. * config/aarch64/arm_private_fp8.h (mfloat8_t): Add typedef. gcc/testsuite/ChangeLog: * g++.target/aarch64/fp8_mangling.C: New tests exercising mangling. * g++.target/aarch64/fp8_scalar_typecheck_2.C: New tests in C++. * gcc.target/aarch64/fp8_scalar_1.c: New tests in C. * gcc.target/aarch64/fp8_scalar_typecheck_1.c: Likewise. Diff: --- gcc/config/aarch64/aarch64-builtins.cc | 20 ++ gcc/config/aarch64/aarch64.cc | 54 ++- gcc/config/aarch64/aarch64.h | 5 + gcc/config/aarch64/arm_private_fp8.h | 2 + gcc/testsuite/g++.target/aarch64/fp8_mangling.C| 44 +++ .../g++.target/aarch64/fp8_scalar_typecheck_2.C| 381 + gcc/testsuite/gcc.target/aarch64/fp8_scalar_1.c| 134 .../gcc.target/aarch64/fp8_scalar_typecheck_1.c| 356 +++ 8 files changed, 994 insertions(+), 2 deletions(-) diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc index 38b860c176a4..7d737877e0bf 100644 --- a/gcc/config/aarch64/aarch64-builtins.cc +++ b/gcc/config/aarch64/aarch64-builtins.cc @@ -991,6 +991,11 @@ static GTY(()) tree aarch64_simd_intOI_type_node = NULL_TREE; static GTY(()) tree aarch64_simd_intCI_type_node = NULL_TREE; static GTY(()) tree aarch64_simd_intXI_type_node = NULL_TREE; +/* The user-visible __mfp8 type, and a pointer to that type. Used + across the back-end. */ +tree aarch64_mfp8_type_node = NULL_TREE; +tree aarch64_mfp8_ptr_type_node = NULL_TREE; + /* The user-visible __fp16 type, and a pointer to that type. Used across the back-end. */ tree aarch64_fp16_type_node = NULL_TREE; @@ -1824,6 +1829,19 @@ aarch64_init_builtin_rsqrt (void) } } +/* Initialize the backend type that supports the user-visible __mfp8 + type and its relative pointer type. 
*/ + +static void +aarch64_init_fp8_types (void) +{ + aarch64_mfp8_type_node = make_unsigned_type (8); + SET_TYPE_MODE (aarch64_mfp8_type_node, QImode); + + lang_hooks.types.register_builtin_type (aarch64_mfp8_type_node, "__mfp8"); + aarch64_mfp8_ptr_type_node = build_pointer_type (aarch64_mfp8_type_node); +} + /* Initialize the backend types that support the user-visible __fp16 type, also initialize a pointer to that type, to be used when forming HFAs. */ @@ -2228,6 +2246,8 @@ aarch64_general_init_builtins (void) { aarch64_init_fpsr_fpcr_builtins (); + aarch64_init_fp8_types (); + aarch64_init_fp16_types (); aarch64_init_bf16_types (); diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 4131d2fe65b0..e7bb3278a27e 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -22507,6 +22507,10 @@ aarch64_mangle_type (const_tree type) return "Dh"; } + /* Modal 8 bit floating point types. */ + if (TYPE_MAIN_VARIANT (type) == aarch64_mfp8_type_node) +return "u6__m
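A small sketch of what the opaque type does and does not allow, following the rules above (copying and address-taking are fine; conversions and arithmetic are rejected by the new hooks):

#include <arm_neon.h>

mfloat8_t copy_fp8 (mfloat8_t x) { return x; }			/* OK */
const mfloat8_t *addr_fp8 (const mfloat8_t *p) { return p; }	/* OK: & and pointers */

/* Rejected at compile time by the new hooks:
     int bad_cast (mfloat8_t x) { return (int) x; }		// invalid conversion
     mfloat8_t bad_add (mfloat8_t a, mfloat8_t b) { return a + b; }  // invalid binary op
*/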
[gcc] Created branch 'devel/existing-fp8'
The branch 'devel/existing-fp8' was created pointing to: d54a66c1d81c... Work in progress for refactoring simd intrinsic
[gcc/devel/existing-fp8] aarch64: Add support for fp8dot2 and fp8dot4
https://gcc.gnu.org/g:ee10846d0216e4dae4e99b20054595e668575c47 commit ee10846d0216e4dae4e99b20054595e668575c47 Author: Saurabh Jha Date: Wed Nov 13 19:48:26 2024 + aarch64: Add support for fp8dot2 and fp8dot4 The AArch64 FEAT_FP8DOT2 and FEAT_FP8DOT4 extension introduces instructions for dot product of vectors. This patch introduces the following intrinsics: 1. vdot{q}_{fp16|fp32}_mf8_fpm. 2. vdot{q}_lane{q}_{fp16|fp32}_mf8_fpm. It introduces two flags: fp8dot2 and fp8dot4. We had to add space for another type in aarch64_pragma_builtins_data struct. The macros were updated to reflect that. We added a new aarch64_builtin_signature variant, quaternary, and added support for it in the functions aarch64_fntype and aarch64_expand_pragma_builtin. We added a new namespace, function_checker, to implement range checks for functions defined using the new pragma approach. The old intrinsic range checks will continue to work. All the new AdvSIMD intrinsics we define that need lane checks should be using the function in this namespace to implement the checks. gcc/ChangeLog: * config/aarch64/aarch64-builtins.cc (ENTRY): Change to handle extra type. (enum class): Added new variant. (struct aarch64_pragma_builtins_data): Add support for another type. (aarch64_get_number_of_args): Handle new signature. (require_integer_constant): New function to check whether the operand is an integer constant. (require_immediate_range): New function to validate index ranges. (check_simd_lane_bounds): New function to validate index operands. (aarch64_general_check_builtin_call): Call function_checker::check-simd_lane_bounds. (aarch64_expand_pragma_builtin): Handle new signature. * config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): New flags. * config/aarch64/aarch64-option-extensions.def (AARCH64_OPT_EXTENSION): New flags. * config/aarch64/aarch64-simd-pragma-builtins.def (ENTRY_BINARY): Change to handle extra type. (ENTRY_BINARY_FPM): Change to handle extra type. (ENTRY_UNARY_FPM): Change to handle extra type. (ENTRY_TERNARY_FPM_LANE): Macro to declare fpm ternary with lane intrinsics. (ENTRY_VDOT_FPM): Macro to declare vdot intrinsics. (REQUIRED_EXTENSIONS): Define to declare functions behind command line flags. * config/aarch64/aarch64-simd.md: (@aarch64_): Instruction pattern for vdot2 intrinsics. (@aarch64_): Instruction pattern for vdot2 intrinsics with lane. (@aarch64_): Instruction pattern for vdot4 intrinsics. (@aarch64_): Instruction pattern for vdo4 intrinsics with lane. * config/aarch64/aarch64.h (TARGET_FP8DOT2): New flag for fp8dot2 instructions. (TARGET_FP8DOT4): New flag for fp8dot4 instructions. * config/aarch64/iterators.md: New attributes and iterators. * doc/invoke.texi: New flag for fp8dot2 and fp8dot4 instructions. gcc/testsuite/ChangeLog: * gcc.target/aarch64/simd/vdot2_fpmdot.c: New test. * gcc.target/aarch64/simd/vdot4_fpmdot.c: New test. 
Diff: --- gcc/config/aarch64/aarch64-builtins.cc | 107 +++-- gcc/config/aarch64/aarch64-c.cc| 4 + gcc/config/aarch64/aarch64-option-extensions.def | 4 + .../aarch64/aarch64-simd-pragma-builtins.def | 39 ++-- gcc/config/aarch64/aarch64-simd.md | 58 +++ gcc/config/aarch64/aarch64.h | 6 ++ gcc/config/aarch64/iterators.md| 19 +++- gcc/doc/invoke.texi| 4 + .../gcc.target/aarch64/simd/vdot2_fpmdot.c | 77 +++ .../gcc.target/aarch64/simd/vdot4_fpmdot.c | 77 +++ 10 files changed, 380 insertions(+), 15 deletions(-) diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc index 9b7280a30d07..a71c8c9a64e9 100644 --- a/gcc/config/aarch64/aarch64-builtins.cc +++ b/gcc/config/aarch64/aarch64-builtins.cc @@ -780,7 +780,7 @@ typedef struct AARCH64_SIMD_BUILTIN_##T##_##N##A, #undef ENTRY -#define ENTRY(N, S, M0, M1, M2, M3, USES_FPMR, U) \ +#define ENTRY(N, S, M0, M1, M2, M3, M4, USES_FPMR, U) \ AARCH64_##N, enum aarch64_builtins @@ -1590,9 +1590,10 @@ aarch64_init_simd_builtin_functions (bool called_from_pragma) enum class aarch64_builtin_signatures { + unary, binary, ternary, - unary, + quaternary, }; namespace { @@ -1617,6 +1618,7 @
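A hedged sketch of how the new dot-product intrinsics are used; the exact spellings (f16/f32 suffixes) and the trailing fpm_t operand are assumptions based on the naming above and the fp8 ACLE conventions, and the code needs +fp8dot2/+fp8dot4 respectively:

#include <arm_neon.h>

float16x8_t
dot2 (float16x8_t acc, mfloat8x16_t a, mfloat8x16_t b, fpm_t fpm)
{
  /* 2-way dot product: pairs of fp8 elements accumulated into f16 lanes.  */
  return vdotq_f16_mf8_fpm (acc, a, b, fpm);
}

float32x4_t
dot4 (float32x4_t acc, mfloat8x16_t a, mfloat8x16_t b, fpm_t fpm)
{
  /* 4-way dot product: groups of four fp8 elements accumulated into f32 lanes.  */
  return vdotq_f32_mf8_fpm (acc, a, b, fpm);
}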
[gcc/devel/existing-fp8] Work in progress for refactoring simd intrinsic
https://gcc.gnu.org/g:d54a66c1d81ca3874be4c086652f205b1d6ebe49 commit d54a66c1d81ca3874be4c086652f205b1d6ebe49 Author: Saurabh Jha Date: Tue Nov 19 22:38:51 2024 + Work in progress for refactoring simd intrinsic Diff: --- gcc/config/aarch64/aarch64-builtins.cc | 138 ++- .../aarch64/aarch64-simd-pragma-builtins.def | 156 +++ gcc/config/aarch64/aarch64-simd.md | 21 +- gcc/config/aarch64/arm_neon.h | 1183 gcc/config/aarch64/iterators.md|5 + 5 files changed, 518 insertions(+), 985 deletions(-) diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc index 7b2decf671fa..62adc62976c8 100644 --- a/gcc/config/aarch64/aarch64-builtins.cc +++ b/gcc/config/aarch64/aarch64-builtins.cc @@ -696,6 +696,7 @@ static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = { VREINTERPRET_BUILTINS \ VREINTERPRETQ_BUILTINS +/* Add fp8 here and in high */ #define AARCH64_SIMD_VGET_LOW_BUILTINS \ VGET_LOW_BUILTIN(f16) \ VGET_LOW_BUILTIN(f32) \ @@ -1608,31 +1609,85 @@ namespace simd_types { constexpr simd_type f8 { V8QImode, qualifier_modal_float }; constexpr simd_type f8q { V16QImode, qualifier_modal_float }; + constexpr simd_type s8_scalar_const_ptr +{ QImode, qualifier_const_pointer_map_mode }; + constexpr simd_type s8_scalar { QImode, qualifier_none }; constexpr simd_type s8 { V8QImode, qualifier_none }; - constexpr simd_type u8 { V8QImode, qualifier_unsigned }; constexpr simd_type s8q { V16QImode, qualifier_none }; + constexpr simd_type u8_scalar_const_ptr +{ QImode, qualifier_const_pointer_map_mode }; + constexpr simd_type u8_scalar { QImode, qualifier_unsigned }; + constexpr simd_type u8 { V8QImode, qualifier_unsigned }; constexpr simd_type u8q { V16QImode, qualifier_unsigned }; + constexpr simd_type s16_scalar_const_ptr +{ HImode, qualifier_const_pointer_map_mode }; + constexpr simd_type s16_scalar { HImode, qualifier_none }; constexpr simd_type s16 { V4HImode, qualifier_none }; + constexpr simd_type u16_scalar_const_ptr +{ HImode, qualifier_const_pointer_map_mode }; + constexpr simd_type u16_scalar { HImode, qualifier_unsigned }; constexpr simd_type u16 { V4HImode, qualifier_unsigned }; constexpr simd_type s16q { V8HImode, qualifier_none }; constexpr simd_type u16q { V8HImode, qualifier_unsigned }; + constexpr simd_type s32_scalar_const_ptr +{ SImode, qualifier_const_pointer_map_mode }; constexpr simd_type s32_index { SImode, qualifier_lane_index }; + constexpr simd_type s32_scalar { SImode, qualifier_none }; constexpr simd_type s32 { V2SImode, qualifier_none }; + constexpr simd_type u32_scalar_const_ptr +{ SImode, qualifier_const_pointer_map_mode }; + constexpr simd_type u32_scalar { SImode, qualifier_unsigned }; + constexpr simd_type u32 { V2SImode, qualifier_unsigned }; constexpr simd_type s32q { V4SImode, qualifier_none }; - + constexpr simd_type u32q { V4SImode, qualifier_unsigned }; + + constexpr simd_type s64_scalar_const_ptr +{ DImode, qualifier_const_pointer_map_mode }; + constexpr simd_type s64_scalar { DImode, qualifier_none }; + constexpr simd_type s64 { V1DImode, qualifier_none }; + constexpr simd_type u64_scalar_const_ptr +{ DImode, qualifier_const_pointer_map_mode }; + constexpr simd_type u64_scalar { DImode, qualifier_unsigned }; + constexpr simd_type u64 { V1DImode, qualifier_unsigned }; constexpr simd_type s64q { V2DImode, qualifier_none }; + constexpr simd_type u64q { V2DImode, qualifier_unsigned }; + constexpr simd_type p8_scalar_const_ptr +{ QImode, qualifier_const_pointer_map_mode }; + constexpr simd_type p8_scalar { QImode, qualifier_poly }; 
constexpr simd_type p8 { V8QImode, qualifier_poly }; constexpr simd_type p8q { V16QImode, qualifier_poly }; + + constexpr simd_type p16_scalar_const_ptr +{ HImode, qualifier_const_pointer_map_mode }; + constexpr simd_type p16_scalar { HImode, qualifier_poly }; constexpr simd_type p16 { V4HImode, qualifier_poly }; constexpr simd_type p16q { V8HImode, qualifier_poly }; + constexpr simd_type p64_scalar_const_ptr +{ DImode, qualifier_const_pointer_map_mode }; + constexpr simd_type p64_scalar { DImode, qualifier_poly }; + constexpr simd_type p64 { V1DImode, qualifier_poly }; + constexpr simd_type p64q { V2DImode, qualifier_poly }; + + constexpr simd_type f16_scalar_const_ptr +{ HFmode, qualifier_const_pointer_map_mode }; + constexpr simd_type f16_scalar { HFmode, qualifier_none }; constexpr simd_type f16 { V4HFmode, qualifier_none }; constexpr simd_type f16q { V8HFmode, qualifier_none }; + + constexpr simd_type f32_scalar_const_ptr +{ SFmode, qualifier_const_pointer_map_mode }; + constexpr simd_type f32_scalar { SFmode, qualifier_none }; constexpr simd_type f32 { V2SFmode, qualifier_none }; constexpr simd_type f32q { V4SFmode, qualif
[gcc/devel/existing-fp8] aarch64: Add support for fp8 convert and scale
https://gcc.gnu.org/g:3103441079fa30dc9f75a75bda38c39f1ffd708e commit 3103441079fa30dc9f75a75bda38c39f1ffd708e Author: Saurabh Jha Date: Mon Nov 4 09:11:33 2024 + aarch64: Add support for fp8 convert and scale The AArch64 FEAT_FP8 extension introduces instructions for conversion and scaling. This patch introduces the following intrinsics: 1. vcvt{1|2}_{bf16|high_bf16|low_bf16}_mf8_fpm. 2. vcvt{q}_mf8_f16_fpm. 3. vcvt_{high}_mf8_f32_fpm. 4. vscale{q}_{f16|f32|f64}. We introduced two aarch64_builtin_signatures enum variants, unary and ternary, and added support for these variants in the functions aarch64_fntype and aarch64_expand_pragma_builtin. We added new simd_types for integers (s32, s32q, and s64q) and for floating points (f8 and f8q). Because we added support for fp8 intrinsics here, we modified the check in acle/fp8.c that was checking that __ARM_FEATURE_FP8 macro is not defined. gcc/ChangeLog: * config/aarch64/aarch64-builtins.cc (ENTRY): Modified to support uses_fpmr flag. (enum class): New variants to support new signatures. (struct aarch64_pragma_builtins_data): Add a new boolean field, uses_fpmr. (aarch64_get_number_of_args): Helper function used in aarch64_fntype and aarch64_expand_pragma_builtin. (aarch64_fntype): Handle new signatures. (aarch64_expand_pragma_builtin): Handle new signatures. * config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): New flag for FP8. * config/aarch64/aarch64-simd-pragma-builtins.def (ENTRY_BINARY): Macro to declare binary intrinsics. (ENTRY_TERNARY): Macro to declare ternary intrinsics. (ENTRY_UNARY): Macro to declare unary intrinsics. (ENTRY_VHSDF): Macro to declare binary intrinsics. (ENTRY_VHSDF_VHSDI): Macro to declare binary intrinsics. (REQUIRED_EXTENSIONS): Define to declare functions behind command line flags. * config/aarch64/aarch64-simd.md (@aarch64_): Unary pattern. (@aarch64_): Unary pattern. (@aarch64_lower_): Unary pattern. (@aarch64_lower_): Unary pattern. (@aarch64): Binary pattern. (@aarch64_): Unary pattern. (@aarch64_): Binary pattern. * config/aarch64/iterators.md: New attributes and iterators. gcc/testsuite/ChangeLog: * gcc.target/aarch64/acle/fp8.c: Remove check that fp8 feature macro doesn't exist. * gcc.target/aarch64/simd/scale_fpm.c: New test. * gcc.target/aarch64/simd/vcvt_fpm.c: New test. 
Diff: --- gcc/config/aarch64/aarch64-builtins.cc | 137 +++--- gcc/config/aarch64/aarch64-c.cc| 2 + .../aarch64/aarch64-simd-pragma-builtins.def | 67 +-- gcc/config/aarch64/aarch64-simd.md | 98 ++ gcc/config/aarch64/iterators.md| 65 +++ gcc/testsuite/gcc.target/aarch64/acle/fp8.c| 10 -- gcc/testsuite/gcc.target/aarch64/simd/scale_fpm.c | 60 +++ gcc/testsuite/gcc.target/aarch64/simd/vcvt_fpm.c | 197 + 8 files changed, 587 insertions(+), 49 deletions(-) diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc index ad82c680c6a0..9b7280a30d07 100644 --- a/gcc/config/aarch64/aarch64-builtins.cc +++ b/gcc/config/aarch64/aarch64-builtins.cc @@ -780,7 +780,7 @@ typedef struct AARCH64_SIMD_BUILTIN_##T##_##N##A, #undef ENTRY -#define ENTRY(N, S, M0, M1, M2, M3, U) \ +#define ENTRY(N, S, M0, M1, M2, M3, USES_FPMR, U) \ AARCH64_##N, enum aarch64_builtins @@ -1591,6 +1591,8 @@ aarch64_init_simd_builtin_functions (bool called_from_pragma) enum class aarch64_builtin_signatures { binary, + ternary, + unary, }; namespace { @@ -1602,6 +1604,9 @@ struct simd_type { namespace simd_types { + constexpr simd_type f8 { V8QImode, qualifier_modal_float }; + constexpr simd_type f8q { V16QImode, qualifier_modal_float }; + constexpr simd_type s8 { V8QImode, qualifier_none }; constexpr simd_type u8 { V8QImode, qualifier_unsigned }; constexpr simd_type s8q { V16QImode, qualifier_none }; @@ -1612,6 +1617,11 @@ namespace simd_types { constexpr simd_type s16q { V8HImode, qualifier_none }; constexpr simd_type u16q { V8HImode, qualifier_unsigned }; + constexpr simd_type s32 { V2SImode, qualifier_none }; + constexpr simd_type s32q { V4SImode, qualifier_none }; + + constexpr simd_type s64q { V2DImode, qualifier_none }; + constexpr simd_type p8 { V8QImode, qualifier_poly }; constexpr simd_type p8q { V16QImode, qualifier_poly }; constexpr simd_type p16 { V4HImode,
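A short usage sketch for the scaling intrinsic; the signature (a float vector scaled element-wise by two raised to an integer vector, i.e. FSCALE) is assumed from the intrinsic listing above:

#include <arm_neon.h>

float32x4_t
scale_by_pow2 (float32x4_t x, int32x4_t n)
{
  return vscaleq_f32 (x, n);	/* x[i] * 2^n[i] */
}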
[gcc/devel/existing-fp8] aarch64: Add support for fp8fma instructions
https://gcc.gnu.org/g:8e45a01d0fd36d21c9743f30a25e277b67e79f0e commit 8e45a01d0fd36d21c9743f30a25e277b67e79f0e Author: Saurabh Jha Date: Wed Nov 13 17:16:37 2024 + aarch64: Add support for fp8fma instructions The AArch64 FEAT_FP8FMA extension introduces instructions for multiply-add of vectors. This patch introduces the following instructions: 1. {vmlalbq|vmlaltq}_f16_mf8_fpm. 2. {vmlalbq|vmlaltq}_lane{q}_f16_mf8_fpm. 3. {vmlallbbq|vmlallbtq|vmlalltbq|vmlallttq}_f32_mf8_fpm. 4. {vmlallbbq|vmlallbtq|vmlalltbq|vmlallttq}_lane{q}_f32_mf8_fpm. It introduces the fp8fma flag. gcc/ChangeLog: * config/aarch64/aarch64-builtins.cc (check_simd_lane_bounds): Add support for new unspecs. (aarch64_expand_pragma_builtins): Add support for new unspecs. * config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): New flags. * config/aarch64/aarch64-option-extensions.def (AARCH64_OPT_EXTENSION): New flags. * config/aarch64/aarch64-simd-pragma-builtins.def (ENTRY_FMA_FPM): Macro to declare fma intrinsics. (REQUIRED_EXTENSIONS): Define to declare functions behind command line flags. * config/aarch64/aarch64-simd.md: (@aarch64_unspec == UNSPEC_VDOT2 - ? vector_to_index_mode_size / 2 - 1 - : vector_to_index_mode_size / 4 - 1; + int high; + switch (builtin_data->unspec) + { + case UNSPEC_VDOT2: + high = vector_to_index_mode_size / 2 - 1; + break; + case UNSPEC_VDOT4: + high = vector_to_index_mode_size / 4 - 1; + break; + case UNSPEC_FMLALB: + case UNSPEC_FMLALT: + case UNSPEC_FMLALLBB: + case UNSPEC_FMLALLBT: + case UNSPEC_FMLALLTB: + case UNSPEC_FMLALLTT: + high = vector_to_index_mode_size - 1; + break; + default: + gcc_unreachable (); + } require_immediate_range (location, index_arg, low, high); break; } @@ -3552,6 +3568,12 @@ aarch64_expand_pragma_builtin (tree exp, rtx target, case UNSPEC_VDOT2: case UNSPEC_VDOT4: +case UNSPEC_FMLALB: +case UNSPEC_FMLALT: +case UNSPEC_FMLALLBB: +case UNSPEC_FMLALLBT: +case UNSPEC_FMLALLTB: +case UNSPEC_FMLALLTT: if (builtin_data->signature == aarch64_builtin_signatures::ternary) icode = code_for_aarch64 (builtin_data->unspec, builtin_data->types[0].mode, diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc index ae1472e0fcf2..03f912cde077 100644 --- a/gcc/config/aarch64/aarch64-c.cc +++ b/gcc/config/aarch64/aarch64-c.cc @@ -264,6 +264,8 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) aarch64_def_or_undef (TARGET_FP8DOT4, "__ARM_FEATURE_FP8DOT4", pfile); + aarch64_def_or_undef (TARGET_FP8FMA, "__ARM_FEATURE_FP8FMA", pfile); + aarch64_def_or_undef (TARGET_LS64, "__ARM_FEATURE_LS64", pfile); aarch64_def_or_undef (TARGET_RCPC, "__ARM_FEATURE_RCPC", pfile); diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def index 44d2e18d46bd..8446d1bcd5dc 100644 --- a/gcc/config/aarch64/aarch64-option-extensions.def +++ b/gcc/config/aarch64/aarch64-option-extensions.def @@ -240,6 +240,8 @@ AARCH64_OPT_EXTENSION("fp8dot2", FP8DOT2, (SIMD), (), (), "fp8dot2") AARCH64_OPT_EXTENSION("fp8dot4", FP8DOT4, (SIMD), (), (), "fp8dot4") +AARCH64_OPT_EXTENSION("fp8fma", FP8FMA, (SIMD), (), (), "fp8fma") + AARCH64_OPT_EXTENSION("faminmax", FAMINMAX, (SIMD), (), (), "faminmax") #undef AARCH64_OPT_FMV_EXTENSION diff --git a/gcc/config/aarch64/aarch64-simd-pragma-builtins.def b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def index 4a94a6613f08..c7857123ca03 100644 --- a/gcc/config/aarch64/aarch64-simd-pragma-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def @@ -48,6 +48,12 @@ ENTRY_TERNARY_FPM_LANE 
(vdotq_lane_##T##_mf8_fpm, T##q, T##q, f8q, f8, U) \ ENTRY_TERNARY_FPM_LANE (vdotq_laneq_##T##_mf8_fpm, T##q, T##q, f8q, f8q, U) +#undef ENTRY_FMA_FPM +#define ENTRY_FMA_FPM(N, T, U) \ + ENTRY_TERNARY_FPM (N##_##T##_mf8_fpm, T##q, T##q, f8q, f8q, U) \ + ENTRY_TERNARY_FPM_LANE (N##_lane_##T##_mf8_fpm, T##q, T##q, f8q, f8, U) \ + ENTRY_TERNARY_FPM_LANE (N##_laneq_##T##_mf8_fpm, T##q, T##q, f8q, f8q, U) + #undef ENTRY_VHSDF #define ENTRY_VHSDF(NAME, UNSPEC) \ ENTRY_BINARY (NAME##_f16, f16, f16, f16, UNSPEC) \ @@ -106,3 +112,13 @@ ENTRY_VDOT_FPM (f16, UNSPEC_VDOT2) #define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FP8DOT4) ENTRY_VDOT_FPM (f32, UNSPEC_VDOT4) #undef REQUIRED_EXTENSIONS + +// fp8 multiply-add +#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FP8FMA) +ENTR
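A hedged sketch of the new fp8 multiply-add intrinsics; the signatures (accumulator, two fp8 vectors, fpm_t mode operand) are assumptions based on the naming above, and the code needs +fp8fma:

#include <arm_neon.h>

float16x8_t
mla_b (float16x8_t acc, mfloat8x16_t a, mfloat8x16_t b, fpm_t fpm)
{
  /* Widening multiply-add of fp8 elements into f16 lanes (FMLALB).  */
  return vmlalbq_f16_mf8_fpm (acc, a, b, fpm);
}

float32x4_t
mla_bb (float32x4_t acc, mfloat8x16_t a, mfloat8x16_t b, fpm_t fpm)
{
  /* Widening multiply-add of fp8 elements into f32 lanes (FMLALLBB).  */
  return vmlallbbq_f32_mf8_fpm (acc, a, b, fpm);
}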
[gcc/devel/existing-fp8] aarch64: Refactor infrastructure for advsimd intrinsics
https://gcc.gnu.org/g:1b6b028e272228c54801d7e038ec0536f92b22bb commit 1b6b028e272228c54801d7e038ec0536f92b22bb Author: Vladimir Miloserdov Date: Fri Nov 1 12:35:59 2024 + aarch64: Refactor infrastructure for advsimd intrinsics This patch refactors the infrastructure for defining advsimd pragma intrinsics, adding support for more flexible type and signature handling in future SIMD extensions. A new simd_type structure is introduced, which allows for consistent mode and qualifier management across various advsimd operations. gcc/ChangeLog: * config/aarch64/aarch64-builtins.cc (ENTRY): Modify to include modes and qualifiers for simd_type structure. (ENTRY_VHSDF): Move to aarch64-builtins.cc to decouple. (struct simd_type): New structure for managing mode and qualifier combinations for SIMD types. (struct aarch64_pragma_builtins_data): Replace mode with simd_type to support multiple argument types for intrinsics. (aarch64_fntype): Modify to handle different shapes type. (aarch64_expand_pragma_builtin): Modify to handle different shapes type. * config/aarch64/aarch64-simd-pragma-builtins.def (ENTRY_BINARY): Move from aarch64-builtins.cc. (ENTRY_VHSDF): Move from aarch64-builtins.cc. (REQUIRED_EXTENSIONS): New macro. Diff: --- gcc/config/aarch64/aarch64-builtins.cc | 81 ++ .../aarch64/aarch64-simd-pragma-builtins.def | 15 ++-- 2 files changed, 77 insertions(+), 19 deletions(-) diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc index 97bde7c15d3b..ad82c680c6a0 100644 --- a/gcc/config/aarch64/aarch64-builtins.cc +++ b/gcc/config/aarch64/aarch64-builtins.cc @@ -780,7 +780,7 @@ typedef struct AARCH64_SIMD_BUILTIN_##T##_##N##A, #undef ENTRY -#define ENTRY(N, S, M, U) \ +#define ENTRY(N, S, M0, M1, M2, M3, U) \ AARCH64_##N, enum aarch64_builtins @@ -1593,10 +1593,49 @@ enum class aarch64_builtin_signatures binary, }; +namespace { + +struct simd_type { + machine_mode mode; + aarch64_type_qualifiers qualifiers; +}; + +namespace simd_types { + + constexpr simd_type s8 { V8QImode, qualifier_none }; + constexpr simd_type u8 { V8QImode, qualifier_unsigned }; + constexpr simd_type s8q { V16QImode, qualifier_none }; + constexpr simd_type u8q { V16QImode, qualifier_unsigned }; + + constexpr simd_type s16 { V4HImode, qualifier_none }; + constexpr simd_type u16 { V4HImode, qualifier_unsigned }; + constexpr simd_type s16q { V8HImode, qualifier_none }; + constexpr simd_type u16q { V8HImode, qualifier_unsigned }; + + constexpr simd_type p8 { V8QImode, qualifier_poly }; + constexpr simd_type p8q { V16QImode, qualifier_poly }; + constexpr simd_type p16 { V4HImode, qualifier_poly }; + constexpr simd_type p16q { V8HImode, qualifier_poly }; + + constexpr simd_type f16 { V4HFmode, qualifier_none }; + constexpr simd_type f16q { V8HFmode, qualifier_none }; + constexpr simd_type f32 { V2SFmode, qualifier_none }; + constexpr simd_type f32q { V4SFmode, qualifier_none }; + constexpr simd_type f64q { V2DFmode, qualifier_none }; + + constexpr simd_type bf16 { V4BFmode, qualifier_none }; + constexpr simd_type bf16q { V8BFmode, qualifier_none }; + + constexpr simd_type none { VOIDmode, qualifier_none }; +} + +} + #undef ENTRY -#define ENTRY(N, S, M, U) \ - {#N, aarch64_builtin_signatures::S, E_##M##mode, U, \ - aarch64_required_extensions::REQUIRED_EXTENSIONS}, +#define ENTRY(N, S, T0, T1, T2, T3, U) \ + {#N, aarch64_builtin_signatures::S, simd_types::T0, simd_types::T1, \ +simd_types::T2, simd_types::T3, U, \ +aarch64_required_extensions::REQUIRED_EXTENSIONS}, /* Initialize pragma 
builtins. */ @@ -1604,7 +1643,7 @@ struct aarch64_pragma_builtins_data { const char *name; aarch64_builtin_signatures signature; - machine_mode mode; + simd_type types[4]; int unspec; aarch64_required_extensions required_extensions; }; @@ -1616,11 +1655,19 @@ static aarch64_pragma_builtins_data aarch64_pragma_builtins[] = { static tree aarch64_fntype (const aarch64_pragma_builtins_data &builtin_data) { - auto type = aarch64_simd_builtin_type (builtin_data.mode, qualifier_none); + tree type0, type1, type2; + switch (builtin_data.signature) { case aarch64_builtin_signatures::binary: - return build_function_type_list (type, type, type, NULL_TREE); + type0 = aarch64_simd_builtin_type (builtin_data.types[0].mode, + builtin_data.types[0].qualifiers); + type1 = aarch64_simd_builtin_type (builtin_data.types[1].mode, + builtin_data.types[1].qualifiers); + type2 = aarch64_simd_builtin_type (builtin_data.types[2].mode, + builtin_data.types[2].qualifiers); + return build_function_type_list (type0, type1, type2, NULL_TREE); + default
[gcc r15-7097] AArch64: Add LUTI ACLE for SVE2
https://gcc.gnu.org/g:eb0b551c5570d98dd7cf21fa1bd0240a0c9d875f commit r15-7097-geb0b551c5570d98dd7cf21fa1bd0240a0c9d875f Author: Vladimir Miloserdov Date: Fri May 31 16:26:11 2024 + AArch64: Add LUTI ACLE for SVE2 This patch introduces support for LUTI2/LUTI4 ACLE for SVE2. LUTI instructions are used for efficient table lookups with 2-bit or 4-bit indices. LUTI2 reads indexed 8-bit or 16-bit elements from the low 128 bits of the table vector using packed 2-bit indices, while LUTI4 can read from the low 128 or 256 bits of the table vector or from two table vectors using packed 4-bit indices. These instructions fill the destination vector by copying elements indexed by segments of the source vector, selected by the vector segment index. The changes include the addition of a new AArch64 option extension "lut", __ARM_FEATURE_LUT preprocessor macro, definitions for the new LUTI instruction shapes, and implementations of the svluti2 and svluti4 builtins. gcc/ChangeLog: * config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): Add new flag TARGET_LUT. * config/aarch64/aarch64-sve-builtins-shapes.cc (struct luti_base): Shape for lut intrinsics. (SHAPE): Specializations for lut shapes for luti2 and luti4.. * config/aarch64/aarch64-sve-builtins-shapes.h: Declare lut intrinsics. * config/aarch64/aarch64-sve-builtins-sve2.cc (class svluti_lane_impl): Define expand for lut intrinsics. (FUNCTION): Define expand for lut intrinsics. * config/aarch64/aarch64-sve-builtins-sve2.def (REQUIRED_EXTENSIONS): Declare lut intrinsics behind lut flag. (svluti2_lane): Define intrinsic behind flag. (svluti4_lane): Define intrinsic behind flag. * config/aarch64/aarch64-sve-builtins-sve2.h: Declare lut intrinsics. * config/aarch64/aarch64-sve-builtins.cc (TYPES_bh_data): New type for byte and halfword. (bh_data): Type array for byte and halfword. (h_data): Type array for halfword. * config/aarch64/aarch64-sve2.md (@aarch64_sve_luti): Instruction patterns for lut intrinsics. * config/aarch64/iterators.md: Iterators and attributes for lut intrinsics. gcc/testsuite/ChangeLog: * gcc.target/aarch64/sve/acle/asm/test_sve_acle.h: New test macro. * lib/target-supports.exp: Add lut flag to the for loop. * gcc.target/aarch64/sve/acle/general-c/lut_1.c: New test. * gcc.target/aarch64/sve/acle/general-c/lut_2.c: New test. * gcc.target/aarch64/sve/acle/general-c/lut_3.c: New test. * gcc.target/aarch64/sve/acle/general-c/lut_4.c: New test. * gcc.target/aarch64/sve2/acle/asm/luti2_bf16.c: New test. * gcc.target/aarch64/sve2/acle/asm/luti2_f16.c: New test. * gcc.target/aarch64/sve2/acle/asm/luti2_s16.c: New test. * gcc.target/aarch64/sve2/acle/asm/luti2_s8.c: New test. * gcc.target/aarch64/sve2/acle/asm/luti2_u16.c: New test. * gcc.target/aarch64/sve2/acle/asm/luti2_u8.c: New test. * gcc.target/aarch64/sve2/acle/asm/luti4_bf16.c: New test. * gcc.target/aarch64/sve2/acle/asm/luti4_bf16_x2.c: New test. * gcc.target/aarch64/sve2/acle/asm/luti4_f16.c: New test. * gcc.target/aarch64/sve2/acle/asm/luti4_f16_x2.c: New test. * gcc.target/aarch64/sve2/acle/asm/luti4_s16.c: New test. * gcc.target/aarch64/sve2/acle/asm/luti4_s16_x2.c: New test. * gcc.target/aarch64/sve2/acle/asm/luti4_s8.c: New test. * gcc.target/aarch64/sve2/acle/asm/luti4_u16.c: New test. * gcc.target/aarch64/sve2/acle/asm/luti4_u16_x2.c: New test. * gcc.target/aarch64/sve2/acle/asm/luti4_u8.c: New test. 
Diff: --- gcc/config/aarch64/aarch64-c.cc| 2 + gcc/config/aarch64/aarch64-sve-builtins-shapes.cc | 47 gcc/config/aarch64/aarch64-sve-builtins-shapes.h | 2 + gcc/config/aarch64/aarch64-sve-builtins-sve2.cc| 17 ++ gcc/config/aarch64/aarch64-sve-builtins-sve2.def | 8 + gcc/config/aarch64/aarch64-sve-builtins-sve2.h | 2 + gcc/config/aarch64/aarch64-sve-builtins.cc | 8 +- gcc/config/aarch64/aarch64-sve2.md | 33 +++ gcc/config/aarch64/iterators.md| 7 + .../aarch64/sve/acle/asm/test_sve_acle.h | 16 ++ .../gcc.target/aarch64/sve/acle/general-c/lut_1.c | 34 +++ .../gcc.target/aarch64/sve/acle/general-c/lut_2.c | 11 + .../gcc.target/aarch64/sve/acle/general-c/lut_3.c | 92 .../gcc.target/aarch64/sve/acle/general-c/lut_4.c | 262 + .../gcc.target/aarch64/sve2/acle/asm/luti2_bf
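Illustrative usage of the new LUTI intrinsics, following the shape described above (table vector, packed-index vector, constant vector-segment index); exact immediate ranges depend on element size and index width, and the code needs SVE2 plus the new lut extension:

#include <arm_sve.h>

svuint8_t
lut2_u8 (svuint8_t table, svuint8_t indices)
{
  /* 2-bit indices select 8-bit elements from the low 128 bits of TABLE;
     the immediate picks which segment of INDICES is consumed.  */
  return svluti2_lane_u8 (table, indices, 0);
}

svuint16_t
lut4_u16 (svuint16_t table, svuint8_t indices)
{
  /* 4-bit indices select 16-bit elements.  */
  return svluti4_lane_u16 (table, indices, 0);
}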
[gcc r15-7174] Fix command flags for SVE2 faminmax
https://gcc.gnu.org/g:8bdf10fc2e9ac16a296f76a442c068216469b3a3 commit r15-7174-g8bdf10fc2e9ac16a296f76a442c068216469b3a3 Author: Saurabh Jha Date: Tue Jan 21 15:59:39 2025 + Fix command flags for SVE2 faminmax Earlier, we were gating SVE2 faminmax behind sve+faminmax. This was incorrect and this patch changes it so that it is gated behind sve2+faminmax. gcc/ChangeLog: * config/aarch64/aarch64-sve2.md: (*aarch64_pred_faminmax_fused): Fix to use the correct flags. * config/aarch64/aarch64.h (TARGET_SVE_FAMINMAX): Remove. * config/aarch64/iterators.md: Fix iterators so that famax and famin use correct flags. gcc/testsuite/ChangeLog: * gcc.target/aarch64/sve/faminmax_1.c: Fix test to use the correct flags. * gcc.target/aarch64/sve/faminmax_2.c: Fix test to use the correct flags. * gcc.target/aarch64/sve/faminmax_3.c: New test. Diff: --- gcc/config/aarch64/aarch64-sve2.md| 2 +- gcc/config/aarch64/aarch64.h | 1 - gcc/config/aarch64/iterators.md | 8 gcc/testsuite/gcc.target/aarch64/sve/faminmax_1.c | 2 +- gcc/testsuite/gcc.target/aarch64/sve/faminmax_2.c | 2 +- gcc/testsuite/gcc.target/aarch64/sve/faminmax_3.c | 11 +++ 6 files changed, 18 insertions(+), 8 deletions(-) diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md index 60bc03b2650c..3e08e092cd04 100644 --- a/gcc/config/aarch64/aarch64-sve2.md +++ b/gcc/config/aarch64/aarch64-sve2.md @@ -2950,7 +2950,7 @@ (match_operand:SVE_FULL_F 3 "register_operand")] UNSPEC_COND_FABS)] SVE_COND_SMAXMIN))] - "TARGET_SVE_FAMINMAX" + "TARGET_FAMINMAX && TARGET_SVE2_OR_SME2" {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] [ w, Upl , %0 , w ; * ] \t%0., %1/m, %0., %3. [ ?&w , Upl , w , w ; yes] movprfx\t%0, %2\;\t%0., %1/m, %0., %3. diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index 5cbf442130bc..1a19b27fd934 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -472,7 +472,6 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED /* Floating Point Absolute Maximum/Minimum extension instructions are enabled through +faminmax. */ #define TARGET_FAMINMAX AARCH64_HAVE_ISA (FAMINMAX) -#define TARGET_SVE_FAMINMAX (TARGET_SVE && TARGET_FAMINMAX) /* Lookup table (LUTI) extension instructions are enabled through +lut. 
*/ #define TARGET_LUT AARCH64_HAVE_ISA (LUT) diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index e843c66cf268..9fbd74939882 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -3340,8 +3340,8 @@ (define_int_iterator SVE_COND_FP_BINARY [UNSPEC_COND_FADD - (UNSPEC_COND_FAMAX "TARGET_SVE_FAMINMAX") - (UNSPEC_COND_FAMIN "TARGET_SVE_FAMINMAX") + (UNSPEC_COND_FAMAX "TARGET_FAMINMAX && TARGET_SVE2_OR_SME2") + (UNSPEC_COND_FAMIN "TARGET_FAMINMAX && TARGET_SVE2_OR_SME2") UNSPEC_COND_FDIV UNSPEC_COND_FMAX UNSPEC_COND_FMAXNM @@ -3381,8 +3381,8 @@ UNSPEC_COND_SMIN]) (define_int_iterator SVE_COND_FP_BINARY_REG - [(UNSPEC_COND_FAMAX "TARGET_SVE_FAMINMAX") - (UNSPEC_COND_FAMIN "TARGET_SVE_FAMINMAX") + [(UNSPEC_COND_FAMAX "TARGET_FAMINMAX && TARGET_SVE2_OR_SME2") + (UNSPEC_COND_FAMIN "TARGET_FAMINMAX && TARGET_SVE2_OR_SME2") UNSPEC_COND_FDIV UNSPEC_COND_FMULX]) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/faminmax_1.c b/gcc/testsuite/gcc.target/aarch64/sve/faminmax_1.c index 3b65ccea0656..154dbd9de846 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/faminmax_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/faminmax_1.c @@ -3,7 +3,7 @@ #include "arm_sve.h" -#pragma GCC target "+sve+faminmax" +#pragma GCC target "+sve2+faminmax" #define TEST_FAMAX(TYPE) \ void fn_famax_##TYPE (TYPE * restrict a, \ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/faminmax_2.c b/gcc/testsuite/gcc.target/aarch64/sve/faminmax_2.c index d80f6eca8f82..44ecef1e0878 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/faminmax_2.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/faminmax_2.c @@ -3,7 +3,7 @@ #include "arm_sve.h" -#pragma GCC target "+sve+faminmax" +#pragma GCC target "+sve2+faminmax" #define TEST_WITH_SVMAX(TYPE) \ TYPE fn_fmax_##TYPE (TYPE x, TYPE y) { \ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/faminmax_3.c b/gcc/testsuite/gcc.target/aarch64/sve/faminmax_3.c new file mode 100644 index ..2b01fa48b8e9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/faminmax_3.c @@ -0,0 +1,11 @@ +/* {