[gcc(refs/vendors/ARM/heads/gcs-v2)] aarch64: Add GCS support for nonlocal stack save

2024-10-18 Thread Andre Simoes Dias Vieira via Gcc-cvs
https://gcc.gnu.org/g:966e78f319f43356a42a689944ec53f74deb48dc

commit 966e78f319f43356a42a689944ec53f74deb48dc
Author: Szabolcs Nagy 
Date:   Fri Apr 14 18:23:52 2023 +0100

aarch64: Add GCS support for nonlocal stack save

Nonlocal stack save and restore has to also save and restore the GCS
pointer. This is used in __builtin_setjmp/longjmp and nonlocal goto.

The GCS specific code is only emitted if GCS branch-protection is
enabled and the code always checks at runtime if GCS is enabled.

The new -mbranch-protection=gcs and old -mbranch-protection=none code
are ABI compatible: jmpbuf for __builtin_setjmp has space for 5
pointers, the layout is

  old layout: fp, pc, sp, unused, unused
  new layout: fp, pc, sp, gcsp, unused

Note: the ILP32 code generation is wrong as it saves the pointers with
Pmode (i.e. 8 bytes per pointer), but the user supplied buffer size is
for 5 pointers (4 bytes per pointer), this is not fixed.

The nonlocal goto has no ABI compatibility issues as the goto and its
destination are in the same translation unit.

gcc/ChangeLog:

* config/aarch64/aarch64.h (STACK_SAVEAREA_MODE): Make space for 
gcs.
* config/aarch64/aarch64.md (save_stack_nonlocal): New.
(restore_stack_nonlocal): New.

Diff:
---
 gcc/config/aarch64/aarch64.h  |  7 
 gcc/config/aarch64/aarch64.md | 82 +++
 2 files changed, 89 insertions(+)

diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 030cffb17606..c8f764ea550d 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -1296,6 +1296,13 @@ typedef struct
 #define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
   ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
 
+/* Have space for both SP and GCSPR in the NONLOCAL case in
+   emit_stack_save as well as in __builtin_setjmp, __builtin_longjmp
+   and __builtin_nonlocal_goto.
+   Note: On ILP32 the documented buf size is not enough PR84150.  */
+#define STACK_SAVEAREA_MODE(LEVEL) \
+  ((LEVEL) == SAVE_NONLOCAL ? TImode : Pmode)
+
 #define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, LR_REGNUM)
 
 #define RETURN_ADDR_RTX aarch64_return_addr
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index e4e11e35b5bd..6e1646387d89 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1200,6 +1200,88 @@
  (const_int 1)))]
 )
 
+(define_expand "save_stack_nonlocal"
+  [(set (match_operand 0 "memory_operand")
+(match_operand 1 "register_operand"))]
+  ""
+{
+  rtx stack_slot = adjust_address (operands[0], Pmode, 0);
+  emit_move_insn (stack_slot, operands[1]);
+
+  if (aarch64_gcs_enabled ())
+{
+  /* Save GCS with code like
+   mov x16, 1
+   chkfeat x16
+   tbnzx16, 0, .L_done
+   mrs tmp, gcspr_el0
+   str tmp, [%0, 8]
+   .L_done:  */
+
+  rtx done_label = gen_label_rtx ();
+  rtx r16 = gen_rtx_REG (DImode, R16_REGNUM);
+  emit_move_insn (r16, const1_rtx);
+  emit_insn (gen_aarch64_chkfeat ());
+  emit_insn (gen_tbranch_neqi3 (r16, const0_rtx, done_label));
+  rtx gcs_slot = adjust_address (operands[0], Pmode, GET_MODE_SIZE 
(Pmode));
+  rtx gcs = force_reg (Pmode, const0_rtx);
+  emit_insn (gen_aarch64_load_gcspr (gcs));
+  emit_move_insn (gcs_slot, gcs);
+  emit_label (done_label);
+}
+  DONE;
+})
+
+(define_expand "restore_stack_nonlocal"
+  [(set (match_operand 0 "register_operand" "")
+   (match_operand 1 "memory_operand" ""))]
+  ""
+{
+  rtx stack_slot = adjust_address (operands[1], Pmode, 0);
+  emit_move_insn (operands[0], stack_slot);
+
+  if (aarch64_gcs_enabled ())
+{
+  /* Restore GCS with code like
+   mov x16, 1
+   chkfeat x16
+   tbnzx16, 0, .L_done
+   ldr tmp1, [%1, 8]
+   mrs tmp2, gcspr_el0
+   substmp2, tmp1, tmp2
+   b.eq.L_done
+   .L_loop:
+   gcspopm
+   substmp2, tmp2, 8
+   b.ne.L_loop
+   .L_done:  */
+
+  rtx loop_label = gen_label_rtx ();
+  rtx done_label = gen_label_rtx ();
+  rtx r16 = gen_rtx_REG (DImode, R16_REGNUM);
+  emit_move_insn (r16, const1_rtx);
+  emit_insn (gen_aarch64_chkfeat ());
+  emit_insn (gen_tbranch_neqi3 (r16, const0_rtx, done_label));
+  rtx gcs_slot = adjust_address (operands[1], Pmode, GET_MODE_SIZE 
(Pmode));
+  rtx gcs_old = force_reg (Pmode, const0_rtx);
+  emit_move_insn (gcs_old, gcs_slot);
+  rtx gcs_now = force_reg (Pmode, const0_rtx);
+  emit_insn (gen_aarch64_load_gcspr (gcs_now));
+  emit_insn (gen_subdi3_compare1 (gcs_now, gcs_old, gcs_now));
+  rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
+   

[gcc r15-4483] hppa: Add LRA support

2024-10-18 Thread John David Anglin via Gcc-cvs
https://gcc.gnu.org/g:44a81aaf73f795e6992cbfb98ec48480e5ca94ec

commit r15-4483-g44a81aaf73f795e6992cbfb98ec48480e5ca94ec
Author: John David Anglin 
Date:   Fri Oct 18 11:28:23 2024 -0400

hppa: Add LRA support

LRA is not enabled by default since there are some new test failures
remaining to resolve.

2024-10-18  John David Anglin  

gcc/ChangeLog:

PR target/113933
* config/pa/pa.cc (pa_use_lra_p): Declare.
(TARGET_LRA_P): Change define to pa_use_lra_p.
(pa_use_lra_p): New function.
(legitimize_pic_address): Also check lra_in_progress.
(pa_emit_move_sequence): Likewise.
(pa_legitimate_constant_p): Likewise.
(pa_legitimate_address_p): Likewise.
(pa_secondary_reload): For floating-point loads and stores,
return NO_REGS for REG and SUBREG operands.  Return
GENERAL_REGS for some shift register spills.
* config/pa/pa.opt: Add mlra option.
* config/pa/predicates.md (integer_store_memory_operand):
Also check lra_in_progress.
(floating_point_store_memory_operand): Likewise.
(reg_before_reload_operand): Likewise.

Diff:
---
 gcc/config/pa/pa.cc | 86 -
 gcc/config/pa/pa.opt|  4 +++
 gcc/config/pa/predicates.md | 14 
 3 files changed, 66 insertions(+), 38 deletions(-)

diff --git a/gcc/config/pa/pa.cc b/gcc/config/pa/pa.cc
index 84aa4f1b1f2a..62f8764b7ca5 100644
--- a/gcc/config/pa/pa.cc
+++ b/gcc/config/pa/pa.cc
@@ -209,6 +209,7 @@ static bool pa_can_change_mode_class (machine_mode, 
machine_mode, reg_class_t);
 static HOST_WIDE_INT pa_starting_frame_offset (void);
 static section* pa_elf_select_rtx_section(machine_mode, rtx, unsigned 
HOST_WIDE_INT) ATTRIBUTE_UNUSED;
 static void pa_atomic_assign_expand_fenv (tree *, tree *, tree *);
+static bool pa_use_lra_p (void);
 
 /* The following extra sections are only used for SOM.  */
 static GTY(()) section *som_readonly_data_section;
@@ -412,7 +413,7 @@ static size_t n_deferred_plabels = 0;
 #define TARGET_LEGITIMATE_ADDRESS_P pa_legitimate_address_p
 
 #undef TARGET_LRA_P
-#define TARGET_LRA_P hook_bool_void_false
+#define TARGET_LRA_P pa_use_lra_p
 
 #undef TARGET_HARD_REGNO_NREGS
 #define TARGET_HARD_REGNO_NREGS pa_hard_regno_nregs
@@ -973,7 +974,7 @@ legitimize_pic_address (rtx orig, machine_mode mode, rtx 
reg)
 
   /* During and after reload, we need to generate a REG_LABEL_OPERAND note
 and update LABEL_NUSES because this is not done automatically.  */
-  if (reload_in_progress || reload_completed)
+  if (lra_in_progress || reload_in_progress || reload_completed)
{
  /* Extract LABEL_REF.  */
  if (GET_CODE (orig) == CONST)
@@ -998,7 +999,7 @@ legitimize_pic_address (rtx orig, machine_mode mode, rtx 
reg)
   /* Before reload, allocate a temporary register for the intermediate
 result.  This allows the sequence to be deleted when the final
 result is unused and the insns are trivially dead.  */
-  tmp_reg = ((reload_in_progress || reload_completed)
+  tmp_reg = ((lra_in_progress || reload_in_progress || reload_completed)
 ? reg : gen_reg_rtx (Pmode));
 
   if (function_label_operand (orig, VOIDmode))
@@ -1959,11 +1960,13 @@ pa_emit_move_sequence (rtx *operands, machine_mode 
mode, rtx scratch_reg)
   copy_to_mode_reg (Pmode, XEXP (operand1, 0)));
 
   if (scratch_reg
-  && reload_in_progress && GET_CODE (operand0) == REG
+  && reload_in_progress
+  && GET_CODE (operand0) == REG
   && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
 operand0 = reg_equiv_mem (REGNO (operand0));
   else if (scratch_reg
-  && reload_in_progress && GET_CODE (operand0) == SUBREG
+  && reload_in_progress
+  && GET_CODE (operand0) == SUBREG
   && GET_CODE (SUBREG_REG (operand0)) == REG
   && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
 {
@@ -1976,11 +1979,13 @@ pa_emit_move_sequence (rtx *operands, machine_mode 
mode, rtx scratch_reg)
 }
 
   if (scratch_reg
-  && reload_in_progress && GET_CODE (operand1) == REG
+  && reload_in_progress
+  && GET_CODE (operand1) == REG
   && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
 operand1 = reg_equiv_mem (REGNO (operand1));
   else if (scratch_reg
-  && reload_in_progress && GET_CODE (operand1) == SUBREG
+  && reload_in_progress
+  && GET_CODE (operand1) == SUBREG
   && GET_CODE (SUBREG_REG (operand1)) == REG
   && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
 {
@@ -1992,12 +1997,16 @@ pa_emit_move_sequence (rtx *operands, machine_mode 
mode, rtx scratch_reg)
   operand1 = alter_subreg (&temp, true);
 }
 
-  if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
+  if (scratc

[gcc r15-4479] i386: Fix the order of operands in andn3 [PR117192]

2024-10-18 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:3a12ac403251e0a1542609d7a4d8a464a5e1dc86

commit r15-4479-g3a12ac403251e0a1542609d7a4d8a464a5e1dc86
Author: Uros Bizjak 
Date:   Fri Oct 18 16:04:12 2024 +0200

i386: Fix the order of operands in andn3 [PR117192]

Fix the order of operands in andn3 expander to comply
with the specification, where bitwise-complement applies to operand 2.

PR target/117192

gcc/ChangeLog:

* config/i386/mmx.md (andn3): Swap operand
indexes 1 and 2 to comply with andn specification.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr117192.c: New test.

Diff:
---
 gcc/config/i386/mmx.md   |  6 +++---
 gcc/testsuite/gcc.target/i386/pr117192.c | 16 
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index ef4ed8b501a1..506f4cab6a81 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -4470,9 +4470,9 @@
 (define_expand "andn3"
   [(set (match_operand:MMXMODEI 0 "register_operand")
 (and:MMXMODEI
-  (not:MMXMODEI (match_operand:MMXMODEI 1 "register_operand"))
-  (match_operand:MMXMODEI 2 "register_operand")))]
-  "TARGET_SSE2")
+  (not:MMXMODEI (match_operand:MMXMODEI 2 "register_operand"))
+  (match_operand:MMXMODEI 1 "register_operand")))]
+  "TARGET_MMX_WITH_SSE")
 
 (define_insn "mmx_andnot3"
   [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,x,v")
diff --git a/gcc/testsuite/gcc.target/i386/pr117192.c 
b/gcc/testsuite/gcc.target/i386/pr117192.c
new file mode 100644
index ..8480c72dc0e5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr117192.c
@@ -0,0 +1,16 @@
+/* PR target/117192 */
+/* { dg-do run } */
+/* { dg-options "-O3 -fno-unswitch-loops" } */
+
+int a, b, c, d;
+int main() {
+  int e[6];
+  for (d = 0; d < 6; d++)
+if (!c)
+  e[d] = 0;
+  for (; b < 6; b++)
+a = e[b];
+  if (a != 0)
+__builtin_abort();
+  return 0;
+}


[gcc r15-4467] [2/n] remove no-vfa-*.c special-casing of gcc.dg/vect/ files

2024-10-18 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:d3d41ec6092ac0d2247d7224957a5ec3a68177e7

commit r15-4467-gd3d41ec6092ac0d2247d7224957a5ec3a68177e7
Author: Richard Biener 
Date:   Fri Oct 18 13:24:51 2024 +0200

[2/n] remove no-vfa-*.c special-casing of gcc.dg/vect/ files

The following makes --param vect-max-version-for-alias-checks=0
explicit.

* gcc.dg/vect/vect.exp: Remove special-casing of tests
named no-vfa-*
* gcc.dg/vect/no-vfa-pr29145.c: Add dg-additional-options
--param vect-max-version-for-alias-checks=0.
* gcc.dg/vect/no-vfa-vect-101.c: Likewise.
* gcc.dg/vect/no-vfa-vect-102.c: Likewise.
* gcc.dg/vect/no-vfa-vect-102a.c: Likewise.
* gcc.dg/vect/no-vfa-vect-37.c: Likewise.
* gcc.dg/vect/no-vfa-vect-43.c: Likewise.
* gcc.dg/vect/no-vfa-vect-45.c: Likewise.
* gcc.dg/vect/no-vfa-vect-49.c: Likewise.
* gcc.dg/vect/no-vfa-vect-51.c: Likewise.
* gcc.dg/vect/no-vfa-vect-53.c: Likewise.
* gcc.dg/vect/no-vfa-vect-57.c: Likewise.
* gcc.dg/vect/no-vfa-vect-61.c: Likewise.
* gcc.dg/vect/no-vfa-vect-79.c: Likewise.
* gcc.dg/vect/no-vfa-vect-depend-1.c: Likewise.
* gcc.dg/vect/no-vfa-vect-depend-2.c: Likewise.
* gcc.dg/vect/no-vfa-vect-depend-3.c: Likewise.
* gcc.dg/vect/no-vfa-vect-dv-2.c: Likewise.

Diff:
---
 gcc/testsuite/gcc.dg/vect/no-vfa-pr29145.c   |  2 +-
 gcc/testsuite/gcc.dg/vect/no-vfa-vect-101.c  |  1 +
 gcc/testsuite/gcc.dg/vect/no-vfa-vect-102.c  |  1 +
 gcc/testsuite/gcc.dg/vect/no-vfa-vect-102a.c |  1 +
 gcc/testsuite/gcc.dg/vect/no-vfa-vect-37.c   |  1 +
 gcc/testsuite/gcc.dg/vect/no-vfa-vect-43.c   |  1 +
 gcc/testsuite/gcc.dg/vect/no-vfa-vect-45.c   |  1 +
 gcc/testsuite/gcc.dg/vect/no-vfa-vect-49.c   |  1 +
 gcc/testsuite/gcc.dg/vect/no-vfa-vect-51.c   |  1 +
 gcc/testsuite/gcc.dg/vect/no-vfa-vect-53.c   |  1 +
 gcc/testsuite/gcc.dg/vect/no-vfa-vect-57.c   |  1 +
 gcc/testsuite/gcc.dg/vect/no-vfa-vect-61.c   |  1 +
 gcc/testsuite/gcc.dg/vect/no-vfa-vect-79.c   |  1 +
 gcc/testsuite/gcc.dg/vect/no-vfa-vect-depend-1.c |  1 +
 gcc/testsuite/gcc.dg/vect/no-vfa-vect-depend-2.c |  1 +
 gcc/testsuite/gcc.dg/vect/no-vfa-vect-depend-3.c |  1 +
 gcc/testsuite/gcc.dg/vect/no-vfa-vect-dv-2.c |  2 +-
 gcc/testsuite/gcc.dg/vect/vect.exp   | 10 +++---
 18 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/no-vfa-pr29145.c 
b/gcc/testsuite/gcc.dg/vect/no-vfa-pr29145.c
index 45cca1d1991c..cb8c72bdea3a 100644
--- a/gcc/testsuite/gcc.dg/vect/no-vfa-pr29145.c
+++ b/gcc/testsuite/gcc.dg/vect/no-vfa-pr29145.c
@@ -1,5 +1,5 @@
 /* { dg-require-effective-target vect_int } */
-/* { dg-additional-options "-fno-ipa-icf" } */
+/* { dg-additional-options "--param vect-max-version-for-alias-checks=0 
-fno-ipa-icf" } */
 
 #include 
 #include "tree-vect.h"
diff --git a/gcc/testsuite/gcc.dg/vect/no-vfa-vect-101.c 
b/gcc/testsuite/gcc.dg/vect/no-vfa-vect-101.c
index 73b92177dabf..4b2b0f60b4c0 100644
--- a/gcc/testsuite/gcc.dg/vect/no-vfa-vect-101.c
+++ b/gcc/testsuite/gcc.dg/vect/no-vfa-vect-101.c
@@ -1,4 +1,5 @@
 /* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "--param vect-max-version-for-alias-checks=0" } */
 
 #include 
 #include 
diff --git a/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102.c 
b/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102.c
index 9a3fdab128a3..26b9cd1c4276 100644
--- a/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102.c
+++ b/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102.c
@@ -1,4 +1,5 @@
 /* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "--param vect-max-version-for-alias-checks=0" } */
 
 #include 
 #include 
diff --git a/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102a.c 
b/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102a.c
index 439347c3bb10..5b9905a04ee0 100644
--- a/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102a.c
+++ b/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102a.c
@@ -1,4 +1,5 @@
 /* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "--param vect-max-version-for-alias-checks=0" } */
 
 #include 
 #include 
diff --git a/gcc/testsuite/gcc.dg/vect/no-vfa-vect-37.c 
b/gcc/testsuite/gcc.dg/vect/no-vfa-vect-37.c
index f59eb69d99fb..347af57b7c69 100644
--- a/gcc/testsuite/gcc.dg/vect/no-vfa-vect-37.c
+++ b/gcc/testsuite/gcc.dg/vect/no-vfa-vect-37.c
@@ -1,4 +1,5 @@
 /* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "--param vect-max-version-for-alias-checks=0" } */
 
 #include 
 #include "tree-vect.h"
diff --git a/gcc/testsuite/gcc.dg/vect/no-vfa-vect-43.c 
b/gcc/testsuite/gcc.dg/vect/no-vfa-vect-43.c
index 6b4542f5948b..d06079e3d72b 100644
--- a/gcc/testsuite/gcc.dg/vect/no-vfa-vect-43.c
+++ b/gcc/testsuite/gcc.dg/vect/no-vfa-vect-43.c
@@ -1,4 +1,5 @@
 /* { dg-require-effective-target vect_float } */
+

[gcc r15-4480] [PATCH 1/7] RISC-V: Fix indentation in riscv_vector::expand_block_move [NFC]

2024-10-18 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:f244492ec258d84ab253bd58ad57f31c65a2312d

commit r15-4480-gf244492ec258d84ab253bd58ad57f31c65a2312d
Author: Craig Blackmore 
Date:   Fri Oct 18 09:01:35 2024 -0600

[PATCH 1/7] RISC-V: Fix indentation in riscv_vector::expand_block_move [NFC]

gcc/ChangeLog:

* config/riscv/riscv-string.cc (expand_block_move): Fix
indentation.

Diff:
---
 gcc/config/riscv/riscv-string.cc | 32 
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
index 4bb8bcec4a50..0c5ffd7d861e 100644
--- a/gcc/config/riscv/riscv-string.cc
+++ b/gcc/config/riscv/riscv-string.cc
@@ -1086,22 +1086,22 @@ expand_block_move (rtx dst_in, rtx src_in, rtx 
length_in)
 {
   HOST_WIDE_INT length = INTVAL (length_in);
 
-/* By using LMUL=8, we can copy as many bytes in one go as there
-   are bits in a vector register.  If the entire block thus fits,
-   we don't need a loop.  */
-if (length <= TARGET_MIN_VLEN)
-  {
-   need_loop = false;
-
-   /* If a single scalar load / store pair can do the job, leave it
-  to the scalar code to do that.  */
-   /* ??? If fast unaligned access is supported, the scalar code could
-  use suitably sized scalars irrespective of alignment.  If that
-  gets fixed, we have to adjust the test here.  */
-
-   if (pow2p_hwi (length) && length <= potential_ew)
- return false;
-  }
+  /* By using LMUL=8, we can copy as many bytes in one go as there
+are bits in a vector register.  If the entire block thus fits,
+we don't need a loop.  */
+  if (length <= TARGET_MIN_VLEN)
+   {
+ need_loop = false;
+
+ /* If a single scalar load / store pair can do the job, leave it
+to the scalar code to do that.  */
+ /* ??? If fast unaligned access is supported, the scalar code could
+use suitably sized scalars irrespective of alignment.  If that
+gets fixed, we have to adjust the test here.  */
+
+ if (pow2p_hwi (length) && length <= potential_ew)
+   return false;
+   }
 
   /* Find the vector mode to use.  Using the largest possible element
 size is likely to give smaller constants, and thus potentially


[gcc r15-4471] SVE intrinsics: Add fold_active_lanes_to method to refactor svmul and svdiv.

2024-10-18 Thread Jennifer Schmitz via Gcc-cvs
https://gcc.gnu.org/g:e69c2e212011f2bfa6f8c3748d902690b7a3639a

commit r15-4471-ge69c2e212011f2bfa6f8c3748d902690b7a3639a
Author: Jennifer Schmitz 
Date:   Fri Sep 27 08:02:53 2024 -0700

SVE intrinsics: Add fold_active_lanes_to method to refactor svmul and svdiv.

As suggested in
https://gcc.gnu.org/pipermail/gcc-patches/2024-September/663275.html,
this patch adds the method gimple_folder::fold_active_lanes_to (tree X).
This method folds active lanes to X and sets inactive lanes according to
the predication, returning a new gimple statement. That makes folding of
SVE intrinsics easier and reduces code duplication in the
svxxx_impl::fold implementations.
Using this new method, svdiv_impl::fold and svmul_impl::fold were 
refactored.
Additionally, the method was used for two optimizations:
1) Fold svdiv to the dividend, if the divisor is all ones and
2) for svmul, if one of the operands is all ones, fold to the other operand.
Both optimizations were previously applied to _x and _m predication on
the RTL level, but not for _z, where svdiv/svmul were still being used.
For both optimizations, codegen was improved by this patch, for example by
skipping sel instructions with all-same operands and replacing sel
instructions by mov instructions.

The patch was bootstrapped and regtested on aarch64-linux-gnu, no 
regression.
OK for mainline?

Signed-off-by: Jennifer Schmitz 

gcc/
* config/aarch64/aarch64-sve-builtins-base.cc (svdiv_impl::fold):
Refactor using fold_active_lanes_to and fold to dividend, if the
divisor is all ones.
(svmul_impl::fold): Refactor using fold_active_lanes_to and fold
to the other operand, if one of the operands is all ones.
* config/aarch64/aarch64-sve-builtins.h: Declare
gimple_folder::fold_active_lanes_to (tree).
* config/aarch64/aarch64-sve-builtins.cc
(gimple_folder::fold_active_lanes_to): Add new method to fold
active lanes to given argument and set inactive lanes
according to the predication.

gcc/testsuite/
* gcc.target/aarch64/sve/acle/asm/div_s32.c: Adjust expected 
outcome.
* gcc.target/aarch64/sve/acle/asm/div_s64.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/div_u32.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/div_u64.c: Likewise.
* gcc.target/aarch64/sve/fold_div_zero.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/mul_s16.c: New test.
* gcc.target/aarch64/sve/acle/asm/mul_s32.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/mul_s64.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/mul_s8.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/mul_u16.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/mul_u32.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/mul_u64.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/mul_u8.c: Likewise.
* gcc.target/aarch64/sve/mul_const_run.c: Likewise.

Diff:
---
 gcc/config/aarch64/aarch64-sve-builtins-base.cc| 39 +---
 gcc/config/aarch64/aarch64-sve-builtins.cc | 27 ++
 gcc/config/aarch64/aarch64-sve-builtins.h  |  1 +
 .../gcc.target/aarch64/sve/acle/asm/div_s32.c  | 13 +++
 .../gcc.target/aarch64/sve/acle/asm/div_s64.c  | 13 +++
 .../gcc.target/aarch64/sve/acle/asm/div_u32.c  | 13 +++
 .../gcc.target/aarch64/sve/acle/asm/div_u64.c  | 13 +++
 .../gcc.target/aarch64/sve/acle/asm/mul_s16.c  | 43 --
 .../gcc.target/aarch64/sve/acle/asm/mul_s32.c  | 43 --
 .../gcc.target/aarch64/sve/acle/asm/mul_s64.c  | 43 --
 .../gcc.target/aarch64/sve/acle/asm/mul_s8.c   | 43 --
 .../gcc.target/aarch64/sve/acle/asm/mul_u16.c  | 43 --
 .../gcc.target/aarch64/sve/acle/asm/mul_u32.c  | 43 --
 .../gcc.target/aarch64/sve/acle/asm/mul_u64.c  | 43 --
 .../gcc.target/aarch64/sve/acle/asm/mul_u8.c   | 43 --
 .../gcc.target/aarch64/sve/fold_div_zero.c | 12 ++
 .../gcc.target/aarch64/sve/mul_const_run.c |  6 +++
 17 files changed, 387 insertions(+), 94 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc 
b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index 1c17149e1f07..70bd83005d7c 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -758,18 +758,15 @@ public:
 if (auto *res = f.fold_const_binary (TRUNC_DIV_EXPR))
   return res;
 
-/* If the dividend is all zeros, fold to zero vector.  */
+/* If the divisor is all ones, fold to dividend.  */
 tree op1 = gimple_call_arg (f.call, 1);
-i

[gcc r15-4490] c: Fix -std=gnu23 -Wtraditional for () in function definitions

2024-10-18 Thread Joseph Myers via Gcc-cvs
https://gcc.gnu.org/g:d277ded292d7e9eeaa0b8bce6782c4cd6b32d2c0

commit r15-4490-gd277ded292d7e9eeaa0b8bce6782c4cd6b32d2c0
Author: Joseph Myers 
Date:   Sat Oct 19 00:20:15 2024 +

c: Fix -std=gnu23 -Wtraditional for () in function definitions

We don't yet have clear agreement on removing -Wtraditional (although
it seems there is little to no use for most of the warnings therein),
so fix the bug in its interaction with -std=gnu23 to continue progress
on making -std=gnu23 the default while -Wtraditional remains under
discussion.

The warning for ISO C function definitions with -Wtraditional properly
covers (void), but also wrongly warned for () in C23 mode as that has
the same semantics as (void) in that case.  Keep track in c_arg_info
of when () was converted to (void) for C23 so that -Wtraditional can
avoid warning in that case (with an appropriate comment on the
definition of the new field to make clear it can be removed along with
-Wtraditional).

Bootstrapped with no regressions for x86_64-pc-linux-gnu.

gcc/c/
* c-tree.h (c_arg_info): Add c23_empty_parens.
* c-decl.cc (grokparms): Set c23_empty_parens.
(build_arg_info): Clear c23_empty_parens.
(store_parm_decls_newstyle): Do not give -Wtraditional warning for
ISO C function definition if c23_empty_parens.

gcc/testsuite/
* gcc.dg/wtr-gnu17-1.c, gcc.dg/wtr-gnu23-1.c: New tests.

Diff:
---
 gcc/c/c-decl.cc| 9 +++--
 gcc/c/c-tree.h | 4 
 gcc/testsuite/gcc.dg/wtr-gnu17-1.c | 9 +
 gcc/testsuite/gcc.dg/wtr-gnu23-1.c | 9 +
 4 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/gcc/c/c-decl.cc b/gcc/c/c-decl.cc
index 491c24b9fe7e..3733ecfc13fc 100644
--- a/gcc/c/c-decl.cc
+++ b/gcc/c/c-decl.cc
@@ -8519,7 +8519,10 @@ grokparms (struct c_arg_info *arg_info, bool 
funcdef_flag)
  && !arg_types
  && !arg_info->parms
  && !arg_info->no_named_args_stdarg_p)
-   arg_types = arg_info->types = void_list_node;
+   {
+ arg_types = arg_info->types = void_list_node;
+ arg_info->c23_empty_parens = 1;
+   }
 
   /* If there is a parameter of incomplete type in a definition,
 this is an error.  In a declaration this is valid, and a
@@ -8589,6 +8592,7 @@ build_arg_info (void)
   ret->pending_sizes = NULL;
   ret->had_vla_unspec = 0;
   ret->no_named_args_stdarg_p = 0;
+  ret->c23_empty_parens = 0;
   return ret;
 }
 
@@ -10923,7 +10927,8 @@ store_parm_decls_newstyle (tree fndecl, const struct 
c_arg_info *arg_info)
  its parameter list).  */
   else if (!in_system_header_at (input_location)
   && !current_function_scope
-  && arg_info->types != error_mark_node)
+  && arg_info->types != error_mark_node
+  && !arg_info->c23_empty_parens)
 warning_at (DECL_SOURCE_LOCATION (fndecl), OPT_Wtraditional,
"traditional C rejects ISO C style function definitions");
 
diff --git a/gcc/c/c-tree.h b/gcc/c/c-tree.h
index bfdcb78bbcc1..a1435e7cb0ca 100644
--- a/gcc/c/c-tree.h
+++ b/gcc/c/c-tree.h
@@ -525,6 +525,10 @@ struct c_arg_info {
   BOOL_BITFIELD had_vla_unspec : 1;
   /* True when the arguments are a (...) prototype.  */
   BOOL_BITFIELD no_named_args_stdarg_p : 1;
+  /* True when empty parentheses have been interpreted as (void) in C23 or
+ later.  This is only for use by -Wtraditional and is no longer needed if
+ -Wtraditional is removed.  */
+  BOOL_BITFIELD c23_empty_parens : 1;
 };
 
 /* A declarator.  */
diff --git a/gcc/testsuite/gcc.dg/wtr-gnu17-1.c 
b/gcc/testsuite/gcc.dg/wtr-gnu17-1.c
new file mode 100644
index ..74c06e4aa4c8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/wtr-gnu17-1.c
@@ -0,0 +1,9 @@
+/* Test -Wtraditional -std=gnu17 does not warn for empty parentheses in
+   function definition.  */
+/* { dg-do compile } */
+/* { dg-options "-Wtraditional -std=gnu17" } */
+
+void
+f ()
+{
+}
diff --git a/gcc/testsuite/gcc.dg/wtr-gnu23-1.c 
b/gcc/testsuite/gcc.dg/wtr-gnu23-1.c
new file mode 100644
index ..207e7c59d27b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/wtr-gnu23-1.c
@@ -0,0 +1,9 @@
+/* Test -Wtraditional -std=gnu23 does not warn for empty parentheses in
+   function definition.  */
+/* { dg-do compile } */
+/* { dg-options "-Wtraditional -std=gnu23" } */
+
+void
+f ()
+{
+}


[gcc(refs/users/meissner/heads/work181-sha)] Update ChangeLog.*

2024-10-18 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:e58fa10958c9ed48d5edfccb28219c78c6028f17

commit e58fa10958c9ed48d5edfccb28219c78c6028f17
Author: Michael Meissner 
Date:   Fri Oct 18 16:45:38 2024 -0400

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.sha | 12 
 1 file changed, 12 insertions(+)

diff --git a/gcc/ChangeLog.sha b/gcc/ChangeLog.sha
index 17733c99a5c0..2d7f998a8b52 100644
--- a/gcc/ChangeLog.sha
+++ b/gcc/ChangeLog.sha
@@ -1,5 +1,17 @@
  Branch work181-sha, patch #403 
 
+Move xxeval case before alternative that needs a temporary register.
+
+2024-10-18  Michael Meissner  
+
+gcc/
+
+   * config/rs6000/genfusion.pl (gen_logical_addsubf): Move xxeval case
+   before alternative that needs a temporary register.
+   * config/rs6000/fusion.md: Regenerate.
+
+ Branch work181-sha, patch #403 
+
 Rename sha3 tests.
 
 2024-10-17  Michael Meissner  


[gcc r15-4491] diagnostics: add debug dump functions

2024-10-18 Thread David Malcolm via Gcc-cvs
https://gcc.gnu.org/g:2ca19d43fb5d7598c4a6fa9bfbfaf6dea4ea7c33

commit r15-4491-g2ca19d43fb5d7598c4a6fa9bfbfaf6dea4ea7c33
Author: David Malcolm 
Date:   Fri Oct 18 21:51:35 2024 -0400

diagnostics: add debug dump functions

This commit expands on r15-3973-g4c7a58ac2617e2, which added
debug "dump" member functions to pretty_printer and output_buffer.

This followup adds "dump" member functions to diagnostic_context and
diagnostic_format, extends the existing dump functions and adds
indentation to make it much easier to see the various relationships
between context, format, printer, etc.

Hence you can now do:

(gdb) call global_dc->dump ()

and get a useful summary of what the diagnostic subsystem is doing;
for example:

(gdb) call global_dc->dump()
diagnostic_context:
  counts:
  output format:
sarif_output_format
  printer:
m_show_color: false
m_url_format: bel
m_buffer:
  m_formatted_obstack current object: length 0:
  m_chunk_obstack current object: length 0:
  pp_formatted_chunks: depth 0
0: TEXT("Function ")]
1: BEGIN_QUOTE, TEXT("program"), END_QUOTE]
2: TEXT(" requires an argument list at ")]
3: TEXT("(1)")]

showing the counts of all diagnostic kinds that are non-zero (none yet),
that we have a sarif output format, and the printer is part-way through
formatting a string.

gcc/ChangeLog:
* diagnostic-format-json.cc (json_output_format::dump): New.
* diagnostic-format-sarif.cc (sarif_output_format::dump): New.
(sarif_file_output_format::dump): New.
* diagnostic-format-text.cc (diagnostic_text_output_format::dump):
New.
* diagnostic-format-text.h (diagnostic_text_output_format::dump):
New decl.
* diagnostic-format.h (diagnostic_output_format::dump): New decls.
* diagnostic.cc (diagnostic_context::dump): New.
(diagnostic_output_format::dump): New.
* diagnostic.h (diagnostic_context::dump): New decls.
* pretty-print-format-impl.h (pp_formatted_chunks::dump): Add
"indent" param.
* pretty-print.cc (bytes_per_hexdump_line): New constant.
(print_hexdump_line): New.
(print_hexdump): New.
(output_buffer::dump): Add "indent" param and use it.  Add
hexdump of current object in m_formatted_obstack and
m_chunk_obstack.
(pp_formatted_chunks::dump): Add "indent" param and use it.
(pretty_printer::dump): Likewise.  Add dumping of m_show_color
and m_url_format.
* pretty-print.h (output_buffer::dump): Add "indent" param.
(pretty_printer::dump): Likewise.

gcc/testsuite/ChangeLog:
* gcc.dg/plugin/diagnostic_plugin_xhtml_format.c
(xhtml_output_format::dump): New.

Signed-off-by: David Malcolm 

Diff:
---
 gcc/diagnostic-format-json.cc  |  6 ++
 gcc/diagnostic-format-sarif.cc | 13 +++
 gcc/diagnostic-format-text.cc  |  7 ++
 gcc/diagnostic-format-text.h   |  3 +
 gcc/diagnostic-format.h|  4 +
 gcc/diagnostic.cc  | 24 ++
 gcc/diagnostic.h   |  3 +
 gcc/pretty-print-format-impl.h |  4 +-
 gcc/pretty-print.cc| 96 --
 gcc/pretty-print.h |  8 +-
 .../gcc.dg/plugin/diagnostic_plugin_xhtml_format.c |  6 ++
 11 files changed, 160 insertions(+), 14 deletions(-)

diff --git a/gcc/diagnostic-format-json.cc b/gcc/diagnostic-format-json.cc
index b4c1f13ee671..4f035dd2fae3 100644
--- a/gcc/diagnostic-format-json.cc
+++ b/gcc/diagnostic-format-json.cc
@@ -38,6 +38,12 @@ along with GCC; see the file COPYING3.  If not see
 class json_output_format : public diagnostic_output_format
 {
 public:
+  void dump (FILE *out, int indent) const override
+  {
+fprintf (out, "%*sjson_output_format\n", indent, "");
+diagnostic_output_format::dump (out, indent);
+  }
+
   void on_begin_group () final override
   {
 /* No-op.  */
diff --git a/gcc/diagnostic-format-sarif.cc b/gcc/diagnostic-format-sarif.cc
index 89ac9a5424c9..f64c83ad6e14 100644
--- a/gcc/diagnostic-format-sarif.cc
+++ b/gcc/diagnostic-format-sarif.cc
@@ -3302,6 +3302,12 @@ public:
 gcc_assert (!pending_result);
   }
 
+  void dump (FILE *out, int indent) const override
+  {
+fprintf (out, "%*ssarif_output_format\n", indent, "");
+diagnostic_output_format::dump (out, indent);
+  }
+
   void on_begin_group () final override
   {
 /* No-op,  */
@@ -3386,6 +3392,13 @@ public:
   {
 m_builder.flush_to_file (m_output_file.get_open_file ());
   }
+  void

[gcc r15-4492] diagnostics: remove forward decl of json::value from diagnostic.h

2024-10-18 Thread David Malcolm via Gcc-cvs
https://gcc.gnu.org/g:83abdb041426b7490e93c3f77be93148bcd94de3

commit r15-4492-g83abdb041426b7490e93c3f77be93148bcd94de3
Author: David Malcolm 
Date:   Fri Oct 18 21:51:39 2024 -0400

diagnostics: remove forward decl of json::value from diagnostic.h

I believe this hasn't been necessary since r15-1413-gd3878c85f331c7.

gcc/ChangeLog:
* diagnostic.h (json::value): Remove forward decl.

Signed-off-by: David Malcolm 

Diff:
---
 gcc/diagnostic.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/gcc/diagnostic.h b/gcc/diagnostic.h
index edd221f1a8ce..423e07230a65 100644
--- a/gcc/diagnostic.h
+++ b/gcc/diagnostic.h
@@ -220,7 +220,6 @@ public:
 };
 
 class edit_context;
-namespace json { class value; }
 class diagnostic_client_data_hooks;
 class logical_location;
 class diagnostic_diagram;


[gcc r15-4487] gcc/: Rename array_type_nelts => array_type_nelts_minus_one

2024-10-18 Thread Joseph Myers via Gcc-cvs
https://gcc.gnu.org/g:11577659949dfd1dfb49cc14e6dcc6b394ebd8c4

commit r15-4487-g11577659949dfd1dfb49cc14e6dcc6b394ebd8c4
Author: Alejandro Colomar 
Date:   Wed Oct 16 14:10:39 2024 +0200

gcc/: Rename array_type_nelts => array_type_nelts_minus_one

The old name was misleading.

While at it, also rename some temporary variables that are used with
this function, for consistency.

Link: 


gcc/ChangeLog:

* tree.cc (array_type_nelts, array_type_nelts_minus_one)
* tree.h (array_type_nelts, array_type_nelts_minus_one)
* expr.cc (count_type_elements)
* config/aarch64/aarch64.cc
(pure_scalable_type_info::analyze_array)
* config/i386/i386.cc (ix86_canonical_va_list_type):
Rename array_type_nelts => array_type_nelts_minus_one
The old name was misleading.

gcc/c/ChangeLog:

* c-decl.cc (one_element_array_type_p, get_parm_array_spec)
* c-fold.cc (c_fold_array_ref):
Rename array_type_nelts => array_type_nelts_minus_one

gcc/cp/ChangeLog:

* decl.cc (reshape_init_array)
* init.cc
(build_zero_init_1)
(build_value_init_noctor)
(build_vec_init)
(build_delete)
* lambda.cc (add_capture)
* tree.cc (array_type_nelts_top):
Rename array_type_nelts => array_type_nelts_minus_one

gcc/fortran/ChangeLog:

* trans-array.cc (structure_alloc_comps)
* trans-openmp.cc
(gfc_walk_alloc_comps)
(gfc_omp_clause_linear_ctor):
Rename array_type_nelts => array_type_nelts_minus_one

gcc/rust/ChangeLog:

* backend/rust-tree.cc (array_type_nelts_top):
Rename array_type_nelts => array_type_nelts_minus_one

Suggested-by: Richard Biener 
Signed-off-by: Alejandro Colomar 

Diff:
---
 gcc/c/c-decl.cc   | 10 +-
 gcc/c/c-fold.cc   |  7 ---
 gcc/config/aarch64/aarch64.cc |  2 +-
 gcc/config/i386/i386.cc   |  2 +-
 gcc/cp/decl.cc|  2 +-
 gcc/cp/init.cc|  8 
 gcc/cp/lambda.cc  |  3 ++-
 gcc/cp/tree.cc|  2 +-
 gcc/expr.cc   |  8 
 gcc/fortran/trans-array.cc|  2 +-
 gcc/fortran/trans-openmp.cc   |  4 ++--
 gcc/rust/backend/rust-tree.cc |  2 +-
 gcc/tree.cc   |  4 ++--
 gcc/tree.h|  2 +-
 14 files changed, 30 insertions(+), 28 deletions(-)

diff --git a/gcc/c/c-decl.cc b/gcc/c/c-decl.cc
index 1827bbf06465..491c24b9fe7e 100644
--- a/gcc/c/c-decl.cc
+++ b/gcc/c/c-decl.cc
@@ -5367,7 +5367,7 @@ one_element_array_type_p (const_tree type)
 {
   if (TREE_CODE (type) != ARRAY_TYPE)
 return false;
-  return integer_zerop (array_type_nelts (type));
+  return integer_zerop (array_type_nelts_minus_one (type));
 }
 
 /* Determine whether TYPE is a zero-length array type "[0]".  */
@@ -6315,15 +6315,15 @@ get_parm_array_spec (const struct c_parm *parm, tree 
attrs)
  for (tree type = parm->specs->type; TREE_CODE (type) == ARRAY_TYPE;
   type = TREE_TYPE (type))
{
- tree nelts = array_type_nelts (type);
- if (error_operand_p (nelts))
+ tree nelts_minus_one = array_type_nelts_minus_one (type);
+ if (error_operand_p (nelts_minus_one))
return attrs;
- if (TREE_CODE (nelts) != INTEGER_CST)
+ if (TREE_CODE (nelts_minus_one) != INTEGER_CST)
{
  /* Each variable VLA bound is represented by the dollar
 sign.  */
  spec += "$";
- tpbnds = tree_cons (NULL_TREE, nelts, tpbnds);
+ tpbnds = tree_cons (NULL_TREE, nelts_minus_one, tpbnds);
}
}
  tpbnds = nreverse (tpbnds);
diff --git a/gcc/c/c-fold.cc b/gcc/c/c-fold.cc
index 57b67c74bd8e..9ea174f79c49 100644
--- a/gcc/c/c-fold.cc
+++ b/gcc/c/c-fold.cc
@@ -73,11 +73,12 @@ c_fold_array_ref (tree type, tree ary, tree index)
   unsigned elem_nchars = (TYPE_PRECISION (elem_type)
  / TYPE_PRECISION (char_type_node));
   unsigned len = (unsigned) TREE_STRING_LENGTH (ary) / elem_nchars;
-  tree nelts = array_type_nelts (TREE_TYPE (ary));
+  tree nelts_minus_one = array_type_nelts_minus_one (TREE_TYPE (ary));
   bool dummy1 = true, dummy2 = true;
-  nelts = c_fully_fold_internal (nelts, true, &dummy1, &dummy2, false, false);
+  nelts_minus_one = c_fully_fold_internal (nelts_minus_one, true, &dummy1,
+  &dummy2, false, false);
   unsigned HOST_WIDE_INT i = tree_to_uhwi (index);
-  if (!tree_int_cst_le (index, nel

[gcc r15-4488] gcc/: Merge definitions of array_type_nelts_top

2024-10-18 Thread Joseph Myers via Gcc-cvs
https://gcc.gnu.org/g:c886eb3421883a3ab70c6af79f271289cc8bb1d4

commit r15-4488-gc886eb3421883a3ab70c6af79f271289cc8bb1d4
Author: Alejandro Colomar 
Date:   Wed Oct 16 14:10:48 2024 +0200

gcc/: Merge definitions of array_type_nelts_top

There were two identical definitions, and neither of them is available
where they are needed for implementing a number-of-elements-of
operator.  Merge them, and provide the single definition in
gcc/tree.{h,cc}, where it's available for that operator, which will be
added in a following commit.

gcc/ChangeLog:

* tree.h (array_type_nelts_top)
* tree.cc (array_type_nelts_top):
Define function (moved from gcc/cp/).

gcc/cp/ChangeLog:

* cp-tree.h (array_type_nelts_top)
* tree.cc (array_type_nelts_top):
Remove function (move to gcc/).

gcc/rust/ChangeLog:

* backend/rust-tree.h (array_type_nelts_top)
* backend/rust-tree.cc (array_type_nelts_top):
Remove function.

Signed-off-by: Alejandro Colomar 

Diff:
---
 gcc/cp/cp-tree.h  |  1 -
 gcc/cp/tree.cc| 13 -
 gcc/rust/backend/rust-tree.cc | 13 -
 gcc/rust/backend/rust-tree.h  |  2 --
 gcc/tree.cc   | 13 +
 gcc/tree.h|  1 +
 6 files changed, 14 insertions(+), 29 deletions(-)

diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index 94ee550bd9cb..a44100a2bc43 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -8121,7 +8121,6 @@ extern tree build_exception_variant   (tree, 
tree);
 extern void fixup_deferred_exception_variants   (tree, tree);
 extern tree bind_template_template_parm(tree, tree);
 extern tree array_type_nelts_total (tree);
-extern tree array_type_nelts_top   (tree);
 extern bool array_of_unknown_bound_p   (const_tree);
 extern tree break_out_target_exprs (tree, bool = false);
 extern tree build_ctor_subob_ref   (tree, tree, tree);
diff --git a/gcc/cp/tree.cc b/gcc/cp/tree.cc
index 3cac8ac4df1b..c80ee0689588 100644
--- a/gcc/cp/tree.cc
+++ b/gcc/cp/tree.cc
@@ -3076,19 +3076,6 @@ cxx_print_statistics (void)
 depth_reached);
 }
 
-/* Return, as an INTEGER_CST node, the number of elements for TYPE
-   (which is an ARRAY_TYPE).  This counts only elements of the top
-   array.  */
-
-tree
-array_type_nelts_top (tree type)
-{
-  return fold_build2_loc (input_location,
- PLUS_EXPR, sizetype,
- array_type_nelts_minus_one (type),
- size_one_node);
-}
-
 /* Return, as an INTEGER_CST node, the number of elements for TYPE
(which is an ARRAY_TYPE).  This one is a recursive count of all
ARRAY_TYPEs that are clumped together.  */
diff --git a/gcc/rust/backend/rust-tree.cc b/gcc/rust/backend/rust-tree.cc
index 8d32e5203aea..3dc6b0767112 100644
--- a/gcc/rust/backend/rust-tree.cc
+++ b/gcc/rust/backend/rust-tree.cc
@@ -859,19 +859,6 @@ is_empty_class (tree type)
   return CLASSTYPE_EMPTY_P (type);
 }
 
-// forked from gcc/cp/tree.cc array_type_nelts_top
-
-/* Return, as an INTEGER_CST node, the number of elements for TYPE
-   (which is an ARRAY_TYPE).  This counts only elements of the top
-   array.  */
-
-tree
-array_type_nelts_top (tree type)
-{
-  return fold_build2_loc (input_location, PLUS_EXPR, sizetype,
- array_type_nelts_minus_one (type), size_one_node);
-}
-
 // forked from gcc/cp/tree.cc builtin_valid_in_constant_expr_p
 
 /* Test whether DECL is a builtin that may appear in a
diff --git a/gcc/rust/backend/rust-tree.h b/gcc/rust/backend/rust-tree.h
index 26c8b653ac64..e597c3ab81d9 100644
--- a/gcc/rust/backend/rust-tree.h
+++ b/gcc/rust/backend/rust-tree.h
@@ -2993,8 +2993,6 @@ extern location_t rs_expr_location (const_tree);
 extern int
 is_empty_class (tree type);
 
-extern tree array_type_nelts_top (tree);
-
 extern bool
 is_really_empty_class (tree, bool);
 
diff --git a/gcc/tree.cc b/gcc/tree.cc
index 94c6d086bd73..b40f4d31b2f1 100644
--- a/gcc/tree.cc
+++ b/gcc/tree.cc
@@ -3732,6 +3732,19 @@ array_type_nelts_minus_one (const_tree type)
  ? max
  : fold_build2 (MINUS_EXPR, TREE_TYPE (max), max, min));
 }
+
+/* Return, as an INTEGER_CST node, the number of elements for TYPE
+   (which is an ARRAY_TYPE).  This counts only elements of the top
+   array.  */
+
+tree
+array_type_nelts_top (tree type)
+{
+  return fold_build2_loc (input_location,
+ PLUS_EXPR, sizetype,
+ array_type_nelts_minus_one (type),
+ size_one_node);
+}
 
 /* If arg is static -- a reference to an object in static storage -- then
return the object.  This is not the same as the C meaning of `static'.
diff --git a/gcc/tree.h b/gcc/tree.h
index c996821c9534..f4c89f5477c5 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -493

[gcc r15-4493] runtime/testdata: fix for C23 nullptr keyword

2024-10-18 Thread Ian Lance Taylor via Gcc-cvs
https://gcc.gnu.org/g:04b4a5e4866daa7b6f4a4a3e7ed1a23f7787841d

commit r15-4493-g04b4a5e4866daa7b6f4a4a3e7ed1a23f7787841d
Author: Ian Lance Taylor 
Date:   Fri Oct 18 19:51:00 2024 -0700

runtime/testdata: fix for C23 nullptr keyword

Backport https://go.dev/cl/620955 from main repo.  Original description:

src/runtime/testdata/testprogcgo/threadprof.go contains C code with a
variable called nullptr.  This conflicts with the nullptr keyword in
the C23 revision of the C standard (showing up as gccgo test build
failures when updating GCC to use C23 by default when building C
code).

Rename that variable to nullpointer to avoid the clash with the
keyword (any other name that's not a keyword would work just as well).

Reviewed-on: https://go-review.googlesource.com/c/gofrontend/+/621059

Diff:
---
 gcc/go/gofrontend/MERGE | 2 +-
 libgo/go/runtime/testdata/testprogcgo/threadprof.go | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/go/gofrontend/MERGE b/gcc/go/gofrontend/MERGE
index 3a839410e9cc..c39aca9b1b00 100644
--- a/gcc/go/gofrontend/MERGE
+++ b/gcc/go/gofrontend/MERGE
@@ -1,4 +1,4 @@
-6aa463fef2d8f04e0bd2675f63a6529df080a44a
+092668d6ce6d7b3aff6797247cd53dc44319c558
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
diff --git a/libgo/go/runtime/testdata/testprogcgo/threadprof.go 
b/libgo/go/runtime/testdata/testprogcgo/threadprof.go
index d62d4b4be839..f61c51b8b623 100644
--- a/libgo/go/runtime/testdata/testprogcgo/threadprof.go
+++ b/libgo/go/runtime/testdata/testprogcgo/threadprof.go
@@ -36,10 +36,10 @@ __attribute__((constructor)) void issue9456() {
}
 }
 
-void **nullptr;
+void **nullpointer;
 
 void *crash(void *p) {
-   *nullptr = p;
+   *nullpointer = p;
return 0;
 }


[gcc r15-4469] [4/n] remove wrapv-*.c special-casing of gcc.dg/vect/ files

2024-10-18 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:902f4ee7f13af94a4c85abfb64e23cc7967ec05f

commit r15-4469-g902f4ee7f13af94a4c85abfb64e23cc7967ec05f
Author: Richard Biener 
Date:   Fri Oct 18 13:38:14 2024 +0200

[4/n] remove wrapv-*.c special-casing of gcc.dg/vect/ files

The following makes -fwrapv explicit.

* gcc.dg/vect/vect.exp: Remove special-casing of tests
named wrapv-*
* gcc.dg/vect/wrapv-vect-7.c: Add dg-additional-options -fwrapv.
* gcc.dg/vect/wrapv-vect-reduc-2char.c: Likewise.
* gcc.dg/vect/wrapv-vect-reduc-2short.c: Likewise.
* gcc.dg/vect/wrapv-vect-reduc-dot-s8b.c: Likewise.
* gcc.dg/vect/wrapv-vect-reduc-pattern-2c.c: Likewise.

Diff:
---
 gcc/testsuite/gcc.dg/vect/vect.exp  | 21 +++--
 gcc/testsuite/gcc.dg/vect/wrapv-vect-7.c|  1 +
 gcc/testsuite/gcc.dg/vect/wrapv-vect-reduc-2char.c  |  1 +
 gcc/testsuite/gcc.dg/vect/wrapv-vect-reduc-2short.c |  1 +
 .../gcc.dg/vect/wrapv-vect-reduc-dot-s8b.c  |  1 +
 .../gcc.dg/vect/wrapv-vect-reduc-pattern-2c.c   |  1 +
 6 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/vect.exp 
b/gcc/testsuite/gcc.dg/vect/vect.exp
index eddebf53c7f5..14c6168f6eec 100644
--- a/gcc/testsuite/gcc.dg/vect/vect.exp
+++ b/gcc/testsuite/gcc.dg/vect/vect.exp
@@ -112,6 +112,13 @@ foreach flags $VECT_ADDITIONAL_FLAGS {
 et-dg-runtest dg-runtest [lsort \
[glob -nocomplain $srcdir/$subdir/fast-math-\[ipsvc\]*.\[cS\]]] \
$flags $DEFAULT_VECTCFLAGS
+et-dg-runtest dg-runtest [lsort \
+   [glob -nocomplain $srcdir/$subdir/wrapv-*.\[cS\]]] \
+   $flags $DEFAULT_VECTCFLAGS
+
+et-dg-runtest dg-runtest [lsort \
+   [glob -nocomplain $srcdir/$subdir/fast-math-bb-slp-*.\[cS\]]] \
+   $flags $VECT_SLP_CFLAGS
 et-dg-runtest dg-runtest [lsort \
[glob -nocomplain $srcdir/$subdir/bb-slp*.\[cS\]]] \
$flags $VECT_SLP_CFLAGS
@@ -122,20 +129,6 @@ global SAVED_DEFAULT_VECTCFLAGS
 set SAVED_DEFAULT_VECTCFLAGS $DEFAULT_VECTCFLAGS
 set SAVED_VECT_SLP_CFLAGS $VECT_SLP_CFLAGS
 
-# -ffast-math SLP tests
-set VECT_SLP_CFLAGS $SAVED_VECT_SLP_CFLAGS
-lappend VECT_SLP_CFLAGS "-ffast-math"
-et-dg-runtest dg-runtest [lsort \
-   [glob -nocomplain $srcdir/$subdir/fast-math-bb-slp-*.\[cS\]]] \
-   "" $VECT_SLP_CFLAGS
-
-# -fwrapv tests
-set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
-lappend DEFAULT_VECTCFLAGS "-fwrapv"
-et-dg-runtest dg-runtest [lsort \
-   [glob -nocomplain $srcdir/$subdir/wrapv-*.\[cS\]]] \
-   "" $DEFAULT_VECTCFLAGS
-
 # -ftrapv tests
 set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
 lappend DEFAULT_VECTCFLAGS "-ftrapv"
diff --git a/gcc/testsuite/gcc.dg/vect/wrapv-vect-7.c 
b/gcc/testsuite/gcc.dg/vect/wrapv-vect-7.c
index 414bd9d3e127..2a557f697e11 100644
--- a/gcc/testsuite/gcc.dg/vect/wrapv-vect-7.c
+++ b/gcc/testsuite/gcc.dg/vect/wrapv-vect-7.c
@@ -1,3 +1,4 @@
+/* { dg-additional-options "-fwrapv" } */
 /* { dg-require-effective-target vect_int } */
 /* { dg-add-options bind_pic_locally } */
 
diff --git a/gcc/testsuite/gcc.dg/vect/wrapv-vect-reduc-2char.c 
b/gcc/testsuite/gcc.dg/vect/wrapv-vect-reduc-2char.c
index 556c2a06dc5d..0ee9178025e9 100644
--- a/gcc/testsuite/gcc.dg/vect/wrapv-vect-reduc-2char.c
+++ b/gcc/testsuite/gcc.dg/vect/wrapv-vect-reduc-2char.c
@@ -1,3 +1,4 @@
+/* { dg-additional-options "-fwrapv" } */
 /* { dg-require-effective-target vect_int } */
 
 #include 
diff --git a/gcc/testsuite/gcc.dg/vect/wrapv-vect-reduc-2short.c 
b/gcc/testsuite/gcc.dg/vect/wrapv-vect-reduc-2short.c
index f9142173b257..aadc9c37da3f 100644
--- a/gcc/testsuite/gcc.dg/vect/wrapv-vect-reduc-2short.c
+++ b/gcc/testsuite/gcc.dg/vect/wrapv-vect-reduc-2short.c
@@ -1,3 +1,4 @@
+/* { dg-additional-options "-fwrapv" } */
 /* { dg-require-effective-target vect_int } */
 
 #include 
diff --git a/gcc/testsuite/gcc.dg/vect/wrapv-vect-reduc-dot-s8b.c 
b/gcc/testsuite/gcc.dg/vect/wrapv-vect-reduc-dot-s8b.c
index 72080af59239..920374d42635 100644
--- a/gcc/testsuite/gcc.dg/vect/wrapv-vect-reduc-dot-s8b.c
+++ b/gcc/testsuite/gcc.dg/vect/wrapv-vect-reduc-dot-s8b.c
@@ -1,3 +1,4 @@
+/* { dg-additional-options "-fwrapv" } */
 /* Disabling epilogues until we find a better way to deal with scans.  */
 /* { dg-additional-options "--param vect-epilogues-nomask=0" } */
 /* { dg-require-effective-target vect_int } */
diff --git a/gcc/testsuite/gcc.dg/vect/wrapv-vect-reduc-pattern-2c.c 
b/gcc/testsuite/gcc.dg/vect/wrapv-vect-reduc-pattern-2c.c
index e3c33cff7e14..be0447c7b103 100644
--- a/gcc/testsuite/gcc.dg/vect/wrapv-vect-reduc-pattern-2c.c
+++ b/gcc/testsuite/gcc.dg/vect/wrapv-vect-reduc-pattern-2c.c
@@ -1,3 +1,4 @@
+/* { dg-additional-options "-fwrapv" } */
 /* { dg-require-effective-target vect_int } */
 
 #include 


[gcc r15-4470] [5/n] remove trapv-*.c special-casing of gcc.dg/vect/ files

2024-10-18 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:94b95f7a3f188bcfcf45beeef9c472248b1810ef

commit r15-4470-g94b95f7a3f188bcfcf45beeef9c472248b1810ef
Author: Richard Biener 
Date:   Fri Oct 18 13:40:13 2024 +0200

[5/n] remove trapv-*.c special-casing of gcc.dg/vect/ files

The following makes -ftrapv explicit.

* gcc.dg/vect/vect.exp: Remove special-casing of tests
named trapv-*
* gcc.dg/vect/trapv-vect-reduc-4.c: Add dg-additional-options 
-ftrapv.

Diff:
---
 gcc/testsuite/gcc.dg/vect/trapv-vect-reduc-4.c |  2 +-
 gcc/testsuite/gcc.dg/vect/vect.exp | 10 +++---
 2 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/trapv-vect-reduc-4.c 
b/gcc/testsuite/gcc.dg/vect/trapv-vect-reduc-4.c
index 24cf1f793c7f..e59fbba824f5 100644
--- a/gcc/testsuite/gcc.dg/vect/trapv-vect-reduc-4.c
+++ b/gcc/testsuite/gcc.dg/vect/trapv-vect-reduc-4.c
@@ -1,5 +1,5 @@
 /* Disabling epilogues until we find a better way to deal with scans.  */
-/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
+/* { dg-additional-options "-ftrapv --param vect-epilogues-nomask=0" } */
 /* { dg-do compile } */
 /* { dg-require-effective-target vect_int } */
 
diff --git a/gcc/testsuite/gcc.dg/vect/vect.exp 
b/gcc/testsuite/gcc.dg/vect/vect.exp
index 14c6168f6eec..37e7bc424f83 100644
--- a/gcc/testsuite/gcc.dg/vect/vect.exp
+++ b/gcc/testsuite/gcc.dg/vect/vect.exp
@@ -115,6 +115,9 @@ foreach flags $VECT_ADDITIONAL_FLAGS {
 et-dg-runtest dg-runtest [lsort \
[glob -nocomplain $srcdir/$subdir/wrapv-*.\[cS\]]] \
$flags $DEFAULT_VECTCFLAGS
+et-dg-runtest dg-runtest [lsort \
+   [glob -nocomplain $srcdir/$subdir/trapv-*.\[cS\]]] \
+   $flags $DEFAULT_VECTCFLAGS
 
 et-dg-runtest dg-runtest [lsort \
[glob -nocomplain $srcdir/$subdir/fast-math-bb-slp-*.\[cS\]]] \
@@ -129,13 +132,6 @@ global SAVED_DEFAULT_VECTCFLAGS
 set SAVED_DEFAULT_VECTCFLAGS $DEFAULT_VECTCFLAGS
 set SAVED_VECT_SLP_CFLAGS $VECT_SLP_CFLAGS
 
-# -ftrapv tests
-set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
-lappend DEFAULT_VECTCFLAGS "-ftrapv"
-et-dg-runtest dg-runtest [lsort \
-   [glob -nocomplain $srcdir/$subdir/trapv-*.\[cS\]]] \
-   "" $DEFAULT_VECTCFLAGS
-
 # -fno-tree-dce tests
 set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
 lappend DEFAULT_VECTCFLAGS "-fno-tree-dce"


[gcc r15-4485] hppa: Fix up pa.opt.urls

2024-10-18 Thread John David Anglin via Gcc-cvs
https://gcc.gnu.org/g:aaa855fac0c7003d823b48fe4cc4b9ded9331a2b

commit r15-4485-gaaa855fac0c7003d823b48fe4cc4b9ded9331a2b
Author: John David Anglin 
Date:   Fri Oct 18 12:43:15 2024 -0400

hppa: Fix up pa.opt.urls

2024-10-18  John David Anglin  

gcc/ChangeLog:

* config/pa/pa.opt.urls: Fix for -mlra.

Diff:
---
 gcc/config/pa/pa.opt.urls | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcc/config/pa/pa.opt.urls b/gcc/config/pa/pa.opt.urls
index 5b8bcebdd0da..5516332ead13 100644
--- a/gcc/config/pa/pa.opt.urls
+++ b/gcc/config/pa/pa.opt.urls
@@ -36,6 +36,8 @@ UrlSuffix(gcc/HPPA-Options.html#index-mlinker-opt)
 mlong-calls
 UrlSuffix(gcc/HPPA-Options.html#index-mlong-calls-5)
 
+; skipping UrlSuffix for 'mlra' due to finding no URLs
+
 mlong-load-store
 UrlSuffix(gcc/HPPA-Options.html#index-mlong-load-store)


[gcc(refs/users/jmelcr/heads/omp-cp)] omp-cp: add callback attribute, wip callback edges

2024-10-18 Thread Josef Melcr via Gcc-cvs
https://gcc.gnu.org/g:723b98f9c1f1aea96d2eb74f06578565f6f405d1

commit 723b98f9c1f1aea96d2eb74f06578565f6f405d1
Author: Josef Melcr 
Date:   Fri Oct 18 14:40:46 2024 +0200

omp-cp: add callback attribute, wip callback edges

gcc/c-family/ChangeLog:

* c-attribs.cc: add callback attribute

gcc/ChangeLog:

* cgraph.cc (symbol_table::create_edge): allow multiple stmts to
  edges for callback edges
(cgraph_edge::make_callback): add make_callback method
* cgraph.h: add make_callback signature
* gimple.h (GCC_GIMPLE_H):
(gimple_omp_parallel_set_child_fn): add callback attribute when
setting new child fn
* ipa-inline.cc (can_inline_edge_p): disable inlining of
  callback edges
* ipa-prop.cc (ipa_analyze_node): add callback edge creation

yeah it's segfaulting atm

Signed-off-by: Josef Melcr 

Diff:
---
 gcc/c-family/c-attribs.cc |  3 ++-
 gcc/cgraph.cc | 26 +-
 gcc/cgraph.h  |  7 +++
 gcc/gimple.h  | 12 +++-
 gcc/ipa-inline.cc |  6 ++
 gcc/ipa-prop.cc   | 24 
 6 files changed, 75 insertions(+), 3 deletions(-)

diff --git a/gcc/c-family/c-attribs.cc b/gcc/c-family/c-attribs.cc
index 4dd2eecbea54..63906bd01246 100644
--- a/gcc/c-family/c-attribs.cc
+++ b/gcc/c-family/c-attribs.cc
@@ -635,7 +635,8 @@ const struct attribute_spec c_common_gnu_attributes[] =
   { "flag_enum", 0, 0, false, true, false, false,
  handle_flag_enum_attribute, NULL },
   { "null_terminated_string_arg", 1, 1, false, true, true, false,
- handle_null_terminated_string_arg_attribute, NULL}
+ handle_null_terminated_string_arg_attribute, 
NULL},
+  { "callback", 0, 0, true, false, false, false, NULL, NULL}
 };
 
 const struct scoped_attribute_specs c_common_gnu_attribute_table =
diff --git a/gcc/cgraph.cc b/gcc/cgraph.cc
index 39a3adbc7c35..58813e8cc2d0 100644
--- a/gcc/cgraph.cc
+++ b/gcc/cgraph.cc
@@ -885,7 +885,7 @@ symbol_table::create_edge (cgraph_node *caller, cgraph_node 
*callee,
 construction of call stmt hashtable.  */
   cgraph_edge *e;
   gcc_checking_assert (!(e = caller->get_edge (call_stmt))
-  || e->speculative);
+  || e->speculative || e->has_callback);
 
   gcc_assert (is_gimple_call (call_stmt));
 }
@@ -911,6 +911,8 @@ symbol_table::create_edge (cgraph_node *caller, cgraph_node 
*callee,
   edge->indirect_info = NULL;
   edge->indirect_inlining_edge = 0;
   edge->speculative = false;
+  edge->has_callback = false;
+  edge->callback = false;
   edge->indirect_unknown_callee = indir_unknown_callee;
   if (call_stmt && caller->call_site_hash)
 cgraph_add_edge_to_call_site_hash (edge);
@@ -1136,6 +1138,28 @@ cgraph_edge::make_speculative (cgraph_node *n2, 
profile_count direct_count,
   return e2;
 }
 
+cgraph_edge *
+cgraph_edge::make_callback (cgraph_node *n2)
+{
+  cgraph_node *n = caller;
+  cgraph_edge *e2;
+
+  if (dump_file)
+fprintf (dump_file, "Indirect call -> callback call %s => %s\n",
+n->dump_name (), n2->dump_name ());
+  has_callback = true;
+  e2 = n->create_edge (n2, call_stmt, count);
+  initialize_inline_failed (e2);
+  e2->callback = true;
+  if (TREE_NOTHROW (n2->decl))
+e2->can_throw_external = false;
+  else
+e2->can_throw_external = can_throw_external;
+  e2->lto_stmt_uid = lto_stmt_uid;
+  n2->mark_address_taken ();
+  return e2;
+}
+
 /* Speculative call consists of an indirect edge and one or more
direct edge+ref pairs.
 
diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index a8c3224802c1..bf0a22bcf365 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -1735,6 +1735,9 @@ public:
   cgraph_edge *make_speculative (cgraph_node *n2, profile_count direct_count,
 unsigned int speculative_id = 0);
 
+  /* TODO DOCS */
+  cgraph_edge *make_callback (cgraph_node *n2);
+
   /* Speculative call consists of an indirect edge and one or more
  direct edge+ref pairs.  Speculative will expand to the following sequence:
 
@@ -1951,6 +1954,10 @@ public:
  Optimizers may later redirect direct call to clone, so 1) and 3)
  do not need to necessarily agree with destination.  */
   unsigned int speculative : 1;
+  /* TODO DOCS */
+  unsigned int callback : 1;
+  /* TODO DOCS */
+  unsigned int has_callback : 1;
   /* Set to true when caller is a constructor or destructor of polymorphic
  type.  */
   unsigned in_polymorphic_cdtor : 1;
diff --git a/gcc/gimple.h b/gcc/gimple.h
index 4a6e0e97d1e7..56eb49802e6b 100644
--- a/gcc/gimple.h
+++ b/gcc/gimple.h
@@ -22,8 +22,11 @@ along with GCC; see the file COPYING3.  If not see
 #ifndef GCC_GIMPLE_H
 #define GCC_GIMPLE_H
 
+#include "stringpool.h"
+#include "attribs.

[gcc r15-4466] Adjust assert in vect_build_slp_tree_2

2024-10-18 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:ee70e5c729b157351155be905b290412435c081c

commit r15-4466-gee70e5c729b157351155be905b290412435c081c
Author: Richard Biener 
Date:   Fri Oct 18 12:03:12 2024 +0200

Adjust assert in vect_build_slp_tree_2

The assert in SLP discovery when we handle masked operations is
confusingly wide - all gather variants should be caught by
the earlier STMT_VINFO_GATHER_SCATTER_P.

* tree-vect-slp.cc (vect_build_slp_tree_2): Only expect
IFN_MASK_LOAD for masked loads that are not
STMT_VINFO_GATHER_SCATTER_P.

Diff:
---
 gcc/tree-vect-slp.cc | 6 +-
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 9276662fa0f1..53f5400a961d 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -2020,11 +2020,7 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
 
  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
{
- gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD)
- || gimple_call_internal_p (stmt, IFN_GATHER_LOAD)
- || gimple_call_internal_p (stmt, IFN_MASK_GATHER_LOAD)
- || gimple_call_internal_p (stmt,
-IFN_MASK_LEN_GATHER_LOAD));
+ gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD));
  bool has_gaps = false;
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
for (stmt_vec_info si = DR_GROUP_NEXT_ELEMENT (first_stmt_info);


[gcc r15-4468] [3/n] remove fast-math-*.c special-casing of gcc.dg/vect/ files

2024-10-18 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:a1381b69b9d81f210735d4a5bc9d65a0b01d1bda

commit r15-4468-ga1381b69b9d81f210735d4a5bc9d65a0b01d1bda
Author: Richard Biener 
Date:   Fri Oct 18 13:30:26 2024 +0200

[3/n] remove fast-math-*.c special-casing of gcc.dg/vect/ files

The following makes -ffast-math explicit.

* gcc.dg/vect/vect.exp: Remove special-casing of tests
named fast-math-*
* gcc.dg/vect/fast-math-bb-slp-call-1.c: Add dg-additional-options
-ffast-math.
* gcc.dg/vect/fast-math-bb-slp-call-2.c: Likewise.
* gcc.dg/vect/fast-math-bb-slp-call-3.c: Likewise.
* gcc.dg/vect/fast-math-ifcvt-1.c: Likewise.
* gcc.dg/vect/fast-math-pr35982.c: Likewise.
* gcc.dg/vect/fast-math-pr43074.c: Likewise.
* gcc.dg/vect/fast-math-pr44152.c: Likewise.
* gcc.dg/vect/fast-math-pr55281.c: Likewise.
* gcc.dg/vect/fast-math-slp-27.c: Likewise.
* gcc.dg/vect/fast-math-slp-38.c: Likewise.
* gcc.dg/vect/fast-math-vect-call-1.c: Likewise.
* gcc.dg/vect/fast-math-vect-call-2.c: Likewise.
* gcc.dg/vect/fast-math-vect-complex-3.c: Likewise.
* gcc.dg/vect/fast-math-vect-outer-7.c: Likewise.
* gcc.dg/vect/fast-math-vect-pow-1.c: Likewise.
* gcc.dg/vect/fast-math-vect-pow-2.c: Likewise.
* gcc.dg/vect/fast-math-vect-pr25911.c: Likewise.
* gcc.dg/vect/fast-math-vect-pr29925.c: Likewise.
* gcc.dg/vect/fast-math-vect-reduc-5.c: Likewise.
* gcc.dg/vect/fast-math-vect-reduc-7.c: Likewise.
* gcc.dg/vect/fast-math-vect-reduc-8.c: Likewise.
* gcc.dg/vect/fast-math-vect-reduc-9.c: Likewise.
* gcc.dg/vect/complex/fast-math-bb-slp-complex-add-double.c: 
Likewise.
* gcc.dg/vect/complex/fast-math-bb-slp-complex-add-float.c: 
Likewise.
* gcc.dg/vect/complex/fast-math-bb-slp-complex-add-half-float.c:
Likewise.
* gcc.dg/vect/complex/fast-math-bb-slp-complex-add-pattern-double.c:
Likewise.
* gcc.dg/vect/complex/fast-math-bb-slp-complex-add-pattern-float.c:
Likewise.
* 
gcc.dg/vect/complex/fast-math-bb-slp-complex-add-pattern-half-float.c:
Likewise.
* gcc.dg/vect/complex/fast-math-bb-slp-complex-mla-double.c: 
Likewise.
* gcc.dg/vect/complex/fast-math-bb-slp-complex-mla-float.c: 
Likewise.
* gcc.dg/vect/complex/fast-math-bb-slp-complex-mla-half-float.c:
Likewise.
* gcc.dg/vect/complex/fast-math-bb-slp-complex-mls-double.c: 
Likewise.
* gcc.dg/vect/complex/fast-math-bb-slp-complex-mls-float.c: 
Likewise.
* gcc.dg/vect/complex/fast-math-bb-slp-complex-mls-half-float.c:
Likewise.
* gcc.dg/vect/complex/fast-math-bb-slp-complex-mul-double.c: 
Likewise.
* gcc.dg/vect/complex/fast-math-bb-slp-complex-mul-float.c: 
Likewise.
* gcc.dg/vect/complex/fast-math-bb-slp-complex-mul-half-float.c:
Likewise.
* gcc.dg/vect/complex/fast-math-complex-add-double.c: Likewise.
* gcc.dg/vect/complex/fast-math-complex-add-float.c: Likewise.
* gcc.dg/vect/complex/fast-math-complex-add-half-float.c: Likewise.
* gcc.dg/vect/complex/fast-math-complex-add-pattern-double.c:
Likewise.
* gcc.dg/vect/complex/fast-math-complex-add-pattern-float.c: 
Likewise.
* gcc.dg/vect/complex/fast-math-complex-add-pattern-half-float.c:
Likewise.
* gcc.dg/vect/complex/fast-math-complex-mla-double.c: Likewise.
* gcc.dg/vect/complex/fast-math-complex-mla-float.c: Likewise.
* gcc.dg/vect/complex/fast-math-complex-mla-half-float.c: Likewise.
* gcc.dg/vect/complex/fast-math-complex-mls-double.c: Likewise.
* gcc.dg/vect/complex/fast-math-complex-mls-float.c: Likewise.
* gcc.dg/vect/complex/fast-math-complex-mls-half-float.c: Likewise.
* gcc.dg/vect/complex/fast-math-complex-mul-double.c: Likewise.
* gcc.dg/vect/complex/fast-math-complex-mul-float.c: Likewise.
* gcc.dg/vect/complex/fast-math-complex-mul-half-float.c: Likewise.

Diff:
---
 .../gcc.dg/vect/complex/fast-math-bb-slp-complex-add-double.c  |  2 +-
 .../gcc.dg/vect/complex/fast-math-bb-slp-complex-add-float.c   |  2 +-
 .../vect/complex/fast-math-bb-slp-complex-add-half-float.c |  2 +-
 .../vect/complex/fast-math-bb-slp-complex-add-pattern-double.c |  2 +-
 .../vect/complex/fast-math-bb-slp-complex-add-pattern-float.c  |  2 +-
 .../complex/fast-math-bb-slp-complex-add-pattern-half-float.c  |  2 +-
 .../gcc.dg/vect/complex/fast-math-bb-slp-complex-mla-double.c  |  2 +-
 .../gcc.dg/vect/complex/fast-math-bb-slp-complex-mla-float.c   |  2 +-
 .../vect/complex/fast-math-bb-slp-complex-mla-half-float

[gcc r14-10803] ipa: Treat static constructors and destructors as non-local (PR 115815)

2024-10-18 Thread Martin Jambor via Gcc-cvs
https://gcc.gnu.org/g:f057e958732cd2627b6db127fa6d4d882b61dd5f

commit r14-10803-gf057e958732cd2627b6db127fa6d4d882b61dd5f
Author: Martin Jambor 
Date:   Fri Oct 18 21:32:16 2024 +0200

ipa: Treat static constructors and destructors as non-local (PR 115815)

In PR 115815, IPA-SRA thought it had control over all invocations of a
(recursive) static destructor but it did not see the implied
invocation which led to the original being left behind and the
clean-up code encountering uses of SSAs that definitely should have
been dead.

Fixed by teaching cgraph_node::can_be_local_p about static
constructors and destructors.  Similar test is missing in
cgraph_node::local_p so I added the check there as well.

In addition to the commit with the fix, this backport also contains
squashed commit 1a458bdeb223ffa501bac8e76182115681967094 which fixes
dejagnu directives in the testcase.

gcc/ChangeLog:

2024-07-25  Martin Jambor  

PR ipa/115815
* cgraph.cc (cgraph_node_cannot_be_local_p_1): Also check
DECL_STATIC_CONSTRUCTOR and DECL_STATIC_DESTRUCTOR.
* ipa-visibility.cc (non_local_p): Likewise.
(cgraph_node::local_p): Delete extraneous line of tabs.

gcc/testsuite/ChangeLog:

2024-07-25  Martin Jambor  

PR ipa/115815
* gcc.dg/lto/pr115815_0.c: New test.

(cherry picked from commit e98ad6a049c96c21cf641954584c2f5b7df0ce93)

Diff:
---
 gcc/cgraph.cc |  4 +++-
 gcc/ipa-visibility.cc |  5 +++--
 gcc/testsuite/gcc.dg/lto/pr115815_0.c | 22 ++
 3 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/gcc/cgraph.cc b/gcc/cgraph.cc
index 473d8410bc97..39a3adbc7c35 100644
--- a/gcc/cgraph.cc
+++ b/gcc/cgraph.cc
@@ -2434,7 +2434,9 @@ cgraph_node_cannot_be_local_p_1 (cgraph_node *node, void 
*)
&& !node->forced_by_abi
&& !node->used_from_object_file_p ()
&& !node->same_comdat_group)
-  || !node->externally_visible));
+  || !node->externally_visible)
+  && !DECL_STATIC_CONSTRUCTOR (node->decl)
+  && !DECL_STATIC_DESTRUCTOR (node->decl));
 }
 
 /* Return true if cgraph_node can be made local for API change.
diff --git a/gcc/ipa-visibility.cc b/gcc/ipa-visibility.cc
index 501d3c304aa3..21f0c47f388e 100644
--- a/gcc/ipa-visibility.cc
+++ b/gcc/ipa-visibility.cc
@@ -102,7 +102,9 @@ non_local_p (struct cgraph_node *node, void *data 
ATTRIBUTE_UNUSED)
   && !node->externally_visible
   && !node->used_from_other_partition
   && !node->in_other_partition
-  && node->get_availability () >= AVAIL_AVAILABLE);
+  && node->get_availability () >= AVAIL_AVAILABLE
+  && !DECL_STATIC_CONSTRUCTOR (node->decl)
+  && !DECL_STATIC_DESTRUCTOR (node->decl));
 }
 
 /* Return true when function can be marked local.  */
@@ -116,7 +118,6 @@ cgraph_node::local_p (void)
  return n->callees->callee->local_p ();
return !n->call_for_symbol_thunks_and_aliases (non_local_p,
  NULL, true);
-   
 }
 
 /* A helper for comdat_can_be_unshared_p.  */
diff --git a/gcc/testsuite/gcc.dg/lto/pr115815_0.c 
b/gcc/testsuite/gcc.dg/lto/pr115815_0.c
new file mode 100644
index 000000000000..ade91def55b0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/lto/pr115815_0.c
@@ -0,0 +1,22 @@
+/* { dg-lto-options {{-O2 -flto}} }  */
+/* { dg-lto-do link } */
+/* { dg-require-effective-target global_constructor } */
+
+int a;
+volatile int v;
+volatile int w;
+
+int __attribute__((destructor))
+b() {
+  if (v)
+return a + b();
+  v = 5;
+  return 0;
+}
+
+int
+main (int argc, char **argv)
+{
+  w = 1;
+  return 0;
+}


[gcc r15-4484] Handle GFC_STD_UNSIGNED like a standard in error messages.

2024-10-18 Thread Thomas König via Gcc-cvs
https://gcc.gnu.org/g:1f07dea91c37d45874cd91c2333696e784ae2d44

commit r15-4484-g1f07dea91c37d45874cd91c2333696e784ae2d44
Author: Thomas Koenig 
Date:   Fri Oct 18 17:58:56 2024 +0200

Handle GFC_STD_UNSIGNED like a standard in error messages.

gcc/fortran/ChangeLog:

* error.cc (notify_std_msg): Handle GFC_STD_UNSIGNED.

gcc/testsuite/ChangeLog:

* gfortran.dg/unsigned_37.f90: New test.

Diff:
---
 gcc/fortran/error.cc  | 2 ++
 gcc/testsuite/gfortran.dg/unsigned_37.f90 | 4 
 2 files changed, 6 insertions(+)

diff --git a/gcc/fortran/error.cc b/gcc/fortran/error.cc
index d184ffd878ad..afe2e49e4994 100644
--- a/gcc/fortran/error.cc
+++ b/gcc/fortran/error.cc
@@ -362,6 +362,8 @@ notify_std_msg(int std)
 return _("Obsolescent feature:");
   else if (std & GFC_STD_F95_DEL)
 return _("Deleted feature:");
+  else if (std & GFC_STD_UNSIGNED)
+return _("Unsigned:");
   else
 gcc_unreachable ();
 }
diff --git a/gcc/testsuite/gfortran.dg/unsigned_37.f90 
b/gcc/testsuite/gfortran.dg/unsigned_37.f90
new file mode 100644
index ..b11f214336aa
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/unsigned_37.f90
@@ -0,0 +1,4 @@
+! { dg-do compile }
+program main
+  use iso_fortran_env, only : uint32 ! { dg-error "not in the selected 
standard" }
+end program main


[gcc r15-4486] libbacktrace: don't get confused by overlapping address ranges

2024-10-18 Thread Ian Lance Taylor via Gcc-cvs
https://gcc.gnu.org/g:f8687bceaa8ef9cd3c48b6706e8620af3ec5e2eb

commit r15-4486-gf8687bceaa8ef9cd3c48b6706e8620af3ec5e2eb
Author: Ian Lance Taylor 
Date:   Fri Oct 18 13:02:21 2024 -0700

libbacktrace: don't get confused by overlapping address ranges

Fixes https://github.com/ianlancetaylor/libbacktrace/issues/137.

* dwarf.c (resolve_unit_addrs_overlap_walk): New static function.
(resolve_unit_addrs_overlap): New static function.
(build_dwarf_data): Call resolve_unit_addrs_overlap.

Diff:
---
 libbacktrace/dwarf.c | 214 +++
 1 file changed, 199 insertions(+), 15 deletions(-)

diff --git a/libbacktrace/dwarf.c b/libbacktrace/dwarf.c
index 96ffc4cc481b..cc5cad703339 100644
--- a/libbacktrace/dwarf.c
+++ b/libbacktrace/dwarf.c
@@ -1276,6 +1276,194 @@ unit_addrs_search (const void *vkey, const void *ventry)
 return 0;
 }
 
+/* Fill in overlapping ranges as needed.  This is a subroutine of
+   resolve_unit_addrs_overlap.  */
+
+static int
+resolve_unit_addrs_overlap_walk (struct backtrace_state *state,
+size_t *pfrom, size_t *pto,
+struct unit_addrs *enclosing,
+struct unit_addrs_vector *old_vec,
+backtrace_error_callback error_callback,
+void *data,
+struct unit_addrs_vector *new_vec)
+{
+  struct unit_addrs *old_addrs;
+  size_t old_count;
+  struct unit_addrs *new_addrs;
+  size_t from;
+  size_t to;
+
+  old_addrs = (struct unit_addrs *) old_vec->vec.base;
+  old_count = old_vec->count;
+  new_addrs = (struct unit_addrs *) new_vec->vec.base;
+
+  for (from = *pfrom, to = *pto; from < old_count; from++, to++)
+{
+  /* If we are in the scope of a larger range that can no longer
+cover any further ranges, return back to the caller.  */
+
+  if (enclosing != NULL
+ && enclosing->high <= old_addrs[from].low)
+   {
+ *pfrom = from;
+ *pto = to;
+ return 1;
+   }
+
+  new_addrs[to] = old_addrs[from];
+
+  /* If we are in scope of a larger range, fill in any gaps
+between this entry and the next one.
+
+There is an extra entry at the end of the vector, so it's
+always OK to refer to from + 1.  */
+
+  if (enclosing != NULL
+ && enclosing->high > old_addrs[from].high
+ && old_addrs[from].high < old_addrs[from + 1].low)
+   {
+ void *grew;
+ size_t new_high;
+
+ grew = backtrace_vector_grow (state, sizeof (struct unit_addrs),
+   error_callback, data, &new_vec->vec);
+ if (grew == NULL)
+   return 0;
+ new_addrs = (struct unit_addrs *) new_vec->vec.base;
+ to++;
+ new_addrs[to].low = old_addrs[from].high;
+ new_high = old_addrs[from + 1].low;
+ if (enclosing->high < new_high)
+   new_high = enclosing->high;
+ new_addrs[to].high = new_high;
+ new_addrs[to].u = enclosing->u;
+   }
+
+  /* If this range has a larger scope than the next one, use it to
+fill in any gaps.  */
+
+  if (old_addrs[from].high > old_addrs[from + 1].high)
+   {
+ *pfrom = from + 1;
+ *pto = to + 1;
+ if (!resolve_unit_addrs_overlap_walk (state, pfrom, pto,
+   &old_addrs[from], old_vec,
+   error_callback, data, new_vec))
+   return 0;
+ from = *pfrom;
+ to = *pto;
+
+ /* Undo the increment the loop is about to do.  */
+ from--;
+ to--;
+   }
+}
+
+  if (enclosing == NULL)
+{
+  struct unit_addrs *pa;
+
+  /* Add trailing entry.  */
+
+  pa = ((struct unit_addrs *)
+   backtrace_vector_grow (state, sizeof (struct unit_addrs),
+  error_callback, data, &new_vec->vec));
+  if (pa == NULL)
+   return 0;
+  pa->low = 0;
+  --pa->low;
+  pa->high = pa->low;
+  pa->u = NULL;
+
+  new_vec->count = to;
+}
+
+  return 1;
+}
+
+/* It is possible for the unit_addrs list to contain overlaps, as in
+
+   10: low == 10, high == 20, unit 1
+   11: low == 12, high == 15, unit 2
+   12: low == 20, high == 30, unit 1
+
+   In such a case, for pc == 17, a search using units_addr_search will
+   return entry 11.  However, pc == 17 doesn't fit in that range.  We
+   actually want range 10.
+
+   It seems that in general we might have an arbitrary number of
+   ranges in between 10 and 12.
+
+   To handle this we look for cases where range R1 is followed by
+   range R2 such that R2 is a strict subset of R1.  In such cases we
+   insert a new range R3 following R2 that fills in the remainder of
+   the address space covered by R1.  That le

[gcc r15-4472] libstdc++: Move std::__niter_base and std::__niter_wrap to stl_iterator.h

2024-10-18 Thread Jonathan Wakely via Gcc-cvs
https://gcc.gnu.org/g:2608fcfe5fcff260d62379d8f74efb9be8df70f4

commit r15-4472-g2608fcfe5fcff260d62379d8f74efb9be8df70f4
Author: Jonathan Wakely 
Date:   Wed Oct 9 12:55:54 2024 +0100

libstdc++: Move std::__niter_base and std::__niter_wrap to stl_iterator.h

Move the functions for unwrapping and rewrapping __normal_iterator
objects to the same file as the definition of __normal_iterator itself.

This will allow a later commit to make use of std::__niter_base in other
headers without having to include all of <bits/stl_algobase.h>.

libstdc++-v3/ChangeLog:

* include/bits/stl_algobase.h (__niter_base, __niter_wrap): Move
to ...
* include/bits/stl_iterator.h: ... here.
(__niter_base, __miter_base): Move all overloads to the end of
the header.
* testsuite/24_iterators/normal_iterator/wrapping.cc: New test.

Reviewed-by: Patrick Palka 

Diff:
---
 libstdc++-v3/include/bits/stl_algobase.h   |  45 ---
 libstdc++-v3/include/bits/stl_iterator.h   | 138 +++--
 .../24_iterators/normal_iterator/wrapping.cc   |  29 +
 3 files changed, 132 insertions(+), 80 deletions(-)

diff --git a/libstdc++-v3/include/bits/stl_algobase.h 
b/libstdc++-v3/include/bits/stl_algobase.h
index 384e5fdcdc99..751b7ad119b0 100644
--- a/libstdc++-v3/include/bits/stl_algobase.h
+++ b/libstdc++-v3/include/bits/stl_algobase.h
@@ -308,51 +308,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   return __a;
 }
 
-  // Fallback implementation of the function in bits/stl_iterator.h used to
-  // remove the __normal_iterator wrapper. See copy, fill, ...
-  template
-_GLIBCXX20_CONSTEXPR
-inline _Iterator
-__niter_base(_Iterator __it)
-_GLIBCXX_NOEXCEPT_IF(std::is_nothrow_copy_constructible<_Iterator>::value)
-{ return __it; }
-
-#if __cplusplus < 201103L
-  template
-_Ite
-__niter_base(const ::__gnu_debug::_Safe_iterator<_Ite, _Seq,
-std::random_access_iterator_tag>&);
-
- template
-_Ite
-__niter_base(const ::__gnu_debug::_Safe_iterator<
-::__gnu_cxx::__normal_iterator<_Ite, _Cont>, _Seq,
-std::random_access_iterator_tag>&);
-#else
-  template
-_GLIBCXX20_CONSTEXPR
-decltype(std::__niter_base(std::declval<_Ite>()))
-__niter_base(const ::__gnu_debug::_Safe_iterator<_Ite, _Seq,
-std::random_access_iterator_tag>&)
-noexcept(std::is_nothrow_copy_constructible<_Ite>::value);
-#endif
-
-  // Reverse the __niter_base transformation to get a
-  // __normal_iterator back again (this assumes that __normal_iterator
-  // is only used to wrap random access iterators, like pointers).
-  template
-_GLIBCXX20_CONSTEXPR
-inline _From
-__niter_wrap(_From __from, _To __res)
-{ return __from + (std::__niter_base(__res) - std::__niter_base(__from)); }
-
-  // No need to wrap, iterator already has the right type.
-  template
-_GLIBCXX20_CONSTEXPR
-inline _Iterator
-__niter_wrap(const _Iterator&, _Iterator __res)
-{ return __res; }
-
   // All of these auxiliary structs serve two purposes.  (1) Replace
   // calls to copy with memmove whenever possible.  (Memmove, not memcpy,
   // because the input and output ranges are permitted to overlap.)
diff --git a/libstdc++-v3/include/bits/stl_iterator.h 
b/libstdc++-v3/include/bits/stl_iterator.h
index 28a600c81cb2..be3fa6f7a349 100644
--- a/libstdc++-v3/include/bits/stl_iterator.h
+++ b/libstdc++-v3/include/bits/stl_iterator.h
@@ -654,24 +654,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 #  endif // C++20
 # endif // __glibcxx_make_reverse_iterator
 
-  template
-_GLIBCXX20_CONSTEXPR
-auto
-__niter_base(reverse_iterator<_Iterator> __it)
--> decltype(__make_reverse_iterator(__niter_base(__it.base())))
-{ return __make_reverse_iterator(__niter_base(__it.base())); }
-
   template
 struct __is_move_iterator >
   : __is_move_iterator<_Iterator>
 { };
-
-  template
-_GLIBCXX20_CONSTEXPR
-auto
-__miter_base(reverse_iterator<_Iterator> __it)
--> decltype(__make_reverse_iterator(__miter_base(__it.base())))
-{ return __make_reverse_iterator(__miter_base(__it.base())); }
 #endif // C++11
 
   // 24.4.2.2.1 back_insert_iterator
@@ -1336,19 +1322,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 { return __normal_iterator<_Iterator, _Container>(__i.base() + __n); }
 
 _GLIBCXX_END_NAMESPACE_VERSION
-} // namespace
+} // namespace __gnu_cxx
 
 namespace std _GLIBCXX_VISIBILITY(default)
 {
 _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
-  template
-_GLIBCXX20_CONSTEXPR
-_Iterator
-__niter_base(__gnu_cxx::__normal_iterator<_Iterator, _Container> __it)
-_GLIBCXX_NOEXCEPT_IF(std::is_nothrow_copy_constructible<_Iterator>::value)
-{ return __it.base(); }
-
 #if __cplusplus >= 201103L && __cplusplus <= 201703L
   // Need to overload __to_address because the pointer_traits primary template
   // will deduce element_type

[gcc r15-4473] libstdc++: Refactor std::uninitialized_{copy, fill, fill_n} algos [PR68350]

2024-10-18 Thread Jonathan Wakely via Gcc-cvs
https://gcc.gnu.org/g:3abe751ea86e3472fa2c97bf2014f9f93f569019

commit r15-4473-g3abe751ea86e3472fa2c97bf2014f9f93f569019
Author: Jonathan Wakely 
Date:   Wed Oct 9 12:55:54 2024 +0100

libstdc++: Refactor std::uninitialized_{copy,fill,fill_n} algos [PR68350]

This refactors the std::uninitialized_copy, std::uninitialized_fill and
std::uninitialized_fill_n algorithms to directly perform memcpy/memset
optimizations instead of dispatching to std::copy/std::fill/std::fill_n.

The reasons for this are:

- Use 'if constexpr' to simplify and optimize compilation throughput, so
  dispatching to specialized class templates is only needed for C++98
  mode.
- Use memcpy instead of memmove, because the conditions on
  non-overlapping ranges are stronger for std::uninitialized_copy than
  for std::copy. Using memcpy might be a minor optimization.
- No special case for creating a range of one element, which std::copy
  needs to deal with (see PR libstdc++/108846). The uninitialized algos
  create new objects, which reuses storage and is allowed to clobber
  tail padding.
- Relax the conditions for using memcpy/memset, because the C++20 rules
  on implicit-lifetime types mean that we can rely on memcpy to begin
  lifetimes of trivially copyable types.  We don't need to require
  trivially default constructible, so don't need to limit the
  optimization to trivial types. See PR 68350 for more details.
- Remove the dependency on std::copy and std::fill. This should mean
  that stl_uninitialized.h no longer needs to include all of
  stl_algobase.h.  This isn't quite true yet, because we still use
  std::fill in __uninitialized_default and still use std::fill_n in
  __uninitialized_default_n. That will be fixed later.

Several tests need changes to the diagnostics matched by dg-error
because we no longer use the __constructible() function that had a
static assert in. Now we just get straightforward errors for attempting
to use a deleted constructor.

Two tests needed more significant changes to the actual expected results
of executing the tests, because they were checking for old behaviour
which was incorrect according to the standard.
20_util/specialized_algorithms/uninitialized_copy/64476.cc was expecting
std::copy to be used for a call to std::uninitialized_copy involving two
trivially copyable types. That was incorrect behaviour, because a
non-trivial constructor should have been used, but using std::copy used
trivial default initialization followed by assignment.
20_util/specialized_algorithms/uninitialized_fill_n/sizes.cc was testing
the behaviour with a non-integral Size passed to uninitialized_fill_n,
but I wrote the test looking at the requirements of uninitialized_copy_n
which are not the same as uninitialized_fill_n. The former uses --n and
tests n > 0, but the latter just tests n-- (which will never be false
for a floating-point value with a fractional part).

libstdc++-v3/ChangeLog:

PR libstdc++/68350
PR libstdc++/93059
* include/bits/stl_uninitialized.h (__check_constructible)
(_GLIBCXX_USE_ASSIGN_FOR_INIT): Remove.
[C++98] (__unwrappable_niter): New trait.
(__uninitialized_copy): Replace use of std::copy.
(uninitialized_copy): Fix Doxygen comments. Open-code memcpy
optimization for C++11 and later.
(__uninitialized_fill): Replace use of std::fill.
(uninitialized_fill): Fix Doxygen comments. Open-code memset
optimization for C++11 and later.
(__uninitialized_fill_n): Replace use of std::fill_n.
(uninitialized_fill_n): Fix Doxygen comments. Open-code memset
optimization for C++11 and later.
* 
testsuite/20_util/specialized_algorithms/uninitialized_copy/64476.cc:
Adjust expected behaviour to match what the standard specifies.
* 
testsuite/20_util/specialized_algorithms/uninitialized_fill_n/sizes.cc:
Likewise.
* testsuite/20_util/specialized_algorithms/uninitialized_copy/1.cc:
Adjust dg-error directives.
* 
testsuite/20_util/specialized_algorithms/uninitialized_copy/89164.cc:
Likewise.
* 
testsuite/20_util/specialized_algorithms/uninitialized_copy_n/89164.cc:
Likewise.
* 
testsuite/20_util/specialized_algorithms/uninitialized_fill/89164.cc:
Likewise.
* 
testsuite/20_util/specialized_algorithms/uninitialized_fill_n/89164.cc:
Likewise.
* testsuite/23_containers/vector/cons/89164.cc: Likewise.
* testsuite/23_containers/vector/cons/89164_c++17.cc: Likewise.

Reviewed-by: Patrick Palka 

Diff:
---
 libstdc++-v3/include/bits/stl_uninitialized.h  | 379 ++

[gcc r15-4474] libstdc++: Make __normal_iterator constexpr, always_inline, nodiscard

2024-10-18 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:4020ee77186d1544e8565ae5786af99b8e56543a

commit r15-4474-g4020ee77186d1544e8565ae5786af99b8e56543a
Author: Jonathan Wakely 
Date:   Thu Oct 17 21:02:03 2024 +0100

libstdc++: Make __normal_iterator constexpr, always_inline, nodiscard

The __gnu_cxx::__normal_iterator type we use for std::vector::iterator
is not specified by the standard, it's an implementation detail. This
means it's not constrained by the rule that forbids strengthening
constexpr. We can make it meet the constexpr iterator requirements for
older standards, not only when it's required to be for C++20.

For the non-const member functions they can't be constexpr in C++11, so
use _GLIBCXX14_CONSTEXPR for those. For all constructors, const members
and non-member operator overloads, use _GLIBCXX_CONSTEXPR or just
constexpr.

We can also liberally add [[nodiscard]] and [[gnu::always_inline]]
attributes to those functions.

Also change some internal helpers for std::move_iterator which can be
unconditionally constexpr and marked nodiscard.

libstdc++-v3/ChangeLog:

* include/bits/stl_iterator.h (__normal_iterator): Make all
members and overloaded operators constexpr before C++20, and add
always_inline attribute
(__to_address): Add nodiscard and always_inline attributes.
(__make_move_if_noexcept_iterator): Add nodiscard
and make unconditionally constexpr.
(__niter_base(__normal_iterator), __niter_base(Iter)):
Add nodiscard and always_inline attributes.
(__niter_base(reverse_iterator), __niter_base(move_iterator))
(__miter_base): Add inline.
(__niter_wrap(From, To)): Add nodiscard attribute.
(__niter_wrap(const Iter&, Iter)): Add nodiscard and
always_inline attributes.

Reviewed-by: Patrick Palka 

Diff:
---
 libstdc++-v3/include/bits/stl_iterator.h | 116 +++
 1 file changed, 71 insertions(+), 45 deletions(-)

diff --git a/libstdc++-v3/include/bits/stl_iterator.h 
b/libstdc++-v3/include/bits/stl_iterator.h
index be3fa6f7a349..26c5eab4b4e8 100644
--- a/libstdc++-v3/include/bits/stl_iterator.h
+++ b/libstdc++-v3/include/bits/stl_iterator.h
@@ -1046,24 +1046,29 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   using iterator_concept = std::__detail::__iter_concept<_Iterator>;
 #endif
 
-  _GLIBCXX_CONSTEXPR __normal_iterator() _GLIBCXX_NOEXCEPT
-  : _M_current(_Iterator()) { }
+  __attribute__((__always_inline__))
+  _GLIBCXX_CONSTEXPR
+  __normal_iterator() _GLIBCXX_NOEXCEPT
+  : _M_current() { }
 
-  explicit _GLIBCXX20_CONSTEXPR
+  __attribute__((__always_inline__))
+  explicit _GLIBCXX_CONSTEXPR
   __normal_iterator(const _Iterator& __i) _GLIBCXX_NOEXCEPT
   : _M_current(__i) { }
 
   // Allow iterator to const_iterator conversion
 #if __cplusplus >= 201103L
   template>
-   _GLIBCXX20_CONSTEXPR
+   [[__gnu__::__always_inline__]]
+   constexpr
__normal_iterator(const __normal_iterator<_Iter, _Container>& __i)
noexcept
 #else
   // N.B. _Container::pointer is not actually in container requirements,
   // but is present in std::vector and std::basic_string.
   template
-__normal_iterator(const __normal_iterator<_Iter,
+   __attribute__((__always_inline__))
+   __normal_iterator(const __normal_iterator<_Iter,
  typename __enable_if<
   (std::__are_same<_Iter, typename _Container::pointer>::__value),
  _Container>::__type>& __i)
@@ -1071,17 +1076,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 : _M_current(__i.base()) { }
 
   // Forward iterator requirements
-  _GLIBCXX20_CONSTEXPR
+
+  __attribute__((__always_inline__)) _GLIBCXX_NODISCARD
+  _GLIBCXX_CONSTEXPR
   reference
   operator*() const _GLIBCXX_NOEXCEPT
   { return *_M_current; }
 
-  _GLIBCXX20_CONSTEXPR
+  __attribute__((__always_inline__)) _GLIBCXX_NODISCARD
+  _GLIBCXX_CONSTEXPR
   pointer
   operator->() const _GLIBCXX_NOEXCEPT
   { return _M_current; }
 
-  _GLIBCXX20_CONSTEXPR
+  __attribute__((__always_inline__))
+  _GLIBCXX14_CONSTEXPR
   __normal_iterator&
   operator++() _GLIBCXX_NOEXCEPT
   {
@@ -1089,13 +1098,16 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return *this;
   }
 
-  _GLIBCXX20_CONSTEXPR
+  __attribute__((__always_inline__))
+  _GLIBCXX14_CONSTEXPR
   __normal_iterator
   operator++(int) _GLIBCXX_NOEXCEPT
   { return __normal_iterator(_M_current++); }
 
   // Bidirectional iterator requirements
-  _GLIBCXX20_CONSTEXPR
+
+  __attribute__((__always_inline__))
+  _GLIBCXX14_CONSTEXPR
   __normal_iterator&
   operator--() _GLIBCXX_NOEXCEPT
   {
@@ -1103,38 +1115,46 @@ _GLIBCXX_BEGIN_NAMESPACE_V

[gcc r15-4477] libstdc++: Add always_inline to some one-liners in <bits/stl_algobase.h>

2024-10-18 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:6ecf2b380da6129cd32edccdbc322dfd2884d490

commit r15-4477-g6ecf2b380da6129cd32edccdbc322dfd2884d490
Author: Jonathan Wakely 
Date:   Fri Oct 11 14:49:21 2024 +0100

libstdc++: Add always_inline to some one-liners in <bits/stl_algobase.h>

We implement std::copy, std::fill etc. as a series of calls to other
overloads which incrementally peel off layers of iterator wrappers. This
adds a high abstraction penalty for -O0 and potentially even -O1. Add
the always_inline attribute to several functions that are just a single
return statement (and maybe a static_assert, or some concept-checking
assertions which are disabled by default).

libstdc++-v3/ChangeLog:

* include/bits/stl_algobase.h (__copy_move_a1, __copy_move_a)
(__copy_move_backward_a1, __copy_move_backward_a, move_backward)
(__fill_a1, __fill_a, fill, __fill_n_a, fill_n, __equal_aux):
Add always_inline attribute to one-line forwarding functions.

Reviewed-by: Patrick Palka 

Diff:
---
 libstdc++-v3/include/bits/stl_algobase.h | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/libstdc++-v3/include/bits/stl_algobase.h 
b/libstdc++-v3/include/bits/stl_algobase.h
index 049ba4f96c4c..9ecd0b216c1a 100644
--- a/libstdc++-v3/include/bits/stl_algobase.h
+++ b/libstdc++-v3/include/bits/stl_algobase.h
@@ -498,12 +498,14 @@ _GLIBCXX_END_NAMESPACE_CONTAINER
 __copy_move_a1(_II, _II, _GLIBCXX_STD_C::_Deque_iterator<_Tp, _Tp&, _Tp*>);
 
   template
+__attribute__((__always_inline__))
 _GLIBCXX20_CONSTEXPR
 inline _OI
 __copy_move_a1(_II __first, _II __last, _OI __result)
 { return std::__copy_move_a2<_IsMove>(__first, __last, __result); }
 
   template
+__attribute__((__always_inline__))
 _GLIBCXX20_CONSTEXPR
 inline _OI
 __copy_move_a(_II __first, _II __last, _OI __result)
@@ -755,6 +757,7 @@ _GLIBCXX_END_NAMESPACE_CONTAINER
 #undef _GLIBCXX_ADVANCE
 
   template
+__attribute__((__always_inline__))
 _GLIBCXX20_CONSTEXPR
 inline _BI2
 __copy_move_backward_a1(_BI1 __first, _BI1 __last, _BI2 __result)
@@ -783,6 +786,7 @@ _GLIBCXX_END_NAMESPACE_CONTAINER
_GLIBCXX_STD_C::_Deque_iterator<_Tp, _Tp&, _Tp*>);
 
   template
+__attribute__((__always_inline__))
 _GLIBCXX20_CONSTEXPR
 inline _OI
 __copy_move_backward_a(_II __first, _II __last, _OI __result)
@@ -838,6 +842,7 @@ _GLIBCXX_END_NAMESPACE_CONTAINER
*  that the start of the output range may overlap [first,last).
   */
   template
+__attribute__((__always_inline__))
 _GLIBCXX20_CONSTEXPR
 inline _BI2
 copy_backward(_BI1 __first, _BI1 __last, _BI2 __result)
@@ -873,6 +878,7 @@ _GLIBCXX_END_NAMESPACE_CONTAINER
*  that the start of the output range may overlap [first,last).
   */
   template
+__attribute__((__always_inline__))
 _GLIBCXX20_CONSTEXPR
 inline _BI2
 move_backward(_BI1 __first, _BI1 __last, _BI2 __result)
@@ -956,6 +962,7 @@ _GLIBCXX_END_NAMESPACE_CONTAINER
 }
 
   template
+__attribute__((__always_inline__))
 _GLIBCXX20_CONSTEXPR
 inline void
 __fill_a1(::__gnu_cxx::__normal_iterator<_Ite, _Cont> __first,
@@ -975,6 +982,7 @@ _GLIBCXX_END_NAMESPACE_CONTAINER
const bool&);
 
   template
+__attribute__((__always_inline__))
 _GLIBCXX20_CONSTEXPR
 inline void
 __fill_a(_FIte __first, _FIte __last, const _Tp& __value)
@@ -1000,6 +1008,7 @@ _GLIBCXX_END_NAMESPACE_CONTAINER
*  to @c memset or @c wmemset.
   */
   template
+__attribute__((__always_inline__))
 _GLIBCXX20_CONSTEXPR
 inline void
 fill(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value)
@@ -1106,6 +1115,7 @@ _GLIBCXX_END_NAMESPACE_CONTAINER
   std::input_iterator_tag);
 
   template
+__attribute__((__always_inline__))
 _GLIBCXX20_CONSTEXPR
 inline _OutputIterator
 __fill_n_a(_OutputIterator __first, _Size __n, const _Tp& __value,
@@ -1118,6 +1128,7 @@ _GLIBCXX_END_NAMESPACE_CONTAINER
 }
 
   template
+__attribute__((__always_inline__))
 _GLIBCXX20_CONSTEXPR
 inline _OutputIterator
 __fill_n_a(_OutputIterator __first, _Size __n, const _Tp& __value,
@@ -1130,6 +1141,7 @@ _GLIBCXX_END_NAMESPACE_CONTAINER
 }
 
   template
+__attribute__((__always_inline__))
 _GLIBCXX20_CONSTEXPR
 inline _OutputIterator
 __fill_n_a(_OutputIterator __first, _Size __n, const _Tp& __value,
@@ -1165,6 +1177,7 @@ _GLIBCXX_END_NAMESPACE_CONTAINER
   // DR 865. More algorithms that throw away information
   // DR 426. search_n(), fill_n(), and generate_n() with negative n
   template
+__attribute__((__always_inline__))
 _GLIBCXX20_CONSTEXPR
 inline _OI
 fill_n(_OI __first, _Size __n, const _Tp& __value)
@@ -1244,6 +1257,7 @@ _GLIBCXX_END_NAMESPACE_CONTAINER
 }
 
   template
+__attribute__((__always_inline__))
 _GLIBCXX20_CONSTEXPR
 inline bool

[gcc r15-4475] libstdc++: Inline memmove optimizations for std::copy etc. [PR115444]

2024-10-18 Thread Jonathan Wakely via Gcc-cvs
https://gcc.gnu.org/g:7ed561f63e7955df4d194669998176df5ef47803

commit r15-4475-g7ed561f63e7955df4d194669998176df5ef47803
Author: Jonathan Wakely 
Date:   Thu Jun 27 13:01:18 2024 +0100

libstdc++: Inline memmove optimizations for std::copy etc. [PR115444]

This removes all the __copy_move class template specializations that
decide how to optimize std::copy and std::copy_n. We can inline those
optimizations into the algorithms, using if-constexpr (and macros for
C++98 compatibility) and remove the code dispatching to the various
class template specializations.

Doing this means we implement the optimization directly for std::copy_n
instead of deferring to std::copy. That avoids the unwanted consequence
of advancing the iterator in copy_n only to take the difference later to
get back to the length that we already had in copy_n originally (as
described in PR 115444).

With the new flattened implementations, we can also lower contiguous
iterators to pointers in std::copy/std::copy_n/std::copy_backward, so
that they benefit from the same memmove optimizations as pointers.
There's a subtlety though: contiguous iterators can potentially throw
exceptions to exit the algorithm early.  So we can only transform the
loop to memmove if dereferencing the iterator is noexcept. We don't
check that incrementing the iterator is noexcept because we advance the
contiguous iterators before using memmove, so that if incrementing would
throw, that happens first. I am writing a proposal (P3349R0) which would
make this unnecessary, so I hope we can drop the nothrow requirements
later.

This change also solves PR 114817 by checking is_trivially_assignable
before optimizing copy/copy_n etc. to memmove. It's not enough to check
that the types are trivially copyable (a precondition for using memmove
at all), we also need to check that the specific assignment that would
be performed by the algorithm is also trivial. Replacing a non-trivial
assignment with memmove would be observable, so not allowed.

libstdc++-v3/ChangeLog:

PR libstdc++/115444
PR libstdc++/114817
* include/bits/stl_algo.h (__copy_n): Remove generic overload
and overload for random access iterators.
(copy_n): Inline generic version of __copy_n here. Do not defer
to std::copy for random access iterators.
* include/bits/stl_algobase.h (__copy_move): Remove.
(__nothrow_contiguous_iterator, __memcpyable_iterators): New
concepts.
(__assign_one, _GLIBCXX_TO_ADDR, _GLIBCXX_ADVANCE): New helpers.
(__copy_move_a2): Inline __copy_move logic and conditional
memmove optimization into the most generic overload.
(__copy_n_a): Likewise.
(__copy_move_backward): Remove.
(__copy_move_backward_a2): Inline __copy_move_backward logic and
memmove optimization into the most generic overload.
* 
testsuite/20_util/specialized_algorithms/uninitialized_copy/114817.cc:
New test.
* 
testsuite/20_util/specialized_algorithms/uninitialized_copy_n/114817.cc:
New test.
* testsuite/25_algorithms/copy/114817.cc: New test.
* testsuite/25_algorithms/copy/115444.cc: New test.
* testsuite/25_algorithms/copy_n/114817.cc: New test.

Reviewed-by: Patrick Palka 

Diff:
---
 libstdc++-v3/include/bits/stl_algo.h   |  24 +-
 libstdc++-v3/include/bits/stl_algobase.h   | 412 +++--
 .../uninitialized_copy/114817.cc   |  39 ++
 .../uninitialized_copy_n/114817.cc |  39 ++
 .../testsuite/25_algorithms/copy/114817.cc |  38 ++
 .../testsuite/25_algorithms/copy/115444.cc |  93 +
 .../testsuite/25_algorithms/copy_n/114817.cc   |  38 ++
 7 files changed, 461 insertions(+), 222 deletions(-)

diff --git a/libstdc++-v3/include/bits/stl_algo.h 
b/libstdc++-v3/include/bits/stl_algo.h
index a1ef665506d1..489ce7e14d24 100644
--- a/libstdc++-v3/include/bits/stl_algo.h
+++ b/libstdc++-v3/include/bits/stl_algo.h
@@ -665,25 +665,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   return __result;
 }
 
-  template
-_GLIBCXX20_CONSTEXPR
-_OutputIterator
-__copy_n(_InputIterator __first, _Size __n,
-_OutputIterator __result, input_iterator_tag)
-{
-  return std::__niter_wrap(__result,
-  __copy_n_a(__first, __n,
- std::__niter_base(__result), true));
-}
-
-  template
-_GLIBCXX20_CONSTEXPR
-inline _OutputIterator
-__copy_n(_RandomAccessIterator __first, _Size __n,
-_OutputIterator __result, random_access_iterator_tag)
-{ return std::copy(__first, __first + __n, __result); }
-
   /**
*  @brief Copies th

[gcc r15-4476] libstdc++: Add nodiscard to std::find

2024-10-18 Thread Jonathan Wakely via Gcc-cvs
https://gcc.gnu.org/g:5546be4c24cd1085c8e43b5635be56a9b591c626

commit r15-4476-g5546be4c24cd1085c8e43b5635be56a9b591c626
Author: Jonathan Wakely 
Date:   Thu Oct 17 21:18:14 2024 +0100

libstdc++: Add nodiscard to std::find

I missed this one out in r14-9478-gdf483ebd24689a but I don't think that
was intentional. I see no reason std::find shouldn't be [[nodiscard]].

libstdc++-v3/ChangeLog:

* include/bits/stl_algo.h (find): Add nodiscard.

Reviewed-by: Patrick Palka 

Diff:
---
 libstdc++-v3/include/bits/stl_algo.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/bits/stl_algo.h 
b/libstdc++-v3/include/bits/stl_algo.h
index 489ce7e14d24..780bd8e5e826 100644
--- a/libstdc++-v3/include/bits/stl_algo.h
+++ b/libstdc++-v3/include/bits/stl_algo.h
@@ -3820,7 +3820,7 @@ _GLIBCXX_BEGIN_NAMESPACE_ALGO
*  such that @c *i == @p __val, or @p __last if no such iterator exists.
   */
   template
-_GLIBCXX20_CONSTEXPR
+_GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR
 inline _InputIterator
 find(_InputIterator __first, _InputIterator __last, const _Tp& __val)
 {


[gcc r15-4478] libstdc++: Reuse std::__assign_one in <bits/ranges_algobase.h>

2024-10-18 Thread Jonathan Wakely via Gcc-cvs
https://gcc.gnu.org/g:d0a9ae1321f01c33b7ee377249cad30187061c0c

commit r15-4478-gd0a9ae1321f01c33b7ee377249cad30187061c0c
Author: Jonathan Wakely 
Date:   Mon Oct 14 23:34:20 2024 +0100

libstdc++: Reuse std::__assign_one in <bits/ranges_algobase.h>

Use std::__assign_one instead of ranges::__assign_one. Adjust the uses,
because std::__assign_one has the arguments in the opposite order (the
same order as an assignment expression).

libstdc++-v3/ChangeLog:

* include/bits/ranges_algobase.h (ranges::__assign_one): Remove.
(__copy_or_move, __copy_or_move_backward): Use std::__assign_one
instead of ranges::__assign_one.

Reviewed-by: Patrick Palka 

Diff:
---
 libstdc++-v3/include/bits/ranges_algobase.h | 22 ++
 1 file changed, 6 insertions(+), 16 deletions(-)

diff --git a/libstdc++-v3/include/bits/ranges_algobase.h 
b/libstdc++-v3/include/bits/ranges_algobase.h
index 0345ea850a4e..df4e770e7a65 100644
--- a/libstdc++-v3/include/bits/ranges_algobase.h
+++ b/libstdc++-v3/include/bits/ranges_algobase.h
@@ -225,16 +225,6 @@ namespace ranges
  copy_backward_result<_Iter, _Out>>
 __copy_or_move_backward(_Iter __first, _Sent __last, _Out __result);
 
-  template
-constexpr void
-__assign_one(_Iter& __iter, _Out& __result)
-{
-  if constexpr (_IsMove)
- *__result = std::move(*__iter);
-  else
- *__result = *__iter;
-}
-
   template _Sent,
   weakly_incrementable _Out>
@@ -294,14 +284,14 @@ namespace ranges
__builtin_memmove(__result, __first,
  sizeof(_ValueTypeI) * __num);
  else if (__num == 1)
-   ranges::__assign_one<_IsMove>(__first, __result);
+   std::__assign_one<_IsMove>(__result, __first);
  return {__first + __num, __result + __num};
}
}
 
  for (auto __n = __last - __first; __n > 0; --__n)
{
- ranges::__assign_one<_IsMove>(__first, __result);
+ std::__assign_one<_IsMove>(__result, __first);
  ++__first;
  ++__result;
}
@@ -311,7 +301,7 @@ namespace ranges
{
  while (__first != __last)
{
- ranges::__assign_one<_IsMove>(__first, __result);
+ std::__assign_one<_IsMove>(__result, __first);
  ++__first;
  ++__result;
}
@@ -423,7 +413,7 @@ namespace ranges
__builtin_memmove(__result, __first,
  sizeof(_ValueTypeI) * __num);
  else if (__num == 1)
-   ranges::__assign_one<_IsMove>(__first, __result);
+   std::__assign_one<_IsMove>(__result, __first);
  return {__first + __num, __result};
}
}
@@ -435,7 +425,7 @@ namespace ranges
{
  --__tail;
  --__result;
- ranges::__assign_one<_IsMove>(__tail, __result);
+ std::__assign_one<_IsMove>(__result, __tail);
}
  return {std::move(__lasti), std::move(__result)};
}
@@ -448,7 +438,7 @@ namespace ranges
{
  --__tail;
  --__result;
- ranges::__assign_one<_IsMove>(__tail, __result);
+ std::__assign_one<_IsMove>(__result, __tail);
}
  return {std::move(__lasti), std::move(__result)};
}


[gcc r15-4481] [PATCH 2/7] RISC-V: Fix uninitialized reg in memcpy

2024-10-18 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:212d8685e4590c9f1168f503a383e3ea2639b418

commit r15-4481-g212d8685e4590c9f1168f503a383e3ea2639b418
Author: Craig Blackmore 
Date:   Fri Oct 18 09:06:58 2024 -0600

[PATCH 2/7] RISC-V: Fix uninitialized reg in memcpy

gcc/ChangeLog:

* config/riscv/riscv-string.cc (expand_block_move): Replace
`end` with `length_rtx` in gen_rtx_NE.

Diff:
---
 gcc/config/riscv/riscv-string.cc | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
index 0c5ffd7d861e..0f1353baba3b 100644
--- a/gcc/config/riscv/riscv-string.cc
+++ b/gcc/config/riscv/riscv-string.cc
@@ -1078,7 +1078,6 @@ expand_block_move (rtx dst_in, rtx src_in, rtx length_in)
   bool need_loop = true;
   bool size_p = optimize_function_for_size_p (cfun);
   rtx src, dst;
-  rtx end = gen_reg_rtx (Pmode);
   rtx vec;
   rtx length_rtx = length_in;
 
@@ -1245,7 +1244,7 @@ expand_block_move (rtx dst_in, rtx src_in, rtx length_in)
   emit_insn (gen_rtx_SET (length_rtx, gen_rtx_MINUS (Pmode, length_rtx, 
cnt)));
 
   /* Emit the loop condition.  */
-  rtx test = gen_rtx_NE (VOIDmode, end, const0_rtx);
+  rtx test = gen_rtx_NE (VOIDmode, length_rtx, const0_rtx);
   emit_jump_insn (gen_cbranch4 (Pmode, test, length_rtx, const0_rtx, 
label));
   emit_insn (gen_nop ());
 }


[gcc r15-4482] [PATCH 3/7] RISC-V: Fix vector memcpy smaller LMUL generation

2024-10-18 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:b039d06c9a810a3fab4c5eb9d50b0c7aff94b2d8

commit r15-4482-gb039d06c9a810a3fab4c5eb9d50b0c7aff94b2d8
Author: Craig Blackmore 
Date:   Fri Oct 18 09:17:21 2024 -0600

[PATCH 3/7] RISC-V: Fix vector memcpy smaller LMUL generation

If riscv_vector::expand_block_move is generating a straight-line memcpy
using a predicated store, it tries to use a smaller LMUL to reduce
register pressure if it still allows an entire transfer.

This happens in the inner loop of riscv_vector::expand_block_move,
however, the vmode chosen by this loop gets overwritten later in the
function, so I have added the missing break from the outer loop.

I have also addressed a couple of issues with the conditions of the if
statement within the inner loop.

The first condition did not make sense to me:
```
  TARGET_MIN_VLEN * lmul <= nunits * BITS_PER_UNIT
```
I think this was supposed to be checking that the length fits within the
given LMUL, so I have changed it to do that.

The second condition:
```
  /* Avoid loosing the option of using vsetivli .  */
  && (nunits <= 31 * lmul || nunits > 31 * 8)
```
seems to imply that lmul affects the range of AVL immediate that
vsetivli can take but I don't think that is correct.  Anyway, I don't
think this condition is necessary because if we find a suitable mode we
should stick with it, regardless of whether it allowed vsetivli, rather
than continuing to try larger lmul which would increase register
pressure or smaller potential_ew which would increase AVL.  I have
removed this condition.

gcc/ChangeLog:

* config/riscv/riscv-string.cc (expand_block_move): Fix
condition for using smaller LMUL.  Break outer loop if a
suitable vmode has been found.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/vsetvl/pr112929-1.c: Expect smaller lmul.
* gcc.target/riscv/rvv/vsetvl/pr112988-1.c: Likewise.
* gcc.target/riscv/rvv/base/cpymem-3.c: New test.

Diff:
---
 gcc/config/riscv/riscv-string.cc   |  8 +-
 gcc/testsuite/gcc.target/riscv/rvv/base/cpymem-3.c | 85 ++
 .../gcc.target/riscv/rvv/vsetvl/pr112929-1.c   |  2 +-
 .../gcc.target/riscv/rvv/vsetvl/pr112988-1.c   |  2 +-
 4 files changed, 92 insertions(+), 5 deletions(-)

diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
index 0f1353baba3b..b590c5163543 100644
--- a/gcc/config/riscv/riscv-string.cc
+++ b/gcc/config/riscv/riscv-string.cc
@@ -1153,9 +1153,7 @@ expand_block_move (rtx dst_in, rtx src_in, rtx length_in)
 Still, by choosing a lower LMUL factor that still allows
 an entire transfer, we can reduce register pressure.  */
  for (unsigned lmul = 1; lmul <= 4; lmul <<= 1)
-   if (TARGET_MIN_VLEN * lmul <= nunits * BITS_PER_UNIT
-   /* Avoid loosing the option of using vsetivli .  */
-   && (nunits <= 31 * lmul || nunits > 31 * 8)
+   if (length * BITS_PER_UNIT <= TARGET_MIN_VLEN * lmul
&& multiple_p (BYTES_PER_RISCV_VECTOR * lmul, potential_ew)
&& (riscv_vector::get_vector_mode
 (elem_mode, exact_div (BYTES_PER_RISCV_VECTOR * lmul,
@@ -1163,6 +1161,10 @@ expand_block_move (rtx dst_in, rtx src_in, rtx length_in)
  break;
}
 
+ /* Stop searching if a suitable vmode has been found.  */
+ if (vmode != VOIDmode)
+   break;
+
  /* The RVVM8?I modes are notionally 8 * BYTES_PER_RISCV_VECTOR bytes
 wide.  BYTES_PER_RISCV_VECTOR can't be evenly divided by
 the sizes of larger element types; the LMUL factor of 8 can at
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/cpymem-3.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/cpymem-3.c
new file mode 100644
index ..f07078ba6a7c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/cpymem-3.c
@@ -0,0 +1,85 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O1 -fno-schedule-insns -fno-schedule-insns2 
-mrvv-max-lmul=m8" } */
+/* { dg-add-options riscv_v } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#define MIN_VECTOR_BYTES (__riscv_v_min_vlen / 8)
+
+/* Check that vector memcpy with predicated store uses smaller LMUL where
+   possible.
+
+/* m1
+** f1:
+**  (
+**  vsetivli\s+zero,\d+,e8,m1,ta,ma
+**  |
+**  li\s+[ta][0-7],\d+
+**  vsetvli\s+zero,[ta][0-7],e8,m1,ta,ma
+**  )
+**  vle8.v\s+v\d+,0\(a1\)
+**  vse8.v\s+v\d+,0\(a0\)
+**  ret
+*/
+
+void f1 (char *d, char *s)
+{
+  __builtin_memcpy (d, s, MIN_VECTOR_BYTES - 1);
+}
+
+/* m2
+** f2:
+**  (
+**  vsetivli\s+zero,\d+,e8,m2,ta,ma
+**  |
+**  li\s+[ta][0-7],\d+
+**  vsetvli\s+zero,[ta][0-7],e8,m2,ta,ma
+**  )
+**  vle8.v\s+v\d+,0\(a1\)
+**  vse8.v\s+v\d+,0\(a0\)
+**

[gcc(refs/users/meissner/heads/work181-sha)] Move xxeval case before alternative that needs a temporary register.

2024-10-18 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:c6af3094b52b550a54df39b5a5f4ca0ee26d1158

commit c6af3094b52b550a54df39b5a5f4ca0ee26d1158
Author: Michael Meissner 
Date:   Fri Oct 18 16:44:22 2024 -0400

Move xxeval case before alternative that needs a temporary register.

2024-10-18  Michael Meissner  

gcc/

* config/rs6000/genfusion.pl (gen_logical_addsubf): Move xxeval case
before alternative that needs a temporary register.
* config/rs6000/fusion.md: Regenerate.

Diff:
---
 gcc/config/rs6000/fusion.md| 792 -
 gcc/config/rs6000/genfusion.pl |  14 +-
 2 files changed, 403 insertions(+), 403 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index 6b5830908a51..215a3aae074f 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -1871,170 +1871,170 @@
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vand -> vand
 (define_insn "*fuse_vand_vand"
-  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,v,wa")
-(and:VM (and:VM (match_operand:VM 0 "vector_fusion_operand" 
"v,v,v,v,wa")
-  (match_operand:VM 1 "vector_fusion_operand" 
"%v,v,v,v,wa"))
- (match_operand:VM 2 "vector_fusion_operand" "v,v,v,v,wa")))
-   (clobber (match_scratch:VM 4 "=X,X,X,&v,X"))]
+  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
+(and:VM (and:VM (match_operand:VM 0 "vector_fusion_operand" 
"v,v,v,wa,v")
+  (match_operand:VM 1 "vector_fusion_operand" 
"%v,v,v,wa,v"))
+ (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v")))
+   (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
vand %3,%1,%0\;vand %3,%3,%2
vand %3,%1,%0\;vand %3,%3,%2
vand %3,%1,%0\;vand %3,%3,%2
-   vand %4,%1,%0\;vand %3,%4,%2
-   xxeval %x3,%x2,%x1,%x0,1"
+   xxeval %x3,%x2,%x1,%x0,1
+   vand %4,%1,%0\;vand %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
(set_attr "length" "8")
-   (set_attr "prefixed" "*,*,*,*,yes")
-   (set_attr "isa" "*,*,*,*,xxeval")])
+   (set_attr "prefixed" "*,*,*,yes,*")
+   (set_attr "isa" "*,*,*,xxeval,*")])
 
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vandc -> vand
 (define_insn "*fuse_vandc_vand"
-  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,v,wa")
-(and:VM (and:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" 
"v,v,v,v,wa"))
-  (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,v,wa"))
- (match_operand:VM 2 "vector_fusion_operand" "v,v,v,v,wa")))
-   (clobber (match_scratch:VM 4 "=X,X,X,&v,X"))]
+  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
+(and:VM (and:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" 
"v,v,v,wa,v"))
+  (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,wa,v"))
+ (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v")))
+   (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
vandc %3,%1,%0\;vand %3,%3,%2
vandc %3,%1,%0\;vand %3,%3,%2
vandc %3,%1,%0\;vand %3,%3,%2
-   vandc %4,%1,%0\;vand %3,%4,%2
-   xxeval %x3,%x2,%x1,%x0,2"
+   xxeval %x3,%x2,%x1,%x0,2
+   vandc %4,%1,%0\;vand %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
(set_attr "length" "8")
-   (set_attr "prefixed" "*,*,*,*,yes")
-   (set_attr "isa" "*,*,*,*,xxeval")])
+   (set_attr "prefixed" "*,*,*,yes,*")
+   (set_attr "isa" "*,*,*,xxeval,*")])
 
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector veqv -> vand
 (define_insn "*fuse_veqv_vand"
-  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,v,wa")
-(and:VM (not:VM (xor:VM (match_operand:VM 0 "vector_fusion_operand" 
"v,v,v,v,wa")
-  (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,v,wa")))
- (match_operand:VM 2 "vector_fusion_operand" "v,v,v,v,wa")))
-   (clobber (match_scratch:VM 4 "=X,X,X,&v,X"))]
+  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
+(and:VM (not:VM (xor:VM (match_operand:VM 0 "vector_fusion_operand" 
"v,v,v,wa,v")
+  (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,wa,v")))
+ (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v")))
+   (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
veqv %3,%1,%0\;vand %3,%3,%2
veqv %3,%1,%0\;vand %3,%3,%2
veqv %3,%1,%0\;vand %3,%3,%2
-   veqv %4,%1,%0\;vand %3,%4,%2
-   xxeval %x3,%x2,%x1,%x0,9"
+   xxeval %x3,%x2,%x1,%x0,9
+   veqv %4,%1,%0\;vand %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
(set_attr "length" "8")
-   (set_attr "prefixed" "*,*,*,*,yes")
-   (set_attr "isa" "*,*,*,*,xxeval")])
+   (set_attr "pref

[gcc r15-4421] testsuite: arm: Corrected expected error message for cde-mve-error-1.c

2024-10-18 Thread Torbjorn Svensson via Gcc-cvs
https://gcc.gnu.org/g:a449b4245556e7540e916e9ec475e8275c0b8484

commit r15-4421-ga449b4245556e7540e916e9ec475e8275c0b8484
Author: Torbjörn SVENSSON 
Date:   Fri Oct 18 07:55:22 2024 +0200

testsuite: arm: Corrected expected error message for cde-mve-error-1.c

gcc/testsuite/ChangeLog:

* gcc.target/arm/acle/cde-mve-error-1.c: Corrected quotation in
expected error message.

Signed-off-by: Torbjörn SVENSSON 

Diff:
---
 .../gcc.target/arm/acle/cde-mve-error-1.c  | 56 +++---
 1 file changed, 28 insertions(+), 28 deletions(-)

diff --git a/gcc/testsuite/gcc.target/arm/acle/cde-mve-error-1.c 
b/gcc/testsuite/gcc.target/arm/acle/cde-mve-error-1.c
index 611bdb9dd0e1..2c34a2fd2ec8 100644
--- a/gcc/testsuite/gcc.target/arm/acle/cde-mve-error-1.c
+++ b/gcc/testsuite/gcc.target/arm/acle/cde-mve-error-1.c
@@ -8,35 +8,35 @@
to the intrinsic user-facing functions.  */
 uint8x16_t test_invalid_arguments (uint8x16_t n, uint8x16_t m)
 {
-  uint8x16_t accum = __arm_vcx1q_u8 (0, 33, 1);   /* { dg-error {macro 
"__arm_vcx1q_u8" passed 3 arguments, but takes just 2} } */
-  accum += __arm_vcx1qa (0, accum, 33, 1);/* { dg-error {macro 
"__arm_vcx1qa" passed 4 arguments, but takes just 3} } */
-  accum += __arm_vcx2q_u8 (0, n, 33, 1);  /* { dg-error {macro 
"__arm_vcx2q_u8" passed 4 arguments, but takes just 3} } */
-  accum += __arm_vcx2q (0, n, 33, 1); /* { dg-error {macro 
"__arm_vcx2q" passed 4 arguments, but takes just 3} } */
-  accum += __arm_vcx2qa (0, accum, n, 33, 1); /* { dg-error {macro 
"__arm_vcx2qa" passed 5 arguments, but takes just 4} } */
-  accum += __arm_vcx3q_u8 (0, n, m, 33, 1);   /* { dg-error {macro 
"__arm_vcx3q_u8" passed 5 arguments, but takes just 4} } */
-  accum += __arm_vcx3q (0, n, m, 33, 1);  /* { dg-error {macro 
"__arm_vcx3q" passed 5 arguments, but takes just 4} } */
-  accum += __arm_vcx3qa (0, accum, n, m, 33, 1);  /* { dg-error {macro 
"__arm_vcx3qa" passed 6 arguments, but takes just 5} } */
-  accum += __arm_vcx1q_u8 (0);/* { dg-error {macro 
"__arm_vcx1q_u8" requires 2 arguments, but only 1 given} } */
-  accum += __arm_vcx1qa (0, accum);   /* { dg-error {macro 
"__arm_vcx1qa" requires 3 arguments, but only 2 given} } */
-  accum += __arm_vcx2q_u8 (0, n); /* { dg-error {macro 
"__arm_vcx2q_u8" requires 3 arguments, but only 2 given} } */
-  accum += __arm_vcx2q (0, n);/* { dg-error {macro 
"__arm_vcx2q" requires 3 arguments, but only 2 given} } */
-  accum += __arm_vcx2qa (0, accum, n);/* { dg-error {macro 
"__arm_vcx2qa" requires 4 arguments, but only 3 given} } */
-  accum += __arm_vcx3q_u8 (0, n, m);  /* { dg-error {macro 
"__arm_vcx3q_u8" requires 4 arguments, but only 3 given} } */
-  accum += __arm_vcx3q (0, n, m); /* { dg-error {macro 
"__arm_vcx3q" requires 4 arguments, but only 3 given} } */
-  accum += __arm_vcx3qa (0, accum, n, m); /* { dg-error {macro 
"__arm_vcx3qa" requires 5 arguments, but only 4 given} } */
+  uint8x16_t accum = __arm_vcx1q_u8 (0, 33, 1);   /* { dg-error {macro 
'__arm_vcx1q_u8' passed 3 arguments, but takes just 2} } */
+  accum += __arm_vcx1qa (0, accum, 33, 1);/* { dg-error {macro 
'__arm_vcx1qa' passed 4 arguments, but takes just 3} } */
+  accum += __arm_vcx2q_u8 (0, n, 33, 1);  /* { dg-error {macro 
'__arm_vcx2q_u8' passed 4 arguments, but takes just 3} } */
+  accum += __arm_vcx2q (0, n, 33, 1); /* { dg-error {macro 
'__arm_vcx2q' passed 4 arguments, but takes just 3} } */
+  accum += __arm_vcx2qa (0, accum, n, 33, 1); /* { dg-error {macro 
'__arm_vcx2qa' passed 5 arguments, but takes just 4} } */
+  accum += __arm_vcx3q_u8 (0, n, m, 33, 1);   /* { dg-error {macro 
'__arm_vcx3q_u8' passed 5 arguments, but takes just 4} } */
+  accum += __arm_vcx3q (0, n, m, 33, 1);  /* { dg-error {macro 
'__arm_vcx3q' passed 5 arguments, but takes just 4} } */
+  accum += __arm_vcx3qa (0, accum, n, m, 33, 1);  /* { dg-error {macro 
'__arm_vcx3qa' passed 6 arguments, but takes just 5} } */
+  accum += __arm_vcx1q_u8 (0);/* { dg-error {macro 
'__arm_vcx1q_u8' requires 2 arguments, but only 1 given} } */
+  accum += __arm_vcx1qa (0, accum);   /* { dg-error {macro 
'__arm_vcx1qa' requires 3 arguments, but only 2 given} } */
+  accum += __arm_vcx2q_u8 (0, n); /* { dg-error {macro 
'__arm_vcx2q_u8' requires 3 arguments, but only 2 given} } */
+  accum += __arm_vcx2q (0, n);/* { dg-error {macro 
'__arm_vcx2q' requires 3 arguments, but only 2 given} } */
+  accum += __arm_vcx2qa (0, accum, n);/* { dg-error {macro 
'__arm_vcx2qa' requires 4 arguments, but only 3 given} } */
+  accum += __arm_vcx3q_u8 (0, n, m);  /* { dg-error {macro 
'__arm_vcx3q_u8' requires 4 arguments, but only 3 given} } */
+  accum += __

[gcc r15-4422] Relax boolean processing in vect_maybe_update_slp_op_vectype

2024-10-18 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:59afd44ffb1bd55fc84b4a36107a9bdba708e9a0

commit r15-4422-g59afd44ffb1bd55fc84b4a36107a9bdba708e9a0
Author: Richard Biener 
Date:   Thu Oct 17 14:27:13 2024 +0200

Relax boolean processing in vect_maybe_update_slp_op_vectype

The following makes VECTOR_BOOLEAN_TYPE_P processing consistent with
what we do without SLP.  The original motivation for rejecting
VECTOR_BOOLEAN_TYPE_P extern defs was bad code generation.  But
the non-SLP codepath happily goes along - but always hits the
case of a uniform vector and this case specifically we can now
code-generate optimally.  So the following allows single-lane
externs as well.

Requiring patterns to code-generate can have a bad influence on
the vectorization factor, though a prototype patch of mine shows
that generating vector compares externally isn't always trivial.

The patch fixes the gcc.dg/vect/vect-early-break_82.c FAIL on x86_64
when --param vect-force-slp=1 is in effect.

PR tree-optimization/117171
* tree-vect-stmts.cc (vect_maybe_update_slp_op_vectype):
Relax vect_external_def VECTOR_BOOLEAN_TYPE_P constraint.

Diff:
---
 gcc/tree-vect-stmts.cc | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 6967d50288e9..e7f14c3144c3 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -14290,9 +14290,12 @@ vect_maybe_update_slp_op_vectype (slp_tree op, tree 
vectype)
   if (SLP_TREE_VECTYPE (op))
 return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
   /* For external defs refuse to produce VECTOR_BOOLEAN_TYPE_P, those
- should be handled by patters.  Allow vect_constant_def for now.  */
+ should be handled by patters.  Allow vect_constant_def for now
+ as well as the trivial single-lane uniform vect_external_def case
+ both of which we code-generate reasonably.  */
   if (VECTOR_BOOLEAN_TYPE_P (vectype)
-  && SLP_TREE_DEF_TYPE (op) == vect_external_def)
+  && SLP_TREE_DEF_TYPE (op) == vect_external_def
+  && SLP_TREE_LANES (op) > 1)
 return false;
   SLP_TREE_VECTYPE (op) = vectype;
   return true;


[gcc r15-4423] arm: [MVE intrinsics] improve comment for orrq shape

2024-10-18 Thread Christophe Lyon via Gcc-cvs
https://gcc.gnu.org/g:ad176d828d118d3ec8b146d6d3b20cd7d5ce8967

commit r15-4423-gad176d828d118d3ec8b146d6d3b20cd7d5ce8967
Author: Christophe Lyon 
Date:   Tue Jul 9 12:27:54 2024 +

arm: [MVE intrinsics] improve comment for orrq shape

Add a comment about the lack of "n" forms for floating-point and 8-bit
integer types, to make it clearer why we use build_16_32 for MODE_n.

2024-07-11  Christophe Lyon  

gcc/
* config/arm/arm-mve-builtins-shapes.cc (binary_orrq_def): Improve 
comment.

Diff:
---
 gcc/config/arm/arm-mve-builtins-shapes.cc | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc 
b/gcc/config/arm/arm-mve-builtins-shapes.cc
index ba20c6a8f734..e01939469e3a 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.cc
+++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
@@ -865,7 +865,12 @@ SHAPE (binary_opt_n)
int16x8_t [__arm_]vorrq_m[_s16](int16x8_t inactive, int16x8_t a, int16x8_t 
b, mve_pred16_t p)
int16x8_t [__arm_]vorrq_x[_s16](int16x8_t a, int16x8_t b, mve_pred16_t p)
int16x8_t [__arm_]vorrq[_n_s16](int16x8_t a, const int16_t imm)
-   int16x8_t [__arm_]vorrq_m_n[_s16](int16x8_t a, const int16_t imm, 
mve_pred16_t p)  */
+   int16x8_t [__arm_]vorrq_m_n[_s16](int16x8_t a, const int16_t imm, 
mve_pred16_t p)
+
+   No "_n" forms for floating-point, nor 8-bit integers:
+   float16x8_t [__arm_]vorrq[_f16](float16x8_t a, float16x8_t b)
+   float16x8_t [__arm_]vorrq_m[_f16](float16x8_t inactive, float16x8_t a, 
float16x8_t b, mve_pred16_t p)
+   float16x8_t [__arm_]vorrq_x[_f16](float16x8_t a, float16x8_t b, 
mve_pred16_t p)  */
 struct binary_orrq_def : public overloaded_base<0>
 {
   bool


[gcc r15-4424] arm: [MVE intrinsics] remove useless resolve from create shape

2024-10-18 Thread Christophe Lyon via Gcc-cvs
https://gcc.gnu.org/g:0a38e5fffca31dfff4aea64ddbfd750910252cde

commit r15-4424-g0a38e5fffca31dfff4aea64ddbfd750910252cde
Author: Christophe Lyon 
Date:   Mon Jun 10 14:42:51 2024 +

arm: [MVE intrinsics] remove useless resolve from create shape

vcreateq has no overloaded forms, so there's no need for resolve ().

2024-07-11  Christophe Lyon  

gcc/
* config/arm/arm-mve-builtins-shapes.cc (create_def::resolve):
Delete function.

Diff:
---
 gcc/config/arm/arm-mve-builtins-shapes.cc | 6 --
 1 file changed, 6 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc 
b/gcc/config/arm/arm-mve-builtins-shapes.cc
index e01939469e3a..0520a8331db0 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.cc
+++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
@@ -1408,12 +1408,6 @@ struct create_def : public nonoverloaded_base
   {
 build_all (b, "v0,su64,su64", group, MODE_none, preserve_user_namespace);
   }
-
-  tree
-  resolve (function_resolver &r) const override
-  {
-return r.resolve_uniform (0, 2);
-  }
 };
 SHAPE (create)


[gcc r15-4425] arm: [MVE intrinsics] Cleanup arm-mve-builtins-functions.h

2024-10-18 Thread Christophe Lyon via Gcc-cvs
https://gcc.gnu.org/g:4fc0c6c6186cad8c7648a1453ea299114c347348

commit r15-4425-g4fc0c6c6186cad8c7648a1453ea299114c347348
Author: Christophe Lyon 
Date:   Wed Jul 10 21:25:07 2024 +

arm: [MVE intrinsics] Cleanup arm-mve-builtins-functions.h

This patch brings no functional change but removes some code
duplication in arm-mve-builtins-functions.h and makes it easier to
read and maintain.

It introduces a new expand_unspec () member of
unspec_based_mve_function_base and makes a few classes inherit from it
instead of function_base.

This adds 3 new members containing the unspec codes for signed-int,
unsigned-int and floating-point intrinsics (no mode, no predicate).
Depending on the derived class, these will be used instead of the 3
similar RTX codes.

The new expand_unspec () handles all the possible unspecs, some of
which may not be supported by a given intrinsics family: such code
paths won't be used in that case.  Similarly, codes specific to a
family (RTX, or PRED_p for instance) should be handled by the caller
of expand_unspec ().

Thanks to this, expand () for unspec_based_mve_function_exact_insn,
unspec_mve_function_exact_insn, unspec_mve_function_exact_insn_pred_p,
unspec_mve_function_exact_insn_vshl no longer duplicate a lot of code.

The patch also makes most of PRED_m and PRED_x handling use the same
code, and uses conditional operators when computing which RTX
code/unspec to use when calling code_for_mve_q_XXX.

2024-07-11  Christophe Lyon  

gcc/
* config/arm/arm-mve-builtins-functions.h
(unspec_based_mve_function_base): Add m_unspec_for_sint,
m_unspec_for_uint, m_unspec_for_fp and expand_unspec members.
(unspec_based_mve_function_exact_insn): Inherit from
unspec_based_mve_function_base and use expand_unspec.
(unspec_mve_function_exact_insn): Likewise.
(unspec_mve_function_exact_insn_pred_p): Likewise.  Use
conditionals.
(unspec_mve_function_exact_insn_vshl): Likewise.
(unspec_based_mve_function_exact_insn_vcmp): Initialize new
inherited members.  Use conditionals.
(unspec_mve_function_exact_insn_rot): Merge PRED_m and PRED_x
handling.  Use conditionals.
(unspec_mve_function_exact_insn_vmull): Likewise.
(unspec_mve_function_exact_insn_vmull_poly): Likewise.

Diff:
---
 gcc/config/arm/arm-mve-builtins-functions.h | 726 +++-
 1 file changed, 286 insertions(+), 440 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-functions.h 
b/gcc/config/arm/arm-mve-builtins-functions.h
index ac2a731bff46..35cb5242b771 100644
--- a/gcc/config/arm/arm-mve-builtins-functions.h
+++ b/gcc/config/arm/arm-mve-builtins-functions.h
@@ -40,17 +40,23 @@ public:
 };
 
 /* An incomplete function_base for functions that have an associated
-   rtx_code for signed integers, unsigned integers and floating-point
-   values for the non-predicated, non-suffixed intrinsic, and unspec
-   codes, with separate codes for signed integers, unsigned integers
-   and floating-point values.  The class simply records information
-   about the mapping for derived classes to use.  */
+   rtx_code or an unspec for signed integers, unsigned integers and
+   floating-point values for the non-predicated, non-suffixed
+   intrinsics, and unspec codes, with separate codes for signed
+   integers, unsigned integers and floating-point values for
+   predicated and/or suffixed intrinsics.  The class simply records
+   information about the mapping for derived classes to use and
+   provides a generic expand_unspec () to avoid duplicating expansion
+   code in derived classes.  */
 class unspec_based_mve_function_base : public function_base
 {
 public:
   CONSTEXPR unspec_based_mve_function_base (rtx_code code_for_sint,
rtx_code code_for_uint,
rtx_code code_for_fp,
+   int unspec_for_sint,
+   int unspec_for_uint,
+   int unspec_for_fp,
int unspec_for_n_sint,
int unspec_for_n_uint,
int unspec_for_n_fp,
@@ -63,6 +69,9 @@ public:
 : m_code_for_sint (code_for_sint),
   m_code_for_uint (code_for_uint),
   m_code_for_fp (code_for_fp),
+  m_unspec_for_sint (unspec_for_sint),
+  m_unspec_for_uint (unspec_for_uint),
+  m_unspec_for_fp (unspec_for_fp),
   m_unspec_for_n_sint (unspec_for_n_sint),
   m_unspec_for_n_uint (unspec_for_n_uint),
   m_unspec_for_n_fp (unspec_for_n_fp),
@@ -83,6 +92,9 @@ public:
   /* The unspec code associated with sign

[gcc r15-4431] arm: [MVE intrinsics] rework vcvtbq_f16_f32 vcvttq_f16_f32 vcvtbq_f32_f16 vcvttq_f32_f16

2024-10-18 Thread Christophe Lyon via Gcc-cvs
https://gcc.gnu.org/g:43d8286399d4096f52a39016b5ba3a403063e547

commit r15-4431-g43d8286399d4096f52a39016b5ba3a403063e547
Author: Christophe Lyon 
Date:   Mon Jun 10 07:50:35 2024 +

arm: [MVE intrinsics] rework vcvtbq_f16_f32 vcvttq_f16_f32 vcvtbq_f32_f16 
vcvttq_f32_f16

Implement vcvtbq_f16_f32, vcvttq_f16_f32, vcvtbq_f32_f16 and
vcvttq_f32_f16 using the new MVE builtins framework.

2024-07-11 Christophe Lyon  

gcc/
* config/arm/arm-mve-builtins-base.cc (class vcvtxq_impl): New.
(vcvtbq, vcvttq): New.
* config/arm/arm-mve-builtins-base.def (vcvtbq, vcvttq): New.
* config/arm/arm-mve-builtins-base.h (vcvtbq, vcvttq): New.
* config/arm/arm-mve-builtins.cc (cvt_f16_f32, cvt_f32_f16): New
types.
(function_instance::has_inactive_argument): Support vcvtbq and
vcvttq.
* config/arm/arm_mve.h (vcvttq_f32): Delete.
(vcvtbq_f32): Delete.
(vcvtbq_m): Delete.
(vcvttq_m): Delete.
(vcvttq_f32_f16): Delete.
(vcvtbq_f32_f16): Delete.
(vcvttq_f16_f32): Delete.
(vcvtbq_f16_f32): Delete.
(vcvtbq_m_f16_f32): Delete.
(vcvtbq_m_f32_f16): Delete.
(vcvttq_m_f16_f32): Delete.
(vcvttq_m_f32_f16): Delete.
(vcvtbq_x_f32_f16): Delete.
(vcvttq_x_f32_f16): Delete.
(__arm_vcvttq_f32_f16): Delete.
(__arm_vcvtbq_f32_f16): Delete.
(__arm_vcvttq_f16_f32): Delete.
(__arm_vcvtbq_f16_f32): Delete.
(__arm_vcvtbq_m_f16_f32): Delete.
(__arm_vcvtbq_m_f32_f16): Delete.
(__arm_vcvttq_m_f16_f32): Delete.
(__arm_vcvttq_m_f32_f16): Delete.
(__arm_vcvtbq_x_f32_f16): Delete.
(__arm_vcvttq_x_f32_f16): Delete.
(__arm_vcvttq_f32): Delete.
(__arm_vcvtbq_f32): Delete.
(__arm_vcvtbq_m): Delete.
(__arm_vcvttq_m): Delete.

Diff:
---
 gcc/config/arm/arm-mve-builtins-base.cc  |  56 
 gcc/config/arm/arm-mve-builtins-base.def |   4 +
 gcc/config/arm/arm-mve-builtins-base.h   |   2 +
 gcc/config/arm/arm-mve-builtins.cc   |  12 +++
 gcc/config/arm/arm_mve.h | 146 ---
 5 files changed, 74 insertions(+), 146 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc 
b/gcc/config/arm/arm-mve-builtins-base.cc
index 64ed78ace943..1646b609c98f 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -304,6 +304,60 @@ public:
   }
 };
 
+  /* Implements vcvt[bt]q_f32_f16 and vcvt[bt]q_f16_f32
+ intrinsics.  */
+class vcvtxq_impl : public function_base
+{
+public:
+  CONSTEXPR vcvtxq_impl (int unspec_f16_f32, int unspec_for_m_f16_f32,
+int unspec_f32_f16, int unspec_for_m_f32_f16)
+: m_unspec_f16_f32 (unspec_f16_f32),
+  m_unspec_for_m_f16_f32 (unspec_for_m_f16_f32),
+  m_unspec_f32_f16 (unspec_f32_f16),
+  m_unspec_for_m_f32_f16 (unspec_for_m_f32_f16)
+  {}
+
+  /* The unspec code associated with vcvt[bt]q.  */
+  int m_unspec_f16_f32;
+  int m_unspec_for_m_f16_f32;
+  int m_unspec_f32_f16;
+  int m_unspec_for_m_f32_f16;
+
+  rtx
+  expand (function_expander &e) const override
+  {
+insn_code code;
+switch (e.pred)
+  {
+  case PRED_none:
+   /* No predicate.  */
+   if (e.type_suffix (0).element_bits == 16)
+ code = code_for_mve_q_f16_f32v8hf (m_unspec_f16_f32);
+   else
+ code = code_for_mve_q_f32_f16v4sf (m_unspec_f32_f16);
+   return e.use_exact_insn (code);
+
+  case PRED_m:
+  case PRED_x:
+   /* "m" or "x" predicate.  */
+   if (e.type_suffix (0).element_bits == 16)
+ code = code_for_mve_q_m_f16_f32v8hf (m_unspec_for_m_f16_f32);
+   else
+ code = code_for_mve_q_m_f32_f16v4sf (m_unspec_for_m_f32_f16);
+
+   if (e.pred == PRED_m)
+ return e.use_cond_insn (code, 0);
+   else
+ return e.use_pred_x_insn (code);
+
+  default:
+   gcc_unreachable ();
+  }
+
+gcc_unreachable ();
+  }
+};
+
 } /* end anonymous namespace */
 
 namespace arm_mve {
@@ -504,7 +558,9 @@ FUNCTION (vcmpltq, 
unspec_based_mve_function_exact_insn_vcmp, (LT, UNKNOWN, LT,
 FUNCTION (vcmpcsq, unspec_based_mve_function_exact_insn_vcmp, (UNKNOWN, GEU, 
UNKNOWN, UNKNOWN, VCMPCSQ_M_U, UNKNOWN, UNKNOWN, VCMPCSQ_M_N_U, UNKNOWN))
 FUNCTION (vcmphiq, unspec_based_mve_function_exact_insn_vcmp, (UNKNOWN, GTU, 
UNKNOWN, UNKNOWN, VCMPHIQ_M_U, UNKNOWN, UNKNOWN, VCMPHIQ_M_N_U, UNKNOWN))
 FUNCTION_WITHOUT_M_N (vcreateq, VCREATEQ)
+FUNCTION (vcvtbq, vcvtxq_impl, (VCVTBQ_F16_F32, VCVTBQ_M_F16_F32, 
VCVTBQ_F32_F16, VCVTBQ_M_F32_F16))
 FUNCTION (vcvtq, vcvtq_impl,)
+FUNCTION (vcvttq, vcvtxq_impl, (VCVTTQ_F16_F32, VCVTTQ_M_F16_F32, 
VCVTTQ_F32_F16, VCVTTQ_M_F32_F16))
 FUNCTION (v

[gcc r15-4427] arm: [MVE intrinsics] add vcvt shape

2024-10-18 Thread Christophe Lyon via Gcc-cvs
https://gcc.gnu.org/g:931ba2f11b9e5edf9b79cc924b99791e0696c818

commit r15-4427-g931ba2f11b9e5edf9b79cc924b99791e0696c818
Author: Christophe Lyon 
Date:   Thu Jun 6 17:50:39 2024 +

arm: [MVE intrinsics] add vcvt shape

This patch adds the vcvt shape description.

It needs to add a new type_suffix_info parameter to
explicit_type_suffix_p (), because vcvt uses overloads for type
suffixes for integer to floating-point conversions, but not for
floating-point to integer.

2024-07-11 Christophe Lyon  

gcc/
* config/arm/arm-mve-builtins-shapes.cc
(nonoverloaded_base::explicit_type_suffix_p): Add unused
type_suffix_info parameter.
(overloaded_base::explicit_type_suffix_p): Likewise.
(unary_n_def::explicit_type_suffix_p): Likewise.
(vcvt): New.
* config/arm/arm-mve-builtins-shapes.h (vcvt): New.
* config/arm/arm-mve-builtins.cc (function_builder::get_name): Add
new type_suffix parameter.
(function_builder::add_overloaded_functions): Likewise.
* config/arm/arm-mve-builtins.h
(function_shape::explicit_type_suffix_p): Likewise.

Diff:
---
 gcc/config/arm/arm-mve-builtins-shapes.cc | 108 +-
 gcc/config/arm/arm-mve-builtins-shapes.h  |   1 +
 gcc/config/arm/arm-mve-builtins.cc|   9 ++-
 gcc/config/arm/arm-mve-builtins.h |  10 ++-
 4 files changed, 119 insertions(+), 9 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc 
b/gcc/config/arm/arm-mve-builtins-shapes.cc
index 0520a8331db0..bc99a6a7c43d 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.cc
+++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
@@ -330,7 +330,8 @@ build_16_32 (function_builder &b, const char *signature,
 struct nonoverloaded_base : public function_shape
 {
   bool
-  explicit_type_suffix_p (unsigned int, enum predication_index, enum 
mode_suffix_index) const override
+  explicit_type_suffix_p (unsigned int, enum predication_index,
+ enum mode_suffix_index, type_suffix_info) const 
override
   {
 return true;
   }
@@ -360,7 +361,8 @@ template
 struct overloaded_base : public function_shape
 {
   bool
-  explicit_type_suffix_p (unsigned int i, enum predication_index, enum 
mode_suffix_index) const override
+  explicit_type_suffix_p (unsigned int i, enum predication_index,
+ enum mode_suffix_index, type_suffix_info) const 
override
   {
 return (EXPLICIT_MASK >> i) & 1;
   }
@@ -1856,7 +1858,7 @@ struct unary_n_def : public overloaded_base<0>
 {
   bool
   explicit_type_suffix_p (unsigned int, enum predication_index pred,
- enum mode_suffix_index) const override
+ enum mode_suffix_index, type_suffix_info) const 
override
   {
 return pred != PRED_m;
   }
@@ -1979,6 +1981,106 @@ struct unary_widen_acc_def : public overloaded_base<0>
 };
 SHAPE (unary_widen_acc)
 
+/* <T0>_t foo_t0[_t1](<T1>_t)
+   <T0>_t foo_t0_n[_t1](<T1>_t, const int)
+
+   Example: vcvtq.
+   float32x4_t [__arm_]vcvtq[_f32_s32](int32x4_t a)
+   float32x4_t [__arm_]vcvtq_m[_f32_s32](float32x4_t inactive, int32x4_t a, 
mve_pred16_t p)
+   float32x4_t [__arm_]vcvtq_x[_f32_s32](int32x4_t a, mve_pred16_t p)
+   float32x4_t [__arm_]vcvtq_n[_f32_s32](int32x4_t a, const int imm6)
+   float32x4_t [__arm_]vcvtq_m_n[_f32_s32](float32x4_t inactive, int32x4_t a, 
const int imm6, mve_pred16_t p)
+   float32x4_t [__arm_]vcvtq_x_n[_f32_s32](int32x4_t a, const int imm6, 
mve_pred16_t p)
+   int32x4_t [__arm_]vcvtq_s32_f32(float32x4_t a)
+   int32x4_t [__arm_]vcvtq_m[_s32_f32](int32x4_t inactive, float32x4_t a, 
mve_pred16_t p)
+   int32x4_t [__arm_]vcvtq_x_s32_f32(float32x4_t a, mve_pred16_t p)
+   int32x4_t [__arm_]vcvtq_n_s32_f32(float32x4_t a, const int imm6)
+   int32x4_t [__arm_]vcvtq_m_n[_s32_f32](int32x4_t inactive, float32x4_t a, 
const int imm6, mve_pred16_t p)
+   int32x4_t [__arm_]vcvtq_x_n_s32_f32(float32x4_t a, const int imm6, 
mve_pred16_t p)  */
+struct vcvt_def : public overloaded_base<0>
+{
+  bool
+  explicit_type_suffix_p (unsigned int i, enum predication_index pred,
+ enum mode_suffix_index,
+ type_suffix_info type_info) const override
+  {
+if (pred != PRED_m
+   && ((i == 0 && type_info.integer_p)
+   || (i == 1 && type_info.float_p)))
+  return true;
+return false;
+  }
+
+  bool
+  explicit_mode_suffix_p (enum predication_index,
+ enum mode_suffix_index) const override
+  {
+return true;
+  }
+
+  void
+  build (function_builder &b, const function_group_info &group,
+bool preserve_user_namespace) const override
+  {
+b.add_overloaded_functions (group, MODE_none, preserve_user_namespace);
+b.add_overloaded_functions (group, MODE_n, preserve_user_namespace);
+build_all (b, "v0,v1", group, MODE_none, preserve_us

[gcc r15-4428] arm: [MVE intrinsics] rework vcvtq

2024-10-18 Thread Christophe Lyon via Gcc-cvs
https://gcc.gnu.org/g:b8963bb0671199cf2d42bda5aad62931094a1005

commit r15-4428-gb8963bb0671199cf2d42bda5aad62931094a1005
Author: Christophe Lyon 
Date:   Thu Jun 6 17:48:50 2024 +0000

arm: [MVE intrinsics] rework vcvtq

Implement vcvtq using the new MVE builtins framework.

In config/arm/arm-mve-builtins-base.def, the patch also restores the
alphabetical order.

2024-07-11  Christophe Lyon  

gcc/
* config/arm/arm-mve-builtins-base.cc (class vcvtq_impl): New.
(vcvtq): New.
* config/arm/arm-mve-builtins-base.def (vcvtq): New.
* config/arm/arm-mve-builtins-base.h (vcvtq): New.
* config/arm/arm-mve-builtins.cc (cvt): New type.
* config/arm/arm_mve.h (vcvtq): Delete.
(vcvtq_n): Delete.
(vcvtq_m): Delete.
(vcvtq_m_n): Delete.
(vcvtq_x): Delete.
(vcvtq_x_n): Delete.
(vcvtq_f16_s16): Delete.
(vcvtq_f32_s32): Delete.
(vcvtq_f16_u16): Delete.
(vcvtq_f32_u32): Delete.
(vcvtq_s16_f16): Delete.
(vcvtq_s32_f32): Delete.
(vcvtq_u16_f16): Delete.
(vcvtq_u32_f32): Delete.
(vcvtq_n_f16_s16): Delete.
(vcvtq_n_f32_s32): Delete.
(vcvtq_n_f16_u16): Delete.
(vcvtq_n_f32_u32): Delete.
(vcvtq_n_s16_f16): Delete.
(vcvtq_n_s32_f32): Delete.
(vcvtq_n_u16_f16): Delete.
(vcvtq_n_u32_f32): Delete.
(vcvtq_m_f16_s16): Delete.
(vcvtq_m_f16_u16): Delete.
(vcvtq_m_f32_s32): Delete.
(vcvtq_m_f32_u32): Delete.
(vcvtq_m_s16_f16): Delete.
(vcvtq_m_u16_f16): Delete.
(vcvtq_m_s32_f32): Delete.
(vcvtq_m_u32_f32): Delete.
(vcvtq_m_n_f16_u16): Delete.
(vcvtq_m_n_f16_s16): Delete.
(vcvtq_m_n_f32_u32): Delete.
(vcvtq_m_n_f32_s32): Delete.
(vcvtq_m_n_s32_f32): Delete.
(vcvtq_m_n_s16_f16): Delete.
(vcvtq_m_n_u32_f32): Delete.
(vcvtq_m_n_u16_f16): Delete.
(vcvtq_x_f16_u16): Delete.
(vcvtq_x_f16_s16): Delete.
(vcvtq_x_f32_s32): Delete.
(vcvtq_x_f32_u32): Delete.
(vcvtq_x_n_f16_s16): Delete.
(vcvtq_x_n_f16_u16): Delete.
(vcvtq_x_n_f32_s32): Delete.
(vcvtq_x_n_f32_u32): Delete.
(vcvtq_x_s16_f16): Delete.
(vcvtq_x_s32_f32): Delete.
(vcvtq_x_u16_f16): Delete.
(vcvtq_x_u32_f32): Delete.
(vcvtq_x_n_s16_f16): Delete.
(vcvtq_x_n_s32_f32): Delete.
(vcvtq_x_n_u16_f16): Delete.
(vcvtq_x_n_u32_f32): Delete.
(__arm_vcvtq_f16_s16): Delete.
(__arm_vcvtq_f32_s32): Delete.
(__arm_vcvtq_f16_u16): Delete.
(__arm_vcvtq_f32_u32): Delete.
(__arm_vcvtq_s16_f16): Delete.
(__arm_vcvtq_s32_f32): Delete.
(__arm_vcvtq_u16_f16): Delete.
(__arm_vcvtq_u32_f32): Delete.
(__arm_vcvtq_n_f16_s16): Delete.
(__arm_vcvtq_n_f32_s32): Delete.
(__arm_vcvtq_n_f16_u16): Delete.
(__arm_vcvtq_n_f32_u32): Delete.
(__arm_vcvtq_n_s16_f16): Delete.
(__arm_vcvtq_n_s32_f32): Delete.
(__arm_vcvtq_n_u16_f16): Delete.
(__arm_vcvtq_n_u32_f32): Delete.
(__arm_vcvtq_m_f16_s16): Delete.
(__arm_vcvtq_m_f16_u16): Delete.
(__arm_vcvtq_m_f32_s32): Delete.
(__arm_vcvtq_m_f32_u32): Delete.
(__arm_vcvtq_m_s16_f16): Delete.
(__arm_vcvtq_m_u16_f16): Delete.
(__arm_vcvtq_m_s32_f32): Delete.
(__arm_vcvtq_m_u32_f32): Delete.
(__arm_vcvtq_m_n_f16_u16): Delete.
(__arm_vcvtq_m_n_f16_s16): Delete.
(__arm_vcvtq_m_n_f32_u32): Delete.
(__arm_vcvtq_m_n_f32_s32): Delete.
(__arm_vcvtq_m_n_s32_f32): Delete.
(__arm_vcvtq_m_n_s16_f16): Delete.
(__arm_vcvtq_m_n_u32_f32): Delete.
(__arm_vcvtq_m_n_u16_f16): Delete.
(__arm_vcvtq_x_f16_u16): Delete.
(__arm_vcvtq_x_f16_s16): Delete.
(__arm_vcvtq_x_f32_s32): Delete.
(__arm_vcvtq_x_f32_u32): Delete.
(__arm_vcvtq_x_n_f16_s16): Delete.
(__arm_vcvtq_x_n_f16_u16): Delete.
(__arm_vcvtq_x_n_f32_s32): Delete.
(__arm_vcvtq_x_n_f32_u32): Delete.
(__arm_vcvtq_x_s16_f16): Delete.
(__arm_vcvtq_x_s32_f32): Delete.
(__arm_vcvtq_x_u16_f16): Delete.
(__arm_vcvtq_x_u32_f32): Delete.
(__arm_vcvtq_x_n_s16_f16): Delete.
(__arm_vcvtq_x_n_s32_f32): Delete.
(__arm_vcvtq_x_n_u16_f16): Delete.
(__arm_vcvtq_x_n_

[gcc r15-4432] arm: [MVE intrinsics] factorize vcvtaq vcvtmq vcvtnq vcvtpq

2024-10-18 Thread Christophe Lyon via Gcc-cvs
https://gcc.gnu.org/g:17ccbbee44387b0233a0cafb516782f0188aee2b

commit r15-4432-g17ccbbee44387b0233a0cafb516782f0188aee2b
Author: Christophe Lyon 
Date:   Wed Jul 10 22:12:03 2024 +0000

arm: [MVE intrinsics] factorize vcvtaq vcvtmq vcvtnq vcvtpq

Factorize vcvtaq vcvtmq vcvtnq vcvtpq builtins so that they use the
same parameterized names.

2024-07-11  Christophe Lyon  

gcc/
* config/arm/iterators.md (mve_insn): Add VCVTAQ_M_S, VCVTAQ_M_U,
VCVTAQ_S, VCVTAQ_U, VCVTMQ_M_S, VCVTMQ_M_U, VCVTMQ_S, VCVTMQ_U,
VCVTNQ_M_S, VCVTNQ_M_U, VCVTNQ_S, VCVTNQ_U, VCVTPQ_M_S,
VCVTPQ_M_U, VCVTPQ_S, VCVTPQ_U.
(VCVTAQ, VCVTPQ, VCVTNQ, VCVTMQ, VCVTAQ_M, VCVTMQ_M, VCVTNQ_M)
(VCVTPQ_M): Delete.
(VCVTxQ, VCVTxQ_M): New.
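* config/arm/mve.md (mve_vcvtpq_<supf><mode>)
(mve_vcvtnq_<supf><mode>, mve_vcvtmq_<supf><mode>)
(mve_vcvtaq_<supf><mode>): Merge into ...
(@mve_<mve_insn>q_<supf><mode>): ... this.
(mve_vcvtaq_m_<supf><mode>, mve_vcvtmq_m_<supf><mode>)
(mve_vcvtpq_m_<supf><mode>, mve_vcvtnq_m_<supf><mode>): Merge into
...
(@mve_<mve_insn>q_m_<supf><mode>): ... this.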

Diff:
---
 gcc/config/arm/iterators.md |  18 ---
 gcc/config/arm/mve.md   | 121 ++--
 2 files changed, 26 insertions(+), 113 deletions(-)

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index b9c39a98ca2b..162c0d56bfb0 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -964,10 +964,18 @@
 (VCMLAQ_M_F "vcmla") (VCMLAQ_ROT90_M_F "vcmla") 
(VCMLAQ_ROT180_M_F "vcmla") (VCMLAQ_ROT270_M_F "vcmla")
 (VCMULQ_M_F "vcmul") (VCMULQ_ROT90_M_F "vcmul") 
(VCMULQ_ROT180_M_F "vcmul") (VCMULQ_ROT270_M_F "vcmul")
 (VCREATEQ_S "vcreate") (VCREATEQ_U "vcreate") (VCREATEQ_F 
"vcreate")
+(VCVTAQ_M_S "vcvta") (VCVTAQ_M_U "vcvta")
+(VCVTAQ_S "vcvta") (VCVTAQ_U "vcvta")
 (VCVTBQ_F16_F32 "vcvtb") (VCVTTQ_F16_F32 "vcvtt")
 (VCVTBQ_F32_F16 "vcvtb") (VCVTTQ_F32_F16 "vcvtt")
 (VCVTBQ_M_F16_F32 "vcvtb") (VCVTTQ_M_F16_F32 "vcvtt")
 (VCVTBQ_M_F32_F16 "vcvtb") (VCVTTQ_M_F32_F16 "vcvtt")
+(VCVTMQ_M_S "vcvtm") (VCVTMQ_M_U "vcvtm")
+(VCVTMQ_S "vcvtm") (VCVTMQ_U "vcvtm")
+(VCVTNQ_M_S "vcvtn") (VCVTNQ_M_U "vcvtn")
+(VCVTNQ_S "vcvtn") (VCVTNQ_U "vcvtn")
+(VCVTPQ_M_S "vcvtp") (VCVTPQ_M_U "vcvtp")
+(VCVTPQ_S "vcvtp") (VCVTPQ_U "vcvtp")
 (VCVTQ_FROM_F_S "vcvt") (VCVTQ_FROM_F_U "vcvt")
 (VCVTQ_M_FROM_F_S "vcvt") (VCVTQ_M_FROM_F_U "vcvt")
 (VCVTQ_M_N_FROM_F_S "vcvt") (VCVTQ_M_N_FROM_F_U "vcvt")
@@ -2732,14 +2740,10 @@
 (define_int_iterator VREV64Q [VREV64Q_S VREV64Q_U])
 (define_int_iterator VCVTQ_FROM_F [VCVTQ_FROM_F_S VCVTQ_FROM_F_U])
 (define_int_iterator VREV16Q [VREV16Q_U VREV16Q_S])
-(define_int_iterator VCVTAQ [VCVTAQ_U VCVTAQ_S])
 (define_int_iterator VDUPQ_N [VDUPQ_N_U VDUPQ_N_S])
 (define_int_iterator VADDVQ [VADDVQ_U VADDVQ_S])
 (define_int_iterator VREV32Q [VREV32Q_U VREV32Q_S])
 (define_int_iterator VMOVLxQ [VMOVLBQ_S VMOVLBQ_U VMOVLTQ_U VMOVLTQ_S])
-(define_int_iterator VCVTPQ [VCVTPQ_S VCVTPQ_U])
-(define_int_iterator VCVTNQ [VCVTNQ_S VCVTNQ_U])
-(define_int_iterator VCVTMQ [VCVTMQ_S VCVTMQ_U])
 (define_int_iterator VADDLVQ [VADDLVQ_U VADDLVQ_S])
 (define_int_iterator VCVTQ_N_TO_F [VCVTQ_N_TO_F_S VCVTQ_N_TO_F_U])
 (define_int_iterator VCREATEQ [VCREATEQ_U VCREATEQ_S])
@@ -2795,7 +2799,6 @@
 (define_int_iterator VSHLLxQ_N [VSHLLBQ_N_S VSHLLBQ_N_U VSHLLTQ_N_S 
VSHLLTQ_N_U])
 (define_int_iterator VRMLALDAVHQ [VRMLALDAVHQ_U VRMLALDAVHQ_S])
 (define_int_iterator VBICQ_M_N [VBICQ_M_N_S VBICQ_M_N_U])
-(define_int_iterator VCVTAQ_M [VCVTAQ_M_S VCVTAQ_M_U])
 (define_int_iterator VCVTQ_M_TO_F [VCVTQ_M_TO_F_S VCVTQ_M_TO_F_U])
 (define_int_iterator VQRSHRNBQ_N [VQRSHRNBQ_N_U VQRSHRNBQ_N_S])
 (define_int_iterator VABAVQ [VABAVQ_S VABAVQ_U])
@@ -2845,9 +2848,6 @@
 (define_int_iterator VMVNQ_M_N [VMVNQ_M_N_U VMVNQ_M_N_S])
 (define_int_iterator VQSHRNTQ_N [VQSHRNTQ_N_U VQSHRNTQ_N_S])
 (define_int_iterator VSHRNTQ_N [VSHRNTQ_N_S VSHRNTQ_N_U])
-(define_int_iterator VCVTMQ_M [VCVTMQ_M_S VCVTMQ_M_U])
-(define_int_iterator VCVTNQ_M [VCVTNQ_M_S VCVTNQ_M_U])
-(define_int_iterator VCVTPQ_M [VCVTPQ_M_S VCVTPQ_M_U])
 (define_int_iterator VCVTQ_M_N_FROM_F [VCVTQ_M_N_FROM_F_S VCVTQ_M_N_FROM_F_U])
 (define_int_iterator VCVTQ_M_FROM_F [VCVTQ_M_FROM_F_U VCVTQ_M_FROM_F_S])
 (define_int_iterator VRMLALDAVHQ_P [VRMLALDAVHQ_P_S VRMLALDAVHQ_P_U])
@@ -2956,6 +2956,8 @@
 (define_int_iterator VCVTxQ_F32_F16 [VCVTBQ_F32_F16 VCVTTQ_F32_F16])
 (define_int_iterator VCVTxQ_M_F16_F32 [VCVTBQ_M_F16_F32 VCVTTQ_M_F16_F32])
 (define_int_iterator VCVTxQ_M_F32_F16 [VCVTBQ_M_F32_F16 VCVTTQ_M_F32_F16])
+(define_int_iterator VCVTxQ [VCVTAQ_S VCVTAQ_U VCVTMQ_S VCVTMQ_U VCVTNQ_S 
VCVTNQ_U VCVTP

[gcc r15-4434] arm: [MVE intrinsics] rework vcvtaq vcvtmq vcvtnq vcvtpq

2024-10-18 Thread Christophe Lyon via Gcc-cvs
https://gcc.gnu.org/g:cbcb8026b8fa0c959df3daeb28ee19719b7b828f

commit r15-4434-gcbcb8026b8fa0c959df3daeb28ee19719b7b828f
Author: Christophe Lyon 
Date:   Mon Jun 10 13:21:56 2024 +0000

arm: [MVE intrinsics] rework vcvtaq vcvtmq vcvtnq vcvtpq

Implement vcvtaq vcvtmq vcvtnq vcvtpq using the new MVE builtins
framework.

2024-07-11  Christophe Lyon  

gcc/
* config/arm/arm-mve-builtins-base.cc (vcvtaq): New.
(vcvtmq): New.
(vcvtnq): New.
(vcvtpq): New.
* config/arm/arm-mve-builtins-base.def (vcvtaq): New.
(vcvtmq): New.
(vcvtnq): New.
(vcvtpq): New.
* config/arm/arm-mve-builtins-base.h: (vcvtaq): New.
(vcvtmq): New.
(vcvtnq): New.
(vcvtpq): New.
* config/arm/arm-mve-builtins.cc (cvtx): New type.
* config/arm/arm_mve.h (vcvtaq_m): Delete.
(vcvtmq_m): Delete.
(vcvtnq_m): Delete.
(vcvtpq_m): Delete.
(vcvtaq_s16_f16): Delete.
(vcvtaq_s32_f32): Delete.
(vcvtnq_s16_f16): Delete.
(vcvtnq_s32_f32): Delete.
(vcvtpq_s16_f16): Delete.
(vcvtpq_s32_f32): Delete.
(vcvtmq_s16_f16): Delete.
(vcvtmq_s32_f32): Delete.
(vcvtpq_u16_f16): Delete.
(vcvtpq_u32_f32): Delete.
(vcvtnq_u16_f16): Delete.
(vcvtnq_u32_f32): Delete.
(vcvtmq_u16_f16): Delete.
(vcvtmq_u32_f32): Delete.
(vcvtaq_u16_f16): Delete.
(vcvtaq_u32_f32): Delete.
(vcvtaq_m_s16_f16): Delete.
(vcvtaq_m_u16_f16): Delete.
(vcvtaq_m_s32_f32): Delete.
(vcvtaq_m_u32_f32): Delete.
(vcvtmq_m_s16_f16): Delete.
(vcvtnq_m_s16_f16): Delete.
(vcvtpq_m_s16_f16): Delete.
(vcvtmq_m_u16_f16): Delete.
(vcvtnq_m_u16_f16): Delete.
(vcvtpq_m_u16_f16): Delete.
(vcvtmq_m_s32_f32): Delete.
(vcvtnq_m_s32_f32): Delete.
(vcvtpq_m_s32_f32): Delete.
(vcvtmq_m_u32_f32): Delete.
(vcvtnq_m_u32_f32): Delete.
(vcvtpq_m_u32_f32): Delete.
(vcvtaq_x_s16_f16): Delete.
(vcvtaq_x_s32_f32): Delete.
(vcvtaq_x_u16_f16): Delete.
(vcvtaq_x_u32_f32): Delete.
(vcvtnq_x_s16_f16): Delete.
(vcvtnq_x_s32_f32): Delete.
(vcvtnq_x_u16_f16): Delete.
(vcvtnq_x_u32_f32): Delete.
(vcvtpq_x_s16_f16): Delete.
(vcvtpq_x_s32_f32): Delete.
(vcvtpq_x_u16_f16): Delete.
(vcvtpq_x_u32_f32): Delete.
(vcvtmq_x_s16_f16): Delete.
(vcvtmq_x_s32_f32): Delete.
(vcvtmq_x_u16_f16): Delete.
(vcvtmq_x_u32_f32): Delete.
(__arm_vcvtpq_u16_f16): Delete.
(__arm_vcvtpq_u32_f32): Delete.
(__arm_vcvtnq_u16_f16): Delete.
(__arm_vcvtnq_u32_f32): Delete.
(__arm_vcvtmq_u16_f16): Delete.
(__arm_vcvtmq_u32_f32): Delete.
(__arm_vcvtaq_u16_f16): Delete.
(__arm_vcvtaq_u32_f32): Delete.
(__arm_vcvtaq_s16_f16): Delete.
(__arm_vcvtaq_s32_f32): Delete.
(__arm_vcvtnq_s16_f16): Delete.
(__arm_vcvtnq_s32_f32): Delete.
(__arm_vcvtpq_s16_f16): Delete.
(__arm_vcvtpq_s32_f32): Delete.
(__arm_vcvtmq_s16_f16): Delete.
(__arm_vcvtmq_s32_f32): Delete.
(__arm_vcvtaq_m_s16_f16): Delete.
(__arm_vcvtaq_m_u16_f16): Delete.
(__arm_vcvtaq_m_s32_f32): Delete.
(__arm_vcvtaq_m_u32_f32): Delete.
(__arm_vcvtmq_m_s16_f16): Delete.
(__arm_vcvtnq_m_s16_f16): Delete.
(__arm_vcvtpq_m_s16_f16): Delete.
(__arm_vcvtmq_m_u16_f16): Delete.
(__arm_vcvtnq_m_u16_f16): Delete.
(__arm_vcvtpq_m_u16_f16): Delete.
(__arm_vcvtmq_m_s32_f32): Delete.
(__arm_vcvtnq_m_s32_f32): Delete.
(__arm_vcvtpq_m_s32_f32): Delete.
(__arm_vcvtmq_m_u32_f32): Delete.
(__arm_vcvtnq_m_u32_f32): Delete.
(__arm_vcvtpq_m_u32_f32): Delete.
(__arm_vcvtaq_x_s16_f16): Delete.
(__arm_vcvtaq_x_s32_f32): Delete.
(__arm_vcvtaq_x_u16_f16): Delete.
(__arm_vcvtaq_x_u32_f32): Delete.
(__arm_vcvtnq_x_s16_f16): Delete.
(__arm_vcvtnq_x_s32_f32): Delete.
(__arm_vcvtnq_x_u16_f16): Delete.
(__arm_vcvtnq_x_u32_f32): Delete.
(__arm_vcvtpq_x_s16_f16): Delete.
(__arm_vcvtpq_x_s32_f32): Delete.
(__arm_vcvtpq_x_u16_f16): Delete.
(__arm_vcvtpq_x_u32_f32): Delete.
(__arm_vcvtmq_x_s16_f16): Delete.
(__arm_vcv

[gcc r15-4429] arm: [MVE intrinsics] factorize vcvtbq vcvttq

2024-10-18 Thread Christophe Lyon via Gcc-cvs
https://gcc.gnu.org/g:8035b5b1fdbfe3656013bfdbd3efa2579691ce9f

commit r15-4429-g8035b5b1fdbfe3656013bfdbd3efa2579691ce9f
Author: Christophe Lyon 
Date:   Wed Jul 10 22:02:26 2024 +0000

arm: [MVE intrinsics] factorize vcvtbq vcvttq

Factorize vcvtbq, vcvttq so that they use the same parameterized
names.

2024-07-11  Christophe Lyon  

gcc/
* config/arm/iterators.md (mve_insn): Add VCVTBQ_F16_F32,
VCVTTQ_F16_F32, VCVTBQ_F32_F16, VCVTTQ_F32_F16, VCVTBQ_M_F16_F32,
VCVTTQ_M_F16_F32, VCVTBQ_M_F32_F16, VCVTTQ_M_F32_F16.
(VCVTxQ_F16_F32): New iterator.
(VCVTxQ_F32_F16): Likewise.
(VCVTxQ_M_F16_F32): Likewise.
(VCVTxQ_M_F32_F16): Likewise.
* config/arm/mve.md (mve_vcvttq_f32_f16v4sf)
(mve_vcvtbq_f32_f16v4sf): Merge into ...
(@mve_<mve_insn>q_f32_f16v4sf): ... this.
(mve_vcvtbq_f16_f32v8hf, mve_vcvttq_f16_f32v8hf): Merge into ...
(@mve_<mve_insn>q_f16_f32v8hf): ... this.
(mve_vcvtbq_m_f16_f32v8hf, mve_vcvttq_m_f16_f32v8hf): Merge into
...
(@mve_<mve_insn>q_m_f16_f32v8hf): ... this.
(mve_vcvtbq_m_f32_f16v4sf, mve_vcvttq_m_f32_f16v4sf): Merge into
...
(@mve_<mve_insn>q_m_f32_f16v4sf): ... this.

Diff:
---
 gcc/config/arm/iterators.md |   8 
 gcc/config/arm/mve.md   | 112 ++--
 2 files changed, 34 insertions(+), 86 deletions(-)

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index bf800625face..b9c39a98ca2b 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -964,6 +964,10 @@
 (VCMLAQ_M_F "vcmla") (VCMLAQ_ROT90_M_F "vcmla") 
(VCMLAQ_ROT180_M_F "vcmla") (VCMLAQ_ROT270_M_F "vcmla")
 (VCMULQ_M_F "vcmul") (VCMULQ_ROT90_M_F "vcmul") 
(VCMULQ_ROT180_M_F "vcmul") (VCMULQ_ROT270_M_F "vcmul")
 (VCREATEQ_S "vcreate") (VCREATEQ_U "vcreate") (VCREATEQ_F 
"vcreate")
+(VCVTBQ_F16_F32 "vcvtb") (VCVTTQ_F16_F32 "vcvtt")
+(VCVTBQ_F32_F16 "vcvtb") (VCVTTQ_F32_F16 "vcvtt")
+(VCVTBQ_M_F16_F32 "vcvtb") (VCVTTQ_M_F16_F32 "vcvtt")
+(VCVTBQ_M_F32_F16 "vcvtb") (VCVTTQ_M_F32_F16 "vcvtt")
 (VCVTQ_FROM_F_S "vcvt") (VCVTQ_FROM_F_U "vcvt")
 (VCVTQ_M_FROM_F_S "vcvt") (VCVTQ_M_FROM_F_U "vcvt")
 (VCVTQ_M_N_FROM_F_S "vcvt") (VCVTQ_M_N_FROM_F_U "vcvt")
@@ -2948,6 +2952,10 @@
 (define_int_iterator VSHLCQ_M [VSHLCQ_M_S VSHLCQ_M_U])
 (define_int_iterator VQSHLUQ_M_N [VQSHLUQ_M_N_S])
 (define_int_iterator VQSHLUQ_N [VQSHLUQ_N_S])
+(define_int_iterator VCVTxQ_F16_F32 [VCVTBQ_F16_F32 VCVTTQ_F16_F32])
+(define_int_iterator VCVTxQ_F32_F16 [VCVTBQ_F32_F16 VCVTTQ_F32_F16])
+(define_int_iterator VCVTxQ_M_F16_F32 [VCVTBQ_M_F16_F32 VCVTTQ_M_F16_F32])
+(define_int_iterator VCVTxQ_M_F32_F16 [VCVTBQ_M_F32_F16 VCVTTQ_M_F32_F16])
 (define_int_iterator DLSTP [DLSTP8 DLSTP16 DLSTP32
   DLSTP64])
 (define_int_iterator LETP [LETP8 LETP16 LETP32
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index 686620035a8d..49a016f5fdd0 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -205,33 +205,20 @@
  [(set (attr "mve_unpredicated_insn") (symbol_ref 
"CODE_FOR_mve_q_f"))
   (set_attr "type" "mve_move")
 ])
-;;
-;; [vcvttq_f32_f16])
-;;
-(define_insn "mve_vcvttq_f32_f16v4sf"
-  [
-   (set (match_operand:V4SF 0 "s_register_operand" "=w")
-   (unspec:V4SF [(match_operand:V8HF 1 "s_register_operand" "w")]
-VCVTTQ_F32_F16))
-  ]
-  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-  "vcvtt.f32.f16\t%q0, %q1"
- [(set (attr "mve_unpredicated_insn") (symbol_ref 
"CODE_FOR_mve_vcvttq_f32_f16v4sf"))
-  (set_attr "type" "mve_move")
-])
 
 ;;
-;; [vcvtbq_f32_f16])
+;; [vcvtbq_f32_f16]
+;; [vcvttq_f32_f16]
 ;;
-(define_insn "mve_vcvtbq_f32_f16v4sf"
+(define_insn "@mve_q_f32_f16v4sf"
   [
(set (match_operand:V4SF 0 "s_register_operand" "=w")
(unspec:V4SF [(match_operand:V8HF 1 "s_register_operand" "w")]
-VCVTBQ_F32_F16))
+VCVTxQ_F32_F16))
   ]
   "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-  "vcvtb.f32.f16\t%q0, %q1"
- [(set (attr "mve_unpredicated_insn") (symbol_ref 
"CODE_FOR_mve_vcvtbq_f32_f16v4sf"))
+  ".f32.f16\t%q0, %q1"
+ [(set (attr "mve_unpredicated_insn") (symbol_ref 
"CODE_FOR_mve_q_f32_f16v4sf"))
   (set_attr "type" "mve_move")
 ])
 
@@ -1315,34 +1302,19 @@
 ])
 
 ;;
-;; [vcvtbq_f16_f32])
-;;
-(define_insn "mve_vcvtbq_f16_f32v8hf"
-  [
-   (set (match_operand:V8HF 0 "s_register_operand" "=w")
-   (unspec:V8HF [(match_operand:V8HF 1 "s_register_operand" "0")
- (match_operand:V4SF 2 "s_register_operand" "w")]
-VCVTBQ_F16_F32))
-  ]
-  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-  "vcvtb.f16.f32\t%q0, %q2"
- [(set (attr "mve_unpredicated_insn") (symbol_ref 
"CODE_FOR_mve_vcvtbq_f16_f32v8hf"))
-  (se

[gcc r15-4430] arm: [MVE intrinsics] add vcvt_f16_f32 and vcvt_f32_f16 shapes

2024-10-18 Thread Christophe Lyon via Gcc-cvs
https://gcc.gnu.org/g:097f560457fdb2102dae5774ac230b6eec1a

commit r15-4430-g097f560457fdb2102dae5774ac230b6eec1a
Author: Christophe Lyon 
Date:   Mon Jun 10 07:47:19 2024 +0000

arm: [MVE intrinsics] add vcvt_f16_f32 and vcvt_f32_f16 shapes

This patch adds the vcvt_f16_f32 and vcvt_f32_f16 shapes descriptions.

2024-07-11  Christophe Lyon  

gcc/
* config/arm/arm-mve-builtins-shapes.cc (vcvt_f16_f32)
(vcvt_f32_f16): New.
* config/arm/arm-mve-builtins-shapes.h (vcvt_f16_f32)
(vcvt_f32_f16): New.

Diff:
---
 gcc/config/arm/arm-mve-builtins-shapes.cc | 35 +++
 gcc/config/arm/arm-mve-builtins-shapes.h  |  2 ++
 2 files changed, 37 insertions(+)

diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc 
b/gcc/config/arm/arm-mve-builtins-shapes.cc
index bc99a6a7c43d..5ebf666d954b 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.cc
+++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
@@ -2081,6 +2081,41 @@ struct vcvt_def : public overloaded_base<0>
 };
 SHAPE (vcvt)
 
+/* float16x8_t foo_f16_f32(float16x8_t, float32x4_t)
+
+   Example: vcvttq_f16_f32.
+   float16x8_t [__arm_]vcvttq_f16_f32(float16x8_t a, float32x4_t b)
+   float16x8_t [__arm_]vcvttq_m_f16_f32(float16x8_t a, float32x4_t b, 
mve_pred16_t p)
+*/
+struct vcvt_f16_f32_def : public nonoverloaded_base
+{
+  void
+  build (function_builder &b, const function_group_info &group,
+bool preserve_user_namespace) const override
+  {
+build_all (b, "v0,v0,v1", group, MODE_none, preserve_user_namespace);
+  }
+};
+SHAPE (vcvt_f16_f32)
+
+/* float32x4_t foo_f32_f16(float16x8_t)
+
+   Example: vcvttq_f32_f16.
+   float32x4_t [__arm_]vcvttq_f32_f16(float16x8_t a)
+   float32x4_t [__arm_]vcvttq_m_f32_f16(float32x4_t inactive, float16x8_t a, 
mve_pred16_t p)
+   float32x4_t [__arm_]vcvttq_x_f32_f16(float16x8_t a, mve_pred16_t p)
+*/
+struct vcvt_f32_f16_def : public nonoverloaded_base
+{
+  void
+  build (function_builder &b, const function_group_info &group,
+bool preserve_user_namespace) const override
+  {
+build_all (b, "v0,v1", group, MODE_none, preserve_user_namespace);
+  }
+};
+SHAPE (vcvt_f32_f16)
+
 /* _t vfoo[_t0](_t, _t, mve_pred16_t)
 
i.e. a version of the standard ternary shape in which
diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h 
b/gcc/config/arm/arm-mve-builtins-shapes.h
index 9a112ceeb292..50157b575712 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.h
+++ b/gcc/config/arm/arm-mve-builtins-shapes.h
@@ -78,6 +78,8 @@ namespace arm_mve
 extern const function_shape *const unary_widen;
 extern const function_shape *const unary_widen_acc;
 extern const function_shape *const vcvt;
+extern const function_shape *const vcvt_f16_f32;
+extern const function_shape *const vcvt_f32_f16;
 extern const function_shape *const vpsel;
 
   } /* end namespace arm_mve::shapes */


[gcc r15-4426] arm: [MVE intrinsics] factorize vcvtq

2024-10-18 Thread Christophe Lyon via Gcc-cvs
https://gcc.gnu.org/g:e1762af3056f0491d0c2a7d14dcd86d425c4a92e

commit r15-4426-ge1762af3056f0491d0c2a7d14dcd86d425c4a92e
Author: Christophe Lyon 
Date:   Wed Jul 10 21:52:46 2024 +0000

arm: [MVE intrinsics] factorize vcvtq

Factorize vcvtq so that they use parameterized names.

2024-07-11  Christophe Lyon  

gcc/
* config/arm/iterators.md (mve_insn): Add VCVTQ_FROM_F_S,
VCVTQ_FROM_F_U, VCVTQ_M_FROM_F_S, VCVTQ_M_FROM_F_U,
VCVTQ_M_N_FROM_F_S, VCVTQ_M_N_FROM_F_U, VCVTQ_M_N_TO_F_S,
VCVTQ_M_N_TO_F_U, VCVTQ_M_TO_F_S, VCVTQ_M_TO_F_U,
VCVTQ_N_FROM_F_S, VCVTQ_N_FROM_F_U, VCVTQ_N_TO_F_S,
VCVTQ_N_TO_F_U, VCVTQ_TO_F_S, VCVTQ_TO_F_U.
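* config/arm/mve.md (mve_vcvtq_to_f_<supf><mode>): Rename into
@mve_<mve_insn>q_to_f_<supf><mode>.
(mve_vcvtq_from_f_<supf><mode>): Rename into
@mve_<mve_insn>q_from_f_<supf><mode>.
(mve_vcvtq_n_to_f_<supf><mode>): Rename into
@mve_<mve_insn>q_n_to_f_<supf><mode>.
(mve_vcvtq_n_from_f_<supf><mode>): Rename into
@mve_<mve_insn>q_n_from_f_<supf><mode>.
(mve_vcvtq_m_to_f_<supf><mode>): Rename into
@mve_<mve_insn>q_m_to_f_<supf><mode>.
(mve_vcvtq_m_n_from_f_<supf><mode>): Rename into
@mve_<mve_insn>q_m_n_from_f_<supf><mode>.
(mve_vcvtq_m_from_f_<supf><mode>): Rename into
@mve_<mve_insn>q_m_from_f_<supf><mode>.
(mve_vcvtq_m_n_to_f_<supf><mode>): Rename into
@mve_<mve_insn>q_m_n_to_f_<supf><mode>.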

Diff:
---
 gcc/config/arm/iterators.md |  8 ++
 gcc/config/arm/mve.md   | 64 ++---
 2 files changed, 40 insertions(+), 32 deletions(-)

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index b9ff01cb104e..bf800625face 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -964,6 +964,14 @@
 (VCMLAQ_M_F "vcmla") (VCMLAQ_ROT90_M_F "vcmla") 
(VCMLAQ_ROT180_M_F "vcmla") (VCMLAQ_ROT270_M_F "vcmla")
 (VCMULQ_M_F "vcmul") (VCMULQ_ROT90_M_F "vcmul") 
(VCMULQ_ROT180_M_F "vcmul") (VCMULQ_ROT270_M_F "vcmul")
 (VCREATEQ_S "vcreate") (VCREATEQ_U "vcreate") (VCREATEQ_F 
"vcreate")
+(VCVTQ_FROM_F_S "vcvt") (VCVTQ_FROM_F_U "vcvt")
+(VCVTQ_M_FROM_F_S "vcvt") (VCVTQ_M_FROM_F_U "vcvt")
+(VCVTQ_M_N_FROM_F_S "vcvt") (VCVTQ_M_N_FROM_F_U "vcvt")
+(VCVTQ_M_N_TO_F_S "vcvt") (VCVTQ_M_N_TO_F_U "vcvt")
+(VCVTQ_M_TO_F_S "vcvt") (VCVTQ_M_TO_F_U "vcvt")
+(VCVTQ_N_FROM_F_S "vcvt") (VCVTQ_N_FROM_F_U "vcvt")
+(VCVTQ_N_TO_F_S "vcvt") (VCVTQ_N_TO_F_U "vcvt")
+(VCVTQ_TO_F_S "vcvt") (VCVTQ_TO_F_U "vcvt")
 (VDUPQ_M_N_S "vdup") (VDUPQ_M_N_U "vdup") (VDUPQ_M_N_F "vdup")
 (VDUPQ_N_S "vdup") (VDUPQ_N_U "vdup") (VDUPQ_N_F "vdup")
 (VEORQ_M_S "veor") (VEORQ_M_U "veor") (VEORQ_M_F "veor")
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index 7ace8b1c8085..686620035a8d 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -236,17 +236,17 @@
 ])
 
 ;;
-;; [vcvtq_to_f_s, vcvtq_to_f_u])
+;; [vcvtq_to_f_s, vcvtq_to_f_u]
 ;;
-(define_insn "mve_vcvtq_to_f_"
+(define_insn "@mve_q_to_f_"
   [
(set (match_operand:MVE_0 0 "s_register_operand" "=w")
(unspec:MVE_0 [(match_operand: 1 "s_register_operand" "w")]
 VCVTQ_TO_F))
   ]
   "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-  "vcvt.f%#.%#\t%q0, %q1"
- [(set (attr "mve_unpredicated_insn") (symbol_ref 
"CODE_FOR_mve_vcvtq_to_f_"))
+  ".f%#.%#\t%q0, %q1"
+ [(set (attr "mve_unpredicated_insn") (symbol_ref 
"CODE_FOR_mve_q_to_f_"))
   (set_attr "type" "mve_move")
 ])
 
@@ -266,17 +266,17 @@
 ])
 
 ;;
-;; [vcvtq_from_f_s, vcvtq_from_f_u])
+;; [vcvtq_from_f_s, vcvtq_from_f_u]
 ;;
-(define_insn "mve_vcvtq_from_f_"
+(define_insn "@mve_q_from_f_"
   [
(set (match_operand:MVE_5 0 "s_register_operand" "=w")
(unspec:MVE_5 [(match_operand: 1 "s_register_operand" "w")]
 VCVTQ_FROM_F))
   ]
   "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-  "vcvt.%#.f%#\t%q0, %q1"
- [(set (attr "mve_unpredicated_insn") (symbol_ref 
"CODE_FOR_mve_vcvtq_from_f_"))
+  ".%#.f%#\t%q0, %q1"
+ [(set (attr "mve_unpredicated_insn") (symbol_ref 
"CODE_FOR_mve_q_from_f_"))
   (set_attr "type" "mve_move")
 ])
 
@@ -554,9 +554,9 @@
 ])
 
 ;;
-;; [vcvtq_n_to_f_s, vcvtq_n_to_f_u])
+;; [vcvtq_n_to_f_s, vcvtq_n_to_f_u]
 ;;
-(define_insn "mve_vcvtq_n_to_f_"
+(define_insn "@mve_q_n_to_f_"
   [
(set (match_operand:MVE_0 0 "s_register_operand" "=w")
(unspec:MVE_0 [(match_operand: 1 "s_register_operand" "w")
@@ -564,8 +564,8 @@
 VCVTQ_N_TO_F))
   ]
   "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-  "vcvt.f.\t%q0, %q1, %2"
- [(set (attr "mve_unpredicated_insn") (symbol_ref 
"CODE_FOR_mve_vcvtq_n_to_f_"))
+  ".f.\t%q0, %q1, %2"
+ [(set (attr "mve_unpredicated_insn") (symbol_ref 
"CODE_FOR_mve_q_n_to_f_"))
   (set_attr "type" "mve_move")
 ])
 
@@ -652,9 +652,9 @@
 ])
 
 ;;
-;; [vcvtq_n_from_f_s, vcvtq_n_from_f_u])
+;; [vcvtq_n_from_f_s, vcvtq_n_from

[gcc r15-4433] arm: [MVE intrinsics] add vcvtx shape

2024-10-18 Thread Christophe Lyon via Gcc-cvs
https://gcc.gnu.org/g:4f3aa71017155dfd9d371e32bedddb7c1279362d

commit r15-4433-g4f3aa71017155dfd9d371e32bedddb7c1279362d
Author: Christophe Lyon 
Date:   Mon Jun 10 13:19:41 2024 +0000

arm: [MVE intrinsics] add vcvtx shape

This patch adds the vcvtx shape description for vcvtaq, vcvtmq,
vcvtnq, vcvtpq.

2024-07-11  Christophe Lyon  

gcc/
* config/arm/arm-mve-builtins-shapes.cc (vcvtx): New.
* config/arm/arm-mve-builtins-shapes.h (vcvtx): New.

Diff:
---
 gcc/config/arm/arm-mve-builtins-shapes.cc | 59 +++
 gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
 2 files changed, 60 insertions(+)

diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc 
b/gcc/config/arm/arm-mve-builtins-shapes.cc
index 5ebf666d954b..6632ee49067d 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.cc
+++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
@@ -2116,6 +2116,65 @@ struct vcvt_f32_f16_def : public nonoverloaded_base
 };
 SHAPE (vcvt_f32_f16)
 
+/* <T0>_t foo_t0[_t1](<T1>_t)
+
+   Example: vcvtaq.
+   int16x8_t [__arm_]vcvtaq_s16_f16(float16x8_t a)
+   int16x8_t [__arm_]vcvtaq_m[_s16_f16](int16x8_t inactive, float16x8_t a, 
mve_pred16_t p)
+   int16x8_t [__arm_]vcvtaq_x_s16_f16(float16x8_t a, mve_pred16_t p)
+*/
+struct vcvtx_def : public overloaded_base<0>
+{
+  bool
+  explicit_type_suffix_p (unsigned int, enum predication_index pred,
+ enum mode_suffix_index,
+ type_suffix_info) const override
+  {
+return pred != PRED_m;
+  }
+
+  bool
+  skip_overload_p (enum predication_index pred, enum mode_suffix_index)
+const override
+  {
+return pred != PRED_m;
+  }
+
+  void
+  build (function_builder &b, const function_group_info &group,
+bool preserve_user_namespace) const override
+  {
+b.add_overloaded_functions (group, MODE_none, preserve_user_namespace);
+build_all (b, "v0,v1", group, MODE_none, preserve_user_namespace);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+unsigned int i, nargs;
+type_suffix_index from_type;
+tree res;
+
+if (!r.check_gp_argument (1, i, nargs)
+   || (from_type
+   = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES)
+  return error_mark_node;
+
+type_suffix_index to_type;
+
+gcc_assert (r.pred == PRED_m);
+
+/* Get the return type from the 'inactive' argument.  */
+to_type = r.infer_vector_type (0);
+
+if ((res = r.lookup_form (r.mode_suffix_id, to_type, from_type)))
+   return res;
+
+return r.report_no_such_form (from_type);
+  }
+};
+SHAPE (vcvtx)
+
 /* _t vfoo[_t0](_t, _t, mve_pred16_t)
 
i.e. a version of the standard ternary shape in which
diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h 
b/gcc/config/arm/arm-mve-builtins-shapes.h
index 50157b575712..ef497b6c97a5 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.h
+++ b/gcc/config/arm/arm-mve-builtins-shapes.h
@@ -80,6 +80,7 @@ namespace arm_mve
 extern const function_shape *const vcvt;
 extern const function_shape *const vcvt_f16_f32;
 extern const function_shape *const vcvt_f32_f16;
+extern const function_shape *const vcvtx;
 extern const function_shape *const vpsel;
 
   } /* end namespace arm_mve::shapes */


[gcc r15-4446] arm: [MVE intrinsics] add vidwdup shape

2024-10-18 Thread Christophe Lyon via Gcc-cvs
https://gcc.gnu.org/g:ec116668058f3f0c472313b95bf11e99965f92df

commit r15-4446-gec116668058f3f0c472313b95bf11e99965f92df
Author: Christophe Lyon 
Date:   Mon Aug 26 17:16:07 2024 +0000

arm: [MVE intrinsics] add vidwdup shape

This patch adds the vidwdup shape description for vdwdup and viwdup.

It is very similar to viddup, but accounts for the additional 'wrap'
scalar parameter.

2024-08-21  Christophe Lyon  

gcc/
* config/arm/arm-mve-builtins-shapes.cc (vidwdup): New.
* config/arm/arm-mve-builtins-shapes.h (vidwdup): New.

Diff:
---
 gcc/config/arm/arm-mve-builtins-shapes.cc | 88 +++
 gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
 2 files changed, 89 insertions(+)

diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc 
b/gcc/config/arm/arm-mve-builtins-shapes.cc
index a1d2e2431287..510f15ae73af 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.cc
+++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
@@ -2291,6 +2291,94 @@ struct viddup_def : public overloaded_base<0>
 };
 SHAPE (viddup)
 
+/* <T0>_t vfoo[_n]_t0(uint32_t, uint32_t, const int)
+   <T0>_t vfoo[_wb]_t0(uint32_t *, uint32_t, const int)
+
+   Shape for vector increment or decrement with wrap and duplicate operations
+   that take an integer or pointer to integer first argument, an integer second
+   argument and an immediate, and produce a vector.
+
+   Check that 'imm' is one of 1, 2, 4 or 8.
+
+   Example: vdwdupq.
+   uint8x16_t [__arm_]vdwdupq[_n]_u8(uint32_t a, uint32_t b, const int imm)
+   uint8x16_t [__arm_]vdwdupq[_wb]_u8(uint32_t *a, uint32_t b, const int imm)
+   uint8x16_t [__arm_]vdwdupq_m[_n_u8](uint8x16_t inactive, uint32_t a, 
uint32_t b, const int imm, mve_pred16_t p)
+   uint8x16_t [__arm_]vdwdupq_m[_wb_u8](uint8x16_t inactive, uint32_t *a, 
uint32_t b, const int imm, mve_pred16_t p)
+   uint8x16_t [__arm_]vdwdupq_x[_n]_u8(uint32_t a, uint32_t b, const int imm, 
mve_pred16_t p)
+   uint8x16_t [__arm_]vdwdupq_x[_wb]_u8(uint32_t *a, uint32_t b, const int 
imm, mve_pred16_t p)  */
+struct vidwdup_def : public overloaded_base<0>
+{
+  bool
+  explicit_type_suffix_p (unsigned int i, enum predication_index pred,
+ enum mode_suffix_index,
+ type_suffix_info) const override
+  {
+return ((i == 0) && (pred != PRED_m));
+  }
+
+  bool
+  skip_overload_p (enum predication_index, enum mode_suffix_index mode) const 
override
+  {
+/* For MODE_wb, share the overloaded instance with MODE_n.  */
+if (mode == MODE_wb)
+  return true;
+
+return false;
+  }
+
+  void
+  build (function_builder &b, const function_group_info &group,
+bool preserve_user_namespace) const override
+  {
+b.add_overloaded_functions (group, MODE_none, preserve_user_namespace);
+build_all (b, "v0,su32,su32,su64", group, MODE_n, preserve_user_namespace);
+build_all (b, "v0,as,su32,su64", group, MODE_wb, preserve_user_namespace);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+unsigned int i, nargs;
+type_suffix_index type_suffix = NUM_TYPE_SUFFIXES;
+if (!r.check_gp_argument (3, i, nargs))
+  return error_mark_node;
+
+type_suffix = r.type_suffix_ids[0];
+/* With PRED_m, there is no type suffix, so infer it from the first 
(inactive)
+   argument.  */
+if (type_suffix == NUM_TYPE_SUFFIXES)
+  type_suffix = r.infer_vector_type (0);
+
+unsigned int last_arg = i - 2;
+/* Check that last_arg is either scalar or pointer.  */
+if (!r.scalar_argument_p (last_arg))
+  return error_mark_node;
+
+if (!r.scalar_argument_p (last_arg + 1))
+  return error_mark_node;
+
+if (!r.require_integer_immediate (last_arg + 2))
+  return error_mark_node;
+
+/* With MODE_n we expect a scalar, with MODE_wb we expect a pointer.  */
+mode_suffix_index mode_suffix;
+if (POINTER_TYPE_P (r.get_argument_type (last_arg)))
+  mode_suffix = MODE_wb;
+else
+  mode_suffix = MODE_n;
+
+return r.resolve_to (mode_suffix, type_suffix);
+  }
+
+  bool
+  check (function_checker &c) const override
+  {
+return c.require_immediate_one_of (2, 1, 2, 4, 8);
+  }
+};
+SHAPE (vidwdup)
+
 /* _t vfoo[_t0](_t, _t, mve_pred16_t)
 
i.e. a version of the standard ternary shape in which
diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h 
b/gcc/config/arm/arm-mve-builtins-shapes.h
index 186287c16207..b3d08ab38669 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.h
+++ b/gcc/config/arm/arm-mve-builtins-shapes.h
@@ -83,6 +83,7 @@ namespace arm_mve
 extern const function_shape *const vcvt_f32_f16;
 extern const function_shape *const vcvtx;
 extern const function_shape *const viddup;
+extern const function_shape *const vidwdup;
 extern const function_shape *const vpsel;
 
   } /* end namespace arm_mve::shapes */


[gcc r15-4447] arm: [MVE intrinsics] rework vdwdup viwdup

2024-10-18 Thread Christophe Lyon via Gcc-cvs
https://gcc.gnu.org/g:47ed70f758164af4b42d6e044d6f5be62a097c7b

commit r15-4447-g47ed70f758164af4b42d6e044d6f5be62a097c7b
Author: Christophe Lyon 
Date:   Thu Aug 29 08:52:23 2024 +

arm: [MVE intrinsics] rework vdwdup viwdup

Implement vdwdup and viwdup using the new MVE builtins framework.

In order to share more code with viddup_impl, the patch swaps operands
1 and 2 in @mve_v[id]wdupq_m_wb_u_insn, so that the parameter
order is similar to what @mve_v[id]dupq_m_wb_u_insn uses.

2024-08-28  Christophe Lyon  

gcc/
* config/arm/arm-mve-builtins-base.cc (viddup_impl): Add support
for wrapping versions.
(vdwdupq): New.
(viwdupq): New.
* config/arm/arm-mve-builtins-base.def (vdwdupq): New.
(viwdupq): New.
* config/arm/arm-mve-builtins-base.h (vdwdupq): New.
(viwdupq): New.
* config/arm/arm_mve.h (vdwdupq_m): Delete.
(vdwdupq_u8): Delete.
(vdwdupq_u32): Delete.
(vdwdupq_u16): Delete.
(viwdupq_m): Delete.
(viwdupq_u8): Delete.
(viwdupq_u32): Delete.
(viwdupq_u16): Delete.
(vdwdupq_x_u8): Delete.
(vdwdupq_x_u16): Delete.
(vdwdupq_x_u32): Delete.
(viwdupq_x_u8): Delete.
(viwdupq_x_u16): Delete.
(viwdupq_x_u32): Delete.
(vdwdupq_m_n_u8): Delete.
(vdwdupq_m_n_u32): Delete.
(vdwdupq_m_n_u16): Delete.
(vdwdupq_m_wb_u8): Delete.
(vdwdupq_m_wb_u32): Delete.
(vdwdupq_m_wb_u16): Delete.
(vdwdupq_n_u8): Delete.
(vdwdupq_n_u32): Delete.
(vdwdupq_n_u16): Delete.
(vdwdupq_wb_u8): Delete.
(vdwdupq_wb_u32): Delete.
(vdwdupq_wb_u16): Delete.
(viwdupq_m_n_u8): Delete.
(viwdupq_m_n_u32): Delete.
(viwdupq_m_n_u16): Delete.
(viwdupq_m_wb_u8): Delete.
(viwdupq_m_wb_u32): Delete.
(viwdupq_m_wb_u16): Delete.
(viwdupq_n_u8): Delete.
(viwdupq_n_u32): Delete.
(viwdupq_n_u16): Delete.
(viwdupq_wb_u8): Delete.
(viwdupq_wb_u32): Delete.
(viwdupq_wb_u16): Delete.
(vdwdupq_x_n_u8): Delete.
(vdwdupq_x_n_u16): Delete.
(vdwdupq_x_n_u32): Delete.
(vdwdupq_x_wb_u8): Delete.
(vdwdupq_x_wb_u16): Delete.
(vdwdupq_x_wb_u32): Delete.
(viwdupq_x_n_u8): Delete.
(viwdupq_x_n_u16): Delete.
(viwdupq_x_n_u32): Delete.
(viwdupq_x_wb_u8): Delete.
(viwdupq_x_wb_u16): Delete.
(viwdupq_x_wb_u32): Delete.
(__arm_vdwdupq_m_n_u8): Delete.
(__arm_vdwdupq_m_n_u32): Delete.
(__arm_vdwdupq_m_n_u16): Delete.
(__arm_vdwdupq_m_wb_u8): Delete.
(__arm_vdwdupq_m_wb_u32): Delete.
(__arm_vdwdupq_m_wb_u16): Delete.
(__arm_vdwdupq_n_u8): Delete.
(__arm_vdwdupq_n_u32): Delete.
(__arm_vdwdupq_n_u16): Delete.
(__arm_vdwdupq_wb_u8): Delete.
(__arm_vdwdupq_wb_u32): Delete.
(__arm_vdwdupq_wb_u16): Delete.
(__arm_viwdupq_m_n_u8): Delete.
(__arm_viwdupq_m_n_u32): Delete.
(__arm_viwdupq_m_n_u16): Delete.
(__arm_viwdupq_m_wb_u8): Delete.
(__arm_viwdupq_m_wb_u32): Delete.
(__arm_viwdupq_m_wb_u16): Delete.
(__arm_viwdupq_n_u8): Delete.
(__arm_viwdupq_n_u32): Delete.
(__arm_viwdupq_n_u16): Delete.
(__arm_viwdupq_wb_u8): Delete.
(__arm_viwdupq_wb_u32): Delete.
(__arm_viwdupq_wb_u16): Delete.
(__arm_vdwdupq_x_n_u8): Delete.
(__arm_vdwdupq_x_n_u16): Delete.
(__arm_vdwdupq_x_n_u32): Delete.
(__arm_vdwdupq_x_wb_u8): Delete.
(__arm_vdwdupq_x_wb_u16): Delete.
(__arm_vdwdupq_x_wb_u32): Delete.
(__arm_viwdupq_x_n_u8): Delete.
(__arm_viwdupq_x_n_u16): Delete.
(__arm_viwdupq_x_n_u32): Delete.
(__arm_viwdupq_x_wb_u8): Delete.
(__arm_viwdupq_x_wb_u16): Delete.
(__arm_viwdupq_x_wb_u32): Delete.
(__arm_vdwdupq_m): Delete.
(__arm_vdwdupq_u8): Delete.
(__arm_vdwdupq_u32): Delete.
(__arm_vdwdupq_u16): Delete.
(__arm_viwdupq_m): Delete.
(__arm_viwdupq_u8): Delete.
(__arm_viwdupq_u32): Delete.
(__arm_viwdupq_u16): Delete.
(__arm_vdwdupq_x_u8): Delete.
(__arm_vdwdupq_x_u16): Delete.
(__arm_vdwdupq_x_u32): Delete.
(__arm_viwdupq_x_u8): Delete.
(__arm_viwdupq_x_u16): Delete.
(__arm_viwdup

[gcc r15-4450] arm: [MVE intrinsics] add vshlc shape

2024-10-18 Thread Christophe Lyon via Gcc-cvs
https://gcc.gnu.org/g:2ddabb28dbd42471f8fa526e2689144bacd0d3d0

commit r15-4450-g2ddabb28dbd42471f8fa526e2689144bacd0d3d0
Author: Christophe Lyon 
Date:   Wed Aug 28 13:46:52 2024 +

arm: [MVE intrinsics] add vshlc shape

This patch adds the vshlc shape description.

2024-08-28  Christophe Lyon  

gcc/
* config/arm/arm-mve-builtins-shapes.cc (vshlc): New.
* config/arm/arm-mve-builtins-shapes.h (vshlc): New.

Diff:
---
 gcc/config/arm/arm-mve-builtins-shapes.cc | 44 +++
 gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
 2 files changed, 45 insertions(+)

diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc 
b/gcc/config/arm/arm-mve-builtins-shapes.cc
index 510f15ae73af..ee6b5b0a7b14 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.cc
+++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
@@ -2418,6 +2418,50 @@ struct vpsel_def : public overloaded_base<0>
 };
 SHAPE (vpsel)
 
+/* <T0>_t vfoo[_t0](T0, uint32_t* , const int)
+
+   Check that 'imm' is in [1..32].
+
+   Example: vshlcq.
+   uint8x16_t [__arm_]vshlcq[_u8](uint8x16_t a, uint32_t *b, const int imm)
+   uint8x16_t [__arm_]vshlcq_m[_u8](uint8x16_t a, uint32_t *b, const int imm, 
mve_pred16_t p)  */
+struct vshlc_def : public overloaded_base<0>
+{
+  void
+  build (function_builder &b, const function_group_info &group,
+bool preserve_user_namespace) const override
+  {
+b.add_overloaded_functions (group, MODE_none, preserve_user_namespace);
+build_all (b, "v0,v0,as,su64", group, MODE_none, preserve_user_namespace);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+unsigned int i, nargs;
+type_suffix_index type;
+if (!r.check_gp_argument (3, i, nargs)
+   || (type = r.infer_vector_type (0)) == NUM_TYPE_SUFFIXES)
+  return error_mark_node;
+
+/* Check that arg #2 is a pointer.  */
+if (!POINTER_TYPE_P (r.get_argument_type (i - 1)))
+  return error_mark_node;
+
+if (!r.require_integer_immediate (i))
+  return error_mark_node;
+
+return r.resolve_to (r.mode_suffix_id, type);
+  }
+
+  bool
+  check (function_checker &c) const override
+  {
+return c.require_immediate_range (2, 1, 32);
+  }
+};
+SHAPE (vshlc)
+
 } /* end namespace arm_mve */
 
 #undef SHAPE
diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h 
b/gcc/config/arm/arm-mve-builtins-shapes.h
index b3d08ab38669..d73c74c8ad74 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.h
+++ b/gcc/config/arm/arm-mve-builtins-shapes.h
@@ -85,6 +85,7 @@ namespace arm_mve
 extern const function_shape *const viddup;
 extern const function_shape *const vidwdup;
 extern const function_shape *const vpsel;
+extern const function_shape *const vshlc;
 
   } /* end namespace arm_mve::shapes */
 } /* end namespace arm_mve */


[gcc r15-4454] arm: [MVE intrinsics] factorize vadc vadci vsbc vsbci

2024-10-18 Thread Christophe Lyon via Gcc-cvs
https://gcc.gnu.org/g:8c21fc6610e1b1e038ec0e7d58e0e32716334892

commit r15-4454-g8c21fc6610e1b1e038ec0e7d58e0e32716334892
Author: Christophe Lyon 
Date:   Thu Aug 29 21:00:30 2024 +

arm: [MVE intrinsics] factorize vadc vadci vsbc vsbci

Factorize vadc/vsbc and vadci/vsbci so that they use the same
parameterized names.

2024-08-28  Christophe Lyon  

gcc/
* config/arm/iterators.md (mve_insn): Add VADCIQ_M_S, VADCIQ_M_U,
VADCIQ_U, VADCIQ_S, VADCQ_M_S, VADCQ_M_U, VADCQ_S, VADCQ_U,
VSBCIQ_M_S, VSBCIQ_M_U, VSBCIQ_S, VSBCIQ_U, VSBCQ_M_S, VSBCQ_M_U,
VSBCQ_S, VSBCQ_U.
(VADCIQ, VSBCIQ): Merge into ...
(VxCIQ): ... this.
(VADCIQ_M, VSBCIQ_M): Merge into ...
(VxCIQ_M): ... this.
(VSBCQ, VADCQ): Merge into ...
(VxCQ): ... this.
(VSBCQ_M, VADCQ_M): Merge into ...
(VxCQ_M): ... this.
* config/arm/mve.md
(mve_vadciq_v4si, mve_vsbciq_v4si): Merge into ...
(@mve_q_v4si): ... this.
(mve_vadciq_m_v4si, mve_vsbciq_m_v4si): Merge into ...
(@mve_q_m_v4si): ... this.
(mve_vadcq_v4si, mve_vsbcq_v4si): Merge into ...
(@mve_q_v4si): ... this.
(mve_vadcq_m_v4si, mve_vsbcq_m_v4si): Merge into ...
(@mve_q_m_v4si): ... this.

Diff:
---
 gcc/config/arm/iterators.md |  20 ---
 gcc/config/arm/mve.md   | 131 ++--
 2 files changed, 42 insertions(+), 109 deletions(-)

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 2fb3b25040f4..59e112b228cc 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -941,6 +941,10 @@
 (VABDQ_S "vabd") (VABDQ_U "vabd") (VABDQ_F "vabd")
 (VABSQ_M_F "vabs")
 (VABSQ_M_S "vabs")
+(VADCIQ_M_S "vadci") (VADCIQ_M_U "vadci")
+(VADCIQ_S "vadci") (VADCIQ_U "vadci")
+(VADCQ_M_S "vadc") (VADCQ_M_U "vadc")
+(VADCQ_S "vadc") (VADCQ_U "vadc")
 (VADDLVAQ_P_S "vaddlva") (VADDLVAQ_P_U "vaddlva")
 (VADDLVAQ_S "vaddlva") (VADDLVAQ_U "vaddlva")
 (VADDLVQ_P_S "vaddlv") (VADDLVQ_P_U "vaddlv")
@@ -1235,6 +1239,10 @@
 (VRSHRNTQ_N_S "vrshrnt") (VRSHRNTQ_N_U "vrshrnt")
 (VRSHRQ_M_N_S "vrshr") (VRSHRQ_M_N_U "vrshr")
 (VRSHRQ_N_S "vrshr") (VRSHRQ_N_U "vrshr")
+(VSBCIQ_M_S "vsbci") (VSBCIQ_M_U "vsbci")
+(VSBCIQ_S "vsbci") (VSBCIQ_U "vsbci")
+(VSBCQ_M_S "vsbc") (VSBCQ_M_U "vsbc")
+(VSBCQ_S "vsbc") (VSBCQ_U "vsbc")
 (VSHLLBQ_M_N_S "vshllb") (VSHLLBQ_M_N_U "vshllb")
 (VSHLLBQ_N_S "vshllb") (VSHLLBQ_N_U "vshllb")
 (VSHLLTQ_M_N_S "vshllt") (VSHLLTQ_M_N_U "vshllt")
@@ -2949,14 +2957,10 @@
 (define_int_iterator VLDRWGBWBQ [VLDRWQGBWB_S VLDRWQGBWB_U])
 (define_int_iterator VSTRDSBWBQ [VSTRDQSBWB_S VSTRDQSBWB_U])
 (define_int_iterator VLDRDGBWBQ [VLDRDQGBWB_S VLDRDQGBWB_U])
-(define_int_iterator VADCIQ [VADCIQ_U VADCIQ_S])
-(define_int_iterator VADCIQ_M [VADCIQ_M_U VADCIQ_M_S])
-(define_int_iterator VSBCQ [VSBCQ_U VSBCQ_S])
-(define_int_iterator VSBCQ_M [VSBCQ_M_U VSBCQ_M_S])
-(define_int_iterator VSBCIQ [VSBCIQ_U VSBCIQ_S])
-(define_int_iterator VSBCIQ_M [VSBCIQ_M_U VSBCIQ_M_S])
-(define_int_iterator VADCQ [VADCQ_U VADCQ_S])
-(define_int_iterator VADCQ_M [VADCQ_M_U VADCQ_M_S])
+(define_int_iterator VxCIQ [VADCIQ_U VADCIQ_S VSBCIQ_U VSBCIQ_S])
+(define_int_iterator VxCIQ_M [VADCIQ_M_U VADCIQ_M_S VSBCIQ_M_U VSBCIQ_M_S])
+(define_int_iterator VxCQ [VADCQ_U VADCQ_S  VSBCQ_U VSBCQ_S])
+(define_int_iterator VxCQ_M [VADCQ_M_U VADCQ_M_S  VSBCQ_M_U VSBCQ_M_S])
 (define_int_iterator UQRSHLLQ [UQRSHLL_64 UQRSHLL_48])
 (define_int_iterator SQRSHRLQ [SQRSHRL_64 SQRSHRL_48])
 (define_int_iterator VSHLCQ_M [VSHLCQ_M_S VSHLCQ_M_U])
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index b2a8319f60ad..8c69670b1613 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -5690,159 +5690,88 @@
 }
  [(set (attr "mve_unpredicated_insn") (symbol_ref 
"CODE_FOR_mve_vldrdq_gather_base_wb_v2di_insn"))
   (set_attr "length" "8")])
-;;
-;; [vadciq_m_s, vadciq_m_u])
-;;
-(define_insn "mve_vadciq_m_v4si"
-  [(set (match_operand:V4SI 0 "s_register_operand" "=w")
-   (unspec:V4SI [(match_operand:V4SI 1 "s_register_operand" "0")
- (match_operand:V4SI 2 "s_register_operand" "w")
- (match_operand:V4SI 3 "s_register_operand" "w")
- (match_operand:V4BI 4 "vpr_register_operand" "Up")]
-VADCIQ_M))
-   (set (reg:SI VFPCC_REGNUM)
-   (unspec:SI [(const_int 0)]
-VADCIQ_M))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vadcit.i32\t%q0, %q2, %q3"
- [(set (attr "mve_unpredicated_insn") (symbol_ref 
"

[gcc r15-4448] arm: [MVE intrinsics] update v[id]wdup tests

2024-10-18 Thread Christophe Lyon via Gcc-cvs
https://gcc.gnu.org/g:e65ab03fac39bc708dbc9c7436e17887b6457f05

commit r15-4448-ge65ab03fac39bc708dbc9c7436e17887b6457f05
Author: Christophe Lyon 
Date:   Fri Aug 30 09:37:34 2024 +

arm: [MVE intrinsics] update v[id]wdup tests

Testing v[id]wdup overloads with '1' as argument for uint32_t* does
not make sense: this patch adds a new 'uint32_t *a' parameter to foo2
in such tests.

The difference with v[id]dup tests (where we removed 'foo2') is that
in 'foo1' we test the overload with a variable 'wrap' parameter (b)
and we need foo2 to test the overload with an immediate (1).

2024-08-28  Christophe Lyon  

gcc/testsuite/

* gcc.target/arm/mve/intrinsics/vdwdupq_m_wb_u16.c: Use pointer
parameter in foo2.
* gcc.target/arm/mve/intrinsics/vdwdupq_m_wb_u32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vdwdupq_m_wb_u8.c: Likewise.
* gcc.target/arm/mve/intrinsics/vdwdupq_wb_u16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vdwdupq_wb_u32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vdwdupq_wb_u8.c: Likewise.
* gcc.target/arm/mve/intrinsics/vdwdupq_x_wb_u16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vdwdupq_x_wb_u32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vdwdupq_x_wb_u8.c: Likewise.
* gcc.target/arm/mve/intrinsics/viwdupq_m_wb_u16.c: Likewise.
* gcc.target/arm/mve/intrinsics/viwdupq_m_wb_u32.c: Likewise.
* gcc.target/arm/mve/intrinsics/viwdupq_m_wb_u8.c: Likewise.
* gcc.target/arm/mve/intrinsics/viwdupq_wb_u16.c: Likewise.
* gcc.target/arm/mve/intrinsics/viwdupq_wb_u32.c: Likewise.
* gcc.target/arm/mve/intrinsics/viwdupq_wb_u8.c: Likewise.
* gcc.target/arm/mve/intrinsics/viwdupq_x_wb_u16.c: Likewise.
* gcc.target/arm/mve/intrinsics/viwdupq_x_wb_u32.c: Likewise.
* gcc.target/arm/mve/intrinsics/viwdupq_x_wb_u8.c: Likewise.

Diff:
---
 gcc/testsuite/gcc.target/arm/mve/intrinsics/vdwdupq_m_wb_u16.c | 6 +++---
 gcc/testsuite/gcc.target/arm/mve/intrinsics/vdwdupq_m_wb_u32.c | 6 +++---
 gcc/testsuite/gcc.target/arm/mve/intrinsics/vdwdupq_m_wb_u8.c  | 6 +++---
 gcc/testsuite/gcc.target/arm/mve/intrinsics/vdwdupq_wb_u16.c   | 6 +++---
 gcc/testsuite/gcc.target/arm/mve/intrinsics/vdwdupq_wb_u32.c   | 6 +++---
 gcc/testsuite/gcc.target/arm/mve/intrinsics/vdwdupq_wb_u8.c| 6 +++---
 gcc/testsuite/gcc.target/arm/mve/intrinsics/vdwdupq_x_wb_u16.c | 6 +++---
 gcc/testsuite/gcc.target/arm/mve/intrinsics/vdwdupq_x_wb_u32.c | 6 +++---
 gcc/testsuite/gcc.target/arm/mve/intrinsics/vdwdupq_x_wb_u8.c  | 6 +++---
 gcc/testsuite/gcc.target/arm/mve/intrinsics/viwdupq_m_wb_u16.c | 6 +++---
 gcc/testsuite/gcc.target/arm/mve/intrinsics/viwdupq_m_wb_u32.c | 6 +++---
 gcc/testsuite/gcc.target/arm/mve/intrinsics/viwdupq_m_wb_u8.c  | 6 +++---
 gcc/testsuite/gcc.target/arm/mve/intrinsics/viwdupq_wb_u16.c   | 6 +++---
 gcc/testsuite/gcc.target/arm/mve/intrinsics/viwdupq_wb_u32.c   | 6 +++---
 gcc/testsuite/gcc.target/arm/mve/intrinsics/viwdupq_wb_u8.c| 6 +++---
 gcc/testsuite/gcc.target/arm/mve/intrinsics/viwdupq_x_wb_u16.c | 6 +++---
 gcc/testsuite/gcc.target/arm/mve/intrinsics/viwdupq_x_wb_u32.c | 6 +++---
 gcc/testsuite/gcc.target/arm/mve/intrinsics/viwdupq_x_wb_u8.c  | 6 +++---
 18 files changed, 54 insertions(+), 54 deletions(-)

diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdwdupq_m_wb_u16.c 
b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdwdupq_m_wb_u16.c
index b24e7a2f5af2..e6004056c2c4 100644
--- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdwdupq_m_wb_u16.c
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdwdupq_m_wb_u16.c
@@ -53,13 +53,13 @@ foo1 (uint16x8_t inactive, uint32_t *a, uint32_t b, 
mve_pred16_t p)
 ** ...
 */
 uint16x8_t
-foo2 (uint16x8_t inactive, mve_pred16_t p)
+foo2 (uint16x8_t inactive, uint32_t *a, mve_pred16_t p)
 {
-  return vdwdupq_m (inactive, 1, 1, 1, p);
+  return vdwdupq_m (inactive, a, 1, 1, p);
 }
 
 #ifdef __cplusplus
 }
 #endif
 
-/* { dg-final { scan-assembler-not "__ARM_undef" } } */
\ No newline at end of file
+/* { dg-final { scan-assembler-not "__ARM_undef" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdwdupq_m_wb_u32.c 
b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdwdupq_m_wb_u32.c
index 75c41450a380..b36dbcd85855 100644
--- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdwdupq_m_wb_u32.c
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdwdupq_m_wb_u32.c
@@ -53,13 +53,13 @@ foo1 (uint32x4_t inactive, uint32_t *a, uint32_t b, 
mve_pred16_t p)
 ** ...
 */
 uint32x4_t
-foo2 (uint32x4_t inactive, mve_pred16_t p)
+foo2 (uint32x4_t inactive, uint32_t *a, mve_pred16_t p)
 {
-  return vdwdupq_m (inactive, 1, 1, 1, p);
+  return vdwdupq_m (inactive, a, 1, 1, p);
 }
 
 #ifdef __cplusplus
 }
 #endif
 
-/* { dg-final { scan-assembler-not "__ARM_undef" } } */
\ No ne

[gcc r15-4449] arm: [MVE intrinsics] remove useless v[id]wdup expanders

2024-10-18 Thread Christophe Lyon via Gcc-cvs
https://gcc.gnu.org/g:c7f95f2b531ed179012c9bd26bbc1d2d4a849b22

commit r15-4449-gc7f95f2b531ed179012c9bd26bbc1d2d4a849b22
Author: Christophe Lyon 
Date:   Thu Aug 29 08:56:03 2024 +

arm: [MVE intrinsics] remove useless v[id]wdup expanders

Like with vddup/vidup, we use code_for_mve_q_wb_u_insn, so we can drop
the expanders and their declarations as builtins, now useless.

2024-08-28  Christophe Lyon  

gcc/
* config/arm/arm-builtins.cc
(arm_quinop_unone_unone_unone_unone_imm_pred_qualifiers): Delete.
* config/arm/arm_mve_builtins.def (viwdupq_wb_u, vdwdupq_wb_u)
(viwdupq_m_wb_u, vdwdupq_m_wb_u, viwdupq_m_n_u, vdwdupq_m_n_u)
(vdwdupq_n_u, viwdupq_n_u): Delete.
* config/arm/mve.md (mve_vdwdupq_n_u): Delete.
(mve_vdwdupq_wb_u): Delete.
(mve_vdwdupq_m_n_u): Delete.
(mve_vdwdupq_m_wb_u): Delete.

Diff:
---
 gcc/config/arm/arm-builtins.cc  |  7 
 gcc/config/arm/arm_mve_builtins.def |  8 
 gcc/config/arm/mve.md   | 75 -
 3 files changed, 90 deletions(-)

diff --git a/gcc/config/arm/arm-builtins.cc b/gcc/config/arm/arm-builtins.cc
index 74cea8900b48..00e2befd81d4 100644
--- a/gcc/config/arm/arm-builtins.cc
+++ b/gcc/config/arm/arm-builtins.cc
@@ -755,13 +755,6 @@ arm_ldru_z_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   = { qualifier_unsigned, qualifier_pointer, qualifier_predicate};
 #define LDRU_Z_QUALIFIERS (arm_ldru_z_qualifiers)
 
-static enum arm_type_qualifiers
-arm_quinop_unone_unone_unone_unone_imm_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS]
-  = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
-  qualifier_unsigned, qualifier_immediate, qualifier_predicate };
-#define QUINOP_UNONE_UNONE_UNONE_UNONE_IMM_PRED_QUALIFIERS \
-  (arm_quinop_unone_unone_unone_unone_imm_pred_qualifiers)
-
 static enum arm_type_qualifiers
 arm_ldrgbwbxu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate};
diff --git a/gcc/config/arm/arm_mve_builtins.def 
b/gcc/config/arm/arm_mve_builtins.def
index 13070d958fcc..74379fadee0a 100644
--- a/gcc/config/arm/arm_mve_builtins.def
+++ b/gcc/config/arm/arm_mve_builtins.def
@@ -797,14 +797,6 @@ VAR1 (STRSU_P, vstrdq_scatter_offset_p_u, v2di)
 VAR1 (STRSU_P, vstrdq_scatter_shifted_offset_p_u, v2di)
 VAR1 (STRSU_P, vstrwq_scatter_offset_p_u, v4si)
 VAR1 (STRSU_P, vstrwq_scatter_shifted_offset_p_u, v4si)
-VAR3 (TERNOP_UNONE_UNONE_UNONE_IMM, viwdupq_wb_u, v16qi, v4si, v8hi)
-VAR3 (TERNOP_UNONE_UNONE_UNONE_IMM, vdwdupq_wb_u, v16qi, v4si, v8hi)
-VAR3 (QUINOP_UNONE_UNONE_UNONE_UNONE_IMM_PRED, viwdupq_m_wb_u, v16qi, v8hi, 
v4si)
-VAR3 (QUINOP_UNONE_UNONE_UNONE_UNONE_IMM_PRED, vdwdupq_m_wb_u, v16qi, v8hi, 
v4si)
-VAR3 (QUINOP_UNONE_UNONE_UNONE_UNONE_IMM_PRED, viwdupq_m_n_u, v16qi, v8hi, 
v4si)
-VAR3 (QUINOP_UNONE_UNONE_UNONE_UNONE_IMM_PRED, vdwdupq_m_n_u, v16qi, v8hi, 
v4si)
-VAR3 (TERNOP_UNONE_UNONE_UNONE_IMM, vdwdupq_n_u, v16qi, v4si, v8hi)
-VAR3 (TERNOP_UNONE_UNONE_UNONE_IMM, viwdupq_n_u, v16qi, v4si, v8hi)
 VAR1 (STRSBWBU, vstrwq_scatter_base_wb_u, v4si)
 VAR1 (STRSBWBU, vstrdq_scatter_base_wb_u, v2di)
 VAR1 (STRSBWBU_P, vstrwq_scatter_base_wb_p_u, v4si)
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index 219633319d83..225bb7ab3e06 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -5093,41 +5093,6 @@
  [(set (attr "mve_unpredicated_insn") (symbol_ref 
"CODE_FOR_mve_q_u_insn"))
   (set_attr "length""8")])
 
-;;
-;; [vdwdupq_n_u])
-;;
-(define_expand "mve_vdwdupq_n_u"
- [(match_operand:MVE_2 0 "s_register_operand")
-  (match_operand:SI 1 "s_register_operand")
-  (match_operand:DI 2 "s_register_operand")
-  (match_operand:SI 3 "mve_imm_selective_upto_8")]
- "TARGET_HAVE_MVE"
-{
-  rtx ignore_wb = gen_reg_rtx (SImode);
-  emit_insn (gen_mve_vdwdupq_wb_u_insn (operands[0], ignore_wb,
- operands[1], operands[2],
- operands[3]));
-  DONE;
-})
-
-;;
-;; [vdwdupq_wb_u])
-;;
-(define_expand "mve_vdwdupq_wb_u"
- [(match_operand:SI 0 "s_register_operand")
-  (match_operand:SI 1 "s_register_operand")
-  (match_operand:DI 2 "s_register_operand")
-  (match_operand:SI 3 "mve_imm_selective_upto_8")
-  (unspec:MVE_2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
- "TARGET_HAVE_MVE"
-{
-  rtx ignore_vec = gen_reg_rtx (mode);
-  emit_insn (gen_mve_vdwdupq_wb_u_insn (ignore_vec, operands[0],
- operands[1], operands[2],
- operands[3]));
-  DONE;
-})
-
 ;;
 ;; [vdwdupq_wb_u_insn, viwdupq_wb_u_insn]
 ;;
@@ -5147,46 +5112,6 @@
  [(set (attr "mve_unpredicated_insn") (symbol_ref 
"CODE_FOR_mve_q_wb_u_insn"))
   (set_attr "type" "mve_move")])
 
-;;
-;; [vdwdupq_m_n_u])
-;;
-(define_expand "mve_vdwdupq_m_n_u"
- [(match_operand:MVE_2 0 "s_register_operand")
-  (match_opera

[gcc r15-4451] arm: [MVE intrinsics] rework vshlcq

2024-10-18 Thread Christophe Lyon via Gcc-cvs
https://gcc.gnu.org/g:4d2b6a7dd5393475134c46537004346126458136

commit r15-4451-g4d2b6a7dd5393475134c46537004346126458136
Author: Christophe Lyon 
Date:   Wed Aug 28 13:47:17 2024 +

arm: [MVE intrinsics] rework vshlcq

Implement vshlc using the new MVE builtins framework.

2024-08-28  Christophe Lyon  

gcc/
* config/arm/arm-mve-builtins-base.cc (class vshlc_impl): New.
(vshlc): New.
* config/arm/arm-mve-builtins-base.def (vshlcq): New.
* config/arm/arm-mve-builtins-base.h (vshlcq): New.
* config/arm/arm-mve-builtins.cc
(function_instance::has_inactive_argument): Handle vshlc.
* config/arm/arm_mve.h (vshlcq): Delete.
(vshlcq_m): Delete.
(vshlcq_s8): Delete.
(vshlcq_u8): Delete.
(vshlcq_s16): Delete.
(vshlcq_u16): Delete.
(vshlcq_s32): Delete.
(vshlcq_u32): Delete.
(vshlcq_m_s8): Delete.
(vshlcq_m_u8): Delete.
(vshlcq_m_s16): Delete.
(vshlcq_m_u16): Delete.
(vshlcq_m_s32): Delete.
(vshlcq_m_u32): Delete.
(__arm_vshlcq_s8): Delete.
(__arm_vshlcq_u8): Delete.
(__arm_vshlcq_s16): Delete.
(__arm_vshlcq_u16): Delete.
(__arm_vshlcq_s32): Delete.
(__arm_vshlcq_u32): Delete.
(__arm_vshlcq_m_s8): Delete.
(__arm_vshlcq_m_u8): Delete.
(__arm_vshlcq_m_s16): Delete.
(__arm_vshlcq_m_u16): Delete.
(__arm_vshlcq_m_s32): Delete.
(__arm_vshlcq_m_u32): Delete.
(__arm_vshlcq): Delete.
(__arm_vshlcq_m): Delete.
* config/arm/mve.md (mve_vshlcq_): Add '@' prefix.
(mve_vshlcq_m_): Likewise.

Diff:
---
 gcc/config/arm/arm-mve-builtins-base.cc  |  72 ++
 gcc/config/arm/arm-mve-builtins-base.def |   1 +
 gcc/config/arm/arm-mve-builtins-base.h   |   1 +
 gcc/config/arm/arm-mve-builtins.cc   |   1 +
 gcc/config/arm/arm_mve.h | 233 ---
 gcc/config/arm/mve.md|   4 +-
 6 files changed, 77 insertions(+), 235 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc 
b/gcc/config/arm/arm-mve-builtins-base.cc
index 86ad8a847131..1b7d97fbe072 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -536,6 +536,77 @@ public:
   }
 };
 
+/* Map the vshlc function directly to CODE (UNSPEC, M) where M is the vector
+   mode associated with type suffix 0.  We need this special case because the
+   intrinsics dereference the second parameter and update its contents.  */
+class vshlc_impl : public function_base
+{
+public:
+  unsigned int
+  call_properties (const function_instance &) const override
+  {
+return CP_WRITE_MEMORY | CP_READ_MEMORY;
+  }
+
+  tree
+  memory_scalar_type (const function_instance &) const override
+  {
+return get_typenode_from_name (UINT32_TYPE);
+  }
+
+  rtx
+  expand (function_expander &e) const override
+  {
+machine_mode mode = e.vector_mode (0);
+insn_code code;
+rtx insns, carry_ptr, carry, new_carry;
+int carry_arg_no;
+
+if (! e.type_suffix (0).integer_p)
+  gcc_unreachable ();
+
+if (e.mode_suffix_id != MODE_none)
+  gcc_unreachable ();
+
+carry_arg_no = 1;
+
+carry = gen_reg_rtx (SImode);
+carry_ptr = e.args[carry_arg_no];
+emit_insn (gen_rtx_SET (carry, gen_rtx_MEM (SImode, carry_ptr)));
+e.args[carry_arg_no] = carry;
+
+new_carry = gen_reg_rtx (SImode);
+e.args.quick_insert (0, new_carry);
+
+switch (e.pred)
+  {
+  case PRED_none:
+   /* No predicate.  */
+   code = e.type_suffix (0).unsigned_p
+ ? code_for_mve_vshlcq (VSHLCQ_U, mode)
+ : code_for_mve_vshlcq (VSHLCQ_S, mode);
+   insns = e.use_exact_insn (code);
+   break;
+
+  case PRED_m:
+   /* "m" predicate.  */
+   code = e.type_suffix (0).unsigned_p
+ ? code_for_mve_vshlcq_m (VSHLCQ_M_U, mode)
+ : code_for_mve_vshlcq_m (VSHLCQ_M_S, mode);
+   insns = e.use_cond_insn (code, 0);
+   break;
+
+  default:
+   gcc_unreachable ();
+  }
+
+/* Update carry.  */
+emit_insn (gen_rtx_SET (gen_rtx_MEM (Pmode, carry_ptr), new_carry));
+
+return insns;
+  }
+};
+
 } /* end anonymous namespace */
 
 namespace arm_mve {
@@ -868,6 +939,7 @@ FUNCTION_WITH_M_N_NO_F (vrshlq, VRSHLQ)
 FUNCTION_ONLY_N_NO_F (vrshrnbq, VRSHRNBQ)
 FUNCTION_ONLY_N_NO_F (vrshrntq, VRSHRNTQ)
 FUNCTION_ONLY_N_NO_F (vrshrq, VRSHRQ)
+FUNCTION (vshlcq, vshlc_impl,)
 FUNCTION_ONLY_N_NO_F (vshllbq, VSHLLBQ)
 FUNCTION_ONLY_N_NO_F (vshlltq, VSHLLTQ)
 FUNCTION_WITH_M_N_R (vshlq, VSHLQ)
diff --git a/gcc/config/arm/arm-mve-builtins-base.def 
b/gcc/config/arm/arm-mve-builtins-base.def
index c5f1e8a197b1..bd69f06d7e41 100644
--- a/gcc/config/arm/arm-

[gcc r15-4445] arm: [MVE intrinsics] factorize vdwdup viwdup

2024-10-18 Thread Christophe Lyon via Gcc-cvs
https://gcc.gnu.org/g:42be837c368692edcff1a35cad3eafc33b3ac461

commit r15-4445-g42be837c368692edcff1a35cad3eafc33b3ac461
Author: Christophe Lyon 
Date:   Thu Aug 22 20:36:46 2024 +

arm: [MVE intrinsics] factorize vdwdup viwdup

Factorize vdwdup and viwdup so that they use the same parameterized
names.

Like with vddup and vidup, we do not bother with the corresponding
expanders, as we stop using them in a subsequent patch.

The patch also adds the missing attributes to vdwdupq_wb_u_insn and
viwdupq_wb_u_insn patterns.

2024-08-21  Christophe Lyon  

gcc/
* config/arm/iterators.md (mve_insn): Add VIWDUPQ, VDWDUPQ,
VIWDUPQ_M, VDWDUPQ_M.
(VIDWDUPQ): New iterator.
(VIDWDUPQ_M): New iterator.
* config/arm/mve.md (mve_vdwdupq_wb_u_insn)
(mve_viwdupq_wb_u_insn): Merge into ...
(@mve_q_wb_u_insn): ... this. Add missing
mve_unpredicated_insn and mve_move attributes.
(mve_vdwdupq_m_wb_u_insn, mve_viwdupq_m_wb_u_insn):
Merge into ...
(@mve_q_m_wb_u_insn): ... this.

Diff:
---
 gcc/config/arm/iterators.md |  4 +++
 gcc/config/arm/mve.md   | 68 +
 2 files changed, 17 insertions(+), 55 deletions(-)

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index c0299117f26e..2fb3b25040f4 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -1009,6 +1009,8 @@
 (VHSUBQ_S "vhsub") (VHSUBQ_U "vhsub")
 (VIDUPQ "vidup") (VDDUPQ "vddup")
 (VIDUPQ_M "vidup") (VDDUPQ_M "vddup")
+(VIWDUPQ "viwdup") (VDWDUPQ "vdwdup")
+(VIWDUPQ_M "viwdup") (VDWDUPQ_M "vdwdup")
 (VMAXAQ_M_S "vmaxa")
 (VMAXAQ_S "vmaxa")
 (VMAXAVQ_P_S "vmaxav")
@@ -2968,6 +2970,8 @@
 (define_int_iterator VCVTxQ_M [VCVTAQ_M_S VCVTAQ_M_U VCVTMQ_M_S VCVTMQ_M_U 
VCVTNQ_M_S VCVTNQ_M_U VCVTPQ_M_S VCVTPQ_M_U])
 (define_int_iterator VIDDUPQ [VIDUPQ VDDUPQ])
 (define_int_iterator VIDDUPQ_M [VIDUPQ_M VDDUPQ_M])
+(define_int_iterator VIDWDUPQ [VIWDUPQ VDWDUPQ])
+(define_int_iterator VIDWDUPQ_M [VIWDUPQ_M VDWDUPQ_M])
 (define_int_iterator DLSTP [DLSTP8 DLSTP16 DLSTP32
   DLSTP64])
 (define_int_iterator LETP [LETP8 LETP16 LETP32
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index ffb4a10fd226..2c8f61116091 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -5129,22 +5129,23 @@
 })
 
 ;;
-;; [vdwdupq_wb_u_insn])
+;; [vdwdupq_wb_u_insn, viwdupq_wb_u_insn]
 ;;
-(define_insn "mve_vdwdupq_wb_u_insn"
+(define_insn "@mve_q_wb_u_insn"
   [(set (match_operand:MVE_2 0 "s_register_operand" "=w")
(unspec:MVE_2 [(match_operand:SI 2 "s_register_operand" "1")
   (subreg:SI (match_operand:DI 3 "s_register_operand" "r") 
4)
   (match_operand:SI 4 "mve_imm_selective_upto_8" "Rg")]
-VDWDUPQ))
+VIDWDUPQ))
(set (match_operand:SI 1 "s_register_operand" "=Te")
(unspec:SI [(match_dup 2)
(subreg:SI (match_dup 3) 4)
(match_dup 4)]
-VDWDUPQ))]
+VIDWDUPQ))]
   "TARGET_HAVE_MVE"
-  "vdwdup.u%#\t%q0, %2, %R3, %4"
-)
+  ".u%#\t%q0, %2, %R3, %4"
+ [(set (attr "mve_unpredicated_insn") (symbol_ref 
"CODE_FOR_mve_q_wb_u_insn"))
+  (set_attr "type" "mve_move")])
 
 ;;
 ;; [vdwdupq_m_n_u])
@@ -5187,27 +5188,27 @@
 })
 
 ;;
-;; [vdwdupq_m_wb_u_insn])
+;; [vdwdupq_m_wb_u_insn, viwdupq_m_wb_u_insn]
 ;;
-(define_insn "mve_vdwdupq_m_wb_u<mode>_insn"
+(define_insn "@mve_<mve_insn>q_m_wb_u<mode>_insn"
   [(set (match_operand:MVE_2 0 "s_register_operand" "=w")
(unspec:MVE_2 [(match_operand:MVE_2 2 "s_register_operand" "0")
   (match_operand:SI 3 "s_register_operand" "1")
   (subreg:SI (match_operand:DI 4 "s_register_operand" "r") 
4)
   (match_operand:SI 5 "mve_imm_selective_upto_8" "Rg")
   (match_operand:<MVE_VPRED> 6 "vpr_register_operand" 
"Up")]
-VDWDUPQ_M))
+VIDWDUPQ_M))
(set (match_operand:SI 1 "s_register_operand" "=Te")
(unspec:SI [(match_dup 2)
(match_dup 3)
(subreg:SI (match_dup 4) 4)
(match_dup 5)
(match_dup 6)]
-VDWDUPQ_M))
+VIDWDUPQ_M))
   ]
   "TARGET_HAVE_MVE"
-  "vpst\;vdwdupt.u%#<V_sz_elem>\t%q2, %3, %R4, %5"
- [(set (attr "mve_unpredicated_insn") (symbol_ref 
"CODE_FOR_mve_vdwdupq_wb_u<mode>_insn"))
+  "vpst\;<mve_insn>t.u%#<V_sz_elem>\t%q2, %3, %R4, %5"
+ [(set (attr "mve_unpredicated_insn") (symbol_ref 
"CODE_FOR_mve_<mve_insn>q_wb_u<mode>_insn"))
   (set_attr "type" "mve_move")
   (set_attr "length""8")])
 
@@ -5246,24 +5247,6 @@
   DONE;
 })
 
-;;
-;; [viwdupq_wb_u_insn])
-;;
-(define_insn "mve_viwdupq_wb_u_insn"
-  [(set (match_operand:MVE_2 0 "s_register_operand" "=w")
-   (u

[gcc r15-4452] arm: [MVE intrinsics] remove vshlcq useless expanders

2024-10-18 Thread Christophe Lyon via Gcc-cvs
https://gcc.gnu.org/g:8d73d2780f284606000cf20120a22a2f5b92b01e

commit r15-4452-g8d73d2780f284606000cf20120a22a2f5b92b01e
Author: Christophe Lyon 
Date:   Wed Aug 28 13:48:52 2024 +

arm: [MVE intrinsics] remove vshlcq useless expanders

Since we rewrote the implementation of vshlcq intrinsics, we no longer
need these expanders.

2024-08-28  Christophe Lyon  

gcc/
* config/arm/arm-builtins.cc
(arm_ternop_unone_none_unone_imm_qualifiers)
(arm_ternop_none_none_unone_imm_qualifiers): Delete.
* config/arm/arm_mve_builtins.def (vshlcq_m_vec_s)
(vshlcq_m_carry_s, vshlcq_m_vec_u, vshlcq_m_carry_u): Delete.
* config/arm/mve.md (mve_vshlcq_vec_<supf><mode>): Delete.
(mve_vshlcq_carry_<supf><mode>): Delete.
(mve_vshlcq_m_vec_<supf><mode>): Delete.
(mve_vshlcq_m_carry_<supf><mode>): Delete.

Diff:
---
 gcc/config/arm/arm-builtins.cc  | 13 
 gcc/config/arm/arm_mve_builtins.def |  8 -
 gcc/config/arm/mve.md   | 60 -
 3 files changed, 81 deletions(-)

diff --git a/gcc/config/arm/arm-builtins.cc b/gcc/config/arm/arm-builtins.cc
index 00e2befd81d4..0f16503e92dd 100644
--- a/gcc/config/arm/arm-builtins.cc
+++ b/gcc/config/arm/arm-builtins.cc
@@ -476,19 +476,6 @@ 
arm_ternop_unone_unone_none_none_qualifiers[SIMD_MAX_BUILTIN_ARGS]
 #define TERNOP_UNONE_UNONE_NONE_NONE_QUALIFIERS \
   (arm_ternop_unone_unone_none_none_qualifiers)
 
-static enum arm_type_qualifiers
-arm_ternop_unone_none_unone_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
-  = { qualifier_unsigned, qualifier_none, qualifier_unsigned,
-  qualifier_immediate };
-#define TERNOP_UNONE_NONE_UNONE_IMM_QUALIFIERS \
-  (arm_ternop_unone_none_unone_imm_qualifiers)
-
-static enum arm_type_qualifiers
-arm_ternop_none_none_unone_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
-  = { qualifier_none, qualifier_none, qualifier_unsigned, qualifier_immediate 
};
-#define TERNOP_NONE_NONE_UNONE_IMM_QUALIFIERS \
-  (arm_ternop_none_none_unone_imm_qualifiers)
-
 static enum arm_type_qualifiers
 arm_ternop_unone_unone_none_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   = { qualifier_unsigned, qualifier_unsigned, qualifier_none,
diff --git a/gcc/config/arm/arm_mve_builtins.def 
b/gcc/config/arm/arm_mve_builtins.def
index 74379fadee0a..0a9d3213eec9 100644
--- a/gcc/config/arm/arm_mve_builtins.def
+++ b/gcc/config/arm/arm_mve_builtins.def
@@ -286,15 +286,11 @@ VAR1 (TERNOP_UNONE_UNONE_UNONE_UNONE, vrmlaldavhaq_u, 
v4si)
 VAR2 (TERNOP_NONE_NONE_UNONE_PRED, vcvtq_m_to_f_u, v8hf, v4sf)
 VAR2 (TERNOP_NONE_NONE_NONE_PRED, vcvtq_m_to_f_s, v8hf, v4sf)
 VAR2 (TERNOP_PRED_NONE_NONE_PRED, vcmpeqq_m_f, v8hf, v4sf)
-VAR3 (TERNOP_UNONE_NONE_UNONE_IMM, vshlcq_carry_s, v16qi, v8hi, v4si)
-VAR3 (TERNOP_UNONE_UNONE_UNONE_IMM, vshlcq_carry_u, v16qi, v8hi, v4si)
 VAR2 (TERNOP_UNONE_UNONE_NONE_IMM, vqrshrunbq_n_s, v8hi, v4si)
 VAR3 (TERNOP_UNONE_UNONE_NONE_NONE, vabavq_s, v16qi, v8hi, v4si)
 VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vabavq_u, v16qi, v8hi, v4si)
 VAR2 (TERNOP_UNONE_UNONE_NONE_PRED, vcvtaq_m_u, v8hi, v4si)
 VAR2 (TERNOP_NONE_NONE_NONE_PRED, vcvtaq_m_s, v8hi, v4si)
-VAR3 (TERNOP_UNONE_UNONE_UNONE_IMM, vshlcq_vec_u, v16qi, v8hi, v4si)
-VAR3 (TERNOP_NONE_NONE_UNONE_IMM, vshlcq_vec_s, v16qi, v8hi, v4si)
 VAR4 (TERNOP_UNONE_UNONE_UNONE_PRED, vpselq_u, v16qi, v8hi, v4si, v2di)
 VAR4 (TERNOP_NONE_NONE_NONE_PRED, vpselq_s, v16qi, v8hi, v4si, v2di)
 VAR3 (TERNOP_UNONE_UNONE_UNONE_PRED, vrev64q_m_u, v16qi, v8hi, v4si)
@@ -860,7 +856,3 @@ VAR1 (UQSHL, urshr_, si)
 VAR1 (UQSHL, urshrl_, di)
 VAR1 (UQSHL, uqshl_, si)
 VAR1 (UQSHL, uqshll_, di)
-VAR3 (QUADOP_NONE_NONE_UNONE_IMM_PRED, vshlcq_m_vec_s, v16qi, v8hi, v4si)
-VAR3 (QUADOP_NONE_NONE_UNONE_IMM_PRED, vshlcq_m_carry_s, v16qi, v8hi, v4si)
-VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_PRED, vshlcq_m_vec_u, v16qi, v8hi, v4si)
-VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_PRED, vshlcq_m_carry_u, v16qi, v8hi, v4si)
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index 8efbbf18b458..b2a8319f60ad 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -1664,34 +1664,6 @@
 ;;
 ;; [vshlcq_u vshlcq_s]
 ;;
-(define_expand "mve_vshlcq_vec_"
- [(match_operand:MVE_2 0 "s_register_operand")
-  (match_operand:MVE_2 1 "s_register_operand")
-  (match_operand:SI 2 "s_register_operand")
-  (match_operand:SI 3 "mve_imm_32")
-  (unspec:MVE_2 [(const_int 0)] VSHLCQ)]
- "TARGET_HAVE_MVE"
-{
-  rtx ignore_wb = gen_reg_rtx (SImode);
-  emit_insn(gen_mve_vshlcq_(operands[0], ignore_wb, operands[1],
- operands[2], operands[3]));
-  DONE;
-})
-
-(define_expand "mve_vshlcq_carry_"
- [(match_operand:SI 0 "s_register_operand")
-  (match_operand:MVE_2 1 "s_register_operand")
-  (match_operand:SI 2 "s_register_operand")
-  (match_operand:SI 3 "mve_imm_32")
-  (unspec:MVE_2 [(const_int 0)] VSHLCQ)]
- "TARGET_HAVE_MVE"
-{
-  rtx ignore_vec = gen_reg_rtx (mode);
-  emit_insn(gen_mve_vshlcq_(ignore_vec, 

[gcc r15-4440] arm: [MVE intrinsics] add viddup shape

2024-10-18 Thread Christophe Lyon via Gcc-cvs
https://gcc.gnu.org/g:e38566afb4a7cede996e32596e94055667420c83

commit r15-4440-ge38566afb4a7cede996e32596e94055667420c83
Author: Christophe Lyon 
Date:   Mon Aug 19 14:47:10 2024 +

arm: [MVE intrinsics] add viddup shape

This patch adds the viddup shape description for vidup and vddup.

This requires the addition of report_not_one_of and
function_checker::require_immediate_one_of to
gcc/config/arm/arm-mve-builtins.cc (they are copies of the aarch64 SVE
counterpart).

This patch also introduces MODE_wb.

2024-08-21  Christophe Lyon  

gcc/

* config/arm/arm-mve-builtins-shapes.cc (viddup): New.
* config/arm/arm-mve-builtins-shapes.h (viddup): New.
* config/arm/arm-mve-builtins.cc (report_not_one_of): New.
(function_checker::require_immediate_one_of): New.
* config/arm/arm-mve-builtins.def (wb): New mode.
* config/arm/arm-mve-builtins.h (function_checker): Add
require_immediate_one_of.

Diff:
---
 gcc/config/arm/arm-mve-builtins-shapes.cc | 85 +++
 gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
 gcc/config/arm/arm-mve-builtins.cc| 44 
 gcc/config/arm/arm-mve-builtins.def   |  1 +
 gcc/config/arm/arm-mve-builtins.h |  2 +
 5 files changed, 133 insertions(+)

diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc 
b/gcc/config/arm/arm-mve-builtins-shapes.cc
index 8a849c2bc028..971e86a2727b 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.cc
+++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
@@ -2191,6 +2191,91 @@ struct vcvtx_def : public overloaded_base<0>
 };
 SHAPE (vcvtx)
 
+/* <T0>_t vfoo[_n]_t0(uint32_t, const int)
+   <T0>_t vfoo[_wb]_t0(uint32_t *, const int)
+
+   Shape for vector increment or decrement and duplicate operations that take
+   an integer or pointer to integer first argument and an immediate, and
+   produce a vector.
+
+   Check that 'imm' is one of 1, 2, 4 or 8.
+
+   Example: vddupq.
+   uint8x16_t [__arm_]vddupq[_n]_u8(uint32_t a, const int imm)
+   uint8x16_t [__arm_]vddupq[_wb]_u8(uint32_t *a, const int imm)
+   uint8x16_t [__arm_]vddupq_m[_n_u8](uint8x16_t inactive, uint32_t a, const 
int imm, mve_pred16_t p)
+   uint8x16_t [__arm_]vddupq_m[_wb_u8](uint8x16_t inactive, uint32_t *a, const 
int imm, mve_pred16_t p)
+   uint8x16_t [__arm_]vddupq_x[_n]_u8(uint32_t a, const int imm, mve_pred16_t 
p)
+   uint8x16_t [__arm_]vddupq_x[_wb]_u8(uint32_t *a, const int imm, 
mve_pred16_t p)  */
+struct viddup_def : public overloaded_base<0>
+{
+  bool
+  explicit_type_suffix_p (unsigned int i, enum predication_index pred,
+ enum mode_suffix_index,
+ type_suffix_info) const override
+  {
+return ((i == 0) && (pred != PRED_m));
+  }
+
+  bool
+  skip_overload_p (enum predication_index, enum mode_suffix_index mode) const 
override
+  {
+/* For MODE_wb, share the overloaded instance with MODE_n.  */
+if (mode == MODE_wb)
+  return true;
+
+return false;
+  }
+
+  void
+  build (function_builder &b, const function_group_info &group,
+bool preserve_user_namespace) const override
+  {
+b.add_overloaded_functions (group, MODE_none, preserve_user_namespace);
+build_all (b, "v0,su32,su64", group, MODE_n, preserve_user_namespace);
+build_all (b, "v0,as,su64", group, MODE_wb, preserve_user_namespace);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+unsigned int i, nargs;
+type_suffix_index type_suffix = NUM_TYPE_SUFFIXES;
+if (!r.check_gp_argument (2, i, nargs))
+  return error_mark_node;
+
+type_suffix = r.type_suffix_ids[0];
+/* With PRED_m, there is no type suffix, so infer it from the first 
(inactive)
+   argument.  */
+if (type_suffix == NUM_TYPE_SUFFIXES)
+  type_suffix = r.infer_vector_type (0);
+
+unsigned int last_arg = i - 1;
+/* Check that last_arg is either scalar or pointer.  */
+if (!r.scalar_argument_p (last_arg))
+  return error_mark_node;
+
+if (!r.require_integer_immediate (last_arg + 1))
+  return error_mark_node;
+
+/* With MODE_n we expect a scalar, with MODE_wb we expect a pointer.  */
+mode_suffix_index mode_suffix;
+if (POINTER_TYPE_P (r.get_argument_type (last_arg)))
+  mode_suffix = MODE_wb;
+else
+  mode_suffix = MODE_n;
+
+return r.resolve_to (mode_suffix, type_suffix);
+  }
+
+  bool
+  check (function_checker &c) const override
+  {
+return c.require_immediate_one_of (1, 1, 2, 4, 8);
+  }
+};
+SHAPE (viddup)
+
 /* <T0>_t vfoo[_t0](<T0>_t, <T0>_t, mve_pred16_t)
 
i.e. a version of the standard ternary shape in which
diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h 
b/gcc/config/arm/arm-mve-builtins-shapes.h
index 80340dc33ecf..186287c16207 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.h
+++ b/gcc/config/arm/arm-mve-builtins-shapes.h
@@ -82,6 +82,7 @@ namespace arm_

[gcc r15-4441] arm: [MVE intrinsics] rework vddup vidup

2024-10-18 Thread Christophe Lyon via Gcc-cvs
https://gcc.gnu.org/g:d7250b623fb1451008d5e836467b475150194bc6

commit r15-4441-gd7250b623fb1451008d5e836467b475150194bc6
Author: Christophe Lyon 
Date:   Thu Aug 29 09:01:54 2024 +

arm: [MVE intrinsics] rework vddup vidup

Implement vddup and vidup using the new MVE builtins framework.

We generate better code because we take advantage of the two outputs
produced by the v[id]dup instructions.

For instance, before:
ldr r3, [r0]
sub r2, r3, #8
str r2, [r0]
mov r2, r3
vddup.u16   q3, r2, #1

now:
ldr r2, [r0]
vddup.u16   q3, r2, #1
str r2, [r0]

2024-08-21  Christophe Lyon  

gcc/
* config/arm/arm-mve-builtins-base.cc (class viddup_impl): New.
(vddup): New.
(vidup): New.
* config/arm/arm-mve-builtins-base.def (vddupq): New.
(vidupq): New.
* config/arm/arm-mve-builtins-base.h (vddupq): New.
(vidupq): New.
* config/arm/arm_mve.h (vddupq_m): Delete.
(vddupq_u8): Delete.
(vddupq_u32): Delete.
(vddupq_u16): Delete.
(vidupq_m): Delete.
(vidupq_u8): Delete.
(vidupq_u32): Delete.
(vidupq_u16): Delete.
(vddupq_x_u8): Delete.
(vddupq_x_u16): Delete.
(vddupq_x_u32): Delete.
(vidupq_x_u8): Delete.
(vidupq_x_u16): Delete.
(vidupq_x_u32): Delete.
(vddupq_m_n_u8): Delete.
(vddupq_m_n_u32): Delete.
(vddupq_m_n_u16): Delete.
(vddupq_m_wb_u8): Delete.
(vddupq_m_wb_u16): Delete.
(vddupq_m_wb_u32): Delete.
(vddupq_n_u8): Delete.
(vddupq_n_u32): Delete.
(vddupq_n_u16): Delete.
(vddupq_wb_u8): Delete.
(vddupq_wb_u16): Delete.
(vddupq_wb_u32): Delete.
(vidupq_m_n_u8): Delete.
(vidupq_m_n_u32): Delete.
(vidupq_m_n_u16): Delete.
(vidupq_m_wb_u8): Delete.
(vidupq_m_wb_u16): Delete.
(vidupq_m_wb_u32): Delete.
(vidupq_n_u8): Delete.
(vidupq_n_u32): Delete.
(vidupq_n_u16): Delete.
(vidupq_wb_u8): Delete.
(vidupq_wb_u16): Delete.
(vidupq_wb_u32): Delete.
(vddupq_x_n_u8): Delete.
(vddupq_x_n_u16): Delete.
(vddupq_x_n_u32): Delete.
(vddupq_x_wb_u8): Delete.
(vddupq_x_wb_u16): Delete.
(vddupq_x_wb_u32): Delete.
(vidupq_x_n_u8): Delete.
(vidupq_x_n_u16): Delete.
(vidupq_x_n_u32): Delete.
(vidupq_x_wb_u8): Delete.
(vidupq_x_wb_u16): Delete.
(vidupq_x_wb_u32): Delete.
(__arm_vddupq_m_n_u8): Delete.
(__arm_vddupq_m_n_u32): Delete.
(__arm_vddupq_m_n_u16): Delete.
(__arm_vddupq_m_wb_u8): Delete.
(__arm_vddupq_m_wb_u16): Delete.
(__arm_vddupq_m_wb_u32): Delete.
(__arm_vddupq_n_u8): Delete.
(__arm_vddupq_n_u32): Delete.
(__arm_vddupq_n_u16): Delete.
(__arm_vidupq_m_n_u8): Delete.
(__arm_vidupq_m_n_u32): Delete.
(__arm_vidupq_m_n_u16): Delete.
(__arm_vidupq_n_u8): Delete.
(__arm_vidupq_m_wb_u8): Delete.
(__arm_vidupq_m_wb_u16): Delete.
(__arm_vidupq_m_wb_u32): Delete.
(__arm_vidupq_n_u32): Delete.
(__arm_vidupq_n_u16): Delete.
(__arm_vidupq_wb_u8): Delete.
(__arm_vidupq_wb_u16): Delete.
(__arm_vidupq_wb_u32): Delete.
(__arm_vddupq_wb_u8): Delete.
(__arm_vddupq_wb_u16): Delete.
(__arm_vddupq_wb_u32): Delete.
(__arm_vddupq_x_n_u8): Delete.
(__arm_vddupq_x_n_u16): Delete.
(__arm_vddupq_x_n_u32): Delete.
(__arm_vddupq_x_wb_u8): Delete.
(__arm_vddupq_x_wb_u16): Delete.
(__arm_vddupq_x_wb_u32): Delete.
(__arm_vidupq_x_n_u8): Delete.
(__arm_vidupq_x_n_u16): Delete.
(__arm_vidupq_x_n_u32): Delete.
(__arm_vidupq_x_wb_u8): Delete.
(__arm_vidupq_x_wb_u16): Delete.
(__arm_vidupq_x_wb_u32): Delete.
(__arm_vddupq_m): Delete.
(__arm_vddupq_u8): Delete.
(__arm_vddupq_u32): Delete.
(__arm_vddupq_u16): Delete.
(__arm_vidupq_m): Delete.
(__arm_vidupq_u8): Delete.
(__arm_vidupq_u32): Delete.
(__arm_vidupq_u16): Delete.
(__arm_vddupq_x_u8): Delete.
(__arm_vddupq_x_u16): Delete.
(__arm_vddupq_x_u32): Delete.
(__arm_vidupq_x_u8): Delete.

[gcc r15-4435] arm: [MVE intrinsics] rework vbicq

2024-10-18 Thread Christophe Lyon via Gcc-cvs
https://gcc.gnu.org/g:b1f996525d6fb7d804b8addaf79b046a4ffa56b7

commit r15-4435-gb1f996525d6fb7d804b8addaf79b046a4ffa56b7
Author: Christophe Lyon 
Date:   Wed Jul 10 22:57:39 2024 +

arm: [MVE intrinsics] rework vbicq

Implement vbicq using the new MVE builtins framework.

2024-07-11  Christophe Lyon  

gcc/
* config/arm/arm-mve-builtins-base.cc (vbicq): New.
* config/arm/arm-mve-builtins-base.def (vbicq): New.
* config/arm/arm-mve-builtins-base.h (vbicq): New.
* config/arm/arm-mve-builtins-functions.h (class
unspec_based_mve_function_exact_insn_vbic): New.
* config/arm/arm-mve-builtins.cc
(function_instance::has_inactive_argument): Add support for vbicq.
* config/arm/arm_mve.h (vbicq): Delete.
(vbicq_m_n): Delete.
(vbicq_m): Delete.
(vbicq_x): Delete.
(vbicq_u8): Delete.
(vbicq_s8): Delete.
(vbicq_u16): Delete.
(vbicq_s16): Delete.
(vbicq_u32): Delete.
(vbicq_s32): Delete.
(vbicq_n_u16): Delete.
(vbicq_f16): Delete.
(vbicq_n_s16): Delete.
(vbicq_n_u32): Delete.
(vbicq_f32): Delete.
(vbicq_n_s32): Delete.
(vbicq_m_n_s16): Delete.
(vbicq_m_n_s32): Delete.
(vbicq_m_n_u16): Delete.
(vbicq_m_n_u32): Delete.
(vbicq_m_s8): Delete.
(vbicq_m_s32): Delete.
(vbicq_m_s16): Delete.
(vbicq_m_u8): Delete.
(vbicq_m_u32): Delete.
(vbicq_m_u16): Delete.
(vbicq_m_f32): Delete.
(vbicq_m_f16): Delete.
(vbicq_x_s8): Delete.
(vbicq_x_s16): Delete.
(vbicq_x_s32): Delete.
(vbicq_x_u8): Delete.
(vbicq_x_u16): Delete.
(vbicq_x_u32): Delete.
(vbicq_x_f16): Delete.
(vbicq_x_f32): Delete.
(__arm_vbicq_u8): Delete.
(__arm_vbicq_s8): Delete.
(__arm_vbicq_u16): Delete.
(__arm_vbicq_s16): Delete.
(__arm_vbicq_u32): Delete.
(__arm_vbicq_s32): Delete.
(__arm_vbicq_n_u16): Delete.
(__arm_vbicq_n_s16): Delete.
(__arm_vbicq_n_u32): Delete.
(__arm_vbicq_n_s32): Delete.
(__arm_vbicq_m_n_s16): Delete.
(__arm_vbicq_m_n_s32): Delete.
(__arm_vbicq_m_n_u16): Delete.
(__arm_vbicq_m_n_u32): Delete.
(__arm_vbicq_m_s8): Delete.
(__arm_vbicq_m_s32): Delete.
(__arm_vbicq_m_s16): Delete.
(__arm_vbicq_m_u8): Delete.
(__arm_vbicq_m_u32): Delete.
(__arm_vbicq_m_u16): Delete.
(__arm_vbicq_x_s8): Delete.
(__arm_vbicq_x_s16): Delete.
(__arm_vbicq_x_s32): Delete.
(__arm_vbicq_x_u8): Delete.
(__arm_vbicq_x_u16): Delete.
(__arm_vbicq_x_u32): Delete.
(__arm_vbicq_f16): Delete.
(__arm_vbicq_f32): Delete.
(__arm_vbicq_m_f32): Delete.
(__arm_vbicq_m_f16): Delete.
(__arm_vbicq_x_f16): Delete.
(__arm_vbicq_x_f32): Delete.
(__arm_vbicq): Delete.
(__arm_vbicq_m_n): Delete.
(__arm_vbicq_m): Delete.
(__arm_vbicq_x): Delete.
* config/arm/mve.md (mve_vbicq_u<mode>): Rename into ...
(@mve_vbicq_u<mode>): ... this.
(mve_vbicq_s<mode>): Rename into ...
(@mve_vbicq_s<mode>): ... this.
(mve_vbicq_f<mode>): Rename into ...
(@mve_vbicq_f<mode>): ... this.

Diff:
---
 gcc/config/arm/arm-mve-builtins-base.cc |   1 +
 gcc/config/arm/arm-mve-builtins-base.def|   2 +
 gcc/config/arm/arm-mve-builtins-base.h  |   1 +
 gcc/config/arm/arm-mve-builtins-functions.h |  54 +++
 gcc/config/arm/arm-mve-builtins.cc  |   1 +
 gcc/config/arm/arm_mve.h| 574 
 gcc/config/arm/mve.md   |   6 +-
 7 files changed, 62 insertions(+), 577 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc 
b/gcc/config/arm/arm-mve-builtins-base.cc
index 1b254066dece..277596f39c91 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -534,6 +534,7 @@ FUNCTION_PRED_P_S_U (vaddlvq, VADDLVQ)
 FUNCTION_PRED_P_S_U (vaddvq, VADDVQ)
 FUNCTION_PRED_P_S_U (vaddvaq, VADDVAQ)
 FUNCTION_WITH_RTX_M (vandq, AND, VANDQ)
+FUNCTION (vbicq, unspec_based_mve_function_exact_insn_vbic, (VBICQ_N_S, 
VBICQ_N_U, VBICQ_M_S, VBICQ_M_U, VBICQ_M_F, VBICQ_M_N_S, VBICQ_M_N_U))
 FUNCTION_ONLY_N (vbrsrq, VBRSRQ)
 FUNCTION (vcaddq_rot90, unspec_mve_function_exact_insn_rot, (UNSPEC_VCADD90, 
UNSPEC_VCADD90, UNSPEC_VCADD90, VCADDQ_ROT90_M, VCADDQ_ROT90_M, 
VCADDQ_ROT90_M_F))
 FUNCTION (vcaddq_rot270, unspec_mv

[gcc r15-4436] arm: [MVE intrinsics] factorize vorn

2024-10-18 Thread Christophe Lyon via Gcc-cvs
https://gcc.gnu.org/g:9c69991f3e151dfe89bad82f0ea82bc4ca065322

commit r15-4436-g9c69991f3e151dfe89bad82f0ea82bc4ca065322
Author: Christophe Lyon 
Date:   Wed Jul 10 22:45:23 2024 +

arm: [MVE intrinsics] factorize vorn

Factorize vorn so that they use parameterized names.

2024-07-11  Christophe Lyon  

gcc/
* config/arm/iterators.md (MVE_INT_M_BINARY_LOGIC): Add VORNQ_M_S,
VORNQ_M_U.
(MVE_FP_M_BINARY_LOGIC): Add VORNQ_M_F.
(mve_insn): Add VORNQ_M_S, VORNQ_M_U, VORNQ_M_F.
* config/arm/mve.md (mve_vornq_s<mode>): Rename into ...
(@mve_vornq_s<mode>): ... this.
(mve_vornq_u<mode>): Rename into ...
(@mve_vornq_u<mode>): ... this.
(mve_vornq_f<mode>): Rename into ...
(@mve_vornq_f<mode>): ... this.
(mve_vornq_m_<supf><mode>): Merge into vand/vbic pattern.
(mve_vornq_m_f<mode>): Likewise.

Diff:
---
 gcc/config/arm/iterators.md |  3 +++
 gcc/config/arm/mve.md   | 48 +++--
 2 files changed, 10 insertions(+), 41 deletions(-)

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 162c0d56bfb0..3a1825ebab2e 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -444,6 +444,7 @@
 VANDQ_M_S VANDQ_M_U
 VBICQ_M_S VBICQ_M_U
 VEORQ_M_S VEORQ_M_U
+VORNQ_M_S VORNQ_M_U
 VORRQ_M_S VORRQ_M_U
 ])
 
@@ -594,6 +595,7 @@
 VANDQ_M_F
 VBICQ_M_F
 VEORQ_M_F
+VORNQ_M_F
 VORRQ_M_F
 ])
 
@@ -1094,6 +1096,7 @@
 (VMVNQ_N_S "vmvn") (VMVNQ_N_U "vmvn")
 (VNEGQ_M_F "vneg")
 (VNEGQ_M_S "vneg")
+(VORNQ_M_S "vorn") (VORNQ_M_U "vorn") (VORNQ_M_F "vorn")
 (VORRQ_M_N_S "vorr") (VORRQ_M_N_U "vorr")
 (VORRQ_M_S "vorr") (VORRQ_M_U "vorr") (VORRQ_M_F "vorr")
 (VORRQ_N_S "vorr") (VORRQ_N_U "vorr")
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index 81ce8e447b48..7f92d693c1a5 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -994,9 +994,9 @@
 ])
 
 ;;
-;; [vornq_u, vornq_s])
+;; [vornq_u, vornq_s]
 ;;
-(define_insn "mve_vornq_s"
+(define_insn "@mve_vornq_s"
   [
(set (match_operand:MVE_2 0 "s_register_operand" "=w")
(ior:MVE_2 (not:MVE_2 (match_operand:MVE_2 2 "s_register_operand" "w"))
@@ -1008,7 +1008,7 @@
   (set_attr "type" "mve_move")
 ])
 
-(define_expand "mve_vornq_u"
+(define_expand "@mve_vornq_u"
   [
(set (match_operand:MVE_2 0 "s_register_operand")
(ior:MVE_2 (not:MVE_2 (match_operand:MVE_2 2 "s_register_operand"))
@@ -1402,9 +1402,9 @@
 ])
 
 ;;
-;; [vornq_f])
+;; [vornq_f]
 ;;
-(define_insn "mve_vornq_f"
+(define_insn "@mve_vornq_f"
   [
(set (match_operand:MVE_0 0 "s_register_operand" "=w")
(ior:MVE_0 (not:MVE_0 (match_operand:MVE_0 2 "s_register_operand" "w"))
@@ -2683,6 +2683,7 @@
 ;; [vandq_m_u, vandq_m_s]
 ;; [vbicq_m_u, vbicq_m_s]
 ;; [veorq_m_u, veorq_m_s]
+;; [vornq_m_u, vornq_m_s]
 ;; [vorrq_m_u, vorrq_m_s]
 ;;
 (define_insn "@mve_q_m_"
@@ -2809,24 +2810,6 @@
   (set_attr "type" "mve_move")
(set_attr "length""8")])
 
-;;
-;; [vornq_m_u, vornq_m_s])
-;;
-(define_insn "mve_vornq_m_"
-  [
-   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
-   (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
-  (match_operand:MVE_2 2 "s_register_operand" "w")
-  (match_operand:MVE_2 3 "s_register_operand" "w")
-  (match_operand: 4 "vpr_register_operand" 
"Up")]
-VORNQ_M))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vornt\t%q0, %q2, %q3"
- [(set (attr "mve_unpredicated_insn") (symbol_ref 
"CODE_FOR_mve_vornq_"))
-  (set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
 ;;
 ;; [vqshlq_m_n_s, vqshlq_m_n_u]
 ;; [vshlq_m_n_s, vshlq_m_n_u]
@@ -3081,6 +3064,7 @@
 ;; [vandq_m_f]
 ;; [vbicq_m_f]
 ;; [veorq_m_f]
+;; [vornq_m_f]
 ;; [vorrq_m_f]
 ;;
 (define_insn "@mve_q_m_f"
@@ -3160,24 +3144,6 @@
   (set_attr "type" "mve_move")
(set_attr "length""8")])
 
-;;
-;; [vornq_m_f])
-;;
-(define_insn "mve_vornq_m_f"
-  [
-   (set (match_operand:MVE_0 0 "s_register_operand" "=w")
-   (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
-  (match_operand:MVE_0 2 "s_register_operand" "w")
-  (match_operand:MVE_0 3 "s_register_operand" "w")
-  (match_operand: 4 "vpr_register_operand" 
"Up")]
-VORNQ_M_F))
-  ]
-  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-  "vpst\;vornt\t%q0, %q2, %q3"
- [(set (attr "mve_unpredicated_insn") (symbol_ref 
"CODE_FOR_mve_vornq_f"))
-  (set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
 ;;
 ;; [vstrbq_s vstrbq_u]
 ;;


[gcc r15-4437] arm: [MVE intrinsics] rework vorn

2024-10-18 Thread Christophe Lyon via Gcc-cvs
https://gcc.gnu.org/g:da92e77ed4937102880d41b9c994fcadcb98e2aa

commit r15-4437-gda92e77ed4937102880d41b9c994fcadcb98e2aa
Author: Christophe Lyon 
Date:   Wed Jul 10 22:38:52 2024 +

arm: [MVE intrinsics] rework vorn

Implement vorn using the new MVE builtins framework.

2024-07-11  Christophe Lyon  

gcc/
* config/arm/arm-mve-builtins-base.cc (vornq): New.
* config/arm/arm-mve-builtins-base.def (vornq): New.
* config/arm/arm-mve-builtins-base.h (vornq): New.
* config/arm/arm-mve-builtins-functions.h (class
unspec_based_mve_function_exact_insn_vorn): New.
* config/arm/arm_mve.h (vornq): Delete.
(vornq_m): Delete.
(vornq_x): Delete.
(vornq_u8): Delete.
(vornq_s8): Delete.
(vornq_u16): Delete.
(vornq_s16): Delete.
(vornq_u32): Delete.
(vornq_s32): Delete.
(vornq_f16): Delete.
(vornq_f32): Delete.
(vornq_m_s8): Delete.
(vornq_m_s32): Delete.
(vornq_m_s16): Delete.
(vornq_m_u8): Delete.
(vornq_m_u32): Delete.
(vornq_m_u16): Delete.
(vornq_m_f32): Delete.
(vornq_m_f16): Delete.
(vornq_x_s8): Delete.
(vornq_x_s16): Delete.
(vornq_x_s32): Delete.
(vornq_x_u8): Delete.
(vornq_x_u16): Delete.
(vornq_x_u32): Delete.
(vornq_x_f16): Delete.
(vornq_x_f32): Delete.
(__arm_vornq_u8): Delete.
(__arm_vornq_s8): Delete.
(__arm_vornq_u16): Delete.
(__arm_vornq_s16): Delete.
(__arm_vornq_u32): Delete.
(__arm_vornq_s32): Delete.
(__arm_vornq_m_s8): Delete.
(__arm_vornq_m_s32): Delete.
(__arm_vornq_m_s16): Delete.
(__arm_vornq_m_u8): Delete.
(__arm_vornq_m_u32): Delete.
(__arm_vornq_m_u16): Delete.
(__arm_vornq_x_s8): Delete.
(__arm_vornq_x_s16): Delete.
(__arm_vornq_x_s32): Delete.
(__arm_vornq_x_u8): Delete.
(__arm_vornq_x_u16): Delete.
(__arm_vornq_x_u32): Delete.
(__arm_vornq_f16): Delete.
(__arm_vornq_f32): Delete.
(__arm_vornq_m_f32): Delete.
(__arm_vornq_m_f16): Delete.
(__arm_vornq_x_f16): Delete.
(__arm_vornq_x_f32): Delete.
(__arm_vornq): Delete.
(__arm_vornq_m): Delete.
(__arm_vornq_x): Delete.

Diff:
---
 gcc/config/arm/arm-mve-builtins-base.cc |   1 +
 gcc/config/arm/arm-mve-builtins-base.def|   2 +
 gcc/config/arm/arm-mve-builtins-base.h  |   1 +
 gcc/config/arm/arm-mve-builtins-functions.h |  53 
 gcc/config/arm/arm_mve.h| 431 
 5 files changed, 57 insertions(+), 431 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc 
b/gcc/config/arm/arm-mve-builtins-base.cc
index 277596f39c91..de9f91e5c29a 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -621,6 +621,7 @@ FUNCTION_WITH_RTX_M_N (vmulq, MULT, VMULQ)
 FUNCTION_WITH_RTX_M_N_NO_F (vmvnq, NOT, VMVNQ)
 FUNCTION (vnegq, unspec_based_mve_function_exact_insn, (NEG, NEG, NEG, -1, -1, 
-1, VNEGQ_M_S, -1, VNEGQ_M_F, -1, -1, -1))
 FUNCTION_WITHOUT_M_N (vpselq, VPSELQ)
+FUNCTION (vornq, unspec_based_mve_function_exact_insn_vorn, (-1, -1, 
VORNQ_M_S, VORNQ_M_U, VORNQ_M_F, -1, -1))
 FUNCTION_WITH_RTX_M_N_NO_N_F (vorrq, IOR, VORRQ)
 FUNCTION_WITHOUT_N_NO_U_F (vqabsq, VQABSQ)
 FUNCTION_WITH_M_N_NO_F (vqaddq, VQADDQ)
diff --git a/gcc/config/arm/arm-mve-builtins-base.def 
b/gcc/config/arm/arm-mve-builtins-base.def
index aa7b71387f94..cc76db3e0b9c 100644
--- a/gcc/config/arm/arm-mve-builtins-base.def
+++ b/gcc/config/arm/arm-mve-builtins-base.def
@@ -87,6 +87,7 @@ DEF_MVE_FUNCTION (vmulltq_poly, binary_widen_poly, poly_8_16, 
mx_or_none)
 DEF_MVE_FUNCTION (vmulq, binary_opt_n, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vmvnq, mvn, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vnegq, unary, all_signed, mx_or_none)
+DEF_MVE_FUNCTION (vornq, binary_orrq, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vorrq, binary_orrq, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vpselq, vpsel, all_integer_with_64, none)
 DEF_MVE_FUNCTION (vqabsq, unary, all_signed, m_or_none)
@@ -206,6 +207,7 @@ DEF_MVE_FUNCTION (vminnmq, binary, all_float, mx_or_none)
 DEF_MVE_FUNCTION (vminnmvq, binary_maxvminv, all_float, p_or_none)
 DEF_MVE_FUNCTION (vmulq, binary_opt_n, all_float, mx_or_none)
 DEF_MVE_FUNCTION (vnegq, unary, all_float, mx_or_none)
+DEF_MVE_FUNCTION (vornq, binary_orrq, all_float, mx_or_none)
 DEF_MVE_FUNCTION (vorrq, binary_orrq, all_float, mx_or_none)
 DEF_MVE_FUNCTION (vpselq, vpsel, all_float, none)
 DEF_MVE_FUNCTION (vreinterpretq, u

[gcc r15-4439] arm: [MVE intrinsics] factorize vddup vidup

2024-10-18 Thread Christophe Lyon via Gcc-cvs
https://gcc.gnu.org/g:387b121467fb0c05493429f201d7951d6ac0540f

commit r15-4439-g387b121467fb0c05493429f201d7951d6ac0540f
Author: Christophe Lyon 
Date:   Mon Aug 19 14:48:47 2024 +

arm: [MVE intrinsics] factorize vddup vidup

Factorize vddup and vidup so that they use the same parameterized
names.

This patch updates only the (define_insn
"@mve_<mve_insn>q_u<mode>_insn") patterns and does not bother with the
(define_expand "mve_vidupq_n_u<mode>") ones, because a subsequent
patch avoids using them.

2024-08-21  Christophe Lyon  

gcc/
* config/arm/iterators.md (mve_insn): Add VIDUPQ, VDDUPQ,
VIDUPQ_M, VDDUPQ_M.
(viddupq_op): New.
(viddupq_m_op): New.
(VIDDUPQ): New.
(VIDDUPQ_M): New.
* config/arm/mve.md (mve_vddupq_u<mode>_insn)
(mve_vidupq_u<mode>_insn): Merge into ...
(mve_<mve_insn>q_u<mode>_insn): ... this.
(mve_vddupq_m_wb_u<mode>_insn, mve_vidupq_m_wb_u<mode>_insn):
Merge into ...
(mve_<mve_insn>q_m_wb_u<mode>_insn): ... this.

Diff:
---
 gcc/config/arm/iterators.md |  7 ++
 gcc/config/arm/mve.md   | 58 ++---
 2 files changed, 20 insertions(+), 45 deletions(-)

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 3a1825ebab2e..c0299117f26e 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -1007,6 +1007,8 @@
 (VHSUBQ_M_S "vhsub") (VHSUBQ_M_U "vhsub")
 (VHSUBQ_N_S "vhsub") (VHSUBQ_N_U "vhsub")
 (VHSUBQ_S "vhsub") (VHSUBQ_U "vhsub")
+(VIDUPQ "vidup") (VDDUPQ "vddup")
+(VIDUPQ_M "vidup") (VDDUPQ_M "vddup")
 (VMAXAQ_M_S "vmaxa")
 (VMAXAQ_S "vmaxa")
 (VMAXAVQ_P_S "vmaxav")
@@ -1340,6 +1342,9 @@
 (VRNDXQ_F "vrintx") (VRNDXQ_M_F "vrintx")
 ])
 
+(define_int_attr viddupq_op [ (VIDUPQ "plus") (VDDUPQ "minus")])
+(define_int_attr viddupq_m_op [ (VIDUPQ_M "plus") (VDDUPQ_M "minus")])
+
 ;; plus and minus are the only SHIFTABLE_OPS for which Thumb2 allows
 ;; a stack pointer operand.  The minus operation is a candidate for an rsub
 ;; and hence only plus is supported.
@@ -2961,6 +2966,8 @@
 (define_int_iterator VCVTxQ_M_F32_F16 [VCVTBQ_M_F32_F16 VCVTTQ_M_F32_F16])
 (define_int_iterator VCVTxQ [VCVTAQ_S VCVTAQ_U VCVTMQ_S VCVTMQ_U VCVTNQ_S 
VCVTNQ_U VCVTPQ_S VCVTPQ_U])
 (define_int_iterator VCVTxQ_M [VCVTAQ_M_S VCVTAQ_M_U VCVTMQ_M_S VCVTMQ_M_U 
VCVTNQ_M_S VCVTNQ_M_U VCVTPQ_M_S VCVTPQ_M_U])
+(define_int_iterator VIDDUPQ [VIDUPQ VDDUPQ])
+(define_int_iterator VIDDUPQ_M [VIDUPQ_M VDDUPQ_M])
 (define_int_iterator DLSTP [DLSTP8 DLSTP16 DLSTP32
   DLSTP64])
 (define_int_iterator LETP [LETP8 LETP16 LETP32
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index cef55e8c9303..d6022276240e 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -5078,18 +5078,18 @@
 })
 
 ;;
-;; [vidupq_u_insn])
+;; [vddupq_u_insn, vidupq_u_insn]
 ;;
-(define_insn "mve_vidupq_u<mode>_insn"
+(define_insn "@mve_<mve_insn>q_u<mode>_insn"
  [(set (match_operand:MVE_2 0 "s_register_operand" "=w")
(unspec:MVE_2 [(match_operand:SI 2 "s_register_operand" "1")
  (match_operand:SI 3 "mve_imm_selective_upto_8" "Rg")]
-VIDUPQ))
+   VIDDUPQ))
   (set (match_operand:SI 1 "s_register_operand" "=Te")
-   (plus:SI (match_dup 2)
-   (match_operand:SI 4 "immediate_operand" "i")))]
+   (<viddupq_op>:SI (match_dup 2)
+   (match_operand:SI 4 "immediate_operand" "i")))]
  "TARGET_HAVE_MVE"
- "vidup.u%#<V_sz_elem>\t%q0, %1, %3")
+ "<mve_insn>.u%#<V_sz_elem>\t%q0, %1, %3")
 
 ;;
 ;; [vidupq_m_n_u])
@@ -5112,21 +5112,21 @@
 })
 
 ;;
-;; [vidupq_m_wb_u_insn])
+;; [vddupq_m_wb_u_insn, vidupq_m_wb_u_insn]
 ;;
-(define_insn "mve_vidupq_m_wb_u_insn"
+(define_insn "@mve_q_m_wb_u_insn"
  [(set (match_operand:MVE_2 0 "s_register_operand" "=w")
(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
  (match_operand:SI 3 "s_register_operand" "2")
  (match_operand:SI 4 "mve_imm_selective_upto_8" "Rg")
  (match_operand: 5 "vpr_register_operand" "Up")]
-   VIDUPQ_M))
+   VIDDUPQ_M))
   (set (match_operand:SI 2 "s_register_operand" "=Te")
-   (plus:SI (match_dup 3)
-   (match_operand:SI 6 "immediate_operand" "i")))]
+   (:SI (match_dup 3)
+ (match_operand:SI 6 "immediate_operand" "i")))]
  "TARGET_HAVE_MVE"
- "vpst\;\tvidupt.u%#\t%q0, %2, %4"
- [(set (attr "mve_unpredicated_insn") (symbol_ref 
"CODE_FOR_mve_vidupq_u_insn"))
+ "vpst\;t.u%#\t%q0, %2, %4"
+ [(set (attr "mve_unpredicated_insn") (symbol_ref 
"CODE_FOR_mve_q_u_insn"))
   (set_attr "length""8")])
 
 ;;
@@ -5146,20 +5146,6 @@
   DONE;
 })
 
-;;
-;; [vddupq_u_insn])
-;;
-(define_insn "mve_vddupq_u_insn"
- [(set (match_operand:MVE_2 0 "s_register_ope

[gcc r15-4443] arm: [MVE intrinsics] remove v[id]dup expanders

2024-10-18 Thread Christophe Lyon via Gcc-cvs
https://gcc.gnu.org/g:f936ddb75336fb24ffbf249ab972959f4428155b

commit r15-4443-gf936ddb75336fb24ffbf249ab972959f4428155b
Author: Christophe Lyon 
Date:   Thu Aug 29 09:04:38 2024 +

arm: [MVE intrinsics] remove v[id]dup expanders

We use code_for_mve_q_u_insn, rather than the expanders used by the
previous implementation, so we can remove the expanders and their
declaration as builtins.

2024-08-21  Christophe Lyon  

gcc/
* config/arm/arm_mve_builtins.def (vddupq_n_u, vidupq_n_u)
(vddupq_m_n_u, vidupq_m_n_u): Delete.
* config/arm/mve.md (mve_vidupq_n_u, mve_vidupq_m_n_u)
(mve_vddupq_n_u, mve_vddupq_m_n_u): Delete.

Diff:
---
 gcc/config/arm/arm_mve_builtins.def |  4 --
 gcc/config/arm/mve.md   | 73 -
 2 files changed, 77 deletions(-)

diff --git a/gcc/config/arm/arm_mve_builtins.def 
b/gcc/config/arm/arm_mve_builtins.def
index dd99a90b9521..13070d958fcc 100644
--- a/gcc/config/arm/arm_mve_builtins.def
+++ b/gcc/config/arm/arm_mve_builtins.def
@@ -803,10 +803,6 @@ VAR3 (QUINOP_UNONE_UNONE_UNONE_UNONE_IMM_PRED, 
viwdupq_m_wb_u, v16qi, v8hi, v4si
 VAR3 (QUINOP_UNONE_UNONE_UNONE_UNONE_IMM_PRED, vdwdupq_m_wb_u, v16qi, v8hi, 
v4si)
 VAR3 (QUINOP_UNONE_UNONE_UNONE_UNONE_IMM_PRED, viwdupq_m_n_u, v16qi, v8hi, 
v4si)
 VAR3 (QUINOP_UNONE_UNONE_UNONE_UNONE_IMM_PRED, vdwdupq_m_n_u, v16qi, v8hi, 
v4si)
-VAR3 (BINOP_UNONE_UNONE_IMM, vddupq_n_u, v16qi, v8hi, v4si)
-VAR3 (BINOP_UNONE_UNONE_IMM, vidupq_n_u, v16qi, v8hi, v4si)
-VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_PRED, vddupq_m_n_u, v16qi, v8hi, v4si)
-VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_PRED, vidupq_m_n_u, v16qi, v8hi, v4si)
 VAR3 (TERNOP_UNONE_UNONE_UNONE_IMM, vdwdupq_n_u, v16qi, v4si, v8hi)
 VAR3 (TERNOP_UNONE_UNONE_UNONE_IMM, viwdupq_n_u, v16qi, v4si, v8hi)
 VAR1 (STRSBWBU, vstrwq_scatter_base_wb_u, v4si)
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index d6022276240e..ffb4a10fd226 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -5061,22 +5061,6 @@
   (set_attr "length" "4")])
 
 ;;
-;; [vidupq_n_u])
-;;
-(define_expand "mve_vidupq_n_u"
- [(match_operand:MVE_2 0 "s_register_operand")
-  (match_operand:SI 1 "s_register_operand")
-  (match_operand:SI 2 "mve_imm_selective_upto_8")]
- "TARGET_HAVE_MVE"
-{
-  rtx temp = gen_reg_rtx (SImode);
-  emit_move_insn (temp, operands[1]);
-  rtx inc = gen_int_mode (INTVAL(operands[2]) * , SImode);
-  emit_insn (gen_mve_vidupq_u_insn (operands[0], temp, operands[1],
- operands[2], inc));
-  DONE;
-})
-
 ;;
 ;; [vddupq_u_insn, vidupq_u_insn]
 ;;
@@ -5091,26 +5075,6 @@
  "TARGET_HAVE_MVE"
  ".u%#\t%q0, %1, %3")
 
-;;
-;; [vidupq_m_n_u])
-;;
-(define_expand "mve_vidupq_m_n_u"
-  [(match_operand:MVE_2 0 "s_register_operand")
-   (match_operand:MVE_2 1 "s_register_operand")
-   (match_operand:SI 2 "s_register_operand")
-   (match_operand:SI 3 "mve_imm_selective_upto_8")
-   (match_operand: 4 "vpr_register_operand")]
-  "TARGET_HAVE_MVE"
-{
-  rtx temp = gen_reg_rtx (SImode);
-  emit_move_insn (temp, operands[2]);
-  rtx inc = gen_int_mode (INTVAL(operands[3]) * , SImode);
-  emit_insn (gen_mve_vidupq_m_wb_u_insn(operands[0], operands[1], temp,
-operands[2], operands[3],
-operands[4], inc));
-  DONE;
-})
-
 ;;
 ;; [vddupq_m_wb_u_insn, vidupq_m_wb_u_insn]
 ;;
@@ -5129,43 +5093,6 @@
  [(set (attr "mve_unpredicated_insn") (symbol_ref 
"CODE_FOR_mve_q_u_insn"))
   (set_attr "length""8")])
 
-;;
-;; [vddupq_n_u])
-;;
-(define_expand "mve_vddupq_n_u"
- [(match_operand:MVE_2 0 "s_register_operand")
-  (match_operand:SI 1 "s_register_operand")
-  (match_operand:SI 2 "mve_imm_selective_upto_8")]
- "TARGET_HAVE_MVE"
-{
-  rtx temp = gen_reg_rtx (SImode);
-  emit_move_insn (temp, operands[1]);
-  rtx inc = gen_int_mode (INTVAL(operands[2]) * , SImode);
-  emit_insn (gen_mve_vddupq_u_insn (operands[0], temp, operands[1],
- operands[2], inc));
-  DONE;
-})
-
-;;
-;; [vddupq_m_n_u])
-;;
-(define_expand "mve_vddupq_m_n_u"
-  [(match_operand:MVE_2 0 "s_register_operand")
-   (match_operand:MVE_2 1 "s_register_operand")
-   (match_operand:SI 2 "s_register_operand")
-   (match_operand:SI 3 "mve_imm_selective_upto_8")
-   (match_operand: 4 "vpr_register_operand")]
-  "TARGET_HAVE_MVE"
-{
-  rtx temp = gen_reg_rtx (SImode);
-  emit_move_insn (temp, operands[2]);
-  rtx inc = gen_int_mode (INTVAL(operands[3]) * , SImode);
-  emit_insn (gen_mve_vddupq_m_wb_u_insn(operands[0], operands[1], temp,
-operands[2], operands[3],
-operands[4], inc));
-  DONE;
-})
-
 ;;
 ;; [vdwdupq_n_u])
 ;;


[gcc r15-4444] arm: [MVE intrinsics] fix checks of immediate arguments

2024-10-18 Thread Christophe Lyon via Gcc-cvs
https://gcc.gnu.org/g:2fd08f37d5350231f0ce03e8f887d83234b47eaf

commit r15-4444-g2fd08f37d5350231f0ce03e8f887d83234b47eaf
Author: Christophe Lyon 
Date:   Wed Aug 21 13:13:23 2024 +

arm: [MVE intrinsics] fix checks of immediate arguments

As discussed in [1], it is better to use "su64" for immediates in
intrinsics signatures in order to provide better diagnostics
(erroneous constants are not truncated for instance).  This patch thus
uses su64 instead of ss32 in binary_lshift_unsigned,
binary_rshift_narrow, binary_rshift_narrow_unsigned, ternary_lshift,
ternary_rshift.

In addition, we fix cases where we called require_integer_immediate
whereas we just want to check that the argument is a scalar, and thus
use require_scalar_type in binary_acca_int32, binary_acca_int64,
unary_int32_acc.

Finally, in binary_lshift_unsigned we just want to check that 'imm' is
an immediate, not the optional predicates.

[1] https://gcc.gnu.org/pipermail/gcc-patches/2024-August/660262.html

2024-08-21  Christophe Lyon  

gcc/
* config/arm/arm-mve-builtins-shapes.cc (binary_acca_int32): Fix
check of scalar argument.
(binary_acca_int64): Likewise.
(binary_lshift_unsigned): Likewise.
(binary_rshift_narrow): Likewise.
(binary_rshift_narrow_unsigned): Likewise.
(ternary_lshift): Likewise.
(ternary_rshift): Likewise.
(unary_int32_acc): Likewise.

Diff:
---
 gcc/config/arm/arm-mve-builtins-shapes.cc | 47 ---
 1 file changed, 31 insertions(+), 16 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc 
b/gcc/config/arm/arm-mve-builtins-shapes.cc
index 971e86a2727b..a1d2e2431287 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.cc
+++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
@@ -477,18 +477,23 @@ struct binary_acca_int32_def : public overloaded_base<0>
   {
 unsigned int i, nargs;
 type_suffix_index type;
+const char *first_type_name;
+
 if (!r.check_gp_argument (3, i, nargs)
|| (type = r.infer_vector_type (1)) == NUM_TYPE_SUFFIXES)
   return error_mark_node;
 
+first_type_name = (type_suffixes[type].unsigned_p
+  ? "uint32_t"
+  : "int32_t");
+if (!r.require_scalar_type (0, first_type_name))
+  return error_mark_node;
+
 unsigned int last_arg = i + 1;
 for (i = 1; i < last_arg; i++)
   if (!r.require_matching_vector_type (i, type))
return error_mark_node;
 
-if (!r.require_integer_immediate (0))
-  return error_mark_node;
-
 return r.resolve_to (r.mode_suffix_id, type);
   }
 };
@@ -514,18 +519,24 @@ struct binary_acca_int64_def : public overloaded_base<0>
   {
 unsigned int i, nargs;
 type_suffix_index type;
+const char *first_type_name;
+
 if (!r.check_gp_argument (3, i, nargs)
|| (type = r.infer_vector_type (1)) == NUM_TYPE_SUFFIXES)
   return error_mark_node;
 
+
+first_type_name = (type_suffixes[type].unsigned_p
+  ? "uint64_t"
+  : "int64_t");
+if (!r.require_scalar_type (0, first_type_name))
+  return error_mark_node;
+
 unsigned int last_arg = i + 1;
 for (i = 1; i < last_arg; i++)
   if (!r.require_matching_vector_type (i, type))
return error_mark_node;
 
-if (!r.require_integer_immediate (0))
-  return error_mark_node;
-
 return r.resolve_to (r.mode_suffix_id, type);
   }
 };
@@ -613,7 +624,7 @@ struct binary_lshift_unsigned_def : public 
overloaded_base<0>
 bool preserve_user_namespace) const override
   {
 b.add_overloaded_functions (group, MODE_n, preserve_user_namespace);
-build_all (b, "vu0,vs0,ss32", group, MODE_n, preserve_user_namespace);
+build_all (b, "vu0,vs0,su64", group, MODE_n, preserve_user_namespace);
   }
 
   tree
@@ -622,6 +633,7 @@ struct binary_lshift_unsigned_def : public 
overloaded_base<0>
 unsigned int i, nargs;
 type_suffix_index type;
 if (!r.check_gp_argument (2, i, nargs)
+   || !r.require_integer_immediate (i)
|| (type = r.infer_vector_type (i-1)) == NUM_TYPE_SUFFIXES)
   return error_mark_node;
 
@@ -636,10 +648,6 @@ struct binary_lshift_unsigned_def : public 
overloaded_base<0>
  return error_mark_node;
   }
 
-for (; i < nargs; ++i)
-  if (!r.require_integer_immediate (i))
-   return error_mark_node;
-
 return r.resolve_to (r.mode_suffix_id, type);
   }
 
@@ -1097,7 +1105,7 @@ struct binary_rshift_narrow_def : public 
overloaded_base<0>
 bool preserve_user_namespace) const override
   {
 b.add_overloaded_functions (group, MODE_n, preserve_user_namespace);
-build_all (b, "vh0,vh0,v0,ss32", group, MODE_n, preserve_user_namespace);
+build_all (b, "vh0,vh0,v0,su64", group, MODE_n, preserve_user_namespace);
   }
 
   tree
@@ -1144,7 +1

[gcc r15-4438] arm: [MVE intrinsics] rework vctp

2024-10-18 Thread Christophe Lyon via Gcc-cvs
https://gcc.gnu.org/g:e4366770dc32c0dd5c2cd3563e37d5d63c9e1eef

commit r15-4438-ge4366770dc32c0dd5c2cd3563e37d5d63c9e1eef
Author: Christophe Lyon 
Date:   Thu Aug 1 16:08:58 2024 +

arm: [MVE intrinsics] rework vctp

Implement vctp using the new MVE builtins framework.

2024-08-21  Christophe Lyon  

gcc/ChangeLog:

* config/arm/arm-mve-builtins-base.cc (class vctpq_impl): New.
(vctp16q): New.
(vctp32q): New.
(vctp64q): New.
(vctp8q): New.
* config/arm/arm-mve-builtins-base.def (vctp16q): New.
(vctp32q): New.
(vctp64q): New.
(vctp8q): New.
* config/arm/arm-mve-builtins-base.h (vctp16q): New.
(vctp32q): New.
(vctp64q): New.
(vctp8q): New.
* config/arm/arm-mve-builtins-shapes.cc (vctp): New.
* config/arm/arm-mve-builtins-shapes.h (vctp): New.
* config/arm/arm-mve-builtins.cc
(function_instance::has_inactive_argument): Add support for vctp.
* config/arm/arm_mve.h (vctp16q): Delete.
(vctp32q): Delete.
(vctp64q): Delete.
(vctp8q): Delete.
(vctp8q_m): Delete.
(vctp64q_m): Delete.
(vctp32q_m): Delete.
(vctp16q_m): Delete.
(__arm_vctp16q): Delete.
(__arm_vctp32q): Delete.
(__arm_vctp64q): Delete.
(__arm_vctp8q): Delete.
(__arm_vctp8q_m): Delete.
(__arm_vctp64q_m): Delete.
(__arm_vctp32q_m): Delete.
(__arm_vctp16q_m): Delete.
* config/arm/mve.md (mve_vctpq): Add '@'
prefix.
(mve_vctpq_m): Likewise.

Diff:
---
 gcc/config/arm/arm-mve-builtins-base.cc   | 48 +++
 gcc/config/arm/arm-mve-builtins-base.def  |  4 ++
 gcc/config/arm/arm-mve-builtins-base.h|  4 ++
 gcc/config/arm/arm-mve-builtins-shapes.cc | 16 
 gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
 gcc/config/arm/arm-mve-builtins.cc|  4 ++
 gcc/config/arm/arm_mve.h  | 64 ---
 gcc/config/arm/mve.md |  4 +-
 8 files changed, 79 insertions(+), 66 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc 
b/gcc/config/arm/arm-mve-builtins-base.cc
index de9f91e5c29a..778263abf9a0 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -192,6 +192,50 @@ public:
   }
 };
 
+  /* Implements vctp8q, vctp16q, vctp32q and vctp64q intrinsics.  */
+class vctpq_impl : public function_base
+{
+public:
+  CONSTEXPR vctpq_impl (machine_mode mode)
+: m_mode (mode)
+  {}
+
+  /* Mode this intrinsic operates on.  */
+  machine_mode m_mode;
+
+  rtx
+  expand (function_expander &e) const override
+  {
+insn_code code;
+rtx target;
+
+if (e.mode_suffix_id != MODE_none)
+  gcc_unreachable ();
+
+switch (e.pred)
+  {
+  case PRED_none:
+   /* No predicate, no suffix.  */
+   code = code_for_mve_vctpq (m_mode, m_mode);
+   target = e.use_exact_insn (code);
+   break;
+
+  case PRED_m:
+   /* No suffix, "m" predicate.  */
+   code = code_for_mve_vctpq_m (m_mode, m_mode);
+   target = e.use_cond_insn (code, 0);
+   break;
+
+  default:
+   gcc_unreachable ();
+  }
+
+rtx HItarget = gen_reg_rtx (HImode);
+emit_move_insn (HItarget, gen_lowpart (HImode, target));
+return HItarget;
+  }
+};
+
   /* Implements vcvtq intrinsics.  */
 class vcvtq_impl : public function_base
 {
@@ -559,6 +603,10 @@ FUNCTION (vcmpltq, 
unspec_based_mve_function_exact_insn_vcmp, (LT, UNKNOWN, LT,
 FUNCTION (vcmpcsq, unspec_based_mve_function_exact_insn_vcmp, (UNKNOWN, GEU, 
UNKNOWN, UNKNOWN, VCMPCSQ_M_U, UNKNOWN, UNKNOWN, VCMPCSQ_M_N_U, UNKNOWN))
 FUNCTION (vcmphiq, unspec_based_mve_function_exact_insn_vcmp, (UNKNOWN, GTU, 
UNKNOWN, UNKNOWN, VCMPHIQ_M_U, UNKNOWN, UNKNOWN, VCMPHIQ_M_N_U, UNKNOWN))
 FUNCTION_WITHOUT_M_N (vcreateq, VCREATEQ)
+FUNCTION (vctp8q, vctpq_impl, (V16BImode))
+FUNCTION (vctp16q, vctpq_impl, (V8BImode))
+FUNCTION (vctp32q, vctpq_impl, (V4BImode))
+FUNCTION (vctp64q, vctpq_impl, (V2QImode))
 FUNCTION_WITHOUT_N_NO_F (vcvtaq, VCVTAQ)
 FUNCTION (vcvtbq, vcvtxq_impl, (VCVTBQ_F16_F32, VCVTBQ_M_F16_F32, 
VCVTBQ_F32_F16, VCVTBQ_M_F32_F16))
 FUNCTION (vcvtq, vcvtq_impl,)
diff --git a/gcc/config/arm/arm-mve-builtins-base.def 
b/gcc/config/arm/arm-mve-builtins-base.def
index cc76db3e0b9c..dd46d8828828 100644
--- a/gcc/config/arm/arm-mve-builtins-base.def
+++ b/gcc/config/arm/arm-mve-builtins-base.def
@@ -42,6 +42,10 @@ DEF_MVE_FUNCTION (vcmpleq, cmp, all_signed, m_or_none)
 DEF_MVE_FUNCTION (vcmpltq, cmp, all_signed, m_or_none)
 DEF_MVE_FUNCTION (vcmpneq, cmp, all_integer, m_or_none)
 DEF_MVE_FUNCTION (vcreateq, create, all_integer_with_64, none)
+DEF_MVE_FUNCTION (vctp16q, vctp, none, m_or_none)

[gcc r15-4442] arm: [MVE intrinsics] update v[id]dup tests

2024-10-18 Thread Christophe Lyon via Gcc-cvs
https://gcc.gnu.org/g:faaf83b9bc2bbbe0dff830c1fc733ed9f248db42

commit r15-4442-gfaaf83b9bc2bbbe0dff830c1fc733ed9f248db42
Author: Christophe Lyon 
Date:   Fri Aug 30 09:29:46 2024 +

arm: [MVE intrinsics] update v[id]dup tests

Testing v[id]dup overloads with '1' as argument for uint32_t* does not
make sense: instead of choosing the '_wb' overload, we choose the
'_n', but we already do that in the '_n' tests.

This patch removes all such bogus foo2 functions.

2024-08-28  Christophe Lyon  

gcc/testsuite/
* gcc.target/arm/mve/intrinsics/vddupq_m_wb_u16.c: Remove foo2.
* gcc.target/arm/mve/intrinsics/vddupq_m_wb_u32.c: Remove foo2.
* gcc.target/arm/mve/intrinsics/vddupq_m_wb_u8.c: Remove foo2.
* gcc.target/arm/mve/intrinsics/vddupq_wb_u16.c: Remove foo2.
* gcc.target/arm/mve/intrinsics/vddupq_wb_u32.c: Remove foo2.
* gcc.target/arm/mve/intrinsics/vddupq_wb_u8.c: Remove foo2.
* gcc.target/arm/mve/intrinsics/vddupq_x_wb_u16.c: Remove foo2.
* gcc.target/arm/mve/intrinsics/vddupq_x_wb_u32.c: Remove foo2.
* gcc.target/arm/mve/intrinsics/vddupq_x_wb_u8.c: Remove foo2.
* gcc.target/arm/mve/intrinsics/vidupq_m_wb_u16.c: Remove foo2.
* gcc.target/arm/mve/intrinsics/vidupq_m_wb_u32.c: Remove foo2.
* gcc.target/arm/mve/intrinsics/vidupq_m_wb_u8.c: Remove foo2.
* gcc.target/arm/mve/intrinsics/vidupq_wb_u16.c: Remove foo2.
* gcc.target/arm/mve/intrinsics/vidupq_wb_u32.c: Remove foo2.
* gcc.target/arm/mve/intrinsics/vidupq_wb_u8.c: Remove foo2.
* gcc.target/arm/mve/intrinsics/vidupq_x_wb_u16.c: Remove foo2.
* gcc.target/arm/mve/intrinsics/vidupq_x_wb_u32.c: Remove foo2.
* gcc.target/arm/mve/intrinsics/vidupq_x_wb_u8.c: Remove foo2.

Diff:
---
 .../gcc.target/arm/mve/intrinsics/vddupq_m_wb_u16.c| 18 +-
 .../gcc.target/arm/mve/intrinsics/vddupq_m_wb_u32.c| 18 +-
 .../gcc.target/arm/mve/intrinsics/vddupq_m_wb_u8.c | 18 +-
 .../gcc.target/arm/mve/intrinsics/vddupq_wb_u16.c  | 14 +-
 .../gcc.target/arm/mve/intrinsics/vddupq_wb_u32.c  | 14 +-
 .../gcc.target/arm/mve/intrinsics/vddupq_wb_u8.c   | 14 +-
 .../gcc.target/arm/mve/intrinsics/vddupq_x_wb_u16.c| 18 +-
 .../gcc.target/arm/mve/intrinsics/vddupq_x_wb_u32.c| 18 +-
 .../gcc.target/arm/mve/intrinsics/vddupq_x_wb_u8.c | 18 +-
 .../gcc.target/arm/mve/intrinsics/vidupq_m_wb_u16.c| 18 +-
 .../gcc.target/arm/mve/intrinsics/vidupq_m_wb_u32.c| 18 +-
 .../gcc.target/arm/mve/intrinsics/vidupq_m_wb_u8.c | 18 +-
 .../gcc.target/arm/mve/intrinsics/vidupq_wb_u16.c  | 14 +-
 .../gcc.target/arm/mve/intrinsics/vidupq_wb_u32.c  | 14 +-
 .../gcc.target/arm/mve/intrinsics/vidupq_wb_u8.c   | 14 +-
 .../gcc.target/arm/mve/intrinsics/vidupq_x_wb_u16.c| 18 +-
 .../gcc.target/arm/mve/intrinsics/vidupq_x_wb_u32.c| 18 +-
 .../gcc.target/arm/mve/intrinsics/vidupq_x_wb_u8.c | 18 +-
 18 files changed, 18 insertions(+), 282 deletions(-)

diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vddupq_m_wb_u16.c 
b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vddupq_m_wb_u16.c
index 2a907417b401..d4391358fc25 100644
--- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vddupq_m_wb_u16.c
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vddupq_m_wb_u16.c
@@ -42,24 +42,8 @@ foo1 (uint16x8_t inactive, uint32_t *a, mve_pred16_t p)
   return vddupq_m (inactive, a, 1, p);
 }
 
-/*
-**foo2:
-** ...
-** vmsrp0, (?:ip|fp|r[0-9]+)(?:@.*|)
-** ...
-** vpst(?: @.*|)
-** ...
-** vddupt.u16  q[0-9]+, (?:ip|fp|r[0-9]+), #[0-9]+(?:  @.*|)
-** ...
-*/
-uint16x8_t
-foo2 (uint16x8_t inactive, mve_pred16_t p)
-{
-  return vddupq_m (inactive, 1, 1, p);
-}
-
 #ifdef __cplusplus
 }
 #endif
 
-/* { dg-final { scan-assembler-not "__ARM_undef" } } */
\ No newline at end of file
+/* { dg-final { scan-assembler-not "__ARM_undef" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vddupq_m_wb_u32.c 
b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vddupq_m_wb_u32.c
index ffaf37349235..58609dae29fb 100644
--- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vddupq_m_wb_u32.c
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vddupq_m_wb_u32.c
@@ -42,24 +42,8 @@ foo1 (uint32x4_t inactive, uint32_t *a, mve_pred16_t p)
   return vddupq_m (inactive, a, 1, p);
 }
 
-/*
-**foo2:
-** ...
-** vmsrp0, (?:ip|fp|r[0-9]+)(?:@.*|)
-** ...
-** vpst(?: @.*|)
-** ...
-** vddupt.u32  q[0-9]+, (?:ip|fp|r[0-9]+), #[0-9]+(?:  @.*|)
-** ...
-*/
-uint32x4_t
-foo2 (uint32x4

[gcc r15-4455] arm: [MVE intrinsics] rework vadciq

2024-10-18 Thread Christophe Lyon via Gcc-cvs
https://gcc.gnu.org/g:cb21ceae313825d01e72726e18ab213e34c6a7b2

commit r15-4455-gcb21ceae313825d01e72726e18ab213e34c6a7b2
Author: Christophe Lyon 
Date:   Wed Aug 28 21:26:58 2024 +

arm: [MVE intrinsics] rework vadciq

Implement vadciq using the new MVE builtins framework.

2024-08-28  Christophe Lyon  
gcc/

* config/arm/arm-mve-builtins-base.cc (class vadc_vsbc_impl): New.
(vadciq): New.
* config/arm/arm-mve-builtins-base.def (vadciq): New.
* config/arm/arm-mve-builtins-base.h (vadciq): New.
* config/arm/arm_mve.h (vadciq): Delete.
(vadciq_m): Delete.
(vadciq_s32): Delete.
(vadciq_u32): Delete.
(vadciq_m_s32): Delete.
(vadciq_m_u32): Delete.
(__arm_vadciq_s32): Delete.
(__arm_vadciq_u32): Delete.
(__arm_vadciq_m_s32): Delete.
(__arm_vadciq_m_u32): Delete.
(__arm_vadciq): Delete.
(__arm_vadciq_m): Delete.

Diff:
---
 gcc/config/arm/arm-mve-builtins-base.cc  | 93 
 gcc/config/arm/arm-mve-builtins-base.def |  1 +
 gcc/config/arm/arm-mve-builtins-base.h   |  1 +
 gcc/config/arm/arm_mve.h | 89 --
 4 files changed, 95 insertions(+), 89 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc 
b/gcc/config/arm/arm-mve-builtins-base.cc
index 1b7d97fbe072..f04bf027b417 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -607,6 +607,98 @@ public:
   }
 };
 
+/* Map the vadc and similar functions directly to CODE (UNSPEC, UNSPEC).  Take
+   care of the implicit carry argument.  */
+class vadc_vsbc_impl : public function_base
+{
+public:
+  unsigned int
+  call_properties (const function_instance &) const override
+  {
+unsigned int flags = CP_WRITE_MEMORY | CP_READ_FPCR;
+return flags;
+  }
+
+  tree
+  memory_scalar_type (const function_instance &) const override
+  {
+/* carry is "unsigned int".  */
+return get_typenode_from_name ("unsigned int");
+  }
+
+  rtx
+  expand (function_expander &e) const override
+  {
+insn_code code;
+rtx insns, carry_ptr, carry_out;
+int carry_out_arg_no;
+int unspec;
+
+if (! e.type_suffix (0).integer_p)
+  gcc_unreachable ();
+
+if (e.mode_suffix_id != MODE_none)
+  gcc_unreachable ();
+
+/* Remove carry from arguments, it is implicit for the builtin.  */
+switch (e.pred)
+  {
+  case PRED_none:
+   carry_out_arg_no = 2;
+   break;
+
+  case PRED_m:
+   carry_out_arg_no = 3;
+   break;
+
+  default:
+   gcc_unreachable ();
+  }
+
+carry_ptr = e.args[carry_out_arg_no];
+e.args.ordered_remove (carry_out_arg_no);
+
+switch (e.pred)
+  {
+  case PRED_none:
+   /* No predicate.  */
+   unspec = e.type_suffix (0).unsigned_p
+ ? VADCIQ_U
+ : VADCIQ_S;
+   code = code_for_mve_q_v4si (unspec, unspec);
+   insns = e.use_exact_insn (code);
+   break;
+
+  case PRED_m:
+   /* "m" predicate.  */
+   unspec = e.type_suffix (0).unsigned_p
+ ? VADCIQ_M_U
+ : VADCIQ_M_S;
+   code = code_for_mve_q_m_v4si (unspec, unspec);
+   insns = e.use_cond_insn (code, 0);
+   break;
+
+  default:
+   gcc_unreachable ();
+  }
+
+/* Update carry_out.  */
+carry_out = gen_reg_rtx (SImode);
+emit_insn (gen_get_fpscr_nzcvqc (carry_out));
+emit_insn (gen_rtx_SET (carry_out,
+   gen_rtx_LSHIFTRT (SImode,
+ carry_out,
+ GEN_INT (29))));
+emit_insn (gen_rtx_SET (carry_out,
+   gen_rtx_AND (SImode,
+carry_out,
+GEN_INT (1))));
+emit_insn (gen_rtx_SET (gen_rtx_MEM (Pmode, carry_ptr), carry_out));
+
+return insns;
+  }
+};
+
 } /* end anonymous namespace */
 
 namespace arm_mve {
@@ -777,6 +869,7 @@ namespace arm_mve {
 FUNCTION_PRED_P_S_U (vabavq, VABAVQ)
 FUNCTION_WITHOUT_N (vabdq, VABDQ)
 FUNCTION (vabsq, unspec_based_mve_function_exact_insn, (ABS, ABS, ABS, -1, -1, 
-1, VABSQ_M_S, -1, VABSQ_M_F, -1, -1, -1))
+FUNCTION (vadciq, vadc_vsbc_impl,)
 FUNCTION_WITH_RTX_M_N (vaddq, PLUS, VADDQ)
 FUNCTION_PRED_P_S_U (vaddlvaq, VADDLVAQ)
 FUNCTION_PRED_P_S_U (vaddlvq, VADDLVQ)
diff --git a/gcc/config/arm/arm-mve-builtins-base.def 
b/gcc/config/arm/arm-mve-builtins-base.def
index bd69f06d7e41..72d6461c4e4e 100644
--- a/gcc/config/arm/arm-mve-builtins-base.def
+++ b/gcc/config/arm/arm-mve-builtins-base.def
@@ -21,6 +21,7 @@
 DEF_MVE_FUNCTION (vabavq, binary_acca_int32, all_integer, p_or_none)
 DEF_MVE_FUNCTION (vabdq, binary, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vabsq, unary, all_signed, mx_or_none)
+DEF_MVE_FUNCTION (vad

[gcc r15-4457] arm: [MVE intrinsics] rework vsbcq vsbciq

2024-10-18 Thread Christophe Lyon via Gcc-cvs
https://gcc.gnu.org/g:a5efcfcc93a911c7a04626e64ee65f02d5ecce6f

commit r15-4457-ga5efcfcc93a911c7a04626e64ee65f02d5ecce6f
Author: Christophe Lyon 
Date:   Mon Sep 2 14:10:25 2024 +

arm: [MVE intrinsics] rework vsbcq vsbciq

Implement vsbcq vsbciq using the new MVE builtins framework.

We re-use most of the code introduced by the previous patches.

2024-08-28  Christophe Lyon  

gcc/

* config/arm/arm-mve-builtins-base.cc (class vadc_vsbc_impl): Add
support for vsbciq and vsbcq.
(vadciq, vadcq): Add new parameter.
(vsbciq): New.
(vsbcq): New.
* config/arm/arm-mve-builtins-base.def (vsbciq): New.
(vsbcq): New.
* config/arm/arm-mve-builtins-base.h (vsbciq): New.
(vsbcq): New.
* config/arm/arm_mve.h (vsbciq): Delete.
(vsbciq_m): Delete.
(vsbcq): Delete.
(vsbcq_m): Delete.
(vsbciq_s32): Delete.
(vsbciq_u32): Delete.
(vsbciq_m_s32): Delete.
(vsbciq_m_u32): Delete.
(vsbcq_s32): Delete.
(vsbcq_u32): Delete.
(vsbcq_m_s32): Delete.
(vsbcq_m_u32): Delete.
(__arm_vsbciq_s32): Delete.
(__arm_vsbciq_u32): Delete.
(__arm_vsbciq_m_s32): Delete.
(__arm_vsbciq_m_u32): Delete.
(__arm_vsbcq_s32): Delete.
(__arm_vsbcq_u32): Delete.
(__arm_vsbcq_m_s32): Delete.
(__arm_vsbcq_m_u32): Delete.
(__arm_vsbciq): Delete.
(__arm_vsbciq_m): Delete.
(__arm_vsbcq): Delete.
(__arm_vsbcq_m): Delete.

Diff:
---
 gcc/config/arm/arm-mve-builtins-base.cc  |  56 ++
 gcc/config/arm/arm-mve-builtins-base.def |   2 +
 gcc/config/arm/arm-mve-builtins-base.h   |   2 +
 gcc/config/arm/arm_mve.h | 170 ---
 4 files changed, 42 insertions(+), 188 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc 
b/gcc/config/arm/arm-mve-builtins-base.cc
index e8f703e4e7ad..cadd41371b4e 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -612,12 +612,14 @@ public:
 class vadc_vsbc_impl : public function_base
 {
 public:
-  CONSTEXPR vadc_vsbc_impl (bool init_carry)
-: m_init_carry (init_carry)
+  CONSTEXPR vadc_vsbc_impl (bool init_carry, bool add)
+: m_init_carry (init_carry), m_add (add)
   {}
 
   /* Initialize carry with 0 (vadci).  */
   bool m_init_carry;
+  /* Add (true) or Sub (false).  */
+  bool m_add;
 
   unsigned int
   call_properties (const function_instance &) const override
@@ -700,26 +702,42 @@ public:
   {
   case PRED_none:
/* No predicate.  */
-   unspec = m_init_carry
- ? (e.type_suffix (0).unsigned_p
-? VADCIQ_U
-: VADCIQ_S)
- : (e.type_suffix (0).unsigned_p
-? VADCQ_U
-: VADCQ_S);
+   unspec = m_add
+ ? (m_init_carry
+? (e.type_suffix (0).unsigned_p
+   ? VADCIQ_U
+   : VADCIQ_S)
+: (e.type_suffix (0).unsigned_p
+   ? VADCQ_U
+   : VADCQ_S))
+ : (m_init_carry
+? (e.type_suffix (0).unsigned_p
+   ? VSBCIQ_U
+   : VSBCIQ_S)
+: (e.type_suffix (0).unsigned_p
+   ? VSBCQ_U
+   : VSBCQ_S));
code = code_for_mve_q_v4si (unspec, unspec);
insns = e.use_exact_insn (code);
break;
 
   case PRED_m:
/* "m" predicate.  */
-   unspec = m_init_carry
- ? (e.type_suffix (0).unsigned_p
-? VADCIQ_M_U
-: VADCIQ_M_S)
- : (e.type_suffix (0).unsigned_p
-? VADCQ_M_U
-: VADCQ_M_S);
+   unspec = m_add
+ ? (m_init_carry
+? (e.type_suffix (0).unsigned_p
+   ? VADCIQ_M_U
+   : VADCIQ_M_S)
+: (e.type_suffix (0).unsigned_p
+   ? VADCQ_M_U
+   : VADCQ_M_S))
+ : (m_init_carry
+? (e.type_suffix (0).unsigned_p
+   ? VSBCIQ_M_U
+   : VSBCIQ_M_S)
+: (e.type_suffix (0).unsigned_p
+   ? VSBCQ_M_U
+   : VSBCQ_M_S));
code = code_for_mve_q_m_v4si (unspec, unspec);
insns = e.use_cond_insn (code, 0);
break;
@@ -915,8 +933,8 @@ namespace arm_mve {
 FUNCTION_PRED_P_S_U (vabavq, VABAVQ)
 FUNCTION_WITHOUT_N (vabdq, VABDQ)
 FUNCTION (vabsq, unspec_based_mve_function_exact_insn, (ABS, ABS, ABS, -1, -1, 
-1, VABSQ_M_S, -1, VABSQ_M_F, -1, -1, -1))
-FUNCTION (vadciq, vadc_vsbc_impl, (true))
-FUNCTION (vadcq, vadc_vsbc_impl, (false))
+FUNCTION (vadciq, vadc_vsbc_impl, (true, true))
+FUNCTION (vadcq, vadc_vsbc_impl, (false, true))
 FUNCTION_WITH_RTX_M_N (vaddq, PLUS, VADDQ)
 FUNCTION_PRED_P_S_U (vaddlvaq, VADD

[gcc r15-4458] arm: [MVE intrinsics] use long_type_suffix / half_type_suffix helpers

2024-10-18 Thread Christophe Lyon via Gcc-cvs
https://gcc.gnu.org/g:8e74cbc3a834ee009a5e60e76b20e8df7114da31

commit r15-4458-g8e74cbc3a834ee009a5e60e76b20e8df7114da31
Author: Christophe Lyon 
Date:   Fri Aug 30 13:52:23 2024 +

arm: [MVE intrinsics] use long_type_suffix / half_type_suffix helpers

In several places we are looking for a type twice or half as large as
the type suffix: this patch introduces helper functions to avoid code
duplication. long_type_suffix is similar to the SVE counterpart, but
adds an 'expected_tclass' parameter.  half_type_suffix is similar to
it, but does not exist in SVE.

2024-08-28  Christophe Lyon  

gcc/

* config/arm/arm-mve-builtins-shapes.cc (long_type_suffix): New.
(half_type_suffix): New.
(struct binary_move_narrow_def): Use new helper.
(struct binary_move_narrow_unsigned_def): Likewise.
(struct binary_rshift_narrow_def): Likewise.
(struct binary_rshift_narrow_unsigned_def): Likewise.
(struct binary_widen_def): Likewise.
(struct binary_widen_n_def): Likewise.
(struct binary_widen_opt_n_def): Likewise.
(struct unary_widen_def): Likewise.

Diff:
---
 gcc/config/arm/arm-mve-builtins-shapes.cc | 114 ++
 1 file changed, 68 insertions(+), 46 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc 
b/gcc/config/arm/arm-mve-builtins-shapes.cc
index 9deed1789664..0a108cf0127e 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.cc
+++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
@@ -320,6 +320,45 @@ build_16_32 (function_builder &b, const char *signature,
 }
 }
 
+/* TYPE is the largest type suffix associated with the arguments of R, but the
+   result is twice as wide.  Return the associated type suffix of
+   EXPECTED_TCLASS if it exists, otherwise report an appropriate error and
+   return NUM_TYPE_SUFFIXES.  */
+static type_suffix_index
+long_type_suffix (function_resolver &r,
+ type_suffix_index type,
+ type_class_index expected_tclass)
+{
+  unsigned int element_bits = type_suffixes[type].element_bits;
+  if (expected_tclass == function_resolver::SAME_TYPE_CLASS)
+expected_tclass = type_suffixes[type].tclass;
+
+  if (type_suffixes[type].integer_p && element_bits < 64)
+return find_type_suffix (expected_tclass, element_bits * 2);
+
+  r.report_no_such_form (type);
+  return NUM_TYPE_SUFFIXES;
+}
+
+/* Return the type suffix half as wide as TYPE with EXPECTED_TCLASS if it
+   exists, otherwise report an appropriate error and return
+   NUM_TYPE_SUFFIXES.  */
+static type_suffix_index
+half_type_suffix (function_resolver &r,
+ type_suffix_index type,
+ type_class_index expected_tclass)
+{
+  unsigned int element_bits = type_suffixes[type].element_bits;
+  if (expected_tclass == function_resolver::SAME_TYPE_CLASS)
+expected_tclass = type_suffixes[type].tclass;
+
+  if (type_suffixes[type].integer_p && element_bits > 8)
+return find_type_suffix (expected_tclass, element_bits / 2);
+
+  r.report_no_such_form (type);
+  return NUM_TYPE_SUFFIXES;
+}
+
 /* Declare the function shape NAME, pointing it to an instance
of class <NAME>_def.  */
 #define SHAPE(NAME) \
@@ -779,16 +818,13 @@ struct binary_move_narrow_def : public overloaded_base<0>
   resolve (function_resolver &r) const override
   {
 unsigned int i, nargs;
-type_suffix_index type;
+type_suffix_index type, narrow_suffix;
 if (!r.check_gp_argument (2, i, nargs)
-   || (type = r.infer_vector_type (1)) == NUM_TYPE_SUFFIXES)
+   || (type = r.infer_vector_type (1)) == NUM_TYPE_SUFFIXES
+   || ((narrow_suffix = half_type_suffix (r, type, r.SAME_TYPE_CLASS))
+   == NUM_TYPE_SUFFIXES))
   return error_mark_node;
 
-type_suffix_index narrow_suffix
-  = find_type_suffix (type_suffixes[type].tclass,
- type_suffixes[type].element_bits / 2);
-
-
 if (!r.require_matching_vector_type (0, narrow_suffix))
   return error_mark_node;
 
@@ -816,15 +852,13 @@ struct binary_move_narrow_unsigned_def : public 
overloaded_base<0>
   resolve (function_resolver &r) const override
   {
 unsigned int i, nargs;
-type_suffix_index type;
+type_suffix_index type, narrow_suffix;
 if (!r.check_gp_argument (2, i, nargs)
-   || (type = r.infer_vector_type (1)) == NUM_TYPE_SUFFIXES)
+   || (type = r.infer_vector_type (1)) == NUM_TYPE_SUFFIXES
+   || ((narrow_suffix = half_type_suffix (r, type, TYPE_unsigned))
+   == NUM_TYPE_SUFFIXES))
   return error_mark_node;
 
-type_suffix_index narrow_suffix
-  = find_type_suffix (TYPE_unsigned,
- type_suffixes[type].element_bits / 2);
-
 if (!r.require_matching_vector_type (0, narrow_suffix))
   return error_mark_node;
 
@@ -1112,16 +1146,14 @@ struct binary_rshift_narrow_def : public 
overloaded_base<0>
   re

[gcc r15-4453] arm: [MVE intrinsics] add vadc_vsbc shape

2024-10-18 Thread Christophe Lyon via Gcc-cvs
https://gcc.gnu.org/g:ba7b97e0bcc089ee37caf2203239939cff030b68

commit r15-4453-gba7b97e0bcc089ee37caf2203239939cff030b68
Author: Christophe Lyon 
Date:   Wed Aug 28 21:26:33 2024 +

arm: [MVE intrinsics] add vadc_vsbc shape

This patch adds the vadc_vsbc shape description.

2024-08-28  Christophe Lyon  

gcc/
* config/arm/arm-mve-builtins-shapes.cc (vadc_vsbc): New.
* config/arm/arm-mve-builtins-shapes.h (vadc_vsbc): New.

Diff:
---
 gcc/config/arm/arm-mve-builtins-shapes.cc | 36 +++
 gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
 2 files changed, 37 insertions(+)

diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc 
b/gcc/config/arm/arm-mve-builtins-shapes.cc
index ee6b5b0a7b14..9deed1789664 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.cc
+++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
@@ -1996,6 +1996,42 @@ struct unary_widen_acc_def : public overloaded_base<0>
 };
 SHAPE (unary_widen_acc)
 
+/* <T0>_t vfoo[_t0](T0, T0, uint32_t*)
+
+   Example: vadcq.
+   int32x4_t [__arm_]vadcq[_s32](int32x4_t a, int32x4_t b, unsigned *carry)
+   int32x4_t [__arm_]vadcq_m[_s32](int32x4_t inactive, int32x4_t a, int32x4_t 
b, unsigned *carry, mve_pred16_t p)  */
+struct vadc_vsbc_def : public overloaded_base<0>
+{
+  void
+  build (function_builder &b, const function_group_info &group,
+bool preserve_user_namespace) const override
+  {
+b.add_overloaded_functions (group, MODE_none, preserve_user_namespace);
+build_all (b, "v0,v0,v0,as", group, MODE_none, preserve_user_namespace);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+unsigned int i, nargs;
+type_suffix_index type;
+if (!r.check_gp_argument (3, i, nargs)
+   || (type = r.infer_vector_type (0)) == NUM_TYPE_SUFFIXES)
+  return error_mark_node;
+
+if (!r.require_matching_vector_type (1, type))
+  return error_mark_node;
+
+/* Check that last arg is a pointer.  */
+if (!POINTER_TYPE_P (r.get_argument_type (i)))
+  return error_mark_node;
+
+return r.resolve_to (r.mode_suffix_id, type);
+  }
+};
+SHAPE (vadc_vsbc)
+
 /* mve_pred16_t foo_t0(uint32_t)
 
Example: vctp16q.
diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h 
b/gcc/config/arm/arm-mve-builtins-shapes.h
index d73c74c8ad74..e53381d8f36c 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.h
+++ b/gcc/config/arm/arm-mve-builtins-shapes.h
@@ -77,6 +77,7 @@ namespace arm_mve
 extern const function_shape *const unary_n;
 extern const function_shape *const unary_widen;
 extern const function_shape *const unary_widen_acc;
+extern const function_shape *const vadc_vsbc;
 extern const function_shape *const vctp;
 extern const function_shape *const vcvt;
 extern const function_shape *const vcvt_f16_f32;


[gcc r15-4456] arm: [MVE intrinsics] rework vadcq

2024-10-18 Thread Christophe Lyon via Gcc-cvs
https://gcc.gnu.org/g:6e2b3125c2f47d52d0eefe88298c41ae8e2eee0d

commit r15-4456-g6e2b3125c2f47d52d0eefe88298c41ae8e2eee0d
Author: Christophe Lyon 
Date:   Thu Aug 29 19:26:27 2024 +

arm: [MVE intrinsics] rework vadcq

Implement vadcq using the new MVE builtins framework.

We re-use most of the code introduced by the previous patch to support
vadciq: we just need to initialize carry from the input parameter.

2024-08-28  Christophe Lyon  

gcc/

* config/arm/arm-mve-builtins-base.cc (vadcq_vsbc): Add support
for vadcq.
* config/arm/arm-mve-builtins-base.def (vadcq): New.
* config/arm/arm-mve-builtins-base.h (vadcq): New.
* config/arm/arm_mve.h (vadcq): Delete.
(vadcq_m): Delete.
(vadcq_s32): Delete.
(vadcq_u32): Delete.
(vadcq_m_s32): Delete.
(vadcq_m_u32): Delete.
(__arm_vadcq_s32): Delete.
(__arm_vadcq_u32): Delete.
(__arm_vadcq_m_s32): Delete.
(__arm_vadcq_m_u32): Delete.
(__arm_vadcq): Delete.
(__arm_vadcq_m): Delete.

Diff:
---
 gcc/config/arm/arm-mve-builtins-base.cc  | 61 +++---
 gcc/config/arm/arm-mve-builtins-base.def |  1 +
 gcc/config/arm/arm-mve-builtins-base.h   |  1 +
 gcc/config/arm/arm_mve.h | 87 
 4 files changed, 56 insertions(+), 94 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc 
b/gcc/config/arm/arm-mve-builtins-base.cc
index f04bf027b417..e8f703e4e7ad 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -612,10 +612,19 @@ public:
 class vadc_vsbc_impl : public function_base
 {
 public:
+  CONSTEXPR vadc_vsbc_impl (bool init_carry)
+: m_init_carry (init_carry)
+  {}
+
+  /* Initialize carry with 0 (vadci).  */
+  bool m_init_carry;
+
   unsigned int
   call_properties (const function_instance &) const override
   {
 unsigned int flags = CP_WRITE_MEMORY | CP_READ_FPCR;
+if (!m_init_carry)
+  flags |= CP_READ_MEMORY;
 return flags;
   }
 
@@ -658,22 +667,59 @@ public:
 carry_ptr = e.args[carry_out_arg_no];
 e.args.ordered_remove (carry_out_arg_no);
 
+if (!m_init_carry)
+  {
+   /* Prepare carry in:
+  set_fpscr ( (fpscr & ~0x2000u)
+  | ((*carry & 1u) << 29) )  */
+   rtx carry_in = gen_reg_rtx (SImode);
+   rtx fpscr = gen_reg_rtx (SImode);
+   emit_insn (gen_get_fpscr_nzcvqc (fpscr));
+   emit_insn (gen_rtx_SET (carry_in, gen_rtx_MEM (SImode, carry_ptr)));
+
+   emit_insn (gen_rtx_SET (carry_in,
+   gen_rtx_ASHIFT (SImode,
+   carry_in,
+   GEN_INT (29;
+   emit_insn (gen_rtx_SET (carry_in,
+   gen_rtx_AND (SImode,
+carry_in,
+GEN_INT (0x2000;
+   emit_insn (gen_rtx_SET (fpscr,
+   gen_rtx_AND (SImode,
+fpscr,
+GEN_INT (~0x2000;
+   emit_insn (gen_rtx_SET (carry_in,
+   gen_rtx_IOR (SImode,
+carry_in,
+fpscr)));
+   emit_insn (gen_set_fpscr_nzcvqc (carry_in));
+  }
+
 switch (e.pred)
   {
   case PRED_none:
/* No predicate.  */
-   unspec = e.type_suffix (0).unsigned_p
- ? VADCIQ_U
- : VADCIQ_S;
+   unspec = m_init_carry
+ ? (e.type_suffix (0).unsigned_p
+? VADCIQ_U
+: VADCIQ_S)
+ : (e.type_suffix (0).unsigned_p
+? VADCQ_U
+: VADCQ_S);
code = code_for_mve_q_v4si (unspec, unspec);
insns = e.use_exact_insn (code);
break;
 
   case PRED_m:
/* "m" predicate.  */
-   unspec = e.type_suffix (0).unsigned_p
- ? VADCIQ_M_U
- : VADCIQ_M_S;
+   unspec = m_init_carry
+ ? (e.type_suffix (0).unsigned_p
+? VADCIQ_M_U
+: VADCIQ_M_S)
+ : (e.type_suffix (0).unsigned_p
+? VADCQ_M_U
+: VADCQ_M_S);
code = code_for_mve_q_m_v4si (unspec, unspec);
insns = e.use_cond_insn (code, 0);
break;
@@ -869,7 +915,8 @@ namespace arm_mve {
 FUNCTION_PRED_P_S_U (vabavq, VABAVQ)
 FUNCTION_WITHOUT_N (vabdq, VABDQ)
 FUNCTION (vabsq, unspec_based_mve_function_exact_insn, (ABS, ABS, ABS, -1, -1, 
-1, VABSQ_M_S, -1, VABSQ_M_F, -1, -1, -1))
-FUNCTION (vadciq, vadc_vsbc_impl,)
+FUNCTION (vadciq, vadc_vsbc_impl, (true))
+FUNCTION (vadcq, vadc_vsbc_impl, (false))
 FUNCTION_WITH_RTX_M_N (vaddq, PLUS, VADDQ)
 FUNCTION_PRED_P

[gcc r15-4460] AArch64: support encoding integer immediates using floating point moves

2024-10-18 Thread Tamar Christina via Gcc-cvs
https://gcc.gnu.org/g:87dc6b1992e7ee02e7a4a81c568754198c0f61f5

commit r15-4460-g87dc6b1992e7ee02e7a4a81c568754198c0f61f5
Author: Tamar Christina 
Date:   Fri Oct 18 09:43:45 2024 +0100

AArch64: support encoding integer immediates using floating point moves

This patch extends our immediate SIMD generation cases to support generating
integer immediates using floating point operations if the integer immediate
maps to an exact FP value.

As an example:

uint32x4_t f1() {
return vdupq_n_u32(0x3f800000);
}

currently generates:

f1:
adrp    x0, .LC0
ldr q0, [x0, #:lo12:.LC0]
ret

i.e. a load, but with this change:

f1:
fmov    v0.4s, 1.0e+0
ret

Such immediates are common in e.g. our Math routines in glibc because they
are created to extract or mark part of an FP immediate as masks.

gcc/ChangeLog:

* config/aarch64/aarch64.cc (aarch64_sve_valid_immediate,
aarch64_simd_valid_immediate): Refactor accepting modes and values.
(aarch64_float_const_representable_p): Refactor and extract FP checks
into ...
(aarch64_real_float_const_representable_p): ...This and fix fail
fallback from real_to_integer.
(aarch64_advsimd_valid_immediate): Use it.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/const_create_using_fmov.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64.cc  | 282 +++--
 .../gcc.target/aarch64/const_create_using_fmov.c   |  87 +++
 2 files changed, 241 insertions(+), 128 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 5770491b30ce..e65b24e2ad6a 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -22899,19 +22899,19 @@ aarch64_advsimd_valid_immediate_hs (unsigned int 
val32,
   return false;
 }
 
-/* Return true if replicating VAL64 is a valid immediate for the
+/* Return true if replicating VAL64 with mode MODE is a valid immediate for the
Advanced SIMD operation described by WHICH.  If INFO is nonnull,
use it to describe valid immediates.  */
 static bool
 aarch64_advsimd_valid_immediate (unsigned HOST_WIDE_INT val64,
+scalar_int_mode mode,
 simd_immediate_info *info,
 enum simd_immediate_check which)
 {
   unsigned int val32 = val64 & 0x;
-  unsigned int val16 = val64 & 0x;
   unsigned int val8 = val64 & 0xff;
 
-  if (val32 == (val64 >> 32))
+  if (mode != DImode)
 {
   if ((which & AARCH64_CHECK_ORR) != 0
  && aarch64_advsimd_valid_immediate_hs (val32, info, which,
@@ -22924,9 +22924,7 @@ aarch64_advsimd_valid_immediate (unsigned HOST_WIDE_INT 
val64,
return true;
 
   /* Try using a replicated byte.  */
-  if (which == AARCH64_CHECK_MOV
- && val16 == (val32 >> 16)
- && val8 == (val16 >> 8))
+  if (which == AARCH64_CHECK_MOV && mode == QImode)
{
  if (info)
*info = simd_immediate_info (QImode, val8);
@@ -22954,28 +22952,15 @@ aarch64_advsimd_valid_immediate (unsigned 
HOST_WIDE_INT val64,
   return false;
 }
 
-/* Return true if replicating VAL64 gives a valid immediate for an SVE MOV
-   instruction.  If INFO is nonnull, use it to describe valid immediates.  */
+/* Return true if replicating IVAL with MODE gives a valid immediate for an SVE
+   MOV instruction.  If INFO is nonnull, use it to describe valid
+   immediates.  */
 
 static bool
-aarch64_sve_valid_immediate (unsigned HOST_WIDE_INT val64,
+aarch64_sve_valid_immediate (unsigned HOST_WIDE_INT ival, scalar_int_mode mode,
 simd_immediate_info *info)
 {
-  scalar_int_mode mode = DImode;
-  unsigned int val32 = val64 & 0x;
-  if (val32 == (val64 >> 32))
-{
-  mode = SImode;
-  unsigned int val16 = val32 & 0x;
-  if (val16 == (val32 >> 16))
-   {
- mode = HImode;
- unsigned int val8 = val16 & 0xff;
- if (val8 == (val16 >> 8))
-   mode = QImode;
-   }
-}
-  HOST_WIDE_INT val = trunc_int_for_mode (val64, mode);
+  HOST_WIDE_INT val = trunc_int_for_mode (ival, mode);
   if (IN_RANGE (val, -0x80, 0x7f))
 {
   /* DUP with no shift.  */
@@ -22990,7 +22975,7 @@ aarch64_sve_valid_immediate (unsigned HOST_WIDE_INT 
val64,
*info = simd_immediate_info (mode, val);
   return true;
 }
-  if (aarch64_bitmask_imm (val64, mode))
+  if (aarch64_bitmask_imm (ival, mode))
 {
   /* DUPM.  */
   if (info)
@@ -23071,6 +23056,91 @@ aarch64_sve_pred_valid_immediate (rtx x, 
simd_immediate_info *info)
   return false;
 }
 
+/* We can only represent floating point constants which will fit in
+   "quarter-precision" values.  These values are characterised by
+

[gcc r15-4461] AArch64: use movi d0, #0 to clear SVE registers instead of mov z0.d, #0

2024-10-18 Thread Tamar Christina via Gcc-cvs
https://gcc.gnu.org/g:453d3d90c374d3bb329f1431b7dfb8d0510a88b9

commit r15-4461-g453d3d90c374d3bb329f1431b7dfb8d0510a88b9
Author: Tamar Christina 
Date:   Fri Oct 18 09:44:15 2024 +0100

AArch64: use movi d0, #0 to clear SVE registers instead of mov z0.d, #0

This patch changes SVE to use Adv. SIMD movi 0 to clear SVE registers when
not in SVE streaming mode.  As the Neoverse Software Optimization guides
indicate, SVE mov #0 is not a zero cost move.

When in streaming mode we continue to use SVE's mov to clear the registers.

Tests have already been updated.

gcc/ChangeLog:

* config/aarch64/aarch64.cc (aarch64_output_sve_mov_immediate): Use
movi for SVE zeros.

Diff:
---
 gcc/config/aarch64/aarch64.cc | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index e65b24e2ad6a..3ab550acc7cd 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -25516,8 +25516,11 @@ aarch64_output_sve_mov_immediate (rtx const_vector)
}
 }
 
-  snprintf (templ, sizeof (templ), "mov\t%%0.%c, #" HOST_WIDE_INT_PRINT_DEC,
-   element_char, INTVAL (info.u.mov.value));
+  if (info.u.mov.value == const0_rtx && TARGET_NON_STREAMING)
+snprintf (templ, sizeof (templ), "movi\t%%d0, #0");
+  else
+snprintf (templ, sizeof (templ), "mov\t%%0.%c, #" HOST_WIDE_INT_PRINT_DEC,
+ element_char, INTVAL (info.u.mov.value));
   return templ;
 }


[gcc(refs/vendors/ARM/heads/gcs-v2)] aarch64: libitm: Add GCS support

2024-10-18 Thread Andre Simoes Dias Vieira via Gcc-cvs
https://gcc.gnu.org/g:6d40608ca4d335e56cd7e95ee9b418881c99c6a0

commit 6d40608ca4d335e56cd7e95ee9b418881c99c6a0
Author: Szabolcs Nagy 
Date:   Tue Apr 2 15:43:23 2024 +0100

aarch64: libitm: Add GCS support

Transaction begin and abort use setjmp/longjmp like operations that
need to be updated for GCS compatibility. We use similar logic to
libc setjmp/longjmp that support switching stack and thus switching
GCS (e.g. due to longjmp out of a makecontext stack), this is kept
even though it is likely not required for transaction aborts.

The gtm_jmpbuf is internal to libitm so we can change its layout
without breaking ABI.

libitm/ChangeLog:

* config/aarch64/sjlj.S: Add GCS support and mark GCS compatible.
* config/aarch64/target.h: Add gcs field to gtm_jmpbuf.

Diff:
---
 libitm/config/aarch64/sjlj.S   | 60 +++---
 libitm/config/aarch64/target.h |  1 +
 2 files changed, 58 insertions(+), 3 deletions(-)

diff --git a/libitm/config/aarch64/sjlj.S b/libitm/config/aarch64/sjlj.S
index aeffd4d10701..cf1d8af2c968 100644
--- a/libitm/config/aarch64/sjlj.S
+++ b/libitm/config/aarch64/sjlj.S
@@ -29,6 +29,13 @@
 #define AUTIASPhint29
 #define PACIBSPhint27
 #define AUTIBSPhint31
+#define CHKFEAT_X16hint40
+#define MRS_GCSPR(x)   mrs x, s3_3_c2_c5_1
+#define GCSPOPM(x) syslx, #3, c7, c7, #1
+#define GCSSS1(x)  sys #3, c7, c7, #2, x
+#define GCSSS2(x)  syslx, #3, c7, c7, #3
+
+#define L(name) .L##name
 
 #if defined(HAVE_AS_CFI_PSEUDO_OP) && defined(__GCC_HAVE_DWARF2_CFI_ASM)
 # define cfi_negate_ra_state .cfi_negate_ra_state
@@ -80,7 +87,16 @@ _ITM_beginTransaction:
stp d10, d11, [sp, 7*16]
stp d12, d13, [sp, 8*16]
stp d14, d15, [sp, 9*16]
-   str x1, [sp, 10*16]
+
+   /* GCS support.  */
+   mov x2, 0
+   mov x16, 1
+   CHKFEAT_X16
+   tbnzx16, 0, L(gcs_done_sj)
+   MRS_GCSPR (x2)
+   add x2, x2, 8 /* GCS after _ITM_beginTransaction returns.  */
+L(gcs_done_sj):
+   stp x2, x1, [sp, 10*16]
 
/* Invoke GTM_begin_transaction with the struct we just built.  */
mov x1, sp
@@ -117,7 +133,38 @@ GTM_longjmp:
ldp d10, d11, [x1, 7*16]
ldp d12, d13, [x1, 8*16]
ldp d14, d15, [x1, 9*16]
+
+   /* GCS support.  */
+   mov x16, 1
+   CHKFEAT_X16
+   tbnzx16, 0, L(gcs_done_lj)
+   MRS_GCSPR (x7)
ldr x3, [x1, 10*16]
+   mov x4, x3
+   /* x7: GCSPR now.  x3, x4: target GCSPR.  x5, x6: tmp regs.  */
+L(gcs_scan):
+   cmp x7, x4
+   b.eqL(gcs_pop)
+   sub x4, x4, 8
+   /* Check for a cap token.  */
+   ldr x5, [x4]
+   and x6, x4, 0xf000
+   orr x6, x6, 1
+   cmp x5, x6
+   b.neL(gcs_scan)
+L(gcs_switch):
+   add x7, x4, 8
+   GCSSS1 (x4)
+   GCSSS2 (xzr)
+L(gcs_pop):
+   cmp x7, x3
+   b.eqL(gcs_done_lj)
+   GCSPOPM (xzr)
+   add x7, x7, 8
+   b   L(gcs_pop)
+L(gcs_done_lj):
+
+   ldr x3, [x1, 10*16 + 8]
ldp x29, x30, [x1]
cfi_def_cfa(x1, 0)
CFI_PAC_TOGGLE
@@ -132,6 +179,7 @@ GTM_longjmp:
 #define FEATURE_1_AND 0xc000
 #define FEATURE_1_BTI 1
 #define FEATURE_1_PAC 2
+#define FEATURE_1_GCS 4
 
 /* Supported features based on the code generation options.  */
 #if defined(__ARM_FEATURE_BTI_DEFAULT)
@@ -146,6 +194,12 @@ GTM_longjmp:
 # define PAC_FLAG 0
 #endif
 
+#if __ARM_FEATURE_GCS_DEFAULT
+# define GCS_FLAG FEATURE_1_GCS
+#else
+# define GCS_FLAG 0
+#endif
+
 /* Add a NT_GNU_PROPERTY_TYPE_0 note.  */
 #define GNU_PROPERTY(type, value)  \
   .section .note.gnu.property, "a";\
@@ -163,7 +217,7 @@ GTM_longjmp:
 .section .note.GNU-stack, "", %progbits
 
 /* Add GNU property note if built with branch protection.  */
-# if (BTI_FLAG|PAC_FLAG) != 0
-GNU_PROPERTY (FEATURE_1_AND, BTI_FLAG|PAC_FLAG)
+# if (BTI_FLAG|PAC_FLAG|GCS_FLAG) != 0
+GNU_PROPERTY (FEATURE_1_AND, BTI_FLAG|PAC_FLAG|GCS_FLAG)
 # endif
 #endif
diff --git a/libitm/config/aarch64/target.h b/libitm/config/aarch64/target.h
index 3d99197bfaba..a1f39b4bf7a7 100644
--- a/libitm/config/aarch64/target.h
+++ b/libitm/config/aarch64/target.h
@@ -30,6 +30,7 @@ typedef struct gtm_jmpbuf
   unsigned long long pc;   /* x30 */
   unsigned long long gr[10];   /* x19-x28 */
   unsigned long long vr[8];/* d8-d15 */
+  void *gcs;   /* GCSPR_EL0 */
   void *cfa;
 } gtm_jmpbuf;


[gcc r15-4463] middle-end: Fix GSI for gcond root [PR117140]

2024-10-18 Thread Tamar Christina via Gcc-cvs
https://gcc.gnu.org/g:51291ad0f1f89a81de917110af96e019dcd5690c

commit r15-4463-g51291ad0f1f89a81de917110af96e019dcd5690c
Author: Tamar Christina 
Date:   Fri Oct 18 10:37:28 2024 +0100

middle-end: Fix GSI for gcond root [PR117140]

When finding the gsi to use for code of the root statements we should use
the one of the original statement rather than the gcond which may be inside
a pattern.

Without this the emitted instructions may be discarded later.

gcc/ChangeLog:

PR tree-optimization/117140
* tree-vect-slp.cc (vectorize_slp_instance_root_stmt): Use gsi from
original statement.

gcc/testsuite/ChangeLog:

PR tree-optimization/117140
* gcc.dg/vect/vect-early-break_129-pr117140.c: New test.

Diff:
---
 .../gcc.dg/vect/vect-early-break_129-pr117140.c| 94 ++
 gcc/tree-vect-slp.cc   |  2 +-
 2 files changed, 95 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_129-pr117140.c 
b/gcc/testsuite/gcc.dg/vect/vect-early-break_129-pr117140.c
new file mode 100644
index ..eec7f8db40c7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_129-pr117140.c
@@ -0,0 +1,94 @@
+/* { dg-do compile } */
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+
+typedef signed char int8_t;
+typedef short int int16_t;
+typedef int int32_t;
+typedef long long int int64_t;
+typedef unsigned char uint8_t;
+typedef short unsigned int uint16_t;
+typedef unsigned int uint32_t;
+typedef long long unsigned int uint64_t;
+
+void __attribute__ ((noinline, noclone))
+test_1_TYPE1_uint32_t (uint16_t *__restrict f, uint32_t *__restrict d,
+   uint16_t x, uint16_t x2, uint32_t y, int n)
+{
+for (int i = 0; i < n; ++i)
+{
+f[i * 2 + 0] = x;
+f[i * 2 + 1] = x2;
+d[i] = y;
+}
+}
+
+void __attribute__ ((noinline, noclone))
+test_1_TYPE1_int64_t (int32_t *__restrict f, int64_t *__restrict d, int32_t x,
+  int32_t x2, int64_t y, int n)
+{
+for (int i = 0; i < n; ++i)
+{
+f[i * 2 + 0] = x;
+f[i * 2 + 1] = x2;
+d[i] = y;
+}
+}
+
+int
+main (void)
+{
+// This part is necessary for ice to appear though running it by 
itself does not trigger an ICE
+int n_3_TYPE1_uint32_t = 32;
+uint16_t x_3_uint16_t = 233;
+uint16_t x2_3_uint16_t = 78;
+uint32_t y_3_uint32_t = 1234;
+uint16_t f_3_uint16_t[33 * 2 + 1] = { 0} ;
+uint32_t d_3_uint32_t[33] = { 0} ;
+test_1_TYPE1_uint32_t (f_3_uint16_t, d_3_uint32_t, x_3_uint16_t, 
x2_3_uint16_t, y_3_uint32_t, n_3_TYPE1_uint32_t);
+for (int i = 0;
+i < n_3_TYPE1_uint32_t;
+++i) {
+if (f_3_uint16_t[i * 2 + 0] != x_3_uint16_t) __builtin_abort 
();
+if (f_3_uint16_t[i * 2 + 1] != x2_3_uint16_t) __builtin_abort 
();
+if (d_3_uint32_t[i] != y_3_uint32_t) __builtin_abort ();
+}
+for (int i = n_3_TYPE1_uint32_t;
+i < n_3_TYPE1_uint32_t + 1;
+++i) {
+if (f_3_uint16_t[i * 2 + 0] != 0) __builtin_abort ();
+if (f_3_uint16_t[i * 2 + 1] != 0) __builtin_abort ();
+if (d_3_uint32_t[i] != 0) __builtin_abort ();
+}
+// If ran without the above section, a different ice appears. see below
+int n_3_TYPE1_int64_t = 32;
+int32_t x_3_int32_t = 233;
+int32_t x2_3_int32_t = 78;
+int64_t y_3_int64_t = 1234;
+int32_t f_3_int32_t[33 * 2 + 1] = { 0 };
+int64_t d_3_int64_t[33] = { 0 };
+test_1_TYPE1_int64_t (f_3_int32_t, d_3_int64_t, x_3_int32_t, x2_3_int32_t,
+  y_3_int64_t, n_3_TYPE1_int64_t);
+for (int i = 0; i < n_3_TYPE1_int64_t; ++i)
+{
+if (f_3_int32_t[i * 2 + 0] != x_3_int32_t)
+__builtin_abort ();
+if (f_3_int32_t[i * 2 + 1] != x2_3_int32_t)
+__builtin_abort ();
+if (d_3_int64_t[i] != y_3_int64_t)
+__builtin_abort ();
+}
+
+for (int i = n_3_TYPE1_int64_t; i < n_3_TYPE1_int64_t + 1; ++i)
+{
+if (f_3_int32_t[i * 2 + 0] != 0)
+__builtin_abort ();
+if (f_3_int32_t[i * 2 + 1] != 0)
+__builtin_abort ();
+if (d_3_int64_t[i] != 0)
+__builtin_abort ();
+}
+
+return 0;
+}
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index d35c2ea02dce..9276662fa0f1 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -11167,7 +11167,7 @@ vectorize_slp_instance_root_stmt (vec_info *vinfo, 
slp_tree node, slp_instance i
 can't support lane

[gcc r15-4459] AArch64: update testsuite to account for new zero moves

2024-10-18 Thread Tamar Christina via Gcc-cvs
https://gcc.gnu.org/g:fc3507927768c3df425a0b5c0e4051eb8bb1ccf0

commit r15-4459-gfc3507927768c3df425a0b5c0e4051eb8bb1ccf0
Author: Tamar Christina 
Date:   Fri Oct 18 09:42:46 2024 +0100

AArch64: update testsuite to account for new zero moves

The patch series will adjust how zeros are created.  In principle the exact
lane size a zero gets created on doesn't matter, but this makes the tests a
bit fragile.

This preparation patch will update the testsuite to accept multiple variants
of ways to create vector zeros to accept both the current syntax and the one
being transitioned to in the series.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/ldp_stp_18.c: Update zero regexpr.
* gcc.target/aarch64/memset-corner-cases.c: Likewise.
* gcc.target/aarch64/sme/acle-asm/revd_bf16.c: Likewise.
* gcc.target/aarch64/sme/acle-asm/revd_f16.c: Likewise.
* gcc.target/aarch64/sme/acle-asm/revd_f32.c: Likewise.
* gcc.target/aarch64/sme/acle-asm/revd_f64.c: Likewise.
* gcc.target/aarch64/sme/acle-asm/revd_s16.c: Likewise.
* gcc.target/aarch64/sme/acle-asm/revd_s32.c: Likewise.
* gcc.target/aarch64/sme/acle-asm/revd_s64.c: Likewise.
* gcc.target/aarch64/sme/acle-asm/revd_s8.c: Likewise.
* gcc.target/aarch64/sme/acle-asm/revd_u16.c: Likewise.
* gcc.target/aarch64/sme/acle-asm/revd_u32.c: Likewise.
* gcc.target/aarch64/sme/acle-asm/revd_u64.c: Likewise.
* gcc.target/aarch64/sme/acle-asm/revd_u8.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/acge_f16.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/acge_f32.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/acge_f64.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/acgt_f16.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/acgt_f32.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/acgt_f64.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/acle_f16.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/acle_f32.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/acle_f64.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/aclt_f16.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/aclt_f32.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/aclt_f64.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/bic_s8.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/bic_u8.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/cmpuo_f16.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/cmpuo_f32.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/cmpuo_f64.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/dup_f16.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/dup_f32.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/dup_f64.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/dup_s16.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/dup_s32.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/dup_s64.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/dup_s8.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/dup_u16.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/dup_u32.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/dup_u64.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/dup_u8.c: Likewise.
* gcc.target/aarch64/sve/const_fold_div_1.c: Likewise.
* gcc.target/aarch64/sve/const_fold_mul_1.c: Likewise.
* gcc.target/aarch64/sve/dup_imm_1.c: Likewise.
* gcc.target/aarch64/sve/fdup_1.c: Likewise.
* gcc.target/aarch64/sve/fold_div_zero.c: Likewise.
* gcc.target/aarch64/sve/fold_mul_zero.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_2.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_3.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_4.c: Likewise.
* gcc.target/aarch64/vect-fmovd-zero.c: Likewise.

Diff:
---
 gcc/testsuite/gcc.target/aarch64/ldp_stp_18.c  |  2 +-
 .../gcc.target/aarch64/memset-corner-cases.c   |  2 +-
 .../gcc.target/aarch64/sme/acle-asm/revd_bf16.c|  2 +-
 .../gcc.target/aarch64/sme/acle-asm/revd_f16.c |  2 +-
 .../gcc.target/aarch64/sme/acle-asm/revd_f32.c |  2 +-
 .../gcc.target/aarch64/sme/acle-asm/revd_f64.c |  2 +-
 .../gcc.target/aarch64/sme/acle-asm/revd_s16.c |  2 +-
 .../gcc.target/aarch64/sme/acle-asm/revd_s32.c |  2 +-
 .../gcc.target/aarch64/sme/acle-asm/revd_s64.c |  2 +-
 .../gcc.target/aarch64/sme/acle-asm/revd_s8.c  |  2 +-
 .../gcc.target/aarch64/sme/acle-asm/revd_u16.c |  2 +-
 .../gcc.target/aarch64/sme/acle-asm/revd_u32.c |  2 +-
 .../gcc.target/aarch64/sme/acle-asm/revd_u64.c |  2 +-
 .../gcc.target/aarch64/sme/acle-asm/revd_u8.c  |  2 +-
 .../gc

[gcc(refs/vendors/ARM/heads/gcs-v2)] aarch64: libgcc: add GCS marking to asm

2024-10-18 Thread Andre Simoes Dias Vieira via Gcc-cvs
https://gcc.gnu.org/g:d7010b625c0048d11214c8157f25341580f61809

commit d7010b625c0048d11214c8157f25341580f61809
Author: Szabolcs Nagy 
Date:   Fri Dec 22 13:44:19 2023 +

aarch64: libgcc: add GCS marking to asm

libgcc/ChangeLog:

* config/aarch64/aarch64-asm.h (FEATURE_1_GCS): Define.
(GCS_FLAG): Define if GCS is enabled.
(GNU_PROPERTY): Add GCS_FLAG.
* config/aarch64/lse.S: fix warning L redefined

Diff:
---
 libgcc/config/aarch64/aarch64-asm.h | 16 ++--
 libgcc/config/aarch64/lse.S |  2 ++
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/libgcc/config/aarch64/aarch64-asm.h 
b/libgcc/config/aarch64/aarch64-asm.h
index d8ab91d52f1b..f7bd225f7a4e 100644
--- a/libgcc/config/aarch64/aarch64-asm.h
+++ b/libgcc/config/aarch64/aarch64-asm.h
@@ -22,6 +22,9 @@
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
.  */
 
+#ifndef AARCH64_ASM_H
+#define AARCH64_ASM_H
+
 #include "auto-target.h"
 
 #define L(label) .L ## label
@@ -38,6 +41,7 @@
 #define FEATURE_1_AND 0xc000
 #define FEATURE_1_BTI 1
 #define FEATURE_1_PAC 2
+#define FEATURE_1_GCS 4
 
 /* Supported features based on the code generation options.  */
 #if defined(__ARM_FEATURE_BTI_DEFAULT)
@@ -58,6 +62,12 @@
 # define AUTIASP
 #endif
 
+#if __ARM_FEATURE_GCS_DEFAULT
+# define GCS_FLAG FEATURE_1_GCS
+#else
+# define GCS_FLAG 0
+#endif
+
 #ifdef __ELF__
 #define HIDDEN(name) .hidden name
 #define SYMBOL_SIZE(name) .size name, .-name
@@ -88,8 +98,8 @@
 .previous
 
 /* Add GNU property note if built with branch protection.  */
-# if (BTI_FLAG|PAC_FLAG) != 0
-GNU_PROPERTY (FEATURE_1_AND, BTI_FLAG|PAC_FLAG)
+# if (BTI_FLAG|PAC_FLAG|GCS_FLAG) != 0
+GNU_PROPERTY (FEATURE_1_AND, BTI_FLAG|PAC_FLAG|GCS_FLAG)
 # endif
 #endif
 
@@ -106,3 +116,5 @@ GNU_PROPERTY (FEATURE_1_AND, BTI_FLAG|PAC_FLAG)
 #define END(name) \
   .cfi_endproc;\
   SYMBOL_SIZE(name)
+
+#endif
diff --git a/libgcc/config/aarch64/lse.S b/libgcc/config/aarch64/lse.S
index ecef47086c69..0c6c1b510663 100644
--- a/libgcc/config/aarch64/lse.S
+++ b/libgcc/config/aarch64/lse.S
@@ -85,6 +85,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If 
not, see
 # error
 #endif
 
+#undef L
+
 #if MODEL == 1
 # define SUFF  _relax
 # define A


[gcc(refs/vendors/ARM/heads/gcs-v2)] aarch64: Add -mbranch-protection=gcs option

2024-10-18 Thread Andre Simoes Dias Vieira via Gcc-cvs
https://gcc.gnu.org/g:6d65c23b8aa617b8a52042d99aa0337299769858

commit 6d65c23b8aa617b8a52042d99aa0337299769858
Author: Szabolcs Nagy 
Date:   Mon Jun 19 12:57:56 2023 +0100

aarch64: Add -mbranch-protection=gcs option

This enables Guarded Control Stack (GCS) compatible code generation.

The "standard" branch-protection type enables it, and the default
depends on the compiler default.

gcc/ChangeLog:

* config/aarch64/aarch64-protos.h (aarch_gcs_enabled): Declare.
* config/aarch64/aarch64.cc (aarch_gcs_enabled): Define.
(aarch_handle_no_branch_protection): Handle gcs.
(aarch_handle_standard_branch_protection): Handle gcs.
(aarch_handle_gcs_protection): New.
* config/aarch64/aarch64.opt: Add aarch_enable_gcs.
* configure: Regenerate.
* configure.ac: Handle gcs in --enable-standard-branch-protection.
* doc/invoke.texi: Document -mbranch-protection=gcs.

Diff:
---
 gcc/config/aarch64/aarch64-protos.h |  2 ++
 gcc/config/aarch64/aarch64.cc   | 24 
 gcc/config/aarch64/aarch64.opt  |  3 +++
 gcc/configure   |  2 +-
 gcc/configure.ac|  2 +-
 gcc/doc/invoke.texi |  5 +++--
 6 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-protos.h 
b/gcc/config/aarch64/aarch64-protos.h
index d03c1fe798b2..b8ec8a58c4e7 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -1125,4 +1125,6 @@ extern void aarch64_adjust_reg_alloc_order ();
 bool aarch64_optimize_mode_switching (aarch64_mode_entity);
 void aarch64_restore_za (rtx);
 
+extern bool aarch64_gcs_enabled ();
+
 #endif /* GCC_AARCH64_PROTOS_H */
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 102680a0efca..d59b276500d5 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -8502,6 +8502,13 @@ aarch_bti_j_insn_p (rtx_insn *insn)
   return GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_BTI_J;
 }
 
+/* Return TRUE if Guarded Control Stack is enabled.  */
+bool
+aarch64_gcs_enabled (void)
+{
+  return (aarch64_enable_gcs == 1);
+}
+
 /* Check if X (or any sub-rtx of X) is a PACIASP/PACIBSP instruction.  */
 bool
 aarch_pac_insn_p (rtx x)
@@ -18881,6 +1,7 @@ aarch64_handle_no_branch_protection (void)
 {
   aarch_ra_sign_scope = AARCH_FUNCTION_NONE;
   aarch_enable_bti = 0;
+  aarch64_enable_gcs = 0;
 }
 
 static void
@@ -18889,6 +18897,7 @@ aarch64_handle_standard_branch_protection (void)
   aarch_ra_sign_scope = AARCH_FUNCTION_NON_LEAF;
   aarch64_ra_sign_key = AARCH64_KEY_A;
   aarch_enable_bti = 1;
+  aarch64_enable_gcs = 1;
 }
 
 static void
@@ -18915,6 +18924,11 @@ aarch64_handle_bti_protection (void)
 {
   aarch_enable_bti = 1;
 }
+static void
+aarch64_handle_gcs_protection (void)
+{
+  aarch64_enable_gcs = 1;
+}
 
 static const struct aarch_branch_protect_type aarch64_pac_ret_subtypes[] = {
   { "leaf", false, aarch64_handle_pac_ret_leaf, NULL, 0 },
@@ -18929,6 +18943,7 @@ static const struct aarch_branch_protect_type 
aarch64_branch_protect_types[] =
   { "pac-ret", false, aarch64_handle_pac_ret_protection,
 aarch64_pac_ret_subtypes, ARRAY_SIZE (aarch64_pac_ret_subtypes) },
   { "bti", false, aarch64_handle_bti_protection, NULL, 0 },
+  { "gcs", false, aarch64_handle_gcs_protection, NULL, 0 },
   { NULL, false, NULL, NULL, 0 }
 };
 
@@ -19028,6 +19043,15 @@ aarch64_override_options (void)
 #endif
 }
 
+  if (aarch64_enable_gcs == 2)
+{
+#ifdef TARGET_ENABLE_GCS
+  aarch64_enable_gcs = 1;
+#else
+  aarch64_enable_gcs = 0;
+#endif
+}
+
   /* Return address signing is currently not supported for ILP32 targets.  For
  LP64 targets use the configured option in the absence of a command-line
  option for -mbranch-protection.  */
diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
index c2c9965b0625..36bc719b822d 100644
--- a/gcc/config/aarch64/aarch64.opt
+++ b/gcc/config/aarch64/aarch64.opt
@@ -45,6 +45,9 @@ uint64_t aarch64_isa_flags_1 = 0
 TargetVariable
 unsigned aarch_enable_bti = 2
 
+TargetVariable
+unsigned aarch64_enable_gcs = 2
+
 TargetVariable
 enum aarch64_key_type aarch64_ra_sign_key = AARCH64_KEY_A
 
diff --git a/gcc/configure b/gcc/configure
index 5acc42c1e4d9..8ed47b4dadbe 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -28044,7 +28044,7 @@ if test "${enable_standard_branch_protection+set}" = 
set; then :
   enableval=$enable_standard_branch_protection;
 case $enableval in
   yes)
-tm_defines="${tm_defines} TARGET_ENABLE_BTI=1 
TARGET_ENABLE_PAC_RET=1"
+tm_defines="${tm_defines} TARGET_ENABLE_BTI=1 
TARGET_ENABLE_PAC_RET=1 TARGET_ENABLE_GCS=1"
 ;;
   no)
 ;;
diff --git a/gcc/configure.ac b/gcc/configure.ac
index 23f4884eff9e..8a5fed516b37 100644
-

[gcc(refs/vendors/ARM/heads/gcs-v2)] aarch64: Add test for GCS ACLE defs

2024-10-18 Thread Andre Simoes Dias Vieira via Gcc-cvs
https://gcc.gnu.org/g:b2371fe0346d5eda4e07c26654a6558c275204fc

commit b2371fe0346d5eda4e07c26654a6558c275204fc
Author: Szabolcs Nagy 
Date:   Wed Jun 7 16:17:53 2023 +0100

aarch64: Add test for GCS ACLE defs

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/pragma_cpp_predefs_1.c: GCS test.

Diff:
---
 .../gcc.target/aarch64/pragma_cpp_predefs_1.c  | 30 ++
 1 file changed, 30 insertions(+)

diff --git a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c 
b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c
index 307fa3d67da9..6122cd55d662 100644
--- a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c
@@ -268,6 +268,36 @@
 #error "__ARM_FEATURE_RCPC is not defined but should be!"
 #endif
 
+#pragma GCC target ("arch=armv8.8-a+gcs")
+#ifndef __ARM_FEATURE_GCS
+#error "__ARM_FEATURE_GCS is not defined but should be!"
+#endif
+
+#pragma GCC target ("arch=armv8.8-a+nogcs")
+#ifdef __ARM_FEATURE_GCS
+#error "__ARM_FEATURE_GCS is defined but should not be!"
+#endif
+
+#pragma GCC target ("arch=armv8.8-a")
+#ifdef __ARM_FEATURE_GCS
+#error "__ARM_FEATURE_GCS is defined but should not be!"
+#endif
+
+#pragma GCC target ("branch-protection=gcs")
+#ifndef __ARM_FEATURE_GCS_DEFAULT
+#error "__ARM_FEATURE_GCS_DEFAULT is not defined but should be!"
+#endif
+
+#pragma GCC target ("branch-protection=none")
+#ifdef __ARM_FEATURE_GCS_DEFAULT
+#error "__ARM_FEATURE_GCS_DEFAULT is defined but should not be!"
+#endif
+
+#pragma GCC target ("branch-protection=standard")
+#ifndef __ARM_FEATURE_GCS_DEFAULT
+#error "__ARM_FEATURE_GCS_DEFAULT is not defined but should be!"
+#endif
+
 int
 foo (int a)
 {


[gcc(refs/vendors/ARM/heads/gcs-v2)] aarch64: Introduce indirect_return attribute

2024-10-18 Thread Andre Simoes Dias Vieira via Gcc-cvs
https://gcc.gnu.org/g:e55649e1e985a5d7b278687276427604b8089b9b

commit e55649e1e985a5d7b278687276427604b8089b9b
Author: Szabolcs Nagy 
Date:   Thu Dec 28 13:37:38 2023 +0000

aarch64: Introduce indirect_return attribute

Tail calls of indirect_return functions from non-indirect_return
functions are disallowed even if BTI is disabled, since the call
site may have BTI enabled.

Following x86, mismatching attribute on function pointers is not
a type error even though this can lead to bugs.

Needed for swapcontext within the same function when GCS is enabled.

gcc/ChangeLog:

* config/aarch64/aarch64.cc (aarch64_gnu_attributes): Add
indirect_return.
(aarch64_function_ok_for_sibcall): Disallow tail calls if caller
is non-indirect_return but callee is indirect_return.
(aarch64_comp_type_attributes): Check indirect_return attribute.
* config/arm/aarch-bti-insert.cc (call_needs_bti_j): New.
(rest_of_insert_bti): Use call_needs_bti_j.

Diff:
---
 gcc/config/aarch64/aarch64.cc  | 11 +++
 gcc/config/arm/aarch-bti-insert.cc | 36 
 2 files changed, 43 insertions(+), 4 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index c40e5778c98e..914f2902d253 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -853,6 +853,7 @@ static const attribute_spec aarch64_gnu_attributes[] =
affects_type_identity, handler, exclude } */
   { "aarch64_vector_pcs", 0, 0, false, true,  true,  true,
  handle_aarch64_vector_pcs_attribute, NULL },
+  { "indirect_return",0, 0, false, true, true, false, NULL, NULL },
   { "arm_sve_vector_bits", 1, 1, false, true,  false, true,
  aarch64_sve::handle_arm_sve_vector_bits_attribute,
  NULL },
@@ -6429,6 +6430,14 @@ aarch64_function_ok_for_sibcall (tree, tree exp)
 if (bool (aarch64_cfun_shared_flags (state))
!= bool (aarch64_fntype_shared_flags (fntype, state)))
   return false;
+
+  /* BTI J is needed where indirect_return functions may return
+ if bti is enabled there.  */
+  if (lookup_attribute ("indirect_return", TYPE_ATTRIBUTES (fntype))
+  && !lookup_attribute ("indirect_return",
+   TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl))))
+return false;
+
   return true;
 }
 
@@ -29085,6 +29094,8 @@ aarch64_comp_type_attributes (const_tree type1, 
const_tree type2)
 
   if (!check_attr ("gnu", "aarch64_vector_pcs"))
 return 0;
+  if (!check_attr ("gnu", "indirect_return"))
+return 0;
   if (!check_attr ("gnu", "Advanced SIMD type"))
 return 0;
   if (!check_attr ("gnu", "SVE type"))
diff --git a/gcc/config/arm/aarch-bti-insert.cc 
b/gcc/config/arm/aarch-bti-insert.cc
index 14d36971cd40..403afff91209 100644
--- a/gcc/config/arm/aarch-bti-insert.cc
+++ b/gcc/config/arm/aarch-bti-insert.cc
@@ -92,6 +92,35 @@ const pass_data pass_data_insert_bti =
   0, /* todo_flags_finish.  */
 };
 
+/* Decide if BTI J is needed after a call instruction.  */
+static bool
+call_needs_bti_j (rtx_insn *insn)
+{
+  /* Call returns twice, one of which may be indirect.  */
+  if (find_reg_note (insn, REG_SETJMP, NULL))
+return true;
+
+  /* Tail call does not return.  */
+  if (SIBLING_CALL_P (insn))
+return false;
+
+  /* Check if the function is marked to return indirectly.  */
+  rtx call = get_call_rtx_from (insn);
+  rtx fnaddr = XEXP (call, 0);
+  tree fndecl = NULL_TREE;
+  if (GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
+fndecl = SYMBOL_REF_DECL (XEXP (fnaddr, 0));
+  if (fndecl == NULL_TREE)
+fndecl = MEM_EXPR (fnaddr);
+  if (!fndecl)
+return false;
+  if (TREE_CODE (TREE_TYPE (fndecl)) != FUNCTION_TYPE
+  && TREE_CODE (TREE_TYPE (fndecl)) != METHOD_TYPE)
+return false;
+  tree fntype = TREE_TYPE (fndecl);
+  return lookup_attribute ("indirect_return", TYPE_ATTRIBUTES (fntype));
+}
+
 /* Insert the BTI instruction.  */
 /* This is implemented as a late RTL pass that runs before branch
shortening and does the following.  */
@@ -147,10 +176,9 @@ rest_of_insert_bti (void)
}
}
 
- /* Also look for calls to setjmp () which would be marked with
-REG_SETJMP note and put a BTI J after.  This is where longjump ()
-will return.  */
- if (CALL_P (insn) && (find_reg_note (insn, REG_SETJMP, NULL)))
+ /* Also look for calls that may return indirectly, such as setjmp,
+and put a BTI J after them.  */
+ if (CALL_P (insn) && call_needs_bti_j (insn))
{
  bti_insn = aarch_gen_bti_j ();
  emit_insn_after (bti_insn, insn);


[gcc r15-4462] middle-end: Fix VEC_PERM_EXPR lowering since relaxation of vector sizes

2024-10-18 Thread Tamar Christina via Gcc-cvs
https://gcc.gnu.org/g:55f898008ec8235897cf56c89f5599c3ec1bc963

commit r15-4462-g55f898008ec8235897cf56c89f5599c3ec1bc963
Author: Tamar Christina 
Date:   Fri Oct 18 10:36:19 2024 +0100

middle-end: Fix VEC_PERM_EXPR lowering since relaxation of vector sizes

In GCC 14 VEC_PERM_EXPR was relaxed to be able to permute to a 2x larger 
vector
than the size of the input vectors.  However various passes and 
transformations
were not updated to account for this.

I have patches in these areas that I will be upstreaming with individual 
patches
that expose them.

This one is that vectlower tries to lower based on the size of the input 
vectors
rather than the size of the output.  As a consequence it creates an invalid
vector of half the size.

Luckily we ICE because the resulting nunits doesn't match the vector size.

gcc/ChangeLog:

* tree-vect-generic.cc (lower_vec_perm): Use output vector size 
instead
of input vector when determining output nunits.

gcc/testsuite/ChangeLog:

* gcc.dg/vec-perm-lower.c: New test.

Diff:
---
 gcc/testsuite/gcc.dg/vec-perm-lower.c | 16 
 gcc/tree-vect-generic.cc  |  7 ---
 2 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vec-perm-lower.c 
b/gcc/testsuite/gcc.dg/vec-perm-lower.c
new file mode 100644
index 000000000000..da738fbeed80
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vec-perm-lower.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-fgimple -O2" } */
+
+typedef char v8qi __attribute__ ((vector_size (8)));
+typedef char v16qi __attribute__ ((vector_size (16)));
+
+v16qi __GIMPLE (ssa)
+foo (v8qi a, v8qi b)
+{
+  v16qi _5;
+
+  __BB(2):
+  _5 = __VEC_PERM (a, b, _Literal (unsigned char [[gnu::vector_size(16)]]) { 
_Literal (unsigned char) 0, _Literal (unsigned char) 16, _Literal (unsigned 
char) 1, _Literal (unsigned char) 17, _Literal (unsigned char) 2, _Literal 
(unsigned char) 18, _Literal (unsigned char) 3, _Literal (unsigned char) 19, 
_Literal (unsigned char) 4, _Literal (unsigned char) 20, _Literal (unsigned 
char) 5, _Literal (unsigned char) 21, _Literal (unsigned char) 6, _Literal 
(unsigned char) 22, _Literal (unsigned char) 7, _Literal (unsigned char) 23 });
+  return _5;
+
+}
diff --git a/gcc/tree-vect-generic.cc b/gcc/tree-vect-generic.cc
index 3041fb8fcf23..f86f7eabb255 100644
--- a/gcc/tree-vect-generic.cc
+++ b/gcc/tree-vect-generic.cc
@@ -1500,6 +1500,7 @@ lower_vec_perm (gimple_stmt_iterator *gsi)
   tree mask = gimple_assign_rhs3 (stmt);
   tree vec0 = gimple_assign_rhs1 (stmt);
   tree vec1 = gimple_assign_rhs2 (stmt);
+  tree res_vect_type = TREE_TYPE (gimple_assign_lhs (stmt));
   tree vect_type = TREE_TYPE (vec0);
   tree mask_type = TREE_TYPE (mask);
   tree vect_elt_type = TREE_TYPE (vect_type);
@@ -1512,7 +1513,7 @@ lower_vec_perm (gimple_stmt_iterator *gsi)
   location_t loc = gimple_location (gsi_stmt (*gsi));
   unsigned i;
 
-  if (!TYPE_VECTOR_SUBPARTS (vect_type).is_constant (&elements))
+  if (!TYPE_VECTOR_SUBPARTS (res_vect_type).is_constant (&elements))
 return;
 
   if (TREE_CODE (mask) == SSA_NAME)
@@ -1672,9 +1673,9 @@ lower_vec_perm (gimple_stmt_iterator *gsi)
 }
 
   if (constant_p)
-constr = build_vector_from_ctor (vect_type, v);
+constr = build_vector_from_ctor (res_vect_type, v);
   else
-constr = build_constructor (vect_type, v);
+constr = build_constructor (res_vect_type, v);
   gimple_assign_set_rhs_from_tree (gsi, constr);
   update_stmt (gsi_stmt (*gsi));
 }


[gcc(refs/vendors/ARM/heads/gcs-v2)] aarch64: Add GCS support to the unwinder

2024-10-18 Thread Andre Simoes Dias Vieira via Gcc-cvs
https://gcc.gnu.org/g:cd55b11dfd28cf2ec1d942a4db9102b3c81f07f9

commit cd55b11dfd28cf2ec1d942a4db9102b3c81f07f9
Author: Szabolcs Nagy 
Date:   Wed Apr 19 14:01:36 2023 +0100

aarch64: Add GCS support to the unwinder

Follows the current linux ABI that uses single signal entry token
and shared shadow stack between thread and alt stack.
Could be behind __ARM_FEATURE_GCS_DEFAULT ifdef (only do anything
special with gcs compat codegen) but there is a runtime check anyway.

Change affected tests to be compatible with -mbranch-protection=standard

gcc/testsuite/ChangeLog:

* g++.target/aarch64/pr94515-1.C (f1_no_pac_ret): Update.
(main): Update.
Co-authored-by: Matthieu Longo 

* gcc.target/aarch64/pr104689.c (unwind): Update.
Co-authored-by: Matthieu Longo 

libgcc/ChangeLog:

* config/aarch64/aarch64-unwind.h (_Unwind_Frames_Extra): Update.
(_Unwind_Frames_Increment): Define.

Co-authored-by: Matthieu Longo 

Diff:
---
 gcc/testsuite/g++.target/aarch64/pr94515-1.C |  6 +--
 gcc/testsuite/gcc.target/aarch64/pr104689.c  |  3 +-
 libgcc/config/aarch64/aarch64-unwind.h   | 59 +++-
 3 files changed, 63 insertions(+), 5 deletions(-)

diff --git a/gcc/testsuite/g++.target/aarch64/pr94515-1.C 
b/gcc/testsuite/g++.target/aarch64/pr94515-1.C
index 359039e17536..8175ea50c321 100644
--- a/gcc/testsuite/g++.target/aarch64/pr94515-1.C
+++ b/gcc/testsuite/g++.target/aarch64/pr94515-1.C
@@ -5,7 +5,7 @@
 
 volatile int zero = 0;
 
-__attribute__((noinline, target("branch-protection=none")))
+__attribute__((noinline, target("branch-protection=bti")))
 void unwind (void)
 {
   if (zero == 0)
@@ -22,7 +22,7 @@ int test (int z)
 // autiasp -> cfi_negate_ra_state: RA_signing_SP -> RA_no_signing
 return 1;
   } else {
-// 2nd cfi_negate_ra_state because the CFI directives are processed 
linearily.
+// 2nd cfi_negate_ra_state because the CFI directives are processed 
linearly.
 // At this point, the unwinder would believe that the address is not signed
 // due to the previous return. That's why the compiler has to emit second
 // cfi_negate_ra_state to mean that the return address is still signed.
@@ -33,7 +33,7 @@ int test (int z)
   }
 }
 
-__attribute__((target("branch-protection=none")))
+__attribute__((target("branch-protection=bti")))
 int main ()
 {
   try {
diff --git a/gcc/testsuite/gcc.target/aarch64/pr104689.c 
b/gcc/testsuite/gcc.target/aarch64/pr104689.c
index 3b7adbdfe7d6..9688ecc85f99 100644
--- a/gcc/testsuite/gcc.target/aarch64/pr104689.c
+++ b/gcc/testsuite/gcc.target/aarch64/pr104689.c
@@ -98,6 +98,7 @@ asm(""
 "unusual_no_pac_ret:\n"
 "  .cfi_startproc\n"
 "  " SET_RA_STATE_0 "\n"
+"  bti c\n"
 "  stp x29, x30, [sp, -16]!\n"
 "  .cfi_def_cfa_offset 16\n"
 "  .cfi_offset 29, -16\n"
@@ -121,7 +122,7 @@ static void f2_pac_ret (void)
   die ();
 }
 
-__attribute__((target("branch-protection=none")))
+__attribute__((target("branch-protection=bti")))
 static void f1_no_pac_ret (void)
 {
   unusual_pac_ret (f2_pac_ret);
diff --git a/libgcc/config/aarch64/aarch64-unwind.h 
b/libgcc/config/aarch64/aarch64-unwind.h
index 4d36f0b26f70..cf4ec749c055 100644
--- a/libgcc/config/aarch64/aarch64-unwind.h
+++ b/libgcc/config/aarch64/aarch64-unwind.h
@@ -178,6 +178,9 @@ aarch64_demangle_return_addr (struct _Unwind_Context 
*context,
   return addr;
 }
 
+/* GCS enable flag for chkfeat instruction.  */
+#define CHKFEAT_GCS 1
+
 /* SME runtime function local to libgcc, streaming compatible
and preserves more registers than the base PCS requires, but
we don't rely on that here.  */
@@ -185,12 +188,66 @@ __attribute__ ((visibility ("hidden")))
 void __libgcc_arm_za_disable (void);
 
 /* Disable the SME ZA state in case an unwound frame used the ZA
-   lazy saving scheme.  */
+   lazy saving scheme. And unwind the GCS for EH.  */
 #undef _Unwind_Frames_Extra
 #define _Unwind_Frames_Extra(x)\
   do   \
 {  \
   __libgcc_arm_za_disable ();  \
+  if (__builtin_aarch64_chkfeat (CHKFEAT_GCS) == 0)\
+   {   \
+ for (_Unwind_Word n = (x); n != 0; n--)   \
+   __builtin_aarch64_gcspopm ();   \
+   }   \
+}  \
+  while (0)
+
+/* On signal entry the OS places a token on the GCS that can be used to
+   verify the integrity of the GCS pointer on signal return.  It also
+   places the signal handler return address (the restorer that calls the
+   signal return syscall) on the GCS so the handler can return.
+   Because of this token, each stack frame

[gcc r14-10802] RISC-V:Bugfix for C++ code compilation failure with rv32imafc_zve32f[pr116883]

2024-10-18 Thread Li Xu via Gcc-cvs
https://gcc.gnu.org/g:ab465ce3a948cf57a315ea5b0c71780def0c8425

commit r14-10802-gab465ce3a948cf57a315ea5b0c71780def0c8425
Author: Li Xu 
Date:   Thu Oct 10 08:51:19 2024 -0600

RISC-V:Bugfix for C++ code compilation failure with 
rv32imafc_zve32f[pr116883]

From: xuli 

Example as follows:

int main()
{
  unsigned long arraya[128], arrayb[128], arrayc[128];
  for (int i = 0; i < 128; i++)
   {
  arraya[i] = arrayb[i] + arrayc[i];
   }
  return 0;
}

Compiled with -march=rv32imafc_zve32f -mabi=ilp32f, it will cause a 
compilation issue:

riscv_vector.h:40:25: error: ambiguating new declaration of 'vint64m4_t 
__riscv_vle64(vbool16_t, const long long int*, unsigned int)'
   40 | #pragma riscv intrinsic "vector"
  | ^~~~
riscv_vector.h:40:25: note: old declaration 'vint64m1_t 
__riscv_vle64(vbool64_t, const long long int*, unsigned int)'

With zvl=32b, vbool16_t is registered in init_builtins() with
type_common.precision=0x101 (nunits=2), mode_nunits[E_RVVMF16BI]=[2,2].

Normally, vbool64_t is only valid when TARGET_MIN_VLEN > 32, so vbool64_t
is not registered in init_builtins(), meaning vbool64_t=null.

In order to implement __attribute__((target("arch=+v"))), we must register
all vector types and all RVV intrinsics. Therefore, vbool64_t will be 
registered
by default with zvl=128b in reinit_builtins(), resulting in
type_common.precision=0x101 (nunits=2) and mode_nunits[E_RVVMF64BI]=[2,2].

We then get TYPE_VECTOR_SUBPARTS(vbool16_t) == 
TYPE_VECTOR_SUBPARTS(vbool64_t),
calculated using type_common.precision, resulting in 2. Since vbool16_t and
vbool64_t have the same element type (boolean_type), the compiler treats 
them
as the same type, leading to a re-declaration conflict.

After all types and intrinsics have been registered, processing
__attribute__((target("arch=+v"))) will update the parameters option and
init_adjust_machine_modes. Therefore, to avoid conflicts, we can choose
zvl=4096b for the null type reinit_builtins().

command option zvl=32b
  type nunits
  vbool64_t => null
  vbool32_t=> [1,1]
  vbool16_t=> [2,2]
  vbool8_t=>  [4,4]
  vbool4_t=>  [8,8]
  vbool2_t=>  [16,16]
  vbool1_t=>  [32,32]

reinit zvl=128b
  vbool64_t => [2,2] conflict with zvl32b vbool16_t=> [2,2]
reinit zvl=256b
  vbool64_t => [4,4] conflict with zvl32b vbool8_t=>  [4,4]
reinit zvl=512b
  vbool64_t => [8,8] conflict with zvl32b vbool4_t=>  [8,8]
reinit zvl=1024b
  vbool64_t => [16,16] conflict with zvl32b vbool2_t=>  [16,16]
reinit zvl=2048b
  vbool64_t => [32,32] conflict with zvl32b vbool1_t=>  [32,32]
reinit zvl=4096b
  vbool64_t => [64,64] zvl=4096b is ok

Signed-off-by: xuli 

PR target/116883

gcc/ChangeLog:

* config/riscv/riscv-c.cc (riscv_pragma_intrinsic_flags_pollute): 
Choose zvl4096b
to initialize null type.

gcc/testsuite/ChangeLog:

* g++.target/riscv/rvv/base/pr116883.C: New test.

(cherry picked from commit fd8e590ff11266598d8f9b3d03d72ba7a6100512)

Diff:
---
 gcc/config/riscv/riscv-c.cc|  7 ++-
 gcc/testsuite/g++.target/riscv/rvv/base/pr116883.C | 15 +++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv-c.cc b/gcc/config/riscv/riscv-c.cc
index 71112d9c66d7..c59f408d3a8e 100644
--- a/gcc/config/riscv/riscv-c.cc
+++ b/gcc/config/riscv/riscv-c.cc
@@ -59,7 +59,12 @@ riscv_pragma_intrinsic_flags_pollute (struct 
pragma_intrinsic_flags *flags)
   riscv_zvl_flags = riscv_zvl_flags
 | MASK_ZVL32B
 | MASK_ZVL64B
-| MASK_ZVL128B;
+| MASK_ZVL128B
+| MASK_ZVL256B
+| MASK_ZVL512B
+| MASK_ZVL1024B
+| MASK_ZVL2048B
+| MASK_ZVL4096B;
 
   riscv_vector_elen_flags = riscv_vector_elen_flags
 | MASK_VECTOR_ELEN_32
diff --git a/gcc/testsuite/g++.target/riscv/rvv/base/pr116883.C 
b/gcc/testsuite/g++.target/riscv/rvv/base/pr116883.C
new file mode 100644
index 000000000000..15bbec40bdde
--- /dev/null
+++ b/gcc/testsuite/g++.target/riscv/rvv/base/pr116883.C
@@ -0,0 +1,15 @@
+/* Test that we do not have ice when compile */
+/* { dg-do compile } */
+/* { dg-options "-march=rv32imafc_zve32f -mabi=ilp32f" } */
+
+#include <riscv_vector.h>
+
+int main()
+{
+  unsigned long arraya[128], arrayb[128], arrayc[128];
+  for (int i; i < 128; i++)
+   {
+  arraya[i] = arrayb[i] + arrayc[i];
+   }
+  return 0;
+}


[gcc(refs/vendors/ARM/heads/gcs-v2)] aarch64: Add __builtin_aarch64_gcs* tests

2024-10-18 Thread Andre Simoes Dias Vieira via Gcc-cvs
https://gcc.gnu.org/g:340b1f5292836f122b08db5b6cbee99aff9a867d

commit 340b1f5292836f122b08db5b6cbee99aff9a867d
Author: Szabolcs Nagy 
Date:   Tue Jun 6 17:35:51 2023 +0100

aarch64: Add __builtin_aarch64_gcs* tests

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/gcspopm-1.c: New test.
* gcc.target/aarch64/gcspr-1.c: New test.
* gcc.target/aarch64/gcsss-1.c: New test.

Diff:
---
 gcc/testsuite/gcc.target/aarch64/gcspopm-1.c | 69 
 gcc/testsuite/gcc.target/aarch64/gcspr-1.c   | 31 +
 gcc/testsuite/gcc.target/aarch64/gcsss-1.c   | 49 
 3 files changed, 149 insertions(+)

diff --git a/gcc/testsuite/gcc.target/aarch64/gcspopm-1.c 
b/gcc/testsuite/gcc.target/aarch64/gcspopm-1.c
new file mode 100644
index 000000000000..6e6add39cf75
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/gcspopm-1.c
@@ -0,0 +1,69 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mbranch-protection=none" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+/*
+**foo1:
+** sysl xzr, #3, c7, c7, #1 // gcspopm
+** ret
+*/
+void
+foo1 (void)
+{
+  __builtin_aarch64_gcspopm ();
+}
+
+/*
+**foo2:
+** mov x0, 0
+** sysl x0, #3, c7, c7, #1 // gcspopm
+** ret
+*/
+unsigned long long
+foo2 (void)
+{
+  return __builtin_aarch64_gcspopm ();
+}
+
+/*
+**foo3:
+** mov x16, 1
+** (
+** mov x0, 0
+** hint 40 // chkfeat x16
+** |
+** hint 40 // chkfeat x16
+** mov x0, 0
+** )
+** cbz x16, .*
+** ret
+** mov x0, 0
+** sysl x0, #3, c7, c7, #1 // gcspopm
+** ret
+*/
+unsigned long long
+foo3 (void)
+{
+  if (__builtin_aarch64_chkfeat (1) == 0)
+return __builtin_aarch64_gcspopm ();
+  return 0;
+}
+
+/*
+**foo4:
+** sysl xzr, #3, c7, c7, #1 // gcspopm
+** mov x0, 0
+** sysl x0, #3, c7, c7, #1 // gcspopm
+** sysl xzr, #3, c7, c7, #1 // gcspopm
+** ret
+*/
+unsigned long long
+foo4 (void)
+{
+  unsigned long long a = __builtin_aarch64_gcspopm ();
+  unsigned long long b = __builtin_aarch64_gcspopm ();
+  unsigned long long c = __builtin_aarch64_gcspopm ();
+  (void) a;
+  (void) c;
+  return b;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/gcspr-1.c 
b/gcc/testsuite/gcc.target/aarch64/gcspr-1.c
new file mode 100644
index 000000000000..0e651979551d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/gcspr-1.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mbranch-protection=none" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+/*
+**foo1:
+** mrs x0, s3_3_c2_c5_1 // gcspr_el0
+** ret
+*/
+void *
+foo1 (void)
+{
+  return __builtin_aarch64_gcspr ();
+}
+
+/*
+**foo2:
+** mrs x[0-9]*, s3_3_c2_c5_1 // gcspr_el0
+** sysl xzr, #3, c7, c7, #1 // gcspopm
+** mrs x[0-9]*, s3_3_c2_c5_1 // gcspr_el0
+** sub x0, x[0-9]*, x[0-9]*
+** ret
+*/
+long
+foo2 (void)
+{
+  const char *p = __builtin_aarch64_gcspr ();
+  __builtin_aarch64_gcspopm ();
+  const char *q = __builtin_aarch64_gcspr ();
+  return p - q;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/gcsss-1.c 
b/gcc/testsuite/gcc.target/aarch64/gcsss-1.c
new file mode 100644
index 000000000000..025c7fee647f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/gcsss-1.c
@@ -0,0 +1,49 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mbranch-protection=none" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+/*
+**foo1:
+** sys #3, c7, c7, #2, x0 // gcsss1
+** mov x[0-9]*, 0
+** sysl x[0-9]*, #3, c7, c7, #3 // gcsss2
+** ret
+*/
+void
+foo1 (void *p)
+{
+  __builtin_aarch64_gcsss (p);
+}
+
+/*
+**foo2:
+** sys #3, c7, c7, #2, x0 // gcsss1
+** mov x0, 0
+** sysl x0, #3, c7, c7, #3 // gcsss2
+** ret
+*/
+void *
+foo2 (void *p)
+{
+  return __builtin_aarch64_gcsss (p);
+}
+
+/*
+**foo3:
+** mov x16, 1
+** hint 40 // chkfeat x16
+** cbnz x16, .*
+** sys #3, c7, c7, #2, x0 // gcsss1
+** mov x0, 0
+** sysl x0, #3, c7, c7, #3 // gcsss2
+** ret
+** mov x0, 0
+** ret
+*/
+void *
+foo3 (void *p)
+{
+  if (__builtin_aarch64_chkfeat (1) == 0)
+return __builtin_aarch64_gcsss (p);
+  return 0;
+}


[gcc(refs/vendors/ARM/heads/gcs-v2)] aarch64: Add __builtin_aarch64_chkfeat

2024-10-18 Thread Andre Simoes Dias Vieira via Gcc-cvs
https://gcc.gnu.org/g:f518b0a9d9cc2fac46b67f159b59d3af04a3

commit f518b0a9d9cc2fac46b67f159b59d3af04a3
Author: Szabolcs Nagy 
Date:   Tue May 9 15:24:18 2023 +0100

aarch64: Add __builtin_aarch64_chkfeat

Builtin for chkfeat: the input argument is used to initialize x16 then
execute chkfeat and return the updated x16.

Note: ACLE __chkfeat(x) plans to flip the bits to be more intuitive
(xor the input to output), but for the builtin that seems unnecessary
complication.

gcc/ChangeLog:

* config/aarch64/aarch64-builtins.cc (enum aarch64_builtins):
Define AARCH64_BUILTIN_CHKFEAT.
(aarch64_general_init_builtins): Handle chkfeat.
(aarch64_general_expand_builtin): Handle chkfeat.

Diff:
---
 gcc/config/aarch64/aarch64-builtins.cc | 24 
 1 file changed, 24 insertions(+)

diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
b/gcc/config/aarch64/aarch64-builtins.cc
index 7d737877e0bf..6a573025d137 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -875,6 +875,8 @@ enum aarch64_builtins
   AARCH64_PLDX,
   AARCH64_PLI,
   AARCH64_PLIX,
+  /* Armv8.9-A / Armv9.4-A builtins.  */
+  AARCH64_BUILTIN_CHKFEAT,
   AARCH64_BUILTIN_MAX
 };
 
@@ -2280,6 +2282,18 @@ aarch64_general_init_builtins (void)
   if (!TARGET_ILP32)
 aarch64_init_pauth_hint_builtins ();
 
+  if (TARGET_TME)
+aarch64_init_tme_builtins ();
+
+  if (TARGET_MEMTAG)
+aarch64_init_memtag_builtins ();
+
+  tree ftype_chkfeat
+= build_function_type_list (uint64_type_node, uint64_type_node, NULL);
+  aarch64_builtin_decls[AARCH64_BUILTIN_CHKFEAT]
+= aarch64_general_add_builtin ("__builtin_aarch64_chkfeat", ftype_chkfeat,
+  AARCH64_BUILTIN_CHKFEAT);
+
   if (in_lto_p)
 handle_arm_acle_h ();
 }
@@ -3484,6 +3498,16 @@ aarch64_general_expand_builtin (unsigned int fcode, tree 
exp, rtx target,
 case AARCH64_PLIX:
   aarch64_expand_prefetch_builtin (exp, fcode);
   return target;
+
+case AARCH64_BUILTIN_CHKFEAT:
+  {
+   rtx x16_reg = gen_rtx_REG (DImode, R16_REGNUM);
+   op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
+   emit_move_insn (x16_reg, op0);
+   expand_insn (CODE_FOR_aarch64_chkfeat, 0, 0);
+   emit_move_insn (target, x16_reg);
+   return target;
+  }
 }
 
   if (fcode >= AARCH64_SIMD_BUILTIN_BASE && fcode <= AARCH64_SIMD_BUILTIN_MAX)


[gcc(refs/vendors/ARM/heads/gcs-v2)] aarch64: Add GCS builtins

2024-10-18 Thread Andre Simoes Dias Vieira via Gcc-cvs
https://gcc.gnu.org/g:3e01c17e4dd3af175ba37cf5ed697e62bb9bec99

commit 3e01c17e4dd3af175ba37cf5ed697e62bb9bec99
Author: Szabolcs Nagy 
Date:   Tue May 9 16:21:28 2023 +0100

aarch64: Add GCS builtins

Add new builtins for GCS:

  void *__builtin_aarch64_gcspr (void)
  uint64_t __builtin_aarch64_gcspopm (void)
  void *__builtin_aarch64_gcsss (void *)

The builtins are always enabled, but should be used behind runtime
checks in case the target does not support GCS. They are thin
wrappers around the corresponding instructions.

The GCS pointer is modelled with void * type (normal stores do not
work on GCS memory, but it is writable via the gcsss operation or
via GCSSTR if enabled so not const) and an entry on the GCS is
modelled with uint64_t (since it has fixed size and can be a token
that's not a pointer).

gcc/ChangeLog:

* config/aarch64/aarch64-builtins.cc (enum aarch64_builtins): Add
AARCH64_BUILTIN_GCSPR, AARCH64_BUILTIN_GCSPOPM, 
AARCH64_BUILTIN_GCSSS.
(aarch64_init_gcs_builtins): New.
(aarch64_general_init_builtins): Call aarch64_init_gcs_builtins.
(aarch64_expand_gcs_builtin): New.
(aarch64_general_expand_builtin): Call aarch64_expand_gcs_builtin.

Diff:
---
 gcc/config/aarch64/aarch64-builtins.cc | 70 ++
 1 file changed, 70 insertions(+)

diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
b/gcc/config/aarch64/aarch64-builtins.cc
index 6a573025d137..1bab6579cf72 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -877,6 +877,9 @@ enum aarch64_builtins
   AARCH64_PLIX,
   /* Armv8.9-A / Armv9.4-A builtins.  */
   AARCH64_BUILTIN_CHKFEAT,
+  AARCH64_BUILTIN_GCSPR,
+  AARCH64_BUILTIN_GCSPOPM,
+  AARCH64_BUILTIN_GCSSS,
   AARCH64_BUILTIN_MAX
 };
 
@@ -2241,6 +2244,29 @@ aarch64_init_fpsr_fpcr_builtins (void)
   AARCH64_BUILTIN_SET_FPSR64);
 }
 
+/* Add builtins for Guarded Control Stack instructions.  */
+
+static void
+aarch64_init_gcs_builtins (void)
+{
+  tree ftype;
+
+  ftype = build_function_type_list (ptr_type_node, NULL);
+  aarch64_builtin_decls[AARCH64_BUILTIN_GCSPR]
+= aarch64_general_add_builtin ("__builtin_aarch64_gcspr", ftype,
+  AARCH64_BUILTIN_GCSPR);
+
+  ftype = build_function_type_list (uint64_type_node, NULL);
+  aarch64_builtin_decls[AARCH64_BUILTIN_GCSPOPM]
+= aarch64_general_add_builtin ("__builtin_aarch64_gcspopm", ftype,
+  AARCH64_BUILTIN_GCSPOPM);
+
+  ftype = build_function_type_list (ptr_type_node, ptr_type_node, NULL);
+  aarch64_builtin_decls[AARCH64_BUILTIN_GCSSS]
+= aarch64_general_add_builtin ("__builtin_aarch64_gcsss", ftype,
+  AARCH64_BUILTIN_GCSSS);
+}
+
 /* Initialize all builtins in the AARCH64_BUILTIN_GENERAL group.  */
 
 void
@@ -2294,6 +2320,8 @@ aarch64_general_init_builtins (void)
 = aarch64_general_add_builtin ("__builtin_aarch64_chkfeat", ftype_chkfeat,
   AARCH64_BUILTIN_CHKFEAT);
 
+  aarch64_init_gcs_builtins ();
+
   if (in_lto_p)
 handle_arm_acle_h ();
 }
@@ -3373,6 +3401,43 @@ aarch64_expand_fpsr_fpcr_getter (enum insn_code icode, 
machine_mode mode,
   return op.value;
 }
 
+/* Expand GCS builtin EXP with code FCODE, putting the result
+   into TARGET.  If IGNORE is true the return value is ignored.  */
+
+rtx
+aarch64_expand_gcs_builtin (tree exp, rtx target, int fcode, int ignore)
+{
+  if (fcode == AARCH64_BUILTIN_GCSPR)
+{
+  expand_operand op;
+  create_output_operand (&op, target, DImode);
+  expand_insn (CODE_FOR_aarch64_load_gcspr, 1, &op);
+  return op.value;
+}
+  if (fcode == AARCH64_BUILTIN_GCSPOPM && ignore)
+{
+  expand_insn (CODE_FOR_aarch64_gcspopm_xzr, 0, 0);
+  return target;
+}
+  if (fcode == AARCH64_BUILTIN_GCSPOPM)
+{
+  expand_operand op;
+  create_output_operand (&op, target, Pmode);
+  expand_insn (CODE_FOR_aarch64_gcspopm, 1, &op);
+  return op.value;
+}
+  if (fcode == AARCH64_BUILTIN_GCSSS)
+{
+  expand_operand ops[2];
+  rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 0));
+  create_output_operand (&ops[0], target, Pmode);
+  create_input_operand (&ops[1], op1, Pmode);
+  expand_insn (CODE_FOR_aarch64_gcsss, 2, ops);
+  return ops[0].value;
+}
+  gcc_unreachable ();
+}
+
 /* Expand an expression EXP that calls built-in function FCODE,
with result going to TARGET if that's convenient.  IGNORE is true
if the result of the builtin is ignored.  */
@@ -3508,6 +3573,11 @@ aarch64_general_expand_builtin (unsigned int fcode, tree 
exp, rtx target,
emit_move_insn (target, x16_reg);
return target;
   }
+
+case AARCH64_BUILTIN_GCSPR:
+case AARCH64_BUILTIN_GCSPOPM:
+case AARCH64_BUILTIN_GCSSS:
+ 

[gcc(refs/vendors/ARM/heads/gcs-v2)] aarch64: Add target pragma tests for gcs

2024-10-18 Thread Andre Simoes Dias Vieira via Gcc-cvs
https://gcc.gnu.org/g:81a2021312ff6951ad53e055891c8710137ecf26

commit 81a2021312ff6951ad53e055891c8710137ecf26
Author: Szabolcs Nagy 
Date:   Fri Jun 30 16:50:23 2023 +0100

aarch64: Add target pragma tests for gcs

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/pragma_cpp_predefs_4.c: Add gcs specific
tests.

Diff:
---
 .../gcc.target/aarch64/pragma_cpp_predefs_4.c  | 35 ++
 1 file changed, 35 insertions(+)

diff --git a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c 
b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c
index 8e7076307741..417293d4d5ad 100644
--- a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c
@@ -91,6 +91,9 @@
 #if __ARM_FEATURE_PAC_DEFAULT != 1
 #error Foo
 #endif
+#ifndef __ARM_FEATURE_GCS_DEFAULT
+#error Foo
+#endif
 
 #pragma GCC target ("branch-protection=none")
 #ifdef __ARM_FEATURE_BTI_DEFAULT
@@ -99,6 +102,9 @@
 #ifdef __ARM_FEATURE_PAC_DEFAULT
 #error Foo
 #endif
+#ifdef __ARM_FEATURE_GCS_DEFAULT
+#error Foo
+#endif
 
 #pragma GCC push_options
 #pragma GCC target "branch-protection=bti+pac-ret"
@@ -117,6 +123,9 @@
 #ifdef __ARM_FEATURE_PAC_DEFAULT
 #error Foo
 #endif
+#ifdef __ARM_FEATURE_GCS_DEFAULT
+#error Foo
+#endif
 
 #pragma GCC target "branch-protection=pac-ret"
 #ifdef __ARM_FEATURE_BTI_DEFAULT
@@ -133,3 +142,29 @@
 #if __ARM_FEATURE_PAC_DEFAULT != 6
 #error Foo
 #endif
+
+#pragma GCC target "branch-protection=gcs"
+#ifdef __ARM_FEATURE_BTI_DEFAULT
+#error Foo
+#endif
+#ifdef __ARM_FEATURE_PAC_DEFAULT
+#error Foo
+#endif
+#ifndef __ARM_FEATURE_GCS_DEFAULT
+#error Foo
+#endif
+
+#pragma GCC target "arch=armv8.8-a+gcs"
+#ifndef __ARM_FEATURE_GCS
+#error Foo
+#endif
+
+#pragma GCC target "arch=armv8.8-a+nogcs"
+#ifdef __ARM_FEATURE_GCS
+#error Foo
+#endif
+
+#pragma GCC target "arch=armv8.8-a"
+#ifdef __ARM_FEATURE_GCS
+#error Foo
+#endif


[gcc(refs/vendors/ARM/heads/gcs-v2)] aarch64: Add support for chkfeat insn

2024-10-18 Thread Andre Simoes Dias Vieira via Gcc-cvs
https://gcc.gnu.org/g:44b1bb47545865fa53621ad8a59aa898f404c606

commit 44b1bb47545865fa53621ad8a59aa898f404c606
Author: Szabolcs Nagy 
Date:   Tue May 9 15:37:49 2023 +0100

aarch64: Add support for chkfeat insn

This is a hint space instruction to check for enabled HW features and
update the x16 register accordingly.

Use unspec_volatile to prevent reordering it around calls since calls
can enable or disable HW features.

gcc/ChangeLog:

* config/aarch64/aarch64.md (aarch64_chkfeat): New.

Diff:
---
 gcc/config/aarch64/aarch64.md | 9 +
 1 file changed, 9 insertions(+)

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index c54b29cd64b9..43bed0ce10fd 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -381,6 +381,7 @@
 UNSPECV_BTI_C  ; Represent BTI c.
 UNSPECV_BTI_J  ; Represent BTI j.
 UNSPECV_BTI_JC ; Represent BTI jc.
+UNSPECV_CHKFEAT; Represent CHKFEAT X16.
 UNSPECV_TSTART ; Represent transaction start.
 UNSPECV_TCOMMIT; Represent transaction commit.
 UNSPECV_TCANCEL; Represent transaction cancel.
@@ -8312,6 +8313,14 @@
   "msr\tnzcv, %0"
 )
 
+;; CHKFEAT instruction
+(define_insn "aarch64_chkfeat"
+  [(set (reg:DI R16_REGNUM)
+(unspec_volatile:DI [(reg:DI R16_REGNUM)] UNSPECV_CHKFEAT))]
+  ""
+  "hint\\t40 // chkfeat x16"
+)
+
 ;; AdvSIMD Stuff
 (include "aarch64-simd.md")


[gcc(refs/vendors/ARM/heads/gcs-v2)] aarch64: Add non-local goto and jump tests for GCS

2024-10-18 Thread Andre Simoes Dias Vieira via Gcc-cvs
https://gcc.gnu.org/g:81b86dc8c2192f53a0ad03994645603488fe8393

commit 81b86dc8c2192f53a0ad03994645603488fe8393
Author: Szabolcs Nagy 
Date:   Wed Jun 7 10:58:06 2023 +0100

aarch64: Add non-local goto and jump tests for GCS

These are scan asm tests only, relying on existing execution tests
for runtime coverage.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/gcs-nonlocal-1.c: New test.
* gcc.target/aarch64/gcs-nonlocal-2.c: New test.

Diff:
---
 gcc/testsuite/gcc.target/aarch64/gcs-nonlocal-1.c | 25 +++
 gcc/testsuite/gcc.target/aarch64/gcs-nonlocal-2.c | 21 +++
 2 files changed, 46 insertions(+)

diff --git a/gcc/testsuite/gcc.target/aarch64/gcs-nonlocal-1.c 
b/gcc/testsuite/gcc.target/aarch64/gcs-nonlocal-1.c
new file mode 100644
index ..821fab816f9a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/gcs-nonlocal-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mbranch-protection=gcs" } */
+/* { dg-final { scan-assembler-times "hint\\t40 // chkfeat x16" 2 } } */
+/* { dg-final { scan-assembler-times "mrs\\tx\[0-9\]+, s3_3_c2_c5_1 // 
gcspr_el0" 2 } } */
+/* { dg-final { scan-assembler-times "sysl\\txzr, #3, c7, c7, #1 // gcspopm" 1 
} } */
+
+int bar1 (int);
+int bar2 (int);
+
+void foo (int cmd)
+{
+  __label__ start;
+  int x = 0;
+
+  void nonlocal_goto (void)
+  {
+x++;
+goto start;
+  }
+
+start:
+  while (bar1 (x))
+if (bar2 (x))
+  nonlocal_goto ();
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/gcs-nonlocal-2.c 
b/gcc/testsuite/gcc.target/aarch64/gcs-nonlocal-2.c
new file mode 100644
index ..63dbce36e1ed
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/gcs-nonlocal-2.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mbranch-protection=gcs" } */
+/* { dg-final { scan-assembler-times "hint\\t40 // chkfeat x16" 2 } } */
+/* { dg-final { scan-assembler-times "mrs\\tx\[0-9\]+, s3_3_c2_c5_1 // 
gcspr_el0" 2 } } */
+/* { dg-final { scan-assembler-times "sysl\\txzr, #3, c7, c7, #1 // gcspopm" 1 
} } */
+
+void longj (void *buf)
+{
+  __builtin_longjmp (buf, 1);
+}
+
+void foo (void);
+void bar (void);
+
+void setj (void *buf)
+{
+  if (__builtin_setjmp (buf))
+foo ();
+  else
+bar ();
+}


[gcc(refs/vendors/ARM/heads/gcs-v2)] aarch64: Add __builtin_aarch64_chkfeat tests

2024-10-18 Thread Andre Simoes Dias Vieira via Gcc-cvs
https://gcc.gnu.org/g:2f17823f549fa5aa42061979d8ec69909f2443e9

commit 2f17823f549fa5aa42061979d8ec69909f2443e9
Author: Szabolcs Nagy 
Date:   Fri Jun 2 16:15:25 2023 +0100

aarch64: Add __builtin_aarch64_chkfeat tests

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/chkfeat-1.c: New test.
* gcc.target/aarch64/chkfeat-2.c: New test.

Diff:
---
 gcc/testsuite/gcc.target/aarch64/chkfeat-1.c | 75 
 gcc/testsuite/gcc.target/aarch64/chkfeat-2.c | 15 ++
 2 files changed, 90 insertions(+)

diff --git a/gcc/testsuite/gcc.target/aarch64/chkfeat-1.c 
b/gcc/testsuite/gcc.target/aarch64/chkfeat-1.c
new file mode 100644
index ..2fae81e740fa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/chkfeat-1.c
@@ -0,0 +1,75 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mbranch-protection=none" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+/*
+**foo1:
+** mov x16, 1
+** hint40 // chkfeat x16
+** mov x0, x16
+** ret
+*/
+unsigned long long
+foo1 (void)
+{
+  return __builtin_aarch64_chkfeat (1);
+}
+
+/*
+**foo2:
+** mov x16, 1
+** movkx16, 0x5678, lsl 32
+** movkx16, 0x1234, lsl 48
+** hint40 // chkfeat x16
+** mov x0, x16
+** ret
+*/
+unsigned long long
+foo2 (void)
+{
+  return __builtin_aarch64_chkfeat (0x123456780001);
+}
+
+/*
+**foo3:
+** mov x16, x0
+** hint40 // chkfeat x16
+** mov x0, x16
+** ret
+*/
+unsigned long long
+foo3 (unsigned long long x)
+{
+  return __builtin_aarch64_chkfeat (x);
+}
+
+/*
+**foo4:
+** ldr x16, \[x0\]
+** hint40 // chkfeat x16
+** str x16, \[x0\]
+** ret
+*/
+void
+foo4 (unsigned long long *p)
+{
+  *p = __builtin_aarch64_chkfeat (*p);
+}
+
+/*
+**foo5:
+** mov x16, 1
+** hint40 // chkfeat x16
+** cmp x16, 0
+**(
+** cselw0, w1, w0, eq
+**|
+** cselw0, w0, w1, ne
+**)
+** ret
+*/
+int
+foo5 (int x, int y)
+{
+  return __builtin_aarch64_chkfeat (1) ? x : y;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/chkfeat-2.c 
b/gcc/testsuite/gcc.target/aarch64/chkfeat-2.c
new file mode 100644
index ..682524e244fc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/chkfeat-2.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-times {hint\t40 // chkfeat x16} 2 } } */
+
+void bar (void);
+
+/* Extern call may change enabled HW features.  */
+unsigned long long
+foo (void)
+{
+  unsigned long long a = __builtin_aarch64_chkfeat (1);
+  bar ();
+  unsigned long long b = __builtin_aarch64_chkfeat (1);
+  return a + b;
+}


[gcc(refs/vendors/ARM/heads/gcs-v2)] aarch64: Add branch-protection target pragma tests

2024-10-18 Thread Andre Simoes Dias Vieira via Gcc-cvs
https://gcc.gnu.org/g:b700eeaa295af4db83a107d96c73735857d27ad5

commit b700eeaa295af4db83a107d96c73735857d27ad5
Author: Szabolcs Nagy 
Date:   Fri Jun 30 16:31:23 2023 +0100

aarch64: Add branch-protection target pragma tests

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/pragma_cpp_predefs_4.c: Add branch-protection
tests.

Diff:
---
 .../gcc.target/aarch64/pragma_cpp_predefs_4.c  | 50 ++
 1 file changed, 50 insertions(+)

diff --git a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c 
b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c
index 23ebe5e4f508..8e7076307741 100644
--- a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c
@@ -83,3 +83,53 @@
 #ifndef __ARM_FEATURE_SME_F64F64
 #error Foo
 #endif
+
+#pragma GCC target "branch-protection=standard"
+#ifndef __ARM_FEATURE_BTI_DEFAULT
+#error Foo
+#endif
+#if __ARM_FEATURE_PAC_DEFAULT != 1
+#error Foo
+#endif
+
+#pragma GCC target ("branch-protection=none")
+#ifdef __ARM_FEATURE_BTI_DEFAULT
+#error Foo
+#endif
+#ifdef __ARM_FEATURE_PAC_DEFAULT
+#error Foo
+#endif
+
+#pragma GCC push_options
+#pragma GCC target "branch-protection=bti+pac-ret"
+#ifndef __ARM_FEATURE_BTI_DEFAULT
+#error Foo
+#endif
+#pragma GCC pop_options
+#ifdef __ARM_FEATURE_BTI_DEFAULT
+#error Foo
+#endif
+
+#pragma GCC target "branch-protection=bti"
+#ifndef __ARM_FEATURE_BTI_DEFAULT
+#error Foo
+#endif
+#ifdef __ARM_FEATURE_PAC_DEFAULT
+#error Foo
+#endif
+
+#pragma GCC target "branch-protection=pac-ret"
+#ifdef __ARM_FEATURE_BTI_DEFAULT
+#error Foo
+#endif
+#if __ARM_FEATURE_PAC_DEFAULT != 1
+#error Foo
+#endif
+
+#pragma GCC target "branch-protection=pac-ret+leaf+b-key"
+#ifdef __ARM_FEATURE_BTI_DEFAULT
+#error Foo
+#endif
+#if __ARM_FEATURE_PAC_DEFAULT != 6
+#error Foo
+#endif


[gcc(refs/vendors/ARM/heads/gcs-v2)] aarch64: Fix tests incompatible with GCS

2024-10-18 Thread Andre Simoes Dias Vieira via Gcc-cvs
https://gcc.gnu.org/g:d4b1ee557b2e9ee8ac466d841e7349a2f44ada84

commit d4b1ee557b2e9ee8ac466d841e7349a2f44ada84
Author: Matthieu Longo 
Date:   Fri Oct 11 17:09:14 2024 +0100

aarch64: Fix tests incompatible with GCS

gcc/testsuite/ChangeLog:

* g++.target/aarch64/return_address_sign_ab_exception.C: Update.
* gcc.target/aarch64/eh_return.c: Update.

Diff:
---
 .../aarch64/return_address_sign_ab_exception.C| 19 +--
 gcc/testsuite/gcc.target/aarch64/eh_return.c  | 13 +
 2 files changed, 26 insertions(+), 6 deletions(-)

diff --git 
a/gcc/testsuite/g++.target/aarch64/return_address_sign_ab_exception.C 
b/gcc/testsuite/g++.target/aarch64/return_address_sign_ab_exception.C
index ead11de7b15d..6c79ebf03eb1 100644
--- a/gcc/testsuite/g++.target/aarch64/return_address_sign_ab_exception.C
+++ b/gcc/testsuite/g++.target/aarch64/return_address_sign_ab_exception.C
@@ -1,16 +1,28 @@
 /* { dg-do run } */
 /* { dg-options "--save-temps" } */
 /* { dg-require-effective-target arm_v8_3a_bkey_directive } */
+/* { dg-final { check-function-bodies "**" "" } } */
 
+/*
+** _Z5foo_av:
+** hint25 // paciasp
+** ...
+*/
 __attribute__((target("branch-protection=pac-ret+leaf")))
 int foo_a () {
   throw 22;
 }
 
+/*
+** _Z5foo_bv:
+** hint27 // pacibsp
+** ...
+*/
 __attribute__((target("branch-protection=pac-ret+leaf+b-key")))
 int foo_b () {
   throw 22;
 }
+/* { dg-final { scan-assembler-times ".cfi_b_key_frame" 1 } } */
 
 int main (int argc, char** argv) {
   try {
@@ -23,9 +35,4 @@ int main (int argc, char** argv) {
 }
   }
   return 1;
-}
-
-/* { dg-final { scan-assembler-times "paciasp" 1 } } */
-/* { dg-final { scan-assembler-times "pacibsp" 1 } } */
-/* { dg-final { scan-assembler-times ".cfi_b_key_frame" 1 } } */
-
+}
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/aarch64/eh_return.c 
b/gcc/testsuite/gcc.target/aarch64/eh_return.c
index 32179488085e..51b20f784b31 100644
--- a/gcc/testsuite/gcc.target/aarch64/eh_return.c
+++ b/gcc/testsuite/gcc.target/aarch64/eh_return.c
@@ -1,6 +1,19 @@
 /* { dg-do run } */
 /* { dg-options "-O2 -fno-inline" } */
 
+/* With BTI enabled, this test would crash with SIGILL, Illegal instruction.
+   The 2nd argument of __builtin_eh_return is expected to be an EH handler
+   within a function, rather than a separate function.
+   The current implementation of __builtin_eh_return in AArch64 backend emits a
+   jump instead of branching with LR.
+   The prologue of the handler (i.e. continuation) starts with "bti c" (vs.
+   "bti jc") which is a landing pad type prohibiting jumps, hence the exception
+   at runtime.
+   The current behavior of __builtin_eh_return is considered correct.
+   Consequently, the default option -mbranch-protection=standard needs to be
+   overridden to remove BTI.  */
+/* { dg-additional-options "-mbranch-protection=pac-ret+leaf+gcs" { target { 
default_branch_protection } } } */
+
 #include 
 #include 


[gcc(refs/vendors/ARM/heads/gcs-v2)] aarch64: libatomic: add GCS marking to asm

2024-10-18 Thread Andre Simoes Dias Vieira via Gcc-cvs
https://gcc.gnu.org/g:2873ad86aa0d2cff138698ed9bb1517f98e02438

commit 2873ad86aa0d2cff138698ed9bb1517f98e02438
Author: Szabolcs Nagy 
Date:   Fri Dec 22 15:11:25 2023 +

aarch64: libatomic: add GCS marking to asm

libatomic/ChangeLog:

* config/linux/aarch64/atomic_16.S (FEATURE_1_GCS): Define.
(GCS_FLAG): Define if GCS is enabled.
(GNU_PROPERTY): Add GCS_FLAG.

Diff:
---
 libatomic/config/linux/aarch64/atomic_16.S | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/libatomic/config/linux/aarch64/atomic_16.S 
b/libatomic/config/linux/aarch64/atomic_16.S
index 5767fba5c03c..685db7763821 100644
--- a/libatomic/config/linux/aarch64/atomic_16.S
+++ b/libatomic/config/linux/aarch64/atomic_16.S
@@ -775,6 +775,7 @@ END_FEAT (compare_exchange_16, LSE)
 #define FEATURE_1_AND 0xc000
 #define FEATURE_1_BTI 1
 #define FEATURE_1_PAC 2
+#define FEATURE_1_GCS 4
 
 /* Supported features based on the code generation options.  */
 #if defined(__ARM_FEATURE_BTI_DEFAULT)
@@ -789,6 +790,12 @@ END_FEAT (compare_exchange_16, LSE)
 # define PAC_FLAG 0
 #endif
 
+#if __ARM_FEATURE_GCS_DEFAULT
+# define GCS_FLAG FEATURE_1_GCS
+#else
+# define GCS_FLAG 0
+#endif
+
 /* Add a NT_GNU_PROPERTY_TYPE_0 note.  */
 #define GNU_PROPERTY(type, value)  \
   .section .note.gnu.property, "a"; \
@@ -806,7 +813,7 @@ END_FEAT (compare_exchange_16, LSE)
 .section .note.GNU-stack, "", %progbits
 
 /* Add GNU property note if built with branch protection.  */
-# if (BTI_FLAG|PAC_FLAG) != 0
-GNU_PROPERTY (FEATURE_1_AND, BTI_FLAG|PAC_FLAG)
+# if (BTI_FLAG|PAC_FLAG|GCS_FLAG) != 0
+GNU_PROPERTY (FEATURE_1_AND, BTI_FLAG|PAC_FLAG|GCS_FLAG)
 # endif
 #endif


[gcc(refs/vendors/ARM/heads/gcs-v2)] aarch64: Fix nonlocal goto tests incompatible with GCS

2024-10-18 Thread Andre Simoes Dias Vieira via Gcc-cvs
https://gcc.gnu.org/g:a58c2df81664b63ce9a0fc82911b69a0197302e3

commit a58c2df81664b63ce9a0fc82911b69a0197302e3
Author: Yury Khrustalev 
Date:   Wed Oct 16 10:59:00 2024 +0100

aarch64: Fix nonlocal goto tests incompatible with GCS

gcc/testsuite/ChangeLog:
* gcc.target/aarch64/gcs-nonlocal-3.c: New test.
* gcc.target/aarch64/sme/nonlocal_goto_4.c: Update.
* gcc.target/aarch64/sme/nonlocal_goto_5.c: Update.
* gcc.target/aarch64/sme/nonlocal_goto_6.c: Update.

Diff:
---
 gcc/testsuite/gcc.target/aarch64/gcs-nonlocal-3.c  | 33 ++
 .../gcc.target/aarch64/sme/nonlocal_goto_4.c   |  2 +-
 .../gcc.target/aarch64/sme/nonlocal_goto_5.c   |  2 +-
 .../gcc.target/aarch64/sme/nonlocal_goto_6.c   |  2 +-
 4 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/gcc.target/aarch64/gcs-nonlocal-3.c 
b/gcc/testsuite/gcc.target/aarch64/gcs-nonlocal-3.c
new file mode 100644
index ..8511f66f66ef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/gcs-nonlocal-3.c
@@ -0,0 +1,33 @@
+/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2 
-mbranch-protection=gcs" } */
+/* { dg-final { check-function-bodies "**" "" "" { target "*-*-*" } 
{\.L[0-9]+\:} } } */
+
+void run(void (*)());
+
+/*
+** bar.0:
+** ...
+** hint40 // chkfeat x16
+** tbnzw16, 0, (\.L[0-9]+)
+** ...
+** mrs x1, s3_3_c2_c5_1 // gcspr_el0
+** subsx1, x3, x1
+** bne (\.L[0-9]+)\n\1\:
+** ...
+** br  x[0-9]+\n\2\:
+** ...
+** syslxzr, #3, c7, c7, #1 // gcspopm
+** ...
+** b   \1
+*/
+int
+foo (int *ptr)
+{
+  __label__ failure;
+
+  void bar () { *ptr += 1; goto failure; }
+  run (bar);
+  return 1;
+
+failure:
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_4.c 
b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_4.c
index 0446076286ba..aed04bb495c3 100644
--- a/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_4.c
@@ -1,4 +1,4 @@
-/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2 
-mbranch-protection=none" } */
 /* { dg-final { check-function-bodies "**" "" } } */
 
 void run(void (*)());
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_5.c 
b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_5.c
index 4246aec8b2fa..e4a31c5c6000 100644
--- a/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_5.c
+++ b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_5.c
@@ -1,4 +1,4 @@
-/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2 
-mbranch-protection=none" } */
 /* { dg-final { check-function-bodies "**" "" } } */
 
 void run(void (*)() __arm_streaming);
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_6.c 
b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_6.c
index 151e2f22dc72..38f6c139f6d4 100644
--- a/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_6.c
+++ b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_6.c
@@ -1,4 +1,4 @@
-/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2 
-mbranch-protection=none" } */
 /* { dg-final { check-function-bodies "**" "" } } */
 
 void run(void (*)() __arm_streaming_compatible);


[gcc(refs/vendors/ARM/heads/gcs-v2)] aarch64: Add tests and docs for indirect_return attribute

2024-10-18 Thread Andre Simoes Dias Vieira via Gcc-cvs
https://gcc.gnu.org/g:00d04311840f95558dd4b87b7fa8d29dd55d5989

commit 00d04311840f95558dd4b87b7fa8d29dd55d5989
Author: Richard Ball 
Date:   Mon Sep 16 14:50:47 2024 +0100

aarch64: Add tests and docs for indirect_return attribute

This patch adds a new testcase and docs
for the indirect_return attribute.

gcc/ChangeLog:

* doc/extend.texi: Add AArch64 docs for indirect_return
attribute.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/indirect_return.c: New test.

Diff:
---
 gcc/doc/extend.texi|  5 +
 gcc/testsuite/gcc.target/aarch64/indirect_return.c | 15 +++
 2 files changed, 20 insertions(+)

diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 302c3299ede8..f12af7d2115e 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -4760,6 +4760,11 @@ Enable or disable calls to out-of-line helpers to 
implement atomic operations.
 This corresponds to the behavior of the command-line options
 @option{-moutline-atomics} and @option{-mno-outline-atomics}.
 
+@cindex @code{indirect_return} function attribute, AArch64
+@item indirect_return
+Used to inform the compiler that a function may return via
+an indirect return. Adds a BTI J instruction under 
@option{mbranch-protection=} bti.
+
 @end table
 
 The above target attributes can be specified as follows:
diff --git a/gcc/testsuite/gcc.target/aarch64/indirect_return.c 
b/gcc/testsuite/gcc.target/aarch64/indirect_return.c
new file mode 100644
index ..959d2268687d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/indirect_return.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-mbranch-protection=bti" } */
+
+int __attribute((indirect_return)) foo (int a) 
+{
+  return a;
+}
+
+int
+func1 (int a, int b)
+{
+  return foo (a + b);
+}
+
+/* { dg-final { scan-assembler-times "bti j" 1 } } */
\ No newline at end of file


[gcc(refs/vendors/ARM/heads/gcs-v2)] aarch64: Add ACLE feature macros for GCS

2024-10-18 Thread Andre Simoes Dias Vieira via Gcc-cvs
https://gcc.gnu.org/g:901d5333f16934edb2105a4239ac2a869ce75107

commit 901d5333f16934edb2105a4239ac2a869ce75107
Author: Szabolcs Nagy 
Date:   Tue May 9 17:04:34 2023 +0100

aarch64: Add ACLE feature macros for GCS

gcc/ChangeLog:

* config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): Define
macros for GCS.

Diff:
---
 gcc/config/aarch64/aarch64-c.cc | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
index f9b9e3793755..bdc1c0da5845 100644
--- a/gcc/config/aarch64/aarch64-c.cc
+++ b/gcc/config/aarch64/aarch64-c.cc
@@ -247,6 +247,9 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
 
   aarch64_def_or_undef (TARGET_PAUTH, "__ARM_FEATURE_PAUTH", pfile);
   aarch64_def_or_undef (TARGET_BTI, "__ARM_FEATURE_BTI", pfile);
+  aarch64_def_or_undef (aarch64_gcs_enabled (),
+   "__ARM_FEATURE_GCS_DEFAULT", pfile);
+  aarch64_def_or_undef (TARGET_GCS, "__ARM_FEATURE_GCS", pfile);
   aarch64_def_or_undef (TARGET_I8MM, "__ARM_FEATURE_MATMUL_INT8", pfile);
   aarch64_def_or_undef (TARGET_BF16_SIMD,
"__ARM_FEATURE_BF16_VECTOR_ARITHMETIC", pfile);


[gcc(refs/vendors/ARM/heads/gcs-v2)] aarch64: Emit GNU property NOTE for GCS

2024-10-18 Thread Andre Simoes Dias Vieira via Gcc-cvs
https://gcc.gnu.org/g:ae33e0ecdba9f987680fdbafcd6217f6f231f5e6

commit ae33e0ecdba9f987680fdbafcd6217f6f231f5e6
Author: Szabolcs Nagy 
Date:   Tue May 9 14:32:46 2023 +0100

aarch64: Emit GNU property NOTE for GCS

gcc/ChangeLog:

* config/aarch64/aarch64.cc (GNU_PROPERTY_AARCH64_FEATURE_1_GCS):
Define.
(aarch64_file_end_indicate_exec_stack): Set GCS property bit.

Diff:
---
 gcc/config/aarch64/aarch64.cc | 5 +
 1 file changed, 5 insertions(+)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index d59b276500d5..c40e5778c98e 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -29209,6 +29209,7 @@ aarch64_can_tag_addresses ()
 #define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc000
 #define GNU_PROPERTY_AARCH64_FEATURE_1_BTI (1U << 0)
 #define GNU_PROPERTY_AARCH64_FEATURE_1_PAC (1U << 1)
+#define GNU_PROPERTY_AARCH64_FEATURE_1_GCS (1U << 2)
 void
 aarch64_file_end_indicate_exec_stack ()
 {
@@ -29221,6 +29222,9 @@ aarch64_file_end_indicate_exec_stack ()
   if (aarch_ra_sign_scope != AARCH_FUNCTION_NONE)
 feature_1_and |= GNU_PROPERTY_AARCH64_FEATURE_1_PAC;
 
+  if (aarch64_gcs_enabled ())
+feature_1_and |= GNU_PROPERTY_AARCH64_FEATURE_1_GCS;
+
   if (feature_1_and)
 {
   /* Generate .note.gnu.property section.  */
@@ -29252,6 +29256,7 @@ aarch64_file_end_indicate_exec_stack ()
   assemble_align (POINTER_SIZE);
 }
 }
+#undef GNU_PROPERTY_AARCH64_FEATURE_1_GCS
 #undef GNU_PROPERTY_AARCH64_FEATURE_1_PAC
 #undef GNU_PROPERTY_AARCH64_FEATURE_1_BTI
 #undef GNU_PROPERTY_AARCH64_FEATURE_1_AND


[gcc] Created branch 'ARM/heads/gcs-v2' in namespace 'refs/vendors'

2024-10-18 Thread Andre Simoes Dias Vieira via Gcc-cvs
The branch 'ARM/heads/gcs-v2' was created in namespace 'refs/vendors' pointing 
to:

 a58c2df81664... aarch64: Fix nonlocal goto tests incompatible with GCS


[gcc r15-4464] testsuite: Add necessary dejagnu directives to pr115815_0.c

2024-10-18 Thread Martin Jambor via Gcc-cvs
https://gcc.gnu.org/g:1a458bdeb223ffa501bac8e76182115681967094

commit r15-4464-g1a458bdeb223ffa501bac8e76182115681967094
Author: Martin Jambor 
Date:   Fri Oct 18 12:00:12 2024 +0200

testsuite: Add necessary dejagnu directives to pr115815_0.c

I have received an email from the Linaro infrastructure that the test
gcc.dg/lto/pr115815_0.c which I added is failing on arm-eabi and I
realized that not only it is missing dg-require-effective-target
global_constructor but actually any dejagnu directives at all, which
means it is unnecessarily running both at -O0 and -O2 and there is an
unnecessary run test too.  All fixed by this patch.

I have not actually verified that the failure goes away on arm-eabi
but have very high hopes it will.  I have verified that the test still
checks for the bug and also that it passes by running:

  make -k check-gcc RUNTESTFLAGS="lto.exp=*pr115815*"

gcc/testsuite/ChangeLog:

2024-10-14  Martin Jambor  

* gcc.dg/lto/pr115815_0.c: Add dejagnu directives.

Diff:
---
 gcc/testsuite/gcc.dg/lto/pr115815_0.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/gcc/testsuite/gcc.dg/lto/pr115815_0.c 
b/gcc/testsuite/gcc.dg/lto/pr115815_0.c
index d938ae4c8025..ade91def55b0 100644
--- a/gcc/testsuite/gcc.dg/lto/pr115815_0.c
+++ b/gcc/testsuite/gcc.dg/lto/pr115815_0.c
@@ -1,3 +1,7 @@
+/* { dg-lto-options {{-O2 -flto}} }  */
+/* { dg-lto-do link } */
+/* { dg-require-effective-target global_constructor } */
+
 int a;
 volatile int v;
 volatile int w;


[gcc r15-4465] MAINTAINERS: Add myself as pair fusion and aarch64 ldp/stp maintainer

2024-10-18 Thread Alex Coplan via Gcc-cvs
https://gcc.gnu.org/g:261d803c40c9fd28c59d8d1771051663f738a871

commit r15-4465-g261d803c40c9fd28c59d8d1771051663f738a871
Author: Alex Coplan 
Date:   Fri Oct 18 11:02:15 2024 +0100

MAINTAINERS: Add myself as pair fusion and aarch64 ldp/stp maintainer

ChangeLog:

* MAINTAINERS (CPU Port Maintainers): Add myself as aarch64 ldp/stp
maintainer.
(Various Maintainers): Add myself as pair fusion maintainer.

Diff:
---
 MAINTAINERS | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 269ac2ea6b49..1074886f4419 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -54,6 +54,7 @@ docs, and the testsuite related to that.
 
 CPU Port Maintainers(CPU alphabetical order)
 
+aarch64 ldp/stp Alex Coplan 
 aarch64 portRichard Earnshaw
 aarch64 portRichard Sandiford   
 aarch64 portMarcus Shawcroft
@@ -251,6 +252,7 @@ AutoFDO Eugene Rozenfeld

 reload  Ulrich Weigand  
 RTL optimizers  Eric Botcazou   
 instruction combinerSegher Boessenkool  
+pair fusion Alex Coplan 
 auto-vectorizer Richard Biener  
 auto-vectorizer Zdenek Dvorak   
 loop infrastructure Zdenek Dvorak   


[gcc(refs/vendors/ARM/heads/gcs-v2)] aarch64: Add GCS instructions

2024-10-18 Thread Andre Simoes Dias Vieira via Gcc-cvs
https://gcc.gnu.org/g:6552d3a4d4503af5fa876e3ca41df604052cc6cd

commit 6552d3a4d4503af5fa876e3ca41df604052cc6cd
Author: Szabolcs Nagy 
Date:   Tue May 9 16:00:01 2023 +0100

aarch64: Add GCS instructions

Add instructions for the Guarded Control Stack extension.

GCSSS1 and GCSSS2 are modelled as a single GCSSS unspec, because they
are always used together in the compiler.

Before GCSPOPM and GCSSS2 an extra "mov xn, 0" is added to clear the
output register. This is needed to get a reasonable result when GCS is
disabled and the instructions are therefore NOPs. Since the instructions
are expected to be used behind runtime feature checks, this is mainly
relevant if GCS can be disabled asynchronously.

The output of GCSPOPM is usually not needed, so a separate gcspopm_xzr
was added to model that. The same was not done for GCSSS as it is a
less common operation.

The mnemonics used do not depend on an updated assembler, since these
instructions can be used behind a runtime check without a new -march
setting.

Reading the GCSPR is modelled as unspec_volatile so it does not get
reordered wrt the other instructions changing the GCSPR.

gcc/ChangeLog:

* config/aarch64/aarch64.md (aarch64_load_gcspr): New.
(aarch64_gcspopm): New.
(aarch64_gcspopm_xzr): New.
(aarch64_gcsss): New.

Diff:
---
 gcc/config/aarch64/aarch64.md | 35 +++
 1 file changed, 35 insertions(+)

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 43bed0ce10fd..e4e11e35b5bd 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -382,6 +382,9 @@
 UNSPECV_BTI_J  ; Represent BTI j.
 UNSPECV_BTI_JC ; Represent BTI jc.
 UNSPECV_CHKFEAT; Represent CHKFEAT X16.
+UNSPECV_GCSPR  ; Represent MRS Xn, GCSPR_EL0
+UNSPECV_GCSPOPM; Represent GCSPOPM.
+UNSPECV_GCSSS  ; Represent GCSSS1 and GCSSS2.
 UNSPECV_TSTART ; Represent transaction start.
 UNSPECV_TCOMMIT; Represent transaction commit.
 UNSPECV_TCANCEL; Represent transaction cancel.
@@ -8321,6 +8324,38 @@
   "hint\\t40 // chkfeat x16"
 )
 
+;; Guarded Control Stack (GCS) instructions
+(define_insn "aarch64_load_gcspr"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+   (unspec_volatile:DI [(const_int 0)] UNSPECV_GCSPR))]
+  ""
+  "mrs\\t%0, s3_3_c2_c5_1 // gcspr_el0"
+  [(set_attr "type" "mrs")]
+)
+
+(define_insn "aarch64_gcspopm"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+   (unspec_volatile:DI [(const_int 0)] UNSPECV_GCSPOPM))]
+  ""
+  "mov\\t%0, 0\;sysl\\t%0, #3, c7, c7, #1 // gcspopm"
+  [(set_attr "length" "8")]
+)
+
+(define_insn "aarch64_gcspopm_xzr"
+  [(unspec_volatile [(const_int 0)] UNSPECV_GCSPOPM)]
+  ""
+  "sysl\\txzr, #3, c7, c7, #1 // gcspopm"
+)
+
+(define_insn "aarch64_gcsss"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+   (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "r")]
+ UNSPECV_GCSSS))]
+  ""
+  "sys\\t#3, c7, c7, #2, %1 // gcsss1\;mov\\t%0, 0\;sysl\\t%0, #3, c7, c7, #3 
// gcsss2"
+  [(set_attr "length" "12")]
+)
+
 ;; AdvSIMD Stuff
 (include "aarch64-simd.md")


  1   2   >