[gcc r14-10909] AArch64: backport Neoverse and Cortex CPU definitions

2024-11-08 Thread Tamar Christina via Gcc-cvs
https://gcc.gnu.org/g:05d54bcdc5395a9d3df36c8b640579a0558c89f0

commit r14-10909-g05d54bcdc5395a9d3df36c8b640579a0558c89f0
Author: Tamar Christina 
Date:   Fri Nov 8 18:12:32 2024 +

AArch64: backport Neoverse and Cortex CPU definitions

This is a conservative backport of a few core definitions, backporting only the
core definitions and mapping them to their closest cost model that exists on the
branches.

gcc/ChangeLog:

* config/aarch64/aarch64-cores.def (cortex-a725, cortex-x925,
neoverse-n3, neoverse-v3, neoverse-v3ae): New.
* config/aarch64/aarch64-tune.md: Regenerate.
* doc/invoke.texi: Document them.

Diff:
---
 gcc/config/aarch64/aarch64-cores.def |  6 ++
 gcc/config/aarch64/aarch64-tune.md   |  2 +-
 gcc/doc/invoke.texi  | 10 ++
 3 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-cores.def 
b/gcc/config/aarch64/aarch64-cores.def
index 1ab09ea5f720..a919ab7d8a5a 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -179,6 +179,7 @@ AARCH64_CORE("cortex-a710",  cortexa710, cortexa57, V9A,  
(SVE2_BITPERM, MEMTAG,
 AARCH64_CORE("cortex-a715",  cortexa715, cortexa57, V9A,  (SVE2_BITPERM, 
MEMTAG, I8MM, BF16), neoversen2, 0x41, 0xd4d, -1)
 
 AARCH64_CORE("cortex-a720",  cortexa720, cortexa57, V9_2A,  (SVE2_BITPERM, 
MEMTAG, PROFILE), neoversen2, 0x41, 0xd81, -1)
+AARCH64_CORE("cortex-a725",  cortexa725, cortexa57, V9_2A, (SVE2_BITPERM, 
MEMTAG, PROFILE), neoversen2, 0x41, 0xd87, -1)
 
 AARCH64_CORE("cortex-x2",  cortexx2, cortexa57, V9A,  (SVE2_BITPERM, MEMTAG, 
I8MM, BF16), neoversen2, 0x41, 0xd48, -1)
 
@@ -186,11 +187,16 @@ AARCH64_CORE("cortex-x3",  cortexx3, cortexa57, V9A,  
(SVE2_BITPERM, MEMTAG, I8M
 
 AARCH64_CORE("cortex-x4",  cortexx4, cortexa57, V9_2A,  (SVE2_BITPERM, MEMTAG, 
PROFILE), neoversen2, 0x41, 0xd81, -1)
 
+AARCH64_CORE("cortex-x925", cortexx925, cortexa57, V9_2A,  (SVE2_BITPERM, 
MEMTAG, PROFILE), neoversen2, 0x41, 0xd85, -1)
+
 AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, V9A, (I8MM, BF16, 
SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversen2, 0x41, 0xd49, -1)
 AARCH64_CORE("cobalt-100",   cobalt100, cortexa57, V9A, (I8MM, BF16, 
SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversen2, 0x6d, 0xd49, -1)
+AARCH64_CORE("neoverse-n3", neoversen3, cortexa57, V9_2A, (SVE2_BITPERM, RNG, 
MEMTAG, PROFILE), neoversen2, 0x41, 0xd8e, -1)
 
 AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, V9A, (I8MM, BF16, 
SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversev2, 0x41, 0xd4f, -1)
 AARCH64_CORE("grace", grace, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, 
SVE2_AES, SVE2_SHA3, SVE2_SM4, PROFILE), neoversev2, 0x41, 0xd4f, -1)
+AARCH64_CORE("neoverse-v3", neoversev3, cortexa57, V9_2A, (SVE2_BITPERM, RNG, 
LS64, MEMTAG, PROFILE), neoversev2, 0x41, 0xd84, -1)
+AARCH64_CORE("neoverse-v3ae", neoversev3ae, cortexa57, V9_2A, (SVE2_BITPERM, 
RNG, LS64, MEMTAG, PROFILE), neoversev2, 0x41, 0xd83, -1)
 
 AARCH64_CORE("demeter", demeter, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, 
RNG, MEMTAG, PROFILE), neoversev2, 0x41, 0xd4f, -1)
 
diff --git a/gcc/config/aarch64/aarch64-tune.md 
b/gcc/config/aarch64/aarch64-tune.md
index 06e8680607bd..35b27ddb8831 100644
--- a/gcc/config/aarch64/aarch64-tune.md
+++ b/gcc/config/aarch64/aarch64-tune.md
@@ -1,5 +1,5 @@
 ;; -*- buffer-read-only: t -*-
 ;; Generated automatically by gentune.sh from aarch64-cores.def
 (define_attr "tune"
-   
"cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,ampere1b,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,neoversen1,ares,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,fujitsu_monaka,tsv110,thunderx3t110,neoversev1,zeus,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa520,cortexa710,cortexa715,cortexa720,cortexx2,cortexx3,cortexx4,neoversen2,cobalt100,neoversev2,grace,demeter,generic,generic_armv8_a,generic_armv9_a"
+   
"cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,ampere1b,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,neoversen1,ares,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,fujitsu_monaka,tsv110,thunderx3t110,neoversev1,zeus,neoverse512tvb,saphira,cortexa57cort

[gcc(refs/users/meissner/heads/work182-test)] Add power9 and power10 float to logical optimizations.

2024-11-08 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:b6cef51eaf142eb14e32544d982db0d186f348d9

commit b6cef51eaf142eb14e32544d982db0d186f348d9
Author: Michael Meissner 
Date:   Fri Nov 8 13:17:45 2024 -0500

Add power9 and power10 float to logical optimizations.

2024-11-08  Michael Meissner  

gcc/

PR target/117487
* config/rs6000/rs6000.cc (sf_logical_op_p): Delete.
* config/rs6000/rs6000.h (sf_logical_op_p): Likewise.
* config/rs6000/vsx.md (SFmode logical peephole): Update comments in
the original code that supports power8.  Add a new define_peephole2 to
do the optimization on power9/power10.

Diff:
---
 gcc/config/rs6000/rs6000.cc |  62 -
 gcc/config/rs6000/rs6000.h  |   1 -
 gcc/config/rs6000/vsx.md| 161 ++--
 3 files changed, 155 insertions(+), 69 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index e1ec9591a0eb..aa67e7256bb9 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -29564,68 +29564,6 @@ rs6000_opaque_type_invalid_use_p (gimple *stmt)
   return false;
 }
 
-bool
-sf_logical_op_p (rtx operands[])
-{
-  if (!TARGET_POWERPC64 || !TARGET_DIRECT_MOVE)
-{
-  fprintf (stderr, "!TARGET_POWERPC64 || !TARGET_DIRECT_MOVE\n");
-  return false;
-}
-
-   /* The REG_P (xxx) tests prevents SUBREG's, which allows us to use REGNO
-  to compare registers, when the mode is different.  */
-  if (!REG_P (operands[SFBOOL_MFVSR_D]) && REG_P (operands[SFBOOL_BOOL_D]))
-{
-  fprintf (stderr, "REG_P (operands[SFBOOL_MFVSR_D]) && REG_P 
(operands[SFBOOL_BOOL_D]))\n");
-  return false;
-}
-
-  if (!REG_P (operands[SFBOOL_BOOL_A1]) && REG_P (operands[SFBOOL_SHL_D]))
-{
-  fprintf (stderr, "!REG_P (operands[SFBOOL_BOOL_A1]) && REG_P 
(operands[SFBOOL_SHL_D])\n");
-  return false;
-}
-
-  if (!REG_P (operands[SFBOOL_SHL_A])   && REG_P (operands[SFBOOL_MTVSR_D]))
-{
-  fprintf (stderr, "!REG_P (operands[SFBOOL_SHL_A])   && REG_P 
(operands[SFBOOL_MTVSR_D])\n");
-  return false;
-}
-
-  if (!REG_P (operands[SFBOOL_BOOL_A2])
-   && !CONST_INT_P (operands[SFBOOL_BOOL_A2]))
-{
-  fprintf (stderr, "!REG_P (operands[SFBOOL_BOOL_A2]) && !CONST_INT_P 
(operands[SFBOOL_BOOL_A2])\n");
-  return false;
-}
-
-  if (!REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_MFVSR_D])
-   && !peep2_reg_dead_p (2, operands[SFBOOL_MFVSR_D]))
-{
-  fprintf (stderr, "!REGNO (operands[SFBOOL_BOOL_D]) == REGNO 
(operands[SFBOOL_MFVSR_D]) && !peep2_reg_dead_p (2, 
operands[SFBOOL_MFVSR_D])\n");
-  return false;
-}
-
-  if (((REGNO (operands[SFBOOL_MFVSR_D]) == REGNO (operands[SFBOOL_BOOL_A1])
-   || (REG_P (operands[SFBOOL_BOOL_A2])
-   && REGNO (operands[SFBOOL_MFVSR_D]) == REGNO 
(operands[SFBOOL_BOOL_A2])))
-   && REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_SHL_A])
-   && (REGNO (operands[SFBOOL_SHL_D]) == REGNO (operands[SFBOOL_BOOL_D])
-  || peep2_reg_dead_p (3, operands[SFBOOL_BOOL_D]))
-   && peep2_reg_dead_p (4, operands[SFBOOL_SHL_D])))
-{
-  fprintf (stderr, "last test passed\n");
-  return true;
-}
-  else
-{
-  fprintf (stderr, "last test failed\n");
-  return false;
-}
-}
-
-
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-rs6000.h"
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 499e80fda08d..197005af5195 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -2526,4 +2526,3 @@ enum {
 #undef ARCH_EXPAND
 #endif /* GCC_HWINT_H.  */
 
-extern bool sf_logical_op_p (rtx operands[]);
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index bcf8e2a60462..bfa1516768bc 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -6262,7 +6262,7 @@
(SFBOOL_MFVSR_A  3) ;; move to gpr src
(SFBOOL_BOOL_D   4) ;; and/ior/xor dest
(SFBOOL_BOOL_A1  5) ;; and/ior/xor arg1
-   (SFBOOL_BOOL_A2  6) ;; and/ior/xor arg1
+   (SFBOOL_BOOL_A2  6) ;; and/ior/xor arg2
(SFBOOL_SHL_D7) ;; shift left dest
(SFBOOL_SHL_A8) ;; shift left arg
(SFBOOL_MTVSR_D  9) ;; move to vecter dest
@@ -6302,18 +6302,18 @@
 ;; GPR, and instead move the integer mask value to the vector register after a
 ;; shift and do the VSX logical operation.
 
-;; The insns for dealing with SFmode in GPR registers looks like:
+;; The insns for dealing with SFmode in GPR registers looks like on power8:
 ;; (set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN))
 ;;
-;; (set (reg:DI reg3) (unspec:DI [(reg:V4SF reg2)] UNSPEC_P8V_RELOAD_FROM_VSX))
+;; (set (reg:DI reg3) (zero_extend:DI (reg:SI

[gcc(refs/users/meissner/heads/work182-test)] Revert changes

2024-11-08 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:d47e62860e33f96f0bc22aa0c1d60609b96fa4f9

commit d47e62860e33f96f0bc22aa0c1d60609b96fa4f9
Author: Michael Meissner 
Date:   Fri Nov 8 13:21:21 2024 -0500

Revert changes

Diff:
---
 gcc/ChangeLog.test |  28 +
 gcc/config/rs6000/rs6000.h |   1 -
 gcc/config/rs6000/vsx.md   | 142 ++---
 3 files changed, 7 insertions(+), 164 deletions(-)

diff --git a/gcc/ChangeLog.test b/gcc/ChangeLog.test
index 3862b4c9794e..c20f5b300851 100644
--- a/gcc/ChangeLog.test
+++ b/gcc/ChangeLog.test
@@ -1,29 +1,5 @@
- Branch work182-test, patch #501 
-
-Add power9 and power10 float to logical optimizations.
-
-2024-11-08  Michael Meissner  
-
-gcc/
-
-   PR target/117487
-   * config/rs6000/rs6000.cc (sf_logical_op_p): Delete.
-   * config/rs6000/vsx.md (SFmode logical peephoole): Update comments in
-   the original code that supports power8.  Add a new define_peephole2 to
-   do the optimization on power9/power10.
-
- Branch work182-test, patch #500 
-
-Add debugging for PR 71977-1.c regression.
-
-2024-11-06  Michael Meissner  
-
-gcc/
-
-   * config/rs6000/rs6000.cc (sf_logical_op_p): New function.
-   * config/rs6000/rs6000.h (sf_logical_op_p): Add declaration.
-   * config/rs6000/vsx.md (define_peephole2 for SF + logical): Move test to
-   sf_logical_op_p.
+ Branch work182-test, patch #501 was reverted 

+ Branch work182-test, patch #500 was reverted 

 
  Branch work182-test, baseline 
 
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 197005af5195..8cfd9faf77dc 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -2525,4 +2525,3 @@ enum {
 
 #undef ARCH_EXPAND
 #endif /* GCC_HWINT_H.  */
-
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index bfa1516768bc..b2fc39acf4e8 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -6262,7 +6262,7 @@
(SFBOOL_MFVSR_A  3) ;; move to gpr src
(SFBOOL_BOOL_D   4) ;; and/ior/xor dest
(SFBOOL_BOOL_A1  5) ;; and/ior/xor arg1
-   (SFBOOL_BOOL_A2  6) ;; and/ior/xor arg2
+   (SFBOOL_BOOL_A2  6) ;; and/ior/xor arg1
(SFBOOL_SHL_D7) ;; shift left dest
(SFBOOL_SHL_A8) ;; shift left arg
(SFBOOL_MTVSR_D  9) ;; move to vecter dest
@@ -6302,18 +6302,18 @@
 ;; GPR, and instead move the integer mask value to the vector register after a
 ;; shift and do the VSX logical operation.
 
-;; The insns for dealing with SFmode in GPR registers looks like on power8:
+;; The insns for dealing with SFmode in GPR registers looks like:
 ;; (set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN))
 ;;
-;; (set (reg:DI reg3) (zero_extend:DI (reg:SI reg2)))
+;; (set (reg:DI reg3) (unspec:DI [(reg:V4SF reg2)] UNSPEC_P8V_RELOAD_FROM_VSX))
 ;;
-;; (set (reg:DI reg4) (and:SI (reg:SI reg3) (reg:SI mask)))
+;; (set (reg:DI reg4) (and:DI (reg:DI reg3) (reg:DI reg3)))
 ;;
 ;; (set (reg:DI reg5) (ashift:DI (reg:DI reg4) (const_int 32)))
 ;;
 ;; (set (reg:SF reg6) (unspec:SF [(reg:DI reg5)] UNSPEC_P8V_MTVSRD))
 ;;
-;; (set (reg:SF reg7) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))
+;; (set (reg:SF reg6) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))
 
 (define_peephole2
   [(match_scratch:DI SFBOOL_TMP_GPR "r")
@@ -6394,138 +6394,6 @@
   operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d);
 })
 
-;; Constants for SFbool optimization on power9/power10
-(define_constants
-  [(SFBOOL2_TMP_VSX_V4SI0) ;; vector temporary (V4SI)
-   (SFBOOL2_TMP_GPR_SI  1) ;; GPR temporary (SI)
-   (SFBOOL2_MFVSR_D 2) ;; move to gpr dest (DI)
-   (SFBOOL2_MFVSR_A 3) ;; move to gpr src (SI)
-   (SFBOOL2_BOOL_D  4) ;; and/ior/xor dest (SI)
-   (SFBOOL2_BOOL_A1 5) ;; and/ior/xor arg1 (SI)
-   (SFBOOL2_BOOL_A2 6) ;; and/ior/xor arg2 (SI)
-   (SFBOOL2_SPLAT_D 7) ;; splat dest (V4SI)
-   (SFBOOL2_MTVSR_D 8) ;; move/splat to VSX dest.
-   (SFBOOL2_MTVSR_A 9) ;; move/splat to VSX arg.
-   (SFBOOL2_MFVSR_A_V4SI   10) ;; MFVSR_A as V4SI
-   (SFBOOL2_MTVSR_D_V4SI   11) ;; MTVSR_D as V4SI
-   (SFBOOL2_XXSPLTW12)])   ;; 1 or 3 for XXSPLTW
-
-;; On power9/power10, the code is different because we have a splat 32-bit
-;; operation that does a direct move to the FPR/vector registers (MTVSRWS).
-;;
-;; The insns for dealing with SFmode in GPR

[gcc(refs/users/meissner/heads/work182-test)] Add power9 and power10 float to logical optimizations.

2024-11-08 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:bb578a879fdc41a34f471dde426c973a3c3d30bc

commit bb578a879fdc41a34f471dde426c973a3c3d30bc
Author: Michael Meissner 
Date:   Fri Nov 8 13:28:39 2024 -0500

Add power9 and power10 float to logical optimizations.

2024-11-08  Michael Meissner  

gcc/

PR target/117487
* config/rs6000/vsx.md (SFmode logical peephole): Update comments in
the original code that supports power8.  Add a new define_peephole2 to
do the optimization on power9/power10.

Diff:
---
 gcc/config/rs6000/rs6000.h |   1 +
 gcc/config/rs6000/vsx.md   | 142 +++--
 2 files changed, 138 insertions(+), 5 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 8cfd9faf77dc..197005af5195 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -2525,3 +2525,4 @@ enum {
 
 #undef ARCH_EXPAND
 #endif /* GCC_HWINT_H.  */
+
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index b2fc39acf4e8..bfa1516768bc 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -6262,7 +6262,7 @@
(SFBOOL_MFVSR_A  3) ;; move to gpr src
(SFBOOL_BOOL_D   4) ;; and/ior/xor dest
(SFBOOL_BOOL_A1  5) ;; and/ior/xor arg1
-   (SFBOOL_BOOL_A2  6) ;; and/ior/xor arg1
+   (SFBOOL_BOOL_A2  6) ;; and/ior/xor arg2
(SFBOOL_SHL_D7) ;; shift left dest
(SFBOOL_SHL_A8) ;; shift left arg
(SFBOOL_MTVSR_D  9) ;; move to vecter dest
@@ -6302,18 +6302,18 @@
 ;; GPR, and instead move the integer mask value to the vector register after a
 ;; shift and do the VSX logical operation.
 
-;; The insns for dealing with SFmode in GPR registers looks like:
+;; The insns for dealing with SFmode in GPR registers looks like on power8:
 ;; (set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN))
 ;;
-;; (set (reg:DI reg3) (unspec:DI [(reg:V4SF reg2)] UNSPEC_P8V_RELOAD_FROM_VSX))
+;; (set (reg:DI reg3) (zero_extend:DI (reg:SI reg2)))
 ;;
-;; (set (reg:DI reg4) (and:DI (reg:DI reg3) (reg:DI reg3)))
+;; (set (reg:DI reg4) (and:SI (reg:SI reg3) (reg:SI mask)))
 ;;
 ;; (set (reg:DI reg5) (ashift:DI (reg:DI reg4) (const_int 32)))
 ;;
 ;; (set (reg:SF reg6) (unspec:SF [(reg:DI reg5)] UNSPEC_P8V_MTVSRD))
 ;;
-;; (set (reg:SF reg6) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))
+;; (set (reg:SF reg7) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))
 
 (define_peephole2
   [(match_scratch:DI SFBOOL_TMP_GPR "r")
@@ -6394,6 +6394,138 @@
   operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d);
 })
 
+;; Constants for SFbool optimization on power9/power10
+(define_constants
+  [(SFBOOL2_TMP_VSX_V4SI0) ;; vector temporary (V4SI)
+   (SFBOOL2_TMP_GPR_SI  1) ;; GPR temporary (SI)
+   (SFBOOL2_MFVSR_D 2) ;; move to gpr dest (DI)
+   (SFBOOL2_MFVSR_A 3) ;; move to gpr src (SI)
+   (SFBOOL2_BOOL_D  4) ;; and/ior/xor dest (SI)
+   (SFBOOL2_BOOL_A1 5) ;; and/ior/xor arg1 (SI)
+   (SFBOOL2_BOOL_A2 6) ;; and/ior/xor arg2 (SI)
+   (SFBOOL2_SPLAT_D 7) ;; splat dest (V4SI)
+   (SFBOOL2_MTVSR_D 8) ;; move/splat to VSX dest.
+   (SFBOOL2_MTVSR_A 9) ;; move/splat to VSX arg.
+   (SFBOOL2_MFVSR_A_V4SI   10) ;; MFVSR_A as V4SI
+   (SFBOOL2_MTVSR_D_V4SI   11) ;; MTVSR_D as V4SI
+   (SFBOOL2_XXSPLTW12)])   ;; 1 or 3 for XXSPLTW
+
+;; On power9/power10, the code is different because we have a splat 32-bit
+;; operation that does a direct move to the FPR/vector registers (MTVSRWS).
+;;
+;; The insns for dealing with SFmode in GPR registers looks like on
+;; power9/power10:
+;;
+;; (set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN))
+;;
+;; (set (reg:DI reg3) (zero_extend:DI (reg:SI reg2)))
+;;
+;; (set (reg:SI reg4) (and:SI (reg:SI reg3) (reg:SI mask)))
+;;
+;; (set (reg:V4SI reg5) (vec_duplicate:V4SI (reg:SI reg4)))
+;;
+;; (set (reg:SF reg6) (unspec:SF [(reg:SF reg5)] UNSPEC_VSX_CVSPDPN))
+
+;; The VSX temporary needs to be an Altivec register in case we are trying to
+;; do and/ior/xor of -16..15 and we want to use VSPLTISW to load the constant.
+;;
+;; The GPR temporary is only used if we are trying to do a logical operation
+;; with a constant outside of the -16..15 range on a power9.  Otherwise, we can
+;; load the constant directly into the VSX temporary register.
+
+(define_peephole2
+  [(match_scratch:V4SI SFBOOL2_TMP_VSX_V4SI "v")
+   (match_scratch:SI SFBOOL2_TMP_GPR_SI "r")
+
+   ;; Zero_extend and direct move
+   (set (match_operand:DI SFBOOL2_MFVSR_D "int_reg_operand

[gcc(refs/users/meissner/heads/work182-test)] Add power9 and power10 float to logical optimizations.

2024-11-08 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:db5b49248a7afd5f1bc3c4c9928a7e97d6a22aca

commit db5b49248a7afd5f1bc3c4c9928a7e97d6a22aca
Author: Michael Meissner 
Date:   Fri Nov 8 13:32:34 2024 -0500

Add power9 and power10 float to logical optimizations.

2024-11-08  Michael Meissner  

gcc/

PR target/117487
* config/rs6000/vsx.md (SFmode logical peephole): Update comments in
the original code that supports power8.  Add a new define_peephole2 to
do the optimization on power9/power10.

Diff:
---
 gcc/config/rs6000/vsx.md | 142 +--
 1 file changed, 137 insertions(+), 5 deletions(-)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index b2fc39acf4e8..bfa1516768bc 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -6262,7 +6262,7 @@
(SFBOOL_MFVSR_A  3) ;; move to gpr src
(SFBOOL_BOOL_D   4) ;; and/ior/xor dest
(SFBOOL_BOOL_A1  5) ;; and/ior/xor arg1
-   (SFBOOL_BOOL_A2  6) ;; and/ior/xor arg1
+   (SFBOOL_BOOL_A2  6) ;; and/ior/xor arg2
(SFBOOL_SHL_D7) ;; shift left dest
(SFBOOL_SHL_A8) ;; shift left arg
(SFBOOL_MTVSR_D  9) ;; move to vecter dest
@@ -6302,18 +6302,18 @@
 ;; GPR, and instead move the integer mask value to the vector register after a
 ;; shift and do the VSX logical operation.
 
-;; The insns for dealing with SFmode in GPR registers looks like:
+;; The insns for dealing with SFmode in GPR registers looks like on power8:
 ;; (set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN))
 ;;
-;; (set (reg:DI reg3) (unspec:DI [(reg:V4SF reg2)] UNSPEC_P8V_RELOAD_FROM_VSX))
+;; (set (reg:DI reg3) (zero_extend:DI (reg:SI reg2)))
 ;;
-;; (set (reg:DI reg4) (and:DI (reg:DI reg3) (reg:DI reg3)))
+;; (set (reg:DI reg4) (and:SI (reg:SI reg3) (reg:SI mask)))
 ;;
 ;; (set (reg:DI reg5) (ashift:DI (reg:DI reg4) (const_int 32)))
 ;;
 ;; (set (reg:SF reg6) (unspec:SF [(reg:DI reg5)] UNSPEC_P8V_MTVSRD))
 ;;
-;; (set (reg:SF reg6) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))
+;; (set (reg:SF reg7) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))
 
 (define_peephole2
   [(match_scratch:DI SFBOOL_TMP_GPR "r")
@@ -6394,6 +6394,138 @@
   operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d);
 })
 
+;; Constants for SFbool optimization on power9/power10
+(define_constants
+  [(SFBOOL2_TMP_VSX_V4SI0) ;; vector temporary (V4SI)
+   (SFBOOL2_TMP_GPR_SI  1) ;; GPR temporary (SI)
+   (SFBOOL2_MFVSR_D 2) ;; move to gpr dest (DI)
+   (SFBOOL2_MFVSR_A 3) ;; move to gpr src (SI)
+   (SFBOOL2_BOOL_D  4) ;; and/ior/xor dest (SI)
+   (SFBOOL2_BOOL_A1 5) ;; and/ior/xor arg1 (SI)
+   (SFBOOL2_BOOL_A2 6) ;; and/ior/xor arg2 (SI)
+   (SFBOOL2_SPLAT_D 7) ;; splat dest (V4SI)
+   (SFBOOL2_MTVSR_D 8) ;; move/splat to VSX dest.
+   (SFBOOL2_MTVSR_A 9) ;; move/splat to VSX arg.
+   (SFBOOL2_MFVSR_A_V4SI   10) ;; MFVSR_A as V4SI
+   (SFBOOL2_MTVSR_D_V4SI   11) ;; MTVSR_D as V4SI
+   (SFBOOL2_XXSPLTW12)])   ;; 1 or 3 for XXSPLTW
+
+;; On power9/power10, the code is different because we have a splat 32-bit
+;; operation that does a direct move to the FPR/vector registers (MTVSRWS).
+;;
+;; The insns for dealing with SFmode in GPR registers looks like on
+;; power9/power10:
+;;
+;; (set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN))
+;;
+;; (set (reg:DI reg3) (zero_extend:DI (reg:SI reg2)))
+;;
+;; (set (reg:SI reg4) (and:SI (reg:SI reg3) (reg:SI mask)))
+;;
+;; (set (reg:V4SI reg5) (vec_duplicate:V4SI (reg:SI reg4)))
+;;
+;; (set (reg:SF reg6) (unspec:SF [(reg:SF reg5)] UNSPEC_VSX_CVSPDPN))
+
+;; The VSX temporary needs to be an Altivec register in case we are trying to
+;; do and/ior/xor of -16..15 and we want to use VSPLTISW to load the constant.
+;;
+;; The GPR temporary is only used if we are trying to do a logical operation
+;; with a constant outside of the -16..15 range on a power9.  Otherwise, we can
+;; load the constant directly into the VSX temporary register.
+
+(define_peephole2
+  [(match_scratch:V4SI SFBOOL2_TMP_VSX_V4SI "v")
+   (match_scratch:SI SFBOOL2_TMP_GPR_SI "r")
+
+   ;; Zero_extend and direct move
+   (set (match_operand:DI SFBOOL2_MFVSR_D "int_reg_operand")
+   (zero_extend:DI
+(match_operand:SI SFBOOL2_MFVSR_A "vsx_register_operand")))
+
+   ;; AND/IOR/XOR operation on int
+   (set (match_operand:SI SFBOOL2_BOOL_D "int_reg_operand")
+   (and_ior_xor:SI
+(match_operand:SI SFBOOL2_BOOL_A1 "int_reg_operand")
+(ma

[gcc(refs/users/meissner/heads/work182-test)] Revert changes

2024-11-08 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:68ebb5004c2f7248658d737e2105401e8c910a6c

commit 68ebb5004c2f7248658d737e2105401e8c910a6c
Author: Michael Meissner 
Date:   Fri Nov 8 13:31:58 2024 -0500

Revert changes

Diff:
---
 gcc/config/rs6000/rs6000.h |   1 -
 gcc/config/rs6000/vsx.md   | 142 ++---
 2 files changed, 5 insertions(+), 138 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 197005af5195..8cfd9faf77dc 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -2525,4 +2525,3 @@ enum {
 
 #undef ARCH_EXPAND
 #endif /* GCC_HWINT_H.  */
-
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index bfa1516768bc..b2fc39acf4e8 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -6262,7 +6262,7 @@
(SFBOOL_MFVSR_A  3) ;; move to gpr src
(SFBOOL_BOOL_D   4) ;; and/ior/xor dest
(SFBOOL_BOOL_A1  5) ;; and/ior/xor arg1
-   (SFBOOL_BOOL_A2  6) ;; and/ior/xor arg2
+   (SFBOOL_BOOL_A2  6) ;; and/ior/xor arg1
(SFBOOL_SHL_D7) ;; shift left dest
(SFBOOL_SHL_A8) ;; shift left arg
(SFBOOL_MTVSR_D  9) ;; move to vecter dest
@@ -6302,18 +6302,18 @@
 ;; GPR, and instead move the integer mask value to the vector register after a
 ;; shift and do the VSX logical operation.
 
-;; The insns for dealing with SFmode in GPR registers looks like on power8:
+;; The insns for dealing with SFmode in GPR registers looks like:
 ;; (set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN))
 ;;
-;; (set (reg:DI reg3) (zero_extend:DI (reg:SI reg2)))
+;; (set (reg:DI reg3) (unspec:DI [(reg:V4SF reg2)] UNSPEC_P8V_RELOAD_FROM_VSX))
 ;;
-;; (set (reg:DI reg4) (and:SI (reg:SI reg3) (reg:SI mask)))
+;; (set (reg:DI reg4) (and:DI (reg:DI reg3) (reg:DI reg3)))
 ;;
 ;; (set (reg:DI reg5) (ashift:DI (reg:DI reg4) (const_int 32)))
 ;;
 ;; (set (reg:SF reg6) (unspec:SF [(reg:DI reg5)] UNSPEC_P8V_MTVSRD))
 ;;
-;; (set (reg:SF reg7) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))
+;; (set (reg:SF reg6) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))
 
 (define_peephole2
   [(match_scratch:DI SFBOOL_TMP_GPR "r")
@@ -6394,138 +6394,6 @@
   operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d);
 })
 
-;; Constants for SFbool optimization on power9/power10
-(define_constants
-  [(SFBOOL2_TMP_VSX_V4SI0) ;; vector temporary (V4SI)
-   (SFBOOL2_TMP_GPR_SI  1) ;; GPR temporary (SI)
-   (SFBOOL2_MFVSR_D 2) ;; move to gpr dest (DI)
-   (SFBOOL2_MFVSR_A 3) ;; move to gpr src (SI)
-   (SFBOOL2_BOOL_D  4) ;; and/ior/xor dest (SI)
-   (SFBOOL2_BOOL_A1 5) ;; and/ior/xor arg1 (SI)
-   (SFBOOL2_BOOL_A2 6) ;; and/ior/xor arg2 (SI)
-   (SFBOOL2_SPLAT_D 7) ;; splat dest (V4SI)
-   (SFBOOL2_MTVSR_D 8) ;; move/splat to VSX dest.
-   (SFBOOL2_MTVSR_A 9) ;; move/splat to VSX arg.
-   (SFBOOL2_MFVSR_A_V4SI   10) ;; MFVSR_A as V4SI
-   (SFBOOL2_MTVSR_D_V4SI   11) ;; MTVSR_D as V4SI
-   (SFBOOL2_XXSPLTW12)])   ;; 1 or 3 for XXSPLTW
-
-;; On power9/power10, the code is different because we have a splat 32-bit
-;; operation that does a direct move to the FPR/vector registers (MTVSRWS).
-;;
-;; The insns for dealing with SFmode in GPR registers looks like on
-;; power9/power10:
-;;
-;; (set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN))
-;;
-;; (set (reg:DI reg3) (zero_extend:DI (reg:SI reg2)))
-;;
-;; (set (reg:SI reg4) (and:SI (reg:SI reg3) (reg:SI mask)))
-;;
-;; (set (reg:V4SI reg5) (vec_duplicate:V4SI (reg:SI reg4)))
-;;
-;; (set (reg:SF reg6) (unspec:SF [(reg:SF reg5)] UNSPEC_VSX_CVSPDPN))
-
-;; The VSX temporary needs to be an Altivec register in case we are trying to
-;; do and/ior/xor of -16..15 and we want to use VSPLTISW to load the constant.
-;;
-;; The GPR temporary is only used if we are trying to do a logical operation
-;; with a constant outside of the -16..15 range on a power9.  Otherwise, we can
-;; load the constant directly into the VSX temporary register.
-
-(define_peephole2
-  [(match_scratch:V4SI SFBOOL2_TMP_VSX_V4SI "v")
-   (match_scratch:SI SFBOOL2_TMP_GPR_SI "r")
-
-   ;; Zero_extend and direct move
-   (set (match_operand:DI SFBOOL2_MFVSR_D "int_reg_operand")
-   (zero_extend:DI
-(match_operand:SI SFBOOL2_MFVSR_A "vsx_register_operand")))
-
-   ;; AND/IOR/XOR operation on int
-   (set (match_operand:SI SFBOOL2_BOOL_D "int_reg_operand")
-   (and_ior_xor:SI
-(match_operand:SI SFBOOL2_BOOL_A1 "int_reg_operand")
-(match_operand:SI SFBOOL2_BOOL_A2 "reg_or_cint_operand")

[gcc(refs/users/meissner/heads/work182-test)] Update ChangeLog.*

2024-11-08 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:43b639f4215a5f556a3967366bb6c76cd3aeb140

commit 43b639f4215a5f556a3967366bb6c76cd3aeb140
Author: Michael Meissner 
Date:   Fri Nov 8 13:34:01 2024 -0500

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.test | 13 +
 1 file changed, 13 insertions(+)

diff --git a/gcc/ChangeLog.test b/gcc/ChangeLog.test
index c20f5b300851..f3a58f306912 100644
--- a/gcc/ChangeLog.test
+++ b/gcc/ChangeLog.test
@@ -1,3 +1,16 @@
+ Branch work182-test, patch #510 
+
+Add power9 and power10 float to logical optimizations.
+
+2024-11-08  Michael Meissner  
+
+gcc/
+
+   PR target/117487
+   * config/rs6000/vsx.md (SFmode logical peephoole): Update comments in
+   the original code that supports power8.  Add a new define_peephole2 to
+   do the optimization on power9/power10.
+
  Branch work182-test, patch #501 was reverted 

  Branch work182-test, patch #500 was reverted 



[gcc r15-5053] c: Implement C2y N3356, if declarations [PR117019]

2024-11-08 Thread Marek Polacek via Gcc-cvs
https://gcc.gnu.org/g:440be01b07941506d1c8819448bd17c8717d55f5

commit r15-5053-g440be01b07941506d1c8819448bd17c8717d55f5
Author: Marek Polacek 
Date:   Thu Oct 31 09:28:15 2024 -0400

c: Implement C2y N3356, if declarations [PR117019]

This patch implements C2y N3356, if declarations as described at
<https://www.open-std.org/jtc1/sc22/wg14/www/docs/n3356.htm>.

This feature is cognate with C++17 Selection statements with initializer
<https://wg21.link/p0305r1>,
but they are not the same yet.  For example, C++17 allows

  if (lock (); int i = getval ())

whereas C2y does not.

The proposal adds new grammar productions.  selection-header is handled
in c_parser_selection_header which is the gist of the patch.
simple-declaration is handled by c_parser_declaration_or_fndef, which
gets a new parameter.

PR c/117019

gcc/c/ChangeLog:

* c-parser.cc (c_parser_declaration_or_fndef): Adjust declaration.
(c_parser_external_declaration): Adjust a call to
c_parser_declaration_or_fndef.
(c_parser_declaration_or_fndef): New bool parameter.  Return a tree
instead of void.  Adjust for N3356.  Adjust a call to
c_parser_declaration_or_fndef.
(c_parser_compound_statement_nostart): Adjust calls to
c_parser_declaration_or_fndef.
(c_parser_selection_header): New.
(c_parser_paren_selection_header): New.
(c_parser_if_statement): Call c_parser_paren_selection_header
instead of c_parser_paren_condition.
(c_parser_switch_statement): Call c_parser_selection_header instead of
c_parser_expression.
(c_parser_for_statement): Adjust calls to
c_parser_declaration_or_fndef.
(c_parser_objc_methodprotolist): Likewise.
(c_parser_oacc_routine): Likewise.
(c_parser_omp_loop_nest): Likewise.
(c_parser_omp_declare_simd): Likewise.

gcc/testsuite/ChangeLog:

* gcc.dg/c23-if-decls-1.c: New test.
* gcc.dg/c23-if-decls-2.c: New test.
* gcc.dg/c2y-if-decls-1.c: New test.
* gcc.dg/c2y-if-decls-2.c: New test.
* gcc.dg/c2y-if-decls-3.c: New test.
* gcc.dg/c2y-if-decls-4.c: New test.
* gcc.dg/c2y-if-decls-5.c: New test.
* gcc.dg/c2y-if-decls-6.c: New test.
* gcc.dg/c2y-if-decls-7.c: New test.
* gcc.dg/c2y-if-decls-8.c: New test.
* gcc.dg/c2y-if-decls-9.c: New test.
* gcc.dg/c2y-if-decls-10.c: New test.
* gcc.dg/c2y-if-decls-11.c: New test.
* gcc.dg/gnu2y-if-decls-1.c: New test.
* gcc.dg/gnu99-if-decls-1.c: New test.
* gcc.dg/gnu99-if-decls-2.c: New test.

Diff:
---
 gcc/c/c-parser.cc   | 253 +---
 gcc/testsuite/gcc.dg/c23-if-decls-1.c   |  15 ++
 gcc/testsuite/gcc.dg/c23-if-decls-2.c   |   6 +
 gcc/testsuite/gcc.dg/c2y-if-decls-1.c   | 168 +
 gcc/testsuite/gcc.dg/c2y-if-decls-10.c  |  38 +
 gcc/testsuite/gcc.dg/c2y-if-decls-11.c  | 199 +
 gcc/testsuite/gcc.dg/c2y-if-decls-2.c   |  35 +
 gcc/testsuite/gcc.dg/c2y-if-decls-3.c   |  39 +
 gcc/testsuite/gcc.dg/c2y-if-decls-4.c   | 199 +
 gcc/testsuite/gcc.dg/c2y-if-decls-5.c   |  35 +
 gcc/testsuite/gcc.dg/c2y-if-decls-6.c   |  27 
 gcc/testsuite/gcc.dg/c2y-if-decls-7.c   |  96 
 gcc/testsuite/gcc.dg/c2y-if-decls-8.c   | 168 +
 gcc/testsuite/gcc.dg/c2y-if-decls-9.c   |  35 +
 gcc/testsuite/gcc.dg/gnu2y-if-decls-1.c |  15 ++
 gcc/testsuite/gcc.dg/gnu99-if-decls-1.c |  15 ++
 gcc/testsuite/gcc.dg/gnu99-if-decls-2.c |  15 ++
 17 files changed, 1302 insertions(+), 56 deletions(-)

diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc
index 179c772fb76f..3ab8a49bf353 100644
--- a/gcc/c/c-parser.cc
+++ b/gcc/c/c-parser.cc
@@ -1634,8 +1634,8 @@ static bool c_parser_nth_token_starts_std_attributes 
(c_parser *,
 static tree c_parser_std_attribute_specifier_sequence (c_parser *);
 static void c_parser_external_declaration (c_parser *);
 static void c_parser_asm_definition (c_parser *);
-static void c_parser_declaration_or_fndef (c_parser *, bool, bool, bool,
-  bool, bool, tree * = NULL,
+static tree c_parser_declaration_or_fndef (c_parser *, bool, bool, bool,
+  bool, bool, bool, tree * = NULL,
   vec<c_token> * = NULL,
   bool have_attrs = false,
   tree attrs = NULL,
@@ -2060,7 +2060,8 @@ c_parser_external_declaration (c_parser *parser)
 an @interface or @protocol with prefix attributes).  We 

[gcc r15-5049] hppa: Fix handling of secondary reloads involving a SUBREG

2024-11-08 Thread John David Anglin via Gcc-cvs
https://gcc.gnu.org/g:1ea45291af0bc8f7b6dff67a0f23be662b2f9908

commit r15-5049-g1ea45291af0bc8f7b6dff67a0f23be662b2f9908
Author: John David Anglin 
Date:   Fri Nov 8 16:34:41 2024 -0500

hppa: Fix handling of secondary reloads involving a SUBREG

This is fairly subtle.

When handling spills for SUBREG arguments in pa_emit_move_sequence,
alter_subreg may be called.  It in turn calls adjust_address_1 and
change_address_1.  change_address_1 calls pa_legitimate_address_p
to validate the new spill address.  change_address_1 generates an
internal compiler error if the address is not valid.  We need to
allow 14-bit displacements for all modes when reload_in_progress
is true and strict is false to prevent the internal compiler error.

SUBREGs are only used with the general registers, so the spill
should result in an integer access.  14-bit displacements are okay
for integer loads and stores but not for floating-point loads and
stores.

Potentially, the change could break the handling of spills for the
floating-point registers but I believe these are handled separately
in pa_emit_move_sequence.

This change fixes the build of symmetrica-3.0.1+ds.

2024-11-08  John David Anglin  

gcc/ChangeLog:

PR target/117443
* config/pa/pa.cc (pa_legitimate_address_p): Allow any
14-bit displacement when reload is in progress and strict
is false.

Diff:
---
 gcc/config/pa/pa.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/config/pa/pa.cc b/gcc/config/pa/pa.cc
index 94ee7dbfa8ee..941ef3a71287 100644
--- a/gcc/config/pa/pa.cc
+++ b/gcc/config/pa/pa.cc
@@ -11009,6 +11009,7 @@ pa_legitimate_address_p (machine_mode mode, rtx x, bool 
strict, code_helper)
  /* Long 14-bit displacements always okay for these cases.  */
  if (INT14_OK_STRICT
  || reload_completed
+ || (reload_in_progress && !strict)
  || mode == QImode
  || mode == HImode)
return true;


[gcc r14-10910] hppa: Fix handling of secondary reloads involving a SUBREG

2024-11-08 Thread John David Anglin via Gcc-cvs
https://gcc.gnu.org/g:4b30972e5171093c472ef344297994dd00bf5e97

commit r14-10910-g4b30972e5171093c472ef344297994dd00bf5e97
Author: John David Anglin 
Date:   Fri Nov 8 16:34:41 2024 -0500

hppa: Fix handling of secondary reloads involving a SUBREG

This is fairly subtle.

When handling spills for SUBREG arguments in pa_emit_move_sequence,
alter_subreg may be called.  It in turn calls adjust_address_1 and
change_address_1.  change_address_1 calls pa_legitimate_address_p
to validate the new spill address.  change_address_1 generates an
internal compiler error if the address is not valid.  We need to
allow 14-bit displacements for all modes when reload_in_progress
is true and strict is false to prevent the internal compiler error.

SUBREGs are only used with the general registers, so the spill
should result in an integer access.  14-bit displacements are okay
for integer loads and stores but not for floating-point loads and
stores.

Potentially, the change could break the handling of spills for the
floating point-registers but I believe these are handled separately
in pa_emit_move_sequence.

This change fixes the build of symmetrica-3.0.1+ds.

2024-11-08  John David Anglin  

gcc/ChangeLog:

PR target/117443
* config/pa/pa.cc (pa_legitimate_address_p): Allow any
14-bit displacement when reload is in progress and strict
is false.

Diff:
---
 gcc/config/pa/pa.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/config/pa/pa.cc b/gcc/config/pa/pa.cc
index b24434628fa5..d72946f6c198 100644
--- a/gcc/config/pa/pa.cc
+++ b/gcc/config/pa/pa.cc
@@ -10983,6 +10983,7 @@ pa_legitimate_address_p (machine_mode mode, rtx x, bool 
strict, code_helper)
  /* Long 14-bit displacements always okay for these cases.  */
  if (INT14_OK_STRICT
  || reload_completed
+ || (reload_in_progress && !strict)
  || mode == QImode
  || mode == HImode)
return true;


[gcc r15-5050] hppa: Don't allow large modes in hard registers

2024-11-08 Thread John David Anglin via Gcc-cvs
https://gcc.gnu.org/g:3a1da8ffb71af1005c5a035d0eb5f956056adf32

commit r15-5050-g3a1da8ffb71af1005c5a035d0eb5f956056adf32
Author: John David Anglin 
Date:   Fri Nov 8 16:49:34 2024 -0500

hppa: Don't allow large modes in hard registers

LRA has problems handling spills for OI and TI modes.  There are
issues with SUBREG support as well.

This change fixes gcc.c-torture/compile/pr92618.c with LRA.

2024-11-08  John David Anglin  

gcc/ChangeLog:

PR target/117238
* config/pa/pa32-regs.h (PA_HARD_REGNO_MODE_OK): Don't allow
mode size 32.  Limit mode size 16 in general registers to
complex modes.

Diff:
---
 gcc/config/pa/pa32-regs.h | 15 ++-
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/gcc/config/pa/pa32-regs.h b/gcc/config/pa/pa32-regs.h
index 3467e03afed7..c9a27ef16587 100644
--- a/gcc/config/pa/pa32-regs.h
+++ b/gcc/config/pa/pa32-regs.h
@@ -187,10 +187,9 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  
If not, see
that includes the incoming arguments and the return value.  We specify a
set with no overlaps so that we don't have to specify that the destination
register is an early clobber in patterns using this mode.  Except for the
-   return value, the starting registers are odd.  For 128 and 256 bit modes,
-   we similarly specify non-overlapping sets of cpu registers.  However,
-   there aren't any patterns defined for modes larger than 64 bits at the
-   moment.
+   return value, the starting registers are odd.  Except for complex modes,
+   we don't allow modes larger than 64 bits in the general registers as there
+   are issues with copies, spills and SUBREG support.
 
We limit the modes allowed in the floating point registers to the
set of modes used in the machine definition.  In addition, we allow
@@ -217,15 +216,13 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  
If not, see
  ? (VALID_FP_MODE_P (MODE) \
&& (GET_MODE_SIZE (MODE) <= 4   \
|| (GET_MODE_SIZE (MODE) == 8 && ((REGNO) & 1) == 0)\
-   || (GET_MODE_SIZE (MODE) == 16 && ((REGNO) & 3) == 0)   \
-   || (GET_MODE_SIZE (MODE) == 32 && ((REGNO) & 7) == 0))) \
+   || (GET_MODE_SIZE (MODE) == 16 && ((REGNO) & 3) == 0))) \
: (GET_MODE_SIZE (MODE) <= UNITS_PER_WORD   \
   || (GET_MODE_SIZE (MODE) == 2 * UNITS_PER_WORD   \
  && ((((REGNO) & 1) == 1 && (REGNO) <= 25) || (REGNO) == 28))  \
   || (GET_MODE_SIZE (MODE) == 4 * UNITS_PER_WORD   \
- && ((REGNO) & 3) == 3 && (REGNO) <= 23)   \
-  || (GET_MODE_SIZE (MODE) == 8 * UNITS_PER_WORD   \
- && ((REGNO) & 7) == 3 && (REGNO) <= 19)))
+ && COMPLEX_MODE_P (MODE)  \
+ && ((REGNO) & 3) == 3 && (REGNO) <= 23)))
 
 /* How to renumber registers for gdb.


[gcc r15-5051] hppa: Don't use '%' operator in base14_operand

2024-11-08 Thread John David Anglin via Gcc-cvs
https://gcc.gnu.org/g:c9db5322ae39a49db0728a0a4cb5003efb6ae668

commit r15-5051-gc9db5322ae39a49db0728a0a4cb5003efb6ae668
Author: John David Anglin 
Date:   Fri Nov 8 16:54:48 2024 -0500

hppa: Don't use '%' operator in base14_operand

Division is slow on hppa and mode sizes are powers of 2.  So, we
can use '&' operator to check displacement alignment.

2024-11-08  John David Anglin  

gcc/ChangeLog:

* config/pa/predicates.md (base14_operand): Use '&' operator
instead of '%' to check displacement alignment.

Diff:
---
 gcc/config/pa/predicates.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/pa/predicates.md b/gcc/config/pa/predicates.md
index 0defd2282fbf..a27b2b1c78db 100644
--- a/gcc/config/pa/predicates.md
+++ b/gcc/config/pa/predicates.md
@@ -285,7 +285,7 @@
   return false;
 
 default:
-  return (INTVAL (op) % GET_MODE_SIZE (mode)) == 0;
+  return (INTVAL (op) & (GET_MODE_SIZE (mode) - 1)) == 0;
 }
 
   return false;


[gcc r14-10911] hppa: Don't use '%' operator in base14_operand

2024-11-08 Thread John David Anglin via Gcc-cvs
https://gcc.gnu.org/g:3bc7af0e2b06131465b8de560692c7011b45cf22

commit r14-10911-g3bc7af0e2b06131465b8de560692c7011b45cf22
Author: John David Anglin 
Date:   Fri Nov 8 16:54:48 2024 -0500

hppa: Don't use '%' operator in base14_operand

Division is slow on hppa and mode sizes are powers of 2.  So, we
can use '&' operator to check displacement alignment.

2024-11-08  John David Anglin  

gcc/ChangeLog:

* config/pa/predicates.md (base14_operand): Use '&' operator
instead of '%' to check displacement alignment.

Diff:
---
 gcc/config/pa/predicates.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/pa/predicates.md b/gcc/config/pa/predicates.md
index 50dffa1138ca..74f75770676e 100644
--- a/gcc/config/pa/predicates.md
+++ b/gcc/config/pa/predicates.md
@@ -285,7 +285,7 @@
   return false;
 
 default:
-  return (INTVAL (op) % GET_MODE_SIZE (mode)) == 0;
+  return (INTVAL (op) & (GET_MODE_SIZE (mode) - 1)) == 0;
 }
 
   return false;


[gcc r15-5052] hppa: Don't allow mode size 32 in hard registers

2024-11-08 Thread John David Anglin via Gcc-cvs
https://gcc.gnu.org/g:7175fece7df50326703e4ca8b49d7cc93a5e8dfe

commit r15-5052-g7175fece7df50326703e4ca8b49d7cc93a5e8dfe
Author: John David Anglin 
Date:   Fri Nov 8 16:58:49 2024 -0500

hppa: Don't allow mode size 32 in hard registers

LRA has problems handling spills for OI mode.  There are issues with
SUBREG support as well.

2024-11-08  John David Anglin  

gcc/ChangeLog:

PR target/117238
* config/pa/pa64-regs.h (PA_HARD_REGNO_MODE_OK): Don't allow
mode size 32.

Diff:
---
 gcc/config/pa/pa64-regs.h | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/gcc/config/pa/pa64-regs.h b/gcc/config/pa/pa64-regs.h
index 3b9273c28677..90762e119dcf 100644
--- a/gcc/config/pa/pa64-regs.h
+++ b/gcc/config/pa/pa64-regs.h
@@ -157,13 +157,10 @@ along with GCC; see the file COPYING3.  If not see
: FP_REGNO_P (REGNO)
\
  ? (VALID_FP_MODE_P (MODE) \
&& (GET_MODE_SIZE (MODE) <= 8   \
-   || (GET_MODE_SIZE (MODE) == 16 && ((REGNO) & 1) == 0)   \
-   || (GET_MODE_SIZE (MODE) == 32 && ((REGNO) & 3) == 0))) \
+   || (GET_MODE_SIZE (MODE) == 16 && ((REGNO) & 1) == 0))) \
: (GET_MODE_SIZE (MODE) <= UNITS_PER_WORD   \
   || (GET_MODE_SIZE (MODE) == 2 * UNITS_PER_WORD   \
- && ((((REGNO) & 1) == 1 && (REGNO) <= 25) || (REGNO) == 28))  \
-  || (GET_MODE_SIZE (MODE) == 4 * UNITS_PER_WORD   \
- && ((REGNO) & 3) == 3 && (REGNO) <= 23)))
+ && ((((REGNO) & 1) == 1 && (REGNO) <= 25) || (REGNO) == 28))))
 
 /* How to renumber registers for gdb.


[gcc r15-5048] libstdc++: Add some further attributes to ::operator new in <new>

2024-11-08 Thread Jakub Jelinek via Libstdc++-cvs
https://gcc.gnu.org/g:80e5be0c7f388cf8b8b321dca436ff529ac76867

commit r15-5048-g80e5be0c7f388cf8b8b321dca436ff529ac76867
Author: Jakub Jelinek 
Date:   Fri Nov 8 22:07:33 2024 +0100

libstdc++: Add some further attributes to ::operator new in <new>

I've noticed alloc_align attribute is missing on the non-vector
::operator new with std::align_val_t and const std::nothrow_t&
arguments, this patch adds it.  The last hunk is just
an attempt to make the line shorter.
The first hunk originally added also __alloc_size__ (1) attribute,
but seems that regresses
FAIL: g++.dg/tm/pr46270.C  -std=gnu++98 (test for excess errors)
with
Excess errors:
.../libstdc++-v3/libsupc++/new:137:26: warning: new declaration 'void* 
operator new(std::size_t)' ambiguates built-in declaration 'void* operator 
new(long unsigned int)
+transaction_safe' [-Wbuiltin-declaration-mismatch]
.../libstdc++-v3/libsupc++/new:140:26: warning: new declaration 'void* 
operator new [](std::size_t)' ambiguates built-in declaration 'void* operator 
new [](long unsigned int)
+transaction_safe' [-Wbuiltin-declaration-mismatch]
I must say I have no clue why that happens only in C++98 (C++11 and
above are quiet) and why only with -fgnu-tm, tried to debug that but
am lost.  It is some conflict with the predeclared ::operator new, but
those clearly do have the externally_visible attribute, and alloc_size (1)
attributes:
 extvisattr = build_tree_list (get_identifier ("externally_visible"),
   NULL_TREE);
 newattrs = tree_cons (get_identifier ("alloc_size"),
   build_tree_list (NULL_TREE, integer_one_node),
   extvisattr);
 newtype = cp_build_type_attribute_variant (ptr_ftype_sizetype, 
newattrs);
 newtype = build_exception_variant (newtype, new_eh_spec);
...
tree opnew = push_cp_library_fn (NEW_EXPR, newtype, 0);
DECL_IS_MALLOC (opnew) = 1;
DECL_SET_IS_OPERATOR_NEW (opnew, true);
DECL_IS_REPLACEABLE_OPERATOR (opnew) = 1;
and at C++98 I think libstdc++ doesn't add transaction_safe attribute:
 // Conditionally enable annotations for the Transactional Memory TS on 
C++11.
 // Most of the following conditions are due to limitations in the current
 // implementation.
 #if __cplusplus >= 201103L && _GLIBCXX_USE_CXX11_ABI\
   && _GLIBCXX_USE_DUAL_ABI && __cpp_transactional_memory >= 201500L \
   &&  !_GLIBCXX_FULLY_DYNAMIC_STRING && _GLIBCXX_USE_WEAK_REF   \
   && _GLIBCXX_USE_ALLOCATOR_NEW
 #define _GLIBCXX_TXN_SAFE transaction_safe
 #define _GLIBCXX_TXN_SAFE_DYN transaction_safe_dynamic
 #else
 #define _GLIBCXX_TXN_SAFE
 #define _GLIBCXX_TXN_SAFE_DYN
 #endif
push_cp_library_fn adds transaction_safe attribute whenever -fgnu-tm
is used, regardless of the other conditionals:
   if (flag_tm)
 apply_tm_attr (fn, get_identifier ("transaction_safe"));

Anyway, omitting alloc_size (1) fixes that test and given that the
predeclared operator new already has alloc_size (1) attribute, I think it
can be safely left out.

2024-11-08  Jakub Jelinek  

* libsupc++/new (::operator new, ::operator new[]): Add malloc
attribute where missing.  Add alloc_align attribute when
std::align_val_t is present and where it was missing.  Formatting 
fix.

Diff:
---
 libstdc++-v3/libsupc++/new | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/libstdc++-v3/libsupc++/new b/libstdc++-v3/libsupc++/new
index 4345030071bb..8f8eaf9c3903 100644
--- a/libstdc++-v3/libsupc++/new
+++ b/libstdc++-v3/libsupc++/new
@@ -136,10 +136,10 @@ namespace std
 */
 _GLIBCXX_NODISCARD void* operator new(std::size_t)
   _GLIBCXX_TXN_SAFE _GLIBCXX_THROW (std::bad_alloc)
-  __attribute__((__externally_visible__));
+  __attribute__((__externally_visible__, __malloc__));
 _GLIBCXX_NODISCARD void* operator new[](std::size_t)
   _GLIBCXX_TXN_SAFE _GLIBCXX_THROW (std::bad_alloc)
-  __attribute__((__externally_visible__));
+  __attribute__((__externally_visible__, __malloc__));
 void operator delete(void*) _GLIBCXX_TXN_SAFE _GLIBCXX_USE_NOEXCEPT
   __attribute__((__externally_visible__));
 void operator delete[](void*) _GLIBCXX_TXN_SAFE _GLIBCXX_USE_NOEXCEPT
@@ -169,8 +169,8 @@ _GLIBCXX_NODISCARD void* operator new(std::size_t, 
std::align_val_t)
   _GLIBCXX_TXN_SAFE
   __attribute__((__externally_visible__, __alloc_size__ (1), __alloc_align__ 
(2),  __malloc__));
 _GLIBCXX_NODISCARD void* operator new(std::size_t, std::align_val_t, const 
std::nothrow_t&)
-   _GLIBCXX_TXN_SAFE
-  _GLIBCXX_USE_NOEXCEPT __attribute__((__externally_visible__, __alloc_size__ 
(1), __malloc__));
+  _GLIBCXX_TXN_SAFE _GLIBCXX_USE_NOEXCEPT
+  __attribute__((__externally_visible__, __alloc_size__ (1), __alloc_align__ 
(2

[gcc(refs/users/meissner/heads/work182-test)] Update ChangeLog.*

2024-11-08 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:7a5058cbdd02116e30108c55cc18b21c7071c3fb

commit 7a5058cbdd02116e30108c55cc18b21c7071c3fb
Author: Michael Meissner 
Date:   Fri Nov 8 13:19:50 2024 -0500

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.test | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/gcc/ChangeLog.test b/gcc/ChangeLog.test
index 6f28010a85b3..3862b4c9794e 100644
--- a/gcc/ChangeLog.test
+++ b/gcc/ChangeLog.test
@@ -1,3 +1,17 @@
+ Branch work182-test, patch #501 
+
+Add power9 and power10 float to logical optimizations.
+
+2024-11-08  Michael Meissner  
+
+gcc/
+
+   PR target/117487
+   * config/rs6000/rs6000.cc (sf_logical_op_p): Delete.
+   * config/rs6000/vsx.md (SFmode logical peephole): Update comments in
+   the original code that supports power8.  Add a new define_peephole2 to
+   do the optimization on power9/power10.
+
  Branch work182-test, patch #500 
 
 Add debugging for PR 71977-1.c regression.


[gcc r15-5054] Update gcc-auto-profile / gen_autofdo_event.py

2024-11-08 Thread Andi Kleen via Gcc-cvs
https://gcc.gnu.org/g:9c8f3d5e7d5ee64ffe5c50a72b087227f2e8f957

commit r15-5054-g9c8f3d5e7d5ee64ffe5c50a72b087227f2e8f957
Author: Andi Kleen 
Date:   Thu Oct 31 16:31:02 2024 -0700

Update gcc-auto-profile / gen_autofdo_event.py

- Fix warnings from newer Python versions about bad escapes by
making all the Python strings raw.
- Add a fallback for using the builtin perf event list if the
CPU model number is unknown.
- Regenerate the shipped gcc-auto-profile with the changes.

contrib/ChangeLog:

* gen_autofdo_event.py: Convert strings to raw.
Add fallback to using builtin perf event list.

gcc/ChangeLog:

* config/i386/gcc-auto-profile: Regenerate.

Diff:
---
 contrib/gen_autofdo_event.py | 36 
 gcc/config/i386/gcc-auto-profile | 21 ++---
 2 files changed, 34 insertions(+), 23 deletions(-)

diff --git a/contrib/gen_autofdo_event.py b/contrib/gen_autofdo_event.py
index 4c201943b5c7..4e58a5320fff 100755
--- a/contrib/gen_autofdo_event.py
+++ b/contrib/gen_autofdo_event.py
@@ -112,7 +112,7 @@ for j in u:
 u.close()
 
 if args.script:
-print('''#!/bin/sh
+print(r'''#!/bin/sh
 # Profile workload for gcc profile feedback (autofdo) using Linux perf.
 # Auto generated. To regenerate for new CPUs run
 # contrib/gen_autofdo_event.py --script --all in gcc source
@@ -152,22 +152,26 @@ case `grep -E -q "^cpu family\s*: 6" /proc/cpuinfo &&
 for event, mod in eventmap.items():
 for m in mod[:-1]:
 print("model*:\ %s|\\" % m)
-print('model*:\ %s) E="%s$FLAGS" ;;' % (mod[-1], event))
-print('''*)
+print(r'model*:\ %s) E="%s$FLAGS" ;;' % (mod[-1], event))
+print(r'''*)
+if perf list br_inst_retired | grep -q br_inst_retired.near_taken ; 
then
+E=br_inst_retired.near_taken:p
+else
 echo >&2 "Unknown CPU. Run contrib/gen_autofdo_event.py --all --script to 
update script."
-   exit 1 ;;''')
-print("esac")
-print("set -x")
-print('if ! perf record -e $E -b "$@" ; then')
-print('  # PEBS may not actually be working even if the processor supports 
it')
-print('  # (e.g., in a virtual machine). Trying to run without /p.')
-print('  set +x')
-print('  echo >&2 "Retrying without /p."')
-print('  E="$(echo "${E}" | sed -e \'s/\/p/\//\')"')
-print('  set -x')
-print('  exec perf record -e $E -b "$@"')
-print(' set +x')
-print('fi')
+ exit 1
+fi ;;''')
+print(r"esac")
+print(r"set -x")
+print(r'if ! perf record -e $E -b "$@" ; then')
+print(r'  # PEBS may not actually be working even if the processor 
supports it')
+print(r'  # (e.g., in a virtual machine). Trying to run without /p.')
+print(r'  set +x')
+print(r'  echo >&2 "Retrying without /p."')
+print(r'  E="$(echo "${E}" | sed -e \'s/\/p/\//\ -e s/:p//)"')
+print(r'  set -x')
+print(r'  exec perf record -e $E -b "$@"')
+print(r' set +x')
+print(r'fi')
 
 if cpufound == 0 and not args.all:
 sys.exit('CPU %s not found' % cpu)
diff --git a/gcc/config/i386/gcc-auto-profile b/gcc/config/i386/gcc-auto-profile
index 04f7d35dcc51..528b34e42400 100755
--- a/gcc/config/i386/gcc-auto-profile
+++ b/gcc/config/i386/gcc-auto-profile
@@ -82,17 +82,24 @@ model*:\ 126|\
 model*:\ 167|\
 model*:\ 140|\
 model*:\ 141|\
-model*:\ 143|\
-model*:\ 207|\
 model*:\ 106|\
-model*:\ 108) E="cpu/event=0xc4,umask=0x20/p$FLAGS" ;;
+model*:\ 108|\
+model*:\ 173|\
+model*:\ 174) E="cpu/event=0xc4,umask=0x20/$FLAGS" ;;
 model*:\ 134|\
 model*:\ 150|\
-model*:\ 156|\
-model*:\ 190) E="cpu/event=0xc4,umask=0xfe/p$FLAGS" ;;
+model*:\ 156) E="cpu/event=0xc4,umask=0xfe/p$FLAGS" ;;
+model*:\ 143|\
+model*:\ 207) E="cpu/event=0xc4,umask=0x20/p$FLAGS" ;;
+model*:\ 190) E="cpu/event=0xc4,umask=0xc0/$FLAGS" ;;
+model*:\ 190) E="cpu/event=0xc4,umask=0xfe/$FLAGS" ;;
 *)
+if perf list br_inst_retired | grep -q br_inst_retired.near_taken ; 
then
+E=br_inst_retired.near_taken:p
+else
 echo >&2 "Unknown CPU. Run contrib/gen_autofdo_event.py --all --script to 
update script."
-   exit 1 ;;
+ exit 1
+fi ;;
 esac
 set -x
 if ! perf record -e $E -b "$@" ; then
@@ -100,7 +107,7 @@ if ! perf record -e $E -b "$@" ; then
   # (e.g., in a virtual machine). Trying to run without /p.
   set +x
   echo >&2 "Retrying without /p."
-  E="$(echo "${E}" | sed -e 's/\/p/\//')"
+  E="$(echo "${E}" | sed -e \'s/\/p/\//\ -e s/:p//)"
   set -x
   exec perf record -e $E -b "$@"
  set +x


[gcc r15-5047] libstdc++: Make some _Hashtable members inline

2024-11-08 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:7e1d9f58858153bee4bcbab45aa862442859d958

commit r15-5047-g7e1d9f58858153bee4bcbab45aa862442859d958
Author: Jonathan Wakely 
Date:   Fri Nov 1 14:26:38 2024 +

libstdc++: Make some _Hashtable members inline

libstdc++-v3/ChangeLog:

* include/bits/hashtable.h (_Hashtable): Add 'inline' to some
one-line constructors.

Reviewed-by: François Dumont 

Diff:
---
 libstdc++-v3/include/bits/hashtable.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/libstdc++-v3/include/bits/hashtable.h 
b/libstdc++-v3/include/bits/hashtable.h
index 6bcba2de368e..b36142b358a7 100644
--- a/libstdc++-v3/include/bits/hashtable.h
+++ b/libstdc++-v3/include/bits/hashtable.h
@@ -1264,6 +1264,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   typename _Hash, typename _RangeHash, typename _Unused,
   typename _RehashPolicy, typename _Traits>
 template<typename _InputIterator>
+  inline
   _Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal,
 _Hash, _RangeHash, _Unused, _RehashPolicy, _Traits>::
   _Hashtable(_InputIterator __f, _InputIterator __l,
@@ -1527,6 +1528,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   typename _ExtractKey, typename _Equal,
   typename _Hash, typename _RangeHash, typename _Unused,
   typename _RehashPolicy, typename _Traits>
+inline
 _Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal,
   _Hash, _RangeHash, _Unused, _RehashPolicy, _Traits>::
 _Hashtable(const _Hashtable& __ht)
@@ -1582,6 +1584,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   typename _ExtractKey, typename _Equal,
   typename _Hash, typename _RangeHash, typename _Unused,
   typename _RehashPolicy, typename _Traits>
+inline
 _Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal,
   _Hash, _RangeHash, _Unused, _RehashPolicy, _Traits>::
 _Hashtable(const _Hashtable& __ht, const allocator_type& __a)


[gcc r15-5036] aarch64: Extend support for the AE family of Cortex CPUs

2024-11-08 Thread Victor Do Nascimento via Gcc-cvs
https://gcc.gnu.org/g:775056616386b7d05f81a413a0ad72c63aa381bf

commit r15-5036-g775056616386b7d05f81a413a0ad72c63aa381bf
Author: Victor Do Nascimento 
Date:   Fri Nov 8 11:09:54 2024 +

aarch64: Extend support for the AE family of Cortex CPUs

Implement -mcpu options for:

  - Cortex-A520AE
  - Cortex-A720AE
  - Cortex-R82AE

These all implement the same feature sets as their non-AE
counterparts, using the same scheduler and costs and differing only in
their respective part numbers.

gcc/ChangeLog:

* config/aarch64/aarch64-cores.def (cortex-a520ae,
cortex-a720ae, cortex-r82ae): Define new entries.
* config/aarch64/aarch64-tune.md: Regenerate.
* doc/invoke.texi: Document A520AE, A720AE and R82AE CPUs.

Diff:
---
 gcc/config/aarch64/aarch64-cores.def | 3 +++
 gcc/config/aarch64/aarch64-tune.md   | 2 +-
 gcc/doc/invoke.texi  | 9 +
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-cores.def 
b/gcc/config/aarch64/aarch64-cores.def
index 8c398596be36..ea435931ccca 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -171,6 +171,7 @@ AARCH64_CORE("cortex-a76.cortex-a55",  cortexa76cortexa55, 
cortexa53, V8_2A,  (F
 
 /* Armv8-R Architecture Processors.  */
 AARCH64_CORE("cortex-r82", cortexr82, cortexa53, V8R, (), cortexa53, 0x41, 
0xd15, -1)
+AARCH64_CORE("cortex-r82ae", cortexr82ae, cortexa53, V8R, (), cortexa53, 0x41, 
0xd14, -1)
 
 /* Armv9.0-A Architecture Processors.  */
 
@@ -178,12 +179,14 @@ AARCH64_CORE("cortex-r82", cortexr82, cortexa53, V8R, (), 
cortexa53, 0x41, 0xd15
 AARCH64_CORE("cortex-a510",  cortexa510, cortexa53, V9A,  (SVE2_BITPERM, 
MEMTAG, I8MM, BF16), cortexa53, 0x41, 0xd46, -1)
 
 AARCH64_CORE("cortex-a520",  cortexa520, cortexa53, V9_2A,  (SVE2_BITPERM, 
MEMTAG), cortexa53, 0x41, 0xd80, -1)
+AARCH64_CORE("cortex-a520ae",  cortexa520ae, cortexa53, V9_2A,  (SVE2_BITPERM, 
MEMTAG), cortexa53, 0x41, 0xd88, -1)
 
 AARCH64_CORE("cortex-a710",  cortexa710, cortexa57, V9A,  (SVE2_BITPERM, 
MEMTAG, I8MM, BF16), neoversen2, 0x41, 0xd47, -1)
 
 AARCH64_CORE("cortex-a715",  cortexa715, cortexa57, V9A,  (SVE2_BITPERM, 
MEMTAG, I8MM, BF16), neoversen2, 0x41, 0xd4d, -1)
 
 AARCH64_CORE("cortex-a720",  cortexa720, cortexa57, V9_2A,  (SVE2_BITPERM, 
MEMTAG, PROFILE), neoversen2, 0x41, 0xd81, -1)
+AARCH64_CORE("cortex-a720ae",  cortexa720ae, cortexa57, V9_2A,  (SVE2_BITPERM, 
MEMTAG, PROFILE), neoversen2, 0x41, 0xd89, -1)
 AARCH64_CORE("cortex-a725",  cortexa725, cortexa57, V9_2A, (SVE2_BITPERM, 
MEMTAG, PROFILE), neoversen3, 0x41, 0xd87, -1)
 
 AARCH64_CORE("cortex-x2",  cortexx2, cortexa57, V9A,  (SVE2_BITPERM, MEMTAG, 
I8MM, BF16), neoversen2, 0x41, 0xd48, -1)
diff --git a/gcc/config/aarch64/aarch64-tune.md 
b/gcc/config/aarch64/aarch64-tune.md
index 4423a99f3afe..54c65cbf68df 100644
--- a/gcc/config/aarch64/aarch64-tune.md
+++ b/gcc/config/aarch64/aarch64-tune.md
@@ -1,5 +1,5 @@
 ;; -*- buffer-read-only: t -*-
 ;; Generated automatically by gentune.sh from aarch64-cores.def
 (define_attr "tune"
-   
"cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88,thunderxt88p1,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,ampere1b,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,neoversen1,ares,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,fujitsu_monaka,tsv110,thunderx3t110,neoversev1,zeus,neoverse512tvb,saphira,oryon1,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa520,cortexa710,cortexa715,cortexa720,cortexa725,cortexx2,cortexx3,cortexx4,cortexx925,neoversen2,cobalt100,neoversen3,neoversev2,grace,neoversev3,neoversev3ae,demeter,generic,generic_armv8_a,generic_armv9_a"
+   
"cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88,thunderxt88p1,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,ampere1b,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,neoversen1,ares,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,fujitsu_monaka,tsv110,thunderx3t110,neoversev1,zeus,neoverse512tvb,saphira,oryon1,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexr82ae,cortexa510,cortexa520,cortexa520ae,cortexa710,cortexa715,cortexa720,cortexa720ae,cortexa725,cortexx2,cortexx3,cortexx4,cortexx925,neoversen2,cobal

[gcc r15-5035] testsuite: arm: Use effective-target for nomve_fp_1 test

2024-11-08 Thread Torbjorn Svensson via Gcc-cvs
https://gcc.gnu.org/g:e8886406fac50f80a521a4100a80517e50e1c388

commit r15-5035-ge8886406fac50f80a521a4100a80517e50e1c388
Author: Torbjörn SVENSSON 
Date:   Thu Oct 31 19:11:57 2024 +0100

testsuite: arm: Use effective-target for nomve_fp_1 test

Test uses MVE, so add effective-target arm_fp requirement.

gcc/testsuite/ChangeLog:

* g++.target/arm/mve/general-c++/nomve_fp_1.c: Use
effective-target arm_fp.

Signed-off-by: Torbjörn SVENSSON 

Diff:
---
 gcc/testsuite/g++.target/arm/mve/general-c++/nomve_fp_1.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcc/testsuite/g++.target/arm/mve/general-c++/nomve_fp_1.c 
b/gcc/testsuite/g++.target/arm/mve/general-c++/nomve_fp_1.c
index e0692ceb8c83..a2069d353cf9 100644
--- a/gcc/testsuite/g++.target/arm/mve/general-c++/nomve_fp_1.c
+++ b/gcc/testsuite/g++.target/arm/mve/general-c++/nomve_fp_1.c
@@ -1,9 +1,11 @@
 /* { dg-do compile } */
+/* { dg-require-effective-target arm_fp_ok } */
 /* { dg-require-effective-target arm_v8_1m_mve_ok } */
 /* Do not use dg-add-options arm_v8_1m_mve, because this might expand to "",
which could imply mve+fp depending on the user settings. We want to make
sure the '+fp' extension is not enabled.  */
 /* { dg-options "-mfpu=auto -march=armv8.1-m.main+mve" } */
+/* { dg-add-options arm_fp } */
 
 #include 


[gcc r14-10898] testsuite: arm: Use effective-target for nomve_fp_1 test

2024-11-08 Thread Torbjorn Svensson via Gcc-cvs
https://gcc.gnu.org/g:ef7719338423acd0bffc11895c8bb7c78e45c2f9

commit r14-10898-gef7719338423acd0bffc11895c8bb7c78e45c2f9
Author: Torbjörn SVENSSON 
Date:   Thu Oct 31 19:11:57 2024 +0100

testsuite: arm: Use effective-target for nomve_fp_1 test

Test uses MVE, so add effective-target arm_fp requirement.

gcc/testsuite/ChangeLog:

* g++.target/arm/mve/general-c++/nomve_fp_1.c: Use
effective-target arm_fp.

Signed-off-by: Torbjörn SVENSSON 
(cherry picked from commit e8886406fac50f80a521a4100a80517e50e1c388)

Diff:
---
 gcc/testsuite/g++.target/arm/mve/general-c++/nomve_fp_1.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcc/testsuite/g++.target/arm/mve/general-c++/nomve_fp_1.c 
b/gcc/testsuite/g++.target/arm/mve/general-c++/nomve_fp_1.c
index e0692ceb8c83..a2069d353cf9 100644
--- a/gcc/testsuite/g++.target/arm/mve/general-c++/nomve_fp_1.c
+++ b/gcc/testsuite/g++.target/arm/mve/general-c++/nomve_fp_1.c
@@ -1,9 +1,11 @@
 /* { dg-do compile } */
+/* { dg-require-effective-target arm_fp_ok } */
 /* { dg-require-effective-target arm_v8_1m_mve_ok } */
 /* Do not use dg-add-options arm_v8_1m_mve, because this might expand to "",
which could imply mve+fp depending on the user settings. We want to make
sure the '+fp' extension is not enabled.  */
 /* { dg-options "-mfpu=auto -march=armv8.1-m.main+mve" } */
+/* { dg-add-options arm_fp } */
 
 #include 


[gcc r15-5041] testsuite: arm: Use effective-target arm_libc_fp_abi for pr68620.c test

2024-11-08 Thread Torbjorn Svensson via Gcc-cvs
https://gcc.gnu.org/g:dc5d559494656c17c4faa99f398047b7d0c33adc

commit r15-5041-gdc5d559494656c17c4faa99f398047b7d0c33adc
Author: Torbjörn SVENSSON 
Date:   Wed Nov 6 07:12:14 2024 +0100

testsuite: arm: Use effective-target arm_libc_fp_abi for pr68620.c test

This fixes reported regression at
https://linaro.atlassian.net/browse/GNU-1407.

gcc/testsuite/ChangeLog:

* gcc.target/arm/pr68620.c: Use effective-target
arm_libc_fp_abi.
* lib/target-supports.exp: Define effective-target
arm_libc_fp_abi.

Signed-off-by: Torbjörn SVENSSON 
Co-authored-by: Richard Earnshaw 

Diff:
---
 gcc/testsuite/gcc.target/arm/pr68620.c |  4 +++-
 gcc/testsuite/lib/target-supports.exp  | 35 ++
 2 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/arm/pr68620.c 
b/gcc/testsuite/gcc.target/arm/pr68620.c
index 6e38671752f2..3ffaa5c5a9c9 100644
--- a/gcc/testsuite/gcc.target/arm/pr68620.c
+++ b/gcc/testsuite/gcc.target/arm/pr68620.c
@@ -1,8 +1,10 @@
 /* { dg-do compile } */
 /* { dg-skip-if "-mpure-code supports M-profile without Neon only" { *-*-* } { 
"-mpure-code" } } */
 /* { dg-require-effective-target arm_arch_v7a_ok } */
-/* { dg-options "-mfp16-format=ieee -mfpu=auto -mfloat-abi=softfp" } */
+/* { dg-require-effective-target arm_libc_fp_abi_ok } */
+/* { dg-options "-mfp16-format=ieee -mfpu=auto" } */
 /* { dg-add-options arm_arch_v7a } */
+/* { dg-add-options arm_libc_fp_abi } */
 
 #include "arm_neon.h"
 
diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index 75703ddca608..0c2fd83f45c8 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -4950,6 +4950,41 @@ proc add_options_for_arm_fp { flags } {
 return "$flags $et_arm_fp_flags"
 }
 
+# Some libc headers will only compile correctly if the correct ABI flags
+# are picked for the target environment.  Try to find an ABI setting
+# that works.  Glibc falls into this category.  This test is intended
+# to enable FP as far as possible, so does not try -mfloat-abi=soft.
+proc check_effective_target_arm_libc_fp_abi_ok_nocache { } {
+global et_arm_libc_fp_abi_flags
+set et_arm_libc_fp_abi_flags ""
+if { [check_effective_target_arm32] } {
+   foreach flags {"-mfloat-abi=hard" "-mfloat-abi=softfp"} {
+   if { [check_no_compiler_messages_nocache arm_libc_fp_abi_ok object {
+   #include 
+   } "$flags"] } {
+   set et_arm_libc_fp_abi_flags $flags
+   return 1
+   }
+   }
+}
+return 0
+}
+
+proc  check_effective_target_arm_libc_fp_abi_ok { } {
+return [check_cached_effective_target arm_libc_fp_abi_ok \
+   check_effective_target_arm_libc_fp_abi_ok_nocache]
+}
+
+# Add flags that pick the right ABI for the supported libc headers on
+# this platform.
+proc add_options_for_arm_libc_fp_abi { flags } {
+if { ! [check_effective_target_arm_libc_fp_abi_ok] } {
+   return "$flags"
+}
+global et_arm_libc_fp_abi_flags
+return "$flags $et_arm_libc_fp_abi_flags"
+}
+
 # Return 1 if this is an ARM target defining __ARM_FP with
 # double-precision support. We may need -mfloat-abi=softfp or
 # equivalent options.  Some multilibs may be incompatible with these


[gcc r15-5042] testsuite: arm: Use check-function-bodies in epilog-1.c test

2024-11-08 Thread Torbjorn Svensson via Gcc-cvs
https://gcc.gnu.org/g:ec86e87439b4a5cf73da6f318757f3561f9f278a

commit r15-5042-gec86e87439b4a5cf73da6f318757f3561f9f278a
Author: Torbjörn SVENSSON 
Date:   Thu Nov 7 20:09:48 2024 +0100

testsuite: arm: Use check-function-bodies in epilog-1.c test

Update test case for armv8.1-m.main that supports conditional
arithmetic.

armv7-m:
push    {r4, lr}
ldr     r4, .L6
ldr     r4, [r4]
lsls    r4, r4, #29
it      mi
addmi   r2, r2, #1
bl      bar
movs    r0, #0
pop     {r4, pc}

armv8.1-m.main:
push    {r3, r4, r5, lr}
ldr     r4, .L5
ldr     r5, [r4]
tst     r5, #4
csinc   r2, r2, r2, eq
bl      bar
movs    r0, #0
pop     {r3, r4, r5, pc}

gcc/testsuite/ChangeLog:

* gcc.target/arm/epilog-1.c: Use check-function-bodies.

Signed-off-by: Torbjörn SVENSSON 

Diff:
---
 gcc/testsuite/gcc.target/arm/epilog-1.c | 24 +---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/gcc.target/arm/epilog-1.c 
b/gcc/testsuite/gcc.target/arm/epilog-1.c
index f97f1ebeaaf3..a15164564606 100644
--- a/gcc/testsuite/gcc.target/arm/epilog-1.c
+++ b/gcc/testsuite/gcc.target/arm/epilog-1.c
@@ -2,16 +2,34 @@
 /* { dg-do compile } */
 /* { dg-options "-mthumb -Os" } */
 /* { dg-require-effective-target arm_thumb2_ok } */
+/* { dg-final { check-function-bodies "**" "" } } */
 
 volatile int g_k;
 extern void bar(int, int, int, int);
 
+/*
+** foo:
+** ...
+** (
+
+Below block is for non-armv8.1-m.main
+** lslsr[0-9]+, r[0-9]+, #29
+** it  mi
+** addmi   r2, r2, #1
+
+** |
+
+Below block is for armv8.1-m.main
+** tst r[0-9]+, #4
+** csinc   r2, r2, r2, eq
+
+** )
+** bl  bar
+** ...
+*/
 int foo(int a, int b, int c, int d)
 {
   if (g_k & 4) c++;
   bar (a, b, c, d);
   return 0;
 }
-
-/* { dg-final { scan-assembler-times "lsls.*#29" 1 } } */
-/* { dg-final { scan-assembler-not "tst" } } */


[gcc r15-5039] testsuite: arm: Use effective-target for pr84556.cc test

2024-11-08 Thread Torbjorn Svensson via Gcc-cvs
https://gcc.gnu.org/g:85c3d944800257248ab48cdc75c2c02fadf63c73

commit r15-5039-g85c3d944800257248ab48cdc75c2c02fadf63c73
Author: Torbjörn SVENSSON 
Date:   Wed Nov 6 10:28:34 2024 +0100

testsuite: arm: Use effective-target for pr84556.cc test

Using "dg-do run" with a selector overrides the default selector set by
vect.exp that picks between "dg-do run" and "dg-do compile" based on the
target's support for simd operations for Arm targets.
The actual selection of default operation is performed in
check_vect_support_and_set_flags.

gcc/testsuite/ChangeLog:

* g++.dg/vect/pr84556.cc: Change from "dg-do run" with selector
to instead use dg-require-effective-target with the same
selector.

Signed-off-by: Torbjörn SVENSSON 

Diff:
---
 gcc/testsuite/g++.dg/vect/pr84556.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/g++.dg/vect/pr84556.cc 
b/gcc/testsuite/g++.dg/vect/pr84556.cc
index 6b1c9cec515a..c7e331628a88 100644
--- a/gcc/testsuite/g++.dg/vect/pr84556.cc
+++ b/gcc/testsuite/g++.dg/vect/pr84556.cc
@@ -1,5 +1,5 @@
 // PR c++/84556
-// { dg-do run { target c++11 } }
+// { dg-require-effective-target c++11 }
 // { dg-additional-options "-O2 -fopenmp-simd" }
 // { dg-additional-options "-mavx" { target avx_runtime } }


[gcc r15-5040] testsuite: arm: Allow vst1.32 instruction in pr40457-2.c

2024-11-08 Thread Torbjorn Svensson via Gcc-cvs
https://gcc.gnu.org/g:636b8aeacd182351313381636ecbf8dcef1ee45a

commit r15-5040-g636b8aeacd182351313381636ecbf8dcef1ee45a
Author: Torbjörn SVENSSON 
Date:   Thu Nov 7 18:05:19 2024 +0100

testsuite: arm: Allow vst1.32 instruction in pr40457-2.c

When building the test case with neon, the 'vst1.32' instruction is used
instead of 'strd'. Allow both variants to make the test pass.

gcc/testsuite/ChangeLog:

* gcc.target/arm/pr40457-2.c: Add vst1.32 as an allowed
instruction.

Signed-off-by: Torbjörn SVENSSON 

Diff:
---
 gcc/testsuite/gcc.target/arm/pr40457-2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/arm/pr40457-2.c 
b/gcc/testsuite/gcc.target/arm/pr40457-2.c
index 31624d35127d..5f742a3029af 100644
--- a/gcc/testsuite/gcc.target/arm/pr40457-2.c
+++ b/gcc/testsuite/gcc.target/arm/pr40457-2.c
@@ -7,4 +7,4 @@ void foo(int* p)
   p[1] = 0;
 }
 
-/* { dg-final { scan-assembler "strd|stm" } } */
+/* { dg-final { scan-assembler "strd|stm|vst1\\.32" } } */


[gcc r14-10900] testsuite: arm: Allow vst1.32 instruction in pr40457-2.c

2024-11-08 Thread Torbjorn Svensson via Gcc-cvs
https://gcc.gnu.org/g:82191dec727fdc4740a7fffce01002a2dcfb3b6f

commit r14-10900-g82191dec727fdc4740a7fffce01002a2dcfb3b6f
Author: Torbjörn SVENSSON 
Date:   Thu Nov 7 18:05:19 2024 +0100

testsuite: arm: Allow vst1.32 instruction in pr40457-2.c

When building the test case with neon, the 'vst1.32' instruction is used
instead of 'strd'. Allow both variants to make the test pass.

gcc/testsuite/ChangeLog:

* gcc.target/arm/pr40457-2.c: Add vst1.32 as an allowed
instruction.

Signed-off-by: Torbjörn SVENSSON 
(cherry picked from commit 636b8aeacd182351313381636ecbf8dcef1ee45a)

Diff:
---
 gcc/testsuite/gcc.target/arm/pr40457-2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/arm/pr40457-2.c 
b/gcc/testsuite/gcc.target/arm/pr40457-2.c
index 31624d35127d..5f742a3029af 100644
--- a/gcc/testsuite/gcc.target/arm/pr40457-2.c
+++ b/gcc/testsuite/gcc.target/arm/pr40457-2.c
@@ -7,4 +7,4 @@ void foo(int* p)
   p[1] = 0;
 }
 
-/* { dg-final { scan-assembler "strd|stm" } } */
+/* { dg-final { scan-assembler "strd|stm|vst1\\.32" } } */


[gcc r14-10901] testsuite: arm: Use effective-target arm_libc_fp_abi for pr68620.c test

2024-11-08 Thread Torbjorn Svensson via Gcc-cvs
https://gcc.gnu.org/g:29284becc5cbfdf3a474b75087e71812c1e70de1

commit r14-10901-g29284becc5cbfdf3a474b75087e71812c1e70de1
Author: Torbjörn SVENSSON 
Date:   Wed Nov 6 07:12:14 2024 +0100

testsuite: arm: Use effective-target arm_libc_fp_abi for pr68620.c test

This fixes reported regression at
https://linaro.atlassian.net/browse/GNU-1407.

gcc/testsuite/ChangeLog:

* gcc.target/arm/pr68620.c: Use effective-target
arm_libc_fp_abi.
* lib/target-supports.exp: Define effective-target
arm_libc_fp_abi.

Signed-off-by: Torbjörn SVENSSON 
Co-authored-by: Richard Earnshaw 
(cherry picked from commit dc5d559494656c17c4faa99f398047b7d0c33adc)

Diff:
---
 gcc/testsuite/gcc.target/arm/pr68620.c |  4 +++-
 gcc/testsuite/lib/target-supports.exp  | 35 ++
 2 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/arm/pr68620.c 
b/gcc/testsuite/gcc.target/arm/pr68620.c
index 6e38671752f2..3ffaa5c5a9c9 100644
--- a/gcc/testsuite/gcc.target/arm/pr68620.c
+++ b/gcc/testsuite/gcc.target/arm/pr68620.c
@@ -1,8 +1,10 @@
 /* { dg-do compile } */
 /* { dg-skip-if "-mpure-code supports M-profile without Neon only" { *-*-* } { 
"-mpure-code" } } */
 /* { dg-require-effective-target arm_arch_v7a_ok } */
-/* { dg-options "-mfp16-format=ieee -mfpu=auto -mfloat-abi=softfp" } */
+/* { dg-require-effective-target arm_libc_fp_abi_ok } */
+/* { dg-options "-mfp16-format=ieee -mfpu=auto" } */
 /* { dg-add-options arm_arch_v7a } */
+/* { dg-add-options arm_libc_fp_abi } */
 
 #include "arm_neon.h"
 
diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index d8b5a06a4224..6b8603b1af86 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -4733,6 +4733,41 @@ proc add_options_for_arm_fp { flags } {
 return "$flags $et_arm_fp_flags"
 }
 
+# Some libc headers will only compile correctly if the correct ABI flags
+# are picked for the target environment.  Try to find an ABI setting
+# that works.  Glibc falls into this category.  This test is intended
+# to enable FP as far as possible, so does not try -mfloat-abi=soft.
+proc check_effective_target_arm_libc_fp_abi_ok_nocache { } {
+global et_arm_libc_fp_abi_flags
+set et_arm_libc_fp_abi_flags ""
+if { [check_effective_target_arm32] } {
+   foreach flags {"-mfloat-abi=hard" "-mfloat-abi=softfp"} {
+   if { [check_no_compiler_messages_nocache arm_libc_fp_abi_ok object {
+   #include 
+   } "$flags"] } {
+   set et_arm_libc_fp_abi_flags $flags
+   return 1
+   }
+   }
+}
+return 0
+}
+
+proc  check_effective_target_arm_libc_fp_abi_ok { } {
+return [check_cached_effective_target arm_libc_fp_abi_ok \
+   check_effective_target_arm_libc_fp_abi_ok_nocache]
+}
+
+# Add flags that pick the right ABI for the supported libc headers on
+# this platform.
+proc add_options_for_arm_libc_fp_abi { flags } {
+if { ! [check_effective_target_arm_libc_fp_abi_ok] } {
+   return "$flags"
+}
+global et_arm_libc_fp_abi_flags
+return "$flags $et_arm_libc_fp_abi_flags"
+}
+
 # Return 1 if this is an ARM target defining __ARM_FP with
 # double-precision support. We may need -mfloat-abi=softfp or
 # equivalent options.  Some multilibs may be incompatible with these


[gcc r14-10902] testsuite: arm: Use check-function-bodies in epilog-1.c test

2024-11-08 Thread Torbjorn Svensson via Gcc-cvs
https://gcc.gnu.org/g:724446556e5cf2686a12076d282214f02119beb4

commit r14-10902-g724446556e5cf2686a12076d282214f02119beb4
Author: Torbjörn SVENSSON 
Date:   Thu Nov 7 20:09:48 2024 +0100

testsuite: arm: Use check-function-bodies in epilog-1.c test

Update test case for armv8.1-m.main that supports conditional
arithmetic.

armv7-m:
push    {r4, lr}
ldr     r4, .L6
ldr     r4, [r4]
lsls    r4, r4, #29
it      mi
addmi   r2, r2, #1
bl      bar
movs    r0, #0
pop     {r4, pc}

armv8.1-m.main:
push    {r3, r4, r5, lr}
ldr     r4, .L5
ldr     r5, [r4]
tst     r5, #4
csinc   r2, r2, r2, eq
bl      bar
movs    r0, #0
pop     {r3, r4, r5, pc}

gcc/testsuite/ChangeLog:

* gcc.target/arm/epilog-1.c: Use check-function-bodies.

Signed-off-by: Torbjörn SVENSSON 
(cherry picked from commit ec86e87439b4a5cf73da6f318757f3561f9f278a)

Diff:
---
 gcc/testsuite/gcc.target/arm/epilog-1.c | 24 +---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/gcc.target/arm/epilog-1.c 
b/gcc/testsuite/gcc.target/arm/epilog-1.c
index f97f1ebeaaf3..a15164564606 100644
--- a/gcc/testsuite/gcc.target/arm/epilog-1.c
+++ b/gcc/testsuite/gcc.target/arm/epilog-1.c
@@ -2,16 +2,34 @@
 /* { dg-do compile } */
 /* { dg-options "-mthumb -Os" } */
 /* { dg-require-effective-target arm_thumb2_ok } */
+/* { dg-final { check-function-bodies "**" "" } } */
 
 volatile int g_k;
 extern void bar(int, int, int, int);
 
+/*
+** foo:
+** ...
+** (
+
+Below block is for non-armv8.1-m.main
+** lslsr[0-9]+, r[0-9]+, #29
+** it  mi
+** addmi   r2, r2, #1
+
+** |
+
+Below block is for armv8.1-m.main
+** tst r[0-9]+, #4
+** csinc   r2, r2, r2, eq
+
+** )
+** bl  bar
+** ...
+*/
 int foo(int a, int b, int c, int d)
 {
   if (g_k & 4) c++;
   bar (a, b, c, d);
   return 0;
 }
-
-/* { dg-final { scan-assembler-times "lsls.*#29" 1 } } */
-/* { dg-final { scan-assembler-not "tst" } } */


[gcc r14-10899] testsuite: arm: Use effective-target for pr84556.cc test

2024-11-08 Thread Torbjorn Svensson via Gcc-cvs
https://gcc.gnu.org/g:8cf9b2657046421327956ff972e70d348c06ae1a

commit r14-10899-g8cf9b2657046421327956ff972e70d348c06ae1a
Author: Torbjörn SVENSSON 
Date:   Wed Nov 6 10:28:34 2024 +0100

testsuite: arm: Use effective-target for pr84556.cc test

Using "dg-do run" with a selector overrides the default selector set by
vect.exp that picks between "dg-do run" and "dg-do compile" based on the
target's support for simd operations for Arm targets.
The actual selection of default operation is performed in
check_vect_support_and_set_flags.

gcc/testsuite/ChangeLog:

* g++.dg/vect/pr84556.cc: Change from "dg-do run" with selector
to instead use dg-require-effective-target with the same
selector.

Signed-off-by: Torbjörn SVENSSON 
(cherry picked from commit 85c3d944800257248ab48cdc75c2c02fadf63c73)

Diff:
---
 gcc/testsuite/g++.dg/vect/pr84556.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/g++.dg/vect/pr84556.cc 
b/gcc/testsuite/g++.dg/vect/pr84556.cc
index 6b1c9cec515a..c7e331628a88 100644
--- a/gcc/testsuite/g++.dg/vect/pr84556.cc
+++ b/gcc/testsuite/g++.dg/vect/pr84556.cc
@@ -1,5 +1,5 @@
 // PR c++/84556
-// { dg-do run { target c++11 } }
+// { dg-require-effective-target c++11 }
 // { dg-additional-options "-O2 -fopenmp-simd" }
 // { dg-additional-options "-mavx" { target avx_runtime } }


[gcc r15-5037] libstdc++: Simplify __detail::__distance_fw using 'if constexpr'

2024-11-08 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:b907cde027dc65af5842c06c50cfa12e2f58133b

commit r15-5037-gb907cde027dc65af5842c06c50cfa12e2f58133b
Author: Jonathan Wakely 
Date:   Fri Nov 1 12:38:29 2024 +

libstdc++: Simplify __detail::__distance_fw using 'if constexpr'

This uses 'if constexpr' instead of tag dispatching, removing the need
for a second call using that tag, and simplifying the overload set that
needs to be resolved for calls to __distance_fw.

libstdc++-v3/ChangeLog:

* include/bits/hashtable_policy.h (__distance_fw): Replace tag
dispatching with 'if constexpr'.

Diff:
---
 libstdc++-v3/include/bits/hashtable_policy.h | 24 ++--
 1 file changed, 10 insertions(+), 14 deletions(-)

diff --git a/libstdc++-v3/include/bits/hashtable_policy.h 
b/libstdc++-v3/include/bits/hashtable_policy.h
index e5ad85ed9f1a..ecf50313d09c 100644
--- a/libstdc++-v3/include/bits/hashtable_policy.h
+++ b/libstdc++-v3/include/bits/hashtable_policy.h
@@ -62,25 +62,21 @@ namespace __detail
   typename _Unused, typename _Traits>
 struct _Hashtable_base;
 
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wc++17-extensions" // if constexpr
   // Helper function: return distance(first, last) for forward
   // iterators, or 0/1 for input iterators.
-  template
-inline typename std::iterator_traits<_Iterator>::difference_type
-__distance_fw(_Iterator __first, _Iterator __last,
- std::input_iterator_tag)
-{ return __first != __last ? 1 : 0; }
-
-  template
-inline typename std::iterator_traits<_Iterator>::difference_type
-__distance_fw(_Iterator __first, _Iterator __last,
- std::forward_iterator_tag)
-{ return std::distance(__first, __last); }
-
   template
 inline typename std::iterator_traits<_Iterator>::difference_type
 __distance_fw(_Iterator __first, _Iterator __last)
-{ return __distance_fw(__first, __last,
-  std::__iterator_category(__first)); }
+{
+  using _Cat = typename std::iterator_traits<_Iterator>::iterator_category;
+  if constexpr (is_convertible<_Cat, forward_iterator_tag>::value)
+   return std::distance(__first, __last);
+  else
+   return __first != __last ? 1 : 0;
+}
+#pragma GCC diagnostic pop
 
   struct _Identity
   {


[gcc r15-5044] arm: Improvements to arm_noce_conversion_profitable_p call [PR 116444]

2024-11-08 Thread Andre Simoes Dias Vieira via Gcc-cvs
https://gcc.gnu.org/g:1e8396464cb990d554c932cd959742b86660a25a

commit r15-5044-g1e8396464cb990d554c932cd959742b86660a25a
Author: Andre Simoes Dias Vieira 
Date:   Fri Nov 8 13:34:57 2024 +

arm: Improvements to arm_noce_conversion_profitable_p call [PR 116444]

When not dealing with the special armv8.1-m.main conditional instructions
case, make sure it uses the default_noce_conversion_profitable_p call to
determine whether the sequence is cost effective.

Also make sure arm_noce_conversion_profitable_p accepts vsel patterns for
Armv8.1-M Mainline targets.

gcc/ChangeLog:

PR target/116444
* config/arm/arm.cc (arm_noce_conversion_profitable_p): Call
default_noce_conversion_profitable_p when not dealing with the
armv8.1-m.main special case.
(arm_is_vsel_fp_insn): New function.

Diff:
---
 gcc/config/arm/arm.cc | 59 ---
 1 file changed, 56 insertions(+), 3 deletions(-)

diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index 6f11b6c816d4..0f72f3a90312 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -36099,10 +36099,58 @@ arm_get_mask_mode (machine_mode mode)
   return default_get_mask_mode (mode);
 }
 
+/* Helper function to determine whether SEQ represents a sequence of
+   instructions representing the vsel floating point instructions.
+   This is an heuristic to check whether the proposed optimisation is desired,
+   the choice has no consequence for correctness.  */
+static bool
+arm_is_vsel_fp_insn (rtx_insn *seq)
+{
+  rtx_insn *curr_insn = seq;
+  rtx set = NULL_RTX;
+  /* The pattern may start with a simple set with register operands.  Skip
+ through any of those.  */
+  while (curr_insn)
+{
+  set = single_set (curr_insn);
+  if (!set
+ || !REG_P (SET_DEST (set)))
+   return false;
+
+  if (!REG_P (SET_SRC (set)))
+   break;
+  curr_insn = NEXT_INSN (curr_insn);
+}
+
+  if (!set)
+return false;
+
+  /* The next instruction should be a compare.  */
+  if (!REG_P (SET_DEST (set))
+  || GET_CODE (SET_SRC (set)) != COMPARE)
+return false;
+
+  curr_insn = NEXT_INSN (curr_insn);
+  if (!curr_insn)
+return false;
+
+  /* And the last instruction should be an IF_THEN_ELSE.  */
+  set = single_set (curr_insn);
+  if (!set
+  || !REG_P (SET_DEST (set))
+  || GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
+return false;
+
+  return !NEXT_INSN (curr_insn);
+}
+
+
 /* Helper function to determine whether SEQ represents a sequence of
instructions representing the Armv8.1-M Mainline conditional arithmetic
instructions: csinc, csneg and csinv. The cinc instruction is generated
-   using a different mechanism.  */
+   using a different mechanism.
+   This is an heuristic to check whether the proposed optimisation is desired,
+   the choice has no consequence for correctness.  */
 
 static bool
 arm_is_v81m_cond_insn (rtx_insn *seq)
@@ -36171,15 +36219,20 @@ arm_is_v81m_cond_insn (rtx_insn *seq)
hook to only allow "noce" to generate the patterns that are profitable.  */
 
 bool
-arm_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *)
+arm_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
 {
   if (!TARGET_COND_ARITH
   || reload_completed)
-return true;
+return default_noce_conversion_profitable_p (seq, if_info);
 
   if (arm_is_v81m_cond_insn (seq))
 return true;
 
+  /* Look for vsel opportunities as we still want to codegen these for
+ Armv8.1-M Mainline targets.  */
+  if (arm_is_vsel_fp_insn (seq))
+return true;
+
   return false;
 }


[gcc r15-5038] Enable gcc.dg/vect/vect-early-break_21.c on x86_64

2024-11-08 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:a9c31c2e76d9b63225448375a72a80591b43c7a6

commit r15-5038-ga9c31c2e76d9b63225448375a72a80591b43c7a6
Author: Richard Biener 
Date:   Fri Nov 8 12:44:47 2024 +0100

Enable gcc.dg/vect/vect-early-break_21.c on x86_64

The following also enables the testcase on x86 as it now has the
required cbranch.

* gcc.dg/vect/vect-early-break_21.c: Remove disabling of
x86_64 and i?86.

Diff:
---
 gcc/testsuite/gcc.dg/vect/vect-early-break_21.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_21.c 
b/gcc/testsuite/gcc.dg/vect/vect-early-break_21.c
index dbe3f8265115..f73f3c2eb86e 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-early-break_21.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_21.c
@@ -5,7 +5,7 @@
 
 /* { dg-additional-options "-Ofast" } */
 
-/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target { ! 
"x86_64-*-* i?86-*-*" } } } } */
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
 
 #include 


[gcc r15-5043] c++: Fix ICE on constexpr virtual function [PR117317]

2024-11-08 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:5ff9e21c1ec81f8288e74679547e56051e051975

commit r15-5043-g5ff9e21c1ec81f8288e74679547e56051e051975
Author: Jakub Jelinek 
Date:   Fri Nov 8 13:36:05 2024 +0100

c++: Fix ICE on constexpr virtual function [PR117317]

Since C++20 virtual methods can be constexpr, and if they are
constexpr evaluated, we choose tentative_decl_linkage for those,
defer their output and decide at_eof again.
On the following testcases we ICE though, because if
expand_or_defer_fn_1 decides to use tentative_decl_linkage, it
returns true and the caller in that case calls emit_associated_thunks,
where use_thunk which it calls asserts DECL_INTERFACE_KNOWN on the
thunk destination, which isn't the case for tentative_decl_linkage.

The following patch fixes the ICE by not emitting the thunks
for the DECL_DEFER_OUTPUT fns just yet but waiting until at_eof
time when we return to those.
Note, the second testcase ICEs already since r0-110035 with -std=c++0x
before it gets a chance to diagnose constexpr virtual method.

2024-11-08  Jakub Jelinek  

PR c++/117317
* semantics.cc (emit_associated_thunks): Do nothing for
!DECL_INTERFACE_KNOWN && DECL_DEFER_OUTPUT fns.

* g++.dg/cpp2a/pr117317-1.C: New test.
* g++.dg/cpp2a/pr117317-2.C: New test.

Diff:
---
 gcc/cp/semantics.cc |  5 -
 gcc/testsuite/g++.dg/cpp2a/pr117317-1.C | 19 +++
 gcc/testsuite/g++.dg/cpp2a/pr117317-2.C | 15 +++
 3 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index ab8614e376d6..cb2b1543462c 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -5150,7 +5150,10 @@ emit_associated_thunks (tree fn)
  enabling you to output all the thunks with the function itself.  */
   if (DECL_VIRTUAL_P (fn)
   /* Do not emit thunks for extern template instantiations.  */
-  && ! DECL_REALLY_EXTERN (fn))
+  && ! DECL_REALLY_EXTERN (fn)
+  /* Do not emit thunks for tentative decls, those will be processed
+again at_eof if really needed.  */
+  && (DECL_INTERFACE_KNOWN (fn) || !DECL_DEFER_OUTPUT (fn)))
 {
   tree thunk;
 
diff --git a/gcc/testsuite/g++.dg/cpp2a/pr117317-1.C 
b/gcc/testsuite/g++.dg/cpp2a/pr117317-1.C
new file mode 100644
index ..f3ef3849d033
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/pr117317-1.C
@@ -0,0 +1,19 @@
+// PR c++/117317
+// { dg-do compile { target c++20 } }
+
+struct C {
+  constexpr bool operator== (const C &b) const { return foo (); }
+  constexpr virtual bool foo () const = 0;
+};
+class A : public C {};
+class B : public C {};
+template 
+struct D : A, B
+{
+  constexpr bool operator== (const D &) const = default;
+  constexpr bool foo () const override { return true; }
+};
+struct E : D<1> {};
+constexpr E e;
+constexpr E f;
+static_assert (e == f, "");
diff --git a/gcc/testsuite/g++.dg/cpp2a/pr117317-2.C 
b/gcc/testsuite/g++.dg/cpp2a/pr117317-2.C
new file mode 100644
index ..44f2ec601bbf
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/pr117317-2.C
@@ -0,0 +1,15 @@
+// PR c++/117317
+// { dg-do compile { target c++20 } }
+
+struct C {
+  constexpr virtual bool foo () const = 0;
+};
+struct A : public C {};
+struct B : public C {};
+template 
+struct D : A, B
+{
+  constexpr bool foo () const override { return true; }
+};
+constexpr D<0> d;
+static_assert (d.foo (), "");


[gcc r14-10906] aarch64: Restrict FCLAMP to SME2

2024-11-08 Thread Richard Sandiford via Gcc-cvs
https://gcc.gnu.org/g:d228af5cbc2f635d0837ed67fe95641d6e567aff

commit r14-10906-gd228af5cbc2f635d0837ed67fe95641d6e567aff
Author: Richard Sandiford 
Date:   Fri Nov 8 14:07:46 2024 +

aarch64: Restrict FCLAMP to SME2

There are two sets of patterns for FCLAMP: one set for single registers
and one set for multiple registers.  The multiple-register set was
correctly gated on SME2, but the single-register set only required SME.
This doesn't matter for ACLE usage, since the intrinsic definitions
are correctly gated.  But it does matter for automatic generation of
FCLAMP from separate minimum and maximum operations (either ACLE
intrinsics or autovectorised code).

gcc/
* config/aarch64/aarch64-sve2.md (@aarch64_sve_fclamp)
(*aarch64_sve_fclamp_x): Require TARGET_STREAMING_SME2
rather than TARGET_STREAMING_SME.

gcc/testsuite/
* gcc.target/aarch64/sme/clamp_3.c: Force sme2
* gcc.target/aarch64/sme/clamp_4.c: Likewise.
* gcc.target/aarch64/sme/clamp_5.c: New test.

(cherry picked from commit f5962839d6e0c3115931e68d938d9a0cd7a383b1)

Diff:
---
 gcc/config/aarch64/aarch64-sve2.md |  4 ++--
 gcc/testsuite/gcc.target/aarch64/sme/clamp_3.c |  2 ++
 gcc/testsuite/gcc.target/aarch64/sme/clamp_4.c |  2 ++
 gcc/testsuite/gcc.target/aarch64/sme/clamp_5.c | 24 
 4 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-sve2.md 
b/gcc/config/aarch64/aarch64-sve2.md
index 934e57055d34..bae153b2c8c3 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -1117,7 +1117,7 @@
 UNSPEC_FMAXNM)
   (match_operand:SVE_FULL_F 3 "register_operand")]
  UNSPEC_FMINNM))]
-  "TARGET_STREAMING_SME"
+  "TARGET_STREAMING_SME2"
   {@ [cons: =0,  1, 2, 3; attrs: movprfx]
  [   w, %0, w, w; * ] fclamp\t%0., %2., 
%3.
  [ ?&w,  w, w, w; yes   ] movprfx\t%0, 
%1\;fclamp\t%0., %2., %3.
@@ -1137,7 +1137,7 @@
 UNSPEC_COND_FMAXNM)
   (match_operand:SVE_FULL_F 3 "register_operand")]
  UNSPEC_COND_FMINNM))]
-  "TARGET_STREAMING_SME"
+  "TARGET_STREAMING_SME2"
   {@ [cons: =0,  1, 2, 3; attrs: movprfx]
  [   w, %0, w, w; * ] #
  [ ?&w,  w, w, w; yes   ] #
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/clamp_3.c 
b/gcc/testsuite/gcc.target/aarch64/sme/clamp_3.c
index 44959f794909..162de6224d58 100644
--- a/gcc/testsuite/gcc.target/aarch64/sme/clamp_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sme/clamp_3.c
@@ -2,6 +2,8 @@
 
 #include 
 
+#pragma GCC target "+sme2"
+
 #define TEST(TYPE) \
   TYPE \
   tied1_##TYPE(TYPE a, TYPE b, TYPE c) __arm_streaming \
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/clamp_4.c 
b/gcc/testsuite/gcc.target/aarch64/sme/clamp_4.c
index 643b2635b90e..453c82cd8605 100644
--- a/gcc/testsuite/gcc.target/aarch64/sme/clamp_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sme/clamp_4.c
@@ -2,6 +2,8 @@
 
 #include 
 
+#pragma GCC target "+sme2"
+
 #define TEST(TYPE) \
   TYPE \
   untied_##TYPE(TYPE a, TYPE b, TYPE c, TYPE d) __arm_streaming
\
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/clamp_5.c 
b/gcc/testsuite/gcc.target/aarch64/sme/clamp_5.c
new file mode 100644
index ..7c5464bdc366
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme/clamp_5.c
@@ -0,0 +1,24 @@
+// { dg-options "-O" }
+
+#include 
+
+#pragma GCC target "+nosme2"
+
+#define TEST(TYPE) \
+  TYPE \
+  tied1_##TYPE(TYPE a, TYPE b, TYPE c) __arm_streaming \
+  {\
+return svminnm_x(svptrue_b8(), svmaxnm_x(svptrue_b8(), a, b), c);  \
+  }\
+   \
+  TYPE \
+  tied2_##TYPE(TYPE a, TYPE b, TYPE c) __arm_streaming \
+  {\
+return svminnm_x(svptrue_b8(), svmaxnm_x(svptrue_b8(), b, a), c);  \
+  }
+
+TEST(svfloat16_t)
+TEST(svfloat32_t)
+TEST(svfloat64_t)
+
+/* { dg-final { scan-assembler-not {\tfclamp\t} } } */


[gcc r14-10907] aarch64: Make PSEL dependent on SME rather than SME2

2024-11-08 Thread Richard Sandiford via Gcc-cvs
https://gcc.gnu.org/g:8681e1ed812e2a59c5512fe8cda383682bc648f1

commit r14-10907-g8681e1ed812e2a59c5512fe8cda383682bc648f1
Author: Richard Sandiford 
Date:   Fri Nov 8 14:07:47 2024 +

aarch64: Make PSEL dependent on SME rather than SME2

The svpsel_lane intrinsics were wrongly classified as SME2+ only,
rather than as base SME intrinsics.  They should always be available
in streaming mode.

gcc/
* config/aarch64/aarch64-sve2.md (@aarch64_sve_psel)
(*aarch64_sve_psel_plus): Require TARGET_STREAMING
rather than TARGET_STREAMING_SME2.

gcc/testsuite/
* gcc.target/aarch64/sme2/acle-asm/psel_lane_b16.c: Move to...
* gcc.target/aarch64/sme/acle-asm/psel_lane_b16.c: ...here.
* gcc.target/aarch64/sme2/acle-asm/psel_lane_b32.c: Move to...
* gcc.target/aarch64/sme/acle-asm/psel_lane_b32.c: ...here.
* gcc.target/aarch64/sme2/acle-asm/psel_lane_b64.c: Move to...
* gcc.target/aarch64/sme/acle-asm/psel_lane_b64.c: ...here.
* gcc.target/aarch64/sme2/acle-asm/psel_lane_b8.c: Move to...
* gcc.target/aarch64/sme/acle-asm/psel_lane_b8.c: ...here.
* gcc.target/aarch64/sme2/acle-asm/psel_lane_c16.c: Move to...
* gcc.target/aarch64/sme/acle-asm/psel_lane_c16.c: ...here.
* gcc.target/aarch64/sme2/acle-asm/psel_lane_c32.c: Move to...
* gcc.target/aarch64/sme/acle-asm/psel_lane_c32.c: ...here.
* gcc.target/aarch64/sme2/acle-asm/psel_lane_c64.c: Move to...
* gcc.target/aarch64/sme/acle-asm/psel_lane_c64.c: ...here.
* gcc.target/aarch64/sme2/acle-asm/psel_lane_c8.c: Move to...
* gcc.target/aarch64/sme/acle-asm/psel_lane_c8.c: ...here.

(cherry picked from commit afd3887262edbdd5d7be5f34658432fd3046a168)

Diff:
---
 gcc/config/aarch64/aarch64-sve2.md| 4 ++--
 .../gcc.target/aarch64/{sme2 => sme}/acle-asm/psel_lane_b16.c | 2 +-
 .../gcc.target/aarch64/{sme2 => sme}/acle-asm/psel_lane_b32.c | 2 +-
 .../gcc.target/aarch64/{sme2 => sme}/acle-asm/psel_lane_b64.c | 2 +-
 .../gcc.target/aarch64/{sme2 => sme}/acle-asm/psel_lane_b8.c  | 2 +-
 .../gcc.target/aarch64/{sme2 => sme}/acle-asm/psel_lane_c16.c | 2 +-
 .../gcc.target/aarch64/{sme2 => sme}/acle-asm/psel_lane_c32.c | 2 +-
 .../gcc.target/aarch64/{sme2 => sme}/acle-asm/psel_lane_c64.c | 2 +-
 .../gcc.target/aarch64/{sme2 => sme}/acle-asm/psel_lane_c8.c  | 2 +-
 9 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-sve2.md 
b/gcc/config/aarch64/aarch64-sve2.md
index bae153b2c8c3..b0bdce1b0c45 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -418,7 +418,7 @@
   (match_operand:SI 3 "register_operand" "Ucj")
   (const_int BHSD_BITS)]
  UNSPEC_PSEL))]
-  "TARGET_STREAMING_SME2"
+  "TARGET_STREAMING"
   "psel\t%0, %1, %2.[%w3, 0]"
 )
 
@@ -432,7 +432,7 @@
 (match_operand:SI 4 "const_int_operand"))
   (const_int BHSD_BITS)]
  UNSPEC_PSEL))]
-  "TARGET_STREAMING_SME2
+  "TARGET_STREAMING
&& UINTVAL (operands[4]) < 128 / "
   "psel\t%0, %1, %2.[%w3, %4]"
 )
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_lane_b16.c 
b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/psel_lane_b16.c
similarity index 98%
rename from gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_lane_b16.c
rename to gcc/testsuite/gcc.target/aarch64/sme/acle-asm/psel_lane_b16.c
index 704e9e375f5e..45dda808d2a6 100644
--- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_lane_b16.c
+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/psel_lane_b16.c
@@ -1,6 +1,6 @@
 /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
 
-#include "test_sme2_acle.h"
+#include "test_sme_acle.h"
 
 /*
 ** psel_lane_p0_p2_p7_0:
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_lane_b32.c 
b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/psel_lane_b32.c
similarity index 98%
rename from gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_lane_b32.c
rename to gcc/testsuite/gcc.target/aarch64/sme/acle-asm/psel_lane_b32.c
index 7d9c7a129ea4..d3d1b7b42cac 100644
--- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_lane_b32.c
+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/psel_lane_b32.c
@@ -1,6 +1,6 @@
 /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
 
-#include "test_sme2_acle.h"
+#include "test_sme_acle.h"
 
 /*
 ** psel_lane_p0_p2_p7_0:
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_lane_b64.c 
b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/psel_lane_b64.c
similarity index 98%
rename from gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_lane_b64.c
rename to gcc/testsuite/gcc.target/aarch64/sme/acle-asm/psel_lane_b64.c
index a59032a57f61..8c1e014db650 100

[gcc r14-10905] aarch64: Fix folding of degenerate svwhilele case [PR117045]

2024-11-08 Thread Richard Sandiford via Gcc-cvs
https://gcc.gnu.org/g:e49c265c59d7ba73e50fb7fe2784eb2874037642

commit r14-10905-ge49c265c59d7ba73e50fb7fe2784eb2874037642
Author: Richard Sandiford 
Date:   Fri Nov 8 14:07:46 2024 +0000

aarch64: Fix folding of degenerate svwhilele case [PR117045]

The svwhilele folder mishandled the degenerate case in which
the second argument is the maximum integer.  In that case,
the result is all-true regardless of the first parameter:

  If the second scalar operand is equal to the maximum signed integer
  value then a condition which includes an equality test can never fail
  and the result will be an all-true predicate.

This is because the conceptual "increment the first operand
by 1 after each element" is done modulo the range of the operand.
The GCC code was instead treating it as infinite precision.
whilele_5.c even had a test for the incorrect behaviour.

The easiest fix seemed to be to handle that case specially before
doing constant folding.  This also copes with variable first operands.

gcc/
PR target/116999
PR target/117045
* config/aarch64/aarch64-sve-builtins-base.cc
(svwhilelx_impl::fold): Check for WHILELTs of the minimum value
and WHILELEs of the maximum value.  Fold them to all-false and
all-true respectively.

gcc/testsuite/
PR target/116999
PR target/117045
* gcc.target/aarch64/sve/acle/general/whilele_5.c: Fix bogus
expected result.
* gcc.target/aarch64/sve/acle/general/whilele_11.c: New test.
* gcc.target/aarch64/sve/acle/general/whilele_12.c: Likewise.

(cherry picked from commit 50e7c51b0a0e9dc1d93f829016ae743b4f2e5070)

Diff:
---
 gcc/config/aarch64/aarch64-sve-builtins-base.cc| 11 ++-
 .../aarch64/sve/acle/general/whilele_11.c  | 31 
 .../aarch64/sve/acle/general/whilele_12.c  | 34 ++
 .../aarch64/sve/acle/general/whilele_5.c   |  2 +-
 4 files changed, 76 insertions(+), 2 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc 
b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index 241a249503fb..2bd084fc75bf 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -2857,7 +2857,9 @@ public:
 : while_comparison (unspec_for_sint, unspec_for_uint), m_eq_p (eq_p)
   {}
 
-  /* Try to fold a call by treating its arguments as constants of type T.  */
+  /* Try to fold a call by treating its arguments as constants of type T.
+ We have already filtered out the degenerate cases of X .LT. MIN
+ and X .LE. MAX.  */
   template<typename T>
   gimple *
   fold_type (gimple_folder &f) const
@@ -2913,6 +2915,13 @@ public:
 if (f.vectors_per_tuple () > 1)
   return nullptr;
 
+/* Filter out cases where the condition is always true or always false.  */
+tree arg1 = gimple_call_arg (f.call, 1);
+if (!m_eq_p && operand_equal_p (arg1, TYPE_MIN_VALUE (TREE_TYPE (arg1))))
+  return f.fold_to_pfalse ();
+if (m_eq_p && operand_equal_p (arg1, TYPE_MAX_VALUE (TREE_TYPE (arg1))))
+  return f.fold_to_ptrue ();
+
 if (f.type_suffix (1).unsigned_p)
   return fold_type (f);
 else
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_11.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_11.c
new file mode 100644
index 000000000000..2be9dc5c5347
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_11.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include 
+#include 
+
+svbool_t
+f1 (volatile int32_t *ptr)
+{
+  return svwhilelt_b8_s32 (*ptr, INT32_MIN);
+}
+
+svbool_t
+f2 (volatile uint32_t *ptr)
+{
+  return svwhilelt_b16_u32 (*ptr, 0);
+}
+
+svbool_t
+f3 (volatile int64_t *ptr)
+{
+  return svwhilelt_b32_s64 (*ptr, INT64_MIN);
+}
+
+svbool_t
+f4 (volatile uint64_t *ptr)
+{
+  return svwhilelt_b64_u64 (*ptr, 0);
+}
+
+/* { dg-final { scan-assembler-times {\tpfalse\tp[0-9]+\.b\n} 4 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_12.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_12.c
new file mode 100644
index 000000000000..713065c31453
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_12.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include 
+#include 
+
+svbool_t
+f1 (volatile int32_t *ptr)
+{
+  return svwhilele_b8_s32 (*ptr, INT32_MAX);
+}
+
+svbool_t
+f2 (volatile uint32_t *ptr)
+{
+  return svwhilele_b16_u32 (*ptr, UINT32_MAX);
+}
+
+svbool_t
+f3 (volatile int64_t *ptr)
+{
+  return svwhilele_b32_s64 (*ptr, INT64_MAX);
+}
+
+svbool_t
+f4 (volatile uint64_t *ptr)
+{
+  return svwhilele_b64_u64 (*ptr, UINT64_MAX);
+}
+
+/* { dg-final { scan-assembler {\tptrue\tp[0-9]+\.b(?:, all)\n} } } */
+/* { dg-final { scan

[gcc r14-10903] aarch64: Rename svpext to svpext_lane [PR116371]

2024-11-08 Thread Richard Sandiford via Gcc-cvs
https://gcc.gnu.org/g:66e611619b39b8383bfeafb4b27ef8553c4aab01

commit r14-10903-g66e611619b39b8383bfeafb4b27ef8553c4aab01
Author: Richard Sandiford 
Date:   Fri Nov 8 14:07:45 2024 +0000

aarch64: Rename svpext to svpext_lane [PR116371]

When implementing the SME2 ACLE, I somehow missed off the _lane
suffix on svpext.

gcc/
PR target/116371
* config/aarch64/aarch64-sve-builtins-sve2.h (svpext): Rename to...
(svpext_lane): ...this.
* config/aarch64/aarch64-sve-builtins-sve2.cc (svpext_impl): Rename
to...
(svpext_lane_impl): ...this and update instantiation accordingly.
* config/aarch64/aarch64-sve-builtins-sve2.def (svpext): Rename 
to...
(svpext_lane): ...this.

gcc/testsuite/
PR target/116371
* gcc.target/aarch64/sme2/acle-asm/pext_c16.c,
gcc.target/aarch64/sme2/acle-asm/pext_c16_x2.c,
gcc.target/aarch64/sme2/acle-asm/pext_c32.c,
gcc.target/aarch64/sme2/acle-asm/pext_c32_x2.c,
gcc.target/aarch64/sme2/acle-asm/pext_c64.c,
gcc.target/aarch64/sme2/acle-asm/pext_c64_x2.c,
gcc.target/aarch64/sme2/acle-asm/pext_c8.c,
gcc.target/aarch64/sme2/acle-asm/pext_c8_x2.c: Replace with...
* gcc.target/aarch64/sme2/acle-asm/pext_lane_c16.c,
gcc.target/aarch64/sme2/acle-asm/pext_lane_c16_x2.c,
gcc.target/aarch64/sme2/acle-asm/pext_lane_c32.c,
gcc.target/aarch64/sme2/acle-asm/pext_lane_c32_x2.c,
gcc.target/aarch64/sme2/acle-asm/pext_lane_c64.c,
gcc.target/aarch64/sme2/acle-asm/pext_lane_c64_x2.c,
gcc.target/aarch64/sme2/acle-asm/pext_lane_c8.c,
gcc.target/aarch64/sme2/acle-asm/pext_lane_c8_x2.c: ...these new 
tests,
testing for svpext_lane instead of svpext.

(cherry picked from commit cc2d29e5f4434a3fd4e0dd93ea4f9857a0309201)

Diff:
---
 gcc/config/aarch64/aarch64-sve-builtins-sve2.cc|  4 +-
 gcc/config/aarch64/aarch64-sve-builtins-sve2.def   |  2 +-
 gcc/config/aarch64/aarch64-sve-builtins-sve2.h |  2 +-
 .../gcc.target/aarch64/sme2/acle-asm/pext_c16.c| 50 
 .../gcc.target/aarch64/sme2/acle-asm/pext_c16_x2.c | 54 --
 .../gcc.target/aarch64/sme2/acle-asm/pext_c32.c| 50 
 .../gcc.target/aarch64/sme2/acle-asm/pext_c32_x2.c | 54 --
 .../gcc.target/aarch64/sme2/acle-asm/pext_c64.c| 50 
 .../gcc.target/aarch64/sme2/acle-asm/pext_c64_x2.c | 54 --
 .../gcc.target/aarch64/sme2/acle-asm/pext_c8.c | 50 
 .../gcc.target/aarch64/sme2/acle-asm/pext_c8_x2.c  | 54 --
 .../aarch64/sme2/acle-asm/pext_lane_c16.c  | 50 
 .../aarch64/sme2/acle-asm/pext_lane_c16_x2.c   | 54 ++
 .../aarch64/sme2/acle-asm/pext_lane_c32.c  | 50 
 .../aarch64/sme2/acle-asm/pext_lane_c32_x2.c   | 54 ++
 .../aarch64/sme2/acle-asm/pext_lane_c64.c  | 50 
 .../aarch64/sme2/acle-asm/pext_lane_c64_x2.c   | 54 ++
 .../aarch64/sme2/acle-asm/pext_lane_c8.c   | 50 
 .../aarch64/sme2/acle-asm/pext_lane_c8_x2.c| 54 ++
 19 files changed, 420 insertions(+), 420 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc 
b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
index 06d4d22fc0b2..72fa0d633c5a 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
@@ -221,7 +221,7 @@ public:
   }
 };
 
-class svpext_impl : public function_base
+class svpext_lane_impl : public function_base
 {
 public:
   rtx
@@ -619,7 +619,7 @@ FUNCTION (svmullt_lane, unspec_based_lane_function, 
(UNSPEC_SMULLT,
 UNSPEC_UMULLT, -1))
 FUNCTION (svnbsl, CODE_FOR_MODE0 (aarch64_sve2_nbsl),)
 FUNCTION (svnmatch, svmatch_svnmatch_impl, (UNSPEC_NMATCH))
-FUNCTION (svpext, svpext_impl,)
+FUNCTION (svpext_lane, svpext_lane_impl,)
 FUNCTION (svpmul, CODE_FOR_MODE0 (aarch64_sve2_pmul),)
 FUNCTION (svpmullb, unspec_based_function, (-1, UNSPEC_PMULLB, -1))
 FUNCTION (svpmullb_pair, unspec_based_function, (-1, UNSPEC_PMULLB_PAIR, -1))
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def 
b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
index ef677a74020b..318dfff06f0d 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
@@ -263,7 +263,7 @@ DEF_SVE_FUNCTION_GS (svmax, binary_opt_single_n, all_arith, 
x24, none)
 DEF_SVE_FUNCTION_GS (svmaxnm, binary_opt_single_n, all_float, x24, none)
 DEF_SVE_FUNCTION_GS (svmin, binary_opt_single_n, all_arith, x24, none)
 DEF_SVE_FUNCTION_GS (svminnm,

[gcc r14-10904] aarch64: Fix SVE ACLE gimple folds for C++ LTO [PR116629]

2024-11-08 Thread Richard Sandiford via Gcc-cvs
https://gcc.gnu.org/g:ffe00a011720c76f06d9fb2b59ba6f5ec509fab5

commit r14-10904-gffe00a011720c76f06d9fb2b59ba6f5ec509fab5
Author: Richard Sandiford 
Date:   Fri Nov 8 14:07:45 2024 +0000

aarch64: Fix SVE ACLE gimple folds for C++ LTO [PR116629]

The SVE ACLE code has two ways of handling overloaded functions.
One, used by C, is to define a single dummy function for each unique
overloaded name, with resolve_overloaded_builtin then resolving calls
to real non-overloaded functions.  The other, used by C++, is to
define a separate function for each individual overload.

The builtins harness assigns integer function codes programmatically.
However, LTO requires it to use the same assignment for every
translation unit, regardless of language.  This means that C++ TUs
need to create (unused) slots for the C overloads and that C TUs
need to create (unused) slots for the C++ overloads.

In many ways, it doesn't matter whether the LTO frontend itself
uses the C approach or the C++ approach to defining overloaded
functions, since the LTO frontend never has to resolve source-level
overloading.  However, the C++ approach of defining a separate
function for each overload means that C++ calls never need to
be redirected to a different function.  Calls to an overload
can appear in the LTO dump and survive until expand.  In contrast,
calls to C's dummy overload functions are resolved by the front
end and never survive to LTO (or expand).

Some optimisations work by moving between sibling functions, such as _m
to _x.  If the source function is an overload, the expected destination
function is too.  The LTO frontend needs to define C++ overloads if it
wants to do this optimisation properly for C++.

The PR is about a tree checking failure caused by trying to use a
stubbed-out C++ overload in LTO.  Dealing with that by detecting the
stub (rather than changing which overloads are defined) would have
turned this from an ice-on-valid to a missed optimisation.

In future, it would probably make sense to redirect overloads to
non-overloaded functions during gimple folding, in case that exposes
more CSE opportunities.  But it'd probably be of limited benefit, since
it should be rare for code to mix overloaded and non-overloaded uses of
the same operation.  It also wouldn't be suitable for backports.

gcc/
PR target/116629
* config/aarch64/aarch64-sve-builtins.cc
(function_builder::function_builder): Use direct overloads for LTO.

gcc/testsuite/
PR target/116629
* gcc.target/aarch64/sve/acle/general/pr106326_2.c: New test.

(cherry picked from commit fee3adbac055c3ff2649fed866c66d44ebfcbe90)

Diff:
---
 gcc/config/aarch64/aarch64-sve-builtins.cc |   2 +-
 .../aarch64/sve/acle/general/pr106326_2.c  | 200 +
 2 files changed, 201 insertions(+), 1 deletion(-)

diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc 
b/gcc/config/aarch64/aarch64-sve-builtins.cc
index e0458f7c1f67..afc0c36e620b 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -1259,7 +1259,7 @@ function_builder::function_builder (handle_pragma_index 
pragma_index,
bool function_nulls)
 {
   m_overload_type = build_function_type (void_type_node, void_list_node);
-  m_direct_overloads = lang_GNU_CXX ();
+  m_direct_overloads = lang_GNU_CXX () || in_lto_p;
 
   if (initial_indexes[pragma_index] == 0)
 {
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr106326_2.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr106326_2.c
new file mode 100644
index 000000000000..d312d31f59c0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr106326_2.c
@@ -0,0 +1,200 @@
+/* { dg-do link } */
+/* { dg-options "-O2 -flto -shared -fPIC --save-temps" } */
+/* { dg-require-effective-target shared } */
+/* { dg-require-effective-target fpic } */
+
+#include <arm_sve.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+svint32_t
+add1 (svint32_t x, svint32_t y)
+{
+  return svadd_z (svptrue_b8 (), x, y);
+}
+
+svint32_t
+add2 (svint32_t x, svint32_t y)
+{
+  return svadd_z (svptrue_b16 (), x, y);
+}
+
+svint32_t
+add3 (svint32_t x, svint32_t y)
+{
+  return svadd_z (svptrue_b32 (), x, y);
+}
+
+svint32_t
+add4 (svint32_t x, svint32_t y)
+{
+  return svadd_z (svptrue_b64 (), x, y);
+}
+
+svint32_t
+add5 (svint32_t x, svint32_t y)
+{
+  return svadd_m (svptrue_b8 (), x, y);
+}
+
+svint32_t
+add6 (svint32_t x, svint32_t y)
+{
+  return svadd_m (svptrue_b16 (), x, y);
+}
+
+svint32_t
+add7 (svint32_t x, svint32_t y)
+{
+  return svadd_m (svptrue_b32 (), x, y);
+}
+
+svint32_t
+add8 (svint32_t x, svint32_t y)
+{
+  return svadd_m (svptrue_b64 (), x, y);
+}
+
+svint16_t
+add9 (svint16_t x, svint16_t y)
+{
+

[gcc r14-10908] aarch64: Fix gcc.target/aarch64/sme2/acle-asm/bfmlslb_f32.c

2024-11-08 Thread Richard Sandiford via Gcc-cvs
https://gcc.gnu.org/g:998d05a7b650ae71f8a952e8503e8b5dbe6b4909

commit r14-10908-g998d05a7b650ae71f8a952e8503e8b5dbe6b4909
Author: Richard Sandiford 
Date:   Fri Nov 8 14:07:47 2024 +0000

aarch64: Fix gcc.target/aarch64/sme2/acle-asm/bfmlslb_f32.c

I missed a search-and-replace on this test, meaning that it was
duplicating bfmlalb_f32.c.

gcc/testsuite/
* gcc.target/aarch64/sme2/acle-asm/bfmlslb_f32.c: Replace bfmla*
with bfmls*

(cherry picked from commit 156f536d54b2f6f41de4719f9b3a8a33273a51a9)

Diff:
---
 .../gcc.target/aarch64/sme2/acle-asm/bfmlslb_f32.c | 60 +++---
 1 file changed, 30 insertions(+), 30 deletions(-)

diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/bfmlslb_f32.c 
b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/bfmlslb_f32.c
index f67316cd33ce..946af545141c 100644
--- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/bfmlslb_f32.c
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/bfmlslb_f32.c
@@ -3,63 +3,63 @@
 #include "test_sme2_acle.h"
 
 /*
-** bfmlalb_f32_tied1:
-** bfmlalb z0\.s, z4\.h, z5\.h
+** bfmlslb_f32_tied1:
+** bfmlslb z0\.s, z4\.h, z5\.h
 ** ret
 */
-TEST_DUAL_Z (bfmlalb_f32_tied1, svfloat32_t, svbfloat16_t,
-z0 = svbfmlalb_f32 (z0, z4, z5),
-z0 = svbfmlalb (z0, z4, z5))
+TEST_DUAL_Z (bfmlslb_f32_tied1, svfloat32_t, svbfloat16_t,
+z0 = svbfmlslb_f32 (z0, z4, z5),
+z0 = svbfmlslb (z0, z4, z5))
 
 /*
-** bfmlalb_f32_tied2:
+** bfmlslb_f32_tied2:
 ** mov (z[0-9]+)\.d, z0\.d
 ** movprfx z0, z4
-** bfmlalb z0\.s, \1\.h, z1\.h
+** bfmlslb z0\.s, \1\.h, z1\.h
 ** ret
 */
-TEST_DUAL_Z_REV (bfmlalb_f32_tied2, svfloat32_t, svbfloat16_t,
-z0_res = svbfmlalb_f32 (z4, z0, z1),
-z0_res = svbfmlalb (z4, z0, z1))
+TEST_DUAL_Z_REV (bfmlslb_f32_tied2, svfloat32_t, svbfloat16_t,
+z0_res = svbfmlslb_f32 (z4, z0, z1),
+z0_res = svbfmlslb (z4, z0, z1))
 
 /*
-** bfmlalb_f32_tied3:
+** bfmlslb_f32_tied3:
 ** mov (z[0-9]+)\.d, z0\.d
 ** movprfx z0, z4
-** bfmlalb z0\.s, z1\.h, \1\.h
+** bfmlslb z0\.s, z1\.h, \1\.h
 ** ret
 */
-TEST_DUAL_Z_REV (bfmlalb_f32_tied3, svfloat32_t, svbfloat16_t,
-z0_res = svbfmlalb_f32 (z4, z1, z0),
-z0_res = svbfmlalb (z4, z1, z0))
+TEST_DUAL_Z_REV (bfmlslb_f32_tied3, svfloat32_t, svbfloat16_t,
+z0_res = svbfmlslb_f32 (z4, z1, z0),
+z0_res = svbfmlslb (z4, z1, z0))
 
 /*
-** bfmlalb_f32_untied:
+** bfmlslb_f32_untied:
 ** movprfx z0, z1
-** bfmlalb z0\.s, z4\.h, z5\.h
+** bfmlslb z0\.s, z4\.h, z5\.h
 ** ret
 */
-TEST_DUAL_Z (bfmlalb_f32_untied, svfloat32_t, svbfloat16_t,
-z0 = svbfmlalb_f32 (z1, z4, z5),
-z0 = svbfmlalb (z1, z4, z5))
+TEST_DUAL_Z (bfmlslb_f32_untied, svfloat32_t, svbfloat16_t,
+z0 = svbfmlslb_f32 (z1, z4, z5),
+z0 = svbfmlslb (z1, z4, z5))
 
 /*
-** bfmlalb_h7_f32_tied1:
+** bfmlslb_h7_f32_tied1:
 ** mov (z[0-9]+\.h), h7
-** bfmlalb z0\.s, z4\.h, \1
+** bfmlslb z0\.s, z4\.h, \1
 ** ret
 */
-TEST_DUAL_ZD (bfmlalb_h7_f32_tied1, svfloat32_t, svbfloat16_t, bfloat16_t,
- z0 = svbfmlalb_n_f32 (z0, z4, d7),
- z0 = svbfmlalb (z0, z4, d7))
+TEST_DUAL_ZD (bfmlslb_h7_f32_tied1, svfloat32_t, svbfloat16_t, bfloat16_t,
+ z0 = svbfmlslb_n_f32 (z0, z4, d7),
+ z0 = svbfmlslb (z0, z4, d7))
 
 /*
-** bfmlalb_h7_f32_untied:
+** bfmlslb_h7_f32_untied:
 ** mov (z[0-9]+\.h), h7
 ** movprfx z0, z1
-** bfmlalb z0\.s, z4\.h, \1
+** bfmlslb z0\.s, z4\.h, \1
 ** ret
 */
-TEST_DUAL_ZD (bfmlalb_h7_f32_untied, svfloat32_t, svbfloat16_t, bfloat16_t,
- z0 = svbfmlalb_n_f32 (z1, z4, d7),
- z0 = svbfmlalb (z1, z4, d7))
+TEST_DUAL_ZD (bfmlslb_h7_f32_untied, svfloat32_t, svbfloat16_t, bfloat16_t,
+ z0 = svbfmlslb_n_f32 (z1, z4, d7),
+ z0 = svbfmlslb (z1, z4, d7))


[gcc r15-5045] Fix gcc.dg/vect/bb-slp-77.c for x86

2024-11-08 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:12383255fe4e82c31f5e42c72a8fbcb1b5dea35d

commit r15-5045-g12383255fe4e82c31f5e42c72a8fbcb1b5dea35d
Author: Richard Biener 
Date:   Fri Nov 8 15:11:34 2024 +0100

Fix gcc.dg/vect/bb-slp-77.c for x86

x86 doesn't have .REDUC_PLUS for V2SImode - there's no effective
target for that so add it to the list of targets not expecting the
BB vectorization.

* gcc.dg/vect/bb-slp-77.c: Add x86_64-*-* and i?86-*-* to
the list of expected failing targets.

Diff:
---
 gcc/testsuite/gcc.dg/vect/bb-slp-77.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-77.c 
b/gcc/testsuite/gcc.dg/vect/bb-slp-77.c
index b2cc1d114f10..bc74f6a4db31 100644
--- a/gcc/testsuite/gcc.dg/vect/bb-slp-77.c
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-77.c
@@ -71,4 +71,4 @@ void test(const int n, float * restrict s, const void * 
restrict vx, const void
 *s = sumf;
 }
 
-/* { dg-final { scan-tree-dump-times "optimized: basic block" 1 "slp1"  { 
target { { vect_int_mult && vect_element_align } && { ! powerpc*-*-* } } } } } 
*/
+/* { dg-final { scan-tree-dump-times "optimized: basic block" 1 "slp1"  { 
target { { vect_int_mult && vect_element_align } && { ! { powerpc*-*-* 
x86_64-*-* i?86-*-* } } } } } } */


[gcc r15-5046] libstdc++: Do not define _Insert_base::try_emplace before C++17

2024-11-08 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:b66a57c0ad300b293ebd366bc29f44f2ddb65c69

commit r15-5046-gb66a57c0ad300b293ebd366bc29f44f2ddb65c69
Author: Jonathan Wakely 
Date:   Fri Nov 8 13:58:23 2024 +0000

libstdc++: Do not define _Insert_base::try_emplace before C++17

This is not a reserved name in C++11 and C++14, so must not be defined.

Also use the appropriate feature test macros for the try_emplace members
of the Debug Mode maps.

libstdc++-v3/ChangeLog:

* include/bits/hashtable_policy.h (_Insert_base::try_emplace):
Do not define for C++11 and C++14.
* include/debug/map.h (try_emplace): Use feature test macro.
* include/debug/unordered_map (try_emplace): Likewise.
* testsuite/17_intro/names.cc: Define try_emplace before C++17.

Diff:
---
 libstdc++-v3/include/bits/hashtable_policy.h | 2 ++
 libstdc++-v3/include/debug/map.h | 2 +-
 libstdc++-v3/include/debug/unordered_map | 2 +-
 libstdc++-v3/testsuite/17_intro/names.cc | 2 ++
 4 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/include/bits/hashtable_policy.h 
b/libstdc++-v3/include/bits/hashtable_policy.h
index ecf50313d09c..b5f837e60619 100644
--- a/libstdc++-v3/include/bits/hashtable_policy.h
+++ b/libstdc++-v3/include/bits/hashtable_policy.h
@@ -1008,6 +1008,7 @@ namespace __detail
return __h._M_insert(__hint, __v, __node_gen, __unique_keys{});
   }
 
+#ifdef __glibcxx_unordered_map_try_emplace // C++ >= 17 && HOSTED
   template<typename _KType, typename... _Args>
std::pair<iterator, bool>
try_emplace(const_iterator, _KType&& __k, _Args&&... __args)
@@ -1029,6 +1030,7 @@ namespace __detail
  __node._M_node = nullptr;
  return { __it, true };
}
+#endif
 
   void
   insert(initializer_list __l)
diff --git a/libstdc++-v3/include/debug/map.h b/libstdc++-v3/include/debug/map.h
index d0e398f0fd97..5323a2b0d950 100644
--- a/libstdc++-v3/include/debug/map.h
+++ b/libstdc++-v3/include/debug/map.h
@@ -344,7 +344,7 @@ namespace __debug
}
 
 
-#if __cplusplus > 201402L
+#ifdef __glibcxx_map_try_emplace // C++ >= 17 && HOSTED
   template <typename... _Args>
 pair<iterator, bool>
 try_emplace(const key_type& __k, _Args&&... __args)
diff --git a/libstdc++-v3/include/debug/unordered_map 
b/libstdc++-v3/include/debug/unordered_map
index 1acafd8facea..cc24fd0d8930 100644
--- a/libstdc++-v3/include/debug/unordered_map
+++ b/libstdc++-v3/include/debug/unordered_map
@@ -442,7 +442,7 @@ namespace __debug
  _M_check_rehashed(__bucket_count);
}
 
-#if __cplusplus > 201402L
+#ifdef __glibcxx_unordered_map_try_emplace // C++ >= 17 && HOSTED
   template <typename... _Args>
pair<iterator, bool>
try_emplace(const key_type& __k, _Args&&... __args)
diff --git a/libstdc++-v3/testsuite/17_intro/names.cc 
b/libstdc++-v3/testsuite/17_intro/names.cc
index 5deb310dc313..1952028d2cf0 100644
--- a/libstdc++-v3/testsuite/17_intro/names.cc
+++ b/libstdc++-v3/testsuite/17_intro/names.cc
@@ -138,6 +138,8 @@
 // <charconv> defines to_chars_result::ptr and to_chars_result::ec
 #define ec (
 #define ptr (
+// <map> and <unordered_map> define try_emplace
+#define try_emplace (
 #endif
 
 // These clash with newlib so don't use them.