[gcc r14-10351] aarch64: Add support for -mcpu=grace

2024-06-27 Thread Kyrylo Tkachov via Gcc-cvs
https://gcc.gnu.org/g:c2878a9a1719e067b1476377bd2a292350482e61

commit r14-10351-gc2878a9a1719e067b1476377bd2a292350482e61
Author: Kyrylo Tkachov 
Date:   Wed Jun 19 14:56:02 2024 +0530

aarch64: Add support for -mcpu=grace

This adds support for the NVIDIA Grace CPU to aarch64.
We reuse the tuning decisions for the Neoverse V2 core, but include a
number of architecture features that are not enabled by default in
-mcpu=neoverse-v2.

This allows Grace users to more simply target the CPU with -mcpu=grace
rather than remembering what extensions to tag on top of
-mcpu=neoverse-v2.

Bootstrapped and tested on aarch64-none-linux-gnu.
gcc/

* config/aarch64/aarch64-cores.def (grace): New entry.
* config/aarch64/aarch64-tune.md: Regenerate.
* doc/invoke.texi (AArch64 Options): Document the above.

Signed-off-by: Kyrylo Tkachov 

Diff:
---
 gcc/config/aarch64/aarch64-cores.def | 2 ++
 gcc/config/aarch64/aarch64-tune.md   | 2 +-
 gcc/doc/invoke.texi  | 4 ++--
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-cores.def 
b/gcc/config/aarch64/aarch64-cores.def
index f69fc212d56..f5536388f61 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -189,6 +189,8 @@ AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, V9A, 
(I8MM, BF16, SVE2_BITPER
 AARCH64_CORE("cobalt-100",   cobalt100, cortexa57, V9A, (I8MM, BF16, 
SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversen2, 0x6d, 0xd49, -1)
 
 AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, V9A, (I8MM, BF16, 
SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversev2, 0x41, 0xd4f, -1)
+AARCH64_CORE("grace", grace, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, 
SVE2_AES, SVE2_SHA3, SVE2_SM4, PROFILE), neoversev2, 0x41, 0xd4f, -1)
+
 AARCH64_CORE("demeter", demeter, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, 
RNG, MEMTAG, PROFILE), neoversev2, 0x41, 0xd4f, -1)
 
 /* Generic Architecture Processors.  */
diff --git a/gcc/config/aarch64/aarch64-tune.md 
b/gcc/config/aarch64/aarch64-tune.md
index abd3c9e0822..80254836e0e 100644
--- a/gcc/config/aarch64/aarch64-tune.md
+++ b/gcc/config/aarch64/aarch64-tune.md
@@ -1,5 +1,5 @@
 ;; -*- buffer-read-only: t -*-
 ;; Generated automatically by gentune.sh from aarch64-cores.def
 (define_attr "tune"
-   
"cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,ampere1b,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,neoversen1,ares,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,neoversev1,zeus,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa520,cortexa710,cortexa715,cortexa720,cortexx2,cortexx3,cortexx4,neoversen2,cobalt100,neoversev2,demeter,generic,generic_armv8_a,generic_armv9_a"
+   
"cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,ampere1b,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,neoversen1,ares,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,neoversev1,zeus,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa520,cortexa710,cortexa715,cortexa720,cortexx2,cortexx3,cortexx4,neoversen2,cobalt100,neoversev2,grace,demeter,generic,generic_armv8_a,generic_armv9_a"
(const (symbol_ref "((enum attr_tune) aarch64_tune)")))
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index a916d618960..67220051a5b 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -21324,8 +21324,8 @@ performance of the code.  Permissible values for this 
option are:
 @samp{cortex-a78}, @samp{cortex-a78ae}, @samp{cortex-a78c},
 @samp{ares}, @samp{exynos-m1}, @samp{emag}, @samp{falkor},
 @samp{neoverse-512tvb}, @samp{neoverse-e1}, @samp{neoverse-n1},
-@samp{neoverse-n2}, @samp{neoverse-v1}, @samp{neoverse-v2}, @samp{qdf24xx},
-@samp{saphira}, @samp{phecda}, @samp{xgene1}, @samp{vulcan},
+@samp{neoverse-n2}, @samp{neoverse-v1}, @samp{neoverse-v2}, @samp{grace},
+@samp{qdf24xx}, @samp{saphira}, @samp{phecda}, @samp{xgene1}, @samp{vulcan},
 @samp{octeontx}, @samp{octeontx81},  @samp{octeontx83},
 @samp{octeontx2}, @samp{octeontx2t98}, @samp{octe

[gcc r15-1677] vect: generate suitable convert insn for int -> int, float -> float and int <-> float.

2024-06-27 Thread Hu via Gcc-cvs
https://gcc.gnu.org/g:c320a7efcd35ba6c6be70dc9b2fe562a9673e363

commit r15-1677-gc320a7efcd35ba6c6be70dc9b2fe562a9673e363
Author: Hu, Lin1 
Date:   Thu Feb 1 15:15:01 2024 +0800

vect: generate suitable convert insn for int -> int, float -> float and int 
<-> float.

gcc/ChangeLog:

PR target/107432
* tree-vect-generic.cc
(expand_vector_conversion): Support convert for int -> int,
float -> float and int <-> float.
* tree-vect-stmts.cc (vectorizable_conversion): Wrap the
indirect convert part.
(supportable_indirect_convert_operation): New function.
* tree-vectorizer.h (supportable_indirect_convert_operation):
Define the new function.

gcc/testsuite/ChangeLog:

PR target/107432
* gcc.target/i386/pr107432-1.c: New test.
* gcc.target/i386/pr107432-2.c: Ditto.
* gcc.target/i386/pr107432-3.c: Ditto.
* gcc.target/i386/pr107432-4.c: Ditto.
* gcc.target/i386/pr107432-5.c: Ditto.
* gcc.target/i386/pr107432-6.c: Ditto.
* gcc.target/i386/pr107432-7.c: Ditto.

Diff:
---
 gcc/testsuite/gcc.target/i386/pr107432-1.c | 234 
 gcc/testsuite/gcc.target/i386/pr107432-2.c | 105 +
 gcc/testsuite/gcc.target/i386/pr107432-3.c |  55 +++
 gcc/testsuite/gcc.target/i386/pr107432-4.c |  56 +++
 gcc/testsuite/gcc.target/i386/pr107432-5.c |  72 +
 gcc/testsuite/gcc.target/i386/pr107432-6.c | 139 +
 gcc/testsuite/gcc.target/i386/pr107432-7.c | 150 ++
 gcc/tree-vect-generic.cc   |  29 +++-
 gcc/tree-vect-stmts.cc | 241 ++---
 gcc/tree-vectorizer.h  |   4 +
 10 files changed, 990 insertions(+), 95 deletions(-)

diff --git a/gcc/testsuite/gcc.target/i386/pr107432-1.c 
b/gcc/testsuite/gcc.target/i386/pr107432-1.c
new file mode 100644
index 000..a4f37447eb4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr107432-1.c
@@ -0,0 +1,234 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64 -mavx512bw -mavx512vl -O3" } */
+/* { dg-final { scan-assembler-times "vpmovqd" 6 } } */
+/* { dg-final { scan-assembler-times "vpmovqw" 6 } } */
+/* { dg-final { scan-assembler-times "vpmovqb" 6 } } */
+/* { dg-final { scan-assembler-times "vpmovdw" 6 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovdw" 8 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovdb" 6 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovdb" 8 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovwb" 8 } } */
+
+#include 
+
+typedef short __v2hi __attribute__ ((__vector_size__ (4)));
+typedef char __v2qi __attribute__ ((__vector_size__ (2)));
+typedef char __v4qi __attribute__ ((__vector_size__ (4)));
+typedef char __v8qi __attribute__ ((__vector_size__ (8)));
+
+typedef unsigned short __v2hu __attribute__ ((__vector_size__ (4)));
+typedef unsigned short __v4hu __attribute__ ((__vector_size__ (8)));
+typedef unsigned char __v2qu __attribute__ ((__vector_size__ (2)));
+typedef unsigned char __v4qu __attribute__ ((__vector_size__ (4)));
+typedef unsigned char __v8qu __attribute__ ((__vector_size__ (8)));
+typedef unsigned int __v2su __attribute__ ((__vector_size__ (8)));
+
+__v2si mm_cvtepi64_epi32_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v2di)a, __v2si);
+}
+
+__m128imm256_cvtepi64_epi32_builtin_convertvector(__m256i a)
+{
+  return (__m128i)__builtin_convertvector((__v4di)a, __v4si);
+}
+
+__m256imm512_cvtepi64_epi32_builtin_convertvector(__m512i a)
+{
+  return (__m256i)__builtin_convertvector((__v8di)a, __v8si);
+}
+
+__v2hi mm_cvtepi64_epi16_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v2di)a, __v2hi);
+}
+
+__v4hi mm256_cvtepi64_epi16_builtin_convertvector(__m256i a)
+{
+  return __builtin_convertvector((__v4di)a, __v4hi);
+}
+
+__m128imm512_cvtepi64_epi16_builtin_convertvector(__m512i a)
+{
+  return (__m128i)__builtin_convertvector((__v8di)a, __v8hi);
+}
+
+__v2qi mm_cvtepi64_epi8_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v2di)a, __v2qi);
+}
+
+__v4qi mm256_cvtepi64_epi8_builtin_convertvector(__m256i a)
+{
+  return __builtin_convertvector((__v4di)a, __v4qi);
+}
+
+__v8qi mm512_cvtepi64_epi8_builtin_convertvector(__m512i a)
+{
+  return __builtin_convertvector((__v8di)a, __v8qi);
+}
+
+__v2hi mm64_cvtepi32_epi16_builtin_convertvector(__v2si a)
+{
+  return __builtin_convertvector((__v2si)a, __v2hi);
+}
+
+__v4hi mm_cvtepi32_epi16_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v4si)a, __v4hi);
+}
+
+__m128imm256_cvtepi32_epi16_builtin_convertvector(__m256i a)
+{
+  return (__m128i)__builtin_convertvector((__v8si)a, __v8hi);
+}
+
+__m256imm5

[gcc r15-1678] vect: Support v4hi -> v4qi.

2024-06-27 Thread Hu via Gcc-cvs
https://gcc.gnu.org/g:e5f8a39941f6f0f25dac88bd71fd368fb284a10f

commit r15-1678-ge5f8a39941f6f0f25dac88bd71fd368fb284a10f
Author: Hu, Lin1 
Date:   Wed Feb 28 18:11:55 2024 +0800

vect: Support v4hi -> v4qi.

gcc/ChangeLog:

PR target/107432
* config/i386/mmx.md
(VI2_32_64): New mode iterator.
(mmxhalfmode): New mode attr.
(mmxhalfmodelower): Ditto.
(truncv2hiv2qi2): Extend mode v4hi and change name from
truncv2hiv2qi2 to trunc<mode><mmxhalfmodelower>2.

gcc/testsuite/ChangeLog:

PR target/107432
* gcc.target/i386/pr107432-1.c: Modify test.
* gcc.target/i386/pr107432-6.c: Add test.
* gcc.target/i386/pr108938-3.c: Supporting truncv4hiv4qi
affects the bswap optimization, so I added the -mno-avx
option for now and opened a bugzilla.

Diff:
---
 gcc/config/i386/mmx.md | 17 +
 gcc/testsuite/gcc.target/i386/pr107432-1.c | 13 -
 gcc/testsuite/gcc.target/i386/pr107432-6.c | 29 ++---
 gcc/testsuite/gcc.target/i386/pr108938-3.c |  2 +-
 4 files changed, 44 insertions(+), 17 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index ea53f516cbb..24c0516726c 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -67,6 +67,9 @@
 ;; 4-byte integer vector modes
 (define_mode_iterator VI_32 [V4QI V2HI])
 
+;; 8-byte and 4-byte HImode vector modes
+(define_mode_iterator VI2_32_64 [(V4HI "TARGET_MMX_WITH_SSE") V2HI])
+
 ;; 4-byte and 2-byte integer vector modes
 (define_mode_iterator VI_16_32 [V4QI V2QI V2HI])
 
@@ -106,6 +109,12 @@
 (define_mode_attr mmxdoublemode
   [(V8QI "V8HI") (V4HI "V4SI")])
 
+(define_mode_attr mmxhalfmode
+  [(V4HI "V4QI") (V2HI "V2QI")])
+
+(define_mode_attr mmxhalfmodelower
+  [(V4HI "v4qi") (V2HI "v2qi")])
+
 ;; Mapping of vector float modes to an integer mode of the same size
 (define_mode_attr mmxintvecmode
   [(V2SF "V2SI") (V2SI "V2SI") (V4HI "V4HI") (V8QI "V8QI")
@@ -4880,10 +4889,10 @@
   DONE;
 })
 
-(define_insn "truncv2hiv2qi2"
-  [(set (match_operand:V2QI 0 "register_operand" "=v")
-   (truncate:V2QI
- (match_operand:V2HI 1 "register_operand" "v")))]
+(define_insn "trunc2"
+  [(set (match_operand: 0 "register_operand" "=v")
+   (truncate:
+ (match_operand:VI2_32_64 1 "register_operand" "v")))]
   "TARGET_AVX512VL && TARGET_AVX512BW"
   "vpmovwb\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssemov")
diff --git a/gcc/testsuite/gcc.target/i386/pr107432-1.c 
b/gcc/testsuite/gcc.target/i386/pr107432-1.c
index a4f37447eb4..afdf367afe2 100644
--- a/gcc/testsuite/gcc.target/i386/pr107432-1.c
+++ b/gcc/testsuite/gcc.target/i386/pr107432-1.c
@@ -7,7 +7,8 @@
 /* { dg-final { scan-assembler-times "vpmovdw" 8 { target { ! ia32 } } } } */
 /* { dg-final { scan-assembler-times "vpmovdb" 6 { target { ia32 } } } } */
 /* { dg-final { scan-assembler-times "vpmovdb" 8 { target { ! ia32 } } } } */
-/* { dg-final { scan-assembler-times "vpmovwb" 8 } } */
+/* { dg-final { scan-assembler-times "vpmovwb" 8 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovwb" 10 { target { ! ia32 } } } } */
 
 #include 
 
@@ -113,6 +114,11 @@ __v2qi mm32_cvtepi16_epi8_builtin_convertvector(__v2hi 
a)
   return __builtin_convertvector((__v2hi)a, __v2qi);
 }
 
+__v4qi mm64_cvtepi16_epi8_builtin_convertvector(__v4hi a)
+{
+  return __builtin_convertvector((__v4hi)a, __v4qi);
+}
+
 __v8qi mm_cvtepi16_epi8_builtin_convertvector(__m128i a)
 {
   return __builtin_convertvector((__v8hi)a, __v8qi);
@@ -218,6 +224,11 @@ __v2qu mm32_cvtepu16_epu8_builtin_convertvector(__v2hu 
a)
   return __builtin_convertvector((__v2hu)a, __v2qu);
 }
 
+__v4qu mm64_cvtepu16_epu8_builtin_convertvector(__v4hu a)
+{
+  return __builtin_convertvector((__v4hu)a, __v4qu);
+}
+
 __v8qu mm_cvtepu16_epu8_builtin_convertvector(__m128i a)
 {
   return __builtin_convertvector((__v8hu)a, __v8qu);
diff --git a/gcc/testsuite/gcc.target/i386/pr107432-6.c 
b/gcc/testsuite/gcc.target/i386/pr107432-6.c
index 4a68a10b089..dd585b2a351 100644
--- a/gcc/testsuite/gcc.target/i386/pr107432-6.c
+++ b/gcc/testsuite/gcc.target/i386/pr107432-6.c
@@ -1,18 +1,15 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512fp16 -mavx512vl -mavx512bw -O2 -mavx512dq 
-fno-trapping-math" } */
-/* { dg-final { scan-assembler-times "vcvttpd2dq" 2 { target { ia32 } } } } */
-/* { dg-final { scan-assembler-times "vcvttpd2dq" 3 { target { ! ia32 } } } } 
*/
-/* { dg-final { scan-assembler-times "vcvttpd2udq" 2 { target { ia32 } } } } */
-/* { dg-final { scan-assembler-times "vcvttpd2udq" 3 { target { ! ia32 } } } } 
*/
-/* { dg-final { scan-assembler-times "vcvttps2dq" 3 { target { ia32 } } } } */
-/* { dg-final { scan-assembler-times "vcvttps2dq" 4 { target { ! ia32 } } } } 
*/
-/* { dg-final { scan-assembler-times "vcvttps2udq" 3 { target { ia32 } } } } */
-/* { dg-final { scan-assembl

[gcc r15-1679] vect: support direct conversion under x86-64-v3.

2024-06-27 Thread Hu via Gcc-cvs
https://gcc.gnu.org/g:4385dc97b0d28e54541eb2418d6e68fc672441d7

commit r15-1679-g4385dc97b0d28e54541eb2418d6e68fc672441d7
Author: Hu, Lin1 
Date:   Wed Mar 6 19:58:48 2024 +0800

vect: support direct conversion under x86-64-v3.

gcc/ChangeLog:

PR target/107432
* config/i386/i386-expand.cc 
(ix86_expand_trunc_with_avx2_noavx512f):
New function to generate a series of suitable insns.
* config/i386/i386-protos.h (ix86_expand_trunc_with_avx2_noavx512f):
Define new function.
* config/i386/sse.md: Extend trunc2 for x86-64-v3.
(ssebytemode) Add V8HI.
(PMOV_DST_MODE_2_AVX2): New mode iterator.
(PMOV_SRC_MODE_3_AVX2): Ditto.
* config/i386/mmx.md
(trunc2): Ditto.
(avx512vl_trunc2): Ditto.
(truncv2si2): Ditto.
(avx512vl_truncv2si2): Ditto.
(mmxbytemode): New mode attr.

gcc/testsuite/ChangeLog:

PR target/107432
* gcc.target/i386/pr107432-8.c: New test.
* gcc.target/i386/pr107432-9.c: Ditto.
* gcc.target/i386/pr92645-4.c: Modify test.

Diff:
---
 gcc/config/i386/i386-expand.cc |  44 --
 gcc/config/i386/i386-protos.h  |   3 +
 gcc/config/i386/mmx.md |  35 +++-
 gcc/config/i386/sse.md |  88 +++-
 gcc/testsuite/gcc.target/i386/pr107432-8.c |  94 +
 gcc/testsuite/gcc.target/i386/pr107432-9.c | 129 +
 gcc/testsuite/gcc.target/i386/pr92645-4.c  |   2 -
 7 files changed, 363 insertions(+), 32 deletions(-)

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 5dfa7d49f58..eccad080f7c 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -1898,10 +1898,6 @@ ix86_split_convert_uns_si_sse (rtx operands[])
   emit_insn (gen_xorv4si3 (value, value, large));
 }
 
-static bool ix86_expand_vector_init_one_nonzero (bool mmx_ok,
-machine_mode mode, rtx target,
-rtx var, int one_var);
-
 /* Convert an unsigned DImode value into a DFmode, using only SSE.
Expects the 64-bit DImode to be supplied in a pair of integral
registers.  Requires SSE2; will use SSE3 if available.  For x86_32,
@@ -16126,7 +16122,7 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, 
machine_mode mode,
whose ONE_VAR element is VAR, and other elements are zero.  Return true
if successful.  */
 
-static bool
+bool
 ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
 rtx target, rtx var, int one_var)
 {
@@ -26137,4 +26133,42 @@ ix86_expand_ternlog (machine_mode mode, rtx op0, rtx 
op1, rtx op2, int idx,
   return target;
 }
 
+/* Trunc a vector to a narrow vector, like v4di -> v4si.  */
+
+void
+ix86_expand_trunc_with_avx2_noavx512f (rtx output, rtx input, machine_mode 
cvt_mode)
+{
+  machine_mode out_mode = GET_MODE (output);
+  machine_mode in_mode = GET_MODE (input);
+  int len = GET_MODE_SIZE (in_mode);
+  gcc_assert (len == GET_MODE_SIZE (cvt_mode)
+ && GET_MODE_INNER (out_mode) == GET_MODE_INNER (cvt_mode)
+ && (REG_P (input) || SUBREG_P (input)));
+  scalar_mode inner_out_mode = GET_MODE_INNER (out_mode);
+  int in_innersize = GET_MODE_SIZE (GET_MODE_INNER (in_mode));
+  int out_innersize = GET_MODE_SIZE (inner_out_mode);
+
+  struct expand_vec_perm_d d;
+  d.target = gen_reg_rtx (cvt_mode);
+  d.op0 = lowpart_subreg (cvt_mode, force_reg(in_mode, input), in_mode);
+  d.op1 = d.op0;
+  d.vmode = cvt_mode;
+  d.nelt = GET_MODE_NUNITS (cvt_mode);
+  d.testing_p = false;
+  d.one_operand_p = true;
+
+  /* Init perm. Put the needed bits of input in order and
+ fill the rest of bits by default.  */
+  for (int i = 0; i < d.nelt; ++i)
+{
+  d.perm[i] = i;
+  if (i < GET_MODE_NUNITS (out_mode))
+   d.perm[i] = i * (in_innersize / out_innersize);
+}
+
+  bool ok = ix86_expand_vec_perm_const_1(&d);
+  gcc_assert (ok);
+  emit_move_insn (output, gen_lowpart (out_mode, d.target));
+}
+
 #include "gt-i386-expand.h"
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 4f48dc0bf75..1a76090b9da 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -248,6 +248,7 @@ extern rtx ix86_gen_ccmp_first (rtx_insn **, rtx_insn **, 
enum rtx_code,
 extern rtx ix86_gen_ccmp_next (rtx_insn **, rtx_insn **, rtx,
   enum rtx_code, tree, tree, enum rtx_code);
 extern int ix86_get_flags_cc (enum rtx_code);
+extern void ix86_expand_trunc_with_avx2_noavx512f (rtx, rtx, machine_mode);
 extern rtx ix86_memtag_untagged_pointer (rtx, rtx);
 extern bool ix86_memtag_can_tag_addresses (void);
 
@@ -298,6 +299,8 @@ extern void ix86_expand_sse2_mulvxdi3 (rtx, rtx,

[gcc r15-1680] i386: Refactor vcvttps2qq/vcvtqq2ps patterns.

2024-06-27 Thread Hu via Gcc-cvs
https://gcc.gnu.org/g:94495247341bc05b77536271fe3dd789dad62624

commit r15-1680-g94495247341bc05b77536271fe3dd789dad62624
Author: Hu, Lin1 
Date:   Tue Jun 25 18:25:59 2024 +0800

i386: Refactor vcvttps2qq/vcvtqq2ps patterns.

Refactor vcvttps2qq/vcvtqq2ps patterns to remove the redundant
round_*_modev8sf_condition.

gcc/ChangeLog:

* config/i386/sse.md
(float2
): Refactor the pattern.
(unspec_fix_trunc2
): Ditto.
(fix_trunc2
): Ditto.
* config/i386/subst.md (round_modev8sf_condition): Remove.
(round_saeonly_modev8sf_condition): Ditto.

Diff:
---
 gcc/config/i386/sse.md   | 51 +---
 gcc/config/i386/subst.md |  2 --
 2 files changed, 22 insertions(+), 31 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 56ee7119e7c..a94ec3c441f 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1158,6 +1158,9 @@
 (define_mode_attr ssePSmode2
   [(V8DI "V8SF") (V4DI "V4SF")])
 
+(define_mode_attr ssePSmode2lower
+  [(V8DI "v8sf") (V4DI "v4sf")])
+
 ;; Mapping of vector modes back to the scalar modes
 (define_mode_attr ssescalarmode
   [(V64QI "QI") (V32QI "QI") (V16QI "QI")
@@ -8862,27 +8865,17 @@
 
 ;; For float insn patterns
 (define_mode_attr qq2pssuff
-  [(V8SF "") (V4SF "{y}")])
-
-(define_mode_attr sselongvecmode
-  [(V8SF "V8DI") (V4SF  "V4DI")])
-
-(define_mode_attr sselongvecmodelower
-  [(V8SF "v8di") (V4SF  "v4di")])
-
-(define_mode_attr sseintvecmode3
-  [(V8SF "XI") (V4SF "OI")
-   (V8DF "OI") (V4DF "TI")])
+  [(V8DI "") (V4DI "{y}")])
 
-(define_insn 
"float2"
-  [(set (match_operand:VF1_128_256VL 0 "register_operand" "=v")
-(any_float:VF1_128_256VL
-  (match_operand: 1 "nonimmediate_operand" 
"")))]
-  "TARGET_AVX512DQ && "
+(define_insn 
"float2"
+  [(set (match_operand: 0 "register_operand" "=v")
+(any_float:
+  (match_operand:VI8_256_512 1 "nonimmediate_operand" 
"")))]
+  "TARGET_AVX512DQ && "
   "vcvtqq2ps\t{%1, 
%0|%0, %1}"
   [(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
-   (set_attr "mode" "")])
+   (set_attr "mode" "")])
 
 (define_expand "avx512dq_floatv2div2sf2"
   [(set (match_operand:V4SF 0 "register_operand" "=v")
@@ -9417,26 +9410,26 @@
(set_attr "prefix" "evex")
(set_attr "mode" "")])
 
-(define_insn 
"unspec_fix_trunc2"
-  [(set (match_operand: 0 "register_operand" "=v")
-   (unspec:
- [(match_operand:VF1_128_256VL 1 "" 
"")]
+(define_insn 
"unspec_fix_trunc2"
+  [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
+   (unspec:VI8_256_512
+ [(match_operand: 1 "" 
"")]
  UNSPEC_VCVTT_U))]
-  "TARGET_AVX512DQ && "
+  "TARGET_AVX512DQ && "
   "vcvttps2qq\t{%1, 
%0|%0, %1}"
   [(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
-   (set_attr "mode" "")])
+   (set_attr "mode" "")])
 
-(define_insn 
"fix_trunc2"
-  [(set (match_operand: 0 "register_operand" "=v")
-   (any_fix:
- (match_operand:VF1_128_256VL 1 "" 
"")))]
-  "TARGET_AVX512DQ && "
+(define_insn 
"fix_trunc2"
+  [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
+   (any_fix:VI8_256_512
+ (match_operand: 1 "" 
"")))]
+  "TARGET_AVX512DQ && "
   "vcvttps2qq\t{%1, 
%0|%0, %1}"
   [(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
-   (set_attr "mode" "")])
+   (set_attr "mode" "")])
 
 (define_insn "unspec_avx512dq_fix_truncv2sfv2di2"
   [(set (match_operand:V2DI 0 "register_operand" "=v")
diff --git a/gcc/config/i386/subst.md b/gcc/config/i386/subst.md
index 7a9b697e0f6..40fb92094d2 100644
--- a/gcc/config/i386/subst.md
+++ b/gcc/config/i386/subst.md
@@ -211,7 +211,6 @@
  || mode == 
V16SImode
  || mode == 
V32HFmode)")
 
-(define_subst_attr "round_modev8sf_condition" "round" "1" "(mode == 
V8SFmode)")
 (define_subst_attr "round_modev4sf_condition" "round" "1" "(mode == 
V4SFmode)")
 (define_subst_attr "round_codefor" "round" "*" "")
 (define_subst_attr "round_opnum" "round" "5" "6")
@@ -257,7 +256,6 @@
  
|| mode == V16SImode
  
|| mode == V32HFmode)")
 
-(define_subst_attr "round_saeonly_modev8sf_condition" "round_saeonly" "1" 
"(mode == V8SFmode)")
 
 (define_subst "round_saeonly"
   [(set (match_operand:SUBST_A 0)


[gcc r13-8871] Add support for -mcpu=grace

2024-06-27 Thread Kyrylo Tkachov via Gcc-cvs
https://gcc.gnu.org/g:952ea3260e40992d3bf5e1f17b4845a4e5c908b5

commit r13-8871-g952ea3260e40992d3bf5e1f17b4845a4e5c908b5
Author: Kyrylo Tkachov 
Date:   Wed Jun 19 14:56:02 2024 +0530

Add support for -mcpu=grace

This adds support for the NVIDIA Grace CPU to aarch64.
We reuse the tuning decisions for the Neoverse V2 core, but include a
number of architecture features that are not enabled by default in
-mcpu=neoverse-v2.

This allows Grace users to more simply target the CPU with -mcpu=grace
rather than remembering what extensions to tag on top of
-mcpu=neoverse-v2.

Bootstrapped and tested on aarch64-none-linux-gnu.
gcc/

* config/aarch64/aarch64-cores.def (grace): New entry.
* config/aarch64/aarch64-tune.md: Regenerate.
* doc/invoke.texi (AArch64 Options): Document the above.

Signed-off-by: Kyrylo Tkachov 

Diff:
---
 gcc/config/aarch64/aarch64-cores.def | 2 ++
 gcc/config/aarch64/aarch64-tune.md   | 2 +-
 gcc/doc/invoke.texi  | 4 ++--
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-cores.def 
b/gcc/config/aarch64/aarch64-cores.def
index fdda0697b88..bec08ca1910 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -182,6 +182,8 @@ AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, V9A, 
(I8MM, BF16, SVE2_BITPER
 AARCH64_CORE("cobalt-100",   cobalt100, cortexa57, V9A, (I8MM, BF16, 
SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversen2, 0x6d, 0xd49, -1)
 
 AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, V9A, (I8MM, BF16, 
SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversev2, 0x41, 0xd4f, -1)
+AARCH64_CORE("grace", grace, cortexa57, V9A, (I8MM, BF16, CRYPTO, 
SVE2_BITPERM, SVE2_AES, SVE2_SHA3, SVE2_SM4, PROFILE), neoversev2, 0x41, 0xd4f, 
-1)
+
 AARCH64_CORE("demeter", demeter, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, 
RNG, MEMTAG, PROFILE), neoversev2, 0x41, 0xd4f, -1)
 
 #undef AARCH64_CORE
diff --git a/gcc/config/aarch64/aarch64-tune.md 
b/gcc/config/aarch64/aarch64-tune.md
index 9d46d38a292..6eae8522593 100644
--- a/gcc/config/aarch64/aarch64-tune.md
+++ b/gcc/config/aarch64/aarch64-tune.md
@@ -1,5 +1,5 @@
 ;; -*- buffer-read-only: t -*-
 ;; Generated automatically by gentune.sh from aarch64-cores.def
 (define_attr "tune"
-   
"cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,neoversen1,ares,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,neoversev1,zeus,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa710,cortexa715,cortexx2,cortexx3,neoversen2,cobalt100,neoversev2,demeter"
+   
"cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,neoversen1,ares,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,neoversev1,zeus,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa710,cortexa715,cortexx2,cortexx3,neoversen2,cobalt100,neoversev2,grace,demeter"
(const (symbol_ref "((enum attr_tune) aarch64_tune)")))
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 914c4bc8e6d..b17d0cf9341 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -20315,8 +20315,8 @@ performance of the code.  Permissible values for this 
option are:
 @samp{cortex-a78}, @samp{cortex-a78ae}, @samp{cortex-a78c},
 @samp{ares}, @samp{exynos-m1}, @samp{emag}, @samp{falkor},
 @samp{neoverse-512tvb}, @samp{neoverse-e1}, @samp{neoverse-n1},
-@samp{neoverse-n2}, @samp{neoverse-v1}, @samp{neoverse-v2}, @samp{qdf24xx},
-@samp{saphira}, @samp{phecda}, @samp{xgene1}, @samp{vulcan},
+@samp{neoverse-n2}, @samp{neoverse-v1}, @samp{neoverse-v2}, @samp{grace},
+@samp{qdf24xx}, @samp{saphira}, @samp{phecda}, @samp{xgene1}, @samp{vulcan},
 @samp{octeontx}, @samp{octeontx81},  @samp{octeontx83},
 @samp{octeontx2}, @samp{octeontx2t98}, @samp{octeontx2t96}
 @samp{octeontx2t93}, @samp{octeontx2f95}, @samp{octeontx2f95n},


[gcc r15-1686] ada: Reject ambiguous function calls in interpolated string expressions

2024-06-27 Thread Marc Poulhiès via Gcc-cvs
https://gcc.gnu.org/g:fdbc04d13f0e993ecf1a36680c8f7768dfb522fb

commit r15-1686-gfdbc04d13f0e993ecf1a36680c8f7768dfb522fb
Author: Javier Miranda 
Date:   Mon Jun 10 17:17:59 2024 +

ada: Reject ambiguous function calls in interpolated string expressions

gcc/ada/

* sem_ch2.adb (Analyze_Interpolated_String_Literal): Report
interpretations of ambiguous parameterless function calls.

Diff:
---
 gcc/ada/sem_ch2.adb | 80 -
 1 file changed, 79 insertions(+), 1 deletion(-)

diff --git a/gcc/ada/sem_ch2.adb b/gcc/ada/sem_ch2.adb
index 08cc75c9104..ddbb329d1f8 100644
--- a/gcc/ada/sem_ch2.adb
+++ b/gcc/ada/sem_ch2.adb
@@ -38,6 +38,8 @@ with Rident; use Rident;
 with Sem;use Sem;
 with Sem_Ch8;use Sem_Ch8;
 with Sem_Dim;use Sem_Dim;
+with Sem_Res;use Sem_Res;
+with Sem_Type;   use Sem_Type;
 with Sinfo;  use Sinfo;
 with Sinfo.Nodes;use Sinfo.Nodes;
 with Sinfo.Utils;use Sinfo.Utils;
@@ -135,20 +137,96 @@ package body Sem_Ch2 is
-
 
procedure Analyze_Interpolated_String_Literal (N : Node_Id) is
+
+  procedure Check_Ambiguous_Parameterless_Call (Func_Call : Node_Id);
+  --  Examine the interpretations of the call to the given parameterless
+  --  function call and report the location of each interpretation.
+
+  
+  -- Check_Ambiguous_Parameterless_Call --
+  
+
+  procedure Check_Ambiguous_Parameterless_Call (Func_Call : Node_Id) is
+
+ procedure Report_Interpretation (E : Entity_Id);
+ --  Report an interpretation of the function call
+
+ ---
+ -- Report_Interpretation --
+ ---
+
+ procedure Report_Interpretation (E : Entity_Id) is
+ begin
+Error_Msg_Sloc := Sloc (E);
+
+if Nkind (Parent (E)) = N_Full_Type_Declaration then
+   Error_Msg_N ("interpretation (inherited) #!", Func_Call);
+else
+   Error_Msg_N ("interpretation #!", Func_Call);
+end if;
+ end Report_Interpretation;
+
+ --  Local variables
+
+ Error_Reported : Boolean;
+ I  : Interp_Index;
+ It : Interp;
+
+  --  Start of processing for Check_Ambiguous_Parameterless_Call
+
+  begin
+ Error_Reported := False;
+
+ --  Examine possible interpretations
+
+ Get_First_Interp (Name (Func_Call), I, It);
+ while Present (It.Nam) loop
+if It.Nam /= Entity (Name (Func_Call))
+  and then Ekind (It.Nam) = E_Function
+  and then No (First_Formal (It.Nam))
+then
+   if not Error_Reported then
+  Error_Msg_NE
+("ambiguous call to&", Func_Call,
+ Entity (Name (Func_Call)));
+  Report_Interpretation (Entity (Name (Func_Call)));
+  Error_Reported := True;
+   end if;
+
+   Report_Interpretation (It.Nam);
+end if;
+
+Get_Next_Interp (I, It);
+ end loop;
+  end Check_Ambiguous_Parameterless_Call;
+
+  --  Local variables
+
   Str_Elem : Node_Id;
 
+   --  Start of processing for Analyze_Interpolated_String_Literal
+
begin
   Set_Etype (N, Any_String);
 
   Str_Elem := First (Expressions (N));
   while Present (Str_Elem) loop
+
+ --  Before analyzed, a function call that has parameter is an
+ --  N_Indexed_Component node, and a call to a function that has
+ --  no parameters is an N_Identifier node.
+
  Analyze (Str_Elem);
 
+ --  After analyzed, if it is still an N_Identifier node then we
+ --  found ambiguity and could not rewrite it as N_Function_Call.
+
  if Nkind (Str_Elem) = N_Identifier
and then Ekind (Entity (Str_Elem)) = E_Function
and then Is_Overloaded (Str_Elem)
  then
-Error_Msg_NE ("ambiguous call to&", Str_Elem, Entity (Str_Elem));
+Check_Parameterless_Call (Str_Elem);
+Check_Ambiguous_Parameterless_Call (Str_Elem);
  end if;
 
  Next (Str_Elem);


[gcc r15-1685] ada: Add missing dimension information for target names

2024-06-27 Thread Marc Poulhiès via Gcc-cvs
https://gcc.gnu.org/g:d4c990759bcdc1f2b3384397cae6d8cb76a4cdad

commit r15-1685-gd4c990759bcdc1f2b3384397cae6d8cb76a4cdad
Author: Eric Botcazou 
Date:   Tue Jun 11 19:29:22 2024 +0200

ada: Add missing dimension information for target names

It is computed from the Etype of N_Target_Name nodes.

gcc/ada/

* sem_ch5.adb (Analyze_Target_Name): Call Analyze_Dimension on the
node once the Etype is set.
* sem_dim.adb (OK_For_Dimension): Set to True for N_Target_Name.
(Analyze_Dimension): Call Analyze_Dimension_Has_Etype for it.

Diff:
---
 gcc/ada/sem_ch5.adb | 1 +
 gcc/ada/sem_dim.adb | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/gcc/ada/sem_ch5.adb b/gcc/ada/sem_ch5.adb
index b92ceb17b1b..644bd21ce93 100644
--- a/gcc/ada/sem_ch5.adb
+++ b/gcc/ada/sem_ch5.adb
@@ -4201,6 +4201,7 @@ package body Sem_Ch5 is
if Current = Expression (Context) then
   pragma Assert (Context = Current_Assignment);
   Set_Etype (N, Etype (Name (Current_Assignment)));
+  Analyze_Dimension (N);
else
   Report_Error;
end if;
diff --git a/gcc/ada/sem_dim.adb b/gcc/ada/sem_dim.adb
index 45a0f2ab922..39c36332497 100644
--- a/gcc/ada/sem_dim.adb
+++ b/gcc/ada/sem_dim.adb
@@ -219,6 +219,7 @@ package body Sem_Dim is
   N_Real_Literal  => True,
   N_Selected_Component=> True,
   N_Slice => True,
+  N_Target_Name   => True,
   N_Type_Conversion   => True,
   N_Unchecked_Type_Conversion => True,
 
@@ -1179,6 +1180,7 @@ package body Sem_Dim is
 | N_Qualified_Expression
 | N_Selected_Component
 | N_Slice
+| N_Target_Name
 | N_Unchecked_Type_Conversion
  =>
 Analyze_Dimension_Has_Etype (N);


[gcc r15-1682] ada: Overridden operation field not correctly set for controlling result wrappers

2024-06-27 Thread Marc Poulhiès via Gcc-cvs
https://gcc.gnu.org/g:60ca71044e8e4d492c74f65f6093fbcf46d238bb

commit r15-1682-g60ca71044e8e4d492c74f65f6093fbcf46d238bb
Author: Martin Clochard 
Date:   Fri Jun 7 11:44:45 2024 +0200

ada: Overridden operation field not correctly set for controlling result 
wrappers

Implicit wrapper overridings generated for functions with
controlling result when deriving with null extension may
have field Overridden_Operation incorrectly set, when making
several such derivations in succession. This happens because
overridings were assumed to come from source, and entities
generated by Derive_Subprograms were also assumed to be
derived from source subprograms. Overridden_Operation could
be set to the entity generated by Derive_Subprograms for the
same type, resulting in a cycle between Overridden_Operation
and Alias fields, causing non-termination in GNATprove.

gcc/ada/

* sem_ch6.adb (Check_Overriding_Indicator): Remove Comes_From_Source
filter.
(New_Overloaded_Entity): Move up special case of LSP_Subprogram,
and remove Comes_From_Source filter.

Diff:
---
 gcc/ada/sem_ch6.adb | 82 +++--
 1 file changed, 35 insertions(+), 47 deletions(-)

diff --git a/gcc/ada/sem_ch6.adb b/gcc/ada/sem_ch6.adb
index e97afdaf12e..43aa2e636fa 100644
--- a/gcc/ada/sem_ch6.adb
+++ b/gcc/ada/sem_ch6.adb
@@ -6916,13 +6916,11 @@ package body Sem_Ch6 is
--  operation is the inherited primitive (which is available
--  through the attribute alias)
 
-   if (Is_Dispatching_Operation (Subp)
-or else Is_Dispatching_Operation (Overridden_Subp))
+   if Is_Dispatching_Operation (Subp)
  and then not Comes_From_Source (Overridden_Subp)
  and then Find_Dispatching_Type (Overridden_Subp) =
   Find_Dispatching_Type (Subp)
  and then Present (Alias (Overridden_Subp))
- and then Comes_From_Source (Alias (Overridden_Subp))
then
   Set_Overridden_Operation(Subp, Alias (Overridden_Subp));
   Inherit_Subprogram_Contract (Subp, Alias (Overridden_Subp));
@@ -12565,16 +12563,25 @@ package body Sem_Ch6 is
 
   Enter_Overloaded_Entity (S);
 
+  --  LSP wrappers must override the ultimate alias of their
+  --  wrapped dispatching primitive E; required to traverse the
+  --  chain of ancestor primitives (see Map_Primitives). They
+  --  don't inherit contracts.
+
+  if Is_Wrapper (S)
+and then Present (LSP_Subprogram (S))
+  then
+ Set_Overridden_Operation (S, Ultimate_Alias (E));
+
   --  For entities generated by Derive_Subprograms the
   --  overridden operation is the inherited primitive
   --  (which is available through the attribute alias).
 
-  if not (Comes_From_Source (E))
+  elsif not (Comes_From_Source (E))
 and then Is_Dispatching_Operation (E)
 and then Find_Dispatching_Type (E) =
  Find_Dispatching_Type (S)
 and then Present (Alias (E))
-and then Comes_From_Source (Alias (E))
   then
  Set_Overridden_Operation(S, Alias (E));
  Inherit_Subprogram_Contract (S, Alias (E));
@@ -12591,20 +12598,8 @@ package body Sem_Ch6 is
   --  must check whether the target is an init_proc.
 
   elsif not Is_Init_Proc (S) then
-
- --  LSP wrappers must override the ultimate alias of their
- --  wrapped dispatching primitive E; required to traverse
- --  the chain of ancestor primitives (c.f. Map_Primitives)
- --  They don't inherit contracts.
-
- if Is_Wrapper (S)
-   and then Present (LSP_Subprogram (S))
- then
-Set_Overridden_Operation(S, Ultimate_Alias (E));
- else
-Set_Overridden_Operation(S, E);
-Inherit_Subprogram_Contract (S, E);
- end if;
+ Set_Overridden_Operation(S, E);
+ Inherit_Subprogram_Contract (S, E);
 
  Set_Is_Ada_2022_Only (S, Is_Ada_2022_Only (E));
   end if;
@@ -12619,37 +12614,30 @@ package body Sem_Ch6 is
 
   --  If S is a user-defined subprogram or a null procedure
   --  expanded to override an inherited null procedure, or a
-  --  predefined dispatching primitive then indicate that E
-  

[gcc r15-1681] ada: Implement first half of Generalized Finalization

2024-06-27 Thread Marc Poulhiès via Gcc-cvs
https://gcc.gnu.org/g:3cb7e22ad965672f51a437c0a30a4c95f558

commit r15-1681-g3cb7e22ad965672f51a437c0a30a4c95f558
Author: Eric Botcazou 
Date:   Wed Jun 5 23:19:53 2024 +0200

ada: Implement first half of Generalized Finalization

This implements the first half of the Generalized Finalization proposal,
namely the Finalizable aspect as well as its optional relaxed semantics
for the finalization operations, but the latter part is only implemented
for dynamically allocated objects.

In accordance with the spirit, if not the letter, of the proposal, this
implements the finalizable types declared with strict semantics for the
finalization operations as a direct generalization of controlled types,
which in turn makes it possible to reimplement the latter types in terms
of the former types and ensures full interoperability between them.

The relaxed semantics for the finalization operations is also a direct
generalization of the GNAT pragma No_Heap_Finalization for dynamically
allocated objects, in that it extends the effects of the pragma to all
access types designating the finalizable type, instead of just applying
them to library-level named access types.

gcc/ada/

* aspects.ads (Aspect_Id): Add Aspect_Finalizable.
(Implementation_Defined_Aspect): Add True for Aspect_Finalizable.
(Operational_Aspect): Add True for Aspect_Finalizable.
(Aspect_Argument): Add Expression for Aspect_Finalizable.
(Is_Representation_Aspect): Add False for Aspect_Finalizable.
(Aspect_Names): Add Name_Finalizable for Aspect_Finalizable.
(Aspect_Delay): Add Always_Delay for Aspect_Finalizable.
* checks.adb: Add with and use clauses for Sem_Elab.
(Install_Primitive_Elaboration_Check): Call Is_Controlled_Procedure.
* einfo.ads (Has_Relaxed_Finalization): Document new flag.
(Is_Controlled_Active): Update documentation.
* exp_aggr.adb (Generate_Finalization_Actions): Replace Find_Prim_Op
with Find_Controlled_Prim_Op for Name_Finalize.
* exp_attr.adb (Expand_N_Attribute_Reference) :
Return 0 if the prefix type has relaxed finalization.
* exp_ch3.adb (Build_Equivalent_Record_Aggregate): Return Empty if
the type needs finalization.
(Expand_Freeze_Record_Type): Call Find_Controlled_Prim_Op instead of
Find_Prim_Op for Name_{Adjust,Initialize,Finalize}.
Call Make_Finalize_Address_Body for all controlled types.
* exp_ch4.adb (Insert_Dereference_Action): Do not generate a call to
Adjust_Controlled_Dereference if the designated type has relaxed
finalization.
* exp_ch6.adb (Needs_BIP_Collection): Return false for an untagged
type that has relaxed finalization.
* exp_ch7.adb (Allows_Finalization_Collection): Return false if the
designated type has relaxed finalization.
(Check_Visibly_Controlled): Call Find_Controlled_Prim_Op instead of
Find_Prim_Op.
(Make_Adjust_Call): Likewise.
(Make_Deep_Record_Body): Likewise.
(Make_Final_Call): Likewise.
(Make_Init_Call): Likewise.
* exp_disp.adb (Set_All_DT_Position): Remove obsolete warning.
* exp_util.ads: Add with and use clauses for Snames.
(Find_Prim_Op): Add precondition.
(Find_Controlled_Prim_Op): New function declaration.
(Name_Of_Controlled_Prim_Op): Likewise.
* exp_util.adb: Remove with and use clauses for Snames.
(Build_Allocate_Deallocate_Proc): Do not build finalization actions
if the designated type has relaxed finalization.
(Find_Controlled_Prim_Op): New function.
(Find_Last_Init): Call Find_Controlled_Prim_Op instead of
Find_Prim_Op.
(Name_Of_Controlled_Prim_Op): New function.
* freeze.adb (Freeze_Entity.Freeze_Record_Type): Propagate the
Has_Relaxed_Finalization flag from components.
* gen_il-fields.ads (Opt_Field_Enum): Add Has_Relaxed_Finalization.
* gen_il-gen-gen_entities.adb (Entity_Kind): Likewise.
* sem_aux.adb (Is_By_Reference_Type): Return true for all controlled
types.
* sem_ch3.adb (Build_Derived_Record_Type): Do not special case types
declared in Ada.Finalization.
(Record_Type_Definition): Propagate the Has_Relaxed_Finalization
flag from components.
* sem_ch13.adb (Analyze_Aspects_At_Freeze_Point): Also process the
Finalizable aspect.
(Analyze_Aspect_Specifications): Likewise. Call Flag_Non_Static_Expr
in more cases.
(Check_Aspect_At_Freeze_Point): Likewise.
(Inherit_Aspects

[gcc r15-1684] ada: Fix array-manipulating code in Mdll

2024-06-27 Thread Marc Poulhiès via Gcc-cvs
https://gcc.gnu.org/g:432b8a346e10488a1751b86e7c51fff3ee870ae4

commit r15-1684-g432b8a346e10488a1751b86e7c51fff3ee870ae4
Author: Ronan Desplanques 
Date:   Thu May 2 09:52:34 2024 +0200

ada: Fix array-manipulating code in Mdll

This patch fixes a duo of array assignments in Mdll that were bound
to fail.

gcc/ada/

* mdll.adb (Build_Non_Reloc_DLL): Fix incorrect assignment
to array object.
(Ada_Build_Non_Reloc_DLL): Likewise.

Diff:
---
 gcc/ada/mdll.adb | 43 ++-
 1 file changed, 26 insertions(+), 17 deletions(-)

diff --git a/gcc/ada/mdll.adb b/gcc/ada/mdll.adb
index 2f946b0a5bb..ac4af8363aa 100644
--- a/gcc/ada/mdll.adb
+++ b/gcc/ada/mdll.adb
@@ -322,17 +322,21 @@ package body MDLL is
  --  Build the DLL
 
  declare
-Params : OS_Lib.Argument_List :=
-   Adr_Opt'Unchecked_Access & All_Options;
+Params  : constant OS_Lib.Argument_List :=
+Map_Opt'Unchecked_Access &
+Adr_Opt'Unchecked_Access & All_Options;
+First_Param : Positive := Params'First + 1;
+
  begin
 if Map_File then
-   Params := Map_Opt'Unchecked_Access & Params;
+   First_Param := Params'First;
 end if;
 
-Utl.Gcc (Output_File => Dll_File,
- Files   => Exp_File'Unchecked_Access & Ofiles,
- Options => Params,
- Build_Lib   => True);
+Utl.Gcc
+  (Output_File => Dll_File,
+   Files   => Exp_File'Unchecked_Access & Ofiles,
+   Options => Params (First_Param .. Params'Last),
+   Build_Lib   => True);
  end;
 
  OS_Lib.Delete_File (Exp_File, Success);
@@ -377,20 +381,25 @@ package body MDLL is
  Utl.Gnatbind (L_Afiles, Options & Bargs_Options);
 
  declare
-Params : OS_Lib.Argument_List :=
-   Out_Opt'Unchecked_Access &
-   Dll_File'Unchecked_Access &
-   Lib_Opt'Unchecked_Access &
-   Exp_File'Unchecked_Access &
-   Adr_Opt'Unchecked_Access &
-   Ofiles &
-   All_Options;
+Params  : constant OS_Lib.Argument_List :=
+Map_Opt'Unchecked_Access &
+Out_Opt'Unchecked_Access &
+Dll_File'Unchecked_Access &
+Lib_Opt'Unchecked_Access &
+Exp_File'Unchecked_Access &
+Adr_Opt'Unchecked_Access &
+Ofiles &
+All_Options;
+First_Param : Positive := Params'First + 1;
+
  begin
 if Map_File then
-   Params := Map_Opt'Unchecked_Access & Params;
+   First_Param := Params'First;
 end if;
 
-Utl.Gnatlink (L_Afiles (L_Afiles'Last).all, Params);
+Utl.Gnatlink
+  (L_Afiles (L_Afiles'Last).all,
+   Params (First_Param .. Params'Last));
  end;
 
  OS_Lib.Delete_File (Exp_File, Success);


[gcc r15-1687] ada: Remove last uses of System.Address_Operations in runtime library

2024-06-27 Thread Marc Poulhiès via Gcc-cvs
https://gcc.gnu.org/g:5c8b7fcc04b1ec412e11ae3d77f704c19a63ab07

commit r15-1687-g5c8b7fcc04b1ec412e11ae3d77f704c19a63ab07
Author: Eric Botcazou 
Date:   Wed Jun 12 16:05:57 2024 +0200

ada: Remove last uses of System.Address_Operations in runtime library

This completes the switch from using System.Address_Operations to using only
System.Storage_Elements in the runtime library.  The remaining uses were for
simple optimizations that can be done by the optimizer alone.

gcc/ada/

* libgnat/s-carsi8.adb: Remove clauses for System.Address_Operations
and use only operations of System.Storage_Elements for addresses.
* libgnat/s-casi16.adb: Likewise.
* libgnat/s-casi32.adb: Likewise.
* libgnat/s-casi64.adb: Likewise.
* libgnat/s-casi128.adb: Likewise.
* libgnat/s-carun8.adb: Likewise.
* libgnat/s-caun16.adb: Likewise.
* libgnat/s-caun32.adb: Likewise.
* libgnat/s-caun64.adb: Likewise.
* libgnat/s-caun128.adb: Likewise.
* libgnat/s-geveop.adb: Likewise.

Diff:
---
 gcc/ada/libgnat/s-carsi8.adb  |  8 +---
 gcc/ada/libgnat/s-carun8.adb  |  8 +---
 gcc/ada/libgnat/s-casi128.adb |  7 ---
 gcc/ada/libgnat/s-casi16.adb  | 11 +++
 gcc/ada/libgnat/s-casi32.adb  |  7 ---
 gcc/ada/libgnat/s-casi64.adb  |  7 ---
 gcc/ada/libgnat/s-caun128.adb |  7 ---
 gcc/ada/libgnat/s-caun16.adb  | 11 +++
 gcc/ada/libgnat/s-caun32.adb  |  7 ---
 gcc/ada/libgnat/s-caun64.adb  |  7 ---
 gcc/ada/libgnat/s-geveop.adb  | 33 -
 11 files changed, 64 insertions(+), 49 deletions(-)

diff --git a/gcc/ada/libgnat/s-carsi8.adb b/gcc/ada/libgnat/s-carsi8.adb
index 2a6c532d247..7eb545a2657 100644
--- a/gcc/ada/libgnat/s-carsi8.adb
+++ b/gcc/ada/libgnat/s-carsi8.adb
@@ -29,8 +29,7 @@
 --  --
 --
 
-with System.Address_Operations; use System.Address_Operations;
-with System.Storage_Elements;   use System.Storage_Elements;
+with System.Storage_Elements; use System.Storage_Elements;
 
 with Ada.Unchecked_Conversion;
 
@@ -77,7 +76,10 @@ package body System.Compare_Array_Signed_8 is
begin
   --  If operands are non-aligned, or length is too short, go by bytes
 
-  if ModA (OrA (Left, Right), 4) /= 0 or else Compare_Len < 4 then
+  if Left mod Storage_Offset (4) /= 0
+or else Right mod Storage_Offset (4) /= 0
+or else Compare_Len < 4
+  then
  return Compare_Array_S8_Unaligned (Left, Right, Left_Len, Right_Len);
   end if;
 
diff --git a/gcc/ada/libgnat/s-carun8.adb b/gcc/ada/libgnat/s-carun8.adb
index 27422e5d728..e4cac204769 100644
--- a/gcc/ada/libgnat/s-carun8.adb
+++ b/gcc/ada/libgnat/s-carun8.adb
@@ -29,8 +29,7 @@
 --  --
 --
 
-with System.Address_Operations; use System.Address_Operations;
-with System.Storage_Elements;   use System.Storage_Elements;
+with System.Storage_Elements; use System.Storage_Elements;
 
 with Ada.Unchecked_Conversion;
 
@@ -76,7 +75,10 @@ package body System.Compare_Array_Unsigned_8 is
begin
   --  If operands are non-aligned, or length is too short, go by bytes
 
-  if ModA (OrA (Left, Right), 4) /= 0 or else Compare_Len < 4 then
+  if Left mod Storage_Offset (4) /= 0
+or else Right mod Storage_Offset (4) /= 0
+or else Compare_Len < 4
+  then
  return Compare_Array_U8_Unaligned (Left, Right, Left_Len, Right_Len);
   end if;
 
diff --git a/gcc/ada/libgnat/s-casi128.adb b/gcc/ada/libgnat/s-casi128.adb
index 3d3614136a7..1b65c8c86ef 100644
--- a/gcc/ada/libgnat/s-casi128.adb
+++ b/gcc/ada/libgnat/s-casi128.adb
@@ -29,8 +29,7 @@
 --  --
 --
 
-with System.Address_Operations; use System.Address_Operations;
-with System.Storage_Elements;   use System.Storage_Elements;
+with System.Storage_Elements; use System.Storage_Elements;
 
 with Ada.Unchecked_Conversion;
 
@@ -70,7 +69,9 @@ package body System.Compare_Array_Signed_128 is
begin
   --  Case of going by aligned quadruple words
 
-  if ModA (OrA (Left, Right), 16) = 0 then
+  if Left mod Storage_Offset (16) = 0
+and then Right mod Storage_Offset (16) = 0
+  then
  while Clen /= 0 loop
 if W (L).all /= W (R).all then
if W (L).all > W (R).all then
diff --git a/gcc/ada/libgnat/s-casi16.adb b/gcc/ada/libgnat/s-casi16.adb
index 01771d1f8ff..e3411c978c5 100644
--- a/gcc/ada/libgnat/s-casi16.adb
+++ b/gcc/ada/libgnat/s-casi16.adb
@@ -29,8 +29,7

[gcc r15-1683] ada: Bug using user defined string literals with interpolated strings

2024-06-27 Thread Marc Poulhiès via Gcc-cvs
https://gcc.gnu.org/g:089bb078e8663f72292f2edc63f48c304dcb1ccc

commit r15-1683-g089bb078e8663f72292f2edc63f48c304dcb1ccc
Author: Javier Miranda 
Date:   Thu Jun 6 11:48:02 2024 +

ada: Bug using user defined string literals with interpolated strings

The frontend rejects the use of user defined string literals
using interpolated strings.

gcc/ada/

* sem_res.adb (Has_Applicable_User_Defined_Literal): Add missing
support for interpolated strings.

Diff:
---
 gcc/ada/sem_res.adb | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ada/sem_res.adb b/gcc/ada/sem_res.adb
index a0dd1f7962b..72bba1f97af 100644
--- a/gcc/ada/sem_res.adb
+++ b/gcc/ada/sem_res.adb
@@ -467,7 +467,7 @@ package body Sem_Res is
   Literal_Aspect_Map :
 constant array (N_Numeric_Or_String_Literal) of Aspect_Id :=
   (N_Integer_Literal => Aspect_Integer_Literal,
-   N_Interpolated_String_Literal => No_Aspect,
+   N_Interpolated_String_Literal => Aspect_String_Literal,
N_Real_Literal=> Aspect_Real_Literal,
N_String_Literal  => Aspect_String_Literal);
 
@@ -487,6 +487,7 @@ package body Sem_Res is
 
begin
   if (Nkind (N) in N_Numeric_Or_String_Literal
+ | N_Interpolated_String_Literal
and then Present
 (Find_Aspect (Typ, Literal_Aspect_Map (Nkind (N)
 or else
@@ -563,6 +564,10 @@ package body Sem_Res is
 Param1 := Make_String_Literal (Loc, Strval (N));
 Params := New_List (Param1);
 
+ elsif Nkind (N) = N_Interpolated_String_Literal then
+Param1 := New_Copy_Tree (N);
+Params := New_List (Param1);
+
  else
 Param1 :=
   Make_String_Literal


[gcc r12-10584] Add support for -mcpu=grace

2024-06-27 Thread Kyrylo Tkachov via Gcc-cvs
https://gcc.gnu.org/g:25cb13649b1765a21f21907f2d7a0aa2135accb5

commit r12-10584-g25cb13649b1765a21f21907f2d7a0aa2135accb5
Author: Kyrylo Tkachov 
Date:   Wed Jun 19 14:56:02 2024 +0530

Add support for -mcpu=grace

This adds support for the NVIDIA Grace CPU to aarch64.
We reuse the tuning decisions for the Neoverse V2 core, but include a
number of architecture features that are not enabled by default in
-mcpu=neoverse-v2.

This allows Grace users to more simply target the CPU with -mcpu=grace
rather than remembering what extensions to tag on top of
-mcpu=neoverse-v2.

Bootstrapped and tested on aarch64-none-linux-gnu.
gcc/

* config/aarch64/aarch64-cores.def (grace): New entry.
* config/aarch64/aarch64-tune.md: Regenerate.
* doc/invoke.texi (AArch64 Options): Document the above.

Signed-off-by: Kyrylo Tkachov 

Diff:
---
 gcc/config/aarch64/aarch64-cores.def | 1 +
 gcc/config/aarch64/aarch64-tune.md   | 2 +-
 gcc/doc/invoke.texi  | 4 ++--
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-cores.def 
b/gcc/config/aarch64/aarch64-cores.def
index 956afa70714..6532bdaafb5 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -176,5 +176,6 @@ AARCH64_CORE("cobalt-100",   cobalt100, cortexa57, 9A, 
AARCH64_FL_FOR_ARCH9 | AA
 
 AARCH64_CORE("demeter", demeter, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | 
AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | 
AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1)
 AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | 
AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | 
AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1)
+AARCH64_CORE("grace", grace, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | 
AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_CRYPTO 
| AARCH64_FL_SHA3 | AARCH64_FL_SM4 | AARCH64_FL_SVE2_AES | AARCH64_FL_SVE2_SHA3 
| AARCH64_FL_SVE2_SM4 | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1)
 
 #undef AARCH64_CORE
diff --git a/gcc/config/aarch64/aarch64-tune.md 
b/gcc/config/aarch64/aarch64-tune.md
index 2c1852c8fe6..0c139e3e729 100644
--- a/gcc/config/aarch64/aarch64-tune.md
+++ b/gcc/config/aarch64/aarch64-tune.md
@@ -1,5 +1,5 @@
 ;; -*- buffer-read-only: t -*-
 ;; Generated automatically by gentune.sh from aarch64-cores.def
 (define_attr "tune"
-   
"cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa710,cortexx2,neoversen2,cobalt100,demeter,neoversev2"
+   
"cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa710,cortexx2,neoversen2,cobalt100,demeter,neoversev2,grace"
(const (symbol_ref "((enum attr_tune) aarch64_tune)")))
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index c83f667260e..fbfa3241e7f 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -19203,8 +19203,8 @@ performance of the code.  Permissible values for this 
option are:
 @samp{cortex-a78}, @samp{cortex-a78ae}, @samp{cortex-a78c},
 @samp{ares}, @samp{exynos-m1}, @samp{emag}, @samp{falkor},
 @samp{neoverse-512tvb}, @samp{neoverse-e1}, @samp{neoverse-n1},
-@samp{neoverse-n2}, @samp{neoverse-v1}, @samp{neoverse-v2}, @samp{qdf24xx},
-@samp{saphira}, @samp{phecda}, @samp{xgene1}, @samp{vulcan},
+@samp{neoverse-n2}, @samp{neoverse-v1}, @samp{neoverse-v2}, @samp{grace},
+@samp{qdf24xx}, @samp{saphira}, @samp{phecda}, @samp{xgene1}, @samp{vulcan},
 @samp{octeontx}, @samp{octeontx81},  @samp{octeontx83},
 @samp{octeontx2}, @samp{octeontx2t98}, @samp{octeontx2t96}
 @samp{octeontx2t93}, @samp{octeont

[gcc r11-11540] Add support for -mcpu=grace

2024-06-27 Thread Kyrylo Tkachov via Gcc-cvs
https://gcc.gnu.org/g:bb943609534fcbd984d39a9a7efef12fa2667ac6

commit r11-11540-gbb943609534fcbd984d39a9a7efef12fa2667ac6
Author: Kyrylo Tkachov 
Date:   Wed Jun 19 14:56:02 2024 +0530

Add support for -mcpu=grace

This adds support for the NVIDIA Grace CPU to aarch64.
We reuse the tuning decisions for the Neoverse V2 core, but include a
number of architecture features that are not enabled by default in
-mcpu=neoverse-v2.

This allows Grace users to more simply target the CPU with -mcpu=grace
rather than remembering what extensions to tag on top of
-mcpu=neoverse-v2.

Bootstrapped and tested on aarch64-none-linux-gnu.
gcc/

* config/aarch64/aarch64-cores.def (grace): New entry.
* config/aarch64/aarch64-tune.md: Regenerate.
* doc/invoke.texi (AArch64 Options): Document the above.

Signed-off-by: Kyrylo Tkachov 

Diff:
---
 gcc/config/aarch64/aarch64-cores.def | 1 +
 gcc/config/aarch64/aarch64-tune.md   | 2 +-
 gcc/doc/invoke.texi  | 4 ++--
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-cores.def 
b/gcc/config/aarch64/aarch64-cores.def
index 5599cde700f..0243e3d4d1c 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -150,6 +150,7 @@ AARCH64_CORE("saphira", saphira,saphira,8_4A,  
AARCH64_FL_FOR_ARCH8_
 AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, 8_5A, 
AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | 
AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | 
AARCH64_FL_MEMTAG, neoversen2, 0x41, 0xd49, -1)
 AARCH64_CORE("cobalt-100",   cobalt100, cortexa57, 8_5A, 
AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | 
AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | 
AARCH64_FL_MEMTAG, neoversen2, 0x6d, 0xd49, -1)
 AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, 8_5A, 
AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | 
AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | 
AARCH64_FL_MEMTAG, neoverse512tvb, 0x41, 0xd4f, -1)
+AARCH64_CORE("grace", grace, cortexa57, 8_5A, AARCH64_FL_FOR_ARCH8_5 | 
AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_CRYPTO | 
AARCH64_FL_SHA3 | AARCH64_FL_SM4 | AARCH64_FL_SVE | AARCH64_FL_SVE2 | 
AARCH64_FL_SVE2_BITPERM | AARCH64_FL_SVE2_AES | AARCH64_FL_SVE2_SM4 | 
AARCH64_FL_SVE2_SHA3, neoverse512tvb, 0x41, 0xd4f, -1)
 
 /* ARMv8-A big.LITTLE implementations.  */
 
diff --git a/gcc/config/aarch64/aarch64-tune.md 
b/gcc/config/aarch64/aarch64-tune.md
index 8953f1c0332..f233a7cce6c 100644
--- a/gcc/config/aarch64/aarch64-tune.md
+++ b/gcc/config/aarch64/aarch64-tune.md
@@ -1,5 +1,5 @@
 ;; -*- buffer-read-only: t -*-
 ;; Generated automatically by gentune.sh from aarch64-cores.def
 (define_attr "tune"
-   
"cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,neoversen2,cobalt100,neoversev2,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82"
+   
"cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,neoversen2,cobalt100,neoversev2,grace,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82"
(const (symbol_ref "((enum attr_tune) aarch64_tune)")))
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 1ae94fb3677..ef331d72beb 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -18233,8 +18233,8 @@ performance of the code.  Permissible values for this 
option are:
 @samp{cortex-a78}, @samp{cortex-a78ae}, @samp{cortex-a78c},
 @samp{ares}, @samp{exynos-m1}, @samp{emag}, @samp{falkor},
 @samp{neoverse-512tvb}, @samp{neoverse-e1}, @samp{neoverse-n1},
-@samp{neoverse-n2}, @samp{neoverse-v1},@samp{neoverse-v2}, @samp{qdf24xx},
-@samp{saphira}, @samp{phecda}, @sa

[gcc r15-1688] libstdc++: Enable more debug assertions during constant evaluation [PR111250]

2024-06-27 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:cfc9fa3bdddc1af59b7854937b99516067fd8c63

commit r15-1688-gcfc9fa3bdddc1af59b7854937b99516067fd8c63
Author: Jonathan Wakely 
Date:   Tue Jun 18 20:57:13 2024 +0100

libstdc++: Enable more debug assertions during constant evaluation 
[PR111250]

Some of our debug assertions expand to nothing unless
_GLIBCXX_ASSERTIONS is defined, which means they are not checked during
constant evaluation. By making them unconditionally expand to a
__glibcxx_assert expression they will be checked during constant
evaluation. This allows us to diagnose more instances of undefined
behaviour at compile-time, such as accessing a vector past-the-end.

libstdc++-v3/ChangeLog:

PR libstdc++/111250
* include/debug/assertions.h (__glibcxx_requires_non_empty_range)
(__glibcxx_requires_nonempty, __glibcxx_requires_subscript):
Define to __glibcxx_assert expressions or to debug mode
__glibcxx_check_xxx expressions.
* testsuite/23_containers/array/element_access/constexpr_c++17.cc:
Add checks for out-of-bounds accesses in constant expressions.
* testsuite/23_containers/vector/element_access/constexpr.cc:
Likewise.

Diff:
---
 libstdc++-v3/include/debug/assertions.h| 14 ---
 .../array/element_access/constexpr_c++17.cc| 44 ++
 .../vector/element_access/constexpr.cc | 24 ++--
 3 files changed, 72 insertions(+), 10 deletions(-)

diff --git a/libstdc++-v3/include/debug/assertions.h 
b/libstdc++-v3/include/debug/assertions.h
index fff1ae8def0..20441e33897 100644
--- a/libstdc++-v3/include/debug/assertions.h
+++ b/libstdc++-v3/include/debug/assertions.h
@@ -31,12 +31,7 @@
 
 #include 
 
-#ifndef _GLIBCXX_ASSERTIONS
-# define __glibcxx_requires_non_empty_range(_First,_Last)
-# define __glibcxx_requires_nonempty()
-# define __glibcxx_requires_subscript(_N)
-#else
-
+#ifndef _GLIBCXX_DEBUG
 // Verify that [_First, _Last) forms a non-empty iterator range.
 # define __glibcxx_requires_non_empty_range(_First,_Last)  \
   __glibcxx_assert(_First != _Last)
@@ -45,6 +40,13 @@
 // Verify that the container is nonempty
 # define __glibcxx_requires_nonempty() \
   __glibcxx_assert(!this->empty())
+#else // Use the more verbose Debug Mode checks.
+# define __glibcxx_requires_non_empty_range(_First,_Last) \
+  __glibcxx_check_non_empty_range(_First,_Last)
+# define __glibcxx_requires_nonempty() \
+  __glibcxx_check_nonempty()
+# define __glibcxx_requires_subscript(_N) \
+  __glibcxx_check_subscript(_N)
 #endif
 
 #if defined _GLIBCXX_DEBUG && _GLIBCXX_HOSTED
diff --git 
a/libstdc++-v3/testsuite/23_containers/array/element_access/constexpr_c++17.cc 
b/libstdc++-v3/testsuite/23_containers/array/element_access/constexpr_c++17.cc
index a14ad487b42..19ab1cc1f8e 100644
--- 
a/libstdc++-v3/testsuite/23_containers/array/element_access/constexpr_c++17.cc
+++ 
b/libstdc++-v3/testsuite/23_containers/array/element_access/constexpr_c++17.cc
@@ -66,3 +66,47 @@ constexpr bool test_zero()
 }
 
 static_assert( test_zero() );
+
+#ifdef __cpp_concepts
+template
+  constexpr std::false_type
+  access_empty() { return {}; }
+
+template
+  requires (std::bool_constant<&std::array{}.at(0) != nullptr>::value)
+  constexpr std::true_type
+  access_empty() { return {}; }
+
+template
+  requires (std::bool_constant<&std::array{}[0] != nullptr>::value)
+  constexpr std::true_type
+  access_empty() { return {}; }
+
+template
+  requires (std::bool_constant<&std::array{}.front() != nullptr>::value)
+  constexpr std::true_type
+  access_empty() { return {}; }
+
+template
+  requires (std::bool_constant<&std::array{}.back() != nullptr>::value)
+  constexpr std::true_type
+  access_empty() { return {}; }
+
+static_assert( ! access_empty() );
+
+template
+  constexpr std::false_type
+  access_past_the_end() { return {}; }
+
+template
+  requires (std::bool_constant{}.at(0) != nullptr>::value)
+  constexpr std::true_type
+  access_past_the_end() { return {}; }
+
+template
+  requires (std::bool_constant<&std::array{}[1] != nullptr>::value)
+  constexpr std::true_type
+  access_past_the_end() { return {}; }
+
+static_assert( ! access_past_the_end() );
+#endif
diff --git 
a/libstdc++-v3/testsuite/23_containers/vector/element_access/constexpr.cc 
b/libstdc++-v3/testsuite/23_containers/vector/element_access/constexpr.cc
index 19c91d28cd6..358ded47ad9 100644
--- a/libstdc++-v3/testsuite/23_containers/vector/element_access/constexpr.cc
+++ b/libstdc++-v3/testsuite/23_containers/vector/element_access/constexpr.cc
@@ -85,23 +85,39 @@ template
   access_empty() { return {}; }
 
 template
-  requires (std::bool_constant<(std::vector().at(0), true)>::value)
+  requires (std::bool_constant<&std::vector().at(0) != nullptr>::value)
   constexpr std::true_type
   access_empty() { return {}; }
 
 template
-  requires (std::bool_constant<(std::vector()[0], true)>::

[gcc r15-1689] libstdc++: Add debug assertions to std::vector [PR103191]

2024-06-27 Thread Jonathan Wakely via Gcc-cvs
https://gcc.gnu.org/g:8fd84bc009b3073666a24047c78a04c19eeab752

commit r15-1689-g8fd84bc009b3073666a24047c78a04c19eeab752
Author: Jonathan Wakely 
Date:   Tue Jun 18 10:57:45 2024 +0100

libstdc++: Add debug assertions to std::vector<bool> [PR103191]

This adds debug assertions for std::vector<bool> element access.

libstdc++-v3/ChangeLog:

PR libstdc++/103191
* include/bits/stl_bvector.h (vector::operator[])
(vector::front, vector::back): Add debug assertions.
* testsuite/23_containers/vector/bool/element_access/constexpr.cc:
Remove dg-error that no longer triggers.

Diff:
---
 libstdc++-v3/include/bits/stl_bvector.h| 30 +-
 .../vector/bool/element_access/constexpr.cc|  2 +-
 2 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/libstdc++-v3/include/bits/stl_bvector.h 
b/libstdc++-v3/include/bits/stl_bvector.h
index 52153cadf8f..8685cc64cc4 100644
--- a/libstdc++-v3/include/bits/stl_bvector.h
+++ b/libstdc++-v3/include/bits/stl_bvector.h
@@ -1084,12 +1084,18 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
   _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR
   reference
   operator[](size_type __n)
-  { return begin()[__n]; }
+  {
+   __glibcxx_requires_subscript(__n);
+   return begin()[__n];
+  }
 
   _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR
   const_reference
   operator[](size_type __n) const
-  { return begin()[__n]; }
+  {
+   __glibcxx_requires_subscript(__n);
+   return begin()[__n];
+  }
 
 protected:
   _GLIBCXX20_CONSTEXPR
@@ -1133,22 +1139,34 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
   _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR
   reference
   front()
-  { return *begin(); }
+  {
+   __glibcxx_requires_nonempty();
+   return *begin();
+  }
 
   _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR
   const_reference
   front() const
-  { return *begin(); }
+  {
+   __glibcxx_requires_nonempty();
+   return *begin();
+  }
 
   _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR
   reference
   back()
-  { return *(end() - 1); }
+  {
+   __glibcxx_requires_nonempty();
+   return *(end() - 1);
+  }
 
   _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR
   const_reference
   back() const
-  { return *(end() - 1); }
+  {
+   __glibcxx_requires_nonempty();
+   return *(end() - 1);
+  }
 
   _GLIBCXX20_CONSTEXPR
   void
diff --git 
a/libstdc++-v3/testsuite/23_containers/vector/bool/element_access/constexpr.cc 
b/libstdc++-v3/testsuite/23_containers/vector/bool/element_access/constexpr.cc
index bff9f7b4e0f..7c60e5db4d1 100644
--- 
a/libstdc++-v3/testsuite/23_containers/vector/bool/element_access/constexpr.cc
+++ 
b/libstdc++-v3/testsuite/23_containers/vector/bool/element_access/constexpr.cc
@@ -115,4 +115,4 @@ template
   constexpr std::true_type
   access_empty_front() { return {}; }
 
-static_assert( ! access_empty_front() ); // { dg-error "ambiguous" "PR 103191" 
{ target { ! debug_mode } } }
+static_assert( ! access_empty_front() );


[gcc r15-1690] libstdc++: Fix std::format for chrono::duration with unsigned rep [PR115668]

2024-06-27 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:dafa750c8a6f0a088677871bfaad054881737ab1

commit r15-1690-gdafa750c8a6f0a088677871bfaad054881737ab1
Author: Jonathan Wakely 
Date:   Wed Jun 26 20:22:54 2024 +0100

libstdc++: Fix std::format for chrono::duration with unsigned rep [PR115668]

Using std::chrono::abs is only valid if numeric_limits<Rep>::is_signed
is true, so using it unconditionally made it ill-formed to format a
duration with an unsigned rep.

The duration formatter might as well negate the duration itself instead of
using chrono::abs, because it already needs to check for a negative
value.

libstdc++-v3/ChangeLog:

PR libstdc++/115668
* include/bits/chrono_io.h (formatter::format):
Do not use chrono::abs.
* testsuite/20_util/duration/io.cc: Check formatting a duration
with unsigned rep.

Diff:
---
 libstdc++-v3/include/bits/chrono_io.h | 5 -
 libstdc++-v3/testsuite/20_util/duration/io.cc | 6 ++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/bits/chrono_io.h 
b/libstdc++-v3/include/bits/chrono_io.h
index 3b34992b42a..72c66a0fef0 100644
--- a/libstdc++-v3/include/bits/chrono_io.h
+++ b/libstdc++-v3/include/bits/chrono_io.h
@@ -1607,7 +1607,10 @@ namespace __format
format(const chrono::duration<_Rep, _Period>& __d,
   basic_format_context<_Out, _CharT>& __fc) const
{
- return _M_f._M_format(chrono::abs(__d), __fc, __d < __d.zero());
+ if constexpr (numeric_limits<_Rep>::is_signed)
+   if (__d < __d.zero())
+ return _M_f._M_format(-__d, __fc, true);
+ return _M_f._M_format(__d, __fc, false);
}
 
 private:
diff --git a/libstdc++-v3/testsuite/20_util/duration/io.cc 
b/libstdc++-v3/testsuite/20_util/duration/io.cc
index 2f940ef86b7..6b00689672c 100644
--- a/libstdc++-v3/testsuite/20_util/duration/io.cc
+++ b/libstdc++-v3/testsuite/20_util/duration/io.cc
@@ -100,6 +100,12 @@ test_format()
   std::chrono::duration d{0.5};
   s = std::format("{}", d);
   VERIFY( s == "0.5ms" );
+
+  std::chrono::duration u{500}; // PR libstdc++/115668
+  s = std::format("{}", u);
+  VERIFY( s == "500ms" );
+  s = std::format("{:%Q %q}", u);
+  VERIFY( s == "500 ms" );
 }
 
 void


[gcc r15-1691] Avoid global bitmap space in ranger.

2024-06-27 Thread Aldy Hernandez via Gcc-cvs
https://gcc.gnu.org/g:bcdbb85f26aa0d25645d51ddf728a049b201c980

commit r15-1691-gbcdbb85f26aa0d25645d51ddf728a049b201c980
Author: Aldy Hernandez 
Date:   Wed Jun 19 11:42:16 2024 +0200

Avoid global bitmap space in ranger.

gcc/ChangeLog:

* gimple-range-cache.cc (update_list::update_list): Add m_bitmaps.
(update_list::~update_list): Initialize m_bitmaps.
* gimple-range-cache.h (ssa_lazy_cache): Add m_bitmaps.
* gimple-range.cc (enable_ranger): Remove global bitmap
initialization.
(disable_ranger): Remove global bitmap release.

Diff:
---
 gcc/gimple-range-cache.cc | 6 --
 gcc/gimple-range-cache.h  | 9 +++--
 gcc/gimple-range.cc   | 4 
 3 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/gcc/gimple-range-cache.cc b/gcc/gimple-range-cache.cc
index d84fd1ca0e8..6979a14cbaa 100644
--- a/gcc/gimple-range-cache.cc
+++ b/gcc/gimple-range-cache.cc
@@ -906,6 +906,7 @@ private:
   vec m_update_list;
   int m_update_head;
   bitmap m_propfail;
+  bitmap_obstack m_bitmaps;
 };
 
 // Create an update list.
@@ -915,7 +916,8 @@ update_list::update_list ()
   m_update_list.create (0);
   m_update_list.safe_grow_cleared (last_basic_block_for_fn (cfun) + 64);
   m_update_head = -1;
-  m_propfail = BITMAP_ALLOC (NULL);
+  bitmap_obstack_initialize (&m_bitmaps);
+  m_propfail = BITMAP_ALLOC (&m_bitmaps);
 }
 
 // Destroy an update list.
@@ -923,7 +925,7 @@ update_list::update_list ()
 update_list::~update_list ()
 {
   m_update_list.release ();
-  BITMAP_FREE (m_propfail);
+  bitmap_obstack_release (&m_bitmaps);
 }
 
 // Add BB to the list of blocks to update, unless it's already in the list.
diff --git a/gcc/gimple-range-cache.h b/gcc/gimple-range-cache.h
index 63410d5437e..0ea34d3f686 100644
--- a/gcc/gimple-range-cache.h
+++ b/gcc/gimple-range-cache.h
@@ -78,8 +78,12 @@ protected:
 class ssa_lazy_cache : public ssa_cache
 {
 public:
-  inline ssa_lazy_cache () { active_p = BITMAP_ALLOC (NULL); }
-  inline ~ssa_lazy_cache () { BITMAP_FREE (active_p); }
+  inline ssa_lazy_cache ()
+  {
+bitmap_obstack_initialize (&m_bitmaps);
+active_p = BITMAP_ALLOC (&m_bitmaps);
+  }
+  inline ~ssa_lazy_cache () { bitmap_obstack_release (&m_bitmaps); }
   inline bool empty_p () const { return bitmap_empty_p (active_p); }
   virtual bool has_range (tree name) const;
   virtual bool set_range (tree name, const vrange &r);
@@ -89,6 +93,7 @@ public:
   virtual void clear ();
   void merge (const ssa_lazy_cache &);
 protected:
+  bitmap_obstack m_bitmaps;
   bitmap active_p;
 };
 
diff --git a/gcc/gimple-range.cc b/gcc/gimple-range.cc
index 50448ef81a2..5df649e268c 100644
--- a/gcc/gimple-range.cc
+++ b/gcc/gimple-range.cc
@@ -681,8 +681,6 @@ enable_ranger (struct function *fun, bool use_imm_uses)
 {
   gimple_ranger *r;
 
-  bitmap_obstack_initialize (NULL);
-
   gcc_checking_assert (!fun->x_range_query);
   r = new gimple_ranger (use_imm_uses);
   fun->x_range_query = r;
@@ -699,8 +697,6 @@ disable_ranger (struct function *fun)
   gcc_checking_assert (fun->x_range_query);
   delete fun->x_range_query;
   fun->x_range_query = NULL;
-
-  bitmap_obstack_release (NULL);
 }
 
 // 


[gcc r15-1692] [libstdc++] [testsuite] defer to check_vect_support* [PR115454]

2024-06-27 Thread Alexandre Oliva via Libstdc++-cvs
https://gcc.gnu.org/g:95faa1bea7bdc7f92fcccb3543bfcbc8184c5e5b

commit r15-1692-g95faa1bea7bdc7f92fcccb3543bfcbc8184c5e5b
Author: Alexandre Oliva 
Date:   Thu Jun 27 07:22:48 2024 -0300

[libstdc++] [testsuite] defer to check_vect_support* [PR115454]

The newly-added testcase overrides the default dg-do action set by
check_vect_support_and_set_flags (in libstdc++-dg/conformance.exp), so
it attempts to run the test even if runtime vector support is not
available.

Remove the explicit dg-do directive, so that the default is honored,
and the test is run if vector support is found, and only compiled
otherwise.


for  libstdc++-v3/ChangeLog

PR libstdc++/115454
* testsuite/experimental/simd/pr115454_find_last_set.cc: Defer
to check_vect_support_and_set_flags's default dg-do action.

Diff:
---
 libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc | 1 -
 1 file changed, 1 deletion(-)

diff --git a/libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc 
b/libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc
index 25a713b4e94..4ade8601f27 100644
--- a/libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc
+++ b/libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc
@@ -1,5 +1,4 @@
 // { dg-options "-std=gnu++17" }
-// { dg-do run { target *-*-* } }
 // { dg-require-effective-target c++17 }
 // { dg-additional-options "-march=x86-64-v4" { target avx512f_runtime } }
 // { dg-require-cmath "" }


[gcc r15-1693] libstdc++: Fix std::codecvt for empty dest [PR37475]

2024-06-27 Thread Jonathan Wakely via Gcc-cvs
https://gcc.gnu.org/g:73ad57c244c283bf6da0c16630212f11b945eda5

commit r15-1693-g73ad57c244c283bf6da0c16630212f11b945eda5
Author: Jonathan Wakely 
Date:   Tue Jun 11 16:45:43 2024 +0100

libstdc++: Fix std::codecvt for empty dest [PR37475]

For the GNU locale model, codecvt::do_out and codecvt::do_in incorrectly
return 'ok' when the destination range is empty. That happens because
detecting incomplete output is done in the loop body, and the loop is
never even entered if to == to_end.

By restructuring the loop condition so that we check the output range
separately, we can ensure that for a non-empty source range, we always
enter the loop at least once, and detect if the destination range is too
small.

The loops also seem easier to reason about if we return immediately on
any error, instead of checking the result twice on every iteration. We
can use an RAII type to restore the locale before returning, which also
simplifies all the other member functions.

libstdc++-v3/ChangeLog:

PR libstdc++/37475
* config/locale/gnu/codecvt_members.cc (Guard): New RAII type.
(do_out, do_in): Return partial if the destination is empty but
the source is not. Use Guard to restore locale on scope exit.
Return immediately on any conversion error.
(do_encoding, do_max_length, do_length): Use Guard.
* testsuite/22_locale/codecvt/in/char/37475.cc: New test.
* testsuite/22_locale/codecvt/in/wchar_t/37475.cc: New test.
* testsuite/22_locale/codecvt/out/char/37475.cc: New test.
* testsuite/22_locale/codecvt/out/wchar_t/37475.cc: New test.

Diff:
---
 libstdc++-v3/config/locale/gnu/codecvt_members.cc  | 117 +
 .../testsuite/22_locale/codecvt/in/char/37475.cc   |  23 
 .../22_locale/codecvt/in/wchar_t/37475.cc  |  23 
 .../testsuite/22_locale/codecvt/out/char/37475.cc  |  23 
 .../22_locale/codecvt/out/wchar_t/37475.cc |  23 
 5 files changed, 142 insertions(+), 67 deletions(-)

diff --git a/libstdc++-v3/config/locale/gnu/codecvt_members.cc 
b/libstdc++-v3/config/locale/gnu/codecvt_members.cc
index 034713d236e..794f25a5f35 100644
--- a/libstdc++-v3/config/locale/gnu/codecvt_members.cc
+++ b/libstdc++-v3/config/locale/gnu/codecvt_members.cc
@@ -37,8 +37,23 @@ namespace std _GLIBCXX_VISIBILITY(default)
 {
 _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
-  // Specializations.
 #ifdef _GLIBCXX_USE_WCHAR_T
+namespace
+{
+  // RAII type for changing and restoring the current thread's locale.
+  struct Guard
+  {
+#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
+explicit Guard(__c_locale loc) : old(__uselocale(loc)) { }
+~Guard() { __uselocale(old); }
+#else
+explicit Guard(__c_locale) { }
+#endif
+__c_locale old;
+  };
+}
+
+  // Specializations.
   codecvt_base::result
   codecvt::
   do_out(state_type& __state, const intern_type* __from,
@@ -46,22 +61,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 extern_type* __to, extern_type* __to_end,
 extern_type*& __to_next) const
   {
-result __ret = ok;
 state_type __tmp_state(__state);
-
-#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
-__c_locale __old = __uselocale(_M_c_locale_codecvt);
-#endif
+Guard g(_M_c_locale_codecvt);
 
 // wcsnrtombs is *very* fast but stops if encounters NUL characters:
 // in case we fall back to wcrtomb and then continue, in a loop.
 // NB: wcsnrtombs is a GNU extension
-for (__from_next = __from, __to_next = __to;
-__from_next < __from_end && __to_next < __to_end
-&& __ret == ok;)
+__from_next = __from;
+__to_next = __to;
+while (__from_next < __from_end)
   {
-   const intern_type* __from_chunk_end = wmemchr(__from_next, L'\0',
- __from_end - __from_next);
+   if (__to_next >= __to_end)
+ return partial;
+
+   const intern_type* __from_chunk_end
+ = wmemchr(__from_next, L'\0', __from_end - __from_next);
if (!__from_chunk_end)
  __from_chunk_end = __from_end;
 
@@ -77,12 +91,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
for (; __from < __from_next; ++__from)
  __to_next += wcrtomb(__to_next, *__from, &__tmp_state);
__state = __tmp_state;
-   __ret = error;
+   return error;
  }
else if (__from_next && __from_next < __from_chunk_end)
  {
__to_next += __conv;
-   __ret = partial;
+   return partial;
  }
else
  {
@@ -90,13 +104,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__to_next += __conv;
  }
 
-   if (__from_next < __from_end && __ret == ok)
+   if (__from_next < __from_end)
  {
extern_type __buf[MB_LEN_MAX];
__tmp_state = __state;
const s

[gcc r14-10352] [libstdc++] [testsuite] defer to check_vect_support* [PR115454]

2024-06-27 Thread Alexandre Oliva via Libstdc++-cvs
https://gcc.gnu.org/g:b70af0bd2e33e9cc20dae45c131429a402fc8845

commit r14-10352-gb70af0bd2e33e9cc20dae45c131429a402fc8845
Author: Alexandre Oliva 
Date:   Thu Jun 27 08:14:34 2024 -0300

[libstdc++] [testsuite] defer to check_vect_support* [PR115454]

The newly-added testcase overrides the default dg-do action set by
check_vect_support_and_set_flags (in libstdc++-dg/conformance.exp), so
it attempts to run the test even if runtime vector support is not
available.

Remove the explicit dg-do directive, so that the default is honored,
and the test is run if vector support is found, and only compiled
otherwise.


for  libstdc++-v3/ChangeLog

PR libstdc++/115454
* testsuite/experimental/simd/pr115454_find_last_set.cc: Defer
to check_vect_support_and_set_flags's default dg-do action.

(cherry picked from commit 95faa1bea7bdc7f92fcccb3543bfcbc8184c5e5b)

Diff:
---
 libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc | 1 -
 1 file changed, 1 deletion(-)

diff --git a/libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc 
b/libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc
index 25a713b4e94..4ade8601f27 100644
--- a/libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc
+++ b/libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc
@@ -1,5 +1,4 @@
 // { dg-options "-std=gnu++17" }
-// { dg-do run { target *-*-* } }
 // { dg-require-effective-target c++17 }
 // { dg-additional-options "-march=x86-64-v4" { target avx512f_runtime } }
 // { dg-require-cmath "" }


[gcc r13-8872] [libstdc++] [testsuite] defer to check_vect_support* [PR115454]

2024-06-27 Thread Alexandre Oliva via Libstdc++-cvs
https://gcc.gnu.org/g:3de1c4985bebd1882b6643789daba24f2d11bafe

commit r13-8872-g3de1c4985bebd1882b6643789daba24f2d11bafe
Author: Alexandre Oliva 
Date:   Thu Jun 27 08:32:15 2024 -0300

[libstdc++] [testsuite] defer to check_vect_support* [PR115454]

The newly-added testcase overrides the default dg-do action set by
check_vect_support_and_set_flags (in libstdc++-dg/conformance.exp), so
it attempts to run the test even if runtime vector support is not
available.

Remove the explicit dg-do directive, so that the default is honored,
and the test is run if vector support is found, and only compiled
otherwise.


for  libstdc++-v3/ChangeLog

PR libstdc++/115454
* testsuite/experimental/simd/pr115454_find_last_set.cc: Defer
to check_vect_support_and_set_flags's default dg-do action.

(cherry picked from commit 95faa1bea7bdc7f92fcccb3543bfcbc8184c5e5b)

Diff:
---
 libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc | 1 -
 1 file changed, 1 deletion(-)

diff --git a/libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc 
b/libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc
index 25a713b4e94..4ade8601f27 100644
--- a/libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc
+++ b/libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc
@@ -1,5 +1,4 @@
 // { dg-options "-std=gnu++17" }
-// { dg-do run { target *-*-* } }
 // { dg-require-effective-target c++17 }
 // { dg-additional-options "-march=x86-64-v4" { target avx512f_runtime } }
 // { dg-require-cmath "" }


[gcc r12-10585] [libstdc++] [testsuite] defer to check_vect_support* [PR115454]

2024-06-27 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:95ca5f458251e21123e45ec52c38d629d39cd0e4

commit r12-10585-g95ca5f458251e21123e45ec52c38d629d39cd0e4
Author: Alexandre Oliva 
Date:   Thu Jun 27 08:44:54 2024 -0300

[libstdc++] [testsuite] defer to check_vect_support* [PR115454]

The newly-added testcase overrides the default dg-do action set by
check_vect_support_and_set_flags (in libstdc++-dg/conformance.exp), so
it attempts to run the test even if runtime vector support is not
available.

Remove the explicit dg-do directive, so that the default is honored,
and the test is run if vector support is found, and only compiled
otherwise.


for  libstdc++-v3/ChangeLog

PR libstdc++/115454
* testsuite/experimental/simd/pr115454_find_last_set.cc: Defer
to check_vect_support_and_set_flags's default dg-do action.

(cherry picked from commit 95faa1bea7bdc7f92fcccb3543bfcbc8184c5e5b)

Diff:
---
 libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc | 1 -
 1 file changed, 1 deletion(-)

diff --git a/libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc 
b/libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc
index 25a713b4e94..4ade8601f27 100644
--- a/libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc
+++ b/libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc
@@ -1,5 +1,4 @@
 // { dg-options "-std=gnu++17" }
-// { dg-do run { target *-*-* } }
 // { dg-require-effective-target c++17 }
 // { dg-additional-options "-march=x86-64-v4" { target avx512f_runtime } }
 // { dg-require-cmath "" }


[gcc r15-1694] tree-optimization/115669 - fix SLP reduction association

2024-06-27 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:7886830bb45c4f5dca0496d4deae9a45204d78f5

commit r15-1694-g7886830bb45c4f5dca0496d4deae9a45204d78f5
Author: Richard Biener 
Date:   Thu Jun 27 11:26:08 2024 +0200

tree-optimization/115669 - fix SLP reduction association

The following avoids associating a reduction path as that might
get STMT_VINFO_REDUC_IDX out-of-sync with the SLP operand order.
This is a latent issue with SLP reductions but now easily exposed
as we're doing single-lane SLP reductions.

Once we have achieved SLP-only vectorization we can move and update this meta-data.

PR tree-optimization/115669
* tree-vect-slp.cc (vect_build_slp_tree_2): Do not reassociate
chains that participate in a reduction.

* gcc.dg/vect/pr115669.c: New testcase.

Diff:
---
 gcc/testsuite/gcc.dg/vect/pr115669.c | 22 ++
 gcc/tree-vect-slp.cc |  3 +++
 2 files changed, 25 insertions(+)

diff --git a/gcc/testsuite/gcc.dg/vect/pr115669.c 
b/gcc/testsuite/gcc.dg/vect/pr115669.c
new file mode 100644
index 000..361a17a64e6
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr115669.c
@@ -0,0 +1,22 @@
+/* { dg-additional-options "-fwrapv" } */
+
+#include "tree-vect.h"
+
+int a = 10;
+unsigned b;
+long long c[100];
+int foo()
+{
+  long long *d = c;
+  for (short e = 0; e < a; e++)
+b += ~(d ? d[e] : 0);
+  return b;
+}
+
+int main()
+{
+  check_vect ();
+  if (foo () != -10)
+abort ();
+  return 0;
+}
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 1252b613125..174b4800fa9 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -2069,6 +2069,9 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
   else if (is_a  (vinfo)
   /* ???  We don't handle !vect_internal_def defs below.  */
   && STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def
+  /* ???  Do not associate a reduction, this will wreck REDUC_IDX
+ mapping as long as that exists on the stmt_info level.  */
+  && STMT_VINFO_REDUC_IDX (stmt_info) == -1
   && is_gimple_assign (stmt_info->stmt)
   && (associative_tree_code (gimple_assign_rhs_code (stmt_info->stmt))
   || gimple_assign_rhs_code (stmt_info->stmt) == MINUS_EXPR)


[gcc r11-11541] coroutines: Await expressions are not allowed in handlers [PR 99710].

2024-06-27 Thread Iain D Sandoe via Gcc-cvs
https://gcc.gnu.org/g:57482cadeb12af2dd52b381b0766776d1e8ec59b

commit r11-11541-g57482cadeb12af2dd52b381b0766776d1e8ec59b
Author: Iain Sandoe 
Date:   Sat Oct 2 14:43:39 2021 +0100

coroutines: Await expressions are not allowed in handlers [PR 99710].

C++20 [expr.await] / 2
An await-expression shall appear only in a potentially-evaluated expression
within the compound-statement of a function-body outside of a handler.

Signed-off-by: Iain Sandoe 

PR c++/99710

gcc/cp/ChangeLog:

* coroutines.cc (await_statement_walker): Report an error if
an await expression is found in a handler body.

gcc/testsuite/ChangeLog:

* g++.dg/coroutines/pr99710.C: New test.

(cherry picked from commit 650beb110538097b9c3e8600149b333a83e7e836)

Diff:
---
 gcc/cp/coroutines.cc  | 17 -
 gcc/testsuite/g++.dg/coroutines/pr99710.C | 25 +
 2 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc
index 34d9d3e7d61..71246e99a6f 100644
--- a/gcc/cp/coroutines.cc
+++ b/gcc/cp/coroutines.cc
@@ -3713,7 +3713,22 @@ await_statement_walker (tree *stmt, int *do_subtree, 
void *d)
  }
return NULL_TREE; /* Done.  */
  }
-   break;
+ break;
+   case HANDLER:
+ {
+   /* [expr.await] An await-expression shall appear only in a
+  potentially-evaluated expression within the compound-statement
+  of a function-body outside of a handler.  */
+   tree *await_ptr;
+   hash_set visited;
+   if (!(cp_walk_tree (&HANDLER_BODY (expr), find_any_await,
+ &await_ptr, &visited)))
+ return NULL_TREE; /* All OK.  */
+   location_t loc = EXPR_LOCATION (*await_ptr);
+   error_at (loc, "await expressions are not permitted in handlers");
+   return NULL_TREE; /* This is going to fail later anyway.  */
+ }
+ break;
   }
   else if (EXPR_P (expr))
 {
diff --git a/gcc/testsuite/g++.dg/coroutines/pr99710.C 
b/gcc/testsuite/g++.dg/coroutines/pr99710.C
new file mode 100644
index 000..e4f7116b8d7
--- /dev/null
+++ b/gcc/testsuite/g++.dg/coroutines/pr99710.C
@@ -0,0 +1,25 @@
+#include 
+
+struct task {
+struct promise_type {
+std::suspend_always initial_suspend();
+std::suspend_always final_suspend() noexcept;
+task get_return_object();
+void return_void();
+void unhandled_exception();
+};
+};
+
+task
+my_coro ()
+{
+  try
+{ }
+  catch (...)
+{
+  // [expr.await] An await-expression shall appear only in a potentially-
+  // evaluated expression within the compound-statement of a function-body
+  // outside of a handler 
+  co_await std::suspend_always{}; // { dg-error "await expressions are not 
permitted in handlers" }
+}
+}


[gcc r11-11542] coroutines: Pass lvalues to user-defined operator new [PR 100772].

2024-06-27 Thread Iain D Sandoe via Gcc-cvs
https://gcc.gnu.org/g:f647906ef227bc22af224d955a408d776cfddb04

commit r11-11542-gf647906ef227bc22af224d955a408d776cfddb04
Author: Iain Sandoe 
Date:   Sun Oct 3 19:46:09 2021 +0100

coroutines: Pass lvalues to user-defined operator new [PR 100772].

The wording of the standard has been clarified to be explicit that
the parameters to any user-defined operator-new in the promise
class should be lvalues.

Signed-off-by: Iain Sandoe 

PR c++/100772

gcc/cp/ChangeLog:

* coroutines.cc (morph_fn_to_coro): Convert function parms
from reference before constructing any operator-new args
list.

gcc/testsuite/ChangeLog:

* g++.dg/coroutines/pr100772-a.C: New test.
* g++.dg/coroutines/pr100772-b.C: New test.

(cherry picked from commit 921942a8a106cb53994c21162922e4934eb3a3e0)

Diff:
---
 gcc/cp/coroutines.cc |  8 +--
 gcc/testsuite/g++.dg/coroutines/pr100772-a.C | 77 +++
 gcc/testsuite/g++.dg/coroutines/pr100772-b.C | 93 
 3 files changed, 174 insertions(+), 4 deletions(-)

diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc
index 71246e99a6f..04c72ddd48b 100644
--- a/gcc/cp/coroutines.cc
+++ b/gcc/cp/coroutines.cc
@@ -4613,8 +4613,8 @@ morph_fn_to_coro (tree orig, tree *resumer, tree 
*destroyer)
If the lookup finds an allocation function in the scope of the promise
type, overload resolution is performed on a function call created by
assembling an argument list.  The first argument is the amount of space
-   requested, and has type std::size_t.  The succeeding arguments are
-   those of the original function.  */
+   requested, and has type std::size_t.  The lvalues p1...pn are the
+   succeeding arguments..  */
   vec *args = make_tree_vector ();
   vec_safe_push (args, resizeable); /* Space needed.  */
 
@@ -4632,10 +4632,10 @@ morph_fn_to_coro (tree orig, tree *resumer, tree 
*destroyer)
  this_ref = convert_to_reference (tt, this_ref, CONV_STATIC,
   LOOKUP_NORMAL , NULL_TREE,
   tf_warning_or_error);
- vec_safe_push (args, this_ref);
+ vec_safe_push (args, convert_from_reference (this_ref));
}
  else
-   vec_safe_push (args, arg);
+   vec_safe_push (args, convert_from_reference (arg));
}
 
   /* Note the function selected; we test to see if it's NOTHROW.  */
diff --git a/gcc/testsuite/g++.dg/coroutines/pr100772-a.C 
b/gcc/testsuite/g++.dg/coroutines/pr100772-a.C
new file mode 100644
index 000..a325d384fc3
--- /dev/null
+++ b/gcc/testsuite/g++.dg/coroutines/pr100772-a.C
@@ -0,0 +1,77 @@
+//  { dg-additional-options "-fsyntax-only " }
+#ifdef __clang__
+#include 
+namespace std {
+  using namespace std::experimental;
+}
+#else
+#include 
+#endif
+
+struct Task
+{
+struct promise_type
+{
+   void return_void() const noexcept {}
+
+   void* operator new(std::size_t, auto &&...args) noexcept
+   {
+static_assert(sizeof...(args) > 0);
+static_assert(sizeof...(args) == 2);
+
+   return nullptr;
+   }
+
+   void operator delete(void *, std::size_t) noexcept
+   {
+   }
+
+static Task get_return_object_on_allocation_failure() noexcept
+{
+return {};
+}
+
+Task get_return_object() noexcept
+{
+return Task{ *this };
+}
+
+std::suspend_always initial_suspend() noexcept
+{
+return {};
+}
+
+std::suspend_always final_suspend() noexcept
+{
+return {};
+}
+
+void unhandled_exception() noexcept {}
+};
+
+using promise_handle = std::coroutine_handle;
+
+Task() = default;
+Task(promise_type & promise) noexcept
+: m_handle{ promise_handle::from_promise(promise) }
+{}
+
+~Task()
+{
+if (m_handle.address()) { m_handle.destroy(); }
+}
+
+promise_handle m_handle{};
+};
+
+
+Task Foo(auto && ... args) noexcept
+{
+co_return;
+}
+
+int main()
+{
+int v;
+Foo(v, 2134);
+}
diff --git a/gcc/testsuite/g++.dg/coroutines/pr100772-b.C 
b/gcc/testsuite/g++.dg/coroutines/pr100772-b.C
new file mode 100644
index 000..6cdf8d1e529
--- /dev/null
+++ b/gcc/testsuite/g++.dg/coroutines/pr100772-b.C
@@ -0,0 +1,93 @@
+#ifdef __clang__
+#include 
+namespace std {
+  using namespace std::experimental;
+}
+#else
+#include 
+#endif
+#include 
+#include 
+#include   // needed for abi::__cxa_demangle
+#include 
+
+std::shared_ptr cppDemangle(const char *abiName)
+{
+  int status;
+  char *ret = abi::__cxa_demangle(abiName, 0, 0, &status);  
+
+  /* NOTE: 

[gcc r11-11543] coroutines: Fail with a sorry when presented with a VLA [PR 101765].

2024-06-27 Thread Iain D Sandoe via Gcc-cvs
https://gcc.gnu.org/g:1d5779274ce9807358f9e04f1112b65c6ed6c284

commit r11-11543-g1d5779274ce9807358f9e04f1112b65c6ed6c284
Author: Iain Sandoe 
Date:   Sat Oct 2 16:15:38 2021 +0100

coroutines: Fail with a sorry when presented with a VLA [PR 101765].

We do not support this yet.

Signed-off-by: Iain Sandoe 

PR c++/101765

gcc/cp/ChangeLog:

* coroutines.cc (register_local_var_uses): Emit a sorry if
we encounter a VLA in the coroutine local variables.

gcc/testsuite/ChangeLog:

* g++.dg/coroutines/pr101765.C: New test.

(cherry picked from commit fdf0b6ce6c1cfa1c328c0c40473c71ca11fd8303)

Diff:
---
 gcc/cp/coroutines.cc   | 10 +++
 gcc/testsuite/g++.dg/coroutines/pr101765.C | 45 ++
 2 files changed, 55 insertions(+)

diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc
index 04c72ddd48b..406c85c4176 100644
--- a/gcc/cp/coroutines.cc
+++ b/gcc/cp/coroutines.cc
@@ -3941,6 +3941,16 @@ register_local_var_uses (tree *stmt, int *do_subtree, 
void *d)
  if (local_var.is_static)
continue;
 
+ poly_uint64 size;
+ if (TREE_CODE (lvtype) == ARRAY_TYPE
+ && !poly_int_tree_p (DECL_SIZE_UNIT (lvar), &size))
+   {
+ sorry_at (local_var.def_loc, "variable length arrays are not"
+   " yet supported in coroutines");
+ /* Ignore it, this is broken anyway.  */
+ continue;
+   }
+
  lvd->local_var_seen = true;
  /* If this var is a lambda capture proxy, we want to leave it alone,
 and later rewrite the DECL_VALUE_EXPR to indirect through the
diff --git a/gcc/testsuite/g++.dg/coroutines/pr101765.C 
b/gcc/testsuite/g++.dg/coroutines/pr101765.C
new file mode 100644
index 000..49a49d11299
--- /dev/null
+++ b/gcc/testsuite/g++.dg/coroutines/pr101765.C
@@ -0,0 +1,45 @@
+// We cannot compile this yet, much run it - but one day it might be
+// feasible, so do the minimum for now.
+// { dg-additional-options " -fsyntax-only -Wno-vla" }
+
+#include "coro.h"
+
+// boiler-plate for tests of codegen
+#include "coro1-ret-int-yield-int.h"
+
+struct coro1
+foo (int arg) noexcept
+{
+  PRINTF ("foo arg = %d\n", arg);
+  char arr[arg]; /* { dg-message "sorry, unimplemented: variable length arrays 
are not yet supported in coroutines" "" { target *-*-* } } */
+  if (arg < 4)
+co_return -6174;
+  else
+for (int i = 0; i < arg; ++i) arr[i] = (char) i;
+  co_yield (int) arr[2];
+  co_return (int) arr[3];
+}
+
+int main ()
+{
+  PRINT ("main: create coro1");
+  struct coro1 x = foo (10);
+  PRINT ("main: got coro1 - resuming");
+  if (x.handle.done())
+abort();
+  x.handle.resume();
+  PRINT ("main: after resume");
+  int y = x.handle.promise().get_value();
+  if ( y == -6174 )
+{
+  PRINT ("main: saw -6174");
+  return 1;
+}
+  else if ( y != 2 )
+abort;
+  x.handle.resume();
+  y = x.handle.promise().get_value();
+  if ( y != 3 )
+abort ();
+  return 0;
+}


[gcc r11-11544] c++, coroutines: Improve check for throwing final await [PR104051].

2024-06-27 Thread Iain D Sandoe via Gcc-cvs
https://gcc.gnu.org/g:f4cdbf1f757fa9525d70780546d7daa43dfb129f

commit r11-11544-gf4cdbf1f757fa9525d70780546d7daa43dfb129f
Author: Iain Sandoe 
Date:   Mon Apr 18 16:23:30 2022 +0100

c++, coroutines: Improve check for throwing final await [PR104051].

We check that the final_suspend () method returns a sane type (i.e. a class
or structure) but, unfortunately, that check has to be later than the one
for a throwing case.  If the user returns some nonsensical type from the
method, we need to handle that in the checking for noexcept.

Signed-off-by: Iain Sandoe 

PR c++/104051

gcc/cp/ChangeLog:

* coroutines.cc (coro_diagnose_throwing_final_aw_expr): Handle
non-target expression inputs.

gcc/testsuite/ChangeLog:

* g++.dg/coroutines/pr104051.C: New test.

(cherry picked from commit 7b96274a340bc0e9bcaef9baff3a44ec2f12c3df)

Diff:
---
 gcc/cp/coroutines.cc   | 13 +++--
 gcc/testsuite/g++.dg/coroutines/pr104051.C | 29 +
 2 files changed, 36 insertions(+), 6 deletions(-)

diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc
index 406c85c4176..b12d74bf975 100644
--- a/gcc/cp/coroutines.cc
+++ b/gcc/cp/coroutines.cc
@@ -877,13 +877,14 @@ coro_diagnose_throwing_fn (tree fndecl)
 static bool
 coro_diagnose_throwing_final_aw_expr (tree expr)
 {
-  tree t = TARGET_EXPR_INITIAL (expr);
+  if (TREE_CODE (expr) == TARGET_EXPR)
+expr = TARGET_EXPR_INITIAL (expr);
   tree fn = NULL_TREE;
-  if (TREE_CODE (t) == CALL_EXPR)
-fn = CALL_EXPR_FN(t);
-  else if (TREE_CODE (t) == AGGR_INIT_EXPR)
-fn = AGGR_INIT_EXPR_FN (t);
-  else if (TREE_CODE (t) == CONSTRUCTOR)
+  if (TREE_CODE (expr) == CALL_EXPR)
+fn = CALL_EXPR_FN (expr);
+  else if (TREE_CODE (expr) == AGGR_INIT_EXPR)
+fn = AGGR_INIT_EXPR_FN (expr);
+  else if (TREE_CODE (expr) == CONSTRUCTOR)
 return false;
   else
 {
diff --git a/gcc/testsuite/g++.dg/coroutines/pr104051.C 
b/gcc/testsuite/g++.dg/coroutines/pr104051.C
new file mode 100644
index 000..ce7ae55405a
--- /dev/null
+++ b/gcc/testsuite/g++.dg/coroutines/pr104051.C
@@ -0,0 +1,29 @@
+// { dg-additional-options "-fsyntax-only" }
+#include 
+#include 
+template  struct promise {
+  struct final_awaitable {
+bool await_ready() noexcept;
+template 
+std::coroutine_handle<>
+await_suspend(std::coroutine_handle) noexcept;
+void await_resume() noexcept;
+  };
+  auto get_return_object() {
+return std::coroutine_handle::from_promise(*this);
+  }
+  auto initial_suspend() { return std::suspend_always(); }
+  auto final_suspend() noexcept { return true; }
+  void unhandled_exception();
+};
+template  struct task {
+  using promise_type = promise;
+  task(std::coroutine_handle>);
+  bool await_ready();
+  std::coroutine_handle<> await_suspend(std::coroutine_handle<>);
+  T await_resume();
+};
+task> foo() { // { dg-error {awaitable type 'bool' is not a 
structure} }
+  while ((co_await foo()).empty())
+;
+}


[gcc r13-8873] aarch64: Fix +nocrypto handling

2024-06-27 Thread Kyrylo Tkachov via Gcc-cvs
https://gcc.gnu.org/g:c93a9bba743ac236f6045ba7aafbc12a83726c48

commit r13-8873-gc93a9bba743ac236f6045ba7aafbc12a83726c48
Author: Andrew Carlotti 
Date:   Fri Nov 24 17:06:07 2023 +

aarch64: Fix +nocrypto handling

Additionally, replace all checks for the AARCH64_FL_CRYPTO bit with
checks for (AARCH64_FL_AES | AARCH64_FL_SHA2) instead.  The value of the
AARCH64_FL_CRYPTO bit within isa_flags is now ignored, but it is
retained because removing it would make processing the data in
option-extensions.def significantly more complex.

This bug should have been picked up by an existing test, but a missing
newline meant that the pattern incorrectly allowed "+crypto+nocrypto".

gcc/ChangeLog:

PR target/115618
* common/config/aarch64/aarch64-common.cc
(aarch64_get_extension_string_for_isa_flags): Fix generation of
the "+nocrypto" extension.
* config/aarch64/aarch64.h (AARCH64_ISA_CRYPTO): Remove.
(TARGET_CRYPTO): Remove.
* config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins):
Don't use TARGET_CRYPTO.

gcc/testsuite/ChangeLog:

PR target/115618
* gcc.target/aarch64/options_set_4.c: Add terminating newline.
* gcc.target/aarch64/options_set_27.c: New test.

(cherry picked from commit 8d30107455f2309854ced3d65fb07dc1f2c357c0)

Diff:
---
 gcc/common/config/aarch64/aarch64-common.cc   | 35 +--
 gcc/config/aarch64/aarch64-c.cc   |  2 +-
 gcc/config/aarch64/aarch64.h  | 10 +++
 gcc/testsuite/gcc.target/aarch64/options_set_27.c |  9 ++
 gcc/testsuite/gcc.target/aarch64/options_set_4.c  |  2 +-
 5 files changed, 43 insertions(+), 15 deletions(-)

diff --git a/gcc/common/config/aarch64/aarch64-common.cc 
b/gcc/common/config/aarch64/aarch64-common.cc
index 20bc4e1291b..673407ca9a8 100644
--- a/gcc/common/config/aarch64/aarch64-common.cc
+++ b/gcc/common/config/aarch64/aarch64-common.cc
@@ -310,6 +310,7 @@ aarch64_get_extension_string_for_isa_flags
  But in order to make the output more readable, it seems better
  to add the strings in definition order.  */
   aarch64_feature_flags added = 0;
+  auto flags_crypto = AARCH64_FL_AES | AARCH64_FL_SHA2;
   for (unsigned int i = ARRAY_SIZE (all_extensions); i-- > 0; )
 {
   auto &opt = all_extensions[i];
@@ -319,7 +320,7 @@ aarch64_get_extension_string_for_isa_flags
 per-feature crypto flags.  */
   auto flags = opt.flag_canonical;
   if (flags == AARCH64_FL_CRYPTO)
-   flags = AARCH64_FL_AES | AARCH64_FL_SHA2;
+   flags = flags_crypto;
 
   if ((flags & isa_flags & (explicit_flags | ~current_flags)) == flags)
{
@@ -338,14 +339,32 @@ aarch64_get_extension_string_for_isa_flags
  not have an HWCAPs then it shouldn't be taken into account for feature
  detection because one way or another we can't tell if it's available
  or not.  */
+
   for (auto &opt : all_extensions)
-if (opt.native_detect_p
-   && (opt.flag_canonical & current_flags & ~isa_flags))
-  {
-   current_flags &= ~opt.flags_off;
-   outstr += "+no";
-   outstr += opt.name;
-  }
+{
+  auto flags = opt.flag_canonical;
+  /* As a special case, don't emit "+noaes" or "+nosha2" when we could emit
+"+nocrypto" instead, in order to support assemblers that predate the
+separate per-feature crypto flags.  Only allow "+nocrypto" when "sm4"
+is not already enabled (to avoid dependending on whether "+nocrypto"
+also disables "sm4").  */
+  if (flags & flags_crypto
+ && (flags_crypto & current_flags & ~isa_flags) == flags_crypto
+ && !(current_flags & AARCH64_FL_SM4))
+ continue;
+
+  if (flags == AARCH64_FL_CRYPTO)
+   /* If either crypto flag needs removing here, then both do.  */
+   flags = flags_crypto;
+
+  if (opt.native_detect_p
+ && (flags & current_flags & ~isa_flags))
+   {
+ current_flags &= ~opt.flags_off;
+ outstr += "+no";
+ outstr += opt.name;
+   }
+}
 
   return outstr;
 }
diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
index 578ec6f45b0..6c5331a7625 100644
--- a/gcc/config/aarch64/aarch64-c.cc
+++ b/gcc/config/aarch64/aarch64-c.cc
@@ -139,7 +139,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
   aarch64_def_or_undef (TARGET_ILP32, "_ILP32", pfile);
   aarch64_def_or_undef (TARGET_ILP32, "__ILP32__", pfile);
 
-  aarch64_def_or_undef (TARGET_CRYPTO, "__ARM_FEATURE_CRYPTO", pfile);
+  aarch64_def_or_undef (TARGET_AES && TARGET_SHA2, "__ARM_FEATURE_CRYPTO", 
pfile);
   aarch64_def_or_undef (TARGET_SIMD_RDMA, "__ARM_FEATURE_QRDMX", pfile);
   aarch64_def_or_undef (TARGET_SVE, "__ARM_FEATURE_SVE", pfile);
   cpp_undef (pfile, "__ARM_FEATURE_SVE_BITS");
diff --git a/gcc/config/aarch64/aarch

[gcc/aoliva/heads/testbase] (51 commits) libstdc++: Fix std::codecvt<wchar_t, char, mbstate_t> for e

2024-06-27 Thread Alexandre Oliva via Gcc-cvs
The branch 'aoliva/heads/testbase' was updated to point to:

 73ad57c244c... libstdc++: Fix std::codecvt<wchar_t, char, mbstate_t> for e

It previously pointed to:

 5a10ac0e592... optab: Add isnormal_optab for isnormal builtin

Diff:

Summary of changes (added commits):
---

  73ad57c... libstdc++: Fix std::codecvt<wchar_t, char, mbstate_t> for e (*)
  95faa1b... [libstdc++] [testsuite] defer to check_vect_support* [PR115 (*)
  bcdbb85... Avoid global bitmap space in ranger. (*)
  dafa750... libstdc++: Fix std::format for chrono::duration with unsign (*)
  8fd84bc... libstdc++: Add debug assertions to std::vector<bool> [PR103 (*)
  cfc9fa3... libstdc++: Enable more debug assertions during constant eva (*)
  5c8b7fc... ada: Remove last uses of System.Address_Operations in runti (*)
  fdbc04d... ada: Reject ambiguous function calls in interpolated string (*)
  d4c9907... ada: Add missing dimension information for target names (*)
  432b8a3... ada: Fix array-manipulating code in Mdll (*)
  089bb07... ada: Bug using user defined string literals with interpolat (*)
  60ca710... ada: Overridden operation field not correctly set for contr (*)
  3cb7e22... ada: Implement first half of Generalized Finalization (*)
  9449524... i386: Refactor vcvttps2qq/vcvtqq2ps patterns. (*)
  4385dc9... vect: support direct conversion under x86-64-v3. (*)
  e5f8a39... vect: Support v4hi -> v4qi. (*)
  c320a7e... vect: generate suitable convert insn for int -> int, float  (*)
  b55798c... RISC-V: Add testcases for vector truncate after .SAT_SUB (*)
  2280e88... LoongArch: NFC: Dedup and sort the comment in loongarch_pri (*)
  94aade0... LoongArch: Tweak IOR rtx_cost for bstrins (*)
  b8153b5... Fix wrong cost of MEM when addr is a lea. (*)
  212441e... Internal-fn: Support new IFN SAT_TRUNC for unsigned scalar  (*)
  f2476a2... Vect: Support truncate after .SAT_SUB pattern in zip (*)
  c7cb0dd... tree-optimization/115652 - amend last fix (*)
  b7ba067... tree-optimization/115493 - complete previous fix (*)
  9c56dc7... Daily bump. (*)
  0731985... libstdc++: Add script to update docs for a new release bran (*)
  6eff233... libstdc++: Remove duplicate test (*)
  e65b662... libstdc++: Increase timeouts for PSTL tests in debug mode [ (*)
  003ce8a... libstdc++: Work around some PSTL test failures for debug mo (*)
  0ca8d56... libstdc++: Fix std::chrono::tzdb to work with vanguard form (*)
  629257b... tree-optimization/115629 - missed tail merging (*)
  86a3dbe... RISC-V: Update testcase comments to point to PSABI rather t (*)
  aa89e86... RISC-V: Consolidate amo testcase variants (*)
  08498f8... RISC-V: Rename amo testcases (*)
  e499aee... rs6000, change altivec*-runnable.c test file names (*)
  0699de2... rs6000, altivec-2-runnable.c update the require-effective-t (*)
  4bf719b... rs6000, altivec-1-runnable.c update the require-effective-t (*)
  47b68cd... [committed] Remove compromised sh test (*)
  03a3dff... [committed][RISC-V] Fix expected output for thead store pai (*)
  f80db54... tree-optimization/115652 - adjust insertion gsi for SLP (*)
  7a9b535... Record edge true/false value for gcov (*)
  0bf0021... Use the term MC/DC in help for gcov --conditions (*)
  229bf66... Add section on MC/DC in gcov manual (*)
  19f630e... Use auto_vec for memory release on return (*)
  ad20ad7... arm: make arm_predict_doloop_p reject loops with calls (*)
  7fada36... [aarch64] Add support for -mcpu=grace (*)
  f4e847b... i386: Remove declaration of unused functions (*)
  812c70b... rs6000: Fix wrong RTL patterns for vector merge high/low sh (*)
  62520e4... rs6000: Fix wrong RTL patterns for vector merge high/low ch (*)
  453b1d2... tree-optimization/115646 - ICE with pow shrink-wrapping fro (*)

(*) This commit already exists in another branch.
Because the reference `refs/users/aoliva/heads/testbase' matches
your hooks.email-new-commits-only configuration,
no separate email is sent for this commit.


[gcc/aoliva/heads/testme] (55 commits) Avoid dropping bits from num/den in fixed-point types

2024-06-27 Thread Alexandre Oliva via Gcc-cvs
The branch 'aoliva/heads/testme' was updated to point to:

 dec24e79d4e... Avoid dropping bits from num/den in fixed-point types

It previously pointed to:

 6668cf365ef... [i386] drop static decls moved to mingw/winnt-dll.cc

Diff:

!!! WARNING: THE FOLLOWING COMMITS ARE NO LONGER ACCESSIBLE (LOST):
---

  6668cf3... [i386] drop static decls moved to mingw/winnt-dll.cc
  c658106... [libstdc++] [testsuite] defer to check_vect_support* [PR115


Summary of changes (added commits):
---

  dec24e7... Avoid dropping bits from num/den in fixed-point types
  9235979... Map unpacked type to packed deduped type for debug info
  c0c2a61... make_type_from_size: fix compare for type reuse
  d16dbd4... Follow only proper TYPE_DEBUG_TYPE
  73ad57c... libstdc++: Fix std::codecvt<wchar_t, char, mbstate_t> for e (*)
  95faa1b... [libstdc++] [testsuite] defer to check_vect_support* [PR115 (*)
  bcdbb85... Avoid global bitmap space in ranger. (*)
  dafa750... libstdc++: Fix std::format for chrono::duration with unsign (*)
  8fd84bc... libstdc++: Add debug assertions to std::vector<bool> [PR103 (*)
  cfc9fa3... libstdc++: Enable more debug assertions during constant eva (*)
  5c8b7fc... ada: Remove last uses of System.Address_Operations in runti (*)
  fdbc04d... ada: Reject ambiguous function calls in interpolated string (*)
  d4c9907... ada: Add missing dimension information for target names (*)
  432b8a3... ada: Fix array-manipulating code in Mdll (*)
  089bb07... ada: Bug using user defined string literals with interpolat (*)
  60ca710... ada: Overridden operation field not correctly set for contr (*)
  3cb7e22... ada: Implement first half of Generalized Finalization (*)
  9449524... i386: Refactor vcvttps2qq/vcvtqq2ps patterns. (*)
  4385dc9... vect: support direct conversion under x86-64-v3. (*)
  e5f8a39... vect: Support v4hi -> v4qi. (*)
  c320a7e... vect: generate suitable convert insn for int -> int, float  (*)
  b55798c... RISC-V: Add testcases for vector truncate after .SAT_SUB (*)
  2280e88... LoongArch: NFC: Dedup and sort the comment in loongarch_pri (*)
  94aade0... LoongArch: Tweak IOR rtx_cost for bstrins (*)
  b8153b5... Fix wrong cost of MEM when addr is a lea. (*)
  212441e... Internal-fn: Support new IFN SAT_TRUNC for unsigned scalar  (*)
  f2476a2... Vect: Support truncate after .SAT_SUB pattern in zip (*)
  c7cb0dd... tree-optimization/115652 - amend last fix (*)
  b7ba067... tree-optimization/115493 - complete previous fix (*)
  9c56dc7... Daily bump. (*)
  0731985... libstdc++: Add script to update docs for a new release bran (*)
  6eff233... libstdc++: Remove duplicate test (*)
  e65b662... libstdc++: Increase timeouts for PSTL tests in debug mode [ (*)
  003ce8a... libstdc++: Work around some PSTL test failures for debug mo (*)
  0ca8d56... libstdc++: Fix std::chrono::tzdb to work with vanguard form (*)
  629257b... tree-optimization/115629 - missed tail merging (*)
  86a3dbe... RISC-V: Update testcase comments to point to PSABI rather t (*)
  aa89e86... RISC-V: Consolidate amo testcase variants (*)
  08498f8... RISC-V: Rename amo testcases (*)
  e499aee... rs6000, change altivec*-runnable.c test file names (*)
  0699de2... rs6000, altivec-2-runnable.c update the require-effective-t (*)
  4bf719b... rs6000, altivec-1-runnable.c update the require-effective-t (*)
  47b68cd... [committed] Remove compromised sh test (*)
  03a3dff... [committed][RISC-V] Fix expected output for thead store pai (*)
  f80db54... tree-optimization/115652 - adjust insertion gsi for SLP (*)
  7a9b535... Record edge true/false value for gcov (*)
  0bf0021... Use the term MC/DC in help for gcov --conditions (*)
  229bf66... Add section on MC/DC in gcov manual (*)
  19f630e... Use auto_vec for memory release on return (*)
  ad20ad7... arm: make arm_predict_doloop_p reject loops with calls (*)
  7fada36... [aarch64] Add support for -mcpu=grace (*)
  f4e847b... i386: Remove declaration of unused functions (*)
  812c70b... rs6000: Fix wrong RTL patterns for vector merge high/low sh (*)
  62520e4... rs6000: Fix wrong RTL patterns for vector merge high/low ch (*)
  453b1d2... tree-optimization/115646 - ICE with pow shrink-wrapping fro (*)

(*) This commit already exists in another branch.
Because the reference `refs/users/aoliva/heads/testme' matches
your hooks.email-new-commits-only configuration,
no separate email is sent for this commit.


[gcc(refs/users/aoliva/heads/testme)] Follow only proper TYPE_DEBUG_TYPE

2024-06-27 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:d16dbd45239df22a2adfffcc248c7958224d2e04

commit d16dbd45239df22a2adfffcc248c7958224d2e04
Author: Alexandre Oliva 
Date:   Thu Jun 27 09:10:29 2024 -0300

Follow only proper TYPE_DEBUG_TYPE

TYPE_DEBUG_TYPE's storage is shared with other sorts of references to
types, so it shouldn't be accessed unless TYPE_CAN_HAVE_DEBUG_TYPE_P
holds.


for  gcc/ada/ChangeLog

* gcc-interface/misc.cc (gnat_get_array_descr_info): Only follow
TYPE_DEBUG_TYPE if TYPE_CAN_HAVE_DEBUG_TYPE_P.

Diff:
---
 gcc/ada/gcc-interface/misc.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/ada/gcc-interface/misc.cc b/gcc/ada/gcc-interface/misc.cc
index 4f6f6774fe7..f77629ce70b 100644
--- a/gcc/ada/gcc-interface/misc.cc
+++ b/gcc/ada/gcc-interface/misc.cc
@@ -967,7 +967,8 @@ gnat_get_array_descr_info (const_tree const_type,
 
   while (true)
{
- if (TYPE_DEBUG_TYPE (source_element_type))
+ if (TYPE_CAN_HAVE_DEBUG_TYPE_P (source_element_type)
+ && TYPE_DEBUG_TYPE (source_element_type))
source_element_type = TYPE_DEBUG_TYPE (source_element_type);
  else if (TYPE_IS_PADDING_P (source_element_type))
source_element_type


[gcc(refs/users/aoliva/heads/testme)] make_type_from_size: fix compare for type reuse

2024-06-27 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:c0c2a6152c78b54f3cc6737b667bf1aa53929713

commit c0c2a6152c78b54f3cc6737b667bf1aa53929713
Author: Alexandre Oliva 
Date:   Thu Jun 27 09:11:01 2024 -0300

make_type_from_size: fix compare for type reuse

When make_type_from_size is called with a biased type, for an entity
that isn't explicitly biased, we may refrain from reusing the given
type because it doesn't seem to match, and then proceed to create an
exact copy of that type.

Compute earlier the biased status of the expected type, early enough
for the suitability check of the given type.  Modify for_biased
instead of biased_p, so that biased_p remains with the given type's
status for the comparison.


for  gcc/ada/ChangeLog

* gcc-interface/utils.cc (make_type_from_size): Fix type reuse
by combining biased_p and for_biased earlier.  Hold the
combination in for_biased, adjusting later uses.

Diff:
---
 gcc/ada/gcc-interface/utils.cc | 11 +++
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/gcc/ada/gcc-interface/utils.cc b/gcc/ada/gcc-interface/utils.cc
index 0eb9af8d4a2..d8d42f57b89 100644
--- a/gcc/ada/gcc-interface/utils.cc
+++ b/gcc/ada/gcc-interface/utils.cc
@@ -1383,6 +1383,11 @@ make_type_from_size (tree type, tree size_tree, bool 
for_biased)
   biased_p = (TREE_CODE (type) == INTEGER_TYPE
  && TYPE_BIASED_REPRESENTATION_P (type));
 
+  /* FOR_BIASED initially refers to the entity's representation,
+not to its type's.  The type we're to return must take both
+into account.  */
+  for_biased |= biased_p;
+
   /* Integer types with precision 0 are forbidden.  */
   if (size == 0)
size = 1;
@@ -1394,12 +1399,10 @@ make_type_from_size (tree type, tree size_tree, bool 
for_biased)
  || size > (Enable_128bit_Types ? 128 : LONG_LONG_TYPE_SIZE))
break;
 
-  biased_p |= for_biased;
-
   /* The type should be an unsigned type if the original type is unsigned
 or if the lower bound is constant and non-negative or if the type is
 biased, see E_Signed_Integer_Subtype case of gnat_to_gnu_entity.  */
-  if (type_unsigned_for_rm (type) || biased_p)
+  if (type_unsigned_for_rm (type) || for_biased)
new_type = make_unsigned_type (size);
   else
new_type = make_signed_type (size);
@@ -1409,7 +1412,7 @@ make_type_from_size (tree type, tree size_tree, bool 
for_biased)
   /* Copy the name to show that it's essentially the same type and
 not a subrange type.  */
   TYPE_NAME (new_type) = TYPE_NAME (type);
-  TYPE_BIASED_REPRESENTATION_P (new_type) = biased_p;
+  TYPE_BIASED_REPRESENTATION_P (new_type) = for_biased;
   SET_TYPE_RM_SIZE (new_type, bitsize_int (size));
   return new_type;


[gcc(refs/users/aoliva/heads/testme)] Map unpacked type to packed deduped type for debug info

2024-06-27 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:92359793fc3d8c94a6704e518555807227b21bea

commit 92359793fc3d8c94a6704e518555807227b21bea
Author: Alexandre Oliva 
Date:   Thu Jun 27 09:11:27 2024 -0300

Map unpacked type to packed deduped type for debug info

Avoid creating unnecessary copies of types in make_type_from_size.
Cache the packed version of a biased type in TYPE_DEBUG_TYPE, so as to
map the unpacked type to it.


for  gcc/ada/ChangeLog

* gcc-interface/utils.cc (make_type_from_size): Cache packed
variant, and map unpacked type to it in debug info.

for  gcc/testsuite/ChangeLog

* gnat.dg/bias1.adb: Count occurrences of -7.*DW_AT_GNU_bias.

Diff:
---
 gcc/ada/gcc-interface/utils.cc  | 19 +++
 gcc/testsuite/gnat.dg/bias1.adb |  3 ++-
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/gcc/ada/gcc-interface/utils.cc b/gcc/ada/gcc-interface/utils.cc
index d8d42f57b89..daf8d7ccdc5 100644
--- a/gcc/ada/gcc-interface/utils.cc
+++ b/gcc/ada/gcc-interface/utils.cc
@@ -1399,6 +1399,15 @@ make_type_from_size (tree type, tree size_tree, bool 
for_biased)
  || size > (Enable_128bit_Types ? 128 : LONG_LONG_TYPE_SIZE))
break;
 
+  /* If we've already created this type, the base type is supposed
+to map to it.  Check that it is what we expect.  */
+  if (TYPE_CAN_HAVE_DEBUG_TYPE_P (type)
+ && (new_type = TYPE_DEBUG_TYPE (type))
+ && TYPE_PRECISION (new_type) == size
+ && ((TREE_CODE (new_type) == INTEGER_TYPE
+  && TYPE_BIASED_REPRESENTATION_P (new_type)) == for_biased))
+   return new_type;
+
   /* The type should be an unsigned type if the original type is unsigned
 or if the lower bound is constant and non-negative or if the type is
 biased, see E_Signed_Integer_Subtype case of gnat_to_gnu_entity.  */
@@ -1414,6 +1423,16 @@ make_type_from_size (tree type, tree size_tree, bool 
for_biased)
   TYPE_NAME (new_type) = TYPE_NAME (type);
   TYPE_BIASED_REPRESENTATION_P (new_type) = for_biased;
   SET_TYPE_RM_SIZE (new_type, bitsize_int (size));
+
+  /* Enable us to avoid creating the same narrower type multiple
+times, and avoid duplication in debug information, by mapping
+the wider type to the narrower version.  If biasing is
+different, we use the narrower type for debug information.  */
+  if (TYPE_CAN_HAVE_DEBUG_TYPE_P (type)
+ && !TYPE_DEBUG_TYPE (type)
+ && biased_p == for_biased)
+   SET_TYPE_DEBUG_TYPE (type, new_type);
+
   return new_type;
 
 case RECORD_TYPE:
diff --git a/gcc/testsuite/gnat.dg/bias1.adb b/gcc/testsuite/gnat.dg/bias1.adb
index 016a159b692..d9a00a1aa45 100644
--- a/gcc/testsuite/gnat.dg/bias1.adb
+++ b/gcc/testsuite/gnat.dg/bias1.adb
@@ -1,6 +1,7 @@
 --  { dg-do compile }
 --  { dg-options "-cargs -g -dA -gnatws -fgnat-encodings=gdb -margs" }
 --  { dg-final { scan-assembler "DW_AT_GNU_bias" } }
+--  { dg-final { scan-assembler-times "-7.*DW_AT_GNU_bias" 1 } }
 
 procedure Bias1 is
type Small is range -7 .. -4;
@@ -31,4 +32,4 @@ procedure Bias1 is
 
 begin
null;
-end Bias1;
\ No newline at end of file
+end Bias1;


[gcc(refs/users/aoliva/heads/testme)] Avoid dropping bits from num/den in fixed-point types

2024-06-27 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:dec24e79d4efe7f5b34c62b68ead75b9910e1a3b

commit dec24e79d4efe7f5b34c62b68ead75b9910e1a3b
Author: Alexandre Oliva 
Date:   Thu Jun 27 09:11:54 2024 -0300

Avoid dropping bits from num/den in fixed-point types

We used to use an unsigned 128-bit type to hold the numerator and
denominator used to represent the delta of a fixed-point type in debug
information, but there are cases in which that was not enough, and
more significant bits silently overflowed and got omitted from debug
information.

Introduce a mode in which UI_to_gnu selects a wide-enough unsigned
type, and use that to convert numerator and denominator.


for  gcc/ada/ChangeLog

* gcc-interface/cuintp.cc (UI_To_gnu): Add mode that selects a
wide enough unsigned type.
* gcc-interface/decl.cc (gnat_to_gnu_entity): Use it for
numerator and denominator of fixed-point types.

Diff:
---
 gcc/ada/gcc-interface/cuintp.cc | 47 ++---
 gcc/ada/gcc-interface/decl.cc   | 15 +
 2 files changed, 41 insertions(+), 21 deletions(-)

diff --git a/gcc/ada/gcc-interface/cuintp.cc b/gcc/ada/gcc-interface/cuintp.cc
index cdf6c019750..ad345096282 100644
--- a/gcc/ada/gcc-interface/cuintp.cc
+++ b/gcc/ada/gcc-interface/cuintp.cc
@@ -35,6 +35,7 @@
 #include "tree.h"
 #include "inchash.h"
 #include "fold-const.h"
+#include "stor-layout.h"
 
 #include "ada.h"
 #include "types.h"
@@ -67,7 +68,8 @@ build_cst_from_int (tree type, HOST_WIDE_INT low)
 /* Similar to UI_To_Int, but return a GCC INTEGER_CST or REAL_CST node,
depending on whether TYPE is an integral or real type.  Overflow is tested
by the constant-folding used to build the node.  TYPE is the GCC type of
-   the resulting node.  */
+   the resulting node.  If TYPE is NULL, an unsigned integer type wide enough
+   to hold the entire constant is selected.  */
 
 tree
 UI_To_gnu (Uint Input, tree type)
@@ -77,8 +79,10 @@ UI_To_gnu (Uint Input, tree type)
  any such possible value for intermediate computations and then rely on a
  conversion back to TYPE to perform the bias adjustment when need be.  */
   tree comp_type
-= TREE_CODE (type) == INTEGER_TYPE && TYPE_BIASED_REPRESENTATION_P (type)
-  ? get_base_type (type) : type;
+= (!type ? gnat_type_for_size (32, 0)
+   : (TREE_CODE (type) == INTEGER_TYPE
+ && TYPE_BIASED_REPRESENTATION_P (type))
+   ? get_base_type (type) : type);
   tree gnu_ret;
 
   if (Input <= Uint_Direct_Last)
@@ -88,6 +92,7 @@ UI_To_gnu (Uint Input, tree type)
   Int Idx = (*Uints_Ptr)[Input - Uint_Table_Start].Loc;
   Pos Length = (*Uints_Ptr)[Input - Uint_Table_Start].Length;
   Int First = (*Udigits_Ptr)[Idx];
+  tree_code code = First < 0 ? MINUS_EXPR : PLUS_EXPR;
   tree gnu_base;
 
   gcc_assert (Length > 0);
@@ -99,26 +104,34 @@ UI_To_gnu (Uint Input, tree type)
 convert the final result back to the incoming type later on.  */
   if (!SCALAR_FLOAT_TYPE_P (comp_type) && TYPE_PRECISION (comp_type) < 32)
comp_type = gnat_type_for_size (32, 0);
+  else if (!type && TYPE_UNSIGNED (comp_type))
+   /* Choose a signed type, so that we can detect overflow.  */
+   comp_type = make_signed_type (TYPE_PRECISION (comp_type));
 
   gnu_base = build_cst_from_int (comp_type, Base);
 
   gnu_ret = build_cst_from_int (comp_type, First);
-  if (First < 0)
-   for (Idx++, Length--; Length; Idx++, Length--)
- gnu_ret = fold_build2 (MINUS_EXPR, comp_type,
-fold_build2 (MULT_EXPR, comp_type,
- gnu_ret, gnu_base),
-build_cst_from_int (comp_type,
-(*Udigits_Ptr)[Idx]));
-  else
-   for (Idx++, Length--; Length; Idx++, Length--)
- gnu_ret = fold_build2 (PLUS_EXPR, comp_type,
-fold_build2 (MULT_EXPR, comp_type,
- gnu_ret, gnu_base),
-build_cst_from_int (comp_type,
-(*Udigits_Ptr)[Idx]));
+  for (Idx++, Length--; Length; Idx++, Length--)
+   for (;;)
+ {
+   tree next_ret = fold_build2 (code, comp_type,
+fold_build2 (MULT_EXPR, comp_type,
+ gnu_ret, gnu_base),
+build_cst_from_int
+(comp_type, (*Udigits_Ptr)[Idx]));
+   if (!TREE_OVERFLOW (next_ret) || type)
+ {
+   gnu_ret = next_ret;
+   break;
+ }
+   comp_type = make_signed_type (TYPE_PRECISION (comp_type) * 2);
+   gnu_base = convert (comp_type, gnu_base);
+   gnu_ret = 

[gcc r11-11545] libstdc++: fix typo in acinclude.m4.

2024-06-27 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:6e33ffd543257a1a599b51201e9db95b070dbf84

commit r11-11545-g6e33ffd543257a1a599b51201e9db95b070dbf84
Author: Martin Liska 
Date:   Thu Jan 27 14:47:23 2022 +0100

libstdc++: fix typo in acinclude.m4.

PR libstdc++/104259

libstdc++-v3/ChangeLog:

* acinclude.m4: Fix typo.
* configure: Regenerate.

(cherry picked from commit 14f339894db6ca7fe4772d5528c726694d2517c4)

Diff:
---
 libstdc++-v3/acinclude.m4 | 2 +-
 libstdc++-v3/configure| 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/acinclude.m4 b/libstdc++-v3/acinclude.m4
index 696756a6df6..e2a12607d5d 100644
--- a/libstdc++-v3/acinclude.m4
+++ b/libstdc++-v3/acinclude.m4
@@ -4834,7 +4834,7 @@ dnl
   [glibcxx_cv_fdopendir=yes],
   [glibcxx_cv_fdopendir=no])
   ])
-  if test $glibcxx_cv_truncate = yes; then
+  if test $glibcxx_cv_fdopendir = yes; then
 AC_DEFINE(HAVE_FDOPENDIR, 1, [Define if fdopendir is available in 
.])
   fi
 dnl
diff --git a/libstdc++-v3/configure b/libstdc++-v3/configure
index 316c19a2c95..64f2552afe5 100755
--- a/libstdc++-v3/configure
+++ b/libstdc++-v3/configure
@@ -76851,7 +76851,7 @@ fi
 fi
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $glibcxx_cv_fdopendir" >&5
 $as_echo "$glibcxx_cv_fdopendir" >&6; }
-  if test $glibcxx_cv_truncate = yes; then
+  if test $glibcxx_cv_fdopendir = yes; then
 
 $as_echo "#define HAVE_FDOPENDIR 1" >>confdefs.h


[gcc r15-1695] s390: Check for ADDR_REGS in s390_decompose_addrstyle_without_index

2024-06-27 Thread Stefan Schulze Frielinghaus via Gcc-cvs
https://gcc.gnu.org/g:187eeb99ec5289538923668de9d61a3138376817

commit r15-1695-g187eeb99ec5289538923668de9d61a3138376817
Author: Stefan Schulze Frielinghaus 
Date:   Thu Jun 27 15:46:24 2024 +0200

s390: Check for ADDR_REGS in s390_decompose_addrstyle_without_index

An explicit check for address registers was not required so far since
during register allocation the processing of address constraints was
sufficient.  However, address constraints themselves do not check for
REGNO_OK_FOR_{BASE,INDEX}_P.  Thus, with the newly introduced
late-combine pass in r15-1579-g792f97b44ffc5e we generate new insns with
invalid address registers which aren't fixed up afterwards.

Fixed by explicitly checking for address registers in
s390_decompose_addrstyle_without_index such that those new insns are
rejected.

gcc/ChangeLog:

PR target/115634
* config/s390/s390.cc (s390_decompose_addrstyle_without_index):
Check for ADDR_REGS in s390_decompose_addrstyle_without_index.

Diff:
---
 gcc/config/s390/s390.cc | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index c65421de831..05a0fde7fb0 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -3347,7 +3347,9 @@ s390_decompose_addrstyle_without_index (rtx op, rtx *base,
   while (op && GET_CODE (op) == SUBREG)
 op = SUBREG_REG (op);
 
-  if (op && GET_CODE (op) != REG)
+  if (op && (!REG_P (op)
+|| (reload_completed
+&& !REGNO_OK_FOR_BASE_P (REGNO (op)
 return false;
 
   if (offset)


[gcc r15-1696] Disable late-combine for -O0 [PR115677]

2024-06-27 Thread Richard Sandiford via Gcc-cvs
https://gcc.gnu.org/g:f6081ee665fd5e4e7d37e02c69d16df0d3eead10

commit r15-1696-gf6081ee665fd5e4e7d37e02c69d16df0d3eead10
Author: Richard Sandiford 
Date:   Thu Jun 27 14:51:37 2024 +0100

Disable late-combine for -O0 [PR115677]

late-combine relies on df, which for -O0 is only initialised late
(pass_df_initialize_no_opt, after split1).  Other df-based passes
cope with this by requiring optimize > 0, so this patch does the
same for late-combine.

gcc/
PR rtl-optimization/115677
* late-combine.cc (pass_late_combine::gate): New function.

Diff:
---
 gcc/late-combine.cc | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/gcc/late-combine.cc b/gcc/late-combine.cc
index b7c0bc07a8b..789d734692a 100644
--- a/gcc/late-combine.cc
+++ b/gcc/late-combine.cc
@@ -744,10 +744,16 @@ public:
 
   // opt_pass methods:
   opt_pass *clone () override { return new pass_late_combine (m_ctxt); }
-  bool gate (function *) override { return flag_late_combine_instructions; }
+  bool gate (function *) override;
   unsigned int execute (function *) override;
 };
 
+bool
+pass_late_combine::gate (function *)
+{
+  return optimize > 0 && flag_late_combine_instructions;
+}
+
 unsigned int
 pass_late_combine::execute (function *fn)
 {


[gcc r15-1697] fixincludes: adjust stdio fix for macOS 15 headers

2024-06-27 Thread François-Xavier Coudert via Gcc-cvs
https://gcc.gnu.org/g:1dc143181550573c9c902fb7a3b495e9b409d0b0

commit r15-1697-g1dc143181550573c9c902fb7a3b495e9b409d0b0
Author: Francois-Xavier Coudert 
Date:   Thu Jun 27 18:55:22 2024 +0200

fixincludes: adjust stdio fix for macOS 15 headers

fixincludes/ChangeLog:

* fixincl.x: Regenerate.
* inclhack.def (apple_local_stdio_fn_deprecation): Also apply to
_stdio.h.

Diff:
---
 fixincludes/fixincl.x| 6 +++---
 fixincludes/inclhack.def | 1 +
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/fixincludes/fixincl.x b/fixincludes/fixincl.x
index 54a530b50ca..fb9950d9b21 100644
--- a/fixincludes/fixincl.x
+++ b/fixincludes/fixincl.x
@@ -2,11 +2,11 @@
  *
  * DO NOT EDIT THIS FILE   (fixincl.x)
  *
- * It has been AutoGen-ed  June  7, 2024 at 11:03:58 AM by AutoGen 5.18.16
+ * It has been AutoGen-ed  June 27, 2024 at 06:52:39 PM by AutoGen 5.18.16
  * From the definitionsinclhack.def
  * and the template file   fixincl
  */
-/* DO NOT SVN-MERGE THIS FILE, EITHER Fri Jun  7 11:03:58 CEST 2024
+/* DO NOT SVN-MERGE THIS FILE, EITHER Thu Jun 27 18:52:39 CEST 2024
  *
  * You must regenerate it.  Use the ./genfixes script.
  *
@@ -2619,7 +2619,7 @@ tSCC zApple_Local_Stdio_Fn_DeprecationName[] =
  *  File name selection pattern
  */
 tSCC zApple_Local_Stdio_Fn_DeprecationList[] =
-  "stdio.h\0";
+  "stdio.h\0_stdio.h\0";
 /*
  *  Machine/OS name selection pattern
  */
diff --git a/fixincludes/inclhack.def b/fixincludes/inclhack.def
index f7fc5cdbabd..9f4a41199a1 100644
--- a/fixincludes/inclhack.def
+++ b/fixincludes/inclhack.def
@@ -1273,6 +1273,7 @@ fix = {
 hackname  = apple_local_stdio_fn_deprecation;
 mach  = "*-*-*darwin2*";
 files = stdio.h;
+files = _stdio.h;
 select= "__deprecated_msg([^\n]*)$";
 c_fix = format;
 c_fix_arg = "#if defined(__APPLE_LOCAL_DEPRECATIONS)\n"


[gcc r13-8874] AArch64: Fix strict-align cpymem/setmem [PR103100]

2024-06-27 Thread Wilco Dijkstra via Gcc-cvs
https://gcc.gnu.org/g:5aa9ed0f353f835005c3df8932c7bc6e26f53904

commit r13-8874-g5aa9ed0f353f835005c3df8932c7bc6e26f53904
Author: Wilco Dijkstra 
Date:   Wed Oct 25 16:28:04 2023 +0100

AArch64: Fix strict-align cpymem/setmem [PR103100]

The cpymemdi/setmemdi implementation doesn't fully support strict alignment.
Block the expansion if the alignment is less than 16 with STRICT_ALIGNMENT.
Clean up the condition when to use MOPS.

gcc/ChangeLog:
PR target/103100
* config/aarch64/aarch64.md (cpymemdi): Remove pattern condition.
(setmemdi): Likewise.
* config/aarch64/aarch64.cc (aarch64_expand_cpymem): Support
strict-align.  Cleanup condition for using MOPS.
(aarch64_expand_setmem): Likewise.

(cherry picked from commit 318f5232cfb3e0c9694889565e1f5424d0354463)

Diff:
---
 gcc/config/aarch64/aarch64.cc | 52 ++-
 gcc/config/aarch64/aarch64.md |  4 ++--
 2 files changed, 24 insertions(+), 32 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index b8a4ab1b980..2f01580a797 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -24897,27 +24897,23 @@ aarch64_expand_cpymem (rtx *operands)
   int mode_bits;
   rtx dst = operands[0];
   rtx src = operands[1];
+  unsigned align = UINTVAL (operands[3]);
   rtx base;
   machine_mode cur_mode = BLKmode;
+  bool size_p = optimize_function_for_size_p (cfun);
 
-  /* Variable-sized memcpy can go through the MOPS expansion if available.  */
-  if (!CONST_INT_P (operands[2]))
+  /* Variable-sized or strict-align copies may use the MOPS expansion.  */
+  if (!CONST_INT_P (operands[2]) || (STRICT_ALIGNMENT && align < 16))
 return aarch64_expand_cpymem_mops (operands);
 
-  unsigned HOST_WIDE_INT size = INTVAL (operands[2]);
-
-  /* Try to inline up to 256 bytes or use the MOPS threshold if available.  */
-  unsigned HOST_WIDE_INT max_copy_size
-= TARGET_MOPS ? aarch64_mops_memcpy_size_threshold : 256;
+  unsigned HOST_WIDE_INT size = UINTVAL (operands[2]);
 
-  bool size_p = optimize_function_for_size_p (cfun);
+  /* Try to inline up to 256 bytes.  */
+  unsigned max_copy_size = 256;
+  unsigned mops_threshold = aarch64_mops_memcpy_size_threshold;
 
-  /* Large constant-sized cpymem should go through MOPS when possible.
- It should be a win even for size optimization in the general case.
- For speed optimization the choice between MOPS and the SIMD sequence
- depends on the size of the copy, rather than number of instructions,
- alignment etc.  */
-  if (size > max_copy_size)
+  /* Large copies use MOPS when available or a library call.  */
+  if (size > max_copy_size || (TARGET_MOPS && size > mops_threshold))
 return aarch64_expand_cpymem_mops (operands);
 
   int copy_bits = 256;
@@ -25081,12 +25077,13 @@ aarch64_expand_setmem (rtx *operands)
   unsigned HOST_WIDE_INT len;
   rtx dst = operands[0];
   rtx val = operands[2], src;
+  unsigned align = UINTVAL (operands[3]);
   rtx base;
   machine_mode cur_mode = BLKmode, next_mode;
 
-  /* If we don't have SIMD registers or the size is variable use the MOPS
- inlined sequence if possible.  */
-  if (!CONST_INT_P (operands[1]) || !TARGET_SIMD)
+  /* Variable-sized or strict-align memset may use the MOPS expansion.  */
+  if (!CONST_INT_P (operands[1]) || !TARGET_SIMD
+  || (STRICT_ALIGNMENT && align < 16))
 return aarch64_expand_setmem_mops (operands);
 
   bool size_p = optimize_function_for_size_p (cfun);
@@ -25094,10 +25091,13 @@ aarch64_expand_setmem (rtx *operands)
   /* Default the maximum to 256-bytes when considering only libcall vs
  SIMD broadcast sequence.  */
   unsigned max_set_size = 256;
+  unsigned mops_threshold = aarch64_mops_memset_size_threshold;
 
-  len = INTVAL (operands[1]);
-  if (len > max_set_size && !TARGET_MOPS)
-return false;
+  len = UINTVAL (operands[1]);
+
+  /* Large memset uses MOPS when available or a library call.  */
+  if (len > max_set_size || (TARGET_MOPS && len > mops_threshold))
+return aarch64_expand_setmem_mops (operands);
 
   int cst_val = !!(CONST_INT_P (val) && (INTVAL (val) != 0));
   /* The MOPS sequence takes:
@@ -25110,12 +25110,6 @@ aarch64_expand_setmem (rtx *operands)
  the arguments + 1 for the call.  */
   unsigned libcall_cost = 4;
 
-  /* Upper bound check.  For large constant-sized setmem use the MOPS sequence
- when available.  */
-  if (TARGET_MOPS
-  && len >= (unsigned HOST_WIDE_INT) aarch64_mops_memset_size_threshold)
-return aarch64_expand_setmem_mops (operands);
-
   /* Attempt a sequence with a vector broadcast followed by stores.
  Count the number of operations involved to see if it's worth it
  against the alternatives.  A simple counter simd_ops on the
@@ -25157,10 +25151,8 @@ aarch64_expand_setmem (rtx *operands)
   simd_ops++;
   n -= mode_bits;
 
-  /* Do cer

[gcc r15-1698] c: Error message for incorrect use of static in array declarations.

2024-06-27 Thread Martin Uecker via Gcc-cvs
https://gcc.gnu.org/g:da7976a015a4388b8ed843412c3c1c840451cf0f

commit r15-1698-gda7976a015a4388b8ed843412c3c1c840451cf0f
Author: Martin Uecker 
Date:   Thu Jun 27 21:47:56 2024 +0200

c: Error message for incorrect use of static in array declarations.

Add an explicit error message when C99's static is
used without a size expression in an array declarator.

gcc/c:
* c-parser.cc (c_parser_direct_declarator_inner): Add
error message.

gcc/testsuite:
* gcc.dg/c99-arraydecl-4.c: New test.

Diff:
---
 gcc/c/c-parser.cc  | 63 --
 gcc/testsuite/gcc.dg/c99-arraydecl-4.c | 14 
 2 files changed, 44 insertions(+), 33 deletions(-)

diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc
index 6a3f96d5b61..8c4e697a4e1 100644
--- a/gcc/c/c-parser.cc
+++ b/gcc/c/c-parser.cc
@@ -4715,8 +4715,6 @@ c_parser_direct_declarator_inner (c_parser *parser, bool 
id_present,
   location_t brace_loc = c_parser_peek_token (parser)->location;
   struct c_declarator *declarator;
   struct c_declspecs *quals_attrs = build_null_declspecs ();
-  bool static_seen;
-  bool star_seen;
   struct c_expr dimen;
   dimen.value = NULL_TREE;
   dimen.original_code = ERROR_MARK;
@@ -4724,49 +4722,48 @@ c_parser_direct_declarator_inner (c_parser *parser, 
bool id_present,
   c_parser_consume_token (parser);
   c_parser_declspecs (parser, quals_attrs, false, false, true,
  false, false, false, false, cla_prefer_id);
-  static_seen = c_parser_next_token_is_keyword (parser, RID_STATIC);
-  if (static_seen)
-   c_parser_consume_token (parser);
-  if (static_seen && !quals_attrs->declspecs_seen_p)
-   c_parser_declspecs (parser, quals_attrs, false, false, true,
-   false, false, false, false, cla_prefer_id);
+
+  location_t static_loc = UNKNOWN_LOCATION;
+  if (c_parser_next_token_is_keyword (parser, RID_STATIC))
+   {
+ static_loc = c_parser_peek_token (parser)->location;
+ c_parser_consume_token (parser);
+ if (!quals_attrs->declspecs_seen_p)
+   c_parser_declspecs (parser, quals_attrs, false, false, true,
+   false, false, false, false, cla_prefer_id);
+   }
   if (!quals_attrs->declspecs_seen_p)
quals_attrs = NULL;
   /* If "static" is present, there must be an array dimension.
 Otherwise, there may be a dimension, "*", or no
 dimension.  */
-  if (static_seen)
+  const bool static_seen = (static_loc != UNKNOWN_LOCATION);
+  bool star_seen = false;
+  if (c_parser_next_token_is (parser, CPP_MULT)
+ && c_parser_peek_2nd_token (parser)->type == CPP_CLOSE_SQUARE)
{
- star_seen = false;
- dimen = c_parser_expr_no_commas (parser, NULL);
+ star_seen = true;
+ c_parser_consume_token (parser);
}
-  else
+  else if (!c_parser_next_token_is (parser, CPP_CLOSE_SQUARE))
+   dimen = c_parser_expr_no_commas (parser, NULL);
+
+  if (static_seen)
{
- if (c_parser_next_token_is (parser, CPP_CLOSE_SQUARE))
-   {
- dimen.value = NULL_TREE;
- star_seen = false;
-   }
- else if (c_parser_next_token_is (parser, CPP_MULT))
-   {
- if (c_parser_peek_2nd_token (parser)->type == CPP_CLOSE_SQUARE)
-   {
- dimen.value = NULL_TREE;
- star_seen = true;
- c_parser_consume_token (parser);
-   }
- else
-   {
- star_seen = false;
- dimen = c_parser_expr_no_commas (parser, NULL);
-   }
-   }
- else
+ if (star_seen)
{
+ error_at (static_loc,
+   "%<static%> may not be used with an unspecified "
+   "variable length array size");
+ /* Prevent further errors.  */
  star_seen = false;
- dimen = c_parser_expr_no_commas (parser, NULL);
+ dimen.value = error_mark_node;
}
+ else if (!dimen.value)
+   error_at (static_loc,
+ "%<static%> may not be used without an array size");
}
+
   if (c_parser_next_token_is (parser, CPP_CLOSE_SQUARE))
c_parser_consume_token (parser);
   else
diff --git a/gcc/testsuite/gcc.dg/c99-arraydecl-4.c 
b/gcc/testsuite/gcc.dg/c99-arraydecl-4.c
new file mode 100644
index 000..f8cad3b9429
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/c99-arraydecl-4.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-std=c99 -pedantic-errors" } */
+
+void fo(char buf[static]); /* { dg-error "'static' may not be used without 
an array size" } */
+void fo(char buf[static]) { }  /* { dg-error "'static' may not be used without 
an array size" } 

[gcc r15-1699] libgccjit: Add ability to get the alignment of a type

2024-06-27 Thread Antoni Boucher via Gcc-cvs
https://gcc.gnu.org/g:445c62ee492b363e7ad86260c7a91a7fdf984f50

commit r15-1699-g445c62ee492b363e7ad86260c7a91a7fdf984f50
Author: Antoni Boucher 
Date:   Thu Apr 4 18:57:07 2024 -0400

libgccjit: Add ability to get the alignment of a type

gcc/jit/ChangeLog:

* docs/topics/compatibility.rst (LIBGCCJIT_ABI_28): New ABI tag.
* docs/topics/expressions.rst: Document gcc_jit_context_new_alignof.
* jit-playback.cc (new_alignof): New method.
* jit-playback.h: New method.
* jit-recording.cc (recording::context::new_alignof): New
method.
(recording::memento_of_sizeof::replay_into,
recording::memento_of_typeinfo::replay_into,
recording::memento_of_sizeof::make_debug_string,
recording::memento_of_typeinfo::make_debug_string,
recording::memento_of_sizeof::write_reproducer,
recording::memento_of_typeinfo::write_reproducer): Rename.
* jit-recording.h (enum type_info_type): New enum.
(class memento_of_sizeof class memento_of_typeinfo): Rename.
* libgccjit.cc (gcc_jit_context_new_alignof): New function.
* libgccjit.h (gcc_jit_context_new_alignof): New function.
* libgccjit.map: New function.

gcc/testsuite/ChangeLog:

* jit.dg/all-non-failing-tests.h: New test.
* jit.dg/test-alignof.c: New test.

Diff:
---
 gcc/jit/docs/topics/compatibility.rst|  7 +++
 gcc/jit/docs/topics/expressions.rst  | 14 ++
 gcc/jit/jit-playback.cc  | 11 +
 gcc/jit/jit-playback.h   |  3 ++
 gcc/jit/jit-recording.cc | 67 ++-
 gcc/jit/jit-recording.h  | 19 ++--
 gcc/jit/libgccjit.cc | 18 
 gcc/jit/libgccjit.h  | 13 ++
 gcc/jit/libgccjit.map|  5 ++
 gcc/testsuite/jit.dg/all-non-failing-tests.h | 10 
 gcc/testsuite/jit.dg/test-alignof.c  | 69 
 11 files changed, 221 insertions(+), 15 deletions(-)

diff --git a/gcc/jit/docs/topics/compatibility.rst 
b/gcc/jit/docs/topics/compatibility.rst
index 9cfb054f653..92c3ed24c89 100644
--- a/gcc/jit/docs/topics/compatibility.rst
+++ b/gcc/jit/docs/topics/compatibility.rst
@@ -397,3 +397,10 @@ on functions and variables:
 
 ``LIBGCCJIT_ABI_27`` covers the addition of
 :func:`gcc_jit_context_new_sizeof`
+
+.. _LIBGCCJIT_ABI_28:
+
+``LIBGCCJIT_ABI_28``
+
+``LIBGCCJIT_ABI_28`` covers the addition of
+:func:`gcc_jit_context_new_alignof`
diff --git a/gcc/jit/docs/topics/expressions.rst 
b/gcc/jit/docs/topics/expressions.rst
index d83d95fe9e0..5734f0e5f7e 100644
--- a/gcc/jit/docs/topics/expressions.rst
+++ b/gcc/jit/docs/topics/expressions.rst
@@ -140,6 +140,20 @@ Simple expressions
 
  sizeof (type)
 
+.. function:: gcc_jit_rvalue *\
+  gcc_jit_context_new_alignof (gcc_jit_context *ctxt, \
+   gcc_jit_type *type)
+
+   Generate an rvalue that is equal to the alignment of ``type``.
+
+   The parameter ``type`` must be non-NULL.
+
+   This is equivalent to this C code:
+
+   .. code-block:: c
+
+ _Alignof (type)
+
 Constructor expressions
 ***
 
diff --git a/gcc/jit/jit-playback.cc b/gcc/jit/jit-playback.cc
index 6baa838af10..b3f54da24ab 100644
--- a/gcc/jit/jit-playback.cc
+++ b/gcc/jit/jit-playback.cc
@@ -1120,6 +1120,17 @@ new_sizeof (type *type)
 
 /* Construct a playback::rvalue instance (wrapping a tree).  */
 
+playback::rvalue *
+playback::context::
+new_alignof (type *type)
+{
+  int alignment = TYPE_ALIGN (type->as_tree ()) / BITS_PER_UNIT;
+  tree inner = build_int_cst (integer_type_node, alignment);
+  return new rvalue (this, inner);
+}
+
+/* Construct a playback::rvalue instance (wrapping a tree).  */
+
 playback::rvalue *
 playback::context::
 new_string_literal (const char *value)
diff --git a/gcc/jit/jit-playback.h b/gcc/jit/jit-playback.h
index aa6a086613c..6e97b389cbb 100644
--- a/gcc/jit/jit-playback.h
+++ b/gcc/jit/jit-playback.h
@@ -165,6 +165,9 @@ public:
   rvalue *
   new_sizeof (type *type);
 
+  rvalue *
+  new_alignof (type *type);
+
   rvalue *
   new_string_literal (const char *value);
 
diff --git a/gcc/jit/jit-recording.cc b/gcc/jit/jit-recording.cc
index 5e9ef40f3b7..f68d01fff55 100644
--- a/gcc/jit/jit-recording.cc
+++ b/gcc/jit/jit-recording.cc
@@ -1077,7 +1077,7 @@ recording::context::new_global_init_rvalue (lvalue 
*variable,
   gbl->set_rvalue_init (init); /* Needed by the global for write dump.  */
 }
 
-/* Create a recording::memento_of_sizeof instance and add it
+/* Create a recording::memento_of_typeinfo instance and add it
to this context's list of mementos.
 
Implements the post-error-checking part of
@@ -1087,7 +1087,22 @@ recording::rvalue *
 reco

[gcc r14-10353] libstdc++: Replace viewcvs links in docs with cgit links

2024-06-27 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:a8b77a696379343dfe67bb75088acd452eef0c2c

commit r14-10353-ga8b77a696379343dfe67bb75088acd452eef0c2c
Author: Jonathan Wakely 
Date:   Tue Jun 25 13:35:17 2024 +0100

libstdc++: Replace viewcvs links in docs with cgit links

For this backport to the release branch, the links to the git repo refer
to the branch.

libstdc++-v3/ChangeLog:

* doc/xml/faq.xml: Replace viewcvs links with cgit links.
* doc/xml/manual/allocator.xml: Likewise.
* doc/xml/manual/mt_allocator.xml: Likewise.
* doc/html/*: Regenerate.

(cherry picked from commit 9d8021d1875677286c3dde90dfed2aca864edad0)

Diff:
---
 libstdc++-v3/doc/html/faq.html  |  2 +-
 libstdc++-v3/doc/html/manual/memory.html| 10 +-
 libstdc++-v3/doc/html/manual/mt_allocator_impl.html |  6 +++---
 libstdc++-v3/doc/xml/faq.xml|  2 +-
 libstdc++-v3/doc/xml/manual/allocator.xml   | 10 +-
 libstdc++-v3/doc/xml/manual/mt_allocator.xml|  6 +++---
 6 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/libstdc++-v3/doc/html/faq.html b/libstdc++-v3/doc/html/faq.html
index e84e455c4e9..bbe716d5e23 100644
--- a/libstdc++-v3/doc/html/faq.html
+++ b/libstdc++-v3/doc/html/faq.html
@@ -147,7 +147,7 @@
  The libstdc++ project is contributed to by several developers
  all over the world, in the same way as GCC or the Linux kernel.
  The current maintainers are listed in the
- https://gcc.gnu.org/viewcvs/gcc/trunk/MAINTAINERS?view=co"; 
target="_top">MAINTAINERS
+ https://gcc.gnu.org/cgit/gcc/tree/MAINTAINERS"; 
target="_top">MAINTAINERS
  file (look for "c++ runtime libs").
 
 Development and discussion is held on the libstdc++ mailing
diff --git a/libstdc++-v3/doc/html/manual/memory.html 
b/libstdc++-v3/doc/html/manual/memory.html
index 08ad2fd4dd8..3a2025b90d2 100644
--- a/libstdc++-v3/doc/html/manual/memory.html
+++ b/libstdc++-v3/doc/html/manual/memory.html
@@ -120,8 +120,8 @@
Over multiple iterations, various STL container
  objects have elements inserted to some maximum amount. A variety
  of allocators are tested.
- Test source for http://gcc.gnu.org/viewcvs/gcc/trunk/libstdc%2B%2B-v3/testsuite/performance/23_containers/insert/sequence.cc?view=markup";
 target="_top">sequence
- and http://gcc.gnu.org/viewcvs/gcc/trunk/libstdc%2B%2B-v3/testsuite/performance/23_containers/insert/associative.cc?view=markup";
 target="_top">associative
+ Test source for https://gcc.gnu.org/cgit/gcc/tree/libstdc++-v3/testsuite/performance/23_containers/insert/sequence.cc?h=releases%2Fgcc-14";
 target="_top">sequence
+ and https://gcc.gnu.org/cgit/gcc/tree/libstdc++-v3/testsuite/performance/23_containers/insert/associative.cc?h=releases%2Fgcc-14";
 target="_top">associative
  containers.

Insertion and erasure in a multi-threaded environment.
@@ -130,14 +130,14 @@
  on a per-thread basis, as well as measuring thread contention
  for memory resources.
  Test source
-http://gcc.gnu.org/viewcvs/gcc/trunk/libstdc%2B%2B-v3/testsuite/performance/23_containers/insert_erase/associative.cc?view=markup";
 target="_top">here.
+https://gcc.gnu.org/cgit/gcc/tree/libstdc++-v3/testsuite/performance/23_containers/insert_erase/associative.cc?h=releases%2Fgcc-14";
 target="_top">here.

 A threaded producer/consumer model.

Test source for
- http://gcc.gnu.org/viewcvs/gcc/trunk/libstdc++-v3/testsuite/performance/23_containers/producer_consumer/sequence.cc?view=markup";
 target="_top">sequence
+ https://gcc.gnu.org/cgit/gcc/tree/libstdc++-v3/testsuite/performance/23_containers/producer_consumer/sequence.cc?h=releases%2Fgcc-14";
 target="_top">sequence
  and
- http://gcc.gnu.org/viewcvs/gcc/trunk/libstdc++-v3/testsuite/performance/23_containers/producer_consumer/associative.cc?view=markup";
 target="_top">associative
+ https://gcc.gnu.org/cgit/gcc/tree/libstdc++-v3/testsuite/performance/23_containers/producer_consumer/associative.cc?h=releases%2Fgcc-14";
 target="_top">associative
  containers.
  
  Since GCC 12 the default choice for
diff --git a/libstdc++-v3/doc/html/manual/mt_allocator_impl.html 
b/libstdc++-v3/doc/html/manual/mt_allocator_impl.html
index 2e5926add00..351fa90c975 100644
--- a/libstdc++-v3/doc/html/manual/mt_allocator_impl.html
+++ b/libstdc++-v3/doc/html/manual/mt_allocator_impl.html
@@ -155,7 +155,7 @@ that uses it is fully constructed. For most (but not all) 
STL
 containers, this works, as an instance of the allocator is constructed
 as part of a container's constructor. However, this assumption is
 implementation-specific, and subject to change. For an example of a
-pool that frees memory, see the following
-http://gcc.gnu.org/viewcvs/gcc/trunk/libstdc++-v3/testsuite/ext/mt_allocator/deallocate_local-6.cc?view=markup";
 target=

[gcc r12-10587] rs6000: Fix wrong RTL patterns for vector merge high/low word on LE

2024-06-27 Thread Kewen Lin via Gcc-cvs
https://gcc.gnu.org/g:96ef3367067219c8e3eb88c0474a1090cc7749b4

commit r12-10587-g96ef3367067219c8e3eb88c0474a1090cc7749b4
Author: Kewen Lin 
Date:   Thu Jun 20 20:23:56 2024 -0500

rs6000: Fix wrong RTL patterns for vector merge high/low word on LE

Commit r12-4496 changes some define_expands and define_insns
for vector merge high/low word, which are altivec_vmrg[hl]w,
vsx_xxmrg[hl]w_<VSX_W:mode>.  These defines are mainly for
built-in function vec_merge{h,l}, __builtin_vsx_xxmrghw,
__builtin_vsx_xxmrghw_4si and some internal gen function
needs.  These functions should consider endianness, taking
vec_mergeh as example, as PVIPR defines, vec_mergeh "Merges
the first halves (in element order) of two vectors", it does
note it's in element order.  So it's mapped into vmrghw on
BE while vmrglw on LE respectively.  Although the mapped
insns are different, as the discussion in PR106069, the RTL
pattern should be still the same, it is conformed before
commit r12-4496, define_expand altivec_vmrghw got expanded
into:

  (vec_select:VSX_W
 (vec_concat:
(match_operand:VSX_W 1 "register_operand" "wa,v")
(match_operand:VSX_W 2 "register_operand" "wa,v"))
(parallel [(const_int 0) (const_int 4)
   (const_int 1) (const_int 5)])))]

on both BE and LE then.  But commit r12-4496 changed it to
expand into:

  (vec_select:VSX_W
 (vec_concat:
(match_operand:VSX_W 1 "register_operand" "wa,v")
(match_operand:VSX_W 2 "register_operand" "wa,v"))
(parallel [(const_int 0) (const_int 4)
   (const_int 1) (const_int 5)])))]

on BE, and

  (vec_select:VSX_W
 (vec_concat:
(match_operand:VSX_W 1 "register_operand" "wa,v")
(match_operand:VSX_W 2 "register_operand" "wa,v"))
(parallel [(const_int 2) (const_int 6)
   (const_int 3) (const_int 7)])))]

on LE, although the mapped insn are still vmrghw on BE and
vmrglw on LE, the associated RTL pattern is completely
wrong and inconsistent with the mapped insn.  If optimization
passes leave this pattern alone, even if its pattern doesn't
represent its mapped insn, it's still fine, that's why simple
testing on bif doesn't expose this issue.  But once some
optimization pass such as combine does some changes basing
on this wrong pattern, because the pattern doesn't match the
semantics that the expanded insn is intended to represent,
it would cause the unexpected result.

So this patch is to fix the wrong RTL pattern, ensure the
associated RTL patterns become the same as before which can
have the same semantic as their mapped insns.  With the
proposed patch, the expanders like altivec_vmrghw expands
into altivec_vmrghb_direct_be or altivec_vmrglb_direct_le
depending on endianness, "direct" can easily show which
insn would be generated, _be and _le are mainly for the
different RTL patterns as endianness.

Co-authored-by: Xionghu Luo 

PR target/106069
PR target/115355

gcc/ChangeLog:

* config/rs6000/altivec.md (altivec_vmrghw_direct_): 
Rename
to ...
(altivec_vmrghw_direct__be): ... this.  Add the 
condition
BYTES_BIG_ENDIAN.
(altivec_vmrghw_direct__le): New define_insn.
(altivec_vmrglw_direct_): Rename to ...
(altivec_vmrglw_direct__be): ... this.  Add the 
condition
BYTES_BIG_ENDIAN.
(altivec_vmrglw_direct__le): New define_insn.
(altivec_vmrghw): Adjust by calling 
gen_altivec_vmrghw_direct_v4si_be
for BE and gen_altivec_vmrglw_direct_v4si_le for LE.
(altivec_vmrglw): Adjust by calling 
gen_altivec_vmrglw_direct_v4si_be
for BE and gen_altivec_vmrghw_direct_v4si_le for LE.
(vec_widen_umult_hi_v8hi): Adjust the call to
gen_altivec_vmrghw_direct_v4si by gen_altivec_vmrghw for BE
and by gen_altivec_vmrglw for LE.
(vec_widen_smult_hi_v8hi): Likewise.
(vec_widen_umult_lo_v8hi): Adjust the call to
gen_altivec_vmrglw_direct_v4si by gen_altivec_vmrglw for BE
and by gen_altivec_vmrghw for LE
(vec_widen_smult_lo_v8hi): Likewise.
* config/rs6000/rs6000.cc (altivec_expand_vec_perm_const): Replace
CODE_FOR_altivec_vmrghw_direct_v4si by
CODE_FOR_altivec_vmrghw_direct_v4si_be for BE and
CODE_FOR_altivec_vmrghw_direct_v4si_le for LE.  And replace
CODE_FOR_altivec_vmrglw_direct_v4si by
CODE_FOR_altivec_vmrglw_direct_v4si_be for BE and
CODE_FOR_altivec_vmrglw_direct_v4si_le for LE.
* config/rs6000/vsx.md (vsx_xxmrghw_): Adjust by calling
gen_altivec_vmrghw_d

[gcc r13-8876] rs6000: Fix wrong RTL patterns for vector merge high/low word on LE

2024-06-27 Thread Kewen Lin via Gcc-cvs
https://gcc.gnu.org/g:361bfcec901ca882130e338aebaa2ebc6ea2dc3b

commit r13-8876-g361bfcec901ca882130e338aebaa2ebc6ea2dc3b
Author: Kewen Lin 
Date:   Thu Jun 20 20:23:56 2024 -0500

rs6000: Fix wrong RTL patterns for vector merge high/low word on LE

Commit r12-4496 changes some define_expands and define_insns
for vector merge high/low word, which are altivec_vmrg[hl]w,
vsx_xxmrg[hl]w_<VSX_W:mode>.  These defines are mainly for
built-in function vec_merge{h,l}, __builtin_vsx_xxmrghw,
__builtin_vsx_xxmrghw_4si and some internal gen function
needs.  These functions should consider endianness, taking
vec_mergeh as example, as PVIPR defines, vec_mergeh "Merges
the first halves (in element order) of two vectors", it does
note it's in element order.  So it's mapped into vmrghw on
BE while vmrglw on LE respectively.  Although the mapped
insns are different, as the discussion in PR106069, the RTL
pattern should be still the same, it is conformed before
commit r12-4496, define_expand altivec_vmrghw got expanded
into:

  (vec_select:VSX_W
 (vec_concat:
(match_operand:VSX_W 1 "register_operand" "wa,v")
(match_operand:VSX_W 2 "register_operand" "wa,v"))
(parallel [(const_int 0) (const_int 4)
   (const_int 1) (const_int 5)])))]

on both BE and LE then.  But commit r12-4496 changed it to
expand into:

  (vec_select:VSX_W
 (vec_concat:
(match_operand:VSX_W 1 "register_operand" "wa,v")
(match_operand:VSX_W 2 "register_operand" "wa,v"))
(parallel [(const_int 0) (const_int 4)
   (const_int 1) (const_int 5)])))]

on BE, and

  (vec_select:VSX_W
 (vec_concat:
(match_operand:VSX_W 1 "register_operand" "wa,v")
(match_operand:VSX_W 2 "register_operand" "wa,v"))
(parallel [(const_int 2) (const_int 6)
   (const_int 3) (const_int 7)])))]

on LE, although the mapped insn are still vmrghw on BE and
vmrglw on LE, the associated RTL pattern is completely
wrong and inconsistent with the mapped insn.  If optimization
passes leave this pattern alone, even if its pattern doesn't
represent its mapped insn, it's still fine, that's why simple
testing on bif doesn't expose this issue.  But once some
optimization pass such as combine does some changes basing
on this wrong pattern, because the pattern doesn't match the
semantics that the expanded insn is intended to represent,
it would cause the unexpected result.

So this patch is to fix the wrong RTL pattern, ensure the
associated RTL patterns become the same as before which can
have the same semantic as their mapped insns.  With the
proposed patch, the expanders like altivec_vmrghw expands
into altivec_vmrghb_direct_be or altivec_vmrglb_direct_le
depending on endianness, "direct" can easily show which
insn would be generated, _be and _le are mainly for the
different RTL patterns as endianness.

Co-authored-by: Xionghu Luo 

PR target/106069
PR target/115355

gcc/ChangeLog:

* config/rs6000/altivec.md (altivec_vmrghw_direct_): 
Rename
to ...
(altivec_vmrghw_direct__be): ... this.  Add the 
condition
BYTES_BIG_ENDIAN.
(altivec_vmrghw_direct__le): New define_insn.
(altivec_vmrglw_direct_): Rename to ...
(altivec_vmrglw_direct__be): ... this.  Add the 
condition
BYTES_BIG_ENDIAN.
(altivec_vmrglw_direct__le): New define_insn.
(altivec_vmrghw): Adjust by calling 
gen_altivec_vmrghw_direct_v4si_be
for BE and gen_altivec_vmrglw_direct_v4si_le for LE.
(altivec_vmrglw): Adjust by calling 
gen_altivec_vmrglw_direct_v4si_be
for BE and gen_altivec_vmrghw_direct_v4si_le for LE.
(vec_widen_umult_hi_v8hi): Adjust the call to
gen_altivec_vmrghw_direct_v4si by gen_altivec_vmrghw for BE
and by gen_altivec_vmrglw for LE.
(vec_widen_smult_hi_v8hi): Likewise.
(vec_widen_umult_lo_v8hi): Adjust the call to
gen_altivec_vmrglw_direct_v4si by gen_altivec_vmrglw for BE
and by gen_altivec_vmrghw for LE
(vec_widen_smult_lo_v8hi): Likewise.
* config/rs6000/rs6000.cc (altivec_expand_vec_perm_const): Replace
CODE_FOR_altivec_vmrghw_direct_v4si by
CODE_FOR_altivec_vmrghw_direct_v4si_be for BE and
CODE_FOR_altivec_vmrghw_direct_v4si_le for LE.  And replace
CODE_FOR_altivec_vmrglw_direct_v4si by
CODE_FOR_altivec_vmrglw_direct_v4si_be for BE and
CODE_FOR_altivec_vmrglw_direct_v4si_le for LE.
* config/rs6000/vsx.md (vsx_xxmrghw_): Adjust by calling
gen_altivec_vmrghw_di

[gcc r14-10355] rs6000: Fix wrong RTL patterns for vector merge high/low word on LE

2024-06-27 Thread Kewen Lin via Gcc-cvs
https://gcc.gnu.org/g:ef8b60dd48faeaf2b4e28c35401fa10d2a3e53fb

commit r14-10355-gef8b60dd48faeaf2b4e28c35401fa10d2a3e53fb
Author: Kewen Lin 
Date:   Thu Jun 20 20:23:56 2024 -0500

rs6000: Fix wrong RTL patterns for vector merge high/low word on LE

Commit r12-4496 changes some define_expands and define_insns
for vector merge high/low word, which are altivec_vmrg[hl]w,
vsx_xxmrg[hl]w_<VSX_W:mode>.  These defines are mainly for
built-in function vec_merge{h,l}, __builtin_vsx_xxmrghw,
__builtin_vsx_xxmrghw_4si and some internal gen function
needs.  These functions should consider endianness, taking
vec_mergeh as example, as PVIPR defines, vec_mergeh "Merges
the first halves (in element order) of two vectors", it does
note it's in element order.  So it's mapped into vmrghw on
BE while vmrglw on LE respectively.  Although the mapped
insns are different, as the discussion in PR106069, the RTL
pattern should be still the same, it is conformed before
commit r12-4496, define_expand altivec_vmrghw got expanded
into:

  (vec_select:VSX_W
 (vec_concat:
(match_operand:VSX_W 1 "register_operand" "wa,v")
(match_operand:VSX_W 2 "register_operand" "wa,v"))
(parallel [(const_int 0) (const_int 4)
   (const_int 1) (const_int 5)])))]

on both BE and LE then.  But commit r12-4496 changed it to
expand into:

  (vec_select:VSX_W
 (vec_concat:
(match_operand:VSX_W 1 "register_operand" "wa,v")
(match_operand:VSX_W 2 "register_operand" "wa,v"))
(parallel [(const_int 0) (const_int 4)
   (const_int 1) (const_int 5)])))]

on BE, and

  (vec_select:VSX_W
 (vec_concat:
(match_operand:VSX_W 1 "register_operand" "wa,v")
(match_operand:VSX_W 2 "register_operand" "wa,v"))
(parallel [(const_int 2) (const_int 6)
   (const_int 3) (const_int 7)])))]

on LE, although the mapped insn are still vmrghw on BE and
vmrglw on LE, the associated RTL pattern is completely
wrong and inconsistent with the mapped insn.  If optimization
passes leave this pattern alone, even if its pattern doesn't
represent its mapped insn, it's still fine, that's why simple
testing on bif doesn't expose this issue.  But once some
optimization pass such as combine does some changes basing
on this wrong pattern, because the pattern doesn't match the
semantics that the expanded insn is intended to represent,
it would cause the unexpected result.

So this patch is to fix the wrong RTL pattern, ensure the
associated RTL patterns become the same as before which can
have the same semantic as their mapped insns.  With the
proposed patch, the expanders like altivec_vmrghw expands
into altivec_vmrghb_direct_be or altivec_vmrglb_direct_le
depending on endianness, "direct" can easily show which
insn would be generated, _be and _le are mainly for the
different RTL patterns as endianness.

Co-authored-by: Xionghu Luo 

PR target/106069
PR target/115355

gcc/ChangeLog:

* config/rs6000/altivec.md (altivec_vmrghw_direct_): 
Rename
to ...
(altivec_vmrghw_direct__be): ... this.  Add the 
condition
BYTES_BIG_ENDIAN.
(altivec_vmrghw_direct__le): New define_insn.
(altivec_vmrglw_direct_): Rename to ...
(altivec_vmrglw_direct__be): ... this.  Add the 
condition
BYTES_BIG_ENDIAN.
(altivec_vmrglw_direct__le): New define_insn.
(altivec_vmrghw): Adjust by calling 
gen_altivec_vmrghw_direct_v4si_be
for BE and gen_altivec_vmrglw_direct_v4si_le for LE.
(altivec_vmrglw): Adjust by calling 
gen_altivec_vmrglw_direct_v4si_be
for BE and gen_altivec_vmrghw_direct_v4si_le for LE.
(vec_widen_umult_hi_v8hi): Adjust the call to
gen_altivec_vmrghw_direct_v4si by gen_altivec_vmrghw for BE
and by gen_altivec_vmrglw for LE.
(vec_widen_smult_hi_v8hi): Likewise.
(vec_widen_umult_lo_v8hi): Adjust the call to
gen_altivec_vmrglw_direct_v4si by gen_altivec_vmrglw for BE
and by gen_altivec_vmrghw for LE
(vec_widen_smult_lo_v8hi): Likewise.
* config/rs6000/rs6000.cc (altivec_expand_vec_perm_const): Replace
CODE_FOR_altivec_vmrghw_direct_v4si by
CODE_FOR_altivec_vmrghw_direct_v4si_be for BE and
CODE_FOR_altivec_vmrghw_direct_v4si_le for LE.  And replace
CODE_FOR_altivec_vmrglw_direct_v4si by
CODE_FOR_altivec_vmrglw_direct_v4si_be for BE and
CODE_FOR_altivec_vmrglw_direct_v4si_le for LE.
* config/rs6000/vsx.md (vsx_xxmrghw_): Adjust by calling
gen_altivec_vmrghw_d

[gcc r15-1701] i386: Some additional AVX512 ternlog refinements.

2024-06-27 Thread Roger Sayle via Gcc-cvs
https://gcc.gnu.org/g:5938cf021e95b40b040974c9cbe7860399247f7f

commit r15-1701-g5938cf021e95b40b040974c9cbe7860399247f7f
Author: Roger Sayle 
Date:   Fri Jun 28 07:12:53 2024 +0100

i386: Some additional AVX512 ternlog refinements.

This patch is another round of refinements to fine tune the new ternlog
infrastructure in i386's sse.md.  This patch tweaks ix86_ternlog_idx
to allow multiple MEM/CONST_VECTOR/VEC_DUPLICATE operands prior to
splitting (before reload), when force_register is called on all but
one of these operands.  Conceptually during the dynamic programming,
registers fill the args slots in the order 0, 1, 2, and mem-like
operands fill the slots in the order 2, 0, 1 [preferring the memory
operand to come last].

This patch allows us to remove some of the legacy ternlog patterns
in sse.md without regressions [which is left to the next and final
patch in this series].  An indication that these patterns are no
longer required is shown by the necessary testsuite tweaks below,
where the output assembler for the legacy instructions used hexadecimal,
but with the new ternlog infrastructure now consistently use decimal.

2024-06-28  Roger Sayle  

gcc/ChangeLog
* config/i386/i386-expand.cc (ix86_ternlog_idx) <case VEC_DUPLICATE>:
Add a "goto do_mem_operand" as this need not match memory_operand.
<do_mem_operand>: Only args[2] may be volatile memory operand.
Allow MEM/VEC_DUPLICATE/CONST_VECTOR as args[0] and args[1].

gcc/testsuite/ChangeLog
* gcc.target/i386/avx512f-andn-di-zmm-2.c: Match decimal instead
of hexadecimal immediate operand to ternlog.
* gcc.target/i386/avx512f-andn-si-zmm-2.c: Likewise.
* gcc.target/i386/avx512f-orn-si-zmm-1.c: Likewise.
* gcc.target/i386/avx512f-orn-si-zmm-2.c: Likewise.
* gcc.target/i386/pr100711-3.c: Likewise.
* gcc.target/i386/pr100711-4.c: Likewise.
* gcc.target/i386/pr100711-5.c: Likewise.

Diff:
---
 gcc/config/i386/i386-expand.cc | 35 --
 .../gcc.target/i386/avx512f-andn-di-zmm-2.c|  2 +-
 .../gcc.target/i386/avx512f-andn-si-zmm-2.c|  2 +-
 .../gcc.target/i386/avx512f-orn-si-zmm-1.c |  2 +-
 .../gcc.target/i386/avx512f-orn-si-zmm-2.c |  2 +-
 gcc/testsuite/gcc.target/i386/pr100711-3.c |  2 +-
 gcc/testsuite/gcc.target/i386/pr100711-4.c |  2 +-
 gcc/testsuite/gcc.target/i386/pr100711-5.c |  2 +-
 8 files changed, 39 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index eccad080f7c..dd2c3a8718e 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -25606,7 +25606,7 @@ ix86_ternlog_idx (rtx op, rtx *args)
 case VEC_DUPLICATE:
   if (!bcst_mem_operand (op, GET_MODE (op)))
return -1;
-  /* FALLTHRU */
+  goto do_mem_operand;
 
 case MEM:
   if (!memory_operand (op, GET_MODE (op)))
@@ -25618,23 +25618,52 @@ ix86_ternlog_idx (rtx op, rtx *args)
   /* FALLTHRU */
 
 case CONST_VECTOR:
+do_mem_operand:
   if (!args[2])
{
  args[2] = op;
  return 0xaa;
}
   /* Maximum of one volatile memory reference per expression.  */
-  if (side_effects_p (op) && side_effects_p (args[2]))
+  if (side_effects_p (op))
return -1;
   if (rtx_equal_p (op, args[2]))
return 0xaa;
-  /* Check if one CONST_VECTOR is the ones-complement of the other.  */
+  /* Check if CONST_VECTOR is the ones-complement of args[2].  */
   if (GET_CODE (op) == CONST_VECTOR
  && GET_CODE (args[2]) == CONST_VECTOR
  && rtx_equal_p (simplify_const_unary_operation (NOT, GET_MODE (op),
  op, GET_MODE (op)),
  args[2]))
return 0x55;
+  if (!args[0])
+   {
+ args[0] = op;
+ return 0xf0;
+   }
+  if (rtx_equal_p (op, args[0]))
+   return 0xf0;
+  /* Check if CONST_VECTOR is the ones-complement of args[0].  */
+  if (GET_CODE (op) == CONST_VECTOR
+ && GET_CODE (args[0]) == CONST_VECTOR
+ && rtx_equal_p (simplify_const_unary_operation (NOT, GET_MODE (op),
+ op, GET_MODE (op)),
+ args[0]))
+   return 0x0f;
+  if (!args[1])
+   {
+ args[1] = op;
+ return 0xcc;
+   }
+  if (rtx_equal_p (op, args[1]))
+   return 0xcc;
+  /* Check if CONST_VECTOR is the ones-complement of args[1].  */
+  if (GET_CODE (op) == CONST_VECTOR
+ && GET_CODE (args[1]) == CONST_VECTOR
+ && rtx_equal_p (simplify_const_unary_operation (NOT, GET_MODE (op),
+ op, GET_MODE (op)),
+

[gcc r15-1702] i386: Handle sign_extend like zero_extend in *concatditi3_[346]

2024-06-27 Thread Roger Sayle via Gcc-cvs
https://gcc.gnu.org/g:07e915913b6b3d4e6e210f6dbc8e7e0e8ea594c4

commit r15-1702-g07e915913b6b3d4e6e210f6dbc8e7e0e8ea594c4
Author: Roger Sayle 
Date:   Fri Jun 28 07:16:07 2024 +0100

i386: Handle sign_extend like zero_extend in *concatditi3_[346]

This patch generalizes some of the patterns in i386.md that recognize
double word concatenation, so they handle sign_extend the same way that
they handle zero_extend in appropriate contexts.

As a motivating example consider the following function:

__int128 foo(long long x, unsigned long long y)
{
  return ((__int128)x<<64) | y;
}

when compiled with -O2, x86_64 currently generates:

foo:movq%rdi, %rdx
xorl%eax, %eax
xorl%edi, %edi
orq %rsi, %rax
orq %rdi, %rdx
ret

with this patch we now generate (the same as if x is unsigned):

foo:movq%rsi, %rax
movq%rdi, %rdx
ret

Treating both extensions the same way using any_extend is valid as
the top (extended) bits are "unused" after the shift by 64 (or more).
In theory, the RTL optimizers might consider canonicalizing the form
of extension used in these cases, but zero_extend is faster on some
machines, whereas sign extension is supported via addressing modes on
others, so handling both in the machine description is probably best.

2024-06-28  Roger Sayle  

gcc/ChangeLog
* config/i386/i386.md (*concat<mode><dwi>3_3): Change zero_extend
to any_extend in first operand to left shift by mode precision.
(*concat<mode><dwi>3_4): Likewise.
(*concat<mode><dwi>3_6): Likewise.

gcc/testsuite/ChangeLog
* gcc.target/i386/concatditi-1.c: New test case.

Diff:
---
 gcc/config/i386/i386.md  |  6 +++---
 gcc/testsuite/gcc.target/i386/concatditi-1.c | 10 ++
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index fd48e764469..b6ccb1e798d 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -13446,7 +13446,7 @@
   [(set (match_operand: 0 "nonimmediate_operand" "=ro,r,r,&r,x")
(any_or_plus:
  (ashift:
-   (zero_extend:
+   (any_extend:
  (match_operand:DWIH 1 "nonimmediate_operand" "r,m,r,m,x"))
(match_operand:QI 2 "const_int_operand"))
  (zero_extend:
@@ -13473,7 +13473,7 @@
  (zero_extend:
(match_operand:DWIH 1 "nonimmediate_operand" "r,m,r,m"))
  (ashift:
-   (zero_extend:
+   (any_extend:
  (match_operand:DWIH 2 "nonimmediate_operand" "r,r,m,m"))
(match_operand:QI 3 "const_int_operand"]
   "INTVAL (operands[3]) ==  * BITS_PER_UNIT"
@@ -13520,7 +13520,7 @@
   [(set (match_operand: 0 "nonimmediate_operand" "=r,o,o,r")
(any_or_plus:
  (ashift:
-   (zero_extend:
+   (any_extend:
  (match_operand:DWIH 1 "nonimmediate_operand" "r,r,r,m"))
(match_operand:QI 2 "const_int_operand"))
  (match_operand: 3 "const_scalar_int_operand" "n,n,Wd,n")))]
diff --git a/gcc/testsuite/gcc.target/i386/concatditi-1.c 
b/gcc/testsuite/gcc.target/i386/concatditi-1.c
new file mode 100644
index 000..25c2a95586b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/concatditi-1.c
@@ -0,0 +1,10 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2" } */
+
+__int128 foo(long long x, unsigned long long y)
+{
+  return ((__int128)x<<64) | y;
+}
+
+/* { dg-final { scan-assembler-not "xorl" } } */
+/* { dg-final { scan-assembler-not "orq" } } */