[gcc r14-10351] aarch64: Add support for -mcpu=grace
https://gcc.gnu.org/g:c2878a9a1719e067b1476377bd2a292350482e61 commit r14-10351-gc2878a9a1719e067b1476377bd2a292350482e61 Author: Kyrylo Tkachov Date: Wed Jun 19 14:56:02 2024 +0530 aarch64: Add support for -mcpu=grace This adds support for the NVIDIA Grace CPU to aarch64. We reuse the tuning decisions for the Neoverse V2 core, but include a number of architecture features that are not enabled by default in -mcpu=neoverse-v2. This allows Grace users to more simply target the CPU with -mcpu=grace rather than remembering what extensions to tag on top of -mcpu=neoverse-v2. Bootstrapped and tested on aarch64-none-linux-gnu. gcc/ * config/aarch64/aarch64-cores.def (grace): New entry. * config/aarch64/aarch64-tune.md: Regenerate. * doc/invoke.texi (AArch64 Options): Document the above. Signed-off-by: Kyrylo Tkachov Diff: --- gcc/config/aarch64/aarch64-cores.def | 2 ++ gcc/config/aarch64/aarch64-tune.md | 2 +- gcc/doc/invoke.texi | 4 ++-- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def index f69fc212d56..f5536388f61 100644 --- a/gcc/config/aarch64/aarch64-cores.def +++ b/gcc/config/aarch64/aarch64-cores.def @@ -189,6 +189,8 @@ AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, V9A, (I8MM, BF16, SVE2_BITPER AARCH64_CORE("cobalt-100", cobalt100, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversen2, 0x6d, 0xd49, -1) AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversev2, 0x41, 0xd4f, -1) +AARCH64_CORE("grace", grace, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, SVE2_AES, SVE2_SHA3, SVE2_SM4, PROFILE), neoversev2, 0x41, 0xd4f, -1) + AARCH64_CORE("demeter", demeter, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversev2, 0x41, 0xd4f, -1) /* Generic Architecture Processors. 
*/ diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md index abd3c9e0822..80254836e0e 100644 --- a/gcc/config/aarch64/aarch64-tune.md +++ b/gcc/config/aarch64/aarch64-tune.md @@ -1,5 +1,5 @@ ;; -*- buffer-read-only: t -*- ;; Generated automatically by gentune.sh from aarch64-cores.def (define_attr "tune" - "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,ampere1b,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,neoversen1,ares,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,neoversev1,zeus,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa520,cortexa710,cortexa715,cortexa720,cortexx2,cortexx3,cortexx4,neoversen2,cobalt100,neoversev2,demeter,generic,generic_armv8_a,generic_armv9_a" + 
"cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,ampere1b,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,neoversen1,ares,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,neoversev1,zeus,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa520,cortexa710,cortexa715,cortexa720,cortexx2,cortexx3,cortexx4,neoversen2,cobalt100,neoversev2,grace,demeter,generic,generic_armv8_a,generic_armv9_a" (const (symbol_ref "((enum attr_tune) aarch64_tune)"))) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index a916d618960..67220051a5b 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -21324,8 +21324,8 @@ performance of the code. Permissible values for this option are: @samp{cortex-a78}, @samp{cortex-a78ae}, @samp{cortex-a78c}, @samp{ares}, @samp{exynos-m1}, @samp{emag}, @samp{falkor}, @samp{neoverse-512tvb}, @samp{neoverse-e1}, @samp{neoverse-n1}, -@samp{neoverse-n2}, @samp{neoverse-v1}, @samp{neoverse-v2}, @samp{qdf24xx}, -@samp{saphira}, @samp{phecda}, @samp{xgene1}, @samp{vulcan}, +@samp{neoverse-n2}, @samp{neoverse-v1}, @samp{neoverse-v2}, @samp{grace}, +@samp{qdf24xx}, @samp{saphira}, @samp{phecda}, @samp{xgene1}, @samp{vulcan}, @samp{octeontx}, @samp{octeontx81}, @samp{octeontx83}, @samp{octeontx2}, @samp{octeontx2t98}, @samp{octe
[gcc r15-1677] vect: generate suitable convert insn for int -> int, float -> float and int <-> float.
https://gcc.gnu.org/g:c320a7efcd35ba6c6be70dc9b2fe562a9673e363 commit r15-1677-gc320a7efcd35ba6c6be70dc9b2fe562a9673e363 Author: Hu, Lin1 Date: Thu Feb 1 15:15:01 2024 +0800 vect: generate suitable convert insn for int -> int, float -> float and int <-> float. gcc/ChangeLog: PR target/107432 * tree-vect-generic.cc (expand_vector_conversion): Support convert for int -> int, float -> float and int <-> float. * tree-vect-stmts.cc (vectorizable_conversion): Wrap the indirect convert part. (supportable_indirect_convert_operation): New function. * tree-vectorizer.h (supportable_indirect_convert_operation): Define the new function. gcc/testsuite/ChangeLog: PR target/107432 * gcc.target/i386/pr107432-1.c: New test. * gcc.target/i386/pr107432-2.c: Ditto. * gcc.target/i386/pr107432-3.c: Ditto. * gcc.target/i386/pr107432-4.c: Ditto. * gcc.target/i386/pr107432-5.c: Ditto. * gcc.target/i386/pr107432-6.c: Ditto. * gcc.target/i386/pr107432-7.c: Ditto. Diff: --- gcc/testsuite/gcc.target/i386/pr107432-1.c | 234 gcc/testsuite/gcc.target/i386/pr107432-2.c | 105 + gcc/testsuite/gcc.target/i386/pr107432-3.c | 55 +++ gcc/testsuite/gcc.target/i386/pr107432-4.c | 56 +++ gcc/testsuite/gcc.target/i386/pr107432-5.c | 72 + gcc/testsuite/gcc.target/i386/pr107432-6.c | 139 + gcc/testsuite/gcc.target/i386/pr107432-7.c | 150 ++ gcc/tree-vect-generic.cc | 29 +++- gcc/tree-vect-stmts.cc | 241 ++--- gcc/tree-vectorizer.h | 4 + 10 files changed, 990 insertions(+), 95 deletions(-) diff --git a/gcc/testsuite/gcc.target/i386/pr107432-1.c b/gcc/testsuite/gcc.target/i386/pr107432-1.c new file mode 100644 index 000..a4f37447eb4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr107432-1.c @@ -0,0 +1,234 @@ +/* { dg-do compile } */ +/* { dg-options "-march=x86-64 -mavx512bw -mavx512vl -O3" } */ +/* { dg-final { scan-assembler-times "vpmovqd" 6 } } */ +/* { dg-final { scan-assembler-times "vpmovqw" 6 } } */ +/* { dg-final { scan-assembler-times "vpmovqb" 6 } } */ +/* { dg-final { scan-assembler-times 
"vpmovdw" 6 { target { ia32 } } } } */ +/* { dg-final { scan-assembler-times "vpmovdw" 8 { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times "vpmovdb" 6 { target { ia32 } } } } */ +/* { dg-final { scan-assembler-times "vpmovdb" 8 { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times "vpmovwb" 8 } } */ + +#include + +typedef short __v2hi __attribute__ ((__vector_size__ (4))); +typedef char __v2qi __attribute__ ((__vector_size__ (2))); +typedef char __v4qi __attribute__ ((__vector_size__ (4))); +typedef char __v8qi __attribute__ ((__vector_size__ (8))); + +typedef unsigned short __v2hu __attribute__ ((__vector_size__ (4))); +typedef unsigned short __v4hu __attribute__ ((__vector_size__ (8))); +typedef unsigned char __v2qu __attribute__ ((__vector_size__ (2))); +typedef unsigned char __v4qu __attribute__ ((__vector_size__ (4))); +typedef unsigned char __v8qu __attribute__ ((__vector_size__ (8))); +typedef unsigned int __v2su __attribute__ ((__vector_size__ (8))); + +__v2si mm_cvtepi64_epi32_builtin_convertvector(__m128i a) +{ + return __builtin_convertvector((__v2di)a, __v2si); +} + +__m128imm256_cvtepi64_epi32_builtin_convertvector(__m256i a) +{ + return (__m128i)__builtin_convertvector((__v4di)a, __v4si); +} + +__m256imm512_cvtepi64_epi32_builtin_convertvector(__m512i a) +{ + return (__m256i)__builtin_convertvector((__v8di)a, __v8si); +} + +__v2hi mm_cvtepi64_epi16_builtin_convertvector(__m128i a) +{ + return __builtin_convertvector((__v2di)a, __v2hi); +} + +__v4hi mm256_cvtepi64_epi16_builtin_convertvector(__m256i a) +{ + return __builtin_convertvector((__v4di)a, __v4hi); +} + +__m128imm512_cvtepi64_epi16_builtin_convertvector(__m512i a) +{ + return (__m128i)__builtin_convertvector((__v8di)a, __v8hi); +} + +__v2qi mm_cvtepi64_epi8_builtin_convertvector(__m128i a) +{ + return __builtin_convertvector((__v2di)a, __v2qi); +} + +__v4qi mm256_cvtepi64_epi8_builtin_convertvector(__m256i a) +{ + return __builtin_convertvector((__v4di)a, __v4qi); 
+} + +__v8qi mm512_cvtepi64_epi8_builtin_convertvector(__m512i a) +{ + return __builtin_convertvector((__v8di)a, __v8qi); +} + +__v2hi mm64_cvtepi32_epi16_builtin_convertvector(__v2si a) +{ + return __builtin_convertvector((__v2si)a, __v2hi); +} + +__v4hi mm_cvtepi32_epi16_builtin_convertvector(__m128i a) +{ + return __builtin_convertvector((__v4si)a, __v4hi); +} + +__m128imm256_cvtepi32_epi16_builtin_convertvector(__m256i a) +{ + return (__m128i)__builtin_convertvector((__v8si)a, __v8hi); +} + +__m256imm5
[gcc r15-1678] vect: Support v4hi -> v4qi.
https://gcc.gnu.org/g:e5f8a39941f6f0f25dac88bd71fd368fb284a10f commit r15-1678-ge5f8a39941f6f0f25dac88bd71fd368fb284a10f Author: Hu, Lin1 Date: Wed Feb 28 18:11:55 2024 +0800 vect: Support v4hi -> v4qi. gcc/ChangeLog: PR target/107432 * config/i386/mmx.md (VI2_32_64): New mode iterator. (mmxhalfmode): New mode atter. (mmxhalfmodelower): Ditto. (truncv2hiv2qi2): Extend mode v4hi and change name from truncv2hiv2qi to trunc2. gcc/testsuite/ChangeLog: PR target/107432 * gcc.target/i386/pr107432-1.c: Modify test. * gcc.target/i386/pr107432-6.c: Add test. * gcc.target/i386/pr108938-3.c: This patch supports truncv4hiv4qi affect bswap optimization, so I added the -mno-avx option for now, and open a bugzilla. Diff: --- gcc/config/i386/mmx.md | 17 + gcc/testsuite/gcc.target/i386/pr107432-1.c | 13 - gcc/testsuite/gcc.target/i386/pr107432-6.c | 29 ++--- gcc/testsuite/gcc.target/i386/pr108938-3.c | 2 +- 4 files changed, 44 insertions(+), 17 deletions(-) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index ea53f516cbb..24c0516726c 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -67,6 +67,9 @@ ;; 4-byte integer vector modes (define_mode_iterator VI_32 [V4QI V2HI]) +;; 8-byte and 4-byte HImode vector modes +(define_mode_iterator VI2_32_64 [(V4HI "TARGET_MMX_WITH_SSE") V2HI]) + ;; 4-byte and 2-byte integer vector modes (define_mode_iterator VI_16_32 [V4QI V2QI V2HI]) @@ -106,6 +109,12 @@ (define_mode_attr mmxdoublemode [(V8QI "V8HI") (V4HI "V4SI")]) +(define_mode_attr mmxhalfmode + [(V4HI "V4QI") (V2HI "V2QI")]) + +(define_mode_attr mmxhalfmodelower + [(V4HI "v4qi") (V2HI "v2qi")]) + ;; Mapping of vector float modes to an integer mode of the same size (define_mode_attr mmxintvecmode [(V2SF "V2SI") (V2SI "V2SI") (V4HI "V4HI") (V8QI "V8QI") @@ -4880,10 +4889,10 @@ DONE; }) -(define_insn "truncv2hiv2qi2" - [(set (match_operand:V2QI 0 "register_operand" "=v") - (truncate:V2QI - (match_operand:V2HI 1 "register_operand" "v")))] +(define_insn "trunc2" + 
[(set (match_operand: 0 "register_operand" "=v") + (truncate: + (match_operand:VI2_32_64 1 "register_operand" "v")))] "TARGET_AVX512VL && TARGET_AVX512BW" "vpmovwb\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") diff --git a/gcc/testsuite/gcc.target/i386/pr107432-1.c b/gcc/testsuite/gcc.target/i386/pr107432-1.c index a4f37447eb4..afdf367afe2 100644 --- a/gcc/testsuite/gcc.target/i386/pr107432-1.c +++ b/gcc/testsuite/gcc.target/i386/pr107432-1.c @@ -7,7 +7,8 @@ /* { dg-final { scan-assembler-times "vpmovdw" 8 { target { ! ia32 } } } } */ /* { dg-final { scan-assembler-times "vpmovdb" 6 { target { ia32 } } } } */ /* { dg-final { scan-assembler-times "vpmovdb" 8 { target { ! ia32 } } } } */ -/* { dg-final { scan-assembler-times "vpmovwb" 8 } } */ +/* { dg-final { scan-assembler-times "vpmovwb" 8 { target { ia32 } } } } */ +/* { dg-final { scan-assembler-times "vpmovwb" 10 { target { ! ia32 } } } } */ #include @@ -113,6 +114,11 @@ __v2qi mm32_cvtepi16_epi8_builtin_convertvector(__v2hi a) return __builtin_convertvector((__v2hi)a, __v2qi); } +__v4qi mm64_cvtepi16_epi8_builtin_convertvector(__v4hi a) +{ + return __builtin_convertvector((__v4hi)a, __v4qi); +} + __v8qi mm_cvtepi16_epi8_builtin_convertvector(__m128i a) { return __builtin_convertvector((__v8hi)a, __v8qi); @@ -218,6 +224,11 @@ __v2qu mm32_cvtepu16_epu8_builtin_convertvector(__v2hu a) return __builtin_convertvector((__v2hu)a, __v2qu); } +__v4qu mm64_cvtepu16_epu8_builtin_convertvector(__v4hu a) +{ + return __builtin_convertvector((__v4hu)a, __v4qu); +} + __v8qu mm_cvtepu16_epu8_builtin_convertvector(__m128i a) { return __builtin_convertvector((__v8hu)a, __v8qu); diff --git a/gcc/testsuite/gcc.target/i386/pr107432-6.c b/gcc/testsuite/gcc.target/i386/pr107432-6.c index 4a68a10b089..dd585b2a351 100644 --- a/gcc/testsuite/gcc.target/i386/pr107432-6.c +++ b/gcc/testsuite/gcc.target/i386/pr107432-6.c @@ -1,18 +1,15 @@ /* { dg-do compile } */ /* { dg-options "-mavx512fp16 -mavx512vl -mavx512bw -O2 -mavx512dq 
-fno-trapping-math" } */ -/* { dg-final { scan-assembler-times "vcvttpd2dq" 2 { target { ia32 } } } } */ -/* { dg-final { scan-assembler-times "vcvttpd2dq" 3 { target { ! ia32 } } } } */ -/* { dg-final { scan-assembler-times "vcvttpd2udq" 2 { target { ia32 } } } } */ -/* { dg-final { scan-assembler-times "vcvttpd2udq" 3 { target { ! ia32 } } } } */ -/* { dg-final { scan-assembler-times "vcvttps2dq" 3 { target { ia32 } } } } */ -/* { dg-final { scan-assembler-times "vcvttps2dq" 4 { target { ! ia32 } } } } */ -/* { dg-final { scan-assembler-times "vcvttps2udq" 3 { target { ia32 } } } } */ -/* { dg-final { scan-assembl
[gcc r15-1679] vect: support direct conversion under x86-64-v3.
https://gcc.gnu.org/g:4385dc97b0d28e54541eb2418d6e68fc672441d7 commit r15-1679-g4385dc97b0d28e54541eb2418d6e68fc672441d7 Author: Hu, Lin1 Date: Wed Mar 6 19:58:48 2024 +0800 vect: support direct conversion under x86-64-v3. gcc/ChangeLog: PR target/107432 * config/i386/i386-expand.cc (ix86_expand_trunc_with_avx2_noavx512f): New function for generate a series of suitable insn. * config/i386/i386-protos.h (ix86_expand_trunc_with_avx2_noavx512f): Define new function. * config/i386/sse.md: Extend trunc2 for x86-64-v3. (ssebytemode) Add V8HI. (PMOV_DST_MODE_2_AVX2): New mode iterator. (PMOV_SRC_MODE_3_AVX2): Ditto. * config/i386/mmx.md (trunc2): Ditto. (avx512vl_trunc2): Ditto. (truncv2si2): Ditto. (avx512vl_truncv2si2): Ditto. (mmxbytemode): New mode attr. gcc/testsuite/ChangeLog: PR target/107432 * gcc.target/i386/pr107432-8.c: New test. * gcc.target/i386/pr107432-9.c: Ditto. * gcc.target/i386/pr92645-4.c: Modify test. Diff: --- gcc/config/i386/i386-expand.cc | 44 -- gcc/config/i386/i386-protos.h | 3 + gcc/config/i386/mmx.md | 35 +++- gcc/config/i386/sse.md | 88 +++- gcc/testsuite/gcc.target/i386/pr107432-8.c | 94 + gcc/testsuite/gcc.target/i386/pr107432-9.c | 129 + gcc/testsuite/gcc.target/i386/pr92645-4.c | 2 - 7 files changed, 363 insertions(+), 32 deletions(-) diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index 5dfa7d49f58..eccad080f7c 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -1898,10 +1898,6 @@ ix86_split_convert_uns_si_sse (rtx operands[]) emit_insn (gen_xorv4si3 (value, value, large)); } -static bool ix86_expand_vector_init_one_nonzero (bool mmx_ok, -machine_mode mode, rtx target, -rtx var, int one_var); - /* Convert an unsigned DImode value into a DFmode, using only SSE. Expects the 64-bit DImode to be supplied in a pair of integral registers. Requires SSE2; will use SSE3 if available. 
For x86_32, @@ -16126,7 +16122,7 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode, whose ONE_VAR element is VAR, and other elements are zero. Return true if successful. */ -static bool +bool ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode, rtx target, rtx var, int one_var) { @@ -26137,4 +26133,42 @@ ix86_expand_ternlog (machine_mode mode, rtx op0, rtx op1, rtx op2, int idx, return target; } +/* Trunc a vector to a narrow vector, like v4di -> v4si. */ + +void +ix86_expand_trunc_with_avx2_noavx512f (rtx output, rtx input, machine_mode cvt_mode) +{ + machine_mode out_mode = GET_MODE (output); + machine_mode in_mode = GET_MODE (input); + int len = GET_MODE_SIZE (in_mode); + gcc_assert (len == GET_MODE_SIZE (cvt_mode) + && GET_MODE_INNER (out_mode) == GET_MODE_INNER (cvt_mode) + && (REG_P (input) || SUBREG_P (input))); + scalar_mode inner_out_mode = GET_MODE_INNER (out_mode); + int in_innersize = GET_MODE_SIZE (GET_MODE_INNER (in_mode)); + int out_innersize = GET_MODE_SIZE (inner_out_mode); + + struct expand_vec_perm_d d; + d.target = gen_reg_rtx (cvt_mode); + d.op0 = lowpart_subreg (cvt_mode, force_reg(in_mode, input), in_mode); + d.op1 = d.op0; + d.vmode = cvt_mode; + d.nelt = GET_MODE_NUNITS (cvt_mode); + d.testing_p = false; + d.one_operand_p = true; + + /* Init perm. Put the needed bits of input in order and + fill the rest of bits by default. 
*/ + for (int i = 0; i < d.nelt; ++i) +{ + d.perm[i] = i; + if (i < GET_MODE_NUNITS (out_mode)) + d.perm[i] = i * (in_innersize / out_innersize); +} + + bool ok = ix86_expand_vec_perm_const_1(&d); + gcc_assert (ok); + emit_move_insn (output, gen_lowpart (out_mode, d.target)); +} + #include "gt-i386-expand.h" diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 4f48dc0bf75..1a76090b9da 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -248,6 +248,7 @@ extern rtx ix86_gen_ccmp_first (rtx_insn **, rtx_insn **, enum rtx_code, extern rtx ix86_gen_ccmp_next (rtx_insn **, rtx_insn **, rtx, enum rtx_code, tree, tree, enum rtx_code); extern int ix86_get_flags_cc (enum rtx_code); +extern void ix86_expand_trunc_with_avx2_noavx512f (rtx, rtx, machine_mode); extern rtx ix86_memtag_untagged_pointer (rtx, rtx); extern bool ix86_memtag_can_tag_addresses (void); @@ -298,6 +299,8 @@ extern void ix86_expand_sse2_mulvxdi3 (rtx, rtx,
[gcc r15-1680] i386: Refactor vcvttps2qq/vcvtqq2ps patterns.
https://gcc.gnu.org/g:94495247341bc05b77536271fe3dd789dad62624 commit r15-1680-g94495247341bc05b77536271fe3dd789dad62624 Author: Hu, Lin1 Date: Tue Jun 25 18:25:59 2024 +0800 i386: Refactor vcvttps2qq/vcvtqq2ps patterns. Refactor vcvttps2qq/vcvtqq2ps patterns for remove redundant round_*_modev8sf_condition. gcc/ChangeLog: * config/i386/sse.md (float2 ): Refactor the pattern. (unspec_fix_trunc2 ): Ditto. (fix_trunc2 ): Ditto. * config/i386/subst.md (round_modev8sf_condition): Remove. (round_saeonly_modev8sf_condition): Ditto. Diff: --- gcc/config/i386/sse.md | 51 +--- gcc/config/i386/subst.md | 2 -- 2 files changed, 22 insertions(+), 31 deletions(-) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 56ee7119e7c..a94ec3c441f 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -1158,6 +1158,9 @@ (define_mode_attr ssePSmode2 [(V8DI "V8SF") (V4DI "V4SF")]) +(define_mode_attr ssePSmode2lower + [(V8DI "v8sf") (V4DI "v4sf")]) + ;; Mapping of vector modes back to the scalar modes (define_mode_attr ssescalarmode [(V64QI "QI") (V32QI "QI") (V16QI "QI") @@ -8862,27 +8865,17 @@ ;; For float insn patterns (define_mode_attr qq2pssuff - [(V8SF "") (V4SF "{y}")]) - -(define_mode_attr sselongvecmode - [(V8SF "V8DI") (V4SF "V4DI")]) - -(define_mode_attr sselongvecmodelower - [(V8SF "v8di") (V4SF "v4di")]) - -(define_mode_attr sseintvecmode3 - [(V8SF "XI") (V4SF "OI") - (V8DF "OI") (V4DF "TI")]) + [(V8DI "") (V4DI "{y}")]) -(define_insn "float2" - [(set (match_operand:VF1_128_256VL 0 "register_operand" "=v") -(any_float:VF1_128_256VL - (match_operand: 1 "nonimmediate_operand" "")))] - "TARGET_AVX512DQ && " +(define_insn "float2" + [(set (match_operand: 0 "register_operand" "=v") +(any_float: + (match_operand:VI8_256_512 1 "nonimmediate_operand" "")))] + "TARGET_AVX512DQ && " "vcvtqq2ps\t{%1, %0|%0, %1}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") - (set_attr "mode" "")]) + (set_attr "mode" "")]) (define_expand "avx512dq_floatv2div2sf2" [(set 
(match_operand:V4SF 0 "register_operand" "=v") @@ -9417,26 +9410,26 @@ (set_attr "prefix" "evex") (set_attr "mode" "")]) -(define_insn "unspec_fix_trunc2" - [(set (match_operand: 0 "register_operand" "=v") - (unspec: - [(match_operand:VF1_128_256VL 1 "" "")] +(define_insn "unspec_fix_trunc2" + [(set (match_operand:VI8_256_512 0 "register_operand" "=v") + (unspec:VI8_256_512 + [(match_operand: 1 "" "")] UNSPEC_VCVTT_U))] - "TARGET_AVX512DQ && " + "TARGET_AVX512DQ && " "vcvttps2qq\t{%1, %0|%0, %1}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") - (set_attr "mode" "")]) + (set_attr "mode" "")]) -(define_insn "fix_trunc2" - [(set (match_operand: 0 "register_operand" "=v") - (any_fix: - (match_operand:VF1_128_256VL 1 "" "")))] - "TARGET_AVX512DQ && " +(define_insn "fix_trunc2" + [(set (match_operand:VI8_256_512 0 "register_operand" "=v") + (any_fix:VI8_256_512 + (match_operand: 1 "" "")))] + "TARGET_AVX512DQ && " "vcvttps2qq\t{%1, %0|%0, %1}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") - (set_attr "mode" "")]) + (set_attr "mode" "")]) (define_insn "unspec_avx512dq_fix_truncv2sfv2di2" [(set (match_operand:V2DI 0 "register_operand" "=v") diff --git a/gcc/config/i386/subst.md b/gcc/config/i386/subst.md index 7a9b697e0f6..40fb92094d2 100644 --- a/gcc/config/i386/subst.md +++ b/gcc/config/i386/subst.md @@ -211,7 +211,6 @@ || mode == V16SImode || mode == V32HFmode)") -(define_subst_attr "round_modev8sf_condition" "round" "1" "(mode == V8SFmode)") (define_subst_attr "round_modev4sf_condition" "round" "1" "(mode == V4SFmode)") (define_subst_attr "round_codefor" "round" "*" "") (define_subst_attr "round_opnum" "round" "5" "6") @@ -257,7 +256,6 @@ || mode == V16SImode || mode == V32HFmode)") -(define_subst_attr "round_saeonly_modev8sf_condition" "round_saeonly" "1" "(mode == V8SFmode)") (define_subst "round_saeonly" [(set (match_operand:SUBST_A 0)
[gcc r13-8871] Add support for -mcpu=grace
https://gcc.gnu.org/g:952ea3260e40992d3bf5e1f17b4845a4e5c908b5 commit r13-8871-g952ea3260e40992d3bf5e1f17b4845a4e5c908b5 Author: Kyrylo Tkachov Date: Wed Jun 19 14:56:02 2024 +0530 Add support for -mcpu=grace This adds support for the NVIDIA Grace CPU to aarch64. We reuse the tuning decisions for the Neoverse V2 core, but include a number of architecture features that are not enabled by default in -mcpu=neoverse-v2. This allows Grace users to more simply target the CPU with -mcpu=grace rather than remembering what extensions to tag on top of -mcpu=neoverse-v2. Bootstrapped and tested on aarch64-none-linux-gnu. gcc/ * config/aarch64/aarch64-cores.def (grace): New entry. * config/aarch64/aarch64-tune.md: Regenerate. * doc/invoke.texi (AArch64 Options): Document the above. Signed-off-by: Kyrylo Tkachov Diff: --- gcc/config/aarch64/aarch64-cores.def | 2 ++ gcc/config/aarch64/aarch64-tune.md | 2 +- gcc/doc/invoke.texi | 4 ++-- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def index fdda0697b88..bec08ca1910 100644 --- a/gcc/config/aarch64/aarch64-cores.def +++ b/gcc/config/aarch64/aarch64-cores.def @@ -182,6 +182,8 @@ AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, V9A, (I8MM, BF16, SVE2_BITPER AARCH64_CORE("cobalt-100", cobalt100, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversen2, 0x6d, 0xd49, -1) AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversev2, 0x41, 0xd4f, -1) +AARCH64_CORE("grace", grace, cortexa57, V9A, (I8MM, BF16, CRYPTO, SVE2_BITPERM, SVE2_AES, SVE2_SHA3, SVE2_SM4, PROFILE), neoversev2, 0x41, 0xd4f, -1) + AARCH64_CORE("demeter", demeter, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversev2, 0x41, 0xd4f, -1) #undef AARCH64_CORE diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md index 9d46d38a292..6eae8522593 100644 --- 
a/gcc/config/aarch64/aarch64-tune.md +++ b/gcc/config/aarch64/aarch64-tune.md @@ -1,5 +1,5 @@ ;; -*- buffer-read-only: t -*- ;; Generated automatically by gentune.sh from aarch64-cores.def (define_attr "tune" - "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,neoversen1,ares,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,neoversev1,zeus,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa710,cortexa715,cortexx2,cortexx3,neoversen2,cobalt100,neoversev2,demeter" + "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,neoversen1,ares,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,neoversev1,zeus,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa710,cortexa715,cortexx2,cortexx3,neoversen2,cobalt100,neoversev2,grace,demeter" (const (symbol_ref "((enum attr_tune) aarch64_tune)"))) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 914c4bc8e6d..b17d0cf9341 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -20315,8 +20315,8 @@ 
performance of the code. Permissible values for this option are: @samp{cortex-a78}, @samp{cortex-a78ae}, @samp{cortex-a78c}, @samp{ares}, @samp{exynos-m1}, @samp{emag}, @samp{falkor}, @samp{neoverse-512tvb}, @samp{neoverse-e1}, @samp{neoverse-n1}, -@samp{neoverse-n2}, @samp{neoverse-v1}, @samp{neoverse-v2}, @samp{qdf24xx}, -@samp{saphira}, @samp{phecda}, @samp{xgene1}, @samp{vulcan}, +@samp{neoverse-n2}, @samp{neoverse-v1}, @samp{neoverse-v2}, @samp{grace}, +@samp{qdf24xx}, @samp{saphira}, @samp{phecda}, @samp{xgene1}, @samp{vulcan}, @samp{octeontx}, @samp{octeontx81}, @samp{octeontx83}, @samp{octeontx2}, @samp{octeontx2t98}, @samp{octeontx2t96} @samp{octeontx2t93}, @samp{octeontx2f95}, @samp{octeontx2f95n},
[gcc r15-1686] ada: Reject ambiguous function calls in interpolated string expressions
https://gcc.gnu.org/g:fdbc04d13f0e993ecf1a36680c8f7768dfb522fb commit r15-1686-gfdbc04d13f0e993ecf1a36680c8f7768dfb522fb Author: Javier Miranda Date: Mon Jun 10 17:17:59 2024 + ada: Reject ambiguous function calls in interpolated string expressions gcc/ada/ * sem_ch2.adb (Analyze_Interpolated_String_Literal): Report interpretations of ambiguous parameterless function calls. Diff: --- gcc/ada/sem_ch2.adb | 80 - 1 file changed, 79 insertions(+), 1 deletion(-) diff --git a/gcc/ada/sem_ch2.adb b/gcc/ada/sem_ch2.adb index 08cc75c9104..ddbb329d1f8 100644 --- a/gcc/ada/sem_ch2.adb +++ b/gcc/ada/sem_ch2.adb @@ -38,6 +38,8 @@ with Rident; use Rident; with Sem;use Sem; with Sem_Ch8;use Sem_Ch8; with Sem_Dim;use Sem_Dim; +with Sem_Res;use Sem_Res; +with Sem_Type; use Sem_Type; with Sinfo; use Sinfo; with Sinfo.Nodes;use Sinfo.Nodes; with Sinfo.Utils;use Sinfo.Utils; @@ -135,20 +137,96 @@ package body Sem_Ch2 is - procedure Analyze_Interpolated_String_Literal (N : Node_Id) is + + procedure Check_Ambiguous_Parameterless_Call (Func_Call : Node_Id); + -- Examine the interpretations of the call to the given parameterless + -- function call and report the location of each interpretation. 
+ + + -- Check_Ambiguous_Parameterless_Call -- + + + procedure Check_Ambiguous_Parameterless_Call (Func_Call : Node_Id) is + + procedure Report_Interpretation (E : Entity_Id); + -- Report an interpretation of the function call + + --- + -- Report_Interpretation -- + --- + + procedure Report_Interpretation (E : Entity_Id) is + begin +Error_Msg_Sloc := Sloc (E); + +if Nkind (Parent (E)) = N_Full_Type_Declaration then + Error_Msg_N ("interpretation (inherited) #!", Func_Call); +else + Error_Msg_N ("interpretation #!", Func_Call); +end if; + end Report_Interpretation; + + -- Local variables + + Error_Reported : Boolean; + I : Interp_Index; + It : Interp; + + -- Start of processing for Check_Ambiguous_Parameterless_Call + + begin + Error_Reported := False; + + -- Examine possible interpretations + + Get_First_Interp (Name (Func_Call), I, It); + while Present (It.Nam) loop +if It.Nam /= Entity (Name (Func_Call)) + and then Ekind (It.Nam) = E_Function + and then No (First_Formal (It.Nam)) +then + if not Error_Reported then + Error_Msg_NE +("ambiguous call to&", Func_Call, + Entity (Name (Func_Call))); + Report_Interpretation (Entity (Name (Func_Call))); + Error_Reported := True; + end if; + + Report_Interpretation (It.Nam); +end if; + +Get_Next_Interp (I, It); + end loop; + end Check_Ambiguous_Parameterless_Call; + + -- Local variables + Str_Elem : Node_Id; + -- Start of processing for Analyze_Interpolated_String_Literal + begin Set_Etype (N, Any_String); Str_Elem := First (Expressions (N)); while Present (Str_Elem) loop + + -- Before analyzed, a function call that has parameter is an + -- N_Indexed_Component node, and a call to a function that has + -- no parameters is an N_Identifier node. + Analyze (Str_Elem); + -- After analyzed, if it is still an N_Identifier node then we + -- found ambiguity and could not rewrite it as N_Function_Call. 
+ if Nkind (Str_Elem) = N_Identifier and then Ekind (Entity (Str_Elem)) = E_Function and then Is_Overloaded (Str_Elem) then -Error_Msg_NE ("ambiguous call to&", Str_Elem, Entity (Str_Elem)); +Check_Parameterless_Call (Str_Elem); +Check_Ambiguous_Parameterless_Call (Str_Elem); end if; Next (Str_Elem);
[gcc r15-1685] ada: Add missing dimension information for target names
https://gcc.gnu.org/g:d4c990759bcdc1f2b3384397cae6d8cb76a4cdad commit r15-1685-gd4c990759bcdc1f2b3384397cae6d8cb76a4cdad Author: Eric Botcazou Date: Tue Jun 11 19:29:22 2024 +0200 ada: Add missing dimension information for target names It is computed from the Etype of N_Target_Name nodes. gcc/ada/ * sem_ch5.adb (Analyze_Target_Name): Call Analyze_Dimension on the node once the Etype is set. * sem_dim.adb (OK_For_Dimension): Set to True for N_Target_Name. (Analyze_Dimension): Call Analyze_Dimension_Has_Etype for it. Diff: --- gcc/ada/sem_ch5.adb | 1 + gcc/ada/sem_dim.adb | 2 ++ 2 files changed, 3 insertions(+) diff --git a/gcc/ada/sem_ch5.adb b/gcc/ada/sem_ch5.adb index b92ceb17b1b..644bd21ce93 100644 --- a/gcc/ada/sem_ch5.adb +++ b/gcc/ada/sem_ch5.adb @@ -4201,6 +4201,7 @@ package body Sem_Ch5 is if Current = Expression (Context) then pragma Assert (Context = Current_Assignment); Set_Etype (N, Etype (Name (Current_Assignment))); + Analyze_Dimension (N); else Report_Error; end if; diff --git a/gcc/ada/sem_dim.adb b/gcc/ada/sem_dim.adb index 45a0f2ab922..39c36332497 100644 --- a/gcc/ada/sem_dim.adb +++ b/gcc/ada/sem_dim.adb @@ -219,6 +219,7 @@ package body Sem_Dim is N_Real_Literal => True, N_Selected_Component=> True, N_Slice => True, + N_Target_Name => True, N_Type_Conversion => True, N_Unchecked_Type_Conversion => True, @@ -1179,6 +1180,7 @@ package body Sem_Dim is | N_Qualified_Expression | N_Selected_Component | N_Slice +| N_Target_Name | N_Unchecked_Type_Conversion => Analyze_Dimension_Has_Etype (N);
[gcc r15-1682] ada: Overridden operation field not correctly set for controlling result wrappers
https://gcc.gnu.org/g:60ca71044e8e4d492c74f65f6093fbcf46d238bb commit r15-1682-g60ca71044e8e4d492c74f65f6093fbcf46d238bb Author: Martin Clochard Date: Fri Jun 7 11:44:45 2024 +0200 ada: Overridden operation field not correctly set for controlling result wrappers Implicit wrapper overridings generated for functions with controlling result when deriving with null extension may have field Overridden_Operation incorrectly set, when making several such derivations in succession. This happens because overridings were assumed to come from source, and entities generated by Derive_Subprograms were also assumed to be derived from source subprograms. Overridden_Operation could be set to the entity generated by Derive_Subprograms for the same type, resulting in a cycle between Overridden_Operation and Alias fields, causing non-termination in GNATprove. gcc/ada/ * sem_ch6.adb (Check_Overriding_Indicator) Remove Comes_From_Source filter. (New_Overloaded_Entity) Move up special case of LSP_Subprogram, and remove Comes_From_Source filter. 
Diff: --- gcc/ada/sem_ch6.adb | 82 +++-- 1 file changed, 35 insertions(+), 47 deletions(-) diff --git a/gcc/ada/sem_ch6.adb b/gcc/ada/sem_ch6.adb index e97afdaf12e..43aa2e636fa 100644 --- a/gcc/ada/sem_ch6.adb +++ b/gcc/ada/sem_ch6.adb @@ -6916,13 +6916,11 @@ package body Sem_Ch6 is -- operation is the inherited primitive (which is available -- through the attribute alias) - if (Is_Dispatching_Operation (Subp) -or else Is_Dispatching_Operation (Overridden_Subp)) + if Is_Dispatching_Operation (Subp) and then not Comes_From_Source (Overridden_Subp) and then Find_Dispatching_Type (Overridden_Subp) = Find_Dispatching_Type (Subp) and then Present (Alias (Overridden_Subp)) - and then Comes_From_Source (Alias (Overridden_Subp)) then Set_Overridden_Operation(Subp, Alias (Overridden_Subp)); Inherit_Subprogram_Contract (Subp, Alias (Overridden_Subp)); @@ -12565,16 +12563,25 @@ package body Sem_Ch6 is Enter_Overloaded_Entity (S); + -- LSP wrappers must override the ultimate alias of their + -- wrapped dispatching primitive E; required to traverse the + -- chain of ancestor primitives (see Map_Primitives). They + -- don't inherit contracts. + + if Is_Wrapper (S) +and then Present (LSP_Subprogram (S)) + then + Set_Overridden_Operation (S, Ultimate_Alias (E)); + -- For entities generated by Derive_Subprograms the -- overridden operation is the inherited primitive -- (which is available through the attribute alias). - if not (Comes_From_Source (E)) + elsif not (Comes_From_Source (E)) and then Is_Dispatching_Operation (E) and then Find_Dispatching_Type (E) = Find_Dispatching_Type (S) and then Present (Alias (E)) -and then Comes_From_Source (Alias (E)) then Set_Overridden_Operation(S, Alias (E)); Inherit_Subprogram_Contract (S, Alias (E)); @@ -12591,20 +12598,8 @@ package body Sem_Ch6 is -- must check whether the target is an init_proc. 
elsif not Is_Init_Proc (S) then - - -- LSP wrappers must override the ultimate alias of their - -- wrapped dispatching primitive E; required to traverse - -- the chain of ancestor primitives (c.f. Map_Primitives) - -- They don't inherit contracts. - - if Is_Wrapper (S) - and then Present (LSP_Subprogram (S)) - then -Set_Overridden_Operation(S, Ultimate_Alias (E)); - else -Set_Overridden_Operation(S, E); -Inherit_Subprogram_Contract (S, E); - end if; + Set_Overridden_Operation(S, E); + Inherit_Subprogram_Contract (S, E); Set_Is_Ada_2022_Only (S, Is_Ada_2022_Only (E)); end if; @@ -12619,37 +12614,30 @@ package body Sem_Ch6 is -- If S is a user-defined subprogram or a null procedure -- expanded to override an inherited null procedure, or a - -- predefined dispatching primitive then indicate that E -
[gcc r15-1681] ada: Implement first half of Generalized Finalization
https://gcc.gnu.org/g:3cb7e22ad965672f51a437c0a30a4c95f558 commit r15-1681-g3cb7e22ad965672f51a437c0a30a4c95f558 Author: Eric Botcazou Date: Wed Jun 5 23:19:53 2024 +0200 ada: Implement first half of Generalized Finalization This implements the first half of the Generalized Finalization proposal, namely the Finalizable aspect as well as its optional relaxed semantics for the finalization operations, but the latter part is only implemented for dynamically allocated objects. In accordance with the spirit, if not the letter, of the proposal, this implements the finalizable types declared with strict semantics for the finalization operations as a direct generalization of controlled types, which in turn makes it possible to reimplement the latter types in terms of the former types and ensures full interoperability between them. The relaxed semantics for the finalization operations is also a direct generalization of the GNAT pragma No_Heap_Finalization for dynamically allocated objects, in that it extends the effects of the pragma to all access types designating the finalizable type, instead of just applying them to library-level named access types. gcc/ada/ * aspects.ads (Aspect_Id): Add Aspect_Finalizable. (Implementation_Defined_Aspect): Add True for Aspect_Finalizable. (Operational_Aspect): Add True for Aspect_Finalizable. (Aspect_Argument): Add Expression for Aspect_Finalizable. (Is_Representation_Aspect): Add False for Aspect_Finalizable. (Aspect_Names): Add Name_Finalizable for Aspect_Finalizable. (Aspect_Delay): Add Always_Delay for Aspect_Finalizable. * checks.adb: Add with and use clauses for Sem_Elab. (Install_Primitive_Elaboration_Check): Call Is_Controlled_Procedure. * einfo.ads (Has_Relaxed_Finalization): Document new flag. (Is_Controlled_Active): Update documentation. * exp_aggr.adb (Generate_Finalization_Actions): Replace Find_Prim_Op with Find_Controlled_Prim_Op for Name_Finalize. 
* exp_attr.adb (Expand_N_Attribute_Reference) : Return 0 if the prefix type has relaxed finalization. * exp_ch3.adb (Build_Equivalent_Record_Aggregate): Return Empty if the type needs finalization. (Expand_Freeze_Record_Type): Call Find_Controlled_Prim_Op instead of Find_Prim_Op for Name_{Adjust,Initialize,Finalize}. Call Make_Finalize_Address_Body for all controlled types. * exp_ch4.adb (Insert_Dereference_Action): Do not generate a call to Adjust_Controlled_Dereference if the designated type has relaxed finalization. * exp_ch6.adb (Needs_BIP_Collection): Return false for an untagged type that has relaxed finalization. * exp_ch7.adb (Allows_Finalization_Collection): Return false if the designated type has relaxed finalization. (Check_Visibly_Controlled): Call Find_Controlled_Prim_Op instead of Find_Prim_Op. (Make_Adjust_Call): Likewise. (Make_Deep_Record_Body): Likewise. (Make_Final_Call): Likewise. (Make_Init_Call): Likewise. * exp_disp.adb (Set_All_DT_Position): Remove obsolete warning. * exp_util.ads: Add with and use clauses for Snames. (Find_Prim_Op): Add precondition. (Find_Controlled_Prim_Op): New function declaration. (Name_Of_Controlled_Prim_Op): Likewise. * exp_util.adb: Remove with and use clauses for Snames. (Build_Allocate_Deallocate_Proc): Do not build finalization actions if the designated type has relaxed finalization. (Find_Controlled_Prim_Op): New function. (Find_Last_Init): Call Find_Controlled_Prim_Op instead of Find_Prim_Op. (Name_Of_Controlled_Prim_Op): New function. * freeze.adb (Freeze_Entity.Freeze_Record_Type): Propagate the Has_Relaxed_Finalization flag from components. * gen_il-fields.ads (Opt_Field_Enum): Add Has_Relaxed_Finalization. * gen_il-gen-gen_entities.adb (Entity_Kind): Likewise. * sem_aux.adb (Is_By_Reference_Type): Return true for all controlled types. * sem_ch3.adb (Build_Derived_Record_Type): Do not special case types declared in Ada.Finalization. 
(Record_Type_Definition): Propagate the Has_Relaxed_Finalization flag from components. * sem_ch13.adb (Analyze_Aspects_At_Freeze_Point): Also process the Finalizable aspect. (Analyze_Aspect_Specifications): Likewise. Call Flag_Non_Static_Expr in more cases. (Check_Aspect_At_Freeze_Point): Likewise. (Inherit_Aspects
[gcc r15-1684] ada: Fix array-manipulating code in Mdll
https://gcc.gnu.org/g:432b8a346e10488a1751b86e7c51fff3ee870ae4 commit r15-1684-g432b8a346e10488a1751b86e7c51fff3ee870ae4 Author: Ronan Desplanques Date: Thu May 2 09:52:34 2024 +0200 ada: Fix array-manipulating code in Mdll This patch fixes a duo of array assignments in Mdll that were bound to fail. gcc/ada/ * mdll.adb (Build_Non_Reloc_DLL): Fix incorrect assignment to array object. (Ada_Build_Non_Reloc_DLL): Likewise. Diff: --- gcc/ada/mdll.adb | 43 ++- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/gcc/ada/mdll.adb b/gcc/ada/mdll.adb index 2f946b0a5bb..ac4af8363aa 100644 --- a/gcc/ada/mdll.adb +++ b/gcc/ada/mdll.adb @@ -322,17 +322,21 @@ package body MDLL is -- Build the DLL declare -Params : OS_Lib.Argument_List := - Adr_Opt'Unchecked_Access & All_Options; +Params : constant OS_Lib.Argument_List := +Map_Opt'Unchecked_Access & +Adr_Opt'Unchecked_Access & All_Options; +First_Param : Positive := Params'First + 1; + begin if Map_File then - Params := Map_Opt'Unchecked_Access & Params; + First_Param := Params'First; end if; -Utl.Gcc (Output_File => Dll_File, - Files => Exp_File'Unchecked_Access & Ofiles, - Options => Params, - Build_Lib => True); +Utl.Gcc + (Output_File => Dll_File, + Files => Exp_File'Unchecked_Access & Ofiles, + Options => Params (First_Param .. 
Params'Last), + Build_Lib => True); end; OS_Lib.Delete_File (Exp_File, Success); @@ -377,20 +381,25 @@ package body MDLL is Utl.Gnatbind (L_Afiles, Options & Bargs_Options); declare -Params : OS_Lib.Argument_List := - Out_Opt'Unchecked_Access & - Dll_File'Unchecked_Access & - Lib_Opt'Unchecked_Access & - Exp_File'Unchecked_Access & - Adr_Opt'Unchecked_Access & - Ofiles & - All_Options; +Params : constant OS_Lib.Argument_List := +Map_Opt'Unchecked_Access & +Out_Opt'Unchecked_Access & +Dll_File'Unchecked_Access & +Lib_Opt'Unchecked_Access & +Exp_File'Unchecked_Access & +Adr_Opt'Unchecked_Access & +Ofiles & +All_Options; +First_Param : Positive := Params'First + 1; + begin if Map_File then - Params := Map_Opt'Unchecked_Access & Params; + First_Param := Params'First; end if; -Utl.Gnatlink (L_Afiles (L_Afiles'Last).all, Params); +Utl.Gnatlink + (L_Afiles (L_Afiles'Last).all, + Params (First_Param .. Params'Last)); end; OS_Lib.Delete_File (Exp_File, Success);
[gcc r15-1687] ada: Remove last uses of System.Address_Operations in runtime library
https://gcc.gnu.org/g:5c8b7fcc04b1ec412e11ae3d77f704c19a63ab07 commit r15-1687-g5c8b7fcc04b1ec412e11ae3d77f704c19a63ab07 Author: Eric Botcazou Date: Wed Jun 12 16:05:57 2024 +0200 ada: Remove last uses of System.Address_Operations in runtime library This completes the switch from using System.Address_Operations to using only System.Storage_Elements in the runtime library. The remaining uses were for simple optimizations that can be done by the optimizer alone. gcc/ada/ * libgnat/s-carsi8.adb: Remove clauses for System.Address_Operations and use only operations of System.Storage_Elements for addresses. * libgnat/s-casi16.adb: Likewise. * libgnat/s-casi32.adb: Likewise. * libgnat/s-casi64.adb: Likewise. * libgnat/s-casi128.adb: Likewise. * libgnat/s-carun8.adb: Likewise. * libgnat/s-caun16.adb: Likewise. * libgnat/s-caun32.adb: Likewise. * libgnat/s-caun64.adb: Likewise. * libgnat/s-caun128.adb: Likewise. * libgnat/s-geveop.adb: Likewise. Diff: --- gcc/ada/libgnat/s-carsi8.adb | 8 +--- gcc/ada/libgnat/s-carun8.adb | 8 +--- gcc/ada/libgnat/s-casi128.adb | 7 --- gcc/ada/libgnat/s-casi16.adb | 11 +++ gcc/ada/libgnat/s-casi32.adb | 7 --- gcc/ada/libgnat/s-casi64.adb | 7 --- gcc/ada/libgnat/s-caun128.adb | 7 --- gcc/ada/libgnat/s-caun16.adb | 11 +++ gcc/ada/libgnat/s-caun32.adb | 7 --- gcc/ada/libgnat/s-caun64.adb | 7 --- gcc/ada/libgnat/s-geveop.adb | 33 - 11 files changed, 64 insertions(+), 49 deletions(-) diff --git a/gcc/ada/libgnat/s-carsi8.adb b/gcc/ada/libgnat/s-carsi8.adb index 2a6c532d247..7eb545a2657 100644 --- a/gcc/ada/libgnat/s-carsi8.adb +++ b/gcc/ada/libgnat/s-carsi8.adb @@ -29,8 +29,7 @@ -- -- -- -with System.Address_Operations; use System.Address_Operations; -with System.Storage_Elements; use System.Storage_Elements; +with System.Storage_Elements; use System.Storage_Elements; with Ada.Unchecked_Conversion; @@ -77,7 +76,10 @@ package body System.Compare_Array_Signed_8 is begin -- If operands are non-aligned, or length is too short, go by bytes - if ModA 
(OrA (Left, Right), 4) /= 0 or else Compare_Len < 4 then + if Left mod Storage_Offset (4) /= 0 +or else Right mod Storage_Offset (4) /= 0 +or else Compare_Len < 4 + then return Compare_Array_S8_Unaligned (Left, Right, Left_Len, Right_Len); end if; diff --git a/gcc/ada/libgnat/s-carun8.adb b/gcc/ada/libgnat/s-carun8.adb index 27422e5d728..e4cac204769 100644 --- a/gcc/ada/libgnat/s-carun8.adb +++ b/gcc/ada/libgnat/s-carun8.adb @@ -29,8 +29,7 @@ -- -- -- -with System.Address_Operations; use System.Address_Operations; -with System.Storage_Elements; use System.Storage_Elements; +with System.Storage_Elements; use System.Storage_Elements; with Ada.Unchecked_Conversion; @@ -76,7 +75,10 @@ package body System.Compare_Array_Unsigned_8 is begin -- If operands are non-aligned, or length is too short, go by bytes - if ModA (OrA (Left, Right), 4) /= 0 or else Compare_Len < 4 then + if Left mod Storage_Offset (4) /= 0 +or else Right mod Storage_Offset (4) /= 0 +or else Compare_Len < 4 + then return Compare_Array_U8_Unaligned (Left, Right, Left_Len, Right_Len); end if; diff --git a/gcc/ada/libgnat/s-casi128.adb b/gcc/ada/libgnat/s-casi128.adb index 3d3614136a7..1b65c8c86ef 100644 --- a/gcc/ada/libgnat/s-casi128.adb +++ b/gcc/ada/libgnat/s-casi128.adb @@ -29,8 +29,7 @@ -- -- -- -with System.Address_Operations; use System.Address_Operations; -with System.Storage_Elements; use System.Storage_Elements; +with System.Storage_Elements; use System.Storage_Elements; with Ada.Unchecked_Conversion; @@ -70,7 +69,9 @@ package body System.Compare_Array_Signed_128 is begin -- Case of going by aligned quadruple words - if ModA (OrA (Left, Right), 16) = 0 then + if Left mod Storage_Offset (16) = 0 +and then Right mod Storage_Offset (16) = 0 + then while Clen /= 0 loop if W (L).all /= W (R).all then if W (L).all > W (R).all then diff --git a/gcc/ada/libgnat/s-casi16.adb b/gcc/ada/libgnat/s-casi16.adb index 01771d1f8ff..e3411c978c5 100644 --- a/gcc/ada/libgnat/s-casi16.adb +++ 
b/gcc/ada/libgnat/s-casi16.adb @@ -29,8 +29,7
[gcc r15-1683] ada: Bug using user defined string literals with interpolated strings
https://gcc.gnu.org/g:089bb078e8663f72292f2edc63f48c304dcb1ccc commit r15-1683-g089bb078e8663f72292f2edc63f48c304dcb1ccc Author: Javier Miranda Date: Thu Jun 6 11:48:02 2024 + ada: Bug using user defined string literals with interpolated strings The frontend rejects the use of user defined string literals using interpolated strings. gcc/ada/ * sem_res.adb (Has_Applicable_User_Defined_Literal): Add missing support for interpolated strings. Diff: --- gcc/ada/sem_res.adb | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/gcc/ada/sem_res.adb b/gcc/ada/sem_res.adb index a0dd1f7962b..72bba1f97af 100644 --- a/gcc/ada/sem_res.adb +++ b/gcc/ada/sem_res.adb @@ -467,7 +467,7 @@ package body Sem_Res is Literal_Aspect_Map : constant array (N_Numeric_Or_String_Literal) of Aspect_Id := (N_Integer_Literal => Aspect_Integer_Literal, - N_Interpolated_String_Literal => No_Aspect, + N_Interpolated_String_Literal => Aspect_String_Literal, N_Real_Literal=> Aspect_Real_Literal, N_String_Literal => Aspect_String_Literal); @@ -487,6 +487,7 @@ package body Sem_Res is begin if (Nkind (N) in N_Numeric_Or_String_Literal + | N_Interpolated_String_Literal and then Present (Find_Aspect (Typ, Literal_Aspect_Map (Nkind (N) or else @@ -563,6 +564,10 @@ package body Sem_Res is Param1 := Make_String_Literal (Loc, Strval (N)); Params := New_List (Param1); + elsif Nkind (N) = N_Interpolated_String_Literal then +Param1 := New_Copy_Tree (N); +Params := New_List (Param1); + else Param1 := Make_String_Literal
[gcc r12-10584] Add support for -mcpu=grace
https://gcc.gnu.org/g:25cb13649b1765a21f21907f2d7a0aa2135accb5 commit r12-10584-g25cb13649b1765a21f21907f2d7a0aa2135accb5 Author: Kyrylo Tkachov Date: Wed Jun 19 14:56:02 2024 +0530 Add support for -mcpu=grace This adds support for the NVIDIA Grace CPU to aarch64. We reuse the tuning decisions for the Neoverse V2 core, but include a number of architecture features that are not enabled by default in -mcpu=neoverse-v2. This allows Grace users to more simply target the CPU with -mcpu=grace rather than remembering what extensions to tag on top of -mcpu=neoverse-v2. Bootstrapped and tested on aarch64-none-linux-gnu. gcc/ * config/aarch64/aarch64-cores.def (grace): New entry. * config/aarch64/aarch64-tune.md: Regenerate. * doc/invoke.texi (AArch64 Options): Document the above. Signed-off-by: Kyrylo Tkachov Diff: --- gcc/config/aarch64/aarch64-cores.def | 1 + gcc/config/aarch64/aarch64-tune.md | 2 +- gcc/doc/invoke.texi | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def index 956afa70714..6532bdaafb5 100644 --- a/gcc/config/aarch64/aarch64-cores.def +++ b/gcc/config/aarch64/aarch64-cores.def @@ -176,5 +176,6 @@ AARCH64_CORE("cobalt-100", cobalt100, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AA AARCH64_CORE("demeter", demeter, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1) AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1) +AARCH64_CORE("grace", grace, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_CRYPTO | AARCH64_FL_SHA3 | AARCH64_FL_SM4 | AARCH64_FL_SVE2_AES | AARCH64_FL_SVE2_SHA3 | AARCH64_FL_SVE2_SM4 | 
AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1) #undef AARCH64_CORE diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md index 2c1852c8fe6..0c139e3e729 100644 --- a/gcc/config/aarch64/aarch64-tune.md +++ b/gcc/config/aarch64/aarch64-tune.md @@ -1,5 +1,5 @@ ;; -*- buffer-read-only: t -*- ;; Generated automatically by gentune.sh from aarch64-cores.def (define_attr "tune" - "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa710,cortexx2,neoversen2,cobalt100,demeter,neoversev2" + "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa710,cortexx2,neoversen2,cobalt100,demeter,neoversev2,grace" (const (symbol_ref "((enum attr_tune) aarch64_tune)"))) diff --git a/gcc/doc/invoke.texi 
b/gcc/doc/invoke.texi index c83f667260e..fbfa3241e7f 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -19203,8 +19203,8 @@ performance of the code. Permissible values for this option are: @samp{cortex-a78}, @samp{cortex-a78ae}, @samp{cortex-a78c}, @samp{ares}, @samp{exynos-m1}, @samp{emag}, @samp{falkor}, @samp{neoverse-512tvb}, @samp{neoverse-e1}, @samp{neoverse-n1}, -@samp{neoverse-n2}, @samp{neoverse-v1}, @samp{neoverse-v2}, @samp{qdf24xx}, -@samp{saphira}, @samp{phecda}, @samp{xgene1}, @samp{vulcan}, +@samp{neoverse-n2}, @samp{neoverse-v1}, @samp{neoverse-v2}, @samp{grace}, +@samp{qdf24xx}, @samp{saphira}, @samp{phecda}, @samp{xgene1}, @samp{vulcan}, @samp{octeontx}, @samp{octeontx81}, @samp{octeontx83}, @samp{octeontx2}, @samp{octeontx2t98}, @samp{octeontx2t96} @samp{octeontx2t93}, @samp{octeont
[gcc r11-11540] Add support for -mcpu=grace
https://gcc.gnu.org/g:bb943609534fcbd984d39a9a7efef12fa2667ac6 commit r11-11540-gbb943609534fcbd984d39a9a7efef12fa2667ac6 Author: Kyrylo Tkachov Date: Wed Jun 19 14:56:02 2024 +0530 Add support for -mcpu=grace This adds support for the NVIDIA Grace CPU to aarch64. We reuse the tuning decisions for the Neoverse V2 core, but include a number of architecture features that are not enabled by default in -mcpu=neoverse-v2. This allows Grace users to more simply target the CPU with -mcpu=grace rather than remembering what extensions to tag on top of -mcpu=neoverse-v2. Bootstrapped and tested on aarch64-none-linux-gnu. gcc/ * config/aarch64/aarch64-cores.def (grace): New entry. * config/aarch64/aarch64-tune.md: Regenerate. * doc/invoke.texi (AArch64 Options): Document the above. Signed-off-by: Kyrylo Tkachov Diff: --- gcc/config/aarch64/aarch64-cores.def | 1 + gcc/config/aarch64/aarch64-tune.md | 2 +- gcc/doc/invoke.texi | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def index 5599cde700f..0243e3d4d1c 100644 --- a/gcc/config/aarch64/aarch64-cores.def +++ b/gcc/config/aarch64/aarch64-cores.def @@ -150,6 +150,7 @@ AARCH64_CORE("saphira", saphira,saphira,8_4A, AARCH64_FL_FOR_ARCH8_ AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, 8_5A, AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG, neoversen2, 0x41, 0xd49, -1) AARCH64_CORE("cobalt-100", cobalt100, cortexa57, 8_5A, AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG, neoversen2, 0x6d, 0xd49, -1) AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, 8_5A, AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_SVE2_BITPERM 
| AARCH64_FL_RNG | AARCH64_FL_MEMTAG, neoverse512tvb, 0x41, 0xd4f, -1) +AARCH64_CORE("grace", grace, cortexa57, 8_5A, AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_CRYPTO | AARCH64_FL_SHA3 | AARCH64_FL_SM4 | AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_SVE2_AES | AARCH64_FL_SVE2_SM4 | AARCH64_FL_SVE2_SHA3, neoverse512tvb, 0x41, 0xd4f, -1) /* ARMv8-A big.LITTLE implementations. */ diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md index 8953f1c0332..f233a7cce6c 100644 --- a/gcc/config/aarch64/aarch64-tune.md +++ b/gcc/config/aarch64/aarch64-tune.md @@ -1,5 +1,5 @@ ;; -*- buffer-read-only: t -*- ;; Generated automatically by gentune.sh from aarch64-cores.def (define_attr "tune" - "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,neoversen2,cobalt100,neoversev2,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82" + 
"cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,neoversen2,cobalt100,neoversev2,grace,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82" (const (symbol_ref "((enum attr_tune) aarch64_tune)"))) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 1ae94fb3677..ef331d72beb 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -18233,8 +18233,8 @@ performance of the code. Permissible values for this option are: @samp{cortex-a78}, @samp{cortex-a78ae}, @samp{cortex-a78c}, @samp{ares}, @samp{exynos-m1}, @samp{emag}, @samp{falkor}, @samp{neoverse-512tvb}, @samp{neoverse-e1}, @samp{neoverse-n1}, -@samp{neoverse-n2}, @samp{neoverse-v1},@samp{neoverse-v2}, @samp{qdf24xx}, -@samp{saphira}, @samp{phecda}, @sa
[gcc r15-1688] libstdc++: Enable more debug assertions during constant evaluation [PR111250]
https://gcc.gnu.org/g:cfc9fa3bdddc1af59b7854937b99516067fd8c63 commit r15-1688-gcfc9fa3bdddc1af59b7854937b99516067fd8c63 Author: Jonathan Wakely Date: Tue Jun 18 20:57:13 2024 +0100 libstdc++: Enable more debug assertions during constant evaluation [PR111250] Some of our debug assertions expand to nothing unless _GLIBCXX_ASSERTIONS is defined, which means they are not checked during constant evaluation. By making them unconditionally expand to a __glibcxx_assert expression they will be checked during constant evaluation. This allows us to diagnose more instances of undefined behaviour at compile-time, such as accessing a vector past-the-end. libstdc++-v3/ChangeLog: PR libstdc++/111250 * include/debug/assertions.h (__glibcxx_requires_non_empty_range) (__glibcxx_requires_nonempty, __glibcxx_requires_subscript): Define to __glibcxx_assert expressions or to debug mode __glibcxx_check_xxx expressions. * testsuite/23_containers/array/element_access/constexpr_c++17.cc: Add checks for out-of-bounds accesses in constant expressions. * testsuite/23_containers/vector/element_access/constexpr.cc: Likewise. Diff: --- libstdc++-v3/include/debug/assertions.h| 14 --- .../array/element_access/constexpr_c++17.cc| 44 ++ .../vector/element_access/constexpr.cc | 24 ++-- 3 files changed, 72 insertions(+), 10 deletions(-) diff --git a/libstdc++-v3/include/debug/assertions.h b/libstdc++-v3/include/debug/assertions.h index fff1ae8def0..20441e33897 100644 --- a/libstdc++-v3/include/debug/assertions.h +++ b/libstdc++-v3/include/debug/assertions.h @@ -31,12 +31,7 @@ #include -#ifndef _GLIBCXX_ASSERTIONS -# define __glibcxx_requires_non_empty_range(_First,_Last) -# define __glibcxx_requires_nonempty() -# define __glibcxx_requires_subscript(_N) -#else - +#ifndef _GLIBCXX_DEBUG // Verify that [_First, _Last) forms a non-empty iterator range. 
# define __glibcxx_requires_non_empty_range(_First,_Last) \ __glibcxx_assert(_First != _Last) @@ -45,6 +40,13 @@ // Verify that the container is nonempty # define __glibcxx_requires_nonempty() \ __glibcxx_assert(!this->empty()) +#else // Use the more verbose Debug Mode checks. +# define __glibcxx_requires_non_empty_range(_First,_Last) \ + __glibcxx_check_non_empty_range(_First,_Last) +# define __glibcxx_requires_nonempty() \ + __glibcxx_check_nonempty() +# define __glibcxx_requires_subscript(_N) \ + __glibcxx_check_subscript(_N) #endif #if defined _GLIBCXX_DEBUG && _GLIBCXX_HOSTED diff --git a/libstdc++-v3/testsuite/23_containers/array/element_access/constexpr_c++17.cc b/libstdc++-v3/testsuite/23_containers/array/element_access/constexpr_c++17.cc index a14ad487b42..19ab1cc1f8e 100644 --- a/libstdc++-v3/testsuite/23_containers/array/element_access/constexpr_c++17.cc +++ b/libstdc++-v3/testsuite/23_containers/array/element_access/constexpr_c++17.cc @@ -66,3 +66,47 @@ constexpr bool test_zero() } static_assert( test_zero() ); + +#ifdef __cpp_concepts +template + constexpr std::false_type + access_empty() { return {}; } + +template + requires (std::bool_constant<&std::array{}.at(0) != nullptr>::value) + constexpr std::true_type + access_empty() { return {}; } + +template + requires (std::bool_constant<&std::array{}[0] != nullptr>::value) + constexpr std::true_type + access_empty() { return {}; } + +template + requires (std::bool_constant<&std::array{}.front() != nullptr>::value) + constexpr std::true_type + access_empty() { return {}; } + +template + requires (std::bool_constant<&std::array{}.back() != nullptr>::value) + constexpr std::true_type + access_empty() { return {}; } + +static_assert( ! 
access_empty() ); + +template + constexpr std::false_type + access_past_the_end() { return {}; } + +template + requires (std::bool_constant{}.at(0) != nullptr>::value) + constexpr std::true_type + access_past_the_end() { return {}; } + +template + requires (std::bool_constant<&std::array{}[1] != nullptr>::value) + constexpr std::true_type + access_past_the_end() { return {}; } + +static_assert( ! access_past_the_end() ); +#endif diff --git a/libstdc++-v3/testsuite/23_containers/vector/element_access/constexpr.cc b/libstdc++-v3/testsuite/23_containers/vector/element_access/constexpr.cc index 19c91d28cd6..358ded47ad9 100644 --- a/libstdc++-v3/testsuite/23_containers/vector/element_access/constexpr.cc +++ b/libstdc++-v3/testsuite/23_containers/vector/element_access/constexpr.cc @@ -85,23 +85,39 @@ template access_empty() { return {}; } template - requires (std::bool_constant<(std::vector().at(0), true)>::value) + requires (std::bool_constant<&std::vector().at(0) != nullptr>::value) constexpr std::true_type access_empty() { return {}; } template - requires (std::bool_constant<(std::vector()[0], true)>::
[gcc r15-1689] libstdc++: Add debug assertions to std::vector<bool> [PR103191]
https://gcc.gnu.org/g:8fd84bc009b3073666a24047c78a04c19eeab752 commit r15-1689-g8fd84bc009b3073666a24047c78a04c19eeab752 Author: Jonathan Wakely Date: Tue Jun 18 10:57:45 2024 +0100 libstdc++: Add debug assertions to std::vector<bool> [PR103191] This adds debug assertions for std::vector<bool> element access. libstdc++-v3/ChangeLog: PR libstdc++/103191 * include/bits/stl_bvector.h (vector<bool>::operator[]) (vector<bool>::front, vector<bool>::back): Add debug assertions. * testsuite/23_containers/vector/bool/element_access/constexpr.cc: Remove dg-error that no longer triggers. Diff: --- libstdc++-v3/include/bits/stl_bvector.h| 30 +- .../vector/bool/element_access/constexpr.cc| 2 +- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/libstdc++-v3/include/bits/stl_bvector.h b/libstdc++-v3/include/bits/stl_bvector.h index 52153cadf8f..8685cc64cc4 100644 --- a/libstdc++-v3/include/bits/stl_bvector.h +++ b/libstdc++-v3/include/bits/stl_bvector.h @@ -1084,12 +1084,18 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR reference operator[](size_type __n) - { return begin()[__n]; } + { + __glibcxx_requires_subscript(__n); + return begin()[__n]; + } _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR const_reference operator[](size_type __n) const - { return begin()[__n]; } + { + __glibcxx_requires_subscript(__n); + return begin()[__n]; + } protected: _GLIBCXX20_CONSTEXPR @@ -1133,22 +1139,34 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR reference front() - { return *begin(); } + { + __glibcxx_requires_nonempty(); + return *begin(); + } _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR const_reference front() const - { return *begin(); } + { + __glibcxx_requires_nonempty(); + return *begin(); + } _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR reference back() - { return *(end() - 1); } + { + __glibcxx_requires_nonempty(); + return *(end() - 1); + } _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR const_reference back() const - { 
return *(end() - 1); } + { + 
__glibcxx_requires_nonempty(); + return *(end() - 1); + } _GLIBCXX20_CONSTEXPR void diff --git a/libstdc++-v3/testsuite/23_containers/vector/bool/element_access/constexpr.cc b/libstdc++-v3/testsuite/23_containers/vector/bool/element_access/constexpr.cc index bff9f7b4e0f..7c60e5db4d1 100644 --- a/libstdc++-v3/testsuite/23_containers/vector/bool/element_access/constexpr.cc +++ b/libstdc++-v3/testsuite/23_containers/vector/bool/element_access/constexpr.cc @@ -115,4 +115,4 @@ template constexpr std::true_type access_empty_front() { return {}; } -static_assert( ! access_empty_front() ); // { dg-error "ambiguous" "PR 103191" { target { ! debug_mode } } } +static_assert( ! access_empty_front() );
[gcc r15-1690] libstdc++: Fix std::format for chrono::duration with unsigned rep [PR115668]
https://gcc.gnu.org/g:dafa750c8a6f0a088677871bfaad054881737ab1 commit r15-1690-gdafa750c8a6f0a088677871bfaad054881737ab1 Author: Jonathan Wakely Date: Wed Jun 26 20:22:54 2024 +0100 libstdc++: Fix std::format for chrono::duration with unsigned rep [PR115668] Using std::chrono::abs is only valid if numeric_limits::is_signed is true, so using it unconditionally made it ill-formed to format a duration with an unsigned rep. The duration formatter might as well negate the duration itself instead of using chrono::abs, because it already needs to check for a negative value. libstdc++-v3/ChangeLog: PR libstdc++/115668 * include/bits/chrono_io.h (formatter::format): Do not use chrono::abs. * testsuite/20_util/duration/io.cc: Check formatting a duration with unsigned rep. Diff: --- libstdc++-v3/include/bits/chrono_io.h | 5 - libstdc++-v3/testsuite/20_util/duration/io.cc | 6 ++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/libstdc++-v3/include/bits/chrono_io.h b/libstdc++-v3/include/bits/chrono_io.h index 3b34992b42a..72c66a0fef0 100644 --- a/libstdc++-v3/include/bits/chrono_io.h +++ b/libstdc++-v3/include/bits/chrono_io.h @@ -1607,7 +1607,10 @@ namespace __format format(const chrono::duration<_Rep, _Period>& __d, basic_format_context<_Out, _CharT>& __fc) const { - return _M_f._M_format(chrono::abs(__d), __fc, __d < __d.zero()); + if constexpr (numeric_limits<_Rep>::is_signed) + if (__d < __d.zero()) + return _M_f._M_format(-__d, __fc, true); + return _M_f._M_format(__d, __fc, false); } private: diff --git a/libstdc++-v3/testsuite/20_util/duration/io.cc b/libstdc++-v3/testsuite/20_util/duration/io.cc index 2f940ef86b7..6b00689672c 100644 --- a/libstdc++-v3/testsuite/20_util/duration/io.cc +++ b/libstdc++-v3/testsuite/20_util/duration/io.cc @@ -100,6 +100,12 @@ test_format() std::chrono::duration d{0.5}; s = std::format("{}", d); VERIFY( s == "0.5ms" ); + + std::chrono::duration u{500}; // PR libstdc++/115668 + s = std::format("{}", u); + VERIFY( s == "500ms" ); + s 
= std::format("{:%Q %q}", u); + VERIFY( s == "500 ms" ); } void
[gcc r15-1691] Avoid global bitmap space in ranger.
https://gcc.gnu.org/g:bcdbb85f26aa0d25645d51ddf728a049b201c980 commit r15-1691-gbcdbb85f26aa0d25645d51ddf728a049b201c980 Author: Aldy Hernandez Date: Wed Jun 19 11:42:16 2024 +0200 Avoid global bitmap space in ranger. gcc/ChangeLog: * gimple-range-cache.cc (update_list::update_list): Add m_bitmaps. (update_list::~update_list): Initialize m_bitmaps. * gimple-range-cache.h (ssa_lazy_cache): Add m_bitmaps. * gimple-range.cc (enable_ranger): Remove global bitmap initialization. (disable_ranger): Remove global bitmap release. Diff: --- gcc/gimple-range-cache.cc | 6 -- gcc/gimple-range-cache.h | 9 +++-- gcc/gimple-range.cc | 4 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/gcc/gimple-range-cache.cc b/gcc/gimple-range-cache.cc index d84fd1ca0e8..6979a14cbaa 100644 --- a/gcc/gimple-range-cache.cc +++ b/gcc/gimple-range-cache.cc @@ -906,6 +906,7 @@ private: vec m_update_list; int m_update_head; bitmap m_propfail; + bitmap_obstack m_bitmaps; }; // Create an update list. @@ -915,7 +916,8 @@ update_list::update_list () m_update_list.create (0); m_update_list.safe_grow_cleared (last_basic_block_for_fn (cfun) + 64); m_update_head = -1; - m_propfail = BITMAP_ALLOC (NULL); + bitmap_obstack_initialize (&m_bitmaps); + m_propfail = BITMAP_ALLOC (&m_bitmaps); } // Destroy an update list. @@ -923,7 +925,7 @@ update_list::update_list () update_list::~update_list () { m_update_list.release (); - BITMAP_FREE (m_propfail); + bitmap_obstack_release (&m_bitmaps); } // Add BB to the list of blocks to update, unless it's already in the list. 
diff --git a/gcc/gimple-range-cache.h b/gcc/gimple-range-cache.h index 63410d5437e..0ea34d3f686 100644 --- a/gcc/gimple-range-cache.h +++ b/gcc/gimple-range-cache.h @@ -78,8 +78,12 @@ protected: class ssa_lazy_cache : public ssa_cache { public: - inline ssa_lazy_cache () { active_p = BITMAP_ALLOC (NULL); } - inline ~ssa_lazy_cache () { BITMAP_FREE (active_p); } + inline ssa_lazy_cache () + { +bitmap_obstack_initialize (&m_bitmaps); +active_p = BITMAP_ALLOC (&m_bitmaps); + } + inline ~ssa_lazy_cache () { bitmap_obstack_release (&m_bitmaps); } inline bool empty_p () const { return bitmap_empty_p (active_p); } virtual bool has_range (tree name) const; virtual bool set_range (tree name, const vrange &r); @@ -89,6 +93,7 @@ public: virtual void clear (); void merge (const ssa_lazy_cache &); protected: + bitmap_obstack m_bitmaps; bitmap active_p; }; diff --git a/gcc/gimple-range.cc b/gcc/gimple-range.cc index 50448ef81a2..5df649e268c 100644 --- a/gcc/gimple-range.cc +++ b/gcc/gimple-range.cc @@ -681,8 +681,6 @@ enable_ranger (struct function *fun, bool use_imm_uses) { gimple_ranger *r; - bitmap_obstack_initialize (NULL); - gcc_checking_assert (!fun->x_range_query); r = new gimple_ranger (use_imm_uses); fun->x_range_query = r; @@ -699,8 +697,6 @@ disable_ranger (struct function *fun) gcc_checking_assert (fun->x_range_query); delete fun->x_range_query; fun->x_range_query = NULL; - - bitmap_obstack_release (NULL); } //
[gcc r15-1692] [libstdc++] [testsuite] defer to check_vect_support* [PR115454]
https://gcc.gnu.org/g:95faa1bea7bdc7f92fcccb3543bfcbc8184c5e5b commit r15-1692-g95faa1bea7bdc7f92fcccb3543bfcbc8184c5e5b Author: Alexandre Oliva Date: Thu Jun 27 07:22:48 2024 -0300 [libstdc++] [testsuite] defer to check_vect_support* [PR115454] The newly-added testcase overrides the default dg-do action set by check_vect_support_and_set_flags (in libstdc++-dg/conformance.exp), so it attempts to run the test even if runtime vector support is not available. Remove the explicit dg-do directive, so that the default is honored, and the test is run if vector support is found, and only compiled otherwise. for libstdc++-v3/ChangeLog PR libstdc++/115454 * testsuite/experimental/simd/pr115454_find_last_set.cc: Defer to check_vect_support_and_set_flags's default dg-do action. Diff: --- libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc b/libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc index 25a713b4e94..4ade8601f27 100644 --- a/libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc +++ b/libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc @@ -1,5 +1,4 @@ // { dg-options "-std=gnu++17" } -// { dg-do run { target *-*-* } } // { dg-require-effective-target c++17 } // { dg-additional-options "-march=x86-64-v4" { target avx512f_runtime } } // { dg-require-cmath "" }
[gcc r15-1693] libstdc++: Fix std::codecvt for empty dest [PR37475]
https://gcc.gnu.org/g:73ad57c244c283bf6da0c16630212f11b945eda5 commit r15-1693-g73ad57c244c283bf6da0c16630212f11b945eda5 Author: Jonathan Wakely Date: Tue Jun 11 16:45:43 2024 +0100 libstdc++: Fix std::codecvt for empty dest [PR37475] For the GNU locale model, codecvt::do_out and codecvt::do_in incorrectly return 'ok' when the destination range is empty. That happens because detecting incomplete output is done in the loop body, and the loop is never even entered if to == to_end. By restructuring the loop condition so that we check the output range separately, we can ensure that for a non-empty source range, we always enter the loop at least once, and detect if the destination range is too small. The loops also seem easier to reason about if we return immediately on any error, instead of checking the result twice on every iteration. We can use an RAII type to restore the locale before returning, which also simplifies all the other member functions. libstdc++-v3/ChangeLog: PR libstdc++/37475 * config/locale/gnu/codecvt_members.cc (Guard): New RAII type. (do_out, do_in): Return partial if the destination is empty but the source is not. Use Guard to restore locale on scope exit. Return immediately on any conversion error. (do_encoding, do_max_length, do_length): Use Guard. * testsuite/22_locale/codecvt/in/char/37475.cc: New test. * testsuite/22_locale/codecvt/in/wchar_t/37475.cc: New test. * testsuite/22_locale/codecvt/out/char/37475.cc: New test. * testsuite/22_locale/codecvt/out/wchar_t/37475.cc: New test. 
Diff: --- libstdc++-v3/config/locale/gnu/codecvt_members.cc | 117 + .../testsuite/22_locale/codecvt/in/char/37475.cc | 23 .../22_locale/codecvt/in/wchar_t/37475.cc | 23 .../testsuite/22_locale/codecvt/out/char/37475.cc | 23 .../22_locale/codecvt/out/wchar_t/37475.cc | 23 5 files changed, 142 insertions(+), 67 deletions(-) diff --git a/libstdc++-v3/config/locale/gnu/codecvt_members.cc b/libstdc++-v3/config/locale/gnu/codecvt_members.cc index 034713d236e..794f25a5f35 100644 --- a/libstdc++-v3/config/locale/gnu/codecvt_members.cc +++ b/libstdc++-v3/config/locale/gnu/codecvt_members.cc @@ -37,8 +37,23 @@ namespace std _GLIBCXX_VISIBILITY(default) { _GLIBCXX_BEGIN_NAMESPACE_VERSION - // Specializations. #ifdef _GLIBCXX_USE_WCHAR_T +namespace +{ + // RAII type for changing and restoring the current thread's locale. + struct Guard + { +#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2) +explicit Guard(__c_locale loc) : old(__uselocale(loc)) { } +~Guard() { __uselocale(old); } +#else +explicit Guard(__c_locale) { } +#endif +__c_locale old; + }; +} + + // Specializations. codecvt_base::result codecvt:: do_out(state_type& __state, const intern_type* __from, @@ -46,22 +61,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION extern_type* __to, extern_type* __to_end, extern_type*& __to_next) const { -result __ret = ok; state_type __tmp_state(__state); - -#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2) -__c_locale __old = __uselocale(_M_c_locale_codecvt); -#endif +Guard g(_M_c_locale_codecvt); // wcsnrtombs is *very* fast but stops if encounters NUL characters: // in case we fall back to wcrtomb and then continue, in a loop. 
// NB: wcsnrtombs is a GNU extension -for (__from_next = __from, __to_next = __to; -__from_next < __from_end && __to_next < __to_end -&& __ret == ok;) +__from_next = __from; +__to_next = __to; +while (__from_next < __from_end) { - const intern_type* __from_chunk_end = wmemchr(__from_next, L'\0', - __from_end - __from_next); + if (__to_next >= __to_end) + return partial; + + const intern_type* __from_chunk_end + = wmemchr(__from_next, L'\0', __from_end - __from_next); if (!__from_chunk_end) __from_chunk_end = __from_end; @@ -77,12 +91,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION for (; __from < __from_next; ++__from) __to_next += wcrtomb(__to_next, *__from, &__tmp_state); __state = __tmp_state; - __ret = error; + return error; } else if (__from_next && __from_next < __from_chunk_end) { __to_next += __conv; - __ret = partial; + return partial; } else { @@ -90,13 +104,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION __to_next += __conv; } - if (__from_next < __from_end && __ret == ok) + if (__from_next < __from_end) { extern_type __buf[MB_LEN_MAX]; __tmp_state = __state; const s
[gcc r14-10352] [libstdc++] [testsuite] defer to check_vect_support* [PR115454]
https://gcc.gnu.org/g:b70af0bd2e33e9cc20dae45c131429a402fc8845 commit r14-10352-gb70af0bd2e33e9cc20dae45c131429a402fc8845 Author: Alexandre Oliva Date: Thu Jun 27 08:14:34 2024 -0300 [libstdc++] [testsuite] defer to check_vect_support* [PR115454] The newly-added testcase overrides the default dg-do action set by check_vect_support_and_set_flags (in libstdc++-dg/conformance.exp), so it attempts to run the test even if runtime vector support is not available. Remove the explicit dg-do directive, so that the default is honored, and the test is run if vector support is found, and only compiled otherwise. for libstdc++-v3/ChangeLog PR libstdc++/115454 * testsuite/experimental/simd/pr115454_find_last_set.cc: Defer to check_vect_support_and_set_flags's default dg-do action. (cherry picked from commit 95faa1bea7bdc7f92fcccb3543bfcbc8184c5e5b) Diff: --- libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc b/libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc index 25a713b4e94..4ade8601f27 100644 --- a/libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc +++ b/libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc @@ -1,5 +1,4 @@ // { dg-options "-std=gnu++17" } -// { dg-do run { target *-*-* } } // { dg-require-effective-target c++17 } // { dg-additional-options "-march=x86-64-v4" { target avx512f_runtime } } // { dg-require-cmath "" }
[gcc r13-8872] [libstdc++] [testsuite] defer to check_vect_support* [PR115454]
https://gcc.gnu.org/g:3de1c4985bebd1882b6643789daba24f2d11bafe commit r13-8872-g3de1c4985bebd1882b6643789daba24f2d11bafe Author: Alexandre Oliva Date: Thu Jun 27 08:32:15 2024 -0300 [libstdc++] [testsuite] defer to check_vect_support* [PR115454] The newly-added testcase overrides the default dg-do action set by check_vect_support_and_set_flags (in libstdc++-dg/conformance.exp), so it attempts to run the test even if runtime vector support is not available. Remove the explicit dg-do directive, so that the default is honored, and the test is run if vector support is found, and only compiled otherwise. for libstdc++-v3/ChangeLog PR libstdc++/115454 * testsuite/experimental/simd/pr115454_find_last_set.cc: Defer to check_vect_support_and_set_flags's default dg-do action. (cherry picked from commit 95faa1bea7bdc7f92fcccb3543bfcbc8184c5e5b) Diff: --- libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc b/libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc index 25a713b4e94..4ade8601f27 100644 --- a/libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc +++ b/libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc @@ -1,5 +1,4 @@ // { dg-options "-std=gnu++17" } -// { dg-do run { target *-*-* } } // { dg-require-effective-target c++17 } // { dg-additional-options "-march=x86-64-v4" { target avx512f_runtime } } // { dg-require-cmath "" }
[gcc r12-10585] [libstdc++] [testsuite] defer to check_vect_support* [PR115454]
https://gcc.gnu.org/g:95ca5f458251e21123e45ec52c38d629d39cd0e4 commit r12-10585-g95ca5f458251e21123e45ec52c38d629d39cd0e4 Author: Alexandre Oliva Date: Thu Jun 27 08:44:54 2024 -0300 [libstdc++] [testsuite] defer to check_vect_support* [PR115454] The newly-added testcase overrides the default dg-do action set by check_vect_support_and_set_flags (in libstdc++-dg/conformance.exp), so it attempts to run the test even if runtime vector support is not available. Remove the explicit dg-do directive, so that the default is honored, and the test is run if vector support is found, and only compiled otherwise. for libstdc++-v3/ChangeLog PR libstdc++/115454 * testsuite/experimental/simd/pr115454_find_last_set.cc: Defer to check_vect_support_and_set_flags's default dg-do action. (cherry picked from commit 95faa1bea7bdc7f92fcccb3543bfcbc8184c5e5b) Diff: --- libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc b/libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc index 25a713b4e94..4ade8601f27 100644 --- a/libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc +++ b/libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc @@ -1,5 +1,4 @@ // { dg-options "-std=gnu++17" } -// { dg-do run { target *-*-* } } // { dg-require-effective-target c++17 } // { dg-additional-options "-march=x86-64-v4" { target avx512f_runtime } } // { dg-require-cmath "" }
[gcc r15-1694] tree-optimization/115669 - fix SLP reduction association
https://gcc.gnu.org/g:7886830bb45c4f5dca0496d4deae9a45204d78f5 commit r15-1694-g7886830bb45c4f5dca0496d4deae9a45204d78f5 Author: Richard Biener Date: Thu Jun 27 11:26:08 2024 +0200 tree-optimization/115669 - fix SLP reduction association The following avoids associating a reduction path as that might get STMT_VINFO_REDUC_IDX out-of-sync with the SLP operand order. This is a latent issue with SLP reductions but now easily exposed as we're doing single-lane SLP reductions. When we achieved SLP only we can move and update this meta-data. PR tree-optimization/115669 * tree-vect-slp.cc (vect_build_slp_tree_2): Do not reassociate chains that participate in a reduction. * gcc.dg/vect/pr115669.c: New testcase. Diff: --- gcc/testsuite/gcc.dg/vect/pr115669.c | 22 ++ gcc/tree-vect-slp.cc | 3 +++ 2 files changed, 25 insertions(+) diff --git a/gcc/testsuite/gcc.dg/vect/pr115669.c b/gcc/testsuite/gcc.dg/vect/pr115669.c new file mode 100644 index 000..361a17a64e6 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr115669.c @@ -0,0 +1,22 @@ +/* { dg-additional-options "-fwrapv" } */ + +#include "tree-vect.h" + +int a = 10; +unsigned b; +long long c[100]; +int foo() +{ + long long *d = c; + for (short e = 0; e < a; e++) +b += ~(d ? d[e] : 0); + return b; +} + +int main() +{ + check_vect (); + if (foo () != -10) +abort (); + return 0; +} diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index 1252b613125..174b4800fa9 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -2069,6 +2069,9 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node, else if (is_a (vinfo) /* ??? We don't handle !vect_internal_def defs below. */ && STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def + /* ??? Do not associate a reduction, this will wreck REDUC_IDX + mapping as long as that exists on the stmt_info level. 
*/ + && STMT_VINFO_REDUC_IDX (stmt_info) == -1 && is_gimple_assign (stmt_info->stmt) && (associative_tree_code (gimple_assign_rhs_code (stmt_info->stmt)) || gimple_assign_rhs_code (stmt_info->stmt) == MINUS_EXPR)
[gcc r11-11541] coroutines: Await expressions are not allowed in handlers [PR 99710].
https://gcc.gnu.org/g:57482cadeb12af2dd52b381b0766776d1e8ec59b commit r11-11541-g57482cadeb12af2dd52b381b0766776d1e8ec59b Author: Iain Sandoe Date: Sat Oct 2 14:43:39 2021 +0100 coroutines: Await expressions are not allowed in handlers [PR 99710]. C++20 [expr.await] / 2 An await-expression shall appear only in a potentially-evaluated expression within the compound-statement of a function-body outside of a handler. Signed-off-by: Iain Sandoe PR c++/99710 gcc/cp/ChangeLog: * coroutines.cc (await_statement_walker): Report an error if an await expression is found in a handler body. gcc/testsuite/ChangeLog: * g++.dg/coroutines/pr99710.C: New test. (cherry picked from commit 650beb110538097b9c3e8600149b333a83e7e836) Diff: --- gcc/cp/coroutines.cc | 17 - gcc/testsuite/g++.dg/coroutines/pr99710.C | 25 + 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc index 34d9d3e7d61..71246e99a6f 100644 --- a/gcc/cp/coroutines.cc +++ b/gcc/cp/coroutines.cc @@ -3713,7 +3713,22 @@ await_statement_walker (tree *stmt, int *do_subtree, void *d) } return NULL_TREE; /* Done. */ } - break; + break; + case HANDLER: + { + /* [expr.await] An await-expression shall appear only in a + potentially-evaluated expression within the compound-statement + of a function-body outside of a handler. */ + tree *await_ptr; + hash_set visited; + if (!(cp_walk_tree (&HANDLER_BODY (expr), find_any_await, + &await_ptr, &visited))) + return NULL_TREE; /* All OK. */ + location_t loc = EXPR_LOCATION (*await_ptr); + error_at (loc, "await expressions are not permitted in handlers"); + return NULL_TREE; /* This is going to fail later anyway. 
*/ + } + break; } else if (EXPR_P (expr)) { diff --git a/gcc/testsuite/g++.dg/coroutines/pr99710.C b/gcc/testsuite/g++.dg/coroutines/pr99710.C new file mode 100644 index 000..e4f7116b8d7 --- /dev/null +++ b/gcc/testsuite/g++.dg/coroutines/pr99710.C @@ -0,0 +1,25 @@ +#include + +struct task { +struct promise_type { +std::suspend_always initial_suspend(); +std::suspend_always final_suspend() noexcept; +task get_return_object(); +void return_void(); +void unhandled_exception(); +}; +}; + +task +my_coro () +{ + try +{ } + catch (...) +{ + // [expr.await] An await-expression shall appear only in a potentially- + // evaluated expression within the compound-statement of a function-body + // outside of a handler + co_await std::suspend_always{}; // { dg-error "await expressions are not permitted in handlers" } +} +}
[gcc r11-11542] coroutines: Pass lvalues to user-defined operator new [PR 100772].
https://gcc.gnu.org/g:f647906ef227bc22af224d955a408d776cfddb04 commit r11-11542-gf647906ef227bc22af224d955a408d776cfddb04 Author: Iain Sandoe Date: Sun Oct 3 19:46:09 2021 +0100 coroutines: Pass lvalues to user-defined operator new [PR 100772]. The wording of the standard has been clarified to be explicit that the parameters to any user-defined operator-new in the promise class should be lvalues. Signed-off-by: Iain Sandoe PR c++/100772 gcc/cp/ChangeLog: * coroutines.cc (morph_fn_to_coro): Convert function parms from reference before constructing any operator-new args list. gcc/testsuite/ChangeLog: * g++.dg/coroutines/pr100772-a.C: New test. * g++.dg/coroutines/pr100772-b.C: New test. (cherry picked from commit 921942a8a106cb53994c21162922e4934eb3a3e0) Diff: --- gcc/cp/coroutines.cc | 8 +-- gcc/testsuite/g++.dg/coroutines/pr100772-a.C | 77 +++ gcc/testsuite/g++.dg/coroutines/pr100772-b.C | 93 3 files changed, 174 insertions(+), 4 deletions(-) diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc index 71246e99a6f..04c72ddd48b 100644 --- a/gcc/cp/coroutines.cc +++ b/gcc/cp/coroutines.cc @@ -4613,8 +4613,8 @@ morph_fn_to_coro (tree orig, tree *resumer, tree *destroyer) If the lookup finds an allocation function in the scope of the promise type, overload resolution is performed on a function call created by assembling an argument list. The first argument is the amount of space - requested, and has type std::size_t. The succeeding arguments are - those of the original function. */ + requested, and has type std::size_t. The lvalues p1...pn are the + succeeding arguments.. */ vec *args = make_tree_vector (); vec_safe_push (args, resizeable); /* Space needed. 
*/ @@ -4632,10 +4632,10 @@ morph_fn_to_coro (tree orig, tree *resumer, tree *destroyer) this_ref = convert_to_reference (tt, this_ref, CONV_STATIC, LOOKUP_NORMAL , NULL_TREE, tf_warning_or_error); - vec_safe_push (args, this_ref); + vec_safe_push (args, convert_from_reference (this_ref)); } else - vec_safe_push (args, arg); + vec_safe_push (args, convert_from_reference (arg)); } /* Note the function selected; we test to see if it's NOTHROW. */ diff --git a/gcc/testsuite/g++.dg/coroutines/pr100772-a.C b/gcc/testsuite/g++.dg/coroutines/pr100772-a.C new file mode 100644 index 000..a325d384fc3 --- /dev/null +++ b/gcc/testsuite/g++.dg/coroutines/pr100772-a.C @@ -0,0 +1,77 @@ +// { dg-additional-options "-fsyntax-only " } +#ifdef __clang__ +#include +namespace std { + using namespace std::experimental; +} +#else +#include +#endif + +struct Task +{ +struct promise_type +{ + void return_void() const noexcept {} + + void* operator new(std::size_t, auto &&...args) noexcept + { +static_assert(sizeof...(args) > 0); +static_assert(sizeof...(args) == 2); + + return nullptr; + } + + void operator delete(void *, std::size_t) noexcept + { + } + +static Task get_return_object_on_allocation_failure() noexcept +{ +return {}; +} + +Task get_return_object() noexcept +{ +return Task{ *this }; +} + +std::suspend_always initial_suspend() noexcept +{ +return {}; +} + +std::suspend_always final_suspend() noexcept +{ +return {}; +} + +void unhandled_exception() noexcept {} +}; + +using promise_handle = std::coroutine_handle; + +Task() = default; +Task(promise_type & promise) noexcept +: m_handle{ promise_handle::from_promise(promise) } +{} + +~Task() +{ +if (m_handle.address()) { m_handle.destroy(); } +} + +promise_handle m_handle{}; +}; + + +Task Foo(auto && ... 
args) noexcept +{ +co_return; +} + +int main() +{ +int v; +Foo(v, 2134); +} diff --git a/gcc/testsuite/g++.dg/coroutines/pr100772-b.C b/gcc/testsuite/g++.dg/coroutines/pr100772-b.C new file mode 100644 index 000..6cdf8d1e529 --- /dev/null +++ b/gcc/testsuite/g++.dg/coroutines/pr100772-b.C @@ -0,0 +1,93 @@ +#ifdef __clang__ +#include +namespace std { + using namespace std::experimental; +} +#else +#include +#endif +#include +#include +#include // needed for abi::__cxa_demangle +#include + +std::shared_ptr cppDemangle(const char *abiName) +{ + int status; + char *ret = abi::__cxa_demangle(abiName, 0, 0, &status); + + /* NOTE:
[gcc r11-11543] coroutines: Fail with a sorry when presented with a VLA [PR 101765].
https://gcc.gnu.org/g:1d5779274ce9807358f9e04f1112b65c6ed6c284 commit r11-11543-g1d5779274ce9807358f9e04f1112b65c6ed6c284 Author: Iain Sandoe Date: Sat Oct 2 16:15:38 2021 +0100 coroutines: Fail with a sorry when presented with a VLA [PR 101765]. We do not support this yet. Signed-off-by: Iain Sandoe PR c++/101765 gcc/cp/ChangeLog: * coroutines.cc (register_local_var_uses): Emit a sorry if we encounter a VLA in the coroutine local variables. gcc/testsuite/ChangeLog: * g++.dg/coroutines/pr101765.C: New test. (cherry picked from commit fdf0b6ce6c1cfa1c328c0c40473c71ca11fd8303) Diff: --- gcc/cp/coroutines.cc | 10 +++ gcc/testsuite/g++.dg/coroutines/pr101765.C | 45 ++ 2 files changed, 55 insertions(+) diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc index 04c72ddd48b..406c85c4176 100644 --- a/gcc/cp/coroutines.cc +++ b/gcc/cp/coroutines.cc @@ -3941,6 +3941,16 @@ register_local_var_uses (tree *stmt, int *do_subtree, void *d) if (local_var.is_static) continue; + poly_uint64 size; + if (TREE_CODE (lvtype) == ARRAY_TYPE + && !poly_int_tree_p (DECL_SIZE_UNIT (lvar), &size)) + { + sorry_at (local_var.def_loc, "variable length arrays are not" + " yet supported in coroutines"); + /* Ignore it, this is broken anyway. */ + continue; + } + lvd->local_var_seen = true; /* If this var is a lambda capture proxy, we want to leave it alone, and later rewrite the DECL_VALUE_EXPR to indirect through the diff --git a/gcc/testsuite/g++.dg/coroutines/pr101765.C b/gcc/testsuite/g++.dg/coroutines/pr101765.C new file mode 100644 index 000..49a49d11299 --- /dev/null +++ b/gcc/testsuite/g++.dg/coroutines/pr101765.C @@ -0,0 +1,45 @@ +// We cannot compile this yet, much run it - but one day it might be +// feasible, so do the minimum for now. 
+// { dg-additional-options " -fsyntax-only -Wno-vla" } + +#include "coro.h" + +// boiler-plate for tests of codegen +#include "coro1-ret-int-yield-int.h" + +struct coro1 +foo (int arg) noexcept +{ + PRINTF ("foo arg = %d\n", arg); + char arr[arg]; /* { dg-message "sorry, unimplemented: variable length arrays are not yet supported in coroutines" "" { target *-*-* } } */ + if (arg < 4) +co_return -6174; + else +for (int i = 0; i < arg; ++i) arr[i] = (char) i; + co_yield (int) arr[2]; + co_return (int) arr[3]; +} + +int main () +{ + PRINT ("main: create coro1"); + struct coro1 x = foo (10); + PRINT ("main: got coro1 - resuming"); + if (x.handle.done()) +abort(); + x.handle.resume(); + PRINT ("main: after resume"); + int y = x.handle.promise().get_value(); + if ( y == -6174 ) +{ + PRINT ("main: saw -6174"); + return 1; +} + else if ( y != 2 ) +abort; + x.handle.resume(); + y = x.handle.promise().get_value(); + if ( y != 3 ) +abort (); + return 0; +}
[gcc r11-11544] c++, coroutines: Improve check for throwing final await [PR104051].
https://gcc.gnu.org/g:f4cdbf1f757fa9525d70780546d7daa43dfb129f commit r11-11544-gf4cdbf1f757fa9525d70780546d7daa43dfb129f Author: Iain Sandoe Date: Mon Apr 18 16:23:30 2022 +0100 c++, coroutines: Improve check for throwing final await [PR104051]. We check that the final_suspend () method returns a sane type (i.e. a class or structure) but, unfortunately, that check has to be later than the one for a throwing case. If the user returns some nonsensical type from the method, we need to handle that in the checking for noexcept. Signed-off-by: Iain Sandoe PR c++/104051 gcc/cp/ChangeLog: * coroutines.cc (coro_diagnose_throwing_final_aw_expr): Handle non-target expression inputs. gcc/testsuite/ChangeLog: * g++.dg/coroutines/pr104051.C: New test. (cherry picked from commit 7b96274a340bc0e9bcaef9baff3a44ec2f12c3df) Diff: --- gcc/cp/coroutines.cc | 13 +++-- gcc/testsuite/g++.dg/coroutines/pr104051.C | 29 + 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc index 406c85c4176..b12d74bf975 100644 --- a/gcc/cp/coroutines.cc +++ b/gcc/cp/coroutines.cc @@ -877,13 +877,14 @@ coro_diagnose_throwing_fn (tree fndecl) static bool coro_diagnose_throwing_final_aw_expr (tree expr) { - tree t = TARGET_EXPR_INITIAL (expr); + if (TREE_CODE (expr) == TARGET_EXPR) +expr = TARGET_EXPR_INITIAL (expr); tree fn = NULL_TREE; - if (TREE_CODE (t) == CALL_EXPR) -fn = CALL_EXPR_FN(t); - else if (TREE_CODE (t) == AGGR_INIT_EXPR) -fn = AGGR_INIT_EXPR_FN (t); - else if (TREE_CODE (t) == CONSTRUCTOR) + if (TREE_CODE (expr) == CALL_EXPR) +fn = CALL_EXPR_FN (expr); + else if (TREE_CODE (expr) == AGGR_INIT_EXPR) +fn = AGGR_INIT_EXPR_FN (expr); + else if (TREE_CODE (expr) == CONSTRUCTOR) return false; else { diff --git a/gcc/testsuite/g++.dg/coroutines/pr104051.C b/gcc/testsuite/g++.dg/coroutines/pr104051.C new file mode 100644 index 000..ce7ae55405a --- /dev/null +++ b/gcc/testsuite/g++.dg/coroutines/pr104051.C @@ -0,0 +1,29 @@ +// { dg-additional-options 
"-fsyntax-only" } +#include +#include +template struct promise { + struct final_awaitable { +bool await_ready() noexcept; +template +std::coroutine_handle<> +await_suspend(std::coroutine_handle) noexcept; +void await_resume() noexcept; + }; + auto get_return_object() { +return std::coroutine_handle::from_promise(*this); + } + auto initial_suspend() { return std::suspend_always(); } + auto final_suspend() noexcept { return true; } + void unhandled_exception(); +}; +template struct task { + using promise_type = promise; + task(std::coroutine_handle>); + bool await_ready(); + std::coroutine_handle<> await_suspend(std::coroutine_handle<>); + T await_resume(); +}; +task> foo() { // { dg-error {awaitable type 'bool' is not a structure} } + while ((co_await foo()).empty()) +; +}
[gcc r13-8873] aarch64: Fix +nocrypto handling
https://gcc.gnu.org/g:c93a9bba743ac236f6045ba7aafbc12a83726c48 commit r13-8873-gc93a9bba743ac236f6045ba7aafbc12a83726c48 Author: Andrew Carlotti Date: Fri Nov 24 17:06:07 2023 + aarch64: Fix +nocrypto handling Additionally, replace all checks for the AARCH64_FL_CRYPTO bit with checks for (AARCH64_FL_AES | AARCH64_FL_SHA2) instead. The value of the AARCH64_FL_CRYPTO bit within isa_flags is now ignored, but it is retained because removing it would make processing the data in option-extensions.def significantly more complex. This bug should have been picked up by an existing test, but a missing newline meant that the pattern incorrectly allowed "+crypto+nocrypto". gcc/ChangeLog: PR target/115618 * common/config/aarch64/aarch64-common.cc (aarch64_get_extension_string_for_isa_flags): Fix generation of the "+nocrypto" extension. * config/aarch64/aarch64.h (AARCH64_ISA_CRYPTO): Remove. (TARGET_CRYPTO): Remove. * config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): Don't use TARGET_CRYPTO. gcc/testsuite/ChangeLog: PR target/115618 * gcc.target/aarch64/options_set_4.c: Add terminating newline. * gcc.target/aarch64/options_set_27.c: New test. (cherry picked from commit 8d30107455f2309854ced3d65fb07dc1f2c357c0) Diff: --- gcc/common/config/aarch64/aarch64-common.cc | 35 +-- gcc/config/aarch64/aarch64-c.cc | 2 +- gcc/config/aarch64/aarch64.h | 10 +++ gcc/testsuite/gcc.target/aarch64/options_set_27.c | 9 ++ gcc/testsuite/gcc.target/aarch64/options_set_4.c | 2 +- 5 files changed, 43 insertions(+), 15 deletions(-) diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc index 20bc4e1291b..673407ca9a8 100644 --- a/gcc/common/config/aarch64/aarch64-common.cc +++ b/gcc/common/config/aarch64/aarch64-common.cc @@ -310,6 +310,7 @@ aarch64_get_extension_string_for_isa_flags But in order to make the output more readable, it seems better to add the strings in definition order. 
*/ aarch64_feature_flags added = 0; + auto flags_crypto = AARCH64_FL_AES | AARCH64_FL_SHA2; for (unsigned int i = ARRAY_SIZE (all_extensions); i-- > 0; ) { auto &opt = all_extensions[i]; @@ -319,7 +320,7 @@ aarch64_get_extension_string_for_isa_flags per-feature crypto flags. */ auto flags = opt.flag_canonical; if (flags == AARCH64_FL_CRYPTO) - flags = AARCH64_FL_AES | AARCH64_FL_SHA2; + flags = flags_crypto; if ((flags & isa_flags & (explicit_flags | ~current_flags)) == flags) { @@ -338,14 +339,32 @@ aarch64_get_extension_string_for_isa_flags not have an HWCAPs then it shouldn't be taken into account for feature detection because one way or another we can't tell if it's available or not. */ + for (auto &opt : all_extensions) -if (opt.native_detect_p - && (opt.flag_canonical & current_flags & ~isa_flags)) - { - current_flags &= ~opt.flags_off; - outstr += "+no"; - outstr += opt.name; - } +{ + auto flags = opt.flag_canonical; + /* As a special case, don't emit "+noaes" or "+nosha2" when we could emit +"+nocrypto" instead, in order to support assemblers that predate the +separate per-feature crypto flags. Only allow "+nocrypto" when "sm4" +is not already enabled (to avoid dependending on whether "+nocrypto" +also disables "sm4"). */ + if (flags & flags_crypto + && (flags_crypto & current_flags & ~isa_flags) == flags_crypto + && !(current_flags & AARCH64_FL_SM4)) + continue; + + if (flags == AARCH64_FL_CRYPTO) + /* If either crypto flag needs removing here, then both do. 
*/ + flags = flags_crypto; + + if (opt.native_detect_p + && (flags & current_flags & ~isa_flags)) + { + current_flags &= ~opt.flags_off; + outstr += "+no"; + outstr += opt.name; + } +} return outstr; } diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc index 578ec6f45b0..6c5331a7625 100644 --- a/gcc/config/aarch64/aarch64-c.cc +++ b/gcc/config/aarch64/aarch64-c.cc @@ -139,7 +139,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) aarch64_def_or_undef (TARGET_ILP32, "_ILP32", pfile); aarch64_def_or_undef (TARGET_ILP32, "__ILP32__", pfile); - aarch64_def_or_undef (TARGET_CRYPTO, "__ARM_FEATURE_CRYPTO", pfile); + aarch64_def_or_undef (TARGET_AES && TARGET_SHA2, "__ARM_FEATURE_CRYPTO", pfile); aarch64_def_or_undef (TARGET_SIMD_RDMA, "__ARM_FEATURE_QRDMX", pfile); aarch64_def_or_undef (TARGET_SVE, "__ARM_FEATURE_SVE", pfile); cpp_undef (pfile, "__ARM_FEATURE_SVE_BITS"); diff --git a/gcc/config/aarch64/aarch
[gcc/aoliva/heads/testbase] (51 commits) libstdc++: Fix std::codecvt for e
The branch 'aoliva/heads/testbase' was updated to point to: 73ad57c244c... libstdc++: Fix std::codecvt for e It previously pointed to: 5a10ac0e592... optab: Add isnormal_optab for isnormal builtin Diff: Summary of changes (added commits): --- 73ad57c... libstdc++: Fix std::codecvt for e (*) 95faa1b... [libstdc++] [testsuite] defer to check_vect_support* [PR115 (*) bcdbb85... Avoid global bitmap space in ranger. (*) dafa750... libstdc++: Fix std::format for chrono::duration with unsign (*) 8fd84bc... libstdc++: Add debug assertions to std::vector [PR103 (*) cfc9fa3... libstdc++: Enable more debug assertions during constant eva (*) 5c8b7fc... ada: Remove last uses of System.Address_Operations in runti (*) fdbc04d... ada: Reject ambiguous function calls in interpolated string (*) d4c9907... ada: Add missing dimension information for target names (*) 432b8a3... ada: Fix array-manipulating code in Mdll (*) 089bb07... ada: Bug using user defined string literals with interpolat (*) 60ca710... ada: Overridden operation field not correctly set for contr (*) 3cb7e22... ada: Implement first half of Generalized Finalization (*) 9449524... i386: Refactor vcvttps2qq/vcvtqq2ps patterns. (*) 4385dc9... vect: support direct conversion under x86-64-v3. (*) e5f8a39... vect: Support v4hi -> v4qi. (*) c320a7e... vect: generate suitable convert insn for int -> int, float (*) b55798c... RISC-V: Add testcases for vector truncate after .SAT_SUB (*) 2280e88... LoongArch: NFC: Dedup and sort the comment in loongarch_pri (*) 94aade0... LoongArch: Tweak IOR rtx_cost for bstrins (*) b8153b5... Fix wrong cost of MEM when addr is a lea. (*) 212441e... Internal-fn: Support new IFN SAT_TRUNC for unsigned scalar (*) f2476a2... Vect: Support truncate after .SAT_SUB pattern in zip (*) c7cb0dd... tree-optimization/115652 - amend last fix (*) b7ba067... tree-optimization/115493 - complete previous fix (*) 9c56dc7... Daily bump. (*) 0731985... 
libstdc++: Add script to update docs for a new release bran (*) 6eff233... libstdc++: Remove duplicate test (*) e65b662... libstdc++: Increase timeouts for PSTL tests in debug mode [ (*) 003ce8a... libstdc++: Work around some PSTL test failures for debug mo (*) 0ca8d56... libstdc++: Fix std::chrono::tzdb to work with vanguard form (*) 629257b... tree-optimization/115629 - missed tail merging (*) 86a3dbe... RISC-V: Update testcase comments to point to PSABI rather t (*) aa89e86... RISC-V: Consolidate amo testcase variants (*) 08498f8... RISC-V: Rename amo testcases (*) e499aee... rs6000, change altivec*-runnable.c test file names (*) 0699de2... rs6000, altivec-2-runnable.c update the require-effective-t (*) 4bf719b... rs6000, altivec-1-runnable.c update the require-effective-t (*) 47b68cd... [committed] Remove compromised sh test (*) 03a3dff... [committed][RISC-V] Fix expected output for thead store pai (*) f80db54... tree-optimization/115652 - adjust insertion gsi for SLP (*) 7a9b535... Record edge true/false value for gcov (*) 0bf0021... Use the term MC/DC in help for gcov --conditions (*) 229bf66... Add section on MC/DC in gcov manual (*) 19f630e... Use auto_vec for memory release on return (*) ad20ad7... arm: make arm_predict_doloop_p reject loops with calls (*) 7fada36... [aarch64] Add support for -mcpu=grace (*) f4e847b... i386: Remove declaration of unused functions (*) 812c70b... rs6000: Fix wrong RTL patterns for vector merge high/low sh (*) 62520e4... rs6000: Fix wrong RTL patterns for vector merge high/low ch (*) 453b1d2... tree-optimization/115646 - ICE with pow shrink-wrapping fro (*) (*) This commit already exists in another branch. Because the reference `refs/users/aoliva/heads/testbase' matches your hooks.email-new-commits-only configuration, no separate email is sent for this commit.
[gcc/aoliva/heads/testme] (55 commits) Avoid dropping bits from num/den in fixed-point types
The branch 'aoliva/heads/testme' was updated to point to: dec24e79d4e... Avoid dropping bits from num/den in fixed-point types It previously pointed to: 6668cf365ef... [i386] drop static decls moved to mingw/winnt-dll.cc Diff: !!! WARNING: THE FOLLOWING COMMITS ARE NO LONGER ACCESSIBLE (LOST): --- 6668cf3... [i386] drop static decls moved to mingw/winnt-dll.cc c658106... [libstdc++] [testsuite] defer to check_vect_support* [PR115 Summary of changes (added commits): --- dec24e7... Avoid dropping bits from num/den in fixed-point types 9235979... Map unpacked type to packed deduped type for debug info c0c2a61... make_type_from_size: fix compare for type reuse d16dbd4... Follow only proper TYPE_DEBUG_TYPE 73ad57c... libstdc++: Fix std::codecvt for e (*) 95faa1b... [libstdc++] [testsuite] defer to check_vect_support* [PR115 (*) bcdbb85... Avoid global bitmap space in ranger. (*) dafa750... libstdc++: Fix std::format for chrono::duration with unsign (*) 8fd84bc... libstdc++: Add debug assertions to std::vector [PR103 (*) cfc9fa3... libstdc++: Enable more debug assertions during constant eva (*) 5c8b7fc... ada: Remove last uses of System.Address_Operations in runti (*) fdbc04d... ada: Reject ambiguous function calls in interpolated string (*) d4c9907... ada: Add missing dimension information for target names (*) 432b8a3... ada: Fix array-manipulating code in Mdll (*) 089bb07... ada: Bug using user defined string literals with interpolat (*) 60ca710... ada: Overridden operation field not correctly set for contr (*) 3cb7e22... ada: Implement first half of Generalized Finalization (*) 9449524... i386: Refactor vcvttps2qq/vcvtqq2ps patterns. (*) 4385dc9... vect: support direct conversion under x86-64-v3. (*) e5f8a39... vect: Support v4hi -> v4qi. (*) c320a7e... vect: generate suitable convert insn for int -> int, float (*) b55798c... RISC-V: Add testcases for vector truncate after .SAT_SUB (*) 2280e88... 
LoongArch: NFC: Dedup and sort the comment in loongarch_pri (*) 94aade0... LoongArch: Tweak IOR rtx_cost for bstrins (*) b8153b5... Fix wrong cost of MEM when addr is a lea. (*) 212441e... Internal-fn: Support new IFN SAT_TRUNC for unsigned scalar (*) f2476a2... Vect: Support truncate after .SAT_SUB pattern in zip (*) c7cb0dd... tree-optimization/115652 - amend last fix (*) b7ba067... tree-optimization/115493 - complete previous fix (*) 9c56dc7... Daily bump. (*) 0731985... libstdc++: Add script to update docs for a new release bran (*) 6eff233... libstdc++: Remove duplicate test (*) e65b662... libstdc++: Increase timeouts for PSTL tests in debug mode [ (*) 003ce8a... libstdc++: Work around some PSTL test failures for debug mo (*) 0ca8d56... libstdc++: Fix std::chrono::tzdb to work with vanguard form (*) 629257b... tree-optimization/115629 - missed tail merging (*) 86a3dbe... RISC-V: Update testcase comments to point to PSABI rather t (*) aa89e86... RISC-V: Consolidate amo testcase variants (*) 08498f8... RISC-V: Rename amo testcases (*) e499aee... rs6000, change altivec*-runnable.c test file names (*) 0699de2... rs6000, altivec-2-runnable.c update the require-effective-t (*) 4bf719b... rs6000, altivec-1-runnable.c update the require-effective-t (*) 47b68cd... [committed] Remove compromised sh test (*) 03a3dff... [committed][RISC-V] Fix expected output for thead store pai (*) f80db54... tree-optimization/115652 - adjust insertion gsi for SLP (*) 7a9b535... Record edge true/false value for gcov (*) 0bf0021... Use the term MC/DC in help for gcov --conditions (*) 229bf66... Add section on MC/DC in gcov manual (*) 19f630e... Use auto_vec for memory release on return (*) ad20ad7... arm: make arm_predict_doloop_p reject loops with calls (*) 7fada36... [aarch64] Add support for -mcpu=grace (*) f4e847b... i386: Remove declaration of unused functions (*) 812c70b... rs6000: Fix wrong RTL patterns for vector merge high/low sh (*) 62520e4... 
rs6000: Fix wrong RTL patterns for vector merge high/low ch (*) 453b1d2... tree-optimization/115646 - ICE with pow shrink-wrapping fro (*) (*) This commit already exists in another branch. Because the reference `refs/users/aoliva/heads/testme' matches your hooks.email-new-commits-only configuration, no separate email is sent for this commit.
[gcc(refs/users/aoliva/heads/testme)] Follow only proper TYPE_DEBUG_TYPE
https://gcc.gnu.org/g:d16dbd45239df22a2adfffcc248c7958224d2e04 commit d16dbd45239df22a2adfffcc248c7958224d2e04 Author: Alexandre Oliva Date: Thu Jun 27 09:10:29 2024 -0300 Follow only proper TYPE_DEBUG_TYPE TYPE_DEBUG_TYPE's storage is shared with other sorts of references to types, so it shouldn't be accessed unless TYPE_CAN_HAVE_DEBUG_TYPE_P holds. for gcc/ada/ChangeLog * gcc-interface/misc.cc (gnat_get_array_descr_info): Only follow TYPE_DEBUG_TYPE if TYPE_CAN_HAVE_DEBUG_TYPE_P. Diff: --- gcc/ada/gcc-interface/misc.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gcc/ada/gcc-interface/misc.cc b/gcc/ada/gcc-interface/misc.cc index 4f6f6774fe7..f77629ce70b 100644 --- a/gcc/ada/gcc-interface/misc.cc +++ b/gcc/ada/gcc-interface/misc.cc @@ -967,7 +967,8 @@ gnat_get_array_descr_info (const_tree const_type, while (true) { - if (TYPE_DEBUG_TYPE (source_element_type)) + if (TYPE_CAN_HAVE_DEBUG_TYPE_P (source_element_type) + && TYPE_DEBUG_TYPE (source_element_type)) source_element_type = TYPE_DEBUG_TYPE (source_element_type); else if (TYPE_IS_PADDING_P (source_element_type)) source_element_type
[gcc(refs/users/aoliva/heads/testme)] make_type_from_size: fix compare for type reuse
https://gcc.gnu.org/g:c0c2a6152c78b54f3cc6737b667bf1aa53929713 commit c0c2a6152c78b54f3cc6737b667bf1aa53929713 Author: Alexandre Oliva Date: Thu Jun 27 09:11:01 2024 -0300 make_type_from_size: fix compare for type reuse When make_type_from_size is called with a biased type, for an entity that isn't explicitly biased, we may refrain from reusing the given type because it doesn't seem to match, and then proceed to create an exact copy of that type. Compute earlier the biased status of the expected type, early enough for the suitability check of the given type. Modify for_biased instead of biased_p, so that biased_p remains with the given type's status for the comparison. for gcc/ada/ChangeLog * gcc-interface/utils.cc (make_type_from_size): Fix type reuse by combining biased_p and for_biased earlier. Hold the combination in for_biased, adjusting later uses. Diff: --- gcc/ada/gcc-interface/utils.cc | 11 +++ 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/gcc/ada/gcc-interface/utils.cc b/gcc/ada/gcc-interface/utils.cc index 0eb9af8d4a2..d8d42f57b89 100644 --- a/gcc/ada/gcc-interface/utils.cc +++ b/gcc/ada/gcc-interface/utils.cc @@ -1383,6 +1383,11 @@ make_type_from_size (tree type, tree size_tree, bool for_biased) biased_p = (TREE_CODE (type) == INTEGER_TYPE && TYPE_BIASED_REPRESENTATION_P (type)); + /* FOR_BIASED initially refers to the entity's representation, +not to its type's. The type we're to return must take both +into account. */ + for_biased |= biased_p; + /* Integer types with precision 0 are forbidden. */ if (size == 0) size = 1; @@ -1394,12 +1399,10 @@ make_type_from_size (tree type, tree size_tree, bool for_biased) || size > (Enable_128bit_Types ? 128 : LONG_LONG_TYPE_SIZE)) break; - biased_p |= for_biased; - /* The type should be an unsigned type if the original type is unsigned or if the lower bound is constant and non-negative or if the type is biased, see E_Signed_Integer_Subtype case of gnat_to_gnu_entity. 
*/ - if (type_unsigned_for_rm (type) || biased_p) + if (type_unsigned_for_rm (type) || for_biased) new_type = make_unsigned_type (size); else new_type = make_signed_type (size); @@ -1409,7 +1412,7 @@ make_type_from_size (tree type, tree size_tree, bool for_biased) /* Copy the name to show that it's essentially the same type and not a subrange type. */ TYPE_NAME (new_type) = TYPE_NAME (type); - TYPE_BIASED_REPRESENTATION_P (new_type) = biased_p; + TYPE_BIASED_REPRESENTATION_P (new_type) = for_biased; SET_TYPE_RM_SIZE (new_type, bitsize_int (size)); return new_type;
[gcc(refs/users/aoliva/heads/testme)] Map unpacked type to packed deduped type for debug info
https://gcc.gnu.org/g:92359793fc3d8c94a6704e518555807227b21bea commit 92359793fc3d8c94a6704e518555807227b21bea Author: Alexandre Oliva Date: Thu Jun 27 09:11:27 2024 -0300 Map unpacked type to packed deduped type for debug info Avoid creating unnecessary copies of types in make_type_from_size. Cache the packed version of a biased type in TYPE_DEBUG_TYPE, so as to map the unpacked type to it. for gcc/ada/ChangeLog * gcc-interface/utils.cc (make_type_from_size): Cache packed variant, and map unpacked type to it in debug info. for gcc/testsuite/ChangeLog * gnat.dg/bias1.adb: Count occurrences of -7.*DW_AT_GNU_bias. Diff: --- gcc/ada/gcc-interface/utils.cc | 19 +++ gcc/testsuite/gnat.dg/bias1.adb | 3 ++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/gcc/ada/gcc-interface/utils.cc b/gcc/ada/gcc-interface/utils.cc index d8d42f57b89..daf8d7ccdc5 100644 --- a/gcc/ada/gcc-interface/utils.cc +++ b/gcc/ada/gcc-interface/utils.cc @@ -1399,6 +1399,15 @@ make_type_from_size (tree type, tree size_tree, bool for_biased) || size > (Enable_128bit_Types ? 128 : LONG_LONG_TYPE_SIZE)) break; + /* If we've already created this type, the base type is supposed +to map to it. Check that it is what we expect. */ + if (TYPE_CAN_HAVE_DEBUG_TYPE_P (type) + && (new_type = TYPE_DEBUG_TYPE (type)) + && TYPE_PRECISION (new_type) == size + && ((TREE_CODE (new_type) == INTEGER_TYPE + && TYPE_BIASED_REPRESENTATION_P (new_type)) == for_biased)) + return new_type; + /* The type should be an unsigned type if the original type is unsigned or if the lower bound is constant and non-negative or if the type is biased, see E_Signed_Integer_Subtype case of gnat_to_gnu_entity. 
*/ @@ -1414,6 +1423,16 @@ make_type_from_size (tree type, tree size_tree, bool for_biased) TYPE_NAME (new_type) = TYPE_NAME (type); TYPE_BIASED_REPRESENTATION_P (new_type) = for_biased; SET_TYPE_RM_SIZE (new_type, bitsize_int (size)); + + /* Enable us to avoid creating the same narrower type multiple +times, and avoid duplication in debug information, by mapping +the wider type to the narrower version. If biasing is +different, we use the narrower type for debug information. */ + if (TYPE_CAN_HAVE_DEBUG_TYPE_P (type) + && !TYPE_DEBUG_TYPE (type) + && biased_p == for_biased) + SET_TYPE_DEBUG_TYPE (type, new_type); + return new_type; case RECORD_TYPE: diff --git a/gcc/testsuite/gnat.dg/bias1.adb b/gcc/testsuite/gnat.dg/bias1.adb index 016a159b692..d9a00a1aa45 100644 --- a/gcc/testsuite/gnat.dg/bias1.adb +++ b/gcc/testsuite/gnat.dg/bias1.adb @@ -1,6 +1,7 @@ -- { dg-do compile } -- { dg-options "-cargs -g -dA -gnatws -fgnat-encodings=gdb -margs" } -- { dg-final { scan-assembler "DW_AT_GNU_bias" } } +-- { dg-final { scan-assembler-times "-7.*DW_AT_GNU_bias" 1 } } procedure Bias1 is type Small is range -7 .. -4; @@ -31,4 +32,4 @@ procedure Bias1 is begin null; -end Bias1; \ No newline at end of file +end Bias1;
[gcc(refs/users/aoliva/heads/testme)] Avoid dropping bits from num/den in fixed-point types
https://gcc.gnu.org/g:dec24e79d4efe7f5b34c62b68ead75b9910e1a3b commit dec24e79d4efe7f5b34c62b68ead75b9910e1a3b Author: Alexandre Oliva Date: Thu Jun 27 09:11:54 2024 -0300 Avoid dropping bits from num/den in fixed-point types We used to use an unsigned 128-bit type to hold the numerator and denominator used to represent the delta of a fixed-point type in debug information, but there are cases in which that was not enough, and more significant bits silently overflowed and got omitted from debug information. Introduce a mode in which UI_to_gnu selects a wide-enough unsigned type, and use that to convert numerator and denominator. for gcc/ada/ChangeLog * gcc-interface/cuintp.cc (UI_To_gnu): Add mode that selects a wide enough unsigned type. * gcc-interface/decl.cc (gnat_to_gnu_entity): Use it for numerator and denominator of fixed-point types. Diff: --- gcc/ada/gcc-interface/cuintp.cc | 47 ++--- gcc/ada/gcc-interface/decl.cc | 15 + 2 files changed, 41 insertions(+), 21 deletions(-) diff --git a/gcc/ada/gcc-interface/cuintp.cc b/gcc/ada/gcc-interface/cuintp.cc index cdf6c019750..ad345096282 100644 --- a/gcc/ada/gcc-interface/cuintp.cc +++ b/gcc/ada/gcc-interface/cuintp.cc @@ -35,6 +35,7 @@ #include "tree.h" #include "inchash.h" #include "fold-const.h" +#include "stor-layout.h" #include "ada.h" #include "types.h" @@ -67,7 +68,8 @@ build_cst_from_int (tree type, HOST_WIDE_INT low) /* Similar to UI_To_Int, but return a GCC INTEGER_CST or REAL_CST node, depending on whether TYPE is an integral or real type. Overflow is tested by the constant-folding used to build the node. TYPE is the GCC type of - the resulting node. */ + the resulting node. If TYPE is NULL, an unsigned integer type wide enough + to hold the entire constant is selected. */ tree UI_To_gnu (Uint Input, tree type) @@ -77,8 +79,10 @@ UI_To_gnu (Uint Input, tree type) any such possible value for intermediate computations and then rely on a conversion back to TYPE to perform the bias adjustment when need be. 
*/ tree comp_type -= TREE_CODE (type) == INTEGER_TYPE && TYPE_BIASED_REPRESENTATION_P (type) - ? get_base_type (type) : type; += (!type ? gnat_type_for_size (32, 0) + : (TREE_CODE (type) == INTEGER_TYPE + && TYPE_BIASED_REPRESENTATION_P (type)) + ? get_base_type (type) : type); tree gnu_ret; if (Input <= Uint_Direct_Last) @@ -88,6 +92,7 @@ UI_To_gnu (Uint Input, tree type) Int Idx = (*Uints_Ptr)[Input - Uint_Table_Start].Loc; Pos Length = (*Uints_Ptr)[Input - Uint_Table_Start].Length; Int First = (*Udigits_Ptr)[Idx]; + tree_code code = First < 0 ? MINUS_EXPR : PLUS_EXPR; tree gnu_base; gcc_assert (Length > 0); @@ -99,26 +104,34 @@ UI_To_gnu (Uint Input, tree type) convert the final result back to the incoming type later on. */ if (!SCALAR_FLOAT_TYPE_P (comp_type) && TYPE_PRECISION (comp_type) < 32) comp_type = gnat_type_for_size (32, 0); + else if (!type && TYPE_UNSIGNED (comp_type)) + /* Choose a signed type, so that we can detect overflow. */ + comp_type = make_signed_type (TYPE_PRECISION (comp_type)); gnu_base = build_cst_from_int (comp_type, Base); gnu_ret = build_cst_from_int (comp_type, First); - if (First < 0) - for (Idx++, Length--; Length; Idx++, Length--) - gnu_ret = fold_build2 (MINUS_EXPR, comp_type, -fold_build2 (MULT_EXPR, comp_type, - gnu_ret, gnu_base), -build_cst_from_int (comp_type, -(*Udigits_Ptr)[Idx])); - else - for (Idx++, Length--; Length; Idx++, Length--) - gnu_ret = fold_build2 (PLUS_EXPR, comp_type, -fold_build2 (MULT_EXPR, comp_type, - gnu_ret, gnu_base), -build_cst_from_int (comp_type, -(*Udigits_Ptr)[Idx])); + for (Idx++, Length--; Length; Idx++, Length--) + for (;;) + { + tree next_ret = fold_build2 (code, comp_type, +fold_build2 (MULT_EXPR, comp_type, + gnu_ret, gnu_base), +build_cst_from_int +(comp_type, (*Udigits_Ptr)[Idx])); + if (!TREE_OVERFLOW (next_ret) || type) + { + gnu_ret = next_ret; + break; + } + comp_type = make_signed_type (TYPE_PRECISION (comp_type) * 2); + gnu_base = convert (comp_type, gnu_base); + gnu_ret =
[gcc r11-11545] libstdc++: fix typo in acinclude.m4.
https://gcc.gnu.org/g:6e33ffd543257a1a599b51201e9db95b070dbf84 commit r11-11545-g6e33ffd543257a1a599b51201e9db95b070dbf84 Author: Martin Liska Date: Thu Jan 27 14:47:23 2022 +0100 libstdc++: fix typo in acinclude.m4. PR libstdc++/104259 libstdc++-v3/ChangeLog: * acinclude.m4: Fix typo. * configure: Regenerate. (cherry picked from commit 14f339894db6ca7fe4772d5528c726694d2517c4) Diff: --- libstdc++-v3/acinclude.m4 | 2 +- libstdc++-v3/configure | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/libstdc++-v3/acinclude.m4 b/libstdc++-v3/acinclude.m4 index 696756a6df6..e2a12607d5d 100644 --- a/libstdc++-v3/acinclude.m4 +++ b/libstdc++-v3/acinclude.m4 @@ -4834,7 +4834,7 @@ dnl [glibcxx_cv_fdopendir=yes], [glibcxx_cv_fdopendir=no]) ]) - if test $glibcxx_cv_truncate = yes; then + if test $glibcxx_cv_fdopendir = yes; then AC_DEFINE(HAVE_FDOPENDIR, 1, [Define if fdopendir is available in <dirent.h>.]) fi dnl diff --git a/libstdc++-v3/configure b/libstdc++-v3/configure index 316c19a2c95..64f2552afe5 100755 --- a/libstdc++-v3/configure +++ b/libstdc++-v3/configure @@ -76851,7 +76851,7 @@ fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $glibcxx_cv_fdopendir" >&5 $as_echo "$glibcxx_cv_fdopendir" >&6; } - if test $glibcxx_cv_truncate = yes; then + if test $glibcxx_cv_fdopendir = yes; then $as_echo "#define HAVE_FDOPENDIR 1" >>confdefs.h
[gcc r15-1695] s390: Check for ADDR_REGS in s390_decompose_addrstyle_without_index
https://gcc.gnu.org/g:187eeb99ec5289538923668de9d61a3138376817 commit r15-1695-g187eeb99ec5289538923668de9d61a3138376817 Author: Stefan Schulze Frielinghaus Date: Thu Jun 27 15:46:24 2024 +0200 s390: Check for ADDR_REGS in s390_decompose_addrstyle_without_index An explicit check for address registers was not required so far since during register allocation the processing of address constraints was sufficient. However, address constraints themselves do not check for REGNO_OK_FOR_{BASE,INDEX}_P. Thus, with the newly introduced late-combine pass in r15-1579-g792f97b44ffc5e we generate new insns with invalid address registers which aren't fixed up afterwards. Fixed by explicitly checking for address registers in s390_decompose_addrstyle_without_index such that those new insns are rejected. gcc/ChangeLog: PR target/115634 * config/s390/s390.cc (s390_decompose_addrstyle_without_index): Check for ADDR_REGS in s390_decompose_addrstyle_without_index. Diff: --- gcc/config/s390/s390.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc index c65421de831..05a0fde7fb0 100644 --- a/gcc/config/s390/s390.cc +++ b/gcc/config/s390/s390.cc @@ -3347,7 +3347,9 @@ s390_decompose_addrstyle_without_index (rtx op, rtx *base, while (op && GET_CODE (op) == SUBREG) op = SUBREG_REG (op); - if (op && GET_CODE (op) != REG) + if (op && (!REG_P (op) +|| (reload_completed +&& !REGNO_OK_FOR_BASE_P (REGNO (op))))) return false; if (offset)
[gcc r15-1696] Disable late-combine for -O0 [PR115677]
https://gcc.gnu.org/g:f6081ee665fd5e4e7d37e02c69d16df0d3eead10 commit r15-1696-gf6081ee665fd5e4e7d37e02c69d16df0d3eead10 Author: Richard Sandiford Date: Thu Jun 27 14:51:37 2024 +0100 Disable late-combine for -O0 [PR115677] late-combine relies on df, which for -O0 is only initialised late (pass_df_initialize_no_opt, after split1). Other df-based passes cope with this by requiring optimize > 0, so this patch does the same for late-combine. gcc/ PR rtl-optimization/115677 * late-combine.cc (pass_late_combine::gate): New function. Diff: --- gcc/late-combine.cc | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/gcc/late-combine.cc b/gcc/late-combine.cc index b7c0bc07a8b..789d734692a 100644 --- a/gcc/late-combine.cc +++ b/gcc/late-combine.cc @@ -744,10 +744,16 @@ public: // opt_pass methods: opt_pass *clone () override { return new pass_late_combine (m_ctxt); } - bool gate (function *) override { return flag_late_combine_instructions; } + bool gate (function *) override; unsigned int execute (function *) override; }; +bool +pass_late_combine::gate (function *) +{ + return optimize > 0 && flag_late_combine_instructions; +} + unsigned int pass_late_combine::execute (function *fn) {
[gcc r15-1697] fixincludes: adjust stdio fix for macOS 15 headers
https://gcc.gnu.org/g:1dc143181550573c9c902fb7a3b495e9b409d0b0 commit r15-1697-g1dc143181550573c9c902fb7a3b495e9b409d0b0 Author: Francois-Xavier Coudert Date: Thu Jun 27 18:55:22 2024 +0200 fixincludes: adjust stdio fix for macOS 15 headers fixincludes/ChangeLog: * fixincl.x: Regenerate. * inclhack.def (apple_local_stdio_fn_deprecation): Also apply to _stdio.h. Diff: --- fixincludes/fixincl.x | 6 +++--- fixincludes/inclhack.def | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/fixincludes/fixincl.x b/fixincludes/fixincl.x index 54a530b50ca..fb9950d9b21 100644 --- a/fixincludes/fixincl.x +++ b/fixincludes/fixincl.x @@ -2,11 +2,11 @@ * * DO NOT EDIT THIS FILE (fixincl.x) * - * It has been AutoGen-ed June 7, 2024 at 11:03:58 AM by AutoGen 5.18.16 + * It has been AutoGen-ed June 27, 2024 at 06:52:39 PM by AutoGen 5.18.16 * From the definitions inclhack.def * and the template file fixincl */ -/* DO NOT SVN-MERGE THIS FILE, EITHER Fri Jun 7 11:03:58 CEST 2024 +/* DO NOT SVN-MERGE THIS FILE, EITHER Thu Jun 27 18:52:39 CEST 2024 * * You must regenerate it. Use the ./genfixes script. * @@ -2619,7 +2619,7 @@ tSCC zApple_Local_Stdio_Fn_DeprecationName[] = * File name selection pattern */ tSCC zApple_Local_Stdio_Fn_DeprecationList[] = - "stdio.h\0"; + "stdio.h\0_stdio.h\0"; /* * Machine/OS name selection pattern */ diff --git a/fixincludes/inclhack.def b/fixincludes/inclhack.def index f7fc5cdbabd..9f4a41199a1 100644 --- a/fixincludes/inclhack.def +++ b/fixincludes/inclhack.def @@ -1273,6 +1273,7 @@ fix = { hackname = apple_local_stdio_fn_deprecation; mach = "*-*-*darwin2*"; files = stdio.h; +files = _stdio.h; select = "__deprecated_msg([^\n]*)$"; c_fix = format; c_fix_arg = "#if defined(__APPLE_LOCAL_DEPRECATIONS)\n"
[gcc r13-8874] AArch64: Fix strict-align cpymem/setmem [PR103100]
https://gcc.gnu.org/g:5aa9ed0f353f835005c3df8932c7bc6e26f53904 commit r13-8874-g5aa9ed0f353f835005c3df8932c7bc6e26f53904 Author: Wilco Dijkstra Date: Wed Oct 25 16:28:04 2023 +0100 AArch64: Fix strict-align cpymem/setmem [PR103100] The cpymemdi/setmemdi implementation doesn't fully support strict alignment. Block the expansion if the alignment is less than 16 with STRICT_ALIGNMENT. Clean up the condition when to use MOPS. gcc/ChangeLog/ PR target/103100 * config/aarch64/aarch64.md (cpymemdi): Remove pattern condition. (setmemdi): Likewise. * config/aarch64/aarch64.cc (aarch64_expand_cpymem): Support strict-align. Cleanup condition for using MOPS. (aarch64_expand_setmem): Likewise. (cherry picked from commit 318f5232cfb3e0c9694889565e1f5424d0354463) Diff: --- gcc/config/aarch64/aarch64.cc | 52 ++- gcc/config/aarch64/aarch64.md | 4 ++-- 2 files changed, 24 insertions(+), 32 deletions(-) diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index b8a4ab1b980..2f01580a797 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -24897,27 +24897,23 @@ aarch64_expand_cpymem (rtx *operands) int mode_bits; rtx dst = operands[0]; rtx src = operands[1]; + unsigned align = UINTVAL (operands[3]); rtx base; machine_mode cur_mode = BLKmode; + bool size_p = optimize_function_for_size_p (cfun); - /* Variable-sized memcpy can go through the MOPS expansion if available. */ - if (!CONST_INT_P (operands[2])) + /* Variable-sized or strict-align copies may use the MOPS expansion. */ + if (!CONST_INT_P (operands[2]) || (STRICT_ALIGNMENT && align < 16)) return aarch64_expand_cpymem_mops (operands); - unsigned HOST_WIDE_INT size = INTVAL (operands[2]); - - /* Try to inline up to 256 bytes or use the MOPS threshold if available. */ - unsigned HOST_WIDE_INT max_copy_size -= TARGET_MOPS ? 
aarch64_mops_memcpy_size_threshold : 256; + unsigned HOST_WIDE_INT size = UINTVAL (operands[2]); - bool size_p = optimize_function_for_size_p (cfun); + /* Try to inline up to 256 bytes. */ + unsigned max_copy_size = 256; + unsigned mops_threshold = aarch64_mops_memcpy_size_threshold; - /* Large constant-sized cpymem should go through MOPS when possible. - It should be a win even for size optimization in the general case. - For speed optimization the choice between MOPS and the SIMD sequence - depends on the size of the copy, rather than number of instructions, - alignment etc. */ - if (size > max_copy_size) + /* Large copies use MOPS when available or a library call. */ + if (size > max_copy_size || (TARGET_MOPS && size > mops_threshold)) return aarch64_expand_cpymem_mops (operands); int copy_bits = 256; @@ -25081,12 +25077,13 @@ aarch64_expand_setmem (rtx *operands) unsigned HOST_WIDE_INT len; rtx dst = operands[0]; rtx val = operands[2], src; + unsigned align = UINTVAL (operands[3]); rtx base; machine_mode cur_mode = BLKmode, next_mode; - /* If we don't have SIMD registers or the size is variable use the MOPS - inlined sequence if possible. */ - if (!CONST_INT_P (operands[1]) || !TARGET_SIMD) + /* Variable-sized or strict-align memset may use the MOPS expansion. */ + if (!CONST_INT_P (operands[1]) || !TARGET_SIMD + || (STRICT_ALIGNMENT && align < 16)) return aarch64_expand_setmem_mops (operands); bool size_p = optimize_function_for_size_p (cfun); @@ -25094,10 +25091,13 @@ aarch64_expand_setmem (rtx *operands) /* Default the maximum to 256-bytes when considering only libcall vs SIMD broadcast sequence. */ unsigned max_set_size = 256; + unsigned mops_threshold = aarch64_mops_memset_size_threshold; - len = INTVAL (operands[1]); - if (len > max_set_size && !TARGET_MOPS) -return false; + len = UINTVAL (operands[1]); + + /* Large memset uses MOPS when available or a library call. 
*/ + if (len > max_set_size || (TARGET_MOPS && len > mops_threshold)) +return aarch64_expand_setmem_mops (operands); int cst_val = !!(CONST_INT_P (val) && (INTVAL (val) != 0)); /* The MOPS sequence takes: @@ -25110,12 +25110,6 @@ aarch64_expand_setmem (rtx *operands) the arguments + 1 for the call. */ unsigned libcall_cost = 4; - /* Upper bound check. For large constant-sized setmem use the MOPS sequence - when available. */ - if (TARGET_MOPS - && len >= (unsigned HOST_WIDE_INT) aarch64_mops_memset_size_threshold) -return aarch64_expand_setmem_mops (operands); - /* Attempt a sequence with a vector broadcast followed by stores. Count the number of operations involved to see if it's worth it against the alternatives. A simple counter simd_ops on the @@ -25157,10 +25151,8 @@ aarch64_expand_setmem (rtx *operands) simd_ops++; n -= mode_bits; - /* Do cer
[gcc r15-1698] c: Error message for incorrect use of static in array declarations.
https://gcc.gnu.org/g:da7976a015a4388b8ed843412c3c1c840451cf0f commit r15-1698-gda7976a015a4388b8ed843412c3c1c840451cf0f Author: Martin Uecker Date: Thu Jun 27 21:47:56 2024 +0200 c: Error message for incorrect use of static in array declarations. Add an explicit error message when c99's static is used without a size expression in an array declarator. gcc/c: * c-parser.cc (c_parser_direct_declarator_inner): Add error message. gcc/testsuite: * gcc.dg/c99-arraydecl-4.c: New test. Diff: --- gcc/c/c-parser.cc | 63 -- gcc/testsuite/gcc.dg/c99-arraydecl-4.c | 14 2 files changed, 44 insertions(+), 33 deletions(-) diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc index 6a3f96d5b61..8c4e697a4e1 100644 --- a/gcc/c/c-parser.cc +++ b/gcc/c/c-parser.cc @@ -4715,8 +4715,6 @@ c_parser_direct_declarator_inner (c_parser *parser, bool id_present, location_t brace_loc = c_parser_peek_token (parser)->location; struct c_declarator *declarator; struct c_declspecs *quals_attrs = build_null_declspecs (); - bool static_seen; - bool star_seen; struct c_expr dimen; dimen.value = NULL_TREE; dimen.original_code = ERROR_MARK; @@ -4724,49 +4722,48 @@ c_parser_direct_declarator_inner (c_parser *parser, bool id_present, c_parser_consume_token (parser); c_parser_declspecs (parser, quals_attrs, false, false, true, false, false, false, false, cla_prefer_id); - static_seen = c_parser_next_token_is_keyword (parser, RID_STATIC); - if (static_seen) - c_parser_consume_token (parser); - if (static_seen && !quals_attrs->declspecs_seen_p) - c_parser_declspecs (parser, quals_attrs, false, false, true, - false, false, false, false, cla_prefer_id); + + location_t static_loc = UNKNOWN_LOCATION; + if (c_parser_next_token_is_keyword (parser, RID_STATIC)) + { + static_loc = c_parser_peek_token (parser)->location; + c_parser_consume_token (parser); + if (!quals_attrs->declspecs_seen_p) + c_parser_declspecs (parser, quals_attrs, false, false, true, + false, false, false, false, cla_prefer_id); + } if 
(!quals_attrs->declspecs_seen_p) quals_attrs = NULL; /* If "static" is present, there must be an array dimension. Otherwise, there may be a dimension, "*", or no dimension. */ - if (static_seen) + const bool static_seen = (static_loc != UNKNOWN_LOCATION); + bool star_seen = false; + if (c_parser_next_token_is (parser, CPP_MULT) + && c_parser_peek_2nd_token (parser)->type == CPP_CLOSE_SQUARE) { - star_seen = false; - dimen = c_parser_expr_no_commas (parser, NULL); + star_seen = true; + c_parser_consume_token (parser); } - else + else if (!c_parser_next_token_is (parser, CPP_CLOSE_SQUARE)) + dimen = c_parser_expr_no_commas (parser, NULL); + + if (static_seen) { - if (c_parser_next_token_is (parser, CPP_CLOSE_SQUARE)) - { - dimen.value = NULL_TREE; - star_seen = false; - } - else if (c_parser_next_token_is (parser, CPP_MULT)) - { - if (c_parser_peek_2nd_token (parser)->type == CPP_CLOSE_SQUARE) - { - dimen.value = NULL_TREE; - star_seen = true; - c_parser_consume_token (parser); - } - else - { - star_seen = false; - dimen = c_parser_expr_no_commas (parser, NULL); - } - } - else + if (star_seen) { + error_at (static_loc, + "%<static%> may not be used with an unspecified " + "variable length array size"); + /* Prevent further errors. 
*/ star_seen = false; - dimen = c_parser_expr_no_commas (parser, NULL); + dimen.value = error_mark_node; } + else if (!dimen.value) + error_at (static_loc, + "%<static%> may not be used without an array size"); } + if (c_parser_next_token_is (parser, CPP_CLOSE_SQUARE)) c_parser_consume_token (parser); else diff --git a/gcc/testsuite/gcc.dg/c99-arraydecl-4.c b/gcc/testsuite/gcc.dg/c99-arraydecl-4.c new file mode 100644 index 000..f8cad3b9429 --- /dev/null +++ b/gcc/testsuite/gcc.dg/c99-arraydecl-4.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-std=c99 -pedantic-errors" } */ + +void fo(char buf[static]); /* { dg-error "'static' may not be used without an array size" } */ +void fo(char buf[static]) { } /* { dg-error "'static' may not be used without an array size" }
[gcc r15-1699] libgccjit: Add ability to get the alignment of a type
https://gcc.gnu.org/g:445c62ee492b363e7ad86260c7a91a7fdf984f50 commit r15-1699-g445c62ee492b363e7ad86260c7a91a7fdf984f50 Author: Antoni Boucher Date: Thu Apr 4 18:57:07 2024 -0400 libgccjit: Add ability to get the alignment of a type gcc/jit/ChangeLog: * docs/topics/compatibility.rst (LIBGCCJIT_ABI_28): New ABI tag. * docs/topics/expressions.rst: Document gcc_jit_context_new_alignof. * jit-playback.cc (new_alignof): New method. * jit-playback.h: New method. * jit-recording.cc (recording::context::new_alignof): New method. (recording::memento_of_sizeof::replay_into, recording::memento_of_typeinfo::replay_into, recording::memento_of_sizeof::make_debug_string, recording::memento_of_typeinfo::make_debug_string, recording::memento_of_sizeof::write_reproducer, recording::memento_of_typeinfo::write_reproducer): Rename. * jit-recording.h (enum type_info_type): New enum. (class memento_of_sizeof class memento_of_typeinfo): Rename. * libgccjit.cc (gcc_jit_context_new_alignof): New function. * libgccjit.h (gcc_jit_context_new_alignof): New function. * libgccjit.map: New function. gcc/testsuite/ChangeLog: * jit.dg/all-non-failing-tests.h: New test. * jit.dg/test-alignof.c: New test. Diff: --- gcc/jit/docs/topics/compatibility.rst| 7 +++ gcc/jit/docs/topics/expressions.rst | 14 ++ gcc/jit/jit-playback.cc | 11 + gcc/jit/jit-playback.h | 3 ++ gcc/jit/jit-recording.cc | 67 ++- gcc/jit/jit-recording.h | 19 ++-- gcc/jit/libgccjit.cc | 18 gcc/jit/libgccjit.h | 13 ++ gcc/jit/libgccjit.map| 5 ++ gcc/testsuite/jit.dg/all-non-failing-tests.h | 10 gcc/testsuite/jit.dg/test-alignof.c | 69 11 files changed, 221 insertions(+), 15 deletions(-) diff --git a/gcc/jit/docs/topics/compatibility.rst b/gcc/jit/docs/topics/compatibility.rst index 9cfb054f653..92c3ed24c89 100644 --- a/gcc/jit/docs/topics/compatibility.rst +++ b/gcc/jit/docs/topics/compatibility.rst @@ -397,3 +397,10 @@ on functions and variables: ``LIBGCCJIT_ABI_27`` covers the addition of :func:`gcc_jit_context_new_sizeof` + +.. 
_LIBGCCJIT_ABI_28: + +``LIBGCCJIT_ABI_28`` + +``LIBGCCJIT_ABI_28`` covers the addition of +:func:`gcc_jit_context_new_alignof` diff --git a/gcc/jit/docs/topics/expressions.rst b/gcc/jit/docs/topics/expressions.rst index d83d95fe9e0..5734f0e5f7e 100644 --- a/gcc/jit/docs/topics/expressions.rst +++ b/gcc/jit/docs/topics/expressions.rst @@ -140,6 +140,20 @@ Simple expressions sizeof (type) +.. function:: gcc_jit_rvalue *\ + gcc_jit_context_new_alignof (gcc_jit_context *ctxt, \ + gcc_jit_type *type) + + Generate an rvalue that is equal to the alignment of ``type``. + + The parameter ``type`` must be non-NULL. + + This is equivalent to this C code: + + .. code-block:: c + + _Alignof (type) + Constructor expressions *** diff --git a/gcc/jit/jit-playback.cc b/gcc/jit/jit-playback.cc index 6baa838af10..b3f54da24ab 100644 --- a/gcc/jit/jit-playback.cc +++ b/gcc/jit/jit-playback.cc @@ -1120,6 +1120,17 @@ new_sizeof (type *type) /* Construct a playback::rvalue instance (wrapping a tree). */ +playback::rvalue * +playback::context:: +new_alignof (type *type) +{ + int alignment = TYPE_ALIGN (type->as_tree ()) / BITS_PER_UNIT; + tree inner = build_int_cst (integer_type_node, alignment); + return new rvalue (this, inner); +} + +/* Construct a playback::rvalue instance (wrapping a tree). 
*/ + playback::rvalue * playback::context:: new_string_literal (const char *value) diff --git a/gcc/jit/jit-playback.h b/gcc/jit/jit-playback.h index aa6a086613c..6e97b389cbb 100644 --- a/gcc/jit/jit-playback.h +++ b/gcc/jit/jit-playback.h @@ -165,6 +165,9 @@ public: rvalue * new_sizeof (type *type); + rvalue * + new_alignof (type *type); + rvalue * new_string_literal (const char *value); diff --git a/gcc/jit/jit-recording.cc b/gcc/jit/jit-recording.cc index 5e9ef40f3b7..f68d01fff55 100644 --- a/gcc/jit/jit-recording.cc +++ b/gcc/jit/jit-recording.cc @@ -1077,7 +1077,7 @@ recording::context::new_global_init_rvalue (lvalue *variable, gbl->set_rvalue_init (init); /* Needed by the global for write dump. */ } -/* Create a recording::memento_of_sizeof instance and add it +/* Create a recording::memento_of_typeinfo instance and add it to this context's list of mementos. Implements the post-error-checking part of @@ -1087,7 +1087,22 @@ recording::rvalue * reco
[gcc r14-10353] libstdc++: Replace viewcvs links in docs with cgit links
https://gcc.gnu.org/g:a8b77a696379343dfe67bb75088acd452eef0c2c commit r14-10353-ga8b77a696379343dfe67bb75088acd452eef0c2c Author: Jonathan Wakely Date: Tue Jun 25 13:35:17 2024 +0100 libstdc++: Replace viewcvs links in docs with cgit links For this backport to the release branch, the links to the git repo refer to the branch. libstdc++-v3/ChangeLog: * doc/xml/faq.xml: Replace viewcvs links with cgit links. * doc/xml/manual/allocator.xml: Likewise. * doc/xml/manual/mt_allocator.xml: Likewise. * doc/html/*: Regenerate. (cherry picked from commit 9d8021d1875677286c3dde90dfed2aca864edad0) Diff: --- libstdc++-v3/doc/html/faq.html | 2 +- libstdc++-v3/doc/html/manual/memory.html| 10 +- libstdc++-v3/doc/html/manual/mt_allocator_impl.html | 6 +++--- libstdc++-v3/doc/xml/faq.xml| 2 +- libstdc++-v3/doc/xml/manual/allocator.xml | 10 +- libstdc++-v3/doc/xml/manual/mt_allocator.xml| 6 +++--- 6 files changed, 18 insertions(+), 18 deletions(-) diff --git a/libstdc++-v3/doc/html/faq.html b/libstdc++-v3/doc/html/faq.html index e84e455c4e9..bbe716d5e23 100644 --- a/libstdc++-v3/doc/html/faq.html +++ b/libstdc++-v3/doc/html/faq.html @@ -147,7 +147,7 @@ The libstdc++ project is contributed to by several developers all over the world, in the same way as GCC or the Linux kernel. The current maintainers are listed in the - https://gcc.gnu.org/viewcvs/gcc/trunk/MAINTAINERS?view=co"; target="_top">MAINTAINERS + https://gcc.gnu.org/cgit/gcc/tree/MAINTAINERS"; target="_top">MAINTAINERS file (look for "c++ runtime libs"). Development and discussion is held on the libstdc++ mailing diff --git a/libstdc++-v3/doc/html/manual/memory.html b/libstdc++-v3/doc/html/manual/memory.html index 08ad2fd4dd8..3a2025b90d2 100644 --- a/libstdc++-v3/doc/html/manual/memory.html +++ b/libstdc++-v3/doc/html/manual/memory.html @@ -120,8 +120,8 @@ Over multiple iterations, various STL container objects have elements inserted to some maximum amount. A variety of allocators are tested. 
- Test source for http://gcc.gnu.org/viewcvs/gcc/trunk/libstdc%2B%2B-v3/testsuite/performance/23_containers/insert/sequence.cc?view=markup"; target="_top">sequence - and http://gcc.gnu.org/viewcvs/gcc/trunk/libstdc%2B%2B-v3/testsuite/performance/23_containers/insert/associative.cc?view=markup"; target="_top">associative + Test source for https://gcc.gnu.org/cgit/gcc/tree/libstdc++-v3/testsuite/performance/23_containers/insert/sequence.cc?h=releases%2Fgcc-14"; target="_top">sequence + and https://gcc.gnu.org/cgit/gcc/tree/libstdc++-v3/testsuite/performance/23_containers/insert/associative.cc?h=releases%2Fgcc-14"; target="_top">associative containers. Insertion and erasure in a multi-threaded environment. @@ -130,14 +130,14 @@ on a per-thread basis, as well as measuring thread contention for memory resources. Test source -http://gcc.gnu.org/viewcvs/gcc/trunk/libstdc%2B%2B-v3/testsuite/performance/23_containers/insert_erase/associative.cc?view=markup"; target="_top">here. +https://gcc.gnu.org/cgit/gcc/tree/libstdc++-v3/testsuite/performance/23_containers/insert_erase/associative.cc?h=releases%2Fgcc-14"; target="_top">here. A threaded producer/consumer model. Test source for - http://gcc.gnu.org/viewcvs/gcc/trunk/libstdc++-v3/testsuite/performance/23_containers/producer_consumer/sequence.cc?view=markup"; target="_top">sequence + https://gcc.gnu.org/cgit/gcc/tree/libstdc++-v3/testsuite/performance/23_containers/producer_consumer/sequence.cc?h=releases%2Fgcc-14"; target="_top">sequence and - http://gcc.gnu.org/viewcvs/gcc/trunk/libstdc++-v3/testsuite/performance/23_containers/producer_consumer/associative.cc?view=markup"; target="_top">associative + https://gcc.gnu.org/cgit/gcc/tree/libstdc++-v3/testsuite/performance/23_containers/producer_consumer/associative.cc?h=releases%2Fgcc-14"; target="_top">associative containers. 
Since GCC 12 the default choice for diff --git a/libstdc++-v3/doc/html/manual/mt_allocator_impl.html b/libstdc++-v3/doc/html/manual/mt_allocator_impl.html index 2e5926add00..351fa90c975 100644 --- a/libstdc++-v3/doc/html/manual/mt_allocator_impl.html +++ b/libstdc++-v3/doc/html/manual/mt_allocator_impl.html @@ -155,7 +155,7 @@ that uses it is fully constructed. For most (but not all) STL containers, this works, as an instance of the allocator is constructed as part of a container's constructor. However, this assumption is implementation-specific, and subject to change. For an example of a -pool that frees memory, see the following -http://gcc.gnu.org/viewcvs/gcc/trunk/libstdc++-v3/testsuite/ext/mt_allocator/deallocate_local-6.cc?view=markup"; target=
[gcc r12-10587] rs6000: Fix wrong RTL patterns for vector merge high/low word on LE
https://gcc.gnu.org/g:96ef3367067219c8e3eb88c0474a1090cc7749b4 commit r12-10587-g96ef3367067219c8e3eb88c0474a1090cc7749b4 Author: Kewen Lin Date: Thu Jun 20 20:23:56 2024 -0500 rs6000: Fix wrong RTL patterns for vector merge high/low word on LE Commit r12-4496 changes some define_expands and define_insns for vector merge high/low word, which are altivec_vmrg[hl]w, vsx_xxmrg[hl]w_. These defines are mainly for built-in function vec_merge{h,l}, __builtin_vsx_xxmrghw, __builtin_vsx_xxmrghw_4si and some internal gen function needs. These functions should consider endianness, taking vec_mergeh as example, as PVIPR defines, vec_mergeh "Merges the first halves (in element order) of two vectors", it does note it's in element order. So it's mapped into vmrghw on BE while vmrglw on LE respectively. Although the mapped insns are different, as the discussion in PR106069, the RTL pattern should be still the same, it is conformed before commit r12-4496, define_expand altivec_vmrghw got expanded into: (vec_select:VSX_W (vec_concat: (match_operand:VSX_W 1 "register_operand" "wa,v") (match_operand:VSX_W 2 "register_operand" "wa,v")) (parallel [(const_int 0) (const_int 4) (const_int 1) (const_int 5)])))] on both BE and LE then. But commit r12-4496 changed it to expand into: (vec_select:VSX_W (vec_concat: (match_operand:VSX_W 1 "register_operand" "wa,v") (match_operand:VSX_W 2 "register_operand" "wa,v")) (parallel [(const_int 0) (const_int 4) (const_int 1) (const_int 5)])))] on BE, and (vec_select:VSX_W (vec_concat: (match_operand:VSX_W 1 "register_operand" "wa,v") (match_operand:VSX_W 2 "register_operand" "wa,v")) (parallel [(const_int 2) (const_int 6) (const_int 3) (const_int 7)])))] on LE, although the mapped insn are still vmrghw on BE and vmrglw on LE, the associated RTL pattern is completely wrong and inconsistent with the mapped insn. 
If optimization passes leave this pattern alone, even if its pattern doesn't represent its mapped insn, it's still fine, that's why simple testing on bif doesn't expose this issue. But once some optimization pass such as combine does some changes basing on this wrong pattern, because the pattern doesn't match the semantics that the expanded insn is intended to represent, it would cause the unexpected result. So this patch is to fix the wrong RTL pattern, ensure the associated RTL patterns become the same as before which can have the same semantic as their mapped insns. With the proposed patch, the expanders like altivec_vmrghw expands into altivec_vmrghb_direct_be or altivec_vmrglb_direct_le depending on endianness, "direct" can easily show which insn would be generated, _be and _le are mainly for the different RTL patterns as endianness. Co-authored-by: Xionghu Luo PR target/106069 PR target/115355 gcc/ChangeLog: * config/rs6000/altivec.md (altivec_vmrghw_direct_): Rename to ... (altivec_vmrghw_direct__be): ... this. Add the condition BYTES_BIG_ENDIAN. (altivec_vmrghw_direct__le): New define_insn. (altivec_vmrglw_direct_): Rename to ... (altivec_vmrglw_direct__be): ... this. Add the condition BYTES_BIG_ENDIAN. (altivec_vmrglw_direct__le): New define_insn. (altivec_vmrghw): Adjust by calling gen_altivec_vmrghw_direct_v4si_be for BE and gen_altivec_vmrglw_direct_v4si_le for LE. (altivec_vmrglw): Adjust by calling gen_altivec_vmrglw_direct_v4si_be for BE and gen_altivec_vmrghw_direct_v4si_le for LE. (vec_widen_umult_hi_v8hi): Adjust the call to gen_altivec_vmrghw_direct_v4si by gen_altivec_vmrghw for BE and by gen_altivec_vmrglw for LE. (vec_widen_smult_hi_v8hi): Likewise. (vec_widen_umult_lo_v8hi): Adjust the call to gen_altivec_vmrglw_direct_v4si by gen_altivec_vmrglw for BE and by gen_altivec_vmrghw for LE (vec_widen_smult_lo_v8hi): Likewise. 
* config/rs6000/rs6000.cc (altivec_expand_vec_perm_const): Replace CODE_FOR_altivec_vmrghw_direct_v4si by CODE_FOR_altivec_vmrghw_direct_v4si_be for BE and CODE_FOR_altivec_vmrghw_direct_v4si_le for LE. And replace CODE_FOR_altivec_vmrglw_direct_v4si by CODE_FOR_altivec_vmrglw_direct_v4si_be for BE and CODE_FOR_altivec_vmrglw_direct_v4si_le for LE. * config/rs6000/vsx.md (vsx_xxmrghw_): Adjust by calling gen_altivec_vmrghw_d
[gcc r13-8876] rs6000: Fix wrong RTL patterns for vector merge high/low word on LE
https://gcc.gnu.org/g:361bfcec901ca882130e338aebaa2ebc6ea2dc3b commit r13-8876-g361bfcec901ca882130e338aebaa2ebc6ea2dc3b Author: Kewen Lin Date: Thu Jun 20 20:23:56 2024 -0500 rs6000: Fix wrong RTL patterns for vector merge high/low word on LE Commit r12-4496 changes some define_expands and define_insns for vector merge high/low word, which are altivec_vmrg[hl]w, vsx_xxmrg[hl]w_. These defines are mainly for built-in function vec_merge{h,l}, __builtin_vsx_xxmrghw, __builtin_vsx_xxmrghw_4si and some internal gen function needs. These functions should consider endianness, taking vec_mergeh as example, as PVIPR defines, vec_mergeh "Merges the first halves (in element order) of two vectors", it does note it's in element order. So it's mapped into vmrghw on BE while vmrglw on LE respectively. Although the mapped insns are different, as the discussion in PR106069, the RTL pattern should be still the same, it is conformed before commit r12-4496, define_expand altivec_vmrghw got expanded into: (vec_select:VSX_W (vec_concat: (match_operand:VSX_W 1 "register_operand" "wa,v") (match_operand:VSX_W 2 "register_operand" "wa,v")) (parallel [(const_int 0) (const_int 4) (const_int 1) (const_int 5)])))] on both BE and LE then. But commit r12-4496 changed it to expand into: (vec_select:VSX_W (vec_concat: (match_operand:VSX_W 1 "register_operand" "wa,v") (match_operand:VSX_W 2 "register_operand" "wa,v")) (parallel [(const_int 0) (const_int 4) (const_int 1) (const_int 5)])))] on BE, and (vec_select:VSX_W (vec_concat: (match_operand:VSX_W 1 "register_operand" "wa,v") (match_operand:VSX_W 2 "register_operand" "wa,v")) (parallel [(const_int 2) (const_int 6) (const_int 3) (const_int 7)])))] on LE, although the mapped insn are still vmrghw on BE and vmrglw on LE, the associated RTL pattern is completely wrong and inconsistent with the mapped insn. 
If optimization passes leave this pattern alone, even if its pattern doesn't represent its mapped insn, it's still fine, that's why simple testing on bif doesn't expose this issue. But once some optimization pass such as combine does some changes basing on this wrong pattern, because the pattern doesn't match the semantics that the expanded insn is intended to represent, it would cause the unexpected result. So this patch is to fix the wrong RTL pattern, ensure the associated RTL patterns become the same as before which can have the same semantic as their mapped insns. With the proposed patch, the expanders like altivec_vmrghw expands into altivec_vmrghb_direct_be or altivec_vmrglb_direct_le depending on endianness, "direct" can easily show which insn would be generated, _be and _le are mainly for the different RTL patterns as endianness. Co-authored-by: Xionghu Luo PR target/106069 PR target/115355 gcc/ChangeLog: * config/rs6000/altivec.md (altivec_vmrghw_direct_): Rename to ... (altivec_vmrghw_direct__be): ... this. Add the condition BYTES_BIG_ENDIAN. (altivec_vmrghw_direct__le): New define_insn. (altivec_vmrglw_direct_): Rename to ... (altivec_vmrglw_direct__be): ... this. Add the condition BYTES_BIG_ENDIAN. (altivec_vmrglw_direct__le): New define_insn. (altivec_vmrghw): Adjust by calling gen_altivec_vmrghw_direct_v4si_be for BE and gen_altivec_vmrglw_direct_v4si_le for LE. (altivec_vmrglw): Adjust by calling gen_altivec_vmrglw_direct_v4si_be for BE and gen_altivec_vmrghw_direct_v4si_le for LE. (vec_widen_umult_hi_v8hi): Adjust the call to gen_altivec_vmrghw_direct_v4si by gen_altivec_vmrghw for BE and by gen_altivec_vmrglw for LE. (vec_widen_smult_hi_v8hi): Likewise. (vec_widen_umult_lo_v8hi): Adjust the call to gen_altivec_vmrglw_direct_v4si by gen_altivec_vmrglw for BE and by gen_altivec_vmrghw for LE (vec_widen_smult_lo_v8hi): Likewise. 
* config/rs6000/rs6000.cc (altivec_expand_vec_perm_const): Replace CODE_FOR_altivec_vmrghw_direct_v4si by CODE_FOR_altivec_vmrghw_direct_v4si_be for BE and CODE_FOR_altivec_vmrghw_direct_v4si_le for LE. And replace CODE_FOR_altivec_vmrglw_direct_v4si by CODE_FOR_altivec_vmrglw_direct_v4si_be for BE and CODE_FOR_altivec_vmrglw_direct_v4si_le for LE. * config/rs6000/vsx.md (vsx_xxmrghw_): Adjust by calling gen_altivec_vmrghw_di
[gcc r14-10355] rs6000: Fix wrong RTL patterns for vector merge high/low word on LE
https://gcc.gnu.org/g:ef8b60dd48faeaf2b4e28c35401fa10d2a3e53fb commit r14-10355-gef8b60dd48faeaf2b4e28c35401fa10d2a3e53fb Author: Kewen Lin Date: Thu Jun 20 20:23:56 2024 -0500 rs6000: Fix wrong RTL patterns for vector merge high/low word on LE Commit r12-4496 changes some define_expands and define_insns for vector merge high/low word, which are altivec_vmrg[hl]w, vsx_xxmrg[hl]w_. These defines are mainly for built-in function vec_merge{h,l}, __builtin_vsx_xxmrghw, __builtin_vsx_xxmrghw_4si and some internal gen function needs. These functions should consider endianness, taking vec_mergeh as example, as PVIPR defines, vec_mergeh "Merges the first halves (in element order) of two vectors", it does note it's in element order. So it's mapped into vmrghw on BE while vmrglw on LE respectively. Although the mapped insns are different, as the discussion in PR106069, the RTL pattern should be still the same, it is conformed before commit r12-4496, define_expand altivec_vmrghw got expanded into: (vec_select:VSX_W (vec_concat: (match_operand:VSX_W 1 "register_operand" "wa,v") (match_operand:VSX_W 2 "register_operand" "wa,v")) (parallel [(const_int 0) (const_int 4) (const_int 1) (const_int 5)])))] on both BE and LE then. But commit r12-4496 changed it to expand into: (vec_select:VSX_W (vec_concat: (match_operand:VSX_W 1 "register_operand" "wa,v") (match_operand:VSX_W 2 "register_operand" "wa,v")) (parallel [(const_int 0) (const_int 4) (const_int 1) (const_int 5)])))] on BE, and (vec_select:VSX_W (vec_concat: (match_operand:VSX_W 1 "register_operand" "wa,v") (match_operand:VSX_W 2 "register_operand" "wa,v")) (parallel [(const_int 2) (const_int 6) (const_int 3) (const_int 7)])))] on LE, although the mapped insn are still vmrghw on BE and vmrglw on LE, the associated RTL pattern is completely wrong and inconsistent with the mapped insn. 
If optimization passes leave this pattern alone, even if its pattern doesn't represent its mapped insn, it's still fine, that's why simple testing on bif doesn't expose this issue. But once some optimization pass such as combine does some changes basing on this wrong pattern, because the pattern doesn't match the semantics that the expanded insn is intended to represent, it would cause the unexpected result. So this patch is to fix the wrong RTL pattern, ensure the associated RTL patterns become the same as before which can have the same semantic as their mapped insns. With the proposed patch, the expanders like altivec_vmrghw expands into altivec_vmrghb_direct_be or altivec_vmrglb_direct_le depending on endianness, "direct" can easily show which insn would be generated, _be and _le are mainly for the different RTL patterns as endianness. Co-authored-by: Xionghu Luo PR target/106069 PR target/115355 gcc/ChangeLog: * config/rs6000/altivec.md (altivec_vmrghw_direct_): Rename to ... (altivec_vmrghw_direct__be): ... this. Add the condition BYTES_BIG_ENDIAN. (altivec_vmrghw_direct__le): New define_insn. (altivec_vmrglw_direct_): Rename to ... (altivec_vmrglw_direct__be): ... this. Add the condition BYTES_BIG_ENDIAN. (altivec_vmrglw_direct__le): New define_insn. (altivec_vmrghw): Adjust by calling gen_altivec_vmrghw_direct_v4si_be for BE and gen_altivec_vmrglw_direct_v4si_le for LE. (altivec_vmrglw): Adjust by calling gen_altivec_vmrglw_direct_v4si_be for BE and gen_altivec_vmrghw_direct_v4si_le for LE. (vec_widen_umult_hi_v8hi): Adjust the call to gen_altivec_vmrghw_direct_v4si by gen_altivec_vmrghw for BE and by gen_altivec_vmrglw for LE. (vec_widen_smult_hi_v8hi): Likewise. (vec_widen_umult_lo_v8hi): Adjust the call to gen_altivec_vmrglw_direct_v4si by gen_altivec_vmrglw for BE and by gen_altivec_vmrghw for LE (vec_widen_smult_lo_v8hi): Likewise. 
* config/rs6000/rs6000.cc (altivec_expand_vec_perm_const): Replace CODE_FOR_altivec_vmrghw_direct_v4si by CODE_FOR_altivec_vmrghw_direct_v4si_be for BE and CODE_FOR_altivec_vmrghw_direct_v4si_le for LE. And replace CODE_FOR_altivec_vmrglw_direct_v4si by CODE_FOR_altivec_vmrglw_direct_v4si_be for BE and CODE_FOR_altivec_vmrglw_direct_v4si_le for LE. * config/rs6000/vsx.md (vsx_xxmrghw_): Adjust by calling gen_altivec_vmrghw_d
[gcc r15-1701] i386: Some additional AVX512 ternlog refinements.
https://gcc.gnu.org/g:5938cf021e95b40b040974c9cbe7860399247f7f commit r15-1701-g5938cf021e95b40b040974c9cbe7860399247f7f Author: Roger Sayle Date: Fri Jun 28 07:12:53 2024 +0100 i386: Some additional AVX512 ternlog refinements. This patch is another round of refinements to fine tune the new ternlog infrastructure in i386's sse.md. This patch tweaks ix86_ternlog_idx to allow multiple MEM/CONST_VECTOR/VEC_DUPLICATE operands prior to splitting (before reload), when force_register is called on all but one of these operands. Conceptually during the dynamic programming, registers fill the args slots in the order 0, 1, 2, and mem-like operands fill the slots in the order 2, 0, 1 [preferring the memory operand to come last]. This patch allows us to remove some of the legacy ternlog patterns in sse.md without regressions [which is left to the next and final patch in this series]. An indication that these patterns are no longer required is shown by the necessary testsuite tweaks below, where the output assembler for the legacy instructions used hexadecimal, but the new ternlog infrastructure now consistently uses decimal. 2024-06-28 Roger Sayle gcc/ChangeLog * config/i386/i386-expand.cc (ix86_ternlog_idx) <case VEC_DUPLICATE>: Add a "goto do_mem_operand" as this need not match memory_operand. <case MEM>: Only args[2] may be volatile memory operand. Allow MEM/VEC_DUPLICATE/CONST_VECTOR as args[0] and args[1]. gcc/testsuite/ChangeLog * gcc.target/i386/avx512f-andn-di-zmm-2.c: Match decimal instead of hexadecimal immediate operand to ternlog. * gcc.target/i386/avx512f-andn-si-zmm-2.c: Likewise. * gcc.target/i386/avx512f-orn-si-zmm-1.c: Likewise. * gcc.target/i386/avx512f-orn-si-zmm-2.c: Likewise. * gcc.target/i386/pr100711-3.c: Likewise. * gcc.target/i386/pr100711-4.c: Likewise. * gcc.target/i386/pr100711-5.c: Likewise. 
Diff: --- gcc/config/i386/i386-expand.cc | 35 -- .../gcc.target/i386/avx512f-andn-di-zmm-2.c| 2 +- .../gcc.target/i386/avx512f-andn-si-zmm-2.c| 2 +- .../gcc.target/i386/avx512f-orn-si-zmm-1.c | 2 +- .../gcc.target/i386/avx512f-orn-si-zmm-2.c | 2 +- gcc/testsuite/gcc.target/i386/pr100711-3.c | 2 +- gcc/testsuite/gcc.target/i386/pr100711-4.c | 2 +- gcc/testsuite/gcc.target/i386/pr100711-5.c | 2 +- 8 files changed, 39 insertions(+), 10 deletions(-) diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index eccad080f7c..dd2c3a8718e 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -25606,7 +25606,7 @@ ix86_ternlog_idx (rtx op, rtx *args) case VEC_DUPLICATE: if (!bcst_mem_operand (op, GET_MODE (op))) return -1; - /* FALLTHRU */ + goto do_mem_operand; case MEM: if (!memory_operand (op, GET_MODE (op))) @@ -25618,23 +25618,52 @@ ix86_ternlog_idx (rtx op, rtx *args) /* FALLTHRU */ case CONST_VECTOR: +do_mem_operand: if (!args[2]) { args[2] = op; return 0xaa; } /* Maximum of one volatile memory reference per expression. */ - if (side_effects_p (op) && side_effects_p (args[2])) + if (side_effects_p (op)) return -1; if (rtx_equal_p (op, args[2])) return 0xaa; - /* Check if one CONST_VECTOR is the ones-complement of the other. */ + /* Check if CONST_VECTOR is the ones-complement of args[2]. */ if (GET_CODE (op) == CONST_VECTOR && GET_CODE (args[2]) == CONST_VECTOR && rtx_equal_p (simplify_const_unary_operation (NOT, GET_MODE (op), op, GET_MODE (op)), args[2])) return 0x55; + if (!args[0]) + { + args[0] = op; + return 0xf0; + } + if (rtx_equal_p (op, args[0])) + return 0xf0; + /* Check if CONST_VECTOR is the ones-complement of args[0]. 
*/ + if (GET_CODE (op) == CONST_VECTOR + && GET_CODE (args[0]) == CONST_VECTOR + && rtx_equal_p (simplify_const_unary_operation (NOT, GET_MODE (op), + op, GET_MODE (op)), + args[0])) + return 0x0f; + if (!args[1]) + { + args[1] = op; + return 0xcc; + } + if (rtx_equal_p (op, args[1])) + return 0xcc; + /* Check if CONST_VECTOR is the ones-complement of args[1]. */ + if (GET_CODE (op) == CONST_VECTOR + && GET_CODE (args[1]) == CONST_VECTOR + && rtx_equal_p (simplify_const_unary_operation (NOT, GET_MODE (op), + op, GET_MODE (op)), +
[gcc r15-1702] i386: Handle sign_extend like zero_extend in *concatditi3_[346]
https://gcc.gnu.org/g:07e915913b6b3d4e6e210f6dbc8e7e0e8ea594c4 commit r15-1702-g07e915913b6b3d4e6e210f6dbc8e7e0e8ea594c4 Author: Roger Sayle Date: Fri Jun 28 07:16:07 2024 +0100 i386: Handle sign_extend like zero_extend in *concatditi3_[346] This patch generalizes some of the patterns in i386.md that recognize double word concatenation, so they handle sign_extend the same way that they handle zero_extend in appropriate contexts. As a motivating example consider the following function: __int128 foo(long long x, unsigned long long y) { return ((__int128)x<<64) | y; } when compiled with -O2, x86_64 currently generates: foo: movq %rdi, %rdx xorl %eax, %eax xorl %edi, %edi orq %rsi, %rax orq %rdi, %rdx ret with this patch we now generate (the same as if x is unsigned): foo: movq %rsi, %rax movq %rdi, %rdx ret Treating both extensions the same way using any_extend is valid as the top (extended) bits are "unused" after the shift by 64 (or more). In theory, the RTL optimizers might consider canonicalizing the form of extension used in these cases, but zero_extend is faster on some machines, whereas sign extension is supported via addressing modes on others, so handling both in the machine description is probably best. 2024-06-28 Roger Sayle gcc/ChangeLog * config/i386/i386.md (*concat<mode><dwi>3_3): Change zero_extend to any_extend in first operand to left shift by mode precision. (*concat<mode><dwi>3_4): Likewise. (*concat<mode><dwi>3_6): Likewise. gcc/testsuite/ChangeLog * gcc.target/i386/concatditi-1.c: New test case. 
Diff: --- gcc/config/i386/i386.md | 6 +++--- gcc/testsuite/gcc.target/i386/concatditi-1.c | 10 ++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index fd48e764469..b6ccb1e798d 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -13446,7 +13446,7 @@ [(set (match_operand: 0 "nonimmediate_operand" "=ro,r,r,&r,x") (any_or_plus: (ashift: - (zero_extend: + (any_extend: (match_operand:DWIH 1 "nonimmediate_operand" "r,m,r,m,x")) (match_operand:QI 2 "const_int_operand")) (zero_extend: @@ -13473,7 +13473,7 @@ (zero_extend: (match_operand:DWIH 1 "nonimmediate_operand" "r,m,r,m")) (ashift: - (zero_extend: + (any_extend: (match_operand:DWIH 2 "nonimmediate_operand" "r,r,m,m")) (match_operand:QI 3 "const_int_operand"] "INTVAL (operands[3]) == * BITS_PER_UNIT" @@ -13520,7 +13520,7 @@ [(set (match_operand: 0 "nonimmediate_operand" "=r,o,o,r") (any_or_plus: (ashift: - (zero_extend: + (any_extend: (match_operand:DWIH 1 "nonimmediate_operand" "r,r,r,m")) (match_operand:QI 2 "const_int_operand")) (match_operand: 3 "const_scalar_int_operand" "n,n,Wd,n")))] diff --git a/gcc/testsuite/gcc.target/i386/concatditi-1.c b/gcc/testsuite/gcc.target/i386/concatditi-1.c new file mode 100644 index 000..25c2a95586b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/concatditi-1.c @@ -0,0 +1,10 @@ +/* { dg-do compile { target int128 } } */ +/* { dg-options "-O2" } */ + +__int128 foo(long long x, unsigned long long y) +{ + return ((__int128)x<<64) | y; +} + +/* { dg-final { scan-assembler-not "xorl" } } */ +/* { dg-final { scan-assembler-not "orq" } } */