[gcc r15-3790] gcn/mkoffload.cc: Re-add fprintf for #include of stdlib.h/stdbool.h
https://gcc.gnu.org/g:dfb750798b07e7f412d52c22145ca8bce1911ac8 commit r15-3790-gdfb750798b07e7f412d52c22145ca8bce1911ac8 Author: Tobias Burnus Date: Mon Sep 23 10:24:05 2024 +0200 gcn/mkoffload.cc: Re-add fprintf for #include of stdlib.h/stdbool.h In commit r15-3629-g508ef585243d4674d06b0737bfe8769fc18f824f, #embed was added and the no-longer-required fprintf '#include' lines were removed, missing somehow that with -mstack-size=, the generated configure_stack_size will use 'setenv' and 'true'. gcc/ChangeLog: * config/gcn/mkoffload.cc (process_asm): (Re)add the fprintf lines for stdlib.h/stdbool.h inclusion if gcn_stack_size is used. Diff: --- gcc/config/gcn/mkoffload.cc | 6 ++ 1 file changed, 6 insertions(+) diff --git a/gcc/config/gcn/mkoffload.cc b/gcc/config/gcn/mkoffload.cc index 1f6337719e9d..1a524ced6535 100644 --- a/gcc/config/gcn/mkoffload.cc +++ b/gcc/config/gcn/mkoffload.cc @@ -613,6 +613,12 @@ process_asm (FILE *in, FILE *out, FILE *cfile) struct oaccdims *dims = XOBFINISH (&dims_os, struct oaccdims *); struct regcount *regcounts = XOBFINISH (&regcounts_os, struct regcount *); + if (gcn_stack_size) +{ + fprintf (cfile, "#include <stdlib.h>\n"); + fprintf (cfile, "#include <stdbool.h>\n\n"); +} + fprintf (cfile, "static const int gcn_num_vars = %d;\n\n", var_count); fprintf (cfile, "static const int gcn_num_ind_funcs = %d;\n\n", ind_fn_count);
[gcc r15-3791] tree-optimization/116791 - Elementwise SLP vectorization
https://gcc.gnu.org/g:723f7b6db841c1a101a2f5b3b6273d8449dae39e commit r15-3791-g723f7b6db841c1a101a2f5b3b6273d8449dae39e Author: Richard Biener Date: Mon Sep 23 10:13:17 2024 +0200 tree-optimization/116791 - Elementwise SLP vectorization The following restricts the elementwise SLP vectorization to the single-lane case which is the reason I enabled it to avoid regressions with non-SLP. The PR shows that multi-line SLP loads with elementwise accesses require work, I'll open a new bug to track this for the future. PR tree-optimization/116791 * tree-vect-stmts.cc (get_group_load_store_type): Only fall back to elementwise access for single-lane SLP, restore hard failure mode for other cases. * gcc.dg/vect/pr116791.c: New testcase. Diff: --- gcc/testsuite/gcc.dg/vect/pr116791.c | 20 gcc/tree-vect-stmts.cc | 23 +-- 2 files changed, 37 insertions(+), 6 deletions(-) diff --git a/gcc/testsuite/gcc.dg/vect/pr116791.c b/gcc/testsuite/gcc.dg/vect/pr116791.c new file mode 100644 index ..d9700a88fccb --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr116791.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-mavx2" { target avx2 } } */ + +struct nine_context { + unsigned tex_stage[8][33]; +}; +struct fvec4 { + float x[2]; +}; +void f(struct fvec4 *dst, struct nine_context *context) +{ + unsigned s; + for (s = 0; s < 8; ++s) +{ + float *rgba = &dst[s].x[0]; + unsigned color = context->tex_stage[s][0]; + rgba[0] = (float)((color >> 16) & 0xFF) / 0xFF; + rgba[1] = (float)((color >> 8) & 0xFF) / 0xFF; +} +} diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index b72b54d66687..ad08fbe55110 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -2190,12 +2190,23 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, && single_element_p && maybe_gt (group_size, TYPE_VECTOR_SUBPARTS (vectype))) { - *memory_access_type = VMAT_ELEMENTWISE; - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
-"single-element interleaving not supported " -"for not adjacent vector loads, using " -"elementwise access\n"); + if (SLP_TREE_LANES (slp_node) == 1) + { + *memory_access_type = VMAT_ELEMENTWISE; + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, +"single-element interleaving not supported " +"for not adjacent vector loads, using " +"elementwise access\n"); + } + else + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, +"single-element interleaving not supported " +"for not adjacent vector loads\n"); + return false; + } } } }
[gcc r15-3792] middle-end: Insert invariant instructions before the gsi [PR116812]
https://gcc.gnu.org/g:09892448ebd8c396a26b2c09ba71f1e5a8dc42d7 commit r15-3792-g09892448ebd8c396a26b2c09ba71f1e5a8dc42d7 Author: Tamar Christina Date: Mon Sep 23 11:45:43 2024 +0100 middle-end: Insert invariant instructions before the gsi [PR116812] The new invariant statements should be inserted before the current statement and not after. This goes fine 99% of the time but when the current statement is a gcond the control flow gets corrupted. gcc/ChangeLog: PR tree-optimization/116812 * tree-vect-slp.cc (vect_slp_region): Fix insertion. gcc/testsuite/ChangeLog: PR tree-optimization/116812 * gcc.dg/vect/pr116812.c: New test. Diff: --- gcc/testsuite/gcc.dg/vect/pr116812.c | 17 + gcc/tree-vect-slp.cc | 6 ++ 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/gcc/testsuite/gcc.dg/vect/pr116812.c b/gcc/testsuite/gcc.dg/vect/pr116812.c new file mode 100644 index ..3e83c13d94bd --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr116812.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-O2 -fno-tree-dce -fno-tree-dse" } */ + +int a, b, c, d, e, f[2], g, h; +int k(int j) { return 2 >> a ? 2 >> a : a; } +int main() { + int i; + for (; g; g = k(d = 0)) +; + if (a) +b && h; + for (e = 0; e < 2; e++) +c = d & 1 ? d : 0; + for (i = 0; i < 2; i++) +f[i] = 0; + return 0; +} diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index 600987dd6e5d..7161492f5114 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -9168,10 +9168,8 @@ vect_slp_region (vec bbs, vec datarefs, dump_printf_loc (MSG_NOTE, vect_location, "-->generating invariant statements\n"); - gimple_stmt_iterator gsi; - gsi = gsi_after_labels (bb_vinfo->bbs[0]); - gsi_insert_seq_after (&gsi, bb_vinfo->inv_pattern_def_seq, - GSI_CONTINUE_LINKING); + bb_vinfo->insert_seq_on_entry (NULL, +bb_vinfo->inv_pattern_def_seq); } } else
[gcc r15-3795] Add myself to write after approval
https://gcc.gnu.org/g:346f767fff859dd7fdd79b7f5e150d344e0f288c commit r15-3795-g346f767fff859dd7fdd79b7f5e150d344e0f288c Author: Saurabh Jha Date: Mon Sep 23 12:30:50 2024 +0100 Add myself to write after approval ChangeLog: * MAINTAINERS: Add myself to write after approval. Diff: --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index e9fafaf45a7e..0ea4db20f882 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -557,6 +557,7 @@ Andrew Jenner andrewjenner Haochen Jiang - Qian Jianhua- Michal Jiresmjires +Saurabh Jha - Janis Johnson janis Teresa Johnson tejohnson Kean Johnston -
[gcc r15-3796] c++: Don't ICE due to artificial constructor parameters [PR116722]
https://gcc.gnu.org/g:d7bf5e53887a467b8c5c8439e5aae3ad4e11e62e commit r15-3796-gd7bf5e53887a467b8c5c8439e5aae3ad4e11e62e Author: Simon Martin Date: Wed Sep 18 12:35:27 2024 +0200 c++: Don't ICE due to artificial constructor parameters [PR116722] The following code triggers an ICE === cut here === class base {}; class derived : virtual public base { public: template constexpr derived(Arg) {} }; int main() { derived obj(1.); } === cut here === The problem is that cxx_bind_parameters_in_call ends up attempting to convert a REAL_CST (the first non artificial parameter) to INTEGER_TYPE (the type of the __in_chrg parameter), which ICEs. This patch changes cxx_bind_parameters_in_call to return early if it's called with a *structor that has an __in_chrg or __vtt_parm parameter since the expression won't be a constant expression. Note that in the test case, the constructor is not constexpr-suitable, however it's OK since it's a template according to my read of paragraph (3) of [dcl.constexpr]. PR c++/116722 gcc/cp/ChangeLog: * constexpr.cc (cxx_bind_parameters_in_call): Leave early for {con,de}structors of classes with virtual bases. gcc/testsuite/ChangeLog: * g++.dg/cpp0x/constexpr-ctor22.C: New test. Diff: --- gcc/cp/constexpr.cc | 11 ++- gcc/testsuite/g++.dg/cpp0x/constexpr-ctor22.C | 15 +++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc index f6fd059be466..5c6696740fc9 100644 --- a/gcc/cp/constexpr.cc +++ b/gcc/cp/constexpr.cc @@ -1862,6 +1862,15 @@ cxx_bind_parameters_in_call (const constexpr_ctx *ctx, tree t, tree fun, int nparms = list_length (parms); int nbinds = nargs < nparms ? nargs : nparms; tree binds = make_tree_vec (nbinds); + + /* The call is not a constant expression if it involves the cdtor for a type + with virtual bases. 
*/ + if (DECL_HAS_IN_CHARGE_PARM_P (fun) || DECL_HAS_VTT_PARM_P (fun)) +{ + *non_constant_p = true; + return binds; +} + for (i = 0; i < nargs; ++i) { tree x, arg; @@ -1871,7 +1880,7 @@ cxx_bind_parameters_in_call (const constexpr_ctx *ctx, tree t, tree fun, x = get_nth_callarg (t, i); /* For member function, the first argument is a pointer to the implied object. For a constructor, it might still be a dummy object, in - which case we get the real argument from ctx. */ +which case we get the real argument from ctx. */ if (i == 0 && DECL_CONSTRUCTOR_P (fun) && is_dummy_object (x)) { diff --git a/gcc/testsuite/g++.dg/cpp0x/constexpr-ctor22.C b/gcc/testsuite/g++.dg/cpp0x/constexpr-ctor22.C new file mode 100644 index ..279f6ec44547 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp0x/constexpr-ctor22.C @@ -0,0 +1,15 @@ +// PR c++/116722 +// We're now accepting this in spite of the virtual base class. This is OK +// according to [dcl.constexpr] 3: "Except for instantiated constexpr functions +// non-templated constexpr functions shall be constexpr-suitable". +// { dg-do compile { target c++11 } } + +class base {}; +class derived : virtual public base { +public: + template + constexpr derived(Arg) {} +}; +int main() { + derived obj(1.); +}
[gcc r15-3793] tree-optimization/116796 - virtual LC SSA broken after unrolling
https://gcc.gnu.org/g:e97c75d668bacd8a2e901b819e00156f6e9f4c6c commit r15-3793-ge97c75d668bacd8a2e901b819e00156f6e9f4c6c Author: Richard Biener Date: Mon Sep 23 11:05:37 2024 +0200 tree-optimization/116796 - virtual LC SSA broken after unrolling When the unroller unloops loops it tracks whether it changes any nesting relationship of remaining loops but when scanning a loop's preheader it fails to pass down the LC-SSA-invalidated bitmap, losing the fact that an unrolled formerly inner loop can now be placed on an exit of its outer loop. The following fixes that. PR tree-optimization/116796 * cfgloopmanip.cc (fix_loop_placements): Get LC-SSA-invalidated bitmap and pass it on. (remove_path): Pass LC-SSA-invalidated to fix_loop_placements. Diff: --- gcc/cfgloopmanip.cc | 10 ++ 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/gcc/cfgloopmanip.cc b/gcc/cfgloopmanip.cc index 3707db2fdb39..d37d351fdf3e 100644 --- a/gcc/cfgloopmanip.cc +++ b/gcc/cfgloopmanip.cc @@ -39,7 +39,7 @@ static void loop_redirect_edge (edge, basic_block); static void remove_bbs (basic_block *, int); static bool rpe_enum_p (const_basic_block, const void *); static int find_path (edge, basic_block **); -static void fix_loop_placements (class loop *, bool *); +static void fix_loop_placements (class loop *, bool *, bitmap); static bool fix_bb_placement (basic_block); static void fix_bb_placements (basic_block, bool *, bitmap); @@ -415,7 +415,8 @@ remove_path (edge e, bool *irred_invalidated, /* Fix placements of basic blocks inside loops and the placement of loops in the loop tree. 
*/ fix_bb_placements (from, irred_invalidated, loop_closed_ssa_invalidated); - fix_loop_placements (from->loop_father, irred_invalidated); + fix_loop_placements (from->loop_father, irred_invalidated, + loop_closed_ssa_invalidated); if (local_irred_invalidated && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS)) @@ -1048,7 +1049,8 @@ unloop (class loop *loop, bool *irred_invalidated, invalidate the information about irreducible regions. */ static void -fix_loop_placements (class loop *loop, bool *irred_invalidated) +fix_loop_placements (class loop *loop, bool *irred_invalidated, +bitmap loop_closed_ssa_invalidated) { class loop *outer; @@ -1064,7 +1066,7 @@ fix_loop_placements (class loop *loop, bool *irred_invalidated) to the loop. So call fix_bb_placements to fix up the placement of the preheader and (possibly) of its predecessors. */ fix_bb_placements (loop_preheader_edge (loop)->src, -irred_invalidated, NULL); +irred_invalidated, loop_closed_ssa_invalidated); loop = outer; } }
[gcc r15-3794] tree-optimization/116810 - out-of-bound access to matches[]
https://gcc.gnu.org/g:2c04f175de4f3985d783511662375d52f2cf4ad8 commit r15-3794-g2c04f175de4f3985d783511662375d52f2cf4ad8 Author: Richard Biener Date: Mon Sep 23 10:30:32 2024 +0200 tree-optimization/116810 - out-of-bound access to matches[] The following makes sure to apply forced splitting of groups for forced single-lane SLP only when the group being analyzed has more than one lane. This avoids an out-of-bound access to matches[]. PR tree-optimization/116810 * tree-vect-slp.cc (vect_build_slp_instance): Only force splitting for group_size > 1. Diff: --- gcc/tree-vect-slp.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index 7161492f5114..ab49bb0e7ee1 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -3715,7 +3715,7 @@ vect_build_slp_instance (vec_info *vinfo, unsigned i; slp_tree node = NULL; - if (force_single_lane) + if (group_size > 1 && force_single_lane) { matches[0] = true; matches[1] = false;
[gcc r15-3809] [MAINTAINERS] Fix myself in order and add username
https://gcc.gnu.org/g:6141d0c98a518148a8a8c35dabd8ba053fbebf18 commit r15-3809-g6141d0c98a518148a8a8c35dabd8ba053fbebf18 Author: Saurabh Jha Date: Mon Sep 23 16:17:47 2024 +0100 [MAINTAINERS] Fix myself in order and add username ChangeLog: * MAINTAINERS: Fix sort order and add username. Diff: --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 0ea4db20f882..3b4cf9d20d80 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -554,10 +554,10 @@ Sam James sjames Surya Kumari Jangalajskumari Jakub Jelinek jakub Andrew Jenner andrewjenner +Saurabh Jha saurabhjha Haochen Jiang - Qian Jianhua- Michal Jiresmjires -Saurabh Jha - Janis Johnson janis Teresa Johnson tejohnson Kean Johnston -
[gcc r15-3810] aarch64: Add AdvSIMD faminmax intrinsics
https://gcc.gnu.org/g:bfefed6c5bb62648cf0303d377c06cb45ab1f24a commit r15-3810-gbfefed6c5bb62648cf0303d377c06cb45ab1f24a Author: Saurabh Jha Date: Tue Aug 6 16:34:49 2024 +0100 aarch64: Add AdvSIMD faminmax intrinsics The AArch64 FEAT_FAMINMAX extension is optional from Armv9.2-a and mandatory from Armv9.5-a. It introduces instructions for computing the floating point absolute maximum and minimum of the two vectors element-wise. This patch introduces AdvSIMD faminmax intrinsics. The intrinsics of this extension are implemented as the following builtin functions: * vamax_f16 * vamaxq_f16 * vamax_f32 * vamaxq_f32 * vamaxq_f64 * vamin_f16 * vaminq_f16 * vamin_f32 * vaminq_f32 * vaminq_f64 We are defining a new way to add AArch64 AdvSIMD intrinsics by listing all the intrinsics in a .def file and then using that .def file to initialise various data structures. This would lead to more concise code and easier addition of the new AdvSIMD intrinsics in future. The faminmax intrinsics are defined using the new approach. gcc/ChangeLog: * config/aarch64/aarch64-builtins.cc (ENTRY): Macro to parse the contents of aarch64-simd-pragma-builtins.def. (ENTRY_VHSDF): Macro to parse the contents of aarch64-simd-pragma-builtins.def. (enum aarch64_builtins): New enum values for faminmax builtins via aarch64-simd-pragma-builtins.def. (enum class aarch64_builtin_signatures): Enum class to specify the number of operands a builtin will take. (struct aarch64_pragma_builtins_data): Struct to hold data from aarch64-simd-pragma-builtins.def. (aarch64_fntype): New function to define function types of intrinsics given an object of type aarch64_pragma_builtins_data. (aarch64_init_pragma_builtins): New function to define pragma builtins. (aarch64_get_pragma_builtin): New function to get a row of aarch64_pragma_builtins, given code. (handle_arm_neon_h): Modify to call aarch64_init_pragma_builtins. 
(aarch64_general_check_builtin_call): Modify to check whether required flag is being used for pragma builtins. (aarch64_expand_pragma_builtin): New function to emit instructions of pragma_builtin. (aarch64_general_expand_builtin): Modify to call aarch64_expand_pragma_builtin. * config/aarch64/aarch64-option-extensions.def (AARCH64_OPT_EXTENSION): Introduce new flag for this extension. * config/aarch64/aarch64-simd.md (@aarch64_): Instruction pattern for faminmax intrinsics. * config/aarch64/aarch64.h (TARGET_FAMINMAX): Introduce new flag for this extension. * config/aarch64/iterators.md: New iterators and unspecs. * doc/invoke.texi: Document extension in AArch64 Options. * config/aarch64/aarch64-simd-pragma-builtins.def: New file to list pragma builtins. gcc/testsuite/ChangeLog: * gcc.target/aarch64/simd/faminmax-builtins-no-flag.c: New test. * gcc.target/aarch64/simd/faminmax-builtins.c: New test. Diff: --- gcc/config/aarch64/aarch64-builtins.cc | 119 + gcc/config/aarch64/aarch64-option-extensions.def | 2 + .../aarch64/aarch64-simd-pragma-builtins.def | 23 gcc/config/aarch64/aarch64-simd.md | 10 ++ gcc/config/aarch64/aarch64.h | 4 + gcc/config/aarch64/iterators.md| 9 ++ gcc/doc/invoke.texi| 2 + .../aarch64/simd/faminmax-builtins-no-flag.c | 10 ++ .../gcc.target/aarch64/simd/faminmax-builtins.c| 115 9 files changed, 294 insertions(+) diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc index eb878b933fe5..6266bea3b39c 100644 --- a/gcc/config/aarch64/aarch64-builtins.cc +++ b/gcc/config/aarch64/aarch64-builtins.cc @@ -757,6 +757,18 @@ typedef struct #define VAR1(T, N, MAP, FLAG, A) \ AARCH64_SIMD_BUILTIN_##T##_##N##A, +#undef ENTRY +#define ENTRY(N, S, M, U, F) \ + AARCH64_##N, + +#undef ENTRY_VHSDF +#define ENTRY_VHSDF(NAME, SIGNATURE, UNSPEC, EXTENSIONS) \ + AARCH64_##NAME##_f16, \ + AARCH64_##NAME##q_f16, \ + AARCH64_##NAME##_f32, \ + AARCH64_##NAME##q_f32, \ + AARCH64_##NAME##q_f64, + enum aarch64_builtins { 
AARCH64_BUILTIN_MIN, @@ -829,6 +841,10 @@ enum aarch64_builtins AARCH64_RBIT, AARCH64_RBITL, AARCH64_RBITLL, + /* Pragma builtins. */ + AARCH64_PRAGMA_BUILTIN_START, +#include "aarch64-simd-pragma-builtins.def" + AARCH64_PRAGMA_BUILTIN_END, /* System register builtins. */ AARCH64_RSR, AARCH64_
[gcc r15-3811] aarch64: Add codegen support for AdvSIMD faminmax
https://gcc.gnu.org/g:c1fb78fb03caede01b02a1ebb3275ac98343d468 commit r15-3811-gc1fb78fb03caede01b02a1ebb3275ac98343d468 Author: Saurabh Jha Date: Wed Aug 7 12:34:20 2024 +0100 aarch64: Add codegen support for AdvSIMD faminmax The AArch64 FEAT_FAMINMAX extension is optional from Armv9.2-a and mandatory from Armv9.5-a. It introduces instructions for computing the floating point absolute maximum and minimum of the two vectors element-wise. This patch adds code generation support for famax and famin in terms of existing RTL operators. famax/famin is equivalent to first taking abs of the operands and then taking smax/smin on the results of abs. famax/famin (a, b) = smax/smin (abs (a), abs (b)) This fusion of operators is only possible when -march=armv9-a+faminmax flags are passed. We also need to pass -ffast-math flag; if we don't, then a statement like c[i] = __builtin_fmaxf16 (a[i], b[i]); is RTL expanded to UNSPEC_FMAXNM instead of smax (likewise for smin). This code generation is only available on -O2 or -O3 as that is when auto-vectorization is enabled. gcc/ChangeLog: * config/aarch64/aarch64-simd.md (*aarch64_faminmax_fused): Instruction pattern for faminmax codegen. * config/aarch64/iterators.md: Attribute for faminmax codegen. gcc/testsuite/ChangeLog: * gcc.target/aarch64/simd/faminmax-codegen-no-flag.c: New test. * gcc.target/aarch64/simd/faminmax-codegen.c: New test. * gcc.target/aarch64/simd/faminmax-no-codegen.c: New test. 
Diff: --- gcc/config/aarch64/aarch64-simd.md | 9 + gcc/config/aarch64/iterators.md| 3 + .../aarch64/simd/faminmax-codegen-no-flag.c| 217 + .../gcc.target/aarch64/simd/faminmax-codegen.c | 197 +++ .../gcc.target/aarch64/simd/faminmax-no-codegen.c | 267 + 5 files changed, 693 insertions(+) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 67f0fe26f938..2a44aa3fcc33 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -9920,3 +9920,12 @@ "TARGET_FAMINMAX" "\t%0., %1., %2." ) + +(define_insn "*aarch64_faminmax_fused" + [(set (match_operand:VHSDF 0 "register_operand" "=w") + (FMAXMIN:VHSDF + (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")) + (abs:VHSDF (match_operand:VHSDF 2 "register_operand" "w"] + "TARGET_FAMINMAX" + "\t%0., %1., %2." +) diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 17ac5e073aa1..c2fcd18306e4 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -4472,3 +4472,6 @@ (define_int_attr faminmax_uns_op [(UNSPEC_FAMAX "famax") (UNSPEC_FAMIN "famin")]) + +(define_code_attr faminmax_op + [(smax "famax") (smin "famin")]) diff --git a/gcc/testsuite/gcc.target/aarch64/simd/faminmax-codegen-no-flag.c b/gcc/testsuite/gcc.target/aarch64/simd/faminmax-codegen-no-flag.c new file mode 100644 index ..6688a7883b7d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/faminmax-codegen-no-flag.c @@ -0,0 +1,217 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-O3 -ffast-math -march=armv9-a" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "arm_neon.h" + +#pragma GCC target "+nosve" + +/* +** test_vamax_f16: +** fabsv1.4h, v1.4h +** fabsv0.4h, v0.4h +** fmaxnm v0.4h, v0.4h, v1.4h +** ret +*/ +float16x4_t +test_vamax_f16 (float16x4_t a, float16x4_t b) +{ + int i; + float16x4_t c; + + for (i = 0; i < 4; ++i) { +a[i] = __builtin_fabsf16 (a[i]); +b[i] = __builtin_fabsf16 (b[i]); +c[i] = 
__builtin_fmaxf16 (a[i], b[i]); + } + return c; +} + +/* +** test_vamaxq_f16: +** fabsv1.8h, v1.8h +** fabsv0.8h, v0.8h +** fmaxnm v0.8h, v0.8h, v1.8h +** ret +*/ +float16x8_t +test_vamaxq_f16 (float16x8_t a, float16x8_t b) +{ + int i; + float16x8_t c; + + for (i = 0; i < 8; ++i) { +a[i] = __builtin_fabsf16 (a[i]); +b[i] = __builtin_fabsf16 (b[i]); +c[i] = __builtin_fmaxf16 (a[i], b[i]); + } + return c; +} + +/* +** test_vamax_f32: +** fabsv1.2s, v1.2s +** fabsv0.2s, v0.2s +** fmaxnm v0.2s, v0.2s, v1.2s +** ret +*/ +float32x2_t +test_vamax_f32 (float32x2_t a, float32x2_t b) +{ + int i; + float32x2_t c; + + for (i = 0; i < 2; ++i) { +a[i] = __builtin_fabsf32 (a[i]); +b[i] = __builtin_fabsf32 (b[i]); +c[i] = __builtin_fmaxf32 (a[i], b[i]); + } + return c; +} + +/* +** test_vamaxq_f32: +** fabsv1.4s, v1.4s +** fabsv0.4s, v0.4s +** fmaxnm v0.4s, v0.4s, v1.4s +** ret +*/ +float32x4_t +test_vamaxq_f32 (float32x4_t a, float32x4_t b) +{ + int i; + float32x4_t c; + + for (i = 0
[gcc r15-3785] Update email in MAINTAINERS file.
https://gcc.gnu.org/g:52783489bce169f71cf7cf47ee435ebc6636675a commit r15-3785-g52783489bce169f71cf7cf47ee435ebc6636675a Author: Aldy Hernandez Date: Mon Sep 23 09:36:49 2024 +0200 Update email in MAINTAINERS file. ChangeLog: * MAINTAINERS: Update email and add myself to DCO. Diff: --- MAINTAINERS | 9 + 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index cfd96c9f33ec..e9fafaf45a7e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -116,7 +116,7 @@ riscv port Jim Wilson rs6000/powerpc port David Edelsohn rs6000/powerpc port Segher Boessenkool rs6000/powerpc port Kewen Lin -rs6000 vector extns Aldy Hernandez +rs6000 vector extns Aldy Hernandez rx port Nick Clifton s390 port Ulrich Weigand s390 port Andreas Krebbel @@ -213,7 +213,7 @@ c++ runtime libsJonathan Wakely c++ runtime libs special modes François Dumont fixincludes Bruce Korb *gimpl* Jakub Jelinek -*gimpl* Aldy Hernandez +*gimpl* Aldy Hernandez *gimpl* Jason Merrill gcse.cc Jeff Law global opt frameworkJeff Law @@ -240,7 +240,7 @@ option handling Joseph Myers middle-end Jeff Law middle-end Ian Lance Taylor middle-end Richard Biener -*vrp, rangerAldy Hernandez +*vrp, rangerAldy Hernandez *vrp, rangerAndrew MacLeod tree-ssaAndrew MacLeod tree browser/unparser Sebastian Pop @@ -518,7 +518,7 @@ Daniel Hellstromdanielh Fergus Henderson- Richard Henderson rth Stuart Hendersonshenders -Aldy Hernandez aldyh +Aldy Hernandez aldyh Philip Herron redbrain Marius Hillenbrand - Matthew Hiller - @@ -948,3 +948,4 @@ Jonathan Wakely Alexander Westbrooks Chung-Ju Wu Pengxuan Zheng +Aldy Hernandez
[gcc r15-3799] OpenMP: Fix omp_get_device_from_uid, minor cleanup
https://gcc.gnu.org/g:cdb9aa0f623ec7899da445a47f4a502b2987dc7b commit r15-3799-gcdb9aa0f623ec7899da445a47f4a502b2987dc7b Author: Tobias Burnus Date: Mon Sep 23 15:58:39 2024 +0200 OpenMP: Fix omp_get_device_from_uid, minor cleanup In Fortran, omp_get_device_from_uid can also accept substrings, which are then not NUL terminated. Fixed by introducing a fortran.c wrapper function. Additionally, in case of a fail the plugin functions now return NULL instead of failing fatally such that a fall-back UID is generated. gcc/ChangeLog: * omp-general.cc (omp_runtime_api_procname): Strip "omp_" from string; move get_device_from_uid as now a '_' suffix exists. libgomp/ChangeLog: * fortran.c (omp_get_device_from_uid_): New function. * libgomp.map (GOMP_6.0): Add it. * oacc-host.c (host_dispatch): Init '.uid' and '.get_uid_func'. * omp_lib.f90.in: Make it used by removing bind(C). * omp_lib.h.in: Likewise. * target.c (omp_get_device_from_uid): Ensure the device is initialized. * plugin/plugin-gcn.c (GOMP_OFFLOAD_get_uid): Add function comment; return NULL in case of an error. * plugin/plugin-nvptx.c (GOMP_OFFLOAD_get_uid): Likewise. * testsuite/libgomp.fortran/device_uid.f90: Update to test substrings. 
Diff: --- gcc/omp-general.cc | 4 ++-- libgomp/fortran.c| 18 ++ libgomp/libgomp.map | 1 + libgomp/oacc-host.c | 2 ++ libgomp/omp_lib.f90.in | 5 ++--- libgomp/omp_lib.h.in | 5 ++--- libgomp/plugin/plugin-gcn.c | 8 +++- libgomp/plugin/plugin-nvptx.c| 7 +-- libgomp/target.c | 7 +-- libgomp/testsuite/libgomp.fortran/device_uid.f90 | 18 -- 10 files changed, 60 insertions(+), 15 deletions(-) diff --git a/gcc/omp-general.cc b/gcc/omp-general.cc index 12788ad02490..3dfbc315056f 100644 --- a/gcc/omp-general.cc +++ b/gcc/omp-general.cc @@ -3260,7 +3260,6 @@ omp_runtime_api_procname (const char *name) "alloc", "calloc", "free", - "get_device_from_uid", "get_interop_int", "get_interop_ptr", "get_mapped_ptr", @@ -3290,6 +3289,7 @@ omp_runtime_api_procname (const char *name) "get_cancellation", "get_default_allocator", "get_default_device", + "get_device_from_uid", "get_device_num", "get_dynamic", "get_initial_device", @@ -3339,7 +3339,7 @@ omp_runtime_api_procname (const char *name) as DECL_NAME only omp_* and omp_*_8 appear. */ "display_env", "get_ancestor_thread_num", - "omp_get_uid_from_device", + "get_uid_from_device", "get_partition_place_nums", "get_place_num_procs", "get_place_proc_ids", diff --git a/libgomp/fortran.c b/libgomp/fortran.c index 9b7f093555b0..7976e5b9638d 100644 --- a/libgomp/fortran.c +++ b/libgomp/fortran.c @@ -834,6 +834,24 @@ omp_get_interop_rc_desc_ (const char **res, size_t *res_len, *res_len = *res ? strlen (*res) : 0; } +int +omp_get_device_from_uid_ (const char *uid, size_t uid_len) +{ +#ifndef LIBGOMP_OFFLOADED_ONLY + char *str = __builtin_alloca ((uid_len + 1) * sizeof (char)); + memcpy (str, uid, uid_len * sizeof (char)); + str[uid_len] = '\0'; + return omp_get_device_from_uid (str); +#else + /* Inside the target region, invoking this routine is undefined + behavior; thus, resolve it already here - instead of inside + libgomp/config/.../target.c. 
+ Note that on nvptx __builtin_alloca is defined, but fails with a sorry + during compilation, as it is unsupported until isa 7.3 / sm_52. */ + return omp_invalid_device; +#endif +} + void omp_get_uid_from_device_ (const char **res, size_t *res_len, int32_t device_num) diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map index 0023d3e1b6de..4530b3adc94e 100644 --- a/libgomp/libgomp.map +++ b/libgomp/libgomp.map @@ -446,6 +446,7 @@ GOMP_5.1.3 { GOMP_6.0 { global: omp_get_device_from_uid; + omp_get_device_from_uid_; omp_get_uid_from_device; omp_get_uid_from_device_; omp_get_uid_from_device_8_; diff --git a/libgomp/oacc-host.c b/libgomp/oacc-host.c index 5efdf7fb7968..7d4362ebd6ce 100644 --- a/libgomp/oacc-host.c +++ b/libgomp/oacc-host.c @@ -263,6 +263,7 @@ host_openacc_destroy_thread_data (void *tls_data __attribute__ ((unused))) static struct gomp_device_descr host_dispatch = { .name = "host", +.uid = NULL, .capabilities = (GOMP_OFFLOAD_CAP_SHARED_MEM | GOMP_OFFLOAD_CAP_NATIVE_EXEC | GOMP_OFFLOAD_CAP_OPENACC_200), @@ -270,6 +271,7 @@ static struct
[gcc r15-3798] arc: Remove mlra option [PR113954]
https://gcc.gnu.org/g:ffd861c808f307c865659b556dd5a8c922bd6a51 commit r15-3798-gffd861c808f307c865659b556dd5a8c922bd6a51 Author: Claudiu Zissulescu Date: Mon Sep 23 15:49:36 2024 +0300 arc: Remove mlra option [PR113954] The target dependent mlra option was designed to be able to quickly switch between LRA and reload. The reload register allocator step is scheduled for retirement, thus, remove the functionality of mlra, keeping it for backward compatibility. PR target/113954 gcc/ChangeLog: * config/arc/arc.cc (TARGET_LRA_P): Always return true. (arc_lra_p): Remove. * config/arc/arc.h (TARGET_LRA): Remove. * config/arc/arc.opt (mlra): Change it to do nothing. * doc/invoke.texi (mlra): Update option description. Signed-off-by: Claudiu Zissulescu Diff: --- gcc/config/arc/arc.cc | 10 +- gcc/config/arc/arc.h | 4 gcc/config/arc/arc.opt | 4 ++-- gcc/doc/invoke.texi| 4 +--- 4 files changed, 4 insertions(+), 18 deletions(-) diff --git a/gcc/config/arc/arc.cc b/gcc/config/arc/arc.cc index c800226b179b..a225adeff573 100644 --- a/gcc/config/arc/arc.cc +++ b/gcc/config/arc/arc.cc @@ -721,7 +721,7 @@ static rtx arc_legitimize_address_0 (rtx, rtx, machine_mode mode); arc_no_speculation_in_delay_slots_p #undef TARGET_LRA_P -#define TARGET_LRA_P arc_lra_p +#define TARGET_LRA_P hook_bool_void_true #define TARGET_REGISTER_PRIORITY arc_register_priority /* Stores with scaled offsets have different displacement ranges. */ #define TARGET_DIFFERENT_ADDR_DISPLACEMENT_P hook_bool_void_true @@ -10156,14 +10156,6 @@ arc_eh_uses (int regno) return false; } -/* Return true if we use LRA instead of reload pass. */ - -bool -arc_lra_p (void) -{ - return arc_lra_flag; -} - /* ??? Should we define TARGET_REGISTER_PRIORITY? We might perfer to use q registers, because some insn are shorter with them. 
OTOH we already have separate alternatives for this purpose, and other diff --git a/gcc/config/arc/arc.h b/gcc/config/arc/arc.h index 0a1ecb71d893..4cadef7a2b2b 100644 --- a/gcc/config/arc/arc.h +++ b/gcc/config/arc/arc.h @@ -1660,8 +1660,4 @@ enum /* The default option for BI/BIH instructions. */ #define DEFAULT_BRANCH_INDEX 0 -#ifndef TARGET_LRA -#define TARGET_LRA arc_lra_p() -#endif - #endif /* GCC_ARC_H */ diff --git a/gcc/config/arc/arc.opt b/gcc/config/arc/arc.opt index 5abb2977626d..7b9318335bec 100644 --- a/gcc/config/arc/arc.opt +++ b/gcc/config/arc/arc.opt @@ -401,8 +401,8 @@ Pass -marclinux_prof option through to linker. ;; lra is still unproven for ARC, so allow to fall back to reload with -mno-lra. mlra -Target Var(arc_lra_flag) Init(1) Save -Use LRA instead of reload. +Target Ignore +Does nothing. Preserved for backward compatibility. mlra-priority-none Target RejectNegative Var(arc_lra_priority_tag, ARC_LRA_PRIORITY_NONE) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 032adfff5fc5..7e4f0ca7a620 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -22716,9 +22716,7 @@ the case. @opindex mlra @item -mlra -Enable Local Register Allocation. This is still experimental for ARC, -so by default the compiler uses standard reload -(i.e.@: @option{-mno-lra}). +Does nothing. Preserved for backward compatibility. @opindex mlra-priority-none @item -mlra-priority-none
[gcc r15-3800] aarch64: store signing key and signing method in DWARF _Unwind_FrameState
https://gcc.gnu.org/g:f531673917e4f80ad51eda0d806f0479c501a907 commit r15-3800-gf531673917e4f80ad51eda0d806f0479c501a907 Author: Matthieu Longo Date: Mon Sep 23 15:03:30 2024 +0100 aarch64: store signing key and signing method in DWARF _Unwind_FrameState This patch is only a refactoring of the existing implementation of PAuth and return-address signing. The existing behavior is preserved. _Unwind_FrameState already contains several pieces of CIE and FDE information (see the attributes below the comment "The information we care about from the CIE/FDE" in libgcc/unwind-dw2.h). The patch aims at moving the information from DWARF CIE (signing key stored in the augmentation string) and FDE (the used signing method) into _Unwind_FrameState alongside the already-stored CIE and FDE information. Note: this information has to be saved in frame_state_reg_info instead of _Unwind_FrameState as it needs to be savable by DW_CFA_remember_state and restorable by DW_CFA_restore_state, which both rely on the attribute "prev". This new information in _Unwind_FrameState simplifies the look-up of the signing key when the return address is demangled. It also allows future signing methods to be easily added. _Unwind_FrameState is not a part of the public API of libunwind, so the change is backward compatible. A new architecture-specific handler MD_ARCH_EXTENSION_FRAME_INIT allows resetting values (if needed) in the frame state and unwind context before changing the frame state to the caller context. A new architecture-specific handler MD_ARCH_EXTENSION_CIE_AUG_HANDLER isolates the architecture-specific augmentation strings in the AArch64 backend, and allows other architectures to reuse augmentation strings that would have clashed with AArch64 DWARF extensions. aarch64_demangle_return_addr, DW_CFA_AARCH64_negate_ra_state and DW_CFA_val_expression cases in libgcc/unwind-dw2-execute_cfa.h were documented to clarify where the value of the RA state register is stored (FS and CONTEXT respectively). 
libgcc/ChangeLog: * config/aarch64/aarch64-unwind.h (AARCH64_DWARF_RA_STATE_MASK): The mask for RA state register. (aarch64_ra_signing_method_t): The diversifiers used to sign a function's return address. (aarch64_pointer_auth_key): The key used to sign a function's return address. (aarch64_cie_signed_with_b_key): Deleted as the signing key is available now in _Unwind_FrameState. (MD_ARCH_EXTENSION_CIE_AUG_HANDLER): New CIE augmentation string handler for architecture extensions. (MD_ARCH_EXTENSION_FRAME_INIT): New architecture-extension initialization routine for DWARF frame state and context before execution of DWARF instructions. (aarch64_context_ra_state_get): Read RA state register from CONTEXT. (aarch64_ra_state_get): Read RA state register from FS. (aarch64_ra_state_set): Write RA state register into FS. (aarch64_ra_state_toggle): Toggle RA state register in FS. (aarch64_cie_aug_handler): Handler AArch64 augmentation strings. (aarch64_arch_extension_frame_init): Initialize defaults for the signing key (PAUTH_KEY_A), and RA state register (RA_no_signing). (aarch64_demangle_return_addr): Rely on the frame registers and the signing_key attribute in _Unwind_FrameState. * unwind-dw2-execute_cfa.h: Use the right alias DW_CFA_AARCH64_negate_ra_state for __aarch64__ instead of DW_CFA_GNU_window_save. (DW_CFA_AARCH64_negate_ra_state): Save the signing method in RA state register. Toggle RA state register without resetting 'how' to REG_UNSAVED. * unwind-dw2.c: (extract_cie_info): Save the signing key in the current _Unwind_FrameState while parsing the augmentation data. (uw_frame_state_for): Reset some attributes related to architecture extensions in _Unwind_FrameState. (uw_update_context): Move authentication code to AArch64 unwinding. * unwind-dw2.h (enum register_rule): Give a name to the existing enum for the register rules, and replace 'unsigned char' by 'enum register_rule' to facilitate debugging in GDB. 
(_Unwind_FrameState): Add a new architecture-extension attribute to store the signing key. Diff: --- libgcc/config/aarch64/aarch64-unwind.h | 145 +++-- libgcc/unwind-dw2-execute_cfa.h| 26 +++--- libgcc/unwind-dw2.c| 19 +++-- libgcc/unwind-dw2.h| 17 +++- 4 files changed, 159 insertions(+), 48 deletions(-) diff --git a/libgcc/config/a
[gcc r15-3802] libgcc: hide CIE and FDE data for DWARF architecture extensions behind a handler.
https://gcc.gnu.org/g:bdf41d627c13bc5f0dc676991f4513daa9d9ae36 commit r15-3802-gbdf41d627c13bc5f0dc676991f4513daa9d9ae36 Author: Matthieu Longo Date: Mon Sep 23 15:03:37 2024 +0100 libgcc: hide CIE and FDE data for DWARF architecture extensions behind a handler. This patch provides a new handler MD_ARCH_FRAME_STATE_T to hide an architecture-specific structure containing CIE and FDE data related to DWARF architecture extensions. Hiding the architecture-specific attributes behind a handler has the following benefits: 1. isolating those data from the generic ones in _Unwind_FrameState 2. avoiding casts to custom types. 3. preserving typing information when debugging with GDB, and so facilitating their printing. This approach required adding a new header md-unwind-def.h included at the top of libgcc/unwind-dw2.h, and redirecting to the corresponding architecture header via a symbolic link. An obvious drawback is the increase in complexity with macros, and headers. It also caused a split of architecture definitions between md-unwind-def.h (types definitions used in unwind-dw2.h) and md-unwind.h (local types definitions and handlers implementations). The naming of md-unwind.h with .h extension is a bit misleading as the file is only included in the middle of unwind-dw2.c. Changing this naming would require modification of other backends, which I preferred to abstain from. Overall the benefits are worth the added complexity from my perspective. libgcc/ChangeLog: * Makefile.in: New target for symbolic link to md-unwind-def.h * config.host: New parameter md_unwind_def_header. Set it to aarch64/aarch64-unwind-def.h for AArch64 targets, or no-unwind.h by default. * config/aarch64/aarch64-unwind.h (aarch64_pointer_auth_key): Move to aarch64-unwind-def.h (aarch64_cie_aug_handler): Update. (aarch64_arch_extension_frame_init): Update. (aarch64_demangle_return_addr): Update. * configure.ac: New substitute variable md_unwind_def_header. 
* unwind-dw2.h (defined): MD_ARCH_FRAME_STATE_T. * config/aarch64/aarch64-unwind-def.h: New file. * configure: Regenerate. * config/no-unwind.h: Updated comment Diff: --- libgcc/Makefile.in | 6 - libgcc/config.host | 13 -- libgcc/config/aarch64/aarch64-unwind-def.h | 41 ++ libgcc/config/aarch64/aarch64-unwind.h | 14 -- libgcc/config/no-unwind.h | 3 ++- libgcc/configure | 2 ++ libgcc/configure.ac| 1 + libgcc/unwind-dw2.h| 6 +++-- 8 files changed, 71 insertions(+), 15 deletions(-) diff --git a/libgcc/Makefile.in b/libgcc/Makefile.in index 0e46e9ef7686..ffc45f212672 100644 --- a/libgcc/Makefile.in +++ b/libgcc/Makefile.in @@ -47,6 +47,7 @@ with_aix_soname = @with_aix_soname@ solaris_ld_v2_maps = @solaris_ld_v2_maps@ enable_execute_stack = @enable_execute_stack@ unwind_header = @unwind_header@ +md_unwind_def_header = @md_unwind_def_header@ md_unwind_header = @md_unwind_header@ sfp_machine_header = @sfp_machine_header@ thread_header = @thread_header@ @@ -358,13 +359,16 @@ SHLIBUNWIND_INSTALL = # Create links to files specified in config.host. -LIBGCC_LINKS = enable-execute-stack.c unwind.h md-unwind-support.h \ +LIBGCC_LINKS = enable-execute-stack.c \ + unwind.h md-unwind-def.h md-unwind-support.h \ sfp-machine.h gthr-default.h enable-execute-stack.c: $(srcdir)/$(enable_execute_stack) -$(LN_S) $< $@ unwind.h: $(srcdir)/$(unwind_header) -$(LN_S) $< $@ +md-unwind-def.h: $(srcdir)/config/$(md_unwind_def_header) + -$(LN_S) $< $@ md-unwind-support.h: $(srcdir)/config/$(md_unwind_header) -$(LN_S) $< $@ sfp-machine.h: $(srcdir)/config/$(sfp_machine_header) diff --git a/libgcc/config.host b/libgcc/config.host index 4fb4205478a8..5c6b656531ff 100644 --- a/libgcc/config.host +++ b/libgcc/config.host @@ -51,8 +51,10 @@ # If either is set, EXTRA_PARTS and # EXTRA_MULTILIB_PARTS inherited from the GCC # subdirectory will be ignored. -# md_unwind_headerThe name of a header file defining -# MD_FALLBACK_FRAME_STATE_FOR. 
+# md_unwind_def_header The name of a header file defining architecture +# -specific frame information types for unwinding. +# md_unwind_headerThe name of a header file defining architecture +# -specific handlers used in the unwinder. # sfp_machine_header The name of a sfp-machine.h header file for soft-fp. # Defaults to "$cpu_type/sfp-machine.h" if it exists,
[gcc r15-3801] aarch64: skip copy of RA state register into target context
https://gcc.gnu.org/g:ba3e597681b640f6f9a676ec4f6cd3ca3878cefc commit r15-3801-gba3e597681b640f6f9a676ec4f6cd3ca3878cefc Author: Matthieu Longo Date: Mon Sep 23 15:03:35 2024 +0100 aarch64: skip copy of RA state register into target context The RA state register is local to a frame, so it should not be copied to the target frame during the context installation. This patch adds a new backend handler that checks whether a register needs to be skipped or not before its installation. libgcc/ChangeLog: * config/aarch64/aarch64-unwind.h (MD_FRAME_LOCAL_REGISTER_P): new handler checking whether a register from the current context needs to be skipped before installation into the target context. (aarch64_frame_local_register): Likewise. * unwind-dw2.c (uw_install_context_1): use MD_FRAME_LOCAL_REGISTER_P. Diff: --- libgcc/config/aarch64/aarch64-unwind.h | 11 +++ libgcc/unwind-dw2.c| 5 + 2 files changed, 16 insertions(+) diff --git a/libgcc/config/aarch64/aarch64-unwind.h b/libgcc/config/aarch64/aarch64-unwind.h index 94ea5891b4eb..52bfd5409798 100644 --- a/libgcc/config/aarch64/aarch64-unwind.h +++ b/libgcc/config/aarch64/aarch64-unwind.h @@ -53,6 +53,9 @@ typedef enum { #define MD_DEMANGLE_RETURN_ADDR(context, fs, addr) \ aarch64_demangle_return_addr (context, fs, addr) +#define MD_FRAME_LOCAL_REGISTER_P(reg) \ + aarch64_frame_local_register (reg) + static inline aarch64_ra_signing_method_t aarch64_context_ra_state_get (struct _Unwind_Context *context) { @@ -127,6 +130,14 @@ aarch64_arch_extension_frame_init (struct _Unwind_Context *context ATTRIBUTE_UNU aarch64_fs_ra_state_set (fs, aarch64_ra_no_signing); } +/* Before copying the current context to the target context, check whether + the register is local to this context and should not be forwarded. */ +static inline bool +aarch64_frame_local_register(long reg) +{ + return (reg == AARCH64_DWARF_REGNUM_RA_STATE); +} + /* Do AArch64 private extraction on ADDR_WORD based on context info CONTEXT and unwind frame info FS. 
If ADDR_WORD is signed, we do address authentication on it using CFA of current frame. diff --git a/libgcc/unwind-dw2.c b/libgcc/unwind-dw2.c index 40d64c0c0a39..5f33f80670ac 100644 --- a/libgcc/unwind-dw2.c +++ b/libgcc/unwind-dw2.c @@ -1423,6 +1423,11 @@ uw_install_context_1 (struct _Unwind_Context *current, void *c = (void *) (_Unwind_Internal_Ptr) current->reg[i]; void *t = (void *) (_Unwind_Internal_Ptr)target->reg[i]; +#ifdef MD_FRAME_LOCAL_REGISTER_P + if (MD_FRAME_LOCAL_REGISTER_P (i)) + continue; +#endif + gcc_assert (current->by_value[i] == 0); if (target->by_value[i] && c) {
[gcc r15-3804] dwarf2: add hooks for architecture-specific CFIs
https://gcc.gnu.org/g:9e1c71bab50d51a1a8ec1a75080ffde6ca3d854c commit r15-3804-g9e1c71bab50d51a1a8ec1a75080ffde6ca3d854c Author: Matthieu Longo Date: Mon Sep 23 15:34:57 2024 +0100 dwarf2: add hooks for architecture-specific CFIs Architecture-specific CFI directives are currently declared and processed among other architecture-independent CFI directives in gcc/dwarf2* files. This approach creates confusion, specifically in the case of DWARF instructions in the vendor space and using the same instruction code. Such a clash currently happens between DW_CFA_GNU_window_save (used on SPARC) and DW_CFA_AARCH64_negate_ra_state (used on AArch64), which both have the same instruction code 0x2d. As a result, AArch64 compilers generate a SPARC CFI directive (.cfi_window_save) instead of .cfi_negate_ra_state, contrary to what is expected in [DWARF for the Arm 64-bit Architecture (AArch64)](https://github.com/ARM-software/abi-aa/blob/main/aadwarf64/aadwarf64.rst). This refactoring does not completely solve the problem, but improves the situation by moving some of the processing of those directives (more specifically their output in the assembly) to the backend via 2 target hooks: - DW_CFI_OPRND1_DESC: parse the first operand of the directive (if any). - OUTPUT_CFI_DIRECTIVE: output the CFI directive as a string. Additionally, this patch also contains a renaming of an enum used for return address mangling on AArch64. gcc/ChangeLog: * config/aarch64/aarch64.cc (aarch64_output_cfi_directive): New hook for CFI directives. (aarch64_dw_cfi_oprnd1_desc): Same. (TARGET_OUTPUT_CFI_DIRECTIVE): Hook for output_cfi_directive. (TARGET_DW_CFI_OPRND1_DESC): Hook for dw_cfi_oprnd1_desc. * config/sparc/sparc.cc (sparc_output_cfi_directive): New hook for CFI directives. (sparc_dw_cfi_oprnd1_desc): Same. (TARGET_OUTPUT_CFI_DIRECTIVE): Hook for output_cfi_directive. (TARGET_DW_CFI_OPRND1_DESC): Hook for dw_cfi_oprnd1_desc. 
* coretypes.h (struct dw_cfi_node): Forward declaration of CFI type from gcc/dwarf2out.h. (enum dw_cfi_oprnd_type): Same. (enum dwarf_call_frame_info): Same. * doc/tm.texi: Regenerated from doc/tm.texi.in. * doc/tm.texi.in: Add doc for new target hooks. type of enum to allow forward declaration. * dwarf2cfi.cc (struct dw_cfi_row): Update the description for window_save and ra_mangled. (dwarf2out_frame_debug_cfa_negate_ra_state): Use AArch64 CFI directive instead of the SPARC one. (change_cfi_row): Use the right CFI directive's name for RA mangling. (output_cfi): Remove explicit architecture-specific CFI directive DW_CFA_GNU_window_save that falls into default case. (output_cfi_directive): Use target hook as default. * dwarf2out.cc (dw_cfi_oprnd1_desc): Use target hook as default. * dwarf2out.h (enum dw_cfi_oprnd_type): specify underlying type of enum to allow forward declaration. (dw_cfi_oprnd1_desc): Call target hook. (output_cfi_directive): Use dw_cfi_ref instead of struct dw_cfi_node *. * hooks.cc (hook_bool_dwcfi_dwcfioprndtyperef_false): New. (hook_bool_FILEptr_dwcfiptr_false): New. * hooks.h (hook_bool_dwcfi_dwcfioprndtyperef_false): New. (hook_bool_FILEptr_dwcfiptr_false): New. * target.def: Documentation for new hooks. include/ChangeLog: * dwarf2.h (enum dwarf_call_frame_info): specify underlying libffi/ChangeLog: * include/ffi_cfi.h (cfi_negate_ra_state): Declare AArch64 cfi directive. libgcc/ChangeLog: * config/aarch64/aarch64-asm.h (PACIASP): Replace SPARC CFI directive by AArch64 one. (AUTIASP): Same. libitm/ChangeLog: * config/aarch64/sjlj.S: Replace SPARC CFI directive by AArch64 one. gcc/testsuite/ChangeLog: * g++.target/aarch64/pr94515-1.C: Replace SPARC CFI directive by AArch64 one. * g++.target/aarch64/pr94515-2.C: Same. Diff: --- gcc/config/aarch64/aarch64.cc| 33 ++ gcc/config/sparc/sparc.cc| 35 gcc/coretypes.h | 6 + gcc/doc/tm.texi | 16 - gcc/doc/tm.texi.in | 5 +++- gcc/dwarf2cfi.cc | 31 gcc/dwarf2out.cc | 13 +++ gcc/dwarf2o
[gcc r15-3803] Rename REG_CFA_TOGGLE_RA_MANGLE to REG_CFA_NEGATE_RA_STATE
https://gcc.gnu.org/g:4068096fbf5aef65883a7492f4940cea85b39f40 commit r15-3803-g4068096fbf5aef65883a7492f4940cea85b39f40 Author: Matthieu Longo Date: Mon Sep 23 15:31:18 2024 +0100 Rename REG_CFA_TOGGLE_RA_MANGLE to REG_CFA_NEGATE_RA_STATE The current name REG_CFA_TOGGLE_RA_MANGLE is not representative of what it really is, i.e. a register to represent several states, not only a binary one. Same for dwarf2out_frame_debug_cfa_toggle_ra_mangle. gcc/ChangeLog: * combine-stack-adj.cc (no_unhandled_cfa): Rename. * config/aarch64/aarch64.cc (aarch64_expand_prologue): Rename. (aarch64_expand_epilogue): Rename. * dwarf2cfi.cc (dwarf2out_frame_debug_cfa_toggle_ra_mangle): Rename this... (dwarf2out_frame_debug_cfa_negate_ra_state): To this. (dwarf2out_frame_debug): Rename. * reg-notes.def (REG_CFA_NOTE): Rename REG_CFA_TOGGLE_RA_MANGLE. Diff: --- gcc/combine-stack-adj.cc | 2 +- gcc/config/aarch64/aarch64.cc | 4 ++-- gcc/dwarf2cfi.cc | 8 gcc/reg-notes.def | 8 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/gcc/combine-stack-adj.cc b/gcc/combine-stack-adj.cc index 2da9bf2bc1ef..367d3b66b749 100644 --- a/gcc/combine-stack-adj.cc +++ b/gcc/combine-stack-adj.cc @@ -212,7 +212,7 @@ no_unhandled_cfa (rtx_insn *insn) case REG_CFA_SET_VDRAP: case REG_CFA_WINDOW_SAVE: case REG_CFA_FLUSH_QUEUE: - case REG_CFA_TOGGLE_RA_MANGLE: + case REG_CFA_NEGATE_RA_STATE: return false; } diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 68913beaee20..e41431d56ac4 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -9612,7 +9612,7 @@ aarch64_expand_prologue (void) default: gcc_unreachable (); } - add_reg_note (insn, REG_CFA_TOGGLE_RA_MANGLE, const0_rtx); + add_reg_note (insn, REG_CFA_NEGATE_RA_STATE, const0_rtx); RTX_FRAME_RELATED_P (insn) = 1; } @@ -10033,7 +10033,7 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall) default: gcc_unreachable (); } - add_reg_note (insn, REG_CFA_TOGGLE_RA_MANGLE, const0_rtx); + add_reg_note 
(insn, REG_CFA_NEGATE_RA_STATE, const0_rtx); RTX_FRAME_RELATED_P (insn) = 1; } diff --git a/gcc/dwarf2cfi.cc b/gcc/dwarf2cfi.cc index 1231b5bb5f05..4ad9acbd6fd6 100644 --- a/gcc/dwarf2cfi.cc +++ b/gcc/dwarf2cfi.cc @@ -1547,13 +1547,13 @@ dwarf2out_frame_debug_cfa_window_save (void) cur_row->window_save = true; } -/* A subroutine of dwarf2out_frame_debug, process a REG_CFA_TOGGLE_RA_MANGLE. +/* A subroutine of dwarf2out_frame_debug, process a REG_CFA_NEGATE_RA_STATE. Note: DW_CFA_GNU_window_save dwarf opcode is reused for toggling RA mangle state, this is a target specific operation on AArch64 and can only be used on other targets if they don't use the window save operation otherwise. */ static void -dwarf2out_frame_debug_cfa_toggle_ra_mangle (void) +dwarf2out_frame_debug_cfa_negate_ra_state (void) { dw_cfi_ref cfi = new_cfi (); @@ -2341,8 +2341,8 @@ dwarf2out_frame_debug (rtx_insn *insn) handled_one = true; break; - case REG_CFA_TOGGLE_RA_MANGLE: - dwarf2out_frame_debug_cfa_toggle_ra_mangle (); + case REG_CFA_NEGATE_RA_STATE: + dwarf2out_frame_debug_cfa_negate_ra_state (); handled_one = true; break; diff --git a/gcc/reg-notes.def b/gcc/reg-notes.def index 5b878fb2a1cd..ddcf16b68be5 100644 --- a/gcc/reg-notes.def +++ b/gcc/reg-notes.def @@ -180,10 +180,10 @@ REG_CFA_NOTE (CFA_WINDOW_SAVE) the rest of the compiler as a CALL_INSN. */ REG_CFA_NOTE (CFA_FLUSH_QUEUE) -/* Attached to insns that are RTX_FRAME_RELATED_P, toggling the mangling status - of return address. Currently it's only used by AArch64. The argument is - ignored. */ -REG_CFA_NOTE (CFA_TOGGLE_RA_MANGLE) +/* Attached to insns that are RTX_FRAME_RELATED_P, indicating an authentication + of the return address. Currently it's only used by AArch64. + The argument is ignored. */ +REG_CFA_NOTE (CFA_NEGATE_RA_STATE) /* Indicates what exception region an INSN belongs in. This is used to indicate what region to which a call may throw. REGION 0
[gcc r15-3805] aarch64 testsuite: explain expectations for pr94515* tests
https://gcc.gnu.org/g:fb475d3f25943beffac8e9c0c78247bad75287a1 commit r15-3805-gfb475d3f25943beffac8e9c0c78247bad75287a1 Author: Matthieu Longo Date: Mon Sep 23 15:35:02 2024 +0100 aarch64 testsuite: explain expectections for pr94515* tests gcc/testsuite/ChangeLog: * g++.target/aarch64/pr94515-1.C: Improve test documentation. * g++.target/aarch64/pr94515-2.C: Same. Diff: --- gcc/testsuite/g++.target/aarch64/pr94515-1.C | 8 ++ gcc/testsuite/g++.target/aarch64/pr94515-2.C | 39 +++- 2 files changed, 41 insertions(+), 6 deletions(-) diff --git a/gcc/testsuite/g++.target/aarch64/pr94515-1.C b/gcc/testsuite/g++.target/aarch64/pr94515-1.C index d5c114a83a82..359039e17536 100644 --- a/gcc/testsuite/g++.target/aarch64/pr94515-1.C +++ b/gcc/testsuite/g++.target/aarch64/pr94515-1.C @@ -15,12 +15,20 @@ void unwind (void) __attribute__((noinline, noipa, target("branch-protection=pac-ret"))) int test (int z) { + // paciasp -> cfi_negate_ra_state: RA_no_signing -> RA_signing_SP if (z) { asm volatile ("":::"x20","x21"); unwind (); +// autiasp -> cfi_negate_ra_state: RA_signing_SP -> RA_no_signing return 1; } else { +// 2nd cfi_negate_ra_state because the CFI directives are processed linearily. +// At this point, the unwinder would believe that the address is not signed +// due to the previous return. That's why the compiler has to emit second +// cfi_negate_ra_state to mean that the return address is still signed. +// cfi_negate_ra_state: RA_no_signing -> RA_signing_SP unwind (); +// autiasp -> cfi_negate_ra_state: RA_signing_SP -> RA_no_signing return 2; } } diff --git a/gcc/testsuite/g++.target/aarch64/pr94515-2.C b/gcc/testsuite/g++.target/aarch64/pr94515-2.C index f4abeed4..bdb65411a080 100644 --- a/gcc/testsuite/g++.target/aarch64/pr94515-2.C +++ b/gcc/testsuite/g++.target/aarch64/pr94515-2.C @@ -6,6 +6,7 @@ volatile int zero = 0; int global = 0; +/* This is a leaf function, so no .cfi_negate_ra_state directive is expected. 
*/ __attribute__((noinline)) int bar(void) { @@ -13,29 +14,55 @@ int bar(void) return 0; } +/* This function does not return normally, so the address is signed but no + * authentication code is emitted. It means that only one CFI directive is + * supposed to be emitted at signing time. */ __attribute__((noinline, noreturn)) void unwind (void) { throw 42; } +/* This function has several return instructions, and alternates different RA + * states. 4 .cfi_negate_ra_state and a .cfi_remember_state/.cfi_restore_state + * should be emitted. + * + * Expected layout: + * A: path to return 0 without assignment to global + * B: global=y + branch back into A + * C: return 2 + * D: unwind + * Which gives with return pointer authentication: + * A: sign -> authenticate [2 negate_ra_states + remember_state for B] + * B: signed [restore_state] + * C: unsigned [negate_ra_state] + * D: signed [negate_ra_state] + */ __attribute__((noinline, noipa)) int test(int x) { - if (x==1) return 2; /* This return path may not use the stack. */ + // This return path may not use the stack. This means that the return address + // won't be signed. + if (x==1) return 2; + + // All the return paths of the code below must have RA mangle state set, and + // the return address must be signed. int y = bar(); if (y > global) global=y; - if (y==3) unwind(); /* This return path must have RA mangle state set. */ - return 0; + if (y==3) unwind(); // authentication of the return address is not required. + return 0; // authentication of the return address is required. } +/* This function requires only 2 .cfi_negate_ra_state. */ int main () { + // paciasp -> cfi_negate_ra_state: RA_no_signing -> RA_signing_SP try { test (zero); -__builtin_abort (); +__builtin_abort (); // authentication of the return address is not required. } catch (...) 
{ +// autiasp -> cfi_negate_ra_state: RA_signing_SP -> RA_no_signing return 0; } - __builtin_abort (); -} + __builtin_abort (); // authentication of the return address is not required. +} \ No newline at end of file
[gcc r15-3806] dwarf2: store the RA state in CFI row
https://gcc.gnu.org/g:2b7971448f122317ed012586f9f73ccc0537deb2 commit r15-3806-g2b7971448f122317ed012586f9f73ccc0537deb2 Author: Matthieu Longo Date: Mon Sep 23 15:35:07 2024 +0100 dwarf2: store the RA state in CFI row On AArch64, the RA state informs the unwinder whether the return address is mangled and how, or not. This information is encoded in a boolean in the CFI row. This binary approach prevents from expressing more complex configuration, as it is the case with PAuth_LR introduced in Armv9.5-A. This patch addresses this limitation by replacing the boolean by an enum. gcc/ChangeLog: * dwarf2cfi.cc (struct dw_cfi_row): Declare a new enum type to replace ra_mangled. (cfi_row_equal_p): Use ra_state instead of ra_mangled. (dwarf2out_frame_debug_cfa_negate_ra_state): Same. (change_cfi_row): Same. Diff: --- gcc/dwarf2cfi.cc | 24 ++-- 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/gcc/dwarf2cfi.cc b/gcc/dwarf2cfi.cc index f8d19d524299..1b94185a4966 100644 --- a/gcc/dwarf2cfi.cc +++ b/gcc/dwarf2cfi.cc @@ -57,6 +57,15 @@ along with GCC; see the file COPYING3. If not see #define DEFAULT_INCOMING_FRAME_SP_OFFSET INCOMING_FRAME_SP_OFFSET #endif + +/* Signing method used for return address authentication. + (AArch64 extension) */ +typedef enum +{ + ra_no_signing = 0x0, + ra_signing_sp = 0x1, +} ra_signing_method_t; + /* A collected description of an entire row of the abstract CFI table. */ struct GTY(()) dw_cfi_row { @@ -74,8 +83,8 @@ struct GTY(()) dw_cfi_row bool window_save; /* AArch64 extension for DW_CFA_AARCH64_negate_ra_state. - True if the return address is in a mangled state. */ - bool ra_mangled; + Enum which stores the return address state. */ + ra_signing_method_t ra_state; }; /* The caller's ORIG_REG is saved in SAVED_IN_REG. 
*/ @@ -857,7 +866,7 @@ cfi_row_equal_p (dw_cfi_row *a, dw_cfi_row *b) if (a->window_save != b->window_save) return false; - if (a->ra_mangled != b->ra_mangled) + if (a->ra_state != b->ra_state) return false; return true; @@ -1554,8 +1563,11 @@ dwarf2out_frame_debug_cfa_negate_ra_state (void) { dw_cfi_ref cfi = new_cfi (); cfi->dw_cfi_opc = DW_CFA_AARCH64_negate_ra_state; + cur_row->ra_state += (cur_row->ra_state == ra_no_signing + ? ra_signing_sp + : ra_no_signing); add_cfi (cfi); - cur_row->ra_mangled = !cur_row->ra_mangled; } /* Record call frame debugging information for an expression EXPR, @@ -2412,12 +2424,12 @@ change_cfi_row (dw_cfi_row *old_row, dw_cfi_row *new_row) { dw_cfi_ref cfi = new_cfi (); - gcc_assert (!old_row->ra_mangled && !new_row->ra_mangled); + gcc_assert (!old_row->ra_state && !new_row->ra_state); cfi->dw_cfi_opc = DW_CFA_GNU_window_save; add_cfi (cfi); } - if (old_row->ra_mangled != new_row->ra_mangled) + if (old_row->ra_state != new_row->ra_state) { dw_cfi_ref cfi = new_cfi ();
[gcc r15-3807] libstdc++: operator new/delete are transaction_safe
https://gcc.gnu.org/g:2620e3727d9559ec03f9f967ecb68ed2e076a342 commit r15-3807-g2620e3727d9559ec03f9f967ecb68ed2e076a342 Author: Jason Merrill Date: Mon Sep 9 11:20:02 2024 -0400 libstdc++: operator new/delete are transaction_safe With the changes to #pragma system_header, g++.dg/tm/pr46270.C was failing because libsupc++/new didn't implement the N4514 change to [new.delete] that says "The library versions of the global allocation and deallocation functions are declared transaction_safe (8.3.5 dcl.fct)." We already have the _GLIBCXX_TXN_SAFE macro, just need to add it. libstdc++-v3/ChangeLog: * libsupc++/new: Add _GLIBCXX_TXN_SAFE. Diff: --- libstdc++-v3/libsupc++/new | 56 +++--- 1 file changed, 38 insertions(+), 18 deletions(-) diff --git a/libstdc++-v3/libsupc++/new b/libstdc++-v3/libsupc++/new index b530bd1100df..e8b00505cf1e 100644 --- a/libstdc++-v3/libsupc++/new +++ b/libstdc++-v3/libsupc++/new @@ -132,49 +132,63 @@ namespace std * Placement new and delete signatures (take a memory address argument, * does nothing) may not be replaced by a user's program. 
*/ -_GLIBCXX_NODISCARD void* operator new(std::size_t) _GLIBCXX_THROW (std::bad_alloc) +_GLIBCXX_NODISCARD void* operator new(std::size_t) + _GLIBCXX_TXN_SAFE _GLIBCXX_THROW (std::bad_alloc) __attribute__((__externally_visible__)); -_GLIBCXX_NODISCARD void* operator new[](std::size_t) _GLIBCXX_THROW (std::bad_alloc) +_GLIBCXX_NODISCARD void* operator new[](std::size_t) + _GLIBCXX_TXN_SAFE _GLIBCXX_THROW (std::bad_alloc) __attribute__((__externally_visible__)); -void operator delete(void*) _GLIBCXX_USE_NOEXCEPT +void operator delete(void*) _GLIBCXX_TXN_SAFE _GLIBCXX_USE_NOEXCEPT __attribute__((__externally_visible__)); -void operator delete[](void*) _GLIBCXX_USE_NOEXCEPT +void operator delete[](void*) _GLIBCXX_TXN_SAFE _GLIBCXX_USE_NOEXCEPT __attribute__((__externally_visible__)); #if __cpp_sized_deallocation -void operator delete(void*, std::size_t) _GLIBCXX_USE_NOEXCEPT +void operator delete(void*, std::size_t) + _GLIBCXX_TXN_SAFE _GLIBCXX_USE_NOEXCEPT __attribute__((__externally_visible__)); -void operator delete[](void*, std::size_t) _GLIBCXX_USE_NOEXCEPT +void operator delete[](void*, std::size_t) + _GLIBCXX_TXN_SAFE _GLIBCXX_USE_NOEXCEPT __attribute__((__externally_visible__)); #endif -_GLIBCXX_NODISCARD void* operator new(std::size_t, const std::nothrow_t&) _GLIBCXX_USE_NOEXCEPT +_GLIBCXX_NODISCARD void* operator new(std::size_t, const std::nothrow_t&) + _GLIBCXX_TXN_SAFE _GLIBCXX_USE_NOEXCEPT __attribute__((__externally_visible__, __alloc_size__ (1), __malloc__)); -_GLIBCXX_NODISCARD void* operator new[](std::size_t, const std::nothrow_t&) _GLIBCXX_USE_NOEXCEPT +_GLIBCXX_NODISCARD void* operator new[](std::size_t, const std::nothrow_t&) + _GLIBCXX_TXN_SAFE _GLIBCXX_USE_NOEXCEPT __attribute__((__externally_visible__, __alloc_size__ (1), __malloc__)); -void operator delete(void*, const std::nothrow_t&) _GLIBCXX_USE_NOEXCEPT +void operator delete(void*, const std::nothrow_t&) + _GLIBCXX_TXN_SAFE _GLIBCXX_USE_NOEXCEPT __attribute__((__externally_visible__)); 
-void operator delete[](void*, const std::nothrow_t&) _GLIBCXX_USE_NOEXCEPT +void operator delete[](void*, const std::nothrow_t&) + _GLIBCXX_TXN_SAFE _GLIBCXX_USE_NOEXCEPT __attribute__((__externally_visible__)); #if __cpp_aligned_new _GLIBCXX_NODISCARD void* operator new(std::size_t, std::align_val_t) + _GLIBCXX_TXN_SAFE __attribute__((__externally_visible__, __alloc_size__ (1), __malloc__)); _GLIBCXX_NODISCARD void* operator new(std::size_t, std::align_val_t, const std::nothrow_t&) + _GLIBCXX_TXN_SAFE _GLIBCXX_USE_NOEXCEPT __attribute__((__externally_visible__, __alloc_size__ (1), __malloc__)); -void operator delete(void*, std::align_val_t) +void operator delete(void*, std::align_val_t) _GLIBCXX_TXN_SAFE _GLIBCXX_USE_NOEXCEPT __attribute__((__externally_visible__)); void operator delete(void*, std::align_val_t, const std::nothrow_t&) + _GLIBCXX_TXN_SAFE _GLIBCXX_USE_NOEXCEPT __attribute__((__externally_visible__)); _GLIBCXX_NODISCARD void* operator new[](std::size_t, std::align_val_t) + _GLIBCXX_TXN_SAFE __attribute__((__externally_visible__, __alloc_size__ (1), __malloc__)); _GLIBCXX_NODISCARD void* operator new[](std::size_t, std::align_val_t, const std::nothrow_t&) + _GLIBCXX_TXN_SAFE _GLIBCXX_USE_NOEXCEPT __attribute__((__externally_visible__, __alloc_size__ (1), __malloc__)); -void operator delete[](void*, std::align_val_t) +void operator delete[](void*, std::align_val_t) _GLIBCXX_TXN_SAFE _GLIBCXX_USE_NOEXCEPT __attribute__((__externally_visible__)); void operator delete[](void*, std::align_val_t, const std::nothrow_t&) + _GLIBCXX_TXN_SAFE _GLIBCXX_USE_NOEXCEPT __attribute__((__externally_visible__)); #if __cpp_sized_deallocation -void operator delete(void*, std::size_t, std::align_val_t) +void operato
[gcc r15-3808] libstdc++: guard c++config pragmas
https://gcc.gnu.org/g:358db2e3ed4acf44282d1d9ebbc4a1a3b6e38d21 commit r15-3808-g358db2e3ed4acf44282d1d9ebbc4a1a3b6e38d21 Author: Jason Merrill Date: Sat Sep 21 13:30:31 2024 -0400 libstdc++: guard c++config pragmas c++config needs to be compilable as C, in which mode we complain about the -Wc++ pragmas. libstdc++-v3/ChangeLog: * include/bits/c++config: Don't try to disable -Wc++??-extensions when compiling as C. Diff: --- libstdc++-v3/include/bits/c++config | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libstdc++-v3/include/bits/c++config b/libstdc++-v3/include/bits/c++config index 66d03cfd0375..16c67b80e769 100644 --- a/libstdc++-v3/include/bits/c++config +++ b/libstdc++-v3/include/bits/c++config @@ -34,8 +34,10 @@ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wvariadic-macros" +#if __cplusplus #pragma GCC diagnostic ignored "-Wc++11-extensions" #pragma GCC diagnostic ignored "-Wc++23-extensions" // bf16 +#endif // The major release number for the GCC release the C++ library belongs to. #define _GLIBCXX_RELEASE
[gcc r15-3797] c++: Don't crash when mangling member with anonymous union or template type [PR100632, PR109790]
https://gcc.gnu.org/g:a030fcad4f9f490a08db0a4cad4c22635a0585c1 commit r15-3797-ga030fcad4f9f490a08db0a4cad4c22635a0585c1 Author: Simon Martin Date: Mon Sep 16 13:45:32 2024 +0200 c++: Don't crash when mangling member with anonymous union or template type [PR100632, PR109790] We currently crash upon mangling members that have an anonymous union or a template operator type. The problem is that before calling write_unqualified_name, write_member_name asserts that it has a declaration whose DECL_NAME is an identifier node that is not that of an operator. This is wrong: - In PR100632, it's an anonymous union declaration, hence a 0 DECL_NAME - In PR109790, it's a legitimate template declaration for an operator (this was accepted up to GCC 10) This assert was added via r11-6301, to be sure that we do write the "on" marker for operator members. This patch removes that assert and instead - Lets members with an anonymous union type go through - For operators, adds the missing "on" marker for ABI versions greater than the highest usable with GCC 10 PR c++/109790 PR c++/100632 gcc/cp/ChangeLog: * mangle.cc (write_member_name): Handle members whose type is an anonymous union member. Write missing "on" marker for operators when ABI version is at least 16. gcc/testsuite/ChangeLog: * g++.dg/cpp0x/decltype83.C: New test. * g++.dg/cpp0x/decltype83a.C: New test. * g++.dg/cpp1y/lambda-ice3.C: New test. * g++.dg/cpp1y/lambda-ice3a.C: New test. * g++.dg/cpp2a/nontype-class67.C: New test. 
Diff: --- gcc/cp/mangle.cc | 8 +++- gcc/testsuite/g++.dg/cpp0x/decltype83.C | 20 gcc/testsuite/g++.dg/cpp0x/decltype83a.C | 18 ++ gcc/testsuite/g++.dg/cpp1y/lambda-ice3.C | 19 +++ gcc/testsuite/g++.dg/cpp1y/lambda-ice3a.C| 17 + gcc/testsuite/g++.dg/cpp2a/nontype-class67.C | 9 + 6 files changed, 90 insertions(+), 1 deletion(-) diff --git a/gcc/cp/mangle.cc b/gcc/cp/mangle.cc index 46dc6923adde..17988d69e1ea 100644 --- a/gcc/cp/mangle.cc +++ b/gcc/cp/mangle.cc @@ -3255,7 +3255,13 @@ write_member_name (tree member) } else if (DECL_P (member)) { - gcc_assert (!DECL_OVERLOADED_OPERATOR_P (member)); + if (ANON_AGGR_TYPE_P (TREE_TYPE (member))) + ; + else if (DECL_OVERLOADED_OPERATOR_P (member)) + { + if (abi_check (16)) + write_string ("on"); + } write_unqualified_name (member); } else if (TREE_CODE (member) == TEMPLATE_ID_EXPR) diff --git a/gcc/testsuite/g++.dg/cpp0x/decltype83.C b/gcc/testsuite/g++.dg/cpp0x/decltype83.C new file mode 100644 index ..b71a302d5ebb --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp0x/decltype83.C @@ -0,0 +1,20 @@ +// PR c++/109790 +// This used to work until GCC 10; force the usage of ABI 15 (the highest +// usable in GCC 10) and check that the mangling (actually wrong; see +// decltyp83a.C) matches that of GCC 10's default ABI version (14). + +// { dg-do compile { target c++11 } } +// { dg-additional-options "-fabi-version=15" } + +struct A { + template void operator+(T); +}; + +template +decltype(&A::operator+) f(); + +int main() { + f(); +} + +// { dg-final { scan-assembler "_Z1fIiEDTadsr1AplIT_EEv" } } diff --git a/gcc/testsuite/g++.dg/cpp0x/decltype83a.C b/gcc/testsuite/g++.dg/cpp0x/decltype83a.C new file mode 100644 index ..27c363651f13 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp0x/decltype83a.C @@ -0,0 +1,18 @@ +// PR c++/109790 +// Up to GCC 10, the mangling would be missing the "on" marker, hence be wrong. +// Check that this is fixed with the latest ABI. 
+ +// { dg-do compile { target c++11 } } + +struct A { + template void operator+(T); +}; + +template +decltype(&A::operator+) f(); + +int main() { + f(); +} + +// { dg-final { scan-assembler "_Z1fIiEDTadsr1AonplIT_EEv" } } diff --git a/gcc/testsuite/g++.dg/cpp1y/lambda-ice3.C b/gcc/testsuite/g++.dg/cpp1y/lambda-ice3.C new file mode 100644 index ..b6a2056724fd --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp1y/lambda-ice3.C @@ -0,0 +1,19 @@ +// PR c++/109790 +// This used to work until GCC 10; force the usage of ABI 15 (the highest +// usable in GCC 10) and check that the mangling (actually wrong; see +// lambda-ice3a.C) matches that of GCC 10's default ABI version (14). + +// { dg-do compile { target c++14 } } +// { dg-additional-options "-fabi-version=15" } + +auto ll = [](auto ... ){}; +template + void mm(void (_Impl::*__p)(_Args) const); +template +using __impl_for = decltype(mm(&decltype(ll)::operator()<_Ts>)); +template __impl_for<_Ts> f() { } +void aaa() { + f(); +} + +// { dg-final { scan-assembler "_Z1fIiEDTcl2mmadsrN2llMUlDpT_E_EclIT_EEEv" } } diff --git
[gcc r15-3789] Genmatch: Fix ICE for binary phi cfg mismatching [PR116795]
https://gcc.gnu.org/g:999363c53cf6cd87f75f96388a531e090299b97d commit r15-3789-g999363c53cf6cd87f75f96388a531e090299b97d Author: Pan Li Date: Sat Sep 21 22:30:18 2024 +0800 Genmatch: Fix ICE for binary phi cfg mismatching [PR116795] This patch fixes an ICE when trying to match the binary phi for the cfg below. We checked that the first edge of the Phi block comes from b0, instead of checking that the only edge of b1 comes from b0. As a result, some code was recognized as .SAT_SUB when it is not, which finally resulted in a verify_ssa failure. +--+ | b0: | | def | +-+ | ... | | b1: | | cond |-->| def | +--+ | ... | | +-+ | | | | v | +-+ | | b2: | | | Phi |<--+ +-+ The following test suites pass with this patch. * The rv64gcv full regression test. * The x86 bootstrap test. * The x86 full regression test. PR target/116795 gcc/ChangeLog: * gimple-match-head.cc (match_cond_with_binary_phi): Fix the incorrect cfg check as b0->b1 in above example. gcc/testsuite/ChangeLog: * gcc.dg/torture/pr116795-1.c: New test. 
Signed-off-by: Pan Li Diff: --- gcc/gimple-match-head.cc | 2 +- gcc/testsuite/gcc.dg/torture/pr116795-1.c | 14 ++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/gcc/gimple-match-head.cc b/gcc/gimple-match-head.cc index b63b66e9485a..b5d4a71ddc55 100644 --- a/gcc/gimple-match-head.cc +++ b/gcc/gimple-match-head.cc @@ -402,7 +402,7 @@ match_cond_with_binary_phi (gphi *phi, tree *true_arg, tree *false_arg) if (EDGE_COUNT (pred_b0->succs) == 2 && EDGE_COUNT (pred_b1->succs) == 1 && EDGE_COUNT (pred_b1->preds) == 1 - && pred_b0 == EDGE_PRED (gimple_bb (phi), 0)->src) + && pred_b0 == EDGE_PRED (pred_b1, 0)->src) /* * +--+ * | b0: | diff --git a/gcc/testsuite/gcc.dg/torture/pr116795-1.c b/gcc/testsuite/gcc.dg/torture/pr116795-1.c new file mode 100644 index ..629bdf4bacda --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr116795-1.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O3" } */ + +volatile int a, b; +int c; +int main() { + unsigned e = 0; + for (; e < 2; e++) { +a && b; +if (c) + e = -(c ^ e); + } + return 0; +}
[gcc r15-3786] Remove commented out PHI_ARG_DEF macro definition
https://gcc.gnu.org/g:0d68bfe2bff7e7022e173bedfb918b796d3cb0af commit r15-3786-g0d68bfe2bff7e7022e173bedfb918b796d3cb0af Author: Andrew Pinski Date: Sun Sep 22 18:12:36 2024 + Remove commented out PHI_ARG_DEF macro definition This was commented out since r0-125500-g80560f9521f81a and a new definition was added at the same time. Let's remove the commented out version. gcc/ChangeLog: * tree-ssa-operands.h (PHI_ARG_DEF): Remove definition. Signed-off-by: Andrew Pinski Diff: --- gcc/tree-ssa-operands.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/gcc/tree-ssa-operands.h b/gcc/tree-ssa-operands.h index b6534f18c660..f368d5b59f8c 100644 --- a/gcc/tree-ssa-operands.h +++ b/gcc/tree-ssa-operands.h @@ -74,9 +74,6 @@ struct GTY(()) ssa_operands { #define PHI_RESULT(PHI)gimple_phi_result (PHI) #define SET_PHI_RESULT(PHI, V) SET_DEF (gimple_phi_result_ptr (PHI), (V)) -/* -#define PHI_ARG_DEF(PHI, I)USE_FROM_PTR (PHI_ARG_DEF_PTR ((PHI), (I))) -*/ #define PHI_ARG_DEF_PTR(PHI, I)gimple_phi_arg_imm_use_ptr ((PHI), (I)) #define PHI_ARG_DEF(PHI, I)gimple_phi_arg_def ((PHI), (I)) #define SET_PHI_ARG_DEF(PHI, I, V) \
[gcc r15-3788] gimple: Simplify gimple_seq_nondebug_singleton_p
https://gcc.gnu.org/g:831137be51a54715b73a6178dbfb28215d1963df commit r15-3788-g831137be51a54715b73a6178dbfb28215d1963df Author: Andrew Pinski Date: Sun Sep 22 13:18:30 2024 -0700 gimple: Simplify gimple_seq_nondebug_singleton_p The implementation of gimple_seq_nondebug_singleton_p was convoluted in how it determined if the sequence was a singleton (which could contain debug statements). This simplifies the function into two calls. One to get the start after all of the debug statements and then check to see if it is at the one before the end (or there are only debug statements afterwards). Bootstrapped and tested on x86_64-linux-gnu (including ada). gcc/ChangeLog: * gimple-iterator.h (gimple_seq_nondebug_singleton_p): Rewrite to be simply, gsi_start_nondebug/gsi_one_nondebug_before_end_p. Signed-off-by: Andrew Pinski Diff: --- gcc/gimple-iterator.h | 23 ++- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/gcc/gimple-iterator.h b/gcc/gimple-iterator.h index 501f0549d925..97176d639d9f 100644 --- a/gcc/gimple-iterator.h +++ b/gcc/gimple-iterator.h @@ -430,28 +430,9 @@ gsi_seq (gimple_stmt_iterator i) inline bool gimple_seq_nondebug_singleton_p (gimple_seq seq) { - gimple_stmt_iterator gsi; - - /* Find a nondebug gimple. */ - gsi.ptr = gimple_seq_first (seq); - gsi.seq = &seq; - gsi.bb = NULL; - while (!gsi_end_p (gsi) -&& is_gimple_debug (gsi_stmt (gsi))) -gsi_next (&gsi); - - /* No nondebug gimple found, not a singleton. */ - if (gsi_end_p (gsi)) -return false; - - /* Find a next nondebug gimple. */ - gsi_next (&gsi); - while (!gsi_end_p (gsi) -&& is_gimple_debug (gsi_stmt (gsi))) -gsi_next (&gsi); + gimple_stmt_iterator gsi = gsi_start_nondebug (seq); - /* Only a singleton if there's no next nondebug gimple. */ - return gsi_end_p (gsi); + return gsi_one_nondebug_before_end_p (gsi); } #endif /* GCC_GIMPLE_ITERATOR_H */
[gcc r15-3787] gimple: Remove custom remove_pointer
https://gcc.gnu.org/g:2cd76720c1584b55b05570894f602f05f1fc48ec commit r15-3787-g2cd76720c1584b55b05570894f602f05f1fc48ec Author: Andrew Pinski Date: Sun Sep 22 19:26:02 2024 + gimple: Remove custom remove_pointer Since r11-2700-g22dc89f8073cd0, type_traits has been included via system.h so we don't need a custom version for gimple.h. Note a small C++14 cleanup is to use remove_pointer_t directly here instead of remove_pointer::type. bootstrapped and tested on x86_64-linux-gnu gcc/ChangeLog: * gimple.h (remove_pointer): Remove. (GIMPLE_CHECK2): Use std::remove_pointer instead of custom one. Signed-off-by: Andrew Pinski Diff: --- gcc/gimple.h | 8 ++-- 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/gcc/gimple.h b/gcc/gimple.h index ee986eaf1539..4a6e0e97d1e7 100644 --- a/gcc/gimple.h +++ b/gcc/gimple.h @@ -37,10 +37,6 @@ enum gimple_code { extern const char *const gimple_code_name[]; extern const unsigned char gimple_rhs_class_table[]; -/* Strip the outermost pointer, from tr1/type_traits. */ -template struct remove_pointer { typedef T type; }; -template struct remove_pointer { typedef T type; }; - /* Error out if a gimple tuple is addressed incorrectly. */ #if defined ENABLE_GIMPLE_CHECKING #define gcc_gimple_checking_assert(EXPR) gcc_assert (EXPR) @@ -72,7 +68,7 @@ GIMPLE_CHECK2(const gimple *gs, T ret = dyn_cast (gs); if (!ret) gimple_check_failed (gs, file, line, fun, -remove_pointer::type::code_, ERROR_MARK); +std::remove_pointer::type::code_, ERROR_MARK); return ret; } template @@ -91,7 +87,7 @@ GIMPLE_CHECK2(gimple *gs, T ret = dyn_cast (gs); if (!ret) gimple_check_failed (gs, file, line, fun, -remove_pointer::type::code_, ERROR_MARK); +std::remove_pointer::type::code_, ERROR_MARK); return ret; } #else /* not ENABLE_GIMPLE_CHECKING */
[gcc r15-3814] modula2: Add noreturn attribute to m2/gm2-libs/M2RTS.mod
https://gcc.gnu.org/g:5ef52ec422b8b92539baaa761ad878ccb6f82db8 commit r15-3814-g5ef52ec422b8b92539baaa761ad878ccb6f82db8 Author: Gaius Mulley Date: Tue Sep 24 00:28:19 2024 +0100 modula2: Add noreturn attribute to m2/gm2-libs/M2RTS.mod This patch removes a build warning by adding a noreturn attribute to the M2RTS.mod:HaltC procedure. Also add an infinite loop to gm2-libs-min/M2RTS.mod. gcc/m2/ChangeLog: * Make-lang.in (m2/gm2-libs-boot/M2RTS.o): Remove --suppress-noreturn. * gm2-libs/M2RTS.mod (HaltC): Add noreturn attribute. * gm2-libs-min/M2RTS.mod (HALT): Add LOOP END. Signed-off-by: Gaius Mulley Diff: --- gcc/m2/Make-lang.in | 2 +- gcc/m2/gm2-libs-min/M2RTS.mod | 2 ++ gcc/m2/gm2-libs/M2RTS.mod | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/gcc/m2/Make-lang.in b/gcc/m2/Make-lang.in index 02754e98c572..480c4fb28a7e 100644 --- a/gcc/m2/Make-lang.in +++ b/gcc/m2/Make-lang.in @@ -1477,7 +1477,7 @@ mcflex.c: $(srcdir)/m2/mc/mc.flex m2/gm2-libs-boot/M2RTS.o: $(srcdir)/m2/gm2-libs/M2RTS.mod $(MCDEPS) $(BUILD-BOOT-H) -test -d $(@D)/$(DEPDIR) || $(mkinstalldirs) $(@D)/$(DEPDIR) - $(MC) --suppress-noreturn -o=m2/gm2-libs-boot/M2RTS.c $(srcdir)/m2/gm2-libs/M2RTS.mod + $(MC) -o=m2/gm2-libs-boot/M2RTS.c $(srcdir)/m2/gm2-libs/M2RTS.mod $(COMPILER) $(CM2DEP) -c -DIN_GCC $(CFLAGS) $(GM2_PICFLAGS) $(MC_SRC_FLAGS) -I$(srcdir)/m2 -Im2 -I. 
-Im2/gm2-libs-boot -I$(srcdir)/m2/gm2-libs-ch $(MCINCLUDES) $(INCLUDES) m2/gm2-libs-boot/M2RTS.c -o $@ $(POSTCOMPILE) diff --git a/gcc/m2/gm2-libs-min/M2RTS.mod b/gcc/m2/gm2-libs-min/M2RTS.mod index ae8cbfffbec9..1f04a027f82c 100644 --- a/gcc/m2/gm2-libs-min/M2RTS.mod +++ b/gcc/m2/gm2-libs-min/M2RTS.mod @@ -69,6 +69,8 @@ END ExecuteInitialProcedures ; PROCEDURE HALT ; BEGIN + LOOP + END END HALT ; diff --git a/gcc/m2/gm2-libs/M2RTS.mod b/gcc/m2/gm2-libs/M2RTS.mod index 41add830766e..5ea4d17c02c2 100644 --- a/gcc/m2/gm2-libs/M2RTS.mod +++ b/gcc/m2/gm2-libs/M2RTS.mod @@ -286,7 +286,7 @@ END ErrorMessageC ; to stderr and calls exit (1). *) -PROCEDURE HaltC (description, filename, function: ADDRESS; line: CARDINAL) ; +PROCEDURE HaltC (description, filename, function: ADDRESS; line: CARDINAL) <* noreturn *> ; BEGIN ErrorMessageC (description, filename, line, function) END HaltC ;
[gcc r14-10705] libstdc++: Fix std::allocator_traits::construct constraints [PR108619]
https://gcc.gnu.org/g:1be3e4e43839d313364ffa99012ada41b4fae8da commit r14-10705-g1be3e4e43839d313364ffa99012ada41b4fae8da Author: Jonathan Wakely Date: Wed Jul 10 23:14:19 2024 +0100 libstdc++: Fix std::allocator_traits::construct constraints [PR108619] Using std::is_constructible in the constraints introduces a spurious dependency on the type being destructible, which should not be required for constructing with an allocator. The test case shows a case where the type has a private destructor, which can be destroyed by the allocator, but std::is_destructible and std::is_constructible are false. Similarly, using is_nothrow_constructible in the noexcept-specifiers for the construct members of allocator_traits and std::allocator, __gnu_cxx::__new_allocator, and __gnu_cxx::__malloc_allocator gives the wrong answer if the type isn't destructible. We need a new type trait to define those correctly, so that we only check if the placement new-expression is nothrow after using is_constructible to check that it would be well-formed. On trunk all members of std::allocator_traits were rewritten in terms of 'if constexpr' using variable templates and the detection idiom. For the release branch this backport only changes the 'construct' member. Although we can use 'if constexpr' and variable templates in C++11 with appropriate uses of diagnostic pragmas, we can't have constexpr functions with multiple return statements. This means that in C++11 mode the _S_nothrow_construct helper used for noexcept-specifiers still needs to be a pair of overloads using enable_if. libstdc++-v3/ChangeLog: PR libstdc++/108619 * include/bits/alloc_traits.h (__allocator_traits_base): Add variable templates for detecting whether the allocator has a construct member, or if placement new can be used instead. (allocator_traits::__construct_helper): Remove. (allocator_traits::__has_construct): Remove. 
(allocator_traits::construct): Use 'if constexpr' instead of dispatching to overloads constrained with enable_if. (allocator_traits>::construct): Use _Construct if construct_at is not supported. Use __is_nothrow_new_constructible for noexcept-specifier. (allocator_traits>::construct): Use __is_nothrow_new_constructible for noexcept-specifier. * include/bits/new_allocator.h (construct): Likewise. * include/ext/malloc_allocator.h (construct): Likewise. * include/std/type_traits (__is_nothrow_new_constructible): New variable template. * testsuite/20_util/allocator/89510.cc: Adjust expected results. * testsuite/ext/malloc_allocator/89510.cc: Likewise. * testsuite/ext/new_allocator/89510.cc: Likewise. * testsuite/20_util/allocator_traits/members/108619.cc: New test. (cherry picked from commit 8cf51d7516b92b352c358c14ab4e456ae53c3371) Diff: --- libstdc++-v3/include/bits/alloc_traits.h | 131 + libstdc++-v3/include/bits/new_allocator.h | 2 +- libstdc++-v3/include/ext/malloc_allocator.h| 2 +- libstdc++-v3/include/std/type_traits | 15 +++ libstdc++-v3/testsuite/20_util/allocator/89510.cc | 14 +-- .../20_util/allocator_traits/members/108619.cc | 35 ++ .../testsuite/ext/malloc_allocator/89510.cc| 14 +-- libstdc++-v3/testsuite/ext/new_allocator/89510.cc | 14 +-- 8 files changed, 154 insertions(+), 73 deletions(-) diff --git a/libstdc++-v3/include/bits/alloc_traits.h b/libstdc++-v3/include/bits/alloc_traits.h index 82fc79c7b9f9..a81b286eee70 100644 --- a/libstdc++-v3/include/bits/alloc_traits.h +++ b/libstdc++-v3/include/bits/alloc_traits.h @@ -48,6 +48,11 @@ namespace std _GLIBCXX_VISIBILITY(default) _GLIBCXX_BEGIN_NAMESPACE_VERSION #if __cplusplus >= 201103L + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wc++14-extensions" // for variable templates +#pragma GCC diagnostic ignored "-Wc++17-extensions" // for if-constexpr + /// @cond undocumented struct __allocator_traits_base { @@ -89,6 +94,39 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION using __pocs = typename 
_Tp::propagate_on_container_swap; template using __equal = __type_identity; + +// __has_construct is true if a.construct(p, args...) is well-formed. +// __can_construct is true if either __has_construct is true, or if +// a placement new-expression for T(args...) is well-formed. We use this +// to constrain allocator_traits::construct, as a libstdc++ extension. +template + using __construct_t + = decltype(std::declval<_Alloc&>().construct(std::declval<_Tp*>(), +std::declval<_Args>()...)); +template +
[gcc r14-10706] libstdc++: Document missing features for old std::string ABI [PR116777]
https://gcc.gnu.org/g:74f67ba3dcfd09209bed6265772d5fd9007ea605 commit r14-10706-g74f67ba3dcfd09209bed6265772d5fd9007ea605 Author: Jonathan Wakely Date: Fri Sep 20 17:35:48 2024 +0100 libstdc++: Document missing features for old std::string ABI [PR116777] There are several features that are not supported when using the old std::string ABI. It's possible that PR 81967 will get fixed, but the missing C++20 features almost certainly won't be. Document this in the manual. libstdc++-v3/ChangeLog: PR libstdc++/116777 * doc/xml/manual/using.xml: Document features that are not supported for the gcc4-compatible ABI. * doc/html/manual/using_dual_abi.html: Regenerate. (cherry picked from commit 82309222300acf68e345b32155df21e1b876144e) Diff: --- libstdc++-v3/doc/html/manual/using_dual_abi.html | 18 +++- libstdc++-v3/doc/xml/manual/using.xml| 26 +++- 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/libstdc++-v3/doc/html/manual/using_dual_abi.html b/libstdc++-v3/doc/html/manual/using_dual_abi.html index 916ac575f64b..939eedae3629 100644 --- a/libstdc++-v3/doc/html/manual/using_dual_abi.html +++ b/libstdc++-v3/doc/html/manual/using_dual_abi.html @@ -22,7 +22,7 @@ of the macro is 1 which causes the new ABI to be active, so to use the old ABI you must explicitly define the macro to 0 before including any library headers. - (Be aware that some GNU/Linux distributions configure GCC 5 differently so + (Be aware that some GNU/Linux distributions configured GCC 5 differently so that the default value of the macro is 0 and users must define it to 1 to enable the new ABI.) Although the changes were made for C++11 conformance, the choice of ABI @@ -72,6 +72,22 @@ Handlers for std::exception will always catch iostreams exceptions, because the old and new type both inherit from std::exception. + + Some features are not supported when using the old ABI, including: + +Using std::string::const_iterator for +positional arguments to member functions such as +std::string::erase. 
+ +Allocator propagation in std::string. + +Using std::string at compile-time in +constexpr functions. + +Class std::chrono::time_zone and all related APIs. + +The <syncstream> header. + Troubleshooting If you get linker errors about undefined references to symbols that involve types in the std::__cxx11 namespace or the tag [abi:cxx11] then it probably indicates that you are trying to diff --git a/libstdc++-v3/doc/xml/manual/using.xml b/libstdc++-v3/doc/xml/manual/using.xml index 2baa99205173..92d3c0ed6b86 100644 --- a/libstdc++-v3/doc/xml/manual/using.xml +++ b/libstdc++-v3/doc/xml/manual/using.xml @@ -1356,7 +1356,7 @@ g++ -Winvalid-pch -I. -include stdc++.h -H -g -O2 hello.cc -o test.exe of the macro is 1 which causes the new ABI to be active, so to use the old ABI you must explicitly define the macro to 0 before including any library headers. - (Be aware that some GNU/Linux distributions configure GCC 5 differently so + (Be aware that some GNU/Linux distributions configured GCC 5 differently so that the default value of the macro is 0 and users must define it to 1 to enable the new ABI.) @@ -1416,6 +1416,30 @@ g++ -Winvalid-pch -I. -include stdc++.h -H -g -O2 hello.cc -o test.exe std::exception. + + Some features are not supported when using the old ABI, including: + + +Using std::string::const_iterator for +positional arguments to member functions such as +std::string::erase. + + +Allocator propagation in std::string. + + +Using std::string at compile-time in +constexpr functions. + + +Class std::chrono::time_zone and all related APIs. + + +The <syncstream> header. + + + + Troubleshooting If you get linker errors about undefined references to symbols
[gcc r14-10707] libstdc++: Fix condition for ranges::copy to use memmove [PR116754]
https://gcc.gnu.org/g:f4fc6d2aa809d0c4b5b1825af17bed4a601a3d7f commit r14-10707-gf4fc6d2aa809d0c4b5b1825af17bed4a601a3d7f Author: Jonathan Wakely Date: Wed Sep 18 17:47:49 2024 +0100 libstdc++: Fix condition for ranges::copy to use memmove [PR116754] libstdc++-v3/ChangeLog: PR libstdc++/116754 * include/bits/ranges_algobase.h (__copy_or_move): Fix order of arguments to __memcpyable. (cherry picked from commit 83c6fe130a00c6c28cfffcc787a0a719966adfaf) Diff: --- libstdc++-v3/include/bits/ranges_algobase.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libstdc++-v3/include/bits/ranges_algobase.h b/libstdc++-v3/include/bits/ranges_algobase.h index e26a73a27d68..7387f616d361 100644 --- a/libstdc++-v3/include/bits/ranges_algobase.h +++ b/libstdc++-v3/include/bits/ranges_algobase.h @@ -252,7 +252,7 @@ namespace ranges { if (!std::__is_constant_evaluated()) { - if constexpr (__memcpyable<_Iter, _Out>::__value) + if constexpr (__memcpyable<_Out, _Iter>::__value) { using _ValueTypeI = iter_value_t<_Iter>; static_assert(_IsMove
[gcc r15-3812] c++: diagnose this specifier in requires expr [PR116798]
https://gcc.gnu.org/g:4700ad1c78ccd7767f846802fca148b2ea9a1852 commit r15-3812-g4700ad1c78ccd7767f846802fca148b2ea9a1852 Author: Marek Polacek Date: Mon Sep 23 12:19:40 2024 -0400 c++: diagnose this specifier in requires expr [PR116798] We don't detect an explicit object parameter in a requires expression. We can get there by way of requires-expression -> requirement-parameter-list -> parameter-declaration-clause -> ... -> parameter-declaration with this[opt]. But [dcl.fct]/5 doesn't allow an explicit object parameter in this context. So let's fix it like r14-9033 and not like r14-8832. PR c++/116798 gcc/cp/ChangeLog: * parser.cc (cp_parser_parameter_declaration): Detect an explicit object parameter in a requires expression. gcc/testsuite/ChangeLog: * g++.dg/cpp23/explicit-obj-diagnostics12.C: New test. Reviewed-by: Jason Merrill Diff: --- gcc/cp/parser.cc| 11 --- gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics12.C | 10 ++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc index 4dd9474cf609..dbc607027dfb 100644 --- a/gcc/cp/parser.cc +++ b/gcc/cp/parser.cc @@ -25982,10 +25982,15 @@ cp_parser_parameter_declaration (cp_parser *parser, bool xobj_param_p = decl_spec_seq_has_spec_p (&decl_specifiers, ds_this); - if (xobj_param_p && template_parm_p) + if (xobj_param_p + && (template_parm_p || current_binding_level->requires_expression)) { - error_at (decl_specifiers.locations[ds_this], - "% specifier in template parameter declaration"); + if (template_parm_p) + error_at (decl_specifiers.locations[ds_this], + "% specifier in template parameter declaration"); + else + error_at (decl_specifiers.locations[ds_this], + "% specifier in a requires-expression parameter"); xobj_param_p = false; decl_specifiers.locations[ds_this] = 0; } diff --git a/gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics12.C b/gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics12.C new file mode 100644 index ..ec0aced0fd9c --- /dev/null +++ 
b/gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics12.C @@ -0,0 +1,10 @@ +// PR c++/116798 +// { dg-do compile { target c++23 } } + +template +concept C = requires(this T u, // { dg-error "'this' specifier in a requires-expression parameter" } +this T v) { // { dg-error "'this' specifier in a requires-expression parameter" } +u + v; +}; + +static_assert(C);
[gcc r15-3813] libobjc: Fix typos
https://gcc.gnu.org/g:0121b852c85db91babdb346f277ca6220eb7df86 commit r15-3813-g0121b852c85db91babdb346f277ca6220eb7df86 Author: Andrew Kreimer Date: Fri Sep 20 11:40:32 2024 +0300 libobjc: Fix typos Fix typos in comments. libobjc/ChangeLog: * Makefile.in: s/overrridden/overridden. * encoding.c (_darwin_rs6000_special_round_type_align): Fix typo in comment. (rs6000_special_round_type_align): Likewise. * exception.c (is_kind_of_exception_matcher): Likewise. (PERSONALITY_FUNCTION): Likewise. * hash.c (objc_hash_next): Likewise. * init.c (__objc_create_classes_tree): Likewise. * objc-private/objc-list.h (list_remove_head): Likewise. * sendmsg.c (__objc_install_dtable_for_class): Likewise. * thr.c (objc_thread_yield): Likewise. Signed-off-by: Andrew Kreimer Diff: --- libobjc/Makefile.in | 2 +- libobjc/encoding.c | 4 ++-- libobjc/exception.c | 4 ++-- libobjc/hash.c | 2 +- libobjc/init.c | 2 +- libobjc/objc-private/objc-list.h | 2 +- libobjc/sendmsg.c| 2 +- libobjc/thr.c| 2 +- 8 files changed, 10 insertions(+), 10 deletions(-) diff --git a/libobjc/Makefile.in b/libobjc/Makefile.in index 58d0638f72e4..3d856eb8d5f1 100644 --- a/libobjc/Makefile.in +++ b/libobjc/Makefile.in @@ -59,7 +59,7 @@ MULTIDO = true MULTICLEAN = true # Not configured per top-level version, since that doesn't get passed -# down at configure time, but overrridden by the top-level install +# down at configure time, but overridden by the top-level install # target. INSTALL = @INSTALL@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ diff --git a/libobjc/encoding.c b/libobjc/encoding.c index 7a2d2abe6d14..f4fc4f452e48 100644 --- a/libobjc/encoding.c +++ b/libobjc/encoding.c @@ -151,7 +151,7 @@ static int __attribute__ ((__unused__)) not_target_flags = 0; # undef TARGET_ALIGN_NATURAL # define TARGET_ALIGN_NATURAL 1 # endif -/* On Darwin32, we need to recurse until we find the starting stuct type. */ +/* On Darwin32, we need to recurse until we find the starting struct type. 
*/ static int _darwin_rs6000_special_round_type_align (const char *struc, int comp, int spec) { @@ -186,7 +186,7 @@ _darwin_rs6000_special_round_type_align (const char *struc, int comp, int spec) /* FIXME: while this file has no business including tm.h, this definitely has no business defining this macro but it -is only way around without really rewritting this file, +is only way around without really rewriting this file, should look after the branch of 3.4 to fix this. */ #define rs6000_special_round_type_align(STRUCT, COMPUTED, SPECIFIED) \ ({ const char *_fields = TYPE_FIELDS (STRUCT); \ diff --git a/libobjc/exception.c b/libobjc/exception.c index f051c5f9524d..dce576e8559b 100644 --- a/libobjc/exception.c +++ b/libobjc/exception.c @@ -42,7 +42,7 @@ is_kind_of_exception_matcher (Class catch_class, id exception) return 1; /* If exception is nil (eg, @throw nil;), then it can only be - catched by a catch-all (eg, @catch (id object)). */ + caught by a catch-all (eg, @catch (id object)). */ if (exception != nil) { Class c; @@ -384,7 +384,7 @@ PERSONALITY_FUNCTION (int version, #endif /* __USING_SJLJ_EXCEPTIONS__ */ /* If ip is not present in the table, C++ would call terminate. */ - /* ??? As with Java, it's perhaps better to tweek the LSDA to that + /* ??? As with Java, it's perhaps better to tweak the LSDA to that no-action is mapped to no-entry. */ CONTINUE_UNWINDING; diff --git a/libobjc/hash.c b/libobjc/hash.c index e216c8cdf3b9..e0ecf30e0244 100644 --- a/libobjc/hash.c +++ b/libobjc/hash.c @@ -222,7 +222,7 @@ objc_hash_next (cache_ptr cache, node_ptr node) if (node->next) { /* There is a node which follows the last node returned. -Step to that node and retun it. */ +Step to that node and return it. 
*/ return node->next; } else diff --git a/libobjc/init.c b/libobjc/init.c index 6216546084b8..9f8bafb8ee3f 100644 --- a/libobjc/init.c +++ b/libobjc/init.c @@ -851,7 +851,7 @@ __objc_create_classes_tree (struct objc_module *module) /* Now iterate over "claimed" categories too (ie, categories that extend a class that has already been loaded by the runtime), and - insert them in the classes tree hiearchy too. Otherwise, if you + insert them in the classes tree hierarchy too. Otherwise, if you add a category, its +load method would not be called if the class is already loaded in the runtime. It the category is "unclaimed", ie, we haven't loaded the main class yet, postpone diff --git a/libobjc/objc-private/objc-list.h b/libobjc/objc-private/objc-list.h index 44c5
[gcc r14-10708] libstdc++: Avoid forming T* in unique_ptr(auto_ptr&&) constraints [PR116529]
https://gcc.gnu.org/g:4d88724c2d804e126d63aed77fa8c2c333e99396 commit r14-10708-g4d88724c2d804e126d63aed77fa8c2c333e99396 Author: Jonathan Wakely Date: Thu Aug 29 13:47:15 2024 +0100 libstdc++: Avoid forming T* in unique_ptr(auto_ptr&&) constraints [PR116529] PR 116529 shows that std::unique_ptr is currently unusable because the constructor taking std::auto_ptr (which is a non-standard extension since C++17) tries to form the invalid type X&* during overload resolution. We can use the `pointer` type in the constructor constraints, instead of trying to form an invalid type. The std::auto_ptr constructor can never actually match for the case where element_type is a reference, so we just need it to produce a substitution failure instead of being ill-formed. LWG 4144 might make std::unique_ptr ill-formed, which would invalidate this new test. We would have to remove this test in that case. Using `pointer` in the constructor from std::auto_ptr would not be needed to support the std::unique_ptr case, but would not cause any harm either. libstdc++-v3/ChangeLog: PR libstdc++/116529 * include/bits/unique_ptr.h (unique_ptr(auto_ptr&&)): Use pointer instead of T*. * testsuite/20_util/unique_ptr/creation/116529.cc: New test. 
(cherry picked from commit a001d515059ba4647169f8c17967d08bbe41cb7a) Diff: --- libstdc++-v3/include/bits/unique_ptr.h | 5 ++-- .../20_util/unique_ptr/creation/116529.cc | 35 ++ 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/libstdc++-v3/include/bits/unique_ptr.h b/libstdc++-v3/include/bits/unique_ptr.h index 0f600db32f94..edcff78bff9f 100644 --- a/libstdc++-v3/include/bits/unique_ptr.h +++ b/libstdc++-v3/include/bits/unique_ptr.h @@ -379,8 +379,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" /// Converting constructor from @c auto_ptr - template, is_same<_Dp, default_delete<_Tp + template, + is_same<_Dp, default_delete<_Tp unique_ptr(auto_ptr<_Up>&& __u) noexcept; #pragma GCC diagnostic pop #endif diff --git a/libstdc++-v3/testsuite/20_util/unique_ptr/creation/116529.cc b/libstdc++-v3/testsuite/20_util/unique_ptr/creation/116529.cc new file mode 100644 index ..323fc7cb27ce --- /dev/null +++ b/libstdc++-v3/testsuite/20_util/unique_ptr/creation/116529.cc @@ -0,0 +1,35 @@ +// { dg-do run { target c++11 } } + +// Bug libstdc++/116529 - Construction of unique_ptr with reference type +// is rejected because of auto_ptr constructor + +#include +#include + +int count = 0; + +struct X +{ + ~X() { ++count; } +}; + +struct deleter : std::default_delete +{ + using pointer = X*; +}; + +void +test01() +{ + { +std::unique_ptr up(new X); +// { dg-bogus "forming pointer to reference" "" { target *-*-* } 0 } +VERIFY( count == 0 ); + } + VERIFY( count == 1 ); +} + +int main() +{ + test01(); +}
[gcc r15-3819] libstdc++: Remove unnecessary 'static' from __is_specialization_of
https://gcc.gnu.org/g:618871ff09c07817f7ce9b2bd7338cd3299ad8f5 commit r15-3819-g618871ff09c07817f7ce9b2bd7338cd3299ad8f5 Author: Nathaniel Shead Date: Tue Sep 10 22:05:40 2024 +1000 libstdc++: Remove unnecessary 'static' from __is_specialization_of This makes the declarations internal linkage, which is an ODR issue, and causes a future modules patch to fail regtest as it now detects attempted uses of TU-local entities in module CMIs. libstdc++-v3/ChangeLog: * include/std/format: Remove unnecessary 'static'. Signed-off-by: Nathaniel Shead Diff: --- libstdc++-v3/include/std/format | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/libstdc++-v3/include/std/format b/libstdc++-v3/include/std/format index 100a53dfd76f..1e9a8374012d 100644 --- a/libstdc++-v3/include/std/format +++ b/libstdc++-v3/include/std/format @@ -365,10 +365,9 @@ namespace __format /// @cond undocumented template class _Class> -static constexpr bool __is_specialization_of = false; +constexpr bool __is_specialization_of = false; template class _Class, typename... _Args> -static constexpr bool __is_specialization_of<_Class<_Args...>, _Class> - = true; +constexpr bool __is_specialization_of<_Class<_Args...>, _Class> = true; namespace __format {
[gcc r15-3816] nvptx: Partial support for aliases to aliases.
https://gcc.gnu.org/g:f5ee372b012594830f6d5f7f4b7e01bae810b1da commit r15-3816-gf5ee372b012594830f6d5f7f4b7e01bae810b1da Author: Prathamesh Kulkarni Date: Tue Sep 24 08:18:48 2024 +0530 nvptx: Partial support for aliases to aliases. For the following test (adapted from pr96390.c): __attribute__((noipa)) int foo () { return 42; } int bar () __attribute__((alias ("foo"))); int baz () __attribute__((alias ("bar"))); int main () { int n; #pragma omp target map(from:n) n = baz (); return n; } gcc emits the following PTX for baz: .visible .func (.param.u32 %value_out) bar; .alias bar,foo; .visible .func (.param.u32 %value_out) baz; .alias baz,bar; which is incorrect since PTX requires the aliasee to be a defined function. The patch instead uses cgraph_node::get(name)->ultimate_alias_target, which generates the following PTX: .visible .func (.param.u32 %value_out) baz; .alias baz,foo; gcc/ChangeLog: PR target/104957 * config/nvptx/nvptx.cc (nvptx_asm_output_def_from_decls): Use cgraph_node::get(name)->ultimate_alias_target instead of value. gcc/testsuite/ChangeLog: PR target/104957 * gcc.target/nvptx/alias-to-alias-1.c: Adjust. 
Signed-off-by: Prathamesh Kulkarni Co-authored-by: Thomas Schwinge Diff: --- gcc/config/nvptx/nvptx.cc | 24 --- gcc/testsuite/gcc.target/nvptx/alias-to-alias-1.c | 6 -- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc index 4a7c64f05eb8..96a1134220ed 100644 --- a/gcc/config/nvptx/nvptx.cc +++ b/gcc/config/nvptx/nvptx.cc @@ -7582,7 +7582,8 @@ nvptx_mem_local_p (rtx mem) while (0) void -nvptx_asm_output_def_from_decls (FILE *stream, tree name, tree value) +nvptx_asm_output_def_from_decls (FILE *stream, tree name, +tree value ATTRIBUTE_UNUSED) { if (nvptx_alias == 0 || !TARGET_PTX_6_3) { @@ -7617,7 +7618,8 @@ nvptx_asm_output_def_from_decls (FILE *stream, tree name, tree value) return; } - if (!cgraph_node::get (name)->referred_to_p ()) + cgraph_node *cnode = cgraph_node::get (name); + if (!cnode->referred_to_p ()) /* Prevent "Internal error: reference to deleted section". */ return; @@ -7626,11 +7628,27 @@ nvptx_asm_output_def_from_decls (FILE *stream, tree name, tree value) fputs (s.str ().c_str (), stream); tree id = DECL_ASSEMBLER_NAME (name); + + /* Walk alias chain to get reference callgraph node. + The rationale of using ultimate_alias_target here is that + PTX's .alias directive only supports 1-level aliasing where + aliasee is function defined in same module. 
+ + So for the following case: + int foo() { return 42; } + int bar () __attribute__((alias ("foo"))); + int baz () __attribute__((alias ("bar"))); + + should resolve baz to foo: + .visible .func (.param.u32 %value_out) baz; + .alias baz,foo; */ + symtab_node *alias_target_node = cnode->ultimate_alias_target (); + tree alias_target_id = DECL_ASSEMBLER_NAME (alias_target_node->decl); std::stringstream s_def; write_fn_marker (s_def, true, TREE_PUBLIC (name), IDENTIFIER_POINTER (id)); fputs (s_def.str ().c_str (), stream); NVPTX_ASM_OUTPUT_DEF (stream, IDENTIFIER_POINTER (id), - IDENTIFIER_POINTER (value)); + IDENTIFIER_POINTER (alias_target_id)); } #undef NVPTX_ASM_OUTPUT_DEF diff --git a/gcc/testsuite/gcc.target/nvptx/alias-to-alias-1.c b/gcc/testsuite/gcc.target/nvptx/alias-to-alias-1.c index 7bce7a358c79..08de9e6d69da 100644 --- a/gcc/testsuite/gcc.target/nvptx/alias-to-alias-1.c +++ b/gcc/testsuite/gcc.target/nvptx/alias-to-alias-1.c @@ -1,6 +1,8 @@ /* Alias to alias; 'libgomp.c-c++-common/pr96390.c'. */ -/* { dg-do compile } */ +/* { dg-do link } */ +/* { dg-do run { target nvptx_runtime_alias_ptx } } */ +/* { dg-options -save-temps } */ /* { dg-add-options nvptx_alias_ptx } */ int v; @@ -32,7 +34,7 @@ main (void) /* { dg-final { scan-assembler-times {(?n)^// BEGIN GLOBAL FUNCTION DECL: baz$} 1 } } { dg-final { scan-assembler-times {(?n)^\.visible \.func baz;$} 1 } } { dg-final { scan-assembler-times {(?n)^// BEGIN GLOBAL FUNCTION DEF: baz$} 1 } } - { dg-final { scan-assembler-times {(?n)^\.alias baz,bar;$} 1 } } */ + { dg-final { scan-assembler-times {(?n)^\.alias baz,foo;$} 1 } } */ /* { dg-final { scan-assembler-times {(?n)\tcall foo;$} 0 } } { dg-final { scan-assembler-times {(?n)\tcall bar;$} 0 } }
[gcc r15-3817] hosthooks.h: Fix GCC_HOST_HOOKS_H typo
https://gcc.gnu.org/g:824229e38662b5921e156d0fcbd7180462ba9d60 commit r15-3817-g824229e38662b5921e156d0fcbd7180462ba9d60 Author: Yangyu Chen Date: Tue Sep 24 01:11:11 2024 +0800 hosthooks.h: Fix GCC_HOST_HOOKS_H typo The comment of the final endif in hosthooks.h is wrong, it should be GCC_HOST_HOOKS_H instead of GCC_LANG_HOOKS_H. gcc/ChangeLog: * hosthooks.h (struct host_hooks): Fix GCC_HOST_HOOKS_H typo. Signed-off-by: Yangyu Chen Diff: --- gcc/hosthooks.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/hosthooks.h b/gcc/hosthooks.h index 53363801330a..8178c9c692ae 100644 --- a/gcc/hosthooks.h +++ b/gcc/hosthooks.h @@ -47,4 +47,4 @@ struct host_hooks /* Each host provides its own. */ extern const struct host_hooks host_hooks; -#endif /* GCC_LANG_HOOKS_H */ +#endif /* GCC_HOST_HOOKS_H */
[gcc r15-3818] tree-optimization/114855 - high update_ssa time
https://gcc.gnu.org/g:f9dfe8dea31bf5c56aa7798a0905707faf9e7ec4 commit r15-3818-gf9dfe8dea31bf5c56aa7798a0905707faf9e7ec4 Author: Richard Biener Date: Mon Sep 23 15:41:14 2024 +0200 tree-optimization/114855 - high update_ssa time Part of the problem in PR114855 is high update_ssa time. When one fixes the backward jump threading issue tree SSA incremental is at 439.91s ( 26%), mostly doing bitmap element searches for blocks_with_phis_to_rewrite. The following turns that bitmap to tree view noticing the two-dimensional vector of PHIs it guards is excessive compared to what we actually save with it - walking all PHI nodes in a block, something we already do once to initialize stmt flags. So instead of optimizing that walk we use the stmt flag, saving allocations and global state that lives throughout the whole compilation. This reduces the tree SSA incremental time to 203.13 ( 14%) The array was added in r0-74758-g2ce798794df8e1 when we still possibly had gazillion virtual operands for PR26830, I checked the testcase still behaves OK. PR tree-optimization/114855 * tree-into-ssa.cc (phis_to_rewrite): Remove global var. (mark_phi_for_rewrite): Simplify. (rewrite_update_phi_arguments): Walk all PHIs, process those satisfying rewrite_uses_p. (delete_update_ssa): Simplify. (update_ssa): Likewise. Switch blocks_with_phis_to_rewrite to tree view. Diff: --- gcc/tree-into-ssa.cc | 44 1 file changed, 8 insertions(+), 36 deletions(-) diff --git a/gcc/tree-into-ssa.cc b/gcc/tree-into-ssa.cc index 5b367c358125..1cce9d628090 100644 --- a/gcc/tree-into-ssa.cc +++ b/gcc/tree-into-ssa.cc @@ -101,12 +101,7 @@ static sbitmap interesting_blocks; released after we finish updating the SSA web. */ bitmap names_to_release; -/* vec of vec of PHIs to rewrite in a basic block. Element I corresponds - the to basic block with index I. Allocated once per compilation, *not* - released between different functions. 
*/ -static vec< vec > phis_to_rewrite; - -/* The bitmap of non-NULL elements of PHIS_TO_REWRITE. */ +/* The bitmap of blocks with PHIs to rewrite. */ static bitmap blocks_with_phis_to_rewrite; /* Growth factor for NEW_SSA_NAMES and OLD_SSA_NAMES. These sets need @@ -942,9 +937,6 @@ find_def_blocks_for (tree var) static void mark_phi_for_rewrite (basic_block bb, gphi *phi) { - vec phis; - unsigned n, idx = bb->index; - if (rewrite_uses_p (phi)) return; @@ -953,21 +945,7 @@ mark_phi_for_rewrite (basic_block bb, gphi *phi) if (!blocks_with_phis_to_rewrite) return; - if (bitmap_set_bit (blocks_with_phis_to_rewrite, idx)) -{ - n = (unsigned) last_basic_block_for_fn (cfun) + 1; - if (phis_to_rewrite.length () < n) - phis_to_rewrite.safe_grow_cleared (n, true); - - phis = phis_to_rewrite[idx]; - gcc_assert (!phis.exists ()); - phis.create (10); -} - else -phis = phis_to_rewrite[idx]; - - phis.safe_push (phi); - phis_to_rewrite[idx] = phis; + bitmap_set_bit (blocks_with_phis_to_rewrite, bb->index); } /* Insert PHI nodes for variable VAR using the iterated dominance @@ -2097,18 +2075,17 @@ rewrite_update_phi_arguments (basic_block bb) FOR_EACH_EDGE (e, ei, bb->succs) { - vec phis; - if (!bitmap_bit_p (blocks_with_phis_to_rewrite, e->dest->index)) continue; - phis = phis_to_rewrite[e->dest->index]; - for (gphi *phi : phis) + for (auto gsi = gsi_start_phis (e->dest); + !gsi_end_p (gsi); gsi_next(&gsi)) { tree arg, lhs_sym, reaching_def = NULL; use_operand_p arg_p; - - gcc_checking_assert (rewrite_uses_p (phi)); + gphi *phi = *gsi; + if (!rewrite_uses_p (*gsi)) + continue; arg_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, e); arg = USE_FROM_PTR (arg_p); @@ -3060,10 +3037,6 @@ delete_update_ssa (void) fini_ssa_renamer (); - if (blocks_with_phis_to_rewrite) -EXECUTE_IF_SET_IN_BITMAP (blocks_with_phis_to_rewrite, 0, i, bi) - phis_to_rewrite[i].release (); - BITMAP_FREE (blocks_with_phis_to_rewrite); BITMAP_FREE (blocks_to_update); @@ -3470,8 +3443,7 @@ update_ssa (unsigned update_flags) 
gcc_assert (update_ssa_initialized_fn == cfun); blocks_with_phis_to_rewrite = BITMAP_ALLOC (NULL); - if (!phis_to_rewrite.exists ()) -phis_to_rewrite.create (last_basic_block_for_fn (cfun) + 1); + bitmap_tree_view (blocks_with_phis_to_rewrite); blocks_to_update = BITMAP_ALLOC (NULL); insert_phi_p = (update_flags != TODO_update_ssa_no_phi);