[gcc r15-3030] c++/modules: Fix type lookup in DECL_TEMPLATE_INSTANTIATIONS [PR116364]
https://gcc.gnu.org/g:c310d29cac1c3a770f48ab8bb2d295ef9cc08c53 commit r15-3030-gc310d29cac1c3a770f48ab8bb2d295ef9cc08c53 Author: Nathaniel Shead Date: Thu Aug 15 21:46:09 2024 +1000 c++/modules: Fix type lookup in DECL_TEMPLATE_INSTANTIATIONS [PR116364] We need to use the DECL_TEMPLATE_INSTANTIATIONS property to find reachable specialisations from a template to ensure that any GM specialisations are properly marked as reachable. Currently the modules code uses the decl when rebuilding this property, but this is not always correct; it appears that for type specialisations we need to use the TREE_TYPE of the decl instead so that the specialisation is correctly found. This patch makes the required adjustments. PR c++/116364 gcc/cp/ChangeLog: * cp-tree.h (get_mergeable_specialization_flags): Adjust signature. * module.cc (trees_out::decl_value): Indicate whether this is a type or decl specialisation. * pt.cc (get_mergeable_specialization_flags): Match against the type of a non-decl specialisation. (add_mergeable_specialization): Use the already calculated spec instead of always adding decl to DECL_TEMPLATE_INSTANTIATIONS. gcc/testsuite/ChangeLog: * g++.dg/modules/tpl-spec-9_a.C: New test. * g++.dg/modules/tpl-spec-9_b.C: New test. * g++.dg/modules/tpl-spec-9_c.C: New test. 
Signed-off-by: Nathaniel Shead Reviewed-by: Jason Merrill Diff: --- gcc/cp/cp-tree.h| 3 ++- gcc/cp/module.cc| 3 ++- gcc/cp/pt.cc| 8 +--- gcc/testsuite/g++.dg/modules/tpl-spec-9_a.C | 12 gcc/testsuite/g++.dg/modules/tpl-spec-9_b.C | 5 + gcc/testsuite/g++.dg/modules/tpl-spec-9_c.C | 5 + 6 files changed, 31 insertions(+), 5 deletions(-) diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index a53fbcb43ec..039c70710a2 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -7672,7 +7672,8 @@ extern void walk_specializations (bool, void *), void *); extern tree match_mergeable_specialization (bool is_decl, spec_entry *); -extern unsigned get_mergeable_specialization_flags (tree tmpl, tree spec); +extern unsigned get_mergeable_specialization_flags (bool is_decl, tree tmpl, + tree spec); extern void add_mergeable_specialization(bool is_decl, spec_entry *, tree outer, unsigned); extern tree add_to_template_args (tree, tree); diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc index f4d137b13a1..c3218bd5caf 100644 --- a/gcc/cp/module.cc +++ b/gcc/cp/module.cc @@ -7981,7 +7981,8 @@ trees_out::decl_value (tree decl, depset *dep) auto *entry = reinterpret_cast (dep->deps[0]); if (streaming_p ()) - u (get_mergeable_specialization_flags (entry->tmpl, decl)); + u (get_mergeable_specialization_flags (mk & MK_tmpl_decl_mask, + entry->tmpl, decl)); tree_node (entry->tmpl); tree_node (entry->args); } diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc index 684ee0c8a60..32d164f0fd5 100644 --- a/gcc/cp/pt.cc +++ b/gcc/cp/pt.cc @@ -31561,13 +31561,14 @@ match_mergeable_specialization (bool decl_p, spec_entry *elt) specialization lists of TMPL. */ unsigned -get_mergeable_specialization_flags (tree tmpl, tree decl) +get_mergeable_specialization_flags (bool decl_p, tree tmpl, tree decl) { unsigned flags = 0; + tree spec = decl_p ? 
decl : TREE_TYPE (decl); for (tree inst = DECL_TEMPLATE_INSTANTIATIONS (tmpl); inst; inst = TREE_CHAIN (inst)) -if (TREE_VALUE (inst) == decl) +if (TREE_VALUE (inst) == spec) { flags |= 1; break; @@ -31625,7 +31626,8 @@ add_mergeable_specialization (bool decl_p, spec_entry *elt, tree decl, if (flags & 1) DECL_TEMPLATE_INSTANTIATIONS (elt->tmpl) - = tree_cons (elt->args, decl, DECL_TEMPLATE_INSTANTIATIONS (elt->tmpl)); + = tree_cons (elt->args, elt->spec, + DECL_TEMPLATE_INSTANTIATIONS (elt->tmpl)); if (flags & 2) { diff --git a/gcc/testsuite/g++.dg/modules/tpl-spec-9_a.C b/gcc/testsuite/g++.dg/modules/tpl-spec-9_a.C new file mode 100644 index 000..d7c02bb279d --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/tpl-spec-9_a.C @@ -0,0 +1,12 @@ +// PR c++/116364 +// { dg-additional-options "-fmodules-ts -Wno-global-module" } +// { dg-module-cmi foo:part } + +module; +template struct S {}; +template <> struct S + { static constexpr bool value = true; }; +export module foo:part; + +export templa
[gcc r15-3031] c++/modules: Avoid rechecking initializers when streaming NTTPs [PR116382]
https://gcc.gnu.org/g:0b7904e274fbd6a736d63c0fed28ea32f9cb5997 commit r15-3031-g0b7904e274fbd6a736d63c0fed28ea32f9cb5997 Author: Nathaniel Shead Date: Fri Aug 16 15:06:33 2024 +1000 c++/modules: Avoid rechecking initializers when streaming NTTPs [PR116382] When reading an NTTP we call get_template_parm_object which delegates setting of DECL_INITIAL to the general cp_finish_decl procedure, which calls check_initializer to validate and record it. Apart from being unnecessary (it must have already been validated by the writing module), this also causes errors in cases like the linked PR, as validating may end up needing to call lazy_load_pendings to determine any specialisations that may exist which violates assumptions of the modules streaming code. This patch works around the issue by adding a flag to get_template_parm_object to disable these checks when not needed. PR c++/116382 gcc/cp/ChangeLog: * cp-tree.h (get_template_parm_object): Add check_init param. * module.cc (trees_in::tree_node): Pass check_init=false when building NTTPs. * pt.cc (get_template_parm_object): Prevent cp_finish_decl from validating the initializer when check_init=false. gcc/testsuite/ChangeLog: * g++.dg/modules/tpl-nttp-1_a.C: New test. * g++.dg/modules/tpl-nttp-1_b.C: New test. 
Signed-off-by: Nathaniel Shead Reviewed-by: Jason Merrill Diff: --- gcc/cp/cp-tree.h| 3 ++- gcc/cp/module.cc| 6 +- gcc/cp/pt.cc| 18 ++ gcc/testsuite/g++.dg/modules/tpl-nttp-1_a.C | 8 gcc/testsuite/g++.dg/modules/tpl-nttp-1_b.C | 6 ++ 5 files changed, 35 insertions(+), 6 deletions(-) diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index 039c70710a2..a9ce44bb214 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -7626,7 +7626,8 @@ enum { nt_opaque = false, nt_transparent = true }; extern tree alias_template_specialization_p (const_tree, bool); extern tree dependent_alias_template_spec_p (const_tree, bool); extern bool dependent_opaque_alias_p(const_tree); -extern tree get_template_parm_object (tree expr, tree mangle); +extern tree get_template_parm_object (tree expr, tree mangle, +bool check_init = true); extern tree tparm_object_argument (tree); extern bool explicit_class_specialization_p (tree); extern bool push_tinst_level(tree); diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc index c3218bd5caf..0a4ceffa3d6 100644 --- a/gcc/cp/module.cc +++ b/gcc/cp/module.cc @@ -9938,7 +9938,11 @@ trees_in::tree_node (bool is_use) tree name = tree_node (); if (!get_overrun ()) { - res = get_template_parm_object (init, name); + /* We don't want to check the initializer as that may require + name lookup, which could recursively start lazy loading. + Instead we know that INIT is already valid so we can just + apply that directly. */ + res = get_template_parm_object (init, name, /*check_init=*/false); int tag = insert (res); dump (dumper::TREE) && dump ("Created nttp object:%d %N", tag, name); diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc index 32d164f0fd5..76edc7aad50 100644 --- a/gcc/cp/pt.cc +++ b/gcc/cp/pt.cc @@ -7361,10 +7361,11 @@ create_template_parm_object (tree expr, tsubst_flags_t complain) static GTY(()) hash_map *tparm_obj_values; /* Find or build an nttp object for (already-validated) EXPR with name - NAME. */ + NAME. 
When CHECK_INIT is false we don't need to process the initialiser, + it's already been done. */ tree -get_template_parm_object (tree expr, tree name) +get_template_parm_object (tree expr, tree name, bool check_init/*=true*/) { tree decl = get_global_binding (name); if (decl) @@ -7385,11 +7386,20 @@ get_template_parm_object (tree expr, tree name) { /* If EXPR contains any PTRMEM_CST, they will get clobbered by lower_var_init before we're done mangling. So store the original -value elsewhere. */ - tree copy = unshare_constructor (expr); +value elsewhere. We only need to unshare EXPR if it's not yet +been processed. */ + tree copy = check_init ? unshare_constructor (expr) : expr; hash_map_safe_put (tparm_obj_values, decl, copy); } + if (!check_init) +{ + /* The EXPR is the already processed initializer, set it on the NTTP +object now so that cp_finish_decl doesn't do it again later. */ + DECL_INITIAL (decl) = expr; + DECL_INITIALIZED_P (decl) = 1; +} + pushdecl_top_level_and_finish (decl, expr); return decl; diff --git
[gcc r15-3032] c++/modules: Handle transitive reachability for deduction guides [PR116403]
https://gcc.gnu.org/g:6f115a8eeea41d383dfb1bbb1af6ac9a97aee180 commit r15-3032-g6f115a8eeea41d383dfb1bbb1af6ac9a97aee180 Author: Nathaniel Shead Date: Sun Aug 18 11:36:40 2024 +1000 c++/modules: Handle transitive reachability for deduction guides [PR116403] Currently we implement [temp.deduct.guide] p1 by forcing all deduction guides to be considered as exported. However this is not sufficient: for transitive non-exported imports we will still hide the deduction guide from name lookup, causing errors. This patch instead adjusts name lookup to have a new ANY_REACHABLE flag to allow for this case. Currently this is only used by deduction guides but there are some other circumstances where this may be useful in the future (e.g. finding existing temploid friends). PR c++/116403 gcc/cp/ChangeLog: * pt.cc (deduction_guides_for): Use ANY_REACHABLE for lookup of deduction guides. * module.cc (depset::hash::add_deduction_guides): Likewise. (module_state::write_cluster): No longer override deduction guides as exported. * name-lookup.cc (name_lookup::search_namespace_only): Ignore visibility when LOOK_want::ANY_REACHABLE is specified. (check_module_override): Ignore visibility when checking for ambiguating deduction guides. * name-lookup.h (LOOK_want): New flag 'ANY_REACHABLE'. gcc/testsuite/ChangeLog: * g++.dg/modules/dguide-4_a.C: New test. * g++.dg/modules/dguide-4_b.C: New test. * g++.dg/modules/dguide-4_c.C: New test. 
Signed-off-by: Nathaniel Shead Reviewed-by: Jason Merrill Diff: --- gcc/cp/module.cc | 7 +- gcc/cp/name-lookup.cc | 38 --- gcc/cp/name-lookup.h | 5 +++- gcc/cp/pt.cc | 3 ++- gcc/testsuite/g++.dg/modules/dguide-4_a.C | 18 +++ gcc/testsuite/g++.dg/modules/dguide-4_b.C | 9 gcc/testsuite/g++.dg/modules/dguide-4_c.C | 15 7 files changed, 79 insertions(+), 16 deletions(-) diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc index 0a4ceffa3d6..ce0ba69641b 100644 --- a/gcc/cp/module.cc +++ b/gcc/cp/module.cc @@ -13645,7 +13645,7 @@ depset::hash::add_deduction_guides (tree decl) if (find_binding (ns, name)) return; - tree guides = lookup_qualified_name (ns, name, LOOK_want::NORMAL, + tree guides = lookup_qualified_name (ns, name, LOOK_want::ANY_REACHABLE, /*complain=*/false); if (guides == error_mark_node) return; @@ -15228,11 +15228,6 @@ module_state::write_cluster (elf_out *to, depset *scc[], unsigned size, flags |= cbf_hidden; else if (DECL_MODULE_EXPORT_P (STRIP_TEMPLATE (bound))) flags |= cbf_export; - else if (deduction_guide_p (bound)) - /* Deduction guides are always exported so that they are -visible to name lookup whenever their class template -is reachable. 
*/ - flags |= cbf_export; } gcc_checking_assert (DECL_P (bound)); diff --git a/gcc/cp/name-lookup.cc b/gcc/cp/name-lookup.cc index 872f1af0b2e..70ad4cbf3b5 100644 --- a/gcc/cp/name-lookup.cc +++ b/gcc/cp/name-lookup.cc @@ -916,7 +916,8 @@ name_lookup::search_namespace_only (tree scope) if (unsigned base = cluster->indices[jx].base) if (unsigned span = cluster->indices[jx].span) do - if (bitmap_bit_p (imports, base)) + if (bool (want & LOOK_want::ANY_REACHABLE) + || bitmap_bit_p (imports, base)) goto found; while (++base, --span); continue; @@ -960,9 +961,17 @@ name_lookup::search_namespace_only (tree scope) dup_detect |= dup; } - if (STAT_TYPE_VISIBLE_P (bind)) - type = STAT_TYPE (bind); - bind = STAT_VISIBLE (bind); + if (bool (want & LOOK_want::ANY_REACHABLE)) + { + type = STAT_TYPE (bind); + bind = STAT_DECL (bind); + } + else + { + if (STAT_TYPE_VISIBLE_P (bind)) + type = STAT_TYPE (bind); + bind = STAT_VISIBLE (bind); + } } /* And process it. */ @@ -3761,6 +3770,10 @@ check_module_override (tree decl, tree mvec, bool hiding, tree nontmpl = STRIP_TEMPLATE (decl); bool attached = DECL_LANG_SPECIFIC (nontm
[gcc r15-3033] c++/modules: Disable streaming definitions of non-vague-linkage GMF decls [PR115020]
https://gcc.gnu.org/g:c1a53d9dcf9ebf0a6b4528a8c3eae48a583f272c commit r15-3033-gc1a53d9dcf9ebf0a6b4528a8c3eae48a583f272c Author: Nathaniel Shead Date: Sat Aug 17 22:37:30 2024 +1000 c++/modules: Disable streaming definitions of non-vague-linkage GMF decls [PR115020] The error in the linked PR is caused because 'DECL_THIS_STATIC' is true for the static member function, causing the streaming code to assume that this is an internal linkage GM entity that needs to be explicitly streamed, which then on read-in gets marked as a vague linkage function (despite being non-inline) causing import_export_decl to complain. However, I don't see any reason why we should care about this: definitions in the GMF should just be emitted as per usual regardless of whether they're internal-linkage or not. Actually the only thing we care about here are header modules, since they have no TU to write definitions into. As such this patch removes these conditions from 'has_definition' and updates some comments to clarify. PR c++/115020 gcc/cp/ChangeLog: * module.cc (has_definition): Only force writing definitions for header_module_p. gcc/testsuite/ChangeLog: * g++.dg/modules/pr115020_a.C: New test. * g++.dg/modules/pr115020_b.C: New test. Signed-off-by: Nathaniel Shead Reviewed-by: Jason Merrill Diff: --- gcc/cp/module.cc | 14 +++--- gcc/testsuite/g++.dg/modules/pr115020_a.C | 10 ++ gcc/testsuite/g++.dg/modules/pr115020_b.C | 10 ++ 3 files changed, 27 insertions(+), 7 deletions(-) diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc index ce0ba69641b..7c42aea05ee 100644 --- a/gcc/cp/module.cc +++ b/gcc/cp/module.cc @@ -11790,10 +11790,9 @@ has_definition (tree decl) if (DECL_DECLARED_INLINE_P (decl)) return true; - if (DECL_THIS_STATIC (decl) - && (header_module_p () - || (!DECL_LANG_SPECIFIC (decl) || !DECL_MODULE_PURVIEW_P (decl - /* GM static function. */ + if (header_module_p ()) + /* We always need to write definitions in header modules, + since there's no TU to emit them in otherwise. 
*/ return true; if (DECL_TEMPLATE_INFO (decl)) @@ -11826,11 +11825,12 @@ has_definition (tree decl) else { if (!DECL_INITIALIZED_P (decl)) + /* Not defined. */ return false; - if (header_module_p () - || (!DECL_LANG_SPECIFIC (decl) || !DECL_MODULE_PURVIEW_P (decl))) - /* GM static variable. */ + if (header_module_p ()) + /* We always need to write definitions in header modules, + since there's no TU to emit them in otherwise. */ return true; if (!TREE_CONSTANT (decl)) diff --git a/gcc/testsuite/g++.dg/modules/pr115020_a.C b/gcc/testsuite/g++.dg/modules/pr115020_a.C new file mode 100644 index 000..8c190f13b1e --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/pr115020_a.C @@ -0,0 +1,10 @@ +// PR c++/115020 +// { dg-additional-options "-fmodules-ts -Wno-global-module" } +// { dg-module-cmi M:a } + +module; +struct Check { static void assertion(); }; +void Check::assertion() {} + +module M:a; +Check c; diff --git a/gcc/testsuite/g++.dg/modules/pr115020_b.C b/gcc/testsuite/g++.dg/modules/pr115020_b.C new file mode 100644 index 000..e299454ed54 --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/pr115020_b.C @@ -0,0 +1,10 @@ +// PR c++/115020 +// { dg-additional-options "-fmodules-ts -Wno-global-module" } +// { dg-module-cmi M } + +module; +struct Check { static void assertion(); }; + +export module M; +import :a; +void foo() { Check::assertion(); }
[gcc r15-3034] [optc-save-gen.awk] Fix streaming of command line options for offloading.
https://gcc.gnu.org/g:db2e9a2a46f64b037494e8300c46f2d90a9fa55c commit r15-3034-gdb2e9a2a46f64b037494e8300c46f2d90a9fa55c Author: Prathamesh Kulkarni Date: Tue Aug 20 12:54:02 2024 +0530 [optc-save-gen.awk] Fix streaming of command line options for offloading. The patch modifies optc-save-gen.awk to generate if (!lto_stream_offload_p) check before streaming out target-specific opt in cl_optimization_stream_out, when offloading is enabled. Also, it modifies cl_optimization_stream_in to issue an error during build time if accelerator backend defines a target-specific Optimization option. This restriction currently is in place to maintain consistency for streaming of Optimization options between host and accelerator. A proper fix would be to merge target-specific Optimization options for host and accelerators enabled for offloading. gcc/ChangeLog: * optc-save-gen.awk: New array var_target_opt. Use it to generate if (!lto_stream_offload_p) check in cl_optimization_stream_out, and generate a diagnostic with #error if accelerator backend uses Optimization for target-specific options in cl_optimization_stream_in. Signed-off-by: Prathamesh Kulkarni Diff: --- gcc/optc-save-gen.awk | 16 1 file changed, 16 insertions(+) diff --git a/gcc/optc-save-gen.awk b/gcc/optc-save-gen.awk index a3af88e3776..b1289c281e7 100644 --- a/gcc/optc-save-gen.awk +++ b/gcc/optc-save-gen.awk @@ -1307,6 +1307,11 @@ for (i = 0; i < n_opts; i++) { var_opt_optimize_init[n_opt_val] = init; } + # Mark options that are annotated with both Optimization and + # Target so we can avoid streaming out target-specific opts when + # offloading is enabled. + if (flag_set_p("Target", flags[i])) + var_target_opt[n_opt_val] = 1; n_opt_val++; } } @@ -1384,6 +1389,10 @@ for (i = 0; i < n_opt_val; i++) { } else { sgn = "int"; } + # Do not stream out target-specific opts if offloading is + # enabled. + if (var_target_opt[i]) + print " if (!lto_stream_offload_p)" # If applicable, encode the streamed value. 
if (var_opt_optimize_init[i]) { print " if (" var_opt_optimize_init[i] " > (" var_opt_val_type[i] ") 10)"; @@ -1408,6 +1417,11 @@ print " struct cl_optimization *ptr ATTRIBUTE_UNUSED)" print "{"; for (i = 0; i < n_opt_val; i++) { name = var_opt_val[i] +if (var_target_opt[i]) { + print "#ifdef ACCEL_COMPILER" + print "#error accel compiler cannot define Optimization attribute for target-specific option " name; + print "#else" + } otype = var_opt_val_type[i]; if (otype ~ "^const char \\**$") { print " ptr->" name" = bp_unpack_string (data_in, bp);"; @@ -1427,6 +1441,8 @@ for (i = 0; i < n_opt_val; i++) { print "ptr->" name" ^= " var_opt_optimize_init[i] ";"; } } + if (var_target_opt[i]) + print "#endif" } print " for (size_t i = 0; i < ARRAY_SIZE (ptr->explicit_mask); i++)"; print "ptr->explicit_mask[i] = bp_unpack_value (bp, 64);";
[gcc r15-3035] Fortran: Fix [Coarray] ICE in conv_caf_send, at fortran/trans-intrinsic.c:1950 [PR84246]
https://gcc.gnu.org/g:35f56012806432fd89bbae431950a8dc5f6729a3 commit r15-3035-g35f56012806432fd89bbae431950a8dc5f6729a3 Author: Andre Vehreschild Date: Wed Jul 17 12:30:52 2024 +0200 Fortran: Fix [Coarray] ICE in conv_caf_send, at fortran/trans-intrinsic.c:1950 [PR84246] Fix ICE caused by converted expression already being a pointer by checking for its type. Lift rewrite to caf_send completely into resolve and prevent more temporary arrays. PR fortran/84246 gcc/fortran/ChangeLog: * resolve.cc (caf_possible_reallocate): Detect arrays that may be reallocated by caf_send. (resolve_ordinary_assign): More reliably detect assignments where a rewrite to caf_send is needed. * trans-expr.cc (gfc_trans_assignment_1): Remove rewrite to caf_send, because this is done by resolve now. * trans-intrinsic.cc (conv_caf_send): Prevent unneeded temporary arrays. libgfortran/ChangeLog: * caf/single.c (send_by_ref): Created array's lbound is now 1 and the offset set correctly. gcc/testsuite/ChangeLog: * gfortran.dg/coarray_allocate_7.f08: Adapt to array being allocated by caf_send. Diff: --- gcc/fortran/resolve.cc | 18 ++ gcc/fortran/trans-expr.cc| 23 --- gcc/fortran/trans-intrinsic.cc | 17 ++--- gcc/testsuite/gfortran.dg/coarray_allocate_7.f08 | 4 +--- libgfortran/caf/single.c | 6 +++--- 5 files changed, 32 insertions(+), 36 deletions(-) diff --git a/gcc/fortran/resolve.cc b/gcc/fortran/resolve.cc index 12973c6bc85..5db327cd12b 100644 --- a/gcc/fortran/resolve.cc +++ b/gcc/fortran/resolve.cc @@ -11601,6 +11601,23 @@ gfc_resolve_blocks (gfc_code *b, gfc_namespace *ns) } } +bool +caf_possible_reallocate (gfc_expr *e) +{ + symbol_attribute caf_attr; + gfc_ref *last_arr_ref = nullptr; + + caf_attr = gfc_caf_attr (e); + if (!caf_attr.codimension || !caf_attr.allocatable || !caf_attr.dimension) +return false; + + /* Only full array refs can indicate a needed reallocation. 
*/ + for (gfc_ref *ref = e->ref; ref; ref = ref->next) +if (ref->type == REF_ARRAY && ref->u.ar.dimen) + last_arr_ref = ref; + + return last_arr_ref && last_arr_ref->u.ar.type == AR_FULL; +} /* Does everything to resolve an ordinary assignment. Returns true if this is an interface assignment. */ @@ -11845,6 +11862,7 @@ resolve_ordinary_assign (gfc_code *code, gfc_namespace *ns) bool caf_convert_to_send = flag_coarray == GFC_FCOARRAY_LIB && (lhs_coindexed + || caf_possible_reallocate (lhs) || (code->expr2->expr_type == EXPR_FUNCTION && code->expr2->value.function.isym && code->expr2->value.function.isym->id == GFC_ISYM_CAF_GET diff --git a/gcc/fortran/trans-expr.cc b/gcc/fortran/trans-expr.cc index c11abb07eb6..8801a15c3a8 100644 --- a/gcc/fortran/trans-expr.cc +++ b/gcc/fortran/trans-expr.cc @@ -12701,29 +12701,6 @@ gfc_trans_assignment_1 (gfc_expr * expr1, gfc_expr * expr2, bool init_flag, expr1->must_finalize = 0; } - else if (flag_coarray == GFC_FCOARRAY_LIB - && lhs_caf_attr.codimension && rhs_caf_attr.codimension - && ((lhs_caf_attr.allocatable && lhs_refs_comp) - || (rhs_caf_attr.allocatable && rhs_refs_comp))) -{ - /* Only detour to caf_send[get][_by_ref] () when the lhs or rhs is an -allocatable component, because those need to be accessed via the -caf-runtime. No need to check for coindexes here, because resolve -has rewritten those already. */ - gfc_code code; - gfc_actual_arglist a1, a2; - /* Clear the structures to prevent accessing garbage. 
*/ - memset (&code, '\0', sizeof (gfc_code)); - memset (&a1, '\0', sizeof (gfc_actual_arglist)); - memset (&a2, '\0', sizeof (gfc_actual_arglist)); - a1.expr = expr1; - a1.next = &a2; - a2.expr = expr2; - a2.next = NULL; - code.ext.actual = &a1; - code.resolved_isym = gfc_intrinsic_subroutine_by_id (GFC_ISYM_CAF_SEND); - tmp = gfc_conv_intrinsic_subroutine (&code); -} else if (!is_poly_assign && expr2->must_finalize && expr1->ts.type == BT_CLASS && expr2->ts.type == BT_CLASS) diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc index 8e1a2b04ed4..fd2da463825 100644 --- a/gcc/fortran/trans-intrinsic.cc +++ b/gcc/fortran/trans-intrinsic.cc @@ -1945,11 +1945,14 @@ conv_caf_send (gfc_code *code) { tree lhs_type = NULL_TREE; tree vec = null_pointer_node, rhs_vec = null_pointer_node; symbol_attribute lhs_caf_attr, rhs_caf_attr; + bool lhs_is_coindexed, rhs_is_coindexed; gcc_assert (flag_coarray == GFC_FCOARRAY_LIB); lhs_expr = code->ext.a
[gcc r15-3036] tree-optimization/116274 - overzealous SLP vectorization
https://gcc.gnu.org/g:b8ea13ebf1211714503fd72f25c04376483bfa53 commit r15-3036-gb8ea13ebf1211714503fd72f25c04376483bfa53 Author: Richard Biener Date: Thu Aug 8 11:36:43 2024 +0200 tree-optimization/116274 - overzealous SLP vectorization The following tries to address that the vectorizer fails to have precise knowledge of argument and return calling conventions and views some accesses as loads and stores that are not. This is mainly important when doing basic-block vectorization as otherwise loop indexing would force such arguments to memory. On x86 the reduction in the number of apparent loads and stores often dominates cost analysis so the following tries to mitigate this aggressively by adjusting only the scalar load and store cost, reducing them to the cost of a simple scalar statement, but not touching the vector access cost which would be much harder to estimate. Thereby we err on the side of not performing basic-block vectorization. PR tree-optimization/116274 * tree-vect-slp.cc (vect_bb_slp_scalar_cost): Cost scalar loads and stores as simple scalar stmts when they access a non-global, not address-taken variable that doesn't have BLKmode assigned. * gcc.target/i386/pr116274-2.c: New testcase. 
Diff: --- gcc/testsuite/gcc.target/i386/pr116274-2.c | 9 + gcc/tree-vect-slp.cc | 12 +++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.target/i386/pr116274-2.c b/gcc/testsuite/gcc.target/i386/pr116274-2.c new file mode 100644 index 000..d5811344b93 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr116274-2.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-slp2-optimized" } */ + +struct a { long x,y; }; +long test(struct a a) { return a.x+a.y; } + +/* { dg-final { scan-tree-dump-not "basic block part vectorized" "slp2" } } */ +/* { dg-final { scan-assembler-times "addl|leaq" 1 } } */ +/* { dg-final { scan-assembler-not "padd" } } */ diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index 43ecd268970..d6f34d0b73d 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -7618,7 +7618,17 @@ next_lane: vect_cost_for_stmt kind; if (STMT_VINFO_DATA_REF (orig_stmt_info)) { - if (DR_IS_READ (STMT_VINFO_DATA_REF (orig_stmt_info))) + data_reference_p dr = STMT_VINFO_DATA_REF (orig_stmt_info); + tree base = get_base_address (DR_REF (dr)); + /* When the scalar access is to a non-global not address-taken +decl that is not BLKmode assume we can access it with a single +non-load/store instruction. */ + if (DECL_P (base) + && !is_global_var (base) + && !TREE_ADDRESSABLE (base) + && DECL_MODE (base) != BLKmode) + kind = scalar_stmt; + else if (DR_IS_READ (STMT_VINFO_DATA_REF (orig_stmt_info))) kind = scalar_load; else kind = scalar_store;
[gcc r15-3037] libcpp: replace SSE4.2 helper with an SSSE3 one
https://gcc.gnu.org/g:20a5b4824993ae1c99f3b965c5e07bbd2c64b2ce commit r15-3037-g20a5b4824993ae1c99f3b965c5e07bbd2c64b2ce Author: Alexander Monakov Date: Tue Aug 6 09:47:23 2024 +0300 libcpp: replace SSE4.2 helper with an SSSE3 one Since the characters we are searching for (CR, LF, '\', '?') all have distinct ASCII codes mod 16, PSHUFB can help match them all at once. Directly use the new helper if __SSSE3__ is defined. It makes the other helpers unused, so mark them inline to prevent warnings. Rewrite and simplify init_vectorized_lexer. libcpp/ChangeLog: * config.in: Regenerate. * configure: Regenerate. * configure.ac: Check for SSSE3 instead of SSE4.2. * files.cc (read_file_guts): Bump padding to 64 if HAVE_SSSE3. * lex.cc (search_line_acc_char): Mark inline, not "unused". (search_line_sse2): Mark inline. (search_line_sse42): Replace with... (search_line_ssse3): ... this new function. Adjust the use... (init_vectorized_lexer): ... here. Simplify. Diff: --- libcpp/config.in| 4 +- libcpp/configure| 4 +- libcpp/configure.ac | 6 +-- libcpp/files.cc | 19 --- libcpp/lex.cc | 150 +++- 5 files changed, 73 insertions(+), 110 deletions(-) diff --git a/libcpp/config.in b/libcpp/config.in index 253ef03a3de..b2e2f4e842c 100644 --- a/libcpp/config.in +++ b/libcpp/config.in @@ -210,8 +210,8 @@ /* Define to 1 if you have the `putc_unlocked' function. */ #undef HAVE_PUTC_UNLOCKED -/* Define to 1 if you can assemble SSE4 insns. */ -#undef HAVE_SSE4 +/* Define to 1 if you can assemble SSSE3 insns. */ +#undef HAVE_SSSE3 /* Define to 1 if you have the header file. 
*/ #undef HAVE_STDDEF_H diff --git a/libcpp/configure b/libcpp/configure index 32d6aaa3069..1391081ba09 100755 --- a/libcpp/configure +++ b/libcpp/configure @@ -9140,14 +9140,14 @@ case $target in int main () { -asm ("pcmpestri %0, %%xmm0, %%xmm1" : : "i"(0)) +asm ("pshufb %xmm0, %xmm1") ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : -$as_echo "#define HAVE_SSE4 1" >>confdefs.h +$as_echo "#define HAVE_SSSE3 1" >>confdefs.h fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext diff --git a/libcpp/configure.ac b/libcpp/configure.ac index b883fec776f..981f97c4abd 100644 --- a/libcpp/configure.ac +++ b/libcpp/configure.ac @@ -197,9 +197,9 @@ fi case $target in i?86-* | x86_64-*) -AC_TRY_COMPILE([], [asm ("pcmpestri %0, %%xmm0, %%xmm1" : : "i"(0))], - [AC_DEFINE([HAVE_SSE4], [1], -[Define to 1 if you can assemble SSE4 insns.])]) +AC_TRY_COMPILE([], [asm ("pshufb %xmm0, %xmm1")], + [AC_DEFINE([HAVE_SSSE3], [1], +[Define to 1 if you can assemble SSSE3 insns.])]) esac # Enable --enable-host-shared. diff --git a/libcpp/files.cc b/libcpp/files.cc index 78f56e30bde..3775091d259 100644 --- a/libcpp/files.cc +++ b/libcpp/files.cc @@ -693,7 +693,7 @@ static bool read_file_guts (cpp_reader *pfile, _cpp_file *file, location_t loc, const char *input_charset) { - ssize_t size, total, count; + ssize_t size, pad, total, count; uchar *buf; bool regular; @@ -732,11 +732,14 @@ read_file_guts (cpp_reader *pfile, _cpp_file *file, location_t loc, the majority of C source files. */ size = 8 * 1024; - /* The + 16 here is space for the final '\n' and 15 bytes of padding, - used to quiet warnings from valgrind or Address Sanitizer, when the - optimized lexer accesses aligned 16-byte memory chunks, including - the bytes after the malloced, area, and stops lexing on '\n'. 
*/ - buf = XNEWVEC (uchar, size + 16); +#ifdef HAVE_SSSE3 + pad = 64; +#else + pad = 16; +#endif + /* The '+ PAD' here is space for the final '\n' and PAD-1 bytes of padding, + allowing search_line_fast to use (possibly misaligned) vector loads. */ + buf = XNEWVEC (uchar, size + pad); total = 0; while ((count = read (file->fd, buf + total, size - total)) > 0) { @@ -747,7 +750,7 @@ read_file_guts (cpp_reader *pfile, _cpp_file *file, location_t loc, if (regular) break; size *= 2; - buf = XRESIZEVEC (uchar, buf, size + 16); + buf = XRESIZEVEC (uchar, buf, size + pad); } } @@ -765,7 +768,7 @@ read_file_guts (cpp_reader *pfile, _cpp_file *file, location_t loc, file->buffer = _cpp_convert_input (pfile, input_charset, -buf, size + 16, total, +buf, size + pad, total, &file->buffer_start, &file->st.st_size); file->buffer_valid = file->buffer; diff --git a/libcpp/lex.cc b/libcpp/lex.cc index 1591dcdf151..daf2c770bc3 100644 --- a/libcpp/lex.cc +++ b/lib
[gcc r15-3038] c++: default targ eligibility refinement [PR101463]
https://gcc.gnu.org/g:5348e3cb9bc99d2ee4d7438b8eca5c92fff5b931 commit r15-3038-g5348e3cb9bc99d2ee4d7438b8eca5c92fff5b931 Author: Patrick Palka Date: Tue Aug 20 08:34:36 2024 -0400 c++: default targ eligibility refinement [PR101463] On Tue, 9 Jan 2024, Jason Merrill wrote: > On 1/5/24 15:01, Patrick Palka wrote[1]: > > Here during default template argument substitution we wrongly consider > > the (substituted) default arguments v and vt as value-dependent[1] > > which ultimately leads to deduction failure for the calls. > > > > The bogus value_dependent_expression_p result aside, I noticed > > type_unification_real during default targ substitution keeps track of > > whether all previous targs are known and non-dependent, as is the case > > for these calls. And in such cases it should be safe to avoid checking > > dependence of the substituted default targ and just assume it's not. > > This patch implements this optimization, which lets us accept both > > testcases by sidestepping the value_dependent_expression_p issue > > altogether. > > Hmm, maybe instead of substituting and asking if it's dependent, we should > specifically look for undeduced parameters. This patch implements this refinement, which incidentally fixes PR101463 just as well. [1]: https://gcc.gnu.org/pipermail/gcc-patches/2024-January/641957.html PR c++/101463 gcc/cp/ChangeLog: * pt.cc (type_unification_real): Directly look for undeduced parameters in the default argument instead of doing a trial substitution. gcc/testsuite/ChangeLog: * g++.dg/cpp1z/nontype6.C: New test. * g++.dg/cpp1z/nontype6a.C: New test. 
Reviewed-by: Jason Merrill Diff: --- gcc/cp/pt.cc | 41 ++ gcc/testsuite/g++.dg/cpp1z/nontype6.C | 24 gcc/testsuite/g++.dg/cpp1z/nontype6a.C | 25 + 3 files changed, 71 insertions(+), 19 deletions(-) diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc index ea660a5a5c6..bc3ad5edcc5 100644 --- a/gcc/cp/pt.cc +++ b/gcc/cp/pt.cc @@ -23619,28 +23619,31 @@ type_unification_real (tree tparms, is important if the default argument contains something that might be instantiation-dependent like access (87480). */ processing_template_decl_sentinel s (!any_dependent_targs); - tree substed = NULL_TREE; - if (saw_undeduced == 1 && !any_dependent_targs) + + tree used_tparms = NULL_TREE; + if (saw_undeduced == 1) { - /* First instatiate in template context, in case we still -depend on undeduced template parameters. */ - ++processing_template_decl; - substed = tsubst_template_arg (arg, full_targs, complain, -NULL_TREE); - --processing_template_decl; - if (substed != error_mark_node - && !uses_template_parms (substed)) - /* We replaced all the tparms, substitute again out of - template context. */ - substed = NULL_TREE; + tree tparms_list = build_tree_list (size_int (1), tparms); + used_tparms = find_template_parameters (arg, tparms_list); + for (; used_tparms; used_tparms = TREE_CHAIN (used_tparms)) + { + int level, index; + template_parm_level_and_index (TREE_VALUE (used_tparms), +&level, &index); + if (TREE_VEC_ELT (targs, index) == NULL_TREE) + break; + } } - if (!substed) - substed = tsubst_template_arg (arg, full_targs, complain, - NULL_TREE); - if (!uses_template_parms (substed)) - arg = convert_template_argument (parm, substed, full_targs, -complain, i, NULL_TREE); + if (!used_tparms) + { + /* All template parameters within this default argument are +deduced, so we can use it. 
*/ + arg = tsubst_template_arg (arg, full_targs, complain, +NULL_TREE); + arg = convert_template_argument (parm, arg, full_targs, + complain, i, NULL_TREE); + } else if (saw_undeduced == 1) arg = NULL_TREE; else if (!any_dependent_targs) diff --git a/gcc/testsuite/g++.dg/cpp1z/nontype6.C b/gcc/testsuite/g++.dg/cpp1z/non
[gcc r15-3039] testsuite: Add testcases for part of PR 103660
https://gcc.gnu.org/g:82a2f1386b2e8c951e910e1096a04bed21bbd39b commit r15-3039-g82a2f1386b2e8c951e910e1096a04bed21bbd39b Author: Andrew Pinski Date: Mon Aug 12 15:13:04 2024 -0700 testsuite: Add testcases for part of PR 103660 IOR part of the bug report was fixed by r13-4620-g4d9db4bdd458 but that added only aarch64 specific testcases. This adds 4 generic testcases for this to check to make sure they are optimized. The C++ testcases are the vector type versions. PR tree-optimization/103660 gcc/testsuite/ChangeLog: * g++.dg/tree-ssa/pr103660-0.C: New test. * g++.dg/tree-ssa/pr103660-1.C: New test. * gcc.dg/tree-ssa/pr103660-0.c: New test. * gcc.dg/tree-ssa/pr103660-1.c: New test. Signed-off-by: Andrew Pinski Diff: --- gcc/testsuite/g++.dg/tree-ssa/pr103660-0.C | 28 +++ gcc/testsuite/g++.dg/tree-ssa/pr103660-1.C | 28 +++ gcc/testsuite/gcc.dg/tree-ssa/pr103660-0.c | 33 +++ gcc/testsuite/gcc.dg/tree-ssa/pr103660-1.c | 43 ++ 4 files changed, 132 insertions(+) diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr103660-0.C b/gcc/testsuite/g++.dg/tree-ssa/pr103660-0.C new file mode 100644 index 000..766ec92457c --- /dev/null +++ b/gcc/testsuite/g++.dg/tree-ssa/pr103660-0.C @@ -0,0 +1,28 @@ +/* PR tree-optimization/103660 */ +/* Vector type version. */ +/* { dg-do compile } */ +/* { dg-options "-O1 -fdump-tree-forwprop1-raw -Wno-psabi" } */ + +typedef int v4si __attribute((__vector_size__(4 * sizeof(int; +#define funcs(OP,n)\ +v4si min_##n(v4si a, v4si b) { \ + v4si X = -(a < b) * a; \ + v4si Y = -(a >= b) * b; \ + return (X OP Y); \ +} \ +v4si f_##n(v4si a, v4si b, \ + v4si c, v4si d) {\ + v4si X = -(a < b) * c; \ + v4si Y = -(a >= b) * d; \ + return (X OP Y); \ +} + + +funcs(|, ior) + +/* min_ior should produce min or `a < b ? a : b` depending on if the target + supports min on the vector type or not. */ +/* f_ior should produce (a < b) ? 
c : d */ +/* { dg-final { scan-tree-dump-not "bit_ior_expr, " "forwprop1" } } */ +/* { dg-final { scan-tree-dump-times "(?:lt_expr|min_expr), "2 "forwprop1" } } */ +/* { dg-final { scan-tree-dump-times "(?:vec_cond_expr|min_expr), " 2 "forwprop1" } } */ diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr103660-1.C b/gcc/testsuite/g++.dg/tree-ssa/pr103660-1.C new file mode 100644 index 000..713057586f2 --- /dev/null +++ b/gcc/testsuite/g++.dg/tree-ssa/pr103660-1.C @@ -0,0 +1,28 @@ +/* PR tree-optimization/103660 */ +/* Vector type version. */ +/* { dg-do compile } */ +/* { dg-options "-O1 -fdump-tree-forwprop1-raw -Wno-psabi" } */ + +typedef int v4si __attribute((__vector_size__(4 * sizeof(int; +#define funcs(OP,n)\ +v4si min_##n(v4si a, v4si b) { \ + v4si X = a < b ? a : 0; \ + v4si Y = a >= b ? b : 0; \ + return (X OP Y); \ +} \ +v4si f_##n(v4si a, v4si b, \ + v4si c, v4si d) {\ + v4si X = a < b ? c : 0; \ + v4si Y = a >= b ? d : 0; \ + return (X OP Y); \ +} + + +funcs(|, ior) + +/* min_ior should produce min or `a < b ? a : b` depending on if the target + supports min on the vector type or not. */ +/* f_ior should produce (a < b) ? 
c : d */ +/* { dg-final { scan-tree-dump-not "bit_ior_expr, " "forwprop1" } } */ +/* { dg-final { scan-tree-dump-times "(?:lt_expr|min_expr), "2 "forwprop1" } } */ +/* { dg-final { scan-tree-dump-times "(?:vec_cond_expr|min_expr), " 2 "forwprop1" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr103660-0.c b/gcc/testsuite/gcc.dg/tree-ssa/pr103660-0.c new file mode 100644 index 000..6be0721aedd --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr103660-0.c @@ -0,0 +1,33 @@ +/* PR tree-optimization/103660 */ +/* { dg-do compile } */ +/* { dg-options "-O1 -fdump-tree-forwprop4-raw" } */ + +#define funcs(OP,n)\ +int min_##n(int a, int b) {\ + int t; \ + int t1; \ + int t2; \ + t1 = (a < b) * a;\ + t2 = (a >= b) * b; \ + t = t1 OP t2;\ + return t;\ +} \ +int f_##n(int a, int b, int c, \ +int d) { \ + int t; \ + int t1; \ + int t2; \ + t1 = (a < b) * c;\ + t2 = (a >= b) * d; \ + t = t1 OP t2;\ + return t;\ +} + +funcs(|, ior) + +/* min_ior should produce min */ +/* f_ior
[gcc r15-3040] match: extend the `((a CMP b) ? c : 0) | ((a CMP' b) ? d : 0)` patterns to support ^ and + [PR103660]
https://gcc.gnu.org/g:b73373520f0ed5d131d2cd6ee9078939a98d7a0d commit r15-3040-gb73373520f0ed5d131d2cd6ee9078939a98d7a0d Author: Andrew Pinski Date: Mon Aug 12 16:00:45 2024 -0700 match: extend the `((a CMP b) ? c : 0) | ((a CMP' b) ? d : 0)` patterns to support ^ and + [PR103660] r13-4620-g4d9db4bdd458 Added a few patterns and some of them can be extended to support XOR and PLUS. This extends the patterns to support XOR and PLUS instead of just IOR. Bootstrapped and tested on x86_64-linux-gnu. PR tree-optimization/103660 gcc/ChangeLog: * match.pd (`((a CMP b) ? c : 0) | ((a CMP' b) ? d : 0)`): Extend to support XOR and PLUS. gcc/testsuite/ChangeLog: * g++.dg/tree-ssa/pr103660-2.C: New test. * g++.dg/tree-ssa/pr103660-3.C: New test. * gcc.dg/tree-ssa/pr103660-2.c: New test. * gcc.dg/tree-ssa/pr103660-3.c: New test. Signed-off-by: Andrew Pinski Diff: --- gcc/match.pd | 42 +++- gcc/testsuite/g++.dg/tree-ssa/pr103660-2.C | 30 gcc/testsuite/g++.dg/tree-ssa/pr103660-3.C | 30 gcc/testsuite/gcc.dg/tree-ssa/pr103660-2.c | 45 ++ gcc/testsuite/gcc.dg/tree-ssa/pr103660-3.c | 35 +++ 5 files changed, 163 insertions(+), 19 deletions(-) diff --git a/gcc/match.pd b/gcc/match.pd index c9c8478d286..b43ceb6def0 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -2356,18 +2356,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) /* Fold ((-(a < b) & c) | (-(a >= b) & d)) into a < b ? c : d. This is canonicalized further and we recognize the conditional form: -(a < b ? c : 0) | (a >= b ? d : 0) into a < b ? c : d. */ - (simplify - (bit_ior - (cond (cmp@0 @01 @02) @3 zerop) - (cond (icmp@4 @01 @02) @5 zerop)) -(if (INTEGRAL_TYPE_P (type) -&& invert_tree_comparison (cmp, HONOR_NANS (@01)) == icmp -/* The scalar version has to be canonicalized after vectorization - because it makes unconditional loads conditional ones, which - means we lose vectorization because the loads may trap. */ -&& canonicalize_math_after_vectorization_p ()) -(cond @0 @3 @5))) +(a < b ? c : 0) | (a >= b ? d : 0) into a < b ? 
c : d. +Handle also ^ and + in replacement of `|`. */ + (for op (bit_ior bit_xor plus) + (simplify + (op +(cond (cmp@0 @01 @02) @3 zerop) +(cond (icmp@4 @01 @02) @5 zerop)) + (if (INTEGRAL_TYPE_P (type) + && invert_tree_comparison (cmp, HONOR_NANS (@01)) == icmp + /* The scalar version has to be canonicalized after vectorization +because it makes unconditional loads conditional ones, which +means we lose vectorization because the loads may trap. */ + && canonicalize_math_after_vectorization_p ()) + (cond @0 @3 @5 /* Vector Fold (((a < b) & c) | ((a >= b) & d)) into a < b ? c : d. and ((~(a < b) & c) | (~(a >= b) & d)) into a < b ? c : d. */ @@ -2391,13 +2393,15 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (vec_cond @0 @3 @2)) /* Scalar Vectorized Fold ((-(a < b) & c) | (-(a >= b) & d)) -into a < b ? d : c. */ - (simplify - (bit_ior - (vec_cond:s (cmp@0 @4 @5) @2 integer_zerop) - (vec_cond:s (icmp@1 @4 @5) @3 integer_zerop)) - (if (invert_tree_comparison (cmp, HONOR_NANS (@4)) == icmp) - (vec_cond @0 @2 @3 +into a < b ? d : c. +Handle also ^ and + in replacement of `|`. */ + (for op (bit_ior bit_xor plus) + (simplify + (op +(vec_cond:s (cmp@0 @4 @5) @2 integer_zerop) +(vec_cond:s (icmp@1 @4 @5) @3 integer_zerop)) + (if (invert_tree_comparison (cmp, HONOR_NANS (@4)) == icmp) +(vec_cond @0 @2 @3) /* Transform X & -Y into X * Y when Y is { 0 or 1 }. */ (simplify diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr103660-2.C b/gcc/testsuite/g++.dg/tree-ssa/pr103660-2.C new file mode 100644 index 000..95205c02bc3 --- /dev/null +++ b/gcc/testsuite/g++.dg/tree-ssa/pr103660-2.C @@ -0,0 +1,30 @@ +/* PR tree-optimization/103660 */ +/* Vector type version. */ +/* { dg-do compile } */ +/* { dg-options "-O1 -fdump-tree-forwprop1-raw -Wno-psabi" } */ + +typedef int v4si __attribute((__vector_size__(4 * sizeof(int; +#define funcs(OP,n)\ +v4si min_##n(v4si a, v4si b) { \ + v4si X = a < b ? a : 0; \ + v4si Y = a >= b ? 
b : 0; \ + return (X OP Y); \ +} \ +v4si f_##n(v4si a, v4si b, \ + v4si c, v4si d) {\ + v4si X = a < b ? c : 0; \ + v4si Y = a >= b ? d : 0; \ + return (X OP Y); \ +} + + +funcs(^, xor) +funcs(+, plus) + +/* min_xor/min_plus should produce min or `a < b ? a : b` depending on if the target + supports min on the vector type
[gcc r15-3041] Match: Add pattern for `(a ? b : 0) | (a ? 0 : c)` into `a ? b : c` [PR103660]
https://gcc.gnu.org/g:eface71c18caea3009ddc1ac624cb41647e9d5c4 commit r15-3041-geface71c18caea3009ddc1ac624cb41647e9d5c4 Author: Andrew Pinski Date: Mon Aug 12 17:37:32 2024 -0700 Match: Add pattern for `(a ? b : 0) | (a ? 0 : c)` into `a ? b : c` [PR103660] This adds a pattern to convert `(a ? b : 0) | (a ? 0 : c)` into `a ? b : c` which is simpler. It adds both for cond and vec_cond; even though vec_cond is handled via a different pattern currently but requires extra steps for matching so this should be slightly faster. Also handle it for xor and plus too since those can be handled the same way. Bootstrapped and tested on x86_64-linux-gnu with no regressions. PR tree-optimization/103660 gcc/ChangeLog: * match.pd (`(a ? b : 0) | (a ? 0 : c)`): New pattern. gcc/testsuite/ChangeLog: * g++.dg/tree-ssa/pr103660-4.C: New test. * gcc.dg/tree-ssa/pr103660-4.c: New test. Signed-off-by: Andrew Pinski Diff: --- gcc/match.pd | 10 +++ gcc/testsuite/g++.dg/tree-ssa/pr103660-4.C | 35 gcc/testsuite/gcc.dg/tree-ssa/pr103660-4.c | 43 ++ 3 files changed, 88 insertions(+) diff --git a/gcc/match.pd b/gcc/match.pd index b43ceb6def0..65a3aae2243 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -2339,6 +2339,16 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (if (INTEGRAL_TYPE_P (type)) (bit_and @0 @1))) +/* Fold `(a ? b : 0) | (a ? 0 : c)` into (a ? b : c). +Handle also ^ and + in replacement of `|`. */ +(for cnd (cond vec_cond) + (for op (bit_ior bit_xor plus) + (simplify + (op:c +(cnd:s @0 @00 integer_zerop) +(cnd:s @0 integer_zerop @01)) + (cnd @0 @00 @01 + (for cmp (tcc_comparison) icmp (inverted_tcc_comparison) /* Fold (((a < b) & c) | ((a >= b) & d)) into (a < b ? c : d) & 1. */ diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr103660-4.C b/gcc/testsuite/g++.dg/tree-ssa/pr103660-4.C new file mode 100644 index 000..47727f86e24 --- /dev/null +++ b/gcc/testsuite/g++.dg/tree-ssa/pr103660-4.C @@ -0,0 +1,35 @@ +/* PR tree-optimization/103660 */ +/* Vector type version. 
*/ +/* { dg-do compile } */ +/* { dg-options "-O1 -fdump-tree-forwprop2-raw -Wno-psabi" } */ + +typedef int v4si __attribute((__vector_size__(4 * sizeof(int; +#define funcs(OP,n)\ +v4si min_##n(v4si a, v4si b) { \ + v4si t = {0,0,0,0}; \ + v4si X = a < b ? a : t; \ + v4si Y = a < b ? t : b; \ + return (X OP Y); \ +} \ +v4si f_##n(v4si a, v4si b, \ + v4si c, v4si d) {\ + v4si t = {0,0,0,0}; \ + v4si X = a < b ? c : t; \ + v4si Y = a < b ? t : d; \ + return (X OP Y); \ +} + + +funcs(|, ior) +funcs(^, xor) +funcs(+, plus) + +/* min_ior/min_xor/min_plus should produce min or `a < b ? a : b` depending on if the target + supports min on the vector type or not. */ +/* f_ior/f_xor/f_plus should produce (a < b) ? c : d */ +/* { dg-final { scan-tree-dump-not "bit_xor_expr, " "forwprop2" } } */ +/* { dg-final { scan-tree-dump-not "bit_ior_expr, " "forwprop2" } } */ +/* { dg-final { scan-tree-dump-not "plus_expr, ""forwprop2" } } */ +/* { dg-final { scan-tree-dump-not "bit_ior_expr, " "forwprop2" } } */ +/* { dg-final { scan-tree-dump-times "(?:lt_expr|min_expr), "6 "forwprop2" } } */ +/* { dg-final { scan-tree-dump-times "(?:vec_cond_expr|min_expr), " 6 "forwprop2" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr103660-4.c b/gcc/testsuite/gcc.dg/tree-ssa/pr103660-4.c new file mode 100644 index 000..26c956fdcec --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr103660-4.c @@ -0,0 +1,43 @@ +/* PR tree-optimization/103660 */ +/* { dg-do compile } */ +/* { dg-options "-O1 -fgimple -fdump-tree-forwprop1-raw" } */ + +#define funcs(OP,n)\ +__GIMPLE() \ +int min_##n(int a, int b) {\ + _Bool X; \ + int t; \ + int t1; \ + int t2; \ + X = a < b; \ + t1 = X ? a : 0; \ + t2 = X ? 0 : b; \ + t = t1 OP t2;\ + return t;\ +} \ +__GIMPLE() \ +int f_##n(int a, int b, int c, \ +int d) { \ + _Bool X; \ + int t; \ + int t1; \ + int t2; \ + X = a < b; \ + t1 = X ? c : 0; \ + t2 = X ? 0 : d; \ + t = t1 OP t2;\ + return t;\ +} + +funcs(|, ior) +funcs(^, xor) +funcs(+, plus) + +/* min_i/m
[gcc r15-3042] doc: Normalize reference to binutils version for C6X
https://gcc.gnu.org/g:81bf84cf6b1b16609a59ceac5166c3846bba26cd commit r15-3042-g81bf84cf6b1b16609a59ceac5166c3846bba26cd Author: Gerald Pfeifer Date: Tue Aug 20 16:55:25 2024 +0200 doc: Normalize reference to binutils version for C6X We generally do not use a hyphen between project name and version. gcc: * doc/install.texi (Specific) : Normalize reference to binutils. Diff: --- gcc/doc/install.texi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi index 4973f195daf..f871227f684 100644 --- a/gcc/doc/install.texi +++ b/gcc/doc/install.texi @@ -5048,7 +5048,7 @@ as the @command{build} parameter on the @command{configure} line. For example: @end html @anchor{c6x-x-x} @heading c6x-*-* -The C6X family of processors. This port requires binutils-2.22 or newer. +The C6X family of processors. This port requires binutils 2.22 or newer. @html
[gcc r14-10605] [testsuite] [arm] [vect] adjust mve-vshr test [PR113281]
https://gcc.gnu.org/g:25812d8b789748911e800a972e5a3a030e3ac905 commit r14-10605-g25812d8b789748911e800a972e5a3a030e3ac905 Author: Alexandre Oliva Date: Wed Jun 26 02:08:18 2024 -0300 [testsuite] [arm] [vect] adjust mve-vshr test [PR113281] The test was too optimistic, alas. We used to vectorize shifts by clamping the shift counts below the bit width of the types (e.g. at 15 for 16-bit vector elements), but (uint16_t)32768 >> (uint16_t)16 is well defined (because of promotion to 32-bit int) and must yield 0, not 1 (as before the fix). Unfortunately, in the gimple model of vector units, such large shift counts wouldn't be well-defined, so we won't vectorize such shifts any more, unless we can tell they're in range or undefined. So the test that expected the vectorization we no longer performed needs to be adjusted. Instead of nobbling the test, Richard Earnshaw suggested annotating the test with the expected ranges so as to enable the optimization, and Christophe Lyon suggested a further simplification. Co-Authored-By: Richard Earnshaw for gcc/testsuite/ChangeLog PR tree-optimization/113281 * gcc.target/arm/simd/mve-vshr.c: Add expected ranges. (cherry picked from commit 54d2339c9f87f702e02e571a5460e11c19e1c02f) Diff: --- gcc/testsuite/gcc.target/arm/simd/mve-vshr.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-vshr.c b/gcc/testsuite/gcc.target/arm/simd/mve-vshr.c index 8c7adef9ed8..03078de49c6 100644 --- a/gcc/testsuite/gcc.target/arm/simd/mve-vshr.c +++ b/gcc/testsuite/gcc.target/arm/simd/mve-vshr.c @@ -9,6 +9,8 @@ void test_ ## NAME ##_ ## SIGN ## BITS ## x ## NB (TYPE##BITS##_t * __restrict__ dest, TYPE##BITS##_t *a, TYPE##BITS##_t *b) { \ int i; \ for (i=0; i<NB; i++) { \ + if ((unsigned)b[i] >= (unsigned)(BITS)) \ + __builtin_unreachable();\ dest[i] = a[i] OP b[i]; \ } \ }
[gcc r15-3043] c++: Improve errors parsing a braced list [PR101232]
https://gcc.gnu.org/g:64028d626a50410dbf29f252a78c7675b35751d6 commit r15-3043-g64028d626a50410dbf29f252a78c7675b35751d6 Author: Franciszek Witt Date: Tue Aug 20 14:34:01 2024 +0200 c++: Improve errors parsing a braced list [PR101232] PR c++/101232 gcc/cp/ChangeLog: * parser.cc (cp_parser_postfix_expression): Commit to the parse in case we know it's either a cast or invalid syntax. (cp_parser_braced_list): Add a heuristic to inform about missing comma or operator. gcc/testsuite/ChangeLog: * g++.dg/cpp0x/initlist-err1.C: New test. * g++.dg/cpp0x/initlist-err2.C: New test. * g++.dg/cpp0x/initlist-err3.C: New test. Signed-off-by: Franciszek Witt Diff: --- gcc/cp/parser.cc | 23 ++- gcc/testsuite/g++.dg/cpp0x/initlist-err1.C | 11 +++ gcc/testsuite/g++.dg/cpp0x/initlist-err2.C | 11 +++ gcc/testsuite/g++.dg/cpp0x/initlist-err3.C | 11 +++ 4 files changed, 51 insertions(+), 5 deletions(-) diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc index c9654cfff9d..c4388980348 100644 --- a/gcc/cp/parser.cc +++ b/gcc/cp/parser.cc @@ -7878,8 +7878,13 @@ cp_parser_postfix_expression (cp_parser *parser, bool address_p, bool cast_p, --parser->prevent_constrained_type_specifiers; /* Parse the cast itself. */ if (!cp_parser_error_occurred (parser)) - postfix_expression - = cp_parser_functional_cast (parser, type); + { + if (cp_lexer_next_token_is (parser->lexer, CPP_OPEN_BRACE)) + /* This can only be a cast. */ + cp_parser_commit_to_topmost_tentative_parse (parser); + postfix_expression + = cp_parser_functional_cast (parser, type); + } /* If that worked, we're done. */ if (cp_parser_parse_definitely (parser)) break; @@ -26372,7 +26377,7 @@ cp_parser_braced_list (cp_parser *parser, bool *non_constant_p /*=nullptr*/) /* Consume the `{' token. */ matching_braces braces; - braces.require_open (parser); + bool found_opening_brace = braces.require_open (parser); /* Create a CONSTRUCTOR to represent the braced-initializer. 
*/ initializer = make_node (CONSTRUCTOR); /* If it's not a `}', then there is a non-trivial initializer. */ @@ -26390,8 +26395,16 @@ cp_parser_braced_list (cp_parser *parser, bool *non_constant_p /*=nullptr*/) else if (non_constant_p) *non_constant_p = false; /* Now, there should be a trailing `}'. */ - location_t finish_loc = cp_lexer_peek_token (parser->lexer)->location; - braces.require_close (parser); + cp_token * token = cp_lexer_peek_token (parser->lexer); + location_t finish_loc = token->location; + /* The part with CPP_SEMICOLON is just a heuristic. */ + if (!braces.require_close (parser) && token->type != CPP_SEMICOLON + && found_opening_brace && cp_parser_skip_to_closing_brace (parser)) +{ + cp_lexer_consume_token (parser->lexer); + inform (finish_loc, + "probably missing a comma or an operator before"); +} TREE_TYPE (initializer) = init_list_type_node; recompute_constructor_flags (initializer); diff --git a/gcc/testsuite/g++.dg/cpp0x/initlist-err1.C b/gcc/testsuite/g++.dg/cpp0x/initlist-err1.C new file mode 100644 index 000..6ea8afb3273 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp0x/initlist-err1.C @@ -0,0 +1,11 @@ +// PR c++/101232 +// { dg-do compile { target c++11 } } + +struct X { +int a; +int b; +}; + +void f() { +auto x = X{ 1, 2; }; // { dg-error "21:" } +} // { dg-prune-output "expected declaration" } diff --git a/gcc/testsuite/g++.dg/cpp0x/initlist-err2.C b/gcc/testsuite/g++.dg/cpp0x/initlist-err2.C new file mode 100644 index 000..227f519dc19 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp0x/initlist-err2.C @@ -0,0 +1,11 @@ +// PR c++/101232 +// { dg-do compile { target c++11 } } + +struct X { +int a; +int b; +}; + +void f() { +auto x = X{ 1 2 }; // { dg-error "19:.*probably" } +} diff --git a/gcc/testsuite/g++.dg/cpp0x/initlist-err3.C b/gcc/testsuite/g++.dg/cpp0x/initlist-err3.C new file mode 100644 index 000..b77ec9bf4e9 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp0x/initlist-err3.C @@ -0,0 +1,11 @@ +// PR c++/101232 +// { dg-do compile { target 
c++11 } } + +struct X { +int a; +int b; +}; + +void f() { +auto x = X{ 1, {2 }; // { dg-error "expected.*before" } +}
[gcc(refs/users/meissner/heads/work176-vpair)] Revert changes
https://gcc.gnu.org/g:0b7aaee1ce5fa2f2f5f2239450eb128dd6d2b2e5 commit 0b7aaee1ce5fa2f2f5f2239450eb128dd6d2b2e5 Author: Michael Meissner Date: Tue Aug 20 12:46:42 2024 -0400 Revert changes Diff: --- gcc/config/rs6000/rs6000-builtins.def | 10 - gcc/config/rs6000/vector-pair.md | 326 +- gcc/doc/extend.texi | 9 - gcc/testsuite/gcc.target/powerpc/vector-pair-10.c | 61 gcc/testsuite/gcc.target/powerpc/vector-pair-11.c | 65 - gcc/testsuite/gcc.target/powerpc/vector-pair-12.c | 65 - gcc/testsuite/gcc.target/powerpc/vector-pair-5.c | 56 gcc/testsuite/gcc.target/powerpc/vector-pair-6.c | 56 gcc/testsuite/gcc.target/powerpc/vector-pair-7.c | 18 -- gcc/testsuite/gcc.target/powerpc/vector-pair-8.c | 18 -- gcc/testsuite/gcc.target/powerpc/vector-pair-9.c | 61 11 files changed, 1 insertion(+), 744 deletions(-) diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def index b3eaa842f12..099f4b6a008 100644 --- a/gcc/config/rs6000/rs6000-builtins.def +++ b/gcc/config/rs6000/rs6000-builtins.def @@ -3934,10 +3934,6 @@ void __builtin_vsx_stxvp (v256, unsigned long, const v256 *); STXVP nothing {mma,pair} -;; Vector pair built-in functions. 
- v256 __builtin_vpair_zero (); -VPAIR_ZERO vpair_zero {mma} - ;; Vector pair built-in functions with float elements v256 __builtin_vpair_f32_abs (v256); VPAIR_F32_ABS vpair_abs_v8sf2 {mma} @@ -3975,9 +3971,6 @@ v256 __builtin_vpair_f32_nfms (v256, v256, v256); VPAIR_F32_NFMS vpair_nfms_v8sf4 {mma} - v256 __builtin_vpair_f32_splat (float); -VPAIR_F32_SPLAT vpair_splat_v8sf {mma} - v256 __builtin_vpair_f32_sub (v256, v256); VPAIR_F32_SUB vpair_sub_v8sf3 {mma} @@ -4018,8 +4011,5 @@ v256 __builtin_vpair_f64_nfms (v256, v256, v256); VPAIR_F64_NFMS vpair_nfms_v4df4 {mma} - v256 __builtin_vpair_f64_splat (double); -VPAIR_F64_SPLAT vpair_splat_v4df {mma} - v256 __builtin_vpair_f64_sub (v256, v256); VPAIR_F64_SUB vpair_sub_v4df3 {mma} diff --git a/gcc/config/rs6000/vector-pair.md b/gcc/config/rs6000/vector-pair.md index 7a81acbdc05..73ae46e6d40 100644 --- a/gcc/config/rs6000/vector-pair.md +++ b/gcc/config/rs6000/vector-pair.md @@ -38,9 +38,7 @@ UNSPEC_VPAIR_NEG UNSPEC_VPAIR_PLUS UNSPEC_VPAIR_SMAX - UNSPEC_VPAIR_SMIN - UNSPEC_VPAIR_ZERO - UNSPEC_VPAIR_SPLAT]) + UNSPEC_VPAIR_SMIN]) ;; Vector pair element ID that defines the scaler element within the vector pair. (define_c_enum "vpair_element" @@ -100,104 +98,6 @@ ;; Map the scalar element ID into the appropriate insn type for divide. (define_int_attr vpair_divtype [(VPAIR_ELEMENT_FLOAT "vecfdiv") (VPAIR_ELEMENT_DOUBLE "vecdiv")]) - -;; Mode iterator for the vector modes that we provide splat operations for. -(define_mode_iterator VPAIR_SPLAT_VMODE [V4SF V2DF]) - -;; Map element mode to 128-bit vector mode for splat operations -(define_mode_attr VPAIR_SPLAT_ELEMENT_TO_VMODE [(SF "V4SF") - (DF "V2DF")]) - -;; Map either element mode or vector mode into the name for the splat insn. 
-(define_mode_attr vpair_splat_name [(SF "v8sf") - (DF "v4df") - (V4SF "v8sf") - (V2DF "v4df")]) - -;; Initialize a vector pair to 0 -(define_insn_and_split "vpair_zero" - [(set (match_operand:OO 0 "vsx_register_operand" "=wa") - (unspec:OO [(const_int 0)] UNSPEC_VPAIR_ZERO))] - "TARGET_MMA" - "#" - "&& reload_completed" - [(set (match_dup 1) (match_dup 3)) - (set (match_dup 2) (match_dup 3))] -{ - rtx op0 = operands[0]; - - operands[1] = simplify_gen_subreg (V2DFmode, op0, OOmode, 0); - operands[2] = simplify_gen_subreg (V2DFmode, op0, OOmode, 16); - operands[3] = CONST0_RTX (V2DFmode); -} - [(set_attr "length" "8") - (set_attr "type" "vecperm")]) - -;; Create a vector pair with a value splat'ed (duplicated) to all of the -;; elements. -(define_expand "vpair_splat_" - [(use (match_operand:OO 0 "vsx_register_operand")) - (use (match_operand:SFDF 1 "input_operand"))] - "TARGET_MMA" -{ - rtx op0 = operands[0]; - rtx op1 = operands[1]; - machine_mode element_mode = mode; - - if (op1 == CONST0_RTX (element_mode)) -{ - emit_insn (gen_vpair_zero (op0)); - DONE; -} - - machine_mode vector_mode = mode; - rtx vec = gen_reg_rtx (vector_mode); - unsigned num_elements = GET_MODE_NUNITS (vector_mode); - rtvec elements = rtvec_alloc (num_elements); - for (size_t i = 0; i < num_elements; i++) -RTVEC_ELT (elements, i) = copy_rtx (op1); - - rs6000_expand_vector_init (vec, gen_rtx_PARALLEL (vector_mode, elements)); - emit_insn (gen_vpair_splat__internal (op0, vec)); - DONE; -}) - -;; Inner splat support. Operand1 is the vector splat created above. Allow -;; operand 1 to overlap
[gcc(refs/users/meissner/heads/work176-vpair)] Add vector pair init and splat.
https://gcc.gnu.org/g:901c7f0836effdb24831f04ec4a5884f33fd76ba commit 901c7f0836effdb24831f04ec4a5884f33fd76ba Author: Michael Meissner Date: Tue Aug 20 12:52:34 2024 -0400 Add vector pair init and splat. 2024-08-20 Michael Meissner gcc/ * config/rs6000/rs6000-builtins.def (__builtin_vpair_zero): New built-in function. (__builtin_vpair_f32_splat): Likewise. (__builtin_vpair_f64_splat): Likewise. * config/rs6000/vector-pair.md (UNSPEC_VPAIR_ZERO): New unspec. (UNSPEC_VPAIR_SPLAT): Likewise. (VPAIR_SPLAT_VMODE): New mode iterator. (VPAIR_SPLAT_ELEMENT_TO_VMODE): New mode attribute. (vpair_splat_name): Likewise. (vpair_zero): New insn. (vpair_splat_): New define_expand. (vpair_splat__internal): New insns. gcc/testsuite/ * gcc.target/powerpc/vector-pair-5.c: New test. * gcc.target/powerpc/vector-pair-6.c: Likewise. Diff: --- gcc/config/rs6000/rs6000-builtins.def | 10 gcc/config/rs6000/vector-pair.md | 102 +- gcc/doc/extend.texi | 9 +++ 3 files changed, 120 insertions(+), 1 deletion(-) diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def index 099f4b6a008..b3eaa842f12 100644 --- a/gcc/config/rs6000/rs6000-builtins.def +++ b/gcc/config/rs6000/rs6000-builtins.def @@ -3934,6 +3934,10 @@ void __builtin_vsx_stxvp (v256, unsigned long, const v256 *); STXVP nothing {mma,pair} +;; Vector pair built-in functions. 
+ v256 __builtin_vpair_zero (); +VPAIR_ZERO vpair_zero {mma} + ;; Vector pair built-in functions with float elements v256 __builtin_vpair_f32_abs (v256); VPAIR_F32_ABS vpair_abs_v8sf2 {mma} @@ -3971,6 +3975,9 @@ v256 __builtin_vpair_f32_nfms (v256, v256, v256); VPAIR_F32_NFMS vpair_nfms_v8sf4 {mma} + v256 __builtin_vpair_f32_splat (float); +VPAIR_F32_SPLAT vpair_splat_v8sf {mma} + v256 __builtin_vpair_f32_sub (v256, v256); VPAIR_F32_SUB vpair_sub_v8sf3 {mma} @@ -4011,5 +4018,8 @@ v256 __builtin_vpair_f64_nfms (v256, v256, v256); VPAIR_F64_NFMS vpair_nfms_v4df4 {mma} + v256 __builtin_vpair_f64_splat (double); +VPAIR_F64_SPLAT vpair_splat_v4df {mma} + v256 __builtin_vpair_f64_sub (v256, v256); VPAIR_F64_SUB vpair_sub_v4df3 {mma} diff --git a/gcc/config/rs6000/vector-pair.md b/gcc/config/rs6000/vector-pair.md index 73ae46e6d40..39b419c6814 100644 --- a/gcc/config/rs6000/vector-pair.md +++ b/gcc/config/rs6000/vector-pair.md @@ -38,7 +38,9 @@ UNSPEC_VPAIR_NEG UNSPEC_VPAIR_PLUS UNSPEC_VPAIR_SMAX - UNSPEC_VPAIR_SMIN]) + UNSPEC_VPAIR_SMIN + UNSPEC_VPAIR_ZERO + UNSPEC_VPAIR_SPLAT]) ;; Vector pair element ID that defines the scaler element within the vector pair. (define_c_enum "vpair_element" @@ -98,6 +100,104 @@ ;; Map the scalar element ID into the appropriate insn type for divide. (define_int_attr vpair_divtype [(VPAIR_ELEMENT_FLOAT "vecfdiv") (VPAIR_ELEMENT_DOUBLE "vecdiv")]) + +;; Mode iterator for the vector modes that we provide splat operations for. +(define_mode_iterator VPAIR_SPLAT_VMODE [V4SF V2DF]) + +;; Map element mode to 128-bit vector mode for splat operations +(define_mode_attr VPAIR_SPLAT_ELEMENT_TO_VMODE [(SF "V4SF") + (DF "V2DF")]) + +;; Map either element mode or vector mode into the name for the splat insn. 
+(define_mode_attr vpair_splat_name [(SF "v8sf") + (DF "v4df") + (V4SF "v8sf") + (V2DF "v4df")]) + +;; Initialize a vector pair to 0 +(define_insn_and_split "vpair_zero" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa") + (unspec:OO [(const_int 0)] UNSPEC_VPAIR_ZERO))] + "TARGET_MMA" + "#" + "&& reload_completed" + [(set (match_dup 1) (match_dup 3)) + (set (match_dup 2) (match_dup 3))] +{ + rtx op0 = operands[0]; + + operands[1] = simplify_gen_subreg (V2DFmode, op0, OOmode, 0); + operands[2] = simplify_gen_subreg (V2DFmode, op0, OOmode, 16); + operands[3] = CONST0_RTX (V2DFmode); +} + [(set_attr "length" "8") + (set_attr "type" "vecperm")]) + +;; Create a vector pair with a value splat'ed (duplicated) to all of the +;; elements. +(define_expand "vpair_splat_" + [(use (match_operand:OO 0 "vsx_register_operand")) + (use (match_operand:SFDF 1 "input_operand"))] + "TARGET_MMA" +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + machine_mode element_mode = mode; + + if (op1 == CONST0_RTX (element_mode)) +{ + emit_insn (gen_vpair_zero (op0)); + DONE; +} + + machine_mode vector_mode = mode; + rtx vec = gen_reg_rtx (vector_mode); + unsigned num_elements = GET_MODE_NUNITS (vector_mode); + rtvec elements = rtvec_alloc (num_elements); + for (size_
[gcc(refs/users/meissner/heads/work176-vpair)] Revert changes
https://gcc.gnu.org/g:528b36dade23b80f8021a0701d238ad971ff033c commit 528b36dade23b80f8021a0701d238ad971ff033c Author: Michael Meissner Date: Tue Aug 20 12:54:57 2024 -0400 Revert changes Diff: --- gcc/config/rs6000/rs6000-builtins.def | 10 gcc/config/rs6000/vector-pair.md | 102 +- gcc/doc/extend.texi | 9 --- 3 files changed, 1 insertion(+), 120 deletions(-) diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def index b3eaa842f12..099f4b6a008 100644 --- a/gcc/config/rs6000/rs6000-builtins.def +++ b/gcc/config/rs6000/rs6000-builtins.def @@ -3934,10 +3934,6 @@ void __builtin_vsx_stxvp (v256, unsigned long, const v256 *); STXVP nothing {mma,pair} -;; Vector pair built-in functions. - v256 __builtin_vpair_zero (); -VPAIR_ZERO vpair_zero {mma} - ;; Vector pair built-in functions with float elements v256 __builtin_vpair_f32_abs (v256); VPAIR_F32_ABS vpair_abs_v8sf2 {mma} @@ -3975,9 +3971,6 @@ v256 __builtin_vpair_f32_nfms (v256, v256, v256); VPAIR_F32_NFMS vpair_nfms_v8sf4 {mma} - v256 __builtin_vpair_f32_splat (float); -VPAIR_F32_SPLAT vpair_splat_v8sf {mma} - v256 __builtin_vpair_f32_sub (v256, v256); VPAIR_F32_SUB vpair_sub_v8sf3 {mma} @@ -4018,8 +4011,5 @@ v256 __builtin_vpair_f64_nfms (v256, v256, v256); VPAIR_F64_NFMS vpair_nfms_v4df4 {mma} - v256 __builtin_vpair_f64_splat (double); -VPAIR_F64_SPLAT vpair_splat_v4df {mma} - v256 __builtin_vpair_f64_sub (v256, v256); VPAIR_F64_SUB vpair_sub_v4df3 {mma} diff --git a/gcc/config/rs6000/vector-pair.md b/gcc/config/rs6000/vector-pair.md index 39b419c6814..73ae46e6d40 100644 --- a/gcc/config/rs6000/vector-pair.md +++ b/gcc/config/rs6000/vector-pair.md @@ -38,9 +38,7 @@ UNSPEC_VPAIR_NEG UNSPEC_VPAIR_PLUS UNSPEC_VPAIR_SMAX - UNSPEC_VPAIR_SMIN - UNSPEC_VPAIR_ZERO - UNSPEC_VPAIR_SPLAT]) + UNSPEC_VPAIR_SMIN]) ;; Vector pair element ID that defines the scaler element within the vector pair. 
(define_c_enum "vpair_element" @@ -100,104 +98,6 @@ ;; Map the scalar element ID into the appropriate insn type for divide. (define_int_attr vpair_divtype [(VPAIR_ELEMENT_FLOAT "vecfdiv") (VPAIR_ELEMENT_DOUBLE "vecdiv")]) - -;; Mode iterator for the vector modes that we provide splat operations for. -(define_mode_iterator VPAIR_SPLAT_VMODE [V4SF V2DF]) - -;; Map element mode to 128-bit vector mode for splat operations -(define_mode_attr VPAIR_SPLAT_ELEMENT_TO_VMODE [(SF "V4SF") - (DF "V2DF")]) - -;; Map either element mode or vector mode into the name for the splat insn. -(define_mode_attr vpair_splat_name [(SF "v8sf") - (DF "v4df") - (V4SF "v8sf") - (V2DF "v4df")]) - -;; Initialize a vector pair to 0 -(define_insn_and_split "vpair_zero" - [(set (match_operand:OO 0 "vsx_register_operand" "=wa") - (unspec:OO [(const_int 0)] UNSPEC_VPAIR_ZERO))] - "TARGET_MMA" - "#" - "&& reload_completed" - [(set (match_dup 1) (match_dup 3)) - (set (match_dup 2) (match_dup 3))] -{ - rtx op0 = operands[0]; - - operands[1] = simplify_gen_subreg (V2DFmode, op0, OOmode, 0); - operands[2] = simplify_gen_subreg (V2DFmode, op0, OOmode, 16); - operands[3] = CONST0_RTX (V2DFmode); -} - [(set_attr "length" "8") - (set_attr "type" "vecperm")]) - -;; Create a vector pair with a value splat'ed (duplicated) to all of the -;; elements. 
-(define_expand "vpair_splat_" - [(use (match_operand:OO 0 "vsx_register_operand")) - (use (match_operand:SFDF 1 "input_operand"))] - "TARGET_MMA" -{ - rtx op0 = operands[0]; - rtx op1 = operands[1]; - machine_mode element_mode = mode; - - if (op1 == CONST0_RTX (element_mode)) -{ - emit_insn (gen_vpair_zero (op0)); - DONE; -} - - machine_mode vector_mode = mode; - rtx vec = gen_reg_rtx (vector_mode); - unsigned num_elements = GET_MODE_NUNITS (vector_mode); - rtvec elements = rtvec_alloc (num_elements); - for (size_t i = 0; i < num_elements; i++) -RTVEC_ELT (elements, i) = copy_rtx (op1); - - rs6000_expand_vector_init (vec, gen_rtx_PARALLEL (vector_mode, elements)); - emit_insn (gen_vpair_splat__internal (op0, vec)); - DONE; -}) - -;; Inner splat support. Operand1 is the vector splat created above. Allow -;; operand 1 to overlap with the output registers to eliminate one move -;; instruction. -(define_insn_and_split "vpair_splat__internal" - [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa") - (unspec:OO -[(match_operand:VPAIR_SPLAT_VMODE 1 "vsx_register_operand" "0,wa")] -UNSPEC_VPAIR_SPLAT))] - "TARGET_MMA" - "#" - "&& reload_completed" - [(const_int 0)] -{ - rtx op0 = operands[0]; - rtx op0_a = simplify_gen_subreg (mode, op0, OOmode, 0); - rtx op0_b = simplify_gen_subreg (mode, op0, OOmode, 16
[gcc(refs/users/meissner/heads/work176-vpair)] Add vector pair init and splat.
https://gcc.gnu.org/g:7d99722e8df0aade817f19f657dfd7e739108ab6 commit 7d99722e8df0aade817f19f657dfd7e739108ab6 Author: Michael Meissner Date: Tue Aug 20 12:57:02 2024 -0400 Add vector pair init and splat. 2024-08-20 Michael Meissner gcc/ * config/rs6000/rs6000-builtins.def (__builtin_vpair_zero): New built-in function. (__builtin_vpair_f32_splat): Likewise. (__builtin_vpair_f64_splat): Likewise. * config/rs6000/vector-pair.md (UNSPEC_VPAIR_ZERO): New unspec. (UNSPEC_VPAIR_SPLAT): Likewise. (VPAIR_SPLAT_VMODE): New mode iterator. (VPAIR_SPLAT_ELEMENT_TO_VMODE): New mode attribute. (vpair_splat_name): Likewise. (vpair_zero): New insn. (vpair_splat_): New define_expand. (vpair_splat__internal): New insns. gcc/testsuite/ * gcc.target/powerpc/vector-pair-5.c: New test. * gcc.target/powerpc/vector-pair-6.c: Likewise. Diff: --- gcc/config/rs6000/rs6000-builtins.def| 10 +++ gcc/config/rs6000/vector-pair.md | 102 ++- gcc/doc/extend.texi | 9 ++ gcc/testsuite/gcc.target/powerpc/vector-pair-5.c | 54 gcc/testsuite/gcc.target/powerpc/vector-pair-6.c | 56 + 5 files changed, 230 insertions(+), 1 deletion(-) diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def index 099f4b6a008..b3eaa842f12 100644 --- a/gcc/config/rs6000/rs6000-builtins.def +++ b/gcc/config/rs6000/rs6000-builtins.def @@ -3934,6 +3934,10 @@ void __builtin_vsx_stxvp (v256, unsigned long, const v256 *); STXVP nothing {mma,pair} +;; Vector pair built-in functions. 
+ v256 __builtin_vpair_zero (); +VPAIR_ZERO vpair_zero {mma} + ;; Vector pair built-in functions with float elements v256 __builtin_vpair_f32_abs (v256); VPAIR_F32_ABS vpair_abs_v8sf2 {mma} @@ -3971,6 +3975,9 @@ v256 __builtin_vpair_f32_nfms (v256, v256, v256); VPAIR_F32_NFMS vpair_nfms_v8sf4 {mma} + v256 __builtin_vpair_f32_splat (float); +VPAIR_F32_SPLAT vpair_splat_v8sf {mma} + v256 __builtin_vpair_f32_sub (v256, v256); VPAIR_F32_SUB vpair_sub_v8sf3 {mma} @@ -4011,5 +4018,8 @@ v256 __builtin_vpair_f64_nfms (v256, v256, v256); VPAIR_F64_NFMS vpair_nfms_v4df4 {mma} + v256 __builtin_vpair_f64_splat (double); +VPAIR_F64_SPLAT vpair_splat_v4df {mma} + v256 __builtin_vpair_f64_sub (v256, v256); VPAIR_F64_SUB vpair_sub_v4df3 {mma} diff --git a/gcc/config/rs6000/vector-pair.md b/gcc/config/rs6000/vector-pair.md index 73ae46e6d40..39b419c6814 100644 --- a/gcc/config/rs6000/vector-pair.md +++ b/gcc/config/rs6000/vector-pair.md @@ -38,7 +38,9 @@ UNSPEC_VPAIR_NEG UNSPEC_VPAIR_PLUS UNSPEC_VPAIR_SMAX - UNSPEC_VPAIR_SMIN]) + UNSPEC_VPAIR_SMIN + UNSPEC_VPAIR_ZERO + UNSPEC_VPAIR_SPLAT]) ;; Vector pair element ID that defines the scaler element within the vector pair. (define_c_enum "vpair_element" @@ -98,6 +100,104 @@ ;; Map the scalar element ID into the appropriate insn type for divide. (define_int_attr vpair_divtype [(VPAIR_ELEMENT_FLOAT "vecfdiv") (VPAIR_ELEMENT_DOUBLE "vecdiv")]) + +;; Mode iterator for the vector modes that we provide splat operations for. +(define_mode_iterator VPAIR_SPLAT_VMODE [V4SF V2DF]) + +;; Map element mode to 128-bit vector mode for splat operations +(define_mode_attr VPAIR_SPLAT_ELEMENT_TO_VMODE [(SF "V4SF") + (DF "V2DF")]) + +;; Map either element mode or vector mode into the name for the splat insn. 
+(define_mode_attr vpair_splat_name [(SF "v8sf") + (DF "v4df") + (V4SF "v8sf") + (V2DF "v4df")]) + +;; Initialize a vector pair to 0 +(define_insn_and_split "vpair_zero" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa") + (unspec:OO [(const_int 0)] UNSPEC_VPAIR_ZERO))] + "TARGET_MMA" + "#" + "&& reload_completed" + [(set (match_dup 1) (match_dup 3)) + (set (match_dup 2) (match_dup 3))] +{ + rtx op0 = operands[0]; + + operands[1] = simplify_gen_subreg (V2DFmode, op0, OOmode, 0); + operands[2] = simplify_gen_subreg (V2DFmode, op0, OOmode, 16); + operands[3] = CONST0_RTX (V2DFmode); +} + [(set_attr "length" "8") + (set_attr "type" "vecperm")]) + +;; Create a vector pair with a value splat'ed (duplicated) to all of the +;; elements. +(define_expand "vpair_splat_" + [(use (match_operand:OO 0 "vsx_register_operand")) + (use (match_operand:SFDF 1 "input_operand"))] + "TARGET_MMA" +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + machine_mode element_mode = mode; + + if (op1 == CONST0_RTX (element_mode)) +{ + emit_insn (gen_vpair_zero (op0)); + DONE; +} + + machine_mode vector_mode = mode;
[gcc(refs/users/meissner/heads/work176-vpair)] Add vector pair optimizations.
https://gcc.gnu.org/g:7e2134dab92d9032a849772f09b2b7935a8f4e62 commit 7e2134dab92d9032a849772f09b2b7935a8f4e62 Author: Michael Meissner Date: Tue Aug 20 12:57:41 2024 -0400 Add vector pair optimizations. 2024-08-20 Michael Meissner gcc/ * config/rs6000/vector-pair.md (vpair_add_neg_3): New combiner insn to convert vector plus/neg into a minus operation. (vpair_fma__merge): Optimize multiply, add/subtract, and negation into fma operations if the user specifies to create fmas. (vpair_fma__merge): Likewise. (vpair_fma__merge2): Likewise. (vpair_nfma__merge): Likewise. (vpair_nfms__merge): Likewise. (vpair_nfms__merge2): Likewise. gcc/testsuite/ * gcc.target/powerpc/vector-pair-7.c: New test. * gcc.target/powerpc/vector-pair-8.c: Likewise. * gcc.target/powerpc/vector-pair-9.c: Likewise. * gcc.target/powerpc/vector-pair-10.c: Likewise. * gcc.target/powerpc/vector-pair-11.c: Likewise. * gcc.target/powerpc/vector-pair-12xs.c: Likewise. Diff: --- gcc/config/rs6000/vector-pair.md | 224 ++ gcc/testsuite/gcc.target/powerpc/vector-pair-10.c | 61 ++ gcc/testsuite/gcc.target/powerpc/vector-pair-11.c | 65 +++ gcc/testsuite/gcc.target/powerpc/vector-pair-12.c | 65 +++ gcc/testsuite/gcc.target/powerpc/vector-pair-7.c | 18 ++ gcc/testsuite/gcc.target/powerpc/vector-pair-8.c | 18 ++ gcc/testsuite/gcc.target/powerpc/vector-pair-9.c | 61 ++ 7 files changed, 512 insertions(+) diff --git a/gcc/config/rs6000/vector-pair.md b/gcc/config/rs6000/vector-pair.md index 39b419c6814..7a81acbdc05 100644 --- a/gcc/config/rs6000/vector-pair.md +++ b/gcc/config/rs6000/vector-pair.md @@ -261,6 +261,31 @@ (set (attr "type") (if_then_else (match_test " == DIV") (const_string "") (const_string "")))]) + +;; Optimize vector pair add of a negative value into a subtract. 
+(define_insn_and_split "*vpair_add_neg_3" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa") + (unspec:OO +[(match_operand:OO 1 "vsx_register_operand" "wa") + (unspec:OO + [(match_operand:OO 2 "vsx_register_operand" "wa") + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_NEG) + (const_int VPAIR_FP_ELEMENT)] +VPAIR_FP_BINARY))] + "TARGET_MMA" + "#" + "&& 1" + [(set (match_dup 0) + (unspec:OO +[(match_dup 1) + (match_dup 2) + (const_int VPAIR_FP_ELEMENT)] +UNSPEC_VPAIR_MINUS))] +{ +} + [(set_attr "length" "8") + (set_attr "type" "")]) ;; Vector pair fused-multiply (FMA) operations. The last argument in the ;; UNSPEC is a CONST_INT which identifies what the scalar element is. @@ -354,3 +379,202 @@ } [(set_attr "length" "8") (set_attr "type" "")]) + +;; Optimize vector pair multiply and vector pair add into vector pair fma, +;; providing the compiler would do this optimization for scalar and vectors. +;; Unlike most of the define_insn_and_splits, this can be done before register +;; allocation. +(define_insn_and_split "*vpair_fma__merge" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa") + (unspec:OO +[(unspec:OO + [(match_operand:OO 1 "vsx_register_operand" "%wa,wa") + (match_operand:OO 2 "vsx_register_operand" "wa,0") + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_MULT) + (match_operand:OO 3 "vsx_register_operand" "0,wa") + (const_int VPAIR_FP_ELEMENT)] +UNSPEC_VPAIR_PLUS))] + "TARGET_MMA && flag_fp_contract_mode == FP_CONTRACT_FAST" + "#" + "&& 1" + [(set (match_dup 0) + (unspec:OO +[(match_dup 1) + (match_dup 2) + (match_dup 3) + (const_int VPAIR_FP_ELEMENT)] +UNSPEC_VPAIR_FMA))] +{ +} + [(set_attr "length" "8") + (set_attr "type" "")]) + +;; Merge multiply and subtract. 
+(define_insn_and_split "*vpair_fma__merge" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa") + (unspec:OO +[(unspec:OO + [(match_operand:OO 1 "vsx_register_operand" "%wa,wa") + (match_operand:OO 2 "vsx_register_operand" "wa,0") + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_MULT) + (match_operand:OO 3 "vsx_register_operand" "0,wa") + (const_int VPAIR_FP_ELEMENT)] +UNSPEC_VPAIR_MINUS))] + "TARGET_MMA && flag_fp_contract_mode == FP_CONTRACT_FAST" + "#" + "&& 1" + [(set (match_dup 0) + (unspec:OO +[(match_dup 1) + (match_dup 2) + (unspec:OO + [(match_dup 3) + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_NEG) + (const_int VPAIR_FP_ELEMENT)] +UNSPEC_VPAIR_FMA))] +{ +} + [(set_attr "length" "8") + (set_attr "type" "
[gcc] Created branch 'devel/nothrow-detection'
The branch 'devel/nothrow-detection' was created pointing to: 9b53c7f9484f... Edge redirection for exceptions.
[gcc/devel/nothrow-detection] Edge redirection for exceptions.
https://gcc.gnu.org/g:9b53c7f9484fea5fa0fd23a6444b03071debc4f2 commit 9b53c7f9484fea5fa0fd23a6444b03071debc4f2 Author: Pranil Dey Date: Tue Aug 20 22:07:57 2024 +0530 Edge redirection for exceptions. This commit contains changes to the tree-eh.cc, tree-eh.h, MAINTAINERS and tree-cfg.cc files. Specifically, it contains four functions: 1. void extract_exception_types_for_call, which extracts the exception types in a call stmt and adds them into a vector tree. 2. bool stmt_throw_types, which does the same as stmt_could_throw, the difference being that it also gives the list of exception types as given by the extract_exception_types_for_call function. 3. bool match_lp, which checks if a landing pad can handle any of the exception types given as input parameters by looking into the catch handlers. 4. update_stmt_eh_region, which walks up the EH tree and changes the landing pad for the last statement in a basic block in the control flow graph, so that when the edge is created by make_eh_edge it points to the correct handlers. Further work remains to be done regarding RESX stmts. Diff: --- MAINTAINERS | 1 + gcc/tree-cfg.cc | 5 ++- gcc/tree-eh.cc | 114 gcc/tree-eh.h | 5 +++ 4 files changed, 123 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 595140b6f64f..f26db19345e4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -915,6 +915,7 @@ information. 
Juergen Christ Robin Dapp Robin Dapp +Pranil Dey Michal Jires Matthias Kretz Prathamesh Kulkarni diff --git a/gcc/tree-cfg.cc b/gcc/tree-cfg.cc index e6fd1294b958..bb101c865ed6 100644 --- a/gcc/tree-cfg.cc +++ b/gcc/tree-cfg.cc @@ -855,11 +855,12 @@ make_edges_bb (basic_block bb, struct omp_region **pcur_region, int *pomp_index) if (!last) return ret; - + + update_stmt_eh_region(last); switch (gimple_code (last)) { case GIMPLE_GOTO: - if (make_goto_expr_edges (bb)) + if (make_goto_expr_edges (bb)) ret = 1; fallthru = false; break; diff --git a/gcc/tree-eh.cc b/gcc/tree-eh.cc index 9609bdc0d9b7..eec1e6af70d7 100644 --- a/gcc/tree-eh.cc +++ b/gcc/tree-eh.cc @@ -2271,6 +2271,84 @@ make_eh_dispatch_edges (geh_dispatch *stmt) return true; } +// Check if a landing pad can handle any of the given exception types +bool match_lp(eh_landing_pad lp, vec *exception_types) { +eh_region region = lp->region; + +// Ensure the region is of type ERT_TRY +if (region && region->type == ERT_TRY) { +eh_catch_d *catch_handler = region->u.eh_try.first_catch; + +while (catch_handler) { +tree type_list = catch_handler->type_list; + +for (tree t = type_list; t; t = TREE_CHAIN(t)) { +tree type = TREE_VALUE(t); +for (unsigned i = 0; i < exception_types->length(); ++i) { + // match found or a catch-all handler (NULL) +if (type == (*exception_types)[i] || !type) { +return true; +} +} +} +catch_handler = catch_handler->next_catch; +} +} +return false; +} + +// Function to update landing pad in throw_stmt_table for a given statement +void update_stmt_eh_region(gimple *stmt) { + auto_vec exception_types; + if (!stmt_throw_types (cfun, stmt, &exception_types)) { +return; +} + +int lp_nr = lookup_stmt_eh_lp_fn(cfun, stmt); +if (lp_nr <= 0) { +return; +} + +eh_landing_pad lp = get_eh_landing_pad_from_number(lp_nr); +if (!lp) { +return; +} + +eh_region region = lp->region; + +// Walk up the region tree +while (region) { +switch (region->type) { +case ERT_CLEANUP: 
+*cfun->eh->throw_stmt_table->get(const_cast(stmt)) = lp->index; +return; + +case ERT_TRY: +if (match_lp(lp, &exception_types)) { +*cfun->eh->throw_stmt_table->get(const_cast(stmt)) = lp->index; +return; +} +break; + +case ERT_MUST_NOT_THROW: +// Undefined behavior, leave edge unchanged +return; + +case ERT_ALLOWED_EXCEPTIONS: +if (!match_lp(lp, &exception_types)) { +return; +} +break; + +default: +break; +} +region = region->outer; +} + +remove_stmt_from_eh_lp_fn(cfun, stmt); +} + /* Create the single EH edge from STMT to its nearest landing pad, if there is
[gcc r15-3044] RISC-V: Remove testcase XFAIL
https://gcc.gnu.org/g:3676816caa510de226b357be1c68cf798da63576 commit r15-3044-g3676816caa510de226b357be1c68cf798da63576 Author: Edwin Lu Date: Mon Aug 19 13:10:15 2024 -0700 RISC-V: Remove testcase XFAIL The testcase has been modified to include the -fwrapv flag which now causes the test to pass. Remove the xfail exception gcc/testsuite/ChangeLog: * gcc.dg/signbit-5.c: Remove riscv xfail exception Signed-off-by: Edwin Lu Diff: --- gcc/testsuite/gcc.dg/signbit-5.c | 1 - 1 file changed, 1 deletion(-) diff --git a/gcc/testsuite/gcc.dg/signbit-5.c b/gcc/testsuite/gcc.dg/signbit-5.c index 2bca640f930b..e65c8910c825 100644 --- a/gcc/testsuite/gcc.dg/signbit-5.c +++ b/gcc/testsuite/gcc.dg/signbit-5.c @@ -4,7 +4,6 @@ /* This test does not work when the truth type does not match vector type. */ /* { dg-additional-options "-march=armv8-a" { target aarch64_sve } } */ /* { dg-xfail-run-if "truth type does not match vector type" { amdgcn-*-* } } */ -/* { dg-xfail-run-if "truth type does not match vector type" { riscv_v } } */ #include
[gcc r15-3045] c++: Parse and ignore attributes on base specifiers [PR110345]
https://gcc.gnu.org/g:1db5ca04da365ac57f7d788a85055edcf13da708 commit r15-3045-g1db5ca04da365ac57f7d788a85055edcf13da708 Author: Jakub Jelinek Date: Tue Aug 20 22:15:03 2024 +0200 c++: Parse and ignore attributes on base specifiers [PR110345] For C++ 26 P2552R3 I went through all the spots (except modules) where attribute-specifier-seq appears in the grammar and tried to construct a testcase in all those spots, for now for [[deprecated]] attribute. This is the third issue I found. https://eel.is/c++draft/class.derived#general-1 has attribute-specifier-seq at the start of base-specifier. The following patch parses it there and warns about those. 2024-08-20 Jakub Jelinek PR c++/110345 * parser.cc (cp_parser_base_specifier): Parse standard attributes at the start and emit a warning if there are any non-ignored ones. * g++.dg/cpp0x/gen-attrs-83.C: New test. Diff: --- gcc/cp/parser.cc | 17 - gcc/testsuite/g++.dg/cpp0x/gen-attrs-83.C | 10 ++ 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc index c43889803482..28ebf2beb60a 100644 --- a/gcc/cp/parser.cc +++ b/gcc/cp/parser.cc @@ -28995,11 +28995,12 @@ cp_parser_base_clause (cp_parser* parser) /* Parse a base-specifier. base-specifier: - :: [opt] nested-name-specifier [opt] class-name - virtual access-specifier [opt] :: [opt] nested-name-specifier - [opt] class-name - access-specifier virtual [opt] :: [opt] nested-name-specifier - [opt] class-name + attribute-specifier-seq [opt] :: [opt] nested-name-specifier [opt] + class-name + attribute-specifier-seq [opt] virtual access-specifier [opt] :: [opt] + nested-name-specifier [opt] class-name + attribute-specifier-seq [opt] access-specifier virtual [opt] :: [opt] + nested-name-specifier [opt] class-name Returns a TREE_LIST. 
The TREE_PURPOSE will be one of ACCESS_{DEFAULT,PUBLIC,PROTECTED,PRIVATE}_[VIRTUAL]_NODE to @@ -29017,6 +29018,12 @@ cp_parser_base_specifier (cp_parser* parser) bool class_scope_p, template_p; tree access = access_default_node; tree type; + location_t attrs_loc = cp_lexer_peek_token (parser->lexer)->location; + tree std_attrs = cp_parser_std_attribute_spec_seq (parser); + + if (std_attrs != NULL_TREE && any_nonignored_attribute_p (std_attrs)) +warning_at (attrs_loc, OPT_Wattributes, + "attributes on base specifiers are ignored"); /* Process the optional `virtual' and `access-specifier'. */ while (!done) diff --git a/gcc/testsuite/g++.dg/cpp0x/gen-attrs-83.C b/gcc/testsuite/g++.dg/cpp0x/gen-attrs-83.C new file mode 100644 index ..0ff1965d0ecf --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp0x/gen-attrs-83.C @@ -0,0 +1,10 @@ +// { dg-do compile { target c++11 } } + +struct A {}; +struct B {}; +struct C {}; +struct D : [[]] [[]] A, + [[]] virtual public B, [[]] [[]] [[]] public virtual C {}; +struct E : [[gnu::deprecated]] A, // { dg-warning "attributes on base specifiers are ignored" } + [[gnu::deprecated]] virtual public B,// { dg-warning "attributes on base specifiers are ignored" } + [[gnu::deprecated]] public virtual C {}; // { dg-warning "attributes on base specifiers are ignored" }
[gcc r15-3046] c++: Appertain standard attributes after array closing square bracket to array type rather than declarator [PR110345]
https://gcc.gnu.org/g:d05949558ef1c8eeeb07399174a64f968f70e3ee commit r15-3046-gd05949558ef1c8eeeb07399174a64f968f70e3ee Author: Jakub Jelinek Date: Tue Aug 20 22:17:41 2024 +0200 c++: Appertain standard attributes after array closing square bracket to array type rather than declarator [PR110345] For C++ 26 P2552R3 I went through all the spots (except modules) where attribute-specifier-seq appears in the grammar and tried to construct a testcase in all those spots, for now for [[deprecated]] attribute. This is the second issue I found. The comment already correctly says that attributes after closing ] appertain to the array type, but we were appending them to returned_attrs, so effectively applying them to the declarator (as if they appeared right after declarator-id). 2024-08-20 Jakub Jelinek PR c++/110345 * decl.cc (grokdeclarator): Apply declarator->std_attributes for cdk_array to type, rather than chaining it to returned_attrs. * g++.dg/cpp0x/gen-attrs-82.C: New test. * g++.dg/gomp/attrs-3.C (foo): Expect different diagnostics for omp::directive attribute after closing square bracket of an automatic declaration and add a test with the attribute after array's declarator-id. Diff: --- gcc/cp/decl.cc| 5 ++--- gcc/testsuite/g++.dg/cpp0x/gen-attrs-82.C | 4 gcc/testsuite/g++.dg/gomp/attrs-3.C | 3 ++- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/gcc/cp/decl.cc b/gcc/cp/decl.cc index 12139e1d8627..7ab73f1031d7 100644 --- a/gcc/cp/decl.cc +++ b/gcc/cp/decl.cc @@ -13317,9 +13317,8 @@ grokdeclarator (const cp_declarator *declarator, /* [dcl.array]/1: The optional attribute-specifier-seq appertains to the - array. */ - returned_attrs = attr_chainon (returned_attrs, - declarator->std_attributes); + array type. 
*/ + decl_attributes (&type, declarator->std_attributes, 0); break; case cdk_function: diff --git a/gcc/testsuite/g++.dg/cpp0x/gen-attrs-82.C b/gcc/testsuite/g++.dg/cpp0x/gen-attrs-82.C new file mode 100644 index ..67c1a2098430 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp0x/gen-attrs-82.C @@ -0,0 +1,4 @@ +// { dg-do compile { target c++11 } } + +int a [[gnu::common]] [2]; +int b[2] [[gnu::common]]; // { dg-warning "'common' attribute does not apply to types" } diff --git a/gcc/testsuite/g++.dg/gomp/attrs-3.C b/gcc/testsuite/g++.dg/gomp/attrs-3.C index 7aab6370d499..5658b3a86895 100644 --- a/gcc/testsuite/g++.dg/gomp/attrs-3.C +++ b/gcc/testsuite/g++.dg/gomp/attrs-3.C @@ -35,6 +35,7 @@ foo () int *[[omp::directive (threadprivate (t3))]] c; // { dg-warning "'omp::directive' scoped attribute directive ignored" } int &[[omp::directive (threadprivate (t4))]] d = b; // { dg-warning "'omp::directive' scoped attribute directive ignored" } typedef int T [[omp::directive (threadprivate (t5))]]; // { dg-error "'omp::directive' not allowed to be specified in this context" } - int e[10] [[omp::directive (threadprivate (t6))]]; // { dg-error "'omp::directive' not allowed to be specified in this context" } + int e [[omp::directive (threadprivate (t6))]] [10]; // { dg-error "'omp::directive' not allowed to be specified in this context" } + int f[10] [[omp::directive (threadprivate (t6))]]; // { dg-warning "'omp::directive' scoped attribute directive ignored" } struct [[omp::directive (threadprivate (t7))]] S {}; // { dg-error "'omp::directive' not allowed to be specified in this context" } }
[gcc r15-3047] phi-opt: Fix for failing maybe_push_res_to_seq in factor_out_conditional_operation [PR 116409]
https://gcc.gnu.org/g:404d947d8ddd3c3035dcea115e9bab4c4a6bfa1c commit r15-3047-g404d947d8ddd3c3035dcea115e9bab4c4a6bfa1c Author: Andrew Pinski Date: Sun Aug 18 20:45:35 2024 -0700 phi-opt: Fix for failing maybe_push_res_to_seq in factor_out_conditional_operation [PR 116409] The code was assuming that maybe_push_res_to_seq would not fail if the gimple_extract_op returned true. But for some cases when the function is pure rather than const, then it can fail. This change moves around the code to check the result of maybe_push_res_to_seq instead of assuming it will always work. Changes since v1: * v2: Instead of directly testing non-pure builtin functions change to test if maybe_push_res_to_seq fails. Bootstrapped and tested on x86_64-linux-gnu with no regressions. PR tree-optimization/116409 gcc/ChangeLog: * tree-ssa-phiopt.cc (factor_out_conditional_operation): Move maybe_push_res_to_seq before creating the phi node and the debug dump. Return false if maybe_push_res_to_seq fails. gcc/testsuite/ChangeLog: * gcc.dg/torture/pr116409-1.c: New test. * gcc.dg/torture/pr116409-2.c: New test. 
Signed-off-by: Andrew Pinski Diff: --- gcc/testsuite/gcc.dg/torture/pr116409-1.c | 7 +++ gcc/testsuite/gcc.dg/torture/pr116409-2.c | 7 +++ gcc/tree-ssa-phiopt.cc| 30 -- 3 files changed, 34 insertions(+), 10 deletions(-) diff --git a/gcc/testsuite/gcc.dg/torture/pr116409-1.c b/gcc/testsuite/gcc.dg/torture/pr116409-1.c new file mode 100644 index ..7bf8d49c9a01 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr116409-1.c @@ -0,0 +1,7 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-frounding-math -fno-math-errno" } */ +double f(int c, double a, double b) { + if (c) +return __builtin_sqrt(a); + return __builtin_sqrt(b); +} diff --git a/gcc/testsuite/gcc.dg/torture/pr116409-2.c b/gcc/testsuite/gcc.dg/torture/pr116409-2.c new file mode 100644 index ..c27f11312d98 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr116409-2.c @@ -0,0 +1,7 @@ +/* { dg-do compile } */ + +int f (int t, char *a, char *b) { + if (t) +return __builtin_strlen (a); + return __builtin_strlen (b); +} diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc index 2d4aba5b0872..95bac330c8f2 100644 --- a/gcc/tree-ssa-phiopt.cc +++ b/gcc/tree-ssa-phiopt.cc @@ -54,6 +54,7 @@ along with GCC; see the file COPYING3. If not see #include "dbgcnt.h" #include "tree-ssa-propagate.h" #include "tree-ssa-dce.h" +#include "calls.h" /* Return the singleton PHI in the SEQ of PHIs for edges E0 and E1. */ @@ -370,6 +371,25 @@ factor_out_conditional_operation (edge e0, edge e1, gphi *phi, /* Create a new PHI stmt. */ result = PHI_RESULT (phi); temp = make_ssa_name (TREE_TYPE (new_arg0), NULL); + + gimple_match_op new_op = arg0_op; + + /* Create the operation stmt if possible and insert it. */ + new_op.ops[0] = temp; + gimple_seq seq = NULL; + result = maybe_push_res_to_seq (&new_op, &seq, result); + + /* If we can't create the new statement, release the temp name + and return back. 
*/ + if (!result) +{ + release_ssa_name (temp); + return NULL; +} + + gsi = gsi_after_labels (gimple_bb (phi)); + gsi_insert_seq_before (&gsi, seq, GSI_CONTINUE_LINKING); + newphi = create_phi_node (temp, gimple_bb (phi)); if (dump_file && (dump_flags & TDF_DETAILS)) @@ -398,16 +418,6 @@ factor_out_conditional_operation (edge e0, edge e1, gphi *phi, add_phi_arg (newphi, new_arg0, e0, locus); add_phi_arg (newphi, new_arg1, e1, locus); - gimple_match_op new_op = arg0_op; - - /* Create the operation stmt and insert it. */ - new_op.ops[0] = temp; - gimple_seq seq = NULL; - result = maybe_push_res_to_seq (&new_op, &seq, result); - gcc_assert (result); - gsi = gsi_after_labels (gimple_bb (phi)); - gsi_insert_seq_before (&gsi, seq, GSI_CONTINUE_LINKING); - /* Remove the original PHI stmt. */ gsi = gsi_for_stmt (phi); gsi_remove (&gsi, true);
[gcc r15-3048] libcpp: Adjust lang_defaults
https://gcc.gnu.org/g:447c32c5142a60278230f81ae6e50e41ef6d988e commit r15-3048-g447c32c5142a60278230f81ae6e50e41ef6d988e Author: Jakub Jelinek Date: Tue Aug 20 22:25:57 2024 +0200 libcpp: Adjust lang_defaults The table over the years turned to be very wide, 147 columns and any addition would add a couple of new ones. We need a 28x23 bit matrix right now. This patch changes the formatting, so that we need just 2 columns per new feature and so we have some room for expansion. In addition, the patch changes it to bitfields, which reduces .rodata by 532 bytes (so 5.75x reduction of the variable) and on x86_64-linux grows the cpp_set_lang function by 26 bytes (8.4% growth). 2024-08-20 Jakub Jelinek * init.cc (struct lang_flags): Change all members from char typed fields to unsigned bit-fields. (lang_defaults): Change formatting of the initializer so that it fits to 68 columns rather than 147. Diff: --- libcpp/init.cc | 112 ++--- 1 file changed, 59 insertions(+), 53 deletions(-) diff --git a/libcpp/init.cc b/libcpp/init.cc index 9ae06a9595d9..2dfd9d7e0623 100644 --- a/libcpp/init.cc +++ b/libcpp/init.cc @@ -77,61 +77,67 @@ END requires. 
*/ struct lang_flags { - char c99; - char cplusplus; - char extended_numbers; - char extended_identifiers; - char c11_identifiers; - char xid_identifiers; - char std; - char digraphs; - char uliterals; - char rliterals; - char user_literals; - char binary_constants; - char digit_separators; - char trigraphs; - char utf8_char_literals; - char va_opt; - char scope; - char dfp_constants; - char size_t_literals; - char elifdef; - char warning_directive; - char delimited_escape_seqs; - char true_false; + unsigned int c99 : 1; + unsigned int cplusplus : 1; + unsigned int extended_numbers : 1; + unsigned int extended_identifiers : 1; + unsigned int c11_identifiers : 1; + unsigned int xid_identifiers : 1; + unsigned int std : 1; + unsigned int digraphs : 1; + unsigned int uliterals : 1; + unsigned int rliterals : 1; + unsigned int user_literals : 1; + unsigned int binary_constants : 1; + unsigned int digit_separators : 1; + unsigned int trigraphs : 1; + unsigned int utf8_char_literals : 1; + unsigned int va_opt : 1; + unsigned int scope : 1; + unsigned int dfp_constants : 1; + unsigned int size_t_literals : 1; + unsigned int elifdef : 1; + unsigned int warning_directive : 1; + unsigned int delimited_escape_seqs : 1; + unsigned int true_false : 1; }; -static const struct lang_flags lang_defaults[] = -{ /* c99 c++ xnum xid c11 xidid std digr ulit rlit udlit bincst digsep trig u8chlit vaopt scope dfp szlit elifdef warndir delim trufal */ - /* GNUC89 */ { 0, 0, 1, 0, 0, 0,0, 1, 0, 0, 0,0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,0 }, - /* GNUC99 */ { 1, 0, 1, 1, 0, 0,0, 1, 1, 1, 0,0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,0 }, - /* GNUC11 */ { 1, 0, 1, 1, 1, 0,0, 1, 1, 1, 0,0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,0 }, - /* GNUC17 */ { 1, 0, 1, 1, 1, 0,0, 1, 1, 1, 0,0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,0 }, - /* GNUC23 */ { 1, 0, 1, 1, 1, 1,0, 1, 1, 1, 0,1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0,1 }, - /* GNUC2Y */ { 1, 0, 1, 1, 1, 1,0, 1, 1, 1, 0,1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0,1 }, - /* STDC89 */ { 0, 0, 0, 0, 0, 0,1, 
0, 0, 0, 0,0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,0 }, - /* STDC94 */ { 0, 0, 0, 0, 0, 0,1, 1, 0, 0, 0,0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,0 }, - /* STDC99 */ { 1, 0, 1, 1, 0, 0,1, 1, 0, 0, 0,0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,0 }, - /* STDC11 */ { 1, 0, 1, 1, 1, 0,1, 1, 1, 0, 0,0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,0 }, - /* STDC17 */ { 1, 0, 1, 1, 1, 0,1, 1, 1, 0, 0,0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,0 }, - /* STDC23 */ { 1, 0, 1, 1, 1, 1,1, 1, 1, 0, 0,1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0,1 }, - /* STDC2Y */ { 1, 0, 1, 1, 1, 1,1, 1, 1, 0, 0,1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0,1 }, - /* GNUCXX */ { 0, 1, 1, 1, 0, 1,0, 1, 0, 0, 0,0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,1 }, - /* CXX98*/ { 0, 1, 0, 1, 0, 1,1, 1, 0, 0, 0
[gcc r15-3049] libstdc++: Adjust testcase for constexpr placement new [PR115744]
https://gcc.gnu.org/g:20c63093db0f230ef49a298cdb0611f38e470203 commit r15-3049-g20c63093db0f230ef49a298cdb0611f38e470203 Author: Jonathan Wakely Date: Tue Aug 20 21:47:29 2024 +0100 libstdc++: Adjust testcase for constexpr placement new [PR115744] This test now fails in C++26 mode because the declaration in <new> is constexpr and the one in the test isn't. Add constexpr to the test. libstdc++-v3/ChangeLog: PR libstdc++/115744 * testsuite/18_support/headers/new/synopsis.cc [C++26]: Add constexpr to placement operator new and operator new[]. Diff: --- libstdc++-v3/testsuite/18_support/headers/new/synopsis.cc | 9 +++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/libstdc++-v3/testsuite/18_support/headers/new/synopsis.cc b/libstdc++-v3/testsuite/18_support/headers/new/synopsis.cc index 5c83956b5845..479f0df12b5d 100644 --- a/libstdc++-v3/testsuite/18_support/headers/new/synopsis.cc +++ b/libstdc++-v3/testsuite/18_support/headers/new/synopsis.cc @@ -46,8 +46,13 @@ void* operator new[](std::size_t size, const std::nothrow_t&) throw(); void operator delete[](void* ptr) throw(); void operator delete[](void* ptr, const std::nothrow_t&) throw(); -void* operator new (std::size_t size, void* ptr) throw(); -void* operator new[](std::size_t size, void* ptr) throw(); +#if __cplusplus > 202302L +# define CXX26_CONSTEXPR constexpr +#else +# define CXX26_CONSTEXPR +#endif +CXX26_CONSTEXPR void* operator new (std::size_t size, void* ptr) throw(); +CXX26_CONSTEXPR void* operator new[](std::size_t size, void* ptr) throw(); void operator delete (void* ptr, void*) throw(); void operator delete[](void* ptr, void*) throw();
[gcc r15-3050] libstdc++: Fix indentation of lines that follow a [[likely]] attribute
https://gcc.gnu.org/g:91ae46853858a2aa4eb8640ce1a72124679a3909 commit r15-3050-g91ae46853858a2aa4eb8640ce1a72124679a3909 Author: Jonathan Wakely Date: Mon Aug 19 16:56:28 2024 +0100 libstdc++: Fix indentation of lines that follow a [[likely]] attribute libstdc++-v3/ChangeLog: * include/std/text_encoding: Fix indentation. Diff: --- libstdc++-v3/include/std/text_encoding | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libstdc++-v3/include/std/text_encoding b/libstdc++-v3/include/std/text_encoding index 83d023bc71bd..49405a214d5f 100644 --- a/libstdc++-v3/include/std/text_encoding +++ b/libstdc++-v3/include/std/text_encoding @@ -518,7 +518,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION operator++() { if (_M_dereferenceable()) [[likely]] - ++_M_rep; + ++_M_rep; else { __glibcxx_assert(_M_dereferenceable()); @@ -533,7 +533,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION const bool __decrementable = _M_rep != nullptr && _M_rep[-1]._M_id == _M_id; if (__decrementable) [[likely]] - --_M_rep; + --_M_rep; else { __glibcxx_assert(__decrementable);
[gcc r15-3051] libstdc++: Remove redundant redeclaration of std::optional
https://gcc.gnu.org/g:5d5193f0734fb4706940f7ac79899da25250be9d commit r15-3051-g5d5193f0734fb4706940f7ac79899da25250be9d Author: Jonathan Wakely Date: Tue Aug 20 11:54:25 2024 +0100 libstdc++: Remove redundant redeclaration of std::optional We've already declared optional at the top of the header, so we don't need to do it again. libstdc++-v3/ChangeLog: * include/std/optional: Remove redundant redeclaration. Diff: --- libstdc++-v3/include/std/optional | 3 --- 1 file changed, 3 deletions(-) diff --git a/libstdc++-v3/include/std/optional b/libstdc++-v3/include/std/optional index 2c4cc260f90e..6651686cd1d0 100644 --- a/libstdc++-v3/include/std/optional +++ b/libstdc++-v3/include/std/optional @@ -741,9 +741,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION }; #endif // __cpp_concepts - template - class optional; - template inline constexpr bool __is_optional_v = false; template
[gcc r15-3052] match: Reject non-ssa name/min invariants in gimple_extract [PR116412]
https://gcc.gnu.org/g:c7b76a076cb2c6ded7ae208464019b04cb0531a2 commit r15-3052-gc7b76a076cb2c6ded7ae208464019b04cb0531a2 Author: Andrew Pinski Date: Mon Aug 19 08:06:36 2024 -0700 match: Reject non-ssa name/min invariants in gimple_extract [PR116412] After the conversion for phiopt's conditional operand to use maybe_push_res_to_seq, it was found that gimple_extract will extract out from REALPART_EXPR/IMAGPART_EXPR/VCE and BIT_FIELD_REF, a memory load. But that extraction was not needed as memory loads are not simplified in match and simplify. So gimple_extract should return false in those cases. Changes since v1: * Move the rejection to gimple_extract from factor_out_conditional_operation. Bootstrapped and tested on x86_64-linux-gnu. PR tree-optimization/116412 gcc/ChangeLog: * gimple-match-exports.cc (gimple_extract): Return false if op0 was not a SSA name nor a min invariant for REALPART_EXPR/IMAGPART_EXPR/VCE and BIT_FIELD_REF. gcc/testsuite/ChangeLog: * gcc.dg/torture/pr116412-1.c: New test. Signed-off-by: Andrew Pinski Diff: --- gcc/gimple-match-exports.cc | 6 ++ gcc/testsuite/gcc.dg/torture/pr116412-1.c | 6 ++ 2 files changed, 12 insertions(+) diff --git a/gcc/gimple-match-exports.cc b/gcc/gimple-match-exports.cc index 15d54b7d8438..86e40100899a 100644 --- a/gcc/gimple-match-exports.cc +++ b/gcc/gimple-match-exports.cc @@ -740,6 +740,9 @@ gimple_extract (gimple *stmt, gimple_match_op *res_op, || code == VIEW_CONVERT_EXPR) { tree op0 = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0); + /* op0 needs to be a SSA name or an min invariant. */ + if (TREE_CODE (op0) != SSA_NAME && !is_gimple_min_invariant (op0)) + return false; res_op->set_op (code, type, valueize_op (op0)); return true; } @@ -747,6 +750,9 @@ gimple_extract (gimple *stmt, gimple_match_op *res_op, { tree rhs1 = gimple_assign_rhs1 (stmt); tree op0 = valueize_op (TREE_OPERAND (rhs1, 0)); + /* op0 needs to be a SSA name or an min invariant. 
*/ + if (TREE_CODE (op0) != SSA_NAME && !is_gimple_min_invariant (op0)) + return false; res_op->set_op (code, type, op0, TREE_OPERAND (rhs1, 1), TREE_OPERAND (rhs1, 2), diff --git a/gcc/testsuite/gcc.dg/torture/pr116412-1.c b/gcc/testsuite/gcc.dg/torture/pr116412-1.c new file mode 100644 index ..3bc26ecd8b83 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr116412-1.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +double f(_Complex double a, _Complex double *b, int c) +{ + if (c) return __real__ a; + return __real__ *b; +}
[gcc r15-3053] c++/modules: Remove unnecessary errors when not writing compiled module
https://gcc.gnu.org/g:e668771ff0dcc3c72937768e5c37b6f287b97799 commit r15-3053-ge668771ff0dcc3c72937768e5c37b6f287b97799 Author: Nathaniel Shead Date: Sun Aug 18 21:35:23 2024 +1000 c++/modules: Remove unnecessary errors when not writing compiled module It was pointed out to me that the current error referencing an internal linkage entity reads almost like an ICE message, with the message finishing with the unhelpful: m.cpp:1:8: error: failed to write compiled module: Bad file data 1 | export module M; |^~ Similarly, whenever we decide not to emit a module CMI due to other errors we currently emit the following message: m.cpp:1:8: warning: not writing module ‘M’ due to errors 1 | export module M; |^~ Neither of these messages really adds anything useful; users already understand that when an error is reported then the normal outputs will not be created, so these messages are just noise. There is one case where we still need this latter message, however; when an error in a template has been silenced with '-Wno-template-body' we still don't want to write a module CMI, so emit an error now instead. This patch also removes a number of dg-prune-output directives in the testsuite that are no longer needed with this change. gcc/cp/ChangeLog: * module.cc (module_state::write_begin): Return a boolean to indicate errors rather than just doing set_error(). (finish_module_processing): Prevent emission of unnecessary errors; only indicate module writing occurred if write_begin succeeds. gcc/testsuite/ChangeLog: * g++.dg/modules/export-1.C: Remove message. * g++.dg/modules/internal-1.C: Remove message. * g++.dg/modules/ambig-2_b.C: Remove unnecessary pruning. * g++.dg/modules/atom-decl-2.C: Likewise. * g++.dg/modules/atom-pragma-3.C: Likewise. * g++.dg/modules/atom-preamble-2_f.C: Likewise. * g++.dg/modules/block-decl-2.C: Likewise. * g++.dg/modules/dir-only-4.C: Likewise. * g++.dg/modules/enum-12.C: Likewise. * g++.dg/modules/exp-xlate-1_b.C: Likewise. 
* g++.dg/modules/export-3.C: Likewise. * g++.dg/modules/friend-3.C: Likewise. * g++.dg/modules/friend-5_b.C: Likewise. * g++.dg/modules/inc-xlate-1_e.C: Likewise. * g++.dg/modules/linkage-2.C: Likewise. * g++.dg/modules/local-extern-1.C: Likewise. * g++.dg/modules/main-1.C: Likewise. * g++.dg/modules/map-2.C: Likewise. * g++.dg/modules/mod-decl-1.C: Likewise. * g++.dg/modules/mod-decl-3.C: Likewise. * g++.dg/modules/pr99174.H: Likewise. * g++.dg/modules/pr99468.H: Likewise. * g++.dg/modules/token-1.C: Likewise. * g++.dg/modules/token-3.C: Likewise. * g++.dg/modules/token-4.C: Likewise. * g++.dg/modules/token-5.C: Likewise. * g++.dg/modules/using-10.C: Likewise. * g++.dg/modules/using-12.C: Likewise. * g++.dg/modules/using-3.C: Likewise. * g++.dg/modules/using-9.C: Likewise. * g++.dg/modules/using-enum-2.C: Likewise. * g++.dg/modules/permissive-error-1.C: New test. * g++.dg/modules/permissive-error-2.C: New test. Signed-off-by: Nathaniel Shead Reviewed-by: Jason Merrill Diff: --- gcc/cp/module.cc | 42 ++- gcc/testsuite/g++.dg/modules/ambig-2_b.C | 2 -- gcc/testsuite/g++.dg/modules/atom-decl-2.C| 2 -- gcc/testsuite/g++.dg/modules/atom-pragma-3.C | 2 -- gcc/testsuite/g++.dg/modules/atom-preamble-2_f.C | 1 - gcc/testsuite/g++.dg/modules/block-decl-2.C | 2 -- gcc/testsuite/g++.dg/modules/dir-only-4.C | 1 - gcc/testsuite/g++.dg/modules/enum-12.C| 2 -- gcc/testsuite/g++.dg/modules/exp-xlate-1_b.C | 1 - gcc/testsuite/g++.dg/modules/export-1.C | 2 -- gcc/testsuite/g++.dg/modules/export-3.C | 2 -- gcc/testsuite/g++.dg/modules/friend-3.C | 1 - gcc/testsuite/g++.dg/modules/friend-5_b.C | 1 - gcc/testsuite/g++.dg/modules/inc-xlate-1_e.C | 2 -- gcc/testsuite/g++.dg/modules/internal-1.C | 2 +- gcc/testsuite/g++.dg/modules/linkage-2.C | 2 -- gcc/testsuite/g++.dg/modules/local-extern-1.C | 3 -- gcc/testsuite/g++.dg/modules/main-1.C | 1 - gcc/testsuite/g++.dg/modules/map-2.C | 2 -- gcc/testsuite/g++.dg/modules/mod-decl-1.C | 2 -- gcc/testsuite/g++.dg/modules/mod-decl-3.C | 2 -- 
gcc/testsuite/g++.dg/modules/permissive-error-1.C | 10 ++ gcc/testsuite/g++.dg/modules/permissive-error-2.C | 11 +
[gcc r15-3054] RISC-V: Fix one typo in .SAT_TRUNC test func name [NFC]
https://gcc.gnu.org/g:1b72e07696a062e628c35e4bd25926c11ac18297 commit r15-3054-g1b72e07696a062e628c35e4bd25926c11ac18297 Author: Pan Li Date: Tue Aug 20 21:08:23 2024 +0800 RISC-V: Fix one typo in .SAT_TRUNC test func name [NFC] Fix one typo `sat_truc` to `sat_trunc`, as well as `SAT_TRUC` to `SAT_TRUNC`. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_arith.h: Fix SAT_TRUNC typo. * gcc.target/riscv/sat_u_trunc-1.c: Ditto. * gcc.target/riscv/sat_u_trunc-13.c: Ditto. * gcc.target/riscv/sat_u_trunc-14.c: Ditto. * gcc.target/riscv/sat_u_trunc-15.c: Ditto. * gcc.target/riscv/sat_u_trunc-2.c: Ditto. * gcc.target/riscv/sat_u_trunc-3.c: Ditto. * gcc.target/riscv/sat_u_trunc-4.c: Ditto. * gcc.target/riscv/sat_u_trunc-5.c: Ditto. * gcc.target/riscv/sat_u_trunc-6.c: Ditto. * gcc.target/riscv/sat_u_trunc-7.c: Ditto. * gcc.target/riscv/sat_u_trunc-8.c: Ditto. * gcc.target/riscv/sat_u_trunc-9.c: Ditto. * gcc.target/riscv/sat_u_trunc-run-1.c: Ditto. * gcc.target/riscv/sat_u_trunc-run-13.c: Ditto. * gcc.target/riscv/sat_u_trunc-run-14.c: Ditto. * gcc.target/riscv/sat_u_trunc-run-15.c: Ditto. * gcc.target/riscv/sat_u_trunc-run-2.c: Ditto. * gcc.target/riscv/sat_u_trunc-run-3.c: Ditto. * gcc.target/riscv/sat_u_trunc-run-4.c: Ditto. * gcc.target/riscv/sat_u_trunc-run-5.c: Ditto. * gcc.target/riscv/sat_u_trunc-run-6.c: Ditto. * gcc.target/riscv/sat_u_trunc-run-7.c: Ditto. * gcc.target/riscv/sat_u_trunc-run-8.c: Ditto. * gcc.target/riscv/sat_u_trunc-run-9.c: Ditto. 
Signed-off-by: Pan Li Diff: --- gcc/testsuite/gcc.target/riscv/sat_arith.h | 30 +++--- gcc/testsuite/gcc.target/riscv/sat_u_trunc-1.c | 4 +-- gcc/testsuite/gcc.target/riscv/sat_u_trunc-13.c| 4 +-- gcc/testsuite/gcc.target/riscv/sat_u_trunc-14.c| 4 +-- gcc/testsuite/gcc.target/riscv/sat_u_trunc-15.c| 4 +-- gcc/testsuite/gcc.target/riscv/sat_u_trunc-2.c | 4 +-- gcc/testsuite/gcc.target/riscv/sat_u_trunc-3.c | 4 +-- gcc/testsuite/gcc.target/riscv/sat_u_trunc-4.c | 4 +-- gcc/testsuite/gcc.target/riscv/sat_u_trunc-5.c | 4 +-- gcc/testsuite/gcc.target/riscv/sat_u_trunc-6.c | 4 +-- gcc/testsuite/gcc.target/riscv/sat_u_trunc-7.c | 4 +-- gcc/testsuite/gcc.target/riscv/sat_u_trunc-8.c | 4 +-- gcc/testsuite/gcc.target/riscv/sat_u_trunc-9.c | 4 +-- gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-1.c | 4 +-- .../gcc.target/riscv/sat_u_trunc-run-13.c | 4 +-- .../gcc.target/riscv/sat_u_trunc-run-14.c | 4 +-- .../gcc.target/riscv/sat_u_trunc-run-15.c | 4 +-- gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-2.c | 4 +-- gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-3.c | 4 +-- gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-4.c | 4 +-- gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-5.c | 4 +-- gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-6.c | 4 +-- gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-7.c | 4 +-- gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-8.c | 4 +-- gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-9.c | 4 +-- 25 files changed, 63 insertions(+), 63 deletions(-) diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h b/gcc/testsuite/gcc.target/riscv/sat_arith.h index cf055410fd1f..91853b60f592 100644 --- a/gcc/testsuite/gcc.target/riscv/sat_arith.h +++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h @@ -218,40 +218,40 @@ sat_u_sub_##T##_fmt_12 (T x, T y) \ /* Saturation Truncate (unsigned and signed) */ /**/ -#define DEF_SAT_U_TRUC_FMT_1(NT, WT) \ +#define DEF_SAT_U_TRUNC_FMT_1(NT, WT)\ NT __attribute__((noinline)) \ -sat_u_truc_##WT##_to_##NT##_fmt_1 (WT x) \ 
+sat_u_trunc_##WT##_to_##NT##_fmt_1 (WT x) \ {\ bool overflow = x > (WT)(NT)(-1); \ return ((NT)x) | (NT)-overflow;\ } -#define DEF_SAT_U_TRUC_FMT_1_WRAP(NT, WT) DEF_SAT_U_TRUC_FMT_1(NT, WT) +#define DEF_SAT_U_TRUNC_FMT_1_WRAP(NT, WT) DEF_SAT_U_TRUNC_FMT_1(NT, WT) -#define DEF_SAT_U_TRUC_FMT_2(NT, WT) \ +#define DEF_SAT_U_TRUNC_FMT_2(NT, WT)\ NT __attribute__((noinline)) \ -sat_u_truc_##WT##_to_##NT##_fmt_2 (WT x) \ +sat_u_trunc_##WT##_to_##NT##_fmt_2 (WT x) \ {\ WT max = (WT)(NT)-1; \ return x > max ? (NT) max : (NT)x; \ } -#define DEF_SAT_U_TRUC_FMT_2_WRAP(NT, WT) DEF_SAT_U_TRUC_FMT_2(NT, WT) +#define DEF_SAT_U_TRUNC_FMT_2_WRAP(NT, WT) DEF_SAT_U_TRUNC_FMT_2(NT, WT) -#define DEF_SAT_U_TRUC_FMT_3(NT, WT) \ +#define DEF_SAT
[gcc r15-3055] ASAN: call initialize_sanitizer_builtins for hwasan [PR115205]
https://gcc.gnu.org/g:efe3da62758ed031486005e0b912de23a0a6b4c6 commit r15-3055-gefe3da62758ed031486005e0b912de23a0a6b4c6 Author: Andrew Pinski Date: Sun Aug 11 21:26:59 2024 -0700 ASAN: call initialize_sanitizer_builtins for hwasan [PR115205] Sometimes initialize_sanitizer_builtins is not called before emitting the asan builtins with hwasan. In the case of the bug report, there was a path with the fortran front-end where it was not called. So let's call it in asan_instrument before calling transform_statements and from hwasan_finish_file. Built and tested for aarch64-linux-gnu with no regressions. Changes since v1: * v2: Add call of initialize_sanitizer_builtins to hwasan_finish_file also. gcc/ChangeLog: PR sanitizer/115205 * asan.cc (asan_instrument): Call initialize_sanitizer_builtins for hwasan. (hwasan_finish_file): Likewise. Signed-off-by: Andrew Pinski Diff: --- gcc/asan.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/gcc/asan.cc b/gcc/asan.cc index 9e0f51b1477c..5f262d54a3ac 100644 --- a/gcc/asan.cc +++ b/gcc/asan.cc @@ -4276,6 +4276,7 @@ asan_instrument (void) { if (hwasan_sanitize_p ()) { + initialize_sanitizer_builtins (); transform_statements (); return 0; } @@ -4694,6 +4695,8 @@ hwasan_finish_file (void) if (flag_sanitize & SANITIZE_KERNEL_HWADDRESS) return; + initialize_sanitizer_builtins (); + /* Avoid instrumenting code in the hwasan constructors/destructors. */ flag_sanitize &= ~SANITIZE_HWADDRESS; int priority = MAX_RESERVED_INIT_PRIORITY - 1;
[gcc r15-3056] builtins: Don't expand bit query builtins for __int128_t if the target supports an optab for it
https://gcc.gnu.org/g:50b5000a5e430aaf99a5e00465cc9e25563d908b commit r15-3056-g50b5000a5e430aaf99a5e00465cc9e25563d908b Author: Andrew Pinski Date: Fri Aug 16 00:39:32 2024 -0700 builtins: Don't expand bit query builtins for __int128_t if the target supports an optab for it On aarch64 (without CSSC instructions), since popcount is implemented using the SIMD instruction cnt, instead of using two SIMD cnt (V8QI mode), it is better to use one 128-bit cnt (V16QI mode) and only one reduction addition instead of two. Currently fold_builtin_bit_query will always expand without checking if there was an optab for the type, so this changes that to check the optab to see if we should expand or have the backend handle it. Bootstrapped and tested on x86_64-linux-gnu and built and tested for aarch64-linux-gnu. gcc/ChangeLog: * builtins.cc (fold_builtin_bit_query): Don't expand double `unsigned long long` types if there is an optab entry for that type. Signed-off-by: Andrew Pinski Diff: --- gcc/builtins.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/gcc/builtins.cc b/gcc/builtins.cc index 0b902896ddd4..b4d51eaeba5e 100644 --- a/gcc/builtins.cc +++ b/gcc/builtins.cc @@ -10185,7 +10185,9 @@ fold_builtin_bit_query (location_t loc, enum built_in_function fcode, tree call = NULL_TREE, tem; if (TYPE_PRECISION (arg0_type) == MAX_FIXED_MODE_SIZE && (TYPE_PRECISION (arg0_type) - == 2 * TYPE_PRECISION (long_long_unsigned_type_node))) + == 2 * TYPE_PRECISION (long_long_unsigned_type_node)) + /* If the target supports the optab, then don't do the expansion. */ + && !direct_internal_fn_supported_p (ifn, arg0_type, OPTIMIZE_FOR_BOTH)) { /* __int128 expansions using up to 2 long long builtins. */ arg0 = save_expr (arg0);
[gcc r15-3058] Align predicates for operands[1] between mov<mode> and *mov<mode>_internal.
https://gcc.gnu.org/g:bb42c551905024ea23095a0eb7b58fdbcfbcaef6 commit r15-3058-gbb42c551905024ea23095a0eb7b58fdbcfbcaef6 Author: liuhongt Date: Tue Aug 20 14:41:00 2024 +0800 Align predicates for operands[1] between mov<mode> and *mov<mode>_internal. > It's not obvious to me why movv16qi requires a nonimmediate_operand > > source, especially since ix86_expand_vector_mode does have code to > > cope with constant operand[1]s. emit_move_insn_1 doesn't check the > > predicates anyway, so the predicate will have little effect. > > > > A workaround would be to check legitimate_constant_p instead of the > > predicate, but I'm not sure that that should be necessary. > > > > Has this already been discussed? If not, we should loop in the x86 > > maintainers (but I didn't do that here in case it would be a repeat). > > I also noticed it. Not sure why movv16qi requires a > nonimmediate_operand, while ix86_expand_vector_mode could deal with > constant op. Looking forward to Hongtao's comments. The code has been there since 2005, before I was involved. It looks to me that at the beginning both mov<mode> and *mov<mode>_internal only supported nonimmediate_operand for operands[1]. Then r0-75606-g5656a184e83983 adjusted the nonimmediate_operand to nonimmediate_or_sse_const_operand for *mov<mode>_internal, but not for mov<mode>. I think we can align the predicate between mov<mode> and *mov<mode>_internal. gcc/ChangeLog: * config/i386/sse.md (mov<mode>): Align predicates for operands[1] between mov<mode> and *mov<mode>_internal. * config/i386/mmx.md (mov<mode>): Ditto. 
Diff: --- gcc/config/i386/mmx.md | 2 +- gcc/config/i386/sse.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 94d3a6e56922..cb2697537a81 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -169,7 +169,7 @@ (define_expand "mov" [(set (match_operand:MMXMODE 0 "nonimmediate_operand") - (match_operand:MMXMODE 1 "nonimmediate_operand"))] + (match_operand:MMXMODE 1 "nonimm_or_0_operand"))] "TARGET_MMX || TARGET_MMX_WITH_SSE" { ix86_expand_vector_move (mode, operands); diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 8f34c9300d03..e67d25f960e2 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -1387,7 +1387,7 @@ (define_expand "mov" [(set (match_operand:VMOVE 0 "nonimmediate_operand") - (match_operand:VMOVE 1 "nonimmediate_operand"))] + (match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand"))] "TARGET_SSE" { ix86_expand_vector_move (mode, operands);
[gcc r15-3059] testsuite, rs6000: Remove all powerpc-*paired* uses
https://gcc.gnu.org/g:118a7241f4fe7132cfd7b028ffd5ad39056ec601 commit r15-3059-g118a7241f4fe7132cfd7b028ffd5ad39056ec601 Author: Kewen Lin Date: Wed Aug 21 00:26:20 2024 -0500 testsuite, rs6000: Remove all powerpc-*paired* uses Similar to r15-710-g458b23bc8b3e2b which removed all uses of powerpc-*-linux*paired*, this patch is to remove the remaining powerpc-*paired* uses which I missed to catch with "*linux*" in search keyword. gcc/testsuite/ChangeLog: * lib/target-supports.exp (check_vect_support_and_set_flags): Remove the if arm checking powerpc-*paired*. (check_750cl_hw_available): Remove. (check_effective_target_vect_unpack): Remove the check on powerpc-*paired*. Diff: --- gcc/testsuite/lib/target-supports.exp | 35 ++- 1 file changed, 2 insertions(+), 33 deletions(-) diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 11ba77ca404d..91995bff65f7 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -2848,30 +2848,6 @@ proc check_ppc_cpu_supports_hw_available { } { }] } -# Return 1 if the target supports executing 750CL paired-single instructions, 0 -# otherwise. Cache the result. - -proc check_750cl_hw_available { } { -return [check_cached_effective_target 750cl_hw_available { - # If this is not the right target then we can skip the test. - if { ![istarget powerpc-*paired*] } { - expr 0 - } else { - check_runtime_nocache 750cl_hw_available { -int main() -{ -#ifdef __MACH__ - asm volatile ("ps_mul v0,v0,v0"); -#else - asm volatile ("ps_mul 0,0,0"); -#endif - return 0; -} - } "-mpaired" - } -}] -} - # Return 1 if the target supports executing power8 vector instructions, 0 # otherwise. Cache the result. 
@@ -8329,7 +8305,7 @@ proc check_effective_target_vect_pack_trunc { } { proc check_effective_target_vect_unpack { } { return [check_cached_effective_target_indexed vect_unpack { - expr { ([istarget powerpc*-*-*] && ![istarget powerpc-*paired*]) + expr { [istarget powerpc*-*-*] || [istarget i?86-*-*] || [istarget x86_64-*-*] || [istarget ia64-*-*] || [istarget aarch64*-*-*] @@ -11702,14 +11678,7 @@ proc check_vect_support_and_set_flags { } { global dg-do-what-default global EFFECTIVE_TARGETS -if [istarget powerpc-*paired*] { - lappend DEFAULT_VECTCFLAGS "-mpaired" - if [check_750cl_hw_available] { - set dg-do-what-default run - } else { - set dg-do-what-default compile - } -} elseif [istarget powerpc*-*-*] { +if [istarget powerpc*-*-*] { # Skip targets not supporting -maltivec. if ![is-effective-target powerpc_altivec_ok] { return 0
[gcc r15-3060] rs6000: Fix vsx_le_perm_store_* splitters for !reload_completed
https://gcc.gnu.org/g:ae53e4b99eaad43424f2b0cc1bbabb3b454fb6d8 commit r15-3060-gae53e4b99eaad43424f2b0cc1bbabb3b454fb6d8 Author: Kewen Lin Date: Wed Aug 21 00:26:20 2024 -0500 rs6000: Fix vsx_le_perm_store_* splitters for !reload_completed For vsx_le_perm_store_* we have two splitters, one is for !reload_completed and the other is for reload_completed. As Richard pointed out in [1], operand 1 here is a pure input for DF and most passes, but it could be used as the vector rotation (64 bit) destination of itself, so we re-compute the source (back to the original value) for the case reload_completed, while for !reload_completed we generate one new pseudo, so both cases are fine if operand 1 is still live after this insn. But according to the source code, for !reload_completed case, it can logically reuse the operand 1 as the new pseudo generation is conditional on can_create_pseudo_p, then it can cause wrong result once operand 1 is live. So considering this and there is no splitting for this when reload_in_progress, this patch is to fix the code to assert can_create_pseudo_p there, so that both !reload_completed and reload_completed cases would ensure operand 1 is unchanged (pure input), it is also prepared for the following up patch which would strip the unnecessary INOUT constraint modifier "+". This also fixes an oversight in the splitter for VSX_LE_128 (!reload_completed), it should use operand 1 rather than operand 0. [1] https://gcc.gnu.org/pipermail/gcc-patches/2024-August/660145.html gcc/ChangeLog: * config/rs6000/vsx.md (*vsx_le_perm_store_{,, v8hi,v16qi,} !reload_completed splitters): Assert can_create_pseudo_p and always generate one new pseudo for operand 1. 
Diff: --- gcc/config/rs6000/vsx.md | 21 ++--- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 27069d070e15..89eaef183d99 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -703,8 +703,8 @@ /* Otherwise, fall through to transform into a swapping store. */ } - operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1]) - : operands[1]; + gcc_assert (can_create_pseudo_p ()); + operands[2] = gen_reg_rtx_and_attrs (operands[1]); }) ;; The post-reload split requires that we re-permute the source @@ -775,8 +775,8 @@ /* Otherwise, fall through to transform into a swapping store. */ } - operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1]) - : operands[1]; + gcc_assert (can_create_pseudo_p ()); + operands[2] = gen_reg_rtx_and_attrs (operands[1]); }) ;; The post-reload split requires that we re-permute the source @@ -854,8 +854,8 @@ /* Otherwise, fall through to transform into a swapping store. */ } - operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1]) - : operands[1]; + gcc_assert (can_create_pseudo_p ()); + operands[2] = gen_reg_rtx_and_attrs (operands[1]); }) ;; The post-reload split requires that we re-permute the source @@ -947,8 +947,8 @@ /* Otherwise, fall through to transform into a swapping store. */ } - operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1]) - : operands[1]; + gcc_assert (can_create_pseudo_p ()); + operands[2] = gen_reg_rtx_and_attrs (operands[1]); }) ;; The post-reload split requires that we re-permute the source @@ -1076,9 +1076,8 @@ && !altivec_indexed_or_indirect_operand (operands[0], mode)" [(const_int 0)] { - rtx tmp = (can_create_pseudo_p () -? 
gen_reg_rtx_and_attrs (operands[0]) -: operands[0]); + gcc_assert (can_create_pseudo_p ()); + rtx tmp = gen_reg_rtx_and_attrs (operands[1]); rs6000_emit_le_vsx_permute (tmp, operands[1], mode); rs6000_emit_le_vsx_permute (operands[0], tmp, mode); DONE;
[gcc r15-3061] rs6000: Remove "+" constraint modifier from *vsx_le_perm_store_* insns
https://gcc.gnu.org/g:34292a1ae89a13baf974ff2ecb21dcf89aab4617 commit r15-3061-g34292a1ae89a13baf974ff2ecb21dcf89aab4617 Author: Kewen Lin Date: Wed Aug 21 00:26:20 2024 -0500 rs6000: Remove "+" constraint modifier from *vsx_le_perm_store_* insns Since *vsx_le_perm_store_* can be split into vector permute and vector store, after reload_completed, we reuse the operand 1 as the destination of vector permute, so we set operand 1 with constraint modifier "+". But since it's taken as pure input in DF and most passes as Richard pointed out in [1], to ensure it's correct when operand 1 is still live, we actually restore the operand 1's value after the store with vector permute, that is: op1 = vector permute op1 (doubleword swapping) op0 = op2 op1 = vector permute op1 (doubleword swapping) , it means op1's value isn't changed by this insn. So according to the comments from Richard and Segher in that thread, this patch is to remove the "+" constraint modifier of operand 1 from *vsx_le_perm_store_* insns. [1] https://gcc.gnu.org/pipermail/gcc-patches/2024-August/660145.html gcc/ChangeLog: * config/rs6000/vsx.md (define_insn *vsx_le_perm_store_{, ,v8hi,v16qi,}): Remove constraint modifier "+" from operand 1. 
Diff: --- gcc/config/rs6000/vsx.md | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 89eaef183d99..b2fc39acf4e8 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -659,7 +659,7 @@ (define_insn "*vsx_le_perm_store_" [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand" "=Z") -(match_operand:VSX_D 1 "vsx_register_operand" "+wa"))] +(match_operand:VSX_D 1 "vsx_register_operand" "wa"))] "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" "#" [(set_attr "type" "vecstore") @@ -729,7 +729,7 @@ (define_insn "*vsx_le_perm_store_" [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand" "=Z") -(match_operand:VSX_W 1 "vsx_register_operand" "+wa"))] +(match_operand:VSX_W 1 "vsx_register_operand" "wa"))] "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" "#" [(set_attr "type" "vecstore") @@ -804,7 +804,7 @@ (define_insn "*vsx_le_perm_store_v8hi" [(set (match_operand:V8HI 0 "indexed_or_indirect_operand" "=Z") -(match_operand:V8HI 1 "vsx_register_operand" "+wa"))] +(match_operand:V8HI 1 "vsx_register_operand" "wa"))] "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" "#" [(set_attr "type" "vecstore") @@ -889,7 +889,7 @@ (define_insn "*vsx_le_perm_store_v16qi" [(set (match_operand:V16QI 0 "indexed_or_indirect_operand" "=Z") -(match_operand:V16QI 1 "vsx_register_operand" "+wa"))] +(match_operand:V16QI 1 "vsx_register_operand" "wa"))] "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" "#" [(set_attr "type" "vecstore") @@ -1059,7 +1059,7 @@ (define_insn "*vsx_le_perm_store_" [(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z,Q") -(match_operand:VSX_LE_128 1 "vsx_register_operand" "+wa,r"))] +(match_operand:VSX_LE_128 1 "vsx_register_operand" "wa,r"))] "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !altivec_indexed_or_indirect_operand (operands[0], mode)" "@