[gcc r12-11218] middle-end/115110 - Fix view_converted_memref_p
https://gcc.gnu.org/g:4f63fd4b663bdde39524129dfa458c60b2d67133 commit r12-11218-g4f63fd4b663bdde39524129dfa458c60b2d67133 Author: Richard Biener Date: Fri May 17 11:02:29 2024 +0200 middle-end/115110 - Fix view_converted_memref_p view_converted_memref_p was checking the reference type against the pointer type of the offset operand rather than its pointed-to type, which leads to all refs being subject to view-convert treatment in get_alias_set, causing numerous testsuite failures; with its new uses from r15-512-g9b7cad5884f21c it is also a wrong-code issue. PR middle-end/115110 * tree-ssa-alias.cc (view_converted_memref_p): Fix. (cherry picked from commit a5b3721c06646bf5b9b50a22964e8e2bd4d03f5f) Diff: --- gcc/tree-ssa-alias.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/gcc/tree-ssa-alias.cc b/gcc/tree-ssa-alias.cc index 1b404e055f8c..8fe4e9d9112a 100644 --- a/gcc/tree-ssa-alias.cc +++ b/gcc/tree-ssa-alias.cc @@ -2041,8 +2041,9 @@ view_converted_memref_p (tree base) { if (TREE_CODE (base) != MEM_REF && TREE_CODE (base) != TARGET_MEM_REF) return false; - return same_type_for_tbaa (TREE_TYPE (base), -TREE_TYPE (TREE_OPERAND (base, 1))) != 1; + return (same_type_for_tbaa (TREE_TYPE (base), + TREE_TYPE (TREE_TYPE (TREE_OPERAND (base, 1)))) + != 1); } /* Return true if an indirect reference based on *PTR1 constrained
[gcc r12-11223] lto/113207 - fix free_lang_data_in_type
https://gcc.gnu.org/g:07490d983161912fa314607c5a5beb9c49cc4a3f commit r12-11223-g07490d983161912fa314607c5a5beb9c49cc4a3f Author: Richard Biener Date: Mon Feb 3 14:27:01 2025 +0100 lto/113207 - fix free_lang_data_in_type When we process function types we strip volatile and const qualifiers after building a simplified type variant (which preserves those). The qualified type handling of both isn't really compatible, so avoid bad interaction by swapping this, first dropping const/volatile qualifiers and then building the simplified type thereof. PR lto/113207 * ipa-free-lang-data.cc (free_lang_data_in_type): First drop const/volatile qualifiers from function argument types, then build a simplified type. * gcc.dg/pr113207.c: New testcase. (cherry picked from commit a55e14b239181381204c615335929b3316d75370) Diff: --- gcc/ipa-free-lang-data.cc | 3 +-- gcc/testsuite/gcc.dg/pr113207.c | 10 ++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/gcc/ipa-free-lang-data.cc b/gcc/ipa-free-lang-data.cc index a742156858cd..6fef047e3beb 100644 --- a/gcc/ipa-free-lang-data.cc +++ b/gcc/ipa-free-lang-data.cc @@ -436,9 +436,7 @@ free_lang_data_in_type (tree type, class free_lang_data_d *fld) different front ends. */ for (tree p = TYPE_ARG_TYPES (type); p; p = TREE_CHAIN (p)) { - TREE_VALUE (p) = fld_simplified_type (TREE_VALUE (p), fld); tree arg_type = TREE_VALUE (p); - if (TYPE_READONLY (arg_type) || TYPE_VOLATILE (arg_type)) { int quals = TYPE_QUALS (arg_type) @@ -448,6 +446,7 @@ free_lang_data_in_type (tree type, class free_lang_data_d *fld) if (!fld->pset.add (TREE_VALUE (p))) free_lang_data_in_type (TREE_VALUE (p), fld); } + TREE_VALUE (p) = fld_simplified_type (TREE_VALUE (p), fld); /* C++ FE uses TREE_PURPOSE to store initial values. 
*/ TREE_PURPOSE (p) = NULL; } diff --git a/gcc/testsuite/gcc.dg/pr113207.c b/gcc/testsuite/gcc.dg/pr113207.c new file mode 100644 index ..81f53d8fcc2f --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr113207.c @@ -0,0 +1,10 @@ +/* { dg-compile } */ +/* { dg-require-effective-target lto } */ +/* { dg-options "-flto -fchecking" } */ + +typedef struct cl_lispunion *cl_object; +struct cl_lispunion {}; +cl_object cl_error() __attribute__((noreturn)); +volatile cl_object cl_coerce_value0; +void cl_coerce() { cl_error(); } +void L66safe_canonical_type(cl_object volatile);
[gcc r12-11216] lto/91299 - weak definition inlined with LTO
https://gcc.gnu.org/g:e5d24c4e89ae6d8c08f85f3425ea9c29dd0e6646 commit r12-11216-ge5d24c4e89ae6d8c08f85f3425ea9c29dd0e6646 Author: Richard Biener Date: Fri Feb 28 14:09:29 2025 +0100 lto/91299 - weak definition inlined with LTO The following fixes a thinko in the handling of interposed weak definitions which confused the interposition check in get_availability by setting DECL_EXTERNAL too early. PR lto/91299 gcc/lto/ * lto-symtab.cc (lto_symtab_merge_symbols): Set DECL_EXTERNAL only after calling get_availability. gcc/testsuite/ * gcc.dg/lto/pr91299_0.c: New testcase. * gcc.dg/lto/pr91299_1.c: Likewise. (cherry picked from commit bc34db5b12e008f6ec4fdf4ebd22263c8617e5e3) Diff: --- gcc/lto/lto-symtab.cc| 2 +- gcc/testsuite/gcc.dg/lto/pr91299_0.c | 16 gcc/testsuite/gcc.dg/lto/pr91299_1.c | 6 ++ 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/gcc/lto/lto-symtab.cc b/gcc/lto/lto-symtab.cc index f631016b9233..bfd4c99fdc80 100644 --- a/gcc/lto/lto-symtab.cc +++ b/gcc/lto/lto-symtab.cc @@ -1016,7 +1016,6 @@ lto_symtab_merge_symbols (void) || node->resolution == LDPR_RESOLVED_EXEC || node->resolution == LDPR_RESOLVED_DYN)) { - DECL_EXTERNAL (node->decl) = 1; /* If alias to local symbol was preempted by external definition, we know it is not pointing to the local symbol. Remove it. 
*/ if (node->alias @@ -1042,6 +1041,7 @@ lto_symtab_merge_symbols (void) node->remove_all_references (); } } + DECL_EXTERNAL (node->decl) = 1; } if (!(cnode = dyn_cast (node)) diff --git a/gcc/testsuite/gcc.dg/lto/pr91299_0.c b/gcc/testsuite/gcc.dg/lto/pr91299_0.c new file mode 100644 index ..d9a8b21d6b84 --- /dev/null +++ b/gcc/testsuite/gcc.dg/lto/pr91299_0.c @@ -0,0 +1,16 @@ +/* { dg-lto-do run } */ +/* { dg-lto-options { { -O2 -flto } } } */ + +__attribute__((weak)) int get_t(void) +{ + return 0; +} + +int a; +int main(void) +{ + a = get_t(); + if (a != 1) +__builtin_abort (); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/lto/pr91299_1.c b/gcc/testsuite/gcc.dg/lto/pr91299_1.c new file mode 100644 index ..29a28520f7b5 --- /dev/null +++ b/gcc/testsuite/gcc.dg/lto/pr91299_1.c @@ -0,0 +1,6 @@ +/* { dg-options "-fno-lto" } */ + +int get_t(void) +{ +return 1; +}
[gcc] Created branch 'meissner/heads/work212' in namespace 'refs/users'
The branch 'meissner/heads/work212' was created in namespace 'refs/users' pointing to: 63076dbe2153... Remove non-SLP path from vectorizable_load
[gcc(refs/users/meissner/heads/work212-libs)] Add ChangeLog.libs and update REVISION.
https://gcc.gnu.org/g:2548cd79738346549db03ed8a7aa592ef4fe938b commit 2548cd79738346549db03ed8a7aa592ef4fe938b Author: Michael Meissner Date: Tue Jun 24 12:06:52 2025 -0400 Add ChangeLog.libs and update REVISION. 2025-06-24 Michael Meissner gcc/ * ChangeLog.libs: New file for branch. * REVISION: Update. Diff: --- gcc/ChangeLog.libs | 14 ++ gcc/REVISION | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/gcc/ChangeLog.libs b/gcc/ChangeLog.libs new file mode 100644 index ..102a09b041e9 --- /dev/null +++ b/gcc/ChangeLog.libs @@ -0,0 +1,14 @@ + Branch work212-libs, baseline + +2025-06-24 Michael Meissner + +Add ChangeLog.libs and update REVISION. + +2025-06-24 Michael Meissner + +gcc/ + + * ChangeLog.libs: New file for branch. + * REVISION: Update. + + Clone branch diff --git a/gcc/REVISION b/gcc/REVISION index cb5771ab4fa7..0d66efc25640 100644 --- a/gcc/REVISION +++ b/gcc/REVISION @@ -1 +1 @@ -work212 branch +work212-libs branch
[gcc r16-1660] Fortran/OpenACC: Add Fortran support for acc_attach/acc_detach
https://gcc.gnu.org/g:1e35a518258e8cd970a2326bba5a4c8b10695439 commit r16-1660-g1e35a518258e8cd970a2326bba5a4c8b10695439 Author: Tobias Burnus Date: Tue Jun 24 23:28:57 2025 +0200 Fortran/OpenACC: Add Fortran support for acc_attach/acc_detach While C/C++ have supported the routines acc_attach{,_async} and acc_detach{,_finalize}{,_async} for a long time, the Fortran API routines were only added in OpenACC 3.3. Unfortunately, they cannot directly be implemented in the library as GCC will introduce a temporary array descriptor in some cases, which causes the attempted attachment to this temporary variable instead of to the original one. Therefore, those API routines are handled in a special way in the compiler. gcc/fortran/ChangeLog: * trans-stmt.cc (gfc_trans_call_acc_attach_detach): New. (gfc_trans_call): Call it. libgomp/ChangeLog: * libgomp.texi (acc_attach, acc_detach): Update for Fortran version. * openacc.f90 (acc_attach{,_async}, acc_detach{,_finalize}{,_async}): Add. * openacc_lib.h: Likewise. * testsuite/libgomp.oacc-fortran/acc-attach-detach-1.f90: New test. * testsuite/libgomp.oacc-fortran/acc-attach-detach-2.f90: New test. Diff: --- gcc/fortran/trans-stmt.cc | 74 +- libgomp/libgomp.texi | 40 ++-- libgomp/openacc.f90| 44 + libgomp/openacc_lib.h | 42 .../libgomp.oacc-fortran/acc-attach-detach-1.f90 | 25 .../libgomp.oacc-fortran/acc-attach-detach-2.f90 | 62 ++ 6 files changed, 265 insertions(+), 22 deletions(-) diff --git a/gcc/fortran/trans-stmt.cc b/gcc/fortran/trans-stmt.cc index 487b7687ef14..f10540158627 100644 --- a/gcc/fortran/trans-stmt.cc +++ b/gcc/fortran/trans-stmt.cc @@ -377,6 +377,57 @@ get_intrinsic_for_code (gfc_code *code) } +/* Handle the OpenACC routines acc_attach{,_async} and + acc_detach{,_finalize}{,_async} explicitly. 
This is required as the + the corresponding device pointee is attached to the corresponding device + pointer, but if a temporary array descriptor is created for the call, + that one is used as pointer instead of the original pointer. */ + +tree +gfc_trans_call_acc_attach_detach (gfc_code *code) +{ + stmtblock_t block; + gfc_se ptr_addr_se, async_se; + tree fn; + + fn = code->resolved_sym->backend_decl; + if (fn == NULL) +{ + fn = gfc_get_symbol_decl (code->resolved_sym); + code->resolved_sym->backend_decl = fn; +} + + gfc_start_block (&block); + + gfc_init_se (&ptr_addr_se, NULL); + ptr_addr_se.descriptor_only = 1; + ptr_addr_se.want_pointer = 1; + gfc_conv_expr (&ptr_addr_se, code->ext.actual->expr); + gfc_add_block_to_block (&block, &ptr_addr_se.pre); + if (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (ptr_addr_se.expr))) +ptr_addr_se.expr = gfc_conv_descriptor_data_get (ptr_addr_se.expr); + ptr_addr_se.expr = build_fold_addr_expr (ptr_addr_se.expr); + + bool async = code->ext.actual->next != NULL; + if (async) +{ + gfc_init_se (&async_se, NULL); + gfc_conv_expr (&async_se, code->ext.actual->next->expr); + fn = build_call_expr_loc (gfc_get_location (&code->loc), fn, 2, + ptr_addr_se.expr, async_se.expr); +} + else +fn = build_call_expr_loc (gfc_get_location (&code->loc), + fn, 1, ptr_addr_se.expr); + gfc_add_expr_to_block (&block, fn); + gfc_add_block_to_block (&block, &ptr_addr_se.post); + if (async) +gfc_add_block_to_block (&block, &async_se.post); + + return gfc_finish_block (&block); +} + + /* Translate the CALL statement. Builds a call to an F95 subroutine. */ tree @@ -392,13 +443,32 @@ gfc_trans_call (gfc_code * code, bool dependency_check, tree tmp; bool is_intrinsic_mvbits; + gcc_assert (code->resolved_sym); + + /* Unfortunately, acc_attach* and acc_detach* need some special treatment for + attaching the the pointee to a pointer as GCC might introduce a temporary + array descriptor, whose data component is then used as to be attached to + pointer. 
*/ + if (flag_openacc + && code->resolved_sym->attr.subroutine + && code->resolved_sym->formal + && code->resolved_sym->formal->sym->ts.type == BT_ASSUMED + && code->resolved_sym->formal->sym->attr.dimension + && code->resolved_sym->formal->sym->as->type == AS_ASSUMED_RANK + && startswith (code->resolved_sym->name, "acc_") + && (!strcmp (code->resolved_sym->name + 4, "attach") + || !strcmp (code->resolved_sym->name + 4, "attach_async") + || !strcmp (code->resolved_sym->name + 4, "detach") + || !strcmp (code->resolved_sym->name + 4, "detach_async") + || !strcmp (code->resolved_sym->na
[gcc r12-11215] tree-optimization/87984 - hard register assignments not preserved
https://gcc.gnu.org/g:80aab83b90d0a1c9e3037a952c138ac2f1ce3f41 commit r12-11215-g80aab83b90d0a1c9e3037a952c138ac2f1ce3f41 Author: Richard Biener Date: Fri Feb 28 10:36:11 2025 +0100 tree-optimization/87984 - hard register assignments not preserved The following disables redundant store elimination to hard register variables which isn't valid. PR tree-optimization/87984 * tree-ssa-dom.cc (dom_opt_dom_walker::optimize_stmt): Do not perform redundant store elimination to hard register variables. * tree-ssa-sccvn.cc (eliminate_dom_walker::eliminate_stmt): Likewise. * gcc.target/i386/pr87984.c: New testcase. (cherry picked from commit 535115caaf97f5201fb528f67f15b4c52be5619d) Diff: --- gcc/testsuite/gcc.target/i386/pr87984.c | 23 +++ gcc/tree-ssa-dom.cc | 4 +++- gcc/tree-ssa-sccvn.cc | 2 ++ 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.target/i386/pr87984.c b/gcc/testsuite/gcc.target/i386/pr87984.c new file mode 100644 index ..39a6a7480f9e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr87984.c @@ -0,0 +1,23 @@ +/* { dg-do run } */ +/* { dg-options "-O" } */ + +__attribute__((noipa)) +int f(void) +{ + int o = 0; + for (int i = 0; i < 3; i++) +{ + register int a asm("eax"); + a = 1; + asm("add %1, %0" : "+r"(o) : "r"(a)); + asm("xor %%eax, %%eax" ::: "eax"); +} + return o; +} + +int main() +{ + if (f() != 3) +__builtin_abort(); + return 0; +} diff --git a/gcc/tree-ssa-dom.cc b/gcc/tree-ssa-dom.cc index 0ad2e8e7f94f..3e22e8ef341f 100644 --- a/gcc/tree-ssa-dom.cc +++ b/gcc/tree-ssa-dom.cc @@ -2245,7 +2245,9 @@ dom_opt_dom_walker::optimize_stmt (basic_block bb, gimple_stmt_iterator *si, /* Perform simple redundant store elimination. 
*/ if (gimple_assign_single_p (stmt) - && TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME) + && TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME + && (TREE_CODE (gimple_assign_lhs (stmt)) != VAR_DECL + || !DECL_HARD_REGISTER (gimple_assign_lhs (stmt { tree lhs = gimple_assign_lhs (stmt); tree rhs = gimple_assign_rhs1 (stmt); diff --git a/gcc/tree-ssa-sccvn.cc b/gcc/tree-ssa-sccvn.cc index 60a4826e5801..54855cc9db33 100644 --- a/gcc/tree-ssa-sccvn.cc +++ b/gcc/tree-ssa-sccvn.cc @@ -6585,6 +6585,8 @@ eliminate_dom_walker::eliminate_stmt (basic_block b, gimple_stmt_iterator *gsi) if (gimple_assign_single_p (stmt) && !gimple_has_volatile_ops (stmt) && !is_gimple_reg (gimple_assign_lhs (stmt)) + && (TREE_CODE (gimple_assign_lhs (stmt)) != VAR_DECL + || !DECL_HARD_REGISTER (gimple_assign_lhs (stmt))) && (TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME || is_gimple_min_invariant (gimple_assign_rhs1 (stmt {
[gcc r12-11224] lto/114501 - missed free-lang-data for CONSTRUCTOR index
https://gcc.gnu.org/g:415bad120d8f21cd754d827da9e3d5e1fbe68d4c commit r12-11224-g415bad120d8f21cd754d827da9e3d5e1fbe68d4c Author: Richard Biener Date: Thu Mar 6 13:48:16 2025 +0100 lto/114501 - missed free-lang-data for CONSTRUCTOR index The following makes sure to also walk CONSTRUCTOR element indexes which can be FIELD_DECLs, referencing otherwise unused types we need to clean. walk_tree only walks CONSTRUCTOR element data. PR lto/114501 * ipa-free-lang-data.cc (find_decls_types_r): Explicitly handle CONSTRUCTORs as walk_tree handling of those is incomplete. * g++.dg/pr114501_0.C: New testcase. (cherry picked from commit fdd95e1cf29137a19baed25f8c817d320dfe63e3) Diff: --- gcc/ipa-free-lang-data.cc | 14 ++ gcc/testsuite/g++.dg/pr114501_0.C | 20 2 files changed, 34 insertions(+) diff --git a/gcc/ipa-free-lang-data.cc b/gcc/ipa-free-lang-data.cc index 6fef047e3beb..1bfa0239be51 100644 --- a/gcc/ipa-free-lang-data.cc +++ b/gcc/ipa-free-lang-data.cc @@ -841,6 +841,20 @@ find_decls_types_r (tree *tp, int *ws, void *data) fld_worklist_push (tem, fld); fld_worklist_push (BLOCK_ABSTRACT_ORIGIN (t), fld); } + /* walk_tree does not visit ce->index which can be a FIELD_DECL, pulling + in otherwise unused structure fields so handle CTORs explicitly. 
*/ + else if (TREE_CODE (t) == CONSTRUCTOR) +{ + unsigned HOST_WIDE_INT idx; + constructor_elt *ce; + for (idx = 0; vec_safe_iterate (CONSTRUCTOR_ELTS (t), idx, &ce); idx++) + { + if (ce->index) + fld_worklist_push (ce->index, fld); + fld_worklist_push (ce->value, fld); + } + *ws = 0; +} if (TREE_CODE (t) != IDENTIFIER_NODE && CODE_CONTAINS_STRUCT (TREE_CODE (t), TS_TYPED)) diff --git a/gcc/testsuite/g++.dg/pr114501_0.C b/gcc/testsuite/g++.dg/pr114501_0.C new file mode 100644 index ..0439ee5f6e23 --- /dev/null +++ b/gcc/testsuite/g++.dg/pr114501_0.C @@ -0,0 +1,20 @@ +// { dg-do compile } +// { dg-require-effective-target c++17 } +// { dg-require-effective-target lto } +// { dg-options "-flto" } + +typedef long unsigned int size_t; +struct basic_string_view { + typedef long unsigned int size_type; + constexpr size_type size() const { return 0; } +}; +struct array { + char _M_elems[1]; +}; +inline constexpr auto make_it() { + constexpr basic_string_view view; + array arr{}; + arr._M_elems[view.size()] = 'a'; + return arr; +} +auto bar = make_it();
[gcc r16-1655] Remove non-SLP path from vectorizable_load
https://gcc.gnu.org/g:63076dbe21535cc7cf106d92f655e2b7d8b749cc commit r16-1655-g63076dbe21535cc7cf106d92f655e2b7d8b749cc Author: Richard Biener Date: Tue Jun 24 14:38:19 2025 +0200 Remove non-SLP path from vectorizable_load This cleans the rest of vectorizable_load from non-SLP, propagates out ncopies == 1, and elides loops from 0 to ncopies. * tree-vect-stmts.cc (vectorizable_load): Remove non-SLP paths and propagate out ncopies == 1. Diff: --- gcc/tree-vect-stmts.cc | 1935 ++-- 1 file changed, 876 insertions(+), 1059 deletions(-) diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index f699d808e688..db1b539b6c74 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -9836,7 +9836,6 @@ vectorizable_load (vec_info *vinfo, tree dataref_ptr = NULL_TREE; tree dataref_offset = NULL_TREE; gimple *ptr_incr = NULL; - int ncopies; int i, j; unsigned int group_size; poly_uint64 group_gap_adj; @@ -9850,7 +9849,6 @@ vectorizable_load (vec_info *vinfo, bool compute_in_loop = false; class loop *at_loop; int vec_num; - bool slp = (slp_node != NULL); bool slp_perm = false; bb_vec_info bb_vinfo = dyn_cast (vinfo); poly_uint64 vf; @@ -9909,7 +9907,7 @@ vectorizable_load (vec_info *vinfo, return false; mask_index = internal_fn_mask_index (ifn); - if (mask_index >= 0 && slp_node) + if (mask_index >= 0) mask_index = vect_slp_child_index_for_operand (call, mask_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info)); if (mask_index >= 0 @@ -9918,7 +9916,7 @@ vectorizable_load (vec_info *vinfo, return false; els_index = internal_fn_else_index (ifn); - if (els_index >= 0 && slp_node) + if (els_index >= 0) els_index = vect_slp_child_index_for_operand (call, els_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info)); if (els_index >= 0 @@ -9939,19 +9937,9 @@ vectorizable_load (vec_info *vinfo, else vf = 1; - /* Multiple types in SLP are handled by creating the appropriate number of - vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in - case of SLP. 
*/ - if (slp) -ncopies = 1; - else -ncopies = vect_get_num_copies (loop_vinfo, vectype); - - gcc_assert (ncopies >= 1); - /* FORNOW. This restriction should be relaxed. */ if (nested_in_vect_loop - && (ncopies > 1 || (slp && SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) > 1))) + && SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) > 1) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -9959,20 +9947,6 @@ vectorizable_load (vec_info *vinfo, return false; } - /* Invalidate assumptions made by dependence analysis when vectorization - on the unrolled body effectively re-orders stmts. */ - if (ncopies > 1 - && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0 - && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo), - STMT_VINFO_MIN_NEG_DIST (stmt_info))) -{ - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, -"cannot perform implicit CSE when unrolling " -"with negative dependence distance\n"); - return false; -} - elem_type = TREE_TYPE (vectype); mode = TYPE_MODE (vectype); @@ -9997,15 +9971,6 @@ vectorizable_load (vec_info *vinfo, first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info); group_size = DR_GROUP_SIZE (first_stmt_info); - /* Refuse non-SLP vectorization of SLP-only groups. */ - if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info)) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, -"cannot vectorize load in non-SLP mode.\n"); - return false; - } - /* Invalidate assumptions made by dependence analysis when vectorization on the unrolled body effectively re-orders stmts. 
*/ if (STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0 @@ -10031,7 +9996,7 @@ vectorizable_load (vec_info *vinfo, int maskload_elsval = 0; bool need_zeroing = false; if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD, - ncopies, &memory_access_type, &poffset, + 1, &memory_access_type, &poffset, &alignment_support_scheme, &misalignment, &gs_info, &lanes_ifn, &elsvals)) return false; @@ -10046,8 +10011,7 @@ vectorizable_load (vec_info *vinfo, /* ??? The following checks should really be part of get_group_load_store_type. */ - if (slp - && SLP_TREE_LOAD_PERMUTATION (slp_node).exists () + if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists () && !((memory_access_type == VMAT_ELEMENTWISE || mem
[gcc r12-11229] tree-optimization/119534 - reject bogus emulated vectorized gather
https://gcc.gnu.org/g:550edc99476376ee0350be90b9e61b337ffb0ff3 commit r12-11229-g550edc99476376ee0350be90b9e61b337ffb0ff3 Author: Richard Biener Date: Tue Apr 1 14:13:03 2025 +0200 tree-optimization/119534 - reject bogus emulated vectorized gather The following makes sure to reject the attempts to emulate a vector gather when the discovered index vector type is a vector mask. PR tree-optimization/119534 * tree-vect-stmts.cc (get_load_store_type): Reject VECTOR_BOOLEAN_TYPE_P offset vector type for emulated gathers. * gcc.dg/vect/pr119534.c: New testcase. (cherry picked from commit d0cc14c62ad7403afcab3c2e38851d3ab179352f) Diff: --- gcc/testsuite/gcc.dg/vect/pr119534.c | 11 +++ gcc/tree-vect-stmts.cc | 1 + 2 files changed, 12 insertions(+) diff --git a/gcc/testsuite/gcc.dg/vect/pr119534.c b/gcc/testsuite/gcc.dg/vect/pr119534.c new file mode 100644 index ..0b4130b7cfaa --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr119534.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-mavx512bw" { target { x86_64-*-* i?86-*-* } } } */ + +void f(int w, int *out, double *d) +{ + for (int j = 0; j < w; j++) +{ + const int i = (j >= w / 2); + out[j] += d[i]; +} +} diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index b1ab4bce7d28..bc9f95cab970 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -2493,6 +2493,7 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, else if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant () || !TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype).is_constant () + || VECTOR_BOOLEAN_TYPE_P (gs_info->offset_vectype) || !constant_multiple_p (TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype), TYPE_VECTOR_SUBPARTS (vectype)))
[gcc r12-11217] middle-end/101478 - ICE with degenerate address during gimplification
https://gcc.gnu.org/g:05b347c5322a50195aa3ab0d06f2058f0ccee956 commit r12-11217-g05b347c5322a50195aa3ab0d06f2058f0ccee956 Author: Richard Biener Date: Wed Jul 31 10:07:45 2024 +0200 middle-end/101478 - ICE with degenerate address during gimplification When we gimplify &MEM[0B + 4] we are re-folding the address in case types are not canonical which ends up with a constant address that recompute_tree_invariant_for_addr_expr ICEs on. Properly guard that call. PR middle-end/101478 * gimplify.cc (gimplify_addr_expr): Check we still have an ADDR_EXPR before calling recompute_tree_invariant_for_addr_expr. * gcc.dg/pr101478.c: New testcase. (cherry picked from commit 33ead6400ad59d4b38fa0527a9a7b53a28114ab7) Diff: --- gcc/gimplify.cc | 3 ++- gcc/testsuite/gcc.dg/pr101478.c | 11 +++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc index 55bc7f8624ed..a7b5a3883373 100644 --- a/gcc/gimplify.cc +++ b/gcc/gimplify.cc @@ -6518,7 +6518,8 @@ gimplify_addr_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p) *expr_p = build_fold_addr_expr (op0); /* Make sure TREE_CONSTANT and TREE_SIDE_EFFECTS are set properly. */ - recompute_tree_invariant_for_addr_expr (*expr_p); + if (TREE_CODE (*expr_p) == ADDR_EXPR) + recompute_tree_invariant_for_addr_expr (*expr_p); /* If we re-built the ADDR_EXPR add a conversion to the original type if required. */ diff --git a/gcc/testsuite/gcc.dg/pr101478.c b/gcc/testsuite/gcc.dg/pr101478.c new file mode 100644 index ..527620ea0f11 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr101478.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "" } */ + +struct obj { + int n; + int l; +}; +int main() +{ + (struct obj *)((char *)(__SIZE_TYPE__)({ 0; }) - (char *)&((struct obj *)0)->l); +}
[gcc r12-11230] middle-end/119706 - allow POLY_INT_CST as is_gimple_mem_ref_addr
https://gcc.gnu.org/g:75f255c11f7e5a5099ad909606e21ec6bf9b82cc commit r12-11230-g75f255c11f7e5a5099ad909606e21ec6bf9b82cc Author: Richard Biener Date: Thu Apr 10 13:30:42 2025 +0200 middle-end/119706 - allow POLY_INT_CST as is_gimple_mem_ref_addr We currently only allow INTEGER_CST, but not POLY_INT_CST, which leads to the situation that when the POLY_INT_CST is only indirectly present via an SSA def the IL is valid but when propagated it's not. That's unsustainable. PR middle-end/119706 * gimple-expr.cc (is_gimple_mem_ref_addr): Also allow POLY_INT_CST. (cherry picked from commit bf812c6ad83ec0b241bb3fecc7e68f883b6083df) Diff: --- gcc/gimple-expr.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/gimple-expr.cc b/gcc/gimple-expr.cc index 5faaf43eaf50..2ebcdac74136 100644 --- a/gcc/gimple-expr.cc +++ b/gcc/gimple-expr.cc @@ -843,7 +843,7 @@ bool is_gimple_mem_ref_addr (tree t) { return (is_gimple_reg (t) - || TREE_CODE (t) == INTEGER_CST + || poly_int_tree_p (t) || (TREE_CODE (t) == ADDR_EXPR && (CONSTANT_CLASS_P (TREE_OPERAND (t, 0)) || decl_address_invariant_p (TREE_OPERAND (t, 0)))));
[gcc r16-1661] gcn: Fix glc vs. sc0 handling for scalar memory access
https://gcc.gnu.org/g:750bc2899844d662aee93476f2da63fce68535d9 commit r16-1661-g750bc2899844d662aee93476f2da63fce68535d9 Author: Tobias Burnus Date: Tue Jun 24 23:55:27 2025 +0200 gcn: Fix glc vs. sc0 handling for scalar memory access gfx942 still uses glc for scalar access ('s_...') and only uses sc0/nt/sc1 for vector access. gcc/ChangeLog: * config/gcn/gcn-opts.h (TARGET_GLC_NAME): Fix and extend the description in the comment. * config/gcn/gcn.cc (print_operand): Extend the comment about 'G' and 'g'. * config/gcn/gcn.md: Use 'glc' instead of %G where appropriate. Diff: --- gcc/config/gcn/gcn-opts.h | 7 +-- gcc/config/gcn/gcn.cc | 2 ++ gcc/config/gcn/gcn.md | 30 +++--- 3 files changed, 22 insertions(+), 17 deletions(-) diff --git a/gcc/config/gcn/gcn-opts.h b/gcc/config/gcn/gcn-opts.h index bcea14f3fe7a..0bfc7869eefe 100644 --- a/gcc/config/gcn/gcn-opts.h +++ b/gcc/config/gcn/gcn-opts.h @@ -84,8 +84,11 @@ enum hsaco_attr_type #define TARGET_DPP8 TARGET_RDNA2_PLUS /* Device requires CDNA1-style manually inserted wait states for AVGPRs. */ #define TARGET_AVGPR_CDNA1_NOPS TARGET_CDNA1 -/* Whether to use the 'globally coherent' (glc) or the 'scope' (sc0, sc1) flag - for scalar memory operations. The string starts on purpose with a space. */ +/* Whether to use the 'globally coherent' (glc) or the 'scope' (sc0) flag + for non-scalar memory operations. The string starts on purpose with a space. + Note: for scalar memory operations (i.e. 's_...'), 'glc' is still used. + CDNA3 also uses 'nt' instead of 'slc' and 'sc1' instead of 'scc'; however, + there is no non-scalar user so far. */ #define TARGET_GLC_NAME (TARGET_CDNA3 ? " sc0" : " glc") /* The metadata on different devices need different granularity. 
*/ #define TARGET_VGPR_GRANULARITY \ diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc index 2d8dfa3232e2..0ce5a29fbb57 100644 --- a/gcc/config/gcn/gcn.cc +++ b/gcc/config/gcn/gcn.cc @@ -7103,6 +7103,8 @@ print_operand_address (FILE *file, rtx mem) O - print offset:n for data share operations. G - print "glc" (or for gfx94x: sc0) unconditionally [+ indep. of regnum] g - print "glc" (or for gfx94x: sc0), if appropriate for given MEM + NOTE: Do not use 'G' or 'g with scalar memory access ('s_...') as those + require "glc" also with gfx94x. L - print low-part of a multi-reg value H - print second part of a multi-reg value (high-part of 2-reg value) J - print third part of a multi-reg value diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md index 1998931e0529..2ce2e054fbf0 100644 --- a/gcc/config/gcn/gcn.md +++ b/gcc/config/gcn/gcn.md @@ -206,7 +206,7 @@ ; vdata: vgpr0-255 ; srsrc: sgpr0-102 ; soffset: sgpr0-102 -; flags: offen, idxen, %G, lds, slc, tfe +; flags: offen, idxen, glc, lds, slc, tfe ; ; mtbuf - Typed memory buffer operation. Two words ; offset: 12-bit constant @@ -216,10 +216,10 @@ ; vdata: vgpr0-255 ; srsrc: sgpr0-102 ; soffset: sgpr0-102 -; flags: offen, idxen, %G, lds, slc, tfe +; flags: offen, idxen, glc, lds, slc, tfe ; ; flat - flat or global memory operations -; flags: %G, slc +; flags: {CDNA3: sc0, nt, sc1 | otherwise: glc, slc, scc} ; addr: vgpr0-255 ; data: vgpr0-255 ; vdst: vgpr0-255 @@ -1987,7 +1987,7 @@ (use (match_operand 3 "const_int_operand"))] "0 /* Disabled. 
*/" "@ - s_atomic_\t%0, %1, %2 %G2\;s_waitcnt\tlgkmcnt(0) + s_atomic_\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0) flat_atomic_\t%0, %1, %2 %G2\;s_waitcnt\t0 global_atomic_\t%0, %A1, %2%O1 %G2\;s_waitcnt\tvmcnt(0)" [(set_attr "type" "smem,flat,flat") @@ -2054,7 +2054,7 @@ UNSPECV_ATOMIC))] "" "@ - s_atomic_cmpswap\t%0, %1, %2 %G2\;s_waitcnt\tlgkmcnt(0) + s_atomic_cmpswap\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0) flat_atomic_cmpswap\t%0, %1, %2 %G2\;s_waitcnt\t0 global_atomic_cmpswap\t%0, %A1, %2%O1 %G2\;s_waitcnt\tvmcnt(0)" [(set_attr "type" "smem,flat,flat") @@ -2096,7 +2096,7 @@ switch (which_alternative) { case 0: - return "s_load%o0\t%0, %A1 %G1\;s_waitcnt\tlgkmcnt(0)"; + return "s_load%o0\t%0, %A1 glc\;s_waitcnt\tlgkmcnt(0)"; case 1: return (TARGET_RDNA2 /* Not GFX11. */ ? "flat_load%o0\t%0, %A1%O1 %G1 dlc\;s_waitcnt\t0" @@ -2113,7 +2113,7 @@ switch (which_alternative) { case 0: - return "s_load%o0\t%0, %A1 %G1\;s_waitcnt\tlgkmcnt(0)\;" + return "s_load%o0\t%0, %A1 glc\;s_waitcnt\tlgkmcnt(0)\;" "s_dcache_wb_vol"; case 1: return (TARGET_RDNA2 @@ -2147,7 +2147,7 @@ switch (which_alternative) { case 0: - return "s_dcache_wb_vol\;s_load%o0\t%0, %A1 %G1\;
[gcc r12-11213] tree-optimization/111125 - avoid BB vectorization in novector loops
https://gcc.gnu.org/g:b908ad2b836b761f7b27b8dc650422ce9a7efecd commit r12-11213-gb908ad2b836b761f7b27b8dc650422ce9a7efecd Author: Richard Biener Date: Thu Aug 24 11:10:43 2023 +0200 tree-optimization/111125 - avoid BB vectorization in novector loops When a loop is marked with #pragma GCC novector the following makes sure to also skip BB vectorization for contained blocks. That avoids gcc.dg/vect/bb-slp-29.c failing on aarch64 because of extra BB vectorization therein. I'm not specifically dealing with sub-loops of novector loops, the desired semantics isn't documented. PR tree-optimization/111125 * tree-vect-slp.cc (vect_slp_function): Split at novector loop entry, do not push blocks in novector loops. (cherry picked from commit 43da77a4f1636280c4259402c9c2c543e6ec6c0b) Diff: --- gcc/tree-vect-slp.cc | 41 + 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index 0462fa01020d..26fc94a661e3 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -6262,6 +6262,17 @@ vect_slp_function (function *fun) bbs[0]->loop_father->num, bb->index); split = true; } + else if (!bbs.is_empty () + && bb->loop_father->header == bb + && bb->loop_father->dont_vectorize) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, +"splitting region at dont-vectorize loop %d " +"entry at bb%d\n", +bb->loop_father->num, bb->index); + split = true; + } if (split && !bbs.is_empty ()) { @@ -6269,19 +6280,25 @@ vect_slp_function (function *fun) bbs.truncate (0); } - /* We need to be able to insert at the head of the region which -we cannot for region starting with a returns-twice call. 
*/ if (bbs.is_empty ()) - if (gcall *first = safe_dyn_cast (first_stmt (bb))) - if (gimple_call_flags (first) & ECF_RETURNS_TWICE) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, -"skipping bb%d as start of region as it " -"starts with returns-twice call\n", -bb->index); - continue; - } + { + /* We need to be able to insert at the head of the region which +we cannot for region starting with a returns-twice call. */ + if (gcall *first = safe_dyn_cast (first_stmt (bb))) + if (gimple_call_flags (first) & ECF_RETURNS_TWICE) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "skipping bb%d as start of region as it " + "starts with returns-twice call\n", + bb->index); + continue; + } + /* If the loop this BB belongs to is marked as not to be vectorized +honor that also for BB vectorization. */ + if (bb->loop_father->dont_vectorize) + continue; + } bbs.safe_push (bb);
[gcc(refs/users/meissner/heads/work212-cmodel)] Add ChangeLog.cmodel and update REVISION.
https://gcc.gnu.org/g:2ce52d84bc94196d891714e09390b8eae8a1ffa0 commit 2ce52d84bc94196d891714e09390b8eae8a1ffa0 Author: Michael Meissner Date: Tue Jun 24 12:04:12 2025 -0400 Add ChangeLog.cmodel and update REVISION. 2025-06-24 Michael Meissner gcc/ * ChangeLog.cmodel: New file for branch. * REVISION: Update. Diff: --- gcc/ChangeLog.cmodel | 14 ++ gcc/REVISION | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/gcc/ChangeLog.cmodel b/gcc/ChangeLog.cmodel new file mode 100644 index ..b1609e4321f3 --- /dev/null +++ b/gcc/ChangeLog.cmodel @@ -0,0 +1,14 @@ + Branch work212-cmodel, baseline + +2025-06-24 Michael Meissner + +Add ChangeLog.cmodel and update REVISION. + +2025-06-24 Michael Meissner + +gcc/ + + * ChangeLog.cmodel: New file for branch. + * REVISION: Update. + + Clone branch diff --git a/gcc/REVISION b/gcc/REVISION index cb5771ab4fa7..08b2925fffe0 100644 --- a/gcc/REVISION +++ b/gcc/REVISION @@ -1 +1 @@ -work212 branch +work212-cmodel branch
[gcc(refs/users/meissner/heads/work212-bugs)] Add ChangeLog.bugs and update REVISION.
https://gcc.gnu.org/g:f62f924a42e053bd073d8d3a671b5d12ffb1829d commit f62f924a42e053bd073d8d3a671b5d12ffb1829d Author: Michael Meissner Date: Tue Jun 24 12:02:56 2025 -0400 Add ChangeLog.bugs and update REVISION. 2025-06-24 Michael Meissner gcc/ * ChangeLog.bugs: New file for branch. * REVISION: Update. Diff: --- gcc/ChangeLog.bugs | 14 ++ gcc/REVISION | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/gcc/ChangeLog.bugs b/gcc/ChangeLog.bugs new file mode 100644 index ..88436a7b674b --- /dev/null +++ b/gcc/ChangeLog.bugs @@ -0,0 +1,14 @@ + Branch work212-bugs, baseline + +2025-06-24 Michael Meissner + +Add ChangeLog.bugs and update REVISION. + +2025-06-24 Michael Meissner + +gcc/ + + * ChangeLog.bugs: New file for branch. + * REVISION: Update. + + Clone branch diff --git a/gcc/REVISION b/gcc/REVISION index cb5771ab4fa7..308e66a07c33 100644 --- a/gcc/REVISION +++ b/gcc/REVISION @@ -1 +1 @@ -work212 branch +work212-bugs branch
[gcc] Created branch 'meissner/heads/work212-cmodel' in namespace 'refs/users'
The branch 'meissner/heads/work212-cmodel' was created in namespace 'refs/users' pointing to: 09be5ec3d304... Add ChangeLog.meissner and REVISION.
[gcc] Created branch 'meissner/heads/work212-sha' in namespace 'refs/users'
The branch 'meissner/heads/work212-sha' was created in namespace 'refs/users' pointing to: 09be5ec3d304... Add ChangeLog.meissner and REVISION.
[gcc] Created branch 'meissner/heads/work212-test' in namespace 'refs/users'
The branch 'meissner/heads/work212-test' was created in namespace 'refs/users' pointing to: 09be5ec3d304... Add ChangeLog.meissner and REVISION.
[gcc(refs/users/meissner/heads/work212-sha)] Add ChangeLog.sha and update REVISION.
https://gcc.gnu.org/g:2e4af1056a2311f743e392483ce271168afeb078 commit 2e4af1056a2311f743e392483ce271168afeb078 Author: Michael Meissner Date: Tue Jun 24 12:07:55 2025 -0400 Add ChangeLog.sha and update REVISION. 2025-06-24 Michael Meissner gcc/ * ChangeLog.sha: New file for branch. * REVISION: Update. Diff: --- gcc/ChangeLog.sha | 14 ++ gcc/REVISION | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/gcc/ChangeLog.sha b/gcc/ChangeLog.sha new file mode 100644 index ..58ea95619888 --- /dev/null +++ b/gcc/ChangeLog.sha @@ -0,0 +1,14 @@ + Branch work212-sha, baseline + +2025-06-24 Michael Meissner + +Add ChangeLog.sha and update REVISION. + +2025-06-24 Michael Meissner + +gcc/ + + * ChangeLog.sha: New file for branch. + * REVISION: Update. + + Clone branch diff --git a/gcc/REVISION b/gcc/REVISION index cb5771ab4fa7..dd012b1d103e 100644 --- a/gcc/REVISION +++ b/gcc/REVISION @@ -1 +1 @@ -work212 branch +work212-sha branch
[gcc] Created branch 'meissner/heads/work212-submit' in namespace 'refs/users'
The branch 'meissner/heads/work212-submit' was created in namespace 'refs/users' pointing to: f2cc1f39200e... Add REVISION.
[gcc] Created branch 'meissner/heads/work212-libs' in namespace 'refs/users'
The branch 'meissner/heads/work212-libs' was created in namespace 'refs/users' pointing to: 09be5ec3d304... Add ChangeLog.meissner and REVISION.
[gcc(refs/users/meissner/heads/work212-test)] Add ChangeLog.test and update REVISION.
https://gcc.gnu.org/g:a9b578441e87a0443ded42838bd5536934cb25c7 commit a9b578441e87a0443ded42838bd5536934cb25c7 Author: Michael Meissner Date: Tue Jun 24 12:09:07 2025 -0400 Add ChangeLog.test and update REVISION. 2025-06-24 Michael Meissner gcc/ * ChangeLog.test: New file for branch. * REVISION: Update. Diff: --- gcc/ChangeLog.test | 14 ++ gcc/REVISION | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/gcc/ChangeLog.test b/gcc/ChangeLog.test new file mode 100644 index ..bc4f2aafa995 --- /dev/null +++ b/gcc/ChangeLog.test @@ -0,0 +1,14 @@ + Branch work212-test, baseline + +2025-06-24 Michael Meissner + +Add ChangeLog.test and update REVISION. + +2025-06-24 Michael Meissner + +gcc/ + + * ChangeLog.test: New file for branch. + * REVISION: Update. + + Clone branch diff --git a/gcc/REVISION b/gcc/REVISION index cb5771ab4fa7..b15b6e0aec15 100644 --- a/gcc/REVISION +++ b/gcc/REVISION @@ -1 +1 @@ -work212 branch +work212-test branch
[gcc r16-1656] i386: Convert LEA stack adjust insn to SUB when FLAGS_REG is dead
https://gcc.gnu.org/g:8f5fac56ca39e99d7a9ad6a0c067c75e6ffcd2cf commit r16-1656-g8f5fac56ca39e99d7a9ad6a0c067c75e6ffcd2cf Author: Uros Bizjak Date: Tue Jun 24 11:02:02 2025 +0200 i386: Convert LEA stack adjust insn to SUB when FLAGS_REG is dead ADD/SUB is faster than LEA for most processors. Also, there are several peephole2 patterns available that convert prologue esp subtractions to pushes (at the end of i386.md). These process only patterns with flags reg clobber, so they are ineffective with clobber-less stack ptr adjustments, introduced by r16-1551 ("x86: Enable separate shrink wrapping"). Introduce a peephole2 pattern that adds a clobber to a clobber-less stack ptr adjustments when FLAGS_REG is dead. gcc/ChangeLog: * config/i386/i386.md (@pro_epilogue_adjust_stack_add_nocc): Add type attribute. (pro_epilogue_adjust_stack_add_nocc peephole2 pattern): Convert pro_epilogue_adjust_stack_add_nocc variant to pro_epilogue_adjust_stack_add when FLAGS_REG is dead. Diff: --- gcc/config/i386/i386.md | 24 +--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 423ef48e518f..41a86544bbf7 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -27449,7 +27449,7 @@ (cond [(and (eq_attr "alternative" "0") (not (match_test "TARGET_OPT_AGU"))) (const_string "alu") - (match_operand: 2 "const0_operand") + (match_operand 2 "const0_operand") (const_string "imov") ] (const_string "lea"))) @@ -27470,7 +27470,7 @@ (clobber (mem:BLK (scratch)))] "" { - if (operands[2] == CONST0_RTX (mode)) + if (get_attr_type (insn) == TYPE_IMOV) return "mov{}\t{%1, %0|%0, %1}"; else { @@ -27478,13 +27478,31 @@ return "lea{}\t{%E2, %0|%0, %E2}"; } } - [(set (attr "length_immediate") + [(set (attr "type") + (cond [(match_operand 2 "const0_operand") +(const_string "imov") + ] + (const_string "lea"))) + (set (attr "length_immediate") (cond [(eq_attr "type" "imov") (const_string "0") ] (const_string "*"))) (set_attr "mode" 
"")]) +(define_peephole2 + [(parallel + [(set (match_operand:P 0 "register_operand") + (plus:P (match_dup 0) + (match_operand:P 1 ""))) + (clobber (mem:BLK (scratch)))])] + "peep2_regno_dead_p (0, FLAGS_REG)" + [(parallel + [(set (match_dup 0) + (plus:P (match_dup 0) (match_dup 1))) + (clobber (reg:CC FLAGS_REG)) + (clobber (mem:BLK (scratch)))])]) + (define_insn "@pro_epilogue_adjust_stack_sub_" [(set (match_operand:P 0 "register_operand" "=r") (minus:P (match_operand:P 1 "register_operand" "0")
[gcc(refs/users/meissner/heads/work212-orig)] Add REVISION.
https://gcc.gnu.org/g:f2cc1f39200ec924fcce6aaff29743f192032708 commit f2cc1f39200ec924fcce6aaff29743f192032708 Author: Michael Meissner Date: Tue Jun 24 12:01:49 2025 -0400 Add REVISION. 2025-06-24 Michael Meissner gcc/ * REVISION: New file for branch. Diff: --- gcc/REVISION | 1 + 1 file changed, 1 insertion(+) diff --git a/gcc/REVISION b/gcc/REVISION new file mode 100644 index ..731dbab77c31 --- /dev/null +++ b/gcc/REVISION @@ -0,0 +1 @@ +work212-orig branch
[gcc(refs/users/meissner/heads/work212-submit)] Add ChangeLog.submit and update REVISION.
https://gcc.gnu.org/g:a17d97335c37fe49fdce2d4abdf4dfe0986c31d0 commit a17d97335c37fe49fdce2d4abdf4dfe0986c31d0 Author: Michael Meissner Date: Tue Jun 24 12:12:02 2025 -0400 Add ChangeLog.submit and update REVISION. 2025-06-24 Michael Meissner gcc/ * ChangeLog.submit: New file for branch. * REVISION: Update. Diff: --- gcc/ChangeLog.submit | 14 ++ gcc/REVISION | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/gcc/ChangeLog.submit b/gcc/ChangeLog.submit new file mode 100644 index ..0337e6f3cd35 --- /dev/null +++ b/gcc/ChangeLog.submit @@ -0,0 +1,14 @@ + Branch work212-submit, baseline + +2025-06-24 Michael Meissner + +Add ChangeLog.submit and update REVISION. + +2025-06-24 Michael Meissner + +gcc/ + + * ChangeLog.submit: New file for branch. + * REVISION: Update. + + Clone branch diff --git a/gcc/REVISION b/gcc/REVISION index 731dbab77c31..e6a46b512581 100644 --- a/gcc/REVISION +++ b/gcc/REVISION @@ -1 +1 @@ -work212-orig branch +work212-submit branch
[gcc r16-1659] RISC-V: Add patterns for vector-scalar multiply-(subtract-)accumulate [PR119100]
https://gcc.gnu.org/g:92e1893e0155b6b3baef2a935efd5936d23a67ea commit r16-1659-g92e1893e0155b6b3baef2a935efd5936d23a67ea Author: Paul-Antoine Arras Date: Tue Jun 24 15:42:50 2025 -0600 RISC-V: Add patterns for vector-scalar multiply-(subtract-)accumulate [PR119100] This pattern enables the combine pass (or late-combine, depending on the case) to merge a vec_duplicate into a plus-mult or minus-mult RTL instruction. Before this patch, we have two instructions, e.g.: vfmv.v.f v6,fa0 vfmacc.vv v2,v6,v4 After, we get only one: vfmacc.vf v2,fa0,v4 PR target/119100 gcc/ChangeLog: * config/riscv/autovec-opt.md (*_vf_): Handle both add and acc FMA variants. * config/riscv/vector.md (*pred_mul__scalar_undef): New. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c: Add vfmacc and vfmsac. * gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c: Likewise. * gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f64.c: Likewise. * gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c: Likewise. * gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c: Likewise. * gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f64.c: Likewise. * gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c: Likewise. * gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c: Likewise. * gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f64.c: Likewise. * gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c: Likewise. * gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c: Likewise. * gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f64.c: Likewise. * gcc.target/riscv/rvv/autovec/vx_vf/vf_mulop.h: Add support for acc variants. * gcc.target/riscv/rvv/autovec/vx_vf/vf_mulop_run.h: Likewise. * gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmadd-run-1-f16.c: Define TEST_OUT. * gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmadd-run-1-f32.c: Likewise. * gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmadd-run-1-f64.c: Likewise. * gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsub-run-1-f16.c: Likewise. * gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsub-run-1-f32.c: Likewise. 
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsub-run-1-f64.c: Likewise. * gcc.target/riscv/rvv/autovec/vx_vf/vf_vfnmadd-run-1-f16.c: Likewise. * gcc.target/riscv/rvv/autovec/vx_vf/vf_vfnmadd-run-1-f32.c: Likewise. * gcc.target/riscv/rvv/autovec/vx_vf/vf_vfnmadd-run-1-f64.c: Likewise. * gcc.target/riscv/rvv/autovec/vx_vf/vf_vfnmsub-run-1-f16.c: Likewise. * gcc.target/riscv/rvv/autovec/vx_vf/vf_vfnmsub-run-1-f32.c: Likewise. * gcc.target/riscv/rvv/autovec/vx_vf/vf_vfnmsub-run-1-f64.c: Likewise. * gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmacc-run-1-f16.c: New test. * gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmacc-run-1-f32.c: New test. * gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmacc-run-1-f64.c: New test. * gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsac-run-1-f16.c: New test. * gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsac-run-1-f32.c: New test. * gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsac-run-1-f64.c: New test. Diff: --- gcc/config/riscv/autovec-opt.md| 14 +++-- gcc/config/riscv/vector.md | 37 +++- .../gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c | 4 ++ .../gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c | 4 ++ .../gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f64.c | 4 ++ .../gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c | 9 +-- .../gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c | 9 +-- .../gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f64.c | 9 +-- .../gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c | 4 ++ .../gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c | 4 ++ .../gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f64.c | 4 ++ .../gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c | 9 +-- .../gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c | 9 +-- .../gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f64.c | 9 +-- .../gcc.target/riscv/rvv/autovec/vx_vf/vf_mulop.h | 66 ++ .../riscv/rvv/autovec/vx_vf/vf_mulop_run.h | 8 +-- .../riscv/rvv/autovec/vx_vf/vf_vfmacc-run-1-f16.c | 16 ++ .../riscv/rvv/autovec/vx_vf/vf_vfmacc-run-1-f32.c | 16 ++ .../riscv/rvv/autovec/vx_vf/vf_vfmacc-run-1-f64.c | 16 ++ 
.../riscv/rvv/autovec/vx_vf/vf_vfmadd-run-1-f16.c | 1 + .../riscv/rvv/autovec/vx_vf/vf_vfmadd-run-1-f32.c | 1 + .../riscv/rvv/autovec/vx_vf/vf_vfmadd-run-1-f64.c | 1 + .../riscv/rvv/autovec/vx_vf/vf_vfmsac-run-1-f16.c | 16 ++ .../riscv/rvv/autovec/vx_vf/vf_vfmsac-run-1-f32.c | 16 ++ .../riscv/rvv/autovec/vx_vf/vf
[gcc r12-11220] ipa/111245 - bogus modref analysis for store in call that might throw
https://gcc.gnu.org/g:83f764a9ac925d479ad3fee8c44e6053adb3475a commit r12-11220-g83f764a9ac925d479ad3fee8c44e6053adb3475a Author: Richard Biener Date: Fri Feb 28 11:44:26 2025 +0100 ipa/111245 - bogus modref analysis for store in call that might throw We currently record a kill for *x_4(D) = always_throws (); because we consider the store always executing since the appropriate check for whether the stmt could throw is guarded by !cfun->can_throw_non_call_exceptions. PR ipa/111245 * ipa-modref.cc (modref_access_analysis::analyze_store): Do not guard the check of whether the stmt could throw by cfun->can_throw_non_call_exceptions. * g++.dg/torture/pr111245.C: New testcase. (cherry picked from commit e6037af6d5e5a43c437257580d75bc8b35a6dcfd) Diff: --- gcc/ipa-modref.cc | 3 +-- gcc/testsuite/g++.dg/torture/pr111245.C | 23 +++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/gcc/ipa-modref.cc b/gcc/ipa-modref.cc index ba7f21834ef9..5648409d60e8 100644 --- a/gcc/ipa-modref.cc +++ b/gcc/ipa-modref.cc @@ -1745,8 +1745,7 @@ modref_access_analysis::analyze_store (gimple *stmt, tree, tree op, void *data) t->record_access_lto (t->m_summary_lto->stores, &r, a); if (t->m_always_executed && a.useful_for_kill_p () - && (!cfun->can_throw_non_call_exceptions - || !stmt_could_throw_p (cfun, stmt))) + && !stmt_could_throw_p (cfun, stmt)) { if (dump_file) fprintf (dump_file, " - Recording kill\n"); diff --git a/gcc/testsuite/g++.dg/torture/pr111245.C b/gcc/testsuite/g++.dg/torture/pr111245.C new file mode 100644 index ..785f4a51761d --- /dev/null +++ b/gcc/testsuite/g++.dg/torture/pr111245.C @@ -0,0 +1,23 @@ +/* { dg-do run } */ + +struct Int { + int value; +}; + +__attribute__((noipa)) Int always_throws() { throw 123; } + +void foo(Int &x) { + try { +x = always_throws(); + } catch (...) { + } +} + +int main() +{ + Int x; + x.value = 5; + foo(x); + if (x.value != 5) +__builtin_abort (); +}
[gcc r12-11225] tree-optimization/117113 - ICE with unroll-and-jam
https://gcc.gnu.org/g:f4dbdeabb2944d014d506a537a576a6f9a1f4c1f commit r12-11225-gf4dbdeabb2944d014d506a537a576a6f9a1f4c1f Author: Richard Biener Date: Mon Feb 3 15:12:52 2025 +0100 tree-optimization/117113 - ICE with unroll-and-jam When there's an inner loop without virtual header PHI but the outer loop has one the fusion process cannot handle the need to create an inner loop virtual header PHI. Punt in this case. PR tree-optimization/117113 * gimple-loop-jam.cc (unroll_jam_possible_p): Detect when we cannot handle virtual SSA update. * gcc.dg/torture/pr117113.c: New testcase. (cherry picked from commit 0675eb17480bada678bf2769d39732027abcd6d0) Diff: --- gcc/gimple-loop-jam.cc | 12 +++- gcc/testsuite/gcc.dg/torture/pr117113.c | 20 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/gcc/gimple-loop-jam.cc b/gcc/gimple-loop-jam.cc index e33dd9091df3..22d518d9fb0d 100644 --- a/gcc/gimple-loop-jam.cc +++ b/gcc/gimple-loop-jam.cc @@ -278,13 +278,17 @@ unroll_jam_possible_p (class loop *outer, class loop *loop) body would be the after-iter value of the first body) if it's over an associative and commutative operation. We wouldn't be able to handle unknown cycles. */ + bool inner_vdef = false; for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi)) { affine_iv iv; tree op = gimple_phi_result (psi.phi ()); if (virtual_operand_p (op)) - continue; + { + inner_vdef = true; + continue; + } if (!simple_iv (loop, loop, op, &iv, true)) return false; /* The inductions must be regular, loop invariant step and initial @@ -300,6 +304,12 @@ unroll_jam_possible_p (class loop *outer, class loop *loop) copy, _not_ the next value of the second body. */ } + /* When there's no inner loop virtual PHI IV we cannot handle the update + required to the inner loop if that doesn't already have one. See + PR117113. 
*/ + if (!inner_vdef && get_virtual_phi (outer->header)) +return false; + return true; } diff --git a/gcc/testsuite/gcc.dg/torture/pr117113.c b/gcc/testsuite/gcc.dg/torture/pr117113.c new file mode 100644 index ..e90ad034a4d3 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr117113.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-fno-tree-dce -fno-inline" } */ + +int a, b, c; +volatile int d[1]; +void e() {} +void f(int g) {} +int main() { + int i; + for (; b; b--) { +for (i = 0; i < 3; i++) { + e(); + f(d[0]); + d[0]; +} +if (a) + c++; + } + return 0; +}
[gcc r16-1652] gcc: remove atan from edom_only_function
https://gcc.gnu.org/g:0606d2b979f4014b1dd6a1e6b030630ec5586fd3 commit r16-1652-g0606d2b979f4014b1dd6a1e6b030630ec5586fd3 Author: Yuao Ma Date: Tue Jun 24 00:06:16 2025 +0800 gcc: remove atan from edom_only_function According to the man page, atan does not produce an error. According to the C23 standard draft (N3088), a range error occurs for atan if a nonzero x is too close to zero. Neither of them mentions that atan will result in a domain error. gcc/ChangeLog: * tree-call-cdce.cc (edom_only_function): Remove atan. Signed-off-by: Yuao Ma Diff: --- gcc/tree-call-cdce.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/gcc/tree-call-cdce.cc b/gcc/tree-call-cdce.cc index 9ca5fda5126d..712ec7c8920d 100644 --- a/gcc/tree-call-cdce.cc +++ b/gcc/tree-call-cdce.cc @@ -353,8 +353,6 @@ edom_only_function (gcall *call) CASE_FLT_FN_FLOATN_NX (BUILT_IN_ACOS): CASE_FLT_FN (BUILT_IN_ASIN): CASE_FLT_FN_FLOATN_NX (BUILT_IN_ASIN): -CASE_FLT_FN (BUILT_IN_ATAN): -CASE_FLT_FN_FLOATN_NX (BUILT_IN_ATAN): CASE_FLT_FN (BUILT_IN_COS): CASE_FLT_FN_FLOATN_NX (BUILT_IN_COS): CASE_FLT_FN (BUILT_IN_SIGNIFICAND):
[gcc r16-1654] diagnostic: fix for older version of GCC
https://gcc.gnu.org/g:3f1986766c6efcd0f444902571b7a58f015267c5 commit r16-1654-g3f1986766c6efcd0f444902571b7a58f015267c5 Author: Marc Poulhiès Date: Tue Jun 24 15:12:30 2025 +0200 diagnostic: fix for older version of GCC Having both an enum and a variable with the same name triggers an error with gcc 5. gcc/ChangeLog: * diagnostic-state-to-dot.cc (get_color_for_dynalloc_state): Rename argument dynalloc_state to dynalloc_st. (add_title_tr): Rename argument style to styl. (on_xml_node): Rename local variable dynalloc_state to dynalloc_st. Diff: --- gcc/diagnostic-state-to-dot.cc | 16 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/gcc/diagnostic-state-to-dot.cc b/gcc/diagnostic-state-to-dot.cc index b6d7ec5a082b..ddae83b85cd2 100644 --- a/gcc/diagnostic-state-to-dot.cc +++ b/gcc/diagnostic-state-to-dot.cc @@ -51,9 +51,9 @@ enum class dynalloc_state }; static const char * -get_color_for_dynalloc_state (enum dynalloc_state dynalloc_state) +get_color_for_dynalloc_state (enum dynalloc_state dynalloc_st) { - switch (dynalloc_state) + switch (dynalloc_st) { default: gcc_unreachable (); @@ -242,7 +242,7 @@ private: int num_columns, const xml::element &input_element, std::string heading, - enum style style, + enum style styl, enum dynalloc_state dynalloc_state) { xp.push_tag ("tr", true); @@ -258,7 +258,7 @@ private: color = "white"; } else - switch (style) + switch (styl) { default: gcc_unreachable (); @@ -323,12 +323,12 @@ private: else if (input_element->m_kind == "heap-buffer") { const char *extents = input_element->get_attr ("dynamic-extents"); - enum dynalloc_state dynalloc_state = get_dynalloc_state (*input_element); + enum dynalloc_state dynalloc_st = get_dynalloc_state (*input_element); if (auto region_id = input_element->get_attr ("region_id")) - m_region_id_to_dynalloc_state[region_id] = dynalloc_state; + m_region_id_to_dynalloc_state[region_id] = dynalloc_st; const char *type = input_element->get_attr ("type"); pretty_printer pp; - switch 
(dynalloc_state) + switch (dynalloc_st) { default: gcc_unreachable (); @@ -375,7 +375,7 @@ private: add_title_tr (id_of_node, xp, num_columns, *input_element, pp_formatted_text (&pp), style::h2, - dynalloc_state); + dynalloc_st); } else {
[gcc r16-1658] Fortran: fix ICE in verify_gimple_in_seq with substrings [PR120743]
https://gcc.gnu.org/g:5bc92717b804483a17dd5095f8b6d4fd75a472b1 commit r16-1658-g5bc92717b804483a17dd5095f8b6d4fd75a472b1 Author: Harald Anlauf Date: Tue Jun 24 20:46:38 2025 +0200 Fortran: fix ICE in verify_gimple_in_seq with substrings [PR120743] PR fortran/120743 gcc/fortran/ChangeLog: * trans-expr.cc (gfc_conv_substring): Substring indices are of type gfc_charlen_type_node. Convert to size_type_node for pointer arithmetic only after offset adjustments have been made. gcc/testsuite/ChangeLog: * gfortran.dg/pr120743.f90: New test. Co-authored-by: Jerry DeLisle Co-authored-by: Mikael Morin Diff: --- gcc/fortran/trans-expr.cc | 5 +++-- gcc/testsuite/gfortran.dg/pr120743.f90 | 38 ++ 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/gcc/fortran/trans-expr.cc b/gcc/fortran/trans-expr.cc index c8a207609e4b..3e0d763d2fb0 100644 --- a/gcc/fortran/trans-expr.cc +++ b/gcc/fortran/trans-expr.cc @@ -2800,8 +2800,9 @@ gfc_conv_substring (gfc_se * se, gfc_ref * ref, int kind, else if (POINTER_TYPE_P (TREE_TYPE (tmp))) { tree diff; - diff = fold_build2 (MINUS_EXPR, size_type_node, start.expr, - build_one_cst (size_type_node)); + diff = fold_build2 (MINUS_EXPR, gfc_charlen_type_node, start.expr, + build_one_cst (gfc_charlen_type_node)); + diff = fold_convert (size_type_node, diff); se->expr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (tmp), tmp, diff); } diff --git a/gcc/testsuite/gfortran.dg/pr120743.f90 b/gcc/testsuite/gfortran.dg/pr120743.f90 new file mode 100644 index ..8682d0c8859e --- /dev/null +++ b/gcc/testsuite/gfortran.dg/pr120743.f90 @@ -0,0 +1,38 @@ +! { dg-do compile } +! PR fortran/120743 - ICE in verify_gimple_in_seq with substrings +! +! Testcase as reduced by Jerry DeLisle + +module what + implicit none + CHARACTER(LEN=:), ALLOCATABLE :: attrlist +contains + SUBROUTINE get_c_attr ( attrname, attrval_c ) +! +! returns attrval_c='' if not found +! +IMPLICIT NONE +CHARACTER(LEN=*), INTENT(IN) :: attrname +CHARACTER(LEN=*), INTENT(OUT) :: attrval_c +! 
+CHARACTER(LEN=1) :: quote +INTEGER :: j0, j1 +LOGICAL :: found +! +! search for attribute name in attrlist: attr1="val1" attr2="val2" ... +! +attrval_c = '' +if ( .not. allocated(attrlist) ) return +if ( len_trim(attrlist) < 1 ) return +! +j0 = 1 +do while ( j0 < len_trim(attrlist) ) + ! locate = and first quote + j1 = index ( attrlist(j0:), '=' ) + quote = attrlist(j0+j1:j0+j1) + ! next line: something is not right + if ( quote /= '"' .and. quote /= "'" ) return +end do +! + END SUBROUTINE get_c_attr +end module what
[gcc r16-1651] s390: Fix float vector extract for pre-z13
https://gcc.gnu.org/g:bd9cac12373aecaa0ebee808f805c617f2c15375 commit r16-1651-gbd9cac12373aecaa0ebee808f805c617f2c15375 Author: Juergen Christ Date: Wed Jun 18 15:16:28 2025 +0200 s390: Fix float vector extract for pre-z13 Also provide the vec_extract patterns for floats on pre-z13 machines to prevent ICEing in those cases. gcc/ChangeLog: * config/s390/vector.md (VF): Don't restrict modes. (VEC_SET_SINGLEFLOAT): Ditto. gcc/testsuite/ChangeLog: * gcc.target/s390/vector/vec-extract-1.c: Fix test on arch11. * gcc.target/s390/vector/vec-set-1.c: Run test on arch11. * gcc.target/s390/vector/vec-extract-2.c: New test. Signed-off-by: Juergen Christ Diff: --- gcc/config/s390/vector.md | 4 +- .../gcc.target/s390/vector/vec-extract-1.c | 16 +- .../gcc.target/s390/vector/vec-extract-2.c | 168 + gcc/testsuite/gcc.target/s390/vector/vec-set-1.c | 23 ++- 4 files changed, 187 insertions(+), 24 deletions(-) diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md index 6f4e1929eb80..7251a76c3aea 100644 --- a/gcc/config/s390/vector.md +++ b/gcc/config/s390/vector.md @@ -75,7 +75,7 @@ V1DF V2DF (V1TF "TARGET_VXE") (TF "TARGET_VXE")]) -(define_mode_iterator VF [(V2SF "TARGET_VXE") (V4SF "TARGET_VXE") V2DF]) +(define_mode_iterator VF [V2SF V4SF V2DF]) ; All modes present in V_HW1 and VFT. 
(define_mode_iterator V_HW1_FT [V16QI V8HI V4SI V2DI V1TI V1DF @@ -512,7 +512,7 @@ (define_mode_iterator VEC_SET_NONFLOAT [V1QI V2QI V4QI V8QI V16QI V1HI V2HI V4HI V8HI V1SI V2SI V4SI V1DI V2DI V2SF V4SF]) ; Iterator for single element float vectors -(define_mode_iterator VEC_SET_SINGLEFLOAT [(V1SF "TARGET_VXE") V1DF (V1TF "TARGET_VXE")]) +(define_mode_iterator VEC_SET_SINGLEFLOAT [V1SF V1DF (V1TF "TARGET_VXE")]) ; FIXME: Support also vector mode operands for 1 ; FIXME: A target memory operand seems to be useful otherwise we end diff --git a/gcc/testsuite/gcc.target/s390/vector/vec-extract-1.c b/gcc/testsuite/gcc.target/s390/vector/vec-extract-1.c index 9df7909a3ea8..83af839963be 100644 --- a/gcc/testsuite/gcc.target/s390/vector/vec-extract-1.c +++ b/gcc/testsuite/gcc.target/s390/vector/vec-extract-1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -march=z14 -mzarch" } */ +/* { dg-options "-O2 -march=arch11 -mzarch" } */ /* { dg-final { check-function-bodies "**" "" } } */ typedef double V2DF __attribute__((vector_size(16))); @@ -110,17 +110,6 @@ extractnthfloat (V4SF x, int n) return x[n]; } -/* -** sumfirstfloat: -** vfasb %v0,%v24,%v26 -** br %r14 -*/ -float -sumfirstfloat (V4SF x, V4SF y) -{ - return (x + y)[0]; -} - /* ** extractfirst2: ** vlr %v0,%v24 @@ -179,8 +168,7 @@ extractsingled (V1DF x) /* ** extractsingleld: -** vlr (%v.),%v24 -** vst \1,0\(%r2\),3 +** vst %v24,0\(%r2\),3 ** br %r14 */ long double diff --git a/gcc/testsuite/gcc.target/s390/vector/vec-extract-2.c b/gcc/testsuite/gcc.target/s390/vector/vec-extract-2.c new file mode 100644 index ..640ac0c8c766 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/vector/vec-extract-2.c @@ -0,0 +1,168 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=arch11 -mzarch" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +typedef double V2DF __attribute__((vector_size(16))); +typedef float V4SF __attribute__((vector_size(16))); +typedef float V2SF __attribute__((vector_size(8))); 
+typedef double V1DF __attribute__((vector_size(8))); +typedef float V1SF __attribute__((vector_size(4))); +typedef long double V1TF __attribute__((vector_size(16))); + +/* +** extractfirstdouble: +** vsteg %v24,0\(%r2\),0 +** br %r14 +*/ +void +extractfirstdouble (double *res, V2DF x) +{ + *res = x[0]; +} + +/* +** extractseconddouble: +** vsteg %v24,0\(%r2\),1 +** br %r14 +*/ +void +extractseconddouble (double *res, V2DF x) +{ + *res = x[1]; +} + +/* +** extractnthdouble: +** vlgvg (%r.),%v24,0\(%r3\) +** stg \1,0\(%r2\) +** br %r14 +*/ +void +extractnthdouble (double *res, V2DF x, int n) +{ + *res = x[n]; +} + +/* +** extractfirstfloat: +** vstef %v24,0\(%r2\),0 +** br %r14 +*/ +void +extractfirstfloat (float *res, V4SF x) +{ + *res = x[0]; +} + +/* +** extractsecondfloat: +** vstef %v24,0\(%r2\),1 +** br %r14 +*/ +void +extractsecondfloat (float *res, V4SF x) +{ + *res = x[1]; +} + +/* +** extractthirdfloat: +** vstef %v24,0\(%r2\),2 +** br %r14 +*/ +void +extractthirdfloat (float *res, V4SF x) +{ + *res = x[2]; +} + +/* +** extractfourthfloat: +** vstef %v24,0\(%r2\),3 +** br %r14 +*/ +void +extractfourthfloat (float *res, V4SF x) +{ + *res = x[3]; +} + +/* +** extractnthfloat: +** vlgvf (%r.
[gcc r12-11221] tree-optimization/112859 - bogus loop distribution
https://gcc.gnu.org/g:6258d3f06740c3a77cd7a91606107451d71df68d commit r12-11221-g6258d3f06740c3a77cd7a91606107451d71df68d Author: Richard Biener Date: Thu Jan 23 13:10:17 2025 +0100 tree-optimization/112859 - bogus loop distribution When we get a zero distance vector we still have to check for the situation of a common inner loop with zero distance. But we can still allow a zero distance for the loop we distribute (gcc.dg/tree-ssa/ldist-33.c is such a case). This is because zero distances in non-outermost loops are a misrepresentation of dependence by dependence analysis. Note that test coverage of loop distribution of loop nests is very low. PR tree-optimization/112859 PR tree-optimization/115347 * tree-loop-distribution.cc (loop_distribution::pg_add_dependence_edges): For a zero distance vector still make sure to not have an inner loop with zero distance. * gcc.dg/torture/pr112859.c: New testcase. * gcc.dg/torture/pr115347.c: Likewise. * gcc.dg/tree-ssa/ldist-36.c: Adjust. (cherry picked from commit 04ba1300407f106a6dd10d346f58a51d87e6d43e) Diff: --- gcc/testsuite/gcc.dg/torture/pr112859.c | 24 gcc/testsuite/gcc.dg/torture/pr115347.c | 21 + gcc/testsuite/gcc.dg/tree-ssa/ldist-36.c | 3 ++- gcc/tree-loop-distribution.cc| 27 --- 4 files changed, 63 insertions(+), 12 deletions(-) diff --git a/gcc/testsuite/gcc.dg/torture/pr112859.c b/gcc/testsuite/gcc.dg/torture/pr112859.c new file mode 100644 index ..18f5bf40cb70 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr112859.c @@ -0,0 +1,24 @@ +/* { dg-do run } */ +/* { dg-additional-options "-ftree-loop-distribution" } */ + +struct a { + char b; + int c; +} f, *i = &f; +static struct a e[4]; +int *d, **g = &d; +static int h, j; +int main() +{ + for (; h < 1; h++) { +struct a k = {1, 1}; +for (j = 0; j < 2; j++) { + *i = e[h]; + e[h] = k; +} +*g = 0; + } + if (f.c != 1) +__builtin_abort(); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/torture/pr115347.c b/gcc/testsuite/gcc.dg/torture/pr115347.c new file mode 100644 
index ..2299495144b9 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr115347.c @@ -0,0 +1,21 @@ +/* { dg-do run } */ +/* { dg-additional-options "-ftree-loop-distribution" } */ + +struct a { + int b; + int c; +} d, e[2]; +int f, g, h; +int main() +{ + for (; f < 1; f++) { +for (h = 0; h < 2; h++) { + d = e[f]; + g = e[1].c; + e[f].c = 1; +} + } + if (d.c != 1) +__builtin_abort(); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-36.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-36.c index 07393f0a665a..6d560060e09a 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ldist-36.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-36.c @@ -25,4 +25,5 @@ foo (struct st * restrict p) } } -/* { dg-final { scan-tree-dump-times "Loop nest . distributed: split to 0 loops and 3 library" 1 "ldist" } } */ +/* The cost modeling doesn't consider splitting a WAR re-use profitable. */ +/* { dg-final { scan-tree-dump-times "Loop nest . distributed: split to 1 loops and 1 library" 1 "ldist" } } */ diff --git a/gcc/tree-loop-distribution.cc b/gcc/tree-loop-distribution.cc index 449b9ffd4389..7b4fad238d5d 100644 --- a/gcc/tree-loop-distribution.cc +++ b/gcc/tree-loop-distribution.cc @@ -2122,25 +2122,30 @@ loop_distribution::pg_add_dependence_edges (struct graph *rdg, int dir, gcc.dg/tree-ssa/pr94969.c. */ if (DDR_NUM_DIST_VECTS (ddr) != 1) this_dir = 2; - /* If the overlap is exact preserve stmt order. */ - else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), - DDR_NB_LOOPS (ddr))) - ; - /* Else as the distance vector is lexicographic positive swap -the dependence direction. */ else { - if (DDR_REVERSED_P (ddr)) - this_dir = -this_dir; - this_dir = -this_dir; - + /* If the overlap is exact preserve stmt order. */ + if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), + DDR_NB_LOOPS (ddr))) + ; + /* Else as the distance vector is lexicographic positive swap +the dependence direction. 
*/ + else + { + if (DDR_REVERSED_P (ddr)) + this_dir = -this_dir; + this_dir = -this_dir; + } /* When then dependence distance of the innermost common loop of the DRs is zero we have a conflict. */ au
[gcc r16-1653] libstdc++: Unnecessary type completion in __is_complete_or_unbounded [PR120717]
https://gcc.gnu.org/g:bc8f5424977b74e107543b34af00768cdbb3a3cf commit r16-1653-gbc8f5424977b74e107543b34af00768cdbb3a3cf Author: Patrick Palka Date: Tue Jun 24 09:33:25 2025 -0400 libstdc++: Unnecessary type completion in __is_complete_or_unbounded [PR120717] When checking __is_complete_or_unbounded on a reference to incomplete type, we overeagerly try to instantiate/complete the referenced type which besides being unnecessary may also produce an unexpected -Wsfinae-incomplete warning (added in r16-1527) if the referenced type is later defined. This patch fixes this by effectively restricting the sizeof check to object (except unknown-bound array) types. In passing simplify the implementation by using is_object instead of is_function/reference/void and introducing a __maybe_complete_object_type helper. PR libstdc++/120717 libstdc++-v3/ChangeLog: * include/std/type_traits (__maybe_complete_object_type): New helper trait, factored out from ... (__is_complete_or_unbounded): ... here. Only check sizeof on a __maybe_complete_object_type type. Fix formatting. * testsuite/20_util/is_complete_or_unbounded/120717.cc: New test. Reviewed-by: Tomasz Kamiński Co-authored-by: Jonathan Wakely Reviewed-by: Jonathan Wakely Diff: --- libstdc++-v3/include/std/type_traits | 39 -- .../20_util/is_complete_or_unbounded/120717.cc | 20 +++ 2 files changed, 42 insertions(+), 17 deletions(-) diff --git a/libstdc++-v3/include/std/type_traits b/libstdc++-v3/include/std/type_traits index abff9f880001..055411195f17 100644 --- a/libstdc++-v3/include/std/type_traits +++ b/libstdc++-v3/include/std/type_traits @@ -280,11 +280,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION // Forward declarations template -struct is_reference; - template -struct is_function; - template -struct is_void; +struct is_object; template struct remove_cv; template @@ -294,21 +290,30 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION template struct __is_array_unknown_bounds; + // An object type which is not an unbounded array. 
+ // It might still be an incomplete type, but if this is false_type + // then we can be certain it's not a complete object type. + template +using __maybe_complete_object_type + = __and_, __not_<__is_array_unknown_bounds<_Tp>>>; + // Helper functions that return false_type for incomplete classes, // incomplete unions and arrays of known bound from those. - template -constexpr true_type __is_complete_or_unbounded(__type_identity<_Tp>) -{ return {}; } - - template -constexpr typename __or_< - is_reference<_NestedType>, - is_function<_NestedType>, - is_void<_NestedType>, - __is_array_unknown_bounds<_NestedType> ->::type __is_complete_or_unbounded(_TypeIdentity) + // More specialized overload for complete object types (returning true_type). + template::value>, + size_t = sizeof(_Tp)> +constexpr true_type +__is_complete_or_unbounded(__type_identity<_Tp>) +{ return {}; }; + + // Less specialized overload for reference and unknown-bound array types + // (returning true_type), and incomplete types (returning false_type). + template +constexpr typename __not_<__maybe_complete_object_type<_NestedType>>::type +__is_complete_or_unbounded(_TypeIdentity) { return {}; } // __remove_cv_t (std::remove_cv_t for C++11). diff --git a/libstdc++-v3/testsuite/20_util/is_complete_or_unbounded/120717.cc b/libstdc++-v3/testsuite/20_util/is_complete_or_unbounded/120717.cc new file mode 100644 index ..4c07683d494e --- /dev/null +++ b/libstdc++-v3/testsuite/20_util/is_complete_or_unbounded/120717.cc @@ -0,0 +1,20 @@ +// PR libstdc++/120717 +// { dg-do compile { target c++11 } } +// { dg-additional-options "-Wsfinae-incomplete" } + +#include + +// Verify __is_complete_or_unbounded doesn't try to instantiate the underlying +// type of a reference or array of unknown bound. 
+template struct A { static_assert(false, "do not instantiate"); }; +static_assert(std::__is_complete_or_unbounded(std::__type_identity&>{}), ""); +static_assert(std::__is_complete_or_unbounded(std::__type_identity&&>{}), ""); +static_assert(std::__is_complete_or_unbounded(std::__type_identity[]>{}), ""); + +// Verify __is_complete_or_unbounded doesn't produce unexpected +// -Wsfinae-incomplete warnings. +struct B; +static_assert(std::__is_complete_or_unbounded(std::__type_identity{}), ""); +static_assert(std::__is_complete_or_unbounded(std::__type_identity{}), ""); +static_assert(std::__is_complete_or_unbounded(std::__type_identity{}), ""); +struct B { }; // { dg-bogus "-Wsfinae-incomplete" }
[gcc r12-11226] tree-optimization/117424 - invalid LIM of trapping ref
https://gcc.gnu.org/g:eafe890ea3904c109b6bce663a81a91d61356cb4 commit r12-11226-geafe890ea3904c109b6bce663a81a91d61356cb4 Author: Richard Biener Date: Tue Jan 28 12:28:14 2025 +0100 tree-optimization/117424 - invalid LIM of trapping ref The following addresses a bug in tree_could_trap_p leading to hoisting of a possibly trapping, because of out-of-bound, access. We only ensured the first accessed byte is within a decl there, the patch makes sure the whole base of the reference is within it. This is pessimistic if a handled component would then subset to a sub-object within the decl but upcasting of a decl to larger types should be uncommon, questionable, and wrong without -fno-strict-aliasing. The testcase is a bit fragile, but I could not devise a (portable) way to ensure an out-of-bound access to a decl would fault. PR tree-optimization/117424 * tree-eh.cc (tree_could_trap_p): Verify the base is fully contained within a decl. * gcc.dg/tree-ssa/ssa-lim-25.c: New testcase. (cherry picked from commit f1e776ce58ae4a6ae67886adb4ae806598e2c7ef) Diff: --- gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-25.c | 18 ++ gcc/tree-eh.cc | 9 +++-- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-25.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-25.c new file mode 100644 index ..3e0f013d1e0d --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-25.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-lim2-details" } */ + +char x; + +long foo (int n) +{ + long y = 0; + for (int j = 0; j < 1024; ++j) +for (int i = 0; i < n; ++i) + y += *(long *)&x; + return y; +} + +/* Because *(long *)&x may trap we have to preserve execution and + only hoist it from the innermost loop (after the header check). 
*/ +/* { dg-final { scan-tree-dump-not "out of loop 1" "lim2" } } */ +/* { dg-final { scan-tree-dump "out of loop 2" "lim2" } } */ diff --git a/gcc/tree-eh.cc b/gcc/tree-eh.cc index 85939ca0e895..e3cb99ab67d0 100644 --- a/gcc/tree-eh.cc +++ b/gcc/tree-eh.cc @@ -2729,11 +2729,16 @@ tree_could_trap_p (tree expr) if (TREE_CODE (base) == STRING_CST) return maybe_le (TREE_STRING_LENGTH (base), off); tree size = DECL_SIZE_UNIT (base); + tree refsz = TYPE_SIZE_UNIT (TREE_TYPE (expr)); if (size == NULL_TREE + || refsz == NULL_TREE || !poly_int_tree_p (size) - || maybe_le (wi::to_poly_offset (size), off)) + || !poly_int_tree_p (refsz) + || maybe_le (wi::to_poly_offset (size), off) + || maybe_gt (off + wi::to_poly_offset (refsz), + wi::to_poly_offset (size))) return true; - /* Now we are sure the first byte of the access is inside + /* Now we are sure the whole base of the access is inside the object. */ return false; }
[gcc r16-1657] c++: Implement C++26 P3618R0 - Allow attaching main to the global module [PR120773]
https://gcc.gnu.org/g:ed7fc2b29ead88be30b40ec2c3c51495200b08c4 commit r16-1657-ged7fc2b29ead88be30b40ec2c3c51495200b08c4 Author: Jakub Jelinek Date: Tue Jun 24 19:00:11 2025 +0200 c++: Implement C++26 P3618R0 - Allow attaching main to the global module [PR120773] The following patch implements the P3618R0 paper by tweaking pedwarn condition, adjusting pedwarn wording, adjusting one testcase and adding 4 new ones. The paper was voted in as DR, so it isn't guarded on C++ version. 2025-06-24 Jakub Jelinek PR c++/120773 * decl.cc (grokfndecl): Implement C++26 P3618R0 - Allow attaching main to the global module. Only pedwarn for current_lang_name other than lang_name_cplusplus and adjust pedwarn wording. * g++.dg/parse/linkage5.C: Don't expect error on extern "C++" int main ();. * g++.dg/parse/linkage7.C: New test. * g++.dg/parse/linkage8.C: New test. * g++.dg/modules/main-2.C: New test. * g++.dg/modules/main-3.C: New test. Diff: --- gcc/cp/decl.cc| 4 ++-- gcc/testsuite/g++.dg/modules/main-2.C | 4 gcc/testsuite/g++.dg/modules/main-3.C | 7 +++ gcc/testsuite/g++.dg/parse/linkage5.C | 5 +++-- gcc/testsuite/g++.dg/parse/linkage7.C | 7 +++ gcc/testsuite/g++.dg/parse/linkage8.C | 5 + 6 files changed, 28 insertions(+), 4 deletions(-) diff --git a/gcc/cp/decl.cc b/gcc/cp/decl.cc index febdc89f89dd..95bccfbb585b 100644 --- a/gcc/cp/decl.cc +++ b/gcc/cp/decl.cc @@ -11326,9 +11326,9 @@ grokfndecl (tree ctype, "cannot declare %<::main%> to be %qs", "consteval"); if (!publicp) error_at (location, "cannot declare %<::main%> to be static"); - if (current_lang_depth () != 0) + if (current_lang_name != lang_name_cplusplus) pedwarn (location, OPT_Wpedantic, "cannot declare %<::main%> with a" -" linkage specification"); +" linkage specification other than %"); if (module_attach_p ()) error_at (location, "cannot attach %<::main%> to a named module"); inlinep = 0; diff --git a/gcc/testsuite/g++.dg/modules/main-2.C b/gcc/testsuite/g++.dg/modules/main-2.C new file mode 100644 index 
..8d17381c7fa3 --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/main-2.C @@ -0,0 +1,4 @@ +// { dg-additional-options "-fmodules" } + +export module M; +extern "C++" int main() {} diff --git a/gcc/testsuite/g++.dg/modules/main-3.C b/gcc/testsuite/g++.dg/modules/main-3.C new file mode 100644 index ..10a29360232d --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/main-3.C @@ -0,0 +1,7 @@ +// { dg-additional-options "-fmodules" } + +export module M; +extern "C++" { + int main() {} +} + diff --git a/gcc/testsuite/g++.dg/parse/linkage5.C b/gcc/testsuite/g++.dg/parse/linkage5.C index 451406de69b2..1bd4736906c3 100644 --- a/gcc/testsuite/g++.dg/parse/linkage5.C +++ b/gcc/testsuite/g++.dg/parse/linkage5.C @@ -1,5 +1,6 @@ // { dg-do compile } -// The main function shall not be declared with a linkage-specification. +// The main function shall not be declared with a linkage-specification +// other than "C++". extern "C" { int main(); // { dg-error "linkage" } @@ -9,6 +10,6 @@ namespace foo { extern "C" int main(); // { dg-error "linkage" } } -extern "C++" int main(); // { dg-error "linkage" } +extern "C++" int main(); extern "C" struct S { int main(); }; // OK diff --git a/gcc/testsuite/g++.dg/parse/linkage7.C b/gcc/testsuite/g++.dg/parse/linkage7.C new file mode 100644 index ..91caf265305b --- /dev/null +++ b/gcc/testsuite/g++.dg/parse/linkage7.C @@ -0,0 +1,7 @@ +// { dg-do compile } +// The main function shall not be declared with a linkage-specification +// other than "C++". + +extern "C++" { + int main(); +} diff --git a/gcc/testsuite/g++.dg/parse/linkage8.C b/gcc/testsuite/g++.dg/parse/linkage8.C new file mode 100644 index ..b757ed55b559 --- /dev/null +++ b/gcc/testsuite/g++.dg/parse/linkage8.C @@ -0,0 +1,5 @@ +// { dg-do compile } +// The main function shall not be declared with a linkage-specification +// other than "C++". + +extern "C" int main(); // { dg-error "linkage" }
[gcc(refs/users/meissner/heads/work212-dmf)] Add ChangeLog.dmf and update REVISION.
https://gcc.gnu.org/g:d59cb20302af4866bd5dd0ea3cd60a8e3445f6fc commit d59cb20302af4866bd5dd0ea3cd60a8e3445f6fc Author: Michael Meissner Date: Tue Jun 24 12:05:41 2025 -0400 Add ChangeLog.dmf and update REVISION. 2025-06-24 Michael Meissner gcc/ * ChangeLog.dmf: New file for branch. * REVISION: Update. Diff: --- gcc/ChangeLog.dmf | 14 ++ gcc/REVISION | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/gcc/ChangeLog.dmf b/gcc/ChangeLog.dmf new file mode 100644 index ..6ad2c29a0e45 --- /dev/null +++ b/gcc/ChangeLog.dmf @@ -0,0 +1,14 @@ + Branch work212-dmf, baseline + +2025-06-24 Michael Meissner + +Add ChangeLog.dmf and update REVISION. + +2025-06-24 Michael Meissner + +gcc/ + + * ChangeLog.dmf: New file for branch. + * REVISION: Update. + + Clone branch diff --git a/gcc/REVISION b/gcc/REVISION index cb5771ab4fa7..45dac7096f85 100644 --- a/gcc/REVISION +++ b/gcc/REVISION @@ -1 +1 @@ -work212 branch +work212-dmf branch
[gcc r16-1664] Add -fauto-profile-inlining
https://gcc.gnu.org/g:aaf55e09b3d97164615e783d98cfa842f382559d commit r16-1664-gaaf55e09b3d97164615e783d98cfa842f382559d Author: Jan Hubicka Date: Wed Jun 25 03:01:29 2025 +0200 Add -fauto-profile-inlining This patch adds -fauto-profile-inlining which can be used to control the auto-profile directed inlining. gcc/ChangeLog: * common.opt: (fauto-profile-inlining): New * doc/invoke.texi (-fauto-profile-inlining): Document. * ipa-inline.cc (inline_functions_by_afdo): Check flag_auto_profile. (early_inliner): Also do inline_functions_by_afdo with !flag_early_inlining. Diff: --- gcc/common.opt | 4 gcc/doc/invoke.texi | 8 +++- gcc/ipa-inline.cc | 21 - 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/gcc/common.opt b/gcc/common.opt index 0e50305dde8e..a76a6920b54c 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -1187,6 +1187,10 @@ Common Joined RejectNegative Var(auto_profile_file) Use sample profile information for call graph node weights. The profile file is specified in the argument. +fauto-profile-inlining +Common Var(flag_auto_profile_inlining) Init(1) Optimization +Perform inlining using auto-profile. + ; -fcheck-bounds causes gcc to generate array bounds checks. ; For C, C++ and ObjC: defaults off. ; For Java: defaults to on. diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index a0c6d3d082e6..95790f7bd171 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -573,7 +573,7 @@ Objective-C and Objective-C++ Dialects}. -fmin-function-alignment=[@var{n}] -fno-allocation-dce -fallow-store-data-races -fassociative-math -fauto-profile -fauto-profile[=@var{path}] --fauto-inc-dec -fbranch-probabilities +-fauto-profile-inlining -fauto-inc-dec -fbranch-probabilities -fcaller-saves -fcombine-stack-adjustments -fconserve-stack -ffold-mem-offsets @@ -15502,6 +15502,12 @@ E.g. 
create_gcov --binary=your_program.unstripped --profile=perf.data \ --gcov=profile.afdo @end smallexample + +@opindex fauto-profile-inlining +@item -fauto-profile-inlining +When auto-profile is available inline all relevant functions which was +inlined in the tran run before reading the profile feedback. This improves +context sensitivity of the profile. Enabled by default. @end table The following options control compiler behavior regarding floating-point diff --git a/gcc/ipa-inline.cc b/gcc/ipa-inline.cc index a960d55b661d..ca605b027dcf 100644 --- a/gcc/ipa-inline.cc +++ b/gcc/ipa-inline.cc @@ -3120,7 +3120,7 @@ early_inline_small_functions (struct cgraph_node *node) static bool inline_functions_by_afdo (struct cgraph_node *node, bool *speculative_calls) { - if (!flag_auto_profile) + if (!flag_auto_profile || !flag_auto_profile_inlining) return false; struct cgraph_edge *e; bool inlined = false; @@ -3320,6 +3320,25 @@ early_inliner (function *fun) fprintf (dump_file, "Iterations: %i\n", iterations); } + /* do AFDO inlining in case it was not done as part of early inlining. */ + if (optimize + && !flag_no_inline + && !flag_early_inlining + && flag_auto_profile_inlining) +{ + bool speculative_calls = false; + inlined |= inline_functions_by_afdo (node, &speculative_calls); + if (speculative_calls) + { + cgraph_edge *next; + for (cgraph_edge *e = node->callees; e; e = next) + { + next = e->next_callee; + cgraph_edge::redirect_call_stmt_to_callee (e); + } + } +} + if (inlined) { timevar_push (TV_INTEGRATION);
[gcc r14-11861] i386: Remove CLDEMOTE for clients
https://gcc.gnu.org/g:11b03928bab9a52e4ec43a3d5a0ab85e5a8ee67a commit r14-11861-g11b03928bab9a52e4ec43a3d5a0ab85e5a8ee67a Author: Haochen Jiang Date: Tue Jun 17 14:08:38 2025 +0800 i386: Remove CLDEMOTE for clients CLDEMOTE is not enabled on clients according to SDM. SDM only mentioned it will be enabled on Xeon and Atom servers, not clients. Remove them since Alder Lake (where it is introduced). gcc/ChangeLog: * config/i386/i386.h (PTA_ALDERLAKE): Use PTA_GOLDMONT_PLUS as base to remove PTA_CLDEMOTE. (PTA_SIERRAFOREST): Add PTA_CLDEMOTE since PTA_ALDERLAKE does not include that anymore. * doc/invoke.texi: Update texi file. Diff: --- gcc/config/i386/i386.h | 8 +--- gcc/doc/invoke.texi| 29 ++--- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 2fc82b175e6d..6a833fd8dbd2 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -2415,12 +2415,14 @@ constexpr wide_int_bitmask PTA_GOLDMONT_PLUS = PTA_GOLDMONT | PTA_RDPID | PTA_SGX | PTA_PTWRITE; constexpr wide_int_bitmask PTA_TREMONT = PTA_GOLDMONT_PLUS | PTA_CLWB | PTA_GFNI | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_CLDEMOTE | PTA_WAITPKG; -constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_TREMONT | PTA_ADX | PTA_AVX +constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_GOLDMONT_PLUS | PTA_CLWB + | PTA_GFNI | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_WAITPKG | PTA_ADX | PTA_AVX | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_LZCNT | PTA_PCONFIG | PTA_PKU | PTA_VAES | PTA_VPCLMULQDQ | PTA_SERIALIZE | PTA_HRESET | PTA_KL | PTA_WIDEKL | PTA_AVXVNNI; -constexpr wide_int_bitmask PTA_SIERRAFOREST = PTA_ALDERLAKE | PTA_AVXIFMA - | PTA_AVXVNNIINT8 | PTA_AVXNECONVERT | PTA_CMPCCXADD | PTA_ENQCMD | PTA_UINTR; +constexpr wide_int_bitmask PTA_SIERRAFOREST = PTA_ALDERLAKE | PTA_CLDEMOTE + | PTA_AVXIFMA | PTA_AVXVNNIINT8 | PTA_AVXNECONVERT | PTA_CMPCCXADD + | PTA_ENQCMD | PTA_UINTR; constexpr wide_int_bitmask PTA_GRANITERAPIDS = PTA_SAPPHIRERAPIDS | PTA_AMX_FP16 | 
PTA_PREFETCHI; constexpr wide_int_bitmask PTA_GRANITERAPIDS_D = PTA_GRANITERAPIDS diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 64728fead512..d8ff23447f45 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -34514,37 +34514,36 @@ VPCLMULQDQ, AVX512BITALG, RDPID and AVX512VPOPCNTDQ instruction set support. Intel Alder Lake/Raptor Lake/Meteor Lake/Gracemont CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, XSAVEC, XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, -GFNI-SSE, CLWB, MOVDIRI, MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, -BMI2, F16C, FMA, LZCNT, PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, -WIDEKL and AVX-VNNI instruction set support. +GFNI-SSE, CLWB, MOVDIRI, MOVDIR64B, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, +FMA, LZCNT, PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL and +AVX-VNNI instruction set support. @item arrowlake Intel Arrow Lake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, XSAVEC, XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, MOVDIRI, -MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT, -PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, -UINTR, AVXIFMA, AVXVNNIINT8, AVXNECONVERT and CMPCCXADD instruction set -support. +MOVDIR64B, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT, PCONFIG, PKU, +VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, UINTR, AVXIFMA, +AVXVNNIINT8, AVXNECONVERT and CMPCCXADD instruction set support. 
@item arrowlake-s @itemx lunarlake Intel Arrow Lake S/Lunar Lake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, XSAVEC, XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, -MOVDIRI, MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, -LZCNT, PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, -UINTR, AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, AVXVNNIINT16, SHA512, -SM3 and SM4 instruction set support. +MOVDIRI, MOVDIR64B, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT, +PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, UINTR, +AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, AVXVNNIINT16, SHA512, SM3 and +SM4 instruction set support. @item pantherlake Intel Panther Lake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, XSAVEC, XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, MOVDIRI, -MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT, -PCONFIG, PKU, VAES, VPCL
[gcc r13-9771] i386: Remove CLDEMOTE for clients
https://gcc.gnu.org/g:b5bdebeca28e19422bbd4e48fa6fc50371520cfe commit r13-9771-gb5bdebeca28e19422bbd4e48fa6fc50371520cfe Author: Haochen Jiang Date: Tue Jun 17 14:08:38 2025 +0800 i386: Remove CLDEMOTE for clients CLDEMOTE is not enabled on clients according to SDM. SDM only mentioned it will be enabled on Xeon and Atom servers, not clients. Remove them since Alder Lake (where it is introduced). gcc/ChangeLog: * config/i386/i386.h (PTA_ALDERLAKE): Use PTA_GOLDMONT_PLUS as base to remove PTA_CLDEMOTE. (PTA_SIERRAFOREST): Add PTA_CLDEMOTE since PTA_ALDERLAKE does not include that anymore. * doc/invoke.texi: Update texi file. Diff: --- gcc/config/i386/i386.h | 8 +--- gcc/doc/invoke.texi| 10 +- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index c147ff8732c9..d147d373e8c9 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -2362,12 +2362,14 @@ constexpr wide_int_bitmask PTA_GOLDMONT_PLUS = PTA_GOLDMONT | PTA_RDPID | PTA_SGX | PTA_PTWRITE; constexpr wide_int_bitmask PTA_TREMONT = PTA_GOLDMONT_PLUS | PTA_CLWB | PTA_GFNI | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_CLDEMOTE | PTA_WAITPKG; -constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_TREMONT | PTA_ADX | PTA_AVX +constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_GOLDMONT_PLUS | PTA_CLWB + | PTA_GFNI | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_WAITPKG | PTA_ADX | PTA_AVX | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_LZCNT | PTA_PCONFIG | PTA_PKU | PTA_VAES | PTA_VPCLMULQDQ | PTA_SERIALIZE | PTA_HRESET | PTA_KL | PTA_WIDEKL | PTA_AVXVNNI; -constexpr wide_int_bitmask PTA_SIERRAFOREST = PTA_ALDERLAKE | PTA_AVXIFMA - | PTA_AVXVNNIINT8 | PTA_AVXNECONVERT | PTA_CMPCCXADD | PTA_ENQCMD | PTA_UINTR; +constexpr wide_int_bitmask PTA_SIERRAFOREST = PTA_ALDERLAKE | PTA_CLDEMOTE + | PTA_AVXIFMA | PTA_AVXVNNIINT8 | PTA_AVXNECONVERT | PTA_CMPCCXADD + | PTA_ENQCMD | PTA_UINTR; constexpr wide_int_bitmask PTA_GRANITERAPIDS = PTA_SAPPHIRERAPIDS | PTA_AMX_FP16 | 
PTA_PREFETCHI; constexpr wide_int_bitmask PTA_GRANITERAPIDS_D = PTA_GRANITERAPIDS diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index b80966e13539..00d2e4950a38 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -32565,11 +32565,11 @@ VPCLMULQDQ, AVX512BITALG, RDPID and AVX512VPOPCNTDQ instruction set support. @item alderlake @itemx raptorlake @itemx meteorlake -Intel Alder Lake/Raptor Lake/Meteor Lake CPU with 64-bit extensions, MOVBE, MMX, -SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, -XSAVE, XSAVEC, XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, -MOVDIRI, MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, -LZCNT, PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL and +Intel Alder Lake/Raptor Lake/Meteor Lake CPU with 64-bit extensions, MOVBE, +MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, +RDRND, XSAVE, XSAVEC, XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, +GFNI-SSE, CLWB, MOVDIRI, MOVDIR64B, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, +FMA, LZCNT, PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL and AVX-VNNI instruction set support. @item sapphirerapids
[gcc(refs/users/meissner/heads/work212)] Update ChangeLog.*
https://gcc.gnu.org/g:ad8517009d27942cfcb9045b250a940c84d08752 commit ad8517009d27942cfcb9045b250a940c84d08752 Author: Michael Meissner Date: Tue Jun 24 22:14:58 2025 -0400 Update ChangeLog.* Diff: --- gcc/ChangeLog.meissner | 77 ++ 1 file changed, 77 insertions(+) diff --git a/gcc/ChangeLog.meissner b/gcc/ChangeLog.meissner index 06910da6ae46..1e1f67fc1301 100644 --- a/gcc/ChangeLog.meissner +++ b/gcc/ChangeLog.meissner @@ -1,3 +1,80 @@ + Branch work212, patch #3 + +Add -mcpu=future tests. + +This is patch #3 of 4 to add -mcpu=future support to the PowerPC. + +This patch adds simple tests for -mcpu=future. + +I have tested these patches on both big endian and little endian PowerPC +servers, with no regressions. Can I check these patchs into the trunk? + +2025-06-13 Michael Meissner + +gcc/testsuite/ + + * gcc.target/powerpc/future-1.c: New test. + * gcc.target/powerpc/future-2.c: Likewise. + + Branch work212, patch #2 + +Add -mcpu=future tuning support. + +This is patch #2 of 4 to add -mcpu=future support to the PowerPC. + +This patch makes -mtune=future use the same tuning decision as -mtune=power10 or +-mtune=power11. + +I have tested these patches on both big endian and little endian PowerPC +servers, with no regressions. Can I check these patchs into the trunk? + +2025-06-13 Michael Meissner + +gcc/ + + * config/rs6000/power10.md (all reservations): Add future as an + alterntive to power10 and power11. + + Branch work212, patch #1 + +Add support for -mcpu=future + +This is patch #1 of 4 that adds the support that can be used in developing GCC +support for future PowerPC processors. + +I have tested these patches on both big endian and little endian PowerPC +servers, with no regressions. Can I check these patchs into the trunk? + +2025-06-13 Michael Meissner + + * config.gcc (powerpc*-*-*): Add support for --with-cpu=future. + * config/rs6000/aix71.h (ASM_CPU_SPEC): Add support for -mcpu=future. + * config/rs6000/aix72.h (ASM_CPU_SPEC): Likewise. 
+ * config/rs6000/aix73.h (ASM_CPU_SPEC): Likewise. + * config/rs6000/driver-rs6000.cc (asm_names): Likewise. + * config/rs6000/rs6000-c.cc (rs6000_target_modify_macros): If + -mcpu=future, define _ARCH_FUTURE. + * config/rs6000/rs6000-cpus.def (FUTURE_MASKS_SERVER): New macro. + (POWERPC_MASKS): Add OPTION_MASK_FUTURE. + (future cpu): Define. + * config/rs6000/rs6000-opts.h (enum processor_type): Add + PROCESSOR_FUTURE. + * config/rs6000/rs6000-tables.opt: Regenerate. + * config/rs6000/rs6000.cc (power10_cost): Update comment. + (get_arch_flags): Add support for future processor. + (rs6000_option_override_internal): Likewise. + (rs6000_machine_from_flags): Likewise. + (rs6000_reassociation_width): Likewise. + (rs6000_adjust_cost): Likewise. + (rs6000_issue_rate): Likewise. + (rs6000_sched_reorder): Likewise. + (rs6000_sched_reorder2): Likewise. + (rs6000_register_move_cost): Likewise. + (rs6000_opt_masks): Add -mfuture. + * config/rs6000/rs6000.h (ASM_CPU_SPEC): Likewise. + * config/rs6000/rs6000.md (cpu attribute): Likewise. + * config/rs6000/rs6000.opt (-mfuture): New internal option. + Branch work212, baseline 2025-06-24 Michael Meissner
[gcc(refs/users/meissner/heads/work212)] Add ChangeLog.meissner and REVISION.
https://gcc.gnu.org/g:09be5ec3d304a9f9c7baaf9f719d854fd679678c commit 09be5ec3d304a9f9c7baaf9f719d854fd679678c Author: Michael Meissner Date: Tue Jun 24 12:00:53 2025 -0400 Add ChangeLog.meissner and REVISION. 2025-06-24 Michael Meissner gcc/ * REVISION: New file for branch. * ChangeLog.meissner: New file. gcc/c-family/ * ChangeLog.meissner: New file. gcc/c/ * ChangeLog.meissner: New file. gcc/cp/ * ChangeLog.meissner: New file. gcc/fortran/ * ChangeLog.meissner: New file. gcc/testsuite/ * ChangeLog.meissner: New file. libgcc/ * ChangeLog.meissner: New file. Diff: --- gcc/ChangeLog.meissner | 38 ++ gcc/REVISION | 1 + gcc/c-family/ChangeLog.meissner | 38 ++ gcc/c/ChangeLog.meissner | 38 ++ gcc/cp/ChangeLog.meissner| 38 ++ gcc/fortran/ChangeLog.meissner | 38 ++ gcc/testsuite/ChangeLog.meissner | 38 ++ libgcc/ChangeLog.meissner| 38 ++ libstdc++-v3/ChangeLog.meissner | 38 ++ 9 files changed, 305 insertions(+) diff --git a/gcc/ChangeLog.meissner b/gcc/ChangeLog.meissner new file mode 100644 index ..06910da6ae46 --- /dev/null +++ b/gcc/ChangeLog.meissner @@ -0,0 +1,38 @@ + Branch work212, baseline + +2025-06-24 Michael Meissner + +Add ChangeLog.meissner and REVISION. + +2025-06-24 Michael Meissner + +gcc/ + + * REVISION: New file for branch. + * ChangeLog.meissner: New file. + +gcc/c-family/ + + * ChangeLog.meissner: New file. + +gcc/c/ + + * ChangeLog.meissner: New file. + +gcc/cp/ + + * ChangeLog.meissner: New file. + +gcc/fortran/ + + * ChangeLog.meissner: New file. + +gcc/testsuite/ + + * ChangeLog.meissner: New file. + +libgcc/ + + * ChangeLog.meissner: New file. 
+ + Clone branch diff --git a/gcc/REVISION b/gcc/REVISION new file mode 100644 index ..cb5771ab4fa7 --- /dev/null +++ b/gcc/REVISION @@ -0,0 +1 @@ +work212 branch diff --git a/gcc/c-family/ChangeLog.meissner b/gcc/c-family/ChangeLog.meissner new file mode 100644 index ..06910da6ae46 --- /dev/null +++ b/gcc/c-family/ChangeLog.meissner @@ -0,0 +1,38 @@ + Branch work212, baseline + +2025-06-24 Michael Meissner + +Add ChangeLog.meissner and REVISION. + +2025-06-24 Michael Meissner + +gcc/ + + * REVISION: New file for branch. + * ChangeLog.meissner: New file. + +gcc/c-family/ + + * ChangeLog.meissner: New file. + +gcc/c/ + + * ChangeLog.meissner: New file. + +gcc/cp/ + + * ChangeLog.meissner: New file. + +gcc/fortran/ + + * ChangeLog.meissner: New file. + +gcc/testsuite/ + + * ChangeLog.meissner: New file. + +libgcc/ + + * ChangeLog.meissner: New file. + + Clone branch diff --git a/gcc/c/ChangeLog.meissner b/gcc/c/ChangeLog.meissner new file mode 100644 index ..06910da6ae46 --- /dev/null +++ b/gcc/c/ChangeLog.meissner @@ -0,0 +1,38 @@ + Branch work212, baseline + +2025-06-24 Michael Meissner + +Add ChangeLog.meissner and REVISION. + +2025-06-24 Michael Meissner + +gcc/ + + * REVISION: New file for branch. + * ChangeLog.meissner: New file. + +gcc/c-family/ + + * ChangeLog.meissner: New file. + +gcc/c/ + + * ChangeLog.meissner: New file. + +gcc/cp/ + + * ChangeLog.meissner: New file. + +gcc/fortran/ + + * ChangeLog.meissner: New file. + +gcc/testsuite/ + + * ChangeLog.meissner: New file. + +libgcc/ + + * ChangeLog.meissner: New file. + + Clone branch diff --git a/gcc/cp/ChangeLog.meissner b/gcc/cp/ChangeLog.meissner new file mode 100644 index ..06910da6ae46 --- /dev/null +++ b/gcc/cp/ChangeLog.meissner @@ -0,0 +1,38 @@ + Branch work212, baseline + +2025-06-24 Michael Meissner + +Add ChangeLog.meissner and REVISION. + +2025-06-24 Michael Meissner + +gcc/ + + * REVISION: New file for branch. + * ChangeLog.meissner: New file. 
+ +gcc/c-family/ + + * ChangeLog.meissner: New file. + +gcc/c/ + + * ChangeLog.meissner: New file. + +gcc/cp/ + + * ChangeLog.meissner: New file. + +gcc/fortran/ + + * ChangeLog.meissner: New file. + +gcc/testsuite/ + + * ChangeLog.meissner: New file. + +libgcc/ + + * ChangeLog.meissner: New file. + + Clone branch diff --git a/gcc/fortran/ChangeLog.meissner b/gcc/fortran/ChangeLog.meissner new file mode 100644 inde
[gcc r16-1665] RISC-V: Add Profiles RVA/B23S64 support.
https://gcc.gnu.org/g:e858dc702147b7de560afad165e7f16e3ee7d6c9 commit r16-1665-ge858dc702147b7de560afad165e7f16e3ee7d6c9 Author: Jiawei Date: Tue Jun 24 17:34:05 2025 +0800 RISC-V: Add Profiles RVA/B23S64 support. This patch adds support for the RISC-V Profiles RVA23S64 and RVB23S64. gcc/ChangeLog: * common/config/riscv/riscv-common.cc: New Profiles. gcc/testsuite/ChangeLog: * gcc.target/riscv/arch-rva23s.c: New test. * gcc.target/riscv/arch-rvb23s.c: New test. Diff: --- gcc/common/config/riscv/riscv-common.cc | 18 +- gcc/testsuite/gcc.target/riscv/arch-rva23s.c | 14 ++ gcc/testsuite/gcc.target/riscv/arch-rvb23s.c | 12 3 files changed, 43 insertions(+), 1 deletion(-) diff --git a/gcc/common/config/riscv/riscv-common.cc b/gcc/common/config/riscv/riscv-common.cc index 3c25848ccd38..82037a334528 100644 --- a/gcc/common/config/riscv/riscv-common.cc +++ b/gcc/common/config/riscv/riscv-common.cc @@ -295,6 +295,15 @@ static const riscv_profiles riscv_profiles_table[] = "_zicboz_zfhmin_zkt_zvfhmin_zvbb_zvkt_zihintntl_zicond_zimop_zcmop_zcb" "_zfa_zawrs_supm"}, + /* RVA23S contains all mandatory base ISA for RVA23U64 and the privileged + extensions as mandatory extensions. */ + {"rva23s64", "rv64imafdcbv_zicsr_zicntr_zihpm_ziccif_ziccrse_ziccamoa" + "_zicclsm_zic64b_za64rs_zihintpause_zba_zbb_zbs_zicbom_zicbop" + "_zicboz_zfhmin_zkt_zvfhmin_zvbb_zvkt_zihintntl_zicond_zimop_zcmop_zcb" + "_zfa_zawrs_svbare_svade_ssccptr_sstvecd_sstvala_sscounterenw_svpbmt" + "_svinval_svnapot_sstc_sscofpmf_ssnpm_ssu64xl_sha_supm" + }, + /* RVB23 contains all mandatory base ISA for RVA22U64 and the new extension 'zihintntl,zicond,zimop,zcmop,zfa,zawrs' as mandatory extensions. */ @@ -303,7 +312,14 @@ static const riscv_profiles riscv_profiles_table[] = "_zicboz_zfhmin_zkt_zihintntl_zicond_zimop_zcmop_zcb" "_zfa_zawrs"}, - /* Currently we do not define S/M mode Profiles in gcc part. 
*/ + /* RVB23S contains all mandatory base ISA for RVB23U64 and the privileged + extensions as mandatory extensions. */ + {"rvb23s64", "rv64imafdcb_zicsr_zicntr_zihpm_ziccif_ziccrse_ziccamoa" + "_zicclsm_zic64b_za64rs_zihintpause_zba_zbb_zbs_zicbom_zicbop" + "_zicboz_zfhmin_zkt_zvfhmin_zvbb_zvkt_zihintntl_zicond_zimop_zcmop_zcb" + "_zfa_zawrs_svbare_svade_ssccptr_sstvecd_sstvala_sscounterenw_svpbmt" + "_svinval_svnapot_sstc_sscofpmf_ssu64xl_supm" + }, /* Terminate the list. */ {NULL, NULL} diff --git a/gcc/testsuite/gcc.target/riscv/arch-rva23s.c b/gcc/testsuite/gcc.target/riscv/arch-rva23s.c new file mode 100644 index ..215249d52b14 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/arch-rva23s.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rva23s64 -mabi=lp64d" } */ + +void foo(){} + +/* { dg-final { scan-assembler-times ".attribute arch, \"rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0" +"_b1p0_v1p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0" +"_ziccrse1p0_zicntr2p0_zicond1p0_zicsr2p0_zihintntl1p0_zihintpause2p0_zihpm2p0_zimop1p0" +"_za64rs1p0_zaamo1p0_zalrsc1p0_zawrs1p0_zfa1p0_zfhmin1p0_zca1p0_zcb1p0_zcd1p0_zcmop1p0" +"_zba1p0_zbb1p0_zbs1p0_zkt1p0_zvbb1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0" +"_zvfhmin1p0_zvkb1p0_zvkt1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0_sha1p0_shcounterenw1p0" +"_shgatpa1p0_shtvala1p0_shvsatpa1p0_shvstvala1p0_shvstvecd1p0_ssccptr1p0_sscofpmf1p0" +"_sscounterenw1p0_ssnpm1p0_ssstateen1p0_sstc1p0_sstvala1p0_sstvecd1p0_ssu64xl1p0_supm1p0" +"_svade1p0_svbare1p0_svinval1p0_svnapot1p0_svpbmt1p0\" 1} } */ diff --git a/gcc/testsuite/gcc.target/riscv/arch-rvb23s.c b/gcc/testsuite/gcc.target/riscv/arch-rvb23s.c new file mode 100644 index ..aa71f7dad7d7 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/arch-rvb23s.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rvb23s64 -mabi=lp64d" } */ + +void foo(){} + +/* { dg-final { scan-assembler-times ".attribute arch, 
\"rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0" +"_b1p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0" +"_zicntr2p0_zicond1p0_zicsr2p0_zihintntl1p0_zihintpause2p0_zihpm2p0_zimop1p0_zmmul1p0" +"_za64rs1p0_zaamo1p0_zalrsc1p0_zawrs1p0_zfa1p0_zfhmin1p0_zca1p0_zcb1p0_zcd1p0_zcmop1p0" +"_zba1p0_zbb1p0_zbs1p0_zkt1p0_zvbb1p0_zve32f1p0_zve32x1p0_zvfhmin1p0_zvkb1p0_zvkt1p0" +"_zvl32b1p0_ssccptr1p0_sscofpmf1p0_sscounterenw1p0_sstc1p0_sstvala1p0_sstvecd1p0" +"_ssu64xl1p0_supm1p0_svade1p0_svbare1p0_svinval1p0_svnapot1p0_svpbmt1p0\" 1} } */
[gcc r12-11214] c++/79786 - bogus invocation of DATA_ABI_ALIGNMENT macro
https://gcc.gnu.org/g:32ad5415b926ca25e9102309e92561c1a30aa8ff commit r12-11214-g32ad5415b926ca25e9102309e92561c1a30aa8ff Author: Richard Biener Date: Mon Feb 3 11:27:20 2025 +0100 c++/79786 - bogus invocation of DATA_ABI_ALIGNMENT macro The first argument is supposed to be a type, not a decl. PR c++/79786 gcc/cp/ * rtti.cc (emit_tinfo_decl): Fix DATA_ABI_ALIGNMENT invocation. (cherry picked from commit 6ec19825b4e72611cdbd4749feed67b61392aa81) Diff: --- gcc/cp/rtti.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gcc/cp/rtti.cc b/gcc/cp/rtti.cc index f5b43ec0fb25..ac2ee70df634 100644 --- a/gcc/cp/rtti.cc +++ b/gcc/cp/rtti.cc @@ -1707,7 +1707,8 @@ emit_tinfo_decl (tree decl) /* Avoid targets optionally bumping up the alignment to improve vector instruction accesses, tinfo are never accessed this way. */ #ifdef DATA_ABI_ALIGNMENT - SET_DECL_ALIGN (decl, DATA_ABI_ALIGNMENT (decl, TYPE_ALIGN (TREE_TYPE (decl; + SET_DECL_ALIGN (decl, DATA_ABI_ALIGNMENT (TREE_TYPE (decl), + TYPE_ALIGN (TREE_TYPE (decl; DECL_USER_ALIGN (decl) = true; #endif return true;
[gcc r12-11222] tree-optimization/112859 - add comment
https://gcc.gnu.org/g:e8b18dc45950f80d74e62ac7b4e3fa51d430d9cc commit r12-11222-ge8b18dc45950f80d74e62ac7b4e3fa51d430d9cc Author: Richard Biener Date: Tue Jan 28 15:01:25 2025 +0100 tree-optimization/112859 - add comment This adds a comment before the workaround, indicating flaky dependence analysis. PR tree-optimization/112859 * tree-loop-distribution.cc (loop_distribution::pg_add_dependence_edges): Add comment. (cherry picked from commit 3ccbc8c9d182c380e396631b2b5a683de4fddba9) Diff: --- gcc/tree-loop-distribution.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/gcc/tree-loop-distribution.cc b/gcc/tree-loop-distribution.cc index 7b4fad238d5d..8629b7fa75ac 100644 --- a/gcc/tree-loop-distribution.cc +++ b/gcc/tree-loop-distribution.cc @@ -2137,7 +2137,9 @@ loop_distribution::pg_add_dependence_edges (struct graph *rdg, int dir, this_dir = -this_dir; } /* When then dependence distance of the innermost common -loop of the DRs is zero we have a conflict. */ +loop of the DRs is zero we have a conflict. This is +due to wonky dependence analysis which sometimes +ends up using a zero distance in place of unknown. */ auto l1 = gimple_bb (DR_STMT (dr1))->loop_father; auto l2 = gimple_bb (DR_STMT (dr2))->loop_father; int idx = index_in_loop_nest (find_common_loop (l1, l2)->num,
[gcc(refs/users/meissner/heads/work212)] Add -mcpu=future support.
https://gcc.gnu.org/g:777bcd6059e031450bb0c068446d070954979895 commit 777bcd6059e031450bb0c068446d070954979895 Author: Michael Meissner Date: Tue Jun 24 21:19:28 2025 -0400 Add -mcpu=future support. This is patch #1 of 3 that adds the support that can be used in developing GCC support for potential future PowerPC processors. With all 3 patches, the tuning for the 'future' processor is the same as power10 and power11. It may be in the future this tuning will change as any future PowerPC processor evolves. Patch #2 will change the tuning support in power10.md to treat -mtune=future the same as -mtune=power10. Patch #3 will add tests for -mcpu=future. These changes are being added so that hardware designers can evaluate potential new features to be added to the PowerPC processors in the future. It may be these features will be incorporated into real hardware using a different name in the future. Or it may be these features will not be incorporated into actual PowerPC hardware in the future. I have rewritten these patches to make it easier in the future to add new processors that are scheduled like power10 and power11 systems, or remove the 'future' cpu from being scheduled like a power10 if we add new tuning characteristics for potential future processors. I added a new macro (CASE_PROCESSOR_POWER10_TUNING) that expands to a set of PROCESSOR_ cases for processors that should be tuned like a power10 processor. In this patch, power10, power11, and future are selected. I also added a new inline function (power10_tuning_p) that returns true if the processor is to be scheduled like a power10. I have modified the various ASM_CPU_SPEC macros to pass -mfuture to the assembler if -mcpu=future was used. I have updated config.gcc to allow the user to configure the GCC compiler using the --with-cpu=future option. I have tested these patches on both big endian and little endian PowerPC servers, with no regressions. Can I check these patches into the trunk? 
2025-06-24 Michael Meissner gcc/ * config.gcc (powerpc*-*-*): Add support for using --with-cpu=future. * config/rs6000/aix71.h (ASM_CPU_SPEC): Pass -mfuture to the assembler if -mcpu=future was used on the command line. * config/rs6000/aix72.h (ASM_CPU_SPEC): Likewise. * config/rs6000/aix73.h (ASM_CPU_SPEC): Likewise. * config/rs6000/driver-rs6000.cc (asm_names): Likewise. * config/rs6000/rs6000-c.cc (rs6000_target_modify_macros): If -mcpu=future, define the macro _ARCH_FUTURE. * config/rs6000/rs6000-cpus.def (FUTURE_MASKS_SERVER): New macro. (POWERPC_MASKS): Add support for the internal -mfuture that is set by the -mcpu=future option. (future cpu): Add support for -mcpu=future. * config/rs6000/rs6000-opts.h (enum processor_type): Add PROCESSOR_FUTURE. (CASE_PROCESSOR_POWER10_TUNING): New macro. (power10_tuning_p): New inline helper function. * config/rs6000/rs6000-string.cc (expand_compare_loop): Convert PROCESSOR_POWER10 and PROCESSOR_POWER11 cases into CASE_PROCESSOR_POWER10_TUNING to allow -mcpu=future as well. * config/rs6000/rs6000-tables.opt: Regenerate the table, adding the future cpu to the enumeration. * config/rs6000/rs6000.cc (rs6000_option_override_internal): Change tests against PROCESSOR_POWER10 and PROCESSOR_POWER11 to include PROCESSOR_FUTURE as well. (rs6000_reassociation_width): Likewise. (rs6000_adjust_cost): Likewise. (rs6000_issue_rate): Likewise. (rs6000_sched_reorder): Likewise. (rs6000_sched_reorder2): Likewise. (rs6000_register_move_cost): Likewise. * config/rs6000/rs6000.md (cpu attribute): Add future cpu. * config/rs6000/rs6000.h (ASM_CPU_SPEC): Pass -mfuture to the assembler if -mcpu=future was used on the command line. * config/rs6000/rs6000.opt (-mfuture): New internal ISA bit for -mcpu=future. 
Diff: --- gcc/config.gcc | 4 ++-- gcc/config/rs6000/aix71.h | 1 + gcc/config/rs6000/aix72.h | 1 + gcc/config/rs6000/aix73.h | 1 + gcc/config/rs6000/driver-rs6000.cc | 2 ++ gcc/config/rs6000/rs6000-c.cc | 2 ++ gcc/config/rs6000/rs6000-cpus.def | 5 + gcc/config/rs6000/rs6000-opts.h | 22 + gcc/config/rs6000/rs6000-string.cc | 3 +-- gcc/config/rs6000/rs6000-tables.opt | 11 +++ gcc/config/rs6000/rs6000.cc | 39 + gcc/config/rs6000/rs6000.h | 1 + gcc/config/rs6000/rs6000.md
[gcc(refs/users/meissner/heads/work212)] Add -mcpu=future tests.
https://gcc.gnu.org/g:bee22599ac1ac4cc217dbfc432eedb0a4be351ef commit bee22599ac1ac4cc217dbfc432eedb0a4be351ef Author: Michael Meissner Date: Tue Jun 24 22:12:44 2025 -0400 Add -mcpu=future tests. This is patch #3 of 3 to add -mcpu=future support to the PowerPC. Compared to the previous version of this patch, I updated a comment to say _ARCH_FUTURE instead of _ARCH_PWR11, which was a typo. This patch adds simple tests for -mcpu=future. I have tested these patches on both big endian and little endian PowerPC servers, with no regressions. Can I check these patches into the trunk? 2025-06-24 Michael Meissner gcc/testsuite/ * gcc.target/powerpc/future-1.c: New test. * gcc.target/powerpc/future-2.c: Likewise. Diff: --- gcc/testsuite/gcc.target/powerpc/future-1.c | 13 + gcc/testsuite/gcc.target/powerpc/future-2.c | 24 2 files changed, 37 insertions(+) diff --git a/gcc/testsuite/gcc.target/powerpc/future-1.c b/gcc/testsuite/gcc.target/powerpc/future-1.c index e69de29bb2d1..7bd8e5ddbd00 100644 --- a/gcc/testsuite/gcc.target/powerpc/future-1.c +++ b/gcc/testsuite/gcc.target/powerpc/future-1.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-mdejagnu-cpu=future -O2" } */ + +/* Basic check to see if the compiler supports -mcpu=future and if it defines + _ARCH_FUTURE. */ + +#ifndef _ARCH_FUTURE +#error "-mcpu=future is not supported" +#endif + +void foo (void) +{ +} diff --git a/gcc/testsuite/gcc.target/powerpc/future-2.c b/gcc/testsuite/gcc.target/powerpc/future-2.c index e69de29bb2d1..5552cefa3c2e 100644 --- a/gcc/testsuite/gcc.target/powerpc/future-2.c +++ b/gcc/testsuite/gcc.target/powerpc/future-2.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +/* Check if we can set the future target via a target attribute. 
*/ + +__attribute__((__target__("cpu=power9"))) +void foo_p9 (void) +{ +} + +__attribute__((__target__("cpu=power10"))) +void foo_p10 (void) +{ +} + +__attribute__((__target__("cpu=power11"))) +void foo_p11 (void) +{ +} + +__attribute__((__target__("cpu=future"))) +void foo_future (void) +{ +}
[gcc(refs/users/meissner/heads/work212)] Add -mcpu=future tuning support.
https://gcc.gnu.org/g:3b0584f4a13647fd8e935400573af52f0fd0abb2 commit 3b0584f4a13647fd8e935400573af52f0fd0abb2 Author: Michael Meissner Date: Tue Jun 24 22:05:11 2025 -0400 Add -mcpu=future tuning support. This is patch #2 of 3 to add -mcpu=future support to the PowerPC. This patch makes -mtune=future use the same tuning decisions as -mtune=power10 or -mtune=power11. I added a new attribute (power10_tuning) that says whether the current processor is tuned like a power10. This is true for power10, power11, and future processors. I modified all of the: (eq_attr "cpu" "power10,power11") tests to: (eq_attr "power10_tuning" "yes") This will allow us to make one change to add new processors that also use the power10 tuning rules, or we can easily remove processors. For example, we might want to modify the -mtune=future rules in the future. I have tested these patches on both big endian and little endian PowerPC servers, with no regressions. Can I check these patches into the trunk? 2025-06-24 Michael Meissner gcc/ * config/rs6000/power10.md (all reservations): Switch to use the "power10_tuning" attribute for deciding if the current processor is tuned like a power10. * config/rs6000/rs6000.md (power10_tuning): New attribute. 
Diff: --- gcc/config/rs6000/power10.md| 142 ++-- gcc/config/rs6000/rs6000.md | 7 ++ gcc/testsuite/gcc.target/powerpc/future-1.c | 0 gcc/testsuite/gcc.target/powerpc/future-2.c | 0 4 files changed, 78 insertions(+), 71 deletions(-) diff --git a/gcc/config/rs6000/power10.md b/gcc/config/rs6000/power10.md index fd31b16b3314..0efeeb896025 100644 --- a/gcc/config/rs6000/power10.md +++ b/gcc/config/rs6000/power10.md @@ -97,12 +97,12 @@ (eq_attr "update" "no") (eq_attr "size" "!128") (eq_attr "prefixed" "no") - (eq_attr "cpu" "power10,power11")) + (eq_attr "power10_tuning" "yes")) "DU_any_power10,LU_power10") (define_insn_reservation "power10-fused-load" 4 (and (eq_attr "type" "fused_load_cmpi,fused_addis_load,fused_load_load") - (eq_attr "cpu" "power10,power11")) + (eq_attr "power10_tuning" "yes")) "DU_even_power10,LU_power10") (define_insn_reservation "power10-prefixed-load" 4 @@ -110,13 +110,13 @@ (eq_attr "update" "no") (eq_attr "size" "!128") (eq_attr "prefixed" "yes") - (eq_attr "cpu" "power10,power11")) + (eq_attr "power10_tuning" "yes")) "DU_even_power10,LU_power10") (define_insn_reservation "power10-load-update" 4 (and (eq_attr "type" "load") (eq_attr "update" "yes") - (eq_attr "cpu" "power10,power11")) + (eq_attr "power10_tuning" "yes")) "DU_even_power10,LU_power10+SXU_power10") (define_insn_reservation "power10-fpload-double" 4 @@ -124,7 +124,7 @@ (eq_attr "update" "no") (eq_attr "size" "64") (eq_attr "prefixed" "no") - (eq_attr "cpu" "power10,power11")) + (eq_attr "power10_tuning" "yes")) "DU_any_power10,LU_power10") (define_insn_reservation "power10-prefixed-fpload-double" 4 @@ -132,14 +132,14 @@ (eq_attr "update" "no") (eq_attr "size" "64") (eq_attr "prefixed" "yes") - (eq_attr "cpu" "power10,power11")) + (eq_attr "power10_tuning" "yes")) "DU_even_power10,LU_power10") (define_insn_reservation "power10-fpload-update-double" 4 (and (eq_attr "type" "fpload") (eq_attr "update" "yes") (eq_attr "size" "64") - (eq_attr "cpu" "power10,power11")) + (eq_attr 
"power10_tuning" "yes")) "DU_even_power10,LU_power10+SXU_power10") ; SFmode loads are cracked and have additional 3 cycles over DFmode @@ -148,27 +148,27 @@ (and (eq_attr "type" "fpload") (eq_attr "update" "no") (eq_attr "size" "32") - (eq_attr "cpu" "power10,power11")) + (eq_attr "power10_tuning" "yes")) "DU_even_power10,LU_power10") (define_insn_reservation "power10-fpload-update-single" 7 (and (eq_attr "type" "fpload") (eq_attr "update" "yes") (eq_attr "size" "32") - (eq_attr "cpu" "power10,power11")) + (eq_attr "power10_tuning" "yes")) "DU_even_power10,LU_power10+SXU_power10") (define_insn_reservation "power10-vecload" 4 (and (eq_attr "type" "vecload") (eq_attr "size" "!256") - (eq_attr "cpu" "power10,power11")) + (eq_attr "power10_tuning" "yes")) "DU_any_power10,LU_power10") ; lxvp (define_insn_reservation "power10-vecload-pair" 4 (and (eq_attr "type" "vecload") (eq_attr "size" "256") - (eq_attr "cpu" "power10,power11")) + (eq_attr "power10_tuning" "yes")) "DU_even_power10,LU_power10+SXU_power10") ; Store Unit @@ -178,12 +178,12 @@ (eq_attr "prefixed" "no") (eq_attr "
[gcc r12-11219] testsuite: add testcase for fixed PR107467
https://gcc.gnu.org/g:87d788926ba4ccca9a086c138584c10d1e63084d commit r12-11219-g87d788926ba4ccca9a086c138584c10d1e63084d Author: Sam James Date: Mon Oct 21 12:11:42 2024 +0100 testsuite: add testcase for fixed PR107467 PR107467 ended up being fixed by the fix for PR115110, but let's add the testcase on top. gcc/testsuite/ChangeLog: PR tree-optimization/107467 PR middle-end/115110 * g++.dg/lto/pr107467_0.C: New test. (cherry picked from commit 4e09ae37dbe0a10f48490214f50ff733cc92280a) Diff: --- gcc/testsuite/g++.dg/lto/pr107467_0.C | 52 +++ 1 file changed, 52 insertions(+) diff --git a/gcc/testsuite/g++.dg/lto/pr107467_0.C b/gcc/testsuite/g++.dg/lto/pr107467_0.C new file mode 100644 index ..a871aca82459 --- /dev/null +++ b/gcc/testsuite/g++.dg/lto/pr107467_0.C @@ -0,0 +1,52 @@ +/* { dg-lto-do run } */ +/* { dg-lto-options {{ -O2 -fno-strict-aliasing -flto }} } */ + +template +struct pair +{ +int first; +T second; +}; + +template +[[gnu::optimize("strict-aliasing")]] +bool __attribute__((noinline)) +compare_pairs(const pair &lhs, const pair &rhs) { + return lhs.first == rhs.first && lhs.second == rhs.second; +} + +template struct Combined { + pair +__attribute__((noinline)) get_const() { +return pair{123, nullptr}; + } +[[gnu::optimize("strict-aliasing")]] + bool +__attribute__((noinline)) clashy() { +return compare_pairs(get_const(), get_const()); + } +}; + +class SomeClass {}; +class OtherClass {}; + +[[gnu::optimize("strict-aliasing")]] +[[gnu::used]] +void some_func() { + Combined myvar; + __builtin_printf("%i\n", myvar.clashy()); +} + +[[gnu::optimize("strict-aliasing")]] +void other_func() { + Combined myvar; + int t = myvar.clashy(); + if (!t) + __builtin_abort(); +} + +[[gnu::optimize("O0")]] +int main() +{ + other_func(); +}
[gcc] Created branch 'meissner/heads/work212-orig' in namespace 'refs/users'
The branch 'meissner/heads/work212-orig' was created in namespace 'refs/users' pointing to: 63076dbe2153... Remove non-SLP path from vectorizable_load
[gcc r16-1666] i386: Remove CLDEMOTE for clients
https://gcc.gnu.org/g:0c701c7d5fb95681c6d4accfbd6382e99ebf0e82 commit r16-1666-g0c701c7d5fb95681c6d4accfbd6382e99ebf0e82 Author: Haochen Jiang Date: Wed Jun 25 10:34:37 2025 +0800 i386: Remove CLDEMOTE for clients CLDEMOTE is not enabled on clients according to SDM. SDM only mentioned it will be enabled on Xeon and Atom servers, not clients. Remove them since Alder Lake (where it is introduced). gcc/ChangeLog: * config/i386/i386.h (PTA_ALDERLAKE): Use PTA_GOLDMONT_PLUS as base to remove PTA_CLDEMOTE. (PTA_SIERRAFOREST): Add PTA_CLDEMOTE since PTA_ALDERLAKE does not include that anymore. * doc/invoke.texi: Update texi file. Diff: --- gcc/config/i386/i386.h | 8 +--- gcc/doc/invoke.texi| 29 ++--- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 812055085bb5..661fb8e7e52c 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -2446,12 +2446,14 @@ constexpr wide_int_bitmask PTA_GOLDMONT_PLUS = PTA_GOLDMONT | PTA_RDPID | PTA_SGX | PTA_PTWRITE; constexpr wide_int_bitmask PTA_TREMONT = PTA_GOLDMONT_PLUS | PTA_CLWB | PTA_GFNI | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_CLDEMOTE | PTA_WAITPKG; -constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_TREMONT | PTA_ADX | PTA_AVX +constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_GOLDMONT_PLUS | PTA_CLWB + | PTA_GFNI | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_WAITPKG | PTA_ADX | PTA_AVX | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_LZCNT | PTA_PCONFIG | PTA_PKU | PTA_VAES | PTA_VPCLMULQDQ | PTA_SERIALIZE | PTA_HRESET | PTA_KL | PTA_WIDEKL | PTA_AVXVNNI; -constexpr wide_int_bitmask PTA_SIERRAFOREST = PTA_ALDERLAKE | PTA_AVXIFMA - | PTA_AVXVNNIINT8 | PTA_AVXNECONVERT | PTA_CMPCCXADD | PTA_ENQCMD | PTA_UINTR; +constexpr wide_int_bitmask PTA_SIERRAFOREST = PTA_ALDERLAKE | PTA_CLDEMOTE + | PTA_AVXIFMA | PTA_AVXVNNIINT8 | PTA_AVXNECONVERT | PTA_CMPCCXADD + | PTA_ENQCMD | PTA_UINTR; constexpr wide_int_bitmask PTA_GRANITERAPIDS = PTA_SAPPHIRERAPIDS | PTA_AMX_FP16 | 
PTA_PREFETCHI | PTA_AVX10_1; constexpr wide_int_bitmask PTA_GRANITERAPIDS_D = PTA_GRANITERAPIDS diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 95790f7bd171..100bdaf90451 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -34846,37 +34846,36 @@ VPCLMULQDQ, AVX512BITALG, RDPID and AVX512VPOPCNTDQ instruction set support. Intel Alder Lake/Raptor Lake/Meteor Lake/Gracemont CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, XSAVEC, XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, -GFNI-SSE, CLWB, MOVDIRI, MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, -BMI2, F16C, FMA, LZCNT, PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, -WIDEKL and AVX-VNNI instruction set support. +GFNI-SSE, CLWB, MOVDIRI, MOVDIR64B, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, +FMA, LZCNT, PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL and +AVX-VNNI instruction set support. @item arrowlake Intel Arrow Lake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, XSAVEC, XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, MOVDIRI, -MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT, -PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, -UINTR, AVXIFMA, AVXVNNIINT8, AVXNECONVERT and CMPCCXADD instruction set -support. +MOVDIR64B, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT, PCONFIG, PKU, +VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, UINTR, AVXIFMA, +AVXVNNIINT8, AVXNECONVERT and CMPCCXADD instruction set support. 
@item arrowlake-s @itemx lunarlake Intel Arrow Lake S/Lunar Lake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, XSAVEC, XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, -MOVDIRI, MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, -LZCNT, PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, -UINTR, AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, AVXVNNIINT16, SHA512, -SM3 and SM4 instruction set support. +MOVDIRI, MOVDIR64B, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT, +PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, UINTR, +AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, AVXVNNIINT16, SHA512, SM3 and +SM4 instruction set support. @item pantherlake Intel Panther Lake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, XSAVEC, XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, MOVDIRI, -MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT, -PCONFIG, PK
[gcc] Created branch 'meissner/heads/work212-bugs' in namespace 'refs/users'
The branch 'meissner/heads/work212-bugs' was created in namespace 'refs/users' pointing to: 09be5ec3d304... Add ChangeLog.meissner and REVISION.
[gcc r16-1667] x86: Update -mtune=intel for Diamond Rapids/Clearwater Forest
https://gcc.gnu.org/g:7fd6cb3c8488465ae0529f543f5309584961503d commit r16-1667-g7fd6cb3c8488465ae0529f543f5309584961503d Author: H.J. Lu Date: Wed Jun 25 07:40:31 2025 +0800 x86: Update -mtune=intel for Diamond Rapids/Clearwater Forest -mtune=intel is used to generate a single binary to run well on both big core and small core, similar to hybrid CPUs. Update -mtune=intel to tune for Diamond Rapids and Clearwater Forest, instead of Silvermont. PR target/120815 * common/config/i386/i386-common.cc (processor_alias_table): Replace CPU_SLM/PTA_NEHALEM with CPU_HASWELL/PTA_HASWELL for PROCESSOR_INTEL. * config/i386/i386-options.cc (processor_cost_table): Replace intel_cost with alderlake_cost. * config/i386/x86-tune-costs.h (intel_cost): Removed. * config/i386/x86-tune-sched.cc (ix86_issue_rate): Treat PROCESSOR_INTEL like PROCESSOR_ALDERLAKE. (ix86_adjust_cost): Likewise. * doc/invoke.texi: Update -mtune=intel for Diamond Rapids and Clearwater Forest. Signed-off-by: H.J. Lu Diff: --- gcc/common/config/i386/i386-common.cc | 2 +- gcc/config/i386/i386-options.cc | 2 +- gcc/config/i386/x86-tune-costs.h | 121 -- gcc/config/i386/x86-tune-sched.cc | 4 +- gcc/doc/invoke.texi | 4 +- 5 files changed, 6 insertions(+), 127 deletions(-) diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc index 64908ce740a9..dfcd4e9a7276 100644 --- a/gcc/common/config/i386/i386-common.cc +++ b/gcc/common/config/i386/i386-common.cc @@ -2310,7 +2310,7 @@ const pta processor_alias_table[] = M_CPU_TYPE (INTEL_GRANDRIDGE), P_PROC_AVX2}, {"clearwaterforest", PROCESSOR_CLEARWATERFOREST, CPU_HASWELL, PTA_CLEARWATERFOREST, M_CPU_TYPE (INTEL_CLEARWATERFOREST), P_PROC_AVX2}, - {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM, + {"intel", PROCESSOR_INTEL, CPU_HASWELL, PTA_HASWELL, M_VENDOR (VENDOR_INTEL), P_NONE}, {"geode", PROCESSOR_GEODE, CPU_GEODE, PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE, 0, P_NONE}, diff --git a/gcc/config/i386/i386-options.cc 
b/gcc/config/i386/i386-options.cc index d1e321ad74b1..27feeddaf812 100644 --- a/gcc/config/i386/i386-options.cc +++ b/gcc/config/i386/i386-options.cc @@ -797,7 +797,7 @@ static const struct processor_costs *processor_cost_table[] = &alderlake_cost, /* PROCESSOR_ARROWLAKE_S. */ &alderlake_cost, /* PROCESSOR_PANTHERLAKE. */ &icelake_cost, /* PROCESSOR_DIAMONDRAPIDS. */ - &intel_cost, /* PROCESSOR_INTEL. */ + &alderlake_cost, /* PROCESSOR_INTEL. */ &lujiazui_cost, /* PROCESSOR_LUJIAZUI. */ &yongfeng_cost, /* PROCESSOR_YONGFENG. */ &shijidadao_cost,/* PROCESSOR_SHIJIDADAO.*/ diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h index a5b99d1f9629..c8603b982af4 100644 --- a/gcc/config/i386/x86-tune-costs.h +++ b/gcc/config/i386/x86-tune-costs.h @@ -3568,127 +3568,6 @@ struct processor_costs tremont_cost = { COSTS_N_INSNS (2), /* Branch mispredict scale. */ }; -static stringop_algs intel_memcpy[2] = { - {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}}, - {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false}, - {8192, rep_prefix_8_byte, false}, {-1, libcall, false; -static stringop_algs intel_memset[2] = { - {libcall, {{8, loop, false}, {15, unrolled_loop, false}, - {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, - {libcall, {{24, loop, false}, {32, unrolled_loop, false}, - {8192, rep_prefix_8_byte, false}, {-1, libcall, false; -static const -struct processor_costs intel_cost = { - { - /* Start of register allocator costs. integer->integer move cost is 2. */ - 6,/* cost for loading QImode using movzbl */ - {4, 4, 4}, /* cost of loading integer registers - in QImode, HImode and SImode. - Relative to reg-reg move (2). 
*/ - {6, 6, 6}, /* cost of storing integer registers */ - 2, /* cost of reg,reg fld/fst */ - {6, 6, 8}, /* cost of loading fp registers - in SFmode, DFmode and XFmode */ - {6, 6, 10}, /* cost of storing fp registers - in SFmode, DFmode and XFmode */ - 2, /* cost of moving MMX register */ - {6, 6}, /* cost of loading MMX registers - in SImode and DImode */ - {6, 6},
[gcc r15-9858] i386: Remove CLDEMOTE for clients
https://gcc.gnu.org/g:9f817c98403be763e0e265c56522e256ad97329a commit r15-9858-g9f817c98403be763e0e265c56522e256ad97329a Author: Haochen Jiang Date: Tue Jun 17 14:08:38 2025 +0800 i386: Remove CLDEMOTE for clients CLDEMOTE is not enabled on clients according to SDM. SDM only mentioned it will be enabled on Xeon and Atom servers, not clients. Remove them since Alder Lake (where it is introduced). gcc/ChangeLog: * config/i386/i386.h (PTA_ALDERLAKE): Use PTA_GOLDMONT_PLUS as base to remove PTA_CLDEMOTE. (PTA_SIERRAFOREST): Add PTA_CLDEMOTE since PTA_ALDERLAKE does not include that anymore. * doc/invoke.texi: Update texi file. Diff: --- gcc/config/i386/i386.h | 8 +--- gcc/doc/invoke.texi| 29 ++--- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 8507243d726b..ab6ba21830e6 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -2425,12 +2425,14 @@ constexpr wide_int_bitmask PTA_GOLDMONT_PLUS = PTA_GOLDMONT | PTA_RDPID | PTA_SGX | PTA_PTWRITE; constexpr wide_int_bitmask PTA_TREMONT = PTA_GOLDMONT_PLUS | PTA_CLWB | PTA_GFNI | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_CLDEMOTE | PTA_WAITPKG; -constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_TREMONT | PTA_ADX | PTA_AVX +constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_GOLDMONT_PLUS | PTA_CLWB + | PTA_GFNI | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_WAITPKG | PTA_ADX | PTA_AVX | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_LZCNT | PTA_PCONFIG | PTA_PKU | PTA_VAES | PTA_VPCLMULQDQ | PTA_SERIALIZE | PTA_HRESET | PTA_KL | PTA_WIDEKL | PTA_AVXVNNI; -constexpr wide_int_bitmask PTA_SIERRAFOREST = PTA_ALDERLAKE | PTA_AVXIFMA - | PTA_AVXVNNIINT8 | PTA_AVXNECONVERT | PTA_CMPCCXADD | PTA_ENQCMD | PTA_UINTR; +constexpr wide_int_bitmask PTA_SIERRAFOREST = PTA_ALDERLAKE | PTA_CLDEMOTE + | PTA_AVXIFMA | PTA_AVXVNNIINT8 | PTA_AVXNECONVERT | PTA_CMPCCXADD + | PTA_ENQCMD | PTA_UINTR; constexpr wide_int_bitmask PTA_GRANITERAPIDS = PTA_SAPPHIRERAPIDS | PTA_AMX_FP16 | 
PTA_PREFETCHI; constexpr wide_int_bitmask PTA_GRANITERAPIDS_D = PTA_GRANITERAPIDS diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 617a3d8ae182..baaa0c1aed5e 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -35151,37 +35151,36 @@ VPCLMULQDQ, AVX512BITALG, RDPID and AVX512VPOPCNTDQ instruction set support. Intel Alder Lake/Raptor Lake/Meteor Lake/Gracemont CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, XSAVEC, XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, -GFNI-SSE, CLWB, MOVDIRI, MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, -BMI2, F16C, FMA, LZCNT, PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, -WIDEKL and AVX-VNNI instruction set support. +GFNI-SSE, CLWB, MOVDIRI, MOVDIR64B, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, +FMA, LZCNT, PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL and +AVX-VNNI instruction set support. @item arrowlake Intel Arrow Lake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, XSAVEC, XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, MOVDIRI, -MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT, -PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, -UINTR, AVXIFMA, AVXVNNIINT8, AVXNECONVERT and CMPCCXADD instruction set -support. +MOVDIR64B, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT, PCONFIG, PKU, +VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, UINTR, AVXIFMA, +AVXVNNIINT8, AVXNECONVERT and CMPCCXADD instruction set support. 
@item arrowlake-s @itemx lunarlake Intel Arrow Lake S/Lunar Lake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, XSAVEC, XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, -MOVDIRI, MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, -LZCNT, PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, -UINTR, AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, AVXVNNIINT16, SHA512, -SM3 and SM4 instruction set support. +MOVDIRI, MOVDIR64B, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT, +PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, UINTR, +AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, AVXVNNIINT16, SHA512, SM3 and +SM4 instruction set support. @item pantherlake Intel Panther Lake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, XSAVEC, XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, MOVDIRI, -MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT, -PCONFIG, PKU, VAES, VPCLM
[gcc r12-11232] i386: Remove CLDEMOTE for clients
https://gcc.gnu.org/g:ee04d1554aa87d6155bd96e38c91351871273b51 commit r12-11232-gee04d1554aa87d6155bd96e38c91351871273b51 Author: Haochen Jiang Date: Wed Jun 25 11:04:11 2025 +0800 i386: Remove CLDEMOTE for clients CLDEMOTE is not enabled on clients according to SDM. SDM only mentioned it will be enabled on Xeon and Atom servers, not clients. Remove them since Alder Lake (where it is introduced). gcc/ChangeLog: * config/i386/i386.h (PTA_ALDERLAKE): Use PTA_GOLDMONT_PLUS as base to remove PTA_CLDEMOTE. * doc/invoke.texi: Update texi file. Diff: --- gcc/config/i386/i386.h | 3 ++- gcc/doc/invoke.texi| 5 ++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 8416e5b02b64..48c494a73652 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -2350,7 +2350,8 @@ constexpr wide_int_bitmask PTA_GOLDMONT_PLUS = PTA_GOLDMONT | PTA_RDPID | PTA_SGX | PTA_PTWRITE; constexpr wide_int_bitmask PTA_TREMONT = PTA_GOLDMONT_PLUS | PTA_CLWB | PTA_GFNI | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_CLDEMOTE | PTA_WAITPKG; -constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_TREMONT | PTA_ADX | PTA_AVX +constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_GOLDMONT_PLUS | PTA_CLWB + | PTA_GFNI | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_WAITPKG | PTA_ADX | PTA_AVX | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_LZCNT | PTA_PCONFIG | PTA_PKU | PTA_VAES | PTA_VPCLMULQDQ | PTA_SERIALIZE | PTA_HRESET | PTA_KL | PTA_WIDEKL | PTA_AVXVNNI; diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 926b72982e2d..60fbe5e0c7d7 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -31554,9 +31554,8 @@ VPCLMULQDQ, AVX512BITALG, RDPID and AVX512VPOPCNTDQ instruction set support. 
Intel Alder Lake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, XSAVEC, XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, MOVDIRI, MOVDIR64B, -CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT, PCONFIG, PKU, -VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL and AVX-VNNI instruction set -support. +WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT, PCONFIG, PKU, VAES, +VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL and AVX-VNNI instruction set support. @item sapphirerapids Intel Sapphire Rapids CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3,
[gcc r16-1643] x86: Update memcpy/memset inline strategies for -mtune=generic
https://gcc.gnu.org/g:d073bb6cfc219d4b6c283a0b527ee88b42e640e0 commit r16-1643-gd073bb6cfc219d4b6c283a0b527ee88b42e640e0 Author: H.J. Lu Date: Thu Mar 18 18:43:10 2021 -0700 x86: Update memcpy/memset inline strategies for -mtune=generic Update memcpy and memset inline strategies for -mtune=generic: 1. Don't align memory. 2. For known sizes, prefer vector loop, unroll loop with 4 moves or stores per iteration without aligning the loop, up to 256 bytes. 3. For unknown sizes, use memcpy/memset. 4. Since each loop iteration has 4 stores and 8 stores for zeroing with unroll loop may be needed, change CLEAR_RATIO to 10 so that zeroing up to 72 bytes are fully unrolled with 9 stores without SSE. gcc/ PR target/70308 PR target/101366 PR target/102294 PR target/108585 PR target/118276 PR target/119596 PR target/119703 PR target/119704 * config/i386/x86-tune-costs.h (generic_memcpy): Updated. (generic_memset): Likewise. (generic_cost): Change CLEAR_RATIO to 10. gcc/testsuite/ PR target/70308 PR target/101366 PR target/102294 PR target/108585 PR target/118276 PR target/119596 PR target/119703 PR target/119704 * g++.target/i386/memset-pr101366-1.C: New test. * g++.target/i386/memset-pr101366-2.C: Likewise. * g++.target/i386/memset-pr108585-1a.C: Likewise. * g++.target/i386/memset-pr108585-1b.C: Likewise. * g++.target/i386/memset-pr118276-1a.C: Likewise. * g++.target/i386/memset-pr118276-1b.C: Likewise. * g++.target/i386/memset-pr118276-1c.C: Likewise. * gcc.target/i386/memcpy-strategy-12.c: Likewise. * gcc.target/i386/memcpy-strategy-13.c: Likewise. * gcc.target/i386/memset-pr70308-1a.c: Likewise. * gcc.target/i386/memset-pr70308-1b.c: Likewise. * gcc.target/i386/memset-strategy-25.c: Likewise. * gcc.target/i386/memset-strategy-26.c: Likewise. * gcc.target/i386/memset-strategy-27.c: Likewise. * gcc.target/i386/memset-strategy-28.c: Likewise. * gcc.target/i386/memset-strategy-29.c: Likewise. * gcc.target/i386/memset-strategy-30.c: Likewise. 
* gcc.target/i386/memset-strategy-31.c: Likewise. * gcc.target/i386/auto-init-padding-3.c: Expect XMM stores. * gcc.target/i386/auto-init-padding-9.c: Likewise. * gcc.target/i386/mvc17.c: Fail with "rep mov" * gcc.target/i386/pr111657-1.c: Scan for unrolled loop. Fail with "rep mov". * gcc.target/i386/shrink_wrap_1.c: Also pass -mmemset-strategy=rep_8byte:-1:align. * gcc.target/i386/sw-1.c: Also pass -mstringop-strategy=rep_byte. Signed-off-by: H.J. Lu Diff: --- gcc/config/i386/x86-tune-costs.h | 39 ++ gcc/testsuite/g++.target/i386/memset-pr101366-1.C | 30 +++ gcc/testsuite/g++.target/i386/memset-pr101366-2.C | 26 + gcc/testsuite/g++.target/i386/memset-pr108585-1a.C | 43 +++ gcc/testsuite/g++.target/i386/memset-pr108585-1b.C | 43 +++ gcc/testsuite/g++.target/i386/memset-pr118276-1a.C | 35 + gcc/testsuite/g++.target/i386/memset-pr118276-1b.C | 24 + gcc/testsuite/g++.target/i386/memset-pr118276-1c.C | 24 + .../gcc.target/i386/auto-init-padding-3.c | 7 +-- .../gcc.target/i386/auto-init-padding-9.c | 25 +++-- gcc/testsuite/gcc.target/i386/memcpy-strategy-12.c | 34 gcc/testsuite/gcc.target/i386/memcpy-strategy-13.c | 11 gcc/testsuite/gcc.target/i386/memset-pr70308-1a.c | 46 gcc/testsuite/gcc.target/i386/memset-pr70308-1b.c | 61 ++ gcc/testsuite/gcc.target/i386/memset-strategy-25.c | 29 ++ gcc/testsuite/gcc.target/i386/memset-strategy-26.c | 15 ++ gcc/testsuite/gcc.target/i386/memset-strategy-27.c | 11 gcc/testsuite/gcc.target/i386/memset-strategy-28.c | 29 ++ gcc/testsuite/gcc.target/i386/memset-strategy-29.c | 30 +++ gcc/testsuite/gcc.target/i386/memset-strategy-30.c | 30 +++ gcc/testsuite/gcc.target/i386/memset-strategy-31.c | 30 +++ gcc/testsuite/gcc.target/i386/mvc17.c | 2 +- gcc/testsuite/gcc.target/i386/pr111657-1.c | 24 - gcc/testsuite/gcc.target/i386/shrink_wrap_1.c | 2 +- gcc/testsuite/gcc.target/i386/sw-1.c | 2 +- 25 files changed, 626 insertions(+), 26 deletions(-) diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h index 
b08081e37cfb..a5b99d1f9629 100644 --- a/gcc/config/i386/x86-tune-costs.h +++ b/gcc/config/i386/x86-
[gcc r16-1650] AArch64: promote aarch64-autovec-preference to mautovec-preference
https://gcc.gnu.org/g:f60d3f5cf15abc32e167e3c00f79c4ab6d00ec38 commit r16-1650-gf60d3f5cf15abc32e167e3c00f79c4ab6d00ec38 Author: Tamar Christina Date: Tue Jun 24 11:11:36 2025 +0100 AArch64: promote aarch64-autovec-preference to mautovec-preference As requested in my patch for -mmax-vectorization this promotes the parameter --param aarch64-autovec-preference to a first class top target flag. If both the parameter and the flag are specified the parameter takes precedence with the reasoning that it may already be embedded in build systems. gcc/ChangeLog: * config/aarch64/aarch64.cc (aarch64_override_options_internal): Set value of parameter based on option. * config/aarch64/aarch64.opt (autovec-preference): New. * doc/invoke.texi (autovec-preference): Document it. gcc/testsuite/ChangeLog: * gcc.target/aarch64/autovec_param_asimd-only_2.c: New test. * gcc.target/aarch64/autovec_param_default_2.c: New test. * gcc.target/aarch64/autovec_param_prefer-asimd_2.c: New test. * gcc.target/aarch64/autovec_param_prefer-sve_2.c: New test. * gcc.target/aarch64/autovec_param_sve-only_2.c: New test. Diff: --- gcc/config/aarch64/aarch64.cc | 8 + gcc/config/aarch64/aarch64.opt | 11 +-- gcc/doc/invoke.texi| 38 ++ .../aarch64/autovec_param_asimd-only_2.c | 4 +++ .../gcc.target/aarch64/autovec_param_default_2.c | 4 +++ .../aarch64/autovec_param_prefer-asimd_2.c | 4 +++ .../aarch64/autovec_param_prefer-sve_2.c | 4 +++ .../gcc.target/aarch64/autovec_param_sve-only_2.c | 4 +++ 8 files changed, 61 insertions(+), 16 deletions(-) diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index adbe05ac404a..abbb97768f5e 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -19042,6 +19042,14 @@ aarch64_override_options_internal (struct gcc_options *opts) SET_OPTION_IF_UNSET (opts, &global_options_set, param_vect_scalar_cost_multiplier, 10000); + /* Synchronize the -mautovec-preference and aarch64_autovec_preference using + whichever one is not default. 
If both are set then prefer the --param value + over the -mautovec-preference option. */ + if (opts->x_autovec_preference != AARCH64_AUTOVEC_DEFAULT) +SET_OPTION_IF_UNSET (opts, &global_options_set, +aarch64_autovec_preference, +opts->x_autovec_preference); + aarch64_override_options_after_change_1 (opts); } diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt index 17e1c700dd2b..9ca753e6a886 100644 --- a/gcc/config/aarch64/aarch64.opt +++ b/gcc/config/aarch64/aarch64.opt @@ -290,6 +290,13 @@ msve-vector-bits= Target RejectNegative Joined Enum(sve_vector_bits) Var(aarch64_sve_vector_bits) Init(SVE_SCALABLE) -msve-vector-bits= Set the number of bits in an SVE vector register. +mautovec-preference= +Target RejectNegative Joined Var(autovec_preference) Enum(aarch64_autovec_preference) Init(AARCH64_AUTOVEC_DEFAULT) +-mautovec-preference=[default|asimd-only|sve-only|prefer-asimd|prefer-sve] +Force an ISA selection strategy for auto-vectorization. For best performance it +is highly recommended to use -mcpu or -mtune instead. This parameter should +only be used for code exploration. + mmax-vectorization Target Var(flag_aarch64_max_vectorization) Save Override the scalar cost model such that vectorization is always profitable. @@ -360,8 +367,8 @@ The number of Newton iterations for calculating the reciprocal for double type. -param=aarch64-autovec-preference= Target Joined Var(aarch64_autovec_preference) Enum(aarch64_autovec_preference) Init(AARCH64_AUTOVEC_DEFAULT) Param ---param=aarch64-autovec-preference=[default|asimd-only|sve-only|prefer-asimd|prefer-sve] -Force an ISA selection strategy for auto-vectorization. +An old alias for -mautovec-preference. If both -mautovec-preference and +--param=aarch64-autovec-preference are passed, the --param value will be used. 
Enum Name(aarch64_autovec_preference) Type(enum aarch64_autovec_preference_enum) UnknownError(unknown autovec preference %qs) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 93322778a520..a0c6d3d082e6 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -17627,20 +17627,9 @@ The precision of division is proportional to this param when division approximation is enabled. The default value is 2. @item aarch64-autovec-preference -Force an ISA selection strategy for auto-vectorization. -@table @samp -@item default -Use the default heuristics. -@item asimd-only -Use only Advanced SIMD for auto-vectorization. -@item sve-only -Use only SVE for auto-vectorization. -@item prefer-asimd -Use both Advanced SIMD and SVE. Prefer
[gcc r16-1647] Don't duplicate setup code cost when doing group-candidate cost calculation.
https://gcc.gnu.org/g:c06979ff95748559da0c2d3aa4eda9d5999eaaf6 commit r16-1647-gc06979ff95748559da0c2d3aa4eda9d5999eaaf6 Author: hongtao.liu Date: Wed Mar 5 12:25:32 2025 +0100 Don't duplicate setup code cost when doing group-candidate cost calculation. - /* Uses in a group can share setup code, so only add setup cost once. */ - cost -= cost.scratch; It looks like the original code tried to avoid double counting, but unfortunately cost is reset inside the following loop, which invalidates the adjustment above and causes the same setup code cost to be duplicated in each use of the group. The patch fixes the issue. It can also improve 548.exchange_r by 6% with -march=x86-64-v3 -O2 due to better ivopt on EMR. No big performance impact for SPEC2017 on graviton4/SPR with -mcpu=native -Ofast -fomit-frame-pointer -flto=auto. gcc/ChangeLog: PR target/115842 * tree-ssa-loop-ivopts.cc (determine_group_iv_cost_address): Don't recalculate inv_expr when doing group-candidate cost calculation. Diff: --- gcc/tree-ssa-loop-ivopts.cc | 9 ++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/gcc/tree-ssa-loop-ivopts.cc b/gcc/tree-ssa-loop-ivopts.cc index a2150818a43f..8a6726f19889 100644 --- a/gcc/tree-ssa-loop-ivopts.cc +++ b/gcc/tree-ssa-loop-ivopts.cc @@ -5015,8 +5015,6 @@ determine_group_iv_cost_address (struct ivopts_data *data, sum_cost = infinite_cost; } - /* Uses in a group can share setup code, so only add setup cost once. */ - cost -= cost.scratch; /* Compute and add costs for rest uses of this group. */ for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++) { @@ -5032,7 +5030,12 @@ determine_group_iv_cost_address (struct ivopts_data *data, if (!inv_exprs) inv_exprs = BITMAP_ALLOC (NULL); - bitmap_set_bit (inv_exprs, inv_expr->id); + /* Uses in a group can share setup code, + so only add setup cost once. */ + if (bitmap_bit_p (inv_exprs, inv_expr->id)) + cost -= cost.scratch; + else + bitmap_set_bit (inv_exprs, inv_expr->id); } sum_cost += cost; }
[gcc r16-1649] AArch64: propose -mmax-vectorization as an option to override vector costing
https://gcc.gnu.org/g:8e80287601c5954bc437212be5f91aaf12074b22 commit r16-1649-g8e80287601c5954bc437212be5f91aaf12074b22 Author: Tamar Christina Date: Tue Jun 24 11:10:11 2025 +0100 AArch64: propose -mmax-vectorization as an option to override vector costing With the middle-end providing a way to make vectorization more profitable by scaling vect-scalar-cost-multiplier this makes a more user friendly option to make it easier to use. I propose making it an actual -m option that we document and retain vs using the parameter name. In the future I would like to extend this option to modify additional costing in the AArch64 backend itself. This can be used together with --param aarch64-autovec-preference to get the vectorizer to say, always vectorize with SVE. I did consider making this an additional enum to --param aarch64-autovec-preference but I also think this is a useful thing to be able to set with pragmas and attributes, but am open to suggestions. Note that as a follow up I plan on extending -fdump-tree-vect to support -stats which is then intended to be usable with this flag. gcc/ChangeLog: * config/aarch64/aarch64.opt (max-vectorization): New. * config/aarch64/aarch64.cc (aarch64_override_options_internal): Save and restore option. Implement it through vect-scalar-cost-multiplier. (aarch64_attributes): Default to off. * common/config/aarch64/aarch64-common.cc (aarch64_handle_option): Initialize option. * doc/extend.texi (max-vectorization): Document attribute. * doc/invoke.texi (max-vectorization): Document flag. gcc/testsuite/ChangeLog: * gcc.target/aarch64/sve/cost_model_17.c: New test. * gcc.target/aarch64/sve/cost_model_18.c: New test. 
Diff: --- gcc/common/config/aarch64/aarch64-common.cc | 4 gcc/config/aarch64/aarch64.cc | 8 gcc/config/aarch64/aarch64.opt | 4 gcc/doc/extend.texi | 10 ++ gcc/doc/invoke.texi | 9 + .../gcc.target/aarch64/sve/cost_model_17.c | 21 + .../gcc.target/aarch64/sve/cost_model_18.c | 21 + 7 files changed, 77 insertions(+) diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc index b9ed83642ade..1488697c6ce4 100644 --- a/gcc/common/config/aarch64/aarch64-common.cc +++ b/gcc/common/config/aarch64/aarch64-common.cc @@ -142,6 +142,10 @@ aarch64_handle_option (struct gcc_options *opts, opts->x_aarch64_flag_outline_atomics = val; return true; +case OPT_mmax_vectorization: + opts->x_flag_aarch64_max_vectorization = val; + return true; + default: return true; } diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index af8415c29a97..adbe05ac404a 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -19036,6 +19036,12 @@ aarch64_override_options_internal (struct gcc_options *opts) if (TARGET_SME && !TARGET_SVE2) sorry ("no support for %qs without %qs", "sme", "sve2"); + /* Set scalar costing to a high value such that we always pick + vectorization. Increase scalar costing by 10000%. 
*/ + if (opts->x_flag_aarch64_max_vectorization) +SET_OPTION_IF_UNSET (opts, &global_options_set, +param_vect_scalar_cost_multiplier, 10000); + aarch64_override_options_after_change_1 (opts); } @@ -19786,6 +19792,8 @@ static const struct aarch64_attribute_info aarch64_attributes[] = OPT_msign_return_address_ }, { "outline-atomics", aarch64_attr_bool, true, NULL, OPT_moutline_atomics}, + { "max-vectorization", aarch64_attr_bool, false, NULL, + OPT_mmax_vectorization}, { NULL, aarch64_attr_custom, false, NULL, OPT____ } }; diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt index f32d56d4ffae..17e1c700dd2b 100644 --- a/gcc/config/aarch64/aarch64.opt +++ b/gcc/config/aarch64/aarch64.opt @@ -290,6 +290,10 @@ msve-vector-bits= Target RejectNegative Joined Enum(sve_vector_bits) Var(aarch64_sve_vector_bits) Init(SVE_SCALABLE) -msve-vector-bits= Set the number of bits in an SVE vector register. +mmax-vectorization +Target Var(flag_aarch64_max_vectorization) Save +Override the scalar cost model such that vectorization is always profitable. + mverbose-cost-dump Target Undocumented Var(flag_aarch64_verbose_cost) Enables verbose cost model dumping in the debug dump files. diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 7da99f77ec82..55adf649acf8 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -3884,6 +3884,16 @@ Enable or disable calls to out-of-line helpers to implemen