[gcc r12-11218] middle-end/115110 - Fix view_converted_memref_p

2025-06-24 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:4f63fd4b663bdde39524129dfa458c60b2d67133

commit r12-11218-g4f63fd4b663bdde39524129dfa458c60b2d67133
Author: Richard Biener 
Date:   Fri May 17 11:02:29 2024 +0200

middle-end/115110 - Fix view_converted_memref_p

view_converted_memref_p was checking the reference type against the
pointer type of the offset operand rather than its pointed-to type,
which leads to all refs being subject to view-convert treatment
in get_alias_set.  This causes numerous testsuite fails, but with its
new uses from r15-512-g9b7cad5884f21c it is also a wrong-code issue.

PR middle-end/115110
* tree-ssa-alias.cc (view_converted_memref_p): Fix.

(cherry picked from commit a5b3721c06646bf5b9b50a22964e8e2bd4d03f5f)

Diff:
---
 gcc/tree-ssa-alias.cc | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/gcc/tree-ssa-alias.cc b/gcc/tree-ssa-alias.cc
index 1b404e055f8c..8fe4e9d9112a 100644
--- a/gcc/tree-ssa-alias.cc
+++ b/gcc/tree-ssa-alias.cc
@@ -2041,8 +2041,9 @@ view_converted_memref_p (tree base)
 {
   if (TREE_CODE (base) != MEM_REF && TREE_CODE (base) != TARGET_MEM_REF)
 return false;
-  return same_type_for_tbaa (TREE_TYPE (base),
-TREE_TYPE (TREE_OPERAND (base, 1))) != 1;
+  return (same_type_for_tbaa (TREE_TYPE (base),
+ TREE_TYPE (TREE_TYPE (TREE_OPERAND (base, 1))))
+ != 1);
 }
 
 /* Return true if an indirect reference based on *PTR1 constrained


[gcc r12-11223] lto/113207 - fix free_lang_data_in_type

2025-06-24 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:07490d983161912fa314607c5a5beb9c49cc4a3f

commit r12-11223-g07490d983161912fa314607c5a5beb9c49cc4a3f
Author: Richard Biener 
Date:   Mon Feb 3 14:27:01 2025 +0100

lto/113207 - fix free_lang_data_in_type

When we process function types we strip volatile and const qualifiers
after building a simplified type variant (which preserves those).
The qualified type handling of both isn't really compatible, so avoid
bad interaction by swapping this, first dropping const/volatile
qualifiers and then building the simplified type thereof.

PR lto/113207
* ipa-free-lang-data.cc (free_lang_data_in_type): First drop
const/volatile qualifiers from function argument types,
then build a simplified type.

* gcc.dg/pr113207.c: New testcase.

(cherry picked from commit a55e14b239181381204c615335929b3316d75370)

Diff:
---
 gcc/ipa-free-lang-data.cc   |  3 +--
 gcc/testsuite/gcc.dg/pr113207.c | 10 ++
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/gcc/ipa-free-lang-data.cc b/gcc/ipa-free-lang-data.cc
index a742156858cd..6fef047e3beb 100644
--- a/gcc/ipa-free-lang-data.cc
+++ b/gcc/ipa-free-lang-data.cc
@@ -436,9 +436,7 @@ free_lang_data_in_type (tree type, class free_lang_data_d 
*fld)
 different front ends.  */
   for (tree p = TYPE_ARG_TYPES (type); p; p = TREE_CHAIN (p))
{
- TREE_VALUE (p) = fld_simplified_type (TREE_VALUE (p), fld);
  tree arg_type = TREE_VALUE (p);
-
  if (TYPE_READONLY (arg_type) || TYPE_VOLATILE (arg_type))
{
  int quals = TYPE_QUALS (arg_type)
@@ -448,6 +446,7 @@ free_lang_data_in_type (tree type, class free_lang_data_d 
*fld)
  if (!fld->pset.add (TREE_VALUE (p)))
free_lang_data_in_type (TREE_VALUE (p), fld);
}
+ TREE_VALUE (p) = fld_simplified_type (TREE_VALUE (p), fld);
  /* C++ FE uses TREE_PURPOSE to store initial values.  */
  TREE_PURPOSE (p) = NULL;
}
diff --git a/gcc/testsuite/gcc.dg/pr113207.c b/gcc/testsuite/gcc.dg/pr113207.c
new file mode 100644
index 000000000000..81f53d8fcc2f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr113207.c
@@ -0,0 +1,10 @@
+/* { dg-compile } */
+/* { dg-require-effective-target lto } */
+/* { dg-options "-flto -fchecking" }  */
+
+typedef struct cl_lispunion *cl_object;
+struct cl_lispunion {};
+cl_object cl_error() __attribute__((noreturn));
+volatile cl_object cl_coerce_value0;
+void cl_coerce() { cl_error(); }
+void L66safe_canonical_type(cl_object volatile);


[gcc r12-11216] lto/91299 - weak definition inlined with LTO

2025-06-24 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:e5d24c4e89ae6d8c08f85f3425ea9c29dd0e6646

commit r12-11216-ge5d24c4e89ae6d8c08f85f3425ea9c29dd0e6646
Author: Richard Biener 
Date:   Fri Feb 28 14:09:29 2025 +0100

lto/91299 - weak definition inlined with LTO

The following fixes a thinko in the handling of interposed weak
definitions which confused the interposition check in
get_availability by setting DECL_EXTERNAL too early.

PR lto/91299
gcc/lto/
* lto-symtab.cc (lto_symtab_merge_symbols): Set DECL_EXTERNAL
only after calling get_availability.

gcc/testsuite/
* gcc.dg/lto/pr91299_0.c: New testcase.
* gcc.dg/lto/pr91299_1.c: Likewise.

(cherry picked from commit bc34db5b12e008f6ec4fdf4ebd22263c8617e5e3)

Diff:
---
 gcc/lto/lto-symtab.cc|  2 +-
 gcc/testsuite/gcc.dg/lto/pr91299_0.c | 16 
 gcc/testsuite/gcc.dg/lto/pr91299_1.c |  6 ++
 3 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/gcc/lto/lto-symtab.cc b/gcc/lto/lto-symtab.cc
index f631016b9233..bfd4c99fdc80 100644
--- a/gcc/lto/lto-symtab.cc
+++ b/gcc/lto/lto-symtab.cc
@@ -1016,7 +1016,6 @@ lto_symtab_merge_symbols (void)
  || node->resolution == LDPR_RESOLVED_EXEC
  || node->resolution == LDPR_RESOLVED_DYN))
{
- DECL_EXTERNAL (node->decl) = 1;
  /* If alias to local symbol was preempted by external definition,
 we know it is not pointing to the local symbol.  Remove it.  */
  if (node->alias
@@ -1042,6 +1041,7 @@ lto_symtab_merge_symbols (void)
  node->remove_all_references ();
}
}
+ DECL_EXTERNAL (node->decl) = 1;
}
 
  if (!(cnode = dyn_cast <cgraph_node *> (node))
diff --git a/gcc/testsuite/gcc.dg/lto/pr91299_0.c 
b/gcc/testsuite/gcc.dg/lto/pr91299_0.c
new file mode 100644
index ..d9a8b21d6b84
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/lto/pr91299_0.c
@@ -0,0 +1,16 @@
+/* { dg-lto-do run } */
+/* { dg-lto-options { { -O2 -flto } } } */
+
+__attribute__((weak)) int get_t(void)
+{
+  return 0;
+}
+
+int a;
+int main(void)
+{
+  a = get_t();
+  if (a != 1)
+__builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/lto/pr91299_1.c 
b/gcc/testsuite/gcc.dg/lto/pr91299_1.c
new file mode 100644
index 000000000000..29a28520f7b5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/lto/pr91299_1.c
@@ -0,0 +1,6 @@
+/* { dg-options "-fno-lto" } */
+
+int get_t(void)
+{
+return 1;
+}


[gcc] Created branch 'meissner/heads/work212' in namespace 'refs/users'

2025-06-24 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work212' was created in namespace 'refs/users' 
pointing to:

 63076dbe2153... Remove non-SLP path from vectorizable_load


[gcc(refs/users/meissner/heads/work212-libs)] Add ChangeLog.libs and update REVISION.

2025-06-24 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:2548cd79738346549db03ed8a7aa592ef4fe938b

commit 2548cd79738346549db03ed8a7aa592ef4fe938b
Author: Michael Meissner 
Date:   Tue Jun 24 12:06:52 2025 -0400

Add ChangeLog.libs and update REVISION.

2025-06-24  Michael Meissner  

gcc/

* ChangeLog.libs: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.libs | 14 ++
 gcc/REVISION   |  2 +-
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.libs b/gcc/ChangeLog.libs
new file mode 100644
index 000000000000..102a09b041e9
--- /dev/null
+++ b/gcc/ChangeLog.libs
@@ -0,0 +1,14 @@
+ Branch work212-libs, baseline 
+
+2025-06-24   Michael Meissner  
+
+Add ChangeLog.libs and update REVISION.
+
+2025-06-24  Michael Meissner  
+
+gcc/
+
+   * ChangeLog.libs: New file for branch.
+   * REVISION: Update.
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index cb5771ab4fa7..0d66efc25640 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work212 branch
+work212-libs branch


[gcc r16-1660] Fortran/OpenACC: Add Fortran support for acc_attach/acc_detach

2025-06-24 Thread Tobias Burnus via Gcc-cvs
https://gcc.gnu.org/g:1e35a518258e8cd970a2326bba5a4c8b10695439

commit r16-1660-g1e35a518258e8cd970a2326bba5a4c8b10695439
Author: Tobias Burnus 
Date:   Tue Jun 24 23:28:57 2025 +0200

Fortran/OpenACC: Add Fortran support for acc_attach/acc_detach

While C/C++ have supported the acc_attach{,_async} and
acc_detach{,_finalize}{,_async} routines for a long time, the Fortran
API routines were only added in OpenACC 3.3.

Unfortunately, they cannot directly be implemented in the library as
GCC will introduce a temporary array descriptor in some cases, which
causes the attempted attachment to this temporary variable instead
of to the original one.

Therefore, those API routines are handled in a special way in the compiler.

gcc/fortran/ChangeLog:

* trans-stmt.cc (gfc_trans_call_acc_attach_detach): New.
(gfc_trans_call): Call it.

libgomp/ChangeLog:

* libgomp.texi (acc_attach, acc_detach): Update for Fortran
version.
* openacc.f90 (acc_attach{,_async}, 
acc_detach{,_finalize}{,_async}):
Add.
* openacc_lib.h: Likewise.
* testsuite/libgomp.oacc-fortran/acc-attach-detach-1.f90: New test.
* testsuite/libgomp.oacc-fortran/acc-attach-detach-2.f90: New test.

Diff:
---
 gcc/fortran/trans-stmt.cc  | 74 +-
 libgomp/libgomp.texi   | 40 ++--
 libgomp/openacc.f90| 44 +
 libgomp/openacc_lib.h  | 42 
 .../libgomp.oacc-fortran/acc-attach-detach-1.f90   | 25 
 .../libgomp.oacc-fortran/acc-attach-detach-2.f90   | 62 ++
 6 files changed, 265 insertions(+), 22 deletions(-)

diff --git a/gcc/fortran/trans-stmt.cc b/gcc/fortran/trans-stmt.cc
index 487b7687ef14..f10540158627 100644
--- a/gcc/fortran/trans-stmt.cc
+++ b/gcc/fortran/trans-stmt.cc
@@ -377,6 +377,57 @@ get_intrinsic_for_code (gfc_code *code)
 }
 
 
+/* Handle the OpenACC routines acc_attach{,_async} and
+   acc_detach{,_finalize}{,_async} explicitly.  This is required as the
+   the corresponding device pointee is attached to the corresponding device
+   pointer, but if a temporary array descriptor is created for the call,
+   that one is used as pointer instead of the original pointer.  */
+
+tree
+gfc_trans_call_acc_attach_detach (gfc_code *code)
+{
+  stmtblock_t block;
+  gfc_se ptr_addr_se, async_se;
+  tree fn;
+
+  fn = code->resolved_sym->backend_decl;
+  if (fn == NULL)
+{
+  fn = gfc_get_symbol_decl (code->resolved_sym);
+  code->resolved_sym->backend_decl = fn;
+}
+
+  gfc_start_block (&block);
+
+  gfc_init_se (&ptr_addr_se, NULL);
+  ptr_addr_se.descriptor_only = 1;
+  ptr_addr_se.want_pointer = 1;
+  gfc_conv_expr (&ptr_addr_se, code->ext.actual->expr);
+  gfc_add_block_to_block (&block, &ptr_addr_se.pre);
+  if (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (ptr_addr_se.expr)))
+ptr_addr_se.expr = gfc_conv_descriptor_data_get (ptr_addr_se.expr);
+  ptr_addr_se.expr = build_fold_addr_expr (ptr_addr_se.expr);
+
+  bool async = code->ext.actual->next != NULL;
+  if (async)
+{
+  gfc_init_se (&async_se, NULL);
+  gfc_conv_expr (&async_se, code->ext.actual->next->expr);
+  fn = build_call_expr_loc (gfc_get_location (&code->loc), fn, 2,
+   ptr_addr_se.expr, async_se.expr);
+}
+  else
+fn = build_call_expr_loc (gfc_get_location (&code->loc),
+ fn, 1, ptr_addr_se.expr);
+  gfc_add_expr_to_block (&block, fn);
+  gfc_add_block_to_block (&block, &ptr_addr_se.post);
+  if (async)
+gfc_add_block_to_block (&block, &async_se.post);
+
+  return gfc_finish_block (&block);
+}
+
+
 /* Translate the CALL statement.  Builds a call to an F95 subroutine.  */
 
 tree
@@ -392,13 +443,32 @@ gfc_trans_call (gfc_code * code, bool dependency_check,
   tree tmp;
   bool is_intrinsic_mvbits;
 
+  gcc_assert (code->resolved_sym);
+
+  /* Unfortunately, acc_attach* and acc_detach* need some special treatment for
+ attaching the the pointee to a pointer as GCC might introduce a temporary
+ array descriptor, whose data component is then used as to be attached to
+ pointer.  */
+  if (flag_openacc
+  && code->resolved_sym->attr.subroutine
+  && code->resolved_sym->formal
+  && code->resolved_sym->formal->sym->ts.type == BT_ASSUMED
+  && code->resolved_sym->formal->sym->attr.dimension
+  && code->resolved_sym->formal->sym->as->type == AS_ASSUMED_RANK
+  && startswith (code->resolved_sym->name, "acc_")
+  && (!strcmp (code->resolved_sym->name + 4, "attach")
+ || !strcmp (code->resolved_sym->name + 4, "attach_async")
+ || !strcmp (code->resolved_sym->name + 4, "detach")
+ || !strcmp (code->resolved_sym->name + 4, "detach_async")
+ || !strcmp (code->resolved_sym->na

[gcc r12-11215] tree-optimization/87984 - hard register assignments not preserved

2025-06-24 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:80aab83b90d0a1c9e3037a952c138ac2f1ce3f41

commit r12-11215-g80aab83b90d0a1c9e3037a952c138ac2f1ce3f41
Author: Richard Biener 
Date:   Fri Feb 28 10:36:11 2025 +0100

tree-optimization/87984 - hard register assignments not preserved

The following disables redundant store elimination for stores to hard
register variables, as eliminating such stores isn't valid.

PR tree-optimization/87984
* tree-ssa-dom.cc (dom_opt_dom_walker::optimize_stmt): Do
not perform redundant store elimination to hard register
variables.
* tree-ssa-sccvn.cc (eliminate_dom_walker::eliminate_stmt):
Likewise.

* gcc.target/i386/pr87984.c: New testcase.

(cherry picked from commit 535115caaf97f5201fb528f67f15b4c52be5619d)

Diff:
---
 gcc/testsuite/gcc.target/i386/pr87984.c | 23 +++
 gcc/tree-ssa-dom.cc |  4 +++-
 gcc/tree-ssa-sccvn.cc   |  2 ++
 3 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/i386/pr87984.c 
b/gcc/testsuite/gcc.target/i386/pr87984.c
new file mode 100644
index 000000000000..39a6a7480f9e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr87984.c
@@ -0,0 +1,23 @@
+/* { dg-do run } */
+/* { dg-options "-O" } */
+
+__attribute__((noipa))
+int f(void)
+{
+  int o = 0;
+  for (int i = 0; i < 3; i++)
+{
+  register int a asm("eax");
+  a = 1;
+  asm("add %1, %0" : "+r"(o) : "r"(a));
+  asm("xor %%eax, %%eax" ::: "eax");
+}
+  return o;
+}
+
+int main()
+{
+  if (f() != 3)
+__builtin_abort();
+  return 0;
+}
diff --git a/gcc/tree-ssa-dom.cc b/gcc/tree-ssa-dom.cc
index 0ad2e8e7f94f..3e22e8ef341f 100644
--- a/gcc/tree-ssa-dom.cc
+++ b/gcc/tree-ssa-dom.cc
@@ -2245,7 +2245,9 @@ dom_opt_dom_walker::optimize_stmt (basic_block bb, 
gimple_stmt_iterator *si,
 
   /* Perform simple redundant store elimination.  */
   if (gimple_assign_single_p (stmt)
- && TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
+ && TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME
+ && (TREE_CODE (gimple_assign_lhs (stmt)) != VAR_DECL
+ || !DECL_HARD_REGISTER (gimple_assign_lhs (stmt))))
{
  tree lhs = gimple_assign_lhs (stmt);
  tree rhs = gimple_assign_rhs1 (stmt);
diff --git a/gcc/tree-ssa-sccvn.cc b/gcc/tree-ssa-sccvn.cc
index 60a4826e5801..54855cc9db33 100644
--- a/gcc/tree-ssa-sccvn.cc
+++ b/gcc/tree-ssa-sccvn.cc
@@ -6585,6 +6585,8 @@ eliminate_dom_walker::eliminate_stmt (basic_block b, 
gimple_stmt_iterator *gsi)
   if (gimple_assign_single_p (stmt)
   && !gimple_has_volatile_ops (stmt)
   && !is_gimple_reg (gimple_assign_lhs (stmt))
+  && (TREE_CODE (gimple_assign_lhs (stmt)) != VAR_DECL
+ || !DECL_HARD_REGISTER (gimple_assign_lhs (stmt)))
   && (TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME
  || is_gimple_min_invariant (gimple_assign_rhs1 (stmt))))
 {


[gcc r12-11224] lto/114501 - missed free-lang-data for CONSTRUCTOR index

2025-06-24 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:415bad120d8f21cd754d827da9e3d5e1fbe68d4c

commit r12-11224-g415bad120d8f21cd754d827da9e3d5e1fbe68d4c
Author: Richard Biener 
Date:   Thu Mar 6 13:48:16 2025 +0100

lto/114501 - missed free-lang-data for CONSTRUCTOR index

The following makes sure to also walk CONSTRUCTOR element indexes
which can be FIELD_DECLs, referencing otherwise unused types we
need to clean.  walk_tree only walks CONSTRUCTOR element data.

PR lto/114501
* ipa-free-lang-data.cc (find_decls_types_r): Explicitly
handle CONSTRUCTORs as walk_tree handling of those is
incomplete.

* g++.dg/pr114501_0.C: New testcase.

(cherry picked from commit fdd95e1cf29137a19baed25f8c817d320dfe63e3)

Diff:
---
 gcc/ipa-free-lang-data.cc | 14 ++
 gcc/testsuite/g++.dg/pr114501_0.C | 20 
 2 files changed, 34 insertions(+)

diff --git a/gcc/ipa-free-lang-data.cc b/gcc/ipa-free-lang-data.cc
index 6fef047e3beb..1bfa0239be51 100644
--- a/gcc/ipa-free-lang-data.cc
+++ b/gcc/ipa-free-lang-data.cc
@@ -841,6 +841,20 @@ find_decls_types_r (tree *tp, int *ws, void *data)
fld_worklist_push (tem, fld);
   fld_worklist_push (BLOCK_ABSTRACT_ORIGIN (t), fld);
 }
+  /* walk_tree does not visit ce->index which can be a FIELD_DECL, pulling
+ in otherwise unused structure fields so handle CTORs explicitly.  */
+  else if (TREE_CODE (t) == CONSTRUCTOR)
+{
+  unsigned HOST_WIDE_INT idx;
+  constructor_elt *ce;
+  for (idx = 0; vec_safe_iterate (CONSTRUCTOR_ELTS (t), idx, &ce); idx++)
+   {
+ if (ce->index)
+   fld_worklist_push (ce->index, fld);
+ fld_worklist_push (ce->value, fld);
+   }
+  *ws = 0;
+}
 
   if (TREE_CODE (t) != IDENTIFIER_NODE
   && CODE_CONTAINS_STRUCT (TREE_CODE (t), TS_TYPED))
diff --git a/gcc/testsuite/g++.dg/pr114501_0.C 
b/gcc/testsuite/g++.dg/pr114501_0.C
new file mode 100644
index 000000000000..0439ee5f6e23
--- /dev/null
+++ b/gcc/testsuite/g++.dg/pr114501_0.C
@@ -0,0 +1,20 @@
+// { dg-do compile }
+// { dg-require-effective-target c++17 }
+// { dg-require-effective-target lto }
+// { dg-options "-flto" }
+
+typedef long unsigned int size_t;
+struct basic_string_view {
+  typedef long unsigned int size_type;
+  constexpr size_type size() const { return 0; }
+};
+struct array {
+  char _M_elems[1];
+};
+inline constexpr auto make_it() {
+  constexpr basic_string_view view;
+  array arr{};
+  arr._M_elems[view.size()] = 'a';
+  return arr;
+}
+auto bar = make_it();


[gcc r16-1655] Remove non-SLP path from vectorizable_load

2025-06-24 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:63076dbe21535cc7cf106d92f655e2b7d8b749cc

commit r16-1655-g63076dbe21535cc7cf106d92f655e2b7d8b749cc
Author: Richard Biener 
Date:   Tue Jun 24 14:38:19 2025 +0200

Remove non-SLP path from vectorizable_load

This cleans the rest of vectorizable_load from non-SLP, propagates
out ncopies == 1, and elides loops from 0 to ncopies.

* tree-vect-stmts.cc (vectorizable_load): Remove non-SLP
paths and propagate out ncopies == 1.

Diff:
---
 gcc/tree-vect-stmts.cc | 1935 ++--
 1 file changed, 876 insertions(+), 1059 deletions(-)

diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index f699d808e688..db1b539b6c74 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -9836,7 +9836,6 @@ vectorizable_load (vec_info *vinfo,
   tree dataref_ptr = NULL_TREE;
   tree dataref_offset = NULL_TREE;
   gimple *ptr_incr = NULL;
-  int ncopies;
   int i, j;
   unsigned int group_size;
   poly_uint64 group_gap_adj;
@@ -9850,7 +9849,6 @@ vectorizable_load (vec_info *vinfo,
   bool compute_in_loop = false;
   class loop *at_loop;
   int vec_num;
-  bool slp = (slp_node != NULL);
   bool slp_perm = false;
   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
   poly_uint64 vf;
@@ -9909,7 +9907,7 @@ vectorizable_load (vec_info *vinfo,
return false;
 
   mask_index = internal_fn_mask_index (ifn);
-  if (mask_index >= 0 && slp_node)
+  if (mask_index >= 0)
mask_index = vect_slp_child_index_for_operand
(call, mask_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
   if (mask_index >= 0
@@ -9918,7 +9916,7 @@ vectorizable_load (vec_info *vinfo,
return false;
 
   els_index = internal_fn_else_index (ifn);
-  if (els_index >= 0 && slp_node)
+  if (els_index >= 0)
els_index = vect_slp_child_index_for_operand
  (call, els_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
   if (els_index >= 0
@@ -9939,19 +9937,9 @@ vectorizable_load (vec_info *vinfo,
   else
 vf = 1;
 
-  /* Multiple types in SLP are handled by creating the appropriate number of
- vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
- case of SLP.  */
-  if (slp)
-ncopies = 1;
-  else
-ncopies = vect_get_num_copies (loop_vinfo, vectype);
-
-  gcc_assert (ncopies >= 1);
-
   /* FORNOW. This restriction should be relaxed.  */
   if (nested_in_vect_loop
-  && (ncopies > 1 || (slp && SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) > 1)))
+  && SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) > 1)
 {
   if (dump_enabled_p ())
 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -9959,20 +9947,6 @@ vectorizable_load (vec_info *vinfo,
   return false;
 }
 
-  /* Invalidate assumptions made by dependence analysis when vectorization
- on the unrolled body effectively re-orders stmts.  */
-  if (ncopies > 1
-  && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
-  && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
-  STMT_VINFO_MIN_NEG_DIST (stmt_info)))
-{
-  if (dump_enabled_p ())
-   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-"cannot perform implicit CSE when unrolling "
-"with negative dependence distance\n");
-  return false;
-}
-
   elem_type = TREE_TYPE (vectype);
   mode = TYPE_MODE (vectype);
 
@@ -9997,15 +9971,6 @@ vectorizable_load (vec_info *vinfo,
   first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
   group_size = DR_GROUP_SIZE (first_stmt_info);
 
-  /* Refuse non-SLP vectorization of SLP-only groups.  */
-  if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
-   {
- if (dump_enabled_p ())
-   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-"cannot vectorize load in non-SLP mode.\n");
- return false;
-   }
-
   /* Invalidate assumptions made by dependence analysis when vectorization
 on the unrolled body effectively re-orders stmts.  */
   if (STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
@@ -10031,7 +9996,7 @@ vectorizable_load (vec_info *vinfo,
   int maskload_elsval = 0;
   bool need_zeroing = false;
   if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, 
VLS_LOAD,
-   ncopies, &memory_access_type, &poffset,
+   1, &memory_access_type, &poffset,
&alignment_support_scheme, &misalignment, &gs_info,
&lanes_ifn, &elsvals))
 return false;
@@ -10046,8 +10011,7 @@ vectorizable_load (vec_info *vinfo,
 
   /* ???  The following checks should really be part of
  get_group_load_store_type.  */
-  if (slp
-  && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
+  if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
   && !((memory_access_type == VMAT_ELEMENTWISE
|| mem

[gcc r12-11229] tree-optimization/119534 - reject bogus emulated vectorized gather

2025-06-24 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:550edc99476376ee0350be90b9e61b337ffb0ff3

commit r12-11229-g550edc99476376ee0350be90b9e61b337ffb0ff3
Author: Richard Biener 
Date:   Tue Apr 1 14:13:03 2025 +0200

tree-optimization/119534 - reject bogus emulated vectorized gather

The following makes sure to reject the attempts to emulate a vector
gather when the discovered index vector type is a vector mask.

PR tree-optimization/119534
* tree-vect-stmts.cc (get_load_store_type): Reject
VECTOR_BOOLEAN_TYPE_P offset vector type for emulated gathers.

* gcc.dg/vect/pr119534.c: New testcase.

(cherry picked from commit d0cc14c62ad7403afcab3c2e38851d3ab179352f)

Diff:
---
 gcc/testsuite/gcc.dg/vect/pr119534.c | 11 +++
 gcc/tree-vect-stmts.cc   |  1 +
 2 files changed, 12 insertions(+)

diff --git a/gcc/testsuite/gcc.dg/vect/pr119534.c 
b/gcc/testsuite/gcc.dg/vect/pr119534.c
new file mode 100644
index 000000000000..0b4130b7cfaa
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr119534.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-mavx512bw" { target { x86_64-*-* i?86-*-* } } } */
+
+void f(int w, int *out, double *d)
+{
+  for (int j = 0; j < w; j++)
+{
+  const int i = (j >= w / 2);
+  out[j] += d[i];
+}
+}
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index b1ab4bce7d28..bc9f95cab970 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -2493,6 +2493,7 @@ get_load_store_type (vec_info  *vinfo, stmt_vec_info 
stmt_info,
  else if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
   || !TYPE_VECTOR_SUBPARTS
 (gs_info->offset_vectype).is_constant ()
+  || VECTOR_BOOLEAN_TYPE_P (gs_info->offset_vectype)
   || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
  (gs_info->offset_vectype),
TYPE_VECTOR_SUBPARTS (vectype)))


[gcc r12-11217] middle-end/101478 - ICE with degenerate address during gimplification

2025-06-24 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:05b347c5322a50195aa3ab0d06f2058f0ccee956

commit r12-11217-g05b347c5322a50195aa3ab0d06f2058f0ccee956
Author: Richard Biener 
Date:   Wed Jul 31 10:07:45 2024 +0200

middle-end/101478 - ICE with degenerate address during gimplification

When we gimplify &MEM[0B + 4] we are re-folding the address in case
types are not canonical which ends up with a constant address that
recompute_tree_invariant_for_addr_expr ICEs on.  Properly guard
that call.

PR middle-end/101478
* gimplify.cc (gimplify_addr_expr): Check we still have an
ADDR_EXPR before calling recompute_tree_invariant_for_addr_expr.

* gcc.dg/pr101478.c: New testcase.

(cherry picked from commit 33ead6400ad59d4b38fa0527a9a7b53a28114ab7)

Diff:
---
 gcc/gimplify.cc |  3 ++-
 gcc/testsuite/gcc.dg/pr101478.c | 11 +++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc
index 55bc7f8624ed..a7b5a3883373 100644
--- a/gcc/gimplify.cc
+++ b/gcc/gimplify.cc
@@ -6518,7 +6518,8 @@ gimplify_addr_expr (tree *expr_p, gimple_seq *pre_p, 
gimple_seq *post_p)
*expr_p = build_fold_addr_expr (op0);
 
   /* Make sure TREE_CONSTANT and TREE_SIDE_EFFECTS are set properly.  */
-  recompute_tree_invariant_for_addr_expr (*expr_p);
+  if (TREE_CODE (*expr_p) == ADDR_EXPR)
+   recompute_tree_invariant_for_addr_expr (*expr_p);
 
   /* If we re-built the ADDR_EXPR add a conversion to the original type
  if required.  */
diff --git a/gcc/testsuite/gcc.dg/pr101478.c b/gcc/testsuite/gcc.dg/pr101478.c
new file mode 100644
index 000000000000..527620ea0f11
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr101478.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "" } */
+
+struct obj {
+  int n;
+  int l;
+};
+int main()
+{
+  (struct obj *)((char *)(__SIZE_TYPE__)({ 0; }) - (char *)&((struct obj 
*)0)->l);
+}


[gcc r12-11230] middle-end/119706 - allow POLY_INT_CST as is_gimple_mem_ref_addr

2025-06-24 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:75f255c11f7e5a5099ad909606e21ec6bf9b82cc

commit r12-11230-g75f255c11f7e5a5099ad909606e21ec6bf9b82cc
Author: Richard Biener 
Date:   Thu Apr 10 13:30:42 2025 +0200

middle-end/119706 - allow POLY_INT_CST as is_gimple_mem_ref_addr

We currently only allow INTEGER_CST, but not POLY_INT_CST, which leads
to the situation that when the POLY_INT_CST is only indirectly
present via an SSA def the IL is valid but when propagated it's not.
That's unsustainable.

PR middle-end/119706
* gimple-expr.cc (is_gimple_mem_ref_addr): Also allow
POLY_INT_CST.

(cherry picked from commit bf812c6ad83ec0b241bb3fecc7e68f883b6083df)

Diff:
---
 gcc/gimple-expr.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/gimple-expr.cc b/gcc/gimple-expr.cc
index 5faaf43eaf50..2ebcdac74136 100644
--- a/gcc/gimple-expr.cc
+++ b/gcc/gimple-expr.cc
@@ -843,7 +843,7 @@ bool
 is_gimple_mem_ref_addr (tree t)
 {
   return (is_gimple_reg (t)
- || TREE_CODE (t) == INTEGER_CST
+ || poly_int_tree_p (t)
  || (TREE_CODE (t) == ADDR_EXPR
  && (CONSTANT_CLASS_P (TREE_OPERAND (t, 0))
  || decl_address_invariant_p (TREE_OPERAND (t, 0)))));


[gcc r16-1661] gcn: Fix glc vs. sc0 handling for scalar memory access

2025-06-24 Thread Tobias Burnus via Gcc-cvs
https://gcc.gnu.org/g:750bc2899844d662aee93476f2da63fce68535d9

commit r16-1661-g750bc2899844d662aee93476f2da63fce68535d9
Author: Tobias Burnus 
Date:   Tue Jun 24 23:55:27 2025 +0200

gcn: Fix glc vs. sc0 handling for scalar memory access

gfx942 still uses glc for scalar access ('s_...') and only uses
sc0/nt/sc1 for vector access.

gcc/ChangeLog:

* config/gcn/gcn-opts.h (TARGET_GLC_NAME): Fix and extend the
description in the comment.
* config/gcn/gcn.cc (print_operand): Extend the comment about
'G' and 'g'.
* config/gcn/gcn.md: Use 'glc' instead of %G where appropriate.

Diff:
---
 gcc/config/gcn/gcn-opts.h |  7 +--
 gcc/config/gcn/gcn.cc |  2 ++
 gcc/config/gcn/gcn.md | 30 +++---
 3 files changed, 22 insertions(+), 17 deletions(-)

diff --git a/gcc/config/gcn/gcn-opts.h b/gcc/config/gcn/gcn-opts.h
index bcea14f3fe7a..0bfc7869eefe 100644
--- a/gcc/config/gcn/gcn-opts.h
+++ b/gcc/config/gcn/gcn-opts.h
@@ -84,8 +84,11 @@ enum hsaco_attr_type
 #define TARGET_DPP8 TARGET_RDNA2_PLUS
 /* Device requires CDNA1-style manually inserted wait states for AVGPRs.  */
 #define TARGET_AVGPR_CDNA1_NOPS TARGET_CDNA1
-/* Whether to use the 'globally coherent' (glc) or the 'scope' (sc0, sc1) flag
-   for scalar memory operations. The string starts on purpose with a space.  */
+/* Whether to use the 'globally coherent' (glc) or the 'scope' (sc0) flag
+   for non-scalar memory operations. The string starts on purpose with a space.
+   Note: for scalar memory operations (i.e. 's_...'), 'glc' is still used.
+   CDNA3 also uses 'nt' instead of 'slc' and 'sc1' instead of 'scc'; however,
+   there is no non-scalar user so far.  */
 #define TARGET_GLC_NAME (TARGET_CDNA3 ? " sc0" : " glc")
 /* The metadata on different devices need different granularity.  */
 #define TARGET_VGPR_GRANULARITY \
diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
index 2d8dfa3232e2..0ce5a29fbb57 100644
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@ -7103,6 +7103,8 @@ print_operand_address (FILE *file, rtx mem)
O - print offset:n for data share operations.
G - print "glc" (or for gfx94x: sc0) unconditionally [+ indep. of regnum]
g - print "glc" (or for gfx94x: sc0), if appropriate for given MEM
+   NOTE: Do not use 'G' or 'g with scalar memory access ('s_...') as those
+   require "glc" also with gfx94x.
L - print low-part of a multi-reg value
H - print second part of a multi-reg value (high-part of 2-reg value)
J - print third part of a multi-reg value
diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md
index 1998931e0529..2ce2e054fbf0 100644
--- a/gcc/config/gcn/gcn.md
+++ b/gcc/config/gcn/gcn.md
@@ -206,7 +206,7 @@
 ;   vdata: vgpr0-255
 ;   srsrc: sgpr0-102
 ;   soffset: sgpr0-102
-;   flags: offen, idxen, %G, lds, slc, tfe
+;   flags: offen, idxen, glc, lds, slc, tfe
 ;
 ; mtbuf - Typed memory buffer operation. Two words
 ;   offset: 12-bit constant
@@ -216,10 +216,10 @@
 ;   vdata: vgpr0-255
 ;   srsrc: sgpr0-102
 ;   soffset: sgpr0-102
-;   flags: offen, idxen, %G, lds, slc, tfe
+;   flags: offen, idxen, glc, lds, slc, tfe
 ;
 ; flat - flat or global memory operations
-;   flags: %G, slc
+;   flags: {CDNA3: sc0, nt, sc1 | otherwise: glc, slc, scc}
 ;   addr: vgpr0-255
 ;   data: vgpr0-255
 ;   vdst: vgpr0-255
@@ -1987,7 +1987,7 @@
(use (match_operand 3 "const_int_operand"))]
   "0 /* Disabled.  */"
   "@
-   s_atomic_\t%0, %1, %2 %G2\;s_waitcnt\tlgkmcnt(0)
+   s_atomic_\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0)
flat_atomic_\t%0, %1, %2 %G2\;s_waitcnt\t0
global_atomic_\t%0, %A1, %2%O1 %G2\;s_waitcnt\tvmcnt(0)"
   [(set_attr "type" "smem,flat,flat")
@@ -2054,7 +2054,7 @@
  UNSPECV_ATOMIC))]
   ""
   "@
-   s_atomic_cmpswap\t%0, %1, %2 %G2\;s_waitcnt\tlgkmcnt(0)
+   s_atomic_cmpswap\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0)
flat_atomic_cmpswap\t%0, %1, %2 %G2\;s_waitcnt\t0
global_atomic_cmpswap\t%0, %A1, %2%O1 %G2\;s_waitcnt\tvmcnt(0)"
   [(set_attr "type" "smem,flat,flat")
@@ -2096,7 +2096,7 @@
switch (which_alternative)
  {
  case 0:
-   return "s_load%o0\t%0, %A1 %G1\;s_waitcnt\tlgkmcnt(0)";
+   return "s_load%o0\t%0, %A1 glc\;s_waitcnt\tlgkmcnt(0)";
  case 1:
return (TARGET_RDNA2 /* Not GFX11.  */
? "flat_load%o0\t%0, %A1%O1 %G1 dlc\;s_waitcnt\t0"
@@ -2113,7 +2113,7 @@
switch (which_alternative)
  {
  case 0:
-   return "s_load%o0\t%0, %A1 %G1\;s_waitcnt\tlgkmcnt(0)\;"
+   return "s_load%o0\t%0, %A1 glc\;s_waitcnt\tlgkmcnt(0)\;"
   "s_dcache_wb_vol";
  case 1:
return (TARGET_RDNA2
@@ -2147,7 +2147,7 @@
switch (which_alternative)
  {
  case 0:
-   return "s_dcache_wb_vol\;s_load%o0\t%0, %A1 %G1\;

[gcc r12-11213] tree-optimization/111125 - avoid BB vectorization in novector loops

2025-06-24 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:b908ad2b836b761f7b27b8dc650422ce9a7efecd

commit r12-11213-gb908ad2b836b761f7b27b8dc650422ce9a7efecd
Author: Richard Biener 
Date:   Thu Aug 24 11:10:43 2023 +0200

tree-optimization/111125 - avoid BB vectorization in novector loops

When a loop is marked with

  #pragma GCC novector

the following makes sure to also skip BB vectorization for contained
blocks.  That avoids gcc.dg/vect/bb-slp-29.c failing on aarch64
because of extra BB vectorization therein.  I'm not specifically
dealing with sub-loops of novector loops, the desired semantics
isn't documented.

PR tree-optimization/111125
* tree-vect-slp.cc (vect_slp_function): Split at novector
loop entry, do not push blocks in novector loops.

(cherry picked from commit 43da77a4f1636280c4259402c9c2c543e6ec6c0b)

Diff:
---
 gcc/tree-vect-slp.cc | 41 +
 1 file changed, 29 insertions(+), 12 deletions(-)

diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 0462fa01020d..26fc94a661e3 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -6262,6 +6262,17 @@ vect_slp_function (function *fun)
 bbs[0]->loop_father->num, bb->index);
  split = true;
}
+  else if (!bbs.is_empty ()
+  && bb->loop_father->header == bb
+  && bb->loop_father->dont_vectorize)
+   {
+ if (dump_enabled_p ())
+   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+"splitting region at dont-vectorize loop %d "
+"entry at bb%d\n",
+bb->loop_father->num, bb->index);
+ split = true;
+   }
 
   if (split && !bbs.is_empty ())
{
@@ -6269,19 +6280,25 @@ vect_slp_function (function *fun)
  bbs.truncate (0);
}
 
-  /* We need to be able to insert at the head of the region which
-we cannot for region starting with a returns-twice call.  */
   if (bbs.is_empty ())
-   if (gcall *first = safe_dyn_cast <gcall *> (first_stmt (bb)))
- if (gimple_call_flags (first) & ECF_RETURNS_TWICE)
-   {
- if (dump_enabled_p ())
-   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-"skipping bb%d as start of region as it "
-"starts with returns-twice call\n",
-bb->index);
- continue;
-   }
+   {
+ /* We need to be able to insert at the head of the region which
+we cannot for region starting with a returns-twice call.  */
+ if (gcall *first = safe_dyn_cast <gcall *> (first_stmt (bb)))
+   if (gimple_call_flags (first) & ECF_RETURNS_TWICE)
+ {
+   if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+  "skipping bb%d as start of region as it "
+  "starts with returns-twice call\n",
+  bb->index);
+   continue;
+ }
+ /* If the loop this BB belongs to is marked as not to be vectorized
+honor that also for BB vectorization.  */
+ if (bb->loop_father->dont_vectorize)
+   continue;
+   }
 
   bbs.safe_push (bb);


[gcc(refs/users/meissner/heads/work212-cmodel)] Add ChangeLog.cmodel and update REVISION.

2025-06-24 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:2ce52d84bc94196d891714e09390b8eae8a1ffa0

commit 2ce52d84bc94196d891714e09390b8eae8a1ffa0
Author: Michael Meissner 
Date:   Tue Jun 24 12:04:12 2025 -0400

Add ChangeLog.cmodel and update REVISION.

2025-06-24  Michael Meissner  

gcc/

* ChangeLog.cmodel: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.cmodel | 14 ++
 gcc/REVISION |  2 +-
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.cmodel b/gcc/ChangeLog.cmodel
new file mode 100644
index ..b1609e4321f3
--- /dev/null
+++ b/gcc/ChangeLog.cmodel
@@ -0,0 +1,14 @@
+ Branch work212-cmodel, baseline 
+
+2025-06-24   Michael Meissner  
+
+Add ChangeLog.cmodel and update REVISION.
+
+2025-06-24  Michael Meissner  
+
+gcc/
+
+   * ChangeLog.cmodel: New file for branch.
+   * REVISION: Update.
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index cb5771ab4fa7..08b2925fffe0 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work212 branch
+work212-cmodel branch


[gcc(refs/users/meissner/heads/work212-bugs)] Add ChangeLog.bugs and update REVISION.

2025-06-24 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:f62f924a42e053bd073d8d3a671b5d12ffb1829d

commit f62f924a42e053bd073d8d3a671b5d12ffb1829d
Author: Michael Meissner 
Date:   Tue Jun 24 12:02:56 2025 -0400

Add ChangeLog.bugs and update REVISION.

2025-06-24  Michael Meissner  

gcc/

* ChangeLog.bugs: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.bugs | 14 ++
 gcc/REVISION   |  2 +-
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.bugs b/gcc/ChangeLog.bugs
new file mode 100644
index ..88436a7b674b
--- /dev/null
+++ b/gcc/ChangeLog.bugs
@@ -0,0 +1,14 @@
+ Branch work212-bugs, baseline 
+
+2025-06-24   Michael Meissner  
+
+Add ChangeLog.bugs and update REVISION.
+
+2025-06-24  Michael Meissner  
+
+gcc/
+
+   * ChangeLog.bugs: New file for branch.
+   * REVISION: Update.
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index cb5771ab4fa7..308e66a07c33 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work212 branch
+work212-bugs branch


[gcc] Created branch 'meissner/heads/work212-cmodel' in namespace 'refs/users'

2025-06-24 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work212-cmodel' was created in namespace 
'refs/users' pointing to:

 09be5ec3d304... Add ChangeLog.meissner and REVISION.


[gcc] Created branch 'meissner/heads/work212-sha' in namespace 'refs/users'

2025-06-24 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work212-sha' was created in namespace 'refs/users' 
pointing to:

 09be5ec3d304... Add ChangeLog.meissner and REVISION.


[gcc] Created branch 'meissner/heads/work212-test' in namespace 'refs/users'

2025-06-24 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work212-test' was created in namespace 'refs/users' 
pointing to:

 09be5ec3d304... Add ChangeLog.meissner and REVISION.


[gcc(refs/users/meissner/heads/work212-sha)] Add ChangeLog.sha and update REVISION.

2025-06-24 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:2e4af1056a2311f743e392483ce271168afeb078

commit 2e4af1056a2311f743e392483ce271168afeb078
Author: Michael Meissner 
Date:   Tue Jun 24 12:07:55 2025 -0400

Add ChangeLog.sha and update REVISION.

2025-06-24  Michael Meissner  

gcc/

* ChangeLog.sha: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.sha | 14 ++
 gcc/REVISION  |  2 +-
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.sha b/gcc/ChangeLog.sha
new file mode 100644
index ..58ea95619888
--- /dev/null
+++ b/gcc/ChangeLog.sha
@@ -0,0 +1,14 @@
+ Branch work212-sha, baseline 
+
+2025-06-24   Michael Meissner  
+
+Add ChangeLog.sha and update REVISION.
+
+2025-06-24  Michael Meissner  
+
+gcc/
+
+   * ChangeLog.sha: New file for branch.
+   * REVISION: Update.
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index cb5771ab4fa7..dd012b1d103e 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work212 branch
+work212-sha branch


[gcc] Created branch 'meissner/heads/work212-submit' in namespace 'refs/users'

2025-06-24 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work212-submit' was created in namespace 
'refs/users' pointing to:

 f2cc1f39200e... Add REVISION.


[gcc] Created branch 'meissner/heads/work212-libs' in namespace 'refs/users'

2025-06-24 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work212-libs' was created in namespace 'refs/users' 
pointing to:

 09be5ec3d304... Add ChangeLog.meissner and REVISION.


[gcc(refs/users/meissner/heads/work212-test)] Add ChangeLog.test and update REVISION.

2025-06-24 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:a9b578441e87a0443ded42838bd5536934cb25c7

commit a9b578441e87a0443ded42838bd5536934cb25c7
Author: Michael Meissner 
Date:   Tue Jun 24 12:09:07 2025 -0400

Add ChangeLog.test and update REVISION.

2025-06-24  Michael Meissner  

gcc/

* ChangeLog.test: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.test | 14 ++
 gcc/REVISION   |  2 +-
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.test b/gcc/ChangeLog.test
new file mode 100644
index ..bc4f2aafa995
--- /dev/null
+++ b/gcc/ChangeLog.test
@@ -0,0 +1,14 @@
+ Branch work212-test, baseline 
+
+2025-06-24   Michael Meissner  
+
+Add ChangeLog.test and update REVISION.
+
+2025-06-24  Michael Meissner  
+
+gcc/
+
+   * ChangeLog.test: New file for branch.
+   * REVISION: Update.
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index cb5771ab4fa7..b15b6e0aec15 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work212 branch
+work212-test branch


[gcc r16-1656] i386: Convert LEA stack adjust insn to SUB when FLAGS_REG is dead

2025-06-24 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:8f5fac56ca39e99d7a9ad6a0c067c75e6ffcd2cf

commit r16-1656-g8f5fac56ca39e99d7a9ad6a0c067c75e6ffcd2cf
Author: Uros Bizjak 
Date:   Tue Jun 24 11:02:02 2025 +0200

i386: Convert LEA stack adjust insn to SUB when FLAGS_REG is dead

ADD/SUB is faster than LEA for most processors. Also, there are
several peephole2 patterns available that convert prologue esp
subtractions to pushes (at the end of i386.md). These process only
patterns with flags reg clobber, so they are ineffective
with clobber-less stack ptr adjustments, introduced by r16-1551
("x86: Enable separate shrink wrapping").

Introduce a peephole2 pattern that adds a clobber to a clobber-less
stack ptr adjustments when FLAGS_REG is dead.

gcc/ChangeLog:

* config/i386/i386.md
(@pro_epilogue_adjust_stack_add_nocc): Add type attribute.
(pro_epilogue_adjust_stack_add_nocc peephole2 pattern):
Convert pro_epilogue_adjust_stack_add_nocc variant to
pro_epilogue_adjust_stack_add when FLAGS_REG is dead.

Diff:
---
 gcc/config/i386/i386.md | 24 +---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 423ef48e518f..41a86544bbf7 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -27449,7 +27449,7 @@
(cond [(and (eq_attr "alternative" "0")
(not (match_test "TARGET_OPT_AGU")))
 (const_string "alu")
-  (match_operand: 2 "const0_operand")
+  (match_operand 2 "const0_operand")
 (const_string "imov")
  ]
  (const_string "lea")))
@@ -27470,7 +27470,7 @@
(clobber (mem:BLK (scratch)))]
   ""
 {
-  if (operands[2] == CONST0_RTX (mode))
+  if (get_attr_type (insn) == TYPE_IMOV)
 return "mov{}\t{%1, %0|%0, %1}";
   else
 {
@@ -27478,13 +27478,31 @@
   return "lea{}\t{%E2, %0|%0, %E2}";
 }
 }
-  [(set (attr "length_immediate")
+  [(set (attr "type")
+   (cond [(match_operand 2 "const0_operand")
+(const_string "imov")
+ ]
+ (const_string "lea")))
+   (set (attr "length_immediate")
(cond [(eq_attr "type" "imov")
 (const_string "0")
  ]
  (const_string "*")))
(set_attr "mode" "")])
 
+(define_peephole2
+  [(parallel
+ [(set (match_operand:P 0 "register_operand")
+  (plus:P (match_dup 0)
+  (match_operand:P 1 "")))
+  (clobber (mem:BLK (scratch)))])]
+  "peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel
+ [(set (match_dup 0)
+  (plus:P (match_dup 0) (match_dup 1)))
+  (clobber (reg:CC FLAGS_REG))
+  (clobber (mem:BLK (scratch)))])])
+
 (define_insn "@pro_epilogue_adjust_stack_sub_"
   [(set (match_operand:P 0 "register_operand" "=r")
(minus:P (match_operand:P 1 "register_operand" "0")


[gcc(refs/users/meissner/heads/work212-orig)] Add REVISION.

2025-06-24 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:f2cc1f39200ec924fcce6aaff29743f192032708

commit f2cc1f39200ec924fcce6aaff29743f192032708
Author: Michael Meissner 
Date:   Tue Jun 24 12:01:49 2025 -0400

Add REVISION.

2025-06-24  Michael Meissner  

gcc/

* REVISION: New file for branch.

Diff:
---
 gcc/REVISION | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/REVISION b/gcc/REVISION
new file mode 100644
index ..731dbab77c31
--- /dev/null
+++ b/gcc/REVISION
@@ -0,0 +1 @@
+work212-orig branch


[gcc(refs/users/meissner/heads/work212-submit)] Add ChangeLog.submit and update REVISION.

2025-06-24 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:a17d97335c37fe49fdce2d4abdf4dfe0986c31d0

commit a17d97335c37fe49fdce2d4abdf4dfe0986c31d0
Author: Michael Meissner 
Date:   Tue Jun 24 12:12:02 2025 -0400

Add ChangeLog.submit and update REVISION.

2025-06-24  Michael Meissner  

gcc/

* ChangeLog.submit: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.submit | 14 ++
 gcc/REVISION |  2 +-
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.submit b/gcc/ChangeLog.submit
new file mode 100644
index ..0337e6f3cd35
--- /dev/null
+++ b/gcc/ChangeLog.submit
@@ -0,0 +1,14 @@
+ Branch work212-submit, baseline 
+
+2025-06-24   Michael Meissner  
+
+Add ChangeLog.submit and update REVISION.
+
+2025-06-24  Michael Meissner  
+
+gcc/
+
+   * ChangeLog.submit: New file for branch.
+   * REVISION: Update.
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index 731dbab77c31..e6a46b512581 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work212-orig branch
+work212-submit branch


[gcc r16-1659] RISC-V: Add patterns for vector-scalar multiply-(subtract-)accumulate [PR119100]

2025-06-24 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:92e1893e0155b6b3baef2a935efd5936d23a67ea

commit r16-1659-g92e1893e0155b6b3baef2a935efd5936d23a67ea
Author: Paul-Antoine Arras 
Date:   Tue Jun 24 15:42:50 2025 -0600

RISC-V: Add patterns for vector-scalar multiply-(subtract-)accumulate 
[PR119100]

This pattern enables the combine pass (or late-combine, depending on the 
case)
to merge a vec_duplicate into a plus-mult or minus-mult RTL instruction.

Before this patch, we have two instructions, e.g.:
  vfmv.v.f   v6,fa0
  vfmacc.vv  v2,v6,v4

After, we get only one:
  vfmacc.vf  v2,fa0,v4

PR target/119100

gcc/ChangeLog:

* config/riscv/autovec-opt.md (*_vf_): Handle both add 
and
acc FMA variants.
* config/riscv/vector.md (*pred_mul__scalar_undef): 
New.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c: Add vfmacc and 
vfmsac.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f64.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f64.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f64.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f64.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_mulop.h: Add support for acc
variants.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_mulop_run.h: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmadd-run-1-f16.c: Define
TEST_OUT.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmadd-run-1-f32.c: 
Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmadd-run-1-f64.c: 
Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsub-run-1-f16.c: 
Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsub-run-1-f32.c: 
Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsub-run-1-f64.c: 
Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfnmadd-run-1-f16.c: 
Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfnmadd-run-1-f32.c: 
Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfnmadd-run-1-f64.c: 
Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfnmsub-run-1-f16.c: 
Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfnmsub-run-1-f32.c: 
Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfnmsub-run-1-f64.c: 
Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmacc-run-1-f16.c: New 
test.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmacc-run-1-f32.c: New 
test.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmacc-run-1-f64.c: New 
test.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsac-run-1-f16.c: New 
test.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsac-run-1-f32.c: New 
test.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsac-run-1-f64.c: New 
test.

Diff:
---
 gcc/config/riscv/autovec-opt.md| 14 +++--
 gcc/config/riscv/vector.md | 37 +++-
 .../gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c  |  4 ++
 .../gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c  |  4 ++
 .../gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f64.c  |  4 ++
 .../gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c  |  9 +--
 .../gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c  |  9 +--
 .../gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f64.c  |  9 +--
 .../gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c  |  4 ++
 .../gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c  |  4 ++
 .../gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f64.c  |  4 ++
 .../gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c  |  9 +--
 .../gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c  |  9 +--
 .../gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f64.c  |  9 +--
 .../gcc.target/riscv/rvv/autovec/vx_vf/vf_mulop.h  | 66 ++
 .../riscv/rvv/autovec/vx_vf/vf_mulop_run.h |  8 +--
 .../riscv/rvv/autovec/vx_vf/vf_vfmacc-run-1-f16.c  | 16 ++
 .../riscv/rvv/autovec/vx_vf/vf_vfmacc-run-1-f32.c  | 16 ++
 .../riscv/rvv/autovec/vx_vf/vf_vfmacc-run-1-f64.c  | 16 ++
 .../riscv/rvv/autovec/vx_vf/vf_vfmadd-run-1-f16.c  |  1 +
 .../riscv/rvv/autovec/vx_vf/vf_vfmadd-run-1-f32.c  |  1 +
 .../riscv/rvv/autovec/vx_vf/vf_vfmadd-run-1-f64.c  |  1 +
 .../riscv/rvv/autovec/vx_vf/vf_vfmsac-run-1-f16.c  | 16 ++
 .../riscv/rvv/autovec/vx_vf/vf_vfmsac-run-1-f32.c  | 16 ++
 .../riscv/rvv/autovec/vx_vf/vf

[gcc r12-11220] ipa/111245 - bogus modref analysis for store in call that might throw

2025-06-24 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:83f764a9ac925d479ad3fee8c44e6053adb3475a

commit r12-11220-g83f764a9ac925d479ad3fee8c44e6053adb3475a
Author: Richard Biener 
Date:   Fri Feb 28 11:44:26 2025 +0100

ipa/111245 - bogus modref analysis for store in call that might throw

We currently record a kill for

  *x_4(D) = always_throws ();

because we consider the store always executing since the appropriate
check for whether the stmt could throw is guarded by
!cfun->can_throw_non_call_exceptions.

PR ipa/111245
* ipa-modref.cc (modref_access_analysis::analyze_store): Do
not guard the check of whether the stmt could throw by
cfun->can_throw_non_call_exceptions.

* g++.dg/torture/pr111245.C: New testcase.

(cherry picked from commit e6037af6d5e5a43c437257580d75bc8b35a6dcfd)

Diff:
---
 gcc/ipa-modref.cc   |  3 +--
 gcc/testsuite/g++.dg/torture/pr111245.C | 23 +++
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/gcc/ipa-modref.cc b/gcc/ipa-modref.cc
index ba7f21834ef9..5648409d60e8 100644
--- a/gcc/ipa-modref.cc
+++ b/gcc/ipa-modref.cc
@@ -1745,8 +1745,7 @@ modref_access_analysis::analyze_store (gimple *stmt, 
tree, tree op, void *data)
 t->record_access_lto (t->m_summary_lto->stores, &r, a);
   if (t->m_always_executed
   && a.useful_for_kill_p ()
-  && (!cfun->can_throw_non_call_exceptions
- || !stmt_could_throw_p (cfun, stmt)))
+  && !stmt_could_throw_p (cfun, stmt))
 {
   if (dump_file)
fprintf (dump_file, "   - Recording kill\n");
diff --git a/gcc/testsuite/g++.dg/torture/pr111245.C 
b/gcc/testsuite/g++.dg/torture/pr111245.C
new file mode 100644
index ..785f4a51761d
--- /dev/null
+++ b/gcc/testsuite/g++.dg/torture/pr111245.C
@@ -0,0 +1,23 @@
+/* { dg-do run } */
+
+struct Int {
+  int value;
+};
+
+__attribute__((noipa)) Int always_throws() { throw 123; }
+
+void foo(Int &x) {
+  try {
+x = always_throws();
+  } catch (...) {
+  }
+}
+
+int main()
+{
+  Int x;
+  x.value = 5;
+  foo(x);
+  if (x.value != 5)
+__builtin_abort ();
+}


[gcc r12-11225] tree-optimization/117113 - ICE with unroll-and-jam

2025-06-24 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:f4dbdeabb2944d014d506a537a576a6f9a1f4c1f

commit r12-11225-gf4dbdeabb2944d014d506a537a576a6f9a1f4c1f
Author: Richard Biener 
Date:   Mon Feb 3 15:12:52 2025 +0100

tree-optimization/117113 - ICE with unroll-and-jam

When there's an inner loop without virtual header PHI but the outer
loop has one the fusion process cannot handle the need to create
an inner loop virtual header PHI.  Punt in this case.

PR tree-optimization/117113
* gimple-loop-jam.cc (unroll_jam_possible_p): Detect when
we cannot handle virtual SSA update.

* gcc.dg/torture/pr117113.c: New testcase.

(cherry picked from commit 0675eb17480bada678bf2769d39732027abcd6d0)

Diff:
---
 gcc/gimple-loop-jam.cc  | 12 +++-
 gcc/testsuite/gcc.dg/torture/pr117113.c | 20 
 2 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/gcc/gimple-loop-jam.cc b/gcc/gimple-loop-jam.cc
index e33dd9091df3..22d518d9fb0d 100644
--- a/gcc/gimple-loop-jam.cc
+++ b/gcc/gimple-loop-jam.cc
@@ -278,13 +278,17 @@ unroll_jam_possible_p (class loop *outer, class loop 
*loop)
  body would be the after-iter value of the first body) if it's over
  an associative and commutative operation.  We wouldn't
  be able to handle unknown cycles.  */
+  bool inner_vdef = false;
   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
 {
   affine_iv iv;
   tree op = gimple_phi_result (psi.phi ());
 
   if (virtual_operand_p (op))
-   continue;
+   {
+ inner_vdef = true;
+ continue;
+   }
   if (!simple_iv (loop, loop, op, &iv, true))
return false;
   /* The inductions must be regular, loop invariant step and initial
@@ -300,6 +304,12 @@ unroll_jam_possible_p (class loop *outer, class loop *loop)
 copy, _not_ the next value of the second body.  */
 }
 
+  /* When there's no inner loop virtual PHI IV we cannot handle the update
+ required to the inner loop if that doesn't already have one.  See
+ PR117113.  */
+  if (!inner_vdef && get_virtual_phi (outer->header))
+return false;
+
   return true;
 }
 
diff --git a/gcc/testsuite/gcc.dg/torture/pr117113.c 
b/gcc/testsuite/gcc.dg/torture/pr117113.c
new file mode 100644
index ..e90ad034a4d3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr117113.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-fno-tree-dce -fno-inline" } */
+
+int a, b, c;
+volatile int d[1];
+void e() {}
+void f(int g) {}
+int main() {
+  int i;
+  for (; b; b--) {
+for (i = 0; i < 3; i++) {
+  e();
+  f(d[0]);
+  d[0];
+}
+if (a)
+  c++;
+  }
+  return 0;
+}


[gcc r16-1652] gcc: remove atan from edom_only_function

2025-06-24 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:0606d2b979f4014b1dd6a1e6b030630ec5586fd3

commit r16-1652-g0606d2b979f4014b1dd6a1e6b030630ec5586fd3
Author: Yuao Ma 
Date:   Tue Jun 24 00:06:16 2025 +0800

gcc: remove atan from edom_only_function

According to the man page, atan does not produce an error. According to the 
C23
standard draft (N3088), a range error occurs for atan if a nonzero x is too
close to zero. Neither of them mentions that atan will result in a domain 
error.

gcc/ChangeLog:

* tree-call-cdce.cc (edom_only_function): Remove atan.

Signed-off-by: Yuao Ma 

Diff:
---
 gcc/tree-call-cdce.cc | 2 --
 1 file changed, 2 deletions(-)

diff --git a/gcc/tree-call-cdce.cc b/gcc/tree-call-cdce.cc
index 9ca5fda5126d..712ec7c8920d 100644
--- a/gcc/tree-call-cdce.cc
+++ b/gcc/tree-call-cdce.cc
@@ -353,8 +353,6 @@ edom_only_function (gcall *call)
 CASE_FLT_FN_FLOATN_NX (BUILT_IN_ACOS):
 CASE_FLT_FN (BUILT_IN_ASIN):
 CASE_FLT_FN_FLOATN_NX (BUILT_IN_ASIN):
-CASE_FLT_FN (BUILT_IN_ATAN):
-CASE_FLT_FN_FLOATN_NX (BUILT_IN_ATAN):
 CASE_FLT_FN (BUILT_IN_COS):
 CASE_FLT_FN_FLOATN_NX (BUILT_IN_COS):
 CASE_FLT_FN (BUILT_IN_SIGNIFICAND):


[gcc r16-1654] diagnostic: fix for older version of GCC

2025-06-24 Thread Marc Poulhies via Gcc-cvs
https://gcc.gnu.org/g:3f1986766c6efcd0f444902571b7a58f015267c5

commit r16-1654-g3f1986766c6efcd0f444902571b7a58f015267c5
Author: Marc Poulhiès 
Date:   Tue Jun 24 15:12:30 2025 +0200

diagnostic: fix for older version of GCC

Having both an enum and a variable with the same name triggers an error with
gcc 5.

gcc/ChangeLog:
* diagnostic-state-to-dot.cc (get_color_for_dynalloc_state):
Rename argument dynalloc_state to dynalloc_st.
(add_title_tr): Rename argument style to styl.
(on_xml_node): Rename local variable dynalloc_state to dynalloc_st.

Diff:
---
 gcc/diagnostic-state-to-dot.cc | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/gcc/diagnostic-state-to-dot.cc b/gcc/diagnostic-state-to-dot.cc
index b6d7ec5a082b..ddae83b85cd2 100644
--- a/gcc/diagnostic-state-to-dot.cc
+++ b/gcc/diagnostic-state-to-dot.cc
@@ -51,9 +51,9 @@ enum class dynalloc_state
 };
 
 static const char *
-get_color_for_dynalloc_state (enum dynalloc_state dynalloc_state)
+get_color_for_dynalloc_state (enum dynalloc_state dynalloc_st)
 {
-  switch (dynalloc_state)
+  switch (dynalloc_st)
 {
 default:
   gcc_unreachable ();
@@ -242,7 +242,7 @@ private:
int num_columns,
const xml::element &input_element,
std::string heading,
-   enum style style,
+   enum style styl,
enum dynalloc_state dynalloc_state)
   {
 xp.push_tag ("tr", true);
@@ -258,7 +258,7 @@ private:
color = "white";
   }
 else
-  switch (style)
+  switch (styl)
{
default:
  gcc_unreachable ();
@@ -323,12 +323,12 @@ private:
 else if (input_element->m_kind == "heap-buffer")
   {
const char *extents = input_element->get_attr ("dynamic-extents");
-   enum dynalloc_state dynalloc_state = get_dynalloc_state 
(*input_element);
+   enum dynalloc_state dynalloc_st = get_dynalloc_state (*input_element);
if (auto region_id = input_element->get_attr ("region_id"))
-   m_region_id_to_dynalloc_state[region_id] = dynalloc_state;
+   m_region_id_to_dynalloc_state[region_id] = dynalloc_st;
const char *type = input_element->get_attr ("type");
pretty_printer pp;
-   switch (dynalloc_state)
+   switch (dynalloc_st)
  {
  default:
gcc_unreachable ();
@@ -375,7 +375,7 @@ private:
add_title_tr (id_of_node, xp, num_columns, *input_element,
  pp_formatted_text (&pp),
  style::h2,
- dynalloc_state);
+ dynalloc_st);
   }
 else
   {


[gcc r16-1658] Fortran: fix ICE in verify_gimple_in_seq with substrings [PR120743]

2025-06-24 Thread Harald Anlauf via Gcc-cvs
https://gcc.gnu.org/g:5bc92717b804483a17dd5095f8b6d4fd75a472b1

commit r16-1658-g5bc92717b804483a17dd5095f8b6d4fd75a472b1
Author: Harald Anlauf 
Date:   Tue Jun 24 20:46:38 2025 +0200

Fortran: fix ICE in verify_gimple_in_seq with substrings [PR120743]

PR fortran/120743

gcc/fortran/ChangeLog:

* trans-expr.cc (gfc_conv_substring): Substring indices are of
type gfc_charlen_type_node.  Convert to size_type_node for
pointer arithmetic only after offset adjustments have been made.

gcc/testsuite/ChangeLog:

* gfortran.dg/pr120743.f90: New test.

Co-authored-by: Jerry DeLisle 
Co-authored-by: Mikael Morin 

Diff:
---
 gcc/fortran/trans-expr.cc  |  5 +++--
 gcc/testsuite/gfortran.dg/pr120743.f90 | 38 ++
 2 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/gcc/fortran/trans-expr.cc b/gcc/fortran/trans-expr.cc
index c8a207609e4b..3e0d763d2fb0 100644
--- a/gcc/fortran/trans-expr.cc
+++ b/gcc/fortran/trans-expr.cc
@@ -2800,8 +2800,9 @@ gfc_conv_substring (gfc_se * se, gfc_ref * ref, int kind,
   else if (POINTER_TYPE_P (TREE_TYPE (tmp)))
{
  tree diff;
- diff = fold_build2 (MINUS_EXPR, size_type_node, start.expr,
- build_one_cst (size_type_node));
+ diff = fold_build2 (MINUS_EXPR, gfc_charlen_type_node, start.expr,
+ build_one_cst (gfc_charlen_type_node));
+ diff = fold_convert (size_type_node, diff);
  se->expr
= fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (tmp), tmp, diff);
}
diff --git a/gcc/testsuite/gfortran.dg/pr120743.f90 
b/gcc/testsuite/gfortran.dg/pr120743.f90
new file mode 100644
index ..8682d0c8859e
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr120743.f90
@@ -0,0 +1,38 @@
+! { dg-do compile }
+! PR fortran/120743 - ICE in verify_gimple_in_seq with substrings
+!
+! Testcase as reduced by Jerry DeLisle 
+
+module what
+  implicit none
+  CHARACTER(LEN=:), ALLOCATABLE :: attrlist
+contains
+  SUBROUTINE get_c_attr ( attrname, attrval_c )
+!
+! returns attrval_c='' if not found
+!
+IMPLICIT NONE
+CHARACTER(LEN=*), INTENT(IN) :: attrname
+CHARACTER(LEN=*), INTENT(OUT) :: attrval_c
+!
+CHARACTER(LEN=1) :: quote
+INTEGER :: j0, j1
+LOGICAL :: found
+!
+! search for attribute name in attrlist: attr1="val1" attr2="val2" ...
+!
+attrval_c = ''
+if ( .not. allocated(attrlist) ) return
+if ( len_trim(attrlist) < 1 ) return
+!
+j0 = 1
+do while ( j0 < len_trim(attrlist) )
+   ! locate = and first quote
+   j1 = index ( attrlist(j0:), '=' )
+   quote = attrlist(j0+j1:j0+j1)
+   ! next line: something is not right
+   if ( quote /= '"' .and. quote /= "'" ) return
+end do
+!
+  END SUBROUTINE get_c_attr
+end module what


[gcc r16-1651] s390: Fix float vector extract for pre-z13

2025-06-24 Thread Juergen Christ via Gcc-cvs
https://gcc.gnu.org/g:bd9cac12373aecaa0ebee808f805c617f2c15375

commit r16-1651-gbd9cac12373aecaa0ebee808f805c617f2c15375
Author: Juergen Christ 
Date:   Wed Jun 18 15:16:28 2025 +0200

s390: Fix float vector extract for pre-z13

Also provide the vec_extract patterns for floats on pre-z13 machines
to prevent ICEing in those cases.

gcc/ChangeLog:

* config/s390/vector.md (VF): Don't restrict modes.
(VEC_SET_SINGLEFLOAT): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/s390/vector/vec-extract-1.c: Fix test on arch11.
* gcc.target/s390/vector/vec-set-1.c: Run test on arch11.
* gcc.target/s390/vector/vec-extract-2.c: New test.

Signed-off-by: Juergen Christ 

Diff:
---
 gcc/config/s390/vector.md  |   4 +-
 .../gcc.target/s390/vector/vec-extract-1.c |  16 +-
 .../gcc.target/s390/vector/vec-extract-2.c | 168 +
 gcc/testsuite/gcc.target/s390/vector/vec-set-1.c   |  23 ++-
 4 files changed, 187 insertions(+), 24 deletions(-)

diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index 6f4e1929eb80..7251a76c3aea 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -75,7 +75,7 @@
   V1DF V2DF
   (V1TF "TARGET_VXE") (TF "TARGET_VXE")])
 
-(define_mode_iterator VF [(V2SF "TARGET_VXE") (V4SF "TARGET_VXE") V2DF])
+(define_mode_iterator VF [V2SF V4SF V2DF])
 
 ; All modes present in V_HW1 and VFT.
 (define_mode_iterator V_HW1_FT [V16QI V8HI V4SI V2DI V1TI V1DF
@@ -512,7 +512,7 @@
 (define_mode_iterator VEC_SET_NONFLOAT
   [V1QI V2QI V4QI V8QI V16QI V1HI V2HI V4HI V8HI V1SI V2SI V4SI V1DI V2DI V2SF 
V4SF])
 ; Iterator for single element float vectors
-(define_mode_iterator VEC_SET_SINGLEFLOAT [(V1SF "TARGET_VXE") V1DF (V1TF 
"TARGET_VXE")])
+(define_mode_iterator VEC_SET_SINGLEFLOAT [V1SF V1DF (V1TF "TARGET_VXE")])
 
 ; FIXME: Support also vector mode operands for 1
 ; FIXME: A target memory operand seems to be useful otherwise we end
diff --git a/gcc/testsuite/gcc.target/s390/vector/vec-extract-1.c 
b/gcc/testsuite/gcc.target/s390/vector/vec-extract-1.c
index 9df7909a3ea8..83af839963be 100644
--- a/gcc/testsuite/gcc.target/s390/vector/vec-extract-1.c
+++ b/gcc/testsuite/gcc.target/s390/vector/vec-extract-1.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -march=z14 -mzarch" } */
+/* { dg-options "-O2 -march=arch11 -mzarch" } */
 /* { dg-final { check-function-bodies "**" "" } } */
 
 typedef double V2DF __attribute__((vector_size(16)));
@@ -110,17 +110,6 @@ extractnthfloat (V4SF x, int n)
   return x[n];
 }
 
-/*
-** sumfirstfloat:
-** vfasb   %v0,%v24,%v26
-** br  %r14
-*/
-float
-sumfirstfloat (V4SF x, V4SF y)
-{
-  return (x + y)[0];
-}
-
 /*
 ** extractfirst2:
 ** vlr %v0,%v24
@@ -179,8 +168,7 @@ extractsingled (V1DF x)
 
 /*
 ** extractsingleld:
-** vlr (%v.),%v24
-** vst \1,0\(%r2\),3
+** vst %v24,0\(%r2\),3
 ** br  %r14
 */
 long double
diff --git a/gcc/testsuite/gcc.target/s390/vector/vec-extract-2.c 
b/gcc/testsuite/gcc.target/s390/vector/vec-extract-2.c
new file mode 100644
index ..640ac0c8c766
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/vec-extract-2.c
@@ -0,0 +1,168 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=arch11 -mzarch" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+typedef double V2DF __attribute__((vector_size(16)));
+typedef float V4SF __attribute__((vector_size(16)));
+typedef float V2SF __attribute__((vector_size(8)));
+typedef double V1DF __attribute__((vector_size(8)));
+typedef float V1SF __attribute__((vector_size(4)));
+typedef long double V1TF __attribute__((vector_size(16)));
+
+/*
+** extractfirstdouble:
+** vsteg   %v24,0\(%r2\),0
+** br  %r14
+*/
+void
+extractfirstdouble (double *res, V2DF x)
+{
+  *res = x[0];
+}
+
+/*
+** extractseconddouble:
+** vsteg   %v24,0\(%r2\),1
+** br  %r14
+*/
+void
+extractseconddouble (double *res, V2DF x)
+{
+  *res = x[1];
+}
+
+/*
+** extractnthdouble:
+** vlgvg   (%r.),%v24,0\(%r3\)
+** stg \1,0\(%r2\)
+** br  %r14
+*/
+void
+extractnthdouble (double *res, V2DF x, int n)
+{
+  *res = x[n];
+}
+
+/*
+** extractfirstfloat:
+** vstef   %v24,0\(%r2\),0
+** br  %r14
+*/
+void
+extractfirstfloat (float *res, V4SF x)
+{
+  *res = x[0];
+}
+
+/*
+** extractsecondfloat:
+** vstef   %v24,0\(%r2\),1
+** br  %r14
+*/
+void
+extractsecondfloat (float *res, V4SF x)
+{
+  *res = x[1];
+}
+
+/*
+** extractthirdfloat:
+** vstef   %v24,0\(%r2\),2
+** br  %r14
+*/
+void
+extractthirdfloat (float *res, V4SF x)
+{
+  *res = x[2];
+}
+
+/*
+** extractfourthfloat:
+** vstef   %v24,0\(%r2\),3
+** br  %r14
+*/
+void
+extractfourthfloat (float *res, V4SF x)
+{
+  *res = x[3];
+}
+
+/*
+** extractnthfloat:
+** vlgvf   (%r.

[gcc r12-11221] tree-optimization/112859 - bogus loop distribution

2025-06-24 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:6258d3f06740c3a77cd7a91606107451d71df68d

commit r12-11221-g6258d3f06740c3a77cd7a91606107451d71df68d
Author: Richard Biener 
Date:   Thu Jan 23 13:10:17 2025 +0100

tree-optimization/112859 - bogus loop distribution

When we get a zero distance vector we still have to check for the
situation of a common inner loop with zero distance.  But we can
still allow a zero distance for the loop we distribute
(gcc.dg/tree-ssa/ldist-33.c is such a case).  This is because
zero distances in non-outermost loops are a misrepresentation
of dependence by dependence analysis.

Note that test coverage of loop distribution of loop nests is
very low.

PR tree-optimization/112859
PR tree-optimization/115347
* tree-loop-distribution.cc
(loop_distribution::pg_add_dependence_edges): For a zero
distance vector still make sure to not have an inner
loop with zero distance.

* gcc.dg/torture/pr112859.c: New testcase.
* gcc.dg/torture/pr115347.c: Likewise.
* gcc.dg/tree-ssa/ldist-36.c: Adjust.

(cherry picked from commit 04ba1300407f106a6dd10d346f58a51d87e6d43e)

Diff:
---
 gcc/testsuite/gcc.dg/torture/pr112859.c  | 24 
 gcc/testsuite/gcc.dg/torture/pr115347.c  | 21 +
 gcc/testsuite/gcc.dg/tree-ssa/ldist-36.c |  3 ++-
 gcc/tree-loop-distribution.cc| 27 ---
 4 files changed, 63 insertions(+), 12 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/torture/pr112859.c 
b/gcc/testsuite/gcc.dg/torture/pr112859.c
new file mode 100644
index ..18f5bf40cb70
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr112859.c
@@ -0,0 +1,24 @@
+/* { dg-do run } */
+/* { dg-additional-options "-ftree-loop-distribution" } */
+
+struct a {
+  char b;
+  int c;
+} f, *i = &f;
+static struct a e[4];
+int *d, **g = &d;
+static int h, j;
+int main()
+{
+  for (; h < 1; h++) {
+struct a k = {1, 1};
+for (j = 0; j < 2; j++) {
+  *i = e[h];
+  e[h] = k;
+}
+*g = 0;
+  }
+  if (f.c != 1)
+__builtin_abort();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/torture/pr115347.c 
b/gcc/testsuite/gcc.dg/torture/pr115347.c
new file mode 100644
index ..2299495144b9
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr115347.c
@@ -0,0 +1,21 @@
+/* { dg-do run } */
+/* { dg-additional-options "-ftree-loop-distribution" } */
+
+struct a {
+  int b;
+  int c;
+} d, e[2];
+int f, g, h;
+int main()
+{
+  for (; f < 1; f++) {
+for (h = 0; h < 2; h++) {
+  d = e[f];
+  g = e[1].c;
+  e[f].c = 1;
+}
+  }
+  if (d.c != 1)
+__builtin_abort();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-36.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ldist-36.c
index 07393f0a665a..6d560060e09a 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ldist-36.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-36.c
@@ -25,4 +25,5 @@ foo (struct st * restrict p)
 }
 }
 
-/* { dg-final { scan-tree-dump-times "Loop nest . distributed: split to 0 
loops and 3 library" 1 "ldist" } } */
+/* The cost modeling doesn't consider splitting a WAR re-use profitable.  */
+/* { dg-final { scan-tree-dump-times "Loop nest . distributed: split to 1 
loops and 1 library" 1 "ldist" } } */
diff --git a/gcc/tree-loop-distribution.cc b/gcc/tree-loop-distribution.cc
index 449b9ffd4389..7b4fad238d5d 100644
--- a/gcc/tree-loop-distribution.cc
+++ b/gcc/tree-loop-distribution.cc
@@ -2122,25 +2122,30 @@ loop_distribution::pg_add_dependence_edges (struct 
graph *rdg, int dir,
 gcc.dg/tree-ssa/pr94969.c.  */
  if (DDR_NUM_DIST_VECTS (ddr) != 1)
this_dir = 2;
- /* If the overlap is exact preserve stmt order.  */
- else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0),
-   DDR_NB_LOOPS (ddr)))
-   ;
- /* Else as the distance vector is lexicographic positive swap
-the dependence direction.  */
  else
{
- if (DDR_REVERSED_P (ddr))
-   this_dir = -this_dir;
- this_dir = -this_dir;
-
+ /* If the overlap is exact preserve stmt order.  */
+ if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0),
+  DDR_NB_LOOPS (ddr)))
+   ;
+ /* Else as the distance vector is lexicographic positive swap
+the dependence direction.  */
+ else
+   {
+ if (DDR_REVERSED_P (ddr))
+   this_dir = -this_dir;
+ this_dir = -this_dir;
+   }
  /* When then dependence distance of the innermost common
 loop of the DRs is zero we have a conflict.  */
  au

[gcc r16-1653] libstdc++: Unnecessary type completion in __is_complete_or_unbounded [PR120717]

2025-06-24 Thread Patrick Palka via Gcc-cvs
https://gcc.gnu.org/g:bc8f5424977b74e107543b34af00768cdbb3a3cf

commit r16-1653-gbc8f5424977b74e107543b34af00768cdbb3a3cf
Author: Patrick Palka 
Date:   Tue Jun 24 09:33:25 2025 -0400

libstdc++: Unnecessary type completion in __is_complete_or_unbounded 
[PR120717]

When checking __is_complete_or_unbounded on a reference to incomplete
type, we overeagerly try to instantiate/complete the referenced type
which besides being unnecessary may also produce an unexpected
-Wsfinae-incomplete warning (added in r16-1527) if the referenced type
is later defined.

This patch fixes this by effectively restricting the sizeof check to
object (except unknown-bound array) types.  In passing simplify the
implementation by using is_object instead of is_function/reference/void
and introducing a __maybe_complete_object_type helper.

PR libstdc++/120717

libstdc++-v3/ChangeLog:

* include/std/type_traits (__maybe_complete_object_type): New
helper trait, factored out from ...
(__is_complete_or_unbounded): ... here.  Only check sizeof on a
__maybe_complete_object_type type.  Fix formatting.
* testsuite/20_util/is_complete_or_unbounded/120717.cc: New test.

Reviewed-by: Tomasz Kamiński 
Co-authored-by: Jonathan Wakely 
Reviewed-by: Jonathan Wakely 

Diff:
---
 libstdc++-v3/include/std/type_traits   | 39 --
 .../20_util/is_complete_or_unbounded/120717.cc | 20 +++
 2 files changed, 42 insertions(+), 17 deletions(-)

diff --git a/libstdc++-v3/include/std/type_traits 
b/libstdc++-v3/include/std/type_traits
index abff9f880001..055411195f17 100644
--- a/libstdc++-v3/include/std/type_traits
+++ b/libstdc++-v3/include/std/type_traits
@@ -280,11 +280,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   // Forward declarations
   template<typename>
-struct is_reference;
-  template<typename>
-struct is_function;
-  template<typename>
-struct is_void;
+struct is_object;
   template<typename>
 struct remove_cv;
   template<typename>
@@ -294,21 +290,30 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   template<typename>
 struct __is_array_unknown_bounds;
 
+  // An object type which is not an unbounded array.
+  // It might still be an incomplete type, but if this is false_type
+  // then we can be certain it's not a complete object type.
+  template<typename _Tp>
+using __maybe_complete_object_type
+  = __and_<is_object<_Tp>, __not_<__is_array_unknown_bounds<_Tp>>>;
+
   // Helper functions that return false_type for incomplete classes,
   // incomplete unions and arrays of known bound from those.
 
-  template <typename _Tp, size_t = sizeof(_Tp)>
-constexpr true_type __is_complete_or_unbounded(__type_identity<_Tp>)
-{ return {}; }
-
-  template <typename _TypeIdentity,
-  typename _NestedType = typename _TypeIdentity::type>
-constexpr typename __or_<
-  is_reference<_NestedType>,
-  is_function<_NestedType>,
-  is_void<_NestedType>,
-  __is_array_unknown_bounds<_NestedType>
->::type __is_complete_or_unbounded(_TypeIdentity)
+  // More specialized overload for complete object types (returning true_type).
+  template<typename _Tp,
+  typename = __enable_if_t<__maybe_complete_object_type<_Tp>::value>,
+  size_t = sizeof(_Tp)>
+constexpr true_type
+__is_complete_or_unbounded(__type_identity<_Tp>)
+{ return {}; };
+
+  // Less specialized overload for reference and unknown-bound array types
+  // (returning true_type), and incomplete types (returning false_type).
+  template<typename _TypeIdentity,
+  typename _NestedType = typename _TypeIdentity::type>
+constexpr typename __not_<__maybe_complete_object_type<_NestedType>>::type
+__is_complete_or_unbounded(_TypeIdentity)
 { return {}; }
 
   // __remove_cv_t (std::remove_cv_t for C++11).
diff --git a/libstdc++-v3/testsuite/20_util/is_complete_or_unbounded/120717.cc 
b/libstdc++-v3/testsuite/20_util/is_complete_or_unbounded/120717.cc
new file mode 100644
index ..4c07683d494e
--- /dev/null
+++ b/libstdc++-v3/testsuite/20_util/is_complete_or_unbounded/120717.cc
@@ -0,0 +1,20 @@
+// PR libstdc++/120717
+// { dg-do compile { target c++11 } }
+// { dg-additional-options "-Wsfinae-incomplete" }
+
+#include <type_traits>
+
+// Verify __is_complete_or_unbounded doesn't try to instantiate the underlying
+// type of a reference or array of unknown bound.
+template<class T> struct A { static_assert(false, "do not instantiate"); };
+static_assert(std::__is_complete_or_unbounded(std::__type_identity<A<int>&>{}), "");
+static_assert(std::__is_complete_or_unbounded(std::__type_identity<A<int>&&>{}), "");
+static_assert(std::__is_complete_or_unbounded(std::__type_identity<A<int>[]>{}), "");
+
+// Verify __is_complete_or_unbounded doesn't produce unexpected
+// -Wsfinae-incomplete warnings.
+struct B;
+static_assert(std::__is_complete_or_unbounded(std::__type_identity<B&>{}), "");
+static_assert(std::__is_complete_or_unbounded(std::__type_identity<B&&>{}), "");
+static_assert(std::__is_complete_or_unbounded(std::__type_identity<B[]>{}), "");
+struct B { }; // { dg-bogus "-Wsfinae-incomplete" }


[gcc r12-11226] tree-optimization/117424 - invalid LIM of trapping ref

2025-06-24 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:eafe890ea3904c109b6bce663a81a91d61356cb4

commit r12-11226-geafe890ea3904c109b6bce663a81a91d61356cb4
Author: Richard Biener 
Date:   Tue Jan 28 12:28:14 2025 +0100

tree-optimization/117424 - invalid LIM of trapping ref

The following addresses a bug in tree_could_trap_p leading to
hoisting of an access that may trap because it is out of bounds.
We only ensured the first accessed byte is within a decl there,
the patch makes sure the whole base of the reference is within it.
This is pessimistic if a handled component would then subset to
a sub-object within the decl but upcasting of a decl to larger
types should be uncommon, questionable, and wrong without
-fno-strict-aliasing.

The testcase is a bit fragile, but I could not devise a (portable)
way to ensure an out-of-bound access to a decl would fault.

PR tree-optimization/117424
* tree-eh.cc (tree_could_trap_p): Verify the base is
fully contained within a decl.

* gcc.dg/tree-ssa/ssa-lim-25.c: New testcase.

(cherry picked from commit f1e776ce58ae4a6ae67886adb4ae806598e2c7ef)

Diff:
---
 gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-25.c | 18 ++
 gcc/tree-eh.cc |  9 +++--
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-25.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-25.c
new file mode 100644
index ..3e0f013d1e0d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-25.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-lim2-details" } */
+
+char x;
+
+long foo (int n)
+{
+  long y = 0;
+  for (int j = 0; j < 1024; ++j)
+for (int i = 0; i < n; ++i)
+  y += *(long *)&x;
+  return y;
+}
+
+/* Because *(long *)&x may trap we have to preserve execution and
+   only hoist it from the innermost loop (after the header check).  */
+/* { dg-final { scan-tree-dump-not "out of loop 1" "lim2" } } */
+/* { dg-final { scan-tree-dump "out of loop 2" "lim2" } } */
diff --git a/gcc/tree-eh.cc b/gcc/tree-eh.cc
index 85939ca0e895..e3cb99ab67d0 100644
--- a/gcc/tree-eh.cc
+++ b/gcc/tree-eh.cc
@@ -2729,11 +2729,16 @@ tree_could_trap_p (tree expr)
  if (TREE_CODE (base) == STRING_CST)
return maybe_le (TREE_STRING_LENGTH (base), off);
  tree size = DECL_SIZE_UNIT (base);
+ tree refsz = TYPE_SIZE_UNIT (TREE_TYPE (expr));
  if (size == NULL_TREE
+ || refsz == NULL_TREE
  || !poly_int_tree_p (size)
- || maybe_le (wi::to_poly_offset (size), off))
+ || !poly_int_tree_p (refsz)
+ || maybe_le (wi::to_poly_offset (size), off)
+ || maybe_gt (off + wi::to_poly_offset (refsz),
+  wi::to_poly_offset (size)))
return true;
- /* Now we are sure the first byte of the access is inside
+ /* Now we are sure the whole base of the access is inside
 the object.  */
  return false;
}


[gcc r16-1657] c++: Implement C++26 P3618R0 - Allow attaching main to the global module [PR120773]

2025-06-24 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:ed7fc2b29ead88be30b40ec2c3c51495200b08c4

commit r16-1657-ged7fc2b29ead88be30b40ec2c3c51495200b08c4
Author: Jakub Jelinek 
Date:   Tue Jun 24 19:00:11 2025 +0200

c++: Implement C++26 P3618R0 - Allow attaching main to the global module 
[PR120773]

The following patch implements the P3618R0 paper by tweaking pedwarn
condition, adjusting pedwarn wording, adjusting one testcase and adding 4
new ones.  The paper was voted in as DR, so it isn't guarded on C++ version.

2025-06-24  Jakub Jelinek  

PR c++/120773
* decl.cc (grokfndecl): Implement C++26 P3618R0 - Allow attaching
main to the global module.  Only pedwarn for current_lang_name
other than lang_name_cplusplus and adjust pedwarn wording.

* g++.dg/parse/linkage5.C: Don't expect error on
extern "C++" int main ();.
* g++.dg/parse/linkage7.C: New test.
* g++.dg/parse/linkage8.C: New test.
* g++.dg/modules/main-2.C: New test.
* g++.dg/modules/main-3.C: New test.

Diff:
---
 gcc/cp/decl.cc| 4 ++--
 gcc/testsuite/g++.dg/modules/main-2.C | 4 
 gcc/testsuite/g++.dg/modules/main-3.C | 7 +++
 gcc/testsuite/g++.dg/parse/linkage5.C | 5 +++--
 gcc/testsuite/g++.dg/parse/linkage7.C | 7 +++
 gcc/testsuite/g++.dg/parse/linkage8.C | 5 +
 6 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/gcc/cp/decl.cc b/gcc/cp/decl.cc
index febdc89f89dd..95bccfbb585b 100644
--- a/gcc/cp/decl.cc
+++ b/gcc/cp/decl.cc
@@ -11326,9 +11326,9 @@ grokfndecl (tree ctype,
  "cannot declare %<::main%> to be %qs", "consteval");
   if (!publicp)
error_at (location, "cannot declare %<::main%> to be static");
-  if (current_lang_depth () != 0)
+  if (current_lang_name != lang_name_cplusplus)
 pedwarn (location, OPT_Wpedantic, "cannot declare %<::main%> with a"
-" linkage specification");
+" linkage specification other than %<extern \"C++\"%>");
   if (module_attach_p ())
error_at (location, "cannot attach %<::main%> to a named module");
   inlinep = 0;
diff --git a/gcc/testsuite/g++.dg/modules/main-2.C 
b/gcc/testsuite/g++.dg/modules/main-2.C
new file mode 100644
index ..8d17381c7fa3
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/main-2.C
@@ -0,0 +1,4 @@
+// { dg-additional-options "-fmodules" }
+
+export module M;
+extern "C++" int main() {}
diff --git a/gcc/testsuite/g++.dg/modules/main-3.C 
b/gcc/testsuite/g++.dg/modules/main-3.C
new file mode 100644
index ..10a29360232d
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/main-3.C
@@ -0,0 +1,7 @@
+// { dg-additional-options "-fmodules" }
+
+export module M;
+extern "C++" {
+  int main() {}
+}
+
diff --git a/gcc/testsuite/g++.dg/parse/linkage5.C 
b/gcc/testsuite/g++.dg/parse/linkage5.C
index 451406de69b2..1bd4736906c3 100644
--- a/gcc/testsuite/g++.dg/parse/linkage5.C
+++ b/gcc/testsuite/g++.dg/parse/linkage5.C
@@ -1,5 +1,6 @@
 // { dg-do compile }
-// The main function shall not be declared with a linkage-specification.
+// The main function shall not be declared with a linkage-specification
+// other than "C++".
 
 extern "C" {
   int main();  // { dg-error "linkage" }
@@ -9,6 +10,6 @@ namespace foo {
   extern "C" int main();  // { dg-error "linkage" }
 }
 
-extern "C++" int main(); // { dg-error "linkage" }
+extern "C++" int main();
 
 extern "C" struct S { int main(); };  // OK
diff --git a/gcc/testsuite/g++.dg/parse/linkage7.C 
b/gcc/testsuite/g++.dg/parse/linkage7.C
new file mode 100644
index ..91caf265305b
--- /dev/null
+++ b/gcc/testsuite/g++.dg/parse/linkage7.C
@@ -0,0 +1,7 @@
+// { dg-do compile }
+// The main function shall not be declared with a linkage-specification
+// other than "C++".
+
+extern "C++" {
+  int main();
+}
diff --git a/gcc/testsuite/g++.dg/parse/linkage8.C 
b/gcc/testsuite/g++.dg/parse/linkage8.C
new file mode 100644
index ..b757ed55b559
--- /dev/null
+++ b/gcc/testsuite/g++.dg/parse/linkage8.C
@@ -0,0 +1,5 @@
+// { dg-do compile }
+// The main function shall not be declared with a linkage-specification
+// other than "C++".
+
+extern "C" int main(); // { dg-error "linkage" }


[gcc(refs/users/meissner/heads/work212-dmf)] Add ChangeLog.dmf and update REVISION.

2025-06-24 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:d59cb20302af4866bd5dd0ea3cd60a8e3445f6fc

commit d59cb20302af4866bd5dd0ea3cd60a8e3445f6fc
Author: Michael Meissner 
Date:   Tue Jun 24 12:05:41 2025 -0400

Add ChangeLog.dmf and update REVISION.

2025-06-24  Michael Meissner  

gcc/

* ChangeLog.dmf: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.dmf | 14 ++
 gcc/REVISION  |  2 +-
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.dmf b/gcc/ChangeLog.dmf
new file mode 100644
index ..6ad2c29a0e45
--- /dev/null
+++ b/gcc/ChangeLog.dmf
@@ -0,0 +1,14 @@
+ Branch work212-dmf, baseline 
+
+2025-06-24   Michael Meissner  
+
+Add ChangeLog.dmf and update REVISION.
+
+2025-06-24  Michael Meissner  
+
+gcc/
+
+   * ChangeLog.dmf: New file for branch.
+   * REVISION: Update.
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index cb5771ab4fa7..45dac7096f85 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work212 branch
+work212-dmf branch


[gcc r16-1664] Add -fauto-profile-inlining

2025-06-24 Thread Jan Hubicka via Gcc-cvs
https://gcc.gnu.org/g:aaf55e09b3d97164615e783d98cfa842f382559d

commit r16-1664-gaaf55e09b3d97164615e783d98cfa842f382559d
Author: Jan Hubicka 
Date:   Wed Jun 25 03:01:29 2025 +0200

Add -fauto-profile-inlining

This patch adds -fauto-profile-inlining which can be used to control
the auto-profile directed inlining.

gcc/ChangeLog:

* common.opt (fauto-profile-inlining): New.
* doc/invoke.texi (-fauto-profile-inlining): Document.
* ipa-inline.cc (inline_functions_by_afdo): Check
flag_auto_profile.
(early_inliner): Also do inline_functions_by_afdo with
!flag_early_inlining.

Diff:
---
 gcc/common.opt  |  4 
 gcc/doc/invoke.texi |  8 +++-
 gcc/ipa-inline.cc   | 21 -
 3 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/gcc/common.opt b/gcc/common.opt
index 0e50305dde8e..a76a6920b54c 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1187,6 +1187,10 @@ Common Joined RejectNegative Var(auto_profile_file)
 Use sample profile information for call graph node weights. The profile
 file is specified in the argument.
 
+fauto-profile-inlining
+Common Var(flag_auto_profile_inlining) Init(1) Optimization
+Perform inlining using auto-profile.
+
 ; -fcheck-bounds causes gcc to generate array bounds checks.
 ; For C, C++ and ObjC: defaults off.
 ; For Java: defaults to on.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index a0c6d3d082e6..95790f7bd171 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -573,7 +573,7 @@ Objective-C and Objective-C++ Dialects}.
 -fmin-function-alignment=[@var{n}]
 -fno-allocation-dce -fallow-store-data-races
 -fassociative-math  -fauto-profile  -fauto-profile[=@var{path}]
--fauto-inc-dec  -fbranch-probabilities
+-fauto-profile-inlining -fauto-inc-dec  -fbranch-probabilities
 -fcaller-saves
 -fcombine-stack-adjustments  -fconserve-stack
 -ffold-mem-offsets
@@ -15502,6 +15502,12 @@ E.g.
 create_gcov --binary=your_program.unstripped --profile=perf.data \
 --gcov=profile.afdo
 @end smallexample
+
+@opindex fauto-profile-inlining
+@item -fauto-profile-inlining
+When auto-profile is available inline all relevant functions which was
+inlined in the tran run before reading the profile feedback.  This improves
+context sensitivity of the profile.  Enabled by default.
 @end table
 
 The following options control compiler behavior regarding floating-point
diff --git a/gcc/ipa-inline.cc b/gcc/ipa-inline.cc
index a960d55b661d..ca605b027dcf 100644
--- a/gcc/ipa-inline.cc
+++ b/gcc/ipa-inline.cc
@@ -3120,7 +3120,7 @@ early_inline_small_functions (struct cgraph_node *node)
 static bool
 inline_functions_by_afdo (struct cgraph_node *node, bool *speculative_calls)
 {
-  if (!flag_auto_profile)
+  if (!flag_auto_profile || !flag_auto_profile_inlining)
 return false;
   struct cgraph_edge *e;
   bool inlined = false;
@@ -3320,6 +3320,25 @@ early_inliner (function *fun)
fprintf (dump_file, "Iterations: %i\n", iterations);
 }
 
+  /* do AFDO inlining in case it was not done as part of early inlining.  */
+  if (optimize
+  && !flag_no_inline
+  && !flag_early_inlining
+  && flag_auto_profile_inlining)
+{
+  bool speculative_calls = false;
+  inlined |= inline_functions_by_afdo (node, &speculative_calls);
+  if (speculative_calls)
+   {
+ cgraph_edge *next;
+ for (cgraph_edge *e = node->callees; e; e = next)
+   {
+ next = e->next_callee;
+ cgraph_edge::redirect_call_stmt_to_callee (e);
+   }
+   }
+}
+
   if (inlined)
 {
   timevar_push (TV_INTEGRATION);


[gcc r14-11861] i386: Remove CLDEMOTE for clients

2025-06-24 Thread Haochen Jiang via Gcc-cvs
https://gcc.gnu.org/g:11b03928bab9a52e4ec43a3d5a0ab85e5a8ee67a

commit r14-11861-g11b03928bab9a52e4ec43a3d5a0ab85e5a8ee67a
Author: Haochen Jiang 
Date:   Tue Jun 17 14:08:38 2025 +0800

i386: Remove CLDEMOTE for clients

CLDEMOTE is not enabled on clients according to the SDM.  The SDM only mentions
that it will be enabled on Xeon and Atom servers, not clients.  Remove it from
the client targets, starting with Alder Lake (where it was introduced).

gcc/ChangeLog:

* config/i386/i386.h (PTA_ALDERLAKE): Use PTA_GOLDMONT_PLUS
as base to remove PTA_CLDEMOTE.
(PTA_SIERRAFOREST): Add PTA_CLDEMOTE since PTA_ALDERLAKE
does not include that anymore.
* doc/invoke.texi: Update texi file.

Diff:
---
 gcc/config/i386/i386.h |  8 +---
 gcc/doc/invoke.texi| 29 ++---
 2 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 2fc82b175e6d..6a833fd8dbd2 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2415,12 +2415,14 @@ constexpr wide_int_bitmask PTA_GOLDMONT_PLUS = 
PTA_GOLDMONT | PTA_RDPID
   | PTA_SGX | PTA_PTWRITE;
 constexpr wide_int_bitmask PTA_TREMONT = PTA_GOLDMONT_PLUS | PTA_CLWB
   | PTA_GFNI | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_CLDEMOTE | PTA_WAITPKG;
-constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_TREMONT | PTA_ADX | PTA_AVX
+constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_GOLDMONT_PLUS | PTA_CLWB
+  | PTA_GFNI | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_WAITPKG | PTA_ADX | PTA_AVX
   | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_LZCNT
   | PTA_PCONFIG | PTA_PKU | PTA_VAES | PTA_VPCLMULQDQ | PTA_SERIALIZE
   | PTA_HRESET | PTA_KL | PTA_WIDEKL | PTA_AVXVNNI;
-constexpr wide_int_bitmask PTA_SIERRAFOREST = PTA_ALDERLAKE | PTA_AVXIFMA
-  | PTA_AVXVNNIINT8 | PTA_AVXNECONVERT | PTA_CMPCCXADD | PTA_ENQCMD | 
PTA_UINTR;
+constexpr wide_int_bitmask PTA_SIERRAFOREST = PTA_ALDERLAKE | PTA_CLDEMOTE
+  | PTA_AVXIFMA | PTA_AVXVNNIINT8 | PTA_AVXNECONVERT | PTA_CMPCCXADD
+  | PTA_ENQCMD | PTA_UINTR;
 constexpr wide_int_bitmask PTA_GRANITERAPIDS = PTA_SAPPHIRERAPIDS | 
PTA_AMX_FP16
   | PTA_PREFETCHI;
 constexpr wide_int_bitmask PTA_GRANITERAPIDS_D = PTA_GRANITERAPIDS
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 64728fead512..d8ff23447f45 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -34514,37 +34514,36 @@ VPCLMULQDQ, AVX512BITALG, RDPID and AVX512VPOPCNTDQ 
instruction set support.
 Intel Alder Lake/Raptor Lake/Meteor Lake/Gracemont CPU with 64-bit extensions,
 MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW,
 PCLMUL, RDRND, XSAVE, XSAVEC, XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX,
-GFNI-SSE, CLWB, MOVDIRI, MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI,
-BMI2, F16C, FMA, LZCNT, PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL,
-WIDEKL and AVX-VNNI instruction set support.
+GFNI-SSE, CLWB, MOVDIRI, MOVDIR64B, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C,
+FMA, LZCNT, PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL and
+AVX-VNNI instruction set support.
 
 @item arrowlake
 Intel Arrow Lake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3,
 SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, XSAVEC,
 XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, MOVDIRI,
-MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT,
-PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI,
-UINTR, AVXIFMA, AVXVNNIINT8, AVXNECONVERT and CMPCCXADD instruction set
-support.
+MOVDIR64B, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT, PCONFIG, PKU,
+VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, UINTR, AVXIFMA,
+AVXVNNIINT8, AVXNECONVERT and CMPCCXADD instruction set support.
 
 @item arrowlake-s
 @itemx lunarlake
 Intel Arrow Lake S/Lunar Lake CPU with 64-bit extensions, MOVBE, MMX, SSE,
 SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND,
 XSAVE, XSAVEC, XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB,
-MOVDIRI, MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA,
-LZCNT, PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI,
-UINTR, AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, AVXVNNIINT16, SHA512,
-SM3 and SM4 instruction set support.
+MOVDIRI, MOVDIR64B, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT,
+PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, UINTR,
+AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, AVXVNNIINT16, SHA512, SM3 and
+SM4 instruction set support.
 
 @item pantherlake
 Intel Panther Lake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3,
 SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, XSAVEC,
 XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, MOVDIRI,
-MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT,
-PCONFIG, PKU, VAES, VPCL

[gcc r13-9771] i386: Remove CLDEMOTE for clients

2025-06-24 Thread Haochen Jiang via Gcc-cvs
https://gcc.gnu.org/g:b5bdebeca28e19422bbd4e48fa6fc50371520cfe

commit r13-9771-gb5bdebeca28e19422bbd4e48fa6fc50371520cfe
Author: Haochen Jiang 
Date:   Tue Jun 17 14:08:38 2025 +0800

i386: Remove CLDEMOTE for clients

CLDEMOTE is not enabled on clients according to the SDM.  The SDM only mentions
that it will be enabled on Xeon and Atom servers, not clients.  Remove it from
the client targets, starting with Alder Lake (where it was introduced).

gcc/ChangeLog:

* config/i386/i386.h (PTA_ALDERLAKE): Use PTA_GOLDMONT_PLUS
as base to remove PTA_CLDEMOTE.
(PTA_SIERRAFOREST): Add PTA_CLDEMOTE since PTA_ALDERLAKE
does not include that anymore.
* doc/invoke.texi: Update texi file.

Diff:
---
 gcc/config/i386/i386.h |  8 +---
 gcc/doc/invoke.texi| 10 +-
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index c147ff8732c9..d147d373e8c9 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2362,12 +2362,14 @@ constexpr wide_int_bitmask PTA_GOLDMONT_PLUS = 
PTA_GOLDMONT | PTA_RDPID
   | PTA_SGX | PTA_PTWRITE;
 constexpr wide_int_bitmask PTA_TREMONT = PTA_GOLDMONT_PLUS | PTA_CLWB
   | PTA_GFNI | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_CLDEMOTE | PTA_WAITPKG;
-constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_TREMONT | PTA_ADX | PTA_AVX
+constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_GOLDMONT_PLUS | PTA_CLWB
+  | PTA_GFNI | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_WAITPKG | PTA_ADX | PTA_AVX
   | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_LZCNT
   | PTA_PCONFIG | PTA_PKU | PTA_VAES | PTA_VPCLMULQDQ | PTA_SERIALIZE
   | PTA_HRESET | PTA_KL | PTA_WIDEKL | PTA_AVXVNNI;
-constexpr wide_int_bitmask PTA_SIERRAFOREST = PTA_ALDERLAKE | PTA_AVXIFMA
-  | PTA_AVXVNNIINT8 | PTA_AVXNECONVERT | PTA_CMPCCXADD | PTA_ENQCMD | 
PTA_UINTR;
+constexpr wide_int_bitmask PTA_SIERRAFOREST = PTA_ALDERLAKE | PTA_CLDEMOTE
+  | PTA_AVXIFMA | PTA_AVXVNNIINT8 | PTA_AVXNECONVERT | PTA_CMPCCXADD
+  | PTA_ENQCMD | PTA_UINTR;
 constexpr wide_int_bitmask PTA_GRANITERAPIDS = PTA_SAPPHIRERAPIDS | 
PTA_AMX_FP16
   | PTA_PREFETCHI;
 constexpr wide_int_bitmask PTA_GRANITERAPIDS_D = PTA_GRANITERAPIDS
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index b80966e13539..00d2e4950a38 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -32565,11 +32565,11 @@ VPCLMULQDQ, AVX512BITALG, RDPID and AVX512VPOPCNTDQ 
instruction set support.
 @item alderlake
 @itemx raptorlake
 @itemx meteorlake
-Intel Alder Lake/Raptor Lake/Meteor Lake CPU with 64-bit extensions, MOVBE, 
MMX,
-SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND,
-XSAVE, XSAVEC, XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB,
-MOVDIRI, MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA,
-LZCNT, PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL and
+Intel Alder Lake/Raptor Lake/Meteor Lake CPU with 64-bit extensions, MOVBE,
+MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL,
+RDRND, XSAVE, XSAVEC, XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX,
+GFNI-SSE, CLWB, MOVDIRI, MOVDIR64B, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C,
+FMA, LZCNT, PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL and
 AVX-VNNI instruction set support.
 
 @item sapphirerapids


[gcc(refs/users/meissner/heads/work212)] Update ChangeLog.*

2025-06-24 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:ad8517009d27942cfcb9045b250a940c84d08752

commit ad8517009d27942cfcb9045b250a940c84d08752
Author: Michael Meissner 
Date:   Tue Jun 24 22:14:58 2025 -0400

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.meissner | 77 ++
 1 file changed, 77 insertions(+)

diff --git a/gcc/ChangeLog.meissner b/gcc/ChangeLog.meissner
index 06910da6ae46..1e1f67fc1301 100644
--- a/gcc/ChangeLog.meissner
+++ b/gcc/ChangeLog.meissner
@@ -1,3 +1,80 @@
+ Branch work212, patch #3 
+
+Add -mcpu=future tests.
+
+This is patch #3 of 4 to add -mcpu=future support to the PowerPC.
+
+This patch adds simple tests for -mcpu=future.
+
+I have tested these patches on both big endian and little endian PowerPC
+servers, with no regressions.  Can I check these patchs into the trunk?
+
+2025-06-13  Michael Meissner  
+
+gcc/testsuite/
+
+   * gcc.target/powerpc/future-1.c: New test.
+   * gcc.target/powerpc/future-2.c: Likewise.
+
+ Branch work212, patch #2 
+
+Add -mcpu=future tuning support.
+
+This is patch #2 of 4 to add -mcpu=future support to the PowerPC.
+
+This patch makes -mtune=future use the same tuning decision as -mtune=power10 
or
+-mtune=power11.
+
+I have tested these patches on both big endian and little endian PowerPC
+servers, with no regressions.  Can I check these patchs into the trunk?
+
+2025-06-13  Michael Meissner  
+
+gcc/
+
+   * config/rs6000/power10.md (all reservations): Add future as an
+   alterntive to power10 and power11.
+
+ Branch work212, patch #1 
+
+Add support for -mcpu=future
+
+This is patch #1 of 4 that adds the support that can be used in developing GCC
+support for future PowerPC processors.
+
+I have tested these patches on both big endian and little endian PowerPC
+servers, with no regressions.  Can I check these patchs into the trunk?
+
+2025-06-13  Michael Meissner  
+
+   * config.gcc (powerpc*-*-*): Add support for --with-cpu=future.
+   * config/rs6000/aix71.h (ASM_CPU_SPEC): Add support for -mcpu=future.
+   * config/rs6000/aix72.h (ASM_CPU_SPEC): Likewise.
+   * config/rs6000/aix73.h (ASM_CPU_SPEC): Likewise.
+   * config/rs6000/driver-rs6000.cc (asm_names): Likewise.
+   * config/rs6000/rs6000-c.cc (rs6000_target_modify_macros): If
+   -mcpu=future, define _ARCH_FUTURE.
+   * config/rs6000/rs6000-cpus.def (FUTURE_MASKS_SERVER): New macro.
+   (POWERPC_MASKS): Add OPTION_MASK_FUTURE.
+   (future cpu): Define.
+   * config/rs6000/rs6000-opts.h (enum processor_type): Add
+   PROCESSOR_FUTURE.
+   * config/rs6000/rs6000-tables.opt: Regenerate.
+   * config/rs6000/rs6000.cc (power10_cost): Update comment.
+   (get_arch_flags): Add support for future processor.
+   (rs6000_option_override_internal): Likewise.
+   (rs6000_machine_from_flags): Likewise.
+   (rs6000_reassociation_width): Likewise.
+   (rs6000_adjust_cost): Likewise.
+   (rs6000_issue_rate): Likewise.
+   (rs6000_sched_reorder): Likewise.
+   (rs6000_sched_reorder2): Likewise.
+   (rs6000_register_move_cost): Likewise.
+   (rs6000_opt_masks): Add -mfuture.
+   * config/rs6000/rs6000.h (ASM_CPU_SPEC): Likewise.
+   * config/rs6000/rs6000.md (cpu attribute): Likewise.
+   * config/rs6000/rs6000.opt (-mfuture): New internal option.
+
  Branch work212, baseline 
 
 2025-06-24   Michael Meissner  


[gcc(refs/users/meissner/heads/work212)] Add ChangeLog.meissner and REVISION.

2025-06-24 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:09be5ec3d304a9f9c7baaf9f719d854fd679678c

commit 09be5ec3d304a9f9c7baaf9f719d854fd679678c
Author: Michael Meissner 
Date:   Tue Jun 24 12:00:53 2025 -0400

Add ChangeLog.meissner and REVISION.

2025-06-24  Michael Meissner  

gcc/

* REVISION: New file for branch.
* ChangeLog.meissner: New file.

gcc/c-family/

* ChangeLog.meissner: New file.

gcc/c/

* ChangeLog.meissner: New file.

gcc/cp/

* ChangeLog.meissner: New file.

gcc/fortran/

* ChangeLog.meissner: New file.

gcc/testsuite/

* ChangeLog.meissner: New file.

libgcc/

* ChangeLog.meissner: New file.

Diff:
---
 gcc/ChangeLog.meissner   | 38 ++
 gcc/REVISION |  1 +
 gcc/c-family/ChangeLog.meissner  | 38 ++
 gcc/c/ChangeLog.meissner | 38 ++
 gcc/cp/ChangeLog.meissner| 38 ++
 gcc/fortran/ChangeLog.meissner   | 38 ++
 gcc/testsuite/ChangeLog.meissner | 38 ++
 libgcc/ChangeLog.meissner| 38 ++
 libstdc++-v3/ChangeLog.meissner  | 38 ++
 9 files changed, 305 insertions(+)

diff --git a/gcc/ChangeLog.meissner b/gcc/ChangeLog.meissner
new file mode 100644
index ..06910da6ae46
--- /dev/null
+++ b/gcc/ChangeLog.meissner
@@ -0,0 +1,38 @@
+ Branch work212, baseline 
+
+2025-06-24   Michael Meissner  
+
+Add ChangeLog.meissner and REVISION.
+
+2025-06-24  Michael Meissner  
+
+gcc/
+
+   * REVISION: New file for branch.
+   * ChangeLog.meissner: New file.
+
+gcc/c-family/
+
+   * ChangeLog.meissner: New file.
+
+gcc/c/
+
+   * ChangeLog.meissner: New file.
+
+gcc/cp/
+
+   * ChangeLog.meissner: New file.
+
+gcc/fortran/
+
+   * ChangeLog.meissner: New file.
+
+gcc/testsuite/
+
+   * ChangeLog.meissner: New file.
+
+libgcc/
+
+   * ChangeLog.meissner: New file.
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
new file mode 100644
index ..cb5771ab4fa7
--- /dev/null
+++ b/gcc/REVISION
@@ -0,0 +1 @@
+work212 branch
diff --git a/gcc/c-family/ChangeLog.meissner b/gcc/c-family/ChangeLog.meissner
new file mode 100644
index ..06910da6ae46
--- /dev/null
+++ b/gcc/c-family/ChangeLog.meissner
@@ -0,0 +1,38 @@
+ Branch work212, baseline 
+
+2025-06-24   Michael Meissner  
+
+Add ChangeLog.meissner and REVISION.
+
+2025-06-24  Michael Meissner  
+
+gcc/
+
+   * REVISION: New file for branch.
+   * ChangeLog.meissner: New file.
+
+gcc/c-family/
+
+   * ChangeLog.meissner: New file.
+
+gcc/c/
+
+   * ChangeLog.meissner: New file.
+
+gcc/cp/
+
+   * ChangeLog.meissner: New file.
+
+gcc/fortran/
+
+   * ChangeLog.meissner: New file.
+
+gcc/testsuite/
+
+   * ChangeLog.meissner: New file.
+
+libgcc/
+
+   * ChangeLog.meissner: New file.
+
+   Clone branch
diff --git a/gcc/c/ChangeLog.meissner b/gcc/c/ChangeLog.meissner
new file mode 100644
index ..06910da6ae46
--- /dev/null
+++ b/gcc/c/ChangeLog.meissner
@@ -0,0 +1,38 @@
+ Branch work212, baseline 
+
+2025-06-24   Michael Meissner  
+
+Add ChangeLog.meissner and REVISION.
+
+2025-06-24  Michael Meissner  
+
+gcc/
+
+   * REVISION: New file for branch.
+   * ChangeLog.meissner: New file.
+
+gcc/c-family/
+
+   * ChangeLog.meissner: New file.
+
+gcc/c/
+
+   * ChangeLog.meissner: New file.
+
+gcc/cp/
+
+   * ChangeLog.meissner: New file.
+
+gcc/fortran/
+
+   * ChangeLog.meissner: New file.
+
+gcc/testsuite/
+
+   * ChangeLog.meissner: New file.
+
+libgcc/
+
+   * ChangeLog.meissner: New file.
+
+   Clone branch
diff --git a/gcc/cp/ChangeLog.meissner b/gcc/cp/ChangeLog.meissner
new file mode 100644
index ..06910da6ae46
--- /dev/null
+++ b/gcc/cp/ChangeLog.meissner
@@ -0,0 +1,38 @@
+ Branch work212, baseline 
+
+2025-06-24   Michael Meissner  
+
+Add ChangeLog.meissner and REVISION.
+
+2025-06-24  Michael Meissner  
+
+gcc/
+
+   * REVISION: New file for branch.
+   * ChangeLog.meissner: New file.
+
+gcc/c-family/
+
+   * ChangeLog.meissner: New file.
+
+gcc/c/
+
+   * ChangeLog.meissner: New file.
+
+gcc/cp/
+
+   * ChangeLog.meissner: New file.
+
+gcc/fortran/
+
+   * ChangeLog.meissner: New file.
+
+gcc/testsuite/
+
+   * ChangeLog.meissner: New file.
+
+libgcc/
+
+   * ChangeLog.meissner: New file.
+
+   Clone branch
diff --git a/gcc/fortran/ChangeLog.meissner b/gcc/fortran/ChangeLog.meissner
new file mode 100644
inde

[gcc r16-1665] RISC-V: Add Profiles RVA/B23S64 support.

2025-06-24 Thread Jiawei Chen via Gcc-cvs
https://gcc.gnu.org/g:e858dc702147b7de560afad165e7f16e3ee7d6c9

commit r16-1665-ge858dc702147b7de560afad165e7f16e3ee7d6c9
Author: Jiawei 
Date:   Tue Jun 24 17:34:05 2025 +0800

RISC-V: Add Profiles RVA/B23S64 support.

This patch adds support for the RISC-V Profiles RVA23S64 and RVB23S64.

gcc/ChangeLog:

* common/config/riscv/riscv-common.cc: New Profiles.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/arch-rva23s.c: New test.
* gcc.target/riscv/arch-rvb23s.c: New test.

Diff:
---
 gcc/common/config/riscv/riscv-common.cc  | 18 +-
 gcc/testsuite/gcc.target/riscv/arch-rva23s.c | 14 ++
 gcc/testsuite/gcc.target/riscv/arch-rvb23s.c | 12 
 3 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc
index 3c25848ccd38..82037a334528 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -295,6 +295,15 @@ static const riscv_profiles riscv_profiles_table[] =
"_zicboz_zfhmin_zkt_zvfhmin_zvbb_zvkt_zihintntl_zicond_zimop_zcmop_zcb"
"_zfa_zawrs_supm"},
 
+  /* RVA23S contains all mandatory base ISA for RVA23U64 and the privileged
+ extensions as mandatory extensions.  */
+  {"rva23s64", "rv64imafdcbv_zicsr_zicntr_zihpm_ziccif_ziccrse_ziccamoa"
+   "_zicclsm_zic64b_za64rs_zihintpause_zba_zbb_zbs_zicbom_zicbop"
+   "_zicboz_zfhmin_zkt_zvfhmin_zvbb_zvkt_zihintntl_zicond_zimop_zcmop_zcb"
+   "_zfa_zawrs_svbare_svade_ssccptr_sstvecd_sstvala_sscounterenw_svpbmt"
+   "_svinval_svnapot_sstc_sscofpmf_ssnpm_ssu64xl_sha_supm"
+  },
+
   /* RVB23 contains all mandatory base ISA for RVA22U64 and the new extension
  'zihintntl,zicond,zimop,zcmop,zfa,zawrs' as mandatory
  extensions.  */
@@ -303,7 +312,14 @@ static const riscv_profiles riscv_profiles_table[] =
"_zicboz_zfhmin_zkt_zihintntl_zicond_zimop_zcmop_zcb"
"_zfa_zawrs"},
 
-  /* Currently we do not define S/M mode Profiles in gcc part.  */
+  /* RVB23S contains all mandatory base ISA for RVB23U64 and the privileged
+ extensions as mandatory extensions.  */
+  {"rvb23s64", "rv64imafdcb_zicsr_zicntr_zihpm_ziccif_ziccrse_ziccamoa"
+   "_zicclsm_zic64b_za64rs_zihintpause_zba_zbb_zbs_zicbom_zicbop"
+   "_zicboz_zfhmin_zkt_zvfhmin_zvbb_zvkt_zihintntl_zicond_zimop_zcmop_zcb"
+   "_zfa_zawrs_svbare_svade_ssccptr_sstvecd_sstvala_sscounterenw_svpbmt"
+   "_svinval_svnapot_sstc_sscofpmf_ssu64xl_supm"
+  },
 
   /* Terminate the list.  */
   {NULL, NULL}
diff --git a/gcc/testsuite/gcc.target/riscv/arch-rva23s.c 
b/gcc/testsuite/gcc.target/riscv/arch-rva23s.c
new file mode 100644
index ..215249d52b14
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/arch-rva23s.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rva23s64 -mabi=lp64d" } */
+
+void foo(){}
+
+/* { dg-final { scan-assembler-times ".attribute arch, 
\"rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0"
+"_b1p0_v1p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0"
+"_ziccrse1p0_zicntr2p0_zicond1p0_zicsr2p0_zihintntl1p0_zihintpause2p0_zihpm2p0_zimop1p0"
+"_za64rs1p0_zaamo1p0_zalrsc1p0_zawrs1p0_zfa1p0_zfhmin1p0_zca1p0_zcb1p0_zcd1p0_zcmop1p0"
+"_zba1p0_zbb1p0_zbs1p0_zkt1p0_zvbb1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0"
+"_zvfhmin1p0_zvkb1p0_zvkt1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0_sha1p0_shcounterenw1p0"
+"_shgatpa1p0_shtvala1p0_shvsatpa1p0_shvstvala1p0_shvstvecd1p0_ssccptr1p0_sscofpmf1p0"
+"_sscounterenw1p0_ssnpm1p0_ssstateen1p0_sstc1p0_sstvala1p0_sstvecd1p0_ssu64xl1p0_supm1p0"
+"_svade1p0_svbare1p0_svinval1p0_svnapot1p0_svpbmt1p0\" 1} } */
diff --git a/gcc/testsuite/gcc.target/riscv/arch-rvb23s.c 
b/gcc/testsuite/gcc.target/riscv/arch-rvb23s.c
new file mode 100644
index ..aa71f7dad7d7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/arch-rvb23s.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rvb23s64 -mabi=lp64d" } */
+
+void foo(){}
+
+/* { dg-final { scan-assembler-times ".attribute arch, 
\"rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0"
+"_b1p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0"
+"_zicntr2p0_zicond1p0_zicsr2p0_zihintntl1p0_zihintpause2p0_zihpm2p0_zimop1p0_zmmul1p0"
+"_za64rs1p0_zaamo1p0_zalrsc1p0_zawrs1p0_zfa1p0_zfhmin1p0_zca1p0_zcb1p0_zcd1p0_zcmop1p0"
+"_zba1p0_zbb1p0_zbs1p0_zkt1p0_zvbb1p0_zve32f1p0_zve32x1p0_zvfhmin1p0_zvkb1p0_zvkt1p0"
+"_zvl32b1p0_ssccptr1p0_sscofpmf1p0_sscounterenw1p0_sstc1p0_sstvala1p0_sstvecd1p0"
+"_ssu64xl1p0_supm1p0_svade1p0_svbare1p0_svinval1p0_svnapot1p0_svpbmt1p0\" 1} } 
*/


[gcc r12-11214] c++/79786 - bogus invocation of DATA_ABI_ALIGNMENT macro

2025-06-24 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:32ad5415b926ca25e9102309e92561c1a30aa8ff

commit r12-11214-g32ad5415b926ca25e9102309e92561c1a30aa8ff
Author: Richard Biener 
Date:   Mon Feb 3 11:27:20 2025 +0100

c++/79786 - bogus invocation of DATA_ABI_ALIGNMENT macro

The first argument is supposed to be a type, not a decl.

PR c++/79786
gcc/cp/
* rtti.cc (emit_tinfo_decl): Fix DATA_ABI_ALIGNMENT invocation.

(cherry picked from commit 6ec19825b4e72611cdbd4749feed67b61392aa81)

Diff:
---
 gcc/cp/rtti.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/cp/rtti.cc b/gcc/cp/rtti.cc
index f5b43ec0fb25..ac2ee70df634 100644
--- a/gcc/cp/rtti.cc
+++ b/gcc/cp/rtti.cc
@@ -1707,7 +1707,8 @@ emit_tinfo_decl (tree decl)
   /* Avoid targets optionally bumping up the alignment to improve
 vector instruction accesses, tinfo are never accessed this way.  */
 #ifdef DATA_ABI_ALIGNMENT
-  SET_DECL_ALIGN (decl, DATA_ABI_ALIGNMENT (decl, TYPE_ALIGN (TREE_TYPE 
(decl;
+  SET_DECL_ALIGN (decl, DATA_ABI_ALIGNMENT (TREE_TYPE (decl),
+   TYPE_ALIGN (TREE_TYPE (decl;
   DECL_USER_ALIGN (decl) = true;
 #endif
   return true;


[gcc r12-11222] tree-optimization/112859 - add comment

2025-06-24 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:e8b18dc45950f80d74e62ac7b4e3fa51d430d9cc

commit r12-11222-ge8b18dc45950f80d74e62ac7b4e3fa51d430d9cc
Author: Richard Biener 
Date:   Tue Jan 28 15:01:25 2025 +0100

tree-optimization/112859 - add comment

This adds a comment before the workaround, indicating flaky
dependence analysis.

PR tree-optimization/112859
* tree-loop-distribution.cc
(loop_distribution::pg_add_dependence_edges): Add comment.

(cherry picked from commit 3ccbc8c9d182c380e396631b2b5a683de4fddba9)

Diff:
---
 gcc/tree-loop-distribution.cc | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/gcc/tree-loop-distribution.cc b/gcc/tree-loop-distribution.cc
index 7b4fad238d5d..8629b7fa75ac 100644
--- a/gcc/tree-loop-distribution.cc
+++ b/gcc/tree-loop-distribution.cc
@@ -2137,7 +2137,9 @@ loop_distribution::pg_add_dependence_edges (struct graph 
*rdg, int dir,
  this_dir = -this_dir;
}
  /* When then dependence distance of the innermost common
-loop of the DRs is zero we have a conflict.  */
+loop of the DRs is zero we have a conflict.  This is
+due to wonky dependence analysis which sometimes
+ends up using a zero distance in place of unknown.  */
  auto l1 = gimple_bb (DR_STMT (dr1))->loop_father;
  auto l2 = gimple_bb (DR_STMT (dr2))->loop_father;
  int idx = index_in_loop_nest (find_common_loop (l1, l2)->num,


[gcc(refs/users/meissner/heads/work212)] Add -mcpu=future support.

2025-06-24 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:777bcd6059e031450bb0c068446d070954979895

commit 777bcd6059e031450bb0c068446d070954979895
Author: Michael Meissner 
Date:   Tue Jun 24 21:19:28 2025 -0400

Add -mcpu=future support.

This is patch #1 of 3 that adds the support that can be used in developing 
GCC
support for potential future PowerPC processors.  With all 3 patches, the 
tuning
for the 'future' processor is the same as power10 and power11.  It may be 
in the
future this tuning will change as any future PowerPC processor evolves.

Patch #2 will change the tuning support in power10.md to treat 
-mtune=future the
same as -mtune=power10.

Patch #3 will add tests for -mcpu=future.

These changes are being added so that hardware designers can evaluate 
potential
new features to be added to the PowerPC processors in the future.  It may be
these features will be incorporated into real hardware using a different 
name in
the future. Or it may be these features will not be incorporated into actual
PowerPC hardware in the future.

I have rewritten these patches to make it easier in the future to add new
processors that are scheduled like power10 and power11 systems, or remove the
'future' cpu from being scheduled like a power10 if we add new tuning
characteristics for potential future processors.

I added a new macro (CASE_PROCESSOR_POWER10_TUNING) that expands to a set of
PROCESSOR_ cases for processors that should be tuned like a power10
processor.  In this patch, power10, power11, and future are selected.

I also added a new inline function (power10_tuning_p) that returns true if 
the
processor is to be scheduled like a power10.

I have modified the various ASM_CPU_SPEC macros to pass -mfuture to the
assembler if -mcpu=future was used.

I have updated config.gcc to allow the user to configure the GCC compiler
using the --with-cpu=future option.

I have tested these patches on both big endian and little endian PowerPC
servers, with no regressions.  Can I check these patches into the trunk?

2025-06-24  Michael Meissner  

gcc/

* config.gcc (powerpc*-*-*): Add support for using 
--with-cpu=future.
* config/rs6000/aix71.h (ASM_CPU_SPEC): Pass -mfuture to the 
assembler
if -mcpu=future was used on the command line.
* config/rs6000/aix72.h (ASM_CPU_SPEC): Likewise.
* config/rs6000/aix73.h (ASM_CPU_SPEC): Likewise.
* config/rs6000/driver-rs6000.cc (asm_names): Likewise.
* config/rs6000/rs6000-c.cc (rs6000_target_modify_macros): If
-mcpu=future, define the macro _ARCH_FUTURE.
* config/rs6000/rs6000-cpus.def (FUTURE_MASKS_SERVER): New macro.
(POWERPC_MASKS): Add support for the internal -mfuture that is set 
by
the -mcpu=future option.
(future cpu): Add support for -mcpu=future.
* config/rs6000/rs6000-opts.h (enum processor_type): Add
PROCESSOR_FUTURE.
(CASE_PROCESSOR_POWER10_TUNING): New macro.
(power10_tuning_p): New inline helper function.
* config/rs6000/rs6000-string.cc (expand_compare_loop): Convert
PROCESSOR_POWER10 and PROCESSOR_POWER11 cases into
CASE_PROCESSOR_POWER10_TUNING to allow -mcpu=future as well.
* config/rs6000/rs6000-tables.opt: Regenerate the table, adding the
future cpu to the enumeration.
* config/rs6000/rs6000.cc (rs6000_option_override_internal): Change
tests against PROCESSOR_POWER10 and PROCESSOR_POWER11 to include
PROCESSOR_FUTURE as well.
(rs6000_reassociation_width): Likewise.
(rs6000_adjust_cost): Likewise.
(rs6000_issue_rate): Likewise.
(rs6000_sched_reorder): Likewise.
(rs6000_sched_reorder2): Likewise.
(rs6000_register_move_cost): Likewise.
* config/rs6000/rs6000.md (cpu attribute): Add future cpu.
* config/rs6000/rs6000.h (ASM_CPU_SPEC): Pass -mfuture to the 
assembler
if -mcpu=future was used on the command line.
* config/rs6000/rs6000.opt (-mfuture): New internal ISA bit for
-mcpu=future.

Diff:
---
 gcc/config.gcc  |  4 ++--
 gcc/config/rs6000/aix71.h   |  1 +
 gcc/config/rs6000/aix72.h   |  1 +
 gcc/config/rs6000/aix73.h   |  1 +
 gcc/config/rs6000/driver-rs6000.cc  |  2 ++
 gcc/config/rs6000/rs6000-c.cc   |  2 ++
 gcc/config/rs6000/rs6000-cpus.def   |  5 +
 gcc/config/rs6000/rs6000-opts.h | 22 +
 gcc/config/rs6000/rs6000-string.cc  |  3 +--
 gcc/config/rs6000/rs6000-tables.opt | 11 +++
 gcc/config/rs6000/rs6000.cc | 39 +
 gcc/config/rs6000/rs6000.h  |  1 +
 gcc/config/rs6000/rs6000.md

[gcc(refs/users/meissner/heads/work212)] Add -mcpu=future tests.

2025-06-24 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:bee22599ac1ac4cc217dbfc432eedb0a4be351ef

commit bee22599ac1ac4cc217dbfc432eedb0a4be351ef
Author: Michael Meissner 
Date:   Tue Jun 24 22:12:44 2025 -0400

Add -mcpu=future tests.

This is patch #3 of 3 to add -mcpu=future support to the PowerPC.

Compared to the previous version of this patch, I updated a comment to say
_ARCH_FUTURE instead of _ARCH_PWR11 that was a typo.

This patch adds simple tests for -mcpu=future.

I have tested these patches on both big endian and little endian PowerPC
servers, with no regressions.  Can I check these patches into the trunk?

2025-06-24  Michael Meissner  

gcc/testsuite/

* gcc.target/powerpc/future-1.c: New test.
* gcc.target/powerpc/future-2.c: Likewise.

Diff:
---
 gcc/testsuite/gcc.target/powerpc/future-1.c | 13 +
 gcc/testsuite/gcc.target/powerpc/future-2.c | 24 
 2 files changed, 37 insertions(+)

diff --git a/gcc/testsuite/gcc.target/powerpc/future-1.c 
b/gcc/testsuite/gcc.target/powerpc/future-1.c
index e69de29bb2d1..7bd8e5ddbd00 100644
--- a/gcc/testsuite/gcc.target/powerpc/future-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/future-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-mdejagnu-cpu=future -O2" } */
+
+/* Basic check to see if the compiler supports -mcpu=future and if it defines
+   _ARCH_FUTURE.  */
+
+#ifndef _ARCH_FUTURE
+#error "-mcpu=future is not supported"
+#endif
+
+void foo (void)
+{
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/future-2.c 
b/gcc/testsuite/gcc.target/powerpc/future-2.c
index e69de29bb2d1..5552cefa3c2e 100644
--- a/gcc/testsuite/gcc.target/powerpc/future-2.c
+++ b/gcc/testsuite/gcc.target/powerpc/future-2.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+/* Check if we can set the future target via a target attribute.  */
+
+__attribute__((__target__("cpu=power9")))
+void foo_p9 (void)
+{
+}
+
+__attribute__((__target__("cpu=power10")))
+void foo_p10 (void)
+{
+}
+
+__attribute__((__target__("cpu=power11")))
+void foo_p11 (void)
+{
+}
+
+__attribute__((__target__("cpu=future")))
+void foo_future (void)
+{
+}


[gcc(refs/users/meissner/heads/work212)] Add -mcpu=future tuning support.

2025-06-24 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:3b0584f4a13647fd8e935400573af52f0fd0abb2

commit 3b0584f4a13647fd8e935400573af52f0fd0abb2
Author: Michael Meissner 
Date:   Tue Jun 24 22:05:11 2025 -0400

Add -mcpu=future tuning support.

This is patch #2 of 3 to add -mcpu=future support to the PowerPC.

This patch makes -mtune=future use the same tuning decision as 
-mtune=power10 or
-mtune=power11.

I added a new attribute (power10_tuning) that says whether the current 
processor
is tuned like a power10.  This is true for power10, power11, and future
processors.

I modified all of the:

(eq_attr "cpu" "power10,power11")

tests to:

(eq_attr "power10_tuning" "yes")

This will allow us to make one change to add new processors that also use 
the
power10 tuning rules, or we can easily remove processors.  For example, we 
might
want to modify the -mtune=future rules in the future.

I have tested these patches on both big endian and little endian PowerPC
servers, with no regressions.  Can I check these patches into the trunk?

2025-06-24  Michael Meissner  

gcc/

* config/rs6000/power10.md (all reservations): Switch to use the
"power10_tuning" attribute for deciding if the current processor is
tuned like a power10.
* config/rs6000/rs6000.md (power10_tuning): New attribute.

Diff:
---
 gcc/config/rs6000/power10.md| 142 ++--
 gcc/config/rs6000/rs6000.md |   7 ++
 gcc/testsuite/gcc.target/powerpc/future-1.c |   0
 gcc/testsuite/gcc.target/powerpc/future-2.c |   0
 4 files changed, 78 insertions(+), 71 deletions(-)

diff --git a/gcc/config/rs6000/power10.md b/gcc/config/rs6000/power10.md
index fd31b16b3314..0efeeb896025 100644
--- a/gcc/config/rs6000/power10.md
+++ b/gcc/config/rs6000/power10.md
@@ -97,12 +97,12 @@
(eq_attr "update" "no")
(eq_attr "size" "!128")
(eq_attr "prefixed" "no")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "power10_tuning" "yes"))
   "DU_any_power10,LU_power10")
 
 (define_insn_reservation "power10-fused-load" 4
   (and (eq_attr "type" "fused_load_cmpi,fused_addis_load,fused_load_load")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "power10_tuning" "yes"))
   "DU_even_power10,LU_power10")
 
 (define_insn_reservation "power10-prefixed-load" 4
@@ -110,13 +110,13 @@
(eq_attr "update" "no")
(eq_attr "size" "!128")
(eq_attr "prefixed" "yes")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "power10_tuning" "yes"))
   "DU_even_power10,LU_power10")
 
 (define_insn_reservation "power10-load-update" 4
   (and (eq_attr "type" "load")
(eq_attr "update" "yes")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "power10_tuning" "yes"))
   "DU_even_power10,LU_power10+SXU_power10")
 
 (define_insn_reservation "power10-fpload-double" 4
@@ -124,7 +124,7 @@
(eq_attr "update" "no")
(eq_attr "size" "64")
(eq_attr "prefixed" "no")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "power10_tuning" "yes"))
   "DU_any_power10,LU_power10")
 
 (define_insn_reservation "power10-prefixed-fpload-double" 4
@@ -132,14 +132,14 @@
(eq_attr "update" "no")
(eq_attr "size" "64")
(eq_attr "prefixed" "yes")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "power10_tuning" "yes"))
   "DU_even_power10,LU_power10")
 
 (define_insn_reservation "power10-fpload-update-double" 4
   (and (eq_attr "type" "fpload")
(eq_attr "update" "yes")
(eq_attr "size" "64")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "power10_tuning" "yes"))
   "DU_even_power10,LU_power10+SXU_power10")
 
 ; SFmode loads are cracked and have additional 3 cycles over DFmode
@@ -148,27 +148,27 @@
   (and (eq_attr "type" "fpload")
(eq_attr "update" "no")
(eq_attr "size" "32")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "power10_tuning" "yes"))
   "DU_even_power10,LU_power10")
 
 (define_insn_reservation "power10-fpload-update-single" 7
   (and (eq_attr "type" "fpload")
(eq_attr "update" "yes")
(eq_attr "size" "32")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "power10_tuning" "yes"))
   "DU_even_power10,LU_power10+SXU_power10")
 
 (define_insn_reservation "power10-vecload" 4
   (and (eq_attr "type" "vecload")
(eq_attr "size" "!256")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "power10_tuning" "yes"))
   "DU_any_power10,LU_power10")
 
 ; lxvp
 (define_insn_reservation "power10-vecload-pair" 4
   (and (eq_attr "type" "vecload")
(eq_attr "size" "256")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "power10_tuning" "yes"))
   "DU_even_power10,LU_power10+SXU_power10")
 
 ; Store Unit
@@ -178,12 +178,12 @@
(eq_attr "prefixed" "no")
(eq_attr "

[gcc r12-11219] testsuite: add testcase for fixed PR107467

2025-06-24 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:87d788926ba4ccca9a086c138584c10d1e63084d

commit r12-11219-g87d788926ba4ccca9a086c138584c10d1e63084d
Author: Sam James 
Date:   Mon Oct 21 12:11:42 2024 +0100

testsuite: add testcase for fixed PR107467

PR107467 ended up being fixed by the fix for PR115110, but let's
add the testcase on top.

gcc/testsuite/ChangeLog:
PR tree-optimization/107467
PR middle-end/115110

* g++.dg/lto/pr107467_0.C: New test.

(cherry picked from commit 4e09ae37dbe0a10f48490214f50ff733cc92280a)

Diff:
---
 gcc/testsuite/g++.dg/lto/pr107467_0.C | 52 +++
 1 file changed, 52 insertions(+)

diff --git a/gcc/testsuite/g++.dg/lto/pr107467_0.C 
b/gcc/testsuite/g++.dg/lto/pr107467_0.C
new file mode 100644
index ..a871aca82459
--- /dev/null
+++ b/gcc/testsuite/g++.dg/lto/pr107467_0.C
@@ -0,0 +1,52 @@
+/* { dg-lto-do run } */
+/* { dg-lto-options {{ -O2 -fno-strict-aliasing -flto }} } */
+
+template 
+struct pair
+{
+int first;
+T second;
+};
+
+template 
+[[gnu::optimize("strict-aliasing")]]
+bool __attribute__((noinline))
+compare_pairs(const pair &lhs, const pair &rhs) {
+  return lhs.first == rhs.first && lhs.second == rhs.second;
+}
+
+template  struct Combined {
+  pair 
+__attribute__((noinline)) get_const() {
+return pair{123, nullptr};
+  }
+[[gnu::optimize("strict-aliasing")]]
+  bool 
+__attribute__((noinline)) clashy() {
+return compare_pairs(get_const(), get_const());
+  }
+};
+
+class SomeClass {};
+class OtherClass {};
+
+[[gnu::optimize("strict-aliasing")]]
+[[gnu::used]]
+void some_func() {
+  Combined myvar;
+  __builtin_printf("%i\n", myvar.clashy());
+}
+
+[[gnu::optimize("strict-aliasing")]]
+void other_func() {
+  Combined myvar;
+  int t = myvar.clashy();
+  if (!t)
+  __builtin_abort();
+}
+
+[[gnu::optimize("O0")]]
+int main()
+{
+  other_func();
+}


[gcc] Created branch 'meissner/heads/work212-orig' in namespace 'refs/users'

2025-06-24 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work212-orig' was created in namespace 'refs/users' 
pointing to:

 63076dbe2153... Remove non-SLP path from vectorizable_load


[gcc r16-1666] i386: Remove CLDEMOTE for clients

2025-06-24 Thread Haochen Jiang via Gcc-cvs
https://gcc.gnu.org/g:0c701c7d5fb95681c6d4accfbd6382e99ebf0e82

commit r16-1666-g0c701c7d5fb95681c6d4accfbd6382e99ebf0e82
Author: Haochen Jiang 
Date:   Wed Jun 25 10:34:37 2025 +0800

i386: Remove CLDEMOTE for clients

CLDEMOTE is not enabled on clients according to SDM. SDM only mentioned
it will be enabled on Xeon and Atom servers, not clients. Remove them
since Alder Lake (where it is introduced).

gcc/ChangeLog:

* config/i386/i386.h (PTA_ALDERLAKE): Use PTA_GOLDMONT_PLUS
as base to remove PTA_CLDEMOTE.
(PTA_SIERRAFOREST): Add PTA_CLDEMOTE since PTA_ALDERLAKE
does not include that anymore.
* doc/invoke.texi: Update texi file.

Diff:
---
 gcc/config/i386/i386.h |  8 +---
 gcc/doc/invoke.texi| 29 ++---
 2 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 812055085bb5..661fb8e7e52c 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2446,12 +2446,14 @@ constexpr wide_int_bitmask PTA_GOLDMONT_PLUS = 
PTA_GOLDMONT | PTA_RDPID
   | PTA_SGX | PTA_PTWRITE;
 constexpr wide_int_bitmask PTA_TREMONT = PTA_GOLDMONT_PLUS | PTA_CLWB
   | PTA_GFNI | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_CLDEMOTE | PTA_WAITPKG;
-constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_TREMONT | PTA_ADX | PTA_AVX
+constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_GOLDMONT_PLUS | PTA_CLWB
+  | PTA_GFNI | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_WAITPKG | PTA_ADX | PTA_AVX
   | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_LZCNT
   | PTA_PCONFIG | PTA_PKU | PTA_VAES | PTA_VPCLMULQDQ | PTA_SERIALIZE
   | PTA_HRESET | PTA_KL | PTA_WIDEKL | PTA_AVXVNNI;
-constexpr wide_int_bitmask PTA_SIERRAFOREST = PTA_ALDERLAKE | PTA_AVXIFMA
-  | PTA_AVXVNNIINT8 | PTA_AVXNECONVERT | PTA_CMPCCXADD | PTA_ENQCMD | 
PTA_UINTR;
+constexpr wide_int_bitmask PTA_SIERRAFOREST = PTA_ALDERLAKE | PTA_CLDEMOTE
+  | PTA_AVXIFMA | PTA_AVXVNNIINT8 | PTA_AVXNECONVERT | PTA_CMPCCXADD
+  | PTA_ENQCMD | PTA_UINTR;
 constexpr wide_int_bitmask PTA_GRANITERAPIDS = PTA_SAPPHIRERAPIDS | 
PTA_AMX_FP16
   | PTA_PREFETCHI | PTA_AVX10_1;
 constexpr wide_int_bitmask PTA_GRANITERAPIDS_D = PTA_GRANITERAPIDS
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 95790f7bd171..100bdaf90451 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -34846,37 +34846,36 @@ VPCLMULQDQ, AVX512BITALG, RDPID and AVX512VPOPCNTDQ 
instruction set support.
 Intel Alder Lake/Raptor Lake/Meteor Lake/Gracemont CPU with 64-bit extensions,
 MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW,
 PCLMUL, RDRND, XSAVE, XSAVEC, XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX,
-GFNI-SSE, CLWB, MOVDIRI, MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI,
-BMI2, F16C, FMA, LZCNT, PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL,
-WIDEKL and AVX-VNNI instruction set support.
+GFNI-SSE, CLWB, MOVDIRI, MOVDIR64B, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C,
+FMA, LZCNT, PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL and
+AVX-VNNI instruction set support.
 
 @item arrowlake
 Intel Arrow Lake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3,
 SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, XSAVEC,
 XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, MOVDIRI,
-MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT,
-PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI,
-UINTR, AVXIFMA, AVXVNNIINT8, AVXNECONVERT and CMPCCXADD instruction set
-support.
+MOVDIR64B, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT, PCONFIG, PKU,
+VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, UINTR, AVXIFMA,
+AVXVNNIINT8, AVXNECONVERT and CMPCCXADD instruction set support.
 
 @item arrowlake-s
 @itemx lunarlake
 Intel Arrow Lake S/Lunar Lake CPU with 64-bit extensions, MOVBE, MMX, SSE,
 SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND,
 XSAVE, XSAVEC, XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB,
-MOVDIRI, MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA,
-LZCNT, PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI,
-UINTR, AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, AVXVNNIINT16, SHA512,
-SM3 and SM4 instruction set support.
+MOVDIRI, MOVDIR64B, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT,
+PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, UINTR,
+AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, AVXVNNIINT16, SHA512, SM3 and
+SM4 instruction set support.
 
 @item pantherlake
 Intel Panther Lake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3,
 SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, XSAVEC,
 XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, MOVDIRI,
-MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT,
-PCONFIG, PK

[gcc] Created branch 'meissner/heads/work212-bugs' in namespace 'refs/users'

2025-06-24 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work212-bugs' was created in namespace 'refs/users' 
pointing to:

 09be5ec3d304... Add ChangeLog.meissner and REVISION.


[gcc r16-1667] x86: Update -mtune=intel for Diamond Rapids/Clearwater Forest

2025-06-24 Thread H.J. Lu via Gcc-cvs
https://gcc.gnu.org/g:7fd6cb3c8488465ae0529f543f5309584961503d

commit r16-1667-g7fd6cb3c8488465ae0529f543f5309584961503d
Author: H.J. Lu 
Date:   Wed Jun 25 07:40:31 2025 +0800

x86: Update -mtune=intel for Diamond Rapids/Clearwater Forest

-mtune=intel is used to generate a single binary that runs well on both
big cores and small cores, similar to hybrid CPUs.  Update -mtune=intel to
tune for Diamond Rapids and Clearwater Forest instead of Silvermont.

PR target/120815
* common/config/i386/i386-common.cc (processor_alias_table):
Replace CPU_SLM/PTA_NEHALEM with CPU_HASWELL/PTA_HASWELL for
PROCESSOR_INTEL.
* config/i386/i386-options.cc (processor_cost_table): Replace
intel_cost with alderlake_cost.
* config/i386/x86-tune-costs.h (intel_cost): Removed.
* config/i386/x86-tune-sched.cc (ix86_issue_rate): Treat
PROCESSOR_INTEL like PROCESSOR_ALDERLAKE.
(ix86_adjust_cost): Likewise.
* doc/invoke.texi: Update -mtune=intel for Diamond Rapids and
Clearwater Forest.

Signed-off-by: H.J. Lu 

Diff:
---
 gcc/common/config/i386/i386-common.cc |   2 +-
 gcc/config/i386/i386-options.cc   |   2 +-
 gcc/config/i386/x86-tune-costs.h  | 121 --
 gcc/config/i386/x86-tune-sched.cc |   4 +-
 gcc/doc/invoke.texi   |   4 +-
 5 files changed, 6 insertions(+), 127 deletions(-)

diff --git a/gcc/common/config/i386/i386-common.cc 
b/gcc/common/config/i386/i386-common.cc
index 64908ce740a9..dfcd4e9a7276 100644
--- a/gcc/common/config/i386/i386-common.cc
+++ b/gcc/common/config/i386/i386-common.cc
@@ -2310,7 +2310,7 @@ const pta processor_alias_table[] =
 M_CPU_TYPE (INTEL_GRANDRIDGE), P_PROC_AVX2},
   {"clearwaterforest", PROCESSOR_CLEARWATERFOREST, CPU_HASWELL,
 PTA_CLEARWATERFOREST, M_CPU_TYPE (INTEL_CLEARWATERFOREST), P_PROC_AVX2},
-  {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM,
+  {"intel", PROCESSOR_INTEL, CPU_HASWELL, PTA_HASWELL,
 M_VENDOR (VENDOR_INTEL), P_NONE},
   {"geode", PROCESSOR_GEODE, CPU_GEODE,
 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE, 0, P_NONE},
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
index d1e321ad74b1..27feeddaf812 100644
--- a/gcc/config/i386/i386-options.cc
+++ b/gcc/config/i386/i386-options.cc
@@ -797,7 +797,7 @@ static const struct processor_costs *processor_cost_table[] 
=
   &alderlake_cost, /* PROCESSOR_ARROWLAKE_S.   */
   &alderlake_cost, /* PROCESSOR_PANTHERLAKE.   */
   &icelake_cost,   /* PROCESSOR_DIAMONDRAPIDS. */
-  &intel_cost, /* PROCESSOR_INTEL. */
+  &alderlake_cost, /* PROCESSOR_INTEL. */
   &lujiazui_cost,  /* PROCESSOR_LUJIAZUI.  */
   &yongfeng_cost,  /* PROCESSOR_YONGFENG.  */
   &shijidadao_cost,/* PROCESSOR_SHIJIDADAO.*/
diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index a5b99d1f9629..c8603b982af4 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -3568,127 +3568,6 @@ struct processor_costs tremont_cost = {
   COSTS_N_INSNS (2),   /* Branch mispredict scale.  */
 };
 
-static stringop_algs intel_memcpy[2] = {
-  {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
-  {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
- {8192, rep_prefix_8_byte, false}, {-1, libcall, false;
-static stringop_algs intel_memset[2] = {
-  {libcall, {{8, loop, false}, {15, unrolled_loop, false},
- {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
-  {libcall, {{24, loop, false}, {32, unrolled_loop, false},
- {8192, rep_prefix_8_byte, false}, {-1, libcall, false;
-static const
-struct processor_costs intel_cost = {
-  {
-  /* Start of register allocator costs.  integer->integer move cost is 2. */
-  6,/* cost for loading QImode using movzbl */
-  {4, 4, 4},   /* cost of loading integer registers
-  in QImode, HImode and SImode.
-  Relative to reg-reg move (2).  */
-  {6, 6, 6},   /* cost of storing integer registers */
-  2,   /* cost of reg,reg fld/fst */
-  {6, 6, 8},   /* cost of loading fp registers
-  in SFmode, DFmode and XFmode */
-  {6, 6, 10},  /* cost of storing fp registers
-  in SFmode, DFmode and XFmode */
-  2,   /* cost of moving MMX register */
-  {6, 6},  /* cost of loading MMX registers
-  in SImode and DImode */
-  {6, 6},  

[gcc r15-9858] i386: Remove CLDEMOTE for clients

2025-06-24 Thread Haochen Jiang via Gcc-cvs
https://gcc.gnu.org/g:9f817c98403be763e0e265c56522e256ad97329a

commit r15-9858-g9f817c98403be763e0e265c56522e256ad97329a
Author: Haochen Jiang 
Date:   Tue Jun 17 14:08:38 2025 +0800

i386: Remove CLDEMOTE for clients

CLDEMOTE is not enabled on clients according to the SDM.  The SDM only
mentions that it will be enabled on Xeon and Atom servers, not clients.
Remove it from client targets starting with Alder Lake (where it was
introduced).

gcc/ChangeLog:

* config/i386/i386.h (PTA_ALDERLAKE): Use PTA_GOLDMONT_PLUS
as base to remove PTA_CLDEMOTE.
(PTA_SIERRAFOREST): Add PTA_CLDEMOTE since PTA_ALDERLAKE
does not include that anymore.
* doc/invoke.texi: Update texi file.

Diff:
---
 gcc/config/i386/i386.h |  8 +---
 gcc/doc/invoke.texi| 29 ++---
 2 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 8507243d726b..ab6ba21830e6 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2425,12 +2425,14 @@ constexpr wide_int_bitmask PTA_GOLDMONT_PLUS = 
PTA_GOLDMONT | PTA_RDPID
   | PTA_SGX | PTA_PTWRITE;
 constexpr wide_int_bitmask PTA_TREMONT = PTA_GOLDMONT_PLUS | PTA_CLWB
   | PTA_GFNI | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_CLDEMOTE | PTA_WAITPKG;
-constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_TREMONT | PTA_ADX | PTA_AVX
+constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_GOLDMONT_PLUS | PTA_CLWB
+  | PTA_GFNI | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_WAITPKG | PTA_ADX | PTA_AVX
   | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_LZCNT
   | PTA_PCONFIG | PTA_PKU | PTA_VAES | PTA_VPCLMULQDQ | PTA_SERIALIZE
   | PTA_HRESET | PTA_KL | PTA_WIDEKL | PTA_AVXVNNI;
-constexpr wide_int_bitmask PTA_SIERRAFOREST = PTA_ALDERLAKE | PTA_AVXIFMA
-  | PTA_AVXVNNIINT8 | PTA_AVXNECONVERT | PTA_CMPCCXADD | PTA_ENQCMD | 
PTA_UINTR;
+constexpr wide_int_bitmask PTA_SIERRAFOREST = PTA_ALDERLAKE | PTA_CLDEMOTE
+  | PTA_AVXIFMA | PTA_AVXVNNIINT8 | PTA_AVXNECONVERT | PTA_CMPCCXADD
+  | PTA_ENQCMD | PTA_UINTR;
 constexpr wide_int_bitmask PTA_GRANITERAPIDS = PTA_SAPPHIRERAPIDS | 
PTA_AMX_FP16
   | PTA_PREFETCHI;
 constexpr wide_int_bitmask PTA_GRANITERAPIDS_D = PTA_GRANITERAPIDS
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 617a3d8ae182..baaa0c1aed5e 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -35151,37 +35151,36 @@ VPCLMULQDQ, AVX512BITALG, RDPID and AVX512VPOPCNTDQ 
instruction set support.
 Intel Alder Lake/Raptor Lake/Meteor Lake/Gracemont CPU with 64-bit extensions,
 MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW,
 PCLMUL, RDRND, XSAVE, XSAVEC, XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX,
-GFNI-SSE, CLWB, MOVDIRI, MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI,
-BMI2, F16C, FMA, LZCNT, PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL,
-WIDEKL and AVX-VNNI instruction set support.
+GFNI-SSE, CLWB, MOVDIRI, MOVDIR64B, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C,
+FMA, LZCNT, PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL and
+AVX-VNNI instruction set support.
 
 @item arrowlake
 Intel Arrow Lake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3,
 SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, XSAVEC,
 XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, MOVDIRI,
-MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT,
-PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI,
-UINTR, AVXIFMA, AVXVNNIINT8, AVXNECONVERT and CMPCCXADD instruction set
-support.
+MOVDIR64B, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT, PCONFIG, PKU,
+VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, UINTR, AVXIFMA,
+AVXVNNIINT8, AVXNECONVERT and CMPCCXADD instruction set support.
 
 @item arrowlake-s
 @itemx lunarlake
 Intel Arrow Lake S/Lunar Lake CPU with 64-bit extensions, MOVBE, MMX, SSE,
 SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND,
 XSAVE, XSAVEC, XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB,
-MOVDIRI, MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA,
-LZCNT, PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI,
-UINTR, AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, AVXVNNIINT16, SHA512,
-SM3 and SM4 instruction set support.
+MOVDIRI, MOVDIR64B, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT,
+PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, UINTR,
+AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, AVXVNNIINT16, SHA512, SM3 and
+SM4 instruction set support.
 
 @item pantherlake
 Intel Panther Lake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3,
 SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, XSAVEC,
 XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, MOVDIRI,
-MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT,
-PCONFIG, PKU, VAES, VPCLM

[gcc r12-11232] i386: Remove CLDEMOTE for clients

2025-06-24 Thread Haochen Jiang via Gcc-cvs
https://gcc.gnu.org/g:ee04d1554aa87d6155bd96e38c91351871273b51

commit r12-11232-gee04d1554aa87d6155bd96e38c91351871273b51
Author: Haochen Jiang 
Date:   Wed Jun 25 11:04:11 2025 +0800

i386: Remove CLDEMOTE for clients

CLDEMOTE is not enabled on clients according to the SDM.  The SDM only
mentions that it will be enabled on Xeon and Atom servers, not clients.
Remove it from client targets starting with Alder Lake (where it was
introduced).

gcc/ChangeLog:

* config/i386/i386.h (PTA_ALDERLAKE): Use PTA_GOLDMONT_PLUS
as base to remove PTA_CLDEMOTE.
* doc/invoke.texi: Update texi file.

Diff:
---
 gcc/config/i386/i386.h | 3 ++-
 gcc/doc/invoke.texi| 5 ++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 8416e5b02b64..48c494a73652 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2350,7 +2350,8 @@ constexpr wide_int_bitmask PTA_GOLDMONT_PLUS = 
PTA_GOLDMONT | PTA_RDPID
   | PTA_SGX | PTA_PTWRITE;
 constexpr wide_int_bitmask PTA_TREMONT = PTA_GOLDMONT_PLUS | PTA_CLWB
   | PTA_GFNI | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_CLDEMOTE | PTA_WAITPKG;
-constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_TREMONT | PTA_ADX | PTA_AVX
+constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_GOLDMONT_PLUS | PTA_CLWB
+  | PTA_GFNI | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_WAITPKG | PTA_ADX | PTA_AVX
   | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_LZCNT
   | PTA_PCONFIG | PTA_PKU | PTA_VAES | PTA_VPCLMULQDQ | PTA_SERIALIZE
   | PTA_HRESET | PTA_KL | PTA_WIDEKL | PTA_AVXVNNI;
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 926b72982e2d..60fbe5e0c7d7 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -31554,9 +31554,8 @@ VPCLMULQDQ, AVX512BITALG, RDPID and AVX512VPOPCNTDQ 
instruction set support.
 Intel Alder Lake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, 
SSSE3,
 SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, XSAVEC, XSAVES,
 XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, MOVDIRI, MOVDIR64B,
-CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT, PCONFIG, PKU,
-VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL and AVX-VNNI instruction set
-support.
+WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT, PCONFIG, PKU, VAES,
+VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL and AVX-VNNI instruction set support.
 
 @item sapphirerapids
 Intel Sapphire Rapids CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3,


[gcc r16-1643] x86: Update memcpy/memset inline strategies for -mtune=generic

2025-06-24 Thread H.J. Lu via Gcc-cvs
https://gcc.gnu.org/g:d073bb6cfc219d4b6c283a0b527ee88b42e640e0

commit r16-1643-gd073bb6cfc219d4b6c283a0b527ee88b42e640e0
Author: H.J. Lu 
Date:   Thu Mar 18 18:43:10 2021 -0700

x86: Update memcpy/memset inline strategies for -mtune=generic

Update memcpy and memset inline strategies for -mtune=generic:

1. Don't align memory.
2. For known sizes, prefer vector loop, unroll loop with 4 moves or
   stores per iteration without aligning the loop, up to 256 bytes.
3. For unknown sizes, use memcpy/memset.
4. Since each loop iteration has 4 stores, and 8 stores may be needed for
   zeroing with an unrolled loop, change CLEAR_RATIO to 10 so that zeroing
   up to 72 bytes is fully unrolled with 9 stores without SSE.

gcc/

PR target/70308
PR target/101366
PR target/102294
PR target/108585
PR target/118276
PR target/119596
PR target/119703
PR target/119704
* config/i386/x86-tune-costs.h (generic_memcpy): Updated.
(generic_memset): Likewise.
(generic_cost): Change CLEAR_RATIO to 10.

gcc/testsuite/

PR target/70308
PR target/101366
PR target/102294
PR target/108585
PR target/118276
PR target/119596
PR target/119703
PR target/119704
* g++.target/i386/memset-pr101366-1.C: New test.
* g++.target/i386/memset-pr101366-2.C: Likewise.
* g++.target/i386/memset-pr108585-1a.C: Likewise.
* g++.target/i386/memset-pr108585-1b.C: Likewise.
* g++.target/i386/memset-pr118276-1a.C: Likewise.
* g++.target/i386/memset-pr118276-1b.C: Likewise.
* g++.target/i386/memset-pr118276-1c.C: Likewise.
* gcc.target/i386/memcpy-strategy-12.c: Likewise.
* gcc.target/i386/memcpy-strategy-13.c: Likewise.
* gcc.target/i386/memset-pr70308-1a.c: Likewise.
* gcc.target/i386/memset-pr70308-1b.c: Likewise.
* gcc.target/i386/memset-strategy-25.c: Likewise.
* gcc.target/i386/memset-strategy-26.c: Likewise.
* gcc.target/i386/memset-strategy-27.c: Likewise.
* gcc.target/i386/memset-strategy-28.c: Likewise.
* gcc.target/i386/memset-strategy-29.c: Likewise.
* gcc.target/i386/memset-strategy-30.c: Likewise.
* gcc.target/i386/memset-strategy-31.c: Likewise.
* gcc.target/i386/auto-init-padding-3.c: Expect XMM stores.
* gcc.target/i386/auto-init-padding-9.c: Likewise.
* gcc.target/i386/mvc17.c: Fail with "rep mov"
* gcc.target/i386/pr111657-1.c: Scan for unrolled loop.  Fail
with "rep mov".
* gcc.target/i386/shrink_wrap_1.c: Also pass
-mmemset-strategy=rep_8byte:-1:align.
* gcc.target/i386/sw-1.c: Also pass -mstringop-strategy=rep_byte.

Signed-off-by: H.J. Lu 

Diff:
---
 gcc/config/i386/x86-tune-costs.h   | 39 ++
 gcc/testsuite/g++.target/i386/memset-pr101366-1.C  | 30 +++
 gcc/testsuite/g++.target/i386/memset-pr101366-2.C  | 26 +
 gcc/testsuite/g++.target/i386/memset-pr108585-1a.C | 43 +++
 gcc/testsuite/g++.target/i386/memset-pr108585-1b.C | 43 +++
 gcc/testsuite/g++.target/i386/memset-pr118276-1a.C | 35 +
 gcc/testsuite/g++.target/i386/memset-pr118276-1b.C | 24 +
 gcc/testsuite/g++.target/i386/memset-pr118276-1c.C | 24 +
 .../gcc.target/i386/auto-init-padding-3.c  |  7 +--
 .../gcc.target/i386/auto-init-padding-9.c  | 25 +++--
 gcc/testsuite/gcc.target/i386/memcpy-strategy-12.c | 34 
 gcc/testsuite/gcc.target/i386/memcpy-strategy-13.c | 11 
 gcc/testsuite/gcc.target/i386/memset-pr70308-1a.c  | 46 
 gcc/testsuite/gcc.target/i386/memset-pr70308-1b.c  | 61 ++
 gcc/testsuite/gcc.target/i386/memset-strategy-25.c | 29 ++
 gcc/testsuite/gcc.target/i386/memset-strategy-26.c | 15 ++
 gcc/testsuite/gcc.target/i386/memset-strategy-27.c | 11 
 gcc/testsuite/gcc.target/i386/memset-strategy-28.c | 29 ++
 gcc/testsuite/gcc.target/i386/memset-strategy-29.c | 30 +++
 gcc/testsuite/gcc.target/i386/memset-strategy-30.c | 30 +++
 gcc/testsuite/gcc.target/i386/memset-strategy-31.c | 30 +++
 gcc/testsuite/gcc.target/i386/mvc17.c  |  2 +-
 gcc/testsuite/gcc.target/i386/pr111657-1.c | 24 -
 gcc/testsuite/gcc.target/i386/shrink_wrap_1.c  |  2 +-
 gcc/testsuite/gcc.target/i386/sw-1.c   |  2 +-
 25 files changed, 626 insertions(+), 26 deletions(-)

diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index b08081e37cfb..a5b99d1f9629 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-

[gcc r16-1650] AArch64: promote aarch64-autovec-preference to mautovec-preference

2025-06-24 Thread Tamar Christina via Gcc-cvs
https://gcc.gnu.org/g:f60d3f5cf15abc32e167e3c00f79c4ab6d00ec38

commit r16-1650-gf60d3f5cf15abc32e167e3c00f79c4ab6d00ec38
Author: Tamar Christina 
Date:   Tue Jun 24 11:11:36 2025 +0100

AArch64: promote aarch64-autovec-preference to mautovec-preference

As requested in my patch for -mmax-vectorization, this promotes the parameter
--param aarch64-autovec-preference to a first-class, top-level target flag.

If both the parameter and the flag are specified, the parameter takes
precedence, with the reasoning that it may already be embedded in build
systems.

gcc/ChangeLog:

* config/aarch64/aarch64.cc (aarch64_override_options_internal): Set
value of parameter based on option.
* config/aarch64/aarch64.opt (autovec-preference): New.
* doc/invoke.texi (autovec-preference): Document it.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/autovec_param_asimd-only_2.c: New test.
* gcc.target/aarch64/autovec_param_default_2.c: New test.
* gcc.target/aarch64/autovec_param_prefer-asimd_2.c: New test.
* gcc.target/aarch64/autovec_param_prefer-sve_2.c: New test.
* gcc.target/aarch64/autovec_param_sve-only_2.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64.cc  |  8 +
 gcc/config/aarch64/aarch64.opt | 11 +--
 gcc/doc/invoke.texi| 38 ++
 .../aarch64/autovec_param_asimd-only_2.c   |  4 +++
 .../gcc.target/aarch64/autovec_param_default_2.c   |  4 +++
 .../aarch64/autovec_param_prefer-asimd_2.c |  4 +++
 .../aarch64/autovec_param_prefer-sve_2.c   |  4 +++
 .../gcc.target/aarch64/autovec_param_sve-only_2.c  |  4 +++
 8 files changed, 61 insertions(+), 16 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index adbe05ac404a..abbb97768f5e 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -19042,6 +19042,14 @@ aarch64_override_options_internal (struct gcc_options 
*opts)
 SET_OPTION_IF_UNSET (opts, &global_options_set,
 param_vect_scalar_cost_multiplier, 1);
 
+  /* Synchronize the -mautovec-preference and aarch64_autovec_preference using
+ whichever one is not default.  If both are set then prefer the param flag
+ over the parameters.  */
+  if (opts->x_autovec_preference != AARCH64_AUTOVEC_DEFAULT)
+SET_OPTION_IF_UNSET (opts, &global_options_set,
+aarch64_autovec_preference,
+opts->x_autovec_preference);
+
   aarch64_override_options_after_change_1 (opts);
 }
 
diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
index 17e1c700dd2b..9ca753e6a886 100644
--- a/gcc/config/aarch64/aarch64.opt
+++ b/gcc/config/aarch64/aarch64.opt
@@ -290,6 +290,13 @@ msve-vector-bits=
 Target RejectNegative Joined Enum(sve_vector_bits) 
Var(aarch64_sve_vector_bits) Init(SVE_SCALABLE)
 -msve-vector-bits= Set the number of bits in an SVE vector 
register.
 
+mautovec-preference=
+Target RejectNegative Joined Var(autovec_preference) 
Enum(aarch64_autovec_preference) Init(AARCH64_AUTOVEC_DEFAULT)
+-mautovec-preference=[default|asimd-only|sve-only|prefer-asimd|prefer-sve]
+Force an ISA selection strategy for auto-vectorization.  For best performance 
it
+is highly recommended to use -mcpu or -mtune instead.  This parameter should
+only be used for code exploration.
+
 mmax-vectorization
 Target Var(flag_aarch64_max_vectorization) Save
 Override the scalar cost model such that vectorization is always profitable.
@@ -360,8 +367,8 @@ The number of Newton iterations for calculating the 
reciprocal for double type.
 
 -param=aarch64-autovec-preference=
 Target Joined Var(aarch64_autovec_preference) Enum(aarch64_autovec_preference) 
Init(AARCH64_AUTOVEC_DEFAULT) Param
---param=aarch64-autovec-preference=[default|asimd-only|sve-only|prefer-asimd|prefer-sve]
-Force an ISA selection strategy for auto-vectorization.
+An old alias for -mautovec-preference.  If both -mautovec-preference and
+--param=aarch64-autovec-preference are passed, the --param value will be used.
 
 Enum
 Name(aarch64_autovec_preference) Type(enum aarch64_autovec_preference_enum) 
UnknownError(unknown autovec preference %qs)
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 93322778a520..a0c6d3d082e6 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -17627,20 +17627,9 @@ The precision of division is proportional to this 
param when division
 approximation is enabled.  The default value is 2.
 
 @item aarch64-autovec-preference
-Force an ISA selection strategy for auto-vectorization.
-@table @samp
-@item default
-Use the default heuristics.
-@item asimd-only
-Use only Advanced SIMD for auto-vectorization.
-@item sve-only
-Use only SVE for auto-vectorization.
-@item prefer-asimd
-Use both Advanced SIMD and SVE.  Prefer

[gcc r16-1647] Don't duplicate setup code cost when doing group-candidate cost calculation.

2025-06-24 Thread hongtao Liu via Gcc-cvs
https://gcc.gnu.org/g:c06979ff95748559da0c2d3aa4eda9d5999eaaf6

commit r16-1647-gc06979ff95748559da0c2d3aa4eda9d5999eaaf6
Author: hongtao.liu 
Date:   Wed Mar 5 12:25:32 2025 +0100

Don't duplicate setup code cost when doing group-candidate cost calculation.

-  /* Uses in a group can share setup code, so only add setup cost once.  */
-  cost -= cost.scratch;

It looks like the original code tried to avoid double counting, but
unfortunately cost is reset inside the following loop, which invalidates
the code above and causes the same setup code cost to be duplicated in
each use of the group.

The patch fixes the issue.  It can also improve 548.exchange_r by 6% with
-march=x86-64-v3 -O2 due to better ivopt on EMR.

No big performance impact for SPEC2017 on graviton4/SPR with -mcpu=native
-Ofast -fomit-frame-pointer -flto=auto.

gcc/ChangeLog:

PR target/115842
* tree-ssa-loop-ivopts.cc (determine_group_iv_cost_address):
Don't recalculate inv_expr when doing group-candidate cost
calculation.

Diff:
---
 gcc/tree-ssa-loop-ivopts.cc | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/gcc/tree-ssa-loop-ivopts.cc b/gcc/tree-ssa-loop-ivopts.cc
index a2150818a43f..8a6726f19889 100644
--- a/gcc/tree-ssa-loop-ivopts.cc
+++ b/gcc/tree-ssa-loop-ivopts.cc
@@ -5015,8 +5015,6 @@ determine_group_iv_cost_address (struct ivopts_data *data,
sum_cost = infinite_cost;
 }
 
-  /* Uses in a group can share setup code, so only add setup cost once.  */
-  cost -= cost.scratch;
   /* Compute and add costs for rest uses of this group.  */
   for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
 {
@@ -5032,7 +5030,12 @@ determine_group_iv_cost_address (struct ivopts_data 
*data,
if (!inv_exprs)
  inv_exprs = BITMAP_ALLOC (NULL);
 
-   bitmap_set_bit (inv_exprs, inv_expr->id);
+   /* Uses in a group can share setup code,
+  so only add setup cost once.  */
+   if (bitmap_bit_p (inv_exprs, inv_expr->id))
+ cost -= cost.scratch;
+   else
+ bitmap_set_bit (inv_exprs, inv_expr->id);
  }
   sum_cost += cost;
 }


[gcc r16-1649] AArch64: propose -mmax-vectorization as an option to override vector costing

2025-06-24 Thread Tamar Christina via Gcc-cvs
https://gcc.gnu.org/g:8e80287601c5954bc437212be5f91aaf12074b22

commit r16-1649-g8e80287601c5954bc437212be5f91aaf12074b22
Author: Tamar Christina 
Date:   Tue Jun 24 11:10:11 2025 +0100

AArch64: propose -mmax-vectorization as an option to override vector costing

The middle-end provides a way to make vectorization more profitable by
scaling vect-scalar-cost-multiplier; this adds a more user-friendly option
to make that easier to use.

I propose making it an actual -m option that we document and retain, rather
than using the parameter name.  In the future I would like to extend this
option to modify additional costing in the AArch64 backend itself.

This can be used together with --param aarch64-autovec-preference to get the
vectorizer to, say, always vectorize with SVE.  I did consider making this an
additional enum value of --param aarch64-autovec-preference, but I also think
this is a useful thing to be able to set with pragmas and attributes.  I am
open to suggestions.

Note that as a follow-up I plan on extending -fdump-tree-vect to support
-stats, which is then intended to be usable with this flag.

gcc/ChangeLog:

* config/aarch64/aarch64.opt (max-vectorization): New.
* config/aarch64/aarch64.cc (aarch64_override_options_internal): 
Save
and restore option.
Implement it through vect-scalar-cost-multiplier.
(aarch64_attributes): Default to off.
* common/config/aarch64/aarch64-common.cc (aarch64_handle_option):
Initialize option.
* doc/extend.texi (max-vectorization): Document attribute.
* doc/invoke.texi (max-vectorization): Document flag.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/sve/cost_model_17.c: New test.
* gcc.target/aarch64/sve/cost_model_18.c: New test.

Diff:
---
 gcc/common/config/aarch64/aarch64-common.cc |  4 
 gcc/config/aarch64/aarch64.cc   |  8 
 gcc/config/aarch64/aarch64.opt  |  4 
 gcc/doc/extend.texi | 10 ++
 gcc/doc/invoke.texi |  9 +
 .../gcc.target/aarch64/sve/cost_model_17.c  | 21 +
 .../gcc.target/aarch64/sve/cost_model_18.c  | 21 +
 7 files changed, 77 insertions(+)

diff --git a/gcc/common/config/aarch64/aarch64-common.cc 
b/gcc/common/config/aarch64/aarch64-common.cc
index b9ed83642ade..1488697c6ce4 100644
--- a/gcc/common/config/aarch64/aarch64-common.cc
+++ b/gcc/common/config/aarch64/aarch64-common.cc
@@ -142,6 +142,10 @@ aarch64_handle_option (struct gcc_options *opts,
   opts->x_aarch64_flag_outline_atomics = val;
   return true;
 
+case OPT_mmax_vectorization:
+  opts->x_flag_aarch64_max_vectorization = val;
+  return true;
+
 default:
   return true;
 }
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index af8415c29a97..adbe05ac404a 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -19036,6 +19036,12 @@ aarch64_override_options_internal (struct gcc_options 
*opts)
   if (TARGET_SME && !TARGET_SVE2)
 sorry ("no support for %qs without %qs", "sme", "sve2");
 
+  /* Set scalar costing to a high value such that we always pick
+ vectorization.  Increase scalar costing by 1%.  */
+  if (opts->x_flag_aarch64_max_vectorization)
+SET_OPTION_IF_UNSET (opts, &global_options_set,
+param_vect_scalar_cost_multiplier, 1);
+
   aarch64_override_options_after_change_1 (opts);
 }
 
@@ -19786,6 +19792,8 @@ static const struct aarch64_attribute_info 
aarch64_attributes[] =
  OPT_msign_return_address_ },
   { "outline-atomics", aarch64_attr_bool, true, NULL,
  OPT_moutline_atomics},
+  { "max-vectorization", aarch64_attr_bool, false, NULL,
+ OPT_mmax_vectorization},
   { NULL, aarch64_attr_custom, false, NULL, OPT }
 };
 
diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
index f32d56d4ffae..17e1c700dd2b 100644
--- a/gcc/config/aarch64/aarch64.opt
+++ b/gcc/config/aarch64/aarch64.opt
@@ -290,6 +290,10 @@ msve-vector-bits=
 Target RejectNegative Joined Enum(sve_vector_bits) 
Var(aarch64_sve_vector_bits) Init(SVE_SCALABLE)
 -msve-vector-bits= Set the number of bits in an SVE vector 
register.
 
+mmax-vectorization
+Target Var(flag_aarch64_max_vectorization) Save
+Override the scalar cost model such that vectorization is always profitable.
+
 mverbose-cost-dump
 Target Undocumented Var(flag_aarch64_verbose_cost)
 Enables verbose cost model dumping in the debug dump files.
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 7da99f77ec82..55adf649acf8 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -3884,6 +3884,16 @@ Enable or disable calls to out-of-line helpers to 
implemen