[gcc r16-1623] match: Simplify doubled not, negate and conjugate operators to a non-lvalue

2025-06-23 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:7e4d55f3030e4840129001a2a9a4d656a6c18da5

commit r16-1623-g7e4d55f3030e4840129001a2a9a4d656a6c18da5
Author: Mikael Morin 
Date:   Sat Jun 21 20:12:31 2025 +0200

match: Simplify doubled not, negate and conjugate operators to a non-lvalue

gcc/ChangeLog:

* match.pd (`-(-X)`, `~(~X)`, `conj(conj(X))`): Add a
NON_LVALUE_EXPR wrapper to the simplification of doubled unary
operators NEGATE_EXPR, BIT_NOT_EXPR and CONJ_EXPR.

gcc/testsuite/ChangeLog:

* gfortran.dg/non_lvalue_1.f90: New test.

Diff:
---
 gcc/match.pd   |  6 +++---
 gcc/testsuite/gfortran.dg/non_lvalue_1.f90 | 32 ++
 2 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 0f53c162fce3..f4416d9172c4 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -2357,7 +2357,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 /* ~~x -> x */
 (simplify
   (bit_not (bit_not @0))
-  @0)
+  (non_lvalue @0))
 
 /* zero_one_valued_p will match when a value is known to be either
0 or 1 including constants 0 or 1.
@@ -4037,7 +4037,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (negate (nop_convert? (negate @1)))
   (if (!TYPE_OVERFLOW_SANITIZED (type)
&& !TYPE_OVERFLOW_SANITIZED (TREE_TYPE (@1)))
-   (view_convert @1)))
+   (non_lvalue (view_convert @1))))
 
  /* We can't reassociate floating-point unless -fassociative-math
 or fixed-point plus or minus because of saturation to +-Inf.  */
@@ -5767,7 +5767,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (simplify
  (conj (convert? (conj @0)))
  (if (tree_nop_conversion_p (TREE_TYPE (@0), type))
-  (convert @0)))
+  (non_lvalue (convert @0))))
 
 /* conj({x,y}) -> {x,-y}  */
 (simplify
diff --git a/gcc/testsuite/gfortran.dg/non_lvalue_1.f90 
b/gcc/testsuite/gfortran.dg/non_lvalue_1.f90
new file mode 100644
index ..61dad5a2ce1b
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/non_lvalue_1.f90
@@ -0,0 +1,32 @@
+! { dg-do compile }
+! { dg-additional-options "-fdump-tree-original" }
+!
+! Check the generation of NON_LVALUE_EXPR expressions in cases where a unary
+! operator expression would simplify to a bare data reference.
+
+! A NON_LVALUE_EXPR is generated for a double negation that would simplify to
+! a bare data reference.
+function f1 (f1_arg1)
+  integer, value :: f1_arg1
+  integer :: f1
+  f1 = -(-f1_arg1)
+end function
+! { dg-final { scan-tree-dump "__result_f1 = NON_LVALUE_EXPR <f1_arg1>;" "original" } }
+
+! A NON_LVALUE_EXPR is generated for a double complement that would simplify to
+! a bare data reference.
+function f2 (f2_arg1)
+  integer, value :: f2_arg1
+  integer :: f2
+  f2 = not(not(f2_arg1))
+end function
+! { dg-final { scan-tree-dump "__result_f2 = NON_LVALUE_EXPR <f2_arg1>;" "original" } }
+
+! A NON_LVALUE_EXPR is generated for a double complex conjugate that would
+! simplify to a bare data reference.
+function f3 (f3_arg1)
+  complex, value :: f3_arg1
+  complex :: f3
+  f3 = conjg(conjg(f3_arg1))
+end function
+! { dg-final { scan-tree-dump "__result_f3 = NON_LVALUE_EXPR <f3_arg1>;" "original" } }


[gcc] Created branch 'mikael/heads/non_lvalue_v05' in namespace 'refs/users'

2025-06-23 Thread Mikael Morin via Gcc-cvs
The branch 'mikael/heads/non_lvalue_v05' was created in namespace 'refs/users' 
pointing to:

 7e4d55f3030e... match: Simplify doubled not, negate and conjugate operators


[gcc(refs/users/mikael/heads/non_lvalue_v05)] match: Simplify doubled not, negate and conjugate operators to a non-lvalue

2025-06-23 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:7e4d55f3030e4840129001a2a9a4d656a6c18da5

commit 7e4d55f3030e4840129001a2a9a4d656a6c18da5
Author: Mikael Morin 
Date:   Sat Jun 21 20:12:31 2025 +0200

match: Simplify doubled not, negate and conjugate operators to a non-lvalue

gcc/ChangeLog:

* match.pd (`-(-X)`, `~(~X)`, `conj(conj(X))`): Add a
NON_LVALUE_EXPR wrapper to the simplification of doubled unary
operators NEGATE_EXPR, BIT_NOT_EXPR and CONJ_EXPR.

gcc/testsuite/ChangeLog:

* gfortran.dg/non_lvalue_1.f90: New test.

Diff:
---
 gcc/match.pd   |  6 +++---
 gcc/testsuite/gfortran.dg/non_lvalue_1.f90 | 32 ++
 2 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 0f53c162fce3..f4416d9172c4 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -2357,7 +2357,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 /* ~~x -> x */
 (simplify
   (bit_not (bit_not @0))
-  @0)
+  (non_lvalue @0))
 
 /* zero_one_valued_p will match when a value is known to be either
0 or 1 including constants 0 or 1.
@@ -4037,7 +4037,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (negate (nop_convert? (negate @1)))
   (if (!TYPE_OVERFLOW_SANITIZED (type)
&& !TYPE_OVERFLOW_SANITIZED (TREE_TYPE (@1)))
-   (view_convert @1)))
+   (non_lvalue (view_convert @1))))
 
  /* We can't reassociate floating-point unless -fassociative-math
 or fixed-point plus or minus because of saturation to +-Inf.  */
@@ -5767,7 +5767,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (simplify
  (conj (convert? (conj @0)))
  (if (tree_nop_conversion_p (TREE_TYPE (@0), type))
-  (convert @0)))
+  (non_lvalue (convert @0))))
 
 /* conj({x,y}) -> {x,-y}  */
 (simplify
diff --git a/gcc/testsuite/gfortran.dg/non_lvalue_1.f90 
b/gcc/testsuite/gfortran.dg/non_lvalue_1.f90
new file mode 100644
index ..61dad5a2ce1b
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/non_lvalue_1.f90
@@ -0,0 +1,32 @@
+! { dg-do compile }
+! { dg-additional-options "-fdump-tree-original" }
+!
+! Check the generation of NON_LVALUE_EXPR expressions in cases where a unary
+! operator expression would simplify to a bare data reference.
+
+! A NON_LVALUE_EXPR is generated for a double negation that would simplify to
+! a bare data reference.
+function f1 (f1_arg1)
+  integer, value :: f1_arg1
+  integer :: f1
+  f1 = -(-f1_arg1)
+end function
+! { dg-final { scan-tree-dump "__result_f1 = NON_LVALUE_EXPR <f1_arg1>;" "original" } }
+
+! A NON_LVALUE_EXPR is generated for a double complement that would simplify to
+! a bare data reference.
+function f2 (f2_arg1)
+  integer, value :: f2_arg1
+  integer :: f2
+  f2 = not(not(f2_arg1))
+end function
+! { dg-final { scan-tree-dump "__result_f2 = NON_LVALUE_EXPR <f2_arg1>;" "original" } }
+
+! A NON_LVALUE_EXPR is generated for a double complex conjugate that would
+! simplify to a bare data reference.
+function f3 (f3_arg1)
+  complex, value :: f3_arg1
+  complex :: f3
+  f3 = conjg(conjg(f3_arg1))
+end function
+! { dg-final { scan-tree-dump "__result_f3 = NON_LVALUE_EXPR <f3_arg1>;" "original" } }


[gcc] Deleted branch 'mikael/heads/non_lvalue_v05' in namespace 'refs/users'

2025-06-23 Thread Mikael Morin via Gcc-cvs
The branch 'mikael/heads/non_lvalue_v05' in namespace 'refs/users' was deleted.
It previously pointed to:

 85b4eb8956df... match: Simplify doubled not, negate and conjugate operators

Diff:

!!! WARNING: THE FOLLOWING COMMITS ARE NO LONGER ACCESSIBLE (LOST):
---

  85b4eb8... match: Simplify doubled not, negate and conjugate operators


[gcc r16-1621] vregs: Use force_subreg when instantiating subregs [PR120721]

2025-06-23 Thread Richard Sandiford via Gcc-cvs
https://gcc.gnu.org/g:8130a2ad91ca8571b099ba020443fadab7a688ca

commit r16-1621-g8130a2ad91ca8571b099ba020443fadab7a688ca
Author: Richard Sandiford 
Date:   Mon Jun 23 08:46:27 2025 +0100

vregs: Use force_subreg when instantiating subregs [PR120721]

In this PR, we started with:

(subreg:V2DI (reg:DI virtual-reg) 0)

and vregs instantiated the virtual register to the argument pointer.
But:

(subreg:V2DI (reg:DI ap) 0)

is not a sensible subreg, since the argument pointer certainly can't
be referenced in V2DImode.  This is (IMO correctly) rejected after
g:2dcc6dbd8a00caf7cfa8cac17b3fd1c33d658016.

The vregs code that instantiates the subreg above is specific to
rvalues and already creates new instructions for nonzero offsets.
It is therefore safe to use force_subreg instead of simplify_gen_subreg.

I did wonder whether we should instead say that a subreg of a
virtual register is invalid if the same subreg would be invalid
for the associated hard registers.  But the point of virtual registers
is that the offsets from the hard registers are not known until after
expand has finished, and if an offset is nonzero, the virtual register
will be instantiated into a pseudo that contains the sum of the hard
register and the offset.  The subreg would then be correct for that
pseudo.  The subreg is only invalid in this case because there is
no offset.

gcc/
PR rtl-optimization/120721
* function.cc (instantiate_virtual_regs_in_insn): Use force_subreg
instead of simplify_gen_subreg when instantiating an rvalue SUBREG.

gcc/testsuite/
PR rtl-optimization/120721
* g++.dg/torture/pr120721.C: New test.

Diff:
---
 gcc/function.cc | 20 -
 gcc/testsuite/g++.dg/torture/pr120721.C | 39 +
 2 files changed, 48 insertions(+), 11 deletions(-)

diff --git a/gcc/function.cc b/gcc/function.cc
index a3a74b44b916..48167b0c2072 100644
--- a/gcc/function.cc
+++ b/gcc/function.cc
@@ -1722,19 +1722,17 @@ instantiate_virtual_regs_in_insn (rtx_insn *insn)
  new_rtx = instantiate_new_reg (SUBREG_REG (x), &offset);
  if (new_rtx == NULL)
continue;
+ start_sequence ();
  if (maybe_ne (offset, 0))
-   {
- start_sequence ();
- new_rtx = expand_simple_binop
-   (GET_MODE (new_rtx), PLUS, new_rtx,
-gen_int_mode (offset, GET_MODE (new_rtx)),
-NULL_RTX, 1, OPTAB_LIB_WIDEN);
- seq = end_sequence ();
- emit_insn_before (seq, insn);
-   }
- x = simplify_gen_subreg (recog_data.operand_mode[i], new_rtx,
-  GET_MODE (new_rtx), SUBREG_BYTE (x));
+   new_rtx = expand_simple_binop
+ (GET_MODE (new_rtx), PLUS, new_rtx,
+  gen_int_mode (offset, GET_MODE (new_rtx)),
+  NULL_RTX, 1, OPTAB_LIB_WIDEN);
+ x = force_subreg (recog_data.operand_mode[i], new_rtx,
+   GET_MODE (new_rtx), SUBREG_BYTE (x));
  gcc_assert (x);
+ seq = end_sequence ();
+ emit_insn_before (seq, insn);
  break;
 
default:
diff --git a/gcc/testsuite/g++.dg/torture/pr120721.C 
b/gcc/testsuite/g++.dg/torture/pr120721.C
new file mode 100644
index ..37dc46cb1187
--- /dev/null
+++ b/gcc/testsuite/g++.dg/torture/pr120721.C
@@ -0,0 +1,39 @@
+// { dg-additional-options "-w -fno-vect-cost-model" }
+
+template  struct integral_constant {
+  static constexpr int value = __v;
+};
+template  using __bool_constant = integral_constant<__v>;
+template  using enable_if_t = int;
+struct function_ref {
+  template 
+  function_ref(
+  Callable,
+  enable_if_t<__bool_constant<__is_same(int, int)>::value> * = nullptr);
+};
+struct ArrayRef {
+  int Data;
+  long Length;
+  int *begin();
+  int *end();
+};
+struct StringRef {
+  char Data;
+  long Length;
+};
+void attributeObject(function_ref);
+struct ScopedPrinter {
+  virtual void printBinaryImpl(StringRef, StringRef, ArrayRef, bool, unsigned);
+};
+struct JSONScopedPrinter : ScopedPrinter {
+  JSONScopedPrinter();
+  void printBinaryImpl(StringRef, StringRef, ArrayRef Value, bool,
+   unsigned StartOffset) {
+attributeObject([&] {
+  StartOffset;
+  for (char Val : Value)
+;
+});
+  }
+};
+JSONScopedPrinter::JSONScopedPrinter() {}


[gcc r16-1624] vect: Use combined peeling and versioning for mutually aligned DRs

2025-06-23 Thread Alex Coplan via Gcc-cvs
https://gcc.gnu.org/g:6deab186535a5aa9f930e2db637089865d0bc4ff

commit r16-1624-g6deab186535a5aa9f930e2db637089865d0bc4ff
Author: Pengfei Li 
Date:   Wed Jun 11 15:01:36 2025 +

vect: Use combined peeling and versioning for mutually aligned DRs

Current GCC uses either peeling or versioning, but not in combination,
to handle unaligned data references (DRs) during vectorization. This
limitation causes some loops with early break to fall back to scalar
code at runtime.

Consider the following loop with DRs in its early break condition:

for (int i = start; i < end; i++) {
  if (a[i] == b[i])
break;
  count++;
}

In the loop, references to a[] and b[] need to be strictly aligned for
vectorization because speculative reads that may cross page boundaries
are not allowed. Current GCC does versioning for this loop by creating a
runtime check like:

((&a[start] | &b[start]) & mask) == 0

to see if the two initial addresses both have their lower bits zero. If the
above runtime check fails, the loop will fall back to scalar code. However,
it's often possible that DRs are all unaligned at the beginning but they
become all aligned after a few loop iterations. We call this situation
DRs being "mutually aligned".

This patch enables combined peeling and versioning to avoid loops with
mutually aligned DRs falling back to scalar code. Specifically, the
function vect_peeling_supportable is updated in this patch to return a
three-state enum indicating how peeling can make all unsupportable DRs
aligned. In addition to previous true/false return values, a new state
peeling_maybe_supported is used to indicate that peeling may be able to
make these DRs aligned but we are not sure about it at compile time. In
this case, peeling should be combined with versioning so that a runtime
check will be generated to guard the peeled vectorized loop.

A new type of runtime check is also introduced for combined peeling and
versioning. It's enabled when LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT is true.
The new check tests if all DRs recorded in LOOP_VINFO_MAY_MISALIGN_STMTS
have the same lower address bits. For above loop case, the new test will
generate an XOR between two addresses, like:

((&a[start] ^ &b[start]) & mask) == 0

Therefore, if a and b have the same alignment step (element size) and
the same offset from an alignment boundary, a peeled vectorized loop
will run. This new runtime check also works for >2 DRs, with the LHS
expression being:

((a1 ^ a2) | (a2 ^ a3) | (a3 ^ a4) | ... | (an-1 ^ an)) & mask

where ai is the address of i'th DR.

This patch is bootstrapped and regression tested on x86_64-linux-gnu,
arm-linux-gnueabihf and aarch64-linux-gnu.

gcc/ChangeLog:

* tree-vect-data-refs.cc (vect_peeling_supportable): Return new
enum values to indicate if combined peeling and versioning can
potentially support vectorization.
(vect_enhance_data_refs_alignment): Support combined peeling and
versioning in vectorization analysis.
* tree-vect-loop-manip.cc (vect_create_cond_for_align_checks):
Add a new type of runtime check for mutually aligned DRs.
* tree-vect-loop.cc (_loop_vec_info::_loop_vec_info): Set
default value of allow_mutual_alignment in the initializer list.
* tree-vectorizer.h (enum peeling_support): Define type of
peeling support for function vect_peeling_supportable.
(LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT): New access macro.

gcc/testsuite/ChangeLog:

* gcc.dg/vect/vect-early-break_133_pfa6.c: Adjust test.

Diff:
---
 .../gcc.dg/vect/vect-early-break_133_pfa6.c|   2 +-
 gcc/tree-vect-data-refs.cc | 168 -
 gcc/tree-vect-loop-manip.cc|  98 +---
 gcc/tree-vect-loop.cc  |   1 +
 gcc/tree-vectorizer.h  |  16 ++
 5 files changed, 223 insertions(+), 62 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa6.c 
b/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa6.c
index ee123df6ed2b..7787d037d9dc 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa6.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa6.c
@@ -20,4 +20,4 @@ unsigned test4(char x, char *vect_a, char *vect_b, int n)
  return ret;
 }
 
-/* { dg-final { scan-tree-dump "Versioning for alignment will be applied" 
"vect" } } */
+/* { dg-final { scan-tree-dump "Both peeling and versioning will be applied" 
"vect" } } */
diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index 036903a948f6..ee040eb98881 100644
--- a/gcc/tree-vec

[gcc r16-1622] tree-optimization/120729 - limit compile time in uninit_analysis::prune_phi_opnds

2025-06-23 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:97044a47de533f2a9b3fc864e5ea318e53979079

commit r16-1622-g97044a47de533f2a9b3fc864e5ea318e53979079
Author: Richard Biener 
Date:   Fri Jun 20 15:07:20 2025 +0200

tree-optimization/120729 - limit compile time in 
uninit_analysis::prune_phi_opnds

The testcase in this PR shows, on the GCC 14 branch, that in some
degenerate cases we can spend exponential time pruning always
initialized paths through a web of PHIs.  The following adds
--param uninit-max-prune-work, defaulted to 10, to limit that
to effectively O(1).

PR tree-optimization/120729
* gimple-predicate-analysis.h (uninit_analysis::prune_phi_opnds):
Add argument of work budget remaining.
* gimple-predicate-analysis.cc (uninit_analysis::prune_phi_opnds):
Likewise.  Maintain and honor it throughout the recursion.
* params.opt (uninit-max-prune-work): New.
* doc/invoke.texi (uninit-max-prune-work): Document.

Diff:
---
 gcc/doc/invoke.texi  |  3 +++
 gcc/gimple-predicate-analysis.cc | 12 +---
 gcc/gimple-predicate-analysis.h  |  2 +-
 gcc/params.opt   |  4 
 4 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index dec3c7a1b805..91b0a201e1b6 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -17420,6 +17420,9 @@ predicate chain.
 @item uninit-max-num-chains
 Maximum number of predicates ored in the normalized predicate chain.
 
+@item uninit-max-prune-work
+Maximum amount of work done to prune paths where the variable is always 
initialized.
+
 @item sched-autopref-queue-depth
 Hardware autoprefetcher scheduler model control flag.
 Number of lookahead cycles the model looks into; at '
diff --git a/gcc/gimple-predicate-analysis.cc b/gcc/gimple-predicate-analysis.cc
index 76f6ab613107..b056b42a17ec 100644
--- a/gcc/gimple-predicate-analysis.cc
+++ b/gcc/gimple-predicate-analysis.cc
@@ -385,7 +385,8 @@ bool
 uninit_analysis::prune_phi_opnds (gphi *phi, unsigned opnds, gphi *flag_def,
  tree boundary_cst, tree_code cmp_code,
  hash_set<gphi *> *visited_phis,
- bitmap *visited_flag_phis)
+ bitmap *visited_flag_phis,
+ unsigned &max_attempts)
 {
   /* The Boolean predicate guarding the PHI definition.  Initialized
  lazily from PHI in the first call to is_use_guarded() and cached
@@ -398,6 +399,10 @@ uninit_analysis::prune_phi_opnds (gphi *phi, unsigned 
opnds, gphi *flag_def,
   if (!MASK_TEST_BIT (opnds, i))
continue;
 
+  if (max_attempts == 0)
+   return false;
+  --max_attempts;
+
   tree flag_arg = gimple_phi_arg_def (flag_def, i);
   if (!is_gimple_constant (flag_arg))
{
@@ -432,7 +437,7 @@ uninit_analysis::prune_phi_opnds (gphi *phi, unsigned 
opnds, gphi *flag_def,
  unsigned opnds_arg_phi = m_eval.phi_arg_set (phi_arg_def);
  if (!prune_phi_opnds (phi_arg_def, opnds_arg_phi, flag_arg_def,
boundary_cst, cmp_code, visited_phis,
-   visited_flag_phis))
+   visited_flag_phis, max_attempts))
return false;
 
  bitmap_clear_bit (*visited_flag_phis, SSA_NAME_VERSION (phi_result));
@@ -634,9 +639,10 @@ uninit_analysis::overlap (gphi *phi, unsigned opnds, 
hash_set *visited,
 value that is in conflict with the use guard/predicate.  */
   bitmap visited_flag_phis = NULL;
   gphi *phi_def = as_a<gphi *> (flag_def);
+  unsigned max_attempts = param_uninit_max_prune_work;
   bool all_pruned = prune_phi_opnds (phi, opnds, phi_def, boundary_cst,
 cmp_code, visited,
-&visited_flag_phis);
+&visited_flag_phis, max_attempts);
   if (visited_flag_phis)
BITMAP_FREE (visited_flag_phis);
   if (all_pruned)
diff --git a/gcc/gimple-predicate-analysis.h b/gcc/gimple-predicate-analysis.h
index f71061ec2836..67a19aa09052 100644
--- a/gcc/gimple-predicate-analysis.h
+++ b/gcc/gimple-predicate-analysis.h
@@ -152,7 +152,7 @@ private:
   bool is_use_guarded (gimple *, basic_block, gphi *, unsigned,
   hash_set *);
   bool prune_phi_opnds (gphi *, unsigned, gphi *, tree, tree_code,
-   hash_set *, bitmap *);
+   hash_set *, bitmap *, unsigned &);
   bool overlap (gphi *, unsigned, hash_set *, const predicate &);
 
   void collect_phi_def_edges (gphi *, basic_block, vec *,
diff --git a/gcc/params.opt b/gcc/params.opt
index a67f900a63f7..31aa0bd57533 100644
--- a/gcc/params.opt
+++ b/gcc/params.opt
@@ -1185,6 +1185,10 @@ predicate chain.
 Common Joined UInteger Var(param_uninit_max_num_chains) Init(8) 
IntegerRange(1, 

[gcc] Created branch 'mikael/heads/non_lvalue_v05' in namespace 'refs/users'

2025-06-23 Thread Mikael Morin via Gcc-cvs
The branch 'mikael/heads/non_lvalue_v05' was created in namespace 'refs/users' 
pointing to:

 85b4eb8956df... match: Simplify doubled not, negate and conjugate operators


[gcc r15-9855] tailc: Allow musttail tail calls with -fsanitize=address [PR120608]

2025-06-23 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:fa2e03effa5251a6f7c8b79a8e3be81c90fb5e4f

commit r15-9855-gfa2e03effa5251a6f7c8b79a8e3be81c90fb5e4f
Author: Jakub Jelinek 
Date:   Mon Jun 23 16:08:34 2025 +0200

tailc: Allow musttail tail calls with -fsanitize=address [PR120608]

These testcases show another problem with -fsanitize=address
vs. musttail tail calls.  In particular, there can be
  .ASAN_MARK (POISON, &a, 4);
etc. calls after a tail call and those just prevent the tailc pass
from marking the musttail calls as [tail call].
Normally, the sanopt pass (which comes after tailc) will optimize those
away: if there are no .ASAN_CHECK calls or normal function calls
dominated by those .ASAN_MARK (POISON, ...) calls, the poison is not
needed, because in the epilog sequence (the one dealt with in the patch
posted earlier today) all the stack slots are unpoisoned anyway
(or poisoned for use-after-return).
Unlike __builtin_tsan_exit_function, .ASAN_MARK is not a real function
and is always expanded inline, so it can never be tail called successfully,
so the patch just ignores those for the cfun->has_musttail && diag_musttail
cases.  If there is a non-musttail call, it will in the worst case fail
during expansion because there is the epilog asan sequence.

2025-06-12  Jakub Jelinek  

PR middle-end/120608
* tree-tailcall.cc (empty_eh_cleanup): Ignore .ASAN_MARK (POISON)
internal calls for the cfun->has_musttail case and diag_musttail.
(find_tail_calls): Likewise.

* c-c++-common/asan/pr120608-1.c: New test.
* c-c++-common/asan/pr120608-2.c: New test.

(cherry picked from commit 35a26f2ec55d20d524464c33b68b23328a7f6bbe)

Diff:
---
 gcc/testsuite/c-c++-common/asan/pr120608-1.c | 43 
 gcc/testsuite/c-c++-common/asan/pr120608-2.c | 39 +
 gcc/tree-tailcall.cc | 16 +++
 3 files changed, 98 insertions(+)

diff --git a/gcc/testsuite/c-c++-common/asan/pr120608-1.c 
b/gcc/testsuite/c-c++-common/asan/pr120608-1.c
new file mode 100644
index ..114c42db6f83
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/asan/pr120608-1.c
@@ -0,0 +1,43 @@
+/* PR middle-end/120608 */
+/* { dg-do run { target musttail } } */
+/* { dg-options "-O2 -fsanitize=address" } */
+
+__attribute__((noipa)) void
+foo (int *x, int *y, int *z)
+{
+  ++x[0];
+  ++y[0];
+  ++z[0];
+}
+
+__attribute__((noipa)) void
+bar (int *x, int *y, int *z)
+{
+  if (x || y || z)
+__builtin_abort ();
+}
+
+__attribute__((noipa)) void
+baz (int *x, int *y, int *z)
+{
+  (void) x; (void) y; (void) z;
+  int a = 42, b = -42, c = 0;
+  foo (&a, &b, &c);
+  [[gnu::musttail]] return bar (0, 0, 0);
+}
+
+__attribute__((noipa)) void
+qux (int *x, int *y, int *z)
+{
+  (void) x; (void) y; (void) z;
+  int a = 42, b = -42, c = 0;
+  foo (&a, &b, &c);
+  [[gnu::musttail]] return bar (0, 0, 0);
+}
+
+int
+main ()
+{
+  baz (0, 0, 0);
+  qux (0, 0, 0);
+}
diff --git a/gcc/testsuite/c-c++-common/asan/pr120608-2.c 
b/gcc/testsuite/c-c++-common/asan/pr120608-2.c
new file mode 100644
index ..251ff3a1a074
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/asan/pr120608-2.c
@@ -0,0 +1,39 @@
+/* PR middle-end/120608 */
+/* { dg-do run { target musttail } } */
+/* { dg-options "-O2 -fsanitize=address" } */
+/* { dg-set-target-env-var ASAN_OPTIONS "detect_stack_use_after_return=1" } */
+/* { dg-shouldfail "asan" } */
+
+__attribute__((noipa)) void
+foo (int *x, int *y, int *z)
+{
+  ++x[0];
+  ++y[0];
+  ++z[0];
+}
+
+__attribute__((noipa)) void
+bar (int *x, int *y, int *z)
+{
+  volatile int a = x[0] + y[0] + z[0];
+}
+
+__attribute__((noipa)) void
+baz (int *x, int *y, int *z)
+{
+  (void) x; (void) y; (void) z;
+  int a = 42, b = -42, c = 0;
+  foo (&a, &b, &c);
+  [[gnu::musttail]] return bar (&a, &b, &c);   /* { dg-warning "address of 
automatic variable 'a' passed to 'musttail' call argument" } */
+}  /* { dg-warning "address of 
automatic variable 'b' passed to 'musttail' call argument" "" { target *-*-* } 
.-1 } */
+   /* { dg-warning "address of 
automatic variable 'c' passed to 'musttail' call argument" "" { target *-*-* } 
.-2 } */
+
+int
+main ()
+{
+  baz (0, 0, 0);
+}
+
+// { dg-output "ERROR: AddressSanitizer: stack-use-after-return on 
address.*(\n|\r\n|\r)" }
+// { dg-output "READ of size .*" }
+// { dg-output ".*'a' \\(line 25\\) <== Memory access at offset \[0-9\]* is 
inside this variable.*" }
diff --git a/gcc/tree-tailcall.cc b/gcc/tree-tailcall.cc
index f593363dae43..8ce8bcf0e20b 100644
--- a/gcc/tree-tailcall.cc
+++ b/gcc/tree-tailcall.cc
@@ -528,6 +528,10 @@ empty_eh_cleanup (basic_block bb, int 
*eh_has_tsan_func_exit, int cnt)
  *eh_has_tsan_func_exit = 1;
  continue;
}
+  if (eh_has_tsan_func_exit
+  

[gcc r15-9854] expand: Allow musttail tail calls with -fsanitize=address [PR120608]

2025-06-23 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:e5cf6027581770e97790f6495a56515ea4d0f7c2

commit r15-9854-ge5cf6027581770e97790f6495a56515ea4d0f7c2
Author: Jakub Jelinek 
Date:   Mon Jun 23 15:58:55 2025 +0200

expand: Allow musttail tail calls with -fsanitize=address [PR120608]

The following testcase is rejected by GCC 15 but accepted (with
s/gnu/clang/) by clang.
The problem is that we want to execute a sequence of instructions to
unpoison all automatic variables in the function and mark the var block
allocated for use-after-return sanitization poisoned after the call,
so we were just disabling tail calls if there are any instructions
returned from asan_emit_stack_protection.
It is fine and necessary for normal tail calls, but for musttail
tail calls we actually document that accessing the automatic vars of
the caller is UB as if they end their lifetime right before the tail
call, so we also want address sanitizer use-after-return to diagnose
that.

The following patch will only disable normal tail calls when that sequence
is present, for musttail it will arrange to emit a copy of that sequence
before the tail call sequence.  That sequence only tweaks the shadow memory
and nothing in the code emitted by call expansion should touch the shadow
memory, so it is ok to emit it already before argument setup.

2025-06-23  Jakub Jelinek  

PR middle-end/120608
* cfgexpand.cc: Include rtl-iter.h.
(expand_gimple_tailcall): Add ASAN_EPILOG_SEQ argument, if non-NULL
and expand_gimple_stmt emitted a tail call, emit a copy of that
insn sequence before the call sequence.
(expand_gimple_basic_block): Remove DISABLE_TAIL_CALLS argument, add
ASAN_EPILOG_SEQ argument.  Disable tail call flag only on 
non-musttail
calls if that flag is set, pass it to expand_gimple_tailcall.
(pass_expand::execute): Pass VAR_RET_SEQ directly as last
expand_gimple_basic_block argument rather than its comparison with
NULL.

* g++.dg/asan/pr120608.C: New test.

(cherry picked from commit b9523a935aaa28ffae9118e199a2f43a8a98e27e)

Diff:
---
 gcc/cfgexpand.cc | 68 
 gcc/testsuite/g++.dg/asan/pr120608.C | 17 +
 2 files changed, 79 insertions(+), 6 deletions(-)

diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc
index 2b27076658fd..981faf36e93e 100644
--- a/gcc/cfgexpand.cc
+++ b/gcc/cfgexpand.cc
@@ -74,6 +74,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "output.h"
 #include "builtins.h"
 #include "opts.h"
+#include "rtl-iter.h"
 
 /* Some systems use __main in a way incompatible with its use in gcc, in these
cases use the macros NAME__MAIN to give a quoted symbol and SYMBOL__MAIN to
@@ -4400,9 +4401,10 @@ expand_gimple_stmt (gimple *stmt)
tailcall) and the normal result happens via a sqrt instruction.  */
 
 static basic_block
-expand_gimple_tailcall (basic_block bb, gcall *stmt, bool *can_fallthru)
+expand_gimple_tailcall (basic_block bb, gcall *stmt, bool *can_fallthru,
+   rtx_insn *asan_epilog_seq)
 {
-  rtx_insn *last2, *last;
+  rtx_insn *last2, *last, *first = get_last_insn ();
   edge e;
   edge_iterator ei;
   profile_probability probability;
@@ -4419,6 +4421,58 @@ expand_gimple_tailcall (basic_block bb, gcall *stmt, 
bool *can_fallthru)
   return NULL;
 
  found:
+
+  if (asan_epilog_seq)
+{
+  /* We need to emit a copy of the asan_epilog_seq before
+the insns emitted by expand_gimple_stmt above.  The sequence
+can contain labels, which need to be remapped.  */
+  hash_map label_map;
+  start_sequence ();
+  emit_note (NOTE_INSN_DELETED);
+  for (rtx_insn *insn = asan_epilog_seq; insn; insn = NEXT_INSN (insn))
+   switch (GET_CODE (insn))
+ {
+ case INSN:
+ case CALL_INSN:
+ case JUMP_INSN:
+   emit_copy_of_insn_after (insn, get_last_insn ());
+   break;
+ case CODE_LABEL:
+   label_map.put ((rtx) insn, (rtx) emit_label (gen_label_rtx ()));
+   break;
+ case BARRIER:
+   emit_barrier ();
+   break;
+ default:
+   gcc_unreachable ();
+ }
+  for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
+   if (JUMP_P (insn))
+ {
+   subrtx_ptr_iterator::array_type array;
+   FOR_EACH_SUBRTX_PTR (iter, array, &PATTERN (insn), ALL)
+ {
+   rtx *loc = *iter;
+   if (LABEL_REF_P (*loc))
+ {
+   rtx *lab = label_map.get ((rtx) label_ref_label (*loc));
+   gcc_assert (lab);
+   set_label_ref_label (*loc, as_a  (*lab));
+ }
+ }
+   if (JUMP_LABEL (insn))
+ {
+   rt

[gcc(refs/users/mikael/heads/non_lvalue_v05)] match: Simplify doubled not, negate and conjugate operators to a non_lvalue

2025-06-23 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:85b4eb8956df894da3819cbbda4388ff6667fc23

commit 85b4eb8956df894da3819cbbda4388ff6667fc23
Author: Mikael Morin 
Date:   Sat Jun 21 20:12:31 2025 +0200

match: Simplify doubled not, negate and conjugate operators to a non_lvalue

gcc/ChangeLog:

* match.pd (`-(-X)`, `~(~X)`, `conj(conj(X))`): Add a
NON_LVALUE_EXPR wrapper to the simplification of doubled unary
operators NEGATE_EXPR, BIT_NOT_EXPR and CONJ_EXPR.

gcc/testsuite/ChangeLog:

* gfortran.dg/non_lvalue_1.f90: New test.

Diff:
---
 gcc/match.pd   |  6 +++---
 gcc/testsuite/gfortran.dg/non_lvalue_1.f90 | 32 ++
 2 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 0f53c162fce3..f4416d9172c4 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -2357,7 +2357,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 /* ~~x -> x */
 (simplify
   (bit_not (bit_not @0))
-  @0)
+  (non_lvalue @0))
 
 /* zero_one_valued_p will match when a value is known to be either
0 or 1 including constants 0 or 1.
@@ -4037,7 +4037,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (negate (nop_convert? (negate @1)))
   (if (!TYPE_OVERFLOW_SANITIZED (type)
&& !TYPE_OVERFLOW_SANITIZED (TREE_TYPE (@1)))
-   (view_convert @1)))
+   (non_lvalue (view_convert @1))))
 
  /* We can't reassociate floating-point unless -fassociative-math
 or fixed-point plus or minus because of saturation to +-Inf.  */
@@ -5767,7 +5767,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (simplify
  (conj (convert? (conj @0)))
  (if (tree_nop_conversion_p (TREE_TYPE (@0), type))
-  (convert @0)))
+  (non_lvalue (convert @0))))
 
 /* conj({x,y}) -> {x,-y}  */
 (simplify
diff --git a/gcc/testsuite/gfortran.dg/non_lvalue_1.f90 
b/gcc/testsuite/gfortran.dg/non_lvalue_1.f90
new file mode 100644
index ..61dad5a2ce1b
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/non_lvalue_1.f90
@@ -0,0 +1,32 @@
+! { dg-do compile }
+! { dg-additional-options "-fdump-tree-original" }
+!
+! Check the generation of NON_LVALUE_EXPR expressions in cases where a unary
+! operator expression would simplify to a bare data reference.
+
+! A NON_LVALUE_EXPR is generated for a double negation that would simplify to
+! a bare data reference.
+function f1 (f1_arg1)
+  integer, value :: f1_arg1
+  integer :: f1
+  f1 = -(-f1_arg1)
+end function
+! { dg-final { scan-tree-dump "__result_f1 = NON_LVALUE_EXPR ;" 
"original" } }
+
+! A NON_LVALUE_EXPR is generated for a double complement that would simplify to
+! a bare data reference.
+function f2 (f2_arg1)
+  integer, value :: f2_arg1
+  integer :: f2
+  f2 = not(not(f2_arg1))
+end function
+! { dg-final { scan-tree-dump "__result_f2 = NON_LVALUE_EXPR ;" 
"original" } }
+
+! A NON_LVALUE_EXPR is generated for a double complex conjugate that would
+! simplify to a bare data reference.
+function f3 (f3_arg1)
+  complex, value :: f3_arg1
+  complex :: f3
+  f3 = conjg(conjg(f3_arg1))
+end function
+! { dg-final { scan-tree-dump "__result_f3 = NON_LVALUE_EXPR ;" 
"original" } }


[gcc r16-1625] expand: Allow musttail tail calls with -fsanitize=address [PR120608]

2025-06-23 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:b9523a935aaa28ffae9118e199a2f43a8a98e27e

commit r16-1625-gb9523a935aaa28ffae9118e199a2f43a8a98e27e
Author: Jakub Jelinek 
Date:   Mon Jun 23 15:58:55 2025 +0200

expand: Allow musttail tail calls with -fsanitize=address [PR120608]

The following testcase is rejected by GCC 15 but accepted (with
s/gnu/clang/) by clang.
The problem is that we want to execute a sequence of instructions to
unpoison all automatic variables in the function and mark the var block
allocated for use-after-return sanitization poisoned after the call,
so we were just disabling tail calls if there are any instructions
returned from asan_emit_stack_protection.
It is fine and necessary for normal tail calls, but for musttail
tail calls we actually document that accessing the automatic vars of
the caller is UB as if they end their lifetime right before the tail
call, so we also want address sanitizer use-after-return to diagnose
that.

The following patch will only disable normal tail calls when that sequence
is present, for musttail it will arrange to emit a copy of that sequence
before the tail call sequence.  That sequence only tweaks the shadow memory
and nothing in the code emitted by call expansion should touch the shadow
memory, so it is ok to emit it already before argument setup.

2025-06-23  Jakub Jelinek  

PR middle-end/120608
* cfgexpand.cc: Include rtl-iter.h.
(expand_gimple_tailcall): Add ASAN_EPILOG_SEQ argument, if non-NULL
and expand_gimple_stmt emitted a tail call, emit a copy of that
insn sequence before the call sequence.
(expand_gimple_basic_block): Remove DISABLE_TAIL_CALLS argument, add
ASAN_EPILOG_SEQ argument.  Disable tail call flag only on non-musttail
calls if that flag is set, pass it to expand_gimple_tailcall.
(pass_expand::execute): Pass VAR_RET_SEQ directly as last
expand_gimple_basic_block argument rather than its comparison with
NULL.

* g++.dg/asan/pr120608.C: New test.

Diff:
---
 gcc/cfgexpand.cc | 68 
 gcc/testsuite/g++.dg/asan/pr120608.C | 17 +
 2 files changed, 79 insertions(+), 6 deletions(-)

diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc
index e1cdb718e127..33649d43f71c 100644
--- a/gcc/cfgexpand.cc
+++ b/gcc/cfgexpand.cc
@@ -75,6 +75,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "builtins.h"
 #include "opts.h"
 #include "gimple-range.h"
+#include "rtl-iter.h"
 
 /* Some systems use __main in a way incompatible with its use in gcc, in these
cases use the macros NAME__MAIN to give a quoted symbol and SYMBOL__MAIN to
@@ -4458,9 +4459,10 @@ expand_gimple_stmt (gimple *stmt)
tailcall) and the normal result happens via a sqrt instruction.  */
 
 static basic_block
-expand_gimple_tailcall (basic_block bb, gcall *stmt, bool *can_fallthru)
+expand_gimple_tailcall (basic_block bb, gcall *stmt, bool *can_fallthru,
+   rtx_insn *asan_epilog_seq)
 {
-  rtx_insn *last2, *last;
+  rtx_insn *last2, *last, *first = get_last_insn ();
   edge e;
   edge_iterator ei;
   profile_probability probability;
@@ -4477,6 +4479,58 @@ expand_gimple_tailcall (basic_block bb, gcall *stmt, 
bool *can_fallthru)
   return NULL;
 
  found:
+
+  if (asan_epilog_seq)
+{
+  /* We need to emit a copy of the asan_epilog_seq before
+the insns emitted by expand_gimple_stmt above.  The sequence
+can contain labels, which need to be remapped.  */
+  hash_map label_map;
+  start_sequence ();
+  emit_note (NOTE_INSN_DELETED);
+  for (rtx_insn *insn = asan_epilog_seq; insn; insn = NEXT_INSN (insn))
+   switch (GET_CODE (insn))
+ {
+ case INSN:
+ case CALL_INSN:
+ case JUMP_INSN:
+   emit_copy_of_insn_after (insn, get_last_insn ());
+   break;
+ case CODE_LABEL:
+   label_map.put ((rtx) insn, (rtx) emit_label (gen_label_rtx ()));
+   break;
+ case BARRIER:
+   emit_barrier ();
+   break;
+ default:
+   gcc_unreachable ();
+ }
+  for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
+   if (JUMP_P (insn))
+ {
+   subrtx_ptr_iterator::array_type array;
+   FOR_EACH_SUBRTX_PTR (iter, array, &PATTERN (insn), ALL)
+ {
+   rtx *loc = *iter;
+   if (LABEL_REF_P (*loc))
+ {
+   rtx *lab = label_map.get ((rtx) label_ref_label (*loc));
+   gcc_assert (lab);
+   set_label_ref_label (*loc, as_a  (*lab));
+ }
+ }
+   if (JUMP_LABEL (insn))
+ {
+   rtx *lab = label_map.get (JUMP_LABEL (insn));
+   gcc_assert (

[gcc r16-1626] tailc: Allow musttail tail calls with -fsanitize=address [PR120608]

2025-06-23 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:35a26f2ec55d20d524464c33b68b23328a7f6bbe

commit r16-1626-g35a26f2ec55d20d524464c33b68b23328a7f6bbe
Author: Jakub Jelinek 
Date:   Mon Jun 23 16:08:34 2025 +0200

tailc: Allow musttail tail calls with -fsanitize=address [PR120608]

These testcases show another problem with -fsanitize=address
vs. musttail tail calls.  In particular, there can be
  .ASAN_MARK (POISON, &a, 4);
etc. calls after a tail call and those just prevent the tailc pass
from marking the musttail calls as [tail call].
Normally, the sanopt pass (which comes after tailc) will optimize those
away, the optimization is if there are no .ASAN_CHECK calls or normal
function calls dominated by those .ASAN_MARK (POISON, ...) calls, the
poison is not needed, because in the epilog sequence (the one dealt with
in the patch posted earlier today) all the stack slots are unpoisoned anyway
(or poisoned for use-after-return).
Unlike __builtin_tsan_exit_function, .ASAN_MARK is not a real function
and is always expanded inline, so it can never be tail called successfully,
so the patch just ignores those for the cfun->has_musttail && diag_musttail
cases.  If there is a non-musttail call, it will fail worst case during
expansion because there is the epilog asan sequence.

2025-06-12  Jakub Jelinek  

PR middle-end/120608
* tree-tailcall.cc (empty_eh_cleanup): Ignore .ASAN_MARK (POISON)
internal calls for the cfun->has_musttail case and diag_musttail.
(find_tail_calls): Likewise.

* c-c++-common/asan/pr120608-1.c: New test.
* c-c++-common/asan/pr120608-2.c: New test.

Diff:
---
 gcc/testsuite/c-c++-common/asan/pr120608-1.c | 43 
 gcc/testsuite/c-c++-common/asan/pr120608-2.c | 39 +
 gcc/tree-tailcall.cc | 16 +++
 3 files changed, 98 insertions(+)

diff --git a/gcc/testsuite/c-c++-common/asan/pr120608-1.c 
b/gcc/testsuite/c-c++-common/asan/pr120608-1.c
new file mode 100644
index ..114c42db6f83
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/asan/pr120608-1.c
@@ -0,0 +1,43 @@
+/* PR middle-end/120608 */
+/* { dg-do run { target musttail } } */
+/* { dg-options "-O2 -fsanitize=address" } */
+
+__attribute__((noipa)) void
+foo (int *x, int *y, int *z)
+{
+  ++x[0];
+  ++y[0];
+  ++z[0];
+}
+
+__attribute__((noipa)) void
+bar (int *x, int *y, int *z)
+{
+  if (x || y || z)
+__builtin_abort ();
+}
+
+__attribute__((noipa)) void
+baz (int *x, int *y, int *z)
+{
+  (void) x; (void) y; (void) z;
+  int a = 42, b = -42, c = 0;
+  foo (&a, &b, &c);
+  [[gnu::musttail]] return bar (0, 0, 0);
+}
+
+__attribute__((noipa)) void
+qux (int *x, int *y, int *z)
+{
+  (void) x; (void) y; (void) z;
+  int a = 42, b = -42, c = 0;
+  foo (&a, &b, &c);
+  [[gnu::musttail]] return bar (0, 0, 0);
+}
+
+int
+main ()
+{
+  baz (0, 0, 0);
+  qux (0, 0, 0);
+}
diff --git a/gcc/testsuite/c-c++-common/asan/pr120608-2.c 
b/gcc/testsuite/c-c++-common/asan/pr120608-2.c
new file mode 100644
index ..251ff3a1a074
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/asan/pr120608-2.c
@@ -0,0 +1,39 @@
+/* PR middle-end/120608 */
+/* { dg-do run { target musttail } } */
+/* { dg-options "-O2 -fsanitize=address" } */
+/* { dg-set-target-env-var ASAN_OPTIONS "detect_stack_use_after_return=1" } */
+/* { dg-shouldfail "asan" } */
+
+__attribute__((noipa)) void
+foo (int *x, int *y, int *z)
+{
+  ++x[0];
+  ++y[0];
+  ++z[0];
+}
+
+__attribute__((noipa)) void
+bar (int *x, int *y, int *z)
+{
+  volatile int a = x[0] + y[0] + z[0];
+}
+
+__attribute__((noipa)) void
+baz (int *x, int *y, int *z)
+{
+  (void) x; (void) y; (void) z;
+  int a = 42, b = -42, c = 0;
+  foo (&a, &b, &c);
+  [[gnu::musttail]] return bar (&a, &b, &c);   /* { dg-warning "address of 
automatic variable 'a' passed to 'musttail' call argument" } */
+}  /* { dg-warning "address of 
automatic variable 'b' passed to 'musttail' call argument" "" { target *-*-* } 
.-1 } */
+   /* { dg-warning "address of 
automatic variable 'c' passed to 'musttail' call argument" "" { target *-*-* } 
.-2 } */
+
+int
+main ()
+{
+  baz (0, 0, 0);
+}
+
+// { dg-output "ERROR: AddressSanitizer: stack-use-after-return on 
address.*(\n|\r\n|\r)" }
+// { dg-output "READ of size .*" }
+// { dg-output ".*'a' \\(line 25\\) <== Memory access at offset \[0-9\]* is 
inside this variable.*" }
diff --git a/gcc/tree-tailcall.cc b/gcc/tree-tailcall.cc
index 10e88d9c8292..d6d283022113 100644
--- a/gcc/tree-tailcall.cc
+++ b/gcc/tree-tailcall.cc
@@ -528,6 +528,10 @@ empty_eh_cleanup (basic_block bb, int 
*eh_has_tsan_func_exit, int cnt)
  *eh_has_tsan_func_exit = 1;
  continue;
}
+  if (eh_has_tsan_func_exit
+ && sanitize_flags_p (SANITIZE_ADDRESS)
+ && asan_mark_p (g, ASA

[gcc r16-1627] RISC-V: Combine vec_duplicate + vsaddu.vv to vsaddu.vx on GR2VR cost

2025-06-23 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:a2d018b642019165511e89d47bfb46af55f81f98

commit r16-1627-ga2d018b642019165511e89d47bfb46af55f81f98
Author: Pan Li 
Date:   Sat Jun 21 09:00:16 2025 +0800

RISC-V: Combine vec_duplicate + vsaddu.vv to vsaddu.vx on GR2VR cost

This patch combines the vec_duplicate + vsaddu.vv into the
vsaddu.vx, as shown in the example code below.  The related pattern
depends on the cost of vec_duplicate from GR2VR: late-combine will
perform the combination if the cost of GR2VR is zero, and reject it
if the GR2VR cost is greater than zero.

Assume we have the example code below, with a GR2VR cost of 0.

  #define DEF_VX_BINARY(T, FUNC)  \
  void\
  test_vx_binary (T * restrict out, T * restrict in, T x, unsigned n) \
  {   \
for (unsigned i = 0; i < n; i++)  \
  out[i] = FUNC (in[i], x);   \
  }

  T sat_add(T a, T b)
  {
return (a + b) | (-(T)((T)(a + b) < a));
  }

  DEF_VX_BINARY(uint32_t, sat_add)

Before this patch:
  10   │ test_vx_binary_or_int32_t_case_0:
  11   │ beq a3,zero,.L8
  12   │ vsetvli a5,zero,e32,m1,ta,ma
  13   │ vmv.v.x v2,a2
  14   │ sllia3,a3,32
  15   │ srlia3,a3,32
  16   │ .L3:
  17   │ vsetvli a5,a3,e32,m1,ta,ma
  18   │ vle32.v v1,0(a1)
  19   │ sllia4,a5,2
  20   │ sub a3,a3,a5
  21   │ add a1,a1,a4
  22   │ vsaddu.vv v1,v1,v2
  23   │ vse32.v v1,0(a0)
  24   │ add a0,a0,a4
  25   │ bne a3,zero,.L3

After this patch:
  10   │ test_vx_binary_or_int32_t_case_0:
  11   │ beq a3,zero,.L8
  12   │ sllia3,a3,32
  13   │ srlia3,a3,32
  14   │ .L3:
  15   │ vsetvli a5,a3,e32,m1,ta,ma
  16   │ vle32.v v1,0(a1)
  17   │ sllia4,a5,2
  18   │ sub a3,a3,a5
  19   │ add a1,a1,a4
  20   │ vsaddu.vx v1,v1,a2
  21   │ vse32.v v1,0(a0)
  22   │ add a0,a0,a4
  23   │ bne a3,zero,.L3

gcc/ChangeLog:

* config/riscv/riscv-v.cc (expand_vx_binary_vec_dup_vec): Add
new case US_PLUS.
(expand_vx_binary_vec_vec_dup): Ditto.
* config/riscv/riscv.cc (riscv_rtx_costs): Ditto.
* config/riscv/vector-iterators.md: Add new op us_plus.

Signed-off-by: Pan Li 

Diff:
---
 gcc/config/riscv/riscv-v.cc  | 2 ++
 gcc/config/riscv/riscv.cc| 1 +
 gcc/config/riscv/vector-iterators.md | 4 ++--
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index ac690df3688a..45dd9256d020 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -5541,6 +5541,7 @@ expand_vx_binary_vec_dup_vec (rtx op_0, rtx op_1, rtx 
op_2,
 case UMAX:
 case SMIN:
 case UMIN:
+case US_PLUS:
   icode = code_for_pred_scalar (code, mode);
   break;
 case MINUS:
@@ -5579,6 +5580,7 @@ expand_vx_binary_vec_vec_dup (rtx op_0, rtx op_1, rtx 
op_2,
 case UMAX:
 case SMIN:
 case UMIN:
+case US_PLUS:
   icode = code_for_pred_scalar (code, mode);
   break;
 default:
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 80498d6758ba..bbc7547d385f 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -3995,6 +3995,7 @@ riscv_rtx_costs (rtx x, machine_mode mode, int 
outer_code, int opno ATTRIBUTE_UN
case UDIV:
case MOD:
case UMOD:
+   case US_PLUS:
  *total = get_vector_binary_rtx_cost (op, scalar2vr_cost);
  break;
default:
diff --git a/gcc/config/riscv/vector-iterators.md 
b/gcc/config/riscv/vector-iterators.md
index 44ae79c48aa7..0e1318d1447c 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -4042,11 +4042,11 @@
 ])
 
 (define_code_iterator any_int_binop_no_shift_v_vdup [
-  plus minus and ior xor mult div udiv mod umod smax umax smin umin
+  plus minus and ior xor mult div udiv mod umod smax umax smin umin us_plus
 ])
 
 (define_code_iterator any_int_binop_no_shift_vdup_v [
-  plus minus and ior xor mult smax umax smin umin
+  plus minus and ior xor mult smax umax smin umin us_plus
 ])
 
 (define_code_iterator any_int_unop [neg not])


[gcc r16-1628] RISC-V: Add test for vec_duplicate + vsaddu.vv combine case 0 with GR2VR cost 0, 2 and 15

2025-06-23 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:9a8f82d6a63e36ffba883b365101b58955ca7c64

commit r16-1628-g9a8f82d6a63e36ffba883b365101b58955ca7c64
Author: Pan Li 
Date:   Sat Jun 21 09:10:07 2025 +0800

RISC-V: Add test for vec_duplicate + vsaddu.vv combine case 0 with GR2VR 
cost 0, 2 and 15

Add asm dump checks and run tests for the vec_duplicate + vsaddu.vv
combine to vsaddu.vx, with GR2VR costs of 0, 2 and 15.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u16.c: Add asm check.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u32.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u64.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u8.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-2-u16.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-2-u32.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-2-u64.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-2-u8.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-3-u16.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-3-u32.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-3-u64.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-3-u8.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_binary.h: Add test
helper macros.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_binary_data.h: Add test
data for run test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vsadd-run-1-u16.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vsadd-run-1-u32.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vsadd-run-1-u64.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vsadd-run-1-u8.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 .../gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u16.c  |   1 +
 .../gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u32.c  |   1 +
 .../gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u64.c  |   1 +
 .../gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u8.c   |   1 +
 .../gcc.target/riscv/rvv/autovec/vx_vf/vx-2-u16.c  |   1 +
 .../gcc.target/riscv/rvv/autovec/vx_vf/vx-2-u32.c  |   1 +
 .../gcc.target/riscv/rvv/autovec/vx_vf/vx-2-u64.c  |   1 +
 .../gcc.target/riscv/rvv/autovec/vx_vf/vx-2-u8.c   |   1 +
 .../gcc.target/riscv/rvv/autovec/vx_vf/vx-3-u16.c  |   1 +
 .../gcc.target/riscv/rvv/autovec/vx_vf/vx-3-u32.c  |   1 +
 .../gcc.target/riscv/rvv/autovec/vx_vf/vx-3-u64.c  |   1 +
 .../gcc.target/riscv/rvv/autovec/vx_vf/vx-3-u8.c   |   1 +
 .../gcc.target/riscv/rvv/autovec/vx_vf/vx_binary.h |  42 +++--
 .../riscv/rvv/autovec/vx_vf/vx_binary_data.h   | 196 +
 .../riscv/rvv/autovec/vx_vf/vx_vsadd-run-1-u16.c   |  17 ++
 .../riscv/rvv/autovec/vx_vf/vx_vsadd-run-1-u32.c   |  17 ++
 .../riscv/rvv/autovec/vx_vf/vx_vsadd-run-1-u64.c   |  17 ++
 .../riscv/rvv/autovec/vx_vf/vx_vsadd-run-1-u8.c|  17 ++
 18 files changed, 305 insertions(+), 13 deletions(-)

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u16.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u16.c
index bcfd5145d24f..21a207edce75 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u16.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u16.c
@@ -17,3 +17,4 @@ TEST_BINARY_VX_UNSIGNED_0(T)
 /* { dg-final { scan-assembler-times {vremu.vx} 1 } } */
 /* { dg-final { scan-assembler-times {vmaxu.vx} 2 } } */
 /* { dg-final { scan-assembler-times {vminu.vx} 2 } } */
+/* { dg-final { scan-assembler-times {vsaddu.vx} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u32.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u32.c
index b9a6a2830916..d1063adb0d6c 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u32.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u32.c
@@ -17,3 +17,4 @@ TEST_BINARY_VX_UNSIGNED_0(T)
 /* { dg-final { scan-assembler-times {vremu.vx} 1 } } */
 /* { dg-final { scan-assembler-times {vmaxu.vx} 2 } } */
 /* { dg-final { scan-assembler-times {vminu.vx} 2 } } */
+/* { dg-final { scan-assembler-times {vsaddu.vx} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u64.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u64.c
index abb5e5e78428..3d96503fd9ad 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u64.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u64.c
@@ -17,3 +17,4 @@ TEST_BINARY_VX_UNSIGNED_0(T)
 /* { dg-final { scan-assembler-times {vremu.vx} 1 } } */
 /* { dg-final { scan-assembler-times {vmaxu.vx} 2 } } */
 /* { dg-final { scan-assembler-times {vminu.vx} 2 } } */
+/* { dg-final { scan-assembler-times {vsaddu.vx} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u8.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u8.c
index 50065d0973b2..339a35c3f422 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u

[gcc r16-1629] RISC-V: Add test for vec_duplicate + vsaddu.vv combine case 1 with GR2VR cost 0, 1 and 2

2025-06-23 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:11811e698b460b5fe45777f4c333aa74655cff39

commit r16-1629-g11811e698b460b5fe45777f4c333aa74655cff39
Author: Pan Li 
Date:   Sat Jun 21 10:07:38 2025 +0800

RISC-V: Add test for vec_duplicate + vsaddu.vv combine case 1 with GR2VR 
cost 0, 1 and 2

Add asm dump check tests for the vec_duplicate + vsaddu.vv combine to
vsaddu.vx, with GR2VR costs of 0, 1 and 2.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u16.c: Add asm check
for vsaddu.vx combine.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u32.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u64.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u8.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u16.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u32.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u64.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u8.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u16.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u32.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u64.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u8.c: Ditto.

Signed-off-by: Pan Li 

Diff:
---
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u16.c | 2 ++
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u32.c | 1 +
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u64.c | 2 ++
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u8.c  | 3 +++
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u16.c | 2 ++
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u32.c | 2 ++
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u64.c | 2 ++
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u8.c  | 2 ++
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u16.c | 2 ++
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u32.c | 2 ++
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u64.c | 2 ++
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u8.c  | 2 ++
 12 files changed, 24 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u16.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u16.c
index b62164347186..de10d66a1b23 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u16.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u16.c
@@ -17,6 +17,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MAX_FUNC_0_WARP(T), max, 
VX_BINARY_FUNC_BODY_X8)
 DEF_VX_BINARY_CASE_3_WRAP(T, MAX_FUNC_1_WARP(T), max, VX_BINARY_FUNC_BODY_X8)
 DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_0_WARP(T), min, VX_BINARY_FUNC_BODY_X8)
 DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY_X8)
+DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, 
VX_BINARY_FUNC_BODY_X8)
 
 /* { dg-final { scan-assembler {vadd.vx} } } */
 /* { dg-final { scan-assembler {vsub.vx} } } */
@@ -28,3 +29,4 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, 
VX_BINARY_FUNC_BODY_X8)
 /* { dg-final { scan-assembler {vremu.vx} } } */
 /* { dg-final { scan-assembler {vmaxu.vx} } } */
 /* { dg-final { scan-assembler {vminu.vx} } } */
+/* { dg-final { scan-assembler {vsaddu.vx} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u32.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u32.c
index 741a7495f136..2e59da06c979 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u32.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u32.c
@@ -17,6 +17,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MAX_FUNC_0_WARP(T), max, 
VX_BINARY_FUNC_BODY_X4)
 DEF_VX_BINARY_CASE_3_WRAP(T, MAX_FUNC_1_WARP(T), max, VX_BINARY_FUNC_BODY_X4)
 DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_0_WARP(T), min, VX_BINARY_FUNC_BODY_X4)
 DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY_X4)
+DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, 
VX_BINARY_FUNC_BODY_X4)
 
 /* { dg-final { scan-assembler {vadd.vx} } } */
 /* { dg-final { scan-assembler {vsub.vx} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u64.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u64.c
index 70375b174734..064ed1f2e892 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u64.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u64.c
@@ -17,6 +17,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MAX_FUNC_0_WARP(T), max, 
VX_BINARY_FUNC_BODY)
 DEF_VX_BINARY_CASE_3_WRAP(T, MAX_FUNC_1_WARP(T), max, VX_BINARY_FUNC_BODY)
 DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_0_WARP(T), min, VX_BINARY_FUNC_BODY)
 DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY)
+DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, 
VX_BINARY_FUNC_BODY)
 
 /* { dg-final { scan-assembler {vadd.vx} } } */
 /* { dg-final { scan-assembler {vsub.vx} } } */
@@ -28,3 +29,4 @@ DEF_VX_BINARY_CASE_3_W

[gcc r16-1630] diagnostics: handle pp_token::kind::event_id in experimental-html sink [PR116792]

2025-06-23 Thread David Malcolm via Gcc-cvs
https://gcc.gnu.org/g:5a64c96cfe7a4d5783319c2fb8472bc75d702e9a

commit r16-1630-g5a64c96cfe7a4d5783319c2fb8472bc75d702e9a
Author: David Malcolm 
Date:   Mon Jun 23 11:06:33 2025 -0400

diagnostics: handle pp_token::kind::event_id in experimental-html sink 
[PR116792]

gcc/ChangeLog:
PR other/116792
* diagnostic-format-html.cc (html_token_printer::print_tokens):
Handle pp_token::kind::event_id.
(selftest::test_token_printer): Add coverage of printing an event
id.

Signed-off-by: David Malcolm 

Diff:
---
 gcc/diagnostic-format-html.cc | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/gcc/diagnostic-format-html.cc b/gcc/diagnostic-format-html.cc
index 45d088150dd6..5668b50a91ae 100644
--- a/gcc/diagnostic-format-html.cc
+++ b/gcc/diagnostic-format-html.cc
@@ -799,6 +799,16 @@ public:
case pp_token::kind::end_url:
  m_xp.pop_tag ("a");
  break;
+
+   case pp_token::kind::event_id:
+ {
+   pp_token_event_id *sub = as_a  (iter);
+   gcc_assert (sub->m_event_id.known_p ());
+   m_xp.add_text ("(");
+   m_xp.add_text (std::to_string (sub->m_event_id.one_based ()));
+   m_xp.add_text (")");
+ }
+ break;
}
   }
 
@@ -1375,6 +1385,15 @@ test_token_printer ()
"'"
"\n");
   }
+
+  {
+token_printer_test t;
+diagnostic_event_id_t event_id (0);
+pp_printf (&t.m_pp, "foo %@ bar", &event_id);
+ASSERT_XML_PRINT_EQ
+  (t.m_top_element,
+   "foo (1) bar\n");
+  }
 }
 
 /* A subclass of html_output_format for writing selftests.


[gcc] Deleted branch 'mikael/heads/select_type_name_v04' in namespace 'refs/users'

2025-06-23 Thread Mikael Morin via Gcc-cvs
The branch 'mikael/heads/select_type_name_v04' in namespace 'refs/users' was 
deleted.
It previously pointed to:

 ed83521b3c74... fortran: Mention user variable in SELECT TYPE temporary var

Diff:

!!! WARNING: THE FOLLOWING COMMITS ARE NO LONGER ACCESSIBLE (LOST):
---

  ed83521... fortran: Mention user variable in SELECT TYPE temporary var


[gcc(refs/users/mikael/heads/select_type_name_v04)] fortran: Mention user variable in SELECT TYPE temporary variable names

2025-06-23 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:ff3ca6ea6c1ee38fa419c3539febf1efba50b088

commit ff3ca6ea6c1ee38fa419c3539febf1efba50b088
Author: Mikael Morin 
Date:   Fri Jun 20 12:08:02 2025 +0200

fortran: Mention user variable in SELECT TYPE temporary variable names

The temporary variables that are generated to implement SELECT TYPE
and TYPE IS statements have (before this change) a name depending only
on the type.  This can produce confusing dumps with code having multiple
SELECT TYPE statements, as it isn't obvious which SELECT TYPE construct
the variable relates to.  This is especially the case with nested SELECT
TYPE statements and with SELECT TYPE variables having identical types
(and thus identical names).

This change adds one additional user-provided discriminating string in
the variable names, using the value from the SELECT TYPE variable name
or last component reference name.  The additional string may be
truncated to fit in the temporary buffer.  This requires all buffers to
have matching sizes to get the same resulting name everywhere.

gcc/fortran/ChangeLog:

* misc.cc (gfc_var_name_for_select_type_temp): New function.
* gfortran.h (gfc_var_name_for_select_type_temp): Declare it.
* resolve.cc (resolve_select_type): Pick a discriminating name
from the SELECT TYPE variable reference and use it in the name
of the temporary variable that is generated.  Truncate name to
the buffer size.
* match.cc (select_type_set_tmp): Likewise.  Pass the
discriminating name...
(select_intrinsic_set_tmp): ... to this function.  Use the
discriminating name likewise.  Augment the buffer size to match
that of select_type_set_tmp and resolve_select_type.

gcc/testsuite/ChangeLog:

* gfortran.dg/select_type_51.f90: New test.

Diff:
---
 gcc/fortran/gfortran.h   |  2 ++
 gcc/fortran/match.cc | 24 +++---
 gcc/fortran/misc.cc  | 21 
 gcc/fortran/resolve.cc   | 21 ++--
 gcc/testsuite/gfortran.dg/select_type_51.f90 | 37 
 5 files changed, 88 insertions(+), 17 deletions(-)

diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h
index f73b5f9c23f4..6848bd1762d3 100644
--- a/gcc/fortran/gfortran.h
+++ b/gcc/fortran/gfortran.h
@@ -3507,6 +3507,8 @@ void gfc_done_2 (void);
 
 int get_c_kind (const char *, CInteropKind_t *);
 
+const char * gfc_var_name_for_select_type_temp (gfc_expr *);
+
 const char *gfc_closest_fuzzy_match (const char *, char **);
 inline void
 vec_push (char **&optr, size_t &osz, const char *elt)
diff --git a/gcc/fortran/match.cc b/gcc/fortran/match.cc
index a99a757bede6..aa0b04afd563 100644
--- a/gcc/fortran/match.cc
+++ b/gcc/fortran/match.cc
@@ -7171,9 +7171,11 @@ select_type_push (gfc_symbol *sel)
 /* Set the temporary for the current intrinsic SELECT TYPE selector.  */
 
 static gfc_symtree *
-select_intrinsic_set_tmp (gfc_typespec *ts)
+select_intrinsic_set_tmp (gfc_typespec *ts, const char *var_name)
 {
-  char name[GFC_MAX_SYMBOL_LEN];
+  /* Keep size in sync with the buffer size in resolve_select_type as it
+ determines the final name through truncation.  */
+  char name[GFC_MAX_SYMBOL_LEN + 12 + 1];
   gfc_symtree *tmp;
   HOST_WIDE_INT charlen = 0;
   gfc_symbol *selector = select_type_stack->selector;
@@ -7192,12 +7194,12 @@ select_intrinsic_set_tmp (gfc_typespec *ts)
 charlen = gfc_mpz_get_hwi (ts->u.cl->length->value.integer);
 
   if (ts->type != BT_CHARACTER)
-sprintf (name, "__tmp_%s_%d", gfc_basic_typename (ts->type),
-ts->kind);
+snprintf (name, sizeof (name), "__tmp_%s_%d_%s",
+ gfc_basic_typename (ts->type), ts->kind, var_name);
   else
 snprintf (name, sizeof (name),
- "__tmp_%s_" HOST_WIDE_INT_PRINT_DEC "_%d",
- gfc_basic_typename (ts->type), charlen, ts->kind);
+ "__tmp_%s_" HOST_WIDE_INT_PRINT_DEC "_%d_%s",
+ gfc_basic_typename (ts->type), charlen, ts->kind, var_name);
 
   gfc_get_sym_tree (name, gfc_current_ns, &tmp, false);
   sym = tmp->n.sym;
@@ -7239,7 +7241,9 @@ select_type_set_tmp (gfc_typespec *ts)
   return;
 }
 
-  tmp = select_intrinsic_set_tmp (ts);
+  gfc_expr *select_type_expr = gfc_state_stack->construct->expr1;
+  const char *var_name = gfc_var_name_for_select_type_temp (select_type_expr);
+  tmp = select_intrinsic_set_tmp (ts, var_name);
 
   if (tmp == NULL)
 {
@@ -7247,9 +7251,11 @@ select_type_set_tmp (gfc_typespec *ts)
return;
 
   if (ts->type == BT_CLASS)
-   sprintf (name, "__tmp_class_%s", ts->u.derived->name);
+   snprintf (name, sizeof (name), "__tmp_class_%s_%s", ts->u.derived->name,
+ var_name);
   else
-   sprintf (name, "__tmp_type_%s", ts->u.der

[gcc] Created branch 'mikael/heads/select_type_name_v04' in namespace 'refs/users'

2025-06-23 Thread Mikael Morin via Gcc-cvs
The branch 'mikael/heads/select_type_name_v04' was created in namespace 
'refs/users' pointing to:

 ff3ca6ea6c1e... fortran: Mention user variable in SELECT TYPE temporary var


[gcc(refs/users/mikael/heads/select_type_name_v04)] fortran: Mention user variable in SELECT TYPE temporary variable names

2025-06-23 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:ed83521b3c747b7ddedeaa32b97801ca25d9633e

commit ed83521b3c747b7ddedeaa32b97801ca25d9633e
Author: Mikael Morin 
Date:   Fri Jun 20 12:08:02 2025 +0200

fortran: Mention user variable in SELECT TYPE temporary variable names

The temporary variables that are generated to implement SELECT TYPE
and TYPE IS statements have (before this change) a name depending only
on the type.  This can produce confusing dumps with code having multiple
SELECT TYPE statements, as it isn't obvious which SELECT TYPE construct
the variable relates to.  This is especially the case with nested SELECT
TYPE statements and with SELECT TYPE variables having identical types
(and thus identical names).

This change adds one additional user-provided discriminating string in
the variable names, using the value from the SELECT TYPE variable name
or last component reference name.  The additional string may be
truncated to fit in the temporary buffer.  This requires all buffers to
have matching sizes to get the same resulting name everywhere.

gcc/fortran/ChangeLog:

* misc.cc (gfc_var_name_for_select_type_temp): New function.
* gfortran.h (gfc_var_name_for_select_type_temp): Declare it.
* resolve.cc (resolve_select_type): Pick a discriminating name
from the SELECT TYPE variable reference and use it in the name
of the temporary variable that is generated.  Truncate name to
the buffer size.
* match.cc (select_type_set_tmp): Likewise.  Pass the
discriminating name...
(select_intrinsic_set_tmp): ... to this function.  Use the
discriminating name likewise.  Augment the buffer size to match
that of select_type_set_tmp and resolve_select_type.

gcc/testsuite/ChangeLog:

* gfortran.dg/select_type_51.f90: New test.

Diff:
---
 gcc/fortran/gfortran.h   |  2 ++
 gcc/fortran/match.cc | 22 ++---
 gcc/fortran/misc.cc  | 21 
 gcc/fortran/resolve.cc   | 21 ++--
 gcc/testsuite/gfortran.dg/select_type_51.f90 | 37 
 5 files changed, 86 insertions(+), 17 deletions(-)

diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h
index f73b5f9c23f4..6848bd1762d3 100644
--- a/gcc/fortran/gfortran.h
+++ b/gcc/fortran/gfortran.h
@@ -3507,6 +3507,8 @@ void gfc_done_2 (void);
 
 int get_c_kind (const char *, CInteropKind_t *);
 
+const char * gfc_var_name_for_select_type_temp (gfc_expr *);
+
 const char *gfc_closest_fuzzy_match (const char *, char **);
 inline void
 vec_push (char **&optr, size_t &osz, const char *elt)
diff --git a/gcc/fortran/match.cc b/gcc/fortran/match.cc
index a99a757bede6..c3a6ded942d8 100644
--- a/gcc/fortran/match.cc
+++ b/gcc/fortran/match.cc
@@ -7171,9 +7171,9 @@ select_type_push (gfc_symbol *sel)
 /* Set the temporary for the current intrinsic SELECT TYPE selector.  */
 
 static gfc_symtree *
-select_intrinsic_set_tmp (gfc_typespec *ts)
+select_intrinsic_set_tmp (gfc_typespec *ts, const char *var_name)
 {
-  char name[GFC_MAX_SYMBOL_LEN];
+  char name[GFC_MAX_SYMBOL_LEN + 12 + 1];
   gfc_symtree *tmp;
   HOST_WIDE_INT charlen = 0;
   gfc_symbol *selector = select_type_stack->selector;
@@ -7192,12 +7192,12 @@ select_intrinsic_set_tmp (gfc_typespec *ts)
 charlen = gfc_mpz_get_hwi (ts->u.cl->length->value.integer);
 
   if (ts->type != BT_CHARACTER)
-sprintf (name, "__tmp_%s_%d", gfc_basic_typename (ts->type),
-ts->kind);
+snprintf (name, sizeof (name), "__tmp_%s_%d_%s",
+ gfc_basic_typename (ts->type), ts->kind, var_name);
   else
 snprintf (name, sizeof (name),
- "__tmp_%s_" HOST_WIDE_INT_PRINT_DEC "_%d",
- gfc_basic_typename (ts->type), charlen, ts->kind);
+ "__tmp_%s_" HOST_WIDE_INT_PRINT_DEC "_%d_%s",
+ gfc_basic_typename (ts->type), charlen, ts->kind, var_name);
 
   gfc_get_sym_tree (name, gfc_current_ns, &tmp, false);
   sym = tmp->n.sym;
@@ -7239,7 +7239,9 @@ select_type_set_tmp (gfc_typespec *ts)
   return;
 }
 
-  tmp = select_intrinsic_set_tmp (ts);
+  gfc_expr *select_type_expr = gfc_state_stack->construct->expr1;
+  const char *var_name = gfc_var_name_for_select_type_temp (select_type_expr);
+  tmp = select_intrinsic_set_tmp (ts, var_name);
 
   if (tmp == NULL)
 {
@@ -7247,9 +7249,11 @@ select_type_set_tmp (gfc_typespec *ts)
return;
 
   if (ts->type == BT_CLASS)
-   sprintf (name, "__tmp_class_%s", ts->u.derived->name);
+   snprintf (name, sizeof (name), "__tmp_class_%s_%s", ts->u.derived->name,
+ var_name);
   else
-   sprintf (name, "__tmp_type_%s", ts->u.derived->name);
+   snprintf (name, sizeof (name), "__tmp_type_%s_%s", ts->u.derived->name,
+ var_name);
 
   g

[gcc] Created branch 'mikael/heads/select_type_name_v04' in namespace 'refs/users'

2025-06-23 Thread Mikael Morin via Gcc-cvs
The branch 'mikael/heads/select_type_name_v04' was created in namespace 
'refs/users' pointing to:

 ed83521b3c74... fortran: Mention user variable in SELECT TYPE temporary var


[gcc r16-1639] [RISC-V][PR target/118241] Fix data prefetch predicate/constraint for RISC-V

2025-06-23 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:bf7162b321128ba93521a824e5a7a00d1cc3d1f8

commit r16-1639-gbf7162b321128ba93521a824e5a7a00d1cc3d1f8
Author: Jeff Law 
Date:   Mon Jun 23 18:27:49 2025 -0600

[RISC-V][PR target/118241] Fix data prefetch predicate/constraint for RISC-V

Fix typo in comment spotted by Peter B.

PR target/118241
gcc/
* config/riscv/predicates.md: Fix comment typo in recent change.

Diff:
---
 gcc/config/riscv/predicates.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
index 8072d67fbd97..061904b6e000 100644
--- a/gcc/config/riscv/predicates.md
+++ b/gcc/config/riscv/predicates.md
@@ -27,7 +27,7 @@
   (ior (match_operand 0 "const_arith_operand")
(match_operand 0 "register_operand")))
 
-;; REG or REG+D where D fits in a simm12 and has the low 4 bits
+;; REG or REG+D where D fits in a simm12 and has the low 5 bits
 ;; off.  The REG+D form can be reloaded into a temporary if needed
 ;; after FP elimination if that exposes an invalid offset.
 (define_predicate "prefetch_operand"


[gcc r16-1637] Fixup dropping REG_EQUAL note in ext-dce

2025-06-23 Thread Sam James via Gcc-cvs
https://gcc.gnu.org/g:cdd678544fefc313cb1c9da0327158d3ed355f62

commit r16-1637-gcdd678544fefc313cb1c9da0327158d3ed355f62
Author: Sam James 
Date:   Mon Jun 23 23:28:01 2025 +0100

Fixup dropping REG_EQUAL note in ext-dce

Followup to r16-1613-g34e1e5e33ec3eb. remove_reg_equal_equiv_notes's
2nd argument is 'no_rescan' which we accidentally had on, tripping
an assert in combine or ira because we hadn't left things in a consistent
state.

Fix the thinko by enabling rescanning.

gcc/ChangeLog:
PR rtl-optimization/120795

* ext-dce.cc (ext_dce_try_optimize_insn): Enable rescan in
remove_reg_equal_equiv_notes call.

Co-authored-by: Jeff Law 

Diff:
---
 gcc/ext-dce.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/ext-dce.cc b/gcc/ext-dce.cc
index b1d5ee4b36c4..df17b018bf1f 100644
--- a/gcc/ext-dce.cc
+++ b/gcc/ext-dce.cc
@@ -446,7 +446,7 @@ ext_dce_try_optimize_insn (rtx_insn *insn, rtx set)
   /* INSN may have a REG_EQUAL note indicating that the value was
 sign or zero extended.  That note is no longer valid since we've
 just removed the extension.  Just wipe the notes.  */
-  remove_reg_equal_equiv_notes (insn, true);
+  remove_reg_equal_equiv_notes (insn, false);
 }
   else
 {


[gcc r16-1640] Fix shrink wrap separate ICE for mingw [PR120741]

2025-06-23 Thread Lili Cui via Gcc-cvs
https://gcc.gnu.org/g:4b739c020a90dfe2569a292c44b2293a94d4bff5

commit r16-1640-g4b739c020a90dfe2569a292c44b2293a94d4bff5
Author: Lili Cui 
Date:   Tue Jun 24 10:49:43 2025 +0800

Fix shrink wrap separate ICE for mingw [PR120741]

gcc/ChangeLog:

PR target/120741
* config/i386/i386.cc (ix86_expand_prologue):
Remove 1 assertion.

gcc/testsuite/ChangeLog:

PR target/120741
* gcc.target/i386/pr120741.c: New test.
* gcc.target/i386/shrink-wrap-separate-mingw.c: Likewise.

Diff:
---
 gcc/config/i386/i386.cc|  2 --
 gcc/testsuite/gcc.target/i386/pr120741.c   | 22 ++
 .../gcc.target/i386/shrink-wrap-separate-mingw.c   | 22 ++
 3 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index fc3105919f45..84081ab12670 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -9443,8 +9443,6 @@ ix86_expand_prologue (void)
 }
   else
 {
-  gcc_assert (!crtl->shrink_wrapped_separate);
-
   rtx eax = gen_rtx_REG (Pmode, AX_REG);
   rtx r10 = NULL;
   const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
diff --git a/gcc/testsuite/gcc.target/i386/pr120741.c 
b/gcc/testsuite/gcc.target/i386/pr120741.c
new file mode 100644
index ..b59a58c48b89
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr120741.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mstack-arg-probe" } */
+
+short __mingw_swformat_format;
+__builtin_va_list __mingw_swformat_arg;
+int __mingw_swformat_fc;
+typedef struct {
+  void *fp;
+  int bch[1024];
+} _IFP;
+void __mingw_swformat(_IFP *s) {
+  if (s->fp)
+while (__mingw_swformat_format)
+  if (__mingw_swformat_fc == 'A')
+   *__builtin_va_arg(__mingw_swformat_arg, double *) = 0;
+}
+void
+__mingw_vswscanf (void)
+{
+  _IFP ifp;
+  __mingw_swformat(&ifp);
+}
diff --git a/gcc/testsuite/gcc.target/i386/shrink-wrap-separate-mingw.c 
b/gcc/testsuite/gcc.target/i386/shrink-wrap-separate-mingw.c
new file mode 100644
index ..58635e49647a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/shrink-wrap-separate-mingw.c
@@ -0,0 +1,22 @@
+/* { dg-do compile { target *-*-mingw* *-*-cygwin* } } */
+/* { dg-options "-std=gnu99 -O2" } */
+
+short __mingw_swformat_format;
+__builtin_va_list __mingw_swformat_arg;
+int __mingw_swformat_fc;
+typedef struct {
+  void *fp;
+  int bch[1024];
+} _IFP;
+void __mingw_swformat(_IFP *s) {
+  if (s->fp)
+while (__mingw_swformat_format)
+  if (__mingw_swformat_fc == 'A')
+*__builtin_va_arg(__mingw_swformat_arg, double *) = 0;
+}
+void
+__mingw_vswscanf (void)
+{
+  _IFP ifp;
+  __mingw_swformat(&ifp);
+}


[gcc r16-1641] Fix AFDO zero profile handling

2025-06-23 Thread Jan Hubicka via Gcc-cvs
https://gcc.gnu.org/g:c24eb5e01da5ce07f6b616aff1129d4acbff69e6

commit r16-1641-gc24eb5e01da5ce07f6b616aff1129d4acbff69e6
Author: Jan Hubicka 
Date:   Tue Jun 24 05:00:01 2025 +0200

Fix AFDO zero profile handling

This patch fixes roms autofdo regression I introduced yesterday.  What 
happens
is that loop vectorization is disabled, because we get loop header count 0.
I.e.

loop_header:  
  if (i < n)
goto exit;
loop_body:
  ... vectorizable computation ...

The reason is that "if (i < n)" statement actually has 0 profile in AFDO
feedback.  This seems common and I believe it is an issue with debug info in
loop vectorizer.  Because loop is vectorized during train run, the 
conditional
is replaced by vectorized loop conditional but the statement remains in the
loop epilogue which is not executed at runtime.

This is something we can fix and introduce debug statement in the 
vectorized loop
body so user can breakpoint on it. I will try to produce testcase for that.

However this patch fixes bug where I intended to only trust 0 counts from 
AFDO if they
are also 0 in static profile and reversed the conditional.

autoprofile-bootstrapped/regtested x86_64-linux, committed.

* auto-profile.cc (afdo_set_bb_count): Dump also 0 count stmts.
(afdo_annotate_cfg): Fix conditional for block having non-zero 
static
profile.

Diff:
---
 gcc/auto-profile.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/auto-profile.cc b/gcc/auto-profile.cc
index 9b5be665f58a..8a1d9f878c65 100644
--- a/gcc/auto-profile.cc
+++ b/gcc/auto-profile.cc
@@ -1315,7 +1315,7 @@ afdo_set_bb_count (basic_block bb, hash_set  
&zero_bbs)
{
  if (info.count > max_count)
max_count = info.count;
- if (dump_file && info.count)
+ if (dump_file)
{
  fprintf (dump_file, "  count %" PRIu64 " in stmt: ",
   (int64_t)info.count);
@@ -2108,7 +2108,7 @@ afdo_annotate_cfg (void)
  afdo samples, but if even static profile agrees with 0,
  consider it final so propagation works better.  */
   for (basic_block bb : zero_bbs)
-if (bb->count.nonzero_p ())
+if (!bb->count.nonzero_p ())
   {
update_count_by_afdo_count (&bb->count, 0);
set_bb_annotated (bb, &annotated_bb);


[gcc r16-1642] Copy discriminators when inlining

2025-06-23 Thread Jan Hubicka via Gcc-cvs
https://gcc.gnu.org/g:0235b6d41ace62064d5cd42553028136b49ad947

commit r16-1642-g0235b6d41ace62064d5cd42553028136b49ad947
Author: Jan Hubicka 
Date:   Tue Jun 24 05:07:42 2025 +0200

Copy discriminators when inlining

When inlining, discriminator info about the call statement is lost which
is not good for auto-profile and debug info quality.  This patch fixes
it.

gcc/ChangeLog:

* tree-inline.cc (expand_call_inline): Preserve discriminator.

Diff:
---
 gcc/tree-inline.cc | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/gcc/tree-inline.cc b/gcc/tree-inline.cc
index dee2dfc26206..7e0ac698e5e0 100644
--- a/gcc/tree-inline.cc
+++ b/gcc/tree-inline.cc
@@ -5018,6 +5018,9 @@ expand_call_inline (basic_block bb, gimple *stmt, 
copy_body_data *id,
loc = LOCATION_LOCUS (DECL_SOURCE_LOCATION (fn));
   if (loc == UNKNOWN_LOCATION)
loc = BUILTINS_LOCATION;
+  if (has_discriminator (gimple_location (stmt)))
+   loc = location_with_discriminator
+   (loc, get_discriminator_from_loc (gimple_location (stmt)));
   id->block = make_node (BLOCK);
   BLOCK_ABSTRACT_ORIGIN (id->block) = DECL_ORIGIN (fn);
   BLOCK_SOURCE_LOCATION (id->block) = loc;


[gcc(refs/vendors/ARM/heads/arm-14.3)] arm: Add support for MVE Tail-Predicated Low Overhead Loops

2025-06-23 Thread Andre Simoes Dias Vieira via Gcc-cvs
https://gcc.gnu.org/g:02a4b4167f6d383383e08d8cbee718baf8a566ee

commit 02a4b4167f6d383383e08d8cbee718baf8a566ee
Author: Andre Vieira 
Date:   Wed Jun 19 17:05:55 2024 +0100

arm: Add support for MVE Tail-Predicated Low Overhead Loops

This patch adds support for MVE Tail-Predicated Low Overhead Loops by using 
the
doloop functionality added to support predicated vectorized hardware loops.

gcc/ChangeLog:

* config/arm/arm-protos.h (arm_target_bb_ok_for_lob): Change
declaration to pass basic_block.
(arm_attempt_dlstp_transform): New declaration.
* config/arm/arm.cc (TARGET_LOOP_UNROLL_ADJUST): Define targethook.
(TARGET_PREDICT_DOLOOP_P): Likewise.
(arm_target_bb_ok_for_lob): Adapt condition.
(arm_mve_get_vctp_lanes): New function.
(arm_dl_usage_type): New internal enum.
(arm_get_required_vpr_reg): New function.
(arm_get_required_vpr_reg_param): New function.
(arm_get_required_vpr_reg_ret_val): New function.
(arm_mve_get_loop_vctp): New function.
(arm_mve_insn_predicated_by): New function.
(arm_mve_across_lane_insn_p): New function.
(arm_mve_load_store_insn_p): New function.
(arm_mve_impl_pred_on_outputs_p): New function.
(arm_mve_impl_pred_on_inputs_p): New function.
(arm_last_vect_def_insn): New function.
(arm_mve_impl_predicated_p): New function.
(arm_mve_check_reg_origin_is_num_elems): New function.
(arm_mve_dlstp_check_inc_counter): New function.
(arm_mve_dlstp_check_dec_counter): New function.
(arm_mve_loop_valid_for_dlstp): New function.
(arm_predict_doloop_p): New function.
(arm_loop_unroll_adjust): New function.
(arm_emit_mve_unpredicated_insn_to_seq): New function.
(arm_attempt_dlstp_transform): New function.
* config/arm/arm.opt (mdlstp): New option.
* config/arm/iterators.md (dlstp_elemsize, letp_num_lanes,
letp_num_lanes_neg, letp_num_lanes_minus_1): New attributes.
(DLSTP, LETP): New iterators.
* config/arm/mve.md (predicated_doloop_end_internal,
dlstp_insn): New insn patterns.
* config/arm/thumb2.md (doloop_end): Adapt to support 
tail-predicated
loops.
(doloop_begin): Likewise.
* config/arm/types.md (mve_misc): New mve type to represent
predicated_loop_end insn sequences.
* config/arm/unspecs.md:
(DLSTP8, DLSTP16, DLSTP32, DLSTP64,
LETP8, LETP16, LETP32, LETP64): New unspecs for DLSTP and LETP.

gcc/testsuite/ChangeLog:

* gcc.target/arm/lob.h: Add new helpers.
* gcc.target/arm/lob1.c: Use new helpers.
* gcc.target/arm/lob6.c: Likewise.
* gcc.target/arm/mve/dlstp-compile-asm-1.c: New test.
* gcc.target/arm/mve/dlstp-compile-asm-2.c: New test.
* gcc.target/arm/mve/dlstp-compile-asm-3.c: New test.
* gcc.target/arm/mve/dlstp-int8x16.c: New test.
* gcc.target/arm/mve/dlstp-int8x16-run.c: New test.
* gcc.target/arm/mve/dlstp-int16x8.c: New test.
* gcc.target/arm/mve/dlstp-int16x8-run.c: New test.
* gcc.target/arm/mve/dlstp-int32x4.c: New test.
* gcc.target/arm/mve/dlstp-int32x4-run.c: New test.
* gcc.target/arm/mve/dlstp-int64x2.c: New test.
* gcc.target/arm/mve/dlstp-int64x2-run.c: New test.
* gcc.target/arm/mve/dlstp-invalid-asm.c: New test.

Co-authored-by: Stam Markianos-Wright 
(cherry picked from commit 3dfc28dbbd21b1d708aa40064380ef4c42c994d7)

Diff:
---
 gcc/config/arm/arm-protos.h|4 +-
 gcc/config/arm/arm.cc  | 1249 +++-
 gcc/config/arm/arm.opt |3 +
 gcc/config/arm/iterators.md|   15 +
 gcc/config/arm/mve.md  |   50 +
 gcc/config/arm/thumb2.md   |  138 ++-
 gcc/config/arm/types.md|6 +-
 gcc/config/arm/unspecs.md  |   14 +-
 gcc/testsuite/gcc.target/arm/lob.h |  128 +-
 gcc/testsuite/gcc.target/arm/lob1.c|   23 +-
 gcc/testsuite/gcc.target/arm/lob6.c|8 +-
 .../gcc.target/arm/mve/dlstp-compile-asm-1.c   |  146 +++
 .../gcc.target/arm/mve/dlstp-compile-asm-2.c   |  749 
 .../gcc.target/arm/mve/dlstp-compile-asm-3.c   |   46 +
 .../gcc.target/arm/mve/dlstp-int16x8-run.c |   44 +
 gcc/testsuite/gcc.target/arm/mve/dlstp-int16x8.c   |   31 +
 .../gcc.target/arm/mve/dlstp-int32x4-run.c |   45 +
 gcc/testsuite/gcc.target/arm/mve/dlstp-int32x4.c   |   31 +
 .../gcc.

[gcc(refs/vendors/ARM/heads/arm-14.3)] arm: Fix missed CE optimization for armv8.1-m.main [PR 116444]

2025-06-23 Thread Andre Simoes Dias Vieira via Gcc-cvs
https://gcc.gnu.org/g:55e31fad500907f24deb88670a7071830432a2b1

commit 55e31fad500907f24deb88670a7071830432a2b1
Author: Andre Vieira 
Date:   Fri Oct 4 13:43:46 2024 +0100

arm: Fix missed CE optimization for armv8.1-m.main [PR 116444]

This patch restores optimizations for armv8.1-m.main targets that 
were
lost when the generation of csinc, csinv and csneg was enabled for 
those
targets by the patch series containing:

commit c2bb84be4a6e581bbf45891457ee632a07416982
Author: Sudi Das 
Date:   Fri Sep 18 15:47:46 2020 +0100

[PATCH 2/5][Arm] New pattern for CSINV instructions

The original patch series makes use of the "noce" machinery to transform RTL
into patterns that later match the Armv8.1-M Mainline, by getting the target
hook TARGET_HAVE_CONDITIONAL_EXECUTION, to return FALSE for such targets 
prior
to reload_completed.  The same machinery however was transforming other RTL
patterns which were later on causing the "ce" pass post reload_completed to 
no
longer optimize conditional execution opportunities, which was causing the
regression observed in PR target/116444, a regression of 
'testsuite/gcc.target/arm/thumb-ifcvt-2.c'
when ran for an Armv8.1-M Mainline target.

This patch implements the target hook TARGET_NOCE_CONVERSION_PROFITABLE_P to
only allow "noce" to generate patterns that match CSINV, CSINC and CSNEG.  
Thus
ensuring that the early "ce" passes do not ruin things for later ones.

gcc/ChangeLog:

PR target/116444
* config/arm/arm-protos.h (arm_noce_conversion_profitable_p): New
declaration.
* config/arm/arm.cc (arm_is_v81m_cond_insn): New helper function 
used
in ...
(arm_noce_conversion_profitable_p): ... here. New function to 
implement
...
(TARGET_NOCE_CONVERSION_PROFITABLE_P): ... this target hook.  New define.

(cherry picked from commit 7766a2c1eb683352ce117e8ed014665f392f)

Diff:
---
 gcc/config/arm/arm-protos.h |  1 +
 gcc/config/arm/arm.cc   | 87 +
 2 files changed, 88 insertions(+)

diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index c650e4298a83..c25b193315b5 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -211,6 +211,7 @@ extern bool arm_pad_reg_upward (machine_mode, tree, int);
 #endif
 extern int arm_apply_result_size (void);
 extern opt_machine_mode arm_get_mask_mode (machine_mode mode);
+extern bool arm_noce_conversion_profitable_p (rtx_insn *,struct noce_if_info 
*);
 
 #endif /* RTX_CODE */
 
diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index e5983242009f..cbbe67eb598a 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -835,6 +835,9 @@ static const scoped_attribute_specs *const 
arm_attribute_table[] =
 #undef TARGET_MODES_TIEABLE_P
 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
 
+#undef TARGET_NOCE_CONVERSION_PROFITABLE_P
+#define TARGET_NOCE_CONVERSION_PROFITABLE_P arm_noce_conversion_profitable_p
+
 #undef TARGET_CAN_CHANGE_MODE_CLASS
 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
 
@@ -36173,6 +36176,90 @@ arm_get_mask_mode (machine_mode mode)
   return default_get_mask_mode (mode);
 }
 
+/* Helper function to determine whether SEQ represents a sequence of
+   instructions representing the Armv8.1-M Mainline conditional arithmetic
+   instructions: csinc, csneg and csinv. The cinc instruction is generated
+   using a different mechanism.  */
+
+static bool
+arm_is_v81m_cond_insn (rtx_insn *seq)
+{
+  rtx_insn *curr_insn = seq;
+  rtx set;
+  /* The pattern may start with a simple set with register operands.  Skip
+ through any of those.  */
+  while (curr_insn)
+{
+  set = single_set (curr_insn);
+  if (!set
+ || !REG_P (SET_DEST (set)))
+   return false;
+
+  if (!REG_P (SET_SRC (set)))
+   break;
+  curr_insn = NEXT_INSN (curr_insn);
+}
+
+  if (!set)
+return false;
+
+  /* The next instruction should be one of:
+ NEG: for csneg,
+ PLUS: for csinc,
+ NOT: for csinv.  */
+  if (GET_CODE (SET_SRC (set)) != NEG
+  && GET_CODE (SET_SRC (set)) != PLUS
+  && GET_CODE (SET_SRC (set)) != NOT)
+return false;
+
+  curr_insn = NEXT_INSN (curr_insn);
+  if (!curr_insn)
+return false;
+
+  /* The next instruction should be a COMPARE.  */
+  set = single_set (curr_insn);
+  if (!set
+  || !REG_P (SET_DEST (set))
+  || GET_CODE (SET_SRC (set)) != COMPARE)
+return false;
+
+  curr_insn = NEXT_INSN (curr_insn);
+  if (!curr_insn)
+return false;
+
+  /* And the last instruction should be an IF_THEN_ELSE.  */
+  set = single_set (curr_insn);
+  if (!set
+  || !REG_P (SET_DEST (set))
+  || GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
+return false;
+
+  return !NEXT_INSN (curr_insn);
+}
+
+/* For Armv8.1-M Mainline we have b

[gcc(refs/vendors/ARM/heads/arm-14.3)] arm: Improvements to arm_noce_conversion_profitable_p call [PR 116444]

2025-06-23 Thread Andre Simoes Dias Vieira via Gcc-cvs
https://gcc.gnu.org/g:741aded788931ec98d29feafd5c382ebbbcaa867

commit 741aded788931ec98d29feafd5c382ebbbcaa867
Author: Andre Simoes Dias Vieira 
Date:   Fri Nov 8 13:34:57 2024 +

arm: Improvements to arm_noce_conversion_profitable_p call [PR 116444]

When not dealing with the special armv8.1-m.main conditional instructions 
case
make sure it uses the default_noce_conversion_profitable_p call to determine
whether the sequence is cost effective.

Also make sure arm_noce_conversion_profitable_p accepts vsel patterns 
for
Armv8.1-M Mainline targets.

gcc/ChangeLog:

PR target/116444
* config/arm/arm.cc (arm_noce_conversion_profitable_p): Call
default_noce_conversion_profitable_p when not dealing with the
armv8.1-m.main special case.
(arm_is_vsel_fp_insn): New function.

(cherry picked from commit 1e8396464cb990d554c932cd959742b86660a25a)

Diff:
---
 gcc/config/arm/arm.cc | 59 ---
 1 file changed, 56 insertions(+), 3 deletions(-)

diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index d85dc7b8cf31..1a43784daee2 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -36176,10 +36176,58 @@ arm_get_mask_mode (machine_mode mode)
   return default_get_mask_mode (mode);
 }
 
+/* Helper function to determine whether SEQ represents a sequence of
+   instructions representing the vsel floating point instructions.
+   This is an heuristic to check whether the proposed optimisation is desired,
+   the choice has no consequence for correctness.  */
+static bool
+arm_is_vsel_fp_insn (rtx_insn *seq)
+{
+  rtx_insn *curr_insn = seq;
+  rtx set = NULL_RTX;
+  /* The pattern may start with a simple set with register operands.  Skip
+ through any of those.  */
+  while (curr_insn)
+{
+  set = single_set (curr_insn);
+  if (!set
+ || !REG_P (SET_DEST (set)))
+   return false;
+
+  if (!REG_P (SET_SRC (set)))
+   break;
+  curr_insn = NEXT_INSN (curr_insn);
+}
+
+  if (!set)
+return false;
+
+  /* The next instruction should be a compare.  */
+  if (!REG_P (SET_DEST (set))
+  || GET_CODE (SET_SRC (set)) != COMPARE)
+return false;
+
+  curr_insn = NEXT_INSN (curr_insn);
+  if (!curr_insn)
+return false;
+
+  /* And the last instruction should be an IF_THEN_ELSE.  */
+  set = single_set (curr_insn);
+  if (!set
+  || !REG_P (SET_DEST (set))
+  || GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
+return false;
+
+  return !NEXT_INSN (curr_insn);
+}
+
+
 /* Helper function to determine whether SEQ represents a sequence of
instructions representing the Armv8.1-M Mainline conditional arithmetic
instructions: csinc, csneg and csinv. The cinc instruction is generated
-   using a different mechanism.  */
+   using a different mechanism.
+   This is an heuristic to check whether the proposed optimisation is desired,
+   the choice has no consequence for correctness.  */
 
 static bool
 arm_is_v81m_cond_insn (rtx_insn *seq)
@@ -36248,15 +36296,20 @@ arm_is_v81m_cond_insn (rtx_insn *seq)
hook to only allow "noce" to generate the patterns that are profitable.  */
 
 bool
-arm_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *)
+arm_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
 {
   if (!TARGET_COND_ARITH
   || reload_completed)
-return true;
+return default_noce_conversion_profitable_p (seq, if_info);
 
   if (arm_is_v81m_cond_insn (seq))
 return true;
 
+  /* Look for vsel opportunities as we still want to codegen these for
+ Armv8.1-M Mainline targets.  */
+  if (arm_is_vsel_fp_insn (seq))
+return true;
+
   return false;
 }


[gcc(refs/vendors/ARM/heads/arm-14.3)] arm: fix bootstrap issue with arm_noce_conversion_profitable_p patch [NFC]

2025-06-23 Thread Andre Simoes Dias Vieira via Gcc-cvs
https://gcc.gnu.org/g:613edf1adfb77842a277fc556892e0938e6af39f

commit 613edf1adfb77842a277fc556892e0938e6af39f
Author: Andre Vieira 
Date:   Mon Oct 7 14:16:38 2024 +0100

arm: fix bootstrap issue with arm_noce_conversion_profitable_p patch [NFC]

This obvious patch fixes two warnings introduced with the implementation of
arm_noce_conversion_profitable_p hook.

gcc/ChangeLog:

* config/arm/arm.cc (arm_noce_conversion_profitable_p): Remove unused
argument name.
(arm_is_v81m_cond_insn): Initialize variable.

(cherry picked from commit 5fb1ab539e3315175d2e843f4ce40bde6dd7c520)

Diff:
---
 gcc/config/arm/arm.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index cbbe67eb598a..d85dc7b8cf31 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -36185,7 +36185,7 @@ static bool
 arm_is_v81m_cond_insn (rtx_insn *seq)
 {
   rtx_insn *curr_insn = seq;
-  rtx set;
+  rtx set = NULL_RTX;
   /* The pattern may start with a simple set with register operands.  Skip
  through any of those.  */
   while (curr_insn)
@@ -36248,7 +36248,7 @@ arm_is_v81m_cond_insn (rtx_insn *seq)
hook to only allow "noce" to generate the patterns that are profitable.  */
 
 bool
-arm_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
+arm_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *)
 {
   if (!TARGET_COND_ARITH
   || reload_completed)


[gcc(refs/vendors/ARM/heads/arm-14.3)] doloop: Add support for predicated vectorized loops

2025-06-23 Thread Andre Simoes Dias Vieira via Gcc-cvs
https://gcc.gnu.org/g:01494aa8d6e79ca19d26a59fb55035a2649d9238

commit 01494aa8d6e79ca19d26a59fb55035a2649d9238
Author: Andre Vieira 
Date:   Wed Jun 19 17:05:45 2024 +0100

doloop: Add support for predicated vectorized loops

This patch adds support in the target agnostic doloop pass for the 
detection of
predicated vectorized hardware loops.  Arm is currently the only target that
will make use of this feature.

gcc/ChangeLog:

* df-core.cc (df_bb_regno_only_def_find): New helper function.
* df.h (df_bb_regno_only_def_find): Declare new function.
* loop-doloop.cc (doloop_condition_get): Add support for detecting
predicated vectorized hardware loops.
(doloop_modify): Add support for GTU condition checks.
(doloop_optimize): Update costing computation to support 
alterations to
desc->niter_expr by the backend.

Co-authored-by: Stam Markianos-Wright 
(cherry picked from commit 5d0c1b4e0d33c2d1077264636d0a65ce206d0d96)

Diff:
---
 gcc/df-core.cc |  15 +
 gcc/df.h   |   1 +
 gcc/loop-doloop.cc | 164 +++--
 3 files changed, 113 insertions(+), 67 deletions(-)

diff --git a/gcc/df-core.cc b/gcc/df-core.cc
index f0eb4c93957f..b0e8a88d433b 100644
--- a/gcc/df-core.cc
+++ b/gcc/df-core.cc
@@ -1964,6 +1964,21 @@ df_bb_regno_last_def_find (basic_block bb, unsigned int 
regno)
   return NULL;
 }
 
+/* Return the one and only def of REGNO within BB.  If there is no def or
+   there are multiple defs, return NULL.  */
+
+df_ref
+df_bb_regno_only_def_find (basic_block bb, unsigned int regno)
+{
+  df_ref temp = df_bb_regno_first_def_find (bb, regno);
+  if (!temp)
+return NULL;
+  else if (temp == df_bb_regno_last_def_find (bb, regno))
+return temp;
+  else
+return NULL;
+}
+
 /* Finds the reference corresponding to the definition of REG in INSN.
DF is the dataflow object.  */
 
diff --git a/gcc/df.h b/gcc/df.h
index 84e5aa8b524d..c4e690b40cf2 100644
--- a/gcc/df.h
+++ b/gcc/df.h
@@ -987,6 +987,7 @@ extern void df_check_cfg_clean (void);
 #endif
 extern df_ref df_bb_regno_first_def_find (basic_block, unsigned int);
 extern df_ref df_bb_regno_last_def_find (basic_block, unsigned int);
+extern df_ref df_bb_regno_only_def_find (basic_block, unsigned int);
 extern df_ref df_find_def (rtx_insn *, rtx);
 extern bool df_reg_defined (rtx_insn *, rtx);
 extern df_ref df_find_use (rtx_insn *, rtx);
diff --git a/gcc/loop-doloop.cc b/gcc/loop-doloop.cc
index 0d101d64bbfc..7528a9225557 100644
--- a/gcc/loop-doloop.cc
+++ b/gcc/loop-doloop.cc
@@ -86,10 +86,10 @@ doloop_condition_get (rtx_insn *doloop_pat)
  forms:
 
  1)  (parallel [(set (pc) (if_then_else (condition)
-   (label_ref (label))
-   (pc)))
-(set (reg) (plus (reg) (const_int -1)))
-(additional clobbers and uses)])
+   (label_ref (label))
+   (pc)))
+(set (reg) (plus (reg) (const_int -1)))
+(additional clobbers and uses)])
 
  The branch must be the first entry of the parallel (also required
  by jump.cc), and the second entry of the parallel must be a set of
@@ -97,19 +97,33 @@ doloop_condition_get (rtx_insn *doloop_pat)
  the loop counter in an if_then_else too.
 
  2)  (set (reg) (plus (reg) (const_int -1))
- (set (pc) (if_then_else (reg != 0)
-(label_ref (label))
-(pc))).  
+(set (pc) (if_then_else (reg != 0)
+(label_ref (label))
+(pc))).
 
- Some targets (ARM) do the comparison before the branch, as in the
+ 3) Some targets (Arm) do the comparison before the branch, as in the
  following form:
 
- 3) (parallel [(set (cc) (compare ((plus (reg) (const_int -1), 0)))
-   (set (reg) (plus (reg) (const_int -1)))])
-(set (pc) (if_then_else (cc == NE)
-(label_ref (label))
-(pc))) */
-
+ (parallel [(set (cc) (compare (plus (reg) (const_int -1)) 0))
+   (set (reg) (plus (reg) (const_int -1)))])
+ (set (pc) (if_then_else (cc == NE)
+(label_ref (label))
+(pc)))
+
+  4) This form supports a construct that is used to represent a vectorized
+  do loop with predication, however we do not need to care about the
+  details of the predication here.
+  Arm uses this construct to support MVE tail predication.
+
+  (parallel
+   [(set (pc)
+(if_then_else (gtu (plus (reg) (const_int -n))
+   (const_int n-1))
+  (label_ref)
+  

[gcc(refs/vendors/ARM/heads/arm-14.3)] arm, mve: Fix arm_mve_dlstp_check_dec_counter's use of single_pred

2025-06-23 Thread Andre Simoes Dias Vieira via Gcc-cvs
https://gcc.gnu.org/g:e9a0a09b15bfaa706c6f5b0b1d29d81942096af7

commit e9a0a09b15bfaa706c6f5b0b1d29d81942096af7
Author: Andre Vieira 
Date:   Wed Nov 20 09:23:50 2024 +

arm, mve: Fix arm_mve_dlstp_check_dec_counter's use of single_pred

Call 'single_pred_p' before 'single_pred' to verify it is safe to do so.

gcc/ChangeLog:

* config/arm/arm.cc (arm_mve_dlstp_check_dec_counter): Call
single_pred_p to verify it's safe to call single_pred.

gcc/testsuite/ChangeLog:

* gcc.target/arm/mve/dlstp-loop-form.c: Add loop that triggered ICE.

(cherry picked from commit 3ec0b7cd7ce7b64b58cb0ee52fb95fb0875c805c)

Diff:
---
 gcc/config/arm/arm.cc  |  5 +++--
 gcc/testsuite/gcc.target/arm/mve/dlstp-loop-form.c | 12 
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index 9185ce2c12ea..e5983242009f 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -35443,9 +35443,10 @@ arm_mve_dlstp_check_dec_counter (loop *loop, rtx_insn* 
vctp_insn,
 return NULL;
   else if (REG_P (condconst))
 {
-  basic_block pre_loop_bb = single_pred (loop_preheader_edge (loop)->src);
-  if (!pre_loop_bb)
+  basic_block preheader_b = loop_preheader_edge (loop)->src;
+  if (!single_pred_p (preheader_b))
return NULL;
+  basic_block pre_loop_bb = single_pred (preheader_b);
 
   rtx initial_compare = NULL_RTX;
   if (!(prev_nonnote_nondebug_insn_bb (BB_END (pre_loop_bb))
diff --git a/gcc/testsuite/gcc.target/arm/mve/dlstp-loop-form.c 
b/gcc/testsuite/gcc.target/arm/mve/dlstp-loop-form.c
index 2dc9c4f8bfe1..ca46bcb499a8 100644
--- a/gcc/testsuite/gcc.target/arm/mve/dlstp-loop-form.c
+++ b/gcc/testsuite/gcc.target/arm/mve/dlstp-loop-form.c
@@ -24,3 +24,15 @@ void n() {
   }
 }
 
+int a;
+void g2() {
+  long b;
+  while (a) {
+char *c;
+for (long d = b; d > 0; d -= 4) {
+  mve_pred16_t e = vctp32q(d);
+  int32x4_t f;
+  vstrbq_p_s32(c, f, e);
+}
+  }
+}


[gcc] Created branch 'ARM/heads/arm-14.3' in namespace 'refs/vendors'

2025-06-23 Thread Andre Simoes Dias Vieira via Gcc-cvs
The branch 'ARM/heads/arm-14.3' was created in namespace 'refs/vendors' 
pointing to:

 c6ee55bf5766... arm, mve: Detect uses of vctp_vpr_generated inside subregs


[gcc(refs/vendors/ARM/heads/arm-14.3)] arm, mve: Detect uses of vctp_vpr_generated inside subregs

2025-06-23 Thread Andre Simoes Dias Vieira via Gcc-cvs
https://gcc.gnu.org/g:c6ee55bf5766d1d38e57d92e3a757fde4722d55d

commit c6ee55bf5766d1d38e57d92e3a757fde4722d55d
Author: Andre Vieira 
Date:   Fri Nov 29 10:18:57 2024 +

arm, mve: Detect uses of vctp_vpr_generated inside subregs

Address a problem where we were failing to detect uses of
vctp_vpr_generated in the analysis for 'arm_attempt_dlstp_transform' because
the use was inside a SUBREG and rtx_equal_p does not catch that.  Using
reg_overlap_mentioned_p is much more robust.

gcc/ChangeLog:

PR target/117814
* config/arm/arm.cc (arm_attempt_dlstp_transform): Use
reg_overlap_mentioned_p instead of rtx_equal_p to detect uses of
vctp_vpr_generated inside subregs.

gcc/testsuite/ChangeLog:

PR target/117814
* gcc.target/arm/mve/dlstp-invalid-asm.c (test10): Renamed to...
(test10a): ... this.
(test10b): Variation of test10a with a small change to trigger wrong
codegen.

(cherry picked from commit f42fd8e9335354f986d69b92ab66be07cc31bc7a)

Diff:
---
 gcc/config/arm/arm.cc  |  3 +-
 .../gcc.target/arm/mve/dlstp-invalid-asm.c | 37 --
 2 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index 1a43784daee2..e6d5c86a8bed 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -35915,7 +35915,8 @@ arm_attempt_dlstp_transform (rtx label)
  df_ref insn_uses = NULL;
  FOR_EACH_INSN_USE (insn_uses, insn)
  {
-   if (rtx_equal_p (vctp_vpr_generated, DF_REF_REG (insn_uses)))
+   if (reg_overlap_mentioned_p (vctp_vpr_generated,
+DF_REF_REG (insn_uses)))
  {
end_sequence ();
return 1;
diff --git a/gcc/testsuite/gcc.target/arm/mve/dlstp-invalid-asm.c 
b/gcc/testsuite/gcc.target/arm/mve/dlstp-invalid-asm.c
index 26df2d30523c..eb0782ebd0de 100644
--- a/gcc/testsuite/gcc.target/arm/mve/dlstp-invalid-asm.c
+++ b/gcc/testsuite/gcc.target/arm/mve/dlstp-invalid-asm.c
@@ -127,8 +127,15 @@ void test9 (int32_t *a, int32_t *b, int32_t *c, int n)
 }
 }
 
-/* Using a VPR that gets re-generated within the loop.  */
-void test10 (int32_t *a, int32_t *b, int32_t *c, int n)
+/* Using a VPR that gets re-generated within the loop.  Even though we
+   currently reject such loops, it would be possible to dlstp transform this
+   specific loop, as long as we make sure that the first vldrwq_z mask would
+   either:
+   * remain the same as its mask in the first iteration,
+   * become the same as the loop mask after the first iteration,
+   * become all ones, since the dlstp would then mask it the same as the loop
+   mask.  */
+void test10a (int32_t *a, int32_t *b, int32_t *c, int n)
 {
   mve_pred16_t p = vctp32q (n);
   while (n > 0)
@@ -145,6 +152,32 @@ void test10 (int32_t *a, int32_t *b, int32_t *c, int n)
 }
 }
 
+/* Using a VPR that gets re-generated within the loop, the difference between
+   this test and test10a is to make sure the two vctp calls are never the same,
+   this leads to slightly different codegen in some cases triggering the issue
+   in a different way.   This loop too would be OK to dlstp transform as long
+   as we made sure that the first vldrwq_z mask would either:
+   * remain the same as its mask in the first iteration,
+   * become the same as the loop mask after the first iteration,
+   * become all ones, since the dlstp would then mask it the same as the loop
+   mask.  */
+void test10b (int32_t *a, int32_t *b, int32_t *c, int n)
+{
+  mve_pred16_t p = vctp32q (n-4);
+  while (n > 0)
+{
+  int32x4_t va = vldrwq_z_s32 (a, p);
+  p = vctp32q (n);
+  int32x4_t vb = vldrwq_z_s32 (b, p);
+  int32x4_t vc = vaddq_x_s32 (va, vb, p);
+  vstrwq_p_s32 (c, vc, p);
+  c += 4;
+  a += 4;
+  b += 4;
+  n -= 4;
+}
+}
+
 /* Using vctp32q_m instead of vctp32q.  */
 void test11 (int32_t *a, int32_t *b, int32_t *c, int n, mve_pred16_t p0)
 {


[gcc(refs/vendors/ARM/heads/arm-14.3)] arm, mve: Fix scan-assembler for test7 in dlstp-compile-asm-2.c

2025-06-23 Thread Andre Simoes Dias Vieira via Gcc-cvs
https://gcc.gnu.org/g:653b1c38434272be4a1327efde6dd8f8b619d21c

commit 653b1c38434272be4a1327efde6dd8f8b619d21c
Author: Andre Vieira 
Date:   Fri Nov 29 09:59:25 2024 +

arm, mve: Fix scan-assembler for test7 in dlstp-compile-asm-2.c

After the changes to the vctp intrinsic, codegen changed slightly: we now
unfortunately seem to be generating unneeded moves and extends of the mask.
These are however not incorrect and we don't have a fix for the unneeded
codegen right now, so changing the testcase to accept them so we can catch
other changes if they occur.

gcc/testsuite/ChangeLog:

PR target/117814
* gcc.target/arm/mve/dlstp-compile-asm-2.c (test7): Add an optional
vmsr to the check-function-bodies.

(cherry picked from commit cf75f86ed980308621ab0db0dc8adc9c72e39f5e)

Diff:
---
 gcc/testsuite/gcc.target/arm/mve/dlstp-compile-asm-2.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/gcc/testsuite/gcc.target/arm/mve/dlstp-compile-asm-2.c 
b/gcc/testsuite/gcc.target/arm/mve/dlstp-compile-asm-2.c
index 84f4a2fc4f9b..2d282cb6645e 100644
--- a/gcc/testsuite/gcc.target/arm/mve/dlstp-compile-asm-2.c
+++ b/gcc/testsuite/gcc.target/arm/mve/dlstp-compile-asm-2.c
@@ -214,7 +214,12 @@ void test7 (int32_t *a, int32_t *b, int32_t *c, int n, int 
g)
 **...
 ** dlstp.32lr, r3
 ** vldrw.32q[0-9]+, \[r0\], #16
+** (
+** vmsrp0, .*
 ** vpst
+** |
+** vpst
+** )
 ** vldrwt.32   q[0-9]+, \[r1\], #16
 ** vadd.i32(q[0-9]+), q[0-9]+, q[0-9]+
 ** vstrw.32\1, \[r2\], #16


[gcc(refs/vendors/ARM/heads/arm-14.3)] arm: Fix testism with mve/ivopts-3.c testcase

2025-06-23 Thread Andre Simoes Dias Vieira via Gcc-cvs
https://gcc.gnu.org/g:7a83f663642eecd29d4b6b72c65cb92571411a47

commit 7a83f663642eecd29d4b6b72c65cb92571411a47
Author: Andre Vieira 
Date:   Fri Aug 2 16:39:34 2024 +0100

arm: Fix testism with mve/ivopts-3.c testcase

This patch ensures this testcase is run for armv8.1-m.main+mve as this is
testing that doloops with function calls that aren't intrinsics get rejected
as potential doloop targets during ivopts.  For other targets this loop gets
rejected for different reasons.

gcc/testsuite/ChangeLog:

* gcc.target/arm/mve/ivopts-3.c: Add require target and options.

(cherry picked from commit 995ac87a053c22364bcdc0bc041fd6e5b3087bc5)

Diff:
---
 gcc/testsuite/gcc.target/arm/mve/ivopts-3.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcc/testsuite/gcc.target/arm/mve/ivopts-3.c 
b/gcc/testsuite/gcc.target/arm/mve/ivopts-3.c
index 19b2442ef12c..08879424501f 100644
--- a/gcc/testsuite/gcc.target/arm/mve/ivopts-3.c
+++ b/gcc/testsuite/gcc.target/arm/mve/ivopts-3.c
@@ -1,5 +1,7 @@
 /* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
 /* { dg-options "-O2 -fdump-tree-ivopts-details" } */
+/* { dg-add-options arm_v8_1m_mve } */
 
 void f2 (void);


[gcc(refs/vendors/ARM/heads/arm-14.3)] arm: make arm_predict_doloop_p reject loops with calls

2025-06-23 Thread Andre Simoes Dias Vieira via Gcc-cvs
https://gcc.gnu.org/g:3db8647388acfb981dfe1ea704ac1b8ae3a83e93

commit 3db8647388acfb981dfe1ea704ac1b8ae3a83e93
Author: Andre Vieira 
Date:   Wed Jun 26 11:07:01 2024 +0100

arm: make arm_predict_doloop_p reject loops with calls

With the introduction of low overhead loops we defined arm_predict_doloop_p,
this is meant to be a low-weight check to rule out loops we are not 
considering
for doloop optimization and it is used by other passes to prevent 
optimizations
that may hurt the doloop optimization later on. The reason these are meant 
to be
lightweight is because it's used by pre-RTL optimizations, meaning we can't 
do
the same checks that doloop does.

After the definition of arm_predict_doloop_p, when testing for 
armv8.1-m.main,
tree-ssa/ivopts-3.c failed the scan-dump check as the dump now matched an 
extra
'!= 0' introduced by:
Doloop cmp iv use: if (ivtmp_1 != 0)
Predict loop 1 can perform doloop optimization later.

where previously we had:
Predict doloop failure due to target specific checks.

and after this patch:
Predict doloop failure due to call in loop.
Predict doloop failure due to target specific checks.

Added a copy of the original tree-ssa/ivopts-3.c as a target-specific test to
check for the new dump message.

gcc/ChangeLog:

* config/arm/arm.cc (arm_predict_doloop_p): Reject loops with 
function
calls that are not builtins.

gcc/testsuite/ChangeLog:

* gcc.target/arm/mve/ivopts-3.c: New test.

(cherry picked from commit ad20ad7dddcb052429346ae5f94b4a603925084a)

Diff:
---
 gcc/config/arm/arm.cc   | 16 
 gcc/testsuite/gcc.target/arm/mve/ivopts-3.c | 13 +
 2 files changed, 29 insertions(+)

diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index bc5048a787ed..e296b0e8a33a 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -35613,6 +35613,22 @@ arm_predict_doloop_p (struct loop *loop)
" loop bb complexity.\n");
   return false;
 }
+  else
+{
+  gimple_stmt_iterator gsi = gsi_after_labels (loop->header);
+  while (!gsi_end_p (gsi))
+   {
+ if (is_gimple_call (gsi_stmt (gsi))
+ && !gimple_call_builtin_p (gsi_stmt (gsi)))
+   {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+   fprintf (dump_file, "Predict doloop failure due to"
+   " call in loop.\n");
+ return false;
+   }
+ gsi_next (&gsi);
+   }
+}
 
   return true;
 }
diff --git a/gcc/testsuite/gcc.target/arm/mve/ivopts-3.c 
b/gcc/testsuite/gcc.target/arm/mve/ivopts-3.c
new file mode 100644
index ..19b2442ef12c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/ivopts-3.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-ivopts-details" } */
+
+void f2 (void);
+
+int main (void)
+{
+  int i;
+  for (i = 0; i < 10; i++)
+f2 ();
+}
+
+/* { dg-final { scan-tree-dump "Predict doloop failure due to call in loop." 
"ivopts" } } */


[gcc(refs/vendors/ARM/heads/arm-14.3)] arm: Prevent ICE when doloop dec_set is not PLUS expr

2025-06-23 Thread Andre Simoes Dias Vieira via Gcc-cvs
https://gcc.gnu.org/g:8d44f91e835e1d7e4dd99d5b900bd9c4c2d2586b

commit 8d44f91e835e1d7e4dd99d5b900bd9c4c2d2586b
Author: Andre Vieira 
Date:   Tue Jul 16 17:47:51 2024 +0100

arm: Prevent ICE when doloop dec_set is not PLUS expr

This patch refactors and fixes an issue where 
arm_mve_dlstp_check_dec_counter
was making an assumption about the form of what a candidate for a dec_insn
should be, which caused an ICE.
This dec_insn is the instruction that decreases the loop counter inside a
decrementing loop and we expect it to have the following form:
(set (reg CONDCOUNT)
 (plus (reg CONDCOUNT)
   (const_int)))

Where CONDCOUNT is the loop counter, and const int is the negative constant
used to decrement it.

This patch also improves our search for a valid dec_insn.  Before this patch
we'd only look for a dec_insn inside the loop header if the loop latch was
empty.  We now also search the loop header if the loop latch is not empty 
but
the last instruction is not a valid dec_insn.  This could potentially be 
improved
to search all instructions inside the loop latch.

gcc/ChangeLog:

* config/arm/arm.cc (check_dec_insn): New helper function containing
code hoisted from...
(arm_mve_dlstp_check_dec_counter): ... here. Use check_dec_insn to
check the validity of the candidate dec_insn.

gcc/testsuite/ChangeLog:

* gcc.target/arm/mve/dlstp-loop-form.c: New test.

(cherry picked from commit c78790988f7c428489292c5f9b3f80363f78f73d)
Changed testcase to use include <arm_mve.h> after cherry-picking.

Diff:
---
 gcc/config/arm/arm.cc  | 49 ++
 gcc/testsuite/gcc.target/arm/mve/dlstp-loop-form.c | 26 
 2 files changed, 58 insertions(+), 17 deletions(-)

diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index e296b0e8a33a..9185ce2c12ea 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -35314,6 +35314,32 @@ arm_mve_dlstp_check_inc_counter (loop *loop, rtx_insn* 
vctp_insn,
   return vctp_insn;
 }
 
+/* Helper function to 'arm_mve_dlstp_check_dec_counter' to make sure DEC_INSN
+   is of the expected form:
+   (set (reg a) (plus (reg a) (const_int)))
+   where (reg a) is the same as CONDCOUNT.
+   Return a rtx with the set if it is in the right format or NULL_RTX
+   otherwise.  */
+
+static rtx
+check_dec_insn (rtx_insn *dec_insn, rtx condcount)
+{
+  if (!NONDEBUG_INSN_P (dec_insn))
+return NULL_RTX;
+  rtx dec_set = single_set (dec_insn);
+  if (!dec_set
+  || !REG_P (SET_DEST (dec_set))
+  || GET_CODE (SET_SRC (dec_set)) != PLUS
+  || !REG_P (XEXP (SET_SRC (dec_set), 0))
+  || !CONST_INT_P (XEXP (SET_SRC (dec_set), 1))
+  || REGNO (SET_DEST (dec_set))
+ != REGNO (XEXP (SET_SRC (dec_set), 0))
+  || REGNO (SET_DEST (dec_set)) != REGNO (condcount))
+return NULL_RTX;
+
+  return dec_set;
+}
+
 /* Helper function to `arm_mve_loop_valid_for_dlstp`.  In the case of a
counter that is decrementing, ensure that it is decrementing by the
right amount in each iteration and that the target condition is what
@@ -35330,30 +35356,19 @@ arm_mve_dlstp_check_dec_counter (loop *loop, 
rtx_insn* vctp_insn,
  loop latch.  Here we simply need to verify that this counter is the same
  reg that is also used in the vctp_insn and that it is not otherwise
  modified.  */
-  rtx_insn *dec_insn = BB_END (loop->latch);
+  rtx dec_set = check_dec_insn (BB_END (loop->latch), condcount);
   /* If not in the loop latch, try to find the decrement in the loop header.  
*/
-  if (!NONDEBUG_INSN_P (dec_insn))
+  if (dec_set == NULL_RTX)
   {
 df_ref temp = df_bb_regno_only_def_find (loop->header, REGNO (condcount));
 /* If we haven't been able to find the decrement, bail out.  */
 if (!temp)
   return NULL;
-dec_insn = DF_REF_INSN (temp);
-  }
-
-  rtx dec_set = single_set (dec_insn);
+dec_set = check_dec_insn (DF_REF_INSN (temp), condcount);
 
-  /* Next, ensure that it is a PLUS of the form:
- (set (reg a) (plus (reg a) (const_int)))
- where (reg a) is the same as condcount.  */
-  if (!dec_set
-  || !REG_P (SET_DEST (dec_set))
-  || !REG_P (XEXP (SET_SRC (dec_set), 0))
-  || !CONST_INT_P (XEXP (SET_SRC (dec_set), 1))
-  || REGNO (SET_DEST (dec_set))
- != REGNO (XEXP (SET_SRC (dec_set), 0))
-  || REGNO (SET_DEST (dec_set)) != REGNO (condcount))
-return NULL;
+if (dec_set == NULL_RTX)
+  return NULL;
+  }
 
   decrementnum = INTVAL (XEXP (SET_SRC (dec_set), 1));
 
diff --git a/gcc/testsuite/gcc.target/arm/mve/dlstp-loop-form.c 
b/gcc/testsuite/gcc.target/arm/mve/dlstp-loop-form.c
new file mode 100644
index ..2dc9c4f8bfe1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/dlstp-loop-form.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target a

[gcc/aoliva/heads/testme] [lra] catch all to-sp eliminations

2025-06-23 Thread Alexandre Oliva via Gcc-cvs
The branch 'aoliva/heads/testme' was updated to point to:

 efab6fca... [lra] catch all to-sp eliminations

It previously pointed to:

 62b20bad36bd... [lra] catch all to-sp eliminations

Diff:

!!! WARNING: THE FOLLOWING COMMITS ARE NO LONGER ACCESSIBLE (LOST):
---

  62b20ba... [lra] catch all to-sp eliminations


Summary of changes (added commits):
---

  efa... [lra] catch all to-sp eliminations (*)

(*) This commit already exists in another branch.
Because the reference `refs/users/aoliva/heads/testme' matches
your hooks.email-new-commits-only configuration,
no separate email is sent for this commit.


[gcc] Created branch 'aoliva/heads/lra-elim-fp2sp' in namespace 'refs/users'

2025-06-23 Thread Alexandre Oliva via Gcc-cvs
The branch 'aoliva/heads/lra-elim-fp2sp' was created in namespace 'refs/users' 
pointing to:

 efab6fca... [lra] catch all to-sp eliminations


[gcc r16-1634] OpenACC: Add 'if' clause to 'acc wait' directive

2025-06-23 Thread Tobias Burnus via Gcc-cvs
https://gcc.gnu.org/g:2b077252cafa5045498a0e0c480ee6d48c136232

commit r16-1634-g2b077252cafa5045498a0e0c480ee6d48c136232
Author: Tobias Burnus 
Date:   Mon Jun 23 23:24:56 2025 +0200

OpenACC: Add 'if' clause to 'acc wait' directive

OpenACC 3.0 added the 'if' clause to four directives; this patch only adds
it to 'acc wait'.

gcc/c-family/ChangeLog:

* c-omp.cc (c_finish_oacc_wait): Handle if clause.

gcc/c/ChangeLog:

* c-parser.cc (OACC_WAIT_CLAUSE_MASK): Add if clause.

gcc/cp/ChangeLog:

* parser.cc (OACC_WAIT_CLAUSE_MASK): Add if clause.

gcc/fortran/ChangeLog:

* openmp.cc (OACC_WAIT_CLAUSES): Add if clause.
* trans-openmp.cc (gfc_trans_oacc_wait_directive): Handle it.

gcc/testsuite/ChangeLog:

* c-c++-common/goacc/acc-wait-1.c: New test.
* gfortran.dg/goacc/acc-wait-1.f90: New test.

Diff:
---
 gcc/c-family/c-omp.cc  |  9 -
 gcc/c/c-parser.cc  |  3 +-
 gcc/cp/parser.cc   |  3 +-
 gcc/fortran/openmp.cc  |  2 +-
 gcc/fortran/trans-openmp.cc|  4 ++
 gcc/testsuite/c-c++-common/goacc/acc-wait-1.c  | 51 ++
 gcc/testsuite/gfortran.dg/goacc/acc-wait-1.f90 | 47 
 7 files changed, 114 insertions(+), 5 deletions(-)

diff --git a/gcc/c-family/c-omp.cc b/gcc/c-family/c-omp.cc
index 13de2fe48f96..4352214df3b7 100644
--- a/gcc/c-family/c-omp.cc
+++ b/gcc/c-family/c-omp.cc
@@ -52,8 +52,8 @@ c_finish_oacc_wait (location_t loc, tree parms, tree clauses)
   vec_alloc (args, nparms + 2);
   stmt = builtin_decl_explicit (BUILT_IN_GOACC_WAIT);
 
-  if (omp_find_clause (clauses, OMP_CLAUSE_ASYNC))
-t = OMP_CLAUSE_ASYNC_EXPR (clauses);
+  if ((t = omp_find_clause (clauses, OMP_CLAUSE_ASYNC)))
+t = OMP_CLAUSE_ASYNC_EXPR (t);
   else
 t = build_int_cst (integer_type_node, GOMP_ASYNC_SYNC);
 
@@ -71,6 +71,11 @@ c_finish_oacc_wait (location_t loc, tree parms, tree clauses)
 
   stmt = build_call_expr_loc_vec (loc, stmt, args);
 
+  t = omp_find_clause (clauses, OMP_CLAUSE_IF);
+  if (t)
+stmt = build3_loc (input_location, COND_EXPR, void_type_node,
+  OMP_CLAUSE_IF_EXPR (t), stmt, NULL_TREE);
+
   vec_free (args);
 
   return stmt;
diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc
index faa50a4fd86b..0c3e3e2889c6 100644
--- a/gcc/c/c-parser.cc
+++ b/gcc/c/c-parser.cc
@@ -22501,7 +22501,8 @@ c_parser_oacc_update (c_parser *parser)
 */
 
 #define OACC_WAIT_CLAUSE_MASK  \
-   ( (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_ASYNC) )
+   ( (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_ASYNC)   \
+   | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_IF) )
 
 static tree
 c_parser_oacc_wait (location_t loc, c_parser *parser, char *p_name)
diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index cfebde8b1181..80fd7990 100644
--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc
@@ -49704,7 +49704,8 @@ cp_parser_oacc_update (cp_parser *parser, cp_token 
*pragma_tok)
 */
 
 #define OACC_WAIT_CLAUSE_MASK  \
-   ( (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_ASYNC))
+   ( (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_ASYNC)   \
+   | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_IF))
 
 static tree
 cp_parser_oacc_wait (cp_parser *parser, cp_token *pragma_tok)
diff --git a/gcc/fortran/openmp.cc b/gcc/fortran/openmp.cc
index df829403c34f..fe0a47a6948b 100644
--- a/gcc/fortran/openmp.cc
+++ b/gcc/fortran/openmp.cc
@@ -4474,7 +4474,7 @@ error:
| OMP_CLAUSE_COPYOUT | OMP_CLAUSE_DELETE | OMP_CLAUSE_FINALIZE\
| OMP_CLAUSE_DETACH)
 #define OACC_WAIT_CLAUSES \
-  omp_mask (OMP_CLAUSE_ASYNC)
+  omp_mask (OMP_CLAUSE_ASYNC) | OMP_CLAUSE_IF
 #define OACC_ROUTINE_CLAUSES \
   (omp_mask (OMP_CLAUSE_GANG) | OMP_CLAUSE_WORKER | OMP_CLAUSE_VECTOR\
| OMP_CLAUSE_SEQ  \
diff --git a/gcc/fortran/trans-openmp.cc b/gcc/fortran/trans-openmp.cc
index 2a48d4af5276..a2e70fca0b37 100644
--- a/gcc/fortran/trans-openmp.cc
+++ b/gcc/fortran/trans-openmp.cc
@@ -6048,6 +6048,10 @@ gfc_trans_oacc_wait_directive (gfc_code *code)
 args->quick_push (gfc_convert_expr_to_tree (&block, el->expr));
 
   stmt = build_call_expr_loc_vec (loc, stmt, args);
+  if (clauses->if_expr)
+stmt = build3_loc (input_location, COND_EXPR, void_type_node,
+  gfc_convert_expr_to_tree (&block, clauses->if_expr),
+  stmt, NULL_TREE);
   gfc_add_expr_to_block (&block, stmt);
 
   vec_free (args);
diff --git a/gcc/testsuite/c-c++-common/goacc/acc-wait-1.c 
b/gcc/testsuite/c-c++-common/goacc/acc-wait-1.c
new file mode 100644
index ..bc7ff022f173
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/acc-wait-1.c
@@ -

[gcc r16-1633] Fortran: fix checking of renamed-on-use interface name [PR120784]

2025-06-23 Thread Harald Anlauf via Gcc-cvs
https://gcc.gnu.org/g:6dd1659cf10a7ad51576f902ef3bc007db30c990

commit r16-1633-g6dd1659cf10a7ad51576f902ef3bc007db30c990
Author: Harald Anlauf 
Date:   Mon Jun 23 21:33:40 2025 +0200

Fortran: fix checking of renamed-on-use interface name [PR120784]

PR fortran/120784

gcc/fortran/ChangeLog:

* interface.cc (gfc_match_end_interface): If a use-associated
symbol is renamed, use the local_name for checking.

gcc/testsuite/ChangeLog:

* gfortran.dg/interface_63.f90: New test.

Diff:
---
 gcc/fortran/interface.cc   | 13 +--
 gcc/testsuite/gfortran.dg/interface_63.f90 | 62 ++
 2 files changed, 72 insertions(+), 3 deletions(-)

diff --git a/gcc/fortran/interface.cc b/gcc/fortran/interface.cc
index b8542920ce79..cdb838d83368 100644
--- a/gcc/fortran/interface.cc
+++ b/gcc/fortran/interface.cc
@@ -452,11 +452,18 @@ gfc_match_end_interface (void)
 
 case INTERFACE_DTIO:
 case INTERFACE_GENERIC:
+  /* If a use-associated symbol is renamed, check the local_name.   */
+  const char *local_name = current_interface.sym->name;
+
+  if (current_interface.sym->attr.use_assoc
+ && current_interface.sym->attr.use_rename
+ && current_interface.sym->ns->use_stmts->rename)
+   local_name = current_interface.sym->ns->use_stmts->rename->local_name;
+
   if (type != current_interface.type
- || strcmp (current_interface.sym->name, name) != 0)
+ || strcmp (local_name, name) != 0)
{
- gfc_error ("Expecting % at %C",
-current_interface.sym->name);
+ gfc_error ("Expecting % at %C", local_name);
  m = MATCH_ERROR;
}
 
diff --git a/gcc/testsuite/gfortran.dg/interface_63.f90 
b/gcc/testsuite/gfortran.dg/interface_63.f90
new file mode 100644
index ..a55e8ab431b1
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/interface_63.f90
@@ -0,0 +1,62 @@
+! { dg-do compile }
+! PR fortran/120784 - fix checking of renamed-on-use interface name
+!
+! Contributed by Matt Thompson  
+
+module A_mod
+  implicit none
+
+  interface Get
+ procedure :: get_1
+ procedure :: get_2
+  end interface Get
+
+contains
+
+  subroutine get_1(i)
+integer :: i
+i = 5
+  end subroutine get_1
+
+  subroutine get_2(x)
+real :: x
+x = 4
+  end subroutine get_2
+end module A_mod
+
+module B_mod
+  use A_mod, only : MyGet => Get
+  implicit none
+
+  interface MyGet
+ procedure :: other_get
+  end interface MyGet
+
+contains
+
+  subroutine other_get(c)
+character(1) :: c
+c = 'a'
+  end subroutine other_get
+
+  subroutine check_get ()
+character :: c
+integer   :: i
+real  :: r
+call myget (c)
+call myget (i)
+call myget (r)
+  end subroutine check_get
+
+end module B_MOD
+
+program p
+  use b_mod, only: myget
+  implicit none
+  character :: c
+  integer   :: i
+  real  :: r
+  call myget (c)
+  call myget (i)
+  call myget (r)
+end


[gcc r16-1635] analyzer: fix missing "final override"

2025-06-23 Thread David Malcolm via Gcc-cvs
https://gcc.gnu.org/g:e6406aefd1a25b6dba845a52cfd9484188ff5720

commit r16-1635-ge6406aefd1a25b6dba845a52cfd9484188ff5720
Author: David Malcolm 
Date:   Mon Jun 23 18:46:44 2025 -0400

analyzer: fix missing "final override"

No functional change intended.

gcc/analyzer/ChangeLog:
* region-model.cc
(exception_thrown_from_unrecognized_call::print): Add
"final override" to vfunc.

Signed-off-by: David Malcolm 

Diff:
---
 gcc/analyzer/region-model.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc
index d1c7e8cd53ea..bc44d3da44b5 100644
--- a/gcc/analyzer/region-model.cc
+++ b/gcc/analyzer/region-model.cc
@@ -2090,7 +2090,7 @@ public:
   {
   }
 
-  void print (pretty_printer *pp) const
+  void print (pretty_printer *pp) const final override
   {
 if (m_fndecl)
   pp_printf (pp, "if %qD throws an exception...", m_fndecl);


[gcc r16-1636] libgdiagnostics: sarif-replay: add extra sinks via -fdiagnostics-add-output= [PR116792, PR116163]

2025-06-23 Thread David Malcolm via Gcc-cvs
https://gcc.gnu.org/g:d0142e147486e6f319704d35930720f6dec648fb

commit r16-1636-gd0142e147486e6f319704d35930720f6dec648fb
Author: David Malcolm 
Date:   Mon Jun 23 18:46:51 2025 -0400

libgdiagnostics: sarif-replay: add extra sinks via 
-fdiagnostics-add-output= [PR116792,PR116163]

This patch refactors the support for -fdiagnostics-add-output=SCHEME
from GCC's options parsing so that it is also available to
sarif-replay and to other clients of libgdiagnostics.

With this, users of sarif-replay and other such tools can generate HTML
or SARIF as well as text output, using the same
  -fdiagnostics-add-output=SCHEME
as GCC.

As a test, the patch adds support for this option to the dg-lint
script below "contrib".  For example dg-lint can now generate text,
html, and sarif output via:

  LD_LIBRARY_PATH=../build/gcc/ \
./contrib/dg-lint/dg-lint \
contrib/dg-lint/test-*.c \
-fdiagnostics-add-output=experimental-html:file=dg-lint-tests.html \
-fdiagnostics-add-output=sarif:file=dg-lint-tests.sarif

where the HTML output from dg-lint can be seen here:
  https://dmalcolm.fedorapeople.org/gcc/2025-06-20/dg-lint-tests.html
the sarif output here:
  https://dmalcolm.fedorapeople.org/gcc/2025-06-23/dg-lint-tests.sarif
and a screenshot of VS Code viewing the sarif output is here:
  
https://dmalcolm.fedorapeople.org/gcc/2025-06-23/vscode-viewing-dg-lint-sarif-output.png

As well as allowing sarif-replay to generate HTML, this patch allows
sarif-replay to also generate SARIF.  Ideally this would faithfully
round-trip all the data, but it's not perfect (which I'm tracking as
PR sarif-replay/120792).

contrib/ChangeLog:
PR other/116792
PR testsuite/116163
PR sarif-replay/120792
* dg-lint/dg-lint: Add -fdiagnostics-add-output.
* dg-lint/libgdiagnostics.py: Add
diagnostic_manager_add_sink_from_spec.
(Manager.add_sink_from_spec): New.

gcc/ChangeLog:
PR other/116792
PR testsuite/116163
PR sarif-replay/120792
* Makefile.in (OBJS-libcommon): Add diagnostic-output-spec.o.
* diagnostic-format-html.cc (html_builder::html_builder): Ensure
title is non-empty.
* diagnostic-output-spec.cc: New file, taken from material in
opts-diagnostic.cc.
* diagnostic-output-spec.h: New file.
* diagnostic.cc (diagnostic_context::set_main_input_filename):
New.
* diagnostic.h (diagnostic_context::set_main_input_filename): New
decl.
* doc/libgdiagnostics/topics/compatibility.rst
(LIBGDIAGNOSTICS_ABI_2): New.
* doc/libgdiagnostics/topics/diagnostic-manager.rst
(diagnostic_manager_add_sink_from_spec): New.
(diagnostic_manager_set_analysis_target): New.
* libgdiagnostics++.h (manager::add_sink_from_spec): New.
(manager::set_analysis_target): New.
* libgdiagnostics.cc: Include "diagnostic-output-spec.h".
(struct spec_context): New.
(diagnostic_manager_add_sink_from_spec): New.
(diagnostic_manager_set_analysis_target): New.
* libgdiagnostics.h
(LIBDIAGNOSTICS_HAVE_diagnostic_manager_add_sink_from_spec): New
define.
(diagnostic_manager_add_sink_from_spec): New decl.
(LIBDIAGNOSTICS_HAVE_diagnostic_manager_set_analysis_target): New
define.
(diagnostic_manager_set_analysis_target): New decl.
* libgdiagnostics.map (LIBGDIAGNOSTICS_ABI_2): New.
* libsarifreplay.cc (sarif_replayer::handle_artifact_obj): Looks
for "analysisTarget" in roles and call set_analysis_target using
the artifact if found.
* opts-diagnostic.cc: Refactor, moving material to
diagnostic-output-spec.cc.
(struct opt_spec_context): New.
(handle_OPT_fdiagnostics_add_output_): Use opt_spec_context.
(handle_OPT_fdiagnostics_set_output_): Likewise.
* sarif-replay.cc: Define INCLUDE_STRING.
(struct options): Add m_extra_output_specs.
(usage_msg): Add -fdiagnostics-add-output=SCHEME.
(str_starts_with): New.
(parse_options): Add -fdiagnostics-add-output=SCHEME.
(main): Likewise.
* selftest-run-tests.cc (selftest::run_tests): Call
diagnostic_output_spec_cc_tests rather than
opts_diagnostic_cc_tests.
* selftest.h (selftest::diagnostic_output_spec_cc_tests):
Replace...
(selftest::opts_diagnostic_cc_tests): ...this.

gcc/testsuite/ChangeLog:
PR other/116792
PR testsuite/116163
PR sarif-replay/120792
  

[gcc r16-1644] x86: Extend the remove_redundant_vector pass

2025-06-23 Thread H.J. Lu via Gcc-cvs
https://gcc.gnu.org/g:aba3b9d3a48a0703fd565f7c5f0caf604f59970b

commit r16-1644-gaba3b9d3a48a0703fd565f7c5f0caf604f59970b
Author: H.J. Lu 
Date:   Fri May 9 07:17:07 2025 +0800

x86: Extend the remove_redundant_vector pass

Extend the remove_redundant_vector pass to handle vector broadcasts from
constant and variable scalars.  When broadcasting from constants and
function arguments, we can place a single widest vector broadcast at
entry of the nearest common dominator for basic blocks with all uses
since constants and function arguments aren't changed.  For broadcast
from variables with a single definition, the single definition is
replaced with the widest broadcast.

gcc/

PR target/92080
* config/i386/i386-expand.cc (ix86_expand_call): Set
recursive_function to true for recursive call.
* config/i386/i386-features.cc (ix86_place_single_vector_set):
Add an argument for inner scalar, default to nullptr.  Set the
source from inner scalar if not nullptr.
(ix86_get_vector_load_mode): Renamed to ...
(ix86_get_vector_cse_mode): This.  Add an argument for scalar mode
and handle integer and float scalar modes.
(replace_vector_const): Add an argument for scalar mode and pass
it to ix86_get_vector_load_mode.
(x86_cse_kind): New.
(redundant_load): Likewise.
(ix86_broadcast_inner): Likewise.
(remove_redundant_vector_load): Also support const0_rtx and
constm1_rtx broadcasts.  Handle vector broadcasts from constant
and variable scalars.
* config/i386/i386.h (machine_function): Add recursive_function.

gcc/testsuite/

* gcc.target/i386/keylocker-aesdecwide128kl.c: Updated to expect
movdqa instead of pxor.
* gcc.target/i386/keylocker-aesdecwide256kl.c: Likewise.
* gcc.target/i386/keylocker-aesencwide128kl.c: Likewise.
* gcc.target/i386/keylocker-aesencwide256kl.c: Likewise.
* gcc.target/i386/pr92080-4.c: New test.
* gcc.target/i386/pr92080-5.c: Likewise.
* gcc.target/i386/pr92080-6.c: Likewise.
* gcc.target/i386/pr92080-7.c: Likewise.
* gcc.target/i386/pr92080-8.c: Likewise.
* gcc.target/i386/pr92080-9.c: Likewise.
* gcc.target/i386/pr92080-10.c: Likewise.
* gcc.target/i386/pr92080-11.c: Likewise.
* gcc.target/i386/pr92080-12.c: Likewise.
* gcc.target/i386/pr92080-13.c: Likewise.
* gcc.target/i386/pr92080-14.c: Likewise.
* gcc.target/i386/pr92080-15.c: Likewise.
* gcc.target/i386/pr92080-16.c: Likewise.
* gcc.target/i386/pr92080-17.c: Likewise.
* gcc.target/i386/pr92080-18.c: Likewise.
* gcc.target/i386/pr92080-19.c: Likewise.
* gcc.target/i386/pr92080-20.c: Likewise.

Signed-off-by: H.J. Lu 

Diff:
---
 gcc/config/i386/i386-expand.cc |   3 +
 gcc/config/i386/i386-features.cc   | 427 -
 gcc/config/i386/i386.h |   3 +
 .../gcc.target/i386/keylocker-aesdecwide128kl.c|  14 +-
 .../gcc.target/i386/keylocker-aesdecwide256kl.c|  14 +-
 .../gcc.target/i386/keylocker-aesencwide128kl.c|  14 +-
 .../gcc.target/i386/keylocker-aesencwide256kl.c|  14 +-
 gcc/testsuite/gcc.target/i386/pr92080-10.c |  13 +
 gcc/testsuite/gcc.target/i386/pr92080-11.c |  33 ++
 gcc/testsuite/gcc.target/i386/pr92080-12.c |  16 +
 gcc/testsuite/gcc.target/i386/pr92080-13.c |  32 ++
 gcc/testsuite/gcc.target/i386/pr92080-14.c |  31 ++
 gcc/testsuite/gcc.target/i386/pr92080-15.c |  25 ++
 gcc/testsuite/gcc.target/i386/pr92080-16.c |  26 ++
 gcc/testsuite/gcc.target/i386/pr92080-17.c |  40 ++
 gcc/testsuite/gcc.target/i386/pr92080-18.c |  19 +
 gcc/testsuite/gcc.target/i386/pr92080-19.c |  20 +
 gcc/testsuite/gcc.target/i386/pr92080-20.c |  20 +
 gcc/testsuite/gcc.target/i386/pr92080-4.c  |  50 +++
 gcc/testsuite/gcc.target/i386/pr92080-5.c  | 109 ++
 gcc/testsuite/gcc.target/i386/pr92080-6.c  |  19 +
 gcc/testsuite/gcc.target/i386/pr92080-7.c  |  20 +
 gcc/testsuite/gcc.target/i386/pr92080-8.c  |  16 +
 gcc/testsuite/gcc.target/i386/pr92080-9.c  |  81 
 24 files changed, 939 insertions(+), 120 deletions(-)

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 423fc632003d..8e556f1b9c18 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -10141,6 +10141,9 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
  else if (lookup_attribute ("no_callee_saved_registers",
 TYPE_ATTRIB

[gcc r16-1645] middle-end: replace log_vf usages with vf to allow support for non-power of two vf

2025-06-23 Thread Tamar Christina via Gcc-cvs
https://gcc.gnu.org/g:309dbcea2cabb31bde1a65cdfd30bb7f87b170a2

commit r16-1645-g309dbcea2cabb31bde1a65cdfd30bb7f87b170a2
Author: Tamar Christina 
Date:   Tue Jun 24 07:13:22 2025 +0100

middle-end: replace log_vf usages with vf to allow support for non-power of two vf

This patch fixes a bug where the current code assumed that exact_log2 returns
NULL on failure, but it instead returns -1.  So there are some cases where the
right shift could shift out the entire value.

Secondly it also removes the requirement that VF be a power of two.  With an
uneven unroll factor we can easily end up with a non-power of two VF which SLP
can handle.  This replaces shifts with multiplication and division.

The 32-bit x86 testcase from PR64110 was always wrong: it used to match, by pure
coincidence, a vmovd inside the vector loop.  What it intended to match was that
the argument to the function isn't spilled and then reloaded from the stack for
no reason.

But on 32-bit x86 all arguments are passed on the stack anyway and so the match
would have never worked.  The patch seems to simplify the loop preheader, which
gets it to remove an intermediate zero extend, which causes the match to now
properly fail.

As such I'm skipping the test on 32-bit x86.

gcc/ChangeLog:

* tree-vect-loop-manip.cc (vect_gen_vector_loop_niters,
vect_gen_vector_loop_niters_mult_vf): Remove uses of log_vf.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr64110.c: Update testcase.

Diff:
---
 gcc/testsuite/gcc.target/i386/pr64110.c |  2 +-
 gcc/tree-vect-loop-manip.cc | 36 +
 2 files changed, 20 insertions(+), 18 deletions(-)

diff --git a/gcc/testsuite/gcc.target/i386/pr64110.c 
b/gcc/testsuite/gcc.target/i386/pr64110.c
index 99e391916cb7..11a6929835f4 100644
--- a/gcc/testsuite/gcc.target/i386/pr64110.c
+++ b/gcc/testsuite/gcc.target/i386/pr64110.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O3 -march=core-avx2" } */
-/* { dg-final { scan-assembler "vmovd\[\\t \]" } } */
+/* { dg-final { scan-assembler "vmovd\[\\t \]" { target { ! ilp32 } } } } */
 
 int foo (void);
 int a;
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index 20dc0e556527..469694377499 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -2794,7 +2794,6 @@ vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, 
tree niters,
   tree niters_vector, step_vector, type = TREE_TYPE (niters);
   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
   edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
-  tree log_vf = NULL_TREE;
 
   /* If epilogue loop is required because of data accesses with gaps, we
  subtract one iteration from the total number of iterations here for
@@ -2820,22 +2819,25 @@ vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, 
tree niters,
   if (vf.is_constant (&const_vf)
   && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
 {
-  /* Create: niters >> log2(vf) */
+  /* Create: niters / vf, which is equivalent to niters >> log2(vf) when
+vf is a power of two, and when not we approximate using a
+truncating division.  */
   /* If it's known that niters == number of latch executions + 1 doesn't
-overflow, we can generate niters >> log2(vf); otherwise we generate
-(niters - vf) >> log2(vf) + 1 by using the fact that we know ratio
+overflow, we can generate niters / vf; otherwise we generate
+(niters - vf) / vf + 1 by using the fact that we know ratio
 will be at least one.  */
-  log_vf = build_int_cst (type, exact_log2 (const_vf));
+  tree var_vf = build_int_cst (type, const_vf);
   if (niters_no_overflow)
-   niters_vector = fold_build2 (RSHIFT_EXPR, type, ni_minus_gap, log_vf);
+   niters_vector = fold_build2 (TRUNC_DIV_EXPR, type, ni_minus_gap,
+var_vf);
   else
niters_vector
  = fold_build2 (PLUS_EXPR, type,
-fold_build2 (RSHIFT_EXPR, type,
+fold_build2 (TRUNC_DIV_EXPR, type,
  fold_build2 (MINUS_EXPR, type,
   ni_minus_gap,
-  build_int_cst (type, vf)),
- log_vf),
+  var_vf),
+ var_vf),
 build_int_cst (type, 1));
   step_vector = build_one_cst (type);
 }
@@ -2854,16 +2856,17 @@ vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, 
tree niters,
   /* Peeling algorithm guarantees that vector loop bound is at least ONE,
 we set range information to make niters analyzer's life easier.
 N

[gcc r16-1646] middle-end: Apply loop->unroll directly in vectorizer

2025-06-23 Thread Tamar Christina via Gcc-cvs
https://gcc.gnu.org/g:7f87bfa4a7302ce663db51fb073a40045052cc11

commit r16-1646-g7f87bfa4a7302ce663db51fb073a40045052cc11
Author: Tamar Christina 
Date:   Tue Jun 24 07:14:27 2025 +0100

middle-end: Apply loop->unroll directly in vectorizer

Consider the loop

void f1 (int *restrict a, int n)
{
#pragma GCC unroll 4 requested
  for (int i = 0; i < n; i++)
a[i] *= 2;
}

Which today is vectorized and then unrolled 3x by the RTL unroller due to the
use of the pragma.  This is unfortunate because the pragma was intended for the
scalar loop but we end up with an unrolled vector loop and a longer path to the
entry which has a low enough VF requirement to enter.

This patch instead seeds the suggested_unroll_factor with the value the user
requested and uses it to maintain the total VF that the user wanted the
scalar loop to maintain.

In effect it applies the unrolling inside the vector loop itself.  This has
benefits for things like reductions, as it allows us to split the accumulator
and so the unrolled loop is more efficient.  For early-break it allows the
cbranch call to be shared between the unrolled elements, giving you more
effective unrolling because it doesn't need the repeated cbranch which can be
expensive.

The target can then choose to create multiple epilogues to deal with the "rest".

The example above now generates:

.L4:
ldr q31, [x2]
add v31.4s, v31.4s, v31.4s
str q31, [x2], 16
cmp x2, x3
bne .L4

as V4SI maintains the requested VF, but e.g. pragma unroll 8 generates:

.L4:
ldp q30, q31, [x2]
add v30.4s, v30.4s, v30.4s
add v31.4s, v31.4s, v31.4s
stp q30, q31, [x2], 32
cmp x3, x2
bne .L4

gcc/ChangeLog:

* doc/extend.texi: Document pragma unroll interaction with vectorizer.
* tree-vectorizer.h (LOOP_VINFO_USER_UNROLL): New.
(class _loop_vec_info): Add user_unroll.
* tree-vect-loop.cc (vect_analyze_loop_1): Set
suggested_unroll_factor and retry.
(_loop_vec_info::_loop_vec_info): Initialize user_unroll.
(vect_transform_loop): Clear the loop->unroll value if the pragma was
used.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/unroll-vect.c: New test.

Diff:
---
 gcc/doc/extend.texi|  5 ++
 gcc/testsuite/gcc.target/aarch64/unroll-vect.c | 20 
 gcc/tree-vect-loop.cc  | 63 +++---
 gcc/tree-vectorizer.h  |  5 ++
 4 files changed, 77 insertions(+), 16 deletions(-)

diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 69c651207464..7da99f77ec82 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -10382,6 +10382,11 @@ loop or a @code{#pragma GCC ivdep}, and applies only 
to the loop that follows.
 @var{n} is an integer constant expression specifying the unrolling factor.
 The values of @math{0} and @math{1} block any unrolling of the loop.
 
+If the loop was vectorized the unroll factor specified will be used to seed the
+vectorizer unroll factor.  Whether the loop is unrolled or not will be
+determined by target costing.  The resulting vectorized loop may still be
+unrolled more in later passes depending on the target costing.
+
 @end table
 
 @node Thread-Local
diff --git a/gcc/testsuite/gcc.target/aarch64/unroll-vect.c 
b/gcc/testsuite/gcc.target/aarch64/unroll-vect.c
new file mode 100644
index ..3cb774ba9578
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/unroll-vect.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -march=armv8-a --param 
aarch64-autovec-preference=asimd-only -std=gnu99" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+/*
+** f1:
+** ...
+** add v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
+** add v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
+** add v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
+** add v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
+** ...
+*/
+void f1 (int *restrict a, int n)
+{
+#pragma GCC unroll 16
+  for (int i = 0; i < n; i++)
+a[i] *= 2;
+}
+
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index eb2eb8b1fc08..9ee8e50ee75a 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -1074,6 +1074,7 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, 
vec_info_shared *shared)
 peeling_for_gaps (false),
 peeling_for_niter (false),
 early_breaks (false),
+user_unroll (false),
 no_data_dependencies (false),
 has_mask_store (false),
 scalar_loop_scaling (profile_probability::uninitialized ()),
@@ -3429,27 +3430,50 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared 

[gcc r16-1632] contrib: handle GDB's 'unexpected core files' count

2025-06-23 Thread Andrew Burgess via Gcc-cvs
https://gcc.gnu.org/g:4e9104ae5455a3c02c2a7e07f52e6bc574cc761d

commit r16-1632-g4e9104ae5455a3c02c2a7e07f52e6bc574cc761d
Author: Andrew Burgess 
Date:   Mon Jun 23 16:17:19 2025 +0100

contrib: handle GDB's 'unexpected core files' count

This commit is for the benefit of GDB, but as the binutils-gdb
repository shares the contrib/ directory with gcc, this commit must
first be applied to gcc then copied back to binutils-gdb.

This commit extends the two scripts contrib/dg-extract-results.{py,sh}
to handle GDB's 'unexpected core files' count.  This test result type
should never appear in GCC, or any other tool that shares the contrib/
directory, so this change should be harmless for others.

The 'unexpected core files' count was added to GDB's results by this
series:

  
https://inbox.sourceware.org/gdb-patches/20220623183053.172430-1-pe...@palves.net

this count is added to the gdb.sum file after all the tests have run,
and counts up any core.* files that have appeared.

GDB also has a make-check-all.sh script which runs a test with all the
different board files that GDB supports.  After each test is run the
'unexpected core files' count will be added to that board's results.

I'm now trying to use the dg-extract-results.* scripts to merge the
results from all the different board files, and the 'unexpected core
files' count is confusing these scripts.

contrib/ChangeLog:

* dg-extract-results.py: Handle GDB's unexpected core file count.
* dg-extract-results.sh: Likewise.

Diff:
---
 contrib/dg-extract-results.py | 3 ++-
 contrib/dg-extract-results.sh | 8 ++--
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/contrib/dg-extract-results.py b/contrib/dg-extract-results.py
index f539275ba03c..c5bfbcaa0202 100644
--- a/contrib/dg-extract-results.py
+++ b/contrib/dg-extract-results.py
@@ -146,7 +146,8 @@ class Prog:
 '# of unresolved testcases\t',
 '# of unsupported tests\t\t',
 '# of paths in test names\t',
-'# of duplicate test names\t'
+'# of duplicate test names\t',
+'# of unexpected core files\t'
 ]
 self.runs = dict()
 
diff --git a/contrib/dg-extract-results.sh b/contrib/dg-extract-results.sh
index c2f760498da4..d64ba2558388 100755
--- a/contrib/dg-extract-results.sh
+++ b/contrib/dg-extract-results.sh
@@ -403,7 +403,7 @@ BEGIN {
   variant="$VAR"
   tool="$TOOL"
   passcnt=0; failcnt=0; untstcnt=0; xpasscnt=0; xfailcnt=0; kpasscnt=0; 
kfailcnt=0; unsupcnt=0; unrescnt=0; dgerrorcnt=0;
-  pathcnt=0; dupcnt=0
+  pathcnt=0; dupcnt=0; corecnt=0
   curvar=""; insummary=0
 }
 /^Running target / { curvar = \$3; next }
@@ -420,6 +420,7 @@ BEGIN {
 /^# of unsupported tests/  { if (insummary == 1) unsupcnt += \$5; next; }
 /^# of paths in test names/{ if (insummary == 1) pathcnt += \$7; next; }
 /^# of duplicate test names/   { if (insummary == 1) dupcnt += \$6; next; }
+/^# of unexpected core files/  { if (insummary == 1) corecnt += \$6; next; }
 /^$/   { if (insummary == 1)
{ insummary = 0; curvar = "" }
  next
@@ -439,6 +440,7 @@ END {
   if (unsupcnt != 0) printf ("# of unsupported tests\t\t%d\n", unsupcnt)
   if (pathcnt != 0) printf ("# of paths in test names\t%d\n", pathcnt)
   if (dupcnt != 0) printf ("# of duplicate test names\t%d\n", dupcnt)
+  if (corecnt != 0) printf ("# of unexpected core files\t%d\n", corecnt)
 }
 EOF
 
@@ -460,7 +462,7 @@ cat << EOF > $TOTAL_AWK
 BEGIN {
   tool="$TOOL"
   passcnt=0; failcnt=0; untstcnt=0; xpasscnt=0; xfailcnt=0; kfailcnt=0; 
unsupcnt=0; unrescnt=0; dgerrorcnt=0
-  pathcnt=0; dupcnt=0
+  pathcnt=0; dupcnt=0; corecnt=0
 }
 /^# of DejaGnu errors/ { dgerrorcnt += \$5 }
 /^# of expected passes/{ passcnt += \$5 }
@@ -474,6 +476,7 @@ BEGIN {
 /^# of unsupported tests/  { unsupcnt += \$5 }
 /^# of paths in test names/{ pathcnt += \$7 }
 /^# of duplicate test names/   { dupcnt += \$6 }
+/^# of unexpected core files/  { corecnt += \$6 }
 END {
   printf ("\n\t\t=== %s Summary ===\n\n", tool)
   if (dgerrorcnt != 0) printf ("# of DejaGnu errors\t\t%d\n", dgerrorcnt)
@@ -488,6 +491,7 @@ END {
   if (unsupcnt != 0) printf ("# of unsupported tests\t\t%d\n", unsupcnt)
   if (pathcnt != 0) printf ("# of paths in test names\t%d\n", pathcnt)
   if (dupcnt != 0) printf ("# of duplicate test names\t%d\n", dupcnt)
+  if (corecnt != 0) printf ("# of unexpected core files\t%d\n", corecnt)
 }
 EOF


[gcc(refs/users/aoliva/heads/testme)] [lra] catch all to-sp eliminations

2025-06-23 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:62b20bad36bd3fcb34d6c2cea71ee10abd686e08

commit 62b20bad36bd3fcb34d6c2cea71ee10abd686e08
Author: Alexandre Oliva 
Date:   Sun Jun 22 17:34:54 2025 -0300

[lra] catch all to-sp eliminations

An x86_64-linux-gnu native with ix86_frame_pointer_required modified
to return true for nonzero frames, to exercise
lra_update_fp2sp_elimination, reveals in stage1 testing that wrong
code is generated for gcc.c-torture/execute/ieee/fp-cmp-8l.c:
argp-to-sp eliminations are used for one_test to pass its arguments on
to *pos, and the sp offsets survive the disabling of that elimination.

We didn't really have to disable that elimination, but the backend
disables eliminations to sp if frame_pointer_needed.

The workaround for this scenario is to compile with
-maccumulate-outgoing-args.

This change extends the catching of fp2sp eliminations to all (?)
eliminations to sp, since none of them can be properly reversed and
would silently lead to wrong code.  This is probably too strict.


for  gcc/ChangeLog

PR rtl-optimization/120424
* lra-eliminations.cc (elimination_2sp_occurred_p): Rename
from...
(elimination_fp2sp_occured_p): ... this.  Adjust all uses.
(lra_eliminate_regs_1): Don't require a from-frame-pointer
elimination to set it.
(update_reg_eliminate): Likewise to test it.

Diff:
---
 gcc/lra-eliminations.cc | 46 +-
 1 file changed, 25 insertions(+), 21 deletions(-)

diff --git a/gcc/lra-eliminations.cc b/gcc/lra-eliminations.cc
index 9cdd0c5ff53a..341587f21f2e 100644
--- a/gcc/lra-eliminations.cc
+++ b/gcc/lra-eliminations.cc
@@ -309,8 +309,18 @@ move_plus_up (rtx x)
   return x;
 }
 
-/* Flag that we already did frame pointer to stack pointer elimination.  */
-static bool elimination_fp2sp_occured_p = false;
+/* Flag that we already applied nonzero stack pointer elimination
+   offset; such sp updates cannot currently be undone.  */
+static bool elimination_2sp_occurred_p = false;
+
+/* Take note of any nonzero sp-OFFSET used in eliminations to sp.  */
+static inline poly_int64
+note_spoff (poly_int64 offset)
+{
+  if (maybe_ne (offset, 0))
+elimination_2sp_occurred_p = true;
+  return offset;
+}
 
 /* Scan X and replace any eliminable registers (such as fp) with a
replacement (such as sp) if SUBST_P, plus an offset.  The offset is
@@ -369,13 +379,10 @@ lra_eliminate_regs_1 (rtx_insn *insn, rtx x, machine_mode 
mem_mode,
{
  rtx to = subst_p ? ep->to_rtx : ep->from_rtx;
 
- if (ep->to_rtx == stack_pointer_rtx && ep->from == 
FRAME_POINTER_REGNUM)
-   elimination_fp2sp_occured_p = true;
-
  if (maybe_ne (update_sp_offset, 0))
{
  if (ep->to_rtx == stack_pointer_rtx)
-   return plus_constant (Pmode, to, update_sp_offset);
+   return plus_constant (Pmode, to, note_spoff (update_sp_offset));
  return to;
}
  else if (update_p)
@@ -385,7 +392,8 @@ lra_eliminate_regs_1 (rtx_insn *insn, rtx x, machine_mode 
mem_mode,
  ep->offset
  - (insn != NULL_RTX
 && ep->to_rtx == stack_pointer_rtx
-? lra_get_insn_recog_data (insn)->sp_offset
+? note_spoff (lra_get_insn_recog_data
+  (insn)->sp_offset)
 : 0));
  else
return to;
@@ -402,19 +410,18 @@ lra_eliminate_regs_1 (rtx_insn *insn, rtx x, machine_mode 
mem_mode,
  poly_int64 offset, curr_offset;
  rtx to = subst_p ? ep->to_rtx : ep->from_rtx;
 
- if (ep->to_rtx == stack_pointer_rtx && ep->from == 
FRAME_POINTER_REGNUM)
-   elimination_fp2sp_occured_p = true;
-
  if (! update_p && ! full_p)
return simplify_gen_binary (PLUS, Pmode, to, XEXP (x, 1));
 
  if (maybe_ne (update_sp_offset, 0))
-   offset = ep->to_rtx == stack_pointer_rtx ? update_sp_offset : 0;
+   offset = (ep->to_rtx == stack_pointer_rtx
+ ? note_spoff (update_sp_offset)
+ : 0);
  else
offset = (update_p
  ? ep->offset - ep->previous_offset : ep->offset);
  if (full_p && insn != NULL_RTX && ep->to_rtx == stack_pointer_rtx)
-   offset -= lra_get_insn_recog_data (insn)->sp_offset;
+   offset -= note_spoff (lra_get_insn_recog_data 
(insn)->sp_offset);
  if (poly_int_rtx_p (XEXP (x, 1), &curr_offset)
  && known_eq (curr_offset, -offset))
return to;
@@ -465,15 +472,13 @@ lra_eliminate_regs_1 (rtx_insn *insn, r

[gcc/aoliva/heads/testme] [lra] catch all to-sp eliminations

2025-06-23 Thread Alexandre Oliva via Gcc-cvs
The branch 'aoliva/heads/testme' was updated to point to:

 62b20bad36bd... [lra] catch all to-sp eliminations

It previously pointed to:

 87076bd78202... [lra] catch all to-sp eliminations

Diff:

!!! WARNING: THE FOLLOWING COMMITS ARE NO LONGER ACCESSIBLE (LOST):
---

  87076bd... [lra] catch all to-sp eliminations


Summary of changes (added commits):
---

  62b20ba... [lra] catch all to-sp eliminations