[gcc r15-858] Fix SLP reduction neutral op value for pointer reductions

2024-05-28 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:65aa46ffc3b06bba3d49b9b8315610c706a1215b

commit r15-858-g65aa46ffc3b06bba3d49b9b8315610c706a1215b
Author: Richard Biener 
Date:   Mon May 27 11:38:11 2024 +0200

Fix SLP reduction neutral op value for pointer reductions

When the neutral op is the initial value we might need to convert
it from pointer to integer.

* tree-vect-loop.cc (get_initial_defs_for_reduction): Convert
neutral op to the vector component type.

Diff:
---
 gcc/tree-vect-loop.cc | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 83c0544b6aa..3b94bb13a8b 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -5616,7 +5616,14 @@ get_initial_defs_for_reduction (loop_vec_info loop_vinfo,
   /* Get the def before the loop.  In reduction chain we have only
 one initial value.  Else we have as many as PHIs in the group.  */
   if (i >= initial_values.length () || (j > i && neutral_op))
-   op = neutral_op;
+   {
+ if (!useless_type_conversion_p (TREE_TYPE (vector_type),
+ TREE_TYPE (neutral_op)))
+   neutral_op = gimple_convert (&ctor_seq,
+TREE_TYPE (vector_type),
+neutral_op);
+ op = neutral_op;
+   }
   else
{
  if (!useless_type_conversion_p (TREE_TYPE (vector_type),


[gcc r14-10250] libstdc++: Guard use of sized deallocation [PR114940]

2024-05-28 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:89dff1488ef3fde11f6451e5f9817e14bcd6a873

commit r14-10250-g89dff1488ef3fde11f6451e5f9817e14bcd6a873
Author: Jonathan Wakely 
Date:   Wed May 22 10:32:43 2024 +0100

libstdc++: Guard use of sized deallocation [PR114940]

Clang does not enable -fsized-deallocation by default, which means it
can't compile our <generator> and <stacktrace> headers.

Make the __cpp_lib_generator macro depend on the compiler-defined
__cpp_sized_deallocation macro, and change <stacktrace> to use unsized
deallocation when __cpp_sized_deallocation isn't defined.

libstdc++-v3/ChangeLog:

PR libstdc++/114940
* include/bits/version.def (generator): Depend on
__cpp_sized_deallocation.
* include/bits/version.h: Regenerate.
* include/std/stacktrace (_GLIBCXX_SIZED_DELETE): New macro.
(basic_stacktrace::_Impl::_M_deallocate): Use it.

(cherry picked from commit b2fdd508d7e63158e9d2a6dd04f901d02900def3)

Diff:
---
 libstdc++-v3/include/bits/version.def |  2 +-
 libstdc++-v3/include/bits/version.h   |  2 +-
 libstdc++-v3/include/std/stacktrace   | 13 +++--
 3 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/libstdc++-v3/include/bits/version.def 
b/libstdc++-v3/include/bits/version.def
index 5c0477fb61e..e5f4c4c13c0 100644
--- a/libstdc++-v3/include/bits/version.def
+++ b/libstdc++-v3/include/bits/version.def
@@ -1642,7 +1642,7 @@ ftms = {
   values = {
 v = 202207;
 cxxmin = 23;
-extra_cond = "__glibcxx_coroutine";
+extra_cond = "__glibcxx_coroutine && __cpp_sized_deallocation";
   };
 };
 
diff --git a/libstdc++-v3/include/bits/version.h 
b/libstdc++-v3/include/bits/version.h
index 65e708c73fb..ad418d46664 100644
--- a/libstdc++-v3/include/bits/version.h
+++ b/libstdc++-v3/include/bits/version.h
@@ -1824,7 +1824,7 @@
 #undef __glibcxx_want_forward_like
 
 #if !defined(__cpp_lib_generator)
-# if (__cplusplus >= 202100L) && (__glibcxx_coroutine)
+# if (__cplusplus >= 202100L) && (__glibcxx_coroutine && 
__cpp_sized_deallocation)
 #  define __glibcxx_generator 202207L
 #  if defined(__glibcxx_want_all) || defined(__glibcxx_want_generator)
 #   define __cpp_lib_generator 202207L
diff --git a/libstdc++-v3/include/std/stacktrace 
b/libstdc++-v3/include/std/stacktrace
index d217d63af3b..962dbed7a41 100644
--- a/libstdc++-v3/include/std/stacktrace
+++ b/libstdc++-v3/include/std/stacktrace
@@ -551,6 +551,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 #else
 # define _GLIBCXX_OPERATOR_NEW ::operator new
 # define _GLIBCXX_OPERATOR_DELETE ::operator delete
+#endif
+
+#if __cpp_sized_deallocation
+# define _GLIBCXX_SIZED_DELETE(T, p, n) \
+  _GLIBCXX_OPERATOR_DELETE((p), (n) * sizeof(T))
+#else
+# define _GLIBCXX_SIZED_DELETE(T, p, n) _GLIBCXX_OPERATOR_DELETE(p)
 #endif
 
// Precondition: _M_frames == nullptr && __n != 0
@@ -592,8 +599,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  if (_M_capacity)
{
  if constexpr (is_same_v>)
-   _GLIBCXX_OPERATOR_DELETE (static_cast(_M_frames),
- _M_capacity * sizeof(value_type));
+   _GLIBCXX_SIZED_DELETE(value_type,
+ static_cast(_M_frames),
+ _M_capacity);
  else
__alloc.deallocate(_M_frames, _M_capacity);
  _M_frames = nullptr;
@@ -601,6 +609,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
}
}
 
+#undef _GLIBCXX_SIZED_DELETE
 #undef _GLIBCXX_OPERATOR_DELETE
 #undef _GLIBCXX_OPERATOR_NEW


[gcc r15-859] tree-optimization/115254 - don't account single-lane SLP against discovery limit

2024-05-28 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:eaaa4b88038d4d6eda1b20ab662f1568fd9be31f

commit r15-859-geaaa4b88038d4d6eda1b20ab662f1568fd9be31f
Author: Richard Biener 
Date:   Fri Sep 29 15:12:54 2023 +0200

tree-optimization/115254 - don't account single-lane SLP against discovery 
limit

The following avoids accounting single-lane SLP to the discovery
limit.  As the two testcases show this makes discovery fail,
unfortunately not even in the same way across targets.  The following
should fix two FAILs for GCN as a side-effect.

PR tree-optimization/115254
* tree-vect-slp.cc (vect_build_slp_tree): Only account
multi-lane SLP to limit.

* gcc.dg/vect/slp-cond-2-big-array.c: Expect 4 times SLP.
* gcc.dg/vect/slp-cond-2.c: Likewise.

Diff:
---
 gcc/testsuite/gcc.dg/vect/slp-cond-2-big-array.c |  2 +-
 gcc/testsuite/gcc.dg/vect/slp-cond-2.c   |  2 +-
 gcc/tree-vect-slp.cc | 31 ++--
 3 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/slp-cond-2-big-array.c 
b/gcc/testsuite/gcc.dg/vect/slp-cond-2-big-array.c
index cb7eb94b3a3..9a9f63c0b8d 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-cond-2-big-array.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-cond-2-big-array.c
@@ -128,4 +128,4 @@ main ()
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" } 
} */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" } 
} */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-cond-2.c 
b/gcc/testsuite/gcc.dg/vect/slp-cond-2.c
index 1dcee46cd95..08bbb3dbec6 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-cond-2.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-cond-2.c
@@ -128,4 +128,4 @@ main ()
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" } 
} */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" } 
} */
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index c7ed520b629..7a963e28063 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -1725,21 +1725,26 @@ vect_build_slp_tree (vec_info *vinfo,
   SLP_TREE_SCALAR_STMTS (res) = stmts;
   bst_map->put (stmts.copy (), res);
 
-  if (*limit == 0)
+  /* Single-lane SLP doesn't have the chance of run-away, do not account
+ it to the limit.  */
+  if (stmts.length () > 1)
 {
-  if (dump_enabled_p ())
-   dump_printf_loc (MSG_NOTE, vect_location,
-"SLP discovery limit exceeded\n");
-  /* Mark the node invalid so we can detect those when still in use
-as backedge destinations.  */
-  SLP_TREE_SCALAR_STMTS (res) = vNULL;
-  SLP_TREE_DEF_TYPE (res) = vect_uninitialized_def;
-  res->failed = XNEWVEC (bool, group_size);
-  memset (res->failed, 0, sizeof (bool) * group_size);
-  memset (matches, 0, sizeof (bool) * group_size);
-  return NULL;
+  if (*limit == 0)
+   {
+ if (dump_enabled_p ())
+   dump_printf_loc (MSG_NOTE, vect_location,
+"SLP discovery limit exceeded\n");
+ /* Mark the node invalid so we can detect those when still in use
+as backedge destinations.  */
+ SLP_TREE_SCALAR_STMTS (res) = vNULL;
+ SLP_TREE_DEF_TYPE (res) = vect_uninitialized_def;
+ res->failed = XNEWVEC (bool, group_size);
+ memset (res->failed, 0, sizeof (bool) * group_size);
+ memset (matches, 0, sizeof (bool) * group_size);
+ return NULL;
+   }
+  --*limit;
 }
-  --*limit;
 
   if (dump_enabled_p ())
 dump_printf_loc (MSG_NOTE, vect_location,


[gcc r13-8804] libstdc++: Guard use of sized deallocation [PR114940]

2024-05-28 Thread Jonathan Wakely via Gcc-cvs
https://gcc.gnu.org/g:67434fec24bef0faeec0eb402f82ca7e43a4a112

commit r13-8804-g67434fec24bef0faeec0eb402f82ca7e43a4a112
Author: Jonathan Wakely 
Date:   Wed May 22 10:32:43 2024 +0100

libstdc++: Guard use of sized deallocation [PR114940]

Clang does not enable -fsized-deallocation by default, which means it
can't compile our <stacktrace> header.

Make the __cpp_lib_generator macro depend on the compiler-defined
__cpp_sized_deallocation macro, and change <stacktrace> to use unsized
deallocation when __cpp_sized_deallocation isn't defined.

libstdc++-v3/ChangeLog:

PR libstdc++/114940
* include/std/stacktrace (_GLIBCXX_SIZED_DELETE): New macro.
(basic_stacktrace::_Impl::_M_deallocate): Use it.

(cherry picked from commit b2fdd508d7e63158e9d2a6dd04f901d02900def3)

Diff:
---
 libstdc++-v3/include/std/stacktrace | 13 +++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/include/std/stacktrace 
b/libstdc++-v3/include/std/stacktrace
index 8f09467d751..3d8a085a6a9 100644
--- a/libstdc++-v3/include/std/stacktrace
+++ b/libstdc++-v3/include/std/stacktrace
@@ -600,6 +600,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 #else
 # define _GLIBCXX_OPERATOR_NEW ::operator new
 # define _GLIBCXX_OPERATOR_DELETE ::operator delete
+#endif
+
+#if __cpp_sized_deallocation
+# define _GLIBCXX_SIZED_DELETE(T, p, n) \
+  _GLIBCXX_OPERATOR_DELETE((p), (n) * sizeof(T))
+#else
+# define _GLIBCXX_SIZED_DELETE(T, p, n) _GLIBCXX_OPERATOR_DELETE(p)
 #endif
 
// Precondition: _M_frames == nullptr && __n != 0
@@ -641,8 +648,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  if (_M_capacity)
{
  if constexpr (is_same_v>)
-   _GLIBCXX_OPERATOR_DELETE (static_cast(_M_frames),
- _M_capacity * sizeof(value_type));
+   _GLIBCXX_SIZED_DELETE(value_type,
+ static_cast(_M_frames),
+ _M_capacity);
  else
__alloc.deallocate(_M_frames, _M_capacity);
  _M_frames = nullptr;
@@ -650,6 +658,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
}
}
 
+#undef _GLIBCXX_SIZED_DELETE
 #undef _GLIBCXX_OPERATOR_DELETE
 #undef _GLIBCXX_OPERATOR_NEW


[gcc r15-860] Avoid pessimistic constraints for asm memory constraints

2024-05-28 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:19cc6120087619b496732b249b48b1fbe27e

commit r15-860-g19cc6120087619b496732b249b48b1fbe27e
Author: Richard Biener 
Date:   Tue May 28 13:29:30 2024 +0200

Avoid pessimistic constraints for asm memory constraints

We process asm memory input/outputs with constraints to ESCAPED
but for this temporarily build an ADDR_EXPR.  The issue is that
the used build_fold_addr_expr ends up wrapping the ADDR_EXPR in
a conversion which ends up producing &ANYTHING constraints which
is quite bad.  The following uses get_constraint_for_address_of
instead, avoiding the temporary tree and the unhandled conversion.

This avoids a gcc.dg/tree-ssa/restrict-9.c FAIL with the fix
for PR115236.

* tree-ssa-structalias.cc (find_func_aliases): Use
get_constraint_for_address_of to build escape constraints
for asm inputs and outputs.

Diff:
---
 gcc/tree-ssa-structalias.cc | 12 ++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/gcc/tree-ssa-structalias.cc b/gcc/tree-ssa-structalias.cc
index f93c5df0767..9cec2c6cfd9 100644
--- a/gcc/tree-ssa-structalias.cc
+++ b/gcc/tree-ssa-structalias.cc
@@ -5269,7 +5269,11 @@ find_func_aliases (struct function *fn, gimple *origt)
 
  /* A memory constraint makes the address of the operand escape.  */
  if (!allows_reg && allows_mem)
-   make_escape_constraint (build_fold_addr_expr (op));
+   {
+ auto_vec tmpc;
+ get_constraint_for_address_of (op, &tmpc);
+ make_constraints_to (escaped_id, tmpc);
+   }
 
  /* The asm may read global memory, so outputs may point to
 any global memory.  */
@@ -5298,7 +5302,11 @@ find_func_aliases (struct function *fn, gimple *origt)
 
  /* A memory constraint makes the address of the operand escape.  */
  if (!allows_reg && allows_mem)
-   make_escape_constraint (build_fold_addr_expr (op));
+   {
+ auto_vec tmpc;
+ get_constraint_for_address_of (op, &tmpc);
+ make_constraints_to (escaped_id, tmpc);
+   }
  /* Strictly we'd only need the constraint to ESCAPED if
 the asm clobbers memory, otherwise using something
 along the lines of per-call clobbers/uses would be enough.  */


[gcc r15-861] tree-optimization/115236 - more points-to *ANYTHING = x fixes

2024-05-28 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:c08b0d3f7b3539b26031de31d88dea6b94474577

commit r15-861-gc08b0d3f7b3539b26031de31d88dea6b94474577
Author: Richard Biener 
Date:   Mon May 27 10:41:02 2024 +0200

tree-optimization/115236 - more points-to *ANYTHING = x fixes

The stored-to ANYTHING handling has more holes, uncovered by treating
volatile accesses as ANYTHING.  We fail to properly build the
pred and succ graphs, in particular we may not elide direct nodes
from receiving from STOREDANYTHING.

PR tree-optimization/115236
* tree-ssa-structalias.cc (build_pred_graph): Properly
handle *ANYTHING = X.
(build_succ_graph): Likewise.  Do not elide direct nodes
from receiving from STOREDANYTHING.

* gcc.dg/pr115236.c: New testcase.

Diff:
---
 gcc/testsuite/gcc.dg/pr115236.c | 12 
 gcc/tree-ssa-structalias.cc | 20 ++--
 2 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/pr115236.c b/gcc/testsuite/gcc.dg/pr115236.c
new file mode 100644
index 000..91edfab957a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr115236.c
@@ -0,0 +1,12 @@
+/* { dg-do run } */
+/* { dg-options "-O -fno-tree-fre" } */
+
+int a, *b = &a;
+int main()
+{
+  int *c, *volatile *d = &c;
+  *d = b;
+  if (c != &a)
+__builtin_abort();
+  return 0;
+}
diff --git a/gcc/tree-ssa-structalias.cc b/gcc/tree-ssa-structalias.cc
index 9cec2c6cfd9..330e64e65da 100644
--- a/gcc/tree-ssa-structalias.cc
+++ b/gcc/tree-ssa-structalias.cc
@@ -1312,7 +1312,12 @@ build_pred_graph (void)
{
  /* *x = y.  */
  if (rhs.offset == 0 && lhs.offset == 0 && rhs.type == SCALAR)
-   add_pred_graph_edge (graph, FIRST_REF_NODE + lhsvar, rhsvar);
+   {
+ if (lhs.var == anything_id)
+   add_pred_graph_edge (graph, storedanything_id, rhsvar);
+ else
+   add_pred_graph_edge (graph, FIRST_REF_NODE + lhsvar, rhsvar);
+   }
}
   else if (rhs.type == DEREF)
{
@@ -1398,7 +1403,12 @@ build_succ_graph (void)
   if (lhs.type == DEREF)
{
  if (rhs.offset == 0 && lhs.offset == 0 && rhs.type == SCALAR)
-   add_graph_edge (graph, FIRST_REF_NODE + lhsvar, rhsvar);
+   {
+ if (lhs.var == anything_id)
+   add_graph_edge (graph, storedanything_id, rhsvar);
+ else
+   add_graph_edge (graph, FIRST_REF_NODE + lhsvar, rhsvar);
+   }
}
   else if (rhs.type == DEREF)
{
@@ -1418,13 +1428,11 @@ build_succ_graph (void)
}
 }
 
-  /* Add edges from STOREDANYTHING to all non-direct nodes that can
- receive pointers.  */
+  /* Add edges from STOREDANYTHING to all nodes that can receive pointers.  */
   t = find (storedanything_id);
   for (i = integer_id + 1; i < FIRST_REF_NODE; ++i)
 {
-  if (!bitmap_bit_p (graph->direct_nodes, i)
- && get_varinfo (i)->may_have_pointers)
+  if (get_varinfo (i)->may_have_pointers)
add_graph_edge (graph, find (i), t);
 }


[gcc r14-10251] Fortran: Fix SHAPE for zero-size arrays

2024-05-28 Thread Tobias Burnus via Gcc-cvs
https://gcc.gnu.org/g:dbeb3d127da07963ecaa26680da62a255199e9c2

commit r14-10251-gdbeb3d127da07963ecaa26680da62a255199e9c2
Author: Tobias Burnus 
Date:   Mon May 20 08:34:48 2024 +0200

Fortran: Fix SHAPE for zero-size arrays

PR fortran/115150

gcc/fortran/ChangeLog:

* trans-intrinsic.cc (gfc_conv_intrinsic_bound): Fix SHAPE
for zero-size arrays

gcc/testsuite/ChangeLog:

* gfortran.dg/shape_12.f90: New test.

(cherry picked from commit b701306a9b38bd74cdc26c7ece5add22f2203b56)

Diff:
---
 gcc/fortran/trans-intrinsic.cc |  4 ++-
 gcc/testsuite/gfortran.dg/shape_12.f90 | 51 ++
 2 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index 4e26af21b46..7cb7c2e6949 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -3090,7 +3090,9 @@ gfc_conv_intrinsic_bound (gfc_se * se, gfc_expr * expr, 
enum gfc_isym_id op)
  lbound, gfc_index_one_node);
}
   else if (op == GFC_ISYM_SHAPE)
-   se->expr = size;
+   se->expr = fold_build2_loc (input_location, MAX_EXPR,
+   gfc_array_index_type, size,
+   gfc_index_zero_node);
   else
gcc_unreachable ();
 
diff --git a/gcc/testsuite/gfortran.dg/shape_12.f90 
b/gcc/testsuite/gfortran.dg/shape_12.f90
new file mode 100644
index 000..e672e1ff9f9
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/shape_12.f90
@@ -0,0 +1,51 @@
+! { dg-do run }
+!
+! PR fortran/115150
+!
+! Check that SHAPE handles zero-sized arrays correctly
+!
+implicit none
+call one
+call two
+
+contains
+
+subroutine one
+  real,allocatable :: A(:),B(:,:)
+  allocate(a(3:0), b(5:1, 2:5))
+
+  if (any (shape(a) /= [0])) stop 1
+  if (any (shape(b) /= [0, 4])) stop 2
+  if (size(a) /= 0) stop 3
+  if (size(b) /= 0) stop 4
+  if (any (lbound(a) /= [1])) stop 5
+  if (any (lbound(b) /= [1, 2])) stop 6
+  if (any (ubound(a) /= [0])) stop 5
+  if (any (ubound(b) /= [0,5])) stop 6
+end
+
+subroutine two
+integer :: x1(10), x2(10,10)
+call f(x1, x2, -3)
+end
+
+subroutine f(y1, y2, n)
+  integer, value :: n
+  integer :: y1(1:n)
+  integer :: y2(1:n,4,2:*)
+  call g(y1, y2)
+end
+
+subroutine g(z1, z2)
+  integer :: z1(..), z2(..)
+
+  if (any (shape(z1) /= [0])) stop 1
+  if (any (shape(z2) /= [0, 4, -1])) stop 2
+  if (size(z1) /= 0) stop 3
+  if (size(z2) /= 0) stop 4
+  if (any (lbound(z1) /= [1])) stop 5
+  if (any (lbound(z2) /= [1, 1, 1])) stop 6
+  if (any (ubound(z1) /= [0])) stop 5
+  if (any (ubound(z2) /= [0, 4, -1])) stop 6
+end
+end


[gcc r12-10475] ipa: Compare jump functions in ICF (PR 113907)

2024-05-28 Thread Martin Jambor via Gcc-cvs
https://gcc.gnu.org/g:72f6b7ec3915f0b5b3517dffa19e3b34c8af687d

commit r12-10475-g72f6b7ec3915f0b5b3517dffa19e3b34c8af687d
Author: Martin Jambor 
Date:   Tue May 28 13:33:02 2024 +0200

ipa: Compare jump functions in ICF (PR 113907)

This is a manual backport of r14-9840-g1162861439fd3c from master.
Manual because the bits and value range representation in jump
functions have changed during the gcc 14 development cycle.

In PR 113907 comment #58, Honza found a case where ICF thinks bodies
of functions are equivalent but because of a difference in aliases in a
memory access, different aggregate jump functions are associated with
supposedly equivalent call statements.  This patch adds a way to
compare jump functions and plugs it into ICF to avoid the issue.

gcc/ChangeLog:

2024-05-14  Martin Jambor  

PR ipa/113907
* ipa-prop.h (ipa_jump_functions_equivalent_p): Declare.
(values_equal_for_ipcp_p): Likewise.
* ipa-prop.cc (ipa_agg_pass_through_jf_equivalent_p): New function.
(ipa_agg_jump_functions_equivalent_p): Likewise.
(ipa_jump_functions_equivalent_p): Likewise.
* ipa-cp.cc (values_equal_for_ipcp_p): Make function public.
* ipa-icf-gimple.cc: Include alloc-pool.h, symbol-summary.h, 
sreal.h,
ipa-cp.h and ipa-prop.h.
(func_checker::compare_gimple_call): Compare jump functions.

gcc/testsuite/ChangeLog:

2024-05-10  Martin Jambor  

PR ipa/113907
* gcc.dg/lto/pr113907_0.c: New.
* gcc.dg/lto/pr113907_1.c: Likewise.
* gcc.dg/lto/pr113907_2.c: Likewise.

(cherry picked from commit 1db45e83021a8a87f41e22053910fcce6e8e2c2c)

Diff:
---
 gcc/ipa-cp.cc |   2 +-
 gcc/ipa-icf-gimple.cc |  29 +++
 gcc/ipa-prop.cc   | 157 ++
 gcc/ipa-prop.h|   3 +
 gcc/testsuite/gcc.dg/lto/pr113907_0.c |  18 
 gcc/testsuite/gcc.dg/lto/pr113907_1.c |  35 
 gcc/testsuite/gcc.dg/lto/pr113907_2.c |  11 +++
 7 files changed, 254 insertions(+), 1 deletion(-)

diff --git a/gcc/ipa-cp.cc b/gcc/ipa-cp.cc
index fbb31f6dff2..909464f4ac4 100644
--- a/gcc/ipa-cp.cc
+++ b/gcc/ipa-cp.cc
@@ -1402,7 +1402,7 @@ ipacp_value_safe_for_type (tree param_type, tree value)
 
 /* Return true iff X and Y should be considered equal values by IPA-CP.  */
 
-static bool
+bool
 values_equal_for_ipcp_p (tree x, tree y)
 {
   gcc_checking_assert (x != NULL_TREE && y != NULL_TREE);
diff --git a/gcc/ipa-icf-gimple.cc b/gcc/ipa-icf-gimple.cc
index ab398ca051c..e81409c16f9 100644
--- a/gcc/ipa-icf-gimple.cc
+++ b/gcc/ipa-icf-gimple.cc
@@ -41,7 +41,11 @@ along with GCC; see the file COPYING3.  If not see
 #include "gimple-walk.h"
 
 #include "tree-ssa-alias-compare.h"
+#include "alloc-pool.h"
+#include "symbol-summary.h"
 #include "ipa-icf-gimple.h"
+#include "sreal.h"
+#include "ipa-prop.h"
 
 namespace ipa_icf_gimple {
 
@@ -714,6 +718,31 @@ func_checker::compare_gimple_call (gcall *s1, gcall *s2)
   && !compatible_types_p (TREE_TYPE (t1), TREE_TYPE (t2)))
 return return_false_with_msg ("GIMPLE internal call LHS type mismatch");
 
+  if (!gimple_call_internal_p (s1))
+{
+  cgraph_edge *e1 = cgraph_node::get (m_source_func_decl)->get_edge (s1);
+  cgraph_edge *e2 = cgraph_node::get (m_target_func_decl)->get_edge (s2);
+  class ipa_edge_args *args1 = ipa_edge_args_sum->get (e1);
+  class ipa_edge_args *args2 = ipa_edge_args_sum->get (e2);
+  if ((args1 != nullptr) != (args2 != nullptr))
+   return return_false_with_msg ("ipa_edge_args mismatch");
+  if (args1)
+   {
+ int n1 = ipa_get_cs_argument_count (args1);
+ int n2 = ipa_get_cs_argument_count (args2);
+ if (n1 != n2)
+   return return_false_with_msg ("ipa_edge_args nargs mismatch");
+ for (int i = 0; i < n1; i++)
+   {
+ struct ipa_jump_func *jf1 = ipa_get_ith_jump_func (args1, i);
+ struct ipa_jump_func *jf2 = ipa_get_ith_jump_func (args2, i);
+ if (((jf1 != nullptr) != (jf2 != nullptr))
+ || (jf1 && !ipa_jump_functions_equivalent_p (jf1, jf2)))
+   return return_false_with_msg ("jump function mismatch");
+   }
+   }
+}
+
   return compare_operand (t1, t2, get_operand_access_type (&map, t1));
 }
 
diff --git a/gcc/ipa-prop.cc b/gcc/ipa-prop.cc
index 0197ac6108d..e2e83b5f3f5 100644
--- a/gcc/ipa-prop.cc
+++ b/gcc/ipa-prop.cc
@@ -6096,6 +6096,163 @@ ipcp_transform_function (struct cgraph_node *node)
   return modified_mem_access ? TODO_update_ssa_only_virtuals : 0;
 }
 
+/* Return true if the two pass_through components of two jump functions are
+   known to be equivalent.  AGG_JF denotes whether they are part of aggregate
+   functions or not.  The function can be 

[gcc r15-862] target/115254 - fix gcc.dg/vect/vect-gather-4.c dump scanning

2024-05-28 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:d8d70b783765361a8acef70fc9b54db526cd6ff5

commit r15-862-gd8d70b783765361a8acef70fc9b54db526cd6ff5
Author: Richard Biener 
Date:   Tue May 28 15:55:59 2024 +0200

target/115254 - fix gcc.dg/vect/vect-gather-4.c dump scanning

The dump scanning is supposed to check that we do not merge two
slightly different gathers into one SLP node, but since we now
SLP the store, scanning for "vectorizing stmts using SLP" is no
longer good.  Instead the following makes us look for
"stmt 1 .* = .MASK" which would be what the second lane of an SLP
node looks like.  We have to handle both .MASK_GATHER_LOAD (for
targets with ifn mask gathers) and .MASK_LOAD (for ones without).

Tested on x86_64-linux with and without native gather and on GCN
where this now avoids a FAIL.

PR target/115254
* gcc.dg/vect/vect-gather-4.c: Adjust dump scan.

Diff:
---
 gcc/testsuite/gcc.dg/vect/vect-gather-4.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/vect/vect-gather-4.c 
b/gcc/testsuite/gcc.dg/vect/vect-gather-4.c
index d18094d6982..edd9a6783c2 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-gather-4.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-gather-4.c
@@ -45,4 +45,7 @@ f3 (int *restrict y, int *restrict x, int *restrict indices)
 }
 }
 
-/* { dg-final { scan-tree-dump-not "vectorizing stmts using SLP" vect } } */
+/* We do not want to see a two-lane .MASK_LOAD or .MASK_GATHER_LOAD since
+   the gathers are different on each lane.  This is a bit fragile and
+   should possibly be turned into a runtime test.  */
+/* { dg-final { scan-tree-dump-not "stmt 1 \[^\r\n\]* = .MASK" vect } } */


[gcc r15-863] vect: Use vect representative statement instead of original in patch recog [PR115060]

2024-05-28 Thread Feng Xue via Gcc-cvs
https://gcc.gnu.org/g:a3aeff4ce95bd616a2108dc2363d9cbaba53b170

commit r15-863-ga3aeff4ce95bd616a2108dc2363d9cbaba53b170
Author: Feng Xue 
Date:   Thu May 23 15:25:53 2024 +0800

vect: Use vect representative statement instead of original in patch recog 
[PR115060]

Some utility functions (such as vect_look_through_possible_promotion) that
are meant to find a certain kind of direct or indirect definition SSA for a
value may return the original SSA, not its pattern representative SSA, even
when a pattern is involved. For example,

   a = (T1) patt_b;
   patt_b = (T2) c;// b = ...
   patt_c = not-a-cast;// c = ...

Given 'a', the mentioned function will return 'c', instead of 'patt_c'. This
subtlety would make some pattern recog code that is unaware of it mis-use the
original instead of the new pattern statement, which is inconsistent with the
processing logic of the pattern formation pass. This patch corrects the issue
by forcing another utility function (vect_get_internal_def) to return the
pattern statement information to the caller by default.

2024-05-23 Feng Xue 

gcc/
PR tree-optimization/115060
* tree-vect-patterns.cc (vect_get_internal_def): Return statement 
for
vectorization.
(vect_widened_op_tree): Call vect_get_internal_def instead of 
look_def
to get statement information.
(vect_recog_widen_abd_pattern): No need to call 
vect_stmt_to_vectorize.

Diff:
---
 gcc/tree-vect-patterns.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index a313dc64643..8929e5aa7f3 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -266,7 +266,7 @@ vect_get_internal_def (vec_info *vinfo, tree op)
   stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
   if (def_stmt_info
   && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
-return def_stmt_info;
+return vect_stmt_to_vectorize (def_stmt_info);
   return NULL;
 }
 
@@ -655,7 +655,8 @@ vect_widened_op_tree (vec_info *vinfo, stmt_vec_info 
stmt_info, tree_code code,
 
  /* Recursively process the definition of the operand.  */
  stmt_vec_info def_stmt_info
-   = vinfo->lookup_def (this_unprom->op);
+   = vect_get_internal_def (vinfo, this_unprom->op);
+
  nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
   widened_code, shift_p, max_nops,
   this_unprom, common_type,
@@ -1739,7 +1740,6 @@ vect_recog_widen_abd_pattern (vec_info *vinfo, 
stmt_vec_info stmt_vinfo,
   if (!abd_pattern_vinfo)
 return NULL;
 
-  abd_pattern_vinfo = vect_stmt_to_vectorize (abd_pattern_vinfo);
   gcall *abd_stmt = dyn_cast  (STMT_VINFO_STMT (abd_pattern_vinfo));
   if (!abd_stmt
   || !gimple_call_internal_p (abd_stmt)


[gcc r13-8805] Fortran: Fix SHAPE for zero-size arrays

2024-05-28 Thread Tobias Burnus via Gcc-cvs
https://gcc.gnu.org/g:3185cfe495944e6e5d000ccd820bed2e6f10cd6c

commit r13-8805-g3185cfe495944e6e5d000ccd820bed2e6f10cd6c
Author: Tobias Burnus 
Date:   Mon May 20 08:34:48 2024 +0200

Fortran: Fix SHAPE for zero-size arrays

PR fortran/115150

gcc/fortran/ChangeLog:

* trans-intrinsic.cc (gfc_conv_intrinsic_bound): Fix SHAPE
for zero-size arrays

gcc/testsuite/ChangeLog:

* gfortran.dg/shape_12.f90: New test.

(cherry picked from commit b701306a9b38bd74cdc26c7ece5add22f2203b56)

Diff:
---
 gcc/fortran/trans-intrinsic.cc |  4 ++-
 gcc/testsuite/gfortran.dg/shape_12.f90 | 51 ++
 2 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index aa0dea50089..455b61aa564 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -3090,7 +3090,9 @@ gfc_conv_intrinsic_bound (gfc_se * se, gfc_expr * expr, 
enum gfc_isym_id op)
  lbound, gfc_index_one_node);
}
   else if (op == GFC_ISYM_SHAPE)
-   se->expr = size;
+   se->expr = fold_build2_loc (input_location, MAX_EXPR,
+   gfc_array_index_type, size,
+   gfc_index_zero_node);
   else
gcc_unreachable ();
 
diff --git a/gcc/testsuite/gfortran.dg/shape_12.f90 
b/gcc/testsuite/gfortran.dg/shape_12.f90
new file mode 100644
index 000..e672e1ff9f9
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/shape_12.f90
@@ -0,0 +1,51 @@
+! { dg-do run }
+!
+! PR fortran/115150
+!
+! Check that SHAPE handles zero-sized arrays correctly
+!
+implicit none
+call one
+call two
+
+contains
+
+subroutine one
+  real,allocatable :: A(:),B(:,:)
+  allocate(a(3:0), b(5:1, 2:5))
+
+  if (any (shape(a) /= [0])) stop 1
+  if (any (shape(b) /= [0, 4])) stop 2
+  if (size(a) /= 0) stop 3
+  if (size(b) /= 0) stop 4
+  if (any (lbound(a) /= [1])) stop 5
+  if (any (lbound(b) /= [1, 2])) stop 6
+  if (any (ubound(a) /= [0])) stop 5
+  if (any (ubound(b) /= [0,5])) stop 6
+end
+
+subroutine two
+integer :: x1(10), x2(10,10)
+call f(x1, x2, -3)
+end
+
+subroutine f(y1, y2, n)
+  integer, value :: n
+  integer :: y1(1:n)
+  integer :: y2(1:n,4,2:*)
+  call g(y1, y2)
+end
+
+subroutine g(z1, z2)
+  integer :: z1(..), z2(..)
+
+  if (any (shape(z1) /= [0])) stop 1
+  if (any (shape(z2) /= [0, 4, -1])) stop 2
+  if (size(z1) /= 0) stop 3
+  if (size(z2) /= 0) stop 4
+  if (any (lbound(z1) /= [1])) stop 5
+  if (any (lbound(z2) /= [1, 1, 1])) stop 6
+  if (any (ubound(z1) /= [0])) stop 5
+  if (any (ubound(z2) /= [0, 4, -1])) stop 6
+end
+end


[gcc r13-8806] libstdc++: Fix up 19_diagnostics/stacktrace/hash.cc on 13 branch

2024-05-28 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:fd91953c4dfba2a592ec15f2b4a2da28b1cf1947

commit r13-8806-gfd91953c4dfba2a592ec15f2b4a2da28b1cf1947
Author: Jakub Jelinek 
Date:   Tue May 28 16:30:48 2024 +0200

libstdc++: Fix up 19_diagnostics/stacktrace/hash.cc on 13 branch

The r13-8207-g17acf9fbeb10d7adad commit changed some tests to use
-lstdc++exp instead of -lstdc++_libbacktrace, but it didn't change
the 19_diagnostics/stacktrace/hash.cc test, presumably because
when it was added on the trunk, it already had -lstdc++exp and
it was changed to -lstdc++_libbacktrace only in the
r13-8067-g16635b89f36c07b9e0 cherry-pick.

The test fails with
/usr/bin/ld: cannot find -lstdc++_libbacktrace
collect2: error: ld returned 1 exit status
compiler exited with status 1
FAIL: 19_diagnostics/stacktrace/hash.cc (test for excess errors)
without this (while the library is still built, it isn't added in
-L options).

2024-05-27  Jakub Jelinek  

* testsuite/19_diagnostics/stacktrace/hash.cc: Adjust
dg-options to use -lstdc++exp.

Diff:
---
 libstdc++-v3/testsuite/19_diagnostics/stacktrace/hash.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libstdc++-v3/testsuite/19_diagnostics/stacktrace/hash.cc 
b/libstdc++-v3/testsuite/19_diagnostics/stacktrace/hash.cc
index a2f61e49981..37e6d6dd7ec 100644
--- a/libstdc++-v3/testsuite/19_diagnostics/stacktrace/hash.cc
+++ b/libstdc++-v3/testsuite/19_diagnostics/stacktrace/hash.cc
@@ -1,4 +1,4 @@
-// { dg-options "-std=gnu++23 -lstdc++_libbacktrace" }
+// { dg-options "-std=gnu++23 -lstdc++exp" }
 // { dg-do run { target c++23 } }
 // { dg-require-effective-target stacktrace }


[gcc r15-864] [to-be-committed] [RISC-V] Some basic patterns for zbkb code generation

2024-05-28 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:236116068151bbc72aaaf53d0f223fe06f7e3bac

commit r15-864-g236116068151bbc72aaaf53d0f223fe06f7e3bac
Author: Lyut Nersisyan 
Date:   Tue May 28 09:17:50 2024 -0600

[to-be-committed] [RISC-V] Some basic patterns for zbkb code generation

And here's Lyut's basic Zbkb support.  Essentially it's four new patterns 
for
packh, packw, pack plus a bridge pattern needed for packh.

packw is a bit ugly as we need to match a sign extension in an inconvenient
location.  We pull it out so that the extension is exposed in a convenient
place for subsequent sign extension elimination.

We need a bridge pattern to get packh.  Thankfully the bridge pattern is a
degenerate packh where one operand is x0, so it works as-is without 
splitting
and provides the bridge to the more general form of packh.

This patch also refines the condition for the constant reassociation patch to
avoid a few more cases that can be handled efficiently with other preexisting
patterns, and includes one bugfix to avoid losing bits, particularly in the
xor/ior case.

Lyut did the core work here.  I think I did some minor cleanups and the 
bridge
pattern to work with gcc-15 and beyond.

This is a prerequisite for using zbkb in constant synthesis.  It also 
stands on
its own.  I know we've seen it trigger in spec without the constant 
synthesis
bits.

It's been through our internal CI and my tester.  I'll obviously wait for 
the
upstream CI to finish before taking further action.

gcc/
* config/riscv/crypto.md: Add new combiner patterns to generate
pack, packh, packw instructions.
* config/riscv/iterators.md (HX): New iterator for half X mode.
* config/riscv/riscv.md (_shift_reverse): Tighten
cases to avoid.  Do not lose bits for XOR/IOR.

gcc/testsuite

* gcc.target/riscv/pack32.c: New test.
* gcc.target/riscv/pack64.c: New test.
* gcc.target/riscv/packh32.c: New test.
* gcc.target/riscv/packh64.c: New test.
* gcc.target/riscv/packw.c: New test.

Co-authored-by: Jeffrey A Law 

Diff:
---
 gcc/config/riscv/crypto.md   | 63 
 gcc/config/riscv/iterators.md|  3 ++
 gcc/config/riscv/riscv.md|  9 +++--
 gcc/testsuite/gcc.target/riscv/pack32.c  | 18 +
 gcc/testsuite/gcc.target/riscv/pack64.c  | 17 +
 gcc/testsuite/gcc.target/riscv/packh32.c | 13 +++
 gcc/testsuite/gcc.target/riscv/packh64.c |  6 +++
 gcc/testsuite/gcc.target/riscv/packw.c   | 13 +++
 8 files changed, 139 insertions(+), 3 deletions(-)

diff --git a/gcc/config/riscv/crypto.md b/gcc/config/riscv/crypto.md
index dd2bc94ee88..b632312ade2 100644
--- a/gcc/config/riscv/crypto.md
+++ b/gcc/config/riscv/crypto.md
@@ -104,6 +104,19 @@
   "pack\t%0,%1,%2"
   [(set_attr "type" "crypto")])
 
+;; This is slightly more complex than the other pack patterns
+;; that fully expose the RTL as it needs to self-adjust to
+;; rv32 and rv64.  But it's not that hard.
+(define_insn "*riscv_xpack__2"
+  [(set (match_operand:X 0 "register_operand" "=r")
+   (ior:X (ashift:X (match_operand:X 1 "register_operand" "r")
+(match_operand 2 "immediate_operand" "n"))
+  (zero_extend:X
+(match_operand:HX 3 "register_operand" "r"]
+  "TARGET_ZBKB && INTVAL (operands[2]) == BITS_PER_WORD / 2"
+  "pack\t%0,%3,%1"
+  [(set_attr "type" "crypto")])
+
 (define_insn "riscv_packh_"
   [(set (match_operand:X 0 "register_operand" "=r")
 (unspec:X [(match_operand:QI 1 "register_operand" "r")
@@ -113,6 +126,29 @@
   "packh\t%0,%1,%2"
   [(set_attr "type" "crypto")])
 
+;; So this is both a useful pattern unto itself and a bridge to the
+;; general packh pattern below.
+(define_insn "*riscv_packh__2"
+  [(set (match_operand:X 0 "register_operand" "=r")
+   (and:X (ashift:X (match_operand:X 1 "register_operand" "r")
+(const_int 8))
+  (const_int 65280)))]
+ "TARGET_ZBKB"
+ "packh\t%0,x0,%1"
+ [(set_attr "type" "crypto")])
+
+;; While the two operands of the IOR could be swapped, this appears
+;; to be the canonical form.  The other form doesn't seem to trigger.
+(define_insn "*riscv_packh__3"
+  [(set (match_operand:X 0 "register_operand" "=r")
+   (ior:X (and:X (ashift:X (match_operand:X 1 "register_operand" "r")
+   (const_int 8))
+ (const_int 65280))
+  (zero_extend:X (match_operand:QI 2 "register_operand" "r"]
+ "TARGET_ZBKB"
+ "packh\t%0,%2,%1"
+ [(set_attr "type" "crypto")])
+
 (define_insn "riscv_packw"
   [(set (match_operand:DI 0 "register_operand" "=r")
 (unspec:DI [(match_operand:HI 1 "register_operand" "r")
@@ -122,6 +158,33 @@
   "packw\t%0,%1,%2"
   [(set_attr "type" "crypto")])
 
+;; Im

[gcc r12-10476] Fortran: Fix SHAPE for zero-size arrays

2024-05-28 Thread Tobias Burnus via Gcc-cvs
https://gcc.gnu.org/g:e0b2c4f90f908a9bca4038c7ae0d8ca6ee157d8f

commit r12-10476-ge0b2c4f90f908a9bca4038c7ae0d8ca6ee157d8f
Author: Tobias Burnus 
Date:   Mon May 20 08:34:48 2024 +0200

Fortran: Fix SHAPE for zero-size arrays

PR fortran/115150

gcc/fortran/ChangeLog:

* trans-intrinsic.cc (gfc_conv_intrinsic_bound): Fix SHAPE
for zero-size arrays

gcc/testsuite/ChangeLog:

* gfortran.dg/shape_12.f90: New test.

(cherry picked from commit b701306a9b38bd74cdc26c7ece5add22f2203b56)

Diff:
---
 gcc/fortran/trans-intrinsic.cc |  4 ++-
 gcc/testsuite/gfortran.dg/shape_12.f90 | 51 ++
 2 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index c30cdfd37f9..9393ca10b06 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -3083,7 +3083,9 @@ gfc_conv_intrinsic_bound (gfc_se * se, gfc_expr * expr, 
enum gfc_isym_id op)
  lbound, gfc_index_one_node);
}
   else if (op == GFC_ISYM_SHAPE)
-   se->expr = size;
+   se->expr = fold_build2_loc (input_location, MAX_EXPR,
+   gfc_array_index_type, size,
+   gfc_index_zero_node);
   else
gcc_unreachable ();
 
diff --git a/gcc/testsuite/gfortran.dg/shape_12.f90 
b/gcc/testsuite/gfortran.dg/shape_12.f90
new file mode 100644
index 000..e672e1ff9f9
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/shape_12.f90
@@ -0,0 +1,51 @@
+! { dg-do run }
+!
+! PR fortran/115150
+!
+! Check that SHAPE handles zero-sized arrays correctly
+!
+implicit none
+call one
+call two
+
+contains
+
+subroutine one
+  real,allocatable :: A(:),B(:,:)
+  allocate(a(3:0), b(5:1, 2:5))
+
+  if (any (shape(a) /= [0])) stop 1
+  if (any (shape(b) /= [0, 4])) stop 2
+  if (size(a) /= 0) stop 3
+  if (size(b) /= 0) stop 4
+  if (any (lbound(a) /= [1])) stop 5
+  if (any (lbound(b) /= [1, 2])) stop 6
+  if (any (ubound(a) /= [0])) stop 5
+  if (any (ubound(b) /= [0,5])) stop 6
+end
+
+subroutine two
+integer :: x1(10), x2(10,10)
+call f(x1, x2, -3)
+end
+
+subroutine f(y1, y2, n)
+  integer, value :: n
+  integer :: y1(1:n)
+  integer :: y2(1:n,4,2:*)
+  call g(y1, y2)
+end
+
+subroutine g(z1, z2)
+  integer :: z1(..), z2(..)
+
+  if (any (shape(z1) /= [0])) stop 1
+  if (any (shape(z2) /= [0, 4, -1])) stop 2
+  if (size(z1) /= 0) stop 3
+  if (size(z2) /= 0) stop 4
+  if (any (lbound(z1) /= [1])) stop 5
+  if (any (lbound(z2) /= [1, 1, 1])) stop 6
+  if (any (ubound(z1) /= [0])) stop 5
+  if (any (ubound(z2) /= [0, 4, -1])) stop 6
+end
+end


[gcc r15-865] Fix bootstrap on AIX by adding c-family/c-type-mismatch.cc [PR115167]

2024-05-28 Thread David Malcolm via Gcc-cvs
https://gcc.gnu.org/g:b544ff88560e100e53ed8966d38f172c5bafce8d

commit r15-865-gb544ff88560e100e53ed8966d38f172c5bafce8d
Author: David Malcolm 
Date:   Tue May 28 13:04:25 2024 -0400

Fix bootstrap on AIX by adding c-family/c-type-mismatch.cc [PR115167]

PR bootstrap/115167 reports a bootstrap failure on AIX triggered by
r15-636-g770657d02c986c whilst building f951 in stage 2, due to
the linker not being able to find symbols for:

  vtable for range_label_for_type_mismatch
  range_label_for_type_mismatch::get_text(unsigned int) const

The only users of the class range_label_for_type_mismatch are in the
C/C++ frontends, each of which supply their own implementation of:

  range_label_for_type_mismatch::get_text(unsigned int) const

i.e. we had a cluster of symbols that was disconnected from any
users on f951.

The above patch added a new range_label::get_effects vfunc to the
base class.  My hunch is that we were getting away with not defining
the symbol for Fortran with AIX's linker before (since none of the
users are used), but adding the get_effects vfunc has somehow broken
things (possibly because there's an empty implementation in the base
class in the *header*).

The following patch moves all of the code in
gcc/gcc-rich-location.{cc,h,o} defining and using
range_label_for_type_mismatch to a new
gcc/c-family/c-type-mismatch.{cc,h,o}, to help the linker ignore this
cluster of symbols when it's disconnected from users.

I was able to reproduce the failure without the patch, and then
successfully bootstrap with this patch on powerpc-ibm-aix7.3.1.0
(cfarm119).

gcc/ChangeLog:
PR bootstrap/115167
* Makefile.in (C_COMMON_OBJS): Add c-family/c-type-mismatch.o.
* gcc-rich-location.cc
(maybe_range_label_for_tree_type_mismatch::get_text): Move to
c-family/c-type-mismatch.cc.
(binary_op_rich_location::binary_op_rich_location): Likewise.
(binary_op_rich_location::use_operator_loc_p): Likewise.
* gcc-rich-location.h (class range_label_for_type_mismatch):
Likewise.
(class maybe_range_label_for_tree_type_mismatch): Likewise.
(class op_location_t): Likewise for forward decl.
(class binary_op_rich_location): Likewise.

gcc/c-family/ChangeLog:
PR bootstrap/115167
* c-format.cc: Replace include of "gcc-rich-location.h" with
"c-family/c-type-mismatch.h".
* c-type-mismatch.cc: New file, taking material from
gcc-rich-location.cc.
* c-type-mismatch.h: New file, taking material from
gcc-rich-location.h.
* c-warn.cc: Replace include of "gcc-rich-location.h" with
"c-family/c-type-mismatch.h".

gcc/c/ChangeLog:
PR bootstrap/115167
* c-objc-common.cc: Replace include of "gcc-rich-location.h" with
"c-family/c-type-mismatch.h".
* c-typeck.cc: Likewise.

gcc/cp/ChangeLog:
PR bootstrap/115167
* call.cc: Replace include of "gcc-rich-location.h" with
"c-family/c-type-mismatch.h".
* error.cc: Likewise.
* typeck.cc: Likewise.

Signed-off-by: David Malcolm 

Diff:
---
 gcc/Makefile.in |   3 +-
 gcc/c-family/c-format.cc|   2 +-
 gcc/c-family/c-type-mismatch.cc | 127 
 gcc/c-family/c-type-mismatch.h  | 126 +++
 gcc/c-family/c-warn.cc  |   2 +-
 gcc/c/c-objc-common.cc  |   2 +-
 gcc/c/c-typeck.cc   |   2 +-
 gcc/cp/call.cc  |   2 +-
 gcc/cp/error.cc |   2 +-
 gcc/cp/typeck.cc|   2 +-
 gcc/gcc-rich-location.cc|  89 
 gcc/gcc-rich-location.h | 101 
 12 files changed, 262 insertions(+), 198 deletions(-)

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index a7f15694c34..66d42cc41f8 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1301,7 +1301,8 @@ C_COMMON_OBJS = c-family/c-common.o 
c-family/c-cppbuiltin.o c-family/c-dump.o \
   c-family/c-ppoutput.o c-family/c-pragma.o c-family/c-pretty-print.o \
   c-family/c-semantics.o c-family/c-ada-spec.o \
   c-family/c-ubsan.o c-family/known-headers.o \
-  c-family/c-attribs.o c-family/c-warn.o c-family/c-spellcheck.o
+  c-family/c-attribs.o c-family/c-warn.o c-family/c-spellcheck.o \
+  c-family/c-type-mismatch.o
 
 # Analyzer object files
 ANALYZER_OBJS = \
diff --git a/gcc/c-family/c-format.cc b/gcc/c-family/c-format.cc
index 9c4deabc109..7a5ffc25602 100644
--- a/gcc/c-family/c-format.cc
+++ b/gcc/c-family/c-format.cc
@@ -35,7 +35,7 @@ along with GCC; see the file COPYING3.  If not see
 #includ

[gcc r15-866] diagnostics: disable localization of events in selftest paths [PR115203]

2024-05-28 Thread David Malcolm via Gcc-cvs
https://gcc.gnu.org/g:2dbb1c124c1e585dc413132d7a8d4be62c6b7baa

commit r15-866-g2dbb1c124c1e585dc413132d7a8d4be62c6b7baa
Author: David Malcolm 
Date:   Tue May 28 13:04:26 2024 -0400

diagnostics: disable localization of events in selftest paths [PR115203]

gcc/ChangeLog:
PR analyzer/115203
* diagnostic-path.h
(simple_diagnostic_path::disable_event_localization): New.
(simple_diagnostic_path::m_localize_events): New field.
* diagnostic.cc
(simple_diagnostic_path::simple_diagnostic_path): Initialize
m_localize_events.
(simple_diagnostic_path::add_event): Only localize fmt if
m_localize_events is true.
* tree-diagnostic-path.cc
(test_diagnostic_path::test_diagnostic_path): Call
disable_event_localization.

Signed-off-by: David Malcolm 

Diff:
---
 gcc/diagnostic-path.h   | 3 +++
 gcc/diagnostic.cc   | 8 +---
 gcc/tree-diagnostic-path.cc | 3 ++-
 3 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/gcc/diagnostic-path.h b/gcc/diagnostic-path.h
index 982d68b872e..938bd583a3d 100644
--- a/gcc/diagnostic-path.h
+++ b/gcc/diagnostic-path.h
@@ -293,12 +293,15 @@ class simple_diagnostic_path : public diagnostic_path
 
   void connect_to_next_event ();
 
+  void disable_event_localization () { m_localize_events = false; }
+
  private:
   auto_delete_vec m_threads;
   auto_delete_vec m_events;
 
   /* (for use by add_event).  */
   pretty_printer *m_event_pp;
+  bool m_localize_events;
 };
 
 extern void debug (diagnostic_path *path);
diff --git a/gcc/diagnostic.cc b/gcc/diagnostic.cc
index 1f30d1d7cda..f27b2f1a492 100644
--- a/gcc/diagnostic.cc
+++ b/gcc/diagnostic.cc
@@ -2517,7 +2517,8 @@ set_text_art_charset (enum diagnostic_text_art_charset 
charset)
 /* class simple_diagnostic_path : public diagnostic_path.  */
 
 simple_diagnostic_path::simple_diagnostic_path (pretty_printer *event_pp)
-  : m_event_pp (event_pp)
+: m_event_pp (event_pp),
+  m_localize_events (true)
 {
   add_thread ("main");
 }
@@ -2563,7 +2564,7 @@ simple_diagnostic_path::add_thread (const char *name)
stack depth DEPTH.
 
Use m_context's printer to format FMT, as the text of the new
-   event.
+   event.  Localize FMT iff m_localize_events is set.
 
Return the id of the new event.  */
 
@@ -2580,7 +2581,8 @@ simple_diagnostic_path::add_event (location_t loc, tree 
fndecl, int depth,
 
   va_start (ap, fmt);
 
-  text_info ti (_(fmt), &ap, 0, nullptr, &rich_loc);
+  text_info ti (m_localize_events ? _(fmt) : fmt,
+   &ap, 0, nullptr, &rich_loc);
   pp_format (pp, &ti);
   pp_output_formatted_text (pp);
 
diff --git a/gcc/tree-diagnostic-path.cc b/gcc/tree-diagnostic-path.cc
index 743a8c2a1d2..0ad6c5beb81 100644
--- a/gcc/tree-diagnostic-path.cc
+++ b/gcc/tree-diagnostic-path.cc
@@ -1016,7 +1016,7 @@ path_events_have_column_data_p (const diagnostic_path 
&path)
 }
 
 /* A subclass of simple_diagnostic_path that adds member functions
-   for adding test events.  */
+   for adding test events and suppresses translation of these events.  */
 
 class test_diagnostic_path : public simple_diagnostic_path
 {
@@ -1024,6 +1024,7 @@ class test_diagnostic_path : public simple_diagnostic_path
   test_diagnostic_path (pretty_printer *event_pp)
   : simple_diagnostic_path (event_pp)
   {
+disable_event_localization ();
   }
 
   void add_entry (tree fndecl, int stack_depth)


[gcc r12-10477] ubsan: Use right address space for MEM_REF created for bool/enum sanitization [PR115172]

2024-05-28 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:da9b7a507ef38287cc16bc88e808293019f9f531

commit r12-10477-gda9b7a507ef38287cc16bc88e808293019f9f531
Author: Jakub Jelinek 
Date:   Wed May 22 09:12:28 2024 +0200

ubsan: Use right address space for MEM_REF created for bool/enum 
sanitization [PR115172]

The following testcase is miscompiled, because -fsanitize=bool,enum
creates a MEM_REF without propagating there address space qualifiers,
so what should be normally loaded using say %gs:/%fs: segment prefix
isn't.  Together with asan it then causes that load to be sanitized.

2024-05-22  Jakub Jelinek  

PR sanitizer/115172
* ubsan.cc (instrument_bool_enum_load): If rhs is not in generic
address space, use qualified version of utype with the right
address space.  Formatting fix.

* gcc.dg/asan/pr115172.c: New test.

(cherry picked from commit d3c506eff54fcbac389a529c2e98da108a410b7f)

Diff:
---
 gcc/testsuite/gcc.dg/asan/pr115172.c | 20 
 gcc/ubsan.cc |  6 +-
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/asan/pr115172.c 
b/gcc/testsuite/gcc.dg/asan/pr115172.c
new file mode 100644
index 000..8707e615733
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/asan/pr115172.c
@@ -0,0 +1,20 @@
+/* PR sanitizer/115172 */
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -fsanitize=address,bool -ffat-lto-objects 
-fdump-tree-asan1" } */
+/* { dg-final { scan-tree-dump-not "\.ASAN_CHECK " "asan1" } } */
+
+#ifdef __x86_64__
+#define SEG __seg_gs
+#else
+#define SEG __seg_fs
+#endif
+
+extern struct S { _Bool b; } s;
+void bar (void);
+
+void
+foo (void)
+{
+  if (*(volatile _Bool SEG *) (__UINTPTR_TYPE__) &s.b)
+bar ();
+}
diff --git a/gcc/ubsan.cc b/gcc/ubsan.cc
index 4d8e7cd86c5..70a5ef66bd9 100644
--- a/gcc/ubsan.cc
+++ b/gcc/ubsan.cc
@@ -1703,13 +1703,17 @@ instrument_bool_enum_load (gimple_stmt_iterator *gsi)
   || TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
 return;
 
+  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (rhs));
+  if (as != TYPE_ADDR_SPACE (utype))
+utype = build_qualified_type (utype, TYPE_QUALS (utype)
+| ENCODE_QUAL_ADDR_SPACE (as));
   bool ends_bb = stmt_ends_bb_p (stmt);
   location_t loc = gimple_location (stmt);
   tree lhs = gimple_assign_lhs (stmt);
   tree ptype = build_pointer_type (TREE_TYPE (rhs));
   tree atype = reference_alias_ptr_type (rhs);
   gimple *g = gimple_build_assign (make_ssa_name (ptype),
- build_fold_addr_expr (rhs));
+  build_fold_addr_expr (rhs));
   gimple_set_location (g, loc);
   gsi_insert_before (gsi, g, GSI_SAME_STMT);
   tree mem = build2 (MEM_REF, utype, gimple_assign_lhs (g),


[gcc r11-11454] ubsan: Use right address space for MEM_REF created for bool/enum sanitization [PR115172]

2024-05-28 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:d8985ea10c911c994e00dbd6a08dcae907ebc1f7

commit r11-11454-gd8985ea10c911c994e00dbd6a08dcae907ebc1f7
Author: Jakub Jelinek 
Date:   Wed May 22 09:12:28 2024 +0200

ubsan: Use right address space for MEM_REF created for bool/enum 
sanitization [PR115172]

The following testcase is miscompiled, because -fsanitize=bool,enum
creates a MEM_REF without propagating there address space qualifiers,
so what should be normally loaded using say %gs:/%fs: segment prefix
isn't.  Together with asan it then causes that load to be sanitized.

2024-05-22  Jakub Jelinek  

PR sanitizer/115172
* ubsan.c (instrument_bool_enum_load): If rhs is not in generic
address space, use qualified version of utype with the right
address space.  Formatting fix.

* gcc.dg/asan/pr115172.c: New test.

(cherry picked from commit d3c506eff54fcbac389a529c2e98da108a410b7f)

Diff:
---
 gcc/testsuite/gcc.dg/asan/pr115172.c | 20 
 gcc/ubsan.c  |  6 +-
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/asan/pr115172.c 
b/gcc/testsuite/gcc.dg/asan/pr115172.c
new file mode 100644
index 000..8707e615733
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/asan/pr115172.c
@@ -0,0 +1,20 @@
+/* PR sanitizer/115172 */
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -fsanitize=address,bool -ffat-lto-objects 
-fdump-tree-asan1" } */
+/* { dg-final { scan-tree-dump-not "\.ASAN_CHECK " "asan1" } } */
+
+#ifdef __x86_64__
+#define SEG __seg_gs
+#else
+#define SEG __seg_fs
+#endif
+
+extern struct S { _Bool b; } s;
+void bar (void);
+
+void
+foo (void)
+{
+  if (*(volatile _Bool SEG *) (__UINTPTR_TYPE__) &s.b)
+bar ();
+}
diff --git a/gcc/ubsan.c b/gcc/ubsan.c
index 2b12651b440..f77dee5fddd 100644
--- a/gcc/ubsan.c
+++ b/gcc/ubsan.c
@@ -1703,13 +1703,17 @@ instrument_bool_enum_load (gimple_stmt_iterator *gsi)
   || TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
 return;
 
+  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (rhs));
+  if (as != TYPE_ADDR_SPACE (utype))
+utype = build_qualified_type (utype, TYPE_QUALS (utype)
+| ENCODE_QUAL_ADDR_SPACE (as));
   bool ends_bb = stmt_ends_bb_p (stmt);
   location_t loc = gimple_location (stmt);
   tree lhs = gimple_assign_lhs (stmt);
   tree ptype = build_pointer_type (TREE_TYPE (rhs));
   tree atype = reference_alias_ptr_type (rhs);
   gimple *g = gimple_build_assign (make_ssa_name (ptype),
- build_fold_addr_expr (rhs));
+  build_fold_addr_expr (rhs));
   gimple_set_location (g, loc);
   gsi_insert_before (gsi, g, GSI_SAME_STMT);
   tree mem = build2 (MEM_REF, utype, gimple_assign_lhs (g),


[gcc r15-867] testsuite/*/gomp: Remove 'dg-prune-output "not supported yet"'

2024-05-28 Thread Tobias Burnus via Gcc-cvs
https://gcc.gnu.org/g:c0d78289fcd9c04110907f8cad90d7e1e5c55a44

commit r15-867-gc0d78289fcd9c04110907f8cad90d7e1e5c55a44
Author: Tobias Burnus 
Date:   Tue May 28 19:52:44 2024 +0200

testsuite/*/gomp: Remove 'dg-prune-output "not supported yet"'

gcc/testsuite/ChangeLog:

* c-c++-common/gomp/lastprivate-conditional-1.c: Remove
'{ dg-prune-output "not supported yet" }'.
* c-c++-common/gomp/requires-1.c: Likewise.
* c-c++-common/gomp/requires-2.c: Likewise.
* c-c++-common/gomp/reverse-offload-1.c: Likewise.
* g++.dg/gomp/requires-1.C: Likewise.
* gfortran.dg/gomp/requires-1.f90: Likewise.
* gfortran.dg/gomp/requires-2.f90: Likewise.
* gfortran.dg/gomp/requires-4.f90: Likewise.
* gfortran.dg/gomp/requires-5.f90: Likewise.
* gfortran.dg/gomp/requires-6.f90: Likewise.
* gfortran.dg/gomp/requires-7.f90: Likewise.

Diff:
---
 gcc/testsuite/c-c++-common/gomp/lastprivate-conditional-1.c | 2 --
 gcc/testsuite/c-c++-common/gomp/requires-1.c| 2 --
 gcc/testsuite/c-c++-common/gomp/requires-2.c| 2 --
 gcc/testsuite/c-c++-common/gomp/reverse-offload-1.c | 2 --
 gcc/testsuite/g++.dg/gomp/requires-1.C  | 2 --
 gcc/testsuite/gfortran.dg/gomp/requires-1.f90   | 2 --
 gcc/testsuite/gfortran.dg/gomp/requires-2.f90   | 2 --
 gcc/testsuite/gfortran.dg/gomp/requires-4.f90   | 1 -
 gcc/testsuite/gfortran.dg/gomp/requires-5.f90   | 2 --
 gcc/testsuite/gfortran.dg/gomp/requires-6.f90   | 2 --
 gcc/testsuite/gfortran.dg/gomp/requires-7.f90   | 1 -
 11 files changed, 20 deletions(-)

diff --git a/gcc/testsuite/c-c++-common/gomp/lastprivate-conditional-1.c 
b/gcc/testsuite/c-c++-common/gomp/lastprivate-conditional-1.c
index 722aba79a52..d4ef49690e8 100644
--- a/gcc/testsuite/c-c++-common/gomp/lastprivate-conditional-1.c
+++ b/gcc/testsuite/c-c++-common/gomp/lastprivate-conditional-1.c
@@ -60,5 +60,3 @@ bar (int *p)
s = u;
   }
 }
-
-/* { dg-prune-output "not supported yet" } */
diff --git a/gcc/testsuite/c-c++-common/gomp/requires-1.c 
b/gcc/testsuite/c-c++-common/gomp/requires-1.c
index e1f2e3a503f..a47ec659566 100644
--- a/gcc/testsuite/c-c++-common/gomp/requires-1.c
+++ b/gcc/testsuite/c-c++-common/gomp/requires-1.c
@@ -10,5 +10,3 @@ foo ()
 
 #pragma omp requires unified_shared_memory unified_address
 #pragma omp requires atomic_default_mem_order(seq_cst)
-
-/* { dg-prune-output "not supported yet" } */
diff --git a/gcc/testsuite/c-c++-common/gomp/requires-2.c 
b/gcc/testsuite/c-c++-common/gomp/requires-2.c
index 717b65caeea..d7430b1b1a4 100644
--- a/gcc/testsuite/c-c++-common/gomp/requires-2.c
+++ b/gcc/testsuite/c-c++-common/gomp/requires-2.c
@@ -6,5 +6,3 @@
 #pragma omp requires dynamic_allocators , dynamic_allocators   /* { dg-error 
"too many 'dynamic_allocators' clauses" } */
 #pragma omp requires atomic_default_mem_order(seq_cst) 
atomic_default_mem_order(seq_cst)   /* { dg-error "too many 
'atomic_default_mem_order' clauses" } */
 #pragma omp requires atomic_default_mem_order (seq_cst)/* { dg-error 
"more than one 'atomic_default_mem_order' clause in a single compilation unit" 
} */
-
-/* { dg-prune-output "not supported yet" } */
diff --git a/gcc/testsuite/c-c++-common/gomp/reverse-offload-1.c 
b/gcc/testsuite/c-c++-common/gomp/reverse-offload-1.c
index 9a3fa5230f8..ddc3c2c6be1 100644
--- a/gcc/testsuite/c-c++-common/gomp/reverse-offload-1.c
+++ b/gcc/testsuite/c-c++-common/gomp/reverse-offload-1.c
@@ -6,8 +6,6 @@
 /* { dg-final { scan-tree-dump-times "__attribute__\\(\\(omp declare 
target\\)\\)\[\n\r\]*int called_in_target2" 1 "omplower" } }  */
 /* { dg-final { scan-tree-dump-times "__attribute__\\(\\(omp declare target, 
omp declare target block\\)\\)\[\n\r\]*void tg_fn" 1 "omplower" } }  */
 
-/* { dg-prune-output "'reverse_offload' clause on 'requires' directive not 
supported yet" } */
-
 #pragma omp requires reverse_offload
 
 extern int add_3 (int);
diff --git a/gcc/testsuite/g++.dg/gomp/requires-1.C 
b/gcc/testsuite/g++.dg/gomp/requires-1.C
index aefeb288dad..5ca5e006da1 100644
--- a/gcc/testsuite/g++.dg/gomp/requires-1.C
+++ b/gcc/testsuite/g++.dg/gomp/requires-1.C
@@ -8,5 +8,3 @@ namespace M {
 #pragma omp requires atomic_default_mem_order(seq_cst)
 }
 }
-
-/* { dg-prune-output "not supported yet" } */
diff --git a/gcc/testsuite/gfortran.dg/gomp/requires-1.f90 
b/gcc/testsuite/gfortran.dg/gomp/requires-1.f90
index b115a654e71..19007834c45 100644
--- a/gcc/testsuite/gfortran.dg/gomp/requires-1.f90
+++ b/gcc/testsuite/gfortran.dg/gomp/requires-1.f90
@@ -9,5 +9,3 @@ subroutine bar
 !$omp requires unified_shared_memory unified_address
 !$omp requires atomic_default_mem_order(seq_cst)
 end
-
-! { dg-prune-output "not yet supported" }
diff --git a/gcc/testsuite/gfortran.dg/gomp/requires-2.f90 
b/gcc/testsu

[gcc r15-868] c++: mark TARGET_EXPRs for function arguments eliding [PR114707]

2024-05-28 Thread Marek Polacek via Gcc-cvs
https://gcc.gnu.org/g:5bc731b83b51910dc7f7cacddb4257a16d62ee38

commit r15-868-g5bc731b83b51910dc7f7cacddb4257a16d62ee38
Author: Marek Polacek 
Date:   Wed May 22 16:28:02 2024 -0400

c++: mark TARGET_EXPRs for function arguments eliding [PR114707]

Coming back to our discussion in
:
TARGET_EXPRs that initialize a function argument are not marked
TARGET_EXPR_ELIDING_P even though gimplify_arg drops such TARGET_EXPRs
on the floor.  To work around it, I added a pset to
replace_placeholders_for_class_temp_r, but it would be best to just rely
on TARGET_EXPR_ELIDING_P.

PR c++/114707

gcc/cp/ChangeLog:

* call.cc (convert_for_arg_passing): Call set_target_expr_eliding.
* typeck2.cc (replace_placeholders_for_class_temp_r): Don't use 
pset.
(digest_nsdmi_init): Call cp_walk_tree_without_duplicates instead of
cp_walk_tree.

Reviewed-by: Jason Merrill 

Diff:
---
 gcc/cp/call.cc|  4 
 gcc/cp/typeck2.cc | 20 
 2 files changed, 8 insertions(+), 16 deletions(-)

diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc
index 886760af699..85536fc25ff 100644
--- a/gcc/cp/call.cc
+++ b/gcc/cp/call.cc
@@ -9437,6 +9437,10 @@ convert_for_arg_passing (tree type, tree val, 
tsubst_flags_t complain)
   if (complain & tf_warning)
 warn_for_address_of_packed_member (type, val);
 
+  /* gimplify_arg elides TARGET_EXPRs that initialize a function argument.  */
+  if (SIMPLE_TARGET_EXPR_P (val))
+set_target_expr_eliding (val);
+
   return val;
 }
 
diff --git a/gcc/cp/typeck2.cc b/gcc/cp/typeck2.cc
index 06bad4d3303..7782f38da43 100644
--- a/gcc/cp/typeck2.cc
+++ b/gcc/cp/typeck2.cc
@@ -1409,16 +1409,14 @@ digest_init_flags (tree type, tree init, int flags, 
tsubst_flags_t complain)
in the context of guaranteed copy elision).  */
 
 static tree
-replace_placeholders_for_class_temp_r (tree *tp, int *, void *data)
+replace_placeholders_for_class_temp_r (tree *tp, int *, void *)
 {
   tree t = *tp;
-  auto pset = static_cast *>(data);
 
   /* We're looking for a TARGET_EXPR nested in the whole expression.  */
   if (TREE_CODE (t) == TARGET_EXPR
   /* That serves as temporary materialization, not an initializer.  */
-  && !TARGET_EXPR_ELIDING_P (t)
-  && !pset->add (t))
+  && !TARGET_EXPR_ELIDING_P (t))
 {
   tree init = TARGET_EXPR_INITIAL (t);
   while (TREE_CODE (init) == COMPOUND_EXPR)
@@ -1433,16 +1431,6 @@ replace_placeholders_for_class_temp_r (tree *tp, int *, 
void *data)
  gcc_checking_assert (!find_placeholders (init));
}
 }
-  /* TARGET_EXPRs initializing function arguments are not marked as eliding,
- even though gimplify_arg drops them on the floor.  Don't go replacing
- placeholders in them.  */
-  else if (TREE_CODE (t) == CALL_EXPR || TREE_CODE (t) == AGGR_INIT_EXPR)
-for (int i = 0; i < call_expr_nargs (t); ++i)
-  {
-   tree arg = get_nth_callarg (t, i);
-   if (TREE_CODE (arg) == TARGET_EXPR && !TARGET_EXPR_ELIDING_P (arg))
- pset->add (arg);
-  }
 
   return NULL_TREE;
 }
@@ -1490,8 +1478,8 @@ digest_nsdmi_init (tree decl, tree init, tsubst_flags_t 
complain)
  temporary materialization does not occur when initializing an object
  from a prvalue of the same type, therefore we must not replace the
  placeholder with a temporary object so that it can be elided.  */
-  hash_set pset;
-  cp_walk_tree (&init, replace_placeholders_for_class_temp_r, &pset, nullptr);
+  cp_walk_tree_without_duplicates (&init, 
replace_placeholders_for_class_temp_r,
+  nullptr);
 
   return init;
 }


[gcc r15-869] Strlen pass should set current range query.

2024-05-28 Thread Andrew Macleod via Gcc-cvs
https://gcc.gnu.org/g:d52b44aa26aa9976c292f4773a08bbaa2fbb

commit r15-869-gd52b44aa26aa9976c292f4773a08bbaa2fbb
Author: Andrew MacLeod 
Date:   Mon May 27 13:20:13 2024 -0400

Strlen pass should set current range query.

The strlen pass currently has a local ranger instance, but when it
invokes SCEV, SCEV will not be able to access this ranger.

Enable/disable ranger should be used, allowing other components to use
the current range_query.

gcc/
* tree-ssa-strlen.cc (strlen_pass::strlen_pass): Add function
pointer and initialize ptr_qry with current range_query.
(strlen_pass::m_ranger): Remove.
(printf_strlen_execute): Enable and disable ranger.
gcc/testsuite/
* gcc.dg/Wstringop-overflow-10.c: Add truncating warning.

Diff:
---
 gcc/testsuite/gcc.dg/Wstringop-overflow-10.c |  2 +-
 gcc/tree-ssa-strlen.cc   | 10 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/Wstringop-overflow-10.c 
b/gcc/testsuite/gcc.dg/Wstringop-overflow-10.c
index bace08ad5d3..ddc27fc0580 100644
--- a/gcc/testsuite/gcc.dg/Wstringop-overflow-10.c
+++ b/gcc/testsuite/gcc.dg/Wstringop-overflow-10.c
@@ -21,7 +21,7 @@ void
 baz (char *a)
 {
   char b[16] = "abcdefg";
-  __builtin_strncpy (a, b, __builtin_strnlen (b, 7));  /* { dg-bogus 
"specified bound depends on the length of the source argument" } */
+  __builtin_strncpy (a, b, __builtin_strnlen (b, 7));  /* { dg-warning "output 
truncated before terminating nul" } */
 }
 
 void fill (char *);
diff --git a/gcc/tree-ssa-strlen.cc b/gcc/tree-ssa-strlen.cc
index 7596dd80942..c43a2da2836 100644
--- a/gcc/tree-ssa-strlen.cc
+++ b/gcc/tree-ssa-strlen.cc
@@ -235,9 +235,9 @@ get_range (tree val, gimple *stmt, wide_int minmax[2],
 class strlen_pass : public dom_walker
 {
 public:
-  strlen_pass (cdi_direction direction)
+  strlen_pass (function *fun, cdi_direction direction)
 : dom_walker (direction),
-  ptr_qry (&m_ranger),
+  ptr_qry (get_range_query (fun)),
   m_cleanup_cfg (false)
   {
   }
@@ -299,8 +299,6 @@ public:
unsigned HOST_WIDE_INT lenrng[2],
unsigned HOST_WIDE_INT *size, bool *nulterm);
 
-  gimple_ranger m_ranger;
-
   /* A pointer_query object to store information about pointers and
  their targets in.  */
   pointer_query ptr_qry;
@@ -5912,9 +5910,10 @@ printf_strlen_execute (function *fun, bool warn_only)
   ssa_ver_to_stridx.safe_grow_cleared (num_ssa_names, true);
   max_stridx = 1;
 
+  enable_ranger (fun);
   /* String length optimization is implemented as a walk of the dominator
  tree and a forward walk of statements within each block.  */
-  strlen_pass walker (CDI_DOMINATORS);
+  strlen_pass walker (fun, CDI_DOMINATORS);
   walker.walk (ENTRY_BLOCK_PTR_FOR_FN (fun));
 
   if (dump_file && (dump_flags & TDF_DETAILS))
@@ -5939,6 +5938,7 @@ printf_strlen_execute (function *fun, bool warn_only)
   strlen_to_stridx = NULL;
 }
 
+  disable_ranger (fun);
   scev_finalize ();
   loop_optimizer_finalize ();


[gcc r15-870] Do not invoke SCEV if it will use a different range query.

2024-05-28 Thread Andrew Macleod via Gcc-cvs
https://gcc.gnu.org/g:5ada486079d6aa20c64985a20681573f4ac1c86e

commit r15-870-g5ada486079d6aa20c64985a20681573f4ac1c86e
Author: Andrew MacLeod 
Date:   Mon May 27 11:00:57 2024 -0400

Do not invoke SCEV if it will use a different range query.

SCEV always uses the current range_query object.
Ranger's cache uses a global value_query when propagating cache values to
avoid re-invoking ranger during simple cache propagations.
When folding a PHI value, SCEV can be invoked, and since it always uses
the current range_query object, when ranger is active this causes the
undesired re-invoking of ranger during cache propagation.

This patch checks to see if the fold_using_range specified range_query
object is the same as the one SCEV uses, and does not invoke SCEV if
they do not match.

PR tree-optimization/115221
gcc/
* gimple-range-fold.cc (range_of_ssa_name_with_loop_info): Do
not invoke SCEV if range_query's do not match.
gcc/testsuite/
* gcc.dg/pr115221.c: New.

Diff:
---
 gcc/gimple-range-fold.cc|  6 +-
 gcc/testsuite/gcc.dg/pr115221.c | 29 +
 2 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/gcc/gimple-range-fold.cc b/gcc/gimple-range-fold.cc
index b3965b5ee50..98a4877ba18 100644
--- a/gcc/gimple-range-fold.cc
+++ b/gcc/gimple-range-fold.cc
@@ -1264,7 +1264,11 @@ fold_using_range::range_of_ssa_name_with_loop_info 
(vrange &r, tree name,
fur_source &src)
 {
   gcc_checking_assert (TREE_CODE (name) == SSA_NAME);
-  if (!range_of_var_in_loop (r, name, l, phi, src.query ()))
+  // SCEV currently invokes get_range_query () for values.  If the query
+  // being passed in is not the same SCEV will use, do not invoke SCEV.
+  // This can be remove if/when SCEV uses a passed in range-query.
+  if (src.query () != get_range_query (cfun)
+  || !range_of_var_in_loop (r, name, l, phi, src.query ()))
 r.set_varying (TREE_TYPE (name));
 }
 
diff --git a/gcc/testsuite/gcc.dg/pr115221.c b/gcc/testsuite/gcc.dg/pr115221.c
new file mode 100644
index 000..f139394e5c0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr115221.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+typedef unsigned uint32_t;
+int cde40_t;
+int offset;
+void aal_test_bit();
+uint32_t cde40_key_pol();
+long cde40_offset_check(uint32_t pos) {
+  cde40_key_pol();
+  if (cde40_t)
+return (offset - 2) % (((pos == 3) ? 18 : 26)) != 0;
+  return 0;
+}
+void cde40_check_struct() {
+  uint32_t i, j, to_compare;
+  for (;; i++) {
+cde40_offset_check(i);
+if (to_compare == 0) {
+  if (i && cde40_key_pol())
+   ;
+  to_compare = i;
+  continue;
+}
+j = to_compare;
+for (; j < i; j++)
+  aal_test_bit();
+  }
+}


[gcc r15-871] c++: extend -Wself-move for mem-init-list [PR109396]

2024-05-28 Thread Marek Polacek via Gcc-cvs
https://gcc.gnu.org/g:efaaae49b307fcc7e771518da3edae49f92c19db

commit r15-871-gefaaae49b307fcc7e771518da3edae49f92c19db
Author: Marek Polacek 
Date:   Thu May 23 15:49:42 2024 -0400

c++: extend -Wself-move for mem-init-list [PR109396]

We already warn for:

  x = std::move (x);

which triggers:

  warning: moving 'x' of type 'int' to itself [-Wself-move]

but bug 109396 reports that this doesn't work for a member-initializer-list:

  X() : x(std::move (x))

so this patch amends that.

PR c++/109396

gcc/cp/ChangeLog:

* cp-tree.h (maybe_warn_self_move): Declare.
* init.cc (perform_member_init): Call maybe_warn_self_move.
* typeck.cc (maybe_warn_self_move): No longer static.  Change the
return type to bool.  Also warn when called from
a member-initializer-list.  Drop the inform call.

gcc/testsuite/ChangeLog:

* g++.dg/warn/Wself-move2.C: New test.

Diff:
---
 gcc/cp/cp-tree.h|  1 +
 gcc/cp/init.cc  |  5 +++--
 gcc/cp/typeck.cc| 32 +++-
 gcc/testsuite/g++.dg/warn/Wself-move2.C | 37 +
 4 files changed, 59 insertions(+), 16 deletions(-)

diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index 655850a9ab6..6206482c602 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -8263,6 +8263,7 @@ extern cp_expr build_c_cast   
(location_t loc, tree type,
 cp_expr expr);
 extern tree cp_build_c_cast(location_t, tree, tree,
 tsubst_flags_t);
+extern bool maybe_warn_self_move   (location_t, tree, tree);
 extern cp_expr build_x_modify_expr (location_t, tree,
 enum tree_code, tree,
 tree, tsubst_flags_t);
diff --git a/gcc/cp/init.cc b/gcc/cp/init.cc
index 52396d87a8c..4a7ed7f5302 100644
--- a/gcc/cp/init.cc
+++ b/gcc/cp/init.cc
@@ -999,7 +999,7 @@ perform_member_init (tree member, tree init, hash_set 
&uninitialized)
   if (decl == error_mark_node)
 return;
 
-  if ((warn_init_self || warn_uninitialized)
+  if ((warn_init_self || warn_uninitialized || warn_self_move)
   && init
   && TREE_CODE (init) == TREE_LIST
   && TREE_CHAIN (init) == NULL_TREE)
@@ -1013,7 +1013,8 @@ perform_member_init (tree member, tree init, 
hash_set &uninitialized)
warning_at (DECL_SOURCE_LOCATION (current_function_decl),
OPT_Winit_self, "%qD is initialized with itself",
member);
-  else
+  else if (!maybe_warn_self_move (input_location, member,
+ TREE_VALUE (init)))
find_uninit_fields (&val, &uninitialized, decl);
 }
 
diff --git a/gcc/cp/typeck.cc b/gcc/cp/typeck.cc
index 4a153a8baf9..1b7a31d32f3 100644
--- a/gcc/cp/typeck.cc
+++ b/gcc/cp/typeck.cc
@@ -9355,27 +9355,27 @@ cp_build_c_cast (location_t loc, tree type, tree expr,
 
 /* Warn when a value is moved to itself with std::move.  LHS is the target,
RHS may be the std::move call, and LOC is the location of the whole
-   assignment.  */
+   assignment.  Return true if we warned.  */
 
-static void
+bool
 maybe_warn_self_move (location_t loc, tree lhs, tree rhs)
 {
   if (!warn_self_move)
-return;
+return false;
 
   /* C++98 doesn't know move.  */
   if (cxx_dialect < cxx11)
-return;
+return false;
 
   if (processing_template_decl)
-return;
+return false;
 
   if (!REFERENCE_REF_P (rhs)
   || TREE_CODE (TREE_OPERAND (rhs, 0)) != CALL_EXPR)
-return;
+return false;
   tree fn = TREE_OPERAND (rhs, 0);
   if (!is_std_move_p (fn))
-return;
+return false;
 
   /* Just a little helper to strip * and various NOPs.  */
   auto extract_op = [] (tree &op) {
@@ -9393,13 +9393,17 @@ maybe_warn_self_move (location_t loc, tree lhs, tree 
rhs)
   tree type = TREE_TYPE (lhs);
   tree orig_lhs = lhs;
   extract_op (lhs);
-  if (cp_tree_equal (lhs, arg))
-{
-  auto_diagnostic_group d;
-  if (warning_at (loc, OPT_Wself_move,
- "moving %qE of type %qT to itself", orig_lhs, type))
-   inform (loc, "remove %<std::move%> call");
-}
+  if (cp_tree_equal (lhs, arg)
+  /* Also warn in a member-initializer-list, as in : i(std::move(i)).  */
+  || (TREE_CODE (lhs) == FIELD_DECL
+ && TREE_CODE (arg) == COMPONENT_REF
+ && cp_tree_equal (TREE_OPERAND (arg, 0), current_class_ref)
+ && TREE_OPERAND (arg, 1) == lhs))
+if (warning_at (loc, OPT_Wself_move,
+   "moving %qE of type %qT to itself", orig_lhs, type))
+  return true;
+
+  return false;
 }
 
 /* For use from the C common bits.  */
diff --git a/gcc/testsuite/g++.dg/warn/W

[gcc r15-872] regenerate-opt-urls.py: fix transposed values for "vax" and "v850"

2024-05-28 Thread David Malcolm via Gcc-cvs
https://gcc.gnu.org/g:7cc529fe514cc64e88208a6a7acfc6fe6419a453

commit r15-872-g7cc529fe514cc64e88208a6a7acfc6fe6419a453
Author: David Malcolm 
Date:   Tue May 28 15:47:38 2024 -0400

regenerate-opt-urls.py: fix transposed values for "vax" and "v850"

gcc/ChangeLog:
* config/v850/v850.opt.urls: Regenerate, with fix.
* config/vax/vax.opt.urls: Likewise.
* regenerate-opt-urls.py (TARGET_SPECIFIC_PAGES): Fix transposed
values for "vax" and "v850".

Signed-off-by: David Malcolm 

Diff:
---
 gcc/config/v850/v850.opt.urls | 81 ---
 gcc/config/vax/vax.opt.urls   | 21 ---
 gcc/regenerate-opt-urls.py|  4 +--
 3 files changed, 73 insertions(+), 33 deletions(-)

diff --git a/gcc/config/v850/v850.opt.urls b/gcc/config/v850/v850.opt.urls
index dc5a83107b3..a06f4833f47 100644
--- a/gcc/config/v850/v850.opt.urls
+++ b/gcc/config/v850/v850.opt.urls
@@ -1,60 +1,87 @@
 ; Autogenerated by regenerate-opt-urls.py from gcc/config/v850/v850.opt and 
generated HTML
 
-; skipping UrlSuffix for 'mapp-regs' due to finding no URLs
+mapp-regs
+UrlSuffix(gcc/V850-Options.html#index-mapp-regs-1)
 
-; skipping UrlSuffix for 'mbig-switch' due to finding no URLs
+mbig-switch
+UrlSuffix(gcc/V850-Options.html#index-mbig-switch-1)
 
 ; skipping UrlSuffix for 'mdebug' due to finding no URLs
 
-; skipping UrlSuffix for 'mdisable-callt' due to finding no URLs
+mdisable-callt
+UrlSuffix(gcc/V850-Options.html#index-mdisable-callt)
 
-; skipping UrlSuffix for 'mep' due to finding no URLs
+mep
+UrlSuffix(gcc/V850-Options.html#index-mep)
 
-; skipping UrlSuffix for 'mghs' due to finding no URLs
+mghs
+UrlSuffix(gcc/V850-Options.html#index-mghs)
 
-; skipping UrlSuffix for 'mlong-calls' due to finding no URLs
+mlong-calls
+UrlSuffix(gcc/V850-Options.html#index-mlong-calls-7)
 
-; skipping UrlSuffix for 'mprolog-function' due to finding no URLs
+mprolog-function
+UrlSuffix(gcc/V850-Options.html#index-mprolog-function)
 
-; skipping UrlSuffix for 'msda=' due to finding no URLs
+msda=
+UrlSuffix(gcc/V850-Options.html#index-msda)
 
-; skipping UrlSuffix for 'mspace' due to finding no URLs
+mspace
+UrlSuffix(gcc/V850-Options.html#index-mspace)
 
-; skipping UrlSuffix for 'mtda=' due to finding no URLs
+mtda=
+UrlSuffix(gcc/V850-Options.html#index-mtda)
 
 ; skipping UrlSuffix for 'mno-strict-align' due to finding no URLs
 
-; skipping UrlSuffix for 'mv850' due to finding no URLs
+mv850
+UrlSuffix(gcc/V850-Options.html#index-mv850)
 
-; skipping UrlSuffix for 'mv850e' due to finding no URLs
+mv850e
+UrlSuffix(gcc/V850-Options.html#index-mv850e)
 
-; skipping UrlSuffix for 'mv850e1' due to finding no URLs
+mv850e1
+UrlSuffix(gcc/V850-Options.html#index-mv850e1)
 
-; skipping UrlSuffix for 'mv850es' due to finding no URLs
+mv850es
+UrlSuffix(gcc/V850-Options.html#index-mv850es)
 
-; skipping UrlSuffix for 'mv850e2' due to finding no URLs
+mv850e2
+UrlSuffix(gcc/V850-Options.html#index-mv850e2)
 
-; skipping UrlSuffix for 'mv850e2v3' due to finding no URLs
+mv850e2v3
+UrlSuffix(gcc/V850-Options.html#index-mv850e2v3)
 
-; skipping UrlSuffix for 'mv850e3v5' due to finding no URLs
+mv850e3v5
+UrlSuffix(gcc/V850-Options.html#index-mv850e3v5)
 
-; skipping UrlSuffix for 'mv850e2v4' due to finding no URLs
+mv850e2v4
+UrlSuffix(gcc/V850-Options.html#index-mv850e2v4)
 
-; skipping UrlSuffix for 'mloop' due to finding no URLs
+mloop
+UrlSuffix(gcc/V850-Options.html#index-mloop-1)
 
-; skipping UrlSuffix for 'mzda=' due to finding no URLs
+mzda=
+UrlSuffix(gcc/V850-Options.html#index-mzda)
 
-; skipping UrlSuffix for 'mrelax' due to finding no URLs
+mrelax
+UrlSuffix(gcc/V850-Options.html#index-mrelax-9)
 
-; skipping UrlSuffix for 'mlong-jumps' due to finding no URLs
+mlong-jumps
+UrlSuffix(gcc/V850-Options.html#index-mlong-jumps)
 
-; skipping UrlSuffix for 'msoft-float' due to finding no URLs
+msoft-float
+UrlSuffix(gcc/V850-Options.html#index-msoft-float-14)
 
-; skipping UrlSuffix for 'mhard-float' due to finding no URLs
+mhard-float
+UrlSuffix(gcc/V850-Options.html#index-mhard-float-9)
 
-; skipping UrlSuffix for 'mrh850-abi' due to finding no URLs
+mrh850-abi
+UrlSuffix(gcc/V850-Options.html#index-mrh850-abi)
 
-; skipping UrlSuffix for 'mgcc-abi' due to finding no URLs
+mgcc-abi
+UrlSuffix(gcc/V850-Options.html#index-mgcc-abi)
 
-; skipping UrlSuffix for 'm8byte-align' due to finding no URLs
+m8byte-align
+UrlSuffix(gcc/V850-Options.html#index-m8byte-align)
 
diff --git a/gcc/config/vax/vax.opt.urls b/gcc/config/vax/vax.opt.urls
index c6b1c418b61..10bee25d833 100644
--- a/gcc/config/vax/vax.opt.urls
+++ b/gcc/config/vax/vax.opt.urls
@@ -1,10 +1,23 @@
 ; Autogenerated by regenerate-opt-urls.py from gcc/config/vax/vax.opt and 
generated HTML
 
-; skipping UrlSuffix for 'mg' due to finding no URLs
+md
+UrlSuffix(gcc/VAX-Options.html#index-md)
 
-; skipping UrlSuffix for 'mgnu' due to finding no URLs
+md-float
+UrlSuffix(gcc/VAX-Options.html#index-

[gcc r15-873] selftests: split out make_fndecl from selftest.h to its own header

2024-05-28 Thread David Malcolm via Gcc-cvs
https://gcc.gnu.org/g:fb7a943ead689e80e1ffdf6fa4e129dc155312d3

commit r15-873-gfb7a943ead689e80e1ffdf6fa4e129dc155312d3
Author: David Malcolm 
Date:   Tue May 28 15:55:17 2024 -0400

selftests: split out make_fndecl from selftest.h to its own header

Avoid selftest.h requiring the "tree" type.
No functional change intended.

gcc/analyzer/ChangeLog:
* region-model.cc: Include "selftest-tree.h".

gcc/ChangeLog:
* function-tests.cc: Include "selftest-tree.h".
* selftest-tree.h: New file.
* selftest.h (make_fndecl): Move to selftest-tree.h.

Signed-off-by: David Malcolm 

Diff:
---
 gcc/analyzer/region-model.cc |  1 +
 gcc/function-tests.cc|  1 +
 gcc/selftest-tree.h  | 41 +
 gcc/selftest.h   |  7 ---
 4 files changed, 43 insertions(+), 7 deletions(-)

diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc
index bebe2ed3cd6..0dd5671db1b 100644
--- a/gcc/analyzer/region-model.cc
+++ b/gcc/analyzer/region-model.cc
@@ -42,6 +42,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "diagnostic-color.h"
 #include "bitmap.h"
 #include "selftest.h"
+#include "selftest-tree.h"
 #include "analyzer/analyzer.h"
 #include "analyzer/analyzer-logging.h"
 #include "ordered-hash-map.h"
diff --git a/gcc/function-tests.cc b/gcc/function-tests.cc
index 827734422d8..ea3d722d4b6 100644
--- a/gcc/function-tests.cc
+++ b/gcc/function-tests.cc
@@ -76,6 +76,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "ipa-ref.h"
 #include "cgraph.h"
 #include "selftest.h"
+#include "selftest-tree.h"
 #include "print-rtl.h"
 
 #if CHECKING_P
diff --git a/gcc/selftest-tree.h b/gcc/selftest-tree.h
new file mode 100644
index 000..9922af3340f
--- /dev/null
+++ b/gcc/selftest-tree.h
@@ -0,0 +1,41 @@
+/* A self-testing framework, for use by -fself-test.
+   Copyright (C) 2015-2024 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#ifndef GCC_SELFTEST_TREE_H
+#define GCC_SELFTEST_TREE_H
+
+/* The selftest code should entirely disappear in a production
+   configuration, hence we guard all of it with #if CHECKING_P.  */
+
+#if CHECKING_P
+
+namespace selftest {
+
+/* Helper function for selftests that need a function decl.  */
+
+extern tree make_fndecl (tree return_type,
+const char *name,
+vec <tree> &param_types,
+bool is_variadic = false);
+
+} /* end of namespace selftest.  */
+
+#endif /* #if CHECKING_P */
+
+#endif /* GCC_SELFTEST_TREE_H */
diff --git a/gcc/selftest.h b/gcc/selftest.h
index 3bddaf1c322..808d432ec48 100644
--- a/gcc/selftest.h
+++ b/gcc/selftest.h
@@ -178,13 +178,6 @@ class line_table_test
   ~line_table_test ();
 };
 
-/* Helper function for selftests that need a function decl.  */
-
-extern tree make_fndecl (tree return_type,
-const char *name,
-vec <tree> &param_types,
-bool is_variadic = false);
-
 /* Run TESTCASE multiple times, once for each case in our test matrix.  */
 
 extern void


[gcc r15-874] libcpp: move label_text to its own header

2024-05-28 Thread David Malcolm via Gcc-cvs
https://gcc.gnu.org/g:9bda2c4c81b668b1d9abbb58cc4e805ac955a639

commit r15-874-g9bda2c4c81b668b1d9abbb58cc4e805ac955a639
Author: David Malcolm 
Date:   Tue May 28 15:55:24 2024 -0400

libcpp: move label_text to its own header

No functional change intended.

libcpp/ChangeLog:
* Makefile.in (TAGS_SOURCES): Add include/label-text.h.
* include/label-text.h: New file.
* include/rich-location.h: Include "label-text.h".
(class label_text): Move to label-text.h.

Signed-off-by: David Malcolm 

Diff:
---
 libcpp/Makefile.in |   2 +-
 libcpp/include/label-text.h| 102 +
 libcpp/include/rich-location.h |  79 +--
 3 files changed, 105 insertions(+), 78 deletions(-)

diff --git a/libcpp/Makefile.in b/libcpp/Makefile.in
index ebbca3f..7e47153264c 100644
--- a/libcpp/Makefile.in
+++ b/libcpp/Makefile.in
@@ -271,7 +271,7 @@ ETAGS = @ETAGS@
 
 TAGS_SOURCES = $(libcpp_a_SOURCES) internal.h system.h ucnid.h \
 include/cpplib.h include/line-map.h include/mkdeps.h include/symtab.h \
-include/rich-location.h
+include/rich-location.h include/label-text.h
 
 
 TAGS: $(TAGS_SOURCES)
diff --git a/libcpp/include/label-text.h b/libcpp/include/label-text.h
new file mode 100644
index 000..13562cda41f
--- /dev/null
+++ b/libcpp/include/label-text.h
@@ -0,0 +1,102 @@
+/* A very simple string class.
+   Copyright (C) 2015-2024 Free Software Foundation, Inc.
+
+This program is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.
+
+ In other words, you are welcome to use, share and improve this program.
+ You are forbidden to forbid anyone else to use, share and improve
+ what you give them.   Help stamp out software-hoarding!  */
+
+#ifndef LIBCPP_LABEL_TEXT_H
+#define LIBCPP_LABEL_TEXT_H
+
+/* A struct for the result of range_label::get_text: a NUL-terminated buffer
+   of localized text, and a flag to determine if the caller should "free" the
+   buffer.  */
+
+class label_text
+{
+public:
+  label_text ()
+  : m_buffer (NULL), m_owned (false)
+  {}
+
+  ~label_text ()
+  {
+if (m_owned)
+  free (m_buffer);
+  }
+
+  /* Move ctor.  */
+  label_text (label_text &&other)
+  : m_buffer (other.m_buffer), m_owned (other.m_owned)
+  {
+other.release ();
+  }
+
+  /* Move assignment.  */
+  label_text & operator= (label_text &&other)
+  {
+if (m_owned)
+  free (m_buffer);
+m_buffer = other.m_buffer;
+m_owned = other.m_owned;
+other.release ();
+return *this;
+  }
+
+  /* Delete the copy ctor and copy-assignment operator.  */
+  label_text (const label_text &) = delete;
+  label_text & operator= (const label_text &) = delete;
+
+  /* Create a label_text instance that borrows BUFFER from a
+ longer-lived owner.  */
+  static label_text borrow (const char *buffer)
+  {
+return label_text (const_cast <char *> (buffer), false);
+  }
+
+  /* Create a label_text instance that takes ownership of BUFFER.  */
+  static label_text take (char *buffer)
+  {
+return label_text (buffer, true);
+  }
+
+  void release ()
+  {
+m_buffer = NULL;
+m_owned = false;
+  }
+
+  const char *get () const
+  {
+return m_buffer;
+  }
+
+  bool is_owner () const
+  {
+return m_owned;
+  }
+
+private:
+  char *m_buffer;
+  bool m_owned;
+
+  label_text (char *buffer, bool owned)
+  : m_buffer (buffer), m_owned (owned)
+  {}
+};
+
+#endif /* !LIBCPP_LABEL_TEXT_H  */
diff --git a/libcpp/include/rich-location.h b/libcpp/include/rich-location.h
index a2ece8b033c..be424cb4b65 100644
--- a/libcpp/include/rich-location.h
+++ b/libcpp/include/rich-location.h
@@ -22,6 +22,8 @@ along with this program; see the file COPYING3.  If not see
 #ifndef LIBCPP_RICH_LOCATION_H
 #define LIBCPP_RICH_LOCATION_H
 
+#include "label-text.h"
+
 class range_label;
 class label_effects;
 
@@ -541,83 +543,6 @@ protected:
   const diagnostic_path *m_path;
 };
 
-/* A struct for the result of range_label::get_text: a NUL-terminated buffer
-   of localized text, and a flag to determine if the caller should "free" the
-   buffer.  */
-
-class label_text
-{
-public:
-  label_text ()
-  : m_buffer (NULL), m_owned (false)
-  {}
-
-  ~label_text ()
-  {
-if (m_owned)
-  free (m_buffer);
-  }
-
-  /* Move ctor.  */
-  label_text (label_text &&other)
-  : m_buffer (other.m_buffer), m_owned (other.m_owned)
-  {
-o

[gcc r15-875] diagnostics: consolidate global state in diagnostic-color.cc

2024-05-28 Thread David Malcolm via Gcc-cvs
https://gcc.gnu.org/g:21fc89bac61983a869c066f7377f8280a6adca49

commit r15-875-g21fc89bac61983a869c066f7377f8280a6adca49
Author: David Malcolm 
Date:   Tue May 28 15:55:28 2024 -0400

diagnostics: consolidate global state in diagnostic-color.cc

Simplify the table of default colors, avoiding the need to manually
add the strlen of each entry.
Consolidate the global state in diagnostic-color.cc into a
g_color_dict, adding selftests for the new class diagnostic_color_dict.

No functional change intended.

gcc/ChangeLog:
* diagnostic-color.cc: Define INCLUDE_VECTOR.
Include "label-text.h" and "selftest.h".
(struct color_cap): Replace with...
(struct color_default): ...this, adding "m_" prefixes to fields
and dropping "name_len" and "free_val" field.
(color_dict): Convert to...
(gcc_color_defaults): ...this, making const, dropping the trailing
strlen and "false" from each entry.
(class diagnostic_color_dict): New.
(g_color_dict): New.
(colorize_start): Reimplement in terms of g_color_dict.
(diagnostic_color_dict::get_entry_by_name): New, based on
colorize_start.
(diagnostic_color_dict::get_start_by_name): Likewise.
(diagnostic_color_dict::diagnostic_color_dict): New.
(parse_gcc_colors): Reimplement, moving body...
(diagnostic_color_dict::parse_envvar_value): ...here.
(colorize_init): Lazily create g_color_dict.
(selftest::test_empty_color_dict): New.
(selftest::test_default_color_dict): New.
(selftest::test_color_dict_envvar_parsing): New.
(selftest::diagnostic_color_cc_tests): New.
* selftest-run-tests.cc (selftest::run_tests): Call
selftest::diagnostic_color_cc_tests.
* selftest.h (selftest::diagnostic_color_cc_tests): New decl.

Signed-off-by: David Malcolm 

Diff:
---
 gcc/diagnostic-color.cc   | 277 +++---
 gcc/selftest-run-tests.cc |   1 +
 gcc/selftest.h|   1 +
 3 files changed, 216 insertions(+), 63 deletions(-)

diff --git a/gcc/diagnostic-color.cc b/gcc/diagnostic-color.cc
index f01a0fc2e37..cbe57ce763f 100644
--- a/gcc/diagnostic-color.cc
+++ b/gcc/diagnostic-color.cc
@@ -17,9 +17,11 @@
02110-1301, USA.  */
 
 #include "config.h"
+#define INCLUDE_VECTOR
 #include "system.h"
 #include "diagnostic-color.h"
 #include "diagnostic-url.h"
+#include "label-text.h"
 
 #ifdef __MINGW32__
 #  define WIN32_LEAN_AND_MEAN
@@ -27,6 +29,7 @@
 #endif
 
 #include "color-macros.h"
+#include "selftest.h"
 
 /* The context and logic for choosing default --color screen attributes
(foreground and background colors, etc.) are the following.
@@ -72,56 +75,124 @@
 counterparts) and possibly bold blue.  */
 /* Default colors. The user can overwrite them using environment
variable GCC_COLORS.  */
-struct color_cap
+struct color_default
 {
-  const char *name;
-  const char *val;
-  unsigned char name_len;
-  bool free_val;
+  const char *m_name;
+  const char *m_val;
 };
 
 /* For GCC_COLORS.  */
-static struct color_cap color_dict[] =
+static const color_default gcc_color_defaults[] =
 {
-  { "error", SGR_SEQ (COLOR_BOLD COLOR_SEPARATOR COLOR_FG_RED), 5, false },
-  { "warning", SGR_SEQ (COLOR_BOLD COLOR_SEPARATOR COLOR_FG_MAGENTA),
-  7, false },
-  { "note", SGR_SEQ (COLOR_BOLD COLOR_SEPARATOR COLOR_FG_CYAN), 4, false },
-  { "range1", SGR_SEQ (COLOR_FG_GREEN), 6, false },
-  { "range2", SGR_SEQ (COLOR_FG_BLUE), 6, false },
-  { "locus", SGR_SEQ (COLOR_BOLD), 5, false },
-  { "quote", SGR_SEQ (COLOR_BOLD), 5, false },
-  { "path", SGR_SEQ (COLOR_BOLD COLOR_SEPARATOR COLOR_FG_CYAN), 4, false },
-  { "fnname", SGR_SEQ (COLOR_BOLD COLOR_SEPARATOR COLOR_FG_GREEN), 6, false },
-  { "targs", SGR_SEQ (COLOR_FG_MAGENTA), 5, false },
-  { "fixit-insert", SGR_SEQ (COLOR_FG_GREEN), 12, false },
-  { "fixit-delete", SGR_SEQ (COLOR_FG_RED), 12, false },
-  { "diff-filename", SGR_SEQ (COLOR_BOLD), 13, false },
-  { "diff-hunk", SGR_SEQ (COLOR_FG_CYAN), 9, false },
-  { "diff-delete", SGR_SEQ (COLOR_FG_RED), 11, false },
-  { "diff-insert", SGR_SEQ (COLOR_FG_GREEN), 11, false },
-  { "type-diff", SGR_SEQ (COLOR_BOLD COLOR_SEPARATOR COLOR_FG_GREEN), 9, false 
},
-  { "valid", SGR_SEQ (COLOR_BOLD COLOR_SEPARATOR COLOR_FG_GREEN), 5, false },
-  { "invalid", SGR_SEQ (COLOR_BOLD COLOR_SEPARATOR COLOR_FG_RED), 7, false },
-  { NULL, NULL, 0, false }
+  { "error", SGR_SEQ (COLOR_BOLD COLOR_SEPARATOR COLOR_FG_RED) },
+  { "warning", SGR_SEQ (COLOR_BOLD COLOR_SEPARATOR COLOR_FG_MAGENTA) },
+  { "note", SGR_SEQ (COLOR_BOLD COLOR_SEPARATOR COLOR_FG_CYAN) },
+  { "range1", SGR_SEQ (COLOR_FG_GREEN) },
+  { "range2", SGR_SEQ (COLOR_FG_BLUE) },
+  { "locus", SGR_SEQ (COLOR_BOLD) },
+  { "quote", SGR_SEQ (COLOR_BOLD) },
+  { "path

[gcc r15-876] i386: Improve access to _Atomic DImode location via XMM regs for SSE4.1 x86_32 targets

2024-05-28 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:91d79053f2b416cb9e97d9c0c3fb5b73075289e6

commit r15-876-g91d79053f2b416cb9e97d9c0c3fb5b73075289e6
Author: Uros Bizjak 
Date:   Tue May 28 20:25:14 2024 +0200

i386: Improve access to _Atomic DImode location via XMM regs for SSE4.1 
x86_32 targets

Use MOVD/PEXTRD and MOVD/PINSRD insn sequences to move DImode value
between XMM and GPR register sets for SSE4.1 x86_32 targets in order
to avoid spilling the value to stack.

The load from _Atomic location a improves from:

movqa, %xmm0
movq%xmm0, (%esp)
movl(%esp), %eax
movl4(%esp), %edx

to:
movqa, %xmm0
movd%xmm0, %eax
pextrd  $1, %xmm0, %edx

The store to _Atomic location b improves from:

movl%eax, (%esp)
movl%edx, 4(%esp)
movq(%esp), %xmm0
movq%xmm0, b

to:
movd%eax, %xmm0
pinsrd  $1, %edx, %xmm0
movq%xmm0, b

gcc/ChangeLog:

* config/i386/sync.md (atomic_loaddi_fpu): Use movd/pextrd
to move DImode value from XMM to GPR for TARGET_SSE4_1.
(atomic_storedi_fpu): Use movd/pinsrd to move DImode value
from GPR to XMM for TARGET_SSE4_1.

Diff:
---
 gcc/config/i386/sync.md | 36 
 1 file changed, 28 insertions(+), 8 deletions(-)

diff --git a/gcc/config/i386/sync.md b/gcc/config/i386/sync.md
index 8317581ebe2..f2b3ba0aa7a 100644
--- a/gcc/config/i386/sync.md
+++ b/gcc/config/i386/sync.md
@@ -215,8 +215,18 @@
}
   else
{
+ rtx tmpdi = gen_lowpart (DImode, tmp);
+
  emit_insn (gen_loaddi_via_sse (tmp, src));
- emit_insn (gen_storedi_via_sse (mem, tmp));
+
+ if (GENERAL_REG_P (dst)
+ && TARGET_SSE4_1 && TARGET_INTER_UNIT_MOVES_FROM_VEC)
+   {
+ emit_move_insn (dst, tmpdi);
+ DONE;
+   }
+ else
+   emit_move_insn (mem, tmpdi);
}
 
   if (mem != dst)
@@ -294,20 +304,30 @@
 emit_move_insn (dst, src);
   else
 {
-  if (REG_P (src))
-   {
- emit_move_insn (mem, src);
- src = mem;
-   }
-
   if (STACK_REG_P (tmp))
{
+ if (GENERAL_REG_P (src))
+   {
+ emit_move_insn (mem, src);
+ src = mem;
+   }
+
  emit_insn (gen_loaddi_via_fpu (tmp, src));
  emit_insn (gen_storedi_via_fpu (dst, tmp));
}
   else
{
- emit_insn (gen_loaddi_via_sse (tmp, src));
+ rtx tmpdi = gen_lowpart (DImode, tmp);
+
+ if (GENERAL_REG_P (src)
+ && !(TARGET_SSE4_1 && TARGET_INTER_UNIT_MOVES_TO_VEC))
+   {
+ emit_move_insn (mem, src);
+ src = mem;
+   }
+
+ emit_move_insn (tmpdi, src);
+
  emit_insn (gen_storedi_via_sse (dst, tmp));
}
 }


[gcc r15-877] resource.cc (mark_target_live_regs): Don't look past target insn, PR115182

2024-05-28 Thread Hans-Peter Nilsson via Gcc-cvs
https://gcc.gnu.org/g:84b4ed45ea81ed5c4fb656a17846b26071c23e7d

commit r15-877-g84b4ed45ea81ed5c4fb656a17846b26071c23e7d
Author: Hans-Peter Nilsson 
Date:   Tue May 28 23:15:57 2024 +0200

resource.cc (mark_target_live_regs): Don't look past target insn, PR115182

The PR115182 regression is that a delay-slot for a conditional branch
is no longer filled with an insn that has been "sunk" because of
r15-518-g99b1daae18c095, for cris-elf w. -O2 -march=v10.

There are still sufficient "nearby" dependency-less insns that the
delay-slot shouldn't be empty.  In particular there's one candidate in
the loop, right after an off-ramp branch, off the loop: a move from
$r9 to $r3.

beq .L2
nop
move.d $r9,$r3

But, the resource.cc data-flow-analysis incorrectly says it collides
with registers "live" at that .L2 off-ramp.  The off-ramp insns
(inlined from simple_rand) look like this (left-to-right direction):

.L2:
move.d $r12,[_seed.0]
move.d $r13,[_seed.0+4]
ret
movem [$sp+],$r8

So, a store of a long long to _seed, a return instruction and a
restoring multi-register-load of r0..r8 (all callee-saved registers)
in the delay-slot of the return insn.  The return-value is kept in
$r10,$r11 so in total $r10..$r13 live plus the stack-pointer and
return-address registers.  But, mark_target_live_regs says that
$r0..$r8 are also live because it *includes the registers live for the
return instruction*!  While they "come alive" after the movem, they
certainly aren't live at the "off-ramp" .L2 label.

The problem is in mark_target_live_regs: it consults a hash-table
indexed by insn uid, where it tracks the currently live registers with
a "generation" count to handle when it moves around insns, filling
delay-slots.  As a fall-back, it starts with registers live at the
start of each basic block, calculated by the comparatively modern df
machinery (except that it can fail finding out which basic block an
insn belongs to, at which times it includes all registers film at 11),
and tracks the semantics of insns up to each insn.

You'd think that's all that should be done, but then for some reason
it *also* looks at insns *after the target insn* up to a few branches,
and includes that in the set of live registers!  This is the code in
mark_target_live_regs that starts with the call to
find_dead_or_set_registers.  I couldn't make sense of it, so I looked
at its history, and I think I found the cause; it's a thinko or
possibly two thinkos.  The original implementation, gcc-git-described
as r0-97-g9c7e297806a27f, later moved from reorg.c to resource.c in
r0-20470-gca545bb569b756.

I believe the "extra" lookup was intended to counter flaws in the
reorg.c/resource.c register liveness analysis; to inspect insns along
the execution paths to exclude registers that, when looking at
subsequent insns, weren't live.  That guess is backed by a sentence in
the updated (i.e. deleted) part of the function head comment for
mark_target_live_regs: "Next, scan forward from TARGET looking for
things set or clobbered before they are used.  These are not live."
To me that sounds like flawed register-liveness data.

An epilogue expanded as RTX (i.e. not just assembly code emitted as
text) is introduced in basepoints/gcc-0-1334-gbdac5f5848fb, so before
that time, nobody would notice that saved registers were included as
live registers in delay-slots in "next-to-last" basic blocks.

Then in r0-24783-g96e9c98d59cc40, the intersection ("and") was changed
to a union ("or"), i.e. it added to the set of live registers instead
of thinning it out.  In the gcc-patches archives, I see the patch
submission doesn't offer a C test-case and only has RTX snippets
(apparently for SPARC).  The message does admit that the change goes
"against what the comments in the code say":
https://gcc.gnu.org/pipermail/gcc-patches/1999-November/021836.html
It looks like this was related to a bug with register liveness info
messed up when moving a "delay-slotted" insn from one slot to another.
But I can't help thinking it's just papering over a register
liveness bug elsewhere.

I think, with a reliable "DF_LR_IN", the whole thing *after* tracking
from start-of-bb up to the target insn should be removed; thus.

This patch also removes the now-unused find_dead_or_set_registers
function.

At r15-518, it fixes the issue for CRIS and improves coremark scores
at -O2 -march=v10 a tiny bit (about 0.05%).

PR rtl-optimization/115182
* resource.cc (mark_target_live_regs): Don't look for
unconditional branches after the target to improve on the
register

[gcc r15-878] resource.cc: Replace calls to find_basic_block with cfgrtl BLOCK_FOR_INSN

2024-05-28 Thread Hans-Peter Nilsson via Gcc-cvs
https://gcc.gnu.org/g:933ab59c59bdc1ac9e3ca3a56527836564e1821b

commit r15-878-g933ab59c59bdc1ac9e3ca3a56527836564e1821b
Author: Hans-Peter Nilsson 
Date:   Tue May 28 23:16:48 2024 +0200

resource.cc: Replace calls to find_basic_block with cfgrtl BLOCK_FOR_INSN

...and call compute_bb_for_insn in init_resource_info and
free_bb_for_insn in free_resource_info.

I put a gcc_unreachable in that else-clause for a failing
find_basic_block in mark_target_live_regs after the comment that says:

/* We didn't find the start of a basic block.  Assume everything
   in use.  This should happen only extremely rarely.  */
SET_HARD_REG_SET (res->regs);

and found that it fails not extremely rarely but extremely early in
the build (compiling libgcc).

That kind of pessimization leads to suboptimal delay-slot-filling.
Instead, do like many machine_dependent_reorg passes and call
compute_bb_for_insn as part of resource.cc initialization.

After this patch, there's a whole "if (b != -1)" conditional that's
dominated by a gcc_assert (b != -1).  I separated that, as it's a NFC
whitespace patch that hampers patch readability.

Altogether this improved coremark performance for CRIS at -O2
-march=v10 by 0.36%.

* resource.cc: Include cfgrtl.h.  Use BLOCK_FOR_INSN (insn)->index
instead of calling find_basic_block (insn).  Assert for not -1.
(find_basic_block): Remove function.
(init_resource_info): Call compute_bb_for_insn.
(free_resource_info): Call free_bb_for_insn.

Diff:
---
 gcc/resource.cc | 66 +
 1 file changed, 10 insertions(+), 56 deletions(-)

diff --git a/gcc/resource.cc b/gcc/resource.cc
index 06fcfd3e44c..0d8cde93570 100644
--- a/gcc/resource.cc
+++ b/gcc/resource.cc
@@ -28,6 +28,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tm_p.h"
 #include "regs.h"
 #include "emit-rtl.h"
+#include "cfgrtl.h"
 #include "resource.h"
 #include "insn-attr.h"
 #include "function-abi.h"
@@ -75,7 +76,6 @@ static HARD_REG_SET current_live_regs;
 static HARD_REG_SET pending_dead_regs;
 
 static void update_live_status (rtx, const_rtx, void *);
-static int find_basic_block (rtx_insn *, int);
 static rtx_insn *next_insn_no_annul (rtx_insn *);
 
 /* Utility function called from mark_target_live_regs via note_stores.
@@ -113,46 +113,6 @@ update_live_status (rtx dest, const_rtx x, void *data 
ATTRIBUTE_UNUSED)
CLEAR_HARD_REG_BIT (pending_dead_regs, i);
   }
 }
-
-/* Find the number of the basic block with correct live register
-   information that starts closest to INSN.  Return -1 if we couldn't
-   find such a basic block or the beginning is more than
-   SEARCH_LIMIT instructions before INSN.  Use SEARCH_LIMIT = -1 for
-   an unlimited search.
-
-   The delay slot filling code destroys the control-flow graph so,
-   instead of finding the basic block containing INSN, we search
-   backwards toward a BARRIER where the live register information is
-   correct.  */
-
-static int
-find_basic_block (rtx_insn *insn, int search_limit)
-{
-  /* Scan backwards to the previous BARRIER.  Then see if we can find a
- label that starts a basic block.  Return the basic block number.  */
-  for (insn = prev_nonnote_insn (insn);
-   insn && !BARRIER_P (insn) && search_limit != 0;
-   insn = prev_nonnote_insn (insn), --search_limit)
-;
-
-  /* The closest BARRIER is too far away.  */
-  if (search_limit == 0)
-return -1;
-
-  /* The start of the function.  */
-  else if (insn == 0)
-return ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->index;
-
-  /* See if any of the upcoming CODE_LABELs start a basic block.  If we reach
- anything other than a CODE_LABEL or note, we can't find this code.  */
-  for (insn = next_nonnote_insn (insn);
-   insn && LABEL_P (insn);
-   insn = next_nonnote_insn (insn))
-if (BLOCK_FOR_INSN (insn))
-  return BLOCK_FOR_INSN (insn)->index;
-
-  return -1;
-}
 
 /* Similar to next_insn, but ignores insns in the delay slots of
an annulled branch.  */
@@ -714,7 +674,8 @@ mark_target_live_regs (rtx_insn *insns, rtx 
target_maybe_return, struct resource
 }
 
   if (b == -1)
-b = find_basic_block (target, param_max_delay_slot_live_search);
+b = BLOCK_FOR_INSN (target)->index;
+  gcc_assert (b != -1);
 
   if (target_hash_table != NULL)
 {
@@ -722,7 +683,7 @@ mark_target_live_regs (rtx_insn *insns, rtx 
target_maybe_return, struct resource
{
  /* If the information is up-to-date, use it.  Otherwise, we will
 update it below.  */
- if (b == tinfo->block && b != -1 && tinfo->bb_tick == bb_ticks[b])
+ if (b == tinfo->block && tinfo->bb_tick == bb_ticks[b])
{
  res->regs = tinfo->live_regs;
  return;
@@ -905,7 +866,6 @@ void
 init_resource_info (rtx

[gcc r15-879] resource.cc (mark_target_live_regs): Remove check for bb not found

2024-05-28 Thread Hans-Peter Nilsson via Gcc-cvs
https://gcc.gnu.org/g:e1abce5b6ad8f5aee86ec7729b516d81014db09e

commit r15-879-ge1abce5b6ad8f5aee86ec7729b516d81014db09e
Author: Hans-Peter Nilsson 
Date:   Tue May 28 23:17:31 2024 +0200

resource.cc (mark_target_live_regs): Remove check for bb not found

No functional change.

A "git diff -wb" (ignore whitespace diff) shows that this
commit just removes a "if (b != -1)" after a "gcc_assert (b
!= -1)" and also removes the subsequent "else" clause.

* resource.cc (mark_target_live_regs): Remove redundant check for b
being -1, after gcc_assert.

Diff:
---
 gcc/resource.cc | 270 +++-
 1 file changed, 132 insertions(+), 138 deletions(-)

diff --git a/gcc/resource.cc b/gcc/resource.cc
index 0d8cde93570..62bd46f786e 100644
--- a/gcc/resource.cc
+++ b/gcc/resource.cc
@@ -704,156 +704,150 @@ mark_target_live_regs (rtx_insn *insns, rtx 
target_maybe_return, struct resource
 
   CLEAR_HARD_REG_SET (pending_dead_regs);
 
-  /* If we found a basic block, get the live registers from it and update
- them with anything set or killed between its start and the insn before
- TARGET; this custom life analysis is really about registers so we need
- to use the LR problem.  Otherwise, we must assume everything is live.  */
-  if (b != -1)
+  /* Get the live registers from the basic block and update them with
+ anything set or killed between its start and the insn before
+ TARGET; this custom life analysis is really about registers so we
+ need to use the LR problem.  Otherwise, we must assume everything
+ is live.  */
+  regset regs_live = DF_LR_IN (BASIC_BLOCK_FOR_FN (cfun, b));
+  rtx_insn *start_insn, *stop_insn;
+  df_ref def;
+
+  /* Compute hard regs live at start of block.  */
+  REG_SET_TO_HARD_REG_SET (current_live_regs, regs_live);
+  FOR_EACH_ARTIFICIAL_DEF (def, b)
+if (DF_REF_FLAGS (def) & DF_REF_AT_TOP)
+  SET_HARD_REG_BIT (current_live_regs, DF_REF_REGNO (def));
+
+  /* Get starting and ending insn, handling the case where each might
+ be a SEQUENCE.  */
+  start_insn = (b == ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->index ?
+   insns : BB_HEAD (BASIC_BLOCK_FOR_FN (cfun, b)));
+  stop_insn = target;
+
+  if (NONJUMP_INSN_P (start_insn)
+  && GET_CODE (PATTERN (start_insn)) == SEQUENCE)
+start_insn = as_a <rtx_sequence *> (PATTERN (start_insn))->insn (0);
+
+  if (NONJUMP_INSN_P (stop_insn)
+  && GET_CODE (PATTERN (stop_insn)) == SEQUENCE)
+stop_insn = next_insn (PREV_INSN (stop_insn));
+
+  for (insn = start_insn; insn != stop_insn;
+   insn = next_insn_no_annul (insn))
 {
-  regset regs_live = DF_LR_IN (BASIC_BLOCK_FOR_FN (cfun, b));
-  rtx_insn *start_insn, *stop_insn;
-  df_ref def;
-
-  /* Compute hard regs live at start of block.  */
-  REG_SET_TO_HARD_REG_SET (current_live_regs, regs_live);
-  FOR_EACH_ARTIFICIAL_DEF (def, b)
-   if (DF_REF_FLAGS (def) & DF_REF_AT_TOP)
- SET_HARD_REG_BIT (current_live_regs, DF_REF_REGNO (def));
-
-  /* Get starting and ending insn, handling the case where each might
-be a SEQUENCE.  */
-  start_insn = (b == ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->index ?
-   insns : BB_HEAD (BASIC_BLOCK_FOR_FN (cfun, b)));
-  stop_insn = target;
-
-  if (NONJUMP_INSN_P (start_insn)
- && GET_CODE (PATTERN (start_insn)) == SEQUENCE)
-   start_insn = as_a <rtx_sequence *> (PATTERN (start_insn))->insn (0);
-
-  if (NONJUMP_INSN_P (stop_insn)
- && GET_CODE (PATTERN (stop_insn)) == SEQUENCE)
-   stop_insn = next_insn (PREV_INSN (stop_insn));
-
-  for (insn = start_insn; insn != stop_insn;
-  insn = next_insn_no_annul (insn))
+  rtx link;
+  rtx_insn *real_insn = insn;
+  enum rtx_code code = GET_CODE (insn);
+
+  if (DEBUG_INSN_P (insn))
+   continue;
+
+  /* If this insn is from the target of a branch, it isn't going to
+be used in the sequel.  If it is used in both cases, this
+test will not be true.  */
+  if ((code == INSN || code == JUMP_INSN || code == CALL_INSN)
+ && INSN_FROM_TARGET_P (insn))
+   continue;
+
+  /* If this insn is a USE made by update_block, we care about the
+underlying insn.  */
+  if (code == INSN
+ && GET_CODE (PATTERN (insn)) == USE
+ && INSN_P (XEXP (PATTERN (insn), 0)))
+   real_insn = as_a <rtx_insn *> (XEXP (PATTERN (insn), 0));
+
+  if (CALL_P (real_insn))
{
- rtx link;
- rtx_insn *real_insn = insn;
- enum rtx_code code = GET_CODE (insn);
-
- if (DEBUG_INSN_P (insn))
-   continue;
-
- /* If this insn is from the target of a branch, it isn't going to
-be used in the sequel.  If it is used in both cases, this
-test will not be true.  */
- if ((code == INSN || code == JUMP_INSN || code == CALL_INSN)
- && INSN_FROM_TARGET_P 

[gcc r15-880] resource.cc: Remove redundant conditionals

2024-05-28 Thread Hans-Peter Nilsson via Gcc-cvs
https://gcc.gnu.org/g:802a98d128f9b0eea2432f6511328d14e0bd721b

commit r15-880-g802a98d128f9b0eea2432f6511328d14e0bd721b
Author: Hans-Peter Nilsson 
Date:   Tue May 28 23:18:14 2024 +0200

resource.cc: Remove redundant conditionals

No functional change.

- We always have a target_hash_table and bb_ticks because
init_resource_info is always called.  These conditionals are
an ancient artifact: it's been quite a while since
resource.cc was used elsewhere than exclusively from reorg.cc

- In mark_target_live_regs, get rid of a now-redundant "if
(tinfo != NULL)" conditional and replace an "if (bb)" with a
gcc_assert.

A "git diff -wb" (ignore whitespace diff) is better at
showing the actual changes.

* resource.cc (free_resource_info, clear_hashed_info_for_insn): Don't
check for non-null target_hash_table and bb_ticks.
(mark_target_live_regs): Ditto.  Replace check for non-NULL result from
BLOCK_FOR_INSN with a call to gcc_assert.  Fold code conditioned on
tinfo != NULL.

Diff:
---
 gcc/resource.cc | 123 
 1 file changed, 52 insertions(+), 71 deletions(-)

diff --git a/gcc/resource.cc b/gcc/resource.cc
index 62bd46f786e..7c1de886432 100644
--- a/gcc/resource.cc
+++ b/gcc/resource.cc
@@ -658,49 +658,42 @@ mark_target_live_regs (rtx_insn *insns, rtx 
target_maybe_return, struct resource
   res->cc = 0;
 
   /* See if we have computed this value already.  */
-  if (target_hash_table != NULL)
-{
-  for (tinfo = target_hash_table[INSN_UID (target) % TARGET_HASH_PRIME];
-  tinfo; tinfo = tinfo->next)
-   if (tinfo->uid == INSN_UID (target))
- break;
-
-  /* Start by getting the basic block number.  If we have saved
-information, we can get it from there unless the insn at the
-start of the basic block has been deleted.  */
-  if (tinfo && tinfo->block != -1
- && ! BB_HEAD (BASIC_BLOCK_FOR_FN (cfun, tinfo->block))->deleted ())
-   b = tinfo->block;
-}
+  for (tinfo = target_hash_table[INSN_UID (target) % TARGET_HASH_PRIME];
+   tinfo; tinfo = tinfo->next)
+if (tinfo->uid == INSN_UID (target))
+  break;
+
+  /* Start by getting the basic block number.  If we have saved
+ information, we can get it from there unless the insn at the
+ start of the basic block has been deleted.  */
+  if (tinfo && tinfo->block != -1
+  && ! BB_HEAD (BASIC_BLOCK_FOR_FN (cfun, tinfo->block))->deleted ())
+b = tinfo->block;
 
   if (b == -1)
 b = BLOCK_FOR_INSN (target)->index;
   gcc_assert (b != -1);
 
-  if (target_hash_table != NULL)
+  if (tinfo)
 {
-  if (tinfo)
+  /* If the information is up-to-date, use it.  Otherwise, we will
+update it below.  */
+  if (b == tinfo->block && tinfo->bb_tick == bb_ticks[b])
{
- /* If the information is up-to-date, use it.  Otherwise, we will
-update it below.  */
- if (b == tinfo->block && tinfo->bb_tick == bb_ticks[b])
-   {
- res->regs = tinfo->live_regs;
- return;
-   }
-   }
-  else
-   {
- /* Allocate a place to put our results and chain it into the
-hash table.  */
- tinfo = XNEW (struct target_info);
- tinfo->uid = INSN_UID (target);
- tinfo->block = b;
- tinfo->next
-   = target_hash_table[INSN_UID (target) % TARGET_HASH_PRIME];
- target_hash_table[INSN_UID (target) % TARGET_HASH_PRIME] = tinfo;
+ res->regs = tinfo->live_regs;
+ return;
}
 }
+  else
+{
+  /* Allocate a place to put our results and chain it into the hash
+table.  */
+  tinfo = XNEW (struct target_info);
+  tinfo->uid = INSN_UID (target);
+  tinfo->block = b;
+  tinfo->next = target_hash_table[INSN_UID (target) % TARGET_HASH_PRIME];
+  target_hash_table[INSN_UID (target) % TARGET_HASH_PRIME] = tinfo;
+}
 
   CLEAR_HARD_REG_SET (pending_dead_regs);
 
@@ -825,13 +818,12 @@ mark_target_live_regs (rtx_insn *insns, rtx 
target_maybe_return, struct resource
 to be live here still are.  The fallthrough edge may have
 left a live register uninitialized.  */
  bb = BLOCK_FOR_INSN (real_insn);
- if (bb)
-   {
- HARD_REG_SET extra_live;
+ gcc_assert (bb);
 
- REG_SET_TO_HARD_REG_SET (extra_live, DF_LR_IN (bb));
- current_live_regs |= extra_live;
-   }
+ HARD_REG_SET extra_live;
+
+ REG_SET_TO_HARD_REG_SET (extra_live, DF_LR_IN (bb));
+ current_live_regs |= extra_live;
}
 
   /* The beginning of the epilogue corresponds to the end of the
@@ -847,10 +839,8 @@ mark_target_live_regs (rtx_insn *insns, rtx 
target_maybe_return, struct resource
 {
   tinfo->block = b;
   tinfo->bb

[gcc(refs/users/meissner/heads/work167-tar)] Add more SPR register debug options.

2024-05-28 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:309a32739e2d075bd62bf6767295acef596fe16c

commit 309a32739e2d075bd62bf6767295acef596fe16c
Author: Michael Meissner 
Date:   Tue May 28 17:23:23 2024 -0400

Add more SPR register debug options.

2024-05-28  Michael Meissner  

gcc/

* config/rs6000/rs6000.cc (rs6000_hard_regno_mode_ok_uncached): Add more
debug options for seeing what modes get stored in SPR registers.
* config/rs6000/rs6000.opt (-msispr): New SPR mode debug option.
(-mhispr): Likewise.
(-mqispr): Likewise.
(-msfspr): Likewise.
(-mdfspr): Likewise.

Diff:
---
 gcc/config/rs6000/rs6000.cc  | 30 +++---
 gcc/config/rs6000/rs6000.opt | 20 
 2 files changed, 43 insertions(+), 7 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index c6fb978977c..9a7b4cc1138 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -1959,16 +1959,32 @@ rs6000_hard_regno_mode_ok_uncached (int regno, 
machine_mode mode)
if (GET_MODE_CLASS (mode) == MODE_CC)
  return TARGET_CCSPR != 0;
 
-   if (SCALAR_FLOAT_MODE_P (mode))
- return TARGET_FPSPR != 0;
+   switch (mode)
+ {
+ case E_QImode:
+   return (TARGET_INTSPR || TARGET_QISPR);
 
-   if (!SCALAR_INT_MODE_P (mode))
- return false;
+ case E_HImode:
+   return (TARGET_INTSPR || TARGET_HISPR);
 
-   if (TARGET_INTSPR)
- return true;
+ case E_SImode:
+   return (TARGET_INTSPR || TARGET_SISPR || reg_size == 4);
+
+ case E_DImode:
+   return (reg_size == 8);
+
+ case E_SFmode:
+ case E_SDmode:
+   return (TARGET_FPSPR || TARGET_SFSPR);
 
-   return GET_MODE_SIZE (mode) == reg_size;
+ case E_DFmode:
+ case E_DDmode:
+   return (TARGET_FPSPR || TARGET_DFSPR);
+
+ default:
+   break;
+ }
+   return false;
   }
 
 default:
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 0384b92344f..7a0b52ab6e1 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -638,6 +638,18 @@ mintspr
 Target Undocumented Var(TARGET_INTSPR) Init(0) Save
 Allow (do not allow) small integers in SPR registers.
 
+msispr
+Target Undocumented Var(TARGET_SISPR) Init(0) Save
+Allow (do not allow) SImode in SPR registers.
+
+mhispr
+Target Undocumented Var(TARGET_HISPR) Init(0) Save
+Allow (do not allow) HImode in SPR registers.
+
+mqispr
+Target Undocumented Var(TARGET_QISPR) Init(0) Save
+Allow (do not allow) QImode in SPR registers.
+
 mccspr
 Target Undocumented Var(TARGET_CCSPR) Init(0) Save
 Allow (do not allow) condition codes in SPR registers.
@@ -646,6 +658,14 @@ mfpspr
 Target Undocumented Var(TARGET_FPSPR) Init(0) Save
 Allow (do not allow) floating point in SPR registers.
 
+msfspr
+Target Undocumented Var(TARGET_SFSPR) Init(0) Save
+Allow (do not allow) SFmode in SPR registers.
+
+mdfspr
+Target Undocumented Var(TARGET_DFSPR) Init(0) Save
+Allow (do not allow) DFmode in SPR registers.
+
 ; Documented parameters
 
 -param=rs6000-vect-unroll-limit=


[gcc(refs/users/meissner/heads/work167-tar)] Update ChangeLog.*

2024-05-28 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:929c37f970cd75190f1b7af30c33e422c2a4952c

commit 929c37f970cd75190f1b7af30c33e422c2a4952c
Author: Michael Meissner 
Date:   Tue May 28 17:36:48 2024 -0400

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.tar | 16 
 1 file changed, 16 insertions(+)

diff --git a/gcc/ChangeLog.tar b/gcc/ChangeLog.tar
index 3b2a674be1d..34207e707f6 100644
--- a/gcc/ChangeLog.tar
+++ b/gcc/ChangeLog.tar
@@ -1,3 +1,19 @@
+ Branch work167-tar, patch #206 
+
+Add more SPR register debug options.
+
+2024-05-28  Michael Meissner  
+
+gcc/
+
+   * config/rs6000/rs6000.cc (rs6000_hard_regno_mode_ok_uncached): Add more
+   debug options for seeing what modes get stored in SPR registers.
+   * config/rs6000/rs6000.opt (-msispr): New SPR mode debut option.
+   (-mhispr): Likewise.
+   (-mqispr): Likewise.): Likewise.
+   (-msfspr): Likewise.
+   (-mdfspr): Likewise.
+
  Branch work167-tar, patch #205 
 
 Fix test for TAR register.


[gcc r15-881] More tweaks from gimple_outgoing_range changes.

2024-05-28 Thread Andrew Macleod via Gcc-cvs
https://gcc.gnu.org/g:6f36cc2535c11b9406715836daeb87169fa79473

commit r15-881-g6f36cc2535c11b9406715836daeb87169fa79473
Author: Andrew MacLeod 
Date:   Wed May 22 19:27:01 2024 -0400

More tweaks from gimple_outgoing_range changes.

the dom_ranger used for fast vrp no longer needs a local
gimple_outgoing_range object as it is now always available from the
range_query parent class.

The builtin_unreachable code for adjusting globals and removing the
builtin calls during the final VRP pass can now function with just
a range_query object rather than a specific ranger.   This adjusts it to
use the extra methods in the range_query API.
This will now allow removal of builtin_unreachable calls even if there is no
active ranger with dependency info available.

* gimple-range.cc (dom_ranger::dom_ranger): Do not initialize m_out.
(dom_ranger::maybe_push_edge): Use gori () rather than m_out.
* gimple-range.h (dom_ranger::m_out): Remove.
* tree-vrp.cc (remove_unreachable::remove_unreachable): Use a
range_query rather than a gimple_ranger.
(remove_unreachable::remove): New.
(remove_unreachable::m_ranger): Change to a range_query.
(remove_unreachable::handle_early): If there is no dependency
information, do nothing.
(remove_unreachable::remove_and_update_globals): Do not update
globals if there is no dependency info to use.

Diff:
---
 gcc/gimple-range.cc |  4 ++--
 gcc/gimple-range.h  |  1 -
 gcc/tree-vrp.cc | 47 +--
 3 files changed, 47 insertions(+), 5 deletions(-)

diff --git a/gcc/gimple-range.cc b/gcc/gimple-range.cc
index 0749c9fa215..711646abb67 100644
--- a/gcc/gimple-range.cc
+++ b/gcc/gimple-range.cc
@@ -922,7 +922,7 @@ assume_query::dump (FILE *f)
 
 // Create a DOM based ranger for use by a DOM walk pass.
 
-dom_ranger::dom_ranger () : m_global (), m_out ()
+dom_ranger::dom_ranger () : m_global ()
 {
   m_freelist.create (0);
   m_freelist.truncate (0);
@@ -1156,7 +1156,7 @@ dom_ranger::maybe_push_edge (edge e, bool edge_0)
 e_cache = m_freelist.pop ();
   else
 e_cache = new ssa_lazy_cache;
-  gori_on_edge (*e_cache, e, this, &m_out);
+  gori_on_edge (*e_cache, e, this, &gori ());
   if (e_cache->empty_p ())
 m_freelist.safe_push (e_cache);
   else
diff --git a/gcc/gimple-range.h b/gcc/gimple-range.h
index 1532951a449..180090bed15 100644
--- a/gcc/gimple-range.h
+++ b/gcc/gimple-range.h
@@ -121,7 +121,6 @@ protected:
   DISABLE_COPY_AND_ASSIGN (dom_ranger);
   void maybe_push_edge (edge e, bool edge_0);
   ssa_cache m_global;
-  gimple_outgoing_range m_out;
   vec m_freelist;
   vec m_e0;
   vec m_e1;
diff --git a/gcc/tree-vrp.cc b/gcc/tree-vrp.cc
index 7d7f9fe2932..1c7b451d8fb 100644
--- a/gcc/tree-vrp.cc
+++ b/gcc/tree-vrp.cc
@@ -85,14 +85,15 @@ along with GCC; see the file COPYING3.  If not see
 
 class remove_unreachable {
 public:
-  remove_unreachable (gimple_ranger &r, bool all) : m_ranger (r), final_p (all)
+  remove_unreachable (range_query &r, bool all) : m_ranger (r), final_p (all)
 { m_list.create (30); }
   ~remove_unreachable () { m_list.release (); }
   void handle_early (gimple *s, edge e);
   void maybe_register (gimple *s);
+  bool remove ();
   bool remove_and_update_globals ();
   vec<std::pair<int, int> > m_list;
-  gimple_ranger &m_ranger;
+  range_query &m_ranger;
   bool final_p;
 };
 
@@ -195,6 +196,9 @@ fully_replaceable (tree name, basic_block bb)
 void
 remove_unreachable::handle_early (gimple *s, edge e)
 {
+  // If there is no gori_ssa, there is no early processsing.
+  if (!m_ranger.gori_ssa ())
+return ;
   bool lhs_p = TREE_CODE (gimple_cond_lhs (s)) == SSA_NAME;
   bool rhs_p = TREE_CODE (gimple_cond_rhs (s)) == SSA_NAME;
   // Do not remove __builtin_unreachable if it confers a relation, or
@@ -253,6 +257,41 @@ remove_unreachable::handle_early (gimple *s, edge e)
 }
 }
 
+// Process the edges in the list, change the conditions and removing any
+// dead code feeding those conditions.   This removes the unreachables, but
+// makes no attempt to set globals values.
+
+bool
+remove_unreachable::remove ()
+{
+  if (!final_p || m_list.length () == 0)
+return false;
+
+  bool change = false;
+  unsigned i;
+  for (i = 0; i < m_list.length (); i++)
+{
+  auto eb = m_list[i];
+  basic_block src = BASIC_BLOCK_FOR_FN (cfun, eb.first);
+  basic_block dest = BASIC_BLOCK_FOR_FN (cfun, eb.second);
+  if (!src || !dest)
+   continue;
+  edge e = find_edge (src, dest);
+  gimple *s = gimple_outgoing_range_stmt_p (e->src);
+  gcc_checking_assert (gimple_code (s) == GIMPLE_COND);
+
+  change = true;
+  // Rewrite the condition.
+  if (e->flags & EDGE_TRUE_VALUE)
+   gimple_cond_make_true (as_a<gcond *> (s));
+  else
+   gimple_cond_make_false (as_a<gcond *> (s));
+  update_stmt (s);
+}
+
+  return change;

[gcc r15-882] Reduce cost of MEM (A + imm).

2024-05-28 Thread hongtao Liu via Gcc-cvs
https://gcc.gnu.org/g:1d6199e5f8c1c08083eeb0279f71333234fe14ad

commit r15-882-g1d6199e5f8c1c08083eeb0279f71333234fe14ad
Author: liuhongt 
Date:   Mon Feb 19 13:57:24 2024 +0800

Reduce cost of MEM (A + imm).

For MEM, rtx_cost iterates over each subrtx and adds up the costs,
so for MEM (reg) and MEM (reg + 4), the former costs 5 and
the latter costs 9, which is not accurate for x86.  Ideally
address_cost should be used, but it reduces the cost too much.
So the current solution is to make a constant displacement as cheap
as possible.

gcc/ChangeLog:

PR target/67325
* config/i386/i386.cc (ix86_rtx_costs): Reduce cost of MEM (A
+ imm) to "cost of MEM (A)" + 1.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr67325.c: New test.

Diff:
---
 gcc/config/i386/i386.cc | 18 +-
 gcc/testsuite/gcc.target/i386/pr67325.c |  7 +++
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 3e2a3a194f1..85d87b9f778 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -22194,7 +22194,23 @@ ix86_rtx_costs (rtx x, machine_mode mode, int 
outer_code_i, int opno,
   /* An insn that accesses memory is slightly more expensive
  than one that does not.  */
   if (speed)
-*total += 1;
+   {
+ *total += 1;
+ rtx addr = XEXP (x, 0);
+ /* For MEM, rtx_cost iterates each subrtx, and adds up the costs,
+so for MEM (reg) and MEM (reg + 4), the former costs 5,
+the latter costs 9, it is not accurate for x86. Ideally
+address_cost should be used, but it reduce cost too much.
+So current solution is make constant disp as cheap as possible.  */
+ if (GET_CODE (addr) == PLUS
+ && x86_64_immediate_operand (XEXP (addr, 1), Pmode))
+   {
+ *total += 1;
+ *total += rtx_cost (XEXP (addr, 0), Pmode, PLUS, 0, speed);
+ return true;
+   }
+   }
+
   return false;
 
 case ZERO_EXTRACT:
diff --git a/gcc/testsuite/gcc.target/i386/pr67325.c 
b/gcc/testsuite/gcc.target/i386/pr67325.c
new file mode 100644
index 000..c3c1e4c5b4d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr67325.c
@@ -0,0 +1,7 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not "(?:sar|shr)" } } */
+
+int f(long*l){
+  return *l>>32;
+}


[gcc r15-884] rs6000: Don't clobber return value when eh_return called [PR114846]

2024-05-28 Thread Kewen Lin via Gcc-cvs
https://gcc.gnu.org/g:e5fc5d42d25c86ae48178db04ce64d340a834614

commit r15-884-ge5fc5d42d25c86ae48178db04ce64d340a834614
Author: Kewen Lin 
Date:   Tue May 28 21:13:40 2024 -0500

rs6000: Don't clobber return value when eh_return called [PR114846]

As the associated test case in PR114846 shows, currently,
when eh_return is involved, restoring some registers for EH
RETURN DATA in the epilogue can clobber the one which is holding
the return value.  Following the existing handling in
some other targets, this patch makes the eh_return expander
call a new define_insn_and_split, eh_return_internal, which
directly calls rs6000_emit_epilogue with epilogue_type
EPILOGUE_TYPE_EH_RETURN, instead of the previous approach of
specially treating a normal return with crtl->calls_eh_return.

PR target/114846

gcc/ChangeLog:

* config/rs6000/rs6000-logue.cc (rs6000_emit_epilogue): As
EPILOGUE_TYPE_EH_RETURN would be passed as epilogue_type directly
now, adjust the relevant handlings on it.
* config/rs6000/rs6000.md (eh_return expander): Append by calling
gen_eh_return_internal and emit_barrier.
(eh_return_internal): New define_insn_and_split, call function
rs6000_emit_epilogue with epilogue type EPILOGUE_TYPE_EH_RETURN.

gcc/testsuite/ChangeLog:

* gcc.target/powerpc/pr114846.c: New test.

Diff:
---
 gcc/config/rs6000/rs6000-logue.cc   |  7 +++
 gcc/config/rs6000/rs6000.md | 15 +++
 gcc/testsuite/gcc.target/powerpc/pr114846.c | 20 
 3 files changed, 38 insertions(+), 4 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-logue.cc 
b/gcc/config/rs6000/rs6000-logue.cc
index 60ba15a8bc3..bd5d56ba002 100644
--- a/gcc/config/rs6000/rs6000-logue.cc
+++ b/gcc/config/rs6000/rs6000-logue.cc
@@ -4308,9 +4308,6 @@ rs6000_emit_epilogue (enum epilogue_type epilogue_type)
 
   rs6000_stack_t *info = rs6000_stack_info ();
 
-  if (epilogue_type == EPILOGUE_TYPE_NORMAL && crtl->calls_eh_return)
-epilogue_type = EPILOGUE_TYPE_EH_RETURN;
-
   int strategy = info->savres_strategy;
   bool using_load_multiple = !!(strategy & REST_MULTIPLE);
   bool restoring_GPRs_inline = !!(strategy & REST_INLINE_GPRS);
@@ -4788,7 +4785,9 @@ rs6000_emit_epilogue (enum epilogue_type epilogue_type)
 
   /* In the ELFv2 ABI we need to restore all call-saved CR fields from
  *separate* slots if the routine calls __builtin_eh_return, so
- that they can be independently restored by the unwinder.  */
+ that they can be independently restored by the unwinder.  Since
+ it is for CR fields restoring, it should be done for any epilogue
+ types (not EPILOGUE_TYPE_EH_RETURN specific).  */
   if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
 {
   int i, cr_off = info->ehcr_offset;
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index f035e68ff0f..a5d20594789 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -14274,6 +14274,8 @@
   ""
 {
   emit_insn (gen_eh_set_lr (Pmode, operands[0]));
+  emit_jump_insn (gen_eh_return_internal ());
+  emit_barrier ();
   DONE;
 })
 
@@ -14290,6 +14292,19 @@
   DONE;
 })
 
+(define_insn_and_split "eh_return_internal"
+  [(eh_return)]
+  ""
+  "#"
+  "epilogue_completed"
+  [(const_int 0)]
+{
+  if (!TARGET_SCHED_PROLOG)
+emit_insn (gen_blockage ());
+  rs6000_emit_epilogue (EPILOGUE_TYPE_EH_RETURN);
+  DONE;
+})
+
 (define_insn "prefetch"
   [(prefetch (match_operand 0 "indexed_or_indirect_address" "a")
 (match_operand:SI 1 "const_int_operand" "n")
diff --git a/gcc/testsuite/gcc.target/powerpc/pr114846.c 
b/gcc/testsuite/gcc.target/powerpc/pr114846.c
new file mode 100644
index 000..efe2300b73a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr114846.c
@@ -0,0 +1,20 @@
+/* { dg-do run } */
+/* { dg-require-effective-target builtin_eh_return } */
+
+/* Ensure it runs successfully.  */
+
+__attribute__ ((noipa))
+int f (int *a, long offset, void *handler)
+{
+  if (*a == 5)
+return 5;
+  __builtin_eh_return (offset, handler);
+}
+
+int main ()
+{
+  int t = 5;
+  if (f (&t, 0, 0) != 5)
+__builtin_abort ();
+  return 0;
+}


[gcc r15-885] Gori_on_edge tweaks.

2024-05-28 Thread Andrew Macleod via Gcc-cvs
https://gcc.gnu.org/g:a19f588d0b71a4cbc48b064177de87d3ca46b39f

commit r15-885-ga19f588d0b71a4cbc48b064177de87d3ca46b39f
Author: Andrew MacLeod 
Date:   Wed May 22 19:51:16 2024 -0400

Gori_on_edge tweaks.

FAST_VRP uses a non-ranger gori_on_edge routine which allows an optional
outgoing_edge_range object if one wanted to use switches.  This is now
integrated with the gori () method of a range_query, and is no longer
needed.

* gimple-range-gori.cc (gori_on_edge): Always use static ranges
from the specified range_query.
* gimple-range-gori.h (gori_on_edge): Change prototype.
* gimple-range.cc (dom_ranger::maybe_push_edge): Change arguments
to call.

Diff:
---
 gcc/gimple-range-gori.cc | 20 ++--
 gcc/gimple-range-gori.h  |  6 ++
 gcc/gimple-range.cc  |  2 +-
 3 files changed, 9 insertions(+), 19 deletions(-)

diff --git a/gcc/gimple-range-gori.cc b/gcc/gimple-range-gori.cc
index 0d471b46903..d489aef312c 100644
--- a/gcc/gimple-range-gori.cc
+++ b/gcc/gimple-range-gori.cc
@@ -1625,28 +1625,20 @@ gori_calc_operands (vrange &lhs, gimple *stmt, 
ssa_cache &r, range_query *q)
 }
 
 // Use ssa_cache R as a repository for all outgoing ranges on edge E that
-// can be calculated.  Use OGR if present to establish starting edge ranges,
-// and Q to resolve operand values.  If Q is NULL use the current range
+// can be calculated.  Use Q to establish starting edge ranges anbd to resolve
+// operand values.  If Q is NULL use the current range
 // query available to the system.
 
 bool
-gori_on_edge (ssa_cache &r, edge e, range_query *q, gimple_outgoing_range *ogr)
+gori_on_edge (ssa_cache &r, edge e, range_query *q)
 {
+  if (!q)
+q = get_range_query (cfun);
   // Start with an empty vector
   r.clear ();
   int_range_max lhs;
   // Determine if there is an outgoing edge.
-  gimple *stmt;
-  if (ogr)
-stmt = ogr->edge_range_p (lhs, e);
-  else
-{
-  stmt = gimple_outgoing_range_stmt_p (e->src);
-  if (stmt && is_a<gcond *> (stmt))
-   gcond_edge_range (lhs, e);
-  else
-   stmt = NULL;
-}
+  gimple *stmt = q->gori ().edge_range_p (lhs, e);
   if (!stmt)
 return false;
   gori_calc_operands (lhs, stmt, r, q);
diff --git a/gcc/gimple-range-gori.h b/gcc/gimple-range-gori.h
index 9b4bcd919f5..11019e38471 100644
--- a/gcc/gimple-range-gori.h
+++ b/gcc/gimple-range-gori.h
@@ -213,10 +213,8 @@ private:
 // ssa_cache structure).
 // GORI_NAME_ON_EDGE  is used to simply ask if NAME has a range on edge E
 
-// Fill ssa-cache R with any outgoing ranges on edge E, using OGR and QUERY.
-bool gori_on_edge (class ssa_cache &r, edge e,
-  range_query *query = NULL,
-  gimple_outgoing_range *ogr = NULL);
+// Fill ssa-cache R with any outgoing ranges on edge E, using QUERY.
+bool gori_on_edge (class ssa_cache &r, edge e, range_query *query = NULL);
 
 // Query if NAME has an outgoing range on edge E, and return it in R if so.
 // Note this doesnt use ranger, its a static GORI analysis of the range in
diff --git a/gcc/gimple-range.cc b/gcc/gimple-range.cc
index 711646abb67..be22bb4aa18 100644
--- a/gcc/gimple-range.cc
+++ b/gcc/gimple-range.cc
@@ -1156,7 +1156,7 @@ dom_ranger::maybe_push_edge (edge e, bool edge_0)
 e_cache = m_freelist.pop ();
   else
 e_cache = new ssa_lazy_cache;
-  gori_on_edge (*e_cache, e, this, &gori ());
+  gori_on_edge (*e_cache, e, this);
   if (e_cache->empty_p ())
 m_freelist.safe_push (e_cache);
   else


[gcc r14-10253] Adjust generic loop alignment from 16:11:8 to 16 for Intel processors

2024-05-28 Thread Haochen Jiang via Gcc-cvs
https://gcc.gnu.org/g:80600352d1282f084900ab444f2d4c83986f2ae5

commit r14-10253-g80600352d1282f084900ab444f2d4c83986f2ae5
Author: Haochen Jiang 
Date:   Wed May 29 11:12:37 2024 +0800

Adjust generic loop alignment from 16:11:8 to 16 for Intel processors

Previously, we used 16:11:8 in the generic tune for Intel processors, which
led to cross-cache-line issues and resulted in random performance
penalties in benchmarks with small loops from commit to commit.

Always aligning to 16 bytes should mitigate the issue.

gcc/ChangeLog:

* config/i386/x86-tune-costs.h (generic_cost): Change from
16:11:8 to 16.

Diff:
---
 gcc/config/i386/x86-tune-costs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index 65d7d1f7e42..d34b5cc 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -3758,7 +3758,7 @@ struct processor_costs generic_cost = {
   generic_memset,
   COSTS_N_INSNS (4),   /* cond_taken_branch_cost.  */
   COSTS_N_INSNS (2),   /* cond_not_taken_branch_cost.  */
-  "16:11:8",   /* Loop alignment.  */
+  "16",/* Loop alignment.  */
   "16:11:8",   /* Jump alignment.  */
   "0:0:8", /* Label alignment.  */
   "16",/* Func alignment.  */


[gcc r14-10254] Align tight&hot loop without considering max skipping bytes.

2024-05-28 Thread Haochen Jiang via Gcc-cvs
https://gcc.gnu.org/g:b4d4ece0443433cd5c3078cfe03f18429e73b77a

commit r14-10254-gb4d4ece0443433cd5c3078cfe03f18429e73b77a
Author: liuhongt 
Date:   Wed May 29 11:12:51 2024 +0800

Align tight&hot loop without considering max skipping bytes.

When a hot loop is small enough to fit into one cacheline, we should align
the loop with ceil_log2 (loop_size) without considering the maximum
skip bytes. It will help code prefetch.

gcc/ChangeLog:

* config/i386/i386.cc (ix86_avoid_jump_mispredicts): Change
gen_pad to gen_max_skip_align.
(ix86_align_loops): New function.
(ix86_reorg): Call ix86_align_loops.
* config/i386/i386.md (pad): Rename to ..
(max_skip_align): .. this, and accept 2 operands for align and
skip.

Diff:
---
 gcc/config/i386/i386.cc | 148 +++-
 gcc/config/i386/i386.md |  10 ++--
 2 files changed, 153 insertions(+), 5 deletions(-)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index fbd9b4dac2e..984ba37beeb 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -23135,7 +23135,7 @@ ix86_avoid_jump_mispredicts (void)
  if (dump_file)
fprintf (dump_file, "Padding insn %i by %i bytes!\n",
 INSN_UID (insn), padsize);
-  emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
+ emit_insn_before (gen_max_skip_align (GEN_INT (4), GEN_INT 
(padsize)), insn);
}
 }
 }
@@ -23408,6 +23408,150 @@ ix86_split_stlf_stall_load ()
 }
 }
 
+/* When a hot loop can be fit into one cacheline,
+   force align the loop without considering the max skip.  */
+static void
+ix86_align_loops ()
+{
+  basic_block bb;
+
+  /* Don't do this when we don't know cache line size.  */
+  if (ix86_cost->prefetch_block == 0)
+return;
+
+  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
+  profile_count count_threshold = cfun->cfg->count_max / param_align_threshold;
+  FOR_EACH_BB_FN (bb, cfun)
+{
+  rtx_insn *label = BB_HEAD (bb);
+  bool has_fallthru = 0;
+  edge e;
+  edge_iterator ei;
+
+  if (!LABEL_P (label))
+   continue;
+
+  profile_count fallthru_count = profile_count::zero ();
+  profile_count branch_count = profile_count::zero ();
+
+  FOR_EACH_EDGE (e, ei, bb->preds)
+   {
+ if (e->flags & EDGE_FALLTHRU)
+   has_fallthru = 1, fallthru_count += e->count ();
+ else
+   branch_count += e->count ();
+   }
+
+  if (!fallthru_count.initialized_p () || !branch_count.initialized_p ())
+   continue;
+
+  if (bb->loop_father
+ && bb->loop_father->latch != EXIT_BLOCK_PTR_FOR_FN (cfun)
+ && (has_fallthru
+ ? (!(single_succ_p (bb)
+  && single_succ (bb) == EXIT_BLOCK_PTR_FOR_FN (cfun))
+&& optimize_bb_for_speed_p (bb)
+&& branch_count + fallthru_count > count_threshold
+&& (branch_count > fallthru_count * 
param_align_loop_iterations))
+ /* In case there'no fallthru for the loop.
+Nops inserted won't be executed.  */
+ : (branch_count > count_threshold
+|| (bb->count > bb->prev_bb->count * 10
+&& (bb->prev_bb->count
+<= ENTRY_BLOCK_PTR_FOR_FN (cfun)->count / 2)
+   {
+ rtx_insn* insn, *end_insn;
+ HOST_WIDE_INT size = 0;
+ bool padding_p = true;
+ basic_block tbb = bb;
+ unsigned cond_branch_num = 0;
+ bool detect_tight_loop_p = false;
+
+ for (unsigned int i = 0; i != bb->loop_father->num_nodes;
+  i++, tbb = tbb->next_bb)
+   {
+ /* Only handle continuous cfg layout. */
+ if (bb->loop_father != tbb->loop_father)
+   {
+ padding_p = false;
+ break;
+   }
+
+ FOR_BB_INSNS (tbb, insn)
+   {
+ if (!NONDEBUG_INSN_P (insn))
+   continue;
+ size += ix86_min_insn_size (insn);
+
+ /* We don't know size of inline asm.
+Don't align loop for call.  */
+ if (asm_noperands (PATTERN (insn)) >= 0
+ || CALL_P (insn))
+   {
+ size = -1;
+ break;
+   }
+   }
+
+ if (size == -1 || size > ix86_cost->prefetch_block)
+   {
+ padding_p = false;
+ break;
+   }
+
+ FOR_EACH_EDGE (e, ei, tbb->succs)
+   {
+ /* It could be part of the loop.  */
+ if (e->dest == bb)
+   {
+ detect_tight_loop_p = true;
+ break;
+   }
+   }
+
+ if (

[gcc r15-887] Adjust generic loop alignment from 16:11:8 to 16 for Intel processors

2024-05-28 Thread Haochen Jiang via Gcc-cvs
https://gcc.gnu.org/g:00ed5424b1d4dcccfa187f55205521826794898c

commit r15-887-g00ed5424b1d4dcccfa187f55205521826794898c
Author: Haochen Jiang 
Date:   Wed May 29 11:13:55 2024 +0800

Adjust generic loop alignment from 16:11:8 to 16 for Intel processors

Previously, we used 16:11:8 in the generic tune for Intel processors, which
led to cross-cache-line issues and resulted in random performance
penalties in benchmarks with small loops from commit to commit.

Always aligning to 16 bytes should mitigate the issue.

gcc/ChangeLog:

* config/i386/x86-tune-costs.h (generic_cost): Change from
16:11:8 to 16.

Diff:
---
 gcc/config/i386/x86-tune-costs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index 65d7d1f7e42..d34b5cc 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -3758,7 +3758,7 @@ struct processor_costs generic_cost = {
   generic_memset,
   COSTS_N_INSNS (4),   /* cond_taken_branch_cost.  */
   COSTS_N_INSNS (2),   /* cond_not_taken_branch_cost.  */
-  "16:11:8",   /* Loop alignment.  */
+  "16",/* Loop alignment.  */
   "16:11:8",   /* Jump alignment.  */
   "0:0:8", /* Label alignment.  */
   "16",/* Func alignment.  */


[gcc r15-888] Align tight&hot loop without considering max skipping bytes.

2024-05-28 Thread Haochen Jiang via Gcc-cvs
https://gcc.gnu.org/g:b644126237a1aa8599f767a5e0bbada1d7286f44

commit r15-888-gb644126237a1aa8599f767a5e0bbada1d7286f44
Author: liuhongt 
Date:   Wed May 29 11:14:26 2024 +0800

Align tight&hot loop without considering max skipping bytes.

When a hot loop is small enough to fit into one cacheline, we should align
the loop with ceil_log2 (loop_size) without considering the maximum
skip bytes. It will help code prefetch.

gcc/ChangeLog:

* config/i386/i386.cc (ix86_avoid_jump_mispredicts): Change
gen_pad to gen_max_skip_align.
(ix86_align_loops): New function.
(ix86_reorg): Call ix86_align_loops.
* config/i386/i386.md (pad): Rename to ..
(max_skip_align): .. this, and accept 2 operands for align and
skip.

Diff:
---
 gcc/config/i386/i386.cc | 148 +++-
 gcc/config/i386/i386.md |  10 ++--
 2 files changed, 153 insertions(+), 5 deletions(-)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 85d87b9f778..1a0206ab573 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -23146,7 +23146,7 @@ ix86_avoid_jump_mispredicts (void)
  if (dump_file)
fprintf (dump_file, "Padding insn %i by %i bytes!\n",
 INSN_UID (insn), padsize);
-  emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
+ emit_insn_before (gen_max_skip_align (GEN_INT (4), GEN_INT 
(padsize)), insn);
}
 }
 }
@@ -23419,6 +23419,150 @@ ix86_split_stlf_stall_load ()
 }
 }
 
+/* When a hot loop can be fit into one cacheline,
+   force align the loop without considering the max skip.  */
+static void
+ix86_align_loops ()
+{
+  basic_block bb;
+
+  /* Don't do this when we don't know cache line size.  */
+  if (ix86_cost->prefetch_block == 0)
+return;
+
+  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
+  profile_count count_threshold = cfun->cfg->count_max / param_align_threshold;
+  FOR_EACH_BB_FN (bb, cfun)
+{
+  rtx_insn *label = BB_HEAD (bb);
+  bool has_fallthru = 0;
+  edge e;
+  edge_iterator ei;
+
+  if (!LABEL_P (label))
+   continue;
+
+  profile_count fallthru_count = profile_count::zero ();
+  profile_count branch_count = profile_count::zero ();
+
+  FOR_EACH_EDGE (e, ei, bb->preds)
+   {
+ if (e->flags & EDGE_FALLTHRU)
+   has_fallthru = 1, fallthru_count += e->count ();
+ else
+   branch_count += e->count ();
+   }
+
+  if (!fallthru_count.initialized_p () || !branch_count.initialized_p ())
+   continue;
+
+  if (bb->loop_father
+ && bb->loop_father->latch != EXIT_BLOCK_PTR_FOR_FN (cfun)
+ && (has_fallthru
+ ? (!(single_succ_p (bb)
+  && single_succ (bb) == EXIT_BLOCK_PTR_FOR_FN (cfun))
+&& optimize_bb_for_speed_p (bb)
+&& branch_count + fallthru_count > count_threshold
+&& (branch_count > fallthru_count * 
param_align_loop_iterations))
+ /* In case there'no fallthru for the loop.
+Nops inserted won't be executed.  */
+ : (branch_count > count_threshold
+|| (bb->count > bb->prev_bb->count * 10
+&& (bb->prev_bb->count
+<= ENTRY_BLOCK_PTR_FOR_FN (cfun)->count / 2)
+   {
+ rtx_insn* insn, *end_insn;
+ HOST_WIDE_INT size = 0;
+ bool padding_p = true;
+ basic_block tbb = bb;
+ unsigned cond_branch_num = 0;
+ bool detect_tight_loop_p = false;
+
+ for (unsigned int i = 0; i != bb->loop_father->num_nodes;
+  i++, tbb = tbb->next_bb)
+   {
+ /* Only handle continuous cfg layout. */
+ if (bb->loop_father != tbb->loop_father)
+   {
+ padding_p = false;
+ break;
+   }
+
+ FOR_BB_INSNS (tbb, insn)
+   {
+ if (!NONDEBUG_INSN_P (insn))
+   continue;
+ size += ix86_min_insn_size (insn);
+
+ /* We don't know size of inline asm.
+Don't align loop for call.  */
+ if (asm_noperands (PATTERN (insn)) >= 0
+ || CALL_P (insn))
+   {
+ size = -1;
+ break;
+   }
+   }
+
+ if (size == -1 || size > ix86_cost->prefetch_block)
+   {
+ padding_p = false;
+ break;
+   }
+
+ FOR_EACH_EDGE (e, ei, tbb->succs)
+   {
+ /* It could be part of the loop.  */
+ if (e->dest == bb)
+   {
+ detect_tight_loop_p = true;
+ break;
+   }
+   }
+
+ if (de

[gcc r14-10255] tree-optimization/114921 - _Float16 -> __bf16 isn't noop fixup

2024-05-28 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:9e971c671ded9647beb0a1c5b9430b4e64060862

commit r14-10255-g9e971c671ded9647beb0a1c5b9430b4e64060862
Author: Richard Biener 
Date:   Mon May 6 12:03:09 2024 +0200

tree-optimization/114921 - _Float16 -> __bf16 isn't noop fixup

The following further strengthens the check which convert expressions
we allow to vectorize as simple copy by resorting to
tree_nop_conversion_p on the vector components.

PR tree-optimization/114921
* tree-vect-stmts.cc (vectorizable_assignment): Use
tree_nop_conversion_p to identify converts we can vectorize
with a simple assignment.

(cherry picked from commit d0d6dcc019cd32eebf85d625f56e0f7573938319)

Diff:
---
 gcc/tree-vect-stmts.cc | 19 +++
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index f8d8636b139..21e8fe98e44 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -5955,14 +5955,17 @@ vectorizable_assignment (vec_info *vinfo,
   if (!vectype_in)
 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
 
-  /* We can handle NOP_EXPR conversions that do not change the number
- of elements or the vector size.  */
-  if ((CONVERT_EXPR_CODE_P (code)
-   || code == VIEW_CONVERT_EXPR)
-  && (!vectype_in
- || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
- || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
-  GET_MODE_SIZE (TYPE_MODE (vectype_in)
+  /* We can handle VIEW_CONVERT conversions that do not change the number
+ of elements or the vector size or other conversions when the component
+ types are nop-convertible.  */
+  if (!vectype_in
+  || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
+  || (code == VIEW_CONVERT_EXPR
+ && maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
+  GET_MODE_SIZE (TYPE_MODE (vectype_in
+  || (CONVERT_EXPR_CODE_P (code)
+ && !tree_nop_conversion_p (TREE_TYPE (vectype),
+TREE_TYPE (vectype_in
 return false;
 
   if (VECTOR_BOOLEAN_TYPE_P (vectype) != VECTOR_BOOLEAN_TYPE_P (vectype_in))


[gcc r14-10257] tree-optimization/115149 - VOP live and missing PHIs

2024-05-28 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:90a447677a2abb934b683a012b477e6c52088e35

commit r14-10257-g90a447677a2abb934b683a012b477e6c52088e35
Author: Richard Biener 
Date:   Tue May 21 09:48:04 2024 +0200

tree-optimization/115149 - VOP live and missing PHIs

The following fixes a bug in vop-live get_live_in which was using
NULL to indicate the first processed edge but at the same time
using it for the case the live-in virtual operand cannot be computed.
The following fixes this, avoiding sinking a load to a place where
we'd have to insert virtual PHIs to make the virtual operand SSA
web OK.

PR tree-optimization/115149
* tree-ssa-live.cc (virtual_operand_live::get_live_in):
Explicitly track the first processed edge.

* gcc.dg/pr115149.c: New testcase.

(cherry picked from commit ec9b8bafe20755d13ab9a1b834b5da79ae972c0e)

Diff:
---
 gcc/testsuite/gcc.dg/pr115149.c | 16 
 gcc/tree-ssa-live.cc|  8 ++--
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/pr115149.c b/gcc/testsuite/gcc.dg/pr115149.c
new file mode 100644
index 000..9f6bc97dbe6
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr115149.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-inline -fno-tree-vrp -fno-ipa-sra -fno-tree-dce 
-fno-tree-ch" } */
+
+int a, c, e, f, g, h[1], i;
+static int j(int b) { return 0; }
+static void k(int d) {}
+int main()
+{
+  if (h[0])
+while (1) {
+   k(f && j(i && (h[g] = e)));
+   while (a)
+ c ^= 1;
+}
+  return 0;
+}
diff --git a/gcc/tree-ssa-live.cc b/gcc/tree-ssa-live.cc
index d94e94eb3bc..122d8e245dd 100644
--- a/gcc/tree-ssa-live.cc
+++ b/gcc/tree-ssa-live.cc
@@ -1684,14 +1684,18 @@ virtual_operand_live::get_live_in (basic_block bb)
   edge_iterator ei;
   edge e;
   tree livein = NULL_TREE;
+  bool first = true;
   FOR_EACH_EDGE (e, ei, bb->preds)
 if (e->flags & EDGE_DFS_BACK)
   /* We can ignore backedges since if there's a def there it would
 have forced a PHI in the source because it also acts as use
 downstream.  */
   continue;
-else if (!livein)
-  livein = get_live_out (e->src);
+else if (first)
+  {
+   livein = get_live_out (e->src);
+   first = false;
+  }
 else if (get_live_out (e->src) != livein)
   /* When there's no virtual use downstream this indicates a point
 where we'd insert a PHI merging the different live virtual


[gcc r14-10256] tree-optimization/115197 - fix ICE w/ constant in LC PHI and loop distribution

2024-05-28 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:2a1fdd5fd0f6bc02d25da192c8fa6487d93d2d50

commit r14-10256-g2a1fdd5fd0f6bc02d25da192c8fa6487d93d2d50
Author: Richard Biener 
Date:   Thu May 23 14:36:39 2024 +0200

tree-optimization/115197 - fix ICE w/ constant in LC PHI and loop 
distribution

Forgot a check for an SSA name before trying to replace a PHI arg with
its current definition.

PR tree-optimization/115197
* tree-loop-distribution.cc (copy_loop_before): Constant PHI
args remain the same.

* gcc.dg/pr115197.c: New testcase.

(cherry picked from commit 2b2476d4d18c92b8aba3567ebccd2100c2f7c258)

Diff:
---
 gcc/testsuite/gcc.dg/pr115197.c | 14 ++
 gcc/tree-loop-distribution.cc   |  7 +--
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/pr115197.c b/gcc/testsuite/gcc.dg/pr115197.c
new file mode 100644
index 000..00d674b3bd9
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr115197.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fno-tree-scev-cprop -ftree-pre 
-ftree-loop-distribute-patterns" } */
+
+int a, b[2], c, d, e, f[2];
+int main() {
+  while (a)
+if (d) {
+  if (e)
+return 0;
+  for (; c; c++)
+f[c] = 0 < (b[c] = ~(f[c + 1] < a));
+}
+  return 0;
+}
diff --git a/gcc/tree-loop-distribution.cc b/gcc/tree-loop-distribution.cc
index 45932bae5e7..c5a05ee151d 100644
--- a/gcc/tree-loop-distribution.cc
+++ b/gcc/tree-loop-distribution.cc
@@ -977,8 +977,11 @@ copy_loop_before (class loop *loop, bool 
redirect_lc_phi_defs)
  if (virtual_operand_p (gimple_phi_result (phi)))
continue;
  use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
- tree new_def = get_current_def (USE_FROM_PTR (use_p));
- SET_USE (use_p, new_def);
+ if (TREE_CODE (USE_FROM_PTR (use_p)) == SSA_NAME)
+   {
+ tree new_def = get_current_def (USE_FROM_PTR (use_p));
+ SET_USE (use_p, new_def);
+   }
}
 }


[gcc(refs/users/aoliva/heads/testme)] enable adjustment of return_pc debug attrs

2024-05-28 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:99047b701feb580e047a1b13bdf85a7fd21224e2

commit 99047b701feb580e047a1b13bdf85a7fd21224e2
Author: Alexandre Oliva 
Date:   Wed May 29 02:52:07 2024 -0300

enable adjustment of return_pc debug attrs

This patch introduces infrastructure for targets to add an offset to
the label issued after the call_insn to set the call_return_pc
attribute.  This will be used on rs6000, that sometimes issues another
instruction after the call proper as part of a call insn.


for  gcc/ChangeLog

* target.def (call_offset_return_label): New hook.
* gcc/doc/tm.texi.in (TARGET_CALL_OFFSET_RETURN_LABEL): Add
placeholder.
* gcc/doc/tm.texi: Rebuild.
* dwarf2out.cc (struct call_arg_loc_node): Record call_insn
instead of call_arg_loc_note.
(add_AT_lbl_id): Add optional offset argument.
(gen_call_site_die): Compute and pass on a return pc offset.
(gen_subprogram_die): Move call_arg_loc_note computation...
(dwarf2out_var_location): ... from here.  Set call_insn.

Diff:
---
 gcc/doc/tm.texi|  7 +++
 gcc/doc/tm.texi.in |  2 ++
 gcc/dwarf2out.cc   | 26 +-
 gcc/target.def |  9 +
 4 files changed, 35 insertions(+), 9 deletions(-)

diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index cd50078227d..8a7aa70d605 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -5557,6 +5557,13 @@ except the last are treated as named.
 You need not define this hook if it always returns @code{false}.
 @end deftypefn
 
+@deftypefn {Target Hook} int TARGET_CALL_OFFSET_RETURN_LABEL (rtx_insn 
*@var{call_insn})
+While generating call-site debug info for a CALL insn, or a SEQUENCE
+insn starting with a CALL, this target hook is invoked to compute the
+offset to be added to the debug label emitted after the call to obtain
+the return address that should be recorded as the return PC.
+@end deftypefn
+
 @deftypefn {Target Hook} void TARGET_START_CALL_ARGS (cumulative_args_t 
@var{complete_args})
 This target hook is invoked while generating RTL for a function call,
 after the argument values have been computed, and after stack arguments
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 058bd56487a..9e0830758ae 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -3887,6 +3887,8 @@ These machine description macros help implement varargs:
 
 @hook TARGET_STRICT_ARGUMENT_NAMING
 
+@hook TARGET_CALL_OFFSET_RETURN_LABEL
+
 @hook TARGET_START_CALL_ARGS
 
 @hook TARGET_CALL_ARGS
diff --git a/gcc/dwarf2out.cc b/gcc/dwarf2out.cc
index 5b064ffd78a..1092880738d 100644
--- a/gcc/dwarf2out.cc
+++ b/gcc/dwarf2out.cc
@@ -3593,7 +3593,7 @@ typedef struct var_loc_list_def var_loc_list;
 
 /* Call argument location list.  */
 struct GTY ((chain_next ("%h.next"))) call_arg_loc_node {
-  rtx GTY (()) call_arg_loc_note;
+  rtx_insn * GTY (()) call_insn;
   const char * GTY (()) label;
   tree GTY (()) block;
   bool tail_call_p;
@@ -3777,7 +3777,8 @@ static void remove_addr_table_entry (addr_table_entry *);
 static void add_AT_addr (dw_die_ref, enum dwarf_attribute, rtx, bool);
 static inline rtx AT_addr (dw_attr_node *);
 static void add_AT_symview (dw_die_ref, enum dwarf_attribute, const char *);
-static void add_AT_lbl_id (dw_die_ref, enum dwarf_attribute, const char *);
+static void add_AT_lbl_id (dw_die_ref, enum dwarf_attribute, const char *,
+  int = 0);
 static void add_AT_lineptr (dw_die_ref, enum dwarf_attribute, const char *);
 static void add_AT_macptr (dw_die_ref, enum dwarf_attribute, const char *);
 static void add_AT_range_list (dw_die_ref, enum dwarf_attribute,
@@ -5353,14 +5354,17 @@ add_AT_symview (dw_die_ref die, enum dwarf_attribute 
attr_kind,
 
 static inline void
 add_AT_lbl_id (dw_die_ref die, enum dwarf_attribute attr_kind,
-   const char *lbl_id)
+  const char *lbl_id, int offset)
 {
   dw_attr_node attr;
 
   attr.dw_attr = attr_kind;
   attr.dw_attr_val.val_class = dw_val_class_lbl_id;
   attr.dw_attr_val.val_entry = NULL;
-  attr.dw_attr_val.v.val_lbl_id = xstrdup (lbl_id);
+  if (!offset)
+attr.dw_attr_val.v.val_lbl_id = xstrdup (lbl_id);
+  else
+attr.dw_attr_val.v.val_lbl_id = xasprintf ("%s%+i", lbl_id, offset);
   if (dwarf_split_debug_info)
 attr.dw_attr_val.val_entry
 = add_addr_table_entry (attr.dw_attr_val.v.val_lbl_id,
@@ -23515,7 +23519,9 @@ gen_call_site_die (tree decl, dw_die_ref subr_die,
   if (stmt_die == NULL)
 stmt_die = subr_die;
   die = new_die (dwarf_TAG (DW_TAG_call_site), stmt_die, NULL_TREE);
-  add_AT_lbl_id (die, dwarf_AT (DW_AT_call_return_pc), ca_loc->label);
+  add_AT_lbl_id (die, dwarf_AT (DW_AT_call_return_pc),
+ca_loc->label,
+targetm.calls.call_offset_return_label (ca_loc->call_insn));
   if (ca_loc->tail_call_p)
 add_AT_flag (die, dwarf_AT (DW_AT_call_tail_call), 1);
   if (ca_lo

[gcc(refs/users/aoliva/heads/testme)] [rs6000] adjust return_pc debug attrs

2024-05-28 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:0bb10f149bfcbe874d348814c623b36e9232cb3f

commit 0bb10f149bfcbe874d348814c623b36e9232cb3f
Author: Alexandre Oliva 
Date:   Wed May 29 02:52:10 2024 -0300

[rs6000] adjust return_pc debug attrs

Some of the rs6000 call patterns, on some ABIs, issue multiple opcodes
out of a single call insn, but the call (bl) or jump (b) is not always
the last opcode in the sequence.

This does not seem to be a problem for exception handling tables, but
the return_pc attribute in the call graph output in dwarf2+ debug
information, that takes the address of a label output right after the
call, does not match the value of the link register even for non-tail
calls.  E.g., with ABI_AIX or ABI_ELFv2, such code as:

  foo ();

outputs:

  bl foo
  nop
 LVL#:
[...]
  .8byte .LVL#  # DW_AT_call_return_pc

but debug info consumers may rely on the return_pc address, and draw
incorrect conclusions from its off-by-4 value.

This patch uses the infrastructure for targets to add an offset to the
label issued after the call_insn to set the call_return_pc attribute,
on rs6000, to account for opcodes issued after actual call opcode as
part of call insns output patterns.


for  gcc/ChangeLog

* config/rs6000/rs6000.cc (TARGET_CALL_OFFSET_RETURN_LABEL):
Override.
(rs6000_call_offset_return_label): New.

Diff:
---
 gcc/config/rs6000/rs6000.cc | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index e4dc629ddcc..77e6b94a539 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -1779,6 +1779,8 @@ static const scoped_attribute_specs *const 
rs6000_attribute_table[] =
 #undef TARGET_OVERLAP_OP_BY_PIECES_P
 #define TARGET_OVERLAP_OP_BY_PIECES_P hook_bool_void_true
 
+#undef TARGET_CALL_OFFSET_RETURN_LABEL
+#define TARGET_CALL_OFFSET_RETURN_LABEL rs6000_call_offset_return_label
 
 
 /* Processor table.  */
@@ -14822,6 +14824,22 @@ rs6000_assemble_integer (rtx x, unsigned int size, int 
aligned_p)
   return default_assemble_integer (x, size, aligned_p);
 }
 
+/* Return the offset to be added to the label output after CALL_INSN
+   to compute the address to be placed in DW_AT_call_return_pc.  */
+
+static int
+rs6000_call_offset_return_label (rtx_insn *call_insn)
+{
+  /* All rs6000 CALL_INSN output patterns start with a b or bl, always
+ a 4-byte instruction, but some output patterns issue other
+ opcodes afterwards.  The return label is issued after the entire
+ call insn, including any such post-call opcodes.  Instead of
+ figuring out which cases need adjustments, we compute the offset
+ back to the address of the call opcode proper, then add the
+ constant 4 bytes, to get the address after that opcode.  */
+  return 4 - get_attr_length (call_insn);
+}
+
 /* Return a template string for assembly to emit when making an
external call.  FUNOP is the call mem argument operand number.  */


[gcc(refs/users/aoliva/heads/testme)] [testsuite] [arm] add effective target and options for pacbti tests

2024-05-28 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:1b22d42a6624285073f3ef6309af74046fa875e4

commit 1b22d42a6624285073f3ef6309af74046fa875e4
Author: Alexandre Oliva 
Date:   Wed May 29 02:52:20 2024 -0300

[testsuite] [arm] add effective target and options for pacbti tests

arm pac and bti tests that use -march=armv8.1-m.main get an implicit
-mthumb, that is incompatible with vxworks kernel mode.  Declaring the
requirement for a 8.1-m.main-compatible toolchain is enough to avoid
those fails, because the toolchain feature test fails in kernel mode,
but taking the -march options from the standardized arch tests, after
testing for support for the corresponding effective target, makes it
generally safer, and enables us to drop skip directives and extraneous
option variants.


for  gcc/testsuite/ChangeLog

* gcc.target/arm/bti-1.c: Require arch, use its opts, drop skip.
* gcc.target/arm/bti-2.c: Likewise.
* gcc.target/arm/acle/pacbti-m-predef-11.c: Likewise.
* gcc.target/arm/acle/pacbti-m-predef-12.c: Likewise.
* gcc.target/arm/acle/pacbti-m-predef-7.c: Likewise.
* g++.target/arm/pac-1.C: Likewise.  Drop +mve.

Diff:
---
 gcc/testsuite/g++.target/arm/pac-1.C   | 5 +++--
 gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-11.c | 4 ++--
 gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-12.c | 5 +++--
 gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-7.c  | 5 +++--
 gcc/testsuite/gcc.target/arm/bti-1.c   | 5 +++--
 gcc/testsuite/gcc.target/arm/bti-2.c   | 5 +++--
 6 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/gcc/testsuite/g++.target/arm/pac-1.C 
b/gcc/testsuite/g++.target/arm/pac-1.C
index f671a27b048..ac15ae18197 100644
--- a/gcc/testsuite/g++.target/arm/pac-1.C
+++ b/gcc/testsuite/g++.target/arm/pac-1.C
@@ -1,7 +1,8 @@
 /* Check that GCC does .save and .cfi_offset directives with RA_AUTH_CODE 
pseudo hard-register.  */
 /* { dg-do compile } */
-/* { dg-skip-if "avoid conflicting multilib options" { *-*-* } { "-marm" 
"-mcpu=*" } } */
-/* { dg-options "-march=armv8.1-m.main+mve+pacbti -mbranch-protection=pac-ret 
-mthumb -mfloat-abi=hard -g -O0" } */
+/* { dg-require-effective-target arm_arch_v8_1m_main_pacbti_ok } */
+/* { dg-add-options arm_arch_v8_1m_main_pacbti } */
+/* { dg-additional-options "-mbranch-protection=pac-ret -mfloat-abi=hard -g 
-O0" } */
 
 __attribute__((noinline)) void
 fn1 (int a, int b, int c)
diff --git a/gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-11.c 
b/gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-11.c
index 6a5ae92c567..c9c40f44027 100644
--- a/gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-11.c
+++ b/gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-11.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
-/* { dg-skip-if "avoid conflicting multilib options" { *-*-* } { "-marm" 
"-mcpu=*" "-mfloat-abi=*" } } */
-/* { dg-options "-march=armv8.1-m.main+fp+pacbti" } */
+/* { dg-require-effective-target arm_arch_v8_1m_main_pacbti_ok } */
+/* { dg-add-options arm_arch_v8_1m_main_pacbti } */
 
 #if (__ARM_FEATURE_BTI != 1)
 #error "Feature test macro __ARM_FEATURE_BTI_DEFAULT should be defined to 1."
diff --git a/gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-12.c 
b/gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-12.c
index db40b17c3b0..c26051347a2 100644
--- a/gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-12.c
+++ b/gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-12.c
@@ -1,6 +1,7 @@
 /* { dg-do compile } */
-/* { dg-skip-if "avoid conflicting multilib options" { *-*-* } { "-marm" 
"-mcpu=*" } } */
-/* { dg-options "-march=armv8-m.main+fp -mfloat-abi=softfp" } */
+/* { dg-require-effective-target arm_arch_v8_1m_main_ok } */
+/* { dg-add-options arm_arch_v8_1m_main } */
+/* { dg-additional-options "-mfloat-abi=softfp" } */
 
 #if defined (__ARM_FEATURE_BTI)
 #error "Feature test macro __ARM_FEATURE_BTI should not be defined."
diff --git a/gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-7.c 
b/gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-7.c
index 1b25907635e..92f500c1449 100644
--- a/gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-7.c
+++ b/gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-7.c
@@ -1,6 +1,7 @@
 /* { dg-do compile } */
-/* { dg-skip-if "avoid conflicting multilib options" { *-*-* } { "-marm" 
"-mcpu=*" } } */
-/* { dg-additional-options "-march=armv8.1-m.main+pacbti+fp --save-temps 
-mfloat-abi=hard" } */
+/* { dg-require-effective-target arm_arch_v8_1m_main_pacbti_ok } */
+/* { dg-add-options arm_arch_v8_1m_main_pacbti } */
+/* { dg-additional-options "--save-temps -mfloat-abi=hard" } */
 
 #if defined (__ARM_FEATURE_BTI_DEFAULT)
 #error "Feature test macro __ARM_FEATURE_BTI_DEFAULT should be undefined."
diff --git a/gcc/testsuite/gcc.target/arm/bti-1.c 
b/gcc/testsuite/gcc.target/arm/bti-1.c
index 79dd8010d2d..a34bb0842b6 100644
--- a/gcc/testsuite/gcc.target/arm/bti-1.c
+++ b/gcc/test

[gcc(refs/users/aoliva/heads/testme)] [tree-prof] skip if errors were seen [PR113681]

2024-05-28 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:1c340408045fdf647edca445e40a75b09bce7493

commit 1c340408045fdf647edca445e40a75b09bce7493
Author: Alexandre Oliva 
Date:   Wed May 29 02:52:23 2024 -0300

[tree-prof] skip if errors were seen [PR113681]

ipa_tree_profile asserts that the symtab is in IPA_SSA state, but we
don't reach that state and ICE if e.g. ipa-strub passes report errors.
Skip this pass if errors were seen.


for  gcc/ChangeLog

PR tree-optimization/113681
* tree-profile.cc (pass_ipa_tree_profile::gate): Skip if
seen_errors.

for  gcc/testsuite/ChangeLog

PR tree-optimization/113681
* c-c++-common/strub-pr113681.c: New.

Diff:
---
 gcc/testsuite/c-c++-common/strub-pr113681.c | 22 ++
 gcc/tree-profile.cc |  3 ++-
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/c-c++-common/strub-pr113681.c 
b/gcc/testsuite/c-c++-common/strub-pr113681.c
new file mode 100644
index 000..3ef9017b2eb
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/strub-pr113681.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-fstrub=relaxed -fbranch-probabilities" } */
+/* { dg-require-effective-target strub } */
+
+/* Same as torture/strub-inlineable1.c, but with -fbranch-probabilities, to
+   check that IPA tree-profiling won't ICE.  It would when we refrained from
+   running passes that would take it to IPA_SSA, but ran the pass that asserted
+   for IPA_SSA.  */
+
+inline void __attribute__ ((strub ("internal"), always_inline))
+inl_int_ali (void)
+{
+  /* No internal wrapper, so this body ALWAYS gets inlined,
+ but it cannot be called from non-strub contexts.  */
+}
+
+void
+bat (void)
+{
+  /* Not allowed, not a strub context.  */
+  inl_int_ali (); /* { dg-error "context" } */
+}
diff --git a/gcc/tree-profile.cc b/gcc/tree-profile.cc
index b87c121790c..e4bb689cef5 100644
--- a/gcc/tree-profile.cc
+++ b/gcc/tree-profile.cc
@@ -2070,7 +2070,8 @@ pass_ipa_tree_profile::gate (function *)
  disabled.  */
   return (!in_lto_p && !flag_auto_profile
  && (flag_branch_probabilities || flag_test_coverage
- || profile_arc_flag || condition_coverage_flag));
+ || profile_arc_flag || condition_coverage_flag)
+ && !seen_error ());
 }
 
 } // anon namespace


[gcc(refs/users/aoliva/heads/testme)] [libstdc++-v3] [rtems] enable filesystem support

2024-05-28 Thread Alexandre Oliva via Libstdc++-cvs
https://gcc.gnu.org/g:0276651ab74a1200d8c1defeee3b2a4f53712fe7

commit 0276651ab74a1200d8c1defeee3b2a4f53712fe7
Author: Alexandre Oliva 
Date:   Wed May 29 02:52:25 2024 -0300

[libstdc++-v3] [rtems] enable filesystem support

mkdir, chdir and chmod functions are defined in librtemscpu, that
doesn't get linked in during libstdc++-v3 configure, but applications
use -qrtems for linking, which brings those symbols in, so it makes
sense to mark them as available so that the C++ filesystem APIs are
enabled.


for  libstdc++-v3/ChangeLog

* configure.ac [*-*-rtems*]: Set chdir, chmod and mkdir as
available.
* configure: Rebuilt.

Diff:
---
 libstdc++-v3/configure| 7 +++
 libstdc++-v3/configure.ac | 7 +++
 2 files changed, 14 insertions(+)

diff --git a/libstdc++-v3/configure b/libstdc++-v3/configure
index 5179cc507f1..a7d1c015906 100755
--- a/libstdc++-v3/configure
+++ b/libstdc++-v3/configure
@@ -28610,6 +28610,13 @@ _ACEOF
 
 $as_echo "#define HAVE_USLEEP 1" >>confdefs.h
 
+
+   # These functions are defined in librtemscpu.  We don't use
+   # -qrtems during configure, so we don't link that in, and fail
+   # to find them.
+   glibcxx_cv_chdir=yes
+   glibcxx_cv_chmod=yes
+   glibcxx_cv_mkdir=yes
 ;;
 esac
   elif test "x$with_headers" != "xno"; then
diff --git a/libstdc++-v3/configure.ac b/libstdc++-v3/configure.ac
index 37396bd6ebb..0725c81bc9f 100644
--- a/libstdc++-v3/configure.ac
+++ b/libstdc++-v3/configure.ac
@@ -400,6 +400,13 @@ dnl # rather than hardcoding that information.
 AC_DEFINE(HAVE_SYMLINK)
 AC_DEFINE(HAVE_TRUNCATE)
 AC_DEFINE(HAVE_USLEEP)
+
+   # These functions are defined in librtemscpu.  We don't use
+   # -qrtems during configure, so we don't link that in, and fail
+   # to find them.
+   glibcxx_cv_chdir=yes
+   glibcxx_cv_chmod=yes
+   glibcxx_cv_mkdir=yes
 ;;
 esac
   elif test "x$with_headers" != "xno"; then


[gcc(refs/users/aoliva/heads/testme)] [testsuite] [powerpc] adjust -m32 counts for fold-vec-extract*

2024-05-28 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:ca809ee3fbe8a5d383c2438c123d72dbbba7f48a

commit ca809ee3fbe8a5d383c2438c123d72dbbba7f48a
Author: Alexandre Oliva 
Date:   Wed May 29 02:52:18 2024 -0300

[testsuite] [powerpc] adjust -m32 counts for fold-vec-extract*

Codegen changes caused add instruction count mismatches on
ppc-*-linux-gnu and other 32-bit ppc targets.  At some point the
expected counts were adjusted for lp64, but ilp32 differences
remained, and published test results confirm it.


for  gcc/testsuite/ChangeLog

PR testsuite/101169
* gcc.target/powerpc/fold-vec-extract-double.p7.c: Adjust addi
counts for ilp32.
* gcc.target/powerpc/fold-vec-extract-float.p7.c: Likewise.
* gcc.target/powerpc/fold-vec-extract-float.p8.c: Likewise.
* gcc.target/powerpc/fold-vec-extract-int.p7.c: Likewise.
* gcc.target/powerpc/fold-vec-extract-int.p8.c: Likewise.
* gcc.target/powerpc/fold-vec-extract-short.p7.c: Likewise.
* gcc.target/powerpc/fold-vec-extract-short.p8.c: Likewise.

Diff:
---
 gcc/testsuite/gcc.target/powerpc/fold-vec-extract-double.p7.c | 5 ++---
 gcc/testsuite/gcc.target/powerpc/fold-vec-extract-float.p7.c  | 5 ++---
 gcc/testsuite/gcc.target/powerpc/fold-vec-extract-float.p8.c  | 3 +--
 gcc/testsuite/gcc.target/powerpc/fold-vec-extract-int.p7.c| 3 +--
 gcc/testsuite/gcc.target/powerpc/fold-vec-extract-int.p8.c| 3 +--
 gcc/testsuite/gcc.target/powerpc/fold-vec-extract-short.p7.c  | 3 +--
 gcc/testsuite/gcc.target/powerpc/fold-vec-extract-short.p8.c  | 3 +--
 7 files changed, 9 insertions(+), 16 deletions(-)

diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-extract-double.p7.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-extract-double.p7.c
index 3cae644b90b..e69d9253e2d 100644
--- a/gcc/testsuite/gcc.target/powerpc/fold-vec-extract-double.p7.c
+++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-extract-double.p7.c
@@ -13,12 +13,11 @@
 /* { dg-final { scan-assembler-times {\mxxpermdi\M} 1 } } */
 /* { dg-final { scan-assembler-times {\mli\M} 1 } } */
 /* -m32 target has an 'add' in place of one of the 'addi'. */
-/* { dg-final { scan-assembler-times {\maddi\M|\madd\M} 2 { target lp64 } } } */
-/* { dg-final { scan-assembler-times {\maddi\M|\madd\M} 3 { target ilp32 } } } */
+/* { dg-final { scan-assembler-times {\maddi?\M} 2 } } */
 /* -m32 target has a rlwinm in place of a rldic .  */
 /* { dg-final { scan-assembler-times {\mrldic\M|\mrlwinm\M} 1 } } */
 /* { dg-final { scan-assembler-times {\mstxvd2x\M} 1 } } */
-/* { dg-final { scan-assembler-times {\mlfdx\M|\mlfd\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mlfdx?\M} 1 } } */
 
 #include <altivec.h>
 
diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-extract-float.p7.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-extract-float.p7.c
index f7c06e96109..ab03cd8adb0 100644
--- a/gcc/testsuite/gcc.target/powerpc/fold-vec-extract-float.p7.c
+++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-extract-float.p7.c
@@ -12,13 +12,12 @@
 /* { dg-final { scan-assembler-times {\mxscvspdp\M} 1 } } */
 /* { dg-final { scan-assembler-times {\mli\M} 1 } } */
 /* -m32 as an add in place of an addi. */
-/* { dg-final { scan-assembler-times {\maddi\M|\madd\M} 2 { target lp64 } } } */
-/* { dg-final { scan-assembler-times {\maddi\M|\madd\M} 3 { target ilp32 } } } */
+/* { dg-final { scan-assembler-times {\maddi?\M} 2 } } */
 /* { dg-final { scan-assembler-times {\mstxvd2x\M|\mstvx\M|\mstxv\M} 1 } } */
 /* -m32 uses rlwinm in place of rldic */
 /* { dg-final { scan-assembler-times {\mrldic\M|\mrlwinm\M} 1 } } */
 /* -m32 has lfs in place of lfsx */
-/* { dg-final { scan-assembler-times {\mlfsx\M|\mlfs\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mlfsx?\M} 1 } } */
 
 #include <altivec.h>
 
diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-extract-float.p8.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-extract-float.p8.c
index 6819d271c53..ce435d82c16 100644
--- a/gcc/testsuite/gcc.target/powerpc/fold-vec-extract-float.p8.c
+++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-extract-float.p8.c
@@ -24,9 +24,8 @@
 /* { dg-final { scan-assembler-times {\mli\M} 1 { target ilp32 } } } */
 /* { dg-final { scan-assembler-times {\mrlwinm\M} 1 { target ilp32 } } } */
 /* { dg-final { scan-assembler-times {\mstxvd2x\M} 1 { target ilp32 } } } */
-/* { dg-final { scan-assembler-times {\madd\M} 1 { target ilp32 } } } */
 /* { dg-final { scan-assembler-times {\mlfs\M} 1 { target ilp32 } } } */
-/* { dg-final { scan-assembler-times {\maddi\M} 2 { target ilp32 } } } */
+/* { dg-final { scan-assembler-times {\maddi?\M} 2 { target ilp32 } } } */
 
 
 #include <altivec.h>
diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-extract-int.p7.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-extract-int.p7.c
index 51636926953..20e3d253489 100644
--- a/gcc/testsuite/gcc.target/powerpc/fold-vec-extract-int.p7.c
+++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-extract-int.p7.