[gcc r15-4017] tree-optimization/114855 - profile prediction slowness

2024-10-02 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:4ba4165d66b18d7c5b8af02ecdf38bfa0690c106

commit r15-4017-g4ba4165d66b18d7c5b8af02ecdf38bfa0690c106
Author: Richard Biener 
Date:   Thu Sep 26 11:43:21 2024 +0200

tree-optimization/114855 - profile prediction slowness

The testcase in PR114855 shows profile prediction to evaluate
the same SSA def via expr_expected_value for each condition or
switch in a function.  The following patch caches the expected
value (and probability/predictor) for each visited SSA def,
also protecting against recursion and thus obsoleting the visited
bitmap.  This reduces the time spent in branch prediction from
1.2s to 0.3s, though callgrind, which is how I noticed this,
seems to be comparatively much happier about the change than
this number suggests.

PR tree-optimization/114855
* predict.cc (ssa_expected_value): New global.
(expr_expected_value): Do not take bitmap.
(expr_expected_value_1): Likewise.  Use ssa_expected_value
to cache results for a SSA def.
(tree_predict_by_opcode): Adjust.
(tree_estimate_probability): Manage ssa_expected_value.
(tree_guess_outgoing_edge_probabilities): Likewise.

Diff:
---
 gcc/predict.cc | 117 -
 1 file changed, 91 insertions(+), 26 deletions(-)

diff --git a/gcc/predict.cc b/gcc/predict.cc
index f611161f4aa0..affa037371ca 100644
--- a/gcc/predict.cc
+++ b/gcc/predict.cc
@@ -517,6 +517,16 @@ struct edge_prediction {
 
 static hash_map *bb_predictions;
 
+/* Global cache for expr_expected_value.  */
+
+struct expected_value
+{
+  tree val;
+  enum br_predictor predictor;
+  HOST_WIDE_INT probability;
+};
+static hash_map, expected_value> *ssa_expected_value;
+
 /* Return true if the one of outgoing edges is already predicted by
PREDICTOR.  */
 
@@ -2356,14 +2366,14 @@ guess_outgoing_edge_probabilities (basic_block bb)
   combine_predictions_for_insn (BB_END (bb), bb);
 }
 
-static tree expr_expected_value (tree, bitmap, enum br_predictor *predictor,
+static tree expr_expected_value (tree, enum br_predictor *predictor,
 HOST_WIDE_INT *probability);
 
 /* Helper function for expr_expected_value.  */
 
 static tree
 expr_expected_value_1 (tree type, tree op0, enum tree_code code,
-  tree op1, bitmap visited, enum br_predictor *predictor,
+  tree op1, enum br_predictor *predictor,
   HOST_WIDE_INT *probability)
 {
   gimple *def;
@@ -2401,8 +2411,19 @@ expr_expected_value_1 (tree type, tree op0, enum 
tree_code code,
   def = SSA_NAME_DEF_STMT (op0);
 
   /* If we were already here, break the infinite cycle.  */
-  if (!bitmap_set_bit (visited, SSA_NAME_VERSION (op0)))
-   return NULL;
+  bool existed_p;
+  expected_value *res
+   = &ssa_expected_value->get_or_insert (SSA_NAME_VERSION (op0),
+ &existed_p);
+  if (existed_p)
+   {
+ *probability = res->probability;
+ *predictor = res->predictor;
+ return res->val;
+   }
+  res->val = NULL_TREE;
+  res->predictor = *predictor;
+  res->probability = *probability;
 
   if (gphi *phi = dyn_cast  (def))
{
@@ -2443,7 +2464,7 @@ expr_expected_value_1 (tree type, tree op0, enum 
tree_code code,
continue;
}
  HOST_WIDE_INT probability2;
- tree new_val = expr_expected_value (arg, visited, &predictor2,
+ tree new_val = expr_expected_value (arg, &predictor2,
  &probability2);
  /* If we know nothing about value, give up.  */
  if (!new_val)
@@ -2477,6 +2498,11 @@ expr_expected_value_1 (tree type, tree op0, enum 
tree_code code,
  *predictor = PRED_COMBINED_VALUE_PREDICTIONS_PHI;
  *probability = MIN (p1, p2);
}
+
+ res = ssa_expected_value->get (SSA_NAME_VERSION (op0));
+ res->val = val;
+ res->predictor = *predictor;
+ res->probability = *probability;
  return val;
}
   if (is_gimple_assign (def))
@@ -2484,11 +2510,19 @@ expr_expected_value_1 (tree type, tree op0, enum 
tree_code code,
  if (gimple_assign_lhs (def) != op0)
return NULL;
 
- return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
-   gimple_assign_rhs1 (def),
-   gimple_assign_rhs_code (def),
-   gimple_assign_rhs2 (def),
-   visited, predictor, probability);
+ tree val = expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
+   gimple_assign_rhs1 (def),
+   

[gcc r15-4018] tree-optimization/113197 - bogus assert in PTA

2024-10-02 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:02f4efe3c12cf7ef54e5a71b11044c15be5c7fab

commit r15-4018-g02f4efe3c12cf7ef54e5a71b11044c15be5c7fab
Author: Richard Biener 
Date:   Mon Sep 30 09:07:36 2024 +0200

tree-optimization/113197 - bogus assert in PTA

PTA asserts that EAF_NO_DIRECT_READ is not set when flags are
set consistently which doesn't make sense.  The following removes
the assert.

PR tree-optimization/113197
* tree-ssa-structalias.cc (handle_call_arg): Remove bogus
assert.

* gcc.dg/lto/pr113197_0.c: New testcase.
* gcc.dg/lto/pr113197_1.c: Likewise.

Diff:
---
 gcc/testsuite/gcc.dg/lto/pr113197_0.c | 15 +++
 gcc/testsuite/gcc.dg/lto/pr113197_1.c |  3 +++
 gcc/tree-ssa-structalias.cc   |  1 -
 3 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/lto/pr113197_0.c 
b/gcc/testsuite/gcc.dg/lto/pr113197_0.c
new file mode 100644
index ..293c8207dee0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/lto/pr113197_0.c
@@ -0,0 +1,15 @@
+/* { dg-lto-do link } */
+/* { dg-lto-options { { -O -flto -fpie } } } */
+/* { dg-extra-ld-options { -r -nostdlib -flinker-output=nolto-rel } } */
+
+enum a { b } register_dccp();
+void c();
+void __attribute__((noreturn)) exit_error(enum a d) {
+  __builtin_va_list va;
+  __builtin_va_end(va);
+  if (d)
+c();
+  c();
+  __builtin_exit(1);
+}
+int main() { register_dccp(); }
diff --git a/gcc/testsuite/gcc.dg/lto/pr113197_1.c 
b/gcc/testsuite/gcc.dg/lto/pr113197_1.c
new file mode 100644
index ..30bf6f7e7c5d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/lto/pr113197_1.c
@@ -0,0 +1,3 @@
+int a;
+void exit_error();
+void register_dccp() { exit_error(a); }
diff --git a/gcc/tree-ssa-structalias.cc b/gcc/tree-ssa-structalias.cc
index 54c4818998d8..73ba5aa6195c 100644
--- a/gcc/tree-ssa-structalias.cc
+++ b/gcc/tree-ssa-structalias.cc
@@ -4194,7 +4194,6 @@ handle_call_arg (gcall *stmt, tree arg, vec 
*results, int flags,
 {
   make_transitive_closure_constraints (tem);
   callarg_transitive = true;
-  gcc_checking_assert (!(flags & EAF_NO_DIRECT_READ));
 }
 
   /* If necessary, produce varinfo for indirect accesses to ARG.  */


[gcc r15-4019] testsuite/116596 - fix gcc.dg/vect/slp-11a.c

2024-10-02 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:61d87f27916dd1bddb0f38d0eb53d4cf59fa5a0a

commit r15-4019-g61d87f27916dd1bddb0f38d0eb53d4cf59fa5a0a
Author: Richard Biener 
Date:   Wed Oct 2 13:00:45 2024 +0200

testsuite/116596 - fix gcc.dg/vect/slp-11a.c

The condition on "vectorizing stmts using SLP" needs to match that
of "vectorized 1 loops", obviously.

PR testsuite/116596
* gcc.dg/vect/slp-11a.c: Fix.

Diff:
---
 gcc/testsuite/gcc.dg/vect/slp-11a.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/vect/slp-11a.c 
b/gcc/testsuite/gcc.dg/vect/slp-11a.c
index 2efa1796757e..196ef65bb782 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-11a.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-11a.c
@@ -72,4 +72,4 @@ int main (void)
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { 
vect_strided8 && vect_int_mult } } } } */
 /* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { 
! { vect_strided8 && vect_int_mult } } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } 
} */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { 
target { vect_strided8 && vect_int_mult } } } }  */


[gcc r15-4023] Speedup iterative_hash_template_arg

2024-10-02 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:842fbfa15fff2daae4a4d8a9cbcdf18e22635f78

commit r15-4023-g842fbfa15fff2daae4a4d8a9cbcdf18e22635f78
Author: Richard Biener 
Date:   Wed Oct 2 09:39:50 2024 +0200

Speedup iterative_hash_template_arg

Using iterative_hash_object is expensive compared to using
iterative_hash_hashval_t which is fit for integer sized values.
The following reduces the number of perf cycles spent in
iterative_hash_template_arg and iterative_hash combined by 20%.

gcc/cp/
* pt.cc (iterative_hash_template_arg): Avoid using
iterative_hash_object.

Diff:
---
 gcc/cp/pt.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index 43468e5f62e8..04f0a1d5fff7 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -1751,7 +1751,7 @@ hashval_t
 iterative_hash_template_arg (tree arg, hashval_t val)
 {
   if (arg == NULL_TREE)
-return iterative_hash_object (arg, val);
+return iterative_hash_hashval_t (0, val);
 
   if (!TYPE_P (arg))
 /* Strip nop-like things, but not the same as STRIP_NOPS.  */
@@ -1762,7 +1762,7 @@ iterative_hash_template_arg (tree arg, hashval_t val)
 
   enum tree_code code = TREE_CODE (arg);
 
-  val = iterative_hash_object (code, val);
+  val = iterative_hash_hashval_t (code, val);
 
   switch (code)
 {
@@ -1777,7 +1777,7 @@ iterative_hash_template_arg (tree arg, hashval_t val)
   return val;
 
 case IDENTIFIER_NODE:
-  return iterative_hash_object (IDENTIFIER_HASH_VALUE (arg), val);
+  return iterative_hash_hashval_t (IDENTIFIER_HASH_VALUE (arg), val);
 
 case TREE_VEC:
   for (tree elt : tree_vec_range (arg))


[gcc r15-4014] libstdc++: Fix -Wlong-long warning in <bits/postypes.h>

2024-10-02 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:05b7ab86e7d70e6f71a3012b08f5704b056a69fe

commit r15-4014-g05b7ab86e7d70e6f71a3012b08f5704b056a69fe
Author: Jonathan Wakely 
Date:   Tue Oct 1 10:43:43 2024 +0100

libstdc++: Fix -Wlong-long warning in <bits/postypes.h>

For 32-bit targets __INT64_TYPE__ expands to long long, which gives a
pedwarn for C++98 mode, causing:

FAIL: 17_intro/headers/c++1998/all_pedantic_errors.cc  -std=gnu++98 (test 
for excess errors)
Excess errors:
.../bits/postypes.h:64: error: ISO C++ 1998 does not support 'long long' 
[-Wlong-long]

libstdc++-v3/ChangeLog:

* include/bits/postypes.h: Fix -Wlong-long warning.

Diff:
---
 libstdc++-v3/include/bits/postypes.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/libstdc++-v3/include/bits/postypes.h 
b/libstdc++-v3/include/bits/postypes.h
index 7bd973e089b6..cf5f30187fde 100644
--- a/libstdc++-v3/include/bits/postypes.h
+++ b/libstdc++-v3/include/bits/postypes.h
@@ -52,6 +52,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   // unspecified. The behaviour in this implementation is as noted
   // below.
 
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wlong-long"
   /**
*  @brief  Type used by fpos, char_traits, and char_traits.
*
@@ -65,6 +67,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 #else
   typedef long long streamoff;
 #endif
+#pragma GCC diagnostic pop
 
   /// Integral type for I/O operation counts and buffer sizes.
   typedef ptrdiff_tstreamsize; // Signed integral type


[gcc r15-4015] libstdc++: Fix rounding in chrono::parse

2024-10-02 Thread Jonathan Wakely via Gcc-cvs
https://gcc.gnu.org/g:5cf26f2569bf007a2c9c058e43ddfe9a5f67da42

commit r15-4015-g5cf26f2569bf007a2c9c058e43ddfe9a5f67da42
Author: Jonathan Wakely 
Date:   Fri Sep 6 21:41:47 2024 +0100

libstdc++: Fix rounding in chrono::parse

I noticed that chrono::parse was using duration_cast and time_point_cast
to convert the parsed value to the result. Those functions truncate
towards zero, which is not generally what you want. Especially for
negative times before the epoch, where truncating towards zero rounds
"up" towards the next duration/time_point. Using chrono::round is
typically better, as that rounds to nearest.

However, while testing the fix I realised that rounding to the nearest
can give surprising results in some cases. For example if we parse a
chrono::sys_days using chrono::parse("%F %T", "2024-09-22 18:34:56", tp)
then we will round up to the next day, i.e. sys_days(2024y/09/23). That
seems surprising, and I think 2024-09-22 is what most users would
expect.

This change attempts to provide a hybrid rounding heuristic where we use
chrono::round for the general case, but when the result has a period
that is one of minutes, hours, days, weeks, or years then we truncate
towards negative infinity using chrono::floor. This means that we
truncate "2024-09-22 18:34:56" to the start of the current
minute/hour/day/week/year, instead of rounding up to 2024-09-23, or to
18:35, or 19:00. For a period of months chrono::round is used, because
the months duration is defined as a twelfth of a year, which is not
actually the length of any calendar month. We don't want to truncate to
a whole number of "months" if that can actually go from e.g. 2023-03-01
to 2023-01-31, because February is shorter than chrono::months(1).

libstdc++-v3/ChangeLog:

* include/bits/chrono_io.h (__detail::__use_floor): New
function.
(__detail::__round): New function.
(from_stream): Use __detail::__round.
* testsuite/std/time/clock/file/io.cc: Check for expected
rounding in parse.
* testsuite/std/time/clock/gps/io.cc: Likewise.

Diff:
---
 libstdc++-v3/include/bits/chrono_io.h| 64 +---
 libstdc++-v3/testsuite/std/time/clock/file/io.cc | 21 +++-
 libstdc++-v3/testsuite/std/time/clock/gps/io.cc  | 20 
 3 files changed, 97 insertions(+), 8 deletions(-)

diff --git a/libstdc++-v3/include/bits/chrono_io.h 
b/libstdc++-v3/include/bits/chrono_io.h
index 1e34c82b532d..362bb5aa9e98 100644
--- a/libstdc++-v3/include/bits/chrono_io.h
+++ b/libstdc++-v3/include/bits/chrono_io.h
@@ -2407,6 +2407,56 @@ namespace __detail
   template
 using _Parser_t = _Parser>;
 
+  template
+consteval bool
+__use_floor()
+{
+  if constexpr (_Duration::period::den == 1)
+   {
+ switch (_Duration::period::num)
+ {
+   case minutes::period::num:
+   case hours::period::num:
+   case days::period::num:
+   case weeks::period::num:
+   case years::period::num:
+ return true;
+ }
+   }
+  return false;
+}
+
+  // A "do the right thing" rounding function for duration and time_point
+  // values extracted by from_stream. When treat_as_floating_point is true
+  // we don't want to do anything, just a straightforward conversion.
+  // When the destination type has a period of minutes, hours, days, weeks,
+  // or years, we use chrono::floor to truncate towards negative infinity.
+  // This ensures that an extracted timestamp such as 2024-09-05 13:00:00
+  // will produce 2024-09-05 when rounded to days, rather than rounding up
+  // to 2024-09-06 (a different day).
+  // Otherwise, use chrono::round to get the nearest value representable
+  // in the destination type.
+  template
+constexpr auto
+__round(const _Tp& __t)
+{
+  if constexpr (__is_duration_v<_Tp>)
+   {
+ if constexpr (treat_as_floating_point_v)
+   return chrono::duration_cast<_ToDur>(__t);
+ else if constexpr (__detail::__use_floor<_ToDur>())
+   return chrono::floor<_ToDur>(__t);
+ else
+   return chrono::round<_ToDur>(__t);
+   }
+  else
+   {
+ static_assert(__is_time_point_v<_Tp>);
+ using _Tpt = time_point;
+ return _Tpt(__detail::__round<_ToDur>(__t.time_since_epoch()));
+   }
+}
+
 } // namespace __detail
 /// @endcond
 
@@ -2421,7 +2471,7 @@ namespace __detail
   auto __need = __format::_ChronoParts::_TimeOfDay;
   __detail::_Parser_t> __p(__need);
   if (__p(__is, __fmt, __abbrev, __offset))
-   __d = chrono::duration_cast>(__p._M_time);
+   __d = __detail::__round>(__p._M_time);
   return __is;
 }
 
@@ -2882,7 +2932,7 @@ namespace __detail
  else
{
  auto __st = __p._M_sys_days + __p._M

[gcc r15-4016] libstdc++: Populate std::time_get::get's %c format for C locale

2024-10-02 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:c534e37faccf481afa9bc28f0605ca0ec3846c89

commit r15-4016-gc534e37faccf481afa9bc28f0605ca0ec3846c89
Author: Jonathan Wakely 
Date:   Tue Sep 24 23:20:56 2024 +0100

libstdc++: Populate std::time_get::get's %c format for C locale

We were using the empty string "" for D_T_FMT and ERA_D_T_FMT in the C
locale, instead of "%a %b %e %T %Y" as the C standard requires. Set it
correctly for each locale implementation that defines time_members.cc.

We can also explicitly set the _M_era_xxx pointers to the same values as
the corresponding _M_xxx ones, rather than setting them to point to
identical string literals. This doesn't rely on the compiler merging
string literals, and makes it more explicit that they're the same in the
C locale.

libstdc++-v3/ChangeLog:

* config/locale/dragonfly/time_members.cc
(__timepunct<char>::_M_initialize_timepunc)
(__timepunct<wchar_t>::_M_initialize_timepunc): Set
_M_date_time_format for C locale. Set %Ex formats to the same
values as the %x formats.
* config/locale/generic/time_members.cc: Likewise.
* config/locale/gnu/time_members.cc: Likewise.
* testsuite/22_locale/time_get/get/char/5.cc: New test.
* testsuite/22_locale/time_get/get/wchar_t/5.cc: New test.

Diff:
---
 .../config/locale/dragonfly/time_members.cc| 16 +-
 libstdc++-v3/config/locale/generic/time_members.cc |  8 ++---
 libstdc++-v3/config/locale/gnu/time_members.cc | 16 +-
 .../testsuite/22_locale/time_get/get/char/5.cc | 37 ++
 .../testsuite/22_locale/time_get/get/wchar_t/5.cc  | 37 ++
 5 files changed, 94 insertions(+), 20 deletions(-)

diff --git a/libstdc++-v3/config/locale/dragonfly/time_members.cc 
b/libstdc++-v3/config/locale/dragonfly/time_members.cc
index 0c96928135eb..069b2ddd26bb 100644
--- a/libstdc++-v3/config/locale/dragonfly/time_members.cc
+++ b/libstdc++-v3/config/locale/dragonfly/time_members.cc
@@ -67,11 +67,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  _M_c_locale_timepunct = _S_get_c_locale();
 
  _M_data->_M_date_format = "%m/%d/%y";
- _M_data->_M_date_era_format = "%m/%d/%y";
+ _M_data->_M_date_era_format = _M_data->_M_date_format;
  _M_data->_M_time_format = "%H:%M:%S";
- _M_data->_M_time_era_format = "%H:%M:%S";
- _M_data->_M_date_time_format = "";
- _M_data->_M_date_time_era_format = "";
+ _M_data->_M_time_era_format = _M_data->_M_time_format;
+ _M_data->_M_date_time_format = "%a %b %e %T %Y";
+ _M_data->_M_date_time_era_format = _M_data->_M_date_time_format;
  _M_data->_M_am = "AM";
  _M_data->_M_pm = "PM";
  _M_data->_M_am_pm_format = "%I:%M:%S %p";
@@ -224,11 +224,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  _M_c_locale_timepunct = _S_get_c_locale();
 
  _M_data->_M_date_format = L"%m/%d/%y";
- _M_data->_M_date_era_format = L"%m/%d/%y";
+ _M_data->_M_date_era_format = _M_data->_M_date_format;
  _M_data->_M_time_format = L"%H:%M:%S";
- _M_data->_M_time_era_format = L"%H:%M:%S";
- _M_data->_M_date_time_format = L"";
- _M_data->_M_date_time_era_format = L"";
+ _M_data->_M_time_era_format = _M_data->_M_time_format;
+ _M_data->_M_date_time_format = L"%a %b %e %T %Y";
+ _M_data->_M_date_time_era_format = _M_data->_M_date_time_format;
  _M_data->_M_am = L"AM";
  _M_data->_M_pm = L"PM";
  _M_data->_M_am_pm_format = L"%I:%M:%S %p";
diff --git a/libstdc++-v3/config/locale/generic/time_members.cc 
b/libstdc++-v3/config/locale/generic/time_members.cc
index 68395820fefa..6619f0ca881a 100644
--- a/libstdc++-v3/config/locale/generic/time_members.cc
+++ b/libstdc++-v3/config/locale/generic/time_members.cc
@@ -65,11 +65,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_data = new __timepunct_cache;
 
   _M_data->_M_date_format = "%m/%d/%y";
-  _M_data->_M_date_era_format = "%m/%d/%y";
+  _M_data->_M_date_era_format = _M_data->_M_date_format;
   _M_data->_M_time_format = "%H:%M:%S";
-  _M_data->_M_time_era_format = "%H:%M:%S";
-  _M_data->_M_date_time_format = "";
-  _M_data->_M_date_time_era_format = "";
+  _M_data->_M_time_era_format = _M_data->_M_time_format;
+  _M_data->_M_date_time_format = "%a %b %e %T %Y";
+  _M_data->_M_date_time_era_format = _M_data->_M_date_time_format;
   _M_data->_M_am = "AM";
   _M_data->_M_pm = "PM";
   _M_data->_M_am_pm_format = "%I:%M:%S %p";
diff --git a/libstdc++-v3/config/locale/gnu/time_members.cc 
b/libstdc++-v3/config/locale/gnu/time_members.cc
index 1e3b87488fae..88c8ab700809 100644
--- a/libstdc++-v3/config/locale/gnu/time_members.cc
+++ b/libstdc++-v3/config/locale/gnu/time_members.cc
@@ -73,11 +73,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  _M_c_locale_timepun

[gcc r15-4020] un-XFAIL gcc.dg/vect/vect-double-reduc-5.c

2024-10-02 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:71896a8bdc3841f0e08b4c6356f54c4735317f37

commit r15-4020-g71896a8bdc3841f0e08b4c6356f54c4735317f37
Author: Richard Biener 
Date:   Wed Oct 2 13:21:22 2024 +0200

un-XFAIL gcc.dg/vect/vect-double-reduc-5.c

The testcase now passes; we can handle double reductions with multiple
types fine.

* gcc.dg/vect/vect-double-reduc-5.c: Un-XFAIL everywhere.

Diff:
---
 gcc/testsuite/gcc.dg/vect/vect-double-reduc-5.c | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/vect-double-reduc-5.c 
b/gcc/testsuite/gcc.dg/vect/vect-double-reduc-5.c
index b990405745e9..a40aa304740f 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-double-reduc-5.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-double-reduc-5.c
@@ -51,7 +51,4 @@ int main ()
   return 0;
 }
 
-/* Vectorization of loops with multiple types and double reduction is not 
-   supported yet.  */   
-/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail 
{ ! { aarch64*-*-* riscv*-*-* } } } } } */
-  
+/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" } } */


[gcc r15-4021] Adjust expectation for gcc.dg/vect/slp-19c.c

2024-10-02 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:32b99dad9a59ce8d67350221f3cfb1986ee67a8f

commit r15-4021-g32b99dad9a59ce8d67350221f3cfb1986ee67a8f
Author: Richard Biener 
Date:   Wed Oct 2 13:39:14 2024 +0200

Adjust expectation for gcc.dg/vect/slp-19c.c

We can now vectorize the first loop with SLP when using V2SImode
vectors since then we can handle the non-power-of-two interleaving.
We can also SLP the second loop reliably now after adding induction
support for VLA vectors.

* gcc.dg/vect/slp-19c.c: Adjust expectation.

Diff:
---
 gcc/testsuite/gcc.dg/vect/slp-19c.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/slp-19c.c 
b/gcc/testsuite/gcc.dg/vect/slp-19c.c
index 188ab37a0b61..588c171dd835 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-19c.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-19c.c
@@ -105,5 +105,9 @@ int main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } 
} */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { 
! vect64 } } } } */
+/* The unsupported interleaving works fine with V2SImode vectors given we
+   can always combine that from two vectors.  */
+/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target 
vect64 } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { 
target { ! vect64 } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" { 
target vect64 } } } */


[gcc r15-4022] Adjust gcc.dg/vect/slp-12a.c

2024-10-02 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:77c5e4ad08e42a325c6b45ea2b679bc1827b8557

commit r15-4022-g77c5e4ad08e42a325c6b45ea2b679bc1827b8557
Author: Richard Biener 
Date:   Wed Oct 2 13:40:59 2024 +0200

Adjust gcc.dg/vect/slp-12a.c

We can now SLP the loop.  There's PR116583 tracking that this still
fails for VLA vectors when load-lanes doesn't support a group of
size 8.  We can't express this right now so the testcase keeps
FAILing for aarch64 with SVE (but passes now for riscv).

* gcc.dg/vect/slp-12a.c: Adjust.

Diff:
---
 gcc/testsuite/gcc.dg/vect/slp-12a.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/slp-12a.c 
b/gcc/testsuite/gcc.dg/vect/slp-12a.c
index fedf27b69d23..c526ea07c7a3 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-12a.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-12a.c
@@ -80,5 +80,4 @@ int main (void)
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { 
vect_strided8 && vect_int_mult } } } } */
 /* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { 
! { vect_strided8 && vect_int_mult } } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { 
target { { vect_strided8 && {! vect_load_lanes } } && vect_int_mult } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { 
target { ! { vect_strided8 && vect_int_mult } } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { 
target { vect_strided8 && vect_int_mult } } } } */


[gcc r15-4008] backprop: Fix deleting of a phi node [PR116922]

2024-10-02 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:cea87c84eacdb422caeada734ba5138c994d7022

commit r15-4008-gcea87c84eacdb422caeada734ba5138c994d7022
Author: Andrew Pinski 
Date:   Tue Oct 1 14:48:19 2024 -0700

backprop: Fix deleting of a phi node [PR116922]

The problem here is remove_unused_var is called on a name that is
defined by a phi node but it deletes it like removing a normal statement.
remove_phi_node should be called rather than gsi_remove for phinodes.

Note there is a possibility of using simple_dce_from_worklist instead
but that is for another day.

Bootstrapped and tested on x86_64-linux-gnu.

PR tree-optimization/116922

gcc/ChangeLog:

* gimple-ssa-backprop.cc (remove_unused_var): Handle phi
nodes correctly.

gcc/testsuite/ChangeLog:

* gcc.dg/torture/pr116922.c: New test.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/gimple-ssa-backprop.cc  | 10 --
 gcc/testsuite/gcc.dg/torture/pr116922.c | 19 +++
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/gcc/gimple-ssa-backprop.cc b/gcc/gimple-ssa-backprop.cc
index fe27ef51cdf2..e3374b181386 100644
--- a/gcc/gimple-ssa-backprop.cc
+++ b/gcc/gimple-ssa-backprop.cc
@@ -663,8 +663,14 @@ remove_unused_var (tree var)
   print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
 }
   gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
-  gsi_remove (&gsi, true);
-  release_defs (stmt);
+  if (gimple_code (stmt) == GIMPLE_PHI)
+remove_phi_node (&gsi, true);
+  else
+{
+  unlink_stmt_vdef (stmt);
+  gsi_remove (&gsi, true);
+  release_defs (stmt);
+}
 }
 
 /* Note that we're replacing OLD_RHS with NEW_RHS in STMT.  */
diff --git a/gcc/testsuite/gcc.dg/torture/pr116922.c 
b/gcc/testsuite/gcc.dg/torture/pr116922.c
new file mode 100644
index ..0fcf912930f4
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr116922.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-ffast-math" } */
+/* PR tree-optimization/116922 */
+
+
+static int g;
+
+void
+foo (int c, double v, double *r)
+{
+b:
+  do
+v /= g - v;
+  while (c);
+  *r = v;
+
+  double x;
+  foo (5, (double)0, &x);
+}


[gcc r15-4009] opts: Fix up regenerate-opt-urls dependencies

2024-10-02 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:ba53ccad554bb4f3c2b0e457a18557ae0f54b05e

commit r15-4009-gba53ccad554bb4f3c2b0e457a18557ae0f54b05e
Author: Jakub Jelinek 
Date:   Wed Oct 2 10:14:50 2024 +0200

opts: Fix up regenerate-opt-urls dependencies

It seems that we currently require
1) enabling at least c,c++,fortran,d in --enable-languages
2) first doing make html
before one can successfully regenerate-opt-urls, otherwise without 2)
one gets
make regenerate-opt-urls
make: *** No rule to make target 
'/home/jakub/src/gcc/obj12x/gcc/HTML/gcc-15.0.0/gcc/Option-Index.html', needed 
by 'regenerate-opt-urls'.  Stop.
or, say, when d is not configured, even after make html one still gets
make regenerate-opt-urls
make: *** No rule to make target 
'/home/jakub/src/gcc/obj12x/gcc/HTML/gcc-15.0.0/gdc/Option-Index.html', needed 
by 'regenerate-opt-urls'.  Stop.

Now, I believe neither 1) nor 2) is really necessary.
The regenerate-opt-urls goal has a dependency on 3 Option-Index.html files,
but those files have no dependencies saying how to generate them.
make html has dependency on $(HTMLS_BUILD) which adds
$(build_htmldir)/gcc/index.html and lang.html among other things, where
the former actually builds not just index.html but also Option-Index.html
and tons of other files, and lang.html is filled in by configure depending
on configured languages, so sometimes will include gfortran.html and
sometimes d.html.

The following patch adds dependencies of the Option-Index.html on their
corresponding index.html files and that is all that seems to be needed,
make regenerate-opt-urls then works even without prior make html and
even if just a subset of c/c++, fortran and d is enabled.

2024-10-02  Jakub Jelinek  

* Makefile.in ($(OPT_URLS_HTML_DEPS)): Add dependencies of the
Option-Index.html files on the corresponding index.html files.
Don't mention the requirement that all languages that have their own
HTML manuals be enabled.

Diff:
---
 gcc/Makefile.in | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 68fda1a75918..059cf2e8f79f 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -3640,12 +3640,12 @@ $(build_htmldir)/gccinstall/index.html: 
$(TEXI_GCCINSTALL_FILES)
$(SHELL) $(srcdir)/doc/install.texi2html
 
 # Regenerate the .opt.urls files from the generated html, and from the .opt
-# files.  Doing so requires all languages that have their own HTML manuals
-# to be enabled.
+# files.
 .PHONY: regenerate-opt-urls
 OPT_URLS_HTML_DEPS = $(build_htmldir)/gcc/Option-Index.html \
$(build_htmldir)/gdc/Option-Index.html \
$(build_htmldir)/gfortran/Option-Index.html
+$(OPT_URLS_HTML_DEPS): %/Option-Index.html: %/index.html
 
 regenerate-opt-urls: $(srcdir)/regenerate-opt-urls.py $(OPT_URLS_HTML_DEPS)
$(srcdir)/regenerate-opt-urls.py $(build_htmldir) $(shell dirname 
$(srcdir))


[gcc r15-4024] gimple ssa: Don't use __builtin_popcount in switch exp transform [PR116616]

2024-10-02 Thread Filip Kastl via Gcc-cvs
https://gcc.gnu.org/g:ffc389cb11a2a61fb89b6034d3f3fe0896b29064

commit r15-4024-gffc389cb11a2a61fb89b6034d3f3fe0896b29064
Author: Filip Kastl 
Date:   Wed Oct 2 14:14:44 2024 +0200

gimple ssa: Don't use __builtin_popcount in switch exp transform [PR116616]

Switch exponential transformation in the switch conversion pass
currently generates

tmp1 = __builtin_popcount (var);
tmp2 = tmp1 == 1;

when inserting code to determine if var is a power of two.  If the target
doesn't support expanding the builtin as special instructions, switch
conversion relies on this whole pattern being expanded as bitmagic.
However, it is possible that other GIMPLE optimizations move the two
statements of the pattern apart.  In that case the builtin becomes a
libgcc call in the final binary.  The call is slow and in case of
freestanding programs can result in a linking error (this bug was
originally found while compiling the Linux kernel).

This patch modifies switch conversion to insert the bitmagic
(var ^ (var - 1)) > (var - 1) instead of the builtin.

gcc/ChangeLog:

PR tree-optimization/116616
* tree-switch-conversion.cc (can_pow2p): Remove this function.
(gen_pow2p): Generate bitmagic instead of a builtin.  Remove the
TYPE parameter.
(switch_conversion::is_exp_index_transform_viable): Don't call
can_pow2p.
(switch_conversion::exp_index_transform): Call gen_pow2p without
the TYPE parameter.
* tree-switch-conversion.h: Remove
m_exp_index_transform_pow2p_type.

gcc/testsuite/ChangeLog:

PR tree-optimization/116616
* gcc.target/i386/switch-exp-transform-1.c: Don't test for
presence of the POPCOUNT internal fn call.

Signed-off-by: Filip Kastl 

Diff:
---
 .../gcc.target/i386/switch-exp-transform-1.c   |  7 +-
 gcc/tree-switch-conversion.cc  | 84 +-
 gcc/tree-switch-conversion.h   |  6 +-
 3 files changed, 23 insertions(+), 74 deletions(-)

diff --git a/gcc/testsuite/gcc.target/i386/switch-exp-transform-1.c 
b/gcc/testsuite/gcc.target/i386/switch-exp-transform-1.c
index a8c9e03e515f..4832f5b52c33 100644
--- a/gcc/testsuite/gcc.target/i386/switch-exp-transform-1.c
+++ b/gcc/testsuite/gcc.target/i386/switch-exp-transform-1.c
@@ -1,10 +1,8 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-switchconv -fdump-tree-widening_mul -mpopcnt 
-mbmi" } */
+/* { dg-options "-O2 -fdump-tree-switchconv -mbmi" } */
 
 /* Checks that exponential index transform enables switch conversion to convert
-   this switch into an array lookup.  Also checks that the "index variable is a
-   power of two" check has been generated and that it has been later expanded
-   into an internal function.  */
+   this switch into an array lookup.  */
 
 int foo(unsigned bar)
 {
@@ -30,4 +28,3 @@ int foo(unsigned bar)
 }
 
 /* { dg-final { scan-tree-dump "CSWTCH" "switchconv" } } */
-/* { dg-final { scan-tree-dump "POPCOUNT" "widening_mul" } } */
diff --git a/gcc/tree-switch-conversion.cc b/gcc/tree-switch-conversion.cc
index c1332a260943..00426d46 100644
--- a/gcc/tree-switch-conversion.cc
+++ b/gcc/tree-switch-conversion.cc
@@ -133,75 +133,33 @@ gen_log2 (tree op, location_t loc, tree *result, tree 
type)
   return stmts;
 }
 
-/* Is it possible to efficiently check that a value of TYPE is a power of 2?
-
-   If yes, returns TYPE.  If no, returns NULL_TREE.  May also return another
-   type.  This indicates that logarithm of the variable can be computed but
-   only after it is converted to this type.
-
-   Also see gen_pow2p.  */
-
-static tree
-can_pow2p (tree type)
-{
-  /* __builtin_popcount supports the unsigned type or its long and long long
- variants.  Choose the smallest out of those that can still fit TYPE.  */
-  int prec = TYPE_PRECISION (type);
-  int i_prec = TYPE_PRECISION (unsigned_type_node);
-  int li_prec = TYPE_PRECISION (long_unsigned_type_node);
-  int lli_prec = TYPE_PRECISION (long_long_unsigned_type_node);
-
-  if (prec <= i_prec)
-return unsigned_type_node;
-  else if (prec <= li_prec)
-return long_unsigned_type_node;
-  else if (prec <= lli_prec)
-return long_long_unsigned_type_node;
-  else
-return NULL_TREE;
-}
-
-/* Build a sequence of gimple statements checking that OP is a power of 2.  Use
-   special optabs if target supports them.  Return the result as a
-   boolean_type_node ssa name through RESULT.  Assumes that OP's value will
-   be non-negative.  The generated check may give arbitrary answer for negative
-   values.
-
-   Before computing the check, OP may have to be converted to another type.
-   This should be specified in TYPE.  Use can_pow2p to decide what this type
-   should be.
-
-   Should only be used if can_pow2p returns true for type of OP.  */
+/* Build a sequence of g

[gcc r15-4033] phiopt: Fix VCE moving by rewriting it into cast [PR116098]

2024-10-02 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:1f619fe25925a5f79b9c33962e7a72e1f9fa

commit r15-4033-g1f619fe25925a5f79b9c33962e7a72e1f9fa
Author: Andrew Pinski 
Date:   Tue Oct 1 18:34:00 2024 +

phiopt: Fix VCE moving by rewriting it into cast [PR116098]

Phiopt match_and_simplify might move a well defined VCE assign statement
from being conditional to being unconditional; that VCE might no longer
be defined. It will need a rewrite into a cast instead.

This adds the rewriting code to move_stmt for the VCE case.
This is enough to fix the issue at hand. It should also be using 
rewrite_to_defined_overflow
but first I need to move the check to see if a rewrite is needed into its own 
function
and that is causing issues (see 
https://gcc.gnu.org/pipermail/gcc-patches/2024-September/663938.html).
Plus this version is easiest to backport.

Bootstrapped and tested on x86_64-linux-gnu.

PR tree-optimization/116098

gcc/ChangeLog:

* tree-ssa-phiopt.cc (move_stmt): Rewrite VCEs from integer to 
integer
types to cast.

gcc/testsuite/ChangeLog:

* c-c++-common/torture/pr116098-2.c: New test.
* g++.dg/torture/pr116098-1.C: New test.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/testsuite/c-c++-common/torture/pr116098-2.c | 46 +
 gcc/testsuite/g++.dg/torture/pr116098-1.C   | 33 ++
 gcc/tree-ssa-phiopt.cc  | 28 ++-
 3 files changed, 106 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/c-c++-common/torture/pr116098-2.c 
b/gcc/testsuite/c-c++-common/torture/pr116098-2.c
new file mode 100644
index ..614ed0491717
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/torture/pr116098-2.c
@@ -0,0 +1,46 @@
+/* { dg-do run } */
+/* PR tree-optimization/116098 */
+
+
+#include <stdbool.h>
+
+struct Value {
+int type;
+union {
+bool boolean;
+long long t;
+};
+};
+
+static struct Value s_item_mem;
+
+/* truthy was being miscompiled for the value.type==2 case,
+   because we would have a VCE from unsigned char to bool
+   that went from being conditional in the value.type==1 case
+   to unconditional when `value.type!=0`.
+   The move of the VCE from conditional to unconditional,
+   needs to be changed into a convert (NOP_EXPR). */
+static bool truthy(void) __attribute__((noipa));
+static bool
+truthy(void)
+{
+struct Value value = s_item_mem;
+if (value.type == 0)
+  return 0;
+if (value.type == 1)
+  return value.boolean;
+return 1;
+}
+
+int
+main(void)
+{
+s_item_mem.type = 2;
+s_item_mem.t = -1;
+bool b1 = !truthy();
+s_item_mem.type = 1;
+s_item_mem.boolean = b1;
+bool b = truthy();
+if (b1 != b)  __builtin_abort();
+if (b) __builtin_abort();
+}
diff --git a/gcc/testsuite/g++.dg/torture/pr116098-1.C 
b/gcc/testsuite/g++.dg/torture/pr116098-1.C
new file mode 100644
index ..90e44a6eeedb
--- /dev/null
+++ b/gcc/testsuite/g++.dg/torture/pr116098-1.C
@@ -0,0 +1,33 @@
+// { dg-do run }
+/* PR tree-optimization/116098 */
+
+
+static bool truthy(int type, unsigned char data) __attribute__((noipa));
+/* truthy was being miscompiled for the type==2 case,
+   because we would have a VCE from unsigned char to bool
+   that went from being conditional in the type==1 case
+   to unconditional when `type!=0`.
+   The move of the VCE from conditional to unconditional,
+   needs to be changed into a convert (NOP_EXPR). */
+
+static bool truthy(void) __attribute__((noipa));
+static bool
+truthy(int type, unsigned char data)
+{
+if (type == 0)
+  return 0;
+if (type == 1)
+  /* Emulate what SRA does, so this can be
+tested without depending on SRA. */
+  return __builtin_bit_cast (bool, data);
+return 1;
+}
+
+int
+main(void)
+{
+bool b1 = !truthy(2, -1);
+bool b = truthy(1, b1);
+if (b1 != b)  __builtin_abort();
+if (b) __builtin_abort();
+}
diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc
index bd7f9607eb9a..43b65b362a39 100644
--- a/gcc/tree-ssa-phiopt.cc
+++ b/gcc/tree-ssa-phiopt.cc
@@ -742,7 +742,8 @@ empty_bb_or_one_feeding_into_p (basic_block bb,
 }
 
 /* Move STMT to before GSI and insert its defining
-   name into INSERTED_EXPRS bitmap. */
+   name into INSERTED_EXPRS bitmap.
+   Also rewrite it if it might be undefined when unconditionalized.  */
 static void
 move_stmt (gimple *stmt, gimple_stmt_iterator *gsi, auto_bitmap 
&inserted_exprs)
 {
@@ -761,6 +762,31 @@ move_stmt (gimple *stmt, gimple_stmt_iterator *gsi, 
auto_bitmap &inserted_exprs)
   gimple_stmt_iterator gsi1 = gsi_for_stmt (stmt);
   gsi_move_before (&gsi1, gsi);
   reset_flow_sensitive_info (name);
+
+  /* Rewrite some code which might be undefined when
+ unconditionalized. */
+  if (gimple_assign_single_p (stmt))
+{
+  tree rhs = gimple_assign_rhs1 (stmt);
+  /* VCE from integral type

[gcc r15-4032] testsuite/52641 - Make gcc.dg/strict-flex-array-3.c work on int != 32 bits.

2024-10-02 Thread Georg-Johann Lay via Gcc-cvs
https://gcc.gnu.org/g:77c3ef08e946306329070ea6415abe7d9e328cd6

commit r15-4032-g77c3ef08e946306329070ea6415abe7d9e328cd6
Author: Georg-Johann Lay 
Date:   Wed Oct 2 19:09:18 2024 +0200

testsuite/52641 - Make gcc.dg/strict-flex-array-3.c work on int != 32 bits.

PR testsuite/52641
gcc/testsuite/
* gcc.dg/strict-flex-array-3.c (expect) [AVR]: Use custom
version due to AVR-LibC limitations.
(stuff): Use __SIZEOF_INT__ instead of hard-coded values.

Diff:
---
 gcc/testsuite/gcc.dg/strict-flex-array-3.c | 19 +--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/strict-flex-array-3.c 
b/gcc/testsuite/gcc.dg/strict-flex-array-3.c
index f74ed96c751c..064f779501a8 100644
--- a/gcc/testsuite/gcc.dg/strict-flex-array-3.c
+++ b/gcc/testsuite/gcc.dg/strict-flex-array-3.c
@@ -17,6 +17,21 @@
} \
 } while (0);
 
+#ifdef __AVR__
+/* AVR-Libc doesn't support %zd, thus use %d for size_t.  */
+#undef  expect
+#define expect(p, _v) do { \
+size_t v = _v; \
+if (p == v)
\
+  __builtin_printf ("ok:  %s == %d\n", #p, p); \
+else   \
+  {
\
+   __builtin_printf ("WAT: %s == %d (expected %d)\n", #p, p, v);   \
+   FAIL ();\
+  }
\
+} while (0);
+#endif /* AVR */
+
 struct trailing_array_1 {
 int a;
 int b;
@@ -46,8 +61,8 @@ void __attribute__((__noinline__)) stuff(
 struct trailing_array_3 *trailing_0,
 struct trailing_array_4 *trailing_flex)
 {
-expect(__builtin_object_size(normal->c, 1), 16);
-expect(__builtin_object_size(trailing_1->c, 1), 4);
+expect(__builtin_object_size(normal->c, 1), 4 * __SIZEOF_INT__);
+expect(__builtin_object_size(trailing_1->c, 1), __SIZEOF_INT__);
 expect(__builtin_object_size(trailing_0->c, 1), 0);
 expect(__builtin_object_size(trailing_flex->c, 1), -1);
 }


[gcc r15-4010] libcpp: Implement clang -Wheader-guard warning [PR96842]

2024-10-02 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:5943a2fa1bc5407332a91976c145446cdb8ded7b

commit r15-4010-g5943a2fa1bc5407332a91976c145446cdb8ded7b
Author: Jakub Jelinek 
Date:   Wed Oct 2 10:53:35 2024 +0200

libcpp: Implement clang -Wheader-guard warning [PR96842]

The following patch implements the clang -Wheader-guard warning, which warns
if a valid multiple inclusion header guard's #ifndef/#if !defined directive
is immediately (no other non-line directives nor other (non-comment)
tokens in between) followed by #define directive for some different macro,
which in get_suggestion rules is close enough to the actual header guard
macro (i.e. likely misspelling), the #define is object-like with empty
definition (I've followed what clang implements) and the macro isn't defined
later on (at least not on the final #endif at the end of a header).

In this case it emits a warning, so that
  #ifndef STDIO_H
  #define STDOI_H
  ...
  #endif
or similar misspellings can be caught.

clang enables this warning by default, but I've put it into -Wall instead
as it still seems to be a style warning, nothing more severe; if a header
doesn't survive multiple inclusion because of the misspelling, users will
get different diagnostics.

2024-10-02  Jakub Jelinek  

PR preprocessor/96842
libcpp/
* include/cpplib.h (struct cpp_options): Add warn_header_guard 
member.
(enum cpp_warning_reason): Add CPP_W_HEADER_GUARD enumerator.
* internal.h (struct cpp_reader): Add mi_def_cmacro, mi_loc and
mi_def_loc members.
(_cpp_defined_macro_p): Constify type pointed by argument type.
Formatting fix.
* init.cc (cpp_create_reader): Clear
CPP_OPTION (pfile, warn_header_guard).
* directives.cc (struct if_stack): Add def_loc and mi_def_cmacro
members.
(DIRECTIVE_TABLE): Add IF_COND flag to define.
(do_define): Set ifs->mi_def_cmacro on a define immediately 
following
#ifndef directive for the guard.  Clear pfile->mi_valid.  Formatting
fix.
(do_endif): Copy over pfile->mi_def_cmacro and pfile->mi_def_loc
if ifs->mi_def_cmacro is set and pfile->mi_cmacro isn't a defined
macro.
(push_conditional): Clear mi_def_cmacro and mi_def_loc members.
* files.cc (_cpp_pop_file_buffer): Emit -Wheader-guard diagnostics.
gcc/
* doc/invoke.texi (Wheader-guard): Document.
gcc/c-family/
* c.opt (Wheader-guard): New option.
* c.opt.urls: Regenerated.
* c-ppoutput.cc (init_pp_output): Initialize also 
cb->get_suggestion.
gcc/testsuite/
* c-c++-common/cpp/Wheader-guard-1.c: New test.
* c-c++-common/cpp/Wheader-guard-1-1.h: New test.
* c-c++-common/cpp/Wheader-guard-1-2.h: New test.
* c-c++-common/cpp/Wheader-guard-1-3.h: New test.
* c-c++-common/cpp/Wheader-guard-1-4.h: New test.
* c-c++-common/cpp/Wheader-guard-1-5.h: New test.
* c-c++-common/cpp/Wheader-guard-1-6.h: New test.
* c-c++-common/cpp/Wheader-guard-1-7.h: New test.
* c-c++-common/cpp/Wheader-guard-1-8.h: New test.
* c-c++-common/cpp/Wheader-guard-1-9.h: New test.
* c-c++-common/cpp/Wheader-guard-1-10.h: New test.
* c-c++-common/cpp/Wheader-guard-1-11.h: New test.
* c-c++-common/cpp/Wheader-guard-1-12.h: New test.
* c-c++-common/cpp/Wheader-guard-2.c: New test.
* c-c++-common/cpp/Wheader-guard-2.h: New test.
* c-c++-common/cpp/Wheader-guard-3.c: New test.
* c-c++-common/cpp/Wheader-guard-3.h: New test.

Diff:
---
 gcc/c-family/c-ppoutput.cc |  1 +
 gcc/c-family/c.opt |  4 +++
 gcc/c-family/c.opt.urls|  3 ++
 gcc/doc/invoke.texi| 15 +++-
 gcc/testsuite/c-c++-common/cpp/Wheader-guard-1-1.h |  5 +++
 .../c-c++-common/cpp/Wheader-guard-1-10.h  |  5 +++
 .../c-c++-common/cpp/Wheader-guard-1-11.h  |  5 +++
 .../c-c++-common/cpp/Wheader-guard-1-12.h  |  5 +++
 gcc/testsuite/c-c++-common/cpp/Wheader-guard-1-2.h |  4 +++
 gcc/testsuite/c-c++-common/cpp/Wheader-guard-1-3.h |  4 +++
 gcc/testsuite/c-c++-common/cpp/Wheader-guard-1-4.h |  3 ++
 gcc/testsuite/c-c++-common/cpp/Wheader-guard-1-5.h |  5 +++
 gcc/testsuite/c-c++-common/cpp/Wheader-guard-1-6.h |  8 +
 gcc/testsuite/c-c++-common/cpp/Wheader-guard-1-7.h |  4 +++
 gcc/testsuite/c-c++-common/cpp/Wheader-guard-1-8.h |  5 +++
 gcc/testsuite/c-c++-common/cpp/Wheader-guard-1-9.h |  5 +++
 gcc/testsuite/c-c++-common/cpp/Wheader-guard-1.c   | 19 ++
 gcc/testsuite/c-c++-common/cpp/Wheader-guard-2.c   | 10 ++
 gcc/testsuite/c-c++-common/cpp/

[gcc r15-4011] doc: Drop h8300-hms reference to binaries downloads

2024-10-02 Thread Gerald Pfeifer via Gcc-cvs
https://gcc.gnu.org/g:56d0ee7a8652a212f23148038c6c0c8afcdb66ad

commit r15-4011-g56d0ee7a8652a212f23148038c6c0c8afcdb66ad
Author: Gerald Pfeifer 
Date:   Wed Oct 2 17:10:33 2024 +0800

doc: Drop h8300-hms reference to binaries downloads

gcc:
PR target/69374
* doc/install.texi (Specific) <h8300-hms>: Drop obsolete
reference to binaries download docs.

Diff:
---
 gcc/doc/install.texi | 2 --
 1 file changed, 2 deletions(-)

diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi
index 517d1cbb2fb2..e035061a23e1 100644
--- a/gcc/doc/install.texi
+++ b/gcc/doc/install.texi
@@ -4118,8 +4118,6 @@ This configuration is intended for embedded systems.
 @heading h8300-hms
 Renesas H8/300 series of processors.
 
-Please have a look at the @uref{binaries.html,,binaries page}.
-
 The calling convention and structure layout has changed in release 2.6.
 All code must be recompiled.  The calling convention now passes the
 first three arguments in function calls in registers.  Structures are no


[gcc r15-4012] tree-optimization/116566 - single lane SLP for VLA inductions

2024-10-02 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:ba7632674a2a9ba8193f082c8ca9614c642de3b7

commit r15-4012-gba7632674a2a9ba8193f082c8ca9614c642de3b7
Author: Richard Biener 
Date:   Mon Sep 30 17:06:24 2024 +0200

tree-optimization/116566 - single lane SLP for VLA inductions

The following adds SLP support for vectorizing single-lane inductions
with variable length vectors.

PR tree-optimization/116566
* tree-vect-loop.cc (vectorizable_induction): Handle single-lane
SLP for VLA vectors.

* gcc.dg/tree-ssa/reassoc-46.c: When using partial vectors
the dump-scan doesn't look for the required .COND_ADD so
skip for partial vectors.

Diff:
---
 gcc/testsuite/gcc.dg/tree-ssa/reassoc-46.c |   2 +-
 gcc/tree-vect-loop.cc  | 247 ++---
 2 files changed, 190 insertions(+), 59 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/reassoc-46.c 
b/gcc/testsuite/gcc.dg/tree-ssa/reassoc-46.c
index 97563dd929f6..768de9e85dbc 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/reassoc-46.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/reassoc-46.c
@@ -4,4 +4,4 @@
 #include "reassoc-46.h"
 
 /* Check that the loop accumulator is added last.  */
-/* { dg-final { scan-tree-dump-times {(?:vect_)?sum_[\d._]+ = 
(?:(?:vect_)?_[\d._]+ \+ (?:vect_)?sum_[\d._]+|(?:vect_)?sum_[\d._]+ \+ 
(?:vect_)?_[\d._]+)} 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times {(?:vect_)?sum_[\d._]+ = 
(?:(?:vect_)?_[\d._]+ \+ (?:vect_)?sum_[\d._]+|(?:vect_)?sum_[\d._]+ \+ 
(?:vect_)?_[\d._]+)} 1 "optimized" { target { ! vect_partial_vectors } } } } */
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index a5a44613cb24..f5ecf0bdb805 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -10283,7 +10283,6 @@ vectorizable_induction (loop_vec_info loop_vinfo,
   gimple *new_stmt;
   gphi *induction_phi;
   tree induc_def, vec_dest;
-  tree init_expr, step_expr;
   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
   unsigned i;
   tree expr;
@@ -10369,7 +10368,7 @@ vectorizable_induction (loop_vec_info loop_vinfo,
 iv_loop = loop;
   gcc_assert (iv_loop == (gimple_bb (phi))->loop_father);
 
-  if (slp_node && !nunits.is_constant ())
+  if (slp_node && (!nunits.is_constant () && SLP_TREE_LANES (slp_node) != 1))
 {
   /* The current SLP code creates the step value element-by-element.  */
   if (dump_enabled_p ())
@@ -10387,7 +10386,7 @@ vectorizable_induction (loop_vec_info loop_vinfo,
   return false;
 }
 
-  step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
+  tree step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
   gcc_assert (step_expr != NULL_TREE);
   if (INTEGRAL_TYPE_P (TREE_TYPE (step_expr))
   && !type_has_mode_precision_p (TREE_TYPE (step_expr)))
@@ -10475,9 +10474,6 @@ vectorizable_induction (loop_vec_info loop_vinfo,
[i2 + 2*S2, i0 + 3*S0, i1 + 3*S1, i2 + 3*S2].  */
   if (slp_node)
 {
-  /* Enforced above.  */
-  unsigned int const_nunits = nunits.to_constant ();
-
   /* The initial values are vectorized, but any lanes > group_size
 need adjustment.  */
   slp_tree init_node
@@ -10499,11 +10495,12 @@ vectorizable_induction (loop_vec_info loop_vinfo,
 
   /* Now generate the IVs.  */
   unsigned nvects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
-  gcc_assert ((const_nunits * nvects) % group_size == 0);
+  gcc_assert (multiple_p (nunits * nvects, group_size));
   unsigned nivs;
+  unsigned HOST_WIDE_INT const_nunits;
   if (nested_in_vect_loop)
nivs = nvects;
-  else
+  else if (nunits.is_constant (&const_nunits))
{
  /* Compute the number of distinct IVs we need.  First reduce
 group_size if it is a multiple of const_nunits so we get
@@ -10514,21 +10511,43 @@ vectorizable_induction (loop_vec_info loop_vinfo,
  nivs = least_common_multiple (group_sizep,
const_nunits) / const_nunits;
}
+  else
+   {
+ gcc_assert (SLP_TREE_LANES (slp_node) == 1);
+ nivs = 1;
+   }
+  gimple_seq init_stmts = NULL;
   tree stept = TREE_TYPE (step_vectype);
   tree lupdate_mul = NULL_TREE;
   if (!nested_in_vect_loop)
{
- /* The number of iterations covered in one vector iteration.  */
- unsigned lup_mul = (nvects * const_nunits) / group_size;
- lupdate_mul
-   = build_vector_from_val (step_vectype,
-SCALAR_FLOAT_TYPE_P (stept)
-? build_real_from_wide (stept, lup_mul,
-UNSIGNED)
-: build_int_cstu (stept, lup_mul));
+ if (nunits.is_constant (&const_nunits))
+   {
+ /* The number of iterations covered in one vector iteration.  */
+ unsigned lup_mul

[gcc r15-4013] testsuite/116660 - adjust testcases unexpectedly failing on 32bit sparc

2024-10-02 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:79ea0aab75732c26c38d4b64f1d97acedf80155a

commit r15-4013-g79ea0aab75732c26c38d4b64f1d97acedf80155a
Author: Richard Biener 
Date:   Wed Oct 2 11:27:09 2024 +0200

testsuite/116660 - adjust testcases unexpectedly failing on 32bit sparc

Both testcases miss some effective target requires.

PR testsuite/116660
* gcc.dg/vect/no-scevccp-outer-12.c: Add vect_pack_trunc.
* gcc.dg/vect/vect-multitypes-6.c: Add vect_char_add, remove
explicit 32bit sparc XFAIL.

Diff:
---
 gcc/testsuite/gcc.dg/vect/no-scevccp-outer-12.c | 1 +
 gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c   | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-12.c 
b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-12.c
index 6ace6ad022ee..b94256d48db1 100644
--- a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-12.c
+++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-12.c
@@ -1,4 +1,5 @@
 /* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_pack_trunc } */
 
 #include 
 #include "tree-vect.h"
diff --git a/gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c 
b/gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c
index 73d3b30384eb..e03d62f6a85e 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c
@@ -1,6 +1,7 @@
 /* Disabling epilogues until we find a better way to deal with scans.  */
 /* { dg-additional-options "--param vect-epilogues-nomask=0" } */
 /* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_char_add } */
 /* { dg-add-options double_vectors } */
 
 #include 
@@ -67,7 +68,7 @@ int main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { 
sparc*-*-* && ilp32 } }} } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
 /* { dg-final { scan-tree-dump-times "Alignment of access forced using 
versioning" 6 "vect" { target { vect_no_align && { ! vect_hw_misalign } } } } } 
*/
 /* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 6 
"vect" { xfail { ! { vect_unaligned_possible && vect_align_stack_vars } } } } } 
*/


[gcc(refs/users/meissner/heads/work179-vpair)] Add vector-pair.h tests.

2024-10-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:2a47025f4bd01f97a215a3f501a3c56871702a0c

commit 2a47025f4bd01f97a215a3f501a3c56871702a0c
Author: Michael Meissner 
Date:   Wed Oct 2 13:58:10 2024 -0400

Add vector-pair.h tests.

2024-10-02  Michael Meissner  

gcc/testsuite

* gcc.target/powerpc/vpair-1.c: New test.
* gcc.target/powerpc/vpair-2.c: Likewise.

Diff:
---
 gcc/testsuite/gcc.target/powerpc/vpair-1.c | 141 +
 gcc/testsuite/gcc.target/powerpc/vpair-2.c | 141 +
 2 files changed, 282 insertions(+)

diff --git a/gcc/testsuite/gcc.target/powerpc/vpair-1.c 
b/gcc/testsuite/gcc.target/powerpc/vpair-1.c
new file mode 100644
index ..55772cc44e31
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vpair-1.c
@@ -0,0 +1,141 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+/* Test whether the vector builtin code generates the expected instructions for
+   vector pairs with 4 double elements.  */
+
+#include <vector-pair.h>
+
+void
+test_add (vector_pair_f64_t *dest,
+ vector_pair_f64_t *x,
+ vector_pair_f64_t *y)
+{
+  /* 2 lxvp, 2 xvadddp, 1 stxvp.  */
+  vpair_f64_add (dest, x, y);
+}
+
+void
+test_sub (vector_pair_f64_t *dest,
+ vector_pair_f64_t *x,
+ vector_pair_f64_t *y)
+{
+  /* 2 lxvp, 2 xvsubdp, 1 stxvp.  */
+  vpair_f64_sub (dest, x, y);
+}
+
+void
+test_multiply (vector_pair_f64_t *dest,
+  vector_pair_f64_t *x,
+  vector_pair_f64_t *y)
+{
+  /* 2 lxvp, 2 xvmuldp, 1 stxvp.  */
+  vpair_f64_mul (dest, x, y);
+}
+
+void
+test_min (vector_pair_f64_t *dest,
+ vector_pair_f64_t *x,
+ vector_pair_f64_t *y)
+{
+  /* 2 lxvp, 2 xvmindp, 1 stxvp.  */
+  vpair_f64_min (dest, x, y);
+}
+
+void
+test_max (vector_pair_f64_t *dest,
+ vector_pair_f64_t *x,
+ vector_pair_f64_t *y)
+{
+  /* 2 lxvp, 2 xvmaxdp, 1 stxvp.  */
+  vpair_f64_max (dest, x, y);
+}
+
+void
+test_negate (vector_pair_f64_t *dest,
+vector_pair_f64_t *x)
+{
+  /* 1 lxvp, 2 xvnegdp, 1 stxvp.  */
+  vpair_f64_neg (dest, x);
+}
+
+void
+test_abs (vector_pair_f64_t *dest,
+ vector_pair_f64_t *x)
+{
+  /* 1 lxvp, 2 xvabsdp, 1 stxvp.  */
+  vpair_f64_abs (dest, x);
+}
+
+void
+test_negative_abs (vector_pair_f64_t *dest,
+  vector_pair_f64_t *x)
+{
+  /* 1 lxvp, 2 xvnabsdp, 1 stxvp.  */
+  vpair_f64_nabs (dest, x);
+}
+
+void
+test_sqrt (vector_pair_f64_t *dest,
+  vector_pair_f64_t *x)
+{
+  /* 1 lxvp, 2 xvsqrtdp, 1 stxvp.  */
+  vpair_f64_sqrt (dest, x);
+}
+
+void
+test_fma (vector_pair_f64_t *dest,
+ vector_pair_f64_t *x,
+ vector_pair_f64_t *y,
+ vector_pair_f64_t *z)
+{
+  /* 3 lxvp, 2 xvmadd{a,m}dp, 1 stxvp.  */
+  vpair_f64_fma (dest, x, y, z);
+}
+
+void
+test_fms (vector_pair_f64_t *dest,
+ vector_pair_f64_t *x,
+ vector_pair_f64_t *y,
+ vector_pair_f64_t *z)
+{
+  /* 3 lxvp, 2 xvmsub{a,m}dp, 1 stxvp.  */
+  vpair_f64_fms (dest, x, y, z);
+}
+
+void
+test_nfma (vector_pair_f64_t *dest,
+  vector_pair_f64_t *x,
+  vector_pair_f64_t *y,
+  vector_pair_f64_t *z)
+{
+  /* 3 lxvp, 2 xvnmadd{a,m}dp, 1 stxvp.  */
+  vpair_f64_nfma (dest, x, y, z);
+}
+
+void
+test_nfms (vector_pair_f64_t *dest,
+  vector_pair_f64_t *x,
+  vector_pair_f64_t *y,
+  vector_pair_f64_t *z)
+{
+  /* 3 lxvp, 2 xvnmsub{a,m}dp, 1 stxvp.  */
+  vpair_f64_nfms (dest, x, y, z);
+}
+
+/* { dg-final { scan-assembler-times {\mlxvp\M}   26 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M}  13 } } */
+/* { dg-final { scan-assembler-times {\mxvabsdp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxvadddp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxvmadd.dp\M}   2 } } */
+/* { dg-final { scan-assembler-times {\mxvmaxdp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxvmindp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxvmsub.dp\M}   2 } } */
+/* { dg-final { scan-assembler-times {\mxvmuldp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxvnabsdp\M}2 } } */
+/* { dg-final { scan-assembler-times {\mxvnegdp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxvnmadd.dp\M}  2 } } */
+/* { dg-final { scan-assembler-times {\mxvnmsub.dp\M}  2 } } */
+/* { dg-final { scan-assembler-times {\mxvsqrtdp\M}2 } } */
+/* { dg-final { scan-assembler-times {\mxvsubdp\M} 2 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vpair-2.c 
b/gcc/testsuite/gcc.target/powerpc/vpair-2.c
new file mode 100644
index ..3030b0b33380
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vpair-2.c
@@ -0,0 +1,141 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+/* Test whether the vector builtin code generates the expected instructions for
+   vector pairs with 4 d

[gcc r15-4025] Replace another missed iterative_hash_object

2024-10-02 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:dafbfdb01559092aca6be0e31afc2b39d3fdb2dd

commit r15-4025-gdafbfdb01559092aca6be0e31afc2b39d3fdb2dd
Author: Richard Biener 
Date:   Wed Oct 2 14:18:05 2024 +0200

Replace another missed iterative_hash_object

I missed one that's actually hit quite a lot, hashing of the canonical
type TYPE_HASH.

gcc/cp/
* pt.cc (iterative_hash_template_arg): Use iterative_hash_hashval_t
to hash TYPE_HASH.

Diff:
---
 gcc/cp/pt.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index 04f0a1d5fff7..20affcd65a27 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -1936,7 +1936,7 @@ iterative_hash_template_arg (tree arg, hashval_t val)
 
default:
  if (tree canonical = TYPE_CANONICAL (arg))
-   val = iterative_hash_object (TYPE_HASH (canonical), val);
+   val = iterative_hash_hashval_t (TYPE_HASH (canonical), val);
  else if (tree ti = TYPE_TEMPLATE_INFO (arg))
{
  val = iterative_hash_template_arg (TI_TEMPLATE (ti), val);


[gcc r15-4026] c++: Fix regression introduced by r15-3796 [PR116722]

2024-10-02 Thread Simon Martin via Gcc-cvs
https://gcc.gnu.org/g:3a528386571fffbb41703a238aee950043af3f3c

commit r15-4026-g3a528386571fffbb41703a238aee950043af3f3c
Author: Simon Martin 
Date:   Wed Oct 2 15:32:37 2024 +0200

c++: Fix regression introduced by r15-3796 [PR116722]

Jason pointed out that the fix I made for PR116722 via r15-3796
introduces a regression when running constexpr-dynamic10.C with
-fimplicit-constexpr.

The problem is that my change makes us leave cxx_eval_call_expression
early, and bypass the call to cxx_eval_thunk_call (through a recursive
call to cxx_eval_call_expression) that used to emit an error for that
testcase with -fimplicit-constexpr.

This patch emits the error if !ctx->quiet before bailing out because the
{con,de}structor belongs to a class with virtual bases.

PR c++/116722

gcc/cp/ChangeLog:

* constexpr.cc (cxx_bind_parameters_in_call): When !ctx->quiet,
emit error before bailing out due to a call to {con,de}structor
for a class with virtual bases.

Diff:
---
 gcc/cp/constexpr.cc | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
index 5c6696740fc9..4e4df94f4206 100644
--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -1867,6 +1867,12 @@ cxx_bind_parameters_in_call (const constexpr_ctx *ctx, 
tree t, tree fun,
  with virtual bases.  */
   if (DECL_HAS_IN_CHARGE_PARM_P (fun) || DECL_HAS_VTT_PARM_P (fun))
 {
+  if (!ctx->quiet)
+   {
+ error_at (cp_expr_loc_or_input_loc (t),
+   "call to non-%<constexpr%> function %qD", fun);
+ explain_invalid_constexpr_fn (fun);
+   }
   *non_constant_p = true;
   return binds;
 }


[gcc r15-4030] testsuite/52641 - Require int32 for gcc.dg/pr93820-2.c.

2024-10-02 Thread Georg-Johann Lay via Gcc-cvs
https://gcc.gnu.org/g:5bf78cf0a2a7fe999562fcef7bad0d9631af9742

commit r15-4030-g5bf78cf0a2a7fe999562fcef7bad0d9631af9742
Author: Georg-Johann Lay 
Date:   Wed Oct 2 17:16:55 2024 +0200

testsuite/52641 - Require int32 for gcc.dg/pr93820-2.c.

PR testsuite/52641
gcc/testsuite/
* gcc.dg/pr93820-2.c: Add dg-require-effective-target int32.

Diff:
---
 gcc/testsuite/gcc.dg/pr93820-2.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/testsuite/gcc.dg/pr93820-2.c b/gcc/testsuite/gcc.dg/pr93820-2.c
index be5d36898f1a..0bdae614c44a 100644
--- a/gcc/testsuite/gcc.dg/pr93820-2.c
+++ b/gcc/testsuite/gcc.dg/pr93820-2.c
@@ -1,6 +1,7 @@
 /* PR tree-optimization/93820 */
 /* { dg-do run } */
 /* { dg-options "-O2 -fgimple" } */
+/* { dg-require-effective-target int32 } */
 
 typedef int v4si __attribute__((vector_size(4 * sizeof (int))));
 int a[10];


[gcc r13-9072] tree-optimization/116585 - SSA corruption with split_constant_offset

2024-10-02 Thread Qing Zhao via Gcc-cvs
https://gcc.gnu.org/g:a344ba9e42224220a7279a4051a08662435b1c60

commit r13-9072-ga344ba9e42224220a7279a4051a08662435b1c60
Author: Richard Biener 
Date:   Wed Sep 18 09:52:55 2024 +0200

tree-optimization/116585 - SSA corruption with split_constant_offset

split_constant_offset when looking through SSA defs can end up
picking SSA leafs that are subject to abnormal coalescing.  This
can lead downstream consumers to insert code based on the
result (like from dataref analysis) in places that violate constraints
for abnormal coalescing.  It's best to not expand defs whose operands
are subject to abnormal coalescing - and also not to do anything when
a subexpression already has operands like that.

PR tree-optimization/116585
* tree-data-ref.cc (split_constant_offset_1): When either
operand is subject to abnormal coalescing do no further
processing.

* gcc.dg/torture/pr116585.c: New testcase.

(cherry picked from commit 1d0cb3b5fca69b81e69cfdb4aea0eebc1ac04750)

Diff:
---
 gcc/testsuite/gcc.dg/torture/pr116585.c | 32 
 gcc/tree-data-ref.cc| 11 ---
 2 files changed, 40 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/torture/pr116585.c 
b/gcc/testsuite/gcc.dg/torture/pr116585.c
new file mode 100644
index ..108c481e1043
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr116585.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+
+char *s1, *s2;
+extern int* my_alloc (int);
+extern int _setjmp ();
+extern void bar();
+void foo(int s1len, int s2len)
+{
+  int e;
+  e = _setjmp ();
+{
+  int l, i;
+  int *md = my_alloc(((sizeof(int)) * (s1len + 1) * (s2len)));
+  s1len++;
+  for (; s1len; l)
+   for (; s2len; l)
+ for (; s1len; i)
+   {
+ int j = 1;
+ for (; j < s2len; j++)
+   {
+ int cost;
+ if (s1[1] == s2[1])
+   cost = 0;
+ else
+   cost = 1;
+ md[j * s1len ] = ((cost));
+   }
+   }
+  bar();
+}
+}
diff --git a/gcc/tree-data-ref.cc b/gcc/tree-data-ref.cc
index 96934addff14..7657eeaf9e65 100644
--- a/gcc/tree-data-ref.cc
+++ b/gcc/tree-data-ref.cc
@@ -763,6 +763,14 @@ split_constant_offset_1 (tree type, tree op0, enum 
tree_code code, tree op1,
   if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type))
 return false;
 
+  if (TREE_CODE (op0) == SSA_NAME
+  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
+return false;
+  if (op1
+  && TREE_CODE (op1) == SSA_NAME
+  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op1))
+return false;
+
   switch (code)
 {
 case INTEGER_CST:
@@ -855,9 +863,6 @@ split_constant_offset_1 (tree type, tree op0, enum 
tree_code code, tree op1,
 
 case SSA_NAME:
   {
-   if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
- return false;
-
gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
enum tree_code subcode;


[gcc r15-4031] AVR: Make gcc.dg/pr113596.c work.

2024-10-02 Thread Georg-Johann Lay via Gcc-cvs
https://gcc.gnu.org/g:524b9c2e6283d55dbe381dc463983d4fd7c9705a

commit r15-4031-g524b9c2e6283d55dbe381dc463983d4fd7c9705a
Author: Georg-Johann Lay 
Date:   Wed Oct 2 18:42:26 2024 +0200

AVR: Make gcc.dg/pr113596.c work.

gcc/testsuite/
* gcc.dg/pr113596.c: Require less memory so it works on AVR.

Diff:
---
 gcc/testsuite/gcc.dg/pr113596.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/pr113596.c b/gcc/testsuite/gcc.dg/pr113596.c
index 19e0ab6dc467..3655ffef3f97 100644
--- a/gcc/testsuite/gcc.dg/pr113596.c
+++ b/gcc/testsuite/gcc.dg/pr113596.c
@@ -16,9 +16,17 @@ foo (int n)
   bar (p, n);
 }
 
+#if defined __AVR__
+/* For AVR devices, AVRtest assigns 8 KiB of stack, which is not quite
+   enough for this test case.  Thus request less memory on AVR.  */
+#define ALLOC 6000
+#else
+#define ALLOC 8192
+#endif
+
 int
 main ()
 {
-  for (int i = 2; i < 8192; ++i)
+  for (int i = 2; i < ALLOC; ++i)
 foo (i);
 }


[gcc r14-10730] tree-optimization/116585 - SSA corruption with split_constant_offset

2024-10-02 Thread Qing Zhao via Gcc-cvs
https://gcc.gnu.org/g:e69c03971aa50fda96b3382bfded54da3d087c32

commit r14-10730-ge69c03971aa50fda96b3382bfded54da3d087c32
Author: Richard Biener 
Date:   Wed Sep 18 09:52:55 2024 +0200

tree-optimization/116585 - SSA corruption with split_constant_offset

split_constant_offset when looking through SSA defs can end up
picking SSA leafs that are subject to abnormal coalescing.  This
can lead downstream consumers to insert code based on the
result (like from dataref analysis) in places that violate constraints
for abnormal coalescing.  It's best to not expand defs whose operands
are subject to abnormal coalescing - and also to not do anything when
a subexpression already has operands like that.

PR tree-optimization/116585
* tree-data-ref.cc (split_constant_offset_1): When either
operand is subject to abnormal coalescing do no further
processing.

* gcc.dg/torture/pr116585.c: New testcase.

(cherry picked from commit 1d0cb3b5fca69b81e69cfdb4aea0eebc1ac04750)

Diff:
---
 gcc/testsuite/gcc.dg/torture/pr116585.c | 32 
 gcc/tree-data-ref.cc| 11 ---
 2 files changed, 40 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/torture/pr116585.c 
b/gcc/testsuite/gcc.dg/torture/pr116585.c
new file mode 100644
index ..108c481e1043
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr116585.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+
+char *s1, *s2;
+extern int* my_alloc (int);
+extern int _setjmp ();
+extern void bar();
+void foo(int s1len, int s2len)
+{
+  int e;
+  e = _setjmp ();
+{
+  int l, i;
+  int *md = my_alloc(((sizeof(int)) * (s1len + 1) * (s2len)));
+  s1len++;
+  for (; s1len; l)
+   for (; s2len; l)
+ for (; s1len; i)
+   {
+ int j = 1;
+ for (; j < s2len; j++)
+   {
+ int cost;
+ if (s1[1] == s2[1])
+   cost = 0;
+ else
+   cost = 1;
+ md[j * s1len ] = ((cost));
+   }
+   }
+  bar();
+}
+}
diff --git a/gcc/tree-data-ref.cc b/gcc/tree-data-ref.cc
index 654a82202147..69b421f3c941 100644
--- a/gcc/tree-data-ref.cc
+++ b/gcc/tree-data-ref.cc
@@ -765,6 +765,14 @@ split_constant_offset_1 (tree type, tree op0, enum 
tree_code code, tree op1,
   if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type))
 return false;
 
+  if (TREE_CODE (op0) == SSA_NAME
+  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
+return false;
+  if (op1
+  && TREE_CODE (op1) == SSA_NAME
+  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op1))
+return false;
+
   switch (code)
 {
 case INTEGER_CST:
@@ -860,9 +868,6 @@ split_constant_offset_1 (tree type, tree op0, enum 
tree_code code, tree op1,
 
 case SSA_NAME:
   {
-   if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
- return false;
-
gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
enum tree_code subcode;


[gcc r15-4029] testsuite/52641 - Fix gcc.dg/signbit-6.c for int != 32-bit targets.

2024-10-02 Thread Georg-Johann Lay via Gcc-cvs
https://gcc.gnu.org/g:875a1df8130342baf985650c5de1914cf37eb774

commit r15-4029-g875a1df8130342baf985650c5de1914cf37eb774
Author: Georg-Johann Lay 
Date:   Wed Oct 2 16:23:32 2024 +0200

testsuite/52641 - Fix gcc.dg/signbit-6.c for int != 32-bit targets.

PR testsuite/52641
gcc/testsuite/
* gcc.dg/signbit-6.c (main): Initialize a[0] and b[0]
with INT32_MIN (instead of with INT_MIN).

Diff:
---
 gcc/testsuite/gcc.dg/signbit-6.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/signbit-6.c b/gcc/testsuite/gcc.dg/signbit-6.c
index da186624cfa0..3a522893222d 100644
--- a/gcc/testsuite/gcc.dg/signbit-6.c
+++ b/gcc/testsuite/gcc.dg/signbit-6.c
@@ -38,8 +38,10 @@ int main ()
   TYPE a[N];
   TYPE b[N];
 
-  a[0] = INT_MIN;
-  b[0] = INT_MIN;
+  /* This will invoke UB due to -INT32_MIN.  The test is supposed to pass
+ because GCC is supposed to handle this UB case in a predictable way.  */
+  a[0] = INT32_MIN;
+  b[0] = INT32_MIN;
 
   for (int i = 1; i < N; ++i)
 {


[gcc r15-4027] arm: Prevent ICE when doloop dec_set is not PLUS expr

2024-10-02 Thread Andre Simoes Dias Vieira via Gcc-cvs
https://gcc.gnu.org/g:4e11ad7c345b6084ffe45ac569352dd316ee5cc6

commit r15-4027-g4e11ad7c345b6084ffe45ac569352dd316ee5cc6
Author: Andre Vieira 
Date:   Wed Oct 2 15:14:40 2024 +0100

arm: Prevent ICE when doloop dec_set is not PLUS expr

This patch refactors and fixes an issue where
arm_mve_dlstp_check_dec_counter was making an assumption about the form
of what a candidate for a dec_insn should be, which caused an ICE.
This dec_insn is the instruction that decreases the loop counter inside a
decrementing loop and we expect it to have the following form:
(set (reg CONDCOUNT)
 (plus (reg CONDCOUNT)
   (const_int)))

Where CONDCOUNT is the loop counter, and const int is the negative constant
used to decrement it.

This patch also improves our search for a valid dec_insn.  Before this patch
we'd only look for a dec_insn inside the loop header if the loop latch was
empty.  We now also search the loop header if the loop latch is not empty
but the last instruction is not a valid dec_insn.  This could potentially be
improved to search all instructions inside the loop latch.

gcc/ChangeLog:

* config/arm/arm.cc (check_dec_insn): New helper function containing
code hoisted from...
(arm_mve_dlstp_check_dec_counter): ... here. Use check_dec_insn to
check the validity of the candidate dec_insn.

gcc/testsuite/ChangeLog:

* gcc.target/arm/mve/dlstp-loop-form.c: New test.

Diff:
---
 gcc/config/arm/arm.cc  | 49 ++
 gcc/testsuite/gcc.target/arm/mve/dlstp-loop-form.c | 27 
 2 files changed, 59 insertions(+), 17 deletions(-)

diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index de34e9867e67..62eea50f1634 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -35231,6 +35231,32 @@ arm_mve_dlstp_check_inc_counter (loop *loop, rtx_insn* 
vctp_insn,
   return vctp_insn;
 }
 
+/* Helper function to 'arm_mve_dlstp_check_dec_counter' to make sure DEC_INSN
+   is of the expected form:
+   (set (reg a) (plus (reg a) (const_int)))
+   where (reg a) is the same as CONDCOUNT.
+   Return a rtx with the set if it is in the right format or NULL_RTX
+   otherwise.  */
+
+static rtx
+check_dec_insn (rtx_insn *dec_insn, rtx condcount)
+{
+  if (!NONDEBUG_INSN_P (dec_insn))
+return NULL_RTX;
+  rtx dec_set = single_set (dec_insn);
+  if (!dec_set
+  || !REG_P (SET_DEST (dec_set))
+  || GET_CODE (SET_SRC (dec_set)) != PLUS
+  || !REG_P (XEXP (SET_SRC (dec_set), 0))
+  || !CONST_INT_P (XEXP (SET_SRC (dec_set), 1))
+  || REGNO (SET_DEST (dec_set))
+ != REGNO (XEXP (SET_SRC (dec_set), 0))
+  || REGNO (SET_DEST (dec_set)) != REGNO (condcount))
+return NULL_RTX;
+
+  return dec_set;
+}
+
 /* Helper function to `arm_mve_loop_valid_for_dlstp`.  In the case of a
counter that is decrementing, ensure that it is decrementing by the
right amount in each iteration and that the target condition is what
@@ -35247,30 +35273,19 @@ arm_mve_dlstp_check_dec_counter (loop *loop, 
rtx_insn* vctp_insn,
  loop latch.  Here we simply need to verify that this counter is the same
  reg that is also used in the vctp_insn and that it is not otherwise
  modified.  */
-  rtx_insn *dec_insn = BB_END (loop->latch);
+  rtx dec_set = check_dec_insn (BB_END (loop->latch), condcount);
   /* If not in the loop latch, try to find the decrement in the loop header.  
*/
-  if (!NONDEBUG_INSN_P (dec_insn))
+  if (dec_set == NULL_RTX)
   {
 df_ref temp = df_bb_regno_only_def_find (loop->header, REGNO (condcount));
 /* If we haven't been able to find the decrement, bail out.  */
 if (!temp)
   return NULL;
-dec_insn = DF_REF_INSN (temp);
-  }
-
-  rtx dec_set = single_set (dec_insn);
+dec_set = check_dec_insn (DF_REF_INSN (temp), condcount);
 
-  /* Next, ensure that it is a PLUS of the form:
- (set (reg a) (plus (reg a) (const_int)))
- where (reg a) is the same as condcount.  */
-  if (!dec_set
-  || !REG_P (SET_DEST (dec_set))
-  || !REG_P (XEXP (SET_SRC (dec_set), 0))
-  || !CONST_INT_P (XEXP (SET_SRC (dec_set), 1))
-  || REGNO (SET_DEST (dec_set))
- != REGNO (XEXP (SET_SRC (dec_set), 0))
-  || REGNO (SET_DEST (dec_set)) != REGNO (condcount))
-return NULL;
+if (dec_set == NULL_RTX)
+  return NULL;
+  }
 
   decrementnum = INTVAL (XEXP (SET_SRC (dec_set), 1));
 
diff --git a/gcc/testsuite/gcc.target/arm/mve/dlstp-loop-form.c 
b/gcc/testsuite/gcc.target/arm/mve/dlstp-loop-form.c
new file mode 100644
index ..a1b26873d790
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/dlstp-loop-form.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-options "-Ofast" } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+#pragma GCC arm "arm_mve_type

[gcc r12-10739] tree-optimization/116585 - SSA corruption with split_constant_offset

2024-10-02 Thread Qing Zhao via Gcc-cvs
https://gcc.gnu.org/g:8e5bd9b4b38f5b4fbd2a95d8f61168d9eeea97d3

commit r12-10739-g8e5bd9b4b38f5b4fbd2a95d8f61168d9eeea97d3
Author: Richard Biener 
Date:   Wed Sep 18 09:52:55 2024 +0200

tree-optimization/116585 - SSA corruption with split_constant_offset

split_constant_offset when looking through SSA defs can end up
picking SSA leafs that are subject to abnormal coalescing.  This
can lead downstream consumers to insert code based on the
result (like from dataref analysis) in places that violate constraints
for abnormal coalescing.  It's best to not expand defs whose operands
are subject to abnormal coalescing - and also to not do anything when
a subexpression already has operands like that.

PR tree-optimization/116585
* tree-data-ref.cc (split_constant_offset_1): When either
operand is subject to abnormal coalescing do no further
processing.

* gcc.dg/torture/pr116585.c: New testcase.

(cherry picked from commit 1d0cb3b5fca69b81e69cfdb4aea0eebc1ac04750)

Diff:
---
 gcc/testsuite/gcc.dg/torture/pr116585.c | 32 
 gcc/tree-data-ref.cc| 11 ---
 2 files changed, 40 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/torture/pr116585.c 
b/gcc/testsuite/gcc.dg/torture/pr116585.c
new file mode 100644
index ..108c481e1043
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr116585.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+
+char *s1, *s2;
+extern int* my_alloc (int);
+extern int _setjmp ();
+extern void bar();
+void foo(int s1len, int s2len)
+{
+  int e;
+  e = _setjmp ();
+{
+  int l, i;
+  int *md = my_alloc(((sizeof(int)) * (s1len + 1) * (s2len)));
+  s1len++;
+  for (; s1len; l)
+   for (; s2len; l)
+ for (; s1len; i)
+   {
+ int j = 1;
+ for (; j < s2len; j++)
+   {
+ int cost;
+ if (s1[1] == s2[1])
+   cost = 0;
+ else
+   cost = 1;
+ md[j * s1len ] = ((cost));
+   }
+   }
+  bar();
+}
+}
diff --git a/gcc/tree-data-ref.cc b/gcc/tree-data-ref.cc
index 706a49f226ed..b7bca6a9d064 100644
--- a/gcc/tree-data-ref.cc
+++ b/gcc/tree-data-ref.cc
@@ -761,6 +761,14 @@ split_constant_offset_1 (tree type, tree op0, enum 
tree_code code, tree op1,
   if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type))
 return false;
 
+  if (TREE_CODE (op0) == SSA_NAME
+  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
+return false;
+  if (op1
+  && TREE_CODE (op1) == SSA_NAME
+  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op1))
+return false;
+
   switch (code)
 {
 case INTEGER_CST:
@@ -853,9 +861,6 @@ split_constant_offset_1 (tree type, tree op0, enum 
tree_code code, tree op1,
 
 case SSA_NAME:
   {
-   if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
- return false;
-
gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
enum tree_code subcode;


[gcc r15-4028] middle-end: Fix ifcvt predicate generation for masked function calls

2024-10-02 Thread Victor Do Nascimento via Gcc-cvs
https://gcc.gnu.org/g:4d9e473d125ec36ae4818d36d42bf4fea09cef1f

commit r15-4028-g4d9e473d125ec36ae4818d36d42bf4fea09cef1f
Author: Victor Do Nascimento 
Date:   Mon Sep 23 17:10:18 2024 +0100

middle-end: Fix ifcvt predicate generation for masked function calls

Up until now, due to a latent bug in the code for the ifcvt pass,
irrespective of the branch taken in a conditional statement, the
original condition for the if statement was used in masking the
function call.

Thus, for code such as:

  if (a[i] > limit)
b[i] = fixed_const;
  else
b[i] = fn (a[i]);

we would generate the following (wrong) if-converted tree code:

  _1 = a[i_1];
  _2 = _1 > limit;
  _3 = .MASK_CALL (fn, _1, _2);
  cstore_4 = _2 ? fixed_const : _3;

as opposed to the correct expected sequence:

  _1 = a[i_1];
  _2 = _1 > limit;
  _3 = ~_2;
  _4 = .MASK_CALL (fn, _1, _3);
  cstore_5 = _2 ? fixed_const : _4;

This patch ensures that the correct predicate mask generation is
carried out such that, upon autovectorization, the correct vector
lanes are selected in the vectorized function call.

gcc/ChangeLog:

* tree-if-conv.cc (predicate_statements): Fix handling of
predicated function calls.

gcc/testsuite/ChangeLog:

* gcc.dg/vect/vect-fncall-mask.c: New.

Diff:
---
 gcc/testsuite/gcc.dg/vect/vect-fncall-mask.c | 31 
 gcc/tree-if-conv.cc  | 14 -
 2 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/vect/vect-fncall-mask.c 
b/gcc/testsuite/gcc.dg/vect/vect-fncall-mask.c
new file mode 100644
index ..554488e06308
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-fncall-mask.c
@@ -0,0 +1,31 @@
+/* { dg-do compile { target { aarch64*-*-* } } } */
+/* { dg-additional-options "-march=armv8.2-a+sve -fdump-tree-ifcvt-raw -Ofast" 
{ target { aarch64*-*-* } } } */
+
+extern int __attribute__ ((simd, const)) fn (int);
+
+const int N = 20;
+const float lim = 101.0;
+const float cst =  -1.0;
+float tot =   0.0;
+
+float b[20];
+float a[20] = { [0 ... 9] = 1.7014118e39, /* If branch. */
+   [10 ... 19] = 100.0 };/* Else branch.  */
+
+int main (void)
+{
+  #pragma omp simd
+  for (int i = 0; i < N; i += 1)
+{
+  if (a[i] > lim)
+   b[i] = cst;
+  else
+   b[i] = fn (a[i]);
+  tot += b[i];
+}
+  return (0);
+}
+
+/* { dg-final { scan-tree-dump {gimple_assign } ifcvt } } */
+/* { dg-final { scan-tree-dump {gimple_assign } ifcvt } } */
+/* { dg-final { scan-tree-dump {gimple_call <.MASK_CALL, _3, fn, _2, _34>} 
ifcvt } } */
diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
index 0346a1376c5f..3b04d1e8d34f 100644
--- a/gcc/tree-if-conv.cc
+++ b/gcc/tree-if-conv.cc
@@ -2907,6 +2907,7 @@ predicate_statements (loop_p loop)
 This will cause the vectorizer to match the "in branch"
 clone variants, and serves to build the mask vector
 in a natural way.  */
+ tree mask = cond;
  gcall *call = dyn_cast  (gsi_stmt (gsi));
  tree orig_fn = gimple_call_fn (call);
  int orig_nargs = gimple_call_num_args (call);
@@ -2914,7 +2915,18 @@ predicate_statements (loop_p loop)
  args.safe_push (orig_fn);
  for (int i = 0; i < orig_nargs; i++)
args.safe_push (gimple_call_arg (call, i));
- args.safe_push (cond);
+ /* If `swap', we invert the mask used for the if branch for use
+when masking the function call.  */
+ if (swap)
+   {
+ gimple_seq stmts = NULL;
+ tree true_val
+   = constant_boolean_node (true, TREE_TYPE (mask));
+ mask = gimple_build (&stmts, BIT_XOR_EXPR,
+  TREE_TYPE (mask), mask, true_val);
+ gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
+   }
+ args.safe_push (mask);
 
  /* Replace the call with a IFN_MASK_CALL that has the extra
 condition parameter. */


[gcc/devel/nothrow-detection] Updated code for inserting regions and changed the return parameters of extract_types_for call

2024-10-02 Thread Pranil Dey via Gcc-cvs
https://gcc.gnu.org/g:0d0df5179b80e808eb46c59ee1dd69138912e6fd

commit 0d0df5179b80e808eb46c59ee1dd69138912e6fd
Author: Pranil Dey 
Date:   Thu Oct 3 10:11:07 2024 +0530

Updated code for inserting regions and changed the return type of
extract_types_for_call

Diff:
---
 gcc/tree-eh.cc | 71 +-
 gcc/tree-eh.h  |  2 +-
 2 files changed, 41 insertions(+), 32 deletions(-)

diff --git a/gcc/tree-eh.cc b/gcc/tree-eh.cc
index 3723b6672bf2..464a033dbf20 100644
--- a/gcc/tree-eh.cc
+++ b/gcc/tree-eh.cc
@@ -2337,9 +2337,11 @@ void unlink_eh_region (eh_region region, eh_region 
prev_region) {
 region->outer = NULL;
 }
 
-void reinsert_eh_region (eh_region region, eh_landing_pad lp) {
-eh_region new_outer = lp->region->outer;
+void reinsert_eh_region(eh_region region, eh_landing_pad lp) {
+eh_region new_outer = lp->region;
 region->outer = new_outer;
+
+// Insert region as the inner of new_outer, or at the top of the tree
 if (new_outer) {
 region->next_peer = new_outer->inner;
 new_outer->inner = region;
@@ -2347,9 +2349,13 @@ void reinsert_eh_region (eh_region region, 
eh_landing_pad lp) {
 region->next_peer = cfun->eh->region_tree;
 cfun->eh->region_tree = region;
 }
+
+// Set the region index in the region array
+region->index = vec_safe_length(cfun->eh->region_array);
+vec_safe_push(cfun->eh->region_array, region);
 }
 
-// Function to update landing pad in throw_stmt_table for a given statement
+// Function to update landing pad and region in throw_stmt_table for a given 
statement
 void update_stmt_eh_region (gimple *stmt) {
   auto_vec exception_types;
   if (!stmt_throw_types (cfun, stmt, &exception_types)) {
@@ -3063,24 +3069,26 @@ stmt_could_throw_1_p (gassign *stmt)
   return false;
 }
 
-void extract_types_for_call (gcall *call_stmt, vec *ret_vector) {
+bool extract_types_for_call (gcall *call_stmt, vec *ret_vector) {
 tree callee = gimple_call_fndecl (call_stmt);
 if (callee == NULL_TREE) {
-return;
-  }
-  if (strcmp (IDENTIFIER_POINTER (DECL_NAME (callee)), "__cxa_throw") == 
0) {
-  // Extracting exception type
-  tree exception_type_info = gimple_call_arg (call_stmt, 1); 
-  if (exception_type_info && TREE_CODE (exception_type_info) == 
ADDR_EXPR) {
-  exception_type_info = TREE_OPERAND (exception_type_info, 0);
-  }
-  if (exception_type_info && TREE_CODE (exception_type_info) == 
VAR_DECL) {
-  // Converting the typeinfo to a compile-time type
-  tree exception_type = TREE_TYPE (exception_type_info);
-  if (exception_type) {
-  ret_vector->safe_push (exception_type);
-  }
-  }
+return false;
+}
+
+if (strcmp (IDENTIFIER_POINTER (DECL_NAME (callee)), "__cxa_throw") == 0) {
+// Extracting exception type
+tree exception_type_info = gimple_call_arg (call_stmt, 1); 
+if (exception_type_info && TREE_CODE (exception_type_info) == 
ADDR_EXPR) {
+exception_type_info = TREE_OPERAND (exception_type_info, 0);
+}
+if (exception_type_info && TREE_CODE (exception_type_info) == 
VAR_DECL) {
+// Converting the typeinfo to a compile-time type
+tree exception_type = TREE_TYPE (exception_type_info);
+if (exception_type) {
+   ret_vector->safe_push (exception_type);
+}
+}
+return true;
   }
 }
 
@@ -3096,8 +3104,9 @@ bool stmt_throw_types (function *fun, gimple *stmt, 
vec *ret_vector) {
 return !ret_vector->is_empty ();
 
 case GIMPLE_CALL:
-extract_types_for_call (as_a (stmt), ret_vector);
-return !ret_vector->is_empty ();
+bool type_exists = false;
+type_exists =  extract_types_for_call (as_a (stmt), 
ret_vector);
+return type_exists && !ret_vector->is_empty ();
   
 default:
 return false;
@@ -3122,7 +3131,7 @@ void extract_types_for_resx (gimple *resx_stmt, vec 
*ret_vector) {
  if (bb->aux)continue;
  bb->aux = (void*)1;

- if (e->flags & EDGE_EH){
+ if (e->flags & EDGE_EH && last_stmt!= NULL){
   if (gimple_code (last_stmt) == GIMPLE_CALL) {
 // check if its a throw
 extract_types_for_call (as_a (last_stmt), ret_vector);
@@ -3143,32 +3152,32 @@ void extract_types_for_resx (gimple *resx_stmt, 
vec *ret_vector) {
 void extract_fun_resx_types (function *fun, vec *ret_vector) {
basic_block bb;
gimple_stmt_iterator gsi;
-  hash_set *types;
+  hash_set types;
 
FOR_EACH_BB_FN (bb, fun)
{
bb->aux = (void*)1;
gsi = gsi_last_bb (bb);
gimple *stmt = gsi_stmt (gsi);
-   vec *resx_types;
+   auto_vec resx_types;
 
-   

[gcc r15-4036] libstdc++: [_Hashtable] Fix some implementation inconsistencies

2024-10-02 Thread Francois Dumont via Libstdc++-cvs
https://gcc.gnu.org/g:ccb6e08a4d5a067513b3a10bbf0d76e28e1d4a8e

commit r15-4036-gccb6e08a4d5a067513b3a10bbf0d76e28e1d4a8e
Author: François Dumont 
Date:   Thu Nov 9 19:06:52 2023 +0100

libstdc++: [_Hashtable] Fix some implementation inconsistencies

Get rid of the different usages of the mutable keyword except in
_Prime_rehash_policy where it is preserved for ABI compatibility reasons.

Fix comment to explain that we need the computation of the bucket index to be
noexcept in order to be able to rehash the container when needed.

For Standard instantiations through std::unordered_xxx containers we already
force caching of the hash code when the hash functor is not noexcept, so it is
guaranteed.

The static_assert purpose in _Hashtable on _M_bucket_index is thus limited
to usages of _Hashtable with exotic _Hashtable_traits.

libstdc++-v3/ChangeLog:

* include/bits/hashtable_policy.h (_NodeBuilder<>::_S_build): Remove
const qualification on _NodeGenerator instance.
(_ReuseOrAllocNode<>::operator()(_Args&&...)): Remove const 
qualification.
(_ReuseOrAllocNode<>::_M_nodes): Remove mutable.
(_Insert_base<>::_M_insert_range): Remove _NodeGetter const 
qualification.
(_Hash_code_base<>::_M_bucket_index(const _Hash_node_value<>&, 
size_t)):
Simplify noexcept declaration, we already static_assert that 
_RangeHash functor
is noexcept.
* include/bits/hashtable.h: Rework comments. Remove const qualifier 
on
_NodeGenerator& arguments.

Reviewed-by: Jonathan Wakely 

Diff:
---
 libstdc++-v3/include/bits/hashtable.h| 28 +++-
 libstdc++-v3/include/bits/hashtable_policy.h | 28 ++--
 2 files changed, 29 insertions(+), 27 deletions(-)

diff --git a/libstdc++-v3/include/bits/hashtable.h 
b/libstdc++-v3/include/bits/hashtable.h
index 92b57071f1c6..b4e8e4d3fb25 100644
--- a/libstdc++-v3/include/bits/hashtable.h
+++ b/libstdc++-v3/include/bits/hashtable.h
@@ -54,7 +54,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 using __cache_default
   =  __not_<__and_,
-  // Mandatory to have erase not throwing.
+  // Mandatory for the rehash process.
   __is_nothrow_invocable>>;
 
   // Helper to conditionally delete the default constructor.
@@ -489,7 +489,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   template
void
-   _M_assign(_Ht&&, const _NodeGenerator&);
+   _M_assign(_Ht&&, _NodeGenerator&);
 
   void
   _M_move_assign(_Hashtable&&, true_type);
@@ -927,7 +927,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   template
std::pair
-   _M_insert_unique(_Kt&&, _Arg&&, const _NodeGenerator&);
+   _M_insert_unique(_Kt&&, _Arg&&, _NodeGenerator&);
 
   template
static __conditional_t<
@@ -947,7 +947,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   template
std::pair
-   _M_insert_unique_aux(_Arg&& __arg, const _NodeGenerator& __node_gen)
+   _M_insert_unique_aux(_Arg&& __arg, _NodeGenerator& __node_gen)
{
  return _M_insert_unique(
_S_forward_key(_ExtractKey{}(std::forward<_Arg>(__arg))),
@@ -956,7 +956,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   template
std::pair
-   _M_insert(_Arg&& __arg, const _NodeGenerator& __node_gen,
+   _M_insert(_Arg&& __arg, _NodeGenerator& __node_gen,
  true_type /* __uks */)
{
  using __to_value
@@ -967,7 +967,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   template
iterator
-   _M_insert(_Arg&& __arg, const _NodeGenerator& __node_gen,
+   _M_insert(_Arg&& __arg, _NodeGenerator& __node_gen,
  false_type __uks)
{
  using __to_value
@@ -980,7 +980,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   template
iterator
_M_insert(const_iterator, _Arg&& __arg,
- const _NodeGenerator& __node_gen, true_type __uks)
+ _NodeGenerator& __node_gen, true_type __uks)
{
  return
_M_insert(std::forward<_Arg>(__arg), __node_gen, __uks).first;
@@ -990,7 +990,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   template
iterator
_M_insert(const_iterator, _Arg&&,
- const _NodeGenerator&, false_type __uks);
+ _NodeGenerator&, false_type __uks);
 
   size_type
   _M_erase(true_type __uks, const key_type&);
@@ -1420,7 +1420,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   void
   _Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal,
 _Hash, _RangeHash, _Unused, _RehashPolicy, _Traits>::
-  _M_assign(_Ht&& __ht, const _NodeGenerator& __node_gen)
+  _M_assign(_Ht&& __ht, _NodeGenerator& __node_gen)
   {
__buckets_ptr __buckets = nullptr;
if (!_M_buckets)
@@ -1662,8 +1662,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 ~_Hasht

[gcc(refs/users/meissner/heads/work179-vpair)] Add vector-pair.h runtime tests.

2024-10-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:924d2596c9ad1deb0acb78c32762608838ea7db4

commit 924d2596c9ad1deb0acb78c32762608838ea7db4
Author: Michael Meissner 
Date:   Thu Oct 3 00:31:49 2024 -0400

Add vector-pair.h runtime tests.

2024-10-03  Michael Meissner  

gcc/testsuite

* gcc.target/powerpc/vpair-3-not-p10.c: New test.
* gcc.target/powerpc/vpair-3-p10.c: Likewise.
* gcc.target/powerpc/vpair-3.h: New test include.
* gcc.target/powerpc/vpair-4-not-p10.c: New test.
* gcc.target/powerpc/vpair-4-p10.c: Likewise.
* gcc.target/powerpc/vpair-4.h: New test include.

Diff:
---
 gcc/testsuite/gcc.target/powerpc/vpair-3-not-p10.c |  15 +
 gcc/testsuite/gcc.target/powerpc/vpair-3-p10.c |  14 +
 gcc/testsuite/gcc.target/powerpc/vpair-3.h | 435 +
 gcc/testsuite/gcc.target/powerpc/vpair-4-not-p10.c |  15 +
 gcc/testsuite/gcc.target/powerpc/vpair-4-p10.c |  14 +
 gcc/testsuite/gcc.target/powerpc/vpair-4.h | 435 +
 6 files changed, 928 insertions(+)

diff --git a/gcc/testsuite/gcc.target/powerpc/vpair-3-not-p10.c 
b/gcc/testsuite/gcc.target/powerpc/vpair-3-not-p10.c
new file mode 100644
index ..d1a1029417f0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vpair-3-not-p10.c
@@ -0,0 +1,15 @@
+/* { dg-do run { target { vsx_hw } } } */
+/* { dg-options "-mvsx -O2 -ffast-math -mno-mma" } */
+
+/*
+ * This test of the double (f64) vector pair functions in vector-pair.h is run
+ * on VSX systems when the load/store vector pair instructions are not
+ * available.
+ *
+ * The -ffast-math option is used to just use the hardware sqrt, min, and max
+ * instructions without calling into the library.
+ *
+ * The -mno-mma option disables GCC from enabling the __vector_pair type.
+ */
+
+#include "vpair-3.h"
diff --git a/gcc/testsuite/gcc.target/powerpc/vpair-3-p10.c 
b/gcc/testsuite/gcc.target/powerpc/vpair-3-p10.c
new file mode 100644
index ..d78faf3fed47
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vpair-3-p10.c
@@ -0,0 +1,14 @@
+/* { dg-do run { target { power10_hw } } } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -ffast-math -mmma" } */
+
+/*
+ * This test of the double (f64) vector pair functions in vector-pair.h is run
+ * on VSX systems when the load/store vector pair instructions are available.
+ *
+ * The -ffast-math option is used to just use the hardware sqrt, min, and max
+ * instructions without calling into the library.
+ *
+ * The -mmma option makes sure GCC enables the __vector_pair type.
+ */
+
+#include "vpair-3.h"
diff --git a/gcc/testsuite/gcc.target/powerpc/vpair-3.h 
b/gcc/testsuite/gcc.target/powerpc/vpair-3.h
new file mode 100644
index ..e61ad23dd57e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vpair-3.h
@@ -0,0 +1,435 @@
+/* Common include file to test the vector pair double functions.  This is run
+   two times, once compiled for a non-power10 system that does not have the
+   vector pair load and store instructions, and once with power10 defaults that
+   has load/store vector pair.  */
+
+#include 
+#include 
+#include 
+
+#ifdef DEBUG
+#include 
+#endif
+
+#ifndef NUM
+#define NUM16
+#endif
+
+static double  result1[NUM];
+static double  result2[NUM];
+static double  in_a[NUM];
+static double  in_b[NUM];
+static double  in_c[NUM];
+
+/* vector pair tests.  */
+
+void
+vpair_abs (double *r, double *a, double *b, double *c, size_t num)
+{
+  vector_pair_f64_t *vr = (vector_pair_f64_t *)r;
+  vector_pair_f64_t *va = (vector_pair_f64_t *)a;
+
+  size_t i;
+  size_t num2 = num / (sizeof (vector_pair_f64_t) / sizeof (double));
+
+  for (i = 0; i < num2; i++)
+vpair_f64_abs (vr + i, va + i);
+}
+
+void
+vpair_nabs (double *r, double *a, double *b, double *c, size_t num)
+{
+  vector_pair_f64_t *vr = (vector_pair_f64_t *)r;
+  vector_pair_f64_t *va = (vector_pair_f64_t *)a;
+
+  size_t i;
+  size_t num2 = num / (sizeof (vector_pair_f64_t) / sizeof (double));
+
+  for (i = 0; i < num2; i++)
+vpair_f64_nabs (vr + i, va + i);
+}
+
+void
+vpair_neg (double *r, double *a, double *b, double *c, size_t num)
+{
+  vector_pair_f64_t *vr = (vector_pair_f64_t *)r;
+  vector_pair_f64_t *va = (vector_pair_f64_t *)a;
+
+  size_t i;
+  size_t num2 = num / (sizeof (vector_pair_f64_t) / sizeof (double));
+
+  for (i = 0; i < num2; i++)
+vpair_f64_neg (vr + i, va + i);
+}
+
+void
+vpair_sqrt (double *r, double *a, double *b, double *c, size_t num)
+{
+  vector_pair_f64_t *vr = (vector_pair_f64_t *)r;
+  vector_pair_f64_t *va = (vector_pair_f64_t *)a;
+
+  size_t i;
+  size_t num2 = num / (sizeof (vector_pair_f64_t) / sizeof (double));
+
+  for (i = 0; i < num2; i++)
+vpair_f64_sqrt (vr + i, va + i);
+}
+
+void
+vpair_add (double *r, double *a, double *b, double *c, size_t num)
+{
+  vector_pair_f64_t *vr = (vector_pair_f64_t *)r;
+  vector_pair_f64_t *va = (vector_pair_f64_t *)a;
+  vector_pair_f64_t *vb = (vector

[gcc(refs/users/meissner/heads/work179-vpair)] Update ChangeLog.*

2024-10-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:bbe6bbf332952aea4bbbe71671a329848354a002

commit bbe6bbf332952aea4bbbe71671a329848354a002
Author: Michael Meissner 
Date:   Thu Oct 3 00:35:36 2024 -0400

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.vpair | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/gcc/ChangeLog.vpair b/gcc/ChangeLog.vpair
index 285eda695f6b..58e4b1d07a7b 100644
--- a/gcc/ChangeLog.vpair
+++ b/gcc/ChangeLog.vpair
@@ -1,3 +1,29 @@
+ Branch work179-vpair, patch #311 
+
+Add vector-pair.h runtime tests.
+
+2024-10-03  Michael Meissner  
+
+gcc/testsuite
+
+   * gcc.target/powerpc/vpair-3-not-p10.c: New test.
+   * gcc.target/powerpc/vpair-3-p10.c: Likewise.
+   * gcc.target/powerpc/vpair-3.h: New test include.
+   * gcc.target/powerpc/vpair-4-not-p10.c: New test.
+   * gcc.target/powerpc/vpair-4-p10.c: Likewise.
+   * gcc.target/powerpc/vpair-4.h: New test include.
+
+ Branch work179-vpair, patch #310 
+
+Add vector-pair.h tests.
+
+2024-10-02  Michael Meissner  
+
+gcc/testsuite
+
+   * gcc.target/powerpc/vpair-1.c: New test.
+   * gcc.target/powerpc/vpair-2.c: Likewise.
+
  Branch work179-vpair, patch #309 
 
 Rewrite vector-pair.h to use macros.