[gcc r15-4739] jit: fix leak of pending_assemble_externals_set [PR117275]

2024-10-29 Thread David Malcolm via Gcc-cvs
https://gcc.gnu.org/g:7f41203f08b9948c1c636dc9d66571121c6c7793

commit r15-4739-g7f41203f08b9948c1c636dc9d66571121c6c7793
Author: David Malcolm 
Date:   Tue Oct 29 08:25:56 2024 -0400

jit: fix leak of pending_assemble_externals_set [PR117275]

My recent r15-4580-g779c0390e3b57d fix for resetting state in
varasm.cc introduced some noise to "make selftest-valgrind" and,
presumably, a memory leak in libgccjit:

==2462086== 160 (56 direct, 104 indirect) bytes in 1 blocks are definitely 
lost in loss record 248 of 352
==2462086==at 0x5270E7D: operator new(unsigned long) 
(vg_replace_malloc.c:342)
==2462086==by 0x1D1EB89: init_varasm_once() (varasm.cc:6806)
==2462086==by 0x181C845: backend_init() (toplev.cc:1826)
==2462086==by 0x181D41A: do_compile() (toplev.cc:2193)
==2462086==by 0x181D99C: toplev::main(int, char**) (toplev.cc:2371)
==2462086==by 0x378391D: main (main.cc:39)

Fixed thusly.

gcc/ChangeLog:
PR jit/117275
* varasm.cc (process_pending_assemble_externals): Reset
pending_assemble_externals_set to nullptr after deleting it.
(varasm_cc_finalize): Delete pending_assemble_externals_set.

Signed-off-by: David Malcolm 

Diff:
---
 gcc/varasm.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcc/varasm.cc b/gcc/varasm.cc
index ce1077b6d4bd..deefbac5b7b2 100644
--- a/gcc/varasm.cc
+++ b/gcc/varasm.cc
@@ -2575,6 +2575,7 @@ process_pending_assemble_externals (void)
   pending_assemble_externals_processed = true;
   pending_libcall_symbols = NULL_RTX;
   delete pending_assemble_externals_set;
+  pending_assemble_externals_set = nullptr;
 #endif
 }
 
@@ -8893,6 +8894,7 @@ varasm_cc_finalize ()
 
 #ifdef ASM_OUTPUT_EXTERNAL
   pending_assemble_externals_processed = false;
+  delete pending_assemble_externals_set;
   pending_assemble_externals_set = nullptr;
 #endif


[gcc r15-4742] Remove dead vect_recog_mixed_size_cond_pattern

2024-10-29 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:4cfff6d413b15b38827494bb317867d86f997350

commit r15-4742-g4cfff6d413b15b38827494bb317867d86f997350
Author: Richard Biener 
Date:   Sat Oct 26 14:27:14 2024 +0200

Remove dead vect_recog_mixed_size_cond_pattern

vect_recog_mixed_size_cond_pattern only applies to COMPARISON_CLASS_P
rhs1 COND_EXPRs which no longer appear - the following removes it.
Its testcases still pass, I believe the situation is mitigated by
bool pattern handling of the compare use in COND_EXPRs.

* tree-vect-patterns.cc (type_conversion_p): Remove.
(vect_recog_mixed_size_cond_pattern): Likewise.
(vect_vect_recog_func_ptrs): Remove 
vect_recog_mixed_size_cond_pattern
entry.

Diff:
---
 gcc/tree-vect-patterns.cc | 198 --
 1 file changed, 198 deletions(-)

diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index 46f439fb8a38..302101fa6a0e 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -313,55 +313,6 @@ vect_get_internal_def (vec_info *vinfo, tree op)
   return NULL;
 }
 
-/* Check whether NAME, an ssa-name used in STMT_VINFO,
-   is a result of a type promotion, such that:
- DEF_STMT: NAME = NOP (name0)
-   If CHECK_SIGN is TRUE, check that either both types are signed or both are
-   unsigned.  */
-
-static bool
-type_conversion_p (vec_info *vinfo, tree name, bool check_sign,
-  tree *orig_type, gimple **def_stmt, bool *promotion)
-{
-  tree type = TREE_TYPE (name);
-  tree oprnd0;
-  enum vect_def_type dt;
-
-  stmt_vec_info def_stmt_info;
-  if (!vect_is_simple_use (name, vinfo, &dt, &def_stmt_info, def_stmt))
-return false;
-
-  if (dt != vect_internal_def
-  && dt != vect_external_def && dt != vect_constant_def)
-return false;
-
-  if (!*def_stmt)
-return false;
-
-  if (!is_gimple_assign (*def_stmt))
-return false;
-
-  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
-return false;
-
-  oprnd0 = gimple_assign_rhs1 (*def_stmt);
-
-  *orig_type = TREE_TYPE (oprnd0);
-  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
-  || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
-return false;
-
-  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
-*promotion = true;
-  else
-*promotion = false;
-
-  if (!vect_is_simple_use (oprnd0, vinfo, &dt))
-return false;
-
-  return true;
-}
-
 /* Holds information about an input operand after some sign changes
and type promotions have been peeled away.  */
 class vect_unpromoted_value {
@@ -5408,154 +5359,6 @@ vect_recog_mod_var_pattern (vec_info *vinfo,
   return pattern_stmt;
 }
 
-/* Function vect_recog_mixed_size_cond_pattern
-
-   Try to find the following pattern:
-
- type x_t, y_t;
- TYPE a_T, b_T, c_T;
-   loop:
- S1  a_T = x_t CMP y_t ? b_T : c_T;
-
-   where type 'TYPE' is an integral type which has different size
-   from 'type'.  b_T and c_T are either constants (and if 'TYPE' is wider
-   than 'type', the constants need to fit into an integer type
-   with the same width as 'type') or results of conversion from 'type'.
-
-   Input:
-
-   * STMT_VINFO: The stmt from which the pattern search begins.
-
-   Output:
-
-   * TYPE_OUT: The type of the output of this pattern.
-
-   * Return value: A new stmt that will be used to replace the pattern.
-   Additionally a def_stmt is added.
-
-   a_it = x_t CMP y_t ? b_it : c_it;
-   a_T = (TYPE) a_it;  */
-
-static gimple *
-vect_recog_mixed_size_cond_pattern (vec_info *vinfo,
-   stmt_vec_info stmt_vinfo, tree *type_out)
-{
-  gimple *last_stmt = stmt_vinfo->stmt;
-  tree cond_expr, then_clause, else_clause;
-  tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
-  gimple *pattern_stmt, *def_stmt;
-  tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
-  gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
-  bool promotion;
-  tree comp_scalar_type;
-
-  if (!is_gimple_assign (last_stmt)
-  || gimple_assign_rhs_code (last_stmt) != COND_EXPR
-  || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
-return NULL;
-
-  cond_expr = gimple_assign_rhs1 (last_stmt);
-  then_clause = gimple_assign_rhs2 (last_stmt);
-  else_clause = gimple_assign_rhs3 (last_stmt);
-
-  if (!COMPARISON_CLASS_P (cond_expr))
-return NULL;
-
-  comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
-  comp_vectype = get_vectype_for_scalar_type (vinfo, comp_scalar_type);
-  if (comp_vectype == NULL_TREE)
-return NULL;
-
-  type = TREE_TYPE (gimple_assign_lhs (last_stmt));
-  if (types_compatible_p (type, comp_scalar_type)
-  || ((TREE_CODE (then_clause) != INTEGER_CST
-  || TREE_CODE (else_clause) != INTEGER_CST)
- && !INTEGRAL_TYPE_P (comp_scalar_type))
-  || !INTEGRAL_TYPE_P (type))
-return NULL;
-
-  if ((TREE_CODE (then_clause) != INTEGER_CST
-   &

[gcc r15-4741] Remove dead code in vectorizer pattern recog

2024-10-29 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:c738a15c50d7f0be8639579b04fbee8c817efcbf

commit r15-4741-gc738a15c50d7f0be8639579b04fbee8c817efcbf
Author: Richard Biener 
Date:   Sat Oct 26 14:23:15 2024 +0200

Remove dead code in vectorizer pattern recog

The following removes the code path in vect_recog_mask_conversion_pattern
dealing with comparisons in COND_EXPRs.  That can no longer happen.

* tree-vect-patterns.cc (vect_recog_mask_conversion_pattern):
Remove COMPARISON_CLASS_P rhs1 of COND_EXPR case and assert
it doesn't happen.

Diff:
---
 gcc/tree-vect-patterns.cc | 99 +--
 1 file changed, 2 insertions(+), 97 deletions(-)

diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index a6d246f570c0..46f439fb8a38 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -6240,8 +6240,6 @@ vect_recog_mask_conversion_pattern (vec_info *vinfo,
   tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
   tree vectype1, vectype2;
   stmt_vec_info pattern_stmt_info;
-  tree rhs1_op0 = NULL_TREE, rhs1_op1 = NULL_TREE;
-  tree rhs1_op0_type = NULL_TREE, rhs1_op1_type = NULL_TREE;
 
   /* Check for MASK_LOAD and MASK_STORE as well as COND_OP calls requiring mask
  conversion.  */
@@ -6331,60 +6329,13 @@ vect_recog_mask_conversion_pattern (vec_info *vinfo,
 {
   vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
 
+  gcc_assert (! COMPARISON_CLASS_P (rhs1));
   if (TREE_CODE (rhs1) == SSA_NAME)
{
  rhs1_type = integer_type_for_mask (rhs1, vinfo);
  if (!rhs1_type)
return NULL;
}
-  else if (COMPARISON_CLASS_P (rhs1))
-   {
- /* Check whether we're comparing scalar booleans and (if so)
-whether a better mask type exists than the mask associated
-with boolean-sized elements.  This avoids unnecessary packs
-and unpacks if the booleans are set from comparisons of
-wider types.  E.g. in:
-
-  int x1, x2, x3, x4, y1, y1;
-  ...
-  bool b1 = (x1 == x2);
-  bool b2 = (x3 == x4);
-  ... = b1 == b2 ? y1 : y2;
-
-it is better for b1 and b2 to use the mask type associated
-with int elements rather bool (byte) elements.  */
- rhs1_op0 = TREE_OPERAND (rhs1, 0);
- rhs1_op1 = TREE_OPERAND (rhs1, 1);
- if (!rhs1_op0 || !rhs1_op1)
-   return NULL;
- rhs1_op0_type = integer_type_for_mask (rhs1_op0, vinfo);
- rhs1_op1_type = integer_type_for_mask (rhs1_op1, vinfo);
-
- if (!rhs1_op0_type)
-   rhs1_type = TREE_TYPE (rhs1_op0);
- else if (!rhs1_op1_type)
-   rhs1_type = TREE_TYPE (rhs1_op1);
- else if (TYPE_PRECISION (rhs1_op0_type)
-  != TYPE_PRECISION (rhs1_op1_type))
-   {
- int tmp0 = (int) TYPE_PRECISION (rhs1_op0_type)
-- (int) TYPE_PRECISION (TREE_TYPE (lhs));
- int tmp1 = (int) TYPE_PRECISION (rhs1_op1_type)
-- (int) TYPE_PRECISION (TREE_TYPE (lhs));
- if ((tmp0 > 0 && tmp1 > 0) || (tmp0 < 0 && tmp1 < 0))
-   {
- if (abs (tmp0) > abs (tmp1))
-   rhs1_type = rhs1_op1_type;
- else
-   rhs1_type = rhs1_op0_type;
-   }
- else
-   rhs1_type = build_nonstandard_integer_type
- (TYPE_PRECISION (TREE_TYPE (lhs)), 1);
-   }
- else
-   rhs1_type = rhs1_op0_type;
-   }
   else
return NULL;
 
@@ -6400,55 +6351,9 @@ vect_recog_mask_conversion_pattern (vec_info *vinfo,
 its vector type) and behave as though the comparison was an SSA
 name from the outset.  */
   if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
-   TYPE_VECTOR_SUBPARTS (vectype2))
- && !rhs1_op0_type
- && !rhs1_op1_type)
+   TYPE_VECTOR_SUBPARTS (vectype2)))
return NULL;
 
-  /* If rhs1 is invariant and we can promote it leave the COND_EXPR
- in place, we can handle it in vectorizable_condition.  This avoids
-unnecessary promotion stmts and increased vectorization factor.  */
-  if (COMPARISON_CLASS_P (rhs1)
- && INTEGRAL_TYPE_P (rhs1_type)
- && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
-  TYPE_VECTOR_SUBPARTS (vectype2)))
-   {
- enum vect_def_type dt;
- if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
- && dt == vect_external_def
- && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
- && (dt == vect_external_def
- || dt == vect_constant_def))
-   {
- tree wide_scalar_type = build_nonstandard_integer_type
-   (vector_element_bits (vecty

[gcc r12-10790] rs6000: Fix PTImode handling in power8 swap optimization pass [PR116415]

2024-10-29 Thread Peter Bergner via Gcc-cvs
https://gcc.gnu.org/g:eeb72f26ea7e70baadf2e3b9e89e8f7055fec0a9

commit r12-10790-geeb72f26ea7e70baadf2e3b9e89e8f7055fec0a9
Author: Peter Bergner 
Date:   Fri Aug 23 11:45:40 2024 -0500

rs6000: Fix PTImode handling in power8 swap optimization pass [PR116415]

Our power8 swap optimization pass has some special handling for optimizing
swaps of TImode variables.  The test case reported in bugzilla uses a call
to  __atomic_compare_exchange, which introduces a variable of PTImode and
that does not get the same treatment as TImode leading to wrong code
generation.  The simple fix is to treat PTImode identically to TImode.

2024-08-23  Peter Bergner  

gcc/
PR target/116415
* config/rs6000/rs6000.h (TI_OR_PTI_MODE): New define.
* config/rs6000/rs6000-p8swap.cc (rs6000_analyze_swaps): Use it to
handle PTImode identically to TImode.

gcc/testsuite/
PR target/116415
* gcc.target/powerpc/pr116415.c: New test.

(cherry picked from commit 6e68c3df1540c5bafbb47343698bf4e270333fdb)

Diff:
---
 gcc/config/rs6000/rs6000-p8swap.cc  |  8 +++---
 gcc/config/rs6000/rs6000.h  |  2 ++
 gcc/testsuite/gcc.target/powerpc/pr116415.c | 42 +
 3 files changed, 48 insertions(+), 4 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-p8swap.cc 
b/gcc/config/rs6000/rs6000-p8swap.cc
index 62f5ca5bff4a..e97a32b4c236 100644
--- a/gcc/config/rs6000/rs6000-p8swap.cc
+++ b/gcc/config/rs6000/rs6000-p8swap.cc
@@ -2467,10 +2467,10 @@ rs6000_analyze_swaps (function *fun)
mode = V4SImode;
}
 
- if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
+ if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || TI_OR_PTI_MODE (mode))
{
  insn_entry[uid].is_relevant = 1;
- if (mode == TImode || mode == V1TImode
+ if (TI_OR_PTI_MODE (mode) || mode == V1TImode
  || FLOAT128_VECTOR_P (mode))
insn_entry[uid].is_128_int = 1;
  if (DF_REF_INSN_INFO (mention))
@@ -2495,10 +2495,10 @@ rs6000_analyze_swaps (function *fun)
  && ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (SET_DEST (insn
mode = GET_MODE (SET_DEST (insn));
 
- if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
+ if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || TI_OR_PTI_MODE (mode))
{
  insn_entry[uid].is_relevant = 1;
- if (mode == TImode || mode == V1TImode
+ if (TI_OR_PTI_MODE (mode) || mode == V1TImode
  || FLOAT128_VECTOR_P (mode))
insn_entry[uid].is_128_int = 1;
  if (DF_REF_INSN_INFO (mention))
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 5588a4bae027..48ba4df1da5c 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -1051,6 +1051,8 @@ enum data_align { align_abi, align_opt, align_both };
   (ALTIVEC_VECTOR_MODE (MODE) || VSX_VECTOR_MODE (MODE)
\
|| (MODE) == V2DImode || (MODE) == V1TImode)
 
+#define TI_OR_PTI_MODE(mode) ((mode) == TImode || (mode) == PTImode)
+
 /* Post-reload, we can't use any new AltiVec registers, as we already
emitted the vrsave mask.  */
 
diff --git a/gcc/testsuite/gcc.target/powerpc/pr116415.c 
b/gcc/testsuite/gcc.target/powerpc/pr116415.c
new file mode 100644
index ..08cc282e2c27
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr116415.c
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+/* { dg-require-effective-target p8vector_hw } */
+/* { dg-require-effective-target int128 } */
+/* { dg-options "-O2 -mdejagnu-cpu=power8" } */
+
+/* PR 116415: Verify our Power8 swap optimization pass doesn't incorrectly swap
+   PTImode values.  They should be handled identically to TImode values.  */
+
+#include 
+#include 
+#include 
+
+typedef union {
+  struct {
+uint64_t a;
+uint64_t b;
+  } t;
+  __uint128_t data;
+} Value;
+Value value, next;
+
+void
+bug (Value *val, Value *nxt)
+{
+  for (;;) {
+nxt->t.a = val->t.a + 1;
+nxt->t.b = val->t.b + 2;
+if (__atomic_compare_exchange (&val->data, &val->data, &nxt->data,
+  0, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE))
+  break;
+  }
+}
+
+int
+main (void)
+{
+  bug (&value, &next);
+  printf ("%lu %lu\n", value.t.a, value.t.b);
+  if (value.t.a != 1 || value.t.b != 2)
+abort ();
+  return 0;
+}


[gcc r15-4738] tree-optimization/117343 - decide_masked_load_lanes and stale graph

2024-10-29 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:cc79e9866ba33dea0256078f4557d92d80d9

commit r15-4738-gcc79e9866ba33dea0256078f4557d92d80d9
Author: Richard Biener 
Date:   Tue Oct 29 11:26:13 2024 +0100

tree-optimization/117343 - decide_masked_load_lanes and stale graph

It turns out decide_masked_load_lanes accesses a stale SLP graph
so the following re-builds it instead.

PR tree-optimization/117343
* tree-vect-slp.cc (vect_optimize_slp_pass::build_vertices):
Support re-building the SLP graph.
(vect_optimize_slp_pass::run): Re-build the SLP graph before
decide_masked_load_lanes.

Diff:
---
 gcc/tree-vect-slp.cc | 4 
 1 file changed, 4 insertions(+)

diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 2e98a943e061..a7f064bb0edf 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -5632,6 +5632,8 @@ vect_optimize_slp_pass::build_vertices ()
   hash_set visited;
   unsigned i;
   slp_instance instance;
+  m_vertices.truncate (0);
+  m_leafs.truncate (0);
   FOR_EACH_VEC_ELT (m_vinfo->slp_instances, i, instance)
 build_vertices (visited, SLP_INSTANCE_TREE (instance));
 }
@@ -7244,6 +7246,8 @@ vect_optimize_slp_pass::run ()
 }
   else
 remove_redundant_permutations ();
+  free_graph (m_slpg);
+  build_graph ();
   decide_masked_load_lanes ();
   free_graph (m_slpg);
 }


[gcc r15-4740] libstdc++: Fix complexity of drop_view::begin() const [PR112641]

2024-10-29 Thread Patrick Palka via Libstdc++-cvs
https://gcc.gnu.org/g:7f622ee83fbbcf4a4ca70e020db8a0ce4b556b61

commit r15-4740-g7f622ee83fbbcf4a4ca70e020db8a0ce4b556b61
Author: Patrick Palka 
Date:   Tue Oct 29 09:26:19 2024 -0400

libstdc++: Fix complexity of drop_view::begin() const [PR112641]

Views are required to have an amortized O(1) begin(), but our drop_view's
const begin overload is O(n) for non-common ranges with a non-sized
sentinel.  This patch reimplements it so that it's O(1) always.  See
also LWG 4009.

PR libstdc++/112641

libstdc++-v3/ChangeLog:

* include/std/ranges (drop_view::begin): Reimplement const
overload so that it's O(1) always.
* testsuite/std/ranges/adaptors/drop.cc (test10): New test.

Reviewed-by: Jonathan Wakely 

Diff:
---
 libstdc++-v3/include/std/ranges|  4 ++--
 libstdc++-v3/testsuite/std/ranges/adaptors/drop.cc | 12 
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/include/std/ranges b/libstdc++-v3/include/std/ranges
index cebe10683f91..743429dbceae 100644
--- a/libstdc++-v3/include/std/ranges
+++ b/libstdc++-v3/include/std/ranges
@@ -2664,8 +2664,8 @@ namespace views::__adaptor
   begin() const
requires random_access_range && sized_range
   {
-   return ranges::next(ranges::begin(_M_base), _M_count,
-   ranges::end(_M_base));
+   return ranges::begin(_M_base) + ranges::min(ranges::distance(_M_base),
+   _M_count);
   }
 
   constexpr auto
diff --git a/libstdc++-v3/testsuite/std/ranges/adaptors/drop.cc 
b/libstdc++-v3/testsuite/std/ranges/adaptors/drop.cc
index c9987c61e3c1..0bd5bebb785d 100644
--- a/libstdc++-v3/testsuite/std/ranges/adaptors/drop.cc
+++ b/libstdc++-v3/testsuite/std/ranges/adaptors/drop.cc
@@ -274,6 +274,17 @@ test09()
   static_assert(!requires { views::all | drop; });
 }
 
+constexpr bool
+test10()
+{
+  // PR libstdc++/112641 - drop_view::begin const may have O(n) complexity
+  const auto s = ranges::subrange(views::iota(size_t(1)), size_t(-1));
+  const auto r = ranges::drop_view(s, s.size() - 1);
+  const auto b = r.begin(); // time out
+  VERIFY( *b == size_t(-1) );
+  return true;
+}
+
 int
 main()
 {
@@ -286,4 +297,5 @@ main()
   test07();
   test08();
   test09();
+  static_assert(test10());
 }


[gcc r15-4729] c: Add __builtin_stdc_rotate_{left, right} builtins [PR117030]

2024-10-29 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:972f653cad2aedcfa901614566506c1c2e668766

commit r15-4729-g972f653cad2aedcfa901614566506c1c2e668766
Author: Jakub Jelinek 
Date:   Tue Oct 29 09:06:25 2024 +0100

c: Add __builtin_stdc_rotate_{left,right} builtins [PR117030]

I believe the new C2Y <stdbit.h> type-generic functions
stdc_rotate_{left,right} have the same problems the other stdc_*
type-generic functions had.  If we want to support arbitrary
unsigned _BitInt(N), don't want to use statement expressions
(so that one can actually use them in static variable initializers),
don't want to evaluate the arguments multiple times and don't want
to expand the arguments multiple times during preprocessing to avoid the
old tgmath preprocessing bloat, we need a built-in for those.

The following patch adds those.  And as we need to support rotations by 0
and tree-ssa-forwprop.cc is only able to pattern recognize with BIT_AND_EXPR
for that case (i.e. for power of two widths), the patch just constructs
LROTATE_EXPR/RROTATE_EXPR right away.  Negative second arguments are
considered UB, while positive ones are modulo precision.

2024-10-29  Jakub Jelinek  

PR c/117030
gcc/
* doc/extend.texi (__builtin_stdc_rotate_left,
__builtin_stdc_rotate_right): Document.
gcc/c-family/
* c-common.cc (c_common_reswords): Add __builtin_stdc_rotate_left
and __builtin_stdc_rotate_right.
* c-ubsan.cc (ubsan_instrument_shift): For {L,R}ROTATE_EXPR
just check if op1 is negative.
gcc/c/
* c-parser.cc: Include asan.h and c-family/c-ubsan.h.
(c_parser_postfix_expression): Handle __builtin_stdc_rotate_left
and __builtin_stdc_rotate_right.
* c-fold.cc (c_fully_fold_internal): Handle LROTATE_EXPR and
RROTATE_EXPR.
gcc/testsuite/
* gcc.dg/builtin-stdc-rotate-1.c: New test.
* gcc.dg/builtin-stdc-rotate-2.c: New test.
* gcc.dg/ubsan/builtin-stdc-rotate-1.c: New test.
* gcc.dg/ubsan/builtin-stdc-rotate-2.c: New test.

Diff:
---
 gcc/c-family/c-common.cc   |   2 +
 gcc/c-family/c-ubsan.cc|  15 +-
 gcc/c/c-fold.cc|   7 +-
 gcc/c/c-parser.cc  |  85 +++-
 gcc/doc/extend.texi|  26 +++
 gcc/testsuite/gcc.dg/builtin-stdc-rotate-1.c   | 235 +
 gcc/testsuite/gcc.dg/builtin-stdc-rotate-2.c   |  70 ++
 gcc/testsuite/gcc.dg/ubsan/builtin-stdc-rotate-1.c |  14 ++
 gcc/testsuite/gcc.dg/ubsan/builtin-stdc-rotate-2.c |  14 ++
 9 files changed, 464 insertions(+), 4 deletions(-)

diff --git a/gcc/c-family/c-common.cc b/gcc/c-family/c-common.cc
index a0e6a3118d73..88827ac2bdba 100644
--- a/gcc/c-family/c-common.cc
+++ b/gcc/c-family/c-common.cc
@@ -448,6 +448,8 @@ const struct c_common_resword c_common_reswords[] =
   { "__builtin_stdc_has_single_bit", RID_BUILTIN_STDC, D_CONLY },
   { "__builtin_stdc_leading_ones", RID_BUILTIN_STDC, D_CONLY },
   { "__builtin_stdc_leading_zeros", RID_BUILTIN_STDC, D_CONLY },
+  { "__builtin_stdc_rotate_left", RID_BUILTIN_STDC, D_CONLY },
+  { "__builtin_stdc_rotate_right", RID_BUILTIN_STDC, D_CONLY },
   { "__builtin_stdc_trailing_ones", RID_BUILTIN_STDC, D_CONLY },
   { "__builtin_stdc_trailing_zeros", RID_BUILTIN_STDC, D_CONLY },
   { "__builtin_tgmath", RID_BUILTIN_TGMATH, D_CONLY },
diff --git a/gcc/c-family/c-ubsan.cc b/gcc/c-family/c-ubsan.cc
index a49158b7ad6d..ab67635d1644 100644
--- a/gcc/c-family/c-ubsan.cc
+++ b/gcc/c-family/c-ubsan.cc
@@ -176,8 +176,19 @@ ubsan_instrument_shift (location_t loc, enum tree_code 
code,
   op0 = unshare_expr (op0);
   op1 = unshare_expr (op1);
 
-  t = fold_convert_loc (loc, op1_utype, op1);
-  t = fold_build2 (GT_EXPR, boolean_type_node, t, uprecm1);
+  if (code == LROTATE_EXPR || code == RROTATE_EXPR)
+{
+  /* For rotates just check for negative op1.  */
+  if (TYPE_UNSIGNED (type1))
+   return NULL_TREE;
+  t = fold_build2 (LT_EXPR, boolean_type_node, op1,
+  build_int_cst (type1, 0));
+}
+  else
+{
+  t = fold_convert_loc (loc, op1_utype, op1);
+  t = fold_build2 (GT_EXPR, boolean_type_node, t, uprecm1);
+}
 
   /* If this is not a signed operation, don't perform overflow checks.
  Also punt on bit-fields.  */
diff --git a/gcc/c/c-fold.cc b/gcc/c/c-fold.cc
index 06085f5f58f5..deb6896589f5 100644
--- a/gcc/c/c-fold.cc
+++ b/gcc/c/c-fold.cc
@@ -328,6 +328,8 @@ c_fully_fold_internal (tree expr, bool in_init, bool 
*maybe_const_operands,
 case EXACT_DIV_EXPR:
 case LSHIFT_EXPR:
 case RSHIFT_EXPR:
+case LROTATE_EXPR:
+case RROTATE_EXPR:
 case BIT_IOR_EXPR:
 case BIT_XOR_EXPR:
 case BIT_AND_EXPR:
@@ -389,7 +391,10 @@ c_fully_fold_internal (tree expr, bool in_init, bo

[gcc r15-4743] Internal-fn: Introduce new IFN MASK_LEN_STRIDED_LOAD{STORE}

2024-10-29 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:1fdee26ee985385c8043ea0ca6ff05ffdbc34f9c

commit r15-4743-g1fdee26ee985385c8043ea0ca6ff05ffdbc34f9c
Author: Pan Li 
Date:   Wed Oct 23 16:24:19 2024 +0800

Internal-fn: Introduce new IFN MASK_LEN_STRIDED_LOAD{STORE}

This patch introduces new IFNs for strided load and store.

LOAD:  v = MASK_LEN_STRIDED_LOAD (ptr, stride, mask, len, bias)
STORE: MASK_LEN_STRIDED_STORE (ptr, stride, v, mask, len, bias)

The IFNs target code similar to the example below:

void foo (int * a, int * b, int stride, int n)
{
  for (int i = 0; i < n; i++)
a[i * stride] = b[i * stride];
}

The following test suites pass with this patch:
* The rv64gcv full regression test.
* The x86 bootstrap test.
* The x86 full regression test.

gcc/ChangeLog:

* internal-fn.cc (strided_load_direct): Add new define direct
for strided load.
(strided_store_direct): Ditto but for store.
(expand_strided_load_optab_fn): Add new func to expand the IFN
MASK_LEN_STRIDED_LOAD in middle-end.
(expand_strided_store_optab_fn): Ditto but for store.
(direct_strided_load_optab_supported_p): Add define for stride
load optab supported.
(direct_strided_store_optab_supported_p): Ditto but for store.
(internal_fn_len_index): Add strided load/store len index.
(internal_fn_mask_index): Ditto but for mask.
(internal_fn_stored_value_index): Add strided store value index.
* internal-fn.def (MASK_LEN_STRIDED_LOAD): Add new IFN for
strided load.
(MASK_LEN_STRIDED_STORE): Ditto but for store.
* optabs.def (mask_len_strided_load_optab): Add strided load optab.
(mask_len_strided_store_optab): Add strided store optab.

Signed-off-by: Pan Li 
Co-Authored-By: Juzhe-Zhong 

Diff:
---
 gcc/internal-fn.cc  | 71 +
 gcc/internal-fn.def |  6 +
 gcc/optabs.def  |  2 ++
 3 files changed, 79 insertions(+)

diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index e30285203c9b..1b3fe7be0479 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -159,6 +159,7 @@ init_internal_fns ()
 #define load_lanes_direct { -1, -1, false }
 #define mask_load_lanes_direct { -1, -1, false }
 #define gather_load_direct { 3, 1, false }
+#define strided_load_direct { -1, -1, false }
 #define len_load_direct { -1, -1, false }
 #define mask_len_load_direct { -1, 4, false }
 #define mask_store_direct { 3, 2, false }
@@ -168,6 +169,7 @@ init_internal_fns ()
 #define vec_cond_mask_len_direct { 1, 1, false }
 #define vec_cond_direct { 2, 0, false }
 #define scatter_store_direct { 3, 1, false }
+#define strided_store_direct { 1, 1, false }
 #define len_store_direct { 3, 3, false }
 #define mask_len_store_direct { 4, 5, false }
 #define vec_set_direct { 3, 3, false }
@@ -3712,6 +3714,64 @@ expand_gather_load_optab_fn (internal_fn, gcall *stmt, 
direct_optab optab)
   assign_call_lhs (lhs, lhs_rtx, &ops[0]);
 }
 
+/* Expand MASK_LEN_STRIDED_LOAD call CALL by optab OPTAB.  */
+
+static void
+expand_strided_load_optab_fn (ATTRIBUTE_UNUSED internal_fn, gcall *stmt,
+ direct_optab optab)
+{
+  tree lhs = gimple_call_lhs (stmt);
+  tree base = gimple_call_arg (stmt, 0);
+  tree stride = gimple_call_arg (stmt, 1);
+
+  rtx lhs_rtx = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+  rtx base_rtx = expand_normal (base);
+  rtx stride_rtx = expand_normal (stride);
+
+  unsigned i = 0;
+  class expand_operand ops[6];
+  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
+
+  create_output_operand (&ops[i++], lhs_rtx, mode);
+  create_address_operand (&ops[i++], base_rtx);
+  create_address_operand (&ops[i++], stride_rtx);
+
+  i = add_mask_and_len_args (ops, i, stmt);
+  expand_insn (direct_optab_handler (optab, mode), i, ops);
+
+  if (!rtx_equal_p (lhs_rtx, ops[0].value))
+emit_move_insn (lhs_rtx, ops[0].value);
+}
+
+/* Expand MASK_LEN_STRIDED_STORE call CALL by optab OPTAB.  */
+
+static void
+expand_strided_store_optab_fn (ATTRIBUTE_UNUSED internal_fn, gcall *stmt,
+  direct_optab optab)
+{
+  internal_fn fn = gimple_call_internal_fn (stmt);
+  int rhs_index = internal_fn_stored_value_index (fn);
+
+  tree base = gimple_call_arg (stmt, 0);
+  tree stride = gimple_call_arg (stmt, 1);
+  tree rhs = gimple_call_arg (stmt, rhs_index);
+
+  rtx base_rtx = expand_normal (base);
+  rtx stride_rtx = expand_normal (stride);
+  rtx rhs_rtx = expand_normal (rhs);
+
+  unsigned i = 0;
+  class expand_operand ops[6];
+  machine_mode mode = TYPE_MODE (TREE_TYPE (rhs));
+
+  create_address_operand (&ops[i++], base_rtx);
+  create_address_operand (&ops[i++], stride_rtx);
+  create_input_operand (&ops[i++], rhs_rtx, mode);
+
+  i = add_mask_and_len_args (ops, i, stmt);
+  ex

[gcc r15-4744] Vect: Introduce MASK_LEN_STRIDED_LOAD{STORE} to loop vectorizer

2024-10-29 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:a0292ddb21475e8fd238e201d3b64f0ab02ace04

commit r15-4744-ga0292ddb21475e8fd238e201d3b64f0ab02ace04
Author: Pan Li 
Date:   Wed Oct 23 16:36:28 2024 +0800

Vect: Introduce MASK_LEN_STRIDED_LOAD{STORE} to loop vectorizer

This patch would like to allow generation of MASK_LEN_STRIDED_LOAD{STORE} IR
for invariant stride memory access.  For example as below

void foo (int * __restrict a, int * __restrict b, int stride, int n)
{
for (int i = 0; i < n; i++)
  a[i*stride] = b[i*stride] + 100;
}

Before this patch:
  66   │   _73 = .SELECT_VL (ivtmp_71, POLY_INT_CST [4, 4]);
  67   │   _52 = _54 * _73;
  68   │   vect__5.16_61 = .MASK_LEN_GATHER_LOAD (vectp_b.14_59, _58, 4, { 
0, ... }, { -1, ... }, _73, 0);
  69   │   vect__7.17_63 = vect__5.16_61 + { 100, ... };
  70   │   .MASK_LEN_SCATTER_STORE (vectp_a.18_67, _58, 4, vect__7.17_63, { 
-1, ... }, _73, 0);
  71   │   vectp_b.14_60 = vectp_b.14_59 + _52;
  72   │   vectp_a.18_68 = vectp_a.18_67 + _52;
  73   │   ivtmp_72 = ivtmp_71 - _73;

After this patch:
  60   │   _70 = .SELECT_VL (ivtmp_68, POLY_INT_CST [4, 4]);
  61   │   _52 = _54 * _70;
  62   │   vect__5.16_58 = .MASK_LEN_STRIDED_LOAD (vectp_b.14_56, _55, { 0, 
... }, { -1, ... }, _70, 0);
  63   │   vect__7.17_60 = vect__5.16_58 + { 100, ... };
  64   │   .MASK_LEN_STRIDED_STORE (vectp_a.18_64, _55, vect__7.17_60, { 
-1, ... }, _70, 0);
  65   │   vectp_b.14_57 = vectp_b.14_56 + _52;
  66   │   vectp_a.18_65 = vectp_a.18_64 + _52;
  67   │   ivtmp_69 = ivtmp_68 - _70;

The following test suites pass with this patch:
* The x86 bootstrap test.
* The x86 full regression test.
* The riscv full regression test.

gcc/ChangeLog:

* tree-vect-stmts.cc (vect_get_strided_load_store_ops): Handle
MASK_LEN_STRIDED_LOAD{STORE} after supported check.
(vectorizable_store): Generate MASK_LEN_STRIDED_STORE when the offset
of the scatter is not of vector type.
(vectorizable_load): Ditto but for load.

Signed-off-by: Pan Li 
Co-Authored-By: Juzhe-Zhong 

Diff:
---
 gcc/tree-vect-stmts.cc | 45 -
 1 file changed, 36 insertions(+), 9 deletions(-)

diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 4a824d169550..9a2c2ea753e6 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -2951,6 +2951,15 @@ vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
   *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
 }
 
+  internal_fn ifn
+= DR_IS_READ (dr) ? IFN_MASK_LEN_STRIDED_LOAD : IFN_MASK_LEN_STRIDED_STORE;
+  if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
+{
+  *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo,
+  unshare_expr (DR_STEP (dr)));
+  return;
+}
+
   /* The offset given in GS_INFO can have pointer type, so use the element
  type of the vector instead.  */
   tree offset_type = TREE_TYPE (gs_info->offset_vectype);
@@ -9195,10 +9204,20 @@ vectorizable_store (vec_info *vinfo,
 
  gcall *call;
  if (final_len && final_mask)
-   call = gimple_build_call_internal
-(IFN_MASK_LEN_SCATTER_STORE, 7, dataref_ptr,
- vec_offset, scale, vec_oprnd, final_mask,
- final_len, bias);
+   {
+ if (VECTOR_TYPE_P (TREE_TYPE (vec_offset)))
+   call = gimple_build_call_internal (
+ IFN_MASK_LEN_SCATTER_STORE, 7, dataref_ptr,
+ vec_offset, scale, vec_oprnd, final_mask, final_len,
+ bias);
+ else
+   /* Non-vector offset indicates that prefer to take
+  MASK_LEN_STRIDED_STORE instead of the
+  IFN_MASK_SCATTER_STORE with direct stride arg.  */
+   call = gimple_build_call_internal (
+ IFN_MASK_LEN_STRIDED_STORE, 6, dataref_ptr,
+ vec_offset, vec_oprnd, final_mask, final_len, bias);
+   }
  else if (final_mask)
call = gimple_build_call_internal
 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr,
@@ -11195,11 +11214,19 @@ vectorizable_load (vec_info *vinfo,
 
  gcall *call;
  if (final_len && final_mask)
-   call
- = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD, 7,
-   dataref_ptr, vec_offset,
-   scale, zero, final_mask,
-

[gcc r15-4745] RISC-V: Adjust the gather-scatter testcases due to middle-end change

2024-10-29 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:372060d78715d9a4ab756b1b95796bd04c0be2bf

commit r15-4745-g372060d78715d9a4ab756b1b95796bd04c0be2bf
Author: Pan Li 
Date:   Wed Oct 23 16:43:37 2024 +0800

RISC-V: Adjust the gather-scatter testcases due to middle-end change

After we have MASK_LEN_STRIDED_LOAD{STORE} in the middle-end, the
strided cases need to be adjusted for the IR check.

The following test suites pass with this patch:
* The full RISC-V regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/gather-scatter/strided_load-1.c:
Adjust IR check for MASK_LEN_STRIDED_LOAD.
* gcc.target/riscv/rvv/autovec/gather-scatter/strided_load-2.c:
Ditto.
* gcc.target/riscv/rvv/autovec/gather-scatter/strided_store-1.c:
Ditto but for store.
* gcc.target/riscv/rvv/autovec/gather-scatter/strided_store-2.c:
Ditto.

Signed-off-by: Pan Li 
Co-Authored-By: Juzhe-Zhong 

Diff:
---
 .../gcc.target/riscv/rvv/autovec/gather-scatter/strided_load-1.c| 2 +-
 .../gcc.target/riscv/rvv/autovec/gather-scatter/strided_load-2.c| 2 +-
 .../gcc.target/riscv/rvv/autovec/gather-scatter/strided_store-1.c   | 2 +-
 .../gcc.target/riscv/rvv/autovec/gather-scatter/strided_store-2.c   | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/strided_load-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/strided_load-1.c
index 53263d16ae24..79b39f102bf2 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/strided_load-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/strided_load-1.c
@@ -40,6 +40,6 @@
 
 TEST_ALL (TEST_LOOP)
 
-/* { dg-final { scan-tree-dump-times " \.MASK_LEN_GATHER_LOAD" 66 "optimized" 
} } */
+/* { dg-final { scan-tree-dump-times " \.MASK_LEN_STRIDED_LOAD " 66 
"optimized" } } */
 /* { dg-final { scan-tree-dump-not " \.GATHER_LOAD" "optimized" } } */
 /* { dg-final { scan-tree-dump-not " \.MASK_GATHER_LOAD" "optimized" } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/strided_load-2.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/strided_load-2.c
index 6fef474cf8e2..8a452e547a39 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/strided_load-2.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/strided_load-2.c
@@ -40,6 +40,6 @@
 
 TEST_ALL (TEST_LOOP)
 
-/* { dg-final { scan-tree-dump-times " \.MASK_LEN_GATHER_LOAD" 33 "optimized" 
} } */
+/* { dg-final { scan-tree-dump-times " \.MASK_LEN_STRIDED_LOAD " 33 
"optimized" } } */
 /* { dg-final { scan-tree-dump-not " \.GATHER_LOAD" "optimized" } } */
 /* { dg-final { scan-tree-dump-not " \.MASK_GATHER_LOAD" "optimized" } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/strided_store-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/strided_store-1.c
index ad23ed421290..ec8c3a5c63a4 100644
--- 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/strided_store-1.c
+++ 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/strided_store-1.c
@@ -40,6 +40,6 @@
 
 TEST_ALL (TEST_LOOP)
 
-/* { dg-final { scan-tree-dump-times " \.MASK_LEN_SCATTER_STORE" 66 
"optimized" } } */
+/* { dg-final { scan-tree-dump-times " \.MASK_LEN_STRIDED_STORE" 66 
"optimized" } } */
 /* { dg-final { scan-tree-dump-not " \.SCATTER_STORE" "optimized" } } */
 /* { dg-final { scan-tree-dump-not " \.MASK_SCATTER_STORE" "optimized" } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/strided_store-2.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/strided_store-2.c
index 65f3f00b8c26..b433b5b52104 100644
--- 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/strided_store-2.c
+++ 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/strided_store-2.c
@@ -40,6 +40,6 @@
 
 TEST_ALL (TEST_LOOP)
 
-/* { dg-final { scan-tree-dump-times " \.MASK_LEN_SCATTER_STORE" 44 
"optimized" } } */
+/* { dg-final { scan-tree-dump-times " \.MASK_LEN_STRIDED_STORE " 44 
"optimized" } } */
 /* { dg-final { scan-tree-dump-not " \.SCATTER_STORE" "optimized" } } */
 /* { dg-final { scan-tree-dump-not " \.MASK_SCATTER_STORE" "optimized" } } */


[gcc r15-4754] aarch64: Remove unnecessary casts to rtx_code [PR117349]

2024-10-29 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:9dd9a88b75334bc079b8ab5fb2dbb5d56765bd60

commit r15-4754-g9dd9a88b75334bc079b8ab5fb2dbb5d56765bd60
Author: Andrew Pinski 
Date:   Tue Oct 29 13:01:30 2024 -0700

aarch64: Remove unnecessary casts to rtx_code [PR117349]

In aarch64_gen_ccmp_first/aarch64_gen_ccmp_next, the casts
were no longer needed after r14-3412-gbf64392d66f291 which
changed the type of the arguments to rtx_code.

In aarch64_rtx_costs, they were no longer needed since
r12-4828-g1d5c43db79b7ea which changed the type of code
to rtx_code.

Pushed as obvious after a build/test for aarch64-linux-gnu.

gcc/ChangeLog:

PR target/117349
* config/aarch64/aarch64.cc (aarch64_rtx_costs): Remove
unnecessary casts to rtx_code.
(aarch64_gen_ccmp_first): Likewise.
(aarch64_gen_ccmp_next): Likewise.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/config/aarch64/aarch64.cc | 51 ++-
 1 file changed, 21 insertions(+), 30 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index a6cc00e74abd..b2dd23ccb26f 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -14286,7 +14286,7 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer 
ATTRIBUTE_UNUSED,
   /* BFM.  */
  if (speed)
*cost += extra_cost->alu.bfi;
- *cost += rtx_cost (op1, VOIDmode, (enum rtx_code) code, 1, speed);
+ *cost += rtx_cost (op1, VOIDmode, code, 1, speed);
 }
 
  return true;
@@ -14666,8 +14666,7 @@ cost_minus:
  *cost += extra_cost->alu.extend_arith;
 
op1 = aarch64_strip_extend (op1, true);
-   *cost += rtx_cost (op1, VOIDmode,
-  (enum rtx_code) GET_CODE (op1), 0, speed);
+   *cost += rtx_cost (op1, VOIDmode, GET_CODE (op1), 0, speed);
return true;
  }
 
@@ -14678,9 +14677,7 @@ cost_minus:
 || aarch64_shift_p (GET_CODE (new_op1)))
&& code != COMPARE)
  {
-   *cost += aarch64_rtx_mult_cost (new_op1, MULT,
-   (enum rtx_code) code,
-   speed);
+   *cost += aarch64_rtx_mult_cost (new_op1, MULT, code, speed);
return true;
  }
 
@@ -14781,8 +14778,7 @@ cost_plus:
  *cost += extra_cost->alu.extend_arith;
 
op0 = aarch64_strip_extend (op0, true);
-   *cost += rtx_cost (op0, VOIDmode,
-  (enum rtx_code) GET_CODE (op0), 0, speed);
+   *cost += rtx_cost (op0, VOIDmode, GET_CODE (op0), 0, speed);
return true;
  }
 
@@ -14896,8 +14892,7 @@ cost_plus:
  && aarch64_mask_and_shift_for_ubfiz_p (int_mode, op1,
 XEXP (op0, 1)))
{
- *cost += rtx_cost (XEXP (op0, 0), int_mode,
-(enum rtx_code) code, 0, speed);
+ *cost += rtx_cost (XEXP (op0, 0), int_mode, code, 0, speed);
  if (speed)
*cost += extra_cost->alu.bfx;
 
@@ -14907,8 +14902,7 @@ cost_plus:
{
/* We possibly get the immediate for free, this is not
   modelled.  */
- *cost += rtx_cost (op0, int_mode,
-(enum rtx_code) code, 0, speed);
+ *cost += rtx_cost (op0, int_mode, code, 0, speed);
  if (speed)
*cost += extra_cost->alu.logical;
 
@@ -14943,10 +14937,8 @@ cost_plus:
}
 
  /* In both cases we want to cost both operands.  */
- *cost += rtx_cost (new_op0, int_mode, (enum rtx_code) code,
-0, speed);
- *cost += rtx_cost (op1, int_mode, (enum rtx_code) code,
-1, speed);
+ *cost += rtx_cost (new_op0, int_mode, code, 0, speed);
+ *cost += rtx_cost (op1, int_mode, code, 1, speed);
 
  return true;
}
@@ -14967,7 +14959,7 @@ cost_plus:
   /* MVN-shifted-reg.  */
   if (op0 != x)
 {
- *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
+ *cost += rtx_cost (op0, mode, code, 0, speed);
 
   if (speed)
 *cost += extra_cost->alu.log_shift;
@@ -14983,7 +14975,7 @@ cost_plus:
   rtx newop1 = XEXP (op0, 1);
   rtx op0_stripped = aarch64_strip_shift (newop0);
 
- *cost += rtx_cost (newop1, mode, (enum rtx_code) code, 1, speed);
+ *cost += rtx_cost (newop1, mode, code, 1, speed);
  *cost += rtx_cost (op0_stripped, mode, XOR, 0, speed);
 
   if (speed)
@@ -15149,7 +15141,7 @@ cost_plus:
  && kn

[gcc r15-4761] Revert "Simplify switch bit test clustering algorithm"

2024-10-29 Thread Andi Kleen via Gcc-cvs
https://gcc.gnu.org/g:220e0570f0861c1fd531ef0b309692deb2509a67

commit r15-4761-g220e0570f0861c1fd531ef0b309692deb2509a67
Author: Andi Kleen 
Date:   Tue Oct 29 16:41:57 2024 -0700

Revert "Simplify switch bit test clustering algorithm"

This reverts commit 3d06e9c3e07e13eab715e19dafbcfc1a0b7e43d6.

Diff:
---
 gcc/testsuite/gcc.dg/pr21643.c |  2 +-
 gcc/testsuite/gcc.dg/tree-ssa/if-to-switch-6.c |  2 +-
 gcc/testsuite/gcc.dg/tree-ssa/switch-1.c   |  2 +-
 gcc/testsuite/gcc.target/aarch64/pr99988.c |  2 +-
 gcc/tree-switch-conversion.cc  | 79 --
 5 files changed, 40 insertions(+), 47 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/pr21643.c b/gcc/testsuite/gcc.dg/pr21643.c
index a722a83ecb59..42517b5af1e5 100644
--- a/gcc/testsuite/gcc.dg/pr21643.c
+++ b/gcc/testsuite/gcc.dg/pr21643.c
@@ -1,6 +1,6 @@
 /* PR tree-optimization/21643 */
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-reassoc1-details --param 
logical-op-non-short-circuit=1 -fno-bit-tests" } */
+/* { dg-options "-O2 -fdump-tree-reassoc1-details --param 
logical-op-non-short-circuit=1" } */
 
 int
 f1 (unsigned char c)
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/if-to-switch-6.c 
b/gcc/testsuite/gcc.dg/tree-ssa/if-to-switch-6.c
index 657af770e438..b1640673eae1 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/if-to-switch-6.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/if-to-switch-6.c
@@ -39,4 +39,4 @@ int main(int argc, char **argv)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump "Condition chain" "iftoswitch" } } */
+/* { dg-final { scan-tree-dump-not "Condition chain" "iftoswitch" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/switch-1.c 
b/gcc/testsuite/gcc.dg/tree-ssa/switch-1.c
index f1654aba6d99..6f70c9de0c19 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/switch-1.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/switch-1.c
@@ -107,4 +107,4 @@ int foo5 (int x)
   }
 }
 
-/* { dg-final { scan-tree-dump ";; GIMPLE switch case clusters: BT:10-62 
600-700 BT:1000-1021 11" "switchlower1" } } */
+/* { dg-final { scan-tree-dump ";; GIMPLE switch case clusters: BT:10-62 
600-700 JT:1000-1021 11" "switchlower1" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/pr99988.c 
b/gcc/testsuite/gcc.target/aarch64/pr99988.c
index c09ce67c0fa9..7cca49629446 100644
--- a/gcc/testsuite/gcc.target/aarch64/pr99988.c
+++ b/gcc/testsuite/gcc.target/aarch64/pr99988.c
@@ -1,5 +1,5 @@
 /* { dg-do compile { target lp64 } } */
-/* { dg-options "-O2 -mbranch-protection=standard -fno-bit-tests" } */
+/* { dg-options "-O2 -mbranch-protection=standard" } */
 /* { dg-final { scan-assembler-times {bti j} 13 } } */
 int a;
 int c();
diff --git a/gcc/tree-switch-conversion.cc b/gcc/tree-switch-conversion.cc
index 9b4ddcd0146d..852419b2f4be 100644
--- a/gcc/tree-switch-conversion.cc
+++ b/gcc/tree-switch-conversion.cc
@@ -1783,62 +1783,55 @@ bit_test_cluster::find_bit_tests (vec 
&clusters, int max_c)
 return clusters.copy ();
 
   unsigned l = clusters.length ();
-  vec output;
+  auto_vec min;
+  min.reserve (l + 1);
 
-  output.create (l);
+  min.quick_push (min_cluster_item (0, 0, 0));
 
-  /* Look at sliding BITS_PER_WORD sized windows in the switch value space
- and determine if they are suitable for a bit test cluster.  Worst case
- this can examine every value BITS_PER_WORD-1 times.  */
-  unsigned end;
-  for (unsigned i = 0; i < l; i += end)
+  for (unsigned i = 1; i <= l; i++)
 {
-  HOST_WIDE_INT values = 0;
-  hash_set targets;
-  cluster *start_cluster = clusters[i];
+  /* Set minimal # of clusters with i-th item to infinite.  */
+  min.quick_push (min_cluster_item (INT_MAX, INT_MAX, INT_MAX));
 
-  end = 0;
-  while (i + end < l)
+  for (unsigned j = 0; j < i; j++)
{
- cluster *end_cluster = clusters[i + end];
-
- /* Does value range fit into the BITS_PER_WORD window?  */
- HOST_WIDE_INT w = cluster::get_range (start_cluster->get_low (),
-   end_cluster->get_high ());
- if (w == 0 || w > BITS_PER_WORD)
-   break;
-
- /* Compute # of values tested for new case.  */
- HOST_WIDE_INT r = 1;
- if (!end_cluster->is_single_value_p ())
-   r = cluster::get_range (end_cluster->get_low (),
-   end_cluster->get_high ());
- if (r == 0)
-   break;
-
- /* Check for max # of targets.  */
- if (targets.elements() == m_max_case_bit_tests
- && !targets.contains (end_cluster->m_case_bb))
-   break;
-
- targets.add (end_cluster->m_case_bb);
- values += r;
- end++;
+ if (min[j].m_count + 1 < min[i].m_count
+ && can_be_handled (clusters, j, i - 1))
+   min[i] = min_cluster_item (min[j].m_count + 1, j, INT_MAX);
}
 
-  if (is_beneficial (values, targets.elements ()))
+  gcc_checking_assert (min[i]

[gcc r15-4757] Only do switch bit test clustering when multiple labels point to same bb

2024-10-29 Thread Andi Kleen via Gcc-cvs
https://gcc.gnu.org/g:a4e2b13888267f2581ac03f076aa0d32cd045adb

commit r15-4757-ga4e2b13888267f2581ac03f076aa0d32cd045adb
Author: Andi Kleen 
Date:   Wed Oct 16 14:07:18 2024 -0700

Only do switch bit test clustering when multiple labels point to same bb

The bit cluster code generation strategy is only beneficial when
multiple case labels point to the same code. Do a quick check if
that is the case before trying to cluster.

This fixes the switch part of PR117091, where all case labels are unique;
however, it doesn't address the performance problems for non-unique
cases.

gcc/ChangeLog:

PR middle-end/117091
* gimple-if-to-switch.cc (if_chain::is_beneficial): Update
find_bit_test call.
* tree-switch-conversion.cc (bit_test_cluster::find_bit_tests):
Get max_c argument and bail out early if all case labels are
unique.
(switch_decision_tree::compute_cases_per_edge): Record number of
targets per label and return.
(switch_decision_tree::analyze_switch_statement): ... pass to
find_bit_tests.
* tree-switch-conversion.h: Update prototypes.

Diff:
---
 gcc/gimple-if-to-switch.cc|  2 +-
 gcc/tree-switch-conversion.cc | 23 ---
 gcc/tree-switch-conversion.h  |  5 +++--
 3 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/gcc/gimple-if-to-switch.cc b/gcc/gimple-if-to-switch.cc
index 4246e12606f9..8f71f43864f0 100644
--- a/gcc/gimple-if-to-switch.cc
+++ b/gcc/gimple-if-to-switch.cc
@@ -255,7 +255,7 @@ if_chain::is_beneficial ()
   else
 output.release ();
 
-  output = bit_test_cluster::find_bit_tests (filtered_clusters);
+  output = bit_test_cluster::find_bit_tests (filtered_clusters, 2);
   r = output.length () < filtered_clusters.length ();
   if (r)
 dump_clusters (&output, "BT can be built");
diff --git a/gcc/tree-switch-conversion.cc b/gcc/tree-switch-conversion.cc
index 33023875ef97..852419b2f4be 100644
--- a/gcc/tree-switch-conversion.cc
+++ b/gcc/tree-switch-conversion.cc
@@ -1773,12 +1773,13 @@ jump_table_cluster::is_beneficial (const vec 
&,
 }
 
 /* Find bit tests of given CLUSTERS, where all members of the vector
-   are of type simple_cluster.  New clusters are returned.  */
+   are of type simple_cluster.   MAX_C is the approx max number of cases per
+   label.  New clusters are returned.  */
 
 vec
-bit_test_cluster::find_bit_tests (vec &clusters)
+bit_test_cluster::find_bit_tests (vec &clusters, int max_c)
 {
-  if (!is_enabled ())
+  if (!is_enabled () || max_c == 1)
 return clusters.copy ();
 
   unsigned l = clusters.length ();
@@ -2207,18 +2208,26 @@ bit_test_cluster::hoist_edge_and_branch_if_true 
(gimple_stmt_iterator *gsip,
 }
 
 /* Compute the number of case labels that correspond to each outgoing edge of
-   switch statement.  Record this information in the aux field of the edge.  */
+   switch statement.  Record this information in the aux field of the edge.
+   Return the approx max number of cases per edge.  */
 
-void
+int
 switch_decision_tree::compute_cases_per_edge ()
 {
+  int max_c = 0;
   reset_out_edges_aux (m_switch);
   int ncases = gimple_switch_num_labels (m_switch);
   for (int i = ncases - 1; i >= 1; --i)
 {
   edge case_edge = gimple_switch_edge (cfun, m_switch, i);
   case_edge->aux = (void *) ((intptr_t) (case_edge->aux) + 1);
+  /* For a range case add one extra. That's enough for the bit
+cluster heuristic.  */
+  if ((intptr_t)case_edge->aux > max_c)
+   max_c = (intptr_t)case_edge->aux +
+   !!CASE_HIGH (gimple_switch_label (m_switch, i));
 }
+  return max_c;
 }
 
 /* Analyze switch statement and return true when the statement is expanded
@@ -2236,7 +2245,7 @@ switch_decision_tree::analyze_switch_statement ()
   m_case_bbs.reserve (l);
   m_case_bbs.quick_push (default_bb);
 
-  compute_cases_per_edge ();
+  int max_c = compute_cases_per_edge ();
 
   for (unsigned i = 1; i < l; i++)
 {
@@ -2257,7 +2266,7 @@ switch_decision_tree::analyze_switch_statement ()
   reset_out_edges_aux (m_switch);
 
   /* Find bit-test clusters.  */
-  vec output = bit_test_cluster::find_bit_tests (clusters);
+  vec output = bit_test_cluster::find_bit_tests (clusters, max_c);
 
   /* Find jump table clusters.  */
   vec output2;
diff --git a/gcc/tree-switch-conversion.h b/gcc/tree-switch-conversion.h
index 6468995eb316..e6a85fa60258 100644
--- a/gcc/tree-switch-conversion.h
+++ b/gcc/tree-switch-conversion.h
@@ -399,7 +399,7 @@ public:
 
   /* Find bit tests of given CLUSTERS, where all members of the vector
  are of type simple_cluster.  New clusters are returned.  */
-  static vec find_bit_tests (vec &clusters);
+  static vec find_bit_tests (vec &clusters, int max_c);
 
   /* Return true when RANGE of case values with UNIQ labels
  can build a bit test.  */
@@ -576,8 +576,9 @@ public:
   bool try_switch_expans

[gcc r15-4758] Simplify switch bit test clustering algorithm

2024-10-29 Thread Andi Kleen via Gcc-cvs
https://gcc.gnu.org/g:3d06e9c3e07e13eab715e19dafbcfc1a0b7e43d6

commit r15-4758-g3d06e9c3e07e13eab715e19dafbcfc1a0b7e43d6
Author: Andi Kleen 
Date:   Fri Oct 25 15:04:06 2024 -0700

Simplify switch bit test clustering algorithm

The current switch bit test clustering enumerates all possible case
cluster combinations to find the ones that best fit the bit test
constraints.  This causes performance problems with very large switches.

For bit test clustering which happens naturally in word sized chunks
I don't think such an expensive algorithm is really needed.

This patch implements a simple greedy algorithm that walks
the sorted list and examines word sized windows and tries
to cluster them.

Surprisingly, the new algorithm gives consistently better clusters
for the examples I tried.

For example from the gcc bootstrap:

old: 0-15 16-31 96-175
new: 0-31 96-175

I'm not fully sure why that is, probably some bug in the old
algorithm? This even shows up in the test suite, where if-to-switch-6
can now generate a switch, as well as a case in switch-1.c.

I don't have a proof that the new algorithm is always as good or better,
but so far at least I don't see any counter examples.

It also fixes the excessive compile time in PR117091,
however this was already fixed by an earlier patch
that doesn't run clustering when no targets have multiple
values.

gcc/ChangeLog:

PR middle-end/117091
* tree-switch-conversion.cc (bit_test_cluster::find_bit_tests):
Change clustering algorithm to simple greedy.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/if-to-switch-6.c: Allow condition chain.
* gcc.dg/tree-ssa/switch-1.c: Allow more bit tests.
* gcc.dg/pr21643.c: Use -fno-bit-tests
* gcc.target/aarch64/pr99988.c: Use -fno-bit-tests

Diff:
---
 gcc/testsuite/gcc.dg/pr21643.c |  2 +-
 gcc/testsuite/gcc.dg/tree-ssa/if-to-switch-6.c |  2 +-
 gcc/testsuite/gcc.dg/tree-ssa/switch-1.c   |  2 +-
 gcc/testsuite/gcc.target/aarch64/pr99988.c |  2 +-
 gcc/tree-switch-conversion.cc  | 79 ++
 5 files changed, 47 insertions(+), 40 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/pr21643.c b/gcc/testsuite/gcc.dg/pr21643.c
index 42517b5af1e5..a722a83ecb59 100644
--- a/gcc/testsuite/gcc.dg/pr21643.c
+++ b/gcc/testsuite/gcc.dg/pr21643.c
@@ -1,6 +1,6 @@
 /* PR tree-optimization/21643 */
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-reassoc1-details --param 
logical-op-non-short-circuit=1" } */
+/* { dg-options "-O2 -fdump-tree-reassoc1-details --param 
logical-op-non-short-circuit=1 -fno-bit-tests" } */
 
 int
 f1 (unsigned char c)
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/if-to-switch-6.c 
b/gcc/testsuite/gcc.dg/tree-ssa/if-to-switch-6.c
index b1640673eae1..657af770e438 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/if-to-switch-6.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/if-to-switch-6.c
@@ -39,4 +39,4 @@ int main(int argc, char **argv)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-not "Condition chain" "iftoswitch" } } */
+/* { dg-final { scan-tree-dump "Condition chain" "iftoswitch" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/switch-1.c 
b/gcc/testsuite/gcc.dg/tree-ssa/switch-1.c
index 6f70c9de0c19..f1654aba6d99 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/switch-1.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/switch-1.c
@@ -107,4 +107,4 @@ int foo5 (int x)
   }
 }
 
-/* { dg-final { scan-tree-dump ";; GIMPLE switch case clusters: BT:10-62 
600-700 JT:1000-1021 11" "switchlower1" } } */
+/* { dg-final { scan-tree-dump ";; GIMPLE switch case clusters: BT:10-62 
600-700 BT:1000-1021 11" "switchlower1" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/pr99988.c 
b/gcc/testsuite/gcc.target/aarch64/pr99988.c
index 7cca49629446..c09ce67c0fa9 100644
--- a/gcc/testsuite/gcc.target/aarch64/pr99988.c
+++ b/gcc/testsuite/gcc.target/aarch64/pr99988.c
@@ -1,5 +1,5 @@
 /* { dg-do compile { target lp64 } } */
-/* { dg-options "-O2 -mbranch-protection=standard" } */
+/* { dg-options "-O2 -mbranch-protection=standard -fno-bit-tests" } */
 /* { dg-final { scan-assembler-times {bti j} 13 } } */
 int a;
 int c();
diff --git a/gcc/tree-switch-conversion.cc b/gcc/tree-switch-conversion.cc
index 852419b2f4be..9b4ddcd0146d 100644
--- a/gcc/tree-switch-conversion.cc
+++ b/gcc/tree-switch-conversion.cc
@@ -1783,55 +1783,62 @@ bit_test_cluster::find_bit_tests (vec 
&clusters, int max_c)
 return clusters.copy ();
 
   unsigned l = clusters.length ();
-  auto_vec min;
-  min.reserve (l + 1);
+  vec output;
 
-  min.quick_push (min_cluster_item (0, 0, 0));
+  output.create (l);
 
-  for (unsigned i = 1; i <= l; i++)
+  /* Look at sliding BITS_PER_WORD sized windows in the switch value space
+ and determine if they are suitable for a bit test cluster.  Worst case
+ 

[gcc(refs/users/aoliva/heads/testme)] [PR83782] ifunc: back-propagate ifunc_resolver to aliases

2024-10-29 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:44091b3480985244d5d981886f3c5624e8ca1571

commit 44091b3480985244d5d981886f3c5624e8ca1571
Author: Alexandre Oliva 
Date:   Thu Dec 14 03:21:28 2023 -0300

[PR83782] ifunc: back-propagate ifunc_resolver to aliases

gcc.target/i386/mvc10.c fails with -fPIE on ia32 because we omit the
@PLT mark when calling an alias to an indirect function.  Such aliases
aren't marked as ifunc_resolvers in the cgraph, so the test that would
have forced the PLT call fails.

I've arranged for ifunc_resolver to be back-propagated to aliases, and
relaxed the test that required the ifunc attribute to be attached
directly to the decl, rather than taken from an aliased decl, when the
ifunc_resolver bit is set.


for  gcc/ChangeLog

PR target/83782
* cgraph.h (symtab_node::set_ifunc_resolver): New, overloaded.
Back-propagate flag to aliases.
* cgraph.cc (cgraph_node::create): Use set_ifunc_resolver.
(cgraph_node::create_alias): Likewise.
* lto-cgraph.cc (input_node): Likewise.
* multiple_target.cc (create_dispatcher_calls): Propagate to
aliases when redirecting them.
* symtab.cc (symtab_node::verify_base): Accept ifunc_resolver
set in an alias to another ifunc_resolver node.
(symtab_node::resolve_alias): Propagate ifunc_resolver from
resolved target to alias.
* varasm.cc (do_assemble_alias): Checking for the attribute.

Diff:
---
 gcc/cgraph.cc  |  4 ++--
 gcc/cgraph.h   | 13 +
 gcc/lto-cgraph.cc  |  2 +-
 gcc/multiple_target.cc |  2 ++
 gcc/symtab.cc  | 15 ++-
 gcc/varasm.cc  |  5 -
 6 files changed, 36 insertions(+), 5 deletions(-)

diff --git a/gcc/cgraph.cc b/gcc/cgraph.cc
index b5d096d530c0..9f6493c9b156 100644
--- a/gcc/cgraph.cc
+++ b/gcc/cgraph.cc
@@ -520,7 +520,7 @@ cgraph_node::create (tree decl)
 }
 
   if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (decl)))
-node->ifunc_resolver = true;
+node->set_ifunc_resolver ();
 
   node->register_symbol ();
   maybe_record_nested_function (node);
@@ -578,7 +578,7 @@ cgraph_node::create_alias (tree alias, tree target)
   if (lookup_attribute ("weakref", DECL_ATTRIBUTES (alias)) != NULL)
 alias_node->transparent_alias = alias_node->weakref = true;
   if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (alias)))
-alias_node->ifunc_resolver = true;
+alias_node->set_ifunc_resolver ();
   return alias_node;
 }
 
diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index a8c3224802c1..261d4745d142 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -471,6 +471,19 @@ public:
 return decl->decl_with_vis.symtab_node;
   }
 
+  /* Worked for the nonstatic set_ifunc_resolver, to vback-propagate
+ ifunc_resolver in the alias chain.  */
+  static bool set_ifunc_resolver (symtab_node *n, void * = NULL)
+  {
+n->ifunc_resolver = true;
+return false;
+  }
+
+  /* Set the ifunc_resolver bit in this node and in any aliases thereof.  */
+  void set_ifunc_resolver () {
+call_for_symbol_and_aliases (set_ifunc_resolver, NULL, true);
+  }
+
   /* Try to find a symtab node for declaration DECL and if it does not
  exist or if it corresponds to an inline clone, create a new one.  */
   static inline symtab_node * get_create (tree node);
diff --git a/gcc/lto-cgraph.cc b/gcc/lto-cgraph.cc
index 1d4311a8832b..e8af5ea14a98 100644
--- a/gcc/lto-cgraph.cc
+++ b/gcc/lto-cgraph.cc
@@ -1299,7 +1299,7 @@ input_node (struct lto_file_decl_data *file_data,
   node = symtab->create_empty ();
   node->decl = fn_decl;
   if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (fn_decl)))
-   node->ifunc_resolver = 1;
+   node->set_ifunc_resolver ();
   node->register_symbol ();
 }
 
diff --git a/gcc/multiple_target.cc b/gcc/multiple_target.cc
index 1fdd279da04a..1deaa3d3c3ec 100644
--- a/gcc/multiple_target.cc
+++ b/gcc/multiple_target.cc
@@ -156,6 +156,8 @@ create_dispatcher_calls (struct cgraph_node *node)
  source->create_reference (inode, IPA_REF_ALIAS);
  if (inode->get_comdat_group ())
source->add_to_same_comdat_group (inode);
+ if (!source->ifunc_resolver)
+   source->set_ifunc_resolver ();
}
  else
gcc_unreachable ();
diff --git a/gcc/symtab.cc b/gcc/symtab.cc
index 3b018ab3ea21..08607497970f 100644
--- a/gcc/symtab.cc
+++ b/gcc/symtab.cc
@@ -1109,9 +1109,19 @@ symtab_node::verify_base (void)
   error ("function symbol is not function");
   error_found = true;
}
+  /* If the ifunc attribute is present, the node must be marked as
+ifunc_resolver, but it may also be marked on a node that
+doesn't have the attribute, if it's an alias to another
+marked node.  The resolver node itself is an alias to the
+function that perfor

[gcc(refs/users/aoliva/heads/testme)] [testsuite] fix auto-init-8.c on ia32 PIC expectations

2024-10-29 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:43587057dc68be9c94ba41700a881449f8351821

commit 43587057dc68be9c94ba41700a881449f8351821
Author: Alexandre Oliva 
Date:   Tue Oct 29 18:06:48 2024 -0300

[testsuite] fix auto-init-8.c on ia32 PIC expectations

When PIC/PIE is enabled by default on ia32, we select a precomputed
constant instead of constructing it from an SImode constant and
vec_duplicate, at two of the three expected occurrences of that
constant.


for  gcc/testsuite/ChangeLog

* gcc.target/i386/auto-init-8.c: Adjust for ia32 PIC/PIE.

Diff:
---
 gcc/testsuite/gcc.target/i386/auto-init-8.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/i386/auto-init-8.c 
b/gcc/testsuite/gcc.target/i386/auto-init-8.c
index 666ee14d2bc9..868bea48e064 100644
--- a/gcc/testsuite/gcc.target/i386/auto-init-8.c
+++ b/gcc/testsuite/gcc.target/i386/auto-init-8.c
@@ -29,7 +29,11 @@ double foo()
   return result;
 }
 
-/* { dg-final { scan-rtl-dump-times "0xfefefefe" 1 "expand" } } */
+/* ia32 pic/pie prefers loading a V16QI precomputed constant to using
+   vec_duplicate to construct it, so we only get one SI constant for temp1[2],
+   and the initializers for temp3 and temp4 use the V16QI loaded from LC0 in
+   aligned and unaligned stores.  */
+/* { dg-final { scan-rtl-dump-times "0xfefefefe|\\\*\\\.LC0" 3 
"expand" } } */
 /* { dg-final { scan-rtl-dump-times "\\\[0xfefefefefefefefe\\\]" 2 "expand" } 
} */
 /* { dg-final { scan-rtl-dump-times "0xfffe\\\]\\\) repeated x16" 
2 "expand" } } */


[gcc(refs/users/aoliva/heads/testme)] [testsuite] fix pr70321.c PIC expectations

2024-10-29 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:633c9b04a93dd63268942f7704563fd4bd633d54

commit 633c9b04a93dd63268942f7704563fd4bd633d54
Author: Alexandre Oliva 
Date:   Tue Oct 29 18:06:14 2024 -0300

[testsuite] fix pr70321.c PIC expectations

When we select a non-bx get_pc_thunk, we get an extra mov to set up
the PIC register before the abort call.  Expect that mov or a
get_pc_thunk.bx call.


for  gcc/testsuite/ChangeLog

* gcc.target/i386/pr70321.c: Cope with non-bx get_pc_thunk.

Diff:
---
 gcc/testsuite/gcc.target/i386/pr70321.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/i386/pr70321.c 
b/gcc/testsuite/gcc.target/i386/pr70321.c
index 58f5f5661c7a..287b7da1b950 100644
--- a/gcc/testsuite/gcc.target/i386/pr70321.c
+++ b/gcc/testsuite/gcc.target/i386/pr70321.c
@@ -9,4 +9,8 @@ void foo (long long ixi)
 
 /* { dg-final { scan-assembler-times "mov" 1 { target nonpic } } } */
 /* get_pc_thunk adds an extra mov insn.  */
-/* { dg-final { scan-assembler-times "mov" 2 { target { ! nonpic } } } } */
+/* Choosing a non-bx get_pc_thunk requires another mov before the abort call.
+   So we require a match of either that mov or the get_pc_thunk.bx call, in
+   addition to the other 2 movs.  (Hopefully there won't be more calls for a
+   false positive.)  */
+/* { dg-final { scan-assembler-times "mov|call\[^\n\r]*get_pc_thunk\.bx" 3 { 
target { ! nonpic } } } } */


[gcc/aoliva/heads/testme] (4 commits) [testsuite] disable PIE on ia32 on more tests

2024-10-29 Thread Alexandre Oliva via Gcc-cvs
The branch 'aoliva/heads/testme' was updated to point to:

 9fee52ae3e79... [testsuite] disable PIE on ia32 on more tests

It previously pointed to:

 5a9cf11ec7d1... fold fold_truth_andor field merging into ifcombine

Diff:

!!! WARNING: THE FOLLOWING COMMITS ARE NO LONGER ACCESSIBLE (LOST):
---

  5a9cf11... fold fold_truth_andor field merging into ifcombine
  374dec3... handle TRUTH_ANDIF cond exprs in ifcombine_replace_cond
  bb30d95... ifcombine across noncontiguous blocks
  46fa49f... extend ifcombine_replace_cond to handle noncontiguous ifcom
  b7de5a6... adjust update_profile_after_ifcombine for noncontiguous ifc
  5b44192... introduce ifcombine_replace_cond
  db11a63... drop redundant ifcombine_ifandif parm
  683dabf... allow vuses in ifcombine blocks


Summary of changes (added commits):
---

  9fee52a... [testsuite] disable PIE on ia32 on more tests
  4358705... [testsuite] fix auto-init-8.c on ia32 PIC expectations
  633c9b0... [testsuite] fix pr70321.c PIC expectations
  44091b3... [PR83782] ifunc: back-propagate ifunc_resolver to aliases


[gcc r15-4759] aarch64: Use canonicalize_comparison in ccmp expansion [PR117346]

2024-10-29 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:3d8cd34a450e9ffe2b2ac8a0c8eb33fd5d613483

commit r15-4759-g3d8cd34a450e9ffe2b2ac8a0c8eb33fd5d613483
Author: Andrew Pinski 
Date:   Tue Oct 29 09:16:18 2024 -0700

aarch64: Use canonicalize_comparison in ccmp expansion [PR117346]

While testing the patch for PR 85605 on aarch64, it was noticed that the
imm_choice_comparison.c test failed. This was because
canonicalize_comparison was not being called in the ccmp case. This can
be noticed without the patch for PR 85605, as evidenced by the new
testcase.

Bootstrapped and tested on aarch64-linux-gnu.

PR target/117346

gcc/ChangeLog:

* config/aarch64/aarch64.cc (aarch64_gen_ccmp_first): Call
canonicalize_comparison before figuring out the cmp_mode/cc_mode.
(aarch64_gen_ccmp_next): Likewise.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/imm_choice_comparison-1.c: New test.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/config/aarch64/aarch64.cc  |  6 
 .../gcc.target/aarch64/imm_choice_comparison-1.c   | 42 ++
 2 files changed, 48 insertions(+)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index b2dd23ccb26f..df170d875f60 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -27344,6 +27344,9 @@ aarch64_gen_ccmp_first (rtx_insn **prep_seq, rtx_insn 
**gen_seq,
   if (op_mode == VOIDmode)
 op_mode = GET_MODE (op1);
 
+  if (CONST_SCALAR_INT_P (op1))
+canonicalize_comparison (op_mode, &code, &op1);
+
   switch (op_mode)
 {
 case E_QImode:
@@ -27420,6 +27423,9 @@ aarch64_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn 
**gen_seq, rtx prev,
   if (op_mode == VOIDmode)
 op_mode = GET_MODE (op1);
 
+  if (CONST_SCALAR_INT_P (op1))
+canonicalize_comparison (op_mode, &cmp_code, &op1);
+
   switch (op_mode)
 {
 case E_QImode:
diff --git a/gcc/testsuite/gcc.target/aarch64/imm_choice_comparison-1.c 
b/gcc/testsuite/gcc.target/aarch64/imm_choice_comparison-1.c
new file mode 100644
index ..2afebe1a3493
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/imm_choice_comparison-1.c
@@ -0,0 +1,42 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+/* PR target/117346 */
+/* Make sure going through ccmp uses similar to non ccmp-case. */
+/* This is similar to imm_choice_comparison.c's check except to force
+   the use of ccmp by reording the comparison and putting the cast before. */
+
+/*
+** check:
+** ...
+** mov w[0-9]+, -16777217
+** ...
+*/
+
+int
+check (int x, int y)
+{
+  unsigned xu = x;
+  if (xu > 0xfefe && x > y)
+return 100;
+
+  return x;
+}
+
+/*
+** check1:
+** ...
+** mov w[0-9]+, -16777217
+** ...
+*/
+
+int
+check1 (int x, int y)
+{
+  unsigned xu = x;
+  if (x > y && xu > 0xfefe)
+return 100;
+
+  return x;
+}


[gcc r13-9155] Fix miscompilation of function containing __builtin_unreachable

2024-10-29 Thread Eric Botcazou via Gcc-cvs
https://gcc.gnu.org/g:4bcc87a66bf265805fba308593b6c7ef03aab3d3

commit r13-9155-g4bcc87a66bf265805fba308593b6c7ef03aab3d3
Author: Eric Botcazou 
Date:   Tue Oct 29 21:40:34 2024 +0100

Fix miscompilation of function containing __builtin_unreachable

This is a wrong-code generation on the SPARC for a function containing
a call to __builtin_unreachable caused by the delay slot scheduling pass,
and more specifically the find_end_label function which has these lines:

  /* Otherwise, see if there is a label at the end of the function. If there
 is, it must be that RETURN insns aren't needed, so that is our return
 label and we don't have to do anything else.  */

The comment was correct 20 years ago but no longer is nowadays in the
presence of RTL epilogues and calls to __builtin_unreachable, so the
patch just removes the associated two lines of code:

  else if (LABEL_P (insn))
    *plabel = as_a <rtx_code_label *> (insn);

and otherwise contains just adjustments to the commentary.

gcc/
PR rtl-optimization/117327
* reorg.cc (find_end_label): Do not return a dangling label at the
end of the function and adjust commentary.

gcc/testsuite/
* gcc.c-torture/execute/20241029-1.c: New test.

Diff:
---
 gcc/reorg.cc | 38 ++--
 gcc/testsuite/gcc.c-torture/execute/20241029-1.c | 23 ++
 2 files changed, 38 insertions(+), 23 deletions(-)

diff --git a/gcc/reorg.cc b/gcc/reorg.cc
index e5ab5e452668..3d53f3cddcec 100644
--- a/gcc/reorg.cc
+++ b/gcc/reorg.cc
@@ -336,13 +336,14 @@ insn_sets_resource_p (rtx insn, struct resources *res,
   return resource_conflicts_p (&insn_sets, res);
 }
 
-/* Find a label at the end of the function or before a RETURN.  If there
-   is none, try to make one.  If that fails, returns 0.
+/* Find a label before a RETURN.  If there is none, try to make one; if this
+   fails, return 0.  KIND is either ret_rtx or simple_return_rtx, indicating
+   which type of RETURN we're looking for.
 
-   The property of such a label is that it is placed just before the
-   epilogue or a bare RETURN insn, so that another bare RETURN can be
-   turned into a jump to the label unconditionally.  In particular, the
-   label cannot be placed before a RETURN insn with a filled delay slot.
+   The property of the label is that it is placed just before a bare RETURN
+   insn, so that another bare RETURN can be turned into a jump to the label
+   unconditionally.  In particular, the label cannot be placed before a
+   RETURN insn with a filled delay slot.
 
??? There may be a problem with the current implementation.  Suppose
we start with a bare RETURN insn and call find_end_label.  It may set
@@ -353,9 +354,7 @@ insn_sets_resource_p (rtx insn, struct resources *res,
Note that this is probably mitigated by the following observation:
once function_return_label is made, it is very likely the target of
a jump, so filling the delay slot of the RETURN will be much more
-   difficult.
-   KIND is either simple_return_rtx or ret_rtx, indicating which type of
-   return we're looking for.  */
+   difficult.  */
 
 static rtx_code_label *
 find_end_label (rtx kind)
@@ -375,10 +374,7 @@ find_end_label (rtx kind)
   if (*plabel)
 return *plabel;
 
-  /* Otherwise, see if there is a label at the end of the function.  If there
- is, it must be that RETURN insns aren't needed, so that is our return
- label and we don't have to do anything else.  */
-
+  /* Otherwise, scan the insns backward from the end of the function.  */
   insn = get_last_insn ();
   while (NOTE_P (insn)
 || (NONJUMP_INSN_P (insn)
@@ -386,9 +382,8 @@ find_end_label (rtx kind)
 || GET_CODE (PATTERN (insn)) == CLOBBER)))
 insn = PREV_INSN (insn);
 
-  /* When a target threads its epilogue we might already have a
- suitable return insn.  If so put a label before it for the
- function_return_label.  */
+  /* First, see if there is a RETURN at the end of the function.  If so,
+ put the label before it.  */
   if (BARRIER_P (insn)
   && JUMP_P (PREV_INSN (insn))
   && PATTERN (PREV_INSN (insn)) == kind)
@@ -397,8 +392,8 @@ find_end_label (rtx kind)
   rtx_code_label *label = gen_label_rtx ();
   LABEL_NUSES (label) = 0;
 
-  /* Put the label before any USE insns that may precede the RETURN
-insn.  */
+  /* Put the label before any USE insns that may precede the
+RETURN insn.  */
   while (GET_CODE (temp) == USE)
temp = PREV_INSN (temp);
 
@@ -406,15 +401,12 @@ find_end_label (rtx kind)
   *plabel = label;
 }
 
-  else if (LABEL_P (insn))
-    *plabel = as_a <rtx_code_label *> (insn);
+  /* If the basic block reordering pass has moved the return insn to some
+ other place, try to locate it again and put the lab

[gcc r12-10791] Fix miscompilation of function containing __builtin_unreachable

2024-10-29 Thread Eric Botcazou via Gcc-cvs
https://gcc.gnu.org/g:0711e018b77eac34efc6d2e1e66cdf16e01b47c0

commit r12-10791-g0711e018b77eac34efc6d2e1e66cdf16e01b47c0
Author: Eric Botcazou 
Date:   Tue Oct 29 21:40:34 2024 +0100

Fix miscompilation of function containing __builtin_unreachable

This is a wrong-code generation on the SPARC for a function containing
a call to __builtin_unreachable caused by the delay slot scheduling pass,
and more specifically the find_end_label function which has these lines:

  /* Otherwise, see if there is a label at the end of the function. If there
 is, it must be that RETURN insns aren't needed, so that is our return
 label and we don't have to do anything else.  */

The comment was correct 20 years ago but no longer is nowadays in the
presence of RTL epilogues and calls to __builtin_unreachable, so the
patch just removes the associated two lines of code:

  else if (LABEL_P (insn))
    *plabel = as_a <rtx_code_label *> (insn);

and otherwise contains just adjustments to the commentary.

gcc/
PR rtl-optimization/117327
* reorg.cc (find_end_label): Do not return a dangling label at the
end of the function and adjust commentary.

gcc/testsuite/
* gcc.c-torture/execute/20241029-1.c: New test.

Diff:
---
 gcc/reorg.cc | 38 ++--
 gcc/testsuite/gcc.c-torture/execute/20241029-1.c | 23 ++
 2 files changed, 38 insertions(+), 23 deletions(-)

diff --git a/gcc/reorg.cc b/gcc/reorg.cc
index 7624f5149068..df5d57fc0afb 100644
--- a/gcc/reorg.cc
+++ b/gcc/reorg.cc
@@ -336,13 +336,14 @@ insn_sets_resource_p (rtx insn, struct resources *res,
   return resource_conflicts_p (&insn_sets, res);
 }
 
-/* Find a label at the end of the function or before a RETURN.  If there
-   is none, try to make one.  If that fails, returns 0.
+/* Find a label before a RETURN.  If there is none, try to make one; if this
+   fails, return 0.  KIND is either ret_rtx or simple_return_rtx, indicating
+   which type of RETURN we're looking for.
 
-   The property of such a label is that it is placed just before the
-   epilogue or a bare RETURN insn, so that another bare RETURN can be
-   turned into a jump to the label unconditionally.  In particular, the
-   label cannot be placed before a RETURN insn with a filled delay slot.
+   The property of the label is that it is placed just before a bare RETURN
+   insn, so that another bare RETURN can be turned into a jump to the label
+   unconditionally.  In particular, the label cannot be placed before a
+   RETURN insn with a filled delay slot.
 
??? There may be a problem with the current implementation.  Suppose
we start with a bare RETURN insn and call find_end_label.  It may set
@@ -353,9 +354,7 @@ insn_sets_resource_p (rtx insn, struct resources *res,
Note that this is probably mitigated by the following observation:
once function_return_label is made, it is very likely the target of
a jump, so filling the delay slot of the RETURN will be much more
-   difficult.
-   KIND is either simple_return_rtx or ret_rtx, indicating which type of
-   return we're looking for.  */
+   difficult.  */
 
 static rtx_code_label *
 find_end_label (rtx kind)
@@ -375,10 +374,7 @@ find_end_label (rtx kind)
   if (*plabel)
 return *plabel;
 
-  /* Otherwise, see if there is a label at the end of the function.  If there
- is, it must be that RETURN insns aren't needed, so that is our return
- label and we don't have to do anything else.  */
-
+  /* Otherwise, scan the insns backward from the end of the function.  */
   insn = get_last_insn ();
   while (NOTE_P (insn)
 || (NONJUMP_INSN_P (insn)
@@ -386,9 +382,8 @@ find_end_label (rtx kind)
 || GET_CODE (PATTERN (insn)) == CLOBBER)))
 insn = PREV_INSN (insn);
 
-  /* When a target threads its epilogue we might already have a
- suitable return insn.  If so put a label before it for the
- function_return_label.  */
+  /* First, see if there is a RETURN at the end of the function.  If so,
+ put the label before it.  */
   if (BARRIER_P (insn)
   && JUMP_P (PREV_INSN (insn))
   && PATTERN (PREV_INSN (insn)) == kind)
@@ -397,8 +392,8 @@ find_end_label (rtx kind)
   rtx_code_label *label = gen_label_rtx ();
   LABEL_NUSES (label) = 0;
 
-  /* Put the label before any USE insns that may precede the RETURN
-insn.  */
+  /* Put the label before any USE insns that may precede the
+RETURN insn.  */
   while (GET_CODE (temp) == USE)
temp = PREV_INSN (temp);
 
@@ -406,15 +401,12 @@ find_end_label (rtx kind)
   *plabel = label;
 }
 
-  else if (LABEL_P (insn))
-    *plabel = as_a <rtx_code_label *> (insn);
+  /* If the basic block reordering pass has moved the return insn to some
+ other place, try to lo

[gcc r15-4756] Disable -fbit-tests and -fjump-tables at -O0

2024-10-29 Thread Andi Kleen via Gcc-cvs
https://gcc.gnu.org/g:06bc3a734e88908cad1d3bf547a722b3f9597a0d

commit r15-4756-g06bc3a734e88908cad1d3bf547a722b3f9597a0d
Author: Andi Kleen 
Date:   Tue Oct 15 13:15:09 2024 -0700

Disable -fbit-tests and -fjump-tables at -O0

gcc/ChangeLog:

* common.opt: Enable -fbit-tests and -fjump-tables only at -O1.
* opts.cc (default_options_table): Ditto.

Diff:
---
 gcc/common.opt | 4 ++--
 gcc/opts.cc| 2 ++
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/gcc/common.opt b/gcc/common.opt
index 12b25ff486de..70a22cdc71a4 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -2189,11 +2189,11 @@ Common Var(flag_ivopts) Init(1) Optimization
 Optimize induction variables on trees.
 
 fjump-tables
-Common Var(flag_jump_tables) Init(1) Optimization
+Common Var(flag_jump_tables) Init(0) Optimization
 Use jump tables for sufficiently large switch statements.
 
 fbit-tests
-Common Var(flag_bit_tests) Init(1) Optimization
+Common Var(flag_bit_tests) Init(0) Optimization
 Use bit tests for sufficiently large switch statements.
 
 fkeep-inline-functions
diff --git a/gcc/opts.cc b/gcc/opts.cc
index e810e30961b9..5d08e5ab2b54 100644
--- a/gcc/opts.cc
+++ b/gcc/opts.cc
@@ -611,6 +611,7 @@ static const struct default_options default_options_table[] 
=
 { OPT_LEVELS_1_PLUS, OPT_fvar_tracking, NULL, 1 },
 
 /* -O1 (and not -Og) optimizations.  */
+{ OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_fbit_tests, NULL, 1 },
 { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_fbranch_count_reg, NULL, 1 },
 #if DELAY_SLOTS
 { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_fdelayed_branch, NULL, 1 },
@@ -619,6 +620,7 @@ static const struct default_options default_options_table[] 
=
 { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_fif_conversion, NULL, 1 },
 { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_fif_conversion2, NULL, 1 },
 { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_finline_functions_called_once, NULL, 1 
},
+{ OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_fjump_tables, NULL, 1 },
 { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_fmove_loop_invariants, NULL, 1 },
 { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_fmove_loop_stores, NULL, 1 },
 { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_fssa_phiopt, NULL, 1 },


[gcc r15-4764] RISC-V: Add testcases for unsigned .SAT_SUB form 2 with IMM = 1.

2024-10-29 Thread Li Xu via Gcc-cvs
https://gcc.gnu.org/g:179a682d047500604c6612afb425acf481e1a6b2

commit r15-4764-g179a682d047500604c6612afb425acf481e1a6b2
Author: xuli 
Date:   Wed Oct 23 01:57:51 2024 +

RISC-V: Add testcases for unsigned .SAT_SUB form 2 with IMM = 1.

form2:
T __attribute__((noinline)) \
sat_u_sub_imm##IMM##_##T##_fmt_2 (T x)  \
{   \
  return x >= (T)IMM ? x - (T)IMM : 0;  \
}

Passed the rv64gcv regression test.

Signed-off-by: Li Xu 
gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_u_sub_imm-run-5.c: add run case for imm=1.
* gcc.target/riscv/sat_u_sub_imm-run-6.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-run-7.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-run-8.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-5_3.c: New test.
* gcc.target/riscv/sat_u_sub_imm-6_3.c: New test.
* gcc.target/riscv/sat_u_sub_imm-7_3.c: New test.
* gcc.target/riscv/sat_u_sub_imm-8_1.c: New test.

Diff:
---
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-5_3.c   | 18 ++
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-6_3.c   | 19 +++
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-7_3.c   | 17 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-8_1.c   | 17 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-5.c |  1 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-6.c |  1 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-7.c |  1 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-8.c |  1 +
 8 files changed, 75 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-5_3.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-5_3.c
new file mode 100644
index ..42edfc59f8aa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-5_3.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-skip-if  "" { *-*-* } { "-flto" } } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_sub_imm1_uint8_t_fmt_2:
+** snez\s+[atx][0-9]+,\s*a0
+** subw\s+a0,\s*a0,\s*[atx][0-9]+
+** andi\s+a0,\s*a0,\s*0xff
+** ret
+*/
+
+DEF_SAT_U_SUB_IMM_FMT_2(uint8_t, 1)
+
+/* { dg-final { scan-rtl-dump-not ".SAT_SUB" "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-6_3.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-6_3.c
new file mode 100644
index ..5250b90418aa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-6_3.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-skip-if  "" { *-*-* } { "-flto" } } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_sub_imm1_uint16_t_fmt_2:
+** snez\s+[atx][0-9]+,\s*a0
+** subw\s+a0,\s*a0,\s*[atx][0-9]+
+** slli\s+a0,\s*a0,\s*48
+** srli\s+a0,\s*a0,\s*48
+** ret
+*/
+
+DEF_SAT_U_SUB_IMM_FMT_2(uint16_t, 1)
+
+/* { dg-final { scan-rtl-dump-not ".SAT_SUB" "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-7_3.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-7_3.c
new file mode 100644
index ..99df0e4b683b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-7_3.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-skip-if  "" { *-*-* } { "-flto" } } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_sub_imm1_uint32_t_fmt_2:
+** snez\s+[atx][0-9]+,\s*a0
+** subw\s+a0,\s*a0,\s*[atx][0-9]+
+** ret
+*/
+
+DEF_SAT_U_SUB_IMM_FMT_2(uint32_t, 1)
+
+/* { dg-final { scan-rtl-dump-not ".SAT_SUB" "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-8_1.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-8_1.c
new file mode 100644
index ..cbbc08339f4d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-8_1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-skip-if  "" { *-*-* } { "-flto" } } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_sub_imm1_uint64_t_fmt_2:
+** snez\s+[atx][0-9]+,\s*a0
+** sub\s+a0,\s*a0,\s*[atx][0-9]+
+** ret
+*/
+
+DEF_SAT_U_SUB_IMM_FMT_2(uint64_t, 1)
+
+/* { dg-final { scan-rtl-dump-not ".SAT_SUB" "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-5.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-5.c
index 627e81bca4bd..fc3809590dee 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-5.c
+++ b

[gcc r15-4763] Match: Simplify (x != 0 ? x + ~0 : 0) to (x - x != 0).

2024-10-29 Thread Li Xu via Gcc-cvs
https://gcc.gnu.org/g:4af8db3eca12b2db3753ce4b098cbd0ae32b4796

commit r15-4763-g4af8db3eca12b2db3753ce4b098cbd0ae32b4796
Author: xuli 
Date:   Tue Oct 22 09:48:03 2024 +

Match: Simplify (x != 0 ? x + ~0 : 0) to (x - x != 0).

When the imm operand op1=1 in the unsigned scalar sat_sub form2 below,
we can simplify (x != 0 ? x + ~0 : 0) to (x - x != 0), thereby eliminating
a branch instruction.  This simplification also applies to signed integers.

Form2:
T __attribute__((noinline)) \
sat_u_sub_imm##IMM##_##T##_fmt_2 (T x)  \
{   \
  return x >= (T)IMM ? x - (T)IMM : 0;  \
}

Take below form 2 as example:
DEF_SAT_U_SUB_IMM_FMT_2(uint8_t, 1)

Before this patch:
__attribute__((noinline))
uint8_t sat_u_sub_imm1_uint8_t_fmt_2 (uint8_t x)
{
  uint8_t _1;
  uint8_t _3;

  <bb 2> [local count: 1073741824]:
  if (x_2(D) != 0)
    goto <bb 3>; [50.00%]
  else
    goto <bb 4>; [50.00%]

  <bb 3> [local count: 536870912]:
  _3 = x_2(D) + 255;

  <bb 4> [local count: 1073741824]:
  # _1 = PHI <0(2), _3(3)>
  return _1;

}

Assembly code:
sat_u_sub_imm1_uint8_t_fmt_2:
beq a0,zero,.L2
addiw   a0,a0,-1
andi    a0,a0,0xff
.L2:
ret

After this patch:
__attribute__((noinline))
uint8_t sat_u_sub_imm1_uint8_t_fmt_2 (uint8_t x)
{
  _Bool _1;
  unsigned char _2;
  uint8_t _4;

  <bb 2> [local count: 1073741824]:
  _1 = x_3(D) != 0;
  _2 = (unsigned char) _1;
  _4 = x_3(D) - _2;
  return _4;

}

Assembly code:
sat_u_sub_imm1_uint8_t_fmt_2:
snez    a5,a0
subw    a0,a0,a5
andi    a0,a0,0xff
ret

The below test suites are passed for this patch:
1. The rv64gcv fully regression tests.
2. The x86 bootstrap tests.
3. The x86 fully regression tests.

Signed-off-by: Li Xu 

gcc/ChangeLog:

* match.pd: Simplify (x != 0 ? x + ~0 : 0) to (x - x != 0).

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/phi-opt-44.c: New test.
* gcc.dg/tree-ssa/phi-opt-45.c: New test.

Diff:
---
 gcc/match.pd   | 10 ++
 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-44.c | 26 ++
 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-45.c | 26 ++
 3 files changed, 62 insertions(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index 809c717bc862..c851ac56e37c 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3391,6 +3391,16 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   }
   (if (wi::eq_p (sum, wi::uhwi (0, precision)))
 
+/* The boundary condition for case 10: IMM = 1:
+   SAT_U_SUB = X >= IMM ? (X - IMM) : 0.
+   simplify (X != 0 ? X + ~0 : 0) to (X - X != 0).  */
+(simplify
+ (cond (ne@1 @0 integer_zerop)
+   (nop_convert? (plus (nop_convert? @0) integer_all_onesp))
+   integer_zerop)
+ (if (INTEGRAL_TYPE_P (type))
+   (minus @0 (convert @1))))
+
 /* Signed saturation sub, case 1:
T minus = (T)((UT)X - (UT)Y);
SAT_S_SUB = (X ^ Y) & (X ^ minus) < 0 ? (-(T)(X < 0) ^ MAX) : minus;
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-44.c 
b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-44.c
new file mode 100644
index ..962bf0954f62
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-44.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-phiopt1" } */
+
+#include <stdint.h>
+
+uint8_t f1 (uint8_t x)
+{
+  return x >= (uint8_t)1 ? x - (uint8_t)1 : 0;
+}
+
+uint16_t f2 (uint16_t x)
+{
+  return x >= (uint16_t)1 ? x - (uint16_t)1 : 0;
+}
+
+uint32_t f3 (uint32_t x)
+{
+  return x >= (uint32_t)1 ? x - (uint32_t)1 : 0;
+}
+
+uint64_t f4 (uint64_t x)
+{
+  return x >= (uint64_t)1 ? x - (uint64_t)1 : 0;
+}
+
+/* { dg-final { scan-tree-dump-not "goto" "phiopt1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-45.c 
b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-45.c
new file mode 100644
index ..62a2ab631846
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-45.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-phiopt1" } */
+
+#include <stdint.h>
+
+int8_t f1 (int8_t x)
+{
+  return x != 0 ? x - (int8_t)1 : 0;
+}
+
+int16_t f2 (int16_t x)
+{
+  return x != 0 ? x - (int16_t)1 : 0;
+}
+
+int32_t f3 (int32_t x)
+{
+  return x != 0 ? x - (int32_t)1 : 0;
+}
+
+int64_t f4 (int64_t x)
+{
+  return x != 0 ? x - (int64_t)1 : 0;
+}
+
+/* { dg-final { scan-tree-dump-not "goto" "phiopt1" } } */


[gcc r15-4760] diagnostics: support multiple output formats simultaneously [PR116613]

2024-10-29 Thread David Malcolm via Gcc-cvs
https://gcc.gnu.org/g:0b73e9382ab51c00a79b2a6f8abbcd31d87f6814

commit r15-4760-g0b73e9382ab51c00a79b2a6f8abbcd31d87f6814
Author: David Malcolm 
Date:   Tue Oct 29 19:12:02 2024 -0400

diagnostics: support multiple output formats simultaneously [PR116613]

This patch generalizes diagnostic_context so that rather than having
a single output format, it has a vector of zero or more.

It adds two new options:
 -fdiagnostics-add-output=DIAGNOSTICS-OUTPUT-SPEC
 -fdiagnostics-set-output=DIAGNOSTICS-OUTPUT-SPEC
which both take a new configuration syntax of the form SCHEME ("text" or
"sarif"), optionally followed by ":" and one or more KEY=VALUE pairs,
in this form:

  <SCHEME>
  <SCHEME>:<KEY>=<VALUE>
  <SCHEME>:<KEY>=<VALUE>,<KEY2>=<VALUE2>
  ...etc

where each SCHEME supports some set of keys.  For example, it's now
possible to use:

  -fdiagnostics-add-output=sarif:version=2.1,file=foo.2.1.sarif \
  -fdiagnostics-add-output=sarif:version=2.2-prerelease,file=foo.2.2.sarif

to add a pair of outputs, each writing to a different file, using
versions 2.1 and 2.2 of the SARIF standard respectively, whilst also
emitting the classic text form of the diagnostics to stderr.

I hope the new syntax gives us room to potentially add new kinds of
output sink in the future (e.g. RPC notifications), and to add new
key/value pairs as needed by the different sinks.

Implementation-wise, the diagnostic_context's m_printer which previously
was used directly by the single output format now becomes a "reference
printer", created by the client (such as the frontend), with defaults
modified by command-line options.  Each of the multiple output sinks has
its own pretty_printer instance, created by cloning the context's
reference printer.

gcc/ChangeLog:
PR other/116613
* Makefile.in (OBJS-libcommon-target): Add opts-diagnostic.o.
* common.opt (fdiagnostics-add-output=): New.
(fdiagnostics-set-output=): New.
(diagnostics_output_format): Drop sarif-file-2.2-prerelease from
enum.
* common.opt.urls: Regenerate.
* diagnostic-buffer.h (diagnostic_buffer::~diagnostic_buffer): New.
(diagnostic_buffer::ensure_per_format_buffer): Rename to...
(diagnostic_buffer::ensure_per_format_buffers): ...this.
(diagnostic_buffer::m_per_format_buffer): Replace with...
(diagnostic_buffer::m_per_format_buffers): ...this, updating type.
* diagnostic-format-json.cc (json_output_format::update_printer):
New.
(json_output_format::follows_reference_printer_p): New.
(diagnostic_output_format_init_json): Drop redundant call to
set_path_format, as this is not a text output format.
* diagnostic-format-sarif.cc: Include "diagnostic-format-text.h".
(sarif_builder::set_printer): New.
(sarif_builder::sarif_builder): Add "printer" param and use it for
m_printer.

(sarif_builder::make_location_object::escape_nonascii_renderer::render):
Rather than using dc.m_printer, create a
diagnostic_text_output_format instance and use its printer.
(sarif_output_format::follows_reference_printer_p): New.
(sarif_output_format::update_printer): New.
(sarif_output_format::sarif_output_format): Pass in correct
printer to m_builder's ctor.
(diagnostic_output_format_init_sarif): Drop redundant call to
set_path_format, as this is not a text output format.  Replace
calls to pp_show_color and set_token_printer with call to
update_printer.  Drop redundant call to set_show_highlight_colors,
as this printer does not show colors.
(diagnostic_output_format_init_sarif_file): Split out file opening
into...
(diagnostic_output_format_open_sarif_file): ...this new function.
(make_sarif_sink): New.
(selftest::test_make_location_object): Provide a pp for the
builder.
* diagnostic-format-sarif.h
(diagnostic_output_format_open_sarif_file): New decl.
(make_sarif_sink): New decl.
* diagnostic-format-text.cc (diagnostic_text_output_format::dump):
Dump sm_follows_reference_printer.
(diagnostic_text_output_format::on_report_verbatim): New.
(diagnostic_text_output_format::follows_reference_printer_p): New.
(diagnostic_text_output_format::update_printer): New.
* diagnostic-format-text.h
(diagnostic_text_output_format::diagnostic_text_output_format):
Add optional "follows_reference_printer" param.
(diagnostic_text_output_format::on_report_verbatim): New decl.
(diagnostic_text_output_format::after_diagnostic): Drop "

[gcc r15-4755] Fix miscompilation of function containing __builtin_unreachable

2024-10-29 Thread Eric Botcazou via Gcc-cvs
https://gcc.gnu.org/g:7211155732244ac527c075d033164ec253ee0971

commit r15-4755-g7211155732244ac527c075d033164ec253ee0971
Author: Eric Botcazou 
Date:   Tue Oct 29 21:40:34 2024 +0100

Fix miscompilation of function containing __builtin_unreachable

This is a wrong-code generation on the SPARC for a function containing
a call to __builtin_unreachable caused by the delay slot scheduling pass,
and more specifically the find_end_label function which has these lines:

  /* Otherwise, see if there is a label at the end of the function. If there
 is, it must be that RETURN insns aren't needed, so that is our return
 label and we don't have to do anything else.  */

The comment was correct 20 years ago but no longer is nowadays in the
presence of RTL epilogues and calls to __builtin_unreachable, so the
patch just removes the associated two lines of code:

  else if (LABEL_P (insn))
    *plabel = as_a <rtx_code_label *> (insn);

and otherwise contains just adjustments to the commentary.

gcc/
PR rtl-optimization/117327
* reorg.cc (find_end_label): Do not return a dangling label at the
end of the function and adjust commentary.

gcc/testsuite/
* gcc.c-torture/execute/20241029-1.c: New test.

Diff:
---
 gcc/reorg.cc | 38 ++--
 gcc/testsuite/gcc.c-torture/execute/20241029-1.c | 23 ++
 2 files changed, 38 insertions(+), 23 deletions(-)

diff --git a/gcc/reorg.cc b/gcc/reorg.cc
index 51321ce7b80e..68bf30801cfa 100644
--- a/gcc/reorg.cc
+++ b/gcc/reorg.cc
@@ -336,13 +336,14 @@ insn_sets_resource_p (rtx insn, struct resources *res,
   return resource_conflicts_p (&insn_sets, res);
 }
 
-/* Find a label at the end of the function or before a RETURN.  If there
-   is none, try to make one.  If that fails, returns 0.
+/* Find a label before a RETURN.  If there is none, try to make one; if this
+   fails, return 0.  KIND is either ret_rtx or simple_return_rtx, indicating
+   which type of RETURN we're looking for.
 
-   The property of such a label is that it is placed just before the
-   epilogue or a bare RETURN insn, so that another bare RETURN can be
-   turned into a jump to the label unconditionally.  In particular, the
-   label cannot be placed before a RETURN insn with a filled delay slot.
+   The property of the label is that it is placed just before a bare RETURN
+   insn, so that another bare RETURN can be turned into a jump to the label
+   unconditionally.  In particular, the label cannot be placed before a
+   RETURN insn with a filled delay slot.
 
??? There may be a problem with the current implementation.  Suppose
we start with a bare RETURN insn and call find_end_label.  It may set
@@ -353,9 +354,7 @@ insn_sets_resource_p (rtx insn, struct resources *res,
Note that this is probably mitigated by the following observation:
once function_return_label is made, it is very likely the target of
a jump, so filling the delay slot of the RETURN will be much more
-   difficult.
-   KIND is either simple_return_rtx or ret_rtx, indicating which type of
-   return we're looking for.  */
+   difficult.  */
 
 static rtx_code_label *
 find_end_label (rtx kind)
@@ -375,10 +374,7 @@ find_end_label (rtx kind)
   if (*plabel)
 return *plabel;
 
-  /* Otherwise, see if there is a label at the end of the function.  If there
- is, it must be that RETURN insns aren't needed, so that is our return
- label and we don't have to do anything else.  */
-
+  /* Otherwise, scan the insns backward from the end of the function.  */
   insn = get_last_insn ();
   while (NOTE_P (insn)
 || (NONJUMP_INSN_P (insn)
@@ -386,9 +382,8 @@ find_end_label (rtx kind)
 || GET_CODE (PATTERN (insn)) == CLOBBER)))
 insn = PREV_INSN (insn);
 
-  /* When a target threads its epilogue we might already have a
- suitable return insn.  If so put a label before it for the
- function_return_label.  */
+  /* First, see if there is a RETURN at the end of the function.  If so,
+ put the label before it.  */
   if (BARRIER_P (insn)
   && JUMP_P (PREV_INSN (insn))
   && PATTERN (PREV_INSN (insn)) == kind)
@@ -397,8 +392,8 @@ find_end_label (rtx kind)
   rtx_code_label *label = gen_label_rtx ();
   LABEL_NUSES (label) = 0;
 
-  /* Put the label before any USE insns that may precede the RETURN
-insn.  */
+  /* Put the label before any USE insns that may precede the
+RETURN insn.  */
   while (GET_CODE (temp) == USE)
temp = PREV_INSN (temp);
 
@@ -406,15 +401,12 @@ find_end_label (rtx kind)
   *plabel = label;
 }
 
-  else if (LABEL_P (insn))
-    *plabel = as_a <rtx_code_label *> (insn);
+  /* If the basic block reordering pass has moved the return insn to some
+ other place, try to locate it again and put the lab

[gcc r14-10850] Fix miscompilation of function containing __builtin_unreachable

2024-10-29 Thread Eric Botcazou via Gcc-cvs
https://gcc.gnu.org/g:0ea3dca7946a1bce072ae5f732a873ce6f207bee

commit r14-10850-g0ea3dca7946a1bce072ae5f732a873ce6f207bee
Author: Eric Botcazou 
Date:   Tue Oct 29 21:40:34 2024 +0100

Fix miscompilation of function containing __builtin_unreachable

This is a wrong-code generation on the SPARC for a function containing
a call to __builtin_unreachable caused by the delay slot scheduling pass,
and more specifically the find_end_label function which has these lines:

  /* Otherwise, see if there is a label at the end of the function. If there
 is, it must be that RETURN insns aren't needed, so that is our return
 label and we don't have to do anything else.  */

The comment was correct 20 years ago but no longer is nowadays in the
presence of RTL epilogues and calls to __builtin_unreachable, so the
patch just removes the associated two lines of code:

  else if (LABEL_P (insn))
    *plabel = as_a <rtx_code_label *> (insn);

and otherwise contains just adjustments to the commentary.

gcc/
PR rtl-optimization/117327
* reorg.cc (find_end_label): Do not return a dangling label at the
end of the function and adjust commentary.

gcc/testsuite/
* gcc.c-torture/execute/20241029-1.c: New test.

Diff:
---
 gcc/reorg.cc | 38 ++--
 gcc/testsuite/gcc.c-torture/execute/20241029-1.c | 23 ++
 2 files changed, 38 insertions(+), 23 deletions(-)

diff --git a/gcc/reorg.cc b/gcc/reorg.cc
index 99228a22c69e..ffbd9c20525f 100644
--- a/gcc/reorg.cc
+++ b/gcc/reorg.cc
@@ -336,13 +336,14 @@ insn_sets_resource_p (rtx insn, struct resources *res,
   return resource_conflicts_p (&insn_sets, res);
 }
 
-/* Find a label at the end of the function or before a RETURN.  If there
-   is none, try to make one.  If that fails, returns 0.
+/* Find a label before a RETURN.  If there is none, try to make one; if this
+   fails, return 0.  KIND is either ret_rtx or simple_return_rtx, indicating
+   which type of RETURN we're looking for.
 
-   The property of such a label is that it is placed just before the
-   epilogue or a bare RETURN insn, so that another bare RETURN can be
-   turned into a jump to the label unconditionally.  In particular, the
-   label cannot be placed before a RETURN insn with a filled delay slot.
+   The property of the label is that it is placed just before a bare RETURN
+   insn, so that another bare RETURN can be turned into a jump to the label
+   unconditionally.  In particular, the label cannot be placed before a
+   RETURN insn with a filled delay slot.
 
??? There may be a problem with the current implementation.  Suppose
we start with a bare RETURN insn and call find_end_label.  It may set
@@ -353,9 +354,7 @@ insn_sets_resource_p (rtx insn, struct resources *res,
Note that this is probably mitigated by the following observation:
once function_return_label is made, it is very likely the target of
a jump, so filling the delay slot of the RETURN will be much more
-   difficult.
-   KIND is either simple_return_rtx or ret_rtx, indicating which type of
-   return we're looking for.  */
+   difficult.  */
 
 static rtx_code_label *
 find_end_label (rtx kind)
@@ -375,10 +374,7 @@ find_end_label (rtx kind)
   if (*plabel)
 return *plabel;
 
-  /* Otherwise, see if there is a label at the end of the function.  If there
- is, it must be that RETURN insns aren't needed, so that is our return
- label and we don't have to do anything else.  */
-
+  /* Otherwise, scan the insns backward from the end of the function.  */
   insn = get_last_insn ();
   while (NOTE_P (insn)
 || (NONJUMP_INSN_P (insn)
@@ -386,9 +382,8 @@ find_end_label (rtx kind)
 || GET_CODE (PATTERN (insn)) == CLOBBER)))
 insn = PREV_INSN (insn);
 
-  /* When a target threads its epilogue we might already have a
- suitable return insn.  If so put a label before it for the
- function_return_label.  */
+  /* First, see if there is a RETURN at the end of the function.  If so,
+ put the label before it.  */
   if (BARRIER_P (insn)
   && JUMP_P (PREV_INSN (insn))
   && PATTERN (PREV_INSN (insn)) == kind)
@@ -397,8 +392,8 @@ find_end_label (rtx kind)
   rtx_code_label *label = gen_label_rtx ();
   LABEL_NUSES (label) = 0;
 
-  /* Put the label before any USE insns that may precede the RETURN
-insn.  */
+  /* Put the label before any USE insns that may precede the
+RETURN insn.  */
   while (GET_CODE (temp) == USE)
temp = PREV_INSN (temp);
 
@@ -406,15 +401,12 @@ find_end_label (rtx kind)
   *plabel = label;
 }
 
-  else if (LABEL_P (insn))
-*plabel = as_a <rtx_code_label *> (insn);
+  /* If the basic block reordering pass has moved the return insn to some
+ other place, try to lo

[gcc(refs/users/aoliva/heads/testme)] [testsuite] disable PIE on ia32 on more tests

2024-10-29 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:9fee52ae3e798e9bd58c1cc71559c9ace1f1a69f

commit 9fee52ae3e798e9bd58c1cc71559c9ace1f1a69f
Author: Alexandre Oliva 
Date:   Tue Oct 29 18:13:06 2024 -0300

[testsuite] disable PIE on ia32 on more tests

Multiple tests fail on ia32 with -fPIE enabled by default because of
different call sequences required by the call-saved PIC register
(no-callee-saved-*.c), uses of the constant pool instead of computing
constants (pr100865-*.c), and unexpected matches of esp in get_pc_thunk
(sse2-stv-1.c).  Disable PIE on them, to match the expectations.


for  gcc/testsuite/ChangeLog

* gcc.target/i386/no-callee-saved-13.c: Disable PIE on ia32.
* gcc.target/i386/no-callee-saved-14.c: Likewise.
* gcc.target/i386/no-callee-saved-15.c: Likewise.
* gcc.target/i386/no-callee-saved-17.c: Likewise.
* gcc.target/i386/pr100865-1.c: Likewise.
* gcc.target/i386/pr100865-7a.c: Likewise.
* gcc.target/i386/pr100865-7c.c: Likewise.
* gcc.target/i386/sse2-stv-1.c: Likewise.

Diff:
---
 gcc/testsuite/gcc.target/i386/no-callee-saved-13.c | 1 +
 gcc/testsuite/gcc.target/i386/no-callee-saved-14.c | 1 +
 gcc/testsuite/gcc.target/i386/no-callee-saved-15.c | 1 +
 gcc/testsuite/gcc.target/i386/no-callee-saved-17.c | 1 +
 gcc/testsuite/gcc.target/i386/pr100865-1.c | 1 +
 gcc/testsuite/gcc.target/i386/pr100865-7a.c| 1 +
 gcc/testsuite/gcc.target/i386/pr100865-7c.c| 1 +
 gcc/testsuite/gcc.target/i386/sse2-stv-1.c | 1 +
 8 files changed, 8 insertions(+)

diff --git a/gcc/testsuite/gcc.target/i386/no-callee-saved-13.c 
b/gcc/testsuite/gcc.target/i386/no-callee-saved-13.c
index 6757e72d8487..0b59da36786a 100644
--- a/gcc/testsuite/gcc.target/i386/no-callee-saved-13.c
+++ b/gcc/testsuite/gcc.target/i386/no-callee-saved-13.c
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -mtune-ctrl=^prologue_using_move,^epilogue_using_move" } 
*/
+/* { dg-additional-options "-fno-PIE" { target ia32 } } */
 
 extern void foo (void);
 
diff --git a/gcc/testsuite/gcc.target/i386/no-callee-saved-14.c 
b/gcc/testsuite/gcc.target/i386/no-callee-saved-14.c
index 2239e286e6a6..2127b12f120b 100644
--- a/gcc/testsuite/gcc.target/i386/no-callee-saved-14.c
+++ b/gcc/testsuite/gcc.target/i386/no-callee-saved-14.c
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -mtune-ctrl=^prologue_using_move,^epilogue_using_move" } 
*/
+/* { dg-additional-options "-fno-PIE" { target ia32 } } */
 
 extern void bar (void) __attribute__ ((no_callee_saved_registers));
 
diff --git a/gcc/testsuite/gcc.target/i386/no-callee-saved-15.c 
b/gcc/testsuite/gcc.target/i386/no-callee-saved-15.c
index 10135fec9c14..65f2a9532ffd 100644
--- a/gcc/testsuite/gcc.target/i386/no-callee-saved-15.c
+++ b/gcc/testsuite/gcc.target/i386/no-callee-saved-15.c
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -mtune-ctrl=^prologue_using_move,^epilogue_using_move" } 
*/
+/* { dg-additional-options "-fno-PIE" { target ia32 } } */
 
 typedef void (*fn_t) (void) __attribute__ ((no_callee_saved_registers));
 extern fn_t bar;
diff --git a/gcc/testsuite/gcc.target/i386/no-callee-saved-17.c 
b/gcc/testsuite/gcc.target/i386/no-callee-saved-17.c
index 1fd5daadf080..1ecf4552f3d0 100644
--- a/gcc/testsuite/gcc.target/i386/no-callee-saved-17.c
+++ b/gcc/testsuite/gcc.target/i386/no-callee-saved-17.c
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -mtune-ctrl=^prologue_using_move,^epilogue_using_move" } 
*/
+/* { dg-additional-options "-fno-PIE" { target ia32 } } */
 
 extern void foo (void) __attribute__ ((no_caller_saved_registers));
 
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-1.c 
b/gcc/testsuite/gcc.target/i386/pr100865-1.c
index 75cd463cbfc2..fc0a5b33950f 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-1.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-1.c
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -march=x86-64" } */
+/* { dg-additional-options "-fno-PIE" { target ia32 } } */
 
 extern char *dst;
 
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-7a.c 
b/gcc/testsuite/gcc.target/i386/pr100865-7a.c
index 7de7d4a3ce3a..9fb5dc525652 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-7a.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-7a.c
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O3 -march=skylake" } */
+/* { dg-additional-options "-fno-PIE" { target ia32 } } */
 
 extern long long int array[64];
 
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-7c.c 
b/gcc/testsuite/gcc.target/i386/pr100865-7c.c
index edbfd5b09ed6..695831e59af5 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-7c.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-7c.c
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O3 -march=skylake -mno-avx2" } */
+/* { dg-additional-options "-fno-PIE" { target ia32 } } */
 
 extern long long int array[64];
 
diff --git a

[gcc r15-4752] c++: printing AGGR_INIT_EXPR args

2024-10-29 Thread Jason Merrill via Gcc-cvs
https://gcc.gnu.org/g:e6d21cbf5cf035b6fa9946a4321f87cbcfa9f275

commit r15-4752-ge6d21cbf5cf035b6fa9946a4321f87cbcfa9f275
Author: Jason Merrill 
Date:   Tue Oct 22 17:45:00 2024 -0400

c++: printing AGGR_INIT_EXPR args

PR30854 was about wrongly dumping the dummy object argument to a
constructor; r126582 in 4.3 fixed that by skipping the first argument.  But
not all functions called by AGGR_INIT_EXPR are constructors, as observed in
PR116634; we shouldn't skip for non-member functions.  And let's combine the
printing code for CALL_EXPR and AGGR_INIT_EXPR.

This doesn't make us accept the ill-formed 116634 testcase again with a
pedwarn, just fixes the diagnostic issue.

PR c++/30854
PR c++/116634

gcc/cp/ChangeLog:

* error.cc (dump_aggr_init_expr_args): Remove.
(dump_call_expr_args): Handle AGGR_INIT_EXPR.
(dump_expr): Combine AGGR_INIT_EXPR and CALL_EXPR cases.

gcc/testsuite/ChangeLog:

* g++.dg/coroutines/coro-bad-alloc-02-no-op-new-nt.C: Adjust
diagnostic.
* g++.dg/diagnostic/aggr-init1.C: New test.

Diff:
---
 gcc/cp/error.cc| 79 +-
 .../coroutines/coro-bad-alloc-02-no-op-new-nt.C|  2 +-
 gcc/testsuite/g++.dg/diagnostic/aggr-init1.C   | 36 ++
 3 files changed, 55 insertions(+), 62 deletions(-)

diff --git a/gcc/cp/error.cc b/gcc/cp/error.cc
index 8381f9504880..4a60fac96942 100644
--- a/gcc/cp/error.cc
+++ b/gcc/cp/error.cc
@@ -84,7 +84,6 @@ static void dump_type_prefix (cxx_pretty_printer *, tree, 
int);
 static void dump_type_suffix (cxx_pretty_printer *, tree, int);
 static void dump_function_name (cxx_pretty_printer *, tree, int);
 static void dump_call_expr_args (cxx_pretty_printer *, tree, int, bool);
-static void dump_aggr_init_expr_args (cxx_pretty_printer *, tree, int, bool);
 static void dump_expr_list (cxx_pretty_printer *, tree, int);
 static void dump_global_iord (cxx_pretty_printer *, tree);
 static void dump_parameters (cxx_pretty_printer *, tree, int);
@@ -2253,46 +2252,15 @@ dump_template_parms (cxx_pretty_printer *pp, tree info,
 static void
 dump_call_expr_args (cxx_pretty_printer *pp, tree t, int flags, bool skipfirst)
 {
-  tree arg;
-  call_expr_arg_iterator iter;
+  const int len = call_expr_nargs (t);
 
   pp_cxx_left_paren (pp);
-  FOR_EACH_CALL_EXPR_ARG (arg, iter, t)
+  for (int i = skipfirst; i < len; ++i)
 {
-  if (skipfirst)
-   skipfirst = false;
-  else
-   {
- dump_expr (pp, arg, flags | TFF_EXPR_IN_PARENS);
- if (more_call_expr_args_p (&iter))
-   pp_separate_with_comma (pp);
-   }
-}
-  pp_cxx_right_paren (pp);
-}
-
-/* Print out the arguments of AGGR_INIT_EXPR T as a parenthesized list
-   using flags FLAGS.  Skip over the first argument if SKIPFIRST is
-   true.  */
-
-static void
-dump_aggr_init_expr_args (cxx_pretty_printer *pp, tree t, int flags,
-  bool skipfirst)
-{
-  tree arg;
-  aggr_init_expr_arg_iterator iter;
-
-  pp_cxx_left_paren (pp);
-  FOR_EACH_AGGR_INIT_EXPR_ARG (arg, iter, t)
-{
-  if (skipfirst)
-   skipfirst = false;
-  else
-   {
- dump_expr (pp, arg, flags | TFF_EXPR_IN_PARENS);
- if (more_aggr_init_expr_args_p (&iter))
-   pp_separate_with_comma (pp);
-   }
+  tree arg = get_nth_callarg (t, i);
+  dump_expr (pp, arg, flags | TFF_EXPR_IN_PARENS);
+  if (i + 1 < len)
+   pp_separate_with_comma (pp);
 }
   pp_cxx_right_paren (pp);
 }
@@ -2451,28 +2419,9 @@ dump_expr (cxx_pretty_printer *pp, tree t, int flags)
   break;
 
 case AGGR_INIT_EXPR:
-  {
-   tree fn = NULL_TREE;
-
-   if (TREE_CODE (AGGR_INIT_EXPR_FN (t)) == ADDR_EXPR)
- fn = TREE_OPERAND (AGGR_INIT_EXPR_FN (t), 0);
-
-   if (fn && TREE_CODE (fn) == FUNCTION_DECL)
- {
-   if (DECL_CONSTRUCTOR_P (fn))
- dump_type (pp, DECL_CONTEXT (fn), flags);
-   else
- dump_decl (pp, fn, 0);
- }
-   else
- dump_expr (pp, AGGR_INIT_EXPR_FN (t), 0);
-  }
-  dump_aggr_init_expr_args (pp, t, flags, true);
-  break;
-
 case CALL_EXPR:
   {
-   tree fn = CALL_EXPR_FN (t);
+   tree fn = cp_get_callee (t);
bool skipfirst = false;
 
/* Deal with internal functions.  */
@@ -2494,8 +2443,10 @@ dump_expr (cxx_pretty_printer *pp, tree t, int flags)
&& NEXT_CODE (fn) == METHOD_TYPE
&& call_expr_nargs (t))
  {
-   tree ob = CALL_EXPR_ARG (t, 0);
-   if (TREE_CODE (ob) == ADDR_EXPR)
+   tree ob = get_nth_callarg (t, 0);
+   if (is_dummy_object (ob))
+ /* Don't print dummy object.  */;
+   else if (TREE_CODE (ob) == ADDR_EXPR)
  {
dump_expr (pp, TREE_OPERAND (ob, 0),
   

[gcc r15-4753] c-family: Handle RAW_DATA_CST in complete_array_type [PR117313]

2024-10-29 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:28b7aed124d4d86be552f24469c2cfa59a49edaf

commit r15-4753-g28b7aed124d4d86be552f24469c2cfa59a49edaf
Author: Jakub Jelinek 
Date:   Tue Oct 29 20:14:09 2024 +0100

c-family: Handle RAW_DATA_CST in complete_array_type [PR117313]

The following testcase ICEs, because
add_flexible_array_elts_to_size -> complete_array_type
is done only after braced_lists_to_strings which optimizes
RAW_DATA_CST surrounded by INTEGER_CST into a larger RAW_DATA_CST
covering even the boundaries, while I thought it is done before
that.
So, RAW_DATA_CST now can be the last constructor_elt in a CONSTRUCTOR
and so we need the function to take it into account (handle it as
RAW_DATA_CST standing for RAW_DATA_LENGTH consecutive elements).

The function wants to support both CONSTRUCTORs without indexes and with
them (for non-RAW_DATA_CST elts it was just adding 1 for the current
index).  So, if the RAW_DATA_CST elt has ce->index, we need to add
RAW_DATA_LENGTH (ce->value) - 1, while if it doesn't (and it isn't cnt == 0
case where curindex is 0), add that plus 1, i.e. RAW_DATA_LENGTH 
(ce->value).

2024-10-29  Jakub Jelinek  

PR c/117313
gcc/c-family/
* c-common.cc (complete_array_type): For RAW_DATA_CST elements
advance curindex by RAW_DATA_LENGTH or one less than that if
ce->index is non-NULL.  Handle even the first element if
it is RAW_DATA_CST.  Formatting fix.
gcc/testsuite/
* c-c++-common/init-6.c: New test.

Diff:
---
 gcc/c-family/c-common.cc| 25 ++---
 gcc/testsuite/c-c++-common/init-6.c | 29 +
 2 files changed, 47 insertions(+), 7 deletions(-)

diff --git a/gcc/c-family/c-common.cc b/gcc/c-family/c-common.cc
index 88827ac2bdba..f7feb21f70e5 100644
--- a/gcc/c-family/c-common.cc
+++ b/gcc/c-family/c-common.cc
@@ -7046,7 +7046,8 @@ complete_array_type (tree *ptype, tree initial_value, 
bool do_default)
{
  int eltsize
= int_size_in_bytes (TREE_TYPE (TREE_TYPE (initial_value)));
- maxindex = size_int (TREE_STRING_LENGTH (initial_value)/eltsize - 1);
+ maxindex = size_int (TREE_STRING_LENGTH (initial_value) / eltsize
+  - 1);
}
   else if (TREE_CODE (initial_value) == CONSTRUCTOR)
{
@@ -7061,23 +7062,25 @@ complete_array_type (tree *ptype, tree initial_value, 
bool do_default)
  else
{
  tree curindex;
- unsigned HOST_WIDE_INT cnt;
+ unsigned HOST_WIDE_INT cnt = 1;
  constructor_elt *ce;
  bool fold_p = false;
 
  if ((*v)[0].index)
maxindex = (*v)[0].index, fold_p = true;
+ if (TREE_CODE ((*v)[0].value) == RAW_DATA_CST)
+   cnt = 0;
 
  curindex = maxindex;
 
- for (cnt = 1; vec_safe_iterate (v, cnt, &ce); cnt++)
+ for (; vec_safe_iterate (v, cnt, &ce); cnt++)
{
  bool curfold_p = false;
  if (ce->index)
curindex = ce->index, curfold_p = true;
- else
+ if (!ce->index || TREE_CODE (ce->value) == RAW_DATA_CST)
{
- if (fold_p)
+ if (fold_p || curfold_p)
{
  /* Since we treat size types now as ordinary
 unsigned types, we need an explicit overflow
@@ -7085,9 +7088,17 @@ complete_array_type (tree *ptype, tree initial_value, 
bool do_default)
  tree orig = curindex;
  curindex = fold_convert (sizetype, curindex);
  overflow_p |= tree_int_cst_lt (curindex, orig);
+ curfold_p = false;
}
- curindex = size_binop (PLUS_EXPR, curindex,
-size_one_node);
+ if (TREE_CODE (ce->value) == RAW_DATA_CST)
+   curindex
+ = size_binop (PLUS_EXPR, curindex,
+   size_int (RAW_DATA_LENGTH (ce->value)
+ - ((ce->index || !cnt)
+? 1 : 0)));
+ else
+   curindex = size_binop (PLUS_EXPR, curindex,
+  size_one_node);
}
  if (tree_int_cst_lt (maxindex, curindex))
maxindex = curindex, fold_p = curfold_p;
diff --git a/gcc/testsuite/c-c++-common/init-6.c 
b/gcc/testsuite/c-c++-common/init-6.c
new file mode 100644
index ..d4c323b397f4
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/init-6.c
@@ -0,0 +1,29 @@
+/* PR c/1173

[gcc r15-4749] [PATCH 2/2] RISC-V:Add intrinsic cases for the CMOs extensions

2024-10-29 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:b22d9c8f8216d15773dee4f9677c6b26aff507fd

commit r15-4749-gb22d9c8f8216d15773dee4f9677c6b26aff507fd
Author: yulong 
Date:   Tue Oct 29 08:44:45 2024 -0600

[PATCH 2/2] RISC-V:Add intrinsic cases for the CMOs extensions

gcc/testsuite/ChangeLog:

* gcc.target/riscv/cmo-32.c: New test.
* gcc.target/riscv/cmo-64.c: New test.

Diff:
---
 gcc/testsuite/gcc.target/riscv/cmo-32.c | 58 +
 gcc/testsuite/gcc.target/riscv/cmo-64.c | 58 +
 2 files changed, 116 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/cmo-32.c 
b/gcc/testsuite/gcc.target/riscv/cmo-32.c
new file mode 100644
index ..8e733cc05fc1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/cmo-32.c
@@ -0,0 +1,58 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target rv32} */
+/* { dg-options "-march=rv32gc_zicbom_zicbop_zicboz -mabi=ilp32" } */
+
+#include "riscv_cmo.h"
+
+void foo1 (void *addr)
+{
+__riscv_cmo_clean(0);
+__riscv_cmo_clean(addr);
+__riscv_cmo_clean((void*)0x111);
+}
+
+void foo2 (void *addr)
+{
+__riscv_cmo_flush(0);
+__riscv_cmo_flush(addr);
+__riscv_cmo_flush((void*)0x111);
+}
+
+void foo3 (void *addr)
+{
+__riscv_cmo_inval(0);
+__riscv_cmo_inval(addr);
+__riscv_cmo_inval((void*)0x111);
+}
+
+void foo4 (void *addr)
+{
+__riscv_cmo_prefetch(addr,0,0);
+__riscv_cmo_prefetch(addr,0,1);
+__riscv_cmo_prefetch(addr,0,2);
+__riscv_cmo_prefetch(addr,0,3);
+__riscv_cmo_prefetch(addr,1,0);
+__riscv_cmo_prefetch(addr,1,1);
+__riscv_cmo_prefetch(addr,1,2);
+__riscv_cmo_prefetch(addr,1,3);
+}
+
+int foo5 (int num)
+{
+return __riscv_cmo_prefetchi(num);
+}
+
+void foo6 (void *addr)
+{
+__riscv_cmo_zero(0);
+__riscv_cmo_zero(addr);
+__riscv_cmo_zero((void*)0x121);
+}
+
+/* { dg-final { scan-assembler-times "cbo.clean\t" 3 } } */
+/* { dg-final { scan-assembler-times "cbo.flush\t" 3 } } */
+/* { dg-final { scan-assembler-times "cbo.inval\t" 3 } } */
+/* { dg-final { scan-assembler-times "prefetch.r\t" 4 } } */
+/* { dg-final { scan-assembler-times "prefetch.w\t" 4 } } */
+/* { dg-final { scan-assembler-times "prefetch.i\t" 1 } } */
+/* { dg-final { scan-assembler-times "cbo.zero\t" 3 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/cmo-64.c 
b/gcc/testsuite/gcc.target/riscv/cmo-64.c
new file mode 100644
index ..e83eddbeb6f7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/cmo-64.c
@@ -0,0 +1,58 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target rv64 } */
+/* { dg-options "-march=rv64gc_zicbom_zicbop_zicboz -mabi=lp64d" } */
+
+#include "riscv_cmo.h"
+
+void foo1 (void *addr)
+{
+__riscv_cmo_clean(0);
+__riscv_cmo_clean(addr);
+__riscv_cmo_clean((void*)0x111);
+}
+
+void foo2 (void *addr)
+{
+__riscv_cmo_flush(0);
+__riscv_cmo_flush(addr);
+__riscv_cmo_flush((void*)0x111);
+}
+
+void foo3 (void *addr)
+{
+__riscv_cmo_inval(0);
+__riscv_cmo_inval(addr);
+__riscv_cmo_inval((void*)0x111);
+}
+
+void foo4 (void *addr)
+{
+__riscv_cmo_prefetch(addr,0,0);
+__riscv_cmo_prefetch(addr,0,1);
+__riscv_cmo_prefetch(addr,0,2);
+__riscv_cmo_prefetch(addr,0,3);
+__riscv_cmo_prefetch(addr,1,0);
+__riscv_cmo_prefetch(addr,1,1);
+__riscv_cmo_prefetch(addr,1,2);
+__riscv_cmo_prefetch(addr,1,3);
+}
+
+int foo5 (int num)
+{
+return __riscv_cmo_prefetchi(num);
+}
+
+void foo6 (void *addr)
+{
+__riscv_cmo_zero(0);
+__riscv_cmo_zero(addr);
+__riscv_cmo_zero((void*)0x121);
+}
+
+/* { dg-final { scan-assembler-times "cbo.clean\t" 3 } } */
+/* { dg-final { scan-assembler-times "cbo.flush\t" 3 } } */
+/* { dg-final { scan-assembler-times "cbo.inval\t" 3 } } */
+/* { dg-final { scan-assembler-times "prefetch.r\t" 4 } } */
+/* { dg-final { scan-assembler-times "prefetch.w\t" 4 } } */
+/* { dg-final { scan-assembler-times "prefetch.i\t" 1 } } */
+/* { dg-final { scan-assembler-times "cbo.zero\t" 3 } } */


[gcc r15-4750] testcase: Add testcase for tree-optimization/117341

2024-10-29 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:17f6add3aba96681673b78862116a85d619cd806

commit r15-4750-g17f6add3aba96681673b78862116a85d619cd806
Author: Andrew Pinski 
Date:   Mon Oct 28 22:05:08 2024 -0700

testcase: Add testcase for tree-optimization/117341

Even though PR 117341 was a duplicate of PR 116768, another
testcase, this time in C++, does not hurt to have.
The testcase is self-contained and does not directly use libstdc++
except for operator new (it does not even call delete).

Tested on x86_64-linux-gnu with it working.

PR tree-optimization/117341

gcc/testsuite/ChangeLog:

* g++.dg/torture/pr117341-1.C: New test.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/testsuite/g++.dg/torture/pr117341-1.C | 54 +++
 1 file changed, 54 insertions(+)

diff --git a/gcc/testsuite/g++.dg/torture/pr117341-1.C 
b/gcc/testsuite/g++.dg/torture/pr117341-1.C
new file mode 100644
index ..b13d2502e351
--- /dev/null
+++ b/gcc/testsuite/g++.dg/torture/pr117341-1.C
@@ -0,0 +1,54 @@
+void swap(long &a, long &b)
+{
+  long t = a;
+  a = b;
+  b = t;
+}
+
+struct Array {
+long arr[1];
+Array() : arr() {}
+/* Operators */
+long& operator[](int index) { return arr[index]; }
+const long& operator[](int index) const { return arr[index]; }
+/* Operations */
+void swap(Array& array)  {
+for (int i = 0; i < 1; ++i)
+::swap(arr[i], array[i]);
+}
+};
+
+class Vector : public Array {};
+
+struct v
+{
+  Vector *e;
+  v() : e (new Vector[4]){}
+  Vector& operator[](int index) { return e[index]; }
+  const Vector& operator[](int index) const { return e[index]; }
+};
+static inline Vector func(const Vector& y)
+{
+return y;
+}
+
+volatile int a;
+
+int main() {
+v solution;
+solution[0][0] = 1;
+int t = a;
+for (int i = 0; i < 3; ++i) {
+const Vector& v = solution[i];
+Vector sum;
+const long delta = func(v)[0] & t;
+sum[0] = v[0] + delta;
+solution[i + 1].swap(sum);
+}
+for(int i = 0; i < 4; i++)
+{
+  if (solution[i][0] != 1)
+__builtin_abort();
+}
+return 0;
+}


[gcc r15-4751] [RISC-V] RISC-V: Add implication for M extension.

2024-10-29 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:f003834badbfd9d0c0ad132de8b2f3d550ed120f

commit r15-4751-gf003834badbfd9d0c0ad132de8b2f3d550ed120f
Author: Tsung Chun Lin 
Date:   Tue Oct 29 09:47:57 2024 -0600

[RISC-V] RISC-V: Add implication for M extension.

The M extension implies Zmmul.

gcc/ChangeLog:

* common/config/riscv/riscv-common.cc: M implies Zmmul.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/attribute-15.c: Add _zmmul1p0 to arch string.
* gcc.target/riscv/attribute-16.c: Ditto.
* gcc.target/riscv/attribute-17.c: Ditto.
* gcc.target/riscv/attribute-18.c: Ditto.
* gcc.target/riscv/attribute-19.c: Ditto.
* gcc.target/riscv/pr110696.c: Ditto.
* gcc.target/riscv/target-attr-01.c: Ditto.
* gcc.target/riscv/target-attr-02.c: Ditto.
* gcc.target/riscv/target-attr-03.c: Ditto.
* gcc.target/riscv/target-attr-04.c: Ditto.
* gcc.target/riscv/target-attr-08.c: Ditto.
* gcc.target/riscv/target-attr-11.c: Ditto.
* gcc.target/riscv/target-attr-14.c: Ditto.
* gcc.target/riscv/target-attr-15.c: Ditto.
* gcc.target/riscv/target-attr-16.c: Ditto.
* gcc.target/riscv/rvv/base/pr114352-1.c: Likewise.
* gcc.target/riscv/rvv/base/pr114352-3.c: Likewise.
* gcc.dg/pr90838.c: Fix search string for rv64.

Co-Authored-By: Jeff Law  

Diff:
---
 gcc/common/config/riscv/riscv-common.cc  | 2 ++
 gcc/testsuite/gcc.dg/pr90838.c   | 2 +-
 gcc/testsuite/gcc.target/riscv/attribute-15.c| 2 +-
 gcc/testsuite/gcc.target/riscv/attribute-16.c| 2 +-
 gcc/testsuite/gcc.target/riscv/attribute-17.c| 2 +-
 gcc/testsuite/gcc.target/riscv/attribute-18.c| 2 +-
 gcc/testsuite/gcc.target/riscv/attribute-19.c| 2 +-
 gcc/testsuite/gcc.target/riscv/pr110696.c| 2 +-
 gcc/testsuite/gcc.target/riscv/rvv/base/pr114352-1.c | 4 ++--
 gcc/testsuite/gcc.target/riscv/rvv/base/pr114352-3.c | 8 
 gcc/testsuite/gcc.target/riscv/target-attr-01.c  | 2 +-
 gcc/testsuite/gcc.target/riscv/target-attr-02.c  | 2 +-
 gcc/testsuite/gcc.target/riscv/target-attr-03.c  | 2 +-
 gcc/testsuite/gcc.target/riscv/target-attr-04.c  | 2 +-
 gcc/testsuite/gcc.target/riscv/target-attr-08.c  | 2 +-
 gcc/testsuite/gcc.target/riscv/target-attr-11.c  | 2 +-
 gcc/testsuite/gcc.target/riscv/target-attr-14.c  | 4 ++--
 gcc/testsuite/gcc.target/riscv/target-attr-15.c  | 4 ++--
 gcc/testsuite/gcc.target/riscv/target-attr-16.c  | 4 ++--
 19 files changed, 27 insertions(+), 25 deletions(-)

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc
index 2adebe0b6f29..60595a3e3561 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -75,6 +75,8 @@ struct riscv_implied_info_t
 /* Implied ISA info, must end with NULL sentinel.  */
 static const riscv_implied_info_t riscv_implied_info[] =
 {
+  {"m", "zmmul"},
+
   {"d", "f"},
   {"f", "zicsr"},
   {"d", "zicsr"},
diff --git a/gcc/testsuite/gcc.dg/pr90838.c b/gcc/testsuite/gcc.dg/pr90838.c
index 40aad70499d8..db7bcec3ea9b 100644
--- a/gcc/testsuite/gcc.dg/pr90838.c
+++ b/gcc/testsuite/gcc.dg/pr90838.c
@@ -77,7 +77,7 @@ int ctz4 (unsigned long x)
 /* { dg-final { scan-assembler-times "ctz\t"  1 { target { rv64 } } } } */
 /* { dg-final { scan-assembler-times "ctzw\t" 3 { target { rv64 } } } } */
 /* { dg-final { scan-assembler-times "andi\t" 2 { target { rv64 } } } } */
-/* { dg-final { scan-assembler-not "mul" { target { rv64 } } } } */
+/* { dg-final { scan-assembler-not "mul\t" { target { rv64 } } } } */
 
 /* { dg-final { scan-tree-dump-times {= \.CTZ} 3 "forwprop2" { target { rv32 } 
} } } */
 /* { dg-final { scan-assembler-times "ctz\t" 3 { target { rv32 } } } } */
diff --git a/gcc/testsuite/gcc.target/riscv/attribute-15.c 
b/gcc/testsuite/gcc.target/riscv/attribute-15.c
index ac6caaecd4f7..d7a70e86aa1f 100644
--- a/gcc/testsuite/gcc.target/riscv/attribute-15.c
+++ b/gcc/testsuite/gcc.target/riscv/attribute-15.c
@@ -3,4 +3,4 @@
 int foo()
 {
 }
-/* { dg-final { scan-assembler ".attribute arch, 
\"rv32i2p0_m2p0_a2p0_f2p0_d2p0_c2p0_zaamo1p0_zalrsc1p0_zca1p0_zcd1p0_zcf1p0\"" 
} } */
+/* { dg-final { scan-assembler ".attribute arch, 
\"rv32i2p0_m2p0_a2p0_f2p0_d2p0_c2p0_zmmul1p0_zaamo1p0_zalrsc1p0_zca1p0_zcd1p0_zcf1p0\""
 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/attribute-16.c 
b/gcc/testsuite/gcc.target/riscv/attribute-16.c
index 539e426ca976..4818cbe90d48 100644
--- a/gcc/testsuite/gcc.target/riscv/attribute-16.c
+++ b/gcc/testsuite/gcc.target/riscv/attribute-16.c
@@ -3,4 +3,4 @@
 int foo()
 {
 }
-/* { dg-final { scan-assembler ".attribute arch, 
\"rv32i2p1_m2p0_a2p0_f2p2_d2p2_c2p0_zicsr2p0_zifencei2p0_zaamo1p0_zalrsc1p0_zca1p0_zcd1p0_zcf1p0\""
 } } */
+/* { dg-final { scan-

[gcc r15-4746] RISC-V: Implement the MASK_LEN_STRIDED_LOAD{STORE}

2024-10-29 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:30435cc261071d389d9a210f598170ecdd5ea13c

commit r15-4746-g30435cc261071d389d9a210f598170ecdd5ea13c
Author: Pan Li 
Date:   Wed Oct 23 16:46:53 2024 +0800

RISC-V: Implement the MASK_LEN_STRIDED_LOAD{STORE}

This patch implements MASK_LEN_STRIDED_LOAD{STORE} in
the RISC-V backend by leveraging the vector strided load/store insns.

For example:
void foo (int * __restrict a, int * __restrict b, int stride, int n)
{
for (int i = 0; i < n; i++)
  a[i*stride] = b[i*stride] + 100;
}

Before this patch:
  38   │ vsetvli a5,a3,e32,m1,ta,ma
  39   │ vluxei64.v  v1,(a1),v4
  40   │ mul a4,a2,a5
  41   │ sub a3,a3,a5
  42   │ vadd.vv v1,v1,v2
  43   │ vsuxei64.v  v1,(a0),v4
  44   │ add a1,a1,a4
  45   │ add a0,a0,a4

After this patch:
  33   │ vsetvli a5,a3,e32,m1,ta,ma
  34   │ vlse32.vv1,0(a1),a2
  35   │ mul a4,a2,a5
  36   │ sub a3,a3,a5
  37   │ vadd.vv v1,v1,v2
  38   │ vsse32.vv1,0(a0),a2
  39   │ add a1,a1,a4
  40   │ add a0,a0,a4

The following test suites passed for this patch:
* The full RISC-V regression test suite.

gcc/ChangeLog:

* config/riscv/autovec.md (mask_len_strided_load_<mode>): Add
new pattern for MASK_LEN_STRIDED_LOAD.
(mask_len_strided_store_<mode>): Ditto but for store.
* config/riscv/riscv-protos.h (expand_strided_load): Add new
func decl to expand strided load.
(expand_strided_store): Ditto but for store.
* config/riscv/riscv-v.cc (expand_strided_load): Add new
func impl to expand strided load.
(expand_strided_store): Ditto but for store.

Signed-off-by: Pan Li 
Co-Authored-By: Juzhe-Zhong 

Diff:
---
 gcc/config/riscv/autovec.md | 29 +++
 gcc/config/riscv/riscv-protos.h |  2 ++
 gcc/config/riscv/riscv-v.cc | 52 +
 3 files changed, 83 insertions(+)

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 774a3d337231..1f1849d52372 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2889,3 +2889,32 @@
 DONE;
   }
 )
+
+;; =
+;; == Strided Load/Store
+;; =
+(define_expand "mask_len_strided_load_<mode>"
+  [(match_operand:V 0 "register_operand")
+   (match_operand   1 "pmode_reg_or_0_operand")
+   (match_operand   2 "pmode_reg_or_0_operand")
+   (match_operand:<VM>  3 "vector_mask_operand")
+   (match_operand   4 "autovec_length_operand")
+   (match_operand   5 "const_0_operand")]
+  "TARGET_VECTOR"
+  {
+riscv_vector::expand_strided_load (<MODE>mode, operands);
+DONE;
+  })
+
+(define_expand "mask_len_strided_store_<mode>"
+  [(match_operand   0 "pmode_reg_or_0_operand")
+   (match_operand   1 "pmode_reg_or_0_operand")
+   (match_operand:V 2 "register_operand")
+   (match_operand:<VM>  3 "vector_mask_operand")
+   (match_operand   4 "autovec_length_operand")
+   (match_operand   5 "const_0_operand")]
+  "TARGET_VECTOR"
+  {
+riscv_vector::expand_strided_store (<MODE>mode, operands);
+DONE;
+  })
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 54f472afd8d0..0a6b43f0c767 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -700,6 +700,8 @@ bool expand_strcmp (rtx, rtx, rtx, rtx, unsigned 
HOST_WIDE_INT, bool);
 void emit_vec_extract (rtx, rtx, rtx);
 bool expand_vec_setmem (rtx, rtx, rtx);
 bool expand_vec_cmpmem (rtx, rtx, rtx, rtx);
+void expand_strided_load (machine_mode, rtx *);
+void expand_strided_store (machine_mode, rtx *);
 
 /* Rounding mode bitfield for fixed point VXRM.  */
 enum fixed_point_rounding_mode
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index c48b87278a31..209b7ee88f18 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -3833,6 +3833,58 @@ expand_load_store (rtx *ops, bool is_load)
 }
 }
 
+/* Expand MASK_LEN_STRIDED_LOAD.  */
+void
+expand_strided_load (machine_mode mode, rtx *ops)
+{
+  rtx v_reg = ops[0];
+  rtx base = ops[1];
+  rtx stride = ops[2];
+  rtx mask = ops[3];
+  rtx len = ops[4];
+  poly_int64 len_val;
+
+  insn_code icode = code_for_pred_strided_load (mode);
+  rtx emit_ops[] = {v_reg, mask, gen_rtx_MEM (mode, base), stride};
+
+  if (poly_int_rtx_p (len, &len_val)
+  && known_eq (len_val, GET_MODE_NUNITS (mode)))
+emit_vlmax_insn (icode, BINARY_OP_TAMA, emit_ops);
+  else
+{
+  len = satisfies_constraint_K (len) ? len : force_reg (Pmode, len);
+  emit_nonvlmax_insn (icode, BINARY_OP_TAMA, emit_ops, len);
+}
+}
+
+/* Expand MASK_LEN_STRIDED_STORE.  */
+void
+ex

[gcc r15-4731] arm: [MVE intrinsics] Add load_ext intrinsic shape

2024-10-29 Thread Christophe Lyon via Gcc-cvs
https://gcc.gnu.org/g:c31cdc3d85e365ce0d233fe40bee40a8bd672b11

commit r15-4731-gc31cdc3d85e365ce0d233fe40bee40a8bd672b11
Author: Alfie Richards 
Date:   Wed Sep 11 12:32:06 2024 +0200

arm: [MVE intrinsics] Add load_ext intrinsic shape

This patch adds the extending load shape.
It also adds/fixes comments for the load and store shapes.

2024-09-11  Alfie Richards 
Christophe Lyon  

gcc/
* config/arm/arm-mve-builtins-shapes.cc:
(load_ext): New.
* config/arm/arm-mve-builtins-shapes.h:
(load_ext): New.

Diff:
---
 gcc/config/arm/arm-mve-builtins-shapes.cc | 30 +++---
 gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
 2 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc 
b/gcc/config/arm/arm-mve-builtins-shapes.cc
index 0a108cf0127e..12e62122ae4e 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.cc
+++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
@@ -1461,7 +1461,9 @@ struct inherent_def : public nonoverloaded_base
 };
 SHAPE (inherent)
 
-/* sv_t svfoo[_t0](const _t *)
+/* _t vfoo[_t0](const _t *)
+
+   where  is the scalar name of .
 
Example: vld1q.
int8x16_t [__arm_]vld1q[_s8](int8_t const *base)
@@ -1493,6 +1495,24 @@ struct load_def : public overloaded_base<0>
 };
 SHAPE (load)
 
+/* _t foo_t0 (const _t *)
+
+   where  is determined by the function base name.
+
+   Example: vldrq.
+   int32x4_t [__arm_]vldrwq_s32 (int32_t const *base)
+   uint32x4_t [__arm_]vldrhq_z_u32 (uint16_t const *base, mve_pred16_t p)  */
+struct load_ext_def : public nonoverloaded_base
+{
+  void
+  build (function_builder &b, const function_group_info &group,
+bool preserve_user_namespace) const override
+  {
+build_all (b, "t0,al", group, MODE_none, preserve_user_namespace);
+  }
+};
+SHAPE (load_ext)
+
 /* _t vfoo[_t0](_t)
_t vfoo_n_t0(_t)
 
@@ -1542,14 +1562,18 @@ struct mvn_def : public overloaded_base<0>
 };
 SHAPE (mvn)
 
-/* void vfoo[_t0](_t *, v[xN]_t)
+/* void vfoo[_t0](_t *, [xN]_t)
 
where  might be tied to  (for non-truncating stores) or might
depend on the function base name (for truncating stores).
 
Example: vst1q.
void [__arm_]vst1q[_s8](int8_t *base, int8x16_t value)
-   void [__arm_]vst1q_p[_s8](int8_t *base, int8x16_t value, mve_pred16_t p)  */
+   void [__arm_]vst1q_p[_s8](int8_t *base, int8x16_t value, mve_pred16_t p)
+
+   Example: vstrb.
+   void [__arm_]vstrbq[_s16](int8_t *base, int16x8_t value)
+   void [__arm_]vstrbq_p[_s16](int8_t *base, int16x8_t value, mve_pred16_t p)  
*/
 struct store_def : public overloaded_base<0>
 {
   void
diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h 
b/gcc/config/arm/arm-mve-builtins-shapes.h
index e53381d8f36c..db7c63117289 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.h
+++ b/gcc/config/arm/arm-mve-builtins-shapes.h
@@ -62,6 +62,7 @@ namespace arm_mve
 extern const function_shape *const create;
 extern const function_shape *const inherent;
 extern const function_shape *const load;
+extern const function_shape *const load_ext;
 extern const function_shape *const mvn;
 extern const function_shape *const store;
 extern const function_shape *const ternary;


[gcc r15-4733] arm: [MVE intrinsics] Add support for predicated contiguous loads and stores

2024-10-29 Thread Christophe Lyon via Gcc-cvs
https://gcc.gnu.org/g:16ee5c64e60b388df40a2b91d4a145159629cbcd

commit r15-4733-g16ee5c64e60b388df40a2b91d4a145159629cbcd
Author: Alfie Richards 
Date:   Wed Sep 11 14:56:28 2024 +0200

arm: [MVE intrinsics] Add support for predicated contiguous loads and stores

This patch extends
function_expander::use_contiguous_load_insn and
function_expander::use_contiguous_store_insn functions to
support predicated versions.

2024-09-11  Alfie Richards  
Christophe Lyon  

gcc/

* config/arm/arm-mve-builtins.cc
(function_expander::use_contiguous_load_insn): Add support for
PRED_z.
(function_expander::use_contiguous_store_insn): Add support for
PRED_p.

Diff:
---
 gcc/config/arm/arm-mve-builtins.cc | 4 
 1 file changed, 4 insertions(+)

diff --git a/gcc/config/arm/arm-mve-builtins.cc 
b/gcc/config/arm/arm-mve-builtins.cc
index 804eb88b6a5e..2bde82215642 100644
--- a/gcc/config/arm/arm-mve-builtins.cc
+++ b/gcc/config/arm/arm-mve-builtins.cc
@@ -2327,6 +2327,8 @@ function_expander::use_contiguous_load_insn (insn_code 
icode)
 
   add_output_operand (icode);
   add_mem_operand (mem_mode, get_contiguous_base ());
+  if (pred == PRED_z)
+add_input_operand (icode, args[1]);
   return generate_insn (icode);
 }
 
@@ -2339,6 +2341,8 @@ function_expander::use_contiguous_store_insn (insn_code 
icode)
 
   add_mem_operand (mem_mode, get_contiguous_base ());
   add_input_operand (icode, args[1]);
+  if (pred == PRED_p)
+add_input_operand (icode, args[2]);
   return generate_insn (icode);
 }


[gcc r15-4747] RISC-V: Add testcases for form 1 of MASK_LEN_STRIDED_LOAD{STORE}

2024-10-29 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:072d6bb67a51ceb9d7056f479f15f4c9f3b50b20

commit r15-4747-g072d6bb67a51ceb9d7056f479f15f4c9f3b50b20
Author: Pan Li 
Date:   Wed Oct 23 16:52:01 2024 +0800

RISC-V: Add testcases for form 1 of MASK_LEN_STRIDED_LOAD{STORE}

Form 1:
  void __attribute__((noinline))\
  vec_strided_load_store_##T##_form_1 (T *restrict out, T *restrict in, \
   long stride, size_t size)\
  { \
for (size_t i = 0; i < size; i++)   \
  out[i * stride] = in[i * stride]; \
  }

The below test suites are passed for this patch:
* The riscv full regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/rvv.exp: Add strided folder.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-1-f16.c: New 
test.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-1-f32.c: New 
test.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-1-f64.c: New 
test.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-1-i16.c: New 
test.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-1-i32.c: New 
test.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-1-i64.c: New 
test.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-1-i8.c: New 
test.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-1-u16.c: New 
test.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-1-u32.c: New 
test.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-1-u64.c: New 
test.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-1-u8.c: New 
test.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-run-1-f16.c: 
New test.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-run-1-f32.c: 
New test.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-run-1-f64.c: 
New test.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-run-1-i16.c: 
New test.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-run-1-i32.c: 
New test.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-run-1-i64.c: 
New test.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-run-1-i8.c: 
New test.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-run-1-u16.c: 
New test.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-run-1-u32.c: 
New test.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-run-1-u64.c: 
New test.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-run-1-u8.c: 
New test.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st.h: New test.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st_data.h: New 
test.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st_run.h: New 
test.

Signed-off-by: Pan Li 
Co-Authored-By: Juzhe-Zhong 

Diff:
---
 .../rvv/autovec/strided/strided_ld_st-1-f16.c  |   11 +
 .../rvv/autovec/strided/strided_ld_st-1-f32.c  |   11 +
 .../rvv/autovec/strided/strided_ld_st-1-f64.c  |   11 +
 .../rvv/autovec/strided/strided_ld_st-1-i16.c  |   11 +
 .../rvv/autovec/strided/strided_ld_st-1-i32.c  |   11 +
 .../rvv/autovec/strided/strided_ld_st-1-i64.c  |   11 +
 .../riscv/rvv/autovec/strided/strided_ld_st-1-i8.c |   11 +
 .../rvv/autovec/strided/strided_ld_st-1-u16.c  |   11 +
 .../rvv/autovec/strided/strided_ld_st-1-u32.c  |   11 +
 .../rvv/autovec/strided/strided_ld_st-1-u64.c  |   11 +
 .../riscv/rvv/autovec/strided/strided_ld_st-1-u8.c |   11 +
 .../rvv/autovec/strided/strided_ld_st-run-1-f16.c  |   15 +
 .../rvv/autovec/strided/strided_ld_st-run-1-f32.c  |   15 +
 .../rvv/autovec/strided/strided_ld_st-run-1-f64.c  |   15 +
 .../rvv/autovec/strided/strided_ld_st-run-1-i16.c  |   15 +
 .../rvv/autovec/strided/strided_ld_st-run-1-i32.c  |   15 +
 .../rvv/autovec/strided/strided_ld_st-run-1-i64.c  |   15 +
 .../rvv/autovec/strided/strided_ld_st-run-1-i8.c   |   15 +
 .../rvv/autovec/strided/strided_ld_st-run-1-u16.c  |   15 +
 .../rvv/autovec/strided/strided_ld_st-run-1-u32.c  |   15 +
 .../rvv/autovec/strided/strided_ld_st-run-1-u64.c  |   15 +
 .../rvv/autovec/strided/strided_ld_st-run-1-u8.c   |   15 +
 .../riscv/rvv/autovec/strided/strided_ld_st.h  |   22 +
 .../riscv/rvv/autovec/strided/strided_ld_st_data.h | 1145 
 .../riscv/rvv/autovec/strided/strided_ld_st_run.h  |   27 +
 gcc/testsuite/gcc.target/riscv/rvv/rvv.exp |2 +
 26 files changed, 1482 insertions(+)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/strided/strided_ld_st-1-f16.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/strided/strided_ld_st-1-f16.c
new file mode 100644

[gcc r15-4748] [PATCH 1/2] RISC-V:Add intrinsic support for the CMOs extensions

2024-10-29 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:d2c8548e0ce51dac6bc51d37236c50f98fca82f0

commit r15-4748-gd2c8548e0ce51dac6bc51d37236c50f98fca82f0
Author: yulong 
Date:   Tue Oct 29 08:43:42 2024 -0600

[PATCH 1/2] RISC-V:Add intrinsic support for the CMOs extensions

gcc/ChangeLog:

* config.gcc: Add riscv_cmo.h.
* config/riscv/riscv_cmo.h: New file.

Diff:
---
 gcc/config.gcc   |  2 +-
 gcc/config/riscv/riscv_cmo.h | 84 
 2 files changed, 85 insertions(+), 1 deletion(-)

diff --git a/gcc/config.gcc b/gcc/config.gcc
index fd8482287228..e2ed3b309cc2 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -558,7 +558,7 @@ riscv*)
extra_objs="${extra_objs} riscv-vector-builtins.o 
riscv-vector-builtins-shapes.o riscv-vector-builtins-bases.o"
extra_objs="${extra_objs} thead.o riscv-target-attr.o"
d_target_objs="riscv-d.o"
-   extra_headers="riscv_vector.h riscv_crypto.h riscv_bitmanip.h 
riscv_th_vector.h"
+   extra_headers="riscv_vector.h riscv_crypto.h riscv_bitmanip.h 
riscv_th_vector.h riscv_cmo.h"
target_gtfiles="$target_gtfiles 
\$(srcdir)/config/riscv/riscv-vector-builtins.cc"
target_gtfiles="$target_gtfiles 
\$(srcdir)/config/riscv/riscv-vector-builtins.h"
;;
diff --git a/gcc/config/riscv/riscv_cmo.h b/gcc/config/riscv/riscv_cmo.h
new file mode 100644
index ..3514fd3f0fe4
--- /dev/null
+++ b/gcc/config/riscv/riscv_cmo.h
@@ -0,0 +1,84 @@
+/* RISC-V CMO Extension intrinsics include file.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef __RISCV_CMO_H
+#define __RISCV_CMO_H
+
+#if defined (__riscv_zicbom)
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__riscv_cmo_clean (void *addr)
+{
+__builtin_riscv_zicbom_cbo_clean (addr);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__riscv_cmo_flush (void *addr)
+{
+__builtin_riscv_zicbom_cbo_flush (addr);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__riscv_cmo_inval (void *addr)
+{
+__builtin_riscv_zicbom_cbo_inval (addr);
+}
+
+#endif // __riscv_zicbom
+
+#if defined (__riscv_zicbop)
+
+# define rnum 1
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__riscv_cmo_prefetch (void *addr, const int vs1, const int vs2)
+{
+__builtin_prefetch (addr,vs1,vs2);
+}
+
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__riscv_cmo_prefetchi ()
+{
+return __builtin_riscv_zicbop_cbo_prefetchi (rnum);
+}
+
+#endif // __riscv_zicbop
+
+#if defined (__riscv_zicboz)
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__riscv_cmo_zero (void *addr)
+{
+__builtin_riscv_zicboz_cbo_zero (addr);
+}
+
+#endif // __riscv_zicboz
+
+#endif // __RISCV_CMO_H


[gcc r13-9154] rs6000: Fix PTImode handling in power8 swap optimization pass [PR116415]

2024-10-29 Thread Peter Bergner via Gcc-cvs
https://gcc.gnu.org/g:507ed9118b29d7a5a2b751876bec9a1f5009de01

commit r13-9154-g507ed9118b29d7a5a2b751876bec9a1f5009de01
Author: Peter Bergner 
Date:   Fri Aug 23 11:45:40 2024 -0500

rs6000: Fix PTImode handling in power8 swap optimization pass [PR116415]

Our power8 swap optimization pass has some special handling for optimizing
swaps of TImode variables.  The test case reported in bugzilla uses a call
to  __atomic_compare_exchange, which introduces a variable of PTImode and
that does not get the same treatment as TImode leading to wrong code
generation.  The simple fix is to treat PTImode identically to TImode.

2024-08-23  Peter Bergner  

gcc/
PR target/116415
* config/rs6000/rs6000.h (TI_OR_PTI_MODE): New define.
* config/rs6000/rs6000-p8swap.cc (rs6000_analyze_swaps): Use it to
handle PTImode identically to TImode.

gcc/testsuite/
PR target/116415
* gcc.target/powerpc/pr116415.c: New test.

(cherry picked from commit 6e68c3df1540c5bafbb47343698bf4e270333fdb)

Diff:
---
 gcc/config/rs6000/rs6000-p8swap.cc  |  8 +++---
 gcc/config/rs6000/rs6000.h  |  2 ++
 gcc/testsuite/gcc.target/powerpc/pr116415.c | 42 +
 3 files changed, 48 insertions(+), 4 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-p8swap.cc 
b/gcc/config/rs6000/rs6000-p8swap.cc
index 0388b9bd7366..ca03b0205206 100644
--- a/gcc/config/rs6000/rs6000-p8swap.cc
+++ b/gcc/config/rs6000/rs6000-p8swap.cc
@@ -2469,10 +2469,10 @@ rs6000_analyze_swaps (function *fun)
mode = V4SImode;
}
 
- if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
+ if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || TI_OR_PTI_MODE (mode))
{
  insn_entry[uid].is_relevant = 1;
- if (mode == TImode || mode == V1TImode
+ if (TI_OR_PTI_MODE (mode) || mode == V1TImode
  || FLOAT128_VECTOR_P (mode))
insn_entry[uid].is_128_int = 1;
  if (DF_REF_INSN_INFO (mention))
@@ -2497,10 +2497,10 @@ rs6000_analyze_swaps (function *fun)
  && ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (SET_DEST (insn
mode = GET_MODE (SET_DEST (insn));
 
- if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
+ if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || TI_OR_PTI_MODE (mode))
{
  insn_entry[uid].is_relevant = 1;
- if (mode == TImode || mode == V1TImode
+ if (TI_OR_PTI_MODE (mode) || mode == V1TImode
  || FLOAT128_VECTOR_P (mode))
insn_entry[uid].is_128_int = 1;
  if (DF_REF_INSN_INFO (mention))
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 9f02025b0c8d..b8edba43d538 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -1008,6 +1008,8 @@ enum data_align { align_abi, align_opt, align_both };
   (ALTIVEC_VECTOR_MODE (MODE) || VSX_VECTOR_MODE (MODE)
\
|| (MODE) == V2DImode || (MODE) == V1TImode)
 
+#define TI_OR_PTI_MODE(mode) ((mode) == TImode || (mode) == PTImode)
+
 /* Post-reload, we can't use any new AltiVec registers, as we already
emitted the vrsave mask.  */
 
diff --git a/gcc/testsuite/gcc.target/powerpc/pr116415.c 
b/gcc/testsuite/gcc.target/powerpc/pr116415.c
new file mode 100644
index ..08cc282e2c27
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr116415.c
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+/* { dg-require-effective-target p8vector_hw } */
+/* { dg-require-effective-target int128 } */
+/* { dg-options "-O2 -mdejagnu-cpu=power8" } */
+
+/* PR 116415: Verify our Power8 swap optimization pass doesn't incorrectly swap
+   PTImode values.  They should be handled identically to TImode values.  */
+
+#include 
+#include 
+#include 
+
+typedef union {
+  struct {
+uint64_t a;
+uint64_t b;
+  } t;
+  __uint128_t data;
+} Value;
+Value value, next;
+
+void
+bug (Value *val, Value *nxt)
+{
+  for (;;) {
+nxt->t.a = val->t.a + 1;
+nxt->t.b = val->t.b + 2;
+if (__atomic_compare_exchange (&val->data, &val->data, &nxt->data,
+  0, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE))
+  break;
+  }
+}
+
+int
+main (void)
+{
+  bug (&value, &next);
+  printf ("%lu %lu\n", value.t.a, value.t.b);
+  if (value.t.a != 1 || value.t.b != 2)
+abort ();
+  return 0;
+}


[gcc r15-4735] ada: Fix static_assert with one argument

2024-10-29 Thread Marc Poulhiès via Gcc-cvs
https://gcc.gnu.org/g:61977b8af087a8e0f738a2c51f86ec12c554ec43

commit r15-4735-g61977b8af087a8e0f738a2c51f86ec12c554ec43
Author: Marc Poulhiès 
Date:   Mon Oct 28 16:10:25 2024 +0100

ada: Fix static_assert with one argument

Single argument static_assert is C++17 only and breaks the build using
older GCC (prerequisite is C++14).

gcc/ada

* types.h: fix static_assert.

Diff:
---
 gcc/ada/types.h | 8 +++-
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/gcc/ada/types.h b/gcc/ada/types.h
index 3193f01444c0..6a7d797905cb 100644
--- a/gcc/ada/types.h
+++ b/gcc/ada/types.h
@@ -383,11 +383,9 @@ typedef unsigned int any_slot;
 #define Slot_Size (sizeof (any_slot) * 8)
 
 /* Slots are 32 bits (for now, but we might want to make that 64).
-   The first bootstrap stage uses -std=gnu++98, so we cannot use
-   static_assert in that case.  */
-#if __cplusplus >= 201402L
-static_assert (Slot_Size == 32);
-#endif
+   The first bootstrap stage uses C++14, so we can only use the 2 argument
+   version of static_assert. */
+static_assert (Slot_Size == 32, "");
 
 /* Definitions of Reason codes for Raise_xxx_Error nodes.  */
 enum RT_Exception_Code


[gcc r15-4736] libstdc++: Use if consteval rather than if (std::__is_constant_evaluated()) for {, b}float16_t nextaf

2024-10-29 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:5e247ac0c28b9a2662f99c4a5420c5f7c2d0c6bd

commit r15-4736-g5e247ac0c28b9a2662f99c4a5420c5f7c2d0c6bd
Author: Jakub Jelinek 
Date:   Tue Oct 29 11:14:12 2024 +0100

libstdc++: Use if consteval rather than if (std::__is_constant_evaluated()) 
for {,b}float16_t nextafter [PR117321]

The nextafter_c++23.cc testcase fails to link at -O0.
The problem is that even though std::__is_constant_evaluated() has
always_inline attribute, that at -O0 just means that we inline the
call, but its result is still assigned to a temporary which is tested
later, nothing at -O0 propagates that false into the if and optimizes
away the if body.  And the __builtin_nextafterf16{,b} calls are meant
to be used solely for constant evaluation, the C libraries don't
define nextafterf16 these days.

As __STDCPP_FLOAT16_T__ and __STDCPP_BFLOAT16_T__ are predefined right
now only by GCC, not by clang which doesn't implement the extended floating
point types paper, and as they are predefined in C++23 and later modes only,
I think we can just use if consteval which is folded already during the FE
and the body isn't included even at -O0.  I've added a feature test for
that just in case clang implements those and implements those in some weird
way.  Note, if (__builtin_is_constant_evaluated()) would work correctly too,
that is also folded to false at gimplification time and the corresponding
if block not emitted at all.  But for -O0 it can't be wrapped into a helper
inline function.

2024-10-29  Jakub Jelinek  

PR libstdc++/117321
* include/c_global/cmath (nextafter(_Float16, _Float16)): Use
if consteval rather than if (std::__is_constant_evaluated()) around
the __builtin_nextafterf16 call.
(nextafter(__gnu_cxx::__bfloat16_t, __gnu_cxx::__bfloat16_t)): Use
if consteval rather than if (std::__is_constant_evaluated()) around
the __builtin_nextafterf16b call.
* testsuite/26_numerics/headers/cmath/117321.cc: New test.

Diff:
---
 libstdc++-v3/include/c_global/cmath  | 16 
 .../testsuite/26_numerics/headers/cmath/117321.cc|  5 +
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/libstdc++-v3/include/c_global/cmath 
b/libstdc++-v3/include/c_global/cmath
index ca84f9514003..5c568c703135 100644
--- a/libstdc++-v3/include/c_global/cmath
+++ b/libstdc++-v3/include/c_global/cmath
@@ -2880,8 +2880,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   constexpr _Float16
   nextafter(_Float16 __x, _Float16 __y)
   {
-if (std::__is_constant_evaluated())
-  return __builtin_nextafterf16(__x, __y);
+#if __cpp_if_consteval >= 202106L
+// Can't use if (std::__is_constant_evaluated()) here, as it
+// doesn't guarantee optimizing the body away at -O0 and
+// nothing defines nextafterf16.
+if consteval { return __builtin_nextafterf16(__x, __y); }
+#endif
 #ifdef __INT16_TYPE__
 using __float16_int_type = __INT16_TYPE__;
 #else
@@ -3598,8 +3602,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   constexpr __gnu_cxx::__bfloat16_t
   nextafter(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
   {
-if (std::__is_constant_evaluated())
-  return __builtin_nextafterf16b(__x, __y);
+#if __cpp_if_consteval >= 202106L
+// Can't use if (std::__is_constant_evaluated()) here, as it
+// doesn't guarantee optimizing the body away at -O0 and
+// nothing defines nextafterf16b.
+if consteval { return __builtin_nextafterf16b(__x, __y); }
+#endif
 #ifdef __INT16_TYPE__
 using __bfloat16_int_type = __INT16_TYPE__;
 #else
diff --git a/libstdc++-v3/testsuite/26_numerics/headers/cmath/117321.cc 
b/libstdc++-v3/testsuite/26_numerics/headers/cmath/117321.cc
new file mode 100644
index ..18d108c6a3e2
--- /dev/null
+++ b/libstdc++-v3/testsuite/26_numerics/headers/cmath/117321.cc
@@ -0,0 +1,5 @@
+// { dg-do run { target c++23 } }
+// { dg-require-cmath "" }
+// { dg-additional-options "-O0" }
+
+#include "nextafter_c++23.cc"


[gcc r15-4737] tree-optimization/117333 - ICE with NULL access size DR

2024-10-29 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:0e99b22aa666f107c4035d32bfb5ab11534a9d2f

commit r15-4737-g0e99b22aa666f107c4035d32bfb5ab11534a9d2f
Author: Richard Biener 
Date:   Tue Oct 29 09:42:12 2024 +0100

tree-optimization/117333 - ICE with NULL access size DR

dr_may_alias_p ICEs when TYPE_SIZE of DR->ref is NULL but this is
valid IL when the access size of an aggregate copy can be inferred
from the RHS.

PR tree-optimization/117333
* tree-data-ref.cc (dr_may_alias_p): Guard against NULL
access size.

* gcc.dg/torture/pr117333.c: New testcase.

Diff:
---
 gcc/testsuite/gcc.dg/torture/pr117333.c | 29 +
 gcc/tree-data-ref.cc|  2 ++
 2 files changed, 31 insertions(+)

diff --git a/gcc/testsuite/gcc.dg/torture/pr117333.c 
b/gcc/testsuite/gcc.dg/torture/pr117333.c
new file mode 100644
index ..38c1b5493a51
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr117333.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+
+struct s
+{
+  unsigned long ul;
+  int i;
+  char ac[];
+};
+
+const struct s gs = { 3, -4, "abcdef" };
+
+void copy_s(struct s*d, const struct s*s)
+{
+  *d = *s;
+}
+
+unsigned test(struct s*ps, _Bool direct)
+{
+  if(direct)
+*ps = gs;
+  else
+copy_s(ps, &gs);
+  return sizeof(*ps);
+}
+
+unsigned size(void)
+{
+  return sizeof(gs);
+}
diff --git a/gcc/tree-data-ref.cc b/gcc/tree-data-ref.cc
index de234c65e948..1fdfef282c72 100644
--- a/gcc/tree-data-ref.cc
+++ b/gcc/tree-data-ref.cc
@@ -3004,6 +3004,8 @@ dr_may_alias_p (const struct data_reference *a, const 
struct data_reference *b,
  && DR_BASE_ADDRESS (b)
  && operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b))
  && operand_equal_p (DR_OFFSET (a), DR_OFFSET (b))
+ && tree_size_a
+ && tree_size_b
  && poly_int_tree_p (tree_size_a)
  && poly_int_tree_p (tree_size_b)
  && !ranges_maybe_overlap_p (wi::to_poly_widest (DR_INIT (a)),


[gcc r15-4734] arm: [MVE intrinsics] Rework MVE vld/vst intrinsics

2024-10-29 Thread Christophe Lyon via Gcc-cvs
https://gcc.gnu.org/g:63b6967b06b5387821c4e5f2c113da6aaeeae2b7

commit r15-4734-g63b6967b06b5387821c4e5f2c113da6aaeeae2b7
Author: Alfie Richards 
Date:   Wed Sep 11 15:01:43 2024 +0200

arm: [MVE intrinsics] Rework MVE vld/vst intrinsics

Implement the mve vld and vst intrinsics using the MVE builtins framework.

The main part of the patch is to reimplement the vstr/vldr patterns
such that we now have much fewer of them:
- non-truncating stores
- predicated non-truncating stores
- truncating stores
- predicated truncating stores
- non-extending loads
- predicated non-extending loads
- extending loads
- predicated extending loads

This enables us to update the implementation of vld1/vst1 and use the
new vldr/vstr builtins.

The patch also adds support for the predicated vld1/vst1 versions.

gcc.target/arm/pr112337.c needs an update, to call the intrinsic
instead of the builtin, which this patch deletes.

2024-09-11  Alfie Richards  
Christophe Lyon  

gcc/

* config/arm/arm-mve-builtins-base.cc (vld1q_impl): Add support
for predicated version.
(vst1q_impl): Likewise.
(vstrq_impl): New class.
(vldrq_impl): New class.
(vldrbq): New.
(vldrhq): New.
(vldrwq): New.
(vstrbq): New.
(vstrhq): New.
(vstrwq): New.
* config/arm/arm-mve-builtins-base.def (vld1q): Add predicated
version.
(vldrbq): New.
(vldrhq): New.
(vldrwq): New.
(vst1q): Add predicated version.
(vstrbq): New.
(vstrhq): New.
(vstrwq): New.
(vrev32q): Update types to float_16.
* config/arm/arm-mve-builtins-base.h (vldrbq): New.
(vldrhq): New.
(vldrwq): New.
(vstrbq): New.
(vstrhq): New.
(vstrwq): New.
* config/arm/arm-mve-builtins-functions.h (memory_vector_mode):
Remove conversion of floating point vectors to integer.
* config/arm/arm-mve-builtins.cc (TYPES_float16): Change to...
(TYPES_float_16): ...this.
(TYPES_float_32): New.
(float16): Change to...
(float_16): ...this.
(float_32): New.
(preds_z_or_none): New.
(function_resolver::check_gp_argument): Add support for _z
predicate.
* config/arm/arm_mve.h (vstrbq): Remove.
(vstrbq_p): Likewise.
(vstrhq): Likewise.
(vstrhq_p): Likewise.
(vstrwq): Likewise.
(vstrwq_p): Likewise.
(vst1q_p): Likewise.
(vld1q_z): Likewise.
(vldrbq_s8): Likewise.
(vldrbq_u8): Likewise.
(vldrbq_s16): Likewise.
(vldrbq_u16): Likewise.
(vldrbq_s32): Likewise.
(vldrbq_u32): Likewise.
(vstrbq_s8): Likewise.
(vstrbq_s32): Likewise.
(vstrbq_s16): Likewise.
(vstrbq_u8): Likewise.
(vstrbq_u32): Likewise.
(vstrbq_u16): Likewise.
(vstrbq_p_s8): Likewise.
(vstrbq_p_s32): Likewise.
(vstrbq_p_s16): Likewise.
(vstrbq_p_u8): Likewise.
(vstrbq_p_u32): Likewise.
(vstrbq_p_u16): Likewise.
(vldrbq_z_s16): Likewise.
(vldrbq_z_u8): Likewise.
(vldrbq_z_s8): Likewise.
(vldrbq_z_s32): Likewise.
(vldrbq_z_u16): Likewise.
(vldrbq_z_u32): Likewise.
(vldrhq_s32): Likewise.
(vldrhq_s16): Likewise.
(vldrhq_u32): Likewise.
(vldrhq_u16): Likewise.
(vldrhq_z_s32): Likewise.
(vldrhq_z_s16): Likewise.
(vldrhq_z_u32): Likewise.
(vldrhq_z_u16): Likewise.
(vldrwq_s32): Likewise.
(vldrwq_u32): Likewise.
(vldrwq_z_s32): Likewise.
(vldrwq_z_u32): Likewise.
(vldrhq_f16): Likewise.
(vldrhq_z_f16): Likewise.
(vldrwq_f32): Likewise.
(vldrwq_z_f32): Likewise.
(vstrhq_f16): Likewise.
(vstrhq_s32): Likewise.
(vstrhq_s16): Likewise.
(vstrhq_u32): Likewise.
(vstrhq_u16): Likewise.
(vstrhq_p_f16): Likewise.
(vstrhq_p_s32): Likewise.
(vstrhq_p_s16): Likewise.
(vstrhq_p_u32): Likewise.
(vstrhq_p_u16): Likewise.
(vstrwq_f32): Likewise.
(vstrwq_s32): Likewise.
(vstrwq_u32): Likewise.
(vstrwq_p_f32): Likewise.
(vstrwq_p_s32): Likewise.
(vstrwq_p_u32): Likewise.
(vst1q_p_u8): Likewise.
(vst1q_p_s8): Likewise.
(vld1q_z

[gcc r15-4730] arm: [MVE intrinsics] fix vst tests

2024-10-29 Thread Christophe Lyon via Gcc-cvs
https://gcc.gnu.org/g:3aca5aa0f09e7b9f174c4ed3461612a0d0f002d0

commit r15-4730-g3aca5aa0f09e7b9f174c4ed3461612a0d0f002d0
Author: Alfie Richards 
Date:   Wed Sep 11 18:02:01 2024 +0200

arm: [MVE intrinsics] fix vst tests

The tests for vst* intrinsics use functions which return a void
expression which can generate a warning. This hasn't come up previously
as the inlining presumably prevents the warning.

This change removed the unnecessary and incorrect returns.

2024-09-11  Alfie Richards 

gcc/testsuite/
* gcc.target/arm/mve/intrinsics/vst1q_p_f16.c: Remove `return`.
* gcc.target/arm/mve/intrinsics/vst1q_p_f32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vst1q_p_s16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vst1q_p_s32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vst1q_p_s8.c: Likewise.
* gcc.target/arm/mve/intrinsics/vst1q_p_u16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vst1q_p_u32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vst1q_p_u8.c: Likewise.
* gcc.target/arm/mve/intrinsics/vst2q_f16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vst2q_f32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vst2q_s16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vst2q_s32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vst2q_s8.c: Likewise.
* gcc.target/arm/mve/intrinsics/vst2q_u16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vst2q_u32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vst2q_u8.c: Likewise.
* gcc.target/arm/mve/intrinsics/vst4q_f16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vst4q_f32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vst4q_s16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vst4q_s32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vst4q_s8.c: Likewise.
* gcc.target/arm/mve/intrinsics/vst4q_u16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vst4q_u32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vst4q_u8.c: Likewise.
* gcc.target/arm/mve/intrinsics/vstrbq_p_s16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vstrbq_p_s32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vstrbq_p_s8.c: Likewise.
* gcc.target/arm/mve/intrinsics/vstrbq_p_u16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vstrbq_p_u32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vstrbq_p_u8.c: Likewise.
* gcc.target/arm/mve/intrinsics/vstrbq_s16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vstrbq_s32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vstrbq_s8.c: Likewise.
* gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_p_s16.c:
Likewise.
* gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_p_s32.c:
Likewise.
* gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_p_s8.c:
Likewise.
* gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_p_u16.c:
Likewise.
* gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_p_u32.c:
Likewise.
* gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_p_u8.c:
Likewise.
* gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_s16.c:
Likewise.
* gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_s32.c:
Likewise.
* gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_s8.c:
Likewise.
* gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_u16.c:
Likewise.
* gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_u32.c:
Likewise.
* gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_u8.c:
Likewise.
* gcc.target/arm/mve/intrinsics/vstrbq_u16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vstrbq_u32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vstrbq_u8.c: Likewise.
* gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_p_s64.c:
Likewise.
* gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_p_u64.c:
Likewise.
* gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_s64.c:
Likewise.
* gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_u64.c: Likewise.
* gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_p_s64.c:
Likewise.
* gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_p_u64.c:
Likewise.
* gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_s64.c:
Likewise.
* gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_u64.c:
Likewise.
* gcc.target/arm/mve/intrinsics/vstrdq_scatter_offset_p_s64.c:
 

[gcc r15-4732] arm: [MVE intrinsics] Add load_extending and store_truncating function bases

2024-10-29 Thread Christophe Lyon via Gcc-cvs
https://gcc.gnu.org/g:52e36cde0f6680ef56043cb220eb9cd0be927749

commit r15-4732-g52e36cde0f6680ef56043cb220eb9cd0be927749
Author: Alfie Richards 
Date:   Wed Sep 11 14:55:24 2024 +0200

arm: [MVE intrinsics] Add load_extending and store_truncating function bases

This patch adds the load_extending and store_truncating function bases
for MVE intrinsics.

The constructors have parameters describing the memory element
type/width which is part of the function base name (e.g. "h" in
vldrhq).

2024-09-11  Alfie Richards 

gcc/

* config/arm/arm-mve-builtins-functions.h
(load_extending): New class.
(store_truncating): New class.
* config/arm/arm-protos.h (arm_mve_data_mode): New helper function.
* config/arm/arm.cc (arm_mve_data_mode): New helper function.

Diff:
---
 gcc/config/arm/arm-mve-builtins-functions.h | 103 
 gcc/config/arm/arm-protos.h |   3 +
 gcc/config/arm/arm.cc   |  15 
 3 files changed, 121 insertions(+)

diff --git a/gcc/config/arm/arm-mve-builtins-functions.h 
b/gcc/config/arm/arm-mve-builtins-functions.h
index 57e59e30c369..48b9e79dbafa 100644
--- a/gcc/config/arm/arm-mve-builtins-functions.h
+++ b/gcc/config/arm/arm-mve-builtins-functions.h
@@ -20,6 +20,8 @@
 #ifndef GCC_ARM_MVE_BUILTINS_FUNCTIONS_H
 #define GCC_ARM_MVE_BUILTINS_FUNCTIONS_H
 
+#include "arm-protos.h"
+
 namespace arm_mve {
 
 /* Wrap T, which is derived from function_base, and indicate that the
@@ -977,6 +979,107 @@ public:
   }
 };
 
+/* A function_base that loads elements from memory and extends them
+   to a wider element.  The memory element type is a fixed part of
+   the function base name.  */
+class load_extending : public function_base
+{
+public:
+  CONSTEXPR load_extending (type_suffix_index signed_memory_type,
+   type_suffix_index unsigned_memory_type,
+   type_suffix_index float_memory_type)
+: m_signed_memory_type (signed_memory_type),
+  m_unsigned_memory_type (unsigned_memory_type),
+  m_float_memory_type (float_memory_type)
+  {}
+  CONSTEXPR load_extending (type_suffix_index signed_memory_type,
+   type_suffix_index unsigned_memory_type)
+: m_signed_memory_type (signed_memory_type),
+  m_unsigned_memory_type (unsigned_memory_type),
+  m_float_memory_type (NUM_TYPE_SUFFIXES)
+  {}
+
+  unsigned int call_properties (const function_instance &) const override
+  {
+return CP_READ_MEMORY;
+  }
+
+  tree memory_scalar_type (const function_instance &fi) const override
+  {
+type_suffix_index memory_type_suffix
+  = (fi.type_suffix (0).integer_p
+? (fi.type_suffix (0).unsigned_p
+   ? m_unsigned_memory_type
+   : m_signed_memory_type)
+: m_float_memory_type);
+return scalar_types[type_suffixes[memory_type_suffix].vector_type];
+  }
+
+  machine_mode memory_vector_mode (const function_instance &fi) const override
+  {
+type_suffix_index memory_type_suffix
+  = (fi.type_suffix (0).integer_p
+? (fi.type_suffix (0).unsigned_p
+   ? m_unsigned_memory_type
+   : m_signed_memory_type)
+: m_float_memory_type);
+machine_mode mem_mode = type_suffixes[memory_type_suffix].vector_mode;
+machine_mode reg_mode = fi.vector_mode (0);
+
+return arm_mve_data_mode (GET_MODE_INNER (mem_mode),
+ GET_MODE_NUNITS (reg_mode)).require ();
+  }
+
+  /* The type of the memory elements.  This is part of the function base
+ name rather than a true type suffix.  */
+  type_suffix_index m_signed_memory_type;
+  type_suffix_index m_unsigned_memory_type;
+  type_suffix_index m_float_memory_type;
+};
+
+/* A function_base that truncates vector elements and stores them to memory.
+   The memory element width is a fixed part of the function base name.  */
+class store_truncating : public function_base
+{
+public:
+  CONSTEXPR store_truncating (scalar_mode to_int_mode,
+ opt_scalar_mode to_float_mode)
+: m_to_int_mode (to_int_mode), m_to_float_mode (to_float_mode)
+  {}
+
+  unsigned int call_properties (const function_instance &) const override
+  {
+return CP_WRITE_MEMORY;
+  }
+
+  tree memory_scalar_type (const function_instance &fi) const override
+  {
+/* In truncating stores, the signedness of the memory element is defined
+   to be the same as the signedness of the vector element.  The signedness
+   doesn't make any difference to the behavior of the function.  */
+type_class_index tclass = fi.type_suffix (0).tclass;
+unsigned int element_bits
+  = GET_MODE_BITSIZE (fi.type_suffix (0).integer_p
+ ? m_to_int_mode
+ : m_to_float_mode.require ());
+type_suffix_index suffix = find_type_suffix (tclass, element_bits);
+return sc

[gcc r15-4765] testsuite: Adjust AVX10.2 check_effective_target

2024-10-29 Thread Haochen Jiang via Gcc-cvs
https://gcc.gnu.org/g:12086865232382f93081d5564ff44b14bd71341c

commit r15-4765-g12086865232382f93081d5564ff44b14bd71341c
Author: Haochen Jiang 
Date:   Tue Oct 29 15:51:14 2024 +0800

testsuite: Adjust AVX10.2 check_effective_target

Since Binutils hasn't fully merged all AVX10.2 insts, testing only one
inst/intrin in AVX10.2 is not sufficient for check_effective_target.
As with APX_F, use inline asm to do the target check.

gcc/testsuite/ChangeLog:

PR target/117301
* lib/target-supports.exp (check_effective_target_avx10_2):
Use inline asm instead of intrin for check_effective_target.
(check_effective_target_avx10_2_512): Ditto.

Diff:
---
 gcc/testsuite/lib/target-supports.exp | 34 ++
 1 file changed, 14 insertions(+), 20 deletions(-)

diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index d113a08dff7b..5638e454c0e1 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -10748,17 +10748,14 @@ proc check_effective_target_apxf { } {
 # Return 1 if avx10.2 instructions can be compiled.
 proc check_effective_target_avx10_2 { } {
 return [check_no_compiler_messages avx10.2 object {
-   typedef int __v8si __attribute__ ((__vector_size__ (32)));
-   typedef char __mmask8;
-
-   __v8si
-   _mm256_mask_vpdpbssd_epi32 (__v8si __A, __mmask8 __U,
-   __v8si __B, __v8si __C)
+   void
+   foo ()
{
- return (__v8si) __builtin_ia32_vpdpbssd_v8si_mask ((__v8si)__A,
-(__v8si)__B,
-(__v8si)__C,
-(__mmask8)__U);
+ __asm__ volatile ("vdpphps\t%ymm4, %ymm5, %ymm6");
+ __asm__ volatile ("vcvthf82ph\t%xmm5, %ymm6");
+ __asm__ volatile ("vaddnepbf16\t%ymm4, %ymm5, %ymm6");
+ __asm__ volatile ("vcvtph2ibs\t%ymm5, %ymm6");
+ __asm__ volatile ("vminmaxpd\t$123, %ymm4, %ymm5, %ymm6");
}
 } "-mavx10.2" ]
 }
@@ -10766,17 +10763,14 @@ proc check_effective_target_avx10_2 { } {
 # Return 1 if avx10.2-512 instructions can be compiled.
 proc check_effective_target_avx10_2_512 { } {
 return [check_no_compiler_messages avx10.2-512 object {
-   typedef int __v16si __attribute__ ((__vector_size__ (64)));
-   typedef short __mmask16;
-
-   __v16si
-   _mm512_vpdpbssd_epi32 (__v16si __A, __mmask16 __U,
-  __v16si __B, __v16si __C)
+   void
+   foo ()
{
- return (__v16si) __builtin_ia32_vpdpbssd_v16si_mask ((__v16si)__A,
-  (__v16si)__B,
-  (__v16si)__C,
-  (__mmask16)__U);
+ __asm__ volatile ("vdpphps\t%zmm4, %zmm5, %zmm6");
+ __asm__ volatile ("vcvthf82ph\t%ymm5, %zmm6");
+ __asm__ volatile ("vaddnepbf16\t%zmm4, %zmm5, %zmm6");
+ __asm__ volatile ("vcvtph2ibs\t%zmm5, %zmm6");
+ __asm__ volatile ("vminmaxpd\t$123, %zmm4, %zmm5, %zmm6");
}
 } "-mavx10.2-512" ]
 }