date:20250521

[gcc r16-770] libstdc++: remove two redundant statements in pb_ds binary tree

2025-05-21 Thread Xi Ruoyao via Libstdc++-cvs

https://gcc.gnu.org/g:6740732a659f9bef523f872c633d5477e8dc349c

commit r16-770-g6740732a659f9bef523f872c633d5477e8dc349c
Author: Xi Ruoyao
Date:   Fri Jul 10 20:10:52 2020 +0800

libstdc++: remove two redundant statements in pb_ds binary tree

libstdc++-v3/ChangeLog:

* include/ext/pb_ds/detail/bin_search_tree_/insert_fn_imps.hpp
(insert_leaf_new, insert_imp_empty): remove redundant statements.

Diff:
---
 .../include/ext/pb_ds/detail/bin_search_tree_/insert_fn_imps.hpp| 2 --
 1 file changed, 2 deletions(-)

diff --git 
a/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/insert_fn_imps.hpp 
b/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/insert_fn_imps.hpp
index e6e954dc29c8..b8f5014838c2 100644
--- a/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/insert_fn_imps.hpp
+++ b/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/insert_fn_imps.hpp
@@ -122,7 +122,6 @@ insert_leaf_new(const_reference r_value, node_pointer p_nd, 
bool left_nd)
 }
 
   p_new_nd->m_p_parent = p_nd;
-  p_new_nd->m_p_left = p_new_nd->m_p_right = 0;
   PB_DS_ASSERT_NODE_CONSISTENT(p_nd)
 
   update_to_top(p_new_nd, (node_update* )this);
@@ -142,7 +141,6 @@ insert_imp_empty(const_reference r_value)
 m_p_head->m_p_parent = p_new_node;
 
   p_new_node->m_p_parent = m_p_head;
-  p_new_node->m_p_left = p_new_node->m_p_right = 0;
   _GLIBCXX_DEBUG_ONLY(debug_base::insert_new(PB_DS_V2F(r_value));)
 
   update_to_top(m_p_head->m_p_parent, (node_update*)this);

[gcc r16-771] libstdc++: maintain subtree size in pb_ds binary search trees

2025-05-21 Thread Xi Ruoyao via Libstdc++-cvs

https://gcc.gnu.org/g:2e27df6cbd05a3ee742434b7f50dbff5f363b487

commit r16-771-g2e27df6cbd05a3ee742434b7f50dbff5f363b487
Author: Xi Ruoyao
Date:   Fri Jul 10 20:58:04 2020 +0800

libstdc++: maintain subtree size in pb_ds binary search trees

libstdc++-v3/ChangeLog:

* include/ext/pb_ds/detail/rb_tree_map_/node.hpp
(rb_tree_node_::size_type): New typedef.
(rb_tree_node_::m_subtree_size): New field.
* include/ext/pb_ds/detail/splay_tree_/node.hpp
(splay_tree_node_::size_type): New typedef.
(splay_tree_node_::m_subtree_size): New field.
* include/ext/pb_ds/detail/bin_search_tree_/bin_search_tree_.hpp
(PB_DS_BIN_TREE_NAME::update_subtree_size): Declare new member
function.
* include/ext/pb_ds/detail/bin_search_tree_/rotate_fn_imps.hpp
(update_subtree_size): Define.
(apply_update, update_to_top): Call update_subtree_size.

Diff:
---
 .../detail/bin_search_tree_/bin_search_tree_.hpp   |  3 +++
 .../detail/bin_search_tree_/rotate_fn_imps.hpp | 31 +++---
 .../include/ext/pb_ds/detail/rb_tree_map_/node.hpp |  8 ++
 .../include/ext/pb_ds/detail/splay_tree_/node.hpp  |  8 ++
 4 files changed, 46 insertions(+), 4 deletions(-)

diff --git 
a/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/bin_search_tree_.hpp 
b/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/bin_search_tree_.hpp
index 6088709998a3..a8c73b55b89d 100644
--- 
a/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/bin_search_tree_.hpp
+++ 
b/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/bin_search_tree_.hpp
@@ -304,6 +304,9 @@ namespace __gnu_pbds
   inline void
   rotate_parent(node_pointer);
 
+  inline void
+  update_subtree_size(node_pointer);
+
   inline void
   apply_update(node_pointer, null_node_update_pointer);
 
diff --git 
a/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/rotate_fn_imps.hpp 
b/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/rotate_fn_imps.hpp
index 069b17f08de2..8cadce2349bd 100644
--- a/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/rotate_fn_imps.hpp
+++ b/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/rotate_fn_imps.hpp
@@ -122,8 +122,23 @@ rotate_parent(node_pointer p_nd)
 PB_DS_CLASS_T_DEC
 inline void
 PB_DS_CLASS_C_DEC::
-apply_update(node_pointer /*p_nd*/, null_node_update_pointer /*p_update*/)
-{ }
+update_subtree_size(node_pointer p_nd)
+{
+  size_type size = 1;
+  if (p_nd->m_p_left)
+size += p_nd->m_p_left->m_subtree_size;
+  if (p_nd->m_p_right)
+size += p_nd->m_p_right->m_subtree_size;
+  p_nd->m_subtree_size = size;
+}
+
+PB_DS_CLASS_T_DEC
+inline void
+PB_DS_CLASS_C_DEC::
+apply_update(node_pointer p_nd, null_node_update_pointer /*p_update*/)
+{
+  update_subtree_size(p_nd);
+}
 
 PB_DS_CLASS_T_DEC
 template<typename Node_Update_>
@@ -131,6 +146,7 @@ inline void
 PB_DS_CLASS_C_DEC::
 apply_update(node_pointer p_nd, Node_Update_*  /*p_update*/)
 {
+  update_subtree_size(p_nd);
   node_update::operator()(node_iterator(p_nd),
  node_const_iterator(static_cast(0)));
 }
@@ -152,7 +168,14 @@ update_to_top(node_pointer p_nd, Node_Update_* p_update)
 PB_DS_CLASS_T_DEC
 inline void
 PB_DS_CLASS_C_DEC::
-update_to_top(node_pointer /*p_nd*/, null_node_update_pointer /*p_update*/)
-{ }
+update_to_top(node_pointer p_nd, null_node_update_pointer /*p_update */)
+{
+  while (p_nd != m_p_head)
+{
+  update_subtree_size(p_nd);
+
+  p_nd = p_nd->m_p_parent;
+}
+}
 
 #endif
diff --git a/libstdc++-v3/include/ext/pb_ds/detail/rb_tree_map_/node.hpp 
b/libstdc++-v3/include/ext/pb_ds/detail/rb_tree_map_/node.hpp
index f229be7342c6..3803ddb19c5d 100644
--- a/libstdc++-v3/include/ext/pb_ds/detail/rb_tree_map_/node.hpp
+++ b/libstdc++-v3/include/ext/pb_ds/detail/rb_tree_map_/node.hpp
@@ -58,6 +58,9 @@ namespace __gnu_pbds
   typedef typename rebind_traits<_Alloc, rb_tree_node_>::pointer
node_pointer;
 
+  typedef typename rebind_traits<_Alloc, rb_tree_node_>::size_type
+   size_type;
+
   typedef typename rebind_traits<_Alloc, metadata_type>::reference
metadata_reference;
 
@@ -88,6 +91,7 @@ namespace __gnu_pbds
   node_pointer m_p_left;
   node_pointer m_p_right;
   node_pointer m_p_parent;
+  size_type    m_subtree_size;
   value_type   m_value;
   bool m_red;
   metadata_type m_metadata;
@@ -100,6 +104,9 @@ namespace __gnu_pbds
   typedef Value_Type   value_type;
   typedef null_type metadata_type;
 
+  typedef typename rebind_traits<_Alloc, rb_tree_node_>::size_type
+   size_type;
+
   typedef typename rebind_traits<_Alloc, rb_tree_node_>::pointer
node_pointer;
 
@@ -116,6 +123,7 @@ namespace __gnu_pbds
   node_pointer m_p_left;
   node_pointer m_p_right;
   node_pointer m_p_parent;
+

[gcc r16-772] libstdc++: use maintained size when split pb_ds binary search trees

2025-05-21 Thread Xi Ruoyao via Libstdc++-cvs

https://gcc.gnu.org/g:36c20fee22d40c6d25f52e929b42f5eab62cb1eb

commit r16-772-g36c20fee22d40c6d25f52e929b42f5eab62cb1eb
Author: Xi Ruoyao
Date:   Fri Jul 10 21:38:09 2020 +0800

libstdc++: use maintained size when split pb_ds binary search trees

libstdc++-v3/ChangeLog:

PR libstdc++/81806
* include/ext/pb_ds/detail/bin_search_tree_/split_join_fn_imps.hpp
(split_finish): Use maintained size, instead of calling
std::distance.

Diff:
---
 .../include/ext/pb_ds/detail/bin_search_tree_/split_join_fn_imps.hpp  | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git 
a/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/split_join_fn_imps.hpp 
b/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/split_join_fn_imps.hpp
index 0c1b26fa9e2d..a2a57757a046 100644
--- 
a/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/split_join_fn_imps.hpp
+++ 
b/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/split_join_fn_imps.hpp
@@ -133,7 +133,9 @@ PB_DS_CLASS_C_DEC::
 split_finish(PB_DS_CLASS_C_DEC& other)
 {
   other.initialize_min_max();
-  other.m_size = std::distance(other.begin(), other.end());
+  other.m_size = 0;
+  if (other.m_p_head->m_p_parent != 0)
+other.m_size = other.m_p_head->m_p_parent->m_subtree_size;
   m_size -= other.m_size;
   initialize_min_max();
   PB_DS_ASSERT_VALID((*this))

[gcc r16-776] nds32: Avoid accessing beyond the operands[] array

2025-05-21 Thread Richard Sandiford via Gcc-cvs

https://gcc.gnu.org/g:a6ec398042c6054cbf2c08b646df98b63a9418d5

commit r16-776-ga6ec398042c6054cbf2c08b646df98b63a9418d5
Author: Richard Sandiford 
Date:   Wed May 21 10:01:26 2025 +0100

nds32: Avoid accessing beyond the operands[] array

This pattern used operands[2] to hold the shift amount, even though
the pattern doesn't have an operand 2 (not even as a match_dup).
This caused a build failure with -Werror:

  array subscript 2 is above array bounds of ‘rtx_def* [2]’

gcc/
PR target/100837
* config/nds32/nds32-intrinsic.md (unspec_get_pending_int): Use
a local variable instead of operands[2].

Diff:
---
 gcc/config/nds32/nds32-intrinsic.md | 11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/gcc/config/nds32/nds32-intrinsic.md 
b/gcc/config/nds32/nds32-intrinsic.md
index e05dce105099..85acea330f07 100644
--- a/gcc/config/nds32/nds32-intrinsic.md
+++ b/gcc/config/nds32/nds32-intrinsic.md
@@ -333,30 +333,31 @@
   ""
 {
   rtx system_reg = NULL_RTX;
+  rtx shift_amt = NULL_RTX;
 
   /* Set system register form nds32_intrinsic_register_names[].  */
   if ((INTVAL (operands[1]) >= NDS32_INT_H0)
   && (INTVAL (operands[1]) <= NDS32_INT_H15))
 {
   system_reg = GEN_INT (__NDS32_REG_INT_PEND__);
-  operands[2] = GEN_INT (31 - INTVAL (operands[1]));
+  shift_amt = GEN_INT (31 - INTVAL (operands[1]));
 }
   else if (INTVAL (operands[1]) == NDS32_INT_SWI)
 {
   system_reg = GEN_INT (__NDS32_REG_INT_PEND__);
-  operands[2] = GEN_INT (15);
+  shift_amt = GEN_INT (15);
 }
   else if ((INTVAL (operands[1]) >= NDS32_INT_H16)
   && (INTVAL (operands[1]) <= NDS32_INT_H31))
 {
   system_reg = GEN_INT (__NDS32_REG_INT_PEND2__);
-  operands[2] = GEN_INT (31 - INTVAL (operands[1]));
+  shift_amt = GEN_INT (31 - INTVAL (operands[1]));
 }
   else if ((INTVAL (operands[1]) >= NDS32_INT_H32)
   && (INTVAL (operands[1]) <= NDS32_INT_H63))
 {
   system_reg = GEN_INT (__NDS32_REG_INT_PEND3__);
-  operands[2] = GEN_INT (31 - (INTVAL (operands[1]) - 32));
+  shift_amt = GEN_INT (31 - (INTVAL (operands[1]) - 32));
 }
   else
 error ("% not support %,"
@@ -366,7 +367,7 @@
   if (system_reg != NULL_RTX)
 {
   emit_insn (gen_unspec_volatile_mfsr (operands[0], system_reg));
-  emit_insn (gen_ashlsi3 (operands[0], operands[0], operands[2]));
+  emit_insn (gen_ashlsi3 (operands[0], operands[0], shift_amt));
   emit_insn (gen_lshrsi3 (operands[0], operands[0], GEN_INT (31)));
   emit_insn (gen_unspec_dsb ());
 }

[gcc r16-789] vxworks: libgcc: include string.h for memset

2025-05-21 Thread Alexandre Oliva via Gcc-cvs

https://gcc.gnu.org/g:99a65bfe59208c33a74718ef5fc05e255a76393a

commit r16-789-g99a65bfe59208c33a74718ef5fc05e255a76393a
Author: Alexandre Oliva 
Date:   Wed May 21 06:19:46 2025 -0300

vxworks: libgcc: include string.h for memset

gthr-vxworks-thread.c calls memset in __gthread_cond_signal, but it
fails to include <string.h>, where this function is declared, and GCC
14 rejects calls of undeclared functions.  Include the required
header.


for  libgcc/ChangeLog

* config/gthr-vxworks-thread.c: Include string.h for memset.

Diff:
---
 libgcc/config/gthr-vxworks-thread.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libgcc/config/gthr-vxworks-thread.c 
b/libgcc/config/gthr-vxworks-thread.c
index 17c60faba48f..31f291aca67e 100644
--- a/libgcc/config/gthr-vxworks-thread.c
+++ b/libgcc/config/gthr-vxworks-thread.c
@@ -33,6 +33,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If 
not, see
 
 #include 
 #include 
+#include <string.h>
 
 #define __TIMESPEC_TO_NSEC(timespec) \
   ((long long)timespec.tv_sec * 10 + (long long)timespec.tv_nsec)

[gcc r16-790] [testsuite] [vxworks] netinet includes atomic, reqs c++11

2025-05-21 Thread Alexandre Oliva via Gcc-cvs

https://gcc.gnu.org/g:659fe2a28e8cbaf4672d4db8ef3f13c6efed9c0c

commit r16-790-g659fe2a28e8cbaf4672d4db8ef3f13c6efed9c0c
Author: Alexandre Oliva 
Date:   Wed May 21 06:19:57 2025 -0300

[testsuite] [vxworks] netinet includes atomic, reqs c++11

On vxworks, the included netinet/in.h header indirectly includes
<atomic>, which fails on C++ < 11.  Skip the test.


for  gcc/testsuite/ChangeLog

* c-c++-common/analyzer/fd-glibc-byte-stream-socket.c: Skip on
vxworks with C++ < 11.

Diff:
---
 gcc/testsuite/c-c++-common/analyzer/fd-glibc-byte-stream-socket.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcc/testsuite/c-c++-common/analyzer/fd-glibc-byte-stream-socket.c 
b/gcc/testsuite/c-c++-common/analyzer/fd-glibc-byte-stream-socket.c
index fd57d3b0894a..2a44e452127c 100644
--- a/gcc/testsuite/c-c++-common/analyzer/fd-glibc-byte-stream-socket.c
+++ b/gcc/testsuite/c-c++-common/analyzer/fd-glibc-byte-stream-socket.c
@@ -5,6 +5,8 @@
 /* { dg-additional-options "-fno-exceptions" } */
 
 /* { dg-skip-if "" { hppa*-*-hpux* powerpc*-*-aix* } } */
+/* On vxworks, netinet/in.h indirectly includes atomic, that requires C++11.  
*/
+/* { dg-skip-if "" { *-*-vxworks* && { c++ && { ! c++11 } } } } */
 
 #include 
 #include

[gcc r16-794] [testsuite] [aarch64] match alt cache clear names in sme nonlocal_goto tests

2025-05-21 Thread Alexandre Oliva via Gcc-cvs

https://gcc.gnu.org/g:4b75decf5297838ef9ddfb842da0117adbe3f975

commit r16-794-g4b75decf5297838ef9ddfb842da0117adbe3f975
Author: Alexandre Oliva 
Date:   Wed May 21 06:20:22 2025 -0300

[testsuite] [aarch64] match alt cache clear names in sme nonlocal_goto tests

vxworks calls cacheTextUpdate instead of __clear_cache.

Adjust the sme/nonlocal_goto_*.c tests for inexact matches.


for  gcc/testsuite/ChangeLog

* gcc.target/aarch64/sme/nonlocal_goto_1.c: Match
vxworks cache-clearing function as well.
* gcc.target/aarch64/sme/nonlocal_goto_2.c: Likewise.
* gcc.target/aarch64/sme/nonlocal_goto_3.c: Likewise.

Diff:
---
 gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_1.c | 2 +-
 gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_2.c | 2 +-
 gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_3.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_1.c 
b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_1.c
index 4e3869fcc9ee..572c17a06d7c 100644
--- a/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_1.c
@@ -13,7 +13,7 @@ void run(void (*)());
 ** ldr x16, \1
 ** tbz x16, 0, .*
 ** smstop  sm
-** bl  __clear_cache
+** bl  [^\n]*[cC]ache[^\n]*
 ** ldr x16, \1
 ** tbz x16, 0, .*
 ** smstart sm
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_2.c 
b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_2.c
index 2a2db72c3a08..721a2b789863 100644
--- a/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_2.c
@@ -7,7 +7,7 @@ void run(void (*)());
 ** foo:
 ** ...
 ** smstop  sm
-** bl  __clear_cache
+** bl  [^\n]*[cC]ache[^\n]*
 ** smstart sm
 ** add x0, .*
 ** smstop  sm
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_3.c 
b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_3.c
index 022b04052c54..25db9283b169 100644
--- a/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_3.c
@@ -9,7 +9,7 @@ void run(void (*)());
 ** smstart sm
 ** ...
 ** smstop  sm
-** bl  __clear_cache
+** bl  [^\n]*[cC]ache[^\n]*
 ** smstart sm
 ** add x0, .*
 ** smstop  sm

[gcc r16-795] [testsuite] [x86] double copysign requires -msse2

2025-05-21 Thread Alexandre Oliva via Gcc-cvs

https://gcc.gnu.org/g:02788cde86264559ca9cb3323c73c72fd0211c5d

commit r16-795-g02788cde86264559ca9cb3323c73c72fd0211c5d
Author: Alexandre Oliva 
Date:   Wed May 21 06:20:29 2025 -0300

[testsuite] [x86] double copysign requires -msse2

SSE_FLOAT_MODE_P only holds for DFmode with SSE2, and that's a
condition for copysign3 to be available under TARGET_SSE_MATH.

Various copysign testcases use -msse -mfpmath=sse on ia32 to enable
the copysign builtins and patterns, but that would only be enough if
the tests were limited to floats.  Since they test doubles as well, we
need -msse2 instead of -msse.


for  gcc/testsuite/ChangeLog

* gcc.dg/fold-copysign-1.c: Bump to sse2 on ia32.
* gcc.dg/pr55152-2.c: Likewise.
* gcc.dg/tree-ssa/abs-4.c: Likewise.
* gcc.dg/tree-ssa/backprop-6.c: Likewise.

Diff:
---
 gcc/testsuite/gcc.dg/fold-copysign-1.c | 2 +-
 gcc/testsuite/gcc.dg/pr55152-2.c   | 2 +-
 gcc/testsuite/gcc.dg/tree-ssa/abs-4.c  | 2 +-
 gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/fold-copysign-1.c 
b/gcc/testsuite/gcc.dg/fold-copysign-1.c
index 1f5141b1c5d6..b65c08bd9a08 100644
--- a/gcc/testsuite/gcc.dg/fold-copysign-1.c
+++ b/gcc/testsuite/gcc.dg/fold-copysign-1.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O -fdump-tree-cddce1" } */
-/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* 
x86_64-*-* } && ilp32 } } } */
+/* { dg-additional-options "-msse2 -mfpmath=sse" { target { { i?86-*-* 
x86_64-*-* } && ilp32 } } } */
 /* { dg-additional-options "-mdouble=64" { target { avr-*-* } } } */
 
 double foo (double x)
diff --git a/gcc/testsuite/gcc.dg/pr55152-2.c b/gcc/testsuite/gcc.dg/pr55152-2.c
index 24068cffa4a8..7533ab4db601 100644
--- a/gcc/testsuite/gcc.dg/pr55152-2.c
+++ b/gcc/testsuite/gcc.dg/pr55152-2.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O -ffinite-math-only -fno-signed-zeros -fstrict-overflow 
-fdump-tree-optimized" } */
-/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* 
x86_64-*-* } && ilp32 } } } */
+/* { dg-additional-options "-msse2 -mfpmath=sse" { target { { i?86-*-* 
x86_64-*-* } && ilp32 } } } */
 
 double g (double a)
 {
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c 
b/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
index 4144d1cd954a..f43018d0dff4 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O1 -fdump-tree-optimized" } */
-/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* 
x86_64-*-* } && ilp32 } } } */
+/* { dg-additional-options "-msse2 -mfpmath=sse" { target { { i?86-*-* 
x86_64-*-* } && ilp32 } } } */
 /* PR tree-optimization/109829 */
 
 float abs_f(float x) { return __builtin_signbit(x) ? x : -x; }
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c 
b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
index dbde681e3832..efb53f17f861 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O -fdump-tree-backprop-details" }  */
-/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* 
x86_64-*-* } && ilp32 } } } */
+/* { dg-additional-options "-msse2 -mfpmath=sse" { target { { i?86-*-* 
x86_64-*-* } && ilp32 } } } */
 
 void start (void *);
 void end (void *);

[gcc r16-792] [testsuite] tolerate missing std::stold

2025-05-21 Thread Alexandre Oliva via Libstdc++-cvs

https://gcc.gnu.org/g:207534061fb026585d65af3cf863dc434ec36d7c

commit r16-792-g207534061fb026585d65af3cf863dc434ec36d7c
Author: Alexandre Oliva 
Date:   Wed May 21 06:20:11 2025 -0300

[testsuite] tolerate missing std::stold

basic_string.h doesn't define the non-w string version of std::stold
when certain conditions aren't met, and then a couple of tests fail to
compile.

Guard the portions of the tests that depend on std::stold with the
conditions for it to be defined.


for  libstdc++-v3/ChangeLog

* 
testsuite/21_strings/basic_string/numeric_conversions/char/stold.cc:
Guard non-wide stold calls with conditions for it to be
defined.
* 
testsuite/27_io/basic_ostream/inserters_arithmetic/char/hexfloat.cc:
Likewise.

Diff:
---
 .../21_strings/basic_string/numeric_conversions/char/stold.cc   | 6 ++
 .../27_io/basic_ostream/inserters_arithmetic/char/hexfloat.cc   | 6 ++
 2 files changed, 12 insertions(+)

diff --git 
a/libstdc++-v3/testsuite/21_strings/basic_string/numeric_conversions/char/stold.cc
 
b/libstdc++-v3/testsuite/21_strings/basic_string/numeric_conversions/char/stold.cc
index b64ad0c86834..dd777c4529a0 100644
--- 
a/libstdc++-v3/testsuite/21_strings/basic_string/numeric_conversions/char/stold.cc
+++ 
b/libstdc++-v3/testsuite/21_strings/basic_string/numeric_conversions/char/stold.cc
@@ -31,6 +31,11 @@
 void
 test01()
 {
+  /* If these conditions are not met, basic_string.h doesn't define
+ std::stold(const string&, size_t* = 0), and then the test would
+ fail to compile.  */
+#if (_GLIBCXX_HAVE_STRTOLD && ! _GLIBCXX_HAVE_BROKEN_STRTOLD) \
+  || __DBL_MANT_DIG__ == __LDBL_MANT_DIG__
   bool test = false;
   using namespace std;
 
@@ -106,6 +111,7 @@ test01()
   test = false;
 }
   VERIFY( test );
+#endif
 }
 
 int main()
diff --git 
a/libstdc++-v3/testsuite/27_io/basic_ostream/inserters_arithmetic/char/hexfloat.cc
 
b/libstdc++-v3/testsuite/27_io/basic_ostream/inserters_arithmetic/char/hexfloat.cc
index b1bc7fbb9d4e..f694730901ed 100644
--- 
a/libstdc++-v3/testsuite/27_io/basic_ostream/inserters_arithmetic/char/hexfloat.cc
+++ 
b/libstdc++-v3/testsuite/27_io/basic_ostream/inserters_arithmetic/char/hexfloat.cc
@@ -95,6 +95,11 @@ test01()
 void
 test02()
 {
+  /* If these conditions are not met, basic_string.h doesn't define
+ std::stold(const string&, size_t* = 0), and then the test would
+ fail to compile.  */
+#if (_GLIBCXX_HAVE_STRTOLD && ! _GLIBCXX_HAVE_BROKEN_STRTOLD) \
+  || __DBL_MANT_DIG__ == __LDBL_MANT_DIG__
   ostringstream os;
   long double d = 272.L; // 0x1.1p+8L;
   os << hexfloat << setprecision(1);
@@ -140,6 +145,7 @@ test02()
   cout << "got: " << os.str() << endl;
 #endif
   VERIFY( os && os.str() == "15" );
+#endif
 }
 
 int

[gcc r16-791] [testsuite] [analyzer] [vxworks] define __STDC_WANT_LIB_EXT1__ to 1

2025-05-21 Thread Alexandre Oliva via Gcc-cvs

https://gcc.gnu.org/g:f3c5e0a2091ddd5cae4d7381a847aac5f546f04c

commit r16-791-gf3c5e0a2091ddd5cae4d7381a847aac5f546f04c
Author: Alexandre Oliva 
Date:   Wed May 21 06:20:03 2025 -0300

[testsuite] [analyzer] [vxworks] define __STDC_WANT_LIB_EXT1__ to 1

vxworks' headers use #if instead of #ifdef to test for
__STDC_WANT_LIB_EXT1__, so the definition in the analyzer test
strtok-cppreference.c catches a bug there, but not something it's
meant to catch or that we could fix in GCC, so amend the definition to
sidestep the libc bug.


for  gcc/testsuite/ChangeLog

* c-c++-common/analyzer/strtok-cppreference.c
(__STDC_WANT_LIB_EXT1__): Define to 1.

Diff:
---
 gcc/testsuite/c-c++-common/analyzer/strtok-cppreference.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/c-c++-common/analyzer/strtok-cppreference.c 
b/gcc/testsuite/c-c++-common/analyzer/strtok-cppreference.c
index a396c643f116..96117276ffc3 100644
--- a/gcc/testsuite/c-c++-common/analyzer/strtok-cppreference.c
+++ b/gcc/testsuite/c-c++-common/analyzer/strtok-cppreference.c
@@ -13,7 +13,7 @@
 
 /* { dg-additional-options " -Wno-analyzer-too-complex 
-Wno-analyzer-symbol-too-complex" } */
 
-#define __STDC_WANT_LIB_EXT1__ 0
+#define __STDC_WANT_LIB_EXT1__ 1
 #include 
 #include

[gcc r16-793] [testsuite] [aarch64] use uint64_t in rwsr tests

2025-05-21 Thread Alexandre Oliva via Gcc-cvs

https://gcc.gnu.org/g:d41028df8b39087ef71de84a5daf68cb305b9f7f

commit r16-793-gd41028df8b39087ef71de84a5daf68cb305b9f7f
Author: Alexandre Oliva 
Date:   Wed May 21 06:20:17 2025 -0300

[testsuite] [aarch64] use uint64_t in rwsr tests

stdint.h defines uint64_t instead of __uint64_t, so use the former.
__uint64_t is not available on e.g. vxworks.


for  gcc/testsuite/ChangeLog

* gcc.target/aarch64/acle/rwsr.c: Use uint64_t.
* gcc.target/aarch64/acle/rwsr-2.c: Likewise.

Diff:
---
 gcc/testsuite/gcc.target/aarch64/acle/rwsr-2.c | 4 ++--
 gcc/testsuite/gcc.target/aarch64/acle/rwsr.c   | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/gcc.target/aarch64/acle/rwsr-2.c 
b/gcc/testsuite/gcc.target/aarch64/acle/rwsr-2.c
index cca88924043e..5527297e6277 100644
--- a/gcc/testsuite/gcc.target/aarch64/acle/rwsr-2.c
+++ b/gcc/testsuite/gcc.target/aarch64/acle/rwsr-2.c
@@ -9,14 +9,14 @@
 void
 test_leading_zeros ()
 {
-  __uint64_t b = __arm_rsr64 ("S1_2_C03_C04_5"); /* { dg-error "invalid system 
register name 's1_2_c03_c04_5'" } */
+  uint64_t b = __arm_rsr64 ("S1_2_C03_C04_5"); /* { dg-error "invalid system 
register name 's1_2_c03_c04_5'" } */
   __arm_wsr64 ("S1_2_C03_C04_5", b); /* { dg-error "invalid system register 
name 's1_2_c03_c04_5'" } */
 }
 
 void
 test_bounds ()
 {
-  __uint64_t b;
+  uint64_t b;
   b = __arm_rsr64 ("s4_2_c3_c4_5"); /* { dg-error "invalid system register 
name 's4_2_c3_c4_5'" } */
   b = __arm_rsr64 ("s1_8_c3_c4_5"); /* { dg-error "invalid system register 
name 's1_8_c3_c4_5'" } */
   b = __arm_rsr64 ("s1_2_c16_c4_5"); /* { dg-error "invalid system register 
name 's1_2_c16_c4_5'" } */
diff --git a/gcc/testsuite/gcc.target/aarch64/acle/rwsr.c 
b/gcc/testsuite/gcc.target/aarch64/acle/rwsr.c
index 6feb0bef2d6f..f63eb43bf7ee 100644
--- a/gcc/testsuite/gcc.target/aarch64/acle/rwsr.c
+++ b/gcc/testsuite/gcc.target/aarch64/acle/rwsr.c
@@ -171,6 +171,6 @@ set_wsrf64 (double a)
 */
 void set_custom ()
 {
-  __uint64_t b = __arm_rsr64 ("S1_2_C3_C4_5");
+  uint64_t b = __arm_rsr64 ("S1_2_C3_C4_5");
   __arm_wsr64 ("S1_2_C3_C4_5", b);
 }

[gcc r16-797] [testsuite] [x86] strlenopt-80 needs -msse2 on ia32

2025-05-21 Thread Alexandre Oliva via Gcc-cvs

https://gcc.gnu.org/g:fe9be609d7a10f2cd706aecd772e3e3427868daf

commit r16-797-gfe9be609d7a10f2cd706aecd772e3e3427868daf
Author: Alexandre Oliva 
Date:   Wed May 21 06:20:37 2025 -0300

[testsuite] [x86] strlenopt-80 needs -msse2 on ia32

The string length optimizations at 8-byte blocks require -msse2;
-msse is not enough.  Bump it.


for  gcc/testsuite/ChangeLog

* gcc.dg/strlenopt-80.c: Bump to -msse2.

Diff:
---
 gcc/testsuite/gcc.dg/strlenopt-80.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/strlenopt-80.c 
b/gcc/testsuite/gcc.dg/strlenopt-80.c
index 63d4eb17e4c3..0b16a4142366 100644
--- a/gcc/testsuite/gcc.dg/strlenopt-80.c
+++ b/gcc/testsuite/gcc.dg/strlenopt-80.c
@@ -6,7 +6,7 @@
{ dg-do compile { target { { aarch64*-*-* i?86-*-* x86_64-*-* } || { { 
powerpc*-*-* } && lp64 } } } }
 
{ dg-options "-O2 -Wall -fdump-tree-optimized" }
-   { dg-additional-options "-msse" { target i?86-*-* x86_64-*-* } } */
+   { dg-additional-options "-msse2" { target i?86-*-* x86_64-*-* } } */
 
 /* On powerpc configurations that have -mstrict-align by default,
the memcpy calls for ncpylog >= 3 are not turned into MEM_REFs.

[gcc r16-800] [testsuite] [x86] no-callee-saved-16.c needs -fomit-frame-pointer

2025-05-21 Thread Alexandre Oliva via Gcc-cvs

https://gcc.gnu.org/g:012a857d1eb5b45baee8752e3b5a434fa25c52e2

commit r16-800-g012a857d1eb5b45baee8752e3b5a434fa25c52e2
Author: Alexandre Oliva 
Date:   Wed May 21 06:20:54 2025 -0300

[testsuite] [x86] no-callee-saved-16.c needs -fomit-frame-pointer

If the toolchain is built with --enable-frame-pointer,
gcc.target/i386/no-callee-saved-16.c will not get the expected
optimization without -fomit-frame-pointer, that would be enabled by
-O2 without the configure flag.  Add it.


for  gcc/testsuite/ChangeLog

* gcc.target/i386/no-callee-saved-16.c: Add -fomit-frame-pointer.

Diff:
---
 gcc/testsuite/gcc.target/i386/no-callee-saved-16.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/i386/no-callee-saved-16.c 
b/gcc/testsuite/gcc.target/i386/no-callee-saved-16.c
index 112d1764f3e1..a5589e21ab3b 100644
--- a/gcc/testsuite/gcc.target/i386/no-callee-saved-16.c
+++ b/gcc/testsuite/gcc.target/i386/no-callee-saved-16.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mtune-ctrl=^prologue_using_move,^epilogue_using_move" } 
*/
+/* { dg-options "-O2 -fomit-frame-pointer 
-mtune-ctrl=^prologue_using_move,^epilogue_using_move" } */
 
 typedef void (*fn_t) (void) __attribute__ ((no_callee_saved_registers));

[gcc r16-802] [testsuite] [x86] pr31985.c needs -fomit-frame-pointer to match movl count

2025-05-21 Thread Alexandre Oliva via Gcc-cvs

https://gcc.gnu.org/g:6621311c309fcc68ecdd395bceb9ad7994bed41f

commit r16-802-g6621311c309fcc68ecdd395bceb9ad7994bed41f
Author: Alexandre Oliva 
Date:   Wed May 21 06:21:04 2025 -0300

[testsuite] [x86] pr31985.c needs -fomit-frame-pointer to match movl count

On an --enable-frame-pointer toolchain, pr31985.c gets an extra movl
and fails.  Enable -fomit-frame-pointer explicitly.


for  gcc/testsuite/ChangeLog

* gcc.target/i386/pr31985.c: Add -fomit-frame-pointer.

Diff:
---
 gcc/testsuite/gcc.target/i386/pr31985.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/i386/pr31985.c 
b/gcc/testsuite/gcc.target/i386/pr31985.c
index a6de1b5b1431..a0a91116242c 100644
--- a/gcc/testsuite/gcc.target/i386/pr31985.c
+++ b/gcc/testsuite/gcc.target/i386/pr31985.c
@@ -1,5 +1,5 @@
 /* { dg-do compile { target ia32 } } */
-/* { dg-options "-O2" } */
+/* { dg-options "-O2 -fomit-frame-pointer" } */
 
 void test_c (unsigned int a, unsigned int b, unsigned int c, unsigned int d)
 {

[gcc r16-801] [testsuite] [x86] pr108938-3.c needs -msse2 for bswap in foo2 with -m32

2025-05-21 Thread Alexandre Oliva via Gcc-cvs

https://gcc.gnu.org/g:3392849511c9b9eb1d912a547f2441cece766e3b

commit r16-801-g3392849511c9b9eb1d912a547f2441cece766e3b
Author: Alexandre Oliva 
Date:   Wed May 21 06:20:59 2025 -0300

[testsuite] [x86] pr108938-3.c needs -msse2 for bswap in foo2 with -m32

Without SSE2, we don't combine the separate loads in foo2 and get
separate rotates, instead of a bswap.


for  gcc/testsuite/ChangeLog

* gcc.target/i386/pr108938-3.c: Add -msse2.

Diff:
---
 gcc/testsuite/gcc.target/i386/pr108938-3.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/i386/pr108938-3.c 
b/gcc/testsuite/gcc.target/i386/pr108938-3.c
index 757a0c456bc4..47293d49bb9e 100644
--- a/gcc/testsuite/gcc.target/i386/pr108938-3.c
+++ b/gcc/testsuite/gcc.target/i386/pr108938-3.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -mno-movbe -mno-avx" } */
+/* { dg-options "-O2 -ftree-vectorize -mno-movbe -msse2 -mno-avx" } */
 /* { dg-final { scan-assembler-times "bswap\[\t ]+" 2 { target { ! ia32 } } } 
} */
 /* { dg-final { scan-assembler-times "bswap\[\t ]+" 3 { target ia32 } } } */

[gcc r16-803] [testsuite] [x86] vect-simd-clone-1[678]e.c adjust

2025-05-21 Thread Alexandre Oliva via Gcc-cvs

https://gcc.gnu.org/g:e82a9f6536ba8fcf50a7172650a86519c50aeabd

commit r16-803-ge82a9f6536ba8fcf50a7172650a86519c50aeabd
Author: Alexandre Oliva 
Date:   Wed May 21 06:21:08 2025 -0300

[testsuite] [x86] vect-simd-clone-1[678]e.c adjust

Since r13-6296, we haven't got 4 simdclone calls for these tests on
ia32 without avx_runtime.  With avx_runtime, we get 3 such calls even
on ia32, but we didn't test for anything on ia32 with avx_runtime.
Adjust and simplify the expectations and comments.


for  gcc/testsuite/ChangeLog

* gcc.dg/vect/vect-simd-clone-16e.c: Expect fewer calls on ia32.
* gcc.dg/vect/vect-simd-clone-17e.c: Likewise.
* gcc.dg/vect/vect-simd-clone-18e.c: Likewise.

Diff:
---
 gcc/testsuite/gcc.dg/vect/vect-simd-clone-16e.c | 8 +++-
 gcc/testsuite/gcc.dg/vect/vect-simd-clone-17e.c | 8 +++-
 gcc/testsuite/gcc.dg/vect/vect-simd-clone-18e.c | 8 +++-
 3 files changed, 9 insertions(+), 15 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16e.c 
b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16e.c
index f80b0e0581e3..2f7cdfb22119 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16e.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16e.c
@@ -6,11 +6,9 @@
 #include "vect-simd-clone-16.c"
 
 /* Ensure the the in-branch simd clones are used on targets that support them.
-   Some targets use another call for the epilogue loops.
-   Some targets use pairs of vectors and do twice the calls.  */
-/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" 
{ target { { ! avx_runtime } && { ! { { i?86-*-* x86_64-*-* } && { ! lp64 } } } 
} } } } */
-/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" 
{ target { avx_runtime && { ! { { i?86-*-* x86_64-*-* } && { ! lp64 } } } } } } 
} */
-/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 4 "vect" 
{ target { { ! avx_runtime } && { { i?86-*-* x86_64-*-* } && { ! lp64 } } } } } 
} */
+   Some targets use another call for the epilogue loops.  */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" 
{ target { ! avx_runtime } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" 
{ target avx_runtime } } } */
 
 /* The LTO test produces two dump files and we scan the wrong one.  */
 /* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17e.c 
b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17e.c
index c7c510b8a6ab..8f10aff3b897 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17e.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17e.c
@@ -6,11 +6,9 @@
 #include "vect-simd-clone-17.c"
 
 /* Ensure the the in-branch simd clones are used on targets that support them.
-   Some targets use another call for the epilogue loops.
-   Some targets use pairs of vectors and do twice the calls.  */
-/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" 
{ target { { ! avx_runtime } && { ! { { i?86-*-* x86_64-*-* } && { ! lp64 } } } 
} } } } */
-/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" 
{ target { avx_runtime && { ! { { i?86-*-* x86_64-*-* } && { ! lp64 } } } } } } 
} */
-/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 4 "vect" 
{ target { { ! avx_runtime } && { { i?86-*-* x86_64-*-* } && { ! lp64 } } } } } 
} */
+   Some targets use another call for the epilogue loops.  */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" 
{ target { ! avx_runtime } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" 
{ target avx_runtime } } } */
 
 /* The LTO test produces two dump files and we scan the wrong one.  */
 /* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18e.c 
b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18e.c
index e00c3d78038b..142fcc8b0b55 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18e.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18e.c
@@ -6,11 +6,9 @@
 #include "vect-simd-clone-18.c"
 
 /* Ensure the the in-branch simd clones are used on targets that support them.
-   Some targets use another call for the epilogue loops.
-   Some targets use pairs of vectors and do twice the calls.  */
-/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" 
{ target { { ! avx_runtime } && { ! { { i?86-*-* x86_64-*-* } && { ! lp64 } } } 
} } } } */
-/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" 
{ target { avx_runtime && { ! { { i?86-*-* x86_64-*-* } && { ! lp64 } } } } } } 
} */
-/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 4 "vect" 
{ target { { ! avx_runtime } && { { i?86-*-* x86_64-*-* } && { ! lp64 } } } } } 
} */
+   Some targets use another call for the epilogue loops.  */
+/* { dg-final { scan-

[gcc r16-796] [testsuite] [x86] memcpy-6 needs -msse2

2025-05-21 Thread Alexandre Oliva via Gcc-cvs

https://gcc.gnu.org/g:f3a758e9a2b2aa40bda68a18157996167d772e4d

commit r16-796-gf3a758e9a2b2aa40bda68a18157996167d772e4d
Author: Alexandre Oliva 
Date:   Wed May 21 06:20:33 2025 -0300

[testsuite] [x86] memcpy-6 needs -msse2

The 8-byte memory operations will only be inlined on ia32 with
-msse2.  Bump it.


for  gcc/testsuite/ChangeLog

* gcc.dg/memcpy-6.c: Bump to -msse2.

Diff:
---
 gcc/testsuite/gcc.dg/memcpy-6.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/memcpy-6.c b/gcc/testsuite/gcc.dg/memcpy-6.c
index d4df03903c35..49aec338d2f2 100644
--- a/gcc/testsuite/gcc.dg/memcpy-6.c
+++ b/gcc/testsuite/gcc.dg/memcpy-6.c
@@ -7,7 +7,7 @@
{ dg-do compile }
{ dg-options "-O0 -Wrestrict -fdump-tree-optimized" }
{ dg-skip-if "skip non-x86 targets" { ! { i?86-*-* x86_64-*-* } } }
-   { dg-additional-options "-msse" { target i?86-*-* x86_64-*-* } } */
+   { dg-additional-options "-msse2" { target i?86-*-* x86_64-*-* } } */
 
 char a[32];

[gcc r16-798] [testsuite] [x86] forwprop-41 needs -msse

2025-05-21 Thread Alexandre Oliva via Gcc-cvs

https://gcc.gnu.org/g:8bb72b737c38adb08bfefabc43cb4f25f7d9e95d

commit r16-798-g8bb72b737c38adb08bfefabc43cb4f25f7d9e95d
Author: Alexandre Oliva 
Date:   Wed May 21 06:20:42 2025 -0300

[testsuite] [x86] forwprop-41 needs -msse

The vector operations are only turned into BIT_INSERT_EXPR with -msse
on ia32.


for  gcc/testsuite/ChangeLog

* gcc.dg/tree-ssa/forwprop-41.c: Add -msse on x86.

Diff:
---
 gcc/testsuite/gcc.dg/tree-ssa/forwprop-41.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-41.c 
b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-41.c
index a1f08289dd69..1c5b500deb15 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-41.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-41.c
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -fdump-tree-optimized -Wno-psabi -w" } */
+/* { dg-additional-options "-msse" { target i?86-*-* x86_64-*-* } } */
 
 #define vector __attribute__((__vector_size__(16) ))

[gcc r16-799] [testsuite] add missing require vect_early_break_hw for vect-tsvc

2025-05-21 Thread Alexandre Oliva via Gcc-cvs

https://gcc.gnu.org/g:57cc4f8bf31f8c25fd338b18b5e709d77cc2a0ad

commit r16-799-g57cc4f8bf31f8c25fd338b18b5e709d77cc2a0ad
Author: Alexandre Oliva 
Date:   Wed May 21 06:20:48 2025 -0300

[testsuite] add missing require vect_early_break_hw for vect-tsvc

Some tsvc tests add vect_early_break options without requiring the
feature to be available.  Add the requirements.


for  gcc/testsuite/ChangeLog

* gcc.dg/vect/tsvc/vect-tsvc-s332.c: Require vect_early_break_hw.
* gcc.dg/vect/tsvc/vect-tsvc-s481.c: Likewise.
* gcc.dg/vect/tsvc/vect-tsvc-s482.c: Likewise.

Diff:
---
 gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s332.c | 1 +
 gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s481.c | 1 +
 gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s482.c | 1 +
 3 files changed, 3 insertions(+)

diff --git a/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s332.c 
b/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s332.c
index 0d55d0dd67c3..21a9c5a6b2b6 100644
--- a/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s332.c
+++ b/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s332.c
@@ -3,6 +3,7 @@
 
 /* { dg-additional-options "--param vect-epilogues-nomask=0" } */
 /* { dg-require-effective-target vect_float } */
+/* { dg-require-effective-target vect_early_break_hw } */
 /* { dg-add-options vect_early_break } */
 
 #include "tsvc.h"
diff --git a/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s481.c 
b/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s481.c
index 5539f0f08411..e4433385d668 100644
--- a/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s481.c
+++ b/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s481.c
@@ -3,6 +3,7 @@
 
 /* { dg-additional-options "--param vect-epilogues-nomask=0" } */
 /* { dg-require-effective-target vect_float } */
+/* { dg-require-effective-target vect_early_break_hw } */
 /* { dg-add-options vect_early_break } */
 
 #include "tsvc.h"
diff --git a/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s482.c 
b/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s482.c
index 73bed5d4c57a..146df409ecc6 100644
--- a/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s482.c
+++ b/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s482.c
@@ -3,6 +3,7 @@
 
 /* { dg-additional-options "--param vect-epilogues-nomask=0" } */
 /* { dg-require-effective-target vect_float } */
+/* { dg-require-effective-target vect_early_break_hw } */
 /* { dg-add-options vect_early_break } */
 
 #include "tsvc.h"

[gcc r16-775] c++, coroutines: Clean up the ramp cleanups.

2025-05-21 Thread Iain D Sandoe via Gcc-cvs

https://gcc.gnu.org/g:18df4a10bc96946401218019ec566d867238b3e4

commit r16-775-g18df4a10bc96946401218019ec566d867238b3e4
Author: Iain Sandoe 
Date:   Mon May 12 20:38:48 2025 +0100

c++, coroutines: Clean up the ramp cleanups.

This replaces the cleanup try-catch block in the ramp with a series of
eh-only cleanup statements.

gcc/cp/ChangeLog:

* coroutines.cc
(cp_coroutine_transform::build_ramp_function): Replace ramp
cleanup try-catch block with eh-only cleanup statements.

Signed-off-by: Iain Sandoe 

Diff:
---
 gcc/cp/coroutines.cc | 199 ---
 1 file changed, 62 insertions(+), 137 deletions(-)

diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc
index 5c4133a42b7e..a62099622288 100644
--- a/gcc/cp/coroutines.cc
+++ b/gcc/cp/coroutines.cc
@@ -4866,39 +4866,6 @@ cp_coroutine_transform::build_ramp_function ()
   coro_fp = pushdecl (coro_fp);
   add_decl_expr (coro_fp);
 
-  tree coro_promise_live = NULL_TREE;
-  if (flag_exceptions)
-{
-  /* Signal that we need to clean up the promise object on exception.  */
-  coro_promise_live
-   = coro_build_and_push_artificial_var (loc, "_Coro_promise_live",
- boolean_type_node, orig_fn_decl,
- boolean_false_node);
-
-  /* To signal that we need to cleanup copied function args.  */
-  if (DECL_ARGUMENTS (orig_fn_decl))
-   for (tree arg = DECL_ARGUMENTS (orig_fn_decl); arg != NULL;
-arg = DECL_CHAIN (arg))
- {
-   param_info *parm_i = param_uses.get (arg);
-   if (parm_i->trivial_dtor)
- continue;
-   parm_i->guard_var = pushdecl (parm_i->guard_var);
-   add_decl_expr (parm_i->guard_var);
- }
-}
-
-  /* deref the frame pointer, to use in member access code.  */
-  tree deref_fp
-= cp_build_indirect_ref (loc, coro_fp, RO_UNARY_STAR,
-tf_warning_or_error);
-  tree frame_needs_free
-= coro_build_and_push_artificial_var_with_dve (loc,
-  coro_frame_needs_free_id,
-  boolean_type_node,
-  orig_fn_decl, NULL_TREE,
-  deref_fp);
-
   /* Build the frame.  */
 
   /* The CO_FRAME internal function is a mechanism to allow the middle end
@@ -4942,25 +4909,23 @@ cp_coroutine_transform::build_ramp_function ()
   finish_if_stmt (if_stmt);
 }
 
+  /* Dereference the frame pointer, to use in member access code.  */
+  tree deref_fp
+= cp_build_indirect_ref (loc, coro_fp, RO_UNARY_STAR, tf_warning_or_error);
+
   /* For now, once allocation has succeeded we always assume that this needs
  destruction, there's no impl. for frame allocation elision.  */
-  r = cp_build_init_expr (frame_needs_free, boolean_true_node);
-  finish_expr_stmt (r);
-
-  /* Set up the promise.  */
-  tree p
-= coro_build_and_push_artificial_var_with_dve (loc, coro_promise_id,
-  promise_type, orig_fn_decl,
-  NULL_TREE, deref_fp);
+  tree frame_needs_free
+= coro_build_and_push_artificial_var_with_dve (loc,
+  coro_frame_needs_free_id,
+  boolean_type_node,
+  orig_fn_decl,
+  boolean_true_node,
+  deref_fp);
+  /* Although it appears to be unused here the frame entry is needed and we
+ just set it true.  */
+  TREE_USED (frame_needs_free) = true;
 
-  /* Up to now any exception thrown will propagate directly to the caller.
- This is OK since the only source of such exceptions would be in allocation
- of the coroutine frame, and therefore the ramp will not have initialized
- any further state.  From here, we will track state that needs explicit
- destruction in the case that promise or g.r.o setup fails or an exception
- is thrown from the initial suspend expression.  */
-  tree ramp_try_block = NULL_TREE;
-  tree ramp_try_stmts = NULL_TREE;
   tree iarc_x = NULL_TREE;
   tree coro_before_return = NULL_TREE;
   if (flag_exceptions)
@@ -4976,8 +4941,15 @@ cp_coroutine_transform::build_ramp_function ()
   orig_fn_decl,
   boolean_false_node,
   deref_fp);
-  ramp_try_block = begin_try_block ();
-  ramp_try_stmts = begin_compound_stmt (BCS_TRY_BLOCK);
+  tree frame_cleanup = push_stmt_list ();
+  tree do_fr_cleanup
+   = build1_l

[gcc r16-774] c++, coroutines: Use decltype(auto) for the g_r_o.

2025-05-21 Thread Iain D Sandoe via Gcc-cvs

https://gcc.gnu.org/g:e71a6e002c6650a7a7be99277120d3e59ecb78a3

commit r16-774-ge71a6e002c6650a7a7be99277120d3e59ecb78a3
Author: Iain Sandoe 
Date:   Sun May 11 20:36:58 2025 +0100

c++, coroutines: Use decltype(auto) for the g_r_o.

The revised wording for coroutines uses decltype(auto) for the
type of the get return object, which preserves references.

It is quite reasonable for a coroutine body implementation to
complete before control is returned to the ramp - and in that
case we would be creating the ramp return object from an already-
deleted promise object.

Jason observes that this is a terrible situation and we should
seek a resolution to it via core.

Since the test added here explicitly performs the unsafe action
described above, we expect it to fail (until a resolution is found).

gcc/cp/ChangeLog:

* coroutines.cc
(cp_coroutine_transform::build_ramp_function): Use
decltype(auto) to determine the type of the temporary
get_return_object.

gcc/testsuite/ChangeLog:

* g++.dg/coroutines/pr115908.C: Count promise construction
and destruction. Run the test and XFAIL it.

Signed-off-by: Iain Sandoe 

Diff:
---
 gcc/cp/coroutines.cc   | 12 +++--
 gcc/testsuite/g++.dg/coroutines/pr115908.C | 86 ++
 2 files changed, 72 insertions(+), 26 deletions(-)

diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc
index bc5fb9381dbe..5c4133a42b7e 100644
--- a/gcc/cp/coroutines.cc
+++ b/gcc/cp/coroutines.cc
@@ -5120,8 +5120,11 @@ cp_coroutine_transform::build_ramp_function ()
   /* Check for a bad get return object type.
  [dcl.fct.def.coroutine] / 7 requires:
  The expression promise.get_return_object() is used to initialize the
- returned reference or prvalue result object ... */
-  tree gro_type = TREE_TYPE (get_ro);
+ returned reference or prvalue result object ...
+ When we use a local to hold this, it is decltype(auto).  */
+  tree gro_type
+= finish_decltype_type (get_ro, /*id_expression_or_member_access_p*/false,
+   tf_warning_or_error);
   if (VOID_TYPE_P (gro_type) && !void_ramp_p)
 {
   error_at (fn_start, "no viable conversion from % provided by"
@@ -5159,7 +5162,7 @@ cp_coroutine_transform::build_ramp_function ()
= coro_build_and_push_artificial_var (loc, "_Coro_gro", gro_type,
  orig_fn_decl, NULL_TREE);
 
-  r = cp_build_init_expr (coro_gro, get_ro);
+  r = cp_build_init_expr (coro_gro, STRIP_REFERENCE_REF (get_ro));
   finish_expr_stmt (r);
   tree coro_gro_cleanup
= cxx_maybe_build_cleanup (coro_gro, tf_warning_or_error);
@@ -5181,7 +5184,8 @@ cp_coroutine_transform::build_ramp_function ()
   /* The ramp is done, we just need the return statement, which we build from
  the return object we constructed before we called the function body.  */
 
-  finish_return_stmt (void_ramp_p ? NULL_TREE : coro_gro);
+  r = void_ramp_p ? NULL_TREE : convert_from_reference (coro_gro);
+  finish_return_stmt (r);
 
   if (flag_exceptions)
 {
diff --git a/gcc/testsuite/g++.dg/coroutines/pr115908.C 
b/gcc/testsuite/g++.dg/coroutines/pr115908.C
index ac27d916de2b..a40cece11438 100644
--- a/gcc/testsuite/g++.dg/coroutines/pr115908.C
+++ b/gcc/testsuite/g++.dg/coroutines/pr115908.C
@@ -1,3 +1,16 @@
+// { dg-do run }
+
+// With the changes to deal with CWG2563 (and PR119916) we now use the
+// referenced promise in the return expression.  It is quite reasonable
+// for a body implementation to complete before control is returned to
+// the ramp - and in that case we would be creating the ramp return object
+// from an already-deleted promise object.
+// This is recognised to be a poor situation and resolution via a core
+// issue is planned.
+
+// In this test we explicitly trigger the circumstance mentioned above.
+// { dg-xfail-run-if "" { *-*-* } }
+
 #include 
 
 #ifdef OUTPUT
@@ -6,23 +19,25 @@
 
 struct Promise;
 
-bool promise_live = false;
+int promise_life = 0;
 
 struct Handle : std::coroutine_handle {
+
 Handle(Promise &p) : 
std::coroutine_handle(Handle::from_promise(p)) {
-if (!promise_live)
-  __builtin_abort ();
 #ifdef OUTPUT
-std::cout << "Handle(Promise &)\n";
+std::cout << "Handle(Promise &) " << promise_life << std::endl;
 #endif
-}
-Handle(Promise &&p) : 
std::coroutine_handle(Handle::from_promise(p)) {
-if (!promise_live)
+ if (promise_life <= 0)
   __builtin_abort ();
+   }
+
+Handle(Promise &&p) : 
std::coroutine_handle(Handle::from_promise(p)) {
 #ifdef OUTPUT
-std::cout << "Handle(Promise &&)\n";
+std::cout << "Handle(Promise &&) "  << promise_life  << std::endl;
 #endif
-}
+ if (promise_life <= 0)
+  __builtin_abort ();
+   }
 
 using pro

[gcc r16-773] c++, coroutines: Address CWG2563 return value init [PR119916].

2025-05-21 Thread Iain D Sandoe via Gcc-cvs

https://gcc.gnu.org/g:e06555a40c051d5062405b02f93b89b01a397f97

commit r16-773-ge06555a40c051d5062405b02f93b89b01a397f97
Author: Iain Sandoe 
Date:   Mon May 12 19:47:42 2025 +0100

c++, coroutines: Address CWG2563 return value init [PR119916].

This addresses the clarification that, when the get_return_object is of a
different type from the ramp return, any necessary conversions should be
performed on the return expression (so that they typically occur after the
function body has started execution).

PR c++/119916

gcc/cp/ChangeLog:

* coroutines.cc
(cp_coroutine_transform::wrap_original_function_body): Do not
initialise initial_await_resume_called here...
(cp_coroutine_transform::build_ramp_function): ... but here.
When the coroutine is not void, initialize a GRO object from
promise.get_return_object().  Use this as the argument to the
return expression.  Use a regular cleanup for the GRO, since
it is ramp-local.

gcc/testsuite/ChangeLog:

* 
g++.dg/coroutines/torture/special-termination-00-sync-completion.C:
Amend for CWG2563 expected behaviour.
* g++.dg/coroutines/torture/special-termination-01-self-destruct.C:
Likewise.
* g++.dg/coroutines/torture/pr119916.C: New test.

Signed-off-by: Iain Sandoe 

Diff:
---
 gcc/cp/coroutines.cc   | 126 +++--
 gcc/testsuite/g++.dg/coroutines/torture/pr119916.C |  66 +++
 .../special-termination-00-sync-completion.C   |   2 +-
 .../torture/special-termination-01-self-destruct.C |   2 +-
 4 files changed, 109 insertions(+), 87 deletions(-)

diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc
index 743da068e352..bc5fb9381dbe 100644
--- a/gcc/cp/coroutines.cc
+++ b/gcc/cp/coroutines.cc
@@ -4451,7 +4451,7 @@ cp_coroutine_transform::wrap_original_function_body ()
   tree i_a_r_c
= coro_build_artificial_var (loc, coro_frame_i_a_r_c_id,
 boolean_type_node, orig_fn_decl,
-boolean_false_node);
+NULL_TREE);
   DECL_CHAIN (i_a_r_c) = var_list;
   var_list = i_a_r_c;
   add_decl_expr (i_a_r_c);
@@ -4867,7 +4867,6 @@ cp_coroutine_transform::build_ramp_function ()
   add_decl_expr (coro_fp);
 
   tree coro_promise_live = NULL_TREE;
-  tree coro_gro_live = NULL_TREE;
   if (flag_exceptions)
 {
   /* Signal that we need to clean up the promise object on exception.  */
@@ -4876,13 +4875,6 @@ cp_coroutine_transform::build_ramp_function ()
  boolean_type_node, orig_fn_decl,
  boolean_false_node);
 
-  /* When the get-return-object is in the RETURN slot, we need to arrange
-for cleanup on exception.  */
-  coro_gro_live
-   = coro_build_and_push_artificial_var (loc, "_Coro_gro_live",
- boolean_type_node, orig_fn_decl,
- boolean_false_node);
-
   /* To signal that we need to cleanup copied function args.  */
   if (DECL_ARGUMENTS (orig_fn_decl))
for (tree arg = DECL_ARGUMENTS (orig_fn_decl); arg != NULL;
@@ -4970,13 +4962,19 @@ cp_coroutine_transform::build_ramp_function ()
   tree ramp_try_block = NULL_TREE;
   tree ramp_try_stmts = NULL_TREE;
   tree iarc_x = NULL_TREE;
+  tree coro_before_return = NULL_TREE;
   if (flag_exceptions)
 {
+  coro_before_return
+   = coro_build_and_push_artificial_var (loc, "_Coro_before_return",
+ boolean_type_node, orig_fn_decl,
+ boolean_true_node);
   iarc_x
= coro_build_and_push_artificial_var_with_dve (loc,
   coro_frame_i_a_r_c_id,
   boolean_type_node,
-  orig_fn_decl, NULL_TREE,
+  orig_fn_decl,
+  boolean_false_node,
   deref_fp);
   ramp_try_block = begin_try_block ();
   ramp_try_stmts = begin_compound_stmt (BCS_TRY_BLOCK);
@@ -5136,90 +5134,54 @@ cp_coroutine_transform::build_ramp_function ()
 (loc, coro_resume_index_id, short_unsigned_type_node,  orig_fn_decl,
  build_zero_cst (short_unsigned_type_node), deref_fp);
 
-  if (flag_exceptions && iarc_x)
-{
-  r = cp_build_init_expr (iarc_x, boolean_false_node);
-  finish_expr_stmt (r);
-}
-
-  /* Used for return objects in the RESULT slot.  */
-  tree ret_val_dtor = NULL_TREE;
-  tree retval = NULL_TREE;
+  /* We must ma

[gcc r16-779] genemit: Use references rather than pointers

2025-05-21 Thread Richard Sandiford via Gcc-cvs

https://gcc.gnu.org/g:9b57e38e0ef26192ebb0e9e326ab3a9df06ee275

commit r16-779-g9b57e38e0ef26192ebb0e9e326ab3a9df06ee275
Author: Richard Sandiford 
Date:   Wed May 21 10:01:27 2025 +0100

genemit: Use references rather than pointers

This patch makes genemit.cc pass the md_rtx_info around by constant
reference rather than pointer.  It's somewhat of a cosmetic change
on its own, but it makes later changes less noisy.

gcc/
* genemit.cc (gen_exp): Make the info argument a constant reference.
(gen_emit_seq, gen_insn, gen_expand, gen_split): Likewise.
(output_add_clobbers): Likewise.
(main): Update calls accordingly.

Diff:
---
 gcc/genemit.cc | 60 +-
 1 file changed, 30 insertions(+), 30 deletions(-)

diff --git a/gcc/genemit.cc b/gcc/genemit.cc
index 9f92364d9062..cb4ae47294da 100644
--- a/gcc/genemit.cc
+++ b/gcc/genemit.cc
@@ -80,8 +80,8 @@ gen_rtx_scratch (rtx x, enum rtx_code subroutine_type, FILE 
*file)
substituting any operand references appearing within.  */
 
 static void
-gen_exp (rtx x, enum rtx_code subroutine_type, char *used, md_rtx_info *info,
-FILE *file)
+gen_exp (rtx x, enum rtx_code subroutine_type, char *used,
+const md_rtx_info &info, FILE *file)
 {
   RTX_CODE code;
   int i;
@@ -281,7 +281,7 @@ gen_exp (rtx x, enum rtx_code subroutine_type, char *used, 
md_rtx_info *info,
becoming a separate instruction.  USED is as for gen_exp.  */
 
 static void
-gen_emit_seq (rtvec vec, char *used, md_rtx_info *info, FILE *file)
+gen_emit_seq (rtvec vec, char *used, const md_rtx_info &info, FILE *file)
 {
   for (int i = 0, len = GET_NUM_ELEM (vec); i < len; ++i)
 {
@@ -329,7 +329,7 @@ emit_c_code (const char *code, bool can_fail_p, const char 
*name, FILE *file)
 /* Generate the `gen_...' function for a DEFINE_INSN.  */
 
 static void
-gen_insn (md_rtx_info *info, FILE *file)
+gen_insn (const md_rtx_info &info, FILE *file)
 {
   struct pattern_stats stats;
   int i;
@@ -338,7 +338,7 @@ gen_insn (md_rtx_info *info, FILE *file)
  registers or MATCH_SCRATCHes.  If so, store away the information for
  later.  */
 
-  rtx insn = info->def;
+  rtx insn = info.def;
   if (XVEC (insn, 1))
 {
   int has_hard_reg = 0;
@@ -366,7 +366,7 @@ gen_insn (md_rtx_info *info, FILE *file)
  struct clobber_ent *link = XNEW (struct clobber_ent);
  int j;
 
- link->code_number = info->index;
+ link->code_number = info.index;
 
  /* See if any previous CLOBBER_LIST entry is the same as this
 one.  */
@@ -422,12 +422,12 @@ gen_insn (md_rtx_info *info, FILE *file)
   if (XSTR (insn, 0)[0] == 0 || XSTR (insn, 0)[0] == '*')
 return;
 
-  fprintf (file, "/* %s:%d */\n", info->loc.filename, info->loc.lineno);
+  fprintf (file, "/* %s:%d */\n", info.loc.filename, info.loc.lineno);
 
   /* Find out how many operands this function has.  */
   get_pattern_stats (&stats, XVEC (insn, 1));
   if (stats.max_dup_opno > stats.max_opno)
-fatal_at (info->loc, "match_dup operand number has no match_operand");
+fatal_at (info.loc, "match_dup operand number has no match_operand");
 
   /* Output the function name and argument declarations.  */
   fprintf (file, "rtx\ngen_%s (", XSTR (insn, 0));
@@ -458,25 +458,25 @@ gen_insn (md_rtx_info *info, FILE *file)
 /* Generate the `gen_...' function for a DEFINE_EXPAND.  */
 
 static void
-gen_expand (md_rtx_info *info, FILE *file)
+gen_expand (const md_rtx_info &info, FILE *file)
 {
   struct pattern_stats stats;
   int i;
   char *used;
 
-  rtx expand = info->def;
+  rtx expand = info.def;
   if (strlen (XSTR (expand, 0)) == 0)
-fatal_at (info->loc, "define_expand lacks a name");
+fatal_at (info.loc, "define_expand lacks a name");
   if (XVEC (expand, 1) == 0)
-fatal_at (info->loc, "define_expand for %s lacks a pattern",
+fatal_at (info.loc, "define_expand for %s lacks a pattern",
  XSTR (expand, 0));
 
   /* Find out how many operands this function has.  */
   get_pattern_stats (&stats, XVEC (expand, 1));
   if (stats.min_scratch_opno != -1
   && stats.min_scratch_opno <= MAX (stats.max_opno, stats.max_dup_opno))
-fatal_at (info->loc, "define_expand for %s needs to have match_scratch "
-"numbers above all other operands", XSTR (expand, 0));
+fatal_at (info.loc, "define_expand for %s needs to have match_scratch "
+ "numbers above all other operands", XSTR (expand, 0));
 
   /* Output the function name and argument declarations.  */
   fprintf (file, "rtx\ngen_%s (", XSTR (expand, 0));
@@ -567,21 +567,21 @@ gen_expand (md_rtx_info *info, FILE *file)
 /* Like gen_expand, but generates insns resulting from splitting SPLIT.  */
 
 static void
-gen_split (md_rtx_info *info, FILE *file)
+gen_split (const md_rtx_info &info, FILE *file)
 {
   struct pattern_stats stats;
   int i;
-  rtx split = i

[gcc r16-777] xstormy16: Avoid accessing beyond the operands[] array

2025-05-21 Thread Richard Sandiford via Gcc-cvs

https://gcc.gnu.org/g:856f6de5d19257e3c5802a250e7c749ca44beee3

commit r16-777-g856f6de5d19257e3c5802a250e7c749ca44beee3
Author: Richard Sandiford 
Date:   Wed May 21 10:01:26 2025 +0100

xstormy16: Avoid accessing beyond the operands[] array

The negsi2 C++ code writes to operands[2] even though the pattern
has no operand 2.

gcc/
* config/stormy16/stormy16.md (negsi2): Remove unused assignment.

Diff:
---
 gcc/config/stormy16/stormy16.md | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/gcc/config/stormy16/stormy16.md b/gcc/config/stormy16/stormy16.md
index 70c82827a4ac..15c60ad03880 100644
--- a/gcc/config/stormy16/stormy16.md
+++ b/gcc/config/stormy16/stormy16.md
@@ -702,8 +702,7 @@
   [(parallel [(set (match_operand:SI 0 "register_operand" "")
   (neg:SI (match_operand:SI 1 "register_operand" "")))
  (clobber (reg:BI CARRY_REG))])]
-  ""
-  { operands[2] = gen_reg_rtx (HImode); })
+  "")
 
 (define_insn_and_split "*negsi2_internal"
   [(set (match_operand:SI 0 "register_operand" "=&r")

[gcc r16-788] genemit: Use a byte encoding to generate insns

2025-05-21 Thread Richard Sandiford via Gcc-cvs

https://gcc.gnu.org/g:d63c889d5cd3ef00ec5b0c3389448eab4f7d2b68

commit r16-788-gd63c889d5cd3ef00ec5b0c3389448eab4f7d2b68
Author: Richard Sandiford 
Date:   Wed May 21 10:01:32 2025 +0100

genemit: Use a byte encoding to generate insns

genemit has traditionally used open-coded gen_rtx_FOO sequences
to build up the instruction pattern.  This is now the source of
quite a bit of bloat in the binary, and also a source of slow
compile times.

Two obvious ways of trying to deal with this are:

(1) Try to identify rtxes that have a similar form and use shared
routines to generate rtxes of that form.

(2) Use a static table to encode the rtx and call a common routine
to expand it.

I did briefly look at (1).  However, it's more complex than (2),
and I think suffers from being the worst of both worlds, for reasons
that I'll explain below.  This patch therefore does (2).

In theory, one of the advantages of open-coding the calls to
gen_rtx_FOO is that the rtx can be populated using stores of known
constants (for the rtx code, mode, unspec number, etc).  However,
the time spent constructing an rtx is likely to be dominated by
the call to rtx_alloc, rather than by the stores to the fields.

Option (1) above loses this advantage of storing constants.
The shared routines would parameterise an rtx according to things
like the modes on the rtx and its suboperands, so the code would
need to fetch the parameters.  In a sense, the rtx structure would
be open-coded but the parameters would be table-encoded (albeit
in a simple way).

The expansion code also shouldn't be particularly hot.  Anything that
treats expand/discard cycles as very cheap would be misconceived,
since each discarded expansion generates garbage memory that needs
to be cleaned up later.

Option (2) turns out to be pretty simple -- certainly simpler
than (1) -- and seems to give a reasonable saving.  Some numbers,
all for --enable-checking=yes,rtl,extra:

[A] size of the @progbits sections in insn-emit-*.o, new / old
[B] size of the load segments in cc1, new / old
[C] time to compile a typical insn-emit*.cc, new / old

Target [A]  [B]  [C]

native aarch64  0.5627   0.9585   0.5677
native x86_64   0.5925   0.9467   0.6377
aarch64-x-riscv64   0.   0.9066   0.2762

To get an idea of the effect on the final compiler, I tried compiling
fold-const.ii with -O0 (no -g), since that should give any slowdown
less room to hide.  I couldn't measure any difference in compile time
before or after the patch for any of the three variants above.

gcc/
* gensupport.h (needs_barrier_p): Delete.
* gensupport.cc (needs_barrier_p): Likewise.
* rtl.h (always_void_p): Return true for PC, RETURN and 
SIMPLE_RETURN.
(expand_opcode): New enum class.
(expand_rtx, complete_seq): Declare.
* emit-rtl.cc (rtx_expander): New class.
(expand_rtx, complete_seq): New functions.
* gengenrtl.cc (special_rtx, excluded_rtx): Add a cross-reference
comment.
* genemit.cc (FIRST_CODE): New constant.
(print_code): Delete.
(generator::file, generator::used, generator::sequence_type): 
Delete.
(generator::bytes): New member variable.
(generator::generator): Update accordingly.
(generator::gen_rtx_scratch): Delete.
(generator::add_uint, generator::add_opcode, generator::add_code)
(generator::add_match_operator, generator::add_exp)
(generator::add_vec, generator::gen_table): New member functions.
(generator::gen_exp): Rewrite to use a bytecode expansion.
(generator::gen_emit_seq): Likewise.
(start_gen_insn): Return the C++ expression for the operands array.
(gen_insn, gen_expand, gen_split): Update callers accordingly.
(emit_c_code): Remove use of _val.

Diff:
---
 gcc/emit-rtl.cc   | 292 +
 gcc/genemit.cc| 346 ++
 gcc/gengenrtl.cc  |  10 +-
 gcc/gensupport.cc |  10 --
 gcc/gensupport.h  |   1 -
 gcc/rtl.h |  42 ++-
 6 files changed, 480 insertions(+), 221 deletions(-)

diff --git a/gcc/emit-rtl.cc b/gcc/emit-rtl.cc
index 6c838d7afcc6..3f453cda67ed 100644
--- a/gcc/emit-rtl.cc
+++ b/gcc/emit-rtl.cc
@@ -64,6 +64,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "gimple.h"
 #include "gimple-ssa.h"
 #include "gimplify.h"
+#include "bbitmap.h"
 
 struct target_rtl default_target_rtl;
 #if SWITCHABLE_TARGET
@@ -6788,6 +6789,297 @@ gen_int_shift_amount (machine_mode, poly_int64 value)
   return gen_int_mode

[gcc r16-778] sparc: Avoid operandN variables in .md files

2025-05-21 Thread Richard Sandiford via Gcc-cvs

https://gcc.gnu.org/g:35dd60935336eb574194f2fe2088133f34c8

commit r16-778-g35dd60935336eb574194f2fe2088133f34c8
Author: Richard Sandiford 
Date:   Wed May 21 10:01:27 2025 +0100

sparc: Avoid operandN variables in .md files

The automatically-generated gen_* routines take their operands as
individual arguments, named "operand0" upwards.  These arguments are
stored into an "operands" array before invoking the expander's C++
code, which can then modify the operands by writing to the array.

However, the SPARC sign-extend and zero-extend expanders used the
operandN variables directly, rather than operands[N].  That's a
correct usage in context, since the code goes on to expand the
pattern manually and invoke DONE.

But it's also easy for code to accidentally write to operandN instead
of operands[N] when trying to set up something like a match_dup.
It sounds like Jeff had seen an instance of this.

A later patch is therefore going to mark the operandN arguments
as const.  This patch makes way for that by using operands[N]
instead of operandN for the SPARC expanders.

gcc/
* config/sparc/sparc.md (zero_extendhisi2, zero_extendhidi2)
(extendhisi2, extendqihi2, extendqisi2, extendqidi2)
(extendhidi2): Use operands[0] and operands[1] instead of
operand0 and operand1.

Diff:
---
 gcc/config/sparc/sparc.md | 87 +--
 1 file changed, 47 insertions(+), 40 deletions(-)

diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md
index 4d46cfd0fb20..c6e06b4467fe 100644
--- a/gcc/config/sparc/sparc.md
+++ b/gcc/config/sparc/sparc.md
@@ -3014,17 +3014,18 @@
   rtx shift_16 = GEN_INT (16);
   int op1_subbyte = 0;
 
-  if (GET_CODE (operand1) == SUBREG)
+  if (GET_CODE (operands[1]) == SUBREG)
 {
-  op1_subbyte = SUBREG_BYTE (operand1);
+  op1_subbyte = SUBREG_BYTE (operands[1]);
   op1_subbyte /= GET_MODE_SIZE (SImode);
   op1_subbyte *= GET_MODE_SIZE (SImode);
-  operand1 = XEXP (operand1, 0);
+  operands[1] = XEXP (operands[1], 0);
 }
 
-  emit_insn (gen_ashlsi3 (temp, gen_rtx_SUBREG (SImode, operand1, op1_subbyte),
+  emit_insn (gen_ashlsi3 (temp, gen_rtx_SUBREG (SImode, operands[1],
+   op1_subbyte),
  shift_16));
-  emit_insn (gen_lshrsi3 (operand0, temp, shift_16));
+  emit_insn (gen_lshrsi3 (operands[0], temp, shift_16));
   DONE;
 })
 
@@ -3097,17 +3098,18 @@
   rtx shift_48 = GEN_INT (48);
   int op1_subbyte = 0;
 
-  if (GET_CODE (operand1) == SUBREG)
+  if (GET_CODE (operands[1]) == SUBREG)
 {
-  op1_subbyte = SUBREG_BYTE (operand1);
+  op1_subbyte = SUBREG_BYTE (operands[1]);
   op1_subbyte /= GET_MODE_SIZE (DImode);
   op1_subbyte *= GET_MODE_SIZE (DImode);
-  operand1 = XEXP (operand1, 0);
+  operands[1] = XEXP (operands[1], 0);
 }
 
-  emit_insn (gen_ashldi3 (temp, gen_rtx_SUBREG (DImode, operand1, op1_subbyte),
+  emit_insn (gen_ashldi3 (temp, gen_rtx_SUBREG (DImode, operands[1],
+   op1_subbyte),
  shift_48));
-  emit_insn (gen_lshrdi3 (operand0, temp, shift_48));
+  emit_insn (gen_lshrdi3 (operands[0], temp, shift_48));
   DONE;
 })
 
@@ -3283,17 +3285,18 @@
   rtx shift_16 = GEN_INT (16);
   int op1_subbyte = 0;
 
-  if (GET_CODE (operand1) == SUBREG)
+  if (GET_CODE (operands[1]) == SUBREG)
 {
-  op1_subbyte = SUBREG_BYTE (operand1);
+  op1_subbyte = SUBREG_BYTE (operands[1]);
   op1_subbyte /= GET_MODE_SIZE (SImode);
   op1_subbyte *= GET_MODE_SIZE (SImode);
-  operand1 = XEXP (operand1, 0);
+  operands[1] = XEXP (operands[1], 0);
 }
 
-  emit_insn (gen_ashlsi3 (temp, gen_rtx_SUBREG (SImode, operand1, op1_subbyte),
+  emit_insn (gen_ashlsi3 (temp, gen_rtx_SUBREG (SImode, operands[1],
+   op1_subbyte),
  shift_16));
-  emit_insn (gen_ashrsi3 (operand0, temp, shift_16));
+  emit_insn (gen_ashrsi3 (operands[0], temp, shift_16));
   DONE;
 })
 
@@ -3315,25 +3318,26 @@
   int op1_subbyte = 0;
   int op0_subbyte = 0;
 
-  if (GET_CODE (operand1) == SUBREG)
+  if (GET_CODE (operands[1]) == SUBREG)
 {
-  op1_subbyte = SUBREG_BYTE (operand1);
+  op1_subbyte = SUBREG_BYTE (operands[1]);
   op1_subbyte /= GET_MODE_SIZE (SImode);
   op1_subbyte *= GET_MODE_SIZE (SImode);
-  operand1 = XEXP (operand1, 0);
+  operands[1] = XEXP (operands[1], 0);
 }
-  if (GET_CODE (operand0) == SUBREG)
+  if (GET_CODE (operands[0]) == SUBREG)
 {
-  op0_subbyte = SUBREG_BYTE (operand0);
+  op0_subbyte = SUBREG_BYTE (operands[0]);
   op0_subbyte /= GET_MODE_SIZE (SImode);
   op0_subbyte *= GET_MODE_SIZE (SImode);
-  operand0 = XEXP (operand0, 0);
+  operands[0] = XEXP (operands[0], 0);
 }
-

[gcc r16-783] genemit: Add a generator struct

2025-05-21 Thread Richard Sandiford via Gcc-cvs

https://gcc.gnu.org/g:88b849ffb9fc4b6de3786784b4c4b074758cc2a1

commit r16-783-g88b849ffb9fc4b6de3786784b4c4b074758cc2a1
Author: Richard Sandiford 
Date:   Wed May 21 10:01:29 2025 +0100

genemit: Add a generator struct

gen_exp now has quite a few arguments that need to be passed
to each recursive call.  This patch turns it and related routines
into member functions of a new generator class, so that the shared
information can be stored in member variables.

This also helps to make later patches less noisy.

gcc/
* genemit.cc (generator): New structure.
(gen_rtx_scratch, gen_exp, gen_emit_seq): Turn into member
functions of generator.
(gen_insn, gen_expand, gen_split, output_add_clobbers): Update
users accordingly.

Diff:
---
 gcc/genemit.cc | 76 ++
 1 file changed, 55 insertions(+), 21 deletions(-)

diff --git a/gcc/genemit.cc b/gcc/genemit.cc
index ed87f1a903d3..44be50fc933c 100644
--- a/gcc/genemit.cc
+++ b/gcc/genemit.cc
@@ -66,8 +66,40 @@ print_code (RTX_CODE code, FILE *file)
 fprintf (file, "%c", TOUPPER (*p1));
 }
 
-static void
-gen_rtx_scratch (rtx x, enum rtx_code subroutine_type, FILE *file)
+/* A structure used to generate code for a particular expansion.  */
+struct generator
+{
+  generator (rtx_code, char *, const md_rtx_info &, FILE *);
+
+  void gen_rtx_scratch (rtx);
+  void gen_exp (rtx);
+  void gen_emit_seq (rtvec);
+
+  /* The type of subroutine that we're expanding.  */
+  rtx_code subroutine_type;
+
+  /* If nonnull, index N indicates that the original operand N has already
+ been used to replace a MATCH_OPERATOR or MATCH_DUP, and so any further
+ replacements must make a copy.  */
+  char *used;
+
+  /* The construct that we're expanding.  */
+  const md_rtx_info info;
+
+  /* The output file.  */
+  FILE *file;
+};
+
+generator::generator (rtx_code subroutine_type, char *used,
+ const md_rtx_info &info, FILE *file)
+  : subroutine_type (subroutine_type),
+used (used),
+info (info),
+file (file)
+{}
+
+void
+generator::gen_rtx_scratch (rtx x)
 {
   if (subroutine_type == DEFINE_PEEPHOLE2)
 {
@@ -82,9 +114,8 @@ gen_rtx_scratch (rtx x, enum rtx_code subroutine_type, FILE 
*file)
 /* Print a C expression to construct an RTX just like X,
substituting any operand references appearing within.  */
 
-static void
-gen_exp (rtx x, enum rtx_code subroutine_type, char *used,
-const md_rtx_info &info, FILE *file)
+void
+generator::gen_exp (rtx x)
 {
   RTX_CODE code;
   int i;
@@ -128,7 +159,7 @@ gen_exp (rtx x, enum rtx_code subroutine_type, char *used,
   for (i = 0; i < XVECLEN (x, 1); i++)
{
  fprintf (file, ",\n\t\t");
- gen_exp (XVECEXP (x, 1, i), subroutine_type, used, info, file);
+ gen_exp (XVECEXP (x, 1, i));
}
   fprintf (file, ")");
   return;
@@ -142,7 +173,7 @@ gen_exp (rtx x, enum rtx_code subroutine_type, char *used,
   for (i = 0; i < XVECLEN (x, 2); i++)
{
  fprintf (file, ",\n\t\t");
- gen_exp (XVECEXP (x, 2, i), subroutine_type, used, info, file);
+ gen_exp (XVECEXP (x, 2, i));
}
   fprintf (file, ")");
   return;
@@ -153,7 +184,7 @@ gen_exp (rtx x, enum rtx_code subroutine_type, char *used,
   return;
 
 case MATCH_SCRATCH:
-  gen_rtx_scratch (x, subroutine_type, file);
+  gen_rtx_scratch (x);
   return;
 
 case PC:
@@ -234,7 +265,7 @@ gen_exp (rtx x, enum rtx_code subroutine_type, char *used,
   switch (fmt[i])
{
case 'e': case 'u':
- gen_exp (XEXP (x, i), subroutine_type, used, info, file);
+ gen_exp (XEXP (x, i));
  break;
 
case 'i':
@@ -266,7 +297,7 @@ gen_exp (rtx x, enum rtx_code subroutine_type, char *used,
for (j = 0; j < XVECLEN (x, i); j++)
  {
fprintf (file, ",\n\t\t");
-   gen_exp (XVECEXP (x, i, j), subroutine_type, used, info, file);
+   gen_exp (XVECEXP (x, i, j));
  }
fprintf (file, ")");
break;
@@ -281,10 +312,10 @@ gen_exp (rtx x, enum rtx_code subroutine_type, char *used,
 }
 
 /* Output code to emit the instruction patterns in VEC, with each element
-   becoming a separate instruction.  USED is as for gen_exp.  */
+   becoming a separate instruction.  */
 
-static void
-gen_emit_seq (rtvec vec, char *used, const md_rtx_info &info, FILE *file)
+void
+generator::gen_emit_seq (rtvec vec)
 {
   for (int i = 0, len = GET_NUM_ELEM (vec); i < len; ++i)
 {
@@ -293,7 +324,7 @@ gen_emit_seq (rtvec vec, char *used, const md_rtx_info 
&info, FILE *file)
   if (const char *name = get_emit_function (next))
{
  fprintf (file, "  %s (", name);
- gen_exp (next, DEFINE_EXPAND, used, info, file);
+ gen_exp (next);
  fprintf (file, ");\n

[gcc r16-785] genemit: Remove purported handling of location_ts

2025-05-21 Thread Richard Sandiford via Gcc-cvs

https://gcc.gnu.org/g:efbc8de515c71c27e881d425f8325e39f7b4f328

commit r16-785-gefbc8de515c71c27e881d425f8325e39f7b4f328
Author: Richard Sandiford 
Date:   Wed May 21 10:01:30 2025 +0100

genemit: Remove purported handling of location_ts

gen_exp had code to handle the 'L' operand format.  But this format
is specifically for location_ts, which are only used in RTX_INSNs.
Those should never occur in this context, where the input is always
an md file rather than an __RTL function.  Any hard-coded raw
location value would be meaningless anyway.

It seemed safer to turn this into an error rather than a gcc_unreachable.

gcc/
* genemit.cc (generator::gen_exp): Raise an error if we see
an 'L' operand.

Diff:
---
 gcc/genemit.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/genemit.cc b/gcc/genemit.cc
index 0529b916455f..9923cf078b96 100644
--- a/gcc/genemit.cc
+++ b/gcc/genemit.cc
@@ -270,7 +270,8 @@ generator::gen_exp (rtx x)
  break;
 
case 'L':
- fprintf (file, "%llu", (unsigned long long) XLOC (x, i));
+ fatal_at (info.loc, "'%s' rtxes are not supported in this context",
+   GET_RTX_NAME (code));
  break;
 
case 'r':

[gcc r16-786] genemit: Remove support for string operands

2025-05-21 Thread Richard Sandiford via Gcc-cvs

https://gcc.gnu.org/g:97d2686decc34400e585bbc725602757c91e3fbf

commit r16-786-g97d2686decc34400e585bbc725602757c91e3fbf
Author: Richard Sandiford 
Date:   Wed May 21 10:01:31 2025 +0100

genemit: Remove support for string operands

gen_exp currently supports the 's' (string) operand type.  It would
certainly be possible to make the upcoming bytecode patch support
that too.  However, the rtx codes that have string operands should
be very rarely used in hard-coded define_insn/expand/split/peephole2
rtx templates (as opposed to things like attribute expressions,
where const_string is commonplace).  And AFAICT, no current target
does use them like that.

This patch therefore reports an error for these rtx codes,
rather than adding code that would be unused and untested.

gcc/
* genemit.cc (generator::gen_exp): Report an error for 's' operands.

Diff:
---
 gcc/genemit.cc | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/gcc/genemit.cc b/gcc/genemit.cc
index 9923cf078b96..ba64290af53e 100644
--- a/gcc/genemit.cc
+++ b/gcc/genemit.cc
@@ -270,6 +270,7 @@ generator::gen_exp (rtx x)
  break;
 
case 'L':
+   case 's':
  fatal_at (info.loc, "'%s' rtxes are not supported in this context",
GET_RTX_NAME (code));
  break;
@@ -284,10 +285,6 @@ generator::gen_exp (rtx x)
  fprintf (file, "%d", SUBREG_BYTE (x).to_constant ());
  break;
 
-   case 's':
- fprintf (file, "\"%s\"", XSTR (x, i));
- break;
-
case 'E':
  {
int j;

[gcc r16-787] genemit: Avoid using gen_exp in output_add_clobbers

2025-05-21 Thread Richard Sandiford via Gcc-cvs

https://gcc.gnu.org/g:aca0cf1150d6f6be9ee451b5f91f505aef911f8e

commit r16-787-gaca0cf1150d6f6be9ee451b5f91f505aef911f8e
Author: Richard Sandiford 
Date:   Wed May 21 10:01:31 2025 +0100

genemit: Avoid using gen_exp in output_add_clobbers

output_add_clobbers emits code to add:

  (clobber (scratch:M))

and/or:

  (clobber (reg:M R))

expressions to the end of a PARALLEL.  At the moment, it does this
using the general gen_exp function.  That makes sense with the code
in its current form, but with later patches it's more convenient to
handle the two cases directly.

This also avoids having to pass an md_rtx_info that is unrelated
to the clobber expressions.

gcc/
* genemit.cc (clobber_pat::code): Delete.
(maybe_queue_insn): Don't set clobber_pat::code.
(output_add_clobbers): Remove info argument and output the two
REG and SCRATCH cases directly.
(main): Update call accordingly.

Diff:
---
 gcc/genemit.cc | 22 --
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/gcc/genemit.cc b/gcc/genemit.cc
index ba64290af53e..21eb0f2df7d2 100644
--- a/gcc/genemit.cc
+++ b/gcc/genemit.cc
@@ -39,7 +39,6 @@ struct clobber_pat
   int first_clobber;
   struct clobber_pat *next;
   int has_hard_reg;
-  rtx_code code;
 } *clobber_list;
 
 /* Records one insn that uses the clobber list.  */
@@ -435,7 +434,6 @@ maybe_queue_insn (const md_rtx_info &info)
  p->first_clobber = i + 1;
  p->next = clobber_list;
  p->has_hard_reg = has_hard_reg;
- p->code = GET_CODE (insn);
  clobber_list = p;
}
 
@@ -691,7 +689,7 @@ gen_split (const md_rtx_info &info, FILE *file)
the end of the vector.  */
 
 static void
-output_add_clobbers (const md_rtx_info &info, FILE *file)
+output_add_clobbers (FILE *file)
 {
   struct clobber_pat *clobber;
   struct clobber_ent *ent;
@@ -709,12 +707,16 @@ output_add_clobbers (const md_rtx_info &info, FILE *file)
 
   for (i = clobber->first_clobber; i < GET_NUM_ELEM (clobber->pattern); 
i++)
{
- fprintf (file, "  XVECEXP (pattern, 0, %d) = ", i);
- rtx clobbered_value = RTVEC_ELT (clobber->pattern, i);
- /* Pass null for USED since there are no operands.  */
- generator (clobber->code, NULL, info, file)
-   .gen_exp (clobbered_value);
- fprintf (file, ";\n");
+ fprintf (file, "XVECEXP (pattern, 0, %d) ="
+  " gen_rtx_CLOBBER (VOIDmode, ", i);
+ rtx x = XEXP (RTVEC_ELT (clobber->pattern, i), 0);
+ if (REG_P (x))
+   fprintf (file, "gen_rtx_REG (%smode, %d)",
+GET_MODE_NAME (GET_MODE (x)), REGNO (x));
+ else
+   fprintf (file, "gen_rtx_SCRATCH (%smode)",
+GET_MODE_NAME (GET_MODE (x)));
+ fprintf (file, ");\n");
}
 
   fprintf (file, "  break;\n\n");
@@ -1034,7 +1036,7 @@ main (int argc, const char **argv)
 
   /* Write out the routines to add CLOBBERs to a pattern and say whether they
  clobber a hard reg.  */
-  output_add_clobbers (info, file);
+  output_add_clobbers (file);
   output_added_clobbers_hard_reg_p (file);
 
   for (overloaded_name *oname = rtx_reader_ptr->get_overloads ();

[gcc r16-784] genemit: Always track multiple uses of operands

2025-05-21 Thread Richard Sandiford via Gcc-cvs

https://gcc.gnu.org/g:8ebe8f5eff9fda40f22b9df7a0b8a6c2fdf5f8d7

commit r16-784-g8ebe8f5eff9fda40f22b9df7a0b8a6c2fdf5f8d7
Author: Richard Sandiford 
Date:   Wed May 21 10:01:30 2025 +0100

genemit: Always track multiple uses of operands

gen_exp has code to detect when the same operand is used multiple
times.  It ensures that second and subsequent uses call copy_rtx,
to enforce correct unsharing.

However, for historical reasons that aren't clear to me, this was
skipped for a define_insn unless the define_insn was a parallel.
It was also skipped for a single define_expand instruction,
regardless of its contents.

This meant that a single parallel instruction was treated differently
between define_insn (where sharing rules were followed) and
define_expand (where sharing rules weren't followed).  define_splits
and define_peephole2s followed the sharing rules in all cases.

This patch makes everything follow the sharing rules.  The code
it touches will be removed by the proposed bytecode-based expansion,
which will use its own tracking when enforcing sharing rules.
However, it seemed better for staging and bisection purposes
to make this change first.

gcc/
* genemit.cc (generator::used): Update comment.
(generator::gen_exp): Remove handling of null used arrays.
(gen_insn, gen_expand): Always pass a used array.
(output_add_clobbers): Note why the used array is null here.

Diff:
---
 gcc/genemit.cc | 27 ---
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/gcc/genemit.cc b/gcc/genemit.cc
index 44be50fc933c..0529b916455f 100644
--- a/gcc/genemit.cc
+++ b/gcc/genemit.cc
@@ -78,9 +78,9 @@ struct generator
   /* The type of subroutine that we're expanding.  */
   rtx_code subroutine_type;
 
-  /* If nonnull, index N indicates that the original operand N has already
- been used to replace a MATCH_OPERATOR or MATCH_DUP, and so any further
- replacements must make a copy.  */
+  /* Index N indicates that the original operand N has already been used to
+ replace a MATCH_OPERATOR or MATCH_DUP, and so any further replacements
+ must make a copy.  */
   char *used;
 
   /* The construct that we're expanding.  */
@@ -135,15 +135,12 @@ generator::gen_exp (rtx x)
 {
 case MATCH_OPERAND:
 case MATCH_DUP:
-  if (used)
+  if (used[XINT (x, 0)])
{
- if (used[XINT (x, 0)])
-   {
- fprintf (file, "copy_rtx (operands[%d])", XINT (x, 0));
- return;
-   }
- used[XINT (x, 0)] = 1;
+ fprintf (file, "copy_rtx (operands[%d])", XINT (x, 0));
+ return;
}
+  used[XINT (x, 0)] = 1;
   fprintf (file, "operands[%d]", XINT (x, 0));
   return;
 
@@ -505,10 +502,7 @@ gen_insn (const md_rtx_info &info, FILE *file)
   /* Output code to construct and return the rtl for the instruction body.  */
 
   rtx pattern = add_implicit_parallel (XVEC (insn, 1));
-  /* ??? This is the traditional behavior, but seems suspect.  */
-  char *used = (XVECLEN (insn, 1) == 1
-   ? NULL
-   : XCNEWVEC (char, stats.num_generator_args));
+  char *used = XCNEWVEC (char, stats.num_generator_args);
   fprintf (file, "  return ");
   generator (DEFINE_INSN, used, info, file).gen_exp (pattern);
   fprintf (file, ";\n}\n\n");
@@ -555,10 +549,12 @@ gen_expand (const md_rtx_info &info, FILE *file)
   && stats.max_opno >= stats.max_dup_opno
   && XVECLEN (expand, 1) == 1)
 {
+  used = XCNEWVEC (char, stats.num_operand_vars);
   fprintf (file, "  return ");
-  generator (DEFINE_EXPAND, NULL, info, file)
+  generator (DEFINE_EXPAND, used, info, file)
.gen_exp (XVECEXP (expand, 1, 0));
   fprintf (file, ";\n}\n\n");
+  XDELETEVEC (used);
   return;
 }
 
@@ -717,6 +713,7 @@ output_add_clobbers (const md_rtx_info &info, FILE *file)
{
  fprintf (file, "  XVECEXP (pattern, 0, %d) = ", i);
  rtx clobbered_value = RTVEC_ELT (clobber->pattern, i);
+ /* Pass null for USED since there are no operands.  */
  generator (clobber->code, NULL, info, file)
.gen_exp (clobbered_value);
  fprintf (file, ";\n");

[gcc r16-780] genemit: Add an internal queue

2025-05-21 Thread Richard Sandiford via Gcc-cvs

https://gcc.gnu.org/g:4fafb14e1f2ea068f2eb1a29ffb54d9984ab154d

commit r16-780-g4fafb14e1f2ea068f2eb1a29ffb54d9984ab154d
Author: Richard Sandiford 
Date:   Wed May 21 10:01:28 2025 +0100

genemit: Add an internal queue

An earlier version of this series wanted to collect information
about all the gen_* functions that are going to be generated.
The current version no longer does that, but the queue seemed
worth keeping anyway, since it gives a more consistent structure.

gcc/
* genemit.cc (queue): New static variable.
(maybe_queue_insn): New function, split out from...
(gen_insn): ...here.
(queue_expand): New function, split out from...
(gen_expand): ...here.
(queue_split): New function, split out from...
(gen_split): ...here.
(main): Queue definitions for later processing rather than
emitting them on the fly.

Diff:
---
 gcc/genemit.cc | 97 ++
 1 file changed, 71 insertions(+), 26 deletions(-)

diff --git a/gcc/genemit.cc b/gcc/genemit.cc
index cb4ae47294da..b73a45a04125 100644
--- a/gcc/genemit.cc
+++ b/gcc/genemit.cc
@@ -55,6 +55,9 @@ static void output_peephole2_scratches(rtx, FILE*);
 /* True for _optab if that optab isn't allowed to fail.  */
 static bool nofail_optabs[NUM_OPTABS];
 
+/* A list of the md constructs that need a gen_* function.  */
+static vec<md_rtx_info> queue;
+
 static void
 print_code (RTX_CODE code, FILE *file)
 {
@@ -326,14 +329,12 @@ emit_c_code (const char *code, bool can_fail_p, const 
char *name, FILE *file)
   fprintf (file, "#undef FAIL\n");
 }
 
-/* Generate the `gen_...' function for a DEFINE_INSN.  */
+/* Process the DEFINE_INSN in INFO, and queue it if it needs a gen_*
+   function.  */
 
 static void
-gen_insn (const md_rtx_info &info, FILE *file)
+maybe_queue_insn (const md_rtx_info &info)
 {
-  struct pattern_stats stats;
-  int i;
-
   /* See if the pattern for this insn ends with a group of CLOBBERs of (hard)
  registers or MATCH_SCRATCHes.  If so, store away the information for
  later.  */
@@ -349,6 +350,7 @@ gen_insn (const md_rtx_info &info, FILE *file)
  && GET_CODE (RTVEC_ELT (pattern, 0)) == PARALLEL)
pattern = XVEC (RTVEC_ELT (pattern, 0), 0);
 
+  int i;
   for (i = GET_NUM_ELEM (pattern) - 1; i > 0; i--)
{
  if (GET_CODE (RTVEC_ELT (pattern, i)) != CLOBBER)
@@ -422,9 +424,19 @@ gen_insn (const md_rtx_info &info, FILE *file)
   if (XSTR (insn, 0)[0] == 0 || XSTR (insn, 0)[0] == '*')
 return;
 
-  fprintf (file, "/* %s:%d */\n", info.loc.filename, info.loc.lineno);
+  queue.safe_push (info);
+}
+
+/* Generate the `gen_...' function for a DEFINE_INSN.  */
+
+static void
+gen_insn (const md_rtx_info &info, FILE *file)
+{
+  struct pattern_stats stats;
+  int i;
 
   /* Find out how many operands this function has.  */
+  rtx insn = info.def;
   get_pattern_stats (&stats, XVEC (insn, 1));
   if (stats.max_dup_opno > stats.max_opno)
 fatal_at (info.loc, "match_dup operand number has no match_operand");
@@ -455,23 +467,31 @@ gen_insn (const md_rtx_info &info, FILE *file)
   XDELETEVEC (used);
 }
 
-/* Generate the `gen_...' function for a DEFINE_EXPAND.  */
+/* Process and queue the DEFINE_EXPAND in INFO.  */
 
 static void
-gen_expand (const md_rtx_info &info, FILE *file)
+queue_expand (const md_rtx_info &info)
 {
-  struct pattern_stats stats;
-  int i;
-  char *used;
-
   rtx expand = info.def;
   if (strlen (XSTR (expand, 0)) == 0)
 fatal_at (info.loc, "define_expand lacks a name");
   if (XVEC (expand, 1) == 0)
 fatal_at (info.loc, "define_expand for %s lacks a pattern",
  XSTR (expand, 0));
+  queue.safe_push (info);
+}
+
+/* Generate the `gen_...' function for a DEFINE_EXPAND.  */
+
+static void
+gen_expand (const md_rtx_info &info, FILE *file)
+{
+  struct pattern_stats stats;
+  int i;
+  char *used;
 
   /* Find out how many operands this function has.  */
+  rtx expand = info.def;
   get_pattern_stats (&stats, XVEC (expand, 1));
   if (stats.min_scratch_opno != -1
   && stats.min_scratch_opno <= MAX (stats.max_opno, stats.max_dup_opno))
@@ -564,7 +584,24 @@ gen_expand (const md_rtx_info &info, FILE *file)
   fprintf (file, "  return _val;\n}\n\n");
 }
 
-/* Like gen_expand, but generates insns resulting from splitting SPLIT.  */
+/* Process and queue the DEFINE_SPLIT or DEFINE_PEEPHOLE2 in INFO.  */
+
+static void
+queue_split (const md_rtx_info &info)
+{
+  rtx split = info.def;
+
+  if (XVEC (split, 0) == 0)
+fatal_at (info.loc, "%s lacks a pattern",
+ GET_RTX_NAME (GET_CODE (split)));
+  if (XVEC (split, 2) == 0)
+fatal_at (info.loc, "%s lacks a replacement pattern",
+ GET_RTX_NAME (GET_CODE (split)));
+
+  queue.safe_push (info);
+}
+
+/* Generate the `gen_...' function for a DEFINE_SPLIT or DEFINE_PEEPHOLE2.  */
 
 static void
 gen_split (const md

[gcc r16-782] genemit: Consistently use operand arrays in gen_* functions

2025-05-21 Thread Richard Sandiford via Gcc-cvs

https://gcc.gnu.org/g:02c3910f75ddae52dd59775bf9a6c4452bbdd0ac

commit r16-782-g02c3910f75ddae52dd59775bf9a6c4452bbdd0ac
Author: Richard Sandiford 
Date:   Wed May 21 10:01:29 2025 +0100

genemit: Consistently use operand arrays in gen_* functions

One slightly awkward part about emitting the generator function
bodies is that:

* define_insn and define_expand routines have a separate argument for
  each operand, named "operand0" upwards.

* define_split and define_peephole2 routines take a pointer to an array,
  named "operands".

* the C++ preparation code for expands, splits and peephole2s uses an
  array called "operands" to refer to the operands.

* the automatically-generated code uses individual "operand"
  variables to refer to the operands.

So define_expands have to store the incoming arguments into an operands
array before the md file's C++ code, then copy the operands array back
to the individual variables before the automatically-generated code.
splits and peephole2s have to copy the incoming operands array to
individual variables after the md file's C++ code, creating more
local variables that are live across calls to rtx_alloc.

This patch tries to simplify things by making the whole function
body use the operands array in preference to individual variables.
define_insns and define_expands store their arguments to the array
on entry.

This would have pros and cons on its own, but having a single array
helps with future efforts to reduce the duplication between gen_*
functions.

gcc/
* genemit.cc (gen_rtx_scratch, gen_exp): Use operands[%d] rather 
than
operand%d.
(start_gen_insn): Mark the incoming arguments as const and store
them to an operands array.
(gen_expand, gen_split): Remove copies into and out of the operands
array.

Diff:
---
 gcc/genemit.cc | 61 ++
 1 file changed, 19 insertions(+), 42 deletions(-)

diff --git a/gcc/genemit.cc b/gcc/genemit.cc
index 90f36e293b4b..ed87f1a903d3 100644
--- a/gcc/genemit.cc
+++ b/gcc/genemit.cc
@@ -71,7 +71,7 @@ gen_rtx_scratch (rtx x, enum rtx_code subroutine_type, FILE 
*file)
 {
   if (subroutine_type == DEFINE_PEEPHOLE2)
 {
-  fprintf (file, "operand%d", XINT (x, 0));
+  fprintf (file, "operands[%d]", XINT (x, 0));
 }
   else
 {
@@ -108,21 +108,21 @@ gen_exp (rtx x, enum rtx_code subroutine_type, char *used,
{
  if (used[XINT (x, 0)])
{
- fprintf (file, "copy_rtx (operand%d)", XINT (x, 0));
+ fprintf (file, "copy_rtx (operands[%d])", XINT (x, 0));
  return;
}
  used[XINT (x, 0)] = 1;
}
-  fprintf (file, "operand%d", XINT (x, 0));
+  fprintf (file, "operands[%d]", XINT (x, 0));
   return;
 
 case MATCH_OP_DUP:
   fprintf (file, "gen_rtx_fmt_");
   for (i = 0; i < XVECLEN (x, 1); i++)
fprintf (file, "e");
-  fprintf (file, " (GET_CODE (operand%d), ", XINT (x, 0));
+  fprintf (file, " (GET_CODE (operands[%d]), ", XINT (x, 0));
   if (GET_MODE (x) == VOIDmode)
-   fprintf (file, "GET_MODE (operand%d)", XINT (x, 0));
+   fprintf (file, "GET_MODE (operands[%d])", XINT (x, 0));
   else
fprintf (file, "%smode", GET_MODE_NAME (GET_MODE (x)));
   for (i = 0; i < XVECLEN (x, 1); i++)
@@ -137,7 +137,7 @@ gen_exp (rtx x, enum rtx_code subroutine_type, char *used,
   fprintf (file, "gen_rtx_fmt_");
   for (i = 0; i < XVECLEN (x, 2); i++)
fprintf (file, "e");
-  fprintf (file, " (GET_CODE (operand%d)", XINT (x, 0));
+  fprintf (file, " (GET_CODE (operands[%d])", XINT (x, 0));
   fprintf (file, ", %smode", GET_MODE_NAME (GET_MODE (x)));
   for (i = 0; i < XVECLEN (x, 2); i++)
{
@@ -149,7 +149,7 @@ gen_exp (rtx x, enum rtx_code subroutine_type, char *used,
 
 case MATCH_PARALLEL:
 case MATCH_PAR_DUP:
-  fprintf (file, "operand%d", XINT (x, 0));
+  fprintf (file, "operands[%d]", XINT (x, 0));
   return;
 
 case MATCH_SCRATCH:
@@ -437,14 +437,22 @@ start_gen_insn (FILE *file, const char *name, const 
pattern_stats &stats)
   fprintf (file, "rtx\ngen_%s (", name);
   if (stats.num_generator_args)
 for (int i = 0; i < stats.num_generator_args; i++)
-  if (i)
-   fprintf (file, ",\n\trtx operand%d ATTRIBUTE_UNUSED", i);
-  else
-   fprintf (file, "rtx operand%d ATTRIBUTE_UNUSED", i);
+  fprintf (file, "%sconst rtx operand%d", i == 0 ? "" : ", ", i);
   else
 fprintf (file, "void");
   fprintf (file, ")\n");
   fprintf (file, "{\n");
+  if (stats.num_generator_args)
+{
+  fprintf (file, "  rtx operands[%d] ATTRIBUTE_UNUSED = {",
+  stats.num_operand_vars);
+  for (int i = 0; i < stats.num_generator_args; i++)
+   fp

[gcc r16-781] genemit: Factor out code common to insns and expands

2025-05-21 Thread Richard Sandiford via Gcc-cvs

https://gcc.gnu.org/g:5355568c75a99fc621e2008fa98626ad811678c5

commit r16-781-g5355568c75a99fc621e2008fa98626ad811678c5
Author: Richard Sandiford 
Date:   Wed May 21 10:01:28 2025 +0100

genemit: Factor out code common to insns and expands

Mostly to reduce cut-&-paste.

gcc/
* genemit.cc (start_gen_insn): New function, split out from...
(gen_insn, gen_expand): ...here.

Diff:
---
 gcc/genemit.cc | 45 ++---
 1 file changed, 22 insertions(+), 23 deletions(-)

diff --git a/gcc/genemit.cc b/gcc/genemit.cc
index b73a45a04125..90f36e293b4b 100644
--- a/gcc/genemit.cc
+++ b/gcc/genemit.cc
@@ -427,13 +427,32 @@ maybe_queue_insn (const md_rtx_info &info)
   queue.safe_push (info);
 }
 
+/* Output the function name, argument declarations, and initial function
+   body for a pattern called NAME, given that it has the properties
+   in STATS.  */
+
+static void
+start_gen_insn (FILE *file, const char *name, const pattern_stats &stats)
+{
+  fprintf (file, "rtx\ngen_%s (", name);
+  if (stats.num_generator_args)
+for (int i = 0; i < stats.num_generator_args; i++)
+  if (i)
+   fprintf (file, ",\n\trtx operand%d ATTRIBUTE_UNUSED", i);
+  else
+   fprintf (file, "rtx operand%d ATTRIBUTE_UNUSED", i);
+  else
+fprintf (file, "void");
+  fprintf (file, ")\n");
+  fprintf (file, "{\n");
+}
+
 /* Generate the `gen_...' function for a DEFINE_INSN.  */
 
 static void
 gen_insn (const md_rtx_info &info, FILE *file)
 {
   struct pattern_stats stats;
-  int i;
 
   /* Find out how many operands this function has.  */
   rtx insn = info.def;
@@ -442,17 +461,7 @@ gen_insn (const md_rtx_info &info, FILE *file)
 fatal_at (info.loc, "match_dup operand number has no match_operand");
 
   /* Output the function name and argument declarations.  */
-  fprintf (file, "rtx\ngen_%s (", XSTR (insn, 0));
-  if (stats.num_generator_args)
-for (i = 0; i < stats.num_generator_args; i++)
-  if (i)
-   fprintf (file, ",\n\trtx operand%d ATTRIBUTE_UNUSED", i);
-  else
-   fprintf (file, "rtx operand%d ATTRIBUTE_UNUSED", i);
-  else
-fprintf (file, "void");
-  fprintf (file, ")\n");
-  fprintf (file, "{\n");
+  start_gen_insn (file, XSTR (insn, 0), stats);
 
   /* Output code to construct and return the rtl for the instruction body.  */
 
@@ -499,17 +508,7 @@ gen_expand (const md_rtx_info &info, FILE *file)
  "numbers above all other operands", XSTR (expand, 0));
 
   /* Output the function name and argument declarations.  */
-  fprintf (file, "rtx\ngen_%s (", XSTR (expand, 0));
-  if (stats.num_generator_args)
-for (i = 0; i < stats.num_generator_args; i++)
-  if (i)
-   fprintf (file, ",\n\trtx operand%d", i);
-  else
-   fprintf (file, "rtx operand%d", i);
-  else
-fprintf (file, "void");
-  fprintf (file, ")\n");
-  fprintf (file, "{\n");
+  start_gen_insn (file, XSTR (expand, 0), stats);
 
   /* If we don't have any C code to write, only one insn is being written,
  and no MATCH_DUPs are present, we can just return the desired insn

[gcc r16-807] [RISC-V][PR target/120368] Fix 32bit shift on rv64

2025-05-21 Thread Jeff Law via Gcc-cvs

https://gcc.gnu.org/g:8459c546197dc9178d250994db021b36405f1bd6

commit r16-807-g8459c546197dc9178d250994db021b36405f1bd6
Author: Jeff Law 
Date:   Wed May 21 14:15:23 2025 -0600

[RISC-V][PR target/120368] Fix 32bit shift on rv64

So a followup to last week's bugfix.  In last week's change we stopped 
using
define_insn_and_split to rewrite instructions.  That change was done to 
avoid
dropping a masking instruction out of the RTL.

As a result the pattern(s) were changed into simple define_insns, which is
good.  One of them uses the GPR iterator since it's supposed to work for 
both
32bit and 64bit shifts on rv64.

But we failed to emit the right opcode for a 32bit shift on rv64. Thankfully
the fix is trivial.  If the mode is anything but word_mode, then we must be
doing a 32-bit shift on rv64, ie the various "w" shift instructions.

It's run through my tester.  Just waiting on the upstream CI system to spin 
it.

PR target/120368
gcc/
* config/riscv/riscv.md (shift with masked shift count): Fix
opcode when generating an SImode shift on rv64.

gcc/testsuite/
* gcc.target/riscv/pr120368.c: New test.

Diff:
---
 gcc/config/riscv/riscv.md |  9 -
 gcc/testsuite/gcc.target/riscv/pr120368.c | 19 +++
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 7f6d0bbab3eb..7e35d7877ed9 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -2938,7 +2938,14 @@
   (match_operand:GPR2 2 "register_operand"  "r")
   (match_operand 3 ""))])))]
   ""
-  "\t%0,%1,%2"
+{
+  /* If the shift mode is not word mode, then it must be the
+ case that we're generating rv64 code, but this is a 32-bit
+ operation.  Thus we need to use the "w" variant.  */
+  if (E_mode != word_mode)
+return "w\t%0,%1,%2";
+  return "\t%0,%1,%2";
+}
   [(set_attr "type" "shift")
(set_attr "mode" "")])
 
diff --git a/gcc/testsuite/gcc.target/riscv/pr120368.c 
b/gcc/testsuite/gcc.target/riscv/pr120368.c
new file mode 100644
index ..4fea8e6fe7c4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr120368.c
@@ -0,0 +1,19 @@
+/* { dg-do run } */
+
+int g;
+
+int
+foo (int s, int v)
+{
+  __builtin_memset (&g, v >> (s & 31), sizeof(g));
+  return g;
+}
+
+int
+main ()
+{
+  int x = foo (-16, 0xd);
+  if (x != 0x0d0d0d0d)
+__builtin_abort();
+  __builtin_exit (0);
+}

[gcc r16-810] [PATCH] configure: Always add pre-installed header directories to search path

2025-05-21 Thread Jeff Law via Gcc-cvs

https://gcc.gnu.org/g:dff727b2c28c52e90e0bd61957d15f907494b245

commit r16-810-gdff727b2c28c52e90e0bd61957d15f907494b245
Author: Stephanos Ioannidis 
Date:   Wed May 21 17:28:36 2025 -0600

[PATCH] configure: Always add pre-installed header directories to search 
path

configure script was adding the target directory flags, including the
'-B' flags for the executable prefix and the '-isystem' flags for the
pre-installed header directories, to the target flags only for
non-Canadian builds under the premise that the host binaries under the
executable prefix will not be able to execute on the build system for
Canadian builds.

While that is true for the '-B' flags specifying the executable prefix,
the '-isystem' flags specifying the pre-installed header directories are
not affected by this and do not need special handling.

This patch updates the configure script to always add the 'include' and
'sys-include' pre-installed header directories to the target search
path, in order to ensure that the availability of the pre-installed
header directories in the search path is consistent across non-Canadian
and Canadian builds.

When '--with-headers' flag is specified, this effectively ensures that
the libc headers, that are copied from the specified header directory to
the sys-include directory, are used by libstdc++.

* configure.ac: Always add pre-installed headers to search path.
* configure: Regenerate.

Diff:
---
 configure| 16 ++--
 configure.ac | 16 ++--
 2 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/configure b/configure
index ebc44416b6c2..3f85b6ca49de 100755
--- a/configure
+++ b/configure
@@ -11595,13 +11595,17 @@ if test x"${build}" = x"${host}" ; then
   LDFLAGS_FOR_BUILD=${LDFLAGS_FOR_BUILD-${LDFLAGS}}
 fi
 
-# On Canadian crosses, we'll be searching the right directories for
-# the previously-installed cross compiler, so don't bother to add
-# flags for directories within the install tree of the compiler
-# being built; programs in there won't even run.
-if test "${build}" = "${host}" && test -d ${srcdir}/gcc; then
+if test -d ${srcdir}/gcc; then
+  # On Canadian crosses, we'll be searching the right directories for the
+  # previously-installed cross compiler, so don't bother to add flags for
+  # executable directories within the install tree of the compiler being built;
+  # programs in there won't even run.
+  if test "${build}" = "${host}"; then
+FLAGS_FOR_TARGET=$FLAGS_FOR_TARGET' -B$(build_tooldir)/bin/ 
-B$(build_tooldir)/lib/'
+  fi
+
   # Search for pre-installed headers if nothing else fits.
-  FLAGS_FOR_TARGET=$FLAGS_FOR_TARGET' -B$(build_tooldir)/bin/ 
-B$(build_tooldir)/lib/ -isystem $(build_tooldir)/include -isystem 
$(build_tooldir)/sys-include'
+  FLAGS_FOR_TARGET=$FLAGS_FOR_TARGET' -isystem $(build_tooldir)/include 
-isystem $(build_tooldir)/sys-include'
 fi
 
 if test "x${use_gnu_ld}" = x &&
diff --git a/configure.ac b/configure.ac
index 730db3c14024..d6c3fb54f08d 100644
--- a/configure.ac
+++ b/configure.ac
@@ -3822,13 +3822,17 @@ if test x"${build}" = x"${host}" ; then
   LDFLAGS_FOR_BUILD=${LDFLAGS_FOR_BUILD-${LDFLAGS}}
 fi
 
-# On Canadian crosses, we'll be searching the right directories for
-# the previously-installed cross compiler, so don't bother to add
-# flags for directories within the install tree of the compiler
-# being built; programs in there won't even run.
-if test "${build}" = "${host}" && test -d ${srcdir}/gcc; then
+if test -d ${srcdir}/gcc; then
+  # On Canadian crosses, we'll be searching the right directories for the
+  # previously-installed cross compiler, so don't bother to add flags for
+  # executable directories within the install tree of the compiler being built;
+  # programs in there won't even run.
+  if test "${build}" = "${host}"; then
+FLAGS_FOR_TARGET=$FLAGS_FOR_TARGET' -B$(build_tooldir)/bin/ 
-B$(build_tooldir)/lib/'
+  fi
+
   # Search for pre-installed headers if nothing else fits.
-  FLAGS_FOR_TARGET=$FLAGS_FOR_TARGET' -B$(build_tooldir)/bin/ 
-B$(build_tooldir)/lib/ -isystem $(build_tooldir)/include -isystem 
$(build_tooldir)/sys-include'
+  FLAGS_FOR_TARGET=$FLAGS_FOR_TARGET' -isystem $(build_tooldir)/include 
-isystem $(build_tooldir)/sys-include'
 fi
 
 if test "x${use_gnu_ld}" = x &&

[gcc(refs/users/meissner/heads/work206-bugs)] Fix PR 118541, do not generate unordered fp cmoves for IEEE compares.

2025-05-21 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:d88170074249387a79537291b3548cb115712d86

commit d88170074249387a79537291b3548cb115712d86
Author: Michael Meissner 
Date:   Wed May 21 20:03:02 2025 -0400

Fix PR 118541, do not generate unordered fp cmoves for IEEE compares.

In bug PR target/118541 on power9, power10, and power11 systems, for the
function:

extern double __ieee754_acos (double);

double
__acospi (double x)
{
  double ret = __ieee754_acos (x) / 3.14;
  return __builtin_isgreater (ret, 1.0) ? 1.0 : ret;
}

GCC currently generates the following code:

Power9  Power10 and Power11
==  ===
bl __ieee754_acos   bl __ieee754_acos@notoc
nop plfd 0,.LC0@pcrel
addis 9,2,.LC2@toc@ha   xxspltidp 12,1065353216
addi 1,1,32 addi 1,1,32
lfd 0,.LC2@toc@l(9) ld 0,16(1)
addis 9,2,.LC0@toc@ha   fdiv 0,1,0
ld 0,16(1)  mtlr 0
lfd 12,.LC0@toc@l(9)xscmpgtdp 1,0,12
fdiv 0,1,0  xxsel 1,0,12,1
mtlr 0  blr
xscmpgtdp 1,0,12
xxsel 1,0,12,1
blr

This is because ifcvt.c optimizes the conditional floating point move to 
use the
XSCMPGTDP instruction.

However, the XSCMPGTDP instruction will generate an interrupt if one of the
arguments is a signalling NaN and signalling NaNs can generate an interrupt.
The IEEE comparison functions (isgreater, etc.) require that the comparison 
not
raise an interrupt.

The root cause of this is we allow floating point comparisons to be reversed
(i.e. LT will be reversed to UNGE).  Before power9, this was ok because we 
only
generated the FCMPU or XSCMPUDP instructions.

But with power9, we can generate the XSCMPEQDP, XSCMPGTDP, or XSCMPGEDP
instructions.  This code now does not convert an unordered compare into an
ordered compare.  Instead, it does the opposite comparison and swaps the
arguments.  I.e. it converts:

r = (a < b) ? c : d;

into:

r = (b >= a) ? c : d;

For the following code:

double
ordered_compare (double a, double b, double c, double d)
{
  return __builtin_isgreater (a, b) ? c : d;
}

/* Verify normal > does generate xscmpgtdp.  */

double
normal_compare (double a, double b, double c, double d)
{
  return a > b ? c : d;
}

with the following patch, GCC generates the following for power9, power10, 
and
power11:

ordered_compare:
fcmpu 0,1,2
fmr 1,4
bnglr 0
fmr 1,3
blr

normal_compare:
xscmpgtdp 1,1,2
xxsel 1,4,3,1
blr

I have built bootstrap compilers on big endian power9 systems and little 
endian
power9/power10 systems and there were no regressions.  Can I check this 
patch
into the GCC trunk, and after a waiting period, can I check this into the 
active
older branches?

2025-05-21  Michael Meissner  

gcc/

PR target/118541
* config/rs6000/predicates.md (invert_fpmask_comparison_operator):
Delete.
(fpmask_reverse_args_comparison_operator): New predicate.
* config/rs6000/rs6000-protos.h (rs6000_fpmask_reverse_args): New
declaration.
* config/rs6000/rs6000.cc (rs6000_fpmask_reverse_args): New 
function.
* config/rs6000/rs6000.h (REVERSIBLE_CC_MODE): Do not allow floating
point comparisons to be reversed unless -ffinite-math-only is used.
* config/rs6000/rs6000.md (movcc_p9): Add
comment.
(movcc_invert_p9): Reverse the argument 
order for
the comparison, and use an unordered comparison, instead of ordered
comparison.
(movcc_invert_p10): Likewise.

gcc/testsuite/

PR target/118541
* gcc.target/powerpc/pr118541.c: New test.

Diff:
---
 gcc/config/rs6000/predicates.md |   6 +-
 gcc/config/rs6000/rs6000-protos.h   |   1 +
 gcc/config/rs6000/rs6000.cc |  24 +
 gcc/config/rs6000/rs6000.h  |  15 ++-
 gcc/config/rs6000/rs6000.md |  12 +--
 gcc/testsuite/gcc.target/powerpc/pr118541.c | 147 
 6 files changed, 192 insertions(+), 13 deletions(-)

diff --git a/gcc/config/rs6000/predicates

[gcc r16-811] aarch64: Carry over zeroness in aarch64_evpc_reencode

2025-05-21 Thread Pengxuan Zheng via Gcc-cvs

https://gcc.gnu.org/g:84c6988c026114727693cd7cd74b8cd5cdcdeb74

commit r16-811-g84c6988c026114727693cd7cd74b8cd5cdcdeb74
Author: Pengxuan Zheng 
Date:   Tue May 20 17:58:23 2025 -0700

aarch64: Carry over zeroness in aarch64_evpc_reencode

There was a bug in aarch64_evpc_reencode which could leave zero_op0_p and
zero_op1_p of the struct "newd" uninitialized.  r16-701-gd77c3bc1c35e303 
fixed
the issue by zero initializing "newd."  This patch provides an alternative 
fix
as suggested by Richard Sandiford based on the fact that the zeroness is
preserved by aarch64_evpc_reencode.

gcc/ChangeLog:

* config/aarch64/aarch64.cc (aarch64_evpc_reencode): Copy 
zero_op0_p and
zero_op1_p from d to newd.

Signed-off-by: Pengxuan Zheng 

Diff:
---
 gcc/config/aarch64/aarch64.cc | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 1da615c8955a..2b837ec8e673 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -26327,7 +26327,7 @@ aarch64_evpc_trn (struct expand_vec_perm_d *d)
 static bool
 aarch64_evpc_reencode (struct expand_vec_perm_d *d)
 {
-  expand_vec_perm_d newd = {};
+  expand_vec_perm_d newd;
 
   /* The subregs that we'd create are not supported for big-endian SVE;
  see aarch64_modes_compatible_p for details.  */
@@ -26353,6 +26353,8 @@ aarch64_evpc_reencode (struct expand_vec_perm_d *d)
   newd.op1 = d->op1 ? gen_lowpart (new_mode, d->op1) : NULL;
   newd.testing_p = d->testing_p;
   newd.one_vector_p = d->one_vector_p;
+  newd.zero_op0_p = d->zero_op0_p;
+  newd.zero_op1_p = d->zero_op1_p;
 
   newd.perm.new_vector (newpermindices.encoding (), newd.one_vector_p ? 1 : 2,
newpermindices.nelts_per_input ());

[gcc(refs/users/meissner/heads/work206-bugs)] Update ChangeLog.*

2025-05-21 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:cec29adfc6dea03c04e6c6f6c1a3a70ed1c500c4

commit cec29adfc6dea03c04e6c6f6c1a3a70ed1c500c4
Author: Michael Meissner 
Date:   Wed May 21 20:04:35 2025 -0400

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.bugs | 30 --
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/gcc/ChangeLog.bugs b/gcc/ChangeLog.bugs
index 91250828ad30..0134634d76f9 100644
--- a/gcc/ChangeLog.bugs
+++ b/gcc/ChangeLog.bugs
@@ -1,4 +1,4 @@
- Branch work206-bugs, patch #111 
+ Branch work206-bugs, patch #112 
 
 Fix PR 118541, do not generate unordered fp cmoves for IEEE compares.
 
@@ -42,14 +42,18 @@ raise an interrupt.
 
 The root cause of this is we allow floating point comparisons to be reversed
 (i.e. LT will be reversed to UNGE).  Before power9, this was ok because we only
-generated the FCMPU or XSCMPUDP instructions.  But with power9, we can generate
-the XSCMPEQDP, XSCMPGTDP, or XSCMPGEDP instructions.  If NaNs are allowed, we 
no
-longer allow FP comparisons to be reversed.  If FP comparisons can't be
-reversed, the machine independent portions of the compiler will generate the
-comparison with the arguments reversed.
+generated the FCMPU or XSCMPUDP instructions.
 
-Since we do not support reversing FP comparisons, the code to support inverting
-fpmask operations on power9 has been removed.
+But with power9, we can generate the XSCMPEQDP, XSCMPGTDP, or XSCMPGEDP
+instructions.  This code now does not convert an unordered compare into an
+ordered compare.  Instead, it does the opposite comparison and swaps the
+arguments.  I.e. it converts:
+
+   r = (a < b) ? c : d;
+
+into:
+
+   r = (b >= a) ? c : d;
 
 For the following code:
 
@@ -94,12 +98,17 @@ gcc/
PR target/118541
* config/rs6000/predicates.md (invert_fpmask_comparison_operator):
Delete.
+   (fpmask_reverse_args_comparison_operator): New predicate.
+   * config/rs6000/rs6000-proto.h (rs6000_fpmask_reverse_args): New
+   declaration.
+   * config/rs6000/rs6000.cc (rs6000_fpmask_reverse_args): New function.
* config/rs6000/rs6000.h (REVERSIBLE_CC_MODE): Do not allow floating
point comparisons to be reversed unless -ffinite-math-only is used.
-   (rs6000_reverse_condition): Add argument.
* config/rs6000/rs6000.md (movcc_p9): Add
comment.
-   (movcc_invert_p9): Delete insns.
+   (movcc_invert_p9): Reverse the argument order for
+   the comparison, and use an unordered comparison, instead of ordered
+   comparison.
(movcc_invert_p10): Likewise.
 
 gcc/testsuite/
@@ -107,6 +116,7 @@ gcc/testsuite/
PR target/118541
* gcc.target/powerpc/pr118541.c: New test.
 
+ Branch work206-bugs, patch #111 was reverted 

  Branch work206-bugs, patch #110 was reverted 

 
  Branch work206-bugs, patch #101

[gcc(refs/users/meissner/heads/work206-bugs)] Revert changes

2025-05-21 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:6ac181184da7d2c0bf54646434b253d46d33b86c

commit 6ac181184da7d2c0bf54646434b253d46d33b86c
Author: Michael Meissner 
Date:   Wed May 21 15:29:23 2025 -0400

Revert changes

Diff:
---
 gcc/config/rs6000/predicates.md |   6 ++
 gcc/config/rs6000/rs6000.h  |  15 +--
 gcc/config/rs6000/rs6000.md |  81 +--
 gcc/testsuite/gcc.target/powerpc/pr118541.c | 147 
 4 files changed, 84 insertions(+), 165 deletions(-)

diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 02ba8fa6c9b0..647e89afb6a7 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -1463,6 +1463,12 @@
 (define_predicate "fpmask_comparison_operator"
   (match_code "eq,gt,ge"))
 
+;; Return 1 if OP is a comparison operator suitable for vector/scalar
+;; comparisons that generate a 0/-1 mask (i.e. the inverse of
+;; fpmask_comparison_operator).
+(define_predicate "invert_fpmask_comparison_operator"
+  (match_code "ne,unlt,unle"))
+
 ;; Return 1 if OP is a comparison operation suitable for integer vector/scalar
 ;; comparisons that generate a -1/0 mask.
 (define_predicate "vecint_comparison_operator"
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index c8d9456e0912..9267612fbc9c 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -1810,17 +1810,10 @@ extern scalar_int_mode rs6000_pmode;
: (((OP) == EQ || (OP) == NE) && COMPARISON_P (X) \
   ? CCEQmode : CCmode))
 
-/* Can the condition code MODE be safely reversed?  Don't allow floating point
-   comparisons to be reversed unless NaNs are not allowed.
-
-   In the past, we used to allow reversing FP operations because we only
-   generated FCMPU comparisons and not FCMPO.  However, starting with power9,
-   the XSCMPEQDP, XSCMPGTDP, and XSCMPGEDP instructions will trap if a
-   signalling NaN is used.  If we allow reversing FP operations, we could wind
-   up converting a LT operation into UNGE and the instruction will trap.  The
-   machine independent parts of the compiler will handle reversing the
-   arguments if the FP comparison cannot be reversed.  */
-#define REVERSIBLE_CC_MODE(MODE) ((MODE) != CCFPmode || flag_finite_math_only)
+/* Can the condition code MODE be safely reversed?  This is safe in
+   all cases on this port, because at present it doesn't use the
+   trapping FP comparisons (fcmpo).  */
+#define REVERSIBLE_CC_MODE(MODE) 1
 
 /* Given a condition code and a mode, return the inverse condition.  */
 #define REVERSE_CONDITION(CODE, MODE) rs6000_reverse_condition (MODE, CODE)
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index b9a91b0eab24..65da0c653304 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -5699,13 +5699,6 @@
   "fsel %0,%1,%2,%3"
   [(set_attr "type" "fp")])
 
-;; On power9 and above generate the XSCMPEQFP, XSCMPGTDP, and XSCMPGEDP
-;; instructions followed by XXSEL to do a floating point conditional move.  In
-;; the past, we provided patterns for inverting the comparison, converting a LE
-;; into UNGT.  However, the XSCMPEQDP, XSCMPGTDP, and XSCMPGEDP instructions
-;; will trap if one of the arguments is a signalling NaN.  Since we aren't
-;; providing the inverted operation, the machine independent parts of the
-;; compiler generate code with the arguments swapped.
 (define_insn_and_split "*movcc_p9"
   [(set (match_operand:SFDF 0 "vsx_register_operand" "=&wa,wa")
(if_then_else:SFDF
@@ -5737,6 +5730,43 @@
  [(set_attr "length" "8")
   (set_attr "type" "vecperm")])
 
+;; Handle inverting the fpmask comparisons.
+(define_insn_and_split "*movcc_invert_p9"
+  [(set (match_operand:SFDF 0 "vsx_register_operand" "=&wa,wa")
+   (if_then_else:SFDF
+(match_operator:CCFP 1 "invert_fpmask_comparison_operator"
+   [(match_operand:SFDF2 2 "vsx_register_operand" "wa,wa")
+(match_operand:SFDF2 3 "vsx_register_operand" "wa,wa")])
+(match_operand:SFDF 4 "vsx_register_operand" "wa,wa")
+(match_operand:SFDF 5 "vsx_register_operand" "wa,wa")))
+   (clobber (match_scratch:V2DI 6 "=0,&wa"))]
+  "TARGET_P9_MINMAX"
+  "#"
+  "&& 1"
+  [(set (match_dup 6)
+   (if_then_else:V2DI (match_dup 9)
+  (match_dup 7)
+  (match_dup 8)))
+   (set (match_dup 0)
+   (if_then_else:SFDF (ne (match_dup 6)
+  (match_dup 8))
+  (match_dup 5)
+  (match_dup 4)))]
+{
+  rtx op1 = operands[1];
+  enum rtx_code cond = reverse_condition_maybe_unordered (GET_CODE (op1));
+
+  if (GET_CODE (operands[6]) == SCRATCH)
+operands[6] = gen_reg_rtx (V2DImode);
+
+  operands[7] = CONSTM1_RTX (V2DImode);
+  operands[8] = CONST0_RTX (V2DImode);
+
+  operands[9] = gen_rtx_fmt_ee (cond, CCFPmode, operands[2], operands[3]);
+}
+ [(set_attr "len

[gcc(refs/users/meissner/heads/work206-bugs)] Fix PR 118541, do not generate unordered fp cmoves for IEEE compares.

2025-05-21 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:33d453d937554d8d93861541ea8926647a674ea8

commit 33d453d937554d8d93861541ea8926647a674ea8
Author: Michael Meissner 
Date:   Wed May 21 14:01:38 2025 -0400

Fix PR 118541, do not generate unordered fp cmoves for IEEE compares.

In bug PR target/118541 on power9, power10, and power11 systems, for the
function:

extern double __ieee754_acos (double);

double
__acospi (double x)
{
  double ret = __ieee754_acos (x) / 3.14;
  return __builtin_isgreater (ret, 1.0) ? 1.0 : ret;
}

GCC currently generates the following code:

Power9  Power10 and Power11
==  ===
bl __ieee754_acos   bl __ieee754_acos@notoc
nop plfd 0,.LC0@pcrel
addis 9,2,.LC2@toc@ha   xxspltidp 12,1065353216
addi 1,1,32 addi 1,1,32
lfd 0,.LC2@toc@l(9) ld 0,16(1)
addis 9,2,.LC0@toc@ha   fdiv 0,1,0
ld 0,16(1)  mtlr 0
lfd 12,.LC0@toc@l(9)xscmpgtdp 1,0,12
fdiv 0,1,0  xxsel 1,0,12,1
mtlr 0  blr
xscmpgtdp 1,0,12
xxsel 1,0,12,1
blr

This is because ifcvt.c optimizes the conditional floating point move to 
use the
XSCMPGTDP instruction.

However, the XSCMPGTDP instruction will generate an interrupt if one of the
arguments is a signalling NaN and signalling NaNs can generate an interrupt.
The IEEE comparison functions (isgreater, etc.) require that the comparison 
not
raise an interrupt.

The root cause of this is we allow floating point comparisons to be reversed
(i.e. LT will be reversed to UNGE).  Before power9, this was ok because we 
only
generated the FCMPU or XSCMPUDP instructions.  But with power9, we can 
generate
the XSCMPEQDP, XSCMPGTDP, or XSCMPGEDP instructions.  If NaNs are allowed, 
we no
longer allow FP comparisons to be reversed.  If FP comparisons can't be
reversed, the machine independent portions of the compiler will generate the
comparison with the arguments reversed.

Since we do not support reversing FP comparisons, the code to support 
inverting
fpmask operations on power9 has been removed.

For the following code:

double
ordered_compare (double a, double b, double c, double d)
{
  return __builtin_isgreater (a, b) ? c : d;
}

/* Verify normal > does generate xscmpgtdp.  */

double
normal_compare (double a, double b, double c, double d)
{
  return a > b ? c : d;
}

with the following patch, GCC generates the following for power9, power10, 
and
power11:

ordered_compare:
fcmpu 0,1,2
fmr 1,4
bnglr 0
fmr 1,3
blr

normal_compare:
xscmpgtdp 1,1,2
xxsel 1,4,3,1
blr

I have built bootstrap compilers on big endian power9 systems and little 
endian
power9/power10 systems and there were no regressions.  Can I check this 
patch
into the GCC trunk, and after a waiting period, can I check this into the 
active
older branches?

2025-05-21  Michael Meissner  

gcc/

PR target/118541
* config/rs6000/predicates.md (invert_fpmask_comparison_operator):
Delete.
* config/rs6000/rs6000.h (REVERSIBLE_CC_MODE): Do not allow floating
point comparisons to be reversed unless -ffinite-math-only is used.
(rs6000_reverse_condition): Add argument.
* config/rs6000/rs6000.md (movcc_p9): Add
comment.
(movcc_invert_p9): Delete insns.
(movcc_invert_p10): Likewise.

gcc/testsuite/

PR target/118541
* gcc.target/powerpc/pr118541.c: New test.

Diff:
---
 gcc/config/rs6000/predicates.md |   6 --
 gcc/config/rs6000/rs6000.h  |  15 ++-
 gcc/config/rs6000/rs6000.md |  81 ++-
 gcc/testsuite/gcc.target/powerpc/pr118541.c | 147 
 4 files changed, 165 insertions(+), 84 deletions(-)

diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 647e89afb6a7..02ba8fa6c9b0 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -1463,12 +1463,6 @@
 (define_predicate "fpmask_comparison_operator"
   (match_code "eq,gt,ge"))
 
-;; Return 1 if OP is a comparison operator suitable for vector/sca

[gcc(refs/users/meissner/heads/work206-bugs)] Update ChangeLog.*

2025-05-21 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:86eb8563626948a0bc690c1a61907bf75fb0b940

commit 86eb8563626948a0bc690c1a61907bf75fb0b940
Author: Michael Meissner 
Date:   Wed May 21 14:03:12 2025 -0400

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.bugs | 42 ++
 1 file changed, 22 insertions(+), 20 deletions(-)

diff --git a/gcc/ChangeLog.bugs b/gcc/ChangeLog.bugs
index 8eb6295859ab..91250828ad30 100644
--- a/gcc/ChangeLog.bugs
+++ b/gcc/ChangeLog.bugs
@@ -1,4 +1,4 @@
- Branch work206-bugs, patch #110 
+ Branch work206-bugs, patch #111 
 
 Fix PR 118541, do not generate unordered fp cmoves for IEEE compares.
 
@@ -40,11 +40,16 @@ arguments is a signalling NaN and signalling NaNs can 
generate an interrupt.
 The IEEE comparison functions (isgreater, etc.) require that the comparison not
 raise an interrupt.
 
-The following patch changes the PowerPC back end so that ifcvt.c will not 
change
-the if/then test and move into a conditional move if the comparison is one of
-the comparisons that do not raise an error with signalling NaNs and -Ofast is
-not used.  If a normal comparison is used or -Ofast is used, GCC will continue
-to generate XSCMPGTDP and XXSEL.
+The root cause of this is we allow floating point comparisons to be reversed
+(i.e. LT will be reversed to UNGE).  Before power9, this was ok because we only
+generated the FCMPU or XSCMPUDP instructions.  But with power9, we can generate
+the XSCMPEQDP, XSCMPGTDP, or XSCMPGEDP instructions.  If NaNs are allowed, we 
no
+longer allow FP comparisons to be reversed.  If FP comparisons can't be
+reversed, the machine independent portions of the compiler will generate the
+comparison with the arguments reversed.
+
+Since we do not support reversing FP comparisons, the code to support inverting
+fpmask operations on power9 has been removed.
 
 For the following code:
 
@@ -82,31 +87,28 @@ power9/power10 systems and there were no regressions.  Can 
I check this patch
 into the GCC trunk, and after a waiting period, can I check this into the 
active
 older branches?
 
-2025-05-20  Michael Meissner  
+2025-05-21  Michael Meissner  
 
 gcc/
 
PR target/118541
-   * config/rs6000/predicates.md (invert_fpmask_comparison_operator): Do
-   not allow UNLT and UNLE unless -ffast-math.
-   * config/rs6000/rs6000-protos.h (enum rev_cond_ordered): New 
enumeration.
-   (rs6000_reverse_condition): Add argument.
-   * config/rs6000/rs6000.cc (rs6000_reverse_condition): Do not allow
-   ordered comparisons to be reversed for floating point conditional moves,
-   but allow ordered comparisons to be reversed on jumps.
-   (rs6000_emit_sCOND): Adjust rs6000_reverse_condition call.
+   * config/rs6000/predicates.md (invert_fpmask_comparison_operator):
+   Delete.
* config/rs6000/rs6000.h (REVERSIBLE_CC_MODE): Do not allow floating
-   point comparisons to be reversed on platforms that can generate the
-   xscmp{eq,gt,ge}{dp,qp} instructions.
-   (REVERSE_CONDITION): Adjust rs6000_reverse_condition call.
-   * config/rs6000/rs6000.md (reverse_branch_comparison): Name insn.
-   Adjust rs6000_reverse_condition calls.
+   point comparisons to be reversed unless -ffinite-math-only is used.
+   (rs6000_reverse_condition): Add argument.
+   * config/rs6000/rs6000.md (movcc_p9): Add
+   comment.
+   (movcc_invert_p9): Delete insns.
+   (movcc_invert_p10): Likewise.
 
 gcc/testsuite/
 
PR target/118541
* gcc.target/powerpc/pr118541.c: New test.
 
+ Branch work206-bugs, patch #110 was reverted 

+
  Branch work206-bugs, patch #101 
 
 PR 99293: Optimize splat of a V2DF/V2DI extract with constant element

[gcc r16-814] [PATCH] testsuite: RISC-V: Update the cset-sext-sfb/zba-slliuw test optimization level.

2025-05-21 Thread Jeff Law via Gcc-cvs

https://gcc.gnu.org/g:d8636b05c559e6f060e16652bb10c59d9fb0fb54

commit r16-814-gd8636b05c559e6f060e16652bb10c59d9fb0fb54
Author: Dongyan Chen 
Date:   Wed May 21 21:46:52 2025 -0600

[PATCH] testsuite: RISC-V: Update the cset-sext-sfb/zba-slliuw test 
optimization level.

Two testcases failed in the GCC regression test: cset-sext-sfb.c failed
the -Oz test, and zba-slliuw.c failed the -Og test.
This patch solves the problem by skipping those optimization levels.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/cset-sext-sfb.c: Skip for -Oz.
* gcc.target/riscv/zba-slliuw.c: Skip for -Og.

Diff:
---
 gcc/testsuite/gcc.target/riscv/cset-sext-sfb.c | 2 +-
 gcc/testsuite/gcc.target/riscv/zba-slliuw.c| 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.target/riscv/cset-sext-sfb.c 
b/gcc/testsuite/gcc.target/riscv/cset-sext-sfb.c
index 4a8477e81621..3d46306f1e19 100644
--- a/gcc/testsuite/gcc.target/riscv/cset-sext-sfb.c
+++ b/gcc/testsuite/gcc.target/riscv/cset-sext-sfb.c
@@ -1,5 +1,5 @@
 /* { dg-do compile { target { ! riscv_abi_e } } } */
-/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-O1" "-Os" } } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-O1" "-Os" "-Oz" } } */
 /* { dg-options "-march=rv32gc -mtune=sifive-7-series -mbranch-cost=1 
-fno-ssa-phiopt -fdump-rtl-ce1" { target { rv32 } } } */
 /* { dg-options "-march=rv64gc -mtune=sifive-7-series -mbranch-cost=1 
-fno-ssa-phiopt -fdump-rtl-ce1" { target { rv64 } } } */
 
diff --git a/gcc/testsuite/gcc.target/riscv/zba-slliuw.c 
b/gcc/testsuite/gcc.target/riscv/zba-slliuw.c
index c123bb5ece0f..69914db95a2c 100644
--- a/gcc/testsuite/gcc.target/riscv/zba-slliuw.c
+++ b/gcc/testsuite/gcc.target/riscv/zba-slliuw.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-march=rv64gc_zba_zbs -mabi=lp64" } */
-/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" } } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Og" } } */
 
 long
 foo (long i)

[gcc r16-808] [RISC-V] Improve (x << C1) + C2 split code

2025-05-21 Thread Jeff Law via Gcc-cvs

https://gcc.gnu.org/g:0bed343a2a640c7be4a1970d303098ccf62bd1c6

commit r16-808-g0bed343a2a640c7be4a1970d303098ccf62bd1c6
Author: Jeff Law 
Date:   Wed May 21 16:04:58 2025 -0600

[RISC-V] Improve (x << C1) + C2 split code

I wrote this a couple months ago to fix an instruction count regression in
505.mcf on risc-v, but I don't have a trivial little testcase to add to the
suite.

There were two problems with the pattern.

First, the code was generating a shift followed by an add after reload.
Naturally combine doesn't run after reload and the code stayed in that form
rather than using shadd when available.

Second the splitter was just over-active.  We need to make sure that the
shifted form of the constant operand has a cost > 1 to synthesize.  It's
useless to split if the shifted constant can be synthesized in a single
instruction.

This has been in my tester since March.  So it's been through numerous
riscv64-elf and riscv32-elf test cycles as well as multiple rv64 bootstrap
tests.  Waiting on the upstream CI system to render a verdict before moving
forward.

Looking further out I'm hoping this pattern will transform into a simpler 
and
always active define_split.

gcc/
* config/riscv/riscv.md ((x << C1) + C2): Tighten split condition
and generate more efficient code when splitting.

Diff:
---
 gcc/config/riscv/riscv.md | 33 -
 1 file changed, 24 insertions(+), 9 deletions(-)

diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 7e35d7877ed9..a5b3abbe5d45 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -4704,23 +4704,38 @@
(match_operand 2 "const_int_operand" "n"))
 (match_operand 3 "const_int_operand" "n")))
(clobber (match_scratch:DI 4 "=&r"))]
-  "(TARGET_64BIT && riscv_const_insns (operands[3], false) == 1)"
+  "(TARGET_64BIT
+&& riscv_const_insns (operands[3], false) == 1
+&& riscv_const_insns (GEN_INT (INTVAL (operands[3])
+ << INTVAL (operands[2])), false) != 1)"
   "#"
   "&& reload_completed"
   [(const_int 0)]
   "{
- rtx x = gen_rtx_ASHIFT (DImode, operands[1], operands[2]);
- emit_insn (gen_rtx_SET (operands[0], x));
-
- /* If the constant fits in a simm12, use it directly as we do not
-   get another good chance to optimize things again.  */
- if (!SMALL_OPERAND (INTVAL (operands[3])))
+ /* Prefer to generate shNadd when we can, even over using an
+   immediate form.  If we're not going to be able to generate
+   a shNadd, then use the constant directly if it fits in a
+   simm12 field since we won't get another chance to optimize this.  */
+ if ((TARGET_ZBA && imm123_operand (operands[2], word_mode))
+|| !SMALL_OPERAND (INTVAL (operands[3])))
emit_move_insn (operands[4], operands[3]);
  else
operands[4] = operands[3];
 
- x = gen_rtx_PLUS (DImode, operands[0], operands[4]);
- emit_insn (gen_rtx_SET (operands[0], x));
+ if (TARGET_ZBA && imm123_operand (operands[2], word_mode))
+   {
+rtx x = gen_rtx_ASHIFT (DImode, operands[1], operands[2]);
+x = gen_rtx_PLUS (DImode, x, operands[4]);
+emit_insn (gen_rtx_SET (operands[0], x));
+   }
+ else
+   {
+rtx x = gen_rtx_ASHIFT (DImode, operands[1], operands[2]);
+emit_insn (gen_rtx_SET (operands[0], x));
+x = gen_rtx_PLUS (DImode, operands[0], operands[4]);
+emit_insn (gen_rtx_SET (operands[0], x));
+   }
+
  DONE;
}"
   [(set_attr "type" "arith")])

[gcc r16-809] combine: gen_lowpart_no_emit vs CLOBBER [PR120090]

2025-05-21 Thread Andrew Pinski via Gcc-cvs

https://gcc.gnu.org/g:f725d6765373f7884a2ea23bc11409b15545958b

commit r16-809-gf725d6765373f7884a2ea23bc11409b15545958b
Author: Andrew Pinski 
Date:   Mon May 5 09:46:14 2025 -0700

combine: gen_lowpart_no_emit vs CLOBBER [PR120090]

The problem here is simplify-rtx.cc expects gen_lowpart_no_emit
to return NULL on failure but combine's hook was returning CLOBBER.
After r16-160-ge6f89d78c1a7528e93458278, 
gcc.target/i386/avx512bw-pr103750-2.c
started to fail at -m32 due to this as new simplify code would return
a RTL with a clobber in it rather than returning NULL.
To fix this, gen_lowpart_no_emit should return NULL when there was a failure
instead of a clobber. This only changes the gen_lowpart_no_emit hook and 
not the
generic gen_lowpart hook as parts of combine just pass gen_lowpart result 
directly
without checking the return value.

Bootstrapped and tested on x86_64-linux-gnu.

PR rtl-optimization/120090
gcc/ChangeLog:

* combine.cc (gen_lowpart_for_combine_no_emit): New function.
(RTL_HOOKS_GEN_LOWPART_NO_EMIT): Set to 
gen_lowpart_for_combine_no_emit.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/combine.cc | 19 ++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/gcc/combine.cc b/gcc/combine.cc
index 67cf0447607f..4dbc1f6a4a4e 100644
--- a/gcc/combine.cc
+++ b/gcc/combine.cc
@@ -458,6 +458,7 @@ static rtx simplify_shift_const (rtx, enum rtx_code, 
machine_mode, rtx,
 int);
 static int recog_for_combine (rtx *, rtx_insn *, rtx *, unsigned = 0, unsigned 
= 0);
 static rtx gen_lowpart_for_combine (machine_mode, rtx);
+static rtx gen_lowpart_for_combine_no_emit (machine_mode, rtx);
 static enum rtx_code simplify_compare_const (enum rtx_code, machine_mode,
 rtx *, rtx *);
 static enum rtx_code simplify_comparison (enum rtx_code, rtx *, rtx *);
@@ -491,7 +492,7 @@ static rtx gen_lowpart_or_truncate (machine_mode, rtx);
 
 /* Our implementation of gen_lowpart never emits a new pseudo.  */
 #undef RTL_HOOKS_GEN_LOWPART_NO_EMIT
-#define RTL_HOOKS_GEN_LOWPART_NO_EMIT  gen_lowpart_for_combine
+#define RTL_HOOKS_GEN_LOWPART_NO_EMIT  gen_lowpart_for_combine_no_emit
 
 #undef RTL_HOOKS_REG_NONZERO_REG_BITS
 #define RTL_HOOKS_REG_NONZERO_REG_BITS reg_nonzero_bits_for_combine
@@ -11890,6 +11891,22 @@ gen_lowpart_for_combine (machine_mode omode, rtx x)
  fail:
   return gen_rtx_CLOBBER (omode, const0_rtx);
 }
+
+/* Like gen_lowpart_for_combine but returns NULL_RTX
+   for an error instead of CLOBBER.
+   Note no_emit is not called directly from combine but rather from
+   simplify_rtx and is expecting a NULL on failure rather than
+   a CLOBBER.  */
+
+static rtx
+gen_lowpart_for_combine_no_emit (machine_mode omode, rtx x)
+{
+  rtx tem = gen_lowpart_for_combine (omode, x);
+  if (!tem || GET_CODE (tem) == CLOBBER)
+return NULL_RTX;
+  return tem;
+}
+
 
 /* Try to simplify a comparison between OP0 and a constant OP1,
where CODE is the comparison code that will be tested, into a

[gcc r16-813] [RISC-V] Clear high or low bits using shift pairs

2025-05-21 Thread Jeff Law via Gcc-cvs

https://gcc.gnu.org/g:b3c778e858497f2b7f37fa8a3101854361c025da

commit r16-813-gb3c778e858497f2b7f37fa8a3101854361c025da
Author: Shreya Munnangi 
Date:   Wed May 21 18:49:14 2025 -0600

[RISC-V] Clear high or low bits using shift pairs

So the first special case of clearing bits from Shreya's work.  We can 
clear an
arbitrary number of high bits by shifting left by the number of bits to 
clear,
then logically shifting right to put everything in place.   Similarly we can
clear an arbitrary number of low bits with a right logical shift followed 
by a
left shift.  Naturally this only applies when the constant synthesis budget 
is
2+ insns.

Even with mvconst_internal still enabled this does consistently show various
small code generation improvements.

I have seen a notable regression.  The two shift form to wipe out high bits
isn't handled well by ext-dce.  Essentially it looks like we don't recognize
the sequence as wiping upper bits, instead it makes bits live and as a 
result
we're unable to remove a prior zero extension.  I've opened a bug for this
issue.

The other case I've seen is CSE related.  If we had a number of masking
operations with the same mask, we might have previously CSE'd the constant. 
 In
that scenario each instance of masking would be a single AND using the CSE'd
register holding the constant, whereas with this patch it'll be a pair of
shifts.  But on a good uarch design the pair of shifts would be fused into a
single op.  Given this is relatively rare and on the margins from a 
performance
standpoint I'm not going to worry about it.

This has spun in my tester for riscv32-elf and riscv64-elf.  Bootstrap and
regression test is in flight and due in an hour or so.   Waiting on the
upstream pre-commit tester and the bootstrap test before moving forward.

gcc/
* config/riscv/riscv.cc (synthesize_and): When profitable, use two
shift combinations to clear high or low bits rather than synthesizing
the constant.

Diff:
---
 gcc/config/riscv/riscv.cc | 37 +
 1 file changed, 37 insertions(+)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 03dcc347fb87..41a164bc7783 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -14525,6 +14525,43 @@ synthesize_and (rtx operands[3])
}
 }
 
+  /* The number of instructions to synthesize the constant is a good
+ estimate of the budget.  That does not account for out of order
+ execution and fusion in the constant synthesis; those would naturally
+ decrease the budget.  It also does not account for the AND at
+ the end of the sequence which would increase the budget. */
+  int budget = riscv_const_insns (operands[2], true);
+  rtx input = NULL_RTX;
+  rtx output = NULL_RTX;
+
+  /* Left shift + right shift to clear high bits.  */
+  if (budget >= 2 && p2m1_shift_operand (operands[2], word_mode))
+{
+  int count = (GET_MODE_BITSIZE (GET_MODE (operands[1])).to_constant ()
+  - exact_log2 (INTVAL (operands[2]) + 1));
+  rtx x = gen_rtx_ASHIFT (word_mode, operands[1], GEN_INT (count));
+  output = gen_reg_rtx (word_mode);
+  emit_insn (gen_rtx_SET (output, x));
+  input = output;
+  x = gen_rtx_LSHIFTRT (word_mode, input, GEN_INT (count));
+  emit_insn (gen_rtx_SET (operands[0], x));
+  return true;
+}
+
+  /* Clears a bunch of low bits with only high bits set.  */
+  unsigned HOST_WIDE_INT t = ~INTVAL (operands[2]);
+  if (budget >= 2 && exact_log2 (t + 1) >= 0)
+{
+  int count = ctz_hwi (INTVAL (operands[2]));
+  rtx x = gen_rtx_LSHIFTRT (word_mode, operands[1], GEN_INT (count));
+  output = gen_reg_rtx (word_mode);
+  emit_insn (gen_rtx_SET (output, x));
+  input = output;
+  x = gen_rtx_ASHIFT (word_mode, input, GEN_INT (count));
+  emit_insn (gen_rtx_SET (operands[0], x));
+  return true;
+}
+
   /* If the remaining budget has gone to less than zero, it
  forces the value into a register and performs the AND
  operation.  It returns TRUE to the caller so the caller

[gcc(refs/users/meissner/heads/work206-bugs)] Revert changes

2025-05-21 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:b3b14d8bec72de46bf0721e367756d13b1b3044f

commit b3b14d8bec72de46bf0721e367756d13b1b3044f
Author: Michael Meissner 
Date:   Wed May 21 10:44:41 2025 -0400

Revert changes

Diff:
---
 gcc/config/rs6000/predicates.md |  10 +-
 gcc/config/rs6000/rs6000-protos.h   |  17 +---
 gcc/config/rs6000/rs6000.cc |  35 ++-
 gcc/config/rs6000/rs6000.h  |  13 +--
 gcc/config/rs6000/rs6000.md |  25 ++---
 gcc/testsuite/gcc.target/powerpc/pr118541.c | 147 
 6 files changed, 24 insertions(+), 223 deletions(-)

diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index ba8df6a7979d..647e89afb6a7 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -1466,16 +1466,8 @@
 ;; Return 1 if OP is a comparison operator suitable for vector/scalar
 ;; comparisons that generate a 0/-1 mask (i.e. the inverse of
 ;; fpmask_comparison_operator).
-;;
-;; invert_fpmask_comparison_operator is used to form floating point conditional
-;; moves on power9.  The instructions that would be generated (xscmpeqdp,
-;; xscmpgtdp, or xscmpgedp) will raise an error if one of the arguments is a
-;; signalling NaN.  Don't allow the test to be inverted if NaNs are supported
-;; and the comparison is an ordered comparison.
 (define_predicate "invert_fpmask_comparison_operator"
-  (ior (match_code "ne")
-   (and (match_code "unlt,unle")
-   (match_test "flag_finite_math_only"
+  (match_code "ne,unlt,unle"))
 
 ;; Return 1 if OP is a comparison operation suitable for integer vector/scalar
 ;; comparisons that generate a -1/0 mask.
diff --git a/gcc/config/rs6000/rs6000-protos.h 
b/gcc/config/rs6000/rs6000-protos.h
index 5beb44fc339b..4619142d197b 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -114,23 +114,8 @@ extern const char *rs6000_sibcall_template (rtx *, 
unsigned int);
 extern const char *rs6000_indirect_call_template (rtx *, unsigned int);
 extern const char *rs6000_indirect_sibcall_template (rtx *, unsigned int);
 extern const char *rs6000_pltseq_template (rtx *, int);
-
-/* Whether we can reverse the sense of an ordered (UNLT, UNLE, UNGT, UNGE,
-   UNEQ, or LTGT) comparison.  If we are doing floating point conditional moves
-   on power9 and above, we cannot convert an ordered comparison to unordered,
-   since the instructions (XSCMP{EQ,GT,GE}DP) that are used for conditional
-   moves can trap if an argument is a signalling NaN.  However for normal jumps
-   we can reverse a comparison since we only use unordered compare instructions
-   which do not trap on signalling NaNs.  */
-
-enum class rev_cond_ordered {
-  ordered_ok,
-  no_ordered
-};
-
 extern enum rtx_code rs6000_reverse_condition (machine_mode,
-  enum rtx_code,
-  enum rev_cond_ordered);
+  enum rtx_code);
 extern rtx rs6000_emit_eqne (machine_mode, rtx, rtx, rtx);
 extern rtx rs6000_emit_fp_cror (rtx_code, machine_mode, rtx);
 extern void rs6000_emit_sCOND (machine_mode, rtx[]);
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 86660fc534ff..11dfde7f288b 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -9358,8 +9358,7 @@ rs6000_debug_legitimize_address (rtx x, rtx oldx, 
machine_mode mode)
 
   start_sequence ();
   ret = rs6000_legitimize_address (x, oldx, mode);
-  insns = get_insns ();
-  end_sequence ();
+  insns = end_sequence ();
 
   if (ret != x)
 {
@@ -15367,28 +15366,17 @@ rs6000_print_patchable_function_entry (FILE *file,
 }
 
 enum rtx_code
-rs6000_reverse_condition (machine_mode mode,
- enum rtx_code code,
- enum rev_cond_ordered ordered_cmp_ok)
+rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
 {
   /* Reversal of FP compares takes care -- an ordered compare
- becomes an unordered compare and vice versa.
-
- However, this is not safe for ordered comparisons (i.e. for isgreater,
- etc.)  starting with the power9 because ifcvt.cc will want to create a fp
- cmove, and the x{s,v}cmp{eq,gt,ge}{dp,qp} instructions will trap if one of
- the arguments is a signalling NaN.  */
-
+ becomes an unordered compare and vice versa.  */
   if (mode == CCFPmode
-  && (code == UNLT || code == UNLE || code == UNGT || code == UNGE
+  && (!flag_finite_math_only
+ || code == UNLT || code == UNLE || code == UNGT || code == UNGE
  || code == UNEQ || code == LTGT))
-{
-  return (ordered_cmp_ok == rev_cond_ordered::no_ordered
- ? UNKNOWN
- : reverse_condition_maybe_unordered (code));
-}
-
-  return reverse_condition (code);
+return reverse_condition_maybe_unordered (code);
+  else
+return reverse_condition (code);
 }

[gcc(refs/vendors/redhat/heads/gcc-15-branch)] Merge commit 'r15-9719-g7e580225e57086e335a16f9258d0401a21e468ef' into redhat/gcc-15-branch

2025-05-21 Thread Jakub Jelinek via Libstdc++-cvs

https://gcc.gnu.org/g:b9def1721b12cae307c1a1ebc49030fce6531dfa

commit b9def1721b12cae307c1a1ebc49030fce6531dfa
Merge: ac84ab706662 7e580225e570
Author: Jakub Jelinek 
Date:   Wed May 21 14:40:58 2025 +0200

Merge commit 'r15-9719-g7e580225e57086e335a16f9258d0401a21e468ef' into 
redhat/gcc-15-branch

Diff:

 contrib/ChangeLog  | 7 +
 gcc/ChangeLog  |   229 +
 gcc/DATESTAMP  | 2 +-
 gcc/ada/ChangeLog  |18 +
 gcc/ada/Makefile.rtl   | 1 +
 gcc/ada/exp_ch3.adb|30 +-
 gcc/ada/init.c | 2 +-
 gcc/ada/libgnat/s-dorepr__freebsd.adb  |   172 +
 gcc/c-family/ChangeLog | 8 +
 gcc/c-family/c-opts.cc |32 +-
 gcc/c/ChangeLog| 8 +
 gcc/c/c-typeck.cc  |20 +-
 gcc/cgraph.cc  | 2 +-
 gcc/cgraph.h   | 2 +-
 gcc/cgraphclones.cc|14 +-
 gcc/config/aarch64/aarch64.cc  |66 +-
 gcc/config/alpha/alpha.cc  |23 +-
 gcc/config/avr/avr.md  | 4 +
 gcc/config/i386/x86-tune.def   | 4 +-
 gcc/config/s390/s390.cc|13 +-
 gcc/cp/ChangeLog   |   130 +
 gcc/cp/class.cc|56 +-
 gcc/cp/cp-tree.h   | 8 +-
 gcc/cp/decl2.cc|14 +-
 gcc/cp/module.cc   |83 +-
 gcc/cp/name-lookup.cc  |43 +-
 gcc/cp/optimize.cc | 4 +-
 gcc/cp/pt.cc   |33 +-
 gcc/cp/rtti.cc | 1 +
 gcc/doc/gm2.texi   | 2 +-
 gcc/doc/invoke.texi|87 +
 gcc/dwarf2out.cc   | 3 +-
 gcc/fortran/ChangeLog  |   118 +
 gcc/fortran/check.cc   |42 +-
 gcc/fortran/dependency.cc  | 6 +-
 gcc/fortran/dump-parse-tree.cc | 7 +-
 gcc/fortran/expr.cc| 1 +
 gcc/fortran/gfortran.h | 3 +
 gcc/fortran/interface.cc   |   135 +-
 gcc/fortran/match.cc   | 3 +-
 gcc/fortran/primary.cc |13 +-
 gcc/fortran/resolve.cc | 7 +-
 gcc/fortran/trans-decl.cc  | 6 +-
 gcc/fortran/trans-expr.cc  |10 +-
 gcc/fortran/trans-intrinsic.cc |51 +-
 gcc/fortran/trans-openmp.cc|20 +
 gcc/gimple-fold.cc | 6 +-
 gcc/ipa-cp.cc  |12 +-
 gcc/ipa-sra.cc | 2 +-
 gcc/m2/ChangeLog   |57 +
 gcc/m2/gm2-compiler/M2Check.mod|21 +-
 gcc/m2/gm2-compiler/M2GenGCC.mod   | 3 -
 gcc/m2/gm2-compiler/M2Quads.mod|10 +-
 gcc/m2/gm2-compiler/M2Range.mod|43 +-
 gcc/m2/gm2-compiler/PCSymBuild.mod |13 +-
 gcc/m2/gm2-compiler/SymbolTable.def|16 +
 gcc/m2/gm2-compiler/SymbolTable.mod|   138 +-
 gcc/m2/gm2-libs/FormatStrings.mod  | 4 +-
 gcc/po/ChangeLog   |18 +
 gcc/po/be.po   |  9391 +++
 gcc/po/da.po   |  9447 +++
 gcc/po/de.po   |  9403 +++
 gcc/po/el.po   |  9377 +++
 gcc/po/es.po   |  9447 +++
 gcc/po/fi.po   |  9403 +++
 gcc/po/fr.po   |  9407 +++
 gcc/po/hr.po   | 10325 -
 gcc/po/id.po   |  9428 +++
 gcc/po/ja.po   |  9399 +++
 gcc/po/ka.po   |  9355 +++
 gcc/po/nl.po   |  9384 +++
 gcc/po/ru.po   |  9439 +++---

[gcc/redhat/heads/gcc-15-branch] (133 commits) Merge commit 'r15-9719-g7e580225e57086e335a16f9258d0401a21e

2025-05-21 Thread Jakub Jelinek via Gcc-cvs

The branch 'redhat/heads/gcc-15-branch' was updated to point to:

 b9def1721b12... Merge commit 'r15-9719-g7e580225e57086e335a16f9258d0401a21e

It previously pointed to:

 ac84ab706662... Merge commit 'r15-9587-ga36dd9ee5bb1d2f2f19b8d935db29468a35

Diff:

Summary of changes (added commits):
---

  b9def17... Merge commit 'r15-9719-g7e580225e57086e335a16f9258d0401a21e
  7e58022... Daily bump. (*)
  6683c72... Fortran: fix passing of inquiry ref of complex array to TRA (*)
  c1db46f... tree-sra: Do not create stores into const aggregates (PR111 (*)
  76d16fb... ipa: Dump cgraph_node UID instead of order into ipa-clones  (*)
  911cfea... libstdc++: Fix incorrect links to archived SGI STL docs (*)
  2d1244a... c++/modules: Fix ICE on merge of instantiation with partial (*)
  3ba1b0a... c++/modules: Always mark tinfo vars as TREE_ADDRESSABLE [PR (*)
  fedf81e... Daily bump. (*)
  06a10db... libstdc++: Fix some Clang -Wsystem-headers warnings in  for C++17 [PR1201 (*)
  d06a800... libstdc++: Restore std::scoped_lock for non-gthreads target (*)
  c9e3181... c+: -Wabi false positive [PR120012] (*)
  74f41d4... Update cpplib es.po (*)
  c1d4d81... Update gcc sv.po (*)
  981abdf... libstdc++: Update C++23 status table (*)
  0b76b58... libstdc++: Fix constraint recursion in std::expected's oper (*)
  0ff3b31... libstdc++: Fix availability of std::erase_if(std::flat_foo) (*)
  a341d96... libstdc++: Suppress GDB output from new 'skip' commands [PR (*)
  fc135d4... libstdc++: Update  rows in C++17 status table (*)
  74dbb19... Daily bump. (*)
  44cd55a... tree-optimization/120211 - constrain LOOP_VINFO_EARLY_BREAK (*)
  47e8302... ipa/120146 - deal with vanished varpool nodes in IPA PTA (*)
  94d10c0... tree-optimization/120143 - ICE with failed early break stor (*)
  4017b37... tree-optimization/120089 - force all PHIs live for early-br (*)
  856c493... tree-optimization/120043 - bogus conditional store eliminat (*)
  7b38bab... Fix PR 119928, formal arguments used to wrongly inferred fo (*)
  bdcef06... Daily bump. (*)
  de01448... testsuite: Fix pr119131-1.c for targets which emit a psabi  (*)
  1c1847f... Daily bump. (*)
  c60183d... Fortran: parsing issue with DO CONCURRENT;ENDDO on same lin (*)
  01324ff... Fortran: array subreferences and components of derived type (*)
  6683f2c... Fix wrong optimization of complex boolean expression (*)
  89ca647... Daily bump. (*)
  7317c72... fortran: Add testcases for PR120152, PR120153 and PR120158 (*)
  d4fd651... libfortran: Fix up maxval/maxloc for UNSIGNED [PR120158] (*)
  0ca51bd... libfortran: Add 5 missing UNSIGNED symbols [PR120153] (*)
  9d19251... libfortran: Readd 15 accidentally removed libgfortran symbo (*)
  bfcb5da... libcpp: Further fixes for incorrect line numbers in large f (*)
  f950bdb... Daily bump. (*)
  d0e6d79... libstdc++: Add missing export for std::is_layout_compatible (*)
  809c5d7... c++: C++17/20 class layout divergence [PR120012] (*)
  fa55a6c... c++: let plain -Wabi warn about future changes (*)
  77780c3... ipa: Do not emit info about temporary clones to ipa-clones  (*)
  99e2f11... Document option -fdump-ipa-clones (*)
  e52f71b... libstdc++: Fix width computation for the chrono formatting  (*)
  d2d8318... Daily bump. (*)
  b6f68c0... Allow IPA_CP to handle UNDEFINED as VARYING. (*)
  0d46cee... libstdc++: Add missing feature-test macro in  (*)
  ebc9606... libstdc++: Remove unnecessary dg-prune-output from tests (*)
  f2a69ee... libstdc++: fix possible undefined atomic lock-free type ali (*)
  a14d65f... gimple-fold: Fix fold_truth_andor_for_ifcombine [PR120074] (*)
  941a1b4... libgomp: Update SVE test (*)
  24a0279... Daily bump. (*)
  169ad48... ipa/120006 - wrong code with IPA PTA (*)
  b36014e... ipa/119973 - IPA PTA issue with global initializers (*)
  37c3124... Ada: Fix assertion failure on Finalizable aspect for tagged (*)
  055434c... Daily bump. (*)
  2b114d7... Daily bump. (*)
  768c8ae... Fortran: fix procedure pointer handling with -fcheck=pointe (*)
  2f0338c... Daily bump. (*)
  14c2a12... c: Fix up RAW_DATA_CST handling in check_constexpr_init [PR (*)
  d32ece4... libsanitizer: Fix build with glibc 2.42 (*)
  e9eaf25... Daily bump. (*)
  90484ea... c++/modules: Ensure deduction guides for imported types are (*)
  aa49bb9... c++/modules: Fix imported CNTTPs being considered non-const (*)
  3042862... c++/modules: Catch exposures of TU-local values through inl (*)
  25db596... Daily bump. (*)
  d3e4290... Update gcc .po files (*)
  f685d31... Always reflect lower bits from mask in subranges. (*)
  c9d4d3b... testsuite: Force -mcmodel=small for gcc.target/aarch64/pr11 (*)
  670250c... c++: UNBOUND_CLASS_TEMPLATE context substitution [PR119981] (*)
  01ebce5... Fix GNAT build failure for x86/FreeBSD (*)
  ffc40e9... AVR: fxload__libgcc: Use REG_ prefix. (*)
  e268cb2... AVR: target/119989 - Add missing clobbers to xload__l (*)
  17695fe... Fix compilation failure on FreeBSD (*)
  9483020... libs

[gcc r16-806] RISC-V: Add test for vec_duplicate + vand.vv combine case 1 with GR2VR cost 0, 1 and 2

2025-05-21 Thread Pan Li via Gcc-cvs

https://gcc.gnu.org/g:4f02bfb62da3a0e32a86cc2ac1171b11da026e7c

commit r16-806-g4f02bfb62da3a0e32a86cc2ac1171b11da026e7c
Author: Pan Li 
Date:   Tue May 20 22:30:04 2025 +0800

RISC-V: Add test for vec_duplicate + vand.vv combine case 1 with GR2VR cost 
0, 1 and 2

Add asm dump check tests for combining vec_duplicate + vand.vv into vand.vx,
with GR2VR costs of 0, 1 and 2.

The following test suites pass with this patch:
* The full rv64gcv regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i16.c: Add asm check
for vand.vx combine.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i32.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i64.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i8.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u16.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u32.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u64.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u8.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i16.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i32.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i64.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i8.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u16.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u32.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u64.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u8.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i16.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i32.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i64.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i8.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u16.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u32.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u64.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u8.c: Ditto.

Signed-off-by: Pan Li 

Diff:
---
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i16.c | 4 +++-
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i32.c | 4 +++-
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i64.c | 4 +++-
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i8.c  | 4 +++-
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u16.c | 4 +++-
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u32.c | 4 +++-
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u64.c | 4 +++-
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u8.c  | 4 +++-
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i16.c | 4 +++-
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i32.c | 4 +++-
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i64.c | 4 +++-
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i8.c  | 4 +++-
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u16.c | 4 +++-
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u32.c | 4 +++-
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u64.c | 4 +++-
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u8.c  | 4 +++-
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i16.c | 2 ++
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i32.c | 2 ++
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i64.c | 2 ++
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i8.c  | 6 --
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u16.c | 2 ++
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u32.c | 2 ++
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u64.c | 2 ++
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u8.c  | 2 ++
 24 files changed, 66 insertions(+), 18 deletions(-)

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i16.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i16.c
index 6f59b07d236c..62fd4e39c018 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i16.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i16.c
@@ -7,8 +7,10 @@
 
 DEF_VX_BINARY_CASE_1_WRAP(T, +, add, VX_BINARY_BODY_X16)
 DEF_VX_BINARY_CASE_1_WRAP(T, -, sub, VX_BINARY_BODY_X16)
-DEF_VX_BINARY_REVERSE_CASE_1_WRAP(T, -, rsub, VX_BINARY_REVERSE_BODY_X16);
+DEF_VX_BINARY_REVERSE_CASE_1_WRAP(T, -, rsub, VX_BINARY_REVERSE_BODY_X16)
+DEF_VX_BINARY_CASE_1_WRAP(T, &, and, VX_BINARY_BODY_X16)
 
 /* { dg-final { scan-assembler {vadd.vx} } } */
 /* { dg-final { scan-assembler {vsub.vx} } } */
 /* { dg-final { scan-assembler {vrsub.vx} } } */
+/* { dg-final { scan-assembler {vand.vx} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i32.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i32.c
index 69b2227d889a..d047458b81d0 100644
--- a/gcc/test

[gcc r16-804] RISC-V: RISC-V: Combine vec_duplicate + vand.vv to vand.vx on GR2VR cost

2025-05-21 Thread Pan Li via Gcc-cvs

https://gcc.gnu.org/g:ad041944f1060be0c9280421a065037aa14e169e

commit r16-804-gad041944f1060be0c9280421a065037aa14e169e
Author: Pan Li 
Date:   Tue May 20 15:00:15 2025 +0800

RISC-V: RISC-V: Combine vec_duplicate + vand.vv to vand.vx on GR2VR cost

This patch combines the vec_duplicate + vand.vv into the vand.vx, as
shown in the example code below.  The related pattern depends on the
cost of vec_duplicate from GR2VR: late-combine will perform the
combination if the GR2VR cost is zero, and reject it if the GR2VR
cost is greater than zero.

Assume we have example code like below, GR2VR cost is 0.

  #define DEF_VX_BINARY(T, OP)\
  void\
  test_vx_binary (T * restrict out, T * restrict in, T x, unsigned n) \
  {   \
for (unsigned i = 0; i < n; i++)  \
  out[i] = in[i] OP x;\
  }

  DEF_VX_BINARY(int32_t, &)

Before this patch:
  10   │ test_vx_binary_and_int32_t_case_0:
  11   │ beq a3,zero,.L8
  12   │ vsetvli a5,zero,e32,m1,ta,ma
  13   │ vmv.v.x v2,a2
  14   │ sllia3,a3,32
  15   │ srlia3,a3,32
  16   │ .L3:
  17   │ vsetvli a5,a3,e32,m1,ta,ma
  18   │ vle32.v v1,0(a1)
  19   │ sllia4,a5,2
  20   │ sub a3,a3,a5
  21   │ add a1,a1,a4
  22   │ vand.vv v1,v1,v2
  23   │ vse32.v v1,0(a0)
  24   │ add a0,a0,a4
  25   │ bne a3,zero,.L3

After this patch:
  10   │ test_vx_binary_and_int32_t_case_0:
  11   │ beq a3,zero,.L8
  12   │ sllia3,a3,32
  13   │ srlia3,a3,32
  14   │ .L3:
  15   │ vsetvli a5,a3,e32,m1,ta,ma
  16   │ vle32.v v1,0(a1)
  17   │ sllia4,a5,2
  18   │ sub a3,a3,a5
  19   │ add a1,a1,a4
  20   │ vand.vx v1,v1,a2
  21   │ vse32.v v1,0(a0)
  22   │ add a0,a0,a4
  23   │ bne a3,zero,.L3

The following test suites pass with this patch:
* The full rv64gcv regression test.

gcc/ChangeLog:

* config/riscv/riscv-v.cc (expand_vx_binary_vec_dup_vec): Add new
case for rtx code AND.
(expand_vx_binary_vec_vec_dup): Ditto.
* config/riscv/riscv.cc (riscv_rtx_costs): Ditto.
* config/riscv/vector-iterators.md: Add new op and to 
no_shift_vx_ops.

Signed-off-by: Pan Li 

Diff:
---
 gcc/config/riscv/riscv-v.cc  | 2 ++
 gcc/config/riscv/riscv.cc| 1 +
 gcc/config/riscv/vector-iterators.md | 2 +-
 3 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 1b5ef51886e3..e406e7a7f590 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -5511,6 +5511,7 @@ expand_vx_binary_vec_dup_vec (rtx op_0, rtx op_1, rtx 
op_2,
   switch (code)
 {
 case PLUS:
+case AND:
   icode = code_for_pred_scalar (code, mode);
   break;
 case MINUS:
@@ -5537,6 +5538,7 @@ expand_vx_binary_vec_vec_dup (rtx op_0, rtx op_1, rtx 
op_2,
   switch (code)
 {
 case MINUS:
+case AND:
   icode = code_for_pred_scalar (code, mode);
   break;
 default:
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 1a88e96d8c6f..03dcc347fb87 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -3916,6 +3916,7 @@ riscv_rtx_costs (rtx x, machine_mode mode, int 
outer_code, int opno ATTRIBUTE_UN
break;
  case PLUS:
  case MINUS:
+ case AND:
{
  rtx op_0 = XEXP (x, 0);
  rtx op_1 = XEXP (x, 1);
diff --git a/gcc/config/riscv/vector-iterators.md 
b/gcc/config/riscv/vector-iterators.md
index 23cb940310f2..026be6f65d39 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -4042,7 +4042,7 @@
 ])
 
 (define_code_iterator any_int_binop_no_shift_vx [
-  plus minus
+  plus minus and
 ])
 
 (define_code_iterator any_int_unop [neg not])

[gcc r16-805] RISC-V: Add test for vec_duplicate + vand.vv combine case 0 with GR2VR cost 0, 2 and 15

2025-05-21 Thread Pan Li via Gcc-cvs

https://gcc.gnu.org/g:b7b914622e8da0d5f10027d9a4db418f21ed2ddc

commit r16-805-gb7b914622e8da0d5f10027d9a4db418f21ed2ddc
Author: Pan Li 
Date:   Tue May 20 15:06:34 2025 +0800

RISC-V: Add test for vec_duplicate + vand.vv combine case 0 with GR2VR cost 
0, 2 and 15

Add asm dump check tests for combining vec_duplicate + vand.vv into vand.vx,
with GR2VR costs of 0, 2 and 15.

The following test suites pass with this patch:
* The full rv64gcv regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i16.c: Add test cases
for vand vx combine case 0 on GR2VR cost.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i32.c: Ditto
* gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i64.c: Ditto
* gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i8.c: Ditto
* gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u16.c: Ditto
* gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u32.c: Ditto
* gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u64.c: Ditto
* gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u8.c: Ditto
* gcc.target/riscv/rvv/autovec/vx_vf/vx-2-i16.c: Ditto
* gcc.target/riscv/rvv/autovec/vx_vf/vx-2-i32.c: Ditto
* gcc.target/riscv/rvv/autovec/vx_vf/vx-2-i64.c: Ditto
* gcc.target/riscv/rvv/autovec/vx_vf/vx-2-i8.c: Ditto
* gcc.target/riscv/rvv/autovec/vx_vf/vx-2-u16.c: Ditto
* gcc.target/riscv/rvv/autovec/vx_vf/vx-2-u32.c: Ditto
* gcc.target/riscv/rvv/autovec/vx_vf/vx-2-u64.c: Ditto
* gcc.target/riscv/rvv/autovec/vx_vf/vx-2-u8.c: Ditto
* gcc.target/riscv/rvv/autovec/vx_vf/vx-3-i16.c: Ditto
* gcc.target/riscv/rvv/autovec/vx_vf/vx-3-i32.c: Ditto
* gcc.target/riscv/rvv/autovec/vx_vf/vx-3-i64.c: Ditto
* gcc.target/riscv/rvv/autovec/vx_vf/vx-3-i8.c: Ditto
* gcc.target/riscv/rvv/autovec/vx_vf/vx-3-u16.c: Ditto
* gcc.target/riscv/rvv/autovec/vx_vf/vx-3-u32.c: Ditto
* gcc.target/riscv/rvv/autovec/vx_vf/vx-3-u64.c: Ditto
* gcc.target/riscv/rvv/autovec/vx_vf/vx-3-u8.c: Ditto
* gcc.target/riscv/rvv/autovec/vx_vf/vx_binary_data.h: Add test
data for vand.vx run test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vand-run-1-i16.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vand-run-1-i32.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vand-run-1-i64.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vand-run-1-i8.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vand-run-1-u16.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vand-run-1-u32.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vand-run-1-u64.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vand-run-1-u8.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 .../gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i16.c  |   2 +
 .../gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i32.c  |   2 +
 .../gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i64.c  |   2 +
 .../gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i8.c   |   2 +
 .../gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u16.c  |   2 +
 .../gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u32.c  |   2 +
 .../gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u64.c  |   2 +
 .../gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u8.c   |   2 +
 .../gcc.target/riscv/rvv/autovec/vx_vf/vx-2-i16.c  |   4 +-
 .../gcc.target/riscv/rvv/autovec/vx_vf/vx-2-i32.c  |   4 +-
 .../gcc.target/riscv/rvv/autovec/vx_vf/vx-2-i64.c  |   4 +-
 .../gcc.target/riscv/rvv/autovec/vx_vf/vx-2-i8.c   |   4 +-
 .../gcc.target/riscv/rvv/autovec/vx_vf/vx-2-u16.c  |   4 +-
 .../gcc.target/riscv/rvv/autovec/vx_vf/vx-2-u32.c  |   4 +-
 .../gcc.target/riscv/rvv/autovec/vx_vf/vx-2-u64.c  |   4 +-
 .../gcc.target/riscv/rvv/autovec/vx_vf/vx-2-u8.c   |   4 +-
 .../gcc.target/riscv/rvv/autovec/vx_vf/vx-3-i16.c  |   4 +-
 .../gcc.target/riscv/rvv/autovec/vx_vf/vx-3-i32.c  |   4 +-
 .../gcc.target/riscv/rvv/autovec/vx_vf/vx-3-i64.c  |   4 +-
 .../gcc.target/riscv/rvv/autovec/vx_vf/vx-3-i8.c   |   4 +-
 .../gcc.target/riscv/rvv/autovec/vx_vf/vx-3-u16.c  |   4 +-
 .../gcc.target/riscv/rvv/autovec/vx_vf/vx-3-u32.c  |   4 +-
 .../gcc.target/riscv/rvv/autovec/vx_vf/vx-3-u64.c  |   4 +-
 .../gcc.target/riscv/rvv/autovec/vx_vf/vx-3-u8.c   |   4 +-
 .../riscv/rvv/autovec/vx_vf/vx_binary_data.h   | 392 +
 .../riscv/rvv/autovec/vx_vf/vx_vand-run-1-i16.c|  15 +
 .../riscv/rvv/autovec/vx_vf/vx_vand-run-1-i32.c|  15 +
 .../riscv/rvv/autovec/vx_vf/vx_vand-run-1-i64.c|  15 +
 .../riscv/rvv/autovec/vx_vf/vx_vand-run-1-i8.c |  15 +
 .../riscv/rvv/autovec/vx_vf/vx_vand-run-1-u16.c|  15 +
 .../riscv/rvv/autovec/vx_vf/vx_vand-run-1-u32.c|  15 +
 .../riscv/rvv/autovec/vx_vf/vx_vand-run-1-u64.c|  15 +
 .../riscv/rvv/autovec/vx_vf/vx_vand-run-1-u8.c |

52 matches

Mail list logo