[gcc r16-770] libstdc++: remove two redundant statements in pb_ds binary tree
https://gcc.gnu.org/g:6740732a659f9bef523f872c633d5477e8dc349c commit r16-770-g6740732a659f9bef523f872c633d5477e8dc349c Author: Xi Ruoyao Date: Fri Jul 10 20:10:52 2020 +0800 libstdc++: remove two redundant statements in pb_ds binary tree libstdc++-v3/ChangeLog: * include/ext/pb_ds/detail/bin_search_tree_/insert_fn_imps.hpp (insert_leaf_new, insert_imp_empty): remove redundant statements. Diff: --- .../include/ext/pb_ds/detail/bin_search_tree_/insert_fn_imps.hpp| 2 -- 1 file changed, 2 deletions(-) diff --git a/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/insert_fn_imps.hpp b/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/insert_fn_imps.hpp index e6e954dc29c8..b8f5014838c2 100644 --- a/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/insert_fn_imps.hpp +++ b/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/insert_fn_imps.hpp @@ -122,7 +122,6 @@ insert_leaf_new(const_reference r_value, node_pointer p_nd, bool left_nd) } p_new_nd->m_p_parent = p_nd; - p_new_nd->m_p_left = p_new_nd->m_p_right = 0; PB_DS_ASSERT_NODE_CONSISTENT(p_nd) update_to_top(p_new_nd, (node_update* )this); @@ -142,7 +141,6 @@ insert_imp_empty(const_reference r_value) m_p_head->m_p_parent = p_new_node; p_new_node->m_p_parent = m_p_head; - p_new_node->m_p_left = p_new_node->m_p_right = 0; _GLIBCXX_DEBUG_ONLY(debug_base::insert_new(PB_DS_V2F(r_value));) update_to_top(m_p_head->m_p_parent, (node_update*)this);
[gcc r16-771] libstdc++: maintain subtree size in pb_ds binary search trees
https://gcc.gnu.org/g:2e27df6cbd05a3ee742434b7f50dbff5f363b487 commit r16-771-g2e27df6cbd05a3ee742434b7f50dbff5f363b487 Author: Xℹ Ruoyao Date: Fri Jul 10 20:58:04 2020 +0800 libstdc++: maintain subtree size in pb_ds binary search trees libstdc++-v3/ChangeLog: * include/ext/pb_ds/detail/rb_tree_map_/node.hpp (rb_tree_node_::size_type): New typedef. (rb_tree_node_::m_subtree_size): New field. * include/ext/pb_ds/detail/splay_tree_/node.hpp (splay_tree_node_::size_type): New typedef. (splay_tree_node_::m_subtree_size): New field. * include/ext/pb_ds/detail/bin_search_tree_/bin_search_tree_.hpp (PB_DS_BIN_TREE_NAME::update_subtree_size): Declare new member function. * include/ext/pb_ds/detail/bin_search_tree_/rotate_fn_imps.hpp (update_subtree_size): Define. (apply_update, update_to_top): Call update_subtree_size. Diff: --- .../detail/bin_search_tree_/bin_search_tree_.hpp | 3 +++ .../detail/bin_search_tree_/rotate_fn_imps.hpp | 31 +++--- .../include/ext/pb_ds/detail/rb_tree_map_/node.hpp | 8 ++ .../include/ext/pb_ds/detail/splay_tree_/node.hpp | 8 ++ 4 files changed, 46 insertions(+), 4 deletions(-) diff --git a/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/bin_search_tree_.hpp b/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/bin_search_tree_.hpp index 6088709998a3..a8c73b55b89d 100644 --- a/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/bin_search_tree_.hpp +++ b/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/bin_search_tree_.hpp @@ -304,6 +304,9 @@ namespace __gnu_pbds inline void rotate_parent(node_pointer); + inline void + update_subtree_size(node_pointer); + inline void apply_update(node_pointer, null_node_update_pointer); diff --git a/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/rotate_fn_imps.hpp b/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/rotate_fn_imps.hpp index 069b17f08de2..8cadce2349bd 100644 --- a/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/rotate_fn_imps.hpp +++ 
b/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/rotate_fn_imps.hpp @@ -122,8 +122,23 @@ rotate_parent(node_pointer p_nd) PB_DS_CLASS_T_DEC inline void PB_DS_CLASS_C_DEC:: -apply_update(node_pointer /*p_nd*/, null_node_update_pointer /*p_update*/) -{ } +update_subtree_size(node_pointer p_nd) +{ + size_type size = 1; + if (p_nd->m_p_left) +size += p_nd->m_p_left->m_subtree_size; + if (p_nd->m_p_right) +size += p_nd->m_p_right->m_subtree_size; + p_nd->m_subtree_size = size; +} + +PB_DS_CLASS_T_DEC +inline void +PB_DS_CLASS_C_DEC:: +apply_update(node_pointer p_nd, null_node_update_pointer /*p_update*/) +{ + update_subtree_size(p_nd); +} PB_DS_CLASS_T_DEC template @@ -131,6 +146,7 @@ inline void PB_DS_CLASS_C_DEC:: apply_update(node_pointer p_nd, Node_Update_* /*p_update*/) { + update_subtree_size(p_nd); node_update::operator()(node_iterator(p_nd), node_const_iterator(static_cast(0))); } @@ -152,7 +168,14 @@ update_to_top(node_pointer p_nd, Node_Update_* p_update) PB_DS_CLASS_T_DEC inline void PB_DS_CLASS_C_DEC:: -update_to_top(node_pointer /*p_nd*/, null_node_update_pointer /*p_update*/) -{ } +update_to_top(node_pointer p_nd, null_node_update_pointer /*p_update */) +{ + while (p_nd != m_p_head) +{ + update_subtree_size(p_nd); + + p_nd = p_nd->m_p_parent; +} +} #endif diff --git a/libstdc++-v3/include/ext/pb_ds/detail/rb_tree_map_/node.hpp b/libstdc++-v3/include/ext/pb_ds/detail/rb_tree_map_/node.hpp index f229be7342c6..3803ddb19c5d 100644 --- a/libstdc++-v3/include/ext/pb_ds/detail/rb_tree_map_/node.hpp +++ b/libstdc++-v3/include/ext/pb_ds/detail/rb_tree_map_/node.hpp @@ -58,6 +58,9 @@ namespace __gnu_pbds typedef typename rebind_traits<_Alloc, rb_tree_node_>::pointer node_pointer; + typedef typename rebind_traits<_Alloc, rb_tree_node_>::size_type + size_type; + typedef typename rebind_traits<_Alloc, metadata_type>::reference metadata_reference; @@ -88,6 +91,7 @@ namespace __gnu_pbds node_pointer m_p_left; node_pointer m_p_right; node_pointer m_p_parent; + 
size_typem_subtree_size; value_type m_value; bool m_red; metadata_typem_metadata; @@ -100,6 +104,9 @@ namespace __gnu_pbds typedef Value_Type value_type; typedef null_typemetadata_type; + typedef typename rebind_traits<_Alloc, rb_tree_node_>::size_type + size_type; + typedef typename rebind_traits<_Alloc, rb_tree_node_>::pointer node_pointer; @@ -116,6 +123,7 @@ namespace __gnu_pbds node_pointer m_p_left; node_pointer m_p_right; node_pointer m_p_parent; +
[gcc r16-772] libstdc++: use maintained size when split pb_ds binary search trees
https://gcc.gnu.org/g:36c20fee22d40c6d25f52e929b42f5eab62cb1eb commit r16-772-g36c20fee22d40c6d25f52e929b42f5eab62cb1eb Author: Xi Ruoyao Date: Fri Jul 10 21:38:09 2020 +0800 libstdc++: use maintained size when split pb_ds binary search trees libstdc++-v3/ChangeLog: PR libstdc++/81806 * include/ext/pb_ds/detail/bin_search_tree_/split_join_fn_imps.hpp (split_finish): Use maintained size, instead of calling std::distance. Diff: --- .../include/ext/pb_ds/detail/bin_search_tree_/split_join_fn_imps.hpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/split_join_fn_imps.hpp b/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/split_join_fn_imps.hpp index 0c1b26fa9e2d..a2a57757a046 100644 --- a/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/split_join_fn_imps.hpp +++ b/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/split_join_fn_imps.hpp @@ -133,7 +133,9 @@ PB_DS_CLASS_C_DEC:: split_finish(PB_DS_CLASS_C_DEC& other) { other.initialize_min_max(); - other.m_size = std::distance(other.begin(), other.end()); + other.m_size = 0; + if (other.m_p_head->m_p_parent != 0) +other.m_size = other.m_p_head->m_p_parent->m_subtree_size; m_size -= other.m_size; initialize_min_max(); PB_DS_ASSERT_VALID((*this))
[gcc r16-776] nds32: Avoid accessing beyond the operands[] array
https://gcc.gnu.org/g:a6ec398042c6054cbf2c08b646df98b63a9418d5 commit r16-776-ga6ec398042c6054cbf2c08b646df98b63a9418d5 Author: Richard Sandiford Date: Wed May 21 10:01:26 2025 +0100 nds32: Avoid accessing beyond the operands[] array This pattern used operands[2] to hold the shift amount, even though the pattern doesn't have an operand 2 (not even as a match_dup). This caused a build failure with -Werror: array subscript 2 is above array bounds of ‘rtx_def* [2]’ gcc/ PR target/100837 * config/nds32/nds32-intrinsic.md (unspec_get_pending_int): Use a local variable instead of operands[2]. Diff: --- gcc/config/nds32/nds32-intrinsic.md | 11 ++- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/gcc/config/nds32/nds32-intrinsic.md b/gcc/config/nds32/nds32-intrinsic.md index e05dce105099..85acea330f07 100644 --- a/gcc/config/nds32/nds32-intrinsic.md +++ b/gcc/config/nds32/nds32-intrinsic.md @@ -333,30 +333,31 @@ "" { rtx system_reg = NULL_RTX; + rtx shift_amt = NULL_RTX; /* Set system register form nds32_intrinsic_register_names[]. 
*/ if ((INTVAL (operands[1]) >= NDS32_INT_H0) && (INTVAL (operands[1]) <= NDS32_INT_H15)) { system_reg = GEN_INT (__NDS32_REG_INT_PEND__); - operands[2] = GEN_INT (31 - INTVAL (operands[1])); + shift_amt = GEN_INT (31 - INTVAL (operands[1])); } else if (INTVAL (operands[1]) == NDS32_INT_SWI) { system_reg = GEN_INT (__NDS32_REG_INT_PEND__); - operands[2] = GEN_INT (15); + shift_amt = GEN_INT (15); } else if ((INTVAL (operands[1]) >= NDS32_INT_H16) && (INTVAL (operands[1]) <= NDS32_INT_H31)) { system_reg = GEN_INT (__NDS32_REG_INT_PEND2__); - operands[2] = GEN_INT (31 - INTVAL (operands[1])); + shift_amt = GEN_INT (31 - INTVAL (operands[1])); } else if ((INTVAL (operands[1]) >= NDS32_INT_H32) && (INTVAL (operands[1]) <= NDS32_INT_H63)) { system_reg = GEN_INT (__NDS32_REG_INT_PEND3__); - operands[2] = GEN_INT (31 - (INTVAL (operands[1]) - 32)); + shift_amt = GEN_INT (31 - (INTVAL (operands[1]) - 32)); } else error ("% not support %," @@ -366,7 +367,7 @@ if (system_reg != NULL_RTX) { emit_insn (gen_unspec_volatile_mfsr (operands[0], system_reg)); - emit_insn (gen_ashlsi3 (operands[0], operands[0], operands[2])); + emit_insn (gen_ashlsi3 (operands[0], operands[0], shift_amt)); emit_insn (gen_lshrsi3 (operands[0], operands[0], GEN_INT (31))); emit_insn (gen_unspec_dsb ()); }
[gcc r16-789] vxworks: libgcc: include string.h for memset
https://gcc.gnu.org/g:99a65bfe59208c33a74718ef5fc05e255a76393a commit r16-789-g99a65bfe59208c33a74718ef5fc05e255a76393a Author: Alexandre Oliva Date: Wed May 21 06:19:46 2025 -0300 vxworks: libgcc: include string.h for memset gthr-vxworks-thread.c calls memset in __gthread_cond_signal, but it fails to include <string.h>, where this function is declared, and GCC 14 rejects calls of undeclared functions. Include the required header. for libgcc/ChangeLog * config/gthr-vxworks-thread.c: Include string.h for memset. Diff: --- libgcc/config/gthr-vxworks-thread.c | 1 + 1 file changed, 1 insertion(+) diff --git a/libgcc/config/gthr-vxworks-thread.c b/libgcc/config/gthr-vxworks-thread.c index 17c60faba48f..31f291aca67e 100644 --- a/libgcc/config/gthr-vxworks-thread.c +++ b/libgcc/config/gthr-vxworks-thread.c @@ -33,6 +33,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #include #include +#include <string.h> #define __TIMESPEC_TO_NSEC(timespec) \ ((long long)timespec.tv_sec * 10 + (long long)timespec.tv_nsec)
[gcc r16-790] [testsuite] [vxworks] netinet includes atomic, reqs c++11
https://gcc.gnu.org/g:659fe2a28e8cbaf4672d4db8ef3f13c6efed9c0c commit r16-790-g659fe2a28e8cbaf4672d4db8ef3f13c6efed9c0c Author: Alexandre Oliva Date: Wed May 21 06:19:57 2025 -0300 [testsuite] [vxworks] netinet includes atomic, reqs c++11 On vxworks, the included netinet/in.h header indirectly includes <atomic>, which fails on C++ <11. Skip the test. for gcc/testsuite/ChangeLog * c-c++-common/analyzer/fd-glibc-byte-stream-socket.c: Skip on vxworks with C++ < 11. Diff: --- gcc/testsuite/c-c++-common/analyzer/fd-glibc-byte-stream-socket.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gcc/testsuite/c-c++-common/analyzer/fd-glibc-byte-stream-socket.c b/gcc/testsuite/c-c++-common/analyzer/fd-glibc-byte-stream-socket.c index fd57d3b0894a..2a44e452127c 100644 --- a/gcc/testsuite/c-c++-common/analyzer/fd-glibc-byte-stream-socket.c +++ b/gcc/testsuite/c-c++-common/analyzer/fd-glibc-byte-stream-socket.c @@ -5,6 +5,8 @@ /* { dg-additional-options "-fno-exceptions" } */ /* { dg-skip-if "" { hppa*-*-hpux* powerpc*-*-aix* } } */ +/* On vxworks, netinet/in.h indirectly includes atomic, that requires C++11. */ +/* { dg-skip-if "" { *-*-vxworks* && { c++ && { ! c++11 } } } } */ #include #include
[gcc r16-794] [testsuite] [aarch64] match alt cache clear names in sme nonlocal_goto tests
https://gcc.gnu.org/g:4b75decf5297838ef9ddfb842da0117adbe3f975 commit r16-794-g4b75decf5297838ef9ddfb842da0117adbe3f975 Author: Alexandre Oliva Date: Wed May 21 06:20:22 2025 -0300 [testsuite] [aarch64] match alt cache clear names in sme nonlocal_goto tests vxworks calls cacheTextUpdate instead of __clear_cache. Adjust the sme/nonlocal_goto_*.c tests for inexact matches. for gcc/testsuite/ChangeLog * gcc.target/aarch64/sme/nonlocal_goto_1.c: Match vxworks cache-clearing function as well. * gcc.target/aarch64/sme/nonlocal_goto_2.c: Likewise. * gcc.target/aarch64/sme/nonlocal_goto_3.c: Likewise. Diff: --- gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_1.c | 2 +- gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_2.c | 2 +- gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_3.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_1.c b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_1.c index 4e3869fcc9ee..572c17a06d7c 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_1.c @@ -13,7 +13,7 @@ void run(void (*)()); ** ldr x16, \1 ** tbz x16, 0, .* ** smstop sm -** bl __clear_cache +** bl [^\n]*[cC]ache[^\n]* ** ldr x16, \1 ** tbz x16, 0, .* ** smstart sm diff --git a/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_2.c b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_2.c index 2a2db72c3a08..721a2b789863 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_2.c @@ -7,7 +7,7 @@ void run(void (*)()); ** foo: ** ... 
** smstop sm -** bl __clear_cache +** bl [^\n]*[cC]ache[^\n]* ** smstart sm ** add x0, .* ** smstop sm diff --git a/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_3.c b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_3.c index 022b04052c54..25db9283b169 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_3.c +++ b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_3.c @@ -9,7 +9,7 @@ void run(void (*)()); ** smstart sm ** ... ** smstop sm -** bl __clear_cache +** bl [^\n]*[cC]ache[^\n]* ** smstart sm ** add x0, .* ** smstop sm
[gcc r16-795] [testsuite] [x86] double copysign requires -msse2
https://gcc.gnu.org/g:02788cde86264559ca9cb3323c73c72fd0211c5d commit r16-795-g02788cde86264559ca9cb3323c73c72fd0211c5d Author: Alexandre Oliva Date: Wed May 21 06:20:29 2025 -0300 [testsuite] [x86] double copysign requires -msse2 SSE_FLOAT_MODE_P only holds for DFmode with SSE2, and that's a condition for copysign3 to be available under TARGET_SSE_MATH. Various copysign testcases use -msse -mfpmath=sse on ia32 to enable the copysign builtins and patterns, but that would only be enough if the tests were limited to floats. Since they test doubles as well, we need -msse2 instead of -msse. for gcc/testsuite/ChangeLog * gcc.dg/fold-copysign-1.c: Bump to sse2 on ia32. * gcc.dg/pr55152-2.c: Likewise. * gcc.dg/tree-ssa/abs-4.c: Likewise. * gcc.dg/tree-ssa/backprop-6.c: Likewise. Diff: --- gcc/testsuite/gcc.dg/fold-copysign-1.c | 2 +- gcc/testsuite/gcc.dg/pr55152-2.c | 2 +- gcc/testsuite/gcc.dg/tree-ssa/abs-4.c | 2 +- gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/gcc/testsuite/gcc.dg/fold-copysign-1.c b/gcc/testsuite/gcc.dg/fold-copysign-1.c index 1f5141b1c5d6..b65c08bd9a08 100644 --- a/gcc/testsuite/gcc.dg/fold-copysign-1.c +++ b/gcc/testsuite/gcc.dg/fold-copysign-1.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O -fdump-tree-cddce1" } */ -/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */ +/* { dg-additional-options "-msse2 -mfpmath=sse" { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */ /* { dg-additional-options "-mdouble=64" { target { avr-*-* } } } */ double foo (double x) diff --git a/gcc/testsuite/gcc.dg/pr55152-2.c b/gcc/testsuite/gcc.dg/pr55152-2.c index 24068cffa4a8..7533ab4db601 100644 --- a/gcc/testsuite/gcc.dg/pr55152-2.c +++ b/gcc/testsuite/gcc.dg/pr55152-2.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O -ffinite-math-only -fno-signed-zeros -fstrict-overflow -fdump-tree-optimized" } */ -/* { dg-additional-options 
"-msse -mfpmath=sse" { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */ +/* { dg-additional-options "-msse2 -mfpmath=sse" { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */ double g (double a) { diff --git a/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c b/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c index 4144d1cd954a..f43018d0dff4 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O1 -fdump-tree-optimized" } */ -/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */ +/* { dg-additional-options "-msse2 -mfpmath=sse" { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */ /* PR tree-optimization/109829 */ float abs_f(float x) { return __builtin_signbit(x) ? x : -x; } diff --git a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c index dbde681e3832..efb53f17f861 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O -fdump-tree-backprop-details" } */ -/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */ +/* { dg-additional-options "-msse2 -mfpmath=sse" { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */ void start (void *); void end (void *);
[gcc r16-792] [testsuite] tolerate missing std::stold
https://gcc.gnu.org/g:207534061fb026585d65af3cf863dc434ec36d7c commit r16-792-g207534061fb026585d65af3cf863dc434ec36d7c Author: Alexandre Oliva Date: Wed May 21 06:20:11 2025 -0300 [testsuite] tolerate missing std::stold basic_string.h doesn't define the non-w string version of std::stold when certain conditions aren't met, and then a couple of tests fail to compile. Guard the portions of the tests that depend on std::stold with the conditions for it to be defined. for libstdc++-v3/ChangeLog * testsuite/21_strings/basic_string/numeric_conversions/char/stold.cc: Guard non-wide stold calls with conditions for it to be defined. * testsuite/27_io/basic_ostream/inserters_arithmetic/char/hexfloat.cc: Likewise. Diff: --- .../21_strings/basic_string/numeric_conversions/char/stold.cc | 6 ++ .../27_io/basic_ostream/inserters_arithmetic/char/hexfloat.cc | 6 ++ 2 files changed, 12 insertions(+) diff --git a/libstdc++-v3/testsuite/21_strings/basic_string/numeric_conversions/char/stold.cc b/libstdc++-v3/testsuite/21_strings/basic_string/numeric_conversions/char/stold.cc index b64ad0c86834..dd777c4529a0 100644 --- a/libstdc++-v3/testsuite/21_strings/basic_string/numeric_conversions/char/stold.cc +++ b/libstdc++-v3/testsuite/21_strings/basic_string/numeric_conversions/char/stold.cc @@ -31,6 +31,11 @@ void test01() { + /* If these conditions are not met, basic_string.h doesn't define + std::stold(const string&, size_t* = 0), and then the test would + fail to compile. */ +#if (_GLIBCXX_HAVE_STRTOLD && ! 
_GLIBCXX_HAVE_BROKEN_STRTOLD) \ + || __DBL_MANT_DIG__ == __LDBL_MANT_DIG__ bool test = false; using namespace std; @@ -106,6 +111,7 @@ test01() test = false; } VERIFY( test ); +#endif } int main() diff --git a/libstdc++-v3/testsuite/27_io/basic_ostream/inserters_arithmetic/char/hexfloat.cc b/libstdc++-v3/testsuite/27_io/basic_ostream/inserters_arithmetic/char/hexfloat.cc index b1bc7fbb9d4e..f694730901ed 100644 --- a/libstdc++-v3/testsuite/27_io/basic_ostream/inserters_arithmetic/char/hexfloat.cc +++ b/libstdc++-v3/testsuite/27_io/basic_ostream/inserters_arithmetic/char/hexfloat.cc @@ -95,6 +95,11 @@ test01() void test02() { + /* If these conditions are not met, basic_string.h doesn't define + std::stold(const string&, size_t* = 0), and then the test would + fail to compile. */ +#if (_GLIBCXX_HAVE_STRTOLD && ! _GLIBCXX_HAVE_BROKEN_STRTOLD) \ + || __DBL_MANT_DIG__ == __LDBL_MANT_DIG__ ostringstream os; long double d = 272.L; // 0x1.1p+8L; os << hexfloat << setprecision(1); @@ -140,6 +145,7 @@ test02() cout << "got: " << os.str() << endl; #endif VERIFY( os && os.str() == "15" ); +#endif } int
[gcc r16-791] [testsuite] [analyzer] [vxworks] define __STDC_WANT_LIB_EXT1__ to 1
https://gcc.gnu.org/g:f3c5e0a2091ddd5cae4d7381a847aac5f546f04c commit r16-791-gf3c5e0a2091ddd5cae4d7381a847aac5f546f04c Author: Alexandre Oliva Date: Wed May 21 06:20:03 2025 -0300 [testsuite] [analyzer] [vxworks] define __STDC_WANT_LIB_EXT1__ to 1 vxworks' headers use #if instead of #ifdef to test for __STDC_WANT_LIB_EXT1__, so the definition in the analyzer test strtok-cppreference.c catches a bug there, but not something it's meant to catch or that we could fix in GCC, so amend the definition to sidestep the libc bug. for gcc/testsuite/ChangeLog * c-c++-common/analyzer/strtok-cppreference.c (__STDC_WANT_LIB_EXT1__): Define to 1. Diff: --- gcc/testsuite/c-c++-common/analyzer/strtok-cppreference.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/testsuite/c-c++-common/analyzer/strtok-cppreference.c b/gcc/testsuite/c-c++-common/analyzer/strtok-cppreference.c index a396c643f116..96117276ffc3 100644 --- a/gcc/testsuite/c-c++-common/analyzer/strtok-cppreference.c +++ b/gcc/testsuite/c-c++-common/analyzer/strtok-cppreference.c @@ -13,7 +13,7 @@ /* { dg-additional-options " -Wno-analyzer-too-complex -Wno-analyzer-symbol-too-complex" } */ -#define __STDC_WANT_LIB_EXT1__ 0 +#define __STDC_WANT_LIB_EXT1__ 1 #include #include
[gcc r16-793] [testsuite] [aarch64] use uint64_t in rwsr tests
https://gcc.gnu.org/g:d41028df8b39087ef71de84a5daf68cb305b9f7f commit r16-793-gd41028df8b39087ef71de84a5daf68cb305b9f7f Author: Alexandre Oliva Date: Wed May 21 06:20:17 2025 -0300 [testsuite] [aarch64] use uint64_t in rwsr tests stdint.h defines uint64_t instead of __uint64_t, so use the former. __uint64_t is not available on e.g. vxworks. for gcc/testsuite/ChangeLog * gcc.target/aarch64/acle/rwsr.c: Use uint64_t. * gcc.target/aarch64/acle/rwsr-2.c: Likewise. Diff: --- gcc/testsuite/gcc.target/aarch64/acle/rwsr-2.c | 4 ++-- gcc/testsuite/gcc.target/aarch64/acle/rwsr.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/gcc/testsuite/gcc.target/aarch64/acle/rwsr-2.c b/gcc/testsuite/gcc.target/aarch64/acle/rwsr-2.c index cca88924043e..5527297e6277 100644 --- a/gcc/testsuite/gcc.target/aarch64/acle/rwsr-2.c +++ b/gcc/testsuite/gcc.target/aarch64/acle/rwsr-2.c @@ -9,14 +9,14 @@ void test_leading_zeros () { - __uint64_t b = __arm_rsr64 ("S1_2_C03_C04_5"); /* { dg-error "invalid system register name 's1_2_c03_c04_5'" } */ + uint64_t b = __arm_rsr64 ("S1_2_C03_C04_5"); /* { dg-error "invalid system register name 's1_2_c03_c04_5'" } */ __arm_wsr64 ("S1_2_C03_C04_5", b); /* { dg-error "invalid system register name 's1_2_c03_c04_5'" } */ } void test_bounds () { - __uint64_t b; + uint64_t b; b = __arm_rsr64 ("s4_2_c3_c4_5"); /* { dg-error "invalid system register name 's4_2_c3_c4_5'" } */ b = __arm_rsr64 ("s1_8_c3_c4_5"); /* { dg-error "invalid system register name 's1_8_c3_c4_5'" } */ b = __arm_rsr64 ("s1_2_c16_c4_5"); /* { dg-error "invalid system register name 's1_2_c16_c4_5'" } */ diff --git a/gcc/testsuite/gcc.target/aarch64/acle/rwsr.c b/gcc/testsuite/gcc.target/aarch64/acle/rwsr.c index 6feb0bef2d6f..f63eb43bf7ee 100644 --- a/gcc/testsuite/gcc.target/aarch64/acle/rwsr.c +++ b/gcc/testsuite/gcc.target/aarch64/acle/rwsr.c @@ -171,6 +171,6 @@ set_wsrf64 (double a) */ void set_custom () { - __uint64_t b = __arm_rsr64 ("S1_2_C3_C4_5"); + uint64_t b = 
__arm_rsr64 ("S1_2_C3_C4_5"); __arm_wsr64 ("S1_2_C3_C4_5", b); }
[gcc r16-797] [testsuite] [x86] strlenopt-80 needs -msse2 on ia32
https://gcc.gnu.org/g:fe9be609d7a10f2cd706aecd772e3e3427868daf commit r16-797-gfe9be609d7a10f2cd706aecd772e3e3427868daf Author: Alexandre Oliva Date: Wed May 21 06:20:37 2025 -0300 [testsuite] [x86] strlenopt-80 needs -msse2 on ia32 The string length optimizations at 8-byte blocks requires -msse2; -msse is not enough. Bump it. for gcc/testsuite/ChangeLog * gcc.dg/strlenopt-80.c: Bump to -msse2. Diff: --- gcc/testsuite/gcc.dg/strlenopt-80.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.dg/strlenopt-80.c b/gcc/testsuite/gcc.dg/strlenopt-80.c index 63d4eb17e4c3..0b16a4142366 100644 --- a/gcc/testsuite/gcc.dg/strlenopt-80.c +++ b/gcc/testsuite/gcc.dg/strlenopt-80.c @@ -6,7 +6,7 @@ { dg-do compile { target { { aarch64*-*-* i?86-*-* x86_64-*-* } || { { powerpc*-*-* } && lp64 } } } } { dg-options "-O2 -Wall -fdump-tree-optimized" } - { dg-additional-options "-msse" { target i?86-*-* x86_64-*-* } } */ + { dg-additional-options "-msse2" { target i?86-*-* x86_64-*-* } } */ /* On powerpc configurations that have -mstrict-align by default, the memcpy calls for ncpylog >= 3 are not turned into MEM_REFs.
[gcc r16-800] [testsuite] [x86] no-callee-saved-16.c needs -fomit-frame-pointer
https://gcc.gnu.org/g:012a857d1eb5b45baee8752e3b5a434fa25c52e2 commit r16-800-g012a857d1eb5b45baee8752e3b5a434fa25c52e2 Author: Alexandre Oliva Date: Wed May 21 06:20:54 2025 -0300 [testsuite] [x86] no-callee-saved-16.c needs -fomit-frame-pointer If the toolchain is built with --enable-frame-pointer, gcc.target/i386/no-callee-saved-16.c will not get the expected optimization without -fomit-frame-pointer, that would be enabled by -O2 without the configure flag. Add it. for gcc/testsuite/ChangeLog * gcc.target/i386/no-callee-saved-16.c: Add -fomit-frame-pointer. Diff: --- gcc/testsuite/gcc.target/i386/no-callee-saved-16.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.target/i386/no-callee-saved-16.c b/gcc/testsuite/gcc.target/i386/no-callee-saved-16.c index 112d1764f3e1..a5589e21ab3b 100644 --- a/gcc/testsuite/gcc.target/i386/no-callee-saved-16.c +++ b/gcc/testsuite/gcc.target/i386/no-callee-saved-16.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mtune-ctrl=^prologue_using_move,^epilogue_using_move" } */ +/* { dg-options "-O2 -fomit-frame-pointer -mtune-ctrl=^prologue_using_move,^epilogue_using_move" } */ typedef void (*fn_t) (void) __attribute__ ((no_callee_saved_registers));
[gcc r16-802] [testsuite] [x86] pr31985.c needs -fomit-frame-pointer to match movl count
https://gcc.gnu.org/g:6621311c309fcc68ecdd395bceb9ad7994bed41f commit r16-802-g6621311c309fcc68ecdd395bceb9ad7994bed41f Author: Alexandre Oliva Date: Wed May 21 06:21:04 2025 -0300 [testsuite] [x86] pr31985.c needs -fomit-frame-pointer to match movl count On an --enable-frame-pointer toolchain, pr31985.c gets an extra movl and fails. Enable -fomit-frame-pointer explicitly. for gcc/testsuite/ChangeLog * gcc.target/i386/pr31985.c: Add -fomit-frame-pointer. Diff: --- gcc/testsuite/gcc.target/i386/pr31985.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.target/i386/pr31985.c b/gcc/testsuite/gcc.target/i386/pr31985.c index a6de1b5b1431..a0a91116242c 100644 --- a/gcc/testsuite/gcc.target/i386/pr31985.c +++ b/gcc/testsuite/gcc.target/i386/pr31985.c @@ -1,5 +1,5 @@ /* { dg-do compile { target ia32 } } */ -/* { dg-options "-O2" } */ +/* { dg-options "-O2 -fomit-frame-pointer" } */ void test_c (unsigned int a, unsigned int b, unsigned int c, unsigned int d) {
[gcc r16-801] [testsuite] [x86] pr108938-3.c needs -msse2 for bswap in foo2 with -m32
https://gcc.gnu.org/g:3392849511c9b9eb1d912a547f2441cece766e3b commit r16-801-g3392849511c9b9eb1d912a547f2441cece766e3b Author: Alexandre Oliva Date: Wed May 21 06:20:59 2025 -0300 [testsuite] [x86] pr108938-3.c needs -msse2 for bswap in foo2 with -m32 Without SSE2, we don't combine the separate loads in foo2 and get separate rotates, instead of a bswap. for gcc/testsuite/ChangeLog * gcc.target/i386/pr108938-3.c: Add -msse2. Diff: --- gcc/testsuite/gcc.target/i386/pr108938-3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.target/i386/pr108938-3.c b/gcc/testsuite/gcc.target/i386/pr108938-3.c index 757a0c456bc4..47293d49bb9e 100644 --- a/gcc/testsuite/gcc.target/i386/pr108938-3.c +++ b/gcc/testsuite/gcc.target/i386/pr108938-3.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -mno-movbe -mno-avx" } */ +/* { dg-options "-O2 -ftree-vectorize -mno-movbe -msse2 -mno-avx" } */ /* { dg-final { scan-assembler-times "bswap\[\t ]+" 2 { target { ! ia32 } } } } */ /* { dg-final { scan-assembler-times "bswap\[\t ]+" 3 { target ia32 } } } */
[gcc r16-803] [testsuite] [x86] vect-simd-clone-1[678]e.c adjust
https://gcc.gnu.org/g:e82a9f6536ba8fcf50a7172650a86519c50aeabd commit r16-803-ge82a9f6536ba8fcf50a7172650a86519c50aeabd Author: Alexandre Oliva Date: Wed May 21 06:21:08 2025 -0300 [testsuite] [x86] vect-simd-clone-1[678]e.c adjust Since r13-6296, we haven't got 4 simdclone calls for these tests on ia32 without avx_runtime. With avx_runtime, we get 3 such calls even on ia32, but we didn't test for anything on ia32 with avx_runtime. Adjust and simplify the expectations and comments. for gcc/testsuite/ChangeLog * gcc.dg/vect/vect-simd-clone-16e.c: Expect fewer calls on ia32. * gcc.dg/vect/vect-simd-clone-17e.c: Likewise. * gcc.dg/vect/vect-simd-clone-18e.c: Likewise. Diff: --- gcc/testsuite/gcc.dg/vect/vect-simd-clone-16e.c | 8 +++- gcc/testsuite/gcc.dg/vect/vect-simd-clone-17e.c | 8 +++- gcc/testsuite/gcc.dg/vect/vect-simd-clone-18e.c | 8 +++- 3 files changed, 9 insertions(+), 15 deletions(-) diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16e.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16e.c index f80b0e0581e3..2f7cdfb22119 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16e.c +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16e.c @@ -6,11 +6,9 @@ #include "vect-simd-clone-16.c" /* Ensure the the in-branch simd clones are used on targets that support them. - Some targets use another call for the epilogue loops. - Some targets use pairs of vectors and do twice the calls. */ -/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { { ! avx_runtime } && { ! { { i?86-*-* x86_64-*-* } && { ! lp64 } } } } } } } */ -/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { avx_runtime && { ! { { i?86-*-* x86_64-*-* } && { ! lp64 } } } } } } } */ -/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 4 "vect" { target { { ! avx_runtime } && { { i?86-*-* x86_64-*-* } && { ! lp64 } } } } } } */ + Some targets use another call for the epilogue loops. 
*/ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! avx_runtime } } } } */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target avx_runtime } } } */ /* The LTO test produces two dump files and we scan the wrong one. */ /* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17e.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17e.c index c7c510b8a6ab..8f10aff3b897 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17e.c +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17e.c @@ -6,11 +6,9 @@ #include "vect-simd-clone-17.c" /* Ensure the the in-branch simd clones are used on targets that support them. - Some targets use another call for the epilogue loops. - Some targets use pairs of vectors and do twice the calls. */ -/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { { ! avx_runtime } && { ! { { i?86-*-* x86_64-*-* } && { ! lp64 } } } } } } } */ -/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { avx_runtime && { ! { { i?86-*-* x86_64-*-* } && { ! lp64 } } } } } } } */ -/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 4 "vect" { target { { ! avx_runtime } && { { i?86-*-* x86_64-*-* } && { ! lp64 } } } } } } */ + Some targets use another call for the epilogue loops. */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! avx_runtime } } } } */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target avx_runtime } } } */ /* The LTO test produces two dump files and we scan the wrong one. 
*/ /* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18e.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18e.c index e00c3d78038b..142fcc8b0b55 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18e.c +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18e.c @@ -6,11 +6,9 @@ #include "vect-simd-clone-18.c" /* Ensure the the in-branch simd clones are used on targets that support them. - Some targets use another call for the epilogue loops. - Some targets use pairs of vectors and do twice the calls. */ -/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { { ! avx_runtime } && { ! { { i?86-*-* x86_64-*-* } && { ! lp64 } } } } } } } */ -/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { avx_runtime && { ! { { i?86-*-* x86_64-*-* } && { ! lp64 } } } } } } } */ -/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 4 "vect" { target { { ! avx_runtime } && { { i?86-*-* x86_64-*-* } && { ! lp64 } } } } } } */ + Some targets use another call for the epilogue loops. */ +/* { dg-final { scan-
[gcc r16-796] [testsuite] [x86] memcpy-6 needs -msse2
https://gcc.gnu.org/g:f3a758e9a2b2aa40bda68a18157996167d772e4d commit r16-796-gf3a758e9a2b2aa40bda68a18157996167d772e4d Author: Alexandre Oliva Date: Wed May 21 06:20:33 2025 -0300 [testsuite] [x86] memcpy-6 needs -msse2 The 8-byte memory operations will only be inlined on ia32 with -msse2. Bump it. for gcc/testsuite/ChangeLog * gcc.dg/memcpy-6.c: Bump to -msse2. Diff: --- gcc/testsuite/gcc.dg/memcpy-6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.dg/memcpy-6.c b/gcc/testsuite/gcc.dg/memcpy-6.c index d4df03903c35..49aec338d2f2 100644 --- a/gcc/testsuite/gcc.dg/memcpy-6.c +++ b/gcc/testsuite/gcc.dg/memcpy-6.c @@ -7,7 +7,7 @@ { dg-do compile } { dg-options "-O0 -Wrestrict -fdump-tree-optimized" } { dg-skip-if "skip non-x86 targets" { ! { i?86-*-* x86_64-*-* } } } - { dg-additional-options "-msse" { target i?86-*-* x86_64-*-* } } */ + { dg-additional-options "-msse2" { target i?86-*-* x86_64-*-* } } */ char a[32];
[gcc r16-798] [testsuite] [x86] forwprop-41 needs -msse
https://gcc.gnu.org/g:8bb72b737c38adb08bfefabc43cb4f25f7d9e95d commit r16-798-g8bb72b737c38adb08bfefabc43cb4f25f7d9e95d Author: Alexandre Oliva Date: Wed May 21 06:20:42 2025 -0300 [testsuite] [x86] forwprop-41 needs -msse The vector operations are only turned into BIT_INSERT_EXPR with -msse on ia32. for gcc/testsuite/ChangeLog * gcc.dg/tree-ssa/forwprop-41.c: Add -msse on x86. Diff: --- gcc/testsuite/gcc.dg/tree-ssa/forwprop-41.c | 1 + 1 file changed, 1 insertion(+) diff --git a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-41.c b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-41.c index a1f08289dd69..1c5b500deb15 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-41.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-41.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O2 -fdump-tree-optimized -Wno-psabi -w" } */ +/* { dg-additional-options "-msse" { target i?86-*-* x86_64-*-* } } */ #define vector __attribute__((__vector_size__(16) ))
[gcc r16-799] [testsuite] add missing require vect_early_break_hw for vect-tsvc
https://gcc.gnu.org/g:57cc4f8bf31f8c25fd338b18b5e709d77cc2a0ad commit r16-799-g57cc4f8bf31f8c25fd338b18b5e709d77cc2a0ad Author: Alexandre Oliva Date: Wed May 21 06:20:48 2025 -0300 [testsuite] add missing require vect_early_break_hw for vect-tsvc Some tsvc tests add vect_early_break options without requiring the feature to be available. Add the requirements. for gcc/testsuite/ChangeLog * gcc.dg/vect/tsvc/vect-tsvc-s332.c: Require vect_early_break_hw. * gcc.dg/vect/tsvc/vect-tsvc-s481.c: Likewise. * gcc.dg/vect/tsvc/vect-tsvc-s482.c: Likewise. Diff: --- gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s332.c | 1 + gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s481.c | 1 + gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s482.c | 1 + 3 files changed, 3 insertions(+) diff --git a/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s332.c b/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s332.c index 0d55d0dd67c3..21a9c5a6b2b6 100644 --- a/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s332.c +++ b/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s332.c @@ -3,6 +3,7 @@ /* { dg-additional-options "--param vect-epilogues-nomask=0" } */ /* { dg-require-effective-target vect_float } */ +/* { dg-require-effective-target vect_early_break_hw } */ /* { dg-add-options vect_early_break } */ #include "tsvc.h" diff --git a/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s481.c b/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s481.c index 5539f0f08411..e4433385d668 100644 --- a/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s481.c +++ b/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s481.c @@ -3,6 +3,7 @@ /* { dg-additional-options "--param vect-epilogues-nomask=0" } */ /* { dg-require-effective-target vect_float } */ +/* { dg-require-effective-target vect_early_break_hw } */ /* { dg-add-options vect_early_break } */ #include "tsvc.h" diff --git a/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s482.c b/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s482.c index 73bed5d4c57a..146df409ecc6 100644 --- a/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s482.c +++ 
b/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s482.c @@ -3,6 +3,7 @@ /* { dg-additional-options "--param vect-epilogues-nomask=0" } */ /* { dg-require-effective-target vect_float } */ +/* { dg-require-effective-target vect_early_break_hw } */ /* { dg-add-options vect_early_break } */ #include "tsvc.h"
[gcc r16-775] c++, coroutines: Clean up the ramp cleanups.
https://gcc.gnu.org/g:18df4a10bc96946401218019ec566d867238b3e4 commit r16-775-g18df4a10bc96946401218019ec566d867238b3e4 Author: Iain Sandoe Date: Mon May 12 20:38:48 2025 +0100 c++, coroutines: Clean up the ramp cleanups. This replaces the cleanup try-catch block in the ramp with a series of eh-only cleanup statements. gcc/cp/ChangeLog: * coroutines.cc (cp_coroutine_transform::build_ramp_function): Replace ramp cleanup try-catch block with eh-only cleanup statements. Signed-off-by: Iain Sandoe Diff: --- gcc/cp/coroutines.cc | 199 --- 1 file changed, 62 insertions(+), 137 deletions(-) diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc index 5c4133a42b7e..a62099622288 100644 --- a/gcc/cp/coroutines.cc +++ b/gcc/cp/coroutines.cc @@ -4866,39 +4866,6 @@ cp_coroutine_transform::build_ramp_function () coro_fp = pushdecl (coro_fp); add_decl_expr (coro_fp); - tree coro_promise_live = NULL_TREE; - if (flag_exceptions) -{ - /* Signal that we need to clean up the promise object on exception. */ - coro_promise_live - = coro_build_and_push_artificial_var (loc, "_Coro_promise_live", - boolean_type_node, orig_fn_decl, - boolean_false_node); - - /* To signal that we need to cleanup copied function args. */ - if (DECL_ARGUMENTS (orig_fn_decl)) - for (tree arg = DECL_ARGUMENTS (orig_fn_decl); arg != NULL; -arg = DECL_CHAIN (arg)) - { - param_info *parm_i = param_uses.get (arg); - if (parm_i->trivial_dtor) - continue; - parm_i->guard_var = pushdecl (parm_i->guard_var); - add_decl_expr (parm_i->guard_var); - } -} - - /* deref the frame pointer, to use in member access code. */ - tree deref_fp -= cp_build_indirect_ref (loc, coro_fp, RO_UNARY_STAR, -tf_warning_or_error); - tree frame_needs_free -= coro_build_and_push_artificial_var_with_dve (loc, - coro_frame_needs_free_id, - boolean_type_node, - orig_fn_decl, NULL_TREE, - deref_fp); - /* Build the frame. 
*/ /* The CO_FRAME internal function is a mechanism to allow the middle end @@ -4942,25 +4909,23 @@ cp_coroutine_transform::build_ramp_function () finish_if_stmt (if_stmt); } + /* Dereference the frame pointer, to use in member access code. */ + tree deref_fp += cp_build_indirect_ref (loc, coro_fp, RO_UNARY_STAR, tf_warning_or_error); + /* For now, once allocation has succeeded we always assume that this needs destruction, there's no impl. for frame allocation elision. */ - r = cp_build_init_expr (frame_needs_free, boolean_true_node); - finish_expr_stmt (r); - - /* Set up the promise. */ - tree p -= coro_build_and_push_artificial_var_with_dve (loc, coro_promise_id, - promise_type, orig_fn_decl, - NULL_TREE, deref_fp); + tree frame_needs_free += coro_build_and_push_artificial_var_with_dve (loc, + coro_frame_needs_free_id, + boolean_type_node, + orig_fn_decl, + boolean_true_node, + deref_fp); + /* Although it appears to be unused here the frame entry is needed and we + just set it true. */ + TREE_USED (frame_needs_free) = true; - /* Up to now any exception thrown will propagate directly to the caller. - This is OK since the only source of such exceptions would be in allocation - of the coroutine frame, and therefore the ramp will not have initialized - any further state. From here, we will track state that needs explicit - destruction in the case that promise or g.r.o setup fails or an exception - is thrown from the initial suspend expression. */ - tree ramp_try_block = NULL_TREE; - tree ramp_try_stmts = NULL_TREE; tree iarc_x = NULL_TREE; tree coro_before_return = NULL_TREE; if (flag_exceptions) @@ -4976,8 +4941,15 @@ cp_coroutine_transform::build_ramp_function () orig_fn_decl, boolean_false_node, deref_fp); - ramp_try_block = begin_try_block (); - ramp_try_stmts = begin_compound_stmt (BCS_TRY_BLOCK); + tree frame_cleanup = push_stmt_list (); + tree do_fr_cleanup + = build1_l
[gcc r16-774] c++, coroutines: Use decltype(auto) for the g_r_o.
https://gcc.gnu.org/g:e71a6e002c6650a7a7be99277120d3e59ecb78a3 commit r16-774-ge71a6e002c6650a7a7be99277120d3e59ecb78a3 Author: Iain Sandoe Date: Sun May 11 20:36:58 2025 +0100 c++, coroutines: Use decltype(auto) for the g_r_o. The revised wording for coroutines uses decltype(auto) for the type of the get return object, which preserves references. It is quite reasonable for a coroutine body implementation to complete before control is returned to the ramp - and in that case we would be creating the ramp return object from an already-deleted promise object. Jason observes that this is a terrible situation and we should seek a resolution to it via core. Since the test added here explicitly performs the unsafe action described above we expect it to fail (until a resolution is found). gcc/cp/ChangeLog: * coroutines.cc (cp_coroutine_transform::build_ramp_function): Use decltype(auto) to determine the type of the temporary get_return_object. gcc/testsuite/ChangeLog: * g++.dg/coroutines/pr115908.C: Count promise construction and destruction. Run the test and XFAIL it. Signed-off-by: Iain Sandoe Diff: --- gcc/cp/coroutines.cc | 12 +++-- gcc/testsuite/g++.dg/coroutines/pr115908.C | 86 ++ 2 files changed, 72 insertions(+), 26 deletions(-) diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc index bc5fb9381dbe..5c4133a42b7e 100644 --- a/gcc/cp/coroutines.cc +++ b/gcc/cp/coroutines.cc @@ -5120,8 +5120,11 @@ cp_coroutine_transform::build_ramp_function () /* Check for a bad get return object type. [dcl.fct.def.coroutine] / 7 requires: The expression promise.get_return_object() is used to initialize the - returned reference or prvalue result object ... */ - tree gro_type = TREE_TYPE (get_ro); + returned reference or prvalue result object ... + When we use a local to hold this, it is decltype(auto). 
*/ + tree gro_type += finish_decltype_type (get_ro, /*id_expression_or_member_access_p*/false, + tf_warning_or_error); if (VOID_TYPE_P (gro_type) && !void_ramp_p) { error_at (fn_start, "no viable conversion from % provided by" @@ -5159,7 +5162,7 @@ cp_coroutine_transform::build_ramp_function () = coro_build_and_push_artificial_var (loc, "_Coro_gro", gro_type, orig_fn_decl, NULL_TREE); - r = cp_build_init_expr (coro_gro, get_ro); + r = cp_build_init_expr (coro_gro, STRIP_REFERENCE_REF (get_ro)); finish_expr_stmt (r); tree coro_gro_cleanup = cxx_maybe_build_cleanup (coro_gro, tf_warning_or_error); @@ -5181,7 +5184,8 @@ cp_coroutine_transform::build_ramp_function () /* The ramp is done, we just need the return statement, which we build from the return object we constructed before we called the function body. */ - finish_return_stmt (void_ramp_p ? NULL_TREE : coro_gro); + r = void_ramp_p ? NULL_TREE : convert_from_reference (coro_gro); + finish_return_stmt (r); if (flag_exceptions) { diff --git a/gcc/testsuite/g++.dg/coroutines/pr115908.C b/gcc/testsuite/g++.dg/coroutines/pr115908.C index ac27d916de2b..a40cece11438 100644 --- a/gcc/testsuite/g++.dg/coroutines/pr115908.C +++ b/gcc/testsuite/g++.dg/coroutines/pr115908.C @@ -1,3 +1,16 @@ +// { dg-do run } + +// With the changes to deal with CWG2563 (and PR119916) we now use the +// referenced promise in the return expression. It is quite reasonable +// for a body implementation to complete before control is returned to +// the ramp - and in that case we would be creating the ramp return object +// from an already-deleted promise object. +// This is recognised to be a poor situation and resolution via a core +// issue is planned. + +// In this test we explicitly trigger the circumstance mentioned above. 
+// { dg-xfail-run-if "" { *-*-* } } + #include #ifdef OUTPUT @@ -6,23 +19,25 @@ struct Promise; -bool promise_live = false; +int promise_life = 0; struct Handle : std::coroutine_handle { + Handle(Promise &p) : std::coroutine_handle(Handle::from_promise(p)) { -if (!promise_live) - __builtin_abort (); #ifdef OUTPUT -std::cout << "Handle(Promise &)\n"; +std::cout << "Handle(Promise &) " << promise_life << std::endl; #endif -} -Handle(Promise &&p) : std::coroutine_handle(Handle::from_promise(p)) { -if (!promise_live) + if (promise_life <= 0) __builtin_abort (); + } + +Handle(Promise &&p) : std::coroutine_handle(Handle::from_promise(p)) { #ifdef OUTPUT -std::cout << "Handle(Promise &&)\n"; +std::cout << "Handle(Promise &&) " << promise_life << std::endl; #endif -} + if (promise_life <= 0) + __builtin_abort (); + } using pro
[gcc r16-773] c++, coroutines: Address CWG2563 return value init [PR119916].
https://gcc.gnu.org/g:e06555a40c051d5062405b02f93b89b01a397f97 commit r16-773-ge06555a40c051d5062405b02f93b89b01a397f97 Author: Iain Sandoe Date: Mon May 12 19:47:42 2025 +0100 c++, coroutines: Address CWG2563 return value init [PR119916]. This addresses the clarification that, when the get_return_object is of a different type from the ramp return, any necessary conversions should be performed on the return expression (so that they typically occur after the function body has started execution). PR c++/119916 gcc/cp/ChangeLog: * coroutines.cc (cp_coroutine_transform::wrap_original_function_body): Do not initialise initial_await_resume_called here... (cp_coroutine_transform::build_ramp_function): ... but here. When the coroutine is not void, initialize a GRO object from promise.get_return_object(). Use this as the argument to the return expression. Use a regular cleanup for the GRO, since it is ramp-local. gcc/testsuite/ChangeLog: * g++.dg/coroutines/torture/special-termination-00-sync-completion.C: Amend for CWG2563 expected behaviour. * g++.dg/coroutines/torture/special-termination-01-self-destruct.C: Likewise. * g++.dg/coroutines/torture/pr119916.C: New test. 
Signed-off-by: Iain Sandoe Diff: --- gcc/cp/coroutines.cc | 126 +++-- gcc/testsuite/g++.dg/coroutines/torture/pr119916.C | 66 +++ .../special-termination-00-sync-completion.C | 2 +- .../torture/special-termination-01-self-destruct.C | 2 +- 4 files changed, 109 insertions(+), 87 deletions(-) diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc index 743da068e352..bc5fb9381dbe 100644 --- a/gcc/cp/coroutines.cc +++ b/gcc/cp/coroutines.cc @@ -4451,7 +4451,7 @@ cp_coroutine_transform::wrap_original_function_body () tree i_a_r_c = coro_build_artificial_var (loc, coro_frame_i_a_r_c_id, boolean_type_node, orig_fn_decl, -boolean_false_node); +NULL_TREE); DECL_CHAIN (i_a_r_c) = var_list; var_list = i_a_r_c; add_decl_expr (i_a_r_c); @@ -4867,7 +4867,6 @@ cp_coroutine_transform::build_ramp_function () add_decl_expr (coro_fp); tree coro_promise_live = NULL_TREE; - tree coro_gro_live = NULL_TREE; if (flag_exceptions) { /* Signal that we need to clean up the promise object on exception. */ @@ -4876,13 +4875,6 @@ cp_coroutine_transform::build_ramp_function () boolean_type_node, orig_fn_decl, boolean_false_node); - /* When the get-return-object is in the RETURN slot, we need to arrange -for cleanup on exception. */ - coro_gro_live - = coro_build_and_push_artificial_var (loc, "_Coro_gro_live", - boolean_type_node, orig_fn_decl, - boolean_false_node); - /* To signal that we need to cleanup copied function args. 
*/ if (DECL_ARGUMENTS (orig_fn_decl)) for (tree arg = DECL_ARGUMENTS (orig_fn_decl); arg != NULL; @@ -4970,13 +4962,19 @@ cp_coroutine_transform::build_ramp_function () tree ramp_try_block = NULL_TREE; tree ramp_try_stmts = NULL_TREE; tree iarc_x = NULL_TREE; + tree coro_before_return = NULL_TREE; if (flag_exceptions) { + coro_before_return + = coro_build_and_push_artificial_var (loc, "_Coro_before_return", + boolean_type_node, orig_fn_decl, + boolean_true_node); iarc_x = coro_build_and_push_artificial_var_with_dve (loc, coro_frame_i_a_r_c_id, boolean_type_node, - orig_fn_decl, NULL_TREE, + orig_fn_decl, + boolean_false_node, deref_fp); ramp_try_block = begin_try_block (); ramp_try_stmts = begin_compound_stmt (BCS_TRY_BLOCK); @@ -5136,90 +5134,54 @@ cp_coroutine_transform::build_ramp_function () (loc, coro_resume_index_id, short_unsigned_type_node, orig_fn_decl, build_zero_cst (short_unsigned_type_node), deref_fp); - if (flag_exceptions && iarc_x) -{ - r = cp_build_init_expr (iarc_x, boolean_false_node); - finish_expr_stmt (r); -} - - /* Used for return objects in the RESULT slot. */ - tree ret_val_dtor = NULL_TREE; - tree retval = NULL_TREE; + /* We must ma
[gcc r16-779] genemit: Use references rather than pointers
https://gcc.gnu.org/g:9b57e38e0ef26192ebb0e9e326ab3a9df06ee275 commit r16-779-g9b57e38e0ef26192ebb0e9e326ab3a9df06ee275 Author: Richard Sandiford Date: Wed May 21 10:01:27 2025 +0100 genemit: Use references rather than pointers This patch makes genemit.cc pass the md_rtx_info around by constant reference rather than pointer. It's somewhat of a cosmetic change on its own, but it makes later changes less noisy. gcc/ * genemit.cc (gen_exp): Make the info argument a constant reference. (gen_emit_seq, gen_insn, gen_expand, gen_split): Likewise. (output_add_clobbers): Likewise. (main): Update calls accordingly. Diff: --- gcc/genemit.cc | 60 +- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/gcc/genemit.cc b/gcc/genemit.cc index 9f92364d9062..cb4ae47294da 100644 --- a/gcc/genemit.cc +++ b/gcc/genemit.cc @@ -80,8 +80,8 @@ gen_rtx_scratch (rtx x, enum rtx_code subroutine_type, FILE *file) substituting any operand references appearing within. */ static void -gen_exp (rtx x, enum rtx_code subroutine_type, char *used, md_rtx_info *info, -FILE *file) +gen_exp (rtx x, enum rtx_code subroutine_type, char *used, +const md_rtx_info &info, FILE *file) { RTX_CODE code; int i; @@ -281,7 +281,7 @@ gen_exp (rtx x, enum rtx_code subroutine_type, char *used, md_rtx_info *info, becoming a separate instruction. USED is as for gen_exp. */ static void -gen_emit_seq (rtvec vec, char *used, md_rtx_info *info, FILE *file) +gen_emit_seq (rtvec vec, char *used, const md_rtx_info &info, FILE *file) { for (int i = 0, len = GET_NUM_ELEM (vec); i < len; ++i) { @@ -329,7 +329,7 @@ emit_c_code (const char *code, bool can_fail_p, const char *name, FILE *file) /* Generate the `gen_...' function for a DEFINE_INSN. */ static void -gen_insn (md_rtx_info *info, FILE *file) +gen_insn (const md_rtx_info &info, FILE *file) { struct pattern_stats stats; int i; @@ -338,7 +338,7 @@ gen_insn (md_rtx_info *info, FILE *file) registers or MATCH_SCRATCHes. If so, store away the information for later. 
*/ - rtx insn = info->def; + rtx insn = info.def; if (XVEC (insn, 1)) { int has_hard_reg = 0; @@ -366,7 +366,7 @@ gen_insn (md_rtx_info *info, FILE *file) struct clobber_ent *link = XNEW (struct clobber_ent); int j; - link->code_number = info->index; + link->code_number = info.index; /* See if any previous CLOBBER_LIST entry is the same as this one. */ @@ -422,12 +422,12 @@ gen_insn (md_rtx_info *info, FILE *file) if (XSTR (insn, 0)[0] == 0 || XSTR (insn, 0)[0] == '*') return; - fprintf (file, "/* %s:%d */\n", info->loc.filename, info->loc.lineno); + fprintf (file, "/* %s:%d */\n", info.loc.filename, info.loc.lineno); /* Find out how many operands this function has. */ get_pattern_stats (&stats, XVEC (insn, 1)); if (stats.max_dup_opno > stats.max_opno) -fatal_at (info->loc, "match_dup operand number has no match_operand"); +fatal_at (info.loc, "match_dup operand number has no match_operand"); /* Output the function name and argument declarations. */ fprintf (file, "rtx\ngen_%s (", XSTR (insn, 0)); @@ -458,25 +458,25 @@ gen_insn (md_rtx_info *info, FILE *file) /* Generate the `gen_...' function for a DEFINE_EXPAND. */ static void -gen_expand (md_rtx_info *info, FILE *file) +gen_expand (const md_rtx_info &info, FILE *file) { struct pattern_stats stats; int i; char *used; - rtx expand = info->def; + rtx expand = info.def; if (strlen (XSTR (expand, 0)) == 0) -fatal_at (info->loc, "define_expand lacks a name"); +fatal_at (info.loc, "define_expand lacks a name"); if (XVEC (expand, 1) == 0) -fatal_at (info->loc, "define_expand for %s lacks a pattern", +fatal_at (info.loc, "define_expand for %s lacks a pattern", XSTR (expand, 0)); /* Find out how many operands this function has. 
*/ get_pattern_stats (&stats, XVEC (expand, 1)); if (stats.min_scratch_opno != -1 && stats.min_scratch_opno <= MAX (stats.max_opno, stats.max_dup_opno)) -fatal_at (info->loc, "define_expand for %s needs to have match_scratch " -"numbers above all other operands", XSTR (expand, 0)); +fatal_at (info.loc, "define_expand for %s needs to have match_scratch " + "numbers above all other operands", XSTR (expand, 0)); /* Output the function name and argument declarations. */ fprintf (file, "rtx\ngen_%s (", XSTR (expand, 0)); @@ -567,21 +567,21 @@ gen_expand (md_rtx_info *info, FILE *file) /* Like gen_expand, but generates insns resulting from splitting SPLIT. */ static void -gen_split (md_rtx_info *info, FILE *file) +gen_split (const md_rtx_info &info, FILE *file) { struct pattern_stats stats; int i; - rtx split = i
[gcc r16-777] xstormy16: Avoid accessing beyond the operands[] array
https://gcc.gnu.org/g:856f6de5d19257e3c5802a250e7c749ca44beee3 commit r16-777-g856f6de5d19257e3c5802a250e7c749ca44beee3 Author: Richard Sandiford Date: Wed May 21 10:01:26 2025 +0100 xstormy16: Avoid accessing beyond the operands[] array The negsi2 C++ code writes to operands[2] even though the pattern has no operand 2. gcc/ * config/stormy16/stormy16.md (negsi2): Remove unused assignment. Diff: --- gcc/config/stormy16/stormy16.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/gcc/config/stormy16/stormy16.md b/gcc/config/stormy16/stormy16.md index 70c82827a4ac..15c60ad03880 100644 --- a/gcc/config/stormy16/stormy16.md +++ b/gcc/config/stormy16/stormy16.md @@ -702,8 +702,7 @@ [(parallel [(set (match_operand:SI 0 "register_operand" "") (neg:SI (match_operand:SI 1 "register_operand" ""))) (clobber (reg:BI CARRY_REG))])] - "" - { operands[2] = gen_reg_rtx (HImode); }) + "") (define_insn_and_split "*negsi2_internal" [(set (match_operand:SI 0 "register_operand" "=&r")
[gcc r16-788] genemit: Use a byte encoding to generate insns
https://gcc.gnu.org/g:d63c889d5cd3ef00ec5b0c3389448eab4f7d2b68 commit r16-788-gd63c889d5cd3ef00ec5b0c3389448eab4f7d2b68 Author: Richard Sandiford Date: Wed May 21 10:01:32 2025 +0100 genemit: Use a byte encoding to generate insns genemit has traditionally used open-coded gen_rtx_FOO sequences to build up the instruction pattern. This is now the source of quite a bit of bloat in the binary, and also a source of slow compile times. Two obvious ways of trying to deal with this are: (1) Try to identify rtxes that have a similar form and use shared routines to generate rtxes of that form. (2) Use a static table to encode the rtx and call a common routine to expand it. I did briefly look at (1). However, it's more complex than (2), and I think suffers from being the worst of both worlds, for reasons that I'll explain below. This patch therefore does (2). In theory, one of the advantages of open-coding the calls to gen_rtx_FOO is that the rtx can be populated using stores of known constants (for the rtx code, mode, unspec number, etc). However, the time spent constructing an rtx is likely to be dominated by the call to rtx_alloc, rather than by the stores to the fields. Option (1) above loses this advantage of storing constants. The shared routines would parameterise an rtx according to things like the modes on the rtx and its suboperands, so the code would need to fetch the parameters. In a sense, the rtx structure would be open-coded but the parameters would be table-encoded (albeit in a simple way). The expansion code also shouldn't be particularly hot. Anything that treats expand/discard cycles as very cheap would be misconceived, since each discarded expansion generates garbage memory that needs to be cleaned up later. Option (2) turns out to be pretty simple -- certainly simpler than (1) -- and seems to give a reasonable saving. 
Some numbers, all for --enable-checking=yes,rtl,extra: [A] size of the @progbits sections in insn-emit-*.o, new / old [B] size of the load segments in cc1, new / old [C] time to compile a typical insn-emit*.cc, new / old Target [A] [B] [C] native aarch64 0.5627 0.9585 0.5677 native x86_64 0.5925 0.9467 0.6377 aarch64-x-riscv64 0. 0.9066 0.2762 To get an idea of the effect on the final compiler, I tried compiling fold-const.ii with -O0 (no -g), since that should give any slowdown less room to hide. I couldn't measure any difference in compile time before or after the patch for any of the three variants above. gcc/ * gensupport.h (needs_barrier_p): Delete. * gensupport.cc (needs_barrier_p): Likewise. * rtl.h (always_void_p): Return true for PC, RETURN and SIMPLE_RETURN. (expand_opcode): New enum class. (expand_rtx, complete_seq): Declare. * emit-rtl.cc (rtx_expander): New class. (expand_rtx, complete_seq): New functions. * gengenrtl.cc (special_rtx, excluded_rtx): Add a cross-reference comment. * genemit.cc (FIRST_CODE): New constant. (print_code): Delete. (generator::file, generator::used, generator::sequence_type): Delete. (generator::bytes): New member variable. (generator::generator): Update accordingly. (generator::gen_rtx_scratch): Delete. (generator::add_uint, generator::add_opcode, generator::add_code) (generator::add_match_operator, generator::add_exp) (generator::add_vec, generator::gen_table): New member functions. (generator::gen_exp): Rewrite to use a bytecode expansion. (generator::gen_emit_seq): Likewise. (start_gen_insn): Return the C++ expression for the operands array. (gen_insn, gen_expand, gen_split): Update callers accordingly. (emit_c_code): Remove use of _val. 
Diff: --- gcc/emit-rtl.cc | 292 + gcc/genemit.cc| 346 ++ gcc/gengenrtl.cc | 10 +- gcc/gensupport.cc | 10 -- gcc/gensupport.h | 1 - gcc/rtl.h | 42 ++- 6 files changed, 480 insertions(+), 221 deletions(-) diff --git a/gcc/emit-rtl.cc b/gcc/emit-rtl.cc index 6c838d7afcc6..3f453cda67ed 100644 --- a/gcc/emit-rtl.cc +++ b/gcc/emit-rtl.cc @@ -64,6 +64,7 @@ along with GCC; see the file COPYING3. If not see #include "gimple.h" #include "gimple-ssa.h" #include "gimplify.h" +#include "bbitmap.h" struct target_rtl default_target_rtl; #if SWITCHABLE_TARGET @@ -6788,6 +6789,297 @@ gen_int_shift_amount (machine_mode, poly_int64 value) return gen_int_mode
[gcc r16-778] sparc: Avoid operandN variables in .md files
https://gcc.gnu.org/g:35dd60935336eb574194f2fe2088133f34c8 commit r16-778-g35dd60935336eb574194f2fe2088133f34c8 Author: Richard Sandiford Date: Wed May 21 10:01:27 2025 +0100 sparc: Avoid operandN variables in .md files The automatically-generated gen_* routines take their operands as individual arguments, named "operand0" upwards. These arguments are stored into an "operands" array before invoking the expander's C++ code, which can then modify the operands by writing to the array. However, the SPARC sign-extend and zero-extend expanders used the operandN variables directly, rather than operands[N]. That's a correct usage in context, since the code goes on to expand the pattern manually and invoke DONE. But it's also easy for code to accidentally write to operandN instead of operands[N] when trying to set up something like a match_dup. It sounds like Jeff had seen an instance of this. A later patch is therefore going to mark the operandN arguments as const. This patch makes way for that by using operands[N] instead of operandN for the SPARC expanders. gcc/ * config/sparc/sparc.md (zero_extendhisi2, zero_extendhidi2) (extendhisi2, extendqihi2, extendqisi2, extendqidi2) (extendhidi2): Use operands[0] and operands[1] instead of operand0 and operand1. 
Diff: --- gcc/config/sparc/sparc.md | 87 +-- 1 file changed, 47 insertions(+), 40 deletions(-) diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md index 4d46cfd0fb20..c6e06b4467fe 100644 --- a/gcc/config/sparc/sparc.md +++ b/gcc/config/sparc/sparc.md @@ -3014,17 +3014,18 @@ rtx shift_16 = GEN_INT (16); int op1_subbyte = 0; - if (GET_CODE (operand1) == SUBREG) + if (GET_CODE (operands[1]) == SUBREG) { - op1_subbyte = SUBREG_BYTE (operand1); + op1_subbyte = SUBREG_BYTE (operands[1]); op1_subbyte /= GET_MODE_SIZE (SImode); op1_subbyte *= GET_MODE_SIZE (SImode); - operand1 = XEXP (operand1, 0); + operands[1] = XEXP (operands[1], 0); } - emit_insn (gen_ashlsi3 (temp, gen_rtx_SUBREG (SImode, operand1, op1_subbyte), + emit_insn (gen_ashlsi3 (temp, gen_rtx_SUBREG (SImode, operands[1], + op1_subbyte), shift_16)); - emit_insn (gen_lshrsi3 (operand0, temp, shift_16)); + emit_insn (gen_lshrsi3 (operands[0], temp, shift_16)); DONE; }) @@ -3097,17 +3098,18 @@ rtx shift_48 = GEN_INT (48); int op1_subbyte = 0; - if (GET_CODE (operand1) == SUBREG) + if (GET_CODE (operands[1]) == SUBREG) { - op1_subbyte = SUBREG_BYTE (operand1); + op1_subbyte = SUBREG_BYTE (operands[1]); op1_subbyte /= GET_MODE_SIZE (DImode); op1_subbyte *= GET_MODE_SIZE (DImode); - operand1 = XEXP (operand1, 0); + operands[1] = XEXP (operands[1], 0); } - emit_insn (gen_ashldi3 (temp, gen_rtx_SUBREG (DImode, operand1, op1_subbyte), + emit_insn (gen_ashldi3 (temp, gen_rtx_SUBREG (DImode, operands[1], + op1_subbyte), shift_48)); - emit_insn (gen_lshrdi3 (operand0, temp, shift_48)); + emit_insn (gen_lshrdi3 (operands[0], temp, shift_48)); DONE; }) @@ -3283,17 +3285,18 @@ rtx shift_16 = GEN_INT (16); int op1_subbyte = 0; - if (GET_CODE (operand1) == SUBREG) + if (GET_CODE (operands[1]) == SUBREG) { - op1_subbyte = SUBREG_BYTE (operand1); + op1_subbyte = SUBREG_BYTE (operands[1]); op1_subbyte /= GET_MODE_SIZE (SImode); op1_subbyte *= GET_MODE_SIZE (SImode); - operand1 = XEXP (operand1, 0); + operands[1] = 
XEXP (operands[1], 0); } - emit_insn (gen_ashlsi3 (temp, gen_rtx_SUBREG (SImode, operand1, op1_subbyte), + emit_insn (gen_ashlsi3 (temp, gen_rtx_SUBREG (SImode, operands[1], + op1_subbyte), shift_16)); - emit_insn (gen_ashrsi3 (operand0, temp, shift_16)); + emit_insn (gen_ashrsi3 (operands[0], temp, shift_16)); DONE; }) @@ -3315,25 +3318,26 @@ int op1_subbyte = 0; int op0_subbyte = 0; - if (GET_CODE (operand1) == SUBREG) + if (GET_CODE (operands[1]) == SUBREG) { - op1_subbyte = SUBREG_BYTE (operand1); + op1_subbyte = SUBREG_BYTE (operands[1]); op1_subbyte /= GET_MODE_SIZE (SImode); op1_subbyte *= GET_MODE_SIZE (SImode); - operand1 = XEXP (operand1, 0); + operands[1] = XEXP (operands[1], 0); } - if (GET_CODE (operand0) == SUBREG) + if (GET_CODE (operands[0]) == SUBREG) { - op0_subbyte = SUBREG_BYTE (operand0); + op0_subbyte = SUBREG_BYTE (operands[0]); op0_subbyte /= GET_MODE_SIZE (SImode); op0_subbyte *= GET_MODE_SIZE (SImode); - operand0 = XEXP (operand0, 0); + operands[0] = XEXP (operands[0], 0); } -
[gcc r16-783] genemit: Add a generator struct
https://gcc.gnu.org/g:88b849ffb9fc4b6de3786784b4c4b074758cc2a1 commit r16-783-g88b849ffb9fc4b6de3786784b4c4b074758cc2a1 Author: Richard Sandiford Date: Wed May 21 10:01:29 2025 +0100 genemit: Add a generator struct gen_exp now has quite a few arguments that need to be passed to each recursive call. This patch turns it and related routines into member functions of a new generator class, so that the shared information can be stored in member variables. This also helps to make later patches less noisy. gcc/ * genemit.cc (generator): New structure. (gen_rtx_scratch, gen_exp, gen_emit_seq): Turn into member functions of generator. (gen_insn, gen_expand, gen_split, output_add_clobbers): Update users accordingly. Diff: --- gcc/genemit.cc | 76 ++ 1 file changed, 55 insertions(+), 21 deletions(-) diff --git a/gcc/genemit.cc b/gcc/genemit.cc index ed87f1a903d3..44be50fc933c 100644 --- a/gcc/genemit.cc +++ b/gcc/genemit.cc @@ -66,8 +66,40 @@ print_code (RTX_CODE code, FILE *file) fprintf (file, "%c", TOUPPER (*p1)); } -static void -gen_rtx_scratch (rtx x, enum rtx_code subroutine_type, FILE *file) +/* A structure used to generate code for a particular expansion. */ +struct generator +{ + generator (rtx_code, char *, const md_rtx_info &, FILE *); + + void gen_rtx_scratch (rtx); + void gen_exp (rtx); + void gen_emit_seq (rtvec); + + /* The type of subroutine that we're expanding. */ + rtx_code subroutine_type; + + /* If nonnull, index N indicates that the original operand N has already + been used to replace a MATCH_OPERATOR or MATCH_DUP, and so any further + replacements must make a copy. */ + char *used; + + /* The construct that we're expanding. */ + const md_rtx_info info; + + /* The output file. 
*/ + FILE *file; +}; + +generator::generator (rtx_code subroutine_type, char *used, + const md_rtx_info &info, FILE *file) + : subroutine_type (subroutine_type), +used (used), +info (info), +file (file) +{} + +void +generator::gen_rtx_scratch (rtx x) { if (subroutine_type == DEFINE_PEEPHOLE2) { @@ -82,9 +114,8 @@ gen_rtx_scratch (rtx x, enum rtx_code subroutine_type, FILE *file) /* Print a C expression to construct an RTX just like X, substituting any operand references appearing within. */ -static void -gen_exp (rtx x, enum rtx_code subroutine_type, char *used, -const md_rtx_info &info, FILE *file) +void +generator::gen_exp (rtx x) { RTX_CODE code; int i; @@ -128,7 +159,7 @@ gen_exp (rtx x, enum rtx_code subroutine_type, char *used, for (i = 0; i < XVECLEN (x, 1); i++) { fprintf (file, ",\n\t\t"); - gen_exp (XVECEXP (x, 1, i), subroutine_type, used, info, file); + gen_exp (XVECEXP (x, 1, i)); } fprintf (file, ")"); return; @@ -142,7 +173,7 @@ gen_exp (rtx x, enum rtx_code subroutine_type, char *used, for (i = 0; i < XVECLEN (x, 2); i++) { fprintf (file, ",\n\t\t"); - gen_exp (XVECEXP (x, 2, i), subroutine_type, used, info, file); + gen_exp (XVECEXP (x, 2, i)); } fprintf (file, ")"); return; @@ -153,7 +184,7 @@ gen_exp (rtx x, enum rtx_code subroutine_type, char *used, return; case MATCH_SCRATCH: - gen_rtx_scratch (x, subroutine_type, file); + gen_rtx_scratch (x); return; case PC: @@ -234,7 +265,7 @@ gen_exp (rtx x, enum rtx_code subroutine_type, char *used, switch (fmt[i]) { case 'e': case 'u': - gen_exp (XEXP (x, i), subroutine_type, used, info, file); + gen_exp (XEXP (x, i)); break; case 'i': @@ -266,7 +297,7 @@ gen_exp (rtx x, enum rtx_code subroutine_type, char *used, for (j = 0; j < XVECLEN (x, i); j++) { fprintf (file, ",\n\t\t"); - gen_exp (XVECEXP (x, i, j), subroutine_type, used, info, file); + gen_exp (XVECEXP (x, i, j)); } fprintf (file, ")"); break; @@ -281,10 +312,10 @@ gen_exp (rtx x, enum rtx_code subroutine_type, char *used, } /* Output code to 
emit the instruction patterns in VEC, with each element - becoming a separate instruction. USED is as for gen_exp. */ + becoming a separate instruction. */ -static void -gen_emit_seq (rtvec vec, char *used, const md_rtx_info &info, FILE *file) +void +generator::gen_emit_seq (rtvec vec) { for (int i = 0, len = GET_NUM_ELEM (vec); i < len; ++i) { @@ -293,7 +324,7 @@ gen_emit_seq (rtvec vec, char *used, const md_rtx_info &info, FILE *file) if (const char *name = get_emit_function (next)) { fprintf (file, " %s (", name); - gen_exp (next, DEFINE_EXPAND, used, info, file); + gen_exp (next); fprintf (file, ");\n
[gcc r16-785] genemit: Remove purported handling of location_ts
https://gcc.gnu.org/g:efbc8de515c71c27e881d425f8325e39f7b4f328 commit r16-785-gefbc8de515c71c27e881d425f8325e39f7b4f328 Author: Richard Sandiford Date: Wed May 21 10:01:30 2025 +0100 genemit: Remove purported handling of location_ts gen_exp had code to handle the 'L' operand format. But this format is specifically for location_ts, which are only used in RTX_INSNs. Those should never occur in this context, where the input is always an md file rather than an __RTL function. Any hard-coded raw location value would be meaningless anyway. It seemed safer to turn this into an error rather than a gcc_unreachable. gcc/ * genemit.cc (generator::gen_exp): Raise an error if we see an 'L' operand. Diff: --- gcc/genemit.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gcc/genemit.cc b/gcc/genemit.cc index 0529b916455f..9923cf078b96 100644 --- a/gcc/genemit.cc +++ b/gcc/genemit.cc @@ -270,7 +270,8 @@ generator::gen_exp (rtx x) break; case 'L': - fprintf (file, "%llu", (unsigned long long) XLOC (x, i)); + fatal_at (info.loc, "'%s' rtxes are not supported in this context", + GET_RTX_NAME (code)); break; case 'r':
[gcc r16-786] genemit: Remove support for string operands
https://gcc.gnu.org/g:97d2686decc34400e585bbc725602757c91e3fbf commit r16-786-g97d2686decc34400e585bbc725602757c91e3fbf Author: Richard Sandiford Date: Wed May 21 10:01:31 2025 +0100 genemit: Remove support for string operands gen_exp currently supports the 's' (string) operand type. It would certainly be possible to make the upcoming bytecode patch support that too. However, the rtx codes that have string operands should be very rarely used in hard-coded define_insn/expand/split/peephole2 rtx templates (as opposed to things like attribute expressions, where const_string is commonplace). And AFAICT, no current target does use them like that. This patch therefore reports an error for these rtx codes, rather than adding code that would be unused and untested. gcc/ * genemit.cc (generator::gen_exp): Report an error for 's' operands. Diff: --- gcc/genemit.cc | 5 + 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/gcc/genemit.cc b/gcc/genemit.cc index 9923cf078b96..ba64290af53e 100644 --- a/gcc/genemit.cc +++ b/gcc/genemit.cc @@ -270,6 +270,7 @@ generator::gen_exp (rtx x) break; case 'L': + case 's': fatal_at (info.loc, "'%s' rtxes are not supported in this context", GET_RTX_NAME (code)); break; @@ -284,10 +285,6 @@ generator::gen_exp (rtx x) fprintf (file, "%d", SUBREG_BYTE (x).to_constant ()); break; - case 's': - fprintf (file, "\"%s\"", XSTR (x, i)); - break; - case 'E': { int j;
[gcc r16-787] genemit: Avoid using gen_exp in output_add_clobbers
https://gcc.gnu.org/g:aca0cf1150d6f6be9ee451b5f91f505aef911f8e commit r16-787-gaca0cf1150d6f6be9ee451b5f91f505aef911f8e Author: Richard Sandiford Date: Wed May 21 10:01:31 2025 +0100 genemit: Avoid using gen_exp in output_add_clobbers output_add_clobbers emits code to add: (clobber (scratch:M)) and/or: (clobber (reg:M R)) expressions to the end of a PARALLEL. At the moment, it does this using the general gen_exp function. That makes sense with the code in its current form, but with later patches it's more convenient to handle the two cases directly. This also avoids having to pass an md_rtx_info that is unrelated to the clobber expressions. gcc/ * genemit.cc (clobber_pat::code): Delete. (maybe_queue_insn): Don't set clobber_pat::code. (output_add_clobbers): Remove info argument and output the two REG and SCRATCH cases directly. (main): Update call accordingly. Diff: --- gcc/genemit.cc | 22 -- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/gcc/genemit.cc b/gcc/genemit.cc index ba64290af53e..21eb0f2df7d2 100644 --- a/gcc/genemit.cc +++ b/gcc/genemit.cc @@ -39,7 +39,6 @@ struct clobber_pat int first_clobber; struct clobber_pat *next; int has_hard_reg; - rtx_code code; } *clobber_list; /* Records one insn that uses the clobber list. */ @@ -435,7 +434,6 @@ maybe_queue_insn (const md_rtx_info &info) p->first_clobber = i + 1; p->next = clobber_list; p->has_hard_reg = has_hard_reg; - p->code = GET_CODE (insn); clobber_list = p; } @@ -691,7 +689,7 @@ gen_split (const md_rtx_info &info, FILE *file) the end of the vector. 
*/ static void -output_add_clobbers (const md_rtx_info &info, FILE *file) +output_add_clobbers (FILE *file) { struct clobber_pat *clobber; struct clobber_ent *ent; @@ -709,12 +707,16 @@ output_add_clobbers (const md_rtx_info &info, FILE *file) for (i = clobber->first_clobber; i < GET_NUM_ELEM (clobber->pattern); i++) { - fprintf (file, " XVECEXP (pattern, 0, %d) = ", i); - rtx clobbered_value = RTVEC_ELT (clobber->pattern, i); - /* Pass null for USED since there are no operands. */ - generator (clobber->code, NULL, info, file) - .gen_exp (clobbered_value); - fprintf (file, ";\n"); + fprintf (file, "XVECEXP (pattern, 0, %d) =" + " gen_rtx_CLOBBER (VOIDmode, ", i); + rtx x = XEXP (RTVEC_ELT (clobber->pattern, i), 0); + if (REG_P (x)) + fprintf (file, "gen_rtx_REG (%smode, %d)", +GET_MODE_NAME (GET_MODE (x)), REGNO (x)); + else + fprintf (file, "gen_rtx_SCRATCH (%smode)", +GET_MODE_NAME (GET_MODE (x))); + fprintf (file, ");\n"); } fprintf (file, " break;\n\n"); @@ -1034,7 +1036,7 @@ main (int argc, const char **argv) /* Write out the routines to add CLOBBERs to a pattern and say whether they clobber a hard reg. */ - output_add_clobbers (info, file); + output_add_clobbers (file); output_added_clobbers_hard_reg_p (file); for (overloaded_name *oname = rtx_reader_ptr->get_overloads ();
[gcc r16-784] genemit: Always track multiple uses of operands
https://gcc.gnu.org/g:8ebe8f5eff9fda40f22b9df7a0b8a6c2fdf5f8d7 commit r16-784-g8ebe8f5eff9fda40f22b9df7a0b8a6c2fdf5f8d7 Author: Richard Sandiford Date: Wed May 21 10:01:30 2025 +0100 genemit: Always track multiple uses of operands gen_exp has code to detect when the same operand is used multiple times. It ensures that second and subsequent uses call copy_rtx, to enforce correct unsharing. However, for historical reasons that aren't clear to me, this was skipped for a define_insn unless the define_insn was a parallel. It was also skipped for a single define_expand instruction, regardless of its contents. This meant that a single parallel instruction was treated differently between define_insn (where sharing rules were followed) and define_expand (where sharing rules weren't followed). define_splits and define_peephole2s followed the sharing rules in all cases. This patch makes everything follow the sharing rules. The code it touches will be removed by the proposed bytecode-based expansion, which will use its own tracking when enforcing sharing rules. However, it seemed better for staging and bisection purposes to make this change first. gcc/ * genemit.cc (generator::used): Update comment. (generator::gen_exp): Remove handling of null used arrays. (gen_insn, gen_expand): Always pass a used array. (output_add_clobbers): Note why the used array is null here. Diff: --- gcc/genemit.cc | 27 --- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/gcc/genemit.cc b/gcc/genemit.cc index 44be50fc933c..0529b916455f 100644 --- a/gcc/genemit.cc +++ b/gcc/genemit.cc @@ -78,9 +78,9 @@ struct generator /* The type of subroutine that we're expanding. */ rtx_code subroutine_type; - /* If nonnull, index N indicates that the original operand N has already - been used to replace a MATCH_OPERATOR or MATCH_DUP, and so any further - replacements must make a copy. 
*/ + /* Index N indicates that the original operand N has already been used to + replace a MATCH_OPERATOR or MATCH_DUP, and so any further replacements + must make a copy. */ char *used; /* The construct that we're expanding. */ @@ -135,15 +135,12 @@ generator::gen_exp (rtx x) { case MATCH_OPERAND: case MATCH_DUP: - if (used) + if (used[XINT (x, 0)]) { - if (used[XINT (x, 0)]) - { - fprintf (file, "copy_rtx (operands[%d])", XINT (x, 0)); - return; - } - used[XINT (x, 0)] = 1; + fprintf (file, "copy_rtx (operands[%d])", XINT (x, 0)); + return; } + used[XINT (x, 0)] = 1; fprintf (file, "operands[%d]", XINT (x, 0)); return; @@ -505,10 +502,7 @@ gen_insn (const md_rtx_info &info, FILE *file) /* Output code to construct and return the rtl for the instruction body. */ rtx pattern = add_implicit_parallel (XVEC (insn, 1)); - /* ??? This is the traditional behavior, but seems suspect. */ - char *used = (XVECLEN (insn, 1) == 1 - ? NULL - : XCNEWVEC (char, stats.num_generator_args)); + char *used = XCNEWVEC (char, stats.num_generator_args); fprintf (file, " return "); generator (DEFINE_INSN, used, info, file).gen_exp (pattern); fprintf (file, ";\n}\n\n"); @@ -555,10 +549,12 @@ gen_expand (const md_rtx_info &info, FILE *file) && stats.max_opno >= stats.max_dup_opno && XVECLEN (expand, 1) == 1) { + used = XCNEWVEC (char, stats.num_operand_vars); fprintf (file, " return "); - generator (DEFINE_EXPAND, NULL, info, file) + generator (DEFINE_EXPAND, used, info, file) .gen_exp (XVECEXP (expand, 1, 0)); fprintf (file, ";\n}\n\n"); + XDELETEVEC (used); return; } @@ -717,6 +713,7 @@ output_add_clobbers (const md_rtx_info &info, FILE *file) { fprintf (file, " XVECEXP (pattern, 0, %d) = ", i); rtx clobbered_value = RTVEC_ELT (clobber->pattern, i); + /* Pass null for USED since there are no operands. */ generator (clobber->code, NULL, info, file) .gen_exp (clobbered_value); fprintf (file, ";\n");
[gcc r16-780] genemit: Add an internal queue
https://gcc.gnu.org/g:4fafb14e1f2ea068f2eb1a29ffb54d9984ab154d commit r16-780-g4fafb14e1f2ea068f2eb1a29ffb54d9984ab154d Author: Richard Sandiford Date: Wed May 21 10:01:28 2025 +0100 genemit: Add an internal queue An earlier version of this series wanted to collect information about all the gen_* functions that are going to be generated. The current version no longer does that, but the queue seemed worth keeping anyway, since it gives a more consistent structure. gcc/ * genemit.cc (queue): New static variable. (maybe_queue_insn): New function, split out from... (gen_insn): ...here. (queue_expand): New function, split out from... (gen_expand): ...here. (queue_split): New function, split out from... (gen_split): ...here. (main): Queue definitions for later processing rather than emitting them on the fly. Diff: --- gcc/genemit.cc | 97 ++ 1 file changed, 71 insertions(+), 26 deletions(-) diff --git a/gcc/genemit.cc b/gcc/genemit.cc index cb4ae47294da..b73a45a04125 100644 --- a/gcc/genemit.cc +++ b/gcc/genemit.cc @@ -55,6 +55,9 @@ static void output_peephole2_scratches(rtx, FILE*); /* True for _optab if that optab isn't allowed to fail. */ static bool nofail_optabs[NUM_OPTABS]; +/* A list of the md constructs that need a gen_* function. */ +static vec queue; + static void print_code (RTX_CODE code, FILE *file) { @@ -326,14 +329,12 @@ emit_c_code (const char *code, bool can_fail_p, const char *name, FILE *file) fprintf (file, "#undef FAIL\n"); } -/* Generate the `gen_...' function for a DEFINE_INSN. */ +/* Process the DEFINE_INSN in LOC, and queue it if it needs a gen_* + function. */ static void -gen_insn (const md_rtx_info &info, FILE *file) +maybe_queue_insn (const md_rtx_info &info) { - struct pattern_stats stats; - int i; - /* See if the pattern for this insn ends with a group of CLOBBERs of (hard) registers or MATCH_SCRATCHes. If so, store away the information for later. 
*/ @@ -349,6 +350,7 @@ gen_insn (const md_rtx_info &info, FILE *file) && GET_CODE (RTVEC_ELT (pattern, 0)) == PARALLEL) pattern = XVEC (RTVEC_ELT (pattern, 0), 0); + int i; for (i = GET_NUM_ELEM (pattern) - 1; i > 0; i--) { if (GET_CODE (RTVEC_ELT (pattern, i)) != CLOBBER) @@ -422,9 +424,19 @@ gen_insn (const md_rtx_info &info, FILE *file) if (XSTR (insn, 0)[0] == 0 || XSTR (insn, 0)[0] == '*') return; - fprintf (file, "/* %s:%d */\n", info.loc.filename, info.loc.lineno); + queue.safe_push (info); +} + +/* Generate the `gen_...' function for a DEFINE_INSN. */ + +static void +gen_insn (const md_rtx_info &info, FILE *file) +{ + struct pattern_stats stats; + int i; /* Find out how many operands this function has. */ + rtx insn = info.def; get_pattern_stats (&stats, XVEC (insn, 1)); if (stats.max_dup_opno > stats.max_opno) fatal_at (info.loc, "match_dup operand number has no match_operand"); @@ -455,23 +467,31 @@ gen_insn (const md_rtx_info &info, FILE *file) XDELETEVEC (used); } -/* Generate the `gen_...' function for a DEFINE_EXPAND. */ +/* Process and queue the DEFINE_EXPAND in INFO. */ static void -gen_expand (const md_rtx_info &info, FILE *file) +queue_expand (const md_rtx_info &info) { - struct pattern_stats stats; - int i; - char *used; - rtx expand = info.def; if (strlen (XSTR (expand, 0)) == 0) fatal_at (info.loc, "define_expand lacks a name"); if (XVEC (expand, 1) == 0) fatal_at (info.loc, "define_expand for %s lacks a pattern", XSTR (expand, 0)); + queue.safe_push (info); +} + +/* Generate the `gen_...' function for a DEFINE_EXPAND. */ + +static void +gen_expand (const md_rtx_info &info, FILE *file) +{ + struct pattern_stats stats; + int i; + char *used; /* Find out how many operands this function has. 
*/ + rtx expand = info.def; get_pattern_stats (&stats, XVEC (expand, 1)); if (stats.min_scratch_opno != -1 && stats.min_scratch_opno <= MAX (stats.max_opno, stats.max_dup_opno)) @@ -564,7 +584,24 @@ gen_expand (const md_rtx_info &info, FILE *file) fprintf (file, " return _val;\n}\n\n"); } -/* Like gen_expand, but generates insns resulting from splitting SPLIT. */ +/* Process and queue the DEFINE_SPLIT or DEFINE_PEEPHOLE2 in INFO. */ + +static void +queue_split (const md_rtx_info &info) +{ + rtx split = info.def; + + if (XVEC (split, 0) == 0) +fatal_at (info.loc, "%s lacks a pattern", + GET_RTX_NAME (GET_CODE (split))); + if (XVEC (split, 2) == 0) +fatal_at (info.loc, "%s lacks a replacement pattern", + GET_RTX_NAME (GET_CODE (split))); + + queue.safe_push (info); +} + +/* Generate the `gen_...' function for a DEFINE_SPLIT or DEFINE_PEEPHOLE2. */ static void gen_split (const md
[gcc r16-782] genemit: Consistently use operand arrays in gen_* functions
https://gcc.gnu.org/g:02c3910f75ddae52dd59775bf9a6c4452bbdd0ac commit r16-782-g02c3910f75ddae52dd59775bf9a6c4452bbdd0ac Author: Richard Sandiford Date: Wed May 21 10:01:29 2025 +0100 genemit: Consistently use operand arrays in gen_* functions One slightly awkward part about emitting the generator function bodies is that: * define_insn and define_expand routines have a separate argument for each operand, named "operand0" upwards. * define_split and define_peephole2 routines take a pointer to an array, named "operands". * the C++ preparation code for expands, splits and peephole2s uses an array called "operands" to refer to the operands. * the automatically-generated code uses individual "operand" variables to refer to the operands. So define_expands have to store the incoming arguments into an operands array before the md file's C++ code, then copy the operands array back to the individual variables before the automatically-generated code. splits and peephole2s have to copy the incoming operands array to individual variables after the md file's C++ code, creating more local variables that are live across calls to rtx_alloc. This patch tries to simplify things by making the whole function body use the operands array in preference to individual variables. define_insns and define_expands store their arguments to the array on entry. This would have pros and cons on its own, but having a single array helps with future efforts to reduce the duplication between gen_* functions. gcc/ * genemit.cc (gen_rtx_scratch, gen_exp): Use operands[%d] rather than operand%d. (start_gen_insn): Mark the incoming arguments as const and store them to an operands array. (gen_expand, gen_split): Remove copies into and out of the operands array. 
Diff: --- gcc/genemit.cc | 61 ++ 1 file changed, 19 insertions(+), 42 deletions(-) diff --git a/gcc/genemit.cc b/gcc/genemit.cc index 90f36e293b4b..ed87f1a903d3 100644 --- a/gcc/genemit.cc +++ b/gcc/genemit.cc @@ -71,7 +71,7 @@ gen_rtx_scratch (rtx x, enum rtx_code subroutine_type, FILE *file) { if (subroutine_type == DEFINE_PEEPHOLE2) { - fprintf (file, "operand%d", XINT (x, 0)); + fprintf (file, "operands[%d]", XINT (x, 0)); } else { @@ -108,21 +108,21 @@ gen_exp (rtx x, enum rtx_code subroutine_type, char *used, { if (used[XINT (x, 0)]) { - fprintf (file, "copy_rtx (operand%d)", XINT (x, 0)); + fprintf (file, "copy_rtx (operands[%d])", XINT (x, 0)); return; } used[XINT (x, 0)] = 1; } - fprintf (file, "operand%d", XINT (x, 0)); + fprintf (file, "operands[%d]", XINT (x, 0)); return; case MATCH_OP_DUP: fprintf (file, "gen_rtx_fmt_"); for (i = 0; i < XVECLEN (x, 1); i++) fprintf (file, "e"); - fprintf (file, " (GET_CODE (operand%d), ", XINT (x, 0)); + fprintf (file, " (GET_CODE (operands[%d]), ", XINT (x, 0)); if (GET_MODE (x) == VOIDmode) - fprintf (file, "GET_MODE (operand%d)", XINT (x, 0)); + fprintf (file, "GET_MODE (operands[%d])", XINT (x, 0)); else fprintf (file, "%smode", GET_MODE_NAME (GET_MODE (x))); for (i = 0; i < XVECLEN (x, 1); i++) @@ -137,7 +137,7 @@ gen_exp (rtx x, enum rtx_code subroutine_type, char *used, fprintf (file, "gen_rtx_fmt_"); for (i = 0; i < XVECLEN (x, 2); i++) fprintf (file, "e"); - fprintf (file, " (GET_CODE (operand%d)", XINT (x, 0)); + fprintf (file, " (GET_CODE (operands[%d])", XINT (x, 0)); fprintf (file, ", %smode", GET_MODE_NAME (GET_MODE (x))); for (i = 0; i < XVECLEN (x, 2); i++) { @@ -149,7 +149,7 @@ gen_exp (rtx x, enum rtx_code subroutine_type, char *used, case MATCH_PARALLEL: case MATCH_PAR_DUP: - fprintf (file, "operand%d", XINT (x, 0)); + fprintf (file, "operands[%d]", XINT (x, 0)); return; case MATCH_SCRATCH: @@ -437,14 +437,22 @@ start_gen_insn (FILE *file, const char *name, const pattern_stats &stats) fprintf (file, 
"rtx\ngen_%s (", name); if (stats.num_generator_args) for (int i = 0; i < stats.num_generator_args; i++) - if (i) - fprintf (file, ",\n\trtx operand%d ATTRIBUTE_UNUSED", i); - else - fprintf (file, "rtx operand%d ATTRIBUTE_UNUSED", i); + fprintf (file, "%sconst rtx operand%d", i == 0 ? "" : ", ", i); else fprintf (file, "void"); fprintf (file, ")\n"); fprintf (file, "{\n"); + if (stats.num_generator_args) +{ + fprintf (file, " rtx operands[%d] ATTRIBUTE_UNUSED = {", + stats.num_operand_vars); + for (int i = 0; i < stats.num_generator_args; i++) + fp
[gcc r16-781] genemit: Factor out code common to insns and expands
https://gcc.gnu.org/g:5355568c75a99fc621e2008fa98626ad811678c5 commit r16-781-g5355568c75a99fc621e2008fa98626ad811678c5 Author: Richard Sandiford Date: Wed May 21 10:01:28 2025 +0100 genemit: Factor out code common to insns and expands Mostly to reduce cut-&-paste. gcc/ * genemit.cc (start_gen_insn): New function, split out from... (gen_insn, gen_expand): ...here. Diff: --- gcc/genemit.cc | 45 ++--- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/gcc/genemit.cc b/gcc/genemit.cc index b73a45a04125..90f36e293b4b 100644 --- a/gcc/genemit.cc +++ b/gcc/genemit.cc @@ -427,13 +427,32 @@ maybe_queue_insn (const md_rtx_info &info) queue.safe_push (info); } +/* Output the function name, argument declarations, and initial function + body for a pattern called NAME, given that it has the properties + in STATS. */ + +static void +start_gen_insn (FILE *file, const char *name, const pattern_stats &stats) +{ + fprintf (file, "rtx\ngen_%s (", name); + if (stats.num_generator_args) +for (int i = 0; i < stats.num_generator_args; i++) + if (i) + fprintf (file, ",\n\trtx operand%d ATTRIBUTE_UNUSED", i); + else + fprintf (file, "rtx operand%d ATTRIBUTE_UNUSED", i); + else +fprintf (file, "void"); + fprintf (file, ")\n"); + fprintf (file, "{\n"); +} + /* Generate the `gen_...' function for a DEFINE_INSN. */ static void gen_insn (const md_rtx_info &info, FILE *file) { struct pattern_stats stats; - int i; /* Find out how many operands this function has. */ rtx insn = info.def; @@ -442,17 +461,7 @@ gen_insn (const md_rtx_info &info, FILE *file) fatal_at (info.loc, "match_dup operand number has no match_operand"); /* Output the function name and argument declarations. 
*/ - fprintf (file, "rtx\ngen_%s (", XSTR (insn, 0)); - if (stats.num_generator_args) -for (i = 0; i < stats.num_generator_args; i++) - if (i) - fprintf (file, ",\n\trtx operand%d ATTRIBUTE_UNUSED", i); - else - fprintf (file, "rtx operand%d ATTRIBUTE_UNUSED", i); - else -fprintf (file, "void"); - fprintf (file, ")\n"); - fprintf (file, "{\n"); + start_gen_insn (file, XSTR (insn, 0), stats); /* Output code to construct and return the rtl for the instruction body. */ @@ -499,17 +508,7 @@ gen_expand (const md_rtx_info &info, FILE *file) "numbers above all other operands", XSTR (expand, 0)); /* Output the function name and argument declarations. */ - fprintf (file, "rtx\ngen_%s (", XSTR (expand, 0)); - if (stats.num_generator_args) -for (i = 0; i < stats.num_generator_args; i++) - if (i) - fprintf (file, ",\n\trtx operand%d", i); - else - fprintf (file, "rtx operand%d", i); - else -fprintf (file, "void"); - fprintf (file, ")\n"); - fprintf (file, "{\n"); + start_gen_insn (file, XSTR (expand, 0), stats); /* If we don't have any C code to write, only one insn is being written, and no MATCH_DUPs are present, we can just return the desired insn
[gcc r16-807] [RISC-V][PR target/120368] Fix 32bit shift on rv64
https://gcc.gnu.org/g:8459c546197dc9178d250994db021b36405f1bd6 commit r16-807-g8459c546197dc9178d250994db021b36405f1bd6 Author: Jeff Law Date: Wed May 21 14:15:23 2025 -0600 [RISC-V][PR target/120368] Fix 32bit shift on rv64 So a followup to last week's bugfix. In last week's change we stopped using define_insn_and_split to rewrite instructions. That change was done to avoid dropping a masking instruction out of the RTL. As a result the pattern(s) were changed into simple define_insns, which is good. One of them uses the GPR iterator since it's supposed to work for both 32bit and 64bit shifts on rv64. But we failed to emit the right opcode for a 32bit shift on rv64. Thankfully the fix is trivial. If the mode is anything but word_mode, then we must be doing a 32-bit shift on rv64, i.e. the various "w" shift instructions. It's run through my tester. Just waiting on the upstream CI system to spin it. PR target/120368 gcc/ * config/riscv/riscv.md (shift with masked shift count): Fix opcode when generating an SImode shift on rv64. gcc/testsuite/ * gcc.target/riscv/pr120368.c: New test. Diff: --- gcc/config/riscv/riscv.md | 9 - gcc/testsuite/gcc.target/riscv/pr120368.c | 19 +++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index 7f6d0bbab3eb..7e35d7877ed9 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -2938,7 +2938,14 @@ (match_operand:GPR2 2 "register_operand" "r") (match_operand 3 ""))])))] "" - "\t%0,%1,%2" +{ + /* If the shift mode is not word mode, then it must be the + case that we're generating rv64 code, but this is a 32-bit + operation. Thus we need to use the "w" variant. 
*/ + if (E_mode != word_mode) +return "w\t%0,%1,%2"; + return "\t%0,%1,%2"; +} [(set_attr "type" "shift") (set_attr "mode" "")]) diff --git a/gcc/testsuite/gcc.target/riscv/pr120368.c b/gcc/testsuite/gcc.target/riscv/pr120368.c new file mode 100644 index ..4fea8e6fe7c4 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/pr120368.c @@ -0,0 +1,19 @@ +/* { dg-do run } */ + +int g; + +int +foo (int s, int v) +{ + __builtin_memset (&g, v >> (s & 31), sizeof(g)); + return g; +} + +int +main () +{ + int x = foo (-16, 0xd); + if (x != 0x0d0d0d0d) +__builtin_abort(); + __builtin_exit (0); +}
[gcc r16-810] [PATCH] configure: Always add pre-installed header directories to search path
https://gcc.gnu.org/g:dff727b2c28c52e90e0bd61957d15f907494b245 commit r16-810-gdff727b2c28c52e90e0bd61957d15f907494b245 Author: Stephanos Ioannidis Date: Wed May 21 17:28:36 2025 -0600 [PATCH] configure: Always add pre-installed header directories to search path The configure script was adding the target directory flags, including the '-B' flags for the executable prefix and the '-isystem' flags for the pre-installed header directories, to the target flags only for non-Canadian builds under the premise that the host binaries under the executable prefix will not be able to execute on the build system for Canadian builds. While that is true for the '-B' flags specifying the executable prefix, the '-isystem' flags specifying the pre-installed header directories are not affected by this and do not need special handling. This patch updates the configure script to always add the 'include' and 'sys-include' pre-installed header directories to the target search path, in order to ensure that the availability of the pre-installed header directories in the search path is consistent across non-Canadian and Canadian builds. When the '--with-headers' flag is specified, this effectively ensures that the libc headers, that are copied from the specified header directory to the sys-include directory, are used by libstdc++. * configure.ac: Always add pre-installed headers to search path. * configure: Regenerate. Diff: --- configure| 16 ++-- configure.ac | 16 ++-- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/configure b/configure index ebc44416b6c2..3f85b6ca49de 100755 --- a/configure +++ b/configure @@ -11595,13 +11595,17 @@ if test x"${build}" = x"${host}" ; then LDFLAGS_FOR_BUILD=${LDFLAGS_FOR_BUILD-${LDFLAGS}} fi -# On Canadian crosses, we'll be searching the right directories for -# the previously-installed cross compiler, so don't bother to add -# flags for directories within the install tree of the compiler -# being built; programs in there won't even run. 
-if test "${build}" = "${host}" && test -d ${srcdir}/gcc; then +if test -d ${srcdir}/gcc; then + # On Canadian crosses, we'll be searching the right directories for the + # previously-installed cross compiler, so don't bother to add flags for + # executable directories within the install tree of the compiler being built; + # programs in there won't even run. + if test "${build}" = "${host}"; then +FLAGS_FOR_TARGET=$FLAGS_FOR_TARGET' -B$(build_tooldir)/bin/ -B$(build_tooldir)/lib/' + fi + # Search for pre-installed headers if nothing else fits. - FLAGS_FOR_TARGET=$FLAGS_FOR_TARGET' -B$(build_tooldir)/bin/ -B$(build_tooldir)/lib/ -isystem $(build_tooldir)/include -isystem $(build_tooldir)/sys-include' + FLAGS_FOR_TARGET=$FLAGS_FOR_TARGET' -isystem $(build_tooldir)/include -isystem $(build_tooldir)/sys-include' fi if test "x${use_gnu_ld}" = x && diff --git a/configure.ac b/configure.ac index 730db3c14024..d6c3fb54f08d 100644 --- a/configure.ac +++ b/configure.ac @@ -3822,13 +3822,17 @@ if test x"${build}" = x"${host}" ; then LDFLAGS_FOR_BUILD=${LDFLAGS_FOR_BUILD-${LDFLAGS}} fi -# On Canadian crosses, we'll be searching the right directories for -# the previously-installed cross compiler, so don't bother to add -# flags for directories within the install tree of the compiler -# being built; programs in there won't even run. -if test "${build}" = "${host}" && test -d ${srcdir}/gcc; then +if test -d ${srcdir}/gcc; then + # On Canadian crosses, we'll be searching the right directories for the + # previously-installed cross compiler, so don't bother to add flags for + # executable directories within the install tree of the compiler being built; + # programs in there won't even run. + if test "${build}" = "${host}"; then +FLAGS_FOR_TARGET=$FLAGS_FOR_TARGET' -B$(build_tooldir)/bin/ -B$(build_tooldir)/lib/' + fi + # Search for pre-installed headers if nothing else fits. 
- FLAGS_FOR_TARGET=$FLAGS_FOR_TARGET' -B$(build_tooldir)/bin/ -B$(build_tooldir)/lib/ -isystem $(build_tooldir)/include -isystem $(build_tooldir)/sys-include' + FLAGS_FOR_TARGET=$FLAGS_FOR_TARGET' -isystem $(build_tooldir)/include -isystem $(build_tooldir)/sys-include' fi if test "x${use_gnu_ld}" = x &&
[gcc(refs/users/meissner/heads/work206-bugs)] Fix PR 118541, do not generate unordered fp cmoves for IEEE compares.
https://gcc.gnu.org/g:d88170074249387a79537291b3548cb115712d86 commit d88170074249387a79537291b3548cb115712d86 Author: Michael Meissner Date: Wed May 21 20:03:02 2025 -0400 Fix PR 118541, do not generate unordered fp cmoves for IEEE compares. In bug PR target/118541 on power9, power10, and power11 systems, for the function: extern double __ieee754_acos (double); double __acospi (double x) { double ret = __ieee754_acos (x) / 3.14; return __builtin_isgreater (ret, 1.0) ? 1.0 : ret; } GCC currently generates the following code: Power9 Power10 and Power11 == === bl __ieee754_acos bl __ieee754_acos@notoc nop plfd 0,.LC0@pcrel addis 9,2,.LC2@toc@ha xxspltidp 12,1065353216 addi 1,1,32 addi 1,1,32 lfd 0,.LC2@toc@l(9) ld 0,16(1) addis 9,2,.LC0@toc@ha fdiv 0,1,0 ld 0,16(1) mtlr 0 lfd 12,.LC0@toc@l(9)xscmpgtdp 1,0,12 fdiv 0,1,0 xxsel 1,0,12,1 mtlr 0 blr xscmpgtdp 1,0,12 xxsel 1,0,12,1 blr This is because ifcvt.c optimizes the conditional floating point move to use the XSCMPGTDP instruction. However, the XSCMPGTDP instruction will generate an interrupt if one of the arguments is a signalling NaN and signalling NaNs can generate an interrupt. The IEEE comparison functions (isgreater, etc.) require that the comparison not raise an interrupt. The root cause of this is we allow floating point comparisons to be reversed (i.e. LT will be reversed to UNGE). Before power9, this was ok because we only generated the FCMPU or XSCMPUDP instructions. But with power9, we can generate the XSCMPEQDP, XSCMPGTDP, or XSCMPGEDP instructions. This code now does not convert an unordered compare into an ordered compare. Instead, it does the opposite comparison and swaps the arguments. I.e. it converts: r = (a < b) ? c : d; into: r = (b >= a) ? c : d; For the following code: double ordered_compare (double a, double b, double c, double d) { return __builtin_isgreater (a, b) ? c : d; } /* Verify normal > does generate xscmpgtdp. 
*/ double normal_compare (double a, double b, double c, double d) { return a > b ? c : d; } with the following patch, GCC generates the following for power9, power10, and power11: ordered_compare: fcmpu 0,1,2 fmr 1,4 bnglr 0 fmr 1,3 blr normal_compare: xscmpgtdp 1,1,2 xxsel 1,4,3,1 blr I have built bootstrap compilers on big endian power9 systems and little endian power9/power10 systems and there were no regressions. Can I check this patch into the GCC trunk, and after a waiting period, can I check this into the active older branches? 2025-05-21 Michael Meissner gcc/ PR target/118541 * config/rs6000/predicates.md (invert_fpmask_comparison_operator): Delete. (fpmask_reverse_args_comparison_operator): New predicate. * config/rs6000/rs6000-protos.h (rs6000_fpmask_reverse_args): New declaration. * config/rs6000/rs6000.cc (rs6000_fpmask_reverse_args): New function. * config/rs6000/rs6000.h (REVERSIBLE_CC_MODE): Do not allow floating point comparisons to be reversed unless -ffinite-math-only is used. * config/rs6000/rs6000.md (movcc_p9): Add comment. (movcc_invert_p9): Reverse the argument order for the comparison, and use an unordered comparison, instead of ordered comparison. (movcc_invert_p10): Likewise. gcc/testsuite/ PR target/118541 * gcc.target/powerpc/pr118541.c: New test. Diff: --- gcc/config/rs6000/predicates.md | 6 +- gcc/config/rs6000/rs6000-protos.h | 1 + gcc/config/rs6000/rs6000.cc | 24 + gcc/config/rs6000/rs6000.h | 15 ++- gcc/config/rs6000/rs6000.md | 12 +-- gcc/testsuite/gcc.target/powerpc/pr118541.c | 147 6 files changed, 192 insertions(+), 13 deletions(-) diff --git a/gcc/config/rs6000/predicates
[gcc r16-811] aarch64: Carry over zeroness in aarch64_evpc_reencode
https://gcc.gnu.org/g:84c6988c026114727693cd7cd74b8cd5cdcdeb74 commit r16-811-g84c6988c026114727693cd7cd74b8cd5cdcdeb74 Author: Pengxuan Zheng Date: Tue May 20 17:58:23 2025 -0700 aarch64: Carry over zeroness in aarch64_evpc_reencode There was a bug in aarch64_evpc_reencode which could leave zero_op0_p and zero_op1_p of the struct "newd" uninitialized. r16-701-gd77c3bc1c35e303 fixed the issue by zero initializing "newd." This patch provides an alternative fix as suggested by Richard Sandiford based on the fact that the zeroness is preserved by aarch64_evpc_reencode. gcc/ChangeLog: * config/aarch64/aarch64.cc (aarch64_evpc_reencode): Copy zero_op0_p and zero_op1_p from d to newd. Signed-off-by: Pengxuan Zheng Diff: --- gcc/config/aarch64/aarch64.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 1da615c8955a..2b837ec8e673 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -26327,7 +26327,7 @@ aarch64_evpc_trn (struct expand_vec_perm_d *d) static bool aarch64_evpc_reencode (struct expand_vec_perm_d *d) { - expand_vec_perm_d newd = {}; + expand_vec_perm_d newd; /* The subregs that we'd create are not supported for big-endian SVE; see aarch64_modes_compatible_p for details. */ @@ -26353,6 +26353,8 @@ aarch64_evpc_reencode (struct expand_vec_perm_d *d) newd.op1 = d->op1 ? gen_lowpart (new_mode, d->op1) : NULL; newd.testing_p = d->testing_p; newd.one_vector_p = d->one_vector_p; + newd.zero_op0_p = d->zero_op0_p; + newd.zero_op1_p = d->zero_op1_p; newd.perm.new_vector (newpermindices.encoding (), newd.one_vector_p ? 1 : 2, newpermindices.nelts_per_input ());
[gcc(refs/users/meissner/heads/work206-bugs)] Update ChangeLog.*
https://gcc.gnu.org/g:cec29adfc6dea03c04e6c6f6c1a3a70ed1c500c4 commit cec29adfc6dea03c04e6c6f6c1a3a70ed1c500c4 Author: Michael Meissner Date: Wed May 21 20:04:35 2025 -0400 Update ChangeLog.* Diff: --- gcc/ChangeLog.bugs | 30 -- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/gcc/ChangeLog.bugs b/gcc/ChangeLog.bugs index 91250828ad30..0134634d76f9 100644 --- a/gcc/ChangeLog.bugs +++ b/gcc/ChangeLog.bugs @@ -1,4 +1,4 @@ - Branch work206-bugs, patch #111 + Branch work206-bugs, patch #112 Fix PR 118541, do not generate unordered fp cmoves for IEEE compares. @@ -42,14 +42,18 @@ raise an interrupt. The root cause of this is we allow floating point comparisons to be reversed (i.e. LT will be reversed to UNGE). Before power9, this was ok because we only -generated the FCMPU or XSCMPUDP instructions. But with power9, we can generate -the XSCMPEQDP, XSCMPGTDP, or XSCMPGEDP instructions. If NaNs are allowed, we no -longer allow FP comparisons to be reversed. If FP comparisons can't be -reversed, the machine independent portions of the compiler will generate the -comparison with the arguments reversed. +generated the FCMPU or XSCMPUDP instructions. -Since we do not support reversing FP comparisons, the code to support inverting -fpmask operations on power9 has been removed. +But with power9, we can generate the XSCMPEQDP, XSCMPGTDP, or XSCMPGEDP +instructions. This code now does not convert an unordered compare into an +ordered compare. Instead, it does the opposite comparison and swaps the +arguments. I.e. it converts: + + r = (a < b) ? c : d; + +into: + + r = (b >= a) ? c : d; For the following code: @@ -94,12 +98,17 @@ gcc/ PR target/118541 * config/rs6000/predicates.md (invert_fpmask_comparison_operator): Delete. + (fpmask_reverse_args_comparison_operator): New predicate. + * config/rs6000/rs6000-proto.h (rs6000_fpmask_reverse_args): New + declaration. + * config/rs6000/rs6000.cc (rs6000_fpmask_reverse_args): New function. 
* config/rs6000/rs6000.h (REVERSIBLE_CC_MODE): Do not allow floating point comparisons to be reversed unless -ffinite-math-only is used. - (rs6000_reverse_condition): Add argument. * config/rs6000/rs6000.md (movcc_p9): Add comment. - (movcc_invert_p9): Delete insns. + (movcc_invert_p9): Reverse the argument order for + the comparison, and use an unordered comparison, instead of ordered + comparison. (movcc_invert_p10): Likewise. gcc/testsuite/ @@ -107,6 +116,7 @@ gcc/testsuite/ PR target/118541 * gcc.target/powerpc/pr118541.c: New test. + Branch work206-bugs, patch #111 was reverted Branch work206-bugs, patch #110 was reverted Branch work206-bugs, patch #101
[gcc(refs/users/meissner/heads/work206-bugs)] Revert changes
https://gcc.gnu.org/g:6ac181184da7d2c0bf54646434b253d46d33b86c commit 6ac181184da7d2c0bf54646434b253d46d33b86c Author: Michael Meissner Date: Wed May 21 15:29:23 2025 -0400 Revert changes Diff: --- gcc/config/rs6000/predicates.md | 6 ++ gcc/config/rs6000/rs6000.h | 15 +-- gcc/config/rs6000/rs6000.md | 81 +-- gcc/testsuite/gcc.target/powerpc/pr118541.c | 147 4 files changed, 84 insertions(+), 165 deletions(-) diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 02ba8fa6c9b0..647e89afb6a7 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -1463,6 +1463,12 @@ (define_predicate "fpmask_comparison_operator" (match_code "eq,gt,ge")) +;; Return 1 if OP is a comparison operator suitable for vector/scalar +;; comparisons that generate a 0/-1 mask (i.e. the inverse of +;; fpmask_comparison_operator). +(define_predicate "invert_fpmask_comparison_operator" + (match_code "ne,unlt,unle")) + ;; Return 1 if OP is a comparison operation suitable for integer vector/scalar ;; comparisons that generate a -1/0 mask. (define_predicate "vecint_comparison_operator" diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index c8d9456e0912..9267612fbc9c 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -1810,17 +1810,10 @@ extern scalar_int_mode rs6000_pmode; : (((OP) == EQ || (OP) == NE) && COMPARISON_P (X) \ ? CCEQmode : CCmode)) -/* Can the condition code MODE be safely reversed? Don't allow floating point - comparisons to be reversed unless NaNs are not allowed. - - In the past, we used to allow reversing FP operations because we only - generated FCMPU comparisons and not FCMPO. However, starting with power9, - the XSCMPEQDP, XSCMPGTDP, and XSCMPGEDP instructions will trap if a - signalling NaN is used. If we allow reversing FP operations, we could wind - up converting a LT operation into UNGE and the instruction will trap. 
The - machine independent parts of the compiler will handle reversing the - arguments if the FP comparison cannot be reversed. */ -#define REVERSIBLE_CC_MODE(MODE) ((MODE) != CCFPmode || flag_finite_math_only) +/* Can the condition code MODE be safely reversed? This is safe in + all cases on this port, because at present it doesn't use the + trapping FP comparisons (fcmpo). */ +#define REVERSIBLE_CC_MODE(MODE) 1 /* Given a condition code and a mode, return the inverse condition. */ #define REVERSE_CONDITION(CODE, MODE) rs6000_reverse_condition (MODE, CODE) diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index b9a91b0eab24..65da0c653304 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -5699,13 +5699,6 @@ "fsel %0,%1,%2,%3" [(set_attr "type" "fp")]) -;; On power9 and above generate the XSCMPEQFP, XSCMPGTDP, and XSCMPGEDP -;; instructions followed by XXSEL to do a floating point conditional move. In -;; the past, we provided patterns for inverting the comparison, converting a LE -;; into UNGT. However, the XSCMPEQDP, XSCMPGTDP, and XSCMPGEDP instructions -;; will trap if one of the arguments is a signalling NaN. Since we aren't -;; providing the inverted operation, the machine independent parts of the -;; compiler generate code with the arguments swapped. (define_insn_and_split "*movcc_p9" [(set (match_operand:SFDF 0 "vsx_register_operand" "=&wa,wa") (if_then_else:SFDF @@ -5737,6 +5730,43 @@ [(set_attr "length" "8") (set_attr "type" "vecperm")]) +;; Handle inverting the fpmask comparisons. 
+(define_insn_and_split "*movcc_invert_p9" + [(set (match_operand:SFDF 0 "vsx_register_operand" "=&wa,wa") + (if_then_else:SFDF +(match_operator:CCFP 1 "invert_fpmask_comparison_operator" + [(match_operand:SFDF2 2 "vsx_register_operand" "wa,wa") +(match_operand:SFDF2 3 "vsx_register_operand" "wa,wa")]) +(match_operand:SFDF 4 "vsx_register_operand" "wa,wa") +(match_operand:SFDF 5 "vsx_register_operand" "wa,wa"))) + (clobber (match_scratch:V2DI 6 "=0,&wa"))] + "TARGET_P9_MINMAX" + "#" + "&& 1" + [(set (match_dup 6) + (if_then_else:V2DI (match_dup 9) + (match_dup 7) + (match_dup 8))) + (set (match_dup 0) + (if_then_else:SFDF (ne (match_dup 6) + (match_dup 8)) + (match_dup 5) + (match_dup 4)))] +{ + rtx op1 = operands[1]; + enum rtx_code cond = reverse_condition_maybe_unordered (GET_CODE (op1)); + + if (GET_CODE (operands[6]) == SCRATCH) +operands[6] = gen_reg_rtx (V2DImode); + + operands[7] = CONSTM1_RTX (V2DImode); + operands[8] = CONST0_RTX (V2DImode); + + operands[9] = gen_rtx_fmt_ee (cond, CCFPmode, operands[2], operands[3]); +} + [(set_attr "len
[gcc(refs/users/meissner/heads/work206-bugs)] Fix PR 118541, do not generate unordered fp cmoves for IEEE compares.
https://gcc.gnu.org/g:33d453d937554d8d93861541ea8926647a674ea8 commit 33d453d937554d8d93861541ea8926647a674ea8 Author: Michael Meissner Date: Wed May 21 14:01:38 2025 -0400 Fix PR 118541, do not generate unordered fp cmoves for IEEE compares. In bug PR target/118541 on power9, power10, and power11 systems, for the function: extern double __ieee754_acos (double); double __acospi (double x) { double ret = __ieee754_acos (x) / 3.14; return __builtin_isgreater (ret, 1.0) ? 1.0 : ret; } GCC currently generates the following code: Power9 Power10 and Power11 == === bl __ieee754_acos bl __ieee754_acos@notoc nop plfd 0,.LC0@pcrel addis 9,2,.LC2@toc@ha xxspltidp 12,1065353216 addi 1,1,32 addi 1,1,32 lfd 0,.LC2@toc@l(9) ld 0,16(1) addis 9,2,.LC0@toc@ha fdiv 0,1,0 ld 0,16(1) mtlr 0 lfd 12,.LC0@toc@l(9)xscmpgtdp 1,0,12 fdiv 0,1,0 xxsel 1,0,12,1 mtlr 0 blr xscmpgtdp 1,0,12 xxsel 1,0,12,1 blr This is because ifcvt.c optimizes the conditional floating point move to use the XSCMPGTDP instruction. However, the XSCMPGTDP instruction will generate an interrupt if one of the arguments is a signalling NaN and signalling NaNs can generate an interrupt. The IEEE comparison functions (isgreater, etc.) require that the comparison not raise an interrupt. The root cause of this is we allow floating point comparisons to be reversed (i.e. LT will be reversed to UNGE). Before power9, this was ok because we only generated the FCMPU or XSCMPUDP instructions. But with power9, we can generate the XSCMPEQDP, XSCMPGTDP, or XSCMPGEDP instructions. If NaNs are allowed, we no longer allow FP comparisons to be reversed. If FP comparisons can't be reversed, the machine independent portions of the compiler will generate the comparison with the arguments reversed. Since we do not support reversing FP comparisons, the code to support inverting fpmask operations on power9 has been removed. 
For the following code: double ordered_compare (double a, double b, double c, double d) { return __builtin_isgreater (a, b) ? c : d; } /* Verify normal > does generate xscmpgtdp. */ double normal_compare (double a, double b, double c, double d) { return a > b ? c : d; } with the following patch, GCC generates the following for power9, power10, and power11: ordered_compare: fcmpu 0,1,2 fmr 1,4 bnglr 0 fmr 1,3 blr normal_compare: xscmpgtdp 1,1,2 xxsel 1,4,3,1 blr I have built bootstrap compilers on big endian power9 systems and little endian power9/power10 systems and there were no regressions. Can I check this patch into the GCC trunk, and after a waiting period, can I check this into the active older branches? 2025-05-21 Michael Meissner gcc/ PR target/118541 * config/rs6000/predicates.md (invert_fpmask_comparison_operator): Delete. * config/rs6000/rs6000.h (REVERSIBLE_CC_MODE): Do not allow floating point comparisons to be reversed unless -ffinite-math-only is used. (rs6000_reverse_condition): Add argument. * config/rs6000/rs6000.md (movcc_p9): Add comment. (movcc_invert_p9): Delete insns. (movcc_invert_p10): Likewise. gcc/testsuite/ PR target/118541 * gcc.target/powerpc/pr118541.c: New test. Diff: --- gcc/config/rs6000/predicates.md | 6 -- gcc/config/rs6000/rs6000.h | 15 ++- gcc/config/rs6000/rs6000.md | 81 ++- gcc/testsuite/gcc.target/powerpc/pr118541.c | 147 4 files changed, 165 insertions(+), 84 deletions(-) diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 647e89afb6a7..02ba8fa6c9b0 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -1463,12 +1463,6 @@ (define_predicate "fpmask_comparison_operator" (match_code "eq,gt,ge")) -;; Return 1 if OP is a comparison operator suitable for vector/sca
[gcc(refs/users/meissner/heads/work206-bugs)] Update ChangeLog.*
https://gcc.gnu.org/g:86eb8563626948a0bc690c1a61907bf75fb0b940 commit 86eb8563626948a0bc690c1a61907bf75fb0b940 Author: Michael Meissner Date: Wed May 21 14:03:12 2025 -0400 Update ChangeLog.* Diff: --- gcc/ChangeLog.bugs | 42 ++ 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/gcc/ChangeLog.bugs b/gcc/ChangeLog.bugs index 8eb6295859ab..91250828ad30 100644 --- a/gcc/ChangeLog.bugs +++ b/gcc/ChangeLog.bugs @@ -1,4 +1,4 @@ - Branch work206-bugs, patch #110 + Branch work206-bugs, patch #111 Fix PR 118541, do not generate unordered fp cmoves for IEEE compares. @@ -40,11 +40,16 @@ arguments is a signalling NaN and signalling NaNs can generate an interrupt. The IEEE comparison functions (isgreater, etc.) require that the comparison not raise an interrupt. -The following patch changes the PowerPC back end so that ifcvt.c will not change -the if/then test and move into a conditional move if the comparison is one of -the comparisons that do not raise an error with signalling NaNs and -Ofast is -not used. If a normal comparison is used or -Ofast is used, GCC will continue -to generate XSCMPGTDP and XXSEL. +The root cause of this is we allow floating point comparisons to be reversed +(i.e. LT will be reversed to UNGE). Before power9, this was ok because we only +generated the FCMPU or XSCMPUDP instructions. But with power9, we can generate +the XSCMPEQDP, XSCMPGTDP, or XSCMPGEDP instructions. If NaNs are allowed, we no +longer allow FP comparisons to be reversed. If FP comparisons can't be +reversed, the machine independent portions of the compiler will generate the +comparison with the arguments reversed. + +Since we do not support reversing FP comparisons, the code to support inverting +fpmask operations on power9 has been removed. For the following code: @@ -82,31 +87,28 @@ power9/power10 systems and there were no regressions. Can I check this patch into the GCC trunk, and after a waiting period, can I check this into the active older branches? 
-2025-05-20 Michael Meissner +2025-05-21 Michael Meissner gcc/ PR target/118541 - * config/rs6000/predicates.md (invert_fpmask_comparison_operator): Do - not allow UNLT and UNLE unless -ffast-math. - * config/rs6000/rs6000-protos.h (enum rev_cond_ordered): New enumeration. - (rs6000_reverse_condition): Add argument. - * config/rs6000/rs6000.cc (rs6000_reverse_condition): Do not allow - ordered comparisons to be reversed for floating point conditional moves, - but allow ordered comparisons to be reversed on jumps. - (rs6000_emit_sCOND): Adjust rs6000_reverse_condition call. + * config/rs6000/predicates.md (invert_fpmask_comparison_operator): + Delete. * config/rs6000/rs6000.h (REVERSIBLE_CC_MODE): Do not allow floating - point comparisons to be reversed on platforms that can generate the - xscmp{eq,gt,ge}{dp,qp} instructions. - (REVERSE_CONDITION): Adjust rs6000_reverse_condition call. - * config/rs6000/rs6000.md (reverse_branch_comparison): Name insn. - Adjust rs6000_reverse_condition calls. + point comparisons to be reversed unless -ffinite-math-only is used. + (rs6000_reverse_condition): Add argument. + * config/rs6000/rs6000.md (movcc_p9): Add + comment. + (movcc_invert_p9): Delete insns. + (movcc_invert_p10): Likewise. gcc/testsuite/ PR target/118541 * gcc.target/powerpc/pr118541.c: New test. + Branch work206-bugs, patch #110 was reverted + Branch work206-bugs, patch #101 PR 99293: Optimize splat of a V2DF/V2DI extract with constant element
[gcc r16-814] [PATCH] testsuite: RISC-V: Update the cset-sext-sfb/zba-slliuw test optimization level.
https://gcc.gnu.org/g:d8636b05c559e6f060e16652bb10c59d9fb0fb54 commit r16-814-gd8636b05c559e6f060e16652bb10c59d9fb0fb54 Author: Dongyan Chen Date: Wed May 21 21:46:52 2025 -0600 [PATCH] testsuite: RISC-V: Update the cset-sext-sfb/zba-slliuw test optimization level. Failed testcases occurred in the regression test of gcc: cset-sext-sfb.c failed the -Oz test, and zba-slliuw.c failed the -Og test. This patch solves the problem by skipping the optimization. gcc/testsuite/ChangeLog: * gcc.target/riscv/cset-sext-sfb.c: Skip for -Oz. * gcc.target/riscv/zba-slliuw.c: Skip for -Og. Diff: --- gcc/testsuite/gcc.target/riscv/cset-sext-sfb.c | 2 +- gcc/testsuite/gcc.target/riscv/zba-slliuw.c| 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/gcc/testsuite/gcc.target/riscv/cset-sext-sfb.c b/gcc/testsuite/gcc.target/riscv/cset-sext-sfb.c index 4a8477e81621..3d46306f1e19 100644 --- a/gcc/testsuite/gcc.target/riscv/cset-sext-sfb.c +++ b/gcc/testsuite/gcc.target/riscv/cset-sext-sfb.c @@ -1,5 +1,5 @@ /* { dg-do compile { target { ! riscv_abi_e } } } */ -/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-O1" "-Os" } } */ +/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-O1" "-Os" "-Oz" } } */ /* { dg-options "-march=rv32gc -mtune=sifive-7-series -mbranch-cost=1 -fno-ssa-phiopt -fdump-rtl-ce1" { target { rv32 } } } */ /* { dg-options "-march=rv64gc -mtune=sifive-7-series -mbranch-cost=1 -fno-ssa-phiopt -fdump-rtl-ce1" { target { rv64 } } } */ diff --git a/gcc/testsuite/gcc.target/riscv/zba-slliuw.c b/gcc/testsuite/gcc.target/riscv/zba-slliuw.c index c123bb5ece0f..69914db95a2c 100644 --- a/gcc/testsuite/gcc.target/riscv/zba-slliuw.c +++ b/gcc/testsuite/gcc.target/riscv/zba-slliuw.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-march=rv64gc_zba_zbs -mabi=lp64" } */ -/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" } } */ +/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Og" } } */ long foo (long i)
[gcc r16-808] [RISC-V] Improve (x << C1) + C2 split code
https://gcc.gnu.org/g:0bed343a2a640c7be4a1970d303098ccf62bd1c6 commit r16-808-g0bed343a2a640c7be4a1970d303098ccf62bd1c6 Author: Jeff Law Date: Wed May 21 16:04:58 2025 -0600 [RISC-V] Improve (x << C1) + C2 split code I wrote this a couple months ago to fix an instruction count regression in 505.mcf on risc-v, but I don't have a trivial little testcase to add to the suite. There were two problems with the pattern. First, the code was generating a shift followed by an add after reload. Naturally combine doesn't run after reload and the code stayed in that form rather than using shadd when available. Second the splitter was just over-active. We need to make sure that the shifted form of the constant operand has a cost > 1 to synthesize. It's useless to split if the shifted constant can be synthesized in a single instruction. This has been in my tester since March. So it's been through numerous riscv64-elf and riscv32-elf test cycles as well as multiple rv64 bootstrap tests. Waiting on the upstream CI system to render a verdict before moving forward. Looking further out I'm hoping this pattern will transform into a simpler and always active define_split. gcc/ * config/riscv/riscv.md ((x << C1) + C2): Tighten split condition and generate more efficient code when splitting. 
Diff: --- gcc/config/riscv/riscv.md | 33 - 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index 7e35d7877ed9..a5b3abbe5d45 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -4704,23 +4704,38 @@ (match_operand 2 "const_int_operand" "n")) (match_operand 3 "const_int_operand" "n"))) (clobber (match_scratch:DI 4 "=&r"))] - "(TARGET_64BIT && riscv_const_insns (operands[3], false) == 1)" + "(TARGET_64BIT +&& riscv_const_insns (operands[3], false) == 1 +&& riscv_const_insns (GEN_INT (INTVAL (operands[3]) + << INTVAL (operands[2])), false) != 1)" "#" "&& reload_completed" [(const_int 0)] "{ - rtx x = gen_rtx_ASHIFT (DImode, operands[1], operands[2]); - emit_insn (gen_rtx_SET (operands[0], x)); - - /* If the constant fits in a simm12, use it directly as we do not - get another good chance to optimize things again. */ - if (!SMALL_OPERAND (INTVAL (operands[3]))) + /* Prefer to generate shNadd when we can, even over using an + immediate form. If we're not going to be able to generate + a shNadd, then use the constant directly if it fits in a + simm12 field since we won't get another chance to optimize this. */ + if ((TARGET_ZBA && imm123_operand (operands[2], word_mode)) +|| !SMALL_OPERAND (INTVAL (operands[3]))) emit_move_insn (operands[4], operands[3]); else operands[4] = operands[3]; - x = gen_rtx_PLUS (DImode, operands[0], operands[4]); - emit_insn (gen_rtx_SET (operands[0], x)); + if (TARGET_ZBA && imm123_operand (operands[2], word_mode)) + { +rtx x = gen_rtx_ASHIFT (DImode, operands[1], operands[2]); +x = gen_rtx_PLUS (DImode, x, operands[4]); +emit_insn (gen_rtx_SET (operands[0], x)); + } + else + { +rtx x = gen_rtx_ASHIFT (DImode, operands[1], operands[2]); +emit_insn (gen_rtx_SET (operands[0], x)); +x = gen_rtx_PLUS (DImode, operands[0], operands[4]); +emit_insn (gen_rtx_SET (operands[0], x)); + } + DONE; }" [(set_attr "type" "arith")])
[gcc r16-809] combine: gen_lowpart_no_emit vs CLOBBER [PR120090]
https://gcc.gnu.org/g:f725d6765373f7884a2ea23bc11409b15545958b commit r16-809-gf725d6765373f7884a2ea23bc11409b15545958b Author: Andrew Pinski Date: Mon May 5 09:46:14 2025 -0700 combine: gen_lowpart_no_emit vs CLOBBER [PR120090] The problem here is simplify-rtx.cc expects gen_lowpart_no_emit to return NULL on failure but combine's hook was returning CLOBBER. After r16-160-ge6f89d78c1a7528e93458278, gcc.target/i386/avx512bw-pr103750-2.c started to fail at -m32 due to this as new simplify code would return an RTL with a clobber in it rather than returning NULL. To fix this gen_lowpart_no_emit should return NULL when there was a failure instead of a clobber. This only changes the gen_lowpart_no_emit hook and not the generic gen_lowpart hook as parts of combine just pass gen_lowpart result directly without checking the return value. Bootstrapped and tested on x86_64-linux-gnu. PR rtl-optimization/120090 gcc/ChangeLog: * combine.cc (gen_lowpart_for_combine_no_emit): New function. (RTL_HOOKS_GEN_LOWPART_NO_EMIT): Set to gen_lowpart_for_combine_no_emit. Signed-off-by: Andrew Pinski Diff: --- gcc/combine.cc | 19 ++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/gcc/combine.cc b/gcc/combine.cc index 67cf0447607f..4dbc1f6a4a4e 100644 --- a/gcc/combine.cc +++ b/gcc/combine.cc @@ -458,6 +458,7 @@ static rtx simplify_shift_const (rtx, enum rtx_code, machine_mode, rtx, int); static int recog_for_combine (rtx *, rtx_insn *, rtx *, unsigned = 0, unsigned = 0); static rtx gen_lowpart_for_combine (machine_mode, rtx); +static rtx gen_lowpart_for_combine_no_emit (machine_mode, rtx); static enum rtx_code simplify_compare_const (enum rtx_code, machine_mode, rtx *, rtx *); static enum rtx_code simplify_comparison (enum rtx_code, rtx *, rtx *); @@ -491,7 +492,7 @@ static rtx gen_lowpart_or_truncate (machine_mode, rtx); /* Our implementation of gen_lowpart never emits a new pseudo.
*/ #undef RTL_HOOKS_GEN_LOWPART_NO_EMIT -#define RTL_HOOKS_GEN_LOWPART_NO_EMIT gen_lowpart_for_combine +#define RTL_HOOKS_GEN_LOWPART_NO_EMIT gen_lowpart_for_combine_no_emit #undef RTL_HOOKS_REG_NONZERO_REG_BITS #define RTL_HOOKS_REG_NONZERO_REG_BITS reg_nonzero_bits_for_combine @@ -11890,6 +11891,22 @@ gen_lowpart_for_combine (machine_mode omode, rtx x) fail: return gen_rtx_CLOBBER (omode, const0_rtx); } + +/* Like gen_lowpart_for_combine but returns NULL_RTX + for an error instead of CLOBBER. + Note no_emit is not called directly from combine but rather from + simplify_rtx and is expecting a NULL on failure rather than + a CLOBBER. */ + +static rtx +gen_lowpart_for_combine_no_emit (machine_mode omode, rtx x) +{ + rtx tem = gen_lowpart_for_combine (omode, x); + if (!tem || GET_CODE (tem) == CLOBBER) +return NULL_RTX; + return tem; +} + /* Try to simplify a comparison between OP0 and a constant OP1, where CODE is the comparison code that will be tested, into a
[gcc r16-813] [RISC-V] Clear high or low bits using shift pairs
https://gcc.gnu.org/g:b3c778e858497f2b7f37fa8a3101854361c025da commit r16-813-gb3c778e858497f2b7f37fa8a3101854361c025da Author: Shreya Munnangi Date: Wed May 21 18:49:14 2025 -0600 [RISC-V] Clear high or low bits using shift pairs So the first special case of clearing bits from Shreya's work. We can clear an arbitrary number of high bits by shifting left by the number of bits to clear, then logically shifting right to put everything in place. Similarly we can clear an arbitrary number of low bits with a right logical shift followed by a left shift. Naturally this only applies when the constant synthesis budget is 2+ insns. Even with mvconst_internal still enabled this does consistently show various small code generation improvements. I have seen a notable regression. The two shift form to wipe out high bits isn't handled well by ext-dce. Essentially it looks like we don't recognize the sequence as wiping upper bits, instead it makes bits live and as a result we're unable to remove a prior zero extension. I've opened a bug for this issue. The other case I've seen is CSE related. If we had a number of masking operations with the same mask, we might have previously CSE'd the constant. In that scenario each instance of masking would be a single AND using the CSE'd register holding the constant, whereas with this patch it'll be a pair of shifts. But on a good uarch design the pair of shifts would be fused into a single op. Given this is relatively rare and on the margins from a performance standpoint I'm not going to worry about it. This has spun in my tester for riscv32-elf and riscv64-elf. Bootstrap and regression test is in flight and due in an hour or so. Waiting on the upstream pre-commit tester and the bootstrap test before moving forward. gcc/ * config/riscv/riscv.cc (synthesize_and): When profitable, use two shift combinations to clear high or low bits rather than synthesizing the constant.
Diff: --- gcc/config/riscv/riscv.cc | 37 + 1 file changed, 37 insertions(+) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 03dcc347fb87..41a164bc7783 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -14525,6 +14525,43 @@ synthesize_and (rtx operands[3]) } } + /* The number of instructions to synthesize the constant is a good + estimate of the budget. That does not account for out of order + execution an fusion in the constant synthesis those would naturally + decrease the budget. It also does not account for the AND at + the end of the sequence which would increase the budget. */ + int budget = riscv_const_insns (operands[2], true); + rtx input = NULL_RTX; + rtx output = NULL_RTX; + + /* Left shift + right shift to clear high bits. */ + if (budget >= 2 && p2m1_shift_operand (operands[2], word_mode)) +{ + int count = (GET_MODE_BITSIZE (GET_MODE (operands[1])).to_constant () + - exact_log2 (INTVAL (operands[2]) + 1)); + rtx x = gen_rtx_ASHIFT (word_mode, operands[1], GEN_INT (count)); + output = gen_reg_rtx (word_mode); + emit_insn (gen_rtx_SET (output, x)); + input = output; + x = gen_rtx_LSHIFTRT (word_mode, input, GEN_INT (count)); + emit_insn (gen_rtx_SET (operands[0], x)); + return true; +} + + /* Clears a bunch of low bits with only high bits set. */ + unsigned HOST_WIDE_INT t = ~INTVAL (operands[2]); + if (budget >= 2 && exact_log2 (t + 1) >= 0) +{ + int count = ctz_hwi (INTVAL (operands[2])); + rtx x = gen_rtx_LSHIFTRT (word_mode, operands[1], GEN_INT (count)); + output = gen_reg_rtx (word_mode); + emit_insn (gen_rtx_SET (output, x)); + input = output; + x = gen_rtx_ASHIFT (word_mode, input, GEN_INT (count)); + emit_insn (gen_rtx_SET (operands[0], x)); + return true; +} + /* If the remaining budget has gone to less than zero, it forces the value into a register and performs the AND operation. It returns TRUE to the caller so the caller
[gcc(refs/users/meissner/heads/work206-bugs)] Revert changes
https://gcc.gnu.org/g:b3b14d8bec72de46bf0721e367756d13b1b3044f commit b3b14d8bec72de46bf0721e367756d13b1b3044f Author: Michael Meissner Date: Wed May 21 10:44:41 2025 -0400 Revert changes Diff: --- gcc/config/rs6000/predicates.md | 10 +- gcc/config/rs6000/rs6000-protos.h | 17 +--- gcc/config/rs6000/rs6000.cc | 35 ++- gcc/config/rs6000/rs6000.h | 13 +-- gcc/config/rs6000/rs6000.md | 25 ++--- gcc/testsuite/gcc.target/powerpc/pr118541.c | 147 6 files changed, 24 insertions(+), 223 deletions(-) diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index ba8df6a7979d..647e89afb6a7 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -1466,16 +1466,8 @@ ;; Return 1 if OP is a comparison operator suitable for vector/scalar ;; comparisons that generate a 0/-1 mask (i.e. the inverse of ;; fpmask_comparison_operator). -;; -;; invert_fpmask_comparison_operator is used to form floating point conditional -;; moves on power9. The instructions that would be generated (xscmpeqdp, -;; xscmpgtdp, or xscmpgedp) will raise an error if one of the arguments is a -;; signalling NaN. Don't allow the test to be inverted if NaNs are supported -;; and the comparison is an ordered comparison. (define_predicate "invert_fpmask_comparison_operator" - (ior (match_code "ne") - (and (match_code "unlt,unle") - (match_test "flag_finite_math_only" + (match_code "ne,unlt,unle")) ;; Return 1 if OP is a comparison operation suitable for integer vector/scalar ;; comparisons that generate a -1/0 mask. 
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 5beb44fc339b..4619142d197b 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -114,23 +114,8 @@ extern const char *rs6000_sibcall_template (rtx *, unsigned int); extern const char *rs6000_indirect_call_template (rtx *, unsigned int); extern const char *rs6000_indirect_sibcall_template (rtx *, unsigned int); extern const char *rs6000_pltseq_template (rtx *, int); - -/* Whether we can reverse the sense of an ordered (UNLT, UNLE, UNGT, UNGE, - UNEQ, or LTGT) comparison. If we are doing floating point conditional moves - on power9 and above, we cannot convert an ordered comparison to unordered, - since the instructions (XSCMP{EQ,GT,GE}DP) that are used for conditional - moves can trap if an argument is a signalling NaN. However for normal jumps - we can reverse a comparison since we only use unordered compare instructions - which do not trap on signalling NaNs. */ - -enum class rev_cond_ordered { - ordered_ok, - no_ordered -}; - extern enum rtx_code rs6000_reverse_condition (machine_mode, - enum rtx_code, - enum rev_cond_ordered); + enum rtx_code); extern rtx rs6000_emit_eqne (machine_mode, rtx, rtx, rtx); extern rtx rs6000_emit_fp_cror (rtx_code, machine_mode, rtx); extern void rs6000_emit_sCOND (machine_mode, rtx[]); diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index 86660fc534ff..11dfde7f288b 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -9358,8 +9358,7 @@ rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode) start_sequence (); ret = rs6000_legitimize_address (x, oldx, mode); - insns = get_insns (); - end_sequence (); + insns = end_sequence (); if (ret != x) { @@ -15367,28 +15366,17 @@ rs6000_print_patchable_function_entry (FILE *file, } enum rtx_code -rs6000_reverse_condition (machine_mode mode, - enum rtx_code code, - enum rev_cond_ordered ordered_cmp_ok) 
+rs6000_reverse_condition (machine_mode mode, enum rtx_code code) { /* Reversal of FP compares takes care -- an ordered compare - becomes an unordered compare and vice versa. - - However, this is not safe for ordered comparisons (i.e. for isgreater, - etc.) starting with the power9 because ifcvt.cc will want to create a fp - cmove, and the x{s,v}cmp{eq,gt,ge}{dp,qp} instructions will trap if one of - the arguments is a signalling NaN. */ - + becomes an unordered compare and vice versa. */ if (mode == CCFPmode - && (code == UNLT || code == UNLE || code == UNGT || code == UNGE + && (!flag_finite_math_only + || code == UNLT || code == UNLE || code == UNGT || code == UNGE || code == UNEQ || code == LTGT)) -{ - return (ordered_cmp_ok == rev_cond_ordered::no_ordered - ? UNKNOWN - : reverse_condition_maybe_unordered (code)); -} - - return reverse_condition (code); +return reverse_condition_maybe_unordered (code); + else +return reverse_condition (code); }
[gcc(refs/vendors/redhat/heads/gcc-15-branch)] Merge commit 'r15-9719-g7e580225e57086e335a16f9258d0401a21e468ef' into redhat/gcc-15-branch
https://gcc.gnu.org/g:b9def1721b12cae307c1a1ebc49030fce6531dfa commit b9def1721b12cae307c1a1ebc49030fce6531dfa Merge: ac84ab706662 7e580225e570 Author: Jakub Jelinek Date: Wed May 21 14:40:58 2025 +0200 Merge commit 'r15-9719-g7e580225e57086e335a16f9258d0401a21e468ef' into redhat/gcc-15-branch Diff: contrib/ChangeLog | 7 + gcc/ChangeLog | 229 + gcc/DATESTAMP | 2 +- gcc/ada/ChangeLog |18 + gcc/ada/Makefile.rtl | 1 + gcc/ada/exp_ch3.adb|30 +- gcc/ada/init.c | 2 +- gcc/ada/libgnat/s-dorepr__freebsd.adb | 172 + gcc/c-family/ChangeLog | 8 + gcc/c-family/c-opts.cc |32 +- gcc/c/ChangeLog| 8 + gcc/c/c-typeck.cc |20 +- gcc/cgraph.cc | 2 +- gcc/cgraph.h | 2 +- gcc/cgraphclones.cc|14 +- gcc/config/aarch64/aarch64.cc |66 +- gcc/config/alpha/alpha.cc |23 +- gcc/config/avr/avr.md | 4 + gcc/config/i386/x86-tune.def | 4 +- gcc/config/s390/s390.cc|13 +- gcc/cp/ChangeLog | 130 + gcc/cp/class.cc|56 +- gcc/cp/cp-tree.h | 8 +- gcc/cp/decl2.cc|14 +- gcc/cp/module.cc |83 +- gcc/cp/name-lookup.cc |43 +- gcc/cp/optimize.cc | 4 +- gcc/cp/pt.cc |33 +- gcc/cp/rtti.cc | 1 + gcc/doc/gm2.texi | 2 +- gcc/doc/invoke.texi|87 + gcc/dwarf2out.cc | 3 +- gcc/fortran/ChangeLog | 118 + gcc/fortran/check.cc |42 +- gcc/fortran/dependency.cc | 6 +- gcc/fortran/dump-parse-tree.cc | 7 +- gcc/fortran/expr.cc| 1 + gcc/fortran/gfortran.h | 3 + gcc/fortran/interface.cc | 135 +- gcc/fortran/match.cc | 3 +- gcc/fortran/primary.cc |13 +- gcc/fortran/resolve.cc | 7 +- gcc/fortran/trans-decl.cc | 6 +- gcc/fortran/trans-expr.cc |10 +- gcc/fortran/trans-intrinsic.cc |51 +- gcc/fortran/trans-openmp.cc|20 + gcc/gimple-fold.cc | 6 +- gcc/ipa-cp.cc |12 +- gcc/ipa-sra.cc | 2 +- gcc/m2/ChangeLog |57 + gcc/m2/gm2-compiler/M2Check.mod|21 +- gcc/m2/gm2-compiler/M2GenGCC.mod | 3 - gcc/m2/gm2-compiler/M2Quads.mod|10 +- gcc/m2/gm2-compiler/M2Range.mod|43 +- gcc/m2/gm2-compiler/PCSymBuild.mod |13 +- gcc/m2/gm2-compiler/SymbolTable.def|16 + gcc/m2/gm2-compiler/SymbolTable.mod| 138 +- gcc/m2/gm2-libs/FormatStrings.mod | 4 +- 
gcc/po/ChangeLog |18 + gcc/po/be.po | 9391 +++ gcc/po/da.po | 9447 +++ gcc/po/de.po | 9403 +++ gcc/po/el.po | 9377 +++ gcc/po/es.po | 9447 +++ gcc/po/fi.po | 9403 +++ gcc/po/fr.po | 9407 +++ gcc/po/hr.po | 10325 - gcc/po/id.po | 9428 +++ gcc/po/ja.po | 9399 +++ gcc/po/ka.po | 9355 +++ gcc/po/nl.po | 9384 +++ gcc/po/ru.po | 9439 +++---
[gcc/redhat/heads/gcc-15-branch] (133 commits) Merge commit 'r15-9719-g7e580225e57086e335a16f9258d0401a21e
The branch 'redhat/heads/gcc-15-branch' was updated to point to: b9def1721b12... Merge commit 'r15-9719-g7e580225e57086e335a16f9258d0401a21e It previously pointed to: ac84ab706662... Merge commit 'r15-9587-ga36dd9ee5bb1d2f2f19b8d935db29468a35 Diff: Summary of changes (added commits): --- b9def17... Merge commit 'r15-9719-g7e580225e57086e335a16f9258d0401a21e 7e58022... Daily bump. (*) 6683c72... Fortran: fix passing of inquiry ref of complex array to TRA (*) c1db46f... tree-sra: Do not create stores into const aggregates (PR111 (*) 76d16fb... ipa: Dump cgraph_node UID instead of order into ipa-clones (*) 911cfea... libstdc++: Fix incorrect links to archived SGI STL docs (*) 2d1244a... c++/modules: Fix ICE on merge of instantiation with partial (*) 3ba1b0a... c++/modules: Always mark tinfo vars as TREE_ADDRESSABLE [PR (*) fedf81e... Daily bump. (*) 06a10db... libstdc++: Fix some Clang -Wsystem-headers warnings in for C++17 [PR1201 (*) d06a800... libstdc++: Restore std::scoped_lock for non-gthreads target (*) c9e3181... c+: -Wabi false positive [PR120012] (*) 74f41d4... Update cpplib es.po (*) c1d4d81... Update gcc sv.po (*) 981abdf... libstdc++: Update C++23 status table (*) 0b76b58... libstdc++: Fix constraint recursion in std::expected's oper (*) 0ff3b31... libstdc++: Fix availability of std::erase_if(std::flat_foo) (*) a341d96... libstdc++: Suppress GDB output from new 'skip' commands [PR (*) fc135d4... libstdc++: Update rows in C++17 status table (*) 74dbb19... Daily bump. (*) 44cd55a... tree-optimization/120211 - constrain LOOP_VINFO_EARLY_BREAK (*) 47e8302... ipa/120146 - deal with vanished varpool nodes in IPA PTA (*) 94d10c0... tree-optimization/120143 - ICE with failed early break stor (*) 4017b37... tree-optimization/120089 - force all PHIs live for early-br (*) 856c493... tree-optimization/120043 - bogus conditional store eliminat (*) 7b38bab... Fix PR 119928, formal arguments used to wrongly inferred fo (*) bdcef06... Daily bump. (*) de01448... 
testsuite: Fix pr119131-1.c for targets which emit a psabi (*) 1c1847f... Daily bump. (*) c60183d... Fortran: parsing issue with DO CONCURRENT;ENDDO on same lin (*) 01324ff... Fortran: array subreferences and components of derived type (*) 6683f2c... Fix wrong optimization of complex boolean expression (*) 89ca647... Daily bump. (*) 7317c72... fortran: Add testcases for PR120152, PR120153 and PR120158 (*) d4fd651... libfortran: Fix up maxval/maxloc for UNSIGNED [PR120158] (*) 0ca51bd... libfortran: Add 5 missing UNSIGNED symbols [PR120153] (*) 9d19251... libfortran: Readd 15 accidentally removed libgfortran symbo (*) bfcb5da... libcpp: Further fixes for incorrect line numbers in large f (*) f950bdb... Daily bump. (*) d0e6d79... libstdc++: Add missing export for std::is_layout_compatible (*) 809c5d7... c++: C++17/20 class layout divergence [PR120012] (*) fa55a6c... c++: let plain -Wabi warn about future changes (*) 77780c3... ipa: Do not emit info about temporary clones to ipa-clones (*) 99e2f11... Document option -fdump-ipa-clones (*) e52f71b... libstdc++: Fix width computation for the chrono formatting (*) d2d8318... Daily bump. (*) b6f68c0... Allow IPA_CP to handle UNDEFINED as VARYING. (*) 0d46cee... libstdc++: Add missing feature-test macro in (*) ebc9606... libstdc++: Remove unnecessary dg-prune-output from tests (*) f2a69ee... libstdc++: fix possible undefined atomic lock-free type ali (*) a14d65f... gimple-fold: Fix fold_truth_andor_for_ifcombine [PR120074] (*) 941a1b4... libgomp: Update SVE test (*) 24a0279... Daily bump. (*) 169ad48... ipa/120006 - wrong code with IPA PTA (*) b36014e... ipa/119973 - IPA PTA issue with global initializers (*) 37c3124... Ada: Fix assertion failure on Finalizable aspect for tagged (*) 055434c... Daily bump. (*) 2b114d7... Daily bump. (*) 768c8ae... Fortran: fix procedure pointer handling with -fcheck=pointe (*) 2f0338c... Daily bump. (*) 14c2a12... c: Fix up RAW_DATA_CST handling in check_constexpr_init [PR (*) d32ece4... 
libsanitizer: Fix build with glibc 2.42 (*) e9eaf25... Daily bump. (*) 90484ea... c++/modules: Ensure deduction guides for imported types are (*) aa49bb9... c++/modules: Fix imported CNTTPs being considered non-const (*) 3042862... c++/modules: Catch exposures of TU-local values through inl (*) 25db596... Daily bump. (*) d3e4290... Update gcc .po files (*) f685d31... Always reflect lower bits from mask in subranges. (*) c9d4d3b... testsuite: Force -mcmodel=small for gcc.target/aarch64/pr11 (*) 670250c... c++: UNBOUND_CLASS_TEMPLATE context substitution [PR119981] (*) 01ebce5... Fix GNAT build failure for x86/FreeBSD (*) ffc40e9... AVR: fxload__libgcc: Use REG_ prefix. (*) e268cb2... AVR: target/119989 - Add missing clobbers to xload__l (*) 17695fe... Fix compilation failure on FreeBSD (*) 9483020... libs
[gcc r16-806] RISC-V: Add test for vec_duplicate + vand.vv combine case 1 with GR2VR cost 0, 1 and 2
https://gcc.gnu.org/g:4f02bfb62da3a0e32a86cc2ac1171b11da026e7c commit r16-806-g4f02bfb62da3a0e32a86cc2ac1171b11da026e7c Author: Pan Li Date: Tue May 20 22:30:04 2025 +0800 RISC-V: Add test for vec_duplicate + vand.vv combine case 1 with GR2VR cost 0, 1 and 2 Add asm dump check test for vec_duplicate + vand.vv combine to vand.vx, with GR2VR costs of 0, 1 and 2. The test suites below pass with this patch. * The rv64gcv full regression test. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i16.c: Add asm check for vand.vx combine. * gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i32.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i64.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i8.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u16.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u32.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u64.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u8.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i16.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i32.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i64.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i8.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u16.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u32.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u64.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u8.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i16.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i32.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i64.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i8.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u16.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u32.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u64.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u8.c: Ditto. 
Signed-off-by: Pan Li Diff: --- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i16.c | 4 +++- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i32.c | 4 +++- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i64.c | 4 +++- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i8.c | 4 +++- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u16.c | 4 +++- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u32.c | 4 +++- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u64.c | 4 +++- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u8.c | 4 +++- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i16.c | 4 +++- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i32.c | 4 +++- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i64.c | 4 +++- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i8.c | 4 +++- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u16.c | 4 +++- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u32.c | 4 +++- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u64.c | 4 +++- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u8.c | 4 +++- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i16.c | 2 ++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i32.c | 2 ++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i64.c | 2 ++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i8.c | 6 -- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u16.c | 2 ++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u32.c | 2 ++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u64.c | 2 ++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u8.c | 2 ++ 24 files changed, 66 insertions(+), 18 deletions(-) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i16.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i16.c index 6f59b07d236c..62fd4e39c018 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i16.c +++ 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i16.c @@ -7,8 +7,10 @@ DEF_VX_BINARY_CASE_1_WRAP(T, +, add, VX_BINARY_BODY_X16) DEF_VX_BINARY_CASE_1_WRAP(T, -, sub, VX_BINARY_BODY_X16) -DEF_VX_BINARY_REVERSE_CASE_1_WRAP(T, -, rsub, VX_BINARY_REVERSE_BODY_X16); +DEF_VX_BINARY_REVERSE_CASE_1_WRAP(T, -, rsub, VX_BINARY_REVERSE_BODY_X16) +DEF_VX_BINARY_CASE_1_WRAP(T, &, and, VX_BINARY_BODY_X16) /* { dg-final { scan-assembler {vadd.vx} } } */ /* { dg-final { scan-assembler {vsub.vx} } } */ /* { dg-final { scan-assembler {vrsub.vx} } } */ +/* { dg-final { scan-assembler {vand.vx} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i32.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i32.c index 69b2227d889a..d047458b81d0 100644 --- a/gcc/test
[gcc r16-804] RISC-V: RISC-V: Combine vec_duplicate + vand.vv to vand.vx on GR2VR cost
https://gcc.gnu.org/g:ad041944f1060be0c9280421a065037aa14e169e commit r16-804-gad041944f1060be0c9280421a065037aa14e169e Author: Pan Li Date: Tue May 20 15:00:15 2025 +0800 RISC-V: RISC-V: Combine vec_duplicate + vand.vv to vand.vx on GR2VR cost This patch would like to combine the vec_duplicate + vand.vv to the vand.vx. See the example code below. The related pattern will depend on the cost of vec_duplicate from GR2VR. Then the late-combine will take action if the cost of GR2VR is zero, and reject the combination if the GR2VR cost is greater than zero. Assume we have example code like below, GR2VR cost is 0. #define DEF_VX_BINARY(T, OP) \ void \ test_vx_binary (T * restrict out, T * restrict in, T x, unsigned n) \ { \ for (unsigned i = 0; i < n; i++) \ out[i] = in[i] OP x; \ } DEF_VX_BINARY(int32_t, &) Before this patch: 10 │ test_vx_binary_and_int32_t_case_0: 11 │ beq a3,zero,.L8 12 │ vsetvli a5,zero,e32,m1,ta,ma 13 │ vmv.v.x v2,a2 14 │ slli a3,a3,32 15 │ srli a3,a3,32 16 │ .L3: 17 │ vsetvli a5,a3,e32,m1,ta,ma 18 │ vle32.v v1,0(a1) 19 │ slli a4,a5,2 20 │ sub a3,a3,a5 21 │ add a1,a1,a4 22 │ vand.vv v1,v1,v2 23 │ vse32.v v1,0(a0) 24 │ add a0,a0,a4 25 │ bne a3,zero,.L3 After this patch: 10 │ test_vx_binary_and_int32_t_case_0: 11 │ beq a3,zero,.L8 12 │ slli a3,a3,32 13 │ srli a3,a3,32 14 │ .L3: 15 │ vsetvli a5,a3,e32,m1,ta,ma 16 │ vle32.v v1,0(a1) 17 │ slli a4,a5,2 18 │ sub a3,a3,a5 19 │ add a1,a1,a4 20 │ vand.vx v1,v1,a2 21 │ vse32.v v1,0(a0) 22 │ add a0,a0,a4 23 │ bne a3,zero,.L3 The test suites below pass with this patch. * The rv64gcv full regression test. gcc/ChangeLog: * config/riscv/riscv-v.cc (expand_vx_binary_vec_dup_vec): Add new case for rtx code AND. (expand_vx_binary_vec_vec_dup): Ditto. * config/riscv/riscv.cc (riscv_rtx_costs): Ditto. * config/riscv/vector-iterators.md: Add new op and to no_shift_vx_ops. 
Signed-off-by: Pan Li Diff: --- gcc/config/riscv/riscv-v.cc | 2 ++ gcc/config/riscv/riscv.cc| 1 + gcc/config/riscv/vector-iterators.md | 2 +- 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 1b5ef51886e3..e406e7a7f590 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -5511,6 +5511,7 @@ expand_vx_binary_vec_dup_vec (rtx op_0, rtx op_1, rtx op_2, switch (code) { case PLUS: +case AND: icode = code_for_pred_scalar (code, mode); break; case MINUS: @@ -5537,6 +5538,7 @@ expand_vx_binary_vec_vec_dup (rtx op_0, rtx op_1, rtx op_2, switch (code) { case MINUS: +case AND: icode = code_for_pred_scalar (code, mode); break; default: diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 1a88e96d8c6f..03dcc347fb87 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -3916,6 +3916,7 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN break; case PLUS: case MINUS: + case AND: { rtx op_0 = XEXP (x, 0); rtx op_1 = XEXP (x, 1); diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md index 23cb940310f2..026be6f65d39 100644 --- a/gcc/config/riscv/vector-iterators.md +++ b/gcc/config/riscv/vector-iterators.md @@ -4042,7 +4042,7 @@ ]) (define_code_iterator any_int_binop_no_shift_vx [ - plus minus + plus minus and ]) (define_code_iterator any_int_unop [neg not])
[gcc r16-805] RISC-V: Add test for vec_duplicate + vand.vv combine case 0 with GR2VR cost 0, 2 and 15
https://gcc.gnu.org/g:b7b914622e8da0d5f10027d9a4db418f21ed2ddc commit r16-805-gb7b914622e8da0d5f10027d9a4db418f21ed2ddc Author: Pan Li Date: Tue May 20 15:06:34 2025 +0800 RISC-V: Add test for vec_duplicate + vand.vv combine case 0 with GR2VR cost 0, 2 and 15 Add asm dump check test for vec_duplicate + vand.vv combine to vand.vx, with GR2VR costs of 0, 2 and 15. The test suites below pass with this patch. * The rv64gcv full regression test. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i16.c: Add test cases for vand vx combine case 0 on GR2VR cost. * gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i32.c: Ditto * gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i64.c: Ditto * gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i8.c: Ditto * gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u16.c: Ditto * gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u32.c: Ditto * gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u64.c: Ditto * gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u8.c: Ditto * gcc.target/riscv/rvv/autovec/vx_vf/vx-2-i16.c: Ditto * gcc.target/riscv/rvv/autovec/vx_vf/vx-2-i32.c: Ditto * gcc.target/riscv/rvv/autovec/vx_vf/vx-2-i64.c: Ditto * gcc.target/riscv/rvv/autovec/vx_vf/vx-2-i8.c: Ditto * gcc.target/riscv/rvv/autovec/vx_vf/vx-2-u16.c: Ditto * gcc.target/riscv/rvv/autovec/vx_vf/vx-2-u32.c: Ditto * gcc.target/riscv/rvv/autovec/vx_vf/vx-2-u64.c: Ditto * gcc.target/riscv/rvv/autovec/vx_vf/vx-2-u8.c: Ditto * gcc.target/riscv/rvv/autovec/vx_vf/vx-3-i16.c: Ditto * gcc.target/riscv/rvv/autovec/vx_vf/vx-3-i32.c: Ditto * gcc.target/riscv/rvv/autovec/vx_vf/vx-3-i64.c: Ditto * gcc.target/riscv/rvv/autovec/vx_vf/vx-3-i8.c: Ditto * gcc.target/riscv/rvv/autovec/vx_vf/vx-3-u16.c: Ditto * gcc.target/riscv/rvv/autovec/vx_vf/vx-3-u32.c: Ditto * gcc.target/riscv/rvv/autovec/vx_vf/vx-3-u64.c: Ditto * gcc.target/riscv/rvv/autovec/vx_vf/vx-3-u8.c: Ditto * gcc.target/riscv/rvv/autovec/vx_vf/vx_binary_data.h: Add test data for vand.vx run test. 
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vand-run-1-i16.c: New test. * gcc.target/riscv/rvv/autovec/vx_vf/vx_vand-run-1-i32.c: New test. * gcc.target/riscv/rvv/autovec/vx_vf/vx_vand-run-1-i64.c: New test. * gcc.target/riscv/rvv/autovec/vx_vf/vx_vand-run-1-i8.c: New test. * gcc.target/riscv/rvv/autovec/vx_vf/vx_vand-run-1-u16.c: New test. * gcc.target/riscv/rvv/autovec/vx_vf/vx_vand-run-1-u32.c: New test. * gcc.target/riscv/rvv/autovec/vx_vf/vx_vand-run-1-u64.c: New test. * gcc.target/riscv/rvv/autovec/vx_vf/vx_vand-run-1-u8.c: New test. Signed-off-by: Pan Li Diff: --- .../gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i16.c | 2 + .../gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i32.c | 2 + .../gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i64.c | 2 + .../gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i8.c | 2 + .../gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u16.c | 2 + .../gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u32.c | 2 + .../gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u64.c | 2 + .../gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u8.c | 2 + .../gcc.target/riscv/rvv/autovec/vx_vf/vx-2-i16.c | 4 +- .../gcc.target/riscv/rvv/autovec/vx_vf/vx-2-i32.c | 4 +- .../gcc.target/riscv/rvv/autovec/vx_vf/vx-2-i64.c | 4 +- .../gcc.target/riscv/rvv/autovec/vx_vf/vx-2-i8.c | 4 +- .../gcc.target/riscv/rvv/autovec/vx_vf/vx-2-u16.c | 4 +- .../gcc.target/riscv/rvv/autovec/vx_vf/vx-2-u32.c | 4 +- .../gcc.target/riscv/rvv/autovec/vx_vf/vx-2-u64.c | 4 +- .../gcc.target/riscv/rvv/autovec/vx_vf/vx-2-u8.c | 4 +- .../gcc.target/riscv/rvv/autovec/vx_vf/vx-3-i16.c | 4 +- .../gcc.target/riscv/rvv/autovec/vx_vf/vx-3-i32.c | 4 +- .../gcc.target/riscv/rvv/autovec/vx_vf/vx-3-i64.c | 4 +- .../gcc.target/riscv/rvv/autovec/vx_vf/vx-3-i8.c | 4 +- .../gcc.target/riscv/rvv/autovec/vx_vf/vx-3-u16.c | 4 +- .../gcc.target/riscv/rvv/autovec/vx_vf/vx-3-u32.c | 4 +- .../gcc.target/riscv/rvv/autovec/vx_vf/vx-3-u64.c | 4 +- .../gcc.target/riscv/rvv/autovec/vx_vf/vx-3-u8.c | 4 +- .../riscv/rvv/autovec/vx_vf/vx_binary_data.h | 392 + 
.../riscv/rvv/autovec/vx_vf/vx_vand-run-1-i16.c| 15 + .../riscv/rvv/autovec/vx_vf/vx_vand-run-1-i32.c| 15 + .../riscv/rvv/autovec/vx_vf/vx_vand-run-1-i64.c| 15 + .../riscv/rvv/autovec/vx_vf/vx_vand-run-1-i8.c | 15 + .../riscv/rvv/autovec/vx_vf/vx_vand-run-1-u16.c| 15 + .../riscv/rvv/autovec/vx_vf/vx_vand-run-1-u32.c| 15 + .../riscv/rvv/autovec/vx_vf/vx_vand-run-1-u64.c| 15 + .../riscv/rvv/autovec/vx_vf/vx_vand-run-1-u8.c |