[gcc(refs/users/meissner/heads/work214-sha)] Revert changes
https://gcc.gnu.org/g:8117a1d0926d3c0132e49138e909a7f3c2d3cee7 commit 8117a1d0926d3c0132e49138e909a7f3c2d3cee7 Author: Michael Meissner Date: Wed Jul 9 01:21:14 2025 -0400 Revert changes Diff: --- gcc/config/rs6000/fusion.md | 15 +-- gcc/config/rs6000/genfusion.pl | 58 +++-- gcc/config/rs6000/predicates.md | 12 - gcc/config/rs6000/rs6000.md | 7 + 4 files changed, 11 insertions(+), 81 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index d24837d68d83..621b346f9eb9 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -1871,23 +1871,20 @@ ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vand -> vand (define_insn "*fuse_vand_vand" - [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") -(and:VM (and:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v") - (match_operand:VM 1 "vector_fusion_operand" "%v,v,v,wa,v")) - (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v"))) - (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] + [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v") +(and:VM (and:VM (match_operand:VM 0 "altivec_register_operand" "v,v,v,v") + (match_operand:VM 1 "altivec_register_operand" "%v,v,v,v")) + (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) + (clobber (match_scratch:VM 4 "=X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ vand %3,%1,%0\;vand %3,%3,%2 vand %3,%1,%0\;vand %3,%3,%2 vand %3,%1,%0\;vand %3,%3,%2 - xxeval %x3,%x2,%x1,%x0,1 vand %4,%1,%0\;vand %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") - (set_attr "length" "8") - (set_attr "prefixed" "*,*,*,yes,*") - (set_attr "isa" "*,*,*,xxeval,*")]) + (set_attr "length" "8")]) ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vandc -> vand diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index 351a4d914a4a..e5d3b1ee449d 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ 
-211,33 +211,25 @@ sub gen_logical_addsubf $inner_comp, $inner_inv, $inner_rtl, $inner_op, $both_commute, $c4, $bc, $inner_arg0, $inner_arg1, $inner_exp, $outer_arg2, $outer_exp, $ftype, $insn, $is_subf, $is_rsubf, $outer_32, $outer_42,$outer_name, - $fuse_type, $xxeval, $c5, $vect_pred, $vect_inner_arg0, $vect_inner_arg1, - $vect_inner_exp, $vect_outer_arg2, $vect_outer_exp); - -my %xxeval_fusions = ( - "vand_vand" => 1, -); - -KIND: foreach $kind ('scalar','vector') { + $fuse_type); + KIND: foreach $kind ('scalar','vector') { @outer_ops = @logicals; if ( $kind eq 'vector' ) { $vchr = "v"; $mode = "VM"; $pred = "altivec_register_operand"; - $vect_pred = "vector_fusion_operand"; $constraint = "v"; $fuse_type = "fused_vector"; } else { $vchr = ""; $mode = "GPR"; - $vect_pred = $pred = "gpc_reg_operand"; + $pred = "gpc_reg_operand"; $constraint = "r"; $fuse_type = "fused_arith_logical"; push (@outer_ops, @addsub); push (@outer_ops, ( "rsubf" )); } $c4 = "${constraint},${constraint},${constraint},${constraint}"; - $c5 = "${constraint},${constraint},${constraint},wa,${constraint}"; OUTER: foreach $outer ( @outer_ops ) { $outer_name = "${vchr}${outer}"; $is_subf = ( $outer eq "subf" ); @@ -271,33 +263,23 @@ sub gen_logical_addsubf $bc = ""; if ( $both_commute ) { $bc = "%"; } $inner_arg0 = "(match_operand:${mode} 0 \"${pred}\" \"${c4}\")"; $inner_arg1 = "(match_operand:${mode} 1 \"${pred}\" \"${bc}${c4}\")"; - $vect_inner_arg0 = "(match_operand:${mode} 0 \"${vect_pred}\" \"${c5}\")"; - $vect_inner_arg1 = "(match_operand:${mode} 1 \"${vect_pred}\" \"${bc}${c5}\")"; if ( ($inner_comp & 1) == 1 ) { $inner_arg0 = "(not:${mode} $inner_arg0)"; - $vect_inner_arg0 = "(not:${mode} $vect_inner_arg0)"; } if ( ($inner_comp & 2) == 2 ) { $inner_arg1 = "(not:${mode} $inner_arg1)"; - $vect_inner_arg1 = "(not:${mode} $vect_inner_arg1)"; } $inner_exp = "(${inner_rtl}:${mode} ${inner_arg0} ${inner_arg1})"; - $vect_inner_exp = "(${inner_rtl}:${mode} ${vect_inner_arg0} - 
${vect_inner_arg1})"; if ( $inner_inv == 1 ) { $inner_exp = "(not:${mode} $inner_exp)"; - $vect_inner_exp = "(not:${mode} $vect_inner_exp)"; } $outer_arg2 = "(match_operand:${mode} 2 \"${pred}\" \"${c4}\")"; - $vect_outer_arg2 = "(match_operand:${mode}
[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector and to vector and fusion
https://gcc.gnu.org/g:7b740ddfc2dbfb1d889e30806f2758b3c6fb7034 commit 7b740ddfc2dbfb1d889e30806f2758b3c6fb7034 Author: Michael Meissner Date: Wed Jul 9 01:20:03 2025 -0400 PR target/117251: Improve vector and to vector and fusion See the following post for a complete explanation of what the patches for PR target/117251 do: * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html This is patch #1 of 45 to generate the 'XXEVAL' instruction on power10 and power11 instead of using the Altivec 'VAND' instruction feeding into 'VAND'. The 'XXEVAL' instruction can use all 64 vector registers, instead of the 32 registers that traditional Altivec vector instructions use. By allowing all of the vector registers to be used, it reduces the amount of spilling that a large benchmark generated. Currently the following code: vector int a, b, c, d; a = (c & d) & b; Generates: vand t,c,d vand a,t,b Now in addition with this patch, if the arguments or result are allocated to a traditional FPR register, the GCC compiler will now generate the following code instead of adding vector move instructions: xxeval a,b,c,d,1 Since fusion using 2 Altivec instructions is slightly faster than using the 'XXEVAL' instruction we prefer to generate the Altivec instructions if we can. In addition, because 'XXEVAL' is a prefixed instruction, it possibly might generate an extra NOP instruction to align the 'XXEVAL' instruction. I have tested these patches on both big endian and little endian PowerPC servers, with no regressions. Can I check these patches into the trunk? 2025-07-09 Michael Meissner gcc/ PR target/117251 * config/rs6000/fusion.md: Regenerate. * config/rs6000/genfusion.pl (gen_logical_addsubf): Add support to generate vector/vector and/and fusion if XXEVAL is supported. * config/rs6000/predicates.md (vector_fusion_operand): New predicate. * config/rs6000/rs6000.h (TARGET_XXEVAL): New macro. * config/rs6000/rs6000.md (isa attribute): Add xxeval.
(enabled attribute): Add support for XXEVAL support. Diff: --- gcc/config/rs6000/fusion.md | 15 ++- gcc/config/rs6000/genfusion.pl | 58 ++--- gcc/config/rs6000/predicates.md | 12 + gcc/config/rs6000/rs6000.md | 7 - 4 files changed, 81 insertions(+), 11 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index 621b346f9eb9..d24837d68d83 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -1871,20 +1871,23 @@ ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vand -> vand (define_insn "*fuse_vand_vand" - [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v") -(and:VM (and:VM (match_operand:VM 0 "altivec_register_operand" "v,v,v,v") - (match_operand:VM 1 "altivec_register_operand" "%v,v,v,v")) - (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) - (clobber (match_scratch:VM 4 "=X,X,X,&v"))] + [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") +(and:VM (and:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v") + (match_operand:VM 1 "vector_fusion_operand" "%v,v,v,wa,v")) + (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v"))) + (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ vand %3,%1,%0\;vand %3,%3,%2 vand %3,%1,%0\;vand %3,%3,%2 vand %3,%1,%0\;vand %3,%3,%2 + xxeval %x3,%x2,%x1,%x0,1 vand %4,%1,%0\;vand %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") - (set_attr "length" "8")]) + (set_attr "length" "8") + (set_attr "prefixed" "*,*,*,yes,*") + (set_attr "isa" "*,*,*,xxeval,*")]) ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vandc -> vand diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index e5d3b1ee449d..351a4d914a4a 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ -211,25 +211,33 @@ sub gen_logical_addsubf $inner_comp, $inner_inv, $inner_rtl, $inner_op, $both_commute, $c4, $bc, $inner_arg0, 
$inner_arg1, $inner_exp, $outer_arg2, $outer_exp, $ftype, $insn, $is_subf, $is_rsubf, $outer_32, $outer_42,$outer_name, - $fuse_type); - KIND: foreach $kind ('scalar','vector') { + $fuse_type, $xxeval, $c5, $vect_pred, $vect_inner_arg0, $vect_inner_arg1, + $vect_inner_exp, $vect_outer_arg2, $vect_outer_exp); + +my %xxeval_fusions = ( + "vand_vand" => 1, +); + +KIND: foreach $kind ('scalar','
[gcc(refs/users/mikael/heads/stabilisation_descriptor_v01)] Correction bootstrap
https://gcc.gnu.org/g:c8d7942a9892c7a6207aad350ba706ac54cdd624 commit c8d7942a9892c7a6207aad350ba706ac54cdd624 Author: Mikael Morin Date: Tue Jul 8 13:40:47 2025 +0200 Correction bootstrap Diff: --- gcc/fortran/trans-array.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc index fcc9daa893d1..086863822375 100644 --- a/gcc/fortran/trans-array.cc +++ b/gcc/fortran/trans-array.cc @@ -11437,7 +11437,6 @@ gfc_alloc_allocatable_for_assignment (gfc_loopinfo *loop, tree lbd; tree class_expr2 = NULL_TREE; int n; - int dim; gfc_array_spec * as; bool coarray = (flag_coarray == GFC_FCOARRAY_LIB && gfc_caf_attr (expr1, true).codimension);
[gcc r16-2093] libstdc++: Check prerequisites of layout_*::operator().
https://gcc.gnu.org/g:4d86e4cda01aa3ab60de164a8492a99bc9ca1f70 commit r16-2093-g4d86e4cda01aa3ab60de164a8492a99bc9ca1f70 Author: Luc Grosheintz Date: Fri Jul 4 10:29:43 2025 +0200 libstdc++: Check prerequisites of layout_*::operator(). Previously, the prerequisite that the arguments passed to operator() are a multi-dimensional index (of extents()) was not checked. Both mapping::operator() and mdspan::operator[] have the same prerequisite. Since mdspan must check the prerequisite for user-defined layout mappings, the preference is to check in mdspan. Because out-of-bounds accesses are very common, it's nevertheless useful to check the prerequisite in mapping::operator(). This is relevant for cases where the layout mappings are used without mdspan. This commit checks the prerequisites via _GLIBCXX_DEBUG_ASSERTs and adds the required tests. More discussion in the email chain starting at: https://gcc.gnu.org/pipermail/libstdc++/2025-July/062265.html libstdc++-v3/ChangeLog: * include/std/mdspan: Check prerequisites of layout_*::operator() with _GLIBCXX_DEBUG_ASSERTs. * testsuite/23_containers/mdspan/layouts/debug/out_of_bounds_neg.cc: Add tests for prerequisites.
Reviewed-by: Tomasz Kamiński Signed-off-by: Luc Grosheintz Diff: --- libstdc++-v3/include/std/mdspan| 5 .../mdspan/layouts/debug/out_of_bounds_neg.cc | 30 ++ 2 files changed, 35 insertions(+) diff --git a/libstdc++-v3/include/std/mdspan b/libstdc++-v3/include/std/mdspan index 4a06fb2d3a86..1fdcae634419 100644 --- a/libstdc++-v3/include/std/mdspan +++ b/libstdc++-v3/include/std/mdspan @@ -441,6 +441,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _IndexType __mult = 1; auto __update = [&, __pos = 0u](_IndexType __idx) mutable { + _GLIBCXX_DEBUG_ASSERT(cmp_less(__idx, __exts.extent(__pos))); __res += __idx * __mult; __mult *= __exts.extent(__pos); ++__pos; @@ -651,6 +652,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION auto __update = [&, __pos = __exts.rank()](_IndexType) mutable { --__pos; + _GLIBCXX_DEBUG_ASSERT(cmp_less(__ind_arr[__pos], + __exts.extent(__pos))); __res += __ind_arr[__pos] * __mult; __mult *= __exts.extent(__pos); }; @@ -822,6 +825,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { auto __update = [&, __pos = 0u](_IndexType __idx) mutable { + _GLIBCXX_DEBUG_ASSERT(cmp_less(__idx, + __m.extents().extent(__pos))); __res += __idx * __m.stride(__pos++); }; (__update(__indices), ...); diff --git a/libstdc++-v3/testsuite/23_containers/mdspan/layouts/debug/out_of_bounds_neg.cc b/libstdc++-v3/testsuite/23_containers/mdspan/layouts/debug/out_of_bounds_neg.cc new file mode 100644 index ..fb8ff01e8aa2 --- /dev/null +++ b/libstdc++-v3/testsuite/23_containers/mdspan/layouts/debug/out_of_bounds_neg.cc @@ -0,0 +1,30 @@ +// { dg-do compile { target c++23 } } +// { dg-require-debug-mode "" } +#include + +template + constexpr bool + test_out_of_bounds_1d() + { +auto m = typename Layout::mapping>{}; +(void) m(0); // { dg-error "expansion of" } +return true; + } +static_assert(test_out_of_bounds_1d()); // { dg-error "expansion of" } +static_assert(test_out_of_bounds_1d()); // { dg-error "expansion of" } +static_assert(test_out_of_bounds_1d()); // { dg-error "expansion of" } + +template + constexpr 
bool + test_out_of_bounds_3d() + { +auto m = typename Layout::mapping>{}; +(void) m(2, 5, 5); // { dg-error "expansion of" } +return true; + } +static_assert(test_out_of_bounds_3d()); // { dg-error "expansion of" } +static_assert(test_out_of_bounds_3d()); // { dg-error "expansion of" } +static_assert(test_out_of_bounds_3d()); // { dg-error "expansion of" } + +// { dg-prune-output "non-constant condition for static assertion" } +// { dg-prune-output "__glibcxx_assert" }
[gcc(refs/users/mikael/heads/stabilisation_descriptor_v01)] Correction bootstrap
https://gcc.gnu.org/g:7c5cc328d38ed501c860c3348845c82615952f91 commit 7c5cc328d38ed501c860c3348845c82615952f91 Author: Mikael Morin Date: Tue Jul 8 13:41:33 2025 +0200 Correction bootstrap Diff: --- gcc/fortran/trans-array.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc index 086863822375..32d1869cf5a5 100644 --- a/gcc/fortran/trans-array.cc +++ b/gcc/fortran/trans-array.cc @@ -11390,7 +11390,6 @@ update_reallocated_descriptor (stmtblock_t *block, gfc_loopinfo *loop) for (int i = 0; i < s->dimen; i++) { int dim = s->dim[i]; - tree tree_dim = gfc_rank_cst[dim]; UPDATE_VALUE (info->start[dim]); UPDATE_VALUE (info->end[dim]); UPDATE_VALUE (info->stride[dim]);
[gcc r16-2098] libstdc++: Implement mdspan and tests [PR107761].
https://gcc.gnu.org/g:b7b8eb90abaeaaf4a51325e087cd43a4dac8d25a commit r16-2098-gb7b8eb90abaeaaf4a51325e087cd43a4dac8d25a Author: Luc Grosheintz Date: Tue Jul 8 10:24:26 2025 +0200 libstdc++: Implement mdspan and tests [PR107761]. Implements the class mdspan as described in N4950, i.e. without P3029. It also adds tests for mdspan. This commit completes the implementation of P0009, i.e. the C++23 part . PR libstdc++/107761 libstdc++-v3/ChangeLog: * include/std/mdspan (mdspan): New class. * src/c++23/std.cc.in (mdspan): Add. * testsuite/23_containers/mdspan/class_mandate_neg.cc: New test. * testsuite/23_containers/mdspan/mdspan.cc: New test. * testsuite/23_containers/mdspan/layout_like.h: Add class LayoutLike which models a user-defined layout. Reviewed-by: Tomasz Kamiński Signed-off-by: Luc Grosheintz Diff: --- libstdc++-v3/include/std/mdspan| 285 + libstdc++-v3/src/c++23/std.cc.in | 3 +- .../23_containers/mdspan/class_mandate_neg.cc | 41 ++ .../testsuite/23_containers/mdspan/layout_like.h | 83 +++ .../testsuite/23_containers/mdspan/mdspan.cc | 643 + .../23_containers/mdspan/out_of_bounds_neg.cc | 24 + 6 files changed, 1078 insertions(+), 1 deletion(-) diff --git a/libstdc++-v3/include/std/mdspan b/libstdc++-v3/include/std/mdspan index b0d8088bb777..5a42aead3ebb 100644 --- a/libstdc++-v3/include/std/mdspan +++ b/libstdc++-v3/include/std/mdspan @@ -1057,6 +1057,291 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { return __p + __i; } }; + namespace __mdspan + { +template + constexpr bool + __is_multi_index(const _Extents& __exts, span<_IndexType, _Nm> __indices) + { + static_assert(__exts.rank() == _Nm); + for (size_t __i = 0; __i < __exts.rank(); ++__i) + if (__indices[__i] >= __exts.extent(__i)) + return false; + return true; + } + } + + template> +class mdspan +{ + static_assert(!is_array_v<_ElementType>, + "ElementType must not be an array type"); + static_assert(!is_abstract_v<_ElementType>, + "ElementType must not be an abstract class type"); + 
static_assert(__mdspan::__is_extents<_Extents>, + "Extents must be a specialization of std::extents"); + static_assert(is_same_v<_ElementType, + typename _AccessorPolicy::element_type>); + +public: + using extents_type = _Extents; + using layout_type = _LayoutPolicy; + using accessor_type = _AccessorPolicy; + using mapping_type = typename layout_type::template mapping; + using element_type = _ElementType; + using value_type = remove_cv_t; + using index_type = typename extents_type::index_type; + using size_type = typename extents_type::size_type; + using rank_type = typename extents_type::rank_type; + using data_handle_type = typename accessor_type::data_handle_type; + using reference = typename accessor_type::reference; + + static constexpr rank_type + rank() noexcept { return extents_type::rank(); } + + static constexpr rank_type + rank_dynamic() noexcept { return extents_type::rank_dynamic(); } + + static constexpr size_t + static_extent(rank_type __r) noexcept + { return extents_type::static_extent(__r); } + + constexpr index_type + extent(rank_type __r) const noexcept { return extents().extent(__r); } + + constexpr + mdspan() + requires (rank_dynamic() > 0) + && is_default_constructible_v + && is_default_constructible_v + && is_default_constructible_v + : _M_accessor(), _M_mapping(), _M_handle() + { } + + constexpr + mdspan(const mdspan& __other) = default; + + constexpr + mdspan(mdspan&& __other) = default; + + template<__mdspan::__valid_index_type... _OIndexTypes> + requires (sizeof...(_OIndexTypes) == rank() + || sizeof...(_OIndexTypes) == rank_dynamic()) +&& is_constructible_v +&& is_default_constructible_v + constexpr explicit + mdspan(data_handle_type __handle, _OIndexTypes... 
__exts) + : _M_accessor(), + _M_mapping(_Extents(static_cast(std::move(__exts))...)), + _M_handle(std::move(__handle)) + { } + + template<__mdspan::__valid_index_type _OIndexType, + size_t _Nm> + requires (_Nm == rank() || _Nm == rank_dynamic()) +&& is_constructible_v +&& is_default_constructible_v + constexpr explicit(_Nm != rank_dynamic()) + mdspan(data_handle_type __handle, span<_OIndexType, _Nm> __exts) + : _M_accessor(), _M_mapping(extents_type(__exts)), + _M_handle(std::move(__handle)) + { } + + template<__mdspan::__valid_index_type _OInd
[gcc r16-2105] s390: Always compute address of stack protector guard
https://gcc.gnu.org/g:bb6075e7115208bab3d9c8b2c54e0bd6a5c808b7 commit r16-2105-gbb6075e7115208bab3d9c8b2c54e0bd6a5c808b7 Author: Stefan Schulze Frielinghaus Date: Tue Jul 8 16:40:34 2025 +0200 s390: Always compute address of stack protector guard Computing the address of the thread pointer on s390 involves multiple instructions and therefore bears the risk that the address of the canary or intermediate values of it are spilled after prologue in order to be reloaded for the epilogue. Since there exists no mechanism to ensure that a value is not coming from stack, as a precaution compute the address always twice, i.e., one time for the prologue and one time for the epilogue. Note, even if there were such a mechanism, emitting optimal code is non-trivial since there exist cases with opposing requirements as e.g. if the thread pointer is not only computed for the TLS guard but also for other TLS objects. For the latter accesses it is desired to spill and reload the thread pointer instead of recomputing it whereas for the former it is not. gcc/ChangeLog: * config/s390/s390.md (stack_protect_get_tpsi): New insn. (stack_protect_get_tpdi): New insn. (stack_protect_set): Use new insn. (stack_protect_test): Use new insn. gcc/testsuite/ChangeLog: * gcc.target/s390/stack-protector-guard-tls-1.c: New test. 
Diff: --- gcc/config/s390/s390.md| 47 -- .../gcc.target/s390/stack-protector-guard-tls-1.c | 39 ++ 2 files changed, 82 insertions(+), 4 deletions(-) diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md index f6db36e0ac38..02bc149b0fba 100644 --- a/gcc/config/s390/s390.md +++ b/gcc/config/s390/s390.md @@ -308,6 +308,9 @@ UNSPECV_SPLIT_STACK_CALL UNSPECV_OSC_BREAK + + ; Stack Protector + UNSPECV_SP_GET_TP ]) ;; @@ -365,6 +368,9 @@ (VR23_REGNUM 45) (VR24_REGNUM 46) (VR31_REGNUM 53) + ; Access registers + (AR0_REGNUM 36) + (AR1_REGNUM 37) ]) ; Rounding modes for binary floating point numbers @@ -11924,15 +11930,43 @@ ; Stack Protector Patterns ; +; Insns stack_protect_get_tp{si,di} are similar to *get_tp_{31,64} but still +; distinct in the sense that they force recomputation of the thread pointer +; instead of potentially reloading it from stack. + +(define_insn_and_split "stack_protect_get_tpsi" + [(set (match_operand:SI 0 "register_operand" "=d") + (unspec_volatile:SI [(const_int 0)] UNSPECV_SP_GET_TP))] + "" + "#" + "&& reload_completed" + [(set (match_dup 0) (reg:SI AR0_REGNUM))]) + +(define_insn_and_split "stack_protect_get_tpdi" + [(set (match_operand:DI 0 "register_operand" "=d") + (unspec_volatile:DI [(const_int 0)] UNSPECV_SP_GET_TP))] + "" + "#" + "&& reload_completed" + [(set (match_dup 1) (reg:SI AR0_REGNUM)) + (set (match_dup 0) (ashift:DI (match_dup 0) (const_int 32))) + (set (strict_low_part (match_dup 1)) (reg:SI AR1_REGNUM))] + "operands[1] = gen_rtx_REG (SImode, REGNO (operands[0]));") + (define_expand "stack_protect_set" [(set (match_operand 0 "memory_operand" "") (match_operand 1 "memory_operand" ""))] "" { #ifdef TARGET_THREAD_SSP_OFFSET + rtx tp = gen_reg_rtx (Pmode); + if (TARGET_64BIT) +emit_insn (gen_stack_protect_get_tpdi (tp)); + else +emit_insn (gen_stack_protect_get_tpsi (tp)); operands[1] -= gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), -GEN_INT (TARGET_THREAD_SSP_OFFSET))); += gen_rtx_MEM (Pmode, 
gen_rtx_PLUS (Pmode, tp, + GEN_INT (TARGET_THREAD_SSP_OFFSET))); #endif if (TARGET_64BIT) emit_insn (gen_stack_protect_setdi (operands[0], operands[1])); @@ -11958,9 +11992,14 @@ { rtx cc_reg, test; #ifdef TARGET_THREAD_SSP_OFFSET + rtx tp = gen_reg_rtx (Pmode); + if (TARGET_64BIT) +emit_insn (gen_stack_protect_get_tpdi (tp)); + else +emit_insn (gen_stack_protect_get_tpsi (tp)); operands[1] -= gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), -GEN_INT (TARGET_THREAD_SSP_OFFSET))); += gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tp, + GEN_INT (TARGET_THREAD_SSP_OFFSET))); #endif if (TARGET_64BIT) emit_insn (gen_stack_protect_testdi (operands[0], operands[1])); diff --git a/gcc/testsuite/gcc.target/s390/stack-protector-guard-tls-1.c b/gcc/testsuite/gcc.target/s390/stack-protector-guard-tls-1.c new file mode 100644 index ..1efd24551443 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/stack-protector-guard-tls-1.c @@ -0,0 +1,39 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fstack-protector-all" } */ +/* { dg-final { scan-assemb
[gcc(refs/users/omachota/heads/rtl-ssa-dce-wdebug)] rtl-ssa-dce: add functions for resurrecting dead insns
https://gcc.gnu.org/g:3b84c69b2237dd7316dc07d2b41f39b2c494d811 commit 3b84c69b2237dd7316dc07d2b41f39b2c494d811 Author: Ondřej Machota Date: Tue Jul 8 11:33:12 2025 +0200 rtl-ssa-dce: add functions for resurecting dead insns Diff: --- gcc/dce.cc | 355 + 1 file changed, 355 insertions(+) diff --git a/gcc/dce.cc b/gcc/dce.cc index 67fb42541d84..d12cab054b48 100644 --- a/gcc/dce.cc +++ b/gcc/dce.cc @@ -19,6 +19,7 @@ along with GCC; see the file COPYING3. If not see #include #include +#include #define INCLUDE_ALGORITHM #define INCLUDE_FUNCTIONAL #define INCLUDE_ARRAY @@ -1378,8 +1379,15 @@ private: void mark_prelive_insn (insn_info *, auto_vec &); auto_vec mark_prelive (); void mark (); + + std::unordered_set propagate_dead_phis (); + void debugize_insn (insn_info *); + void unmark_debugizable(insn_info *, sbitmap); + sbitmap find_debugizable(const std::unordered_set &); + void debugize_insns (const sbitmap); void reset_dead_debug_insn (insn_info *); void reset_dead_debug (); + void sweep (); offset_bitmap m_marked; @@ -1633,6 +1641,352 @@ rtl_ssa_dce::mark () } } + +// Mark instructions that depend on a dead phi - these cannot be restored +std::unordered_set +rtl_ssa_dce::propagate_dead_phis () +{ + std::unordered_set visited_dead_phis; + std::unordered_set depends_on_dead_phi; + auto_vec worklist; + + // add dead phis to worklist + for (ebb_info *ebb : crtl->ssa->ebbs ()) +{ + for (phi_info *phi : ebb->phis ()) + { + if (bitmap_bit_p (m_marked_phis, phi->uid ())) + continue; + + worklist.safe_push (phi); + } +} + + // suppose that debug insns are marked - non marked will be removed later + // propagate dead phis via du chains and unmark reachable debug instructions + while (!worklist.is_empty ()) +{ + set_info *set = worklist.pop (); + insn_info *insn = set->insn (); + + if (insn->is_debug_insn ()) + { + if (dump_file) + fprintf (dump_file, "Debug insns %d depends on dead phi.\n", +insn->uid ()); + +m_marked.clear_bit (insn->uid ()); + // debug instructions dont have 
chains + continue; + } + + // mark + if (insn->is_phi ()) + { + gcc_checking_assert (!bitmap_bit_p(m_marked_phis, static_cast (set)->uid ())); + visited_dead_phis.emplace (static_cast (set)); + } + else + { + gcc_checking_assert (!m_marked.get_bit (insn->uid ())); + depends_on_dead_phi.emplace (insn); + } + + for (use_info *use : set->all_uses ()) + { + if (use->is_in_phi ()) + { + // do not add already visited dead phis + if (visited_dead_phis.count (use->phi ()) == 0) + worklist.safe_push (use->phi ()); + } + else + { + gcc_assert (use->is_in_any_insn ()); + // add all defs from insn to worklist + for (def_info *def : use->insn ()->defs ()) + { + if (def->kind () != access_kind::SET) + continue; + + worklist.safe_push (static_cast (def)); + } + } + } +} + + return depends_on_dead_phi; +} + + +void +rtl_ssa_dce::debugize_insn (insn_info *insn) +{ + +} + +struct register_replacement { + unsigned int regno; + rtx expr; +}; + +static rtx +replace_dead_reg(rtx x, const_rtx old_rtx ATTRIBUTE_UNUSED, void *data) +{ + auto replacement = static_cast(data); + + if (REG_P (x) && REGNO (x) >= FIRST_VIRTUAL_REGISTER && replacement->regno == REGNO (x)) + { + if (GET_MODE (x) == GET_MODE (replacement->expr)) + return replacement->expr; + return lowpart_subreg (GET_MODE (x), replacement->expr, GET_MODE (replacement->expr)); + } + + return NULL_RTX; +} + +// visit every marked instruction in INSN dependency tree and unmark it +void +rtl_ssa_dce::unmark_debugizable (insn_info *insn, sbitmap debugizable) +{ + auto_vec worklist; + gcc_checking_assert (!insn->is_artificial ()); + + bitmap_set_bit (debugizable, insn->uid ()); + worklist.safe_push (insn); + + // process all marked dependencies and unmark them + while (!worklist.is_empty ()) { +insn_info *current = worklist.pop (); +int current_uid = current->uid (); + +// skip instruction that are not marked +if (!bitmap_bit_p(debugizable, current_uid)) + continue; + +bitmap_clear_bit(debugizable, current_uid); + +// add all marked 
dependencies to the worklist +for (def_info *def : current->defs()) +{ + if (def->kind() != access_kind::SET) // skip clobbers +continue; + + auto *set = static_cast(def); + for (use_info *use : set->all_uses()) + { +// this phi node might not be dead +if (use->is_in_phi ()) + continue; + +insn_info *use_i
[gcc r16-2108] c++: Implement part of C++26 P2686R4 - constexpr structured bindings [PR117784]
https://gcc.gnu.org/g:c81447d969f27a8653ebb1a450372f0d25a2e628 commit r16-2108-gc81447d969f27a8653ebb1a450372f0d25a2e628 Author: Jakub Jelinek Date: Tue Jul 8 19:21:55 2025 +0200 c++: Implement part of C++26 P2686R4 - constexpr structured bindings [PR117784] The following patch implements the constexpr structured bindings part of the P2686R4 paper, so the [dcl.pre], [dcl.struct.bind], [dcl.constinit] and first hunk in [dcl.constexpr] changes. The paper doesn't have a feature test macro and the constexpr structured binding part of it seems more-less self-contained, so I think it is useful to get this in independently from the rest. Of course, automatic constexpr/constinit structured bindings in the tuple cases or automatic constexpr/constinit structured bindings with auto & will not really work for now. Another reason for the split is that for C++ < 26, I think what the patch implements is basically what the users will see, i.e. we can accept constexpr or constinit structured binding with pedwarn, but I think we can't change the constant expression rules in C++ < 26. I plan to look at the rest of the paper. 2025-07-08 Jakub Jelinek PR c++/117784 * decl.cc: Implement part of C++26 P2686R4 - constexpr structured bindings. (cp_finish_decl): Pedwarn for C++23 and older on constinit on structured bindings except for static/thread_local where it uses earlier error. (grokdeclarator): Pedwarn on constexpr structured bindings for C++23 and older instead of emitting error always, don't clear constexpr_p in that case. * parser.cc (cp_parser_decomposition_declaration): Copy over DECL_DECLARED_CONSTEXPR_P and DECL_DECLARED_CONSTINIT_P flags. * g++.dg/cpp1z/decomp3.C (test): For constexpr structured binding initialize from constexpr var instead of non-constexpr and expect just a pedwarn for C++23 and older instead of error always. * g++.dg/cpp26/decomp9.C (foo): Likewise. * g++.dg/cpp26/decomp22.C: New test. * g++.dg/cpp26/decomp23.C: New test. 
* g++.dg/cpp26/decomp24.C: New test. * g++.dg/cpp26/decomp25.C: New test. Diff: --- gcc/cp/decl.cc| 19 -- gcc/cp/parser.cc | 6 +- gcc/testsuite/g++.dg/cpp1z/decomp3.C | 3 +- gcc/testsuite/g++.dg/cpp26/decomp22.C | 66 +++ gcc/testsuite/g++.dg/cpp26/decomp23.C | 77 ++ gcc/testsuite/g++.dg/cpp26/decomp24.C | 20 ++ gcc/testsuite/g++.dg/cpp26/decomp25.C | 119 ++ gcc/testsuite/g++.dg/cpp26/decomp9.C | 5 +- 8 files changed, 305 insertions(+), 10 deletions(-) diff --git a/gcc/cp/decl.cc b/gcc/cp/decl.cc index be26bd39b225..99b9854210f7 100644 --- a/gcc/cp/decl.cc +++ b/gcc/cp/decl.cc @@ -9174,6 +9174,10 @@ cp_finish_decl (tree decl, tree init, bool init_const_expr_p, if (decomp) { + if (DECL_DECLARED_CONSTINIT_P (decl) && cxx_dialect < cxx26) + pedwarn (DECL_SOURCE_LOCATION (decl), OPT_Wc__26_extensions, +"% can be applied to structured binding " +"only with %<-std=c++2c%> or %<-std=gnu++2c%>"); cp_maybe_mangle_decomp (decl, decomp); if (TREE_STATIC (decl) && !DECL_FUNCTION_SCOPE_P (decl)) { @@ -13621,9 +13625,10 @@ grokdeclarator (const cp_declarator *declarator, if (typedef_p) error_at (declspecs->locations[ds_typedef], "structured binding declaration cannot be %qs", "typedef"); - if (constexpr_p && !concept_p) - error_at (declspecs->locations[ds_constexpr], "structured " - "binding declaration cannot be %qs", "constexpr"); + if (constexpr_p && !concept_p && cxx_dialect < cxx26) + pedwarn (declspecs->locations[ds_constexpr], OPT_Wc__26_extensions, +"structured binding declaration can be %qs only with " +"%<-std=c++2c%> or %<-std=gnu++2c%>", "constexpr"); if (consteval_p) error_at (declspecs->locations[ds_consteval], "structured " "binding declaration cannot be %qs", "consteval"); @@ -13634,8 +13639,11 @@ grokdeclarator (const cp_declarator *declarator, declspecs->gnu_thread_keyword_p ? 
"__thread" : "thread_local"); if (concept_p) - error_at (declspecs->locations[ds_concept], - "structured binding declaration cannot be %qs", "concept"); + { + error_at (declspecs->locations[ds_concept], + "structured binding declaration cannot be %qs", "concept"); + constexpr_p = 0; + } /* [dcl.struct.bind] "A cv that includes volatile is deprecated." */ if (type_quals & TYPE_QUAL_VO
[gcc r16-2109] libstdc++: Fix _GLIBCXX_DEBUG std::forward_list build regression
https://gcc.gnu.org/g:1f3bf202355f16d6ec0a9b37cb6a71be5f76b77f commit r16-2109-g1f3bf202355f16d6ec0a9b37cb6a71be5f76b77f Author: Jonathan Wakely Date: Tue Jul 8 19:20:13 2025 +0200 libstdc++: Fix _GLIBCXX_DEBUG std::forward_list build regression Commit 2fd6f42c17a8040dbd3460ca34d93695dacf8575 broke _GLIBCXX_DEBUG std::forward_list implementation. libstdc++-v3/ChangeLog: * include/debug/forward_list (_Safe_forward_list<>::_M_swap): Adapt to _M_this() signature change. Diff: --- libstdc++-v3/include/debug/forward_list | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/libstdc++-v3/include/debug/forward_list b/libstdc++-v3/include/debug/forward_list index 4e1511da4e82..9da7dda45034 100644 --- a/libstdc++-v3/include/debug/forward_list +++ b/libstdc++-v3/include/debug/forward_list @@ -144,13 +144,13 @@ namespace __gnu_debug //std::swap(_M_this()->_M_version, __other._M_version); _Safe_iterator_base* __this_its = _M_this()->_M_iterators; _S_swap_aux(__other, __other._M_iterators, - _M_this(), _M_this()->_M_iterators); + *_M_this(), _M_this()->_M_iterators); _Safe_iterator_base* __this_const_its = _M_this()->_M_const_iterators; _S_swap_aux(__other, __other._M_const_iterators, - _M_this(), _M_this()->_M_const_iterators); - _S_swap_aux(_M_this(), __this_its, + *_M_this(), _M_this()->_M_const_iterators); + _S_swap_aux(*_M_this(), __this_its, __other, __other._M_iterators); - _S_swap_aux(_M_this(), __this_const_its, + _S_swap_aux(*_M_this(), __this_const_its, __other, __other._M_const_iterators); }
[gcc r16-2110] xtensa: Fix B[GE/LT]UI instructions with immediate values of 32768 or 65536 not being emitted
https://gcc.gnu.org/g:57da36bed1004d2b78057568176b76cb0a50d149 commit r16-2110-g57da36bed1004d2b78057568176b76cb0a50d149 Author: Takayuki 'January June' Suwa Date: Mon Jul 7 23:40:17 2025 +0900 xtensa: Fix B[GE/LT]UI instructions with immediate values of 32768 or 65536 not being emitted This is because in canonicalize_comparison() in gcc/expmed.cc, the COMPARE rtx_cost() for the immediate values in the title does not change between the old and new versions. This patch fixes that. (note: Currently, this patch only works if some constant propagation optimizations are enabled (-O2 or higher) or if bare large constant assignments are possible (-mconst16 or -mauto-litpools). In the future I hope to make it work at -O1...) gcc/ChangeLog: * config/xtensa/xtensa.cc (xtensa_b4const_or_zero): Remove. (xtensa_b4const): Add a case where the value is 0, and rename to xtensa_b4const_or_zero. (xtensa_rtx_costs): Fix to also consider the result of xtensa_b4constu(). gcc/testsuite/ChangeLog: * gcc.target/xtensa/BGEUI-BLTUI-32k-64k.c: New. 
Diff: --- gcc/config/xtensa/xtensa.cc | 17 + gcc/testsuite/gcc.target/xtensa/BGEUI-BLTUI-32k-64k.c | 19 +++ 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc index 8c43a69f4cd9..b75cec13b28a 100644 --- a/gcc/config/xtensa/xtensa.cc +++ b/gcc/config/xtensa/xtensa.cc @@ -423,12 +423,13 @@ xtensa_uimm8x4 (HOST_WIDE_INT v) } -static bool -xtensa_b4const (HOST_WIDE_INT v) +bool +xtensa_b4const_or_zero (HOST_WIDE_INT v) { switch (v) { case -1: +case 0: case 1: case 2: case 3: @@ -450,15 +451,6 @@ xtensa_b4const (HOST_WIDE_INT v) } -bool -xtensa_b4const_or_zero (HOST_WIDE_INT v) -{ - if (v == 0) -return true; - return xtensa_b4const (v); -} - - bool xtensa_b4constu (HOST_WIDE_INT v) { @@ -4512,7 +4504,8 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, } break; case COMPARE: - if ((INTVAL (x) == 0) || xtensa_b4const (INTVAL (x))) + if (xtensa_b4const_or_zero (INTVAL (x)) + || xtensa_b4constu (INTVAL (x))) { *total = 0; return true; diff --git a/gcc/testsuite/gcc.target/xtensa/BGEUI-BLTUI-32k-64k.c b/gcc/testsuite/gcc.target/xtensa/BGEUI-BLTUI-32k-64k.c new file mode 100644 index ..05873b896896 --- /dev/null +++ b/gcc/testsuite/gcc.target/xtensa/BGEUI-BLTUI-32k-64k.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +extern void foo(void); + +void BGEUI_test(unsigned int a) +{ + if (a < 32768U) +foo(); +} + +void BLTUI_test(unsigned int a) +{ + if (a >= 65536U) +foo(); +} + +/* { dg-final { scan-assembler-times "bgeui" 1 } } */ +/* { dg-final { scan-assembler-times "bltui" 1 } } */
[gcc r16-2111] c++: bogus error with union in qualified name [PR83469]
https://gcc.gnu.org/g:7d11ae1dd95a0296eeb5c14bfe3a5d4ec8873e3b commit r16-2111-g7d11ae1dd95a0296eeb5c14bfe3a5d4ec8873e3b Author: Marek Polacek Date: Tue Jul 8 10:09:36 2025 -0400 c++: bogus error with union in qualified name [PR83469] While working on Reflection I noticed that we reject: union U { int i; }; constexpr auto r = ^^typename ::U; which is due to PR83469. Andrew P. posted a patch in 2021: https://gcc.gnu.org/pipermail/gcc-patches/2021-December/586344.html for which I had some comments but an updated patch never came. ~~ There are a few issues here with typenames and unions (and even struct keywords with unions). First in cp_parser_check_class_key, we need to allow typenames to name union types and union key to be able to use with typenames. The next issue is we need to record if we had a union key, right now we just record it was a struct/class/typename one which is wrong. ~~ This patch is an updated and cleaned up version; I've also addressed a missing bit in pt.cc. PR c++/83469 PR c++/93809 gcc/cp/ChangeLog: * cp-tree.h (UNION_TYPE_P): Define. (TYPENAME_IS_UNION_P): Define. * decl.cc (struct typename_info): Add union_p field. (struct typename_hasher::equal): Compare union_p field. (build_typename_type): Use ti.union_p for union_type. Set TYPENAME_IS_UNION_P. * error.cc (dump_type) : Handle TYPENAME_IS_UNION_P. * module.cc (trees_out::type_node): Likewise. * parser.cc (cp_parser_check_class_key): Allow typename key for union types and allow union keyword for typename types. * pt.cc (tsubst) : Don't conflate unions with class_type. For TYPENAME_IS_CLASS_P, check NON_UNION_CLASS_TYPE_P rather than CLASS_TYPE_P. Add TYPENAME_IS_UNION_P handling. gcc/testsuite/ChangeLog: * g++.dg/template/error45.C: Adjust dg-error. * g++.dg/warn/Wredundant-tags-3.C: Remove xfail. * g++.dg/parse/union1.C: New test. * g++.dg/parse/union2.C: New test. * g++.dg/parse/union3.C: New test. * g++.dg/parse/union4.C: New test. * g++.dg/parse/union5.C: New test. 
* g++.dg/parse/union6.C: New test. Co-authored-by: Andrew Pinski Reviewed-by: Jason Merrill Diff: --- gcc/cp/cp-tree.h | 12 ++-- gcc/cp/decl.cc| 10 ++ gcc/cp/error.cc | 1 + gcc/cp/module.cc | 2 ++ gcc/cp/parser.cc | 4 +++- gcc/cp/pt.cc | 25 + gcc/testsuite/g++.dg/parse/union1.C | 19 +++ gcc/testsuite/g++.dg/parse/union2.C | 19 +++ gcc/testsuite/g++.dg/parse/union3.C | 19 +++ gcc/testsuite/g++.dg/parse/union4.C | 12 gcc/testsuite/g++.dg/parse/union5.C | 5 + gcc/testsuite/g++.dg/parse/union6.C | 5 + gcc/testsuite/g++.dg/template/error45.C | 2 +- gcc/testsuite/g++.dg/warn/Wredundant-tags-3.C | 2 +- 14 files changed, 120 insertions(+), 17 deletions(-) diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index 1b893e23543d..3b92d9af6e1c 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -506,6 +506,7 @@ extern GTY(()) tree cp_global_trees[CPTI_MAX]; LAMBDA_EXPR_STATIC_P (in LAMBDA_EXPR) TARGET_EXPR_ELIDING_P (in TARGET_EXPR) contract_semantic (in ASSERTION_, PRECONDITION_, POSTCONDITION_STMT) + TYPENAME_IS_UNION_P (in TYPENAME_TYPE) 4: IDENTIFIER_MARKED (IDENTIFIER_NODEs) TREE_HAS_CONSTRUCTOR (in INDIRECT_REF, SAVE_EXPR, CONSTRUCTOR, CALL_EXPR, or FIELD_DECL). @@ -2354,6 +2355,10 @@ enum languages { lang_c, lang_cplusplus }; #define NON_UNION_CLASS_TYPE_P(T) \ (TREE_CODE (T) == RECORD_TYPE && TYPE_LANG_FLAG_5 (T)) +/* Nonzero if T is a class type and is a union. */ +#define UNION_TYPE_P(T) \ + (TREE_CODE (T) == UNION_TYPE && TYPE_LANG_FLAG_5 (T)) + /* Keep these checks in ascending code order. */ #define RECORD_OR_UNION_CODE_P(T) \ ((T) == RECORD_TYPE || (T) == UNION_TYPE) @@ -4485,11 +4490,14 @@ get_vec_init_expr (tree t) #define TYPENAME_IS_ENUM_P(NODE) \ (TREE_LANG_FLAG_0 (TYPENAME_TYPE_CHECK (NODE))) -/* True if a TYPENAME_TYPE was declared as a "class", "struct", or - "union". */ +/* True if a TYPENAME_TYPE was declared as a "class" or "struct". 
*/ #define TYPENAME_IS_CLASS_P(NODE) \ (TREE_LANG_FLAG_1 (TYPENAME_TYPE_CHECK (NODE))) +/* True if a TYPENAME_TYPE was declared as a "union". */ +#define TYPENAME_IS_UNION_P(NODE) \ + (TREE_LANG_FLAG_3 (TYPENAME_TYPE_CHECK (N
[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector or to vector or fusion
https://gcc.gnu.org/g:d1a81130bc9a5ffc02f19b1711a2703cc0b03d74 commit d1a81130bc9a5ffc02f19b1711a2703cc0b03d74 Author: Michael Meissner Date: Wed Jul 9 01:46:41 2025 -0400 PR target/117251: Improve vector or to vector or fusion See the following post for a complete explanation of what the patches for PR target/117251 do: * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html This is patch #23 of 45 to generate the 'XXEVAL' instruction on power10 and power11 instead of using the Altivec 'VOR' instruction feeding into 'VOR'. The 'XXEVAL' instruction can use all 64 vector registers, instead of the 32 registers that traditional Altivec vector instructions use. By allowing all of the vector registers to be used, it reduces the amount of spilling that a large benchmark generated. Currently the following code: vector int a, b, c, d; a = (c | d) | b; Generates: vor t,c,d vor a,t,b Now in addition with this patch, if the arguments or result is allocated to a traditional FPR register, the GCC compiler will now generate the following code instead of adding vector move instructions: xxeval a,b,c,127 Since fusion using 2 Altivec instructions is slightly faster than using the 'XXEVAL' instruction we prefer to generate the Altivec instructions if we can. In addition, because 'XXEVAL' is a prefixed instruction, it possibly might generate an extra NOP instruction to align the 'XXEVAL' instruction. I have tested these patches on both big endian and little endian PowerPC servers, with no regressions. Can I check these patches into the trunk? 2025-07-09 Michael Meissner gcc/ PR target/117251 * config/rs6000/fusion.md: Regenerate. * config/rs6000/genfusion.pl (gen_logical_addsubf): Add support to generate vector or => or fusion if XXEVAL is supported. 
Diff: --- gcc/config/rs6000/fusion.md| 15 +-- gcc/config/rs6000/genfusion.pl | 1 + 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index d1f6a38b618a..c2a2ebf4bfaf 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -2729,20 +2729,23 @@ ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vor -> vor (define_insn "*fuse_vor_vor" - [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v") -(ior:VM (ior:VM (match_operand:VM 0 "altivec_register_operand" "v,v,v,v") - (match_operand:VM 1 "altivec_register_operand" "%v,v,v,v")) - (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) - (clobber (match_scratch:VM 4 "=X,X,X,&v"))] + [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") +(ior:VM (ior:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v") + (match_operand:VM 1 "vector_fusion_operand" "%v,v,v,wa,v")) + (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v"))) + (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ vor %3,%1,%0\;vor %3,%3,%2 vor %3,%1,%0\;vor %3,%3,%2 vor %3,%1,%0\;vor %3,%3,%2 + xxeval %x3,%x2,%x1,%x0,127 vor %4,%1,%0\;vor %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") - (set_attr "length" "8")]) + (set_attr "length" "8") + (set_attr "prefixed" "*,*,*,yes,*") + (set_attr "isa" "*,*,*,xxeval,*")]) ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vorc -> vor diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index 97681f37d0fa..9df4c8d6527e 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ -237,6 +237,7 @@ sub gen_logical_addsubf "vxor_vor"=> 111, "vnor_vnor" => 112, "vor_vxor"=> 120, + "vor_vor" => 127, ); KIND: foreach $kind ('scalar','vector') {
[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector or to vector nor fusion
https://gcc.gnu.org/g:0e4302dcb91643c47b9c396834dc9a9687410aa2 commit 0e4302dcb91643c47b9c396834dc9a9687410aa2 Author: Michael Meissner Date: Wed Jul 9 01:47:01 2025 -0400 PR target/117251: Improve vector or to vector nor fusion See the following post for a complete explanation of what the patches for PR target/117251 do: * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html This is patch #24 of 45 to generate the 'XXEVAL' instruction on power10 and power11 instead of using the Altivec 'VOR' instruction feeding into 'VNOR'. The 'XXEVAL' instruction can use all 64 vector registers, instead of the 32 registers that traditional Altivec vector instructions use. By allowing all of the vector registers to be used, it reduces the amount of spilling that a large benchmark generated. Currently the following code: vector int a, b, c, d; a = ~ ((c | d) | b); Generates: vor t,c,d vnor a,t,b Now in addition with this patch, if the arguments or result is allocated to a traditional FPR register, the GCC compiler will now generate the following code instead of adding vector move instructions: xxeval a,b,c,128 Since fusion using 2 Altivec instructions is slightly faster than using the 'XXEVAL' instruction we prefer to generate the Altivec instructions if we can. In addition, because 'XXEVAL' is a prefixed instruction, it possibly might generate an extra NOP instruction to align the 'XXEVAL' instruction. I have tested these patches on both big endian and little endian PowerPC servers, with no regressions. Can I check these patches into the trunk? 2025-07-09 Michael Meissner gcc/ PR target/117251 * config/rs6000/fusion.md: Regenerate. * config/rs6000/genfusion.pl (gen_logical_addsubf): Add support to generate vector or => nor fusion if XXEVAL is supported. 
Diff: --- gcc/config/rs6000/fusion.md| 15 +-- gcc/config/rs6000/genfusion.pl | 1 + 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index c2a2ebf4bfaf..c55e9d4abd67 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -2576,20 +2576,23 @@ ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vor -> vnor (define_insn "*fuse_vor_vnor" - [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v") -(and:VM (not:VM (ior:VM (match_operand:VM 0 "altivec_register_operand" "v,v,v,v") - (match_operand:VM 1 "altivec_register_operand" "v,v,v,v"))) - (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v" - (clobber (match_scratch:VM 4 "=X,X,X,&v"))] + [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") +(and:VM (not:VM (ior:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v") + (match_operand:VM 1 "vector_fusion_operand" "v,v,v,wa,v"))) + (not:VM (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v" + (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ vor %3,%1,%0\;vnor %3,%3,%2 vor %3,%1,%0\;vnor %3,%3,%2 vor %3,%1,%0\;vnor %3,%3,%2 + xxeval %x3,%x2,%x1,%x0,128 vor %4,%1,%0\;vnor %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") - (set_attr "length" "8")]) + (set_attr "length" "8") + (set_attr "prefixed" "*,*,*,yes,*") + (set_attr "isa" "*,*,*,xxeval,*")]) ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vorc -> vnor diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index 9df4c8d6527e..58f900640bef 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ -238,6 +238,7 @@ sub gen_logical_addsubf "vnor_vnor" => 112, "vor_vxor"=> 120, "vor_vor" => 127, + "vor_vnor"=> 128, ); KIND: foreach $kind ('scalar','vector') {
[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector and to vector nor fusion
https://gcc.gnu.org/g:f0e56a8159d60e2f9ceb50f4f918f0e93e38c420 commit f0e56a8159d60e2f9ceb50f4f918f0e93e38c420 Author: Michael Meissner Date: Wed Jul 9 01:52:03 2025 -0400 PR target/117251: Improve vector and to vector nor fusion See the following post for a complete explanation of what the patches for PR target/117251 do: * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html This is patch #34 of 45 to generate the 'XXEVAL' instruction on power10 and power11 instead of using the Altivec 'VAND' instruction feeding into 'VNOR'. The 'XXEVAL' instruction can use all 64 vector registers, instead of the 32 registers that traditional Altivec vector instructions use. By allowing all of the vector registers to be used, it reduces the amount of spilling that a large benchmark generated. Currently the following code: vector int a, b, c, d; a = ~ ((c & d) | b); Generates: vand t,c,d vnor a,t,b Now in addition with this patch, if the arguments or result is allocated to a traditional FPR register, the GCC compiler will now generate the following code instead of adding vector move instructions: xxeval a,b,c,224 Since fusion using 2 Altivec instructions is slightly faster than using the 'XXEVAL' instruction we prefer to generate the Altivec instructions if we can. In addition, because 'XXEVAL' is a prefixed instruction, it possibly might generate an extra NOP instruction to align the 'XXEVAL' instruction. I have tested these patches on both big endian and little endian PowerPC servers, with no regressions. Can I check these patches into the trunk? 2025-07-09 Michael Meissner gcc/ PR target/117251 * config/rs6000/fusion.md: Regenerate. * config/rs6000/genfusion.pl (gen_logical_addsubf): Add support to generate vector and => nor fusion if XXEVAL is supported. 
Diff: --- gcc/config/rs6000/fusion.md| 15 +-- gcc/config/rs6000/genfusion.pl | 1 + 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index e3d9f7376a8d..68b52d4f5893 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -2480,20 +2480,23 @@ ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vand -> vnor (define_insn "*fuse_vand_vnor" - [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v") -(and:VM (not:VM (and:VM (match_operand:VM 0 "altivec_register_operand" "v,v,v,v") - (match_operand:VM 1 "altivec_register_operand" "v,v,v,v"))) - (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v" - (clobber (match_scratch:VM 4 "=X,X,X,&v"))] + [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") +(and:VM (not:VM (and:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v") + (match_operand:VM 1 "vector_fusion_operand" "v,v,v,wa,v"))) + (not:VM (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v" + (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ vand %3,%1,%0\;vnor %3,%3,%2 vand %3,%1,%0\;vnor %3,%3,%2 vand %3,%1,%0\;vnor %3,%3,%2 + xxeval %x3,%x2,%x1,%x0,224 vand %4,%1,%0\;vnor %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") - (set_attr "length" "8")]) + (set_attr "length" "8") + (set_attr "prefixed" "*,*,*,yes,*") + (set_attr "isa" "*,*,*,xxeval,*")]) ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vandc -> vnor diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index 3a603eb09675..56e5d96ec5f3 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ -248,6 +248,7 @@ sub gen_logical_addsubf "vorc_vor"=> 191, "vandc_vnor" => 208, "vandc_veqv" => 210, + "vand_vnor" => 224, ); KIND: foreach $kind ('scalar','vector') {
[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector nand to vector xor fusion
https://gcc.gnu.org/g:81b6e9d57983a3679b01f5d555915d95240e9bd8 commit 81b6e9d57983a3679b01f5d555915d95240e9bd8 Author: Michael Meissner Date: Wed Jul 9 01:52:22 2025 -0400 PR target/117251: Improve vector nand to vector xor fusion See the following post for a complete explanation of what the patches for PR target/117251 do: * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html This is patch #35 of 45 to generate the 'XXEVAL' instruction on power10 and power11 instead of using the Altivec 'VNAND' instruction feeding into 'VXOR'. The 'XXEVAL' instruction can use all 64 vector registers, instead of the 32 registers that traditional Altivec vector instructions use. By allowing all of the vector registers to be used, it reduces the amount of spilling that a large benchmark generated. Currently the following code: vector int a, b, c, d; a = (~ (c & d)) ^ b; Generates: vnand t,c,d vxor a,t,b Now in addition with this patch, if the arguments or result is allocated to a traditional FPR register, the GCC compiler will now generate the following code instead of adding vector move instructions: xxeval a,b,c,225 Since fusion using 2 Altivec instructions is slightly faster than using the 'XXEVAL' instruction we prefer to generate the Altivec instructions if we can. In addition, because 'XXEVAL' is a prefixed instruction, it possibly might generate an extra NOP instruction to align the 'XXEVAL' instruction. I have tested these patches on both big endian and little endian PowerPC servers, with no regressions. Can I check these patches into the trunk? 2025-07-09 Michael Meissner gcc/ PR target/117251 * config/rs6000/fusion.md: Regenerate. * config/rs6000/genfusion.pl (gen_logical_addsubf): Add support to generate vector nand => xor fusion if XXEVAL is supported. 
Diff: --- gcc/config/rs6000/fusion.md| 15 +-- gcc/config/rs6000/genfusion.pl | 1 + 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index 68b52d4f5893..e6d13b38415a 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -3023,20 +3023,23 @@ ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vnand -> vxor (define_insn "*fuse_vnand_vxor" - [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v") -(xor:VM (ior:VM (not:VM (match_operand:VM 0 "altivec_register_operand" "v,v,v,v")) - (not:VM (match_operand:VM 1 "altivec_register_operand" "v,v,v,v"))) - (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) - (clobber (match_scratch:VM 4 "=X,X,X,&v"))] + [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") +(xor:VM (ior:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v")) + (not:VM (match_operand:VM 1 "vector_fusion_operand" "v,v,v,wa,v"))) + (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v"))) + (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ vnand %3,%1,%0\;vxor %3,%3,%2 vnand %3,%1,%0\;vxor %3,%3,%2 vnand %3,%1,%0\;vxor %3,%3,%2 + xxeval %x3,%x2,%x1,%x0,225 vnand %4,%1,%0\;vxor %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") - (set_attr "length" "8")]) + (set_attr "length" "8") + (set_attr "prefixed" "*,*,*,yes,*") + (set_attr "isa" "*,*,*,xxeval,*")]) ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vnor -> vxor diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index 56e5d96ec5f3..94eae471c64b 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ -249,6 +249,7 @@ sub gen_logical_addsubf "vandc_vnor" => 208, "vandc_veqv" => 210, "vand_vnor" => 224, + "vnand_vxor" => 225, ); KIND: foreach $kind ('scalar','vector') {
[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector andc to vector xor fusion
https://gcc.gnu.org/g:3eccadbd9e1f7b58074a0a472e944d92856483b3 commit 3eccadbd9e1f7b58074a0a472e944d92856483b3 Author: Michael Meissner Date: Wed Jul 9 01:42:05 2025 -0400 PR target/117251: Improve vector andc to vector xor fusion See the following post for a complete explanation of what the patches for PR target/117251 do: * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html This is patch #13 of 45 to generate the 'XXEVAL' instruction on power10 and power11 instead of using the Altivec 'VANDC' instruction feeding into 'VXOR'. The 'XXEVAL' instruction can use all 64 vector registers, instead of the 32 registers that traditional Altivec vector instructions use. By allowing all of the vector registers to be used, it reduces the amount of spilling that a large benchmark generated. Currently the following code: vector int a, b, c, d; a = (c & ~ d) ^ b; Generates: vandc t,c,d vxor a,t,b Now in addition with this patch, if the arguments or result is allocated to a traditional FPR register, the GCC compiler will now generate the following code instead of adding vector move instructions: xxeval a,b,c,45 Since fusion using 2 Altivec instructions is slightly faster than using the 'XXEVAL' instruction we prefer to generate the Altivec instructions if we can. In addition, because 'XXEVAL' is a prefixed instruction, it possibly might generate an extra NOP instruction to align the 'XXEVAL' instruction. I have tested these patches on both big endian and little endian PowerPC servers, with no regressions. Can I check these patches into the trunk? 2025-07-09 Michael Meissner gcc/ PR target/117251 * config/rs6000/fusion.md: Regenerate. * config/rs6000/genfusion.pl (gen_logical_addsubf): Add support to generate vector andc => xor fusion if XXEVAL is supported. 
Diff: --- gcc/config/rs6000/fusion.md| 15 +-- gcc/config/rs6000/genfusion.pl | 1 + 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index fccea39d0aae..6e5c88b81b44 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -2933,20 +2933,23 @@ ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vandc -> vxor (define_insn "*fuse_vandc_vxor" - [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v") -(xor:VM (and:VM (not:VM (match_operand:VM 0 "altivec_register_operand" "v,v,v,v")) - (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")) - (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) - (clobber (match_scratch:VM 4 "=X,X,X,&v"))] + [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") +(xor:VM (and:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v")) + (match_operand:VM 1 "vector_fusion_operand" "v,v,v,wa,v")) + (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v"))) + (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ vandc %3,%1,%0\;vxor %3,%3,%2 vandc %3,%1,%0\;vxor %3,%3,%2 vandc %3,%1,%0\;vxor %3,%3,%2 + xxeval %x3,%x2,%x1,%x0,45 vandc %4,%1,%0\;vxor %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") - (set_attr "length" "8")]) + (set_attr "length" "8") + (set_attr "prefixed" "*,*,*,yes,*") + (set_attr "isa" "*,*,*,xxeval,*")]) ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector veqv -> vxor diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index ab714b10f622..d15208a4ad3e 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ -227,6 +227,7 @@ sub gen_logical_addsubf "vnand_vnor" => 16, "vand_vxor" => 30, "vand_vor"=> 31, + "vandc_vxor" => 45, ); KIND: foreach $kind ('scalar','vector') {
[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector and to vector and fusion
https://gcc.gnu.org/g:3b4588e41dc54d01b95e7b05e1f493c402ec39d2 commit 3b4588e41dc54d01b95e7b05e1f493c402ec39d2 Author: Michael Meissner Date: Wed Jul 9 01:32:46 2025 -0400 PR target/117251: Improve vector and to vector and fusion See the following post for a complete explanation of what the patches for PR target/117251 do: * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html This is patch #1 of 45 to generate the 'XXEVAL' instruction on power10 and power11 instead of using the Altivec 'VAND' instruction feeding into 'VAND'. The 'XXEVAL' instruction can use all 64 vector registers, instead of the 32 registers that traditional Altivec vector instructions use. By allowing all of the vector registers to be used, it reduces the amount of spilling that a large benchmark generated. Currently the following code: vector int a, b, c, d; a = (c & d) & b; Generates: vand t,c,d vand a,t,b Now in addition with this patch, if the arguments or result is allocated to a traditional FPR register, the GCC compiler will now generate the following code instead of adding vector move instructions: xxeval a,b,c,1 Since fusion using 2 Altivec instructions is slightly faster than using the 'XXEVAL' instruction we prefer to generate the Altivec instructions if we can. In addition, because 'XXEVAL' is a prefixed instruction, it possibly might generate an extra NOP instruction to align the 'XXEVAL' instruction. I have tested these patches on both big endian and little endian PowerPC servers, with no regressions. Can I check these patches into the trunk? 2025-07-09 Michael Meissner gcc/ PR target/117251 * config/rs6000/fusion.md: Regenerate. * config/rs6000/genfusion.pl (gen_logical_addsubf): Add support to generate vector/vector and/and fusion if XXEVAL is supported. * config/rs6000/predicates.md (vector_fusion_operand): New predicate. * config/rs6000/rs6000.h (TARGET_XXEVAL): New macro. * config/rs6000/rs6000.md (isa attribute): Add xxeval. 
(enabled attribute): Add support for XXEVAL support. Diff: --- gcc/config/rs6000/fusion.md | 15 ++- gcc/config/rs6000/genfusion.pl | 58 ++--- gcc/config/rs6000/predicates.md | 12 + gcc/config/rs6000/rs6000.h | 4 +++ gcc/config/rs6000/rs6000.md | 7 - 5 files changed, 85 insertions(+), 11 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index 621b346f9eb9..d24837d68d83 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -1871,20 +1871,23 @@ ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vand -> vand (define_insn "*fuse_vand_vand" - [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v") -(and:VM (and:VM (match_operand:VM 0 "altivec_register_operand" "v,v,v,v") - (match_operand:VM 1 "altivec_register_operand" "%v,v,v,v")) - (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) - (clobber (match_scratch:VM 4 "=X,X,X,&v"))] + [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") +(and:VM (and:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v") + (match_operand:VM 1 "vector_fusion_operand" "%v,v,v,wa,v")) + (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v"))) + (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ vand %3,%1,%0\;vand %3,%3,%2 vand %3,%1,%0\;vand %3,%3,%2 vand %3,%1,%0\;vand %3,%3,%2 + xxeval %x3,%x2,%x1,%x0,1 vand %4,%1,%0\;vand %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") - (set_attr "length" "8")]) + (set_attr "length" "8") + (set_attr "prefixed" "*,*,*,yes,*") + (set_attr "isa" "*,*,*,xxeval,*")]) ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vandc -> vand diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index e5d3b1ee449d..351a4d914a4a 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ -211,25 +211,33 @@ sub gen_logical_addsubf $inner_comp, $inner_inv, $inner_rtl, $inner_op, 
$both_commute, $c4, $bc, $inner_arg0, $inner_arg1, $inner_exp, $outer_arg2, $outer_exp, $ftype, $insn, $is_subf, $is_rsubf, $outer_32, $outer_42,$outer_name, - $fuse_type); - KIND: foreach $kind ('scalar','vector') { + $fuse_type, $xxeval, $c5, $vect_pred, $vect_inner_arg0, $vect_inner_arg1, + $vect_inner_exp, $vect_outer_arg2, $vect_outer_exp); + +my %xxeval_fusions = ( + "vand_vand" => 1, +
[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector nor to vector nor fusion
https://gcc.gnu.org/g:7efaca506ac6bc38aaf6bb1e2c7682c53ce96bb1 commit 7efaca506ac6bc38aaf6bb1e2c7682c53ce96bb1 Author: Michael Meissner Date: Wed Jul 9 01:45:30 2025 -0400 PR target/117251: Improve vector nor to vector nor fusion See the following post for a complete explanation of what the patches for PR target/117251 do: * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html This is patch #21 of 45 to generate the 'XXEVAL' instruction on power10 and power11 instead of using the Altivec 'VNOR' instruction feeding into 'VNOR'. The 'XXEVAL' instruction can use all 64 vector registers, instead of the 32 registers that traditional Altivec vector instructions use. By allowing all of the vector registers to be used, it reduces the amount of spilling that a large benchmark generated. Currently the following code: vector int a, b, c, d; a = ~ ((~ (c | d)) | b); Generates: vnor t,c,d vnor a,t,b Now in addition with this patch, if the arguments or result is allocated to a traditional FPR register, the GCC compiler will now generate the following code instead of adding vector move instructions: xxeval a,b,c,112 Since fusion using 2 Altivec instructions is slightly faster than using the 'XXEVAL' instruction we prefer to generate the Altivec instructions if we can. In addition, because 'XXEVAL' is a prefixed instruction, it possibly might generate an extra NOP instruction to align the 'XXEVAL' instruction. I have tested these patches on both big endian and little endian PowerPC servers, with no regressions. Can I check these patches into the trunk? 2025-07-09 Michael Meissner gcc/ PR target/117251 * config/rs6000/fusion.md: Regenerate. * config/rs6000/genfusion.pl (gen_logical_addsubf): Add support to generate vector nor => nor fusion if XXEVAL is supported. 
Diff: --- gcc/config/rs6000/fusion.md| 15 +-- gcc/config/rs6000/genfusion.pl | 1 + 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index 1d4b3c970c7f..032c87ac5765 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -2555,20 +2555,23 @@ ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vnor -> vnor (define_insn "*fuse_vnor_vnor" - [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v") -(and:VM (not:VM (and:VM (not:VM (match_operand:VM 0 "altivec_register_operand" "v,v,v,v")) - (not:VM (match_operand:VM 1 "altivec_register_operand" "v,v,v,v" - (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v" - (clobber (match_scratch:VM 4 "=X,X,X,&v"))] + [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") +(and:VM (not:VM (and:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v")) + (not:VM (match_operand:VM 1 "vector_fusion_operand" "v,v,v,wa,v" + (not:VM (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v" + (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ vnor %3,%1,%0\;vnor %3,%3,%2 vnor %3,%1,%0\;vnor %3,%3,%2 vnor %3,%1,%0\;vnor %3,%3,%2 + xxeval %x3,%x2,%x1,%x0,112 vnor %4,%1,%0\;vnor %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") - (set_attr "length" "8")]) + (set_attr "length" "8") + (set_attr "prefixed" "*,*,*,yes,*") + (set_attr "isa" "*,*,*,xxeval,*")]) ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vor -> vnor diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index 4ec38beccb9c..6af4c5d7a182 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ -235,6 +235,7 @@ sub gen_logical_addsubf "veqv_vnor" => 96, "vxor_vxor" => 105, "vxor_vor"=> 111, + "vnor_vnor" => 112, ); KIND: foreach $kind ('scalar','vector') {
[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector nand to vector nand fusion
https://gcc.gnu.org/g:a0c683836fd1305ce11ced99025a60ab877d3613 commit a0c683836fd1305ce11ced99025a60ab877d3613 Author: Michael Meissner Date: Wed Jul 9 01:53:07 2025 -0400 PR target/117251: Improve vector nand to vector nand fusion See the following post for a complete explanation of the patches for PR target/117251: * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html This is patch #37 of 45 to generate the 'XXEVAL' instruction on power10 and power11 instead of using the Altivec 'VNAND' instruction feeding into 'VNAND'. The 'XXEVAL' instruction can use all 64 vector registers, instead of the 32 registers that traditional Altivec vector instructions use. By allowing all of the vector registers to be used, it reduces the amount of spilling that a large benchmark generated. Currently the following code: vector int a, b, c, d; a = ~ ((~ (c & d)) & b); Generates: vnand t,c,d vnand a,t,b Now in addition with this patch, if the arguments or result is allocated to a traditional FPR register, the GCC compiler will now generate the following code instead of adding vector move instructions: xxeval a,b,c,d,241 Since fusion using 2 Altivec instructions is slightly faster than using the 'XXEVAL' instruction we prefer to generate the Altivec instructions if we can. In addition, because 'XXEVAL' is a prefixed instruction, it possibly might generate an extra NOP instruction to align the 'XXEVAL' instruction. I have tested these patches on both big endian and little endian PowerPC servers, with no regressions. Can I check these patches into the trunk? 2025-07-09 Michael Meissner gcc/ PR target/117251 * config/rs6000/fusion.md: Regenerate. * config/rs6000/genfusion.pl (gen_logical_addsubf): Add support to generate vector nand => nand fusion if XXEVAL is supported. 
Diff: --- gcc/config/rs6000/fusion.md| 15 +-- gcc/config/rs6000/genfusion.pl | 1 + 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index ba3a5a52b990..241b8a494fb1 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -2390,20 +2390,23 @@ ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vnand -> vnand (define_insn "*fuse_vnand_vnand" - [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v") -(ior:VM (not:VM (ior:VM (not:VM (match_operand:VM 0 "altivec_register_operand" "v,v,v,v")) - (not:VM (match_operand:VM 1 "altivec_register_operand" "v,v,v,v" - (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v" - (clobber (match_scratch:VM 4 "=X,X,X,&v"))] + [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") +(ior:VM (not:VM (ior:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v")) + (not:VM (match_operand:VM 1 "vector_fusion_operand" "v,v,v,wa,v" + (not:VM (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v" + (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ vnand %3,%1,%0\;vnand %3,%3,%2 vnand %3,%1,%0\;vnand %3,%3,%2 vnand %3,%1,%0\;vnand %3,%3,%2 + xxeval %x3,%x2,%x1,%x0,241 vnand %4,%1,%0\;vnand %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") - (set_attr "length" "8")]) + (set_attr "length" "8") + (set_attr "prefixed" "*,*,*,yes,*") + (set_attr "isa" "*,*,*,xxeval,*")]) ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vnor -> vnand diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index 54699d199fc5..728a447c65a9 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ -251,6 +251,7 @@ sub gen_logical_addsubf "vand_vnor" => 224, "vnand_vxor" => 225, "vnand_vor" => 239, + "vnand_vnand" => 241, ); KIND: foreach $kind ('scalar','vector') {
[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector orc to vector xor fusion
https://gcc.gnu.org/g:b0ea39c1cc4b31a7e093836d7e2f7177a840ab51 commit b0ea39c1cc4b31a7e093836d7e2f7177a840ab51 Author: Michael Meissner Date: Wed Jul 9 01:50:20 2025 -0400 PR target/117251: Improve vector orc to vector xor fusion See the following post for a complete explanation of the patches for PR target/117251: * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html This is patch #30 of 45 to generate the 'XXEVAL' instruction on power10 and power11 instead of using the Altivec 'VORC' instruction feeding into 'VXOR'. The 'XXEVAL' instruction can use all 64 vector registers, instead of the 32 registers that traditional Altivec vector instructions use. By allowing all of the vector registers to be used, it reduces the amount of spilling that a large benchmark generated. Currently the following code: vector int a, b, c, d; a = (c | ~ d) ^ b; Generates: vorc t,c,d vxor a,t,b Now in addition with this patch, if the arguments or result is allocated to a traditional FPR register, the GCC compiler will now generate the following code instead of adding vector move instructions: xxeval a,b,c,d,180 Since fusion using 2 Altivec instructions is slightly faster than using the 'XXEVAL' instruction we prefer to generate the Altivec instructions if we can. In addition, because 'XXEVAL' is a prefixed instruction, it possibly might generate an extra NOP instruction to align the 'XXEVAL' instruction. I have tested these patches on both big endian and little endian PowerPC servers, with no regressions. Can I check these patches into the trunk? 2025-07-09 Michael Meissner gcc/ PR target/117251 * config/rs6000/fusion.md: Regenerate. * config/rs6000/genfusion.pl (gen_logical_addsubf): Add support to generate vector orc => xor fusion if XXEVAL is supported. 
Diff: --- gcc/config/rs6000/fusion.md| 15 +-- gcc/config/rs6000/genfusion.pl | 1 + 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index cb1ad8b4c0cc..3d7e6502b027 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -3071,20 +3071,23 @@ ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vorc -> vxor (define_insn "*fuse_vorc_vxor" - [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v") -(xor:VM (ior:VM (not:VM (match_operand:VM 0 "altivec_register_operand" "v,v,v,v")) - (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")) - (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) - (clobber (match_scratch:VM 4 "=X,X,X,&v"))] + [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") +(xor:VM (ior:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v")) + (match_operand:VM 1 "vector_fusion_operand" "v,v,v,wa,v")) + (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v"))) + (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ vorc %3,%1,%0\;vxor %3,%3,%2 vorc %3,%1,%0\;vxor %3,%3,%2 vorc %3,%1,%0\;vxor %3,%3,%2 + xxeval %x3,%x2,%x1,%x0,180 vorc %4,%1,%0\;vxor %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") - (set_attr "length" "8")]) + (set_attr "length" "8") + (set_attr "prefixed" "*,*,*,yes,*") + (set_attr "isa" "*,*,*,xxeval,*")]) ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vxor -> vxor diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index 9400aed267a6..15f931baad33 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ -244,6 +244,7 @@ sub gen_logical_addsubf "vxor_vnor" => 144, "veqv_vxor" => 150, "veqv_vor"=> 159, + "vorc_vxor" => 180, ); KIND: foreach $kind ('scalar','vector') {
[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector nor to vector or fusion
https://gcc.gnu.org/g:17962cd3612d48d390869c72e8db1ad907a032d0 commit 17962cd3612d48d390869c72e8db1ad907a032d0 Author: Michael Meissner Date: Wed Jul 9 01:48:00 2025 -0400 PR target/117251: Improve vector nor to vector or fusion See the following post for a complete explanation of the patches for PR target/117251: * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html This is patch #26 of 45 to generate the 'XXEVAL' instruction on power10 and power11 instead of using the Altivec 'VNOR' instruction feeding into 'VOR'. The 'XXEVAL' instruction can use all 64 vector registers, instead of the 32 registers that traditional Altivec vector instructions use. By allowing all of the vector registers to be used, it reduces the amount of spilling that a large benchmark generated. Currently the following code: vector int a, b, c, d; a = (~ (c | d)) | b; Generates: vnor t,c,d vor a,t,b Now in addition with this patch, if the arguments or result is allocated to a traditional FPR register, the GCC compiler will now generate the following code instead of adding vector move instructions: xxeval a,b,c,d,143 Since fusion using 2 Altivec instructions is slightly faster than using the 'XXEVAL' instruction we prefer to generate the Altivec instructions if we can. In addition, because 'XXEVAL' is a prefixed instruction, it possibly might generate an extra NOP instruction to align the 'XXEVAL' instruction. I have tested these patches on both big endian and little endian PowerPC servers, with no regressions. Can I check these patches into the trunk? 2025-07-09 Michael Meissner gcc/ PR target/117251 * config/rs6000/fusion.md: Regenerate. * config/rs6000/genfusion.pl (gen_logical_addsubf): Add support to generate vector nor => or fusion if XXEVAL is supported. 
Diff: --- gcc/config/rs6000/fusion.md| 15 +-- gcc/config/rs6000/genfusion.pl | 1 + 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index 1f1756dbe63e..66d98f4537e1 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -2714,20 +2714,23 @@ ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vnor -> vor (define_insn "*fuse_vnor_vor" - [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v") -(ior:VM (and:VM (not:VM (match_operand:VM 0 "altivec_register_operand" "v,v,v,v")) - (not:VM (match_operand:VM 1 "altivec_register_operand" "v,v,v,v"))) - (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) - (clobber (match_scratch:VM 4 "=X,X,X,&v"))] + [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") +(ior:VM (and:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v")) + (not:VM (match_operand:VM 1 "vector_fusion_operand" "v,v,v,wa,v"))) + (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v"))) + (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ vnor %3,%1,%0\;vor %3,%3,%2 vnor %3,%1,%0\;vor %3,%3,%2 vnor %3,%1,%0\;vor %3,%3,%2 + xxeval %x3,%x2,%x1,%x0,143 vnor %4,%1,%0\;vor %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") - (set_attr "length" "8")]) + (set_attr "length" "8") + (set_attr "prefixed" "*,*,*,yes,*") + (set_attr "isa" "*,*,*,xxeval,*")]) ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vor -> vor diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index 0fea2d6d8482..98b56b788f03 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ -240,6 +240,7 @@ sub gen_logical_addsubf "vor_vor" => 127, "vor_vnor"=> 128, "vnor_vxor" => 135, + "vnor_vor"=> 143, ); KIND: foreach $kind ('scalar','vector') {
[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector orc to vector or fusion
https://gcc.gnu.org/g:2d67f441ab51f9656529ebbce0765685a09d85df commit 2d67f441ab51f9656529ebbce0765685a09d85df Author: Michael Meissner Date: Wed Jul 9 01:50:40 2025 -0400 PR target/117251: Improve vector orc to vector or fusion See the following post for a complete explanation of the patches for PR target/117251: * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html This is patch #31 of 45 to generate the 'XXEVAL' instruction on power10 and power11 instead of using the Altivec 'VORC' instruction feeding into 'VOR'. The 'XXEVAL' instruction can use all 64 vector registers, instead of the 32 registers that traditional Altivec vector instructions use. By allowing all of the vector registers to be used, it reduces the amount of spilling that a large benchmark generated. Currently the following code: vector int a, b, c, d; a = (c | ~ d) | b; Generates: vorc t,c,d vor a,t,b Now in addition with this patch, if the arguments or result is allocated to a traditional FPR register, the GCC compiler will now generate the following code instead of adding vector move instructions: xxeval a,b,c,d,191 Since fusion using 2 Altivec instructions is slightly faster than using the 'XXEVAL' instruction we prefer to generate the Altivec instructions if we can. In addition, because 'XXEVAL' is a prefixed instruction, it possibly might generate an extra NOP instruction to align the 'XXEVAL' instruction. I have tested these patches on both big endian and little endian PowerPC servers, with no regressions. Can I check these patches into the trunk? 2025-07-09 Michael Meissner gcc/ PR target/117251 * config/rs6000/fusion.md: Regenerate. * config/rs6000/genfusion.pl (gen_logical_addsubf): Add support to generate vector orc => or fusion if XXEVAL is supported. 
Diff: --- gcc/config/rs6000/fusion.md| 15 +-- gcc/config/rs6000/genfusion.pl | 1 + 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index 3d7e6502b027..f6dc26e9c1f2 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -2762,20 +2762,23 @@ ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vorc -> vor (define_insn "*fuse_vorc_vor" - [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v") -(ior:VM (ior:VM (not:VM (match_operand:VM 0 "altivec_register_operand" "v,v,v,v")) - (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")) - (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) - (clobber (match_scratch:VM 4 "=X,X,X,&v"))] + [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") +(ior:VM (ior:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v")) + (match_operand:VM 1 "vector_fusion_operand" "v,v,v,wa,v")) + (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v"))) + (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ vorc %3,%1,%0\;vor %3,%3,%2 vorc %3,%1,%0\;vor %3,%3,%2 vorc %3,%1,%0\;vor %3,%3,%2 + xxeval %x3,%x2,%x1,%x0,191 vorc %4,%1,%0\;vor %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") - (set_attr "length" "8")]) + (set_attr "length" "8") + (set_attr "prefixed" "*,*,*,yes,*") + (set_attr "isa" "*,*,*,xxeval,*")]) ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vxor -> vor diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index 15f931baad33..62f2b9e36d89 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ -245,6 +245,7 @@ sub gen_logical_addsubf "veqv_vxor" => 150, "veqv_vor"=> 159, "vorc_vxor" => 180, + "vorc_vor"=> 191, ); KIND: foreach $kind ('scalar','vector') {
[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector andc to vector and fusion
https://gcc.gnu.org/g:a2db53e127608d6861d6e5ec15ca2004d47c5df1 commit a2db53e127608d6861d6e5ec15ca2004d47c5df1 Author: Michael Meissner Date: Wed Jul 9 01:34:45 2025 -0400 PR target/117251: Improve vector andc to vector and fusion See the following post for a complete explanation of the patches for PR target/117251: * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html This is patch #2 of 45 to generate the 'XXEVAL' instruction on power10 and power11 instead of using the Altivec 'VANDC' instruction feeding into 'VAND'. The 'XXEVAL' instruction can use all 64 vector registers, instead of the 32 registers that traditional Altivec vector instructions use. By allowing all of the vector registers to be used, it reduces the amount of spilling that a large benchmark generated. Currently the following code: vector int a, b, c, d; a = (c & ~ d) & b; Generates: vandc t,c,d vand a,t,b Now in addition with this patch, if the arguments or result is allocated to a traditional FPR register, the GCC compiler will now generate the following code instead of adding vector move instructions: xxeval a,b,c,d,2 Since fusion using 2 Altivec instructions is slightly faster than using the 'XXEVAL' instruction we prefer to generate the Altivec instructions if we can. In addition, because 'XXEVAL' is a prefixed instruction, it possibly might generate an extra NOP instruction to align the 'XXEVAL' instruction. I have tested these patches on both big endian and little endian PowerPC servers, with no regressions. Can I check these patches into the trunk? 2025-07-09 Michael Meissner gcc/ PR target/117251 * config/rs6000/fusion.md: Regenerate. * config/rs6000/genfusion.pl (gen_logical_addsubf): Add support to generate vector/vector andc/and fusion if XXEVAL is supported. 
Diff: --- gcc/config/rs6000/fusion.md| 15 +-- gcc/config/rs6000/genfusion.pl | 1 + 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index d24837d68d83..b9590b6d1104 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -1892,20 +1892,23 @@ ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vandc -> vand (define_insn "*fuse_vandc_vand" - [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v") -(and:VM (and:VM (not:VM (match_operand:VM 0 "altivec_register_operand" "v,v,v,v")) - (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")) - (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) - (clobber (match_scratch:VM 4 "=X,X,X,&v"))] + [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") +(and:VM (and:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v")) + (match_operand:VM 1 "vector_fusion_operand" "v,v,v,wa,v")) + (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v"))) + (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ vandc %3,%1,%0\;vand %3,%3,%2 vandc %3,%1,%0\;vand %3,%3,%2 vandc %3,%1,%0\;vand %3,%3,%2 + xxeval %x3,%x2,%x1,%x0,2 vandc %4,%1,%0\;vand %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") - (set_attr "length" "8")]) + (set_attr "length" "8") + (set_attr "prefixed" "*,*,*,yes,*") + (set_attr "isa" "*,*,*,xxeval,*")]) ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector veqv -> vand diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index 351a4d914a4a..23adf98c4056 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ -216,6 +216,7 @@ sub gen_logical_addsubf my %xxeval_fusions = ( "vand_vand" => 1, + "vandc_vand" => 2, ); KIND: foreach $kind ('scalar','vector') {
[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Add tests
https://gcc.gnu.org/g:30dd10b26786dce5e5eb27940ee5290ecc7378df commit 30dd10b26786dce5e5eb27940ee5290ecc7378df Author: Michael Meissner Date: Wed Jul 9 01:56:14 2025 -0400 PR target/117251: Add tests This is patch #45 of 45 to generate the 'XXEVAL' instruction on power10 and power11 instead of using the Altivec 'VAND' instruction feeding into 'VNAND'. The 'XXEVAL' instruction can use all 64 vector registers, instead of the 32 registers that traditional Altivec vector instructions use. By allowing all of the vector registers to be used, it reduces the amount of spilling that a large benchmark generated. This patch adds the tests for generating 'XXEVAL' to the testsuite. I have tested these patches on both big endian and little endian PowerPC servers, with no regressions. Can I check these patches into the trunk? 2025-07-09 Michael Meissner gcc/testsuite/ PR target/117251 * gcc.target/powerpc/p10-vector-fused-1.c: New test. * gcc.target/powerpc/p10-vector-fused-2.c: Likewise. Diff: --- .../gcc.target/powerpc/p10-vector-fused-1.c| 409 + .../gcc.target/powerpc/p10-vector-fused-2.c| 936 + 2 files changed, 1345 insertions(+) diff --git a/gcc/testsuite/gcc.target/powerpc/p10-vector-fused-1.c b/gcc/testsuite/gcc.target/powerpc/p10-vector-fused-1.c new file mode 100644 index ..28e0874b3454 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/p10-vector-fused-1.c @@ -0,0 +1,409 @@ +/* { dg-do run } */ +/* { dg-require-effective-target power10_hw } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +/* Generate and check most of the vector logical instruction combinations that + may or may not generate xxeval to do a fused operation on power10. */ + +#include +#include +#include + +#ifdef DEBUG +#include + +static int errors = 0; +static int tests = 0; +#endif + +typedef vector unsigned int vector_t; +typedef unsigned int scalar_t; + +/* Vector logical functions. 
*/ +static inline vector_t +vector_and (vector_t x, vector_t y) +{ + return x & y; +} + +static inline vector_t +vector_or (vector_t x, vector_t y) +{ + return x | y; +} + +static inline vector_t +vector_xor (vector_t x, vector_t y) +{ + return x ^ y; +} + +static inline vector_t +vector_andc (vector_t x, vector_t y) +{ + return x & ~y; +} + +static inline vector_t +vector_orc (vector_t x, vector_t y) +{ + return x | ~y; +} + +static inline vector_t +vector_nand (vector_t x, vector_t y) +{ + return ~(x & y); +} + +static inline vector_t +vector_nor (vector_t x, vector_t y) +{ + return ~(x | y); +} + +static inline vector_t +vector_eqv (vector_t x, vector_t y) +{ + return ~(x ^ y); +} + +/* Scalar logical functions. */ +static inline scalar_t +scalar_and (scalar_t x, scalar_t y) +{ + return x & y; +} + +static inline scalar_t +scalar_or (scalar_t x, scalar_t y) +{ + return x | y; +} + +static inline scalar_t +scalar_xor (scalar_t x, scalar_t y) +{ + return x ^ y; +} + +static inline scalar_t +scalar_andc (scalar_t x, scalar_t y) +{ + return x & ~y; +} + +static inline scalar_t +scalar_orc (scalar_t x, scalar_t y) +{ + return x | ~y; +} + +static inline scalar_t +scalar_nand (scalar_t x, scalar_t y) +{ + return ~(x & y); +} + +static inline scalar_t +scalar_nor (scalar_t x, scalar_t y) +{ + return ~(x | y); +} + +static inline scalar_t +scalar_eqv (scalar_t x, scalar_t y) +{ + return ~(x ^ y); +} + + +/* + * Generate one function for each combination that we are checking. Do 4 + * operations: + * + * Use FPR regs that should generate either XXEVAL or XXL* insns; + * Use Altivec registers than may generated fused V* insns; + * Use VSX registers, insure fusing it not done via asm; (and) + * Use GPR registers on scalar operations. 
+ */ + +#ifdef DEBUG +#define TRACE(INNER, OUTER)\ + do { \ +tests++; \ +printf ("%s_%s\n", INNER, OUTER); \ +fflush (stdout); \ + } while (0) \ + +#define FAILED(INNER, OUTER) \ + do { \ +errors++; \ +printf ("%s_%s failed\n", INNER, OUTER); \ +fflush (stdout); \ + } while (0) \ + +#else +#define TRACE(INNER, OUTER) +#define FAILED(INNER, OUTER) abort () +#endif + +#define FUSED_FUNC(INNER, OUTER) \ +static void
[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector and to vector xor fusion
https://gcc.gnu.org/g:a90b97fd171aebdc46108bce9d85e5207ddf0334 commit a90b97fd171aebdc46108bce9d85e5207ddf0334 Author: Michael Meissner Date: Wed Jul 9 01:40:26 2025 -0400 PR target/117251: Improve vector and to vector xor fusion See the following post for a complete explanation of the patches for PR target/117251: * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html This is patch #11 of 45 to generate the 'XXEVAL' instruction on power10 and power11 instead of using the Altivec 'VAND' instruction feeding into 'VXOR'. The 'XXEVAL' instruction can use all 64 vector registers, instead of the 32 registers that traditional Altivec vector instructions use. By allowing all of the vector registers to be used, it reduces the amount of spilling that a large benchmark generated. Currently the following code: vector int a, b, c, d; a = (c & d) ^ b; Generates: vand t,c,d vxor a,t,b Now in addition with this patch, if the arguments or result is allocated to a traditional FPR register, the GCC compiler will now generate the following code instead of adding vector move instructions: xxeval a,b,c,d,30 Since fusion using 2 Altivec instructions is slightly faster than using the 'XXEVAL' instruction we prefer to generate the Altivec instructions if we can. In addition, because 'XXEVAL' is a prefixed instruction, it possibly might generate an extra NOP instruction to align the 'XXEVAL' instruction. I have tested these patches on both big endian and little endian PowerPC servers, with no regressions. Can I check these patches into the trunk? 2025-07-09 Michael Meissner gcc/ PR target/117251 * config/rs6000/fusion.md: Regenerate. * config/rs6000/genfusion.pl (gen_logical_addsubf): Add support to generate vector/vector and/xor fusion if XXEVAL is supported. 
Diff: --- gcc/config/rs6000/fusion.md| 15 +-- gcc/config/rs6000/genfusion.pl | 1 + 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index c8a27a9e5471..789a4d592419 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -2909,20 +2909,23 @@ ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vand -> vxor (define_insn "*fuse_vand_vxor" - [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v") -(xor:VM (and:VM (match_operand:VM 0 "altivec_register_operand" "v,v,v,v") - (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")) - (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) - (clobber (match_scratch:VM 4 "=X,X,X,&v"))] + [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") +(xor:VM (and:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v") + (match_operand:VM 1 "vector_fusion_operand" "v,v,v,wa,v")) + (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v"))) + (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ vand %3,%1,%0\;vxor %3,%3,%2 vand %3,%1,%0\;vxor %3,%3,%2 vand %3,%1,%0\;vxor %3,%3,%2 + xxeval %x3,%x2,%x1,%x0,30 vand %4,%1,%0\;vxor %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") - (set_attr "length" "8")]) + (set_attr "length" "8") + (set_attr "prefixed" "*,*,*,yes,*") + (set_attr "isa" "*,*,*,xxeval,*")]) ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vandc -> vxor diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index 078bc6ca0dab..e6d44d430b3a 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ -225,6 +225,7 @@ sub gen_logical_addsubf "vandc_vandc" => 13, "vnand_vand" => 14, "vnand_vnor" => 16, + "vand_vxor" => 30, ); KIND: foreach $kind ('scalar','vector') {
[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector or to vector nand fusion
https://gcc.gnu.org/g:59d1bcfe4713965941e6eec13b540c17c7ff1a14 commit 59d1bcfe4713965941e6eec13b540c17c7ff1a14 Author: Michael Meissner Date: Wed Jul 9 01:54:45 2025 -0400 PR target/117251: Improve vector or to vector nand fusion See the following post for a complete explanation of the patches for PR target/117251: * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html This is patch #41 of 45 to generate the 'XXEVAL' instruction on power10 and power11 instead of using the Altivec 'VOR' instruction feeding into 'VNAND'. The 'XXEVAL' instruction can use all 64 vector registers, instead of the 32 registers that traditional Altivec vector instructions use. By allowing all of the vector registers to be used, it reduces the amount of spilling that a large benchmark generated. Currently the following code: vector int a, b, c, d; a = ~ ((c | d) & b); Generates: vor t,c,d vnand a,t,b Now in addition with this patch, if the arguments or result is allocated to a traditional FPR register, the GCC compiler will now generate the following code instead of adding vector move instructions: xxeval a,b,c,d,248 Since fusion using 2 Altivec instructions is slightly faster than using the 'XXEVAL' instruction we prefer to generate the Altivec instructions if we can. In addition, because 'XXEVAL' is a prefixed instruction, it possibly might generate an extra NOP instruction to align the 'XXEVAL' instruction. I have tested these patches on both big endian and little endian PowerPC servers, with no regressions. Can I check these patches into the trunk? 2025-07-09 Michael Meissner gcc/ PR target/117251 * config/rs6000/fusion.md: Regenerate. * config/rs6000/genfusion.pl (gen_logical_addsubf): Add support to generate vector or => nand fusion if XXEVAL is supported. 
Diff: --- gcc/config/rs6000/fusion.md| 15 +-- gcc/config/rs6000/genfusion.pl | 1 + 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index 01b7fda17ecc..39b586918c17 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -2435,20 +2435,23 @@ ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vor -> vnand (define_insn "*fuse_vor_vnand" - [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v") -(ior:VM (not:VM (ior:VM (match_operand:VM 0 "altivec_register_operand" "v,v,v,v") - (match_operand:VM 1 "altivec_register_operand" "v,v,v,v"))) - (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v" - (clobber (match_scratch:VM 4 "=X,X,X,&v"))] + [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") +(ior:VM (not:VM (ior:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v") + (match_operand:VM 1 "vector_fusion_operand" "v,v,v,wa,v"))) + (not:VM (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v" + (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ vor %3,%1,%0\;vnand %3,%3,%2 vor %3,%1,%0\;vnand %3,%3,%2 vor %3,%1,%0\;vnand %3,%3,%2 + xxeval %x3,%x2,%x1,%x0,248 vor %4,%1,%0\;vnand %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") - (set_attr "length" "8")]) + (set_attr "length" "8") + (set_attr "prefixed" "*,*,*,yes,*") + (set_attr "isa" "*,*,*,xxeval,*")]) ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vorc -> vnand diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index d4965b6df864..86bca81286ca 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ -255,6 +255,7 @@ sub gen_logical_addsubf "vorc_vnand" => 244, "veqv_vnand" => 246, "vnor_vnand" => 247, + "vor_vnand" => 248, ); KIND: foreach $kind ('scalar','vector') {
[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector and to vector or fusion
https://gcc.gnu.org/g:fcd8a91c46d187b3840c7e4be8e60ee490ccd734 commit fcd8a91c46d187b3840c7e4be8e60ee490ccd734 Author: Michael Meissner Date: Wed Jul 9 01:41:36 2025 -0400 PR target/117251: Improve vector and to vector or fusion See the following post for a complete explanation of what the patches for PR target/117251 do: * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html This is patch #12 of 45 to generate the 'XXEVAL' instruction on power10 and power11 instead of using the Altivec 'VAND' instruction feeding into 'VOR'. The 'XXEVAL' instruction can use all 64 vector registers, instead of the 32 registers that traditional Altivec vector instructions use. By allowing all of the vector registers to be used, it reduces the amount of spilling that a large benchmark generated. Currently the following code: vector int a, b, c, d; a = (c & d) | b; Generates: vand t,c,d vor a,t,b Now in addition with this patch, if the arguments or result is allocated to a traditional FPR register, the GCC compiler will now generate the following code instead of adding vector move instructions: xxeval a,b,c,31 Since fusion using 2 Altivec instructions is slightly faster than using the 'XXEVAL' instruction we prefer to generate the Altivec instructions if we can. In addition, because 'XXEVAL' is a prefixed instruction, it possibly might generate an extra NOP instruction to align the 'XXEVAL' instruction. I have tested these patches on both big endian and little endian PowerPC servers, with no regressions. Can I check these patches into the trunk? 2025-07-09 Michael Meissner gcc/ PR target/117251 * config/rs6000/fusion.md: Regenerate. * config/rs6000/genfusion.pl (gen_logical_addsubf): Add support to generate vector and => or fusion if XXEVAL is supported. 
Diff: --- gcc/config/rs6000/fusion.md| 15 +-- gcc/config/rs6000/genfusion.pl | 1 + 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index 789a4d592419..fccea39d0aae 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -2621,20 +2621,23 @@ ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vand -> vor (define_insn "*fuse_vand_vor" - [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v") -(ior:VM (and:VM (match_operand:VM 0 "altivec_register_operand" "v,v,v,v") - (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")) - (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) - (clobber (match_scratch:VM 4 "=X,X,X,&v"))] + [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") +(ior:VM (and:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v") + (match_operand:VM 1 "vector_fusion_operand" "v,v,v,wa,v")) + (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v"))) + (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ vand %3,%1,%0\;vor %3,%3,%2 vand %3,%1,%0\;vor %3,%3,%2 vand %3,%1,%0\;vor %3,%3,%2 + xxeval %x3,%x2,%x1,%x0,31 vand %4,%1,%0\;vor %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") - (set_attr "length" "8")]) + (set_attr "length" "8") + (set_attr "prefixed" "*,*,*,yes,*") + (set_attr "isa" "*,*,*,xxeval,*")]) ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vandc -> vor diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index e6d44d430b3a..ab714b10f622 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ -226,6 +226,7 @@ sub gen_logical_addsubf "vnand_vand" => 14, "vnand_vnor" => 16, "vand_vxor" => 30, + "vand_vor"=> 31, ); KIND: foreach $kind ('scalar','vector') {
[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector or to vector and fusion
https://gcc.gnu.org/g:88b69379719059f91a37b50e55fd7f5f7560 commit 88b69379719059f91a37b50e55fd7f5f7560 Author: Michael Meissner Date: Wed Jul 9 01:36:26 2025 -0400 PR target/117251: Improve vector or to vector and fusion See the following post for a complete explanation of what the patches for PR target/117251 do: * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html This is patch #4 of 45 to generate the 'XXEVAL' instruction on power10 and power11 instead of using the Altivec 'VOR' instruction feeding into 'VAND'. The 'XXEVAL' instruction can use all 64 vector registers, instead of the 32 registers that traditional Altivec vector instructions use. By allowing all of the vector registers to be used, it reduces the amount of spilling that a large benchmark generated. Currently the following code: vector int a, b, c, d; a = (c | d) & b; Generates: vor t,c,d vand a,t,b Now in addition with this patch, if the arguments or result is allocated to a traditional FPR register, the GCC compiler will now generate the following code instead of adding vector move instructions: xxeval a,b,c,7 Since fusion using 2 Altivec instructions is slightly faster than using the 'XXEVAL' instruction we prefer to generate the Altivec instructions if we can. In addition, because 'XXEVAL' is a prefixed instruction, it possibly might generate an extra NOP instruction to align the 'XXEVAL' instruction. I have tested these patches on both big endian and little endian PowerPC servers, with no regressions. Can I check these patches into the trunk? 2025-07-09 Michael Meissner gcc/ PR target/117251 * config/rs6000/fusion.md: Regenerate. * config/rs6000/genfusion.pl (gen_logical_addsubf): Add support to generate vector/vector or/and fusion if XXEVAL is supported. 
Diff: --- gcc/config/rs6000/fusion.md| 15 +-- gcc/config/rs6000/genfusion.pl | 1 + 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index 6375cd3a8970..161419b7f586 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -1967,20 +1967,23 @@ ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vor -> vand (define_insn "*fuse_vor_vand" - [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v") -(and:VM (ior:VM (match_operand:VM 0 "altivec_register_operand" "v,v,v,v") - (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")) - (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) - (clobber (match_scratch:VM 4 "=X,X,X,&v"))] + [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") +(and:VM (ior:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v") + (match_operand:VM 1 "vector_fusion_operand" "v,v,v,wa,v")) + (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v"))) + (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ vor %3,%1,%0\;vand %3,%3,%2 vor %3,%1,%0\;vand %3,%3,%2 vor %3,%1,%0\;vand %3,%3,%2 + xxeval %x3,%x2,%x1,%x0,7 vor %4,%1,%0\;vand %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") - (set_attr "length" "8")]) + (set_attr "length" "8") + (set_attr "prefixed" "*,*,*,yes,*") + (set_attr "isa" "*,*,*,xxeval,*")]) ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vorc -> vand diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index 2c631b944587..9d3a01a4704a 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ -218,6 +218,7 @@ sub gen_logical_addsubf "vand_vand" => 1, "vandc_vand" => 2, "vxor_vand" => 6, + "vor_vand"=> 7, ); KIND: foreach $kind ('scalar','vector') {
[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector nor to vector xor fusion
https://gcc.gnu.org/g:92cead1c94acdcd1eaeafa2406a83dd79b01dd41 commit 92cead1c94acdcd1eaeafa2406a83dd79b01dd41 Author: Michael Meissner Date: Wed Jul 9 01:47:25 2025 -0400 PR target/117251: Improve vector nor to vector xor fusion See the following post for a complete explanation of what the patches for PR target/117251 do: * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html This is patch #25 of 45 to generate the 'XXEVAL' instruction on power10 and power11 instead of using the Altivec 'VNOR' instruction feeding into 'VXOR'. The 'XXEVAL' instruction can use all 64 vector registers, instead of the 32 registers that traditional Altivec vector instructions use. By allowing all of the vector registers to be used, it reduces the amount of spilling that a large benchmark generated. Currently the following code: vector int a, b, c, d; a = (~ (c | d)) ^ b; Generates: vnor t,c,d vxor a,t,b Now in addition with this patch, if the arguments or result is allocated to a traditional FPR register, the GCC compiler will now generate the following code instead of adding vector move instructions: xxeval a,b,c,135 Since fusion using 2 Altivec instructions is slightly faster than using the 'XXEVAL' instruction we prefer to generate the Altivec instructions if we can. In addition, because 'XXEVAL' is a prefixed instruction, it possibly might generate an extra NOP instruction to align the 'XXEVAL' instruction. I have tested these patches on both big endian and little endian PowerPC servers, with no regressions. Can I check these patches into the trunk? 2025-07-09 Michael Meissner gcc/ PR target/117251 * config/rs6000/fusion.md: Regenerate. * config/rs6000/genfusion.pl (gen_logical_addsubf): Add support to generate vector nor => xor fusion if XXEVAL is supported. 
Diff: --- gcc/config/rs6000/fusion.md| 15 +-- gcc/config/rs6000/genfusion.pl | 1 + 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index c55e9d4abd67..1f1756dbe63e 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -3017,20 +3017,23 @@ ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vnor -> vxor (define_insn "*fuse_vnor_vxor" - [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v") -(xor:VM (and:VM (not:VM (match_operand:VM 0 "altivec_register_operand" "v,v,v,v")) - (not:VM (match_operand:VM 1 "altivec_register_operand" "v,v,v,v"))) - (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) - (clobber (match_scratch:VM 4 "=X,X,X,&v"))] + [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") +(xor:VM (and:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v")) + (not:VM (match_operand:VM 1 "vector_fusion_operand" "v,v,v,wa,v"))) + (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v"))) + (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ vnor %3,%1,%0\;vxor %3,%3,%2 vnor %3,%1,%0\;vxor %3,%3,%2 vnor %3,%1,%0\;vxor %3,%3,%2 + xxeval %x3,%x2,%x1,%x0,135 vnor %4,%1,%0\;vxor %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") - (set_attr "length" "8")]) + (set_attr "length" "8") + (set_attr "prefixed" "*,*,*,yes,*") + (set_attr "isa" "*,*,*,xxeval,*")]) ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vor -> vxor diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index 58f900640bef..0fea2d6d8482 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ -239,6 +239,7 @@ sub gen_logical_addsubf "vor_vxor"=> 120, "vor_vor" => 127, "vor_vnor"=> 128, + "vnor_vxor" => 135, ); KIND: foreach $kind ('scalar','vector') {
[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector orc to vector orc fusion
https://gcc.gnu.org/g:cb933a24f9dc77bb04ef74520ef007429e804e54 commit cb933a24f9dc77bb04ef74520ef007429e804e54 Author: Michael Meissner Date: Wed Jul 9 01:43:44 2025 -0400 PR target/117251: Improve vector orc to vector orc fusion See the following post for a complete explanation of what the patches for PR target/117251 do: * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html This is patch #17 of 45 to generate the 'XXEVAL' instruction on power10 and power11 instead of using the Altivec 'VORC' instruction feeding into 'VORC'. The 'XXEVAL' instruction can use all 64 vector registers, instead of the 32 registers that traditional Altivec vector instructions use. By allowing all of the vector registers to be used, it reduces the amount of spilling that a large benchmark generated. Currently the following code: vector int a, b, c, d; a = (c | ~ d) | ~ b; Generates: vorc t,c,d vorc a,t,b Now in addition with this patch, if the arguments or result is allocated to a traditional FPR register, the GCC compiler will now generate the following code instead of adding vector move instructions: xxeval a,b,c,79 Since fusion using 2 Altivec instructions is slightly faster than using the 'XXEVAL' instruction we prefer to generate the Altivec instructions if we can. In addition, because 'XXEVAL' is a prefixed instruction, it possibly might generate an extra NOP instruction to align the 'XXEVAL' instruction. I have tested these patches on both big endian and little endian PowerPC servers, with no regressions. Can I check these patches into the trunk? 2025-07-09 Michael Meissner gcc/ PR target/117251 * config/rs6000/fusion.md: Regenerate. * config/rs6000/genfusion.pl (gen_logical_addsubf): Add support to generate vector orc => orc fusion if XXEVAL is supported. 
Diff: --- gcc/config/rs6000/fusion.md| 15 +-- gcc/config/rs6000/genfusion.pl | 1 + 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index f84d0aee5d79..486aa813575d 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -2885,20 +2885,23 @@ ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vorc -> vorc (define_insn "*fuse_vorc_vorc" - [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v") -(ior:VM (ior:VM (not:VM (match_operand:VM 0 "altivec_register_operand" "v,v,v,v")) - (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")) - (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v" - (clobber (match_scratch:VM 4 "=X,X,X,&v"))] + [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") +(ior:VM (ior:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v")) + (match_operand:VM 1 "vector_fusion_operand" "v,v,v,wa,v")) + (not:VM (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v" + (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ vorc %3,%1,%0\;vorc %3,%3,%2 vorc %3,%1,%0\;vorc %3,%3,%2 vorc %3,%1,%0\;vorc %3,%3,%2 + xxeval %x3,%x2,%x1,%x0,79 vorc %4,%1,%0\;vorc %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") - (set_attr "length" "8")]) + (set_attr "length" "8") + (set_attr "prefixed" "*,*,*,yes,*") + (set_attr "isa" "*,*,*,xxeval,*")]) ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vxor -> vorc diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index 8ba1aa081f75..8f60fe76c87b 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ -231,6 +231,7 @@ sub gen_logical_addsubf "vandc_vor" => 47, "vorc_vnor" => 64, "vorc_veqv" => 75, + "vorc_vorc" => 79, ); KIND: foreach $kind ('scalar','vector') {
[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector orc to vector nor fusion
https://gcc.gnu.org/g:1a28902b859b26343f4392a20f6a62d8b05fc3bc commit 1a28902b859b26343f4392a20f6a62d8b05fc3bc Author: Michael Meissner Date: Wed Jul 9 01:42:48 2025 -0400 PR target/117251: Improve vector orc to vector nor fusion See the following post for a complete explanation of what the patches for PR target/117251 do: * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html This is patch #15 of 45 to generate the 'XXEVAL' instruction on power10 and power11 instead of using the Altivec 'VORC' instruction feeding into 'VNOR'. The 'XXEVAL' instruction can use all 64 vector registers, instead of the 32 registers that traditional Altivec vector instructions use. By allowing all of the vector registers to be used, it reduces the amount of spilling that a large benchmark generated. Currently the following code: vector int a, b, c, d; a = ~ ((c | ~ d) | b); Generates: vorc t,c,d vnor a,t,b Now in addition with this patch, if the arguments or result is allocated to a traditional FPR register, the GCC compiler will now generate the following code instead of adding vector move instructions: xxeval a,b,c,64 Since fusion using 2 Altivec instructions is slightly faster than using the 'XXEVAL' instruction we prefer to generate the Altivec instructions if we can. In addition, because 'XXEVAL' is a prefixed instruction, it possibly might generate an extra NOP instruction to align the 'XXEVAL' instruction. I have tested these patches on both big endian and little endian PowerPC servers, with no regressions. Can I check these patches into the trunk? 2025-07-09 Michael Meissner gcc/ PR target/117251 * config/rs6000/fusion.md: Regenerate. * config/rs6000/genfusion.pl (gen_logical_addsubf): Add support to generate vector orc => nor fusion if XXEVAL is supported. 
Diff: --- gcc/config/rs6000/fusion.md| 15 +-- gcc/config/rs6000/genfusion.pl | 1 + 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index ed70ac059dfc..f45e65f0217c 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -2585,20 +2585,23 @@ ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vorc -> vnor (define_insn "*fuse_vorc_vnor" - [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v") -(and:VM (not:VM (ior:VM (not:VM (match_operand:VM 0 "altivec_register_operand" "v,v,v,v")) - (match_operand:VM 1 "altivec_register_operand" "v,v,v,v"))) - (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v" - (clobber (match_scratch:VM 4 "=X,X,X,&v"))] + [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") +(and:VM (not:VM (ior:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v")) + (match_operand:VM 1 "vector_fusion_operand" "v,v,v,wa,v"))) + (not:VM (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v" + (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ vorc %3,%1,%0\;vnor %3,%3,%2 vorc %3,%1,%0\;vnor %3,%3,%2 vorc %3,%1,%0\;vnor %3,%3,%2 + xxeval %x3,%x2,%x1,%x0,64 vorc %4,%1,%0\;vnor %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") - (set_attr "length" "8")]) + (set_attr "length" "8") + (set_attr "prefixed" "*,*,*,yes,*") + (set_attr "isa" "*,*,*,xxeval,*")]) ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vxor -> vnor diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index 69fa544f0317..720e8d440c2d 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ -229,6 +229,7 @@ sub gen_logical_addsubf "vand_vor"=> 31, "vandc_vxor" => 45, "vandc_vor" => 47, + "vorc_vnor" => 64, ); KIND: foreach $kind ('scalar','vector') {
[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector xor to vector xor fusion
https://gcc.gnu.org/g:0e5b5bceaedce5f1b8b8ddf5339c28e07b5a924d commit 0e5b5bceaedce5f1b8b8ddf5339c28e07b5a924d Author: Michael Meissner Date: Wed Jul 9 01:44:47 2025 -0400 PR target/117251: Improve vector xor to vector xor fusion See the following post for a complete explanation of what the patches for PR target/117251 do: * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html This is patch #19 of 45 to generate the 'XXEVAL' instruction on power10 and power11 instead of using the Altivec 'VXOR' instruction feeding into 'VXOR'. The 'XXEVAL' instruction can use all 64 vector registers, instead of the 32 registers that traditional Altivec vector instructions use. By allowing all of the vector registers to be used, it reduces the amount of spilling that a large benchmark generated. Currently the following code: vector int a, b, c, d; a = (c ^ d) ^ b; Generates: vxor t,c,d vxor a,t,b Now in addition with this patch, if the arguments or result is allocated to a traditional FPR register, the GCC compiler will now generate the following code instead of adding vector move instructions: xxeval a,b,c,105 Since fusion using 2 Altivec instructions is slightly faster than using the 'XXEVAL' instruction we prefer to generate the Altivec instructions if we can. In addition, because 'XXEVAL' is a prefixed instruction, it possibly might generate an extra NOP instruction to align the 'XXEVAL' instruction. I have tested these patches on both big endian and little endian PowerPC servers, with no regressions. Can I check these patches into the trunk? 2025-07-09 Michael Meissner gcc/ PR target/117251 * config/rs6000/fusion.md: Regenerate. * config/rs6000/genfusion.pl (gen_logical_addsubf): Add support to generate vector xor => xor fusion if XXEVAL is supported. 
Diff: --- gcc/config/rs6000/fusion.md| 15 +-- gcc/config/rs6000/genfusion.pl | 1 + 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index e5099178d63d..a848b21bc3e2 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -3059,20 +3059,23 @@ ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vxor -> vxor (define_insn "*fuse_vxor_vxor" - [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v") -(xor:VM (xor:VM (match_operand:VM 0 "altivec_register_operand" "v,v,v,v") - (match_operand:VM 1 "altivec_register_operand" "%v,v,v,v")) - (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) - (clobber (match_scratch:VM 4 "=X,X,X,&v"))] + [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") +(xor:VM (xor:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v") + (match_operand:VM 1 "vector_fusion_operand" "%v,v,v,wa,v")) + (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v"))) + (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ vxor %3,%1,%0\;vxor %3,%3,%2 vxor %3,%1,%0\;vxor %3,%3,%2 vxor %3,%1,%0\;vxor %3,%3,%2 + xxeval %x3,%x2,%x1,%x0,105 vxor %4,%1,%0\;vxor %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") - (set_attr "length" "8")]) + (set_attr "length" "8") + (set_attr "prefixed" "*,*,*,yes,*") + (set_attr "isa" "*,*,*,xxeval,*")]) ;; add-add fusion pattern generated by gen_addadd (define_insn "*fuse_add_add" diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index 79d9eaed7da6..b9ff6c99b95e 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ -233,6 +233,7 @@ sub gen_logical_addsubf "vorc_veqv" => 75, "vorc_vorc" => 79, "veqv_vnor" => 96, + "vxor_vxor" => 105, ); KIND: foreach $kind ('scalar','vector') {
[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector andc to vector or fusion
https://gcc.gnu.org/g:56fd65b1c98e4c7d7f9e59008db913dbd2843403 commit 56fd65b1c98e4c7d7f9e59008db913dbd2843403 Author: Michael Meissner Date: Wed Jul 9 01:42:25 2025 -0400 PR target/117251: Improve vector andc to vector or fusion See the following post for a complete explanation of what the patches for PR target/117251 do: * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html This is patch #14 of 45 to generate the 'XXEVAL' instruction on power10 and power11 instead of using the Altivec 'VANDC' instruction feeding into 'VOR'. The 'XXEVAL' instruction can use all 64 vector registers, instead of the 32 registers that traditional Altivec vector instructions use. By allowing all of the vector registers to be used, it reduces the amount of spilling that a large benchmark generated. Currently the following code: vector int a, b, c, d; a = (c & ~ d) | b; Generates: vandc t,c,d vor a,t,b Now in addition with this patch, if the arguments or result is allocated to a traditional FPR register, the GCC compiler will now generate the following code instead of adding vector move instructions: xxeval a,b,c,47 Since fusion using 2 Altivec instructions is slightly faster than using the 'XXEVAL' instruction we prefer to generate the Altivec instructions if we can. In addition, because 'XXEVAL' is a prefixed instruction, it possibly might generate an extra NOP instruction to align the 'XXEVAL' instruction. I have tested these patches on both big endian and little endian PowerPC servers, with no regressions. Can I check these patches into the trunk? 2025-07-09 Michael Meissner gcc/ PR target/117251 * config/rs6000/fusion.md: Regenerate. * config/rs6000/genfusion.pl (gen_logical_addsubf): Add support to generate vector andc => or fusion if XXEVAL is supported. 
Diff: --- gcc/config/rs6000/fusion.md| 15 +-- gcc/config/rs6000/genfusion.pl | 1 + 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index 6e5c88b81b44..ed70ac059dfc 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -2642,20 +2642,23 @@ ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vandc -> vor (define_insn "*fuse_vandc_vor" - [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v") -(ior:VM (and:VM (not:VM (match_operand:VM 0 "altivec_register_operand" "v,v,v,v")) - (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")) - (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) - (clobber (match_scratch:VM 4 "=X,X,X,&v"))] + [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") +(ior:VM (and:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v")) + (match_operand:VM 1 "vector_fusion_operand" "v,v,v,wa,v")) + (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v"))) + (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ vandc %3,%1,%0\;vor %3,%3,%2 vandc %3,%1,%0\;vor %3,%3,%2 vandc %3,%1,%0\;vor %3,%3,%2 + xxeval %x3,%x2,%x1,%x0,47 vandc %4,%1,%0\;vor %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") - (set_attr "length" "8")]) + (set_attr "length" "8") + (set_attr "prefixed" "*,*,*,yes,*") + (set_attr "isa" "*,*,*,xxeval,*")]) ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector veqv -> vor diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index d15208a4ad3e..69fa544f0317 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ -228,6 +228,7 @@ sub gen_logical_addsubf "vand_vxor" => 30, "vand_vor"=> 31, "vandc_vxor" => 45, + "vandc_vor" => 47, ); KIND: foreach $kind ('scalar','vector') {
[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector eqv to vector nor fusion
https://gcc.gnu.org/g:1f0cc53d3d54202c01f4b69eeb181d7212cd2321 commit 1f0cc53d3d54202c01f4b69eeb181d7212cd2321 Author: Michael Meissner Date: Wed Jul 9 01:44:08 2025 -0400 PR target/117251: Improve vector eqv to vector nor fusion See the following post for a complete explanation of what the patches for PR target/117251 do: * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html This is patch #18 of 45 to generate the 'XXEVAL' instruction on power10 and power11 instead of using the Altivec 'VEQV' instruction feeding into 'VNOR'. The 'XXEVAL' instruction can use all 64 vector registers, instead of the 32 registers that traditional Altivec vector instructions use. By allowing all of the vector registers to be used, it reduces the amount of spilling that a large benchmark generated. Currently the following code: vector int a, b, c, d; a = ~ ((~ (c ^ d)) | b); Generates: veqv t,c,d vnor a,t,b Now in addition with this patch, if the arguments or result is allocated to a traditional FPR register, the GCC compiler will now generate the following code instead of adding vector move instructions: xxeval a,b,c,96 Since fusion using 2 Altivec instructions is slightly faster than using the 'XXEVAL' instruction we prefer to generate the Altivec instructions if we can. In addition, because 'XXEVAL' is a prefixed instruction, it possibly might generate an extra NOP instruction to align the 'XXEVAL' instruction. I have tested these patches on both big endian and little endian PowerPC servers, with no regressions. Can I check these patches into the trunk? 2025-07-09 Michael Meissner gcc/ PR target/117251 * config/rs6000/fusion.md: Regenerate. * config/rs6000/genfusion.pl (gen_logical_addsubf): Add support to generate vector eqv => nor fusion if XXEVAL is supported. 
Diff: --- gcc/config/rs6000/fusion.md| 15 +-- gcc/config/rs6000/genfusion.pl | 1 + 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index 486aa813575d..e5099178d63d 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -2513,20 +2513,23 @@ ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector veqv -> vnor (define_insn "*fuse_veqv_vnor" - [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v") -(and:VM (not:VM (not:VM (xor:VM (match_operand:VM 0 "altivec_register_operand" "v,v,v,v") - (match_operand:VM 1 "altivec_register_operand" "v,v,v,v" - (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v" - (clobber (match_scratch:VM 4 "=X,X,X,&v"))] + [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") +(and:VM (not:VM (not:VM (xor:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v") + (match_operand:VM 1 "vector_fusion_operand" "v,v,v,wa,v" + (not:VM (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v" + (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ veqv %3,%1,%0\;vnor %3,%3,%2 veqv %3,%1,%0\;vnor %3,%3,%2 veqv %3,%1,%0\;vnor %3,%3,%2 + xxeval %x3,%x2,%x1,%x0,96 veqv %4,%1,%0\;vnor %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") - (set_attr "length" "8")]) + (set_attr "length" "8") + (set_attr "prefixed" "*,*,*,yes,*") + (set_attr "isa" "*,*,*,xxeval,*")]) ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vnand -> vnor diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index 8f60fe76c87b..79d9eaed7da6 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ -232,6 +232,7 @@ sub gen_logical_addsubf "vorc_vnor" => 64, "vorc_veqv" => 75, "vorc_vorc" => 79, + "veqv_vnor" => 96, ); KIND: foreach $kind ('scalar','vector') {
[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector xor to vector or fusion
https://gcc.gnu.org/g:8266b1e19b1f7b52482fc92f48c47bb215aff385 commit 8266b1e19b1f7b52482fc92f48c47bb215aff385 Author: Michael Meissner Date: Wed Jul 9 01:45:10 2025 -0400 PR target/117251: Improve vector xor to vector or fusion See the following post for a complete explanation of what the patches for PR target/117251 do: * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html This is patch #20 of 45 to generate the 'XXEVAL' instruction on power10 and power11 instead of using the Altivec 'VXOR' instruction feeding into 'VOR'. The 'XXEVAL' instruction can use all 64 vector registers, instead of the 32 registers that traditional Altivec vector instructions use. By allowing all of the vector registers to be used, it reduces the amount of spilling that a large benchmark generated. Currently the following code: vector int a, b, c, d; a = (c ^ d) | b; Generates: vxor t,c,d vor a,t,b Now in addition with this patch, if the arguments or result is allocated to a traditional FPR register, the GCC compiler will now generate the following code instead of adding vector move instructions: xxeval a,b,c,111 Since fusion using 2 Altivec instructions is slightly faster than using the 'XXEVAL' instruction we prefer to generate the Altivec instructions if we can. In addition, because 'XXEVAL' is a prefixed instruction, it possibly might generate an extra NOP instruction to align the 'XXEVAL' instruction. I have tested these patches on both big endian and little endian PowerPC servers, with no regressions. Can I check these patches into the trunk? 2025-07-09 Michael Meissner gcc/ PR target/117251 * config/rs6000/fusion.md: Regenerate. * config/rs6000/genfusion.pl (gen_logical_addsubf): Add support to generate vector xor => or fusion if XXEVAL is supported. 
Diff: --- gcc/config/rs6000/fusion.md| 15 +-- gcc/config/rs6000/genfusion.pl | 1 + 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index a848b21bc3e2..1d4b3c970c7f 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -2762,20 +2762,23 @@ ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vxor -> vor (define_insn "*fuse_vxor_vor" - [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v") -(ior:VM (xor:VM (match_operand:VM 0 "altivec_register_operand" "v,v,v,v") - (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")) - (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) - (clobber (match_scratch:VM 4 "=X,X,X,&v"))] + [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") +(ior:VM (xor:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v") + (match_operand:VM 1 "vector_fusion_operand" "v,v,v,wa,v")) + (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v"))) + (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ vxor %3,%1,%0\;vor %3,%3,%2 vxor %3,%1,%0\;vor %3,%3,%2 vxor %3,%1,%0\;vor %3,%3,%2 + xxeval %x3,%x2,%x1,%x0,111 vxor %4,%1,%0\;vor %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") - (set_attr "length" "8")]) + (set_attr "length" "8") + (set_attr "prefixed" "*,*,*,yes,*") + (set_attr "isa" "*,*,*,xxeval,*")]) ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vand -> vorc diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index b9ff6c99b95e..4ec38beccb9c 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ -234,6 +234,7 @@ sub gen_logical_addsubf "vorc_vorc" => 79, "veqv_vnor" => 96, "vxor_vxor" => 105, + "vxor_vor"=> 111, ); KIND: foreach $kind ('scalar','vector') {
[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector andc to vector andc fusion
https://gcc.gnu.org/g:f6a79c0761cc446119e6f72df676fd9141e59f5f commit f6a79c0761cc446119e6f72df676fd9141e59f5f Author: Michael Meissner Date: Wed Jul 9 01:38:56 2025 -0400 PR target/117251: Improve vector andc to vector andc fusion See the following post for a complete explanation of what the patches for PR target/117251 do: * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html This is patch #8 of 45 to generate the 'XXEVAL' instruction on power10 and power11 instead of using the Altivec 'VANDC' instruction feeding into 'VANDC'. The 'XXEVAL' instruction can use all 64 vector registers, instead of the 32 registers that traditional Altivec vector instructions use. By allowing all of the vector registers to be used, it reduces the amount of spilling that a large benchmark generated. Currently the following code: vector int a, b, c, d; a = (c & ~ d) & ~ b; Generates: vandc t,c,d vandc a,t,b Now in addition with this patch, if the arguments or result are allocated to a traditional FPR register, the GCC compiler will now generate the following code instead of adding vector move instructions: xxeval a,b,c,13 Since fusion using 2 Altivec instructions is slightly faster than using the 'XXEVAL' instruction we prefer to generate the Altivec instructions if we can. In addition, because 'XXEVAL' is a prefixed instruction, it possibly might generate an extra NOP instruction to align the 'XXEVAL' instruction. I have tested these patches on both big endian and little endian PowerPC servers, with no regressions. Can I check these patches into the trunk? 2025-07-09 Michael Meissner gcc/ PR target/117251 * config/rs6000/fusion.md: Regenerate. * config/rs6000/genfusion.pl (gen_logical_addsubf): Add support to generate vector/vector andc/andc fusion if XXEVAL is supported. 
Diff: --- gcc/config/rs6000/fusion.md| 15 +-- gcc/config/rs6000/genfusion.pl | 1 + 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index e27f05f85f12..810d97963fb9 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -2054,20 +2054,23 @@ ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vandc -> vandc (define_insn "*fuse_vandc_vandc" - [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v") -(and:VM (and:VM (not:VM (match_operand:VM 0 "altivec_register_operand" "v,v,v,v")) - (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")) - (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v" - (clobber (match_scratch:VM 4 "=X,X,X,&v"))] + [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") +(and:VM (and:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v")) + (match_operand:VM 1 "vector_fusion_operand" "v,v,v,wa,v")) + (not:VM (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v" + (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ vandc %3,%1,%0\;vandc %3,%3,%2 vandc %3,%1,%0\;vandc %3,%3,%2 vandc %3,%1,%0\;vandc %3,%3,%2 + xxeval %x3,%x2,%x1,%x0,13 vandc %4,%1,%0\;vandc %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") - (set_attr "length" "8")]) + (set_attr "length" "8") + (set_attr "prefixed" "*,*,*,yes,*") + (set_attr "isa" "*,*,*,xxeval,*")]) ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector veqv -> vandc diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index a3cc8b121eab..929257d6c03e 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ -222,6 +222,7 @@ sub gen_logical_addsubf "vnor_vand" => 8, "veqv_vand" => 9, "vorc_vand" => 11, + "vandc_vandc" => 13, ); KIND: foreach $kind ('scalar','vector') {
[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector xor to vector and fusion
https://gcc.gnu.org/g:f4e428bb02cc0b9a6d5c94af45a38d29802f50e1 commit f4e428bb02cc0b9a6d5c94af45a38d29802f50e1 Author: Michael Meissner Date: Wed Jul 9 01:35:27 2025 -0400 PR target/117251: Improve vector xor to vector and fusion See the following post for a complete explanation of what the patches for PR target/117251 do: * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html This is patch #3 of 45 to generate the 'XXEVAL' instruction on power10 and power11 instead of using the Altivec 'VXOR' instruction feeding into 'VAND'. The 'XXEVAL' instruction can use all 64 vector registers, instead of the 32 registers that traditional Altivec vector instructions use. By allowing all of the vector registers to be used, it reduces the amount of spilling that a large benchmark generated. Currently the following code: vector int a, b, c, d; a = (c ^ d) & b; Generates: vxor t,c,d vand a,t,b Now in addition with this patch, if the arguments or result are allocated to a traditional FPR register, the GCC compiler will now generate the following code instead of adding vector move instructions: xxeval a,b,c,6 Since fusion using 2 Altivec instructions is slightly faster than using the 'XXEVAL' instruction we prefer to generate the Altivec instructions if we can. In addition, because 'XXEVAL' is a prefixed instruction, it possibly might generate an extra NOP instruction to align the 'XXEVAL' instruction. I have tested these patches on both big endian and little endian PowerPC servers, with no regressions. Can I check these patches into the trunk? 2025-07-09 Michael Meissner gcc/ PR target/117251 * config/rs6000/fusion.md: Regenerate. * config/rs6000/genfusion.pl (gen_logical_addsubf): Add support to generate vector/vector xor/and fusion if XXEVAL is supported. 
Diff: --- gcc/config/rs6000/fusion.md| 15 +-- gcc/config/rs6000/genfusion.pl | 1 + 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index b9590b6d1104..6375cd3a8970 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -2003,20 +2003,23 @@ ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vxor -> vand (define_insn "*fuse_vxor_vand" - [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v") -(and:VM (xor:VM (match_operand:VM 0 "altivec_register_operand" "v,v,v,v") - (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")) - (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) - (clobber (match_scratch:VM 4 "=X,X,X,&v"))] + [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") +(and:VM (xor:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v") + (match_operand:VM 1 "vector_fusion_operand" "v,v,v,wa,v")) + (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v"))) + (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ vxor %3,%1,%0\;vand %3,%3,%2 vxor %3,%1,%0\;vand %3,%3,%2 vxor %3,%1,%0\;vand %3,%3,%2 + xxeval %x3,%x2,%x1,%x0,6 vxor %4,%1,%0\;vand %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") - (set_attr "length" "8")]) + (set_attr "length" "8") + (set_attr "prefixed" "*,*,*,yes,*") + (set_attr "isa" "*,*,*,xxeval,*")]) ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vand -> vandc diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index 23adf98c4056..2c631b944587 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ -217,6 +217,7 @@ sub gen_logical_addsubf my %xxeval_fusions = ( "vand_vand" => 1, "vandc_vand" => 2, + "vxor_vand" => 6, ); KIND: foreach $kind ('scalar','vector') {
[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector orc to vector eqv fusion
https://gcc.gnu.org/g:039f33459a4defb5c66ef3f1581dc5cf49ae743e commit 039f33459a4defb5c66ef3f1581dc5cf49ae743e Author: Michael Meissner Date: Wed Jul 9 01:43:17 2025 -0400 PR target/117251: Improve vector orc to vector eqv fusion See the following post for a complete explanation of what the patches for PR target/117251 do: * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html This is patch #16 of 45 to generate the 'XXEVAL' instruction on power10 and power11 instead of using the Altivec 'VORC' instruction feeding into 'VEQV'. The 'XXEVAL' instruction can use all 64 vector registers, instead of the 32 registers that traditional Altivec vector instructions use. By allowing all of the vector registers to be used, it reduces the amount of spilling that a large benchmark generated. Currently the following code: vector int a, b, c, d; a = ~ ((c | ~ d) ^ b); Generates: vorc t,c,d veqv a,t,b Now in addition with this patch, if the arguments or result are allocated to a traditional FPR register, the GCC compiler will now generate the following code instead of adding vector move instructions: xxeval a,b,c,75 Since fusion using 2 Altivec instructions is slightly faster than using the 'XXEVAL' instruction we prefer to generate the Altivec instructions if we can. In addition, because 'XXEVAL' is a prefixed instruction, it possibly might generate an extra NOP instruction to align the 'XXEVAL' instruction. I have tested these patches on both big endian and little endian PowerPC servers, with no regressions. Can I check these patches into the trunk? 2025-07-09 Michael Meissner gcc/ PR target/117251 * config/rs6000/fusion.md: Regenerate. * config/rs6000/genfusion.pl (gen_logical_addsubf): Add support to generate vector orc => eqv fusion if XXEVAL is supported. 
Diff: --- gcc/config/rs6000/fusion.md| 15 +-- gcc/config/rs6000/genfusion.pl | 1 + 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index f45e65f0217c..f84d0aee5d79 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -2294,20 +2294,23 @@ ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vorc -> veqv (define_insn "*fuse_vorc_veqv" - [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v") -(not:VM (xor:VM (ior:VM (not:VM (match_operand:VM 0 "altivec_register_operand" "v,v,v,v")) - (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")) - (match_operand:VM 2 "altivec_register_operand" "v,v,v,v" - (clobber (match_scratch:VM 4 "=X,X,X,&v"))] + [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") +(not:VM (xor:VM (ior:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v")) + (match_operand:VM 1 "vector_fusion_operand" "v,v,v,wa,v")) + (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v" + (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ vorc %3,%1,%0\;veqv %3,%3,%2 vorc %3,%1,%0\;veqv %3,%3,%2 vorc %3,%1,%0\;veqv %3,%3,%2 + xxeval %x3,%x2,%x1,%x0,75 vorc %4,%1,%0\;veqv %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") - (set_attr "length" "8")]) + (set_attr "length" "8") + (set_attr "prefixed" "*,*,*,yes,*") + (set_attr "isa" "*,*,*,xxeval,*")]) ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vxor -> veqv diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index 720e8d440c2d..8ba1aa081f75 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ -230,6 +230,7 @@ sub gen_logical_addsubf "vandc_vxor" => 45, "vandc_vor" => 47, "vorc_vnor" => 64, + "vorc_veqv" => 75, ); KIND: foreach $kind ('scalar','vector') {
[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector eqv to vector xor fusion
https://gcc.gnu.org/g:a4a23171994a253cfb9eeafda7a7b710a62317b4 commit a4a23171994a253cfb9eeafda7a7b710a62317b4 Author: Michael Meissner Date: Wed Jul 9 01:49:05 2025 -0400 PR target/117251: Improve vector eqv to vector xor fusion See the following post for a complete explanation of what the patches for PR target/117251 do: * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html This is patch #28 of 45 to generate the 'XXEVAL' instruction on power10 and power11 instead of using the Altivec 'VEQV' instruction feeding into 'VXOR'. The 'XXEVAL' instruction can use all 64 vector registers, instead of the 32 registers that traditional Altivec vector instructions use. By allowing all of the vector registers to be used, it reduces the amount of spilling that a large benchmark generated. Currently the following code: vector int a, b, c, d; a = (~ (c ^ d)) ^ b; Generates: veqv t,c,d vxor a,t,b Now in addition with this patch, if the arguments or result are allocated to a traditional FPR register, the GCC compiler will now generate the following code instead of adding vector move instructions: xxeval a,b,c,150 Since fusion using 2 Altivec instructions is slightly faster than using the 'XXEVAL' instruction we prefer to generate the Altivec instructions if we can. In addition, because 'XXEVAL' is a prefixed instruction, it possibly might generate an extra NOP instruction to align the 'XXEVAL' instruction. I have tested these patches on both big endian and little endian PowerPC servers, with no regressions. Can I check these patches into the trunk? 2025-07-09 Michael Meissner gcc/ PR target/117251 * config/rs6000/fusion.md: Regenerate. * config/rs6000/genfusion.pl (gen_logical_addsubf): Add support to generate vector eqv => xor fusion if XXEVAL is supported. 
Diff: --- gcc/config/rs6000/fusion.md| 15 +-- gcc/config/rs6000/genfusion.pl | 1 + 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index e5ea37c567d6..bb62ae26445a 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -2987,20 +2987,23 @@ ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector veqv -> vxor (define_insn "*fuse_veqv_vxor" - [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v") -(xor:VM (not:VM (xor:VM (match_operand:VM 0 "altivec_register_operand" "v,v,v,v") - (match_operand:VM 1 "altivec_register_operand" "v,v,v,v"))) - (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) - (clobber (match_scratch:VM 4 "=X,X,X,&v"))] + [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") +(xor:VM (not:VM (xor:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v") + (match_operand:VM 1 "vector_fusion_operand" "v,v,v,wa,v"))) + (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v"))) + (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ veqv %3,%1,%0\;vxor %3,%3,%2 veqv %3,%1,%0\;vxor %3,%3,%2 veqv %3,%1,%0\;vxor %3,%3,%2 + xxeval %x3,%x2,%x1,%x0,150 veqv %4,%1,%0\;vxor %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") - (set_attr "length" "8")]) + (set_attr "length" "8") + (set_attr "prefixed" "*,*,*,yes,*") + (set_attr "isa" "*,*,*,xxeval,*")]) ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vnand -> vxor diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index d713d10a1dbc..726e29c798bc 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ -242,6 +242,7 @@ sub gen_logical_addsubf "vnor_vxor" => 135, "vnor_vor"=> 143, "vxor_vnor" => 144, + "veqv_vxor" => 150, ); KIND: foreach $kind ('scalar','vector') {
[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector nand to vector nor fusion
https://gcc.gnu.org/g:6474b6dbad0c66976adf904bfa942e9d76764947 commit 6474b6dbad0c66976adf904bfa942e9d76764947 Author: Michael Meissner Date: Wed Jul 9 01:40:03 2025 -0400 PR target/117251: Improve vector nand to vector nor fusion See the following post for a complete explanation of what the patches for PR target/117251 do: * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html This is patch #10 of 45 to generate the 'XXEVAL' instruction on power10 and power11 instead of using the Altivec 'VNAND' instruction feeding into 'VNOR'. The 'XXEVAL' instruction can use all 64 vector registers, instead of the 32 registers that traditional Altivec vector instructions use. By allowing all of the vector registers to be used, it reduces the amount of spilling that a large benchmark generated. Currently the following code: vector int a, b, c, d; a = ~ ((~ (c & d)) | b); Generates: vnand t,c,d vnor a,t,b Now in addition with this patch, if the arguments or result are allocated to a traditional FPR register, the GCC compiler will now generate the following code instead of adding vector move instructions: xxeval a,b,c,16 Since fusion using 2 Altivec instructions is slightly faster than using the 'XXEVAL' instruction we prefer to generate the Altivec instructions if we can. In addition, because 'XXEVAL' is a prefixed instruction, it possibly might generate an extra NOP instruction to align the 'XXEVAL' instruction. I have tested these patches on both big endian and little endian PowerPC servers, with no regressions. Can I check these patches into the trunk? 2025-07-09 Michael Meissner gcc/ PR target/117251 * config/rs6000/fusion.md: Regenerate. * config/rs6000/genfusion.pl (gen_logical_addsubf): Add support to generate vector/vector nand/nor fusion if XXEVAL is supported. 
Diff: --- gcc/config/rs6000/fusion.md| 15 +-- gcc/config/rs6000/genfusion.pl | 1 + 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index f70422616ffd..c8a27a9e5471 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -2528,20 +2528,23 @@ ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vnand -> vnor (define_insn "*fuse_vnand_vnor" - [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v") -(and:VM (not:VM (ior:VM (not:VM (match_operand:VM 0 "altivec_register_operand" "v,v,v,v")) - (not:VM (match_operand:VM 1 "altivec_register_operand" "v,v,v,v" - (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v" - (clobber (match_scratch:VM 4 "=X,X,X,&v"))] + [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") +(and:VM (not:VM (ior:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v")) + (not:VM (match_operand:VM 1 "vector_fusion_operand" "v,v,v,wa,v" + (not:VM (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v" + (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ vnand %3,%1,%0\;vnor %3,%3,%2 vnand %3,%1,%0\;vnor %3,%3,%2 vnand %3,%1,%0\;vnor %3,%3,%2 + xxeval %x3,%x2,%x1,%x0,16 vnand %4,%1,%0\;vnor %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") - (set_attr "length" "8")]) + (set_attr "length" "8") + (set_attr "prefixed" "*,*,*,yes,*") + (set_attr "isa" "*,*,*,xxeval,*")]) ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vnor -> vnor diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index 5beabe530a67..078bc6ca0dab 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ -224,6 +224,7 @@ sub gen_logical_addsubf "vorc_vand" => 11, "vandc_vandc" => 13, "vnand_vand" => 14, + "vnand_vnor" => 16, ); KIND: foreach $kind ('scalar','vector') {
[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector nand to vector and fusion
https://gcc.gnu.org/g:64db95d5a59e6f88a00314e8f8cbd796535313c4 commit 64db95d5a59e6f88a00314e8f8cbd796535313c4 Author: Michael Meissner Date: Wed Jul 9 01:39:19 2025 -0400 PR target/117251: Improve vector nand to vector and fusion See the following post for a complete explanation of what the patches for PR target/117251 do: * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html This is patch #9 of 45 to generate the 'XXEVAL' instruction on power10 and power11 instead of using the Altivec 'VNAND' instruction feeding into 'VAND'. The 'XXEVAL' instruction can use all 64 vector registers, instead of the 32 registers that traditional Altivec vector instructions use. By allowing all of the vector registers to be used, it reduces the amount of spilling that a large benchmark generated. Currently the following code: vector int a, b, c, d; a = (~ (c & d)) & b; Generates: vnand t,c,d vand a,t,b Now in addition with this patch, if the arguments or result are allocated to a traditional FPR register, the GCC compiler will now generate the following code instead of adding vector move instructions: xxeval a,b,c,14 Since fusion using 2 Altivec instructions is slightly faster than using the 'XXEVAL' instruction we prefer to generate the Altivec instructions if we can. In addition, because 'XXEVAL' is a prefixed instruction, it possibly might generate an extra NOP instruction to align the 'XXEVAL' instruction. I have tested these patches on both big endian and little endian PowerPC servers, with no regressions. Can I check these patches into the trunk? 2025-07-09 Michael Meissner gcc/ PR target/117251 * config/rs6000/fusion.md: Regenerate. * config/rs6000/genfusion.pl (gen_logical_addsubf): Add support to generate vector/vector nand/and fusion if XXEVAL is supported. 
Diff: --- gcc/config/rs6000/fusion.md| 15 +-- gcc/config/rs6000/genfusion.pl | 1 + 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index 810d97963fb9..f70422616ffd 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -1934,20 +1934,23 @@ ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vnand -> vand (define_insn "*fuse_vnand_vand" - [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v") -(and:VM (ior:VM (not:VM (match_operand:VM 0 "altivec_register_operand" "v,v,v,v")) - (not:VM (match_operand:VM 1 "altivec_register_operand" "v,v,v,v"))) - (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) - (clobber (match_scratch:VM 4 "=X,X,X,&v"))] + [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") +(and:VM (ior:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v")) + (not:VM (match_operand:VM 1 "vector_fusion_operand" "v,v,v,wa,v"))) + (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v"))) + (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ vnand %3,%1,%0\;vand %3,%3,%2 vnand %3,%1,%0\;vand %3,%3,%2 vnand %3,%1,%0\;vand %3,%3,%2 + xxeval %x3,%x2,%x1,%x0,14 vnand %4,%1,%0\;vand %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") - (set_attr "length" "8")]) + (set_attr "length" "8") + (set_attr "prefixed" "*,*,*,yes,*") + (set_attr "isa" "*,*,*,xxeval,*")]) ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vnor -> vand diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index 929257d6c03e..5beabe530a67 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ -223,6 +223,7 @@ sub gen_logical_addsubf "veqv_vand" => 9, "vorc_vand" => 11, "vandc_vandc" => 13, + "vnand_vand" => 14, ); KIND: foreach $kind ('scalar','vector') {
[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector nor to vector and fusion
https://gcc.gnu.org/g:27773b4db8507d5d0f5c203b6ad54bdd1268fb45 commit 27773b4db8507d5d0f5c203b6ad54bdd1268fb45 Author: Michael Meissner Date: Wed Jul 9 01:37:05 2025 -0400 PR target/117251: Improve vector nor to vector and fusion See the following post for a complete explanation of what the patches for PR target/117251 do: * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html This is patch #5 of 45 to generate the 'XXEVAL' instruction on power10 and power11 instead of using the Altivec 'VNOR' instruction feeding into 'VAND'. The 'XXEVAL' instruction can use all 64 vector registers, instead of the 32 registers that traditional Altivec vector instructions use. By allowing all of the vector registers to be used, it reduces the amount of spilling that a large benchmark generated. Currently the following code: vector int a, b, c, d; a = (~ (c | d)) & b; Generates: vnor t,c,d vand a,t,b Now in addition with this patch, if the arguments or result are allocated to a traditional FPR register, the GCC compiler will now generate the following code instead of adding vector move instructions: xxeval a,b,c,8 Since fusion using 2 Altivec instructions is slightly faster than using the 'XXEVAL' instruction we prefer to generate the Altivec instructions if we can. In addition, because 'XXEVAL' is a prefixed instruction, it possibly might generate an extra NOP instruction to align the 'XXEVAL' instruction. I have tested these patches on both big endian and little endian PowerPC servers, with no regressions. Can I check these patches into the trunk? 2025-07-09 Michael Meissner gcc/ PR target/117251 * config/rs6000/fusion.md: Regenerate. * config/rs6000/genfusion.pl (gen_logical_addsubf): Add support to generate vector/vector nor/and fusion if XXEVAL is supported. 
Diff: --- gcc/config/rs6000/fusion.md| 15 +-- gcc/config/rs6000/genfusion.pl | 1 + 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index 161419b7f586..ed15fccdf760 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -1949,20 +1949,23 @@ ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vnor -> vand (define_insn "*fuse_vnor_vand" - [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v") -(and:VM (and:VM (not:VM (match_operand:VM 0 "altivec_register_operand" "v,v,v,v")) - (not:VM (match_operand:VM 1 "altivec_register_operand" "v,v,v,v"))) - (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) - (clobber (match_scratch:VM 4 "=X,X,X,&v"))] + [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") +(and:VM (and:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v")) + (not:VM (match_operand:VM 1 "vector_fusion_operand" "v,v,v,wa,v"))) + (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v"))) + (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ vnor %3,%1,%0\;vand %3,%3,%2 vnor %3,%1,%0\;vand %3,%3,%2 vnor %3,%1,%0\;vand %3,%3,%2 + xxeval %x3,%x2,%x1,%x0,8 vnor %4,%1,%0\;vand %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") - (set_attr "length" "8")]) + (set_attr "length" "8") + (set_attr "prefixed" "*,*,*,yes,*") + (set_attr "isa" "*,*,*,xxeval,*")]) ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vor -> vand diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index 9d3a01a4704a..40d62ae8e9c1 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ -219,6 +219,7 @@ sub gen_logical_addsubf "vandc_vand" => 2, "vxor_vand" => 6, "vor_vand"=> 7, + "vnor_vand" => 8, ); KIND: foreach $kind ('scalar','vector') {
[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector eqv to vector and fusion
https://gcc.gnu.org/g:386b0165fafb7913a86e8f3aefdaebf2f7fb4744 commit 386b0165fafb7913a86e8f3aefdaebf2f7fb4744 Author: Michael Meissner Date: Wed Jul 9 01:38:02 2025 -0400 PR target/117251: Improve vector eqv to vector and fusion See the following post for a complete explanation of what the patches for PR target/117251 do: * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html This is patch #6 of 45 to generate the 'XXEVAL' instruction on power10 and power11 instead of using the Altivec 'VEQV' instruction feeding into 'VAND'. The 'XXEVAL' instruction can use all 64 vector registers, instead of the 32 registers that traditional Altivec vector instructions use. By allowing all of the vector registers to be used, it reduces the amount of spilling that a large benchmark generated. Currently the following code: vector int a, b, c, d; a = (~ (c ^ d)) & b; Generates: veqv t,c,d vand a,t,b Now in addition with this patch, if the arguments or result are allocated to a traditional FPR register, the GCC compiler will now generate the following code instead of adding vector move instructions: xxeval a,b,c,9 Since fusion using 2 Altivec instructions is slightly faster than using the 'XXEVAL' instruction we prefer to generate the Altivec instructions if we can. In addition, because 'XXEVAL' is a prefixed instruction, it possibly might generate an extra NOP instruction to align the 'XXEVAL' instruction. I have tested these patches on both big endian and little endian PowerPC servers, with no regressions. Can I check these patches into the trunk? 2025-07-09 Michael Meissner gcc/ PR target/117251 * config/rs6000/fusion.md: Regenerate. * config/rs6000/genfusion.pl (gen_logical_addsubf): Add support to generate vector/vector eqv/and fusion if XXEVAL is supported. 
Diff: --- gcc/config/rs6000/fusion.md| 15 +-- gcc/config/rs6000/genfusion.pl | 1 + 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index ed15fccdf760..cce179e0c974 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -1913,20 +1913,23 @@ ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector veqv -> vand (define_insn "*fuse_veqv_vand" - [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v") -(and:VM (not:VM (xor:VM (match_operand:VM 0 "altivec_register_operand" "v,v,v,v") - (match_operand:VM 1 "altivec_register_operand" "v,v,v,v"))) - (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) - (clobber (match_scratch:VM 4 "=X,X,X,&v"))] + [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") +(and:VM (not:VM (xor:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v") + (match_operand:VM 1 "vector_fusion_operand" "v,v,v,wa,v"))) + (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v"))) + (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ veqv %3,%1,%0\;vand %3,%3,%2 veqv %3,%1,%0\;vand %3,%3,%2 veqv %3,%1,%0\;vand %3,%3,%2 + xxeval %x3,%x2,%x1,%x0,9 veqv %4,%1,%0\;vand %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") - (set_attr "length" "8")]) + (set_attr "length" "8") + (set_attr "prefixed" "*,*,*,yes,*") + (set_attr "isa" "*,*,*,xxeval,*")]) ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vnand -> vand diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index 40d62ae8e9c1..268b94089484 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ -220,6 +220,7 @@ sub gen_logical_addsubf "vxor_vand" => 6, "vor_vand"=> 7, "vnor_vand" => 8, + "veqv_vand" => 9, ); KIND: foreach $kind ('scalar','vector') {
[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector orc to vector and fusion
https://gcc.gnu.org/g:a444dc4bd0ba208f1ac06ac67bef918f5ffbf1f6 commit a444dc4bd0ba208f1ac06ac67bef918f5ffbf1f6 Author: Michael Meissner Date: Wed Jul 9 01:38:23 2025 -0400 PR target/117251: Improve vector orc to vector and fusion See the following post for a complete explanation of what the patches for PR target/117251 do: * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html This is patch #7 of 45 to generate the 'XXEVAL' instruction on power10 and power11 instead of using the Altivec 'VORC' instruction feeding into 'VAND'. The 'XXEVAL' instruction can use all 64 vector registers, instead of the 32 registers that traditional Altivec vector instructions use. By allowing all of the vector registers to be used, it reduces the amount of spilling that a large benchmark generated. Currently the following code: vector int a, b, c, d; a = (c | ~ d) & b; Generates: vorc t,c,d vand a,t,b Now in addition with this patch, if the arguments or result is allocated to a traditional FPR register, the GCC compiler will now generate the following code instead of adding vector move instructions: xxeval a,b,c,d,11 Since fusion using 2 Altivec instructions is slightly faster than using the 'XXEVAL' instruction we prefer to generate the Altivec instructions if we can. In addition, because 'XXEVAL' is a prefixed instruction, it possibly might generate an extra NOP instruction to align the 'XXEVAL' instruction. I have tested these patches on both big endian and little endian PowerPC servers, with no regressions. Can I check these patches into the trunk? 2025-07-09 Michael Meissner gcc/ PR target/117251 * config/rs6000/fusion.md: Regenerate. * config/rs6000/genfusion.pl (gen_logical_addsubf): Add support to generate vector/vector orc/and fusion if XXEVAL is supported. 
Diff: --- gcc/config/rs6000/fusion.md| 15 +-- gcc/config/rs6000/genfusion.pl | 1 + 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index cce179e0c974..e27f05f85f12 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -1994,20 +1994,23 @@ ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vorc -> vand (define_insn "*fuse_vorc_vand" - [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v") -(and:VM (ior:VM (not:VM (match_operand:VM 0 "altivec_register_operand" "v,v,v,v")) - (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")) - (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) - (clobber (match_scratch:VM 4 "=X,X,X,&v"))] + [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") +(and:VM (ior:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v")) + (match_operand:VM 1 "vector_fusion_operand" "v,v,v,wa,v")) + (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v"))) + (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ vorc %3,%1,%0\;vand %3,%3,%2 vorc %3,%1,%0\;vand %3,%3,%2 vorc %3,%1,%0\;vand %3,%3,%2 + xxeval %x3,%x2,%x1,%x0,11 vorc %4,%1,%0\;vand %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") - (set_attr "length" "8")]) + (set_attr "length" "8") + (set_attr "prefixed" "*,*,*,yes,*") + (set_attr "isa" "*,*,*,xxeval,*")]) ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vxor -> vand diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index 268b94089484..a3cc8b121eab 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ -221,6 +221,7 @@ sub gen_logical_addsubf "vor_vand"=> 7, "vnor_vand" => 8, "veqv_vand" => 9, + "vorc_vand" => 11, ); KIND: foreach $kind ('scalar','vector') {
[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector xor to vector nor fusion
https://gcc.gnu.org/g:4c45f874584b9b7226be1241cf7e9202a593f883 commit 4c45f874584b9b7226be1241cf7e9202a593f883 Author: Michael Meissner Date: Wed Jul 9 01:48:38 2025 -0400 PR target/117251: Improve vector xor to vector nor fusion See the following post for a complete explanation of what the patches for PR target/117251 do: * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html This is patch #27 of 45 to generate the 'XXEVAL' instruction on power10 and power11 instead of using the Altivec 'VXOR' instruction feeding into 'VNOR'. The 'XXEVAL' instruction can use all 64 vector registers, instead of the 32 registers that traditional Altivec vector instructions use. By allowing all of the vector registers to be used, it reduces the amount of spilling that a large benchmark generated. Currently the following code: vector int a, b, c, d; a = ~ ((c ^ d) | b); Generates: vxor t,c,d vnor a,t,b Now in addition with this patch, if the arguments or result is allocated to a traditional FPR register, the GCC compiler will now generate the following code instead of adding vector move instructions: xxeval a,b,c,d,144 Since fusion using 2 Altivec instructions is slightly faster than using the 'XXEVAL' instruction we prefer to generate the Altivec instructions if we can. In addition, because 'XXEVAL' is a prefixed instruction, it possibly might generate an extra NOP instruction to align the 'XXEVAL' instruction. I have tested these patches on both big endian and little endian PowerPC servers, with no regressions. Can I check these patches into the trunk? 2025-07-09 Michael Meissner gcc/ PR target/117251 * config/rs6000/fusion.md: Regenerate. * config/rs6000/genfusion.pl (gen_logical_addsubf): Add support to generate vector xor => nor fusion if XXEVAL is supported. 
Diff: --- gcc/config/rs6000/fusion.md| 15 +-- gcc/config/rs6000/genfusion.pl | 1 + 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index 66d98f4537e1..e5ea37c567d6 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -2618,20 +2618,23 @@ ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vxor -> vnor (define_insn "*fuse_vxor_vnor" - [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v") -(and:VM (not:VM (xor:VM (match_operand:VM 0 "altivec_register_operand" "v,v,v,v") - (match_operand:VM 1 "altivec_register_operand" "v,v,v,v"))) - (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v" - (clobber (match_scratch:VM 4 "=X,X,X,&v"))] + [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") +(and:VM (not:VM (xor:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v") + (match_operand:VM 1 "vector_fusion_operand" "v,v,v,wa,v"))) + (not:VM (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v" + (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ vxor %3,%1,%0\;vnor %3,%3,%2 vxor %3,%1,%0\;vnor %3,%3,%2 vxor %3,%1,%0\;vnor %3,%3,%2 + xxeval %x3,%x2,%x1,%x0,144 vxor %4,%1,%0\;vnor %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") - (set_attr "length" "8")]) + (set_attr "length" "8") + (set_attr "prefixed" "*,*,*,yes,*") + (set_attr "isa" "*,*,*,xxeval,*")]) ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vand -> vor diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index 98b56b788f03..d713d10a1dbc 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ -241,6 +241,7 @@ sub gen_logical_addsubf "vor_vnor"=> 128, "vnor_vxor" => 135, "vnor_vor"=> 143, + "vxor_vnor" => 144, ); KIND: foreach $kind ('scalar','vector') {
[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector or to vector xor fusion
https://gcc.gnu.org/g:2655d834eaa00bcddc76b58e32865a8e00bf3600 commit 2655d834eaa00bcddc76b58e32865a8e00bf3600 Author: Michael Meissner Date: Wed Jul 9 01:46:23 2025 -0400 PR target/117251: Improve vector or to vector xor fusion See the following post for a complete explanation of what the patches for PR target/117251 do: * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html This is patch #22 of 45 to generate the 'XXEVAL' instruction on power10 and power11 instead of using the Altivec 'VOR' instruction feeding into 'VXOR'. The 'XXEVAL' instruction can use all 64 vector registers, instead of the 32 registers that traditional Altivec vector instructions use. By allowing all of the vector registers to be used, it reduces the amount of spilling that a large benchmark generated. Currently the following code: vector int a, b, c, d; a = (c | d) ^ b; Generates: vor t,c,d vxor a,t,b Now in addition with this patch, if the arguments or result is allocated to a traditional FPR register, the GCC compiler will now generate the following code instead of adding vector move instructions: xxeval a,b,c,d,120 Since fusion using 2 Altivec instructions is slightly faster than using the 'XXEVAL' instruction we prefer to generate the Altivec instructions if we can. In addition, because 'XXEVAL' is a prefixed instruction, it possibly might generate an extra NOP instruction to align the 'XXEVAL' instruction. I have tested these patches on both big endian and little endian PowerPC servers, with no regressions. Can I check these patches into the trunk? 2025-07-09 Michael Meissner gcc/ PR target/117251 * config/rs6000/fusion.md: Regenerate. * config/rs6000/genfusion.pl (gen_logical_addsubf): Add support to generate vector or => xor fusion if XXEVAL is supported. 
Diff: --- gcc/config/rs6000/fusion.md| 15 +-- gcc/config/rs6000/genfusion.pl | 1 + 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index 032c87ac5765..d1f6a38b618a 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -3029,20 +3029,23 @@ ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vor -> vxor (define_insn "*fuse_vor_vxor" - [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v") -(xor:VM (ior:VM (match_operand:VM 0 "altivec_register_operand" "v,v,v,v") - (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")) - (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) - (clobber (match_scratch:VM 4 "=X,X,X,&v"))] + [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") +(xor:VM (ior:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v") + (match_operand:VM 1 "vector_fusion_operand" "v,v,v,wa,v")) + (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v"))) + (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ vor %3,%1,%0\;vxor %3,%3,%2 vor %3,%1,%0\;vxor %3,%3,%2 vor %3,%1,%0\;vxor %3,%3,%2 + xxeval %x3,%x2,%x1,%x0,120 vor %4,%1,%0\;vxor %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") - (set_attr "length" "8")]) + (set_attr "length" "8") + (set_attr "prefixed" "*,*,*,yes,*") + (set_attr "isa" "*,*,*,xxeval,*")]) ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vorc -> vxor diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index 6af4c5d7a182..97681f37d0fa 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ -236,6 +236,7 @@ sub gen_logical_addsubf "vxor_vxor" => 105, "vxor_vor"=> 111, "vnor_vnor" => 112, + "vor_vxor"=> 120, ); KIND: foreach $kind ('scalar','vector') {
[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector eqv to vector or fusion
https://gcc.gnu.org/g:831ba55445ca1f97103f68f09380b0f738c26c13 commit 831ba55445ca1f97103f68f09380b0f738c26c13 Author: Michael Meissner Date: Wed Jul 9 01:49:31 2025 -0400 PR target/117251: Improve vector eqv to vector or fusion See the following post for a complete explanation of what the patches for PR target/117251 do: * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html This is patch #29 of 45 to generate the 'XXEVAL' instruction on power10 and power11 instead of using the Altivec 'VEQV' instruction feeding into 'VOR'. The 'XXEVAL' instruction can use all 64 vector registers, instead of the 32 registers that traditional Altivec vector instructions use. By allowing all of the vector registers to be used, it reduces the amount of spilling that a large benchmark generated. Currently the following code: vector int a, b, c, d; a = (~ (c ^ d)) | b; Generates: veqv t,c,d vor a,t,b Now in addition with this patch, if the arguments or result is allocated to a traditional FPR register, the GCC compiler will now generate the following code instead of adding vector move instructions: xxeval a,b,c,d,159 Since fusion using 2 Altivec instructions is slightly faster than using the 'XXEVAL' instruction we prefer to generate the Altivec instructions if we can. In addition, because 'XXEVAL' is a prefixed instruction, it possibly might generate an extra NOP instruction to align the 'XXEVAL' instruction. I have tested these patches on both big endian and little endian PowerPC servers, with no regressions. Can I check these patches into the trunk? 2025-07-09 Michael Meissner gcc/ PR target/117251 * config/rs6000/fusion.md: Regenerate. * config/rs6000/genfusion.pl (gen_logical_addsubf): Add support to generate vector eqv => or fusion if XXEVAL is supported. 
Diff: --- gcc/config/rs6000/fusion.md| 15 +-- gcc/config/rs6000/genfusion.pl | 1 + 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index bb62ae26445a..cb1ad8b4c0cc 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -2681,20 +2681,23 @@ ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector veqv -> vor (define_insn "*fuse_veqv_vor" - [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v") -(ior:VM (not:VM (xor:VM (match_operand:VM 0 "altivec_register_operand" "v,v,v,v") - (match_operand:VM 1 "altivec_register_operand" "v,v,v,v"))) - (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) - (clobber (match_scratch:VM 4 "=X,X,X,&v"))] + [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") +(ior:VM (not:VM (xor:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v") + (match_operand:VM 1 "vector_fusion_operand" "v,v,v,wa,v"))) + (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v"))) + (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ veqv %3,%1,%0\;vor %3,%3,%2 veqv %3,%1,%0\;vor %3,%3,%2 veqv %3,%1,%0\;vor %3,%3,%2 + xxeval %x3,%x2,%x1,%x0,159 veqv %4,%1,%0\;vor %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") - (set_attr "length" "8")]) + (set_attr "length" "8") + (set_attr "prefixed" "*,*,*,yes,*") + (set_attr "isa" "*,*,*,xxeval,*")]) ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vnand -> vor diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index 726e29c798bc..9400aed267a6 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ -243,6 +243,7 @@ sub gen_logical_addsubf "vnor_vor"=> 143, "vxor_vnor" => 144, "veqv_vxor" => 150, + "veqv_vor"=> 159, ); KIND: foreach $kind ('scalar','vector') {
[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector nor to vector nand fusion
https://gcc.gnu.org/g:5bf7b59b601520c417e4498c5322598d9e46c5de commit 5bf7b59b601520c417e4498c5322598d9e46c5de Author: Michael Meissner Date: Wed Jul 9 01:54:11 2025 -0400 PR target/117251: Improve vector nor to vector nand fusion See the following post for a complete explanation of what the patches for PR target/117251 do: * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html This is patch #40 of 45 to generate the 'XXEVAL' instruction on power10 and power11 instead of using the Altivec 'VNOR' instruction feeding into 'VNAND'. The 'XXEVAL' instruction can use all 64 vector registers, instead of the 32 registers that traditional Altivec vector instructions use. By allowing all of the vector registers to be used, it reduces the amount of spilling that a large benchmark generated. Currently the following code: vector int a, b, c, d; a = ~ ((~ (c | d)) & b); Generates: vnor t,c,d vnand a,t,b Now in addition with this patch, if the arguments or result is allocated to a traditional FPR register, the GCC compiler will now generate the following code instead of adding vector move instructions: xxeval a,b,c,d,247 Since fusion using 2 Altivec instructions is slightly faster than using the 'XXEVAL' instruction we prefer to generate the Altivec instructions if we can. In addition, because 'XXEVAL' is a prefixed instruction, it possibly might generate an extra NOP instruction to align the 'XXEVAL' instruction. I have tested these patches on both big endian and little endian PowerPC servers, with no regressions. Can I check these patches into the trunk? 2025-07-09 Michael Meissner gcc/ PR target/117251 * config/rs6000/fusion.md: Regenerate. * config/rs6000/genfusion.pl (gen_logical_addsubf): Add support to generate vector nor => nand fusion if XXEVAL is supported. 
Diff: --- gcc/config/rs6000/fusion.md| 15 +-- gcc/config/rs6000/genfusion.pl | 1 + 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index c1be0e5ff8f1..01b7fda17ecc 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -2414,20 +2414,23 @@ ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vnor -> vnand (define_insn "*fuse_vnor_vnand" - [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v") -(ior:VM (not:VM (and:VM (not:VM (match_operand:VM 0 "altivec_register_operand" "v,v,v,v")) - (not:VM (match_operand:VM 1 "altivec_register_operand" "v,v,v,v" - (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v" - (clobber (match_scratch:VM 4 "=X,X,X,&v"))] + [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") +(ior:VM (not:VM (and:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v")) + (not:VM (match_operand:VM 1 "vector_fusion_operand" "v,v,v,wa,v" + (not:VM (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v" + (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ vnor %3,%1,%0\;vnand %3,%3,%2 vnor %3,%1,%0\;vnand %3,%3,%2 vnor %3,%1,%0\;vnand %3,%3,%2 + xxeval %x3,%x2,%x1,%x0,247 vnor %4,%1,%0\;vnand %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") - (set_attr "length" "8")]) + (set_attr "length" "8") + (set_attr "prefixed" "*,*,*,yes,*") + (set_attr "isa" "*,*,*,xxeval,*")]) ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vor -> vnand diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index 4c70237d2d27..d4965b6df864 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ -254,6 +254,7 @@ sub gen_logical_addsubf "vnand_vnand" => 241, "vorc_vnand" => 244, "veqv_vnand" => 246, + "vnor_vnand" => 247, ); KIND: foreach $kind ('scalar','vector') {
[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector andc to vector nor fusion
https://gcc.gnu.org/g:9fd9d57f92c07aaec9a14381f6f7f072be2e026f commit 9fd9d57f92c07aaec9a14381f6f7f072be2e026f Author: Michael Meissner Date: Wed Jul 9 01:51:08 2025 -0400 PR target/117251: Improve vector andc to vector nor fusion See the following post for a complete explanation of what the patches for PR target/117251 do: * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html This is patch #32 of 45 to generate the 'XXEVAL' instruction on power10 and power11 instead of using the Altivec 'VANDC' instruction feeding into 'VNOR'. The 'XXEVAL' instruction can use all 64 vector registers, instead of the 32 registers that traditional Altivec vector instructions use. By allowing all of the vector registers to be used, it reduces the amount of spilling that a large benchmark generated. Currently the following code: vector int a, b, c, d; a = ~ ((c & ~ d) | b); Generates: vandc t,c,d vnor a,t,b Now in addition with this patch, if the arguments or result is allocated to a traditional FPR register, the GCC compiler will now generate the following code instead of adding vector move instructions: xxeval a,b,c,d,208 Since fusion using 2 Altivec instructions is slightly faster than using the 'XXEVAL' instruction we prefer to generate the Altivec instructions if we can. In addition, because 'XXEVAL' is a prefixed instruction, it possibly might generate an extra NOP instruction to align the 'XXEVAL' instruction. I have tested these patches on both big endian and little endian PowerPC servers, with no regressions. Can I check these patches into the trunk? 2025-07-09 Michael Meissner gcc/ PR target/117251 * config/rs6000/fusion.md: Regenerate. * config/rs6000/genfusion.pl (gen_logical_addsubf): Add support to generate vector andc => nor fusion if XXEVAL is supported. 
Diff: --- gcc/config/rs6000/fusion.md| 15 +-- gcc/config/rs6000/genfusion.pl | 1 + 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index f6dc26e9c1f2..dd8401d48228 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -2495,20 +2495,23 @@ ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vandc -> vnor (define_insn "*fuse_vandc_vnor" - [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v") -(and:VM (not:VM (and:VM (not:VM (match_operand:VM 0 "altivec_register_operand" "v,v,v,v")) - (match_operand:VM 1 "altivec_register_operand" "v,v,v,v"))) - (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v" - (clobber (match_scratch:VM 4 "=X,X,X,&v"))] + [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") +(and:VM (not:VM (and:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v")) + (match_operand:VM 1 "vector_fusion_operand" "v,v,v,wa,v"))) + (not:VM (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v" + (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ vandc %3,%1,%0\;vnor %3,%3,%2 vandc %3,%1,%0\;vnor %3,%3,%2 vandc %3,%1,%0\;vnor %3,%3,%2 + xxeval %x3,%x2,%x1,%x0,208 vandc %4,%1,%0\;vnor %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") - (set_attr "length" "8")]) + (set_attr "length" "8") + (set_attr "prefixed" "*,*,*,yes,*") + (set_attr "isa" "*,*,*,xxeval,*")]) ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector veqv -> vnor diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index 62f2b9e36d89..d89e78d4da03 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ -246,6 +246,7 @@ sub gen_logical_addsubf "veqv_vor"=> 159, "vorc_vxor" => 180, "vorc_vor"=> 191, + "vandc_vnor" => 208, ); KIND: foreach $kind ('scalar','vector') {
[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector nand to vector or fusion
https://gcc.gnu.org/g:c739bdabd5aeb288f4f0f9f7bf894cee51b8bb97 commit c739bdabd5aeb288f4f0f9f7bf894cee51b8bb97 Author: Michael Meissner Date: Wed Jul 9 01:52:44 2025 -0400 PR target/117251: Improve vector nand to vector or fusion See the following post for a complete explanation of what the patches for PR target/117251 do: * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html This is patch #36 of 45 to generate the 'XXEVAL' instruction on power10 and power11 instead of using the Altivec 'VNAND' instruction feeding into 'VOR'. The 'XXEVAL' instruction can use all 64 vector registers, instead of the 32 registers that traditional Altivec vector instructions use. By allowing all of the vector registers to be used, it reduces the amount of spilling that a large benchmark generated. Currently the following code: vector int a, b, c, d; a = (~ (c & d)) | b; Generates: vnand t,c,d vor a,t,b Now in addition with this patch, if the arguments or result is allocated to a traditional FPR register, the GCC compiler will now generate the following code instead of adding vector move instructions: xxeval a,b,c,d,239 Since fusion using 2 Altivec instructions is slightly faster than using the 'XXEVAL' instruction we prefer to generate the Altivec instructions if we can. In addition, because 'XXEVAL' is a prefixed instruction, it possibly might generate an extra NOP instruction to align the 'XXEVAL' instruction. I have tested these patches on both big endian and little endian PowerPC servers, with no regressions. Can I check these patches into the trunk? 2025-07-09 Michael Meissner gcc/ PR target/117251 * config/rs6000/fusion.md: Regenerate. * config/rs6000/genfusion.pl (gen_logical_addsubf): Add support to generate vector nand => or fusion if XXEVAL is supported. 
Diff: --- gcc/config/rs6000/fusion.md| 15 +-- gcc/config/rs6000/genfusion.pl | 1 + 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index e6d13b38415a..ba3a5a52b990 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -2711,20 +2711,23 @@ ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vnand -> vor (define_insn "*fuse_vnand_vor" - [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v") -(ior:VM (ior:VM (not:VM (match_operand:VM 0 "altivec_register_operand" "v,v,v,v")) - (not:VM (match_operand:VM 1 "altivec_register_operand" "v,v,v,v"))) - (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) - (clobber (match_scratch:VM 4 "=X,X,X,&v"))] + [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") +(ior:VM (ior:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v")) + (not:VM (match_operand:VM 1 "vector_fusion_operand" "v,v,v,wa,v"))) + (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v"))) + (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ vnand %3,%1,%0\;vor %3,%3,%2 vnand %3,%1,%0\;vor %3,%3,%2 vnand %3,%1,%0\;vor %3,%3,%2 + xxeval %x3,%x2,%x1,%x0,239 vnand %4,%1,%0\;vor %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") - (set_attr "length" "8")]) + (set_attr "length" "8") + (set_attr "prefixed" "*,*,*,yes,*") + (set_attr "isa" "*,*,*,xxeval,*")]) ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vnor -> vor diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index 94eae471c64b..54699d199fc5 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ -250,6 +250,7 @@ sub gen_logical_addsubf "vandc_veqv" => 210, "vand_vnor" => 224, "vnand_vxor" => 225, + "vnand_vor" => 239, ); KIND: foreach $kind ('scalar','vector') {
[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector eqv to vector nand fusion
https://gcc.gnu.org/g:2f13fe598486b76e965a6615bc4276f0864af2b5 commit 2f13fe598486b76e965a6615bc4276f0864af2b5 Author: Michael Meissner Date: Wed Jul 9 01:53:46 2025 -0400 PR target/117251: Improve vector eqv to vector nand fusion See the following post for a complete explanation of what the patches for PR target/117251 do: * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html This is patch #39 of 45 to generate the 'XXEVAL' instruction on power10 and power11 instead of using the Altivec 'VEQV' instruction feeding into 'VNAND'. The 'XXEVAL' instruction can use all 64 vector registers, instead of the 32 registers that traditional Altivec vector instructions use. By allowing all of the vector registers to be used, it reduces the amount of spilling that a large benchmark generated. Currently the following code: vector int a, b, c, d; a = ~ ((~ (c ^ d)) & b); Generates: veqv t,c,d vnand a,t,b Now in addition with this patch, if the arguments or result is allocated to a traditional FPR register, the GCC compiler will now generate the following code instead of adding vector move instructions: xxeval a,b,c,d,246 Since fusion using 2 Altivec instructions is slightly faster than using the 'XXEVAL' instruction we prefer to generate the Altivec instructions if we can. In addition, because 'XXEVAL' is a prefixed instruction, it possibly might generate an extra NOP instruction to align the 'XXEVAL' instruction. I have tested these patches on both big endian and little endian PowerPC servers, with no regressions. Can I check these patches into the trunk? 2025-07-09 Michael Meissner gcc/ PR target/117251 * config/rs6000/fusion.md: Regenerate. * config/rs6000/genfusion.pl (gen_logical_addsubf): Add support to generate vector eqv => nand fusion if XXEVAL is supported. 
Diff: --- gcc/config/rs6000/fusion.md| 15 +-- gcc/config/rs6000/genfusion.pl | 1 + 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index 96d8951049c9..c1be0e5ff8f1 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -2372,20 +2372,23 @@ ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector veqv -> vnand (define_insn "*fuse_veqv_vnand" - [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v") -(ior:VM (not:VM (not:VM (xor:VM (match_operand:VM 0 "altivec_register_operand" "v,v,v,v") - (match_operand:VM 1 "altivec_register_operand" "v,v,v,v" - (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v" - (clobber (match_scratch:VM 4 "=X,X,X,&v"))] + [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") +(ior:VM (not:VM (not:VM (xor:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v") + (match_operand:VM 1 "vector_fusion_operand" "v,v,v,wa,v" + (not:VM (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v" + (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ veqv %3,%1,%0\;vnand %3,%3,%2 veqv %3,%1,%0\;vnand %3,%3,%2 veqv %3,%1,%0\;vnand %3,%3,%2 + xxeval %x3,%x2,%x1,%x0,246 veqv %4,%1,%0\;vnand %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") - (set_attr "length" "8")]) + (set_attr "length" "8") + (set_attr "prefixed" "*,*,*,yes,*") + (set_attr "isa" "*,*,*,xxeval,*")]) ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vnand -> vnand diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index 77d3e999eb93..4c70237d2d27 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ -253,6 +253,7 @@ sub gen_logical_addsubf "vnand_vor" => 239, "vnand_vnand" => 241, "vorc_vnand" => 244, + "veqv_vnand" => 246, ); KIND: foreach $kind ('scalar','vector') {
[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector andc to vector nand fusion
https://gcc.gnu.org/g:1f9583709bf7448144ef35f96d49ba7d0f995747 commit 1f9583709bf7448144ef35f96d49ba7d0f995747 Author: Michael Meissner Date: Wed Jul 9 01:55:34 2025 -0400 PR target/117251: Improve vector andc to vector nand fusion See the following post for a complete explanation of what the patches for PR target/117251 do: * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html This is patch #43 of 45 to generate the 'XXEVAL' instruction on power10 and power11 instead of using the Altivec 'VANDC' instruction feeding into 'VNAND'. The 'XXEVAL' instruction can use all 64 vector registers, instead of the 32 registers that traditional Altivec vector instructions use. By allowing all of the vector registers to be used, it reduces the amount of spilling that a large benchmark generated. Currently the following code: vector int a, b, c, d; a = ~ ((c & ~ d) & b); Generates: vandc t,c,d vnand a,t,b Now in addition with this patch, if the arguments or result is allocated to a traditional FPR register, the GCC compiler will now generate the following code instead of adding vector move instructions: xxeval a,b,c,d,253 Since fusion using 2 Altivec instructions is slightly faster than using the 'XXEVAL' instruction we prefer to generate the Altivec instructions if we can. In addition, because 'XXEVAL' is a prefixed instruction, it possibly might generate an extra NOP instruction to align the 'XXEVAL' instruction. I have tested these patches on both big endian and little endian PowerPC servers, with no regressions. Can I check these patches into the trunk? 2025-07-09 Michael Meissner gcc/ PR target/117251 * config/rs6000/fusion.md: Regenerate. * config/rs6000/genfusion.pl (gen_logical_addsubf): Add support to generate vector andc => nand fusion if XXEVAL is supported. 
Diff: --- gcc/config/rs6000/fusion.md| 15 +-- gcc/config/rs6000/genfusion.pl | 1 + 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index e0f9ac17659a..129f7dfb26ed 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -2354,20 +2354,23 @@ ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vandc -> vnand (define_insn "*fuse_vandc_vnand" - [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v") -(ior:VM (not:VM (and:VM (not:VM (match_operand:VM 0 "altivec_register_operand" "v,v,v,v")) - (match_operand:VM 1 "altivec_register_operand" "v,v,v,v"))) - (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v" - (clobber (match_scratch:VM 4 "=X,X,X,&v"))] + [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") +(ior:VM (not:VM (and:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v")) + (match_operand:VM 1 "vector_fusion_operand" "v,v,v,wa,v"))) + (not:VM (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v" + (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ vandc %3,%1,%0\;vnand %3,%3,%2 vandc %3,%1,%0\;vnand %3,%3,%2 vandc %3,%1,%0\;vnand %3,%3,%2 + xxeval %x3,%x2,%x1,%x0,253 vandc %4,%1,%0\;vnand %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") - (set_attr "length" "8")]) + (set_attr "length" "8") + (set_attr "prefixed" "*,*,*,yes,*") + (set_attr "isa" "*,*,*,xxeval,*")]) ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector veqv -> vnand diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index 5d22a0732df6..1d31c242042e 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ -257,6 +257,7 @@ sub gen_logical_addsubf "vnor_vnand" => 247, "vor_vnand" => 248, "vxor_vnand" => 249, + "vandc_vnand" => 253, ); KIND: foreach $kind ('scalar','vector') {
[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector and to vector nor fusion
https://gcc.gnu.org/g:ad81d6531bf12016615d8538a0b8ed5958b18b5a commit ad81d6531bf12016615d8538a0b8ed5958b18b5a Author: Michael Meissner Date: Wed Jul 9 01:55:54 2025 -0400 PR target/117251: Improve vector and to vector nor fusion See the following post for a complete explanation of what the patches for PR target/117251 do: * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html This is patch #34 of 45 to generate the 'XXEVAL' instruction on power10 and power11 instead of using the Altivec 'VAND' instruction feeding into 'VNOR'. The 'XXEVAL' instruction can use all 64 vector registers, instead of the 32 registers that traditional Altivec vector instructions use. By allowing all of the vector registers to be used, it reduces the amount of spilling that a large benchmark generated. Currently the following code: vector int a, b, c, d; a = ~ ((c & d) | b); Generates: vand t,c,d vnor a,t,b Now in addition with this patch, if the arguments or result is allocated to a traditional FPR register, the GCC compiler will now generate the following code instead of adding vector move instructions: xxeval a,b,c,224 Since fusion using 2 Altivec instructions is slightly faster than using the 'XXEVAL' instruction we prefer to generate the Altivec instructions if we can. In addition, because 'XXEVAL' is a prefixed instruction, it possibly might generate an extra NOP instruction to align the 'XXEVAL' instruction. I have tested these patches on both big endian and little endian PowerPC servers, with no regressions. Can I check these patches into the trunk? 2025-07-09 Michael Meissner gcc/ PR target/117251 * config/rs6000/fusion.md: Regenerate. * config/rs6000/genfusion.pl (gen_logical_addsubf): Add support to generate vector and => nor fusion if XXEVAL is supported. 
Diff: --- gcc/config/rs6000/fusion.md| 15 +-- gcc/config/rs6000/genfusion.pl | 1 + 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index 129f7dfb26ed..61d66129da65 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -2336,20 +2336,23 @@ ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vand -> vnand (define_insn "*fuse_vand_vnand" - [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v") -(ior:VM (not:VM (and:VM (match_operand:VM 0 "altivec_register_operand" "v,v,v,v") - (match_operand:VM 1 "altivec_register_operand" "v,v,v,v"))) - (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v" - (clobber (match_scratch:VM 4 "=X,X,X,&v"))] + [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") +(ior:VM (not:VM (and:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v") + (match_operand:VM 1 "vector_fusion_operand" "v,v,v,wa,v"))) + (not:VM (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v" + (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ vand %3,%1,%0\;vnand %3,%3,%2 vand %3,%1,%0\;vnand %3,%3,%2 vand %3,%1,%0\;vnand %3,%3,%2 + xxeval %x3,%x2,%x1,%x0,254 vand %4,%1,%0\;vnand %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") - (set_attr "length" "8")]) + (set_attr "length" "8") + (set_attr "prefixed" "*,*,*,yes,*") + (set_attr "isa" "*,*,*,xxeval,*")]) ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vandc -> vnand diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index 1d31c242042e..9261dd369340 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ -258,6 +258,7 @@ sub gen_logical_addsubf "vor_vnand" => 248, "vxor_vnand" => 249, "vandc_vnand" => 253, + "vand_vnand" => 254, ); KIND: foreach $kind ('scalar','vector') {
[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector orc to vector nand fusion
https://gcc.gnu.org/g:e8542b1a292ce063800aae477c76ba03b5736a36 commit e8542b1a292ce063800aae477c76ba03b5736a36 Author: Michael Meissner Date: Wed Jul 9 01:53:28 2025 -0400 PR target/117251: Improve vector orc to vector nand fusion See the following post for a complete explanation of what the patches for PR target/117251 do: * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html This is patch #38 of 45 to generate the 'XXEVAL' instruction on power10 and power11 instead of using the Altivec 'VORC' instruction feeding into 'VNAND'. The 'XXEVAL' instruction can use all 64 vector registers, instead of the 32 registers that traditional Altivec vector instructions use. By allowing all of the vector registers to be used, it reduces the amount of spilling that a large benchmark generated. Currently the following code: vector int a, b, c, d; a = ~ ((c | ~ d) & b); Generates: vorc t,c,d vnand a,t,b Now in addition with this patch, if the arguments or result is allocated to a traditional FPR register, the GCC compiler will now generate the following code instead of adding vector move instructions: xxeval a,b,c,244 Since fusion using 2 Altivec instructions is slightly faster than using the 'XXEVAL' instruction we prefer to generate the Altivec instructions if we can. In addition, because 'XXEVAL' is a prefixed instruction, it possibly might generate an extra NOP instruction to align the 'XXEVAL' instruction. I have tested these patches on both big endian and little endian PowerPC servers, with no regressions. Can I check these patches into the trunk? 2025-07-09 Michael Meissner gcc/ PR target/117251 * config/rs6000/fusion.md: Regenerate. * config/rs6000/genfusion.pl (gen_logical_addsubf): Add support to generate vector orc => nand fusion if XXEVAL is supported. 
Diff: --- gcc/config/rs6000/fusion.md| 15 +-- gcc/config/rs6000/genfusion.pl | 1 + 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index 241b8a494fb1..96d8951049c9 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -2447,20 +2447,23 @@ ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vorc -> vnand (define_insn "*fuse_vorc_vnand" - [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v") -(ior:VM (not:VM (ior:VM (not:VM (match_operand:VM 0 "altivec_register_operand" "v,v,v,v")) - (match_operand:VM 1 "altivec_register_operand" "v,v,v,v"))) - (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v" - (clobber (match_scratch:VM 4 "=X,X,X,&v"))] + [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") +(ior:VM (not:VM (ior:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v")) + (match_operand:VM 1 "vector_fusion_operand" "v,v,v,wa,v"))) + (not:VM (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v" + (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ vorc %3,%1,%0\;vnand %3,%3,%2 vorc %3,%1,%0\;vnand %3,%3,%2 vorc %3,%1,%0\;vnand %3,%3,%2 + xxeval %x3,%x2,%x1,%x0,244 vorc %4,%1,%0\;vnand %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") - (set_attr "length" "8")]) + (set_attr "length" "8") + (set_attr "prefixed" "*,*,*,yes,*") + (set_attr "isa" "*,*,*,xxeval,*")]) ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vxor -> vnand diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index 728a447c65a9..77d3e999eb93 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ -252,6 +252,7 @@ sub gen_logical_addsubf "vnand_vxor" => 225, "vnand_vor" => 239, "vnand_vnand" => 241, + "vorc_vnand" => 244, ); KIND: foreach $kind ('scalar','vector') {
[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector andc to vector eqv fusion
https://gcc.gnu.org/g:b14b34c56ef8da774c9e6352c39907dd7fec783b commit b14b34c56ef8da774c9e6352c39907dd7fec783b Author: Michael Meissner Date: Wed Jul 9 01:51:28 2025 -0400 PR target/117251: Improve vector andc to vector eqv fusion See the following post for a complete explanation of what the patches for PR target/117251 do: * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html This is patch #33 of 45 to generate the 'XXEVAL' instruction on power10 and power11 instead of using the Altivec 'VANDC' instruction feeding into 'VEQV'. The 'XXEVAL' instruction can use all 64 vector registers, instead of the 32 registers that traditional Altivec vector instructions use. By allowing all of the vector registers to be used, it reduces the amount of spilling that a large benchmark generated. Currently the following code: vector int a, b, c, d; a = ~ ((c & ~ d) ^ b); Generates: vandc t,c,d veqv a,t,b Now in addition with this patch, if the arguments or result is allocated to a traditional FPR register, the GCC compiler will now generate the following code instead of adding vector move instructions: xxeval a,b,c,210 Since fusion using 2 Altivec instructions is slightly faster than using the 'XXEVAL' instruction we prefer to generate the Altivec instructions if we can. In addition, because 'XXEVAL' is a prefixed instruction, it possibly might generate an extra NOP instruction to align the 'XXEVAL' instruction. I have tested these patches on both big endian and little endian PowerPC servers, with no regressions. Can I check these patches into the trunk? 2025-07-09 Michael Meissner gcc/ PR target/117251 * config/rs6000/fusion.md: Regenerate. * config/rs6000/genfusion.pl (gen_logical_addsubf): Add support to generate vector andc => eqv fusion if XXEVAL is supported. 
Diff: --- gcc/config/rs6000/fusion.md| 15 +-- gcc/config/rs6000/genfusion.pl | 1 + 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index dd8401d48228..e3d9f7376a8d 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -2204,20 +2204,23 @@ ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vandc -> veqv (define_insn "*fuse_vandc_veqv" - [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v") -(not:VM (xor:VM (and:VM (not:VM (match_operand:VM 0 "altivec_register_operand" "v,v,v,v")) - (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")) - (match_operand:VM 2 "altivec_register_operand" "v,v,v,v" - (clobber (match_scratch:VM 4 "=X,X,X,&v"))] + [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") +(not:VM (xor:VM (and:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v")) + (match_operand:VM 1 "vector_fusion_operand" "v,v,v,wa,v")) + (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v" + (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ vandc %3,%1,%0\;veqv %3,%3,%2 vandc %3,%1,%0\;veqv %3,%3,%2 vandc %3,%1,%0\;veqv %3,%3,%2 + xxeval %x3,%x2,%x1,%x0,210 vandc %4,%1,%0\;veqv %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") - (set_attr "length" "8")]) + (set_attr "length" "8") + (set_attr "prefixed" "*,*,*,yes,*") + (set_attr "isa" "*,*,*,xxeval,*")]) ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector veqv -> veqv diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index d89e78d4da03..3a603eb09675 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ -247,6 +247,7 @@ sub gen_logical_addsubf "vorc_vxor" => 180, "vorc_vor"=> 191, "vandc_vnor" => 208, + "vandc_veqv" => 210, ); KIND: foreach $kind ('scalar','vector') {
[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector xor to vector nand fusion
https://gcc.gnu.org/g:3928830b0fa97afc8f2ca54b7a9b203c8502dc3a commit 3928830b0fa97afc8f2ca54b7a9b203c8502dc3a Author: Michael Meissner Date: Wed Jul 9 01:55:11 2025 -0400 PR target/117251: Improve vector xor to vector nand fusion See the following post for a complete explanation of what the patches for PR target/117251 do: * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html This is patch #42 of 45 to generate the 'XXEVAL' instruction on power10 and power11 instead of using the Altivec 'VXOR' instruction feeding into 'VNAND'. The 'XXEVAL' instruction can use all 64 vector registers, instead of the 32 registers that traditional Altivec vector instructions use. By allowing all of the vector registers to be used, it reduces the amount of spilling that a large benchmark generated. Currently the following code: vector int a, b, c, d; a = ~ ((c ^ d) & b); Generates: vxor t,c,d vnand a,t,b Now in addition with this patch, if the arguments or result is allocated to a traditional FPR register, the GCC compiler will now generate the following code instead of adding vector move instructions: xxeval a,b,c,249 Since fusion using 2 Altivec instructions is slightly faster than using the 'XXEVAL' instruction we prefer to generate the Altivec instructions if we can. In addition, because 'XXEVAL' is a prefixed instruction, it possibly might generate an extra NOP instruction to align the 'XXEVAL' instruction. I have tested these patches on both big endian and little endian PowerPC servers, with no regressions. Can I check these patches into the trunk? 2025-07-09 Michael Meissner gcc/ PR target/117251 * config/rs6000/fusion.md: Regenerate. * config/rs6000/genfusion.pl (gen_logical_addsubf): Add support to generate vector xor => nand fusion if XXEVAL is supported. 
Diff: --- gcc/config/rs6000/fusion.md| 15 +-- gcc/config/rs6000/genfusion.pl | 1 + 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index 39b586918c17..e0f9ac17659a 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -2477,20 +2477,23 @@ ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vxor -> vnand (define_insn "*fuse_vxor_vnand" - [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v") -(ior:VM (not:VM (xor:VM (match_operand:VM 0 "altivec_register_operand" "v,v,v,v") - (match_operand:VM 1 "altivec_register_operand" "v,v,v,v"))) - (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v" - (clobber (match_scratch:VM 4 "=X,X,X,&v"))] + [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") +(ior:VM (not:VM (xor:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v") + (match_operand:VM 1 "vector_fusion_operand" "v,v,v,wa,v"))) + (not:VM (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v" + (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ vxor %3,%1,%0\;vnand %3,%3,%2 vxor %3,%1,%0\;vnand %3,%3,%2 vxor %3,%1,%0\;vnand %3,%3,%2 + xxeval %x3,%x2,%x1,%x0,249 vxor %4,%1,%0\;vnand %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") - (set_attr "length" "8")]) + (set_attr "length" "8") + (set_attr "prefixed" "*,*,*,yes,*") + (set_attr "isa" "*,*,*,xxeval,*")]) ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vand -> vnor diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index 86bca81286ca..5d22a0732df6 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ -256,6 +256,7 @@ sub gen_logical_addsubf "veqv_vnand" => 246, "vnor_vnand" => 247, "vor_vnand" => 248, + "vxor_vnand" => 249, ); KIND: foreach $kind ('scalar','vector') {
[gcc(refs/users/meissner/heads/work214-sha)] Update ChangeLog.*
https://gcc.gnu.org/g:8916191afc59ba6583fe065b828c75175cf10ffb commit 8916191afc59ba6583fe065b828c75175cf10ffb Author: Michael Meissner Date: Wed Jul 9 02:03:34 2025 -0400 Update ChangeLog.* Diff: --- gcc/ChangeLog.sha | 2328 + 1 file changed, 2328 insertions(+) diff --git a/gcc/ChangeLog.sha b/gcc/ChangeLog.sha index 7d7ad9d46b77..cb36b080f21b 100644 --- a/gcc/ChangeLog.sha +++ b/gcc/ChangeLog.sha @@ -1,3 +1,2331 @@ + Branch work214-sha, patch #345 + +PR target/117251: Add tests + +This is patch #45 of 45 to generate the 'XXEVAL' instruction on power10 +and power11 instead of using the Altivec 'VAND' instruction feeding +into 'VNAND'. The 'XXEVAL' instruction can use all 64 vector +registers, instead of the 32 registers that traditional Altivec vector +instructions use. By allowing all of the vector registers to be used, +it reduces the amount of spilling that a large benchmark generated. + +This patch adds the tests for generating 'XXEVAL' to the testsuite. + +I have tested these patches on both big endian and little endian +PowerPC servers, with no regressions. Can I check these patchs into +the trunk? + +2025-07-09 Michael Meissner + +gcc/testsuite/ + + PR target/117251 + * gcc.target/powerpc/p10-vector-fused-1.c: New test. + * gcc.target/powerpc/p10-vector-fused-2.c: Likewise. + + + Branch work214-sha, patch #344 + +PR target/117251: Improve vector and to vector nand fusion + +See the following post for a complete explanation of what the patches +for PR target/117251: + + * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html + +This is patch #44 of 45 to generate the 'XXEVAL' instruction on power10 +and power11 instead of using the Altivec 'VAND' instruction feeding +into 'VNAND'. The 'XXEVAL' instruction can use all 64 vector +registers, instead of the 32 registers that traditional Altivec vector +instructions use. By allowing all of the vector registers to be used, +it reduces the amount of spilling that a large benchmark generated. 
+ +Currently the following code: + + vector int a, b, c, d; + a = ~ ((c & d) & b); + +Generates: + + vand t,c,d + vnand a,t,b + +Now in addition with this patch, if the arguments or result is +allocated to a traditional FPR register, the GCC compiler will now +generate the following code instead of adding vector move instructions: + + xxeval a,b,c,254 + +Since fusion using 2 Altivec instructions is slightly faster than using +the 'XXEVAL' instruction we prefer to generate the Altivec instructions +if we can. In addition, because 'XXEVAL' is a prefixed instruction, it +possibly might generate an extra NOP instruction to align the 'XXEVAL' +instruction. + +I have tested these patches on both big endian and little endian +PowerPC servers, with no regressions. Can I check these patchs into +the trunk? + +2025-07-09 Michael Meissner + +gcc/ + + PR target/117251 + * config/rs6000/fusion.md: Regenerate. + * config/rs6000/genfusion.pl (gen_logical_addsubf): Add support + to generate vector and => nand fusion if XXEVAL is supported. + + + Branch work214-sha, patch #343 + +PR target/117251: Improve vector andc to vector nand fusion + +See the following post for a complete explanation of what the patches +for PR target/117251: + + * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html + +This is patch #43 of 45 to generate the 'XXEVAL' instruction on power10 +and power11 instead of using the Altivec 'VANDC' instruction feeding +into 'VNAND'. The 'XXEVAL' instruction can use all 64 vector +registers, instead of the 32 registers that traditional Altivec vector +instructions use. By allowing all of the vector registers to be used, +it reduces the amount of spilling that a large benchmark generated. 
+ +Currently the following code: + + vector int a, b, c, d; + a = ~ ((c & ~ d) & b); + +Generates: + + vandc t,c,d + vnand a,t,b + +Now in addition with this patch, if the arguments or result is +allocated to a traditional FPR register, the GCC compiler will now +generate the following code instead of adding vector move instructions: + + xxeval a,b,c,253 + +Since fusion using 2 Altivec instructions is slightly faster than using +the 'XXEVAL' instruction we prefer to generate the Altivec instructions +if we can. In addition, because 'XXEVAL' is a prefixed instruction, it +possibly might generate an extra NOP instruction to align the 'XXEVAL' +instruction. + +I have tested these patches on both big endian and little endian +PowerPC servers, with no regressions. Can I check these patchs into +the trunk? + +2025-07-09 Michael Meissner + +gcc/ + + PR target/117251 + * config/rs6000/fusion.md: Regenerate. + * config/rs6000/genfusion.pl (gen_logica
[gcc r15-9940] tree-optimization/120927 - 510.parest_r segfault with masked epilog
https://gcc.gnu.org/g:b8599692a336b29851bdc5d8506a51d57521595c commit r15-9940-gb8599692a336b29851bdc5d8506a51d57521595c Author: Richard Biener Date: Thu Jul 3 14:39:22 2025 +0200 tree-optimization/120927 - 510.parest_r segfault with masked epilog The following fixes bad alignment computation for epilog vectorization when as in this case for 510.parest_r and masked epilog vectorization with AVX512 we end up choosing AVX to vectorize the main loop and masked AVX512 (sic!) to vectorize the epilog. In that case alignment analysis for the epilog tries to force alignment of the base to 64, but that cannot possibly help the epilog when the main loop had used a vector mode with smaller alignment requirement. There's another issue, that the check whether the step preserves alignment needs to consider possibly previously involved VFs (here, the main loop's smaller VF) as well. These might not be the only case with problems for such a mode mix but at least there it seems wise to never use DR alignment forcing when analyzing an epilog. We get to choose this mode setup because the iteration over epilog modes doesn't prevent this, the maybe_ge (cached_vf_per_mode[0], first_vinfo_vf) skip is conditional on !supports_partial_vectors and it is also conditional on having a cached VF. Further nothing in vect_analyze_loop_1 rejects this setup - it might be conceivable that a target can do masking only for larger modes. There is a second reason we end up with this mode setup, which is that vect_need_peeling_or_partial_vectors_p says we do not need peeling or partial vectors when analyzing the main loop with AVX512 (if it would say so we'd have chosen a masked AVX512 epilog-only vectorization). It does that because it looks at LOOP_VINFO_COST_MODEL_THRESHOLD (which is not yet computed, so always zero at this point), and compares max_niter (5) against the VF (8), but not with equality as the comment says but with greater. This also needs looking at, PR120939. 
PR tree-optimization/120927 * tree-vect-data-refs.cc (vect_compute_data_ref_alignment): Do not force a DRs base alignment when analyzing an epilog loop. Check whether the step preserves alignment for all VFs possibly involved so far. * gcc.dg/vect/vect-pr120927.c: New testcase. * gcc.dg/vect/vect-pr120927-2.c: Likewise. (cherry picked from commit 918f4517564c2cf7e5bb907428d5413742bee56f) Diff: --- gcc/testsuite/gcc.dg/vect/vect-pr120927-2.c | 24 gcc/testsuite/gcc.dg/vect/vect-pr120927.c | 24 gcc/tree-vect-data-refs.cc | 16 3 files changed, 60 insertions(+), 4 deletions(-) diff --git a/gcc/testsuite/gcc.dg/vect/vect-pr120927-2.c b/gcc/testsuite/gcc.dg/vect/vect-pr120927-2.c new file mode 100644 index ..e38cebeb9201 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-pr120927-2.c @@ -0,0 +1,24 @@ +/* { dg-additional-options "--param vect-partial-vector-usage=1" } */ +/* { dg-additional-options "-mavx512bw -mavx512vl" { target avx512f_runtime } } */ + +#include "tree-vect.h" + +static const double __attribute__((aligned(__BIGGEST_ALIGNMENT__))) a[] = { 1., 2., 3., 4., 5. }; + +void __attribute__((noipa)) +foo (double *b, double *bp, double c, int n) +{ + for (int i = 0; i < n; ++i) +b[i] = bp[i] = a[i] * c; +} + +int main() +{ + double b[6], bp[6]; + b[5] = bp[5] = 13.; + check_vect (); + foo (b, bp, 3., 5); + if (b[5] != 13. || bp[5] != 13.) +abort (); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/vect/vect-pr120927.c b/gcc/testsuite/gcc.dg/vect/vect-pr120927.c new file mode 100644 index ..793593f758f2 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-pr120927.c @@ -0,0 +1,24 @@ +/* { dg-additional-options "--param vect-partial-vector-usage=1" } */ +/* { dg-additional-options "-mavx512bw -mavx512vl" { target avx512f_runtime } } */ + +#include "tree-vect.h" + +static const double a[] = { 1., 2., 3., 4., 5. 
}; + +void __attribute__((noipa)) +foo (double *b, double *bp, double c, int n) +{ + for (int i = 0; i < n; ++i) +b[i] = bp[i] = a[i] * c; +} + +int main() +{ + double b[6], bp[6]; + b[5] = bp[5] = 13.; + check_vect (); + foo (b, bp, 3., 5); + if (b[5] != 13. || bp[5] != 13.) +abort (); + return 0; +} diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc index 4ca9ab73d690..85145f94516a 100644 --- a/gcc/tree-vect-data-refs.cc +++ b/gcc/tree-vect-data-refs.cc @@ -1410,10 +1410,17 @@ vect_compute_data_ref_alignment (vec_info *vinfo, dr_vec_info *dr_info, /* We can only use base and misalignment information relative to an innermost loop if the misalignment stays the same throughout the execution of the loop. As above, this is the case if the s
[gcc r15-9941] tree-optimization/120817 - bogus DSE of .MASK_STORE
https://gcc.gnu.org/g:0ebeed53983dbcefcf7b950895c9d88c85342cf4 commit r15-9941-g0ebeed53983dbcefcf7b950895c9d88c85342cf4 Author: Richard Biener Date: Mon Jul 7 09:56:50 2025 +0200 tree-optimization/120817 - bogus DSE of .MASK_STORE DSE used ao_ref_init_from_ptr_and_size for .MASK_STORE but alias-analysis will use the specified size to disambiguate against smaller objects. For .MASK_STORE we instead have to make the access size unspecified but we can still constrain the access extent based on the maximum size possible. PR tree-optimization/120817 * tree-ssa-dse.cc (initialize_ao_ref_for_dse): Use ao_ref_init_from_ptr_and_range with unknown size for .MASK_STORE and .MASK_LEN_STORE. * gcc.dg/vect/pr120817.c: New testcase. (cherry picked from commit 439b14e222571da76da2bfec04b9035fb9f1862d) Diff: --- gcc/testsuite/gcc.dg/vect/pr120817.c | 40 gcc/tree-ssa-dse.cc | 8 2 files changed, 44 insertions(+), 4 deletions(-) diff --git a/gcc/testsuite/gcc.dg/vect/pr120817.c b/gcc/testsuite/gcc.dg/vect/pr120817.c new file mode 100644 index ..d8f55c9b98d2 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr120817.c @@ -0,0 +1,40 @@ +/* { dg-additional-options "-O1" } */ +/* { dg-additional-options "-mcpu=neoverse-n2" { target aarch64*-*-* } } */ + +#include "tree-vect.h" + +typedef struct { +int _M_current; +} __normal_iterator; + +typedef struct { +char _M_elems[5]; +} array_5; + +__normal_iterator __trans_tmp_1 = {-5}; + +__attribute__((noipa)) +array_5 copySourceIntoTarget() { +array_5 target; +char* target_it = target._M_elems; + +while (__trans_tmp_1._M_current != 0) { +*target_it = 1; +__trans_tmp_1._M_current++; +target_it++; +} + +return target; +} + +int main () +{ + check_vect (); + + array_5 res = copySourceIntoTarget(); + +#pragma GCC novector + for (int i = 0; i < 5; i++) +if (res._M_elems[i] != 1) + __builtin_abort (); +} diff --git a/gcc/tree-ssa-dse.cc b/gcc/tree-ssa-dse.cc index bc632e384841..215f5c3e2869 100644 --- a/gcc/tree-ssa-dse.cc +++ b/gcc/tree-ssa-dse.cc @@ 
-181,10 +181,10 @@ initialize_ao_ref_for_dse (gimple *stmt, ao_ref *write, bool may_def_ok = false) can provide a may-def variant. */ if (may_def_ok) { - ao_ref_init_from_ptr_and_size ( - write, gimple_call_arg (stmt, 0), - TYPE_SIZE_UNIT ( - TREE_TYPE (gimple_call_arg (stmt, stored_value_index; + ao_ref_init_from_ptr_and_range ( + write, gimple_call_arg (stmt, 0), true, 0, -1, + tree_to_poly_int64 (TYPE_SIZE ( + TREE_TYPE (gimple_call_arg (stmt, stored_value_index); return true; } break;
[gcc r15-9943] tree-optimization/118669 - fixup wrongly aligned loads/stores
https://gcc.gnu.org/g:c625bc9c7c294ef2851ae42d4a5b6cc899fecb5e commit r15-9943-gc625bc9c7c294ef2851ae42d4a5b6cc899fecb5e Author: Richard Biener Date: Wed Jul 2 09:30:05 2025 +0200 tree-optimization/118669 - fixup wrongly aligned loads/stores The vectorizer tracks alignment of datarefs with dr_aligned and dr_unaligned_supported but that's aligned with respect to the target alignment which can be less aligned than the mode used for the access. The following fixes this discrepancy for vectorizing loads and stores. The issue is visible for aarch64 SVE and risc-v where VLA vector modes have larger than element alignment but the target handles element alignment just fine. PR tree-optimization/118669 * tree-vect-stmts.cc (vectorizable_load): Emit loads with proper (element) alignment. (vectorizable_store): Likewise. (cherry picked from commit 37bf13adcda564dfdb28c3aa736f2cac71c73d09) Diff: --- gcc/tree-vect-stmts.cc | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 978a4626b35b..89ac5f611276 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -10062,7 +10062,8 @@ vectorizable_store (vec_info *vinfo, = fold_build2 (MEM_REF, vectype, dataref_ptr, dataref_offset ? dataref_offset : build_int_cst (ref_type, 0)); - if (alignment_support_scheme == dr_aligned) + if (alignment_support_scheme == dr_aligned + && align >= TYPE_ALIGN_UNIT (vectype)) ; else TREE_TYPE (data_ref) @@ -12254,7 +12255,8 @@ vectorizable_load (vec_info *vinfo, { data_ref = fold_build2 (MEM_REF, ltype, dataref_ptr, offset); - if (alignment_support_scheme == dr_aligned) + if (alignment_support_scheme == dr_aligned + && align >= TYPE_ALIGN_UNIT (ltype)) ; else TREE_TYPE (data_ref)
[gcc r15-9939] tree-optimization/120944 - bogus VN with volatile copies
https://gcc.gnu.org/g:2c23368ed910a911e72af5decfc39bef11a9efac commit r15-9939-g2c23368ed910a911e72af5decfc39bef11a9efac Author: Richard Biener Date: Fri Jul 4 09:08:19 2025 +0200 tree-optimization/120944 - bogus VN with volatile copies The following avoids translating expressions through volatile copies. PR tree-optimization/120944 * tree-ssa-sccvn.cc (vn_reference_lookup_3): Gate optimizations invalid when volatile is involved. * gcc.dg/torture/pr120944.c: New testcase. (cherry picked from commit 6ed1e2ae1a742d859c2dd74c9e7cebdd3618e8b1) Diff: --- gcc/testsuite/gcc.dg/torture/pr120944.c | 34 + gcc/tree-ssa-sccvn.cc | 9 +++-- 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/gcc/testsuite/gcc.dg/torture/pr120944.c b/gcc/testsuite/gcc.dg/torture/pr120944.c new file mode 100644 index ..92f3c7749963 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr120944.c @@ -0,0 +1,34 @@ +/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */ +/* { dg-additional-options "-fdump-tree-optimized" } */ + +#include + +typedef union { + int u32; + struct + { + int A:1; + int B:2; + int C:3; + }; +} u_t; + +typedef union { + volatile int u[3]; + volatile struct { +u_t a; +int b; +int c; + }; +} DATA; + +void foo (volatile DATA *d) +{ + d->a.u32 = ~0; + u_t u = d->a; + int v = u.A; + if (v) +abort(); +} + +/* { dg-final { scan-tree-dump-times "if \\\(" 1 "optimized" } } */ diff --git a/gcc/tree-ssa-sccvn.cc b/gcc/tree-ssa-sccvn.cc index f3bc6dbebe13..7f2500e5d591 100644 --- a/gcc/tree-ssa-sccvn.cc +++ b/gcc/tree-ssa-sccvn.cc @@ -2809,7 +2809,8 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, we find a VN result with exactly the same value as the possible clobber. In this case we can ignore the clobber and return the found value. 
*/ - if (is_gimple_reg_type (TREE_TYPE (lhs)) + if (!gimple_has_volatile_ops (def_stmt) + && is_gimple_reg_type (TREE_TYPE (lhs)) && types_compatible_p (TREE_TYPE (lhs), vr->type) && (ref->ref || data->orig_ref.ref) && !data->mask @@ -3093,7 +3094,8 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, else if (is_gimple_reg_type (vr->type) && gimple_assign_single_p (def_stmt) && gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR - && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (def_stmt)) == 0) + && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (def_stmt)) == 0 + && !TREE_THIS_VOLATILE (gimple_assign_lhs (def_stmt))) { tree base2; poly_int64 offset2, size2, maxsize2; @@ -3149,6 +3151,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, && !reverse_storage_order_for_component_p (vr->operands) && !contains_storage_order_barrier_p (vr->operands) && gimple_assign_single_p (def_stmt) + && !TREE_THIS_VOLATILE (gimple_assign_lhs (def_stmt)) && CHAR_BIT == 8 && BITS_PER_UNIT == 8 && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN @@ -3307,6 +3310,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, && !reverse_storage_order_for_component_p (vr->operands) && !contains_storage_order_barrier_p (vr->operands) && gimple_assign_single_p (def_stmt) + && !TREE_THIS_VOLATILE (gimple_assign_lhs (def_stmt)) && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME) { tree lhs = gimple_assign_lhs (def_stmt); @@ -3518,6 +3522,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, the copy kills ref. */ else if (data->vn_walk_kind == VN_WALKREWRITE && gimple_assign_single_p (def_stmt) + && !gimple_has_volatile_ops (def_stmt) && (DECL_P (gimple_assign_rhs1 (def_stmt)) || TREE_CODE (gimple_assign_rhs1 (def_stmt)) == MEM_REF || handled_component_p (gimple_assign_rhs1 (def_stmt
[gcc r15-9944] tree-optimization/120358 - bogus PTA with structure access
https://gcc.gnu.org/g:0f1e4dd1f9354ea962113e066152d0a77209f732 commit r15-9944-g0f1e4dd1f9354ea962113e066152d0a77209f732 Author: Richard Biener Date: Mon Jul 7 15:13:38 2025 +0200 tree-optimization/120358 - bogus PTA with structure access When we compute the constraint for something like MEM[(const struct QStringView &)&tok2 + 32] we go and compute what (const struct QStringView &)&tok2 + 32 points to and then add subvariables to its dereference that possibly fall in the range of the access according to the original refs size. In doing that we disregarded that the subvariable the starting address points to might not be aligned to it and thus the access might start at any point within that variable. The following conservatively adjusts the pruning of adjacent sub-variables to honor this. PR tree-optimization/120358 * tree-ssa-structalias.cc (get_constraint_for_1): Adjust pruning of sub-variables according to the imprecise known start offset. (cherry picked from commit aa5ae523e84a97bf3a582ea0fa73d959afa9b9c7) Diff: --- gcc/tree-ssa-structalias.cc | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/gcc/tree-ssa-structalias.cc b/gcc/tree-ssa-structalias.cc index deca44ae0bf3..0215243d5be9 100644 --- a/gcc/tree-ssa-structalias.cc +++ b/gcc/tree-ssa-structalias.cc @@ -3690,7 +3690,10 @@ get_constraint_for_1 (tree t, vec *results, bool address_p, size = -1; for (; curr; curr = vi_next (curr)) { - if (curr->offset - vi->offset < size) + /* The start of the access might happen anywhere +within vi, so conservatively assume it was +at its end. */ + if (curr->offset - (vi->offset + vi->size - 1) < size) { cs.var = curr->id; results->safe_push (cs);
[gcc r15-9942] testsuite: add sve hw check to testcase [PR120817]
https://gcc.gnu.org/g:77066fec7ae3b57806c5d8fed9429c7db9ee446b commit r15-9942-g77066fec7ae3b57806c5d8fed9429c7db9ee446b Author: Tamar Christina Date: Mon Jul 7 17:05:01 2025 +0100 testsuite: add sve hw check to testcase [PR120817] Drop down from SVE2 to SVE1 as that's the minimum required for the test, and since it's a mid-end test add the aarch64_sve_hw check. gcc/testsuite/ChangeLog: PR tree-optimization/120817 * gcc.dg/vect/pr120817.c: Add SVE HW check. (cherry picked from commit 4b9f760c511a4ef3a390dd6cfab80bada57c2535) Diff: --- gcc/testsuite/gcc.dg/vect/pr120817.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.dg/vect/pr120817.c b/gcc/testsuite/gcc.dg/vect/pr120817.c index d8f55c9b98d2..199189a8b9ad 100644 --- a/gcc/testsuite/gcc.dg/vect/pr120817.c +++ b/gcc/testsuite/gcc.dg/vect/pr120817.c @@ -1,5 +1,6 @@ /* { dg-additional-options "-O1" } */ -/* { dg-additional-options "-mcpu=neoverse-n2" { target aarch64*-*-* } } */ +/* { dg-require-effective-target aarch64_sve_hw { target aarch64*-*-* } } */ +/* { dg-additional-options "-march=armv8-a+sve -mtune=neoverse-n2" { target aarch64*-*-* } } */ #include "tree-vect.h"
[gcc r15-9937] libstdc++: Fix typo in __size_to_integer(__GLIBCXX_TYPE_INT_N_3)
https://gcc.gnu.org/g:dff46525b78b8c9433ad63e5d7396b2ecca450f9 commit r15-9937-gdff46525b78b8c9433ad63e5d7396b2ecca450f9 Author: Jonathan Wakely Date: Fri Jul 4 21:33:05 2025 +0100 libstdc++: Fix typo in __size_to_integer(__GLIBCXX_TYPE_INT_N_3) The overload taking a signed type was returning unsigned and the overload taking an unsigned type was returning signed. libstdc++-v3/ChangeLog: * include/bits/stl_algobase.h (__size_to_integer): Move misplaced unsigned keyword on __size_to_integer overloads for __GLIBCXX_TYPE_INT_N_3 integer type. (cherry picked from commit 106591f79a3beaed226400fbdc568e95229dc936) Diff: --- libstdc++-v3/include/bits/stl_algobase.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libstdc++-v3/include/bits/stl_algobase.h b/libstdc++-v3/include/bits/stl_algobase.h index 119dbe9a0936..4d5662ca45bf 100644 --- a/libstdc++-v3/include/bits/stl_algobase.h +++ b/libstdc++-v3/include/bits/stl_algobase.h @@ -1046,9 +1046,9 @@ _GLIBCXX_END_NAMESPACE_CONTAINER __size_to_integer(unsigned __GLIBCXX_TYPE_INT_N_2 __n) { return __n; } #endif #if defined(__GLIBCXX_TYPE_INT_N_3) - __extension__ inline _GLIBCXX_CONSTEXPR unsigned __GLIBCXX_TYPE_INT_N_3 - __size_to_integer(__GLIBCXX_TYPE_INT_N_3 __n) { return __n; } __extension__ inline _GLIBCXX_CONSTEXPR __GLIBCXX_TYPE_INT_N_3 + __size_to_integer(__GLIBCXX_TYPE_INT_N_3 __n) { return __n; } + __extension__ inline _GLIBCXX_CONSTEXPR unsigned __GLIBCXX_TYPE_INT_N_3 __size_to_integer(unsigned __GLIBCXX_TYPE_INT_N_3 __n) { return __n; } #endif
[gcc(refs/users/mikael/heads/stabilisation_descriptor_v01)] Ajout directive warning
https://gcc.gnu.org/g:e6eb899b5e7ce65a45db2fa7d90693f2509812f1 commit e6eb899b5e7ce65a45db2fa7d90693f2509812f1 Author: Mikael Morin Date: Tue Jul 8 22:41:06 2025 +0200 Ajout directive warning Diff: --- libgomp/testsuite/libgomp.oacc-fortran/privatized-ref-2.f90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libgomp/testsuite/libgomp.oacc-fortran/privatized-ref-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/privatized-ref-2.f90 index 14e8f99d391e..d95479599e49 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/privatized-ref-2.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/privatized-ref-2.f90 @@ -25,7 +25,7 @@ program main character(len=:), allocatable :: my_str character(len=15), allocatable :: my_str15 - A = [(3*j, j=1, 10)] + A = [(3*j, j=1, 10)] ! { dg-warning {may be used uninitialized} {variables used uninitialized in dead code} { target { ! __OPTIMIZE__ } } } call foo (A, size(A)) call bar (A) my_str = "1234567890"
[gcc(refs/users/mikael/heads/stabilisation_descriptor_v01)] Ajout directive note
https://gcc.gnu.org/g:5b9aa2101d8a98621a5b5e955f82d2b4e7079d00 commit 5b9aa2101d8a98621a5b5e955f82d2b4e7079d00 Author: Mikael Morin Date: Tue Jul 8 22:48:09 2025 +0200 Ajout directive note Diff: --- libgomp/testsuite/libgomp.oacc-fortran/privatized-ref-2.f90 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libgomp/testsuite/libgomp.oacc-fortran/privatized-ref-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/privatized-ref-2.f90 index d95479599e49..4ac7e06e2c5e 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/privatized-ref-2.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/privatized-ref-2.f90 @@ -21,11 +21,11 @@ program main implicit none (type, external) integer :: j - integer, allocatable :: A(:) + integer, allocatable :: A(:) ! { dg-note {declared here} {A's fields used initialized in dead code} { target { ! __OPTIMIZE__ } } } character(len=:), allocatable :: my_str character(len=15), allocatable :: my_str15 - A = [(3*j, j=1, 10)] ! { dg-warning {may be used uninitialized} {variables used uninitialized in dead code} { target { ! __OPTIMIZE__ } } } + A = [(3*j, j=1, 10)] ! { dg-warning {may be used uninitialized} {A's fields used uninitialized in dead code} { target { ! __OPTIMIZE__ } } } call foo (A, size(A)) call bar (A) my_str = "1234567890"
[gcc r16-2112] libstdc++: Ensure pool resources meet alignment requirements [PR118681]
https://gcc.gnu.org/g:ac2fb60a67d6d1de6446c25c5623b8a1389f4770 commit r16-2112-gac2fb60a67d6d1de6446c25c5623b8a1389f4770 Author: Jonathan Wakely Date: Fri Jul 4 16:44:13 2025 +0100 libstdc++: Ensure pool resources meet alignment requirements [PR118681] For allocations with size > alignment and size % alignment != 0 we were sometimes returning pointers that did not meet the requested alignment. For example, allocate(24, 16) would select the pool for 24-byte objects and the second allocation from that pool (at offset 24 bytes into the pool) is only 8-byte aligned not 16-byte aligned. The pool resources need to round up the requested allocation size to a multiple of the alignment, so that the selected pool will always return allocations that meet the alignment requirement. libstdc++-v3/ChangeLog: PR libstdc++/118681 * src/c++17/memory_resource.cc (choose_block_size): New function. (synchronized_pool_resource::do_allocate): Use choose_block_size to determine appropriate block size. (synchronized_pool_resource::do_deallocate): Likewise. (unsynchronized_pool_resource::do_allocate): Likewise. (unsynchronized_pool_resource::do_deallocate): Likewise. * testsuite/20_util/synchronized_pool_resource/118681.cc: New test. * testsuite/20_util/unsynchronized_pool_resource/118681.cc: New test. 
Reviewed-by: Tomasz Kamiński Diff: --- libstdc++-v3/src/c++17/memory_resource.cc | 26 -- .../20_util/synchronized_pool_resource/118681.cc | 5 ++ .../20_util/unsynchronized_pool_resource/118681.cc | 58 ++ 3 files changed, 85 insertions(+), 4 deletions(-) diff --git a/libstdc++-v3/src/c++17/memory_resource.cc b/libstdc++-v3/src/c++17/memory_resource.cc index fac4c782c5f7..fddfe2c7dd98 100644 --- a/libstdc++-v3/src/c++17/memory_resource.cc +++ b/libstdc++-v3/src/c++17/memory_resource.cc @@ -1242,12 +1242,30 @@ namespace pmr return pools; } + static inline size_t + choose_block_size(size_t bytes, size_t alignment) + { +if (bytes == 0) [[unlikely]] + return alignment; + +// Use bit_ceil in case alignment is invalid (i.e. not a power of two). +size_t mask = std::__bit_ceil(alignment) - 1; +// Round up to a multiple of alignment. +size_t block_size = (bytes + mask) & ~mask; + +if (block_size >= bytes) [[likely]] + return block_size; + +// Wrapped around to zero, bytes must have been impossibly large. 
+return numeric_limits::max(); + } + // Override for memory_resource::do_allocate void* synchronized_pool_resource:: do_allocate(size_t bytes, size_t alignment) { -const auto block_size = std::max(bytes, alignment); +const auto block_size = choose_block_size(bytes, alignment); const pool_options opts = _M_impl._M_opts; if (block_size <= opts.largest_required_pool_block) { @@ -1294,7 +1312,7 @@ namespace pmr synchronized_pool_resource:: do_deallocate(void* p, size_t bytes, size_t alignment) { -size_t block_size = std::max(bytes, alignment); +size_t block_size = choose_block_size(bytes, alignment); if (block_size <= _M_impl._M_opts.largest_required_pool_block) { const ptrdiff_t index = pool_index(block_size, _M_impl._M_npools); @@ -1453,7 +1471,7 @@ namespace pmr void* unsynchronized_pool_resource::do_allocate(size_t bytes, size_t alignment) { -const auto block_size = std::max(bytes, alignment); +const auto block_size = choose_block_size(bytes, alignment); if (block_size <= _M_impl._M_opts.largest_required_pool_block) { // Recreate pools if release() has been called: @@ -1470,7 +1488,7 @@ namespace pmr unsynchronized_pool_resource:: do_deallocate(void* p, size_t bytes, size_t alignment) { -size_t block_size = std::max(bytes, alignment); +size_t block_size = choose_block_size(bytes, alignment); if (block_size <= _M_impl._M_opts.largest_required_pool_block) { if (auto pool = _M_find_pool(block_size)) diff --git a/libstdc++-v3/testsuite/20_util/synchronized_pool_resource/118681.cc b/libstdc++-v3/testsuite/20_util/synchronized_pool_resource/118681.cc new file mode 100644 index ..6d7434ff9106 --- /dev/null +++ b/libstdc++-v3/testsuite/20_util/synchronized_pool_resource/118681.cc @@ -0,0 +1,5 @@ +// { dg-do run { target c++17 } } +// Bug 118681 - unsynchronized_pool_resource may fail to respect alignment + +#define RESOURCE std::pmr::synchronized_pool_resource +#include "../unsynchronized_pool_resource/118681.cc" diff --git 
a/libstdc++-v3/testsuite/20_util/unsynchronized_pool_resource/118681.cc b/libstdc++-v3/testsuite/20_util/unsynchronized_pool_resource/118681.cc new file mode 100644 index ..87e1b1d94043 --- /dev/null +++ b/libstdc++-v3/testsuite/20_util/unsynch
[gcc r16-2115] libstdc++: Fix double free in new pool resource test [PR118681]
https://gcc.gnu.org/g:eb412029f5cec52275d14956fe01473015a9ce0e commit r16-2115-geb412029f5cec52275d14956fe01473015a9ce0e Author: Jonathan Wakely Date: Wed Jul 9 00:54:33 2025 +0100 libstdc++: Fix double free in new pool resource test [PR118681] This was supposed to free p1 and p2, not free p2 twice. libstdc++-v3/ChangeLog: PR libstdc++/118681 * testsuite/20_util/unsynchronized_pool_resource/118681.cc: Fix deallocate argument. Diff: --- libstdc++-v3/testsuite/20_util/unsynchronized_pool_resource/118681.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libstdc++-v3/testsuite/20_util/unsynchronized_pool_resource/118681.cc b/libstdc++-v3/testsuite/20_util/unsynchronized_pool_resource/118681.cc index 87e1b1d94043..9935f793cf91 100644 --- a/libstdc++-v3/testsuite/20_util/unsynchronized_pool_resource/118681.cc +++ b/libstdc++-v3/testsuite/20_util/unsynchronized_pool_resource/118681.cc @@ -39,7 +39,7 @@ test_alignment(std::pmr::memory_resource& res, bool dealloc) if (dealloc) { - res.deallocate(p2, size, alignment); + res.deallocate(p1, size, alignment); res.deallocate(p2, size, alignment); } }
[gcc r16-2086] Fortran: Ensure finalizers are created correctly [PR120637]
https://gcc.gnu.org/g:d1f05661fa6c8a6ea6f59ad365a84469100e425e commit r16-2086-gd1f05661fa6c8a6ea6f59ad365a84469100e425e Author: Andre Vehreschild Date: Wed Jun 25 14:46:16 2025 +0200 Fortran: Ensure finalizers are created correctly [PR120637] Finalize_component freed an expression that it used to remember which components in which context it had finalized already. While it makes sense to free the copy of the expression, if it is unused, it causes issues, when comparing to a nonexistent expression. This is now detected by returning true, when the expression has been used. PR fortran/120637 gcc/fortran/ChangeLog: * class.cc (finalize_component): Return true, when a finalizable component was detected and do not free it. gcc/testsuite/ChangeLog: * gfortran.dg/asan/finalize_1.f90: New test. Diff: --- gcc/fortran/class.cc | 24 ++ gcc/testsuite/gfortran.dg/asan/finalize_1.f90 | 67 +++ 2 files changed, 81 insertions(+), 10 deletions(-) diff --git a/gcc/fortran/class.cc b/gcc/fortran/class.cc index df18601e45bd..a1c6fafa75ef 100644 --- a/gcc/fortran/class.cc +++ b/gcc/fortran/class.cc @@ -1034,7 +1034,7 @@ comp_is_finalizable (gfc_component *comp) of calling the appropriate finalizers, coarray deregistering, and deallocation of allocatable subcomponents. */ -static void +static bool finalize_component (gfc_expr *expr, gfc_symbol *derived, gfc_component *comp, gfc_symbol *stat, gfc_symbol *fini_coarray, gfc_code **code, gfc_namespace *sub_ns) @@ -1044,14 +1044,14 @@ finalize_component (gfc_expr *expr, gfc_symbol *derived, gfc_component *comp, gfc_was_finalized *f; if (!comp_is_finalizable (comp)) -return; +return false; /* If this expression with this component has been finalized already in this namespace, there is nothing to do. 
*/ for (f = sub_ns->was_finalized; f; f = f->next) { if (f->e == expr && f->c == comp) - return; + return false; } e = gfc_copy_expr (expr); @@ -1208,8 +1208,6 @@ finalize_component (gfc_expr *expr, gfc_symbol *derived, gfc_component *comp, final_wrap->ext.actual->next->next = gfc_get_actual_arglist (); final_wrap->ext.actual->next->next->expr = fini_coarray_expr; - - if (*code) { (*code)->next = final_wrap; @@ -1221,11 +1219,14 @@ finalize_component (gfc_expr *expr, gfc_symbol *derived, gfc_component *comp, else { gfc_component *c; + bool ret = false; for (c = comp->ts.u.derived->components; c; c = c->next) - finalize_component (e, comp->ts.u.derived, c, stat, fini_coarray, code, - sub_ns); - gfc_free_expr (e); + ret |= finalize_component (e, comp->ts.u.derived, c, stat, fini_coarray, + code, sub_ns); + /* Only free the expression, if it has never been used. */ + if (!ret) + gfc_free_expr (e); } /* Record that this was finalized already in this namespace. */ @@ -1234,6 +1235,7 @@ finalize_component (gfc_expr *expr, gfc_symbol *derived, gfc_component *comp, sub_ns->was_finalized->e = expr; sub_ns->was_finalized->c = comp; sub_ns->was_finalized->next = f; + return true; } @@ -2314,6 +2316,7 @@ finish_assumed_rank: { gfc_symbol *stat; gfc_code *block = NULL; + gfc_expr *ptr_expr; if (!ptr) { @@ -2359,14 +2362,15 @@ finish_assumed_rank: sub_ns); block = block->next; + ptr_expr = gfc_lval_expr_from_sym (ptr); for (comp = derived->components; comp; comp = comp->next) { if (comp == derived->components && derived->attr.extension && ancestor_wrapper && ancestor_wrapper->expr_type != EXPR_NULL) continue; - finalize_component (gfc_lval_expr_from_sym (ptr), derived, comp, - stat, fini_coarray, &block, sub_ns); + finalize_component (ptr_expr, derived, comp, stat, fini_coarray, + &block, sub_ns); if (!last_code->block->next) last_code->block->next = block; } diff --git a/gcc/testsuite/gfortran.dg/asan/finalize_1.f90 b/gcc/testsuite/gfortran.dg/asan/finalize_1.f90 new file mode 
100644 index ..ab53a9ecf2be --- /dev/null +++ b/gcc/testsuite/gfortran.dg/asan/finalize_1.f90 @@ -0,0 +1,67 @@ +!{ dg-do run } + +! PR fortran/120637 + +! Contributed by Antony Lewis +! The unused module is needed to trigger the issue of not freeing the +! memory of second module. + +module MiscUtils +implicit none + +contains + +logical function isFloat0(R) +class(*), intent(in) :: R + +select type(R) +type is (real) +isFloat0 = .true. +e
[gcc r16-2091] testsuite: i386: Fix gcc.target/i386/memcpy-pr120683-1.c etc. on Solaris/x86
https://gcc.gnu.org/g:20407a41e840440ccb5d746a5ef6e72765de55f3 commit r16-2091-g20407a41e840440ccb5d746a5ef6e72765de55f3 Author: Rainer Orth Date: Tue Jul 8 12:53:34 2025 +0200 testsuite: i386: Fix gcc.target/i386/memcpy-pr120683-1.c etc. on Solaris/x86 The new tests from commit 401199377c50045ede560daf3f6e8b51749c2a87 Author: H.J. Lu Date: Tue Jun 17 10:17:17 2025 +0800 x86: Improve vector_loop/unrolled_loop for memset/memcpy FAIL on 64-bit Solaris/x86: FAIL: gcc.target/i386/memcpy-pr120683-1.c check-function-bodies foo FAIL: gcc.target/i386/memcpy-pr120683-2.c check-function-bodies foo FAIL: gcc.target/i386/memcpy-pr120683-3.c check-function-bodies foo FAIL: gcc.target/i386/memcpy-pr120683-4.c check-function-bodies foo FAIL: gcc.target/i386/memcpy-pr120683-5.c check-function-bodies foo FAIL: gcc.target/i386/memcpy-pr120683-6.c check-function-bodies foo FAIL: gcc.target/i386/memcpy-pr120683-7.c check-function-bodies foo FAIL: gcc.target/i386/memcpy-strategy-12.c check-function-bodies foo FAIL: gcc.target/i386/memset-pr120683-1.c check-function-bodies foo FAIL: gcc.target/i386/memset-pr120683-10.c check-function-bodies foo FAIL: gcc.target/i386/memset-pr120683-11.c check-function-bodies foo FAIL: gcc.target/i386/memset-pr120683-12.c check-function-bodies foo FAIL: gcc.target/i386/memset-pr120683-13.c check-function-bodies foo FAIL: gcc.target/i386/memset-pr120683-14.c check-function-bodies foo FAIL: gcc.target/i386/memset-pr120683-15.c check-function-bodies foo FAIL: gcc.target/i386/memset-pr120683-16.c check-function-bodies foo FAIL: gcc.target/i386/memset-pr120683-17.c check-function-bodies foo FAIL: gcc.target/i386/memset-pr120683-18.c check-function-bodies foo FAIL: gcc.target/i386/memset-pr120683-19.c check-function-bodies foo FAIL: gcc.target/i386/memset-pr120683-2.c check-function-bodies foo FAIL: gcc.target/i386/memset-pr120683-20.c check-function-bodies foo FAIL: gcc.target/i386/memset-pr120683-21.c check-function-bodies foo FAIL: 
gcc.target/i386/memset-pr120683-22.c check-function-bodies foo FAIL: gcc.target/i386/memset-pr120683-23.c check-function-bodies foo FAIL: gcc.target/i386/memset-pr120683-3.c check-function-bodies foo FAIL: gcc.target/i386/memset-pr120683-4.c check-function-bodies foo FAIL: gcc.target/i386/memset-pr120683-5.c check-function-bodies foo FAIL: gcc.target/i386/memset-pr120683-6.c check-function-bodies foo FAIL: gcc.target/i386/memset-pr120683-7.c check-function-bodies foo FAIL: gcc.target/i386/memset-pr120683-8.c check-function-bodies foo FAIL: gcc.target/i386/memset-pr120683-9.c check-function-bodies foo Like several times before, they need to be compiled with -fasynchronous-unwind-tables -fdwarf2-cfi-asm. Tested on i386-pc-solaris2.11 and x86_64-pc-linux-gnu. 2025-07-08 Rainer Orth gcc/testsuite: * gcc.target/i386/memcpy-pr120683-1.c (dg-options): Add -fasynchronous-unwind-tables -fdwarf2-cfi-asm. * gcc.target/i386/memcpy-pr120683-2.c: Likewise. * gcc.target/i386/memcpy-pr120683-3.c: Likewise. * gcc.target/i386/memcpy-pr120683-4.c: Likewise. * gcc.target/i386/memcpy-pr120683-5.c: Likewise. * gcc.target/i386/memcpy-pr120683-6.c: Likewise. * gcc.target/i386/memcpy-pr120683-7.c: Likewise. * gcc.target/i386/memcpy-strategy-12.c: Likewise. * gcc.target/i386/memset-pr120683-1.c: Likewise. * gcc.target/i386/memset-pr120683-10.c: Likewise. * gcc.target/i386/memset-pr120683-11.c: Likewise. * gcc.target/i386/memset-pr120683-12.c: Likewise. * gcc.target/i386/memset-pr120683-13.c: Likewise. * gcc.target/i386/memset-pr120683-14.c: Likewise. * gcc.target/i386/memset-pr120683-15.c: Likewise. * gcc.target/i386/memset-pr120683-16.c: Likewise. * gcc.target/i386/memset-pr120683-17.c: Likewise. * gcc.target/i386/memset-pr120683-18.c: Likewise. * gcc.target/i386/memset-pr120683-19.c: Likewise. * gcc.target/i386/memset-pr120683-2.c: Likewise. * gcc.target/i386/memset-pr120683-20.c: Likewise. * gcc.target/i386/memset-pr120683-21.c: Likewise. * gcc.target/i386/memset-pr120683-22.c: Likewise. 
* gcc.target/i386/memset-pr120683-23.c: Likewise. * gcc.target/i386/memset-pr120683-3.c: Likewise. * gcc.target/i386/memset-pr120683-4.c: Likewise. * gcc.target/i386/memset-pr120683-5.c: Likewise. * gcc.target/i386/memset-pr120683-6.c: Likewise. * gcc.target/i386/memset-pr120683-7.c: Likewise. * gcc.target/i386/memset-pr120683-8.c: Likewise. * gcc.target/i386/memset-pr120683-9.c: Likewise. Diff: -
[gcc(refs/users/mikael/heads/stabilisation_descriptor_v01)] Sauvegarde/restoration cfun
https://gcc.gnu.org/g:a7c50573681b8cdbb0bee9df90074ca2081b7a91 commit a7c50573681b8cdbb0bee9df90074ca2081b7a91 Author: Mikael Morin Date: Tue Jul 8 13:13:25 2025 +0200 Sauvegarde/restoration cfun Diff: --- gcc/gimple-simulate.cc | 8 1 file changed, 8 insertions(+) diff --git a/gcc/gimple-simulate.cc b/gcc/gimple-simulate.cc index a85e6f63cc92..09491076e95d 100644 --- a/gcc/gimple-simulate.cc +++ b/gcc/gimple-simulate.cc @@ -4720,7 +4720,9 @@ simul_scope_evaluate_tests () DECL_CONTEXT (result) = func; DECL_RESULT (func) = result; + push_cfun (nullptr); init_lowered_empty_function (func, true, profile_count::one ()); + pop_cfun (); tree def_var = create_var (integer_type_node, "def_var"); DECL_CONTEXT (def_var) = func; @@ -6482,8 +6484,10 @@ simul_scope_simulate_call_tests () DECL_CONTEXT (result) = my_int_func; DECL_RESULT (my_int_func) = result; + push_cfun (nullptr); basic_block bb = init_lowered_empty_function (my_int_func, true, profile_count::one ()); + pop_cfun (); gimple_stmt_iterator gsi = gsi_last_bb (bb); greturn *ret_stmt = gimple_build_return (cst6); gsi_insert_after (&gsi, ret_stmt, GSI_CONTINUE_LINKING); @@ -6534,8 +6538,10 @@ simul_scope_simulate_call_tests () DECL_ARGUMENTS (int_func_with_arg) = arg; layout_decl (arg, 0); + push_cfun (nullptr); basic_block bb2 = init_lowered_empty_function (int_func_with_arg, true, profile_count::one ()); + pop_cfun (); gimple_stmt_iterator gsi2 = gsi_last_bb (bb2); greturn *ret_stmt2 = gimple_build_return (arg); gsi_insert_after (&gsi2, ret_stmt2, GSI_CONTINUE_LINKING); @@ -6618,7 +6624,9 @@ simul_scope_simulate_call_tests () DECL_CONTEXT (void_result) = simple_func; DECL_RESULT (simple_func) = void_result; + push_cfun (nullptr); init_lowered_empty_function (simple_func, true, profile_count::one ()); + pop_cfun (); gcall * simple_call = gimple_build_call (simple_func, 0);
[gcc r16-2090] s390: Split tests for 31bit support
https://gcc.gnu.org/g:32d41517c7276399e57b2b3f29e9790ae32d4883 commit r16-2090-g32d41517c7276399e57b2b3f29e9790ae32d4883 Author: Juergen Christ Date: Tue Jul 8 11:26:38 2025 +0200 s390: Split tests for 31bit support The new vector pattern tests used int128 without guard. This causes failure on 31bit targets. Split the tests such that the tests requiring 128 bit support are only executed on targets supporting them. Signed-off-by: Juergen Christ gcc/testsuite/ChangeLog: * gcc.target/s390/vector/pattern-avg-1.c: Split test. * gcc.target/s390/vector/pattern-mulh-1.c: Split test. * gcc.target/s390/vector/pattern-avg-2.c: New test. * gcc.target/s390/vector/pattern-mulh-2.c: New test. Diff: --- .../gcc.target/s390/vector/pattern-avg-1.c | 3 +-- .../gcc.target/s390/vector/pattern-avg-2.c | 23 +++ .../gcc.target/s390/vector/pattern-mulh-1.c| 3 +-- .../gcc.target/s390/vector/pattern-mulh-2.c| 26 ++ 4 files changed, 51 insertions(+), 4 deletions(-) diff --git a/gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c b/gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c index a15301aabe54..30c6ed476846 100644 --- a/gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c +++ b/gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c @@ -21,6 +21,5 @@ TEST(char,short,16) TEST(short,int,8) TEST(int,long,4) -TEST(long,__int128,2) -/* { dg-final { scan-tree-dump-times "\.AVG_CEIL" 8 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "\.AVG_CEIL" 6 "optimized" } } */ diff --git a/gcc/testsuite/gcc.target/s390/vector/pattern-avg-2.c b/gcc/testsuite/gcc.target/s390/vector/pattern-avg-2.c new file mode 100644 index ..1cc614eb1dea --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/vector/pattern-avg-2.c @@ -0,0 +1,23 @@ +/* { dg-do compile { target int128 } } */ +/* { dg-options "-O3 -mzarch -march=z16 -ftree-vectorize -fdump-tree-optimized" } */ + +#define TEST(T1,T2,N) \ + void \ + avg##T1 (signed T1 *__restrict res, signed T1 *__restrict a, \ + signed T1 *__restrict b) \ + { \ +for (int 
i = 0; i < N; ++i) \ + res[i] = ((signed T2)a[i] + b[i] + 1) >> 1; \ + } \ +\ + void \ + uavg##T1 (unsigned T1 *__restrict res, unsigned T1 *__restrict a, \ +unsigned T1 *__restrict b) \ + { \ +for (int i = 0; i < N; ++i) \ + res[i] = ((unsigned T2)a[i] + b[i] + 1) >> 1; \ + } + +TEST(long,__int128,2) + +/* { dg-final { scan-tree-dump-times "\.AVG_CEIL" 2 "optimized" } } */ diff --git a/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c b/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c index cd8e4e7d7a09..f71ef06c8252 100644 --- a/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c +++ b/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c @@ -24,6 +24,5 @@ TEST(char,short,16,8) TEST(short,int,8,16) TEST(int,long,4,32) -TEST(long,__int128,2,64) -/* { dg-final { scan-tree-dump-times "\.MULH" 8 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "\.MULH" 6 "optimized" } } */ diff --git a/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-2.c b/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-2.c new file mode 100644 index ..6ac6855b1bdf --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-2.c @@ -0,0 +1,26 @@ +/* { dg-do compile { target int128 } } */ +/* { dg-options "-O3 -mzarch -march=arch15 -ftree-vectorize -fdump-tree-optimized" } */ + +#define TEST(T1,T2,N,S) \ + void \ + mulh##T1 (signed T1 *__restrict res, \ +signed T1 *__restrict l,\ +signed T1 *__restrict r)\ + { \ +for (int i = 0; i < N; ++i) \ + res[i] = (signed T1) (((signed T2)l[i] * (signed T2)r[i]) >> S); \ + } \ +\ + void
[gcc] Created branch 'mikael/heads/base_patch' in namespace 'refs/users'
The branch 'mikael/heads/base_patch' was created in namespace 'refs/users' pointing to: 8f05d8056216... Correction array_constructor_1
[gcc] Created branch 'mikael/heads/gimple_simulate_v01' in namespace 'refs/users'
The branch 'mikael/heads/gimple_simulate_v01' was created in namespace 'refs/users' pointing to: 8b7bf0d4fa6a... Prise en charge affichage TARGET_MEM_REF
[gcc] Deleted branch 'mikael/heads/stabilisation_descriptor_v01' in namespace 'refs/users'
The branch 'mikael/heads/stabilisation_descriptor_v01' in namespace 'refs/users' was deleted. It previously pointed to: 2865e8dcb340... Essai simplification évaluation Diff: !!! WARNING: THE FOLLOWING COMMITS ARE NO LONGER ACCESSIBLE (LOST): --- 2865e8d... Essai simplification évaluation f09bd1d... Ajout commentaire 1353289... Déplacement variables après réallocation 2ef2bbc... fortran: generate array reallocation out of loops
[gcc] Created branch 'mikael/heads/stabilisation_descriptor_v01' in namespace 'refs/users'
The branch 'mikael/heads/stabilisation_descriptor_v01' was created in namespace 'refs/users' pointing to: d53ac098d644... Suppression mise à jour delta.
[gcc(refs/users/mikael/heads/stabilisation_descriptor_v01)] Déplacement variables après réallocation
https://gcc.gnu.org/g:2647f123f1c984b6c780aaf5ae16129620af3e54 commit 2647f123f1c984b6c780aaf5ae16129620af3e54 Author: Mikael Morin Date: Mon Jul 7 11:46:08 2025 +0200 Déplacement variables après réallocation Sauvegarde data Renommage nom fonction. Diff: --- gcc/fortran/gfortran.h | 4 -- gcc/fortran/trans-array.cc | 167 ++--- gcc/fortran/trans-expr.cc | 14 ++-- 3 files changed, 102 insertions(+), 83 deletions(-) diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h index 6848bd1762d3..69367e638c5b 100644 --- a/gcc/fortran/gfortran.h +++ b/gcc/fortran/gfortran.h @@ -2028,10 +2028,6 @@ typedef struct gfc_symbol /* Set if this should be passed by value, but is not a VALUE argument according to the Fortran standard. */ unsigned pass_as_value:1; - /* Set if an allocatable array variable has been allocated in the current - scope. Used in the suppression of uninitialized warnings in reallocation - on assignment. */ - unsigned allocated_in_scope:1; /* Set if an external dummy argument is called with different argument lists. This is legal in Fortran, but can cause problems with autogenerated C prototypes for C23. */ diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc index 7be2d7b11a62..3cd6d90f47e7 100644 --- a/gcc/fortran/trans-array.cc +++ b/gcc/fortran/trans-array.cc @@ -3420,6 +3420,23 @@ gfc_add_loop_ss_code (gfc_loopinfo * loop, gfc_ss * ss, bool subscript, } +/* Given an array descriptor expression DESCR and its data pointer DATA, decide + whether to either save the data pointer to a variable and use the variable or + use the data pointer expression directly without any intermediary variable. 
+ */ + +static bool +save_descriptor_data (tree descr, tree data) +{ + return !(DECL_P (data) + || (TREE_CODE (data) == ADDR_EXPR + && DECL_P (TREE_OPERAND (data, 0))) + || (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (descr)) + && TREE_CODE (descr) == COMPONENT_REF + && GFC_CLASS_TYPE_P (TREE_TYPE (TREE_OPERAND (descr, 0); +} + + /* Translate expressions for the descriptor and data pointer of a SS. */ /*GCC ARRAYS*/ @@ -3466,17 +3483,14 @@ gfc_conv_ss_descriptor (stmtblock_t * block, gfc_ss * ss, int base) Otherwise we must evaluate it now to avoid breaking dependency analysis by pulling the expressions for elemental array indices inside the loop. */ - if (!(DECL_P (tmp) - || (TREE_CODE (tmp) == ADDR_EXPR - && DECL_P (TREE_OPERAND (tmp, 0))) - || (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (se.expr)) - && TREE_CODE (se.expr) == COMPONENT_REF - && GFC_CLASS_TYPE_P (TREE_TYPE (TREE_OPERAND (se.expr, 0)) + if (save_descriptor_data (se.expr, tmp) && !ss->is_alloc_lhs) tmp = gfc_evaluate_now (tmp, block); info->data = tmp; tmp = gfc_conv_array_offset (se.expr); - info->offset = gfc_evaluate_now (tmp, block); + if (!ss->is_alloc_lhs) + tmp = gfc_evaluate_now (tmp, block); + info->offset = tmp; /* Make absolutely sure that the saved_offset is indeed saved so that the variable is still accessible after the loops @@ -4769,13 +4783,12 @@ gfc_trans_scalarized_loop_boundary (gfc_loopinfo * loop, stmtblock_t * body) static void evaluate_bound (stmtblock_t *block, tree *bounds, gfc_expr ** values, - tree desc, int dim, bool lbound, bool deferred) + tree desc, int dim, bool lbound, bool deferred, bool save_value) { gfc_se se; gfc_expr * input_val = values[dim]; tree *output = &bounds[dim]; - if (input_val) { /* Specified section bound. */ @@ -4801,7 +4814,8 @@ evaluate_bound (stmtblock_t *block, tree *bounds, gfc_expr ** values, *output = lbound ? 
gfc_conv_array_lbound (desc, dim) : gfc_conv_array_ubound (desc, dim); } - *output = gfc_evaluate_now (*output, block); + if (save_value) +*output = gfc_evaluate_now (*output, block); } @@ -4834,18 +4848,18 @@ gfc_conv_section_startstride (stmtblock_t * block, gfc_ss * ss, int dim) || ar->dimen_type[dim] == DIMEN_THIS_IMAGE); desc = info->descriptor; stride = ar->stride[dim]; - + bool save_value = !ss->is_alloc_lhs; /* Calculate the start of the range. For vector subscripts this will be the range of the vector. */ evaluate_bound (block, info->start, ar->start, desc, dim, true, - ar->as->type == AS_DEFERRED); + ar->as->type == AS_DEFERRED, save_value); /* Similarly calculate the end. Although this is not used in the scalarizer, it is needed when checking bounds and where the end is an expression with side-effects. */ evaluate_bound (block, info->end, ar->end, desc, dim, false, - ar->as->type == AS_DEFERRED); +
[gcc(refs/users/mikael/heads/stabilisation_descriptor_v01)] fortran: generate array reallocation out of loops
https://gcc.gnu.org/g:c7c760c314a9674b2f0a3665c8a0d1e0db99694f commit c7c760c314a9674b2f0a3665c8a0d1e0db99694f Author: Mikael Morin Date: Sun Jul 6 16:56:16 2025 +0200 fortran: generate array reallocation out of loops Generate the array reallocation on assignment code before entering the scalarization loops. This doesn't move the generated code itself, which was already put out of the outermost loop, but only changes the current scope at the time the code is generated. This is a prerequisite for a followup patch that makes the reallocation code create new variables. Without this change the new variables would be declared in the innermost loop body and couldn't be used outside of it. gcc/fortran/ChangeLog: * trans-expr.cc (gfc_trans_assignment_1): Generate array reallocation code before entering the scalarisation loops. Diff: --- gcc/fortran/trans-expr.cc | 20 +++- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/gcc/fortran/trans-expr.cc b/gcc/fortran/trans-expr.cc index 3e0d763d2fb0..65d0ee4ff235 100644 --- a/gcc/fortran/trans-expr.cc +++ b/gcc/fortran/trans-expr.cc @@ -12943,6 +12943,7 @@ gfc_trans_assignment_1 (gfc_expr * expr1, gfc_expr * expr2, bool init_flag, rhs_caf_attr = gfc_caf_attr (expr2, false, &rhs_refs_comp); } + tree reallocation = NULL_TREE; if (lss != gfc_ss_terminator) { /* The assignment needs scalarization. */ @@ -13011,6 +13012,14 @@ gfc_trans_assignment_1 (gfc_expr * expr1, gfc_expr * expr2, bool init_flag, ompws_flags |= OMPWS_SCALARIZER_WS | OMPWS_SCALARIZER_BODY; } + /* F2003: Allocate or reallocate lhs of allocatable array. */ + if (realloc_flag) + { + realloc_lhs_warning (expr1->ts.type, true, &expr1->where); + ompws_flags &= ~OMPWS_SCALARIZER_WS; + reallocation = gfc_alloc_allocatable_for_assignment (&loop, expr1, expr2); + } + /* Start the scalarized loop body. 
*/ gfc_start_scalarized_body (&loop, &body); } @@ -13319,15 +13328,8 @@ gfc_trans_assignment_1 (gfc_expr * expr1, gfc_expr * expr2, bool init_flag, gfc_add_expr_to_block (&body, tmp); } - /* F2003: Allocate or reallocate lhs of allocatable array. */ - if (realloc_flag) - { - realloc_lhs_warning (expr1->ts.type, true, &expr1->where); - ompws_flags &= ~OMPWS_SCALARIZER_WS; - tmp = gfc_alloc_allocatable_for_assignment (&loop, expr1, expr2); - if (tmp != NULL_TREE) - gfc_add_expr_to_block (&loop.code[expr1->rank - 1], tmp); - } + if (reallocation != NULL_TREE) + gfc_add_expr_to_block (&loop.code[loop.dimen - 1], reallocation); if (maybe_workshare) ompws_flags &= ~OMPWS_SCALARIZER_BODY;
[gcc(refs/users/mikael/heads/stabilisation_descriptor_v01)] Simplification mise à jour descripteur
https://gcc.gnu.org/g:a0951d4448036d35343b534b58c85f96f76ce7e6 commit a0951d4448036d35343b534b58c85f96f76ce7e6 Author: Mikael Morin Date: Mon Jul 7 19:00:09 2025 +0200 Simplification mise à jour descripteur Diff: --- gcc/fortran/trans-array.cc | 28 +--- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc index 832db1c3df4e..2d3ae44a8186 100644 --- a/gcc/fortran/trans-array.cc +++ b/gcc/fortran/trans-array.cc @@ -11375,36 +11375,26 @@ update_reallocated_descriptor (stmtblock_t *block, gfc_loopinfo *loop) gcc_assert (s->info->type == GFC_SS_SECTION); gfc_array_info *info = &s->info->data.array; - tree desc = info->descriptor; -#define UPDATE_VALUE(field, value) \ +#define UPDATE_VALUE(value) \ do \ { \ - if (false && (field) && VAR_P ((field))) \ - { \ - tree val = (value); \ - gfc_add_modify (block, (field), val); \ - } \ - else \ - (field) = gfc_evaluate_now ((field), block); \ + value = gfc_evaluate_now (value, block); \ } \ while (0) - if (save_descriptor_data (desc, info->data)) - UPDATE_VALUE (info->data, gfc_conv_descriptor_data_get (desc)); - UPDATE_VALUE (info->offset, gfc_conv_descriptor_offset_get (desc)); + if (save_descriptor_data (info->descriptor, info->data)) + UPDATE_VALUE (info->data); + UPDATE_VALUE (info->offset); info->saved_offset = info->offset; for (int i = 0; i < s->dimen; i++) { int dim = s->dim[i]; tree tree_dim = gfc_rank_cst[dim]; - UPDATE_VALUE (info->start[dim], - gfc_conv_descriptor_lbound_get (desc, tree_dim)); - UPDATE_VALUE (info->end[dim], - gfc_conv_descriptor_ubound_get (desc, tree_dim)); - UPDATE_VALUE (info->stride[dim], - gfc_conv_descriptor_stride_get (desc, tree_dim)); - info->delta[dim] = gfc_evaluate_now (info->delta[dim], block); + UPDATE_VALUE (info->start[dim]); + UPDATE_VALUE (info->end[dim]); + UPDATE_VALUE (info->stride[dim]); + UPDATE_VALUE (info->delta[dim]); } #undef UPDATE_VALUE
[gcc(refs/users/mikael/heads/stabilisation_descriptor_v01)] Essai simplification évaluation
https://gcc.gnu.org/g:5531dc0eabad3686bdc2c0513907a8b8a083953b commit 5531dc0eabad3686bdc2c0513907a8b8a083953b Author: Mikael Morin Date: Mon Jul 7 14:50:14 2025 +0200 Essai simplification évaluation Diff: --- gcc/fortran/trans-array.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc index abf535cdaedb..832db1c3df4e 100644 --- a/gcc/fortran/trans-array.cc +++ b/gcc/fortran/trans-array.cc @@ -11380,7 +11380,7 @@ update_reallocated_descriptor (stmtblock_t *block, gfc_loopinfo *loop) #define UPDATE_VALUE(field, value) \ do \ { \ - if ((field) && VAR_P ((field))) \ + if (false && (field) && VAR_P ((field))) \ { \ tree val = (value); \ gfc_add_modify (block, (field), val); \
[gcc(refs/users/mikael/heads/stabilisation_descriptor_v01)] Correction array_constructor_1
https://gcc.gnu.org/g:1f61e0b0fc176a4ccd40808feb8338c8f18cf558 commit 1f61e0b0fc176a4ccd40808feb8338c8f18cf558 Author: Mikael Morin Date: Sat Jul 5 15:05:20 2025 +0200 Correction array_constructor_1 Diff: --- gcc/testsuite/gfortran.dg/asan/array_constructor_1.f90 | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gcc/testsuite/gfortran.dg/asan/array_constructor_1.f90 b/gcc/testsuite/gfortran.dg/asan/array_constructor_1.f90 index 45eafacd5a67..a0c55076a9ae 100644 --- a/gcc/testsuite/gfortran.dg/asan/array_constructor_1.f90 +++ b/gcc/testsuite/gfortran.dg/asan/array_constructor_1.f90 @@ -9,6 +9,8 @@ program grow_type_array type(container), allocatable :: list(:) +allocate(list(0)) + list = [list, new_elem(5)] deallocate(list)
[gcc] Deleted branch 'mikael/heads/gimple_simulate_v01' in namespace 'refs/users'
The branch 'mikael/heads/gimple_simulate_v01' in namespace 'refs/users' was deleted. It previously pointed to: 8e5da2bd03ad... gimple-simulate: Add a gimple IR interpreter/simulator
[gcc(refs/users/mikael/heads/stabilisation_descriptor_v01)] Suppression mise à jour delta.
https://gcc.gnu.org/g:d53ac098d6447299535c0748f0f69dd23bf39c2e commit d53ac098d6447299535c0748f0f69dd23bf39c2e Author: Mikael Morin Date: Mon Jul 7 19:01:09 2025 +0200 Suppression mise à jour delta. Diff: --- gcc/fortran/trans-array.cc | 11 --- 1 file changed, 11 deletions(-) diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc index 2d3ae44a8186..fcc9daa893d1 100644 --- a/gcc/fortran/trans-array.cc +++ b/gcc/fortran/trans-array.cc @@ -11394,7 +11394,6 @@ update_reallocated_descriptor (stmtblock_t *block, gfc_loopinfo *loop) UPDATE_VALUE (info->start[dim]); UPDATE_VALUE (info->end[dim]); UPDATE_VALUE (info->stride[dim]); - UPDATE_VALUE (info->delta[dim]); } #undef UPDATE_VALUE @@ -11768,16 +11767,6 @@ gfc_alloc_allocatable_for_assignment (gfc_loopinfo *loop, tmp = gfc_conv_descriptor_offset (desc); gfc_add_modify (&fblock, tmp, offset); - /* Now set the deltas for the lhs. */ - for (n = 0; n < expr1->rank; n++) -{ - tmp = gfc_conv_descriptor_lbound_get (desc, gfc_rank_cst[n]); - dim = lss->dim[n]; - tmp = fold_build2_loc (input_location, MINUS_EXPR, -gfc_array_index_type, tmp, -loop->from[dim]); -} - /* Take into account _len of unlimited polymorphic entities, so that span for array descriptors and allocation sizes are computed correctly. */ if (UNLIMITED_POLY (expr2))
[gcc(refs/users/mikael/heads/stabilisation_descriptor_v01)] Ajout commentaire
https://gcc.gnu.org/g:5e187ee28b94066c3f7a3ef573d2d0c55d0e1f97 commit 5e187ee28b94066c3f7a3ef573d2d0c55d0e1f97 Author: Mikael Morin Date: Mon Jul 7 14:38:51 2025 +0200 Ajout commentaire Diff: --- gcc/fortran/trans-array.cc | 13 + 1 file changed, 13 insertions(+) diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc index 3cd6d90f47e7..abf535cdaedb 100644 --- a/gcc/fortran/trans-array.cc +++ b/gcc/fortran/trans-array.cc @@ -11352,6 +11352,19 @@ concat_str_length (gfc_expr* expr) } +/* Among the scalarization chain of LOOP, find the element associated with an + allocatable array on the lhs of an assignment and evaluate its fields + (bounds, offset, etc) to new variables, putting the new code in BLOCK. This + function is to be called after putting the reallocation code in BLOCK and + before the beginning of the scalarization loop body. + + The fields to be saved are expected to hold on entry to the function + expressions referencing the array descriptor. Especially the expressions + shouldn't be already temporary variable references as the value saved before + reallocation would be incorrect after reallocation. + At the end of the function, the expressions have been replaced with variable + references. */ + static void update_reallocated_descriptor (stmtblock_t *block, gfc_loopinfo *loop) {
[gcc(refs/users/mikael/heads/stabilisation_descriptor_v01)] Prise en charge affichage TARGET_MEM_REF
https://gcc.gnu.org/g:4d9ba90bdfabf26d022c80d07f1e8c221e38ceec commit 4d9ba90bdfabf26d022c80d07f1e8c221e38ceec Author: Mikael Morin Date: Mon Jul 7 08:52:38 2025 +0200 Prise en charge affichage TARGET_MEM_REF Diff: --- gcc/gimple-simulate.cc| 87 --- gcc/selftest-run-tests.cc | 2 ++ gcc/selftest.h| 1 + 3 files changed, 86 insertions(+), 4 deletions(-) diff --git a/gcc/gimple-simulate.cc b/gcc/gimple-simulate.cc index aa29b68b748c..a85e6f63cc92 100644 --- a/gcc/gimple-simulate.cc +++ b/gcc/gimple-simulate.cc @@ -903,6 +903,9 @@ static tree find_mem_ref_replacement (simul_scope & context, tree data_ref, unsigned offset, unsigned min_size) { + gcc_assert (TREE_CODE (data_ref) == MEM_REF + || TREE_CODE (data_ref) == TARGET_MEM_REF); + tree ptr = TREE_OPERAND (data_ref, 0); data_value ptr_val = context.evaluate (ptr); if (ptr_val.classify () != VAL_ADDRESS) @@ -923,12 +926,30 @@ find_mem_ref_replacement (simul_scope & context, tree data_ref, { tree access_offset = TREE_OPERAND (data_ref, 1); gcc_assert (TREE_CONSTANT (access_offset)); - gcc_assert (tree_fits_shwi_p (access_offset)); - HOST_WIDE_INT shwi_offset = tree_to_shwi (access_offset); - gcc_assert (offset < UINT_MAX - shwi_offset); - HOST_WIDE_INT remaining_offset = shwi_offset * CHAR_BIT + gcc_assert (tree_fits_uhwi_p (access_offset)); + HOST_WIDE_INT uhwi_offset = tree_to_uhwi (access_offset); + gcc_assert (offset < UINT_MAX - uhwi_offset); + HOST_WIDE_INT remaining_offset = uhwi_offset * CHAR_BIT + offset + ptr_address->offset; + if (TREE_CODE (data_ref) == TARGET_MEM_REF) + { + tree idx = TREE_OPERAND (data_ref, 2); + data_value idx_val = context.evaluate (idx); + gcc_assert (idx_val.classify () == VAL_KNOWN); + wide_int wi_idx = idx_val.get_known (); + + tree step = TREE_OPERAND (data_ref, 3); + data_value step_val = context.evaluate (step); + gcc_assert (step_val.classify () == VAL_KNOWN); + wide_int wi_step = step_val.get_known (); + + wi_idx *= wi_step; + gcc_assert (wi::fits_uhwi_p (wi_idx)); + 
HOST_WIDE_INT idx_offset = wi_idx.to_uhwi (); + remaining_offset += idx_offset * CHAR_BIT; + } + return pick_subref_at (var_ref, remaining_offset, nullptr, min_size); } } @@ -957,6 +978,7 @@ context_printer::print_first_data_ref_part (simul_scope & context, switch (TREE_CODE (data_ref)) { case MEM_REF: +case TARGET_MEM_REF: { tree mem_replacement = find_mem_ref_replacement (context, data_ref, offset, min_size); @@ -4432,6 +4454,63 @@ context_printer_print_value_update_tests () printer9.print_value_update (ctx9, ref9, val9_addr_i); const char *str9 = pp_formatted_text (&pp9); ASSERT_STREQ (str9, "# v17c[8B:+8B] = &i\n"); + + + heap_memory mem10; + context_printer printer10; + pretty_printer & pp10 = printer10.pp; + pp_buffer (&pp10)->m_flush_p = false; + + tree a11c_10 = build_array_type_nelts (char_type_node, 11); + tree v11c_10 = create_var (a11c_10, "v11c"); + tree p_10 = create_var (ptr_type_node, "p"); + tree i_10 = create_var (size_type_node, "i"); + + vec decls10{}; + decls10.safe_push (v11c_10); + decls10.safe_push (p_10); + decls10.safe_push (i_10); + + context_builder builder10; + builder10.add_decls (&decls10); + simul_scope ctx10 = builder10.build (mem10, printer10); + + data_storage *strg10_v11 = ctx10.find_reachable_var (v11c_10); + gcc_assert (strg10_v11 != nullptr); + storage_address addr10_v11 (strg10_v11->get_ref (), 0); + + data_value val10_addr_v11 (ptr_type_node); + val10_addr_v11.set_address (addr10_v11); + + data_storage *strg10_p = ctx10.find_reachable_var (p_10); + gcc_assert (strg10_p != nullptr); + strg10_p->set (val10_addr_v11); + + data_value val10_cst_2 (size_type_node); + wide_int cst2_10 = wi::uhwi (2, TYPE_PRECISION (size_type_node)); + val10_cst_2.set_known (cst2_10); + + data_storage *strg10_i = ctx10.find_reachable_var (i_10); + gcc_assert (strg10_i != nullptr); + strg10_i->set (val10_cst_2); + + tree int_ptr_10 = build_pointer_type (integer_type_node); + + tree ref10 = build5 (TARGET_MEM_REF, integer_type_node, p_10, + 
build_int_cst (int_ptr_10, -4), i_10, + build_int_cst (size_type_node, 4), NULL_TREE); + + data_value val10_cst_13 (integer_type_node); + wide_int wi10_13 = wi::shwi (13, TYPE_PRECISION (integer_type_node)); + val10_cst_13.set_known (wi10_13); + + printer10.print_value_update (ctx10, ref10, val10_cst_13); + const char *str10 = pp_formatted_text (&pp10); + ASSERT_STREQ (str10, + "# v11c[4] = 13\n" + "# v11c[5] = 0\n" + "# v11c[6] = 0\n" +
[gcc r15-9935] Fortran: Ensure arguments in coarray call get unique components in add_data [PR120847]
https://gcc.gnu.org/g:67452737d8e6d2629104ac811eaf6ec8c1790614 commit r15-9935-g67452737d8e6d2629104ac811eaf6ec8c1790614 Author: Andre Vehreschild Date: Fri Jun 27 15:31:21 2025 +0200 Fortran: Ensure arguments in coarray call get unique components in add_data [PR120847] PR fortran/120847 gcc/fortran/ChangeLog: * coarray.cc (check_add_new_comp_handle_array): Make the count of components static to be able to create more than one. Create an array component only for array expressions. gcc/testsuite/ChangeLog: * gfortran.dg/coarray/coindexed_7.f90: New test. (cherry picked from commit ee31ab9b1950b7f47f030bda231ace34d187ae26) Diff: --- gcc/fortran/coarray.cc| 4 ++-- gcc/testsuite/gfortran.dg/coarray/coindexed_7.f90 | 24 +++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/gcc/fortran/coarray.cc b/gcc/fortran/coarray.cc index 70583254d0d8..a2cfdc0fb52c 100644 --- a/gcc/fortran/coarray.cc +++ b/gcc/fortran/coarray.cc @@ -498,7 +498,7 @@ check_add_new_comp_handle_array (gfc_expr *e, gfc_symbol *type, gfc_symbol *add_data) { gfc_component *comp; - int cnt = -1; + static int cnt = -1; gfc_symtree *caller_image; gfc_code *pre_code = caf_accessor_prepend; bool static_array_or_scalar = true; @@ -561,7 +561,7 @@ check_add_new_comp_handle_array (gfc_expr *e, gfc_symbol *type, else { comp->initializer = gfc_copy_expr (e); - if (e_attr.dimension) + if (e_attr.dimension && e->rank) { comp->attr.dimension = 1; comp->as = get_arrayspec_from_expr (e); diff --git a/gcc/testsuite/gfortran.dg/coarray/coindexed_7.f90 b/gcc/testsuite/gfortran.dg/coarray/coindexed_7.f90 new file mode 100644 index ..066397024f47 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/coarray/coindexed_7.f90 @@ -0,0 +1,24 @@ +!{ dg-do compile } + +! Check PR120847 is fixed. 
+ +program p + implicit none + + type T +integer, allocatable :: i(:, :) [:] + end type T + + type(T) :: o + integer, allocatable :: c[:] + integer :: i + + c = 7 + + allocate(o%i(4, 5)[*], source=6) + + do i = 1, 4 +c = o%i(mod(i, 2), mod(i, 3))[1] + end do + +end program p
[gcc r15-9934] Fortran: Fix non-conformable corank on this_image ref [PR120843]
https://gcc.gnu.org/g:887ddb4d8c3ddd27c3a5cfd79f21dd52403c82fa commit r15-9934-g887ddb4d8c3ddd27c3a5cfd79f21dd52403c82fa Author: Andre Vehreschild Date: Fri Jun 27 14:39:13 2025 +0200 Fortran: Fix non-conformable corank on this_image ref [PR120843] PR fortran/120843 gcc/fortran/ChangeLog: * resolve.cc (resolve_operator): Report inconsistent coranks only when not referencing this_image. (gfc_op_rank_conformable): Treat coranks as inconformable only when a coindex other than implicit this_image is used. gcc/testsuite/ChangeLog: * gfortran.dg/coarray/coindexed_6.f90: New test. (cherry picked from commit 1b0930e9046e0b6201fa03c2843f3b06e522acd1) Diff: --- gcc/fortran/resolve.cc| 7 --- gcc/testsuite/gfortran.dg/coarray/coindexed_6.f90 | 17 + 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/gcc/fortran/resolve.cc b/gcc/fortran/resolve.cc index ee5b22a728d4..2fbe7c451428 100644 --- a/gcc/fortran/resolve.cc +++ b/gcc/fortran/resolve.cc @@ -4828,7 +4828,8 @@ resolve_operator (gfc_expr *e) if (e->shape == NULL) e->shape = gfc_copy_shape (op2->shape, op2->corank); } - else + else if ((op1->ref && !gfc_ref_this_image (op1->ref)) + || (op2->ref && !gfc_ref_this_image (op2->ref))) { gfc_error ("Inconsistent coranks for operator at %L and %L", &op1->where, &op2->where); @@ -6070,8 +6071,8 @@ gfc_op_rank_conformable (gfc_expr *op1, gfc_expr *op2) gfc_expression_rank (op2); return (op1->rank == 0 || op2->rank == 0 || op1->rank == op2->rank) -&& (op1->corank == 0 || op2->corank == 0 -|| op1->corank == op2->corank); +&& (op1->corank == 0 || op2->corank == 0 || op1->corank == op2->corank +|| (!gfc_is_coindexed (op1) && !gfc_is_coindexed (op2))); } /* Resolve a variable expression. */ diff --git a/gcc/testsuite/gfortran.dg/coarray/coindexed_6.f90 b/gcc/testsuite/gfortran.dg/coarray/coindexed_6.f90 new file mode 100644 index ..8f5dcabb859a --- /dev/null +++ b/gcc/testsuite/gfortran.dg/coarray/coindexed_6.f90 @@ -0,0 +1,17 @@ +!{ dg-do compile } + +! 
Check PR120843 is fixed + +program p + implicit none + + integer, allocatable :: arr(:,:) [:,:] + integer :: c[*] + + c = 7 + + allocate(arr(4,3)[2,*], source=6) + + if (arr(2,2)* c /= 42) stop 1 + +end program p
[gcc r16-2087] Allow the target to request a masked vector epilogue
https://gcc.gnu.org/g:df64d099faf843d90e8fe29aec17d84277986ee9 commit r16-2087-gdf64d099faf843d90e8fe29aec17d84277986ee9 Author: Richard Biener Date: Sun May 25 19:28:54 2025 +0200 Allow the target to request a masked vector epilogue Targets recently got the ability to request the vector mode to be used for a vector epilogue (or the epilogue of a vector epilogue). The following adds the ability for it to indicate the epilogue should use loop masking, irrespective of the --param vect-partial-vector-usage default setting. The patch below uses a separate flag from the epilogue mode, not addressing the issue that on x86 the vector_modes mode iteration hook would not allow for both masked and unmasked variants to be tried and costed given this doesn't naturally map to modes on that target. That's left for a future exercise - turning on cost comparison for the x86 backend would be a prerequesite there. * tree-vectorizer.h (vector_costs::suggested_epilogue_mode): Add masked output parameter and return m_masked_epilogue. (vector_costs::m_masked_epilogue): New tristate flag. (vector_costs::vector_costs): Initialize m_masked_epilogue. * tree-vect-loop.cc (vect_analyze_loop_1): Pass in masked flag to optionally initialize can_use_partial_vectors_p. (vect_analyze_loop): For epilogues also get whether to use a masked epilogue for this loop from the target and use that for the first epilogue mode we try. Diff: --- gcc/tree-vect-loop.cc | 35 ++- gcc/tree-vectorizer.h | 13 ++--- 2 files changed, 36 insertions(+), 12 deletions(-) diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 2782d61a5fc2..d5044d5fe227 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -58,6 +58,7 @@ along with GCC; see the file COPYING3. If not see #include "tree-eh.h" #include "case-cfn-macros.h" #include "langhooks.h" +#include "opts.h" /* Loop Vectorization Pass. 
@@ -3400,8 +3401,10 @@ vect_joust_loop_vinfos (loop_vec_info new_loop_vinfo, } /* Analyze LOOP with VECTOR_MODES[MODE_I] and as epilogue if ORIG_LOOP_VINFO is - not NULL. Set AUTODETECTED_VECTOR_MODE if VOIDmode and advance - MODE_I to the next mode useful to analyze. + not NULL. When MASKED_P is not -1 override the default + LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P with it. + Set AUTODETECTED_VECTOR_MODE if VOIDmode and advance MODE_I to the next + mode useful to analyze. Return the loop_vinfo on success and wrapped null on failure. */ static opt_loop_vec_info @@ -3409,6 +3412,7 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared, const vect_loop_form_info *loop_form_info, loop_vec_info orig_loop_vinfo, const vector_modes &vector_modes, unsigned &mode_i, +int masked_p, machine_mode &autodetected_vector_mode, bool &fatal) { @@ -3417,6 +3421,8 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared, machine_mode vector_mode = vector_modes[mode_i]; loop_vinfo->vector_mode = vector_mode; + if (masked_p != -1) +loop_vinfo->can_use_partial_vectors_p = masked_p; unsigned int suggested_unroll_factor = 1; unsigned slp_done_for_suggested_uf = 0; @@ -3600,7 +3606,7 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call, cached_vf_per_mode[last_mode_i] = -1; opt_loop_vec_info loop_vinfo = vect_analyze_loop_1 (loop, shared, &loop_form_info, - NULL, vector_modes, mode_i, + NULL, vector_modes, mode_i, -1, autodetected_vector_mode, fatal); if (fatal) break; @@ -3685,18 +3691,21 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call, array may contain length-agnostic and length-specific modes. Their ordering is not guaranteed, so we could end up picking a mode for the main loop that is after the epilogue's optimal mode. 
*/ + int masked_p = -1; if (!unlimited_cost_model (loop) - && first_loop_vinfo->vector_costs->suggested_epilogue_mode () != VOIDmode) + && (first_loop_vinfo->vector_costs->suggested_epilogue_mode (masked_p) + != VOIDmode)) { vector_modes[0] - = first_loop_vinfo->vector_costs->suggested_epilogue_mode (); + = first_loop_vinfo->vector_costs->suggested_epilogue_mode (masked_p); cached_vf_per_mode[0] = 0; } else vector_modes[0] = autodetected_vector_mode; mode_i = 0; - bool supports_partial_vectors = param_vect_partial_vector_usage != 0; + bool supports_partial_vectors = (param_vect_partial_vector_usage != 0 + || masked_p == 1); machine_mode mask_mode; if (support
[gcc r16-2088] add masked-epilogue tuning
https://gcc.gnu.org/g:e9079e4f43d13579c41110ce1871051a43c577b6 commit r16-2088-ge9079e4f43d13579c41110ce1871051a43c577b6 Author: Richard Biener Date: Sun May 25 19:29:04 2025 +0200 add masked-epilogue tuning The following adds a x86 tuning to enable the use of AVX512 masked epilogues in cases we heuristically determine it to be not detrimental by high chance. Basically problematic cases are when there are data streams that are both stored and loaded from and an outer loop could end up executing only the inner loop masked epilogue and with unlucky data stream advacement from the outer loop end up needing to forward from masked stores to masked loads. This isn't very well handled, esp. for the case where unmasked operations would not need to forward at all - that is, when forwarding completely from the masked out portion of the store (like the AVX upper half to the AVX lower half of a load). There's also the case where the number of iterations is known at compile time, only with cost comparing we'd consider a non-masked epilog - as we are not doing that we have to add heuristics to avoid masking when a single vector epilog iteration would cover all scalar iterations left (this is exercised by gcc.target/i386/pr110310.c). SPEC CPU 2017 shows 3% text size savings over not using masked epilogues with performance impact in the noise. Masking all vector epilogues gets that to 4% text size savings with some major runtime regressions in 503.bwaves_r and 527.cam4_r (measured on a Zen4 system), we're leaving a 5% improvement for 549.fotonik3d_r unrealized with the implemented heuristic. With the heuristics we turn 22513 vector epilogues + up to 12305 scalar epilogues into 12305 masked vector epilogues of which 574 are for AVX vector sizes, 79 for SSE vector sizes and the rest for AVX512. When masking all epilogues we get 14567 of them from 29467 vector + up to 14567 scalar epilogues, so the heuristics disable an additional 20% of masked epilogues. 
* config/i386/x86-tune.def (X86_TUNE_AVX512_MASKED_EPILOGUES): New tunable, default on for m_ZNVER4 and m_ZNVER5. * config/i386/i386.cc (ix86_vector_costs::finish_cost): With X86_TUNE_AVX512_MASKED_EPILOGUES and when the main loop had a vectorization factor > 2 use a masked epilogue when possible and when not obviously problematic. * gcc.target/i386/vect-mask-epilogue-1.c: New testcase. * gcc.target/i386/vect-mask-epilogue-2.c: Likewise. * gcc.target/i386/vect-epilogues-3.c: Adjust. Diff: --- gcc/config/i386/i386.cc| 59 ++ gcc/config/i386/x86-tune.def | 5 ++ gcc/testsuite/gcc.target/i386/vect-epilogues-3.c | 2 +- .../gcc.target/i386/vect-mask-epilogue-1.c | 11 .../gcc.target/i386/vect-mask-epilogue-2.c | 14 + 5 files changed, 90 insertions(+), 1 deletion(-) diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index fd3f35de14d3..ad7360ec71a4 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -26295,6 +26295,65 @@ ix86_vector_costs::finish_cost (const vector_costs *scalar_costs) && LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant () >= 16) m_suggested_epilogue_mode = V8QImode; + /* When X86_TUNE_AVX512_MASKED_EPILOGUES is enabled try to use + a masked epilogue if that doesn't seem detrimental. */ + if (loop_vinfo + && !LOOP_VINFO_EPILOGUE_P (loop_vinfo) + && LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant () > 2 + && ix86_tune_features[X86_TUNE_AVX512_MASKED_EPILOGUES] + && !OPTION_SET_P (param_vect_partial_vector_usage)) +{ + bool avoid = false; + if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) + && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0) + { + unsigned int peel_niter + = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo); + if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)) + peel_niter += 1; + /* When we know the number of scalar iterations of the epilogue, +avoid masking when a single vector epilog iteration handles +it in full. 
*/ + if (pow2p_hwi ((LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_niter) +% LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ())) + avoid = true; + } + if (!avoid && loop_outer (loop_outer (LOOP_VINFO_LOOP (loop_vinfo + for (auto ddr : LOOP_VINFO_DDRS (loop_vinfo)) + { + if (DDR_ARE_DEPENDENT (ddr) == chrec_known) + ; + else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) + ; + else + { + int loop_depth + = index_in_loop_nest (LOOP_VINFO_LOOP (loop_vinfo)->num, +
[gcc r16-2089] libstdc++: Document that LWG 3881 is resolved, by using different approach.
https://gcc.gnu.org/g:a10723efcd611977c5e4a875b9cb1de920732d42 commit r16-2089-ga10723efcd611977c5e4a875b9cb1de920732d42 Author: Tomasz Kamiński Date: Tue Jul 8 09:30:06 2025 +0200 libstdc++: Document that LWG 3881 is resolved, by using different approach. libstdc++-v3/ChangeLog: * include/std/queue (formatter, _CharT>) (formatter, _CharT>): Add _GLIBCXX_RESOLVE_LIB_DEFECTS comments. Diff: --- libstdc++-v3/include/std/queue | 10 -- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/libstdc++-v3/include/std/queue b/libstdc++-v3/include/std/queue index 90525897da75..1b76088b31b3 100644 --- a/libstdc++-v3/include/std/queue +++ b/libstdc++-v3/include/std/queue @@ -105,7 +105,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { return _M_f.format(__a.c, __fc); } private: - // Standard uses formatter, _CharT>. + // Standard uses formatter, _CharT>, but range_formatter + // provides same behavior. + // _GLIBCXX_RESOLVE_LIB_DEFECTS + // 3881. Incorrect formatting of container adapters backed by std::string range_formatter<_Tp, _CharT> _M_f; }; @@ -136,7 +139,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { return _M_f.format(__a.c, __fc); } private: - // Standard uses formatter, _CharT>. + // Standard uses formatter, _CharT>, but range_formatter + // provides same behavior. + // _GLIBCXX_RESOLVE_LIB_DEFECTS + // 3881. Incorrect formatting of container adapters backed by std::string range_formatter<_Tp, _CharT> _M_f; };
[gcc] Created branch 'gaius/heads/m2wideset' in namespace 'refs/users'
The branch 'gaius/heads/m2wideset' was created in namespace 'refs/users' pointing to: 2fd6f42c17a8... libstdc++: Make debug iterator pointer sequence const [PR11
[gcc r16-2085] tree-optimization/120358 - bogus PTA with structure access
https://gcc.gnu.org/g:aa5ae523e84a97bf3a582ea0fa73d959afa9b9c7 commit r16-2085-gaa5ae523e84a97bf3a582ea0fa73d959afa9b9c7 Author: Richard Biener Date: Mon Jul 7 15:13:38 2025 +0200 tree-optimization/120358 - bogus PTA with structure access When we compute the constraint for something like MEM[(const struct QStringView &)&tok2 + 32] we go and compute what (const struct QStringView &)&tok2 + 32 points to and then add subvariables to its dereference that possibly fall in the range of the access according to the original refs size. In doing that we disregarded that the subvariable the starting address points to might not be aligned to it and thus the access might start at any point within that variable. The following conservatively adjusts the pruning of adjacent sub-variables to honor this. PR tree-optimization/120358 * tree-ssa-structalias.cc (get_constraint_for_1): Adjust pruning of sub-variables according to the imprecise known start offset. Diff: --- gcc/tree-ssa-structalias.cc | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/gcc/tree-ssa-structalias.cc b/gcc/tree-ssa-structalias.cc index deca44ae0bf3..0215243d5be9 100644 --- a/gcc/tree-ssa-structalias.cc +++ b/gcc/tree-ssa-structalias.cc @@ -3690,7 +3690,10 @@ get_constraint_for_1 (tree t, vec *results, bool address_p, size = -1; for (; curr; curr = vi_next (curr)) { - if (curr->offset - vi->offset < size) + /* The start of the access might happen anywhere +within vi, so conservatively assume it was +at its end. */ + if (curr->offset - (vi->offset + vi->size - 1) < size) { cs.var = curr->id; results->safe_push (cs);
[gcc r15-9936] nr2.0: late: Correctly initialize funny_error member
https://gcc.gnu.org/g:7e3c677f96138547f27d10f748cdfa04b759de1e commit r15-9936-g7e3c677f96138547f27d10f748cdfa04b759de1e Author: Arthur Cohen Date: Mon Apr 28 21:45:13 2025 +0200 nr2.0: late: Correctly initialize funny_error member gcc/rust/ChangeLog: * resolve/rust-late-name-resolver-2.0.cc (Late::Late): False initialize the funny_error field. Diff: --- gcc/rust/resolve/rust-late-name-resolver-2.0.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/gcc/rust/resolve/rust-late-name-resolver-2.0.cc b/gcc/rust/resolve/rust-late-name-resolver-2.0.cc index f743e1e03f34..8702c8987d63 100644 --- a/gcc/rust/resolve/rust-late-name-resolver-2.0.cc +++ b/gcc/rust/resolve/rust-late-name-resolver-2.0.cc @@ -33,7 +33,9 @@ namespace Rust { namespace Resolver2_0 { -Late::Late (NameResolutionContext &ctx) : DefaultResolver (ctx) {} +Late::Late (NameResolutionContext &ctx) + : DefaultResolver (ctx), funny_error (false) +{} static NodeId next_node_id ()
[gcc r16-2107] libstdc++: Do not expose set_brackets/set_separator for formatter with format_kind other than sequence [PR119861]
https://gcc.gnu.org/g:70bd97e89ddf8fcb8c14e84a8fd580404536eeb1 commit r16-2107-g70bd97e89ddf8fcb8c14e84a8fd580404536eeb1 Author: Tomasz Kamiński Date: Tue Jul 8 10:04:41 2025 +0200 libstdc++: Do not expose set_brackets/set_separator for formatter with format_kind other than sequence [PR119861] The standard defines separate specializations of range-default-formatter, of which only the one for range_format::sequence provides the set_brackets and set_separator methods. We implemented them as one specialization and exposed these methods for every range_format other than string or debug_string, i.e. whenever range_formatter was used as the underlying formatter. PR libstdc++/119861 libstdc++-v3/ChangeLog: * include/std/format (formatter<_Rg, _CharT>::set_separator) (formatter<_Rg, _CharT>::set_brackets): Constrain with (format_kind<_Rg> == range_format::sequence). * testsuite/std/format/ranges/pr119861_neg.cc: New test. Diff: --- libstdc++-v3/include/std/format| 4 +- .../testsuite/std/format/ranges/pr119861_neg.cc| 52 ++ 2 files changed, 54 insertions(+), 2 deletions(-) diff --git a/libstdc++-v3/include/std/format b/libstdc++-v3/include/std/format index 5749aa1995a9..d584b81c78a1 100644 --- a/libstdc++-v3/include/std/format +++ b/libstdc++-v3/include/std/format @@ -6030,13 +6030,13 @@ namespace __format constexpr void set_separator(basic_string_view<_CharT> __sep) noexcept - requires (!_S_range_format_is_string) + requires (format_kind<_Rg> == range_format::sequence) { _M_under.set_separator(__sep); } constexpr void set_brackets(basic_string_view<_CharT> __open, basic_string_view<_CharT> __close) noexcept - requires (!_S_range_format_is_string) + requires (format_kind<_Rg> == range_format::sequence) { _M_under.set_brackets(__open, __close); } // We deviate from standard, that declares this as template accepting diff --git a/libstdc++-v3/testsuite/std/format/ranges/pr119861_neg.cc b/libstdc++-v3/testsuite/std/format/ranges/pr119861_neg.cc new file mode 100644 index ..9a6ed16393ee --- 
/dev/null +++ b/libstdc++-v3/testsuite/std/format/ranges/pr119861_neg.cc @@ -0,0 +1,52 @@ +// { dg-do compile { target c++23 } } + +#include +#include + +// only format_kind::sequence provides set_brackets and set_separator methods + +template +struct MyCont : std::vector +{ + using std::vector::vector; +}; + +template +constexpr std::range_format std::format_kind> = fk; + +void test_sequence() +{ + std::formatter, char> fmtter; + fmtter.set_brackets("{", "}"); + fmtter.set_separator(","); +} + +void test_map() +{ + std::formatter>, char> fmtter; + fmtter.set_brackets("{", "}"); // { dg-error "here" } + fmtter.set_separator(","); // { dg-error "here" } +} + +void test_set() +{ + std::formatter, char> fmtter; + fmtter.set_brackets("{", "}"); // { dg-error "here" } + fmtter.set_separator(","); // { dg-error "here" } +} + +void test_string() +{ + std::formatter, char> fmtter; + fmtter.set_brackets("{", "}"); // { dg-error "here" } + fmtter.set_separator(","); // { dg-error "here" } +} + +void test_debug_string() +{ + std::formatter, char> fmtter; + fmtter.set_brackets("{", "}"); // { dg-error "here" } + fmtter.set_separator(","); // { dg-error "here" } +} + +// { dg-error "no matching function for call to 'std::formatter<" "" { target *-*-* } 0 }
[gcc r16-2095] libstdc++: Restructure mdspan tests to reuse IntLike.
https://gcc.gnu.org/g:45b81ebf7815e3cea15f6fb18e83a101a4c50fb3 commit r16-2095-g45b81ebf7815e3cea15f6fb18e83a101a4c50fb3 Author: Luc Grosheintz Date: Fri Jul 4 10:29:45 2025 +0200 libstdc++: Restructure mdspan tests to reuse IntLike. The class IntLike is used for testing extents with user-defined classes that convert to int. This commit places the class into a separate header file. This allows it to be reused across different parts of the mdspan related testsuite. libstdc++-v3/ChangeLog: * testsuite/23_containers/mdspan/extents/custom_integer.cc: Delete IntLike and include "int_like.h". * testsuite/23_containers/mdspan/extents/int_like.h: Add IntLike. Reviewed-by: Tomasz Kamiński Signed-off-by: Luc Grosheintz Diff: --- .../23_containers/mdspan/extents/custom_integer.cc | 27 +-- .../23_containers/mdspan/extents/int_like.h| 30 ++ 2 files changed, 31 insertions(+), 26 deletions(-) diff --git a/libstdc++-v3/testsuite/23_containers/mdspan/extents/custom_integer.cc b/libstdc++-v3/testsuite/23_containers/mdspan/extents/custom_integer.cc index 2907ad12ae72..404755bd5ac4 100644 --- a/libstdc++-v3/testsuite/23_containers/mdspan/extents/custom_integer.cc +++ b/libstdc++-v3/testsuite/23_containers/mdspan/extents/custom_integer.cc @@ -2,38 +2,13 @@ #include #include +#include "int_like.h" // Test construction from a custom integer-like object, that has // no copy/move ctor or copy/move assignment operator. 
constexpr size_t dyn = std::dynamic_extent; -class IntLike -{ -public: - explicit - IntLike(int i) - : _M_i(i) - { } - - IntLike() = delete; - IntLike(const IntLike&) = delete; - IntLike(IntLike&&) = delete; - - const IntLike& - operator=(const IntLike&) = delete; - - const IntLike& - operator=(IntLike&&) = delete; - - constexpr - operator int() const noexcept - { return _M_i; } - -private: - int _M_i; -}; - static_assert(std::is_convertible_v); static_assert(std::is_nothrow_constructible_v); diff --git a/libstdc++-v3/testsuite/23_containers/mdspan/extents/int_like.h b/libstdc++-v3/testsuite/23_containers/mdspan/extents/int_like.h new file mode 100644 index ..f39f4cc90816 --- /dev/null +++ b/libstdc++-v3/testsuite/23_containers/mdspan/extents/int_like.h @@ -0,0 +1,30 @@ +#ifndef TEST_MDSPAN_INT_LIKE_H +#define TEST_MDSPAN_INT_LIKE_H + +class IntLike +{ +public: + explicit + IntLike(int i) + : _M_i(i) + { } + + IntLike() = delete; + IntLike(const IntLike&) = delete; + IntLike(IntLike&&) = delete; + + const IntLike& + operator=(const IntLike&) = delete; + + const IntLike& + operator=(IntLike&&) = delete; + + constexpr + operator int() const noexcept + { return _M_i; } + +private: + int _M_i; +}; + +#endif // TEST_MDSPAN_INT_LIKE_H
[gcc r16-2096] libstdc++: Implement __mdspan::__size.
https://gcc.gnu.org/g:aa961cae42e23461887a6cf38aa47413b8425243 commit r16-2096-gaa961cae42e23461887a6cf38aa47413b8425243 Author: Luc Grosheintz Date: Fri Jul 4 10:29:46 2025 +0200 libstdc++: Implement __mdspan::__size. The current code uses __mdspan::__fwd_prod(__exts, __rank) to express computing the size of an extent. This commit adds a function __mdspan::__size(__exts) to express the idea more directly. libstdc++-v3/ChangeLog: * include/std/mdspan (__mdspan::__size): New function. Reviewed-by: Tomasz Kamiński Signed-off-by: Luc Grosheintz Diff: --- libstdc++-v3/include/std/mdspan | 12 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/libstdc++-v3/include/std/mdspan b/libstdc++-v3/include/std/mdspan index d97fa22e4f03..b0d8088bb777 100644 --- a/libstdc++-v3/include/std/mdspan +++ b/libstdc++-v3/include/std/mdspan @@ -398,6 +398,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION __rev_prod(const _Extents& __exts, size_t __r) noexcept { return __exts_prod(__exts, __r + 1, __exts.rank()); } +template + constexpr typename _Extents::index_type + __size(const _Extents& __exts) noexcept + { return __fwd_prod(__exts, __exts.rank()); } + template auto __build_dextents_type(integer_sequence) -> extents<_IndexType, ((void) _Counts, dynamic_extent)...>; @@ -591,7 +596,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION constexpr index_type required_span_size() const noexcept - { return __mdspan::__fwd_prod(_M_extents, extents_type::rank()); } + { return __mdspan::__size(_M_extents); } template<__mdspan::__valid_index_type... _Indices> requires (sizeof...(_Indices) == extents_type::rank()) @@ -730,7 +735,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION constexpr index_type required_span_size() const noexcept - { return __mdspan::__fwd_prod(_M_extents, extents_type::rank()); } + { return __mdspan::__size(_M_extents); } template<__mdspan::__valid_index_type... 
_Indices> requires (sizeof...(_Indices) == extents_type::rank()) @@ -986,8 +991,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { if constexpr (!is_always_exhaustive()) { - constexpr auto __rank = extents_type::rank(); - auto __size = __mdspan::__fwd_prod(_M_extents, __rank); + auto __size = __mdspan::__size(_M_extents); if(__size > 0) return __size == required_span_size(); }
[gcc r16-2099] [PATCH] riscv: allow zero in zacas subword atomic cas
https://gcc.gnu.org/g:3fd638a9e5497dfdf00f1783d6e704af03fb44b0 commit r16-2099-g3fd638a9e5497dfdf00f1783d6e704af03fb44b0 Author: Andreas Schwab Date: Tue Jul 8 07:32:17 2025 -0600 [PATCH] riscv: allow zero in zacas subword atomic cas gcc: PR target/120995 * config/riscv/sync.md (zacas_atomic_cas_value_strong): Allow op3 to be zero. gcc/testsuite: PR target/120995 * gcc.target/riscv/amo/zabha-zacas-atomic-cas.c: New test. Diff: --- gcc/config/riscv/sync.md| 2 +- gcc/testsuite/gcc.target/riscv/amo/zabha-zacas-atomic-cas.c | 11 +++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md index a75ea6834e46..50ec8b38f723 100644 --- a/gcc/config/riscv/sync.md +++ b/gcc/config/riscv/sync.md @@ -627,7 +627,7 @@ (match_operand:SHORT 1 "memory_operand" "+A")) ;; memory (set (match_dup 1) (unspec_volatile:SHORT [(match_operand:SHORT 2 "register_operand" "0") ;; expected_val - (match_operand:SHORT 3 "register_operand" "rJ") ;; desired_val + (match_operand:SHORT 3 "reg_or_0_operand" "rJ") ;; desired_val (match_operand:SI 4 "const_int_operand") ;; mod_s (match_operand:SI 5 "const_int_operand")] ;; mod_f UNSPEC_COMPARE_AND_SWAP))] diff --git a/gcc/testsuite/gcc.target/riscv/amo/zabha-zacas-atomic-cas.c b/gcc/testsuite/gcc.target/riscv/amo/zabha-zacas-atomic-cas.c new file mode 100644 index ..d3d84fd30882 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/amo/zabha-zacas-atomic-cas.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* PR target/120995 ICE unrecognized subword atomic cas */ +/* { dg-options "-O" } */ +/* { dg-add-options riscv_zacas } */ +/* { dg-add-options riscv_zabha } */ + +_Bool b; +void atomic_bool_cmpxchg() +{ + __sync_bool_compare_and_swap(&b, 1, 0); +}
[gcc r16-2092] Handle non default git prefix configurations
https://gcc.gnu.org/g:90f7df6194e3c687000fcf725e18532838881d2f commit r16-2092-g90f7df6194e3c687000fcf725e18532838881d2f Author: Pierre-Emmanuel Patry Date: Mon Jul 7 17:05:44 2025 +0200 Handle non default git prefix configurations Mklog parses the diff content from prepare-commit-msg hook but fails when git has been configured with mnemonicPrefix. Forcing the default values for the prefixes would set a distinct diff configuration supported by mklog and prevent most failures. contrib/ChangeLog: * prepare-commit-msg: Force default git prefixes. Signed-off-by: Pierre-Emmanuel Patry Diff: --- contrib/prepare-commit-msg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/prepare-commit-msg b/contrib/prepare-commit-msg index 1b878772dcc4..75d102559c78 100755 --- a/contrib/prepare-commit-msg +++ b/contrib/prepare-commit-msg @@ -78,4 +78,4 @@ else tee="cat" fi -git $cmd | $tee | git gcc-mklog -c "$COMMIT_MSG_FILE" +git $cmd --default-prefix | $tee | git gcc-mklog -c "$COMMIT_MSG_FILE"
[gcc r16-2094] libstdc++: Check prerequisite of extents::extents.
https://gcc.gnu.org/g:74ddf1792ae3538ee829f2c399dfcb75bfae8fd3 commit r16-2094-g74ddf1792ae3538ee829f2c399dfcb75bfae8fd3 Author: Luc Grosheintz Date: Fri Jul 4 10:29:44 2025 +0200 libstdc++: Check prerequisite of extents::extents. Previously, the prerequisite of the extents ctors that static_extent(i) == dynamic_extent || extent(i) == other.extent(i) was not checked. This commit adds the __glibcxx_assert and tests it. libstdc++-v3/ChangeLog: * include/std/mdspan (extents): Check prerequisite of the ctor that static_extent(i) == dynamic_extent || extent(i) == other.extent(i). * testsuite/23_containers/mdspan/extents/class_mandates_neg.cc: Test the implemented prerequisite. Reviewed-by: Tomasz Kamiński Signed-off-by: Luc Grosheintz Diff: --- libstdc++-v3/include/std/mdspan| 13 .../mdspan/extents/class_mandates_neg.cc | 2 ++ .../mdspan/extents/extents_mismatch_neg.cc | 35 ++ 3 files changed, 50 insertions(+) diff --git a/libstdc++-v3/include/std/mdspan b/libstdc++-v3/include/std/mdspan index 1fdcae634419..d97fa22e4f03 100644 --- a/libstdc++-v3/include/std/mdspan +++ b/libstdc++-v3/include/std/mdspan @@ -110,10 +110,23 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION return __se; } + template + static constexpr bool + _S_is_compatible_extents(_GetOtherExtent __get_extent) noexcept + { + if constexpr (_OtherRank == _S_rank) + for (size_t __i = 0; __i < _S_rank; ++__i) + if (_Extents[__i] != dynamic_extent + && !cmp_equal(_Extents[__i], _S_int_cast(__get_extent(__i + return false; + return true; + } + template constexpr void _M_init_dynamic_extents(_GetOtherExtent __get_extent) noexcept { + __glibcxx_assert(_S_is_compatible_extents<_OtherRank>(__get_extent)); for (size_t __i = 0; __i < _S_rank_dynamic; ++__i) { size_t __di = __i; diff --git a/libstdc++-v3/testsuite/23_containers/mdspan/extents/class_mandates_neg.cc b/libstdc++-v3/testsuite/23_containers/mdspan/extents/class_mandates_neg.cc index f9c1c0196669..67d18feda96c 100644 --- 
a/libstdc++-v3/testsuite/23_containers/mdspan/extents/class_mandates_neg.cc +++ b/libstdc++-v3/testsuite/23_containers/mdspan/extents/class_mandates_neg.cc @@ -7,6 +7,8 @@ std::extents e1; // { dg-error "from here" } std::extents e2; // { dg-error "from here" } std::extents e3; // { dg-error "from here" } std::extents e4; // { dg-error "from here" } + // { dg-prune-output "dynamic or representable as IndexType" } // { dg-prune-output "signed or unsigned integer" } // { dg-prune-output "invalid use of incomplete type" } +// { dg-prune-output "non-constant condition for static assertion" } diff --git a/libstdc++-v3/testsuite/23_containers/mdspan/extents/extents_mismatch_neg.cc b/libstdc++-v3/testsuite/23_containers/mdspan/extents/extents_mismatch_neg.cc new file mode 100644 index ..b35e5310d415 --- /dev/null +++ b/libstdc++-v3/testsuite/23_containers/mdspan/extents/extents_mismatch_neg.cc @@ -0,0 +1,35 @@ +// { dg-do compile { target c++23 } } +#include + +#include + +constexpr size_t dyn = std::dynamic_extent; + +constexpr bool +test_dyn2sta_extents_mismatch_00() +{ + auto e0 = std::extents{1}; + [[maybe_unused]] auto e1 = std::extents{e0};// { dg-error "expansion of" } + return true; +} +static_assert(test_dyn2sta_extents_mismatch_00());// { dg-error "expansion of" } + +constexpr bool +test_dyn2sta_extents_mismatch_01() +{ + [[maybe_unused]] auto e = std::extents{2, 2}; // { dg-error "expansion of" } + return true; +} +static_assert(test_dyn2sta_extents_mismatch_01()); // { dg-error "expansion of" } + +constexpr bool +test_dyn2sta_extents_mismatch_02() +{ + std::array exts{2, 2}; + [[maybe_unused]] auto e = std::extents{exts}; // { dg-error "expansion of" } + return true; +} +static_assert(test_dyn2sta_extents_mismatch_02()); // { dg-error "expansion of" } + +// { dg-prune-output "non-constant condition for static assertion" } +// { dg-prune-output "__glibcxx_assert" }