date:20250708

[gcc(refs/users/meissner/heads/work214-sha)] Revert changes

2025-07-08 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:8117a1d0926d3c0132e49138e909a7f3c2d3cee7

commit 8117a1d0926d3c0132e49138e909a7f3c2d3cee7
Author: Michael Meissner 
Date:   Wed Jul 9 01:21:14 2025 -0400

Revert changes

Diff:
---
 gcc/config/rs6000/fusion.md | 15 +--
 gcc/config/rs6000/genfusion.pl  | 58 +++--
 gcc/config/rs6000/predicates.md | 12 -
 gcc/config/rs6000/rs6000.md |  7 +
 4 files changed, 11 insertions(+), 81 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index d24837d68d83..621b346f9eb9 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -1871,23 +1871,20 @@
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vand -> vand
 (define_insn "*fuse_vand_vand"
-  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
-(and:VM (and:VM (match_operand:VM 0 "vector_fusion_operand" 
"v,v,v,wa,v")
-  (match_operand:VM 1 "vector_fusion_operand" 
"%v,v,v,wa,v"))
- (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v")))
-   (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
+  [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v")
+(and:VM (and:VM (match_operand:VM 0 "altivec_register_operand" 
"v,v,v,v")
+  (match_operand:VM 1 "altivec_register_operand" 
"%v,v,v,v"))
+ (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))
+   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
vand %3,%1,%0\;vand %3,%3,%2
vand %3,%1,%0\;vand %3,%3,%2
vand %3,%1,%0\;vand %3,%3,%2
-   xxeval %x3,%x2,%x1,%x0,1
vand %4,%1,%0\;vand %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
-   (set_attr "length" "8")
-   (set_attr "prefixed" "*,*,*,yes,*")
-   (set_attr "isa" "*,*,*,xxeval,*")])
+   (set_attr "length" "8")])
 
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vandc -> vand
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index 351a4d914a4a..e5d3b1ee449d 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -211,33 +211,25 @@ sub gen_logical_addsubf
$inner_comp, $inner_inv, $inner_rtl, $inner_op, $both_commute, $c4,
$bc, $inner_arg0, $inner_arg1, $inner_exp, $outer_arg2, $outer_exp,
$ftype, $insn, $is_subf, $is_rsubf, $outer_32, $outer_42,$outer_name,
-   $fuse_type, $xxeval, $c5, $vect_pred, $vect_inner_arg0, 
$vect_inner_arg1,
-   $vect_inner_exp, $vect_outer_arg2, $vect_outer_exp);
-
-my %xxeval_fusions = (
-  "vand_vand"   =>   1,
-);
-
-KIND: foreach $kind ('scalar','vector') {
+   $fuse_type);
+  KIND: foreach $kind ('scalar','vector') {
   @outer_ops = @logicals;
   if ( $kind eq 'vector' ) {
  $vchr = "v";
  $mode = "VM";
  $pred = "altivec_register_operand";
- $vect_pred = "vector_fusion_operand";
  $constraint = "v";
  $fuse_type = "fused_vector";
   } else {
  $vchr = "";
  $mode = "GPR";
- $vect_pred = $pred = "gpc_reg_operand";
+ $pred = "gpc_reg_operand";
  $constraint = "r";
  $fuse_type = "fused_arith_logical";
  push (@outer_ops, @addsub);
  push (@outer_ops, ( "rsubf" ));
   }
   $c4 = "${constraint},${constraint},${constraint},${constraint}";
-  $c5 = "${constraint},${constraint},${constraint},wa,${constraint}";
 OUTER: foreach $outer ( @outer_ops ) {
$outer_name = "${vchr}${outer}";
$is_subf = ( $outer eq "subf" );
@@ -271,33 +263,23 @@ sub gen_logical_addsubf
  $bc = ""; if ( $both_commute ) { $bc = "%"; }
  $inner_arg0 = "(match_operand:${mode} 0 \"${pred}\" \"${c4}\")";
  $inner_arg1 = "(match_operand:${mode} 1 \"${pred}\" \"${bc}${c4}\")";
- $vect_inner_arg0 = "(match_operand:${mode} 0 \"${vect_pred}\" 
\"${c5}\")";
- $vect_inner_arg1 = "(match_operand:${mode} 1 \"${vect_pred}\" 
\"${bc}${c5}\")";
  if ( ($inner_comp & 1) == 1 ) {
  $inner_arg0 = "(not:${mode} $inner_arg0)";
- $vect_inner_arg0 = "(not:${mode} $vect_inner_arg0)";
  }
  if ( ($inner_comp & 2) == 2 ) {
  $inner_arg1 = "(not:${mode} $inner_arg1)";
- $vect_inner_arg1 = "(not:${mode} $vect_inner_arg1)";
  }
  $inner_exp = "(${inner_rtl}:${mode} ${inner_arg0}
   ${inner_arg1})";
- $vect_inner_exp = "(${inner_rtl}:${mode} ${vect_inner_arg0}
-  ${vect_inner_arg1})";
  if ( $inner_inv == 1 ) {
  $inner_exp = "(not:${mode} $inner_exp)";
- $vect_inner_exp = "(not:${mode} $vect_inner_exp)";
  }
  $outer_arg2 = "(match_operand:${mode} 2 \"${pred}\" \"${c4}\")";
- $vect_outer_arg2 = "(match_operand:${mode}

[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector and to vector and fusion

2025-07-08 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:7b740ddfc2dbfb1d889e30806f2758b3c6fb7034

commit 7b740ddfc2dbfb1d889e30806f2758b3c6fb7034
Author: Michael Meissner 
Date:   Wed Jul 9 01:20:03 2025 -0400

PR target/117251: Improve vector and to vector and fusion

See the following post for a complete explanation of what the patches
for PR target/117251 do:

 * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html

This is patch #1 of 45 to generate the 'XXEVAL' instruction on power10
and power11 instead of using the Altivec 'VAND' instruction feeding
into 'VAND'.  The 'XXEVAL' instruction can use all 64 vector registers,
instead of the 32 registers that traditional Altivec vector
instructions use.  By allowing all of the vector registers to be used,
it reduces the amount of spilling that a large benchmark generated.

Currently the following code:

vector int a, b, c, d;
a = (c & d) & b;

Generates:

vand   t,c,d
vand   a,t,b

In addition, with this patch, if the arguments or result are
allocated to a traditional FPR register, the GCC compiler will now
generate the following code instead of adding vector move instructions:

xxeval a,b,c,d,1

Since fusion using 2 Altivec instructions is slightly faster than using
the 'XXEVAL' instruction we prefer to generate the Altivec instructions
if we can.  In addition, because 'XXEVAL' is a prefixed instruction, it
possibly might generate an extra NOP instruction to align the 'XXEVAL'
instruction.

I have tested these patches on both big endian and little endian
PowerPC servers, with no regressions.  Can I check these patches into
the trunk?

2025-07-09  Michael Meissner  

gcc/

PR target/117251
* config/rs6000/fusion.md: Regenerate.
* config/rs6000/genfusion.pl (gen_logical_addsubf): Add
support to generate vector/vector and/and fusion if XXEVAL is
supported.
* config/rs6000/predicates.md (vector_fusion_operand): New
predicate.
* config/rs6000/rs6000.h (TARGET_XXEVAL): New macro.
* config/rs6000/rs6000.md (isa attribute): Add xxeval.
(enabled attribute): Add support for XXEVAL.

Diff:
---
 gcc/config/rs6000/fusion.md | 15 ++-
 gcc/config/rs6000/genfusion.pl  | 58 ++---
 gcc/config/rs6000/predicates.md | 12 +
 gcc/config/rs6000/rs6000.md |  7 -
 4 files changed, 81 insertions(+), 11 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index 621b346f9eb9..d24837d68d83 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -1871,20 +1871,23 @@
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vand -> vand
 (define_insn "*fuse_vand_vand"
-  [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v")
-(and:VM (and:VM (match_operand:VM 0 "altivec_register_operand" 
"v,v,v,v")
-  (match_operand:VM 1 "altivec_register_operand" 
"%v,v,v,v"))
- (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))
-   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
+  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
+(and:VM (and:VM (match_operand:VM 0 "vector_fusion_operand" 
"v,v,v,wa,v")
+  (match_operand:VM 1 "vector_fusion_operand" 
"%v,v,v,wa,v"))
+ (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v")))
+   (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
vand %3,%1,%0\;vand %3,%3,%2
vand %3,%1,%0\;vand %3,%3,%2
vand %3,%1,%0\;vand %3,%3,%2
+   xxeval %x3,%x2,%x1,%x0,1
vand %4,%1,%0\;vand %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
-   (set_attr "length" "8")])
+   (set_attr "length" "8")
+   (set_attr "prefixed" "*,*,*,yes,*")
+   (set_attr "isa" "*,*,*,xxeval,*")])
 
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vandc -> vand
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index e5d3b1ee449d..351a4d914a4a 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -211,25 +211,33 @@ sub gen_logical_addsubf
$inner_comp, $inner_inv, $inner_rtl, $inner_op, $both_commute, $c4,
$bc, $inner_arg0, $inner_arg1, $inner_exp, $outer_arg2, $outer_exp,
$ftype, $insn, $is_subf, $is_rsubf, $outer_32, $outer_42,$outer_name,
-   $fuse_type);
-  KIND: foreach $kind ('scalar','vector') {
+   $fuse_type, $xxeval, $c5, $vect_pred, $vect_inner_arg0, 
$vect_inner_arg1,
+   $vect_inner_exp, $vect_outer_arg2, $vect_outer_exp);
+
+my %xxeval_fusions = (
+  "vand_vand"   =>   1,
+);
+
+KIND: foreach $kind ('scalar','

[gcc(refs/users/mikael/heads/stabilisation_descriptor_v01)] Correction bootstrap

2025-07-08 Thread Mikael Morin via Gcc-cvs

https://gcc.gnu.org/g:c8d7942a9892c7a6207aad350ba706ac54cdd624

commit c8d7942a9892c7a6207aad350ba706ac54cdd624
Author: Mikael Morin 
Date:   Tue Jul 8 13:40:47 2025 +0200

Correction bootstrap

Diff:
---
 gcc/fortran/trans-array.cc | 1 -
 1 file changed, 1 deletion(-)

diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc
index fcc9daa893d1..086863822375 100644
--- a/gcc/fortran/trans-array.cc
+++ b/gcc/fortran/trans-array.cc
@@ -11437,7 +11437,6 @@ gfc_alloc_allocatable_for_assignment (gfc_loopinfo 
*loop,
   tree lbd;
   tree class_expr2 = NULL_TREE;
   int n;
-  int dim;
   gfc_array_spec * as;
   bool coarray = (flag_coarray == GFC_FCOARRAY_LIB
  && gfc_caf_attr (expr1, true).codimension);

[gcc r16-2093] libstdc++: Check prerequisites of layout_*::operator().

2025-07-08 Thread Tomasz Kaminski via Libstdc++-cvs

https://gcc.gnu.org/g:4d86e4cda01aa3ab60de164a8492a99bc9ca1f70

commit r16-2093-g4d86e4cda01aa3ab60de164a8492a99bc9ca1f70
Author: Luc Grosheintz 
Date:   Fri Jul 4 10:29:43 2025 +0200

libstdc++: Check prerequisites of layout_*::operator().

Previously, the prerequisite that the arguments passed to operator() are
a multi-dimensional index (of extents()) was not checked.

Both mapping::operator() and mdspan::operator[] have the same
prerequisite. Since mdspan must check the prerequisite for user-defined
layout mappings, the preference is to check in mdspan.

Because out-of-bounds accesses are very common, it's nevertheless useful
to check the prerequisite in mapping::operator(). This is relevant for
cases where the layout mappings are used without mdspan. This commit
checks the prerequisites via _GLIBCXX_DEBUG_ASSERTs and adds the required
tests.

More discussion in the email chain starting at:

  https://gcc.gnu.org/pipermail/libstdc++/2025-July/062265.html

libstdc++-v3/ChangeLog:

* include/std/mdspan: Check prerequisites of
layout_*::operator() with _GLIBCXX_DEBUG_ASSERTs.
* testsuite/23_containers/mdspan/layouts/debug/out_of_bounds_neg.cc:
Add tests for prerequisites.

Reviewed-by: Tomasz Kamiński 
Signed-off-by: Luc Grosheintz 

Diff:
---
 libstdc++-v3/include/std/mdspan|  5 
 .../mdspan/layouts/debug/out_of_bounds_neg.cc  | 30 ++
 2 files changed, 35 insertions(+)

diff --git a/libstdc++-v3/include/std/mdspan b/libstdc++-v3/include/std/mdspan
index 4a06fb2d3a86..1fdcae634419 100644
--- a/libstdc++-v3/include/std/mdspan
+++ b/libstdc++-v3/include/std/mdspan
@@ -441,6 +441,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_IndexType __mult = 1;
auto __update = [&, __pos = 0u](_IndexType __idx) mutable
  {
+   _GLIBCXX_DEBUG_ASSERT(cmp_less(__idx, __exts.extent(__pos)));
__res += __idx * __mult;
__mult *= __exts.extent(__pos);
++__pos;
@@ -651,6 +652,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
auto __update = [&, __pos = __exts.rank()](_IndexType) mutable
  {
--__pos;
+   _GLIBCXX_DEBUG_ASSERT(cmp_less(__ind_arr[__pos],
+  __exts.extent(__pos)));
__res += __ind_arr[__pos] * __mult;
__mult *= __exts.extent(__pos);
  };
@@ -822,6 +825,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  {
auto __update = [&, __pos = 0u](_IndexType __idx) mutable
  {
+   _GLIBCXX_DEBUG_ASSERT(cmp_less(__idx,
+  __m.extents().extent(__pos)));
__res += __idx * __m.stride(__pos++);
  };
(__update(__indices), ...);
diff --git 
a/libstdc++-v3/testsuite/23_containers/mdspan/layouts/debug/out_of_bounds_neg.cc
 
b/libstdc++-v3/testsuite/23_containers/mdspan/layouts/debug/out_of_bounds_neg.cc
new file mode 100644
index ..fb8ff01e8aa2
--- /dev/null
+++ 
b/libstdc++-v3/testsuite/23_containers/mdspan/layouts/debug/out_of_bounds_neg.cc
@@ -0,0 +1,30 @@
+// { dg-do compile { target c++23 } }
+// { dg-require-debug-mode "" }
+#include
+
+template
+  constexpr bool
+  test_out_of_bounds_1d()
+  {
+auto m = typename Layout::mapping>{};
+(void) m(0); // { dg-error "expansion of" }
+return true;
+  }
+static_assert(test_out_of_bounds_1d()); // { dg-error 
"expansion of" }
+static_assert(test_out_of_bounds_1d()); // { dg-error 
"expansion of" }
+static_assert(test_out_of_bounds_1d()); // { dg-error 
"expansion of" }
+
+template
+  constexpr bool
+  test_out_of_bounds_3d()
+  {
+auto m = typename Layout::mapping>{};
+(void) m(2, 5, 5); // { dg-error "expansion of" }
+return true;
+  }
+static_assert(test_out_of_bounds_3d()); // { dg-error 
"expansion of" }
+static_assert(test_out_of_bounds_3d()); // { dg-error 
"expansion of" }
+static_assert(test_out_of_bounds_3d()); // { dg-error 
"expansion of" }
+
+// { dg-prune-output "non-constant condition for static assertion" }
+// { dg-prune-output "__glibcxx_assert" }

[gcc(refs/users/mikael/heads/stabilisation_descriptor_v01)] Correction bootstrap

2025-07-08 Thread Mikael Morin via Gcc-cvs

https://gcc.gnu.org/g:7c5cc328d38ed501c860c3348845c82615952f91

commit 7c5cc328d38ed501c860c3348845c82615952f91
Author: Mikael Morin 
Date:   Tue Jul 8 13:41:33 2025 +0200

Correction bootstrap

Diff:
---
 gcc/fortran/trans-array.cc | 1 -
 1 file changed, 1 deletion(-)

diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc
index 086863822375..32d1869cf5a5 100644
--- a/gcc/fortran/trans-array.cc
+++ b/gcc/fortran/trans-array.cc
@@ -11390,7 +11390,6 @@ update_reallocated_descriptor (stmtblock_t *block, 
gfc_loopinfo *loop)
   for (int i = 0; i < s->dimen; i++)
{
  int dim = s->dim[i];
- tree tree_dim = gfc_rank_cst[dim]; 
  UPDATE_VALUE (info->start[dim]);
  UPDATE_VALUE (info->end[dim]);
  UPDATE_VALUE (info->stride[dim]);

[gcc r16-2098] libstdc++: Implement mdspan and tests [PR107761].

2025-07-08 Thread Tomasz Kaminski via Gcc-cvs

https://gcc.gnu.org/g:b7b8eb90abaeaaf4a51325e087cd43a4dac8d25a

commit r16-2098-gb7b8eb90abaeaaf4a51325e087cd43a4dac8d25a
Author: Luc Grosheintz 
Date:   Tue Jul 8 10:24:26 2025 +0200

libstdc++: Implement mdspan and tests [PR107761].

Implements the class mdspan as described in N4950, i.e. without P3029.
It also adds tests for mdspan. This commit completes the implementation
of P0009, i.e. the C++23 part.

PR libstdc++/107761

libstdc++-v3/ChangeLog:

* include/std/mdspan (mdspan): New class.
* src/c++23/std.cc.in (mdspan): Add.
* testsuite/23_containers/mdspan/class_mandate_neg.cc: New test.
* testsuite/23_containers/mdspan/mdspan.cc: New test.
* testsuite/23_containers/mdspan/layout_like.h: Add class
LayoutLike which models a user-defined layout.

Reviewed-by: Tomasz Kamiński 
Signed-off-by: Luc Grosheintz 

Diff:
---
 libstdc++-v3/include/std/mdspan| 285 +
 libstdc++-v3/src/c++23/std.cc.in   |   3 +-
 .../23_containers/mdspan/class_mandate_neg.cc  |  41 ++
 .../testsuite/23_containers/mdspan/layout_like.h   |  83 +++
 .../testsuite/23_containers/mdspan/mdspan.cc   | 643 +
 .../23_containers/mdspan/out_of_bounds_neg.cc  |  24 +
 6 files changed, 1078 insertions(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/std/mdspan b/libstdc++-v3/include/std/mdspan
index b0d8088bb777..5a42aead3ebb 100644
--- a/libstdc++-v3/include/std/mdspan
+++ b/libstdc++-v3/include/std/mdspan
@@ -1057,6 +1057,291 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   { return __p + __i; }
 };
 
+  namespace __mdspan
+  {
+template
+  constexpr bool
+  __is_multi_index(const _Extents& __exts, span<_IndexType, _Nm> __indices)
+  {
+   static_assert(__exts.rank() == _Nm);
+   for (size_t __i = 0; __i < __exts.rank(); ++__i)
+ if (__indices[__i] >= __exts.extent(__i))
+   return false;
+   return true;
+  }
+  }
+
+  template>
+class mdspan
+{
+  static_assert(!is_array_v<_ElementType>,
+   "ElementType must not be an array type");
+  static_assert(!is_abstract_v<_ElementType>,
+   "ElementType must not be an abstract class type");
+  static_assert(__mdspan::__is_extents<_Extents>,
+   "Extents must be a specialization of std::extents");
+  static_assert(is_same_v<_ElementType,
+ typename _AccessorPolicy::element_type>);
+
+public:
+  using extents_type = _Extents;
+  using layout_type = _LayoutPolicy;
+  using accessor_type = _AccessorPolicy;
+  using mapping_type = typename layout_type::template 
mapping;
+  using element_type = _ElementType;
+  using value_type = remove_cv_t;
+  using index_type = typename extents_type::index_type;
+  using size_type = typename extents_type::size_type;
+  using rank_type = typename extents_type::rank_type;
+  using data_handle_type = typename accessor_type::data_handle_type;
+  using reference = typename accessor_type::reference;
+
+  static constexpr rank_type
+  rank() noexcept { return extents_type::rank(); }
+
+  static constexpr rank_type
+  rank_dynamic() noexcept { return extents_type::rank_dynamic(); }
+
+  static constexpr size_t
+  static_extent(rank_type __r) noexcept
+  { return extents_type::static_extent(__r); }
+
+  constexpr index_type
+  extent(rank_type __r) const noexcept { return extents().extent(__r); }
+
+  constexpr
+  mdspan()
+  requires (rank_dynamic() > 0)
+  && is_default_constructible_v
+ && is_default_constructible_v
+ && is_default_constructible_v
+  : _M_accessor(), _M_mapping(), _M_handle()
+  { }
+
+  constexpr
+  mdspan(const mdspan& __other) = default;
+
+  constexpr
+  mdspan(mdspan&& __other) = default;
+
+  template<__mdspan::__valid_index_type... _OIndexTypes>
+   requires (sizeof...(_OIndexTypes) == rank()
+  || sizeof...(_OIndexTypes) == rank_dynamic())
+&& is_constructible_v
+&& is_default_constructible_v
+   constexpr explicit
+   mdspan(data_handle_type __handle, _OIndexTypes... __exts)
+   : _M_accessor(),
+ _M_mapping(_Extents(static_cast(std::move(__exts))...)),
+ _M_handle(std::move(__handle))
+   { }
+
+  template<__mdspan::__valid_index_type _OIndexType,
+  size_t _Nm>
+   requires (_Nm == rank() || _Nm == rank_dynamic())
+&& is_constructible_v
+&& is_default_constructible_v
+   constexpr explicit(_Nm != rank_dynamic())
+   mdspan(data_handle_type __handle, span<_OIndexType, _Nm> __exts)
+   : _M_accessor(), _M_mapping(extents_type(__exts)),
+ _M_handle(std::move(__handle))
+   { }
+
+  template<__mdspan::__valid_index_type _OInd

[gcc r16-2105] s390: Always compute address of stack protector guard

2025-07-08 Thread Stefan Schulze Frielinghaus via Gcc-cvs

https://gcc.gnu.org/g:bb6075e7115208bab3d9c8b2c54e0bd6a5c808b7

commit r16-2105-gbb6075e7115208bab3d9c8b2c54e0bd6a5c808b7
Author: Stefan Schulze Frielinghaus 
Date:   Tue Jul 8 16:40:34 2025 +0200

s390: Always compute address of stack protector guard

Computing the address of the thread pointer on s390 involves multiple
instructions and therefore bears the risk that the address of the canary
or intermediate values of it are spilled after prologue in order to be
reloaded for the epilogue.  Since there exists no mechanism to ensure
that a value is not coming from stack, as a precaution compute the
address always twice, i.e., one time for the prologue and one time for
the epilogue.  Note, even if there were such a mechanism, emitting
optimal code is non-trivial since there exist cases with opposing
requirements as e.g. if the thread pointer is not only computed for the
TLS guard but also for other TLS objects.  For the latter accesses it is
desired to spill and reload the thread pointer instead of recomputing it
whereas for the former it is not.

gcc/ChangeLog:

* config/s390/s390.md (stack_protect_get_tpsi): New insn.
(stack_protect_get_tpdi): New insn.
(stack_protect_set): Use new insn.
(stack_protect_test): Use new insn.

gcc/testsuite/ChangeLog:

* gcc.target/s390/stack-protector-guard-tls-1.c: New test.

Diff:
---
 gcc/config/s390/s390.md| 47 --
 .../gcc.target/s390/stack-protector-guard-tls-1.c  | 39 ++
 2 files changed, 82 insertions(+), 4 deletions(-)

diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index f6db36e0ac38..02bc149b0fba 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -308,6 +308,9 @@
UNSPECV_SPLIT_STACK_CALL
 
UNSPECV_OSC_BREAK
+
+   ; Stack Protector
+   UNSPECV_SP_GET_TP
   ])
 
 ;;
@@ -365,6 +368,9 @@
(VR23_REGNUM 45)
(VR24_REGNUM 46)
(VR31_REGNUM 53)
+   ; Access registers
+   (AR0_REGNUM  36)
+   (AR1_REGNUM  37)
   ])
 
 ; Rounding modes for binary floating point numbers
@@ -11924,15 +11930,43 @@
 ; Stack Protector Patterns
 ;
 
+; Insns stack_protect_get_tp{si,di} are similar to *get_tp_{31,64} but still
+; distinct in the sense that they force recomputation of the thread pointer
+; instead of potentially reloading it from stack.
+
+(define_insn_and_split "stack_protect_get_tpsi"
+  [(set (match_operand:SI 0 "register_operand" "=d")
+   (unspec_volatile:SI [(const_int 0)] UNSPECV_SP_GET_TP))]
+  ""
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0) (reg:SI AR0_REGNUM))])
+
+(define_insn_and_split "stack_protect_get_tpdi"
+  [(set (match_operand:DI 0 "register_operand" "=d")
+   (unspec_volatile:DI [(const_int 0)] UNSPECV_SP_GET_TP))]
+  ""
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 1) (reg:SI AR0_REGNUM))
+   (set (match_dup 0) (ashift:DI (match_dup 0) (const_int 32)))
+   (set (strict_low_part (match_dup 1)) (reg:SI AR1_REGNUM))]
+  "operands[1] = gen_rtx_REG (SImode, REGNO (operands[0]));")
+
 (define_expand "stack_protect_set"
   [(set (match_operand 0 "memory_operand" "")
(match_operand 1 "memory_operand" ""))]
   ""
 {
 #ifdef TARGET_THREAD_SSP_OFFSET
+  rtx tp = gen_reg_rtx (Pmode);
+  if (TARGET_64BIT)
+emit_insn (gen_stack_protect_get_tpdi (tp));
+  else
+emit_insn (gen_stack_protect_get_tpsi (tp));
   operands[1]
-= gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, s390_get_thread_pointer (),
-GEN_INT (TARGET_THREAD_SSP_OFFSET)));
+= gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tp,
+   GEN_INT (TARGET_THREAD_SSP_OFFSET)));
 #endif
   if (TARGET_64BIT)
 emit_insn (gen_stack_protect_setdi (operands[0], operands[1]));
@@ -11958,9 +11992,14 @@
 {
   rtx cc_reg, test;
 #ifdef TARGET_THREAD_SSP_OFFSET
+  rtx tp = gen_reg_rtx (Pmode);
+  if (TARGET_64BIT)
+emit_insn (gen_stack_protect_get_tpdi (tp));
+  else
+emit_insn (gen_stack_protect_get_tpsi (tp));
   operands[1]
-= gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, s390_get_thread_pointer (),
-GEN_INT (TARGET_THREAD_SSP_OFFSET)));
+= gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tp,
+   GEN_INT (TARGET_THREAD_SSP_OFFSET)));
 #endif
   if (TARGET_64BIT)
 emit_insn (gen_stack_protect_testdi (operands[0], operands[1]));
diff --git a/gcc/testsuite/gcc.target/s390/stack-protector-guard-tls-1.c 
b/gcc/testsuite/gcc.target/s390/stack-protector-guard-tls-1.c
new file mode 100644
index ..1efd24551443
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/stack-protector-guard-tls-1.c
@@ -0,0 +1,39 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fstack-protector-all" } */
+/* { dg-final { scan-assemb

[gcc(refs/users/omachota/heads/rtl-ssa-dce-wdebug)] rtl-ssa-dce: add functions for resurrecting dead insns

2025-07-08 Thread Ondrej Machota via Gcc-cvs

https://gcc.gnu.org/g:3b84c69b2237dd7316dc07d2b41f39b2c494d811

commit 3b84c69b2237dd7316dc07d2b41f39b2c494d811
Author: Ondřej Machota 
Date:   Tue Jul 8 11:33:12 2025 +0200

rtl-ssa-dce: add functions for resurrecting dead insns

Diff:
---
 gcc/dce.cc | 355 +
 1 file changed, 355 insertions(+)

diff --git a/gcc/dce.cc b/gcc/dce.cc
index 67fb42541d84..d12cab054b48 100644
--- a/gcc/dce.cc
+++ b/gcc/dce.cc
@@ -19,6 +19,7 @@ along with GCC; see the file COPYING3.  If not see
 
 #include 
 #include 
+#include 
 #define INCLUDE_ALGORITHM
 #define INCLUDE_FUNCTIONAL
 #define INCLUDE_ARRAY
@@ -1378,8 +1379,15 @@ private:
   void mark_prelive_insn (insn_info *, auto_vec &);
   auto_vec mark_prelive ();
   void mark ();
+
+  std::unordered_set propagate_dead_phis ();
+  void debugize_insn (insn_info *);
+  void unmark_debugizable(insn_info *, sbitmap);
+  sbitmap find_debugizable(const std::unordered_set &);
+  void debugize_insns (const sbitmap);
   void reset_dead_debug_insn (insn_info *);
   void reset_dead_debug ();
+
   void sweep ();
 
   offset_bitmap m_marked;
@@ -1633,6 +1641,352 @@ rtl_ssa_dce::mark ()
 }
 }
 
+
+// Mark instructions that depend on a dead phi - these cannot be restored
+std::unordered_set
+rtl_ssa_dce::propagate_dead_phis ()
+{
+  std::unordered_set visited_dead_phis;
+  std::unordered_set depends_on_dead_phi;
+  auto_vec worklist;
+
+  // add dead phis to worklist
+  for (ebb_info *ebb : crtl->ssa->ebbs ())
+{
+  for (phi_info *phi : ebb->phis ())
+   {
+ if (bitmap_bit_p (m_marked_phis, phi->uid ()))
+   continue;
+
+ worklist.safe_push (phi);
+   }
+}
+
+  // suppose that debug insns are marked - non marked will be removed later
+  // propagate dead phis via du chains and unmark reachable debug instructions
+  while (!worklist.is_empty ())
+{
+  set_info *set = worklist.pop ();
+  insn_info *insn = set->insn ();
+
+  if (insn->is_debug_insn ())
+   {
+ if (dump_file)
+   fprintf (dump_file, "Debug insns %d depends on dead phi.\n",
+insn->uid ());
+
+m_marked.clear_bit (insn->uid ());
+ // debug instructions dont have chains
+ continue;
+   }
+
+  // mark
+  if (insn->is_phi ())
+   {
+ gcc_checking_assert (!bitmap_bit_p(m_marked_phis, 
static_cast (set)->uid ()));
+ visited_dead_phis.emplace (static_cast (set));
+   }
+  else
+   {
+ gcc_checking_assert (!m_marked.get_bit (insn->uid ()));
+ depends_on_dead_phi.emplace (insn);
+   }
+
+  for (use_info *use : set->all_uses ())
+   {
+ if (use->is_in_phi ())
+   {
+ // do not add already visited dead phis
+ if (visited_dead_phis.count (use->phi ()) == 0)
+   worklist.safe_push (use->phi ());
+   }
+ else
+   {
+ gcc_assert (use->is_in_any_insn ());
+ // add all defs from insn to worklist
+ for (def_info *def : use->insn ()->defs ())
+   {
+ if (def->kind () != access_kind::SET)
+   continue;
+
+ worklist.safe_push (static_cast (def));
+   }
+   }
+   }
+}
+
+  return depends_on_dead_phi;
+}
+
+
+void
+rtl_ssa_dce::debugize_insn (insn_info *insn)
+{
+  
+}
+
+struct register_replacement {
+  unsigned int regno;
+  rtx expr;
+};
+
+static rtx
+replace_dead_reg(rtx x, const_rtx old_rtx ATTRIBUTE_UNUSED, void *data)
+{
+  auto replacement = static_cast(data);
+  
+ if (REG_P (x) && REGNO (x) >= FIRST_VIRTUAL_REGISTER && replacement->regno == 
REGNO (x))
+ {
+  if (GET_MODE (x) == GET_MODE (replacement->expr))
+ return replacement->expr;
+  return lowpart_subreg (GET_MODE (x), replacement->expr, GET_MODE 
(replacement->expr));
+ }
+
+ return NULL_RTX;
+}
+
+// visit every marked instruction in INSN dependency tree and unmark it
+void
+rtl_ssa_dce::unmark_debugizable (insn_info *insn, sbitmap debugizable) 
+{
+  auto_vec worklist;
+  gcc_checking_assert (!insn->is_artificial ());
+
+  bitmap_set_bit (debugizable, insn->uid ());
+  worklist.safe_push (insn);
+
+  // process all marked dependencies and unmark them
+  while (!worklist.is_empty ()) {
+insn_info *current = worklist.pop ();
+int current_uid = current->uid ();
+
+// skip instruction that are not marked
+if (!bitmap_bit_p(debugizable, current_uid))
+  continue;
+
+bitmap_clear_bit(debugizable, current_uid);
+
+// add all marked dependencies to the worklist
+for (def_info *def : current->defs())
+{
+  if (def->kind() != access_kind::SET) // skip clobbers
+continue;
+  
+  auto *set = static_cast(def);
+  for (use_info *use : set->all_uses()) 
+  {
+// this phi node might not be dead
+if (use->is_in_phi ())
+  continue;
+
+insn_info *use_i

[gcc r16-2108] c++: Implement part of C++26 P2686R4 - constexpr structured bindings [PR117784]

2025-07-08 Thread Jakub Jelinek via Gcc-cvs

https://gcc.gnu.org/g:c81447d969f27a8653ebb1a450372f0d25a2e628

commit r16-2108-gc81447d969f27a8653ebb1a450372f0d25a2e628
Author: Jakub Jelinek 
Date:   Tue Jul 8 19:21:55 2025 +0200

c++: Implement part of C++26 P2686R4 - constexpr structured bindings 
[PR117784]

The following patch implements the constexpr structured bindings part of
the P2686R4 paper, so the [dcl.pre], [dcl.struct.bind], [dcl.constinit]
and first hunk in [dcl.constexpr] changes.
The paper doesn't have a feature test macro and the constexpr structured
binding part of it seems more or less self-contained, so I think it is useful
to get this in independently from the rest.
Of course, automatic constexpr/constinit structured bindings in the
tuple cases or automatic constexpr/constinit structured bindings with auto &
will not really work for now.
Another reason for the split is that for C++ < 26, I think what the patch
implements is basically what the users will see, i.e. we can accept
constexpr or constinit structured binding with pedwarn, but I think we can't
change the constant expression rules in C++ < 26.

I plan to look at the rest of the paper.

2025-07-08  Jakub Jelinek  

PR c++/117784
* decl.cc: Implement part of C++26 P2686R4 - constexpr structured
bindings.
(cp_finish_decl): Pedwarn for C++23 and older on constinit on
structured bindings except for static/thread_local where it uses
earlier error.
(grokdeclarator): Pedwarn on constexpr structured bindings for
C++23 and older instead of emitting error always, don't clear
constexpr_p in that case.
* parser.cc (cp_parser_decomposition_declaration): Copy over
DECL_DECLARED_CONSTEXPR_P and DECL_DECLARED_CONSTINIT_P flags.

* g++.dg/cpp1z/decomp3.C (test): For constexpr structured binding
initialize from constexpr var instead of non-constexpr and expect
just a pedwarn for C++23 and older instead of error always.
* g++.dg/cpp26/decomp9.C (foo): Likewise.
* g++.dg/cpp26/decomp22.C: New test.
* g++.dg/cpp26/decomp23.C: New test.
* g++.dg/cpp26/decomp24.C: New test.
* g++.dg/cpp26/decomp25.C: New test.

Diff:
---
 gcc/cp/decl.cc|  19 --
 gcc/cp/parser.cc  |   6 +-
 gcc/testsuite/g++.dg/cpp1z/decomp3.C  |   3 +-
 gcc/testsuite/g++.dg/cpp26/decomp22.C |  66 +++
 gcc/testsuite/g++.dg/cpp26/decomp23.C |  77 ++
 gcc/testsuite/g++.dg/cpp26/decomp24.C |  20 ++
 gcc/testsuite/g++.dg/cpp26/decomp25.C | 119 ++
 gcc/testsuite/g++.dg/cpp26/decomp9.C  |   5 +-
 8 files changed, 305 insertions(+), 10 deletions(-)

diff --git a/gcc/cp/decl.cc b/gcc/cp/decl.cc
index be26bd39b225..99b9854210f7 100644
--- a/gcc/cp/decl.cc
+++ b/gcc/cp/decl.cc
@@ -9174,6 +9174,10 @@ cp_finish_decl (tree decl, tree init, bool 
init_const_expr_p,
 
   if (decomp)
{
+ if (DECL_DECLARED_CONSTINIT_P (decl) && cxx_dialect < cxx26)
+   pedwarn (DECL_SOURCE_LOCATION (decl), OPT_Wc__26_extensions,
+"% can be applied to structured binding "
+"only with %<-std=c++2c%> or %<-std=gnu++2c%>");
  cp_maybe_mangle_decomp (decl, decomp);
  if (TREE_STATIC (decl) && !DECL_FUNCTION_SCOPE_P (decl))
{
@@ -13621,9 +13625,10 @@ grokdeclarator (const cp_declarator *declarator,
   if (typedef_p)
error_at (declspecs->locations[ds_typedef],
  "structured binding declaration cannot be %qs", "typedef");
-  if (constexpr_p && !concept_p)
-   error_at (declspecs->locations[ds_constexpr], "structured "
- "binding declaration cannot be %qs", "constexpr");
+  if (constexpr_p && !concept_p && cxx_dialect < cxx26)
+   pedwarn (declspecs->locations[ds_constexpr], OPT_Wc__26_extensions,
+"structured binding declaration can be %qs only with "
+"%<-std=c++2c%> or %<-std=gnu++2c%>", "constexpr");
   if (consteval_p)
error_at (declspecs->locations[ds_consteval], "structured "
  "binding declaration cannot be %qs", "consteval");
@@ -13634,8 +13639,11 @@ grokdeclarator (const cp_declarator *declarator,
 declspecs->gnu_thread_keyword_p
 ? "__thread" : "thread_local");
   if (concept_p)
-   error_at (declspecs->locations[ds_concept],
- "structured binding declaration cannot be %qs", "concept");
+   {
+ error_at (declspecs->locations[ds_concept],
+   "structured binding declaration cannot be %qs", "concept");
+ constexpr_p = 0;
+   }
   /* [dcl.struct.bind] "A cv that includes volatile is deprecated."  */
   if (type_quals & TYPE_QUAL_VO

[gcc r16-2109] libstdc++: Fix _GLIBCXX_DEBUG std::forward_list build regression

2025-07-08 Thread Francois Dumont via Libstdc++-cvs

https://gcc.gnu.org/g:1f3bf202355f16d6ec0a9b37cb6a71be5f76b77f

commit r16-2109-g1f3bf202355f16d6ec0a9b37cb6a71be5f76b77f
Author: Jonathan Wakely 
Date:   Tue Jul 8 19:20:13 2025 +0200

libstdc++: Fix _GLIBCXX_DEBUG std::forward_list build regression

Commit 2fd6f42c17a8040dbd3460ca34d93695dacf8575 broke _GLIBCXX_DEBUG
std::forward_list implementation.

libstdc++-v3/ChangeLog:

* include/debug/forward_list (_Safe_forward_list<>::_M_swap):
Adapt to _M_this() signature change.

Diff:
---
 libstdc++-v3/include/debug/forward_list | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/libstdc++-v3/include/debug/forward_list 
b/libstdc++-v3/include/debug/forward_list
index 4e1511da4e82..9da7dda45034 100644
--- a/libstdc++-v3/include/debug/forward_list
+++ b/libstdc++-v3/include/debug/forward_list
@@ -144,13 +144,13 @@ namespace __gnu_debug
   //std::swap(_M_this()->_M_version, __other._M_version);
   _Safe_iterator_base* __this_its = _M_this()->_M_iterators;
   _S_swap_aux(__other, __other._M_iterators,
- _M_this(), _M_this()->_M_iterators);
+ *_M_this(), _M_this()->_M_iterators);
   _Safe_iterator_base* __this_const_its = _M_this()->_M_const_iterators;
   _S_swap_aux(__other, __other._M_const_iterators,
- _M_this(), _M_this()->_M_const_iterators);
-  _S_swap_aux(_M_this(), __this_its,
+ *_M_this(), _M_this()->_M_const_iterators);
+  _S_swap_aux(*_M_this(), __this_its,
  __other, __other._M_iterators);
-  _S_swap_aux(_M_this(), __this_const_its,
+  _S_swap_aux(*_M_this(), __this_const_its,
  __other, __other._M_const_iterators);
 }

[gcc r16-2110] xtensa: Fix B[GE/LT]UI instructions with immediate values of 32768 or 65536 not being emitted

2025-07-08 Thread Max Filippov via Gcc-cvs

https://gcc.gnu.org/g:57da36bed1004d2b78057568176b76cb0a50d149

commit r16-2110-g57da36bed1004d2b78057568176b76cb0a50d149
Author: Takayuki 'January June' Suwa 
Date:   Mon Jul 7 23:40:17 2025 +0900

xtensa: Fix B[GE/LT]UI instructions with immediate values of 32768 or 65536 not being emitted

This is because in canonicalize_comparison() in gcc/expmed.cc, the COMPARE
rtx_cost() for the immediate values in the title does not change between
the old and new versions.  This patch fixes that.

(note: Currently, this patch only works if some constant propagation
optimizations are enabled (-O2 or higher) or if bare large constant
assignments are possible (-mconst16 or -mauto-litpools).  In the future
I hope to make it work at -O1...)

gcc/ChangeLog:

* config/xtensa/xtensa.cc (xtensa_b4const_or_zero):
Remove.
(xtensa_b4const): Add a case where the value is 0, and rename
to xtensa_b4const_or_zero.
(xtensa_rtx_costs): Fix to also consider the result of
xtensa_b4constu().

gcc/testsuite/ChangeLog:

* gcc.target/xtensa/BGEUI-BLTUI-32k-64k.c: New.

Diff:
---
 gcc/config/xtensa/xtensa.cc   | 17 +
 gcc/testsuite/gcc.target/xtensa/BGEUI-BLTUI-32k-64k.c | 19 +++
 2 files changed, 24 insertions(+), 12 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 8c43a69f4cd9..b75cec13b28a 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -423,12 +423,13 @@ xtensa_uimm8x4 (HOST_WIDE_INT v)
 }
 
 
-static bool
-xtensa_b4const (HOST_WIDE_INT v)
+bool
+xtensa_b4const_or_zero (HOST_WIDE_INT v)
 {
   switch (v)
 {
 case -1:
+case 0:
 case 1:
 case 2:
 case 3:
@@ -450,15 +451,6 @@ xtensa_b4const (HOST_WIDE_INT v)
 }
 
 
-bool
-xtensa_b4const_or_zero (HOST_WIDE_INT v)
-{
-  if (v == 0)
-return true;
-  return xtensa_b4const (v);
-}
-
-
 bool
 xtensa_b4constu (HOST_WIDE_INT v)
 {
@@ -4512,7 +4504,8 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int 
outer_code,
}
  break;
case COMPARE:
- if ((INTVAL (x) == 0) || xtensa_b4const (INTVAL (x)))
+ if (xtensa_b4const_or_zero (INTVAL (x))
+ || xtensa_b4constu (INTVAL (x)))
{
  *total = 0;
  return true;
diff --git a/gcc/testsuite/gcc.target/xtensa/BGEUI-BLTUI-32k-64k.c 
b/gcc/testsuite/gcc.target/xtensa/BGEUI-BLTUI-32k-64k.c
new file mode 100644
index ..05873b896896
--- /dev/null
+++ b/gcc/testsuite/gcc.target/xtensa/BGEUI-BLTUI-32k-64k.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+extern void foo(void);
+
+void BGEUI_test(unsigned int a)
+{
+  if (a < 32768U)
+foo();
+}
+
+void BLTUI_test(unsigned int a)
+{
+  if (a >= 65536U)
+foo();
+}
+
+/* { dg-final { scan-assembler-times "bgeui" 1 } } */
+/* { dg-final { scan-assembler-times "bltui" 1 } } */

[gcc r16-2111] c++: bogus error with union in qualified name [PR83469]

2025-07-08 Thread Marek Polacek via Gcc-cvs

https://gcc.gnu.org/g:7d11ae1dd95a0296eeb5c14bfe3a5d4ec8873e3b

commit r16-2111-g7d11ae1dd95a0296eeb5c14bfe3a5d4ec8873e3b
Author: Marek Polacek 
Date:   Tue Jul 8 10:09:36 2025 -0400

c++: bogus error with union in qualified name [PR83469]

While working on Reflection I noticed that we reject:

  union U { int i; };
  constexpr auto r = ^^typename ::U;

which is due to PR83469.  Andrew P. posted a patch in 2021:
https://gcc.gnu.org/pipermail/gcc-patches/2021-December/586344.html
for which I had some comments but an updated patch never came.

~~
There are a few issues here with typenames and unions (and even struct
keywords with unions). First in cp_parser_check_class_key,
we need to allow typenames to name union types and union key
to be able to use with typenames.

The next issue is we need to record if we had a union key,
right now we just record it was a struct/class/typename one
which is wrong.
~~

This patch is an updated and cleaned up version; I've also addressed
a missing bit in pt.cc.

PR c++/83469
PR c++/93809

gcc/cp/ChangeLog:

* cp-tree.h (UNION_TYPE_P): Define.
(TYPENAME_IS_UNION_P): Define.
* decl.cc (struct typename_info): Add union_p field.
(struct typename_hasher::equal): Compare union_p field.
(build_typename_type): Use ti.union_p for union_type.  Set
TYPENAME_IS_UNION_P.
* error.cc (dump_type) <case TYPENAME_TYPE>: Handle
TYPENAME_IS_UNION_P.
* module.cc (trees_out::type_node): Likewise.
* parser.cc (cp_parser_check_class_key): Allow typename key for union
types and allow union keyword for typename types.
* pt.cc (tsubst) <case TYPENAME_TYPE>: Don't conflate unions with
class_type.  For TYPENAME_IS_CLASS_P, check NON_UNION_CLASS_TYPE_P
rather than CLASS_TYPE_P.  Add TYPENAME_IS_UNION_P handling.

gcc/testsuite/ChangeLog:

* g++.dg/template/error45.C: Adjust dg-error.
* g++.dg/warn/Wredundant-tags-3.C: Remove xfail.
* g++.dg/parse/union1.C: New test.
* g++.dg/parse/union2.C: New test.
* g++.dg/parse/union3.C: New test.
* g++.dg/parse/union4.C: New test.
* g++.dg/parse/union5.C: New test.
* g++.dg/parse/union6.C: New test.

Co-authored-by: Andrew Pinski 
Reviewed-by: Jason Merrill 

Diff:
---
 gcc/cp/cp-tree.h  | 12 ++--
 gcc/cp/decl.cc| 10 ++
 gcc/cp/error.cc   |  1 +
 gcc/cp/module.cc  |  2 ++
 gcc/cp/parser.cc  |  4 +++-
 gcc/cp/pt.cc  | 25 +
 gcc/testsuite/g++.dg/parse/union1.C   | 19 +++
 gcc/testsuite/g++.dg/parse/union2.C   | 19 +++
 gcc/testsuite/g++.dg/parse/union3.C   | 19 +++
 gcc/testsuite/g++.dg/parse/union4.C   | 12 
 gcc/testsuite/g++.dg/parse/union5.C   |  5 +
 gcc/testsuite/g++.dg/parse/union6.C   |  5 +
 gcc/testsuite/g++.dg/template/error45.C   |  2 +-
 gcc/testsuite/g++.dg/warn/Wredundant-tags-3.C |  2 +-
 14 files changed, 120 insertions(+), 17 deletions(-)

diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index 1b893e23543d..3b92d9af6e1c 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -506,6 +506,7 @@ extern GTY(()) tree cp_global_trees[CPTI_MAX];
   LAMBDA_EXPR_STATIC_P (in LAMBDA_EXPR)
   TARGET_EXPR_ELIDING_P (in TARGET_EXPR)
   contract_semantic (in ASSERTION_, PRECONDITION_, POSTCONDITION_STMT)
+  TYPENAME_IS_UNION_P (in TYPENAME_TYPE)
4: IDENTIFIER_MARKED (IDENTIFIER_NODEs)
   TREE_HAS_CONSTRUCTOR (in INDIRECT_REF, SAVE_EXPR, CONSTRUCTOR,
  CALL_EXPR, or FIELD_DECL).
@@ -2354,6 +2355,10 @@ enum languages { lang_c, lang_cplusplus };
 #define NON_UNION_CLASS_TYPE_P(T) \
   (TREE_CODE (T) == RECORD_TYPE && TYPE_LANG_FLAG_5 (T))
 
+/* Nonzero if T is a class type and is a union.  */
+#define UNION_TYPE_P(T) \
+  (TREE_CODE (T) == UNION_TYPE && TYPE_LANG_FLAG_5 (T))
+
 /* Keep these checks in ascending code order.  */
 #define RECORD_OR_UNION_CODE_P(T)  \
   ((T) == RECORD_TYPE || (T) == UNION_TYPE)
@@ -4485,11 +4490,14 @@ get_vec_init_expr (tree t)
 #define TYPENAME_IS_ENUM_P(NODE) \
   (TREE_LANG_FLAG_0 (TYPENAME_TYPE_CHECK (NODE)))
 
-/* True if a TYPENAME_TYPE was declared as a "class", "struct", or
-   "union".  */
+/* True if a TYPENAME_TYPE was declared as a "class" or "struct".  */
 #define TYPENAME_IS_CLASS_P(NODE) \
   (TREE_LANG_FLAG_1 (TYPENAME_TYPE_CHECK (NODE)))
 
+/* True if a TYPENAME_TYPE was declared as a "union".  */
+#define TYPENAME_IS_UNION_P(NODE) \
+  (TREE_LANG_FLAG_3 (TYPENAME_TYPE_CHECK (N

[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector or to vector or fusion

2025-07-08 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:d1a81130bc9a5ffc02f19b1711a2703cc0b03d74

commit d1a81130bc9a5ffc02f19b1711a2703cc0b03d74
Author: Michael Meissner 
Date:   Wed Jul 9 01:46:41 2025 -0400

PR target/117251: Improve vector or to vector or fusion

See the following post for a complete explanation of what the patches
for PR target/117251 do:

 * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html

This is patch #23 of 45 to generate the 'XXEVAL' instruction on power10
and power11 instead of using the Altivec 'VOR' instruction feeding into
'VOR'.  The 'XXEVAL' instruction can use all 64 vector registers,
instead of the 32 registers that traditional Altivec vector
instructions use.  By allowing all of the vector registers to be used,
it reduces the amount of spilling that a large benchmark generated.

Currently the following code:

vector int a, b, c, d;
a = (c | d) | b;

Generates:

vor    t,c,d
vor    a,t,b

Now in addition with this patch, if the arguments or result is
allocated to a traditional FPR register, the GCC compiler will now
generate the following code instead of adding vector move instructions:

xxeval a,b,c,d,127

Since fusion using 2 Altivec instructions is slightly faster than using
the 'XXEVAL' instruction we prefer to generate the Altivec instructions
if we can.  In addition, because 'XXEVAL' is a prefixed instruction, it
possibly might generate an extra NOP instruction to align the 'XXEVAL'
instruction.

I have tested these patches on both big endian and little endian
PowerPC servers, with no regressions.  Can I check these patches into
the trunk?

2025-07-09  Michael Meissner  

gcc/

PR target/117251
* config/rs6000/fusion.md: Regenerate.
* config/rs6000/genfusion.pl (gen_logical_addsubf): Add support
to generate vector or => or fusion if XXEVAL is supported.

Diff:
---
 gcc/config/rs6000/fusion.md| 15 +--
 gcc/config/rs6000/genfusion.pl |  1 +
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index d1f6a38b618a..c2a2ebf4bfaf 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -2729,20 +2729,23 @@
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vor -> vor
 (define_insn "*fuse_vor_vor"
-  [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v")
-(ior:VM (ior:VM (match_operand:VM 0 "altivec_register_operand" 
"v,v,v,v")
-  (match_operand:VM 1 "altivec_register_operand" 
"%v,v,v,v"))
- (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))
-   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
+  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
+(ior:VM (ior:VM (match_operand:VM 0 "vector_fusion_operand" 
"v,v,v,wa,v")
+  (match_operand:VM 1 "vector_fusion_operand" 
"%v,v,v,wa,v"))
+ (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v")))
+   (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
vor %3,%1,%0\;vor %3,%3,%2
vor %3,%1,%0\;vor %3,%3,%2
vor %3,%1,%0\;vor %3,%3,%2
+   xxeval %x3,%x2,%x1,%x0,127
vor %4,%1,%0\;vor %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
-   (set_attr "length" "8")])
+   (set_attr "length" "8")
+   (set_attr "prefixed" "*,*,*,yes,*")
+   (set_attr "isa" "*,*,*,xxeval,*")])
 
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vorc -> vor
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index 97681f37d0fa..9df4c8d6527e 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -237,6 +237,7 @@ sub gen_logical_addsubf
   "vxor_vor"=> 111,
   "vnor_vnor"   => 112,
   "vor_vxor"=> 120,
+  "vor_vor" => 127,
 );
 
 KIND: foreach $kind ('scalar','vector') {

[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector or to vector nor fusion

2025-07-08 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:0e4302dcb91643c47b9c396834dc9a9687410aa2

commit 0e4302dcb91643c47b9c396834dc9a9687410aa2
Author: Michael Meissner 
Date:   Wed Jul 9 01:47:01 2025 -0400

PR target/117251: Improve vector or to vector nor fusion

See the following post for a complete explanation of what the patches
for PR target/117251 do:

 * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html

This is patch #24 of 45 to generate the 'XXEVAL' instruction on power10
and power11 instead of using the Altivec 'VOR' instruction feeding into
'VNOR'.  The 'XXEVAL' instruction can use all 64 vector registers,
instead of the 32 registers that traditional Altivec vector
instructions use.  By allowing all of the vector registers to be used,
it reduces the amount of spilling that a large benchmark generated.

Currently the following code:

vector int a, b, c, d;
a = ~ ((c | d) | b);

Generates:

vor    t,c,d
vnor   a,t,b

Now in addition with this patch, if the arguments or result is
allocated to a traditional FPR register, the GCC compiler will now
generate the following code instead of adding vector move instructions:

xxeval a,b,c,d,128

Since fusion using 2 Altivec instructions is slightly faster than using
the 'XXEVAL' instruction we prefer to generate the Altivec instructions
if we can.  In addition, because 'XXEVAL' is a prefixed instruction, it
possibly might generate an extra NOP instruction to align the 'XXEVAL'
instruction.

I have tested these patches on both big endian and little endian
PowerPC servers, with no regressions.  Can I check these patches into
the trunk?

2025-07-09  Michael Meissner  

gcc/

PR target/117251
* config/rs6000/fusion.md: Regenerate.
* config/rs6000/genfusion.pl (gen_logical_addsubf): Add support
to generate vector or => nor fusion if XXEVAL is supported.

Diff:
---
 gcc/config/rs6000/fusion.md| 15 +--
 gcc/config/rs6000/genfusion.pl |  1 +
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index c2a2ebf4bfaf..c55e9d4abd67 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -2576,20 +2576,23 @@
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vor -> vnor
 (define_insn "*fuse_vor_vnor"
-  [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v")
-(and:VM (not:VM (ior:VM (match_operand:VM 0 "altivec_register_operand" 
"v,v,v,v")
-  (match_operand:VM 1 "altivec_register_operand" 
"v,v,v,v")))
- (not:VM (match_operand:VM 2 "altivec_register_operand" 
"v,v,v,v"
-   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
+  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
+(and:VM (not:VM (ior:VM (match_operand:VM 0 "vector_fusion_operand" 
"v,v,v,wa,v")
+  (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,wa,v")))
+ (not:VM (match_operand:VM 2 "vector_fusion_operand" 
"v,v,v,wa,v"
+   (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
vor %3,%1,%0\;vnor %3,%3,%2
vor %3,%1,%0\;vnor %3,%3,%2
vor %3,%1,%0\;vnor %3,%3,%2
+   xxeval %x3,%x2,%x1,%x0,128
vor %4,%1,%0\;vnor %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
-   (set_attr "length" "8")])
+   (set_attr "length" "8")
+   (set_attr "prefixed" "*,*,*,yes,*")
+   (set_attr "isa" "*,*,*,xxeval,*")])
 
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vorc -> vnor
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index 9df4c8d6527e..58f900640bef 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -238,6 +238,7 @@ sub gen_logical_addsubf
   "vnor_vnor"   => 112,
   "vor_vxor"=> 120,
   "vor_vor" => 127,
+  "vor_vnor"=> 128,
 );
 
 KIND: foreach $kind ('scalar','vector') {

[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector and to vector nor fusion

2025-07-08 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:f0e56a8159d60e2f9ceb50f4f918f0e93e38c420

commit f0e56a8159d60e2f9ceb50f4f918f0e93e38c420
Author: Michael Meissner 
Date:   Wed Jul 9 01:52:03 2025 -0400

PR target/117251: Improve vector and to vector nor fusion

See the following post for a complete explanation of what the patches
for PR target/117251 do:

 * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html

This is patch #34 of 45 to generate the 'XXEVAL' instruction on power10
and power11 instead of using the Altivec 'VAND' instruction feeding
into 'VNOR'.  The 'XXEVAL' instruction can use all 64 vector registers,
instead of the 32 registers that traditional Altivec vector
instructions use.  By allowing all of the vector registers to be used,
it reduces the amount of spilling that a large benchmark generated.

Currently the following code:

vector int a, b, c, d;
a = ~ ((c & d) | b);

Generates:

vand   t,c,d
vnor   a,t,b

Now in addition with this patch, if the arguments or result is
allocated to a traditional FPR register, the GCC compiler will now
generate the following code instead of adding vector move instructions:

xxeval a,b,c,d,224

Since fusion using 2 Altivec instructions is slightly faster than using
the 'XXEVAL' instruction we prefer to generate the Altivec instructions
if we can.  In addition, because 'XXEVAL' is a prefixed instruction, it
possibly might generate an extra NOP instruction to align the 'XXEVAL'
instruction.

I have tested these patches on both big endian and little endian
PowerPC servers, with no regressions.  Can I check these patches into
the trunk?

2025-07-09  Michael Meissner  

gcc/

PR target/117251
* config/rs6000/fusion.md: Regenerate.
* config/rs6000/genfusion.pl (gen_logical_addsubf): Add support
to generate vector and => nor fusion if XXEVAL is supported.

Diff:
---
 gcc/config/rs6000/fusion.md| 15 +--
 gcc/config/rs6000/genfusion.pl |  1 +
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index e3d9f7376a8d..68b52d4f5893 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -2480,20 +2480,23 @@
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vand -> vnor
 (define_insn "*fuse_vand_vnor"
-  [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v")
-(and:VM (not:VM (and:VM (match_operand:VM 0 "altivec_register_operand" 
"v,v,v,v")
-  (match_operand:VM 1 "altivec_register_operand" 
"v,v,v,v")))
- (not:VM (match_operand:VM 2 "altivec_register_operand" 
"v,v,v,v"
-   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
+  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
+(and:VM (not:VM (and:VM (match_operand:VM 0 "vector_fusion_operand" 
"v,v,v,wa,v")
+  (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,wa,v")))
+ (not:VM (match_operand:VM 2 "vector_fusion_operand" 
"v,v,v,wa,v"
+   (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
vand %3,%1,%0\;vnor %3,%3,%2
vand %3,%1,%0\;vnor %3,%3,%2
vand %3,%1,%0\;vnor %3,%3,%2
+   xxeval %x3,%x2,%x1,%x0,224
vand %4,%1,%0\;vnor %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
-   (set_attr "length" "8")])
+   (set_attr "length" "8")
+   (set_attr "prefixed" "*,*,*,yes,*")
+   (set_attr "isa" "*,*,*,xxeval,*")])
 
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vandc -> vnor
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index 3a603eb09675..56e5d96ec5f3 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -248,6 +248,7 @@ sub gen_logical_addsubf
   "vorc_vor"=> 191,
   "vandc_vnor"  => 208,
   "vandc_veqv"  => 210,
+  "vand_vnor"   => 224,
 );
 
 KIND: foreach $kind ('scalar','vector') {

[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector nand to vector xor fusion

2025-07-08 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:81b6e9d57983a3679b01f5d555915d95240e9bd8

commit 81b6e9d57983a3679b01f5d555915d95240e9bd8
Author: Michael Meissner 
Date:   Wed Jul 9 01:52:22 2025 -0400

PR target/117251: Improve vector nand to vector xor fusion

See the following post for a complete explanation of what the patches
for PR target/117251 do:

 * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html

This is patch #35 of 45 to generate the 'XXEVAL' instruction on power10
and power11 instead of using the Altivec 'VNAND' instruction feeding
into 'VXOR'.  The 'XXEVAL' instruction can use all 64 vector registers,
instead of the 32 registers that traditional Altivec vector
instructions use.  By allowing all of the vector registers to be used,
it reduces the amount of spilling that a large benchmark generated.

Currently the following code:

vector int a, b, c, d;
a = (~ (c & d)) ^ b;

Generates:

vnand  t,c,d
vxor   a,t,b

Now in addition with this patch, if the arguments or result is
allocated to a traditional FPR register, the GCC compiler will now
generate the following code instead of adding vector move instructions:

xxeval a,b,c,d,225

Since fusion using 2 Altivec instructions is slightly faster than using
the 'XXEVAL' instruction we prefer to generate the Altivec instructions
if we can.  In addition, because 'XXEVAL' is a prefixed instruction, it
possibly might generate an extra NOP instruction to align the 'XXEVAL'
instruction.

I have tested these patches on both big endian and little endian
PowerPC servers, with no regressions.  Can I check these patches into
the trunk?

2025-07-09  Michael Meissner  

gcc/

PR target/117251
* config/rs6000/fusion.md: Regenerate.
* config/rs6000/genfusion.pl (gen_logical_addsubf): Add support
to generate vector nand => xor fusion if XXEVAL is supported.

Diff:
---
 gcc/config/rs6000/fusion.md| 15 +--
 gcc/config/rs6000/genfusion.pl |  1 +
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index 68b52d4f5893..e6d13b38415a 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -3023,20 +3023,23 @@
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vnand -> vxor
 (define_insn "*fuse_vnand_vxor"
-  [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v")
-(xor:VM (ior:VM (not:VM (match_operand:VM 0 "altivec_register_operand" 
"v,v,v,v"))
-  (not:VM (match_operand:VM 1 
"altivec_register_operand" "v,v,v,v")))
- (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))
-   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
+  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
+(xor:VM (ior:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" 
"v,v,v,wa,v"))
+  (not:VM (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,wa,v")))
+ (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v")))
+   (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
vnand %3,%1,%0\;vxor %3,%3,%2
vnand %3,%1,%0\;vxor %3,%3,%2
vnand %3,%1,%0\;vxor %3,%3,%2
+   xxeval %x3,%x2,%x1,%x0,225
vnand %4,%1,%0\;vxor %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
-   (set_attr "length" "8")])
+   (set_attr "length" "8")
+   (set_attr "prefixed" "*,*,*,yes,*")
+   (set_attr "isa" "*,*,*,xxeval,*")])
 
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vnor -> vxor
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index 56e5d96ec5f3..94eae471c64b 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -249,6 +249,7 @@ sub gen_logical_addsubf
   "vandc_vnor"  => 208,
   "vandc_veqv"  => 210,
   "vand_vnor"   => 224,
+  "vnand_vxor"  => 225,
 );
 
 KIND: foreach $kind ('scalar','vector') {

[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector andc to vector xor fusion

2025-07-08 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:3eccadbd9e1f7b58074a0a472e944d92856483b3

commit 3eccadbd9e1f7b58074a0a472e944d92856483b3
Author: Michael Meissner 
Date:   Wed Jul 9 01:42:05 2025 -0400

PR target/117251: Improve vector andc to vector xor fusion

See the following post for a complete explanation of what the patches
for PR target/117251 do:

 * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html

This is patch #13 of 45 to generate the 'XXEVAL' instruction on power10
and power11 instead of using the Altivec 'VANDC' instruction feeding
into 'VXOR'.  The 'XXEVAL' instruction can use all 64 vector registers,
instead of the 32 registers that traditional Altivec vector
instructions use.  By allowing all of the vector registers to be used,
it reduces the amount of spilling that a large benchmark generated.

Currently the following code:

vector int a, b, c, d;
a = (c & ~ d) ^ b;

Generates:

vandc  t,c,d
vxor   a,t,b

Now in addition with this patch, if the arguments or result is
allocated to a traditional FPR register, the GCC compiler will now
generate the following code instead of adding vector move instructions:

xxeval a,b,c,d,45

Since fusion using 2 Altivec instructions is slightly faster than using
the 'XXEVAL' instruction we prefer to generate the Altivec instructions
if we can.  In addition, because 'XXEVAL' is a prefixed instruction, it
possibly might generate an extra NOP instruction to align the 'XXEVAL'
instruction.

I have tested these patches on both big endian and little endian
PowerPC servers, with no regressions.  Can I check these patches into
the trunk?

2025-07-09  Michael Meissner  

gcc/

PR target/117251
* config/rs6000/fusion.md: Regenerate.
* config/rs6000/genfusion.pl (gen_logical_addsubf): Add support
to generate vector andc => xor fusion if XXEVAL is supported.

Diff:
---
 gcc/config/rs6000/fusion.md| 15 +--
 gcc/config/rs6000/genfusion.pl |  1 +
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index fccea39d0aae..6e5c88b81b44 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -2933,20 +2933,23 @@
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vandc -> vxor
 (define_insn "*fuse_vandc_vxor"
-  [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v")
-(xor:VM (and:VM (not:VM (match_operand:VM 0 "altivec_register_operand" 
"v,v,v,v"))
-  (match_operand:VM 1 "altivec_register_operand" 
"v,v,v,v"))
- (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))
-   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
+  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
+(xor:VM (and:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" 
"v,v,v,wa,v"))
+  (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,wa,v"))
+ (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v")))
+   (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
vandc %3,%1,%0\;vxor %3,%3,%2
vandc %3,%1,%0\;vxor %3,%3,%2
vandc %3,%1,%0\;vxor %3,%3,%2
+   xxeval %x3,%x2,%x1,%x0,45
vandc %4,%1,%0\;vxor %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
-   (set_attr "length" "8")])
+   (set_attr "length" "8")
+   (set_attr "prefixed" "*,*,*,yes,*")
+   (set_attr "isa" "*,*,*,xxeval,*")])
 
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector veqv -> vxor
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index ab714b10f622..d15208a4ad3e 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -227,6 +227,7 @@ sub gen_logical_addsubf
   "vnand_vnor"  =>  16,
   "vand_vxor"   =>  30,
   "vand_vor"=>  31,
+  "vandc_vxor"  =>  45,
 );
 
 KIND: foreach $kind ('scalar','vector') {

[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector and to vector and fusion

2025-07-08 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:3b4588e41dc54d01b95e7b05e1f493c402ec39d2

commit 3b4588e41dc54d01b95e7b05e1f493c402ec39d2
Author: Michael Meissner 
Date:   Wed Jul 9 01:32:46 2025 -0400

PR target/117251: Improve vector and to vector and fusion

See the following post for a complete explanation of what the patches
for PR target/117251 do:

 * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html

This is patch #1 of 45 to generate the 'XXEVAL' instruction on power10
and power11 instead of using the Altivec 'VAND' instruction feeding
into 'VAND'.  The 'XXEVAL' instruction can use all 64 vector registers,
instead of the 32 registers that traditional Altivec vector
instructions use.  By allowing all of the vector registers to be used,
it reduces the amount of spilling that a large benchmark generated.

Currently the following code:

vector int a, b, c, d;
a = (c & d) & b;

Generates:

vand   t,c,d
vand   a,t,b

Now in addition with this patch, if the arguments or result is
allocated to a traditional FPR register, the GCC compiler will now
generate the following code instead of adding vector move instructions:

xxeval a,b,c,d,1

Since fusion using 2 Altivec instructions is slightly faster than using
the 'XXEVAL' instruction we prefer to generate the Altivec instructions
if we can.  In addition, because 'XXEVAL' is a prefixed instruction, it
possibly might generate an extra NOP instruction to align the 'XXEVAL'
instruction.

I have tested these patches on both big endian and little endian
PowerPC servers, with no regressions.  Can I check these patches into
the trunk?

2025-07-09  Michael Meissner  

gcc/

PR target/117251
* config/rs6000/fusion.md: Regenerate.
* config/rs6000/genfusion.pl (gen_logical_addsubf): Add
support to generate vector/vector and/and fusion if XXEVAL is
supported.
* config/rs6000/predicates.md (vector_fusion_operand): New
predicate.
* config/rs6000/rs6000.h (TARGET_XXEVAL): New macro.
* config/rs6000/rs6000.md (isa attribute): Add xxeval.
(enabled attribute): Add support for XXEVAL support.

Diff:
---
 gcc/config/rs6000/fusion.md | 15 ++-
 gcc/config/rs6000/genfusion.pl  | 58 ++---
 gcc/config/rs6000/predicates.md | 12 +
 gcc/config/rs6000/rs6000.h  |  4 +++
 gcc/config/rs6000/rs6000.md |  7 -
 5 files changed, 85 insertions(+), 11 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index 621b346f9eb9..d24837d68d83 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -1871,20 +1871,23 @@
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vand -> vand
 (define_insn "*fuse_vand_vand"
-  [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v")
-(and:VM (and:VM (match_operand:VM 0 "altivec_register_operand" 
"v,v,v,v")
-  (match_operand:VM 1 "altivec_register_operand" 
"%v,v,v,v"))
- (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))
-   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
+  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
+(and:VM (and:VM (match_operand:VM 0 "vector_fusion_operand" 
"v,v,v,wa,v")
+  (match_operand:VM 1 "vector_fusion_operand" 
"%v,v,v,wa,v"))
+ (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v")))
+   (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
vand %3,%1,%0\;vand %3,%3,%2
vand %3,%1,%0\;vand %3,%3,%2
vand %3,%1,%0\;vand %3,%3,%2
+   xxeval %x3,%x2,%x1,%x0,1
vand %4,%1,%0\;vand %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
-   (set_attr "length" "8")])
+   (set_attr "length" "8")
+   (set_attr "prefixed" "*,*,*,yes,*")
+   (set_attr "isa" "*,*,*,xxeval,*")])
 
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vandc -> vand
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index e5d3b1ee449d..351a4d914a4a 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -211,25 +211,33 @@ sub gen_logical_addsubf
$inner_comp, $inner_inv, $inner_rtl, $inner_op, $both_commute, $c4,
$bc, $inner_arg0, $inner_arg1, $inner_exp, $outer_arg2, $outer_exp,
$ftype, $insn, $is_subf, $is_rsubf, $outer_32, $outer_42,$outer_name,
-   $fuse_type);
-  KIND: foreach $kind ('scalar','vector') {
+   $fuse_type, $xxeval, $c5, $vect_pred, $vect_inner_arg0, 
$vect_inner_arg1,
+   $vect_inner_exp, $vect_outer_arg2, $vect_outer_exp);
+
+my %xxeval_fusions = (
+  "vand_vand"   =>   1,
+

[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector nor to vector nor fusion

2025-07-08 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:7efaca506ac6bc38aaf6bb1e2c7682c53ce96bb1

commit 7efaca506ac6bc38aaf6bb1e2c7682c53ce96bb1
Author: Michael Meissner 
Date:   Wed Jul 9 01:45:30 2025 -0400

PR target/117251: Improve vector nor to vector nor fusion

See the following post for a complete explanation of what the patches
for PR target/117251 do:

 * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html

This is patch #21 of 45 to generate the 'XXEVAL' instruction on power10
and power11 instead of using the Altivec 'VNOR' instruction feeding
into 'VNOR'.  The 'XXEVAL' instruction can use all 64 vector registers,
instead of the 32 registers that traditional Altivec vector
instructions use.  By allowing all of the vector registers to be used,
it reduces the amount of spilling that a large benchmark generated.

Currently the following code:

vector int a, b, c, d;
a = ~ ((~ (c | d)) | b);

Generates:

vnor   t,c,d
vnor   a,t,b

Now in addition with this patch, if the arguments or result is
allocated to a traditional FPR register, the GCC compiler will now
generate the following code instead of adding vector move instructions:

xxeval a,b,c,112

Since fusion using 2 Altivec instructions is slightly faster than using
the 'XXEVAL' instruction we prefer to generate the Altivec instructions
if we can.  In addition, because 'XXEVAL' is a prefixed instruction, it
possibly might generate an extra NOP instruction to align the 'XXEVAL'
instruction.

I have tested these patches on both big endian and little endian
PowerPC servers, with no regressions.  Can I check these patches into
the trunk?

2025-07-09  Michael Meissner  

gcc/

PR target/117251
* config/rs6000/fusion.md: Regenerate.
* config/rs6000/genfusion.pl (gen_logical_addsubf): Add support
to generate vector nor => nor fusion if XXEVAL is supported.

Diff:
---
 gcc/config/rs6000/fusion.md| 15 +--
 gcc/config/rs6000/genfusion.pl |  1 +
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index 1d4b3c970c7f..032c87ac5765 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -2555,20 +2555,23 @@
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vnor -> vnor
 (define_insn "*fuse_vnor_vnor"
-  [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v")
-(and:VM (not:VM (and:VM (not:VM (match_operand:VM 0 
"altivec_register_operand" "v,v,v,v"))
-  (not:VM (match_operand:VM 1 
"altivec_register_operand" "v,v,v,v"
- (not:VM (match_operand:VM 2 "altivec_register_operand" 
"v,v,v,v"
-   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
+  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
+(and:VM (not:VM (and:VM (not:VM (match_operand:VM 0 
"vector_fusion_operand" "v,v,v,wa,v"))
+  (not:VM (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,wa,v"
+ (not:VM (match_operand:VM 2 "vector_fusion_operand" 
"v,v,v,wa,v"
+   (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
vnor %3,%1,%0\;vnor %3,%3,%2
vnor %3,%1,%0\;vnor %3,%3,%2
vnor %3,%1,%0\;vnor %3,%3,%2
+   xxeval %x3,%x2,%x1,%x0,112
vnor %4,%1,%0\;vnor %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
-   (set_attr "length" "8")])
+   (set_attr "length" "8")
+   (set_attr "prefixed" "*,*,*,yes,*")
+   (set_attr "isa" "*,*,*,xxeval,*")])
 
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vor -> vnor
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index 4ec38beccb9c..6af4c5d7a182 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -235,6 +235,7 @@ sub gen_logical_addsubf
   "veqv_vnor"   =>  96,
   "vxor_vxor"   => 105,
   "vxor_vor"=> 111,
+  "vnor_vnor"   => 112,
 );
 
 KIND: foreach $kind ('scalar','vector') {

[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector nand to vector nand fusion

2025-07-08 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:a0c683836fd1305ce11ced99025a60ab877d3613

commit a0c683836fd1305ce11ced99025a60ab877d3613
Author: Michael Meissner 
Date:   Wed Jul 9 01:53:07 2025 -0400

PR target/117251: Improve vector nand to vector nand fusion

See the following post for a complete explanation of what the patches
for PR target/117251 do:

 * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html

This is patch #37 of 45 to generate the 'XXEVAL' instruction on power10
and power11 instead of using the Altivec 'VNAND' instruction feeding
into 'VNAND'.  The 'XXEVAL' instruction can use all 64 vector
registers, instead of the 32 registers that traditional Altivec vector
instructions use.  By allowing all of the vector registers to be used,
it reduces the amount of spilling that a large benchmark generated.

Currently the following code:

vector int a, b, c, d;
a = ~ ((~ (c & d)) & b);

Generates:

vnand  t,c,d
vnand  a,t,b

Now in addition with this patch, if the arguments or result is
allocated to a traditional FPR register, the GCC compiler will now
generate the following code instead of adding vector move instructions:

xxeval a,b,c,241

Since fusion using 2 Altivec instructions is slightly faster than using
the 'XXEVAL' instruction we prefer to generate the Altivec instructions
if we can.  In addition, because 'XXEVAL' is a prefixed instruction, it
possibly might generate an extra NOP instruction to align the 'XXEVAL'
instruction.

I have tested these patches on both big endian and little endian
PowerPC servers, with no regressions.  Can I check these patches into
the trunk?

2025-07-09  Michael Meissner  

gcc/

PR target/117251
* config/rs6000/fusion.md: Regenerate.
* config/rs6000/genfusion.pl (gen_logical_addsubf): Add support
to generate vector nand => nand fusion if XXEVAL is supported.

Diff:
---
 gcc/config/rs6000/fusion.md| 15 +--
 gcc/config/rs6000/genfusion.pl |  1 +
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index ba3a5a52b990..241b8a494fb1 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -2390,20 +2390,23 @@
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vnand -> vnand
 (define_insn "*fuse_vnand_vnand"
-  [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v")
-(ior:VM (not:VM (ior:VM (not:VM (match_operand:VM 0 
"altivec_register_operand" "v,v,v,v"))
-  (not:VM (match_operand:VM 1 
"altivec_register_operand" "v,v,v,v"
- (not:VM (match_operand:VM 2 "altivec_register_operand" 
"v,v,v,v"
-   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
+  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
+(ior:VM (not:VM (ior:VM (not:VM (match_operand:VM 0 
"vector_fusion_operand" "v,v,v,wa,v"))
+  (not:VM (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,wa,v"
+ (not:VM (match_operand:VM 2 "vector_fusion_operand" 
"v,v,v,wa,v"
+   (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
vnand %3,%1,%0\;vnand %3,%3,%2
vnand %3,%1,%0\;vnand %3,%3,%2
vnand %3,%1,%0\;vnand %3,%3,%2
+   xxeval %x3,%x2,%x1,%x0,241
vnand %4,%1,%0\;vnand %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
-   (set_attr "length" "8")])
+   (set_attr "length" "8")
+   (set_attr "prefixed" "*,*,*,yes,*")
+   (set_attr "isa" "*,*,*,xxeval,*")])
 
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vnor -> vnand
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index 54699d199fc5..728a447c65a9 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -251,6 +251,7 @@ sub gen_logical_addsubf
   "vand_vnor"   => 224,
   "vnand_vxor"  => 225,
   "vnand_vor"   => 239,
+  "vnand_vnand" => 241,
 );
 
 KIND: foreach $kind ('scalar','vector') {

[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector orc to vector xor fusion

2025-07-08 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:b0ea39c1cc4b31a7e093836d7e2f7177a840ab51

commit b0ea39c1cc4b31a7e093836d7e2f7177a840ab51
Author: Michael Meissner 
Date:   Wed Jul 9 01:50:20 2025 -0400

PR target/117251: Improve vector orc to vector xor fusion

See the following post for a complete explanation of what the patches
for PR target/117251 do:

 * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html

This is patch #30 of 45 to generate the 'XXEVAL' instruction on power10
and power11 instead of using the Altivec 'VORC' instruction feeding
into 'VXOR'.  The 'XXEVAL' instruction can use all 64 vector registers,
instead of the 32 registers that traditional Altivec vector
instructions use.  By allowing all of the vector registers to be used,
it reduces the amount of spilling that a large benchmark generated.

Currently the following code:

vector int a, b, c, d;
a = (c | ~ d) ^ b;

Generates:

vorc   t,c,d
vxor   a,t,b

Now in addition with this patch, if the arguments or result is
allocated to a traditional FPR register, the GCC compiler will now
generate the following code instead of adding vector move instructions:

xxeval a,b,c,180

Since fusion using 2 Altivec instructions is slightly faster than using
the 'XXEVAL' instruction we prefer to generate the Altivec instructions
if we can.  In addition, because 'XXEVAL' is a prefixed instruction, it
possibly might generate an extra NOP instruction to align the 'XXEVAL'
instruction.

I have tested these patches on both big endian and little endian
PowerPC servers, with no regressions.  Can I check these patches into
the trunk?

2025-07-09  Michael Meissner  

gcc/

PR target/117251
* config/rs6000/fusion.md: Regenerate.
* config/rs6000/genfusion.pl (gen_logical_addsubf): Add support
to generate vector orc => xor fusion if XXEVAL is supported.

Diff:
---
 gcc/config/rs6000/fusion.md| 15 +--
 gcc/config/rs6000/genfusion.pl |  1 +
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index cb1ad8b4c0cc..3d7e6502b027 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -3071,20 +3071,23 @@
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vorc -> vxor
 (define_insn "*fuse_vorc_vxor"
-  [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v")
-(xor:VM (ior:VM (not:VM (match_operand:VM 0 "altivec_register_operand" 
"v,v,v,v"))
-  (match_operand:VM 1 "altivec_register_operand" 
"v,v,v,v"))
- (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))
-   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
+  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
+(xor:VM (ior:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" 
"v,v,v,wa,v"))
+  (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,wa,v"))
+ (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v")))
+   (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
vorc %3,%1,%0\;vxor %3,%3,%2
vorc %3,%1,%0\;vxor %3,%3,%2
vorc %3,%1,%0\;vxor %3,%3,%2
+   xxeval %x3,%x2,%x1,%x0,180
vorc %4,%1,%0\;vxor %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
-   (set_attr "length" "8")])
+   (set_attr "length" "8")
+   (set_attr "prefixed" "*,*,*,yes,*")
+   (set_attr "isa" "*,*,*,xxeval,*")])
 
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vxor -> vxor
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index 9400aed267a6..15f931baad33 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -244,6 +244,7 @@ sub gen_logical_addsubf
   "vxor_vnor"   => 144,
   "veqv_vxor"   => 150,
   "veqv_vor"=> 159,
+  "vorc_vxor"   => 180,
 );
 
 KIND: foreach $kind ('scalar','vector') {

[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector nor to vector or fusion

2025-07-08 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:17962cd3612d48d390869c72e8db1ad907a032d0

commit 17962cd3612d48d390869c72e8db1ad907a032d0
Author: Michael Meissner 
Date:   Wed Jul 9 01:48:00 2025 -0400

PR target/117251: Improve vector nor to vector or fusion

See the following post for a complete explanation of what the patches
for PR target/117251 do:

 * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html

This is patch #26 of 45 to generate the 'XXEVAL' instruction on power10
and power11 instead of using the Altivec 'VNOR' instruction feeding
into 'VOR'.  The 'XXEVAL' instruction can use all 64 vector registers,
instead of the 32 registers that traditional Altivec vector
instructions use.  By allowing all of the vector registers to be used,
it reduces the amount of spilling that a large benchmark generated.

Currently the following code:

vector int a, b, c, d;
a = (~ (c | d)) | b;

Generates:

vnor   t,c,d
vor    a,t,b

Now in addition with this patch, if the arguments or result is
allocated to a traditional FPR register, the GCC compiler will now
generate the following code instead of adding vector move instructions:

xxeval a,b,c,143

Since fusion using 2 Altivec instructions is slightly faster than using
the 'XXEVAL' instruction we prefer to generate the Altivec instructions
if we can.  In addition, because 'XXEVAL' is a prefixed instruction, it
possibly might generate an extra NOP instruction to align the 'XXEVAL'
instruction.

I have tested these patches on both big endian and little endian
PowerPC servers, with no regressions.  Can I check these patches into
the trunk?

2025-07-09  Michael Meissner  

gcc/

PR target/117251
* config/rs6000/fusion.md: Regenerate.
* config/rs6000/genfusion.pl (gen_logical_addsubf): Add support
to generate vector nor => or fusion if XXEVAL is supported.

Diff:
---
 gcc/config/rs6000/fusion.md| 15 +--
 gcc/config/rs6000/genfusion.pl |  1 +
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index 1f1756dbe63e..66d98f4537e1 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -2714,20 +2714,23 @@
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vnor -> vor
 (define_insn "*fuse_vnor_vor"
-  [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v")
-(ior:VM (and:VM (not:VM (match_operand:VM 0 "altivec_register_operand" 
"v,v,v,v"))
-  (not:VM (match_operand:VM 1 
"altivec_register_operand" "v,v,v,v")))
- (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))
-   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
+  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
+(ior:VM (and:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" 
"v,v,v,wa,v"))
+  (not:VM (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,wa,v")))
+ (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v")))
+   (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
vnor %3,%1,%0\;vor %3,%3,%2
vnor %3,%1,%0\;vor %3,%3,%2
vnor %3,%1,%0\;vor %3,%3,%2
+   xxeval %x3,%x2,%x1,%x0,143
vnor %4,%1,%0\;vor %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
-   (set_attr "length" "8")])
+   (set_attr "length" "8")
+   (set_attr "prefixed" "*,*,*,yes,*")
+   (set_attr "isa" "*,*,*,xxeval,*")])
 
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vor -> vor
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index 0fea2d6d8482..98b56b788f03 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -240,6 +240,7 @@ sub gen_logical_addsubf
   "vor_vor" => 127,
   "vor_vnor"=> 128,
   "vnor_vxor"   => 135,
+  "vnor_vor"=> 143,
 );
 
 KIND: foreach $kind ('scalar','vector') {

[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector orc to vector or fusion

2025-07-08 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:2d67f441ab51f9656529ebbce0765685a09d85df

commit 2d67f441ab51f9656529ebbce0765685a09d85df
Author: Michael Meissner 
Date:   Wed Jul 9 01:50:40 2025 -0400

PR target/117251: Improve vector orc to vector or fusion

See the following post for a complete explanation of what the patches
for PR target/117251 do:

 * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html

This is patch #31 of 45 to generate the 'XXEVAL' instruction on power10
and power11 instead of using the Altivec 'VORC' instruction feeding
into 'VOR'.  The 'XXEVAL' instruction can use all 64 vector registers,
instead of the 32 registers that traditional Altivec vector
instructions use.  By allowing all of the vector registers to be used,
it reduces the amount of spilling that a large benchmark generated.

Currently the following code:

vector int a, b, c, d;
a = (c | ~ d) | b;

Generates:

vorc   t,c,d
vor    a,t,b

Now in addition with this patch, if the arguments or result is
allocated to a traditional FPR register, the GCC compiler will now
generate the following code instead of adding vector move instructions:

xxeval a,b,c,191

Since fusion using 2 Altivec instructions is slightly faster than using
the 'XXEVAL' instruction we prefer to generate the Altivec instructions
if we can.  In addition, because 'XXEVAL' is a prefixed instruction, it
possibly might generate an extra NOP instruction to align the 'XXEVAL'
instruction.

I have tested these patches on both big endian and little endian
PowerPC servers, with no regressions.  Can I check these patches into
the trunk?

2025-07-09  Michael Meissner  

gcc/

PR target/117251
* config/rs6000/fusion.md: Regenerate.
* config/rs6000/genfusion.pl (gen_logical_addsubf): Add support
to generate vector orc => or fusion if XXEVAL is supported.

Diff:
---
 gcc/config/rs6000/fusion.md| 15 +--
 gcc/config/rs6000/genfusion.pl |  1 +
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index 3d7e6502b027..f6dc26e9c1f2 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -2762,20 +2762,23 @@
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vorc -> vor
 (define_insn "*fuse_vorc_vor"
-  [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v")
-(ior:VM (ior:VM (not:VM (match_operand:VM 0 "altivec_register_operand" 
"v,v,v,v"))
-  (match_operand:VM 1 "altivec_register_operand" 
"v,v,v,v"))
- (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))
-   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
+  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
+(ior:VM (ior:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" 
"v,v,v,wa,v"))
+  (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,wa,v"))
+ (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v")))
+   (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
vorc %3,%1,%0\;vor %3,%3,%2
vorc %3,%1,%0\;vor %3,%3,%2
vorc %3,%1,%0\;vor %3,%3,%2
+   xxeval %x3,%x2,%x1,%x0,191
vorc %4,%1,%0\;vor %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
-   (set_attr "length" "8")])
+   (set_attr "length" "8")
+   (set_attr "prefixed" "*,*,*,yes,*")
+   (set_attr "isa" "*,*,*,xxeval,*")])
 
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vxor -> vor
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index 15f931baad33..62f2b9e36d89 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -245,6 +245,7 @@ sub gen_logical_addsubf
   "veqv_vxor"   => 150,
   "veqv_vor"=> 159,
   "vorc_vxor"   => 180,
+  "vorc_vor"=> 191,
 );
 
 KIND: foreach $kind ('scalar','vector') {

[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector andc to vector and fusion

2025-07-08 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:a2db53e127608d6861d6e5ec15ca2004d47c5df1

commit a2db53e127608d6861d6e5ec15ca2004d47c5df1
Author: Michael Meissner 
Date:   Wed Jul 9 01:34:45 2025 -0400

PR target/117251: Improve vector andc to vector and fusion

See the following post for a complete explanation of what the patches
for PR target/117251 do:

 * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html

This is patch #2 of 45 to generate the 'XXEVAL' instruction on power10
and power11 instead of using the Altivec 'VANDC' instruction feeding
into 'VAND'.  The 'XXEVAL' instruction can use all 64 vector registers,
instead of the 32 registers that traditional Altivec vector
instructions use.  By allowing all of the vector registers to be used,
it reduces the amount of spilling that a large benchmark generated.

Currently the following code:

vector int a, b, c, d;
a = (c & ~ d) & b;

Generates:

vandc  t,c,d
vand   a,t,b

Now in addition with this patch, if the arguments or result is
allocated to a traditional FPR register, the GCC compiler will now
generate the following code instead of adding vector move instructions:

xxeval a,b,c,2

Since fusion using 2 Altivec instructions is slightly faster than using
the 'XXEVAL' instruction we prefer to generate the Altivec instructions
if we can.  In addition, because 'XXEVAL' is a prefixed instruction, it
possibly might generate an extra NOP instruction to align the 'XXEVAL'
instruction.

I have tested these patches on both big endian and little endian
PowerPC servers, with no regressions.  Can I check these patches into
the trunk?

2025-07-09  Michael Meissner  

gcc/

PR target/117251
* config/rs6000/fusion.md: Regenerate.
* config/rs6000/genfusion.pl (gen_logical_addsubf): Add support
to generate vector/vector andc/and fusion if XXEVAL is
supported.

Diff:
---
 gcc/config/rs6000/fusion.md| 15 +--
 gcc/config/rs6000/genfusion.pl |  1 +
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index d24837d68d83..b9590b6d1104 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -1892,20 +1892,23 @@
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vandc -> vand
 (define_insn "*fuse_vandc_vand"
-  [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v")
-(and:VM (and:VM (not:VM (match_operand:VM 0 "altivec_register_operand" 
"v,v,v,v"))
-  (match_operand:VM 1 "altivec_register_operand" 
"v,v,v,v"))
- (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))
-   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
+  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
+(and:VM (and:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" 
"v,v,v,wa,v"))
+  (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,wa,v"))
+ (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v")))
+   (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
vandc %3,%1,%0\;vand %3,%3,%2
vandc %3,%1,%0\;vand %3,%3,%2
vandc %3,%1,%0\;vand %3,%3,%2
+   xxeval %x3,%x2,%x1,%x0,2
vandc %4,%1,%0\;vand %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
-   (set_attr "length" "8")])
+   (set_attr "length" "8")
+   (set_attr "prefixed" "*,*,*,yes,*")
+   (set_attr "isa" "*,*,*,xxeval,*")])
 
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector veqv -> vand
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index 351a4d914a4a..23adf98c4056 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -216,6 +216,7 @@ sub gen_logical_addsubf
 
 my %xxeval_fusions = (
   "vand_vand"   =>   1,
+  "vandc_vand"  =>   2,
 );
 
 KIND: foreach $kind ('scalar','vector') {

[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Add tests

2025-07-08 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:30dd10b26786dce5e5eb27940ee5290ecc7378df

commit 30dd10b26786dce5e5eb27940ee5290ecc7378df
Author: Michael Meissner 
Date:   Wed Jul 9 01:56:14 2025 -0400

PR target/117251: Add tests

This is patch #45 of 45 to generate the 'XXEVAL' instruction on power10
and power11 instead of using the Altivec 'VAND' instruction feeding
into 'VNAND'.  The 'XXEVAL' instruction can use all 64 vector
registers, instead of the 32 registers that traditional Altivec vector
instructions use.  By allowing all of the vector registers to be used,
it reduces the amount of spilling that a large benchmark generated.

This patch adds the tests for generating 'XXEVAL' to the testsuite.

I have tested these patches on both big endian and little endian
PowerPC servers, with no regressions.  Can I check these patches into
the trunk?

2025-07-09  Michael Meissner  

gcc/testsuite/

PR target/117251
* gcc.target/powerpc/p10-vector-fused-1.c: New test.
* gcc.target/powerpc/p10-vector-fused-2.c: Likewise.

Diff:
---
 .../gcc.target/powerpc/p10-vector-fused-1.c| 409 +
 .../gcc.target/powerpc/p10-vector-fused-2.c| 936 +
 2 files changed, 1345 insertions(+)

diff --git a/gcc/testsuite/gcc.target/powerpc/p10-vector-fused-1.c 
b/gcc/testsuite/gcc.target/powerpc/p10-vector-fused-1.c
new file mode 100644
index ..28e0874b3454
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/p10-vector-fused-1.c
@@ -0,0 +1,409 @@
+/* { dg-do run } */
+/* { dg-require-effective-target power10_hw } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+/* Generate and check most of the vector logical instruction combinations that
+   may or may not generate xxeval to do a fused operation on power10.  */
+
+#include 
+#include 
+#include 
+
+#ifdef DEBUG
+#include 
+
+static int errors = 0;
+static int tests  = 0;
+#endif
+
+typedef vector unsigned int vector_t;
+typedef unsigned int   scalar_t;
+
+/* Vector logical functions.  */
+static inline vector_t
+vector_and (vector_t x, vector_t y)
+{
+  return x & y;
+}
+
+static inline vector_t
+vector_or (vector_t x, vector_t y)
+{
+  return x | y;
+}
+
+static inline vector_t
+vector_xor (vector_t x, vector_t y)
+{
+  return x ^ y;
+}
+
+static inline vector_t
+vector_andc (vector_t x, vector_t y)
+{
+  return x & ~y;
+}
+
+static inline vector_t
+vector_orc (vector_t x, vector_t y)
+{
+  return x | ~y;
+}
+
+static inline vector_t
+vector_nand (vector_t x, vector_t y)
+{
+  return ~(x & y);
+}
+
+static inline vector_t
+vector_nor (vector_t x, vector_t y)
+{
+  return ~(x | y);
+}
+
+static inline vector_t
+vector_eqv (vector_t x, vector_t y)
+{
+  return ~(x ^ y);
+}
+
+/* Scalar logical functions.  */
+static inline scalar_t
+scalar_and (scalar_t x, scalar_t y)
+{
+  return x & y;
+}
+
+static inline scalar_t
+scalar_or (scalar_t x, scalar_t y)
+{
+  return x | y;
+}
+
+static inline scalar_t
+scalar_xor (scalar_t x, scalar_t y)
+{
+  return x ^ y;
+}
+
+static inline scalar_t
+scalar_andc (scalar_t x, scalar_t y)
+{
+  return x & ~y;
+}
+
+static inline scalar_t
+scalar_orc (scalar_t x, scalar_t y)
+{
+  return x | ~y;
+}
+
+static inline scalar_t
+scalar_nand (scalar_t x, scalar_t y)
+{
+  return ~(x & y);
+}
+
+static inline scalar_t
+scalar_nor (scalar_t x, scalar_t y)
+{
+  return ~(x | y);
+}
+
+static inline scalar_t
+scalar_eqv (scalar_t x, scalar_t y)
+{
+  return ~(x ^ y);
+}
+
+
+/*
+ * Generate one function for each combination that we are checking.  Do 4
+ * operations:
+ *
+ * Use FPR regs that should generate either XXEVAL or XXL* insns;
+ * Use Altivec registers that may generate fused V* insns;
+ * Use VSX registers, ensure fusing is not done via asm; (and)
+ * Use GPR registers on scalar operations.
+ */
+
+#ifdef DEBUG
+#define TRACE(INNER, OUTER)\
+  do { \
+tests++;   \
+printf ("%s_%s\n", INNER, OUTER);  \
+fflush (stdout);   \
+  } while (0)  \
+
+#define FAILED(INNER, OUTER)   \
+  do { \
+errors++;  \
+printf ("%s_%s failed\n", INNER, OUTER);   \
+fflush (stdout);   \
+  } while (0)  \
+
+#else
+#define TRACE(INNER, OUTER)
+#define FAILED(INNER, OUTER)   abort ()
+#endif
+
+#define FUSED_FUNC(INNER, OUTER)   \
+static void

[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector and to vector xor fusion

2025-07-08 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:a90b97fd171aebdc46108bce9d85e5207ddf0334

commit a90b97fd171aebdc46108bce9d85e5207ddf0334
Author: Michael Meissner 
Date:   Wed Jul 9 01:40:26 2025 -0400

PR target/117251: Improve vector and to vector xor fusion

See the following post for a complete explanation of what the patches
for PR target/117251 do:

 * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html

This is patch #11 of 45 to generate the 'XXEVAL' instruction on power10
and power11 instead of using the Altivec 'VAND' instruction feeding
into 'VXOR'.  The 'XXEVAL' instruction can use all 64 vector registers,
instead of the 32 registers that traditional Altivec vector
instructions use.  By allowing all of the vector registers to be used,
it reduces the amount of spilling that a large benchmark generated.

Currently the following code:

vector int a, b, c, d;
a = (c & d) ^ b;

Generates:

vand   t,c,d
vxor   a,t,b

Now in addition with this patch, if the arguments or result is
allocated to a traditional FPR register, the GCC compiler will now
generate the following code instead of adding vector move instructions:

xxeval a,b,c,30

Since fusion using 2 Altivec instructions is slightly faster than using
the 'XXEVAL' instruction we prefer to generate the Altivec instructions
if we can.  In addition, because 'XXEVAL' is a prefixed instruction, it
possibly might generate an extra NOP instruction to align the 'XXEVAL'
instruction.

I have tested these patches on both big endian and little endian
PowerPC servers, with no regressions.  Can I check these patches into
the trunk?

2025-07-09  Michael Meissner  

gcc/

PR target/117251
* config/rs6000/fusion.md: Regenerate.
* config/rs6000/genfusion.pl (gen_logical_addsubf): Add support
to generate vector/vector and/xor fusion if XXEVAL is
supported.

Diff:
---
 gcc/config/rs6000/fusion.md| 15 +--
 gcc/config/rs6000/genfusion.pl |  1 +
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index c8a27a9e5471..789a4d592419 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -2909,20 +2909,23 @@
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vand -> vxor
 (define_insn "*fuse_vand_vxor"
-  [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v")
-(xor:VM (and:VM (match_operand:VM 0 "altivec_register_operand" 
"v,v,v,v")
-  (match_operand:VM 1 "altivec_register_operand" 
"v,v,v,v"))
- (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))
-   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
+  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
+(xor:VM (and:VM (match_operand:VM 0 "vector_fusion_operand" 
"v,v,v,wa,v")
+  (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,wa,v"))
+ (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v")))
+   (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
vand %3,%1,%0\;vxor %3,%3,%2
vand %3,%1,%0\;vxor %3,%3,%2
vand %3,%1,%0\;vxor %3,%3,%2
+   xxeval %x3,%x2,%x1,%x0,30
vand %4,%1,%0\;vxor %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
-   (set_attr "length" "8")])
+   (set_attr "length" "8")
+   (set_attr "prefixed" "*,*,*,yes,*")
+   (set_attr "isa" "*,*,*,xxeval,*")])
 
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vandc -> vxor
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index 078bc6ca0dab..e6d44d430b3a 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -225,6 +225,7 @@ sub gen_logical_addsubf
   "vandc_vandc" =>  13,
   "vnand_vand"  =>  14,
   "vnand_vnor"  =>  16,
+  "vand_vxor"   =>  30,
 );
 
 KIND: foreach $kind ('scalar','vector') {

[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector or to vector nand fusion

2025-07-08 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:59d1bcfe4713965941e6eec13b540c17c7ff1a14

commit 59d1bcfe4713965941e6eec13b540c17c7ff1a14
Author: Michael Meissner 
Date:   Wed Jul 9 01:54:45 2025 -0400

PR target/117251: Improve vector or to vector nand fusion

See the following post for a complete explanation of what the patches
for PR target/117251 do:

 * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html

This is patch #41 of 45 to generate the 'XXEVAL' instruction on power10
and power11 instead of using the Altivec 'VOR' instruction feeding into
'VNAND'.  The 'XXEVAL' instruction can use all 64 vector registers,
instead of the 32 registers that traditional Altivec vector
instructions use.  By allowing all of the vector registers to be used,
it reduces the amount of spilling that a large benchmark generated.

Currently the following code:

vector int a, b, c, d;
a = ~ ((c | d) & b);

Generates:

vor    t,c,d
vnand  a,t,b

In addition, with this patch, if any of the arguments or the result is
allocated to a traditional FPR register, the GCC compiler will now
generate the following code instead of adding vector move instructions:

xxeval a,b,c,d,248

Since fusion using 2 Altivec instructions is slightly faster than using
the 'XXEVAL' instruction we prefer to generate the Altivec instructions
if we can.  In addition, because 'XXEVAL' is a prefixed instruction, it
possibly might generate an extra NOP instruction to align the 'XXEVAL'
instruction.

I have tested these patches on both big endian and little endian
PowerPC servers, with no regressions.  Can I check these patches into
the trunk?

2025-07-09  Michael Meissner  

gcc/

PR target/117251
* config/rs6000/fusion.md: Regenerate.
* config/rs6000/genfusion.pl (gen_logical_addsubf): Add support
to generate vector or => nand fusion if XXEVAL is supported.

Diff:
---
 gcc/config/rs6000/fusion.md| 15 +--
 gcc/config/rs6000/genfusion.pl |  1 +
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index 01b7fda17ecc..39b586918c17 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -2435,20 +2435,23 @@
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vor -> vnand
 (define_insn "*fuse_vor_vnand"
-  [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v")
-(ior:VM (not:VM (ior:VM (match_operand:VM 0 "altivec_register_operand" 
"v,v,v,v")
-  (match_operand:VM 1 "altivec_register_operand" 
"v,v,v,v")))
- (not:VM (match_operand:VM 2 "altivec_register_operand" 
"v,v,v,v"
-   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
+  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
+(ior:VM (not:VM (ior:VM (match_operand:VM 0 "vector_fusion_operand" 
"v,v,v,wa,v")
+  (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,wa,v")))
+ (not:VM (match_operand:VM 2 "vector_fusion_operand" 
"v,v,v,wa,v"
+   (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
vor %3,%1,%0\;vnand %3,%3,%2
vor %3,%1,%0\;vnand %3,%3,%2
vor %3,%1,%0\;vnand %3,%3,%2
+   xxeval %x3,%x2,%x1,%x0,248
vor %4,%1,%0\;vnand %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
-   (set_attr "length" "8")])
+   (set_attr "length" "8")
+   (set_attr "prefixed" "*,*,*,yes,*")
+   (set_attr "isa" "*,*,*,xxeval,*")])
 
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vorc -> vnand
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index d4965b6df864..86bca81286ca 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -255,6 +255,7 @@ sub gen_logical_addsubf
   "vorc_vnand"  => 244,
   "veqv_vnand"  => 246,
   "vnor_vnand"  => 247,
+  "vor_vnand"   => 248,
 );
 
 KIND: foreach $kind ('scalar','vector') {

[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector and to vector or fusion

2025-07-08 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:fcd8a91c46d187b3840c7e4be8e60ee490ccd734

commit fcd8a91c46d187b3840c7e4be8e60ee490ccd734
Author: Michael Meissner 
Date:   Wed Jul 9 01:41:36 2025 -0400

PR target/117251: Improve vector and to vector or fusion

See the following post for a complete explanation of what the patches
for PR target/117251 do:

 * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html

This is patch #12 of 45 to generate the 'XXEVAL' instruction on power10
and power11 instead of using the Altivec 'VAND' instruction feeding
into 'VOR'.  The 'XXEVAL' instruction can use all 64 vector registers,
instead of the 32 registers that traditional Altivec vector
instructions use.  By allowing all of the vector registers to be used,
it reduces the amount of spilling that a large benchmark generated.

Currently the following code:

vector int a, b, c, d;
a = (c & d) | b;

Generates:

vand   t,c,d
vor    a,t,b

In addition, with this patch, if any of the arguments or the result is
allocated to a traditional FPR register, the GCC compiler will now
generate the following code instead of adding vector move instructions:

xxeval a,b,c,d,31

Since fusion using 2 Altivec instructions is slightly faster than using
the 'XXEVAL' instruction we prefer to generate the Altivec instructions
if we can.  In addition, because 'XXEVAL' is a prefixed instruction, it
possibly might generate an extra NOP instruction to align the 'XXEVAL'
instruction.

I have tested these patches on both big endian and little endian
PowerPC servers, with no regressions.  Can I check these patches into
the trunk?

2025-07-09  Michael Meissner  

gcc/

PR target/117251
* config/rs6000/fusion.md: Regenerate.
* config/rs6000/genfusion.pl (gen_logical_addsubf): Add support
to generate vector and => or fusion if XXEVAL is supported.

Diff:
---
 gcc/config/rs6000/fusion.md| 15 +--
 gcc/config/rs6000/genfusion.pl |  1 +
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index 789a4d592419..fccea39d0aae 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -2621,20 +2621,23 @@
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vand -> vor
 (define_insn "*fuse_vand_vor"
-  [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v")
-(ior:VM (and:VM (match_operand:VM 0 "altivec_register_operand" 
"v,v,v,v")
-  (match_operand:VM 1 "altivec_register_operand" 
"v,v,v,v"))
- (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))
-   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
+  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
+(ior:VM (and:VM (match_operand:VM 0 "vector_fusion_operand" 
"v,v,v,wa,v")
+  (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,wa,v"))
+ (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v")))
+   (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
vand %3,%1,%0\;vor %3,%3,%2
vand %3,%1,%0\;vor %3,%3,%2
vand %3,%1,%0\;vor %3,%3,%2
+   xxeval %x3,%x2,%x1,%x0,31
vand %4,%1,%0\;vor %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
-   (set_attr "length" "8")])
+   (set_attr "length" "8")
+   (set_attr "prefixed" "*,*,*,yes,*")
+   (set_attr "isa" "*,*,*,xxeval,*")])
 
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vandc -> vor
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index e6d44d430b3a..ab714b10f622 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -226,6 +226,7 @@ sub gen_logical_addsubf
   "vnand_vand"  =>  14,
   "vnand_vnor"  =>  16,
   "vand_vxor"   =>  30,
+  "vand_vor"=>  31,
 );
 
 KIND: foreach $kind ('scalar','vector') {

[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector or to vector and fusion

2025-07-08 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:88b69379719059f91a37b50e55fd7f5f7560

commit 88b69379719059f91a37b50e55fd7f5f7560
Author: Michael Meissner 
Date:   Wed Jul 9 01:36:26 2025 -0400

PR target/117251: Improve vector or to vector and fusion

See the following post for a complete explanation of what the patches
for PR target/117251 do:

 * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html

This is patch #4 of 45 to generate the 'XXEVAL' instruction on power10
and power11 instead of using the Altivec 'VOR' instruction feeding into
'VAND'.  The 'XXEVAL' instruction can use all 64 vector registers,
instead of the 32 registers that traditional Altivec vector
instructions use.  By allowing all of the vector registers to be used,
it reduces the amount of spilling that a large benchmark generated.

Currently the following code:

vector int a, b, c, d;
a = (c | d) & b;

Generates:

vor    t,c,d
vand   a,t,b

In addition, with this patch, if any of the arguments or the result is
allocated to a traditional FPR register, the GCC compiler will now
generate the following code instead of adding vector move instructions:

xxeval a,b,c,d,7

Since fusion using 2 Altivec instructions is slightly faster than using
the 'XXEVAL' instruction we prefer to generate the Altivec instructions
if we can.  In addition, because 'XXEVAL' is a prefixed instruction, it
possibly might generate an extra NOP instruction to align the 'XXEVAL'
instruction.

I have tested these patches on both big endian and little endian
PowerPC servers, with no regressions.  Can I check these patches into
the trunk?

2025-07-09  Michael Meissner  

gcc/

PR target/117251
* config/rs6000/fusion.md: Regenerate.
* config/rs6000/genfusion.pl (gen_logical_addsubf): Add support to
generate vector/vector or/and fusion if XXEVAL is supported.

Diff:
---
 gcc/config/rs6000/fusion.md| 15 +--
 gcc/config/rs6000/genfusion.pl |  1 +
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index 6375cd3a8970..161419b7f586 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -1967,20 +1967,23 @@
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vor -> vand
 (define_insn "*fuse_vor_vand"
-  [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v")
-(and:VM (ior:VM (match_operand:VM 0 "altivec_register_operand" 
"v,v,v,v")
-  (match_operand:VM 1 "altivec_register_operand" 
"v,v,v,v"))
- (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))
-   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
+  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
+(and:VM (ior:VM (match_operand:VM 0 "vector_fusion_operand" 
"v,v,v,wa,v")
+  (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,wa,v"))
+ (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v")))
+   (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
vor %3,%1,%0\;vand %3,%3,%2
vor %3,%1,%0\;vand %3,%3,%2
vor %3,%1,%0\;vand %3,%3,%2
+   xxeval %x3,%x2,%x1,%x0,7
vor %4,%1,%0\;vand %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
-   (set_attr "length" "8")])
+   (set_attr "length" "8")
+   (set_attr "prefixed" "*,*,*,yes,*")
+   (set_attr "isa" "*,*,*,xxeval,*")])
 
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vorc -> vand
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index 2c631b944587..9d3a01a4704a 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -218,6 +218,7 @@ sub gen_logical_addsubf
   "vand_vand"   =>   1,
   "vandc_vand"  =>   2,
   "vxor_vand"   =>   6,
+  "vor_vand"=>   7,
 );
 
 KIND: foreach $kind ('scalar','vector') {

[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector nor to vector xor fusion

2025-07-08 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:92cead1c94acdcd1eaeafa2406a83dd79b01dd41

commit 92cead1c94acdcd1eaeafa2406a83dd79b01dd41
Author: Michael Meissner 
Date:   Wed Jul 9 01:47:25 2025 -0400

PR target/117251: Improve vector nor to vector xor fusion

See the following post for a complete explanation of what the patches
for PR target/117251 do:

 * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html

This is patch #25 of 45 to generate the 'XXEVAL' instruction on power10
and power11 instead of using the Altivec 'VNOR' instruction feeding
into 'VXOR'.  The 'XXEVAL' instruction can use all 64 vector registers,
instead of the 32 registers that traditional Altivec vector
instructions use.  By allowing all of the vector registers to be used,
it reduces the amount of spilling that a large benchmark generated.

Currently the following code:

vector int a, b, c, d;
a = (~ (c | d)) ^ b;

Generates:

vnor   t,c,d
vxor   a,t,b

In addition, with this patch, if any of the arguments or the result is
allocated to a traditional FPR register, the GCC compiler will now
generate the following code instead of adding vector move instructions:

xxeval a,b,c,d,135

Since fusion using 2 Altivec instructions is slightly faster than using
the 'XXEVAL' instruction we prefer to generate the Altivec instructions
if we can.  In addition, because 'XXEVAL' is a prefixed instruction, it
possibly might generate an extra NOP instruction to align the 'XXEVAL'
instruction.

I have tested these patches on both big endian and little endian
PowerPC servers, with no regressions.  Can I check these patches into
the trunk?

2025-07-09  Michael Meissner  

gcc/

PR target/117251
* config/rs6000/fusion.md: Regenerate.
* config/rs6000/genfusion.pl (gen_logical_addsubf): Add support
to generate vector nor => xor fusion if XXEVAL is supported.

Diff:
---
 gcc/config/rs6000/fusion.md| 15 +--
 gcc/config/rs6000/genfusion.pl |  1 +
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index c55e9d4abd67..1f1756dbe63e 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -3017,20 +3017,23 @@
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vnor -> vxor
 (define_insn "*fuse_vnor_vxor"
-  [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v")
-(xor:VM (and:VM (not:VM (match_operand:VM 0 "altivec_register_operand" 
"v,v,v,v"))
-  (not:VM (match_operand:VM 1 
"altivec_register_operand" "v,v,v,v")))
- (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))
-   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
+  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
+(xor:VM (and:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" 
"v,v,v,wa,v"))
+  (not:VM (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,wa,v")))
+ (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v")))
+   (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
vnor %3,%1,%0\;vxor %3,%3,%2
vnor %3,%1,%0\;vxor %3,%3,%2
vnor %3,%1,%0\;vxor %3,%3,%2
+   xxeval %x3,%x2,%x1,%x0,135
vnor %4,%1,%0\;vxor %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
-   (set_attr "length" "8")])
+   (set_attr "length" "8")
+   (set_attr "prefixed" "*,*,*,yes,*")
+   (set_attr "isa" "*,*,*,xxeval,*")])
 
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vor -> vxor
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index 58f900640bef..0fea2d6d8482 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -239,6 +239,7 @@ sub gen_logical_addsubf
   "vor_vxor"=> 120,
   "vor_vor" => 127,
   "vor_vnor"=> 128,
+  "vnor_vxor"   => 135,
 );
 
 KIND: foreach $kind ('scalar','vector') {

[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector orc to vector orc fusion

2025-07-08 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:cb933a24f9dc77bb04ef74520ef007429e804e54

commit cb933a24f9dc77bb04ef74520ef007429e804e54
Author: Michael Meissner 
Date:   Wed Jul 9 01:43:44 2025 -0400

PR target/117251: Improve vector orc to vector orc fusion

See the following post for a complete explanation of what the patches
for PR target/117251 do:

 * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html

This is patch #17 of 45 to generate the 'XXEVAL' instruction on power10
and power11 instead of using the Altivec 'VORC' instruction feeding
into 'VORC'.  The 'XXEVAL' instruction can use all 64 vector registers,
instead of the 32 registers that traditional Altivec vector
instructions use.  By allowing all of the vector registers to be used,
it reduces the amount of spilling that a large benchmark generated.

Currently the following code:

vector int a, b, c, d;
a = (c | ~ d) | ~ b;

Generates:

vorc   t,c,d
vorc   a,t,b

In addition, with this patch, if any of the arguments or the result is
allocated to a traditional FPR register, the GCC compiler will now
generate the following code instead of adding vector move instructions:

xxeval a,b,c,d,79

Since fusion using 2 Altivec instructions is slightly faster than using
the 'XXEVAL' instruction we prefer to generate the Altivec instructions
if we can.  In addition, because 'XXEVAL' is a prefixed instruction, it
possibly might generate an extra NOP instruction to align the 'XXEVAL'
instruction.

I have tested these patches on both big endian and little endian
PowerPC servers, with no regressions.  Can I check these patches into
the trunk?

2025-07-09  Michael Meissner  

gcc/

PR target/117251
* config/rs6000/fusion.md: Regenerate.
* config/rs6000/genfusion.pl (gen_logical_addsubf): Add support
to generate vector orc => orc fusion if XXEVAL is supported.

Diff:
---
 gcc/config/rs6000/fusion.md| 15 +--
 gcc/config/rs6000/genfusion.pl |  1 +
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index f84d0aee5d79..486aa813575d 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -2885,20 +2885,23 @@
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vorc -> vorc
 (define_insn "*fuse_vorc_vorc"
-  [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v")
-(ior:VM (ior:VM (not:VM (match_operand:VM 0 "altivec_register_operand" 
"v,v,v,v"))
-  (match_operand:VM 1 "altivec_register_operand" 
"v,v,v,v"))
- (not:VM (match_operand:VM 2 "altivec_register_operand" 
"v,v,v,v"
-   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
+  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
+(ior:VM (ior:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" 
"v,v,v,wa,v"))
+  (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,wa,v"))
+ (not:VM (match_operand:VM 2 "vector_fusion_operand" 
"v,v,v,wa,v"
+   (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
vorc %3,%1,%0\;vorc %3,%3,%2
vorc %3,%1,%0\;vorc %3,%3,%2
vorc %3,%1,%0\;vorc %3,%3,%2
+   xxeval %x3,%x2,%x1,%x0,79
vorc %4,%1,%0\;vorc %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
-   (set_attr "length" "8")])
+   (set_attr "length" "8")
+   (set_attr "prefixed" "*,*,*,yes,*")
+   (set_attr "isa" "*,*,*,xxeval,*")])
 
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vxor -> vorc
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index 8ba1aa081f75..8f60fe76c87b 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -231,6 +231,7 @@ sub gen_logical_addsubf
   "vandc_vor"   =>  47,
   "vorc_vnor"   =>  64,
   "vorc_veqv"   =>  75,
+  "vorc_vorc"   =>  79,
 );
 
 KIND: foreach $kind ('scalar','vector') {

[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector orc to vector nor fusion

2025-07-08 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:1a28902b859b26343f4392a20f6a62d8b05fc3bc

commit 1a28902b859b26343f4392a20f6a62d8b05fc3bc
Author: Michael Meissner 
Date:   Wed Jul 9 01:42:48 2025 -0400

PR target/117251: Improve vector orc to vector nor fusion

See the following post for a complete explanation of what the patches
for PR target/117251 do:

 * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html

This is patch #15 of 45 to generate the 'XXEVAL' instruction on power10
and power11 instead of using the Altivec 'VORC' instruction feeding
into 'VNOR'.  The 'XXEVAL' instruction can use all 64 vector registers,
instead of the 32 registers that traditional Altivec vector
instructions use.  By allowing all of the vector registers to be used,
it reduces the amount of spilling that a large benchmark generated.

Currently the following code:

vector int a, b, c, d;
a = ~ ((c | ~ d) | b);

Generates:

vorc   t,c,d
vnor   a,t,b

In addition, with this patch, if any of the arguments or the result is
allocated to a traditional FPR register, the GCC compiler will now
generate the following code instead of adding vector move instructions:

xxeval a,b,c,d,64

Since fusion using 2 Altivec instructions is slightly faster than using
the 'XXEVAL' instruction we prefer to generate the Altivec instructions
if we can.  In addition, because 'XXEVAL' is a prefixed instruction, it
possibly might generate an extra NOP instruction to align the 'XXEVAL'
instruction.

I have tested these patches on both big endian and little endian
PowerPC servers, with no regressions.  Can I check these patches into
the trunk?

2025-07-09  Michael Meissner  

gcc/

PR target/117251
* config/rs6000/fusion.md: Regenerate.
* config/rs6000/genfusion.pl (gen_logical_addsubf): Add support
to generate vector orc => nor fusion if XXEVAL is supported.

Diff:
---
 gcc/config/rs6000/fusion.md| 15 +--
 gcc/config/rs6000/genfusion.pl |  1 +
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index ed70ac059dfc..f45e65f0217c 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -2585,20 +2585,23 @@
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vorc -> vnor
 (define_insn "*fuse_vorc_vnor"
-  [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v")
-(and:VM (not:VM (ior:VM (not:VM (match_operand:VM 0 
"altivec_register_operand" "v,v,v,v"))
-  (match_operand:VM 1 "altivec_register_operand" 
"v,v,v,v")))
- (not:VM (match_operand:VM 2 "altivec_register_operand" 
"v,v,v,v"
-   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
+  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
+(and:VM (not:VM (ior:VM (not:VM (match_operand:VM 0 
"vector_fusion_operand" "v,v,v,wa,v"))
+  (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,wa,v")))
+ (not:VM (match_operand:VM 2 "vector_fusion_operand" 
"v,v,v,wa,v"
+   (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
vorc %3,%1,%0\;vnor %3,%3,%2
vorc %3,%1,%0\;vnor %3,%3,%2
vorc %3,%1,%0\;vnor %3,%3,%2
+   xxeval %x3,%x2,%x1,%x0,64
vorc %4,%1,%0\;vnor %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
-   (set_attr "length" "8")])
+   (set_attr "length" "8")
+   (set_attr "prefixed" "*,*,*,yes,*")
+   (set_attr "isa" "*,*,*,xxeval,*")])
 
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vxor -> vnor
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index 69fa544f0317..720e8d440c2d 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -229,6 +229,7 @@ sub gen_logical_addsubf
   "vand_vor"=>  31,
   "vandc_vxor"  =>  45,
   "vandc_vor"   =>  47,
+  "vorc_vnor"   =>  64,
 );
 
 KIND: foreach $kind ('scalar','vector') {

[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector xor to vector xor fusion

2025-07-08 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:0e5b5bceaedce5f1b8b8ddf5339c28e07b5a924d

commit 0e5b5bceaedce5f1b8b8ddf5339c28e07b5a924d
Author: Michael Meissner 
Date:   Wed Jul 9 01:44:47 2025 -0400

PR target/117251: Improve vector xor to vector xor fusion

See the following post for a complete explanation of what the patches
for PR target/117251 do:

 * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html

This is patch #19 of 45 to generate the 'XXEVAL' instruction on power10
and power11 instead of using the Altivec 'VXOR' instruction feeding
into 'VXOR'.  The 'XXEVAL' instruction can use all 64 vector registers,
instead of the 32 registers that traditional Altivec vector
instructions use.  By allowing all of the vector registers to be used,
it reduces the amount of spilling that a large benchmark generated.

Currently the following code:

vector int a, b, c, d;
a = (c ^ d) ^ b;

Generates:

vxor   t,c,d
vxor   a,t,b

In addition, with this patch, if any of the arguments or the result is
allocated to a traditional FPR register, the GCC compiler will now
generate the following code instead of adding vector move instructions:

xxeval a,b,c,d,105

Since fusion using 2 Altivec instructions is slightly faster than using
the 'XXEVAL' instruction we prefer to generate the Altivec instructions
if we can.  In addition, because 'XXEVAL' is a prefixed instruction, it
possibly might generate an extra NOP instruction to align the 'XXEVAL'
instruction.

I have tested these patches on both big endian and little endian
PowerPC servers, with no regressions.  Can I check these patches into
the trunk?

2025-07-09  Michael Meissner  

gcc/

PR target/117251
* config/rs6000/fusion.md: Regenerate.
* config/rs6000/genfusion.pl (gen_logical_addsubf): Add support
to generate vector xor => xor fusion if XXEVAL is supported.

Diff:
---
 gcc/config/rs6000/fusion.md| 15 +--
 gcc/config/rs6000/genfusion.pl |  1 +
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index e5099178d63d..a848b21bc3e2 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -3059,20 +3059,23 @@
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vxor -> vxor
 (define_insn "*fuse_vxor_vxor"
-  [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v")
-(xor:VM (xor:VM (match_operand:VM 0 "altivec_register_operand" 
"v,v,v,v")
-  (match_operand:VM 1 "altivec_register_operand" 
"%v,v,v,v"))
- (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))
-   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
+  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
+(xor:VM (xor:VM (match_operand:VM 0 "vector_fusion_operand" 
"v,v,v,wa,v")
+  (match_operand:VM 1 "vector_fusion_operand" 
"%v,v,v,wa,v"))
+ (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v")))
+   (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
vxor %3,%1,%0\;vxor %3,%3,%2
vxor %3,%1,%0\;vxor %3,%3,%2
vxor %3,%1,%0\;vxor %3,%3,%2
+   xxeval %x3,%x2,%x1,%x0,105
vxor %4,%1,%0\;vxor %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
-   (set_attr "length" "8")])
+   (set_attr "length" "8")
+   (set_attr "prefixed" "*,*,*,yes,*")
+   (set_attr "isa" "*,*,*,xxeval,*")])
 
 ;; add-add fusion pattern generated by gen_addadd
 (define_insn "*fuse_add_add"
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index 79d9eaed7da6..b9ff6c99b95e 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -233,6 +233,7 @@ sub gen_logical_addsubf
   "vorc_veqv"   =>  75,
   "vorc_vorc"   =>  79,
   "veqv_vnor"   =>  96,
+  "vxor_vxor"   => 105,
 );
 
 KIND: foreach $kind ('scalar','vector') {

[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector andc to vector or fusion

2025-07-08 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:56fd65b1c98e4c7d7f9e59008db913dbd2843403

commit 56fd65b1c98e4c7d7f9e59008db913dbd2843403
Author: Michael Meissner 
Date:   Wed Jul 9 01:42:25 2025 -0400

PR target/117251: Improve vector andc to vector or fusion

See the following post for a complete explanation of what the patches
for PR target/117251 do:

 * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html

This is patch #14 of 45 to generate the 'XXEVAL' instruction on power10
and power11 instead of using the Altivec 'VANDC' instruction feeding
into 'VOR'.  The 'XXEVAL' instruction can use all 64 vector registers,
instead of the 32 registers that traditional Altivec vector
instructions use.  By allowing all of the vector registers to be used,
it reduces the amount of spilling that a large benchmark generated.

Currently the following code:

vector int a, b, c, d;
a = (c & ~ d) | b;

Generates:

vandc  t,c,d
vor    a,t,b

In addition, with this patch, if any of the arguments or the result is
allocated to a traditional FPR register, the GCC compiler will now
generate the following code instead of adding vector move instructions:

xxeval a,b,c,d,47

Since fusion using 2 Altivec instructions is slightly faster than using
the 'XXEVAL' instruction we prefer to generate the Altivec instructions
if we can.  In addition, because 'XXEVAL' is a prefixed instruction, it
possibly might generate an extra NOP instruction to align the 'XXEVAL'
instruction.

I have tested these patches on both big endian and little endian
PowerPC servers, with no regressions.  Can I check these patches into
the trunk?

2025-07-09  Michael Meissner  

gcc/

PR target/117251
* config/rs6000/fusion.md: Regenerate.
* config/rs6000/genfusion.pl (gen_logical_addsubf): Add support
to generate vector andc => or fusion if XXEVAL is supported.

Diff:
---
 gcc/config/rs6000/fusion.md| 15 +--
 gcc/config/rs6000/genfusion.pl |  1 +
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index 6e5c88b81b44..ed70ac059dfc 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -2642,20 +2642,23 @@
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vandc -> vor
 (define_insn "*fuse_vandc_vor"
-  [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v")
-(ior:VM (and:VM (not:VM (match_operand:VM 0 "altivec_register_operand" 
"v,v,v,v"))
-  (match_operand:VM 1 "altivec_register_operand" 
"v,v,v,v"))
- (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))
-   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
+  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
+(ior:VM (and:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" 
"v,v,v,wa,v"))
+  (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,wa,v"))
+ (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v")))
+   (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
vandc %3,%1,%0\;vor %3,%3,%2
vandc %3,%1,%0\;vor %3,%3,%2
vandc %3,%1,%0\;vor %3,%3,%2
+   xxeval %x3,%x2,%x1,%x0,47
vandc %4,%1,%0\;vor %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
-   (set_attr "length" "8")])
+   (set_attr "length" "8")
+   (set_attr "prefixed" "*,*,*,yes,*")
+   (set_attr "isa" "*,*,*,xxeval,*")])
 
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector veqv -> vor
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index d15208a4ad3e..69fa544f0317 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -228,6 +228,7 @@ sub gen_logical_addsubf
   "vand_vxor"   =>  30,
   "vand_vor"=>  31,
   "vandc_vxor"  =>  45,
+  "vandc_vor"   =>  47,
 );
 
 KIND: foreach $kind ('scalar','vector') {

[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector eqv to vector nor fusion

2025-07-08 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:1f0cc53d3d54202c01f4b69eeb181d7212cd2321

commit 1f0cc53d3d54202c01f4b69eeb181d7212cd2321
Author: Michael Meissner 
Date:   Wed Jul 9 01:44:08 2025 -0400

PR target/117251: Improve vector eqv to vector nor fusion

See the following post for a complete explanation of what the patches
for PR target/117251 do:

 * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html

This is patch #18 of 45 to generate the 'XXEVAL' instruction on power10
and power11 instead of using the Altivec 'VEQV' instruction feeding
into 'VNOR'.  The 'XXEVAL' instruction can use all 64 vector registers,
instead of the 32 registers that traditional Altivec vector
instructions use.  By allowing all of the vector registers to be used,
it reduces the amount of spilling that a large benchmark generated.

Currently the following code:

vector int a, b, c, d;
a = ~ ((~ (c ^ d)) | b);

Generates:

veqv   t,c,d
vnor   a,t,b

With this patch, if any of the arguments or the result is allocated to
a traditional FPR register, GCC will now generate the following code
instead of adding vector move instructions:

xxeval a,b,c,d,96

Since fusion using 2 Altivec instructions is slightly faster than using
the 'XXEVAL' instruction we prefer to generate the Altivec instructions
if we can.  In addition, because 'XXEVAL' is a prefixed instruction, it
possibly might generate an extra NOP instruction to align the 'XXEVAL'
instruction.

I have tested these patches on both big endian and little endian
PowerPC servers, with no regressions.  Can I check these patches into
the trunk?

2025-07-09  Michael Meissner  

gcc/

PR target/117251
* config/rs6000/fusion.md: Regenerate.
* config/rs6000/genfusion.pl (gen_logical_addsubf): Add support
to generate vector eqv => nor fusion if XXEVAL is supported.

Diff:
---
 gcc/config/rs6000/fusion.md| 15 +--
 gcc/config/rs6000/genfusion.pl |  1 +
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index 486aa813575d..e5099178d63d 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -2513,20 +2513,23 @@
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector veqv -> vnor
 (define_insn "*fuse_veqv_vnor"
-  [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v")
-(and:VM (not:VM (not:VM (xor:VM (match_operand:VM 0 
"altivec_register_operand" "v,v,v,v")
-  (match_operand:VM 1 "altivec_register_operand" 
"v,v,v,v"
- (not:VM (match_operand:VM 2 "altivec_register_operand" 
"v,v,v,v"
-   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
+  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
+(and:VM (not:VM (not:VM (xor:VM (match_operand:VM 0 
"vector_fusion_operand" "v,v,v,wa,v")
+  (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,wa,v"
+ (not:VM (match_operand:VM 2 "vector_fusion_operand" 
"v,v,v,wa,v"
+   (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
veqv %3,%1,%0\;vnor %3,%3,%2
veqv %3,%1,%0\;vnor %3,%3,%2
veqv %3,%1,%0\;vnor %3,%3,%2
+   xxeval %x3,%x2,%x1,%x0,96
veqv %4,%1,%0\;vnor %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
-   (set_attr "length" "8")])
+   (set_attr "length" "8")
+   (set_attr "prefixed" "*,*,*,yes,*")
+   (set_attr "isa" "*,*,*,xxeval,*")])
 
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vnand -> vnor
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index 8f60fe76c87b..79d9eaed7da6 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -232,6 +232,7 @@ sub gen_logical_addsubf
   "vorc_vnor"   =>  64,
   "vorc_veqv"   =>  75,
   "vorc_vorc"   =>  79,
+  "veqv_vnor"   =>  96,
 );
 
 KIND: foreach $kind ('scalar','vector') {

[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector xor to vector or fusion

2025-07-08 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:8266b1e19b1f7b52482fc92f48c47bb215aff385

commit 8266b1e19b1f7b52482fc92f48c47bb215aff385
Author: Michael Meissner 
Date:   Wed Jul 9 01:45:10 2025 -0400

PR target/117251: Improve vector xor to vector or fusion

See the following post for a complete explanation of what the patches
for PR target/117251 do:

 * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html

This is patch #20 of 45 to generate the 'XXEVAL' instruction on power10
and power11 instead of using the Altivec 'VXOR' instruction feeding
into 'VOR'.  The 'XXEVAL' instruction can use all 64 vector registers,
instead of the 32 registers that traditional Altivec vector
instructions use.  By allowing all of the vector registers to be used,
it reduces the amount of spilling that a large benchmark generated.

Currently the following code:

vector int a, b, c, d;
a = (c ^ d) | b;

Generates:

vxor   t,c,d
vor    a,t,b

With this patch, if any of the arguments or the result is allocated to
a traditional FPR register, GCC will now generate the following code
instead of adding vector move instructions:

xxeval a,b,c,d,111

Since fusion using 2 Altivec instructions is slightly faster than using
the 'XXEVAL' instruction we prefer to generate the Altivec instructions
if we can.  In addition, because 'XXEVAL' is a prefixed instruction, it
possibly might generate an extra NOP instruction to align the 'XXEVAL'
instruction.

I have tested these patches on both big endian and little endian
PowerPC servers, with no regressions.  Can I check these patches into
the trunk?

2025-07-09  Michael Meissner  

gcc/

PR target/117251
* config/rs6000/fusion.md: Regenerate.
* config/rs6000/genfusion.pl (gen_logical_addsubf): Add support
to generate vector xor => or fusion if XXEVAL is supported.

Diff:
---
 gcc/config/rs6000/fusion.md| 15 +--
 gcc/config/rs6000/genfusion.pl |  1 +
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index a848b21bc3e2..1d4b3c970c7f 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -2762,20 +2762,23 @@
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vxor -> vor
 (define_insn "*fuse_vxor_vor"
-  [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v")
-(ior:VM (xor:VM (match_operand:VM 0 "altivec_register_operand" 
"v,v,v,v")
-  (match_operand:VM 1 "altivec_register_operand" 
"v,v,v,v"))
- (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))
-   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
+  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
+(ior:VM (xor:VM (match_operand:VM 0 "vector_fusion_operand" 
"v,v,v,wa,v")
+  (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,wa,v"))
+ (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v")))
+   (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
vxor %3,%1,%0\;vor %3,%3,%2
vxor %3,%1,%0\;vor %3,%3,%2
vxor %3,%1,%0\;vor %3,%3,%2
+   xxeval %x3,%x2,%x1,%x0,111
vxor %4,%1,%0\;vor %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
-   (set_attr "length" "8")])
+   (set_attr "length" "8")
+   (set_attr "prefixed" "*,*,*,yes,*")
+   (set_attr "isa" "*,*,*,xxeval,*")])
 
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vand -> vorc
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index b9ff6c99b95e..4ec38beccb9c 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -234,6 +234,7 @@ sub gen_logical_addsubf
   "vorc_vorc"   =>  79,
   "veqv_vnor"   =>  96,
   "vxor_vxor"   => 105,
+  "vxor_vor"=> 111,
 );
 
 KIND: foreach $kind ('scalar','vector') {

[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector andc to vector andc fusion

2025-07-08 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:f6a79c0761cc446119e6f72df676fd9141e59f5f

commit f6a79c0761cc446119e6f72df676fd9141e59f5f
Author: Michael Meissner 
Date:   Wed Jul 9 01:38:56 2025 -0400

PR target/117251: Improve vector andc to vector andc fusion

See the following post for a complete explanation of what the patches
for PR target/117251 do:

 * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html

This is patch #8 of 45 to generate the 'XXEVAL' instruction on power10
and power11 instead of using the Altivec 'VANDC' instruction feeding
into 'VANDC'.  The 'XXEVAL' instruction can use all 64 vector
registers, instead of the 32 registers that traditional Altivec vector
instructions use.  By allowing all of the vector registers to be used,
it reduces the amount of spilling that a large benchmark generated.

Currently the following code:

vector int a, b, c, d;
a = (c & ~ d) & ~ b;

Generates:

vandc  t,c,d
vandc  a,t,b

With this patch, if any of the arguments or the result is allocated to
a traditional FPR register, GCC will now generate the following code
instead of adding vector move instructions:

xxeval a,b,c,d,13

Since fusion using 2 Altivec instructions is slightly faster than using
the 'XXEVAL' instruction we prefer to generate the Altivec instructions
if we can.  In addition, because 'XXEVAL' is a prefixed instruction, it
possibly might generate an extra NOP instruction to align the 'XXEVAL'
instruction.

I have tested these patches on both big endian and little endian
PowerPC servers, with no regressions.  Can I check these patches into
the trunk?

2025-07-09  Michael Meissner  

gcc/

PR target/117251
* config/rs6000/fusion.md: Regenerate.
* config/rs6000/genfusion.pl (gen_logical_addsubf): Add support
to generate vector/vector andc/andc fusion if XXEVAL is
supported.

Diff:
---
 gcc/config/rs6000/fusion.md| 15 +--
 gcc/config/rs6000/genfusion.pl |  1 +
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index e27f05f85f12..810d97963fb9 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -2054,20 +2054,23 @@
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vandc -> vandc
 (define_insn "*fuse_vandc_vandc"
-  [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v")
-(and:VM (and:VM (not:VM (match_operand:VM 0 "altivec_register_operand" 
"v,v,v,v"))
-  (match_operand:VM 1 "altivec_register_operand" 
"v,v,v,v"))
- (not:VM (match_operand:VM 2 "altivec_register_operand" 
"v,v,v,v"
-   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
+  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
+(and:VM (and:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" 
"v,v,v,wa,v"))
+  (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,wa,v"))
+ (not:VM (match_operand:VM 2 "vector_fusion_operand" 
"v,v,v,wa,v"
+   (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
vandc %3,%1,%0\;vandc %3,%3,%2
vandc %3,%1,%0\;vandc %3,%3,%2
vandc %3,%1,%0\;vandc %3,%3,%2
+   xxeval %x3,%x2,%x1,%x0,13
vandc %4,%1,%0\;vandc %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
-   (set_attr "length" "8")])
+   (set_attr "length" "8")
+   (set_attr "prefixed" "*,*,*,yes,*")
+   (set_attr "isa" "*,*,*,xxeval,*")])
 
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector veqv -> vandc
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index a3cc8b121eab..929257d6c03e 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -222,6 +222,7 @@ sub gen_logical_addsubf
   "vnor_vand"   =>   8,
   "veqv_vand"   =>   9,
   "vorc_vand"   =>  11,
+  "vandc_vandc" =>  13,
 );
 
 KIND: foreach $kind ('scalar','vector') {

[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector xor to vector and fusion

2025-07-08 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:f4e428bb02cc0b9a6d5c94af45a38d29802f50e1

commit f4e428bb02cc0b9a6d5c94af45a38d29802f50e1
Author: Michael Meissner 
Date:   Wed Jul 9 01:35:27 2025 -0400

PR target/117251: Improve vector xor to vector and fusion

See the following post for a complete explanation of what the patches
for PR target/117251 do:

 * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html

This is patch #3 of 45 to generate the 'XXEVAL' instruction on power10
and power11 instead of using the Altivec 'VXOR' instruction feeding
into 'VAND'.  The 'XXEVAL' instruction can use all 64 vector registers,
instead of the 32 registers that traditional Altivec vector
instructions use.  By allowing all of the vector registers to be used,
it reduces the amount of spilling that a large benchmark generated.

Currently the following code:

vector int a, b, c, d;
a = (c ^ d) & b;

Generates:

vxor   t,c,d
vand   a,t,b

With this patch, if any of the arguments or the result is allocated to
a traditional FPR register, GCC will now generate the following code
instead of adding vector move instructions:

xxeval a,b,c,d,6

Since fusion using 2 Altivec instructions is slightly faster than using
the 'XXEVAL' instruction we prefer to generate the Altivec instructions
if we can.  In addition, because 'XXEVAL' is a prefixed instruction, it
possibly might generate an extra NOP instruction to align the 'XXEVAL'
instruction.

I have tested these patches on both big endian and little endian
PowerPC servers, with no regressions.  Can I check these patches into
the trunk?

2025-07-09  Michael Meissner  

gcc/

PR target/117251
* config/rs6000/fusion.md: Regenerate.
* config/rs6000/genfusion.pl (gen_logical_addsubf): Add support to
generate vector/vector xor/and fusion if XXEVAL is supported.

Diff:
---
 gcc/config/rs6000/fusion.md| 15 +--
 gcc/config/rs6000/genfusion.pl |  1 +
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index b9590b6d1104..6375cd3a8970 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -2003,20 +2003,23 @@
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vxor -> vand
 (define_insn "*fuse_vxor_vand"
-  [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v")
-(and:VM (xor:VM (match_operand:VM 0 "altivec_register_operand" 
"v,v,v,v")
-  (match_operand:VM 1 "altivec_register_operand" 
"v,v,v,v"))
- (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))
-   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
+  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
+(and:VM (xor:VM (match_operand:VM 0 "vector_fusion_operand" 
"v,v,v,wa,v")
+  (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,wa,v"))
+ (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v")))
+   (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
vxor %3,%1,%0\;vand %3,%3,%2
vxor %3,%1,%0\;vand %3,%3,%2
vxor %3,%1,%0\;vand %3,%3,%2
+   xxeval %x3,%x2,%x1,%x0,6
vxor %4,%1,%0\;vand %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
-   (set_attr "length" "8")])
+   (set_attr "length" "8")
+   (set_attr "prefixed" "*,*,*,yes,*")
+   (set_attr "isa" "*,*,*,xxeval,*")])
 
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vand -> vandc
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index 23adf98c4056..2c631b944587 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -217,6 +217,7 @@ sub gen_logical_addsubf
 my %xxeval_fusions = (
   "vand_vand"   =>   1,
   "vandc_vand"  =>   2,
+  "vxor_vand"   =>   6,
 );
 
 KIND: foreach $kind ('scalar','vector') {

[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector orc to vector eqv fusion

2025-07-08 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:039f33459a4defb5c66ef3f1581dc5cf49ae743e

commit 039f33459a4defb5c66ef3f1581dc5cf49ae743e
Author: Michael Meissner 
Date:   Wed Jul 9 01:43:17 2025 -0400

PR target/117251: Improve vector orc to vector eqv fusion

See the following post for a complete explanation of what the patches
for PR target/117251 do:

 * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html

This is patch #16 of 45 to generate the 'XXEVAL' instruction on power10
and power11 instead of using the Altivec 'VORC' instruction feeding
into 'VEQV'.  The 'XXEVAL' instruction can use all 64 vector registers,
instead of the 32 registers that traditional Altivec vector
instructions use.  By allowing all of the vector registers to be used,
it reduces the amount of spilling that a large benchmark generated.

Currently the following code:

vector int a, b, c, d;
a = ~ ((c | ~ d) ^ b);

Generates:

vorc   t,c,d
veqv   a,t,b

With this patch, if any of the arguments or the result is allocated to
a traditional FPR register, GCC will now generate the following code
instead of adding vector move instructions:

xxeval a,b,c,d,75

Since fusion using 2 Altivec instructions is slightly faster than using
the 'XXEVAL' instruction we prefer to generate the Altivec instructions
if we can.  In addition, because 'XXEVAL' is a prefixed instruction, it
possibly might generate an extra NOP instruction to align the 'XXEVAL'
instruction.

I have tested these patches on both big endian and little endian
PowerPC servers, with no regressions.  Can I check these patches into
the trunk?

2025-07-09  Michael Meissner  

gcc/

PR target/117251
* config/rs6000/fusion.md: Regenerate.
* config/rs6000/genfusion.pl (gen_logical_addsubf): Add support
to generate vector orc => eqv fusion if XXEVAL is supported.

Diff:
---
 gcc/config/rs6000/fusion.md| 15 +--
 gcc/config/rs6000/genfusion.pl |  1 +
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index f45e65f0217c..f84d0aee5d79 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -2294,20 +2294,23 @@
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vorc -> veqv
 (define_insn "*fuse_vorc_veqv"
-  [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v")
-(not:VM (xor:VM (ior:VM (not:VM (match_operand:VM 0 
"altivec_register_operand" "v,v,v,v"))
-  (match_operand:VM 1 "altivec_register_operand" 
"v,v,v,v"))
- (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"
-   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
+  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
+(not:VM (xor:VM (ior:VM (not:VM (match_operand:VM 0 
"vector_fusion_operand" "v,v,v,wa,v"))
+  (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,wa,v"))
+ (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v"
+   (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
vorc %3,%1,%0\;veqv %3,%3,%2
vorc %3,%1,%0\;veqv %3,%3,%2
vorc %3,%1,%0\;veqv %3,%3,%2
+   xxeval %x3,%x2,%x1,%x0,75
vorc %4,%1,%0\;veqv %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
-   (set_attr "length" "8")])
+   (set_attr "length" "8")
+   (set_attr "prefixed" "*,*,*,yes,*")
+   (set_attr "isa" "*,*,*,xxeval,*")])
 
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vxor -> veqv
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index 720e8d440c2d..8ba1aa081f75 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -230,6 +230,7 @@ sub gen_logical_addsubf
   "vandc_vxor"  =>  45,
   "vandc_vor"   =>  47,
   "vorc_vnor"   =>  64,
+  "vorc_veqv"   =>  75,
 );
 
 KIND: foreach $kind ('scalar','vector') {

[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector eqv to vector xor fusion

2025-07-08 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:a4a23171994a253cfb9eeafda7a7b710a62317b4

commit a4a23171994a253cfb9eeafda7a7b710a62317b4
Author: Michael Meissner 
Date:   Wed Jul 9 01:49:05 2025 -0400

PR target/117251: Improve vector eqv to vector xor fusion

See the following post for a complete explanation of what the patches
for PR target/117251 do:

 * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html

This is patch #28 of 45 to generate the 'XXEVAL' instruction on power10
and power11 instead of using the Altivec 'VEQV' instruction feeding
into 'VXOR'.  The 'XXEVAL' instruction can use all 64 vector registers,
instead of the 32 registers that traditional Altivec vector
instructions use.  By allowing all of the vector registers to be used,
it reduces the amount of spilling that a large benchmark generated.

Currently the following code:

vector int a, b, c, d;
a = (~ (c ^ d)) ^ b;

Generates:

veqv   t,c,d
vxor   a,t,b

With this patch, if any of the arguments or the result is allocated to
a traditional FPR register, GCC will now generate the following code
instead of adding vector move instructions:

xxeval a,b,c,d,150

Since fusion using 2 Altivec instructions is slightly faster than using
the 'XXEVAL' instruction we prefer to generate the Altivec instructions
if we can.  In addition, because 'XXEVAL' is a prefixed instruction, it
possibly might generate an extra NOP instruction to align the 'XXEVAL'
instruction.

I have tested these patches on both big endian and little endian
PowerPC servers, with no regressions.  Can I check these patches into
the trunk?

2025-07-09  Michael Meissner  

gcc/

PR target/117251
* config/rs6000/fusion.md: Regenerate.
* config/rs6000/genfusion.pl (gen_logical_addsubf): Add support
to generate vector eqv => xor fusion if XXEVAL is supported.

Diff:
---
 gcc/config/rs6000/fusion.md| 15 +--
 gcc/config/rs6000/genfusion.pl |  1 +
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index e5ea37c567d6..bb62ae26445a 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -2987,20 +2987,23 @@
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector veqv -> vxor
 (define_insn "*fuse_veqv_vxor"
-  [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v")
-(xor:VM (not:VM (xor:VM (match_operand:VM 0 "altivec_register_operand" 
"v,v,v,v")
-  (match_operand:VM 1 "altivec_register_operand" 
"v,v,v,v")))
- (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))
-   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
+  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
+(xor:VM (not:VM (xor:VM (match_operand:VM 0 "vector_fusion_operand" 
"v,v,v,wa,v")
+  (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,wa,v")))
+ (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v")))
+   (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
veqv %3,%1,%0\;vxor %3,%3,%2
veqv %3,%1,%0\;vxor %3,%3,%2
veqv %3,%1,%0\;vxor %3,%3,%2
+   xxeval %x3,%x2,%x1,%x0,150
veqv %4,%1,%0\;vxor %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
-   (set_attr "length" "8")])
+   (set_attr "length" "8")
+   (set_attr "prefixed" "*,*,*,yes,*")
+   (set_attr "isa" "*,*,*,xxeval,*")])
 
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vnand -> vxor
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index d713d10a1dbc..726e29c798bc 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -242,6 +242,7 @@ sub gen_logical_addsubf
   "vnor_vxor"   => 135,
   "vnor_vor"=> 143,
   "vxor_vnor"   => 144,
+  "veqv_vxor"   => 150,
 );
 
 KIND: foreach $kind ('scalar','vector') {

[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector nand to vector nor fusion

2025-07-08 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:6474b6dbad0c66976adf904bfa942e9d76764947

commit 6474b6dbad0c66976adf904bfa942e9d76764947
Author: Michael Meissner 
Date:   Wed Jul 9 01:40:03 2025 -0400

PR target/117251: Improve vector nand to vector nor fusion

See the following post for a complete explanation of what the patches
for PR target/117251 do:

 * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html

This is patch #10 of 45 to generate the 'XXEVAL' instruction on power10
and power11 instead of using the Altivec 'VNAND' instruction feeding
into 'VNOR'.  The 'XXEVAL' instruction can use all 64 vector registers,
instead of the 32 registers that traditional Altivec vector
instructions use.  By allowing all of the vector registers to be used,
it reduces the amount of spilling that a large benchmark generated.

Currently the following code:

vector int a, b, c, d;
a = ~ ((~ (c & d)) | b);

Generates:

vnand  t,c,d
vnor   a,t,b

With this patch, if any of the arguments or the result is allocated to
a traditional FPR register, GCC will now generate the following code
instead of adding vector move instructions:

xxeval a,b,c,d,16

Since fusion using 2 Altivec instructions is slightly faster than using
the 'XXEVAL' instruction we prefer to generate the Altivec instructions
if we can.  In addition, because 'XXEVAL' is a prefixed instruction, it
possibly might generate an extra NOP instruction to align the 'XXEVAL'
instruction.

I have tested these patches on both big endian and little endian
PowerPC servers, with no regressions.  Can I check these patches into
the trunk?

2025-07-09  Michael Meissner  

gcc/

PR target/117251
* config/rs6000/fusion.md: Regenerate.
* config/rs6000/genfusion.pl (gen_logical_addsubf): Add support
to generate vector/vector nand/nor fusion if XXEVAL is
supported.

Diff:
---
 gcc/config/rs6000/fusion.md| 15 +--
 gcc/config/rs6000/genfusion.pl |  1 +
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index f70422616ffd..c8a27a9e5471 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -2528,20 +2528,23 @@
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vnand -> vnor
 (define_insn "*fuse_vnand_vnor"
-  [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v")
-(and:VM (not:VM (ior:VM (not:VM (match_operand:VM 0 
"altivec_register_operand" "v,v,v,v"))
-  (not:VM (match_operand:VM 1 
"altivec_register_operand" "v,v,v,v"
- (not:VM (match_operand:VM 2 "altivec_register_operand" 
"v,v,v,v"
-   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
+  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
+(and:VM (not:VM (ior:VM (not:VM (match_operand:VM 0 
"vector_fusion_operand" "v,v,v,wa,v"))
+  (not:VM (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,wa,v"
+ (not:VM (match_operand:VM 2 "vector_fusion_operand" 
"v,v,v,wa,v"
+   (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
vnand %3,%1,%0\;vnor %3,%3,%2
vnand %3,%1,%0\;vnor %3,%3,%2
vnand %3,%1,%0\;vnor %3,%3,%2
+   xxeval %x3,%x2,%x1,%x0,16
vnand %4,%1,%0\;vnor %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
-   (set_attr "length" "8")])
+   (set_attr "length" "8")
+   (set_attr "prefixed" "*,*,*,yes,*")
+   (set_attr "isa" "*,*,*,xxeval,*")])
 
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vnor -> vnor
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index 5beabe530a67..078bc6ca0dab 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -224,6 +224,7 @@ sub gen_logical_addsubf
   "vorc_vand"   =>  11,
   "vandc_vandc" =>  13,
   "vnand_vand"  =>  14,
+  "vnand_vnor"  =>  16,
 );
 
 KIND: foreach $kind ('scalar','vector') {

[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector nand to vector and fusion

2025-07-08 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:64db95d5a59e6f88a00314e8f8cbd796535313c4

commit 64db95d5a59e6f88a00314e8f8cbd796535313c4
Author: Michael Meissner 
Date:   Wed Jul 9 01:39:19 2025 -0400

PR target/117251: Improve vector nand to vector and fusion

See the following post for a complete explanation of what the patches
for PR target/117251 do:

 * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html

This is patch #9 of 45 to generate the 'XXEVAL' instruction on power10
and power11 instead of using the Altivec 'VNAND' instruction feeding
into 'VAND'.  The 'XXEVAL' instruction can use all 64 vector registers,
instead of the 32 registers that traditional Altivec vector
instructions use.  By allowing all of the vector registers to be used,
it reduces the amount of spilling that a large benchmark generated.

Currently the following code:

vector int a, b, c, d;
a = (~ (c & d)) & b;

Generates:

vnand  t,c,d
vand   a,t,b

With this patch, if any of the arguments or the result is allocated to
a traditional FPR register, GCC will now generate the following code
instead of adding vector move instructions:

xxeval a,b,c,d,14

Since fusion using 2 Altivec instructions is slightly faster than using
the 'XXEVAL' instruction we prefer to generate the Altivec instructions
if we can.  In addition, because 'XXEVAL' is a prefixed instruction, it
possibly might generate an extra NOP instruction to align the 'XXEVAL'
instruction.

I have tested these patches on both big endian and little endian
PowerPC servers, with no regressions.  Can I check these patches into
the trunk?

2025-07-09  Michael Meissner  

gcc/

PR target/117251
* config/rs6000/fusion.md: Regenerate.
* config/rs6000/genfusion.pl (gen_logical_addsubf): Add support
to generate vector/vector nand/and fusion if XXEVAL is
supported.

Diff:
---
 gcc/config/rs6000/fusion.md| 15 +--
 gcc/config/rs6000/genfusion.pl |  1 +
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index 810d97963fb9..f70422616ffd 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -1934,20 +1934,23 @@
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vnand -> vand
 (define_insn "*fuse_vnand_vand"
-  [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v")
-(and:VM (ior:VM (not:VM (match_operand:VM 0 "altivec_register_operand" 
"v,v,v,v"))
-  (not:VM (match_operand:VM 1 
"altivec_register_operand" "v,v,v,v")))
- (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))
-   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
+  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
+(and:VM (ior:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" 
"v,v,v,wa,v"))
+  (not:VM (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,wa,v")))
+ (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v")))
+   (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
vnand %3,%1,%0\;vand %3,%3,%2
vnand %3,%1,%0\;vand %3,%3,%2
vnand %3,%1,%0\;vand %3,%3,%2
+   xxeval %x3,%x2,%x1,%x0,14
vnand %4,%1,%0\;vand %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
-   (set_attr "length" "8")])
+   (set_attr "length" "8")
+   (set_attr "prefixed" "*,*,*,yes,*")
+   (set_attr "isa" "*,*,*,xxeval,*")])
 
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vnor -> vand
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index 929257d6c03e..5beabe530a67 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -223,6 +223,7 @@ sub gen_logical_addsubf
   "veqv_vand"   =>   9,
   "vorc_vand"   =>  11,
   "vandc_vandc" =>  13,
+  "vnand_vand"  =>  14,
 );
 
 KIND: foreach $kind ('scalar','vector') {

[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector nor to vector and fusion

2025-07-08 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:27773b4db8507d5d0f5c203b6ad54bdd1268fb45

commit 27773b4db8507d5d0f5c203b6ad54bdd1268fb45
Author: Michael Meissner 
Date:   Wed Jul 9 01:37:05 2025 -0400

PR target/117251: Improve vector nor to vector and fusion

See the following post for a complete explanation of what the patches
for PR target/117251 do:

 * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html

This is patch #5 of 45 to generate the 'XXEVAL' instruction on power10
and power11 instead of using the Altivec 'VNOR' instruction feeding
into 'VAND'.  The 'XXEVAL' instruction can use all 64 vector registers,
instead of the 32 registers that traditional Altivec vector
instructions use.  By allowing all of the vector registers to be used,
it reduces the amount of spilling that a large benchmark generated.

Currently the following code:

vector int a, b, c, d;
a = (~ (c | d)) & b;

Generates:

vnor   t,c,d
vand   a,t,b

In addition, with this patch, if the arguments or result are
allocated to a traditional FPR register, the GCC compiler will now
generate the following code instead of adding vector move instructions:

xxeval a,b,c,d,8

Since fusion using 2 Altivec instructions is slightly faster than using
the 'XXEVAL' instruction we prefer to generate the Altivec instructions
if we can.  In addition, because 'XXEVAL' is a prefixed instruction, it
possibly might generate an extra NOP instruction to align the 'XXEVAL'
instruction.

I have tested these patches on both big endian and little endian
PowerPC servers, with no regressions.  Can I check these patches into
the trunk?

2025-07-09  Michael Meissner  

gcc/

PR target/117251
* config/rs6000/fusion.md: Regenerate.
* config/rs6000/genfusion.pl (gen_logical_addsubf): Add support
to generate vector/vector nor/and fusion if XXEVAL is
supported.

Diff:
---
 gcc/config/rs6000/fusion.md| 15 +--
 gcc/config/rs6000/genfusion.pl |  1 +
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index 161419b7f586..ed15fccdf760 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -1949,20 +1949,23 @@
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vnor -> vand
 (define_insn "*fuse_vnor_vand"
-  [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v")
-(and:VM (and:VM (not:VM (match_operand:VM 0 "altivec_register_operand" 
"v,v,v,v"))
-  (not:VM (match_operand:VM 1 
"altivec_register_operand" "v,v,v,v")))
- (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))
-   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
+  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
+(and:VM (and:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" 
"v,v,v,wa,v"))
+  (not:VM (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,wa,v")))
+ (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v")))
+   (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
vnor %3,%1,%0\;vand %3,%3,%2
vnor %3,%1,%0\;vand %3,%3,%2
vnor %3,%1,%0\;vand %3,%3,%2
+   xxeval %x3,%x2,%x1,%x0,8
vnor %4,%1,%0\;vand %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
-   (set_attr "length" "8")])
+   (set_attr "length" "8")
+   (set_attr "prefixed" "*,*,*,yes,*")
+   (set_attr "isa" "*,*,*,xxeval,*")])
 
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vor -> vand
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index 9d3a01a4704a..40d62ae8e9c1 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -219,6 +219,7 @@ sub gen_logical_addsubf
   "vandc_vand"  =>   2,
   "vxor_vand"   =>   6,
   "vor_vand"=>   7,
+  "vnor_vand"   =>   8,
 );
 
 KIND: foreach $kind ('scalar','vector') {

[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector eqv to vector and fusion

2025-07-08 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:386b0165fafb7913a86e8f3aefdaebf2f7fb4744

commit 386b0165fafb7913a86e8f3aefdaebf2f7fb4744
Author: Michael Meissner 
Date:   Wed Jul 9 01:38:02 2025 -0400

PR target/117251: Improve vector eqv to vector and fusion

See the following post for a complete explanation of what the patches
for PR target/117251 do:

 * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html

This is patch #6 of 45 to generate the 'XXEVAL' instruction on power10
and power11 instead of using the Altivec 'VEQV' instruction feeding
into 'VAND'.  The 'XXEVAL' instruction can use all 64 vector registers,
instead of the 32 registers that traditional Altivec vector
instructions use.  By allowing all of the vector registers to be used,
it reduces the amount of spilling that a large benchmark generated.

Currently the following code:

vector int a, b, c, d;
a = (~ (c ^ d)) & b;

Generates:

veqv   t,c,d
vand   a,t,b

In addition, with this patch, if the arguments or result are
allocated to a traditional FPR register, the GCC compiler will now
generate the following code instead of adding vector move instructions:

xxeval a,b,c,d,9

Since fusion using 2 Altivec instructions is slightly faster than using
the 'XXEVAL' instruction we prefer to generate the Altivec instructions
if we can.  In addition, because 'XXEVAL' is a prefixed instruction, it
possibly might generate an extra NOP instruction to align the 'XXEVAL'
instruction.

I have tested these patches on both big endian and little endian
PowerPC servers, with no regressions.  Can I check these patches into
the trunk?

2025-07-09  Michael Meissner  

gcc/

PR target/117251
* config/rs6000/fusion.md: Regenerate.
* config/rs6000/genfusion.pl (gen_logical_addsubf): Add support
to generate vector/vector eqv/and fusion if XXEVAL is
supported.

Diff:
---
 gcc/config/rs6000/fusion.md| 15 +--
 gcc/config/rs6000/genfusion.pl |  1 +
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index ed15fccdf760..cce179e0c974 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -1913,20 +1913,23 @@
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector veqv -> vand
 (define_insn "*fuse_veqv_vand"
-  [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v")
-(and:VM (not:VM (xor:VM (match_operand:VM 0 "altivec_register_operand" 
"v,v,v,v")
-  (match_operand:VM 1 "altivec_register_operand" 
"v,v,v,v")))
- (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))
-   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
+  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
+(and:VM (not:VM (xor:VM (match_operand:VM 0 "vector_fusion_operand" 
"v,v,v,wa,v")
+  (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,wa,v")))
+ (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v")))
+   (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
veqv %3,%1,%0\;vand %3,%3,%2
veqv %3,%1,%0\;vand %3,%3,%2
veqv %3,%1,%0\;vand %3,%3,%2
+   xxeval %x3,%x2,%x1,%x0,9
veqv %4,%1,%0\;vand %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
-   (set_attr "length" "8")])
+   (set_attr "length" "8")
+   (set_attr "prefixed" "*,*,*,yes,*")
+   (set_attr "isa" "*,*,*,xxeval,*")])
 
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vnand -> vand
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index 40d62ae8e9c1..268b94089484 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -220,6 +220,7 @@ sub gen_logical_addsubf
   "vxor_vand"   =>   6,
   "vor_vand"=>   7,
   "vnor_vand"   =>   8,
+  "veqv_vand"   =>   9,
 );
 
 KIND: foreach $kind ('scalar','vector') {

[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector orc to vector and fusion

2025-07-08 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:a444dc4bd0ba208f1ac06ac67bef918f5ffbf1f6

commit a444dc4bd0ba208f1ac06ac67bef918f5ffbf1f6
Author: Michael Meissner 
Date:   Wed Jul 9 01:38:23 2025 -0400

PR target/117251: Improve vector orc to vector and fusion

See the following post for a complete explanation of what the patches
for PR target/117251 do:

 * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html

This is patch #7 of 45 to generate the 'XXEVAL' instruction on power10
and power11 instead of using the Altivec 'VORC' instruction feeding
into 'VAND'.  The 'XXEVAL' instruction can use all 64 vector registers,
instead of the 32 registers that traditional Altivec vector
instructions use.  By allowing all of the vector registers to be used,
it reduces the amount of spilling that a large benchmark generated.

Currently the following code:

vector int a, b, c, d;
a = (c | ~ d) & b;

Generates:

vorc   t,c,d
vand   a,t,b

In addition, with this patch, if the arguments or result are
allocated to a traditional FPR register, the GCC compiler will now
generate the following code instead of adding vector move instructions:

xxeval a,b,c,d,11

Since fusion using 2 Altivec instructions is slightly faster than using
the 'XXEVAL' instruction we prefer to generate the Altivec instructions
if we can.  In addition, because 'XXEVAL' is a prefixed instruction, it
possibly might generate an extra NOP instruction to align the 'XXEVAL'
instruction.

I have tested these patches on both big endian and little endian
PowerPC servers, with no regressions.  Can I check these patches into
the trunk?

2025-07-09  Michael Meissner  

gcc/

PR target/117251
* config/rs6000/fusion.md: Regenerate.
* config/rs6000/genfusion.pl (gen_logical_addsubf): Add support
to generate vector/vector orc/and fusion if XXEVAL is
supported.

Diff:
---
 gcc/config/rs6000/fusion.md| 15 +--
 gcc/config/rs6000/genfusion.pl |  1 +
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index cce179e0c974..e27f05f85f12 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -1994,20 +1994,23 @@
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vorc -> vand
 (define_insn "*fuse_vorc_vand"
-  [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v")
-(and:VM (ior:VM (not:VM (match_operand:VM 0 "altivec_register_operand" 
"v,v,v,v"))
-  (match_operand:VM 1 "altivec_register_operand" 
"v,v,v,v"))
- (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))
-   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
+  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
+(and:VM (ior:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" 
"v,v,v,wa,v"))
+  (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,wa,v"))
+ (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v")))
+   (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
vorc %3,%1,%0\;vand %3,%3,%2
vorc %3,%1,%0\;vand %3,%3,%2
vorc %3,%1,%0\;vand %3,%3,%2
+   xxeval %x3,%x2,%x1,%x0,11
vorc %4,%1,%0\;vand %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
-   (set_attr "length" "8")])
+   (set_attr "length" "8")
+   (set_attr "prefixed" "*,*,*,yes,*")
+   (set_attr "isa" "*,*,*,xxeval,*")])
 
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vxor -> vand
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index 268b94089484..a3cc8b121eab 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -221,6 +221,7 @@ sub gen_logical_addsubf
   "vor_vand"=>   7,
   "vnor_vand"   =>   8,
   "veqv_vand"   =>   9,
+  "vorc_vand"   =>  11,
 );
 
 KIND: foreach $kind ('scalar','vector') {

[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector xor to vector nor fusion

2025-07-08 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:4c45f874584b9b7226be1241cf7e9202a593f883

commit 4c45f874584b9b7226be1241cf7e9202a593f883
Author: Michael Meissner 
Date:   Wed Jul 9 01:48:38 2025 -0400

PR target/117251: Improve vector xor to vector nor fusion

See the following post for a complete explanation of what the patches
for PR target/117251 do:

 * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html

This is patch #27 of 45 to generate the 'XXEVAL' instruction on power10
and power11 instead of using the Altivec 'VXOR' instruction feeding
into 'VNOR'.  The 'XXEVAL' instruction can use all 64 vector registers,
instead of the 32 registers that traditional Altivec vector
instructions use.  By allowing all of the vector registers to be used,
it reduces the amount of spilling that a large benchmark generated.

Currently the following code:

vector int a, b, c, d;
a = ~ ((c ^ d) | b);

Generates:

vxor   t,c,d
vnor   a,t,b

In addition, with this patch, if the arguments or result are
allocated to a traditional FPR register, the GCC compiler will now
generate the following code instead of adding vector move instructions:

xxeval a,b,c,d,144

Since fusion using 2 Altivec instructions is slightly faster than using
the 'XXEVAL' instruction we prefer to generate the Altivec instructions
if we can.  In addition, because 'XXEVAL' is a prefixed instruction, it
possibly might generate an extra NOP instruction to align the 'XXEVAL'
instruction.

I have tested these patches on both big endian and little endian
PowerPC servers, with no regressions.  Can I check these patches into
the trunk?

2025-07-09  Michael Meissner  

gcc/

PR target/117251
* config/rs6000/fusion.md: Regenerate.
* config/rs6000/genfusion.pl (gen_logical_addsubf): Add support
to generate vector xor => nor fusion if XXEVAL is supported.

Diff:
---
 gcc/config/rs6000/fusion.md| 15 +--
 gcc/config/rs6000/genfusion.pl |  1 +
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index 66d98f4537e1..e5ea37c567d6 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -2618,20 +2618,23 @@
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vxor -> vnor
 (define_insn "*fuse_vxor_vnor"
-  [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v")
-(and:VM (not:VM (xor:VM (match_operand:VM 0 "altivec_register_operand" 
"v,v,v,v")
-  (match_operand:VM 1 "altivec_register_operand" 
"v,v,v,v")))
- (not:VM (match_operand:VM 2 "altivec_register_operand" 
"v,v,v,v"
-   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
+  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
+(and:VM (not:VM (xor:VM (match_operand:VM 0 "vector_fusion_operand" 
"v,v,v,wa,v")
+  (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,wa,v")))
+ (not:VM (match_operand:VM 2 "vector_fusion_operand" 
"v,v,v,wa,v"
+   (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
vxor %3,%1,%0\;vnor %3,%3,%2
vxor %3,%1,%0\;vnor %3,%3,%2
vxor %3,%1,%0\;vnor %3,%3,%2
+   xxeval %x3,%x2,%x1,%x0,144
vxor %4,%1,%0\;vnor %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
-   (set_attr "length" "8")])
+   (set_attr "length" "8")
+   (set_attr "prefixed" "*,*,*,yes,*")
+   (set_attr "isa" "*,*,*,xxeval,*")])
 
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vand -> vor
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index 98b56b788f03..d713d10a1dbc 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -241,6 +241,7 @@ sub gen_logical_addsubf
   "vor_vnor"=> 128,
   "vnor_vxor"   => 135,
   "vnor_vor"=> 143,
+  "vxor_vnor"   => 144,
 );
 
 KIND: foreach $kind ('scalar','vector') {

[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector or to vector xor fusion

2025-07-08 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:2655d834eaa00bcddc76b58e32865a8e00bf3600

commit 2655d834eaa00bcddc76b58e32865a8e00bf3600
Author: Michael Meissner 
Date:   Wed Jul 9 01:46:23 2025 -0400

PR target/117251: Improve vector or to vector xor fusion

See the following post for a complete explanation of what the patches
for PR target/117251 do:

 * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html

This is patch #22 of 45 to generate the 'XXEVAL' instruction on power10
and power11 instead of using the Altivec 'VOR' instruction feeding into
'VXOR'.  The 'XXEVAL' instruction can use all 64 vector registers,
instead of the 32 registers that traditional Altivec vector
instructions use.  By allowing all of the vector registers to be used,
it reduces the amount of spilling that a large benchmark generated.

Currently the following code:

vector int a, b, c, d;
a = (c | d) ^ b;

Generates:

vor    t,c,d
vxor   a,t,b

In addition, with this patch, if the arguments or result are
allocated to a traditional FPR register, the GCC compiler will now
generate the following code instead of adding vector move instructions:

xxeval a,b,c,d,120

Since fusion using 2 Altivec instructions is slightly faster than using
the 'XXEVAL' instruction we prefer to generate the Altivec instructions
if we can.  In addition, because 'XXEVAL' is a prefixed instruction, it
possibly might generate an extra NOP instruction to align the 'XXEVAL'
instruction.

I have tested these patches on both big endian and little endian
PowerPC servers, with no regressions.  Can I check these patches into
the trunk?

2025-07-09  Michael Meissner  

gcc/

PR target/117251
* config/rs6000/fusion.md: Regenerate.
* config/rs6000/genfusion.pl (gen_logical_addsubf): Add support
to generate vector or => xor fusion if XXEVAL is supported.

Diff:
---
 gcc/config/rs6000/fusion.md| 15 +--
 gcc/config/rs6000/genfusion.pl |  1 +
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index 032c87ac5765..d1f6a38b618a 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -3029,20 +3029,23 @@
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vor -> vxor
 (define_insn "*fuse_vor_vxor"
-  [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v")
-(xor:VM (ior:VM (match_operand:VM 0 "altivec_register_operand" 
"v,v,v,v")
-  (match_operand:VM 1 "altivec_register_operand" 
"v,v,v,v"))
- (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))
-   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
+  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
+(xor:VM (ior:VM (match_operand:VM 0 "vector_fusion_operand" 
"v,v,v,wa,v")
+  (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,wa,v"))
+ (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v")))
+   (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
vor %3,%1,%0\;vxor %3,%3,%2
vor %3,%1,%0\;vxor %3,%3,%2
vor %3,%1,%0\;vxor %3,%3,%2
+   xxeval %x3,%x2,%x1,%x0,120
vor %4,%1,%0\;vxor %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
-   (set_attr "length" "8")])
+   (set_attr "length" "8")
+   (set_attr "prefixed" "*,*,*,yes,*")
+   (set_attr "isa" "*,*,*,xxeval,*")])
 
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vorc -> vxor
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index 6af4c5d7a182..97681f37d0fa 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -236,6 +236,7 @@ sub gen_logical_addsubf
   "vxor_vxor"   => 105,
   "vxor_vor"=> 111,
   "vnor_vnor"   => 112,
+  "vor_vxor"=> 120,
 );
 
 KIND: foreach $kind ('scalar','vector') {

[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector eqv to vector or fusion

2025-07-08 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:831ba55445ca1f97103f68f09380b0f738c26c13

commit 831ba55445ca1f97103f68f09380b0f738c26c13
Author: Michael Meissner 
Date:   Wed Jul 9 01:49:31 2025 -0400

PR target/117251: Improve vector eqv to vector or fusion

See the following post for a complete explanation of what the patches
for PR target/117251 do:

 * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html

This is patch #29 of 45 to generate the 'XXEVAL' instruction on power10
and power11 instead of using the Altivec 'VEQV' instruction feeding
into 'VOR'.  The 'XXEVAL' instruction can use all 64 vector registers,
instead of the 32 registers that traditional Altivec vector
instructions use.  By allowing all of the vector registers to be used,
it reduces the amount of spilling that a large benchmark generated.

Currently the following code:

vector int a, b, c, d;
a = (~ (c ^ d)) | b;

Generates:

veqv   t,c,d
vor    a,t,b

In addition, with this patch, if the arguments or result are
allocated to a traditional FPR register, the GCC compiler will now
generate the following code instead of adding vector move instructions:

xxeval a,b,c,d,159

Since fusion using 2 Altivec instructions is slightly faster than using
the 'XXEVAL' instruction we prefer to generate the Altivec instructions
if we can.  In addition, because 'XXEVAL' is a prefixed instruction, it
possibly might generate an extra NOP instruction to align the 'XXEVAL'
instruction.

I have tested these patches on both big endian and little endian
PowerPC servers, with no regressions.  Can I check these patches into
the trunk?

2025-07-09  Michael Meissner  

gcc/

PR target/117251
* config/rs6000/fusion.md: Regenerate.
* config/rs6000/genfusion.pl (gen_logical_addsubf): Add support
to generate vector eqv => or fusion if XXEVAL is supported.

Diff:
---
 gcc/config/rs6000/fusion.md| 15 +--
 gcc/config/rs6000/genfusion.pl |  1 +
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index bb62ae26445a..cb1ad8b4c0cc 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -2681,20 +2681,23 @@
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector veqv -> vor
 (define_insn "*fuse_veqv_vor"
-  [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v")
-(ior:VM (not:VM (xor:VM (match_operand:VM 0 "altivec_register_operand" 
"v,v,v,v")
-  (match_operand:VM 1 "altivec_register_operand" 
"v,v,v,v")))
- (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))
-   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
+  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
+(ior:VM (not:VM (xor:VM (match_operand:VM 0 "vector_fusion_operand" 
"v,v,v,wa,v")
+  (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,wa,v")))
+ (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v")))
+   (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
veqv %3,%1,%0\;vor %3,%3,%2
veqv %3,%1,%0\;vor %3,%3,%2
veqv %3,%1,%0\;vor %3,%3,%2
+   xxeval %x3,%x2,%x1,%x0,159
veqv %4,%1,%0\;vor %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
-   (set_attr "length" "8")])
+   (set_attr "length" "8")
+   (set_attr "prefixed" "*,*,*,yes,*")
+   (set_attr "isa" "*,*,*,xxeval,*")])
 
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vnand -> vor
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index 726e29c798bc..9400aed267a6 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -243,6 +243,7 @@ sub gen_logical_addsubf
   "vnor_vor"=> 143,
   "vxor_vnor"   => 144,
   "veqv_vxor"   => 150,
+  "veqv_vor"=> 159,
 );
 
 KIND: foreach $kind ('scalar','vector') {

[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector nor to vector nand fusion

2025-07-08 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:5bf7b59b601520c417e4498c5322598d9e46c5de

commit 5bf7b59b601520c417e4498c5322598d9e46c5de
Author: Michael Meissner 
Date:   Wed Jul 9 01:54:11 2025 -0400

PR target/117251: Improve vector nor to vector nand fusion

See the following post for a complete explanation of what the patches
for PR target/117251 do:

 * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html

This is patch #40 of 45 to generate the 'XXEVAL' instruction on power10
and power11 instead of using the Altivec 'VNOR' instruction feeding
into 'VNAND'.  The 'XXEVAL' instruction can use all 64 vector
registers, instead of the 32 registers that traditional Altivec vector
instructions use.  By allowing all of the vector registers to be used,
it reduces the amount of spilling that a large benchmark generated.

Currently the following code:

vector int a, b, c, d;
a = ~ ((~ (c | d)) & b);

Generates:

vnor   t,c,d
vnand  a,t,b

In addition, with this patch, if the arguments or result are
allocated to a traditional FPR register, the GCC compiler will now
generate the following code instead of adding vector move instructions:

xxeval a,b,c,d,247

Since fusion using 2 Altivec instructions is slightly faster than using
the 'XXEVAL' instruction we prefer to generate the Altivec instructions
if we can.  In addition, because 'XXEVAL' is a prefixed instruction, it
possibly might generate an extra NOP instruction to align the 'XXEVAL'
instruction.

I have tested these patches on both big endian and little endian
PowerPC servers, with no regressions.  Can I check these patches into
the trunk?

2025-07-09  Michael Meissner  

gcc/

PR target/117251
* config/rs6000/fusion.md: Regenerate.
* config/rs6000/genfusion.pl (gen_logical_addsubf): Add support
to generate vector nor => nand fusion if XXEVAL is supported.

Diff:
---
 gcc/config/rs6000/fusion.md| 15 +--
 gcc/config/rs6000/genfusion.pl |  1 +
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index c1be0e5ff8f1..01b7fda17ecc 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -2414,20 +2414,23 @@
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vnor -> vnand
 (define_insn "*fuse_vnor_vnand"
-  [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v")
-(ior:VM (not:VM (and:VM (not:VM (match_operand:VM 0 
"altivec_register_operand" "v,v,v,v"))
-  (not:VM (match_operand:VM 1 
"altivec_register_operand" "v,v,v,v"
- (not:VM (match_operand:VM 2 "altivec_register_operand" 
"v,v,v,v"
-   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
+  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
+(ior:VM (not:VM (and:VM (not:VM (match_operand:VM 0 
"vector_fusion_operand" "v,v,v,wa,v"))
+  (not:VM (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,wa,v"
+ (not:VM (match_operand:VM 2 "vector_fusion_operand" 
"v,v,v,wa,v"
+   (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
vnor %3,%1,%0\;vnand %3,%3,%2
vnor %3,%1,%0\;vnand %3,%3,%2
vnor %3,%1,%0\;vnand %3,%3,%2
+   xxeval %x3,%x2,%x1,%x0,247
vnor %4,%1,%0\;vnand %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
-   (set_attr "length" "8")])
+   (set_attr "length" "8")
+   (set_attr "prefixed" "*,*,*,yes,*")
+   (set_attr "isa" "*,*,*,xxeval,*")])
 
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vor -> vnand
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index 4c70237d2d27..d4965b6df864 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -254,6 +254,7 @@ sub gen_logical_addsubf
   "vnand_vnand" => 241,
   "vorc_vnand"  => 244,
   "veqv_vnand"  => 246,
+  "vnor_vnand"  => 247,
 );
 
 KIND: foreach $kind ('scalar','vector') {

[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector andc to vector nor fusion

2025-07-08 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:9fd9d57f92c07aaec9a14381f6f7f072be2e026f

commit 9fd9d57f92c07aaec9a14381f6f7f072be2e026f
Author: Michael Meissner 
Date:   Wed Jul 9 01:51:08 2025 -0400

PR target/117251: Improve vector andc to vector nor fusion

See the following post for a complete explanation of what the patches
for PR target/117251 do:

 * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html

This is patch #32 of 45 to generate the 'XXEVAL' instruction on power10
and power11 instead of using the Altivec 'VANDC' instruction feeding
into 'VNOR'.  The 'XXEVAL' instruction can use all 64 vector registers,
instead of the 32 registers that traditional Altivec vector
instructions use.  By allowing all of the vector registers to be used,
it reduces the amount of spilling that a large benchmark generated.

Currently the following code:

vector int a, b, c, d;
a = ~ ((c & ~ d) | b);

Generates:

vandc  t,c,d
vnor   a,t,b

In addition, with this patch, if the arguments or result are
allocated to a traditional FPR register, the GCC compiler will now
generate the following code instead of adding vector move instructions:

xxeval a,b,c,d,208

Since fusion using 2 Altivec instructions is slightly faster than using
the 'XXEVAL' instruction we prefer to generate the Altivec instructions
if we can.  In addition, because 'XXEVAL' is a prefixed instruction, it
possibly might generate an extra NOP instruction to align the 'XXEVAL'
instruction.

I have tested these patches on both big endian and little endian
PowerPC servers, with no regressions.  Can I check these patches into
the trunk?

2025-07-09  Michael Meissner  

gcc/

PR target/117251
* config/rs6000/fusion.md: Regenerate.
* config/rs6000/genfusion.pl (gen_logical_addsubf): Add support
to generate vector andc => nor fusion if XXEVAL is supported.

Diff:
---
 gcc/config/rs6000/fusion.md| 15 +--
 gcc/config/rs6000/genfusion.pl |  1 +
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index f6dc26e9c1f2..dd8401d48228 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -2495,20 +2495,23 @@
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vandc -> vnor
 (define_insn "*fuse_vandc_vnor"
-  [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v")
-(and:VM (not:VM (and:VM (not:VM (match_operand:VM 0 
"altivec_register_operand" "v,v,v,v"))
-  (match_operand:VM 1 "altivec_register_operand" 
"v,v,v,v")))
- (not:VM (match_operand:VM 2 "altivec_register_operand" 
"v,v,v,v"
-   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
+  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
+(and:VM (not:VM (and:VM (not:VM (match_operand:VM 0 
"vector_fusion_operand" "v,v,v,wa,v"))
+  (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,wa,v")))
+ (not:VM (match_operand:VM 2 "vector_fusion_operand" 
"v,v,v,wa,v"
+   (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
vandc %3,%1,%0\;vnor %3,%3,%2
vandc %3,%1,%0\;vnor %3,%3,%2
vandc %3,%1,%0\;vnor %3,%3,%2
+   xxeval %x3,%x2,%x1,%x0,208
vandc %4,%1,%0\;vnor %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
-   (set_attr "length" "8")])
+   (set_attr "length" "8")
+   (set_attr "prefixed" "*,*,*,yes,*")
+   (set_attr "isa" "*,*,*,xxeval,*")])
 
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector veqv -> vnor
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index 62f2b9e36d89..d89e78d4da03 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -246,6 +246,7 @@ sub gen_logical_addsubf
   "veqv_vor"=> 159,
   "vorc_vxor"   => 180,
   "vorc_vor"=> 191,
+  "vandc_vnor"  => 208,
 );
 
 KIND: foreach $kind ('scalar','vector') {

[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector nand to vector or fusion

2025-07-08 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:c739bdabd5aeb288f4f0f9f7bf894cee51b8bb97

commit c739bdabd5aeb288f4f0f9f7bf894cee51b8bb97
Author: Michael Meissner 
Date:   Wed Jul 9 01:52:44 2025 -0400

PR target/117251: Improve vector nand to vector or fusion

See the following post for a complete explanation of what the patches
for PR target/117251 do:

 * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html

This is patch #36 of 45 to generate the 'XXEVAL' instruction on power10
and power11 instead of using the Altivec 'VNAND' instruction feeding
into 'VOR'.  The 'XXEVAL' instruction can use all 64 vector registers,
instead of the 32 registers that traditional Altivec vector
instructions use.  By allowing all of the vector registers to be used,
it reduces the amount of spilling that a large benchmark generated.

Currently the following code:

vector int a, b, c, d;
a = (~ (c & d)) | b;

Generates:

vnand  t,c,d
vor    a,t,b

In addition, with this patch, if any of the arguments or the result is
allocated to a traditional FPR register, GCC will generate the
following code instead of adding vector move instructions:

xxeval a,b,c,d,239

Since fusion using 2 Altivec instructions is slightly faster than using
the 'XXEVAL' instruction we prefer to generate the Altivec instructions
if we can.  In addition, because 'XXEVAL' is a prefixed instruction, it
possibly might generate an extra NOP instruction to align the 'XXEVAL'
instruction.

I have tested these patches on both big endian and little endian
PowerPC servers, with no regressions.  Can I check these patches into
the trunk?

2025-07-09  Michael Meissner  

gcc/

PR target/117251
* config/rs6000/fusion.md: Regenerate.
* config/rs6000/genfusion.pl (gen_logical_addsubf): Add support
to generate vector nand => or fusion if XXEVAL is supported.

Diff:
---
 gcc/config/rs6000/fusion.md| 15 +--
 gcc/config/rs6000/genfusion.pl |  1 +
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index e6d13b38415a..ba3a5a52b990 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -2711,20 +2711,23 @@
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vnand -> vor
 (define_insn "*fuse_vnand_vor"
-  [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v")
-(ior:VM (ior:VM (not:VM (match_operand:VM 0 "altivec_register_operand" 
"v,v,v,v"))
-  (not:VM (match_operand:VM 1 
"altivec_register_operand" "v,v,v,v")))
- (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))
-   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
+  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
+(ior:VM (ior:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" 
"v,v,v,wa,v"))
+  (not:VM (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,wa,v")))
+ (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v")))
+   (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
vnand %3,%1,%0\;vor %3,%3,%2
vnand %3,%1,%0\;vor %3,%3,%2
vnand %3,%1,%0\;vor %3,%3,%2
+   xxeval %x3,%x2,%x1,%x0,239
vnand %4,%1,%0\;vor %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
-   (set_attr "length" "8")])
+   (set_attr "length" "8")
+   (set_attr "prefixed" "*,*,*,yes,*")
+   (set_attr "isa" "*,*,*,xxeval,*")])
 
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vnor -> vor
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index 94eae471c64b..54699d199fc5 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -250,6 +250,7 @@ sub gen_logical_addsubf
   "vandc_veqv"  => 210,
   "vand_vnor"   => 224,
   "vnand_vxor"  => 225,
+  "vnand_vor"   => 239,
 );
 
 KIND: foreach $kind ('scalar','vector') {

[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector eqv to vector nand fusion

2025-07-08 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:2f13fe598486b76e965a6615bc4276f0864af2b5

commit 2f13fe598486b76e965a6615bc4276f0864af2b5
Author: Michael Meissner 
Date:   Wed Jul 9 01:53:46 2025 -0400

PR target/117251: Improve vector eqv to vector nand fusion

See the following post for a complete explanation of what the patches
for PR target/117251 do:

 * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html

This is patch #39 of 45 to generate the 'XXEVAL' instruction on power10
and power11 instead of using the Altivec 'VEQV' instruction feeding
into 'VNAND'.  The 'XXEVAL' instruction can use all 64 vector
registers, instead of the 32 registers that traditional Altivec vector
instructions use.  By allowing all of the vector registers to be used,
it reduces the amount of spilling that a large benchmark generated.

Currently the following code:

vector int a, b, c, d;
a = ~ ((~ (c ^ d)) & b);

Generates:

veqv   t,c,d
vnand  a,t,b

In addition, with this patch, if any of the arguments or the result is
allocated to a traditional FPR register, GCC will generate the
following code instead of adding vector move instructions:

xxeval a,b,c,d,246

Since fusion using 2 Altivec instructions is slightly faster than using
the 'XXEVAL' instruction we prefer to generate the Altivec instructions
if we can.  In addition, because 'XXEVAL' is a prefixed instruction, it
possibly might generate an extra NOP instruction to align the 'XXEVAL'
instruction.

I have tested these patches on both big endian and little endian
PowerPC servers, with no regressions.  Can I check these patches into
the trunk?

2025-07-09  Michael Meissner  

gcc/

PR target/117251
* config/rs6000/fusion.md: Regenerate.
* config/rs6000/genfusion.pl (gen_logical_addsubf): Add support
to generate vector eqv => nand fusion if XXEVAL is supported.

Diff:
---
 gcc/config/rs6000/fusion.md| 15 +--
 gcc/config/rs6000/genfusion.pl |  1 +
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index 96d8951049c9..c1be0e5ff8f1 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -2372,20 +2372,23 @@
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector veqv -> vnand
 (define_insn "*fuse_veqv_vnand"
-  [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v")
-(ior:VM (not:VM (not:VM (xor:VM (match_operand:VM 0 
"altivec_register_operand" "v,v,v,v")
-  (match_operand:VM 1 "altivec_register_operand" 
"v,v,v,v"
- (not:VM (match_operand:VM 2 "altivec_register_operand" 
"v,v,v,v"
-   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
+  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
+(ior:VM (not:VM (not:VM (xor:VM (match_operand:VM 0 
"vector_fusion_operand" "v,v,v,wa,v")
+  (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,wa,v"
+ (not:VM (match_operand:VM 2 "vector_fusion_operand" 
"v,v,v,wa,v"
+   (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
veqv %3,%1,%0\;vnand %3,%3,%2
veqv %3,%1,%0\;vnand %3,%3,%2
veqv %3,%1,%0\;vnand %3,%3,%2
+   xxeval %x3,%x2,%x1,%x0,246
veqv %4,%1,%0\;vnand %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
-   (set_attr "length" "8")])
+   (set_attr "length" "8")
+   (set_attr "prefixed" "*,*,*,yes,*")
+   (set_attr "isa" "*,*,*,xxeval,*")])
 
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vnand -> vnand
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index 77d3e999eb93..4c70237d2d27 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -253,6 +253,7 @@ sub gen_logical_addsubf
   "vnand_vor"   => 239,
   "vnand_vnand" => 241,
   "vorc_vnand"  => 244,
+  "veqv_vnand"  => 246,
 );
 
 KIND: foreach $kind ('scalar','vector') {

[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector andc to vector nand fusion

2025-07-08 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:1f9583709bf7448144ef35f96d49ba7d0f995747

commit 1f9583709bf7448144ef35f96d49ba7d0f995747
Author: Michael Meissner 
Date:   Wed Jul 9 01:55:34 2025 -0400

PR target/117251: Improve vector andc to vector nand fusion

See the following post for a complete explanation of what the patches
for PR target/117251 do:

 * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html

This is patch #43 of 45 to generate the 'XXEVAL' instruction on power10
and power11 instead of using the Altivec 'VANDC' instruction feeding
into 'VNAND'.  The 'XXEVAL' instruction can use all 64 vector
registers, instead of the 32 registers that traditional Altivec vector
instructions use.  By allowing all of the vector registers to be used,
it reduces the amount of spilling that a large benchmark generated.

Currently the following code:

vector int a, b, c, d;
a = ~ ((c & ~ d) & b);

Generates:

vandc  t,c,d
vnand  a,t,b

In addition, with this patch, if any of the arguments or the result is
allocated to a traditional FPR register, GCC will generate the
following code instead of adding vector move instructions:

xxeval a,b,c,d,253

Since fusion using 2 Altivec instructions is slightly faster than using
the 'XXEVAL' instruction we prefer to generate the Altivec instructions
if we can.  In addition, because 'XXEVAL' is a prefixed instruction, it
possibly might generate an extra NOP instruction to align the 'XXEVAL'
instruction.

I have tested these patches on both big endian and little endian
PowerPC servers, with no regressions.  Can I check these patches into
the trunk?

2025-07-09  Michael Meissner  

gcc/

PR target/117251
* config/rs6000/fusion.md: Regenerate.
* config/rs6000/genfusion.pl (gen_logical_addsubf): Add support
to generate vector andc => nand fusion if XXEVAL is supported.

Diff:
---
 gcc/config/rs6000/fusion.md| 15 +--
 gcc/config/rs6000/genfusion.pl |  1 +
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index e0f9ac17659a..129f7dfb26ed 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -2354,20 +2354,23 @@
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vandc -> vnand
 (define_insn "*fuse_vandc_vnand"
-  [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v")
-(ior:VM (not:VM (and:VM (not:VM (match_operand:VM 0 
"altivec_register_operand" "v,v,v,v"))
-  (match_operand:VM 1 "altivec_register_operand" 
"v,v,v,v")))
- (not:VM (match_operand:VM 2 "altivec_register_operand" 
"v,v,v,v"
-   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
+  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
+(ior:VM (not:VM (and:VM (not:VM (match_operand:VM 0 
"vector_fusion_operand" "v,v,v,wa,v"))
+  (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,wa,v")))
+ (not:VM (match_operand:VM 2 "vector_fusion_operand" 
"v,v,v,wa,v"
+   (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
vandc %3,%1,%0\;vnand %3,%3,%2
vandc %3,%1,%0\;vnand %3,%3,%2
vandc %3,%1,%0\;vnand %3,%3,%2
+   xxeval %x3,%x2,%x1,%x0,253
vandc %4,%1,%0\;vnand %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
-   (set_attr "length" "8")])
+   (set_attr "length" "8")
+   (set_attr "prefixed" "*,*,*,yes,*")
+   (set_attr "isa" "*,*,*,xxeval,*")])
 
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector veqv -> vnand
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index 5d22a0732df6..1d31c242042e 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -257,6 +257,7 @@ sub gen_logical_addsubf
   "vnor_vnand"  => 247,
   "vor_vnand"   => 248,
   "vxor_vnand"  => 249,
+  "vandc_vnand" => 253,
 );
 
 KIND: foreach $kind ('scalar','vector') {

[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector and to vector nor fusion

2025-07-08 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:ad81d6531bf12016615d8538a0b8ed5958b18b5a

commit ad81d6531bf12016615d8538a0b8ed5958b18b5a
Author: Michael Meissner 
Date:   Wed Jul 9 01:55:54 2025 -0400

PR target/117251: Improve vector and to vector nor fusion

See the following post for a complete explanation of what the patches
for PR target/117251 do:

 * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html

This is patch #34 of 45 to generate the 'XXEVAL' instruction on power10
and power11 instead of using the Altivec 'VAND' instruction feeding
into 'VNOR'.  The 'XXEVAL' instruction can use all 64 vector registers,
instead of the 32 registers that traditional Altivec vector
instructions use.  By allowing all of the vector registers to be used,
it reduces the amount of spilling that a large benchmark generated.

Currently the following code:

vector int a, b, c, d;
a = ~ ((c & d) | b);

Generates:

vand   t,c,d
vnor   a,t,b

In addition, with this patch, if any of the arguments or the result is
allocated to a traditional FPR register, GCC will generate the
following code instead of adding vector move instructions:

xxeval a,b,c,d,224

Since fusion using 2 Altivec instructions is slightly faster than using
the 'XXEVAL' instruction we prefer to generate the Altivec instructions
if we can.  In addition, because 'XXEVAL' is a prefixed instruction, it
possibly might generate an extra NOP instruction to align the 'XXEVAL'
instruction.

I have tested these patches on both big endian and little endian
PowerPC servers, with no regressions.  Can I check these patches into
the trunk?

2025-07-09  Michael Meissner  

gcc/

PR target/117251
* config/rs6000/fusion.md: Regenerate.
* config/rs6000/genfusion.pl (gen_logical_addsubf): Add support
to generate vector and => nor fusion if XXEVAL is supported.

Diff:
---
 gcc/config/rs6000/fusion.md| 15 +--
 gcc/config/rs6000/genfusion.pl |  1 +
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index 129f7dfb26ed..61d66129da65 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -2336,20 +2336,23 @@
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vand -> vnand
 (define_insn "*fuse_vand_vnand"
-  [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v")
-(ior:VM (not:VM (and:VM (match_operand:VM 0 "altivec_register_operand" 
"v,v,v,v")
-  (match_operand:VM 1 "altivec_register_operand" 
"v,v,v,v")))
- (not:VM (match_operand:VM 2 "altivec_register_operand" 
"v,v,v,v"
-   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
+  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
+(ior:VM (not:VM (and:VM (match_operand:VM 0 "vector_fusion_operand" 
"v,v,v,wa,v")
+  (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,wa,v")))
+ (not:VM (match_operand:VM 2 "vector_fusion_operand" 
"v,v,v,wa,v"
+   (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
vand %3,%1,%0\;vnand %3,%3,%2
vand %3,%1,%0\;vnand %3,%3,%2
vand %3,%1,%0\;vnand %3,%3,%2
+   xxeval %x3,%x2,%x1,%x0,254
vand %4,%1,%0\;vnand %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
-   (set_attr "length" "8")])
+   (set_attr "length" "8")
+   (set_attr "prefixed" "*,*,*,yes,*")
+   (set_attr "isa" "*,*,*,xxeval,*")])
 
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vandc -> vnand
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index 1d31c242042e..9261dd369340 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -258,6 +258,7 @@ sub gen_logical_addsubf
   "vor_vnand"   => 248,
   "vxor_vnand"  => 249,
   "vandc_vnand" => 253,
+  "vand_vnand"  => 254,
 );
 
 KIND: foreach $kind ('scalar','vector') {

[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector orc to vector nand fusion

2025-07-08 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:e8542b1a292ce063800aae477c76ba03b5736a36

commit e8542b1a292ce063800aae477c76ba03b5736a36
Author: Michael Meissner 
Date:   Wed Jul 9 01:53:28 2025 -0400

PR target/117251: Improve vector orc to vector nand fusion

See the following post for a complete explanation of what the patches
for PR target/117251 do:

 * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html

This is patch #38 of 45 to generate the 'XXEVAL' instruction on power10
and power11 instead of using the Altivec 'VORC' instruction feeding
into 'VNAND'.  The 'XXEVAL' instruction can use all 64 vector
registers, instead of the 32 registers that traditional Altivec vector
instructions use.  By allowing all of the vector registers to be used,
it reduces the amount of spilling that a large benchmark generated.

Currently the following code:

vector int a, b, c, d;
a = ~ ((c | ~ d) & b);

Generates:

vorc   t,c,d
vnand  a,t,b

In addition, with this patch, if any of the arguments or the result is
allocated to a traditional FPR register, GCC will generate the
following code instead of adding vector move instructions:

xxeval a,b,c,d,244

Since fusion using 2 Altivec instructions is slightly faster than using
the 'XXEVAL' instruction we prefer to generate the Altivec instructions
if we can.  In addition, because 'XXEVAL' is a prefixed instruction, it
possibly might generate an extra NOP instruction to align the 'XXEVAL'
instruction.

I have tested these patches on both big endian and little endian
PowerPC servers, with no regressions.  Can I check these patches into
the trunk?

2025-07-09  Michael Meissner  

gcc/

PR target/117251
* config/rs6000/fusion.md: Regenerate.
* config/rs6000/genfusion.pl (gen_logical_addsubf): Add support
to generate vector orc => nand fusion if XXEVAL is supported.

Diff:
---
 gcc/config/rs6000/fusion.md| 15 +--
 gcc/config/rs6000/genfusion.pl |  1 +
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index 241b8a494fb1..96d8951049c9 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -2447,20 +2447,23 @@
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vorc -> vnand
 (define_insn "*fuse_vorc_vnand"
-  [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v")
-(ior:VM (not:VM (ior:VM (not:VM (match_operand:VM 0 
"altivec_register_operand" "v,v,v,v"))
-  (match_operand:VM 1 "altivec_register_operand" 
"v,v,v,v")))
- (not:VM (match_operand:VM 2 "altivec_register_operand" 
"v,v,v,v"
-   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
+  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
+(ior:VM (not:VM (ior:VM (not:VM (match_operand:VM 0 
"vector_fusion_operand" "v,v,v,wa,v"))
+  (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,wa,v")))
+ (not:VM (match_operand:VM 2 "vector_fusion_operand" 
"v,v,v,wa,v"
+   (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
vorc %3,%1,%0\;vnand %3,%3,%2
vorc %3,%1,%0\;vnand %3,%3,%2
vorc %3,%1,%0\;vnand %3,%3,%2
+   xxeval %x3,%x2,%x1,%x0,244
vorc %4,%1,%0\;vnand %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
-   (set_attr "length" "8")])
+   (set_attr "length" "8")
+   (set_attr "prefixed" "*,*,*,yes,*")
+   (set_attr "isa" "*,*,*,xxeval,*")])
 
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vxor -> vnand
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index 728a447c65a9..77d3e999eb93 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -252,6 +252,7 @@ sub gen_logical_addsubf
   "vnand_vxor"  => 225,
   "vnand_vor"   => 239,
   "vnand_vnand" => 241,
+  "vorc_vnand"  => 244,
 );
 
 KIND: foreach $kind ('scalar','vector') {

[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector andc to vector eqv fusion

2025-07-08 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:b14b34c56ef8da774c9e6352c39907dd7fec783b

commit b14b34c56ef8da774c9e6352c39907dd7fec783b
Author: Michael Meissner 
Date:   Wed Jul 9 01:51:28 2025 -0400

PR target/117251: Improve vector andc to vector eqv fusion

See the following post for a complete explanation of what the patches
for PR target/117251 do:

 * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html

This is patch #33 of 45 to generate the 'XXEVAL' instruction on power10
and power11 instead of using the Altivec 'VANDC' instruction feeding
into 'VEQV'.  The 'XXEVAL' instruction can use all 64 vector registers,
instead of the 32 registers that traditional Altivec vector
instructions use.  By allowing all of the vector registers to be used,
it reduces the amount of spilling that a large benchmark generated.

Currently the following code:

vector int a, b, c, d;
a = ~ ((c & ~ d) ^ b);

Generates:

vandc  t,c,d
veqv   a,t,b

In addition, with this patch, if any of the arguments or the result is
allocated to a traditional FPR register, GCC will generate the
following code instead of adding vector move instructions:

xxeval a,b,c,d,210

Since fusion using 2 Altivec instructions is slightly faster than using
the 'XXEVAL' instruction we prefer to generate the Altivec instructions
if we can.  In addition, because 'XXEVAL' is a prefixed instruction, it
possibly might generate an extra NOP instruction to align the 'XXEVAL'
instruction.

I have tested these patches on both big endian and little endian
PowerPC servers, with no regressions.  Can I check these patches into
the trunk?

2025-07-09  Michael Meissner  

gcc/

PR target/117251
* config/rs6000/fusion.md: Regenerate.
* config/rs6000/genfusion.pl (gen_logical_addsubf): Add support
to generate vector andc => eqv fusion if XXEVAL is supported.

Diff:
---
 gcc/config/rs6000/fusion.md| 15 +--
 gcc/config/rs6000/genfusion.pl |  1 +
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index dd8401d48228..e3d9f7376a8d 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -2204,20 +2204,23 @@
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vandc -> veqv
 (define_insn "*fuse_vandc_veqv"
-  [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v")
-(not:VM (xor:VM (and:VM (not:VM (match_operand:VM 0 
"altivec_register_operand" "v,v,v,v"))
-  (match_operand:VM 1 "altivec_register_operand" 
"v,v,v,v"))
- (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"
-   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
+  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
+(not:VM (xor:VM (and:VM (not:VM (match_operand:VM 0 
"vector_fusion_operand" "v,v,v,wa,v"))
+  (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,wa,v"))
+ (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v"
+   (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
vandc %3,%1,%0\;veqv %3,%3,%2
vandc %3,%1,%0\;veqv %3,%3,%2
vandc %3,%1,%0\;veqv %3,%3,%2
+   xxeval %x3,%x2,%x1,%x0,210
vandc %4,%1,%0\;veqv %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
-   (set_attr "length" "8")])
+   (set_attr "length" "8")
+   (set_attr "prefixed" "*,*,*,yes,*")
+   (set_attr "isa" "*,*,*,xxeval,*")])
 
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector veqv -> veqv
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index d89e78d4da03..3a603eb09675 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -247,6 +247,7 @@ sub gen_logical_addsubf
   "vorc_vxor"   => 180,
   "vorc_vor"=> 191,
   "vandc_vnor"  => 208,
+  "vandc_veqv"  => 210,
 );
 
 KIND: foreach $kind ('scalar','vector') {

[gcc(refs/users/meissner/heads/work214-sha)] PR target/117251: Improve vector xor to vector nand fusion

2025-07-08 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:3928830b0fa97afc8f2ca54b7a9b203c8502dc3a

commit 3928830b0fa97afc8f2ca54b7a9b203c8502dc3a
Author: Michael Meissner 
Date:   Wed Jul 9 01:55:11 2025 -0400

PR target/117251: Improve vector xor to vector nand fusion

See the following post for a complete explanation of what the patches
for PR target/117251 do:

 * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html

This is patch #42 of 45 to generate the 'XXEVAL' instruction on power10
and power11 instead of using the Altivec 'VXOR' instruction feeding
into 'VNAND'.  The 'XXEVAL' instruction can use all 64 vector
registers, instead of the 32 registers that traditional Altivec vector
instructions use.  By allowing all of the vector registers to be used,
it reduces the amount of spilling that a large benchmark generated.

Currently the following code:

vector int a, b, c, d;
a = ~ ((c ^ d) & b);

Generates:

vxor   t,c,d
vnand  a,t,b

In addition, with this patch, if any of the arguments or the result is
allocated to a traditional FPR register, GCC will generate the
following code instead of adding vector move instructions:

xxeval a,b,c,d,249

Since fusion using 2 Altivec instructions is slightly faster than using
the 'XXEVAL' instruction we prefer to generate the Altivec instructions
if we can.  In addition, because 'XXEVAL' is a prefixed instruction, it
possibly might generate an extra NOP instruction to align the 'XXEVAL'
instruction.

I have tested these patches on both big endian and little endian
PowerPC servers, with no regressions.  Can I check these patches into
the trunk?

2025-07-09  Michael Meissner  

gcc/

PR target/117251
* config/rs6000/fusion.md: Regenerate.
* config/rs6000/genfusion.pl (gen_logical_addsubf): Add support
to generate vector xor => nand fusion if XXEVAL is supported.

Diff:
---
 gcc/config/rs6000/fusion.md| 15 +--
 gcc/config/rs6000/genfusion.pl |  1 +
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index 39b586918c17..e0f9ac17659a 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -2477,20 +2477,23 @@
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vxor -> vnand
 (define_insn "*fuse_vxor_vnand"
-  [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v")
-(ior:VM (not:VM (xor:VM (match_operand:VM 0 "altivec_register_operand" 
"v,v,v,v")
-  (match_operand:VM 1 "altivec_register_operand" 
"v,v,v,v")))
- (not:VM (match_operand:VM 2 "altivec_register_operand" 
"v,v,v,v"
-   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
+  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
+(ior:VM (not:VM (xor:VM (match_operand:VM 0 "vector_fusion_operand" 
"v,v,v,wa,v")
+  (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,wa,v")))
+ (not:VM (match_operand:VM 2 "vector_fusion_operand" 
"v,v,v,wa,v"
+   (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
vxor %3,%1,%0\;vnand %3,%3,%2
vxor %3,%1,%0\;vnand %3,%3,%2
vxor %3,%1,%0\;vnand %3,%3,%2
+   xxeval %x3,%x2,%x1,%x0,249
vxor %4,%1,%0\;vnand %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
-   (set_attr "length" "8")])
+   (set_attr "length" "8")
+   (set_attr "prefixed" "*,*,*,yes,*")
+   (set_attr "isa" "*,*,*,xxeval,*")])
 
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vand -> vnor
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index 86bca81286ca..5d22a0732df6 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -256,6 +256,7 @@ sub gen_logical_addsubf
   "veqv_vnand"  => 246,
   "vnor_vnand"  => 247,
   "vor_vnand"   => 248,
+  "vxor_vnand"  => 249,
 );
 
 KIND: foreach $kind ('scalar','vector') {

[gcc(refs/users/meissner/heads/work214-sha)] Update ChangeLog.*

2025-07-08 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:8916191afc59ba6583fe065b828c75175cf10ffb

commit 8916191afc59ba6583fe065b828c75175cf10ffb
Author: Michael Meissner 
Date:   Wed Jul 9 02:03:34 2025 -0400

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.sha | 2328 +
 1 file changed, 2328 insertions(+)

diff --git a/gcc/ChangeLog.sha b/gcc/ChangeLog.sha
index 7d7ad9d46b77..cb36b080f21b 100644
--- a/gcc/ChangeLog.sha
+++ b/gcc/ChangeLog.sha
@@ -1,3 +1,2331 @@
+ Branch work214-sha, patch #345 
+
+PR target/117251: Add tests
+
+This is patch #45 of 45 to generate the 'XXEVAL' instruction on power10
+and power11 instead of using the Altivec 'VAND' instruction feeding
+into 'VNAND'.  The 'XXEVAL' instruction can use all 64 vector
+registers, instead of the 32 registers that traditional Altivec vector
+instructions use.  By allowing all of the vector registers to be used,
+it reduces the amount of spilling that a large benchmark generated.
+
+This patch adds the tests for generating 'XXEVAL' to the testsuite.
+
+I have tested these patches on both big endian and little endian
+PowerPC servers, with no regressions.  Can I check these patches into
+the trunk?
+
+2025-07-09  Michael Meissner  
+
+gcc/testsuite/
+
+   PR target/117251
+   * gcc.target/powerpc/p10-vector-fused-1.c: New test.
+   * gcc.target/powerpc/p10-vector-fused-2.c: Likewise.
+
+
+ Branch work214-sha, patch #344 
+
+PR target/117251: Improve vector and to vector nand fusion
+
+See the following post for a complete explanation of what the patches
+for PR target/117251 do:
+
+ * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html
+
+This is patch #44 of 45 to generate the 'XXEVAL' instruction on power10
+and power11 instead of using the Altivec 'VAND' instruction feeding
+into 'VNAND'.  The 'XXEVAL' instruction can use all 64 vector
+registers, instead of the 32 registers that traditional Altivec vector
+instructions use.  By allowing all of the vector registers to be used,
+it reduces the amount of spilling that a large benchmark generated.
+
+Currently the following code:
+
+   vector int a, b, c, d;
+   a = ~ ((c & d) & b);
+
+Generates:
+
+   vand   t,c,d
+   vnand  a,t,b
+
+Now in addition with this patch, if the arguments or result is
+allocated to a traditional FPR register, the GCC compiler will now
+generate the following code instead of adding vector move instructions:
+
+   xxeval a,b,c,254
+
+Since fusion using 2 Altivec instructions is slightly faster than using
+the 'XXEVAL' instruction we prefer to generate the Altivec instructions
+if we can.  In addition, because 'XXEVAL' is a prefixed instruction, it
+possibly might generate an extra NOP instruction to align the 'XXEVAL'
+instruction.
+
+I have tested these patches on both big endian and little endian
+PowerPC servers, with no regressions.  Can I check these patches into
+the trunk?
+
+2025-07-09  Michael Meissner  
+
+gcc/
+
+   PR target/117251
+   * config/rs6000/fusion.md: Regenerate.
+   * config/rs6000/genfusion.pl (gen_logical_addsubf): Add support
+   to generate vector and => nand fusion if XXEVAL is supported.
+
+
+ Branch work214-sha, patch #343 
+
+PR target/117251: Improve vector andc to vector nand fusion
+
+See the following post for a complete explanation of what the patches
+for PR target/117251 do:
+
+ * https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686474.html
+
+This is patch #43 of 45 to generate the 'XXEVAL' instruction on power10
+and power11 instead of using the Altivec 'VANDC' instruction feeding
+into 'VNAND'.  The 'XXEVAL' instruction can use all 64 vector
+registers, instead of the 32 registers that traditional Altivec vector
+instructions use.  By allowing all of the vector registers to be used,
+it reduces the amount of spilling that a large benchmark generated.
+
+Currently the following code:
+
+   vector int a, b, c, d;
+   a = ~ ((c & ~ d) & b);
+
+Generates:
+
+   vandc  t,c,d
+   vnand  a,t,b
+
+Now in addition with this patch, if the arguments or result is
+allocated to a traditional FPR register, the GCC compiler will now
+generate the following code instead of adding vector move instructions:
+
+   xxeval a,b,c,253
+
+Since fusion using 2 Altivec instructions is slightly faster than using
+the 'XXEVAL' instruction we prefer to generate the Altivec instructions
+if we can.  In addition, because 'XXEVAL' is a prefixed instruction, it
+possibly might generate an extra NOP instruction to align the 'XXEVAL'
+instruction.
+
+I have tested these patches on both big endian and little endian
+PowerPC servers, with no regressions.  Can I check these patches into
+the trunk?
+
+2025-07-09  Michael Meissner  
+
+gcc/
+
+   PR target/117251
+   * config/rs6000/fusion.md: Regenerate.
+   * config/rs6000/genfusion.pl (gen_logica

[gcc r15-9940] tree-optimization/120927 - 510.parest_r segfault with masked epilog

2025-07-08 Thread Richard Biener via Gcc-cvs

https://gcc.gnu.org/g:b8599692a336b29851bdc5d8506a51d57521595c

commit r15-9940-gb8599692a336b29851bdc5d8506a51d57521595c
Author: Richard Biener 
Date:   Thu Jul 3 14:39:22 2025 +0200

tree-optimization/120927 - 510.parest_r segfault with masked epilog

The following fixes bad alignment computation for epilog vectorization
when as in this case for 510.parest_r and masked epilog vectorization
with AVX512 we end up choosing AVX to vectorize the main loop and
masked AVX512 (sic!) to vectorize the epilog.  In that case alignment
analysis for the epilog tries to force alignment of the base to 64,
but that cannot possibly help the epilog when the main loop had used
a vector mode with smaller alignment requirement.

There's another issue, that the check whether the step preserves
alignment needs to consider possibly previously involved VFs
(here, the main loop's smaller VF) as well.

These might not be the only case with problems for such a mode mix
but at least there it seems wise to never use DR alignment forcing
when analyzing an epilog.

We get to choose this mode setup because the iteration over epilog
modes doesn't prevent this, the maybe_ge (cached_vf_per_mode[0],
first_vinfo_vf) skip is conditional on !supports_partial_vectors
and it is also conditional on having a cached VF.  Further nothing
in vect_analyze_loop_1 rejects this setup - it might be conceivable
that a target can do masking only for larger modes.  There is a
second reason we end up with this mode setup, which is that
vect_need_peeling_or_partial_vectors_p says we do not need
peeling or partial vectors when analyzing the main loop with
AVX512 (if it would say so we'd have chosen a masked AVX512
epilog-only vectorization).  It does that because it looks at
LOOP_VINFO_COST_MODEL_THRESHOLD (which is not yet computed, so
always zero at this point), and compares max_niter (5) against
the VF (8), but not with equality as the comment says but with
greater.  This also needs looking at, PR120939.

PR tree-optimization/120927
* tree-vect-data-refs.cc (vect_compute_data_ref_alignment):
Do not force a DRs base alignment when analyzing an
epilog loop.  Check whether the step preserves alignment
for all VFs possibly involved so far.

* gcc.dg/vect/vect-pr120927.c: New testcase.
* gcc.dg/vect/vect-pr120927-2.c: Likewise.

(cherry picked from commit 918f4517564c2cf7e5bb907428d5413742bee56f)

Diff:
---
 gcc/testsuite/gcc.dg/vect/vect-pr120927-2.c | 24 
 gcc/testsuite/gcc.dg/vect/vect-pr120927.c   | 24 
 gcc/tree-vect-data-refs.cc  | 16 
 3 files changed, 60 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/vect-pr120927-2.c 
b/gcc/testsuite/gcc.dg/vect/vect-pr120927-2.c
new file mode 100644
index ..e38cebeb9201
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-pr120927-2.c
@@ -0,0 +1,24 @@
+/* { dg-additional-options "--param vect-partial-vector-usage=1" } */
+/* { dg-additional-options "-mavx512bw -mavx512vl" { target avx512f_runtime } 
} */
+
+#include "tree-vect.h"
+
+static const double __attribute__((aligned(__BIGGEST_ALIGNMENT__))) a[] = { 
1., 2., 3., 4., 5. };
+
+void __attribute__((noipa))
+foo (double *b, double *bp, double c, int n)
+{
+  for (int i = 0; i < n; ++i)
+b[i] = bp[i] = a[i] * c;
+}
+
+int main()
+{
+  double b[6], bp[6];
+  b[5] = bp[5] = 13.;
+  check_vect ();
+  foo (b, bp, 3., 5);
+  if (b[5] != 13. || bp[5] != 13.)
+abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/vect/vect-pr120927.c 
b/gcc/testsuite/gcc.dg/vect/vect-pr120927.c
new file mode 100644
index ..793593f758f2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-pr120927.c
@@ -0,0 +1,24 @@
+/* { dg-additional-options "--param vect-partial-vector-usage=1" } */
+/* { dg-additional-options "-mavx512bw -mavx512vl" { target avx512f_runtime } 
} */
+
+#include "tree-vect.h"
+
+static const double a[] = { 1., 2., 3., 4., 5. };
+
+void __attribute__((noipa))
+foo (double *b, double *bp, double c, int n)
+{
+  for (int i = 0; i < n; ++i)
+b[i] = bp[i] = a[i] * c;
+}
+
+int main()
+{
+  double b[6], bp[6];
+  b[5] = bp[5] = 13.;
+  check_vect ();
+  foo (b, bp, 3., 5);
+  if (b[5] != 13. || bp[5] != 13.)
+abort ();
+  return 0;
+}
diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index 4ca9ab73d690..85145f94516a 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -1410,10 +1410,17 @@ vect_compute_data_ref_alignment (vec_info *vinfo, 
dr_vec_info *dr_info,
   /* We can only use base and misalignment information relative to
 an innermost loop if the misalignment stays the same throughout the
 execution of the loop.  As above, this is the case if the s

[gcc r15-9941] tree-optimization/120817 - bogus DSE of .MASK_STORE

2025-07-08 Thread Richard Biener via Gcc-cvs

https://gcc.gnu.org/g:0ebeed53983dbcefcf7b950895c9d88c85342cf4

commit r15-9941-g0ebeed53983dbcefcf7b950895c9d88c85342cf4
Author: Richard Biener 
Date:   Mon Jul 7 09:56:50 2025 +0200

tree-optimization/120817 - bogus DSE of .MASK_STORE

DSE used ao_ref_init_from_ptr_and_size for .MASK_STORE but
alias-analysis will use the specified size to disambiguate
against smaller objects.  For .MASK_STORE we instead have to
make the access size unspecified but we can still constrain
the access extent based on the maximum size possible.

PR tree-optimization/120817
* tree-ssa-dse.cc (initialize_ao_ref_for_dse): Use
ao_ref_init_from_ptr_and_range with unknown size for
.MASK_STORE and .MASK_LEN_STORE.

* gcc.dg/vect/pr120817.c: New testcase.

(cherry picked from commit 439b14e222571da76da2bfec04b9035fb9f1862d)

Diff:
---
 gcc/testsuite/gcc.dg/vect/pr120817.c | 40 
 gcc/tree-ssa-dse.cc  |  8 
 2 files changed, 44 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/pr120817.c 
b/gcc/testsuite/gcc.dg/vect/pr120817.c
new file mode 100644
index ..d8f55c9b98d2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr120817.c
@@ -0,0 +1,40 @@
+/* { dg-additional-options "-O1" } */
+/* { dg-additional-options "-mcpu=neoverse-n2" { target aarch64*-*-* } } */
+
+#include "tree-vect.h"
+
+typedef struct {
+int _M_current;
+} __normal_iterator;
+
+typedef struct {
+char _M_elems[5];
+} array_5;
+
+__normal_iterator __trans_tmp_1 = {-5};
+
+__attribute__((noipa))
+array_5 copySourceIntoTarget() {
+array_5 target;
+char* target_it = target._M_elems;
+
+while (__trans_tmp_1._M_current != 0) {
+*target_it = 1;
+__trans_tmp_1._M_current++;
+target_it++;
+}
+
+return target;
+}
+
+int main ()
+{
+  check_vect ();
+
+  array_5 res = copySourceIntoTarget();
+
+#pragma GCC novector
+  for (int i = 0; i < 5; i++)
+if (res._M_elems[i] != 1)
+  __builtin_abort ();
+}
diff --git a/gcc/tree-ssa-dse.cc b/gcc/tree-ssa-dse.cc
index bc632e384841..215f5c3e2869 100644
--- a/gcc/tree-ssa-dse.cc
+++ b/gcc/tree-ssa-dse.cc
@@ -181,10 +181,10 @@ initialize_ao_ref_for_dse (gimple *stmt, ao_ref *write, 
bool may_def_ok = false)
   can provide a may-def variant.  */
if (may_def_ok)
  {
-   ao_ref_init_from_ptr_and_size (
- write, gimple_call_arg (stmt, 0),
- TYPE_SIZE_UNIT (
-   TREE_TYPE (gimple_call_arg (stmt, stored_value_index;
+   ao_ref_init_from_ptr_and_range (
+ write, gimple_call_arg (stmt, 0), true, 0, -1,
+ tree_to_poly_int64 (TYPE_SIZE (
+   TREE_TYPE (gimple_call_arg (stmt, stored_value_index);
return true;
  }
break;

[gcc r15-9943] tree-optimization/118669 - fixup wrongly aligned loads/stores

2025-07-08 Thread Richard Biener via Gcc-cvs

https://gcc.gnu.org/g:c625bc9c7c294ef2851ae42d4a5b6cc899fecb5e

commit r15-9943-gc625bc9c7c294ef2851ae42d4a5b6cc899fecb5e
Author: Richard Biener 
Date:   Wed Jul 2 09:30:05 2025 +0200

tree-optimization/118669 - fixup wrongly aligned loads/stores

The vectorizer tracks alignment of datarefs with dr_aligned
and dr_unaligned_supported but that's aligned with respect to
the target alignment which can be less aligned than the mode
used for the access.  The following fixes this discrepancy
for vectorizing loads and stores.  The issue is visible for
aarch64 SVE and risc-v where VLA vector modes have larger than
element alignment but the target handles element alignment
just fine.

PR tree-optimization/118669
* tree-vect-stmts.cc (vectorizable_load): Emit loads
with proper (element) alignment.
(vectorizable_store): Likewise.

(cherry picked from commit 37bf13adcda564dfdb28c3aa736f2cac71c73d09)

Diff:
---
 gcc/tree-vect-stmts.cc | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 978a4626b35b..89ac5f611276 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -10062,7 +10062,8 @@ vectorizable_store (vec_info *vinfo,
= fold_build2 (MEM_REF, vectype, dataref_ptr,
   dataref_offset ? dataref_offset
  : build_int_cst (ref_type, 0));
- if (alignment_support_scheme == dr_aligned)
+ if (alignment_support_scheme == dr_aligned
+ && align >= TYPE_ALIGN_UNIT (vectype))
;
  else
TREE_TYPE (data_ref)
@@ -12254,7 +12255,8 @@ vectorizable_load (vec_info *vinfo,
  {
data_ref
  = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
-   if (alignment_support_scheme == dr_aligned)
+   if (alignment_support_scheme == dr_aligned
+   && align >= TYPE_ALIGN_UNIT (ltype))
  ;
else
  TREE_TYPE (data_ref)

[gcc r15-9939] tree-optimization/120944 - bogus VN with volatile copies

2025-07-08 Thread Richard Biener via Gcc-cvs

https://gcc.gnu.org/g:2c23368ed910a911e72af5decfc39bef11a9efac

commit r15-9939-g2c23368ed910a911e72af5decfc39bef11a9efac
Author: Richard Biener 
Date:   Fri Jul 4 09:08:19 2025 +0200

tree-optimization/120944 - bogus VN with volatile copies

The following avoids translating expressions through volatile
copies.

PR tree-optimization/120944
* tree-ssa-sccvn.cc (vn_reference_lookup_3): Gate optimizations
invalid when volatile is involved.

* gcc.dg/torture/pr120944.c: New testcase.

(cherry picked from commit 6ed1e2ae1a742d859c2dd74c9e7cebdd3618e8b1)

Diff:
---
 gcc/testsuite/gcc.dg/torture/pr120944.c | 34 +
 gcc/tree-ssa-sccvn.cc   |  9 +++--
 2 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/torture/pr120944.c 
b/gcc/testsuite/gcc.dg/torture/pr120944.c
new file mode 100644
index ..92f3c7749963
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr120944.c
@@ -0,0 +1,34 @@
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
+/* { dg-additional-options "-fdump-tree-optimized" } */
+
+#include <stdlib.h>
+
+typedef union {
+  int u32;
+  struct
+  {
+ int A:1;
+ int B:2;
+ int C:3;
+  };
+} u_t;
+
+typedef union {
+   volatile int u[3];
+   volatile struct {
+u_t a;
+int b;
+int c;
+   };
+} DATA;
+
+void foo (volatile DATA *d)
+{
+ d->a.u32 = ~0;
+ u_t u = d->a;
+ int v = u.A;
+ if (v)
+abort();
+}
+
+/* { dg-final { scan-tree-dump-times "if \\\(" 1 "optimized" } } */
diff --git a/gcc/tree-ssa-sccvn.cc b/gcc/tree-ssa-sccvn.cc
index f3bc6dbebe13..7f2500e5d591 100644
--- a/gcc/tree-ssa-sccvn.cc
+++ b/gcc/tree-ssa-sccvn.cc
@@ -2809,7 +2809,8 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void 
*data_,
  we find a VN result with exactly the same value as the
 possible clobber.  In this case we can ignore the clobber
 and return the found value.  */
-  if (is_gimple_reg_type (TREE_TYPE (lhs))
+  if (!gimple_has_volatile_ops (def_stmt)
+ && is_gimple_reg_type (TREE_TYPE (lhs))
  && types_compatible_p (TREE_TYPE (lhs), vr->type)
  && (ref->ref || data->orig_ref.ref)
  && !data->mask
@@ -3093,7 +3094,8 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void 
*data_,
   else if (is_gimple_reg_type (vr->type)
   && gimple_assign_single_p (def_stmt)
   && gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR
-  && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (def_stmt)) == 0)
+  && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (def_stmt)) == 0
+  && !TREE_THIS_VOLATILE (gimple_assign_lhs (def_stmt)))
 {
   tree base2;
   poly_int64 offset2, size2, maxsize2;
@@ -3149,6 +3151,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void 
*data_,
   && !reverse_storage_order_for_component_p (vr->operands)
   && !contains_storage_order_barrier_p (vr->operands)
   && gimple_assign_single_p (def_stmt)
+  && !TREE_THIS_VOLATILE (gimple_assign_lhs (def_stmt))
   && CHAR_BIT == 8
   && BITS_PER_UNIT == 8
   && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
@@ -3307,6 +3310,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void 
*data_,
   && !reverse_storage_order_for_component_p (vr->operands)
   && !contains_storage_order_barrier_p (vr->operands)
   && gimple_assign_single_p (def_stmt)
+  && !TREE_THIS_VOLATILE (gimple_assign_lhs (def_stmt))
   && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
 {
   tree lhs = gimple_assign_lhs (def_stmt);
@@ -3518,6 +3522,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void 
*data_,
  the copy kills ref.  */
   else if (data->vn_walk_kind == VN_WALKREWRITE
   && gimple_assign_single_p (def_stmt)
+  && !gimple_has_volatile_ops (def_stmt)
   && (DECL_P (gimple_assign_rhs1 (def_stmt))
   || TREE_CODE (gimple_assign_rhs1 (def_stmt)) == MEM_REF
   || handled_component_p (gimple_assign_rhs1 (def_stmt

[gcc r15-9944] tree-optimization/120358 - bogus PTA with structure access

2025-07-08 Thread Richard Biener via Gcc-cvs

https://gcc.gnu.org/g:0f1e4dd1f9354ea962113e066152d0a77209f732

commit r15-9944-g0f1e4dd1f9354ea962113e066152d0a77209f732
Author: Richard Biener 
Date:   Mon Jul 7 15:13:38 2025 +0200

tree-optimization/120358 - bogus PTA with structure access

When we compute the constraint for something like
MEM[(const struct QStringView &)&tok2 + 32] we go and compute
what (const struct QStringView &)&tok2 + 32 points to and then
add subvariables to its dereference that possibly fall in the
range of the access according to the original refs size.  In
doing that we disregarded that the subvariable the starting
address points to might not be aligned to it and thus the
access might start at any point within that variable.  The following
conservatively adjusts the pruning of adjacent sub-variables to
honor this.

PR tree-optimization/120358
* tree-ssa-structalias.cc (get_constraint_for_1): Adjust
pruning of sub-variables according to the imprecise
known start offset.

(cherry picked from commit aa5ae523e84a97bf3a582ea0fa73d959afa9b9c7)

Diff:
---
 gcc/tree-ssa-structalias.cc | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/gcc/tree-ssa-structalias.cc b/gcc/tree-ssa-structalias.cc
index deca44ae0bf3..0215243d5be9 100644
--- a/gcc/tree-ssa-structalias.cc
+++ b/gcc/tree-ssa-structalias.cc
@@ -3690,7 +3690,10 @@ get_constraint_for_1 (tree t, vec *results, bool 
address_p,
size = -1;
  for (; curr; curr = vi_next (curr))
{
- if (curr->offset - vi->offset < size)
+ /* The start of the access might happen anywhere
+within vi, so conservatively assume it was
+at its end.  */
+ if (curr->offset - (vi->offset + vi->size - 1) < size)
{
  cs.var = curr->id;
  results->safe_push (cs);

[gcc r15-9942] testsuite: add sve hw check to testcase [PR120817]

2025-07-08 Thread Richard Biener via Gcc-cvs

https://gcc.gnu.org/g:77066fec7ae3b57806c5d8fed9429c7db9ee446b

commit r15-9942-g77066fec7ae3b57806c5d8fed9429c7db9ee446b
Author: Tamar Christina 
Date:   Mon Jul 7 17:05:01 2025 +0100

testsuite: add sve hw check to testcase [PR120817]

Drop down from SVE2 to SVE1 as that's the minimum
required for the test, and since it's a mid-end test
add the aarch64_sve_hw check.

gcc/testsuite/ChangeLog:

PR tree-optimization/120817
* gcc.dg/vect/pr120817.c: Add SVE HW check.

(cherry picked from commit 4b9f760c511a4ef3a390dd6cfab80bada57c2535)

Diff:
---
 gcc/testsuite/gcc.dg/vect/pr120817.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/vect/pr120817.c 
b/gcc/testsuite/gcc.dg/vect/pr120817.c
index d8f55c9b98d2..199189a8b9ad 100644
--- a/gcc/testsuite/gcc.dg/vect/pr120817.c
+++ b/gcc/testsuite/gcc.dg/vect/pr120817.c
@@ -1,5 +1,6 @@
 /* { dg-additional-options "-O1" } */
-/* { dg-additional-options "-mcpu=neoverse-n2" { target aarch64*-*-* } } */
+/* { dg-require-effective-target aarch64_sve_hw { target aarch64*-*-* } } */
+/* { dg-additional-options "-march=armv8-a+sve -mtune=neoverse-n2" { target 
aarch64*-*-* } } */
 
 #include "tree-vect.h"

[gcc r15-9937] libstdc++: Fix typo in __size_to_integer(__GLIBCXX_TYPE_INT_N_3)

2025-07-08 Thread Jonathan Wakely via Gcc-cvs

https://gcc.gnu.org/g:dff46525b78b8c9433ad63e5d7396b2ecca450f9

commit r15-9937-gdff46525b78b8c9433ad63e5d7396b2ecca450f9
Author: Jonathan Wakely 
Date:   Fri Jul 4 21:33:05 2025 +0100

libstdc++: Fix typo in __size_to_integer(__GLIBCXX_TYPE_INT_N_3)

The overload taking a signed type was returning unsigned and the
overload taking an unsigned type was returning signed.

libstdc++-v3/ChangeLog:

* include/bits/stl_algobase.h (__size_to_integer): Move
misplaced unsigned keyword on __size_to_integer overloads for
__GLIBCXX_TYPE_INT_N_3 integer type.

(cherry picked from commit 106591f79a3beaed226400fbdc568e95229dc936)

Diff:
---
 libstdc++-v3/include/bits/stl_algobase.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/include/bits/stl_algobase.h 
b/libstdc++-v3/include/bits/stl_algobase.h
index 119dbe9a0936..4d5662ca45bf 100644
--- a/libstdc++-v3/include/bits/stl_algobase.h
+++ b/libstdc++-v3/include/bits/stl_algobase.h
@@ -1046,9 +1046,9 @@ _GLIBCXX_END_NAMESPACE_CONTAINER
   __size_to_integer(unsigned __GLIBCXX_TYPE_INT_N_2 __n) { return __n; }
 #endif
 #if defined(__GLIBCXX_TYPE_INT_N_3)
-  __extension__ inline _GLIBCXX_CONSTEXPR unsigned __GLIBCXX_TYPE_INT_N_3
-  __size_to_integer(__GLIBCXX_TYPE_INT_N_3 __n) { return __n; }
   __extension__ inline _GLIBCXX_CONSTEXPR __GLIBCXX_TYPE_INT_N_3
+  __size_to_integer(__GLIBCXX_TYPE_INT_N_3 __n) { return __n; }
+  __extension__ inline _GLIBCXX_CONSTEXPR unsigned __GLIBCXX_TYPE_INT_N_3
   __size_to_integer(unsigned __GLIBCXX_TYPE_INT_N_3 __n) { return __n; }
 #endif

[gcc(refs/users/mikael/heads/stabilisation_descriptor_v01)] Ajout directive warning

2025-07-08 Thread Mikael Morin via Gcc-cvs

https://gcc.gnu.org/g:e6eb899b5e7ce65a45db2fa7d90693f2509812f1

commit e6eb899b5e7ce65a45db2fa7d90693f2509812f1
Author: Mikael Morin 
Date:   Tue Jul 8 22:41:06 2025 +0200

Ajout directive warning

Diff:
---
 libgomp/testsuite/libgomp.oacc-fortran/privatized-ref-2.f90 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libgomp/testsuite/libgomp.oacc-fortran/privatized-ref-2.f90 
b/libgomp/testsuite/libgomp.oacc-fortran/privatized-ref-2.f90
index 14e8f99d391e..d95479599e49 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/privatized-ref-2.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/privatized-ref-2.f90
@@ -25,7 +25,7 @@ program main
   character(len=:), allocatable :: my_str
   character(len=15), allocatable :: my_str15
 
-  A = [(3*j, j=1, 10)]
+  A = [(3*j, j=1, 10)] ! { dg-warning {may be used uninitialized} {variables 
used uninitialized in dead code} { target { ! __OPTIMIZE__ } } }
   call foo (A, size(A))
   call bar (A)
   my_str = "1234567890"

[gcc(refs/users/mikael/heads/stabilisation_descriptor_v01)] Ajout directive note

2025-07-08 Thread Mikael Morin via Gcc-cvs

https://gcc.gnu.org/g:5b9aa2101d8a98621a5b5e955f82d2b4e7079d00

commit 5b9aa2101d8a98621a5b5e955f82d2b4e7079d00
Author: Mikael Morin 
Date:   Tue Jul 8 22:48:09 2025 +0200

Ajout directive note

Diff:
---
 libgomp/testsuite/libgomp.oacc-fortran/privatized-ref-2.f90 | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libgomp/testsuite/libgomp.oacc-fortran/privatized-ref-2.f90 
b/libgomp/testsuite/libgomp.oacc-fortran/privatized-ref-2.f90
index d95479599e49..4ac7e06e2c5e 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/privatized-ref-2.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/privatized-ref-2.f90
@@ -21,11 +21,11 @@
 program main
   implicit none (type, external)
   integer :: j
-  integer, allocatable :: A(:)
+  integer, allocatable :: A(:) ! { dg-note {declared here} {A's fields used 
initialized in dead code} { target { ! __OPTIMIZE__ } } }
   character(len=:), allocatable :: my_str
   character(len=15), allocatable :: my_str15
 
-  A = [(3*j, j=1, 10)] ! { dg-warning {may be used uninitialized} {variables 
used uninitialized in dead code} { target { ! __OPTIMIZE__ } } }
+  A = [(3*j, j=1, 10)] ! { dg-warning {may be used uninitialized} {A's fields 
used uninitialized in dead code} { target { ! __OPTIMIZE__ } } }
   call foo (A, size(A))
   call bar (A)
   my_str = "1234567890"

[gcc r16-2112] libstdc++: Ensure pool resources meet alignment requirements [PR118681]

2025-07-08 Thread Jonathan Wakely via Libstdc++-cvs

https://gcc.gnu.org/g:ac2fb60a67d6d1de6446c25c5623b8a1389f4770

commit r16-2112-gac2fb60a67d6d1de6446c25c5623b8a1389f4770
Author: Jonathan Wakely 
Date:   Fri Jul 4 16:44:13 2025 +0100

libstdc++: Ensure pool resources meet alignment requirements [PR118681]

For allocations with size > alignment and size % alignment != 0 we were
sometimes returning pointers that did not meet the requested alignment.
For example, allocate(24, 16) would select the pool for 24-byte objects
and the second allocation from that pool (at offset 24 bytes into the
pool) is only 8-byte aligned not 16-byte aligned.

The pool resources need to round up the requested allocation size to a
multiple of the alignment, so that the selected pool will always return
allocations that meet the alignment requirement.

libstdc++-v3/ChangeLog:

PR libstdc++/118681
* src/c++17/memory_resource.cc (choose_block_size): New
function.
(synchronized_pool_resource::do_allocate): Use choose_block_size
to determine appropriate block size.
(synchronized_pool_resource::do_deallocate): Likewise
(unsynchronized_pool_resource::do_allocate): Likewise.
(unsynchronized_pool_resource::do_deallocate): Likewise
* testsuite/20_util/synchronized_pool_resource/118681.cc: New
test.
* testsuite/20_util/unsynchronized_pool_resource/118681.cc: New
test.

Reviewed-by: Tomasz Kamiński 

Diff:
---
 libstdc++-v3/src/c++17/memory_resource.cc  | 26 --
 .../20_util/synchronized_pool_resource/118681.cc   |  5 ++
 .../20_util/unsynchronized_pool_resource/118681.cc | 58 ++
 3 files changed, 85 insertions(+), 4 deletions(-)

diff --git a/libstdc++-v3/src/c++17/memory_resource.cc 
b/libstdc++-v3/src/c++17/memory_resource.cc
index fac4c782c5f7..fddfe2c7dd98 100644
--- a/libstdc++-v3/src/c++17/memory_resource.cc
+++ b/libstdc++-v3/src/c++17/memory_resource.cc
@@ -1242,12 +1242,30 @@ namespace pmr
 return pools;
   }
 
+  static inline size_t
+  choose_block_size(size_t bytes, size_t alignment)
+  {
+if (bytes == 0) [[unlikely]]
+  return alignment;
+
+// Use bit_ceil in case alignment is invalid (i.e. not a power of two).
+size_t mask = std::__bit_ceil(alignment) - 1;
+// Round up to a multiple of alignment.
+size_t block_size = (bytes + mask) & ~mask;
+
+if (block_size >= bytes) [[likely]]
+  return block_size;
+
+// Wrapped around to zero, bytes must have been impossibly large.
+return numeric_limits<size_t>::max();
+  }
+
   // Override for memory_resource::do_allocate
   void*
   synchronized_pool_resource::
   do_allocate(size_t bytes, size_t alignment)
   {
-const auto block_size = std::max(bytes, alignment);
+const auto block_size = choose_block_size(bytes, alignment);
 const pool_options opts = _M_impl._M_opts;
 if (block_size <= opts.largest_required_pool_block)
   {
@@ -1294,7 +1312,7 @@ namespace pmr
   synchronized_pool_resource::
   do_deallocate(void* p, size_t bytes, size_t alignment)
   {
-size_t block_size = std::max(bytes, alignment);
+size_t block_size = choose_block_size(bytes, alignment);
 if (block_size <= _M_impl._M_opts.largest_required_pool_block)
   {
const ptrdiff_t index = pool_index(block_size, _M_impl._M_npools);
@@ -1453,7 +1471,7 @@ namespace pmr
   void*
   unsynchronized_pool_resource::do_allocate(size_t bytes, size_t alignment)
   {
-const auto block_size = std::max(bytes, alignment);
+const auto block_size = choose_block_size(bytes, alignment);
 if (block_size <= _M_impl._M_opts.largest_required_pool_block)
   {
// Recreate pools if release() has been called:
@@ -1470,7 +1488,7 @@ namespace pmr
   unsynchronized_pool_resource::
   do_deallocate(void* p, size_t bytes, size_t alignment)
   {
-size_t block_size = std::max(bytes, alignment);
+size_t block_size = choose_block_size(bytes, alignment);
 if (block_size <= _M_impl._M_opts.largest_required_pool_block)
   {
if (auto pool = _M_find_pool(block_size))
diff --git 
a/libstdc++-v3/testsuite/20_util/synchronized_pool_resource/118681.cc 
b/libstdc++-v3/testsuite/20_util/synchronized_pool_resource/118681.cc
new file mode 100644
index ..6d7434ff9106
--- /dev/null
+++ b/libstdc++-v3/testsuite/20_util/synchronized_pool_resource/118681.cc
@@ -0,0 +1,5 @@
+// { dg-do run { target c++17 } }
+// Bug 118681 - unsynchronized_pool_resource may fail to respect alignment
+
+#define RESOURCE std::pmr::synchronized_pool_resource
+#include "../unsynchronized_pool_resource/118681.cc"
diff --git 
a/libstdc++-v3/testsuite/20_util/unsynchronized_pool_resource/118681.cc 
b/libstdc++-v3/testsuite/20_util/unsynchronized_pool_resource/118681.cc
new file mode 100644
index ..87e1b1d94043
--- /dev/null
+++ b/libstdc++-v3/testsuite/20_util/unsynch

[gcc r16-2115] libstdc++: Fix double free in new pool resource test [PR118681]

2025-07-08 Thread Jonathan Wakely via Libstdc++-cvs

https://gcc.gnu.org/g:eb412029f5cec52275d14956fe01473015a9ce0e

commit r16-2115-geb412029f5cec52275d14956fe01473015a9ce0e
Author: Jonathan Wakely 
Date:   Wed Jul 9 00:54:33 2025 +0100

libstdc++: Fix double free in new pool resource test [PR118681]

This was supposed to free p1 and p2, not free p2 twice.

libstdc++-v3/ChangeLog:

PR libstdc++/118681
* testsuite/20_util/unsynchronized_pool_resource/118681.cc: Fix
deallocate argument.

Diff:
---
 libstdc++-v3/testsuite/20_util/unsynchronized_pool_resource/118681.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git 
a/libstdc++-v3/testsuite/20_util/unsynchronized_pool_resource/118681.cc 
b/libstdc++-v3/testsuite/20_util/unsynchronized_pool_resource/118681.cc
index 87e1b1d94043..9935f793cf91 100644
--- a/libstdc++-v3/testsuite/20_util/unsynchronized_pool_resource/118681.cc
+++ b/libstdc++-v3/testsuite/20_util/unsynchronized_pool_resource/118681.cc
@@ -39,7 +39,7 @@ test_alignment(std::pmr::memory_resource& res, bool dealloc)
 
   if (dealloc)
   {
-   res.deallocate(p2, size, alignment);
+   res.deallocate(p1, size, alignment);
res.deallocate(p2, size, alignment);
   }
 }

[gcc r16-2086] Fortran: Ensure finalizers are created correctly [PR120637]

2025-07-08 Thread Andre Vehreschild via Gcc-cvs

https://gcc.gnu.org/g:d1f05661fa6c8a6ea6f59ad365a84469100e425e

commit r16-2086-gd1f05661fa6c8a6ea6f59ad365a84469100e425e
Author: Andre Vehreschild 
Date:   Wed Jun 25 14:46:16 2025 +0200

Fortran: Ensure finalizers are created correctly [PR120637]

Finalize_component freed an expression that it used to remember which
components in which context it had finalized already.  While it makes
sense to free the copy of the expression if it is unused, doing so causes
issues when later comparing against the freed expression.  This is now
detected by returning true when the expression has been used.

PR fortran/120637

gcc/fortran/ChangeLog:

* class.cc (finalize_component): Return true, when a finalizable
component was detected and do not free it.

gcc/testsuite/ChangeLog:

* gfortran.dg/asan/finalize_1.f90: New test.

Diff:
---
 gcc/fortran/class.cc  | 24 ++
 gcc/testsuite/gfortran.dg/asan/finalize_1.f90 | 67 +++
 2 files changed, 81 insertions(+), 10 deletions(-)

diff --git a/gcc/fortran/class.cc b/gcc/fortran/class.cc
index df18601e45bd..a1c6fafa75ef 100644
--- a/gcc/fortran/class.cc
+++ b/gcc/fortran/class.cc
@@ -1034,7 +1034,7 @@ comp_is_finalizable (gfc_component *comp)
of calling the appropriate finalizers, coarray deregistering, and
deallocation of allocatable subcomponents.  */
 
-static void
+static bool
 finalize_component (gfc_expr *expr, gfc_symbol *derived, gfc_component *comp,
gfc_symbol *stat, gfc_symbol *fini_coarray, gfc_code **code,
gfc_namespace *sub_ns)
@@ -1044,14 +1044,14 @@ finalize_component (gfc_expr *expr, gfc_symbol 
*derived, gfc_component *comp,
   gfc_was_finalized *f;
 
   if (!comp_is_finalizable (comp))
-return;
+return false;
 
   /* If this expression with this component has been finalized
  already in this namespace, there is nothing to do.  */
   for (f = sub_ns->was_finalized; f; f = f->next)
 {
   if (f->e == expr && f->c == comp)
-   return;
+   return false;
 }
 
   e = gfc_copy_expr (expr);
@@ -1208,8 +1208,6 @@ finalize_component (gfc_expr *expr, gfc_symbol *derived, 
gfc_component *comp,
   final_wrap->ext.actual->next->next = gfc_get_actual_arglist ();
   final_wrap->ext.actual->next->next->expr = fini_coarray_expr;
 
-
-
   if (*code)
{
  (*code)->next = final_wrap;
@@ -1221,11 +1219,14 @@ finalize_component (gfc_expr *expr, gfc_symbol 
*derived, gfc_component *comp,
   else
 {
   gfc_component *c;
+  bool ret = false;
 
   for (c = comp->ts.u.derived->components; c; c = c->next)
-   finalize_component (e, comp->ts.u.derived, c, stat, fini_coarray, code,
-   sub_ns);
-  gfc_free_expr (e);
+   ret |= finalize_component (e, comp->ts.u.derived, c, stat, fini_coarray,
+  code, sub_ns);
+  /* Only free the expression, if it has never been used.  */
+  if (!ret)
+   gfc_free_expr (e);
 }
 
   /* Record that this was finalized already in this namespace.  */
@@ -1234,6 +1235,7 @@ finalize_component (gfc_expr *expr, gfc_symbol *derived, 
gfc_component *comp,
   sub_ns->was_finalized->e = expr;
   sub_ns->was_finalized->c = comp;
   sub_ns->was_finalized->next = f;
+  return true;
 }
 
 
@@ -2314,6 +2316,7 @@ finish_assumed_rank:
 {
   gfc_symbol *stat;
   gfc_code *block = NULL;
+  gfc_expr *ptr_expr;
 
   if (!ptr)
{
@@ -2359,14 +2362,15 @@ finish_assumed_rank:
 sub_ns);
   block = block->next;
 
+  ptr_expr = gfc_lval_expr_from_sym (ptr);
   for (comp = derived->components; comp; comp = comp->next)
{
  if (comp == derived->components && derived->attr.extension
  && ancestor_wrapper && ancestor_wrapper->expr_type != EXPR_NULL)
continue;
 
- finalize_component (gfc_lval_expr_from_sym (ptr), derived, comp,
- stat, fini_coarray, &block, sub_ns);
+ finalize_component (ptr_expr, derived, comp, stat, fini_coarray,
+ &block, sub_ns);
  if (!last_code->block->next)
last_code->block->next = block;
}
diff --git a/gcc/testsuite/gfortran.dg/asan/finalize_1.f90 
b/gcc/testsuite/gfortran.dg/asan/finalize_1.f90
new file mode 100644
index ..ab53a9ecf2be
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/asan/finalize_1.f90
@@ -0,0 +1,67 @@
+!{ dg-do run }
+
+! PR fortran/120637
+
+! Contributed by Antony Lewis  
+! The unused module is needed to trigger the issue of not freeing the
+! memory of second module.
+
+module MiscUtils
+implicit none
+
+contains
+
+logical function isFloat0(R)
+class(*), intent(in) :: R
+
+select type(R)
+type is (real)
+isFloat0 = .true.
+e

[gcc r16-2091] testsuite: i386: Fix gcc.target/i386/memcpy-pr120683-1.c etc. on Solaris/x86

2025-07-08 Thread Rainer Orth via Gcc-cvs

https://gcc.gnu.org/g:20407a41e840440ccb5d746a5ef6e72765de55f3

commit r16-2091-g20407a41e840440ccb5d746a5ef6e72765de55f3
Author: Rainer Orth 
Date:   Tue Jul 8 12:53:34 2025 +0200

testsuite: i386: Fix gcc.target/i386/memcpy-pr120683-1.c etc. on Solaris/x86

The new tests from

commit 401199377c50045ede560daf3f6e8b51749c2a87
Author: H.J. Lu 
Date:   Tue Jun 17 10:17:17 2025 +0800

x86: Improve vector_loop/unrolled_loop for memset/memcpy

FAIL on 64-bit Solaris/x86:

FAIL: gcc.target/i386/memcpy-pr120683-1.c check-function-bodies foo
FAIL: gcc.target/i386/memcpy-pr120683-2.c check-function-bodies foo
FAIL: gcc.target/i386/memcpy-pr120683-3.c check-function-bodies foo
FAIL: gcc.target/i386/memcpy-pr120683-4.c check-function-bodies foo
FAIL: gcc.target/i386/memcpy-pr120683-5.c check-function-bodies foo
FAIL: gcc.target/i386/memcpy-pr120683-6.c check-function-bodies foo
FAIL: gcc.target/i386/memcpy-pr120683-7.c check-function-bodies foo
FAIL: gcc.target/i386/memcpy-strategy-12.c check-function-bodies foo
FAIL: gcc.target/i386/memset-pr120683-1.c check-function-bodies foo
FAIL: gcc.target/i386/memset-pr120683-10.c check-function-bodies foo
FAIL: gcc.target/i386/memset-pr120683-11.c check-function-bodies foo
FAIL: gcc.target/i386/memset-pr120683-12.c check-function-bodies foo
FAIL: gcc.target/i386/memset-pr120683-13.c check-function-bodies foo
FAIL: gcc.target/i386/memset-pr120683-14.c check-function-bodies foo
FAIL: gcc.target/i386/memset-pr120683-15.c check-function-bodies foo
FAIL: gcc.target/i386/memset-pr120683-16.c check-function-bodies foo
FAIL: gcc.target/i386/memset-pr120683-17.c check-function-bodies foo
FAIL: gcc.target/i386/memset-pr120683-18.c check-function-bodies foo
FAIL: gcc.target/i386/memset-pr120683-19.c check-function-bodies foo
FAIL: gcc.target/i386/memset-pr120683-2.c check-function-bodies foo
FAIL: gcc.target/i386/memset-pr120683-20.c check-function-bodies foo
FAIL: gcc.target/i386/memset-pr120683-21.c check-function-bodies foo
FAIL: gcc.target/i386/memset-pr120683-22.c check-function-bodies foo
FAIL: gcc.target/i386/memset-pr120683-23.c check-function-bodies foo
FAIL: gcc.target/i386/memset-pr120683-3.c check-function-bodies foo
FAIL: gcc.target/i386/memset-pr120683-4.c check-function-bodies foo
FAIL: gcc.target/i386/memset-pr120683-5.c check-function-bodies foo
FAIL: gcc.target/i386/memset-pr120683-6.c check-function-bodies foo
FAIL: gcc.target/i386/memset-pr120683-7.c check-function-bodies foo
FAIL: gcc.target/i386/memset-pr120683-8.c check-function-bodies foo
FAIL: gcc.target/i386/memset-pr120683-9.c check-function-bodies foo

Like several times before, they need to be compiled with
-fasynchronous-unwind-tables -fdwarf2-cfi-asm.

Tested on i386-pc-solaris2.11 and x86_64-pc-linux-gnu.

2025-07-08  Rainer Orth  

gcc/testsuite:
* gcc.target/i386/memcpy-pr120683-1.c (dg-options): Add
-fasynchronous-unwind-tables -fdwarf2-cfi-asm.
* gcc.target/i386/memcpy-pr120683-2.c: Likewise.
* gcc.target/i386/memcpy-pr120683-3.c: Likewise.
* gcc.target/i386/memcpy-pr120683-4.c: Likewise.
* gcc.target/i386/memcpy-pr120683-5.c: Likewise.
* gcc.target/i386/memcpy-pr120683-6.c: Likewise.
* gcc.target/i386/memcpy-pr120683-7.c: Likewise.
* gcc.target/i386/memcpy-strategy-12.c: Likewise.
* gcc.target/i386/memset-pr120683-1.c: Likewise.
* gcc.target/i386/memset-pr120683-10.c: Likewise.
* gcc.target/i386/memset-pr120683-11.c: Likewise.
* gcc.target/i386/memset-pr120683-12.c: Likewise.
* gcc.target/i386/memset-pr120683-13.c: Likewise.
* gcc.target/i386/memset-pr120683-14.c: Likewise.
* gcc.target/i386/memset-pr120683-15.c: Likewise.
* gcc.target/i386/memset-pr120683-16.c: Likewise.
* gcc.target/i386/memset-pr120683-17.c: Likewise.
* gcc.target/i386/memset-pr120683-18.c: Likewise.
* gcc.target/i386/memset-pr120683-19.c: Likewise.
* gcc.target/i386/memset-pr120683-2.c: Likewise.
* gcc.target/i386/memset-pr120683-20.c: Likewise.
* gcc.target/i386/memset-pr120683-21.c: Likewise.
* gcc.target/i386/memset-pr120683-22.c: Likewise.
* gcc.target/i386/memset-pr120683-23.c: Likewise.
* gcc.target/i386/memset-pr120683-3.c: Likewise.
* gcc.target/i386/memset-pr120683-4.c: Likewise.
* gcc.target/i386/memset-pr120683-5.c: Likewise.
* gcc.target/i386/memset-pr120683-6.c: Likewise.
* gcc.target/i386/memset-pr120683-7.c: Likewise.
* gcc.target/i386/memset-pr120683-8.c: Likewise.
* gcc.target/i386/memset-pr120683-9.c: Likewise.

Diff:
-

[gcc(refs/users/mikael/heads/stabilisation_descriptor_v01)] Sauvegarde/restoration cfun

2025-07-08 Thread Mikael Morin via Gcc-cvs

https://gcc.gnu.org/g:a7c50573681b8cdbb0bee9df90074ca2081b7a91

commit a7c50573681b8cdbb0bee9df90074ca2081b7a91
Author: Mikael Morin 
Date:   Tue Jul 8 13:13:25 2025 +0200

Sauvegarde/restoration cfun

Diff:
---
 gcc/gimple-simulate.cc | 8 
 1 file changed, 8 insertions(+)

diff --git a/gcc/gimple-simulate.cc b/gcc/gimple-simulate.cc
index a85e6f63cc92..09491076e95d 100644
--- a/gcc/gimple-simulate.cc
+++ b/gcc/gimple-simulate.cc
@@ -4720,7 +4720,9 @@ simul_scope_evaluate_tests ()
   DECL_CONTEXT (result) = func;
   DECL_RESULT (func) = result;
 
+  push_cfun (nullptr);
   init_lowered_empty_function (func, true, profile_count::one ());
+  pop_cfun ();
 
   tree def_var = create_var (integer_type_node, "def_var");
   DECL_CONTEXT (def_var) = func;
@@ -6482,8 +6484,10 @@ simul_scope_simulate_call_tests ()
   DECL_CONTEXT (result) = my_int_func;
   DECL_RESULT (my_int_func) = result;
 
+  push_cfun (nullptr);
   basic_block bb = init_lowered_empty_function (my_int_func, true,
profile_count::one ());
+  pop_cfun ();
   gimple_stmt_iterator gsi = gsi_last_bb (bb);
   greturn *ret_stmt = gimple_build_return (cst6);
   gsi_insert_after (&gsi, ret_stmt, GSI_CONTINUE_LINKING);
@@ -6534,8 +6538,10 @@ simul_scope_simulate_call_tests ()
   DECL_ARGUMENTS (int_func_with_arg) = arg;
   layout_decl (arg, 0);
 
+  push_cfun (nullptr);
   basic_block bb2 = init_lowered_empty_function (int_func_with_arg, true,
 profile_count::one ());
+  pop_cfun ();
   gimple_stmt_iterator gsi2 = gsi_last_bb (bb2);
   greturn *ret_stmt2 = gimple_build_return (arg);
   gsi_insert_after (&gsi2, ret_stmt2, GSI_CONTINUE_LINKING);
@@ -6618,7 +6624,9 @@ simul_scope_simulate_call_tests ()
   DECL_CONTEXT (void_result) = simple_func;
   DECL_RESULT (simple_func) = void_result;
 
+  push_cfun (nullptr);
   init_lowered_empty_function (simple_func, true, profile_count::one ());
+  pop_cfun ();
 
   gcall * simple_call = gimple_build_call (simple_func, 0);

[gcc r16-2090] s390: Split tests for 31bit support

2025-07-08 Thread Juergen Christ via Gcc-cvs

https://gcc.gnu.org/g:32d41517c7276399e57b2b3f29e9790ae32d4883

commit r16-2090-g32d41517c7276399e57b2b3f29e9790ae32d4883
Author: Juergen Christ 
Date:   Tue Jul 8 11:26:38 2025 +0200

s390: Split tests for 31bit support

The new vector pattern tests used int128 without a guard.  This causes
failures on 31-bit targets.  Split the tests such that the tests
requiring 128-bit support are only executed on targets supporting
them.

Signed-off-by: Juergen Christ 

gcc/testsuite/ChangeLog:

* gcc.target/s390/vector/pattern-avg-1.c: Split test.
* gcc.target/s390/vector/pattern-mulh-1.c: Split test.
* gcc.target/s390/vector/pattern-avg-2.c: New test.
* gcc.target/s390/vector/pattern-mulh-2.c: New test.

Diff:
---
 .../gcc.target/s390/vector/pattern-avg-1.c |  3 +--
 .../gcc.target/s390/vector/pattern-avg-2.c | 23 +++
 .../gcc.target/s390/vector/pattern-mulh-1.c|  3 +--
 .../gcc.target/s390/vector/pattern-mulh-2.c| 26 ++
 4 files changed, 51 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c 
b/gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c
index a15301aabe54..30c6ed476846 100644
--- a/gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c
+++ b/gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c
@@ -21,6 +21,5 @@
 TEST(char,short,16)
 TEST(short,int,8)
 TEST(int,long,4)
-TEST(long,__int128,2)
 
-/* { dg-final { scan-tree-dump-times "\.AVG_CEIL" 8 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\.AVG_CEIL" 6 "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/s390/vector/pattern-avg-2.c 
b/gcc/testsuite/gcc.target/s390/vector/pattern-avg-2.c
new file mode 100644
index ..1cc614eb1dea
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/pattern-avg-2.c
@@ -0,0 +1,23 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O3 -mzarch -march=z16 -ftree-vectorize 
-fdump-tree-optimized" } */
+
+#define TEST(T1,T2,N)   \
+  void  \
+  avg##T1 (signed T1 *__restrict res, signed T1 *__restrict a,  \
+   signed T1 *__restrict b) \
+  { \
+for (int i = 0; i < N; ++i) \
+  res[i] = ((signed T2)a[i] + b[i] + 1) >> 1;   \
+  } \
+\
+  void  \
+  uavg##T1 (unsigned T1 *__restrict res, unsigned T1 *__restrict a, \
+unsigned T1 *__restrict b)  \
+  { \
+for (int i = 0; i < N; ++i) \
+  res[i] = ((unsigned T2)a[i] + b[i] + 1) >> 1; \
+  }
+
+TEST(long,__int128,2)
+
+/* { dg-final { scan-tree-dump-times "\.AVG_CEIL" 2 "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c 
b/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c
index cd8e4e7d7a09..f71ef06c8252 100644
--- a/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c
+++ b/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c
@@ -24,6 +24,5 @@
 TEST(char,short,16,8)
 TEST(short,int,8,16)
 TEST(int,long,4,32)
-TEST(long,__int128,2,64)
 
-/* { dg-final { scan-tree-dump-times "\.MULH" 8 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\.MULH" 6 "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-2.c 
b/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-2.c
new file mode 100644
index ..6ac6855b1bdf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-2.c
@@ -0,0 +1,26 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O3 -mzarch -march=arch15 -ftree-vectorize 
-fdump-tree-optimized" } */
+
+#define TEST(T1,T2,N,S) \
+  void  \
+  mulh##T1 (signed T1 *__restrict res,  \
+signed T1 *__restrict l,\
+signed T1 *__restrict r)\
+  { \
+for (int i = 0; i < N; ++i) \
+  res[i] = (signed T1) (((signed T2)l[i] * (signed T2)r[i]) >> S);  \
+  } \
+\
+  void

[gcc] Created branch 'mikael/heads/base_patch' in namespace 'refs/users'

2025-07-08 Thread Mikael Morin via Gcc-cvs

The branch 'mikael/heads/base_patch' was created in namespace 'refs/users' 
pointing to:

 8f05d8056216... Correction array_constructor_1

[gcc] Created branch 'mikael/heads/gimple_simulate_v01' in namespace 'refs/users'

2025-07-08 Thread Mikael Morin via Gcc-cvs

The branch 'mikael/heads/gimple_simulate_v01' was created in namespace 
'refs/users' pointing to:

 8b7bf0d4fa6a... Prise en charge affichage TARGET_MEM_REF

[gcc] Deleted branch 'mikael/heads/stabilisation_descriptor_v01' in namespace 'refs/users'

2025-07-08 Thread Mikael Morin via Gcc-cvs

The branch 'mikael/heads/stabilisation_descriptor_v01' in namespace 
'refs/users' was deleted.
It previously pointed to:

 2865e8dcb340... Essai simplification évaluation

Diff:

!!! WARNING: THE FOLLOWING COMMITS ARE NO LONGER ACCESSIBLE (LOST):
---

  2865e8d... Essai simplification évaluation
  f09bd1d... Ajout commentaire
  1353289... Déplacement variables après réallocation
  2ef2bbc... fortran: generate array reallocation out of loops

[gcc] Created branch 'mikael/heads/stabilisation_descriptor_v01' in namespace 'refs/users'

2025-07-08 Thread Mikael Morin via Gcc-cvs

The branch 'mikael/heads/stabilisation_descriptor_v01' was created in namespace 
'refs/users' pointing to:

 d53ac098d644... Suppression mise à jour delta.

[gcc(refs/users/mikael/heads/stabilisation_descriptor_v01)] Déplacement variables après réallocation

2025-07-08 Thread Mikael Morin via Gcc-cvs

https://gcc.gnu.org/g:2647f123f1c984b6c780aaf5ae16129620af3e54

commit 2647f123f1c984b6c780aaf5ae16129620af3e54
Author: Mikael Morin 
Date:   Mon Jul 7 11:46:08 2025 +0200

Déplacement variables après réallocation

Sauvegarde data

Renommage nom fonction.

Diff:
---
 gcc/fortran/gfortran.h |   4 --
 gcc/fortran/trans-array.cc | 167 ++---
 gcc/fortran/trans-expr.cc  |  14 ++--
 3 files changed, 102 insertions(+), 83 deletions(-)

diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h
index 6848bd1762d3..69367e638c5b 100644
--- a/gcc/fortran/gfortran.h
+++ b/gcc/fortran/gfortran.h
@@ -2028,10 +2028,6 @@ typedef struct gfc_symbol
   /* Set if this should be passed by value, but is not a VALUE argument
  according to the Fortran standard.  */
   unsigned pass_as_value:1;
-  /* Set if an allocatable array variable has been allocated in the current
- scope. Used in the suppression of uninitialized warnings in reallocation
- on assignment.  */
-  unsigned allocated_in_scope:1;
   /* Set if an external dummy argument is called with different argument lists.
  This is legal in Fortran, but can cause problems with autogenerated
  C prototypes for C23.  */
diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc
index 7be2d7b11a62..3cd6d90f47e7 100644
--- a/gcc/fortran/trans-array.cc
+++ b/gcc/fortran/trans-array.cc
@@ -3420,6 +3420,23 @@ gfc_add_loop_ss_code (gfc_loopinfo * loop, gfc_ss * ss, 
bool subscript,
 }
 
 
+/* Given an array descriptor expression DESCR and its data pointer DATA, decide
+   whether to either save the data pointer to a variable and use the variable 
or
+   use the data pointer expression directly without any intermediary variable.
+   */
+
+static bool
+save_descriptor_data (tree descr, tree data)
+{
+  return !(DECL_P (data)
+  || (TREE_CODE (data) == ADDR_EXPR
+  && DECL_P (TREE_OPERAND (data, 0)))
+  || (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (descr))
+  && TREE_CODE (descr) == COMPONENT_REF
+  && GFC_CLASS_TYPE_P (TREE_TYPE (TREE_OPERAND (descr, 0)))));
+}
+
+
 /* Translate expressions for the descriptor and data pointer of a SS.  */
 /*GCC ARRAYS*/
 
@@ -3466,17 +3483,14 @@ gfc_conv_ss_descriptor (stmtblock_t * block, gfc_ss * 
ss, int base)
  Otherwise we must evaluate it now to avoid breaking dependency
 analysis by pulling the expressions for elemental array indices
 inside the loop.  */
-  if (!(DECL_P (tmp)
-   || (TREE_CODE (tmp) == ADDR_EXPR
-   && DECL_P (TREE_OPERAND (tmp, 0)))
-   || (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (se.expr))
-   && TREE_CODE (se.expr) == COMPONENT_REF
-   && GFC_CLASS_TYPE_P (TREE_TYPE (TREE_OPERAND (se.expr, 0))))))
+  if (save_descriptor_data (se.expr, tmp) && !ss->is_alloc_lhs)
tmp = gfc_evaluate_now (tmp, block);
   info->data = tmp;
 
   tmp = gfc_conv_array_offset (se.expr);
-  info->offset = gfc_evaluate_now (tmp, block);
+  if (!ss->is_alloc_lhs)
+   tmp = gfc_evaluate_now (tmp, block);
+  info->offset = tmp;
 
   /* Make absolutely sure that the saved_offset is indeed saved
 so that the variable is still accessible after the loops
@@ -4769,13 +4783,12 @@ gfc_trans_scalarized_loop_boundary (gfc_loopinfo * 
loop, stmtblock_t * body)
 
 static void
 evaluate_bound (stmtblock_t *block, tree *bounds, gfc_expr ** values,
-   tree desc, int dim, bool lbound, bool deferred)
+   tree desc, int dim, bool lbound, bool deferred, bool save_value)
 {
   gfc_se se;
   gfc_expr * input_val = values[dim];
   tree *output = &bounds[dim];
 
-
   if (input_val)
 {
   /* Specified section bound.  */
@@ -4801,7 +4814,8 @@ evaluate_bound (stmtblock_t *block, tree *bounds, 
gfc_expr ** values,
   *output = lbound ? gfc_conv_array_lbound (desc, dim) :
 gfc_conv_array_ubound (desc, dim);
 }
-  *output = gfc_evaluate_now (*output, block);
+  if (save_value)
+*output = gfc_evaluate_now (*output, block);
 }
 
 
@@ -4834,18 +4848,18 @@ gfc_conv_section_startstride (stmtblock_t * block, 
gfc_ss * ss, int dim)
  || ar->dimen_type[dim] == DIMEN_THIS_IMAGE);
   desc = info->descriptor;
   stride = ar->stride[dim];
-
+  bool save_value = !ss->is_alloc_lhs;
 
   /* Calculate the start of the range.  For vector subscripts this will
  be the range of the vector.  */
   evaluate_bound (block, info->start, ar->start, desc, dim, true,
- ar->as->type == AS_DEFERRED);
+ ar->as->type == AS_DEFERRED, save_value);
 
   /* Similarly calculate the end.  Although this is not used in the
  scalarizer, it is needed when checking bounds and where the end
  is an expression with side-effects.  */
   evaluate_bound (block, info->end, ar->end, desc, dim, false,
- ar->as->type == AS_DEFERRED);
+

[gcc(refs/users/mikael/heads/stabilisation_descriptor_v01)] fortran: generate array reallocation out of loops

2025-07-08 Thread Mikael Morin via Gcc-cvs

https://gcc.gnu.org/g:c7c760c314a9674b2f0a3665c8a0d1e0db99694f

commit c7c760c314a9674b2f0a3665c8a0d1e0db99694f
Author: Mikael Morin 
Date:   Sun Jul 6 16:56:16 2025 +0200

fortran: generate array reallocation out of loops

Generate the array reallocation on assignment code before entering the
scalarization loops.  This doesn't move the generated code itself,
which was already put out of the outermost loop, but only changes the
current scope at the time the code is generated.  This is a prerequisite
for a followup patch that makes the reallocation code create new
variables.  Without this change the new variables would be declared in
the innermost loop body and couldn't be used outside of it.

gcc/fortran/ChangeLog:

* trans-expr.cc (gfc_trans_assignment_1): Generate array
reallocation code before entering the scalarisation loops.

Diff:
---
 gcc/fortran/trans-expr.cc | 20 +++-
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/gcc/fortran/trans-expr.cc b/gcc/fortran/trans-expr.cc
index 3e0d763d2fb0..65d0ee4ff235 100644
--- a/gcc/fortran/trans-expr.cc
+++ b/gcc/fortran/trans-expr.cc
@@ -12943,6 +12943,7 @@ gfc_trans_assignment_1 (gfc_expr * expr1, gfc_expr * 
expr2, bool init_flag,
   rhs_caf_attr = gfc_caf_attr (expr2, false, &rhs_refs_comp);
 }
 
+  tree reallocation = NULL_TREE;
   if (lss != gfc_ss_terminator)
 {
   /* The assignment needs scalarization.  */
@@ -13011,6 +13012,14 @@ gfc_trans_assignment_1 (gfc_expr * expr1, gfc_expr * 
expr2, bool init_flag,
  ompws_flags |= OMPWS_SCALARIZER_WS | OMPWS_SCALARIZER_BODY;
}
 
+  /* F2003: Allocate or reallocate lhs of allocatable array.  */
+  if (realloc_flag)
+   {
+ realloc_lhs_warning (expr1->ts.type, true, &expr1->where);
+ ompws_flags &= ~OMPWS_SCALARIZER_WS;
+ reallocation = gfc_alloc_allocatable_for_assignment (&loop, expr1, 
expr2);
+   }
+
   /* Start the scalarized loop body.  */
   gfc_start_scalarized_body (&loop, &body);
 }
@@ -13319,15 +13328,8 @@ gfc_trans_assignment_1 (gfc_expr * expr1, gfc_expr * 
expr2, bool init_flag,
  gfc_add_expr_to_block (&body, tmp);
}
 
-  /* F2003: Allocate or reallocate lhs of allocatable array.  */
-  if (realloc_flag)
-   {
- realloc_lhs_warning (expr1->ts.type, true, &expr1->where);
- ompws_flags &= ~OMPWS_SCALARIZER_WS;
- tmp = gfc_alloc_allocatable_for_assignment (&loop, expr1, expr2);
- if (tmp != NULL_TREE)
-   gfc_add_expr_to_block (&loop.code[expr1->rank - 1], tmp);
-   }
+  if (reallocation != NULL_TREE)
+   gfc_add_expr_to_block (&loop.code[loop.dimen - 1], reallocation);
 
   if (maybe_workshare)
ompws_flags &= ~OMPWS_SCALARIZER_BODY;

[gcc(refs/users/mikael/heads/stabilisation_descriptor_v01)] Simplification mise à jour descripteur

2025-07-08 Thread Mikael Morin via Gcc-cvs

https://gcc.gnu.org/g:a0951d4448036d35343b534b58c85f96f76ce7e6

commit a0951d4448036d35343b534b58c85f96f76ce7e6
Author: Mikael Morin 
Date:   Mon Jul 7 19:00:09 2025 +0200

Simplification mise à jour descripteur

Diff:
---
 gcc/fortran/trans-array.cc | 28 +---
 1 file changed, 9 insertions(+), 19 deletions(-)

diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc
index 832db1c3df4e..2d3ae44a8186 100644
--- a/gcc/fortran/trans-array.cc
+++ b/gcc/fortran/trans-array.cc
@@ -11375,36 +11375,26 @@ update_reallocated_descriptor (stmtblock_t *block, 
gfc_loopinfo *loop)
 
   gcc_assert (s->info->type == GFC_SS_SECTION);
   gfc_array_info *info = &s->info->data.array;
-  tree desc = info->descriptor;
 
-#define UPDATE_VALUE(field, value) \
+#define UPDATE_VALUE(value) \
  do \
{ \
- if (false && (field) && VAR_P ((field))) \
-   { \
- tree val = (value); \
- gfc_add_modify (block, (field), val); \
-   } \
- else \
-   (field) = gfc_evaluate_now ((field), block); \
+ value = gfc_evaluate_now (value, block); \
} \
  while (0)
 
-  if (save_descriptor_data (desc, info->data))
-   UPDATE_VALUE (info->data, gfc_conv_descriptor_data_get (desc)); 
-  UPDATE_VALUE (info->offset, gfc_conv_descriptor_offset_get (desc));
+  if (save_descriptor_data (info->descriptor, info->data))
+   UPDATE_VALUE (info->data);
+  UPDATE_VALUE (info->offset);
   info->saved_offset = info->offset;
   for (int i = 0; i < s->dimen; i++)
{
  int dim = s->dim[i];
  tree tree_dim = gfc_rank_cst[dim]; 
- UPDATE_VALUE (info->start[dim],
-   gfc_conv_descriptor_lbound_get (desc, tree_dim));
- UPDATE_VALUE (info->end[dim],
-   gfc_conv_descriptor_ubound_get (desc, tree_dim));
- UPDATE_VALUE (info->stride[dim],
-   gfc_conv_descriptor_stride_get (desc, tree_dim));
- info->delta[dim] = gfc_evaluate_now (info->delta[dim], block);
+ UPDATE_VALUE (info->start[dim]);
+ UPDATE_VALUE (info->end[dim]);
+ UPDATE_VALUE (info->stride[dim]);
+ UPDATE_VALUE (info->delta[dim]);
}
 
 #undef UPDATE_VALUE

[gcc(refs/users/mikael/heads/stabilisation_descriptor_v01)] Essai simplification évaluation

2025-07-08 Thread Mikael Morin via Gcc-cvs

https://gcc.gnu.org/g:5531dc0eabad3686bdc2c0513907a8b8a083953b

commit 5531dc0eabad3686bdc2c0513907a8b8a083953b
Author: Mikael Morin 
Date:   Mon Jul 7 14:50:14 2025 +0200

Essai simplification évaluation

Diff:
---
 gcc/fortran/trans-array.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc
index abf535cdaedb..832db1c3df4e 100644
--- a/gcc/fortran/trans-array.cc
+++ b/gcc/fortran/trans-array.cc
@@ -11380,7 +11380,7 @@ update_reallocated_descriptor (stmtblock_t *block, 
gfc_loopinfo *loop)
 #define UPDATE_VALUE(field, value) \
  do \
{ \
- if ((field) && VAR_P ((field))) \
+ if (false && (field) && VAR_P ((field))) \
{ \
  tree val = (value); \
  gfc_add_modify (block, (field), val); \

[gcc(refs/users/mikael/heads/stabilisation_descriptor_v01)] Correction array_constructor_1

2025-07-08 Thread Mikael Morin via Gcc-cvs

https://gcc.gnu.org/g:1f61e0b0fc176a4ccd40808feb8338c8f18cf558

commit 1f61e0b0fc176a4ccd40808feb8338c8f18cf558
Author: Mikael Morin 
Date:   Sat Jul 5 15:05:20 2025 +0200

Correction array_constructor_1

Diff:
---
 gcc/testsuite/gfortran.dg/asan/array_constructor_1.f90 | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcc/testsuite/gfortran.dg/asan/array_constructor_1.f90 
b/gcc/testsuite/gfortran.dg/asan/array_constructor_1.f90
index 45eafacd5a67..a0c55076a9ae 100644
--- a/gcc/testsuite/gfortran.dg/asan/array_constructor_1.f90
+++ b/gcc/testsuite/gfortran.dg/asan/array_constructor_1.f90
@@ -9,6 +9,8 @@ program grow_type_array
 
 type(container), allocatable :: list(:)
 
+allocate(list(0))
+
 list = [list, new_elem(5)]
 
 deallocate(list)

[gcc] Deleted branch 'mikael/heads/gimple_simulate_v01' in namespace 'refs/users'

2025-07-08 Thread Mikael Morin via Gcc-cvs

The branch 'mikael/heads/gimple_simulate_v01' in namespace 'refs/users' was 
deleted.
It previously pointed to:

 8e5da2bd03ad... gimple-simulate: Add a gimple IR interpreter/simulator

[gcc(refs/users/mikael/heads/stabilisation_descriptor_v01)] Suppression mise à jour delta.

2025-07-08 Thread Mikael Morin via Gcc-cvs

https://gcc.gnu.org/g:d53ac098d6447299535c0748f0f69dd23bf39c2e

commit d53ac098d6447299535c0748f0f69dd23bf39c2e
Author: Mikael Morin 
Date:   Mon Jul 7 19:01:09 2025 +0200

Suppression mise à jour delta.

Diff:
---
 gcc/fortran/trans-array.cc | 11 ---
 1 file changed, 11 deletions(-)

diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc
index 2d3ae44a8186..fcc9daa893d1 100644
--- a/gcc/fortran/trans-array.cc
+++ b/gcc/fortran/trans-array.cc
@@ -11394,7 +11394,6 @@ update_reallocated_descriptor (stmtblock_t *block, 
gfc_loopinfo *loop)
  UPDATE_VALUE (info->start[dim]);
  UPDATE_VALUE (info->end[dim]);
  UPDATE_VALUE (info->stride[dim]);
- UPDATE_VALUE (info->delta[dim]);
}
 
 #undef UPDATE_VALUE
@@ -11768,16 +11767,6 @@ gfc_alloc_allocatable_for_assignment (gfc_loopinfo 
*loop,
   tmp = gfc_conv_descriptor_offset (desc);
   gfc_add_modify (&fblock, tmp, offset);
 
-  /* Now set the deltas for the lhs.  */
-  for (n = 0; n < expr1->rank; n++)
-{
-  tmp = gfc_conv_descriptor_lbound_get (desc, gfc_rank_cst[n]);
-  dim = lss->dim[n];
-  tmp = fold_build2_loc (input_location, MINUS_EXPR,
-gfc_array_index_type, tmp,
-loop->from[dim]);
-}
-
   /* Take into account _len of unlimited polymorphic entities, so that span
  for array descriptors and allocation sizes are computed correctly.  */
   if (UNLIMITED_POLY (expr2))

[gcc(refs/users/mikael/heads/stabilisation_descriptor_v01)] Ajout commentaire

2025-07-08 Thread Mikael Morin via Gcc-cvs

https://gcc.gnu.org/g:5e187ee28b94066c3f7a3ef573d2d0c55d0e1f97

commit 5e187ee28b94066c3f7a3ef573d2d0c55d0e1f97
Author: Mikael Morin 
Date:   Mon Jul 7 14:38:51 2025 +0200

Ajout commentaire

Diff:
---
 gcc/fortran/trans-array.cc | 13 +
 1 file changed, 13 insertions(+)

diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc
index 3cd6d90f47e7..abf535cdaedb 100644
--- a/gcc/fortran/trans-array.cc
+++ b/gcc/fortran/trans-array.cc
@@ -11352,6 +11352,19 @@ concat_str_length (gfc_expr* expr)
 }
 
 
+/* Among the scalarization chain of LOOP, find the element associated with an
+   allocatable array on the lhs of an assignment and evaluate its fields
+   (bounds, offset, etc) to new variables, putting the new code in BLOCK.  This
+   function is to be called after putting the reallocation code in BLOCK and
+   before the beginning of the scalarization loop body.
+
+   The fields to be saved are expected to hold on entry to the function
+   expressions referencing the array descriptor.  Especially the expressions
+   shouldn't be already temporary variable references as the value saved before
+   reallocation would be incorrect after reallocation.
+   At the end of the function, the expressions have been replaced with variable
+   references.  */
+
 static void
 update_reallocated_descriptor (stmtblock_t *block, gfc_loopinfo *loop)
 {

[gcc(refs/users/mikael/heads/stabilisation_descriptor_v01)] Prise en charge affichage TARGET_MEM_REF

2025-07-08 Thread Mikael Morin via Gcc-cvs

https://gcc.gnu.org/g:4d9ba90bdfabf26d022c80d07f1e8c221e38ceec

commit 4d9ba90bdfabf26d022c80d07f1e8c221e38ceec
Author: Mikael Morin 
Date:   Mon Jul 7 08:52:38 2025 +0200

Prise en charge affichage TARGET_MEM_REF

Diff:
---
 gcc/gimple-simulate.cc| 87 ---
 gcc/selftest-run-tests.cc |  2 ++
 gcc/selftest.h|  1 +
 3 files changed, 86 insertions(+), 4 deletions(-)

diff --git a/gcc/gimple-simulate.cc b/gcc/gimple-simulate.cc
index aa29b68b748c..a85e6f63cc92 100644
--- a/gcc/gimple-simulate.cc
+++ b/gcc/gimple-simulate.cc
@@ -903,6 +903,9 @@ static tree
 find_mem_ref_replacement (simul_scope & context, tree data_ref,
  unsigned offset, unsigned min_size)
 {
+  gcc_assert (TREE_CODE (data_ref) == MEM_REF
+ || TREE_CODE (data_ref) == TARGET_MEM_REF);
+
   tree ptr = TREE_OPERAND (data_ref, 0);
   data_value ptr_val = context.evaluate (ptr);
   if (ptr_val.classify () != VAL_ADDRESS)
@@ -923,12 +926,30 @@ find_mem_ref_replacement (simul_scope & context, tree 
data_ref,
 {
   tree access_offset = TREE_OPERAND (data_ref, 1);
   gcc_assert (TREE_CONSTANT (access_offset));
-  gcc_assert (tree_fits_shwi_p (access_offset));
-  HOST_WIDE_INT shwi_offset = tree_to_shwi (access_offset);
-  gcc_assert (offset < UINT_MAX - shwi_offset);
-  HOST_WIDE_INT remaining_offset = shwi_offset * CHAR_BIT
+  gcc_assert (tree_fits_uhwi_p (access_offset));
+  HOST_WIDE_INT uhwi_offset = tree_to_uhwi (access_offset);
+  gcc_assert (offset < UINT_MAX - uhwi_offset);
+  HOST_WIDE_INT remaining_offset = uhwi_offset * CHAR_BIT
   + offset + ptr_address->offset;
 
+  if (TREE_CODE (data_ref) == TARGET_MEM_REF)
+   {
+ tree idx = TREE_OPERAND (data_ref, 2);
+ data_value idx_val = context.evaluate (idx);
+ gcc_assert (idx_val.classify () == VAL_KNOWN);
+ wide_int wi_idx = idx_val.get_known ();
+
+ tree step = TREE_OPERAND (data_ref, 3);
+ data_value step_val = context.evaluate (step);
+ gcc_assert (step_val.classify () == VAL_KNOWN);
+ wide_int wi_step = step_val.get_known ();
+
+ wi_idx *= wi_step;
+ gcc_assert (wi::fits_uhwi_p (wi_idx));
+ HOST_WIDE_INT idx_offset = wi_idx.to_uhwi ();
+ remaining_offset += idx_offset * CHAR_BIT;
+   }
+
   return pick_subref_at (var_ref, remaining_offset, nullptr, min_size);
 }
 }
@@ -957,6 +978,7 @@ context_printer::print_first_data_ref_part (simul_scope & 
context,
   switch (TREE_CODE (data_ref))
 {
 case MEM_REF:
+case TARGET_MEM_REF:
   {
tree mem_replacement = find_mem_ref_replacement (context, data_ref,
 offset, min_size);
@@ -4432,6 +4454,63 @@ context_printer_print_value_update_tests ()
   printer9.print_value_update (ctx9, ref9, val9_addr_i);
   const char *str9 = pp_formatted_text (&pp9);
   ASSERT_STREQ (str9, "# v17c[8B:+8B] = &i\n");
+
+
+  heap_memory mem10;
+  context_printer printer10;
+  pretty_printer & pp10 = printer10.pp;
+  pp_buffer (&pp10)->m_flush_p = false;
+
+  tree a11c_10 = build_array_type_nelts (char_type_node, 11);
+  tree v11c_10 = create_var (a11c_10, "v11c");
+  tree p_10 = create_var (ptr_type_node, "p");
+  tree i_10 = create_var (size_type_node, "i");
+
+  vec decls10{};
+  decls10.safe_push (v11c_10);
+  decls10.safe_push (p_10);
+  decls10.safe_push (i_10);
+
+  context_builder builder10;
+  builder10.add_decls (&decls10);
+  simul_scope ctx10 = builder10.build (mem10, printer10);
+
+  data_storage *strg10_v11 = ctx10.find_reachable_var (v11c_10);
+  gcc_assert (strg10_v11 != nullptr);
+  storage_address addr10_v11 (strg10_v11->get_ref (), 0);
+
+  data_value val10_addr_v11 (ptr_type_node);
+  val10_addr_v11.set_address (addr10_v11);
+
+  data_storage *strg10_p = ctx10.find_reachable_var (p_10);
+  gcc_assert (strg10_p != nullptr);
+  strg10_p->set (val10_addr_v11);
+
+  data_value val10_cst_2 (size_type_node);
+  wide_int cst2_10 = wi::uhwi (2, TYPE_PRECISION (size_type_node));
+  val10_cst_2.set_known (cst2_10);
+
+  data_storage *strg10_i = ctx10.find_reachable_var (i_10);
+  gcc_assert (strg10_i != nullptr);
+  strg10_i->set (val10_cst_2);
+
+  tree int_ptr_10 = build_pointer_type (integer_type_node);
+
+  tree ref10 = build5 (TARGET_MEM_REF, integer_type_node, p_10,
+  build_int_cst (int_ptr_10, -4), i_10,
+  build_int_cst (size_type_node, 4), NULL_TREE);
+
+  data_value val10_cst_13 (integer_type_node);
+  wide_int wi10_13 = wi::shwi (13, TYPE_PRECISION (integer_type_node));
+  val10_cst_13.set_known (wi10_13);
+
+  printer10.print_value_update (ctx10, ref10, val10_cst_13);
+  const char *str10 = pp_formatted_text (&pp10);
+  ASSERT_STREQ (str10,
+   "# v11c[4] = 13\n"
+   "# v11c[5] = 0\n"
+   "# v11c[6] = 0\n"
+

[gcc r15-9935] Fortran: Ensure arguments in coarray call get unique components in add_data [PR120847]

2025-07-08 Thread Andre Vehreschild via Gcc-cvs

https://gcc.gnu.org/g:67452737d8e6d2629104ac811eaf6ec8c1790614

commit r15-9935-g67452737d8e6d2629104ac811eaf6ec8c1790614
Author: Andre Vehreschild 
Date:   Fri Jun 27 15:31:21 2025 +0200

Fortran: Ensure arguments in coarray call get unique components in add_data 
[PR120847]

PR fortran/120847

gcc/fortran/ChangeLog:

* coarray.cc (check_add_new_comp_handle_array): Make the count
of components static to be able to create more than one.  Create
an array component only for array expressions.

gcc/testsuite/ChangeLog:

* gfortran.dg/coarray/coindexed_7.f90: New test.

(cherry picked from commit ee31ab9b1950b7f47f030bda231ace34d187ae26)

Diff:
---
 gcc/fortran/coarray.cc|  4 ++--
 gcc/testsuite/gfortran.dg/coarray/coindexed_7.f90 | 24 +++
 2 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/gcc/fortran/coarray.cc b/gcc/fortran/coarray.cc
index 70583254d0d8..a2cfdc0fb52c 100644
--- a/gcc/fortran/coarray.cc
+++ b/gcc/fortran/coarray.cc
@@ -498,7 +498,7 @@ check_add_new_comp_handle_array (gfc_expr *e, gfc_symbol 
*type,
 gfc_symbol *add_data)
 {
   gfc_component *comp;
-  int cnt = -1;
+  static int cnt = -1;
   gfc_symtree *caller_image;
   gfc_code *pre_code = caf_accessor_prepend;
   bool static_array_or_scalar = true;
@@ -561,7 +561,7 @@ check_add_new_comp_handle_array (gfc_expr *e, gfc_symbol 
*type,
   else
 {
   comp->initializer = gfc_copy_expr (e);
-  if (e_attr.dimension)
+  if (e_attr.dimension && e->rank)
{
  comp->attr.dimension = 1;
  comp->as = get_arrayspec_from_expr (e);
diff --git a/gcc/testsuite/gfortran.dg/coarray/coindexed_7.f90 
b/gcc/testsuite/gfortran.dg/coarray/coindexed_7.f90
new file mode 100644
index ..066397024f47
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/coarray/coindexed_7.f90
@@ -0,0 +1,24 @@
+!{ dg-do compile }
+
+! Check PR120847 is fixed.
+
+program p
+  implicit none
+
+  type T
+integer, allocatable :: i(:, :) [:]
+  end type T
+
+  type(T) :: o
+  integer, allocatable :: c[:]
+  integer :: i
+
+  c = 7
+
+  allocate(o%i(4, 5)[*], source=6)
+
+  do i = 1, 4
+c = o%i(mod(i, 2), mod(i, 3))[1]
+  end do
+
+end program p

[gcc r15-9934] Fortran: Fix non-conformable corank on this_image ref [PR120843]

2025-07-08 Thread Andre Vehreschild via Gcc-cvs

https://gcc.gnu.org/g:887ddb4d8c3ddd27c3a5cfd79f21dd52403c82fa

commit r15-9934-g887ddb4d8c3ddd27c3a5cfd79f21dd52403c82fa
Author: Andre Vehreschild 
Date:   Fri Jun 27 14:39:13 2025 +0200

Fortran: Fix non-conformable corank on this_image ref [PR120843]

PR fortran/120843

gcc/fortran/ChangeLog:

* resolve.cc (resolve_operator): Report inconsistent coranks
only when not referencing this_image.
(gfc_op_rank_conformable): Treat coranks as inconformable only
when a coindex other than implicit this_image is used.

gcc/testsuite/ChangeLog:

* gfortran.dg/coarray/coindexed_6.f90: New test.

(cherry picked from commit 1b0930e9046e0b6201fa03c2843f3b06e522acd1)

Diff:
---
 gcc/fortran/resolve.cc|  7 ---
 gcc/testsuite/gfortran.dg/coarray/coindexed_6.f90 | 17 +
 2 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/gcc/fortran/resolve.cc b/gcc/fortran/resolve.cc
index ee5b22a728d4..2fbe7c451428 100644
--- a/gcc/fortran/resolve.cc
+++ b/gcc/fortran/resolve.cc
@@ -4828,7 +4828,8 @@ resolve_operator (gfc_expr *e)
  if (e->shape == NULL)
e->shape = gfc_copy_shape (op2->shape, op2->corank);
}
-  else
+  else if ((op1->ref && !gfc_ref_this_image (op1->ref))
+  || (op2->ref && !gfc_ref_this_image (op2->ref)))
{
  gfc_error ("Inconsistent coranks for operator at %L and %L",
 &op1->where, &op2->where);
@@ -6070,8 +6071,8 @@ gfc_op_rank_conformable (gfc_expr *op1, gfc_expr *op2)
 gfc_expression_rank (op2);
 
   return (op1->rank == 0 || op2->rank == 0 || op1->rank == op2->rank)
-&& (op1->corank == 0 || op2->corank == 0
-|| op1->corank == op2->corank);
+&& (op1->corank == 0 || op2->corank == 0 || op1->corank == op2->corank
+|| (!gfc_is_coindexed (op1) && !gfc_is_coindexed (op2)));
 }
 
 /* Resolve a variable expression.  */
diff --git a/gcc/testsuite/gfortran.dg/coarray/coindexed_6.f90 
b/gcc/testsuite/gfortran.dg/coarray/coindexed_6.f90
new file mode 100644
index ..8f5dcabb859a
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/coarray/coindexed_6.f90
@@ -0,0 +1,17 @@
+!{ dg-do compile }
+
+! Check PR120843 is fixed
+
+program p
+  implicit none
+
+  integer, allocatable :: arr(:,:) [:,:]
+  integer :: c[*]
+
+  c = 7
+
+  allocate(arr(4,3)[2,*], source=6)
+
+  if (arr(2,2)* c /= 42) stop 1
+
+end program p

[gcc r16-2087] Allow the target to request a masked vector epilogue

2025-07-08 Thread Richard Biener via Gcc-cvs

https://gcc.gnu.org/g:df64d099faf843d90e8fe29aec17d84277986ee9

commit r16-2087-gdf64d099faf843d90e8fe29aec17d84277986ee9
Author: Richard Biener 
Date:   Sun May 25 19:28:54 2025 +0200

Allow the target to request a masked vector epilogue

Targets recently got the ability to request the vector mode to be
used for a vector epilogue (or the epilogue of a vector epilogue).  The
following adds the ability for it to indicate the epilogue should use
loop masking, irrespective of the --param vect-partial-vector-usage
default setting.

The patch below uses a separate flag from the epilogue mode, not
addressing the issue that on x86 the vector_modes mode iteration
hook would not allow for both masked and unmasked variants to be
tried and costed given this doesn't naturally map to modes on
that target.  That's left for a future exercise - turning on
cost comparison for the x86 backend would be a prerequisite there.

* tree-vectorizer.h (vector_costs::suggested_epilogue_mode):
Add masked output parameter and return m_masked_epilogue.
(vector_costs::m_masked_epilogue): New tristate flag.
(vector_costs::vector_costs): Initialize m_masked_epilogue.
* tree-vect-loop.cc (vect_analyze_loop_1): Pass in masked
flag to optionally initialize can_use_partial_vectors_p.
(vect_analyze_loop): For epilogues also get whether to use
a masked epilogue for this loop from the target and use
that for the first epilogue mode we try.

Diff:
---
 gcc/tree-vect-loop.cc | 35 ++-
 gcc/tree-vectorizer.h | 13 ++---
 2 files changed, 36 insertions(+), 12 deletions(-)

diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 2782d61a5fc2..d5044d5fe227 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -58,6 +58,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-eh.h"
 #include "case-cfn-macros.h"
 #include "langhooks.h"
+#include "opts.h"
 
 /* Loop Vectorization Pass.
 
@@ -3400,8 +3401,10 @@ vect_joust_loop_vinfos (loop_vec_info new_loop_vinfo,
 }
 
 /* Analyze LOOP with VECTOR_MODES[MODE_I] and as epilogue if ORIG_LOOP_VINFO is
-   not NULL.  Set AUTODETECTED_VECTOR_MODE if VOIDmode and advance
-   MODE_I to the next mode useful to analyze.
+   not NULL.  When MASKED_P is not -1 override the default
+   LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P with it.
+   Set AUTODETECTED_VECTOR_MODE if VOIDmode and advance MODE_I to the next
+   mode useful to analyze.
Return the loop_vinfo on success and wrapped null on failure.  */
 
 static opt_loop_vec_info
@@ -3409,6 +3412,7 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared 
*shared,
 const vect_loop_form_info *loop_form_info,
 loop_vec_info orig_loop_vinfo,
 const vector_modes &vector_modes, unsigned &mode_i,
+int masked_p,
 machine_mode &autodetected_vector_mode,
 bool &fatal)
 {
@@ -3417,6 +3421,8 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared 
*shared,
 
   machine_mode vector_mode = vector_modes[mode_i];
   loop_vinfo->vector_mode = vector_mode;
+  if (masked_p != -1)
+loop_vinfo->can_use_partial_vectors_p = masked_p;
   unsigned int suggested_unroll_factor = 1;
   unsigned slp_done_for_suggested_uf = 0;
 
@@ -3600,7 +3606,7 @@ vect_analyze_loop (class loop *loop, gimple 
*loop_vectorized_call,
   cached_vf_per_mode[last_mode_i] = -1;
   opt_loop_vec_info loop_vinfo
= vect_analyze_loop_1 (loop, shared, &loop_form_info,
-  NULL, vector_modes, mode_i,
+  NULL, vector_modes, mode_i, -1,
   autodetected_vector_mode, fatal);
   if (fatal)
break;
@@ -3685,18 +3691,21 @@ vect_analyze_loop (class loop *loop, gimple 
*loop_vectorized_call,
  array may contain length-agnostic and length-specific modes.  Their
  ordering is not guaranteed, so we could end up picking a mode for the main
  loop that is after the epilogue's optimal mode.  */
+  int masked_p = -1;
   if (!unlimited_cost_model (loop)
-  && first_loop_vinfo->vector_costs->suggested_epilogue_mode () != 
VOIDmode)
+  && (first_loop_vinfo->vector_costs->suggested_epilogue_mode (masked_p)
+ != VOIDmode))
 {
   vector_modes[0]
-   = first_loop_vinfo->vector_costs->suggested_epilogue_mode ();
+   = first_loop_vinfo->vector_costs->suggested_epilogue_mode (masked_p);
   cached_vf_per_mode[0] = 0;
 }
   else
 vector_modes[0] = autodetected_vector_mode;
   mode_i = 0;
 
-  bool supports_partial_vectors = param_vect_partial_vector_usage != 0;
+  bool supports_partial_vectors = (param_vect_partial_vector_usage != 0
+  || masked_p == 1);
   machine_mode mask_mode;
   if (support

[gcc r16-2088] add masked-epilogue tuning

2025-07-08 Thread Richard Biener via Gcc-cvs

https://gcc.gnu.org/g:e9079e4f43d13579c41110ce1871051a43c577b6

commit r16-2088-ge9079e4f43d13579c41110ce1871051a43c577b6
Author: Richard Biener 
Date:   Sun May 25 19:29:04 2025 +0200

add masked-epilogue tuning

The following adds an x86 tuning to enable the use of AVX512 masked
epilogues in cases we heuristically determine it to be not detrimental
by high chance.  Basically problematic cases are when there are
data streams that are both stored and loaded from and an outer loop
could end up executing only the inner loop masked epilogue and with
unlucky data stream advancement from the outer loop end up needing
to forward from masked stores to masked loads.  This isn't very
well handled, esp. for the case where unmasked operations would
not need to forward at all - that is, when forwarding completely
from the masked out portion of the store (like the AVX upper half
to the AVX lower half of a load).  There's also the case where
the number of iterations is known at compile time, only with
cost comparing we'd consider a non-masked epilog - as we are not
doing that we have to add heuristics to avoid masking when a
single vector epilog iteration would cover all scalar iterations
left (this is exercised by gcc.target/i386/pr110310.c).

SPEC CPU 2017 shows 3% text size savings over not using masked
epilogues with performance impact in the noise.  Masking all vector
epilogues gets that to 4% text size savings with some major
runtime regressions in 503.bwaves_r and 527.cam4_r
(measured on a Zen4 system), we're leaving a 5% improvement
for 549.fotonik3d_r unrealized with the implemented heuristic.

With the heuristics we turn 22513 vector epilogues + up to 12305 scalar
epilogues into 12305 masked vector epilogues of which 574 are for
AVX vector sizes, 79 for SSE vector sizes and the rest for AVX512.
When masking all epilogues we get 14567 of them from
29467 vector + up to 14567 scalar epilogues, so the heuristics disable
an additional 20% of masked epilogues.

* config/i386/x86-tune.def (X86_TUNE_AVX512_MASKED_EPILOGUES):
New tunable, default on for m_ZNVER4 and m_ZNVER5.
* config/i386/i386.cc (ix86_vector_costs::finish_cost): With
X86_TUNE_AVX512_MASKED_EPILOGUES and when the main loop
had a vectorization factor > 2 use a masked epilogue when
possible and when not obviously problematic.

* gcc.target/i386/vect-mask-epilogue-1.c: New testcase.
* gcc.target/i386/vect-mask-epilogue-2.c: Likewise.
* gcc.target/i386/vect-epilogues-3.c: Adjust.

Diff:
---
 gcc/config/i386/i386.cc| 59 ++
 gcc/config/i386/x86-tune.def   |  5 ++
 gcc/testsuite/gcc.target/i386/vect-epilogues-3.c   |  2 +-
 .../gcc.target/i386/vect-mask-epilogue-1.c | 11 
 .../gcc.target/i386/vect-mask-epilogue-2.c | 14 +
 5 files changed, 90 insertions(+), 1 deletion(-)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index fd3f35de14d3..ad7360ec71a4 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -26295,6 +26295,65 @@ ix86_vector_costs::finish_cost (const vector_costs 
*scalar_costs)
   && LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant () >= 16)
 m_suggested_epilogue_mode = V8QImode;
 
+  /* When X86_TUNE_AVX512_MASKED_EPILOGUES is enabled try to use
+ a masked epilogue if that doesn't seem detrimental.  */
+  if (loop_vinfo
+  && !LOOP_VINFO_EPILOGUE_P (loop_vinfo)
+  && LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant () > 2
+  && ix86_tune_features[X86_TUNE_AVX512_MASKED_EPILOGUES]
+  && !OPTION_SET_P (param_vect_partial_vector_usage))
+{
+  bool avoid = false;
+  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+ && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0)
+   {
+ unsigned int peel_niter
+   = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
+ if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
+   peel_niter += 1;
+ /* When we know the number of scalar iterations of the epilogue,
+avoid masking when a single vector epilog iteration handles
+it in full.  */
+ if (pow2p_hwi ((LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_niter)
+% LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ()))
+   avoid = true;
+   }
+  if (!avoid && loop_outer (loop_outer (LOOP_VINFO_LOOP (loop_vinfo
+   for (auto ddr : LOOP_VINFO_DDRS (loop_vinfo))
+ {
+   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
+ ;
+   else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
+ ;
+   else
+ {
+   int loop_depth
+   = index_in_loop_nest (LOOP_VINFO_LOOP (loop_vinfo)->num,
+

[gcc r16-2089] libstdc++: Document that LWG 3881 is resolved, by using different approach.

2025-07-08 Thread Tomasz Kaminski via Libstdc++-cvs

https://gcc.gnu.org/g:a10723efcd611977c5e4a875b9cb1de920732d42

commit r16-2089-ga10723efcd611977c5e4a875b9cb1de920732d42
Author: Tomasz Kamiński 
Date:   Tue Jul 8 09:30:06 2025 +0200

libstdc++: Document that LWG 3881 is resolved, by using different approach.

libstdc++-v3/ChangeLog:

* include/std/queue (formatter, _CharT>)
(formatter, _CharT>):
Add _GLIBCXX_RESOLVE_LIB_DEFECTS comments.

Diff:
---
 libstdc++-v3/include/std/queue | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/include/std/queue b/libstdc++-v3/include/std/queue
index 90525897da75..1b76088b31b3 100644
--- a/libstdc++-v3/include/std/queue
+++ b/libstdc++-v3/include/std/queue
@@ -105,7 +105,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{ return _M_f.format(__a.c, __fc); }
 
 private:
-  // Standard uses formatter, _CharT>.
+  // Standard uses formatter, _CharT>, but 
range_formatter
+  // provides same behavior.
+  // _GLIBCXX_RESOLVE_LIB_DEFECTS
+  // 3881. Incorrect formatting of container adapters backed by std::string
   range_formatter<_Tp, _CharT> _M_f;
 };
 
@@ -136,7 +139,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{ return _M_f.format(__a.c, __fc); }
 
 private:
-  // Standard uses formatter, _CharT>.
+  // Standard uses formatter, _CharT>, but 
range_formatter
+  // provides same behavior.
+  // _GLIBCXX_RESOLVE_LIB_DEFECTS
+  // 3881. Incorrect formatting of container adapters backed by std::string
   range_formatter<_Tp, _CharT> _M_f;
 };

[gcc] Created branch 'gaius/heads/m2wideset' in namespace 'refs/users'

2025-07-08 Thread Gaius Mulley via Gcc-cvs

The branch 'gaius/heads/m2wideset' was created in namespace 'refs/users' 
pointing to:

 2fd6f42c17a8... libstdc++: Make debug iterator pointer sequence const [PR11

[gcc r16-2085] tree-optimization/120358 - bogus PTA with structure access

2025-07-08 Thread Richard Biener via Gcc-cvs

https://gcc.gnu.org/g:aa5ae523e84a97bf3a582ea0fa73d959afa9b9c7

commit r16-2085-gaa5ae523e84a97bf3a582ea0fa73d959afa9b9c7
Author: Richard Biener 
Date:   Mon Jul 7 15:13:38 2025 +0200

tree-optimization/120358 - bogus PTA with structure access

When we compute the constraint for something like
MEM[(const struct QStringView &)&tok2 + 32] we go and compute
what (const struct QStringView &)&tok2 + 32 points to and then
add subvariables to its dereference that possibly fall in the
range of the access according to the original refs size.  In
doing that we disregarded that the subvariable the starting
address points to might not be aligned to it and thus the
access might start at any point within that variable.  The following
conservatively adjusts the pruning of adjacent sub-variables to
honor this.

PR tree-optimization/120358
* tree-ssa-structalias.cc (get_constraint_for_1): Adjust
pruning of sub-variables according to the imprecise
known start offset.

Diff:
---
 gcc/tree-ssa-structalias.cc | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/gcc/tree-ssa-structalias.cc b/gcc/tree-ssa-structalias.cc
index deca44ae0bf3..0215243d5be9 100644
--- a/gcc/tree-ssa-structalias.cc
+++ b/gcc/tree-ssa-structalias.cc
@@ -3690,7 +3690,10 @@ get_constraint_for_1 (tree t, vec *results, bool 
address_p,
size = -1;
  for (; curr; curr = vi_next (curr))
{
- if (curr->offset - vi->offset < size)
+ /* The start of the access might happen anywhere
+within vi, so conservatively assume it was
+at its end.  */
+ if (curr->offset - (vi->offset + vi->size - 1) < size)
{
  cs.var = curr->id;
  results->safe_push (cs);

[gcc r15-9936] nr2.0: late: Correctly initialize funny_error member

2025-07-08 Thread Arthur Cohen via Gcc-cvs

https://gcc.gnu.org/g:7e3c677f96138547f27d10f748cdfa04b759de1e

commit r15-9936-g7e3c677f96138547f27d10f748cdfa04b759de1e
Author: Arthur Cohen 
Date:   Mon Apr 28 21:45:13 2025 +0200

nr2.0: late: Correctly initialize funny_error member

gcc/rust/ChangeLog:

* resolve/rust-late-name-resolver-2.0.cc (Late::Late): False 
initialize the
funny_error field.

Diff:
---
 gcc/rust/resolve/rust-late-name-resolver-2.0.cc | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/gcc/rust/resolve/rust-late-name-resolver-2.0.cc 
b/gcc/rust/resolve/rust-late-name-resolver-2.0.cc
index f743e1e03f34..8702c8987d63 100644
--- a/gcc/rust/resolve/rust-late-name-resolver-2.0.cc
+++ b/gcc/rust/resolve/rust-late-name-resolver-2.0.cc
@@ -33,7 +33,9 @@
 namespace Rust {
 namespace Resolver2_0 {
 
-Late::Late (NameResolutionContext &ctx) : DefaultResolver (ctx) {}
+Late::Late (NameResolutionContext &ctx)
+  : DefaultResolver (ctx), funny_error (false)
+{}
 
 static NodeId
 next_node_id ()

[gcc r16-2107] libstdc++: Do not expose set_brackets/set_separator for formatter with format_kind other than sequence [PR119861]

2025-07-08 Thread Tomasz Kaminski via Libstdc++-cvs

https://gcc.gnu.org/g:70bd97e89ddf8fcb8c14e84a8fd580404536eeb1

commit r16-2107-g70bd97e89ddf8fcb8c14e84a8fd580404536eeb1
Author: Tomasz Kamiński 
Date:   Tue Jul 8 10:04:41 2025 +0200

libstdc++: Do not expose set_brackets/set_separator for formatter with 
format_kind other than sequence [PR119861]

The standard defines separate specializations of range-default-formatter, 
out
of which only the one for range_format::sequence provides the set_brackets and
set_separator methods. We implemented it as one specialization and exposed
these methods for range_format other than string or debug_string, i.e. when
range_formatter was used as underlying formatter.

PR libstdc++/119861

libstdc++-v3/ChangeLog:

* include/std/format (formatter<_Rg, _CharT>::set_separator)
(formatter<_Rg, _CharT>::set_brackets): Constrain with
(format_kind<_Rg> == range_format::sequence).
* testsuite/std/format/ranges/pr119861_neg.cc: New test.

Diff:
---
 libstdc++-v3/include/std/format|  4 +-
 .../testsuite/std/format/ranges/pr119861_neg.cc| 52 ++
 2 files changed, 54 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/include/std/format b/libstdc++-v3/include/std/format
index 5749aa1995a9..d584b81c78a1 100644
--- a/libstdc++-v3/include/std/format
+++ b/libstdc++-v3/include/std/format
@@ -6030,13 +6030,13 @@ namespace __format
 
   constexpr void
   set_separator(basic_string_view<_CharT> __sep) noexcept
-   requires (!_S_range_format_is_string)
+   requires (format_kind<_Rg> == range_format::sequence)
   { _M_under.set_separator(__sep); }
 
   constexpr void
   set_brackets(basic_string_view<_CharT> __open,
   basic_string_view<_CharT> __close) noexcept
-   requires (!_S_range_format_is_string)
+   requires (format_kind<_Rg> == range_format::sequence)
   { _M_under.set_brackets(__open, __close); }
 
   // We deviate from standard, that declares this as template accepting
diff --git a/libstdc++-v3/testsuite/std/format/ranges/pr119861_neg.cc 
b/libstdc++-v3/testsuite/std/format/ranges/pr119861_neg.cc
new file mode 100644
index ..9a6ed16393ee
--- /dev/null
+++ b/libstdc++-v3/testsuite/std/format/ranges/pr119861_neg.cc
@@ -0,0 +1,52 @@
+// { dg-do compile { target c++23 } }
+
+#include 
+#include 
+
+// only format_kind::sequence provides set_brackets and set_separator methods 
+
+template
+struct MyCont : std::vector
+{
+  using std::vector::vector;
+};
+
+template
+constexpr std::range_format std::format_kind> = fk;
+
+void test_sequence()
+{
+  std::formatter, char> fmtter;
+  fmtter.set_brackets("{", "}");
+  fmtter.set_separator(",");
+}
+
+void test_map()
+{
+  std::formatter>, char> 
fmtter;
+  fmtter.set_brackets("{", "}"); // { dg-error "here" }
+  fmtter.set_separator(","); // { dg-error "here" }
+}
+
+void test_set()
+{
+  std::formatter, char> fmtter;
+  fmtter.set_brackets("{", "}"); // { dg-error "here" }
+  fmtter.set_separator(","); // { dg-error "here" }
+}
+
+void test_string()
+{
+  std::formatter, char> fmtter;
+  fmtter.set_brackets("{", "}"); // { dg-error "here" }
+  fmtter.set_separator(","); // { dg-error "here" }
+}
+
+void test_debug_string()
+{
+  std::formatter, char> fmtter;
+  fmtter.set_brackets("{", "}"); // { dg-error "here" }
+  fmtter.set_separator(","); // { dg-error "here" }
+}
+
+// { dg-error "no matching function for call to 'std::formatter<" "" { target 
*-*-* } 0 }

[gcc r16-2095] libstdc++: Restructure mdspan tests to reuse IntLike.

2025-07-08 Thread Tomasz Kaminski via Libstdc++-cvs

https://gcc.gnu.org/g:45b81ebf7815e3cea15f6fb18e83a101a4c50fb3

commit r16-2095-g45b81ebf7815e3cea15f6fb18e83a101a4c50fb3
Author: Luc Grosheintz 
Date:   Fri Jul 4 10:29:45 2025 +0200

libstdc++: Restructure mdspan tests to reuse IntLike.

The class IntLike is used for testing extents with user-defined classes
that convert to int. This commit places the class into a separate header
file. This allows it to be reused across different parts of the mdspan
related testsuite.

libstdc++-v3/ChangeLog:

* testsuite/23_containers/mdspan/extents/custom_integer.cc:
Delete IntLike and include "int_like.h".
* testsuite/23_containers/mdspan/extents/int_like.h: Add
IntLike.

Reviewed-by: Tomasz Kamiński 
Signed-off-by: Luc Grosheintz 

Diff:
---
 .../23_containers/mdspan/extents/custom_integer.cc | 27 +--
 .../23_containers/mdspan/extents/int_like.h| 30 ++
 2 files changed, 31 insertions(+), 26 deletions(-)

diff --git 
a/libstdc++-v3/testsuite/23_containers/mdspan/extents/custom_integer.cc 
b/libstdc++-v3/testsuite/23_containers/mdspan/extents/custom_integer.cc
index 2907ad12ae72..404755bd5ac4 100644
--- a/libstdc++-v3/testsuite/23_containers/mdspan/extents/custom_integer.cc
+++ b/libstdc++-v3/testsuite/23_containers/mdspan/extents/custom_integer.cc
@@ -2,38 +2,13 @@
 #include 
 
 #include 
+#include "int_like.h"
 
 // Test construction from a custom integer-like object, that has
 // no copy/move ctor or copy/move assignment operator.
 
 constexpr size_t dyn = std::dynamic_extent;
 
-class IntLike
-{
-public:
-  explicit
-  IntLike(int i)
-  : _M_i(i)
-  { }
-
-  IntLike() = delete;
-  IntLike(const IntLike&) = delete;
-  IntLike(IntLike&&) = delete;
-
-  const IntLike&
-  operator=(const IntLike&) = delete;
-
-  const IntLike&
-  operator=(IntLike&&) = delete;
-
-  constexpr
-  operator int() const noexcept
-  { return _M_i; }
-
-private:
-  int _M_i;
-};
-
 static_assert(std::is_convertible_v);
 static_assert(std::is_nothrow_constructible_v);
 
diff --git a/libstdc++-v3/testsuite/23_containers/mdspan/extents/int_like.h 
b/libstdc++-v3/testsuite/23_containers/mdspan/extents/int_like.h
new file mode 100644
index ..f39f4cc90816
--- /dev/null
+++ b/libstdc++-v3/testsuite/23_containers/mdspan/extents/int_like.h
@@ -0,0 +1,30 @@
+#ifndef TEST_MDSPAN_INT_LIKE_H
+#define TEST_MDSPAN_INT_LIKE_H
+
+class IntLike
+{
+public:
+  explicit
+  IntLike(int i)
+  : _M_i(i)
+  { }
+
+  IntLike() = delete;
+  IntLike(const IntLike&) = delete;
+  IntLike(IntLike&&) = delete;
+
+  const IntLike&
+  operator=(const IntLike&) = delete;
+
+  const IntLike&
+  operator=(IntLike&&) = delete;
+
+  constexpr
+  operator int() const noexcept
+  { return _M_i; }
+
+private:
+  int _M_i;
+};
+
+#endif // TEST_MDSPAN_INT_LIKE_H

[gcc r16-2096] libstdc++: Implement __mdspan::__size.

2025-07-08 Thread Tomasz Kaminski via Gcc-cvs

https://gcc.gnu.org/g:aa961cae42e23461887a6cf38aa47413b8425243

commit r16-2096-gaa961cae42e23461887a6cf38aa47413b8425243
Author: Luc Grosheintz 
Date:   Fri Jul 4 10:29:46 2025 +0200

libstdc++: Implement __mdspan::__size.

The current code uses __mdspan::__fwd_prod(__exts, __rank) to express
computing the size of an extent. This commit adds a function __mdspan::
__size(__exts) to express the idea more directly.

libstdc++-v3/ChangeLog:

* include/std/mdspan (__mdspan::__size): New function.

Reviewed-by: Tomasz Kamiński 
Signed-off-by: Luc Grosheintz 

Diff:
---
 libstdc++-v3/include/std/mdspan | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/libstdc++-v3/include/std/mdspan b/libstdc++-v3/include/std/mdspan
index d97fa22e4f03..b0d8088bb777 100644
--- a/libstdc++-v3/include/std/mdspan
+++ b/libstdc++-v3/include/std/mdspan
@@ -398,6 +398,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   __rev_prod(const _Extents& __exts, size_t __r) noexcept
   { return __exts_prod(__exts, __r + 1, __exts.rank()); }
 
+template
+  constexpr typename _Extents::index_type
+  __size(const _Extents& __exts) noexcept
+  { return __fwd_prod(__exts, __exts.rank()); }
+
 template
   auto __build_dextents_type(integer_sequence)
-> extents<_IndexType, ((void) _Counts, dynamic_extent)...>;
@@ -591,7 +596,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   constexpr index_type
   required_span_size() const noexcept
-  { return __mdspan::__fwd_prod(_M_extents, extents_type::rank()); }
+  { return __mdspan::__size(_M_extents); }
 
   template<__mdspan::__valid_index_type... _Indices>
requires (sizeof...(_Indices) == extents_type::rank())
@@ -730,7 +735,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   constexpr index_type
   required_span_size() const noexcept
-  { return __mdspan::__fwd_prod(_M_extents, extents_type::rank()); }
+  { return __mdspan::__size(_M_extents); }
 
   template<__mdspan::__valid_index_type... _Indices>
requires (sizeof...(_Indices) == extents_type::rank())
@@ -986,8 +991,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   {
if constexpr (!is_always_exhaustive())
  {
-   constexpr auto __rank = extents_type::rank();
-   auto __size = __mdspan::__fwd_prod(_M_extents, __rank);
+   auto __size = __mdspan::__size(_M_extents);
if(__size > 0)
  return __size == required_span_size();
  }

[gcc r16-2099] [PATCH] riscv: allow zero in zacas subword atomic cas

2025-07-08 Thread Jeff Law via Gcc-cvs

https://gcc.gnu.org/g:3fd638a9e5497dfdf00f1783d6e704af03fb44b0

commit r16-2099-g3fd638a9e5497dfdf00f1783d6e704af03fb44b0
Author: Andreas Schwab 
Date:   Tue Jul 8 07:32:17 2025 -0600

[PATCH] riscv: allow zero in zacas subword atomic cas

gcc:
PR target/120995
* config/riscv/sync.md (zacas_atomic_cas_value_strong):
Allow op3 to be zero.

gcc/testsuite:
PR target/120995
* gcc.target/riscv/amo/zabha-zacas-atomic-cas.c: New test.

Diff:
---
 gcc/config/riscv/sync.md|  2 +-
 gcc/testsuite/gcc.target/riscv/amo/zabha-zacas-atomic-cas.c | 11 +++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md
index a75ea6834e46..50ec8b38f723 100644
--- a/gcc/config/riscv/sync.md
+++ b/gcc/config/riscv/sync.md
@@ -627,7 +627,7 @@
(match_operand:SHORT 1 "memory_operand" "+A"))  
;; memory
(set (match_dup 1)
(unspec_volatile:SHORT [(match_operand:SHORT 2 "register_operand" "0")  
;; expected_val
-   (match_operand:SHORT 3 "register_operand" "rJ") 
;; desired_val
+   (match_operand:SHORT 3 "reg_or_0_operand" "rJ") 
;; desired_val
(match_operand:SI 4 "const_int_operand")
;; mod_s
(match_operand:SI 5 "const_int_operand")]   
;; mod_f
 UNSPEC_COMPARE_AND_SWAP))]
diff --git a/gcc/testsuite/gcc.target/riscv/amo/zabha-zacas-atomic-cas.c 
b/gcc/testsuite/gcc.target/riscv/amo/zabha-zacas-atomic-cas.c
new file mode 100644
index ..d3d84fd30882
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/amo/zabha-zacas-atomic-cas.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* PR target/120995 ICE unrecognized subword atomic cas */
+/* { dg-options "-O" } */
+/* { dg-add-options riscv_zacas } */
+/* { dg-add-options riscv_zabha } */
+
+_Bool b;
+void atomic_bool_cmpxchg()
+{
+  __sync_bool_compare_and_swap(&b, 1, 0);
+}

[gcc r16-2092] Handle non default git prefix configurations

2025-07-08 Thread Alexander Monakov via Gcc-cvs

https://gcc.gnu.org/g:90f7df6194e3c687000fcf725e18532838881d2f

commit r16-2092-g90f7df6194e3c687000fcf725e18532838881d2f
Author: Pierre-Emmanuel Patry 
Date:   Mon Jul 7 17:05:44 2025 +0200

Handle non default git prefix configurations

Mklog parses the diff content from prepare-commit-msg hook but fails
when git has been configured with mnemonicPrefix. Forcing the default
values for the prefixes would set a distinct diff configuration supported
by mklog and prevent most failures.

contrib/ChangeLog:

* prepare-commit-msg: Force default git prefixes.

Signed-off-by: Pierre-Emmanuel Patry 

Diff:
---
 contrib/prepare-commit-msg | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/contrib/prepare-commit-msg b/contrib/prepare-commit-msg
index 1b878772dcc4..75d102559c78 100755
--- a/contrib/prepare-commit-msg
+++ b/contrib/prepare-commit-msg
@@ -78,4 +78,4 @@ else
 tee="cat"
 fi
 
-git $cmd | $tee | git gcc-mklog -c "$COMMIT_MSG_FILE"
+git $cmd --default-prefix | $tee | git gcc-mklog -c "$COMMIT_MSG_FILE"

[gcc r16-2094] libstdc++: Check prerequisite of extents::extents.

2025-07-08 Thread Tomasz Kaminski via Libstdc++-cvs

https://gcc.gnu.org/g:74ddf1792ae3538ee829f2c399dfcb75bfae8fd3

commit r16-2094-g74ddf1792ae3538ee829f2c399dfcb75bfae8fd3
Author: Luc Grosheintz 
Date:   Fri Jul 4 10:29:44 2025 +0200

libstdc++: Check prerequisite of extents::extents.

Previously the prerequisite of the extents ctors that

static_extent(i) == dynamic_extent || extent(i) == other.extent(i).

was not checked. This commit adds the __glibcxx_assert and tests it.

libstdc++-v3/ChangeLog:

* include/std/mdspan (extents): Check prerequisite of the ctor that
static_extent(i) == dynamic_extent || extent(i) == other.extent(i).
* testsuite/23_containers/mdspan/extents/class_mandates_neg.cc:
Test the implemented prerequisite.

Reviewed-by: Tomasz Kamiński 
Signed-off-by: Luc Grosheintz 

Diff:
---
 libstdc++-v3/include/std/mdspan| 13 
 .../mdspan/extents/class_mandates_neg.cc   |  2 ++
 .../mdspan/extents/extents_mismatch_neg.cc | 35 ++
 3 files changed, 50 insertions(+)

diff --git a/libstdc++-v3/include/std/mdspan b/libstdc++-v3/include/std/mdspan
index 1fdcae634419..d97fa22e4f03 100644
--- a/libstdc++-v3/include/std/mdspan
+++ b/libstdc++-v3/include/std/mdspan
@@ -110,10 +110,23 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return __se;
}
 
+   template<size_t _OtherRank, typename _GetOtherExtent>
+ static constexpr bool
+ _S_is_compatible_extents(_GetOtherExtent __get_extent) noexcept
+ {
+   if constexpr (_OtherRank == _S_rank)
+ for (size_t __i = 0; __i < _S_rank; ++__i)
+   if (_Extents[__i] != dynamic_extent
+   && !cmp_equal(_Extents[__i], _S_int_cast(__get_extent(__i))))
+ return false;
+   return true;
+ }
+
template<size_t _OtherRank, typename _GetOtherExtent>
  constexpr void
  _M_init_dynamic_extents(_GetOtherExtent __get_extent) noexcept
  {
+   __glibcxx_assert(_S_is_compatible_extents<_OtherRank>(__get_extent));
for (size_t __i = 0; __i < _S_rank_dynamic; ++__i)
  {
size_t __di = __i;
diff --git 
a/libstdc++-v3/testsuite/23_containers/mdspan/extents/class_mandates_neg.cc 
b/libstdc++-v3/testsuite/23_containers/mdspan/extents/class_mandates_neg.cc
index f9c1c0196669..67d18feda96c 100644
--- a/libstdc++-v3/testsuite/23_containers/mdspan/extents/class_mandates_neg.cc
+++ b/libstdc++-v3/testsuite/23_containers/mdspan/extents/class_mandates_neg.cc
@@ -7,6 +7,8 @@ std::extents e1; // { dg-error "from 
here" }
 std::extents e2; // { dg-error "from here" }
 std::extents e3; // { dg-error "from here" }
 std::extents e4;   // { dg-error "from here" }
+
 // { dg-prune-output "dynamic or representable as IndexType" }
 // { dg-prune-output "signed or unsigned integer" }
 // { dg-prune-output "invalid use of incomplete type" }
+// { dg-prune-output "non-constant condition for static assertion" }
diff --git 
a/libstdc++-v3/testsuite/23_containers/mdspan/extents/extents_mismatch_neg.cc 
b/libstdc++-v3/testsuite/23_containers/mdspan/extents/extents_mismatch_neg.cc
new file mode 100644
index 000000000000..b35e5310d415
--- /dev/null
+++ 
b/libstdc++-v3/testsuite/23_containers/mdspan/extents/extents_mismatch_neg.cc
@@ -0,0 +1,35 @@
+// { dg-do compile { target c++23 } }
+#include
+
+#include 
+
+constexpr size_t dyn = std::dynamic_extent;
+
+constexpr bool
+test_dyn2sta_extents_mismatch_00()
+{
+  auto e0 = std::extents{1};
+  [[maybe_unused]] auto e1 = std::extents{e0};// { dg-error 
"expansion of" }
+  return true;
+}
+static_assert(test_dyn2sta_extents_mismatch_00());// { dg-error 
"expansion of" }
+
+constexpr bool
+test_dyn2sta_extents_mismatch_01()
+{
+  [[maybe_unused]] auto e = std::extents{2, 2}; // { dg-error 
"expansion of" }
+  return true;
+}
+static_assert(test_dyn2sta_extents_mismatch_01());   // { dg-error 
"expansion of" }
+
+constexpr bool
+test_dyn2sta_extents_mismatch_02()
+{
+  std::array exts{2, 2};
+  [[maybe_unused]] auto e = std::extents{exts}; // { dg-error 
"expansion of" }
+  return true;
+}
+static_assert(test_dyn2sta_extents_mismatch_02());   // { dg-error 
"expansion of" }
+
+// { dg-prune-output "non-constant condition for static assertion" }
+// { dg-prune-output "__glibcxx_assert" }

1 2 >

1 - 100 of 111 matches

Mail list logo