date:20250131

[gcc r15-7300] niter: Make build_cltz_expr more robust [PR118689]

2025-01-31 Thread Jakub Jelinek via Gcc-cvs

https://gcc.gnu.org/g:85e1714b0606579a339c234510063e057fe662af

commit r15-7300-g85e1714b0606579a339c234510063e057fe662af
Author: Jakub Jelinek 
Date:   Fri Jan 31 11:02:41 2025 +0100

niter: Make build_cltz_expr more robust [PR118689]

Since my r15-7223 the niter analysis can recognize one loop during bootstrap
as being ctz like.
The patch just turned
@@ -2173,7 +2173,7 @@ PROC m2pim_NumberIO_BinToStr (CARDINAL x
   _T535_44 = &buf[i.40_2]{lb: 1 sz: 4};
   _T536_45 = x_21 & 1;
   *_T535_44 = _T536_45;
-  _T537_47 = x_21 / 2;
+  _T537_47 = x_21 >> 1;
   x_48 = _T537_47;
   # DEBUG x => x_48
   if (x_48 != 0)
which is not a big deal for the number_of_iterations_cltz optimization, it
recognizes both right shift by 1 and unsigned division by 2 (and similarly
for clz left shift by 1 or multiplication by 2).
But starting with forwprop1 that change also resulted in
@@ -1875,9 +1875,9 @@ PROC m2pim_NumberIO_BinToStr (CARDINAL x
   i.40_2 = (INTEGER) _T530_34;
   _T536_45 = x_21 & 1;
   MEM  [(CARDINAL *)&buf][i.40_2]{lb: 1 sz: 4} = _T536_45;
-  _T537_47 = x_21 / 2;
+  _T537_47 = x_21 >> 1;
   # DEBUG x => _T537_47
-  if (x_21 > 1)
+  if (_T537_47 != 0)
 goto ; [INV]
   else
 goto ; [INV]
and apparently it is only the latter form that number_of_iterations_cltz
pattern matches, not the former (after all, that was the exact reason
for r15-7223).
The problem is that build_cltz_expr assumes if IFN_C[LT]Z can't be used it
can use the __builtin_c[lt]z{,l,ll} builtins, and while most of the FEs do
create them, modula 2 does not.

The following patch just lets us punt if the FE doesn't build those 
builtins.
I've filed a PR against modula2 so that they add the builtins too.

2025-01-31  Jakub Jelinek  

PR tree-optimization/118689
PR modula2/115032
* tree-ssa-loop-niter.cc (build_cltz_expr): Return NULL_TREE if fn 
is
NULL and use_ifn is false.

Diff:
---
 gcc/tree-ssa-loop-niter.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcc/tree-ssa-loop-niter.cc b/gcc/tree-ssa-loop-niter.cc
index 7743970bf3d5..9ce881344142 100644
--- a/gcc/tree-ssa-loop-niter.cc
+++ b/gcc/tree-ssa-loop-niter.cc
@@ -2238,6 +2238,8 @@ build_cltz_expr (tree src, bool leading, bool 
define_at_zero)
  build_int_cst (integer_type_node, prec));
}
 }
+  else if (fn == NULL_TREE)
+return NULL_TREE;
   else if (prec == 2 * lli_prec)
 {
   tree src1 = fold_convert (long_long_unsigned_type_node,

[gcc r15-7299] Do not rely on non-SLP analysis for SLP outer loop vectorization

2025-01-31 Thread Richard Biener via Gcc-cvs

https://gcc.gnu.org/g:9e3ceed8d50912e271e84389ed80aeea47184e59

commit r15-7299-g9e3ceed8d50912e271e84389ed80aeea47184e59
Author: Richard Biener 
Date:   Thu Jan 30 14:52:14 2025 +0100

Do not rely on non-SLP analysis for SLP outer loop vectorization

We end up relying on non-SLP analysis of the inner loop LC PHI to
set the vectorizationb method for SLP since vectorizable_reduction
claims responsibility.  The following fixes this.

* tree-vect-loop.cc (vect_analyze_loop_operations): Only
call vectorizable_lc_phi when not PURE_SLP.
(vectorizable_reduction): Do not claim having handled
the inner loop LC PHI for outer loop vectorization.

Diff:
---
 gcc/tree-vect-loop.cc | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index ce674a71e8a0..03426207879f 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -2171,6 +2171,7 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
  if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def
   || (STMT_VINFO_DEF_TYPE (stmt_info)
   == vect_double_reduction_def))
+ && ! PURE_SLP_STMT (stmt_info)
  && !vectorizable_lc_phi (loop_vinfo,
   stmt_info, NULL, NULL))
return opt_result::failure_at (phi, "unsupported phi\n");
@@ -7770,9 +7771,10 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
{
  /* For SLP we arrive here for both the inner loop LC PHI and
 the outer loop PHI.  The latter is what we want to analyze
-the reduction with.  */
+the reduction with.  The LC PHI is handled by
+vectorizable_lc_phi.  */
  gcc_assert (slp_node);
- return true;
+ return gimple_phi_num_args (as_a  (stmt_info->stmt)) == 2;
}
   use_operand_p use_p;
   gimple *use_stmt;

[gcc r15-7301] debug/100530 - Revert QUAL_ADDR_SPACE handling from dwarf2out.cc

2025-01-31 Thread Richard Biener via Gcc-cvs

https://gcc.gnu.org/g:319f1d042179b381becf4bf1d0f6b9dab6e84884

commit r15-7301-g319f1d042179b381becf4bf1d0f6b9dab6e84884
Author: Richard Biener 
Date:   Fri Jan 31 08:56:39 2025 +0100

debug/100530 - Revert QUAL_ADDR_SPACE handling from dwarf2out.cc

The bug clearly shows that r8-4385-ga297ccb52e0c89 was wrong in
enabling handling of address-space qualification as DWARF type
qualifiers as the code isn't prepared to it actually be not handled
and ends up changing a lesser qualified (without address-space)
type DIE in ways tripping asserts.  The following reverts that
part which then causes the DIE for the same type with address-space
qualifiers removed to be re-used since there's currently no code
to encode address-spaces within dwarf2out.cc or in the DWARF spec.

r8-4385-ga297ccb52e0c89 did not come with a testcase nor a good
description of the bug fixed - I've verified const qualification
mixed with address-spaces creates the expected DWARF.

PR debug/100530
* dwarf2out.cc (modified_type_die): Do not claim we handle
address-space qualification with dwarf_qual_info[].

* gcc.target/i386/pr100530.c: New testcase.

Diff:
---
 gcc/dwarf2out.cc | 3 +--
 gcc/testsuite/gcc.target/i386/pr100530.c | 5 +
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/gcc/dwarf2out.cc b/gcc/dwarf2out.cc
index 8085b8d85d89..43884f206c07 100644
--- a/gcc/dwarf2out.cc
+++ b/gcc/dwarf2out.cc
@@ -13678,8 +13678,7 @@ modified_type_die (tree type, int cv_quals, bool 
reverse,
   struct array_descr_info info;
   /* Only these cv-qualifiers are currently handled.  */
   const int cv_qual_mask = (TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE
-   | TYPE_QUAL_RESTRICT | TYPE_QUAL_ATOMIC |
-   ENCODE_QUAL_ADDR_SPACE(~0U));
+   | TYPE_QUAL_RESTRICT | TYPE_QUAL_ATOMIC);
   /* DW_AT_endianity is specified only for base types in the standard.  */
   const bool reverse_type
 = need_endianity_attribute_p (reverse)
diff --git a/gcc/testsuite/gcc.target/i386/pr100530.c 
b/gcc/testsuite/gcc.target/i386/pr100530.c
new file mode 100644
index ..005c01991295
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr100530.c
@@ -0,0 +1,5 @@
+/* { dg-do compile } */
+/* { dg-options "-g" } */
+
+__seg_gs const int var;
+__seg_gs int foo;

[gcc r15-7302] force-indirect-call-2.c: Allow indirect branch via GOT

2025-01-31 Thread H.J. Lu via Gcc-cvs

https://gcc.gnu.org/g:5f34558100e8466aa70373e2f930bf1013192ba8

commit r15-7302-g5f34558100e8466aa70373e2f930bf1013192ba8
Author: H.J. Lu 
Date:   Fri Jan 31 18:28:23 2025 +0800

force-indirect-call-2.c: Allow indirect branch via GOT

r15-1619-g3b9b8d6cfdf593 changed the codegen from

f2:
.cfi_startproc
pushq   %rbx
.cfi_def_cfa_offset 16
.cfi_offset 3, -16
movqf1@GOTPCREL(%rip), %rbx
call*%rbx
leaqf3(%rip), %rax
call*%rax
movq%rbx, %rax
popq%rbx
.cfi_def_cfa_offset 8
jmp *%rax
.cfi_endproc

to

f2:
.cfi_startproc
subq$8, %rsp
.cfi_def_cfa_offset 16
call*f1@GOTPCREL(%rip)
leaqf3(%rip), %rax
call*%rax
addq$8, %rsp
.cfi_def_cfa_offset 8
jmp *f1@GOTPCREL(%rip)
.cfi_endproc

Since it is OK to indirect call via memory for -mforce-indirect-call,
allow indirect branch via GOT.

PR target/115673
* gcc.target/i386/force-indirect-call-2.c: Allow indirect branch
via GOT.

Signed-off-by: H.J. Lu 

Diff:
---
 gcc/testsuite/gcc.target/i386/force-indirect-call-2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/i386/force-indirect-call-2.c 
b/gcc/testsuite/gcc.target/i386/force-indirect-call-2.c
index 2f7023630415..405c97c80008 100644
--- a/gcc/testsuite/gcc.target/i386/force-indirect-call-2.c
+++ b/gcc/testsuite/gcc.target/i386/force-indirect-call-2.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -mforce-indirect-call -fPIC" } */
 /* { dg-require-effective-target fpic } */
-/* { dg-final { scan-assembler-times "(?:call|jmp)\[ \\t\]+\\*%" 3 } } */
+/* { dg-final { scan-assembler-times "(?:call|jmp)\[ \\t\]+\\*" 3 } } */
 
 #include "force-indirect-call-1.c"

[gcc r15-7307] OpenMP/Fortran: Add missing pop_state in parse_omp_dispatch [PR118714]

2025-01-31 Thread Paul-Antoine Arras via Gcc-cvs

https://gcc.gnu.org/g:af51fe9593ec0e9373f8a453bab2129a48193a44

commit r15-7307-gaf51fe9593ec0e9373f8a453bab2129a48193a44
Author: Paul-Antoine Arras 
Date:   Fri Jan 31 11:41:47 2025 +0100

OpenMP/Fortran: Add missing pop_state in parse_omp_dispatch [PR118714]

When the ST_NONE case is taken, the function returns immediately. Not 
calling
pop_state causes a dangling pointer.

PR fortran/118714

gcc/fortran/ChangeLog:

* parse.cc (parse_omp_dispatch): Add missing pop_state.

Diff:
---
 gcc/fortran/parse.cc | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/gcc/fortran/parse.cc b/gcc/fortran/parse.cc
index 00cd23d77299..5094d9d3eadf 100644
--- a/gcc/fortran/parse.cc
+++ b/gcc/fortran/parse.cc
@@ -6375,7 +6375,10 @@ parse_omp_dispatch (void)
 
   st = next_statement ();
   if (st == ST_NONE)
-return st;
+{
+  pop_state ();
+  return st;
+}
   if (st == ST_CALL || st == ST_ASSIGNMENT)
 accept_statement (st);
   else

[gcc r15-7304] Fix wrong elaboration for allocator at library level of dynamic library

2025-01-31 Thread Eric Botcazou via Gcc-cvs

https://gcc.gnu.org/g:3b49727014f29d46a605228d137e5e582df3

commit r15-7304-g3b49727014f29d46a605228d137e5e582df3
Author: Eric Botcazou 
Date:   Fri Jan 31 12:41:19 2025 +0100

Fix wrong elaboration for allocator at library level of dynamic library

The problem was preexisting for class-wide allocators, but now occurs for
allocators of controlled types too, because of the recent overhaul of the
finalization machinery.

gcc/ada/
* gcc-interface/utils.cc (gnat_pushdecl): Clear TREE_PUBLIC on
functions really nested in another function.

Diff:
---
 gcc/ada/gcc-interface/utils.cc | 16 ++--
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/ada/gcc-interface/utils.cc b/gcc/ada/gcc-interface/utils.cc
index 5a90a1b81f72..1448716acc5a 100644
--- a/gcc/ada/gcc-interface/utils.cc
+++ b/gcc/ada/gcc-interface/utils.cc
@@ -882,16 +882,20 @@ gnat_pushdecl (tree decl, Node_Id gnat_node)
   if (!deferred_decl_context && !context)
 context = get_global_context ();
 
-  /* Functions imported in another function are not really nested.
- For really nested functions mark them initially as needing
- a static chain for uses of that flag before unnesting;
- lower_nested_functions will then recompute it.  */
+  /* Mark functions really nested in another function, that is to say defined
+ there as opposed to imported from elsewhere, as initially needing a static
+ chain for the sake of uniformity (lower_nested_functions will recompute it
+ exacly later) and as private to the translation unit (the static chain may
+ be clobbered by calling conventions used across translation units).  */
   if (TREE_CODE (decl) == FUNCTION_DECL
-  && !TREE_PUBLIC (decl)
+  && !DECL_EXTERNAL (decl)
   && context
   && (TREE_CODE (context) == FUNCTION_DECL
  || decl_function_context (context)))
-DECL_STATIC_CHAIN (decl) = 1;
+{
+  DECL_STATIC_CHAIN (decl) = 1;
+  TREE_PUBLIC (decl) = 0;
+}
 
   if (!deferred_decl_context)
 DECL_CONTEXT (decl) = context;

[gcc r15-7308] Fortran: host association issue with symbol in COMMON block [PR108454]

2025-01-31 Thread Harald Anlauf via Gcc-cvs

https://gcc.gnu.org/g:d6418fe22684f9335474d1fd405ade45954c069d

commit r15-7308-gd6418fe22684f9335474d1fd405ade45954c069d
Author: Harald Anlauf 
Date:   Thu Jan 30 22:21:19 2025 +0100

Fortran: host association issue with symbol in COMMON block [PR108454]

When resolving a flavorless symbol that is already registered with a COMMON
block, and which neither has the intrinsic, generic, or external attribute,
skip searching among interfaces to avoid false resolution to a derived type
of the same name.

PR fortran/108454

gcc/fortran/ChangeLog:

* resolve.cc (resolve_common_blocks): Initialize variable.
(resolve_symbol): If a symbol is already registered with a COMMON
block, do not search for an interface with the same name.

gcc/testsuite/ChangeLog:

* gfortran.dg/common_29.f90: New test.

Diff:
---
 gcc/fortran/resolve.cc  |  9 -
 gcc/testsuite/gfortran.dg/common_29.f90 | 34 +
 2 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/gcc/fortran/resolve.cc b/gcc/fortran/resolve.cc
index 12a623da8511..f2eef12199c0 100644
--- a/gcc/fortran/resolve.cc
+++ b/gcc/fortran/resolve.cc
@@ -1049,7 +1049,7 @@ resolve_common_vars (gfc_common_head *common_block, bool 
named_common)
 static void
 resolve_common_blocks (gfc_symtree *common_root)
 {
-  gfc_symbol *sym;
+  gfc_symbol *sym = NULL;
   gfc_gsymbol * gsym;
 
   if (common_root == NULL)
@@ -17693,6 +17693,12 @@ resolve_symbol (gfc_symbol *sym)
  && sym->attr.if_source == IFSRC_UNKNOWN
  && sym->ts.type == BT_UNKNOWN))
 {
+  /* A symbol in a common block might not have been resolved yet properly.
+Do not try to find an interface with the same name.  */
+  if (sym->attr.flavor == FL_UNKNOWN && !sym->attr.intrinsic
+ && !sym->attr.generic && !sym->attr.external
+ && sym->attr.in_common)
+   goto skip_interfaces;
 
 /* If we find that a flavorless symbol is an interface in one of the
parent namespaces, find its symtree in this namespace, free the
@@ -17716,6 +17722,7 @@ resolve_symbol (gfc_symbol *sym)
}
}
 
+skip_interfaces:
   /* Otherwise give it a flavor according to such attributes as
 it has.  */
   if (sym->attr.flavor == FL_UNKNOWN && sym->attr.external == 0
diff --git a/gcc/testsuite/gfortran.dg/common_29.f90 
b/gcc/testsuite/gfortran.dg/common_29.f90
new file mode 100644
index ..66f2a18a4836
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/common_29.f90
@@ -0,0 +1,34 @@
+! { dg-do compile }
+! PR fortran/108454
+!
+! Contributed by G.Steinmetz
+
+module m
+  type t
+  end type
+contains
+  subroutine s
+common t
+  end
+end
+
+module m2
+  implicit none
+  type t
+  end type
+contains
+  subroutine s
+real :: t
+common /com/ t
+  end
+end
+
+module m3
+  type t
+  end type
+contains
+  subroutine s
+type(t) :: x  ! { dg-error "cannot be host associated at .1." }
+common t  ! { dg-error "incompatible object of the same name" }
+  end
+end

[gcc(refs/users/meissner/heads/work192)] Change TARGET_POPCNTB to TARGET_POWER5.

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:26a0ac5054ad8dea107ffba593501f62e6d56aeb

commit 26a0ac5054ad8dea107ffba593501f62e6d56aeb
Author: Michael Meissner 
Date:   Fri Jan 31 22:20:48 2025 -0500

Change TARGET_POPCNTB to TARGET_POWER5.

This patch changes TARGET_POPCNTB to TARGET_POWER5.  The -mpopcntb switch 
is not
being changed in this patch, just the name of the macros used to determine 
if
the PowerPC processor supports ISA 2.2 (Power5).

2025-01-31  Michael Meissner  

gcc/

* gcc/config/rs6000/rs6000-builtin.cc (rs6000_builtin_is_supported):
Change TARGET_POPCNTB to TARGET_POWER5.
* gcc/config/rs6000/rs6000.cc (rs6000_option_override_internal):
Likewise.
* gcc/config/rs6000/rs6000.h (TARGET_FCFID): Likewise.
(TARGET_POWER5): New macro.
(TARGET_EXTRA_BUILTINS): Change TARGET_POPCNTB to TARGET_POWER5.
(TARGET_FRE): Likewise.
(TARGET_FRSQRTES): Likewise.
* gcc/config/rs6000/rs6000.md (enabled attribute): Likewise.

Diff:
---
 gcc/config/rs6000/rs6000-builtin.cc |  2 +-
 gcc/config/rs6000/rs6000.cc |  2 +-
 gcc/config/rs6000/rs6000.h  | 11 +++
 gcc/config/rs6000/rs6000.md |  2 +-
 4 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin.cc 
b/gcc/config/rs6000/rs6000-builtin.cc
index 111802381acb..4ed2bc1ca89e 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -155,7 +155,7 @@ rs6000_builtin_is_supported (enum rs6000_gen_builtins 
fncode)
 case ENB_ALWAYS:
   return true;
 case ENB_P5:
-  return TARGET_POPCNTB;
+  return TARGET_POWER5;
 case ENB_P6:
   return TARGET_CMPB;
 case ENB_P6_64:
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 675b039c2b65..011ba7c899ec 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -3926,7 +3926,7 @@ rs6000_option_override_internal (bool global_init_p)
 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
   else if (TARGET_FPRND)
 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
-  else if (TARGET_POPCNTB)
+  else if (TARGET_POWER5)
 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
   else if (TARGET_ALTIVEC)
 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index a60d7a53cfaf..5ff801c8801d 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -448,7 +448,7 @@ extern int rs6000_vector_align[];
Enable 32-bit fcfid's on any of the switches for newer ISA machines.  */
 #define TARGET_FCFID   (TARGET_POWERPC64   \
 || TARGET_PPC_GPOPT/* 970/power4 */\
-|| TARGET_POPCNTB  /* ISA 2.02 */  \
+|| TARGET_POWER5   /* ISA 2.02 */  \
 || TARGET_CMPB /* ISA 2.05 */  \
 || TARGET_POPCNTD) /* ISA 2.06 */
 
@@ -499,6 +499,9 @@ extern int rs6000_vector_align[];
 #define TARGET_MINMAX  (TARGET_HARD_FLOAT && TARGET_PPC_GFXOPT \
 && (TARGET_P9_MINMAX || !flag_trapping_math))
 
+/* Convert ISA bits like POPCNTB to PowerPC processors like POWER5.  */
+#define TARGET_POWER5  TARGET_POPCNTB
+
 /* In switching from using target_flags to using rs6000_isa_flags, the options
machinery creates OPTION_MASK_ instead of MASK_.  The MASK_
options that have not yet been replaced by their OPTION_MASK_
@@ -525,7 +528,7 @@ extern int rs6000_vector_align[];
 
 #define TARGET_EXTRA_BUILTINS  (TARGET_POWERPC64\
 || TARGET_PPC_GPOPT /* 970/power4 */\
-|| TARGET_POPCNTB   /* ISA 2.02 */  \
+|| TARGET_POWER5/* ISA 2.02 */  \
 || TARGET_CMPB  /* ISA 2.05 */  \
 || TARGET_POPCNTD   /* ISA 2.06 */  \
 || TARGET_ALTIVEC   \
@@ -541,9 +544,9 @@ extern int rs6000_vector_align[];
 #define TARGET_FRES(TARGET_HARD_FLOAT && TARGET_PPC_GFXOPT)
 
 #define TARGET_FRE (TARGET_HARD_FLOAT \
-&& (TARGET_POPCNTB || VECTOR_UNIT_VSX_P (DFmode)))
+&& (TARGET_POWER5 || VECTOR_UNIT_VSX_P (DFmode)))
 
-#define TARGET_FRSQRTES(TARGET_HARD_FLOAT && TARGET_POPCNTB \
+#define TARGET_FRSQRTES(TARGET_HARD_FLOAT && TARGET_POWER5 \
 && TARGET_PPC_GFXOPT)
 
 #define TARGET_FRSQRTE (TARGET_HARD_FLOAT \
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 9c718ca2a226..c5bd273be8b3 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs

[gcc(refs/users/meissner/heads/work192)] Change TARGET_FPRND to TARGET_POWER5X.

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:d092688f7f87f2eff20cbb856bcff5c1295e54d0

commit d092688f7f87f2eff20cbb856bcff5c1295e54d0
Author: Michael Meissner 
Date:   Fri Jan 31 22:21:28 2025 -0500

Change TARGET_FPRND to TARGET_POWER5X.

This patch changes TARGET_POWER5X to TARGET_POWER5.  The -mfprnd switch is 
not
being changed, just the name of the macros used to determine if the PowerPC
processor supports ISA 2.4 (Power5x).

2025-01-31  Michael Meissner  

gcc/

* gcc/config/rs6000/rs6000.cc (rs6000_option_override_internal):
Change TARGET_FPRND to TARGET_POWER5X.
* gcc/config/rs6000/rs6000.h (TARGET_POWERP5X): New macro.
* gcc/config/rs6000/rs6000.md (fmod3): Change TARGET_FPRND to
TARGET_POWER5X.
(remainder3): Likewise.
(fctiwuz_): Likewise.
(ceil2): Likewise.
(floor2): Likewise.
(round2): Likewise.

Diff:
---
 gcc/config/rs6000/rs6000.cc |  4 ++--
 gcc/config/rs6000/rs6000.h  |  1 +
 gcc/config/rs6000/rs6000.md | 14 +++---
 3 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 011ba7c899ec..07f5e58532a5 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -3924,7 +3924,7 @@ rs6000_option_override_internal (bool global_init_p)
 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
   else if (TARGET_CMPB)
 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
-  else if (TARGET_FPRND)
+  else if (TARGET_POWER5X)
 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
   else if (TARGET_POWER5)
 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
@@ -3951,7 +3951,7 @@ rs6000_option_override_internal (bool global_init_p)
   rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
 }
 
-  if (!TARGET_FPRND && TARGET_VSX)
+  if (!TARGET_POWER5X && TARGET_VSX)
 {
   if (rs6000_isa_flags_explicit & OPTION_MASK_FPRND)
/* TARGET_VSX = 1 implies Power 7 and newer */
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 5ff801c8801d..882a3864ca66 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -501,6 +501,7 @@ extern int rs6000_vector_align[];
 
 /* Convert ISA bits like POPCNTB to PowerPC processors like POWER5.  */
 #define TARGET_POWER5  TARGET_POPCNTB
+#define TARGET_POWER5X TARGET_FPRND
 
 /* In switching from using target_flags to using rs6000_isa_flags, the options
machinery creates OPTION_MASK_ instead of MASK_.  The MASK_
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index c5bd273be8b3..045ce22a03c8 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -5171,7 +5171,7 @@
(use (match_operand:SFDF 1 "gpc_reg_operand"))
(use (match_operand:SFDF 2 "gpc_reg_operand"))]
   "TARGET_HARD_FLOAT
-   && TARGET_FPRND
+   && TARGET_POWER5X
&& flag_unsafe_math_optimizations"
 {
   rtx div = gen_reg_rtx (mode);
@@ -5189,7 +5189,7 @@
(use (match_operand:SFDF 1 "gpc_reg_operand"))
(use (match_operand:SFDF 2 "gpc_reg_operand"))]
   "TARGET_HARD_FLOAT
-   && TARGET_FPRND
+   && TARGET_POWER5X
&& flag_unsafe_math_optimizations"
 {
   rtx div = gen_reg_rtx (mode);
@@ -6689,7 +6689,7 @@
 (define_insn "*friz"
   [(set (match_operand:DF 0 "gpc_reg_operand" "=d,wa")
(float:DF (fix:DI (match_operand:DF 1 "gpc_reg_operand" "d,wa"]
-  "TARGET_HARD_FLOAT && TARGET_FPRND
+  "TARGET_HARD_FLOAT && TARGET_POWER5X
&& flag_unsafe_math_optimizations && !flag_trapping_math && TARGET_FRIZ"
   "@
friz %0,%1
@@ -6817,7 +6817,7 @@
   [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d,wa")
(unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "d,wa")]
 UNSPEC_FRIZ))]
-  "TARGET_HARD_FLOAT && TARGET_FPRND"
+  "TARGET_HARD_FLOAT && TARGET_POWER5X"
   "@
friz %0,%1
xsrdpiz %x0,%x1"
@@ -6827,7 +6827,7 @@
   [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d,wa")
(unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "d,wa")]
 UNSPEC_FRIP))]
-  "TARGET_HARD_FLOAT && TARGET_FPRND"
+  "TARGET_HARD_FLOAT && TARGET_POWER5X"
   "@
frip %0,%1
xsrdpip %x0,%x1"
@@ -6837,7 +6837,7 @@
   [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d,wa")
(unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "d,wa")]
 UNSPEC_FRIM))]
-  "TARGET_HARD_FLOAT && TARGET_FPRND"
+  "TARGET_HARD_FLOAT && TARGET_POWER5X"
   "@
frim %0,%1
xsrdpim %x0,%x1"
@@ -6848,7 +6848,7 @@
   [(set (match_operand:SFDF 0 "gpc_reg_operand" "=")
(unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "")]
 UNSPEC_FRIN))]
-  "TARGET_HARD_FLOAT && TARGET_FPRND"
+  "TARGET_HARD_FLOAT && TARGET_POWER5X"
   "frin %0,%1"
   [(set_attr "type" "fp")])

[gcc(refs/users/meissner/heads/work192)] Change TARGET_CMPB to TARGET_POWER6.

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:42c2dcb5cc6baa9fc1298b88813f59a1805d0a39

commit 42c2dcb5cc6baa9fc1298b88813f59a1805d0a39
Author: Michael Meissner 
Date:   Fri Jan 31 22:22:10 2025 -0500

Change TARGET_CMPB to TARGET_POWER6.

This patch changes TARGET_CMPB to TARGET_POWER6.  The -mcmpb switch is not 
being
changed, just the name of the macros used to determine if the PowerPC 
processor
supports ISA 2.5 (Power6).

2025-01-31  Michael Meissner  

gcc/

* gcc/config/rs6000/rs6000-builtin.cc (rs6000_builtin_is_supported):
Change TARGET_CMPB to TARGET_POWER6.
* gcc/config/rs6000/rs6000.cc (rs6000_option_override_internal):
Likewise.
(rs6000_rtx_costs): Likewise.
(rs6000_emit_parity): Likewise.
* gcc/config/rs6000/rs6000.h (TARGET_FCFID): Likewise.
(TARGET_LFIWAX): Likewise.
(TARGET_POWER6): New macro.
(TARGET_EXTRA_BUILTINS): Change TARGET_CMPB to TARGET_POWER6.
* gcc/config/rs6000/rs6000.md (enabled attribute): Likewise.
(parity2_cmp): Likewise.
(cmpb3): Likewise.
(copysign3): Likewise.
(copysign3_fcpsgn): Likewise.
(cmpstrnsi): Likewise.
(cmpstrsi): Likewise.

Diff:
---
 gcc/config/rs6000/rs6000-builtin.cc |  4 ++--
 gcc/config/rs6000/rs6000.cc |  8 
 gcc/config/rs6000/rs6000.h  |  7 ---
 gcc/config/rs6000/rs6000.md | 16 
 4 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin.cc 
b/gcc/config/rs6000/rs6000-builtin.cc
index 4ed2bc1ca89e..dbb8520ab039 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -157,9 +157,9 @@ rs6000_builtin_is_supported (enum rs6000_gen_builtins 
fncode)
 case ENB_P5:
   return TARGET_POWER5;
 case ENB_P6:
-  return TARGET_CMPB;
+  return TARGET_POWER6;
 case ENB_P6_64:
-  return TARGET_CMPB && TARGET_POWERPC64;
+  return TARGET_POWER6 && TARGET_POWERPC64;
 case ENB_P7:
   return TARGET_POPCNTD;
 case ENB_P7_64:
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 07f5e58532a5..d2814364b3d0 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -3922,7 +3922,7 @@ rs6000_option_override_internal (bool global_init_p)
 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
   else if (TARGET_DFP)
 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
-  else if (TARGET_CMPB)
+  else if (TARGET_POWER6)
 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
   else if (TARGET_POWER5X)
 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
@@ -4797,7 +4797,7 @@ rs6000_option_override_internal (bool global_init_p)
  DERAT mispredict penalty.  However the LVE and STVE altivec instructions
  need indexed accesses and the type used is the scalar type of the element
  being loaded or stored.  */
-TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_CMPB
+TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_POWER6
  && !TARGET_ALTIVEC);
 
   /* Set the -mrecip options.  */
@@ -22377,7 +22377,7 @@ rs6000_rtx_costs (rtx x, machine_mode mode, int 
outer_code,
   return false;
 
 case PARITY:
-  *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
+  *total = COSTS_N_INSNS (TARGET_POWER6 ? 2 : 6);
   return false;
 
 case NOT:
@@ -23204,7 +23204,7 @@ rs6000_emit_parity (rtx dst, rtx src)
   tmp = gen_reg_rtx (mode);
 
   /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can.  */
-  if (TARGET_CMPB)
+  if (TARGET_POWER6)
 {
   if (mode == SImode)
{
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 882a3864ca66..62e1662d078a 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -449,12 +449,12 @@ extern int rs6000_vector_align[];
 #define TARGET_FCFID   (TARGET_POWERPC64   \
 || TARGET_PPC_GPOPT/* 970/power4 */\
 || TARGET_POWER5   /* ISA 2.02 */  \
-|| TARGET_CMPB /* ISA 2.05 */  \
+|| TARGET_POWER6   /* ISA 2.05 */  \
 || TARGET_POPCNTD) /* ISA 2.06 */
 
 #define TARGET_FCTIDZ  TARGET_FCFID
 #define TARGET_STFIWX  TARGET_PPC_GFXOPT
-#define TARGET_LFIWAX  TARGET_CMPB
+#define TARGET_LFIWAX  TARGET_POWER6
 #define TARGET_LFIWZX  TARGET_POPCNTD
 #define TARGET_FCFIDS  TARGET_POPCNTD
 #define TARGET_FCFIDU  TARGET_POPCNTD
@@ -502,6 +502,7 @@ extern int rs6000_vector_align[];
 /* Convert ISA bits like POPCNTB to PowerPC processors like POWER5.  */
 #define TARGET_POWER5  TARGET_POPCNTB
 #define TARGET_POWER5X TARGET_FPRND
+#define TARGET_POWER6

[gcc(refs/users/meissner/heads/work192)] Change TARGET_POPCNTD to TARGET_POWER7.

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:24cf922852a755bbff858b0ebdea14bd6fb862f5

commit 24cf922852a755bbff858b0ebdea14bd6fb862f5
Author: Michael Meissner 
Date:   Fri Jan 31 22:22:51 2025 -0500

Change TARGET_POPCNTD to TARGET_POWER7.

This patch changes TARGET_POPCNTD to TARGET_POWER7.  The -mpopcntd switch 
is not
being changed, just the name of the macros used to determine if the PowerPC
processor supports ISA 2.6 (Power7).

2025-01-31  Michael Meissner  

gcc/

* gcc/config/rs6000/dfp.md (cmp_internal1): Change 
TARGET_POPCNTD
to TARGET_POWER7.
* gcc/config/rs6000/rs6000-builtin.cc (rs6000_builtin_is_supported):
Likewise.
* gcc/config/rs6000/rs6000-string.cc (expand_block_compare): 
Likewise.
* gcc/config/rs6000/rs6000.cc (rs6000_hard_regno_mode_ok_uncached):
Likewise.
(rs6000_option_override_internal): Likewise.
(rs6000_rtx_costs): Likewise.
* gcc/config/rs6000/rs6000.h (TARGET_LDBRX): Likewise.
(TARGET_FCFID): Likewise.
(TARGET_LFIWZX): Likewise.
(TARGET_FCFIDS): Likewise.
(TARGET_FCFIDU): Likewise.
(TARGET_FCFIDUS): Likewise.
(TARGET_FCTIDUZ): Likewise.
(TARGET_FCTIWUZ): Likewise.
(TARGET_FCTIDUZ): Likewise.
(TARGET_POWER7): New macro.
(TARGET_EXTRA_BUILTINS): Change TARGET_POPCNTD to TARGET_POWER7.
(CTZ_DEFINED_VALUE_AT_ZERO): Likewise.
* gcc/config/rs6000/rs6000.md (enabled attribute): Likewise.
(lrintsi2): Likewise.
(lrintsi): Likewise.
(lrintsi_di): Likewise.
(cmpmemsi): Likewise.
(bpermd_): Likewise.
(addg6s): Likewise.
(cdtbcd): Likewise.
(cbcdtd): Likewise.
(div_): Likewise.

Diff:
---
 gcc/config/rs6000/dfp.md|  2 +-
 gcc/config/rs6000/rs6000-builtin.cc |  4 ++--
 gcc/config/rs6000/rs6000-string.cc  |  2 +-
 gcc/config/rs6000/rs6000.cc |  8 
 gcc/config/rs6000/rs6000.h  | 21 +++--
 gcc/config/rs6000/rs6000.md | 20 ++--
 6 files changed, 29 insertions(+), 28 deletions(-)

diff --git a/gcc/config/rs6000/dfp.md b/gcc/config/rs6000/dfp.md
index 59fa66ae15c8..5919149682b2 100644
--- a/gcc/config/rs6000/dfp.md
+++ b/gcc/config/rs6000/dfp.md
@@ -214,7 +214,7 @@
 (define_insn "floatdidd2"
   [(set (match_operand:DD 0 "gpc_reg_operand" "=d")
(float:DD (match_operand:DI 1 "gpc_reg_operand" "d")))]
-  "TARGET_DFP && TARGET_POPCNTD"
+  "TARGET_DFP && TARGET_POWER7"
   "dcffix %0,%1"
   [(set_attr "type" "dfp")])
 
diff --git a/gcc/config/rs6000/rs6000-builtin.cc 
b/gcc/config/rs6000/rs6000-builtin.cc
index dbb8520ab039..2366b2aee00a 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -161,9 +161,9 @@ rs6000_builtin_is_supported (enum rs6000_gen_builtins 
fncode)
 case ENB_P6_64:
   return TARGET_POWER6 && TARGET_POWERPC64;
 case ENB_P7:
-  return TARGET_POPCNTD;
+  return TARGET_POWER7;
 case ENB_P7_64:
-  return TARGET_POPCNTD && TARGET_POWERPC64;
+  return TARGET_POWER7 && TARGET_POWERPC64;
 case ENB_P8:
   return TARGET_POWER8;
 case ENB_P8V:
diff --git a/gcc/config/rs6000/rs6000-string.cc 
b/gcc/config/rs6000/rs6000-string.cc
index 3d2911ca08a0..703f77fa0bf1 100644
--- a/gcc/config/rs6000/rs6000-string.cc
+++ b/gcc/config/rs6000/rs6000-string.cc
@@ -1949,7 +1949,7 @@ bool
 expand_block_compare (rtx operands[])
 {
   /* TARGET_POPCNTD is already guarded at expand cmpmemsi.  */
-  gcc_assert (TARGET_POPCNTD);
+  gcc_assert (TARGET_POWER7);
 
   /* For P8, this case is complicated to handle because the subtract
  with carry instructions do not generate the 64-bit carry and so
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index d2814364b3d0..1bba77244c25 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -1924,7 +1924,7 @@ rs6000_hard_regno_mode_ok_uncached (int regno, 
machine_mode mode)
  if(GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
return 1;
 
- if (TARGET_POPCNTD && mode == SImode)
+ if (TARGET_POWER7 && mode == SImode)
return 1;
 
  if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
@@ -3918,7 +3918,7 @@ rs6000_option_override_internal (bool global_init_p)
 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
   else if (TARGET_VSX)
 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
-  else if (TARGET_POPCNTD)
+  else if (TARGET_POWER7)
 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
   else if (TARGET_DFP)
 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
@@ -4131,7 +4131,7 @@ rs6000_option_override_internal (bool global_init_p)
   else if (TARGET_LONG_DOUBLE_128)

[gcc(refs/users/meissner/heads/work192)] Add support for -mcpu=future

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:d60b9a8fad9298f2edadb2d10bac9c308c3a16ba

commit d60b9a8fad9298f2edadb2d10bac9c308c3a16ba
Author: Michael Meissner 
Date:   Fri Jan 31 22:25:16 2025 -0500

Add support for -mcpu=future

This patch adds the support that can be used in developing GCC support for
future PowerPC processors.

2025-01-31  Michael Meissner  

* config.gcc (powerpc*-*-*): Add support for --with-cpu=future.
* config/rs6000/aix71.h (ASM_CPU_SPEC): Add support for 
-mcpu=future.
* config/rs6000/aix72.h (ASM_CPU_SPEC): Likewise.
* config/rs6000/aix73.h (ASM_CPU_SPEC): Likewise.
* config/rs6000/driver-rs6000.cc (asm_names): Likewise.
* config/rs6000/rs6000-c.cc (rs6000_target_modify_macros): If
-mcpu=future, define _ARCH_FUTURE.
* config/rs6000/rs6000-cpus.def (FUTURE_MASKS_SERVER): New macro.
(POWERPC_MASKS): Add OPTION_MASK_FUTURE.
(future cpu): Define.
* config/rs6000/rs6000-opts.h (enum processor_type): Add
PROCESSOR_FUTURE.
* config/rs6000/rs6000-tables.opt: Regenerate.
* config/rs6000/rs6000.cc (power10_cost): Update comment.
(get_arch_flags): Add support for future processor.
(rs6000_option_override_internal): Likewise.
(rs6000_machine_from_flags): Likewise.
(rs6000_reassociation_width): Likewise.
(rs6000_adjust_cost): Likewise.
(rs6000_issue_rate): Likewise.
(rs6000_sched_reorder): Likewise.
(rs6000_sched_reorder2): Likewise.
(rs6000_register_move_cost): Likewise.
(rs6000_opt_masks): Add -mfuture.
* config/rs6000/rs6000.h (ASM_CPU_SPEC): Likewise.
* config/rs6000/rs6000.md (cpu attribute): Likewise.
* config/rs6000/rs6000.opt (-mfuture): New internal option.

Diff:
---
 gcc/config.gcc  |  4 ++--
 gcc/config/rs6000/aix71.h   |  1 +
 gcc/config/rs6000/aix72.h   |  1 +
 gcc/config/rs6000/aix73.h   |  1 +
 gcc/config/rs6000/driver-rs6000.cc  |  2 ++
 gcc/config/rs6000/rs6000-c.cc   |  2 ++
 gcc/config/rs6000/rs6000-cpus.def   |  5 +
 gcc/config/rs6000/rs6000-opts.h |  1 +
 gcc/config/rs6000/rs6000-tables.opt | 11 +++
 gcc/config/rs6000/rs6000.cc | 30 ++
 gcc/config/rs6000/rs6000.h  |  1 +
 gcc/config/rs6000/rs6000.md |  2 +-
 gcc/config/rs6000/rs6000.opt|  6 ++
 13 files changed, 52 insertions(+), 15 deletions(-)

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 9e167f7f00d5..46d08c16b316 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -536,7 +536,7 @@ powerpc*-*-*)
extra_headers="${extra_headers} ppu_intrinsics.h spu2vmx.h vec_types.h 
si2vmx.h"
extra_headers="${extra_headers} amo.h"
case x$with_cpu in
-   
xpowerpc64|xdefault64|x6[23]0|x970|xG5|xpower[3456789]|xpower1[01]|xpower6x|xrs64a|xcell|xa2|xe500mc64|xe5500|xe6500)
+   
xpowerpc64|xdefault64|x6[23]0|x970|xG5|xpower[3456789]|xpower1[01]|xpower6x|xrs64a|xcell|xa2|xe500mc64|xe5500|xe6500|xfuture)
cpu_is_64bit=yes
;;
esac
@@ -5683,7 +5683,7 @@ case "${target}" in
tm_defines="${tm_defines} CONFIG_PPC405CR"
eval "with_$which=405"
;;
-   "" | common | native \
+   "" | common | native | future \
| power[3456789] | power1[01] | power5+ | power6x \
| powerpc | powerpc64 | powerpc64le \
| rs64 \
diff --git a/gcc/config/rs6000/aix71.h b/gcc/config/rs6000/aix71.h
index 2b21dd7cd1e0..77651f5ea309 100644
--- a/gcc/config/rs6000/aix71.h
+++ b/gcc/config/rs6000/aix71.h
@@ -79,6 +79,7 @@ do {  
\
 #undef ASM_CPU_SPEC
 #define ASM_CPU_SPEC \
 "%{mcpu=native: %(asm_cpu_native); \
+  mcpu=future: -mfuture; \
   mcpu=power11: -mpwr11; \
   mcpu=power10: -mpwr10; \
   mcpu=power9: -mpwr9; \
diff --git a/gcc/config/rs6000/aix72.h b/gcc/config/rs6000/aix72.h
index 53c0bde5ad4a..652f60c7f494 100644
--- a/gcc/config/rs6000/aix72.h
+++ b/gcc/config/rs6000/aix72.h
@@ -79,6 +79,7 @@ do {  
\
 #undef ASM_CPU_SPEC
 #define ASM_CPU_SPEC \
 "%{mcpu=native: %(asm_cpu_native); \
+  mcpu=future: -mfuture; \
   mcpu=power11: -mpwr11; \
   mcpu=power10: -mpwr10; \
   mcpu=power9: -mpwr9; \
diff --git a/gcc/config/rs6000/aix73.h b/gcc/config/rs6000/aix73.h
index c7639368a264..3c66ac1d9171 100644
--- a/gcc/config/rs6000/aix73.h
+++ b/gcc/config/rs6000/aix73.h
@@ -79,6 +79,7 @@ do {  
\
 #undef ASM_CPU_SPEC
 #define ASM_CPU_SPEC \
 "%{mcpu=nati

[gcc(refs/users/meissner/heads/work192)] Add -mcpu=future tuning support.

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:132de401ee43ecc57c2011e41217c761f5b01082

commit 132de401ee43ecc57c2011e41217c761f5b01082
Author: Michael Meissner 
Date:   Fri Jan 31 22:26:01 2025 -0500

Add -mcpu=future tuning support.

This patch makes -mtune=future use the same tuning decision as 
-mtune=power11.

2025-01-31  Michael Meissner  

gcc/

* config/rs6000/power10.md (all reservations): Add future as an
alterntive to power10 and power11.

Diff:
---
 gcc/config/rs6000/power10.md | 145 ++-
 1 file changed, 73 insertions(+), 72 deletions(-)

diff --git a/gcc/config/rs6000/power10.md b/gcc/config/rs6000/power10.md
index fd31b16b3314..bdd7e58145ba 100644
--- a/gcc/config/rs6000/power10.md
+++ b/gcc/config/rs6000/power10.md
@@ -1,4 +1,5 @@
-;; Scheduling description for the IBM Power10 and Power11 processors.
+;; Scheduling description for the IBM Power10, Power11, and
+;; potential future processors.
 ;; Copyright (C) 2020-2025 Free Software Foundation, Inc.
 ;;
 ;; Contributed by Pat Haugen (pthau...@us.ibm.com).
@@ -97,12 +98,12 @@
(eq_attr "update" "no")
(eq_attr "size" "!128")
(eq_attr "prefixed" "no")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_any_power10,LU_power10")
 
 (define_insn_reservation "power10-fused-load" 4
   (and (eq_attr "type" "fused_load_cmpi,fused_addis_load,fused_load_load")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_even_power10,LU_power10")
 
 (define_insn_reservation "power10-prefixed-load" 4
@@ -110,13 +111,13 @@
(eq_attr "update" "no")
(eq_attr "size" "!128")
(eq_attr "prefixed" "yes")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_even_power10,LU_power10")
 
 (define_insn_reservation "power10-load-update" 4
   (and (eq_attr "type" "load")
(eq_attr "update" "yes")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_even_power10,LU_power10+SXU_power10")
 
 (define_insn_reservation "power10-fpload-double" 4
@@ -124,7 +125,7 @@
(eq_attr "update" "no")
(eq_attr "size" "64")
(eq_attr "prefixed" "no")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_any_power10,LU_power10")
 
 (define_insn_reservation "power10-prefixed-fpload-double" 4
@@ -132,14 +133,14 @@
(eq_attr "update" "no")
(eq_attr "size" "64")
(eq_attr "prefixed" "yes")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_even_power10,LU_power10")
 
 (define_insn_reservation "power10-fpload-update-double" 4
   (and (eq_attr "type" "fpload")
(eq_attr "update" "yes")
(eq_attr "size" "64")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_even_power10,LU_power10+SXU_power10")
 
 ; SFmode loads are cracked and have additional 3 cycles over DFmode
@@ -148,27 +149,27 @@
   (and (eq_attr "type" "fpload")
(eq_attr "update" "no")
(eq_attr "size" "32")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_even_power10,LU_power10")
 
 (define_insn_reservation "power10-fpload-update-single" 7
   (and (eq_attr "type" "fpload")
(eq_attr "update" "yes")
(eq_attr "size" "32")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_even_power10,LU_power10+SXU_power10")
 
 (define_insn_reservation "power10-vecload" 4
   (and (eq_attr "type" "vecload")
(eq_attr "size" "!256")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_any_power10,LU_power10")
 
 ; lxvp
 (define_insn_reservation "power10-vecload-pair" 4
   (and (eq_attr "type" "vecload")
(eq_attr "size" "256")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_even_power10,LU_power10+SXU_power10")
 
 ; Store Unit
@@ -178,12 +179,12 @@
(eq_attr "prefixed" "no")
(eq_attr "size" "!128")
(eq_attr "size" "!256")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_any_power10,STU_power10")
 
 (define_insn_reservation "power10-fused-store" 0
   (and (eq_attr "type" "fused_store_store")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_even_power10,STU_power10")
 
 (define_insn_reservation "power10-prefixed-store" 0
@@ -191,52 +192,52 @@
(eq_attr "prefixed" "yes")
(eq_attr "size" "!128")
(eq_attr "size" "!256")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_even_power10,STU_power10")
 
 ; Update forms have 2 cycle lat

[gcc(refs/users/meissner/heads/work192)] Change TARGET_MODULO to TARGET_POWER9.

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:952767b509d350eba3477e8a6c345a09a709d3b7

commit 952767b509d350eba3477e8a6c345a09a709d3b7
Author: Michael Meissner 
Date:   Fri Jan 31 22:23:29 2025 -0500

Change TARGET_MODULO to TARGET_POWER9.

This patch changes TARGET_MODULO to TARGET_POWER9.  The -mmodulo switch is 
not
being changed, just the name of the macros used to determine if the PowerPC
processor supports ISA 3.0 (Power9).

2025-01-31  Michael Meissner  

gcc/

* gcc/config/rs6000/rs6000-builtin.cc (rs6000_builtin_is_supported):
Change TARGET_MODULO to TARGET_POWER9.
* gcc/config/rs6000/rs6000.cc (rs6000_option_override_internal):
Likewise.
* gcc/config/rs6000/rs6000.h (TARGET_CTZ): Likewise.
(TARGET_EXTSWSLI): Likewise.
(TARGET_MADDLD): Likewise.
(TARGET_POWER9): New macro.
* gcc/config/rs6000/rs6000.md (enabled attribute): Change 
TARGET_MODULO
to TARGET_POWER9.
(mod3): Likewise.
(umod3): Likewise.
(divide/modulo peephole2): Likewise.

Diff:
---
 gcc/config/rs6000/rs6000-builtin.cc |  4 ++--
 gcc/config/rs6000/rs6000.cc |  4 ++--
 gcc/config/rs6000/rs6000.h  |  7 ---
 gcc/config/rs6000/rs6000.md | 14 +++---
 4 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin.cc 
b/gcc/config/rs6000/rs6000-builtin.cc
index 2366b2aee00a..d8ff7cf32dfd 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -169,9 +169,9 @@ rs6000_builtin_is_supported (enum rs6000_gen_builtins 
fncode)
 case ENB_P8V:
   return TARGET_P8_VECTOR;
 case ENB_P9:
-  return TARGET_MODULO;
+  return TARGET_POWER9;
 case ENB_P9_64:
-  return TARGET_MODULO && TARGET_POWERPC64;
+  return TARGET_POWER9 && TARGET_POWERPC64;
 case ENB_P9V:
   return TARGET_P9_VECTOR;
 case ENB_P10:
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 1bba77244c25..06d1bac5aa83 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -3888,7 +3888,7 @@ rs6000_option_override_internal (bool global_init_p)
 
   /* For the newer switches (vsx, dfp, etc.) set some of the older options,
  unless the user explicitly used the -mno- to disable the code.  */
-  if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_MISC)
+  if (TARGET_P9_VECTOR || TARGET_POWER9 || TARGET_P9_MISC)
 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
   else if (TARGET_P9_MINMAX)
 {
@@ -22358,7 +22358,7 @@ rs6000_rtx_costs (rtx x, machine_mode mode, int 
outer_code,
*total = rs6000_cost->divsi;
}
   /* Add in shift and subtract for MOD unless we have a mod instruction. */
-  if ((!TARGET_MODULO
+  if ((!TARGET_POWER9
   || (RS6000_DISABLE_SCALAR_MODULO && SCALAR_INT_MODE_P (mode)))
 && (code == MOD || code == UMOD))
*total += COSTS_N_INSNS (2);
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 856d268d2d27..caf8cddf905e 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -463,9 +463,9 @@ extern int rs6000_vector_align[];
 #define TARGET_FCTIWUZ TARGET_POWER7
 /* Only powerpc64 and powerpc476 support fctid.  */
 #define TARGET_FCTID   (TARGET_POWERPC64 || rs6000_cpu == PROCESSOR_PPC476)
-#define TARGET_CTZ TARGET_MODULO
-#define TARGET_EXTSWSLI(TARGET_MODULO && TARGET_POWERPC64)
-#define TARGET_MADDLD  TARGET_MODULO
+#define TARGET_CTZ TARGET_POWER9
+#define TARGET_EXTSWSLI(TARGET_POWER9 && TARGET_POWERPC64)
+#define TARGET_MADDLD  TARGET_POWER9
 
 /* TARGET_DIRECT_MOVE is redundant to TARGET_P8_VECTOR, so alias it to that.  
*/
 #define TARGET_DIRECT_MOVE TARGET_P8_VECTOR
@@ -504,6 +504,7 @@ extern int rs6000_vector_align[];
 #define TARGET_POWER5X TARGET_FPRND
 #define TARGET_POWER6  TARGET_CMPB
 #define TARGET_POWER7  TARGET_POPCNTD
+#define TARGET_POWER9  TARGET_MODULO
 
 /* In switching from using target_flags to using rs6000_isa_flags, the options
machinery creates OPTION_MASK_ instead of MASK_.  The MASK_
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 87ec37a9f8e4..db1b6c2d1164 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -403,7 +403,7 @@
  (const_int 1)
 
  (and (eq_attr "isa" "p9")
- (match_test "TARGET_MODULO"))
+ (match_test "TARGET_POWER9"))
  (const_int 1)
 
  (and (eq_attr "isa" "p9v")
@@ -3457,7 +3457,7 @@
   || INTVAL (operands[2]) <= 0
   || (i = exact_log2 (INTVAL (operands[2]))) < 0)
 {
-  if (!TARGET_MODULO)
+  if (!TARGET_POWER9)
FAIL;
 
   operands[2] = force_reg (mode, operands[2]);
@@ -3491,7 +3491,7 @@
   [(set (match_operand:GPR 0 "gpc_reg_operand" "=&r,r")
 (mod:GPR (match_oper

[gcc(refs/users/meissner/heads/work192)] Do not allow -mvsx to boost processor to power7.

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:522ffbc52b0da47a4c279911814755418039ddf1

commit 522ffbc52b0da47a4c279911814755418039ddf1
Author: Michael Meissner 
Date:   Fri Jan 31 22:29:06 2025 -0500

Do not allow -mvsx to boost processor to power7.

This patch restructures the code so that -mvsx for example will not silently
convert the processor to power7.  The user must now use -mcpu=power7 or 
higher.
This means if the user does -mvsx and the default processor does not have 
VSX
support, it will be an error.

I have built both big endian and little endian bootstrap compilers and there
were no regressions.

I updated the 2 tests that used -mvsx to raise the cpu to power7, and the 
test
case that checks if -mno-vsx produces the expected warning.

Note, Peter had some questions about one of the tests in the previous 
version of
the patch.  The test is still the same in this patch.  But the code for
preventing -mvsx is different from the previous patch, and I wanted to get 
that
patch for review before stage1 closes.

Can I install this patch on the GCC 15 trunk?

2025-01-31  Michael Meissner  

gcc/

* config/rs6000/rs6000.cc (rs6000_option_override_internal): Check 
if
the user asked for VSX instructions whether the cpu was at least 
power7.

gcc/testsuite/

* gcc.target/powerpc/ppc-target-4.c: Rewrite the test to add 
cpu=power7
when we need to add VSX support.  Add test for adding cpu=power7 
no-vsx
to generate only Altivec instructions.
* gcc.target/powerpc/pr115688.c: Add cpu=power7 in target 
__attribute__
when requesting VSX instructions.
* gcc.target/powerpc/pr87496-1.c: Update options to use
-mdejagnu-cpu=power6 to get the appropriate error message.

Diff:
---
 gcc/config/rs6000/rs6000.cc |  7 +
 gcc/testsuite/gcc.target/powerpc/ppc-target-4.c | 38 +++--
 gcc/testsuite/gcc.target/powerpc/pr115688.c |  3 +-
 gcc/testsuite/gcc.target/powerpc/pr87496-1.c|  2 +-
 4 files changed, 39 insertions(+), 11 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 3c0c6e1a74d0..e2f0b8b7de57 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -3862,6 +3862,13 @@ rs6000_option_override_internal (bool global_init_p)
  rs6000_isa_flags &= ~OPTION_MASK_VSX;
  rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
}
+  else if (!TARGET_POWER7)
+   {
+ if (explicit_vsx_p)
+   error ("%<-mvsx%> requires at least %<-mcpu=power%>");
+ rs6000_isa_flags &= ~OPTION_MASK_VSX;
+ rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
+   }
 }
 
   /* If hard-float/altivec/vsx were explicitly turned off then don't allow
diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-target-4.c 
b/gcc/testsuite/gcc.target/powerpc/ppc-target-4.c
index feef76db4618..5e2ecf34f249 100644
--- a/gcc/testsuite/gcc.target/powerpc/ppc-target-4.c
+++ b/gcc/testsuite/gcc.target/powerpc/ppc-target-4.c
@@ -2,7 +2,7 @@
 /* { dg-skip-if "" { powerpc*-*-darwin* } } */
 /* { dg-require-effective-target powerpc_fprs } */
 /* { dg-options "-O2 -ffast-math -mdejagnu-cpu=power5 -mno-altivec 
-mabi=altivec -fno-unroll-loops" } */
-/* { dg-final { scan-assembler-times "vaddfp" 1 } } */
+/* { dg-final { scan-assembler-times "vaddfp" 2 } } */
 /* { dg-final { scan-assembler-times "xvaddsp" 1 } } */
 /* { dg-final { scan-assembler-times "fadds" 1 } } */
 
@@ -18,10 +18,6 @@
 #error "__VSX__ should not be defined."
 #endif
 
-#pragma GCC target("altivec,vsx")
-#include 
-#pragma GCC reset_options
-
 #pragma GCC push_options
 #pragma GCC target("altivec,no-vsx")
 
@@ -33,6 +29,7 @@
 #error "__VSX__ should not be defined."
 #endif
 
+/* Altivec build, generate vaddfp.  */
 void
 av_add (vector float *a, vector float *b, vector float *c)
 {
@@ -40,10 +37,11 @@ av_add (vector float *a, vector float *b, vector float *c)
   unsigned long n = SIZE / 4;
 
   for (i = 0; i < n; i++)
-a[i] = vec_add (b[i], c[i]);
+a[i] = b[i] + c[i];
 }
 
-#pragma GCC target("vsx")
+/* cpu=power7 must be used to enable VSX.  */
+#pragma GCC target("cpu=power7,vsx")
 
 #ifndef __ALTIVEC__
 #error "__ALTIVEC__ should be defined."
@@ -53,6 +51,7 @@ av_add (vector float *a, vector float *b, vector float *c)
 #error "__VSX__ should be defined."
 #endif
 
+/* VSX build on power7, generate xsaddsp.  */
 void
 vsx_add (vector float *a, vector float *b, vector float *c)
 {
@@ -60,11 +59,31 @@ vsx_add (vector float *a, vector float *b, vector float *c)
   unsigned long n = SIZE / 4;
 
   for (i = 0; i < n; i++)
-a[i] = vec_add (b[i], c[i]);
+a[i] = b[i] + c[i];
+}
+
+#pragma GCC target("cpu=power7,no-vsx")
+
+#ifndef __ALTIVEC__
+#error "__ALTIVEC__ should be defined."
+#endif
+
+#ifdef __VSX__
+#error "__VSX__ should not be defined."

[gcc(refs/users/meissner/heads/work192)] Add -mcpu=future tests.

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:b240938e56fd8c8f325f7957f132eb5dd60f6de5

commit b240938e56fd8c8f325f7957f132eb5dd60f6de5
Author: Michael Meissner 
Date:   Fri Jan 31 22:26:39 2025 -0500

Add -mcpu=future tests.

This patch adds simple tests for -mcpu=future.

2025-01-31  Michael Meissner  

gcc/testsuite/

* gcc.target/powerpc/future-1.c: New test.
* gcc.target/powerpc/future-2.c: Likewise.

Diff:
---
 gcc/testsuite/gcc.target/powerpc/future-1.c | 13 +
 gcc/testsuite/gcc.target/powerpc/future-2.c | 24 
 2 files changed, 37 insertions(+)

diff --git a/gcc/testsuite/gcc.target/powerpc/future-1.c 
b/gcc/testsuite/gcc.target/powerpc/future-1.c
new file mode 100644
index ..f1b940d7bebf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/future-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-mdejagnu-cpu=future -O2" } */
+
+/* Basic check to see if the compiler supports -mcpu=future and if it defines
+   _ARCH_PWR11.  */
+
+#ifndef _ARCH_FUTURE
+#error "-mcpu=future is not supported"
+#endif
+
+void foo (void)
+{
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/future-2.c 
b/gcc/testsuite/gcc.target/powerpc/future-2.c
new file mode 100644
index ..5552cefa3c2e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/future-2.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+/* Check if we can set the future target via a target attribute.  */
+
+__attribute__((__target__("cpu=power9")))
+void foo_p9 (void)
+{
+}
+
+__attribute__((__target__("cpu=power10")))
+void foo_p10 (void)
+{
+}
+
+__attribute__((__target__("cpu=power11")))
+void foo_p11 (void)
+{
+}
+
+__attribute__((__target__("cpu=future")))
+void foo_future (void)
+{
+}

[gcc(refs/users/meissner/heads/work192)] Use vector pair load/store for memcpy with -mcpu=future

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:fbd931341b1e9512d464c0efec170220084ffecc

commit fbd931341b1e9512d464c0efec170220084ffecc
Author: Michael Meissner 
Date:   Fri Jan 31 22:27:50 2025 -0500

Use vector pair load/store for memcpy with -mcpu=future

In the development for the power10 processor, GCC did not enable using the 
load
vector pair and store vector pair instructions when optimizing things like
memory copy.  This patch enables using those instructions if -mcpu=future is
used.

2025-01-31  Michael Meissner  

gcc/

* config/rs6000/rs6000-cpus.def (ISA_FUTURE_MASKS_SERVER): Enable 
using
load vector pair and store vector pair instructions for memory copy
operations.
(POWERPC_MASKS): Make the bit for enabling using load vector pair 
and
store vector pair operations set and reset when the PowerPC 
processor is
changed.
* gcc/config/rs6000/rs6000.cc (rs6000_machine_from_flags): Disable
-mblock-ops-vector-pair from influcing .machine selection.

gcc/testsuite/

* gcc.target/powerpc/future-3.c: New test.

Diff:
---
 gcc/config/rs6000/rs6000-cpus.def   |  4 +++-
 gcc/config/rs6000/rs6000.cc |  2 +-
 gcc/testsuite/gcc.target/powerpc/future-3.c | 22 ++
 3 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-cpus.def 
b/gcc/config/rs6000/rs6000-cpus.def
index 228d0b5e7b54..063591f5c094 100644
--- a/gcc/config/rs6000/rs6000-cpus.def
+++ b/gcc/config/rs6000/rs6000-cpus.def
@@ -84,7 +84,8 @@
  | OPTION_MASK_POWER11)
 
 #define FUTURE_MASKS_SERVER(POWER11_MASKS_SERVER   \
-| OPTION_MASK_FUTURE)
+| OPTION_MASK_FUTURE   \
+| OPTION_MASK_BLOCK_OPS_VECTOR_PAIR)
 
 /* Flags that need to be turned off if -mno-vsx.  */
 #define OTHER_VSX_VECTOR_MASKS (OPTION_MASK_EFFICIENT_UNALIGNED_VSX\
@@ -114,6 +115,7 @@
 
 /* Mask of all options to set the default isa flags based on -mcpu=.  */
 #define POWERPC_MASKS  (OPTION_MASK_ALTIVEC\
+| OPTION_MASK_BLOCK_OPS_VECTOR_PAIR\
 | OPTION_MASK_CMPB \
 | OPTION_MASK_CRYPTO   \
 | OPTION_MASK_DFP  \
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index f3586f2f1e17..3c0c6e1a74d0 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -5908,7 +5908,7 @@ rs6000_machine_from_flags (void)
 
   /* Disable the flags that should never influence the .machine selection.  */
   flags &= ~(OPTION_MASK_PPC_GFXOPT | OPTION_MASK_PPC_GPOPT | OPTION_MASK_ISEL
-| OPTION_MASK_ALTIVEC);
+| OPTION_MASK_ALTIVEC | OPTION_MASK_BLOCK_OPS_VECTOR_PAIR);
 
   if ((flags & (FUTURE_MASKS_SERVER & ~ISA_3_1_MASKS_SERVER)) != 0)
 return "future";
diff --git a/gcc/testsuite/gcc.target/powerpc/future-3.c 
b/gcc/testsuite/gcc.target/powerpc/future-3.c
new file mode 100644
index ..afa8b96d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/future-3.c
@@ -0,0 +1,22 @@
+/* 32-bit doesn't generate vector pair instructions.  */
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-mdejagnu-cpu=future -O2" } */
+
+/* Test to see that memcpy will use load/store vector pair with
+   -mcpu=future.  */
+
+#ifndef SIZE
+#define SIZE 4
+#endif
+
+extern vector double to[SIZE], from[SIZE];
+
+void
+copy (void)
+{
+  __builtin_memcpy (to, from, sizeof (to));
+  return;
+}
+
+/* { dg-final { scan-assembler {\mlxvpx?\M}  } } */
+/* { dg-final { scan-assembler {\mstxvpx?\M} } } */

[gcc(refs/users/meissner/heads/work192)] Add rs6000 architecture masks.

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:260b199d4cbc2762d162809ce60a38341d4924d2

commit 260b199d4cbc2762d162809ce60a38341d4924d2
Author: Michael Meissner 
Date:   Fri Jan 31 22:29:57 2025 -0500

Add rs6000 architecture masks.

This patch begins the journey to move architecture bits that are not user 
ISA
options from rs6000_isa_flags to a new targt variable rs6000_arch_flags.  
The
intention is to remove switches that are currently isa options, but the user
should not be using this particular option. For example, we want users to 
use
-mcpu=power10 and not just -mpower10.

This patch also changes the target_clones support to use an architecture 
mask
instead of isa bits.

This patch also switches the handling of .machine to use architecture masks 
if
they exist (power4 through power11).  All of the other PowerPCs will 
continue to
use the existing code for setting the .machine option.

I have built both big endian and little endian bootstrap compilers and there
were no regressions.

In addition, I constructed a test case that used every archiecture define 
(like
_ARCH_PWR4, etc.) and I also looked at the .machine directive generated.  I 
ran
this test for all supported combinations of -mcpu, big/little endian, and 
32/64
bit support.  Every single instance generated exactly the same code with the
patches installed compared to the compiler before installing the patches.

The only difference in this patch compared to the first version posted on
November 6th is that I the correct attribution and copyright year (i.e. 
that I
created rs6000-arch.def in 2024).

Can I install this patch on the GCC 15 trunk?

2025-01-31  Michael Meissner  

gcc/

* config/rs6000/default64.h (TARGET_CPU_DEFAULT): Set default cpu 
name.
* config/rs6000/rs6000-arch.def: New file.
* config/rs6000/rs6000.cc (struct clone_map): Switch to using
architecture masks instead of ISA masks.
(rs6000_clone_map): Likewise.
(rs6000_print_isa_options): Add an architecture flags argument, 
change
all callers.
(get_arch_flag): New function.
(rs6000_debug_reg_global): Update rs6000_print_isa_options calls.
(rs6000_option_override_internal): Likewise.
(rs6000_machine_from_flags): Switch to using architecture masks 
instead
of ISA masks.
(struct rs6000_arch_mask): New structure.
(rs6000_arch_masks): New table of architecutre masks and names.
(rs6000_function_specific_save): Save architecture flags.
(rs6000_function_specific_restore): Restore architecture flags.
(rs6000_function_specific_print): Update rs6000_print_isa_options 
calls.
(rs6000_print_options_internal): Add architecture flags options.
(rs6000_clone_priority): Switch to using architecture masks instead 
of
ISA masks.
(rs6000_can_inline_p): Don't allow inling if the callee requires a 
newer
architecture than the caller.
* config/rs6000/rs6000.h: Use rs6000-arch.def to create the 
architecture
masks.
* config/rs6000/rs6000.opt (rs6000_arch_flags): New target variable.
(x_rs6000_arch_flags): New save/restore field for rs6000_arch_flags.

Diff:
---
 gcc/config/rs6000/default64.h |  11 ++
 gcc/config/rs6000/rs6000-arch.def |  49 +
 gcc/config/rs6000/rs6000.cc   | 222 +++---
 gcc/config/rs6000/rs6000.h|  24 +
 gcc/config/rs6000/rs6000.opt  |   8 ++
 5 files changed, 277 insertions(+), 37 deletions(-)

diff --git a/gcc/config/rs6000/default64.h b/gcc/config/rs6000/default64.h
index 7f6001ded852..188f5c1d1378 100644
--- a/gcc/config/rs6000/default64.h
+++ b/gcc/config/rs6000/default64.h
@@ -21,6 +21,7 @@ along with GCC; see the file COPYING3.  If not see
 #define RS6000_CPU(NAME, CPU, FLAGS)
 #include "rs6000-cpus.def"
 #undef RS6000_CPU
+#undef TARGET_CPU_DEFAULT
 
 #if (TARGET_DEFAULT & MASK_LITTLE_ENDIAN)
 #undef TARGET_DEFAULT
@@ -28,10 +29,20 @@ along with GCC; see the file COPYING3.  If not see
| MASK_LITTLE_ENDIAN)
 #undef ASM_DEFAULT_SPEC
 #define ASM_DEFAULT_SPEC "-mpower8"
+#define TARGET_CPU_DEFAULT "power8"
+
 #else
 #undef TARGET_DEFAULT
 #define TARGET_DEFAULT (OPTION_MASK_PPC_GFXOPT | OPTION_MASK_PPC_GPOPT \
| OPTION_MASK_MFCRF | MASK_POWERPC64 | MASK_64BIT)
 #undef ASM_DEFAULT_SPEC
 #define ASM_DEFAULT_SPEC "-mpower4"
+
+#if (TARGET_DEFAULT & MASK_POWERPC64)
+#define TARGET_CPU_DEFAULT "powerpc64"
+
+#else
+#define TARGET_CPU_DEFAULT "powerpc"
+#endif
+
 #endif
diff --git a/gcc/config/rs6000/rs6000-arch.def 
b/gcc/config/rs6000/rs6000-arch.def
new file mode 100644
index ..c0dbc5834333
--- /dev/null
+++ b/gcc/config/rs6000/rs6000-arch.def
@

[gcc(refs/users/meissner/heads/work192)] Use architecture flags for defining _ARCH_PWR macros.

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:3119762b5557de61f44394b726b48b9bfae62d71

commit 3119762b5557de61f44394b726b48b9bfae62d71
Author: Michael Meissner 
Date:   Fri Jan 31 22:31:14 2025 -0500

Use architecture flags for defining _ARCH_PWR macros.

For the newer architectures, this patch changes GCC to define the 
_ARCH_PWR
macros using the new architecture flags instead of relying on isa options 
like
-mpower10.

The -mpower8-internal, -mpower10, -mpower11, and -mfuture options were 
removed.
The -mpower11 and -mfuture options were removed completely, since they were 
just
added in GCC 15. The other two options were marked as WarnRemoved, and the
various ISA bits were removed.

TARGET_POWER8, TARGET_POWER10, TARGET_POWER11, and TARGET_FUTURE were 
re-defined
to use the architeture bits instead of the ISA bits.

There are other internal isa bits that aren't removed with this patch 
because
the built-in function support uses those bits.

I have built both big endian and little endian bootstrap compilers and there
were no regressions.

Can I install this patch on the GCC 15 trunk?

2025-01-31  Michael Meissner  

gcc/

* config/rs6000/rs6000-c.cc (rs6000_target_modify_macros) Add 
support to
use architecture flags instead of ISA flags for setting most of the
_ARCH_PWR* macros.
(rs6000_cpu_cpp_builtins): Update rs6000_target_modify_macros call.
* config/rs6000/rs6000-cpus.def (ISA_2_7_MASKS_SERVER): Remove
OPTION_MASK_POWER8.
(ISA_3_1_MASKS_SERVER): Remove OPTION_MASK_POWER10.
(POWER11_MASKS_SERVER): Remove OPTION_MASK_POWER11.
(FUTURE_MASKS_SERVER): Remove OPTION_MASK_FUTURE.
(POWERPC_MASKS): Remove OPTION_MASK_POWER8, OPTION_MASK_POWER10,
OPTION_MASK_POWER11, and OPTION_MASK_FUTURE.
* config/rs6000/rs6000-protos.h (rs6000_target_modify_macros): 
Update
declaration.
(rs6000_target_modify_macros_ptr): Likewise.
* config/rs6000/rs6000.cc (rs6000_target_modify_macros_ptr): 
Likewise.
(rs6000_option_override_internal): Use architecture flags instead 
of ISA
flags.
(rs6000_opt_masks): Remove -mpower10, -mpower11, and -mfuture which 
are
no longer in the ISA flags.
(rs6000_pragma_target_parse): Use architecture flags as well as ISA
flags.
* config/rs6000/rs6000.h (TARGET_POWER5): Redefine to use 
architecture
flags.
(TARGET_POWER5X): Likewise.
(TARGET_POWER6): Likewise.
(TARGET_POWER7): Likewise.
(TARGET_POWER8): Likewise.
(TARGET_POWER9): Likewise.
(TARGET_POWER10): New macro.
(TARGET_POWER11): Likewise.
(TARGET_FUTURE): Likewise.
* config/rs6000/rs6000.opt (-mpower8-internal): Remove ISA flag 
bits.
(-mpower10): Likewise.
(-mpower11): Likewise.
(-mfuture): Likewise.

Diff:
---
 gcc/config/rs6000/rs6000-c.cc | 29 -
 gcc/config/rs6000/rs6000-cpus.def | 10 +-
 gcc/config/rs6000/rs6000-protos.h |  5 +++--
 gcc/config/rs6000/rs6000.cc   | 20 +++-
 gcc/config/rs6000/rs6000.h| 19 +--
 gcc/config/rs6000/rs6000.opt  | 17 ++---
 6 files changed, 46 insertions(+), 54 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-c.cc b/gcc/config/rs6000/rs6000-c.cc
index 6757a2477ad1..6d6838735b33 100644
--- a/gcc/config/rs6000/rs6000-c.cc
+++ b/gcc/config/rs6000/rs6000-c.cc
@@ -338,7 +338,8 @@ rs6000_define_or_undefine_macro (bool define_p, const char 
*name)
#pragma GCC target, we need to adjust the macros dynamically.  */
 
 void
-rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags)
+rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags,
+HOST_WIDE_INT arch_flags)
 {
   if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
 fprintf (stderr,
@@ -411,7 +412,7 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT 
flags)
summary of the flags associated with particular cpu
definitions.  */
 
-  /* rs6000_isa_flags based options.  */
+  /* rs6000_isa_flags and rs6000_arch_flags based options.  */
   rs6000_define_or_undefine_macro (define_p, "_ARCH_PPC");
   if ((flags & OPTION_MASK_PPC_GPOPT) != 0)
 rs6000_define_or_undefine_macro (define_p, "_ARCH_PPCSQ");
@@ -419,25 +420,27 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT 
flags)
 rs6000_define_or_undefine_macro (define_p, "_ARCH_PPCGR");
   if ((flags & OPTION_MASK_POWERPC64) != 0)
 rs6000_define_or_undefine_macro (define_p, "_ARCH_PPC64");
-  if ((flags & OPTION_MASK_MFCRF) != 0)
+  if ((flags & OPTION_MASK_POWERPC64) != 0)
+rs6000_define_or_undefine_macro (define_p, "_ARCH_PPC64");

[gcc(refs/users/meissner/heads/work192)] Update ChangeLog.*

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:7db75b55e69eab6348e9ce7d98d7c226f6d832f7

commit 7db75b55e69eab6348e9ce7d98d7c226f6d832f7
Author: Michael Meissner 
Date:   Fri Jan 31 22:35:12 2025 -0500

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.meissner | 435 +
 1 file changed, 435 insertions(+)

diff --git a/gcc/ChangeLog.meissner b/gcc/ChangeLog.meissner
index b598e153c514..03d6fd0bf743 100644
--- a/gcc/ChangeLog.meissner
+++ b/gcc/ChangeLog.meissner
@@ -1,5 +1,440 @@
+ Branch work192, patch #31 
+
+Use architecture flags for defining _ARCH_PWR macros.
+
+For the newer architectures, this patch changes GCC to define the _ARCH_PWR
+macros using the new architecture flags instead of relying on isa options like
+-mpower10.
+
+The -mpower8-internal, -mpower10, -mpower11, and -mfuture options were removed.
+The -mpower11 and -mfuture options were removed completely, since they were 
just
+added in GCC 15. The other two options were marked as WarnRemoved, and the
+various ISA bits were removed.
+
+TARGET_POWER8, TARGET_POWER10, TARGET_POWER11, and TARGET_FUTURE were 
re-defined
+to use the architeture bits instead of the ISA bits.
+
+There are other internal isa bits that aren't removed with this patch because
+the built-in function support uses those bits.
+
+I have built both big endian and little endian bootstrap compilers and there
+were no regressions.
+
+Can I install this patch on the GCC 15 trunk?
+
+2025-01-31  Michael Meissner  
+
+gcc/
+
+   * config/rs6000/rs6000-c.cc (rs6000_target_modify_macros) Add support to
+   use architecture flags instead of ISA flags for setting most of the
+   _ARCH_PWR* macros.
+   (rs6000_cpu_cpp_builtins): Update rs6000_target_modify_macros call.
+   * config/rs6000/rs6000-cpus.def (ISA_2_7_MASKS_SERVER): Remove
+   OPTION_MASK_POWER8.
+   (ISA_3_1_MASKS_SERVER): Remove OPTION_MASK_POWER10.
+   (POWER11_MASKS_SERVER): Remove OPTION_MASK_POWER11.
+   (FUTURE_MASKS_SERVER): Remove OPTION_MASK_FUTURE.
+   (POWERPC_MASKS): Remove OPTION_MASK_POWER8, OPTION_MASK_POWER10,
+   OPTION_MASK_POWER11, and OPTION_MASK_FUTURE.
+   * config/rs6000/rs6000-protos.h (rs6000_target_modify_macros): Update
+   declaration.
+   (rs6000_target_modify_macros_ptr): Likewise.
+   * config/rs6000/rs6000.cc (rs6000_target_modify_macros_ptr): Likewise.
+   (rs6000_option_override_internal): Use architecture flags instead of ISA
+   flags.
+   (rs6000_opt_masks): Remove -mpower10, -mpower11, and -mfuture which are
+   no longer in the ISA flags.
+   (rs6000_pragma_target_parse): Use architecture flags as well as ISA
+   flags.
+   * config/rs6000/rs6000.h (TARGET_POWER5): Redefine to use architecture
+   flags.
+   (TARGET_POWER5X): Likewise.
+   (TARGET_POWER6): Likewise.
+   (TARGET_POWER7): Likewise.
+   (TARGET_POWER8): Likewise.
+   (TARGET_POWER9): Likewise.
+   (TARGET_POWER10): New macro.
+   (TARGET_POWER11): Likewise.
+   (TARGET_FUTURE): Likewise.
+   * config/rs6000/rs6000.opt (-mpower8-internal): Remove ISA flag bits.
+   (-mpower10): Likewise.
+   (-mpower11): Likewise.
+   (-mfuture): Likewise.
+
+ Branch work192, patch #30 
+
+Add rs6000 architecture masks.
+
+This patch begins the journey to move architecture bits that are not user ISA
+options from rs6000_isa_flags to a new targt variable rs6000_arch_flags.  The
+intention is to remove switches that are currently isa options, but the user
+should not be using this particular option. For example, we want users to use
+-mcpu=power10 and not just -mpower10.
+
+This patch also changes the target_clones support to use an architecture mask
+instead of isa bits.
+
+This patch also switches the handling of .machine to use architecture masks if
+they exist (power4 through power11).  All of the other PowerPCs will continue 
to
+use the existing code for setting the .machine option.
+
+I have built both big endian and little endian bootstrap compilers and there
+were no regressions.
+
+In addition, I constructed a test case that used every archiecture define (like
+_ARCH_PWR4, etc.) and I also looked at the .machine directive generated.  I ran
+this test for all supported combinations of -mcpu, big/little endian, and 32/64
+bit support.  Every single instance generated exactly the same code with the
+patches installed compared to the compiler before installing the patches.
+
+The only difference in this patch compared to the first version posted on
+November 6th is that I the correct attribution and copyright year (i.e. that I
+created rs6000-arch.def in 2024).
+
+Can I install this patch on the GCC 15 trunk?
+
+2025-01-31  Michael Meissner  
+
+gcc/
+
+   * config/rs6000/default64.h (TARGET_CPU_DEFAULT): Set default cpu name.
+   * config/rs6000/rs6000-arch.def: New file.
+   * conf

[gcc/meissner/heads/work192-bugs] (15 commits) Merge commit 'refs/users/meissner/heads/work192-bugs' of gi

2025-01-31 Thread Michael Meissner via Gcc-cvs

The branch 'meissner/heads/work192-bugs' was updated to point to:

 74095f2f685d... Merge commit 'refs/users/meissner/heads/work192-bugs' of gi

It previously pointed to:

 e11b83e7ccc8... Add ChangeLog.bugs and update REVISION.

Diff:

Summary of changes (added commits):
---

  74095f2... Merge commit 'refs/users/meissner/heads/work192-bugs' of gi
  ece22b3... Add ChangeLog.bugs and update REVISION.
  7db75b5... Update ChangeLog.* (*)
  3119762... Use architecture flags for defining _ARCH_PWR macros. (*)
  260b199... Add rs6000 architecture masks. (*)
  522ffbc... Do not allow -mvsx to boost processor to power7. (*)
  fbd9313... Use vector pair load/store for memcpy with -mcpu=future (*)
  b240938... Add -mcpu=future tests. (*)
  132de40... Add -mcpu=future tuning support. (*)
  d60b9a8... Add support for -mcpu=future (*)
  952767b... Change TARGET_MODULO to TARGET_POWER9. (*)
  24cf922... Change TARGET_POPCNTD to TARGET_POWER7. (*)
  42c2dcb... Change TARGET_CMPB to TARGET_POWER6. (*)
  d092688... Change TARGET_FPRND to TARGET_POWER5X. (*)
  26a0ac5... Change TARGET_POPCNTB to TARGET_POWER5. (*)

(*) This commit already exists in another branch.
Because the reference `refs/users/meissner/heads/work192-bugs' matches
your hooks.email-new-commits-only configuration,
no separate email is sent for this commit.

[gcc(refs/users/meissner/heads/work192-bugs)] Add ChangeLog.bugs and update REVISION.

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:ece22b39938dfc5a91cf3b188e3a9669311836d8

commit ece22b39938dfc5a91cf3b188e3a9669311836d8
Author: Michael Meissner 
Date:   Fri Jan 31 17:39:44 2025 -0500

Add ChangeLog.bugs and update REVISION.

2025-01-31  Michael Meissner  

gcc/

* ChangeLog.bugs: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.bugs | 5 +
 gcc/REVISION   | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.bugs b/gcc/ChangeLog.bugs
new file mode 100644
index ..fa480c0b3325
--- /dev/null
+++ b/gcc/ChangeLog.bugs
@@ -0,0 +1,5 @@
+ Branch work192-bugs, baseline 
+
+2025-01-31   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index 6126a6968da2..76b5abe64278 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work192 branch
+work192-bugs branch

[gcc(refs/users/meissner/heads/work192-dmf)] Add ChangeLog.dmf and update REVISION.

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:aae18f82e8407419e451ff15077a2023600c898e

commit aae18f82e8407419e451ff15077a2023600c898e
Author: Michael Meissner 
Date:   Fri Jan 31 17:37:51 2025 -0500

Add ChangeLog.dmf and update REVISION.

2025-01-31  Michael Meissner  

gcc/

* ChangeLog.dmf: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.dmf | 5 +
 gcc/REVISION  | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.dmf b/gcc/ChangeLog.dmf
new file mode 100644
index ..8e54036bacfc
--- /dev/null
+++ b/gcc/ChangeLog.dmf
@@ -0,0 +1,5 @@
+ Branch work192-dmf, baseline 
+
+2025-01-31   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index 6126a6968da2..5189ba6393e2 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work192 branch
+work192-dmf branch

[gcc(refs/users/meissner/heads/work192-dmf)] Merge commit 'refs/users/meissner/heads/work192-dmf' of git+ssh://gcc.gnu.org/git/gcc into me/work19

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:7b57b2a28716b3e3bf1ae0f4be2dee3924b4f51d

commit 7b57b2a28716b3e3bf1ae0f4be2dee3924b4f51d
Merge: aae18f82e840 c0482b2f27c9
Author: Michael Meissner 
Date:   Fri Jan 31 22:37:43 2025 -0500

Merge commit 'refs/users/meissner/heads/work192-dmf' of 
git+ssh://gcc.gnu.org/git/gcc into me/work192-dmf

Diff:

[gcc(refs/users/meissner/heads/work192-bugs)] Merge commit 'refs/users/meissner/heads/work192-bugs' of git+ssh://gcc.gnu.org/git/gcc into me/work1

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:74095f2f685d97db599ce3c70bcc888c207ebd18

commit 74095f2f685d97db599ce3c70bcc888c207ebd18
Merge: ece22b39938d e11b83e7ccc8
Author: Michael Meissner 
Date:   Fri Jan 31 22:36:12 2025 -0500

Merge commit 'refs/users/meissner/heads/work192-bugs' of 
git+ssh://gcc.gnu.org/git/gcc into me/work192-bugs

Diff:

[gcc/meissner/heads/work192-dmf] (15 commits) Merge commit 'refs/users/meissner/heads/work192-dmf' of git

2025-01-31 Thread Michael Meissner via Gcc-cvs

The branch 'meissner/heads/work192-dmf' was updated to point to:

 7b57b2a28716... Merge commit 'refs/users/meissner/heads/work192-dmf' of git

It previously pointed to:

 c0482b2f27c9... Add ChangeLog.dmf and update REVISION.

Diff:

Summary of changes (added commits):
---

  7b57b2a... Merge commit 'refs/users/meissner/heads/work192-dmf' of git
  aae18f8... Add ChangeLog.dmf and update REVISION.
  7db75b5... Update ChangeLog.* (*)
  3119762... Use architecture flags for defining _ARCH_PWR macros. (*)
  260b199... Add rs6000 architecture masks. (*)
  522ffbc... Do not allow -mvsx to boost processor to power7. (*)
  fbd9313... Use vector pair load/store for memcpy with -mcpu=future (*)
  b240938... Add -mcpu=future tests. (*)
  132de40... Add -mcpu=future tuning support. (*)
  d60b9a8... Add support for -mcpu=future (*)
  952767b... Change TARGET_MODULO to TARGET_POWER9. (*)
  24cf922... Change TARGET_POPCNTD to TARGET_POWER7. (*)
  42c2dcb... Change TARGET_CMPB to TARGET_POWER6. (*)
  d092688... Change TARGET_FPRND to TARGET_POWER5X. (*)
  26a0ac5... Change TARGET_POPCNTB to TARGET_POWER5. (*)

(*) This commit already exists in another branch.
Because the reference `refs/users/meissner/heads/work192-dmf' matches
your hooks.email-new-commits-only configuration,
no separate email is sent for this commit.

[gcc/meissner/heads/work192-libs] (15 commits) Merge commit 'refs/users/meissner/heads/work192-libs' of gi

2025-01-31 Thread Michael Meissner via Gcc-cvs

The branch 'meissner/heads/work192-libs' was updated to point to:

 6366b843d035... Merge commit 'refs/users/meissner/heads/work192-libs' of gi

It previously pointed to:

 df70209ac4de... Add ChangeLog.libs and update REVISION.

Diff:

Summary of changes (added commits):
---

  6366b84... Merge commit 'refs/users/meissner/heads/work192-libs' of gi
  35ea79d... Add ChangeLog.libs and update REVISION.
  7db75b5... Update ChangeLog.* (*)
  3119762... Use architecture flags for defining _ARCH_PWR macros. (*)
  260b199... Add rs6000 architecture masks. (*)
  522ffbc... Do not allow -mvsx to boost processor to power7. (*)
  fbd9313... Use vector pair load/store for memcpy with -mcpu=future (*)
  b240938... Add -mcpu=future tests. (*)
  132de40... Add -mcpu=future tuning support. (*)
  d60b9a8... Add support for -mcpu=future (*)
  952767b... Change TARGET_MODULO to TARGET_POWER9. (*)
  24cf922... Change TARGET_POPCNTD to TARGET_POWER7. (*)
  42c2dcb... Change TARGET_CMPB to TARGET_POWER6. (*)
  d092688... Change TARGET_FPRND to TARGET_POWER5X. (*)
  26a0ac5... Change TARGET_POPCNTB to TARGET_POWER5. (*)

(*) This commit already exists in another branch.
Because the reference `refs/users/meissner/heads/work192-libs' matches
your hooks.email-new-commits-only configuration,
no separate email is sent for this commit.

[gcc(refs/users/meissner/heads/work192-libs)] Merge commit 'refs/users/meissner/heads/work192-libs' of git+ssh://gcc.gnu.org/git/gcc into me/work1

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:6366b843d0353dd4aeb54ff1160aa9793ea59fec

commit 6366b843d0353dd4aeb54ff1160aa9793ea59fec
Merge: 35ea79d291b1 df70209ac4de
Author: Michael Meissner 
Date:   Fri Jan 31 22:38:53 2025 -0500

Merge commit 'refs/users/meissner/heads/work192-libs' of 
git+ssh://gcc.gnu.org/git/gcc into me/work192-libs

Diff:

[gcc/meissner/heads/work192-math] (15 commits) Merge commit 'refs/users/meissner/heads/work192-math' of gi

2025-01-31 Thread Michael Meissner via Gcc-cvs

The branch 'meissner/heads/work192-math' was updated to point to:

 ef69a8421a18... Merge commit 'refs/users/meissner/heads/work192-math' of gi

It previously pointed to:

 9923c123baea... Add ChangeLog.math and update REVISION.

Diff:

Summary of changes (added commits):
---

  ef69a84... Merge commit 'refs/users/meissner/heads/work192-math' of gi
  e9357bc... Add ChangeLog.math and update REVISION.
  7db75b5... Update ChangeLog.* (*)
  3119762... Use architecture flags for defining _ARCH_PWR macros. (*)
  260b199... Add rs6000 architecture masks. (*)
  522ffbc... Do not allow -mvsx to boost processor to power7. (*)
  fbd9313... Use vector pair load/store for memcpy with -mcpu=future (*)
  b240938... Add -mcpu=future tests. (*)
  132de40... Add -mcpu=future tuning support. (*)
  d60b9a8... Add support for -mcpu=future (*)
  952767b... Change TARGET_MODULO to TARGET_POWER9. (*)
  24cf922... Change TARGET_POPCNTD to TARGET_POWER7. (*)
  42c2dcb... Change TARGET_CMPB to TARGET_POWER6. (*)
  d092688... Change TARGET_FPRND to TARGET_POWER5X. (*)
  26a0ac5... Change TARGET_POPCNTB to TARGET_POWER5. (*)

(*) This commit already exists in another branch.
Because the reference `refs/users/meissner/heads/work192-math' matches
your hooks.email-new-commits-only configuration,
no separate email is sent for this commit.

[gcc(refs/users/meissner/heads/work192-math)] Add ChangeLog.math and update REVISION.

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:e9357bcf3af663ff3b75adf4697370b3baa37e6c

commit e9357bcf3af663ff3b75adf4697370b3baa37e6c
Author: Michael Meissner 
Date:   Fri Jan 31 17:43:25 2025 -0500

Add ChangeLog.math and update REVISION.

2025-01-31  Michael Meissner  

gcc/

* ChangeLog.math: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.math | 5 +
 gcc/REVISION   | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.math b/gcc/ChangeLog.math
new file mode 100644
index ..a707e6f0391d
--- /dev/null
+++ b/gcc/ChangeLog.math
@@ -0,0 +1,5 @@
+ Branch work192-math, baseline 
+
+2025-01-31   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index 6126a6968da2..ffe46b08b0bb 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work192 branch
+work192-math branch

[gcc/meissner/heads/work192-sha] (15 commits) Merge commit 'refs/users/meissner/heads/work192-sha' of git

2025-01-31 Thread Michael Meissner via Gcc-cvs

The branch 'meissner/heads/work192-sha' was updated to point to:

 a5c32ecddff0... Merge commit 'refs/users/meissner/heads/work192-sha' of git

It previously pointed to:

 db096f3ecc18... Add ChangeLog.sha and update REVISION.

Diff:

Summary of changes (added commits):
---

  a5c32ec... Merge commit 'refs/users/meissner/heads/work192-sha' of git
  0726603... Add ChangeLog.sha and update REVISION.
  7db75b5... Update ChangeLog.* (*)
  3119762... Use architecture flags for defining _ARCH_PWR macros. (*)
  260b199... Add rs6000 architecture masks. (*)
  522ffbc... Do not allow -mvsx to boost processor to power7. (*)
  fbd9313... Use vector pair load/store for memcpy with -mcpu=future (*)
  b240938... Add -mcpu=future tests. (*)
  132de40... Add -mcpu=future tuning support. (*)
  d60b9a8... Add support for -mcpu=future (*)
  952767b... Change TARGET_MODULO to TARGET_POWER9. (*)
  24cf922... Change TARGET_POPCNTD to TARGET_POWER7. (*)
  42c2dcb... Change TARGET_CMPB to TARGET_POWER6. (*)
  d092688... Change TARGET_FPRND to TARGET_POWER5X. (*)
  26a0ac5... Change TARGET_POPCNTB to TARGET_POWER5. (*)

(*) This commit already exists in another branch.
Because the reference `refs/users/meissner/heads/work192-sha' matches
your hooks.email-new-commits-only configuration,
no separate email is sent for this commit.

[gcc(refs/users/meissner/heads/work192-sha)] Add ChangeLog.sha and update REVISION.

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:07266030ea291f6b8c0c789b400a3d1cf9f7d228

commit 07266030ea291f6b8c0c789b400a3d1cf9f7d228
Author: Michael Meissner 
Date:   Fri Jan 31 17:41:30 2025 -0500

Add ChangeLog.sha and update REVISION.

2025-01-31  Michael Meissner  

gcc/

* ChangeLog.sha: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.sha | 5 +
 gcc/REVISION  | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.sha b/gcc/ChangeLog.sha
new file mode 100644
index ..4bc792425d41
--- /dev/null
+++ b/gcc/ChangeLog.sha
@@ -0,0 +1,5 @@
+ Branch work192-sha, baseline 
+
+2025-01-31   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index 6126a6968da2..e43b04109ca7 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work192 branch
+work192-sha branch

[gcc(refs/users/meissner/heads/work192-math)] Merge commit 'refs/users/meissner/heads/work192-math' of git+ssh://gcc.gnu.org/git/gcc into me/work1

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:ef69a8421a1826a77c047b78db6399d9effb3096

commit ef69a8421a1826a77c047b78db6399d9effb3096
Merge: e9357bcf3af6 9923c123baea
Author: Michael Meissner 
Date:   Fri Jan 31 22:40:15 2025 -0500

Merge commit 'refs/users/meissner/heads/work192-math' of 
git+ssh://gcc.gnu.org/git/gcc into me/work192-math

Diff:

[gcc(refs/users/meissner/heads/work192-sha)] Merge commit 'refs/users/meissner/heads/work192-sha' of git+ssh://gcc.gnu.org/git/gcc into me/work19

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:a5c32ecddff0b5c939f88ca0abde52749011327d

commit a5c32ecddff0b5c939f88ca0abde52749011327d
Merge: 07266030ea29 db096f3ecc18
Author: Michael Meissner 
Date:   Fri Jan 31 22:41:21 2025 -0500

Merge commit 'refs/users/meissner/heads/work192-sha' of 
git+ssh://gcc.gnu.org/git/gcc into me/work192-sha

Diff:

[gcc(refs/users/meissner/heads/work192-test)] Add ChangeLog.test and update REVISION.

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:99b461fef4a77c21a88141b009d183756d272712

commit 99b461fef4a77c21a88141b009d183756d272712
Author: Michael Meissner 
Date:   Fri Jan 31 17:42:21 2025 -0500

Add ChangeLog.test and update REVISION.

2025-01-31  Michael Meissner  

gcc/

* ChangeLog.test: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.test | 5 +
 gcc/REVISION   | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.test b/gcc/ChangeLog.test
new file mode 100644
index ..a8d24725a1e5
--- /dev/null
+++ b/gcc/ChangeLog.test
@@ -0,0 +1,5 @@
+ Branch work192-test, baseline 
+
+2025-01-31   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index 6126a6968da2..74e20ba666c2 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work192 branch
+work192-test branch

[gcc/meissner/heads/work192-test] (15 commits) Merge commit 'refs/users/meissner/heads/work192-test' of gi

2025-01-31 Thread Michael Meissner via Gcc-cvs

The branch 'meissner/heads/work192-test' was updated to point to:

 a22c9e6f4d7c... Merge commit 'refs/users/meissner/heads/work192-test' of gi

It previously pointed to:

 a6110a7d2c38... Add ChangeLog.test and update REVISION.

Diff:

Summary of changes (added commits):
---

  a22c9e6... Merge commit 'refs/users/meissner/heads/work192-test' of gi
  99b461f... Add ChangeLog.test and update REVISION.
  7db75b5... Update ChangeLog.* (*)
  3119762... Use architecture flags for defining _ARCH_PWR macros. (*)
  260b199... Add rs6000 architecture masks. (*)
  522ffbc... Do not allow -mvsx to boost processor to power7. (*)
  fbd9313... Use vector pair load/store for memcpy with -mcpu=future (*)
  b240938... Add -mcpu=future tests. (*)
  132de40... Add -mcpu=future tuning support. (*)
  d60b9a8... Add support for -mcpu=future (*)
  952767b... Change TARGET_MODULO to TARGET_POWER9. (*)
  24cf922... Change TARGET_POPCNTD to TARGET_POWER7. (*)
  42c2dcb... Change TARGET_CMPB to TARGET_POWER6. (*)
  d092688... Change TARGET_FPRND to TARGET_POWER5X. (*)
  26a0ac5... Change TARGET_POPCNTB to TARGET_POWER5. (*)

(*) This commit already exists in another branch.
Because the reference `refs/users/meissner/heads/work192-test' matches
your hooks.email-new-commits-only configuration,
no separate email is sent for this commit.

[gcc(refs/users/meissner/heads/work192-vpair)] Merge commit 'refs/users/meissner/heads/work192-vpair' of git+ssh://gcc.gnu.org/git/gcc into me/work

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:0e67b42e5e80c20151f427d90d5d8e8df5ab38a5

commit 0e67b42e5e80c20151f427d90d5d8e8df5ab38a5
Merge: f70a5294efb2 1aed6858e482
Author: Michael Meissner 
Date:   Fri Jan 31 22:43:31 2025 -0500

Merge commit 'refs/users/meissner/heads/work192-vpair' of 
git+ssh://gcc.gnu.org/git/gcc into me/work192-vpair

Diff:

[gcc/meissner/heads/work192-vpair] (15 commits) Merge commit 'refs/users/meissner/heads/work192-vpair' of g

2025-01-31 Thread Michael Meissner via Gcc-cvs

The branch 'meissner/heads/work192-vpair' was updated to point to:

 0e67b42e5e80... Merge commit 'refs/users/meissner/heads/work192-vpair' of g

It previously pointed to:

 1aed6858e482... Add ChangeLog.vpair and update REVISION.

Diff:

Summary of changes (added commits):
---

  0e67b42... Merge commit 'refs/users/meissner/heads/work192-vpair' of g
  f70a529... Add ChangeLog.vpair and update REVISION.
  7db75b5... Update ChangeLog.* (*)
  3119762... Use architecture flags for defining _ARCH_PWR macros. (*)
  260b199... Add rs6000 architecture masks. (*)
  522ffbc... Do not allow -mvsx to boost processor to power7. (*)
  fbd9313... Use vector pair load/store for memcpy with -mcpu=future (*)
  b240938... Add -mcpu=future tests. (*)
  132de40... Add -mcpu=future tuning support. (*)
  d60b9a8... Add support for -mcpu=future (*)
  952767b... Change TARGET_MODULO to TARGET_POWER9. (*)
  24cf922... Change TARGET_POPCNTD to TARGET_POWER7. (*)
  42c2dcb... Change TARGET_CMPB to TARGET_POWER6. (*)
  d092688... Change TARGET_FPRND to TARGET_POWER5X. (*)
  26a0ac5... Change TARGET_POPCNTB to TARGET_POWER5. (*)

(*) This commit already exists in another branch.
Because the reference `refs/users/meissner/heads/work192-vpair' matches
your hooks.email-new-commits-only configuration,
no separate email is sent for this commit.

[gcc(refs/users/meissner/heads/work192-bugs)] Add power9 and power10 float to logical optimizations.

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:22082a664c1ef0dbfeb07ae4b14d7684e76a

commit 22082a664c1ef0dbfeb07ae4b14d7684e76a
Author: Michael Meissner 
Date:   Fri Jan 31 22:47:25 2025 -0500

Add power9 and power10 float to logical optimizations.

I was answering an email from a co-worker and I pointed him to work I had 
done
for the Power8 era that optimizes the 32-bit float math library in Glibc.  
In
doing so, I discovered with the Power9 and later computers, this 
optimization
is no longer taking place.

The glibc 32-bit floating point math functions have code that looks like:

union u {
  float f;
  uint32_t u32;
};

float
math_foo (float x, unsigned int mask)
{
  union u arg;
  float x2;

  arg.f = x;
  arg.u32 &= mask;

  x2 = arg.f;
  /* ... */
}

On power8 with the optimization it generates:

xscvdpspn 0,1
sldi 9,4,32
mtvsrd 32,9
xxland 1,0,32
xscvspdpn 1,1

I.e., it converts the SFmode to the memory format (instead of the DFmode 
that
is used within the register), converts the mask so that it is in the vector
register in the upper 32-bits, and does a XXLAND (i.e. there is only one 
direct
move from GPR to vector register).  Then after doing this, it converts the
upper 32-bits back to DFmode.

If the XSCVSPDN instruction took the value in the normal 32-bit scalar in a
vector register, we wouldn't have needed the SLDI of the mask.

On power9/power10/power11 it currently generates:

xscvdpspn 0,1
mfvsrwz 2,0
and 2,2,4
mtvsrws 1,2
xscvspdpn 1,1
blr

I.e convert to SFmode representation, move the value to a GPR, do an AND
operation, move the 32-bit value with a splat, and then convert it back to
DFmode format.

With this patch, it now generates:

xscvdpspn 0,1
mtvsrwz 32,2
xxland 32,0,32
xxspltw 1,32,1
xscvspdpn 1,1
blr

I.e. convert to SFmode representation, move the mask to the vector 
register, do
the operation using XXLAND.  Splat the value to get the value in the correct
location, and then convert back to DFmode.

I have built GCC with the patches in this patch set applied on both little 
and
big endian PowerPC systems and there were no regressions.  Can I apply this
patch to GCC 15?

2025-01-31  Michael Meissner  

gcc/

PR target/117487
* config/rs6000/vsx.md (SFmode logical peephoole): Update comments 
in
the original code that supports power8.  Add a new define_peephole2 
to
do the optimization on power9/power10.

Diff:
---
 gcc/config/rs6000/vsx.md| 142 +++-
 gcc/testsuite/gcc.target/powerpc/pr108958.c |   0
 2 files changed, 137 insertions(+), 5 deletions(-)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index d84a2a357a31..f47c4e2f7766 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -6282,7 +6282,7 @@
(SFBOOL_MFVSR_A  3) ;; move to gpr src
(SFBOOL_BOOL_D   4) ;; and/ior/xor dest
(SFBOOL_BOOL_A1  5) ;; and/ior/xor arg1
-   (SFBOOL_BOOL_A2  6) ;; and/ior/xor arg1
+   (SFBOOL_BOOL_A2  6) ;; and/ior/xor arg2
(SFBOOL_SHL_D7) ;; shift left dest
(SFBOOL_SHL_A8) ;; shift left arg
(SFBOOL_MTVSR_D  9) ;; move to vecter dest
@@ -6322,18 +6322,18 @@
 ;; GPR, and instead move the integer mask value to the vector register after a
 ;; shift and do the VSX logical operation.
 
-;; The insns for dealing with SFmode in GPR registers looks like:
+;; The insns for dealing with SFmode in GPR registers looks like on power8:
 ;; (set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN))
 ;;
-;; (set (reg:DI reg3) (unspec:DI [(reg:V4SF reg2)] UNSPEC_P8V_RELOAD_FROM_VSX))
+;; (set (reg:DI reg3) (zero_extend:DI (reg:SI reg2)))
 ;;
-;; (set (reg:DI reg4) (and:DI (reg:DI reg3) (reg:DI reg3)))
+;; (set (reg:DI reg4) (and:SI (reg:SI reg3) (reg:SI mask)))
 ;;
 ;; (set (reg:DI reg5) (ashift:DI (reg:DI reg4) (const_int 32)))
 ;;
 ;; (set (reg:SF reg6) (unspec:SF [(reg:DI reg5)] UNSPEC_P8V_MTVSRD))
 ;;
-;; (set (reg:SF reg6) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))
+;; (set (reg:SF reg7) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))
 
 (define_peephole2
   [(match_scratch:DI SFBOOL_TMP_GPR "r")
@@ -6414,6 +6414,138 @@
   operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d);
 })
 
+;; Constants for

[gcc(refs/users/meissner/heads/work192-bugs)] PR 99293: Optimize splat of a V2DF/V2DI extract with constant element

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:2ed23706a58de435abe56002e0dced6a4bae6fed

commit 2ed23706a58de435abe56002e0dced6a4bae6fed
Author: Michael Meissner 
Date:   Fri Jan 31 22:46:02 2025 -0500

PR 99293: Optimize splat of a V2DF/V2DI extract with constant element

We had optimizations for splat of a vector extract for the other vector
types, but we missed having one for V2DI and V2DF.  This patch adds a
combiner insn to do this optimization.

In looking at the source, we had similar optimizations for V4SI and V4SF
extract and splats, but we missed doing V2DI/V2DF.

Without the patch for the code:

vector long long splat_dup_l_0 (vector long long v)
{
  return __builtin_vec_splats (__builtin_vec_extract (v, 0));
}

the compiler generates (on a little endian power9):

splat_dup_l_0:
mfvsrld 9,34
mtvsrdd 34,9,9
blr

Now it generates:

splat_dup_l_0:
xxpermdi 34,34,34,3
blr

2025-01-31  Michael Meissner  

gcc/

PR target/99293
* config/rs6000/vsx.md (vsx_splat_extract_): New insn.

gcc/testsuite/

PR target/99293
* gcc.target/powerpc/builtins-1.c: Adjust insn count.
* gcc.target/powerpc/pr99293.c: New test.

Diff:
---
 gcc/config/rs6000/vsx.md  | 18 ++
 gcc/testsuite/gcc.target/powerpc/builtins-1.c |  2 +-
 gcc/testsuite/gcc.target/powerpc/pr99293.c| 22 ++
 3 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index dd3573b80868..d84a2a357a31 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -4798,6 +4798,24 @@
   "lxvdsx %x0,%y1"
   [(set_attr "type" "vecload")])
 
+;; Optimize SPLAT of an extract from a V2DF/V2DI vector with a constant element
+(define_insn "*vsx_splat_extract_"
+  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
+   (vec_duplicate:VSX_D
+(vec_select:
+ (match_operand:VSX_D 1 "vsx_register_operand" "wa")
+ (parallel [(match_operand 2 "const_0_to_1_operand" "n")]]
+  "VECTOR_MEM_VSX_P (mode)"
+{
+  int which_word = INTVAL (operands[2]);
+  if (!BYTES_BIG_ENDIAN)
+which_word = 1 - which_word;
+
+  operands[3] = GEN_INT (which_word ? 3 : 0);
+  return "xxpermdi %x0,%x1,%x1,%3";
+}
+  [(set_attr "type" "vecperm")])
+
 ;; V4SI splat support
 (define_insn "vsx_splat_v4si"
   [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,wa")
diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-1.c 
b/gcc/testsuite/gcc.target/powerpc/builtins-1.c
index 8410a5fd4319..4e7e5384675f 100644
--- a/gcc/testsuite/gcc.target/powerpc/builtins-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/builtins-1.c
@@ -1035,4 +1035,4 @@ foo156 (vector unsigned short usa)
 /* { dg-final { scan-assembler-times {\mvmrglb\M} 3 } } */
 /* { dg-final { scan-assembler-times {\mvmrgew\M} 4 } } */
 /* { dg-final { scan-assembler-times {\mvsplth|xxsplth\M} 4 } } */
-/* { dg-final { scan-assembler-times {\mxxpermdi\M} 44 } } */
+/* { dg-final { scan-assembler-times {\mxxpermdi\M} 42 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr99293.c 
b/gcc/testsuite/gcc.target/powerpc/pr99293.c
new file mode 100644
index ..20adc1f27f65
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr99293.c
@@ -0,0 +1,22 @@
+/* { dg-do compile { target powerpc*-*-* } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -mvsx" } */
+
+/* Test for PR 99263, which wants to do:
+   __builtin_vec_splats (__builtin_vec_extract (v, n))
+
+   where v is a V2DF or V2DI vector and n is either 0 or 1.  Previously the
+   compiler would do a direct move to the GPR registers to select the item and 
a
+   direct move from the GPR registers to do the splat.  */
+
+vector long long splat_dup_l_0 (vector long long v)
+{
+  return __builtin_vec_splats (__builtin_vec_extract (v, 0));
+}
+
+vector long long splat_dup_l_1 (vector long long v)
+{
+  return __builtin_vec_splats (__builtin_vec_extract (v, 1));
+}
+
+/* { dg-final { scan-assembler-times "xxpermdi" 2 } } */

[gcc(refs/users/meissner/heads/work192-bugs)] PR target/108958 -- use mtvsrdd to zero extend GPR DImode to VSX TImode

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:a0239b3c38157e079b899d0dee709802a8c70c01

commit a0239b3c38157e079b899d0dee709802a8c70c01
Author: Michael Meissner 
Date:   Fri Jan 31 22:48:13 2025 -0500

PR target/108958 -- use mtvsrdd to zero extend GPR DImode to VSX TImode

Previously GCC would zero externd a DImode GPR value to TImode by first zero
extending the DImode value into a GPR TImode value, and then do a MTVSRDD to
move this value to a VSX register.

This patch does the move directly, since if the middle argument to MTVSRDD 
is 0,
it does the zero extend.

If the DImode value is already in a vector register, it does a XXSPLTIB and
XXPERMDI to get the value into the bottom 64-bits of the register.

I have built GCC with the patches in this patch set applied on both little 
and
big endian PowerPC systems and there were no regressions.  Can I apply this
patch to GCC 15?

2025-01-31  Michael Meissner  

gcc/

PR target/108598
* gcc/config/rs6000/rs6000.md (zero_extendditi2): New insn.

gcc/testsuite/

PR target/108598
* gcc.target/powerpc/pr108958.c: New test.

Diff:
---
 gcc/config/rs6000/rs6000.md | 46 +
 gcc/testsuite/gcc.target/powerpc/pr108958.c | 27 +
 2 files changed, 73 insertions(+)

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 4c2bc81caf56..65da0c653304 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -1026,6 +1026,52 @@
(set_attr "dot" "yes")
(set_attr "length" "4,8")])
 
+(define_insn_and_split "zero_extendditi2"
+  [(set (match_operand:TI 0 "gpc_reg_operand" "=r,wa,&wa")
+   (zero_extend:TI
+(match_operand:DI 1 "gpc_reg_operand" "rwa,r,wa")))]
+  "TARGET_P9_VECTOR && TARGET_POWERPC64"
+  "@
+  #
+  mtvsrdd %x0,0,%1
+  #"
+  "&& reload_completed
+   && (int_reg_operand (operands[0], TImode)
+   || vsx_register_operand (operands[1], DImode))"
+  [(set (match_dup 2)
+   (match_dup 3))
+   (set (match_dup 4)
+   (match_dup 5))]
+{
+  rtx op0 = operands[0];
+  rtx op1 = operands[1];
+  int r = reg_or_subregno (op0);
+
+  if (int_reg_operand (op0, TImode))
+{
+  int lo = BYTES_BIG_ENDIAN ? 1 : 0;
+  int hi = 1 - lo;
+
+  operands[2] = gen_rtx_REG (DImode, r + lo);
+  operands[3] = op1;
+  operands[4] = gen_rtx_REG (DImode, r + hi);
+  operands[5] = const0_rtx;
+}
+  else
+{
+  rtx op0_di = gen_rtx_REG (DImode, r);
+  rtx op0_v2di = gen_rtx_REG (V2DImode, r);
+  rtx lo = WORDS_BIG_ENDIAN ? op1 : op0_di;
+  rtx hi = WORDS_BIG_ENDIAN ? op0_di : op1;
+
+  operands[2] = op0_v2di;
+  operands[3] = CONST0_RTX (V2DImode);
+  operands[4] = op0_v2di;
+  operands[5] = gen_rtx_VEC_CONCAT (V2DImode, hi, lo);
+}
+}
+  [(set_attr "type" "*,mtvsr,vecperm")
+   (set_attr "length" "8,*,8")])
 
 (define_insn "extendqi2"
   [(set (match_operand:EXTQI 0 "gpc_reg_operand" "=r,?*v")
diff --git a/gcc/testsuite/gcc.target/powerpc/pr108958.c 
b/gcc/testsuite/gcc.target/powerpc/pr108958.c
index e69de29bb2d1..03eb58d069e7 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr108958.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr108958.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target int128 } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
+
+/* PR target/108958, use mtvsrdd to zero extend gpr to vsx register.  */
+
+void
+gpr_to_vsx (unsigned long long x, __uint128_t *p)
+{
+  /* mtvsrdd vsx,0,gpr.  */
+  __uint128_t y = x;
+  __asm__ (" # %x0" : "+wa" (y));
+  *p = y;
+}
+
+void
+gpr_to_gpr (unsigned long long x, __uint128_t *p)
+{
+  /* mr and li.  */
+  __uint128_t y = x;
+  __asm__ (" # %0" : "+r" (y));
+  *p = y;
+}
+
+/* { dg-final { scan-assembler-times {\mli\M}  1 } } */
+/* { dg-final { scan-assembler-times {\mmtvsrdd .*,0,.*\M} 1 } } */

[gcc(refs/users/meissner/heads/work192-libs)] Add ChangeLog.libs and update REVISION.

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:35ea79d291b1d10f16008dc86b16f8bb3a835a6d

commit 35ea79d291b1d10f16008dc86b16f8bb3a835a6d
Author: Michael Meissner 
Date:   Fri Jan 31 17:40:39 2025 -0500

Add ChangeLog.libs and update REVISION.

2025-01-31  Michael Meissner  

gcc/

* ChangeLog.libs: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.libs | 5 +
 gcc/REVISION   | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.libs b/gcc/ChangeLog.libs
new file mode 100644
index ..11fa38d3c6a6
--- /dev/null
+++ b/gcc/ChangeLog.libs
@@ -0,0 +1,5 @@
+ Branch work192-libs, baseline 
+
+2025-01-31   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index 6126a6968da2..922a6d91048c 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work192 branch
+work192-libs branch

[gcc(refs/users/meissner/heads/work192-test)] Merge commit 'refs/users/meissner/heads/work192-test' of git+ssh://gcc.gnu.org/git/gcc into me/work1

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:a22c9e6f4d7cc497883e32e614b92ea5a3fc2bdf

commit a22c9e6f4d7cc497883e32e614b92ea5a3fc2bdf
Merge: 99b461fef4a7 a6110a7d2c38
Author: Michael Meissner 
Date:   Fri Jan 31 22:42:28 2025 -0500

Merge commit 'refs/users/meissner/heads/work192-test' of 
git+ssh://gcc.gnu.org/git/gcc into me/work192-test

Diff:

[gcc(refs/users/meissner/heads/work192-vpair)] Add ChangeLog.vpair and update REVISION.

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:f70a5294efb2d82387c4ac7c4a6460330d25d273

commit f70a5294efb2d82387c4ac7c4a6460330d25d273
Author: Michael Meissner 
Date:   Fri Jan 31 17:38:45 2025 -0500

Add ChangeLog.vpair and update REVISION.

2025-01-31  Michael Meissner  

gcc/

* ChangeLog.vpair: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.vpair | 5 +
 gcc/REVISION| 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.vpair b/gcc/ChangeLog.vpair
new file mode 100644
index ..e63eb38880eb
--- /dev/null
+++ b/gcc/ChangeLog.vpair
@@ -0,0 +1,5 @@
+ Branch work192-vpair, baseline 
+
+2025-01-31   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index 6126a6968da2..0bc6a3c906da 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work192 branch
+work192-vpair branch

[gcc(refs/users/meissner/heads/work192-bugs)] Fix PR 118541, do not generate unordered fp cmoves for IEEE compares.

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:844346db42f4de32bde00c281a5630ad2b0f5db7

commit 844346db42f4de32bde00c281a5630ad2b0f5db7
Author: Michael Meissner 
Date:   Fri Jan 31 23:21:52 2025 -0500

Fix PR 118541, do not generate unordered fp cmoves for IEEE compares.

In bug PR target/118541 on power9, power10, and power11 systems, for the
function:

extern double __ieee754_acos (double);

double
__acospi (double x)
{
  double ret = __ieee754_acos (x) / 3.14;
  return __builtin_isgreater (ret, 1.0) ? 1.0 : ret;
}

GCC currently generates the following code:

Power9  Power10 and Power11
==  ===
bl __ieee754_acos   bl __ieee754_acos@notoc
nop plfd 0,.LC0@pcrel
addis 9,2,.LC2@toc@ha   xxspltidp 12,1065353216
addi 1,1,32 addi 1,1,32
lfd 0,.LC2@toc@l(9) ld 0,16(1)
addis 9,2,.LC0@toc@ha   fdiv 0,1,0
ld 0,16(1)  mtlr 0
lfd 12,.LC0@toc@l(9)xscmpgtdp 1,0,12
fdiv 0,1,0  xxsel 1,0,12,1
mtlr 0  blr
xscmpgtdp 1,0,12
xxsel 1,0,12,1
blr

This is because ifcvt.c optimizes the conditional floating point move to 
use the
XSCMPGTDP instruction.

However, the XSCMPGTDP instruction will generate an interrupt if one of the
arguments is a signalling NaN and signalling NaNs can generate an interrupt.
The IEEE comparison functions (isgreater, etc.) require that the comparison 
not
raise an interrupt.

The following patch changes the PowerPC back end so that ifcvt.c will not 
change
the if/then test and move into a conditional move if the comparison is one 
of
the comparisons that do not raise an error with signalling NaNs and -Ofast 
is
not used.  If a normal comparison is used or -Ofast is used, GCC will 
continue
to generate XSCMPGTDP and XXSEL.

For the following code:

double
ordered_compare (double a, double b, double c, double d)
{
  return __builtin_isgreater (a, b) ? c : d;
}

/* Verify normal > does generate xscmpgtdp.  */

double
normal_compare (double a, double b, double c, double d)
{
  return a > b ? c : d;
}

with the following patch, GCC generates the following for power9, power10, 
and
power11:

ordered_compare:
fcmpu 0,1,2
fmr 1,4
bnglr 0
fmr 1,3
blr

normal_compare:
xscmpgtdp 1,1,2
xxsel 1,4,3,1
blr

I have built bootstrap compilers on big endian power9 systems and little 
endian
power9/power10 systems and there were no regressions.  Can I check this 
patch
into the GCC trunk, and after a waiting period, can I check this into the 
active
older branches?

2025-01-31  Michael Meissner  

gcc/

PR target/118541
* config/rs6000/rs6000-protos.h (REVERSE_COND_ORDERED_OK): New 
macro.
(REVERSE_COND_NO_ORDERED): Likewise.
(rs6000_reverse_condition): Add argument.
* config/rs6000/rs6000.cc (rs6000_reverse_condition): Do not allow
ordered comparisons to be reversed for floating point cmoves.
(rs6000_emit_sCOND): Adjust rs6000_reverse_condition call.
* config/rs6000/rs6000.h (REVERSE_CONDITION): Likewise.
* config/rs6000/rs6000.md (reverse_branch_comparison): Name insn.
Adjust rs6000_reverse_condition call.

gcc/testsuite/

PR target/118541
* gcc.target/powerpc/pr118541.c: New test.

Diff:
---
 gcc/config/rs6000/rs6000-protos.h   |  6 +++-
 gcc/config/rs6000/rs6000.cc | 28 +--
 gcc/config/rs6000/rs6000.h  | 10 +--
 gcc/config/rs6000/rs6000.md | 24 ++--
 gcc/testsuite/gcc.target/powerpc/pr118541.c | 43 +
 5 files changed, 91 insertions(+), 20 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-protos.h 
b/gcc/config/rs6000/rs6000-protos.h
index 4619142d197b..112332660d3b 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -114,8 +114,12 @@ extern const char *rs6000_sibcall_template (rtx *, 
unsigned int);
 extern const char *rs6000_indirect_call_template (rtx *, unsigned int);
 extern const char *rs6000_indirect_sibcall_template (rtx *, unsigned int);
 extern const char *rs60

[gcc(refs/users/meissner/heads/work192-dmf)] RFC2653-PowerPC: Add support for 1, 024 bit DMR registers.

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:7769302f9e1d4f3e24ea5cb1fb2c7520820836da

commit 7769302f9e1d4f3e24ea5cb1fb2c7520820836da
Author: Michael Meissner 
Date:   Fri Jan 31 23:28:07 2025 -0500

RFC2653-PowerPC: Add support for 1,024 bit DMR registers.

This patch is a prelimianry patch to add the full 1,024 bit dense math 
register
(DMRs) for -mcpu=future.  The MMA 512-bit accumulators map onto the top of 
the
DMR register.

This patch only adds the new 1,024 bit register support.  It does not add
support for any instructions that need 1,024 bit registers instead of 512 
bit
registers.

I used the new mode 'TDOmode' to be the opaque mode used for 1,024 bit
registers.  The 'wD' constraint added in previous patches is used for these
registers.  I added support to do load and store of DMRs via the VSX 
registers,
since there are no load/store dense math instructions.  I added the new 
keyword
'__dmr' to create 1,024 bit types that can be loaded into DMRs.  At 
present, I
don't have aliases for __dmr512 and __dmr1024 that we've discussed 
internally.

The patches have been tested on both little and big endian systems.  Can I 
check
it into the master branch?

2025-01-31   Michael Meissner  

gcc/

* config/rs6000/mma.md (UNSPEC_DM_INSERT512_UPPER): New unspec.
(UNSPEC_DM_INSERT512_LOWER): Likewise.
(UNSPEC_DM_EXTRACT512): Likewise.
(UNSPEC_DMR_RELOAD_FROM_MEMORY): Likewise.
(UNSPEC_DMR_RELOAD_TO_MEMORY): Likewise.
(movtdo): New define_expand and define_insn_and_split to implement 
1,024
bit DMR registers.
(movtdo_insert512_upper): New insn.
(movtdo_insert512_lower): Likewise.
(movtdo_extract512): Likewise.
(reload_dmr_from_memory): Likewise.
(reload_dmr_to_memory): Likewise.
* config/rs6000/rs6000-builtin.cc (rs6000_type_string): Add DMR
support.
(rs6000_init_builtins): Add support for __dmr keyword.
* config/rs6000/rs6000-call.cc (rs6000_return_in_memory): Add 
support
for TDOmode.
(rs6000_function_arg): Likewise.
* config/rs6000/rs6000-modes.def (TDOmode): New mode.
* config/rs6000/rs6000.cc (rs6000_hard_regno_nregs_internal): Add
support for TDOmode.
(rs6000_hard_regno_mode_ok_uncached): Likewise.
(rs6000_hard_regno_mode_ok): Likewise.
(rs6000_modes_tieable_p): Likewise.
(rs6000_debug_reg_global): Likewise.
(rs6000_setup_reg_addr_masks): Likewise.
(rs6000_init_hard_regno_mode_ok): Add support for TDOmode.  Setup 
reload
hooks for DMR mode.
(reg_offset_addressing_ok_p): Add support for TDOmode.
(rs6000_emit_move): Likewise.
(rs6000_secondary_reload_simple_move): Likewise.
(rs6000_preferred_reload_class): Likewise.
(rs6000_secondary_reload_class): Likewise.
(rs6000_mangle_type): Add mangling for __dmr type.
(rs6000_dmr_register_move_cost): Add support for TDOmode.
(rs6000_split_multireg_move): Likewise.
(rs6000_invalid_conversion): Likewise.
* config/rs6000/rs6000.h (VECTOR_ALIGNMENT_P): Add TDOmode.
(enum rs6000_builtin_type_index): Add DMR type nodes.
(dmr_type_node): Likewise.
(ptr_dmr_type_node): Likewise.

gcc/testsuite/

* gcc.target/powerpc/dm-1024bit.c: New test.
* lib/target-supports.exp (check_effective_target_ppc_dmr_ok): New
target test.

Diff:
---
 gcc/config/rs6000/mma.md  | 154 ++
 gcc/config/rs6000/rs6000-builtin.cc   |  17 +++
 gcc/config/rs6000/rs6000-call.cc  |  10 +-
 gcc/config/rs6000/rs6000-modes.def|   4 +
 gcc/config/rs6000/rs6000.cc   | 101 -
 gcc/config/rs6000/rs6000.h|   6 +-
 gcc/testsuite/gcc.target/powerpc/dm-1024bit.c |  63 +++
 gcc/testsuite/lib/target-supports.exp |  35 ++
 8 files changed, 356 insertions(+), 34 deletions(-)

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index 683d2398ef90..1420fadd4355 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -92,6 +92,11 @@
UNSPEC_MMA_XXMFACC
UNSPEC_MMA_XXMTACC
UNSPEC_MMA_DMSETDMRZ
+   UNSPEC_DM_INSERT512_UPPER
+   UNSPEC_DM_INSERT512_LOWER
+   UNSPEC_DM_EXTRACT512
+   UNSPEC_DMR_RELOAD_FROM_MEMORY
+   UNSPEC_DMR_RELOAD_TO_MEMORY
   ])
 
 (define_c_enum "unspecv"
@@ -742,3 +747,152 @@
   " %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
(set_attr "prefixed" "yes")])
+
+;; TDOmode (__dmr keyword for 1,024 bit registers).
+(define_expand "movtdo"
+  [(set (match_operand:TDO 0 "nonimmediate_operand")
+   (match_operand:

[gcc(refs/users/meissner/heads/work192-dmf)] RFC2653-Add support for dense math registers.

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:22e07fef3eb1552b6eaee1a078509cd532a2622d

commit 22e07fef3eb1552b6eaee1a078509cd532a2622d
Author: Michael Meissner 
Date:   Fri Jan 31 23:27:11 2025 -0500

RFC2653-Add support for dense math registers.

The MMA subsystem added the notion of accumulator registers as an optional
feature of ISA 3.1 (power10).  In ISA 3.1, these accumulators overlapped 
with
the VSX registers 0..31, but logically the accumulator registers were 
separate
from the FPR registers.  In ISA 3.1, it was anticipated that in future 
systems,
the accumulator registers may no overlap with the FPR registers.  This patch
adds the support for dense math registers as separate registers.

This particular patch does not change the MMA support to use the 
accumulators
within the dense math registers.  This patch just adds the basic support for
having separate DMRs.  The next patch will switch the MMA support to use the
accumulators if -mcpu=future is used.

For testing purposes, I added an undocumented option '-mdense-math' to 
enable
or disable the dense math support.

This patch updates the wD constraint added in the previous patch.  If MMA is
selected but dense math is not selected (i.e. -mcpu=power10), the wD 
constraint
will allow access to accumulators that overlap with VSX registers 0..31.  If
both MMA and dense math are selected (i.e. -mcpu=future), the wD constraint
will only allow dense math registers.

This patch modifies the existing %A output modifier.  If MMA is selected but
dense math is not selected, then %A output modifier converts the VSX 
register
number to the accumulator number, by dividing it by 4.  If both MMA and 
dense
math are selected, then %A will map the separate DMR registers into 0..7.

The intention is that user code using extended asm can be modified to run on
both MMA without dense math and MMA with dense math:

1)  If possible, don't use extended asm, but instead use the MMA 
built-in
functions;

2)  If you do need to write extended asm, change the d constraints
targetting accumulators should now use wD;

3)  Only use the built-in zero, assemble and disassemble functions 
create
move data between vector quad types and dense math accumulators.
I.e. do not use the xxmfacc, xxmtacc, and xxsetaccz directly in the
extended asm code.  The reason is these instructions assume there 
is a
1-to-1 correspondence between 4 adjacent FPR registers and an
accumulator that overlaps with those instructions.  With 
accumulators
now being separate registers, there no longer is a 1-to-1
correspondence.

It is possible that the mangling for DMRs and the GDB register numbers may
produce other changes in the future.

gcc/

2025-01-31   Michael Meissner  

* config/rs6000/mma.md (UNSPEC_MMA_DMSETDMRZ): New unspec.
(movxo): Add comments about dense math registers.
(movxo_nodm): Rename from movxo and restrict the usage to machines
without dense math registers.
(movxo_dm): New insn for movxo support for machines with dense math
registers.
(mma_): Restrict usage to machines without dense math 
registers.
(mma_xxsetaccz): Add a define_expand wrapper, and add support for 
dense
math registers.
(mma_dmsetaccz): New insn.
* config/rs6000/predicates.md (dmr_operand): New predicate.
(accumulator_operand): Add support for dense math registers.
* config/rs6000/rs6000-builtin.cc (rs6000_gimple_fold_mma_builtin): 
Do
not issue a de-prime instruction when disassembling a vector quad 
on a
system with dense math registers.
* config/rs6000/rs6000-c.cc (rs6000_define_or_undefine_macro): 
Define
__DENSE_MATH__ if we have dense math registers.
* config/rs6000/rs6000.cc (enum rs6000_reg_type): Add DMR_REG_TYPE.
(enum rs6000_reload_reg_type): Add RELOAD_REG_DMR.
(LAST_RELOAD_REG_CLASS): Add support for DMR registers and the wD
constraint.
(reload_reg_map): Likewise.
(rs6000_reg_names): Likewise.
(alt_reg_names): Likewise.
(rs6000_hard_regno_nregs_internal): Likewise.
(rs6000_hard_regno_mode_ok_uncached): Likewise.
(rs6000_debug_reg_global): Likewise.
(rs6000_setup_reg_addr_masks): Likewise.
(rs6000_init_hard_regno_mode_ok): Likewise.
(rs6000_secondary_reload_memory): Add support for DMR registers.
(rs6000_secondary_reload_simple_move): Likewise.
(rs6000_preferred_reload_class): Likewise.
(rs6000_secondary_reload_class): Likewise.
(print_operan

[gcc(refs/users/meissner/heads/work192-bugs)] Update ChangeLog.*

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:18dd24fe91c3fec407c1d28e5ec14471b18d637f

commit 18dd24fe91c3fec407c1d28e5ec14471b18d637f
Author: Michael Meissner 
Date:   Fri Jan 31 23:24:26 2025 -0500

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.bugs | 272 +
 1 file changed, 272 insertions(+)

diff --git a/gcc/ChangeLog.bugs b/gcc/ChangeLog.bugs
index fa480c0b3325..25d92dc2245a 100644
--- a/gcc/ChangeLog.bugs
+++ b/gcc/ChangeLog.bugs
@@ -1,5 +1,277 @@
+ Branch work192-bugs, patch #210 
+
+Fix PR 118541, do not generate unordered fp cmoves for IEEE compares.
+
+In bug PR target/118541 on power9, power10, and power11 systems, for the
+function:
+
+extern double __ieee754_acos (double);
+
+double
+__acospi (double x)
+{
+  double ret = __ieee754_acos (x) / 3.14;
+  return __builtin_isgreater (ret, 1.0) ? 1.0 : ret;
+}
+
+GCC currently generates the following code:
+
+Power9  Power10 and Power11
+==  ===
+bl __ieee754_acos   bl __ieee754_acos@notoc
+nop plfd 0,.LC0@pcrel
+addis 9,2,.LC2@toc@ha   xxspltidp 12,1065353216
+addi 1,1,32 addi 1,1,32
+lfd 0,.LC2@toc@l(9) ld 0,16(1)
+addis 9,2,.LC0@toc@ha   fdiv 0,1,0
+ld 0,16(1)  mtlr 0
+lfd 12,.LC0@toc@l(9)xscmpgtdp 1,0,12
+fdiv 0,1,0  xxsel 1,0,12,1
+mtlr 0  blr
+xscmpgtdp 1,0,12
+xxsel 1,0,12,1
+blr
+
+This is because ifcvt.c optimizes the conditional floating point move to use 
the
+XSCMPGTDP instruction.
+
+However, the XSCMPGTDP instruction will generate an interrupt if one of the
+arguments is a signalling NaN and signalling NaNs can generate an interrupt.
+The IEEE comparison functions (isgreater, etc.) require that the comparison not
+raise an interrupt.
+
+The following patch changes the PowerPC back end so that ifcvt.c will not 
change
+the if/then test and move into a conditional move if the comparison is one of
+the comparisons that do not raise an error with signalling NaNs and -Ofast is
+not used.  If a normal comparison is used or -Ofast is used, GCC will continue
+to generate XSCMPGTDP and XXSEL.
+
+For the following code:
+
+double
+ordered_compare (double a, double b, double c, double d)
+{
+  return __builtin_isgreater (a, b) ? c : d;
+}
+
+/* Verify normal > does generate xscmpgtdp.  */
+
+double
+normal_compare (double a, double b, double c, double d)
+{
+  return a > b ? c : d;
+}
+
+with the following patch, GCC generates the following for power9, power10, and
+power11:
+
+ordered_compare:
+fcmpu 0,1,2
+fmr 1,4
+bnglr 0
+fmr 1,3
+blr
+
+normal_compare:
+xscmpgtdp 1,1,2
+xxsel 1,4,3,1
+blr
+
+I have built bootstrap compilers on big endian power9 systems and little endian
+power9/power10 systems and there were no regressions.  Can I check this patch
+into the GCC trunk, and after a waiting period, can I check this into the 
active
+older branches?
+
+2025-01-31  Michael Meissner  
+
+gcc/
+
+   PR target/118541
+   * config/rs6000/rs6000-protos.h (REVERSE_COND_ORDERED_OK): New macro.
+   (REVERSE_COND_NO_ORDERED): Likewise.
+   (rs6000_reverse_condition): Add argument.
+   * config/rs6000/rs6000.cc (rs6000_reverse_condition): Do not allow
+   ordered comparisons to be reversed for floating point cmoves.
+   (rs6000_emit_sCOND): Adjust rs6000_reverse_condition call.
+   * config/rs6000/rs6000.h (REVERSE_CONDITION): Likewise.
+   * config/rs6000/rs6000.md (reverse_branch_comparison): Name insn.
+   Adjust rs6000_reverse_condition call.
+
+gcc/testsuite/
+
+   PR target/118541
+   * gcc.target/powerpc/pr118541.c: New test.
+
+ Branch work192-bugs, patch #202 
+
+PR target/108958 -- use mtvsrdd to zero extend GPR DImode to VSX TImode
+
+Previously GCC would zero externd a DImode GPR value to TImode by first zero
+extending the DImode value into a GPR TImode value, and then do a MTVSRDD to
+move this value to a VSX register.
+
+This patch does the move directly, since if the middle argument to MTVSRDD is 
0,
+it does the zero extend.
+
+If the DImode value is already in a vector register, it does a XXSPLTIB and
+XXPERMDI to get the value into the bottom 64-bits of the register.
+
+I have built GCC with the patches in this patch set applied on both little and
+big endian PowerPC systems and there were no regressions.  Can I apply this
+patch to GCC 15?
+
+2025-01-31

[gcc(refs/users/meissner/heads/work192-dmf)] RFC2653-Add wD constraint.

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:03fc17fb142ff7bd684a11a12fb168ffc02ac64b

commit 03fc17fb142ff7bd684a11a12fb168ffc02ac64b
Author: Michael Meissner 
Date:   Fri Jan 31 23:26:07 2025 -0500

RFC2653-Add wD constraint.

This patch adds a new constraint ('wD') that matches the accumulator 
registers
that overlap with VSX registers 0..31 on power10.  Future patches will add 
the
support for a separate accumulator register class that will be used when the
support for dense math registes is added.

2025-01-31   Michael Meissner  

* config/rs6000/constraints.md (wD): New constraint.
* config/rs6000/mma.md (mma_): Prepare for alternate 
accumulator
registers.  Use wD constraint instead of 'd' constraint.  Use
accumulator_operand instead of fpr_reg_operand.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")]
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0")]
MMA_ACC))]
   "TARGET_MMA"
   " %A0"
@@ -523,7 +523,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
MMA_VV))]
@@ -532,8 +532,8 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
MMA_AVV))]
@@ -542,7 +542,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
MMA_PV))]
@@ -551,8 +551,8 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
(match_operand:OO 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
MMA_APV))]
@@ -561,7 +561,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -574,8 +574,8 @@
(set_attr "prefixed" "yes")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
(match_operand:SI 4 "const_0_to_15_operand" "n,n")
@@ -588,7 +588,7 @@
(set_attr "prefixed" "yes")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -601,8 +601,8 @@
(set_attr "prefixed" "yes")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")

[gcc(refs/users/meissner/heads/work192-dmf)] RFC2656-Support load/store vector with right length.

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:27be229258349708c8ea7b186874f63ec48ef22a

commit 27be229258349708c8ea7b186874f63ec48ef22a
Author: Michael Meissner 
Date:   Fri Jan 31 23:30:12 2025 -0500

RFC2656-Support load/store vector with right length.

This patch adds support for new instructions that may be added to the 
PowerPC
architecture in the future to enhance the load and store vector with length
instructions.

The current instructions (lxvl, lxvll, stxvl, and stxvll) are inconvient to 
use
since the count for the number of bytes must be in the top 8 bits of the GPR
register, instead of the bottom 8 bits.  This meant that code generating 
these
instructions typically had to do a shift left by 56 bits to get the count 
into
the right position.  In a future version of the PowerPC architecture, new
variants of these instructions might be added that expect the count to be in
the bottom 8 bits of the GPR register.  These patches add this support to 
GCC
if the user uses the -mcpu=future option.

I discovered that the code in rs6000-string.cc to generate ISA 3.1 
lxvl/stxvl
future lxvll/stxvll instructions would generate these instructions on 
32-bit.
However the patterns for these instructions is only done on 64-bit systems. 
 So
I added a check for 64-bit support before generating the instructions.

The patches have been tested on both little and big endian systems.  Can I 
check
it into the master branch?

2025-01-31   Michael Meissner  

gcc/

* config/rs6000/rs6000-string.cc (expand_block_move): Do not 
generate
lxvl and stxvl on 32-bit.
* config/rs6000/vsx.md (lxvl): If -mcpu=future, generate the lxvl 
with
the shift count automaticaly used in the insn.
(lxvrl): New insn for -mcpu=future.
(lxvrll): Likewise.
(stxvl): If -mcpu=future, generate the stxvl with the shift count
automaticaly used in the insn.
(stxvrl): New insn for -mcpu=future.
(stxvrll): Likewise.

gcc/testsuite/

* gcc.target/powerpc/lxvrl.c: New test.
* lib/target-supports.exp 
(check_effective_target_powerpc_future_ok):
New effective target.

Diff:
---
 gcc/config/rs6000/rs6000-string.cc   |   1 +
 gcc/config/rs6000/vsx.md | 122 +--
 gcc/testsuite/gcc.target/powerpc/lxvrl.c |  32 
 3 files changed, 134 insertions(+), 21 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-string.cc 
b/gcc/config/rs6000/rs6000-string.cc
index 703f77fa0bf1..814328140553 100644
--- a/gcc/config/rs6000/rs6000-string.cc
+++ b/gcc/config/rs6000/rs6000-string.cc
@@ -2786,6 +2786,7 @@ expand_block_move (rtx operands[], bool might_overlap)
 
   if (TARGET_MMA && TARGET_BLOCK_OPS_UNALIGNED_VSX
  && TARGET_BLOCK_OPS_VECTOR_PAIR
+ && TARGET_POWERPC64
  && bytes >= 32
  && (align >= 256 || !STRICT_ALIGNMENT))
{
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index dd3573b80868..89523cf4a0e5 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -5712,20 +5712,32 @@
   DONE;
 })
 
-;; Load VSX Vector with Length
+;; Load VSX Vector with Length.  If we have lxvrl, we don't have to do an
+;; explicit shift left into a pseudo.
 (define_expand "lxvl"
-  [(set (match_dup 3)
-(ashift:DI (match_operand:DI 2 "register_operand")
-   (const_int 56)))
-   (set (match_operand:V16QI 0 "vsx_register_operand")
-   (unspec:V16QI
-[(match_operand:DI 1 "gpc_reg_operand")
-  (mem:V16QI (match_dup 1))
- (match_dup 3)]
-UNSPEC_LXVL))]
+  [(use (match_operand:V16QI 0 "vsx_register_operand"))
+   (use (match_operand:DI 1 "gpc_reg_operand"))
+   (use (match_operand:DI 2 "gpc_reg_operand"))]
   "TARGET_P9_VECTOR && TARGET_64BIT"
 {
-  operands[3] = gen_reg_rtx (DImode);
+  rtx shift_len = gen_rtx_ASHIFT (DImode, operands[2], GEN_INT (56));
+  rtx len;
+
+  if (TARGET_FUTURE)
+len = shift_len;
+  else
+{
+  len = gen_reg_rtx (DImode);
+  emit_insn (gen_rtx_SET (len, shift_len));
+}
+
+  rtx dest = operands[0];
+  rtx addr = operands[1];
+  rtx mem = gen_rtx_MEM (V16QImode, addr);
+  rtvec rv = gen_rtvec (3, addr, mem, len);
+  rtx lxvl = gen_rtx_UNSPEC (V16QImode, rv, UNSPEC_LXVL);
+  emit_insn (gen_rtx_SET (dest, lxvl));
+  DONE;
 })
 
 (define_insn "*lxvl"
@@ -5749,6 +5761,34 @@
   "lxvll %x0,%1,%2"
   [(set_attr "type" "vecload")])
 
+;; For lxvrl and lxvrll, use the combiner to eliminate the shift.  The
+;; define_expand for lxvl will already incorporate the shift in generating the
+;; insn.  The lxvll buitl-in function required the user to have already done
+;; the shift.  Defining lxvrll this way, will optimize cases where the user has
+;; done the shift immediately before the built-in.
+(define_insn "*lxvrl"
+  [(set (matc

[gcc(refs/users/meissner/heads/work192-dmf)] RFC2655-Add saturating subtract built-ins.

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:517a75e834a7ea7e5094e3b072cfc7a77b938293

commit 517a75e834a7ea7e5094e3b072cfc7a77b938293
Author: Michael Meissner 
Date:   Fri Jan 31 23:31:17 2025 -0500

RFC2655-Add saturating subtract built-ins.

This patch adds support for a saturating subtract built-in function that 
may be
added to a future PowerPC processor.  Note, if it is added, the name of the
built-in function may change before GCC 13 is released.  If the name 
changes,
we will submit a patch changing the name.

I also added support for providing dense math built-in functions, even 
though
at present, we have not added any new built-in functions for dense math.  
It is
likely we will want to add new dense math built-in functions as the dense 
math
support is fleshed out.

The patches have been tested on both little and big endian systems.  Can I 
check
it into the master branch?

2025-01-31   Michael Meissner  

gcc/

* config/rs6000/rs6000-builtin.cc (rs6000_invalid_builtin): Add 
support
for flagging invalid use of future built-in functions.
(rs6000_builtin_is_supported): Add support for future built-in
functions.
* config/rs6000/rs6000-builtins.def 
(__builtin_saturate_subtract32): New
built-in function for -mcpu=future.
(__builtin_saturate_subtract64): Likewise.
* config/rs6000/rs6000-gen-builtins.cc (enum bif_stanza): Add 
stanzas
for -mcpu=future built-ins.
(stanza_map): Likewise.
(enable_string): Likewise.
(struct attrinfo): Likewise.
(parse_bif_attrs): Likewise.
(write_decls): Likewise.
* config/rs6000/rs6000.md (sat_sub3): Add saturating subtract
built-in insn declarations.
(sat_sub3_dot): Likewise.
(sat_sub3_dot2): Likewise.
* doc/extend.texi (Future PowerPC built-ins): New section.

gcc/testsuite/

* gcc.target/powerpc/subfus-1.c: New test.
* gcc.target/powerpc/subfus-2.c: Likewise.

Diff:
---
 gcc/config/rs6000/rs6000-builtin.cc | 17 
 gcc/config/rs6000/rs6000-builtins.def   | 10 +
 gcc/config/rs6000/rs6000-gen-builtins.cc| 35 ++---
 gcc/config/rs6000/rs6000.md | 60 +
 gcc/doc/extend.texi | 24 
 gcc/testsuite/gcc.target/powerpc/subfus-1.c | 32 +++
 gcc/testsuite/gcc.target/powerpc/subfus-2.c | 32 +++
 7 files changed, 205 insertions(+), 5 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin.cc 
b/gcc/config/rs6000/rs6000-builtin.cc
index ea8755b3ef8a..1885b1f636f3 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -139,6 +139,17 @@ rs6000_invalid_builtin (enum rs6000_gen_builtins fncode)
 case ENB_MMA:
   error ("%qs requires the %qs option", name, "-mmma");
   break;
+case ENB_FUTURE:
+  error ("%qs requires the %qs option", name, "-mcpu=future");
+  break;
+case ENB_FUTURE_64:
+  error ("%qs requires the %qs option and either the %qs or %qs option",
+name, "-mcpu=future", "-m64", "-mpowerpc64");
+  break;
+case ENB_DM:
+  error ("%qs requires the %qs or %qs options", name, "-mcpu=future",
+"-mdense-math");
+  break;
 default:
 case ENB_ALWAYS:
   gcc_unreachable ();
@@ -194,6 +205,12 @@ rs6000_builtin_is_supported (enum rs6000_gen_builtins 
fncode)
   return TARGET_HTM;
 case ENB_MMA:
   return TARGET_MMA;
+case ENB_FUTURE:
+  return TARGET_FUTURE;
+case ENB_FUTURE_64:
+  return TARGET_FUTURE && TARGET_POWERPC64;
+case ENB_DM:
+  return TARGET_DENSE_MATH;
 default:
   gcc_unreachable ();
 }
diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index 555d7d589506..eef5f41f7615 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -137,6 +137,8 @@
 ;   endian   Needs special handling for endianness
 ;   ibmldRestrict usage to the case when TFmode is IBM-128
 ;   ibm128   Restrict usage to the case where __ibm128 is supported or if ibmld
+;   future   Restrict usage to future instructions
+;   dm   Restrict usage to dense math
 ;
 ; Each attribute corresponds to extra processing required when
 ; the built-in is expanded.  All such special processing should
@@ -3924,3 +3926,11 @@
 
   void __builtin_vsx_stxvp (v256, unsigned long, const v256 *);
 STXVP nothing {mma,pair}
+
+[future]
+  const signed int __builtin_saturate_subtract32 (signed int, signed int);
+  SAT_SUBSI sat_subsi3 {}
+
+[future-64]
+  const signed long __builtin_saturate_subtract64 (signed long,  signed long);
+  SAT_SUBDI sat_subdi3 {}
diff --git a/gcc/config/rs6000/rs6000-gen-builtins.cc 
b/gcc/conf

[gcc(refs/users/meissner/heads/work192-dmf)] RFC2686-Add paddis support.

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:a2815716eceaa38776be5bce14ae5a5c8a4c4d92

commit a2815716eceaa38776be5bce14ae5a5c8a4c4d92
Author: Michael Meissner 
Date:   Fri Jan 31 23:32:56 2025 -0500

RFC2686-Add paddis support.

2025-01-31  Michael Meissner  

gcc/

* config/rs6000/constraints.md (eU): New constraint.
(eV): Likewise.
* config/rs6000/predicates.md (paddis_operand): New predicate.
(paddis_paddi_operand): Likewise.
(add_operand): Add paddis support.
* config/rs6000/rs6000.cc (num_insns_constant_gpr): Add paddis 
support.
(num_insns_constant_multi): Likewise.
(print_operand): Add %B for paddis support.
* config/rs6000/rs6000.h (TARGET_PADDIS): New macro.
(SIGNED_INTEGER_32BIT_P): Likewise.
* config/rs6000/rs6000.md (isa attribute): Add paddis support.
(enabled attribute); Likewise.
(add3): Likewise.
(adddi3 splitter): New splitter for paddis.
(movdi_internal64): Add paddis support.
(movdi splitter): New splitter for paddis.

gcc/testsuite/

* gcc.target/powerpc/prefixed-addis.c: New test.

Diff:
---
 gcc/config/rs6000/constraints.md  | 10 +++
 gcc/config/rs6000/predicates.md   | 52 +++-
 gcc/config/rs6000/rs6000.cc   | 25 ++
 gcc/config/rs6000/rs6000.h|  4 +
 gcc/config/rs6000/rs6000.md   | 96 ---
 gcc/testsuite/gcc.target/powerpc/prefixed-addis.c | 24 ++
 6 files changed, 197 insertions(+), 14 deletions(-)

diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index 3da9ed086810..5440becb6e6c 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -222,6 +222,16 @@
   "An IEEE 128-bit constant that can be loaded into VSX registers."
   (match_operand 0 "easy_vector_constant_ieee128"))
 
+(define_constraint "eU"
+  "@internal integer constant that can be loaded with paddis"
+  (and (match_code "const_int")
+   (match_operand 0 "paddis_operand")))
+
+(define_constraint "eV"
+  "@internal integer constant that can be loaded with paddis + paddi"
+  (and (match_code "const_int")
+   (match_operand 0 "paddis_paddi_operand")))
+
 ;; Floating-point constraints.  These two are defined so that insn
 ;; length attributes can be calculated exactly.
 
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index c95b4336f062..c206860e4927 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -369,6 +369,53 @@
   return SIGNED_INTEGER_34BIT_P (INTVAL (op));
 })
 
+;; Return 1 if op is a 64-bit constant that uses the paddis instruction
+(define_predicate "paddis_operand"
+  (match_code "const_int")
+{
+  if (!TARGET_PADDIS && TARGET_POWERPC64)
+return 0;
+
+  /* If addi, addis, or paddi can handle the number, don't return true.  */
+  HOST_WIDE_INT value = INTVAL (op);
+  if (SIGNED_INTEGER_34BIT_P (value))
+return false;
+
+  /* If the number is too large for padds, return false.  */
+  if (!SIGNED_INTEGER_32BIT_P (value >> 32))
+return false;
+
+  /* If the bottom 32-bits are non-zero, paddis can't handle it.  */
+  if ((value & HOST_WIDE_INT_C(0x)) != 0)
+return false;
+
+  return true;
+})
+
+;; Return 1 if op is a 64-bit constant that needs the paddis instruction and an
+;; addi/addis/paddi instruction combination.
+(define_predicate "paddis_paddi_operand"
+  (match_code "const_int")
+{
+  if (!TARGET_PADDIS && TARGET_POWERPC64)
+return 0;
+
+  /* If addi, addis, or paddi can handle the number, don't return true.  */
+  HOST_WIDE_INT value = INTVAL (op);
+  if (SIGNED_INTEGER_34BIT_P (value))
+return false;
+
+  /* If the number is too large for padds, return false.  */
+  if (!SIGNED_INTEGER_32BIT_P (value >> 32))
+return false;
+
+  /* If the bottom 32-bits are zero, we can use paddis alone to handle it.  */
+  if ((value & HOST_WIDE_INT_C(0x)) == 0)
+return false;
+
+  return true;
+})
+
 ;; Return 1 if op is a register that is not special.
 ;; Disallow (SUBREG:SF (REG:SI)) and (SUBREG:SI (REG:SF)) on VSX systems where
 ;; you need to be careful in moving a SFmode to SImode and vice versa due to
@@ -1113,7 +1160,10 @@
   (if_then_else (match_code "const_int")
 (match_test "satisfies_constraint_I (op)
 || satisfies_constraint_L (op)
-|| satisfies_constraint_eI (op)")
+|| satisfies_constraint_eI (op)
+|| satisfies_constraint_eU (op)
+|| satisfies_constraint_eV (op)")
+
 (match_operand 0 "gpc_reg_operand")))
 
 ;; Return 1 if the operand is either a non-special register, or 0, or -1.
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 80c59cfc4bc3..7595998163b3 100644
--- a/gcc/con

[gcc(refs/users/meissner/heads/work192-dmf)] RFC2677-Add xvrlw support.

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:479eb3c513b76ab505d25ad2ce0171ec6fc59c13

commit 479eb3c513b76ab505d25ad2ce0171ec6fc59c13
Author: Michael Meissner 
Date:   Fri Jan 31 23:33:45 2025 -0500

RFC2677-Add xvrlw support.

2025-01-31  Michael Meissner  

gcc/

* config/rs6000/altivec.md (xvrlw): New insn.
* config/rs6000/rs6000.h (TARGET_XVRLW): New macro.

gcc/testsuite/

* gcc.target/powerpc/vector-rotate-left.c: New test.

Diff:
---
 gcc/config/rs6000/altivec.md   | 14 +
 gcc/config/rs6000/rs6000.h |  3 ++
 .../gcc.target/powerpc/vector-rotate-left.c| 34 ++
 3 files changed, 51 insertions(+)

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 7edc288a6565..d158cf479d60 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -1982,6 +1982,20 @@
 }
   [(set_attr "type" "vecperm")])
 
+;; -mcpu=future adds a vector rotate left word variant.  There is no vector
+;; byte/half-word/double-word/quad-word rotate left.  This insn occurs before
+;; altivec_vrl and will match for -mcpu=future, while other cpus will
+;; match the generic insn.
+(define_insn "*xvrlw"
+  [(set (match_operand:V4SI 0 "register_operand" "=v,wa")
+   (rotate:V4SI (match_operand:V4SI 1 "register_operand" "v,wa")
+(match_operand:V4SI 2 "register_operand" "v,wa")))]
+  "TARGET_XVRLW"
+  "@
+   vrlw %0,%1,%2
+   xvrlw %x0,%x1,%x2"
+  [(set_attr "type" "vecsimple")])
+
 (define_insn "altivec_vrl"
   [(set (match_operand:VI2 0 "register_operand" "=v")
 (rotate:VI2 (match_operand:VI2 1 "register_operand" "v")
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 330548797815..8b9c23cebc44 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -584,6 +584,9 @@ extern int rs6000_vector_align[];
 /* Whether we have PADDIS support.  */
 #define TARGET_PADDIS  TARGET_FUTURE
 
+/* Whether we have XVRLW support.  */
+#define TARGET_XVRLW   TARGET_FUTURE
+
 /* Whether the various reciprocal divide/square root estimate instructions
exist, and whether we should automatically generate code for the instruction
by default.  */
diff --git a/gcc/testsuite/gcc.target/powerpc/vector-rotate-left.c 
b/gcc/testsuite/gcc.target/powerpc/vector-rotate-left.c
new file mode 100644
index ..5a5f37755077
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vector-rotate-left.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_future_ok } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-mdejagnu-cpu=future -O2" } */
+
+/* Test whether the xvrl (vector word rotate left using VSX registers insead of
+   Altivec registers is generated.  */
+
+#include 
+
+typedef vector unsigned int  v4si_t;
+
+v4si_t
+rotl_v4si_scalar (v4si_t x, unsigned long n)
+{
+  __asm__ (" # %x0" : "+f" (x));
+  return (x << n) | (x >> (32 - n));   /* xvrlw.  */
+}
+
+v4si_t
+rotr_v4si_scalar (v4si_t x, unsigned long n)
+{
+  __asm__ (" # %x0" : "+f" (x));
+  return (x >> n) | (x << (32 - n));   /* xvrlw.  */
+}
+
+v4si_t
+rotl_v4si_vector (v4si_t x, v4si_t y)
+{
+  __asm__ (" # %x0" : "+f" (x));   /* xvrlw.  */
+  return vec_rl (x, y);
+}
+
+/* { dg-final { scan-assembler-times {\mxvrlw\M} 3  } } */

[gcc r15-7305] [PR116234][LRA]: Check debug insn when looking at one insn pseudo occurrence

2025-01-31 Thread Vladimir Makarov via Gcc-cvs

https://gcc.gnu.org/g:decc6c0d4d909ce510b6533c48d70d0b353f909a

commit r15-7305-gdecc6c0d4d909ce510b6533c48d70d0b353f909a
Author: Vladimir N. Makarov 
Date:   Fri Jan 31 09:39:45 2025 -0500

[PR116234][LRA]: Check debug insn when looking at one insn pseudo occurrence

  LRA can change reg class to NO_REGS when pseudo referred in one
insn.  Checking the references did not take into account that referring
insn can be a debug insn.  This resulted in different code generation
with and without debug info generation.  The patch fixes this pitfall.

gcc/ChangeLog:

PR rtl-optimization/116234
* lra-constraints.cc (multiple_insn_refs_p): New function.
(curr_insn_transform): Use it.

gcc/testsuite/ChangeLog:

PR rtl-optimization/116234
* gfortran.target/aarch64/aarch64.exp: New.
* gfortran.target/aarch64/pr116234.f: New.

Diff:
---
 gcc/lra-constraints.cc| 20 +-
 gcc/testsuite/gfortran.target/aarch64/aarch64.exp | 45 +
 gcc/testsuite/gfortran.target/aarch64/pr116234.f  | 80 +++
 3 files changed, 144 insertions(+), 1 deletion(-)

diff --git a/gcc/lra-constraints.cc b/gcc/lra-constraints.cc
index ee3fd7a503aa..0659aed94c7d 100644
--- a/gcc/lra-constraints.cc
+++ b/gcc/lra-constraints.cc
@@ -4144,6 +4144,24 @@ static bool invalid_mode_reg_p (enum machine_mode mode, 
rtx x)
  ira_prohibited_class_mode_regs[rclass][mode]));
 }
 
+/* Return TRUE if regno is referenced in more than one non-debug insn.  */
+static bool
+multiple_insn_refs_p (int regno)
+{
+  unsigned int uid;
+  bitmap_iterator bi;
+  int nrefs = 0;
+  EXECUTE_IF_SET_IN_BITMAP (&lra_reg_info[regno].insn_bitmap, 0, uid, bi)
+{
+  if (!NONDEBUG_INSN_P (lra_insn_recog_data[uid]->insn))
+   continue;
+  if (nrefs == 1)
+   return true;
+  nrefs++;
+}
+  return false;
+}
+
 /* Main entry point of the constraint code: search the body of the
current insn to choose the best alternative.  It is mimicking insn
alternative cost calculation model of former reload pass.  That is
@@ -4602,7 +4620,7 @@ curr_insn_transform (bool check_only_p)
 registers for other pseudos referenced in the insn.  The most
 common case of this is a scratch register which will be
 transformed to scratch back at the end of LRA.  */
- && bitmap_single_bit_set_p (&lra_reg_info[regno].insn_bitmap))
+ && !multiple_insn_refs_p (regno))
{
  if (lra_get_allocno_class (regno) != NO_REGS)
lra_change_class (regno, NO_REGS, "  Change to", true);
diff --git a/gcc/testsuite/gfortran.target/aarch64/aarch64.exp 
b/gcc/testsuite/gfortran.target/aarch64/aarch64.exp
new file mode 100644
index ..79afc6993187
--- /dev/null
+++ b/gcc/testsuite/gfortran.target/aarch64/aarch64.exp
@@ -0,0 +1,45 @@
+#   Copyright (C) 2025 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3.  If not see
+# .
+
+# GCC testsuite that uses the `dg.exp' driver.
+
+# Exit immediately if this isn't a aarch64 target.
+if { ![istarget aarch64*-*-*] } then {
+  return
+}
+
+# Make sure there is a fortran compiler to test.
+if { ![check_no_compiler_messages fortran_available assembly {
+! Fortran
+program P
+  stop
+end program P
+} ""] } {
+return
+}
+
+# Load support procs.
+load_lib gfortran-dg.exp
+
+# Initialize `dg'.
+dg-init
+
+# Main loop.
+gfortran-dg-runtest [lsort \
+   [glob -nocomplain $srcdir/$subdir/*.\[fF\]{,90,95,03,08} ] ] "" ""
+
+# All done.
+dg-finish
diff --git a/gcc/testsuite/gfortran.target/aarch64/pr116234.f 
b/gcc/testsuite/gfortran.target/aarch64/pr116234.f
new file mode 100644
index ..78b49bc86f10
--- /dev/null
+++ b/gcc/testsuite/gfortran.target/aarch64/pr116234.f
@@ -0,0 +1,80 @@
+! { dg-do compile }
+! { dg-options "-fcompare-debug -mcpu=phecda -O2 -funroll-all-loops -c 
-fno-rename-registers -fno-ivopts" }
+
+  SUBROUTINE FOO(UPLO, N, A, IA, JA, DESCA, SR, SC, SCOND, AMAX,
+ $ EQUED)
+
+  CHARACTER  EQUED, UPLO
+  INTEGERIA, JA
+  DOUBLE PRECISION   AMAX, SCOND
+
+  INTEGERDESCA(*)
+  DOUBLE PRECISION   A(*), SR(*)
+
+  INTEGERIACOL, IAROW, IC, IIA, I

[gcc r15-7306] c++: wrong-code with consteval constructor [PR117501]

2025-01-31 Thread Marek Polacek via Gcc-cvs

https://gcc.gnu.org/g:0d97700443b45b947eda40dac7cf4d0397770b87

commit r15-7306-g0d97700443b45b947eda40dac7cf4d0397770b87
Author: Marek Polacek 
Date:   Mon Jan 27 14:23:22 2025 -0500

c++: wrong-code with consteval constructor [PR117501]

We've had a wrong-code problem since r14-4140, due to which we
forget to initialize a variable.

In consteval39.C, we evaluate

struct QQQ q;
  <>>
  (const char *) "" ) >;

into

struct QQQ q;
  <;

and then the useless expr_stmt is dropped on the floor, so q isn't
initialized.  As pre-r14-4140, we need to handle constructors specially.

With this patch, we generate:

struct QQQ q;
  <;

initializing q properly.

PR c++/117501

gcc/cp/ChangeLog:

* cp-gimplify.cc (cp_build_init_expr_for_ctor): New.
(cp_fold_immediate_r): Call it.
(cp_fold): Break out code into cp_build_init_expr_for_ctor.

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/consteval39.C: New test.
* g++.dg/cpp2a/consteval40.C: New test.

Reviewed-by: Jason Merrill 

Diff:
---
 gcc/cp/cp-gimplify.cc| 42 ++--
 gcc/testsuite/g++.dg/cpp2a/consteval39.C | 27 
 gcc/testsuite/g++.dg/cpp2a/consteval40.C | 25 +++
 3 files changed, 81 insertions(+), 13 deletions(-)

diff --git a/gcc/cp/cp-gimplify.cc b/gcc/cp/cp-gimplify.cc
index 4ec3de13008c..45f4e27b8b74 100644
--- a/gcc/cp/cp-gimplify.cc
+++ b/gcc/cp/cp-gimplify.cc
@@ -1182,6 +1182,28 @@ taking_address_of_imm_fn_error (tree expr, tree decl)
   maybe_explain_promoted_consteval (loc, decl);
 }
 
+/* Build up an INIT_EXPR to initialize the object of a constructor call that
+   has been folded to a constant value.  CALL is the CALL_EXPR for the
+   constructor call; INIT is the value.  */
+
+static tree
+cp_build_init_expr_for_ctor (tree call, tree init)
+{
+  tree a = CALL_EXPR_ARG (call, 0);
+  if (is_dummy_object (a))
+return init;
+  const bool return_this = targetm.cxx.cdtor_returns_this ();
+  const location_t loc = EXPR_LOCATION (call);
+  if (return_this)
+a = cp_save_expr (a);
+  tree s = build_fold_indirect_ref_loc (loc, a);
+  init = cp_build_init_expr (s, init);
+  if (return_this)
+init = build2_loc (loc, COMPOUND_EXPR, TREE_TYPE (call), init,
+   fold_convert_loc (loc, TREE_TYPE (call), a));
+  return init;
+}
+
 /* A subroutine of cp_fold_r to handle immediate functions.  */
 
 static tree
@@ -1297,7 +1319,12 @@ cp_fold_immediate_r (tree *stmt_p, int *walk_subtrees, 
void *data_)
}
   /* We've evaluated the consteval function call.  */
   if (call_p)
-   *stmt_p = e;
+   {
+ if (code == CALL_EXPR && DECL_CONSTRUCTOR_P (decl))
+   *stmt_p = cp_build_init_expr_for_ctor (stmt, e);
+ else
+   *stmt_p = e;
+   }
 }
   /* We've encountered a function call that may turn out to be consteval
  later.  Store its caller so that we can ensure that the call is
@@ -3422,18 +3449,7 @@ cp_fold (tree x, fold_flags_t flags)
 if (TREE_CODE (r) != CALL_EXPR)
  {
if (DECL_CONSTRUCTOR_P (callee))
- {
-   loc = EXPR_LOCATION (x);
-   tree a = CALL_EXPR_ARG (x, 0);
-   bool return_this = targetm.cxx.cdtor_returns_this ();
-   if (return_this)
- a = cp_save_expr (a);
-   tree s = build_fold_indirect_ref_loc (loc, a);
-   r = cp_build_init_expr (s, r);
-   if (return_this)
- r = build2_loc (loc, COMPOUND_EXPR, TREE_TYPE (x), r,
- fold_convert_loc (loc, TREE_TYPE (x), a));
- }
+ r = cp_build_init_expr_for_ctor (x, r);
x = r;
break;
  }
diff --git a/gcc/testsuite/g++.dg/cpp2a/consteval39.C 
b/gcc/testsuite/g++.dg/cpp2a/consteval39.C
new file mode 100644
index ..523e8260eab2
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/consteval39.C
@@ -0,0 +1,27 @@
+// PR c++/117501
+// { dg-do run { target c++20 } }
+
+constexpr unsigned
+length ()
+{
+  bool __trans_tmp_1 = __builtin_is_constant_evaluated();
+  if (__trans_tmp_1)
+return 42;
+  return 1;
+}
+struct basic_string_view {
+  constexpr basic_string_view(const char *) : _M_len{length()}, _M_str{} {}
+  long _M_len;
+  char _M_str;
+};
+struct QQQ {
+  consteval QQQ(basic_string_view d) : data(d) {}
+  basic_string_view data;
+};
+int
+main ()
+{
+  QQQ q("");
+  if (q.data._M_len != 42)
+__builtin_abort ();
+}
diff --git a/gcc/testsuite/g++.dg/cpp2a/consteval40.C 
b/gcc/testsuite/g++.dg/cpp2a/consteval40.C
new file mode 100644
index ..4d3ba20092b1
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/consteval40.C
@@ -0,0 +1,25 @@
+// PR c++/117501
+// { dg-do ru

[gcc r15-7303] testsuite: Add testcase for already fixed PR [PR117498]

2025-01-31 Thread Jakub Jelinek via Gcc-cvs

https://gcc.gnu.org/g:9fc0683082067801e3790f7cfffedbf5441e0f82

commit r15-7303-g9fc0683082067801e3790f7cfffedbf5441e0f82
Author: Jakub Jelinek 
Date:   Fri Jan 31 12:39:34 2025 +0100

testsuite: Add testcase for already fixed PR [PR117498]

This wrong-code issue has been fixed with r15-7249.
We still emit warnings which are questionable and perhaps we'd
get better generated code if niters determined the loop has only a single
iteration without UB and we'd punt on vectorizing it (or unrolling).

2025-01-31  Jakub Jelinek  

PR middle-end/117498
* gcc.c-torture/execute/pr117498.c: New test.

Diff:
---
 gcc/testsuite/gcc.c-torture/execute/pr117498.c | 35 ++
 1 file changed, 35 insertions(+)

diff --git a/gcc/testsuite/gcc.c-torture/execute/pr117498.c 
b/gcc/testsuite/gcc.c-torture/execute/pr117498.c
new file mode 100644
index ..085c6b6684bc
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr117498.c
@@ -0,0 +1,35 @@
+/* PR middle-end/117498 */
+
+int a, d, f;
+char g;
+volatile int c = 1;
+
+int
+foo ()
+{
+  if (c == 0)
+return -1;
+  return 1;
+}
+
+void
+bar (int h, int i, char *k, char *m)
+{
+  for (; d < i; d += 2)
+for (int j = 0; j < h; j++)
+  m[j] = k[4 * j];
+}
+
+void
+baz (long h)
+{
+  char n = 0;
+  bar (h, 4, &n, &g);
+}
+
+int
+main ()
+{
+  f = foo ();
+  baz ((unsigned char) f - 4);
+}

[gcc r15-7313] [committed][PR tree-optimization/114277] Fix missed optimization for multiplication against boolean

2025-01-31 Thread Jeff Law via Gcc-cvs

https://gcc.gnu.org/g:2c0a9b7fb7902522fb8484342fcc19fd44df53e6

commit r15-7313-g2c0a9b7fb7902522fb8484342fcc19fd44df53e6
Author: Jeff Law 
Date:   Fri Jan 31 16:59:35 2025 -0700

[committed][PR tree-optimization/114277] Fix missed optimization for 
multiplication against boolean value

Andrew, Raphael and I have all poked at it in various ways over the last 
year
or so.  I think when Raphael and I first looked at it I sent us down a bit 
of
rathole.

In particular it's odd that we're using a multiply to implement a select 
and it
seemed like recognizing the idiom and rewriting into a conditional move was 
the
right path.  That looked reasonably good for the test, but runs into 
problems
with min/max detection elsewhere.

I think that initial investigation somewhat polluted our thinking.  The
regression can be fixed with a fairly simple match.pd pattern.

Essentially we want to handle

x * (x || b) -> x
x * !(x || b) -> 0

There's simplifications that can be made for "&&" cases, but I haven't seen
them in practice.  Rather than drop in untested patterns, I'm leaving that 
as a
future todo.

My original was two match.pd patterns.  Andrew combined them into a single
pattern.  I've made this conditional on GIMPLE as an earlier version that
simplified to a conditional move showed that when applied on GENERIC we 
could
drop an operand with a side effect which is clearly not good.

I've bootstrapped and regression tested this on x86.  I've also tested on 
the
various embedded targets in my tester.

PR tree-optimization/114277
gcc/
* match.pd (a * (a || b) -> a): New pattern.
(a * !(a || b) -> 0): Likewise.

gcc/testsuite
* gcc.target/i386/pr114277.c: New test.
* gcc.target/riscv/pr114277.c: Likewise.

Co-author:  Andrew Pinski 

Diff:
---
 gcc/match.pd  | 17 +
 gcc/testsuite/gcc.target/i386/pr114277.c  | 10 ++
 gcc/testsuite/gcc.target/riscv/pr114277.c |  9 +
 3 files changed, 36 insertions(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index 6991868fbe29..97e0bafdda4b 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -303,6 +303,23 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (mult @0 integer_zerop@1)
  @1)
 
+#if GIMPLE
+/* When multiplying a value by a boolean involving the value, we may
+   be able to simplify further.
+ a * ((a || b) != 0) -> a
+ a * ((a || b) == 0) -> 0
+
+   There are also bit-and cases which don't show up in practice yet.
+ a * ((a && b) != 0) -> a * b
+ a * ((a && b) == 0) -> b != 0 ? a : b */
+(for neeq (ne eq)
+ (simplify
+  (mult:c (convert? (neeq (bit_ior:c @0 @1) integer_zerop@2)) @0)
+  (if (neeq == EQ_EXPR)
+   { build_zero_cst (type); }
+   @0)))
+#endif
+
 /* -x == x -> x == 0 */
 (for cmp (eq ne)
  (simplify
diff --git a/gcc/testsuite/gcc.target/i386/pr114277.c 
b/gcc/testsuite/gcc.target/i386/pr114277.c
new file mode 100644
index ..eb611d26d6af
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr114277.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int a,b;
+void func0(int x) { a=x * (x || b); }
+void func1(int x) { a=x * !(x || b); }
+
+/* { dg-final { scan-assembler-not "or" } } */
+/* { dg-final { scan-assembler-not "cmove" } } */
+
diff --git a/gcc/testsuite/gcc.target/riscv/pr114277.c 
b/gcc/testsuite/gcc.target/riscv/pr114277.c
new file mode 100644
index ..cc1db19ee4c5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr114277.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv64gc_zicond -mabi=lp64d" { target rv64 } } */
+/* { dg-options "-O2 -march=rv32gc_zicond -mabi=ilp32" { target rv32 } } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Os" "-Oz" "-O3" "-Og" } } */
+
+#include "../i386/pr114277.c"
+
+/* { dg-final { scan-assembler-not "czero" } } */
+

[gcc] Created branch 'meissner/heads/work192' in namespace 'refs/users'

2025-01-31 Thread Michael Meissner via Gcc-cvs

The branch 'meissner/heads/work192' was created in namespace 'refs/users' 
pointing to:

 a9172b107a24... libstdc++: Fix flat_foo::insert_range for non-common ranges

[gcc(refs/users/meissner/heads/work192)] Add ChangeLog.meissner and REVISION.

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:333c087a61857694370e801f536fc493c3d721b5

commit 333c087a61857694370e801f536fc493c3d721b5
Author: Michael Meissner 
Date:   Fri Jan 31 17:36:50 2025 -0500

Add ChangeLog.meissner and REVISION.

2025-01-31  Michael Meissner  

gcc/

* REVISION: New file for branch.
* ChangeLog.meissner: New file.

gcc/c-family/

* ChangeLog.meissner: New file.

gcc/c/

* ChangeLog.meissner: New file.

gcc/cp/

* ChangeLog.meissner: New file.

gcc/fortran/

* ChangeLog.meissner: New file.

gcc/testsuite/

* ChangeLog.meissner: New file.

libgcc/

* ChangeLog.meissner: New file.

Diff:
---
 gcc/ChangeLog.meissner   | 5 +
 gcc/REVISION | 1 +
 gcc/c-family/ChangeLog.meissner  | 5 +
 gcc/c/ChangeLog.meissner | 5 +
 gcc/cp/ChangeLog.meissner| 5 +
 gcc/fortran/ChangeLog.meissner   | 5 +
 gcc/testsuite/ChangeLog.meissner | 5 +
 libgcc/ChangeLog.meissner| 5 +
 libstdc++-v3/ChangeLog.meissner  | 5 +
 9 files changed, 41 insertions(+)

diff --git a/gcc/ChangeLog.meissner b/gcc/ChangeLog.meissner
new file mode 100644
index ..b598e153c514
--- /dev/null
+++ b/gcc/ChangeLog.meissner
@@ -0,0 +1,5 @@
+ Branch work192, baseline 
+
+2025-01-31   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
new file mode 100644
index ..6126a6968da2
--- /dev/null
+++ b/gcc/REVISION
@@ -0,0 +1 @@
+work192 branch
diff --git a/gcc/c-family/ChangeLog.meissner b/gcc/c-family/ChangeLog.meissner
new file mode 100644
index ..b598e153c514
--- /dev/null
+++ b/gcc/c-family/ChangeLog.meissner
@@ -0,0 +1,5 @@
+ Branch work192, baseline 
+
+2025-01-31   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/c/ChangeLog.meissner b/gcc/c/ChangeLog.meissner
new file mode 100644
index ..b598e153c514
--- /dev/null
+++ b/gcc/c/ChangeLog.meissner
@@ -0,0 +1,5 @@
+ Branch work192, baseline 
+
+2025-01-31   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/cp/ChangeLog.meissner b/gcc/cp/ChangeLog.meissner
new file mode 100644
index ..b598e153c514
--- /dev/null
+++ b/gcc/cp/ChangeLog.meissner
@@ -0,0 +1,5 @@
+ Branch work192, baseline 
+
+2025-01-31   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/fortran/ChangeLog.meissner b/gcc/fortran/ChangeLog.meissner
new file mode 100644
index ..b598e153c514
--- /dev/null
+++ b/gcc/fortran/ChangeLog.meissner
@@ -0,0 +1,5 @@
+ Branch work192, baseline 
+
+2025-01-31   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/testsuite/ChangeLog.meissner b/gcc/testsuite/ChangeLog.meissner
new file mode 100644
index ..b598e153c514
--- /dev/null
+++ b/gcc/testsuite/ChangeLog.meissner
@@ -0,0 +1,5 @@
+ Branch work192, baseline 
+
+2025-01-31   Michael Meissner  
+
+   Clone branch
diff --git a/libgcc/ChangeLog.meissner b/libgcc/ChangeLog.meissner
new file mode 100644
index ..b598e153c514
--- /dev/null
+++ b/libgcc/ChangeLog.meissner
@@ -0,0 +1,5 @@
+ Branch work192, baseline 
+
+2025-01-31   Michael Meissner  
+
+   Clone branch
diff --git a/libstdc++-v3/ChangeLog.meissner b/libstdc++-v3/ChangeLog.meissner
new file mode 100644
index ..b598e153c514
--- /dev/null
+++ b/libstdc++-v3/ChangeLog.meissner
@@ -0,0 +1,5 @@
+ Branch work192, baseline 
+
+2025-01-31   Michael Meissner  
+
+   Clone branch

[gcc r15-7311] c++: check_flexarray fixes [PR117516]

2025-01-31 Thread Jakub Jelinek via Gcc-cvs

https://gcc.gnu.org/g:6141fd55181d4a4005ef499694ad6dcaeba6249b

commit r15-7311-g6141fd55181d4a4005ef499694ad6dcaeba6249b
Author: Jakub Jelinek 
Date:   Sat Feb 1 00:48:21 2025 +0100

c++: check_flexarray fixes [PR117516]

On the pr117516.C testcase check_flexarrays and its helper functions
have exponential complexity, plus it reports the same bug over and over
again in some cases instead of reporting perhaps other bugs.
The functions want to diagnose flexible array member (and strangely [0]
arrays too) followed by some other non-empty or array members in the same
strcuture, or followed by other non-empty or array members in a containing
structure (any of them), or flexible array members/[0] arrays in structures
with no other non-empty members, or those nested in other structures.
Strangely it doesn't complain if flexible array member is in a structure
used in an array.

As can be seeen on e.g. the flexary41.C test, it keeps reporting the
same bug over and over:
flexary41.C:5:24: error: flexible array member ‘A::b’ not at end of ‘struct 
A’
flexary41.C:5:24: error: flexible array member ‘A::b’ not at end of ‘struct 
B’
flexary41.C:5:24: error: flexible array member ‘A::b’ not at end of ‘struct 
C’
flexary41.C:5:24: error: flexible array member ‘A::b’ not at end of ‘struct 
D’
flexary41.C:13:39: error: flexible array member ‘En’ 
not at end of ‘struct E’
flexary41.C:18:23: error: flexible array member ‘H::t’ not at end of 
‘struct K’
flexary41.C:25:36: note: next member ‘int K::ab’ declared here
flexary41.C:25:8: note: in the definition of ‘struct K’
The bug that A::b is followed by A::c is one bug reported 4 times, while it
doesn't report the other bugs, that B::e flexarray is followed by B::f
and that C::h flexarray is followed by C::i.
That is because it always walks all the structures/unions of all the members
and just finds the first flexarray in there.

Now, this has horrible complexity plus it doesn't seem really useful to
users.  So, for cases where a flexible array member is followed by a
non-empty other member in the same structure, the following patch just
reports it once when finalizing that structure, and otherwise just recurses
in structures solely into the last member, so that it can report cases like
struct X { int a; int b[]; };
struct Y { X c; int d; };
or
struct Z { X c; };
i.e. correct use of flexarray in X but following it by another member in Y
or just nesting it (the former is error, the latter pedwarn as before).
By only looking at the last member for structures we get rid of the 
complexity.

Note, the patch doesn't do anything about unions, I think we still could
spend a lot of time compiling.
struct S { char s; };
union U0 { S a, b; };
union U1 { union U0 a, b; };
union U2 { union U1 a, b; };
...
union U32 { union U31 a, b; };
struct T { union U32 a; int b; };
Not really sure what we could do about that, all the elements are "last"
(but admittedly I haven't studied in detail how the original code worked
in union, there is fmem->after[pun] where pun is whether it is somewhere
inside of a union).  Perhaps in a hash table marking unions which don't have
any flexarrays at the end, nested or not, so that we don't walk them again?
Plus if we find some with flexarray at the end, maybe there is no point
to look other union members?  In any case, I think that is less severe,
because people usually don't nest unions deeply.

2025-02-01  Jakub Jelinek  

PR c++/117516
* class.cc (field_nonempty_p): Formatting fixes.  Use
integer_zerop instead of tree_int_cst_equal with size_zero_node.
(struct flexmems_t): Change type of first member from tree to bool.
(find_flexarrays): Add nested_p argument.  Change pun argument type
from tree to bool, adjust uses.  Formatting fixes.  If BASE_P or
NESTED_P and T is RECORD_TYPE, start looking only at the last
non-empty or array FIELD_DECL.  Adjust recursive call, set first
if it was a nested call and found an array.
(diagnose_invalid_flexarray, diagnose_flexarrays, check_flexarrays):
Formatting fixes.

* g++.dg/ext/flexary9.C: Expect different wording of one of the
warnings and at a different line.
* g++.dg/ext/flexary19.C: Likewise.
* g++.dg/ext/flexary42.C: New test.
* g++.dg/other/pr117516.C: New test.

Diff:
---
 gcc/cp/class.cc   | 152 --
 gcc/testsuite/g++.dg/ext/flexary19.C  |   2 +-
 gcc/testsuite/g++.dg/ext/flexary42.C  |  26 ++
 gcc/testsuite/g++.dg/ext/flexary9.C   |   2 +-
 gcc/testsuite/g++.dg/other/pr117516.C |  21 +
 5 files changed, 141 inserti

[gcc r15-7312] icf: Compare call argument types in certain cases and asm operands [PR117432]

2025-01-31 Thread Jakub Jelinek via Gcc-cvs

https://gcc.gnu.org/g:ebd111a2896816e4f5ddf5108f361b3d9d287fa0

commit r15-7312-gebd111a2896816e4f5ddf5108f361b3d9d287fa0
Author: Jakub Jelinek 
Date:   Sat Feb 1 00:50:24 2025 +0100

icf: Compare call argument types in certain cases and asm operands 
[PR117432]

compare_operand uses operand_equal_p under the hood, which e.g. for
INTEGER_CSTs will just match the values rather regardless of their types.
Now, in many comparing the type is redundant, if we have
  x_2 = y_3 + 1;
we've already compared the type for the lhs and also for rhs1, there won't
be any surprises on rhs2.
As noted in the PR, there are cases where the type of the operand is the
sole place of information and we don't want to ICF merge functions if the
types differ.
One case is stdarg functions, arguments passed to ..., it is different
if we pass 1, 1L, 1LL.
Another case are the K&R unprototyped functions (sure, gone in C23).
And yet another case are inline asm operands, "r" (1) is different from "r"
(1L) from "r" (1LL).

So, the following patch determines based on lack of fntype (e.g. for
internal functions), or on !prototype_p, or on stdarg_p (in that case
using number of named arguments) which arguments need to have type checked
and does that, plus compares types on inline asm operands (maybe it would be
enough to do that just for input operands but we have just a routine to
handle both and I didn't feel we need to differentiate).

Furthermore, I've noticed fntype{1,2} isn't actually compared if it is a
direct call (gimple_call_fndecl is non-NULL).  That is wrong too, we could
have
  void (*fn) (int, long long) = (void (*) (int, long long)) foo;
  fn (1, 1LL);
in one case and
  void (*fn) (long long, int) = (void (*) (long long, int)) foo;
  fn (1LL, 1);
in another, both folded into a direct call of foo with different
gimple_call_fntype.  Sure, one of them would be UB at runtime (or both), but
what if we ICF merge it into something that into the one UB at runtime
and the program actually calls the correct one only?

2025-02-01  Jakub Jelinek  

PR ipa/117432
* ipa-icf-gimple.cc (func_checker::compare_asm_inputs_outputs):
Also return_false if operands have incompatible types.
(func_checker::compare_gimple_call): Check fntype1 vs. fntype2
compatibility for all non-internal calls and assume fntype1 and
fntype2 are non-NULL for those.  For calls to non-prototyped
calls or for stdarg_p functions after the last named argument (if 
any)
check type compatibility of call arguments.

* gcc.c-torture/execute/pr117432.c: New test.
* gcc.target/i386/pr117432.c: New test.

Diff:
---
 gcc/ipa-icf-gimple.cc  | 53 ---
 gcc/testsuite/gcc.c-torture/execute/pr117432.c | 71 ++
 gcc/testsuite/gcc.target/i386/pr117432.c   | 17 ++
 3 files changed, 123 insertions(+), 18 deletions(-)

diff --git a/gcc/ipa-icf-gimple.cc b/gcc/ipa-icf-gimple.cc
index 50b459c74b29..51ca8a3e1143 100644
--- a/gcc/ipa-icf-gimple.cc
+++ b/gcc/ipa-icf-gimple.cc
@@ -459,7 +459,9 @@ func_checker::compare_asm_inputs_outputs (tree t1, tree t2,
return false;
 
   if (!compare_operand (TREE_VALUE (t1), TREE_VALUE (t2),
-   get_operand_access_type (map, t1)))
+   get_operand_access_type (map, t1))
+ || !types_compatible_p (TREE_TYPE (TREE_VALUE (t1)),
+ TREE_TYPE (TREE_VALUE (t2
return return_false ();
 
   tree p1 = TREE_PURPOSE (t1);
@@ -709,26 +711,37 @@ func_checker::compare_gimple_call (gcall *s1, gcall *s2)
   || gimple_call_alloca_for_var_p (s1) != gimple_call_alloca_for_var_p 
(s2))
 return false;
 
-  if (gimple_call_internal_p (s1)
-  && gimple_call_internal_fn (s1) != gimple_call_internal_fn (s2))
-return false;
-
-  tree fntype1 = gimple_call_fntype (s1);
-  tree fntype2 = gimple_call_fntype (s2);
-
-  /* For direct calls we verify that types are compatible so if we matched
- callees, callers must match, too.  For indirect calls however verify
- function type.  */
-  if (!gimple_call_fndecl (s1))
+  unsigned check_arg_types_from = 0;
+  if (gimple_call_internal_p (s1))
 {
-  if ((fntype1 && !fntype2)
- || (!fntype1 && fntype2)
- || (fntype1 && !types_compatible_p (fntype1, fntype2)))
-   return return_false_with_msg ("call function types are not compatible");
+  if (gimple_call_internal_fn (s1) != gimple_call_internal_fn (s2))
+   return false;
 }
+  else
+{
+  tree fntype1 = gimple_call_fntype (s1);
+  tree fntype2 = gimple_call_fntype (s2);
+  if (!types_compatible_p (fntype1, fntype2))
+   return return_false_with_msg ("call function typ

[gcc r15-7310] libstdc++: Fix flat_foo::insert_range for non-common ranges [PR118156]

2025-01-31 Thread Patrick Palka via Libstdc++-cvs

https://gcc.gnu.org/g:a9172b107a24b244e0b71c2575dd6448d48b3ae3

commit r15-7310-ga9172b107a24b244e0b71c2575dd6448d48b3ae3
Author: Patrick Palka 
Date:   Fri Jan 31 15:53:12 2025 -0500

libstdc++: Fix flat_foo::insert_range for non-common ranges [PR118156]

This fixes flat_map/multimap::insert_range by just generalizing the
insert implementation to handle heterogenous iterator/sentinel pair.
I'm not sure we can do better than this, e.g. we can't implement it in
terms of the adapted containers' insert_range because that'd require two
passes over the range.

For flat_set/multiset, we can implement insert_range directly in terms
of the adapted container's insert_range.  A fallback implementation
is also provided if insert_range isn't available, as is the case for
std::deque currently.

PR libstdc++/118156

libstdc++-v3/ChangeLog:

* include/std/flat_map (_Flat_map_impl::_M_insert): Generalized
version of insert taking heterogenous iterator/sentinel pair.
(_Flat_map_impl::insert): Dispatch to _M_insert.
(_Flat_map_impl::insert_range): Likewise.
(flat_map): Export _Flat_map_impl::insert_range.
(flat_multimap): Likewise.
* include/std/flat_set (_Flat_set_impl::insert_range):
Reimplement directly, not in terms of insert.
(flat_set): Export _Flat_set_impl::insert_range.
(flat_multiset): Likewise.
* testsuite/23_containers/flat_map/1.cc (test06): New test.
* testsuite/23_containers/flat_multimap/1.cc (test06): New test.
* testsuite/23_containers/flat_multiset/1.cc (test06): New test.
* testsuite/23_containers/flat_set/1.cc (test06): New test.

Reviewed-by: Jonathan Wakely 

Diff:
---
 libstdc++-v3/include/std/flat_map  | 17 ++
 libstdc++-v3/include/std/flat_set  | 27 +++---
 libstdc++-v3/testsuite/23_containers/flat_map/1.cc | 17 ++
 .../testsuite/23_containers/flat_multimap/1.cc | 16 +
 .../testsuite/23_containers/flat_multiset/1.cc | 16 +
 libstdc++-v3/testsuite/23_containers/flat_set/1.cc | 16 +
 6 files changed, 101 insertions(+), 8 deletions(-)

diff --git a/libstdc++-v3/include/std/flat_map 
b/libstdc++-v3/include/std/flat_map
index 1ecc2e7f6e7e..405caa8a81bf 100644
--- a/libstdc++-v3/include/std/flat_map
+++ b/libstdc++-v3/include/std/flat_map
@@ -538,9 +538,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
insert(const_iterator __position, _Arg&& __x)
{ return emplace_hint(__position, std::forward<_Arg>(__x)); }
 
-  template<__has_input_iter_cat _InputIterator>
+private:
+  template
void
-   insert(_InputIterator __first, _InputIterator __last)
+   _M_insert(_Iter __first, _Sent __last)
{
  // FIXME: This implementation fails its complexity requirements.
  // We can't idiomatically implement an efficient version (as in the
@@ -574,6 +575,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 #endif
}
 
+public:
+  template<__has_input_iter_cat _InputIterator>
+   void
+   insert(_InputIterator __first, _InputIterator __last)
+   { _M_insert(std::move(__first), std::move(__last)); }
+
   template<__has_input_iter_cat _InputIterator>
void
insert(__sorted_t, _InputIterator __first, _InputIterator __last)
@@ -585,7 +592,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   template<__detail::__container_compatible_range _Rg>
void
insert_range(_Rg&& __rg)
-   { insert(ranges::begin(__rg), ranges::end(__rg)); }
+   { _M_insert(ranges::begin(__rg), ranges::end(__rg)); }
 
   void
   insert(initializer_list __il)
@@ -1181,7 +1188,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   using _Impl::emplace;
   using _Impl::emplace_hint;
   using _Impl::insert;
-  // using _Impl::insert_range;
+  using _Impl::insert_range;
   using _Impl::extract;
   using _Impl::replace;
   using _Impl::erase;
@@ -1460,7 +1467,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   using _Impl::emplace;
   using _Impl::emplace_hint;
   using _Impl::insert;
-  // using _Impl::insert_range;
+  using _Impl::insert_range;
   using _Impl::extract;
   using _Impl::replace;
   using _Impl::erase;
diff --git a/libstdc++-v3/include/std/flat_set 
b/libstdc++-v3/include/std/flat_set
index 3e1347a6a0ae..9240f2b9a2eb 100644
--- a/libstdc++-v3/include/std/flat_set
+++ b/libstdc++-v3/include/std/flat_set
@@ -475,7 +475,28 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   template<__detail::__container_compatible_range _Rg>
void
insert_range(_Rg&& __rg)
-   { insert(ranges::begin(__rg), ranges::end(__rg)); }
+   {
+ auto __guard = _M_make_clear_guard();
+ typename container_type::iterator __it;
+ if con

[gcc r15-7309] libstdc++: Fix return value of vector::insert_range

2025-01-31 Thread Patrick Palka via Libstdc++-cvs

https://gcc.gnu.org/g:ee797739606ce9b8cf6ebb0236977861e49aa0d1

commit r15-7309-gee797739606ce9b8cf6ebb0236977861e49aa0d1
Author: Patrick Palka 
Date:   Fri Jan 31 15:53:10 2025 -0500

libstdc++: Fix return value of vector::insert_range

In some cases we're wrongly returning an iterator to (one past) the last
element inserted instead of to the first element inserted.

libstdc++-v3/ChangeLog:

* include/bits/stl_bvector.h (vector::insert_range):
Consistently return an iterator pointing to the first element
inserted.
* include/bits/vector.tcc (vector::insert_range): Likewise.
* 
testsuite/23_containers/vector/bool/modifiers/insert/insert_range.cc:
Verify insert_range return values.
* testsuite/23_containers/vector/modifiers/insert/insert_range.cc:
Likewise.

Reviewed-by: Jonathan Wakely 

Diff:
---
 libstdc++-v3/include/bits/stl_bvector.h|  8 
 libstdc++-v3/include/bits/vector.tcc   |  3 ++-
 .../vector/bool/modifiers/insert/insert_range.cc   | 18 --
 .../vector/modifiers/insert/insert_range.cc| 18 --
 4 files changed, 30 insertions(+), 17 deletions(-)

diff --git a/libstdc++-v3/include/bits/stl_bvector.h 
b/libstdc++-v3/include/bits/stl_bvector.h
index 961e4a252996..2292eec54ad7 100644
--- a/libstdc++-v3/include/bits/stl_bvector.h
+++ b/libstdc++-v3/include/bits/stl_bvector.h
@@ -1341,9 +1341,9 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
  std::copy_backward(__pos._M_const_cast(), end(),
 this->_M_impl._M_finish
   + difference_type(__n));
- auto __i = ranges::copy(__rg, __pos._M_const_cast()).out;
+ ranges::copy(__rg, __pos._M_const_cast());
  this->_M_impl._M_finish += difference_type(__n);
- return __i;
+ return __pos._M_const_cast();
}
  else
{
@@ -1355,9 +1355,9 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
  iterator __i = _M_copy_aligned(__begin,
 __pos._M_const_cast(),
 __start);
- __i = ranges::copy(__rg, __i).out;
+ iterator __j = ranges::copy(__rg, __i).out;
  iterator __finish = std::copy(__pos._M_const_cast(),
-   __end, __i);
+   __end, __j);
  this->_M_deallocate();
  this->_M_impl._M_end_of_storage = __q + _S_nword(__len);
  this->_M_impl._M_start = __start;
diff --git a/libstdc++-v3/include/bits/vector.tcc 
b/libstdc++-v3/include/bits/vector.tcc
index 4f4c366080be..acb2f5fca1e7 100644
--- a/libstdc++-v3/include/bits/vector.tcc
+++ b/libstdc++-v3/include/bits/vector.tcc
@@ -984,8 +984,9 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
   {
if (__pos == cend())
  {
+   const auto __ins_idx = size();
append_range(std::forward<_Rg>(__rg));
-   return end();
+   return begin() + __ins_idx;
  }
 
if constexpr (ranges::forward_range<_Rg>)
diff --git 
a/libstdc++-v3/testsuite/23_containers/vector/bool/modifiers/insert/insert_range.cc
 
b/libstdc++-v3/testsuite/23_containers/vector/bool/modifiers/insert/insert_range.cc
index 4f4835746ea4..5c65610667d5 100644
--- 
a/libstdc++-v3/testsuite/23_containers/vector/bool/modifiers/insert/insert_range.cc
+++ 
b/libstdc++-v3/testsuite/23_containers/vector/bool/modifiers/insert/insert_range.cc
@@ -35,16 +35,22 @@ do_test()
   VERIFY( eq(v, a) );
   v.clear();
   v.shrink_to_fit();
-  v.insert_range(v.begin(), Range(a, a+3));
-  v.insert_range(v.end(), Range(a+6, a+9));
-  v.insert_range(v.begin()+3, Range(a+3, a+6));
+  auto it = v.insert_range(v.begin(), Range(a, a+3));
+  VERIFY( it == v.begin() );
+  it = v.insert_range(v.end(), Range(a+6, a+9));
+  VERIFY( it == v.begin()+3 );
+  it = v.insert_range(v.begin()+3, Range(a+3, a+6));
+  VERIFY( it == v.begin()+3 );
   VERIFY( eq(v, a) );
   v.resize(3);
-  v.insert_range(v.begin()+1, Range(a+4, a+9));
-  v.insert_range(v.begin()+1, Range(a+1, a+4));
+  it = v.insert_range(v.begin()+1, Range(a+4, a+9));
+  VERIFY( it == v.begin()+1 );
+  it = v.insert_range(v.begin()+1, Range(a+1, a+4));
+  VERIFY( it == v.begin()+1 );
   v.resize(9);
   VERIFY( eq(v, a) );
-  v.insert_range(v.begin(), Range(a, a));
+  it = v.insert_range(v.begin(), Range(a, a));
+  VERIFY( it == v.begin() );
   VERIFY( eq(v, a) );
 }
 
diff --git 
a/libstdc++-v3/testsuite/23_containers/vector/modifiers/insert/insert_range.cc 
b/libstdc++-v3/testsuite/23_containers/vector/modifiers/insert/insert_ra

[gcc r14-11264] Fortran: fix bogus diagnostics on renamed interface import [PR110993]

2025-01-31 Thread Harald Anlauf via Gcc-cvs

https://gcc.gnu.org/g:63ea47f594c8aedaa0527ca73466aaa3b4f870bd

commit r14-11264-g63ea47f594c8aedaa0527ca73466aaa3b4f870bd
Author: Harald Anlauf 
Date:   Sun Jan 26 22:56:57 2025 +0100

Fortran: fix bogus diagnostics on renamed interface import [PR110993]

PR fortran/110993

gcc/fortran/ChangeLog:

* frontend-passes.cc (check_externals_procedure): Do not compare
interfaces of a non-bind(C) procedure against a bind(C) global one.
(check_against_globals): Use local name from rename-on-use in the
search for interfaces.

gcc/testsuite/ChangeLog:

* gfortran.dg/use_rename_14.f90: New test.

(cherry picked from commit 9104472b645f76a212af9f9c58378500f9ba937e)

Diff:
---
 gcc/fortran/frontend-passes.cc  |  7 +
 gcc/testsuite/gfortran.dg/use_rename_14.f90 | 46 +
 2 files changed, 53 insertions(+)

diff --git a/gcc/fortran/frontend-passes.cc b/gcc/fortran/frontend-passes.cc
index 3c06018fdbbf..ed8f71a8623f 100644
--- a/gcc/fortran/frontend-passes.cc
+++ b/gcc/fortran/frontend-passes.cc
@@ -5741,6 +5741,9 @@ check_externals_procedure (gfc_symbol *sym, locus *loc,
   if (gsym->ns)
 gfc_find_symbol (sym->name, gsym->ns, 0, &def_sym);
 
+  if (gsym->bind_c && def_sym && def_sym->binding_label == NULL)
+return 0;
+
   if (def_sym)
 {
   gfc_compare_actual_formal (&actual, def_sym->formal, 0, 0, 0, loc);
@@ -5837,6 +5840,10 @@ check_against_globals (gfc_symbol *sym)
 
   if (sym->binding_label)
 sym_name = sym->binding_label;
+  else if (sym->attr.use_rename
+  && sym->ns->use_stmts->rename
+  && sym->ns->use_stmts->rename->local_name[0] != '\0')
+sym_name = sym->ns->use_stmts->rename->local_name;
   else
 sym_name = sym->name;
 
diff --git a/gcc/testsuite/gfortran.dg/use_rename_14.f90 
b/gcc/testsuite/gfortran.dg/use_rename_14.f90
new file mode 100644
index ..03815a5f229e
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/use_rename_14.f90
@@ -0,0 +1,46 @@
+! { dg-do compile }
+!
+! PR fortran/110993 - bogus diagnostics on renamed interface import
+!
+! Contributed by Rimvydas Jasinskas 
+
+module m
+  interface
+subroutine bar(x)
+  use iso_c_binding, only : c_float
+  implicit none
+  real(c_float) :: x(45)
+end subroutine
+  end interface
+end
+
+module m1
+  interface
+subroutine bar1(x) bind(c)
+  use iso_c_binding, only : c_float
+  implicit none
+  real(c_float) :: x(45)
+end subroutine
+  end interface
+end
+
+module m2
+  interface
+subroutine bar2(x) bind(c, name="bar2_")
+  use iso_c_binding, only : c_float
+  implicit none
+  real(c_float) :: x(45)
+end subroutine
+  end interface
+end
+
+subroutine foo(y)
+  use m,  notthisone => bar
+  use m1, northisone => bar1
+  use m2,  orthisone => bar2
+  implicit none
+  real :: y(3)
+  call bar (y)
+  call bar1(y)
+  call bar2(y)
+end subroutine

[gcc] Created branch 'meissner/heads/work192-libs' in namespace 'refs/users'

2025-01-31 Thread Michael Meissner via Gcc-cvs

The branch 'meissner/heads/work192-libs' was created in namespace 'refs/users' 
pointing to:

 333c087a6185... Add ChangeLog.meissner and REVISION.

[gcc(refs/users/meissner/heads/work192-dmf)] Add ChangeLog.dmf and update REVISION.

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:c0482b2f27c91779096ade3384d9a4d181f2bfc6

commit c0482b2f27c91779096ade3384d9a4d181f2bfc6
Author: Michael Meissner 
Date:   Fri Jan 31 17:37:51 2025 -0500

Add ChangeLog.dmf and update REVISION.

2025-01-31  Michael Meissner  

gcc/

* ChangeLog.dmf: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.dmf | 5 +
 gcc/REVISION  | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.dmf b/gcc/ChangeLog.dmf
new file mode 100644
index ..8e54036bacfc
--- /dev/null
+++ b/gcc/ChangeLog.dmf
@@ -0,0 +1,5 @@
+ Branch work192-dmf, baseline 
+
+2025-01-31   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index 6126a6968da2..5189ba6393e2 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work192 branch
+work192-dmf branch

[gcc(refs/users/meissner/heads/work192-bugs)] Add ChangeLog.bugs and update REVISION.

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:e11b83e7ccc8b5ed3ecdd838b8871fc8e1177050

commit e11b83e7ccc8b5ed3ecdd838b8871fc8e1177050
Author: Michael Meissner 
Date:   Fri Jan 31 17:39:44 2025 -0500

Add ChangeLog.bugs and update REVISION.

2025-01-31  Michael Meissner  

gcc/

* ChangeLog.bugs: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.bugs | 5 +
 gcc/REVISION   | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.bugs b/gcc/ChangeLog.bugs
new file mode 100644
index ..fa480c0b3325
--- /dev/null
+++ b/gcc/ChangeLog.bugs
@@ -0,0 +1,5 @@
+ Branch work192-bugs, baseline 
+
+2025-01-31   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index 6126a6968da2..76b5abe64278 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work192 branch
+work192-bugs branch

[gcc(refs/users/meissner/heads/work192-test)] Add ChangeLog.test and update REVISION.

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:a6110a7d2c383de3dcb7a2b85699bd8b417addfe

commit a6110a7d2c383de3dcb7a2b85699bd8b417addfe
Author: Michael Meissner 
Date:   Fri Jan 31 17:42:21 2025 -0500

Add ChangeLog.test and update REVISION.

2025-01-31  Michael Meissner  

gcc/

* ChangeLog.test: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.test | 5 +
 gcc/REVISION   | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.test b/gcc/ChangeLog.test
new file mode 100644
index ..a8d24725a1e5
--- /dev/null
+++ b/gcc/ChangeLog.test
@@ -0,0 +1,5 @@
+ Branch work192-test, baseline 
+
+2025-01-31   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index 6126a6968da2..74e20ba666c2 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work192 branch
+work192-test branch

[gcc] Created branch 'meissner/heads/work192-orig' in namespace 'refs/users'

2025-01-31 Thread Michael Meissner via Gcc-cvs

The branch 'meissner/heads/work192-orig' was created in namespace 'refs/users' 
pointing to:

 a9172b107a24... libstdc++: Fix flat_foo::insert_range for non-common ranges

[gcc] Created branch 'meissner/heads/work192-bugs' in namespace 'refs/users'

2025-01-31 Thread Michael Meissner via Gcc-cvs

The branch 'meissner/heads/work192-bugs' was created in namespace 'refs/users' 
pointing to:

 333c087a6185... Add ChangeLog.meissner and REVISION.

[gcc] Created branch 'meissner/heads/work192-dmf' in namespace 'refs/users'

2025-01-31 Thread Michael Meissner via Gcc-cvs

The branch 'meissner/heads/work192-dmf' was created in namespace 'refs/users' 
pointing to:

 333c087a6185... Add ChangeLog.meissner and REVISION.

[gcc(refs/users/meissner/heads/work192-libs)] Add ChangeLog.libs and update REVISION.

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:df70209ac4def71d665f5c551f08fa43e5124ba0

commit df70209ac4def71d665f5c551f08fa43e5124ba0
Author: Michael Meissner 
Date:   Fri Jan 31 17:40:39 2025 -0500

Add ChangeLog.libs and update REVISION.

2025-01-31  Michael Meissner  

gcc/

* ChangeLog.libs: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.libs | 5 +
 gcc/REVISION   | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.libs b/gcc/ChangeLog.libs
new file mode 100644
index ..11fa38d3c6a6
--- /dev/null
+++ b/gcc/ChangeLog.libs
@@ -0,0 +1,5 @@
+ Branch work192-libs, baseline 
+
+2025-01-31   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index 6126a6968da2..922a6d91048c 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work192 branch
+work192-libs branch

[gcc] Created branch 'meissner/heads/work192-sha' in namespace 'refs/users'

2025-01-31 Thread Michael Meissner via Gcc-cvs

The branch 'meissner/heads/work192-sha' was created in namespace 'refs/users' 
pointing to:

 333c087a6185... Add ChangeLog.meissner and REVISION.

[gcc] Created branch 'meissner/heads/work192-math' in namespace 'refs/users'

2025-01-31 Thread Michael Meissner via Gcc-cvs

The branch 'meissner/heads/work192-math' was created in namespace 'refs/users' 
pointing to:

 333c087a6185... Add ChangeLog.meissner and REVISION.

[gcc(refs/users/meissner/heads/work192-vpair)] Add ChangeLog.vpair and update REVISION.

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:1aed6858e482651b82832074434210637c44bc7c

commit 1aed6858e482651b82832074434210637c44bc7c
Author: Michael Meissner 
Date:   Fri Jan 31 17:38:45 2025 -0500

Add ChangeLog.vpair and update REVISION.

2025-01-31  Michael Meissner  

gcc/

* ChangeLog.vpair: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.vpair | 5 +
 gcc/REVISION| 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.vpair b/gcc/ChangeLog.vpair
new file mode 100644
index ..e63eb38880eb
--- /dev/null
+++ b/gcc/ChangeLog.vpair
@@ -0,0 +1,5 @@
+ Branch work192-vpair, baseline 
+
+2025-01-31   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index 6126a6968da2..0bc6a3c906da 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work192 branch
+work192-vpair branch

[gcc(refs/users/meissner/heads/work192-math)] Add ChangeLog.math and update REVISION.

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:9923c123baea61fd8ddabf6ad037d4baa5ced4ae

commit 9923c123baea61fd8ddabf6ad037d4baa5ced4ae
Author: Michael Meissner 
Date:   Fri Jan 31 17:43:25 2025 -0500

Add ChangeLog.math and update REVISION.

2025-01-31  Michael Meissner  

gcc/

* ChangeLog.math: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.math | 5 +
 gcc/REVISION   | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.math b/gcc/ChangeLog.math
new file mode 100644
index ..a707e6f0391d
--- /dev/null
+++ b/gcc/ChangeLog.math
@@ -0,0 +1,5 @@
+ Branch work192-math, baseline 
+
+2025-01-31   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index 6126a6968da2..ffe46b08b0bb 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work192 branch
+work192-math branch

[gcc] Created branch 'meissner/heads/work192-test' in namespace 'refs/users'

2025-01-31 Thread Michael Meissner via Gcc-cvs

The branch 'meissner/heads/work192-test' was created in namespace 'refs/users' 
pointing to:

 333c087a6185... Add ChangeLog.meissner and REVISION.

[gcc] Created branch 'meissner/heads/work192-vpair' in namespace 'refs/users'

2025-01-31 Thread Michael Meissner via Gcc-cvs

The branch 'meissner/heads/work192-vpair' was created in namespace 'refs/users' 
pointing to:

 333c087a6185... Add ChangeLog.meissner and REVISION.

[gcc(refs/users/meissner/heads/work192-sha)] Add ChangeLog.sha and update REVISION.

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:db096f3ecc1815e5758cfd0cab5a9e3045403373

commit db096f3ecc1815e5758cfd0cab5a9e3045403373
Author: Michael Meissner 
Date:   Fri Jan 31 17:41:30 2025 -0500

Add ChangeLog.sha and update REVISION.

2025-01-31  Michael Meissner  

gcc/

* ChangeLog.sha: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.sha | 5 +
 gcc/REVISION  | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.sha b/gcc/ChangeLog.sha
new file mode 100644
index ..4bc792425d41
--- /dev/null
+++ b/gcc/ChangeLog.sha
@@ -0,0 +1,5 @@
+ Branch work192-sha, baseline 
+
+2025-01-31   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index 6126a6968da2..e43b04109ca7 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work192 branch
+work192-sha branch

[gcc(refs/users/meissner/heads/work192-orig)] Add REVISION.

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:878e711eb227cdb77b99ef298c1df3e00ddea421

commit 878e711eb227cdb77b99ef298c1df3e00ddea421
Author: Michael Meissner 
Date:   Fri Jan 31 17:45:01 2025 -0500

Add REVISION.

2025-01-31  Michael Meissner  

gcc/

* REVISION: New file for branch.

Diff:
---
 gcc/REVISION | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/REVISION b/gcc/REVISION
new file mode 100644
index ..93b659e699ef
--- /dev/null
+++ b/gcc/REVISION
@@ -0,0 +1 @@
+work192-orig branch

[gcc(refs/users/meissner/heads/work192-dmf)] Update ChangeLog.*

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:daf7ed58417a669551524468ccf29bd06830df96

commit daf7ed58417a669551524468ccf29bd06830df96
Author: Michael Meissner 
Date:   Fri Jan 31 23:37:38 2025 -0500

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.dmf | 329 ++
 1 file changed, 329 insertions(+)

diff --git a/gcc/ChangeLog.dmf b/gcc/ChangeLog.dmf
index 8e54036bacfc..65b5dede589d 100644
--- a/gcc/ChangeLog.dmf
+++ b/gcc/ChangeLog.dmf
@@ -1,5 +1,334 @@
+ Branch work192-dmf, patch #121 was reverted 

+ Branch work192-dmf, patch #120 was reverted 

+
+ Branch work192-dmf, patch #111 
+
+RFC2655-Add saturating subtract built-ins.
+
+This patch adds support for a saturating subtract built-in function that may be
+added to a future PowerPC processor.  Note, if it is added, the name of the
+built-in function may change before GCC 13 is released.  If the name changes,
+we will submit a patch changing the name.
+
+I also added support for providing dense math built-in functions, even though
+at present, we have not added any new built-in functions for dense math.  It is
+likely we will want to add new dense math built-in functions as the dense math
+support is fleshed out.
+
+The patches have been tested on both little and big endian systems.  Can I 
check
+it into the master branch?
+
+2025-01-31   Michael Meissner  
+
+gcc/
+
+   * config/rs6000/rs6000-builtin.cc (rs6000_invalid_builtin): Add support
+   for flagging invalid use of future built-in functions.
+   (rs6000_builtin_is_supported): Add support for future built-in
+   functions.
+   * config/rs6000/rs6000-builtins.def (__builtin_saturate_subtract32): New
+   built-in function for -mcpu=future.
+   (__builtin_saturate_subtract64): Likewise.
+   * config/rs6000/rs6000-gen-builtins.cc (enum bif_stanza): Add stanzas
+   for -mcpu=future built-ins.
+   (stanza_map): Likewise.
+   (enable_string): Likewise.
+   (struct attrinfo): Likewise.
+   (parse_bif_attrs): Likewise.
+   (write_decls): Likewise.
+   * config/rs6000/rs6000.md (sat_sub3): Add saturating subtract
+   built-in insn declarations.
+   (sat_sub3_dot): Likewise.
+   (sat_sub3_dot2): Likewise.
+   * doc/extend.texi (Future PowerPC built-ins): New section.
+
+gcc/testsuite/
+
+   * gcc.target/powerpc/subfus-1.c: New test.
+   * gcc.target/powerpc/subfus-2.c: Likewise.
+
+ Branch work192-dmf, patch #110 
+
+RFC2656-Support load/store vector with right length.
+
+This patch adds support for new instructions that may be added to the PowerPC
+architecture in the future to enhance the load and store vector with length
+instructions.
+
+The current instructions (lxvl, lxvll, stxvl, and stxvll) are inconvient to use
+since the count for the number of bytes must be in the top 8 bits of the GPR
+register, instead of the bottom 8 bits.  This meant that code generating these
+instructions typically had to do a shift left by 56 bits to get the count into
+the right position.  In a future version of the PowerPC architecture, new
+variants of these instructions might be added that expect the count to be in
+the bottom 8 bits of the GPR register.  These patches add this support to GCC
+if the user uses the -mcpu=future option.
+
+I discovered that the code in rs6000-string.cc to generate ISA 3.1 lxvl/stxvl
+future lxvll/stxvll instructions would generate these instructions on 32-bit.
+However the patterns for these instructions is only done on 64-bit systems.  So
+I added a check for 64-bit support before generating the instructions.
+
+The patches have been tested on both little and big endian systems.  Can I 
check
+it into the master branch?
+
+2025-01-31   Michael Meissner  
+
+gcc/
+
+   * config/rs6000/rs6000-string.cc (expand_block_move): Do not generate
+   lxvl and stxvl on 32-bit.
+   * config/rs6000/vsx.md (lxvl): If -mcpu=future, generate the lxvl with
+   the shift count automaticaly used in the insn.
+   (lxvrl): New insn for -mcpu=future.
+   (lxvrll): Likewise.
+   (stxvl): If -mcpu=future, generate the stxvl with the shift count
+   automaticaly used in the insn.
+   (stxvrl): New insn for -mcpu=future.
+   (stxvrll): Likewise.
+
+gcc/testsuite/
+
+   * gcc.target/powerpc/lxvrl.c: New test.
+   * lib/target-supports.exp (check_effective_target_powerpc_future_ok):
+   New effective target.
+
+ Branch work192-dmf, patch #102 
+
+RFC2653-PowerPC: Add support for 1,024 bit DMR registers.
+
+This patch is a prelimianry patch to add the full 1,024 bit dense math register
+(DMRs) for -mcpu=future.  The MMA 512-bit accumulators map onto the top of the
+DMR register.
+
+This patch only adds the new 1,024 bit register support.  It does not add
+su

[gcc(refs/users/meissner/heads/work192-sha)] PR target/117251: Add PowerPC XXEVAL support to speed up SHA3 calculations

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:3caa2a1aa45e83f92248c419ed1b1e890b52c6ee

commit 3caa2a1aa45e83f92248c419ed1b1e890b52c6ee
Author: Michael Meissner 
Date:   Fri Jan 31 23:39:06 2025 -0500

PR target/117251: Add PowerPC XXEVAL support to speed up SHA3 calculations

The multibuff.c benchmark attached to the PR target/117251 compiled for 
Power10
PowerPC that implement SHA3 has a slowdown in the current trunk and GCC 14
compared to GCC 11 - GCC 13, due to excessive amounts of spilling.

The main function for the multibuf.c file has 3,747 lines, all of which are
using vector unsigned long long.  There are 696 vector rotates (all rotates 
are
constant), 1,824 vector xor's and 600 vector andc's.

In looking at it, the main thing that steps out is the reason for either
spilling or moving variables is the support in fusion.md (generated by
genfusion.pl) that tries to fuse the vec_andc feeding into vec_xor, and 
other
vec_xor's feeding into vec_xor.

On the powerpc for power10, there is a special fusion mode that happens if 
the
machine has a VANDC or VXOR instruction that is adjacent to a VXOR 
instruction
and the VANDC/VXOR feeds into the 2nd VXOR instruction.

While the Power10 has 64 vector registers (which uses the XXL prefix to do
logical operations), the fusion only works with the older Altivec 
instruction
set (which uses the V prefix).  The Altivec instruction only has 32 vector
registers (which are overlaid over the VSX vector registers 32-63).

By having the combiner patterns fuse_vandc_vxor and fuse_vxor_vxor to do 
this
fusion, it means that the register allocator has more register pressure for 
the
traditional Altivec registers instead of the VSX registers.

In addition, since there are vector rotates, these rotates only work on the
traditional Altivec registers, which adds to the Altivec register pressure.

Finally in addition to doing the explicit xor, andc, and rotates using the
Altivec registers, we have to also load vector constants for the rotate 
amount
and these registers also are allocated as Altivec registers.

Current trunk and GCC 12-14 have more vector spills than GCC 11, but GCC 11 
has
many more vector moves that the later compilers.  Thus even though it has 
way
less spills, the vector moves are why GCC 11 have the slowest results.

There is an instruction that was added in power10 (XXEVAL) that does provide
fusion between VSX vectors that includes ANDC->XOR and XOR->XOR fusion.

The latency of XXEVAL is slightly more than the fused VANDC/VXOR or 
VXOR/VXOR,
so I have written the patch to prefer doing the Altivec instructions if they
don't need a temporary register.

Here are the results for adding support for XXEVAL for the multibuff.c
benchmark attached to the PR.  Note that we essentially recover the speed 
with
this patch that were lost with GCC 14 and the current trunk:

  XXEVALTrunk   GCC14   GCC13   GCC12
GCC11
  ---   -   -   -
-
Benchmark time in seconds   5.53 6.156.265.575.61 
9.56

Fuse VANDC -> VXOR   209 600  600 600 600  
600
Fuse VXOR -> VXOR  0 240  240 120 120  
120
XXEVAL to fuse ANDC -> XOR   391   00   0   0   
 0
XXEVAL to fuse XOR -> XOR240   00   0   0   
 0

Spill vector to stack 78 364  364 172 184  
110
Load spilled vector from stack   431 962  962 713 723  
166
Vector moves  10 100  100  70  72
3,055

Vector rotate right  696 696  696 696 696  
696
XXLANDC or VANDC 209 600  600 600 600  
600
XXLXOR or VXOR   953   1,8241,824   1,824   1,824
1,825
XXEVAL   631   00   0   0   
 0

Load vector rotate constants  24  24   24  24  24   
24

Here are the results for adding support for XXEVAL for the singlebuff.c
benchmark attached to the PR.  Note that adding XXEVAL greatly speeds up 
this
particular benchmark:

  XXEVALTrunk   GCC14   GCC13   GCC12
GCC11
  ---   -   -   -
-
Benchmark time in seconds   4.46 5.405.405.355.36 
7.54

Fuse VANDC -> VXOR   210  600 600 600 600  
600
Fuse VXOR -> VXOR  0  240 240 120 120  
120
XXEVAL to fuse ANDC -> XOR   3900   0

[gcc(refs/users/meissner/heads/work192-vpair)] Vector pair support.

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:488b0985d7d3f9079edd2e98befb894a581a31a5

commit 488b0985d7d3f9079edd2e98befb894a581a31a5
Author: Michael Meissner 
Date:   Fri Jan 31 23:43:08 2025 -0500

Vector pair support.

This patch adds a new include file (vector-pair.h) that adds support so that
users writing high performance libraries can change their code to allow the
generation of the vector pair load and store instructions on power10.

The intention is that if the library authors need to write special loops 
that
go over arrays that they could modify their code to use the functions 
provided
to change loops that can take advantage of the higher bandwidth for load 
vector
pair and store instructions.

This particular patch just adds a new include file (vector-pair.h) that
provides a bunch of functions that on a power10 system would use the vector
pair load operation, 2 floating point operations, and a vector pair store.  
It
does not add any new types, modes, or built-in function.

I have additional patches that can add built-in functions that the 
functions in
vector-pair.h could utilize so that the compiler can optimize and combine
operations.  I may submit those patches in the future, but I would like to
provide this patch to allow the library writer to optimize their code.

I've measured the performance of these new functions on a power10.  For 
default
unrolling, the percentage of change for the 3 methods over the normal vector
loop method:

116%Vector-pair.h function, default unroll
 93%Vector pair split built-in & 2 vector stores, default unroll
 86%Vector pair split & combine built-ins, default unroll

Using explicit 2 way unrolling the numbers are:

114%Vector-pair.h function, unroll 2
106%Vector pair split built-in & 2 vector stores, unroll 2
 98%Vector pair split & combine built-ins, unroll 2

These new functions provided in vector-pair.h use the vector pair load/store
instructions, and don't generate extra vector moves.  Using the existing
vector pair disassemble and assemble built-ins generate extra vector moves
which can hinder performance.

If I compile the loop code for power9, there is a minor speed up for default
unrolling and more of an improvement using the framework provided in the
vector-pair.h for explicit unrolling by 2:

101%Vector-pair.h function, default unroll for power9
107%Vector-pair.h function, unroll 2 for power9

Of course this is a synthetic benchmark run on a quiet power10 system.  
Results
would vary for real code on real systems.  However, I feel adding these
functions can allow the writers of high performance libraries to better
optimize their code.

As an example, if the library wants to code a simple fused multiply-add 
loop,
they might write the code as follows:

#include 
#include 
#include 

void
fma_vector (double * __restrict__ r,
const double * __restrict__ a,
const double * __restrict__ b,
size_t n)
{
  vector double * __restrict__ vr = (vector double * __restrict__)r;
  const vector double * __restrict__ va = (const vector double * 
__restrict__)a;
  const vector double * __restrict__ vb = (const vector double * 
__restrict__)b;
  size_t num_elements = sizeof (vector double) / sizeof (double);
  size_t nv = n / num_elements;
  size_t i;

  for (i = 0; i < nv; i++)
vr[i] = __builtin_vsx_xvmadddp (va[i], vb[i], vr[i]);

  for (i = nv * num_elements; i < n; i++)
r[i] = fma (a[i], b[i], r[i]);
}

The inner loop would look like:

.L3:
lxvx 0,3,9
lxvx 12,4,9
addi 10,9,16
addi 2,2,-2
lxvx 11,5,9
xvmaddadp 0,12,11
lxvx 12,4,10
lxvx 11,5,10
stxvx 0,3,9
lxvx 0,3,10
addi 9,9,32
xvmaddadp 0,12,11
stxvx 0,3,10
bdnz .L3

Now if you code the loop to use __builtin_vsx_disassemble_pair to do a 
vector
pair load, but then do 2 vector stores:

#include 
#include 
#include 

void
fma_mma_ld (double * __restrict__ r,
const double * __restrict__ a,
const double * __restrict__ b,
size_t n)
{
  __vector_pair * __restrict__ vp_r

[gcc(refs/users/meissner/heads/work192-vpair)] Update ChangeLog.*

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:27fedf9497fff8795d6c5663667459fcf1bcf06f

commit 27fedf9497fff8795d6c5663667459fcf1bcf06f
Author: Michael Meissner 
Date:   Fri Jan 31 23:44:54 2025 -0500

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.vpair | 420 
 1 file changed, 420 insertions(+)

diff --git a/gcc/ChangeLog.vpair b/gcc/ChangeLog.vpair
index e63eb38880eb..431ca4d60f55 100644
--- a/gcc/ChangeLog.vpair
+++ b/gcc/ChangeLog.vpair
@@ -1,5 +1,425 @@
+ Branch work192-vpair, patch #300 
+
+Vector pair support.
+
+This patch adds a new include file (vector-pair.h) that adds support so that
+users writing high performance libraries can change their code to allow the
+generation of the vector pair load and store instructions on power10.
+
+The intention is that if the library authors need to write special loops that
+go over arrays that they could modify their code to use the functions provided
+to change loops that can take advantage of the higher bandwidth for load vector
+pair and store instructions.
+
+This particular patch just adds a new include file (vector-pair.h) that
+provides a bunch of functions that on a power10 system would use the vector
+pair load operation, 2 floating point operations, and a vector pair store.  It
+does not add any new types, modes, or built-in function.
+
+I have additional patches that can add built-in functions that the functions in
+vector-pair.h could utilize so that the compiler can optimize and combine
+operations.  I may submit those patches in the future, but I would like to
+provide this patch to allow the library writer to optimize their code.
+
+I've measured the performance of these new functions on a power10.  For default
+unrolling, the percentage of change for the 3 methods over the normal vector
+loop method:
+
+   116%Vector-pair.h function, default unroll
+93%Vector pair split built-in & 2 vector stores, default unroll
+86%Vector pair split & combine built-ins, default unroll
+
+Using explicit 2 way unrolling the numbers are:
+
+   114%Vector-pair.h function, unroll 2
+   106%Vector pair split built-in & 2 vector stores, unroll 2
+98%Vector pair split & combine built-ins, unroll 2
+
+These new functions provided in vector-pair.h use the vector pair load/store
+instructions, and don't generate extra vector moves.  Using the existing
+vector pair disassemble and assemble built-ins generate extra vector moves
+which can hinder performance.
+
+If I compile the loop code for power9, there is a minor speed up for default
+unrolling and more of an improvement using the framework provided in the
+vector-pair.h for explicit unrolling by 2:
+
+   101%Vector-pair.h function, default unroll for power9
+   107%Vector-pair.h function, unroll 2 for power9
+
+Of course this is a synthetic benchmark run on a quiet power10 system.  Results
+would vary for real code on real systems.  However, I feel adding these
+functions can allow the writers of high performance libraries to better
+optimize their code.
+
+As an example, if the library wants to code a simple fused multiply-add loop,
+they might write the code as follows:
+
+   #include 
+   #include 
+   #include 
+
+   void
+   fma_vector (double * __restrict__ r,
+   const double * __restrict__ a,
+   const double * __restrict__ b,
+   size_t n)
+   {
+ vector double * __restrict__ vr = (vector double * __restrict__)r;
+ const vector double * __restrict__ va = (const vector double * 
__restrict__)a;
+ const vector double * __restrict__ vb = (const vector double * 
__restrict__)b;
+ size_t num_elements = sizeof (vector double) / sizeof (double);
+ size_t nv = n / num_elements;
+ size_t i;
+
+ for (i = 0; i < nv; i++)
+   vr[i] = __builtin_vsx_xvmadddp (va[i], vb[i], vr[i]);
+
+ for (i = nv * num_elements; i < n; i++)
+   r[i] = fma (a[i], b[i], r[i]);
+   }
+
+The inner loop would look like:
+
+   .L3:
+   lxvx 0,3,9
+   lxvx 12,4,9
+   addi 10,9,16
+   addi 2,2,-2
+   lxvx 11,5,9
+   xvmaddadp 0,12,11
+   lxvx 12,4,10
+   lxvx 11,5,10
+   stxvx 0,3,9
+   lxvx 0,3,10
+   addi 9,9,32
+   xvmaddadp 0,12,11
+   stxvx 0,3,10
+   bdnz .L3
+
+Now if you code the loop to use __builtin_vsx_disassemble_pair to do a vector
+pair load, but then do 2 vector stores:
+
+
+   #include 
+   #include 
+   #include 
+
+   void
+   fma_mma_ld (double * __restrict__ r,
+   const double * __restrict__ a,
+   const double * __restrict__ b,
+   size_t n)
+   {
+ __vector_pair * __restrict__ vp_r

[gcc(refs/users/meissner/heads/work192-sha)] Update ChangeLog.*

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:50719e1ecd21b3f2261234ce9ef24a210e449a99

commit 50719e1ecd21b3f2261234ce9ef24a210e449a99
Author: Michael Meissner 
Date:   Fri Jan 31 23:41:41 2025 -0500

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.sha | 168 ++
 1 file changed, 168 insertions(+)

diff --git a/gcc/ChangeLog.sha b/gcc/ChangeLog.sha
index 4bc792425d41..d1334b9632a0 100644
--- a/gcc/ChangeLog.sha
+++ b/gcc/ChangeLog.sha
@@ -1,5 +1,173 @@
+ Branch work192-sha, patch #401 
+
+Add potential p-future XVRLD and XVRLDI instructions.
+
+2025-01-16  Michael Meissner  
+
+gcc/
+
+   * config/rs6000/altivec.md (altivec_vrl): Add support for a
+   possible XVRLD instruction in the future.
+   (altivec_vrl_immediate): New insns.
+   * config/rs6000/predicates.md (vector_shift_immediate): New predicate.
+   * config/rs6000/rs6000.h (TARGET_XVRLW): New macro.
+   * config/rs6000/rs6000.md (isa attribute): Add xvrlw.
+   (enabled attribute): Add support for xvrlw.
+
+gcc/testsuite/
+
+   * gcc.target/powerpc/vector-rotate-left.c: New test.
+   * lib/target-supports.exp (check_effective_target_powerpc_future_ok):
+   Add support to test -mcpu=future.
+
+ Branch work192-sha, patch #400 
+
+PR target/117251: Add PowerPC XXEVAL support to speed up SHA3 calculations
+
+The multibuff.c benchmark attached to the PR target/117251 compiled for Power10
+PowerPC that implement SHA3 has a slowdown in the current trunk and GCC 14
+compared to GCC 11 - GCC 13, due to excessive amounts of spilling.
+
+The main function for the multibuf.c file has 3,747 lines, all of which are
+using vector unsigned long long.  There are 696 vector rotates (all rotates are
+constant), 1,824 vector xor's and 600 vector andc's.
+
+In looking at it, the main thing that steps out is the reason for either
+spilling or moving variables is the support in fusion.md (generated by
+genfusion.pl) that tries to fuse the vec_andc feeding into vec_xor, and other
+vec_xor's feeding into vec_xor.
+
+On the powerpc for power10, there is a special fusion mode that happens if the
+machine has a VANDC or VXOR instruction that is adjacent to a VXOR instruction
+and the VANDC/VXOR feeds into the 2nd VXOR instruction.
+
+While the Power10 has 64 vector registers (which uses the XXL prefix to do
+logical operations), the fusion only works with the older Altivec instruction
+set (which uses the V prefix).  The Altivec instruction only has 32 vector
+registers (which are overlaid over the VSX vector registers 32-63).
+
+By having the combiner patterns fuse_vandc_vxor and fuse_vxor_vxor to do this
+fusion, it means that the register allocator has more register pressure for the
+traditional Altivec registers instead of the VSX registers.
+
+In addition, since there are vector rotates, these rotates only work on the
+traditional Altivec registers, which adds to the Altivec register pressure.
+
+Finally in addition to doing the explicit xor, andc, and rotates using the
+Altivec registers, we have to also load vector constants for the rotate amount
+and these registers also are allocated as Altivec registers.
+
+Current trunk and GCC 12-14 have more vector spills than GCC 11, but GCC 11 has
+many more vector moves that the later compilers.  Thus even though it has way
+less spills, the vector moves are why GCC 11 have the slowest results.
+
+There is an instruction that was added in power10 (XXEVAL) that does provide
+fusion between VSX vectors that includes ANDC->XOR and XOR->XOR fusion.
+
+The latency of XXEVAL is slightly more than the fused VANDC/VXOR or VXOR/VXOR,
+so I have written the patch to prefer doing the Altivec instructions if they
+don't need a temporary register.
+
+Here are the results for adding support for XXEVAL for the multibuff.c
+benchmark attached to the PR.  Note that we essentially recover the speed with
+this patch that were lost with GCC 14 and the current trunk:
+
+  XXEVALTrunk   GCC14   GCC13   GCC12GCC11
+  ---   -   -   --
+Benchmark time in seconds   5.53 6.156.265.575.61 9.56
+
+Fuse VANDC -> VXOR   209 600  600 600 600  600
+Fuse VXOR -> VXOR  0 240  240 120 120  120
+XXEVAL to fuse ANDC -> XOR   391   00   0   00
+XXEVAL to fuse XOR -> XOR240   00   0   00
+
+Spill vector to stack 78 364  364 172 184  110
+Load spilled vector from stack   431 962  962 713 723  166
+Vector moves  10 100  100  70  723,055
+
+Vector rotate right  696 696  696 696 696  696
+XXLANDC or VANDC 209 600  600 600

[gcc(refs/users/meissner/heads/work192-sha)] Add potential p-future XVRLD and XVRLDI instructions.

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:2b3ef8d42459c73090a5f5f26035e8ab226b4b33

commit 2b3ef8d42459c73090a5f5f26035e8ab226b4b33
Author: Michael Meissner 
Date:   Fri Jan 31 23:39:52 2025 -0500

Add potential p-future XVRLD and XVRLDI instructions.

2025-01-31  Michael Meissner  

gcc/

* config/rs6000/altivec.md (altivec_vrl): Add support for a
possible XVRLD instruction in the future.
(altivec_vrl_immediate): New insns.
* config/rs6000/predicates.md (vector_shift_immediate): New 
predicate.
* config/rs6000/rs6000.h (TARGET_XVRLW): New macro.
* config/rs6000/rs6000.md (isa attribute): Add xvrlw.
(enabled attribute): Add support for xvrlw.

gcc/testsuite/

* gcc.target/powerpc/vector-rotate-left.c: New test.
* lib/target-supports.exp 
(check_effective_target_powerpc_future_ok):
Add support to test -mcpu=future.

Diff:
---
 gcc/config/rs6000/altivec.md   | 35 +++---
 gcc/config/rs6000/predicates.md| 26 
 gcc/config/rs6000/rs6000.h |  3 ++
 gcc/config/rs6000/rs6000.md|  6 +++-
 .../gcc.target/powerpc/vector-rotate-left.c| 34 +
 gcc/testsuite/lib/target-supports.exp  | 12 
 6 files changed, 111 insertions(+), 5 deletions(-)

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 7edc288a6565..013960438b04 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -1982,12 +1982,39 @@
 }
   [(set_attr "type" "vecperm")])
 
+;; -mcpu=future adds a vector rotate left word variant.  There is no vector
+;; byte/half-word/double-word/quad-word rotate left.  This insn occurs before
+;; altivec_vrl and will match for -mcpu=future, while other cpus will
+;; match the generic insn.
+;; However for testing, allow other xvrl variants.  In particular, XVRLD for
+;; the sha3 tests for multibuf/singlebuf.
 (define_insn "altivec_vrl"
-  [(set (match_operand:VI2 0 "register_operand" "=v")
-(rotate:VI2 (match_operand:VI2 1 "register_operand" "v")
-   (match_operand:VI2 2 "register_operand" "v")))]
+  [(set (match_operand:VI2 0 "register_operand" "=v,wa")
+(rotate:VI2 (match_operand:VI2 1 "register_operand" "v,wa")
+   (match_operand:VI2 2 "register_operand" "v,wa")))]
   ""
-  "vrl %0,%1,%2"
+  "@
+   vrl %0,%1,%2
+   xvrl %x0,%x1,%x2"
+  [(set_attr "type" "vecsimple")
+   (set_attr "isa" "*,xvrlw")])
+
+(define_insn "*altivec_vrl_immediate"
+  [(set (match_operand:VI2 0 "register_operand" "=wa,wa,wa,wa")
+   (rotate:VI2 (match_operand:VI2 1 "register_operand" "wa,wa,wa,wa")
+   (match_operand:VI2 2 "vector_shift_immediate" 
"j,wM,wE,wS")))]
+  "TARGET_XVRLW && "
+{
+  rtx op2 = operands[2];
+  int value = 256;
+  int num_insns = -1;
+
+  if (!xxspltib_constant_p (op2, mode, &num_insns, &value))
+gcc_unreachable ();
+
+  operands[3] = GEN_INT (value & 0xff);
+  return "xvrli %x0,%x1,%3";
+}
   [(set_attr "type" "vecsimple")])
 
 (define_insn "altivec_vrlq"
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 6485ee3eeecc..276812573977 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -728,6 +728,32 @@
   return num_insns == 1;
 })
 
+;; Return 1 if the operand is a CONST_VECTOR whose elements are all the
+;; same and the elements can be an immediate shift or rotate factor
+(define_predicate "vector_shift_immediate"
+  (match_code "const_vector,vec_duplicate,const_int")
+{
+  int value = 256;
+  int num_insns = -1;
+
+  if (zero_constant (op, mode) || all_ones_constant (op, mode))
+return true;
+
+  if (!xxspltib_constant_p (op, mode, &num_insns, &value))
+return false;
+
+  switch (mode)
+{
+case V16QImode: return IN_RANGE (value, 0, 7);
+case V8HImode:  return IN_RANGE (value, 0, 15);
+case V4SImode:  return IN_RANGE (value, 0, 31);
+case V2DImode:  return IN_RANGE (value, 0, 63);
+default:break;
+}
+
+  return false;
+})
+  
 ;; Return 1 if the operand is a CONST_VECTOR and can be loaded into a
 ;; vector register without using memory.
 (define_predicate "easy_vector_constant"
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index ec08c96d0f67..00f6ff2be636 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -575,6 +575,9 @@ extern int rs6000_vector_align[];
below.  */
 #define RS6000_FN_TARGET_INFO_HTM 1
 
+/* Whether we have XVRLW support.  */
+#define TARGET_XVRLW   TARGET_FUTURE
+
 /* Whether the various reciprocal divide/square root estimate instructions
exist, and whether we should automatically generate code for the instruction
by default.  */
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 3b

[gcc(refs/users/meissner/heads/work192-dmf)] Revert changes

2025-01-31 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:3e16927c1fc6abd0bba4ac31b2b69f4b36146eb6

commit 3e16927c1fc6abd0bba4ac31b2b69f4b36146eb6
Author: Michael Meissner 
Date:   Fri Jan 31 23:35:02 2025 -0500

Revert changes

Diff:
---
 gcc/config/rs6000/altivec.md   | 14 
 gcc/config/rs6000/constraints.md   | 10 ---
 gcc/config/rs6000/predicates.md| 52 +---
 gcc/config/rs6000/rs6000.cc| 25 --
 gcc/config/rs6000/rs6000.h |  7 --
 gcc/config/rs6000/rs6000.md| 96 +++---
 gcc/testsuite/gcc.target/powerpc/prefixed-addis.c  | 24 --
 .../gcc.target/powerpc/vector-rotate-left.c| 34 
 8 files changed, 14 insertions(+), 248 deletions(-)

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index d158cf479d60..7edc288a6565 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -1982,20 +1982,6 @@
 }
   [(set_attr "type" "vecperm")])
 
-;; -mcpu=future adds a vector rotate left word variant.  There is no vector
-;; byte/half-word/double-word/quad-word rotate left.  This insn occurs before
-;; altivec_vrl and will match for -mcpu=future, while other cpus will
-;; match the generic insn.
-(define_insn "*xvrlw"
-  [(set (match_operand:V4SI 0 "register_operand" "=v,wa")
-   (rotate:V4SI (match_operand:V4SI 1 "register_operand" "v,wa")
-(match_operand:V4SI 2 "register_operand" "v,wa")))]
-  "TARGET_XVRLW"
-  "@
-   vrlw %0,%1,%2
-   xvrlw %x0,%x1,%x2"
-  [(set_attr "type" "vecsimple")])
-
 (define_insn "altivec_vrl"
   [(set (match_operand:VI2 0 "register_operand" "=v")
 (rotate:VI2 (match_operand:VI2 1 "register_operand" "v")
diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index 5440becb6e6c..3da9ed086810 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -222,16 +222,6 @@
   "An IEEE 128-bit constant that can be loaded into VSX registers."
   (match_operand 0 "easy_vector_constant_ieee128"))
 
-(define_constraint "eU"
-  "@internal integer constant that can be loaded with paddis"
-  (and (match_code "const_int")
-   (match_operand 0 "paddis_operand")))
-
-(define_constraint "eV"
-  "@internal integer constant that can be loaded with paddis + paddi"
-  (and (match_code "const_int")
-   (match_operand 0 "paddis_paddi_operand")))
-
 ;; Floating-point constraints.  These two are defined so that insn
 ;; length attributes can be calculated exactly.
 
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index c206860e4927..c95b4336f062 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -369,53 +369,6 @@
   return SIGNED_INTEGER_34BIT_P (INTVAL (op));
 })
 
-;; Return 1 if op is a 64-bit constant that uses the paddis instruction
-(define_predicate "paddis_operand"
-  (match_code "const_int")
-{
-  if (!TARGET_PADDIS && TARGET_POWERPC64)
-return 0;
-
-  /* If addi, addis, or paddi can handle the number, don't return true.  */
-  HOST_WIDE_INT value = INTVAL (op);
-  if (SIGNED_INTEGER_34BIT_P (value))
-return false;
-
-  /* If the number is too large for padds, return false.  */
-  if (!SIGNED_INTEGER_32BIT_P (value >> 32))
-return false;
-
-  /* If the bottom 32-bits are non-zero, paddis can't handle it.  */
-  if ((value & HOST_WIDE_INT_C(0x)) != 0)
-return false;
-
-  return true;
-})
-
-;; Return 1 if op is a 64-bit constant that needs the paddis instruction and an
-;; addi/addis/paddi instruction combination.
-(define_predicate "paddis_paddi_operand"
-  (match_code "const_int")
-{
-  if (!TARGET_PADDIS && TARGET_POWERPC64)
-return 0;
-
-  /* If addi, addis, or paddi can handle the number, don't return true.  */
-  HOST_WIDE_INT value = INTVAL (op);
-  if (SIGNED_INTEGER_34BIT_P (value))
-return false;
-
-  /* If the number is too large for padds, return false.  */
-  if (!SIGNED_INTEGER_32BIT_P (value >> 32))
-return false;
-
-  /* If the bottom 32-bits are zero, we can use paddis alone to handle it.  */
-  if ((value & HOST_WIDE_INT_C(0x)) == 0)
-return false;
-
-  return true;
-})
-
 ;; Return 1 if op is a register that is not special.
 ;; Disallow (SUBREG:SF (REG:SI)) and (SUBREG:SI (REG:SF)) on VSX systems where
 ;; you need to be careful in moving a SFmode to SImode and vice versa due to
@@ -1160,10 +1113,7 @@
   (if_then_else (match_code "const_int")
 (match_test "satisfies_constraint_I (op)
 || satisfies_constraint_L (op)
-|| satisfies_constraint_eI (op)
-|| satisfies_constraint_eU (op)
-|| satisfies_constraint_eV (op)")
-
+|| satisfies_constraint_eI (op)")
 (match_operand 0 "gpc_reg_operand")))
 
 ;; Return 1 if the operand is either a non-special register, or 0, or -1.
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/conf

87 matches

Mail list logo