[gcc r15-5860] RISC-V: Add intrinsics support for SiFive Xsfvfnrclipxfqf extensions.

2024-12-02 Thread Kito Cheng via Gcc-cvs
https://gcc.gnu.org/g:1352d4dd09293faf170072269fcef3aa6694d6ae

commit r15-5860-g1352d4dd09293faf170072269fcef3aa6694d6ae
Author: yulong 
Date:   Mon Dec 2 09:31:53 2024 +0800

RISC-V: Add intrinsics support for SiFive Xsfvfnrclipxfqf extensions.

This commit adds intrinsics support for Xsfvfnrclipxfqf. We also move the
definition of enum frm_op_type into the riscv-vector-builtins-bases.h file,
because it is also used in the sifive-vector-builtins-bases.cc file.

Co-Authored by: Jiawei Chen 
Co-Authored by: Shihua Liao 
Co-Authored by: Yixuan Chen 

gcc/ChangeLog:

* config/riscv/generic-vector-ooo.md: New reservation.
* config/riscv/genrvv-type-indexer.cc (main): New type.
* config/riscv/riscv-vector-builtins-bases.cc (enum frm_op_type): 
Delete it.
* config/riscv/riscv-vector-builtins-bases.h (enum frm_op_type): 
Redefine in h file.
* config/riscv/riscv-vector-builtins-shapes.cc (struct 
sf_vfnrclip_def): New function.
(SHAPE): Ditto.
* config/riscv/riscv-vector-builtins-shapes.h: Ditto.
* config/riscv/riscv-vector-builtins.cc (DEF_RVV_TYPE_INDEX): New 
builtins def.
* config/riscv/riscv-vector-builtins.def (DEF_RVV_TYPE_INDEX): New 
base def.
(signed_eew8_index): Ditto.
* config/riscv/riscv-vector-builtins.h (enum required_ext): New 
extension.
(required_ext_to_isa_name): Ditto.
(required_extensions_specified): Ditto.
(struct function_group_info): Ditto.
* config/riscv/riscv.md: New attr.
* config/riscv/sifive-vector-builtins-bases.cc (class 
sf_vfnrclip_x_f_qf): New function.
(class sf_vfnrclip_xu_f_qf): Ditto.
(BASE): New base_name.
* config/riscv/sifive-vector-builtins-bases.h: New function_base.
* config/riscv/sifive-vector-builtins-functions.def
(REQUIRED_EXTENSIONS): New intrinsics def.
(sf_vfnrclip_x_f_qf): Ditto.
(sf_vfnrclip_xu_f_qf): Ditto.
* config/riscv/sifive-vector.md 
(@pred_sf_vfnrclip_x_f_qf): New RTL mode.
* config/riscv/vector-iterators.md: New iterator.

Diff:
---
 gcc/config/riscv/generic-vector-ooo.md |  2 +-
 gcc/config/riscv/genrvv-type-indexer.cc| 10 +
 gcc/config/riscv/riscv-vector-builtins-bases.cc|  6 ---
 gcc/config/riscv/riscv-vector-builtins-bases.h |  6 +++
 gcc/config/riscv/riscv-vector-builtins-shapes.cc   | 28 
 gcc/config/riscv/riscv-vector-builtins-shapes.h|  1 +
 gcc/config/riscv/riscv-vector-builtins.cc  | 51 ++---
 gcc/config/riscv/riscv-vector-builtins.def | 31 ++---
 gcc/config/riscv/riscv-vector-builtins.h   |  7 +++
 gcc/config/riscv/riscv.md  |  3 +-
 gcc/config/riscv/sifive-vector-builtins-bases.cc   | 52 ++
 gcc/config/riscv/sifive-vector-builtins-bases.h|  2 +
 .../riscv/sifive-vector-builtins-functions.def |  4 ++
 gcc/config/riscv/sifive-vector.md  | 20 +
 gcc/config/riscv/vector-iterators.md   | 30 -
 15 files changed, 214 insertions(+), 39 deletions(-)

diff --git a/gcc/config/riscv/generic-vector-ooo.md 
b/gcc/config/riscv/generic-vector-ooo.md
index 132ab0398228..bcad36c1a36d 100644
--- a/gcc/config/riscv/generic-vector-ooo.md
+++ b/gcc/config/riscv/generic-vector-ooo.md
@@ -69,7 +69,7 @@
 
 ;; Vector float multiplication and FMA.
 (define_insn_reservation "vec_fmul" 6
-  (eq_attr "type" "vfmul,vfwmul,vfmuladd,vfwmuladd,vfwmaccbf16,sf_vqmacc")
+  (eq_attr "type" 
"vfmul,vfwmul,vfmuladd,vfwmuladd,vfwmaccbf16,sf_vqmacc,sf_vfnrclip")
   "vxu_ooo_issue,vxu_ooo_alu")
 
 ;; Vector crypto, assumed to be a generic operation for now.
diff --git a/gcc/config/riscv/genrvv-type-indexer.cc 
b/gcc/config/riscv/genrvv-type-indexer.cc
index 8822e101c530..e1eee34237a3 100644
--- a/gcc/config/riscv/genrvv-type-indexer.cc
+++ b/gcc/config/riscv/genrvv-type-indexer.cc
@@ -250,6 +250,7 @@ main (int argc, const char **argv)
   fprintf (fp, "  /*MASK*/ %s,\n", mode.str ().c_str ());
   fprintf (fp, "  /*SIGNED*/ INVALID,\n");
   fprintf (fp, "  /*UNSIGNED*/ INVALID,\n");
+  fprintf (fp, "  /*SIGNED_EEW8_INDEX*/ INVALID,\n");
   for (unsigned eew : {8, 16, 32, 64})
fprintf (fp, "  /*EEW%d_INDEX*/ INVALID,\n", eew);
   fprintf (fp, "  /*SHIFT*/ INVALID,\n");
@@ -316,6 +317,10 @@ main (int argc, const char **argv)
 inttype (sew, lmul_log2, /*unsigned_p*/ false).c_str ());
fprintf (fp, "  /*UNSIGNED*/ %s,\n",
 inttype (sew, lmul_log2, /*unsigned_p*/ true).c_str ());
+   fprintf (fp, "  /*SIGNED_EEW8_INDEX*/ %s,\n",
+same_ratio_eew_type (sew, lmul_log2, 8,
+ /*unsigned_p*/ false, false)
+  

[gcc r15-5861] RISC-V: Add intrinsics testcases for SiFive Xsfvfnrclipxfqf extensions.

2024-12-02 Thread Kito Cheng via Gcc-cvs
https://gcc.gnu.org/g:275197057677406d575bfdbffa259ba7225e671f

commit r15-5861-g275197057677406d575bfdbffa259ba7225e671f
Author: yulong 
Date:   Mon Dec 2 09:31:54 2024 +0800

RISC-V: Add intrinsics testcases for SiFive Xsfvfnrclipxfqf extensions.

This commit adds testcases for Xsfvfnrclipxfqf.

Co-Authored by: Jiawei Chen 
Co-Authored by: Shihua Liao 
Co-Authored by: Yixuan Chen 

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/xsfvector/sf_vfnrclip_x_f_qf.c: New test.
* gcc.target/riscv/rvv/xsfvector/sf_vfnrclip_xu_f_qf.c: New test.

Diff:
---
 .../riscv/rvv/xsfvector/sf_vfnrclip_x_f_qf.c   | 606 +
 .../riscv/rvv/xsfvector/sf_vfnrclip_xu_f_qf.c  | 605 
 2 files changed, 1211 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/xsfvector/sf_vfnrclip_x_f_qf.c 
b/gcc/testsuite/gcc.target/riscv/rvv/xsfvector/sf_vfnrclip_x_f_qf.c
new file mode 100644
index ..813f7860f645
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/xsfvector/sf_vfnrclip_x_f_qf.c
@@ -0,0 +1,606 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_xsfvfnrclipxfqf -mabi=lp64d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "riscv_vector.h"
+
+/*
+** test_sf_vfnrclip_x_f_qf_i8mf8_vint8mf8_t:
+** ...
+** sf\.vfnrclip\.x\.f\.qf\tv[0-9]+,v[0-9]+,fa[0-9]+
+** ...
+*/
+vint8mf8_t test_sf_vfnrclip_x_f_qf_i8mf8_vint8mf8_t(vfloat32mf2_t vs2, float 
rs1, size_t vl) {
+return __riscv_sf_vfnrclip_x_f_qf_i8mf8(vs2, rs1, vl);
+}
+
+/*
+** test_sf_vfnrclip_x_f_qf_i8mf4_vint8mf4_t:
+** ...
+** sf\.vfnrclip\.x\.f\.qf\tv[0-9]+,v[0-9]+,fa[0-9]+
+** ...
+*/
+vint8mf4_t test_sf_vfnrclip_x_f_qf_i8mf4_vint8mf4_t(vfloat32m1_t vs2, float 
rs1, size_t vl) {
+return __riscv_sf_vfnrclip_x_f_qf_i8mf4(vs2, rs1, vl);
+}
+
+/*
+** test_sf_vfnrclip_x_f_qf_i8mf2_vint8mf2_t:
+** ...
+** sf\.vfnrclip\.x\.f\.qf\tv[0-9]+,v[0-9]+,fa[0-9]+
+** ...
+*/
+vint8mf2_t test_sf_vfnrclip_x_f_qf_i8mf2_vint8mf2_t(vfloat32m2_t vs2, float 
rs1, size_t vl) {
+return __riscv_sf_vfnrclip_x_f_qf_i8mf2(vs2, rs1, vl);
+}
+
+/*
+** test_sf_vfnrclip_x_f_qf_i8m1_vint8m1_t:
+** ...
+** sf\.vfnrclip\.x\.f\.qf\tv[0-9]+,v[0-9]+,fa[0-9]+
+** ...
+*/
+vint8m1_t test_sf_vfnrclip_x_f_qf_i8m1_vint8m1_t(vfloat32m4_t vs2, float rs1, 
size_t vl) {
+return __riscv_sf_vfnrclip_x_f_qf_i8m1(vs2, rs1, vl);
+}
+
+/*
+** test_sf_vfnrclip_x_f_qf_i8m2_vint8m2_t:
+** ...
+** sf\.vfnrclip\.x\.f\.qf\tv[0-9]+,v[0-9]+,fa[0-9]+
+** ...
+*/
+vint8m2_t test_sf_vfnrclip_x_f_qf_i8m2_vint8m2_t(vfloat32m8_t vs2, float rs1, 
size_t vl) {
+return __riscv_sf_vfnrclip_x_f_qf_i8m2(vs2, rs1, vl);
+}
+
+/*
+** test_sf_vfnrclip_x_f_qf_i8mf8_m_vint8mf8_t:
+** ...
+** sf\.vfnrclip\.x\.f\.qf\tv[0-9]+,v[0-9]+,fa[0-9]+,v0.t
+** ...
+*/
+vint8mf8_t test_sf_vfnrclip_x_f_qf_i8mf8_m_vint8mf8_t(vbool64_t mask, 
vfloat32mf2_t vs2, float rs1, size_t vl) {
+return __riscv_sf_vfnrclip_x_f_qf_i8mf8_m(mask, vs2, rs1, vl);
+}
+
+/*
+** test_sf_vfnrclip_x_f_qf_i8mf4_m_vint8mf4_t:
+** ...
+** sf\.vfnrclip\.x\.f\.qf\tv[0-9]+,v[0-9]+,fa[0-9]+,v0.t
+** ...
+*/
+vint8mf4_t test_sf_vfnrclip_x_f_qf_i8mf4_m_vint8mf4_t(vbool32_t mask, 
vfloat32m1_t vs2, float rs1, size_t vl) {
+return __riscv_sf_vfnrclip_x_f_qf_i8mf4_m(mask, vs2, rs1, vl);
+}
+
+/*
+** test_sf_vfnrclip_x_f_qf_i8mf2_m_vint8mf2_t:
+** ...
+** sf\.vfnrclip\.x\.f\.qf\tv[0-9]+,v[0-9]+,fa[0-9]+,v0.t
+** ...
+*/
+vint8mf2_t test_sf_vfnrclip_x_f_qf_i8mf2_m_vint8mf2_t(vbool16_t mask, 
vfloat32m2_t vs2, float rs1, size_t vl) {
+return __riscv_sf_vfnrclip_x_f_qf_i8mf2_m(mask, vs2, rs1, vl);
+}
+
+/*
+** test_sf_vfnrclip_x_f_qf_i8m1_m_vint8m1_t:
+** ...
+** sf\.vfnrclip\.x\.f\.qf\tv[0-9]+,v[0-9]+,fa[0-9]+,v0.t
+** ...
+*/
+vint8m1_t test_sf_vfnrclip_x_f_qf_i8m1_m_vint8m1_t(vbool8_t mask, vfloat32m4_t 
vs2, float rs1, size_t vl) {
+return __riscv_sf_vfnrclip_x_f_qf_i8m1_m(mask, vs2, rs1, vl);
+}
+
+/*
+** test_sf_vfnrclip_x_f_qf_i8m2_m_vint8m2_t:
+** ...
+** sf\.vfnrclip\.x\.f\.qf\tv[0-9]+,v[0-9]+,fa[0-9]+,v0.t
+** ...
+*/
+vint8m2_t test_sf_vfnrclip_x_f_qf_i8m2_m_vint8m2_t(vbool4_t mask, vfloat32m8_t 
vs2, float rs1, size_t vl) {
+return __riscv_sf_vfnrclip_x_f_qf_i8m2_m(mask, vs2, rs1, vl);
+}
+
+/*
+** test_sf_vfnrclip_x_f_qf_vint8mf8_t:
+** ...
+** sf\.vfnrclip\.x\.f\.qf\tv[0-9]+,v[0-9]+,fa[0-9]+
+** ...
+*/
+vint8mf8_t test_sf_vfnrclip_x_f_qf_vint8mf8_t(vfloat32mf2_t vs2, float rs1, 
size_t vl) {
+return __riscv_sf_vfnrclip_x_f_qf(vs2, rs1, vl);
+}
+
+/*
+** test_sf_vfnrclip_x_f_qf_vint8mf4_t:
+** ...
+** sf\.vfnrclip\.x\.f\.qf\tv[0-9]+,v[0-9]+,fa[0-9]+
+** ...
+*/
+vint8mf4_t test_sf_vfnrclip_x_f_qf_vint8mf4_t(vfloat32m1_t vs2, float rs1, 
size_t vl) {
+return __riscv_sf_vfnrclip_x_f_qf(vs2, rs1, vl);
+}
+
+/*
+** test_sf_vfnrclip_x_f_qf_vint8mf2_t:
+** ...
+** sf\.vfnrclip\.x\.f\.qf\tv[0-9]+,v[0-9]+,fa[0-9]+
+** ...
+*/
+vint8mf2_t test_sf_vfnrclip_x_f_qf_vint8mf2_t(vfloat32m2_t vs2, 

[gcc r15-5863] tree-optimization/116352 - SLP scheduling and stmt order

2024-12-02 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:5ab3f091b3eb42795340d3c9cea8aaec2060693c

commit r15-5863-g5ab3f091b3eb42795340d3c9cea8aaec2060693c
Author: Richard Biener 
Date:   Mon Dec 2 11:07:46 2024 +0100

tree-optimization/116352 - SLP scheduling and stmt order

The PR uncovers unchecked constraints on the ability to code-generate
with SLP but also latent issues with regard to stmt order checking
since loop (early-break) and BB (for quite some time) vectorization
are no longer constrained to single BBs.  In particular get_later_stmt
simply compares UIDs of stmts, but that's only reliable when they
are in the same BB.

For the PR in question the problematical case is demoting a SLP node
to external which fails to check we can actually code generate this
in the way we do (using get_later_stmt).  The following thus adds
checking that we demote to external only when all defs are from
the same BB.

We no longer vectorize gcc.dg/vect/bb-slp-49.c but the testcase was
for a wrong-code issue and the vectorization done is a no-op.

PR tree-optimization/116352
PR tree-optimization/117876
* tree-vect-slp.cc (vect_slp_can_convert_to_external): New.
(vect_slp_convert_to_external): Call it.
(vect_build_slp_tree_2): Likewise.

* gcc.dg/vect/pr116352.c: New testcase.
* gcc.dg/vect/bb-slp-49.c: Remove vectorization check.

Diff:
---
 gcc/testsuite/gcc.dg/vect/bb-slp-49.c |  3 +--
 gcc/testsuite/gcc.dg/vect/pr116352.c  | 34 ++
 gcc/tree-vect-slp.cc  | 29 +++--
 3 files changed, 58 insertions(+), 8 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-49.c 
b/gcc/testsuite/gcc.dg/vect/bb-slp-49.c
index e7101fcff462..c0ad5d70a9ac 100644
--- a/gcc/testsuite/gcc.dg/vect/bb-slp-49.c
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-49.c
@@ -23,6 +23,5 @@ main ()
   return 0;
 }
 
-/* See that we vectorize an SLP instance.  */
+/* See that we try to vectorize an SLP instance.  */
 /* { dg-final { scan-tree-dump "Analyzing vectorizable constructor" "slp1" } } 
*/
-/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "slp1" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr116352.c 
b/gcc/testsuite/gcc.dg/vect/pr116352.c
new file mode 100644
index ..3fe537c34ff6
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr116352.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3" } */
+
+static void addPrior(float center_x, float center_y, float width, float height,
+ bool normalized, float *dst)
+{
+  if (normalized)
+{
+  dst[0] = (center_x - width * 0.5f);
+  dst[1] = (center_y - height * 0.5f);
+  dst[2] = (center_x + width * 0.5f);
+  dst[3] = (center_y + height * 0.5f);
+}
+  else
+{
+  dst[0] = center_x - width * 0.5f;
+  dst[1] = center_y - height * 0.5f;
+  dst[2] = center_x + width * 0.5f - 1.0f;
+  dst[3] = center_y + height * 0.5f - 1.0f;
+}
+}
+void forward(float *outputPtr, int _offsetsXs, float *_offsetsX,
+float *_offsetsY, float _stepX, float _stepY,
+bool _bboxesNormalized, float _boxWidth, float _boxHeight)
+{
+  for (int j = 0; j < _offsetsXs; ++j)
+{
+  float center_x = (_offsetsX[j]) * _stepX;
+  float center_y = (_offsetsY[j]) * _stepY;
+  addPrior(center_x, center_y, _boxWidth, _boxHeight, _bboxesNormalized,
+  outputPtr);
+  outputPtr += 4;
+}
+}
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index ec986cc3f686..1799d5a619b1 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -67,6 +67,7 @@ static int vectorizable_slp_permutation_1 (vec_info *, 
gimple_stmt_iterator *,
 static bool vectorizable_slp_permutation (vec_info *, gimple_stmt_iterator *,
  slp_tree, stmt_vector_for_cost *);
 static void vect_print_slp_tree (dump_flags_t, dump_location_t, slp_tree);
+static bool vect_slp_can_convert_to_external (const vec &);
 
 static object_allocator<_slp_tree> *slp_tree_pool;
 static slp_tree slp_first_node;
@@ -2887,7 +2888,8 @@ fail:
  for (j = 0; j < group_size; ++j)
if (!matches[j])
  break;
- if (!known_ge (j, TYPE_VECTOR_SUBPARTS (vectype)))
+ if (!known_ge (j, TYPE_VECTOR_SUBPARTS (vectype))
+ && vect_slp_can_convert_to_external (oprnd_info->def_stmts))
{
  if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
@@ -7764,6 +7766,24 @@ vect_slp_analyze_node_operations_1 (vec_info *vinfo, 
slp_tree node,
node, node_instance, cost_vec);
 }
 
+/* Verify if we can externalize a set of internal defs.  */
+
+static bool
+vect_slp_can_convert_to_external (const vec &stmts)
+{
+  basic_block bb = NULL;
+  for (stmt_vec_info stmt : stmts)
+if (!stmt)
+  re

[gcc/aoliva/heads/testbase] (75 commits) VN: Don't recurse on for the same value of `a != 0` [PR1178

2024-12-02 Thread Alexandre Oliva via Gcc-cvs
The branch 'aoliva/heads/testbase' was updated to point to:

 e1009b3de2d0... VN: Don't recurse on for the same value of `a != 0` [PR1178

It previously pointed to:

 70f7c603da67... ifcombine: avoid unsound forwarder-enabled combinations [PR

Diff:

Summary of changes (added commits):
---

  e1009b3... VN: Don't recurse on for the same value of `a != 0` [PR1178 (*)
  24949e6... gimple-lim: Reuse boolean var when moving PHI (*)
  e0ffe66... testsuite: Fix aarch64/sve/acle/general-c/gnu_vectors_[12]. (*)
  8491723... testsuite: Fix aarch64/sve/acle/general-c++/gnu_vectors_[12 (*)
  b996304... testsuite: Fix sve-sizeless-[12].C for C++98 (*)
  86b0750... testsuite: Fix sve-sizeless-[12].C for aggregate change (*)
  99d1fcf... testsuite: Fix another issue with sve-sizeless-[12].C (*)
  cdcc938... testsuite: Fix part of sve-sizeless-2.c (*)
  e4c1b3d... [PATCH v3] zero_extend(not) -> xor optimization [PR112398] (*)
  ff5e235... Daily bump. (*)
  abed480... libstdc++: Improve new testcase for std::optional assignmen (*)
  c2c7d71... libstdc++: Fix constraints on std::optional converting assi (*)
  91f4550... libstdc++: Move std::monostate to <utility> for C++26 (P047 (*)
  0598e2f... libstdc++: Improve test for  synopsis (*)
  2ae0566... Support for 64-bit location_t: Internal parts (*)
  8cc9d27... Support for 64-bit location_t: toplev parts (*)
  9bba906... Support for 64-bit location_t: Backend parts (*)
  abea0db... gimplify: Handle void expression as asm input [PR100501, PR (*)
  5297795... Write S_INLINESITE CodeView symbols (*)
  65b5c4a... Write S_INLINEELINES CodeView subsection (*)
  e908efb... Don't output CodeView line numbers for inlined functions (*)
  4ed1898... Add block parameter to begin_block debug hook (*)
  214985f... AVR: ad target/84211 - Split MOVW into MOVs in try_split_an (*)
  6bebb3b... strlen: Handle vector CONSTRUCTORs [PR117057] (*)
  f089ef8... openmp: Add crtoffloadtableS.o and use it [PR117851] (*)
  cd107c1... LoongArch: Mask shift offset when emit {xv, v}{srl, sll, sr (*)
  4ad1c87... LoongArch: testsuite: Fix l{a}sx-andn-iorn.c. (*)
  4f650ef... LoongArch: testsuite: Fix loongarch/vect-frint-scalar.c. (*)
  1539bcd... c: Set attributes for fields when forming a composite type  (*)
  1701efd... gimplefe: Error recovery for invalid declarations [PR117749 (*)
  eb9f1ba... ext-dce: Fix SIGN_EXTEND handling and cleanups [PR117360] (*)
  cc67d95... c++: Implement C++26 P3176R1 - The Oxford variadic comma (*)
  20dcb79... Daily bump. (*)
  bc35976... Rename "libdiagnostics" to "libgdiagnostics" (*)
  b02b9e8... AVR: Skip the gcc.c-torture/execute/memcpy-a*.c tests. (*)
  f8f5732... libbacktrace: use WIN32_LEAN_AND_MEAN, not WIN32_MEAN_AND_L (*)
  ed712cf... compiler: increase buffer size to avoid warning (*)
  1a1ac4f... AVR: Fix some coding rule nits and typos. (*)
  4c46ad7... aarch64: Add attributes to the data intrinsics. (*)
  5034cec... aarch64: add attributes to the prefetch_builtins (*)
  b35f9c2... aarch64: Fix up flags for vget_low_*, vget_high_* and vrein (*)
  af974df... aarch64: Mark __builtin_aarch64_im_lane_boundsi as leaf and (*)
  e79583c... [PR117770][LRA]: Check hard regs corresponding insn operand (*)
  75ade61... AVR: target/117681 - Set UNWIND_WORD_MODE to Pmode. (*)
  d833114... AVR: target/117726 - Better optimize shifts. (*)
  f3ee8bc... aarch64: Fix build failure due to missing header (*)
  f42fd8e... arm, mve: Detect uses of vctp_vpr_generated inside subregs (*)
  15bd625... arm, mve: Pass -std=c99 to dlstp-loop-form.c to avoid new w (*)
  cf75f86... arm, mve: Fix scan-assembler for test7 in dlstp-compile-asm (*)
  74eb357... [PATCH v7 03/12] RISC-V: Add CRC expander to generate faste (*)
  fe29b03... RISC-V: Add intrinsics testcases for SiFive Xsfvqmaccqoq/do (*)
  356bfe8... RISC-V: Add intrinsics support for SiFive Xsfvqmaccqoq/dod  (*)
  b6a5139... c: Correct type compatibility for bit-fields [PR117828] (*)
  5b0e4ed... AArch64: Suppress default options when march or mcpu used i (*)
  7028b1b... aarch64: Add ISA requirements to some SVE/SME md comments (*)
  441f8d6... aarch64: add SVE2 FP8DOT2 and FP8DOT4 intrinsics (*)
  5382040... aarch64: add SVE2 FP8 multiply accumulate intrinsics (*)
  75c3a5c... aarch64: add svcvt* FP8 intrinsics (*)
  4936599... aarch64: specify fpm mode in function instances and groups (*)
  dfa7868... aarch64: Add basic svmfloat8_t support to arm_sve.h (*)
  a54aa75... tree-optimization/115438 - SLP reduction vect vs. bwaves (*)
  b5df3ee... cp: Fix another assumption in the FE about constant vector  (*)
  6338716... aarch64: Update SVE ACLE tests (*)
  91fb1da... aarch64: Add testcase for C/C++ ops on SVE ACLE types. (*)
  17b520a... c: Fix constructor bounds checking for VLA and construct VL (*)
  4f593db... gimple: Handle variable-sized vectors in BIT_FIELD_REF (*)
  47fa008... c: Range-check indexing of SVE ACLE vectors (*)
  761cf60... aarch64: Make C/C++ operations possible on SVE ACLE types. (*)

[gcc r15-5862] testsuite: Adjust rs6000-ldouble-2.c for switch to -std=gnu23 by default [PR117663]

2024-12-02 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:e36eae19f3a4cc9e5efa9ebfa31e081c7ee52fdc

commit r15-5862-ge36eae19f3a4cc9e5efa9ebfa31e081c7ee52fdc
Author: Jakub Jelinek 
Date:   Mon Dec 2 13:55:02 2024 +0100

testsuite: Adjust rs6000-ldouble-2.c for switch to -std=gnu23 by default 
[PR117663]

-std=gnu23/-std=c23 changes LDBL_EPSILON for IBM long double, see r13-3029 
and
https://gcc.gnu.org/pipermail/gcc-patches/2022-October/602738.html
for details.

That change even had a note:
"and when we move to a C2x
default, gcc.target/powerpc/rs6000-ldouble-2.c will need an
appropriate option added to keep using an older language version"

The following patch just implements it to fix rs6000-ldouble-2.c regression.

2024-12-02  Jakub Jelinek  

PR testsuite/117663
* gcc.target/powerpc/rs6000-ldouble-2.c: Add -std=gnu17 to 
dg-options.

Diff:
---
 gcc/testsuite/gcc.target/powerpc/rs6000-ldouble-2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/powerpc/rs6000-ldouble-2.c 
b/gcc/testsuite/gcc.target/powerpc/rs6000-ldouble-2.c
index 5dc74cd2de48..46167376c03d 100644
--- a/gcc/testsuite/gcc.target/powerpc/rs6000-ldouble-2.c
+++ b/gcc/testsuite/gcc.target/powerpc/rs6000-ldouble-2.c
@@ -1,5 +1,5 @@
 /* { dg-do run { target { { powerpc*-*-darwin* powerpc*-*-aix* rs6000-*-* } || 
{ powerpc*-*-linux* && lp64 } } } } */
-/* { dg-options "-mlong-double-128" } */
+/* { dg-options "-mlong-double-128 -std=gnu17" } */
 
 /* Check that LDBL_EPSILON is right for 'long double'.  */


[gcc r15-5864] arm, mve: Adding missing Runtime Library Exception to header files

2024-12-02 Thread Andre Simoes Dias Vieira via Gcc-cvs
https://gcc.gnu.org/g:cde7ce0628f66a5d03cc97c70d4695e6f2acd4db

commit r15-5864-gcde7ce0628f66a5d03cc97c70d4695e6f2acd4db
Author: Andre Vieira 
Date:   Mon Dec 2 13:35:03 2024 +

arm, mve: Adding missing Runtime Library Exception to header files

Add missing Runtime Library Exception to mve header files to bring them into
line with other similar headers. Not adding it in the first place was an
oversight.

gcc/ChangeLog:

* config/arm/arm_mve.h: Add Runtime Library Exception.
* config/arm/arm_mve_types.h: Likewise.

Diff:
---
 gcc/config/arm/arm_mve.h   | 4 
 gcc/config/arm/arm_mve_types.h | 4 
 2 files changed, 8 insertions(+)

diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index 8ffdbc7e1095..21a2ae7353bf 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -15,6 +15,10 @@
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
License for more details.
 
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
.  */
diff --git a/gcc/config/arm/arm_mve_types.h b/gcc/config/arm/arm_mve_types.h
index f549f881b490..7771435f1d75 100644
--- a/gcc/config/arm/arm_mve_types.h
+++ b/gcc/config/arm/arm_mve_types.h
@@ -15,6 +15,10 @@
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
License for more details.
 
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
.  */


[gcc r14-11018] arm, mve: Adding missing Runtime Library Exception to header files

2024-12-02 Thread Andre Simoes Dias Vieira via Gcc-cvs
https://gcc.gnu.org/g:b11f53a91238a0ac5499862a3e7e127f02bff81e

commit r14-11018-gb11f53a91238a0ac5499862a3e7e127f02bff81e
Author: Andre Vieira 
Date:   Mon Dec 2 13:35:03 2024 +

arm, mve: Adding missing Runtime Library Exception to header files

Add missing Runtime Library Exception to mve header files to bring them into
line with other similar headers. Not adding it in the first place was an
oversight.

gcc/ChangeLog:

* config/arm/arm_mve.h: Add Runtime Library Exception.
* config/arm/arm_mve_types.h: Likewise.

(cherry picked from commit cde7ce0628f66a5d03cc97c70d4695e6f2acd4db)

Diff:
---
 gcc/config/arm/arm_mve.h   | 4 
 gcc/config/arm/arm_mve_types.h | 4 
 2 files changed, 8 insertions(+)

diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index ae1b54387979..7325b71cff71 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -15,6 +15,10 @@
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
License for more details.
 
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
.  */
diff --git a/gcc/config/arm/arm_mve_types.h b/gcc/config/arm/arm_mve_types.h
index f549f881b490..7771435f1d75 100644
--- a/gcc/config/arm/arm_mve_types.h
+++ b/gcc/config/arm/arm_mve_types.h
@@ -15,6 +15,10 @@
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
License for more details.
 
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
.  */


[gcc r13-9226] arm, mve: Adding missing Runtime Library Exception to header files

2024-12-02 Thread Andre Simoes Dias Vieira via Gcc-cvs
https://gcc.gnu.org/g:373d4d85cf941a94fe70c3e472a8b7be9982f08f

commit r13-9226-g373d4d85cf941a94fe70c3e472a8b7be9982f08f
Author: Andre Vieira 
Date:   Mon Dec 2 13:35:03 2024 +

arm, mve: Adding missing Runtime Library Exception to header files

Add missing Runtime Library Exception to mve header files to bring them into
line with other similar headers. Not adding it in the first place was an
oversight.

gcc/ChangeLog:

* config/arm/arm_mve.h: Add Runtime Library Exception.
* config/arm/arm_mve_types.h: Likewise.

(cherry picked from commit cde7ce0628f66a5d03cc97c70d4695e6f2acd4db)

Diff:
---
 gcc/config/arm/arm_mve.h   | 4 
 gcc/config/arm/arm_mve_types.h | 4 
 2 files changed, 8 insertions(+)

diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index 71ea3ee275ee..f288a8a3e363 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -15,6 +15,10 @@
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
License for more details.
 
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
.  */
diff --git a/gcc/config/arm/arm_mve_types.h b/gcc/config/arm/arm_mve_types.h
index 12bb519142f3..81a1f048b949 100644
--- a/gcc/config/arm/arm_mve_types.h
+++ b/gcc/config/arm/arm_mve_types.h
@@ -15,6 +15,10 @@
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
License for more details.
 
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
.  */


[gcc r12-10843] arm, mve: Adding missing Runtime Library Exception to header files

2024-12-02 Thread Andre Simoes Dias Vieira via Gcc-cvs
https://gcc.gnu.org/g:ddfc04188bca888f1cbdadd8a2457ed7d7031f32

commit r12-10843-gddfc04188bca888f1cbdadd8a2457ed7d7031f32
Author: Andre Vieira 
Date:   Mon Dec 2 13:35:03 2024 +

arm, mve: Adding missing Runtime Library Exception to header files

Add missing Runtime Library Exception to mve header files to bring them into
line with other similar headers. Not adding it in the first place was an
oversight.

gcc/ChangeLog:

* config/arm/arm_mve.h: Add Runtime Library Exception.
* config/arm/arm_mve_types.h: Likewise.

(cherry picked from commit cde7ce0628f66a5d03cc97c70d4695e6f2acd4db)

Diff:
---
 gcc/config/arm/arm_mve.h   | 4 
 gcc/config/arm/arm_mve_types.h | 4 
 2 files changed, 8 insertions(+)

diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index c359e9c63369..9727c5384638 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -15,6 +15,10 @@
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
License for more details.
 
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
.  */
diff --git a/gcc/config/arm/arm_mve_types.h b/gcc/config/arm/arm_mve_types.h
index 0b2d6422545f..e1193d52c0f1 100644
--- a/gcc/config/arm/arm_mve_types.h
+++ b/gcc/config/arm/arm_mve_types.h
@@ -15,6 +15,10 @@
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
License for more details.
 
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
.  */


[gcc r14-11017] [PATCH] PR modula2/117555: libgm2 build failure after r15-5081-g95960cd473297c

2024-12-02 Thread Gaius Mulley via Gcc-cvs
https://gcc.gnu.org/g:9c926d8b83a8229e0b4335cc78b2bee1b85b58af

commit r14-11017-g9c926d8b83a8229e0b4335cc78b2bee1b85b58af
Author: Gaius Mulley 
Date:   Mon Dec 2 13:25:14 2024 +

[PATCH] PR modula2/117555: libgm2 build failure after 
r15-5081-g95960cd473297c

This patch adds missing return statements to library procedure
functions.  These missing statements occur after a call to RAISE.

gcc/m2/ChangeLog:

PR modula2/117555
* gm2-libs-iso/M2EXCEPTION.mod (M2Exception): Add missing
return statement.
* gm2-libs-iso/RealConv.mod (ValueReal): Ditto.
* gm2-libs-iso/RndFile.mod (StartPos): Ditto.
(EndPos): Ditto.
(NewPos): Ditto.
* gm2-libs-iso/ShortConv.mod (ValueReal): Ditto.
* gm2-libs-iso/WholeConv.mod (ValueInt): Ditto.
(ValueCard): Ditto.

(cherry picked from commit 6d90f5d0ae928320e6e4ce9fce8e658404d8cb72)

Signed-off-by: Gaius Mulley 

Diff:
---
 gcc/m2/gm2-libs-iso/M2EXCEPTION.mod |  3 ++-
 gcc/m2/gm2-libs-iso/RealConv.mod|  3 ++-
 gcc/m2/gm2-libs-iso/RndFile.mod | 15 +--
 gcc/m2/gm2-libs-iso/ShortConv.mod   |  3 ++-
 gcc/m2/gm2-libs-iso/WholeConv.mod   |  6 --
 5 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/gcc/m2/gm2-libs-iso/M2EXCEPTION.mod 
b/gcc/m2/gm2-libs-iso/M2EXCEPTION.mod
index 637e086c2998..2ff7c7296647 100644
--- a/gcc/m2/gm2-libs-iso/M2EXCEPTION.mod
+++ b/gcc/m2/gm2-libs-iso/M2EXCEPTION.mod
@@ -42,7 +42,8 @@ BEGIN
ELSE
   RTExceptions.Raise(ORD(exException),
  ADR(__FILE__), __LINE__, __COLUMN__, 
ADR(__FUNCTION__),
- ADR('current coroutine is not in the exceptional 
execution state'))
+ ADR('current coroutine is not in the exceptional 
execution state')) ;
+  RETURN exException
END
 END M2Exception ;
 
diff --git a/gcc/m2/gm2-libs-iso/RealConv.mod b/gcc/m2/gm2-libs-iso/RealConv.mod
index 4223f3029ae8..6f9fe6fcc7b2 100644
--- a/gcc/m2/gm2-libs-iso/RealConv.mod
+++ b/gcc/m2/gm2-libs-iso/RealConv.mod
@@ -256,7 +256,8 @@ BEGIN
   RETURN( doValueReal(str) )
ELSE
   EXCEPTIONS.RAISE(realConv, ORD(invalid),
-   'RealConv.' + __FUNCTION__ + ': real number is invalid')
+   'RealConv.' + __FUNCTION__ + ': real number is 
invalid') ;
+  RETURN 0.0
END
 END ValueReal ;
 
diff --git a/gcc/m2/gm2-libs-iso/RndFile.mod b/gcc/m2/gm2-libs-iso/RndFile.mod
index 46a2efdaac4a..42451888bafe 100644
--- a/gcc/m2/gm2-libs-iso/RndFile.mod
+++ b/gcc/m2/gm2-libs-iso/RndFile.mod
@@ -359,13 +359,13 @@ VAR
 BEGIN
IF IsRndFile(cid)
THEN
-  d := DeviceTablePtrValue(cid, did) ;
-  RETURN( 0 )
+  d := DeviceTablePtrValue(cid, did)
ELSE
   RAISEdevException(cid, did, IOChan.wrongDevice,
 'RndFile.' + __FUNCTION__ +
 ': channel is not a random file')
-   END
+   END ;
+   RETURN( 0 )
 END StartPos ;
 
 
@@ -386,7 +386,8 @@ BEGIN
ELSE
   RAISEdevException(cid, did, IOChan.wrongDevice,
 'RndFile.' + __FUNCTION__ +
-': channel is not a random file')
+': channel is not a random file') ;
+  RETURN 0
END
 END CurrentPos ;
 
@@ -416,7 +417,8 @@ BEGIN
ELSE
   RAISEdevException(cid, did, IOChan.wrongDevice,
 'RndFile.' + __FUNCTION__ +
-': channel is not a random file')
+': channel is not a random file') ;
+  RETURN 0
END
 END EndPos ;
 
@@ -442,7 +444,8 @@ BEGIN
ELSE
   RAISEdevException(cid, did, IOChan.wrongDevice,
 'RndFile.' + __FUNCTION__ +
-': channel is not a random file')
+': channel is not a random file') ;
+  RETURN 0
END
 END NewPos ;
 
diff --git a/gcc/m2/gm2-libs-iso/ShortConv.mod 
b/gcc/m2/gm2-libs-iso/ShortConv.mod
index cfceb25c8f4c..bb835c6102ec 100644
--- a/gcc/m2/gm2-libs-iso/ShortConv.mod
+++ b/gcc/m2/gm2-libs-iso/ShortConv.mod
@@ -257,7 +257,8 @@ BEGIN
   RETURN( doValueReal(str) )
ELSE
   EXCEPTIONS.RAISE(realConv, ORD(invalid),
-   'ShortConv.' + __FUNCTION__ + ': real number is 
invalid')
+   'ShortConv.' + __FUNCTION__ + ': real number is 
invalid') ;
+  RETURN 0.0
END
 END ValueReal ;
 
diff --git a/gcc/m2/gm2-libs-iso/WholeConv.mod 
b/gcc/m2/gm2-libs-iso/WholeConv.mod
index 34ca7aca1a11..769a568fe32f 100644
--- a/gcc/m2/gm2-libs-iso/WholeConv.mod
+++ b/gcc/m2/gm2-libs-iso/WholeConv.mod
@@ -196,7 +196,8 @@ BEGIN
   RETURN( v )
ELSE
   EXCEPTIONS.RAISE(wholeConv, ORD(invalidSigned),
-   'WholeConv.' + __FUNCTION__ + ': signed number is 
invalid')
+   'WholeConv.' + __FUNCTION__ + ': signed number is 
invalid') ;
+ 

[gcc(refs/users/meissner/heads/work188)] Add ChangeLog.meissner and REVISION.

2024-12-02 Thread Michael Meissner via Libstdc++-cvs
https://gcc.gnu.org/g:d92b2e78950f4fad72b14a1a7b75e300473f71b3

commit d92b2e78950f4fad72b14a1a7b75e300473f71b3
Author: Michael Meissner 
Date:   Mon Dec 2 14:38:09 2024 -0500

Add ChangeLog.meissner and REVISION.

2024-12-02  Michael Meissner  

gcc/

* REVISION: New file for branch.
* ChangeLog.meissner: New file.

gcc/c-family/

* ChangeLog.meissner: New file.

gcc/c/

* ChangeLog.meissner: New file.

gcc/cp/

* ChangeLog.meissner: New file.

gcc/fortran/

* ChangeLog.meissner: New file.

gcc/testsuite/

* ChangeLog.meissner: New file.

libgcc/

* ChangeLog.meissner: New file.

Diff:
---
 gcc/ChangeLog.meissner   | 5 +
 gcc/REVISION | 1 +
 gcc/c-family/ChangeLog.meissner  | 5 +
 gcc/c/ChangeLog.meissner | 5 +
 gcc/cp/ChangeLog.meissner| 5 +
 gcc/fortran/ChangeLog.meissner   | 5 +
 gcc/testsuite/ChangeLog.meissner | 5 +
 libgcc/ChangeLog.meissner| 5 +
 libstdc++-v3/ChangeLog.meissner  | 5 +
 9 files changed, 41 insertions(+)

diff --git a/gcc/ChangeLog.meissner b/gcc/ChangeLog.meissner
new file mode 100644
index ..99eff4717850
--- /dev/null
+++ b/gcc/ChangeLog.meissner
@@ -0,0 +1,5 @@
+ Branch work188, baseline 
+
+2024-12-02   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
new file mode 100644
index ..d4db5e09c166
--- /dev/null
+++ b/gcc/REVISION
@@ -0,0 +1 @@
+work188 branch
diff --git a/gcc/c-family/ChangeLog.meissner b/gcc/c-family/ChangeLog.meissner
new file mode 100644
index ..99eff4717850
--- /dev/null
+++ b/gcc/c-family/ChangeLog.meissner
@@ -0,0 +1,5 @@
+ Branch work188, baseline 
+
+2024-12-02   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/c/ChangeLog.meissner b/gcc/c/ChangeLog.meissner
new file mode 100644
index ..99eff4717850
--- /dev/null
+++ b/gcc/c/ChangeLog.meissner
@@ -0,0 +1,5 @@
+ Branch work188, baseline 
+
+2024-12-02   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/cp/ChangeLog.meissner b/gcc/cp/ChangeLog.meissner
new file mode 100644
index ..99eff4717850
--- /dev/null
+++ b/gcc/cp/ChangeLog.meissner
@@ -0,0 +1,5 @@
+ Branch work188, baseline 
+
+2024-12-02   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/fortran/ChangeLog.meissner b/gcc/fortran/ChangeLog.meissner
new file mode 100644
index ..99eff4717850
--- /dev/null
+++ b/gcc/fortran/ChangeLog.meissner
@@ -0,0 +1,5 @@
+ Branch work188, baseline 
+
+2024-12-02   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/testsuite/ChangeLog.meissner b/gcc/testsuite/ChangeLog.meissner
new file mode 100644
index ..99eff4717850
--- /dev/null
+++ b/gcc/testsuite/ChangeLog.meissner
@@ -0,0 +1,5 @@
+ Branch work188, baseline 
+
+2024-12-02   Michael Meissner  
+
+   Clone branch
diff --git a/libgcc/ChangeLog.meissner b/libgcc/ChangeLog.meissner
new file mode 100644
index ..99eff4717850
--- /dev/null
+++ b/libgcc/ChangeLog.meissner
@@ -0,0 +1,5 @@
+ Branch work188, baseline 
+
+2024-12-02   Michael Meissner  
+
+   Clone branch
diff --git a/libstdc++-v3/ChangeLog.meissner b/libstdc++-v3/ChangeLog.meissner
new file mode 100644
index ..99eff4717850
--- /dev/null
+++ b/libstdc++-v3/ChangeLog.meissner
@@ -0,0 +1,5 @@
+ Branch work188, baseline 
+
+2024-12-02   Michael Meissner  
+
+   Clone branch


[gcc(refs/users/meissner/heads/work188-libs)] Add ChangeLog.libs and update REVISION.

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:d74c57ac0900a43432f81453b000a6e1ddb98454

commit d74c57ac0900a43432f81453b000a6e1ddb98454
Author: Michael Meissner 
Date:   Mon Dec 2 14:41:55 2024 -0500

Add ChangeLog.libs and update REVISION.

2024-12-02  Michael Meissner  

gcc/

* ChangeLog.libs: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.libs | 5 +
 gcc/REVISION   | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.libs b/gcc/ChangeLog.libs
new file mode 100644
index ..2feb12efdb23
--- /dev/null
+++ b/gcc/ChangeLog.libs
@@ -0,0 +1,5 @@
+ Branch work188-libs, baseline 
+
+2024-12-02   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index d4db5e09c166..9c4f5fc5a7ab 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work188 branch
+work188-libs branch


[gcc] Created branch 'meissner/heads/work188-sha' in namespace 'refs/users'

2024-12-02 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work188-sha' was created in namespace 'refs/users' 
pointing to:

 d92b2e78950f... Add ChangeLog.meissner and REVISION.


[gcc] Created branch 'meissner/heads/work188-test' in namespace 'refs/users'

2024-12-02 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work188-test' was created in namespace 'refs/users' 
pointing to:

 d92b2e78950f... Add ChangeLog.meissner and REVISION.


[gcc(refs/users/meissner/heads/work188-test)] Add ChangeLog.test and update REVISION.

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:af0ca62a9d6f4d4088bc0a040c34be29fe5a1407

commit af0ca62a9d6f4d4088bc0a040c34be29fe5a1407
Author: Michael Meissner 
Date:   Mon Dec 2 14:43:46 2024 -0500

Add ChangeLog.test and update REVISION.

2024-12-02  Michael Meissner  

gcc/

* ChangeLog.test: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.test | 5 +
 gcc/REVISION   | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.test b/gcc/ChangeLog.test
new file mode 100644
index ..39c2bf5c53f9
--- /dev/null
+++ b/gcc/ChangeLog.test
@@ -0,0 +1,5 @@
+ Branch work188-test, baseline 
+
+2024-12-02   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index d4db5e09c166..83751d566db6 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work188 branch
+work188-test branch


[gcc] Created branch 'meissner/heads/work188-orig' in namespace 'refs/users'

2024-12-02 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work188-orig' was created in namespace 'refs/users' 
pointing to:

 4df8e6fc0cbc... [committed] Add sym-exec subdirectory to configure.in rathe


[gcc(refs/users/meissner/heads/work188-sha)] Add ChangeLog.sha and update REVISION.

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:57890269720d080810cc35455154499b93b00da8

commit 57890269720d080810cc35455154499b93b00da8
Author: Michael Meissner 
Date:   Mon Dec 2 14:42:48 2024 -0500

Add ChangeLog.sha and update REVISION.

2024-12-02  Michael Meissner  

gcc/

* ChangeLog.sha: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.sha | 5 +
 gcc/REVISION  | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.sha b/gcc/ChangeLog.sha
new file mode 100644
index ..0c12046271e5
--- /dev/null
+++ b/gcc/ChangeLog.sha
@@ -0,0 +1,5 @@
+ Branch work188-sha, baseline 
+
+2024-12-02   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index d4db5e09c166..174385591d8c 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work188 branch
+work188-sha branch


[gcc] Created branch 'meissner/heads/work188-dmf' in namespace 'refs/users'

2024-12-02 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work188-dmf' was created in namespace 'refs/users' 
pointing to:

 d92b2e78950f... Add ChangeLog.meissner and REVISION.


[gcc(refs/users/meissner/heads/work188-dmf)] Add ChangeLog.dmf and update REVISION.

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:bc8d78add9fabfd59a482016d3055806e3fe1dbc

commit bc8d78add9fabfd59a482016d3055806e3fe1dbc
Author: Michael Meissner 
Date:   Mon Dec 2 14:39:04 2024 -0500

Add ChangeLog.dmf and update REVISION.

2024-12-02  Michael Meissner  

gcc/

* ChangeLog.dmf: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.dmf | 5 +
 gcc/REVISION  | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.dmf b/gcc/ChangeLog.dmf
new file mode 100644
index ..a606d503af09
--- /dev/null
+++ b/gcc/ChangeLog.dmf
@@ -0,0 +1,5 @@
+ Branch work188-dmf, baseline 
+
+2024-12-02   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index d4db5e09c166..3e28a9228fd5 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work188 branch
+work188-dmf branch


[gcc] Created branch 'meissner/heads/work188-vpair' in namespace 'refs/users'

2024-12-02 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work188-vpair' was created in namespace 'refs/users' 
pointing to:

 d92b2e78950f... Add ChangeLog.meissner and REVISION.


[gcc(refs/users/meissner/heads/work188-vpair)] Add ChangeLog.vpair and update REVISION.

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:07f9d40f6aca06b353ed052cea86e23d6694e783

commit 07f9d40f6aca06b353ed052cea86e23d6694e783
Author: Michael Meissner 
Date:   Mon Dec 2 14:39:56 2024 -0500

Add ChangeLog.vpair and update REVISION.

2024-12-02  Michael Meissner  

gcc/

* ChangeLog.vpair: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.vpair | 5 +
 gcc/REVISION| 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.vpair b/gcc/ChangeLog.vpair
new file mode 100644
index ..8394f9d97c9c
--- /dev/null
+++ b/gcc/ChangeLog.vpair
@@ -0,0 +1,5 @@
+ Branch work188-vpair, baseline 
+
+2024-12-02   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index d4db5e09c166..3ff9ab1309f4 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work188 branch
+work188-vpair branch


[gcc] Created branch 'meissner/heads/work188-bugs' in namespace 'refs/users'

2024-12-02 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work188-bugs' was created in namespace 'refs/users' 
pointing to:

 d92b2e78950f... Add ChangeLog.meissner and REVISION.


[gcc(refs/users/meissner/heads/work188-orig)] Add REVISION.

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:d04f534aa4eb6789f85b46710388f8d8acba10fa

commit d04f534aa4eb6789f85b46710388f8d8acba10fa
Author: Michael Meissner 
Date:   Mon Dec 2 14:44:50 2024 -0500

Add REVISION.

2024-12-02  Michael Meissner  

gcc/

* REVISION: New file for branch.

Diff:
---
 gcc/REVISION | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/REVISION b/gcc/REVISION
new file mode 100644
index ..17f3e8e67fa4
--- /dev/null
+++ b/gcc/REVISION
@@ -0,0 +1 @@
+work188-orig branch


[gcc(refs/users/meissner/heads/work188)] Change TARGET_FPRND to TARGET_POWER5X.

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:798cbe52225226dcb604f807497c8dd45f4cd752

commit 798cbe52225226dcb604f807497c8dd45f4cd752
Author: Michael Meissner 
Date:   Mon Dec 2 14:51:00 2024 -0500

Change TARGET_FPRND to TARGET_POWER5X.

This patch changes TARGET_FPRND to TARGET_POWER5X.  The -mfprnd switch is not
being changed, just the name of the macros used to determine if the PowerPC
processor supports ISA 2.4 (Power5x).

2024-12-02  Michael Meissner  

gcc/

* gcc/config/rs6000/rs6000.cc (rs6000_option_override_internal):
Change TARGET_FPRND to TARGET_POWER5X.
* gcc/config/rs6000/rs6000.h (TARGET_POWER5X): New macro.
* gcc/config/rs6000/rs6000.md (fmod3): Change TARGET_FPRND to
TARGET_POWER5X.
(remainder3): Likewise.
(fctiwuz_): Likewise.
(ceil2): Likewise.
(floor2): Likewise.
(round2): Likewise.

Diff:
---
 gcc/config/rs6000/rs6000.cc |  4 ++--
 gcc/config/rs6000/rs6000.h  |  1 +
 gcc/config/rs6000/rs6000.md | 14 +++---
 3 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index de5561d59029..23cc39f7a052 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -3924,7 +3924,7 @@ rs6000_option_override_internal (bool global_init_p)
 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
   else if (TARGET_CMPB)
 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
-  else if (TARGET_FPRND)
+  else if (TARGET_POWER5X)
 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
   else if (TARGET_POWER5)
 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
@@ -3951,7 +3951,7 @@ rs6000_option_override_internal (bool global_init_p)
   rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
 }
 
-  if (!TARGET_FPRND && TARGET_VSX)
+  if (!TARGET_POWER5X && TARGET_VSX)
 {
   if (rs6000_isa_flags_explicit & OPTION_MASK_FPRND)
/* TARGET_VSX = 1 implies Power 7 and newer */
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 072e0349b338..32c52824c7db 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -501,6 +501,7 @@ extern int rs6000_vector_align[];
 
 /* Convert ISA bits like POPCNTB to PowerPC processors like POWER5.  */
 #define TARGET_POWER5  TARGET_POPCNTB
+#define TARGET_POWER5X TARGET_FPRND
 
 /* In switching from using target_flags to using rs6000_isa_flags, the options
machinery creates OPTION_MASK_ instead of MASK_.  The MASK_
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 8259b3f0f4b8..7f7a622119a2 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -5171,7 +5171,7 @@
(use (match_operand:SFDF 1 "gpc_reg_operand"))
(use (match_operand:SFDF 2 "gpc_reg_operand"))]
   "TARGET_HARD_FLOAT
-   && TARGET_FPRND
+   && TARGET_POWER5X
&& flag_unsafe_math_optimizations"
 {
   rtx div = gen_reg_rtx (mode);
@@ -5189,7 +5189,7 @@
(use (match_operand:SFDF 1 "gpc_reg_operand"))
(use (match_operand:SFDF 2 "gpc_reg_operand"))]
   "TARGET_HARD_FLOAT
-   && TARGET_FPRND
+   && TARGET_POWER5X
&& flag_unsafe_math_optimizations"
 {
   rtx div = gen_reg_rtx (mode);
@@ -6689,7 +6689,7 @@
 (define_insn "*friz"
   [(set (match_operand:DF 0 "gpc_reg_operand" "=d,wa")
(float:DF (fix:DI (match_operand:DF 1 "gpc_reg_operand" "d,wa"]
-  "TARGET_HARD_FLOAT && TARGET_FPRND
+  "TARGET_HARD_FLOAT && TARGET_POWER5X
&& flag_unsafe_math_optimizations && !flag_trapping_math && TARGET_FRIZ"
   "@
friz %0,%1
@@ -6817,7 +6817,7 @@
   [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d,wa")
(unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "d,wa")]
 UNSPEC_FRIZ))]
-  "TARGET_HARD_FLOAT && TARGET_FPRND"
+  "TARGET_HARD_FLOAT && TARGET_POWER5X"
   "@
friz %0,%1
xsrdpiz %x0,%x1"
@@ -6827,7 +6827,7 @@
   [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d,wa")
(unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "d,wa")]
 UNSPEC_FRIP))]
-  "TARGET_HARD_FLOAT && TARGET_FPRND"
+  "TARGET_HARD_FLOAT && TARGET_POWER5X"
   "@
frip %0,%1
xsrdpip %x0,%x1"
@@ -6837,7 +6837,7 @@
   [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d,wa")
(unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "d,wa")]
 UNSPEC_FRIM))]
-  "TARGET_HARD_FLOAT && TARGET_FPRND"
+  "TARGET_HARD_FLOAT && TARGET_POWER5X"
   "@
frim %0,%1
xsrdpim %x0,%x1"
@@ -6848,7 +6848,7 @@
   [(set (match_operand:SFDF 0 "gpc_reg_operand" "=")
(unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "")]
 UNSPEC_FRIN))]
-  "TARGET_HARD_FLOAT && TARGET_FPRND"
+  "TARGET_HARD_FLOAT && TARGET_POWER5X"
   "frin %0,%1"
   [(set_attr "type" "fp")])


[gcc(refs/users/meissner/heads/work188)] Change TARGET_CMPB to TARGET_POWER6.

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:d0188bec4eef64f9140e89b6c5e479c4a7a79216

commit d0188bec4eef64f9140e89b6c5e479c4a7a79216
Author: Michael Meissner 
Date:   Mon Dec 2 14:53:00 2024 -0500

Change TARGET_CMPB to TARGET_POWER6.

This patch changes TARGET_CMPB to TARGET_POWER6.  The -mcmpb switch is not
being changed, just the name of the macros used to determine if the PowerPC
processor supports ISA 2.5 (Power6).

2024-12-02  Michael Meissner  

gcc/

* gcc/config/rs6000/rs6000-builtin.cc (rs6000_builtin_is_supported):
Change TARGET_CMPB to TARGET_POWER6.
* gcc/config/rs6000/rs6000.cc (rs6000_option_override_internal):
Likewise.
(rs6000_rtx_costs): Likewise.
(rs6000_emit_parity): Likewise.
* gcc/config/rs6000/rs6000.h (TARGET_FCFID): Likewise.
(TARGET_LFIWAX): Likewise.
(TARGET_POWER6): New macro.
(TARGET_EXTRA_BUILTINS): Change TARGET_CMPB to TARGET_POWER6.
* gcc/config/rs6000/rs6000.md (enabled attribute): Likewise.
(parity2_cmp): Likewise.
(cmpb3): Likewise.
(copysign3): Likewise.
(copysign3_fcpsgn): Likewise.
(cmpstrnsi): Likewise.
(cmpstrsi): Likewise.

Diff:
---
 gcc/config/rs6000/rs6000-builtin.cc |  4 ++--
 gcc/config/rs6000/rs6000.cc |  8 
 gcc/config/rs6000/rs6000.h  |  7 ---
 gcc/config/rs6000/rs6000.md | 16 
 4 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin.cc 
b/gcc/config/rs6000/rs6000-builtin.cc
index 98a0545030cd..76421bd1de0b 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -157,9 +157,9 @@ rs6000_builtin_is_supported (enum rs6000_gen_builtins 
fncode)
 case ENB_P5:
   return TARGET_POWER5;
 case ENB_P6:
-  return TARGET_CMPB;
+  return TARGET_POWER6;
 case ENB_P6_64:
-  return TARGET_CMPB && TARGET_POWERPC64;
+  return TARGET_POWER6 && TARGET_POWERPC64;
 case ENB_P7:
   return TARGET_POPCNTD;
 case ENB_P7_64:
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 23cc39f7a052..3418fa1d316a 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -3922,7 +3922,7 @@ rs6000_option_override_internal (bool global_init_p)
 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
   else if (TARGET_DFP)
 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
-  else if (TARGET_CMPB)
+  else if (TARGET_POWER6)
 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
   else if (TARGET_POWER5X)
 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
@@ -4797,7 +4797,7 @@ rs6000_option_override_internal (bool global_init_p)
  DERAT mispredict penalty.  However the LVE and STVE altivec instructions
  need indexed accesses and the type used is the scalar type of the element
  being loaded or stored.  */
-TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_CMPB
+TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_POWER6
  && !TARGET_ALTIVEC);
 
   /* Set the -mrecip options.  */
@@ -22372,7 +22372,7 @@ rs6000_rtx_costs (rtx x, machine_mode mode, int 
outer_code,
   return false;
 
 case PARITY:
-  *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
+  *total = COSTS_N_INSNS (TARGET_POWER6 ? 2 : 6);
   return false;
 
 case NOT:
@@ -23199,7 +23199,7 @@ rs6000_emit_parity (rtx dst, rtx src)
   tmp = gen_reg_rtx (mode);
 
   /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can.  */
-  if (TARGET_CMPB)
+  if (TARGET_POWER6)
 {
   if (mode == SImode)
{
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 32c52824c7db..612fd1d77ee2 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -449,12 +449,12 @@ extern int rs6000_vector_align[];
 #define TARGET_FCFID   (TARGET_POWERPC64   \
 || TARGET_PPC_GPOPT/* 970/power4 */\
 || TARGET_POWER5   /* ISA 2.02 */  \
-|| TARGET_CMPB /* ISA 2.05 */  \
+|| TARGET_POWER6   /* ISA 2.05 */  \
 || TARGET_POPCNTD) /* ISA 2.06 */
 
 #define TARGET_FCTIDZ  TARGET_FCFID
 #define TARGET_STFIWX  TARGET_PPC_GFXOPT
-#define TARGET_LFIWAX  TARGET_CMPB
+#define TARGET_LFIWAX  TARGET_POWER6
 #define TARGET_LFIWZX  TARGET_POPCNTD
 #define TARGET_FCFIDS  TARGET_POPCNTD
 #define TARGET_FCFIDU  TARGET_POPCNTD
@@ -502,6 +502,7 @@ extern int rs6000_vector_align[];
 /* Convert ISA bits like POPCNTB to PowerPC processors like POWER5.  */
 #define TARGET_POWER5  TARGET_POPCNTB
 #define TARGET_POWER5X TARGET_FPRND
+#define TARGET_POWER6 

[gcc(refs/users/meissner/heads/work188)] Change TARGET_POPCNTD to TARGET_POWER7.

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:ff12ee1499c4f8a6e47d7b9e81bc1fe0c1fc576f

commit ff12ee1499c4f8a6e47d7b9e81bc1fe0c1fc576f
Author: Michael Meissner 
Date:   Mon Dec 2 14:53:47 2024 -0500

Change TARGET_POPCNTD to TARGET_POWER7.

This patch changes TARGET_POPCNTD to TARGET_POWER7.  The -mpopcntd switch is
not being changed, just the name of the macros used to determine if the PowerPC
processor supports ISA 2.6 (Power7).

2024-12-02  Michael Meissner  

gcc/

* gcc/config/rs6000/dfp.md (cmp_internal1): Change 
TARGET_POPCNTD
to TARGET_POWER7.
* gcc/config/rs6000/rs6000-builtin.cc (rs6000_builtin_is_supported):
Likewise.
* gcc/config/rs6000/rs6000-string.cc (expand_block_compare): 
Likewise.
* gcc/config/rs6000/rs6000.cc (rs6000_hard_regno_mode_ok_uncached):
Likewise.
(rs6000_option_override_internal): Likewise.
(rs6000_rtx_costs): Likewise.
* gcc/config/rs6000/rs6000.h (TARGET_LDBRX): Likewise.
(TARGET_FCFID): Likewise.
(TARGET_LFIWZX): Likewise.
(TARGET_FCFIDS): Likewise.
(TARGET_FCFIDU): Likewise.
(TARGET_FCFIDUS): Likewise.
(TARGET_FCTIDUZ): Likewise.
(TARGET_FCTIWUZ): Likewise.
(TARGET_FCTIDUZ): Likewise.
(TARGET_POWER7): New macro.
(TARGET_EXTRA_BUILTINS): Change TARGET_POPCNTD to TARGET_POWER7.
(CTZ_DEFINED_VALUE_AT_ZERO): Likewise.
* gcc/config/rs6000/rs6000.md (enabled attribute): Likewise.
(lrintsi2): Likewise.
(lrintsi): Likewise.
(lrintsi_di): Likewise.
(cmpmemsi): Likewise.
(bpermd_): Likewise.
(addg6s): Likewise.
(cdtbcd): Likewise.
(cbcdtd): Likewise.
(div_): Likewise.

Diff:
---
 gcc/config/rs6000/dfp.md|  2 +-
 gcc/config/rs6000/rs6000-builtin.cc |  4 ++--
 gcc/config/rs6000/rs6000-string.cc  |  2 +-
 gcc/config/rs6000/rs6000.cc |  8 
 gcc/config/rs6000/rs6000.h  | 21 +++--
 gcc/config/rs6000/rs6000.md | 20 ++--
 6 files changed, 29 insertions(+), 28 deletions(-)

diff --git a/gcc/config/rs6000/dfp.md b/gcc/config/rs6000/dfp.md
index fa9d7dd45dd3..b8189390d410 100644
--- a/gcc/config/rs6000/dfp.md
+++ b/gcc/config/rs6000/dfp.md
@@ -214,7 +214,7 @@
 (define_insn "floatdidd2"
   [(set (match_operand:DD 0 "gpc_reg_operand" "=d")
(float:DD (match_operand:DI 1 "gpc_reg_operand" "d")))]
-  "TARGET_DFP && TARGET_POPCNTD"
+  "TARGET_DFP && TARGET_POWER7"
   "dcffix %0,%1"
   [(set_attr "type" "dfp")])
 
diff --git a/gcc/config/rs6000/rs6000-builtin.cc 
b/gcc/config/rs6000/rs6000-builtin.cc
index 76421bd1de0b..dae43b672ea7 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -161,9 +161,9 @@ rs6000_builtin_is_supported (enum rs6000_gen_builtins 
fncode)
 case ENB_P6_64:
   return TARGET_POWER6 && TARGET_POWERPC64;
 case ENB_P7:
-  return TARGET_POPCNTD;
+  return TARGET_POWER7;
 case ENB_P7_64:
-  return TARGET_POPCNTD && TARGET_POWERPC64;
+  return TARGET_POWER7 && TARGET_POWERPC64;
 case ENB_P8:
   return TARGET_POWER8;
 case ENB_P8V:
diff --git a/gcc/config/rs6000/rs6000-string.cc 
b/gcc/config/rs6000/rs6000-string.cc
index de618da9b5dc..b633d80110d0 100644
--- a/gcc/config/rs6000/rs6000-string.cc
+++ b/gcc/config/rs6000/rs6000-string.cc
@@ -1949,7 +1949,7 @@ bool
 expand_block_compare (rtx operands[])
 {
   /* TARGET_POPCNTD is already guarded at expand cmpmemsi.  */
-  gcc_assert (TARGET_POPCNTD);
+  gcc_assert (TARGET_POWER7);
 
   /* For P8, this case is complicated to handle because the subtract
  with carry instructions do not generate the 64-bit carry and so
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 3418fa1d316a..786785d19ba1 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -1924,7 +1924,7 @@ rs6000_hard_regno_mode_ok_uncached (int regno, 
machine_mode mode)
  if(GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
return 1;
 
- if (TARGET_POPCNTD && mode == SImode)
+ if (TARGET_POWER7 && mode == SImode)
return 1;
 
  if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
@@ -3918,7 +3918,7 @@ rs6000_option_override_internal (bool global_init_p)
 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
   else if (TARGET_VSX)
 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
-  else if (TARGET_POPCNTD)
+  else if (TARGET_POWER7)
 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
   else if (TARGET_DFP)
 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
@@ -4131,7 +4131,7 @@ rs6000_option_override_internal (bool global_init_p)
   else if (TARGET_LONG_DOUBLE_128)
  

[gcc] Created branch 'meissner/heads/work188' in namespace 'refs/users'

2024-12-02 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work188' was created in namespace 'refs/users' 
pointing to:

 4df8e6fc0cbc... [committed] Add sym-exec subdirectory to configure.in rathe


[gcc(refs/users/meissner/heads/work188)] Change TARGET_MODULO to TARGET_POWER9.

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:5c3cbdbcdfbac90c61bafdf610729dea0f28a6a2

commit 5c3cbdbcdfbac90c61bafdf610729dea0f28a6a2
Author: Michael Meissner 
Date:   Mon Dec 2 14:54:41 2024 -0500

Change TARGET_MODULO to TARGET_POWER9.

This patch changes TARGET_MODULO to TARGET_POWER9.  The -mmodulo switch is not
being changed, just the name of the macros used to determine if the PowerPC
processor supports ISA 3.0 (Power9).

2024-12-02  Michael Meissner  

gcc/

* gcc/config/rs6000/rs6000-builtin.cc (rs6000_builtin_is_supported):
Change TARGET_MODULO to TARGET_POWER9.
* gcc/config/rs6000/rs6000.cc (rs6000_option_override_internal):
Likewise.
* gcc/config/rs6000/rs6000.h (TARGET_CTZ): Likewise.
(TARGET_EXTSWSLI): Likewise.
(TARGET_MADDLD): Likewise.
(TARGET_POWER9): New macro.
* gcc/config/rs6000/rs6000.md (enabled attribute): Change 
TARGET_MODULO
to TARGET_POWER9.
(mod3): Likewise.
(umod3): Likewise.
(divide/modulo peephole2): Likewise.

Diff:
---
 gcc/config/rs6000/rs6000-builtin.cc |  4 ++--
 gcc/config/rs6000/rs6000.cc |  4 ++--
 gcc/config/rs6000/rs6000.h  |  7 ---
 gcc/config/rs6000/rs6000.md | 14 +++---
 4 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin.cc 
b/gcc/config/rs6000/rs6000-builtin.cc
index dae43b672ea7..b6093b3cb64c 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -169,9 +169,9 @@ rs6000_builtin_is_supported (enum rs6000_gen_builtins 
fncode)
 case ENB_P8V:
   return TARGET_P8_VECTOR;
 case ENB_P9:
-  return TARGET_MODULO;
+  return TARGET_POWER9;
 case ENB_P9_64:
-  return TARGET_MODULO && TARGET_POWERPC64;
+  return TARGET_POWER9 && TARGET_POWERPC64;
 case ENB_P9V:
   return TARGET_P9_VECTOR;
 case ENB_P10:
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 786785d19ba1..3da6b4233d09 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -3888,7 +3888,7 @@ rs6000_option_override_internal (bool global_init_p)
 
   /* For the newer switches (vsx, dfp, etc.) set some of the older options,
  unless the user explicitly used the -mno- to disable the code.  */
-  if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_MISC)
+  if (TARGET_P9_VECTOR || TARGET_POWER9 || TARGET_P9_MISC)
 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
   else if (TARGET_P9_MINMAX)
 {
@@ -22353,7 +22353,7 @@ rs6000_rtx_costs (rtx x, machine_mode mode, int 
outer_code,
*total = rs6000_cost->divsi;
}
   /* Add in shift and subtract for MOD unless we have a mod instruction. */
-  if ((!TARGET_MODULO
+  if ((!TARGET_POWER9
   || (RS6000_DISABLE_SCALAR_MODULO && SCALAR_INT_MODE_P (mode)))
 && (code == MOD || code == UMOD))
*total += COSTS_N_INSNS (2);
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 954cfe633c27..1ed04db52ab5 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -463,9 +463,9 @@ extern int rs6000_vector_align[];
 #define TARGET_FCTIWUZ TARGET_POWER7
 /* Only powerpc64 and powerpc476 support fctid.  */
 #define TARGET_FCTID   (TARGET_POWERPC64 || rs6000_cpu == PROCESSOR_PPC476)
-#define TARGET_CTZ TARGET_MODULO
-#define TARGET_EXTSWSLI(TARGET_MODULO && TARGET_POWERPC64)
-#define TARGET_MADDLD  TARGET_MODULO
+#define TARGET_CTZ TARGET_POWER9
+#define TARGET_EXTSWSLI(TARGET_POWER9 && TARGET_POWERPC64)
+#define TARGET_MADDLD  TARGET_POWER9
 
 /* TARGET_DIRECT_MOVE is redundant to TARGET_P8_VECTOR, so alias it to that.  
*/
 #define TARGET_DIRECT_MOVE TARGET_P8_VECTOR
@@ -504,6 +504,7 @@ extern int rs6000_vector_align[];
 #define TARGET_POWER5X TARGET_FPRND
 #define TARGET_POWER6  TARGET_CMPB
 #define TARGET_POWER7  TARGET_POPCNTD
+#define TARGET_POWER9  TARGET_MODULO
 
 /* In switching from using target_flags to using rs6000_isa_flags, the options
machinery creates OPTION_MASK_ instead of MASK_.  The MASK_
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index deb83ce64361..da53b3853147 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -403,7 +403,7 @@
  (const_int 1)
 
  (and (eq_attr "isa" "p9")
- (match_test "TARGET_MODULO"))
+ (match_test "TARGET_POWER9"))
  (const_int 1)
 
  (and (eq_attr "isa" "p9v")
@@ -3457,7 +3457,7 @@
   || INTVAL (operands[2]) <= 0
   || (i = exact_log2 (INTVAL (operands[2]))) < 0)
 {
-  if (!TARGET_MODULO)
+  if (!TARGET_POWER9)
FAIL;
 
   operands[2] = force_reg (mode, operands[2]);
@@ -3491,7 +3491,7 @@
   [(set (match_operand:GPR 0 "gpc_reg_operand" "=&r,r")
 (mod:GPR (match_opera

[gcc(refs/users/meissner/heads/work188)] Change TARGET_POPCNTB to TARGET_POWER5.

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:96a8105e6a30ef9962ebd25248f4b4f71eaef5cd

commit 96a8105e6a30ef9962ebd25248f4b4f71eaef5cd
Author: Michael Meissner 
Date:   Mon Dec 2 14:50:03 2024 -0500

Change TARGET_POPCNTB to TARGET_POWER5.

This patch changes TARGET_POPCNTB to TARGET_POWER5.  The -mpopcntb switch is
not being changed in this patch, just the name of the macros used to determine
if the PowerPC processor supports ISA 2.2 (Power5).

2024-12-02  Michael Meissner  

gcc/

* gcc/config/rs6000/rs6000-builtin.cc (rs6000_builtin_is_supported):
Change TARGET_POPCNTB to TARGET_POWER5.
* gcc/config/rs6000/rs6000.cc (rs6000_option_override_internal):
Likewise.
* gcc/config/rs6000/rs6000.h (TARGET_FCFID): Likewise.
(TARGET_POWER5): New macro.
(TARGET_EXTRA_BUILTINS): Change TARGET_POPCNTB to TARGET_POWER5.
(TARGET_FRE): Likewise.
(TARGET_FRSQRTES): Likewise.
* gcc/config/rs6000/rs6000.md (enabled attribute): Likewise.

Diff:
---
 gcc/config/rs6000/rs6000-builtin.cc |  2 +-
 gcc/config/rs6000/rs6000.cc |  2 +-
 gcc/config/rs6000/rs6000.h  | 11 +++
 gcc/config/rs6000/rs6000.md |  2 +-
 4 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin.cc 
b/gcc/config/rs6000/rs6000-builtin.cc
index 9bdbae1ecf94..98a0545030cd 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -155,7 +155,7 @@ rs6000_builtin_is_supported (enum rs6000_gen_builtins 
fncode)
 case ENB_ALWAYS:
   return true;
 case ENB_P5:
-  return TARGET_POPCNTB;
+  return TARGET_POWER5;
 case ENB_P6:
   return TARGET_CMPB;
 case ENB_P6_64:
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 02a2f1152dbe..de5561d59029 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -3926,7 +3926,7 @@ rs6000_option_override_internal (bool global_init_p)
 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
   else if (TARGET_FPRND)
 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
-  else if (TARGET_POPCNTB)
+  else if (TARGET_POWER5)
 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
   else if (TARGET_ALTIVEC)
 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 926b6b2180ec..072e0349b338 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -448,7 +448,7 @@ extern int rs6000_vector_align[];
Enable 32-bit fcfid's on any of the switches for newer ISA machines.  */
 #define TARGET_FCFID   (TARGET_POWERPC64   \
 || TARGET_PPC_GPOPT/* 970/power4 */\
-|| TARGET_POPCNTB  /* ISA 2.02 */  \
+|| TARGET_POWER5   /* ISA 2.02 */  \
 || TARGET_CMPB /* ISA 2.05 */  \
 || TARGET_POPCNTD) /* ISA 2.06 */
 
@@ -499,6 +499,9 @@ extern int rs6000_vector_align[];
 #define TARGET_MINMAX  (TARGET_HARD_FLOAT && TARGET_PPC_GFXOPT \
 && (TARGET_P9_MINMAX || !flag_trapping_math))
 
+/* Convert ISA bits like POPCNTB to PowerPC processors like POWER5.  */
+#define TARGET_POWER5  TARGET_POPCNTB
+
 /* In switching from using target_flags to using rs6000_isa_flags, the options
machinery creates OPTION_MASK_ instead of MASK_.  The MASK_
options that have not yet been replaced by their OPTION_MASK_
@@ -525,7 +528,7 @@ extern int rs6000_vector_align[];
 
 #define TARGET_EXTRA_BUILTINS  (TARGET_POWERPC64\
 || TARGET_PPC_GPOPT /* 970/power4 */\
-|| TARGET_POPCNTB   /* ISA 2.02 */  \
+|| TARGET_POWER5/* ISA 2.02 */  \
 || TARGET_CMPB  /* ISA 2.05 */  \
 || TARGET_POPCNTD   /* ISA 2.06 */  \
 || TARGET_ALTIVEC   \
@@ -541,9 +544,9 @@ extern int rs6000_vector_align[];
 #define TARGET_FRES(TARGET_HARD_FLOAT && TARGET_PPC_GFXOPT)
 
 #define TARGET_FRE (TARGET_HARD_FLOAT \
-&& (TARGET_POPCNTB || VECTOR_UNIT_VSX_P (DFmode)))
+&& (TARGET_POWER5 || VECTOR_UNIT_VSX_P (DFmode)))
 
-#define TARGET_FRSQRTES(TARGET_HARD_FLOAT && TARGET_POPCNTB \
+#define TARGET_FRSQRTES(TARGET_HARD_FLOAT && TARGET_POWER5 \
 && TARGET_PPC_GFXOPT)
 
 #define TARGET_FRSQRTE (TARGET_HARD_FLOAT \
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index edccd7817ae1..8259b3f0f4b8 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6

[gcc(refs/users/meissner/heads/work188)] Add support for -mcpu=future

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:2114420f64725dd6a6abd215c4c86ab2fed5809a

commit 2114420f64725dd6a6abd215c4c86ab2fed5809a
Author: Michael Meissner 
Date:   Mon Dec 2 14:56:29 2024 -0500

Add support for -mcpu=future

This patch adds the support that can be used in developing GCC support for
future PowerPC processors.

2024-12-02  Michael Meissner  

* config.gcc (powerpc*-*-*): Add support for --with-cpu=future.
* config/rs6000/aix71.h (ASM_CPU_SPEC): Add support for 
-mcpu=future.
* config/rs6000/aix72.h (ASM_CPU_SPEC): Likewise.
* config/rs6000/aix73.h (ASM_CPU_SPEC): Likewise.
* config/rs6000/driver-rs6000.cc (asm_names): Likewise.
* config/rs6000/rs6000-c.cc (rs6000_target_modify_macros): If
-mcpu=future, define _ARCH_FUTURE.
* config/rs6000/rs6000-cpus.def (FUTURE_MASKS_SERVER): New macro.
(POWERPC_MASKS): Add OPTION_MASK_FUTURE.
(future cpu): Define.
* config/rs6000/rs6000-opts.h (enum processor_type): Add
PROCESSOR_FUTURE.
* config/rs6000/rs6000-tables.opt: Regenerate.
* config/rs6000/rs6000.cc (power10_cost): Update comment.
(get_arch_flags): Add support for future processor.
(rs6000_option_override_internal): Likewise.
(rs6000_machine_from_flags): Likewise.
(rs6000_reassociation_width): Likewise.
(rs6000_adjust_cost): Likewise.
(rs6000_issue_rate): Likewise.
(rs6000_sched_reorder): Likewise.
(rs6000_sched_reorder2): Likewise.
(rs6000_register_move_cost): Likewise.
(rs6000_opt_masks): Add -mfuture.
* config/rs6000/rs6000.h (ASM_CPU_SPEC): Likewise.
* config/rs6000/rs6000.md (cpu attribute): Likewise.
* config/rs6000/rs6000.opt (-mfuture): New internal option.

Diff:
---
 gcc/config.gcc  |  4 ++--
 gcc/config/rs6000/aix71.h   |  1 +
 gcc/config/rs6000/aix72.h   |  1 +
 gcc/config/rs6000/aix73.h   |  1 +
 gcc/config/rs6000/driver-rs6000.cc  |  2 ++
 gcc/config/rs6000/rs6000-c.cc   |  2 ++
 gcc/config/rs6000/rs6000-cpus.def   |  5 +
 gcc/config/rs6000/rs6000-opts.h |  1 +
 gcc/config/rs6000/rs6000-tables.opt | 11 +++
 gcc/config/rs6000/rs6000.cc | 30 ++
 gcc/config/rs6000/rs6000.h  |  1 +
 gcc/config/rs6000/rs6000.md |  2 +-
 gcc/config/rs6000/rs6000.opt|  6 ++
 13 files changed, 52 insertions(+), 15 deletions(-)

diff --git a/gcc/config.gcc b/gcc/config.gcc
index afa78453197a..6c1cd665ab2c 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -536,7 +536,7 @@ powerpc*-*-*)
extra_headers="${extra_headers} ppu_intrinsics.h spu2vmx.h vec_types.h 
si2vmx.h"
extra_headers="${extra_headers} amo.h"
case x$with_cpu in
-   
xpowerpc64|xdefault64|x6[23]0|x970|xG5|xpower[3456789]|xpower1[01]|xpower6x|xrs64a|xcell|xa2|xe500mc64|xe5500|xe6500)
+   
xpowerpc64|xdefault64|x6[23]0|x970|xG5|xpower[3456789]|xpower1[01]|xpower6x|xrs64a|xcell|xa2|xe500mc64|xe5500|xe6500|xfuture)
cpu_is_64bit=yes
;;
esac
@@ -5615,7 +5615,7 @@ case "${target}" in
tm_defines="${tm_defines} CONFIG_PPC405CR"
eval "with_$which=405"
;;
-   "" | common | native \
+   "" | common | native | future \
| power[3456789] | power1[01] | power5+ | power6x \
| powerpc | powerpc64 | powerpc64le \
| rs64 \
diff --git a/gcc/config/rs6000/aix71.h b/gcc/config/rs6000/aix71.h
index 4350dcd89524..505986b33d63 100644
--- a/gcc/config/rs6000/aix71.h
+++ b/gcc/config/rs6000/aix71.h
@@ -79,6 +79,7 @@ do {  
\
 #undef ASM_CPU_SPEC
 #define ASM_CPU_SPEC \
 "%{mcpu=native: %(asm_cpu_native); \
+  mcpu=future: -mfuture; \
   mcpu=power11: -mpwr11; \
   mcpu=power10: -mpwr10; \
   mcpu=power9: -mpwr9; \
diff --git a/gcc/config/rs6000/aix72.h b/gcc/config/rs6000/aix72.h
index fe59f8319b48..242ca94bd065 100644
--- a/gcc/config/rs6000/aix72.h
+++ b/gcc/config/rs6000/aix72.h
@@ -79,6 +79,7 @@ do {  
\
 #undef ASM_CPU_SPEC
 #define ASM_CPU_SPEC \
 "%{mcpu=native: %(asm_cpu_native); \
+  mcpu=future: -mfuture; \
   mcpu=power11: -mpwr11; \
   mcpu=power10: -mpwr10; \
   mcpu=power9: -mpwr9; \
diff --git a/gcc/config/rs6000/aix73.h b/gcc/config/rs6000/aix73.h
index 1318b0b3662d..2bd6b4bb3c4f 100644
--- a/gcc/config/rs6000/aix73.h
+++ b/gcc/config/rs6000/aix73.h
@@ -79,6 +79,7 @@ do {  
\
 #undef ASM_CPU_SPEC
 #define ASM_CPU_SPEC \
 "%{mcpu=nativ

[gcc(refs/users/meissner/heads/work188-bugs)] Add ChangeLog.bugs and update REVISION.

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:70613e2aed6bd90432a1e73e5bcd66191bf92b47

commit 70613e2aed6bd90432a1e73e5bcd66191bf92b47
Author: Michael Meissner 
Date:   Mon Dec 2 14:41:02 2024 -0500

Add ChangeLog.bugs and update REVISION.

2024-12-02  Michael Meissner  

gcc/

* ChangeLog.bugs: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.bugs | 5 +
 gcc/REVISION   | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.bugs b/gcc/ChangeLog.bugs
new file mode 100644
index ..6627fb747e1b
--- /dev/null
+++ b/gcc/ChangeLog.bugs
@@ -0,0 +1,5 @@
+ Branch work188-bugs, baseline 
+
+2024-12-02   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index d4db5e09c166..13ce8a10f646 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work188 branch
+work188-bugs branch


[gcc(refs/users/meissner/heads/work188)] Use vector pair load/store for memcpy with -mcpu=future

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:70521420b0487fd67f26566b19b2666290d14d4c

commit 70521420b0487fd67f26566b19b2666290d14d4c
Author: Michael Meissner 
Date:   Mon Dec 2 14:59:55 2024 -0500

Use vector pair load/store for memcpy with -mcpu=future

In the development for the power10 processor, GCC did not enable using the 
load
vector pair and store vector pair instructions when optimizing things like
memory copy.  This patch enables using those instructions if -mcpu=future is
used.

2024-12-02  Michael Meissner  

gcc/

* config/rs6000/rs6000-cpus.def (ISA_FUTURE_MASKS_SERVER): Enable 
using
load vector pair and store vector pair instructions for memory copy
operations.
(POWERPC_MASKS): Make the bit for enabling using load vector pair 
and
store vector pair operations set and reset when the PowerPC 
processor is
changed.
* gcc/config/rs6000/rs6000.cc (rs6000_machine_from_flags): Disable
-mblock-ops-vector-pair from influencing .machine selection.

gcc/testsuite/

* gcc.target/powerpc/future-3.c: New test.

Diff:
---
 gcc/config/rs6000/rs6000-cpus.def   |  4 +++-
 gcc/config/rs6000/rs6000.cc |  2 +-
 gcc/testsuite/gcc.target/powerpc/future-3.c | 22 ++
 3 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-cpus.def 
b/gcc/config/rs6000/rs6000-cpus.def
index 354c1d8de4f0..2f189dd416ca 100644
--- a/gcc/config/rs6000/rs6000-cpus.def
+++ b/gcc/config/rs6000/rs6000-cpus.def
@@ -84,7 +84,8 @@
  | OPTION_MASK_POWER11)
 
 #define FUTURE_MASKS_SERVER(POWER11_MASKS_SERVER   \
-| OPTION_MASK_FUTURE)
+| OPTION_MASK_FUTURE   \
+| OPTION_MASK_BLOCK_OPS_VECTOR_PAIR)
 
 /* Flags that need to be turned off if -mno-vsx.  */
 #define OTHER_VSX_VECTOR_MASKS (OPTION_MASK_EFFICIENT_UNALIGNED_VSX\
@@ -114,6 +115,7 @@
 
 /* Mask of all options to set the default isa flags based on -mcpu=.  */
 #define POWERPC_MASKS  (OPTION_MASK_ALTIVEC\
+| OPTION_MASK_BLOCK_OPS_VECTOR_PAIR\
 | OPTION_MASK_CMPB \
 | OPTION_MASK_CRYPTO   \
 | OPTION_MASK_DFP  \
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 331e3cf2f24d..83e8141cf98c 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -5908,7 +5908,7 @@ rs6000_machine_from_flags (void)
 
   /* Disable the flags that should never influence the .machine selection.  */
   flags &= ~(OPTION_MASK_PPC_GFXOPT | OPTION_MASK_PPC_GPOPT | OPTION_MASK_ISEL
-| OPTION_MASK_ALTIVEC);
+| OPTION_MASK_ALTIVEC | OPTION_MASK_BLOCK_OPS_VECTOR_PAIR);
 
   if ((flags & (FUTURE_MASKS_SERVER & ~ISA_3_1_MASKS_SERVER)) != 0)
 return "future";
diff --git a/gcc/testsuite/gcc.target/powerpc/future-3.c 
b/gcc/testsuite/gcc.target/powerpc/future-3.c
new file mode 100644
index ..afa8b96d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/future-3.c
@@ -0,0 +1,22 @@
+/* 32-bit doesn't generate vector pair instructions.  */
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-mdejagnu-cpu=future -O2" } */
+
+/* Test to see that memcpy will use load/store vector pair with
+   -mcpu=future.  */
+
+#ifndef SIZE
+#define SIZE 4
+#endif
+
+extern vector double to[SIZE], from[SIZE];
+
+void
+copy (void)
+{
+  __builtin_memcpy (to, from, sizeof (to));
+  return;
+}
+
+/* { dg-final { scan-assembler {\mlxvpx?\M}  } } */
+/* { dg-final { scan-assembler {\mstxvpx?\M} } } */


[gcc(refs/users/meissner/heads/work188)] Add -mcpu=future tests.

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:5fbb82a5962b3e1777aa15ee9409b3824ae50694

commit 5fbb82a5962b3e1777aa15ee9409b3824ae50694
Author: Michael Meissner 
Date:   Mon Dec 2 14:59:01 2024 -0500

Add -mcpu=future tests.

This patch adds simple tests for -mcpu=future.

2024-12-02  Michael Meissner  

gcc/testsuite/

* gcc.target/powerpc/future-1.c: New test.
* gcc.target/powerpc/future-2.c: Likewise.

Diff:
---
 gcc/testsuite/gcc.target/powerpc/future-1.c | 13 +
 gcc/testsuite/gcc.target/powerpc/future-2.c | 24 
 2 files changed, 37 insertions(+)

diff --git a/gcc/testsuite/gcc.target/powerpc/future-1.c 
b/gcc/testsuite/gcc.target/powerpc/future-1.c
new file mode 100644
index ..f1b940d7bebf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/future-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-mdejagnu-cpu=future -O2" } */
+
+/* Basic check to see if the compiler supports -mcpu=future and if it defines
+   _ARCH_FUTURE.  */
+
+#ifndef _ARCH_FUTURE
+#error "-mcpu=future is not supported"
+#endif
+
+void foo (void)
+{
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/future-2.c 
b/gcc/testsuite/gcc.target/powerpc/future-2.c
new file mode 100644
index ..5552cefa3c2e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/future-2.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+/* Check if we can set the future target via a target attribute.  */
+
+__attribute__((__target__("cpu=power9")))
+void foo_p9 (void)
+{
+}
+
+__attribute__((__target__("cpu=power10")))
+void foo_p10 (void)
+{
+}
+
+__attribute__((__target__("cpu=power11")))
+void foo_p11 (void)
+{
+}
+
+__attribute__((__target__("cpu=future")))
+void foo_future (void)
+{
+}


[gcc(refs/users/meissner/heads/work188)] Add -mcpu=future tuning support.

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:37d4ea1fab95f387b5dc1394b8b544848adbaa5d

commit 37d4ea1fab95f387b5dc1394b8b544848adbaa5d
Author: Michael Meissner 
Date:   Mon Dec 2 14:58:20 2024 -0500

Add -mcpu=future tuning support.

This patch makes -mtune=future use the same tuning decision as 
-mtune=power11.

2024-12-02  Michael Meissner  

gcc/

* config/rs6000/power10.md (all reservations): Add future as an
alternative to power10 and power11.

Diff:
---
 gcc/config/rs6000/power10.md | 144 +--
 1 file changed, 72 insertions(+), 72 deletions(-)

diff --git a/gcc/config/rs6000/power10.md b/gcc/config/rs6000/power10.md
index 2310c4603457..e42b057dc45b 100644
--- a/gcc/config/rs6000/power10.md
+++ b/gcc/config/rs6000/power10.md
@@ -1,4 +1,4 @@
-;; Scheduling description for the IBM Power10 and Power11 processors.
+;; Scheduling description for the IBM Power10, Power11, and Future processors.
 ;; Copyright (C) 2020-2024 Free Software Foundation, Inc.
 ;;
 ;; Contributed by Pat Haugen (pthau...@us.ibm.com).
@@ -97,12 +97,12 @@
(eq_attr "update" "no")
(eq_attr "size" "!128")
(eq_attr "prefixed" "no")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_any_power10,LU_power10")
 
 (define_insn_reservation "power10-fused-load" 4
   (and (eq_attr "type" "fused_load_cmpi,fused_addis_load,fused_load_load")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_even_power10,LU_power10")
 
 (define_insn_reservation "power10-prefixed-load" 4
@@ -110,13 +110,13 @@
(eq_attr "update" "no")
(eq_attr "size" "!128")
(eq_attr "prefixed" "yes")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_even_power10,LU_power10")
 
 (define_insn_reservation "power10-load-update" 4
   (and (eq_attr "type" "load")
(eq_attr "update" "yes")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_even_power10,LU_power10+SXU_power10")
 
 (define_insn_reservation "power10-fpload-double" 4
@@ -124,7 +124,7 @@
(eq_attr "update" "no")
(eq_attr "size" "64")
(eq_attr "prefixed" "no")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_any_power10,LU_power10")
 
 (define_insn_reservation "power10-prefixed-fpload-double" 4
@@ -132,14 +132,14 @@
(eq_attr "update" "no")
(eq_attr "size" "64")
(eq_attr "prefixed" "yes")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_even_power10,LU_power10")
 
 (define_insn_reservation "power10-fpload-update-double" 4
   (and (eq_attr "type" "fpload")
(eq_attr "update" "yes")
(eq_attr "size" "64")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_even_power10,LU_power10+SXU_power10")
 
 ; SFmode loads are cracked and have additional 3 cycles over DFmode
@@ -148,27 +148,27 @@
   (and (eq_attr "type" "fpload")
(eq_attr "update" "no")
(eq_attr "size" "32")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_even_power10,LU_power10")
 
 (define_insn_reservation "power10-fpload-update-single" 7
   (and (eq_attr "type" "fpload")
(eq_attr "update" "yes")
(eq_attr "size" "32")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_even_power10,LU_power10+SXU_power10")
 
 (define_insn_reservation "power10-vecload" 4
   (and (eq_attr "type" "vecload")
(eq_attr "size" "!256")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_any_power10,LU_power10")
 
 ; lxvp
 (define_insn_reservation "power10-vecload-pair" 4
   (and (eq_attr "type" "vecload")
(eq_attr "size" "256")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_even_power10,LU_power10+SXU_power10")
 
 ; Store Unit
@@ -178,12 +178,12 @@
(eq_attr "prefixed" "no")
(eq_attr "size" "!128")
(eq_attr "size" "!256")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_any_power10,STU_power10")
 
 (define_insn_reservation "power10-fused-store" 0
   (and (eq_attr "type" "fused_store_store")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_even_power10,STU_power10")
 
 (define_insn_reservation "power10-prefixed-store" 0
@@ -191,52 +191,52 @@
(eq_attr "prefixed" "yes")
(eq_attr "size" "!128")
(eq_attr "size" "!256")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_even_power10,STU_power10")
 
 ; Update forms have 2 cycle latency for update

[gcc(refs/users/meissner/heads/work188)] Do not allow -mvsx to boost processor to power7.

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:aeaa1945bca2afab4a66f6749a5cf323d7a6f28f

commit aeaa1945bca2afab4a66f6749a5cf323d7a6f28f
Author: Michael Meissner 
Date:   Mon Dec 2 15:01:33 2024 -0500

Do not allow -mvsx to boost processor to power7.

This patch restructures the code so that -mvsx for example will not silently
convert the processor to power7.  The user must now use -mcpu=power7 or
higher.  This means if the user does -mvsx and the default processor does not
have VSX support, it will be an error.

I have built both big endian and little endian bootstrap compilers and there
were no regressions.

I updated the 2 tests that used -mvsx to raise the cpu to power7, and the 
test
case that checks if -mno-vsx produces the expected warning.

Note, Peter had some questions about one of the tests in the previous 
version of
the patch.  The test is still the same in this patch.  But the code for
preventing -mvsx is different from the previous patch, and I wanted to get 
that
patch for review before stage1 closes.

Can I install this patch on the GCC 15 trunk?

2024-12-02  Michael Meissner  

gcc/

* config/rs6000/rs6000.cc (rs6000_option_override_internal): Check 
if
the user asked for VSX instructions whether the cpu was at least 
power7.

gcc/testsuite/

* gcc.target/powerpc/ppc-target-4.c: Rewrite the test to add 
cpu=power7
when we need to add VSX support.  Add test for adding cpu=power7 
no-vsx
to generate only Altivec instructions.
* gcc.target/powerpc/pr115688.c: Add cpu=power7 in target 
__attribute__
when requesting VSX instructions.
* gcc.target/powerpc/pr87496-1.c: Update options to use
-mdejagnu-cpu=power6 to get the appropriate error message.

Diff:
---
 gcc/config/rs6000/rs6000.cc |  7 +
 gcc/testsuite/gcc.target/powerpc/ppc-target-4.c | 38 +++--
 gcc/testsuite/gcc.target/powerpc/pr115688.c |  3 +-
 gcc/testsuite/gcc.target/powerpc/pr87496-1.c|  2 +-
 4 files changed, 39 insertions(+), 11 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 83e8141cf98c..5c64310b16c1 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -3862,6 +3862,13 @@ rs6000_option_override_internal (bool global_init_p)
  rs6000_isa_flags &= ~OPTION_MASK_VSX;
  rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
}
+  else if (!TARGET_POWER7)
+   {
+ if (explicit_vsx_p)
+   error ("%<-mvsx%> requires at least %<-mcpu=power%>");
+ rs6000_isa_flags &= ~OPTION_MASK_VSX;
+ rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
+   }
 }
 
   /* If hard-float/altivec/vsx were explicitly turned off then don't allow
diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-target-4.c 
b/gcc/testsuite/gcc.target/powerpc/ppc-target-4.c
index feef76db4618..5e2ecf34f249 100644
--- a/gcc/testsuite/gcc.target/powerpc/ppc-target-4.c
+++ b/gcc/testsuite/gcc.target/powerpc/ppc-target-4.c
@@ -2,7 +2,7 @@
 /* { dg-skip-if "" { powerpc*-*-darwin* } } */
 /* { dg-require-effective-target powerpc_fprs } */
 /* { dg-options "-O2 -ffast-math -mdejagnu-cpu=power5 -mno-altivec 
-mabi=altivec -fno-unroll-loops" } */
-/* { dg-final { scan-assembler-times "vaddfp" 1 } } */
+/* { dg-final { scan-assembler-times "vaddfp" 2 } } */
 /* { dg-final { scan-assembler-times "xvaddsp" 1 } } */
 /* { dg-final { scan-assembler-times "fadds" 1 } } */
 
@@ -18,10 +18,6 @@
 #error "__VSX__ should not be defined."
 #endif
 
-#pragma GCC target("altivec,vsx")
-#include 
-#pragma GCC reset_options
-
 #pragma GCC push_options
 #pragma GCC target("altivec,no-vsx")
 
@@ -33,6 +29,7 @@
 #error "__VSX__ should not be defined."
 #endif
 
+/* Altivec build, generate vaddfp.  */
 void
 av_add (vector float *a, vector float *b, vector float *c)
 {
@@ -40,10 +37,11 @@ av_add (vector float *a, vector float *b, vector float *c)
   unsigned long n = SIZE / 4;
 
   for (i = 0; i < n; i++)
-a[i] = vec_add (b[i], c[i]);
+a[i] = b[i] + c[i];
 }
 
-#pragma GCC target("vsx")
+/* cpu=power7 must be used to enable VSX.  */
+#pragma GCC target("cpu=power7,vsx")
 
 #ifndef __ALTIVEC__
 #error "__ALTIVEC__ should be defined."
@@ -53,6 +51,7 @@ av_add (vector float *a, vector float *b, vector float *c)
 #error "__VSX__ should be defined."
 #endif
 
+/* VSX build on power7, generate xvaddsp.  */
 void
 vsx_add (vector float *a, vector float *b, vector float *c)
 {
@@ -60,11 +59,31 @@ vsx_add (vector float *a, vector float *b, vector float *c)
   unsigned long n = SIZE / 4;
 
   for (i = 0; i < n; i++)
-a[i] = vec_add (b[i], c[i]);
+a[i] = b[i] + c[i];
+}
+
+#pragma GCC target("cpu=power7,no-vsx")
+
+#ifndef __ALTIVEC__
+#error "__ALTIVEC__ should be defined."
+#endif
+
+#ifdef __VSX__
+#error "__VSX__ should not be defined."

[gcc(refs/users/meissner/heads/work188)] Add rs6000 architecture masks.

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:71cab11033b9e3144abad8520df80f2a221a7afb

commit 71cab11033b9e3144abad8520df80f2a221a7afb
Author: Michael Meissner 
Date:   Mon Dec 2 15:03:13 2024 -0500

Add rs6000 architecture masks.

This patch begins the journey to move architecture bits that are not user 
ISA
options from rs6000_isa_flags to a new target variable rs6000_arch_flags.  
The
intention is to remove switches that are currently isa options, but the user
should not be using this particular option. For example, we want users to 
use
-mcpu=power10 and not just -mpower10.

This patch also changes the target_clones support to use an architecture 
mask
instead of isa bits.

This patch also switches the handling of .machine to use architecture masks 
if
they exist (power4 through power11).  All of the other PowerPCs will 
continue to
use the existing code for setting the .machine option.

I have built both big endian and little endian bootstrap compilers and there
were no regressions.

In addition, I constructed a test case that used every architecture define 
(like
_ARCH_PWR4, etc.) and I also looked at the .machine directive generated.  I 
ran
this test for all supported combinations of -mcpu, big/little endian, and 
32/64
bit support.  Every single instance generated exactly the same code with the
patches installed compared to the compiler before installing the patches.

The only difference in this patch compared to the first version posted on
November 6th is that I added the correct attribution and copyright year (i.e. 
that I
created rs6000-arch.def in 2024).

Can I install this patch on the GCC 15 trunk?

2024-12-02  Michael Meissner  

gcc/

* config/rs6000/default64.h (TARGET_CPU_DEFAULT): Set default cpu 
name.
* config/rs6000/rs6000-arch.def: New file.
* config/rs6000/rs6000.cc (struct clone_map): Switch to using
architecture masks instead of ISA masks.
(rs6000_clone_map): Likewise.
(rs6000_print_isa_options): Add an architecture flags argument, 
change
all callers.
(get_arch_flag): New function.
(rs6000_debug_reg_global): Update rs6000_print_isa_options calls.
(rs6000_option_override_internal): Likewise.
(rs6000_machine_from_flags): Switch to using architecture masks 
instead
of ISA masks.
(struct rs6000_arch_mask): New structure.
(rs6000_arch_masks): New table of architecture masks and names.
(rs6000_function_specific_save): Save architecture flags.
(rs6000_function_specific_restore): Restore architecture flags.
(rs6000_function_specific_print): Update rs6000_print_isa_options 
calls.
(rs6000_print_options_internal): Add architecture flags options.
(rs6000_clone_priority): Switch to using architecture masks instead 
of
ISA masks.
(rs6000_can_inline_p): Don't allow inlining if the callee requires a 
newer
architecture than the caller.
* config/rs6000/rs6000.h: Use rs6000-arch.def to create the 
architecture
masks.
* config/rs6000/rs6000.opt (rs6000_arch_flags): New target variable.
(x_rs6000_arch_flags): New save/restore field for rs6000_arch_flags.

Diff:
---
 gcc/config/rs6000/default64.h |  11 ++
 gcc/config/rs6000/rs6000-arch.def |  49 +
 gcc/config/rs6000/rs6000.cc   | 222 +++---
 gcc/config/rs6000/rs6000.h|  24 +
 gcc/config/rs6000/rs6000.opt  |   8 ++
 5 files changed, 277 insertions(+), 37 deletions(-)

diff --git a/gcc/config/rs6000/default64.h b/gcc/config/rs6000/default64.h
index 10e3dec78aca..afa6542e040c 100644
--- a/gcc/config/rs6000/default64.h
+++ b/gcc/config/rs6000/default64.h
@@ -21,6 +21,7 @@ along with GCC; see the file COPYING3.  If not see
 #define RS6000_CPU(NAME, CPU, FLAGS)
 #include "rs6000-cpus.def"
 #undef RS6000_CPU
+#undef TARGET_CPU_DEFAULT
 
 #if (TARGET_DEFAULT & MASK_LITTLE_ENDIAN)
 #undef TARGET_DEFAULT
@@ -28,10 +29,20 @@ along with GCC; see the file COPYING3.  If not see
| MASK_LITTLE_ENDIAN)
 #undef ASM_DEFAULT_SPEC
 #define ASM_DEFAULT_SPEC "-mpower8"
+#define TARGET_CPU_DEFAULT "power8"
+
 #else
 #undef TARGET_DEFAULT
 #define TARGET_DEFAULT (OPTION_MASK_PPC_GFXOPT | OPTION_MASK_PPC_GPOPT \
| OPTION_MASK_MFCRF | MASK_POWERPC64 | MASK_64BIT)
 #undef ASM_DEFAULT_SPEC
 #define ASM_DEFAULT_SPEC "-mpower4"
+
+#if (TARGET_DEFAULT & MASK_POWERPC64)
+#define TARGET_CPU_DEFAULT "powerpc64"
+
+#else
+#define TARGET_CPU_DEFAULT "powerpc"
+#endif
+
 #endif
diff --git a/gcc/config/rs6000/rs6000-arch.def 
b/gcc/config/rs6000/rs6000-arch.def
new file mode 100644
index ..c0dbc5834333
--- /dev/null
+++ b/gcc/config/rs6000/rs6000-arch.def
@@

[gcc(refs/users/meissner/heads/work188)] Use architecture flags for defining _ARCH_PWR macros.

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:c9a0ac54aa976f6bff9d84e2292d03dbd040ff16

commit c9a0ac54aa976f6bff9d84e2292d03dbd040ff16
Author: Michael Meissner 
Date:   Mon Dec 2 15:04:09 2024 -0500

Use architecture flags for defining _ARCH_PWR macros.

For the newer architectures, this patch changes GCC to define the 
_ARCH_PWR
macros using the new architecture flags instead of relying on isa options 
like
-mpower10.

The -mpower8-internal, -mpower10, -mpower11, and -mfuture options were 
removed.
The -mpower11 and -mfuture options were removed completely, since they were 
just
added in GCC 15. The other two options were marked as WarnRemoved, and the
various ISA bits were removed.

TARGET_POWER8, TARGET_POWER10, TARGET_POWER11, and TARGET_FUTURE were 
re-defined
to use the architecture bits instead of the ISA bits.

There are other internal isa bits that aren't removed with this patch 
because
the built-in function support uses those bits.

I have built both big endian and little endian bootstrap compilers and there
were no regressions.

Can I install this patch on the GCC 15 trunk?

2024-11-22  Michael Meissner  

gcc/

* config/rs6000/rs6000-c.cc (rs6000_target_modify_macros): Add 
support to
use architecture flags instead of ISA flags for setting most of the
_ARCH_PWR* macros.
(rs6000_cpu_cpp_builtins): Update rs6000_target_modify_macros call.
* config/rs6000/rs6000-cpus.def (ISA_2_7_MASKS_SERVER): Remove
OPTION_MASK_POWER8.
(ISA_3_1_MASKS_SERVER): Remove OPTION_MASK_POWER10.
(POWER11_MASKS_SERVER): Remove OPTION_MASK_POWER11.
(FUTURE_MASKS_SERVER): Remove OPTION_MASK_FUTURE.
(POWERPC_MASKS): Remove OPTION_MASK_POWER8, OPTION_MASK_POWER10,
OPTION_MASK_POWER11, and OPTION_MASK_FUTURE.
* config/rs6000/rs6000-protos.h (rs6000_target_modify_macros): 
Update
declaration.
(rs6000_target_modify_macros_ptr): Likewise.
* config/rs6000/rs6000.cc (rs6000_target_modify_macros_ptr): 
Likewise.
(rs6000_option_override_internal): Use architecture flags instead 
of ISA
flags.
(rs6000_opt_masks): Remove -mpower10, -mpower11, and -mfuture which 
are
no longer in the ISA flags.
(rs6000_pragma_target_parse): Use architecture flags as well as ISA
flags.
* config/rs6000/rs6000.h (TARGET_POWER5): Redefine to use 
architecture
flags.
(TARGET_POWER5X): Likewise.
(TARGET_POWER6): Likewise.
(TARGET_POWER7): Likewise.
(TARGET_POWER8): Likewise.
(TARGET_POWER9): Likewise.
(TARGET_POWER10): New macro.
(TARGET_POWER11): Likewise.
(TARGET_FUTURE): Likewise.
* config/rs6000/rs6000.opt (-mpower8-internal): Remove ISA flag 
bits.
(-mpower10): Likewise.
(-mpower11): Likewise.
(-mfuture): Likewise.

Diff:
---
 gcc/config/rs6000/rs6000-c.cc | 29 -
 gcc/config/rs6000/rs6000-cpus.def | 10 +-
 gcc/config/rs6000/rs6000-protos.h |  5 +++--
 gcc/config/rs6000/rs6000.cc   | 20 +++-
 gcc/config/rs6000/rs6000.h| 19 +--
 gcc/config/rs6000/rs6000.opt  | 17 ++---
 6 files changed, 46 insertions(+), 54 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-c.cc b/gcc/config/rs6000/rs6000-c.cc
index b406e67a77df..6863e34fa705 100644
--- a/gcc/config/rs6000/rs6000-c.cc
+++ b/gcc/config/rs6000/rs6000-c.cc
@@ -338,7 +338,8 @@ rs6000_define_or_undefine_macro (bool define_p, const char 
*name)
#pragma GCC target, we need to adjust the macros dynamically.  */
 
 void
-rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags)
+rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags,
+HOST_WIDE_INT arch_flags)
 {
   if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
 fprintf (stderr,
@@ -411,7 +412,7 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT 
flags)
summary of the flags associated with particular cpu
definitions.  */
 
-  /* rs6000_isa_flags based options.  */
+  /* rs6000_isa_flags and rs6000_arch_flags based options.  */
   rs6000_define_or_undefine_macro (define_p, "_ARCH_PPC");
   if ((flags & OPTION_MASK_PPC_GPOPT) != 0)
 rs6000_define_or_undefine_macro (define_p, "_ARCH_PPCSQ");
@@ -419,25 +420,27 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT 
flags)
 rs6000_define_or_undefine_macro (define_p, "_ARCH_PPCGR");
   if ((flags & OPTION_MASK_POWERPC64) != 0)
 rs6000_define_or_undefine_macro (define_p, "_ARCH_PPC64");
-  if ((flags & OPTION_MASK_MFCRF) != 0)
+  if ((flags & OPTION_MASK_POWERPC64) != 0)
+rs6000_define_or_undefine_macro (define_p, "_ARCH_PPC64");

[gcc(refs/users/mikael/heads/pr115494_v01)] tree-optimization: Always select a representative available in the block [PR115494]

2024-12-02 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:6c41f4ab1f64bcd0dad8abc68073e3b6118c12b0

commit 6c41f4ab1f64bcd0dad8abc68073e3b6118c12b0
Author: Mikael Morin 
Date:   Mon Dec 2 10:37:01 2024 +0100

tree-optimization: Always select a representative available in the block 
[PR115494]

Force the creation of a new representative if phi-translation returns a
leader whose definition doesn't dominate the source block.  This avoids
using the flow-sensitive information (value range, known bits) of the leader
during simplification, in a block that is out of the scope where that
information is valid.

PR tree-optimization/115494

gcc/ChangeLog:

* tree-ssa-pre.cc (phi_translate_1): Force the selection of a
representative available in the block in the case where the leader
is left untouched by phi-translation.

Diff:
---
 gcc/tree-ssa-pre.cc | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/gcc/tree-ssa-pre.cc b/gcc/tree-ssa-pre.cc
index c696111690f1..fe55d1126f92 100644
--- a/gcc/tree-ssa-pre.cc
+++ b/gcc/tree-ssa-pre.cc
@@ -1430,12 +1430,16 @@ phi_translate_1 (bitmap_set_t dest,
unsigned int op_val_id = VN_INFO (newnary->op[i])->value_id;
leader = find_leader_in_sets (op_val_id, set1, set2);
result = phi_translate (dest, leader, set1, set2, e);
-   if (result && result != leader)
+   if (result)
  /* If op has a leader in the sets we translate make
 sure to use the value of the translated expression.
-We might need a new representative for that.  */
+We might need a new representative for that.  We have to
+restrict to a representative whose definition dominates
+PRED, as its flow-sensitive information such as value range
+or known bits may be used by the simplification attempt
+further down.  */
  newnary->op[i] = get_representative_for (result, pred);
-   else if (!result)
+   else
  return NULL;
 
changed |= newnary->op[i] != nary->op[i];


[gcc(refs/users/mikael/heads/pr115494_v01)] Add test.

2024-12-02 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:cf2fdd51954eccb804759b31fcd3f73c3cd133b4

commit cf2fdd51954eccb804759b31fcd3f73c3cd133b4
Author: Mikael Morin 
Date:   Mon Dec 2 20:48:50 2024 +0100

Add test.

PR tree-optimization/115494

gcc/testsuite/ChangeLog:

* gcc.dg/torture/pr115494.c: New test.

Diff:
---
 gcc/testsuite/gcc.dg/torture/pr115494.c | 25 +
 1 file changed, 25 insertions(+)

diff --git a/gcc/testsuite/gcc.dg/torture/pr115494.c 
b/gcc/testsuite/gcc.dg/torture/pr115494.c
new file mode 100644
index ..38551c79d0fd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr115494.c
@@ -0,0 +1,25 @@
+/* { dg-do run }
+
+   PR tree-optimization/115494
+   When trying to factor the redundant expression A | B below, the information
+   that B has [0,1] range in the if branch was wrongly used to simplify the
+   factored expression in the true FLAG case.  */
+
+__attribute__((noipa))
+unsigned f(_Bool flag, unsigned b, int a)
+{
+  int x;
+  if (flag)
+a = 1;
+  if ((b & 1) == b) // b [0,1]
+x = a | b;
+  else
+x = a | b;
+  return x;
+}
+
+int main()
+{
+  if (f(1, 3, 3) != 3)
+__builtin_abort();
+}


[gcc(refs/users/meissner/heads/work188)] Update ChangeLog.*

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:b51496b4b2b3e1b3a233db7f68ba17599f4f

commit b51496b4b2b3e1b3a233db7f68ba17599f4f
Author: Michael Meissner 
Date:   Mon Dec 2 15:07:55 2024 -0500

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.meissner | 435 +
 1 file changed, 435 insertions(+)

diff --git a/gcc/ChangeLog.meissner b/gcc/ChangeLog.meissner
index 99eff4717850..49e6679b7cd4 100644
--- a/gcc/ChangeLog.meissner
+++ b/gcc/ChangeLog.meissner
@@ -1,5 +1,440 @@
+ Branch work188, patch #31 
+
+Use architecture flags for defining _ARCH_PWR macros.
+
+For the newer architectures, this patch changes GCC to define the _ARCH_PWR
+macros using the new architecture flags instead of relying on isa options like
+-mpower10.
+
+The -mpower8-internal, -mpower10, -mpower11, and -mfuture options were removed.
+The -mpower11 and -mfuture options were removed completely, since they were 
just
+added in GCC 15. The other two options were marked as WarnRemoved, and the
+various ISA bits were removed.
+
+TARGET_POWER8, TARGET_POWER10, TARGET_POWER11, and TARGET_FUTURE were 
re-defined
+to use the architecture bits instead of the ISA bits.
+
+There are other internal isa bits that aren't removed with this patch because
+the built-in function support uses those bits.
+
+I have built both big endian and little endian bootstrap compilers and there
+were no regressions.
+
+Can I install this patch on the GCC 15 trunk?
+
+2024-11-22  Michael Meissner  
+
+gcc/
+
+   * config/rs6000/rs6000-c.cc (rs6000_target_modify_macros) Add support to
+   use architecture flags instead of ISA flags for setting most of the
+   _ARCH_PWR* macros.
+   (rs6000_cpu_cpp_builtins): Update rs6000_target_modify_macros call.
+   * config/rs6000/rs6000-cpus.def (ISA_2_7_MASKS_SERVER): Remove
+   OPTION_MASK_POWER8.
+   (ISA_3_1_MASKS_SERVER): Remove OPTION_MASK_POWER10.
+   (POWER11_MASKS_SERVER): Remove OPTION_MASK_POWER11.
+   (FUTURE_MASKS_SERVER): Remove OPTION_MASK_FUTURE.
+   (POWERPC_MASKS): Remove OPTION_MASK_POWER8, OPTION_MASK_POWER10,
+   OPTION_MASK_POWER11, and OPTION_MASK_FUTURE.
+   * config/rs6000/rs6000-protos.h (rs6000_target_modify_macros): Update
+   declaration.
+   (rs6000_target_modify_macros_ptr): Likewise.
+   * config/rs6000/rs6000.cc (rs6000_target_modify_macros_ptr): Likewise.
+   (rs6000_option_override_internal): Use architecture flags instead of ISA
+   flags.
+   (rs6000_opt_masks): Remove -mpower10, -mpower11, and -mfuture which are
+   no longer in the ISA flags.
+   (rs6000_pragma_target_parse): Use architecture flags as well as ISA
+   flags.
+   * config/rs6000/rs6000.h (TARGET_POWER5): Redefine to use architecture
+   flags.
+   (TARGET_POWER5X): Likewise.
+   (TARGET_POWER6): Likewise.
+   (TARGET_POWER7): Likewise.
+   (TARGET_POWER8): Likewise.
+   (TARGET_POWER9): Likewise.
+   (TARGET_POWER10): New macro.
+   (TARGET_POWER11): Likewise.
+   (TARGET_FUTURE): Likewise.
+   * config/rs6000/rs6000.opt (-mpower8-internal): Remove ISA flag bits.
+   (-mpower10): Likewise.
+   (-mpower11): Likewise.
+   (-mfuture): Likewise.
+
+ Branch work188, patch #30 
+
+Add rs6000 architecture masks.
+
+This patch begins the journey to move architecture bits that are not user ISA
+options from rs6000_isa_flags to a new target variable rs6000_arch_flags.  The
+intention is to remove switches that are currently isa options, but the user
+should not be using this particular option. For example, we want users to use
+-mcpu=power10 and not just -mpower10.
+
+This patch also changes the target_clones support to use an architecture mask
+instead of isa bits.
+
+This patch also switches the handling of .machine to use architecture masks if
+they exist (power4 through power11).  All of the other PowerPCs will continue 
to
+use the existing code for setting the .machine option.
+
+I have built both big endian and little endian bootstrap compilers and there
+were no regressions.
+
+In addition, I constructed a test case that used every architecture define (like
+_ARCH_PWR4, etc.) and I also looked at the .machine directive generated.  I ran
+this test for all supported combinations of -mcpu, big/little endian, and 32/64
+bit support.  Every single instance generated exactly the same code with the
+patches installed compared to the compiler before installing the patches.
+
+The only difference in this patch compared to the first version posted on
+November 6th is that I used the correct attribution and copyright year (i.e. that I
+created rs6000-arch.def in 2024).
+
+Can I install this patch on the GCC 15 trunk?
+
+2024-12-02  Michael Meissner  
+
+gcc/
+
+   * config/rs6000/default64.h (TARGET_CPU_DEFAULT): Set default cpu name.
+   * config/rs6000/rs6000-arch.def: New file.
+   * confi

[gcc] Created branch 'mikael/heads/pr115494_v01' in namespace 'refs/users'

2024-12-02 Thread Mikael Morin via Gcc-cvs
The branch 'mikael/heads/pr115494_v01' was created in namespace 'refs/users' 
pointing to:

 6c41f4ab1f64... tree-optimization: Always select a representative available


[gcc(refs/users/meissner/heads/work188-bugs)] Merge commit 'refs/users/meissner/heads/work188-bugs' of git+ssh://gcc.gnu.org/git/gcc into me/work1

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:4f41c757f6cca4fbd9dc41ceb9d9d9e1214fe1e9

commit 4f41c757f6cca4fbd9dc41ceb9d9d9e1214fe1e9
Merge: 2678d5d8d607 70613e2aed6b
Author: Michael Meissner 
Date:   Mon Dec 2 15:10:33 2024 -0500

Merge commit 'refs/users/meissner/heads/work188-bugs' of 
git+ssh://gcc.gnu.org/git/gcc into me/work188-bugs

Diff:


[gcc(refs/users/meissner/heads/work188-bugs)] Add ChangeLog.bugs and update REVISION.

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:2678d5d8d607bca3da62b4c3ce43477fa775a7e7

commit 2678d5d8d607bca3da62b4c3ce43477fa775a7e7
Author: Michael Meissner 
Date:   Mon Dec 2 14:41:02 2024 -0500

Add ChangeLog.bugs and update REVISION.

2024-12-02  Michael Meissner  

gcc/

* ChangeLog.bugs: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.bugs | 5 +
 gcc/REVISION   | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.bugs b/gcc/ChangeLog.bugs
new file mode 100644
index ..6627fb747e1b
--- /dev/null
+++ b/gcc/ChangeLog.bugs
@@ -0,0 +1,5 @@
+ Branch work188-bugs, baseline 
+
+2024-12-02   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index d4db5e09c166..13ce8a10f646 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work188 branch
+work188-bugs branch


[gcc/meissner/heads/work188-dmf] (15 commits) Merge commit 'refs/users/meissner/heads/work188-dmf' of git

2024-12-02 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work188-dmf' was updated to point to:

 abe46f55f72e... Merge commit 'refs/users/meissner/heads/work188-dmf' of git

It previously pointed to:

 bc8d78add9fa... Add ChangeLog.dmf and update REVISION.

Diff:

Summary of changes (added commits):
---

  abe46f5... Merge commit 'refs/users/meissner/heads/work188-dmf' of git
  2ca7a2a... Add ChangeLog.dmf and update REVISION.
  b51496b... Update ChangeLog.* (*)
  c9a0ac5... Use architecture flags for defining _ARCH_PWR macros. (*)
  71cab11... Add rs6000 architecture masks. (*)
  aeaa194... Do not allow -mvsx to boost processor to power7. (*)
  7052142... Use vector pair load/store for memcpy with -mcpu=future (*)
  5fbb82a... Add -mcpu=future tests. (*)
  37d4ea1... Add -mcpu=future tuning support. (*)
  2114420... Add support for -mcpu=future (*)
  5c3cbdb... Change TARGET_MODULO to TARGET_POWER9. (*)
  ff12ee1... Change TARGET_POPCNTD to TARGET_POWER7. (*)
  d0188be... Change TARGET_CMPB to TARGET_POWER6. (*)
  798cbe5... Change TARGET_FPRND to TARGET_POWER5X. (*)
  96a8105... Change TARGET_POPCNTB to TARGET_POWER5. (*)

(*) This commit already exists in another branch.
Because the reference `refs/users/meissner/heads/work188-dmf' matches
your hooks.email-new-commits-only configuration,
no separate email is sent for this commit.


[gcc(refs/users/meissner/heads/work188-dmf)] Add ChangeLog.dmf and update REVISION.

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:2ca7a2a4ecb0bb6e5c7c03120d15ca21ecb73221

commit 2ca7a2a4ecb0bb6e5c7c03120d15ca21ecb73221
Author: Michael Meissner 
Date:   Mon Dec 2 14:39:04 2024 -0500

Add ChangeLog.dmf and update REVISION.

2024-12-02  Michael Meissner  

gcc/

* ChangeLog.dmf: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.dmf | 5 +
 gcc/REVISION  | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.dmf b/gcc/ChangeLog.dmf
new file mode 100644
index ..a606d503af09
--- /dev/null
+++ b/gcc/ChangeLog.dmf
@@ -0,0 +1,5 @@
+ Branch work188-dmf, baseline 
+
+2024-12-02   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index d4db5e09c166..3e28a9228fd5 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work188 branch
+work188-dmf branch


[gcc(refs/users/meissner/heads/work188-libs)] Add ChangeLog.libs and update REVISION.

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:ab05ee280294a919dbb26efa5f8bcae41adbf8d6

commit ab05ee280294a919dbb26efa5f8bcae41adbf8d6
Author: Michael Meissner 
Date:   Mon Dec 2 14:41:55 2024 -0500

Add ChangeLog.libs and update REVISION.

2024-12-02  Michael Meissner  

gcc/

* ChangeLog.libs: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.libs | 5 +
 gcc/REVISION   | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.libs b/gcc/ChangeLog.libs
new file mode 100644
index ..2feb12efdb23
--- /dev/null
+++ b/gcc/ChangeLog.libs
@@ -0,0 +1,5 @@
+ Branch work188-libs, baseline 
+
+2024-12-02   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index d4db5e09c166..9c4f5fc5a7ab 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work188 branch
+work188-libs branch


[gcc/meissner/heads/work188-libs] (15 commits) Merge commit 'refs/users/meissner/heads/work188-libs' of gi

2024-12-02 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work188-libs' was updated to point to:

 23529ab45e7c... Merge commit 'refs/users/meissner/heads/work188-libs' of gi

It previously pointed to:

 d74c57ac0900... Add ChangeLog.libs and update REVISION.

Diff:

Summary of changes (added commits):
---

  23529ab... Merge commit 'refs/users/meissner/heads/work188-libs' of gi
  ab05ee2... Add ChangeLog.libs and update REVISION.
  b51496b... Update ChangeLog.* (*)
  c9a0ac5... Use architecture flags for defining _ARCH_PWR macros. (*)
  71cab11... Add rs6000 architecture masks. (*)
  aeaa194... Do not allow -mvsx to boost processor to power7. (*)
  7052142... Use vector pair load/store for memcpy with -mcpu=future (*)
  5fbb82a... Add -mcpu=future tests. (*)
  37d4ea1... Add -mcpu=future tuning support. (*)
  2114420... Add support for -mcpu=future (*)
  5c3cbdb... Change TARGET_MODULO to TARGET_POWER9. (*)
  ff12ee1... Change TARGET_POPCNTD to TARGET_POWER7. (*)
  d0188be... Change TARGET_CMPB to TARGET_POWER6. (*)
  798cbe5... Change TARGET_FPRND to TARGET_POWER5X. (*)
  96a8105... Change TARGET_POPCNTB to TARGET_POWER5. (*)

(*) This commit already exists in another branch.
Because the reference `refs/users/meissner/heads/work188-libs' matches
your hooks.email-new-commits-only configuration,
no separate email is sent for this commit.


[gcc(refs/users/meissner/heads/work188-libs)] Merge commit 'refs/users/meissner/heads/work188-libs' of git+ssh://gcc.gnu.org/git/gcc into me/work1

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:23529ab45e7cf4967836c56516938213466e276b

commit 23529ab45e7cf4967836c56516938213466e276b
Merge: ab05ee280294 d74c57ac0900
Author: Michael Meissner 
Date:   Mon Dec 2 15:19:06 2024 -0500

Merge commit 'refs/users/meissner/heads/work188-libs' of 
git+ssh://gcc.gnu.org/git/gcc into me/work188-libs

Diff:


[gcc/meissner/heads/work188-sha] (15 commits) Merge commit 'refs/users/meissner/heads/work188-sha' of git

2024-12-02 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work188-sha' was updated to point to:

 ba4bee06d304... Merge commit 'refs/users/meissner/heads/work188-sha' of git

It previously pointed to:

 57890269720d... Add ChangeLog.sha and update REVISION.

Diff:

Summary of changes (added commits):
---

  ba4bee0... Merge commit 'refs/users/meissner/heads/work188-sha' of git
  f91e6c8... Add ChangeLog.sha and update REVISION.
  b51496b... Update ChangeLog.* (*)
  c9a0ac5... Use architecture flags for defining _ARCH_PWR macros. (*)
  71cab11... Add rs6000 architecture masks. (*)
  aeaa194... Do not allow -mvsx to boost processor to power7. (*)
  7052142... Use vector pair load/store for memcpy with -mcpu=future (*)
  5fbb82a... Add -mcpu=future tests. (*)
  37d4ea1... Add -mcpu=future tuning support. (*)
  2114420... Add support for -mcpu=future (*)
  5c3cbdb... Change TARGET_MODULO to TARGET_POWER9. (*)
  ff12ee1... Change TARGET_POPCNTD to TARGET_POWER7. (*)
  d0188be... Change TARGET_CMPB to TARGET_POWER6. (*)
  798cbe5... Change TARGET_FPRND to TARGET_POWER5X. (*)
  96a8105... Change TARGET_POPCNTB to TARGET_POWER5. (*)

(*) This commit already exists in another branch.
Because the reference `refs/users/meissner/heads/work188-sha' matches
your hooks.email-new-commits-only configuration,
no separate email is sent for this commit.


[gcc(refs/users/meissner/heads/work188-sha)] Add ChangeLog.sha and update REVISION.

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:f91e6c8dceea4f1a5a42565bc59e1f7427ac581e

commit f91e6c8dceea4f1a5a42565bc59e1f7427ac581e
Author: Michael Meissner 
Date:   Mon Dec 2 14:42:48 2024 -0500

Add ChangeLog.sha and update REVISION.

2024-12-02  Michael Meissner  

gcc/

* ChangeLog.sha: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.sha | 5 +
 gcc/REVISION  | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.sha b/gcc/ChangeLog.sha
new file mode 100644
index ..0c12046271e5
--- /dev/null
+++ b/gcc/ChangeLog.sha
@@ -0,0 +1,5 @@
+ Branch work188-sha, baseline 
+
+2024-12-02   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index d4db5e09c166..174385591d8c 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work188 branch
+work188-sha branch


[gcc r14-11021] [PATCH] PR modula2/116918 -fswig correct syntax

2024-12-02 Thread Gaius Mulley via Gcc-cvs
https://gcc.gnu.org/g:aa38196eb45bfb8a8632381ba3e277deb1b25782

commit r14-11021-gaa38196eb45bfb8a8632381ba3e277deb1b25782
Author: Gaius Mulley 
Date:   Mon Dec 2 20:25:25 2024 +

[PATCH] PR modula2/116918 -fswig correct syntax

This patch fixes the syntax for the generated swig interface file.
The % characters in fprintf require escaping.

gcc/m2/ChangeLog:

PR modula2/116918
* gm2-compiler/M2Swig.mod (AnnotateProcedure): Capitalize
the generated comment, split comment into multiple lines and
terminate the comment with ".  */".
(DoCheckUnbounded): Escape the % character with %%.
(DoWriteFile): Ditto.

(cherry picked from commit fda30a3c8a7c6b06f02be40e3fd0740f893a1b4f)

Signed-off-by: Gaius Mulley 

Diff:
---
 gcc/m2/gm2-compiler/M2Swig.mod | 22 +++---
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/gcc/m2/gm2-compiler/M2Swig.mod b/gcc/m2/gm2-compiler/M2Swig.mod
index 194abd5fb786..b7f34426adb8 100644
--- a/gcc/m2/gm2-compiler/M2Swig.mod
+++ b/gcc/m2/gm2-compiler/M2Swig.mod
@@ -685,7 +685,7 @@ VAR
son, p, i: CARDINAL ;
needComma: BOOLEAN ;
 BEGIN
-   fprintf0(f, '/*  parameter: ') ;
+   fprintf0(f, '/* Parameter: ') ;
p := NoOfParam(sym) ;
i := 1 ;
needComma := FALSE ;
@@ -695,14 +695,14 @@ BEGIN
   THEN
  IF needComma
  THEN
-fprintf0(f, ', ')
+fprintf0(f, ',\n   ')
  END ;
  CalculateVarDirective(sym, son, TRUE) ;
  needComma := TRUE
   END ;
   INC(i)
END ;
-   fprintf0(f, ' */\n\n')
+   fprintf0(f, '.  */\n\n')
 END AnnotateProcedure ;
 
 
@@ -879,9 +879,9 @@ BEGIN
  IF NOT includedArray
  THEN
 includedArray := TRUE ;
-fprintf0(f, '%include "carrays.i"\n')
+fprintf0(f, '%%include "carrays.i"\n')
  END ;
- fprintf0(f, '%') ;
+ fprintf0(f, '%%') ;
  fprintf0(f, 'apply (char *STRING, int LENGTH) { (') ;
  DoUnbounded(sym) ;
  fprintf0(f, ') };\n') ;
@@ -908,12 +908,12 @@ VAR
 BEGIN
mainModule := sym ;
n := GetSymName(sym) ;
-   fprintf0(f, '/* automatically generated by gm2 -fswig */\n') ;
-   fprintf0(f, '%') ;
+   fprintf0(f, '/* Automatically generated by gm2 -fswig.  */\n') ;
+   fprintf0(f, '%%') ;
fprintf1(f, 'module %a\n\n', n) ;
-   fprintf0(f, '%') ;
+   fprintf0(f, '%%') ;
fprintf1(f, 'include exception.i\n\n', n) ;
-   fprintf0(f, '%') ;
+   fprintf0(f, '%%') ;
fprintf0(f, 'exception {\n') ;
fprintf0(f, '  try {\n') ;
fprintf0(f, ' $action\n') ;
@@ -922,9 +922,9 @@ BEGIN
fprintf0(f, '  }\n') ;
fprintf0(f, '}\n\n') ;
ForeachItemInListDo(Done, DoCheckUnbounded) ;
-   fprintf0(f, '\n%{\n') ;
+   fprintf0(f, '\n%%{\n') ;
ForeachItemInListDo(Done, DoCheckExported) ;
-   fprintf0(f, '%}\n\n') ;
+   fprintf0(f, '%%}\n\n') ;
ForeachItemInListDo(Done, DoCheckExported)
 END DoWriteFile ;


[gcc(refs/users/meissner/heads/work188-dmf)] Merge commit 'refs/users/meissner/heads/work188-dmf' of git+ssh://gcc.gnu.org/git/gcc into me/work18

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:abe46f55f72ee5471f23ff580bbe50c4626d153b

commit abe46f55f72ee5471f23ff580bbe50c4626d153b
Merge: 2ca7a2a4ecb0 bc8d78add9fa
Author: Michael Meissner 
Date:   Mon Dec 2 15:17:54 2024 -0500

Merge commit 'refs/users/meissner/heads/work188-dmf' of 
git+ssh://gcc.gnu.org/git/gcc into me/work188-dmf

Diff:


[gcc(refs/users/meissner/heads/work188-sha)] Merge commit 'refs/users/meissner/heads/work188-sha' of git+ssh://gcc.gnu.org/git/gcc into me/work18

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:ba4bee06d30462622803558bce5232da071575ea

commit ba4bee06d30462622803558bce5232da071575ea
Merge: f91e6c8dceea 57890269720d
Author: Michael Meissner 
Date:   Mon Dec 2 15:23:46 2024 -0500

Merge commit 'refs/users/meissner/heads/work188-sha' of 
git+ssh://gcc.gnu.org/git/gcc into me/work188-sha

Diff:


[gcc/meissner/heads/work188-bugs] (15 commits) Merge commit 'refs/users/meissner/heads/work188-bugs' of gi

2024-12-02 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work188-bugs' was updated to point to:

 4f41c757f6cc... Merge commit 'refs/users/meissner/heads/work188-bugs' of gi

It previously pointed to:

 70613e2aed6b... Add ChangeLog.bugs and update REVISION.

Diff:

Summary of changes (added commits):
---

  4f41c75... Merge commit 'refs/users/meissner/heads/work188-bugs' of gi
  2678d5d... Add ChangeLog.bugs and update REVISION.
  b51496b... Update ChangeLog.* (*)
  c9a0ac5... Use architecture flags for defining _ARCH_PWR macros. (*)
  71cab11... Add rs6000 architecture masks. (*)
  aeaa194... Do not allow -mvsx to boost processor to power7. (*)
  7052142... Use vector pair load/store for memcpy with -mcpu=future (*)
  5fbb82a... Add -mcpu=future tests. (*)
  37d4ea1... Add -mcpu=future tuning support. (*)
  2114420... Add support for -mcpu=future (*)
  5c3cbdb... Change TARGET_MODULO to TARGET_POWER9. (*)
  ff12ee1... Change TARGET_POPCNTD to TARGET_POWER7. (*)
  d0188be... Change TARGET_CMPB to TARGET_POWER6. (*)
  798cbe5... Change TARGET_FPRND to TARGET_POWER5X. (*)
  96a8105... Change TARGET_POPCNTB to TARGET_POWER5. (*)

(*) This commit already exists in another branch.
Because the reference `refs/users/meissner/heads/work188-bugs' matches
your hooks.email-new-commits-only configuration,
no separate email is sent for this commit.


[gcc r15-5859] riscv: Avoid narrowing warning

2024-12-02 Thread Andreas Schwab via Gcc-cvs
https://gcc.gnu.org/g:712cb2967bd91d4097e7125aa9d498a66e1654c5

commit r15-5859-g712cb2967bd91d4097e7125aa9d498a66e1654c5
Author: Andreas Schwab 
Date:   Sun Dec 1 11:44:50 2024 +0100

riscv: Avoid narrowing warning

* config/riscv/riscv.cc (fli_value_hf, fli_value_sf)
(fli_value_df): Use integer constants.  Constify.
(riscv_float_const_rtx_index_for_fli): Add const.

Diff:
---
 gcc/config/riscv/riscv.cc | 64 +--
 1 file changed, 39 insertions(+), 25 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 7a1724d6e732..0a6c00926b31 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -1637,35 +1637,49 @@ static int riscv_symbol_insns (enum riscv_symbol_type 
type)
Manual draft. For details, please see:
https://github.com/riscv/riscv-isa-manual/releases/tag/isa-449cd0c  */
 
-static unsigned HOST_WIDE_INT fli_value_hf[32] =
-{
-  0xbcp8, 0x4p8, 0x1p8, 0x2p8, 0x1cp8, 0x20p8, 0x2cp8, 0x30p8,
-  0x34p8, 0x35p8, 0x36p8, 0x37p8, 0x38p8, 0x39p8, 0x3ap8, 0x3bp8,
-  0x3cp8, 0x3dp8, 0x3ep8, 0x3fp8, 0x40p8, 0x41p8, 0x42p8, 0x44p8,
-  0x48p8, 0x4cp8, 0x58p8, 0x5cp8, 0x78p8,
+static const unsigned HOST_WIDE_INT fli_value_hf[32] =
+{
+#define P8(v) ((unsigned HOST_WIDE_INT) (v) << 8)
+  P8(0xbc), P8(0x4), P8(0x1), P8(0x2),
+  P8(0x1c), P8(0x20), P8(0x2c), P8(0x30),
+  P8(0x34), P8(0x35), P8(0x36), P8(0x37),
+  P8(0x38), P8(0x39), P8(0x3a), P8(0x3b),
+  P8(0x3c), P8(0x3d), P8(0x3e), P8(0x3f),
+  P8(0x40), P8(0x41), P8(0x42), P8(0x44),
+  P8(0x48), P8(0x4c), P8(0x58), P8(0x5c),
+  P8(0x78),
   /* Only used for filling, ensuring that 29 and 30 of HF are the same.  */
-  0x78p8,
-  0x7cp8, 0x7ep8
+  P8(0x78),
+  P8(0x7c), P8(0x7e)
+#undef P8
 };
 
-static unsigned HOST_WIDE_INT fli_value_sf[32] =
-{
-  0xbf8p20, 0x008p20, 0x378p20, 0x380p20, 0x3b8p20, 0x3c0p20, 0x3d8p20, 
0x3e0p20,
-  0x3e8p20, 0x3eap20, 0x3ecp20, 0x3eep20, 0x3f0p20, 0x3f2p20, 0x3f4p20, 
0x3f6p20,
-  0x3f8p20, 0x3fap20, 0x3fcp20, 0x3fep20, 0x400p20, 0x402p20, 0x404p20, 
0x408p20,
-  0x410p20, 0x418p20, 0x430p20, 0x438p20, 0x470p20, 0x478p20, 0x7f8p20, 
0x7fcp20
+static const unsigned HOST_WIDE_INT fli_value_sf[32] =
+{
+#define P20(v) ((unsigned HOST_WIDE_INT) (v) << 20)
+  P20(0xbf8), P20(0x008), P20(0x378), P20(0x380),
+  P20(0x3b8), P20(0x3c0), P20(0x3d8), P20(0x3e0),
+  P20(0x3e8), P20(0x3ea), P20(0x3ec), P20(0x3ee),
+  P20(0x3f0), P20(0x3f2), P20(0x3f4), P20(0x3f6),
+  P20(0x3f8), P20(0x3fa), P20(0x3fc), P20(0x3fe),
+  P20(0x400), P20(0x402), P20(0x404), P20(0x408),
+  P20(0x410), P20(0x418), P20(0x430), P20(0x438),
+  P20(0x470), P20(0x478), P20(0x7f8), P20(0x7fc)
+#undef P20
 };
 
-static unsigned HOST_WIDE_INT fli_value_df[32] =
-{
-  0xbff0p48, 0x10p48, 0x3ef0p48, 0x3f00p48,
-  0x3f70p48, 0x3f80p48, 0x3fb0p48, 0x3fc0p48,
-  0x3fd0p48, 0x3fd4p48, 0x3fd8p48, 0x3fdcp48,
-  0x3fe0p48, 0x3fe4p48, 0x3fe8p48, 0x3fecp48,
-  0x3ff0p48, 0x3ff4p48, 0x3ff8p48, 0x3ffcp48,
-  0x4000p48, 0x4004p48, 0x4008p48, 0x4010p48,
-  0x4020p48, 0x4030p48, 0x4060p48, 0x4070p48,
-  0x40e0p48, 0x40f0p48, 0x7ff0p48, 0x7ff8p48
+static const unsigned HOST_WIDE_INT fli_value_df[32] =
+{
+#define P48(v) ((unsigned HOST_WIDE_INT) (v) << 48)
+  P48(0xbff0), P48(0x10), P48(0x3ef0), P48(0x3f00),
+  P48(0x3f70), P48(0x3f80), P48(0x3fb0), P48(0x3fc0),
+  P48(0x3fd0), P48(0x3fd4), P48(0x3fd8), P48(0x3fdc),
+  P48(0x3fe0), P48(0x3fe4), P48(0x3fe8), P48(0x3fec),
+  P48(0x3ff0), P48(0x3ff4), P48(0x3ff8), P48(0x3ffc),
+  P48(0x4000), P48(0x4004), P48(0x4008), P48(0x4010),
+  P48(0x4020), P48(0x4030), P48(0x4060), P48(0x4070),
+  P48(0x40e0), P48(0x40f0), P48(0x7ff0), P48(0x7ff8)
+#undef P48
 };
 
 /* Display floating-point values at the assembly level, which is consistent
@@ -1686,7 +1700,7 @@ const char *fli_value_print[32] =
 int
 riscv_float_const_rtx_index_for_fli (rtx x)
 {
-  unsigned HOST_WIDE_INT *fli_value_array;
+  const unsigned HOST_WIDE_INT *fli_value_array;
 
   machine_mode mode = GET_MODE (x);


[gcc(refs/users/meissner/heads/work188-bugs)] PR 99293: Optimize splat of a V2DF/V2DI extract with constant element

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:0adba44bae1fe6c88fdc2949065618efd46c10ca

commit 0adba44bae1fe6c88fdc2949065618efd46c10ca
Author: Michael Meissner 
Date:   Mon Dec 2 15:57:45 2024 -0500

PR 99293: Optimize splat of a V2DF/V2DI extract with constant element

We had optimizations for splat of a vector extract for the other vector
types, but we missed having one for V2DI and V2DF.  This patch adds a
combiner insn to do this optimization.

In looking at the source, we had similar optimizations for V4SI and V4SF
extract and splats, but we missed doing V2DI/V2DF.

Without the patch for the code:

vector long long splat_dup_l_0 (vector long long v)
{
  return __builtin_vec_splats (__builtin_vec_extract (v, 0));
}

the compiler generates (on a little endian power9):

splat_dup_l_0:
mfvsrld 9,34
mtvsrdd 34,9,9
blr

Now it generates:

splat_dup_l_0:
xxpermdi 34,34,34,3
blr

2024-12-02  Michael Meissner  

gcc/

PR target/99293
* config/rs6000/vsx.md (vsx_splat_extract_): New insn.

gcc/testsuite/

PR target/99293
* gcc.target/powerpc/builtins-1.c: Adjust insn count.
* gcc.target/powerpc/pr99293.c: New test.

Diff:
---
 gcc/config/rs6000/vsx.md  | 18 ++
 gcc/testsuite/gcc.target/powerpc/builtins-1.c |  2 +-
 gcc/testsuite/gcc.target/powerpc/pr99293.c| 22 ++
 3 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index f4f7113f5fe8..af9846391db2 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -4796,6 +4796,24 @@
   "lxvdsx %x0,%y1"
   [(set_attr "type" "vecload")])
 
+;; Optimize SPLAT of an extract from a V2DF/V2DI vector with a constant element
+(define_insn "*vsx_splat_extract_"
+  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
+   (vec_duplicate:VSX_D
+(vec_select:
+ (match_operand:VSX_D 1 "vsx_register_operand" "wa")
+ (parallel [(match_operand 2 "const_0_to_1_operand" "n")]))))]
+  "VECTOR_MEM_VSX_P (mode)"
+{
+  int which_word = INTVAL (operands[2]);
+  if (!BYTES_BIG_ENDIAN)
+which_word = 1 - which_word;
+
+  operands[3] = GEN_INT (which_word ? 3 : 0);
+  return "xxpermdi %x0,%x1,%x1,%3";
+}
+  [(set_attr "type" "vecperm")])
+
 ;; V4SI splat support
 (define_insn "vsx_splat_v4si"
   [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,wa")
diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-1.c 
b/gcc/testsuite/gcc.target/powerpc/builtins-1.c
index 8410a5fd4319..4e7e5384675f 100644
--- a/gcc/testsuite/gcc.target/powerpc/builtins-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/builtins-1.c
@@ -1035,4 +1035,4 @@ foo156 (vector unsigned short usa)
 /* { dg-final { scan-assembler-times {\mvmrglb\M} 3 } } */
 /* { dg-final { scan-assembler-times {\mvmrgew\M} 4 } } */
 /* { dg-final { scan-assembler-times {\mvsplth|xxsplth\M} 4 } } */
-/* { dg-final { scan-assembler-times {\mxxpermdi\M} 44 } } */
+/* { dg-final { scan-assembler-times {\mxxpermdi\M} 42 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr99293.c 
b/gcc/testsuite/gcc.target/powerpc/pr99293.c
new file mode 100644
index ..20adc1f27f65
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr99293.c
@@ -0,0 +1,22 @@
+/* { dg-do compile { target powerpc*-*-* } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -mvsx" } */
+
+/* Test for PR 99293, which wants to do:
+   __builtin_vec_splats (__builtin_vec_extract (v, n))
+
+   where v is a V2DF or V2DI vector and n is either 0 or 1.  Previously the
+   compiler would do a direct move to the GPR registers to select the item and 
a
+   direct move from the GPR registers to do the splat.  */
+
+vector long long splat_dup_l_0 (vector long long v)
+{
+  return __builtin_vec_splats (__builtin_vec_extract (v, 0));
+}
+
+vector long long splat_dup_l_1 (vector long long v)
+{
+  return __builtin_vec_splats (__builtin_vec_extract (v, 1));
+}
+
+/* { dg-final { scan-assembler-times "xxpermdi" 2 } } */


[gcc r15-5868] libstdc++: Disable deprecated warnings for std::rel_ops in std.cc

2024-12-02 Thread Jonathan Wakely via Gcc-cvs
https://gcc.gnu.org/g:27e5d860879c8f6075231e2a547fe8ead5f382e4

commit r15-5868-g27e5d860879c8f6075231e2a547fe8ead5f382e4
Author: Jonathan Wakely 
Date:   Mon Dec 2 16:14:01 2024 +

libstdc++: Disable deprecated warnings for std::rel_ops in std.cc

This avoids some warnings when building the std module.

libstdc++-v3/ChangeLog:

* src/c++23/std.cc.in: Disable deprecated warnings when
exporting std::rel_ops members.

Diff:
---
 libstdc++-v3/src/c++23/std.cc.in | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/libstdc++-v3/src/c++23/std.cc.in b/libstdc++-v3/src/c++23/std.cc.in
index 16e66c3d9210..7a0ff8edad6e 100644
--- a/libstdc++-v3/src/c++23/std.cc.in
+++ b/libstdc++-v3/src/c++23/std.cc.in
@@ -3151,6 +3151,8 @@ export namespace std
   using std::piecewise_construct_t;
   using std::tuple_element;
   using std::tuple_size;
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
   namespace rel_ops
   {
 using std::rel_ops::operator!=;
@@ -3158,6 +3160,7 @@ export namespace std
 using std::rel_ops::operator<=;
 using std::rel_ops::operator>=;
   }
+#pragma GCC diagnostic pop
 #if __cpp_lib_unreachable
   using std::unreachable;
 #endif


[gcc(refs/users/meissner/heads/work188-vpair)] Vector pair support.

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:e0096d84a0f6f34f1f5d417eeae1b73a5c706674

commit e0096d84a0f6f34f1f5d417eeae1b73a5c706674
Author: Michael Meissner 
Date:   Mon Dec 2 16:14:39 2024 -0500

Vector pair support.

This patch adds a new include file (vector-pair.h) that adds support so that
users writing high performance libraries can change their code to allow the
generation of the vector pair load and store instructions on power10.

The intention is that if the library authors need to write special loops that
go over arrays that they could modify their code to use the functions provided
to change loops that can take advantage of the higher bandwidth for load vector
pair and store instructions.

This particular patch just adds a new include file (vector-pair.h) that
provides a bunch of functions that on a power10 system would use the vector
pair load operation, 2 floating point operations, and a vector pair store.  It
does not add any new types, modes, or built-in functions.

I have additional patches that can add built-in functions that the functions in
vector-pair.h could utilize so that the compiler can optimize and combine
operations.  I may submit those patches in the future, but I would like to
provide this patch to allow the library writer to optimize their code.

I've measured the performance of these new functions on a power10.  For default
unrolling, the percentage of change for the 3 methods over the normal vector
loop method:

116%    Vector-pair.h function, default unroll
 93%    Vector pair split built-in & 2 vector stores, default unroll
 86%    Vector pair split & combine built-ins, default unroll

Using explicit 2 way unrolling the numbers are:

114%    Vector-pair.h function, unroll 2
106%    Vector pair split built-in & 2 vector stores, unroll 2
 98%    Vector pair split & combine built-ins, unroll 2

These new functions provided in vector-pair.h use the vector pair load/store
instructions, and don't generate extra vector moves.  Using the existing
vector pair disassemble and assemble built-ins generate extra vector moves
which can hinder performance.

If I compile the loop code for power9, there is a minor speed up for default
unrolling and more of an improvement using the framework provided in the
vector-pair.h for explicit unrolling by 2:

101%    Vector-pair.h function, default unroll for power9
107%    Vector-pair.h function, unroll 2 for power9

Of course this is a synthetic benchmark run on a quiet power10 system.  Results
would vary for real code on real systems.  However, I feel adding these
functions can allow the writers of high performance libraries to better
optimize their code.

As an example, if the library wants to code a simple fused multiply-add loop,
they might write the code as follows:

#include <altivec.h>
#include <math.h>
#include <stddef.h>

void
fma_vector (double * __restrict__ r,
const double * __restrict__ a,
const double * __restrict__ b,
size_t n)
{
  vector double * __restrict__ vr = (vector double * __restrict__)r;
  const vector double * __restrict__ va = (const vector double * __restrict__)a;
  const vector double * __restrict__ vb = (const vector double * __restrict__)b;
  size_t num_elements = sizeof (vector double) / sizeof (double);
  size_t nv = n / num_elements;
  size_t i;

  for (i = 0; i < nv; i++)
vr[i] = __builtin_vsx_xvmadddp (va[i], vb[i], vr[i]);

  for (i = nv * num_elements; i < n; i++)
r[i] = fma (a[i], b[i], r[i]);
}

The inner loop would look like:

.L3:
lxvx 0,3,9
lxvx 12,4,9
addi 10,9,16
addi 2,2,-2
lxvx 11,5,9
xvmaddadp 0,12,11
lxvx 12,4,10
lxvx 11,5,10
stxvx 0,3,9
lxvx 0,3,10
addi 9,9,32
xvmaddadp 0,12,11
stxvx 0,3,10
bdnz .L3

Now if you code the loop to use __builtin_vsx_disassemble_pair to do a vector
pair load, but then do 2 vector stores:

#include 
#include 
#include 

void
fma_mma_ld (double * __restrict__ r,
const double * __restrict__ a,
const double * __restrict__ b,
size_t n)
{
  __vector_pair * __restrict__ vp_r 

[gcc r15-5869] MAINTAINERS: add myself to write after approval

2024-12-02 Thread Claudio Bantaloukas via Gcc-cvs
https://gcc.gnu.org/g:4c857e9c0270a5e71b1f6e9d6d40962f7fef95b9

commit r15-5869-g4c857e9c0270a5e71b1f6e9d6d40962f7fef95b9
Author: Claudio Bantaloukas 
Date:   Mon Dec 2 16:26:19 2024 +

MAINTAINERS: add myself to write after approval

ChangeLog:

* MAINTAINERS: Add myself to write after approval.

Diff:
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 6851affb6cb6..7d65ed64bdda 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -345,6 +345,7 @@ Simon Baldwin   simonb  

 Richard Ballricbal02
 Scott Bambrough -   
 Wolfgang Bangerth   -   
+Claudio Bantaloukas rdfm
 Gergö Barany-   
 Thiago Jung Bauermann   -   
 Charles Baylis  cbaylis 


[gcc(refs/users/meissner/heads/work188-vpair)] Update ChangeLog.*

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:1082fdef5049adeb7852673088a04f19510d5a28

commit 1082fdef5049adeb7852673088a04f19510d5a28
Author: Michael Meissner 
Date:   Mon Dec 2 16:16:43 2024 -0500

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.vpair | 420 
 1 file changed, 420 insertions(+)

diff --git a/gcc/ChangeLog.vpair b/gcc/ChangeLog.vpair
index 8394f9d97c9c..2ab930d8a5b7 100644
--- a/gcc/ChangeLog.vpair
+++ b/gcc/ChangeLog.vpair
@@ -1,5 +1,425 @@
+ Branch work188-vpair, patch #300 
+
+Vector pair support.
+
+This patch adds a new include file (vector-pair.h) that adds support so that
+users writing high performance libraries can change their code to allow the
+generation of the vector pair load and store instructions on power10.
+
+The intention is that if the library authors need to write special loops that
+go over arrays that they could modify their code to use the functions provided
+to change loops that can take advantage of the higher bandwidth for load vector
+pair and store instructions.
+
+This particular patch just adds a new include file (vector-pair.h) that
+provides a bunch of functions that on a power10 system would use the vector
+pair load operation, 2 floating point operations, and a vector pair store.  It
+does not add any new types, modes, or built-in function.
+
+I have additional patches that can add built-in functions that the functions in
+vector-pair.h could utilize so that the compiler can optimize and combine
+operations.  I may submit those patches in the future, but I would like to
+provide this patch to allow the library writer to optimize their code.
+
+I've measured the performance of these new functions on a power10.  For default
+unrolling, the percentage of change for the 3 methods over the normal vector
+loop method:
+
+   116%Vector-pair.h function, default unroll
+93%Vector pair split built-in & 2 vector stores, default unroll
+86%Vector pair split & combine built-ins, default unroll
+
+Using explicit 2 way unrolling the numbers are:
+
+   114%Vector-pair.h function, unroll 2
+   106%Vector pair split built-in & 2 vector stores, unroll 2
+98%Vector pair split & combine built-ins, unroll 2
+
+These new functions provided in vector-pair.h use the vector pair load/store
+instructions, and don't generate extra vector moves.  Using the existing
+vector pair disassemble and assemble built-ins generate extra vector moves
+which can hinder performance.
+
+If I compile the loop code for power9, there is a minor speed up for default
+unrolling and more of an improvement using the framework provided in the
+vector-pair.h for explicit unrolling by 2:
+
+   101%Vector-pair.h function, default unroll for power9
+   107%Vector-pair.h function, unroll 2 for power9
+
+Of course this is a synthetic benchmark run on a quiet power10 system.  Results
+would vary for real code on real systems.  However, I feel adding these
+functions can allow the writers of high performance libraries to better
+optimize their code.
+
+As an example, if the library wants to code a simple fused multiply-add loop,
+they might write the code as follows:
+
+   #include 
+   #include 
+   #include 
+
+   void
+   fma_vector (double * __restrict__ r,
+   const double * __restrict__ a,
+   const double * __restrict__ b,
+   size_t n)
+   {
+ vector double * __restrict__ vr = (vector double * __restrict__)r;
+ const vector double * __restrict__ va = (const vector double * __restrict__)a;
+ const vector double * __restrict__ vb = (const vector double * __restrict__)b;
+ size_t num_elements = sizeof (vector double) / sizeof (double);
+ size_t nv = n / num_elements;
+ size_t i;
+
+ for (i = 0; i < nv; i++)
+   vr[i] = __builtin_vsx_xvmadddp (va[i], vb[i], vr[i]);
+
+ for (i = nv * num_elements; i < n; i++)
+   r[i] = fma (a[i], b[i], r[i]);
+   }
+
+The inner loop would look like:
+
+   .L3:
+   lxvx 0,3,9
+   lxvx 12,4,9
+   addi 10,9,16
+   addi 2,2,-2
+   lxvx 11,5,9
+   xvmaddadp 0,12,11
+   lxvx 12,4,10
+   lxvx 11,5,10
+   stxvx 0,3,9
+   lxvx 0,3,10
+   addi 9,9,32
+   xvmaddadp 0,12,11
+   stxvx 0,3,10
+   bdnz .L3
+
+Now if you code the loop to use __builtin_vsx_disassemble_pair to do a vector
+pair load, but then do 2 vector stores:
+
+
+   #include 
+   #include 
+   #include 
+
+   void
+   fma_mma_ld (double * __restrict__ r,
+   const double * __restrict__ a,
+   const double * __restrict__ b,
+   size_t n)
+   {
+ __vector_pair * __restrict__ vp_r 

[gcc(refs/users/aoliva/heads/testme)] fold fold_truth_andor field merging into ifcombine

2024-12-02 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:6cddc9a107c7f78590283059f002491d27fa9012

commit 6cddc9a107c7f78590283059f002491d27fa9012
Author: Alexandre Oliva 
Date:   Sun Dec 1 08:17:58 2024 -0300

fold fold_truth_andor field merging into ifcombine

This patch introduces various improvements to the logic that merges
field compares, while moving it into ifcombine.

Before the patch, we could merge:

  (a.x1 EQNE b.x1)  ANDOR  (a.y1 EQNE b.y1)

into something like:

  (((type *)&a)[Na] & MASK) EQNE (((type *)&b)[Nb] & MASK)

if both of A's fields live within the same alignment boundaries, and
so do B's, at the same relative positions.  Constants may be used
instead of the object B.

The initial goal of this patch was to enable such combinations when a
field crossed alignment boundaries, e.g. for packed types.  We can't
generally access such fields with a single memory access, so when we
come across such a compare, we will attempt to combine each access
separately.

Some merging opportunities were missed because of right-shifts,
compares expressed as e.g. ((a.x1 ^ b.x1) & MASK) EQNE 0, and
narrowing conversions, especially after earlier merges.  This patch
introduces handlers for several cases involving these.

The merging of multiple field accesses into wider bitfield-like
accesses is undesirable to do too early in compilation, so we move it
from folding to ifcombine.

When it is the second of a noncontiguous pair of compares that first
accesses a word, we may merge the first compare with part of the
second compare that refers to the same word, keeping the compare of
the remaining bits at the spot where the second compare used to be.

Handling compares with non-constant fields was somewhat generalized
from what fold used to do, now handling non-adjacent fields, even if a
field of one object crosses an alignment boundary but the other
doesn't.


for  gcc/ChangeLog

* fold-const.cc (make_bit_field): Export.
(unextend, all_ones_mask_p): Drop.
(decode_field_reference, fold_truth_andor_1): Move
field compare merging logic...
* gimple-fold.cc: (fold_truth_andor_for_ifcombine) ... here.
(compute_split_boundary_from_align): New.
(make_bit_field_load, build_split_load): New.
(reuse_split_load): New.
* fold-const.h: (make_bit_field_ref): Declare
(fold_truth_andor_for_ifcombine): Declare.
* match.pd (any_convert, bit_and_cst, rshift_cst): New.
* tree-ssa-ifcombine.cc (ifcombine_ifandif): Try
fold_truth_andor_for_ifcombine.

for  gcc/testsuite/ChangeLog

* gcc.dg/field-merge-1.c: New.
* gcc.dg/field-merge-2.c: New.
* gcc.dg/field-merge-3.c: New.
* gcc.dg/field-merge-4.c: New.
* gcc.dg/field-merge-5.c: New.
* gcc.dg/field-merge-6.c: New.
* gcc.dg/field-merge-7.c: New.
* gcc.dg/field-merge-8.c: New.
* gcc.dg/field-merge-9.c: New.
* gcc.dg/field-merge-10.c: New.
* gcc.dg/field-merge-11.c: New.

Diff:
---
 gcc/fold-const.cc |  512 +--
 gcc/fold-const.h  |   10 +
 gcc/gimple-fold.cc| 1107 +
 gcc/match.pd  |   11 +
 gcc/testsuite/gcc.dg/field-merge-1.c  |   64 ++
 gcc/testsuite/gcc.dg/field-merge-10.c |   36 ++
 gcc/testsuite/gcc.dg/field-merge-11.c |   32 +
 gcc/testsuite/gcc.dg/field-merge-2.c  |   31 +
 gcc/testsuite/gcc.dg/field-merge-3.c  |   36 ++
 gcc/testsuite/gcc.dg/field-merge-4.c  |   40 ++
 gcc/testsuite/gcc.dg/field-merge-5.c  |   40 ++
 gcc/testsuite/gcc.dg/field-merge-6.c  |   26 +
 gcc/testsuite/gcc.dg/field-merge-7.c  |   23 +
 gcc/testsuite/gcc.dg/field-merge-8.c  |   25 +
 gcc/testsuite/gcc.dg/field-merge-9.c  |   36 ++
 gcc/tree-ssa-ifcombine.cc |   14 +-
 16 files changed, 1534 insertions(+), 509 deletions(-)

diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
index 1e8ae1ab493b..644966459864 100644
--- a/gcc/fold-const.cc
+++ b/gcc/fold-const.cc
@@ -137,7 +137,6 @@ static tree range_successor (tree);
 static tree fold_range_test (location_t, enum tree_code, tree, tree, tree);
 static tree fold_cond_expr_with_comparison (location_t, tree, enum tree_code,
tree, tree, tree, tree);
-static tree unextend (tree, int, int, tree);
 static tree extract_muldiv (tree, tree, enum tree_code, tree, bool *);
 static tree extract_muldiv_1 (tree, tree, enum tree_code, tree, bool *);
 static tree fold_binary_op_with_conditional_arg (location_t,
@@ -4711,7 +4710,7 @@ invert_truthvalue_loc (location_t loc, tree arg)
is the original memory reference used to preserve the alias set of
the 

[gcc r14-11019] [PATCH] PR modula2/117555: Add missing return statement after raise

2024-12-02 Thread Gaius Mulley via Gcc-cvs
https://gcc.gnu.org/g:89761f1f03565468eb3b15259f6ad42af0cfe198

commit r14-11019-g89761f1f03565468eb3b15259f6ad42af0cfe198
Author: Gaius Mulley 
Date:   Mon Dec 2 14:34:32 2024 +

[PATCH] PR modula2/117555: Add missing return statement after raise

This patch adds missing return statements after a call to RAISE.  Four
of the modules in libgm2 have procedure functions with missing return
statements.  These errors were exposed after the reimplementation of
parameter declaration patch and triggered by -Wreturn-type.  The patch
also adds exit statements to the M2RTS noreturn functions.

gcc/m2/ChangeLog:

PR modula2/117555
* gm2-libs-iso/EXCEPTIONS.mod (CurrentNumber): Add return
statement.
* gm2-libs-iso/IOChan.mod (ReadResult): Ditto.
(CurrentFlags): Ditto.
(DeviceError): Ditto.
* gm2-libs-iso/IOLink.mod (DeviceTablePtrValue): Ditto.
* gm2-libs-iso/LongConv.mod (ValueReal): Ditto.
* gm2-libs/M2RTS.mod (Halt): Add noreturn attribute.
Add exit (1).
(HaltC): Add exit (1).
* pge-boot/GM2RTS.cc (M2RTS_Halt): Add exit (1).
(M2RTS_HaltC): Ditto.

(cherry picked from commit e77fd9aa89c210db6006fcefb03d80bae0fae851)

Signed-off-by: Gaius Mulley 

Diff:
---
 gcc/m2/gm2-libs-iso/EXCEPTIONS.mod |  3 ++-
 gcc/m2/gm2-libs-iso/IOChan.mod | 15 ++-
 gcc/m2/gm2-libs-iso/IOLink.mod |  3 ++-
 gcc/m2/gm2-libs-iso/LongConv.mod   |  3 ++-
 gcc/m2/gm2-libs/M2RTS.mod  |  8 +---
 gcc/m2/pge-boot/GM2RTS.cc  |  2 ++
 6 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/gcc/m2/gm2-libs-iso/EXCEPTIONS.mod b/gcc/m2/gm2-libs-iso/EXCEPTIONS.mod
index 21ddd3267494..6154e8b68fe3 100644
--- a/gcc/m2/gm2-libs-iso/EXCEPTIONS.mod
+++ b/gcc/m2/gm2-libs-iso/EXCEPTIONS.mod
@@ -81,7 +81,8 @@ BEGIN
ELSE
   RTExceptions.Raise(ORD(M2EXCEPTION.coException),
  ADR(__FILE__), __LINE__, __COLUMN__, 
ADR(__FUNCTION__),
- ADR('current coroutine is not in the exceptional 
execution state'))
+ ADR('current coroutine is not in the exceptional 
execution state')) ;
+  RETURN VAL (ExceptionNumber, M2EXCEPTION.exException)
END
 END CurrentNumber ;
 
diff --git a/gcc/m2/gm2-libs-iso/IOChan.mod b/gcc/m2/gm2-libs-iso/IOChan.mod
index 3361c0393391..5287a1bf9d21 100644
--- a/gcc/m2/gm2-libs-iso/IOChan.mod
+++ b/gcc/m2/gm2-libs-iso/IOChan.mod
@@ -459,7 +459,8 @@ BEGIN
IF dtp=NIL
THEN
   RAISE(iochan, ORD(hardDeviceError),
-'IOChan.SetReadResult: device table ptr is NIL')
+'IOChan.SetReadResult: device table ptr is NIL') ;
+  RETURN IOConsts.notKnown
ELSE
   RETURN( dtp^.result )
END
@@ -471,8 +472,9 @@ END ReadResult ;
 PROCEDURE CurrentFlags (cid: ChanId) : ChanConsts.FlagSet ;
   (* Returns the set of flags that currently apply to the channel cid. *)
 VAR
-   did: IOLink.DeviceId ;
-   dtp: IOLink.DeviceTablePtr ;
+   did  : IOLink.DeviceId ;
+   dtp  : IOLink.DeviceTablePtr ;
+   empty: ChanConsts.FlagSet ;
 BEGIN
CheckValid(cid) ;
did := RTio.GetDeviceId(cid) ;
@@ -480,7 +482,9 @@ BEGIN
IF dtp=NIL
THEN
   RAISE(iochan, ORD(hardDeviceError),
-'IOChan.SetReadResult: device table ptr is NIL')
+'IOChan.SetReadResult: device table ptr is NIL') ;
+  empty := ChanConsts.FlagSet {} ;
+  RETURN empty
ELSE
   RETURN( dtp^.flags )
END
@@ -537,7 +541,8 @@ BEGIN
IF dtp=NIL
THEN
   RAISE(iochan, ORD(hardDeviceError),
-'IOChan.DeviceError: device table ptr is NIL')
+'IOChan.DeviceError: device table ptr is NIL') ;
+  RETURN DeviceError (invalid)
ELSE
   RETURN( dtp^.errNum )
END
diff --git a/gcc/m2/gm2-libs-iso/IOLink.mod b/gcc/m2/gm2-libs-iso/IOLink.mod
index 8fdc83bad025..c01698e56ae2 100644
--- a/gcc/m2/gm2-libs-iso/IOLink.mod
+++ b/gcc/m2/gm2-libs-iso/IOLink.mod
@@ -284,7 +284,8 @@ BEGIN
  RETURN( RTio.GetDevicePtr(cid) )
   ELSE
  EXCEPTIONS.RAISE(iolink, ORD(IOChan.wrongDevice),
-  'IOLink.DeviceTablePtrValue: channel does belong to 
device')
+  'IOLink.DeviceTablePtrValue: channel does belong to 
device') ;
+ RETURN NIL
   END
END
 END DeviceTablePtrValue ;
diff --git a/gcc/m2/gm2-libs-iso/LongConv.mod b/gcc/m2/gm2-libs-iso/LongConv.mod
index 056fc1ee1ece..fb350058c885 100644
--- a/gcc/m2/gm2-libs-iso/LongConv.mod
+++ b/gcc/m2/gm2-libs-iso/LongConv.mod
@@ -257,7 +257,8 @@ BEGIN
   RETURN( doValueReal(str) )
ELSE
   EXCEPTIONS.RAISE(realConv, ORD(invalid),
-   'LongConv.' + __FUNCTION__ + ': real number is invalid')
+   'LongConv.' + __FUNCTION__ + ': real number is 
invalid') ;
+  RETURN 0.0
END
 END ValueReal ;
 
diff

[gcc(refs/users/meissner/heads/work188-dmf)] RFC2655-Add saturating subtract built-ins.

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:a0c9d1462e9409ac26f39caa06c2fe28e6f9b74b

commit a0c9d1462e9409ac26f39caa06c2fe28e6f9b74b
Author: Michael Meissner 
Date:   Mon Dec 2 16:21:43 2024 -0500

RFC2655-Add saturating subtract built-ins.

This patch adds support for a saturating subtract built-in function that may be
added to a future PowerPC processor.  Note, if it is added, the name of the
built-in function may change before GCC 13 is released.  If the name changes,
we will submit a patch changing the name.

I also added support for providing dense math built-in functions, even though
at present, we have not added any new built-in functions for dense math.  It is
likely we will want to add new dense math built-in functions as the dense math
support is fleshed out.

The patches have been tested on both little and big endian systems.  Can I check
it into the master branch?

2024-12-02   Michael Meissner  

gcc/

* config/rs6000/rs6000-builtin.cc (rs6000_invalid_builtin): Add support
for flagging invalid use of future built-in functions.
(rs6000_builtin_is_supported): Add support for future built-in
functions.
* config/rs6000/rs6000-builtins.def (__builtin_saturate_subtract32): New
built-in function for -mcpu=future.
(__builtin_saturate_subtract64): Likewise.
* config/rs6000/rs6000-gen-builtins.cc (enum bif_stanza): Add stanzas
for -mcpu=future built-ins.
(stanza_map): Likewise.
(enable_string): Likewise.
(struct attrinfo): Likewise.
(parse_bif_attrs): Likewise.
(write_decls): Likewise.
* config/rs6000/rs6000.md (sat_sub3): Add saturating subtract
built-in insn declarations.
(sat_sub3_dot): Likewise.
(sat_sub3_dot2): Likewise.
* doc/extend.texi (Future PowerPC built-ins): New section.

gcc/testsuite/

* gcc.target/powerpc/subfus-1.c: New test.
* gcc.target/powerpc/subfus-2.c: Likewise.

Diff:
---
 gcc/config/rs6000/rs6000-builtin.cc | 17 
 gcc/config/rs6000/rs6000-builtins.def   | 10 +
 gcc/config/rs6000/rs6000-gen-builtins.cc| 35 ++---
 gcc/config/rs6000/rs6000.md | 60 +
 gcc/doc/extend.texi | 24 
 gcc/testsuite/gcc.target/powerpc/subfus-1.c | 32 +++
 gcc/testsuite/gcc.target/powerpc/subfus-2.c | 32 +++
 7 files changed, 205 insertions(+), 5 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc
index 8e4335e9b44f..a5f33eb9da18 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -139,6 +139,17 @@ rs6000_invalid_builtin (enum rs6000_gen_builtins fncode)
 case ENB_MMA:
   error ("%qs requires the %qs option", name, "-mmma");
   break;
+case ENB_FUTURE:
+  error ("%qs requires the %qs option", name, "-mcpu=future");
+  break;
+case ENB_FUTURE_64:
+  error ("%qs requires the %qs option and either the %qs or %qs option",
+name, "-mcpu=future", "-m64", "-mpowerpc64");
+  break;
+case ENB_DM:
+  error ("%qs requires the %qs or %qs options", name, "-mcpu=future",
+"-mdense-math");
+  break;
 default:
 case ENB_ALWAYS:
   gcc_unreachable ();
@@ -194,6 +205,12 @@ rs6000_builtin_is_supported (enum rs6000_gen_builtins fncode)
   return TARGET_HTM;
 case ENB_MMA:
   return TARGET_MMA;
+case ENB_FUTURE:
+  return TARGET_FUTURE;
+case ENB_FUTURE_64:
+  return TARGET_FUTURE && TARGET_POWERPC64;
+case ENB_DM:
+  return TARGET_DENSE_MATH;
 default:
   gcc_unreachable ();
 }
diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def
index 69046fd22442..84de393bc597 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -137,6 +137,8 @@
 ;   endian   Needs special handling for endianness
 ;   ibmldRestrict usage to the case when TFmode is IBM-128
 ;   ibm128   Restrict usage to the case where __ibm128 is supported or if ibmld
+;   future   Restrict usage to future instructions
+;   dm   Restrict usage to dense math
 ;
 ; Each attribute corresponds to extra processing required when
 ; the built-in is expanded.  All such special processing should
@@ -3933,3 +3935,11 @@
 
   void __builtin_vsx_stxvp (v256, unsigned long, const v256 *);
 STXVP nothing {mma,pair}
+
+[future]
+  const signed int __builtin_saturate_subtract32 (signed int, signed int);
+  SAT_SUBSI sat_subsi3 {}
+
+[future-64]
+  const signed long __builtin_saturate_subtract64 (signed long,  signed long);
+  SAT_SUBDI sat_subdi3 {}
diff --git a/gcc/config/rs6000/rs6000-gen-builtins.cc 
b/gcc/confi

[gcc(refs/users/meissner/heads/work188-sha)] PR target/117251: Add PowerPC XXEVAL support to speed up SHA3 calculations

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:eec27cbb93e05c8ca77a0bd45875e1e14f4a1921

commit eec27cbb93e05c8ca77a0bd45875e1e14f4a1921
Author: Michael Meissner 
Date:   Mon Dec 2 16:32:02 2024 -0500

PR target/117251: Add PowerPC XXEVAL support to speed up SHA3 calculations

The multibuff.c benchmark attached to the PR target/117251 compiled for Power10
PowerPC that implements SHA3 has a slowdown in the current trunk and GCC 14
compared to GCC 11 - GCC 13, due to excessive amounts of spilling.

The main function for the multibuff.c file has 3,747 lines, all of which are
using vector unsigned long long.  There are 696 vector rotates (all rotates are
constant), 1,824 vector xor's and 600 vector andc's.

In looking at it, the main thing that stands out is the reason for either
spilling or moving variables is the support in fusion.md (generated by
genfusion.pl) that tries to fuse the vec_andc feeding into vec_xor, and other
vec_xor's feeding into vec_xor.

On the powerpc for power10, there is a special fusion mode that happens if the
machine has a VANDC or VXOR instruction that is adjacent to a VXOR instruction
and the VANDC/VXOR feeds into the 2nd VXOR instruction.

While the Power10 has 64 vector registers (which uses the XXL prefix to do
logical operations), the fusion only works with the older Altivec instruction
set (which uses the V prefix).  The Altivec instruction set only has 32 vector
registers (which are overlaid over the VSX vector registers 32-63).

By having the combiner patterns fuse_vandc_vxor and fuse_vxor_vxor to do this
fusion, it means that the register allocator has more register pressure for the
traditional Altivec registers instead of the VSX registers.

In addition, since there are vector rotates, these rotates only work on the
traditional Altivec registers, which adds to the Altivec register pressure.

Finally in addition to doing the explicit xor, andc, and rotates using the
Altivec registers, we have to also load vector constants for the rotate amount
and these registers also are allocated as Altivec registers.

Current trunk and GCC 12-14 have more vector spills than GCC 11, but GCC 11 has
many more vector moves than the later compilers.  Thus even though it has way
fewer spills, the vector moves are why GCC 11 has the slowest results.

There is an instruction that was added in power10 (XXEVAL) that does provide
fusion between VSX vectors that includes ANDC->XOR and XOR->XOR fusion.

The latency of XXEVAL is slightly more than the fused VANDC/VXOR or VXOR/VXOR,
so I have written the patch to prefer doing the Altivec instructions if they
don't need a temporary register.

Here are the results for adding support for XXEVAL for the multibuff.c
benchmark attached to the PR.  Note that we essentially recover the speed with
this patch that was lost with GCC 14 and the current trunk:

                                XXEVAL  Trunk   GCC14   GCC13   GCC12   GCC11
                                ------  -----   -----   -----   -----   -----
Benchmark time in seconds         5.53   6.15    6.26    5.57    5.61    9.56

Fuse VANDC -> VXOR                 209    600     600     600     600     600
Fuse VXOR -> VXOR                    0    240     240     120     120     120
XXEVAL to fuse ANDC -> XOR         391      0       0       0       0       0
XXEVAL to fuse XOR -> XOR          240      0       0       0       0       0

Spill vector to stack               78    364     364     172     184     110
Load spilled vector from stack     431    962     962     713     723     166
Vector moves                        10    100     100      70      72   3,055

Vector rotate right                696    696     696     696     696     696
XXLANDC or VANDC                   209    600     600     600     600     600
XXLXOR or VXOR                     953  1,824   1,824   1,824   1,824   1,825
XXEVAL                             631      0       0       0       0       0

Load vector rotate constants        24     24      24      24      24      24

Here are the results for adding support for XXEVAL for the singlebuff.c
benchmark attached to the PR.  Note that adding XXEVAL greatly speeds up this
particular benchmark:

                                XXEVAL  Trunk   GCC14   GCC13   GCC12   GCC11
                                ------  -----   -----   -----   -----   -----
Benchmark time in seconds         4.46   5.40    5.40    5.35    5.36    7.54

Fuse VANDC -> VXOR                 210    600     600     600     600     600
Fuse VXOR -> VXOR                    0    240     240     120     120     120
XXEVAL to fuse ANDC -> XOR   3900   0  

[gcc(refs/users/meissner/heads/work188-dmf)] RFC2686-Add paddis support.

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:ec84b5b5868c61f01b1aa059da6257ae714aba36

commit ec84b5b5868c61f01b1aa059da6257ae714aba36
Author: Michael Meissner 
Date:   Mon Dec 2 16:24:29 2024 -0500

RFC2686-Add paddis support.

2024-11-22  Michael Meissner  

gcc/

* config/rs6000/constraints.md (eU): New constraint.
(eV): Likewise.
* config/rs6000/predicates.md (paddis_operand): New predicate.
(paddis_paddi_operand): Likewise.
(add_operand): Add paddis support.
* config/rs6000/rs6000.cc (num_insns_constant_gpr): Add paddis support.
(num_insns_constant_multi): Likewise.
(print_operand): Add %B for paddis support.
* config/rs6000/rs6000.h (TARGET_PADDIS): New macro.
(SIGNED_INTEGER_32BIT_P): Likewise.
* config/rs6000/rs6000.md (isa attribute): Add paddis support.
(enabled attribute): Likewise.
(add3): Likewise.
(adddi3 splitter): New splitter for paddis.
(movdi_internal64): Add paddis support.
(movdi splitter): New splitter for paddis.

gcc/testsuite/

* gcc.target/powerpc/prefixed-addis.c: New test.

Diff:
---
 gcc/config/rs6000/constraints.md  | 10 +++
 gcc/config/rs6000/predicates.md   | 52 +++-
 gcc/config/rs6000/rs6000.cc   | 25 ++
 gcc/config/rs6000/rs6000.h|  4 +
 gcc/config/rs6000/rs6000.md   | 96 ---
 gcc/testsuite/gcc.target/powerpc/prefixed-addis.c | 24 ++
 6 files changed, 197 insertions(+), 14 deletions(-)

diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index 277a30a82458..4d8d21fd6bbb 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -222,6 +222,16 @@
   "An IEEE 128-bit constant that can be loaded into VSX registers."
   (match_operand 0 "easy_vector_constant_ieee128"))
 
+(define_constraint "eU"
+  "@internal integer constant that can be loaded with paddis"
+  (and (match_code "const_int")
+   (match_operand 0 "paddis_operand")))
+
+(define_constraint "eV"
+  "@internal integer constant that can be loaded with paddis + paddi"
+  (and (match_code "const_int")
+   (match_operand 0 "paddis_paddi_operand")))
+
 ;; Floating-point constraints.  These two are defined so that insn
 ;; length attributes can be calculated exactly.
 
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 2797c3cf619b..f8e7df5e7f5b 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -369,6 +369,53 @@
   return SIGNED_INTEGER_34BIT_P (INTVAL (op));
 })
 
+;; Return 1 if op is a 64-bit constant that uses the paddis instruction
+(define_predicate "paddis_operand"
+  (match_code "const_int")
+{
+  if (!TARGET_PADDIS && TARGET_POWERPC64)
+return 0;
+
+  /* If addi, addis, or paddi can handle the number, don't return true.  */
+  HOST_WIDE_INT value = INTVAL (op);
+  if (SIGNED_INTEGER_34BIT_P (value))
+return false;
+
+  /* If the number is too large for padds, return false.  */
+  if (!SIGNED_INTEGER_32BIT_P (value >> 32))
+return false;
+
+  /* If the bottom 32-bits are non-zero, paddis can't handle it.  */
+  if ((value & HOST_WIDE_INT_C(0x)) != 0)
+return false;
+
+  return true;
+})
+
+;; Return 1 if op is a 64-bit constant that needs the paddis instruction and an
+;; addi/addis/paddi instruction combination.
+(define_predicate "paddis_paddi_operand"
+  (match_code "const_int")
+{
+  if (!TARGET_PADDIS && TARGET_POWERPC64)
+return 0;
+
+  /* If addi, addis, or paddi can handle the number, don't return true.  */
+  HOST_WIDE_INT value = INTVAL (op);
+  if (SIGNED_INTEGER_34BIT_P (value))
+return false;
+
+  /* If the number is too large for padds, return false.  */
+  if (!SIGNED_INTEGER_32BIT_P (value >> 32))
+return false;
+
+  /* If the bottom 32-bits are zero, we can use paddis alone to handle it.  */
+  if ((value & HOST_WIDE_INT_C(0x)) == 0)
+return false;
+
+  return true;
+})
+
 ;; Return 1 if op is a register that is not special.
 ;; Disallow (SUBREG:SF (REG:SI)) and (SUBREG:SI (REG:SF)) on VSX systems where
 ;; you need to be careful in moving a SFmode to SImode and vice versa due to
@@ -1113,7 +1160,10 @@
   (if_then_else (match_code "const_int")
 (match_test "satisfies_constraint_I (op)
 || satisfies_constraint_L (op)
-|| satisfies_constraint_eI (op)")
+|| satisfies_constraint_eI (op)
+|| satisfies_constraint_eU (op)
+|| satisfies_constraint_eV (op)")
+
 (match_operand 0 "gpc_reg_operand")))
 
 ;; Return 1 if the operand is either a non-special register, or 0, or -1.
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 36e33de51119..b879b3407d88 100644
--- a/gcc/conf

[gcc(refs/users/meissner/heads/work188-sha)] Add potential p-future XVRLD and XVRLDI instructions.

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:8346610803fd729fe7e4572f2c733fe4bfa36788

commit 8346610803fd729fe7e4572f2c733fe4bfa36788
Author: Michael Meissner 
Date:   Mon Dec 2 16:34:09 2024 -0500

Add potential p-future XVRLD and XVRLDI instructions.

2024-12-02  Michael Meissner  

gcc/

* config/rs6000/altivec.md (altivec_vrl): Add support for a
possible XVRLD instruction in the future.
(altivec_vrl_immediate): New insns.
* config/rs6000/predicates.md (vector_shift_immediate): New 
predicate.
* config/rs6000/rs6000.h (TARGET_XVRLW): New macro.
* config/rs6000/rs6000.md (isa attribute): Add xvrlw.
(enabled attribute): Add support for xvrlw.

gcc/testsuite/

* gcc.target/powerpc/vector-rotate-left.c: New test.

Diff:
---
 gcc/config/rs6000/altivec.md   | 35 +++---
 gcc/config/rs6000/predicates.md| 26 
 gcc/config/rs6000/rs6000.h |  3 ++
 gcc/config/rs6000/rs6000.md|  6 +++-
 .../gcc.target/powerpc/vector-rotate-left.c| 34 +
 5 files changed, 99 insertions(+), 5 deletions(-)

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index b6a778ef6179..abe6130a94e3 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -1982,12 +1982,39 @@
 }
   [(set_attr "type" "vecperm")])
 
+;; -mcpu=future adds a vector rotate left word variant.  There is no vector
+;; byte/half-word/double-word/quad-word rotate left.  This insn occurs before
+;; altivec_vrl and will match for -mcpu=future, while other cpus will
+;; match the generic insn.
+;; However for testing, allow other xvrl variants.  In particular, XVRLD for
+;; the sha3 tests for multibuf/singlebuf.
 (define_insn "altivec_vrl"
-  [(set (match_operand:VI2 0 "register_operand" "=v")
-(rotate:VI2 (match_operand:VI2 1 "register_operand" "v")
-   (match_operand:VI2 2 "register_operand" "v")))]
+  [(set (match_operand:VI2 0 "register_operand" "=v,wa")
+(rotate:VI2 (match_operand:VI2 1 "register_operand" "v,wa")
+   (match_operand:VI2 2 "register_operand" "v,wa")))]
   ""
-  "vrl %0,%1,%2"
+  "@
+   vrl %0,%1,%2
+   xvrl %x0,%x1,%x2"
+  [(set_attr "type" "vecsimple")
+   (set_attr "isa" "*,xvrlw")])
+
+(define_insn "*altivec_vrl_immediate"
+  [(set (match_operand:VI2 0 "register_operand" "=wa,wa,wa,wa")
+   (rotate:VI2 (match_operand:VI2 1 "register_operand" "wa,wa,wa,wa")
+   (match_operand:VI2 2 "vector_shift_immediate" 
"j,wM,wE,wS")))]
+  "TARGET_XVRLW && "
+{
+  rtx op2 = operands[2];
+  int value = 256;
+  int num_insns = -1;
+
+  if (!xxspltib_constant_p (op2, mode, &num_insns, &value))
+gcc_unreachable ();
+
+  operands[3] = GEN_INT (value & 0xff);
+  return "xvrli %x0,%x1,%3";
+}
   [(set_attr "type" "vecsimple")])
 
 (define_insn "altivec_vrlq"
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 1d95e34557e5..fccfbd7e4904 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -728,6 +728,32 @@
   return num_insns == 1;
 })
 
+;; Return 1 if the operand is a CONST_VECTOR whose elements are all the
+;; same and the elements can be an immediate shift or rotate factor
+(define_predicate "vector_shift_immediate"
+  (match_code "const_vector,vec_duplicate,const_int")
+{
+  int value = 256;
+  int num_insns = -1;
+
+  if (zero_constant (op, mode) || all_ones_constant (op, mode))
+return true;
+
+  if (!xxspltib_constant_p (op, mode, &num_insns, &value))
+return false;
+
+  switch (mode)
+{
+case V16QImode: return IN_RANGE (value, 0, 7);
+case V8HImode:  return IN_RANGE (value, 0, 15);
+case V4SImode:  return IN_RANGE (value, 0, 31);
+case V2DImode:  return IN_RANGE (value, 0, 63);
+default:break;
+}
+
+  return false;
+})
+  
 ;; Return 1 if the operand is a CONST_VECTOR and can be loaded into a
 ;; vector register without using memory.
 (define_predicate "easy_vector_constant"
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 392ca858fc40..52eb5136c932 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -575,6 +575,9 @@ extern int rs6000_vector_align[];
below.  */
 #define RS6000_FN_TARGET_INFO_HTM 1
 
+/* Whether we have XVRLW support.  */
+#define TARGET_XVRLW   TARGET_FUTURE
+
 /* Whether the various reciprocal divide/square root estimate instructions
exist, and whether we should automatically generate code for the instruction
by default.  */
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 940dd3cad3b8..6803d5c1934d 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -369,7 +369,7 @@
   (const (symbol_ref "(enum attr_cpu) rs6000_tune")))
 
 ;; The ISA we imple

[gcc r14-11023] [PATCH] PR modula2/116918 -fswig correct syntax fixup

2024-12-02 Thread Gaius Mulley via Gcc-cvs
https://gcc.gnu.org/g:58ef1c521c15d3c7755e0f96b472b625122ab609

commit r14-11023-g58ef1c521c15d3c7755e0f96b472b625122ab609
Author: Gaius Mulley 
Date:   Mon Dec 2 21:34:38 2024 +

[PATCH] PR modula2/116918 -fswig correct syntax fixup

This patch adds a missing % escape in DoCheckUnbounded.

gcc/m2/ChangeLog:

PR modula2/116918
* gm2-compiler/M2Swig.mod (DoCheckUnbounded): Escape
the % character used in array_functions with %%.

(cherry picked from commit e2a701cd0419a1898a2a359711572a8bcf4f8eb3)

Signed-off-by: Gaius Mulley 

Diff:
---
 gcc/m2/gm2-compiler/M2Swig.mod | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/m2/gm2-compiler/M2Swig.mod b/gcc/m2/gm2-compiler/M2Swig.mod
index b7f34426adb8..7ef8ff36da77 100644
--- a/gcc/m2/gm2-compiler/M2Swig.mod
+++ b/gcc/m2/gm2-compiler/M2Swig.mod
@@ -887,7 +887,7 @@ BEGIN
  fprintf0(f, ') };\n') ;
  IF typeUnique
  THEN
-fprintf0(f, '%array_functions(') ;
+fprintf0(f, '%%array_functions(') ;
 DoType(type) ;
 fprintf0(f, ', ') ;
 DoType(type) ;


[gcc(refs/users/meissner/heads/work188-dmf)] RFC2656-Support load/store vector with right length.

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:38fb2060921fe268873c8753a8a3956f9092cda2

commit 38fb2060921fe268873c8753a8a3956f9092cda2
Author: Michael Meissner 
Date:   Mon Dec 2 16:20:29 2024 -0500

RFC2656-Support load/store vector with right length.

This patch adds support for new instructions that may be added to the 
PowerPC
architecture in the future to enhance the load and store vector with length
instructions.

The current instructions (lxvl, lxvll, stxvl, and stxvll) are inconvenient to 
use
since the count for the number of bytes must be in the top 8 bits of the GPR
register, instead of the bottom 8 bits.  This meant that code generating 
these
instructions typically had to do a shift left by 56 bits to get the count 
into
the right position.  In a future version of the PowerPC architecture, new
variants of these instructions might be added that expect the count to be in
the bottom 8 bits of the GPR register.  These patches add this support to 
GCC
if the user uses the -mcpu=future option.

I discovered that the code in rs6000-string.cc to generate ISA 3.1 
lxvl/stxvl
future lxvll/stxvll instructions would generate these instructions on 
32-bit.
However, the patterns for these instructions are only defined on 64-bit systems. 
 So
I added a check for 64-bit support before generating the instructions.

The patches have been tested on both little and big endian systems.  Can I 
check
it into the master branch?

2024-12-02   Michael Meissner  

gcc/

* config/rs6000/rs6000-string.cc (expand_block_move): Do not 
generate
lxvl and stxvl on 32-bit.
* config/rs6000/vsx.md (lxvl): If -mcpu=future, generate the lxvl 
with
the shift count automatically used in the insn.
(lxvrl): New insn for -mcpu=future.
(lxvrll): Likewise.
(stxvl): If -mcpu=future, generate the stxvl with the shift count
automatically used in the insn.
(stxvrl): New insn for -mcpu=future.
(stxvrll): Likewise.

gcc/testsuite/

* gcc.target/powerpc/lxvrl.c: New test.
* lib/target-supports.exp 
(check_effective_target_powerpc_future_ok):
New effective target.

Diff:
---
 gcc/config/rs6000/rs6000-string.cc   |   1 +
 gcc/config/rs6000/vsx.md | 122 +--
 gcc/testsuite/gcc.target/powerpc/lxvrl.c |  32 
 3 files changed, 134 insertions(+), 21 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-string.cc 
b/gcc/config/rs6000/rs6000-string.cc
index b633d80110d0..afcbe4fef657 100644
--- a/gcc/config/rs6000/rs6000-string.cc
+++ b/gcc/config/rs6000/rs6000-string.cc
@@ -2786,6 +2786,7 @@ expand_block_move (rtx operands[], bool might_overlap)
 
   if (TARGET_MMA && TARGET_BLOCK_OPS_UNALIGNED_VSX
  && TARGET_BLOCK_OPS_VECTOR_PAIR
+ && TARGET_POWERPC64
  && bytes >= 32
  && (align >= 256 || !STRICT_ALIGNMENT))
{
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index f4f7113f5fe8..43c10a1b0970 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -5710,20 +5710,32 @@
   DONE;
 })
 
-;; Load VSX Vector with Length
+;; Load VSX Vector with Length.  If we have lxvrl, we don't have to do an
+;; explicit shift left into a pseudo.
 (define_expand "lxvl"
-  [(set (match_dup 3)
-(ashift:DI (match_operand:DI 2 "register_operand")
-   (const_int 56)))
-   (set (match_operand:V16QI 0 "vsx_register_operand")
-   (unspec:V16QI
-[(match_operand:DI 1 "gpc_reg_operand")
-  (mem:V16QI (match_dup 1))
- (match_dup 3)]
-UNSPEC_LXVL))]
+  [(use (match_operand:V16QI 0 "vsx_register_operand"))
+   (use (match_operand:DI 1 "gpc_reg_operand"))
+   (use (match_operand:DI 2 "gpc_reg_operand"))]
   "TARGET_P9_VECTOR && TARGET_64BIT"
 {
-  operands[3] = gen_reg_rtx (DImode);
+  rtx shift_len = gen_rtx_ASHIFT (DImode, operands[2], GEN_INT (56));
+  rtx len;
+
+  if (TARGET_FUTURE)
+len = shift_len;
+  else
+{
+  len = gen_reg_rtx (DImode);
+  emit_insn (gen_rtx_SET (len, shift_len));
+}
+
+  rtx dest = operands[0];
+  rtx addr = operands[1];
+  rtx mem = gen_rtx_MEM (V16QImode, addr);
+  rtvec rv = gen_rtvec (3, addr, mem, len);
+  rtx lxvl = gen_rtx_UNSPEC (V16QImode, rv, UNSPEC_LXVL);
+  emit_insn (gen_rtx_SET (dest, lxvl));
+  DONE;
 })
 
 (define_insn "*lxvl"
@@ -5747,6 +5759,34 @@
   "lxvll %x0,%1,%2"
   [(set_attr "type" "vecload")])
 
+;; For lxvrl and lxvrll, use the combiner to eliminate the shift.  The
+;; define_expand for lxvl will already incorporate the shift in generating the
+;; insn.  The lxvll built-in function required the user to have already done
+;; the shift.  Defining lxvrll this way, will optimize cases where the user has
+;; done the shift immediately before the built-in.
+(define_insn "*lxvrl"
+  [(set (match

[gcc] Created branch 'meissner/heads/work188-libs' in namespace 'refs/users'

2024-12-02 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work188-libs' was created in namespace 'refs/users' 
pointing to:

 d92b2e78950f... Add ChangeLog.meissner and REVISION.


[gcc(refs/users/meissner/heads/work188-sha)] Update ChangeLog.*

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:1f0a811ac9f0cbbc94cb2eff02342af736f2024c

commit 1f0a811ac9f0cbbc94cb2eff02342af736f2024c
Author: Michael Meissner 
Date:   Mon Dec 2 16:36:13 2024 -0500

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.sha | 166 ++
 1 file changed, 166 insertions(+)

diff --git a/gcc/ChangeLog.sha b/gcc/ChangeLog.sha
index 0c12046271e5..00f7831c3545 100644
--- a/gcc/ChangeLog.sha
+++ b/gcc/ChangeLog.sha
@@ -1,5 +1,171 @@
+ Branch work188-sha, patch #401 
+
+Add potential p-future XVRLD and XVRLDI instructions.
+
+2024-12-02  Michael Meissner  
+
+gcc/
+
+   * config/rs6000/altivec.md (altivec_vrl): Add support for a
+   possible XVRLD instruction in the future.
+   (altivec_vrl_immediate): New insns.
+   * config/rs6000/predicates.md (vector_shift_immediate): New predicate.
+   * config/rs6000/rs6000.h (TARGET_XVRLW): New macro.
+   * config/rs6000/rs6000.md (isa attribute): Add xvrlw.
+   (enabled attribute): Add support for xvrlw.
+
+gcc/testsuite/
+
+   * gcc.target/powerpc/vector-rotate-left.c: New test.
+
+ Branch work188-sha, patch #400 
+
+PR target/117251: Add PowerPC XXEVAL support to speed up SHA3 calculations
+
+The multibuff.c benchmark attached to the PR target/117251 compiled for Power10
+PowerPC that implement SHA3 has a slowdown in the current trunk and GCC 14
+compared to GCC 11 - GCC 13, due to excessive amounts of spilling.
+
+The main function for the multibuf.c file has 3,747 lines, all of which are
+using vector unsigned long long.  There are 696 vector rotates (all rotates are
+constant), 1,824 vector xor's and 600 vector andc's.
+
+In looking at it, the main thing that steps out is the reason for either
+spilling or moving variables is the support in fusion.md (generated by
+genfusion.pl) that tries to fuse the vec_andc feeding into vec_xor, and other
+vec_xor's feeding into vec_xor.
+
+On the powerpc for power10, there is a special fusion mode that happens if the
+machine has a VANDC or VXOR instruction that is adjacent to a VXOR instruction
+and the VANDC/VXOR feeds into the 2nd VXOR instruction.
+
+While the Power10 has 64 vector registers (which uses the XXL prefix to do
+logical operations), the fusion only works with the older Altivec instruction
+set (which uses the V prefix).  The Altivec instruction only has 32 vector
+registers (which are overlaid over the VSX vector registers 32-63).
+
+By having the combiner patterns fuse_vandc_vxor and fuse_vxor_vxor to do this
+fusion, it means that the register allocator has more register pressure for the
+traditional Altivec registers instead of the VSX registers.
+
+In addition, since there are vector rotates, these rotates only work on the
+traditional Altivec registers, which adds to the Altivec register pressure.
+
+Finally in addition to doing the explicit xor, andc, and rotates using the
+Altivec registers, we have to also load vector constants for the rotate amount
+and these registers also are allocated as Altivec registers.
+
+Current trunk and GCC 12-14 have more vector spills than GCC 11, but GCC 11 has
+many more vector moves than the later compilers.  Thus even though it has way
+fewer spills, the vector moves are why GCC 11 has the slowest results.
+
+There is an instruction that was added in power10 (XXEVAL) that does provide
+fusion between VSX vectors that includes ANDC->XOR and XOR->XOR fusion.
+
+The latency of XXEVAL is slightly more than the fused VANDC/VXOR or VXOR/VXOR,
+so I have written the patch to prefer doing the Altivec instructions if they
+don't need a temporary register.
+
+Here are the results for adding support for XXEVAL for the multibuff.c
+benchmark attached to the PR.  Note that we essentially recover the speed with
+this patch that were lost with GCC 14 and the current trunk:
+
+  XXEVALTrunk   GCC14   GCC13   GCC12GCC11
+  ---   -   -   --
+Benchmark time in seconds   5.53 6.156.265.575.61 9.56
+
+Fuse VANDC -> VXOR   209 600  600 600 600  600
+Fuse VXOR -> VXOR  0 240  240 120 120  120
+XXEVAL to fuse ANDC -> XOR   391   00   0   00
+XXEVAL to fuse XOR -> XOR240   00   0   00
+
+Spill vector to stack 78 364  364 172 184  110
+Load spilled vector from stack   431 962  962 713 723  166
+Vector moves  10 100  100  70  723,055
+
+Vector rotate right  696 696  696 696 696  696
+XXLANDC or VANDC 209 600  600 600 600  600
+XXLXOR or VXOR   953   1,8241,824   1,824   1,8241,825
+XXEVAL 

[gcc r15-5872] gccrs: Remove unused files 'gcc/rust/typecheck/rust-hir-type-check-toplevel.{cc, h}'

2024-12-02 Thread Thomas Schwinge via Gcc-cvs
https://gcc.gnu.org/g:8173d0a4b75ae2b25e9ed8b4ed8bdc39c3438560

commit r15-5872-g8173d0a4b75ae2b25e9ed8b4ed8bdc39c3438560
Author: Owen Avery 
Date:   Sun Dec 1 21:17:39 2024 -0500

gccrs: Remove unused files 
'gcc/rust/typecheck/rust-hir-type-check-toplevel.{cc,h}'

These files only still exist upstream; they should have been removed as
part of commit 104cc285533e742726ae18a7d3d4f384dd20c350
"gccrs: Refactor TypeResolution to be a simple query based system".

gcc/rust/ChangeLog:

* typecheck/rust-hir-type-check-toplevel.cc: Removed.
* typecheck/rust-hir-type-check-toplevel.h: Removed.

Signed-off-by: Owen Avery 
Co-authored-by: Thomas Schwinge 

Diff:
---
 gcc/rust/typecheck/rust-hir-type-check-toplevel.cc | 378 -
 gcc/rust/typecheck/rust-hir-type-check-toplevel.h  |  56 ---
 2 files changed, 434 deletions(-)

diff --git a/gcc/rust/typecheck/rust-hir-type-check-toplevel.cc 
b/gcc/rust/typecheck/rust-hir-type-check-toplevel.cc
deleted file mode 100644
index 8224afb4b684..
--- a/gcc/rust/typecheck/rust-hir-type-check-toplevel.cc
+++ /dev/null
@@ -1,378 +0,0 @@
-// Copyright (C) 2020-2024 Free Software Foundation, Inc.
-
-// This file is part of GCC.
-
-// GCC is free software; you can redistribute it and/or modify it under
-// the terms of the GNU General Public License as published by the Free
-// Software Foundation; either version 3, or (at your option) any later
-// version.
-
-// GCC is distributed in the hope that it will be useful, but WITHOUT ANY
-// WARRANTY; without even the implied warranty of MERCHANTABILITY or
-// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-// for more details.
-
-// You should have received a copy of the GNU General Public License
-// along with GCC; see the file COPYING3.  If not see
-// .
-
-#include "rust-hir-type-check-toplevel.h"
-#include "rust-hir-type-check-enumitem.h"
-#include "rust-hir-type-check-type.h"
-#include "rust-hir-type-check-expr.h"
-#include "rust-hir-type-check-pattern.h"
-#include "rust-hir-type-check-implitem.h"
-
-namespace Rust {
-namespace Resolver {
-
-TypeCheckTopLevel::TypeCheckTopLevel () : TypeCheckBase () {}
-
-void
-TypeCheckTopLevel::Resolve (HIR::Item &item)
-{
-  rust_assert (item.get_hir_kind () == HIR::Node::BaseKind::VIS_ITEM);
-  HIR::VisItem &vis_item = static_cast (item);
-
-  TypeCheckTopLevel resolver;
-  vis_item.accept_vis (resolver);
-}
-
-void
-TypeCheckTopLevel::visit (HIR::TypeAlias &alias)
-{
-  TyTy::BaseType *actual_type
-= TypeCheckType::Resolve (alias.get_type_aliased ().get ());
-
-  context->insert_type (alias.get_mappings (), actual_type);
-
-  for (auto &where_clause_item : alias.get_where_clause ().get_items ())
-{
-  ResolveWhereClauseItem::Resolve (*where_clause_item.get ());
-}
-}
-
-void
-TypeCheckTopLevel::visit (HIR::TupleStruct &struct_decl)
-{
-  std::vector substitutions;
-  if (struct_decl.has_generics ())
-resolve_generic_params (struct_decl.get_generic_params (), substitutions);
-
-  for (auto &where_clause_item : struct_decl.get_where_clause ().get_items ())
-{
-  ResolveWhereClauseItem::Resolve (*where_clause_item.get ());
-}
-
-  std::vector fields;
-  size_t idx = 0;
-  for (auto &field : struct_decl.get_fields ())
-{
-  TyTy::BaseType *field_type
-   = TypeCheckType::Resolve (field.get_field_type ().get ());
-  TyTy::StructFieldType *ty_field
-   = new TyTy::StructFieldType (field.get_mappings ().get_hirid (),
-std::to_string (idx), field_type,
-field.get_locus ());
-  fields.push_back (ty_field);
-  context->insert_type (field.get_mappings (), ty_field->get_field_type 
());
-  idx++;
-}
-
-  // get the path
-  const CanonicalPath *canonical_path = nullptr;
-  bool ok = mappings->lookup_canonical_path (
-struct_decl.get_mappings ().get_nodeid (), &canonical_path);
-  rust_assert (ok);
-  RustIdent ident{*canonical_path, struct_decl.get_locus ()};
-
-  // its a single variant ADT
-  std::vector variants;
-  variants.push_back (new TyTy::VariantDef (
-struct_decl.get_mappings ().get_hirid (), struct_decl.get_identifier (),
-ident, TyTy::VariantDef::VariantType::TUPLE, nullptr, std::move (fields)));
-
-  // Process #[repr(X)] attribute, if any
-  const AST::AttrVec &attrs = struct_decl.get_outer_attrs ();
-  TyTy::ADTType::ReprOptions repr
-= parse_repr_options (attrs, struct_decl.get_locus ());
-
-  TyTy::BaseType *type
-= new TyTy::ADTType (struct_decl.get_mappings ().get_hirid (),
-mappings->get_next_hir_id (),
-struct_decl.get_identifier (), ident,
-TyTy::ADTType::ADTKind::TUPLE_STRUCT,
-std::move (variants), std::move (substitutions), repr);
-
-  context->insert_type (struct_decl.get_mappings 

[gcc/meissner/heads/work188-vpair] (15 commits) Merge commit 'refs/users/meissner/heads/work188-vpair' of g

2024-12-02 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work188-vpair' was updated to point to:

 68539971b381... Merge commit 'refs/users/meissner/heads/work188-vpair' of g

It previously pointed to:

 07f9d40f6aca... Add ChangeLog.vpair and update REVISION.

Diff:

Summary of changes (added commits):
---

  6853997... Merge commit 'refs/users/meissner/heads/work188-vpair' of g
  451c14b... Add ChangeLog.vpair and update REVISION.
  b51496b... Update ChangeLog.* (*)
  c9a0ac5... Use architecture flags for defining _ARCH_PWR macros. (*)
  71cab11... Add rs6000 architecture masks. (*)
  aeaa194... Do not allow -mvsx to boost processor to power7. (*)
  7052142... Use vector pair load/store for memcpy with -mcpu=future (*)
  5fbb82a... Add -mcpu=future tests. (*)
  37d4ea1... Add -mcpu=future tuning support. (*)
  2114420... Add support for -mcpu=future (*)
  5c3cbdb... Change TARGET_MODULO to TARGET_POWER9. (*)
  ff12ee1... Change TARGET_POPCNTD to TARGET_POWER7. (*)
  d0188be... Change TARGET_CMPB to TARGET_POWER6. (*)
  798cbe5... Change TARGET_FPRND to TARGET_POWER5X. (*)
  96a8105... Change TARGET_POPCNTB to TARGET_POWER5. (*)

(*) This commit already exists in another branch.
Because the reference `refs/users/meissner/heads/work188-vpair' matches
your hooks.email-new-commits-only configuration,
no separate email is sent for this commit.


[gcc(refs/users/meissner/heads/work188-dmf)] RFC2653-Add wD constraint.

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:67fa78e3953c481018d73fb7258d938a9c67eb4e

commit 67fa78e3953c481018d73fb7258d938a9c67eb4e
Author: Michael Meissner 
Date:   Mon Dec 2 15:34:47 2024 -0500

RFC2653-Add wD constraint.

This patch adds a new constraint ('wD') that matches the accumulator 
registers
that overlap with VSX registers 0..31 on power10.  Future patches will add 
the
support for a separate accumulator register class that will be used when the
support for dense math registers is added.

2024-12-02   Michael Meissner  

* config/rs6000/constraints.md (wD): New constraint.
* config/rs6000/mma.md (mma_): Prepare for alternate 
accumulator
registers.  Use wD constraint instead of 'd' constraint.  Use
accumulator_operand instead of fpr_reg_operand.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")]
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0")]
MMA_ACC))]
   "TARGET_MMA"
   " %A0"
@@ -523,7 +523,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
MMA_VV))]
@@ -532,8 +532,8 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
MMA_AVV))]
@@ -542,7 +542,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
MMA_PV))]
@@ -551,8 +551,8 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
(match_operand:OO 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
MMA_APV))]
@@ -561,7 +561,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -574,8 +574,8 @@
(set_attr "prefixed" "yes")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
(match_operand:SI 4 "const_0_to_15_operand" "n,n")
@@ -588,7 +588,7 @@
(set_attr "prefixed" "yes")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -601,8 +601,8 @@
(set_attr "prefixed" "yes")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")

[gcc(refs/users/meissner/heads/work188-vpair)] Merge commit 'refs/users/meissner/heads/work188-vpair' of git+ssh://gcc.gnu.org/git/gcc into me/work

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:68539971b381d1f606e8b8da62ef15fa1fc1b74e

commit 68539971b381d1f606e8b8da62ef15fa1fc1b74e
Merge: 451c14b5d5ab 07f9d40f6aca
Author: Michael Meissner 
Date:   Mon Dec 2 15:29:26 2024 -0500

Merge commit 'refs/users/meissner/heads/work188-vpair' of 
git+ssh://gcc.gnu.org/git/gcc into me/work188-vpair

Diff:


[gcc r15-5871] libstdc++: Simplify std::_Destroy using 'if constexpr'

2024-12-02 Thread Jonathan Wakely via Gcc-cvs
https://gcc.gnu.org/g:1467409beb27a693e96994899f30e4db015b5c2c

commit r15-5871-g1467409beb27a693e96994899f30e4db015b5c2c
Author: Jonathan Wakely 
Date:   Thu Nov 28 12:32:59 2024 +

libstdc++: Simplify std::_Destroy using 'if constexpr'

This is another place where we can use 'if constexpr' to replace
dispatching to a specialized class template, improving compile times and
avoiding a function call.

libstdc++-v3/ChangeLog:

* include/bits/stl_construct.h (_Destroy(FwdIter, FwdIter)): Use
'if constexpr' instead of dispatching to a member function of a
class template.
(_Destroy_n(FwdIter, Size)): Likewise.
(_Destroy_aux, _Destroy_n_aux): Only define for C++98.

Reviewed-by: Patrick Palka 

Diff:
---
 libstdc++-v3/include/bits/stl_construct.h | 33 +--
 1 file changed, 27 insertions(+), 6 deletions(-)

diff --git a/libstdc++-v3/include/bits/stl_construct.h 
b/libstdc++-v3/include/bits/stl_construct.h
index 9d6111396e1c..6889a9bfa0e6 100644
--- a/libstdc++-v3/include/bits/stl_construct.h
+++ b/libstdc++-v3/include/bits/stl_construct.h
@@ -166,6 +166,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 #endif
 }
 
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wc++17-extensions" // for if-constexpr
+
+#if __cplusplus < 201103L
   template
 struct _Destroy_aux
 {
@@ -185,6 +189,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 static void
 __destroy(_ForwardIterator, _ForwardIterator) { }
 };
+#endif
 
   /**
* Destroy a range of objects.  If the value_type of the object has
@@ -201,15 +206,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   // A deleted destructor is trivial, this ensures we reject such types:
   static_assert(is_destructible<_Value_type>::value,
"value type is destructible");
-#endif
+  if constexpr (!__has_trivial_destructor(_Value_type))
+   for (; __first != __last; ++__first)
+ std::_Destroy(std::__addressof(*__first));
 #if __cpp_constexpr_dynamic_alloc // >= C++20
-  if (std::__is_constant_evaluated())
-   return std::_Destroy_aux::__destroy(__first, __last);
+  else if (std::__is_constant_evaluated())
+   for (; __first != __last; ++__first)
+ std::destroy_at(std::__addressof(*__first));
 #endif
+#else
   std::_Destroy_aux<__has_trivial_destructor(_Value_type)>::
__destroy(__first, __last);
+#endif
 }
 
+#if __cplusplus < 201103L
   template
 struct _Destroy_n_aux
 {
@@ -234,6 +245,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  return __first;
}
 };
+#endif
 
   /**
* Destroy a range of objects.  If the value_type of the object has
@@ -250,14 +262,23 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   // A deleted destructor is trivial, this ensures we reject such types:
   static_assert(is_destructible<_Value_type>::value,
"value type is destructible");
-#endif
+  if constexpr (!__has_trivial_destructor(_Value_type))
+   for (; __count > 0; (void)++__first, --__count)
+ std::_Destroy(std::__addressof(*__first));
 #if __cpp_constexpr_dynamic_alloc // >= C++20
-  if (std::__is_constant_evaluated())
-   return std::_Destroy_n_aux::__destroy_n(__first, __count);
+  else if (std::__is_constant_evaluated())
+   for (; __count > 0; (void)++__first, --__count)
+ std::destroy_at(std::__addressof(*__first));
 #endif
+  else
+   std::advance(__first, __count);
+  return __first;
+#else
   return std::_Destroy_n_aux<__has_trivial_destructor(_Value_type)>::
__destroy_n(__first, __count);
+#endif
 }
+#pragma GCC diagnostic pop
 
 #if __glibcxx_raw_memory_algorithms // >= C++17
   template 


[gcc r15-5866] m68k: don't allow o/o in movdi, movdf, movxf

2024-12-02 Thread Andreas Schwab via Gcc-cvs
https://gcc.gnu.org/g:cec97549b781643f55bde34d025c3170309e3646

commit r15-5866-gcec97549b781643f55bde34d025c3170309e3646
Author: Andreas Schwab 
Date:   Thu Nov 21 15:35:01 2024 +0100

m68k: don't allow o/o in movdi, movdf, movxf

The movdi, movdf and movxf patterns allow both operands to be offsettable
memory, but output_move_double cannot handle overlapping objects.  This is
visible in the failure of gcc.c-torture/execute/pr97073.c when compiled
with LTO (where cprop optimizes out the AND operation; the failure also
occurs without LTO when the AND is removed).  Split the constraints so
that the operands cannot both be "o" in the same insn.

* config/m68k/m68k.md (movdi+1, movdf+1, movxf+2): Split
constraints so that the operands cannot both be "o".

Diff:
---
 gcc/config/m68k/m68k.md | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/gcc/config/m68k/m68k.md b/gcc/config/m68k/m68k.md
index 1c9a6bf1748c..d7329004e910 100644
--- a/gcc/config/m68k/m68k.md
+++ b/gcc/config/m68k/m68k.md
@@ -1354,8 +1354,8 @@
 })
 
 (define_insn ""
-  [(set (match_operand:DF 0 "nonimmediate_operand" "=rm,rf,rf,&rof<>")
-   (match_operand:DF 1 "general_operand" "*rf,m,0,*rofE<>"))]
+  [(set (match_operand:DF 0 "nonimmediate_operand" "=rm,rf,rf,&rof<>,&rf<>")
+   (match_operand:DF 1 "general_operand" "*rf,m,0,*rfE<>,*rofE<>"))]
 ;  [(set (match_operand:DF 0 "nonimmediate_operand" "=rm,&rf,&rof<>")
 ;  (match_operand:DF 1 "general_operand" "rf,m,rofF<>"))]
   "!TARGET_COLDFIRE"
@@ -1514,8 +1514,8 @@
   [(set_attr "flags_valid" "move")])
 
 (define_insn ""
-  [(set (match_operand:XF 0 "nonimmediate_operand" "=rm,rf,&rof<>")
-   (match_operand:XF 1 "nonimmediate_operand" "rf,m,rof<>"))]
+  [(set (match_operand:XF 0 "nonimmediate_operand" "=rm,rf,&rof<>,&rf<>")
+   (match_operand:XF 1 "nonimmediate_operand" "rf,m,rf<>,rof<>"))]
   "! TARGET_68881 && ! TARGET_COLDFIRE"
 {
   if (FP_REG_P (operands[0]))
@@ -1568,8 +1568,8 @@
 ;; movdi can apply to fp regs in some cases
 (define_insn ""
   ;; Let's see if it really still needs to handle fp regs, and, if so, why.
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r,&ro<>")
-   (match_operand:DI 1 "general_operand" "rF,m,roi<>F"))]
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r,&ro<>,&r<>")
+   (match_operand:DI 1 "general_operand" "rF,m,ri<>F,roi<>F"))]
 ;  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,&r,&ro<>,!&rm,!&f")
 ;  (match_operand:DI 1 "general_operand" "r,m,roi<>,fF"))]
 ;  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,&rf,&ro<>,!&rm,!&f")


[gcc(refs/users/meissner/heads/work188-dmf)] Revert changes

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:fe3fa22c90fbf61ef15c0b9e645952c4305c4ef5

commit fe3fa22c90fbf61ef15c0b9e645952c4305c4ef5
Author: Michael Meissner 
Date:   Mon Dec 2 15:47:21 2024 -0500

Revert changes

Diff:
---
 gcc/config/rs6000/constraints.md  |   3 -
 gcc/config/rs6000/mma.md  | 427 +-
 gcc/config/rs6000/predicates.md   |  32 --
 gcc/config/rs6000/rs6000-builtin.cc   |  22 +-
 gcc/config/rs6000/rs6000-c.cc |   9 +-
 gcc/config/rs6000/rs6000-call.cc  |  10 +-
 gcc/config/rs6000/rs6000-modes.def|   4 -
 gcc/config/rs6000/rs6000.cc   | 318 
 gcc/config/rs6000/rs6000.h|  50 +--
 gcc/config/rs6000/rs6000.md   |   2 -
 gcc/doc/md.texi   |   5 -
 gcc/testsuite/gcc.target/powerpc/dm-1024bit.c |  63 
 gcc/testsuite/gcc.target/powerpc/dm-double-test.c | 194 --
 gcc/testsuite/lib/target-supports.exp |  23 --
 14 files changed, 165 insertions(+), 997 deletions(-)

diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index 277a30a82458..369a7b75042d 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -107,9 +107,6 @@
(match_test "TARGET_P8_VECTOR")
(match_operand 0 "s5bit_cint_operand")))
 
-(define_register_constraint "wD" "rs6000_constraints[RS6000_CONSTRAINT_wD]"
-  "Accumulator register.")
-
 (define_constraint "wE"
   "@internal Vector constant that can be loaded with the XXSPLTIB instruction."
   (match_test "xxspltib_constant_nosplit (op, mode)"))
diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index 8461499e1c3d..04e2d0066df2 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -91,12 +91,6 @@
UNSPEC_MMA_XVI8GER4SPP
UNSPEC_MMA_XXMFACC
UNSPEC_MMA_XXMTACC
-   UNSPEC_MMA_DMSETDMRZ
-   UNSPEC_DM_INSERT512_UPPER
-   UNSPEC_DM_INSERT512_LOWER
-   UNSPEC_DM_EXTRACT512
-   UNSPEC_DMR_RELOAD_FROM_MEMORY
-   UNSPEC_DMR_RELOAD_TO_MEMORY
   ])
 
 (define_c_enum "unspecv"
@@ -230,47 +224,44 @@
 (UNSPEC_MMA_XVF64GERNP "xvf64gernp")
 (UNSPEC_MMA_XVF64GERNN "xvf64gernn")])
 
-;; The "pm" prefix is not in these expansions, so that we can generate
-;; pmdmxvi4ger8 on systems with dense math registers and xvi4ger8 on systems
-;; without dense math registers.
-(define_int_attr vvi4i4i8  [(UNSPEC_MMA_PMXVI4GER8 "xvi4ger8")])
+(define_int_attr vvi4i4i8  [(UNSPEC_MMA_PMXVI4GER8 "pmxvi4ger8")])
 
-(define_int_attr avvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8PP   "xvi4ger8pp")])
+(define_int_attr avvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8PP   
"pmxvi4ger8pp")])
 
-(define_int_attr vvi4i4i2  [(UNSPEC_MMA_PMXVI16GER2"xvi16ger2")
-(UNSPEC_MMA_PMXVI16GER2S   "xvi16ger2s")
-(UNSPEC_MMA_PMXVF16GER2"xvf16ger2")
-(UNSPEC_MMA_PMXVBF16GER2   "xvbf16ger2")])
+(define_int_attr vvi4i4i2  [(UNSPEC_MMA_PMXVI16GER2"pmxvi16ger2")
+(UNSPEC_MMA_PMXVI16GER2S   "pmxvi16ger2s")
+(UNSPEC_MMA_PMXVF16GER2"pmxvf16ger2")
+(UNSPEC_MMA_PMXVBF16GER2   
"pmxvbf16ger2")])
 
-(define_int_attr avvi4i4i2 [(UNSPEC_MMA_PMXVI16GER2PP  "xvi16ger2pp")
-(UNSPEC_MMA_PMXVI16GER2SPP "xvi16ger2spp")
-(UNSPEC_MMA_PMXVF16GER2PP  "xvf16ger2pp")
-(UNSPEC_MMA_PMXVF16GER2PN  "xvf16ger2pn")
-(UNSPEC_MMA_PMXVF16GER2NP  "xvf16ger2np")
-(UNSPEC_MMA_PMXVF16GER2NN  "xvf16ger2nn")
-(UNSPEC_MMA_PMXVBF16GER2PP "xvbf16ger2pp")
-(UNSPEC_MMA_PMXVBF16GER2PN "xvbf16ger2pn")
-(UNSPEC_MMA_PMXVBF16GER2NP "xvbf16ger2np")
-(UNSPEC_MMA_PMXVBF16GER2NN 
"xvbf16ger2nn")])
+(define_int_attr avvi4i4i2 [(UNSPEC_MMA_PMXVI16GER2PP  "pmxvi16ger2pp")
+(UNSPEC_MMA_PMXVI16GER2SPP 
"pmxvi16ger2spp")
+(UNSPEC_MMA_PMXVF16GER2PP  "pmxvf16ger2pp")
+(UNSPEC_MMA_PMXVF16GER2PN  "pmxvf16ger2pn")
+(UNSPEC_MMA_PMXVF16GER2NP  "pmxvf16ger2np")
+(UNSPEC_MMA_PMXVF16GER2NN  "pmxvf16ger2nn")
+(UNSPEC_MMA_PMXVBF16GER2PP 
"pmxvbf16ger2pp")
+(UNSPEC_MMA_PMXVBF16GER2PN 
"pmxvbf16ger2pn")
+

[gcc(refs/users/meissner/heads/work188-bugs)] Add power9 and power10 float to logical optimizations.

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:ff65b318a28d8bc8adcc5bf104f3aea8648c1b11

commit ff65b318a28d8bc8adcc5bf104f3aea8648c1b11
Author: Michael Meissner 
Date:   Mon Dec 2 15:58:44 2024 -0500

Add power9 and power10 float to logical optimizations.

I was answering an email from a co-worker and I pointed him to work I had 
done
for the Power8 era that optimizes the 32-bit float math library in Glibc.  
In
doing so, I discovered with the Power9 and later computers, this 
optimization
is no longer taking place.

The glibc 32-bit floating point math functions have code that looks like:

union u {
  float f;
  uint32_t u32;
};

float
math_foo (float x, unsigned int mask)
{
  union u arg;
  float x2;

  arg.f = x;
  arg.u32 &= mask;

  x2 = arg.f;
  /* ... */
}

On power8 with the optimization it generates:

xscvdpspn 0,1
sldi 9,4,32
mtvsrd 32,9
xxland 1,0,32
xscvspdpn 1,1

I.e., it converts the SFmode to the memory format (instead of the DFmode 
that
is used within the register), converts the mask so that it is in the vector
register in the upper 32-bits, and does a XXLAND (i.e. there is only one 
direct
move from GPR to vector register).  Then after doing this, it converts the
upper 32-bits back to DFmode.

If the XSCVSPDPN instruction took the value in the normal 32-bit scalar in a
vector register, we wouldn't have needed the SLDI of the mask.

On power9/power10/power11 it currently generates:

xscvdpspn 0,1
mfvsrwz 2,0
and 2,2,4
mtvsrws 1,2
xscvspdpn 1,1
blr

I.e. convert to SFmode representation, move the value to a GPR, do an AND
operation, move the 32-bit value with a splat, and then convert it back to
DFmode format.

With this patch, it now generates:

xscvdpspn 0,1
mtvsrwz 32,2
xxland 32,0,32
xxspltw 1,32,1
xscvspdpn 1,1
blr

I.e. convert to SFmode representation, move the mask to the vector 
register, do
the operation using XXLAND.  Splat the value to get the value in the correct
location, and then convert back to DFmode.

I have built GCC with the patches in this patch set applied on both little 
and
big endian PowerPC systems and there were no regressions.  Can I apply this
patch to GCC 15?

2024-12-02  Michael Meissner  

gcc/

PR target/117487
* config/rs6000/vsx.md (SFmode logical peephole): Update comments 
in
the original code that supports power8.  Add a new define_peephole2 
to
do the optimization on power9/power10.

Diff:
---
 gcc/config/rs6000/vsx.md | 142 +--
 1 file changed, 137 insertions(+), 5 deletions(-)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index af9846391db2..ee3d85525e7e 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -6280,7 +6280,7 @@
(SFBOOL_MFVSR_A  3) ;; move to gpr src
(SFBOOL_BOOL_D   4) ;; and/ior/xor dest
(SFBOOL_BOOL_A1  5) ;; and/ior/xor arg1
-   (SFBOOL_BOOL_A2  6) ;; and/ior/xor arg1
+   (SFBOOL_BOOL_A2  6) ;; and/ior/xor arg2
(SFBOOL_SHL_D7) ;; shift left dest
(SFBOOL_SHL_A8) ;; shift left arg
(SFBOOL_MTVSR_D  9) ;; move to vecter dest
@@ -6320,18 +6320,18 @@
 ;; GPR, and instead move the integer mask value to the vector register after a
 ;; shift and do the VSX logical operation.
 
-;; The insns for dealing with SFmode in GPR registers looks like:
+;; The insns for dealing with SFmode in GPR registers looks like on power8:
 ;; (set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN))
 ;;
-;; (set (reg:DI reg3) (unspec:DI [(reg:V4SF reg2)] UNSPEC_P8V_RELOAD_FROM_VSX))
+;; (set (reg:DI reg3) (zero_extend:DI (reg:SI reg2)))
 ;;
-;; (set (reg:DI reg4) (and:DI (reg:DI reg3) (reg:DI reg3)))
+;; (set (reg:DI reg4) (and:SI (reg:SI reg3) (reg:SI mask)))
 ;;
 ;; (set (reg:DI reg5) (ashift:DI (reg:DI reg4) (const_int 32)))
 ;;
 ;; (set (reg:SF reg6) (unspec:SF [(reg:DI reg5)] UNSPEC_P8V_MTVSRD))
 ;;
-;; (set (reg:SF reg6) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))
+;; (set (reg:SF reg7) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))
 
 (define_peephole2
   [(match_scratch:DI SFBOOL_TMP_GPR "r")
@@ -6412,6 +6412,138 @@
   operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d);
 })
 
+;; Constants for SFbool optimization on power9/power10
+(define_consta

[gcc(refs/users/meissner/heads/work188-bugs)] PR target/108958 -- use mtvsrdd to zero extend GPR DImode to VSX TImode

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:784ec9f987664069eda1187879c7b752aa2aa03a

commit 784ec9f987664069eda1187879c7b752aa2aa03a
Author: Michael Meissner 
Date:   Mon Dec 2 15:59:40 2024 -0500

PR target/108958 -- use mtvsrdd to zero extend GPR DImode to VSX TImode

Previously GCC would zero extend a DImode GPR value to TImode by first zero
extending the DImode value into a GPR TImode value, and then do a MTVSRDD to
move this value to a VSX register.

This patch does the move directly, since if the middle argument to MTVSRDD 
is 0,
it does the zero extend.

If the DImode value is already in a vector register, it does a XXSPLTIB and
XXPERMDI to get the value into the bottom 64-bits of the register.

I have built GCC with the patches in this patch set applied on both little 
and
big endian PowerPC systems and there were no regressions.  Can I apply this
patch to GCC 15?

2024-12-02  Michael Meissner  

gcc/

PR target/108958
* gcc/config/rs6000/rs6000.md (zero_extendditi2): New insn.

gcc/testsuite/

PR target/108958
* gcc.target/powerpc/pr108958.c: New test.

Diff:
---
 gcc/config/rs6000/rs6000.md | 46 +
 gcc/testsuite/gcc.target/powerpc/pr108958.c | 27 +
 2 files changed, 73 insertions(+)

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index fa3484294ca8..707721c7ac0a 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -1026,6 +1026,52 @@
(set_attr "dot" "yes")
(set_attr "length" "4,8")])
 
+(define_insn_and_split "zero_extendditi2"
+  [(set (match_operand:TI 0 "gpc_reg_operand" "=r,wa,&wa")
+   (zero_extend:TI
+(match_operand:DI 1 "gpc_reg_operand" "rwa,r,wa")))]
+  "TARGET_P9_VECTOR && TARGET_POWERPC64"
+  "@
+  #
+  mtvsrdd %x0,0,%1
+  #"
+  "&& reload_completed
+   && (int_reg_operand (operands[0], TImode)
+   || vsx_register_operand (operands[1], DImode))"
+  [(set (match_dup 2)
+   (match_dup 3))
+   (set (match_dup 4)
+   (match_dup 5))]
+{
+  rtx op0 = operands[0];
+  rtx op1 = operands[1];
+  int r = reg_or_subregno (op0);
+
+  if (int_reg_operand (op0, TImode))
+{
+  int lo = BYTES_BIG_ENDIAN ? 1 : 0;
+  int hi = 1 - lo;
+
+  operands[2] = gen_rtx_REG (DImode, r + lo);
+  operands[3] = op1;
+  operands[4] = gen_rtx_REG (DImode, r + hi);
+  operands[5] = const0_rtx;
+}
+  else
+{
+  rtx op0_di = gen_rtx_REG (DImode, r);
+  rtx op0_v2di = gen_rtx_REG (V2DImode, r);
+  rtx lo = WORDS_BIG_ENDIAN ? op1 : op0_di;
+  rtx hi = WORDS_BIG_ENDIAN ? op0_di : op1;
+
+  operands[2] = op0_v2di;
+  operands[3] = CONST0_RTX (V2DImode);
+  operands[4] = op0_v2di;
+  operands[5] = gen_rtx_VEC_CONCAT (V2DImode, hi, lo);
+}
+}
+  [(set_attr "type" "*,mtvsr,vecperm")
+   (set_attr "length" "8,*,8")])
 
 (define_insn "extendqi2"
   [(set (match_operand:EXTQI 0 "gpc_reg_operand" "=r,?*v")
diff --git a/gcc/testsuite/gcc.target/powerpc/pr108958.c 
b/gcc/testsuite/gcc.target/powerpc/pr108958.c
new file mode 100644
index ..03eb58d069e7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr108958.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target int128 } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
+
+/* PR target/108958, use mtvsrdd to zero extend gpr to vsx register.  */
+
+void
+gpr_to_vsx (unsigned long long x, __uint128_t *p)
+{
+  /* mtvsrdd vsx,0,gpr.  */
+  __uint128_t y = x;
+  __asm__ (" # %x0" : "+wa" (y));
+  *p = y;
+}
+
+void
+gpr_to_gpr (unsigned long long x, __uint128_t *p)
+{
+  /* mr and li.  */
+  __uint128_t y = x;
+  __asm__ (" # %0" : "+r" (y));
+  *p = y;
+}
+
+/* { dg-final { scan-assembler-times {\mli\M}  1 } } */
+/* { dg-final { scan-assembler-times {\mmtvsrdd .*,0,.*\M} 1 } } */


[gcc(refs/users/aoliva/heads/testme)] ifcombine: don't try xor on right-hand op

2024-12-02 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:155cb8f0d01dbaab55ac339e6c55cf36bd5d7b13

commit 155cb8f0d01dbaab55ac339e6c55cf36bd5d7b13
Author: Alexandre Oliva 
Date:   Sun Dec 1 08:18:01 2024 -0300

ifcombine: don't try xor on right-hand op

Diff:
---
 gcc/gimple-fold.cc | 4 
 1 file changed, 4 insertions(+)

diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index 31011b726f36..149df985bee4 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -7539,6 +7539,10 @@ decode_field_reference (tree *pexp, HOST_WIDE_INT 
*pbitsize,
  exp = res_ops[1];
  gcc_checking_assert (!xor_cmp_op);
}
+  else if (!xor_cmp_op)
+   /* Not much we can do when xor appears in the right-hand compare
+  operand.  */
+   return NULL_TREE;
   else
{
  *xor_p = true;


[gcc(refs/users/meissner/heads/work188-dmf)] RFC2653-Add dense math test for new instruction names.

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:0d0f93fa26ce6e403adbf4e88d2c8fba29c71fd6

commit 0d0f93fa26ce6e403adbf4e88d2c8fba29c71fd6
Author: Michael Meissner 
Date:   Mon Dec 2 15:41:47 2024 -0500

RFC2653-Add dense math test for new instruction names.

2024-12-02   Michael Meissner  

gcc/testsuite/

* gcc.target/powerpc/dm-double-test.c: New test.
* lib/target-supports.exp (check_effective_target_ppc_dmr_ok): New
target test.

Diff:
---
 gcc/testsuite/gcc.target/powerpc/dm-double-test.c | 194 ++
 gcc/testsuite/lib/target-supports.exp |  23 +++
 2 files changed, 217 insertions(+)

diff --git a/gcc/testsuite/gcc.target/powerpc/dm-double-test.c 
b/gcc/testsuite/gcc.target/powerpc/dm-double-test.c
new file mode 100644
index ..66c197795856
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/dm-double-test.c
@@ -0,0 +1,194 @@
+/* Test derived from mma-double-1.c, modified for dense math.  */
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_dense_math_ok } */
+/* { dg-options "-mdejagnu-cpu=future -O2" } */
+
+#include 
+#include 
+#include 
+
+typedef unsigned char vec_t __attribute__ ((vector_size (16)));
+typedef double v4sf_t __attribute__ ((vector_size (16)));
+#define SAVE_ACC(ACC, ldc, J)  \
+ __builtin_mma_disassemble_acc (result, ACC); \
+ rowC = (v4sf_t *) &CO[0*ldc+J]; \
+  rowC[0] += result[0]; \
+  rowC = (v4sf_t *) &CO[1*ldc+J]; \
+  rowC[0] += result[1]; \
+  rowC = (v4sf_t *) &CO[2*ldc+J]; \
+  rowC[0] += result[2]; \
+  rowC = (v4sf_t *) &CO[3*ldc+J]; \
+ rowC[0] += result[3];
+
+void
+DM (int m, int n, int k, double *A, double *B, double *C)
+{
+  __vector_quad acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7;
+  v4sf_t result[4];
+  v4sf_t *rowC;
+  for (int l = 0; l < n; l += 4)
+{
+  double *CO;
+  double *AO;
+  AO = A;
+  CO = C;
+  C += m * 4;
+  for (int j = 0; j < m; j += 16)
+   {
+ double *BO = B;
+ __builtin_mma_xxsetaccz (&acc0);
+ __builtin_mma_xxsetaccz (&acc1);
+ __builtin_mma_xxsetaccz (&acc2);
+ __builtin_mma_xxsetaccz (&acc3);
+ __builtin_mma_xxsetaccz (&acc4);
+ __builtin_mma_xxsetaccz (&acc5);
+ __builtin_mma_xxsetaccz (&acc6);
+ __builtin_mma_xxsetaccz (&acc7);
+ unsigned long i;
+
+ for (i = 0; i < k; i++)
+   {
+ vec_t *rowA = (vec_t *) & AO[i * 16];
+ __vector_pair rowB;
+ vec_t *rb = (vec_t *) & BO[i * 4];
+ __builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
+ __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
+ __builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
+ __builtin_mma_xvf64gerpp (&acc2, rowB, rowA[2]);
+ __builtin_mma_xvf64gerpp (&acc3, rowB, rowA[3]);
+ __builtin_mma_xvf64gerpp (&acc4, rowB, rowA[4]);
+ __builtin_mma_xvf64gerpp (&acc5, rowB, rowA[5]);
+ __builtin_mma_xvf64gerpp (&acc6, rowB, rowA[6]);
+ __builtin_mma_xvf64gerpp (&acc7, rowB, rowA[7]);
+   }
+ SAVE_ACC (&acc0, m, 0);
+ SAVE_ACC (&acc2, m, 4);
+ SAVE_ACC (&acc1, m, 2);
+ SAVE_ACC (&acc3, m, 6);
+ SAVE_ACC (&acc4, m, 8);
+ SAVE_ACC (&acc6, m, 12);
+ SAVE_ACC (&acc5, m, 10);
+ SAVE_ACC (&acc7, m, 14);
+ AO += k * 16;
+ BO += k * 4;
+ CO += 16;
+   }
+  B += k * 4;
+}
+}
+
+void
+init (double *matrix, int row, int column)
+{
+  for (int j = 0; j < column; j++)
+{
+  for (int i = 0; i < row; i++)
+   {
+ matrix[j * row + i] = (i * 16 + 2 + j) / 0.123;
+   }
+}
+}
+
+void
+init0 (double *matrix, double *matrix1, int row, int column)
+{
+  for (int j = 0; j < column; j++)
+for (int i = 0; i < row; i++)
+  matrix[j * row + i] = matrix1[j * row + i] = 0;
+}
+
+
+void
+print (const char *name, const double *matrix, int row, int column)
+{
+  printf ("Matrix %s has %d rows and %d columns:\n", name, row, column);
+  for (int i = 0; i < row; i++)
+{
+  for (int j = 0; j < column; j++)
+   {
+ printf ("%f ", matrix[j * row + i]);
+   }
+  printf ("\n");
+}
+  printf ("\n");
+}
+
+int
+main (int argc, char *argv[])
+{
+  int rowsA, colsB, common;
+  int i, j, k;
+  int ret = 0;
+
+  for (int t = 16; t <= 128; t += 16)
+{
+  for (int t1 = 4; t1 <= 16; t1 += 4)
+   {
+ rowsA = t;
+ colsB = t1;
+ common = 1;
+ /* printf ("Running test for rows = %d,cols = %d\n", t, t1); */
+ double A[rowsA * common];
+ double B[common * colsB];
+ double C[rowsA * colsB];
+ double D[rowsA * colsB];
+
+
+ init (A, rowsA, common);
+ init (B, common, colsB);
+ init0 (C, D, rowsA, colsB);
+ DM (rowsA, colsB, common, A, B,

[gcc(refs/users/meissner/heads/work188-dmf)] RFC2653-Add support for dense math registers.

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:b432684dd7f5113b0f51ba2cae04d2c5fbfff987

commit b432684dd7f5113b0f51ba2cae04d2c5fbfff987
Author: Michael Meissner 
Date:   Mon Dec 2 15:49:39 2024 -0500

RFC2653-Add support for dense math registers.

The MMA subsystem added the notion of accumulator registers as an optional
feature of ISA 3.1 (power10).  In ISA 3.1, these accumulators overlapped 
with
the VSX registers 0..31, but logically the accumulator registers were 
separate
from the FPR registers.  In ISA 3.1, it was anticipated that in future 
systems,
the accumulator registers may not overlap with the FPR registers.  This patch
adds the support for dense math registers as separate registers.

This particular patch does not change the MMA support to use the 
accumulators
within the dense math registers.  This patch just adds the basic support for
having separate DMRs.  The next patch will switch the MMA support to use the
accumulators if -mcpu=future is used.

For testing purposes, I added an undocumented option '-mdense-math' to 
enable
or disable the dense math support.

This patch updates the wD constraint added in the previous patch.  If MMA is
selected but dense math is not selected (i.e. -mcpu=power10), the wD 
constraint
will allow access to accumulators that overlap with VSX registers 0..31.  If
both MMA and dense math are selected (i.e. -mcpu=future), the wD constraint
will only allow dense math registers.

This patch modifies the existing %A output modifier.  If MMA is selected but
dense math is not selected, then %A output modifier converts the VSX 
register
number to the accumulator number, by dividing it by 4.  If both MMA and 
dense
math are selected, then %A will map the separate DMR registers into 0..7.

The intention is that user code using extended asm can be modified to run on
both MMA without dense math and MMA with dense math:

1)  If possible, don't use extended asm, but instead use the MMA 
built-in
functions;

2)  If you do need to write extended asm, the d constraints
targeting accumulators should now use wD;

3)  Only use the built-in zero, assemble and disassemble functions to
create and move data between vector quad types and dense math
accumulators.
I.e. do not use the xxmfacc, xxmtacc, and xxsetaccz directly in the
extended asm code.  The reason is these instructions assume there 
is a
1-to-1 correspondence between 4 adjacent FPR registers and an
accumulator that overlaps with those instructions.  With 
accumulators
now being separate registers, there no longer is a 1-to-1
correspondence.

It is possible that the mangling for DMRs and the GDB register numbers may
produce other changes in the future.

gcc/

2024-12-02   Michael Meissner  

* config/rs6000/mma.md (UNSPEC_MMA_DMSETDMRZ): New unspec.
(movxo): Add comments about dense math registers.
(movxo_nodm): Rename from movxo and restrict the usage to machines
without dense math registers.
(movxo_dm): New insn for movxo support for machines with dense math
registers.
(mma_): Restrict usage to machines without dense math 
registers.
(mma_xxsetaccz): Add a define_expand wrapper, and add support for 
dense
math registers.
(mma_dmsetaccz): New insn.
* config/rs6000/predicates.md (dmr_operand): New predicate.
(accumulator_operand): Add support for dense math registers.
* config/rs6000/rs6000-builtin.cc (rs6000_gimple_fold_mma_builtin): 
Do
not issue a de-prime instruction when disassembling a vector quad 
on a
system with dense math registers.
* config/rs6000/rs6000-c.cc (rs6000_define_or_undefine_macro): 
Define
__DENSE_MATH__ if we have dense math registers.
* config/rs6000/rs6000.cc (enum rs6000_reg_type): Add DMR_REG_TYPE.
(enum rs6000_reload_reg_type): Add RELOAD_REG_DMR.
(LAST_RELOAD_REG_CLASS): Add support for DMR registers and the wD
constraint.
(reload_reg_map): Likewise.
(rs6000_reg_names): Likewise.
(alt_reg_names): Likewise.
(rs6000_hard_regno_nregs_internal): Likewise.
(rs6000_hard_regno_mode_ok_uncached): Likewise.
(rs6000_debug_reg_global): Likewise.
(rs6000_setup_reg_addr_masks): Likewise.
(rs6000_init_hard_regno_mode_ok): Likewise.
(rs6000_secondary_reload_memory): Add support for DMR registers.
(rs6000_secondary_reload_simple_move): Likewise.
(rs6000_preferred_reload_class): Likewise.
(rs6000_secondary_reload_class): Likewise.
(print_operand

[gcc(refs/users/meissner/heads/work188-dmf)] RFC2653-PowerPC: Add support for 1,024 bit DMR registers.

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:45765c11daf943ed877fa757c916b069ba792f16

commit 45765c11daf943ed877fa757c916b069ba792f16
Author: Michael Meissner 
Date:   Mon Dec 2 15:52:53 2024 -0500

RFC2653-PowerPC: Add support for 1,024 bit DMR registers.

This patch is a preliminary patch to add the full 1,024 bit dense math 
register
(DMRs) for -mcpu=future.  The MMA 512-bit accumulators map onto the top of 
the
DMR register.

This patch only adds the new 1,024 bit register support.  It does not add
support for any instructions that need 1,024 bit registers instead of 512 
bit
registers.

I used the new mode 'TDOmode' to be the opaque mode used for 1,024 bit
registers.  The 'wD' constraint added in previous patches is used for these
registers.  I added support to do load and store of DMRs via the VSX 
registers,
since there are no load/store dense math instructions.  I added the new 
keyword
'__dmr' to create 1,024 bit types that can be loaded into DMRs.  At 
present, I
don't have aliases for __dmr512 and __dmr1024 that we've discussed 
internally.

The patches have been tested on both little and big endian systems.  Can I 
check
it into the master branch?

2024-12-02   Michael Meissner  

gcc/

* config/rs6000/mma.md (UNSPEC_DM_INSERT512_UPPER): New unspec.
(UNSPEC_DM_INSERT512_LOWER): Likewise.
(UNSPEC_DM_EXTRACT512): Likewise.
(UNSPEC_DMR_RELOAD_FROM_MEMORY): Likewise.
(UNSPEC_DMR_RELOAD_TO_MEMORY): Likewise.
(movtdo): New define_expand and define_insn_and_split to implement 
1,024
bit DMR registers.
(movtdo_insert512_upper): New insn.
(movtdo_insert512_lower): Likewise.
(movtdo_extract512): Likewise.
(reload_dmr_from_memory): Likewise.
(reload_dmr_to_memory): Likewise.
* config/rs6000/rs6000-builtin.cc (rs6000_type_string): Add DMR
support.
(rs6000_init_builtins): Add support for __dmr keyword.
* config/rs6000/rs6000-call.cc (rs6000_return_in_memory): Add 
support
for TDOmode.
(rs6000_function_arg): Likewise.
* config/rs6000/rs6000-modes.def (TDOmode): New mode.
* config/rs6000/rs6000.cc (rs6000_hard_regno_nregs_internal): Add
support for TDOmode.
(rs6000_hard_regno_mode_ok_uncached): Likewise.
(rs6000_hard_regno_mode_ok): Likewise.
(rs6000_modes_tieable_p): Likewise.
(rs6000_debug_reg_global): Likewise.
(rs6000_setup_reg_addr_masks): Likewise.
(rs6000_init_hard_regno_mode_ok): Add support for TDOmode.  Setup 
reload
hooks for DMR mode.
(reg_offset_addressing_ok_p): Add support for TDOmode.
(rs6000_emit_move): Likewise.
(rs6000_secondary_reload_simple_move): Likewise.
(rs6000_preferred_reload_class): Likewise.
(rs6000_secondary_reload_class): Likewise.
(rs6000_mangle_type): Add mangling for __dmr type.
(rs6000_dmr_register_move_cost): Add support for TDOmode.
(rs6000_split_multireg_move): Likewise.
(rs6000_invalid_conversion): Likewise.
* config/rs6000/rs6000.h (VECTOR_ALIGNMENT_P): Add TDOmode.
(enum rs6000_builtin_type_index): Add DMR type nodes.
(dmr_type_node): Likewise.
(ptr_dmr_type_node): Likewise.

gcc/testsuite/

* gcc.target/powerpc/dm-1024bit.c: New test.
* lib/target-supports.exp (check_effective_target_ppc_dmr_ok): New
target test.

Diff:
---
 gcc/config/rs6000/mma.md  | 154 ++
 gcc/config/rs6000/rs6000-builtin.cc   |  17 +++
 gcc/config/rs6000/rs6000-call.cc  |  10 +-
 gcc/config/rs6000/rs6000-modes.def|   4 +
 gcc/config/rs6000/rs6000.cc   | 101 -
 gcc/config/rs6000/rs6000.h|   6 +-
 gcc/testsuite/gcc.target/powerpc/dm-1024bit.c |  63 +++
 gcc/testsuite/lib/target-supports.exp |  35 ++
 8 files changed, 356 insertions(+), 34 deletions(-)

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index ae6e7e9695be..978660ea993a 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -92,6 +92,11 @@
UNSPEC_MMA_XXMFACC
UNSPEC_MMA_XXMTACC
UNSPEC_MMA_DMSETDMRZ
+   UNSPEC_DM_INSERT512_UPPER
+   UNSPEC_DM_INSERT512_LOWER
+   UNSPEC_DM_EXTRACT512
+   UNSPEC_DMR_RELOAD_FROM_MEMORY
+   UNSPEC_DMR_RELOAD_TO_MEMORY
   ])
 
 (define_c_enum "unspecv"
@@ -742,3 +747,152 @@
   " %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
(set_attr "prefixed" "yes")])
+
+;; TDOmode (__dmr keyword for 1,024 bit registers).
+(define_expand "movtdo"
+  [(set (match_operand:TDO 0 "nonimmediate_operand")
+   (match_operand:T

[gcc(refs/users/meissner/heads/work188-dmf)] RFC2653-Add wD constraint.

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:c6783d9bb0b9c40da0e08a35303ddf7f186be046

commit c6783d9bb0b9c40da0e08a35303ddf7f186be046
Author: Michael Meissner 
Date:   Mon Dec 2 15:49:13 2024 -0500

RFC2653-Add wD constraint.

This patch adds a new constraint ('wD') that matches the accumulator 
registers
that overlap with VSX registers 0..31 on power10.  Future patches will add 
the
support for a separate accumulator register class that will be used when the
support for dense math registers is added.

2024-12-02   Michael Meissner  

* config/rs6000/constraints.md (wD): New constraint.
* config/rs6000/mma.md (mma_): Prepare for alternate 
accumulator
registers.  Use wD constraint instead of 'd' constraint.  Use
accumulator_operand instead of fpr_reg_operand.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")]
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0")]
MMA_ACC))]
   "TARGET_MMA"
   " %A0"
@@ -523,7 +523,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
MMA_VV))]
@@ -532,8 +532,8 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
MMA_AVV))]
@@ -542,7 +542,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
MMA_PV))]
@@ -551,8 +551,8 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
(match_operand:OO 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
MMA_APV))]
@@ -561,7 +561,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -574,8 +574,8 @@
(set_attr "prefixed" "yes")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
(match_operand:SI 4 "const_0_to_15_operand" "n,n")
@@ -588,7 +588,7 @@
(set_attr "prefixed" "yes")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -601,8 +601,8 @@
(set_attr "prefixed" "yes")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")

[gcc(refs/users/meissner/heads/work188-dmf)] Revert changes

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:0a75cc6c64552c86ce3694a1b1cde22d31dfb740

commit 0a75cc6c64552c86ce3694a1b1cde22d31dfb740
Author: Michael Meissner 
Date:   Mon Dec 2 15:37:46 2024 -0500

Revert changes

Diff:
---
 gcc/config/rs6000/constraints.md|   3 -
 gcc/config/rs6000/mma.md| 116 +--
 gcc/config/rs6000/predicates.md |  32 -
 gcc/config/rs6000/rs6000-builtin.cc |   5 +-
 gcc/config/rs6000/rs6000-c.cc   |   9 +-
 gcc/config/rs6000/rs6000.cc | 225 +++-
 gcc/config/rs6000/rs6000.h  |  44 +--
 gcc/config/rs6000/rs6000.md |   2 -
 gcc/doc/md.texi |   5 -
 9 files changed, 82 insertions(+), 359 deletions(-)

diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index 277a30a82458..369a7b75042d 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -107,9 +107,6 @@
(match_test "TARGET_P8_VECTOR")
(match_operand 0 "s5bit_cint_operand")))
 
-(define_register_constraint "wD" "rs6000_constraints[RS6000_CONSTRAINT_wD]"
-  "Accumulator register.")
-
 (define_constraint "wE"
   "@internal Vector constant that can be loaded with the XXSPLTIB instruction."
   (match_test "xxspltib_constant_nosplit (op, mode)"))
diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index ae6e7e9695be..04e2d0066df2 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -91,7 +91,6 @@
UNSPEC_MMA_XVI8GER4SPP
UNSPEC_MMA_XXMFACC
UNSPEC_MMA_XXMTACC
-   UNSPEC_MMA_DMSETDMRZ
   ])
 
 (define_c_enum "unspecv"
@@ -315,9 +314,7 @@
(set_attr "length" "*,*,8")])
 
 
-;; Vector quad support.  Under the original MMA, XOmode can only live in VSX
-;; registers 0..31.  With dense math, XOmode can live in either VSX registers
-;; (0..63) or DMR registers.
+;; Vector quad support.  XOmode can only live in FPRs.
 (define_expand "movxo"
   [(set (match_operand:XO 0 "nonimmediate_operand")
(match_operand:XO 1 "input_operand"))]
@@ -342,10 +339,10 @@
 gcc_assert (false);
 })
 
-(define_insn_and_split "*movxo_nodm"
+(define_insn_and_split "*movxo"
   [(set (match_operand:XO 0 "nonimmediate_operand" "=d,ZwO,d")
(match_operand:XO 1 "input_operand" "ZwO,d,d"))]
-  "TARGET_MMA_NO_DENSE_MATH
+  "TARGET_MMA
&& (gpc_reg_operand (operands[0], XOmode)
|| gpc_reg_operand (operands[1], XOmode))"
   "@
@@ -362,31 +359,6 @@
(set_attr "length" "*,*,16")
(set_attr "max_prefixed_insns" "2,2,*")])
 
-(define_insn_and_split "*movxo_dm"
-  [(set (match_operand:XO 0 "nonimmediate_operand" "=wa,ZwO,wa,wD,wD,wa")
-   (match_operand:XO 1 "input_operand""ZwO,wa, wa,wa,wD,wD"))]
-  "TARGET_MMA_DENSE_MATH
-   && (gpc_reg_operand (operands[0], XOmode)
-   || gpc_reg_operand (operands[1], XOmode))"
-  "@
-   #
-   #
-   #
-   dmxxinstdmr512 %0,%1,%Y1,0
-   dmmr %0,%1
-   dmxxextfdmr512 %0,%Y0,%1,0"
-  "&& reload_completed
-   && !dmr_operand (operands[0], XOmode)
-   && !dmr_operand (operands[1], XOmode)"
-  [(const_int 0)]
-{
-  rs6000_split_multireg_move (operands[0], operands[1]);
-  DONE;
-}
-  [(set_attr "type" "vecload,vecstore,veclogical,mma,mma,mma")
-   (set_attr "length" "*,*,16,*,*,*")
-   (set_attr "max_prefixed_insns" "2,2,*,*,*,*")])
-
 (define_expand "vsx_assemble_pair"
   [(match_operand:OO 0 "vsx_register_operand")
(match_operand:V16QI 1 "mma_assemble_input_operand")
@@ -527,55 +499,31 @@
   DONE;
 })
 
-;; MMA instructions that do not use their accumulators as an input, still must
-;; not allow their vector operands to overlap the registers used by the
-;; accumulator.  We enforce this by marking the output as early clobber.  The
-;; prime and de-prime instructions are not needed on systems with dense math
-;; registers.
+;; MMA instructions that do not use their accumulators as an input, still
+;; must not allow their vector operands to overlap the registers used by
+;; the accumulator.  We enforce this by marking the output as early clobber.
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD")
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")]
MMA_ACC))]
-  "TARGET_MMA_NO_DENSE_MATH"
+  "TARGET_MMA"
   " %A0"
   [(set_attr "type" "mma")])
 
 ;; We can't have integer constants in XOmode so we wrap this in an
-;; UNSPEC_VOLATILE.  If we have dense math registers, we can just use a normal
-;; UNSPEC instead of UNSPEC_VOLATILE.
-
-(define_expand "mma_xxsetaccz"
-  [(set (match_operand:XO 0 "accumulator_operand")
-   (unspec_volatile:XO [(const_int 0)]
-   UNSPECV_MMA_XXSETACCZ))]
-  "TARGET_MMA"
-{
-  if (TARGET_DENSE_MATH)
-{
-  emit_insn (gen_mma_dmsetdmrz (operands[0]));
-  DONE;
-}
-})
+;; UNSPEC_VOLATILE.
 
-(define_insn "*mma_xxsetaccz"
+(define_insn "mma_xxsetaccz"
   [(set (match_operand:XO 0 "fpr_reg_operand" "=d"

[gcc(refs/users/meissner/heads/work188-dmf)] RFC2653-Add wD constraint.

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:44857eb8bab4114bba7d294f796b0fa57c42a85a

commit 44857eb8bab4114bba7d294f796b0fa57c42a85a
Author: Michael Meissner 
Date:   Mon Dec 2 15:35:57 2024 -0500

RFC2653-Add wD constraint.

This patch adds a new constraint ('wD') that matches the accumulator 
registers
that overlap with VSX registers 0..31 on power10.  Future patches will add 
the
support for a separate accumulator register class that will be used when the
support for dense math registers is added.

2024-12-02   Michael Meissner  

* config/rs6000/constraints.md (wD): New constraint.
* config/rs6000/mma.md (mma_): Prepare for alternate 
accumulator
registers.  Use wD constraint instead of 'd' constraint.  Use
accumulator_operand instead of fpr_reg_operand.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_"
   [(set (match_operand:XO 0 "accumulator_operand" "=&wD")
-   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0")]
+   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")]
MMA_ACC))]
-  "TARGET_MMA"
+  "TARGET_MMA_NO_DENSE_MATH"
   " %A0"
   [(set_attr "type" "mma")])
 
 ;; We can't have integer constants in XOmode so we wrap this in an
-;; UNSPEC_VOLATILE.
+;; UNSPEC_VOLATILE.  If we have dense math registers, we can just use a normal
+;; UNSPEC instead of UNSPEC_VOLATILE.
 
-(define_insn "mma_xxsetaccz"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=d")
+(define_expand "mma_xxsetaccz"
+  [(set (match_operand:XO 0 "accumulator_operand")
(unspec_volatile:XO [(const_int 0)]
UNSPECV_MMA_XXSETACCZ))]
   "TARGET_MMA"
+{
+  if (TARGET_DENSE_MATH)
+{
+  emit_insn (gen_mma_dmsetdmrz (operands[0]));
+  DONE;
+}
+})
+
+(define_insn "*mma_xxsetaccz"
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=d")
+   (unspec_volatile:XO [(const_int 0)]
+   UNSPECV_MMA_XXSETACCZ))]
+  "TARGET_MMA_NO_DENSE_MATH"
   "xxsetaccz %A0"
   [(set_attr "type" "mma")])
 
+(define_insn "mma_dmsetdmrz"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD")
+   (unspec [(const_int 0)]
+   UNSPEC_MMA_DMSETDMRZ))]
+  "TARGET_MMA_DENSE_MATH"
+  "dmsetdmrz %A0"
+  [(set_attr "type" "mma")])
+
 (define_insn "mma_"
   [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 1827647b7c1e..2797c3cf619b 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -186,8 +186,23 @@
   return VLOGICAL_REGNO_P (REGNO (op));
 })
 
+;; Return 1 if op is a DMR register
+(define_predicate "dmr_operand"
+  (match_operand 0 "register_operand")
+{
+  if (!REG_P (op))
+return 0;
+
+  if (!HARD_REGISTER_P (op))
+return 1;
+
+  return DMR_REGNO_P (REGNO (op));
+})
+
 ;; Return 1 if op is an accumulator.  On power10 systems, the accumulators
-;; overlap with the FPRs.
+;; overlap with the FPRs, while on systems with dense math, the accumulators
+;; are separate dense math registers and do not overlap with the FPR
+;; registers..
 (define_predicate "accumulator_operand"
   (match_operand 0 "register_operand")
 {
@@ -198,7 +213,9 @@
 return 1;
 
   int r = REGNO (op);
-  return FP_REGNO_P (r) && (r & 3) == 0;
+  return (TARGET_MMA_DENSE_MATH
+ ? DMR_REGNO_P (r)
+ : FP_REGNO_P (r) && (r & 3) == 0);
 })
 
 ;; Return 1 if op is the carry register.
diff --git a/gcc/config/rs6000/rs6000-builtin.cc 
b/gcc/config/rs6000/rs6000-builtin.cc
index b6093b3cb64c..f2063edd2c39 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -1125,8 +1125,9 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi,
}
 
   /* If we're disassembling an accumulator into a different type, we need
-to emit a xxmfacc instruction now, since we cannot do it later.  */
-  if (fncode == RS6000_BIF_DISASSEMBLE_ACC)
+to emit a xxmfacc instruction now, since we cannot do it later.  If we
+have dense math registers, we don't need to do this.  */
+  if (fncode == RS6000_BIF_DISASSEMBLE_ACC && !TARGET_DENSE_MATH)
{
  new_decl = rs6000_builtin_decls[RS6000_BIF_XXMFACC_INTERNAL];
  new_call = gimple_build_call (new_decl, 1, src);
diff --git a/gcc/config/rs6000/rs6000-c.cc b/gcc/config/rs6000/rs6000-c.cc
index 6863e34fa705..cb832e1faf81 100644
--- a/gcc/config/rs6000/rs6000-c.cc
+++ b/gcc/config/rs6000/rs6000-c.cc
@@ -590,9 +590,14 @@ rs6000_target_modify_macros (bo

[gcc(refs/users/aoliva/heads/testme)] ifcombine: simplify and check for build error

2024-12-02 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:a1d17cc43d776ab661baa33420c7320a82d61a4b

commit a1d17cc43d776ab661baa33420c7320a82d61a4b
Author: Alexandre Oliva 
Date:   Sun Dec 1 08:18:05 2024 -0300

ifcombine: simplify and check for build error

Diff:
---
 gcc/gimple-fold.cc| 151 --
 gcc/testsuite/gcc.dg/field-merge-12.c |  33 
 gcc/testsuite/gcc.dg/field-merge-9.c  |   6 +-
 3 files changed, 107 insertions(+), 83 deletions(-)

diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index 149df985bee4..2c33cdfb1b29 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -7446,9 +7446,6 @@ maybe_fold_comparisons_from_match_pd (tree type, enum 
tree_code code,
*PBITSIZE is set to the number of bits in the reference, *PBITPOS is
set to the starting bit number.
 
-   If the innermost field can be completely contained in a mode-sized
-   unit, *PMODE is set to that mode.  Otherwise, it is set to VOIDmode.
-
*PVOLATILEP is set to 1 if the any expression encountered is volatile;
otherwise it is not changed.
 
@@ -7456,10 +7453,8 @@ maybe_fold_comparisons_from_match_pd (tree type, enum 
tree_code code,
 
*PREVERSEP is set to the storage order of the field.
 
-   *PMASK is set to the mask used.  This is either contained in a
-   BIT_AND_EXPR or derived from the width of the field.
-
*PAND_MASK is set to the mask found in a BIT_AND_EXPR, if any.
+   If PAND_MASK *is NULL, BIT_AND_EXPR is not recognized.
 
*XOR_P is to be FALSE if EXP might be a XOR used in a compare, in which
case, if XOR_CMP_OP is a zero constant, it will be overridden with *PEXP,
@@ -7478,10 +7473,9 @@ maybe_fold_comparisons_from_match_pd (tree type, enum 
tree_code code,
 
 static tree
 decode_field_reference (tree *pexp, HOST_WIDE_INT *pbitsize,
-   HOST_WIDE_INT *pbitpos, machine_mode *pmode,
+   HOST_WIDE_INT *pbitpos,
bool *punsignedp, bool *preversep, bool *pvolatilep,
-   wide_int *pmask, wide_int *pand_mask,
-   bool *xor_p, tree *xor_cmp_op,
+   wide_int *pand_mask, bool *xor_p, tree *xor_cmp_op,
gimple **load, location_t loc[4])
 {
   /* These are from match.pd.  */
@@ -7494,10 +7488,9 @@ decode_field_reference (tree *pexp, HOST_WIDE_INT 
*pbitsize,
   tree outer_type = 0;
   wide_int and_mask;
   tree inner, offset;
-  unsigned int precision;
   int shiftrt = 0;
-  wide_int mask;
   tree res_ops[2];
+  machine_mode mode;
 
   *load = NULL;
 
@@ -7522,7 +7515,7 @@ decode_field_reference (tree *pexp, HOST_WIDE_INT 
*pbitsize,
 }
 
   /* Recognize and save a masking operation.  */
-  if (gimple_bit_and_cst (exp, res_ops, follow_all_ssa_edges))
+  if (pand_mask && gimple_bit_and_cst (exp, res_ops, follow_all_ssa_edges))
 {
   loc[1] = gimple_location (SSA_NAME_DEF_STMT (exp));
   exp = res_ops[0];
@@ -7600,11 +7593,10 @@ decode_field_reference (tree *pexp, HOST_WIDE_INT 
*pbitsize,
   poly_int64 poly_bitsize, poly_bitpos;
   int unsignedp, reversep = *preversep, volatilep = *pvolatilep;
   inner = get_inner_reference (exp, &poly_bitsize, &poly_bitpos, &offset,
-  pmode, &unsignedp, &reversep, &volatilep);
+  &mode, &unsignedp, &reversep, &volatilep);
 
   HOST_WIDE_INT bs, bp;
-  if ((inner == exp && !and_mask.get_precision ())
-  || !poly_bitsize.is_constant (&bs)
+  if (!poly_bitsize.is_constant (&bs)
   || !poly_bitpos.is_constant (&bp)
   || bs <= shiftrt
   || offset != 0
@@ -7646,17 +7638,13 @@ decode_field_reference (tree *pexp, HOST_WIDE_INT 
*pbitsize,
   if (outer_type && *pbitsize == TYPE_PRECISION (outer_type))
 *punsignedp = TYPE_UNSIGNED (outer_type);
 
-  /* Compute the mask to access the bitfield.  */
-  precision = *pbitsize;
-
-  mask = wi::mask (*pbitsize, false, precision);
-
-  /* Merge it with the mask we found in the BIT_AND_EXPR, if any.  */
+  /* Make the mask the expected width.  */
   if (and_mask.get_precision () != 0)
-mask &= wide_int::from (and_mask, precision, UNSIGNED);
+and_mask = wide_int::from (and_mask, *pbitsize, UNSIGNED);
+
+  if (pand_mask)
+*pand_mask = and_mask;
 
-  *pmask = mask;
-  *pand_mask = and_mask;
   return inner;
 }
 
@@ -7913,9 +7901,7 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree 
truth_type,
   HOST_WIDE_INT rnbitsize, rnbitpos, rnprec;
   bool ll_unsignedp, lr_unsignedp, rl_unsignedp, rr_unsignedp;
   bool ll_reversep, lr_reversep, rl_reversep, rr_reversep;
-  machine_mode ll_mode, lr_mode, rl_mode, rr_mode;
   scalar_int_mode lnmode, lnmode2, rnmode;
-  wide_int ll_mask, lr_mask, rl_mask, rr_mask;
   wide_int ll_and_mask, lr_and_mask, rl_and_mask, rr_and_mask;
   wide_int l_const, r_const;
   tree lntype, rntype, result;
@@ -7987,25 +7973,21 @@ fold_truth_andor_for_ifcombine (enum tree_code code, 
tree truth_type,
   ll_reversep = lr_reversep = rl

[gcc r15-5867] c++: some further concepts cleanups

2024-12-02 Thread Patrick Palka via Gcc-cvs
https://gcc.gnu.org/g:73e7f63ffaacf018b1fad331d2369bf891620e97

commit r15-5867-g73e7f63ffaacf018b1fad331d2369bf891620e97
Author: Patrick Palka 
Date:   Mon Dec 2 10:58:50 2024 -0500

c++: some further concepts cleanups

This patch further cleans up the concepts code following the removal of
Concepts TS support:

  * concept-ids are now the only kind of "concept check", so we can
simplify some code accordingly.  In particular resolve_concept_check
seems like a no-op and can be removed.
  * In turn, deduce_constrained_parameter doesn't seem to do anything
interesting.
  * In light of the above we might as well inline finish_type_constraints
into its only caller.
  * Introduce and use a helper for obtaining the prototype parameter of
a concept, i.e. its first template parameter.
  * placeholder_extract_concept_and_args is only ever called on a
concept-id, so it's simpler to inline it into its callers.
  * There's no such thing as a template-template-parameter with a
type-constraint, so we can remove such handling from the parser.
This means is_constrained_parameter is currently equivalent to
declares_constrained_type_template_parameter, so let's prefer
to use the latter.
  * Remove WILDCARD_DECL and instead use the concept's prototype parameter
as the dummy first argument of a type-constraint during template
argument coercion.
  * Remove a redundant concept_definition_p overload.

gcc/cp/ChangeLog:

* constraint.cc (resolve_concept_check): Remove.
(deduce_constrained_parameter): Remove.
(finish_type_constraints): Inline into its only caller
cp_parser_placeholder_type_specifier and remove.
(build_concept_check_arguments): Coding style tweaks.
(build_standard_check): Inline into its only caller ...
(build_concept_check): ... here.
(build_type_constraint): Use the prototype parameter as the
first template argument.
(finish_shorthand_constraint): Remove function concept
handling.  Use concept_prototype_parameter.
(placeholder_extract_concept_and_args): Inline into its
callers and remove.
(equivalent_placeholder_constraints): Adjust after
placeholder_extract_concept_and_args removal.
(iterative_hash_placeholder_constraint): Likewise.
* cp-objcp-common.cc (cp_common_init_ts): Remove WILDCARD_DECL
handling.
* cp-tree.def (WILDCARD_DECL): Remove.
* cp-tree.h (WILDCARD_PACK_P): Remove.
(type_uses_auto_or_concept): Remove declaration of nonexistent
function.
(append_type_to_template_for_access_check): Likewise.
(finish_type_constraints): Remove declaration.
(placeholder_extract_concept_and_args): Remove declaration.
(deduce_constrained_parameter): Remove declaration.
(resolve_constraint_check): Remove declaration.
(valid_requirements_p): Remove declaration of nonexistent
function.
(finish_concept_name): Likewise.
(concept_definition_p): Remove redundant overload.
(concept_prototype_parameter): Define.
* cxx-pretty-print.cc (pp_cxx_constrained_type_spec): Adjust
after placeholder_extract_concept_and_args.
* error.cc (dump_decl) : Remove.
(dump_expr) : Likewise.
* parser.cc (is_constrained_parameter): Inline into
declares_constrained_type_template_parameter and remove.
(cp_parser_check_constrained_type_parm): Declare static.
(finish_constrained_template_template_parm): Remove.
(cp_parser_constrained_template_template_parm): Remove.
(finish_constrained_parameter): Remove dead code guarded by
cp_parser_constrained_template_template_parm.
(declares_constrained_type_template_parameter): Adjust after
is_constrained_parameter removal.
(declares_constrained_template_template_parameter): Remove.
(cp_parser_placeholder_type_specifier): Adjust after
finish_type_constraints removal.  Check the prototype parameter
earlier, before build_type_constraint.
Use concept_prototype_parameter.
(cp_parser_parameter_declaration): Remove dead code guarded by
declares_constrained_template_template_parameter.
* pt.cc (convert_wildcard_argument): Remove.
(convert_template_argument): Remove WILDCARD_DECL handling.
(coerce_template_parameter_pack): Likewise.
(tsubst) : Likewise.
(type_dependent_expression_p): Likewise.
(make_constrained_placeholder_type): Remove function concept
handling.

[gcc(refs/users/meissner/heads/work188-dmf)] RFC2653-Add support for dense math registers.

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:85c5e5f7a026c2faac5923678ff9a32fdb18af55

commit 85c5e5f7a026c2faac5923678ff9a32fdb18af55
Author: Michael Meissner 
Date:   Mon Dec 2 15:39:23 2024 -0500

RFC2653-Add support for dense math registers.

The MMA subsystem added the notion of accumulator registers as an optional
feature of ISA 3.1 (power10).  In ISA 3.1, these accumulators overlapped 
with
the VSX registers 0..31, but logically the accumulator registers were 
separate
from the FPR registers.  In ISA 3.1, it was anticipated that in future 
systems,
the accumulator registers may not overlap with the FPR registers.  This patch
adds the support for dense math registers as separate registers.

This particular patch does not change the MMA support to use the 
accumulators
within the dense math registers.  This patch just adds the basic support for
having separate DMRs.  The next patch will switch the MMA support to use the
accumulators if -mcpu=future is used.

For testing purposes, I added an undocumented option '-mdense-math' to 
enable
or disable the dense math support.

This patch updates the wD constraint added in the previous patch.  If MMA is
selected but dense math is not selected (i.e. -mcpu=power10), the wD 
constraint
will allow access to accumulators that overlap with VSX registers 0..31.  If
both MMA and dense math are selected (i.e. -mcpu=future), the wD constraint
will only allow dense math registers.

This patch modifies the existing %A output modifier.  If MMA is selected but
dense math is not selected, then %A output modifier converts the VSX 
register
number to the accumulator number, by dividing it by 4.  If both MMA and 
dense
math are selected, then %A will map the separate DMR registers into 0..7.

The intention is that user code using extended asm can be modified to run on
both MMA without dense math and MMA with dense math:

1)  If possible, don't use extended asm, but instead use the MMA 
built-in
functions;

2)  If you do need to write extended asm, the d constraints
targeting accumulators should now use wD;

3)  Only use the built-in zero, assemble and disassemble functions to
create or
move data between vector quad types and dense math accumulators.
I.e. do not use the xxmfacc, xxmtacc, and xxsetaccz directly in the
extended asm code.  The reason is these instructions assume there 
is a
1-to-1 correspondence between 4 adjacent FPR registers and an
accumulator that overlaps with those registers.  With 
accumulators
now being separate registers, there no longer is a 1-to-1
correspondence.

It is possible that the mangling for DMRs and the GDB register numbers may
produce other changes in the future.

gcc/

2024-12-02   Michael Meissner  

* config/rs6000/mma.md (UNSPEC_MMA_DMSETDMRZ): New unspec.
(movxo): Add comments about dense math registers.
(movxo_nodm): Rename from movxo and restrict the usage to machines
without dense math registers.
(movxo_dm): New insn for movxo support for machines with dense math
registers.
(mma_): Restrict usage to machines without dense math 
registers.
(mma_xxsetaccz): Add a define_expand wrapper, and add support for 
dense
math registers.
(mma_dmsetdmrz): New insn.
* config/rs6000/predicates.md (dmr_operand): New predicate.
(accumulator_operand): Add support for dense math registers.
* config/rs6000/rs6000-builtin.cc (rs6000_gimple_fold_mma_builtin): 
Do
not issue a de-prime instruction when disassembling a vector quad 
on a
system with dense math registers.
* config/rs6000/rs6000-c.cc (rs6000_define_or_undefine_macro): 
Define
__DENSE_MATH__ if we have dense math registers.
* config/rs6000/rs6000.cc (enum rs6000_reg_type): Add DMR_REG_TYPE.
(enum rs6000_reload_reg_type): Add RELOAD_REG_DMR.
(LAST_RELOAD_REG_CLASS): Add support for DMR registers and the wD
constraint.
(reload_reg_map): Likewise.
(rs6000_reg_names): Likewise.
(alt_reg_names): Likewise.
(rs6000_hard_regno_nregs_internal): Likewise.
(rs6000_hard_regno_mode_ok_uncached): Likewise.
(rs6000_debug_reg_global): Likewise.
(rs6000_setup_reg_addr_masks): Likewise.
(rs6000_init_hard_regno_mode_ok): Likewise.
(rs6000_secondary_reload_memory): Add support for DMR registers.
(rs6000_secondary_reload_simple_move): Likewise.
(rs6000_preferred_reload_class): Likewise.
(rs6000_secondary_reload_class): Likewise.
(print_operand

[gcc(refs/users/meissner/heads/work188-dmf)] RFC2653-PowerPC: Add support for 1,024 bit DMR registers.

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:8785c29758ba110950c22124d420faaeb2c0e373

commit 8785c29758ba110950c22124d420faaeb2c0e373
Author: Michael Meissner 
Date:   Mon Dec 2 15:43:13 2024 -0500

RFC2653-PowerPC: Add support for 1,024 bit DMR registers.

This patch is a preliminary patch to add the full 1,024 bit dense math 
register
(DMRs) for -mcpu=future.  The MMA 512-bit accumulators map onto the top of 
the
DMR register.

This patch only adds the new 1,024 bit register support.  It does not add
support for any instructions that need 1,024 bit registers instead of 512 
bit
registers.

I used the new mode 'TDOmode' to be the opaque mode used for 1,024 bit
registers.  The 'wD' constraint added in previous patches is used for these
registers.  I added support to do load and store of DMRs via the VSX 
registers,
since there are no load/store dense math instructions.  I added the new 
keyword
'__dmr' to create 1,024 bit types that can be loaded into DMRs.  At 
present, I
don't have aliases for __dmr512 and __dmr1024 that we've discussed 
internally.

The patches have been tested on both little and big endian systems.  Can I 
check
it into the master branch?

2024-12-02   Michael Meissner  

gcc/

* config/rs6000/mma.md (UNSPEC_DM_INSERT512_UPPER): New unspec.
(UNSPEC_DM_INSERT512_LOWER): Likewise.
(UNSPEC_DM_EXTRACT512): Likewise.
(UNSPEC_DMR_RELOAD_FROM_MEMORY): Likewise.
(UNSPEC_DMR_RELOAD_TO_MEMORY): Likewise.
(movtdo): New define_expand and define_insn_and_split to implement 
1,024
bit DMR registers.
(movtdo_insert512_upper): New insn.
(movtdo_insert512_lower): Likewise.
(movtdo_extract512): Likewise.
(reload_dmr_from_memory): Likewise.
(reload_dmr_to_memory): Likewise.
* config/rs6000/rs6000-builtin.cc (rs6000_type_string): Add DMR
support.
(rs6000_init_builtins): Add support for __dmr keyword.
* config/rs6000/rs6000-call.cc (rs6000_return_in_memory): Add 
support
for TDOmode.
(rs6000_function_arg): Likewise.
* config/rs6000/rs6000-modes.def (TDOmode): New mode.
* config/rs6000/rs6000.cc (rs6000_hard_regno_nregs_internal): Add
support for TDOmode.
(rs6000_hard_regno_mode_ok_uncached): Likewise.
(rs6000_hard_regno_mode_ok): Likewise.
(rs6000_modes_tieable_p): Likewise.
(rs6000_debug_reg_global): Likewise.
(rs6000_setup_reg_addr_masks): Likewise.
(rs6000_init_hard_regno_mode_ok): Add support for TDOmode.  Setup 
reload
hooks for DMR mode.
(reg_offset_addressing_ok_p): Add support for TDOmode.
(rs6000_emit_move): Likewise.
(rs6000_secondary_reload_simple_move): Likewise.
(rs6000_preferred_reload_class): Likewise.
(rs6000_secondary_reload_class): Likewise.
(rs6000_mangle_type): Add mangling for __dmr type.
(rs6000_dmr_register_move_cost): Add support for TDOmode.
(rs6000_split_multireg_move): Likewise.
(rs6000_invalid_conversion): Likewise.
* config/rs6000/rs6000.h (VECTOR_ALIGNMENT_P): Add TDOmode.
(enum rs6000_builtin_type_index): Add DMR type nodes.
(dmr_type_node): Likewise.
(ptr_dmr_type_node): Likewise.

gcc/testsuite/

* gcc.target/powerpc/dm-1024bit.c: New test.

Diff:
---
 gcc/config/rs6000/mma.md  | 154 ++
 gcc/config/rs6000/rs6000-builtin.cc   |  17 +++
 gcc/config/rs6000/rs6000-call.cc  |  10 +-
 gcc/config/rs6000/rs6000-modes.def|   4 +
 gcc/config/rs6000/rs6000.cc   | 101 -
 gcc/config/rs6000/rs6000.h|   6 +-
 gcc/testsuite/gcc.target/powerpc/dm-1024bit.c |  63 +++
 7 files changed, 321 insertions(+), 34 deletions(-)

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index 2e04eb653fa6..8461499e1c3d 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -92,6 +92,11 @@
UNSPEC_MMA_XXMFACC
UNSPEC_MMA_XXMTACC
UNSPEC_MMA_DMSETDMRZ
+   UNSPEC_DM_INSERT512_UPPER
+   UNSPEC_DM_INSERT512_LOWER
+   UNSPEC_DM_EXTRACT512
+   UNSPEC_DMR_RELOAD_FROM_MEMORY
+   UNSPEC_DMR_RELOAD_TO_MEMORY
   ])
 
 (define_c_enum "unspecv"
@@ -793,3 +798,152 @@
 }
   [(set_attr "type" "mma")
(set_attr "prefixed" "yes")])
+
+;; TDOmode (__dmr keyword for 1,024 bit registers).
+(define_expand "movtdo"
+  [(set (match_operand:TDO 0 "nonimmediate_operand")
+   (match_operand:TDO 1 "input_operand"))]
+  "TARGET_MMA_DENSE_MATH"
+{
+  rs6000_emit_move (operands[0], operands[1], TDOmode);
+  DONE;
+})
+
+(define_insn_and_split "*movtdo"
+  [(set (match_operand:TDO 

[gcc(refs/users/meissner/heads/work188-dmf)] RFC2653-PowerPC: Switch to dense math names for all MMA operations.

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:57c2305f2ea7e631e2aad02f28606bbac2b7dd75

commit 57c2305f2ea7e631e2aad02f28606bbac2b7dd75
Author: Michael Meissner 
Date:   Mon Dec 2 15:40:34 2024 -0500

RFC2653-PowerPC: Switch to dense math names for all MMA operations.

This patch changes the assembler instruction names for MMA instructions from
the original name used in power10 to the new name when used with the dense 
math
system.  I.e. xvf64gerpp becomes dmxvf64gerpp.  The assembler will emit the
same bits for either spelling.

For the non-prefixed MMA instructions, we add a 'dm' prefix in front of the
instruction.  However, the prefixed instructions have a 'pm' prefix, and we 
add
the 'dm' prefix afterwards.  To prevent having two sets of parallel int
attributes, we remove the "pm" prefix from the instruction string in the
attributes, and add it later, both in the insn name and in the output 
template.

2024-12-02   Michael Meissner  

gcc/

* config/rs6000/mma.md (vvi4i4i8): Change the instruction to not 
have a
"pm" prefix.
(avvi4i4i8): Likewise.
(vvi4i4i2): Likewise.
(avvi4i4i2): Likewise.
(vvi4i4): Likewise.
(avvi4i4): Likewise.
(pvi4i2): Likewise.
(apvi4i2): Likewise.
(vvi4i4i4): Likewise.
(avvi4i4i4): Likewise.
(mma_): Add support for running on DMF systems, generating the 
dense
math instruction and using the dense math accumulators.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_pm): Add support for running on DMF systems, 
generating
the dense math instruction and using the dense math accumulators.
Rename the insn with a 'pm' prefix and add either 'pm' or 'pmdm'
prefixes based on whether we have the original MMA specification or 
if
we have dense math support.
(mma_pm): Likewise.
(mma_pm): Likewise.
(mma_pm): Likewise.
(mma_pm): Likewise.
(mma_pm): Likewise.
(mma_pm): Likewise.
(mma_pm): Likewise.

Diff:
---
 gcc/config/rs6000/mma.md | 157 +++
 1 file changed, 104 insertions(+), 53 deletions(-)

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index ae6e7e9695be..2e04eb653fa6 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -225,44 +225,47 @@
 (UNSPEC_MMA_XVF64GERNP "xvf64gernp")
 (UNSPEC_MMA_XVF64GERNN "xvf64gernn")])
 
-(define_int_attr vvi4i4i8  [(UNSPEC_MMA_PMXVI4GER8 "pmxvi4ger8")])
+;; The "pm" prefix is not in these expansions, so that we can generate
+;; pmdmxvi4ger8 on systems with dense math registers and xvi4ger8 on systems
+;; without dense math registers.
+(define_int_attr vvi4i4i8  [(UNSPEC_MMA_PMXVI4GER8 "xvi4ger8")])
 
-(define_int_attr avvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8PP   
"pmxvi4ger8pp")])
+(define_int_attr avvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8PP   "xvi4ger8pp")])
 
-(define_int_attr vvi4i4i2  [(UNSPEC_MMA_PMXVI16GER2"pmxvi16ger2")
-(UNSPEC_MMA_PMXVI16GER2S   "pmxvi16ger2s")
-(UNSPEC_MMA_PMXVF16GER2"pmxvf16ger2")
-(UNSPEC_MMA_PMXVBF16GER2   
"pmxvbf16ger2")])
+(define_int_attr vvi4i4i2  [(UNSPEC_MMA_PMXVI16GER2"xvi16ger2")
+(UNSPEC_MMA_PMXVI16GER2S   "xvi16ger2s")
+(UNSPEC_MMA_PMXVF16GER2"xvf16ger2")
+(UNSPEC_MMA_PMXVBF16GER2   "xvbf16ger2")])
 
-(define_int_attr avvi4i4i2 [(UNSPEC_MMA_PMXVI16GER2PP  "pmxvi16ger2pp")
-(UNSPEC_MMA_PMXVI16GER2SPP 
"pmxvi16ger2spp")
-(UNSPEC_MMA_PMXVF16GER2PP  "pmxvf16ger2pp")
-(UNSPEC_MMA_PMXVF16GER2PN  "pmxvf16ger2pn")
-(UNSPEC_MMA_PMXVF16GER2NP  "pmxvf16ger2np")
-(UNSPEC_MMA_PMXVF16GER2NN  "pmxvf16ger2nn")
-(UNSPEC_MMA_PMXVBF16GER2PP 
"pmxvbf16ger2pp")
-(UNSPEC_MMA_PMXVBF16GER2PN 
"pmxvbf16ger2pn")
-(UNSPEC_MMA_PMXVBF16GER2NP 
"pmxvbf16ger2np")
-(UNSPEC_MMA_PMXVBF16GER2NN 
"pmxvbf16ger2nn")])
+(define_int_attr avvi4i4i2 [(UNSPEC_MMA_PMXVI16GER2PP  "xvi16ger2pp")
+(UNSPEC_MMA_PMXVI16GER2SPP "xvi16ger2spp")
+(UNSPEC_MMA_PMXVF16GER2PP  "xvf16ger2pp")
+(UNSPEC_MMA_PMXVF16GER2PN  "xvf1

[gcc(refs/users/meissner/heads/work188-dmf)] Update ChangeLog.*

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:5b7555fb5ade47de48c64f0fa3eb1eccdf486585

commit 5b7555fb5ade47de48c64f0fa3eb1eccdf486585
Author: Michael Meissner 
Date:   Mon Dec 2 15:55:33 2024 -0500

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.dmf | 240 ++
 1 file changed, 240 insertions(+)

diff --git a/gcc/ChangeLog.dmf b/gcc/ChangeLog.dmf
index a606d503af09..5716c95fc0eb 100644
--- a/gcc/ChangeLog.dmf
+++ b/gcc/ChangeLog.dmf
@@ -1,5 +1,245 @@
+ Branch work188-dmf, patch #11 
+
+RFC2653-PowerPC: Add support for 1,024 bit DMR registers.
+
+This patch is a preliminary patch to add the full 1,024 bit dense math register
+(DMRs) for -mcpu=future.  The MMA 512-bit accumulators map onto the top of the
+DMR register.
+
+This patch only adds the new 1,024 bit register support.  It does not add
+support for any instructions that need 1,024 bit registers instead of 512 bit
+registers.
+
+I used the new mode 'TDOmode' to be the opaque mode used for 1,024 bit
+registers.  The 'wD' constraint added in previous patches is used for these
+registers.  I added support to do load and store of DMRs via the VSX registers,
+since there are no load/store dense math instructions.  I added the new keyword
+'__dmr' to create 1,024 bit types that can be loaded into DMRs.  At present, I
+don't have aliases for __dmr512 and __dmr1024 that we've discussed internally.
+
+The patches have been tested on both little and big endian systems.  Can I check
+it into the master branch?
+
+2024-12-02   Michael Meissner  
+
+gcc/
+
+   * config/rs6000/mma.md (UNSPEC_DM_INSERT512_UPPER): New unspec.
+   (UNSPEC_DM_INSERT512_LOWER): Likewise.
+   (UNSPEC_DM_EXTRACT512): Likewise.
+   (UNSPEC_DMR_RELOAD_FROM_MEMORY): Likewise.
+   (UNSPEC_DMR_RELOAD_TO_MEMORY): Likewise.
+   (movtdo): New define_expand and define_insn_and_split to implement 1,024
+   bit DMR registers.
+   (movtdo_insert512_upper): New insn.
+   (movtdo_insert512_lower): Likewise.
+   (movtdo_extract512): Likewise.
+   (reload_dmr_from_memory): Likewise.
+   (reload_dmr_to_memory): Likewise.
+   * config/rs6000/rs6000-builtin.cc (rs6000_type_string): Add DMR
+   support.
+   (rs6000_init_builtins): Add support for __dmr keyword.
+   * config/rs6000/rs6000-call.cc (rs6000_return_in_memory): Add support
+   for TDOmode.
+   (rs6000_function_arg): Likewise.
+   * config/rs6000/rs6000-modes.def (TDOmode): New mode.
+   * config/rs6000/rs6000.cc (rs6000_hard_regno_nregs_internal): Add
+   support for TDOmode.
+   (rs6000_hard_regno_mode_ok_uncached): Likewise.
+   (rs6000_hard_regno_mode_ok): Likewise.
+   (rs6000_modes_tieable_p): Likewise.
+   (rs6000_debug_reg_global): Likewise.
+   (rs6000_setup_reg_addr_masks): Likewise.
+   (rs6000_init_hard_regno_mode_ok): Add support for TDOmode.  Setup reload
+   hooks for DMR mode.
+   (reg_offset_addressing_ok_p): Add support for TDOmode.
+   (rs6000_emit_move): Likewise.
+   (rs6000_secondary_reload_simple_move): Likewise.
+   (rs6000_preferred_reload_class): Likewise.
+   (rs6000_secondary_reload_class): Likewise.
+   (rs6000_mangle_type): Add mangling for __dmr type.
+   (rs6000_dmr_register_move_cost): Add support for TDOmode.
+   (rs6000_split_multireg_move): Likewise.
+   (rs6000_invalid_conversion): Likewise.
+   * config/rs6000/rs6000.h (VECTOR_ALIGNMENT_P): Add TDOmode.
+   (enum rs6000_builtin_type_index): Add DMR type nodes.
+   (dmr_type_node): Likewise.
+   (ptr_dmr_type_node): Likewise.
+
+gcc/testsuite/
+
+   * gcc.target/powerpc/dm-1024bit.c: New test.
+   * lib/target-supports.exp (check_effective_target_ppc_dmr_ok): New
+   target test.
+
+ Branch work188-dmf, patch #111 
+
+RFC2653-Add support for dense math registers.
+
+The MMA subsystem added the notion of accumulator registers as an optional
+feature of ISA 3.1 (power10).  In ISA 3.1, these accumulators overlapped with
+the VSX registers 0..31, but logically the accumulator registers were separate
+from the FPR registers.  In ISA 3.1, it was anticipated that in future systems,
+the accumulator registers may not overlap with the FPR registers.  This patch
+adds the support for dense math registers as separate registers.
+
+This particular patch does not change the MMA support to use the accumulators
+within the dense math registers.  This patch just adds the basic support for
+having separate DMRs.  The next patch will switch the MMA support to use the
+accumulators if -mcpu=future is used.
+
+For testing purposes, I added an undocumented option '-mdense-math' to enable
+or disable the dense math support.
+
+This patch updates the wD constraint added in the previous patch.  If MMA is
+selected but dense math is not selected (i.e. -mcpu=power10), the wD constraint
+will allow access to

[gcc(refs/users/meissner/heads/work188-vpair)] Add ChangeLog.vpair and update REVISION.

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:451c14b5d5abd810fe5ee7a5ceb9c346392dd9a5

commit 451c14b5d5abd810fe5ee7a5ceb9c346392dd9a5
Author: Michael Meissner 
Date:   Mon Dec 2 14:39:56 2024 -0500

Add ChangeLog.vpair and update REVISION.

2024-12-02  Michael Meissner  

gcc/

* ChangeLog.vpair: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.vpair | 5 +
 gcc/REVISION| 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.vpair b/gcc/ChangeLog.vpair
new file mode 100644
index ..8394f9d97c9c
--- /dev/null
+++ b/gcc/ChangeLog.vpair
@@ -0,0 +1,5 @@
+ Branch work188-vpair, baseline 
+
+2024-12-02   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index d4db5e09c166..3ff9ab1309f4 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work188 branch
+work188-vpair branch


[gcc(refs/users/meissner/heads/work188-dmf)] RFC2653-Add wD constraint.

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:4d616a3de1f9fbd8d247d8eec31476e319659c41

commit 4d616a3de1f9fbd8d247d8eec31476e319659c41
Author: Michael Meissner 
Date:   Mon Dec 2 15:38:21 2024 -0500

RFC2653-Add wD constraint.

This patch adds a new constraint ('wD') that matches the accumulator registers
that overlap with VSX registers 0..31 on power10.  Future patches will add the
support for a separate accumulator register class that will be used when the
support for dense math registers is added.

2024-12-02   Michael Meissner  

* config/rs6000/constraints.md (wD): New constraint.
* config/rs6000/mma.md (mma_): Prepare for alternate 
accumulator
registers.  Use wD constraint instead of 'd' constraint.  Use
accumulator_operand instead of fpr_reg_operand.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")]
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0")]
MMA_ACC))]
   "TARGET_MMA"
   " %A0"
@@ -523,7 +523,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
MMA_VV))]
@@ -532,8 +532,8 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
MMA_AVV))]
@@ -542,7 +542,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
MMA_PV))]
@@ -551,8 +551,8 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
(match_operand:OO 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
MMA_APV))]
@@ -561,7 +561,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -574,8 +574,8 @@
(set_attr "prefixed" "yes")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
(match_operand:SI 4 "const_0_to_15_operand" "n,n")
@@ -588,7 +588,7 @@
(set_attr "prefixed" "yes")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -601,8 +601,8 @@
(set_attr "prefixed" "yes")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")

[gcc/meissner/heads/work188-test] (15 commits) Merge commit 'refs/users/meissner/heads/work188-test' of gi

2024-12-02 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work188-test' was updated to point to:

 a80482c82051... Merge commit 'refs/users/meissner/heads/work188-test' of gi

It previously pointed to:

 af0ca62a9d6f... Add ChangeLog.test and update REVISION.

Diff:

Summary of changes (added commits):
---

  a80482c... Merge commit 'refs/users/meissner/heads/work188-test' of gi
  c1f94b1... Add ChangeLog.test and update REVISION.
  b51496b... Update ChangeLog.* (*)
  c9a0ac5... Use architecture flags for defining _ARCH_PWR macros. (*)
  71cab11... Add rs6000 architecture masks. (*)
  aeaa194... Do not allow -mvsx to boost processor to power7. (*)
  7052142... Use vector pair load/store for memcpy with -mcpu=future (*)
  5fbb82a... Add -mcpu=future tests. (*)
  37d4ea1... Add -mcpu=future tuning support. (*)
  2114420... Add support for -mcpu=future (*)
  5c3cbdb... Change TARGET_MODULO to TARGET_POWER9. (*)
  ff12ee1... Change TARGET_POPCNTD to TARGET_POWER7. (*)
  d0188be... Change TARGET_CMPB to TARGET_POWER6. (*)
  798cbe5... Change TARGET_FPRND to TARGET_POWER5X. (*)
  96a8105... Change TARGET_POPCNTB to TARGET_POWER5. (*)

(*) This commit already exists in another branch.
Because the reference `refs/users/meissner/heads/work188-test' matches
your hooks.email-new-commits-only configuration,
no separate email is sent for this commit.


[gcc(refs/users/meissner/heads/work188-test)] Add ChangeLog.test and update REVISION.

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:c1f94b1ce2a83bb6df5ce325efc5ad679b5d6d8d

commit c1f94b1ce2a83bb6df5ce325efc5ad679b5d6d8d
Author: Michael Meissner 
Date:   Mon Dec 2 14:43:46 2024 -0500

Add ChangeLog.test and update REVISION.

2024-12-02  Michael Meissner  

gcc/

* ChangeLog.test: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.test | 5 +
 gcc/REVISION   | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.test b/gcc/ChangeLog.test
new file mode 100644
index ..39c2bf5c53f9
--- /dev/null
+++ b/gcc/ChangeLog.test
@@ -0,0 +1,5 @@
+ Branch work188-test, baseline 
+
+2024-12-02   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index d4db5e09c166..83751d566db6 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work188 branch
+work188-test branch


[gcc(refs/users/meissner/heads/work188-test)] Merge commit 'refs/users/meissner/heads/work188-test' of git+ssh://gcc.gnu.org/git/gcc into me/work1

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:a80482c82051f3b32022e050a527284703ae86d5

commit a80482c82051f3b32022e050a527284703ae86d5
Merge: c1f94b1ce2a8 af0ca62a9d6f
Author: Michael Meissner 
Date:   Mon Dec 2 15:27:17 2024 -0500

Merge commit 'refs/users/meissner/heads/work188-test' of 
git+ssh://gcc.gnu.org/git/gcc into me/work188-test

Diff:


[gcc(refs/users/meissner/heads/work188-bugs)] Update ChangeLog.*

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:5e361688edcdff3457b6790df98d60438d0dbd3d

commit 5e361688edcdff3457b6790df98d60438d0dbd3d
Author: Michael Meissner 
Date:   Mon Dec 2 16:01:57 2024 -0500

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.bugs | 168 +
 1 file changed, 168 insertions(+)

diff --git a/gcc/ChangeLog.bugs b/gcc/ChangeLog.bugs
index 6627fb747e1b..03c6c93fe5f1 100644
--- a/gcc/ChangeLog.bugs
+++ b/gcc/ChangeLog.bugs
@@ -1,5 +1,173 @@
+ Branch work188-bugs, patch #202 
+
+PR target/108958 -- use mtvsrdd to zero extend GPR DImode to VSX TImode
+
+Previously GCC would zero extend a DImode GPR value to TImode by first zero
+extending the DImode value into a GPR TImode value, and then do a MTVSRDD to
+move this value to a VSX register.
+
+This patch does the move directly, since if the middle argument to MTVSRDD is 0,
+it does the zero extend.
+
+If the DImode value is already in a vector register, it does a XXSPLTIB and
+XXPERMDI to get the value into the bottom 64-bits of the register.
+
+I have built GCC with the patches in this patch set applied on both little and
+big endian PowerPC systems and there were no regressions.  Can I apply this
+patch to GCC 15?
+
+2024-11-22  Michael Meissner  
+
+gcc/
+
+   PR target/108958
+   * gcc/config/rs6000/rs6000.md (zero_extendditi2): New insn.
+
+gcc/testsuite/
+
+   PR target/108958
+   * gcc.target/powerpc/pr108958.c: New test.
+
+ Branch work188-bugs, patch #201 
+
+Add power9 and power10 float to logical optimizations.
+
+I was answering an email from a co-worker and I pointed him to work I had done
+for the Power8 era that optimizes the 32-bit float math library in Glibc.  In
+doing so, I discovered with the Power9 and later computers, this optimization
+is no longer taking place.
+
+The glibc 32-bit floating point math functions have code that looks like:
+
+   union u {
+ float f;
+ uint32_t u32;
+   };
+
+   float
+   math_foo (float x, unsigned int mask)
+   {
+ union u arg;
+ float x2;
+
+ arg.f = x;
+ arg.u32 &= mask;
+
+ x2 = arg.f;
+ /* ... */
+   }
+
+On power8 with the optimization it generates:
+
+xscvdpspn 0,1
+sldi 9,4,32
+mtvsrd 32,9
+xxland 1,0,32
+xscvspdpn 1,1
+
+I.e., it converts the SFmode to the memory format (instead of the DFmode that
+is used within the register), converts the mask so that it is in the vector
+register in the upper 32-bits, and does a XXLAND (i.e. there is only one direct
+move from GPR to vector register).  Then after doing this, it converts the
+upper 32-bits back to DFmode.
+
+If the XSCVSPDPN instruction took the value in the normal 32-bit scalar in a
+vector register, we wouldn't have needed the SLDI of the mask.
+
+On power9/power10/power11 it currently generates:
+
+xscvdpspn 0,1
+mfvsrwz 2,0
+and 2,2,4
+mtvsrws 1,2
+xscvspdpn 1,1
+blr
+
+I.e convert to SFmode representation, move the value to a GPR, do an AND
+operation, move the 32-bit value with a splat, and then convert it back to
+DFmode format.
+
+With this patch, it now generates:
+
+xscvdpspn 0,1
+mtvsrwz 32,2
+xxland 32,0,32
+xxspltw 1,32,1
+xscvspdpn 1,1
+blr
+
+I.e. convert to SFmode representation, move the mask to the vector register, do
+the operation using XXLAND.  Splat the value to get the value in the correct
+location, and then convert back to DFmode.
+
+I have built GCC with the patches in this patch set applied on both little and
+big endian PowerPC systems and there were no regressions.  Can I apply this
+patch to GCC 15?
+
+2024-11-22  Michael Meissner  
+
+gcc/
+
+   PR target/117487
+   * config/rs6000/vsx.md (SFmode logical peephole): Update comments in
+   the original code that supports power8.  Add a new define_peephole2 to
+   do the optimization on power9/power10.
+
+ Branch work188-bugs, patch #200 
+
+PR 99293: Optimize splat of a V2DF/V2DI extract with constant element
+
+We had optimizations for splat of a vector extract for the other vector
+types, but we missed having one for V2DI and V2DF.  This patch adds a
+combiner insn to do this optimization.
+
+In looking at the source, we had similar optimizations for V4SI and V4SF
+extract and splats, but we missed doing V2DI/V2DF.
+
+Without the patch for the code:
+
+   vector long long splat_dup_l_0 (vector long long v)
+   {
+ return __builtin_vec_splats (__builtin_vec_extract (v, 0));
+   }
+
+the compiler generates (on a little endian power9):
+
+   splat_dup_l_0:
+   mfvsrld 9,34
+   mtvsrdd 34,9,9
+   blr
+
+Now it generates:
+
+   splat_dup_l_0:
+   xxpermdi 34,34,34,3
+  

[gcc(refs/users/meissner/heads/work188-dmf)] Revert changes

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:9df687c75126f18e40367044154fee432edf3318

commit 9df687c75126f18e40367044154fee432edf3318
Author: Michael Meissner 
Date:   Mon Dec 2 16:27:32 2024 -0500

Revert changes

Diff:
---
 gcc/config/rs6000/altivec.md   | 14 
 gcc/config/rs6000/constraints.md   | 10 ---
 gcc/config/rs6000/predicates.md| 52 +---
 gcc/config/rs6000/rs6000.cc| 25 --
 gcc/config/rs6000/rs6000.h |  7 --
 gcc/config/rs6000/rs6000.md| 96 +++---
 gcc/testsuite/gcc.target/powerpc/prefixed-addis.c  | 24 --
 .../gcc.target/powerpc/vector-rotate-left.c| 34 
 8 files changed, 14 insertions(+), 248 deletions(-)

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index c76b1eeefe35..b6a778ef6179 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -1982,20 +1982,6 @@
 }
   [(set_attr "type" "vecperm")])
 
-;; -mcpu=future adds a vector rotate left word variant.  There is no vector
-;; byte/half-word/double-word/quad-word rotate left.  This insn occurs before
-;; altivec_vrl and will match for -mcpu=future, while other cpus will
-;; match the generic insn.
-(define_insn "*xvrlw"
-  [(set (match_operand:V4SI 0 "register_operand" "=v,wa")
-   (rotate:V4SI (match_operand:V4SI 1 "register_operand" "v,wa")
-(match_operand:V4SI 2 "register_operand" "v,wa")))]
-  "TARGET_XVRLW"
-  "@
-   vrlw %0,%1,%2
-   xvrlw %x0,%x1,%x2"
-  [(set_attr "type" "vecsimple")])
-
 (define_insn "altivec_vrl"
   [(set (match_operand:VI2 0 "register_operand" "=v")
 (rotate:VI2 (match_operand:VI2 1 "register_operand" "v")
diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index 4d8d21fd6bbb..277a30a82458 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -222,16 +222,6 @@
   "An IEEE 128-bit constant that can be loaded into VSX registers."
   (match_operand 0 "easy_vector_constant_ieee128"))
 
-(define_constraint "eU"
-  "@internal integer constant that can be loaded with paddis"
-  (and (match_code "const_int")
-   (match_operand 0 "paddis_operand")))
-
-(define_constraint "eV"
-  "@internal integer constant that can be loaded with paddis + paddi"
-  (and (match_code "const_int")
-   (match_operand 0 "paddis_paddi_operand")))
-
 ;; Floating-point constraints.  These two are defined so that insn
 ;; length attributes can be calculated exactly.
 
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index f8e7df5e7f5b..2797c3cf619b 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -369,53 +369,6 @@
   return SIGNED_INTEGER_34BIT_P (INTVAL (op));
 })
 
-;; Return 1 if op is a 64-bit constant that uses the paddis instruction
-(define_predicate "paddis_operand"
-  (match_code "const_int")
-{
-  if (!TARGET_PADDIS && TARGET_POWERPC64)
-return 0;
-
-  /* If addi, addis, or paddi can handle the number, don't return true.  */
-  HOST_WIDE_INT value = INTVAL (op);
-  if (SIGNED_INTEGER_34BIT_P (value))
-return false;
-
-  /* If the number is too large for padds, return false.  */
-  if (!SIGNED_INTEGER_32BIT_P (value >> 32))
-return false;
-
-  /* If the bottom 32-bits are non-zero, paddis can't handle it.  */
-  if ((value & HOST_WIDE_INT_C(0x)) != 0)
-return false;
-
-  return true;
-})
-
-;; Return 1 if op is a 64-bit constant that needs the paddis instruction and an
-;; addi/addis/paddi instruction combination.
-(define_predicate "paddis_paddi_operand"
-  (match_code "const_int")
-{
-  if (!TARGET_PADDIS && TARGET_POWERPC64)
-return 0;
-
-  /* If addi, addis, or paddi can handle the number, don't return true.  */
-  HOST_WIDE_INT value = INTVAL (op);
-  if (SIGNED_INTEGER_34BIT_P (value))
-return false;
-
-  /* If the number is too large for padds, return false.  */
-  if (!SIGNED_INTEGER_32BIT_P (value >> 32))
-return false;
-
-  /* If the bottom 32-bits are zero, we can use paddis alone to handle it.  */
-  if ((value & HOST_WIDE_INT_C(0x)) == 0)
-return false;
-
-  return true;
-})
-
 ;; Return 1 if op is a register that is not special.
 ;; Disallow (SUBREG:SF (REG:SI)) and (SUBREG:SI (REG:SF)) on VSX systems where
 ;; you need to be careful in moving a SFmode to SImode and vice versa due to
@@ -1160,10 +1113,7 @@
   (if_then_else (match_code "const_int")
 (match_test "satisfies_constraint_I (op)
 || satisfies_constraint_L (op)
-|| satisfies_constraint_eI (op)
-|| satisfies_constraint_eU (op)
-|| satisfies_constraint_eV (op)")
-
+|| satisfies_constraint_eI (op)")
 (match_operand 0 "gpc_reg_operand")))
 
 ;; Return 1 if the operand is either a non-special register, or 0, or -1.
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/confi

[gcc(refs/users/meissner/heads/work188-dmf)] RFC2677-Add xvrlw support.

2024-12-02 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:382baf97dd99f4c5d8c07d1f4034d237eb6e

commit 382baf97dd99f4c5d8c07d1f4034d237eb6e
Author: Michael Meissner 
Date:   Mon Dec 2 16:26:10 2024 -0500

RFC2677-Add xvrlw support.

2024-11-22  Michael Meissner  

gcc/

* config/rs6000/altivec.md (xvrlw): New insn.
* config/rs6000/rs6000.h (TARGET_XVRLW): New macro.

gcc/testsuite/

* gcc.target/powerpc/vector-rotate-left.c: New test.

Diff:
---
 gcc/config/rs6000/altivec.md   | 14 +
 gcc/config/rs6000/rs6000.h |  3 ++
 .../gcc.target/powerpc/vector-rotate-left.c| 34 ++
 3 files changed, 51 insertions(+)

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index b6a778ef6179..c76b1eeefe35 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -1982,6 +1982,20 @@
 }
   [(set_attr "type" "vecperm")])
 
+;; -mcpu=future adds a vector rotate left word variant.  There is no vector
+;; byte/half-word/double-word/quad-word rotate left.  This insn occurs before
+;; altivec_vrl and will match for -mcpu=future, while other cpus will
+;; match the generic insn.
+(define_insn "*xvrlw"
+  [(set (match_operand:V4SI 0 "register_operand" "=v,wa")
+   (rotate:V4SI (match_operand:V4SI 1 "register_operand" "v,wa")
+(match_operand:V4SI 2 "register_operand" "v,wa")))]
+  "TARGET_XVRLW"
+  "@
+   vrlw %0,%1,%2
+   xvrlw %x0,%x1,%x2"
+  [(set_attr "type" "vecsimple")])
+
 (define_insn "altivec_vrl"
   [(set (match_operand:VI2 0 "register_operand" "=v")
 (rotate:VI2 (match_operand:VI2 1 "register_operand" "v")
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index bf83c2e57bcf..bd4ed41bef98 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -584,6 +584,9 @@ extern int rs6000_vector_align[];
 /* Whether we have PADDIS support.  */
 #define TARGET_PADDIS  TARGET_FUTURE
 
+/* Whether we have XVRLW support.  */
+#define TARGET_XVRLW   TARGET_FUTURE
+
 /* Whether the various reciprocal divide/square root estimate instructions
exist, and whether we should automatically generate code for the instruction
by default.  */
diff --git a/gcc/testsuite/gcc.target/powerpc/vector-rotate-left.c 
b/gcc/testsuite/gcc.target/powerpc/vector-rotate-left.c
new file mode 100644
index ..5a5f37755077
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vector-rotate-left.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_future_ok } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-mdejagnu-cpu=future -O2" } */
+
+/* Test whether the xvrl (vector word rotate left using VSX registers insead of
+   Altivec registers is generated.  */
+
+#include 
+
+typedef vector unsigned int  v4si_t;
+
+v4si_t
+rotl_v4si_scalar (v4si_t x, unsigned long n)
+{
+  __asm__ (" # %x0" : "+f" (x));
+  return (x << n) | (x >> (32 - n));   /* xvrlw.  */
+}
+
+v4si_t
+rotr_v4si_scalar (v4si_t x, unsigned long n)
+{
+  __asm__ (" # %x0" : "+f" (x));
+  return (x >> n) | (x << (32 - n));   /* xvrlw.  */
+}
+
+v4si_t
+rotl_v4si_vector (v4si_t x, v4si_t y)
+{
+  __asm__ (" # %x0" : "+f" (x));   /* xvrlw.  */
+  return vec_rl (x, y);
+}
+
+/* { dg-final { scan-assembler-times {\mxvrlw\M} 3  } } */


  1   2   >