[gcc r14-9589] tree-optimization/113727 - bogus SRA with BIT_FIELD_REF

2024-03-21 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:9d6ff6f1ea2ae7fc32ec9fbd0554fb06238ed045

commit r14-9589-g9d6ff6f1ea2ae7fc32ec9fbd0554fb06238ed045
Author: Richard Biener 
Date:   Tue Mar 19 14:50:06 2024 +0100

tree-optimization/113727 - bogus SRA with BIT_FIELD_REF

When SRA analyzes BIT_FIELD_REFs it handles writes and not byte
aligned reads differently from byte aligned reads.  Instead of
trying to create replacements for the loaded portion the former
cases try to replace the base object while keeping the wrapping
BIT_FIELD_REFs.  This breaks when we have both kinds operating
on the same base object if there's no apparent overlap conflict
as the conflict that then nevertheless exists isn't dealt with.
The fix is to enforce what I think is part of the design handling
the former case - that only the full base object gets replaced
and no further sub-objects are created within as otherwise
keeping the wrapping BIT_FIELD_REF cannot work.  The patch
enforces this within analyze_access_subtree.

PR tree-optimization/113727
* tree-sra.cc (analyze_access_subtree): Do not allow
replacements in subtrees when grp_partial_lhs.

* gcc.dg/torture/pr113727.c: New testcase.

Diff:
---
 gcc/testsuite/gcc.dg/torture/pr113727.c | 26 ++
 gcc/tree-sra.cc |  3 ++-
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/torture/pr113727.c 
b/gcc/testsuite/gcc.dg/torture/pr113727.c
new file mode 100644
index 000..f92ddad5c8e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr113727.c
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+/* { dg-require-effective-target int32plus } */
+
+struct f {
+  unsigned au : 5;
+  unsigned f3 : 21;
+} g_994;
+
+int main()
+{
+  struct f aq1 = {};
+{
+  struct f aq = {9, 5};
+  struct f as = aq;
+  for (int y = 0 ; y <= 4; y += 1)
+   if (as.au)
+ {
+   struct f aa[5] = {{2, 154}, {2, 154}, {2, 154}, {2, 154}, {2, 154}};
+   as = aa[0];
+ }
+  aq1 = as;
+}
+  if (aq1.f3 != 0x9a)
+__builtin_abort();
+  return 0;
+}
diff --git a/gcc/tree-sra.cc b/gcc/tree-sra.cc
index f8e71ec48b9..dbfae5e7fdd 100644
--- a/gcc/tree-sra.cc
+++ b/gcc/tree-sra.cc
@@ -2735,7 +2735,8 @@ analyze_access_subtree (struct access *root, struct 
access *parent,
 {
   hole |= covered_to < child->offset;
   sth_created |= analyze_access_subtree (child, root,
-allow_replacements && !scalar,
+allow_replacements && !scalar
+&& !root->grp_partial_lhs,
 totally);
 
   root->grp_unscalarized_data |= child->grp_unscalarized_data;


[gcc r14-9590] tree-optimization/111736 - avoid address sanitizing of __seg_gs

2024-03-21 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:134ef2a8cac1a5cc718739bd7d3b3472947c80d6

commit r14-9590-g134ef2a8cac1a5cc718739bd7d3b3472947c80d6
Author: Richard Biener 
Date:   Thu Mar 21 08:30:39 2024 +0100

tree-optimization/111736 - avoid address sanitizing of __seg_gs

The following more thoroughly avoids address sanitizing accesses
to non-generic address-spaces.

PR tree-optimization/111736
* asan.cc (instrument_derefs): Do not instrument accesses
to non-generic address-spaces.

* gcc.target/i386/pr111736.c: New testcase.

Diff:
---
 gcc/asan.cc  |  4 
 gcc/testsuite/gcc.target/i386/pr111736.c | 23 +++
 2 files changed, 27 insertions(+)

diff --git a/gcc/asan.cc b/gcc/asan.cc
index cfe83106460..7f91cc616fc 100644
--- a/gcc/asan.cc
+++ b/gcc/asan.cc
@@ -2755,6 +2755,10 @@ instrument_derefs (gimple_stmt_iterator *iter, tree t,
   if (VAR_P (inner) && DECL_HARD_REGISTER (inner))
 return;
 
+  /* Accesses to non-generic address-spaces should not be instrumented.  */
+  if (!ADDR_SPACE_GENERIC_P (TYPE_ADDR_SPACE (TREE_TYPE (inner
+return;
+
   poly_int64 decl_size;
   if ((VAR_P (inner)
|| (TREE_CODE (inner) == RESULT_DECL
diff --git a/gcc/testsuite/gcc.target/i386/pr111736.c 
b/gcc/testsuite/gcc.target/i386/pr111736.c
new file mode 100644
index 000..231fdd07e80
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr111736.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fsanitize=address" } */
+
+int __seg_gs m;
+
+int foo (void)
+{
+  return m;
+}
+
+extern int  __seg_gs n;
+
+int bar (void)
+{
+  return n;
+}
+
+int baz (int __seg_gs *o)
+{
+  return *o;
+}
+
+/* { dg-final { scan-assembler-not "asan_report_load" } } */


[gcc r13-8474] rs6000: Don't ICE when compiling the __builtin_vsx_splat_2di [PR113950]

2024-03-21 Thread jeevitha via Gcc-cvs
https://gcc.gnu.org/g:27eb6e81e6e578da9f9947d3f96c0fa58971fe7f

commit r13-8474-g27eb6e81e6e578da9f9947d3f96c0fa58971fe7f
Author: Jeevitha 
Date:   Wed Mar 20 23:34:46 2024 -0500

rs6000: Don't ICE when compiling the __builtin_vsx_splat_2di [PR113950]

When we expand the __builtin_vsx_splat_2di built-in, we were allowing an
immediate value for the second operand, which causes an unrecognizable insn
ICE. Even though the immediate value was forced into a register, it wasn't
correctly assigned to the second operand. So corrected the assignment of op1
to operands[1].

2024-03-07  Jeevitha Palanisamy  

gcc/
PR target/113950
* config/rs6000/vsx.md (vsx_splat_<mode>): Correct assignment to
operand1 and simplify else if with else.

gcc/testsuite/
PR target/113950
* gcc.target/powerpc/pr113950.c: New testcase.

(cherry picked from commit fa0468877869f52b05742de6deef582e4dd296fc)

Diff:
---
 gcc/config/rs6000/vsx.md|  4 ++--
 gcc/testsuite/gcc.target/powerpc/pr113950.c | 24 
 2 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 3506913bd02..f70d69ee4b9 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -4551,8 +4551,8 @@
   rtx op1 = operands[1];
   if (MEM_P (op1))
 operands[1] = rs6000_force_indexed_or_indirect_mem (op1);
-  else if (!REG_P (op1))
-op1 = force_reg (mode, op1);
+  else
+operands[1] = force_reg (mode, op1);
 })
 
 (define_insn "vsx_splat__reg"
diff --git a/gcc/testsuite/gcc.target/powerpc/pr113950.c 
b/gcc/testsuite/gcc.target/powerpc/pr113950.c
new file mode 100644
index 000..359963d1041
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr113950.c
@@ -0,0 +1,24 @@
+/* PR target/113950 */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -mvsx" } */
+
+/* Verify we do not ICE on the following.  */
+
+void abort (void);
+
+int main ()
+{
+  int i;
+  vector signed long long vsll_result, vsll_expected_result;
+  signed long long sll_arg1;
+
+  sll_arg1 = 300;
+  vsll_expected_result = (vector signed long long) {300, 300};
+  vsll_result = __builtin_vsx_splat_2di (sll_arg1);  
+
+  for (i = 0; i < 2; i++)
+if (vsll_result[i] != vsll_expected_result[i])
+  abort();
+
+  return 0;
+}


[gcc r14-9591] Fix runtime error for nonlinear iv vectorization(step_mult).

2024-03-21 Thread hongtao Liu via Gcc-cvs
https://gcc.gnu.org/g:ac2f8c2a367151fc0410f904339c475a953cffc8

commit r14-9591-gac2f8c2a367151fc0410f904339c475a953cffc8
Author: liuhongt 
Date:   Thu Mar 21 13:15:23 2024 +0800

Fix runtime error for nonlinear iv vectorization(step_mult).

wi::from_mpz doesn't take a sign argument.  We want the value to be
wrapped rather than saturated, so pass utype and true to it, which
fixes the bug.

gcc/ChangeLog:

PR tree-optimization/114396
* tree-vect-loop.cc (vect_peel_nonlinear_iv_init): Pass utype
and true to wi::from_mpz.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr114396.c: New test.

Diff:
---
 gcc/testsuite/gcc.target/i386/pr114396.c | 105 +++
 gcc/tree-vect-loop.cc|   2 +-
 2 files changed, 106 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/i386/pr114396.c 
b/gcc/testsuite/gcc.target/i386/pr114396.c
new file mode 100644
index 000..4c4015f871f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr114396.c
@@ -0,0 +1,105 @@
+/* { dg-do run } */
+/* { dg-options "-O1 -fwrapv -fno-vect-cost-model" } */
+
+short a = 0xF;
+short b[16];
+unsigned short ua = 0xF;
+unsigned short ub[16];
+
+short
+__attribute__((noipa))
+foo (short a)
+{
+  for (int e = 0; e < 9; e += 1)
+b[e] = a *= 5;
+  return a;
+}
+
+short
+__attribute__((noipa))
+foo1 (short a)
+{
+  for (int e = 0; e < 9; e += 1)
+b[e] = a *= -5;
+  return a;
+}
+
+unsigned short
+__attribute__((noipa))
+foou (unsigned short a)
+{
+  for (int e = 0; e < 9; e += 1)
+ub[e] = a *= -5;
+  return a;
+}
+
+unsigned short
+__attribute__((noipa))
+foou1 (unsigned short a)
+{
+  for (int e = 0; e < 9; e += 1)
+ub[e] = a *= 5;
+  return a;
+}
+
+short
+__attribute__((noipa,optimize("O3")))
+foo_o3 (short a)
+{
+  for (int e = 0; e < 9; e += 1)
+b[e] = a *= 5;
+  return a;
+}
+
+short
+__attribute__((noipa,optimize("O3")))
+foo1_o3 (short a)
+{
+  for (int e = 0; e < 9; e += 1)
+b[e] = a *= -5;
+  return a;
+}
+
+unsigned short
+__attribute__((noipa,optimize("O3")))
+foou_o3 (unsigned short a)
+{
+  for (int e = 0; e < 9; e += 1)
+ub[e] = a *= -5;
+  return a;
+}
+
+unsigned short
+__attribute__((noipa,optimize("O3")))
+foou1_o3 (unsigned short a)
+{
+  for (int e = 0; e < 9; e += 1)
+ub[e] = a *= 5;
+  return a;
+}
+
+int main() {
+  unsigned short uexp, ures;
+  short exp, res;
+  exp = foo (a);
+  res = foo_o3 (a);
+  if (exp != res)
+__builtin_abort ();
+
+  exp = foo1 (a);
+  res = foo1_o3 (a);
+  if (uexp != ures)
+__builtin_abort ();
+
+  uexp = foou (a);
+  ures = foou_o3 (a);
+  if (uexp != ures)
+__builtin_abort ();
+
+  uexp = foou1 (a);
+  ures = foou1_o3 (a);
+  if (uexp != ures)
+__builtin_abort ();
+
+  return 0;
+}
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 4375ebdcb49..2921a9e6aa1 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -9454,7 +9454,7 @@ vect_peel_nonlinear_iv_init (gimple_seq* stmts, tree 
init_expr,
wi::to_mpz (skipn, exp, UNSIGNED);
mpz_ui_pow_ui (mod, 2, TYPE_PRECISION (type));
mpz_powm (res, base, exp, mod);
-   begin = wi::from_mpz (type, res, TYPE_SIGN (type));
+   begin = wi::from_mpz (utype, res, true);
tree mult_expr = wide_int_to_tree (utype, begin);
init_expr = gimple_build (stmts, MULT_EXPR, utype,
  init_expr, mult_expr);


[gcc r13-8475] Fix runtime error for nonlinear iv vectorization(step_mult).

2024-03-21 Thread hongtao Liu via Gcc-cvs
https://gcc.gnu.org/g:199b021a38f30b681e0dbecd2d0296beabd50b13

commit r13-8475-g199b021a38f30b681e0dbecd2d0296beabd50b13
Author: liuhongt 
Date:   Thu Mar 21 13:15:23 2024 +0800

Fix runtime error for nonlinear iv vectorization(step_mult).

wi::from_mpz doesn't take a sign argument.  We want the value to be
wrapped rather than saturated, so pass utype and true to it, which
fixes the bug.

gcc/ChangeLog:

PR tree-optimization/114396
* tree-vect-loop.cc (vect_peel_nonlinear_iv_init): Pass utype
and true to wi::from_mpz.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr114396.c: New test.

(cherry picked from commit ac2f8c2a367151fc0410f904339c475a953cffc8)

Diff:
---
 gcc/testsuite/gcc.target/i386/pr114396.c | 105 +++
 gcc/tree-vect-loop.cc|   2 +-
 2 files changed, 106 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/i386/pr114396.c 
b/gcc/testsuite/gcc.target/i386/pr114396.c
new file mode 100644
index 000..4c4015f871f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr114396.c
@@ -0,0 +1,105 @@
+/* { dg-do run } */
+/* { dg-options "-O1 -fwrapv -fno-vect-cost-model" } */
+
+short a = 0xF;
+short b[16];
+unsigned short ua = 0xF;
+unsigned short ub[16];
+
+short
+__attribute__((noipa))
+foo (short a)
+{
+  for (int e = 0; e < 9; e += 1)
+b[e] = a *= 5;
+  return a;
+}
+
+short
+__attribute__((noipa))
+foo1 (short a)
+{
+  for (int e = 0; e < 9; e += 1)
+b[e] = a *= -5;
+  return a;
+}
+
+unsigned short
+__attribute__((noipa))
+foou (unsigned short a)
+{
+  for (int e = 0; e < 9; e += 1)
+ub[e] = a *= -5;
+  return a;
+}
+
+unsigned short
+__attribute__((noipa))
+foou1 (unsigned short a)
+{
+  for (int e = 0; e < 9; e += 1)
+ub[e] = a *= 5;
+  return a;
+}
+
+short
+__attribute__((noipa,optimize("O3")))
+foo_o3 (short a)
+{
+  for (int e = 0; e < 9; e += 1)
+b[e] = a *= 5;
+  return a;
+}
+
+short
+__attribute__((noipa,optimize("O3")))
+foo1_o3 (short a)
+{
+  for (int e = 0; e < 9; e += 1)
+b[e] = a *= -5;
+  return a;
+}
+
+unsigned short
+__attribute__((noipa,optimize("O3")))
+foou_o3 (unsigned short a)
+{
+  for (int e = 0; e < 9; e += 1)
+ub[e] = a *= -5;
+  return a;
+}
+
+unsigned short
+__attribute__((noipa,optimize("O3")))
+foou1_o3 (unsigned short a)
+{
+  for (int e = 0; e < 9; e += 1)
+ub[e] = a *= 5;
+  return a;
+}
+
+int main() {
+  unsigned short uexp, ures;
+  short exp, res;
+  exp = foo (a);
+  res = foo_o3 (a);
+  if (exp != res)
+__builtin_abort ();
+
+  exp = foo1 (a);
+  res = foo1_o3 (a);
+  if (uexp != ures)
+__builtin_abort ();
+
+  uexp = foou (a);
+  ures = foou_o3 (a);
+  if (uexp != ures)
+__builtin_abort ();
+
+  uexp = foou1 (a);
+  ures = foou1_o3 (a);
+  if (uexp != ures)
+__builtin_abort ();
+
+  return 0;
+}
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index d08d4996771..9615161ad37 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -8730,7 +8730,7 @@ vect_peel_nonlinear_iv_init (gimple_seq* stmts, tree 
init_expr,
wi::to_mpz (skipn, exp, UNSIGNED);
mpz_ui_pow_ui (mod, 2, TYPE_PRECISION (type));
mpz_powm (res, base, exp, mod);
-   begin = wi::from_mpz (type, res, TYPE_SIGN (type));
+   begin = wi::from_mpz (utype, res, true);
tree mult_expr = wide_int_to_tree (utype, begin);
init_expr = gimple_build (stmts, MULT_EXPR, utype,
  init_expr, mult_expr);


[gcc r13-8476] tree-optimization/111736 - avoid address sanitizing of __seg_gs

2024-03-21 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:6d5eb47849bcf9aecefacf7d7e4767750b1ec83b

commit r13-8476-g6d5eb47849bcf9aecefacf7d7e4767750b1ec83b
Author: Richard Biener 
Date:   Thu Mar 21 08:30:39 2024 +0100

tree-optimization/111736 - avoid address sanitizing of __seg_gs

The following more thoroughly avoids address sanitizing accesses
to non-generic address-spaces.

PR tree-optimization/111736
* asan.cc (instrument_derefs): Do not instrument accesses
to non-generic address-spaces.

* gcc.target/i386/pr111736.c: New testcase.

(cherry picked from commit 134ef2a8cac1a5cc718739bd7d3b3472947c80d6)

Diff:
---
 gcc/asan.cc  |  4 
 gcc/testsuite/gcc.target/i386/pr111736.c | 23 +++
 2 files changed, 27 insertions(+)

diff --git a/gcc/asan.cc b/gcc/asan.cc
index 15feecfb495..df732c02150 100644
--- a/gcc/asan.cc
+++ b/gcc/asan.cc
@@ -2752,6 +2752,10 @@ instrument_derefs (gimple_stmt_iterator *iter, tree t,
   if (VAR_P (inner) && DECL_HARD_REGISTER (inner))
 return;
 
+  /* Accesses to non-generic address-spaces should not be instrumented.  */
+  if (!ADDR_SPACE_GENERIC_P (TYPE_ADDR_SPACE (TREE_TYPE (inner
+return;
+
   poly_int64 decl_size;
   if ((VAR_P (inner) || TREE_CODE (inner) == RESULT_DECL)
   && offset == NULL_TREE
diff --git a/gcc/testsuite/gcc.target/i386/pr111736.c 
b/gcc/testsuite/gcc.target/i386/pr111736.c
new file mode 100644
index 000..231fdd07e80
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr111736.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fsanitize=address" } */
+
+int __seg_gs m;
+
+int foo (void)
+{
+  return m;
+}
+
+extern int  __seg_gs n;
+
+int bar (void)
+{
+  return n;
+}
+
+int baz (int __seg_gs *o)
+{
+  return *o;
+}
+
+/* { dg-final { scan-assembler-not "asan_report_load" } } */


[gcc r13-8477] debug/112718 - reset all type units with -ffat-lto-objects

2024-03-21 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:dd1948d467dc25b9b462b173ec40b95f6aa51356

commit r13-8477-gdd1948d467dc25b9b462b173ec40b95f6aa51356
Author: Richard Biener 
Date:   Mon Jan 22 15:42:59 2024 +0100

debug/112718 - reset all type units with -ffat-lto-objects

When mixing -flto, -ffat-lto-objects and -fdebug-type-section we
fail to reset all type units after early output resulting in an
ICE when attempting to add then duplicate sibling attributes.

PR debug/112718
* dwarf2out.cc (dwarf2out_finish): Reset all type units
for the fat part of an LTO compile.

* gcc.dg/debug/pr112718.c: New testcase.

(cherry picked from commit 7218f5050cb7163edae331f54ca163248ab48bfa)

Diff:
---
 gcc/dwarf2out.cc  | 12 
 gcc/testsuite/gcc.dg/debug/pr112718.c | 12 
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/gcc/dwarf2out.cc b/gcc/dwarf2out.cc
index 33f5f4d5ddb..bd82b86a829 100644
--- a/gcc/dwarf2out.cc
+++ b/gcc/dwarf2out.cc
@@ -32191,24 +32191,12 @@ dwarf2out_finish (const char *filename)
   reset_dies (comp_unit_die ());
   for (limbo_die_node *node = cu_die_list; node; node = node->next)
reset_dies (node->die);
-
-  hash_table comdat_type_table (100);
   for (ctnode = comdat_type_list; ctnode != NULL; ctnode = ctnode->next)
{
- comdat_type_node **slot
- = comdat_type_table.find_slot (ctnode, INSERT);
-
- /* Don't reset types twice.  */
- if (*slot != HTAB_EMPTY_ENTRY)
-   continue;
-
  /* Remove the pointer to the line table.  */
  remove_AT (ctnode->root_die, DW_AT_stmt_list);
-
  if (debug_info_level >= DINFO_LEVEL_TERSE)
reset_dies (ctnode->root_die);
-
- *slot = ctnode;
}
 
   /* Reset die CU symbol so we don't output it twice.  */
diff --git a/gcc/testsuite/gcc.dg/debug/pr112718.c 
b/gcc/testsuite/gcc.dg/debug/pr112718.c
new file mode 100644
index 000..ff80ca5a298
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/debug/pr112718.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target lto } */
+/* { dg-options "-g -fdebug-types-section -flto -ffat-lto-objects" } */
+
+struct {
+  int h;
+  unsigned char data[20 + 24 * 6];
+} _EC_X9_62_PRIME_192V2;
+struct {
+  int h;
+  unsigned char data[20 + 24 * 6];
+} _EC_X9_62_PRIME_192V3;


[gcc r13-8479] tree-optimization/114027 - conditional reduction chain

2024-03-21 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:bd276b5340563182f7d95c383196fdd6fb7e6a1d

commit r13-8479-gbd276b5340563182f7d95c383196fdd6fb7e6a1d
Author: Richard Biener 
Date:   Thu Feb 22 10:50:12 2024 +0100

tree-optimization/114027 - conditional reduction chain

When we classify a conditional reduction chain as CONST_COND_REDUCTION
we fail to verify all involved conditionals have the same constant.
That's a quite unlikely situation so the following simply disables
such classification when there's more than one reduction statement.

PR tree-optimization/114027
* tree-vect-loop.cc (vectorizable_reduction): Use optimized
condition reduction classification only for single-element
chains.

* gcc.dg/vect/pr114027.c: New testcase.

(cherry picked from commit 549f251f055e3a0b0084189a3012c4f15d635e75)

Diff:
---
 gcc/testsuite/gcc.dg/vect/pr114027.c | 26 ++
 gcc/tree-vect-loop.cc| 11 ++-
 2 files changed, 32 insertions(+), 5 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/pr114027.c 
b/gcc/testsuite/gcc.dg/vect/pr114027.c
new file mode 100644
index 000..ead9cdd982d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr114027.c
@@ -0,0 +1,26 @@
+#include "tree-vect.h"
+
+int __attribute__((noipa))
+foo (int *f, int n)
+{
+  int res = 0;
+  for (int i = 0; i < n; ++i)
+{
+  if (f[2*i])
+res = 2;
+  if (f[2*i+1])
+res = -2;
+}
+  return res;
+}
+
+int f[] = { 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 0 };
+
+int
+main ()
+{
+  if (foo (f, 16) != 2)
+__builtin_abort ();
+  return 0;
+}
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 9615161ad37..b4ce9535646 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -7065,17 +7065,18 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
  < GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE 
(vectype_op[i]))
vectype_in = vectype_op[i];
 
-  if (op.code == COND_EXPR)
+  /* Record how the non-reduction-def value of COND_EXPR is defined.
+???  For a chain of multiple CONDs we'd have to match them up all.  */
+  if (op.code == COND_EXPR && reduc_chain_length == 1)
{
- /* Record how the non-reduction-def value of COND_EXPR is defined.  */
  if (dt == vect_constant_def)
{
  cond_reduc_dt = dt;
  cond_reduc_val = op.ops[i];
}
- if (dt == vect_induction_def
- && def_stmt_info
- && is_nonwrapping_integer_induction (def_stmt_info, loop))
+ else if (dt == vect_induction_def
+  && def_stmt_info
+  && is_nonwrapping_integer_induction (def_stmt_info, loop))
{
  cond_reduc_dt = dt;
  cond_stmt_vinfo = def_stmt_info;


[gcc r13-8481] middle-end/114070 - VEC_COND_EXPR folding

2024-03-21 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:a3ff14ac4804be400a52dcf630f0de2d57cae835

commit r13-8481-ga3ff14ac4804be400a52dcf630f0de2d57cae835
Author: Richard Biener 
Date:   Thu Feb 29 09:22:19 2024 +0100

middle-end/114070 - VEC_COND_EXPR folding

The following amends the PR114070 fix to optimistically allow
the folding when we cannot expand the current vec_cond using
vcond_mask and we're still before vector lowering.  This leaves
a small window between vectorization and lowering where we could
break vec_conds that can be expanded via vcond{,u,eq}, most
susceptible is the loop unrolling pass which applies VN and thus
possibly folding to the unrolled body of a vectorized loop.

This gets back the folding for targets that cannot do vectorization.
It doesn't get back the folding for x86 with AVX512 for example
since that can handle the original IL but not the folded since
it misses some vcond_mask expanders.

PR middle-end/114070
* match.pd ((c ? a : b) op d  -->  c ? (a op d) : (b op d)):
Allow the folding if before lowering and the current IL
isn't supported with vcond_mask.

(cherry picked from commit f9c30ea737b806caac917d8f501305151a2cbd57)

Diff:
---
 gcc/match.pd | 18 +++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 18a523fe9a8..908959de3e7 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -4464,7 +4464,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (op (vec_cond:s @0 @1 @2) (vec_cond:s @0 @3 @4))
   (if (TREE_CODE_CLASS (op) != tcc_comparison
|| types_match (type, TREE_TYPE (@1))
-   || expand_vec_cond_expr_p (type, TREE_TYPE (@0), ERROR_MARK))
+   || expand_vec_cond_expr_p (type, TREE_TYPE (@0), ERROR_MARK)
+   || (optimize_vectors_before_lowering_p ()
+  /* The following is optimistic on the side of non-support, we are
+ missing the legacy vcond{,u,eq} cases.  Do this only when
+ lowering will be able to fixup..  */
+  && !expand_vec_cond_expr_p (TREE_TYPE (@1),
+  TREE_TYPE (@0), ERROR_MARK)))
(vec_cond @0 (op! @1 @3) (op! @2 @4
 
 /* (c ? a : b) op d  -->  c ? (a op d) : (b op d) */
@@ -4472,13 +4478,19 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (op (vec_cond:s @0 @1 @2) @3)
   (if (TREE_CODE_CLASS (op) != tcc_comparison
|| types_match (type, TREE_TYPE (@1))
-   || expand_vec_cond_expr_p (type, TREE_TYPE (@0), ERROR_MARK))
+   || expand_vec_cond_expr_p (type, TREE_TYPE (@0), ERROR_MARK)
+   || (optimize_vectors_before_lowering_p ()
+  && !expand_vec_cond_expr_p (TREE_TYPE (@1),
+  TREE_TYPE (@0), ERROR_MARK)))
(vec_cond @0 (op! @1 @3) (op! @2 @3
  (simplify
   (op @3 (vec_cond:s @0 @1 @2))
   (if (TREE_CODE_CLASS (op) != tcc_comparison
|| types_match (type, TREE_TYPE (@1))
-   || expand_vec_cond_expr_p (type, TREE_TYPE (@0), ERROR_MARK))
+   || expand_vec_cond_expr_p (type, TREE_TYPE (@0), ERROR_MARK)
+   || (optimize_vectors_before_lowering_p ()
+  && !expand_vec_cond_expr_p (TREE_TYPE (@1),
+  TREE_TYPE (@0), ERROR_MARK)))
(vec_cond @0 (op! @3 @1) (op! @3 @2)
 
 #if GIMPLE


[gcc r13-8482] tree-optimization/114203 - wrong CLZ niter computation

2024-03-21 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:a729b1227bc8c84cd91a3b8c9c9d11bc43d415de

commit r13-8482-ga729b1227bc8c84cd91a3b8c9c9d11bc43d415de
Author: Richard Biener 
Date:   Mon Mar 4 10:38:31 2024 +0100

tree-optimization/114203 - wrong CLZ niter computation

For precision less than int we apply the adjustment to make it defined
at zero after the adjustment to make it compute CLZ rather than CTZ.
That's wrong.

PR tree-optimization/114203
* tree-ssa-loop-niter.cc (build_cltz_expr): Apply CTZ->CLZ
adjustment before making the result defined at zero.

* gcc.dg/torture/pr114203.c: New testcase.

(cherry picked from commit cde50296a19b109909089b91d532d2c8455f5f10)

Diff:
---
 gcc/testsuite/gcc.dg/torture/pr114203.c | 21 +
 gcc/tree-ssa-loop-niter.cc  |  7 +++
 2 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/torture/pr114203.c 
b/gcc/testsuite/gcc.dg/torture/pr114203.c
new file mode 100644
index 000..0ef6279942a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr114203.c
@@ -0,0 +1,21 @@
+/* { dg-do run } */
+
+int __attribute__((noipa))
+foo (unsigned char b)
+{
+  int c = 0;
+
+  while (b) {
+  b >>= 1;
+  c++;
+  }
+
+  return c;
+}
+
+int main()
+{
+  if (foo(0) != 0)
+__builtin_abort ();
+  return 0;
+}
diff --git a/gcc/tree-ssa-loop-niter.cc b/gcc/tree-ssa-loop-niter.cc
index dc4c7a418f6..0ffe4b8465d 100644
--- a/gcc/tree-ssa-loop-niter.cc
+++ b/gcc/tree-ssa-loop-niter.cc
@@ -2299,6 +2299,9 @@ build_cltz_expr (tree src, bool leading, bool 
define_at_zero)
src = fold_convert (unsigned_type_node, src);
 
   call = build_call_expr (fn, 1, src);
+  if (leading && prec < i_prec)
+   call = fold_build2 (MINUS_EXPR, integer_type_node, call,
+   build_int_cst (integer_type_node, i_prec - prec));
   if (define_at_zero)
{
  tree is_zero = fold_build2 (NE_EXPR, boolean_type_node, src,
@@ -2306,10 +2309,6 @@ build_cltz_expr (tree src, bool leading, bool 
define_at_zero)
  call = fold_build3 (COND_EXPR, integer_type_node, is_zero, call,
  build_int_cst (integer_type_node, prec));
}
-
-  if (leading && prec < i_prec)
-   call = fold_build2 (MINUS_EXPR, integer_type_node, call,
-   build_int_cst (integer_type_node, i_prec - prec));
 }
 
   return call;


[gcc r13-8478] tree-optimization/113910 - huge compile time during PTA

2024-03-21 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:9a19811ea1e9b3024c0f41b074d71679088bb2d7

commit r13-8478-g9a19811ea1e9b3024c0f41b074d71679088bb2d7
Author: Richard Biener 
Date:   Wed Feb 14 12:33:13 2024 +0100

tree-optimization/113910 - huge compile time during PTA

For the testcase in PR113910 we spend a lot of time in PTA comparing
bitmaps for looking up equivalence class members.  This points to
the very weak bitmap_hash function which effectively hashes set
and a subset of not set bits.

The major problem with it is that it simply truncates the
BITMAP_WORD sized intermediate hash to hashval_t which is
unsigned int, effectively not hashing half of the bits.

This reduces the compile-time for the testcase from tens of minutes
to 42 seconds and PTA time from 99% to 46%.

PR tree-optimization/113910
* bitmap.cc (bitmap_hash): Mix the full element "hash" to
the hashval_t hash.

(cherry picked from commit ad7a365aaccecd23ea287c7faaab9c7bd50b944a)

Diff:
---
 gcc/bitmap.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/bitmap.cc b/gcc/bitmap.cc
index 20de562caac..d65f6b259dd 100644
--- a/gcc/bitmap.cc
+++ b/gcc/bitmap.cc
@@ -2673,7 +2673,7 @@ bitmap_hash (const_bitmap head)
   for (ix = 0; ix != BITMAP_ELEMENT_WORDS; ix++)
hash ^= ptr->bits[ix];
 }
-  return (hashval_t)hash;
+  return iterative_hash (&hash, sizeof (hash), 0);
 }


[gcc r13-8480] middle-end/114070 - folding breaking VEC_COND expansion

2024-03-21 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:a9a425df628ab80374cc6a132d39e470bc78c8bc

commit r13-8480-ga9a425df628ab80374cc6a132d39e470bc78c8bc
Author: Richard Biener 
Date:   Fri Feb 23 16:06:05 2024 +0100

middle-end/114070 - folding breaking VEC_COND expansion

The following properly guards the simplifications that move
operations into VEC_CONDs, in particular when that changes the
type constraints on this operation.

This needed a genmatch fix which was recording spurious implicit fors
when tcc_comparison is used in a C expression.

PR middle-end/114070
* genmatch.cc (parser::parse_c_expr): Do not record operand
lists but only mark operators used.
* match.pd ((c ? a : b) op (c ? d : e)  -->  c ? (a op d) : (b op e)):
Properly guard the case of tcc_comparison changing the VEC_COND
value operand type.

* gcc.dg/torture/pr114070.c: New testcase.

(cherry picked from commit af66ad89e8169f44db723813662917cf4cbb78fc)

Diff:
---
 gcc/genmatch.cc |  6 ++
 gcc/match.pd| 15 ---
 gcc/testsuite/gcc.dg/torture/pr114070.c | 12 
 3 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/gcc/genmatch.cc b/gcc/genmatch.cc
index 98b429a9d0b..c1023d921fc 100644
--- a/gcc/genmatch.cc
+++ b/gcc/genmatch.cc
@@ -4519,10 +4519,8 @@ parser::parse_c_expr (cpp_ttype start)
= (const char *)CPP_HASHNODE (token->val.node.node)->ident.str;
  if (strcmp (str, "return") == 0)
fatal_at (token, "return statement not allowed in C expression");
- id_base *idb = get_operator (str);
- user_id *p;
- if (idb && (p = dyn_cast (idb)) && p->is_oper_list)
-   record_operlist (token->src_loc, p);
+ /* Mark user operators corresponding to 'str' as used.  */
+ get_operator (str);
}
 
   /* Record the token.  */
diff --git a/gcc/match.pd b/gcc/match.pd
index 8e41c973dc2..18a523fe9a8 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -4462,15 +4462,24 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 /* (c ? a : b) op (c ? d : e)  -->  c ? (a op d) : (b op e) */
  (simplify
   (op (vec_cond:s @0 @1 @2) (vec_cond:s @0 @3 @4))
-  (vec_cond @0 (op! @1 @3) (op! @2 @4)))
+  (if (TREE_CODE_CLASS (op) != tcc_comparison
+   || types_match (type, TREE_TYPE (@1))
+   || expand_vec_cond_expr_p (type, TREE_TYPE (@0), ERROR_MARK))
+   (vec_cond @0 (op! @1 @3) (op! @2 @4
 
 /* (c ? a : b) op d  -->  c ? (a op d) : (b op d) */
  (simplify
   (op (vec_cond:s @0 @1 @2) @3)
-  (vec_cond @0 (op! @1 @3) (op! @2 @3)))
+  (if (TREE_CODE_CLASS (op) != tcc_comparison
+   || types_match (type, TREE_TYPE (@1))
+   || expand_vec_cond_expr_p (type, TREE_TYPE (@0), ERROR_MARK))
+   (vec_cond @0 (op! @1 @3) (op! @2 @3
  (simplify
   (op @3 (vec_cond:s @0 @1 @2))
-  (vec_cond @0 (op! @3 @1) (op! @3 @2
+  (if (TREE_CODE_CLASS (op) != tcc_comparison
+   || types_match (type, TREE_TYPE (@1))
+   || expand_vec_cond_expr_p (type, TREE_TYPE (@0), ERROR_MARK))
+   (vec_cond @0 (op! @3 @1) (op! @3 @2)
 
 #if GIMPLE
 (match (nop_atomic_bit_test_and_p @0 @1 @4)
diff --git a/gcc/testsuite/gcc.dg/torture/pr114070.c 
b/gcc/testsuite/gcc.dg/torture/pr114070.c
new file mode 100644
index 000..cf46ec45a04
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr114070.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-fno-vect-cost-model" } */
+
+int unresolved(unsigned dirmask, unsigned mask, int *unresolved_n)
+{
+  for (int i = 0; i < 1024; i++) {
+mask |= 1;
+if (!unresolved_n[i] || unresolved_n[i] & 7)
+  dirmask |= 1;
+  }
+  return (dirmask == mask);
+}


[gcc r14-9592] libgcc: Fix up bitint division [PR114397]

2024-03-21 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:59b6cece54f33ac4994834d01e18269856576556

commit r14-9592-g59b6cece54f33ac4994834d01e18269856576556
Author: Jakub Jelinek 
Date:   Thu Mar 21 13:07:50 2024 +0100

libgcc: Fix up bitint division [PR114397]

The Knuth's division algorithm relies on the number of dividend limbs
to be greater or equal to the number of divisor limbs, which is why
I've added a special case for un < vn at the start of __divmodbitint4.
Unfortunately, my assumption that it then implies abs(v) > abs(u) and
so quotient must be 0 and remainder same as dividend is incorrect.
This is because this check is done before negation of the operands.
While bitint_reduce_prec reduces precision from clearly useless limbs,
the problematic case is when the dividend is unsigned or non-negative
and divisor is negative.  We can have limbs (from MS to LS):
dividend:   0   M   ?...
divisor:-1  -N  ?...
where M has most significant bit set and M >= N (if M == N then
the following limbs matter as well) and the most significant limbs can
be even partial.  In this case, the quotient should be -1 rather than
0.  bitint_reduce_prec will reduce the precision of the dividend so
that M is the most significant limb, but can't reduce precision of the
divisor to more than having the -1 as most significant limb, because
-N doesn't have the most significant bit set.

The following patch fixes it by detecting this problematic case in the
un < vn handling, and instead of assuming q is 0 and r is u will
decrease vn by 1 because it knows the later code will negate the divisor
and it can be then expressed after negation in one fewer limbs.

2024-03-21  Jakub Jelinek  

PR libgcc/114397
* libgcc2.c (__divmodbitint4): Don't assume un < vn always means
abs(v) > abs(u), check for a special case of un + 1 == vn where
u is non-negative and v negative and after v's negation vn could
be reduced by 1.

* gcc.dg/torture/bitint-65.c: New test.

Diff:
---
 gcc/testsuite/gcc.dg/torture/bitint-65.c | 44 
 libgcc/libgcc2.c | 89 
 2 files changed, 100 insertions(+), 33 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/torture/bitint-65.c 
b/gcc/testsuite/gcc.dg/torture/bitint-65.c
new file mode 100644
index 000..b7724d05382
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/bitint-65.c
@@ -0,0 +1,44 @@
+/* PR libgcc/114397 */
+/* { dg-do run { target bitint } } */
+/* { dg-options "-std=c23" } */
+/* { dg-skip-if "" { ! run_expensive_tests }  { "*" } { "-O0" "-O2" } } */
+/* { dg-skip-if "" { ! run_expensive_tests } { "-flto" } { "" } } */
+
+#if __BITINT_MAXWIDTH__ >= 129
+int
+foo (unsigned _BitInt (128) a, _BitInt (129) b)
+{
+  return a / b;
+}
+#endif
+
+#if __BITINT_MAXWIDTH__ >= 192
+int
+bar (unsigned _BitInt (128) a, _BitInt (192) b)
+{
+  return a / b;
+}
+#endif
+
+int
+main ()
+{
+#if __BITINT_MAXWIDTH__ >= 129
+  if (foo (336225022742818342628768636932743029911uwb,
+  -336225022742818342628768636932743029911wb) != -1
+  || foo (336225022742818342628768636932743029912uwb,
+ -336225022742818342628768636932743029911wb) != -1
+  || foo (336225022742818342628768636932743029911uwb,
+ -336225022742818342628768636932743029912wb) != 0)
+__builtin_abort ();
+#endif
+#if __BITINT_MAXWIDTH__ >= 192
+  if (bar (336225022742818342628768636932743029911uwb,
+  -336225022742818342628768636932743029911wb) != -1
+  || bar (336225022742818342628768636932743029912uwb,
+ -336225022742818342628768636932743029911wb) != -1
+  || bar (336225022742818342628768636932743029911uwb,
+ -336225022742818342628768636932743029912wb) != 0)
+__builtin_abort ();
+#endif
+}
diff --git a/libgcc/libgcc2.c b/libgcc/libgcc2.c
index dc856740a69..71c73d6b846 100644
--- a/libgcc/libgcc2.c
+++ b/libgcc/libgcc2.c
@@ -1707,44 +1707,67 @@ __divmodbitint4 (UBILtype *q, SItype qprec,
   USItype vp = avprec % W_TYPE_SIZE;
   if (__builtin_expect (un < vn, 0))
 {
-  /* If abs(v) > abs(u), then q is 0 and r is u.  */
-  if (q)
-   __builtin_memset (q, 0, qn * sizeof (UWtype));
-  if (r == NULL)
-   return;
-#if __LIBGCC_BITINT_ORDER__ == __ORDER_BIG_ENDIAN__
-  r += rn - 1;
-  u += un - 1;
-#endif
-  if (up)
-   --un;
-  if (rn < un)
-   un = rn;
-  for (rn -= un; un; --un)
+  /* If abs(v) > abs(u), then q is 0 and r is u.
+Unfortunately un < vn doesn't always mean abs(v) > abs(u).
+If uprec > 0 and vprec < 0 and vn == un + 1, if the
+top limb of v is all ones and the second most significant
+limb has most significant bit clear, then just decrease
+vn/avprec/vp and continue, after negation both numbers
+will have the same number of limb

[gcc r14-9593] amdgcn: Clean up device memory in gcn-run

2024-03-21 Thread Andrew Stubbs via Gcc-cvs
https://gcc.gnu.org/g:c3fb8a4d150586459a9fa177cb2aeeac5e4c0464

commit r14-9593-gc3fb8a4d150586459a9fa177cb2aeeac5e4c0464
Author: Andrew Stubbs 
Date:   Wed Mar 20 12:49:24 2024 +

amdgcn: Clean up device memory in gcn-run

gcc/ChangeLog:

* config/gcn/gcn-run.cc (main): Add an hsa_memory_free call for each
device_malloc call.

Diff:
---
 gcc/config/gcn/gcn-run.cc | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/gcc/config/gcn/gcn-run.cc b/gcc/config/gcn/gcn-run.cc
index d45ff3e6c2b..2f3ed2d41d2 100644
--- a/gcc/config/gcn/gcn-run.cc
+++ b/gcc/config/gcn/gcn-run.cc
@@ -755,7 +755,13 @@ main (int argc, char *argv[])
 
   /* Clean shut down.  */
   XHSA (hsa_fns.hsa_memory_free_fn (kernargs),
-   "Clean up device memory");
+   "Clean up device kernargs memory");
+  XHSA (hsa_fns.hsa_memory_free_fn (args),
+   "Clean up device args memory");
+  XHSA (hsa_fns.hsa_memory_free_fn (heap),
+   "Clean up device heap memory");
+  XHSA (hsa_fns.hsa_memory_free_fn (stack),
+   "Clean up device stack memory");
   XHSA (hsa_fns.hsa_executable_destroy_fn (executable),
"Clean up GCN executable");
   XHSA (hsa_fns.hsa_queue_destroy_fn (queue),


[gcc r14-9594] amdgcn: Ensure gfx11 is running in cumode

2024-03-21 Thread Andrew Stubbs via Gcc-cvs
https://gcc.gnu.org/g:69dc2dc7e0e853856b84b1bcc89d0241d8a570aa

commit r14-9594-g69dc2dc7e0e853856b84b1bcc89d0241d8a570aa
Author: Andrew Stubbs 
Date:   Mon Mar 4 15:48:47 2024 +

amdgcn: Ensure gfx11 is running in cumode

CUmode "on" is the setting for compatibility with GCN and CDNA devices.

gcc/ChangeLog:

* config/gcn/gcn-hsa.h (ASM_SPEC): Pass -mattr=+cumode.

Diff:
---
 gcc/config/gcn/gcn-hsa.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/config/gcn/gcn-hsa.h b/gcc/config/gcn/gcn-hsa.h
index 9cf181f52a4..c75256dbac3 100644
--- a/gcc/config/gcn/gcn-hsa.h
+++ b/gcc/config/gcn/gcn-hsa.h
@@ -107,6 +107,7 @@ extern unsigned int gcn_local_sym_hash (const char *name);
  "%{" NO_XNACK XNACKOPT "} " \
  "%{" NO_SRAM_ECC SRAMOPT "} " \
  "%{march=gfx1030|march=gfx1100:-mattr=+wavefrontsize64} " \
+ "%{march=gfx1030|march=gfx1100:-mattr=+cumode} " \
  "-filetype=obj"
 #define LINK_SPEC "--pie --export-dynamic"
 #define LIB_SPEC  "-lc"


[gcc r14-9595] amdgcn: Comment correction

2024-03-21 Thread Andrew Stubbs via Gcc-cvs
https://gcc.gnu.org/g:a2fe34e0b993d5fb879d75ddb42b24b45c4b7242

commit r14-9595-ga2fe34e0b993d5fb879d75ddb42b24b45c4b7242
Author: Andrew Stubbs 
Date:   Mon Mar 4 15:52:00 2024 +

amdgcn: Comment correction

The location of the marker was changed, but the comment wasn't updated.
Fixed now.

gcc/ChangeLog:

* config/gcn/gcn.cc (gcn_expand_builtin_1): Comment correction.

Diff:
---
 gcc/config/gcn/gcn.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
index bc076d1120d..fca001811e5 100644
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@ -4932,8 +4932,8 @@ gcn_expand_builtin_1 (tree exp, rtx target, rtx 
/*subtarget */ ,
   }
 case GCN_BUILTIN_FIRST_CALL_THIS_THREAD_P:
   {
-   /* Stash a marker in the unused upper 16 bits of s[0:1] to indicate
-  whether it was the first call.  */
+   /* Stash a marker in the unused upper 16 bits of QUEUE_PTR_ARG to
+  indicate whether it was the first call.  */
rtx result = gen_reg_rtx (BImode);
emit_move_insn (result, const0_rtx);
if (cfun->machine->args.reg[QUEUE_PTR_ARG] >= 0)


[gcc r13-8483] middle-end/113622 - allow .VEC_SET and .VEC_EXTRACT for global hard regs

2024-03-21 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:d4c0800aab864bb95260e12342d18695c6ebbec8

commit r13-8483-gd4c0800aab864bb95260e12342d18695c6ebbec8
Author: Richard Biener 
Date:   Mon Jan 29 09:47:31 2024 +0100

middle-end/113622 - allow .VEC_SET and .VEC_EXTRACT for global hard regs

The following expands .VEC_SET and .VEC_EXTRACT instruction selection
to global hard registers, not only automatic variables (possibly)
promoted to registers.  This can avoid some ICEs later and create
better code.

PR middle-end/113622
* gimple-isel.cc (gimple_expand_vec_set_extract_expr):
Also allow DECL_HARD_REGISTER variables.

* gcc.target/i386/pr113622-1.c: New testcase.

(cherry picked from commit 96bc048d78f804bac0fa7b2ca3b6dd3a04c68217)

Diff:
---
 gcc/gimple-isel.cc |  3 ++-
 gcc/testsuite/gcc.target/i386/pr113622-1.c | 12 
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/gcc/gimple-isel.cc b/gcc/gimple-isel.cc
index ef688ddb57f..760029d27c9 100644
--- a/gcc/gimple-isel.cc
+++ b/gcc/gimple-isel.cc
@@ -77,7 +77,8 @@ gimple_expand_vec_set_expr (struct function *fun, 
gimple_stmt_iterator *gsi)
   tree pos = TREE_OPERAND (lhs, 1);
   tree view_op0 = TREE_OPERAND (op0, 0);
   machine_mode outermode = TYPE_MODE (TREE_TYPE (view_op0));
-  if (auto_var_in_fn_p (view_op0, fun->decl)
+  if ((auto_var_in_fn_p (view_op0, fun->decl)
+  || (VAR_P (view_op0) && DECL_HARD_REGISTER (view_op0)))
  && !TREE_ADDRESSABLE (view_op0) && can_vec_set_var_idx_p (outermode))
{
  location_t loc = gimple_location (stmt);
diff --git a/gcc/testsuite/gcc.target/i386/pr113622-1.c 
b/gcc/testsuite/gcc.target/i386/pr113622-1.c
new file mode 100644
index 000..d3a51cd81dc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr113622-1.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f -w" } */
+
+typedef float __attribute__ ((vector_size (64))) vec;
+register vec a asm("zmm5"), b asm("zmm6"), c asm("zmm7");
+
+void
+test (void)
+{
+  for (int i = 0; i < 8; i++)
+c[i] = a[i] < b[i] ? 0.1 : 0.2;
+}


[gcc r13-8484] tree-optimization/113670 - gather/scatter to/from hard registers

2024-03-21 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:ac664905b837095b15099e44e83471672eee7aa9

commit r13-8484-gac664905b837095b15099e44e83471672eee7aa9
Author: Richard Biener 
Date:   Wed Jan 31 09:09:50 2024 +0100

tree-optimization/113670 - gather/scatter to/from hard registers

The following makes sure we're not taking the address of hard
registers when vectorizing apparent gathers or scatters to/from
them.

PR tree-optimization/113670
* tree-vect-data-refs.cc (vect_check_gather_scatter):
Make sure we can take the address of the reference base.

* gcc.target/i386/pr113670.c: New testcase.

(cherry picked from commit 924137b9012cee5603482242de08fbf0b2030f6a)

Diff:
---
 gcc/testsuite/gcc.target/i386/pr113670.c | 16 
 gcc/tree-vect-data-refs.cc   |  5 +
 2 files changed, 21 insertions(+)

diff --git a/gcc/testsuite/gcc.target/i386/pr113670.c 
b/gcc/testsuite/gcc.target/i386/pr113670.c
new file mode 100644
index 000..8b9d3744fe2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr113670.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-msse2 -O2 -fno-vect-cost-model" } */
+
+typedef float __attribute__ ((vector_size (16))) vec;
+typedef int __attribute__ ((vector_size (16))) ivec;
+ivec x;
+
+void
+test (void)
+{
+  register vec a asm("xmm3"), b asm("xmm4");
+  register ivec c asm("xmm5");
+  for (int i = 0; i < 4; i++)
+c[i] = a[i] < b[i] ? -1 : 1;
+  x = c;
+}
diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index babd83dd830..4fefd046207 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -4029,6 +4029,11 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, 
loop_vec_info loop_vinfo,
   if (!multiple_p (pbitpos, BITS_PER_UNIT))
 return false;
 
+  /* We need to be able to form an address to the base which for example
+ isn't possible for hard registers.  */
+  if (may_be_nonaddressable_p (base))
+return false;
+
   poly_int64 pbytepos = exact_div (pbitpos, BITS_PER_UNIT);
 
   if (TREE_CODE (base) == MEM_REF)


[gcc r13-8486] tree-optimization/114231 - use patterns for BB SLP discovery root stmts

2024-03-21 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:04fffbaa87997ac893a9aa68b674c938ba3ecddb

commit r13-8486-g04fffbaa87997ac893a9aa68b674c938ba3ecddb
Author: Richard Biener 
Date:   Tue Mar 5 10:55:56 2024 +0100

tree-optimization/114231 - use patterns for BB SLP discovery root stmts

The following makes sure to use recognized patterns when vectorizing
roots during BB SLP discovery.  We need to apply those late since
during root discovery we've not yet done pattern recognition.
All parts of the vectorizer assume patterns get used, for the testcase
we mix this up when doing live lane computation.

PR tree-optimization/114231
* tree-vect-slp.cc (vect_analyze_slp): Lookup patterns when
processing a BB SLP root.

* gcc.dg/vect/pr114231.c: New testcase.

Diff:
---
 gcc/testsuite/gcc.dg/vect/pr114231.c | 12 
 gcc/tree-vect-slp.cc |  4 
 2 files changed, 16 insertions(+)

diff --git a/gcc/testsuite/gcc.dg/vect/pr114231.c 
b/gcc/testsuite/gcc.dg/vect/pr114231.c
new file mode 100644
index 000..5e3a8103918
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr114231.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+
+void f(long*);
+int ff[2];
+void f2(long, long, unsigned long);
+void k(unsigned long x, unsigned long y)
+{
+  long t = x >> ff[0];
+  long t1 = ff[1];
+  unsigned long t2 = y >> ff[0];
+  f2(t1, t+t2, t2);
+}
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 6120620c9a8..e4d3cb89e6f 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -3478,6 +3478,10 @@ vect_analyze_slp (vec_info *vinfo, unsigned 
max_tree_size)
   for (unsigned i = 0; i < bb_vinfo->roots.length (); ++i)
{
  vect_location = bb_vinfo->roots[i].roots[0]->stmt;
+ /* Apply patterns.  */
+ for (unsigned j = 0; j < bb_vinfo->roots[i].stmts.length (); ++j)
+   bb_vinfo->roots[i].stmts[j]
+ = vect_stmt_to_vectorize (bb_vinfo->roots[i].stmts[j]);
  if (vect_build_slp_instance (bb_vinfo, bb_vinfo->roots[i].kind,
   bb_vinfo->roots[i].stmts,
   bb_vinfo->roots[i].roots,


[gcc r13-8485] tree-optimization/112793 - SLP of constant/external code-generated twice

2024-03-21 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:42d5985e9884299c8d837ad1588fb47b211b4baf

commit r13-8485-g42d5985e9884299c8d837ad1588fb47b211b4baf
Author: Richard Biener 
Date:   Wed Dec 13 14:23:31 2023 +0100

tree-optimization/112793 - SLP of constant/external code-generated twice

The following makes the attempt at code-generating a constant/external
SLP node twice well-formed as that can happen when partitioning BB
vectorization attempts where we keep constants/externals unpartitioned.

PR tree-optimization/112793
* tree-vect-slp.cc (vect_schedule_slp_node): Already
code-generated constant/external nodes are OK.

* g++.dg/vect/pr112793.cc: New testcase.

(cherry picked from commit d782ec8362eadc3169286eb1e39c631effd02323)

Diff:
---
 gcc/testsuite/g++.dg/vect/pr112793.cc | 32 
 gcc/tree-vect-slp.cc  | 16 +---
 2 files changed, 41 insertions(+), 7 deletions(-)

diff --git a/gcc/testsuite/g++.dg/vect/pr112793.cc 
b/gcc/testsuite/g++.dg/vect/pr112793.cc
new file mode 100644
index 000..258d7c1b111
--- /dev/null
+++ b/gcc/testsuite/g++.dg/vect/pr112793.cc
@@ -0,0 +1,32 @@
+// { dg-do compile }
+// { dg-require-effective-target c++11 }
+// { dg-additional-options "-march=znver2" { target x86_64-*-* i?86-*-* } }
+
+typedef double T;
+T c, s;
+T a[16];
+struct Matrix4 {
+  Matrix4(){}
+  Matrix4(T e, T f, T i, T j) {
+r[1] = r[4] = e;
+r[5] = f;
+r[8] = i;
+r[9] = j;
+  }
+  Matrix4 operator*(Matrix4 a) {
+return Matrix4(
+   r[0] * a.r[4] + r[4] + r[15] + r[6],
+   r[1] * a.r[4] + 1 + 2 + 3,  r[0] * r[8] + 1 + 2 + 3,
+   r[1] * r[8] + r[1] + r[14] + r[2] * r[3]);
+  }
+  T r[16] = {};
+};
+Matrix4 t1, t2;
+Matrix4 tt;
+Matrix4 getRotAltAzToEquatorial()
+{
+  t2.r[4] =  0;
+  t1.r[1] =  -s;
+  t1.r[8] = 0;
+  return t1 * t2;
+}
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index a6c03fe6442..6120620c9a8 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -8777,12 +8777,6 @@ vect_schedule_slp_node (vec_info *vinfo,
   int i;
   slp_tree child;
 
-  /* For existing vectors there's nothing to do.  */
-  if (SLP_TREE_VEC_DEFS (node).exists ())
-return;
-
-  gcc_assert (SLP_TREE_VEC_STMTS (node).is_empty ());
-
   /* Vectorize externals and constants.  */
   if (SLP_TREE_DEF_TYPE (node) == vect_constant_def
   || SLP_TREE_DEF_TYPE (node) == vect_external_def)
@@ -8793,10 +8787,18 @@ vect_schedule_slp_node (vec_info *vinfo,
   if (!SLP_TREE_VECTYPE (node))
return;
 
-  vect_create_constant_vectors (vinfo, node);
+  /* There are two reasons vector defs might already exist.  The first
+is that we are vectorizing an existing vector def.  The second is
+when performing BB vectorization shared constant/external nodes
+are not split apart during partitioning so during the code-gen
+DFS walk we can end up visiting them twice.  */
+  if (! SLP_TREE_VEC_DEFS (node).exists ())
+   vect_create_constant_vectors (vinfo, node);
   return;
 }
 
+  gcc_assert (SLP_TREE_VEC_DEFS (node).is_empty ());
+
   stmt_vec_info stmt_info = SLP_TREE_REPRESENTATIVE (node);
 
   gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);


[gcc r14-9596] c++: explicit inst of template method not generated [PR110323]

2024-03-21 Thread Marek Polacek via Gcc-cvs
https://gcc.gnu.org/g:081f8937cb82da311c224da04b0c6cbd57a8fb5d

commit r14-9596-g081f8937cb82da311c224da04b0c6cbd57a8fb5d
Author: Marek Polacek 
Date:   Thu Mar 7 20:41:23 2024 -0500

c++: explicit inst of template method not generated [PR110323]

Consider

  constexpr int VAL = 1;
  struct foo {
      template <int B>
      void bar(typename std::conditional<B==VAL, int, float>::type arg) { }
  };
  template void foo::bar<1>(int arg);

where, since r11-291, we fail to emit the code for the explicit
instantiation.  That's because cp_walk_subtrees/TYPENAME_TYPE now
walks TYPE_CONTEXT ('conditional' here) as well, and in a template
finds the B==VAL template argument.  VAL is constexpr, which implies const,
which in the global scope implies static.  constrain_visibility_for_template
then makes "struct conditional<(B == VAL), int, float>" non-TREE_PUBLIC.
Then symtab_node::needed_p checks TREE_PUBLIC, sees it's 0, and we don't
emit any code.

I thought the fix would be some ODR-esque check to not consider
constexpr variables/fns that are used just for their value.  But
it turned out to be tricky.  For instance, we can't skip
determine_visibility in a template; we can't even skip it for value-dep
expressions.  For example, no-linkage-expr1.C has

  using P = struct {}*;
  template <int N>
  void f(int(*)[((P)0, N)]) {}

where ((P)0, N) is value-dep, but N is not relevant here: we have to
ferret out the anonymous type.  When instantiating, it's already gone.

This patch uses decl_constant_var_p.  This is to implement (an
approximation of) [basic.def.odr]#14.5.1 and [basic.def.odr]#5.2.

PR c++/110323

gcc/cp/ChangeLog:

* decl2.cc (min_vis_expr_r) <case VAR_DECL>: Do nothing for
decl_constant_var_p VAR_DECLs.

gcc/testsuite/ChangeLog:

* g++.dg/template/explicit-instantiation6.C: New test.
* g++.dg/template/explicit-instantiation7.C: New test.

Diff:
---
 gcc/cp/decl2.cc|  7 +++-
 .../g++.dg/template/explicit-instantiation6.C  | 43 ++
 .../g++.dg/template/explicit-instantiation7.C  | 22 +++
 3 files changed, 71 insertions(+), 1 deletion(-)

diff --git a/gcc/cp/decl2.cc b/gcc/cp/decl2.cc
index 2562d8aeff6..1339f210dde 100644
--- a/gcc/cp/decl2.cc
+++ b/gcc/cp/decl2.cc
@@ -2718,7 +2718,12 @@ min_vis_expr_r (tree *tp, int */*walk_subtrees*/, void 
*data)
   /* Fall through.  */
 case VAR_DECL:
 case FUNCTION_DECL:
-  if (! TREE_PUBLIC (t))
+  if (decl_constant_var_p (t))
+   /* The ODR allows definitions in different TUs to refer to distinct
+  constant variables with internal or no linkage, so such a reference
+  shouldn't affect visibility (PR110323).  FIXME but only if the
+  lvalue-rvalue conversion is applied.  */;
+  else if (! TREE_PUBLIC (t))
tpvis = VISIBILITY_ANON;
   else
tpvis = DECL_VISIBILITY (t);
diff --git a/gcc/testsuite/g++.dg/template/explicit-instantiation6.C 
b/gcc/testsuite/g++.dg/template/explicit-instantiation6.C
new file mode 100644
index 000..8b77c9deb20
--- /dev/null
+++ b/gcc/testsuite/g++.dg/template/explicit-instantiation6.C
@@ -0,0 +1,43 @@
+// PR c++/110323
+// { dg-do compile { target c++14 } }
+
+template
+struct conditional { using type = T; };
+
+template
+struct conditional { using type = F; };
+
+constexpr int VAL = 1;
+
+static constexpr int getval () { return 1; }
+
+template
+constexpr int TVAL = 1;
+
+static struct S {
+  constexpr operator bool() { return true; }
+} s;
+
+struct foo {
+template 
+void bar(typename conditional::type arg) { }
+
+template 
+void qux(typename conditional, int, float>::type arg) { }
+
+template 
+void sox(typename conditional::type arg) 
{ }
+
+template 
+void nim(typename conditional::type arg) { }
+};
+
+template void foo::bar<1>(int arg);
+template void foo::qux<1>(int arg);
+template void foo::sox<1>(int arg);
+template void foo::nim<1>(int arg);
+
+// { dg-final { scan-assembler 
"_ZN3foo3barILi1EEEvN11conditionalIXeqT_L_ZL3VALEEifE4typeE" } }
+// { dg-final { scan-assembler 
"_ZN3foo3quxILi1EEEvN11conditionalIXeqT_L_Z4TVALIiEEEifE4typeE" } }
+// { dg-final { scan-assembler 
"_ZN3foo3soxILi1EEEvN11conditionalIXeqT_nxL_ZL3VALEEifE4typeE" } }
+// { dg-final { scan-assembler 
"_ZN3foo3nimILi1EEEvN11conditionalIXneT_szL_ZL3VALEEifE4typeE" } }
diff --git a/gcc/testsuite/g++.dg/template/explicit-instantiation7.C 
b/gcc/testsuite/g++.dg/template/explicit-instantiation7.C
new file mode 100644
index 000..9a870e808fa
--- /dev/null
+++ b/gcc/testsuite/g++.dg/template/explicit-instantiation7.C
@@ -0,0 +1,22 @@
+// PR c++/110323
+// { dg-do compile { target c++11 } }
+
+using P = struct { }*;
+using N = struct A { }*;
+
+template
+struct conditional { using type = T; };
+
+struct foo {
+te

[gcc r14-9597] Fortran: improve array component description in runtime error message [PR30802]

2024-03-21 Thread Harald Anlauf via Gcc-cvs
https://gcc.gnu.org/g:509352069d6f166d396f4b4a86e71ea521f2ca78

commit r14-9597-g509352069d6f166d396f4b4a86e71ea521f2ca78
Author: Harald Anlauf 
Date:   Wed Mar 20 20:59:24 2024 +0100

Fortran: improve array component description in runtime error message 
[PR30802]

Runtime error messages for array bounds violation shall use the following
scheme for a coherent, abridged description of arrays or array components
of derived types:
(1) If x is an ordinary array variable, use "x"
(2) if z is a DT scalar and x an array component at level 1, use "z%x"
(3) if z is a DT scalar and x an array component at level > 1, or
if z is a DT array and x an array (at any level), use "z...%x"
Use a new helper function abridged_ref_name for construction of that name.

gcc/fortran/ChangeLog:

PR fortran/30802
* trans-array.cc (abridged_ref_name): New helper function.
(trans_array_bound_check): Use it.
(array_bound_check_elemental): Likewise.
(gfc_conv_array_ref): Likewise.

gcc/testsuite/ChangeLog:

PR fortran/30802
* gfortran.dg/bounds_check_17.f90: Adjust pattern.
* gfortran.dg/bounds_check_fail_8.f90: New test.

Diff:
---
 gcc/fortran/trans-array.cc| 132 ++
 gcc/testsuite/gfortran.dg/bounds_check_17.f90 |   2 +-
 gcc/testsuite/gfortran.dg/bounds_check_fail_8.f90 |  56 +
 3 files changed, 142 insertions(+), 48 deletions(-)

diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc
index 0a453828bad..30b84762346 100644
--- a/gcc/fortran/trans-array.cc
+++ b/gcc/fortran/trans-array.cc
@@ -3485,6 +3485,78 @@ gfc_conv_array_ubound (tree descriptor, int dim)
 }
 
 
+/* Generate abridged name of a part-ref for use in bounds-check message.
+   Cases:
+   (1) for an ordinary array variable x return "x"
+   (2) for z a DT scalar and array component x (at level 1) return "z%%x"
+   (3) for z a DT scalar and array component x (at level > 1) or
+   for z a DT array and array x (at any number of levels): "z...%%x"
+ */
+
+static char *
+abridged_ref_name (gfc_expr * expr, gfc_array_ref * ar)
+{
+  gfc_ref *ref;
+  gfc_symbol *sym;
+  char *ref_name = NULL;
+  const char *comp_name = NULL;
+  int len_sym, last_len = 0, level = 0;
+  bool sym_is_array;
+
+  gcc_assert (expr->expr_type == EXPR_VARIABLE && expr->ref != NULL);
+
+  sym = expr->symtree->n.sym;
+  sym_is_array = (sym->ts.type != BT_CLASS
+ ? sym->as != NULL
+ : IS_CLASS_ARRAY (sym));
+  len_sym = strlen (sym->name);
+
+  /* Scan ref chain to get name of the array component (when ar != NULL) or
+ array section, determine depth and remember its component name.  */
+  for (ref = expr->ref; ref; ref = ref->next)
+{
+  if (ref->type == REF_COMPONENT
+ && strcmp (ref->u.c.component->name, "_data") != 0)
+   {
+ level++;
+ comp_name = ref->u.c.component->name;
+ continue;
+   }
+
+  if (ref->type != REF_ARRAY)
+   continue;
+
+  if (ar)
+   {
+ if (&ref->u.ar == ar)
+   break;
+   }
+  else if (ref->u.ar.type == AR_SECTION)
+   break;
+}
+
+  if (level > 0)
+last_len = strlen (comp_name);
+
+  /* Provide a buffer sufficiently large to hold "x...%%z".  */
+  ref_name = XNEWVEC (char, len_sym + last_len + 6);
+  strcpy (ref_name, sym->name);
+
+  if (level == 1 && !sym_is_array)
+{
+  strcat (ref_name, "%%");
+  strcat (ref_name, comp_name);
+}
+  else if (level > 0)
+{
+  strcat (ref_name, "...%%");
+  strcat (ref_name, comp_name);
+}
+
+  return ref_name;
+}
+
+
 /* Generate code to perform an array index bound check.  */
 
 static tree
@@ -3496,7 +3568,9 @@ trans_array_bound_check (gfc_se * se, gfc_ss *ss, tree 
index, int n,
   tree tmp_lo, tmp_up;
   tree descriptor;
   char *msg;
+  char *ref_name = NULL;
   const char * name = NULL;
+  gfc_expr *expr;
 
   if (!(gfc_option.rtcheck & GFC_RTCHECK_BOUNDS))
 return index;
@@ -3509,6 +3583,12 @@ trans_array_bound_check (gfc_se * se, gfc_ss *ss, tree 
index, int n,
   name = ss->info->expr->symtree->n.sym->name;
   gcc_assert (name != NULL);
 
+  /* When we have a component ref, get name of the array section.
+ Note that there can only be one part ref.  */
+  expr = ss->info->expr;
+  if (expr->ref && !compname)
+name = ref_name = abridged_ref_name (expr, NULL);
+
   if (VAR_P (descriptor))
 name = IDENTIFIER_POINTER (DECL_NAME (descriptor));
 
@@ -3562,6 +3642,7 @@ trans_array_bound_check (gfc_se * se, gfc_ss *ss, tree 
index, int n,
   free (msg);
 }
 
+  free (ref_name);
   return index;
 }
 
@@ -3573,36 +3654,17 @@ array_bound_check_elemental (gfc_se * se, gfc_ss * ss, 
gfc_expr * expr)
 {
   gfc_array_ref *ar;
   gfc_ref *ref;
-  gfc_symbol *sym;
   char *var_name = NULL;
-  size_t len;
   int dim;
 
   if (expr->

[gcc r14-9598] PR modula2/114418 missing import of TSIZE from system causes ICE

2024-03-21 Thread Gaius Mulley via Gcc-cvs
https://gcc.gnu.org/g:ba744d50ac0360f7992a42494db766f6548913e3

commit r14-9598-gba744d50ac0360f7992a42494db766f6548913e3
Author: Gaius Mulley 
Date:   Thu Mar 21 18:30:23 2024 +

PR modula2/114418 missing import of TSIZE from system causes ICE

This patch detects whether the symbol func is NulSym before generating
an error and if so just uses the token location and fixed string to
generate an error message.

gcc/m2/ChangeLog:

PR modula2/114418
* gm2-compiler/PCSymBuild.mod (PushConstFunctionType): Check
func against NulSym and issue an error.

gcc/testsuite/ChangeLog:

PR modula2/114418
* gm2/pim/fail/missingtsize.mod: New test.
* gm2/pim/fail/missingtsize2.mod: New test.

Signed-off-by: Gaius Mulley 

Diff:
---
 gcc/m2/gm2-compiler/PCSymBuild.mod   | 48 +---
 gcc/testsuite/gm2/pim/fail/missingtsize.mod  |  8 +
 gcc/testsuite/gm2/pim/fail/missingtsize2.mod |  8 +
 3 files changed, 53 insertions(+), 11 deletions(-)

diff --git a/gcc/m2/gm2-compiler/PCSymBuild.mod 
b/gcc/m2/gm2-compiler/PCSymBuild.mod
index e2165408781..9a6e8c06e70 100644
--- a/gcc/m2/gm2-compiler/PCSymBuild.mod
+++ b/gcc/m2/gm2-compiler/PCSymBuild.mod
@@ -1412,6 +1412,38 @@ BEGIN
 END buildConstFunction ;
 
 
+(*
+   ErrorConstFunction - generate an error message at functok using func in the
+error message providing it is not NulSym.
+*)
+
+PROCEDURE ErrorConstFunction (func: CARDINAL; functok: CARDINAL) ;
+BEGIN
+   IF func = NulSym
+   THEN
+  IF Iso
+  THEN
+ ErrorFormat0 (NewError (functok),
+   'the only functions permissible in a constant 
expression are: CAP, CHR, CMPLX, FLOAT, HIGH, IM, LENGTH, MAX, MIN, ODD, ORD, 
RE, SIZE, TSIZE, TRUNC, VAL and gcc builtins')
+  ELSE
+ ErrorFormat0 (NewError (functok),
+   'the only functions permissible in a constant 
expression are: CAP, CHR, FLOAT, HIGH, MAX, MIN, ODD, ORD, SIZE, TSIZE, TRUNC, 
VAL and gcc builtins')
+  END
+   ELSE
+  IF Iso
+  THEN
+ MetaErrorT1 (functok,
+  'the only functions permissible in a constant expression 
are: CAP, CHR, CMPLX, FLOAT, HIGH, IM, LENGTH, MAX, MIN, ODD, ORD, RE, SIZE, 
TSIZE, TRUNC, VAL and gcc builtins, but not {%1Ead}',
+  func)
+  ELSE
+ MetaErrorT1 (functok,
+  'the only functions permissible in a constant expression 
are: CAP, CHR, FLOAT, HIGH, MAX, MIN, ODD, ORD, SIZE, TSIZE, TRUNC, VAL and gcc 
builtins, but not {%1Ead}',
+  func)
+  END
+   END
+END ErrorConstFunction ;
+
+
 (*
PushConstFunctionType -
 *)
@@ -1426,7 +1458,10 @@ BEGIN
PopTtok (func, functok) ;
IF inDesignator
THEN
-  IF (func#Convert) AND
+  IF func = NulSym
+  THEN
+ ErrorConstFunction (func, functok)
+  ELSIF (func#Convert) AND
  (IsPseudoBaseFunction(func) OR
   IsPseudoSystemFunctionConstExpression(func) OR
   (IsProcedure(func) AND IsProcedureBuiltin(func)))
@@ -1442,16 +1477,7 @@ BEGIN
 WriteFormat0('a constant type conversion can only have one 
argument')
  END
   ELSE
- IF Iso
- THEN
-MetaErrorT1 (functok,
- 'the only functions permissible in a constant 
expression are: CAP, CHR, CMPLX, FLOAT, HIGH, IM, LENGTH, MAX, MIN, ODD, ORD, 
RE, SIZE, TSIZE, TRUNC, VAL and gcc builtins, but not {%1Ead}',
-func)
- ELSE
-MetaErrorT1 (functok,
- 'the only functions permissible in a constant 
expression are: CAP, CHR, FLOAT, HIGH, MAX, MIN, ODD, ORD, SIZE, TSIZE, TRUNC, 
VAL and gcc builtins, but not {%1Ead}',
-func)
- END
+ ErrorConstFunction (func, functok)
   END
END ;
PushTtok (func, functok)
diff --git a/gcc/testsuite/gm2/pim/fail/missingtsize.mod 
b/gcc/testsuite/gm2/pim/fail/missingtsize.mod
new file mode 100644
index 000..23ec055d7be
--- /dev/null
+++ b/gcc/testsuite/gm2/pim/fail/missingtsize.mod
@@ -0,0 +1,8 @@
+MODULE missingtsize ;
+
+CONST
+   NoOfBytes = TSIZE (CARDINAL) ;
+
+BEGIN
+
+END missingtsize.
diff --git a/gcc/testsuite/gm2/pim/fail/missingtsize2.mod 
b/gcc/testsuite/gm2/pim/fail/missingtsize2.mod
new file mode 100644
index 000..8e859445ab0
--- /dev/null
+++ b/gcc/testsuite/gm2/pim/fail/missingtsize2.mod
@@ -0,0 +1,8 @@
+MODULE missingtsize2 ;
+
+CONST
+   NoOfBytes = TSIZE (CARDINAL) * 4 ;
+
+BEGIN
+
+END missingtsize2.
\ No newline at end of file


[gcc r14-9599] PR modula2/113836 gm2 does not dump gimple or quadruples to file

2024-03-21 Thread Gaius Mulley via Gcc-cvs
https://gcc.gnu.org/g:48d49200510198cafcab55601cd8e5f8eb541f01

commit r14-9599-g48d49200510198cafcab55601cd8e5f8eb541f01
Author: Gaius Mulley 
Date:   Thu Mar 21 19:38:03 2024 +

PR modula2/113836 gm2 does not dump gimple or quadruples to file

This patch provides the localized modula2 changes to gcc/m2
which facilitate the dumping of gimple and quadruples to file.
PR modula2/113836 will be fully complete after a subsequent patch
adding changes to lang.opt and documentation.  The lang.opt
patch requires all language bootstrap regression testing whereas
this patch is isolated to gcc/m2 and only the m2 language.

gcc/m2/ChangeLog:

PR modula2/113836
* Make-lang.in (GM2_C_OBJS): Add m2/gm2-gcc/m2pp.o.
(m2/m2pp.o): Remove rule.
(GM2-COMP-BOOT-DEFS): Add M2LangDump.def.
(GM2-COMP-BOOT-MODS): Add M2LangDump.mod.
(GM2-GCC-DEFS): Add M2LangDump.def.
(GM2-GCC-MODS): Add M2LangDump.mod.
* gm2-compiler/M2CaseList.mod (WriteCase): Rewrite.
* gm2-compiler/M2Code.mod (DoModuleDeclare): Call
DumpFilteredResolver depending upon DumpLangDecl.
(DoCodeBlock): Call CreateDumpGimple depending upon
DumpLangGimple.
(Code): Replace DisplayQuadList blocks with DumpQuadruples.
(DisplayQuadsInScope): Remove.
(DisplayQuadNumbers): Remove.
(CodeBlock): Rewrite.
* gm2-compiler/M2GCCDeclare.def (IncludeDumpSymbol): New procedure.
(DumpFilteredResolver): New procedure.
(DumpFilteredDefinitive): New procedure.
* gm2-compiler/M2GCCDeclare.mod (IncludeDumpSymbol): New procedure.
(DumpFilteredResolver): New procedure.
(DumpFilteredDefinitive): New procedure.
(doInclude): Rewrite to use GetDumpFile.
(WatchIncludeList): Remove fixed debugging value.
(doExclude): Rewrite to use GetDumpFile.
(DeclareTypesConstantsProceduresInRange): Remove fixed debugging
values.
(PreAddModGcc): Rename parameter t as tree.
(IncludeGetNth): Rewrite to use GetDumpFile.
(IncludeType): Ditto.
(IncludeSubscript): Ditto.
(PrintLocalSymbol): Ditto.
(PrintLocalSymbols): Ditto.
(IncludeGetVarient): Ditto.
(PrintDeclared): Ditto.
(PrintAlignment): Ditto.
(PrintDecl): Ditto.
(PrintScope): Ditto.
(PrintProcedure): Ditto.
(PrintSym): Ditto.
(PrintSymbol): Ditto.
(PrintTerse): Ditto.
* gm2-compiler/M2Options.def (GetDumpLangDeclFilename): New
procedure function.
(SetDumpLangDeclFilename): New procedure.
(GetDumpLangQuadFilename): New procedure function.
(SetDumpLangQuadFilename): New procedure.
(GetDumpLangGimpleFilename): New procedure function.
(SetDumpLangGimpleFilename): New procedure.
(SetM2DumpFilter): New procedure.
(GetM2DumpFilter): New procedure function.
(GetDumpLangGimple): New procedure function.
* gm2-compiler/M2Options.mod (GetDumpLangDeclFilename): New
procedure function.
(SetDumpLangDeclFilename): New procedure.
(GetDumpLangQuadFilename): New procedure function.
(SetDumpLangQuadFilename): New procedure.
(GetDumpLangGimpleFilename): New procedure function.
(SetDumpLangGimpleFilename): New procedure.
(SetM2DumpFilter): New procedure.
(GetM2DumpFilter): New procedure function.
(GetDumpLangGimple): New procedure function.
* gm2-compiler/M2Quads.def (DumpQuadruples): New procedure.
* gm2-compiler/M2Quads.mod (DumpUntil): New procedure.
(GetCtorInit): New procedure function.
(GetCtorFini): New procedure function.
(DumpQuadrupleFilter): New procedure function.
(DumpQuadrupleAll): New procedure.
(DisplayQuadList): Remove procedure.
(DumpQuadruples): New procedure.
(DisplayQuadRange): Rewrite.
(DisplayQuad): Ditto.
(DisplayProcedureAttributes): Ditto.
(WriteOperator): Ditto.
(WriteMode): Ditto.
* gm2-compiler/M2Scope.mod (ForeachScopeBlockDo2): Replace
DisplayQuadruples with TraceQuadruples.
(ForeachScopeBlockDo3): Replace DisplayQuadruples with
TraceQuadruples.
* gm2-compiler/SymbolConversion.def (Gcc2Mod): New procedure 
function.
* gm2-compiler/SymbolConversion.mod: New procedure function.
* gm2-gcc/m2misc.cc (m2misc_DebugTree): New function.
(m2misc_DebugTreeChain): New function.
* gm2-gcc/m2options.h (M2Options_GetDumpLangDeclF

[gcc r14-9600] analyzer: fix ignored constraints involving casts [PR113619]

2024-03-21 Thread David Malcolm via Gcc-cvs
https://gcc.gnu.org/g:7a5a4a4467b2e18ff4fe24f565e120280d3e6ba7

commit r14-9600-g7a5a4a4467b2e18ff4fe24f565e120280d3e6ba7
Author: David Malcolm 
Date:   Thu Mar 21 17:48:38 2024 -0400

analyzer: fix ignored constraints involving casts [PR113619]

gcc/analyzer/ChangeLog:
PR analyzer/113619
* region-model.cc (region_model::eval_condition): Fix
cast-handling from r14-3632-ge7b267444045c5 so that if those give
an unknown result, we continue trying the constraint manager.

gcc/testsuite/ChangeLog:
PR analyzer/113619
* c-c++-common/analyzer/taint-divisor-pr113619.c: New test.

Signed-off-by: David Malcolm 

Diff:
---
 gcc/analyzer/region-model.cc   | 24 --
 .../c-c++-common/analyzer/taint-divisor-pr113619.c | 29 ++
 2 files changed, 46 insertions(+), 7 deletions(-)

diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc
index c3a4ec7bcfc..902b887fc07 100644
--- a/gcc/analyzer/region-model.cc
+++ b/gcc/analyzer/region-model.cc
@@ -4704,17 +4704,27 @@ region_model::eval_condition (const svalue *lhs,
 if (lhs_un_op && CONVERT_EXPR_CODE_P (lhs_un_op->get_op ())
&& rhs_un_op && CONVERT_EXPR_CODE_P (rhs_un_op->get_op ())
&& lhs_type == rhs_type)
-  return eval_condition (lhs_un_op->get_arg (),
-op,
-rhs_un_op->get_arg ());
-
+  {
+   tristate res = eval_condition (lhs_un_op->get_arg (),
+  op,
+  rhs_un_op->get_arg ());
+   if (res.is_known ())
+ return res;
+  }
 else if (lhs_un_op && CONVERT_EXPR_CODE_P (lhs_un_op->get_op ())
 && lhs_type == rhs_type)
-  return eval_condition (lhs_un_op->get_arg (), op, rhs);
-
+  {
+   tristate res = eval_condition (lhs_un_op->get_arg (), op, rhs);
+   if (res.is_known ())
+ return res;
+  }
 else if (rhs_un_op && CONVERT_EXPR_CODE_P (rhs_un_op->get_op ())
 && lhs_type == rhs_type)
-  return eval_condition (lhs, op, rhs_un_op->get_arg ());
+  {
+   tristate res = eval_condition (lhs, op, rhs_un_op->get_arg ());
+   if (res.is_known ())
+ return res;
+  }
   }
 
   /* Otherwise, try constraints.
diff --git a/gcc/testsuite/c-c++-common/analyzer/taint-divisor-pr113619.c 
b/gcc/testsuite/c-c++-common/analyzer/taint-divisor-pr113619.c
new file mode 100644
index 000..15c881247ce
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/analyzer/taint-divisor-pr113619.c
@@ -0,0 +1,29 @@
+/* Reduced from false positive in Linux kernel's fs/ceph/ioctl.c: */
+
+__extension__ typedef unsigned long long __u64;
+
+struct ceph_ioctl_layout
+{
+  __u64 stripe_unit, object_size;
+};
+static long
+__validate_layout(struct ceph_ioctl_layout* l)
+{
+  if ((l->object_size & ~(~(((1UL) << 12) - 1))) ||
+  (l->stripe_unit & ~(~(((1UL) << 12) - 1))) ||
+  ((unsigned)l->stripe_unit != 0 &&
+   ((unsigned)l->object_size % (unsigned)l->stripe_unit))) /* { dg-bogus 
"use of attacker-controlled value 'l.stripe_unit' as divisor without checking 
for zero" "PR analyzer/113619" } */
+return -22;
+  return 0;
+}
+
+long
+__attribute__((tainted_args))
+ceph_ioctl_set_layout_policy(struct ceph_ioctl_layout l)
+{
+  int err;
+  err = __validate_layout(&l);
+  if (err)
+return err;
+  return err;
+}


[gcc r14-9602] PR modula2/114422 Attempting to declare a set of unknown type causes ICE

2024-03-21 Thread Gaius Mulley via Gcc-cvs
https://gcc.gnu.org/g:1542e8a44cc35e63233d3557afbf501c5ff84c55

commit r14-9602-g1542e8a44cc35e63233d3557afbf501c5ff84c55
Author: Gaius Mulley 
Date:   Fri Mar 22 01:47:31 2024 +

PR modula2/114422 Attempting to declare a set of unknown type causes ICE

This patch corrects an error message directive which did not
escape the { character.  The patch also contains test cases
to stress set declaration errors.

gcc/m2/ChangeLog:

PR modula2/114422
* gm2-compiler/M2Quads.mod (BuildConstructor): Add escape
character.

gcc/testsuite/ChangeLog:

PR modula2/114422
* gm2/iso/fail/badset.mod: New test.
* gm2/iso/fail/badset2.mod: New test.
* gm2/iso/fail/badset3.mod: New test.

Signed-off-by: Gaius Mulley 

Diff:
---
 gcc/m2/gm2-compiler/M2Quads.mod| 2 +-
 gcc/testsuite/gm2/iso/fail/badset.mod  | 7 +++
 gcc/testsuite/gm2/iso/fail/badset2.mod | 9 +
 gcc/testsuite/gm2/iso/fail/badset3.mod | 9 +
 4 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/gcc/m2/gm2-compiler/M2Quads.mod b/gcc/m2/gm2-compiler/M2Quads.mod
index ac654e89c91..52d72f6cd62 100644
--- a/gcc/m2/gm2-compiler/M2Quads.mod
+++ b/gcc/m2/gm2-compiler/M2Quads.mod
@@ -12172,7 +12172,7 @@ BEGIN
IF type = NulSym
THEN
   MetaErrorT0 (tokcbrpos,
-   '{%E}constructor requires a type before the opening {')
+   '{%E}constructor requires a type before the opening %{')
ELSE
   ChangeToConstructor (tok, type) ;
   PutConstructorFrom (constValue, type) ;
diff --git a/gcc/testsuite/gm2/iso/fail/badset.mod 
b/gcc/testsuite/gm2/iso/fail/badset.mod
new file mode 100644
index 000..8bfc49f948e
--- /dev/null
+++ b/gcc/testsuite/gm2/iso/fail/badset.mod
@@ -0,0 +1,7 @@
+MODULE badset ;
+
+VAR
+   set: set ;
+BEGIN
+
+END badset.
diff --git a/gcc/testsuite/gm2/iso/fail/badset2.mod 
b/gcc/testsuite/gm2/iso/fail/badset2.mod
new file mode 100644
index 000..d883d1ffe75
--- /dev/null
+++ b/gcc/testsuite/gm2/iso/fail/badset2.mod
@@ -0,0 +1,9 @@
+MODULE badset2 ;
+
+TYPE
+   userset = SET OF CHAR ;
+VAR
+   set: userset ;
+BEGIN
+   set := set {}
+END badset2.
diff --git a/gcc/testsuite/gm2/iso/fail/badset3.mod 
b/gcc/testsuite/gm2/iso/fail/badset3.mod
new file mode 100644
index 000..29131ff4c5a
--- /dev/null
+++ b/gcc/testsuite/gm2/iso/fail/badset3.mod
@@ -0,0 +1,9 @@
+MODULE badset3 ;
+
+TYPE
+   userset = SET OF CHAR ;
+VAR
+   set: userset ;
+BEGIN
+   set := unknown {}
+END badset3.


[gcc r14-9603] Move pr114396.c from gcc.target/i386 to gcc.c-torture/execute.

2024-03-21 Thread hongtao Liu via Gcc-cvs
https://gcc.gnu.org/g:9a6c7aa1b011b77fcd9b19f7b8d7ff0fc823cdb2

commit r14-9603-g9a6c7aa1b011b77fcd9b19f7b8d7ff0fc823cdb2
Author: liuhongt 
Date:   Fri Mar 22 10:09:43 2024 +0800

Move pr114396.c from gcc.target/i386 to gcc.c-torture/execute.

Also fixed a typo in the testcase.

gcc/testsuite/ChangeLog:

PR tree-optimization/114396
* gcc.target/i386/pr114396.c: Move to...
* gcc.c-torture/execute/pr114396.c: ...here.

Diff:
---
 gcc/testsuite/{gcc.target/i386 => gcc.c-torture/execute}/pr114396.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/gcc.target/i386/pr114396.c 
b/gcc/testsuite/gcc.c-torture/execute/pr114396.c
similarity index 92%
rename from gcc/testsuite/gcc.target/i386/pr114396.c
rename to gcc/testsuite/gcc.c-torture/execute/pr114396.c
index 4c4015f871f..baf90eafabf 100644
--- a/gcc/testsuite/gcc.target/i386/pr114396.c
+++ b/gcc/testsuite/gcc.c-torture/execute/pr114396.c
@@ -1,5 +1,5 @@
-/* { dg-do run } */
-/* { dg-options "-O1 -fwrapv -fno-vect-cost-model" } */
+/* PR tree-optimization/114396 */
+/* { dg-additional-options "-fwrapv -fno-vect-cost-model" } */
 
 short a = 0xF;
 short b[16];
@@ -88,7 +88,7 @@ int main() {
 
   exp = foo1 (a);
   res = foo1_o3 (a);
-  if (uexp != ures)
+  if (exp != res)
 __builtin_abort ();
 
   uexp = foou (a);


[gcc r13-8488] Move pr114396.c from gcc.target/i386 to gcc.c-torture/execute.

2024-03-21 Thread hongtao Liu via Gcc-cvs
https://gcc.gnu.org/g:e6a3d1f5bcfd954b614155d96c97bde8ac230e2e

commit r13-8488-ge6a3d1f5bcfd954b614155d96c97bde8ac230e2e
Author: liuhongt 
Date:   Fri Mar 22 10:09:43 2024 +0800

Move pr114396.c from gcc.target/i386 to gcc.c-torture/execute.

Also fixed a typo in the testcase.

gcc/testsuite/ChangeLog:

PR tree-optimization/114396
* gcc.target/i386/pr114396.c: Move to...
* gcc.c-torture/execute/pr114396.c: ...here.

(cherry picked from commit 9a6c7aa1b011b77fcd9b19f7b8d7ff0fc823cdb2)

Diff:
---
 gcc/testsuite/{gcc.target/i386 => gcc.c-torture/execute}/pr114396.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/gcc.target/i386/pr114396.c 
b/gcc/testsuite/gcc.c-torture/execute/pr114396.c
similarity index 92%
rename from gcc/testsuite/gcc.target/i386/pr114396.c
rename to gcc/testsuite/gcc.c-torture/execute/pr114396.c
index 4c4015f871f..baf90eafabf 100644
--- a/gcc/testsuite/gcc.target/i386/pr114396.c
+++ b/gcc/testsuite/gcc.c-torture/execute/pr114396.c
@@ -1,5 +1,5 @@
-/* { dg-do run } */
-/* { dg-options "-O1 -fwrapv -fno-vect-cost-model" } */
+/* PR tree-optimization/114396 */
+/* { dg-additional-options "-fwrapv -fno-vect-cost-model" } */
 
 short a = 0xF;
 short b[16];
@@ -88,7 +88,7 @@ int main() {
 
   exp = foo1 (a);
   res = foo1_o3 (a);
-  if (uexp != ures)
+  if (exp != res)
 __builtin_abort ();
 
   uexp = foou (a);


[gcc r14-9604] RISC-V: Bugfix ICE for __attribute__((target("arch=+v"))

2024-03-21 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:d3c24e9e55a7cf18df313a8b32b6de4b3ba81013

commit r14-9604-gd3c24e9e55a7cf18df313a8b32b6de4b3ba81013
Author: Pan Li 
Date:   Mon Mar 18 11:21:29 2024 +0800

RISC-V: Bugfix ICE for __attribute__((target("arch=+v"))

This patch fixes an ICE for __attribute__((target("arch=+v")))
and similar extension(s).  Given the sample code below:

void __attribute__((target("arch=+v")))
test_2 (int *a, int *b, int *out, unsigned count)
{
  unsigned i;
  for (i = 0; i < count; i++)
   out[i] = a[i] + b[i];
}

It will ICE when built with -march=rv64gc -O3.

test.c: In function ‘test_2’:
test.c:4:1: internal compiler error: Floating point exception
4 | {
  | ^
0x1a5891b crash_signal
.../__RISC-V_BUILD__/../gcc/toplev.cc:319
0x7f0a7884251f ???
./signal/../sysdeps/unix/sysv/linux/x86_64/libc_sigaction.c:0
0x1f51ba4 riscv_hard_regno_nregs
.../__RISC-V_BUILD__/../gcc/config/riscv/riscv.cc:8143
0x1967bb9 init_reg_modes_target()
.../__RISC-V_BUILD__/../gcc/reginfo.cc:471
0x13fc029 init_emit_regs()
.../__RISC-V_BUILD__/../gcc/emit-rtl.cc:6237
0x1a5b83d target_reinit()
.../__RISC-V_BUILD__/../gcc/toplev.cc:1936
0x35e374d save_target_globals()
.../__RISC-V_BUILD__/../gcc/target-globals.cc:92
0x35e381f save_target_globals_default_opts()
.../__RISC-V_BUILD__/../gcc/target-globals.cc:122
0x1f544cc riscv_save_restore_target_globals(tree_node*)
.../__RISC-V_BUILD__/../gcc/config/riscv/riscv.cc:9138
0x1f55c36 riscv_set_current_function
...

There are two reasons for this ICE.
1. The implied extension(s) of v are not well handled and the
   TARGET_MIN_VLEN is 0 which is not reinitialized.  Then the
   size / TARGET_MIN_VLEN will have DivideByZero.
2. The machine modes of the vector types will vary after
   the v extension is introduced.

This patch passed below testsuite:
1. The riscv fully regression test.

PR target/114352

gcc/ChangeLog:

* common/config/riscv/riscv-common.cc (riscv_subset_list::parse):
Replace implied, combine and check to func finalize.
(riscv_subset_list::finalize): New func impl to take care of
implied, combine ext and related checks.
* config/riscv/riscv-subset.h: Add func decl for finalize.
* config/riscv/riscv-target-attr.cc 
(riscv_target_attr_parser::parse_arch):
Finalize the ext before return succeed.
* config/riscv/riscv.cc (riscv_set_current_function): Reinit the
machine mode when setting the current function.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr114352-1.c: New test.
* gcc.target/riscv/rvv/base/pr114352-2.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 gcc/common/config/riscv/riscv-common.cc| 31 
 gcc/config/riscv/riscv-subset.h|  2 +
 gcc/config/riscv/riscv-target-attr.cc  |  2 +
 gcc/config/riscv/riscv.cc  |  4 ++
 .../gcc.target/riscv/rvv/base/pr114352-1.c | 58 ++
 .../gcc.target/riscv/rvv/base/pr114352-2.c | 27 ++
 6 files changed, 114 insertions(+), 10 deletions(-)

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc
index 440127a2af0..15d44245b3c 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -1428,16 +1428,7 @@ riscv_subset_list::parse (const char *arch, location_t 
loc)
   if (p == NULL)
 goto fail;
 
-  for (itr = subset_list->m_head; itr != NULL; itr = itr->next)
-{
-  subset_list->handle_implied_ext (itr->name.c_str ());
-}
-
-  /* Make sure all implied extensions are included. */
-  gcc_assert (subset_list->check_implied_ext ());
-
-  subset_list->handle_combine_ext ();
-  subset_list->check_conflict_ext ();
+  subset_list->finalize ();
 
   return subset_list;
 
@@ -1467,6 +1458,26 @@ riscv_subset_list::set_loc (location_t loc)
   m_loc = loc;
 }
 
+/* Make sure the implied or combined extension is included after add
+   a new std extension to subset list or likewise.  For exmaple as below,
+
+   void __attribute__((target("arch=+v"))) func () with -march=rv64gc.
+
+   The implied zvl128b and zve64d of the std v should be included.  */
+void
+riscv_subset_list::finalize ()
+{
+  riscv_subset_t *subset;
+
+  for (subset = m_head; subset != NULL; subset = subset->next)
+handle_implied_ext (subset->name.c_str ());
+
+  gcc_assert (check_implied_ext ());
+
+  handle_combine_ext ();
+  check_conflict_ext ();
+}
+
 /* Return the current arch string.  */
 
 std::string
diff --git a/gcc/config/riscv/riscv-subset.h b/gcc/config/riscv/riscv-subset.h
index ae849e2a302.

[gcc r14-9605] RISC-V: Bugfix function target attribute pollution

2024-03-21 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:9941f0295a14659e25260458efd2e46a68ad0342

commit r14-9605-g9941f0295a14659e25260458efd2e46a68ad0342
Author: Pan Li 
Date:   Tue Mar 19 09:43:24 2024 +0800

RISC-V: Bugfix function target attribute pollution

This patch depends on below ICE fix.

https://gcc.gnu.org/pipermail/gcc-patches/2024-March/647915.html

The function target attribute should be on a per-function basis.
For example, we have 4 functions as below:

void test_1 () {}

void __attribute__((target("arch=+v"))) test_2 () {}

void __attribute__((target("arch=+zfh"))) test_3 () {}

void test_4 () {}

The scope of the target attribute should not extend beyond the function body.
That is, test_3 cannot have the 'v' extension, and test_4
cannot have either the 'v' or the 'zfh' extension.

Unfortunately, for now the test_4 is able to leverage the 'v' and
the 'zfh' extension which is incorrect.  This patch would like to
fix the sticking attribute by introducing the commandline subset_list.
When parse_arch, we always clone from the cmdline_subset_list instead
of the current_subset_list.

Meanwhile, we correct the print information about arch like below.

.option arch, rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zicsr2p0_zifencei2p0_zbb1p0

The riscv_declare_function_name hook is always after the hook
riscv_process_target_attr.  Thus, we introduce one hash_map to record
the 1:1 mapping from fndecl to its subset_list in advance.  And later
the riscv_declare_function_name is able to get the right information
about the arch.

Below test are passed for this patch
* The riscv fully regression test.

PR target/114352

gcc/ChangeLog:

* common/config/riscv/riscv-common.cc (struct 
riscv_func_target_info):
New struct for func decl and target name.
(struct riscv_func_target_hasher): New hasher for hash table mapping
from the fn_decl to fn_target_name.
(riscv_func_decl_hash): New func to compute the hash for fn_decl.
(riscv_func_target_hasher::hash): New func to impl hash interface.
(riscv_func_target_hasher::equal): New func to impl equal interface.
(riscv_cmdline_subset_list): New static var for cmdline subset list.
(riscv_func_target_table_lazy_init): New func to lazy init the func
target hash table.
(riscv_func_target_get): New func to get target name from hash 
table.
(riscv_func_target_put): New func to put target name into hash 
table.
(riscv_func_target_remove_and_destory): New func to remove target
info from the hash table and destroy it.
(riscv_parse_arch_string): Set the static var cmdline_subset_list.
* config/riscv/riscv-subset.h (riscv_cmdline_subset_list): New 
static
var for cmdline subset list.
(riscv_func_target_get): New func decl.
(riscv_func_target_put): Ditto.
(riscv_func_target_remove_and_destory): Ditto.
* config/riscv/riscv-target-attr.cc 
(riscv_target_attr_parser::parse_arch):
Take cmdline_subset_list instead of current_subset_list when clone.
(riscv_process_target_attr): Record the func target info to hash 
table.
(riscv_option_valid_attribute_p): Add new arg tree fndel.
* config/riscv/riscv.cc (riscv_declare_function_name): Consume the
func target info and print the arch message.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr114352-3.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 gcc/common/config/riscv/riscv-common.cc| 105 ++-
 gcc/config/riscv/riscv-subset.h|   4 +
 gcc/config/riscv/riscv-target-attr.cc  |  18 +++-
 gcc/config/riscv/riscv.cc  |   7 +-
 .../gcc.target/riscv/rvv/base/pr114352-3.c | 113 +
 5 files changed, 240 insertions(+), 7 deletions(-)

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc
index 15d44245b3c..7095f303cbb 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -426,11 +426,108 @@ bool riscv_subset_list::parse_failed = false;
 
 static riscv_subset_list *current_subset_list = NULL;
 
+static riscv_subset_list *cmdline_subset_list = NULL;
+
+struct riscv_func_target_info
+{
+  tree fn_decl;
+  std::string fn_target_name;
+
+  riscv_func_target_info (const tree &decl, const std::string &target_name)
+: fn_decl (decl), fn_target_name (target_name)
+  {
+  }
+};
+
+struct riscv_func_target_hasher : nofree_ptr_hash
+{
+  typedef tree compare_type;
+
+  static hashval_t hash (value_type);
+  static bool equal (value_type, const compare_type &);
+};
+
+static hash_table *f

[gcc r14-9606] [committed] Fix RISC-V missing stack tie

2024-03-21 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:c65046ff2ef0a9a46e59bc0b3369b2d226f6a239

commit r14-9606-gc65046ff2ef0a9a46e59bc0b3369b2d226f6a239
Author: Jeff Law 
Date:   Thu Mar 21 20:41:59 2024 -0600

[committed] Fix RISC-V missing stack tie

As some of you know, Raphael has been working on stack-clash support for the
RISC-V port.  A little while ago Florian reached out to us with an issue 
where
glibc was failing its smoke test due to referencing an unallocated stack 
slot.

Without diving into the code in detail I (incorrectly) concluded it was a
problem with the fallback of using Ada's stack-check paths due to not having
stack-clash support.

Once enough stack-clash bits were ready I had Raphael review the code 
generated
for Florian's test and we concluded the original case from Florian was 
just
wrong irrespective of stack clash/stack check.  While Raphael's stack-clash
work will indirectly fix Florian's case, it really should also work without
stack-clash.

In particular this code was called out by valgrind:

> 0003cb5e :
> __GI___realpath():
>3cb5e:   81010113addisp,sp,-2032
>3cb62:   7d313423sd  s3,1992(sp)
>3cb66:   79fdlui s3,0xf
>3cb68:   7e813023sd  s0,2016(sp)
>3cb6c:   7c913c23sd  s1,2008(sp)
>3cb70:   7f010413addis0,sp,2032
>3cb74:   35098793addia5,s3,848 # 
f350 <__libc_initial+0xffe8946a>
>3cb78:   74fdlui s1,0xf
>3cb7a:   008789b3add s3,a5,s0
>3cb7e:   f9048793addia5,s1,-112 # 
ef90 <__libc_initial+0xffe890aa>
>3cb82:   008784b3add s1,a5,s0
>3cb86:   77fdlui a5,0xf
>3cb88:   7d413023sd  s4,1984(sp)
>3cb8c:   7b513c23sd  s5,1976(sp)
>3cb90:   7e113423sd  ra,2024(sp)
>3cb94:   7d213823sd  s2,2000(sp)
>3cb98:   7b613823sd  s6,1968(sp)
>3cb9c:   7b713423sd  s7,1960(sp)
>3cba0:   7b813023sd  s8,1952(sp)
>3cba4:   79913c23sd  s9,1944(sp)
>3cba8:   79a13823sd  s10,1936(sp)
>3cbac:   79b13423sd  s11,1928(sp)
>3cbb0:   34878793addia5,a5,840 # 
f348 <__libc_initial+0xffe89462>
>3cbb4:   4713li  a4,1024
>3cbb8:   00132a17auipc   s4,0x132
>3cbbc:   ae0a3a03ld  s4,-1312(s4) # 16e698 
<__stack_chk_guard>
>3cbc0:   01098893addia7,s3,16
>3cbc4:   42098693addia3,s3,1056
>3cbc8:   b8040a93addis5,s0,-1152
>3cbcc:   97a2add a5,a5,s0
>3cbce:   000a3603ld  a2,0(s4)
>3cbd2:   f8c43423sd  a2,-120(s0)
>3cbd6:   4601li  a2,0
>3cbd8:   3d14b023sd  a7,960(s1)
>3cbdc:   3ce4b423sd  a4,968(s1)
>3cbe0:   7cd4b823sd  a3,2000(s1)
>3cbe4:   7ce4bc23sd  a4,2008(s1)
>3cbe8:   b7543823sd  s5,-1168(s0)
>3cbec:   b6e43c23sd  a4,-1160(s0)
>3cbf0:   e38csd  a1,0(a5)
>3cbf2:   b0010113addisp,sp,-1280
In particular note the store at 0x3cbd8.  That's hitting (s1 + 960). If you
chase the values around, you'll find it's a bit more than 1k into 
unallocated
stack space.  It's also worth noting the final stack adjustment at 0x3cbf2.

While I haven't reproduced Florian's code exactly, I was able to get 
reasonably
close and verify my suspicion that everything was fine before sched2 and
incorrect after sched2.  It was also obvious at that point what had gone 
wrong
-- we were missing a stack tie after the final stack pointer adjustment.

This patch adds the missing stack tie.

While not technically a regression, I shudder at the thought of chasing one 
of
these issues down again in the wild.  Been there, done that.

Regression tested on rv64gc.  Verified the scheduler no longer mucked up
realpath by hand.  Pushing to the trunk.

gcc/
* config/riscv/riscv.cc (riscv_expand_prologue): Add missing stack
 

[gcc(refs/users/meissner/heads/work163-dmf)] Add support for XVRL instruction.

2024-03-21 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:6ff874d066d523bd6b71e2f944f5740f651ed022

commit 6ff874d066d523bd6b71e2f944f5740f651ed022
Author: Michael Meissner 
Date:   Thu Mar 21 23:39:11 2024 -0400

Add support for XVRL instruction.

2024-03-21  Michael Meissner  

gcc/

* config/rs6000/altivec.md (xvrlw): New insn.

Diff:
---
 gcc/config/rs6000/altivec.md | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 4d4c94ff0a0..da5db49d3af 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -1883,6 +1883,17 @@
 }
   [(set_attr "type" "vecperm")])
 
+;; Future cpu adds a vector rotate left word variant
+(define_insn "*xvrlw"
+  [(set (match_operand:V4SI 0 "register_operand" "=v,wa")
+   (rotate:V4SI (match_operand:V4SI 1 "register_operand" "v,wa")
+(match_operand:V4SI 2 "register_operand" "v,wa")))]
+  "TARGET_FUTURE"
+  "@
+   vrlw %0,%1,%2
+   xvrlw %x0,%x1,%x2"
+  [(set_attr "type" "vecsimple")])
+
 (define_insn "altivec_vrl"
   [(set (match_operand:VI2 0 "register_operand" "=v")
 (rotate:VI2 (match_operand:VI2 1 "register_operand" "v")


[gcc(refs/users/meissner/heads/work163-dmf)] Revert all changes

2024-03-21 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:d49aa664ce768f629c858158eca406991a66da85

commit d49aa664ce768f629c858158eca406991a66da85
Author: Michael Meissner 
Date:   Thu Mar 21 23:40:14 2024 -0400

Revert all changes

Diff:
---
 gcc/config/rs6000/altivec.md | 11 ---
 1 file changed, 11 deletions(-)

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index da5db49d3af..4d4c94ff0a0 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -1883,17 +1883,6 @@
 }
   [(set_attr "type" "vecperm")])
 
-;; Future cpu adds a vector rotate left word variant
-(define_insn "*xvrlw"
-  [(set (match_operand:V4SI 0 "register_operand" "=v,wa")
-   (rotate:V4SI (match_operand:V4SI 1 "register_operand" "v,wa")
-(match_operand:V4SI 2 "register_operand" "v,wa")))]
-  "TARGET_FUTURE"
-  "@
-   vrlw %0,%1,%2
-   xvrlw %x0,%x1,%x2"
-  [(set_attr "type" "vecsimple")])
-
 (define_insn "altivec_vrl"
   [(set (match_operand:VI2 0 "register_operand" "=v")
 (rotate:VI2 (match_operand:VI2 1 "register_operand" "v")


[gcc(refs/users/meissner/heads/work163-dmf)] Add support for XVRL instruction.

2024-03-21 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:145bc7b00c10cf6e59897aba7f61c3a24c85ca0b

commit 145bc7b00c10cf6e59897aba7f61c3a24c85ca0b
Author: Michael Meissner 
Date:   Thu Mar 21 23:49:02 2024 -0400

Add support for XVRL instruction.

2024-03-21  Michael Meissner  

gcc/

* config/rs6000/altivec.md (future_xvrlw): New insn.

Diff:
---
 gcc/config/rs6000/altivec.md | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 4d4c94ff0a0..afe3d72316c 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -1883,6 +1883,20 @@
 }
   [(set_attr "type" "vecperm")])
 
+;; -mcpu=future adds a vector rotate left word variant.  There is no vector
+;; byte/half-word/double-word/quad-word rotate left.  This insn occurs before
+;; altivec_vrl and will match for -mcpu=future, while other cpus will
+;; match the generic insn.
+(define_insn "*future_xvrlw"
+  [(set (match_operand:V4SI 0 "register_operand" "=v,wa")
+   (rotate:V4SI (match_operand:V4SI 1 "register_operand" "v,wa")
+(match_operand:V4SI 2 "register_operand" "v,wa")))]
+  "TARGET_FUTURE"
+  "@
+   vrlw %0,%1,%2
+   xvrlw %x0,%x1,%x2"
+  [(set_attr "type" "vecsimple")])
+
 (define_insn "altivec_vrl"
   [(set (match_operand:VI2 0 "register_operand" "=v")
 (rotate:VI2 (match_operand:VI2 1 "register_operand" "v")


[gcc(refs/users/meissner/heads/work163-dmf)] Revert all changes

2024-03-21 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:889e9e1165b5f84cf84820ca9b1926548941aded

commit 889e9e1165b5f84cf84820ca9b1926548941aded
Author: Michael Meissner 
Date:   Thu Mar 21 23:59:52 2024 -0400

Revert all changes

Diff:
---
 gcc/config/rs6000/altivec.md  |  14 -
 gcc/config/rs6000/constraints.md  |   3 -
 gcc/config/rs6000/mma.md  | 410 +-
 gcc/config/rs6000/predicates.md   |  32 --
 gcc/config/rs6000/rs6000-builtin.cc   |  22 +-
 gcc/config/rs6000/rs6000-call.cc  |  10 +-
 gcc/config/rs6000/rs6000-cpus.def |   2 -
 gcc/config/rs6000/rs6000-modes.def|   4 -
 gcc/config/rs6000/rs6000.cc   | 318 -
 gcc/config/rs6000/rs6000.h|  50 +--
 gcc/config/rs6000/rs6000.md   |   2 -
 gcc/doc/md.texi   |   5 -
 gcc/testsuite/gcc.target/powerpc/dm-1024bit.c |  63 
 gcc/testsuite/gcc.target/powerpc/dm-double-test.c | 194 --
 gcc/testsuite/lib/target-supports.exp |  23 --
 15 files changed, 166 insertions(+), 986 deletions(-)

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index afe3d72316c..4d4c94ff0a0 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -1883,20 +1883,6 @@
 }
   [(set_attr "type" "vecperm")])
 
-;; -mcpu=future adds a vector rotate left word variant.  There is no vector
-;; byte/half-word/double-word/quad-word rotate left.  This insn occurs before
-;; altivec_vrl and will match for -mcpu=future, while other cpus will
-;; match the generic insn.
-(define_insn "*future_xvrlw"
-  [(set (match_operand:V4SI 0 "register_operand" "=v,wa")
-   (rotate:V4SI (match_operand:V4SI 1 "register_operand" "v,wa")
-(match_operand:V4SI 2 "register_operand" "v,wa")))]
-  "TARGET_FUTURE"
-  "@
-   vrlw %0,%1,%2
-   xvrlw %x0,%x1,%x2"
-  [(set_attr "type" "vecsimple")])
-
 (define_insn "altivec_vrl"
   [(set (match_operand:VI2 0 "register_operand" "=v")
 (rotate:VI2 (match_operand:VI2 1 "register_operand" "v")
diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index 277a30a8245..369a7b75042 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -107,9 +107,6 @@
(match_test "TARGET_P8_VECTOR")
(match_operand 0 "s5bit_cint_operand")))
 
-(define_register_constraint "wD" "rs6000_constraints[RS6000_CONSTRAINT_wD]"
-  "Accumulator register.")
-
 (define_constraint "wE"
   "@internal Vector constant that can be loaded with the XXSPLTIB instruction."
   (match_test "xxspltib_constant_nosplit (op, mode)"))
diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index 4f9c59046ea..04e2d0066df 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -91,11 +91,6 @@
UNSPEC_MMA_XVI8GER4SPP
UNSPEC_MMA_XXMFACC
UNSPEC_MMA_XXMTACC
-   UNSPEC_DM_INSERT512_UPPER
-   UNSPEC_DM_INSERT512_LOWER
-   UNSPEC_DM_EXTRACT512
-   UNSPEC_DMR_RELOAD_FROM_MEMORY
-   UNSPEC_DMR_RELOAD_TO_MEMORY
   ])
 
 (define_c_enum "unspecv"
@@ -229,47 +224,44 @@
 (UNSPEC_MMA_XVF64GERNP "xvf64gernp")
 (UNSPEC_MMA_XVF64GERNN "xvf64gernn")])
 
-;; The "pm" prefix is not in these expansions, so that we can generate
-;; pmdmxvi4ger8 on systems with dense math registers and xvi4ger8 on systems
-;; without dense math registers.
-(define_int_attr vvi4i4i8  [(UNSPEC_MMA_PMXVI4GER8 "xvi4ger8")])
+(define_int_attr vvi4i4i8  [(UNSPEC_MMA_PMXVI4GER8 "pmxvi4ger8")])
 
-(define_int_attr avvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8PP   "xvi4ger8pp")])
+(define_int_attr avvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8PP   
"pmxvi4ger8pp")])
 
-(define_int_attr vvi4i4i2  [(UNSPEC_MMA_PMXVI16GER2"xvi16ger2")
-(UNSPEC_MMA_PMXVI16GER2S   "xvi16ger2s")
-(UNSPEC_MMA_PMXVF16GER2"xvf16ger2")
-(UNSPEC_MMA_PMXVBF16GER2   "xvbf16ger2")])
+(define_int_attr vvi4i4i2  [(UNSPEC_MMA_PMXVI16GER2"pmxvi16ger2")
+(UNSPEC_MMA_PMXVI16GER2S   "pmxvi16ger2s")
+(UNSPEC_MMA_PMXVF16GER2"pmxvf16ger2")
+(UNSPEC_MMA_PMXVBF16GER2   
"pmxvbf16ger2")])
 
-(define_int_attr avvi4i4i2 [(UNSPEC_MMA_PMXVI16GER2PP  "xvi16ger2pp")
-(UNSPEC_MMA_PMXVI16GER2SPP "xvi16ger2spp")
-(UNSPEC_MMA_PMXVF16GER2PP  "xvf16ger2pp")
-(UNSPEC_MMA_PMXVF16GER2PN  "xvf16ger2pn")
-(UNSPEC_MMA_PMXVF16GER2NP  "xvf16ger2np")
-(UNSPEC_MMA_PMXVF16GER2NN  "xv

[gcc(refs/users/meissner/heads/work163-dmf)] Add wD constraint.

2024-03-21 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:935c71424570d5e85a85edb9a1516c3ef578a6f8

commit 935c71424570d5e85a85edb9a1516c3ef578a6f8
Author: Michael Meissner 
Date:   Fri Mar 22 00:14:19 2024 -0400

Add wD constraint.

This patch adds a new constraint ('wD') that matches the accumulator 
registers
that overlap with VSX registers 0..31 on power10.  Future patches will add 
the
support for a separate accumulator register class that will be used when the
support for dense math registers is added.

2024-03-22   Michael Meissner  

* config/rs6000/constraints.md (wD): New constraint.
* config/rs6000/mma.md (mma_disassemble_acc): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")]
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0")]
MMA_ACC))]
   "TARGET_MMA"
   " %A0"
@@ -515,7 +513,7 @@
 ;; UNSPEC_VOLATILE.
 
 (define_insn "mma_xxsetaccz"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD")
(unspec_volatile:XO [(const_int 0)]
UNSPECV_MMA_XXSETACCZ))]
   "TARGET_MMA"
@@ -523,7 +521,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
MMA_VV))]
@@ -532,8 +530,8 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
MMA_AVV))]
@@ -542,7 +540,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
MMA_PV))]
@@ -551,8 +549,8 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
(match_operand:OO 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
MMA_APV))]
@@ -561,7 +559,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -574,8 +572,8 @@
(set_attr "prefixed" "yes")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
(match_operand:SI 4 "const_0_to_15_operand" "n,n")
@@ -588,7 +586,7 @@
(set_attr "prefixed" "yes")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -601,8 +599,8 @@
(set_attr "prefixed" "yes")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0"

[gcc(refs/users/meissner/heads/work163-dmf)] Revert all changes

2024-03-21 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:8ed4af0366a9b0d8670b96dfba25b85d3ba9b65c

commit 8ed4af0366a9b0d8670b96dfba25b85d3ba9b65c
Author: Michael Meissner 
Date:   Fri Mar 22 00:16:08 2024 -0400

Revert all changes

Diff:
---
 gcc/config/rs6000/constraints.md  |  3 ---
 gcc/config/rs6000/mma.md  | 54 ---
 gcc/config/rs6000/predicates.md   | 15 ---
 gcc/config/rs6000/rs6000-c.cc |  9 ++-
 gcc/config/rs6000/rs6000-cpus.def |  2 --
 gcc/config/rs6000/rs6000.cc   |  7 +
 gcc/config/rs6000/rs6000.h|  1 -
 gcc/doc/md.texi   |  5 
 8 files changed, 31 insertions(+), 65 deletions(-)

diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index 277a30a8245..369a7b75042 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -107,9 +107,6 @@
(match_test "TARGET_P8_VECTOR")
(match_operand 0 "s5bit_cint_operand")))
 
-(define_register_constraint "wD" "rs6000_constraints[RS6000_CONSTRAINT_wD]"
-  "Accumulator register.")
-
 (define_constraint "wE"
   "@internal Vector constant that can be loaded with the XXSPLTIB instruction."
   (match_test "xxspltib_constant_nosplit (op, mode)"))
diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index 49cf5f8fe43..04e2d0066df 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -452,7 +452,8 @@
   (match_operand:V16QI 3 "mma_assemble_input_operand" "mwa")
   (match_operand:V16QI 4 "mma_assemble_input_operand" "mwa")]
  UNSPECV_MMA_ASSEMBLE))]
-  "TARGET_MMA"
+  "TARGET_MMA
+   && fpr_reg_operand (operands[0], XOmode)"
   "#"
   "&& reload_completed"
   [(const_int 0)]
@@ -485,7 +486,8 @@
(unspec:V16QI [(match_operand:XO 1 "fpr_reg_operand" "d")
  (match_operand 2 "const_0_to_3_operand")]
  UNSPEC_MMA_EXTRACT))]
-  "TARGET_MMA"
+  "TARGET_MMA
+   && fpr_reg_operand (operands[1], XOmode)"
   "#"
   "&& reload_completed"
   [(const_int 0)]
@@ -502,8 +504,8 @@
 ;; the accumulator.  We enforce this by marking the output as early clobber.
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD")
-   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0")]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")]
MMA_ACC))]
   "TARGET_MMA"
   " %A0"
@@ -513,7 +515,7 @@
 ;; UNSPEC_VOLATILE.
 
 (define_insn "mma_xxsetaccz"
-  [(set (match_operand:XO 0 "accumulator_operand" "=wD")
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=d")
(unspec_volatile:XO [(const_int 0)]
UNSPECV_MMA_XXSETACCZ))]
   "TARGET_MMA"
@@ -521,7 +523,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
MMA_VV))]
@@ -530,8 +532,8 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
MMA_AVV))]
@@ -540,7 +542,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
MMA_PV))]
@@ -549,8 +551,8 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
(match_operand:OO 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
MMA_APV))]
@@ -559,7 +561,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -572,8 +574,8 @@
(set_attr "prefixed" "yes")])

[gcc(refs/users/meissner/heads/work163-dmf)] Use vector pair load/store for memcpy with -mcpu=future

2024-03-21 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:e9f36e1c173a1967318ccfc2341e46b526d6f0aa

commit e9f36e1c173a1967318ccfc2341e46b526d6f0aa
Author: Michael Meissner 
Date:   Fri Mar 22 00:16:39 2024 -0400

Use vector pair load/store for memcpy with -mcpu=future

In the development for the power10 processor, GCC did not enable using the 
load
vector pair and store vector pair instructions when optimizing things like
memory copy.  This patch enables using those instructions if -mcpu=future is
used.

2024-03-22  Michael Meissner  

gcc/

* config/rs6000/rs6000-cpus.def (ISA_FUTURE_MASKS_SERVER): Enable 
using
load vector pair and store vector pair instructions for memory copy
operations.
(POWERPC_MASKS): Make the bit for enabling using load vector pair 
and
store vector pair operations set and reset when the PowerPC 
processor is
changed.

Diff:
---
 gcc/config/rs6000/rs6000-cpus.def | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcc/config/rs6000/rs6000-cpus.def 
b/gcc/config/rs6000/rs6000-cpus.def
index 47365534af8..4ddba142e44 100644
--- a/gcc/config/rs6000/rs6000-cpus.def
+++ b/gcc/config/rs6000/rs6000-cpus.def
@@ -90,6 +90,7 @@
  | OPTION_MASK_POWER11)
 
 #define ISA_FUTURE_MASKS_SERVER(ISA_POWER11_MASKS_SERVER   
\
+| OPTION_MASK_BLOCK_OPS_VECTOR_PAIR\
 | OPTION_MASK_FUTURE)
 
 /* Flags that need to be turned off if -mno-vsx.  */
@@ -121,6 +122,7 @@
 
 /* Mask of all options to set the default isa flags based on -mcpu=.  */
 #define POWERPC_MASKS  (OPTION_MASK_ALTIVEC\
+| OPTION_MASK_BLOCK_OPS_VECTOR_PAIR\
 | OPTION_MASK_CMPB \
 | OPTION_MASK_CRYPTO   \
 | OPTION_MASK_DFP  \


[gcc(refs/users/meissner/heads/work163-dmf)] Add wD constraint.

2024-03-21 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:ca9dad30fef736a655999cb4ab1cbe1d8cdd20f1

commit ca9dad30fef736a655999cb4ab1cbe1d8cdd20f1
Author: Michael Meissner 
Date:   Fri Mar 22 00:19:46 2024 -0400

Add wD constraint.

This patch adds a new constraint ('wD') that matches the accumulator
registers that overlap with VSX registers 0..31 on power10.  Future patches
will add the support for a separate accumulator register class that will be
used when the support for dense math registers is added.

2024-03-22   Michael Meissner  

* config/rs6000/constraints.md (wD): New constraint.
* config/rs6000/mma.md (mma_disassemble_acc): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")]
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0")]
MMA_ACC))]
   "TARGET_MMA"
   " %A0"
@@ -515,7 +513,7 @@
 ;; UNSPEC_VOLATILE.
 
 (define_insn "mma_xxsetaccz"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD")
(unspec_volatile:XO [(const_int 0)]
UNSPECV_MMA_XXSETACCZ))]
   "TARGET_MMA"
@@ -523,7 +521,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
MMA_VV))]
@@ -532,8 +530,8 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
MMA_AVV))]
@@ -542,7 +540,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
MMA_PV))]
@@ -551,8 +549,8 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
(match_operand:OO 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
MMA_APV))]
@@ -561,7 +559,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -574,8 +572,8 @@
(set_attr "prefixed" "yes")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
(match_operand:SI 4 "const_0_to_15_operand" "n,n")
@@ -588,7 +586,7 @@
(set_attr "prefixed" "yes")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -601,8 +599,8 @@
(set_attr "prefixed" "yes")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0"

[gcc(refs/users/meissner/heads/work163-dmf)] Revert all changes

2024-03-21 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:90b5e76409a32556b7672139d6a2d031d0d5937e

commit 90b5e76409a32556b7672139d6a2d031d0d5937e
Author: Michael Meissner 
Date:   Fri Mar 22 00:21:34 2024 -0400

Revert all changes

Diff:
---
 gcc/config/rs6000/constraints.md  |  3 ---
 gcc/config/rs6000/mma.md  | 54 ---
 gcc/config/rs6000/predicates.md   | 15 ---
 gcc/config/rs6000/rs6000-c.cc |  9 ++-
 gcc/config/rs6000/rs6000-cpus.def |  2 --
 gcc/config/rs6000/rs6000.cc   |  7 +
 gcc/config/rs6000/rs6000.h|  1 -
 gcc/doc/md.texi   |  5 
 8 files changed, 31 insertions(+), 65 deletions(-)

diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index 277a30a8245..369a7b75042 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -107,9 +107,6 @@
(match_test "TARGET_P8_VECTOR")
(match_operand 0 "s5bit_cint_operand")))
 
-(define_register_constraint "wD" "rs6000_constraints[RS6000_CONSTRAINT_wD]"
-  "Accumulator register.")
-
 (define_constraint "wE"
   "@internal Vector constant that can be loaded with the XXSPLTIB instruction."
   (match_test "xxspltib_constant_nosplit (op, mode)"))
diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index 49cf5f8fe43..04e2d0066df 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -452,7 +452,8 @@
   (match_operand:V16QI 3 "mma_assemble_input_operand" "mwa")
   (match_operand:V16QI 4 "mma_assemble_input_operand" "mwa")]
  UNSPECV_MMA_ASSEMBLE))]
-  "TARGET_MMA"
+  "TARGET_MMA
+   && fpr_reg_operand (operands[0], XOmode)"
   "#"
   "&& reload_completed"
   [(const_int 0)]
@@ -485,7 +486,8 @@
(unspec:V16QI [(match_operand:XO 1 "fpr_reg_operand" "d")
  (match_operand 2 "const_0_to_3_operand")]
  UNSPEC_MMA_EXTRACT))]
-  "TARGET_MMA"
+  "TARGET_MMA
+   && fpr_reg_operand (operands[1], XOmode)"
   "#"
   "&& reload_completed"
   [(const_int 0)]
@@ -502,8 +504,8 @@
 ;; the accumulator.  We enforce this by marking the output as early clobber.
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD")
-   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0")]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")]
MMA_ACC))]
   "TARGET_MMA"
   " %A0"
@@ -513,7 +515,7 @@
 ;; UNSPEC_VOLATILE.
 
 (define_insn "mma_xxsetaccz"
-  [(set (match_operand:XO 0 "accumulator_operand" "=wD")
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=d")
(unspec_volatile:XO [(const_int 0)]
UNSPECV_MMA_XXSETACCZ))]
   "TARGET_MMA"
@@ -521,7 +523,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
MMA_VV))]
@@ -530,8 +532,8 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
MMA_AVV))]
@@ -540,7 +542,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
MMA_PV))]
@@ -549,8 +551,8 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
-   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
(match_operand:OO 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
MMA_APV))]
@@ -559,7 +561,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -572,8 +574,8 @@
(set_attr "prefixed" "yes")])

[gcc(refs/users/meissner/heads/work163-dmf)] Use vector pair load/store for memcpy with -mcpu=future

2024-03-21 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:385b09d7c32c2cb7a3d16f738e870d1d0bad6997

commit 385b09d7c32c2cb7a3d16f738e870d1d0bad6997
Author: Michael Meissner 
Date:   Fri Mar 22 00:22:20 2024 -0400

Use vector pair load/store for memcpy with -mcpu=future

In the development for the power10 processor, GCC did not enable using the 
load
vector pair and store vector pair instructions when optimizing things like
memory copy.  This patch enables using those instructions if -mcpu=future is
used.

2024-03-22  Michael Meissner  

gcc/

* config/rs6000/rs6000-cpus.def (ISA_FUTURE_MASKS_SERVER): Enable 
using
load vector pair and store vector pair instructions for memory copy
operations.
(POWERPC_MASKS): Make the bit for enabling using load vector pair 
and
store vector pair operations set and reset when the PowerPC 
processor is
changed.

Diff:
---
 gcc/config/rs6000/rs6000-cpus.def | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcc/config/rs6000/rs6000-cpus.def 
b/gcc/config/rs6000/rs6000-cpus.def
index 47365534af8..4ddba142e44 100644
--- a/gcc/config/rs6000/rs6000-cpus.def
+++ b/gcc/config/rs6000/rs6000-cpus.def
@@ -90,6 +90,7 @@
  | OPTION_MASK_POWER11)
 
 #define ISA_FUTURE_MASKS_SERVER(ISA_POWER11_MASKS_SERVER   
\
+| OPTION_MASK_BLOCK_OPS_VECTOR_PAIR\
 | OPTION_MASK_FUTURE)
 
 /* Flags that need to be turned off if -mno-vsx.  */
@@ -121,6 +122,7 @@
 
 /* Mask of all options to set the default isa flags based on -mcpu=.  */
 #define POWERPC_MASKS  (OPTION_MASK_ALTIVEC\
+| OPTION_MASK_BLOCK_OPS_VECTOR_PAIR\
 | OPTION_MASK_CMPB \
 | OPTION_MASK_CRYPTO   \
 | OPTION_MASK_DFP  \


[gcc(refs/users/meissner/heads/work163-dmf)] Add wD constraint.

2024-03-21 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:1aef3129fb903b6ca80746e0fcffdcf2c86728ee

commit 1aef3129fb903b6ca80746e0fcffdcf2c86728ee
Author: Michael Meissner 
Date:   Fri Mar 22 00:22:56 2024 -0400

Add wD constraint.

This patch adds a new constraint ('wD') that matches the accumulator
registers that overlap with VSX registers 0..31 on power10.  Future patches
will add the support for a separate accumulator register class that will be
used when the support for dense math registers is added.

2024-03-22   Michael Meissner  

* config/rs6000/constraints.md (wD): New constraint.
* config/rs6000/mma.md (mma_disassemble_acc): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")]
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0")]
MMA_ACC))]
   "TARGET_MMA"
   " %A0"
@@ -515,7 +513,7 @@
 ;; UNSPEC_VOLATILE.
 
 (define_insn "mma_xxsetaccz"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD")
(unspec_volatile:XO [(const_int 0)]
UNSPECV_MMA_XXSETACCZ))]
   "TARGET_MMA"
@@ -523,7 +521,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
MMA_VV))]
@@ -532,8 +530,8 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
MMA_AVV))]
@@ -542,7 +540,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
MMA_PV))]
@@ -551,8 +549,8 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
(match_operand:OO 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
MMA_APV))]
@@ -561,7 +559,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -574,8 +572,8 @@
(set_attr "prefixed" "yes")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
(match_operand:SI 4 "const_0_to_15_operand" "n,n")
@@ -588,7 +586,7 @@
(set_attr "prefixed" "yes")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -601,8 +599,8 @@
(set_attr "prefixed" "yes")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0"

[gcc(refs/users/meissner/heads/work163-dmf)] Add support for dense math registers.

2024-03-21 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:732fcb3a20b89b6cf405c4f7131c82de6bde8303

commit 732fcb3a20b89b6cf405c4f7131c82de6bde8303
Author: Michael Meissner 
Date:   Fri Mar 22 00:27:47 2024 -0400

Add support for dense math registers.

The MMA subsystem added the notion of accumulator registers as an optional
feature of ISA 3.1 (power10).  In ISA 3.1, these accumulators overlapped with
the VSX registers 0..31, but logically the accumulator registers were separate
from the FPR registers.  In ISA 3.1, it was anticipated that in future systems,
the accumulator registers may not overlap with the FPR registers.  This patch
adds the support for dense math registers as separate registers.

This particular patch does not change the MMA support to use the 
accumulators
within the dense math registers.  This patch just adds the basic support for
having separate DMRs.  The next patch will switch the MMA support to use the
accumulators if -mcpu=future is used.

For testing purposes, I added an undocumented option '-mdense-math' to 
enable
or disable the dense math support.

This patch adds a new constraint (wD).  If MMA is selected but dense math is
not selected (i.e. -mcpu=power10), the wD constraint will allow access to
accumulators that overlap with VSX registers 0..31.  If both MMA and dense 
math
are selected (i.e. -mcpu=future), the wD constraint will only allow dense 
math
registers.

This patch modifies the existing %A output modifier.  If MMA is selected but
dense math is not selected, then %A output modifier converts the VSX 
register
number to the accumulator number, by dividing it by 4.  If both MMA and 
dense
math are selected, then %A will map the separate DMR registers into 0..7.

The intention is that user code using extended asm can be modified to run on
both MMA without dense math and MMA with dense math:

1)  If possible, don't use extended asm, but instead use the MMA 
built-in
functions;

2)  If you do need to write extended asm, the d constraints
targeting accumulators should be changed to wD;

3)  Only use the built-in zero, assemble and disassemble functions to
create or move data between vector quad types and dense math accumulators.
I.e. do not use the xxmfacc, xxmtacc, and xxsetaccz directly in the
extended asm code.  The reason is these instructions assume there is a
1-to-1 correspondence between 4 adjacent FPR registers and an
accumulator that overlaps with those registers.  With accumulators
now being separate registers, there no longer is a 1-to-1
correspondence.

It is possible that the mangling for DMRs and the GDB register numbers may
produce other changes in the future.

2024-03-22   Michael Meissner  

* config/rs6000/mma.md (movxo): Add comments about dense math 
registers.
(movxo_nodm): Rename from movxo and restrict the usage to machines
without dense math registers.
(movxo_dm): New insn for movxo support for machines with dense math
registers.
(mma_): Restrict usage to machines without dense math 
registers.
(mma_xxsetaccz): Make a define_expand, and add support for dense 
math
registers.
(mma_xxsetaccz_nodm): Rename from mma_xxsetaccz, and restrict to
machines without dense math registers.
(mma_dmsetaccz): New insn.
* config/rs6000/predicates.md (dmr_operand): New predicate.
(accumulator_operand): Add support for dense math registers.
* config/rs6000/rs6000-builtin.cc (rs6000_gimple_fold_mma_builtin): 
Do
not de-prime accumulator when disassembling a vector quad.
* config/rs6000/rs6000-c.cc (rs6000_define_or_undefine_macro): 
Define
__DENSE_MATH__ if we have dense math registers.
* config/rs6000/rs6000.cc (enum rs6000_reg_type): Add DMR_REG_TYPE.
(enum rs6000_reload_reg_type): Add RELOAD_REG_DMR.
(LAST_RELOAD_REG_CLASS): Add support for DMR registers and the wD
constraint.
(reload_reg_map): Likewise.
(rs6000_reg_names): Likewise.
(alt_reg_names): Likewise.
(rs6000_hard_regno_nregs_internal): Likewise.
(rs6000_hard_regno_mode_ok_uncached): Likewise.
(rs6000_debug_reg_global): Likewise.
(rs6000_setup_reg_addr_masks): Likewise.
(rs6000_init_hard_regno_mode_ok): Likewise.
(rs6000_secondary_reload_memory): Add support for DMR registers.
(rs6000_secondary_reload_simple_move): Likewise.
(rs6000_preferred_reload_class): Likewise.
(rs6000_secondary_reload_class): Likewise.
(print_operand): Make %A handle both FPRs and 

[gcc(refs/users/meissner/heads/work163-dmf)] PowerPC: Switch to dense math names for all MMA operations.

2024-03-21 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:f2c0b60fd88b3108e4225a2dceac25832d8537b5

commit f2c0b60fd88b3108e4225a2dceac25832d8537b5
Author: Michael Meissner 
Date:   Fri Mar 22 00:29:03 2024 -0400

PowerPC: Switch to dense math names for all MMA operations.

This patch changes the assembler instruction names for MMA instructions from
the original name used in power10 to the new name when used with the dense 
math
system.  I.e. xvf64gerpp becomes dmxvf64gerpp.  The assembler will emit the
same bits for either spelling.

For the non-prefixed MMA instructions, we add a 'dm' prefix in front of the
instruction.  However, the prefixed instructions have a 'pm' prefix, and we 
add
the 'dm' prefix afterwards.  To prevent having two sets of parallel int
attributes, we remove the "pm" prefix from the instruction string in the
attributes, and add it later, both in the insn name and in the output 
template.

2024-03-22   Michael Meissner  

gcc/

* config/rs6000/mma.md (vvi4i4i8): Change the instruction to not 
have a
"pm" prefix.
(avvi4i4i8): Likewise.
(vvi4i4i2): Likewise.
(avvi4i4i2): Likewise.
(vvi4i4): Likewise.
(avvi4i4): Likewise.
(pvi4i2): Likewise.
(apvi4i2): Likewise.
(vvi4i4i4): Likewise.
(avvi4i4i4): Likewise.
(mma_xxsetaccz): Add support for running on DMF systems, generating 
the
dense math instruction and using the dense math accumulators.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_pm): Add support for running on DMF systems, 
generating
the dense math instruction and using the dense math accumulators.
Rename the insn with a 'pm' prefix and add either 'pm' or 'pmdm'
prefixes based on whether we have the original MMA specification or 
if
we have dense math support.
(mma_pm): Likewise.
(mma_pm): Likewise.
(mma_pm): Likewise.
(mma_pm): Likewise.
(mma_pm): Likewise.
(mma_pm): Likewise.
(mma_pm): Likewise.

Diff:
---
 gcc/config/rs6000/mma.md | 161 +++
 1 file changed, 107 insertions(+), 54 deletions(-)

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index 2ce613b46cc..f3870eac51a 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -224,44 +224,47 @@
 (UNSPEC_MMA_XVF64GERNP "xvf64gernp")
 (UNSPEC_MMA_XVF64GERNN "xvf64gernn")])
 
-(define_int_attr vvi4i4i8  [(UNSPEC_MMA_PMXVI4GER8 "pmxvi4ger8")])
+;; The "pm" prefix is not in these expansions, so that we can generate
+;; pmdmxvi4ger8 on systems with dense math registers and xvi4ger8 on systems
+;; without dense math registers.
+(define_int_attr vvi4i4i8  [(UNSPEC_MMA_PMXVI4GER8 "xvi4ger8")])
 
-(define_int_attr avvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8PP   
"pmxvi4ger8pp")])
+(define_int_attr avvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8PP   "xvi4ger8pp")])
 
-(define_int_attr vvi4i4i2  [(UNSPEC_MMA_PMXVI16GER2"pmxvi16ger2")
-(UNSPEC_MMA_PMXVI16GER2S   "pmxvi16ger2s")
-(UNSPEC_MMA_PMXVF16GER2"pmxvf16ger2")
-(UNSPEC_MMA_PMXVBF16GER2   
"pmxvbf16ger2")])
+(define_int_attr vvi4i4i2  [(UNSPEC_MMA_PMXVI16GER2"xvi16ger2")
+(UNSPEC_MMA_PMXVI16GER2S   "xvi16ger2s")
+(UNSPEC_MMA_PMXVF16GER2"xvf16ger2")
+(UNSPEC_MMA_PMXVBF16GER2   "xvbf16ger2")])
 
-(define_int_attr avvi4i4i2 [(UNSPEC_MMA_PMXVI16GER2PP  "pmxvi16ger2pp")
-(UNSPEC_MMA_PMXVI16GER2SPP 
"pmxvi16ger2spp")
-(UNSPEC_MMA_PMXVF16GER2PP  "pmxvf16ger2pp")
-(UNSPEC_MMA_PMXVF16GER2PN  "pmxvf16ger2pn")
-(UNSPEC_MMA_PMXVF16GER2NP  "pmxvf16ger2np")
-(UNSPEC_MMA_PMXVF16GER2NN  "pmxvf16ger2nn")
-(UNSPEC_MMA_PMXVBF16GER2PP 
"pmxvbf16ger2pp")
-(UNSPEC_MMA_PMXVBF16GER2PN 
"pmxvbf16ger2pn")
-(UNSPEC_MMA_PMXVBF16GER2NP 
"pmxvbf16ger2np")
-(UNSPEC_MMA_PMXVBF16GER2NN 
"pmxvbf16ger2nn")])
+(define_int_attr avvi4i4i2 [(UNSPEC_MMA_PMXVI16GER2PP  "xvi16ger2pp")
+(UNSPEC_MMA_PMXVI16GER2SPP "xvi16ger2spp")
+(UNSPEC_MMA_PMXVF16GER2PP  "xvf16ger2pp")
+(UNSPE

[gcc(refs/users/meissner/heads/work163-dmf)] Add dense math test for new instruction names.

2024-03-21 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:cf80b10c5a0b0e109c4d00404e03ed6f3c0606c0

commit cf80b10c5a0b0e109c4d00404e03ed6f3c0606c0
Author: Michael Meissner 
Date:   Fri Mar 22 00:31:11 2024 -0400

Add dense math test for new instruction names.

2024-03-22   Michael Meissner  

gcc/testsuite/

* gcc.target/powerpc/dm-double-test.c: New test.
* lib/target-supports.exp (check_effective_target_ppc_dmr_ok): New
target test.

Diff:
---
 gcc/testsuite/gcc.target/powerpc/dm-double-test.c | 194 ++
 gcc/testsuite/lib/target-supports.exp |  23 +++
 2 files changed, 217 insertions(+)

diff --git a/gcc/testsuite/gcc.target/powerpc/dm-double-test.c 
b/gcc/testsuite/gcc.target/powerpc/dm-double-test.c
new file mode 100644
index 000..66c19779585
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/dm-double-test.c
@@ -0,0 +1,194 @@
+/* Test derived from mma-double-1.c, modified for dense math.  */
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_dense_math_ok } */
+/* { dg-options "-mdejagnu-cpu=future -O2" } */
+
+#include 
+#include 
+#include 
+
+typedef unsigned char vec_t __attribute__ ((vector_size (16)));
+typedef double v4sf_t __attribute__ ((vector_size (16)));
+#define SAVE_ACC(ACC, ldc, J)  \
+ __builtin_mma_disassemble_acc (result, ACC); \
+ rowC = (v4sf_t *) &CO[0*ldc+J]; \
+  rowC[0] += result[0]; \
+  rowC = (v4sf_t *) &CO[1*ldc+J]; \
+  rowC[0] += result[1]; \
+  rowC = (v4sf_t *) &CO[2*ldc+J]; \
+  rowC[0] += result[2]; \
+  rowC = (v4sf_t *) &CO[3*ldc+J]; \
+ rowC[0] += result[3];
+
+void
+DM (int m, int n, int k, double *A, double *B, double *C)
+{
+  __vector_quad acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7;
+  v4sf_t result[4];
+  v4sf_t *rowC;
+  for (int l = 0; l < n; l += 4)
+{
+  double *CO;
+  double *AO;
+  AO = A;
+  CO = C;
+  C += m * 4;
+  for (int j = 0; j < m; j += 16)
+   {
+ double *BO = B;
+ __builtin_mma_xxsetaccz (&acc0);
+ __builtin_mma_xxsetaccz (&acc1);
+ __builtin_mma_xxsetaccz (&acc2);
+ __builtin_mma_xxsetaccz (&acc3);
+ __builtin_mma_xxsetaccz (&acc4);
+ __builtin_mma_xxsetaccz (&acc5);
+ __builtin_mma_xxsetaccz (&acc6);
+ __builtin_mma_xxsetaccz (&acc7);
+ unsigned long i;
+
+ for (i = 0; i < k; i++)
+   {
+ vec_t *rowA = (vec_t *) & AO[i * 16];
+ __vector_pair rowB;
+ vec_t *rb = (vec_t *) & BO[i * 4];
+ __builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
+ __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
+ __builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
+ __builtin_mma_xvf64gerpp (&acc2, rowB, rowA[2]);
+ __builtin_mma_xvf64gerpp (&acc3, rowB, rowA[3]);
+ __builtin_mma_xvf64gerpp (&acc4, rowB, rowA[4]);
+ __builtin_mma_xvf64gerpp (&acc5, rowB, rowA[5]);
+ __builtin_mma_xvf64gerpp (&acc6, rowB, rowA[6]);
+ __builtin_mma_xvf64gerpp (&acc7, rowB, rowA[7]);
+   }
+ SAVE_ACC (&acc0, m, 0);
+ SAVE_ACC (&acc2, m, 4);
+ SAVE_ACC (&acc1, m, 2);
+ SAVE_ACC (&acc3, m, 6);
+ SAVE_ACC (&acc4, m, 8);
+ SAVE_ACC (&acc6, m, 12);
+ SAVE_ACC (&acc5, m, 10);
+ SAVE_ACC (&acc7, m, 14);
+ AO += k * 16;
+ BO += k * 4;
+ CO += 16;
+   }
+  B += k * 4;
+}
+}
+
+void
+init (double *matrix, int row, int column)
+{
+  for (int j = 0; j < column; j++)
+{
+  for (int i = 0; i < row; i++)
+   {
+ matrix[j * row + i] = (i * 16 + 2 + j) / 0.123;
+   }
+}
+}
+
+void
+init0 (double *matrix, double *matrix1, int row, int column)
+{
+  for (int j = 0; j < column; j++)
+for (int i = 0; i < row; i++)
+  matrix[j * row + i] = matrix1[j * row + i] = 0;
+}
+
+
+void
+print (const char *name, const double *matrix, int row, int column)
+{
+  printf ("Matrix %s has %d rows and %d columns:\n", name, row, column);
+  for (int i = 0; i < row; i++)
+{
+  for (int j = 0; j < column; j++)
+   {
+ printf ("%f ", matrix[j * row + i]);
+   }
+  printf ("\n");
+}
+  printf ("\n");
+}
+
+int
+main (int argc, char *argv[])
+{
+  int rowsA, colsB, common;
+  int i, j, k;
+  int ret = 0;
+
+  for (int t = 16; t <= 128; t += 16)
+{
+  for (int t1 = 4; t1 <= 16; t1 += 4)
+   {
+ rowsA = t;
+ colsB = t1;
+ common = 1;
+ /* printf ("Running test for rows = %d,cols = %d\n", t, t1); */
+ double A[rowsA * common];
+ double B[common * colsB];
+ double C[rowsA * colsB];
+ double D[rowsA * colsB];
+
+
+ init (A, rowsA, common);
+ init (B, common, colsB);
+ init0 (C, D, rowsA, colsB);
+ DM (rowsA, colsB, common, A, B, C);
+
+ 

[gcc(refs/users/meissner/heads/work163-dmf)] PowerPC: Add support for 1,024 bit DMR registers.

2024-03-21 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:365106aa952ceba51092df38086d212802b1fb5a

commit 365106aa952ceba51092df38086d212802b1fb5a
Author: Michael Meissner 
Date:   Fri Mar 22 00:33:23 2024 -0400

PowerPC: Add support for 1,024 bit DMR registers.

This patch is a preliminary patch to add the full 1,024 bit dense math 
register
(DMRs) for -mcpu=future.  The MMA 512-bit accumulators map onto the top of 
the
DMR register.

This patch only adds the new 1,024 bit register support.  It does not add
support for any instructions that need 1,024 bit registers instead of 512 
bit
registers.

I used the new mode 'TDOmode' to be the opaque mode used for 1,024 bit
registers.  The 'wD' constraint added in previous patches is used for these
registers.  I added support to do load and store of DMRs via the VSX 
registers,
since there are no load/store dense math instructions.  I added the new 
keyword
'__dmr' to create 1,024 bit types that can be loaded into DMRs.  At 
present, I
don't have aliases for __dmr512 and __dmr1024 that we've discussed 
internally.

The patches have been tested on both little and big endian systems.  Can I 
check
it into the master branch?

2024-03-22   Michael Meissner  

gcc/

* config/rs6000/mma.md (UNSPEC_DM_INSERT512_UPPER): New unspec.
(UNSPEC_DM_INSERT512_LOWER): Likewise.
(UNSPEC_DM_EXTRACT512): Likewise.
(UNSPEC_DMR_RELOAD_FROM_MEMORY): Likewise.
(UNSPEC_DMR_RELOAD_TO_MEMORY): Likewise.
(movtdo): New define_expand and define_insn_and_split to implement 
1,024
bit DMR registers.
(movtdo_insert512_upper): New insn.
(movtdo_insert512_lower): Likewise.
(movtdo_extract512): Likewise.
(reload_dmr_from_memory): Likewise.
(reload_dmr_to_memory): Likewise.
* config/rs6000/rs6000-builtin.cc (rs6000_type_string): Add DMR
support.
(rs6000_init_builtins): Add support for __dmr keyword.
* config/rs6000/rs6000-call.cc (rs6000_return_in_memory): Add 
support
for TDOmode.
(rs6000_function_arg): Likewise.
* config/rs6000/rs6000-modes.def (TDOmode): New mode.
* config/rs6000/rs6000.cc (rs6000_hard_regno_nregs_internal): Add
support for TDOmode.
(rs6000_hard_regno_mode_ok_uncached): Likewise.
(rs6000_hard_regno_mode_ok): Likewise.
(rs6000_modes_tieable_p): Likewise.
(rs6000_debug_reg_global): Likewise.
(rs6000_setup_reg_addr_masks): Likewise.
(rs6000_init_hard_regno_mode_ok): Add support for TDOmode.  Setup 
reload
hooks for DMR mode.
(reg_offset_addressing_ok_p): Add support for TDOmode.
(rs6000_emit_move): Likewise.
(rs6000_secondary_reload_simple_move): Likewise.
(rs6000_preferred_reload_class): Likewise.
(rs6000_secondary_reload_class): Likewise.
(rs6000_mangle_type): Add mangling for __dmr type.
(rs6000_dmr_register_move_cost): Add support for TDOmode.
(rs6000_split_multireg_move): Likewise.
(rs6000_invalid_conversion): Likewise.
* config/rs6000/rs6000.h (VECTOR_ALIGNMENT_P): Add TDOmode.
(enum rs6000_builtin_type_index): Add DMR type nodes.
(dmr_type_node): Likewise.
(ptr_dmr_type_node): Likewise.

gcc/testsuite/

* gcc.target/powerpc/dm-1024bit.c: New test.

Diff:
---
 gcc/config/rs6000/mma.md  | 154 ++
 gcc/config/rs6000/rs6000-builtin.cc   |  17 +++
 gcc/config/rs6000/rs6000-call.cc  |  10 +-
 gcc/config/rs6000/rs6000-modes.def|   4 +
 gcc/config/rs6000/rs6000.cc   | 101 -
 gcc/config/rs6000/rs6000.h|   6 +-
 gcc/testsuite/gcc.target/powerpc/dm-1024bit.c |  63 +++
 7 files changed, 321 insertions(+), 34 deletions(-)

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index f3870eac51a..4f9c59046ea 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -91,6 +91,11 @@
UNSPEC_MMA_XVI8GER4SPP
UNSPEC_MMA_XXMFACC
UNSPEC_MMA_XXMTACC
+   UNSPEC_DM_INSERT512_UPPER
+   UNSPEC_DM_INSERT512_LOWER
+   UNSPEC_DM_EXTRACT512
+   UNSPEC_DMR_RELOAD_FROM_MEMORY
+   UNSPEC_DMR_RELOAD_TO_MEMORY
   ])
 
 (define_c_enum "unspecv"
@@ -770,3 +775,152 @@
 }
   [(set_attr "type" "mma")
(set_attr "prefixed" "yes")])
+
+;; TDOmode (__dmr keyword for 1,024 bit registers).
+(define_expand "movtdo"
+  [(set (match_operand:TDO 0 "nonimmediate_operand")
+   (match_operand:TDO 1 "input_operand"))]
+  "TARGET_MMA_DENSE_MATH"
+{
+  rs6000_emit_move (operands[0], operands[1], TDOmode);
+  DONE;
+})
+
+(define_insn_and_split "*movtdo"
+  [(set (match_operand:TDO 0 "noni

[gcc(refs/users/meissner/heads/work163-dmf)] Add support for XVRL instruction.

2024-03-21 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:7200cbd8395cb620028eeb6c6ef003f6064615e2

commit 7200cbd8395cb620028eeb6c6ef003f6064615e2
Author: Michael Meissner 
Date:   Fri Mar 22 00:37:34 2024 -0400

Add support for XVRL instruction.

2024-03-22  Michael Meissner  

gcc/

* config/rs6000/altivec.md (xvrlw): New insn.

Diff:
---
 gcc/config/rs6000/altivec.md | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 4d4c94ff0a0..bf01af15286 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -1883,6 +1883,20 @@
 }
   [(set_attr "type" "vecperm")])
 
+;; -mcpu=future adds a vector rotate left word variant.  There is no vector
+;; byte/half-word/double-word/quad-word rotate left.  This insn occurs before
+;; altivec_vrl and will match for -mcpu=future, while other cpus will
+;; match the generic insn.
+(define_insn "*xvrlw"
+  [(set (match_operand:V4SI 0 "register_operand" "=v,wa")
+   (rotate:V4SI (match_operand:V4SI 1 "register_operand" "v,wa")
+(match_operand:V4SI 2 "register_operand" "v,wa")))]
+  "TARGET_FUTURE"
+  "@
+   vrlw %0,%1,%2
+   xvrlw %x0,%x1,%x2"
+  [(set_attr "type" "vecsimple")])
+
 (define_insn "altivec_vrl"
   [(set (match_operand:VI2 0 "register_operand" "=v")
 (rotate:VI2 (match_operand:VI2 1 "register_operand" "v")


[gcc(refs/users/meissner/heads/work163-dmf)] Update ChangeLog.*

2024-03-21 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:7bedd90064489ea6db3a28999df34d7e1340dbb2

commit 7bedd90064489ea6db3a28999df34d7e1340dbb2
Author: Michael Meissner 
Date:   Fri Mar 22 00:45:00 2024 -0400

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.dmf | 45 +
 1 file changed, 33 insertions(+), 12 deletions(-)

diff --git a/gcc/ChangeLog.dmf b/gcc/ChangeLog.dmf
index 5a28e3e994b..edc0448b14f 100644
--- a/gcc/ChangeLog.dmf
+++ b/gcc/ChangeLog.dmf
@@ -1,4 +1,14 @@
- Branch work163-dmf, patch #106 
+ Branch work163-dmf, patch #130 
+
+Add support for XVRL instruction.
+
+2024-03-22  Michael Meissner  
+
+gcc/
+
+   * config/rs6000/altivec.md (xvrlw): New insn.
+
+ Branch work163-dmf, patch #126 
 
 PowerPC: Add support for 1,024 bit DMR registers.
 
@@ -20,7 +30,7 @@ don't have aliases for __dmr512 and __dmr1024 that we've 
discussed internally.
 The patches have been tested on both little and big endian systems.  Can I 
check
 it into the master branch?
 
-2024-03-19   Michael Meissner  
+2024-03-22   Michael Meissner  
 
 gcc/
 
@@ -70,11 +80,11 @@ gcc/testsuite/
 
* gcc.target/powerpc/dm-1024bit.c: New test.
 
- Branch work163-dmf, patch #105 
+ Branch work163-dmf, patch #125 
 
 Add dense math test for new instruction names.
 
-2024-03-19   Michael Meissner  
+2024-03-22   Michael Meissner  
 
 gcc/testsuite/
 
@@ -82,7 +92,7 @@ gcc/testsuite/
* lib/target-supports.exp (check_effective_target_ppc_dmr_ok): New
target test.
 
- Branch work163-dmf, patch #104 
+ Branch work163-dmf, patch #124 
 
 PowerPC: Switch to dense math names for all MMA operations.
 
@@ -97,7 +107,7 @@ the 'dm' prefix afterwards.  To prevent having two sets of 
parallel int
 attributes, we remove the "pm" prefix from the instruction string in the
 attributes, and add it later, both in the insn name and in the output template.
 
-2024-03-19   Michael Meissner  
+2024-03-22   Michael Meissner  
 
 gcc/
 
@@ -133,7 +143,7 @@ gcc/
(mma_pm): Likewise.
(mma_pm): Likewise.
 
- Branch work163-dmf, patch #103 
+ Branch work163-dmf, patch #123 
 
 Add support for dense math registers.
 
@@ -184,7 +194,7 @@ both MMA without dense math and MMA with dense math:
 It is possible that the mangling for DMRs and the GDB register numbers may
 produce other changes in the future.
 
-2024-03-19   Michael Meissner  
+2024-03-22   Michael Meissner  
 
* config/rs6000/mma.md (movxo): Add comments about dense math registers.
(movxo_nodm): Rename from movxo and restrict the usage to machines
@@ -201,6 +211,8 @@ produce other changes in the future.
(accumulator_operand): Add support for dense math registers.
* config/rs6000/rs6000-builtin.cc (rs6000_gimple_fold_mma_builtin): Do
not de-prime accumulator when disassembling a vector quad.
+   * config/rs6000/rs6000-c.cc (rs6000_define_or_undefine_macro): Define
+   __DENSE_MATH__ if we have dense math registers.
* config/rs6000/rs6000.cc (enum rs6000_reg_type): Add DMR_REG_TYPE.
(enum rs6000_reload_reg_type): Add RELOAD_REG_DMR.
(LAST_RELOAD_REG_CLASS): Add support for DMR registers and the wD
@@ -239,8 +251,10 @@ produce other changes in the future.
(enum r6000_reg_class_enum): Add RS6000_CONSTRAINT_wD.
(REGISTER_NAMES): Add DMR registers.
(ADDITIONAL_REGISTER_NAMES): Likewise.
+   * config/rs6000/rs6000.md (FIRST_DMR_REGNO): New constant.
+   (LAST_DMR_REGNO): Likewise.
 
- Branch work163-dmf, patch #102 
+ Branch work163-dmf, patch #122 
 
 Add wD constraint.
 
@@ -249,7 +263,7 @@ that overlap with VSX registers 0..31 on power10.  Future 
patches will add the
 support for a separate accumulator register class that will be used when the
 support for dense math registes is added.
 
-2024-03-19   Michael Meissner  
+2024-03-22   Michael Meissner  
 
* config/rs6000/constraints.md (wD): New constraint.
* config/rs6000/mma.md (mma_disassemble_acc): Likewise.
@@ -276,7 +290,7 @@ support for dense math registes is added.
the 'wD' constraint.
* doc/md.texi (PowerPC constraints): Document the 'wD' constraint.
 
- Branch work163-dmf, patch #101 
+ Branch work163-dmf, patch #121 
 
 Use vector pair load/store for memcpy with -mcpu=future
 
@@ -285,7 +299,7 @@ vector pair and store vector pair instructions when 
optimizing things like
 memory copy.  This patch enables using those instructions if -mcpu=future is
 use

[gcc(refs/users/meissner/heads/work163-dmf)] Support load/store vector with right length.

2024-03-21 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:57f8bb746fcadd38e99dd9eee715f9ff15ad2822

commit 57f8bb746fcadd38e99dd9eee715f9ff15ad2822
Author: Michael Meissner 
Date:   Fri Mar 22 00:48:43 2024 -0400

Support load/store vector with right length.

This patch adds support for new instructions that may be added to the 
PowerPC
architecture in the future to enhance the load and store vector with length
instructions.

The current instructions (lxvl, lxvll, stxvl, and stxvll) are inconvenient to 
use
since the count for the number of bytes must be in the top 8 bits of the GPR
register, instead of the bottom 8 bits.  This meant that code generating 
these
instructions typically had to do a shift left by 56 bits to get the count 
into
the right position.  In a future version of the PowerPC architecture, new
variants of these instructions might be added that expect the count to be in
the bottom 8 bits of the GPR register.  These patches add this support to 
GCC
if the user uses the -mcpu=future option.

I discovered that the code in rs6000-string.cc to generate ISA 3.1 
lxvl/stxvl
future lxvll/stxvll instructions would generate these instructions on 
32-bit.
However the patterns for these instructions is only done on 64-bit systems. 
 So
I added a check for 64-bit support before generating the instructions.

The patches have been tested on both little and big endian systems.  Can I 
check
it into the master branch?

2024-03-22   Michael Meissner  

gcc/

* config/rs6000/rs6000-string.cc (expand_block_move): Do not 
generate
lxvl and stxvl on 32-bit.
* config/rs6000/vsx.md (lxvl): If -mcpu=future, generate the lxvl 
with
the shift count automatically used in the insn.
(lxvrl): New insn for -mcpu=future.
(lxvrll): Likewise.
(stxvl): If -mcpu=future, generate the stxvl with the shift count
automatically used in the insn.
(stxvrl): New insn for -mcpu=future.
(stxvrll): Likewise.

gcc/testsuite/

* gcc.target/powerpc/lxvrl.c: New test.
* lib/target-supports.exp 
(check_effective_target_powerpc_future_ok):
New effective target.

Diff:
---
 gcc/config/rs6000/rs6000-string.cc   |   1 +
 gcc/config/rs6000/vsx.md | 122 +--
 gcc/testsuite/gcc.target/powerpc/lxvrl.c |  32 
 gcc/testsuite/lib/target-supports.exp|  12 +++
 4 files changed, 146 insertions(+), 21 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-string.cc 
b/gcc/config/rs6000/rs6000-string.cc
index e74ccf41937..c6737e66cbe 100644
--- a/gcc/config/rs6000/rs6000-string.cc
+++ b/gcc/config/rs6000/rs6000-string.cc
@@ -2787,6 +2787,7 @@ expand_block_move (rtx operands[], bool might_overlap)
 
   if (TARGET_MMA && TARGET_BLOCK_OPS_UNALIGNED_VSX
  && TARGET_BLOCK_OPS_VECTOR_PAIR
+ && TARGET_POWERPC64
  && bytes >= 32
  && (align >= 256 || !STRICT_ALIGNMENT))
{
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index f135fa079bd..9520191e613 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -5629,20 +5629,32 @@
   DONE;
 })
 
-;; Load VSX Vector with Length
+;; Load VSX Vector with Length.  If we have lxvrl, we don't have to do an
+;; explicit shift left into a pseudo.
 (define_expand "lxvl"
-  [(set (match_dup 3)
-(ashift:DI (match_operand:DI 2 "register_operand")
-   (const_int 56)))
-   (set (match_operand:V16QI 0 "vsx_register_operand")
-   (unspec:V16QI
-[(match_operand:DI 1 "gpc_reg_operand")
-  (mem:V16QI (match_dup 1))
- (match_dup 3)]
-UNSPEC_LXVL))]
+  [(use (match_operand:V16QI 0 "vsx_register_operand"))
+   (use (match_operand:DI 1 "gpc_reg_operand"))
+   (use (match_operand:DI 2 "gpc_reg_operand"))]
   "TARGET_P9_VECTOR && TARGET_64BIT"
 {
-  operands[3] = gen_reg_rtx (DImode);
+  rtx shift_len = gen_rtx_ASHIFT (DImode, operands[2], GEN_INT (56));
+  rtx len;
+
+  if (TARGET_FUTURE)
+len = shift_len;
+  else
+{
+  len = gen_reg_rtx (DImode);
+  emit_insn (gen_rtx_SET (len, shift_len));
+}
+
+  rtx dest = operands[0];
+  rtx addr = operands[1];
+  rtx mem = gen_rtx_MEM (V16QImode, addr);
+  rtvec rv = gen_rtvec (3, addr, mem, len);
+  rtx lxvl = gen_rtx_UNSPEC (V16QImode, rv, UNSPEC_LXVL);
+  emit_insn (gen_rtx_SET (dest, lxvl));
+  DONE;
 })
 
 (define_insn "*lxvl"
@@ -5666,6 +5678,34 @@
   "lxvll %x0,%1,%2"
   [(set_attr "type" "vecload")])
 
+;; For lxvrl and lxvrll, use the combiner to eliminate the shift.  The
+;; define_expand for lxvl will already incorporate the shift in generating the
+;; insn.  The lxvll buitl-in function required the user to have already done
+;; the shift.  Defining lxvrll this way, will optimize cases where the user has
+;; done the shift immediately before the built-i

[gcc(refs/users/meissner/heads/work163-dmf)] Add saturating subtract built-ins.

2024-03-21 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:0376ff1f351466628aa8fd9f304c6e8c6e3cdb82

commit 0376ff1f351466628aa8fd9f304c6e8c6e3cdb82
Author: Michael Meissner 
Date:   Fri Mar 22 00:51:29 2024 -0400

Add saturating subtract built-ins.

This patch adds support for a saturating subtract built-in function that 
may be
added to a future PowerPC processor.  Note, if it is added, the name of the
built-in function may change before GCC 13 is released.  If the name 
changes,
we will submit a patch changing the name.

I also added support for providing dense math built-in functions, even 
though
at present, we have not added any new built-in functions for dense math.  
It is
likely we will want to add new dense math built-in functions as the dense 
math
support is fleshed out.

The patches have been tested on both little and big endian systems.  Can I 
check
it into the master branch?

2024-03-22   Michael Meissner  

gcc/

* config/rs6000/rs6000-builtin.cc (rs6000_invalid_builtin): Add 
support
for flagging invalid use of future built-in functions.
(rs6000_builtin_is_supported): Add support for future built-in
functions.
* config/rs6000/rs6000-builtins.def 
(__builtin_saturate_subtract32): New
built-in function for -mcpu=future.
(__builtin_saturate_subtract64): Likewise.
* config/rs6000/rs6000-gen-builtins.cc (enum bif_stanza): Add 
stanzas
for -mcpu=future built-ins.
(stanza_map): Likewise.
(enable_string): Likewise.
(struct attrinfo): Likewise.
(parse_bif_attrs): Likewise.
(write_decls): Likewise.
* config/rs6000/rs6000.md (sat_sub3): Add saturating subtract
built-in insn declarations.
(sat_sub3_dot): Likewise.
(sat_sub3_dot2): Likewise.
* doc/extend.texi (Future PowerPC built-ins): New section.

gcc/testsuite/

* gcc.target/powerpc/subfus-1.c: New test.
* gcc.target/powerpc/subfus-2.c: Likewise.

Diff:
---
 gcc/config/rs6000/rs6000-builtin.cc | 17 
 gcc/config/rs6000/rs6000-builtins.def   | 10 +
 gcc/config/rs6000/rs6000-gen-builtins.cc| 35 ++---
 gcc/config/rs6000/rs6000.md | 60 +
 gcc/doc/extend.texi | 24 
 gcc/testsuite/gcc.target/powerpc/subfus-1.c | 32 +++
 gcc/testsuite/gcc.target/powerpc/subfus-2.c | 32 +++
 7 files changed, 205 insertions(+), 5 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin.cc 
b/gcc/config/rs6000/rs6000-builtin.cc
index 976a42a74cd..1af38698bf3 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -139,6 +139,17 @@ rs6000_invalid_builtin (enum rs6000_gen_builtins fncode)
 case ENB_MMA:
   error ("%qs requires the %qs option", name, "-mmma");
   break;
+case ENB_FUTURE:
+  error ("%qs requires the %qs option", name, "-mcpu=future");
+  break;
+case ENB_FUTURE_64:
+  error ("%qs requires the %qs option and either the %qs or %qs option",
+name, "-mcpu=future", "-m64", "-mpowerpc64");
+  break;
+case ENB_DM:
+  error ("%qs requires the %qs or %qs options", name, "-mcpu=future",
+"-mdense-math");
+  break;
 default:
 case ENB_ALWAYS:
   gcc_unreachable ();
@@ -194,6 +205,12 @@ rs6000_builtin_is_supported (enum rs6000_gen_builtins 
fncode)
   return TARGET_HTM;
 case ENB_MMA:
   return TARGET_MMA;
+case ENB_FUTURE:
+  return TARGET_FUTURE;
+case ENB_FUTURE_64:
+  return TARGET_FUTURE && TARGET_POWERPC64;
+case ENB_DM:
+  return TARGET_DENSE_MATH;
 default:
   gcc_unreachable ();
 }
diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index 3bc7fed6956..437ab0e09e9 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -139,6 +139,8 @@
 ;   endian   Needs special handling for endianness
 ;   ibmldRestrict usage to the case when TFmode is IBM-128
 ;   ibm128   Restrict usage to the case where __ibm128 is supported or if ibmld
+;   future   Restrict usage to future instructions
+;   dm   Restrict usage to dense math
 ;
 ; Each attribute corresponds to extra processing required when
 ; the built-in is expanded.  All such special processing should
@@ -4131,3 +4133,11 @@
 
   void __builtin_vsx_stxvp (v256, unsigned long, const v256 *);
 STXVP nothing {mma,pair}
+
+[future]
+  const signed int __builtin_saturate_subtract32 (signed int, signed int);
+  SAT_SUBSI sat_subsi3 {}
+
+[future-64]
+  const signed long __builtin_saturate_subtract64 (signed long,  signed long);
+  SAT_SUBDI sat_subdi3 {}
diff --git a/gcc/config/rs6000/rs6000-gen-builtins.cc 
b/gcc/config/rs6000/rs

[gcc(refs/users/meissner/heads/work163-dmf)] Add paddis support.

2024-03-21 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:f453411169f9aaaf02b04c2c5cf843a608df8173

commit f453411169f9aaaf02b04c2c5cf843a608df8173
Author: Michael Meissner 
Date:   Fri Mar 22 00:56:43 2024 -0400

Add paddis support.

2024-03-22  Michael Meissner  

gcc/

* config/rs6000/constraints.md (eU): New constraint.
(eV): Likewise.
* config/rs6000/predicates.md (paddis_operand): New predicate.
(paddis_paddi_operand): Likewise.
(add_operand): Add paddis support.
* config/rs6000/rs6000-cpus.def (ISA_FUTURE_MASKS): Add -mpaddis
support.
(POWERPC_MASKS): Likewise.
* config/rs6000/rs6000.cc (num_insns_constant_gpr): Add -mpaddis
support.
(num_insns_constant_multi): Likewise.
(print_operand): Add %B for paddis support.
(rs6000_opt_masks): Add -mpaddis.
* config/rs6000/rs6000.h (SIGNED_INTEGER_32BIT_P): New macro.
* config/rs6000/rs6000.md (isa attribute): Add -mpaddis support.
(enabled attribute): Likewise.
(add3): Likewise.
(adddi3 splitter): New splitter for paddis.
(movdi_internal64): Add -mpaddis support.
(movdi splitter): New splitter for -mpaddis.
* config/rs6000/rs6000.opt (-mpaddis): New switch.

Diff:
---
 gcc/config/rs6000/constraints.md | 10 +
 gcc/config/rs6000/predicates.md  | 52 +-
 gcc/config/rs6000/rs6000.cc  | 40 +
 gcc/config/rs6000/rs6000.h   |  1 +
 gcc/config/rs6000/rs6000.md  | 96 ++--
 gcc/config/rs6000/rs6000.opt |  4 ++
 6 files changed, 189 insertions(+), 14 deletions(-)

diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index 277a30a8245..4d8d21fd6bb 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -222,6 +222,16 @@
   "An IEEE 128-bit constant that can be loaded into VSX registers."
   (match_operand 0 "easy_vector_constant_ieee128"))
 
+(define_constraint "eU"
+  "@internal integer constant that can be loaded with paddis"
+  (and (match_code "const_int")
+   (match_operand 0 "paddis_operand")))
+
+(define_constraint "eV"
+  "@internal integer constant that can be loaded with paddis + paddi"
+  (and (match_code "const_int")
+   (match_operand 0 "paddis_paddi_operand")))
+
 ;; Floating-point constraints.  These two are defined so that insn
 ;; length attributes can be calculated exactly.
 
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index b325000690b..0b7c0bf4b0f 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -369,6 +369,53 @@
   return SIGNED_INTEGER_34BIT_P (INTVAL (op));
 })
 
+;; Return 1 if op is a 64-bit constant that uses the paddis instruction
+(define_predicate "paddis_operand"
+  (match_code "const_int")
+{
+  if (!TARGET_PADDIS && TARGET_POWERPC64)
+return 0;
+
+  /* If addi, addis, or paddi can handle the number, don't return true.  */
+  HOST_WIDE_INT value = INTVAL (op);
+  if (SIGNED_INTEGER_34BIT_P (value))
+return false;
+
+  /* If the number is too large for padds, return false.  */
+  if (!SIGNED_INTEGER_32BIT_P (value >> 32))
+return false;
+
+  /* If the bottom 32-bits are non-zero, paddis can't handle it.  */
+  if ((value & HOST_WIDE_INT_C(0x)) != 0)
+return false;
+
+  return true;
+})
+
+;; Return 1 if op is a 64-bit constant that needs the paddis instruction and an
+;; addi/addis/paddi instruction combination.
+(define_predicate "paddis_paddi_operand"
+  (match_code "const_int")
+{
+  if (!TARGET_PADDIS && TARGET_POWERPC64)
+return 0;
+
+  /* If addi, addis, or paddi can handle the number, don't return true.  */
+  HOST_WIDE_INT value = INTVAL (op);
+  if (SIGNED_INTEGER_34BIT_P (value))
+return false;
+
+  /* If the number is too large for padds, return false.  */
+  if (!SIGNED_INTEGER_32BIT_P (value >> 32))
+return false;
+
+  /* If the bottom 32-bits are zero, we can use paddis alone to handle it.  */
+  if ((value & HOST_WIDE_INT_C(0x)) == 0)
+return false;
+
+  return true;
+})
+
 ;; Return 1 if op is a register that is not special.
 ;; Disallow (SUBREG:SF (REG:SI)) and (SUBREG:SI (REG:SF)) on VSX systems where
 ;; you need to be careful in moving a SFmode to SImode and vice versa due to
@@ -1050,7 +1097,10 @@
   (if_then_else (match_code "const_int")
 (match_test "satisfies_constraint_I (op)
 || satisfies_constraint_L (op)
-|| satisfies_constraint_eI (op)")
+|| satisfies_constraint_eI (op)
+|| satisfies_constraint_eU (op)
+|| satisfies_constraint_eV (op)")
+
 (match_operand 0 "gpc_reg_operand")))
 
 ;; Return 1 if the operand is either a non-special register, or 0, or -1.
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs600

[gcc(refs/users/meissner/heads/work163-dmf)] Update ChangeLog.*

2024-03-21 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:ae0e0f7725093cfc154ea376e6da9ac652624d45

commit ae0e0f7725093cfc154ea376e6da9ac652624d45
Author: Michael Meissner 
Date:   Fri Mar 22 00:58:36 2024 -0400

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.dmf | 122 ++
 1 file changed, 122 insertions(+)

diff --git a/gcc/ChangeLog.dmf b/gcc/ChangeLog.dmf
index edc0448b14f..1d1ae3c7d2d 100644
--- a/gcc/ChangeLog.dmf
+++ b/gcc/ChangeLog.dmf
@@ -1,3 +1,125 @@
+ Branch work163-dmf, patch #133 
+
+Add paddis support.
+
+2024-03-22  Michael Meissner  
+
+gcc/
+
+   * config/rs6000/constraints.md (eU): New constraint.
+   (eV): Likewise.
+   * config/rs6000/predicates.md (paddis_operand): New predicate.
+   (paddis_paddi_operand): Likewise.
+   (add_operand): Add paddis support.
+   * config/rs6000/rs6000-cpus.def (ISA_FUTURE_MASKS): Add -mpaddis
+   support.
+   (POWERPC_MASKS): Likewise.
+   * config/rs6000/rs6000.cc (num_insns_constant_gpr): Add -mpaddis
+   support.
+   (num_insns_constant_multi): Likewise.
+   (print_operand): Add %B for paddis support.
+   (rs6000_opt_masks): Add -mpaddis.
+   & config/rs6000/rs6000.h (SIGNED_INTEGER_32BIT_P): New macro.
+   * config/rs6000/rs6000.md (isa attribute): Add -mpaddis support.
+   (enabled attribute); Likewise.
+   (add3): Likewise.
+   (adddi3 splitter): New splitter for paddis.
+   (movdi_internal64): Add -mpaddis support.
+   (movdi splitter): New splitter for -mpaddis.
+   * config/rs6000/rs6000.opt (-mpaddis): New switch.
+
+ Branch work163-dmf, patch #132 
+
+Add saturating subtract built-ins.
+
+This patch adds support for a saturating subtract built-in function that may be
+added to a future PowerPC processor.  Note, if it is added, the name of the
+built-in function may change before GCC 13 is released.  If the name changes,
+we will submit a patch changing the name.
+
+I also added support for providing dense math built-in functions, even though
+at present, we have not added any new built-in functions for dense math.  It is
+likely we will want to add new dense math built-in functions as the dense math
+support is fleshed out.
+
+The patches have been tested on both little and big endian systems.  Can I 
check
+it into the master branch?
+
+2024-03-22   Michael Meissner  
+
+gcc/
+
+   * config/rs6000/rs6000-builtin.cc (rs6000_invalid_builtin): Add support
+   for flagging invalid use of future built-in functions.
+   (rs6000_builtin_is_supported): Add support for future built-in
+   functions.
+   * config/rs6000/rs6000-builtins.def (__builtin_saturate_subtract32): New
+   built-in function for -mcpu=future.
+   (__builtin_saturate_subtract64): Likewise.
+   * config/rs6000/rs6000-gen-builtins.cc (enum bif_stanza): Add stanzas
+   for -mcpu=future built-ins.
+   (stanza_map): Likewise.
+   (enable_string): Likewise.
+   (struct attrinfo): Likewise.
+   (parse_bif_attrs): Likewise.
+   (write_decls): Likewise.
+   * config/rs6000/rs6000.md (sat_sub3): Add saturating subtract
+   built-in insn declarations.
+   (sat_sub3_dot): Likewise.
+   (sat_sub3_dot2): Likewise.
+   * doc/extend.texi (Future PowerPC built-ins): New section.
+
+gcc/testsuite/
+
+   * gcc.target/powerpc/subfus-1.c: New test.
+   * gcc.target/powerpc/subfus-2.c: Likewise.
+
+ Branch work163-dmf, patch #131 
+
+Support load/store vector with right length.
+
+This patch adds support for new instructions that may be added to the PowerPC
+architecture in the future to enhance the load and store vector with length
+instructions.
+
+The current instructions (lxvl, lxvll, stxvl, and stxvll) are inconvient to use
+since the count for the number of bytes must be in the top 8 bits of the GPR
+register, instead of the bottom 8 bits.  This meant that code generating these
+instructions typically had to do a shift left by 56 bits to get the count into
+the right position.  In a future version of the PowerPC architecture, new
+variants of these instructions might be added that expect the count to be in
+the bottom 8 bits of the GPR register.  These patches add this support to GCC
+if the user uses the -mcpu=future option.
+
+I discovered that the code in rs6000-string.cc to generate ISA 3.1 lxvl/stxvl
+future lxvll/stxvll instructions would generate these instructions on 32-bit.
+However the patterns for these instructions is only done on 64-bit systems.  So
+I added a check for 64-bit support before generating the instructions.
+
+The patches have been tested on both little and big endian systems.  Can I 
check
+it into the master branch?
+
+2024-03-22   Michael Meissner  
+
+gcc/
+
+   * config/rs6000/rs6000-string.cc (expand_block_move): Do not generate
+   lxvl and stxvl on 32-bit.

[gcc r14-9607] RISC-V: Don't add fractional LMUL types to V_VLS for XTheadVector

2024-03-21 Thread Christoph Müllner via Gcc-cvs
https://gcc.gnu.org/g:fd5e5dda8d79d62396f56d4fdd628b4bc5f9fa24

commit r14-9607-gfd5e5dda8d79d62396f56d4fdd628b4bc5f9fa24
Author: Christoph Müllner 
Date:   Thu Mar 21 15:40:49 2024 +0100

RISC-V: Don't add fractional LMUL types to V_VLS for XTheadVector

The expansion of `memset` (via expand_builtin_memset_args())
uses clear_by_pieces() and store_by_pieces() to avoid calls
to the C runtime. To check if a type can be used for that purpose
the function by_pieces_mode_supported_p() tests if a `mov` and
a `vec_duplicate` INSN can be expanded by the backend.

The `vec_duplicate` expansion takes arguments of type `V_VLS`.
The `mov` expansions take arguments of type `V`, `VB`, `VT`,
`VLS_AVL_IMM`, and `VLS_AVL_REG`. Some of these types (in fact
not types but type iterators) include fractional LMUL types.
E.g. `V_VLS` includes `V`, which includes `VI`, which includes
`RVVMF2QI`.

This results in an attempt to use fractional LMUL-types for
the `memset` expansion resulting in an ICE for XTheadVector,
because that extension cannot handle fractional LMULs.

This patch addresses this issue by splitting the definition
of the `VI` mode iterator into `VI_NOFRAC` (without fractional
LMUL types) and `VI_FRAC` (only fractional LMUL types).
Further, it defines `V_VLS` such that `VI_FRAC` types are only
included if XTheadVector is not enabled.

The effect is demonstrated by a new test case that shows
that the by-pieces framework now emits `sb` instructions
instead of triggering an ICE.

Signed-off-by: Christoph Müllner 

PR target/114194

gcc/ChangeLog:

* config/riscv/vector-iterators.md: Split VI into VI_FRAC and 
VI_NOFRAC.
Only include VI_NOFRAC in V_VLS without TARGET_XTHEADVECTOR.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/xtheadvector/pr114194.c: New test.

Signed-off-by: Christoph Müllner 

Diff:
---
 gcc/config/riscv/vector-iterators.md   | 19 +---
 .../gcc.target/riscv/rvv/xtheadvector/pr114194.c   | 56 ++
 2 files changed, 69 insertions(+), 6 deletions(-)

diff --git a/gcc/config/riscv/vector-iterators.md 
b/gcc/config/riscv/vector-iterators.md
index c2ea7e8b10a..a24e1bf078f 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -108,17 +108,24 @@
   UNSPECV_FRM_RESTORE_EXIT
 ])
 
-(define_mode_iterator VI [
-  RVVM8QI RVVM4QI RVVM2QI RVVM1QI RVVMF2QI RVVMF4QI (RVVMF8QI "TARGET_MIN_VLEN > 32")
-
-  RVVM8HI RVVM4HI RVVM2HI RVVM1HI RVVMF2HI (RVVMF4HI "TARGET_MIN_VLEN > 32")
-
-  RVVM8SI RVVM4SI RVVM2SI RVVM1SI (RVVMF2SI "TARGET_MIN_VLEN > 32")
+;; Subset of VI with fractional LMUL types
+(define_mode_iterator VI_FRAC [
+  RVVMF2QI RVVMF4QI (RVVMF8QI "TARGET_MIN_VLEN > 32")
+  RVVMF2HI (RVVMF4HI "TARGET_MIN_VLEN > 32")
+  (RVVMF2SI "TARGET_MIN_VLEN > 32")
+])
 
+;; Subset of VI with non-fractional LMUL types
+(define_mode_iterator VI_NOFRAC [
+  RVVM8QI RVVM4QI RVVM2QI RVVM1QI
+  RVVM8HI RVVM4HI RVVM2HI RVVM1HI
+  RVVM8SI RVVM4SI RVVM2SI RVVM1SI
   (RVVM8DI "TARGET_VECTOR_ELEN_64") (RVVM4DI "TARGET_VECTOR_ELEN_64")
   (RVVM2DI "TARGET_VECTOR_ELEN_64") (RVVM1DI "TARGET_VECTOR_ELEN_64")
 ])
 
+(define_mode_iterator VI [ VI_NOFRAC (VI_FRAC "!TARGET_XTHEADVECTOR") ])
+
 ;; This iterator is the same as above but with TARGET_VECTOR_ELEN_FP_16
 ;; changed to TARGET_ZVFH.  TARGET_VECTOR_ELEN_FP_16 is also true for
 ;; TARGET_ZVFHMIN while we actually want to disable all instructions apart
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/pr114194.c b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/pr114194.c
new file mode 100644
index 000..fc2d1349425
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/pr114194.c
@@ -0,0 +1,56 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gc_xtheadvector" { target { rv32 } } } */
+/* { dg-options "-march=rv64gc_xtheadvector" { target { rv64 } } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+/*
+** foo0_1:
+** sb\tzero,0([a-x0-9]+)
+** ret
+*/
+void foo0_1 (void *p)
+{
+  __builtin_memset (p, 0, 1);
+}
+
+/*
+** foo0_7:
+** sb\tzero,0([a-x0-9]+)
+** sb\tzero,1([a-x0-9]+)
+** sb\tzero,2([a-x0-9]+)
+** sb\tzero,3([a-x0-9]+)
+** sb\tzero,4([a-x0-9]+)
+** sb\tzero,5([a-x0-9]+)
+** sb\tzero,6([a-x0-9]+)
+** ret
+*/
+void foo0_7 (void *p)
+{
+  __builtin_memset (p, 0, 7);
+}
+
+/*
+** foo1_1:
+** li\t[a-x0-9]+,1
+** sb\t[a-x0-9]+,0([a-x0-9]+)
+** ret
+*/
+void foo1_1 (void *p)
+{
+  __builtin_memset (p, 1, 1);
+}
+
+/*
+** foo1_5:
+** li\t[a-x0-9]+,1
+** sb\t[a-x0-9]+,0([a-x0-9]+)
+** sb\t[a-x0-9]+,1([a-x0-9]+)
+** sb\t[a-x0-9]+,2([a-x0-9]+)
+** sb\t[a-x0-9]+,3([a-x0-9]+)
+** sb\t[a-x0-9]+,4([a-x0-9]+)
+** ret
+*/
+void foo1_5 (void *p)
+{
+  __builtin_memset (p, 1, 5);
+}