[gcc r15-3014] Fix ICE in recompute_tree_invariant_for_addr_expr, at tree.c:4535 [PR84244]

2024-08-19 Thread Andre Vehreschild via Gcc-cvs
https://gcc.gnu.org/g:661acde60ef4e9ac5a9e48be18770fb3a9aeb9a5

commit r15-3014-g661acde60ef4e9ac5a9e48be18770fb3a9aeb9a5
Author: Andre Vehreschild 
Date:   Thu Jul 11 15:44:56 2024 +0200

Fix ICE in recompute_tree_invariant_for_addr_expr, at tree.c:4535 [PR84244]

Declaring an unused function with a derived type having a pointer
component and using that derived type as a coarray led the compiler to
ICE because the caf_token for the pointer was not linked into the
component correctly.

PR fortran/84244

gcc/fortran/ChangeLog:

* trans-types.cc (gfc_get_derived_type): When a caf_sub_token is
generated for a component, link it to the component it is
generated for (the previous one).

gcc/testsuite/ChangeLog:

* gfortran.dg/coarray/ptr_comp_5.f08: New test.

Diff:
---
 gcc/fortran/trans-types.cc   |  6 +-
 gcc/testsuite/gfortran.dg/coarray/ptr_comp_5.f08 | 19 +++
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/gcc/fortran/trans-types.cc b/gcc/fortran/trans-types.cc
index e6da8e1a58b..bc582085f57 100644
--- a/gcc/fortran/trans-types.cc
+++ b/gcc/fortran/trans-types.cc
@@ -2661,7 +2661,7 @@ gfc_get_derived_type (gfc_symbol * derived, int codimen)
   tree *chain = NULL;
   bool got_canonical = false;
   bool unlimited_entity = false;
-  gfc_component *c;
+  gfc_component *c, *last_c = nullptr;
   gfc_namespace *ns;
   tree tmp;
   bool coarray_flag, class_coarray_flag;
@@ -2961,10 +2961,14 @@ gfc_get_derived_type (gfc_symbol * derived, int codimen)
 types.  */
   if (class_coarray_flag || !c->backend_decl)
c->backend_decl = field;
+  if (c->attr.caf_token && last_c)
+   last_c->caf_token = field;
 
   if (c->attr.pointer && (c->attr.dimension || c->attr.codimension)
  && !(c->ts.type == BT_DERIVED && strcmp (c->name, "_data") == 0))
GFC_DECL_PTR_ARRAY_P (c->backend_decl) = 1;
+
+  last_c = c;
 }
 
   /* Now lay out the derived type, including the fields.  */
diff --git a/gcc/testsuite/gfortran.dg/coarray/ptr_comp_5.f08 
b/gcc/testsuite/gfortran.dg/coarray/ptr_comp_5.f08
new file mode 100644
index 000..ed3a8db13fa
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/coarray/ptr_comp_5.f08
@@ -0,0 +1,19 @@
+! { dg-do compile }
+
+! Check PR84244 does not ICE anymore.
+
+program ptr_comp_5
+  integer, target :: dest = 42
+  type t
+integer, pointer :: p
+  end type
+  type(t) :: o[*]
+
+  o%p => dest
+contains
+  ! This unused routine is crucial for the ICE.
+  function f(x)
+type(t), intent(in) ::x
+  end function
+end program
+


[gcc r15-3015] testsuite: Reduce cut-&-paste in scanltranstree.exp

2024-08-19 Thread Richard Sandiford via Gcc-cvs
https://gcc.gnu.org/g:71059d268f567b664e74101c8be7b58441f15d29

commit r15-3015-g71059d268f567b664e74101c8be7b58441f15d29
Author: Richard Sandiford 
Date:   Mon Aug 19 09:40:33 2024 +0100

testsuite: Reduce cut-&-paste in scanltranstree.exp

scanltranstree.exp defines some LTO wrappers around standard
non-LTO scanners.  Four of them are cut-&-paste variants of
one another, so this patch generates them from a single template.
It also does the same for scan-ltrans-tree-dump-times, so that
other *-times scanners can be added easily in future.

The scanners seem to be lightly used.  gcc.dg/ipa/ipa-icf-38.c uses
scan-ltrans-tree-dump{,-not} and libgomp.c/declare-variant-1.c
uses scan-ltrans-tree-dump-{not,times}.  Nothing currently seems
to use scan-ltrans-tree-dump-dem*.

gcc/testsuite/
* lib/scanltranstree.exp: Redefine the routines using two
templates.

Diff:
---
 gcc/testsuite/lib/scanltranstree.exp | 186 ---
 1 file changed, 62 insertions(+), 124 deletions(-)

diff --git a/gcc/testsuite/lib/scanltranstree.exp 
b/gcc/testsuite/lib/scanltranstree.exp
index 79f05f0ffed..bc6e02dc369 100644
--- a/gcc/testsuite/lib/scanltranstree.exp
+++ b/gcc/testsuite/lib/scanltranstree.exp
@@ -19,130 +19,68 @@
 
 load_lib scandump.exp
 
-# Utility for scanning compiler result, invoked via dg-final.
-# Call pass if pattern is present, otherwise fail.
-#
-# Argument 0 is the regexp to match
-# Argument 1 is the name of the dumped tree pass
-# Argument 2 handles expected failures and the like
-proc scan-ltrans-tree-dump { args } {
-
-if { [llength $args] < 2 } {
-   error "scan-ltrans-tree-dump: too few arguments"
-   return
-}
-if { [llength $args] > 3 } {
-   error "scan-ltrans-tree-dump: too many arguments"
-   return
-}
-if { [llength $args] >= 3 } {
-   scan-dump "ltrans-tree" [lindex $args 0] \
- "\[0-9\]\[0-9\]\[0-9\]t.[lindex $args 1]" ".ltrans0.ltrans" \
- [lindex $args 2]
-} else {
-   scan-dump "ltrans-tree" [lindex $args 0] \
- "\[0-9\]\[0-9\]\[0-9\]t.[lindex $args 1]" ".ltrans0.ltrans"
-}
-}
-
-# Call pass if pattern is present given number of times, otherwise fail.
-# Argument 0 is the regexp to match
-# Argument 1 is number of times the regexp must be found
-# Argument 2 is the name of the dumped tree pass
-# Argument 3 handles expected failures and the like
-proc scan-ltrans-tree-dump-times { args } {
-
-if { [llength $args] < 3 } {
-   error "scan-ltrans-tree-dump-times: too few arguments"
-   return
-}
-if { [llength $args] > 4 } {
-   error "scan-ltrans-tree-dump-times: too many arguments"
-   return
-}
-if { [llength $args] >= 4 } {
-   scan-dump-times "ltrans-tree" [lindex $args 0] [lindex $args 1] \
-   "\[0-9\]\[0-9\]\[0-9\]t.[lindex $args 2]" \
-   ".ltrans0.ltrans" [lindex $args 3]
-} else {
-   scan-dump-times "ltrans-tree" [lindex $args 0] [lindex $args 1] \
-   "\[0-9\]\[0-9\]\[0-9\]t.[lindex $args 2]" 
".ltrans0.ltrans"
-}
+# The first item in the list is an LTO equivalent of the second item
+# in the list; see the documentation of the second item for details.
+foreach { name scan type suffix } {
+scan-ltrans-tree-dump scan-dump ltrans-tree t
+scan-ltrans-tree-dump-not scan-dump-not ltrans-tree t
+scan-ltrans-tree-dump-dem scan-dump-dem ltrans-tree t
+scan-ltrans-tree-dump-dem-not scan-dump-dem-not ltrans-tree t
+} {
+eval [string map [list @NAME@ $name \
+  @SCAN@ $scan \
+  @TYPE@ $type \
+  @SUFFIX@ $suffix] {
+proc @NAME@ { args } {
+   if { [llength $args] < 2 } {
+   error "@NAME@: too few arguments"
+   return
+   }
+   if { [llength $args] > 3 } {
+   error "@NAME@: too many arguments"
+   return
+   }
+   if { [llength $args] >= 3 } {
+   @SCAN@ @TYPE@ [lindex $args 0] \
+   "\[0-9\]\[0-9\]\[0-9\]@SUFFIX@.[lindex $args 1]" \
+   ".ltrans0.ltrans" \
+   [lindex $args 2]
+   } else {
+   @SCAN@ @TYPE@ [lindex $args 0] \
+   "\[0-9\]\[0-9\]\[0-9\]@SUFFIX@.[lindex $args 1]" \
+   ".ltrans0.ltrans"
+   }
+}
+}]
 }
 
-# Call pass if pattern is not present, otherwise fail.
-#
-# Argument 0 is the regexp to match
-# Argument 1 is the name of the dumped tree pass
-# Argument 2 handles expected failures and the like
-proc scan-ltrans-tree-dump-not { args } {
-
-if { [llength $args] < 2 } {
-   error "scan-ltrans-tree-dump-not: too few arguments"
-   return
-}
-if { [llength $args] > 3 } {
-   error "scan-ltrans-tree-dump-not: too many arguments"

[gcc r15-3016] rtl: Enable the use of rtx values with int and mode attributes

2024-08-19 Thread Andre Simoes Dias Vieira via Gcc-cvs
https://gcc.gnu.org/g:e57d3cce4e4fdf50fa59b807ea43c8b14c5c1711

commit r15-3016-ge57d3cce4e4fdf50fa59b807ea43c8b14c5c1711
Author: Andre Vieira 
Date:   Mon Aug 19 09:38:41 2024 +0100

rtl: Enable the use of rtx values with int and mode attributes

The 'code' part of a 'define_code_attr' refers to the type of the key; in
other words, it uses a code_iterator to pick the 'value' from its
(key "value") pair list.

However, rtx_alloc_for_name requires a code_attribute to be used when the
'value' needs to be a type. In other words, no other type of attribute
could be used, before this patch, to produce an rtx-typed 'value'.

This patch removes that restriction and allows the backend to use any kind
of attribute as long as that attribute always produces a valid code-typed
'value'.

gcc/ChangeLog:

* read-rtl.cc (rtx_reader::rtx_alloc_for_name): Allow all attribute
types to produce code 'values'.
(check_code_attribute): Rename ...
(check_attribute_codes): ... to this.  And change comments to refer to
attributes in general rather than code attributes.
* doc/md.texi: Add paragraph to document that you can use int and mode
attributes to produce codes.

Diff:
---
 gcc/doc/md.texi |  5 +
 gcc/read-rtl.cc | 21 ++---
 2 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 5dc0d55edd6..a9259112251 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -12077,6 +12077,11 @@ while the unsigned version uses @code{umax} and 
@code{umin}.  There
 are no versions that pair @code{smax} with @code{umin} or @code{umax}
 with @code{smin}.
 
+It is also possible to use other types of attributes as codes,
+in a similar way.  For example, an int iterator could be used to
+iterate over @code{unspec} numbers, with an int attribute specifying
+an associated rtx code.  @xref{Int Iterators}.
+
 Here's an example of code iterators in action, taken from the MIPS port:
 
 @smallexample
diff --git a/gcc/read-rtl.cc b/gcc/read-rtl.cc
index 4f09e449c81..bfce806f9d6 100644
--- a/gcc/read-rtl.cc
+++ b/gcc/read-rtl.cc
@@ -1423,21 +1423,21 @@ check_code_iterator (struct mapping *iterator)
consistent format.  Return a representative code.  */
 
 static rtx_code
-check_code_attribute (mapping *attr)
+check_attribute_codes (mapping *attr)
 {
   rtx_code bellwether = UNKNOWN;
   for (map_value *v = attr->values; v != 0; v = v->next)
 {
   rtx_code code = maybe_find_code (v->string);
   if (code == UNKNOWN)
-   fatal_with_file_and_line ("code attribute `%s' contains "
+   fatal_with_file_and_line ("attribute `%s' contains "
  "unrecognized rtx code `%s'",
  attr->name, v->string);
   if (bellwether == UNKNOWN)
bellwether = code;
   else if (strcmp (GET_RTX_FORMAT (bellwether),
   GET_RTX_FORMAT (code)) != 0)
-   fatal_with_file_and_line ("code attribute `%s' combines "
+   fatal_with_file_and_line ("attribute `%s' combines "
  "`%s' and `%s', which have different "
  "rtx formats", attr->name,
  GET_RTX_NAME (bellwether),
@@ -1604,7 +1604,7 @@ parse_reg_note_name (const char *string)
   fatal_with_file_and_line ("unrecognized REG_NOTE name: `%s'", string);
 }
 
-/* Allocate an rtx for code NAME.  If NAME is a code iterator or code
+/* Allocate an rtx for code NAME.  If NAME is a code iterator or an
attribute, record its use for later and use one of its possible
values as an interim rtx code.  */
 
@@ -1627,13 +1627,20 @@ rtx_reader::rtx_alloc_for_name (const char *name)
attr = deferred_name;
 
   /* Find the attribute itself.  */
-  mapping *m = (mapping *) htab_find (codes.attrs, &attr);
+  mapping *m = nullptr;
+  for (auto attrs : { codes.attrs, ints.attrs, modes.attrs })
+   if (auto *newm = (mapping *) htab_find (attrs, &attr))
+{
+  if (m)
+fatal_with_file_and_line ("ambiguous attribute `%s`", attr);
+  m = newm;
+}
   if (!m)
-   fatal_with_file_and_line ("unknown code attribute `%s'", attr);
+   fatal_with_file_and_line ("unknown attribute `%s'", attr);
 
   /* Pick the first possible code for now, and record the attribute
 use for later.  */
-  rtx x = rtx_alloc (check_code_attribute (m));
+  rtx x = rtx_alloc (check_attribute_codes (m));
   record_attribute_use (&codes, get_current_location (),
x, 0, deferred_name);
   return x;


[gcc r15-3017] testsuite: Prune warning about size of enums

2024-08-19 Thread Torbjorn Svensson via Gcc-cvs
https://gcc.gnu.org/g:6d8b9b772e0b3969e6b3fcf0363d6afcce2e65c9

commit r15-3017-g6d8b9b772e0b3969e6b3fcf0363d6afcce2e65c9
Author: Torbjörn SVENSSON 
Date:   Mon Aug 19 10:44:44 2024 +0200

testsuite: Prune warning about size of enums

This fixes reported regression at
https://linaro.atlassian.net/browse/GNU-1315.

gcc/testsuite/ChangeLog:

* g++.dg/warn/pr33738-2.C: dg-prune arm linker messages about
size of enums.

Signed-off-by: Torbjörn SVENSSON 

Diff:
---
 gcc/testsuite/g++.dg/warn/pr33738-2.C | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/testsuite/g++.dg/warn/pr33738-2.C 
b/gcc/testsuite/g++.dg/warn/pr33738-2.C
index 84bbdaeecc7..1ab121893ee 100644
--- a/gcc/testsuite/g++.dg/warn/pr33738-2.C
+++ b/gcc/testsuite/g++.dg/warn/pr33738-2.C
@@ -1,4 +1,5 @@
 // { dg-do run }
+// { dg-prune-output "use of enum values across objects may fail" }
 // { dg-options "-O2 -Wtype-limits -fstrict-enums -fshort-enums" }
 extern void link_error (void);


[gcc r14-10602] testsuite: Prune warning about size of enums

2024-08-19 Thread Torbjorn Svensson via Gcc-cvs
https://gcc.gnu.org/g:af97b5eb023756c4a00c8c3327395f3d069d7e26

commit r14-10602-gaf97b5eb023756c4a00c8c3327395f3d069d7e26
Author: Torbjörn SVENSSON 
Date:   Mon Aug 19 10:44:44 2024 +0200

testsuite: Prune warning about size of enums

This fixes reported regression at
https://linaro.atlassian.net/browse/GNU-1315.

gcc/testsuite/ChangeLog:

* g++.dg/warn/pr33738-2.C: dg-prune arm linker messages about
size of enums.

Signed-off-by: Torbjörn SVENSSON 
(cherry picked from commit 6d8b9b772e0b3969e6b3fcf0363d6afcce2e65c9)

Diff:
---
 gcc/testsuite/g++.dg/warn/pr33738-2.C | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/testsuite/g++.dg/warn/pr33738-2.C 
b/gcc/testsuite/g++.dg/warn/pr33738-2.C
index 84bbdaeecc7..1ab121893ee 100644
--- a/gcc/testsuite/g++.dg/warn/pr33738-2.C
+++ b/gcc/testsuite/g++.dg/warn/pr33738-2.C
@@ -1,4 +1,5 @@
 // { dg-do run }
+// { dg-prune-output "use of enum values across objects may fail" }
 // { dg-options "-O2 -Wtype-limits -fstrict-enums -fshort-enums" }
 extern void link_error (void);


[gcc r15-3018] aarch64: Reduce FP reassociation width for Neoverse V2 and set AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA

2024-08-19 Thread Kyrylo Tkachov via Gcc-cvs
https://gcc.gnu.org/g:cc572242688f0c6f8733c173038163efb09560fa

commit r15-3018-gcc572242688f0c6f8733c173038163efb09560fa
Author: Kyrylo Tkachov 
Date:   Fri Aug 2 06:48:47 2024 -0700

aarch64: Reduce FP reassociation width for Neoverse V2 and set AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA

The fp reassociation width for Neoverse V2 was set to 6 since its
introduction and I guess it was empirically tuned.  But since
AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA was added the tree reassociation
pass seems to be more deliberate in forming FMAs and when that flag is
used it seems to more properly evaluate the FMA vs non-FMA reassociation
widths.
According to the Neoverse V2 SWOG the core has a throughput of 4 for
most FP operations, so the value 6 is not accurate anyway.
Also, the SWOG does state that FMADD operations are pipelined and the
results can be forwarded from FP multiplies to the accumulation operands
of FMADD instructions, which seems to be what
AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA expresses.

This patch sets the fp_reassoc_width field to 4 and enables
AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA for -mcpu=neoverse-v2.

On SPEC2017 fprate I see the following changes on a Grace system:
503.bwaves_r      0.16%
507.cactuBSSN_r  -0.32%
508.namd_r        3.04%
510.parest_r      0.00%
511.povray_r      0.78%
519.lbm_r         0.35%
521.wrf_r         0.69%
526.blender_r    -0.53%
527.cam4_r        0.84%
538.imagick_r     0.00%
544.nab_r        -0.97%
549.fotonik3d_r  -0.45%
554.roms_r        0.97%
Geomean           0.35%

with -Ofast -mcpu=grace -flto.

So slight overall improvement with a meaningful improvement in
508.namd_r.

I think other tunings in aarch64 should look into
AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA as well, but I'll leave the
benchmarking to someone else.

Signed-off-by: Kyrylo Tkachov 

gcc/ChangeLog:

* config/aarch64/tuning_models/neoversev2.h (fp_reassoc_width):
Set to 4.
(tune_flags): Add AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA.

Diff:
---
 gcc/config/aarch64/tuning_models/neoversev2.h | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/gcc/config/aarch64/tuning_models/neoversev2.h 
b/gcc/config/aarch64/tuning_models/neoversev2.h
index 1ebb96b296d..52aad7d4a43 100644
--- a/gcc/config/aarch64/tuning_models/neoversev2.h
+++ b/gcc/config/aarch64/tuning_models/neoversev2.h
@@ -231,7 +231,7 @@ static const struct tune_params neoversev2_tunings =
   "4", /* jump_align.  */
   "32:16", /* loop_align.  */
   3,   /* int_reassoc_width.  */
-  6,   /* fp_reassoc_width.  */
+  4,   /* fp_reassoc_width.  */
   4,   /* fma_reassoc_width.  */
   3,   /* vec_reassoc_width.  */
   2,   /* min_div_recip_mul_sf.  */
@@ -242,10 +242,11 @@ static const struct tune_params neoversev2_tunings =
| AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
| AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS
| AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
-   | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW),   /* tune_flags.  */
+   | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW
+   | AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA),  /* tune_flags.  */
   &generic_prefetch_tune,
   AARCH64_LDP_STP_POLICY_ALWAYS,   /* ldp_policy_model.  */
   AARCH64_LDP_STP_POLICY_ALWAYS   /* stp_policy_model.  */
 };
 
-#endif /* GCC_AARCH64_H_NEOVERSEV2.  */
\ No newline at end of file
+#endif /* GCC_AARCH64_H_NEOVERSEV2.  */


[gcc r15-3019] gnat: fix lto-type-mismatch between C_Version_String and gnat_version_string [PR115917]

2024-08-19 Thread Arsen Arsenovic via Gcc-cvs
https://gcc.gnu.org/g:9cbcf8d1de159e6113fafb5dc2feb4a7e467a302

commit r15-3019-g9cbcf8d1de159e6113fafb5dc2feb4a7e467a302
Author: Arsen Arsenović 
Date:   Thu Aug 15 19:17:41 2024 +0200

gnat: fix lto-type-mismatch between C_Version_String and gnat_version_string [PR115917]

gcc/ada/ChangeLog:

PR ada/115917
* gnatvsn.ads: Add note about the duplication of this value in
version.c.
* version.c (VER_LEN_MAX): Define to the same value as
Gnatvsn.Ver_Len_Max.
(gnat_version_string): Use VER_LEN_MAX as bound.

Diff:
---
 gcc/ada/gnatvsn.ads | 3 ++-
 gcc/ada/version.c   | 5 -
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/gcc/ada/gnatvsn.ads b/gcc/ada/gnatvsn.ads
index 6cf170dc4ca..ca7744b9767 100644
--- a/gcc/ada/gnatvsn.ads
+++ b/gcc/ada/gnatvsn.ads
@@ -83,7 +83,8 @@ package Gnatvsn is
--  space to store any possible version string value for checks. This
--  value should never be decreased in the future, but it would be
--  OK to increase it if absolutely necessary. If it is increased,
-   --  be sure to increase GNAT.Compiler.Version.Ver_Len_Max as well.
+   --  be sure to increase GNAT.Compiler.Version.Ver_Len_Max, and to update
+   --  the VER_LEN_MAX define in version.c as well.
 
Ver_Prefix : constant String := "GNAT Version: ";
--  Prefix generated by binder. If it is changed, be sure to change
diff --git a/gcc/ada/version.c b/gcc/ada/version.c
index 5e64edd0b17..2fa9b8c2c85 100644
--- a/gcc/ada/version.c
+++ b/gcc/ada/version.c
@@ -31,4 +31,7 @@
 
 #include "version.h"
 
-char gnat_version_string[] = version_string;
+/* Logically a reference to Gnatvsn.Ver_Len_Max.  Please keep in sync.  */
+#define VER_LEN_MAX 256
+
+char gnat_version_string[VER_LEN_MAX] = version_string;


[gcc r15-3020] Allow coarrays in select type. [PR46371, PR56496]

2024-08-19 Thread Andre Vehreschild via Gcc-cvs
https://gcc.gnu.org/g:8871489c5162067c72a9b9ab05fe2179560e9986

commit r15-3020-g8871489c5162067c72a9b9ab05fe2179560e9986
Author: Andre Vehreschild 
Date:   Thu Aug 15 20:23:23 2024 +0200

Allow coarrays in select type. [PR46371, PR56496]

Fix ICE when scalar coarrays are used in a select type. Prevent
coindexing in associate/select type/select rank selector expression.

gcc/fortran/ChangeLog:

PR fortran/46371
PR fortran/56496

* expr.cc (gfc_is_coindexed): Also detect a coindexed expression
when it has been rewritten to caf_get.
* trans-stmt.cc (trans_associate_var): Always accept a
descriptor for coarrays.

gcc/testsuite/ChangeLog:

* gfortran.dg/coarray/select_type_1.f90: New test.
* gfortran.dg/coarray/select_type_2.f90: New test.
* gfortran.dg/coarray/select_type_3.f90: New test.

Diff:
---
 gcc/fortran/expr.cc|  4 +++
 gcc/fortran/trans-stmt.cc  | 10 ++-
 .../gfortran.dg/coarray/select_type_1.f90  | 34 ++
 .../gfortran.dg/coarray/select_type_2.f90  | 19 
 .../gfortran.dg/coarray/select_type_3.f90  | 23 +++
 5 files changed, 83 insertions(+), 7 deletions(-)

diff --git a/gcc/fortran/expr.cc b/gcc/fortran/expr.cc
index d3a1f8c0ba1..4f2d80c04f8 100644
--- a/gcc/fortran/expr.cc
+++ b/gcc/fortran/expr.cc
@@ -5803,6 +5803,10 @@ gfc_is_coindexed (gfc_expr *e)
 {
   gfc_ref *ref;
 
+  if (e->expr_type == EXPR_FUNCTION && e->value.function.isym
+  && e->value.function.isym->id == GFC_ISYM_CAF_GET)
+e = e->value.function.actual->expr;
+
   for (ref = e->ref; ref; ref = ref->next)
 if (ref->type == REF_ARRAY && ref->u.ar.codimen > 0)
   return !gfc_ref_this_image (ref);
diff --git a/gcc/fortran/trans-stmt.cc b/gcc/fortran/trans-stmt.cc
index 3b09a139dc0..023b1739b85 100644
--- a/gcc/fortran/trans-stmt.cc
+++ b/gcc/fortran/trans-stmt.cc
@@ -2200,16 +2200,12 @@ trans_associate_var (gfc_symbol *sym, gfc_wrapped_block 
*block)
  else
stmp = gfc_class_data_get (ctmp);
 
- /* Coarray scalar component expressions can emerge from
-the front end as array elements of the _data field.  */
- if (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (stmp)))
-   stmp = gfc_conv_descriptor_data_get (stmp);
-
- if (!POINTER_TYPE_P (TREE_TYPE (stmp)))
+ if (!CLASS_DATA (sym)->attr.codimension
+ && !POINTER_TYPE_P (TREE_TYPE (stmp)))
stmp = gfc_build_addr_expr (NULL, stmp);
 
  dtmp = gfc_class_data_get (ctree);
- stmp = fold_convert (TREE_TYPE (dtmp), stmp);
+ stmp = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (dtmp), stmp);
  gfc_add_modify (&se.pre, dtmp, stmp);
  stmp = gfc_class_vptr_get (ctmp);
  dtmp = gfc_class_vptr_get (ctree);
diff --git a/gcc/testsuite/gfortran.dg/coarray/select_type_1.f90 
b/gcc/testsuite/gfortran.dg/coarray/select_type_1.f90
new file mode 100644
index 000..7f12fb9aec7
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/coarray/select_type_1.f90
@@ -0,0 +1,34 @@
+!{ dg-do run }
+
+! Check PR46371 is fixed.
+! Contributed by Tobias Burnus  
+
+program pr46371
+  type :: foo
+integer :: i = 0
+  end type
+
+  class(foo), allocatable :: o_foo[:]
+  integer :: j
+
+  allocate(foo :: o_foo[*])
+  if (this_image() == 1) then
+
+select type(a => o_foo)
+  type is(foo)
+  j = a[1]%i
+  a[1]%i = 3
+end select
+
+if (j /= 0) stop 1
+
+select type(o_foo)
+  type is(foo)
+  j = o_foo[1]%i
+end select
+
+if (o_foo[1]%i /= 3) stop 2
+if (j /= 3) stop 3
+  end if
+end program pr46371
+
diff --git a/gcc/testsuite/gfortran.dg/coarray/select_type_2.f90 
b/gcc/testsuite/gfortran.dg/coarray/select_type_2.f90
new file mode 100644
index 000..1694d095708
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/coarray/select_type_2.f90
@@ -0,0 +1,19 @@
+!{ dg-do compile }
+
+! Check PR46371 is fixed.
+! Contributed by Tobias Burnus  
+
+program pr46371
+  type :: foo
+integer :: i = 0
+  end type
+
+  class(foo), allocatable :: o_foo[:]
+  integer :: j
+
+  select type(a => o_foo[2])  !{ dg-error "must not be coindexed" }
+type is(foo)
+j = a%i
+  end select
+end program pr46371
+
diff --git a/gcc/testsuite/gfortran.dg/coarray/select_type_3.f90 
b/gcc/testsuite/gfortran.dg/coarray/select_type_3.f90
new file mode 100644
index 000..50f27893ccc
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/coarray/select_type_3.f90
@@ -0,0 +1,23 @@
+!{ dg-do run }
+
+! Check pr56496 is fixed.
+! Contributed by Tobias Burnus  
+
+program pr56496
+
+  class(*), allocatable :: a[:]
+
+  allocate(integer :: a[*])
+  select type(a)
+type is (integer)
+  a= 5
+

[gcc r15-3023] aarch64: Fix tme intrinsic availability

2024-08-19 Thread Andrew Carlotti via Gcc-cvs
https://gcc.gnu.org/g:32afbb604b4958e78428006b10b3ca5e9ccd49f5

commit r15-3023-g32afbb604b4958e78428006b10b3ca5e9ccd49f5
Author: Andrew Carlotti 
Date:   Thu Oct 26 15:43:44 2023 +0100

aarch64: Fix tme intrinsic availability

The availability of tme intrinsics was previously gated at both
initialisation time (using global target options) and usage time
(accounting for function-specific target options).  This patch removes
the check at initialisation time, and also moves the intrinsics out of
the header file to allow for better error messages (matching the
existing error messages for SVE intrinsics).

gcc/ChangeLog:

PR target/112108
* config/aarch64/aarch64-builtins.cc (aarch64_init_tme_builtins):
Define intrinsic names directly.
(aarch64_general_init_builtins): Move tme initialisation...
(handle_arm_acle_h): ...to here, and remove feature check.
(aarch64_general_check_builtin_call): Check tme intrinsics.
* config/aarch64/arm_acle.h (__tstart, __tcommit, __tcancel)
(__ttest): Remove.
(_TMFAILURE_*): Define unconditionally.

gcc/testsuite/ChangeLog:

PR target/112108
* gcc.target/aarch64/acle/tme_guard-1.c: New test.
* gcc.target/aarch64/acle/tme_guard-2.c: New test.
* gcc.target/aarch64/acle/tme_guard-3.c: New test.
* gcc.target/aarch64/acle/tme_guard-4.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64-builtins.cc | 57 +-
 gcc/config/aarch64/arm_acle.h  | 36 +-
 .../gcc.target/aarch64/acle/tme_guard-1.c  |  9 
 .../gcc.target/aarch64/acle/tme_guard-2.c  | 10 
 .../gcc.target/aarch64/acle/tme_guard-3.c  |  9 
 .../gcc.target/aarch64/acle/tme_guard-4.c  | 10 
 6 files changed, 72 insertions(+), 59 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
b/gcc/config/aarch64/aarch64-builtins.cc
index a07adcee6e2..60e4c217921 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -1791,21 +1791,17 @@ aarch64_init_tme_builtins (void)
 = build_function_type_list (void_type_node, uint64_type_node, NULL);
 
   aarch64_builtin_decls[AARCH64_TME_BUILTIN_TSTART]
-= aarch64_general_add_builtin ("__builtin_aarch64_tstart",
-  ftype_uint64_void,
-  AARCH64_TME_BUILTIN_TSTART);
+= aarch64_general_simulate_builtin ("__tstart", ftype_uint64_void,
+   AARCH64_TME_BUILTIN_TSTART);
   aarch64_builtin_decls[AARCH64_TME_BUILTIN_TTEST]
-= aarch64_general_add_builtin ("__builtin_aarch64_ttest",
-  ftype_uint64_void,
-  AARCH64_TME_BUILTIN_TTEST);
+= aarch64_general_simulate_builtin ("__ttest", ftype_uint64_void,
+   AARCH64_TME_BUILTIN_TTEST);
   aarch64_builtin_decls[AARCH64_TME_BUILTIN_TCOMMIT]
-= aarch64_general_add_builtin ("__builtin_aarch64_tcommit",
-  ftype_void_void,
-  AARCH64_TME_BUILTIN_TCOMMIT);
+= aarch64_general_simulate_builtin ("__tcommit", ftype_void_void,
+   AARCH64_TME_BUILTIN_TCOMMIT);
   aarch64_builtin_decls[AARCH64_TME_BUILTIN_TCANCEL]
-= aarch64_general_add_builtin ("__builtin_aarch64_tcancel",
-  ftype_void_uint64,
-  AARCH64_TME_BUILTIN_TCANCEL);
+= aarch64_general_simulate_builtin ("__tcancel", ftype_void_uint64,
+   AARCH64_TME_BUILTIN_TCANCEL);
 }
 
 /* Add builtins for Random Number instructions.  */
@@ -2068,6 +2064,7 @@ handle_arm_acle_h (void)
 {
   if (TARGET_LS64)
 aarch64_init_ls64_builtins ();
+  aarch64_init_tme_builtins ();
 }
 
 /* Initialize fpsr fpcr getters and setters.  */
@@ -2160,9 +2157,6 @@ aarch64_general_init_builtins (void)
   if (!TARGET_ILP32)
 aarch64_init_pauth_hint_builtins ();
 
-  if (TARGET_TME)
-aarch64_init_tme_builtins ();
-
   if (TARGET_MEMTAG)
 aarch64_init_memtag_builtins ();
 
@@ -2285,6 +2279,7 @@ aarch64_general_check_builtin_call (location_t location, 
vec,
unsigned int code, tree fndecl,
unsigned int nargs ATTRIBUTE_UNUSED, tree *args)
 {
+  tree decl = aarch64_builtin_decls[code];
   switch (code)
 {
 case AARCH64_RSR:
@@ -2297,15 +2292,29 @@ aarch64_general_check_builtin_call (location_t 
location, vec,
 case AARCH64_WSR64:
 case AARCH64_WSRF:
 case AARCH64_WSRF64:
-  tree addr = STRIP_NOPS (args[0]);
-  if (TREE_CODE (TREE_TYPE (addr)) != POINTER_TYPE
- || TREE_CODE (addr) != ADDR_EXPR
- || TREE_CODE (TREE_OPERAND (addr, 0)) !

[gcc r15-3024] aarch64: Fix memtag intrinsic availability

2024-08-19 Thread Andrew Carlotti via Gcc-cvs
https://gcc.gnu.org/g:4e1b617b35631df4dd6089d4044aa19d0c1adea7

commit r15-3024-g4e1b617b35631df4dd6089d4044aa19d0c1adea7
Author: Andrew Carlotti 
Date:   Tue Jul 18 20:09:38 2023 +0100

aarch64: Fix memtag intrinsic availability

The availability of memtag intrinsics and data types was determined
solely by the globally specified architecture features, which did not
reflect any changes specified in target pragmas or attributes.

This patch removes the initialisation-time guards for the intrinsics,
and replaces them with checks at use time. It also removes the macro
indirection from the header file - this simplifies the header, and
allows the missing extension error reporting to find the user-facing
intrinsic names.

gcc/ChangeLog:

PR target/112108
* config/aarch64/aarch64-builtins.cc (aarch64_init_memtag_builtins):
Define intrinsic names directly.
(aarch64_general_init_builtins): Move memtag initialisation...
(handle_arm_acle_h): ...to here, and remove feature check.
(aarch64_general_check_builtin_call): Check memtag intrinsics.
* config/aarch64/arm_acle.h (__arm_mte_create_random_tag)
(__arm_mte_exclude_tag, __arm_mte_ptrdiff)
(__arm_mte_increment_tag, __arm_mte_set_tag, __arm_mte_get_tag):
Remove.

gcc/testsuite/ChangeLog:

PR target/112108
* gcc.target/aarch64/acle/memtag_guard-1.c: New test.
* gcc.target/aarch64/acle/memtag_guard-2.c: New test.
* gcc.target/aarch64/acle/memtag_guard-3.c: New test.
* gcc.target/aarch64/acle/memtag_guard-4.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64-builtins.cc | 23 --
 gcc/config/aarch64/arm_acle.h  | 23 --
 .../gcc.target/aarch64/acle/memtag_guard-1.c   |  9 +
 .../gcc.target/aarch64/acle/memtag_guard-2.c   | 10 ++
 .../gcc.target/aarch64/acle/memtag_guard-3.c   |  9 +
 .../gcc.target/aarch64/acle/memtag_guard-4.c   | 10 ++
 6 files changed, 51 insertions(+), 33 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
b/gcc/config/aarch64/aarch64-builtins.cc
index 60e4c217921..9c6d9ec7537 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -1932,27 +1932,27 @@ aarch64_init_memtag_builtins (void)
 
 #define AARCH64_INIT_MEMTAG_BUILTINS_DECL(F, N, I, T) \
   aarch64_builtin_decls[AARCH64_MEMTAG_BUILTIN_##F] \
-= aarch64_general_add_builtin ("__builtin_aarch64_memtag_"#N, \
-  T, AARCH64_MEMTAG_BUILTIN_##F); \
+= aarch64_general_simulate_builtin ("__arm_mte_"#N, T, \
+   AARCH64_MEMTAG_BUILTIN_##F); \
   aarch64_memtag_builtin_data[AARCH64_MEMTAG_BUILTIN_##F - \
  AARCH64_MEMTAG_BUILTIN_START - 1] = \
{T, CODE_FOR_##I};
 
   fntype = build_function_type_list (ptr_type_node, ptr_type_node,
 uint64_type_node, NULL);
-  AARCH64_INIT_MEMTAG_BUILTINS_DECL (IRG, irg, irg, fntype);
+  AARCH64_INIT_MEMTAG_BUILTINS_DECL (IRG, create_random_tag, irg, fntype);
 
   fntype = build_function_type_list (uint64_type_node, ptr_type_node,
 uint64_type_node, NULL);
-  AARCH64_INIT_MEMTAG_BUILTINS_DECL (GMI, gmi, gmi, fntype);
+  AARCH64_INIT_MEMTAG_BUILTINS_DECL (GMI, exclude_tag, gmi, fntype);
 
   fntype = build_function_type_list (ptrdiff_type_node, ptr_type_node,
 ptr_type_node, NULL);
-  AARCH64_INIT_MEMTAG_BUILTINS_DECL (SUBP, subp, subp, fntype);
+  AARCH64_INIT_MEMTAG_BUILTINS_DECL (SUBP, ptrdiff, subp, fntype);
 
   fntype = build_function_type_list (ptr_type_node, ptr_type_node,
 unsigned_type_node, NULL);
-  AARCH64_INIT_MEMTAG_BUILTINS_DECL (INC_TAG, inc_tag, addg, fntype);
+  AARCH64_INIT_MEMTAG_BUILTINS_DECL (INC_TAG, increment_tag, addg, fntype);
 
   fntype = build_function_type_list (void_type_node, ptr_type_node, NULL);
   AARCH64_INIT_MEMTAG_BUILTINS_DECL (SET_TAG, set_tag, stg, fntype);
@@ -2065,6 +2065,7 @@ handle_arm_acle_h (void)
   if (TARGET_LS64)
 aarch64_init_ls64_builtins ();
   aarch64_init_tme_builtins ();
+  aarch64_init_memtag_builtins ();
 }
 
 /* Initialize fpsr fpcr getters and setters.  */
@@ -2157,9 +2158,6 @@ aarch64_general_init_builtins (void)
   if (!TARGET_ILP32)
 aarch64_init_pauth_hint_builtins ();
 
-  if (TARGET_MEMTAG)
-aarch64_init_memtag_builtins ();
-
   if (in_lto_p)
 handle_arm_acle_h ();
 }
@@ -2316,7 +2314,12 @@ aarch64_general_check_builtin_call (location_t location, 
vec,
 default:
   break;
 }
-  /* Default behavior.  */
+
+  if (code >= AARCH64_MEMTAG_BUILTIN_START
+  && code <= AARCH64_MEMTAG_BUILTIN

[gcc r15-3021] aarch64: Refactor check_required_extensions

2024-08-19 Thread Andrew Carlotti via Gcc-cvs
https://gcc.gnu.org/g:a4b39dc4bfad2b224cd2041568d469b5724f8f88

commit r15-3021-ga4b39dc4bfad2b224cd2041568d469b5724f8f88
Author: Andrew Carlotti 
Date:   Tue Aug 13 16:15:11 2024 +0100

aarch64: Refactor check_required_extensions

Replace TARGET_GENERAL_REGS_ONLY check with an explicit check that
aarch64_isa_flags enables all required extensions.  This will be more
flexible when repurposing this function for non-SVE intrinsics.

gcc/ChangeLog:

* config/aarch64/aarch64-sve-builtins.cc
(check_required_registers): Remove target check and rename to...
(report_missing_registers): ...this.
(check_required_extensions): Refactor.

Diff:
---
 gcc/config/aarch64/aarch64-sve-builtins.cc | 38 --
 1 file changed, 20 insertions(+), 18 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc 
b/gcc/config/aarch64/aarch64-sve-builtins.cc
index 0a560eaedca..1fe380dd1ef 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -1094,27 +1094,19 @@ report_missing_extension (location_t location, tree 
fndecl,
   reported_missing_extension_p = true;
 }
 
-/* Check whether the registers required by SVE function fndecl are available.
-   Report an error against LOCATION and return false if not.  */
-static bool
-check_required_registers (location_t location, tree fndecl)
+/* Report an error against LOCATION that the user has tried to use
+   function FNDECL when non-general registers are disabled.  */
+static void
+report_missing_registers (location_t location, tree fndecl)
 {
   /* Avoid reporting a slew of messages for a single oversight.  */
   if (reported_missing_registers_p)
-return false;
-
-  if (TARGET_GENERAL_REGS_ONLY)
-{
-  /* SVE registers are not usable when -mgeneral-regs-only option
-is specified.  */
-  error_at (location,
-   "ACLE function %qD is incompatible with the use of %qs",
-   fndecl, "-mgeneral-regs-only");
-  reported_missing_registers_p = true;
-  return false;
-}
+return;
 
-  return true;
+  error_at (location,
+   "ACLE function %qD is incompatible with the use of %qs",
+   fndecl, "-mgeneral-regs-only");
+  reported_missing_registers_p = true;
 }
 
 /* Check whether all the AARCH64_FL_* values in REQUIRED_EXTENSIONS are
@@ -1124,9 +1116,19 @@ static bool
 check_required_extensions (location_t location, tree fndecl,
   aarch64_feature_flags required_extensions)
 {
+  if ((required_extensions & ~aarch64_isa_flags) == 0)
+return true;
+
   auto missing_extensions = required_extensions & ~aarch64_asm_isa_flags;
+
   if (missing_extensions == 0)
-return check_required_registers (location, fndecl);
+{
+  /* All required extensions are enabled in aarch64_asm_isa_flags, so the
+error must be the use of general-regs-only.  */
+  report_missing_registers (location, fndecl);
+  return false;
+}
+
 
   if (missing_extensions & AARCH64_FL_SM_OFF)
 {


[gcc r15-3022] aarch64: Move check_required_extensions

2024-08-19 Thread Andrew Carlotti via Gcc-cvs
https://gcc.gnu.org/g:baf71ec56b40858c5b2a4cc8481403685d753477

commit r15-3022-gbaf71ec56b40858c5b2a4cc8481403685d753477
Author: Andrew Carlotti 
Date:   Tue Jul 18 16:40:58 2023 +0100

aarch64: Move check_required_extensions

Move SVE extension checking functionality to aarch64-builtins.cc, so
that it can be shared by non-SVE intrinsics.

gcc/ChangeLog:

* config/aarch64/aarch64-sve-builtins.cc (check_builtin_call)
(expand_builtin): Update calls to the below.
(report_missing_extension, report_missing_registers)
(check_required_extensions): Move out of aarch64_sve namespace,
rename, and move into...
* config/aarch64/aarch64-builtins.cc 
(aarch64_report_missing_extension)
(aarch64_report_missing_registers)
(aarch64_check_required_extensions) ...here.
* config/aarch64/aarch64-protos.h 
(aarch64_check_required_extensions):
Add prototype.

Diff:
---
 gcc/config/aarch64/aarch64-builtins.cc | 100 +++
 gcc/config/aarch64/aarch64-protos.h|   2 +
 gcc/config/aarch64/aarch64-sve-builtins.cc | 107 ++---
 3 files changed, 106 insertions(+), 103 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
b/gcc/config/aarch64/aarch64-builtins.cc
index 30669f8aa18..a07adcee6e2 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -2180,6 +2180,106 @@ aarch64_general_builtin_decl (unsigned code, bool)
   return aarch64_builtin_decls[code];
 }
 
+/* True if we've already complained about attempts to use functions
+   when the required extension is disabled.  */
+static bool reported_missing_extension_p;
+
+/* True if we've already complained about attempts to use functions
+   which require registers that are missing.  */
+static bool reported_missing_registers_p;
+
+/* Report an error against LOCATION that the user has tried to use
+   function FNDECL when extension EXTENSION is disabled.  */
+static void
+aarch64_report_missing_extension (location_t location, tree fndecl,
+ const char *extension)
+{
+  /* Avoid reporting a slew of messages for a single oversight.  */
+  if (reported_missing_extension_p)
+return;
+
+  error_at (location, "ACLE function %qD requires ISA extension %qs",
+   fndecl, extension);
+  inform (location, "you can enable %qs using the command-line"
+ " option %<-march%>, or by using the %<target%>"
+ " attribute or pragma", extension);
+  reported_missing_extension_p = true;
+}
+
+/* Report an error against LOCATION that the user has tried to use
+   function FNDECL when non-general registers are disabled.  */
+static void
+aarch64_report_missing_registers (location_t location, tree fndecl)
+{
+  /* Avoid reporting a slew of messages for a single oversight.  */
+  if (reported_missing_registers_p)
+return;
+
+  error_at (location,
+   "ACLE function %qD is incompatible with the use of %qs",
+   fndecl, "-mgeneral-regs-only");
+  reported_missing_registers_p = true;
+}
+
+/* Check whether all the AARCH64_FL_* values in REQUIRED_EXTENSIONS are
+   enabled, given that those extensions are required for function FNDECL.
+   Report an error against LOCATION if not.  */
+bool
+aarch64_check_required_extensions (location_t location, tree fndecl,
+  aarch64_feature_flags required_extensions)
+{
+  if ((required_extensions & ~aarch64_isa_flags) == 0)
+return true;
+
+  auto missing_extensions = required_extensions & ~aarch64_asm_isa_flags;
+
+  if (missing_extensions == 0)
+{
+  /* All required extensions are enabled in aarch64_asm_isa_flags, so the
+error must be the use of general-regs-only.  */
+  aarch64_report_missing_registers (location, fndecl);
+  return false;
+}
+
+  if (missing_extensions & AARCH64_FL_SM_OFF)
+{
+  error_at (location, "ACLE function %qD cannot be called when"
+   " SME streaming mode is enabled", fndecl);
+  return false;
+}
+
+  if (missing_extensions & AARCH64_FL_SM_ON)
+{
+  error_at (location, "ACLE function %qD can only be called when"
+   " SME streaming mode is enabled", fndecl);
+  return false;
+}
+
+  if (missing_extensions & AARCH64_FL_ZA_ON)
+{
+  error_at (location, "ACLE function %qD can only be called from"
+   " a function that has %qs state", fndecl, "za");
+  return false;
+}
+
+  static const struct {
+aarch64_feature_flags flag;
+const char *name;
+  } extensions[] = {
+#define AARCH64_OPT_EXTENSION(EXT_NAME, IDENT, C, D, E, F) \
+{ AARCH64_FL_##IDENT, EXT_NAME },
+#include "aarch64-option-extensions.def"
+  };
+
+  for (unsigned int i = 0; i < ARRAY_SIZE (extensions); ++i)
+if (missing_extensions & extensions[i].flag)
+  {
+   aarch64_report_missing_extension (loca

[gcc r15-3025] aarch64: Fix ls64 intrinsic availability

2024-08-19 Thread Andrew Carlotti via Gcc-cvs
https://gcc.gnu.org/g:fceecc511d4918e2b27a0609f8885ec8aba8723d

commit r15-3025-gfceecc511d4918e2b27a0609f8885ec8aba8723d
Author: Andrew Carlotti 
Date:   Thu Oct 26 15:45:15 2023 +0100

aarch64: Fix ls64 intrinsic availability

The availability of ls64 intrinsics and data types was determined
solely by the globally specified architecture features, which did not
reflect any changes specified in target pragmas or attributes.

This patch removes the initialisation-time guards for the intrinsics,
and replaces them with checks at use time. We also get better error
messages when ls64 is not available (matching the existing error
messages for SVE intrinsics).

The data512_t type is made always available; this is consistent with the
present behaviour for Neon fp16/bf16 types.

gcc/ChangeLog:

PR target/112108
* config/aarch64/aarch64-builtins.cc (handle_arm_acle_h): Remove
feature check at initialisation.
(aarch64_general_check_builtin_call): Check ls64 intrinsics.
* config/aarch64/arm_acle.h (data512_t): Make always available.

gcc/testsuite/ChangeLog:

PR target/112108
* gcc.target/aarch64/acle/ls64_guard-1.c: New test.
* gcc.target/aarch64/acle/ls64_guard-2.c: New test.
* gcc.target/aarch64/acle/ls64_guard-3.c: New test.
* gcc.target/aarch64/acle/ls64_guard-4.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64-builtins.cc   | 10 --
 gcc/config/aarch64/arm_acle.h|  2 --
 gcc/testsuite/gcc.target/aarch64/acle/ls64_guard-1.c |  9 +
 gcc/testsuite/gcc.target/aarch64/acle/ls64_guard-2.c | 10 ++
 gcc/testsuite/gcc.target/aarch64/acle/ls64_guard-3.c |  9 +
 gcc/testsuite/gcc.target/aarch64/acle/ls64_guard-4.c | 10 ++
 6 files changed, 46 insertions(+), 4 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
b/gcc/config/aarch64/aarch64-builtins.cc
index 9c6d9ec7537..eb878b933fe 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -2062,8 +2062,7 @@ aarch64_init_data_intrinsics (void)
 void
 handle_arm_acle_h (void)
 {
-  if (TARGET_LS64)
-aarch64_init_ls64_builtins ();
+  aarch64_init_ls64_builtins ();
   aarch64_init_tme_builtins ();
   aarch64_init_memtag_builtins ();
 }
@@ -2311,6 +2310,13 @@ aarch64_general_check_builtin_call (location_t location, 
vec,
   return aarch64_check_required_extensions (location, decl,
AARCH64_FL_TME);
 
+case AARCH64_LS64_BUILTIN_LD64B:
+case AARCH64_LS64_BUILTIN_ST64B:
+case AARCH64_LS64_BUILTIN_ST64BV:
+case AARCH64_LS64_BUILTIN_ST64BV0:
+  return aarch64_check_required_extensions (location, decl,
+   AARCH64_FL_LS64);
+
 default:
   break;
 }
diff --git a/gcc/config/aarch64/arm_acle.h b/gcc/config/aarch64/arm_acle.h
index ab043267913..ab4e7e60e04 100644
--- a/gcc/config/aarch64/arm_acle.h
+++ b/gcc/config/aarch64/arm_acle.h
@@ -265,9 +265,7 @@ __crc32d (uint32_t __a, uint64_t __b)
 #define _TMFAILURE_INT0x0080u
 #define _TMFAILURE_TRIVIAL0x0100u
 
-#ifdef __ARM_FEATURE_LS64
 typedef __arm_data512_t data512_t;
-#endif
 
 #pragma GCC push_options
 #pragma GCC target ("+nothing+rng")
diff --git a/gcc/testsuite/gcc.target/aarch64/acle/ls64_guard-1.c 
b/gcc/testsuite/gcc.target/aarch64/acle/ls64_guard-1.c
new file mode 100644
index 000..7dfc193a293
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/acle/ls64_guard-1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=armv8.6-a" } */
+
+#include <arm_acle.h>
+
+data512_t foo (void * p)
+{
+  return __arm_ld64b (p); /* { dg-error {ACLE function '__arm_ld64b' requires 
ISA extension 'ls64'} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/acle/ls64_guard-2.c 
b/gcc/testsuite/gcc.target/aarch64/acle/ls64_guard-2.c
new file mode 100644
index 000..3ede05a81f0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/acle/ls64_guard-2.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=armv8.6-a" } */
+
+#include <arm_acle.h>
+
+#pragma GCC target("arch=armv8-a+ls64")
+data512_t foo (void * p)
+{
+  return __arm_ld64b (p);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/acle/ls64_guard-3.c 
b/gcc/testsuite/gcc.target/aarch64/acle/ls64_guard-3.c
new file mode 100644
index 000..e0fccdad7be
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/acle/ls64_guard-3.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=armv8-a+ls64 -mgeneral-regs-only" } */
+
+#include <arm_acle.h>
+
+data512_t foo (void * p)
+{
+  return __arm_ld64b (p);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/acle/ls64_guard-4.c 
b/gcc/testsuite/gcc.target/aarch64/acle/ls64_guard-4.c
new file mode 100644
index 000..af1d9a

[gcc/meissner/heads/work176] (80 commits) Merge commit 'refs/users/meissner/heads/work176' of git+ssh

2024-08-19 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work176' was updated to point to:

 248fc70e14e... Merge commit 'refs/users/meissner/heads/work176' of git+ssh

It previously pointed to:

 04913bc20cb... Update ChangeLog.*

Diff:

Summary of changes (added commits):
---

  248fc70... Merge commit 'refs/users/meissner/heads/work176' of git+ssh
  03ac745... Update ChangeLog.*
  aa65901... Add -mcpu=future tuning support.
  3031a7c... Add support for -mcpu=future
  df189b6... Update ChangeLog.*
  9e3ab51... Update tests to work with architecture flags changes.
  255d09d... Change TARGET_MODULO to TARGET_POWER9
  2f2353e... Change TARGET_POPCNTD to TARGET_POWER7
  daa12c0... Change TARGET_CMPB to TARGET_POWER6
  9b740bf... Change TARGET_FPRND to TARGET_POWER5X
  d83a1cd... Change TARGET_POPCNTB to TARGET_POWER5
  eeab600... Do not allow -mvsx to boost processor to power7.
  5825848... Use architecture flags for defining _ARCH_PWR macros.
  7a80d6a... Add rs6000 architecture masks.
  b04aa92... Add ChangeLog.meissner and REVISION.
  fceecc5... aarch64: Fix ls64 intrinsic availability (*)
  4e1b617... aarch64: Fix memtag intrinsic availability (*)
  32afbb6... aarch64: Fix tme intrinsic availability (*)
  baf71ec... aarch64: Move check_required_extensions (*)
  a4b39dc... aarch64: Refactor check_required_extensions (*)
  8871489... Allow coarrays in select type. [PR46371, PR56496] (*)
  9cbcf8d... gnat: fix lto-type-mismatch between C_Version_String and gn (*)
  cc57224... aarch64: Reduce FP reassociation width for Neoverse V2 and  (*)
  6d8b9b7... testsuite: Prune warning about size of enums (*)
  e57d3cc... rtl: Enable the use of rtx values with int and mode attribu (*)
  71059d2... testsuite: Reduce cut-&-paste in scanltranstree.exp (*)
  661acde... Fix ICE in recompute_tree_invariant_for_addr_expr, at tree. (*)
  8d6c6fb... aarch64: Implement 16-byte vector mode const0 store by TImo (*)
  7f62e71... AVX10.2 ymm rounding: Support vsqrtp{s,d,h} and vsubp{s,d,h (*)
  1f86cf0... AVX10.2 ymm rounding: Support vscalefp{s,d,h} intrins (*)
  9afa508... AVX10.2 ymm rounding: Support vreducep{s,d,h} and vrndscale (*)
  90cc5b0... AVX10.2 ymm rounding: Support vmulp{s,d,h} and vrangep{s,d} (*)
  cc8a759... AVX10.2 ymm rounding: Support v{max,min}p{s,d,h} intrins (*)
  8d4f542... AVX10.2 ymm rounding: Support vgetexpp{s,d,h} and vgetmantp (*)
  0983d40... AVX10.2 ymm rounding: Support vfnmsub{132,231,213}p{s,d,h}  (*)
  6f0aa7a... AVX10.2 ymm rounding: Support vfmulcph and vfnmadd{132,231, (*)
  dd48acb... AVX10.2 ymm rounding: Support vfm{sub,subadd}{132,231,213}p (*)
  cfbc94e... AVX10.2 ymm rounding: Support vfmaddcph and vfmaddsub{132,2 (*)
  0683ca3... AVX10.2 ymm rounding: Support vfmadd{132,231,213}p{s,d,h} i (*)
  95980b2... AVX10.2 ymm rounding: Support vfc{madd,mul}cph, vfixupimmp{ (*)
  3d1b553... AVX10.2 ymm rounding: Support vcvt{,u}w2ph and vdivp{s,d,h} (*)
  b275422... AVX10.2 ymm rounding: Support vcvttps2{,u}{dq,qq} and vcvtu (*)
  493c509... AVX10.2 ymm rounding: Support vcvttph2{,u}{dq,qq,w} intrins (*)
  6e231f8... AVX10.2 ymm rounding: Support vcvtqq2p{s,d,h} and vcvttpd2{ (*)
  0f5a42d... AVX10.2 ymm rounding: Support vcvtps2{,u}{dq,qq} intrins (*)
  b70bb94... AVX10.2 ymm rounding: Support vcvtph2{,u}w and vcvtps2p{d,h (*)
  6f2eac5... AVX10.2 ymm rounding: Support vcvtph2p{s,d,sx} and vcvtph2{ (*)
  508ac49... AVX10.2 ymm rounding: Support vcvtpd2{,u}{dq,qq} intrins (*)
  85e874d... AVX10.2 ymm rounding: Support vcvtdq2p{s,h} and vcvtpd2p{s, (*)
  e22e3af... AVX10.2 ymm rounding: Support vadd{s,d,h} and vcmp{s,d,h} i (*)
  f11bc08... Daily bump. (*)
  f10d2ee... [PR rtl-optimization/115876] Avoid ubsan in ext-dce.cc (*)
  fc41263... libstdc++: Remove note from the GCC 4.0.1 days (*)
  b9ac01d... doc: Tweak gm2 mailing list address (*)
  cd2f394... PHIOPT: move factor_out_conditional_operation over to use g (*)
  1cfe4a4... libgfortran: implement fpu-macppc for Darwin, support IEEE  (*)
  1ed1dd5... AVR: Tweak 16-bit addition with const that didn't get a LD_ (*)
  22acd3c... AVR: ad target/116407 - Fix linker error "relocation trunca (*)
  dfb2e8c... AVR: target/116407 - Fix linker error "relocation truncated (*)
  3ae8794... forwprop: Also dce from added statements from gimple_simpli (*)
  a183b25... RISC-V: Implement the quad and oct .SAT_TRUNC for scalar (*)
  e8f31f4... RISC-V: Make sure high bits of usadd operands is clean for  (*)
  8d0efcf... RISC-V: Add testcases for unsigned scalar .SAT_TRUNC form 3 (*)
  6fbdbad... RISC-V: Add testcases for unsigned scalar .SAT_TRUNC form 2 (*)
  0555f65... Daily bump. (*)
  61e179b... [committed] Avoid right shifting signed value on ext-dce.cc (*)
  efcfd1d... t-rtems: add rv32imf architecture to the RTEMS multilib for (*)
  abfc140... Adjust v850 rotate expander to allow more cases for V850E3V (*)
  6d734ba... RISC-V: Fix ICE for vector single-width integer multiply-ad (*)
  7aed8de... [RISC-V][PR target/116282] Stabilize pattern conditions (

[gcc(refs/users/meissner/heads/work176)] Add ChangeLog.meissner and REVISION.

2024-08-19 Thread Michael Meissner via Libstdc++-cvs
https://gcc.gnu.org/g:b04aa92b92c588802feb39d2b0fe8efecb47ded6

commit b04aa92b92c588802feb39d2b0fe8efecb47ded6
Author: Michael Meissner 
Date:   Fri Aug 16 20:00:35 2024 -0400

Add ChangeLog.meissner and REVISION.

2024-08-16  Michael Meissner  

gcc/

* REVISION: New file for branch.
* ChangeLog.meissner: New file.

gcc/c-family/

* ChangeLog.meissner: New file.

gcc/c/

* ChangeLog.meissner: New file.

gcc/cp/

* ChangeLog.meissner: New file.

gcc/fortran/

* ChangeLog.meissner: New file.

gcc/testsuite/

* ChangeLog.meissner: New file.

libgcc/

* ChangeLog.meissner: New file.

Diff:
---
 gcc/ChangeLog.meissner   | 6 ++
 gcc/REVISION | 1 +
 gcc/c-family/ChangeLog.meissner  | 6 ++
 gcc/c/ChangeLog.meissner | 6 ++
 gcc/cp/ChangeLog.meissner| 6 ++
 gcc/fortran/ChangeLog.meissner   | 6 ++
 gcc/testsuite/ChangeLog.meissner | 6 ++
 libgcc/ChangeLog.meissner| 6 ++
 libstdc++-v3/ChangeLog.meissner  | 6 ++
 9 files changed, 49 insertions(+)

diff --git a/gcc/ChangeLog.meissner b/gcc/ChangeLog.meissner
new file mode 100644
index 000..6bd73d667fe
--- /dev/null
+++ b/gcc/ChangeLog.meissner
@@ -0,0 +1,6 @@
+ Branch work176, baseline 
+
+2024-08-16   Michael Meissner  
+
+   Clone branch
+
diff --git a/gcc/REVISION b/gcc/REVISION
new file mode 100644
index 000..9974885ad20
--- /dev/null
+++ b/gcc/REVISION
@@ -0,0 +1 @@
+work176 branch
diff --git a/gcc/c-family/ChangeLog.meissner b/gcc/c-family/ChangeLog.meissner
new file mode 100644
index 000..6bd73d667fe
--- /dev/null
+++ b/gcc/c-family/ChangeLog.meissner
@@ -0,0 +1,6 @@
+ Branch work176, baseline 
+
+2024-08-16   Michael Meissner  
+
+   Clone branch
+
diff --git a/gcc/c/ChangeLog.meissner b/gcc/c/ChangeLog.meissner
new file mode 100644
index 000..6bd73d667fe
--- /dev/null
+++ b/gcc/c/ChangeLog.meissner
@@ -0,0 +1,6 @@
+ Branch work176, baseline 
+
+2024-08-16   Michael Meissner  
+
+   Clone branch
+
diff --git a/gcc/cp/ChangeLog.meissner b/gcc/cp/ChangeLog.meissner
new file mode 100644
index 000..6bd73d667fe
--- /dev/null
+++ b/gcc/cp/ChangeLog.meissner
@@ -0,0 +1,6 @@
+ Branch work176, baseline 
+
+2024-08-16   Michael Meissner  
+
+   Clone branch
+
diff --git a/gcc/fortran/ChangeLog.meissner b/gcc/fortran/ChangeLog.meissner
new file mode 100644
index 000..6bd73d667fe
--- /dev/null
+++ b/gcc/fortran/ChangeLog.meissner
@@ -0,0 +1,6 @@
+ Branch work176, baseline 
+
+2024-08-16   Michael Meissner  
+
+   Clone branch
+
diff --git a/gcc/testsuite/ChangeLog.meissner b/gcc/testsuite/ChangeLog.meissner
new file mode 100644
index 000..6bd73d667fe
--- /dev/null
+++ b/gcc/testsuite/ChangeLog.meissner
@@ -0,0 +1,6 @@
+ Branch work176, baseline 
+
+2024-08-16   Michael Meissner  
+
+   Clone branch
+
diff --git a/libgcc/ChangeLog.meissner b/libgcc/ChangeLog.meissner
new file mode 100644
index 000..6bd73d667fe
--- /dev/null
+++ b/libgcc/ChangeLog.meissner
@@ -0,0 +1,6 @@
+ Branch work176, baseline 
+
+2024-08-16   Michael Meissner  
+
+   Clone branch
+
diff --git a/libstdc++-v3/ChangeLog.meissner b/libstdc++-v3/ChangeLog.meissner
new file mode 100644
index 000..6bd73d667fe
--- /dev/null
+++ b/libstdc++-v3/ChangeLog.meissner
@@ -0,0 +1,6 @@
+ Branch work176, baseline 
+
+2024-08-16   Michael Meissner  
+
+   Clone branch
+


[gcc(refs/users/meissner/heads/work176)] Add rs6000 architecture masks.

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:7a80d6af56554fe3388d59f71341a6c96b5d5d01

commit 7a80d6af56554fe3388d59f71341a6c96b5d5d01
Author: Michael Meissner 
Date:   Fri Aug 16 22:34:18 2024 -0400

Add rs6000 architecture masks.

This patch begins the journey to move architecture bits that are not user 
ISA
options from rs6000_isa_flags to a new target variable rs6000_arch_flags.  
The
intention is to remove switches that are currently isa options, but the user
should not be using this particular option. For example, we want users to 
use
-mcpu=power10 and not just -mpower10.

This patch also changes the target_clones support to use an architecture 
mask
instead of isa bits.

This patch also switches the handling of .machine to use architecture masks 
if
they exist (power4 through power11).  All of the other PowerPCs will 
continue to
use the existing code for setting the .machine option.

I have built both big endian and little endian bootstrap compilers and there
were no regressions.

In addition, I constructed a test case that used every architecture define 
(like
_ARCH_PWR4, etc.) and I also looked at the .machine directive generated.  I 
ran
this test for all supported combinations of -mcpu, big/little endian, and 
32/64
bit support.  Every single instance generated exactly the same code with the
patches installed compared to the compiler before installing the patches.

Can I install this patch on the GCC 15 trunk?

2024-08-16  Michael Meissner  

gcc/

* config/rs6000/rs6000-arch.def: New file.
* config/rs6000/rs6000.cc (struct clone_map): Switch to using
architecture masks instead of ISA masks.
(rs6000_clone_map): Likewise.
(rs6000_print_isa_options): Add an architecture flags argument, 
change
all callers.
(get_arch_flag): New function.
(rs6000_debug_reg_global): Update rs6000_print_isa_options calls.
(rs6000_option_override_internal): Likewise.
(rs6000_machine_from_flags): Switch to using architecture masks 
instead
of ISA masks.
(struct rs6000_arch_mask): New structure.
(rs6000_arch_masks): New table of architecture masks and names.
(rs6000_function_specific_save): Save architecture flags.
(rs6000_function_specific_restore): Restore architecture flags.
(rs6000_function_specific_print): Update rs6000_print_isa_options 
calls.
(rs6000_print_options_internal): Add architecture flags options.
(rs6000_clone_priority): Switch to using architecture masks instead 
of
ISA masks.
(rs6000_can_inline_p): Don't allow inlining if the callee requires a 
newer
architecture than the caller.
* config/rs6000/rs6000.h: Use rs6000-arch.def to create the 
architecture
masks.
* config/rs6000/rs6000.opt (rs6000_arch_flags): New target variable.
(x_rs6000_arch_flags): New save/restore field for rs6000_arch_flags.

Diff:
---
 gcc/config/rs6000/rs6000-arch.def |  48 +
 gcc/config/rs6000/rs6000.cc   | 215 +++---
 gcc/config/rs6000/rs6000.h|  24 +
 gcc/config/rs6000/rs6000.opt  |   8 ++
 4 files changed, 259 insertions(+), 36 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-arch.def 
b/gcc/config/rs6000/rs6000-arch.def
new file mode 100644
index 000..e5b6e958133
--- /dev/null
+++ b/gcc/config/rs6000/rs6000-arch.def
@@ -0,0 +1,48 @@
+/* IBM RS/6000 CPU architecture features by processor type.
+   Copyright (C) 1991-2024 Free Software Foundation, Inc.
+   Contributed by Richard Kenner (ken...@vlsi1.ultra.nyu.edu)
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+/* This file defines architecture features that are based on the -mcpu=
+   option, and not on user options that can be turned on or off.  The intention
+   is for newer processors (power7 and above) to not add new ISA bits for the
+   particular processor, but add these bits.  Otherwise we have to add a bunch
+   of hidden options, just so we have the proper ISA bits.
+
+   For example, in the past we added -mpower8-internal, so that on power8,
+   power9, and power10 would inherit the option,

[gcc(refs/users/meissner/heads/work176)] Use architecture flags for defining _ARCH_PWR macros.

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:582584801c3823d194edcece5048b9d4e0cbccf5

commit 582584801c3823d194edcece5048b9d4e0cbccf5
Author: Michael Meissner 
Date:   Fri Aug 16 22:35:24 2024 -0400

Use architecture flags for defining _ARCH_PWR macros.

For the newer architectures, this patch changes GCC to define the 
_ARCH_PWR
macros using the new architecture flags instead of relying on isa options 
like
-mpower10.

The -mpower8-internal, -mpower10, and -mpower11 options were removed.  The
-mpower11 option was removed completely, since it was just added in GCC 15. 
 The
other two options were marked as WarnRemoved, and the various ISA bits were
removed.

TARGET_POWER8 and TARGET_POWER10 were re-defined to use the architecture bits
instead of the ISA bits.

There are other internal isa bits that aren't removed with this patch 
because
the built-in function support uses those bits.

I have built both big endian and little endian bootstrap compilers and there
were no regressions.

In addition, I constructed a test case that used every architecture define 
(like
_ARCH_PWR4, etc.) and I also looked at the .machine directive generated.  I 
ran
this test for all supported combinations of -mcpu, big/little endian, and 
32/64
bit support.  Every single instance generated exactly the same code with the
patches installed compared to the compiler before installing the patches.

Can I install this patch on the GCC 15 trunk?

2024-08-16  Michael Meissner  

gcc/

* config/rs6000/rs6000-c.cc (rs6000_target_modify_macros): Add 
support to
use architecture flags instead of ISA flags for setting most of the
_ARCH_PWR* macros.
(rs6000_cpu_cpp_builtins): Update rs6000_target_modify_macros call.
* config/rs6000/rs6000-cpus.def (ISA_2_7_MASKS_SERVER): Remove
OPTION_MASK_POWER8.
(ISA_3_1_MASKS_SERVER): Remove OPTION_MASK_POWER10.
(POWER11_MASKS_SERVER): Remove OPTION_MASK_POWER11.
(POWERPC_MASKS): Remove OPTION_MASK_POWER8, OPTION_MASK_POWER10, and
OPTION_MASK_POWER11.
* config/rs6000/rs6000-protos.h (rs6000_target_modify_macros): 
Update
declaration.
(rs6000_target_modify_macros_ptr): Likewise.
* config/rs6000/rs6000.cc (rs6000_target_modify_macros_ptr): 
Likewise.
(rs6000_option_override_internal): Use architecture flags instead 
of ISA
flags.
(rs6000_opt_masks): Remove -mpower10 and -mpower11, which are no 
longer
in the ISA flags.
(rs6000_pragma_target_parse): Use architecture flags as well as ISA
flags.
* config/rs6000/rs6000.h (TARGET_POWER4): New macro.
(TARGET_POWER5): Likewise.
(TARGET_POWER5X): Likewise.
(TARGET_POWER6): Likewise.
(TARGET_POWER7): Likewise.
(TARGET_POWER8): Likewise.
(TARGET_POWER9): Likewise.
(TARGET_POWER10): Likewise.
(TARGET_POWER11): Likewise.
* config/rs6000/rs6000.opt (-mpower8-internal): Remove ISA flag 
bits.
(-mpower10): Likewise.
(-mpower11): Likewise.

Diff:
---
 gcc/config/rs6000/rs6000-c.cc | 27 +++
 gcc/config/rs6000/rs6000-cpus.def |  8 +---
 gcc/config/rs6000/rs6000-protos.h |  5 +++--
 gcc/config/rs6000/rs6000.cc   | 19 +++
 gcc/config/rs6000/rs6000.h| 20 
 gcc/config/rs6000/rs6000.opt  | 11 ++-
 6 files changed, 52 insertions(+), 38 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-c.cc b/gcc/config/rs6000/rs6000-c.cc
index 04882c396bf..c8f33289fa3 100644
--- a/gcc/config/rs6000/rs6000-c.cc
+++ b/gcc/config/rs6000/rs6000-c.cc
@@ -338,7 +338,8 @@ rs6000_define_or_undefine_macro (bool define_p, const char 
*name)
#pragma GCC target, we need to adjust the macros dynamically.  */
 
 void
-rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags)
+rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags,
+HOST_WIDE_INT arch_flags)
 {
   if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
 fprintf (stderr,
@@ -411,7 +412,7 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT 
flags)
summary of the flags associated with particular cpu
definitions.  */
 
-  /* rs6000_isa_flags based options.  */
+  /* rs6000_isa_flags and rs6000_arch_flags based options.  */
   rs6000_define_or_undefine_macro (define_p, "_ARCH_PPC");
   if ((flags & OPTION_MASK_PPC_GPOPT) != 0)
 rs6000_define_or_undefine_macro (define_p, "_ARCH_PPCSQ");
@@ -419,23 +420,25 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT 
flags)
 rs6000_define_or_undefine_macro (define_p, "_ARCH_PPCGR");
   if ((flags & OPTION_MASK_POWERPC64) != 0)
 rs6000_define_or_undefine_

[gcc(refs/users/meissner/heads/work176)] Do not allow -mvsx to boost processor to power7.

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:eeab60022333b52fefc8b17c56198d934170a8c5

commit eeab60022333b52fefc8b17c56198d934170a8c5
Author: Michael Meissner 
Date:   Fri Aug 16 22:36:24 2024 -0400

Do not allow -mvsx to boost processor to power7.

This patch restructures the code so that -mvsx for example will not silently
convert the processor to power7.  The user must now use -mcpu=power7 or 
higher.
This means if the user does -mvsx and the default processor does not have 
VSX
support, it will be an error.

I have built both big endian and little endian bootstrap compilers and there
were no regressions.

In addition, I constructed a test case that used every architecture define 
(like
_ARCH_PWR4, etc.) and I also looked at the .machine directive generated.  I 
ran
this test for all supported combinations of -mcpu, big/little endian, and 
32/64
bit support.  Every single instance generated exactly the same code with the
patches installed compared to the compiler before installing the patches.

Can I install this patch on the GCC 15 trunk?

2024-08-16  Michael Meissner  

gcc/

* config/rs6000/rs6000.cc (report_architecture_mismatch): New 
function.
Report an error if the user used an option such as -mvsx when the
default processor would not allow the option.
(rs6000_option_override_internal): Move some ISA checking code into
report_architecture_mismatch.

Diff:
---
 gcc/config/rs6000/rs6000.cc | 129 +++-
 1 file changed, 79 insertions(+), 50 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 2d775184f98..cdfd26da8b5 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -1172,6 +1172,7 @@ const int INSN_NOT_AVAILABLE = -1;
 static void rs6000_print_isa_options (FILE *, int, const char *,
  HOST_WIDE_INT, HOST_WIDE_INT);
 static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);
+static void report_architecture_mismatch (void);
 
 static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
 static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
@@ -3694,7 +3695,6 @@ rs6000_option_override_internal (bool global_init_p)
   bool ret = true;
 
   HOST_WIDE_INT set_masks;
-  HOST_WIDE_INT ignore_masks;
   int cpu_index = -1;
   int tune_index;
   struct cl_target_option *main_target_opt
@@ -3963,59 +3963,13 @@ rs6000_option_override_internal (bool global_init_p)
 dwarf_offset_size = POINTER_SIZE_UNITS;
 #endif
 
-  /* Handle explicit -mno-{altivec,vsx} and turn off all of
- the options that depend on those flags.  */
-  ignore_masks = rs6000_disable_incompatible_switches ();
-
-  /* For the newer switches (vsx, dfp, etc.) set some of the older options,
- unless the user explicitly used the -mno- to disable the code.  */
-  if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_MISC)
-rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
-  else if (TARGET_P9_MINMAX)
-{
-  if (cpu_index >= 0)
-   {
- if (cpu_index == PROCESSOR_POWER9)
-   {
- /* legacy behavior: allow -mcpu=power9 with certain
-capabilities explicitly disabled.  */
- rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
-   }
- else
-   error ("power9 target option is incompatible with %<%s=%> "
-  "for  less than power9", "-mcpu");
-   }
-  else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
-  != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
-  & rs6000_isa_flags_explicit))
-   /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
-  were explicitly cleared.  */
-   error ("%qs incompatible with explicitly disabled options",
-  "-mpower9-minmax");
-  else
-   rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
-}
-  else if (TARGET_P8_VECTOR || TARGET_POWER8 || TARGET_CRYPTO)
-rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
-  else if (TARGET_VSX)
-rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
-  else if (TARGET_POPCNTD)
-rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
-  else if (TARGET_DFP)
-rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
-  else if (TARGET_CMPB)
-rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
-  else if (TARGET_FPRND)
-rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
-  else if (TARGET_POPCNTB)
-rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
-  else if (TARGET_ALTIVEC)
-rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
+  /* Report trying to use things like -mmodulo to imply -mcpu=power9.  */
+  report_architecture_mismatch ();
 
   /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
  target att

[gcc(refs/users/meissner/heads/work176)] Change TARGET_POPCNTB to TARGET_POWER5

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:d83a1cd5ecc371c87813a2f8a1616d1fca72283d

commit d83a1cd5ecc371c87813a2f8a1616d1fca72283d
Author: Michael Meissner 
Date:   Fri Aug 16 22:37:10 2024 -0400

Change TARGET_POPCNTB to TARGET_POWER5

As part of the architecture flags patches, this patch changes the use of
TARGET_POPCNTB to TARGET_POWER5.  The POPCNTB instruction was added in ISA 
2.02
(power5).

I have built both big endian and little endian bootstrap compilers and there
were no regressions.

In addition, I constructed a test case that used every architecture define
(like _ARCH_PWR4, etc.) and I also looked at the .machine directive generated.
I ran this test for all supported combinations of -mcpu, big/little endian,
and 32/64 bit support.  Every single instance generated exactly the same code
with the patches installed compared to the compiler before installing the
patches.

Can I install this patch on the GCC 15 trunk?

2024-08-16  Michael Meissner  

* config/rs6000/rs6000-builtin.cc (rs6000_builtin_is_supported): Use
TARGET_POWER5 instead of TARGET_POPCNTB.
* config/rs6000/rs6000.h (TARGET_EXTRA_BUILTINS): Use TARGET_POWER5
instead of TARGET_POPCNTB.  Eliminate TARGET_CMPB and TARGET_POPCNTD
tests since TARGET_POWER5 will always be true for those tests.
(TARGET_FRE): Use TARGET_POWER5 instead of TARGET_POPCNTB.
(TARGET_FRSQRTES): Likewise.
* config/rs6000/rs6000.md (enabled attribute): Likewise.
(popcount): Use TARGET_POWER5 instead of TARGET_POPCNTB.  Drop
test for TARGET_POPCNTD (i.e. power7), since TARGET_POPCNTB will
always be set if TARGET_POPCNTD is set.
(popcntb2): Use TARGET_POWER5 instead of TARGET_POPCNTB.
(parity2): Likewise.
(parity2_cmpb): Remove TARGET_POPCNTB test, since it will always
be true when TARGET_CMPB (i.e. power6) is set.

Diff:
---
 gcc/config/rs6000/rs6000-builtin.cc |  2 +-
 gcc/config/rs6000/rs6000.h  |  8 +++-
 gcc/config/rs6000/rs6000.md | 10 +-
 3 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin.cc 
b/gcc/config/rs6000/rs6000-builtin.cc
index 9bdbae1ecf9..98a0545030c 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -155,7 +155,7 @@ rs6000_builtin_is_supported (enum rs6000_gen_builtins 
fncode)
 case ENB_ALWAYS:
   return true;
 case ENB_P5:
-  return TARGET_POPCNTB;
+  return TARGET_POWER5;
 case ENB_P6:
   return TARGET_CMPB;
 case ENB_P6_64:
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 292b2ca164d..4ad0fca1d60 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -547,9 +547,7 @@ extern int rs6000_vector_align[];
 
 #define TARGET_EXTRA_BUILTINS  (TARGET_POWERPC64\
 || TARGET_PPC_GPOPT /* 970/power4 */\
-|| TARGET_POPCNTB   /* ISA 2.02 */  \
-|| TARGET_CMPB  /* ISA 2.05 */  \
-|| TARGET_POPCNTD   /* ISA 2.06 */  \
+|| TARGET_POWER5/* ISA 2.02 & above */ \
 || TARGET_ALTIVEC   \
 || TARGET_VSX   \
 || TARGET_HARD_FLOAT)
@@ -563,9 +561,9 @@ extern int rs6000_vector_align[];
 #define TARGET_FRES(TARGET_HARD_FLOAT && TARGET_PPC_GFXOPT)
 
 #define TARGET_FRE (TARGET_HARD_FLOAT \
-&& (TARGET_POPCNTB || VECTOR_UNIT_VSX_P (DFmode)))
+&& (TARGET_POWER5 || VECTOR_UNIT_VSX_P (DFmode)))
 
-#define TARGET_FRSQRTES(TARGET_HARD_FLOAT && TARGET_POPCNTB \
+#define TARGET_FRSQRTES(TARGET_HARD_FLOAT && TARGET_POWER5 \
 && TARGET_PPC_GFXOPT)
 
 #define TARGET_FRSQRTE (TARGET_HARD_FLOAT \
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 8eda2f7bb0d..10d13bf812d 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -379,7 +379,7 @@
  (const_int 1)
 
  (and (eq_attr "isa" "p5")
- (match_test "TARGET_POPCNTB"))
+ (match_test "TARGET_POWER5"))
  (const_int 1)
 
  (and (eq_attr "isa" "p6")
@@ -2510,7 +2510,7 @@
 (define_expand "popcount2"
   [(set (match_operand:GPR 0 "gpc_reg_operand")
(popcount:GPR (match_operand:GPR 1 "gpc_reg_operand")))]
-  "TARGET_POPCNTB || TARGET_POPCNTD"
+  "TARGET_POWER5"
 {
   rs6000_emit_popcount (operands[0], operands[1]);
   DONE;
@@ -2520,7 +2520,7 @@
   [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
(unspec:GPR [(match_operand:GPR 1 "gpc_reg_operand" "r")]
UNSPEC_PO

[gcc(refs/users/meissner/heads/work176)] Change TARGET_FPRND to TARGET_POWER5X

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:9b740bf277b8db7b96f06d1ccf28d3ef2660c79f

commit 9b740bf277b8db7b96f06d1ccf28d3ef2660c79f
Author: Michael Meissner 
Date:   Fri Aug 16 22:38:21 2024 -0400

Change TARGET_FPRND to TARGET_POWER5X

As part of the architecture flags patches, this patch changes the use of
TARGET_FPRND to TARGET_POWER5X.  The FPRND instruction was added in power5+.

I have built both big endian and little endian bootstrap compilers and there
were no regressions.

In addition, I constructed a test case that used every architecture define
(like _ARCH_PWR4, etc.) and I also looked at the .machine directive generated.
I ran this test for all supported combinations of -mcpu, big/little endian,
and 32/64 bit support.  Every single instance generated exactly the same code
with the patches installed compared to the compiler before installing the
patches.

Can I install this patch on the GCC 15 trunk?

2024-08-16  Michael Meissner

* config/rs6000/rs6000.cc (report_architecture_mismatch): Use
TARGET_POWER5X instead of TARGET_FPRND.
* config/rs6000/rs6000.md (fmod3): Use TARGET_POWER5X instead 
of
TARGET_FPRND.
(remainder3): Likewise.
(fctiwuz_): Likewise.
(btrunc2): Likewise.
(ceil2): Likewise.
(floor2): Likewise.
(round): Likewise.

Diff:
---
 gcc/config/rs6000/rs6000.cc |  2 +-
 gcc/config/rs6000/rs6000.md | 14 +++---
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index cdfd26da8b5..1c8fa94f18c 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -25427,7 +25427,7 @@ report_architecture_mismatch (void)
 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
   else if (TARGET_CMPB)
 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
-  else if (TARGET_FPRND)
+  else if (TARGET_POWER5X)
 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
   else if (TARGET_POPCNTB)
 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 10d13bf812d..7f9fe609a03 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -5171,7 +5171,7 @@
(use (match_operand:SFDF 1 "gpc_reg_operand"))
(use (match_operand:SFDF 2 "gpc_reg_operand"))]
   "TARGET_HARD_FLOAT
-   && TARGET_FPRND
+   && TARGET_POWER5X
&& flag_unsafe_math_optimizations"
 {
   rtx div = gen_reg_rtx (mode);
@@ -5189,7 +5189,7 @@
(use (match_operand:SFDF 1 "gpc_reg_operand"))
(use (match_operand:SFDF 2 "gpc_reg_operand"))]
   "TARGET_HARD_FLOAT
-   && TARGET_FPRND
+   && TARGET_POWER5X
&& flag_unsafe_math_optimizations"
 {
   rtx div = gen_reg_rtx (mode);
@@ -6687,7 +6687,7 @@
 (define_insn "*friz"
   [(set (match_operand:DF 0 "gpc_reg_operand" "=d,wa")
(float:DF (fix:DI (match_operand:DF 1 "gpc_reg_operand" "d,wa"]
-  "TARGET_HARD_FLOAT && TARGET_FPRND
+  "TARGET_HARD_FLOAT && TARGET_POWER5X
&& flag_unsafe_math_optimizations && !flag_trapping_math && TARGET_FRIZ"
   "@
friz %0,%1
@@ -6815,7 +6815,7 @@
   [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d,wa")
(unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "d,wa")]
 UNSPEC_FRIZ))]
-  "TARGET_HARD_FLOAT && TARGET_FPRND"
+  "TARGET_HARD_FLOAT && TARGET_POWER5X"
   "@
friz %0,%1
xsrdpiz %x0,%x1"
@@ -6825,7 +6825,7 @@
   [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d,wa")
(unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "d,wa")]
 UNSPEC_FRIP))]
-  "TARGET_HARD_FLOAT && TARGET_FPRND"
+  "TARGET_HARD_FLOAT && TARGET_POWER5X"
   "@
frip %0,%1
xsrdpip %x0,%x1"
@@ -6835,7 +6835,7 @@
   [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d,wa")
(unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "d,wa")]
 UNSPEC_FRIM))]
-  "TARGET_HARD_FLOAT && TARGET_FPRND"
+  "TARGET_HARD_FLOAT && TARGET_POWER5X"
   "@
frim %0,%1
xsrdpim %x0,%x1"
@@ -6846,7 +6846,7 @@
   [(set (match_operand:SFDF 0 "gpc_reg_operand" "=")
(unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "")]
 UNSPEC_FRIN))]
-  "TARGET_HARD_FLOAT && TARGET_FPRND"
+  "TARGET_HARD_FLOAT && TARGET_POWER5X"
   "frin %0,%1"
   [(set_attr "type" "fp")])


[gcc(refs/users/meissner/heads/work176)] Change TARGET_CMPB to TARGET_POWER6

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:daa12c0fb7a81a446d45b3e25173fe62cce96c5a

commit daa12c0fb7a81a446d45b3e25173fe62cce96c5a
Author: Michael Meissner 
Date:   Fri Aug 16 22:39:19 2024 -0400

Change TARGET_CMPB to TARGET_POWER6

As part of the architecture flags patches, this patch changes the use of
TARGET_CMPB to TARGET_POWER6.  The CMPB instruction was added in power6 (ISA
2.05).

I have built both big endian and little endian bootstrap compilers and there
were no regressions.

In addition, I constructed a test case that used every architecture define
(like _ARCH_PWR4, etc.) and I also looked at the .machine directive generated.
I ran this test for all supported combinations of -mcpu, big/little endian,
and 32/64 bit support.  Every single instance generated exactly the same code
with the patches installed compared to the compiler before installing the
patches.

Can I install this patch on the GCC 15 trunk?

2024-08-16  Michael Meissner  

* config/rs6000/rs6000-builtin.cc (rs6000_builtin_is_supported): Use
TARGET_POWER6 instead of TARGET_CMPB.
* config/rs6000/rs6000.h (TARGET_FCFID): Merge tests for popcntb, 
cmpb,
and popcntd into a single test for TARGET_POWER5.
(TARGET_LFIWAX): Use TARGET_POWER6 instead of TARGET_CMPB.
* config/rs6000/rs6000.md (enabled attribute): Likewise.
(parity2_cmpb): Likewise.
(cmpb): Likewise.
(copysign3): Likewise.
(copysign3_fcpsgn): Likewise.
(cmpstrnsi): Likewise.
(cmpstrsi): Likewise.

Diff:
---
 gcc/config/rs6000/rs6000-builtin.cc |  4 ++--
 gcc/config/rs6000/rs6000.h  |  6 ++
 gcc/config/rs6000/rs6000.md | 16 
 3 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin.cc 
b/gcc/config/rs6000/rs6000-builtin.cc
index 98a0545030c..76421bd1de0 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -157,9 +157,9 @@ rs6000_builtin_is_supported (enum rs6000_gen_builtins 
fncode)
 case ENB_P5:
   return TARGET_POWER5;
 case ENB_P6:
-  return TARGET_CMPB;
+  return TARGET_POWER6;
 case ENB_P6_64:
-  return TARGET_CMPB && TARGET_POWERPC64;
+  return TARGET_POWER6 && TARGET_POWERPC64;
 case ENB_P7:
   return TARGET_POPCNTD;
 case ENB_P7_64:
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 4ad0fca1d60..924ca682172 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -448,13 +448,11 @@ extern int rs6000_vector_align[];
Enable 32-bit fcfid's on any of the switches for newer ISA machines.  */
 #define TARGET_FCFID   (TARGET_POWERPC64   \
 || TARGET_PPC_GPOPT/* 970/power4 */\
-|| TARGET_POPCNTB  /* ISA 2.02 */  \
-|| TARGET_CMPB /* ISA 2.05 */  \
-|| TARGET_POPCNTD) /* ISA 2.06 */
+|| TARGET_POWER5)  /* ISA 2.02 and above */ \
 
 #define TARGET_FCTIDZ  TARGET_FCFID
 #define TARGET_STFIWX  TARGET_PPC_GFXOPT
-#define TARGET_LFIWAX  TARGET_CMPB
+#define TARGET_LFIWAX  TARGET_POWER6
 #define TARGET_LFIWZX  TARGET_POPCNTD
 #define TARGET_FCFIDS  TARGET_POPCNTD
 #define TARGET_FCFIDU  TARGET_POPCNTD
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 7f9fe609a03..0c303087e94 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -383,7 +383,7 @@
  (const_int 1)
 
  (and (eq_attr "isa" "p6")
- (match_test "TARGET_CMPB"))
+ (match_test "TARGET_POWER6"))
  (const_int 1)
 
  (and (eq_attr "isa" "p7")
@@ -2544,7 +2544,7 @@
 (define_insn "parity2_cmpb"
   [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
(unspec:GPR [(match_operand:GPR 1 "gpc_reg_operand" "r")] 
UNSPEC_PARITY))]
-  "TARGET_CMPB"
+  "TARGET_POWER6"
   "prty %0,%1"
   [(set_attr "type" "popcnt")])
 
@@ -2597,7 +2597,7 @@
   [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
(unspec:GPR [(match_operand:GPR 1 "gpc_reg_operand" "r")
 (match_operand:GPR 2 "gpc_reg_operand" "r")] UNSPEC_CMPB))]
-  "TARGET_CMPB"
+  "TARGET_POWER6"
   "cmpb %0,%1,%2"
   [(set_attr "type" "cmp")])
 
@@ -5401,7 +5401,7 @@
&& ((TARGET_PPC_GFXOPT
 && !HONOR_NANS (mode)
 && !HONOR_SIGNED_ZEROS (mode))
-   || TARGET_CMPB
+   || TARGET_POWER6
|| VECTOR_UNIT_VSX_P (mode))"
 {
   /* Middle-end canonicalizes -fabs (x) to copysign (x, -1),
@@ -5422,7 +5422,7 @@
   if (!gpc_reg_operand (operands[2], mode))
 operands[2] = copy_to_mode_reg (mode, operands[2]);
 
-  if (TARGET_CMPB || VECTOR_UNIT_VSX_P (mode))
+  if (TARGET_POWER6 || VECTOR_UNIT_VSX_P (mode))
 {
   emit_insn (gen_copysign3_fcpsgn (ope

[gcc(refs/users/meissner/heads/work176)] Change TARGET_POPCNTD to TARGET_POWER7

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:2f2353e1a515954f9fe2e0e1e6afd825239f6be7

commit 2f2353e1a515954f9fe2e0e1e6afd825239f6be7
Author: Michael Meissner 
Date:   Fri Aug 16 22:40:12 2024 -0400

Change TARGET_POPCNTD to TARGET_POWER7

As part of the architecture flags patches, this patch changes the use of
TARGET_POPCNTD to TARGET_POWER7.  The POPCNTD instruction was added in 
power7
(ISA 2.06).

I have built both big endian and little endian bootstrap compilers and there
were no regressions.

In addition, I constructed a test case that used every architecture define
(like _ARCH_PWR4, etc.) and I also looked at the .machine directive generated.
I ran this test for all supported combinations of -mcpu, big/little endian,
and 32/64 bit support.  Every single instance generated exactly the same code
with the patches installed compared to the compiler before installing the
patches.

Can I install this patch on the GCC 15 trunk?

2024-08-16  Michael Meissner  

* config/rs6000/dfp.md (floatdidd2): Change TARGET_POPCNTD to
TARGET_POWER7.
* config/rs6000/rs6000-builtin.cc (rs6000_builtin_is_supported):
Likewise.
* config/rs6000/rs6000-string.cc (expand_block_compare_gpr): 
Likewise.
* config/rs6000/rs6000.cc (rs6000_hard_regno_mode_ok_uncached):
Likewise.
(rs6000_rtx_costs): Likewise.
(rs6000_emit_popcount): Likewise.
* config/rs6000/rs6000.h (TARGET_LDBRX): Likewise.
(TARGET_LFIWZX): Likewise.
(TARGET_FCFIDS): Likewise.
(TARGET_FCFIDU): Likewise.
(TARGET_FCFIDUS): Likewise.
(TARGET_FCTIDUZ): Likewise.
(TARGET_FCTIWUZ): Likewise.
(CTZ_DEFINED_VALUE_AT_ZERO): Likewise.
* config/rs6000/rs6000.md (enabled attribute): Likewise.
(ctz2): Likewise.
(popcntd2): Likewise.
(lrintsi2): Likewise.
(lrintsi): Likewise.
(lrintsi_di): Likewise.
(cmpmemsi): Likewise.
(bpermd_"): Likewise.
(addg6s): Likewise.
(cdtbcd): Likewise.
(cbcdtd): Likewise.
(div_): Likewise.

Diff:
---
 gcc/config/rs6000/dfp.md|  2 +-
 gcc/config/rs6000/rs6000-builtin.cc |  4 ++--
 gcc/config/rs6000/rs6000-string.cc  |  4 ++--
 gcc/config/rs6000/rs6000.cc |  6 +++---
 gcc/config/rs6000/rs6000.h  | 16 
 gcc/config/rs6000/rs6000.md | 24 
 6 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/gcc/config/rs6000/dfp.md b/gcc/config/rs6000/dfp.md
index fa9d7dd45dd..b8189390d41 100644
--- a/gcc/config/rs6000/dfp.md
+++ b/gcc/config/rs6000/dfp.md
@@ -214,7 +214,7 @@
 (define_insn "floatdidd2"
   [(set (match_operand:DD 0 "gpc_reg_operand" "=d")
(float:DD (match_operand:DI 1 "gpc_reg_operand" "d")))]
-  "TARGET_DFP && TARGET_POPCNTD"
+  "TARGET_DFP && TARGET_POWER7"
   "dcffix %0,%1"
   [(set_attr "type" "dfp")])
 
diff --git a/gcc/config/rs6000/rs6000-builtin.cc 
b/gcc/config/rs6000/rs6000-builtin.cc
index 76421bd1de0..dae43b672ea 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -161,9 +161,9 @@ rs6000_builtin_is_supported (enum rs6000_gen_builtins 
fncode)
 case ENB_P6_64:
   return TARGET_POWER6 && TARGET_POWERPC64;
 case ENB_P7:
-  return TARGET_POPCNTD;
+  return TARGET_POWER7;
 case ENB_P7_64:
-  return TARGET_POPCNTD && TARGET_POWERPC64;
+  return TARGET_POWER7 && TARGET_POWERPC64;
 case ENB_P8:
   return TARGET_POWER8;
 case ENB_P8V:
diff --git a/gcc/config/rs6000/rs6000-string.cc 
b/gcc/config/rs6000/rs6000-string.cc
index 55b4133b1a3..3674c4bd984 100644
--- a/gcc/config/rs6000/rs6000-string.cc
+++ b/gcc/config/rs6000/rs6000-string.cc
@@ -1948,8 +1948,8 @@ expand_block_compare_gpr(unsigned HOST_WIDE_INT bytes, 
unsigned int base_align,
 bool
 expand_block_compare (rtx operands[])
 {
-  /* TARGET_POPCNTD is already guarded at expand cmpmemsi.  */
-  gcc_assert (TARGET_POPCNTD);
+  /* TARGET_POWER7 is already guarded at expand cmpmemsi.  */
+  gcc_assert (TARGET_POWER7);
 
   /* For P8, this case is complicated to handle because the subtract
  with carry instructions do not generate the 64-bit carry and so
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 1c8fa94f18c..5dad64eecb3 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -1998,7 +1998,7 @@ rs6000_hard_regno_mode_ok_uncached (int regno, 
machine_mode mode)
  if(GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
return 1;
 
- if (TARGET_POPCNTD && mode == SImode)
+ if (TARGET_POWER7 && mode == SImode)
return 1;
 
  if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
@@ -22472,7 +22472,7 @@ rs6000_rtx_costs (

[gcc(refs/users/meissner/heads/work176)] Change TARGET_MODULO to TARGET_POWER9

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:255d09dcd5282a6e931be6fa4d245689e78811cb

commit 255d09dcd5282a6e931be6fa4d245689e78811cb
Author: Michael Meissner 
Date:   Fri Aug 16 22:41:01 2024 -0400

Change TARGET_MODULO to TARGET_POWER9

As part of the architecture flags patches, this patch changes the use of
TARGET_MODULO to TARGET_POWER9.  The modulo instructions were added in 
power9 (ISA
3.0).  Note, I did not change the uses of TARGET_MODULO where it was 
explicitly
generating different code if the machine had a modulo instruction.

I have built both big endian and little endian bootstrap compilers and there
were no regressions.

In addition, I constructed a test case that used every architecture define
(like _ARCH_PWR4, etc.) and I also looked at the .machine directive generated.
I ran this test for all supported combinations of -mcpu, big/little endian,
and 32/64 bit support.  Every single instance generated exactly the same code
with the patches installed compared to the compiler before installing the
patches.

Can I install this patch on the GCC 15 trunk?

2024-08-16  Michael Meissner  

* config/rs6000/rs6000-builtin.cc (rs6000_builtin_is_supported): Use
TARGET_POWER9 instead of TARGET_MODULO.
* config/rs6000/rs6000.h (TARGET_CTZ): Likewise.
(TARGET_EXTSWSLI): Likewise.
(TARGET_MADDLD): Likewise.
* config/rs6000/rs6000.md (enabled attribute): Likewise.

Diff:
---
 gcc/config/rs6000/rs6000-builtin.cc | 4 ++--
 gcc/config/rs6000/rs6000.h  | 6 +++---
 gcc/config/rs6000/rs6000.md | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin.cc 
b/gcc/config/rs6000/rs6000-builtin.cc
index dae43b672ea..b6093b3cb64 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -169,9 +169,9 @@ rs6000_builtin_is_supported (enum rs6000_gen_builtins 
fncode)
 case ENB_P8V:
   return TARGET_P8_VECTOR;
 case ENB_P9:
-  return TARGET_MODULO;
+  return TARGET_POWER9;
 case ENB_P9_64:
-  return TARGET_MODULO && TARGET_POWERPC64;
+  return TARGET_POWER9 && TARGET_POWERPC64;
 case ENB_P9V:
   return TARGET_P9_VECTOR;
 case ENB_P10:
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index b5b9c01683b..14dc66676c1 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -461,9 +461,9 @@ extern int rs6000_vector_align[];
 #define TARGET_FCTIWUZ TARGET_POWER7
 /* Only powerpc64 and powerpc476 support fctid.  */
 #define TARGET_FCTID   (TARGET_POWERPC64 || rs6000_cpu == PROCESSOR_PPC476)
-#define TARGET_CTZ TARGET_MODULO
-#define TARGET_EXTSWSLI(TARGET_MODULO && TARGET_POWERPC64)
-#define TARGET_MADDLD  TARGET_MODULO
+#define TARGET_CTZ TARGET_POWER9
+#define TARGET_EXTSWSLI(TARGET_POWER9 && TARGET_POWERPC64)
+#define TARGET_MADDLD  TARGET_POWER9
 
 /* TARGET_DIRECT_MOVE is redundant to TARGET_P8_VECTOR, so alias it to that.  
*/
 #define TARGET_DIRECT_MOVE TARGET_P8_VECTOR
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index bff898a4eff..fc0d454e9a4 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -403,7 +403,7 @@
  (const_int 1)
 
  (and (eq_attr "isa" "p9")
- (match_test "TARGET_MODULO"))
+ (match_test "TARGET_POWER9"))
  (const_int 1)
 
  (and (eq_attr "isa" "p9v")


[gcc(refs/users/meissner/heads/work176)] Update tests to work with architecture flags changes.

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:9e3ab515a72678f56a9b34aa80ce413a99c7bb84

commit 9e3ab515a72678f56a9b34aa80ce413a99c7bb84
Author: Michael Meissner 
Date:   Sat Aug 17 01:27:41 2024 -0400

Update tests to work with architecture flags changes.

Two tests used -mvsx to raise the processor level to at least power7.  These
tests were rewritten to add cpu=power7 support.

I have built both big endian and little endian bootstrap compilers and there
were no regressions.

In addition, I constructed a test case that used every architecture define
(like _ARCH_PWR4, etc.) and I also looked at the .machine directive generated.
I ran this test for all supported combinations of -mcpu, big/little endian,
and 32/64 bit support.  Every single instance generated exactly the same code
with the patches installed compared to the compiler before installing the
patches.

Can I install this patch on the GCC 15 trunk?

2024-08-17  Michael Meissner  

gcc/testsuite/

* gcc.target/powerpc/ppc-target-4.c: Rewrite the test to add 
cpu=power7
when we need to add VSX support.  Add test for adding cpu=power7 
no-vsx
to generate only Altivec instructions.
* gcc.target/powerpc/pr115688.c: Add cpu=power7 when requesting VSX
instructions.

Diff:
---
 gcc/testsuite/gcc.target/powerpc/ppc-target-4.c | 38 +++--
 gcc/testsuite/gcc.target/powerpc/pr115688.c |  3 +-
 2 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-target-4.c 
b/gcc/testsuite/gcc.target/powerpc/ppc-target-4.c
index feef76db461..5e2ecf34f24 100644
--- a/gcc/testsuite/gcc.target/powerpc/ppc-target-4.c
+++ b/gcc/testsuite/gcc.target/powerpc/ppc-target-4.c
@@ -2,7 +2,7 @@
 /* { dg-skip-if "" { powerpc*-*-darwin* } } */
 /* { dg-require-effective-target powerpc_fprs } */
 /* { dg-options "-O2 -ffast-math -mdejagnu-cpu=power5 -mno-altivec 
-mabi=altivec -fno-unroll-loops" } */
-/* { dg-final { scan-assembler-times "vaddfp" 1 } } */
+/* { dg-final { scan-assembler-times "vaddfp" 2 } } */
 /* { dg-final { scan-assembler-times "xvaddsp" 1 } } */
 /* { dg-final { scan-assembler-times "fadds" 1 } } */
 
@@ -18,10 +18,6 @@
 #error "__VSX__ should not be defined."
 #endif
 
-#pragma GCC target("altivec,vsx")
-#include 
-#pragma GCC reset_options
-
 #pragma GCC push_options
 #pragma GCC target("altivec,no-vsx")
 
@@ -33,6 +29,7 @@
 #error "__VSX__ should not be defined."
 #endif
 
+/* Altivec build, generate vaddfp.  */
 void
 av_add (vector float *a, vector float *b, vector float *c)
 {
@@ -40,10 +37,11 @@ av_add (vector float *a, vector float *b, vector float *c)
   unsigned long n = SIZE / 4;
 
   for (i = 0; i < n; i++)
-a[i] = vec_add (b[i], c[i]);
+a[i] = b[i] + c[i];
 }
 
-#pragma GCC target("vsx")
+/* cpu=power7 must be used to enable VSX.  */
+#pragma GCC target("cpu=power7,vsx")
 
 #ifndef __ALTIVEC__
 #error "__ALTIVEC__ should be defined."
@@ -53,6 +51,7 @@ av_add (vector float *a, vector float *b, vector float *c)
 #error "__VSX__ should be defined."
 #endif
 
+/* VSX build on power7, generate xsaddsp.  */
 void
 vsx_add (vector float *a, vector float *b, vector float *c)
 {
@@ -60,11 +59,31 @@ vsx_add (vector float *a, vector float *b, vector float *c)
   unsigned long n = SIZE / 4;
 
   for (i = 0; i < n; i++)
-a[i] = vec_add (b[i], c[i]);
+a[i] = b[i] + c[i];
+}
+
+#pragma GCC target("cpu=power7,no-vsx")
+
+#ifndef __ALTIVEC__
+#error "__ALTIVEC__ should be defined."
+#endif
+
+#ifdef __VSX__
+#error "__VSX__ should not be defined."
+#endif
+
+/* Altivec build on power7 with no VSX, generate vaddfp.  */
+void
+av2_add (vector float *a, vector float *b, vector float *c)
+{
+  unsigned long i;
+  unsigned long n = SIZE / 4;
+
+  for (i = 0; i < n; i++)
+a[i] = b[i] + c[i];
 }
 
 #pragma GCC pop_options
-#pragma GCC target("no-vsx,no-altivec")
 
 #ifdef __ALTIVEC__
 #error "__ALTIVEC__ should not be defined."
@@ -74,6 +93,7 @@ vsx_add (vector float *a, vector float *b, vector float *c)
 #error "__VSX__ should not be defined."
 #endif
 
+/* Default power5 build, generate scalar fadds.  */
 void
 norm_add (float *a, float *b, float *c)
 {
diff --git a/gcc/testsuite/gcc.target/powerpc/pr115688.c 
b/gcc/testsuite/gcc.target/powerpc/pr115688.c
index 5222e66ef17..00c7c301436 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr115688.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr115688.c
@@ -7,7 +7,8 @@
 
 /* Verify there is no ICE under 32 bit env.  */
 
-__attribute__((target("vsx")))
+/* cpu=power7 must be used to enable VSX.  */
+__attribute__((target("cpu=power7,vsx")))
 int test (void)
 {
   return 0;


[gcc(refs/users/meissner/heads/work176)] Update ChangeLog.*

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:df189b69e46ecf12ff284d388fb279d300b78676

commit df189b69e46ecf12ff284d388fb279d300b78676
Author: Michael Meissner 
Date:   Sat Aug 17 01:34:04 2024 -0400

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.meissner | 400 -
 1 file changed, 399 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.meissner b/gcc/ChangeLog.meissner
index 6bd73d667fe..24abcad81cb 100644
--- a/gcc/ChangeLog.meissner
+++ b/gcc/ChangeLog.meissner
@@ -1,6 +1,404 @@
+ Branch work176, patch #9 
+
+Add support for -mcpu=future
+
+This patch adds the support that can be used in developing GCC support for
+future PowerPC processors.
+
+2024-08-02  Michael Meissner  
+
+   * config/rs6000/rs6000-arch.def: Add future processor.
+   * config/rs6000/rs6000-c.cc (rs6000_target_modify_macros): Define
+   _ARCH_FUTURE if -mcpu=future.
+   * config/rs6000/rs6000-cpus.def (FUTURE_MASKS_SERVER): New macro.
+   (future cpu): Add support for -mcpu=future.
+   * config/rs6000/rs6000-opts.h (enum processor_type): Likewise.
+   * config/rs6000/rs6000-tables.opt: Regenerate.
+   * config/rs6000/rs6000.cc (get_arch_flags): Likewise.
+   (rs6000_option_override_internal): Likewise.
+   (rs6000_machine_from_flags): Likewise.
+   (rs6000_reassociation_width): Likewise.
+   (rs6000_adjust_cost): Likewise.
+   (rs6000_issue_rate): Likewise.
+   (rs6000_sched_reorder): Likewise.
+   (rs6000_sched_reorder2): Likewise.
+   (rs6000_register_move_cost): Likewise.
+   * config/rs6000/rs6000.h (TARGET_FUTURE): New macro.
+   * config/rs6000/rs6000.md (cpu attribute): Add future processor.
+
+ Branch work176, patch #8 
+
+Change TARGET_MODULO to TARGET_POWER9
+
+As part of the architecture flags patches, this patch changes the use of
+TARGET_MODULO to TARGET_POWER9.  The modulo instructions were added in power9 (ISA
+3.0).  Note, I did not change the uses of TARGET_MODULO where it was explicitly
+generating different code if the machine had a modulo instruction.
+
+I have built both big endian and little endian bootstrap compilers and there
+were no regressions.
+
+In addition, I constructed a test case that used every architecture define (like
+_ARCH_PWR4, etc.) and I also looked at the .machine directive generated.  I ran
+this test for all supported combinations of -mcpu, big/little endian, and 32/64
+bit support.  Every single instance generated exactly the same code with the
+patches installed compared to the compiler before installing the patches.
+
+Can I install this patch on the GCC 15 trunk?
+
+2024-08-01  Michael Meissner  
+
+   * config/rs6000/rs6000-builtin.cc (rs6000_builtin_is_supported): Use
+   TARGET_POWER9 instead of TARGET_MODULO.
+   * config/rs6000/rs6000.h (TARGET_CTZ): Likewise.
+   (TARGET_EXTSWSLI): Likewise.
+   (TARGET_MADDLD): Likewise.
+   * config/rs6000/rs6000.md (enabled attribute): Likewise.
+
+ Branch work176, patch #7 
+
+Change TARGET_POPCNTD to TARGET_POWER7
+
+As part of the architecture flags patches, this patch changes the use of
+TARGET_POPCNTD to TARGET_POWER7.  The POPCNTD instruction was added in power7
+(ISA 2.06).
+
+I have built both big endian and little endian bootstrap compilers and there
+were no regressions.
+
+In addition, I constructed a test case that used every architecture define (like
+_ARCH_PWR4, etc.) and I also looked at the .machine directive generated.  I ran
+this test for all supported combinations of -mcpu, big/little endian, and 32/64
+bit support.  Every single instance generated exactly the same code with the
+patches installed compared to the compiler before installing the patches.
+
+Can I install this patch on the GCC 15 trunk?
+
+2024-08-01  Michael Meissner  
+
+   * config/rs6000/dfp.md (floatdidd2): Change TARGET_POPCNTD to
+   TARGET_POWER7.
+   * config/rs6000/rs6000-builtin.cc (rs6000_builtin_is_supported):
+   Likewise.
+   * config/rs6000/rs6000-string.cc (expand_block_compare_gpr): Likewise.
+   * config/rs6000/rs6000.cc (rs6000_hard_regno_mode_ok_uncached):
+   Likewise.
+   (rs6000_rtx_costs): Likewise.
+   (rs6000_emit_popcount): Likewise.
+   * config/rs6000/rs6000.h (TARGET_LDBRX): Likewise.
+   (TARGET_LFIWZX): Likewise.
+   (TARGET_FCFIDS): Likewise.
+   (TARGET_FCFIDU): Likewise.
+   (TARGET_FCFIDUS): Likewise.
+   (TARGET_FCTIDUZ): Likewise.
+   (TARGET_FCTIWUZ): Likewise.
+   (CTZ_DEFINED_VALUE_AT_ZERO): Likewise.
+   * config/rs6000/rs6000.md (enabled attribute): Likewise.
+   (ctz2): Likewise.
+   (popcntd2): Likewise.
+   (lrintsi2): Likewise.
+   (lrintsi): Likewise.
+   (lrintsi_di): Likewise.
+   (cmpmemsi): Likewise.
+   (bpermd_"): Likewise.
+   (addg6s): Likewise.
+   (cdtbcd): L

[gcc(refs/users/meissner/heads/work176)] Add support for -mcpu=future

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:3031a7ca9367f11554c16c035a8642435a1c147e

commit 3031a7ca9367f11554c16c035a8642435a1c147e
Author: Michael Meissner 
Date:   Sat Aug 17 03:24:08 2024 -0400

Add support for -mcpu=future

This patch adds the support that can be used in developing GCC support for
future PowerPC processors.

2024-08-17  Michael Meissner  

* config.gcc (powerpc*-*-*): Add support for --with-cpu=future.
* config/rs6000/aix71.h (ASM_CPU_SPEC): Add support for -mcpu=future.
* config/rs6000/aix72.h (ASM_CPU_SPEC): Likewise.
* config/rs6000/aix73.h (ASM_CPU_SPEC): Likewise.
* config/rs6000/driver-rs6000.cc (asm_names): Likewise.
* config/rs6000/rs6000-arch.def: Add future cpu.
* config/rs6000/rs6000-c.cc (rs6000_target_modify_macros): If
-mcpu=future, define _ARCH_FUTURE.
* config/rs6000/rs6000-cpus.def (FUTURE_MASKS_SERVER): New macro.
(future cpu): Define.
* config/rs6000/rs6000-opts.h (enum processor_type): Add
PROCESSOR_FUTURE.
* config/rs6000/rs6000-tables.opt: Regenerate.
* config/rs6000/rs6000.cc (power10_cost): Update comment.
(get_arch_flags): Add support for future processor.
(rs6000_option_override_internal): Likewise.
(rs6000_machine_from_flags): Likewise.
(rs6000_reassociation_width): Likewise.
(rs6000_adjust_cost): Likewise.
(rs6000_issue_rate): Likewise.
(rs6000_sched_reorder): Likewise.
(rs6000_sched_reorder2): Likewise.
(rs6000_register_move_cost): Likewise.
* config/rs6000/rs6000.h (ASM_CPU_SPEC): Likewise.
(TARGET_POWER11): New macro.
* config/rs6000/rs6000.md (cpu attribute): Likewise.

Diff:
---
 gcc/config.gcc  |  4 ++--
 gcc/config/rs6000/aix71.h   |  1 +
 gcc/config/rs6000/aix72.h   |  1 +
 gcc/config/rs6000/aix73.h   |  1 +
 gcc/config/rs6000/driver-rs6000.cc  |  2 ++
 gcc/config/rs6000/rs6000-arch.def   |  1 +
 gcc/config/rs6000/rs6000-c.cc   |  2 ++
 gcc/config/rs6000/rs6000-cpus.def   |  3 +++
 gcc/config/rs6000/rs6000-opts.h |  1 +
 gcc/config/rs6000/rs6000-tables.opt | 11 +++
 gcc/config/rs6000/rs6000.cc | 34 ++
 gcc/config/rs6000/rs6000.h  |  2 ++
 gcc/config/rs6000/rs6000.md |  2 +-
 13 files changed, 50 insertions(+), 15 deletions(-)

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 2c0f4518638..7fe05121389 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -533,7 +533,7 @@ powerpc*-*-*)
extra_headers="${extra_headers} ppu_intrinsics.h spu2vmx.h vec_types.h 
si2vmx.h"
extra_headers="${extra_headers} amo.h"
case x$with_cpu in
-   
xpowerpc64|xdefault64|x6[23]0|x970|xG5|xpower[3456789]|xpower1[01]|xpower6x|xrs64a|xcell|xa2|xe500mc64|xe5500|xe6500)
+   
xpowerpc64|xdefault64|x6[23]0|x970|xG5|xpower[3456789]|xpower1[01]|xpower6x|xrs64a|xcell|xa2|xe500mc64|xe5500|xe6500|xfuture)
cpu_is_64bit=yes
;;
esac
@@ -5640,7 +5640,7 @@ case "${target}" in
tm_defines="${tm_defines} CONFIG_PPC405CR"
eval "with_$which=405"
;;
-   "" | common | native \
+   "" | common | native | future \
| power[3456789] | power1[01] | power5+ | power6x \
| powerpc | powerpc64 | powerpc64le \
| rs64 \
diff --git a/gcc/config/rs6000/aix71.h b/gcc/config/rs6000/aix71.h
index 41037b3852d..570ddcc451d 100644
--- a/gcc/config/rs6000/aix71.h
+++ b/gcc/config/rs6000/aix71.h
@@ -79,6 +79,7 @@ do {  
\
 #undef ASM_CPU_SPEC
 #define ASM_CPU_SPEC \
 "%{mcpu=native: %(asm_cpu_native); \
+  mcpu=future: -mfuture; \
   mcpu=power11: -mpwr11; \
   mcpu=power10: -mpwr10; \
   mcpu=power9: -mpwr9; \
diff --git a/gcc/config/rs6000/aix72.h b/gcc/config/rs6000/aix72.h
index fe59f8319b4..242ca94bd06 100644
--- a/gcc/config/rs6000/aix72.h
+++ b/gcc/config/rs6000/aix72.h
@@ -79,6 +79,7 @@ do {  
\
 #undef ASM_CPU_SPEC
 #define ASM_CPU_SPEC \
 "%{mcpu=native: %(asm_cpu_native); \
+  mcpu=future: -mfuture; \
   mcpu=power11: -mpwr11; \
   mcpu=power10: -mpwr10; \
   mcpu=power9: -mpwr9; \
diff --git a/gcc/config/rs6000/aix73.h b/gcc/config/rs6000/aix73.h
index 1318b0b3662..2bd6b4bb3c4 100644
--- a/gcc/config/rs6000/aix73.h
+++ b/gcc/config/rs6000/aix73.h
@@ -79,6 +79,7 @@ do {  
\
 #undef ASM_CPU_SPEC
 #define ASM_CPU_SPEC \
 "%{mcpu=native: %(asm_cpu_native); \
+  mcpu=future: -mfuture; \
   mcpu=power11: -mpwr11; 

[gcc(refs/users/meissner/heads/work176)] Add -mcpu=future tuning support.

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:aa65901ff72a56dda008d78e421ff832595306d2

commit aa65901ff72a56dda008d78e421ff832595306d2
Author: Michael Meissner 
Date:   Sat Aug 17 03:24:57 2024 -0400

Add -mcpu=future tuning support.

This patch makes -mtune=future use the same tuning decision as -mtune=power11.

2024-08-17  Michael Meissner  

gcc/

* config/rs6000/power10.md (all reservations): Add future as an
alternative to power10 and power11.

Diff:
---
 gcc/config/rs6000/power10.md | 144 +--
 1 file changed, 72 insertions(+), 72 deletions(-)

diff --git a/gcc/config/rs6000/power10.md b/gcc/config/rs6000/power10.md
index 2310c460345..e42b057dc45 100644
--- a/gcc/config/rs6000/power10.md
+++ b/gcc/config/rs6000/power10.md
@@ -1,4 +1,4 @@
-;; Scheduling description for the IBM Power10 and Power11 processors.
+;; Scheduling description for the IBM Power10, Power11, and Future processors.
 ;; Copyright (C) 2020-2024 Free Software Foundation, Inc.
 ;;
 ;; Contributed by Pat Haugen (pthau...@us.ibm.com).
@@ -97,12 +97,12 @@
(eq_attr "update" "no")
(eq_attr "size" "!128")
(eq_attr "prefixed" "no")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_any_power10,LU_power10")
 
 (define_insn_reservation "power10-fused-load" 4
   (and (eq_attr "type" "fused_load_cmpi,fused_addis_load,fused_load_load")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_even_power10,LU_power10")
 
 (define_insn_reservation "power10-prefixed-load" 4
@@ -110,13 +110,13 @@
(eq_attr "update" "no")
(eq_attr "size" "!128")
(eq_attr "prefixed" "yes")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_even_power10,LU_power10")
 
 (define_insn_reservation "power10-load-update" 4
   (and (eq_attr "type" "load")
(eq_attr "update" "yes")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_even_power10,LU_power10+SXU_power10")
 
 (define_insn_reservation "power10-fpload-double" 4
@@ -124,7 +124,7 @@
(eq_attr "update" "no")
(eq_attr "size" "64")
(eq_attr "prefixed" "no")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_any_power10,LU_power10")
 
 (define_insn_reservation "power10-prefixed-fpload-double" 4
@@ -132,14 +132,14 @@
(eq_attr "update" "no")
(eq_attr "size" "64")
(eq_attr "prefixed" "yes")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_even_power10,LU_power10")
 
 (define_insn_reservation "power10-fpload-update-double" 4
   (and (eq_attr "type" "fpload")
(eq_attr "update" "yes")
(eq_attr "size" "64")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_even_power10,LU_power10+SXU_power10")
 
 ; SFmode loads are cracked and have additional 3 cycles over DFmode
@@ -148,27 +148,27 @@
   (and (eq_attr "type" "fpload")
(eq_attr "update" "no")
(eq_attr "size" "32")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_even_power10,LU_power10")
 
 (define_insn_reservation "power10-fpload-update-single" 7
   (and (eq_attr "type" "fpload")
(eq_attr "update" "yes")
(eq_attr "size" "32")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_even_power10,LU_power10+SXU_power10")
 
 (define_insn_reservation "power10-vecload" 4
   (and (eq_attr "type" "vecload")
(eq_attr "size" "!256")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_any_power10,LU_power10")
 
 ; lxvp
 (define_insn_reservation "power10-vecload-pair" 4
   (and (eq_attr "type" "vecload")
(eq_attr "size" "256")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_even_power10,LU_power10+SXU_power10")
 
 ; Store Unit
@@ -178,12 +178,12 @@
(eq_attr "prefixed" "no")
(eq_attr "size" "!128")
(eq_attr "size" "!256")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_any_power10,STU_power10")
 
 (define_insn_reservation "power10-fused-store" 0
   (and (eq_attr "type" "fused_store_store")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_even_power10,STU_power10")
 
 (define_insn_reservation "power10-prefixed-store" 0
@@ -191,52 +191,52 @@
(eq_attr "prefixed" "yes")
(eq_attr "size" "!128")
(eq_attr "size" "!256")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_even_power10,STU_power10")
 
 ; Update forms have 2 cycle latency for updated

[gcc(refs/users/meissner/heads/work176)] Update ChangeLog.*

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:03ac745bc431b766a951e55d2911ef1f571ccb6b

commit 03ac745bc431b766a951e55d2911ef1f571ccb6b
Author: Michael Meissner 
Date:   Sat Aug 17 05:40:54 2024 -0400

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.meissner | 49 +
 1 file changed, 49 insertions(+)

diff --git a/gcc/ChangeLog.meissner b/gcc/ChangeLog.meissner
index 24abcad81cb..d1366c23650 100644
--- a/gcc/ChangeLog.meissner
+++ b/gcc/ChangeLog.meissner
@@ -1,3 +1,52 @@
+ Branch work176, patch #21 
+
+Add -mcpu=future tuning support.
+
+This patch makes -mtune=future use the same tuning decision as -mtune=power11.
+
+2024-08-02  Michael Meissner  
+
+gcc/
+
+   * config/rs6000/power10.md (all reservations): Add future as an
+   alternative to power10 and power11.
+
+ Branch work176, patch #20 
+
+Add support for -mcpu=future
+
+This patch adds the support that can be used in developing GCC support for
+future PowerPC processors.
+
+2024-08-02  Michael Meissner  
+
+   * config.gcc (powerpc*-*-*): Add support for --with-cpu=future.
+   * config/rs6000/aix71.h (ASM_CPU_SPEC): Add support for -mcpu=future.
+   * config/rs6000/aix72.h (ASM_CPU_SPEC): Likewise.
+   * config/rs6000/aix73.h (ASM_CPU_SPEC): Likewise.
+   * config/rs6000/driver-rs6000.cc (asm_names): Likewise.
+   * config/rs6000/rs6000-arch.def: Add future cpu.
+   * config/rs6000/rs6000-c.cc (rs6000_target_modify_macros): If
+   -mcpu=future, define _ARCH_FUTURE.
+   * config/rs6000/rs6000-cpus.def (FUTURE_MASKS_SERVER): New macro.
+   (future cpu): Define.
+   * config/rs6000/rs6000-opts.h (enum processor_type): Add
+   PROCESSOR_FUTURE.
+   * config/rs6000/rs6000-tables.opt: Regenerate.
+   * config/rs6000/rs6000.cc (power10_cost): Update comment.
+   (get_arch_flags): Add support for future processor.
+   (rs6000_option_override_internal): Likewise.
+   (rs6000_machine_from_flags): Likewise.
+   (rs6000_reassociation_width): Likewise.
+   (rs6000_adjust_cost): Likewise.
+   (rs6000_issue_rate): Likewise.
+   (rs6000_sched_reorder): Likewise.
+   (rs6000_sched_reorder2): Likewise.
+   (rs6000_register_move_cost): Likewise.
+   * config/rs6000/rs6000.h (ASM_CPU_SPEC): Likewise.
+   (TARGET_POWER11): New macro.
+   * config/rs6000/rs6000.md (cpu attribute): Likewise.
+
  Branch work176, patch #9 
 
 Add support for -mcpu=future


[gcc(refs/users/meissner/heads/work176)] Merge commit 'refs/users/meissner/heads/work176' of git+ssh://gcc.gnu.org/git/gcc into me/work176

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:248fc70e14ee9c6a974a44bc0bbbd82d3bb3b8dc

commit 248fc70e14ee9c6a974a44bc0bbbd82d3bb3b8dc
Merge: 03ac745bc43 04913bc20cb
Author: Michael Meissner 
Date:   Mon Aug 19 13:04:41 2024 -0400

Merge commit 'refs/users/meissner/heads/work176' of 
git+ssh://gcc.gnu.org/git/gcc into me/work176

Diff:


[gcc(refs/users/meissner/heads/work176-bugs)] Merge commit 'refs/users/meissner/heads/work176-bugs' of git+ssh://gcc.gnu.org/git/gcc into me/work1

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:bb95d1744c6896582aa054923434e8dccd6f2a1c

commit bb95d1744c6896582aa054923434e8dccd6f2a1c
Merge: 1833352955b 0ec692e56ea
Author: Michael Meissner 
Date:   Mon Aug 19 13:08:44 2024 -0400

Merge commit 'refs/users/meissner/heads/work176-bugs' of 
git+ssh://gcc.gnu.org/git/gcc into me/work176-bugs

Diff:


[gcc(refs/users/meissner/heads/work176-bugs)] Add ChangeLog.bugs and update REVISION.

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:1833352955bb5ce263780493e895c6edd23cfd01

commit 1833352955bb5ce263780493e895c6edd23cfd01
Author: Michael Meissner 
Date:   Fri Aug 16 20:04:36 2024 -0400

Add ChangeLog.bugs and update REVISION.

2024-08-16  Michael Meissner  

gcc/

* ChangeLog.bugs: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.bugs | 6 ++
 gcc/REVISION   | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.bugs b/gcc/ChangeLog.bugs
new file mode 100644
index 000..c36f5407481
--- /dev/null
+++ b/gcc/ChangeLog.bugs
@@ -0,0 +1,6 @@
+ Branch work176-bugs, baseline 
+
+2024-08-16   Michael Meissner  
+
+   Clone branch
+
diff --git a/gcc/REVISION b/gcc/REVISION
index 9974885ad20..4bbc2ba400f 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work176 branch
+work176-bugs branch


[gcc/meissner/heads/work176-bugs] (95 commits) Merge commit 'refs/users/meissner/heads/work176-bugs' of gi

2024-08-19 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work176-bugs' was updated to point to:

 bb95d1744c6... Merge commit 'refs/users/meissner/heads/work176-bugs' of gi

It previously pointed to:

 0ec692e56ea... Add ChangeLog.bugs and update REVISION.

Diff:

Summary of changes (added commits):
---

  bb95d17... Merge commit 'refs/users/meissner/heads/work176-bugs' of gi
  1833352... Add ChangeLog.bugs and update REVISION.
  248fc70... Merge commit 'refs/users/meissner/heads/work176' of git+ssh (*)
  03ac745... Update ChangeLog.* (*)
  aa65901... Add -mcpu=future tuning support. (*)
  3031a7c... Add support for -mcpu=future (*)
  df189b6... Update ChangeLog.* (*)
  9e3ab51... Update tests to work with architecture flags changes. (*)
  255d09d... Change TARGET_MODULO to TARGET_POWER9 (*)
  2f2353e... Change TARGET_POPCNTD to TARGET_POWER7 (*)
  daa12c0... Change TARGET_CMPB to TARGET_POWER6 (*)
  9b740bf... Change TARGET_FPRND to TARGET_POWER5X (*)
  d83a1cd... Change TARGET_POPCNTB to TARGET_POWER5 (*)
  eeab600... Do not allow -mvsx to boost processor to power7. (*)
  5825848... Use architecture flags for defining _ARCH_PWR macros. (*)
  7a80d6a... Add rs6000 architecture masks. (*)
  b04aa92... Add ChangeLog.meissner and REVISION. (*)
  fceecc5... aarch64: Fix ls64 intrinsic availability (*)
  4e1b617... aarch64: Fix memtag intrinsic availability (*)
  32afbb6... aarch64: Fix tme intrinsic availability (*)
  baf71ec... aarch64: Move check_required_extensions (*)
  a4b39dc... aarch64: Refactor check_required_extensions (*)
  8871489... Allow coarrays in select type. [PR46371, PR56496] (*)
  9cbcf8d... gnat: fix lto-type-mismatch between C_Version_String and gn (*)
  cc57224... aarch64: Reduce FP reassociation width for Neoverse V2 and  (*)
  6d8b9b7... testsuite: Prune warning about size of enums (*)
  e57d3cc... rtl: Enable the use of rtx values with int and mode attribu (*)
  71059d2... testsuite: Reduce cut-&-paste in scanltranstree.exp (*)
  661acde... Fix ICE in recompute_tree_invariant_for_addr_expr, at tree. (*)
  8d6c6fb... aarch64: Implement 16-byte vector mode const0 store by TImo (*)
  7f62e71... AVX10.2 ymm rounding: Support vsqrtp{s,d,h} and vsubp{s,d,h (*)
  1f86cf0... AVX10.2 ymm rounding: Support vscalefp{s,d,h} intrins (*)
  9afa508... AVX10.2 ymm rounding: Support vreducep{s,d,h} and vrndscale (*)
  90cc5b0... AVX10.2 ymm rounding: Support vmulp{s,d,h} and vrangep{s,d} (*)
  cc8a759... AVX10.2 ymm rounding: Support v{max,min}p{s,d,h} intrins (*)
  8d4f542... AVX10.2 ymm rounding: Support vgetexpp{s,d,h} and vgetmantp (*)
  0983d40... AVX10.2 ymm rounding: Support vfnmsub{132,231,213}p{s,d,h}  (*)
  6f0aa7a... AVX10.2 ymm rounding: Support vfmulcph and vfnmadd{132,231, (*)
  dd48acb... AVX10.2 ymm rounding: Support vfm{sub,subadd}{132,231,213}p (*)
  cfbc94e... AVX10.2 ymm rounding: Support vfmaddcph and vfmaddsub{132,2 (*)
  0683ca3... AVX10.2 ymm rounding: Support vfmadd{132,231,213}p{s,d,h} i (*)
  95980b2... AVX10.2 ymm rounding: Support vfc{madd,mul}cph, vfixupimmp{ (*)
  3d1b553... AVX10.2 ymm rounding: Support vcvt{,u}w2ph and vdivp{s,d,h} (*)
  b275422... AVX10.2 ymm rounding: Support vcvttps2{,u}{dq,qq} and vcvtu (*)
  493c509... AVX10.2 ymm rounding: Support vcvttph2{,u}{dq,qq,w} intrins (*)
  6e231f8... AVX10.2 ymm rounding: Support vcvtqq2p{s,d,h} and vcvttpd2{ (*)
  0f5a42d... AVX10.2 ymm rounding: Support vcvtps2{,u}{dq,qq} intrins (*)
  b70bb94... AVX10.2 ymm rounding: Support vcvtph2{,u}w and vcvtps2p{d,h (*)
  6f2eac5... AVX10.2 ymm rounding: Support vcvtph2p{s,d,sx} and vcvtph2{ (*)
  508ac49... AVX10.2 ymm rounding: Support vcvtpd2{,u}{dq,qq} intrins (*)
  85e874d... AVX10.2 ymm rounding: Support vcvtdq2p{s,h} and vcvtpd2p{s, (*)
  e22e3af... AVX10.2 ymm rounding: Support vadd{s,d,h} and vcmp{s,d,h} i (*)
  f11bc08... Daily bump. (*)
  f10d2ee... [PR rtl-optimization/115876] Avoid ubsan in ext-dce.cc (*)
  fc41263... libstdc++: Remove note from the GCC 4.0.1 days (*)
  b9ac01d... doc: Tweak gm2 mailing list address (*)
  cd2f394... PHIOPT: move factor_out_conditional_operation over to use g (*)
  1cfe4a4... libgfortran: implement fpu-macppc for Darwin, support IEEE  (*)
  1ed1dd5... AVR: Tweak 16-bit addition with const that didn't get a LD_ (*)
  22acd3c... AVR: ad target/116407 - Fix linker error "relocation trunca (*)
  dfb2e8c... AVR: target/116407 - Fix linker error "relocation truncated (*)
  3ae8794... forwprop: Also dce from added statements from gimple_simpli (*)
  a183b25... RISC-V: Implement the quad and oct .SAT_TRUNC for scalar (*)
  e8f31f4... RISC-V: Make sure high bits of usadd operands is clean for  (*)
  8d0efcf... RISC-V: Add testcases for unsigned scalar .SAT_TRUNC form 3 (*)
  6fbdbad... RISC-V: Add testcases for unsigned scalar .SAT_TRUNC form 2 (*)
  0555f65... Daily bump. (*)
  61e179b... [committed] Avoid right shifting signed value on ext-dce.cc (*)
  efcfd1d... t-rtems: add rv32imf architecture to the RTEMS multilib for (*)
  abfc140...

[gcc/meissner/heads/work176-dmf] (95 commits) Merge commit 'refs/users/meissner/heads/work176-dmf' of git

2024-08-19 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work176-dmf' was updated to point to:

 0d03a3fd23b... Merge commit 'refs/users/meissner/heads/work176-dmf' of git

It previously pointed to:

 2f5f585dae7... Add ChangeLog.dmf and update REVISION.

Diff:

Summary of changes (added commits):
---

  0d03a3f... Merge commit 'refs/users/meissner/heads/work176-dmf' of git
  9d15a61... Add ChangeLog.dmf and update REVISION.
  248fc70... Merge commit 'refs/users/meissner/heads/work176' of git+ssh (*)
  03ac745... Update ChangeLog.* (*)
  aa65901... Add -mcpu=future tuning support. (*)
  3031a7c... Add support for -mcpu=future (*)
  df189b6... Update ChangeLog.* (*)
  9e3ab51... Update tests to work with architecture flags changes. (*)
  255d09d... Change TARGET_MODULO to TARGET_POWER9 (*)
  2f2353e... Change TARGET_POPCNTD to TARGET_POWER7 (*)
  daa12c0... Change TARGET_CMPB to TARGET_POWER6 (*)
  9b740bf... Change TARGET_FPRND to TARGET_POWER5X (*)
  d83a1cd... Change TARGET_POPCNTB to TARGET_POWER5 (*)
  eeab600... Do not allow -mvsx to boost processor to power7. (*)
  5825848... Use architecture flags for defining _ARCH_PWR macros. (*)
  7a80d6a... Add rs6000 architecture masks. (*)
  b04aa92... Add ChangeLog.meissner and REVISION. (*)
  fceecc5... aarch64: Fix ls64 intrinsic availability (*)
  4e1b617... aarch64: Fix memtag intrinsic availability (*)
  32afbb6... aarch64: Fix tme intrinsic availability (*)
  baf71ec... aarch64: Move check_required_extensions (*)
  a4b39dc... aarch64: Refactor check_required_extensions (*)
  8871489... Allow coarrays in select type. [PR46371, PR56496] (*)
  9cbcf8d... gnat: fix lto-type-mismatch between C_Version_String and gn (*)
  cc57224... aarch64: Reduce FP reassociation width for Neoverse V2 and  (*)
  6d8b9b7... testsuite: Prune warning about size of enums (*)
  e57d3cc... rtl: Enable the use of rtx values with int and mode attribu (*)
  71059d2... testsuite: Reduce cut-&-paste in scanltranstree.exp (*)
  661acde... Fix ICE in recompute_tree_invariant_for_addr_expr, at tree. (*)
  8d6c6fb... aarch64: Implement 16-byte vector mode const0 store by TImo (*)
  7f62e71... AVX10.2 ymm rounding: Support vsqrtp{s,d,h} and vsubp{s,d,h (*)
  1f86cf0... AVX10.2 ymm rounding: Support vscalefp{s,d,h} intrins (*)
  9afa508... AVX10.2 ymm rounding: Support vreducep{s,d,h} and vrndscale (*)
  90cc5b0... AVX10.2 ymm rounding: Support vmulp{s,d,h} and vrangep{s,d} (*)
  cc8a759... AVX10.2 ymm rounding: Support v{max,min}p{s,d,h} intrins (*)
  8d4f542... AVX10.2 ymm rounding: Support vgetexpp{s,d,h} and vgetmantp (*)
  0983d40... AVX10.2 ymm rounding: Support vfnmsub{132,231,213}p{s,d,h}  (*)
  6f0aa7a... AVX10.2 ymm rounding: Support vfmulcph and vfnmadd{132,231, (*)
  dd48acb... AVX10.2 ymm rounding: Support vfm{sub,subadd}{132,231,213}p (*)
  cfbc94e... AVX10.2 ymm rounding: Support vfmaddcph and vfmaddsub{132,2 (*)
  0683ca3... AVX10.2 ymm rounding: Support vfmadd{132,231,213}p{s,d,h} i (*)
  95980b2... AVX10.2 ymm rounding: Support vfc{madd,mul}cph, vfixupimmp{ (*)
  3d1b553... AVX10.2 ymm rounding: Support vcvt{,u}w2ph and vdivp{s,d,h} (*)
  b275422... AVX10.2 ymm rounding: Support vcvttps2{,u}{dq,qq} and vcvtu (*)
  493c509... AVX10.2 ymm rounding: Support vcvttph2{,u}{dq,qq,w} intrins (*)
  6e231f8... AVX10.2 ymm rounding: Support vcvtqq2p{s,d,h} and vcvttpd2{ (*)
  0f5a42d... AVX10.2 ymm rounding: Support vcvtps2{,u}{dq,qq} intrins (*)
  b70bb94... AVX10.2 ymm rounding: Support vcvtph2{,u}w and vcvtps2p{d,h (*)
  6f2eac5... AVX10.2 ymm rounding: Support vcvtph2p{s,d,sx} and vcvtph2{ (*)
  508ac49... AVX10.2 ymm rounding: Support vcvtpd2{,u}{dq,qq} intrins (*)
  85e874d... AVX10.2 ymm rounding: Support vcvtdq2p{s,h} and vcvtpd2p{s, (*)
  e22e3af... AVX10.2 ymm rounding: Support vadd{s,d,h} and vcmp{s,d,h} i (*)
  f11bc08... Daily bump. (*)
  f10d2ee... [PR rtl-optimization/115876] Avoid ubsan in ext-dce.cc (*)
  fc41263... libstdc++: Remove note from the GCC 4.0.1 days (*)
  b9ac01d... doc: Tweak gm2 mailing list address (*)
  cd2f394... PHIOPT: move factor_out_conditional_operation over to use g (*)
  1cfe4a4... libgfortran: implement fpu-macppc for Darwin, support IEEE  (*)
  1ed1dd5... AVR: Tweak 16-bit addition with const that didn't get a LD_ (*)
  22acd3c... AVR: ad target/116407 - Fix linker error "relocation trunca (*)
  dfb2e8c... AVR: target/116407 - Fix linker error "relocation truncated (*)
  3ae8794... forwprop: Also dce from added statements from gimple_simpli (*)
  a183b25... RISC-V: Implement the quad and oct .SAT_TRUNC for scalar (*)
  e8f31f4... RISC-V: Make sure high bits of usadd operands is clean for  (*)
  8d0efcf... RISC-V: Add testcases for unsigned scalar .SAT_TRUNC form 3 (*)
  6fbdbad... RISC-V: Add testcases for unsigned scalar .SAT_TRUNC form 2 (*)
  0555f65... Daily bump. (*)
  61e179b... [committed] Avoid right shifting signed value on ext-dce.cc (*)
  efcfd1d... t-rtems: add rv32imf architecture to the RTEMS multilib for (*)
  abfc140... Ad

[gcc(refs/users/meissner/heads/work176-dmf)] Add ChangeLog.dmf and update REVISION.

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:9d15a613c0ae6e75837955001ff97c8a665e72db

commit 9d15a613c0ae6e75837955001ff97c8a665e72db
Author: Michael Meissner 
Date:   Fri Aug 16 20:01:40 2024 -0400

Add ChangeLog.dmf and update REVISION.

2024-08-16  Michael Meissner  

gcc/

* ChangeLog.dmf: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.dmf | 6 ++
 gcc/REVISION  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.dmf b/gcc/ChangeLog.dmf
new file mode 100644
index 000..c5ecfe8ec49
--- /dev/null
+++ b/gcc/ChangeLog.dmf
@@ -0,0 +1,6 @@
+ Branch work176-dmf, baseline 
+
+2024-08-16   Michael Meissner  
+
+   Clone branch
+
diff --git a/gcc/REVISION b/gcc/REVISION
index 9974885ad20..3c346ba1d56 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work176 branch
+work176-dmf branch


[gcc(refs/users/meissner/heads/work176-dmf)] Merge commit 'refs/users/meissner/heads/work176-dmf' of git+ssh://gcc.gnu.org/git/gcc into me/work17

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:0d03a3fd23b2fe6c2459bc1fee6f164cf758419c

commit 0d03a3fd23b2fe6c2459bc1fee6f164cf758419c
Merge: 9d15a613c0a 2f5f585dae7
Author: Michael Meissner 
Date:   Mon Aug 19 13:10:13 2024 -0400

Merge commit 'refs/users/meissner/heads/work176-dmf' of 
git+ssh://gcc.gnu.org/git/gcc into me/work176-dmf

Diff:


[gcc/meissner/heads/work176-libs] (95 commits) Merge commit 'refs/users/meissner/heads/work176-libs' of gi

2024-08-19 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work176-libs' was updated to point to:

 f4ab5ad347d... Merge commit 'refs/users/meissner/heads/work176-libs' of gi

It previously pointed to:

 f7c2e5662a4... Add ChangeLog.libs and update REVISION.

Diff:

Summary of changes (added commits):
---

  f4ab5ad... Merge commit 'refs/users/meissner/heads/work176-libs' of gi
  3b92cb4... Add ChangeLog.libs and update REVISION.
  248fc70... Merge commit 'refs/users/meissner/heads/work176' of git+ssh (*)
  03ac745... Update ChangeLog.* (*)
  aa65901... Add -mcpu=future tuning support. (*)
  3031a7c... Add support for -mcpu=future (*)
  df189b6... Update ChangeLog.* (*)
  9e3ab51... Update tests to work with architecture flags changes. (*)
  255d09d... Change TARGET_MODULO to TARGET_POWER9 (*)
  2f2353e... Change TARGET_POPCNTD to TARGET_POWER7 (*)
  daa12c0... Change TARGET_CMPB to TARGET_POWER6 (*)
  9b740bf... Change TARGET_FPRND to TARGET_POWER5X (*)
  d83a1cd... Change TARGET_POPCNTB to TARGET_POWER5 (*)
  eeab600... Do not allow -mvsx to boost processor to power7. (*)
  5825848... Use architecture flags for defining _ARCH_PWR macros. (*)
  7a80d6a... Add rs6000 architecture masks. (*)
  b04aa92... Add ChangeLog.meissner and REVISION. (*)
  fceecc5... aarch64: Fix ls64 intrinsic availability (*)
  4e1b617... aarch64: Fix memtag intrinsic availability (*)
  32afbb6... aarch64: Fix tme intrinsic availability (*)
  baf71ec... aarch64: Move check_required_extensions (*)
  a4b39dc... aarch64: Refactor check_required_extensions (*)
  8871489... Allow coarrays in select type. [PR46371, PR56496] (*)
  9cbcf8d... gnat: fix lto-type-mismatch between C_Version_String and gn (*)
  cc57224... aarch64: Reduce FP reassociation width for Neoverse V2 and  (*)
  6d8b9b7... testsuite: Prune warning about size of enums (*)
  e57d3cc... rtl: Enable the use of rtx values with int and mode attribu (*)
  71059d2... testsuite: Reduce cut-&-paste in scanltranstree.exp (*)
  661acde... Fix ICE in recompute_tree_invariant_for_addr_expr, at tree. (*)
  8d6c6fb... aarch64: Implement 16-byte vector mode const0 store by TImo (*)
  7f62e71... AVX10.2 ymm rounding: Support vsqrtp{s,d,h} and vsubp{s,d,h (*)
  1f86cf0... AVX10.2 ymm rounding: Support vscalefp{s,d,h} intrins (*)
  9afa508... AVX10.2 ymm rounding: Support vreducep{s,d,h} and vrndscale (*)
  90cc5b0... AVX10.2 ymm rounding: Support vmulp{s,d,h} and vrangep{s,d} (*)
  cc8a759... AVX10.2 ymm rounding: Support v{max,min}p{s,d,h} intrins (*)
  8d4f542... AVX10.2 ymm rounding: Support vgetexpp{s,d,h} and vgetmantp (*)
  0983d40... AVX10.2 ymm rounding: Support vfnmsub{132,231,213}p{s,d,h}  (*)
  6f0aa7a... AVX10.2 ymm rounding: Support vfmulcph and vfnmadd{132,231, (*)
  dd48acb... AVX10.2 ymm rounding: Support vfm{sub,subadd}{132,231,213}p (*)
  cfbc94e... AVX10.2 ymm rounding: Support vfmaddcph and vfmaddsub{132,2 (*)
  0683ca3... AVX10.2 ymm rounding: Support vfmadd{132,231,213}p{s,d,h} i (*)
  95980b2... AVX10.2 ymm rounding: Support vfc{madd,mul}cph, vfixupimmp{ (*)
  3d1b553... AVX10.2 ymm rounding: Support vcvt{,u}w2ph and vdivp{s,d,h} (*)
  b275422... AVX10.2 ymm rounding: Support vcvttps2{,u}{dq,qq} and vcvtu (*)
  493c509... AVX10.2 ymm rounding: Support vcvttph2{,u}{dq,qq,w} intrins (*)
  6e231f8... AVX10.2 ymm rounding: Support vcvtqq2p{s,d,h} and vcvttpd2{ (*)
  0f5a42d... AVX10.2 ymm rounding: Support vcvtps2{,u}{dq,qq} intrins (*)
  b70bb94... AVX10.2 ymm rounding: Support vcvtph2{,u}w and vcvtps2p{d,h (*)
  6f2eac5... AVX10.2 ymm rounding: Support vcvtph2p{s,d,sx} and vcvtph2{ (*)
  508ac49... AVX10.2 ymm rounding: Support vcvtpd2{,u}{dq,qq} intrins (*)
  85e874d... AVX10.2 ymm rounding: Support vcvtdq2p{s,h} and vcvtpd2p{s, (*)
  e22e3af... AVX10.2 ymm rounding: Support vadd{s,d,h} and vcmp{s,d,h} i (*)
  f11bc08... Daily bump. (*)
  f10d2ee... [PR rtl-optimization/115876] Avoid ubsan in ext-dce.cc (*)
  fc41263... libstdc++: Remove note from the GCC 4.0.1 days (*)
  b9ac01d... doc: Tweak gm2 mailing list address (*)
  cd2f394... PHIOPT: move factor_out_conditional_operation over to use g (*)
  1cfe4a4... libgfortran: implement fpu-macppc for Darwin, support IEEE  (*)
  1ed1dd5... AVR: Tweak 16-bit addition with const that didn't get a LD_ (*)
  22acd3c... AVR: ad target/116407 - Fix linker error "relocation trunca (*)
  dfb2e8c... AVR: target/116407 - Fix linker error "relocation truncated (*)
  3ae8794... forwprop: Also dce from added statements from gimple_simpli (*)
  a183b25... RISC-V: Implement the quad and oct .SAT_TRUNC for scalar (*)
  e8f31f4... RISC-V: Make sure high bits of usadd operands is clean for  (*)
  8d0efcf... RISC-V: Add testcases for unsigned scalar .SAT_TRUNC form 3 (*)
  6fbdbad... RISC-V: Add testcases for unsigned scalar .SAT_TRUNC form 2 (*)
  0555f65... Daily bump. (*)
  61e179b... [committed] Avoid right shifting signed value on ext-dce.cc (*)
  efcfd1d... t-rtems: add rv32imf architecture to the RTEMS multilib for (*)
  abfc140...

[gcc(refs/users/meissner/heads/work176-libs)] Add ChangeLog.libs and update REVISION.

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:3b92cb41e27b73f453bc3b851fa3881ff8e69f15

commit 3b92cb41e27b73f453bc3b851fa3881ff8e69f15
Author: Michael Meissner 
Date:   Fri Aug 16 20:05:31 2024 -0400

Add ChangeLog.libs and update REVISION.

2024-08-16  Michael Meissner  

gcc/

* ChangeLog.libs: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.libs | 6 ++
 gcc/REVISION   | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.libs b/gcc/ChangeLog.libs
new file mode 100644
index 000..c571465904c
--- /dev/null
+++ b/gcc/ChangeLog.libs
@@ -0,0 +1,6 @@
+ Branch work176-libs, baseline 
+
+2024-08-16   Michael Meissner  
+
+   Clone branch
+
diff --git a/gcc/REVISION b/gcc/REVISION
index 9974885ad20..d16ccbbe6b5 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work176 branch
+work176-libs branch


[gcc(refs/users/meissner/heads/work176-libs)] Merge commit 'refs/users/meissner/heads/work176-libs' of git+ssh://gcc.gnu.org/git/gcc into me/work1

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:f4ab5ad347d3547ef20abf22f2d7a53c63a9c663

commit f4ab5ad347d3547ef20abf22f2d7a53c63a9c663
Merge: 3b92cb41e27 f7c2e5662a4
Author: Michael Meissner 
Date:   Mon Aug 19 13:12:37 2024 -0400

Merge commit 'refs/users/meissner/heads/work176-libs' of 
git+ssh://gcc.gnu.org/git/gcc into me/work176-libs

Diff:


[gcc/meissner/heads/work176-tar] (95 commits) Merge commit 'refs/users/meissner/heads/work176-tar' of git

2024-08-19 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work176-tar' was updated to point to:

 42ecdd64c12... Merge commit 'refs/users/meissner/heads/work176-tar' of git

It previously pointed to:

 f10dbe3d9d2... Add ChangeLog.tar and update REVISION.

Diff:

Summary of changes (added commits):
---

  42ecdd6... Merge commit 'refs/users/meissner/heads/work176-tar' of git
  6c8633d... Add ChangeLog.tar and update REVISION.
  248fc70... Merge commit 'refs/users/meissner/heads/work176' of git+ssh (*)
  03ac745... Update ChangeLog.* (*)
  aa65901... Add -mcpu=future tuning support. (*)
  3031a7c... Add support for -mcpu=future (*)
  df189b6... Update ChangeLog.* (*)
  9e3ab51... Update tests to work with architecture flags changes. (*)
  255d09d... Change TARGET_MODULO to TARGET_POWER9 (*)
  2f2353e... Change TARGET_POPCNTD to TARGET_POWER7 (*)
  daa12c0... Change TARGET_CMPB to TARGET_POWER6 (*)
  9b740bf... Change TARGET_FPRND to TARGET_POWER5X (*)
  d83a1cd... Change TARGET_POPCNTB to TARGET_POWER5 (*)
  eeab600... Do not allow -mvsx to boost processor to power7. (*)
  5825848... Use architecture flags for defining _ARCH_PWR macros. (*)
  7a80d6a... Add rs6000 architecture masks. (*)
  b04aa92... Add ChangeLog.meissner and REVISION. (*)
  fceecc5... aarch64: Fix ls64 intrinsic availability (*)
  4e1b617... aarch64: Fix memtag intrinsic availability (*)
  32afbb6... aarch64: Fix tme intrinsic availability (*)
  baf71ec... aarch64: Move check_required_extensions (*)
  a4b39dc... aarch64: Refactor check_required_extensions (*)
  8871489... Allow coarrays in select type. [PR46371, PR56496] (*)
  9cbcf8d... gnat: fix lto-type-mismatch between C_Version_String and gn (*)
  cc57224... aarch64: Reduce FP reassociation width for Neoverse V2 and  (*)
  6d8b9b7... testsuite: Prune warning about size of enums (*)
  e57d3cc... rtl: Enable the use of rtx values with int and mode attribu (*)
  71059d2... testsuite: Reduce cut-&-paste in scanltranstree.exp (*)
  661acde... Fix ICE in recompute_tree_invariant_for_addr_expr, at tree. (*)
  8d6c6fb... aarch64: Implement 16-byte vector mode const0 store by TImo (*)
  7f62e71... AVX10.2 ymm rounding: Support vsqrtp{s,d,h} and vsubp{s,d,h (*)
  1f86cf0... AVX10.2 ymm rounding: Support vscalefp{s,d,h} intrins (*)
  9afa508... AVX10.2 ymm rounding: Support vreducep{s,d,h} and vrndscale (*)
  90cc5b0... AVX10.2 ymm rounding: Support vmulp{s,d,h} and vrangep{s,d} (*)
  cc8a759... AVX10.2 ymm rounding: Support v{max,min}p{s,d,h} intrins (*)
  8d4f542... AVX10.2 ymm rounding: Support vgetexpp{s,d,h} and vgetmantp (*)
  0983d40... AVX10.2 ymm rounding: Support vfnmsub{132,231,213}p{s,d,h}  (*)
  6f0aa7a... AVX10.2 ymm rounding: Support vfmulcph and vfnmadd{132,231, (*)
  dd48acb... AVX10.2 ymm rounding: Support vfm{sub,subadd}{132,231,213}p (*)
  cfbc94e... AVX10.2 ymm rounding: Support vfmaddcph and vfmaddsub{132,2 (*)
  0683ca3... AVX10.2 ymm rounding: Support vfmadd{132,231,213}p{s,d,h} i (*)
  95980b2... AVX10.2 ymm rounding: Support vfc{madd,mul}cph, vfixupimmp{ (*)
  3d1b553... AVX10.2 ymm rounding: Support vcvt{,u}w2ph and vdivp{s,d,h} (*)
  b275422... AVX10.2 ymm rounding: Support vcvttps2{,u}{dq,qq} and vcvtu (*)
  493c509... AVX10.2 ymm rounding: Support vcvttph2{,u}{dq,qq,w} intrins (*)
  6e231f8... AVX10.2 ymm rounding: Support vcvtqq2p{s,d,h} and vcvttpd2{ (*)
  0f5a42d... AVX10.2 ymm rounding: Support vcvtps2{,u}{dq,qq} intrins (*)
  b70bb94... AVX10.2 ymm rounding: Support vcvtph2{,u}w and vcvtps2p{d,h (*)
  6f2eac5... AVX10.2 ymm rounding: Support vcvtph2p{s,d,sx} and vcvtph2{ (*)
  508ac49... AVX10.2 ymm rounding: Support vcvtpd2{,u}{dq,qq} intrins (*)
  85e874d... AVX10.2 ymm rounding: Support vcvtdq2p{s,h} and vcvtpd2p{s, (*)
  e22e3af... AVX10.2 ymm rounding: Support vadd{s,d,h} and vcmp{s,d,h} i (*)
  f11bc08... Daily bump. (*)
  f10d2ee... [PR rtl-optimization/115876] Avoid ubsan in ext-dce.cc (*)
  fc41263... libstdc++: Remove note from the GCC 4.0.1 days (*)
  b9ac01d... doc: Tweak gm2 mailing list address (*)
  cd2f394... PHIOPT: move factor_out_conditional_operation over to use g (*)
  1cfe4a4... libgfortran: implement fpu-macppc for Darwin, support IEEE  (*)
  1ed1dd5... AVR: Tweak 16-bit addition with const that didn't get a LD_ (*)
  22acd3c... AVR: ad target/116407 - Fix linker error "relocation trunca (*)
  dfb2e8c... AVR: target/116407 - Fix linker error "relocation truncated (*)
  3ae8794... forwprop: Also dce from added statements from gimple_simpli (*)
  a183b25... RISC-V: Implement the quad and oct .SAT_TRUNC for scalar (*)
  e8f31f4... RISC-V: Make sure high bits of usadd operands is clean for  (*)
  8d0efcf... RISC-V: Add testcases for unsigned scalar .SAT_TRUNC form 3 (*)
  6fbdbad... RISC-V: Add testcases for unsigned scalar .SAT_TRUNC form 2 (*)
  0555f65... Daily bump. (*)
  61e179b... [committed] Avoid right shifting signed value on ext-dce.cc (*)
  efcfd1d... t-rtems: add rv32imf architecture to the RTEMS multilib for (*)
  abfc140... Ad

[gcc(refs/users/meissner/heads/work176-tar)] Add ChangeLog.tar and update REVISION.

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:6c8633d383609c501bd96e38fcc9f8b297de179e

commit 6c8633d383609c501bd96e38fcc9f8b297de179e
Author: Michael Meissner 
Date:   Fri Aug 16 20:03:42 2024 -0400

Add ChangeLog.tar and update REVISION.

2024-08-16  Michael Meissner  

gcc/

* ChangeLog.tar: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.tar | 6 ++
 gcc/REVISION  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.tar b/gcc/ChangeLog.tar
new file mode 100644
index 000..b24eb12cb69
--- /dev/null
+++ b/gcc/ChangeLog.tar
@@ -0,0 +1,6 @@
+ Branch work176-tar, baseline 
+
+2024-08-16   Michael Meissner  
+
+   Clone branch
+
diff --git a/gcc/REVISION b/gcc/REVISION
index 9974885ad20..15d5175ea44 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work176 branch
+work176-tar branch


[gcc(refs/users/meissner/heads/work176-tar)] Merge commit 'refs/users/meissner/heads/work176-tar' of git+ssh://gcc.gnu.org/git/gcc into me/work17

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:42ecdd64c126b7afedba43107c30fc4a2b635c8a

commit 42ecdd64c126b7afedba43107c30fc4a2b635c8a
Merge: 6c8633d3836 f10dbe3d9d2
Author: Michael Meissner 
Date:   Mon Aug 19 13:15:17 2024 -0400

Merge commit 'refs/users/meissner/heads/work176-tar' of 
git+ssh://gcc.gnu.org/git/gcc into me/work176-tar

Diff:


[gcc/meissner/heads/work176-test] (95 commits) Merge commit 'refs/users/meissner/heads/work176-test' of gi

2024-08-19 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work176-test' was updated to point to:

 0a2935428aa... Merge commit 'refs/users/meissner/heads/work176-test' of gi

It previously pointed to:

 05975e9bc07... Add ChangeLog.test and update REVISION.

Diff:

Summary of changes (added commits):
---

  0a29354... Merge commit 'refs/users/meissner/heads/work176-test' of gi
  19c0b32... Add ChangeLog.test and update REVISION.
  248fc70... Merge commit 'refs/users/meissner/heads/work176' of git+ssh (*)
  03ac745... Update ChangeLog.* (*)
  aa65901... Add -mcpu=future tuning support. (*)
  3031a7c... Add support for -mcpu=future (*)
  df189b6... Update ChangeLog.* (*)
  9e3ab51... Update tests to work with architecture flags changes. (*)
  255d09d... Change TARGET_MODULO to TARGET_POWER9 (*)
  2f2353e... Change TARGET_POPCNTD to TARGET_POWER7 (*)
  daa12c0... Change TARGET_CMPB to TARGET_POWER6 (*)
  9b740bf... Change TARGET_FPRND to TARGET_POWER5X (*)
  d83a1cd... Change TARGET_POPCNTB to TARGET_POWER5 (*)
  eeab600... Do not allow -mvsx to boost processor to power7. (*)
  5825848... Use architecture flags for defining _ARCH_PWR macros. (*)
  7a80d6a... Add rs6000 architecture masks. (*)
  b04aa92... Add ChangeLog.meissner and REVISION. (*)
  fceecc5... aarch64: Fix ls64 intrinsic availability (*)
  4e1b617... aarch64: Fix memtag intrinsic availability (*)
  32afbb6... aarch64: Fix tme intrinsic availability (*)
  baf71ec... aarch64: Move check_required_extensions (*)
  a4b39dc... aarch64: Refactor check_required_extensions (*)
  8871489... Allow coarrays in select type. [PR46371, PR56496] (*)
  9cbcf8d... gnat: fix lto-type-mismatch between C_Version_String and gn (*)
  cc57224... aarch64: Reduce FP reassociation width for Neoverse V2 and  (*)
  6d8b9b7... testsuite: Prune warning about size of enums (*)
  e57d3cc... rtl: Enable the use of rtx values with int and mode attribu (*)
  71059d2... testsuite: Reduce cut-&-paste in scanltranstree.exp (*)
  661acde... Fix ICE in recompute_tree_invariant_for_addr_expr, at tree. (*)
  8d6c6fb... aarch64: Implement 16-byte vector mode const0 store by TImo (*)
  7f62e71... AVX10.2 ymm rounding: Support vsqrtp{s,d,h} and vsubp{s,d,h (*)
  1f86cf0... AVX10.2 ymm rounding: Support vscalefp{s,d,h} intrins (*)
  9afa508... AVX10.2 ymm rounding: Support vreducep{s,d,h} and vrndscale (*)
  90cc5b0... AVX10.2 ymm rounding: Support vmulp{s,d,h} and vrangep{s,d} (*)
  cc8a759... AVX10.2 ymm rounding: Support v{max,min}p{s,d,h} intrins (*)
  8d4f542... AVX10.2 ymm rounding: Support vgetexpp{s,d,h} and vgetmantp (*)
  0983d40... AVX10.2 ymm rounding: Support vfnmsub{132,231,213}p{s,d,h}  (*)
  6f0aa7a... AVX10.2 ymm rounding: Support vfmulcph and vfnmadd{132,231, (*)
  dd48acb... AVX10.2 ymm rounding: Support vfm{sub,subadd}{132,231,213}p (*)
  cfbc94e... AVX10.2 ymm rounding: Support vfmaddcph and vfmaddsub{132,2 (*)
  0683ca3... AVX10.2 ymm rounding: Support vfmadd{132,231,213}p{s,d,h} i (*)
  95980b2... AVX10.2 ymm rounding: Support vfc{madd,mul}cph, vfixupimmp{ (*)
  3d1b553... AVX10.2 ymm rounding: Support vcvt{,u}w2ph and vdivp{s,d,h} (*)
  b275422... AVX10.2 ymm rounding: Support vcvttps2{,u}{dq,qq} and vcvtu (*)
  493c509... AVX10.2 ymm rounding: Support vcvttph2{,u}{dq,qq,w} intrins (*)
  6e231f8... AVX10.2 ymm rounding: Support vcvtqq2p{s,d,h} and vcvttpd2{ (*)
  0f5a42d... AVX10.2 ymm rounding: Support vcvtps2{,u}{dq,qq} intrins (*)
  b70bb94... AVX10.2 ymm rounding: Support vcvtph2{,u}w and vcvtps2p{d,h (*)
  6f2eac5... AVX10.2 ymm rounding: Support vcvtph2p{s,d,sx} and vcvtph2{ (*)
  508ac49... AVX10.2 ymm rounding: Support vcvtpd2{,u}{dq,qq} intrins (*)
  85e874d... AVX10.2 ymm rounding: Support vcvtdq2p{s,h} and vcvtpd2p{s, (*)
  e22e3af... AVX10.2 ymm rounding: Support vadd{s,d,h} and vcmp{s,d,h} i (*)
  f11bc08... Daily bump. (*)
  f10d2ee... [PR rtl-optimization/115876] Avoid ubsan in ext-dce.cc (*)
  fc41263... libstdc++: Remove note from the GCC 4.0.1 days (*)
  b9ac01d... doc: Tweak gm2 mailing list address (*)
  cd2f394... PHIOPT: move factor_out_conditional_operation over to use g (*)
  1cfe4a4... libgfortran: implement fpu-macppc for Darwin, support IEEE  (*)
  1ed1dd5... AVR: Tweak 16-bit addition with const that didn't get a LD_ (*)
  22acd3c... AVR: ad target/116407 - Fix linker error "relocation trunca (*)
  dfb2e8c... AVR: target/116407 - Fix linker error "relocation truncated (*)
  3ae8794... forwprop: Also dce from added statements from gimple_simpli (*)
  a183b25... RISC-V: Implement the quad and oct .SAT_TRUNC for scalar (*)
  e8f31f4... RISC-V: Make sure high bits of usadd operands is clean for  (*)
  8d0efcf... RISC-V: Add testcases for unsigned scalar .SAT_TRUNC form 3 (*)
  6fbdbad... RISC-V: Add testcases for unsigned scalar .SAT_TRUNC form 2 (*)
  0555f65... Daily bump. (*)
  61e179b... [committed] Avoid right shifting signed value on ext-dce.cc (*)
  efcfd1d... t-rtems: add rv32imf architecture to the RTEMS multilib for (*)
  abfc140...

[gcc(refs/users/meissner/heads/work176-test)] Add ChangeLog.test and update REVISION.

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:19c0b322453aacf4a53fe67ff8811a0a2684798e

commit 19c0b322453aacf4a53fe67ff8811a0a2684798e
Author: Michael Meissner 
Date:   Fri Aug 16 20:06:31 2024 -0400

Add ChangeLog.test and update REVISION.

2024-08-16  Michael Meissner  

gcc/

* ChangeLog.test: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.test | 6 ++
 gcc/REVISION   | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.test b/gcc/ChangeLog.test
new file mode 100644
index 000..cc479aca03f
--- /dev/null
+++ b/gcc/ChangeLog.test
@@ -0,0 +1,6 @@
+ Branch work176-test, baseline 
+
+2024-08-16   Michael Meissner  
+
+   Clone branch
+
diff --git a/gcc/REVISION b/gcc/REVISION
index 9974885ad20..1a0288a69b1 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work176 branch
+work176-test branch


[gcc(refs/users/meissner/heads/work176-test)] Merge commit 'refs/users/meissner/heads/work176-test' of git+ssh://gcc.gnu.org/git/gcc into me/work1

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:0a2935428aa3e6a7d986e2b07a0b895aba1b0059

commit 0a2935428aa3e6a7d986e2b07a0b895aba1b0059
Merge: 19c0b322453 05975e9bc07
Author: Michael Meissner 
Date:   Mon Aug 19 13:18:15 2024 -0400

Merge commit 'refs/users/meissner/heads/work176-test' of 
git+ssh://gcc.gnu.org/git/gcc into me/work176-test

Diff:


[gcc/meissner/heads/work176-vpair] (95 commits) Merge commit 'refs/users/meissner/heads/work176-vpair' of g

2024-08-19 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work176-vpair' was updated to point to:

 b9f77deab8b... Merge commit 'refs/users/meissner/heads/work176-vpair' of g

It previously pointed to:

 217eac978ad... Add ChangeLog.vpair and update REVISION.

Diff:

Summary of changes (added commits):
---

  b9f77de... Merge commit 'refs/users/meissner/heads/work176-vpair' of g
  9adc2e6... Add ChangeLog.vpair and update REVISION.
  248fc70... Merge commit 'refs/users/meissner/heads/work176' of git+ssh (*)
  03ac745... Update ChangeLog.* (*)
  aa65901... Add -mcpu=future tuning support. (*)
  3031a7c... Add support for -mcpu=future (*)
  df189b6... Update ChangeLog.* (*)
  9e3ab51... Update tests to work with architecture flags changes. (*)
  255d09d... Change TARGET_MODULO to TARGET_POWER9 (*)
  2f2353e... Change TARGET_POPCNTD to TARGET_POWER7 (*)
  daa12c0... Change TARGET_CMPB to TARGET_POWER6 (*)
  9b740bf... Change TARGET_FPRND to TARGET_POWER5X (*)
  d83a1cd... Change TARGET_POPCNTB to TARGET_POWER5 (*)
  eeab600... Do not allow -mvsx to boost processor to power7. (*)
  5825848... Use architecture flags for defining _ARCH_PWR macros. (*)
  7a80d6a... Add rs6000 architecture masks. (*)
  b04aa92... Add ChangeLog.meissner and REVISION. (*)
  fceecc5... aarch64: Fix ls64 intrinsic availability (*)
  4e1b617... aarch64: Fix memtag intrinsic availability (*)
  32afbb6... aarch64: Fix tme intrinsic availability (*)
  baf71ec... aarch64: Move check_required_extensions (*)
  a4b39dc... aarch64: Refactor check_required_extensions (*)
  8871489... Allow coarrays in select type. [PR46371, PR56496] (*)
  9cbcf8d... gnat: fix lto-type-mismatch between C_Version_String and gn (*)
  cc57224... aarch64: Reduce FP reassociation width for Neoverse V2 and  (*)
  6d8b9b7... testsuite: Prune warning about size of enums (*)
  e57d3cc... rtl: Enable the use of rtx values with int and mode attribu (*)
  71059d2... testsuite: Reduce cut-&-paste in scanltranstree.exp (*)
  661acde... Fix ICE in recompute_tree_invariant_for_addr_expr, at tree. (*)
  8d6c6fb... aarch64: Implement 16-byte vector mode const0 store by TImo (*)
  7f62e71... AVX10.2 ymm rounding: Support vsqrtp{s,d,h} and vsubp{s,d,h (*)
  1f86cf0... AVX10.2 ymm rounding: Support vscalefp{s,d,h} intrins (*)
  9afa508... AVX10.2 ymm rounding: Support vreducep{s,d,h} and vrndscale (*)
  90cc5b0... AVX10.2 ymm rounding: Support vmulp{s,d,h} and vrangep{s,d} (*)
  cc8a759... AVX10.2 ymm rounding: Support v{max,min}p{s,d,h} intrins (*)
  8d4f542... AVX10.2 ymm rounding: Support vgetexpp{s,d,h} and vgetmantp (*)
  0983d40... AVX10.2 ymm rounding: Support vfnmsub{132,231,213}p{s,d,h}  (*)
  6f0aa7a... AVX10.2 ymm rounding: Support vfmulcph and vfnmadd{132,231, (*)
  dd48acb... AVX10.2 ymm rounding: Support vfm{sub,subadd}{132,231,213}p (*)
  cfbc94e... AVX10.2 ymm rounding: Support vfmaddcph and vfmaddsub{132,2 (*)
  0683ca3... AVX10.2 ymm rounding: Support vfmadd{132,231,213}p{s,d,h} i (*)
  95980b2... AVX10.2 ymm rounding: Support vfc{madd,mul}cph, vfixupimmp{ (*)
  3d1b553... AVX10.2 ymm rounding: Support vcvt{,u}w2ph and vdivp{s,d,h} (*)
  b275422... AVX10.2 ymm rounding: Support vcvttps2{,u}{dq,qq} and vcvtu (*)
  493c509... AVX10.2 ymm rounding: Support vcvttph2{,u}{dq,qq,w} intrins (*)
  6e231f8... AVX10.2 ymm rounding: Support vcvtqq2p{s,d,h} and vcvttpd2{ (*)
  0f5a42d... AVX10.2 ymm rounding: Support vcvtps2{,u}{dq,qq} intrins (*)
  b70bb94... AVX10.2 ymm rounding: Support vcvtph2{,u}w and vcvtps2p{d,h (*)
  6f2eac5... AVX10.2 ymm rounding: Support vcvtph2p{s,d,sx} and vcvtph2{ (*)
  508ac49... AVX10.2 ymm rounding: Support vcvtpd2{,u}{dq,qq} intrins (*)
  85e874d... AVX10.2 ymm rounding: Support vcvtdq2p{s,h} and vcvtpd2p{s, (*)
  e22e3af... AVX10.2 ymm rounding: Support vadd{s,d,h} and vcmp{s,d,h} i (*)
  f11bc08... Daily bump. (*)
  f10d2ee... [PR rtl-optimization/115876] Avoid ubsan in ext-dce.cc (*)
  fc41263... libstdc++: Remove note from the GCC 4.0.1 days (*)
  b9ac01d... doc: Tweak gm2 mailing list address (*)
  cd2f394... PHIOPT: move factor_out_conditional_operation over to use g (*)
  1cfe4a4... libgfortran: implement fpu-macppc for Darwin, support IEEE  (*)
  1ed1dd5... AVR: Tweak 16-bit addition with const that didn't get a LD_ (*)
  22acd3c... AVR: ad target/116407 - Fix linker error "relocation trunca (*)
  dfb2e8c... AVR: target/116407 - Fix linker error "relocation truncated (*)
  3ae8794... forwprop: Also dce from added statements from gimple_simpli (*)
  a183b25... RISC-V: Implement the quad and oct .SAT_TRUNC for scalar (*)
  e8f31f4... RISC-V: Make sure high bits of usadd operands is clean for  (*)
  8d0efcf... RISC-V: Add testcases for unsigned scalar .SAT_TRUNC form 3 (*)
  6fbdbad... RISC-V: Add testcases for unsigned scalar .SAT_TRUNC form 2 (*)
  0555f65... Daily bump. (*)
  61e179b... [committed] Avoid right shifting signed value on ext-dce.cc (*)
  efcfd1d... t-rtems: add rv32imf architecture to the RTEMS multilib for (*)
  abfc140

[gcc(refs/users/meissner/heads/work176-vpair)] Add ChangeLog.vpair and update REVISION.

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:9adc2e6b2815cefe418d4b0af4657795ddfe4df0

commit 9adc2e6b2815cefe418d4b0af4657795ddfe4df0
Author: Michael Meissner 
Date:   Fri Aug 16 20:02:38 2024 -0400

Add ChangeLog.vpair and update REVISION.

2024-08-16  Michael Meissner  

gcc/

* ChangeLog.vpair: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.vpair | 6 ++
 gcc/REVISION| 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.vpair b/gcc/ChangeLog.vpair
new file mode 100644
index 000..2157c2b62b0
--- /dev/null
+++ b/gcc/ChangeLog.vpair
@@ -0,0 +1,6 @@
+ Branch work176-vpair, baseline 
+
+2024-08-16   Michael Meissner  
+
+   Clone branch
+
diff --git a/gcc/REVISION b/gcc/REVISION
index 9974885ad20..b858d38e1b1 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work176 branch
+work176-vpair branch


[gcc(refs/users/meissner/heads/work176-vpair)] Merge commit 'refs/users/meissner/heads/work176-vpair' of git+ssh://gcc.gnu.org/git/gcc into me/work

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:b9f77deab8b4da60cf36312105b3edb066c5a93c

commit b9f77deab8b4da60cf36312105b3edb066c5a93c
Merge: 9adc2e6b281 217eac978ad
Author: Michael Meissner 
Date:   Mon Aug 19 13:19:53 2024 -0400

Merge commit 'refs/users/meissner/heads/work176-vpair' of 
git+ssh://gcc.gnu.org/git/gcc into me/work176-vpair

Diff:


[gcc r15-3026] c++: fix ICE in convert_nontype_argument [PR116384]

2024-08-19 Thread Marek Polacek via Gcc-cvs
https://gcc.gnu.org/g:8191f15022b0ea44fcb549449b0458d07ae02e0a

commit r15-3026-g8191f15022b0ea44fcb549449b0458d07ae02e0a
Author: Marek Polacek 
Date:   Thu Aug 15 11:53:10 2024 -0400

c++: fix ICE in convert_nontype_argument [PR116384]

Here we ICE since r14-8291 in C++11/C++14 modes.  Fortunately
this is an easy one.

The important bit of r14-8291 is this:

@@ -20056,9 +20071,12 @@ tsubst_expr (tree t, tree args, tsubst_flags_t 
complain, tree in_decl)
RETURN (retval);
  }
if (IMPLICIT_CONV_EXPR_NONTYPE_ARG (t))
- /* We'll pass this to convert_nontype_argument again, we don't need
-to actually perform any conversion here.  */
- RETURN (expr);
+ {
+   tree r = convert_nontype_argument (type, expr, complain);
+   if (r == NULL_TREE)
+ r = error_mark_node;
+   RETURN (r);
+ }

which obviously means that instead of returning right away we go
to convert_nontype_argument.  When type is error_mark_node and we're
in C++17, in convert_nontype_argument we go down this path:

  else if (INTEGRAL_OR_ENUMERATION_TYPE_P (type)
   || cxx_dialect >= cxx17)
{
  expr = build_converted_constant_expr (type, expr, complain);
  if (expr == error_mark_node)
return (complain & tf_error) ? NULL_TREE : error_mark_node;
  // ...
}

but pre-C++17, we take a different route and end up crashing on
gcc_unreachable.

It would of course also work to check for error_mark_node early in
build_converted_constant_expr.

PR c++/116384

gcc/cp/ChangeLog:

* pt.cc (tsubst_expr) <case IMPLICIT_CONV_EXPR>: Bail if tsubst
returns error_mark_node.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/vt-116384.C: New test.

Diff:
---
 gcc/cp/pt.cc   |  2 ++
 gcc/testsuite/g++.dg/cpp0x/vt-116384.C | 26 ++
 2 files changed, 28 insertions(+)

diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index 8725a5eeb3f..684ee0c8a60 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -20217,6 +20217,8 @@ tsubst_expr (tree t, tree args, tsubst_flags_t 
complain, tree in_decl)
 case IMPLICIT_CONV_EXPR:
   {
tree type = tsubst (TREE_TYPE (t), args, complain, in_decl);
+   if (type == error_mark_node)
+ RETURN (error_mark_node);
tree expr = RECUR (TREE_OPERAND (t, 0));
if (dependent_type_p (type) || type_dependent_expression_p (expr))
  {
diff --git a/gcc/testsuite/g++.dg/cpp0x/vt-116384.C 
b/gcc/testsuite/g++.dg/cpp0x/vt-116384.C
new file mode 100644
index 000..54d7f0774c5
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/vt-116384.C
@@ -0,0 +1,26 @@
+// PR c++/116384
+// { dg-do compile { target c++11 } }
+
+namespace a {
+template  struct c;
+template  struct d;
+}
+namespace e {
+namespace g {
+template  using h = void;
+template  class, typename...> struct detector {};
+template  class i, typename... args>
+struct detector>, i, args...>;
+}
+template  class i, typename... args>
+using j = g::detector;
+template  using l = typename a::c::m;
+template  struct conjunction;
+namespace g {
+template  using n = l>::p>;
+}
+template  = true> class o;
+}
+struct r;
+template  using q = e::o;
+void s() { e::j f; }


[gcc r15-3027] c++: ICE with enum and conversion fn in template [PR115657]

2024-08-19 Thread Marek Polacek via Gcc-cvs
https://gcc.gnu.org/g:53283c3231a7b94e728619cccbf21170fb36b2a8

commit r15-3027-g53283c3231a7b94e728619cccbf21170fb36b2a8
Author: Marek Polacek 
Date:   Thu Aug 15 18:47:29 2024 -0400

c++: ICE with enum and conversion fn in template [PR115657]

Here we initialize an enumerator with a class prvalue with a conversion
function.  When we fold it in build_enumerator, we create a TARGET_EXPR
for the object, and subsequently crash in tsubst_expr, which should not
see such a code.

Normally, we fix similar problems by using an IMPLICIT_CONV_EXPR but here
I may get away with not using the result of fold_non_dependent_expr unless
the result is a constant.  A TARGET_EXPR is not constant.

PR c++/115657

gcc/cp/ChangeLog:

* decl.cc (build_enumerator): Call maybe_fold_non_dependent_expr
instead of fold_non_dependent_expr.

gcc/testsuite/ChangeLog:

* g++.dg/cpp1y/constexpr-recursion2.C: New test.
* g++.dg/template/conv21.C: New test.

Diff:
---
 gcc/cp/decl.cc| 10 --
 gcc/testsuite/g++.dg/cpp1y/constexpr-recursion2.C | 22 ++
 gcc/testsuite/g++.dg/template/conv21.C| 14 ++
 3 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/gcc/cp/decl.cc b/gcc/cp/decl.cc
index f23b635aec9..12139e1d862 100644
--- a/gcc/cp/decl.cc
+++ b/gcc/cp/decl.cc
@@ -17387,9 +17387,15 @@ build_enumerator (tree name, tree value, tree 
enumtype, tree attributes,
   tree type;
 
   /* scalar_constant_value will pull out this expression, so make sure
- it's folded as appropriate.  */
+ it's folded as appropriate.
+
+ Creating a TARGET_EXPR in a template breaks when substituting, and
+ here we would create it for instance when using a class prvalue with
+ a user-defined conversion function.  So don't use such a tree.  We
+ instantiate VALUE here to get errors about bad enumerators even in
+ a template that does not get instantiated.  */
   if (processing_template_decl)
-value = fold_non_dependent_expr (value);
+value = maybe_fold_non_dependent_expr (value);
 
   /* If the VALUE was erroneous, pretend it wasn't there; that will
  result in the enum being assigned the next value in sequence.  */
diff --git a/gcc/testsuite/g++.dg/cpp1y/constexpr-recursion2.C 
b/gcc/testsuite/g++.dg/cpp1y/constexpr-recursion2.C
new file mode 100644
index 000..f268f52e2b5
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1y/constexpr-recursion2.C
@@ -0,0 +1,22 @@
+// PR c++/115657
+// { dg-do compile { target c++14 } }
+// { dg-options "-Wall" }
+
+// Like constexpr-recursion1.C but use a class with a conversion function.
+
+struct X {
+  constexpr operator int() { return 0; }
+};
+
+template 
+constexpr X f1 ()
+{
+  enum E { a = f1<0> () }; // { dg-error "called in a constant expression 
before its definition is complete|is not an integer constant" }
+  return {};
+}
+
+constexpr X f3 ()
+{
+  enum E { a = f3 () };// { dg-error "called in a constant expression 
before its definition is complete|is not an integer constant" }
+  return {};
+}
diff --git a/gcc/testsuite/g++.dg/template/conv21.C 
b/gcc/testsuite/g++.dg/template/conv21.C
new file mode 100644
index 000..1dc7b3d50d9
--- /dev/null
+++ b/gcc/testsuite/g++.dg/template/conv21.C
@@ -0,0 +1,14 @@
+// PR c++/115657
+// { dg-do compile { target c++11 } }
+
+struct NonIntegral
+{
+constexpr operator int() { return 0; }
+};
+
+template struct TemplatedStructural
+{
+enum { e = NonIntegral{} };
+};
+
+template struct TemplatedStructural;


[gcc(refs/users/meissner/heads/work176)] Use vector pair load/store for memcpy with -mcpu=future

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:0d4fd03f8cbe6eb44f17361cd4d1b8f889a89393

commit 0d4fd03f8cbe6eb44f17361cd4d1b8f889a89393
Author: Michael Meissner 
Date:   Mon Aug 19 13:22:38 2024 -0400

Use vector pair load/store for memcpy with -mcpu=future

In the development for the power10 processor, GCC did not enable using the
load vector pair and store vector pair instructions when optimizing things
like memory copy.  This patch enables using those instructions if
-mcpu=future is used.

2024-08-19  Michael Meissner  

gcc/

* config/rs6000/rs6000-cpus.def (ISA_FUTURE_MASKS_SERVER): Enable 
using
load vector pair and store vector pair instructions for memory copy
operations.
(POWERPC_MASKS): Make the bit for enabling using load vector pair 
and
store vector pair operations set and reset when the PowerPC 
processor is
changed.

Diff:
---
 gcc/config/rs6000/rs6000-cpus.def | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/gcc/config/rs6000/rs6000-cpus.def 
b/gcc/config/rs6000/rs6000-cpus.def
index e73d9ef51f8..74151be4048 100644
--- a/gcc/config/rs6000/rs6000-cpus.def
+++ b/gcc/config/rs6000/rs6000-cpus.def
@@ -86,7 +86,8 @@
 
 #define POWER11_MASKS_SERVER   ISA_3_1_MASKS_SERVER
 
-#define FUTURE_MASKS_SERVERPOWER11_MASKS_SERVER
+#define FUTURE_MASKS_SERVER(POWER11_MASKS_SERVER   \
+| OPTION_MASK_BLOCK_OPS_VECTOR_PAIR)
 
 /* Flags that need to be turned off if -mno-vsx.  */
 #define OTHER_VSX_VECTOR_MASKS (OPTION_MASK_EFFICIENT_UNALIGNED_VSX\
@@ -116,6 +117,7 @@
 
 /* Mask of all options to set the default isa flags based on -mcpu=.  */
 #define POWERPC_MASKS  (OPTION_MASK_ALTIVEC\
+| OPTION_MASK_BLOCK_OPS_VECTOR_PAIR\
 | OPTION_MASK_CMPB \
 | OPTION_MASK_CRYPTO   \
 | OPTION_MASK_DFP  \


[gcc(refs/users/meissner/heads/work176)] RFC2653-Add support for dense math registers.

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:8e9996449f3217a6255968bfae32cced7feaba44

commit 8e9996449f3217a6255968bfae32cced7feaba44
Author: Michael Meissner 
Date:   Mon Aug 19 13:25:14 2024 -0400

RFC2653-Add support for dense math registers.

The MMA subsystem added the notion of accumulator registers as an optional
feature of ISA 3.1 (power10).  In ISA 3.1, these accumulators overlapped 
with
the VSX registers 0..31, but logically the accumulator registers were 
separate
from the FPR registers.  In ISA 3.1, it was anticipated that in future 
systems,
the accumulator registers may not overlap with the FPR registers.  This patch
adds the support for dense math registers as separate registers.

This particular patch does not change the MMA support to use the 
accumulators
within the dense math registers.  This patch just adds the basic support for
having separate DMRs.  The next patch will switch the MMA support to use the
accumulators if -mcpu=future is used.

For testing purposes, I added an undocumented option '-mdense-math' to 
enable
or disable the dense math support.

This patch adds a new constraint (wD).  If MMA is selected but dense math is
not selected (i.e. -mcpu=power10), the wD constraint will allow access to
accumulators that overlap with VSX registers 0..31.  If both MMA and dense 
math
are selected (i.e. -mcpu=future), the wD constraint will only allow dense 
math
registers.

This patch modifies the existing %A output modifier.  If MMA is selected but
dense math is not selected, then %A output modifier converts the VSX 
register
number to the accumulator number, by dividing it by 4.  If both MMA and 
dense
math are selected, then %A will map the separate DMR registers into 0..7.

The intention is that user code using extended asm can be modified to run on
both MMA without dense math and MMA with dense math:

1)  If possible, don't use extended asm, but instead use the MMA 
built-in
functions;

2)  If you do need to write extended asm, the d constraints
targeting accumulators should be changed to wD;

3)  Only use the built-in zero, assemble and disassemble functions to
create or move data between vector quad types and dense math accumulators.
I.e. do not use the xxmfacc, xxmtacc, and xxsetaccz directly in the
extended asm code.  The reason is these instructions assume there 
is a
1-to-1 correspondence between 4 adjacent FPR registers and an
accumulator that overlaps with those instructions.  With 
accumulators
now being separate registers, there no longer is a 1-to-1
correspondence.

It is possible that the mangling for DMRs and the GDB register numbers may
produce other changes in the future.

2024-08-19   Michael Meissner  

* config/rs6000/mma.md (UNSPEC_MMA_DMSETDMRZ): New unspec.
(movxo): Add comments about dense math registers.
(movxo_nodm): Rename from movxo and restrict the usage to machines
without dense math registers.
(movxo_dm): New insn for movxo support for machines with dense math
registers.
(mma_): Restrict usage to machines without dense math 
registers.
(mma_xxsetaccz): Add a define_expand wrapper, and add support for 
dense
math registers.
(mma_dmsetaccz): New insn.
* config/rs6000/predicates.md (dmr_operand): New predicate.
(accumulator_operand): Add support for dense math registers.
* config/rs6000/rs6000-builtin.cc (rs6000_gimple_fold_mma_builtin): 
Do
not issue a de-prime instruction when disassembling a vector quad 
on a
system with dense math registers.
* config/rs6000/rs6000-c.cc (rs6000_define_or_undefine_macro): 
Define
__DENSE_MATH__ if we have dense math registers.
* config/rs6000/rs6000.cc (enum rs6000_reg_type): Add DMR_REG_TYPE.
(enum rs6000_reload_reg_type): Add RELOAD_REG_DMR.
(LAST_RELOAD_REG_CLASS): Add support for DMR registers and the wD
constraint.
(reload_reg_map): Likewise.
(rs6000_reg_names): Likewise.
(alt_reg_names): Likewise.
(rs6000_hard_regno_nregs_internal): Likewise.
(rs6000_hard_regno_mode_ok_uncached): Likewise.
(rs6000_debug_reg_global): Likewise.
(rs6000_setup_reg_addr_masks): Likewise.
(rs6000_init_hard_regno_mode_ok): Likewise.
(rs6000_secondary_reload_memory): Add support for DMR registers.
(rs6000_secondary_reload_simple_move): Likewise.
(rs6000_preferred_reload_class): Likewise.
(rs6000_secondary_reload_class): Likewise.
(print_operand): Make %A handle both FPRs and DMRs.
 

[gcc(refs/users/meissner/heads/work176)] RFC2653-PowerPC: Switch to dense math names for all MMA operations.

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:b994c1d199b858a92d56739893898f3dd2b6736c

commit b994c1d199b858a92d56739893898f3dd2b6736c
Author: Michael Meissner 
Date:   Mon Aug 19 13:26:15 2024 -0400

RFC2653-PowerPC: Switch to dense math names for all MMA operations.

This patch changes the assembler instruction names for MMA instructions from
the original name used in power10 to the new name when used with the dense 
math
system.  I.e. xvf64gerpp becomes dmxvf64gerpp.  The assembler will emit the
same bits for either spelling.

For the non-prefixed MMA instructions, we add a 'dm' prefix in front of the
instruction.  However, the prefixed instructions have a 'pm' prefix, and we 
add
the 'dm' prefix afterwards.  To prevent having two sets of parallel int
attributes, we remove the "pm" prefix from the instruction string in the
attributes, and add it later, both in the insn name and in the output 
template.

2024-08-19   Michael Meissner  

gcc/

* config/rs6000/mma.md (vvi4i4i8): Change the instruction to not 
have a
"pm" prefix.
(avvi4i4i8): Likewise.
(vvi4i4i2): Likewise.
(avvi4i4i2): Likewise.
(vvi4i4): Likewise.
(avvi4i4): Likewise.
(pvi4i2): Likewise.
(apvi4i2): Likewise.
(vvi4i4i4): Likewise.
(avvi4i4i4): Likewise.
(mma_): Add support for running on DMF systems, generating the 
dense
math instruction and using the dense math accumulators.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_pm): Add support for running on DMF systems, 
generating
the dense math instruction and using the dense math accumulators.
Rename the insn with a 'pm' prefix and add either 'pm' or 'pmdm'
prefixes based on whether we have the original MMA specification or 
if
we have dense math support.
(mma_pm): Likewise.
(mma_pm): Likewise.
(mma_pm): Likewise.
(mma_pm): Likewise.
(mma_pm): Likewise.
(mma_pm): Likewise.
(mma_pm): Likewise.

Diff:
---
 gcc/config/rs6000/mma.md | 157 +++
 1 file changed, 104 insertions(+), 53 deletions(-)

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index ae6e7e9695b..2e04eb653fa 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -225,44 +225,47 @@
 (UNSPEC_MMA_XVF64GERNP "xvf64gernp")
 (UNSPEC_MMA_XVF64GERNN "xvf64gernn")])
 
-(define_int_attr vvi4i4i8  [(UNSPEC_MMA_PMXVI4GER8 "pmxvi4ger8")])
+;; The "pm" prefix is not in these expansions, so that we can generate
+;; pmdmxvi4ger8 on systems with dense math registers and xvi4ger8 on systems
+;; without dense math registers.
+(define_int_attr vvi4i4i8  [(UNSPEC_MMA_PMXVI4GER8 "xvi4ger8")])
 
-(define_int_attr avvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8PP   
"pmxvi4ger8pp")])
+(define_int_attr avvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8PP   "xvi4ger8pp")])
 
-(define_int_attr vvi4i4i2  [(UNSPEC_MMA_PMXVI16GER2"pmxvi16ger2")
-(UNSPEC_MMA_PMXVI16GER2S   "pmxvi16ger2s")
-(UNSPEC_MMA_PMXVF16GER2"pmxvf16ger2")
-(UNSPEC_MMA_PMXVBF16GER2   
"pmxvbf16ger2")])
+(define_int_attr vvi4i4i2  [(UNSPEC_MMA_PMXVI16GER2"xvi16ger2")
+(UNSPEC_MMA_PMXVI16GER2S   "xvi16ger2s")
+(UNSPEC_MMA_PMXVF16GER2"xvf16ger2")
+(UNSPEC_MMA_PMXVBF16GER2   "xvbf16ger2")])
 
-(define_int_attr avvi4i4i2 [(UNSPEC_MMA_PMXVI16GER2PP  "pmxvi16ger2pp")
-(UNSPEC_MMA_PMXVI16GER2SPP 
"pmxvi16ger2spp")
-(UNSPEC_MMA_PMXVF16GER2PP  "pmxvf16ger2pp")
-(UNSPEC_MMA_PMXVF16GER2PN  "pmxvf16ger2pn")
-(UNSPEC_MMA_PMXVF16GER2NP  "pmxvf16ger2np")
-(UNSPEC_MMA_PMXVF16GER2NN  "pmxvf16ger2nn")
-(UNSPEC_MMA_PMXVBF16GER2PP 
"pmxvbf16ger2pp")
-(UNSPEC_MMA_PMXVBF16GER2PN 
"pmxvbf16ger2pn")
-(UNSPEC_MMA_PMXVBF16GER2NP 
"pmxvbf16ger2np")
-(UNSPEC_MMA_PMXVBF16GER2NN 
"pmxvbf16ger2nn")])
+(define_int_attr avvi4i4i2 [(UNSPEC_MMA_PMXVI16GER2PP  "xvi16ger2pp")
+(UNSPEC_MMA_PMXVI16GER2SPP "xvi16ger2spp")
+(UNSPEC_MMA_PMXVF16GER2PP  "xvf16ger2pp")
+(UNSPEC_MMA_PMXVF16GER2PN  "xvf16

[gcc(refs/users/meissner/heads/work176)] RFC2653-Add dense math test for new instruction names.

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:5337baaab6eac19696df611a23611d07ca705fd8

commit 5337baaab6eac19696df611a23611d07ca705fd8
Author: Michael Meissner 
Date:   Mon Aug 19 13:27:50 2024 -0400

RFC2653-Add dense math test for new instruction names.

2024-08-19   Michael Meissner  

gcc/testsuite/

* gcc.target/powerpc/dm-double-test.c: New test.
* lib/target-supports.exp (check_effective_target_ppc_dmr_ok): New
target test.

Diff:
---
 gcc/testsuite/gcc.target/powerpc/dm-double-test.c | 194 ++
 gcc/testsuite/lib/target-supports.exp |  23 +++
 2 files changed, 217 insertions(+)

diff --git a/gcc/testsuite/gcc.target/powerpc/dm-double-test.c 
b/gcc/testsuite/gcc.target/powerpc/dm-double-test.c
new file mode 100644
index 000..66c19779585
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/dm-double-test.c
@@ -0,0 +1,194 @@
+/* Test derived from mma-double-1.c, modified for dense math.  */
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_dense_math_ok } */
+/* { dg-options "-mdejagnu-cpu=future -O2" } */
+
+#include 
+#include 
+#include 
+
+typedef unsigned char vec_t __attribute__ ((vector_size (16)));
+typedef double v4sf_t __attribute__ ((vector_size (16)));
+#define SAVE_ACC(ACC, ldc, J)  \
+ __builtin_mma_disassemble_acc (result, ACC); \
+ rowC = (v4sf_t *) &CO[0*ldc+J]; \
+  rowC[0] += result[0]; \
+  rowC = (v4sf_t *) &CO[1*ldc+J]; \
+  rowC[0] += result[1]; \
+  rowC = (v4sf_t *) &CO[2*ldc+J]; \
+  rowC[0] += result[2]; \
+  rowC = (v4sf_t *) &CO[3*ldc+J]; \
+ rowC[0] += result[3];
+
+void
+DM (int m, int n, int k, double *A, double *B, double *C)
+{
+  __vector_quad acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7;
+  v4sf_t result[4];
+  v4sf_t *rowC;
+  for (int l = 0; l < n; l += 4)
+{
+  double *CO;
+  double *AO;
+  AO = A;
+  CO = C;
+  C += m * 4;
+  for (int j = 0; j < m; j += 16)
+   {
+ double *BO = B;
+ __builtin_mma_xxsetaccz (&acc0);
+ __builtin_mma_xxsetaccz (&acc1);
+ __builtin_mma_xxsetaccz (&acc2);
+ __builtin_mma_xxsetaccz (&acc3);
+ __builtin_mma_xxsetaccz (&acc4);
+ __builtin_mma_xxsetaccz (&acc5);
+ __builtin_mma_xxsetaccz (&acc6);
+ __builtin_mma_xxsetaccz (&acc7);
+ unsigned long i;
+
+ for (i = 0; i < k; i++)
+   {
+ vec_t *rowA = (vec_t *) & AO[i * 16];
+ __vector_pair rowB;
+ vec_t *rb = (vec_t *) & BO[i * 4];
+ __builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
+ __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
+ __builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
+ __builtin_mma_xvf64gerpp (&acc2, rowB, rowA[2]);
+ __builtin_mma_xvf64gerpp (&acc3, rowB, rowA[3]);
+ __builtin_mma_xvf64gerpp (&acc4, rowB, rowA[4]);
+ __builtin_mma_xvf64gerpp (&acc5, rowB, rowA[5]);
+ __builtin_mma_xvf64gerpp (&acc6, rowB, rowA[6]);
+ __builtin_mma_xvf64gerpp (&acc7, rowB, rowA[7]);
+   }
+ SAVE_ACC (&acc0, m, 0);
+ SAVE_ACC (&acc2, m, 4);
+ SAVE_ACC (&acc1, m, 2);
+ SAVE_ACC (&acc3, m, 6);
+ SAVE_ACC (&acc4, m, 8);
+ SAVE_ACC (&acc6, m, 12);
+ SAVE_ACC (&acc5, m, 10);
+ SAVE_ACC (&acc7, m, 14);
+ AO += k * 16;
+ BO += k * 4;
+ CO += 16;
+   }
+  B += k * 4;
+}
+}
+
+void
+init (double *matrix, int row, int column)
+{
+  for (int j = 0; j < column; j++)
+{
+  for (int i = 0; i < row; i++)
+   {
+ matrix[j * row + i] = (i * 16 + 2 + j) / 0.123;
+   }
+}
+}
+
+void
+init0 (double *matrix, double *matrix1, int row, int column)
+{
+  for (int j = 0; j < column; j++)
+for (int i = 0; i < row; i++)
+  matrix[j * row + i] = matrix1[j * row + i] = 0;
+}
+
+
+void
+print (const char *name, const double *matrix, int row, int column)
+{
+  printf ("Matrix %s has %d rows and %d columns:\n", name, row, column);
+  for (int i = 0; i < row; i++)
+{
+  for (int j = 0; j < column; j++)
+   {
+ printf ("%f ", matrix[j * row + i]);
+   }
+  printf ("\n");
+}
+  printf ("\n");
+}
+
+int
+main (int argc, char *argv[])
+{
+  int rowsA, colsB, common;
+  int i, j, k;
+  int ret = 0;
+
+  for (int t = 16; t <= 128; t += 16)
+{
+  for (int t1 = 4; t1 <= 16; t1 += 4)
+   {
+ rowsA = t;
+ colsB = t1;
+ common = 1;
+ /* printf ("Running test for rows = %d,cols = %d\n", t, t1); */
+ double A[rowsA * common];
+ double B[common * colsB];
+ double C[rowsA * colsB];
+ double D[rowsA * colsB];
+
+
+ init (A, rowsA, common);
+ init (B, common, colsB);
+ init0 (C, D, rowsA, colsB);
+ DM (rowsA, colsB, common, A, B, 

[gcc(refs/users/meissner/heads/work176)] RFC2653-PowerPC: Add support for 1,024 bit DMR registers.

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:6cc817c10c29be4958d76da3eda063f7757c71cc

commit 6cc817c10c29be4958d76da3eda063f7757c71cc
Author: Michael Meissner 
Date:   Mon Aug 19 13:30:17 2024 -0400

RFC2653-PowerPC: Add support for 1,024 bit DMR registers.

This patch is a preliminary patch to add the full 1,024 bit dense math 
register
(DMRs) for -mcpu=future.  The MMA 512-bit accumulators map onto the top of 
the
DMR register.

This patch only adds the new 1,024 bit register support.  It does not add
support for any instructions that need 1,024 bit registers instead of 512 
bit
registers.

I used the new mode 'TDOmode' to be the opaque mode used for 1,024 bit
registers.  The 'wD' constraint added in previous patches is used for these
registers.  I added support to do load and store of DMRs via the VSX 
registers,
since there are no load/store dense math instructions.  I added the new 
keyword
'__dmr' to create 1,024 bit types that can be loaded into DMRs.  At 
present, I
don't have aliases for __dmr512 and __dmr1024 that we've discussed 
internally.

The patches have been tested on both little and big endian systems.  Can I 
check
it into the master branch?

2024-08-19   Michael Meissner  

gcc/

* config/rs6000/mma.md (UNSPEC_DM_INSERT512_UPPER): New unspec.
(UNSPEC_DM_INSERT512_LOWER): Likewise.
(UNSPEC_DM_EXTRACT512): Likewise.
(UNSPEC_DMR_RELOAD_FROM_MEMORY): Likewise.
(UNSPEC_DMR_RELOAD_TO_MEMORY): Likewise.
(movtdo): New define_expand and define_insn_and_split to implement 
1,024
bit DMR registers.
(movtdo_insert512_upper): New insn.
(movtdo_insert512_lower): Likewise.
(movtdo_extract512): Likewise.
(reload_dmr_from_memory): Likewise.
(reload_dmr_to_memory): Likewise.
* config/rs6000/rs6000-builtin.cc (rs6000_type_string): Add DMR
support.
(rs6000_init_builtins): Add support for __dmr keyword.
* config/rs6000/rs6000-call.cc (rs6000_return_in_memory): Add 
support
for TDOmode.
(rs6000_function_arg): Likewise.
* config/rs6000/rs6000-modes.def (TDOmode): New mode.
* config/rs6000/rs6000.cc (rs6000_hard_regno_nregs_internal): Add
support for TDOmode.
(rs6000_hard_regno_mode_ok_uncached): Likewise.
(rs6000_hard_regno_mode_ok): Likewise.
(rs6000_modes_tieable_p): Likewise.
(rs6000_debug_reg_global): Likewise.
(rs6000_setup_reg_addr_masks): Likewise.
(rs6000_init_hard_regno_mode_ok): Add support for TDOmode.  Setup 
reload
hooks for DMR mode.
(reg_offset_addressing_ok_p): Add support for TDOmode.
(rs6000_emit_move): Likewise.
(rs6000_secondary_reload_simple_move): Likewise.
(rs6000_preferred_reload_class): Likewise.
(rs6000_secondary_reload_class): Likewise.
(rs6000_mangle_type): Add mangling for __dmr type.
(rs6000_dmr_register_move_cost): Add support for TDOmode.
(rs6000_split_multireg_move): Likewise.
(rs6000_invalid_conversion): Likewise.
* config/rs6000/rs6000.h (VECTOR_ALIGNMENT_P): Add TDOmode.
(enum rs6000_builtin_type_index): Add DMR type nodes.
(dmr_type_node): Likewise.
(ptr_dmr_type_node): Likewise.

gcc/testsuite/

* gcc.target/powerpc/dm-1024bit.c: New test.

Diff:
---
 gcc/config/rs6000/mma.md  | 154 ++
 gcc/config/rs6000/rs6000-builtin.cc   |  17 +++
 gcc/config/rs6000/rs6000-call.cc  |  10 +-
 gcc/config/rs6000/rs6000-modes.def|   4 +
 gcc/config/rs6000/rs6000.cc   | 101 -
 gcc/config/rs6000/rs6000.h|   6 +-
 gcc/testsuite/gcc.target/powerpc/dm-1024bit.c |  63 +++
 7 files changed, 321 insertions(+), 34 deletions(-)

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index 2e04eb653fa..8461499e1c3 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -92,6 +92,11 @@
UNSPEC_MMA_XXMFACC
UNSPEC_MMA_XXMTACC
UNSPEC_MMA_DMSETDMRZ
+   UNSPEC_DM_INSERT512_UPPER
+   UNSPEC_DM_INSERT512_LOWER
+   UNSPEC_DM_EXTRACT512
+   UNSPEC_DMR_RELOAD_FROM_MEMORY
+   UNSPEC_DMR_RELOAD_TO_MEMORY
   ])
 
 (define_c_enum "unspecv"
@@ -793,3 +798,152 @@
 }
   [(set_attr "type" "mma")
(set_attr "prefixed" "yes")])
+
+;; TDOmode (__dmr keyword for 1,024 bit registers).
+(define_expand "movtdo"
+  [(set (match_operand:TDO 0 "nonimmediate_operand")
+   (match_operand:TDO 1 "input_operand"))]
+  "TARGET_MMA_DENSE_MATH"
+{
+  rs6000_emit_move (operands[0], operands[1], TDOmode);
+  DONE;
+})
+
+(define_insn_and_split "*movtdo"
+  [(set (match_operand:TDO 0

[gcc(refs/users/meissner/heads/work176)] Revert changes

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:fa18267733f303996608ccc9940d21663a929d2d

commit fa18267733f303996608ccc9940d21663a929d2d
Author: Michael Meissner 
Date:   Mon Aug 19 13:34:29 2024 -0400

Revert changes

Diff:
---
 gcc/config/rs6000/constraints.md  |   3 -
 gcc/config/rs6000/mma.md  | 427 +-
 gcc/config/rs6000/predicates.md   |  32 --
 gcc/config/rs6000/rs6000-builtin.cc   |  22 +-
 gcc/config/rs6000/rs6000-c.cc |   9 +-
 gcc/config/rs6000/rs6000-call.cc  |  10 +-
 gcc/config/rs6000/rs6000-cpus.def |   4 +-
 gcc/config/rs6000/rs6000-modes.def|   4 -
 gcc/config/rs6000/rs6000.cc   | 318 
 gcc/config/rs6000/rs6000.h|  50 +--
 gcc/config/rs6000/rs6000.md   |   2 -
 gcc/doc/md.texi   |   5 -
 gcc/testsuite/gcc.target/powerpc/dm-1024bit.c |  63 
 gcc/testsuite/gcc.target/powerpc/dm-double-test.c | 194 --
 gcc/testsuite/lib/target-supports.exp |  23 --
 15 files changed, 166 insertions(+), 1000 deletions(-)

diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index 277a30a8245..369a7b75042 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -107,9 +107,6 @@
(match_test "TARGET_P8_VECTOR")
(match_operand 0 "s5bit_cint_operand")))
 
-(define_register_constraint "wD" "rs6000_constraints[RS6000_CONSTRAINT_wD]"
-  "Accumulator register.")
-
 (define_constraint "wE"
   "@internal Vector constant that can be loaded with the XXSPLTIB instruction."
   (match_test "xxspltib_constant_nosplit (op, mode)"))
diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index 8461499e1c3..04e2d0066df 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -91,12 +91,6 @@
UNSPEC_MMA_XVI8GER4SPP
UNSPEC_MMA_XXMFACC
UNSPEC_MMA_XXMTACC
-   UNSPEC_MMA_DMSETDMRZ
-   UNSPEC_DM_INSERT512_UPPER
-   UNSPEC_DM_INSERT512_LOWER
-   UNSPEC_DM_EXTRACT512
-   UNSPEC_DMR_RELOAD_FROM_MEMORY
-   UNSPEC_DMR_RELOAD_TO_MEMORY
   ])
 
 (define_c_enum "unspecv"
@@ -230,47 +224,44 @@
 (UNSPEC_MMA_XVF64GERNP "xvf64gernp")
 (UNSPEC_MMA_XVF64GERNN "xvf64gernn")])
 
-;; The "pm" prefix is not in these expansions, so that we can generate
-;; pmdmxvi4ger8 on systems with dense math registers and xvi4ger8 on systems
-;; without dense math registers.
-(define_int_attr vvi4i4i8  [(UNSPEC_MMA_PMXVI4GER8 "xvi4ger8")])
+(define_int_attr vvi4i4i8  [(UNSPEC_MMA_PMXVI4GER8 "pmxvi4ger8")])
 
-(define_int_attr avvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8PP   "xvi4ger8pp")])
+(define_int_attr avvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8PP   
"pmxvi4ger8pp")])
 
-(define_int_attr vvi4i4i2  [(UNSPEC_MMA_PMXVI16GER2"xvi16ger2")
-(UNSPEC_MMA_PMXVI16GER2S   "xvi16ger2s")
-(UNSPEC_MMA_PMXVF16GER2"xvf16ger2")
-(UNSPEC_MMA_PMXVBF16GER2   "xvbf16ger2")])
+(define_int_attr vvi4i4i2  [(UNSPEC_MMA_PMXVI16GER2"pmxvi16ger2")
+(UNSPEC_MMA_PMXVI16GER2S   "pmxvi16ger2s")
+(UNSPEC_MMA_PMXVF16GER2"pmxvf16ger2")
+(UNSPEC_MMA_PMXVBF16GER2   
"pmxvbf16ger2")])
 
-(define_int_attr avvi4i4i2 [(UNSPEC_MMA_PMXVI16GER2PP  "xvi16ger2pp")
-(UNSPEC_MMA_PMXVI16GER2SPP "xvi16ger2spp")
-(UNSPEC_MMA_PMXVF16GER2PP  "xvf16ger2pp")
-(UNSPEC_MMA_PMXVF16GER2PN  "xvf16ger2pn")
-(UNSPEC_MMA_PMXVF16GER2NP  "xvf16ger2np")
-(UNSPEC_MMA_PMXVF16GER2NN  "xvf16ger2nn")
-(UNSPEC_MMA_PMXVBF16GER2PP "xvbf16ger2pp")
-(UNSPEC_MMA_PMXVBF16GER2PN "xvbf16ger2pn")
-(UNSPEC_MMA_PMXVBF16GER2NP "xvbf16ger2np")
-(UNSPEC_MMA_PMXVBF16GER2NN 
"xvbf16ger2nn")])
+(define_int_attr avvi4i4i2 [(UNSPEC_MMA_PMXVI16GER2PP  "pmxvi16ger2pp")
+(UNSPEC_MMA_PMXVI16GER2SPP 
"pmxvi16ger2spp")
+(UNSPEC_MMA_PMXVF16GER2PP  "pmxvf16ger2pp")
+(UNSPEC_MMA_PMXVF16GER2PN  "pmxvf16ger2pn")
+(UNSPEC_MMA_PMXVF16GER2NP  "pmxvf16ger2np")
+(UNSPEC_MMA_PMXVF16GER2NN  "pmxvf16ger2nn")
+(UNSPEC_MMA_PMXVBF16GER2PP 
"pmxvbf16ger2pp")
+(UNSPEC_MMA_PMXVBF16G

[gcc r14-10603] c++: fix ICE in convert_nontype_argument [PR116384]

2024-08-19 Thread Marek Polacek via Gcc-cvs
https://gcc.gnu.org/g:52da8588fd06198edcda81d7acf83ec92ccb63ef

commit r14-10603-g52da8588fd06198edcda81d7acf83ec92ccb63ef
Author: Marek Polacek 
Date:   Thu Aug 15 11:53:10 2024 -0400

c++: fix ICE in convert_nontype_argument [PR116384]

Here we ICE since r14-8291 in C++11/C++14 modes.  Fortunately
this is an easy one.

The important bit of r14-8291 is this:

@@ -20056,9 +20071,12 @@ tsubst_expr (tree t, tree args, tsubst_flags_t 
complain, tree in_decl)
RETURN (retval);
  }
if (IMPLICIT_CONV_EXPR_NONTYPE_ARG (t))
- /* We'll pass this to convert_nontype_argument again, we don't need
-to actually perform any conversion here.  */
- RETURN (expr);
+ {
+   tree r = convert_nontype_argument (type, expr, complain);
+   if (r == NULL_TREE)
+ r = error_mark_node;
+   RETURN (r);
+ }

which obviously means that instead of returning right away we go
to convert_nontype_argument.  When type is error_mark_node and we're
in C++17, in convert_nontype_argument we go down this path:

  else if (INTEGRAL_OR_ENUMERATION_TYPE_P (type)
   || cxx_dialect >= cxx17)
{
  expr = build_converted_constant_expr (type, expr, complain);
  if (expr == error_mark_node)
return (complain & tf_error) ? NULL_TREE : error_mark_node;
  // ...
}

but pre-C++17, we take a different route and end up crashing on
gcc_unreachable.

It would of course also work to check for error_mark_node early in
build_converted_constant_expr.

PR c++/116384

gcc/cp/ChangeLog:

* pt.cc (tsubst_expr) <case IMPLICIT_CONV_EXPR>: Bail if tsubst
returns error_mark_node.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/vt-116384.C: New test.

(cherry picked from commit 8191f15022b0ea44fcb549449b0458d07ae02e0a)

Diff:
---
 gcc/cp/pt.cc   |  2 ++
 gcc/testsuite/g++.dg/cpp0x/vt-116384.C | 26 ++
 2 files changed, 28 insertions(+)

diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index 96a4f45909d..8e6a7d1a64a 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -20220,6 +20220,8 @@ tsubst_expr (tree t, tree args, tsubst_flags_t 
complain, tree in_decl)
 case IMPLICIT_CONV_EXPR:
   {
tree type = tsubst (TREE_TYPE (t), args, complain, in_decl);
+   if (type == error_mark_node)
+ RETURN (error_mark_node);
tree expr = RECUR (TREE_OPERAND (t, 0));
if (dependent_type_p (type) || type_dependent_expression_p (expr))
  {
diff --git a/gcc/testsuite/g++.dg/cpp0x/vt-116384.C 
b/gcc/testsuite/g++.dg/cpp0x/vt-116384.C
new file mode 100644
index 000..54d7f0774c5
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/vt-116384.C
@@ -0,0 +1,26 @@
+// PR c++/116384
+// { dg-do compile { target c++11 } }
+
+namespace a {
+template  struct c;
+template  struct d;
+}
+namespace e {
+namespace g {
+template  using h = void;
+template  class, typename...> struct detector {};
+template  class i, typename... args>
+struct detector>, i, args...>;
+}
+template  class i, typename... args>
+using j = g::detector;
+template  using l = typename a::c::m;
+template  struct conjunction;
+namespace g {
+template  using n = l>::p>;
+}
+template  = true> class o;
+}
+struct r;
+template  using q = e::o;
+void s() { e::j f; }


[gcc(refs/users/meissner/heads/work176)] Use vector pair load/store for memcpy with -mcpu=future

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:3548ae9e3ef2e61267350c7f81b567fc05693297

commit 3548ae9e3ef2e61267350c7f81b567fc05693297
Author: Michael Meissner 
Date:   Mon Aug 19 13:36:44 2024 -0400

Use vector pair load/store for memcpy with -mcpu=future

In the development for the power10 processor, GCC did not enable using the 
load
vector pair and store vector pair instructions when optimizing things like
memory copy.  This patch enables using those instructions if -mcpu=future is
used.

2024-08-19  Michael Meissner  

gcc/

* config/rs6000/rs6000-cpus.def (FUTURE_MASKS_SERVER): Enable
using
load vector pair and store vector pair instructions for memory copy
operations.
(POWERPC_MASKS): Make the bit for enabling using load vector pair 
and
store vector pair operations set and reset when the PowerPC 
processor is
changed.

Diff:
---
 gcc/config/rs6000/rs6000-cpus.def | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/gcc/config/rs6000/rs6000-cpus.def 
b/gcc/config/rs6000/rs6000-cpus.def
index e73d9ef51f8..74151be4048 100644
--- a/gcc/config/rs6000/rs6000-cpus.def
+++ b/gcc/config/rs6000/rs6000-cpus.def
@@ -86,7 +86,8 @@
 
 #define POWER11_MASKS_SERVER   ISA_3_1_MASKS_SERVER
 
-#define FUTURE_MASKS_SERVERPOWER11_MASKS_SERVER
+#define FUTURE_MASKS_SERVER(POWER11_MASKS_SERVER   \
+| OPTION_MASK_BLOCK_OPS_VECTOR_PAIR)
 
 /* Flags that need to be turned off if -mno-vsx.  */
 #define OTHER_VSX_VECTOR_MASKS (OPTION_MASK_EFFICIENT_UNALIGNED_VSX\
@@ -116,6 +117,7 @@
 
 /* Mask of all options to set the default isa flags based on -mcpu=.  */
 #define POWERPC_MASKS  (OPTION_MASK_ALTIVEC\
+| OPTION_MASK_BLOCK_OPS_VECTOR_PAIR\
 | OPTION_MASK_CMPB \
 | OPTION_MASK_CRYPTO   \
 | OPTION_MASK_DFP  \


[gcc(refs/users/meissner/heads/work176)] RFC2653-Add wD constraint.

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:5b68a1cf7d131e20e82dd1fc33163830e5661062

commit 5b68a1cf7d131e20e82dd1fc33163830e5661062
Author: Michael Meissner 
Date:   Mon Aug 19 13:38:39 2024 -0400

RFC2653-Add wD constraint.

This patch adds a new constraint ('wD') that matches the accumulator 
registers
that overlap with VSX registers 0..31 on power10.  Future patches will add 
the
support for a separate accumulator register class that will be used when the
support for dense math registers is added.

2024-08-19   Michael Meissner  

* config/rs6000/constraints.md (wD): New constraint.
* config/rs6000/mma.md (mma_): Prepare for alternate 
accumulator
registers.  Use wD constraint instead of 'd' constraint.  Use
accumulator_operand instead of fpr_reg_operand.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")]
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0")]
MMA_ACC))]
   "TARGET_MMA"
   " %A0"
@@ -523,7 +523,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
MMA_VV))]
@@ -532,8 +532,8 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
MMA_AVV))]
@@ -542,7 +542,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
MMA_PV))]
@@ -551,8 +551,8 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
(match_operand:OO 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
MMA_APV))]
@@ -561,7 +561,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -574,8 +574,8 @@
(set_attr "prefixed" "yes")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
(match_operand:SI 4 "const_0_to_15_operand" "n,n")
@@ -588,7 +588,7 @@
(set_attr "prefixed" "yes")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -601,8 +601,8 @@
(set_attr "prefixed" "yes")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
   

[gcc(refs/users/meissner/heads/work176)] RFC2653-Add support for dense math registers.

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:2f6844a7aa601f68f9450134f46cb8fda5715b57

commit 2f6844a7aa601f68f9450134f46cb8fda5715b57
Author: Michael Meissner 
Date:   Mon Aug 19 13:39:13 2024 -0400

RFC2653-Add support for dense math registers.

The MMA subsystem added the notion of accumulator registers as an optional
feature of ISA 3.1 (power10).  In ISA 3.1, these accumulators overlapped 
with
the VSX registers 0..31, but logically the accumulator registers were 
separate
from the FPR registers.  In ISA 3.1, it was anticipated that in future 
systems,
the accumulator registers may not overlap with the FPR registers.  This patch
adds the support for dense math registers as separate registers.

This particular patch does not change the MMA support to use the 
accumulators
within the dense math registers.  This patch just adds the basic support for
having separate DMRs.  The next patch will switch the MMA support to use the
accumulators if -mcpu=future is used.

For testing purposes, I added an undocumented option '-mdense-math' to 
enable
or disable the dense math support.

This patch adds a new constraint (wD).  If MMA is selected but dense math is
not selected (i.e. -mcpu=power10), the wD constraint will allow access to
accumulators that overlap with VSX registers 0..31.  If both MMA and dense 
math
are selected (i.e. -mcpu=future), the wD constraint will only allow dense 
math
registers.

This patch modifies the existing %A output modifier.  If MMA is selected but
dense math is not selected, then %A output modifier converts the VSX 
register
number to the accumulator number, by dividing it by 4.  If both MMA and 
dense
math are selected, then %A will map the separate DMR registers into 0..7.

The intention is that user code using extended asm can be modified to run on
both MMA without dense math and MMA with dense math:

1)  If possible, don't use extended asm, but instead use the MMA 
built-in
functions;

2)  If you do need to write extended asm, the d constraints
targeting accumulators should now be changed to wD;

3)  Only use the built-in zero, assemble and disassemble functions 
to create and
move data between vector quad types and dense math accumulators.
I.e. do not use the xxmfacc, xxmtacc, and xxsetaccz directly in the
extended asm code.  The reason is these instructions assume there 
is a
1-to-1 correspondence between 4 adjacent FPR registers and an
accumulator that overlaps with those registers.  With 
accumulators
now being separate registers, there no longer is a 1-to-1
correspondence.

It is possible that the mangling for DMRs and the GDB register numbers may
produce other changes in the future.

2024-08-19   Michael Meissner  

* config/rs6000/mma.md (UNSPEC_MMA_DMSETDMRZ): New unspec.
(movxo): Add comments about dense math registers.
(movxo_nodm): Rename from movxo and restrict the usage to machines
without dense math registers.
(movxo_dm): New insn for movxo support for machines with dense math
registers.
(mma_): Restrict usage to machines without dense math 
registers.
(mma_xxsetaccz): Add a define_expand wrapper, and add support for 
dense
math registers.
(mma_dmsetaccz): New insn.
* config/rs6000/predicates.md (dmr_operand): New predicate.
(accumulator_operand): Add support for dense math registers.
* config/rs6000/rs6000-builtin.cc (rs6000_gimple_fold_mma_builtin): 
Do
not issue a de-prime instruction when disassembling a vector quad 
on a
system with dense math registers.
* config/rs6000/rs6000-c.cc (rs6000_define_or_undefine_macro): 
Define
__DENSE_MATH__ if we have dense math registers.
* config/rs6000/rs6000.cc (enum rs6000_reg_type): Add DMR_REG_TYPE.
(enum rs6000_reload_reg_type): Add RELOAD_REG_DMR.
(LAST_RELOAD_REG_CLASS): Add support for DMR registers and the wD
constraint.
(reload_reg_map): Likewise.
(rs6000_reg_names): Likewise.
(alt_reg_names): Likewise.
(rs6000_hard_regno_nregs_internal): Likewise.
(rs6000_hard_regno_mode_ok_uncached): Likewise.
(rs6000_debug_reg_global): Likewise.
(rs6000_setup_reg_addr_masks): Likewise.
(rs6000_init_hard_regno_mode_ok): Likewise.
(rs6000_secondary_reload_memory): Add support for DMR registers.
(rs6000_secondary_reload_simple_move): Likewise.
(rs6000_preferred_reload_class): Likewise.
(rs6000_secondary_reload_class): Likewise.
(print_operand): Make %A handle both FPRs and DMRs.
 

[gcc(refs/users/meissner/heads/work176)] RFC2653-Add support for dense math registers.

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:6a73439005e310de104d07d715b0cd36daa2d681

commit 6a73439005e310de104d07d715b0cd36daa2d681
Author: Michael Meissner 
Date:   Mon Aug 19 13:39:53 2024 -0400

RFC2653-Add support for dense math registers.

The MMA subsystem added the notion of accumulator registers as an optional
feature of ISA 3.1 (power10).  In ISA 3.1, these accumulators overlapped 
with
the VSX registers 0..31, but logically the accumulator registers were 
separate
from the FPR registers.  In ISA 3.1, it was anticipated that in future 
systems,
the accumulator registers may not overlap with the FPR registers.  This patch
adds the support for dense math registers as separate registers.

This particular patch does not change the MMA support to use the 
accumulators
within the dense math registers.  This patch just adds the basic support for
having separate DMRs.  The next patch will switch the MMA support to use the
accumulators if -mcpu=future is used.

For testing purposes, I added an undocumented option '-mdense-math' to 
enable
or disable the dense math support.

This patch adds a new constraint (wD).  If MMA is selected but dense math is
not selected (i.e. -mcpu=power10), the wD constraint will allow access to
accumulators that overlap with VSX registers 0..31.  If both MMA and dense 
math
are selected (i.e. -mcpu=future), the wD constraint will only allow dense 
math
registers.

This patch modifies the existing %A output modifier.  If MMA is selected but
dense math is not selected, then %A output modifier converts the VSX 
register
number to the accumulator number, by dividing it by 4.  If both MMA and 
dense
math are selected, then %A will map the separate DMR registers into 0..7.

The intention is that user code using extended asm can be modified to run on
both MMA without dense math and MMA with dense math:

1)  If possible, don't use extended asm, but instead use the MMA 
built-in
functions;

2)  If you do need to write extended asm, the d constraints
targeting accumulators should now be changed to wD;

3)  Only use the built-in zero, assemble and disassemble functions 
to create and
move data between vector quad types and dense math accumulators.
I.e. do not use the xxmfacc, xxmtacc, and xxsetaccz directly in the
extended asm code.  The reason is these instructions assume there 
is a
1-to-1 correspondence between 4 adjacent FPR registers and an
accumulator that overlaps with those registers.  With 
accumulators
now being separate registers, there no longer is a 1-to-1
correspondence.

It is possible that the mangling for DMRs and the GDB register numbers may
produce other changes in the future.

2024-08-19   Michael Meissner  

* config/rs6000/mma.md (UNSPEC_MMA_DMSETDMRZ): New unspec.
(movxo): Add comments about dense math registers.
(movxo_nodm): Rename from movxo and restrict the usage to machines
without dense math registers.
(movxo_dm): New insn for movxo support for machines with dense math
registers.
(mma_): Restrict usage to machines without dense math 
registers.
(mma_xxsetaccz): Add a define_expand wrapper, and add support for 
dense
math registers.
(mma_dmsetaccz): New insn.
* config/rs6000/predicates.md (dmr_operand): New predicate.
(accumulator_operand): Add support for dense math registers.
* config/rs6000/rs6000-builtin.cc (rs6000_gimple_fold_mma_builtin): 
Do
not issue a de-prime instruction when disassembling a vector quad 
on a
system with dense math registers.
* config/rs6000/rs6000-c.cc (rs6000_define_or_undefine_macro): 
Define
__DENSE_MATH__ if we have dense math registers.
* config/rs6000/rs6000.cc (enum rs6000_reg_type): Add DMR_REG_TYPE.
(enum rs6000_reload_reg_type): Add RELOAD_REG_DMR.
(LAST_RELOAD_REG_CLASS): Add support for DMR registers and the wD
constraint.
(reload_reg_map): Likewise.
(rs6000_reg_names): Likewise.
(alt_reg_names): Likewise.
(rs6000_hard_regno_nregs_internal): Likewise.
(rs6000_hard_regno_mode_ok_uncached): Likewise.
(rs6000_debug_reg_global): Likewise.
(rs6000_setup_reg_addr_masks): Likewise.
(rs6000_init_hard_regno_mode_ok): Likewise.
(rs6000_secondary_reload_memory): Add support for DMR registers.
(rs6000_secondary_reload_simple_move): Likewise.
(rs6000_preferred_reload_class): Likewise.
(rs6000_secondary_reload_class): Likewise.
(print_operand): Make %A handle both FPRs and DMRs.
 

[gcc(refs/users/meissner/heads/work176)] RFC2653-Add dense math test for new instruction names.

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:42a51bfe1a5e19fb3905bead97b777c68674e9c5

commit 42a51bfe1a5e19fb3905bead97b777c68674e9c5
Author: Michael Meissner 
Date:   Mon Aug 19 13:40:32 2024 -0400

RFC2653-Add dense math test for new instruction names.

2024-08-19   Michael Meissner  

gcc/testsuite/

* gcc.target/powerpc/dm-double-test.c: New test.
* lib/target-supports.exp (check_effective_target_ppc_dmr_ok): New
target test.

Diff:
---
 gcc/testsuite/gcc.target/powerpc/dm-double-test.c | 194 ++
 gcc/testsuite/lib/target-supports.exp |  23 +++
 2 files changed, 217 insertions(+)

diff --git a/gcc/testsuite/gcc.target/powerpc/dm-double-test.c 
b/gcc/testsuite/gcc.target/powerpc/dm-double-test.c
new file mode 100644
index 000..66c19779585
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/dm-double-test.c
@@ -0,0 +1,194 @@
+/* Test derived from mma-double-1.c, modified for dense math.  */
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_dense_math_ok } */
+/* { dg-options "-mdejagnu-cpu=future -O2" } */
+
+#include 
+#include 
+#include 
+
+typedef unsigned char vec_t __attribute__ ((vector_size (16)));
+typedef double v4sf_t __attribute__ ((vector_size (16)));
+#define SAVE_ACC(ACC, ldc, J)  \
+ __builtin_mma_disassemble_acc (result, ACC); \
+ rowC = (v4sf_t *) &CO[0*ldc+J]; \
+  rowC[0] += result[0]; \
+  rowC = (v4sf_t *) &CO[1*ldc+J]; \
+  rowC[0] += result[1]; \
+  rowC = (v4sf_t *) &CO[2*ldc+J]; \
+  rowC[0] += result[2]; \
+  rowC = (v4sf_t *) &CO[3*ldc+J]; \
+ rowC[0] += result[3];
+
+void
+DM (int m, int n, int k, double *A, double *B, double *C)
+{
+  __vector_quad acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7;
+  v4sf_t result[4];
+  v4sf_t *rowC;
+  for (int l = 0; l < n; l += 4)
+{
+  double *CO;
+  double *AO;
+  AO = A;
+  CO = C;
+  C += m * 4;
+  for (int j = 0; j < m; j += 16)
+   {
+ double *BO = B;
+ __builtin_mma_xxsetaccz (&acc0);
+ __builtin_mma_xxsetaccz (&acc1);
+ __builtin_mma_xxsetaccz (&acc2);
+ __builtin_mma_xxsetaccz (&acc3);
+ __builtin_mma_xxsetaccz (&acc4);
+ __builtin_mma_xxsetaccz (&acc5);
+ __builtin_mma_xxsetaccz (&acc6);
+ __builtin_mma_xxsetaccz (&acc7);
+ unsigned long i;
+
+ for (i = 0; i < k; i++)
+   {
+ vec_t *rowA = (vec_t *) & AO[i * 16];
+ __vector_pair rowB;
+ vec_t *rb = (vec_t *) & BO[i * 4];
+ __builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
+ __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
+ __builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
+ __builtin_mma_xvf64gerpp (&acc2, rowB, rowA[2]);
+ __builtin_mma_xvf64gerpp (&acc3, rowB, rowA[3]);
+ __builtin_mma_xvf64gerpp (&acc4, rowB, rowA[4]);
+ __builtin_mma_xvf64gerpp (&acc5, rowB, rowA[5]);
+ __builtin_mma_xvf64gerpp (&acc6, rowB, rowA[6]);
+ __builtin_mma_xvf64gerpp (&acc7, rowB, rowA[7]);
+   }
+ SAVE_ACC (&acc0, m, 0);
+ SAVE_ACC (&acc2, m, 4);
+ SAVE_ACC (&acc1, m, 2);
+ SAVE_ACC (&acc3, m, 6);
+ SAVE_ACC (&acc4, m, 8);
+ SAVE_ACC (&acc6, m, 12);
+ SAVE_ACC (&acc5, m, 10);
+ SAVE_ACC (&acc7, m, 14);
+ AO += k * 16;
+ BO += k * 4;
+ CO += 16;
+   }
+  B += k * 4;
+}
+}
+
+void
+init (double *matrix, int row, int column)
+{
+  for (int j = 0; j < column; j++)
+{
+  for (int i = 0; i < row; i++)
+   {
+ matrix[j * row + i] = (i * 16 + 2 + j) / 0.123;
+   }
+}
+}
+
+void
+init0 (double *matrix, double *matrix1, int row, int column)
+{
+  for (int j = 0; j < column; j++)
+for (int i = 0; i < row; i++)
+  matrix[j * row + i] = matrix1[j * row + i] = 0;
+}
+
+
+void
+print (const char *name, const double *matrix, int row, int column)
+{
+  printf ("Matrix %s has %d rows and %d columns:\n", name, row, column);
+  for (int i = 0; i < row; i++)
+{
+  for (int j = 0; j < column; j++)
+   {
+ printf ("%f ", matrix[j * row + i]);
+   }
+  printf ("\n");
+}
+  printf ("\n");
+}
+
+int
+main (int argc, char *argv[])
+{
+  int rowsA, colsB, common;
+  int i, j, k;
+  int ret = 0;
+
+  for (int t = 16; t <= 128; t += 16)
+{
+  for (int t1 = 4; t1 <= 16; t1 += 4)
+   {
+ rowsA = t;
+ colsB = t1;
+ common = 1;
+ /* printf ("Running test for rows = %d,cols = %d\n", t, t1); */
+ double A[rowsA * common];
+ double B[common * colsB];
+ double C[rowsA * colsB];
+ double D[rowsA * colsB];
+
+
+ init (A, rowsA, common);
+ init (B, common, colsB);
+ init0 (C, D, rowsA, colsB);
+ DM (rowsA, colsB, common, A, B, 

[gcc(refs/users/meissner/heads/work176)] RFC2653-PowerPC: Add support for 1,024 bit DMR registers.

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:9bb7924857212ebaed7d46832d148c50461412e6

commit 9bb7924857212ebaed7d46832d148c50461412e6
Author: Michael Meissner 
Date:   Mon Aug 19 13:41:01 2024 -0400

RFC2653-PowerPC: Add support for 1,024 bit DMR registers.

This patch is a preliminary patch to add the full 1,024 bit dense math 
register
(DMRs) for -mcpu=future.  The MMA 512-bit accumulators map onto the top of 
the
DMR register.

This patch only adds the new 1,024 bit register support.  It does not add
support for any instructions that need 1,024 bit registers instead of 512 
bit
registers.

I used the new mode 'TDOmode' to be the opaque mode used for 1,024 bit
registers.  The 'wD' constraint added in previous patches is used for these
registers.  I added support to do load and store of DMRs via the VSX 
registers,
since there are no load/store dense math instructions.  I added the new 
keyword
'__dmr' to create 1,024 bit types that can be loaded into DMRs.  At 
present, I
don't have aliases for __dmr512 and __dmr1024 that we've discussed 
internally.

The patches have been tested on both little and big endian systems.  Can I 
check
it into the master branch?

2024-08-19   Michael Meissner  

gcc/

* config/rs6000/mma.md (UNSPEC_DM_INSERT512_UPPER): New unspec.
(UNSPEC_DM_INSERT512_LOWER): Likewise.
(UNSPEC_DM_EXTRACT512): Likewise.
(UNSPEC_DMR_RELOAD_FROM_MEMORY): Likewise.
(UNSPEC_DMR_RELOAD_TO_MEMORY): Likewise.
(movtdo): New define_expand and define_insn_and_split to implement 
1,024
bit DMR registers.
(movtdo_insert512_upper): New insn.
(movtdo_insert512_lower): Likewise.
(movtdo_extract512): Likewise.
(reload_dmr_from_memory): Likewise.
(reload_dmr_to_memory): Likewise.
* config/rs6000/rs6000-builtin.cc (rs6000_type_string): Add DMR
support.
(rs6000_init_builtins): Add support for __dmr keyword.
* config/rs6000/rs6000-call.cc (rs6000_return_in_memory): Add 
support
for TDOmode.
(rs6000_function_arg): Likewise.
* config/rs6000/rs6000-modes.def (TDOmode): New mode.
* config/rs6000/rs6000.cc (rs6000_hard_regno_nregs_internal): Add
support for TDOmode.
(rs6000_hard_regno_mode_ok_uncached): Likewise.
(rs6000_hard_regno_mode_ok): Likewise.
(rs6000_modes_tieable_p): Likewise.
(rs6000_debug_reg_global): Likewise.
(rs6000_setup_reg_addr_masks): Likewise.
(rs6000_init_hard_regno_mode_ok): Add support for TDOmode.  Setup 
reload
hooks for DMR mode.
(reg_offset_addressing_ok_p): Add support for TDOmode.
(rs6000_emit_move): Likewise.
(rs6000_secondary_reload_simple_move): Likewise.
(rs6000_preferred_reload_class): Likewise.
(rs6000_secondary_reload_class): Likewise.
(rs6000_mangle_type): Add mangling for __dmr type.
(rs6000_dmr_register_move_cost): Add support for TDOmode.
(rs6000_split_multireg_move): Likewise.
(rs6000_invalid_conversion): Likewise.
* config/rs6000/rs6000.h (VECTOR_ALIGNMENT_P): Add TDOmode.
(enum rs6000_builtin_type_index): Add DMR type nodes.
(dmr_type_node): Likewise.
(ptr_dmr_type_node): Likewise.

gcc/testsuite/

* gcc.target/powerpc/dm-1024bit.c: New test.

Diff:
---
 gcc/config/rs6000/mma.md  | 154 ++
 gcc/config/rs6000/rs6000-builtin.cc   |  17 +++
 gcc/config/rs6000/rs6000-call.cc  |  10 +-
 gcc/config/rs6000/rs6000-modes.def|   4 +
 gcc/config/rs6000/rs6000.cc   | 101 -
 gcc/config/rs6000/rs6000.h|   6 +-
 gcc/testsuite/gcc.target/powerpc/dm-1024bit.c |  63 +++
 7 files changed, 321 insertions(+), 34 deletions(-)

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index 2e04eb653fa..8461499e1c3 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -92,6 +92,11 @@
UNSPEC_MMA_XXMFACC
UNSPEC_MMA_XXMTACC
UNSPEC_MMA_DMSETDMRZ
+   UNSPEC_DM_INSERT512_UPPER
+   UNSPEC_DM_INSERT512_LOWER
+   UNSPEC_DM_EXTRACT512
+   UNSPEC_DMR_RELOAD_FROM_MEMORY
+   UNSPEC_DMR_RELOAD_TO_MEMORY
   ])
 
 (define_c_enum "unspecv"
@@ -793,3 +798,152 @@
 }
   [(set_attr "type" "mma")
(set_attr "prefixed" "yes")])
+
+;; TDOmode (__dmr keyword for 1,024 bit registers).
+(define_expand "movtdo"
+  [(set (match_operand:TDO 0 "nonimmediate_operand")
+   (match_operand:TDO 1 "input_operand"))]
+  "TARGET_MMA_DENSE_MATH"
+{
+  rs6000_emit_move (operands[0], operands[1], TDOmode);
+  DONE;
+})
+
+(define_insn_and_split "*movtdo"
+  [(set (match_operand:TDO 0

[gcc(refs/users/meissner/heads/work176)] RFC2656-Support load/store vector with right length.

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:56a9db5160a34db170f68370c1f0848f2969742f

commit 56a9db5160a34db170f68370c1f0848f2969742f
Author: Michael Meissner 
Date:   Mon Aug 19 13:42:46 2024 -0400

RFC2656-Support load/store vector with right length.

This patch adds support for new instructions that may be added to the 
PowerPC
architecture in the future to enhance the load and store vector with length
instructions.

The current instructions (lxvl, lxvll, stxvl, and stxvll) are inconvenient to 
use
since the count for the number of bytes must be in the top 8 bits of the GPR
register, instead of the bottom 8 bits.  This meant that code generating 
these
instructions typically had to do a shift left by 56 bits to get the count 
into
the right position.  In a future version of the PowerPC architecture, new
variants of these instructions might be added that expect the count to be in
the bottom 8 bits of the GPR register.  These patches add this support to 
GCC
if the user uses the -mcpu=future option.

I discovered that the code in rs6000-string.cc to generate ISA 3.1 
lxvl/stxvl
future lxvll/stxvll instructions would generate these instructions on 
32-bit.
However, the patterns for these instructions are only defined on 64-bit systems. 
 So
I added a check for 64-bit support before generating the instructions.

The patches have been tested on both little and big endian systems.  Can I 
check
it into the master branch?

2024-08-19   Michael Meissner  

gcc/

* config/rs6000/rs6000-string.cc (expand_block_move): Do not 
generate
lxvl and stxvl on 32-bit.
* config/rs6000/vsx.md (lxvl): If -mcpu=future, generate the lxvl 
with
the shift count automatically used in the insn.
(lxvrl): New insn for -mcpu=future.
(lxvrll): Likewise.
(stxvl): If -mcpu=future, generate the stxvl with the shift count
automatically used in the insn.
(stxvrl): New insn for -mcpu=future.
(stxvrll): Likewise.

gcc/testsuite/

* gcc.target/powerpc/lxvrl.c: New test.
* lib/target-supports.exp 
(check_effective_target_powerpc_future_ok):
New effective target.

Diff:
---
 gcc/config/rs6000/rs6000-string.cc   |   1 +
 gcc/config/rs6000/vsx.md | 122 +--
 gcc/testsuite/gcc.target/powerpc/lxvrl.c |  32 
 gcc/testsuite/lib/target-supports.exp|  12 +++
 4 files changed, 146 insertions(+), 21 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-string.cc 
b/gcc/config/rs6000/rs6000-string.cc
index 3674c4bd984..818ff10a8ac 100644
--- a/gcc/config/rs6000/rs6000-string.cc
+++ b/gcc/config/rs6000/rs6000-string.cc
@@ -2786,6 +2786,7 @@ expand_block_move (rtx operands[], bool might_overlap)
 
   if (TARGET_MMA && TARGET_BLOCK_OPS_UNALIGNED_VSX
  && TARGET_BLOCK_OPS_VECTOR_PAIR
+ && TARGET_POWERPC64
  && bytes >= 32
  && (align >= 256 || !STRICT_ALIGNMENT))
{
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 27069d070e1..b4399f2375e 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -5711,20 +5711,32 @@
   DONE;
 })
 
-;; Load VSX Vector with Length
+;; Load VSX Vector with Length.  If we have lxvrl, we don't have to do an
+;; explicit shift left into a pseudo.
 (define_expand "lxvl"
-  [(set (match_dup 3)
-(ashift:DI (match_operand:DI 2 "register_operand")
-   (const_int 56)))
-   (set (match_operand:V16QI 0 "vsx_register_operand")
-   (unspec:V16QI
-[(match_operand:DI 1 "gpc_reg_operand")
-  (mem:V16QI (match_dup 1))
- (match_dup 3)]
-UNSPEC_LXVL))]
+  [(use (match_operand:V16QI 0 "vsx_register_operand"))
+   (use (match_operand:DI 1 "gpc_reg_operand"))
+   (use (match_operand:DI 2 "gpc_reg_operand"))]
   "TARGET_P9_VECTOR && TARGET_64BIT"
 {
-  operands[3] = gen_reg_rtx (DImode);
+  rtx shift_len = gen_rtx_ASHIFT (DImode, operands[2], GEN_INT (56));
+  rtx len;
+
+  if (TARGET_FUTURE)
+len = shift_len;
+  else
+{
+  len = gen_reg_rtx (DImode);
+  emit_insn (gen_rtx_SET (len, shift_len));
+}
+
+  rtx dest = operands[0];
+  rtx addr = operands[1];
+  rtx mem = gen_rtx_MEM (V16QImode, addr);
+  rtvec rv = gen_rtvec (3, addr, mem, len);
+  rtx lxvl = gen_rtx_UNSPEC (V16QImode, rv, UNSPEC_LXVL);
+  emit_insn (gen_rtx_SET (dest, lxvl));
+  DONE;
 })
 
 (define_insn "*lxvl"
@@ -5748,6 +5760,34 @@
   "lxvll %x0,%1,%2"
   [(set_attr "type" "vecload")])
 
+;; For lxvrl and lxvrll, use the combiner to eliminate the shift.  The
+;; define_expand for lxvl will already incorporate the shift in generating the
+;; insn.  The lxvll built-in function required the user to have already done
+;; the shift.  Defining lxvrll this way, will optimize cases where the user has
+;; done the shift immediately before the

[gcc(refs/users/meissner/heads/work176)] RFC2686-Add paddis support.

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:0db29e6841be33457118d7888dde0f258c5c360b

commit 0db29e6841be33457118d7888dde0f258c5c360b
Author: Michael Meissner 
Date:   Mon Aug 19 13:44:27 2024 -0400

RFC2686-Add paddis support.

2024-08-19  Michael Meissner  

gcc/

* config/rs6000/constraints.md (eU): New constraint.
(eV): Likewise.
* config/rs6000/predicates.md (paddis_operand): New predicate.
(paddis_paddi_operand): Likewise.
(add_operand): Add paddis support.
* config/rs6000/rs6000.cc (num_insns_constant_gpr): Add paddis 
support.
(num_insns_constant_multi): Likewise.
(print_operand): Add %B for paddis support.
* config/rs6000/rs6000.h (TARGET_PADDIS): New macro.
(SIGNED_INTEGER_32BIT_P): Likewise.
* config/rs6000/rs6000.md (isa attribute): Add paddis support.
(enabled attribute): Likewise.
(add3): Likewise.
(adddi3 splitter): New splitter for paddis.
(movdi_internal64): Add paddis support.
(movdi splitter): New splitter for paddis.

gcc/testsuite/

* gcc.target/powerpc/prefixed-addis.c: New test.

Diff:
---
 gcc/config/rs6000/constraints.md  | 10 +++
 gcc/config/rs6000/predicates.md   | 52 +++-
 gcc/config/rs6000/rs6000.cc   | 25 ++
 gcc/config/rs6000/rs6000.h|  4 +
 gcc/config/rs6000/rs6000.md   | 96 ---
 gcc/testsuite/gcc.target/powerpc/prefixed-addis.c | 24 ++
 6 files changed, 197 insertions(+), 14 deletions(-)

diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index 277a30a8245..4d8d21fd6bb 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -222,6 +222,16 @@
   "An IEEE 128-bit constant that can be loaded into VSX registers."
   (match_operand 0 "easy_vector_constant_ieee128"))
 
+(define_constraint "eU"
+  "@internal integer constant that can be loaded with paddis"
+  (and (match_code "const_int")
+   (match_operand 0 "paddis_operand")))
+
+(define_constraint "eV"
+  "@internal integer constant that can be loaded with paddis + paddi"
+  (and (match_code "const_int")
+   (match_operand 0 "paddis_paddi_operand")))
+
 ;; Floating-point constraints.  These two are defined so that insn
 ;; length attributes can be calculated exactly.
 
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 83813fe9ddc..c4a14cf13bb 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -369,6 +369,53 @@
   return SIGNED_INTEGER_34BIT_P (INTVAL (op));
 })
 
+;; Return 1 if op is a 64-bit constant that uses the paddis instruction
+(define_predicate "paddis_operand"
+  (match_code "const_int")
+{
+  if (!TARGET_PADDIS && TARGET_POWERPC64)
+return 0;
+
+  /* If addi, addis, or paddi can handle the number, don't return true.  */
+  HOST_WIDE_INT value = INTVAL (op);
+  if (SIGNED_INTEGER_34BIT_P (value))
+return false;
+
+  /* If the number is too large for paddis, return false.  */
+  if (!SIGNED_INTEGER_32BIT_P (value >> 32))
+return false;
+
+  /* If the bottom 32-bits are non-zero, paddis can't handle it.  */
+  if ((value & HOST_WIDE_INT_C (0xffffffff)) != 0)
+return false;
+
+  return true;
+})
+
+;; Return 1 if op is a 64-bit constant that needs the paddis instruction and an
+;; addi/addis/paddi instruction combination.
+(define_predicate "paddis_paddi_operand"
+  (match_code "const_int")
+{
+  if (!TARGET_PADDIS && TARGET_POWERPC64)
+return 0;
+
+  /* If addi, addis, or paddi can handle the number, don't return true.  */
+  HOST_WIDE_INT value = INTVAL (op);
+  if (SIGNED_INTEGER_34BIT_P (value))
+return false;
+
+  /* If the number is too large for paddis, return false.  */
+  if (!SIGNED_INTEGER_32BIT_P (value >> 32))
+return false;
+
+  /* If the bottom 32-bits are zero, we can use paddis alone to handle it.  */
+  if ((value & HOST_WIDE_INT_C (0xffffffff)) == 0)
+return false;
+
+  return true;
+})
+
 ;; Return 1 if op is a register that is not special.
 ;; Disallow (SUBREG:SF (REG:SI)) and (SUBREG:SI (REG:SF)) on VSX systems where
 ;; you need to be careful in moving a SFmode to SImode and vice versa due to
@@ -1050,7 +1097,10 @@
   (if_then_else (match_code "const_int")
 (match_test "satisfies_constraint_I (op)
 || satisfies_constraint_L (op)
-|| satisfies_constraint_eI (op)")
+|| satisfies_constraint_eI (op)
+|| satisfies_constraint_eU (op)
+|| satisfies_constraint_eV (op)")
+
 (match_operand 0 "gpc_reg_operand")))
 
 ;; Return 1 if the operand is either a non-special register, or 0, or -1.
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 9c219ccae08..dfc407c57fc 100644
--- a/gcc/config/rs

[gcc(refs/users/meissner/heads/work176)] RFC2655-Add saturating subtract built-ins.

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:251349263ff84a4c5c5b6a712e8c106402405101

commit 251349263ff84a4c5c5b6a712e8c106402405101
Author: Michael Meissner 
Date:   Mon Aug 19 13:43:33 2024 -0400

RFC2655-Add saturating subtract built-ins.

This patch adds support for a saturating subtract built-in function that 
may be
added to a future PowerPC processor.  Note, if it is added, the name of the
built-in function may change before GCC 13 is released.  If the name 
changes,
we will submit a patch changing the name.

I also added support for providing dense math built-in functions, even 
though
at present, we have not added any new built-in functions for dense math.  
It is
likely we will want to add new dense math built-in functions as the dense 
math
support is fleshed out.

The patches have been tested on both little and big endian systems.  Can I 
check
it into the master branch?

2024-08-19   Michael Meissner  

gcc/

* config/rs6000/rs6000-builtin.cc (rs6000_invalid_builtin): Add 
support
for flagging invalid use of future built-in functions.
(rs6000_builtin_is_supported): Add support for future built-in
functions.
* config/rs6000/rs6000-builtins.def 
(__builtin_saturate_subtract32): New
built-in function for -mcpu=future.
(__builtin_saturate_subtract64): Likewise.
* config/rs6000/rs6000-gen-builtins.cc (enum bif_stanza): Add 
stanzas
for -mcpu=future built-ins.
(stanza_map): Likewise.
(enable_string): Likewise.
(struct attrinfo): Likewise.
(parse_bif_attrs): Likewise.
(write_decls): Likewise.
* config/rs6000/rs6000.md (sat_sub3): Add saturating subtract
built-in insn declarations.
(sat_sub3_dot): Likewise.
(sat_sub3_dot2): Likewise.
* doc/extend.texi (Future PowerPC built-ins): New section.

gcc/testsuite/

* gcc.target/powerpc/subfus-1.c: New test.
* gcc.target/powerpc/subfus-2.c: Likewise.

Diff:
---
 gcc/config/rs6000/rs6000-builtin.cc | 17 
 gcc/config/rs6000/rs6000-builtins.def   | 10 +
 gcc/config/rs6000/rs6000-gen-builtins.cc| 35 ++---
 gcc/config/rs6000/rs6000.md | 60 +
 gcc/doc/extend.texi | 24 
 gcc/testsuite/gcc.target/powerpc/subfus-1.c | 32 +++
 gcc/testsuite/gcc.target/powerpc/subfus-2.c | 32 +++
 7 files changed, 205 insertions(+), 5 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin.cc 
b/gcc/config/rs6000/rs6000-builtin.cc
index 8e4335e9b44..a5f33eb9da1 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -139,6 +139,17 @@ rs6000_invalid_builtin (enum rs6000_gen_builtins fncode)
 case ENB_MMA:
   error ("%qs requires the %qs option", name, "-mmma");
   break;
+case ENB_FUTURE:
+  error ("%qs requires the %qs option", name, "-mcpu=future");
+  break;
+case ENB_FUTURE_64:
+  error ("%qs requires the %qs option and either the %qs or %qs option",
+name, "-mcpu=future", "-m64", "-mpowerpc64");
+  break;
+case ENB_DM:
+  error ("%qs requires the %qs or %qs options", name, "-mcpu=future",
+"-mdense-math");
+  break;
 default:
 case ENB_ALWAYS:
   gcc_unreachable ();
@@ -194,6 +205,12 @@ rs6000_builtin_is_supported (enum rs6000_gen_builtins 
fncode)
   return TARGET_HTM;
 case ENB_MMA:
   return TARGET_MMA;
+case ENB_FUTURE:
+  return TARGET_FUTURE;
+case ENB_FUTURE_64:
+  return TARGET_FUTURE && TARGET_POWERPC64;
+case ENB_DM:
+  return TARGET_DENSE_MATH;
 default:
   gcc_unreachable ();
 }
diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index 0e9dc05dbcf..7d47dc4e402 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -137,6 +137,8 @@
 ;   endian   Needs special handling for endianness
 ;   ibmldRestrict usage to the case when TFmode is IBM-128
 ;   ibm128   Restrict usage to the case where __ibm128 is supported or if ibmld
+;   future   Restrict usage to future instructions
+;   dm   Restrict usage to dense math
 ;
 ; Each attribute corresponds to extra processing required when
 ; the built-in is expanded.  All such special processing should
@@ -3933,3 +3935,11 @@
 
   void __builtin_vsx_stxvp (v256, unsigned long, const v256 *);
 STXVP nothing {mma,pair}
+
+[future]
+  const signed int __builtin_saturate_subtract32 (signed int, signed int);
+  SAT_SUBSI sat_subsi3 {}
+
+[future-64]
+  const signed long __builtin_saturate_subtract64 (signed long,  signed long);
+  SAT_SUBDI sat_subdi3 {}
diff --git a/gcc/config/rs6000/rs6000-gen-builtins.cc 
b/gcc/config/r

[gcc(refs/users/meissner/heads/work176)] RFC2677-Add xvrlw support.

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:11f9bbc963f9096f58f1e57fd3a61476333df2c3

commit 11f9bbc963f9096f58f1e57fd3a61476333df2c3
Author: Michael Meissner 
Date:   Mon Aug 19 13:45:28 2024 -0400

RFC2677-Add xvrlw support.

2024-08-19  Michael Meissner  

gcc/

* config/rs6000/altivec.md (xvrlw): New insn.
* config/rs6000/rs6000.h (TARGET_XVRLW): New macro.

gcc/testsuite/

* gcc.target/powerpc/vector-rotate-left.c: New test.

Diff:
---
 gcc/config/rs6000/altivec.md   | 14 +
 gcc/config/rs6000/rs6000.h |  3 ++
 .../gcc.target/powerpc/vector-rotate-left.c| 34 ++
 3 files changed, 51 insertions(+)

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 1f5489b974f..f891ccc7403 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -1982,6 +1982,20 @@
 }
   [(set_attr "type" "vecperm")])
 
+;; -mcpu=future adds a vector rotate left word variant.  There is no vector
+;; byte/half-word/double-word/quad-word rotate left.  This insn occurs before
+;; altivec_vrl and will match for -mcpu=future, while other cpus will
+;; match the generic insn.
+(define_insn "*xvrlw"
+  [(set (match_operand:V4SI 0 "register_operand" "=v,wa")
+   (rotate:V4SI (match_operand:V4SI 1 "register_operand" "v,wa")
+(match_operand:V4SI 2 "register_operand" "v,wa")))]
+  "TARGET_XVRLW"
+  "@
+   vrlw %0,%1,%2
+   xvrlw %x0,%x1,%x2"
+  [(set_attr "type" "vecsimple")])
+
 (define_insn "altivec_vrl"
   [(set (match_operand:VI2 0 "register_operand" "=v")
 (rotate:VI2 (match_operand:VI2 1 "register_operand" "v")
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 6a0784855cb..a4ed3f4945d 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -590,6 +590,9 @@ extern int rs6000_vector_align[];
 /* Whether we have PADDIS support.  */
 #define TARGET_PADDIS  TARGET_FUTURE
 
+/* Whether we have XVRLW support.  */
+#define TARGET_XVRLW   TARGET_FUTURE
+
 /* Whether the various reciprocal divide/square root estimate instructions
exist, and whether we should automatically generate code for the instruction
by default.  */
diff --git a/gcc/testsuite/gcc.target/powerpc/vector-rotate-left.c 
b/gcc/testsuite/gcc.target/powerpc/vector-rotate-left.c
new file mode 100644
index 000..5a5f3775507
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vector-rotate-left.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_future_ok } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-mdejagnu-cpu=future -O2" } */
+
+/* Test whether the xvrlw (vector word rotate left using VSX registers instead
+   of Altivec registers) instruction is generated.  */
+
+#include 
+
+typedef vector unsigned int  v4si_t;
+
+v4si_t
+rotl_v4si_scalar (v4si_t x, unsigned long n)
+{
+  __asm__ (" # %x0" : "+f" (x));
+  return (x << n) | (x >> (32 - n));   /* xvrlw.  */
+}
+
+v4si_t
+rotr_v4si_scalar (v4si_t x, unsigned long n)
+{
+  __asm__ (" # %x0" : "+f" (x));
+  return (x >> n) | (x << (32 - n));   /* xvrlw.  */
+}
+
+v4si_t
+rotl_v4si_vector (v4si_t x, v4si_t y)
+{
+  __asm__ (" # %x0" : "+f" (x));   /* xvrlw.  */
+  return vec_rl (x, y);
+}
+
+/* { dg-final { scan-assembler-times {\mxvrlw\M} 3  } } */


[gcc(refs/users/meissner/heads/work176)] Revert changes

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:5f249111a45818b00063a4674c46041805269bf5

commit 5f249111a45818b00063a4674c46041805269bf5
Author: Michael Meissner 
Date:   Mon Aug 19 13:51:17 2024 -0400

Revert changes

Diff:
---
 gcc/config/rs6000/altivec.md   |  14 -
 gcc/config/rs6000/constraints.md   |  13 -
 gcc/config/rs6000/mma.md   | 427 -
 gcc/config/rs6000/predicates.md|  84 +---
 gcc/config/rs6000/rs6000-builtin.cc|  39 +-
 gcc/config/rs6000/rs6000-builtins.def  |  10 -
 gcc/config/rs6000/rs6000-c.cc  |   9 +-
 gcc/config/rs6000/rs6000-call.cc   |  10 +-
 gcc/config/rs6000/rs6000-cpus.def  |   4 +-
 gcc/config/rs6000/rs6000-gen-builtins.cc   |  35 +-
 gcc/config/rs6000/rs6000-modes.def |   4 -
 gcc/config/rs6000/rs6000-string.cc |   1 -
 gcc/config/rs6000/rs6000.cc| 343 -
 gcc/config/rs6000/rs6000.h |  57 +--
 gcc/config/rs6000/rs6000.md| 158 +---
 gcc/config/rs6000/vsx.md   | 122 +-
 gcc/doc/extend.texi|  24 --
 gcc/doc/md.texi|   5 -
 gcc/testsuite/gcc.target/powerpc/dm-1024bit.c  |  63 ---
 gcc/testsuite/gcc.target/powerpc/dm-double-test.c  | 194 --
 gcc/testsuite/gcc.target/powerpc/lxvrl.c   |  32 --
 gcc/testsuite/gcc.target/powerpc/prefixed-addis.c  |  24 --
 gcc/testsuite/gcc.target/powerpc/subfus-1.c|  32 --
 gcc/testsuite/gcc.target/powerpc/subfus-2.c|  32 --
 .../gcc.target/powerpc/vector-rotate-left.c|  34 --
 gcc/testsuite/lib/target-supports.exp  |  35 --
 26 files changed, 206 insertions(+), 1599 deletions(-)

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index f891ccc7403..1f5489b974f 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -1982,20 +1982,6 @@
 }
   [(set_attr "type" "vecperm")])
 
-;; -mcpu=future adds a vector rotate left word variant.  There is no vector
-;; byte/half-word/double-word/quad-word rotate left.  This insn occurs before
-;; altivec_vrl and will match for -mcpu=future, while other cpus will
-;; match the generic insn.
-(define_insn "*xvrlw"
-  [(set (match_operand:V4SI 0 "register_operand" "=v,wa")
-   (rotate:V4SI (match_operand:V4SI 1 "register_operand" "v,wa")
-(match_operand:V4SI 2 "register_operand" "v,wa")))]
-  "TARGET_XVRLW"
-  "@
-   vrlw %0,%1,%2
-   xvrlw %x0,%x1,%x2"
-  [(set_attr "type" "vecsimple")])
-
 (define_insn "altivec_vrl"
   [(set (match_operand:VI2 0 "register_operand" "=v")
 (rotate:VI2 (match_operand:VI2 1 "register_operand" "v")
diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index 4d8d21fd6bb..369a7b75042 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -107,9 +107,6 @@
(match_test "TARGET_P8_VECTOR")
(match_operand 0 "s5bit_cint_operand")))
 
-(define_register_constraint "wD" "rs6000_constraints[RS6000_CONSTRAINT_wD]"
-  "Accumulator register.")
-
 (define_constraint "wE"
   "@internal Vector constant that can be loaded with the XXSPLTIB instruction."
   (match_test "xxspltib_constant_nosplit (op, mode)"))
@@ -222,16 +219,6 @@
   "An IEEE 128-bit constant that can be loaded into VSX registers."
   (match_operand 0 "easy_vector_constant_ieee128"))
 
-(define_constraint "eU"
-  "@internal integer constant that can be loaded with paddis"
-  (and (match_code "const_int")
-   (match_operand 0 "paddis_operand")))
-
-(define_constraint "eV"
-  "@internal integer constant that can be loaded with paddis + paddi"
-  (and (match_code "const_int")
-   (match_operand 0 "paddis_paddi_operand")))
-
 ;; Floating-point constraints.  These two are defined so that insn
 ;; length attributes can be calculated exactly.
 
diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index 8461499e1c3..04e2d0066df 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -91,12 +91,6 @@
UNSPEC_MMA_XVI8GER4SPP
UNSPEC_MMA_XXMFACC
UNSPEC_MMA_XXMTACC
-   UNSPEC_MMA_DMSETDMRZ
-   UNSPEC_DM_INSERT512_UPPER
-   UNSPEC_DM_INSERT512_LOWER
-   UNSPEC_DM_EXTRACT512
-   UNSPEC_DMR_RELOAD_FROM_MEMORY
-   UNSPEC_DMR_RELOAD_TO_MEMORY
   ])
 
 (define_c_enum "unspecv"
@@ -230,47 +224,44 @@
 (UNSPEC_MMA_XVF64GERNP "xvf64gernp")
 (UNSPEC_MMA_XVF64GERNN "xvf64gernn")])
 
-;; The "pm" prefix is not in these expansions, so that we can generate
-;; pmdmxvi4ger8 on systems with dense math registers and xvi4ger8 on systems
-;; without dense math registers.
-(define_int_attr vvi4i4i8  [(UNSPEC_MMA_PMXVI4GER8 "xvi4ger8")])
+(define_int_attr vvi

[gcc(refs/users/meissner/heads/work176-dmf)] Use vector pair load/store for memcpy with -mcpu=future

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:aa3552fcdfe7f9c6103229a5c1a194d4ed625474

commit aa3552fcdfe7f9c6103229a5c1a194d4ed625474
Author: Michael Meissner 
Date:   Mon Aug 19 13:51:56 2024 -0400

Use vector pair load/store for memcpy with -mcpu=future

In the development for the power10 processor, GCC did not enable using the
load vector pair and store vector pair instructions when optimizing things like
memory copy.  This patch enables using those instructions if -mcpu=future is
used.

2024-08-19  Michael Meissner  

gcc/

* config/rs6000/rs6000-cpus.def (ISA_FUTURE_MASKS_SERVER): Enable 
using
load vector pair and store vector pair instructions for memory copy
operations.
(POWERPC_MASKS): Make the bit for enabling using load vector pair 
and
store vector pair operations set and reset when the PowerPC 
processor is
changed.

Diff:
---
 gcc/ChangeLog.dmf | 449 +-
 gcc/config/rs6000/rs6000-cpus.def |   4 +-
 2 files changed, 451 insertions(+), 2 deletions(-)

diff --git a/gcc/ChangeLog.dmf b/gcc/ChangeLog.dmf
index c5ecfe8ec49..a0f50c6e3aa 100644
--- a/gcc/ChangeLog.dmf
+++ b/gcc/ChangeLog.dmf
@@ -1,6 +1,453 @@
+ Branch work176-dmf, patch #113 
+
+RFC2677-Add xvrlw support.
+
+2024-08-05  Michael Meissner  
+
+gcc/
+
+   * config/rs6000/altivec.md (xvrlw): New insn.
+   * config/rs6000/rs6000.h (TARGET_XVRLW): New macro.
+
+gcc/testsuite/
+
+   * gcc.target/powerpc/vector-rotate-left.c: New test.
+
+ Branch work176-dmf, patch #112 
+
+RFC2686-Add paddis support.
+
+2024-08-05  Michael Meissner  
+
+gcc/
+
+   * config/rs6000/constraints.md (eU): New constraint.
+   (eV): Likewise.
+   * config/rs6000/predicates.md (paddis_operand): New predicate.
+   (paddis_paddi_operand): Likewise.
+   (add_operand): Add paddis support.
+   * config/rs6000/rs6000.cc (num_insns_constant_gpr): Add paddis support.
+   (num_insns_constant_multi): Likewise.
+   (print_operand): Add %B for paddis support.
+   * config/rs6000/rs6000.h (TARGET_PADDIS): New macro.
+   (SIGNED_INTEGER_32BIT_P): Likewise.
+   * config/rs6000/rs6000.md (isa attribute): Add paddis support.
+   (enabled attribute): Likewise.
+   (add3): Likewise.
+   (adddi3 splitter): New splitter for paddis.
+   (movdi_internal64): Add paddis support.
+   (movdi splitter): New splitter for paddis.
+
+gcc/testsuite/
+
+   * gcc.target/powerpc/prefixed-addis.c: New test.
+
+ Branch work176-dmf, patch #111 
+
+RFC2655-Add saturating subtract built-ins.
+
+This patch adds support for a saturating subtract built-in function that may be
+added to a future PowerPC processor.  Note, if it is added, the name of the
+built-in function may change before GCC 13 is released.  If the name changes,
+we will submit a patch changing the name.
+
+I also added support for providing dense math built-in functions, even though
+at present, we have not added any new built-in functions for dense math.  It is
+likely we will want to add new dense math built-in functions as the dense math
+support is fleshed out.
+
+The patches have been tested on both little and big endian systems.  Can I
+check it into the master branch?
+
+2024-08-05   Michael Meissner  
+
+gcc/
+
+   * config/rs6000/rs6000-builtin.cc (rs6000_invalid_builtin): Add support
+   for flagging invalid use of future built-in functions.
+   (rs6000_builtin_is_supported): Add support for future built-in
+   functions.
+   * config/rs6000/rs6000-builtins.def (__builtin_saturate_subtract32): New
+   built-in function for -mcpu=future.
+   (__builtin_saturate_subtract64): Likewise.
+   * config/rs6000/rs6000-gen-builtins.cc (enum bif_stanza): Add stanzas
+   for -mcpu=future built-ins.
+   (stanza_map): Likewise.
+   (enable_string): Likewise.
+   (struct attrinfo): Likewise.
+   (parse_bif_attrs): Likewise.
+   (write_decls): Likewise.
+   * config/rs6000/rs6000.md (sat_sub3): Add saturating subtract
+   built-in insn declarations.
+   (sat_sub3_dot): Likewise.
+   (sat_sub3_dot2): Likewise.
+   * doc/extend.texi (Future PowerPC built-ins): New section.
+
+gcc/testsuite/
+
+   * gcc.target/powerpc/subfus-1.c: New test.
+   * gcc.target/powerpc/subfus-2.c: Likewise.
+
+ Branch work176-dmf, patch #110 
+
+RFC2656-Support load/store vector with right length.
+
+This patch adds support for new instructions that may be added to the PowerPC
+architecture in the future to enhance the load and store vector with length
+instructions.
+
+The current instructions (lxvl, lxvll, stxvl, and stxvll) are inconvenient to use
+since the count for the number of bytes must be in the top 

[gcc(refs/users/meissner/heads/work176-dmf)] RFC2653-Add wD constraint.

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:631c2bebb7aad48deba16a5e0d4f02c08f8e56bc

commit 631c2bebb7aad48deba16a5e0d4f02c08f8e56bc
Author: Michael Meissner 
Date:   Mon Aug 19 13:52:23 2024 -0400

RFC2653-Add wD constraint.

This patch adds a new constraint ('wD') that matches the accumulator registers
that overlap with VSX registers 0..31 on power10.  Future patches will add the
support for a separate accumulator register class that will be used when the
support for dense math registers is added.

2024-08-19   Michael Meissner  

* config/rs6000/constraints.md (wD): New constraint.
* config/rs6000/mma.md (mma_): Prepare for alternate 
accumulator
registers.  Use wD constraint instead of 'd' constraint.  Use
accumulator_operand instead of fpr_reg_operand.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")]
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0")]
MMA_ACC))]
   "TARGET_MMA"
   " %A0"
@@ -523,7 +523,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
MMA_VV))]
@@ -532,8 +532,8 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
MMA_AVV))]
@@ -542,7 +542,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
MMA_PV))]
@@ -551,8 +551,8 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
(match_operand:OO 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
MMA_APV))]
@@ -561,7 +561,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -574,8 +574,8 @@
(set_attr "prefixed" "yes")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
(match_operand:SI 4 "const_0_to_15_operand" "n,n")
@@ -588,7 +588,7 @@
(set_attr "prefixed" "yes")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -601,8 +601,8 @@
(set_attr "prefixed" "yes")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
   

[gcc(refs/users/meissner/heads/work176-dmf)] RFC2653-Add support for dense math registers.

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:621467db4c48a51fa4b26b560f31e32cb5882525

commit 621467db4c48a51fa4b26b560f31e32cb5882525
Author: Michael Meissner 
Date:   Mon Aug 19 13:53:00 2024 -0400

RFC2653-Add support for dense math registers.

The MMA subsystem added the notion of accumulator registers as an optional
feature of ISA 3.1 (power10).  In ISA 3.1, these accumulators overlapped with
the VSX registers 0..31, but logically the accumulator registers were separate
from the FPR registers.  In ISA 3.1, it was anticipated that in future systems,
the accumulator registers may not overlap with the FPR registers.  This patch
adds the support for dense math registers as separate registers.

This particular patch does not change the MMA support to use the 
accumulators
within the dense math registers.  This patch just adds the basic support for
having separate DMRs.  The next patch will switch the MMA support to use the
accumulators if -mcpu=future is used.

For testing purposes, I added an undocumented option '-mdense-math' to 
enable
or disable the dense math support.

This patch adds a new constraint (wD).  If MMA is selected but dense math is
not selected (i.e. -mcpu=power10), the wD constraint will allow access to
accumulators that overlap with VSX registers 0..31.  If both MMA and dense 
math
are selected (i.e. -mcpu=future), the wD constraint will only allow dense 
math
registers.

This patch modifies the existing %A output modifier.  If MMA is selected but
dense math is not selected, then %A output modifier converts the VSX 
register
number to the accumulator number, by dividing it by 4.  If both MMA and 
dense
math are selected, then %A will map the separate DMR registers into 0..7.

The intention is that user code using extended asm can be modified to run on
both MMA without dense math and MMA with dense math:

1)  If possible, don't use extended asm, but instead use the MMA 
built-in
functions;

2)  If you do need to write extended asm, the d constraints targeting
accumulators should be changed to use wD;

3)  Only use the built-in zero, assemble and disassemble functions to
create and move data between vector quad types and dense math
accumulators.  I.e. do not use the xxmfacc, xxmtacc, and xxsetaccz
instructions directly in the extended asm code.  The reason is these
instructions assume there is a 1-to-1 correspondence between 4 adjacent
FPR registers and an accumulator that overlaps with those registers.
With accumulators now being separate registers, there no longer is a
1-to-1 correspondence.

It is possible that the mangling for DMRs and the GDB register numbers may
produce other changes in the future.

2024-08-19   Michael Meissner  

* config/rs6000/mma.md (UNSPEC_MMA_DMSETDMRZ): New unspec.
(movxo): Add comments about dense math registers.
(movxo_nodm): Rename from movxo and restrict the usage to machines
without dense math registers.
(movxo_dm): New insn for movxo support for machines with dense math
registers.
(mma_): Restrict usage to machines without dense math 
registers.
(mma_xxsetaccz): Add a define_expand wrapper, and add support for 
dense
math registers.
(mma_dmsetaccz): New insn.
* config/rs6000/predicates.md (dmr_operand): New predicate.
(accumulator_operand): Add support for dense math registers.
* config/rs6000/rs6000-builtin.cc (rs6000_gimple_fold_mma_builtin): 
Do
not issue a de-prime instruction when disassembling a vector quad 
on a
system with dense math registers.
* config/rs6000/rs6000-c.cc (rs6000_define_or_undefine_macro): 
Define
__DENSE_MATH__ if we have dense math registers.
* config/rs6000/rs6000.cc (enum rs6000_reg_type): Add DMR_REG_TYPE.
(enum rs6000_reload_reg_type): Add RELOAD_REG_DMR.
(LAST_RELOAD_REG_CLASS): Add support for DMR registers and the wD
constraint.
(reload_reg_map): Likewise.
(rs6000_reg_names): Likewise.
(alt_reg_names): Likewise.
(rs6000_hard_regno_nregs_internal): Likewise.
(rs6000_hard_regno_mode_ok_uncached): Likewise.
(rs6000_debug_reg_global): Likewise.
(rs6000_setup_reg_addr_masks): Likewise.
(rs6000_init_hard_regno_mode_ok): Likewise.
(rs6000_secondary_reload_memory): Add support for DMR registers.
(rs6000_secondary_reload_simple_move): Likewise.
(rs6000_preferred_reload_class): Likewise.
(rs6000_secondary_reload_class): Likewise.
(print_operand): Make %A handle both FPRs and DMRs.
 

[gcc(refs/users/meissner/heads/work176-dmf)] xRFC2653-Add dense math test for new instruction names.

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:ce4973acae8715305b2e8e6bec1bd73eccd1a971

commit ce4973acae8715305b2e8e6bec1bd73eccd1a971
Author: Michael Meissner 
Date:   Mon Aug 19 13:54:59 2024 -0400

xRFC2653-Add dense math test for new instruction names.

2024-08-19   Michael Meissner  

gcc/testsuite/

* gcc.target/powerpc/dm-double-test.c: New test.
* lib/target-supports.exp (check_effective_target_ppc_dmr_ok): New
target test.

Diff:
---
 gcc/testsuite/gcc.target/powerpc/dm-double-test.c | 194 ++
 gcc/testsuite/lib/target-supports.exp |  23 +++
 2 files changed, 217 insertions(+)

diff --git a/gcc/testsuite/gcc.target/powerpc/dm-double-test.c 
b/gcc/testsuite/gcc.target/powerpc/dm-double-test.c
new file mode 100644
index 000..66c19779585
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/dm-double-test.c
@@ -0,0 +1,194 @@
+/* Test derived from mma-double-1.c, modified for dense math.  */
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_dense_math_ok } */
+/* { dg-options "-mdejagnu-cpu=future -O2" } */
+
+#include 
+#include 
+#include 
+
+typedef unsigned char vec_t __attribute__ ((vector_size (16)));
+typedef double v4sf_t __attribute__ ((vector_size (16)));
+#define SAVE_ACC(ACC, ldc, J)  \
+ __builtin_mma_disassemble_acc (result, ACC); \
+ rowC = (v4sf_t *) &CO[0*ldc+J]; \
+  rowC[0] += result[0]; \
+  rowC = (v4sf_t *) &CO[1*ldc+J]; \
+  rowC[0] += result[1]; \
+  rowC = (v4sf_t *) &CO[2*ldc+J]; \
+  rowC[0] += result[2]; \
+  rowC = (v4sf_t *) &CO[3*ldc+J]; \
+ rowC[0] += result[3];
+
+void
+DM (int m, int n, int k, double *A, double *B, double *C)
+{
+  __vector_quad acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7;
+  v4sf_t result[4];
+  v4sf_t *rowC;
+  for (int l = 0; l < n; l += 4)
+{
+  double *CO;
+  double *AO;
+  AO = A;
+  CO = C;
+  C += m * 4;
+  for (int j = 0; j < m; j += 16)
+   {
+ double *BO = B;
+ __builtin_mma_xxsetaccz (&acc0);
+ __builtin_mma_xxsetaccz (&acc1);
+ __builtin_mma_xxsetaccz (&acc2);
+ __builtin_mma_xxsetaccz (&acc3);
+ __builtin_mma_xxsetaccz (&acc4);
+ __builtin_mma_xxsetaccz (&acc5);
+ __builtin_mma_xxsetaccz (&acc6);
+ __builtin_mma_xxsetaccz (&acc7);
+ unsigned long i;
+
+ for (i = 0; i < k; i++)
+   {
+ vec_t *rowA = (vec_t *) & AO[i * 16];
+ __vector_pair rowB;
+ vec_t *rb = (vec_t *) & BO[i * 4];
+ __builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
+ __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
+ __builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
+ __builtin_mma_xvf64gerpp (&acc2, rowB, rowA[2]);
+ __builtin_mma_xvf64gerpp (&acc3, rowB, rowA[3]);
+ __builtin_mma_xvf64gerpp (&acc4, rowB, rowA[4]);
+ __builtin_mma_xvf64gerpp (&acc5, rowB, rowA[5]);
+ __builtin_mma_xvf64gerpp (&acc6, rowB, rowA[6]);
+ __builtin_mma_xvf64gerpp (&acc7, rowB, rowA[7]);
+   }
+ SAVE_ACC (&acc0, m, 0);
+ SAVE_ACC (&acc2, m, 4);
+ SAVE_ACC (&acc1, m, 2);
+ SAVE_ACC (&acc3, m, 6);
+ SAVE_ACC (&acc4, m, 8);
+ SAVE_ACC (&acc6, m, 12);
+ SAVE_ACC (&acc5, m, 10);
+ SAVE_ACC (&acc7, m, 14);
+ AO += k * 16;
+ BO += k * 4;
+ CO += 16;
+   }
+  B += k * 4;
+}
+}
+
+void
+init (double *matrix, int row, int column)
+{
+  for (int j = 0; j < column; j++)
+{
+  for (int i = 0; i < row; i++)
+   {
+ matrix[j * row + i] = (i * 16 + 2 + j) / 0.123;
+   }
+}
+}
+
+void
+init0 (double *matrix, double *matrix1, int row, int column)
+{
+  for (int j = 0; j < column; j++)
+for (int i = 0; i < row; i++)
+  matrix[j * row + i] = matrix1[j * row + i] = 0;
+}
+
+
+void
+print (const char *name, const double *matrix, int row, int column)
+{
+  printf ("Matrix %s has %d rows and %d columns:\n", name, row, column);
+  for (int i = 0; i < row; i++)
+{
+  for (int j = 0; j < column; j++)
+   {
+ printf ("%f ", matrix[j * row + i]);
+   }
+  printf ("\n");
+}
+  printf ("\n");
+}
+
+int
+main (int argc, char *argv[])
+{
+  int rowsA, colsB, common;
+  int i, j, k;
+  int ret = 0;
+
+  for (int t = 16; t <= 128; t += 16)
+{
+  for (int t1 = 4; t1 <= 16; t1 += 4)
+   {
+ rowsA = t;
+ colsB = t1;
+ common = 1;
+ /* printf ("Running test for rows = %d,cols = %d\n", t, t1); */
+ double A[rowsA * common];
+ double B[common * colsB];
+ double C[rowsA * colsB];
+ double D[rowsA * colsB];
+
+
+ init (A, rowsA, common);
+ init (B, common, colsB);
+ init0 (C, D, rowsA, colsB);
+ DM (rowsA, colsB, common, A, B,

[gcc(refs/users/meissner/heads/work176-dmf)] RFC2653-PowerPC: Switch to dense math names for all MMA operations.

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:d5e1fc8ad02bfd5b133b328cbb8d8e51bd247f6b

commit d5e1fc8ad02bfd5b133b328cbb8d8e51bd247f6b
Author: Michael Meissner 
Date:   Mon Aug 19 13:54:13 2024 -0400

RFC2653-PowerPC: Switch to dense math names for all MMA operations.

This patch changes the assembler instruction names for MMA instructions from
the original name used in power10 to the new name when used with the dense 
math
system.  I.e. xvf64gerpp becomes dmxvf64gerpp.  The assembler will emit the
same bits for either spelling.

For the non-prefixed MMA instructions, we add a 'dm' prefix in front of the
instruction.  However, the prefixed instructions have a 'pm' prefix, and we 
add
the 'dm' prefix afterwards.  To prevent having two sets of parallel int
attributes, we remove the "pm" prefix from the instruction string in the
attributes, and add it later, both in the insn name and in the output 
template.

2024-08-19   Michael Meissner  

gcc/

* config/rs6000/mma.md (vvi4i4i8): Change the instruction to not 
have a
"pm" prefix.
(avvi4i4i8): Likewise.
(vvi4i4i2): Likewise.
(avvi4i4i2): Likewise.
(vvi4i4): Likewise.
(avvi4i4): Likewise.
(pvi4i2): Likewise.
(apvi4i2): Likewise.
(vvi4i4i4): Likewise.
(avvi4i4i4): Likewise.
(mma_): Add support for running on DMF systems, generating the 
dense
math instruction and using the dense math accumulators.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_pm): Add support for running on DMF systems, 
generating
the dense math instruction and using the dense math accumulators.
Rename the insn with a 'pm' prefix and add either 'pm' or 'pmdm'
prefixes based on whether we have the original MMA specification or 
if
we have dense math support.
(mma_pm): Likewise.
(mma_pm): Likewise.
(mma_pm): Likewise.
(mma_pm): Likewise.
(mma_pm): Likewise.
(mma_pm): Likewise.
(mma_pm): Likewise.

Diff:
---
 gcc/config/rs6000/mma.md | 157 +++
 1 file changed, 104 insertions(+), 53 deletions(-)

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index ae6e7e9695b..2e04eb653fa 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -225,44 +225,47 @@
 (UNSPEC_MMA_XVF64GERNP "xvf64gernp")
 (UNSPEC_MMA_XVF64GERNN "xvf64gernn")])
 
-(define_int_attr vvi4i4i8  [(UNSPEC_MMA_PMXVI4GER8 "pmxvi4ger8")])
+;; The "pm" prefix is not in these expansions, so that we can generate
+;; pmdmxvi4ger8 on systems with dense math registers and xvi4ger8 on systems
+;; without dense math registers.
+(define_int_attr vvi4i4i8  [(UNSPEC_MMA_PMXVI4GER8 "xvi4ger8")])
 
-(define_int_attr avvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8PP   
"pmxvi4ger8pp")])
+(define_int_attr avvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8PP   "xvi4ger8pp")])
 
-(define_int_attr vvi4i4i2  [(UNSPEC_MMA_PMXVI16GER2"pmxvi16ger2")
-(UNSPEC_MMA_PMXVI16GER2S   "pmxvi16ger2s")
-(UNSPEC_MMA_PMXVF16GER2"pmxvf16ger2")
-(UNSPEC_MMA_PMXVBF16GER2   
"pmxvbf16ger2")])
+(define_int_attr vvi4i4i2  [(UNSPEC_MMA_PMXVI16GER2"xvi16ger2")
+(UNSPEC_MMA_PMXVI16GER2S   "xvi16ger2s")
+(UNSPEC_MMA_PMXVF16GER2"xvf16ger2")
+(UNSPEC_MMA_PMXVBF16GER2   "xvbf16ger2")])
 
-(define_int_attr avvi4i4i2 [(UNSPEC_MMA_PMXVI16GER2PP  "pmxvi16ger2pp")
-(UNSPEC_MMA_PMXVI16GER2SPP 
"pmxvi16ger2spp")
-(UNSPEC_MMA_PMXVF16GER2PP  "pmxvf16ger2pp")
-(UNSPEC_MMA_PMXVF16GER2PN  "pmxvf16ger2pn")
-(UNSPEC_MMA_PMXVF16GER2NP  "pmxvf16ger2np")
-(UNSPEC_MMA_PMXVF16GER2NN  "pmxvf16ger2nn")
-(UNSPEC_MMA_PMXVBF16GER2PP 
"pmxvbf16ger2pp")
-(UNSPEC_MMA_PMXVBF16GER2PN 
"pmxvbf16ger2pn")
-(UNSPEC_MMA_PMXVBF16GER2NP 
"pmxvbf16ger2np")
-(UNSPEC_MMA_PMXVBF16GER2NN 
"pmxvbf16ger2nn")])
+(define_int_attr avvi4i4i2 [(UNSPEC_MMA_PMXVI16GER2PP  "xvi16ger2pp")
+(UNSPEC_MMA_PMXVI16GER2SPP "xvi16ger2spp")
+(UNSPEC_MMA_PMXVF16GER2PP  "xvf16ger2pp")
+(UNSPEC_MMA_PMXVF16GER2PN  "xvf16

[gcc(refs/users/meissner/heads/work176-dmf)] RFC2653-PowerPC: Add support for 1,024 bit DMR registers

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:b9841440d225bf35f9903212cef11b6679c963cc

commit b9841440d225bf35f9903212cef11b6679c963cc
Author: Michael Meissner 
Date:   Mon Aug 19 13:57:32 2024 -0400

RFC2653-PowerPC: Add support for 1,024 bit DMR registers

This patch is a preliminary patch to add the full 1,024 bit dense math
registers (DMRs) for -mcpu=future.  The MMA 512-bit accumulators map onto the
top of the DMR register.

This patch only adds the new 1,024 bit register support.  It does not add
support for any instructions that need 1,024 bit registers instead of 512 
bit
registers.

I used the new mode 'TDOmode' to be the opaque mode used for 1,024 bit
registers.  The 'wD' constraint added in previous patches is used for these
registers.  I added support to do load and store of DMRs via the VSX 
registers,
since there are no load/store dense math instructions.  I added the new 
keyword
'__dmr' to create 1,024 bit types that can be loaded into DMRs.  At 
present, I
don't have aliases for __dmr512 and __dmr1024 that we've discussed 
internally.

The patches have been tested on both little and big endian systems.  Can I 
check
it into the master branch?

2024-08-19   Michael Meissner  

gcc/

* config/rs6000/mma.md (UNSPEC_DM_INSERT512_UPPER): New unspec.
(UNSPEC_DM_INSERT512_LOWER): Likewise.
(UNSPEC_DM_EXTRACT512): Likewise.
(UNSPEC_DMR_RELOAD_FROM_MEMORY): Likewise.
(UNSPEC_DMR_RELOAD_TO_MEMORY): Likewise.
(movtdo): New define_expand and define_insn_and_split to implement 
1,024
bit DMR registers.
(movtdo_insert512_upper): New insn.
(movtdo_insert512_lower): Likewise.
(movtdo_extract512): Likewise.
(reload_dmr_from_memory): Likewise.
(reload_dmr_to_memory): Likewise.
* config/rs6000/rs6000-builtin.cc (rs6000_type_string): Add DMR
support.
(rs6000_init_builtins): Add support for __dmr keyword.
* config/rs6000/rs6000-call.cc (rs6000_return_in_memory): Add 
support
for TDOmode.
(rs6000_function_arg): Likewise.
* config/rs6000/rs6000-modes.def (TDOmode): New mode.
* config/rs6000/rs6000.cc (rs6000_hard_regno_nregs_internal): Add
support for TDOmode.
(rs6000_hard_regno_mode_ok_uncached): Likewise.
(rs6000_hard_regno_mode_ok): Likewise.
(rs6000_modes_tieable_p): Likewise.
(rs6000_debug_reg_global): Likewise.
(rs6000_setup_reg_addr_masks): Likewise.
(rs6000_init_hard_regno_mode_ok): Add support for TDOmode.  Setup 
reload
hooks for DMR mode.
(reg_offset_addressing_ok_p): Add support for TDOmode.
(rs6000_emit_move): Likewise.
(rs6000_secondary_reload_simple_move): Likewise.
(rs6000_preferred_reload_class): Likewise.
(rs6000_secondary_reload_class): Likewise.
(rs6000_mangle_type): Add mangling for __dmr type.
(rs6000_dmr_register_move_cost): Add support for TDOmode.
(rs6000_split_multireg_move): Likewise.
(rs6000_invalid_conversion): Likewise.
* config/rs6000/rs6000.h (VECTOR_ALIGNMENT_P): Add TDOmode.
(enum rs6000_builtin_type_index): Add DMR type nodes.
(dmr_type_node): Likewise.
(ptr_dmr_type_node): Likewise.

gcc/testsuite/

* gcc.target/powerpc/dm-1024bit.c: New test.

Diff:
---
 gcc/config/rs6000/mma.md  | 154 ++
 gcc/config/rs6000/rs6000-builtin.cc   |  17 +++
 gcc/config/rs6000/rs6000-call.cc  |  10 +-
 gcc/config/rs6000/rs6000-modes.def|   4 +
 gcc/config/rs6000/rs6000.cc   | 101 -
 gcc/config/rs6000/rs6000.h|   6 +-
 gcc/testsuite/gcc.target/powerpc/dm-1024bit.c |  63 +++
 7 files changed, 321 insertions(+), 34 deletions(-)

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index 2e04eb653fa..8461499e1c3 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -92,6 +92,11 @@
UNSPEC_MMA_XXMFACC
UNSPEC_MMA_XXMTACC
UNSPEC_MMA_DMSETDMRZ
+   UNSPEC_DM_INSERT512_UPPER
+   UNSPEC_DM_INSERT512_LOWER
+   UNSPEC_DM_EXTRACT512
+   UNSPEC_DMR_RELOAD_FROM_MEMORY
+   UNSPEC_DMR_RELOAD_TO_MEMORY
   ])
 
 (define_c_enum "unspecv"
@@ -793,3 +798,152 @@
 }
   [(set_attr "type" "mma")
(set_attr "prefixed" "yes")])
+
+;; TDOmode (__dmr keyword for 1,024 bit registers).
+(define_expand "movtdo"
+  [(set (match_operand:TDO 0 "nonimmediate_operand")
+   (match_operand:TDO 1 "input_operand"))]
+  "TARGET_MMA_DENSE_MATH"
+{
+  rs6000_emit_move (operands[0], operands[1], TDOmode);
+  DONE;
+})
+
+(define_insn_and_split "*movtdo"
+  [(set (match_operand:TDO 0 

[gcc(refs/users/meissner/heads/work176-dmf)] RFC2656-Support load/store vector with right length

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:5a59d920eb3dbcad808d1ed496cb279de9adfa2f

commit 5a59d920eb3dbcad808d1ed496cb279de9adfa2f
Author: Michael Meissner 
Date:   Mon Aug 19 13:59:03 2024 -0400

RFC2656-Support load/store vector with right length

This patch adds support for new instructions that may be added to the 
PowerPC
architecture in the future to enhance the load and store vector with length
instructions.

The current instructions (lxvl, lxvll, stxvl, and stxvll) are inconvenient to 
use
since the count for the number of bytes must be in the top 8 bits of the GPR
register, instead of the bottom 8 bits.  This meant that code generating 
these
instructions typically had to do a shift left by 56 bits to get the count 
into
the right position.  In a future version of the PowerPC architecture, new
variants of these instructions might be added that expect the count to be in
the bottom 8 bits of the GPR register.  These patches add this support to 
GCC
if the user uses the -mcpu=future option.

I discovered that the code in rs6000-string.cc to generate ISA 3.1 
lxvl/stxvl
future lxvll/stxvll instructions would generate these instructions on 
32-bit.
However, the patterns for these instructions are only defined on 64-bit systems. 
 So
I added a check for 64-bit support before generating the instructions.

The patches have been tested on both little and big endian systems.  Can I 
check
it into the master branch?

2024-08-19   Michael Meissner  

gcc/

* config/rs6000/rs6000-string.cc (expand_block_move): Do not 
generate
lxvl and stxvl on 32-bit.
* config/rs6000/vsx.md (lxvl): If -mcpu=future, generate the lxvl 
with
the shift count automatically used in the insn.
(lxvrl): New insn for -mcpu=future.
(lxvrll): Likewise.
(stxvl): If -mcpu=future, generate the stxvl with the shift count
automatically used in the insn.
(stxvrl): New insn for -mcpu=future.
(stxvrll): Likewise.

gcc/testsuite/

* gcc.target/powerpc/lxvrl.c: New test.
* lib/target-supports.exp 
(check_effective_target_powerpc_future_ok):
New effective target.

Diff:
---
 gcc/config/rs6000/rs6000-string.cc   |   1 +
 gcc/config/rs6000/vsx.md | 122 +--
 gcc/testsuite/gcc.target/powerpc/lxvrl.c |  32 
 gcc/testsuite/lib/target-supports.exp|  12 +++
 4 files changed, 146 insertions(+), 21 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-string.cc 
b/gcc/config/rs6000/rs6000-string.cc
index 3674c4bd984..818ff10a8ac 100644
--- a/gcc/config/rs6000/rs6000-string.cc
+++ b/gcc/config/rs6000/rs6000-string.cc
@@ -2786,6 +2786,7 @@ expand_block_move (rtx operands[], bool might_overlap)
 
   if (TARGET_MMA && TARGET_BLOCK_OPS_UNALIGNED_VSX
  && TARGET_BLOCK_OPS_VECTOR_PAIR
+ && TARGET_POWERPC64
  && bytes >= 32
  && (align >= 256 || !STRICT_ALIGNMENT))
{
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 27069d070e1..b4399f2375e 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -5711,20 +5711,32 @@
   DONE;
 })
 
-;; Load VSX Vector with Length
+;; Load VSX Vector with Length.  If we have lxvrl, we don't have to do an
+;; explicit shift left into a pseudo.
 (define_expand "lxvl"
-  [(set (match_dup 3)
-(ashift:DI (match_operand:DI 2 "register_operand")
-   (const_int 56)))
-   (set (match_operand:V16QI 0 "vsx_register_operand")
-   (unspec:V16QI
-[(match_operand:DI 1 "gpc_reg_operand")
-  (mem:V16QI (match_dup 1))
- (match_dup 3)]
-UNSPEC_LXVL))]
+  [(use (match_operand:V16QI 0 "vsx_register_operand"))
+   (use (match_operand:DI 1 "gpc_reg_operand"))
+   (use (match_operand:DI 2 "gpc_reg_operand"))]
   "TARGET_P9_VECTOR && TARGET_64BIT"
 {
-  operands[3] = gen_reg_rtx (DImode);
+  rtx shift_len = gen_rtx_ASHIFT (DImode, operands[2], GEN_INT (56));
+  rtx len;
+
+  if (TARGET_FUTURE)
+len = shift_len;
+  else
+{
+  len = gen_reg_rtx (DImode);
+  emit_insn (gen_rtx_SET (len, shift_len));
+}
+
+  rtx dest = operands[0];
+  rtx addr = operands[1];
+  rtx mem = gen_rtx_MEM (V16QImode, addr);
+  rtvec rv = gen_rtvec (3, addr, mem, len);
+  rtx lxvl = gen_rtx_UNSPEC (V16QImode, rv, UNSPEC_LXVL);
+  emit_insn (gen_rtx_SET (dest, lxvl));
+  DONE;
 })
 
 (define_insn "*lxvl"
@@ -5748,6 +5760,34 @@
   "lxvll %x0,%1,%2"
   [(set_attr "type" "vecload")])
 
+;; For lxvrl and lxvrll, use the combiner to eliminate the shift.  The
+;; define_expand for lxvl will already incorporate the shift in generating the
+;; insn.  The lxvll buitl-in function required the user to have already done
+;; the shift.  Defining lxvrll this way, will optimize cases where the user has
+;; done the shift immediately before the 

[gcc(refs/users/meissner/heads/work176-dmf)] RFC2655-Add saturating subtract built-ins

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:5777392dbf05d4fe6ad82405b4cf81055b8bc12f

commit 5777392dbf05d4fe6ad82405b4cf81055b8bc12f
Author: Michael Meissner 
Date:   Mon Aug 19 13:59:27 2024 -0400

RFC2655-Add saturating subtract built-ins

This patch adds support for a saturating subtract built-in function that 
may be
added to a future PowerPC processor.  Note, if it is added, the name of the
built-in function may change before GCC 13 is released.  If the name 
changes,
we will submit a patch changing the name.

I also added support for providing dense math built-in functions, even 
though
at present, we have not added any new built-in functions for dense math.  
It is
likely we will want to add new dense math built-in functions as the dense 
math
support is fleshed out.

The patches have been tested on both little and big endian systems.  Can I 
check
it into the master branch?

2024-08-19   Michael Meissner  

gcc/

* config/rs6000/rs6000-builtin.cc (rs6000_invalid_builtin): Add 
support
for flagging invalid use of future built-in functions.
(rs6000_builtin_is_supported): Add support for future built-in
functions.
* config/rs6000/rs6000-builtins.def 
(__builtin_saturate_subtract32): New
built-in function for -mcpu=future.
(__builtin_saturate_subtract64): Likewise.
* config/rs6000/rs6000-gen-builtins.cc (enum bif_stanza): Add 
stanzas
for -mcpu=future built-ins.
(stanza_map): Likewise.
(enable_string): Likewise.
(struct attrinfo): Likewise.
(parse_bif_attrs): Likewise.
(write_decls): Likewise.
* config/rs6000/rs6000.md (sat_sub3): Add saturating subtract
built-in insn declarations.
(sat_sub3_dot): Likewise.
(sat_sub3_dot2): Likewise.
* doc/extend.texi (Future PowerPC built-ins): New section.

gcc/testsuite/

* gcc.target/powerpc/subfus-1.c: New test.
* gcc.target/powerpc/subfus-2.c: Likewise.

Diff:
---
 gcc/config/rs6000/rs6000-builtin.cc | 17 
 gcc/config/rs6000/rs6000-builtins.def   | 10 +
 gcc/config/rs6000/rs6000-gen-builtins.cc| 35 ++---
 gcc/config/rs6000/rs6000.md | 60 +
 gcc/doc/extend.texi | 24 
 gcc/testsuite/gcc.target/powerpc/subfus-1.c | 32 +++
 gcc/testsuite/gcc.target/powerpc/subfus-2.c | 32 +++
 7 files changed, 205 insertions(+), 5 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin.cc 
b/gcc/config/rs6000/rs6000-builtin.cc
index 8e4335e9b44..a5f33eb9da1 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -139,6 +139,17 @@ rs6000_invalid_builtin (enum rs6000_gen_builtins fncode)
 case ENB_MMA:
   error ("%qs requires the %qs option", name, "-mmma");
   break;
+case ENB_FUTURE:
+  error ("%qs requires the %qs option", name, "-mcpu=future");
+  break;
+case ENB_FUTURE_64:
+  error ("%qs requires the %qs option and either the %qs or %qs option",
+name, "-mcpu=future", "-m64", "-mpowerpc64");
+  break;
+case ENB_DM:
+  error ("%qs requires the %qs or %qs options", name, "-mcpu=future",
+"-mdense-math");
+  break;
 default:
 case ENB_ALWAYS:
   gcc_unreachable ();
@@ -194,6 +205,12 @@ rs6000_builtin_is_supported (enum rs6000_gen_builtins 
fncode)
   return TARGET_HTM;
 case ENB_MMA:
   return TARGET_MMA;
+case ENB_FUTURE:
+  return TARGET_FUTURE;
+case ENB_FUTURE_64:
+  return TARGET_FUTURE && TARGET_POWERPC64;
+case ENB_DM:
+  return TARGET_DENSE_MATH;
 default:
   gcc_unreachable ();
 }
diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index 0e9dc05dbcf..7d47dc4e402 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -137,6 +137,8 @@
 ;   endian   Needs special handling for endianness
 ;   ibmldRestrict usage to the case when TFmode is IBM-128
 ;   ibm128   Restrict usage to the case where __ibm128 is supported or if ibmld
+;   future   Restrict usage to future instructions
+;   dm   Restrict usage to dense math
 ;
 ; Each attribute corresponds to extra processing required when
 ; the built-in is expanded.  All such special processing should
@@ -3933,3 +3935,11 @@
 
   void __builtin_vsx_stxvp (v256, unsigned long, const v256 *);
 STXVP nothing {mma,pair}
+
+[future]
+  const signed int __builtin_saturate_subtract32 (signed int, signed int);
+  SAT_SUBSI sat_subsi3 {}
+
+[future-64]
+  const signed long __builtin_saturate_subtract64 (signed long,  signed long);
+  SAT_SUBDI sat_subdi3 {}
diff --git a/gcc/config/rs6000/rs6000-gen-builtins.cc 
b/gcc/config/rs

[gcc(refs/users/meissner/heads/work176-dmf)] RFC2677-Add xvrlw support

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:bb360cc7fdb5b20190372b0628b03e25390c0f96

commit bb360cc7fdb5b20190372b0628b03e25390c0f96
Author: Michael Meissner 
Date:   Mon Aug 19 14:00:19 2024 -0400

RFC2677-Add xvrlw support

2024-08-19  Michael Meissner  

gcc/

* config/rs6000/altivec.md (xvrlw): New insn.
* config/rs6000/rs6000.h (TARGET_XVRLW): New macro.

gcc/testsuite/

* gcc.target/powerpc/vector-rotate-left.c: New test.

Diff:
---
 gcc/config/rs6000/altivec.md   | 14 +
 gcc/config/rs6000/rs6000.h |  3 ++
 .../gcc.target/powerpc/vector-rotate-left.c| 34 ++
 3 files changed, 51 insertions(+)

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 1f5489b974f..f891ccc7403 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -1982,6 +1982,20 @@
 }
   [(set_attr "type" "vecperm")])
 
+;; -mcpu=future adds a vector rotate left word variant.  There is no vector
+;; byte/half-word/double-word/quad-word rotate left.  This insn occurs before
+;; altivec_vrl and will match for -mcpu=future, while other cpus will
+;; match the generic insn.
+(define_insn "*xvrlw"
+  [(set (match_operand:V4SI 0 "register_operand" "=v,wa")
+   (rotate:V4SI (match_operand:V4SI 1 "register_operand" "v,wa")
+(match_operand:V4SI 2 "register_operand" "v,wa")))]
+  "TARGET_XVRLW"
+  "@
+   vrlw %0,%1,%2
+   xvrlw %x0,%x1,%x2"
+  [(set_attr "type" "vecsimple")])
+
 (define_insn "altivec_vrl"
   [(set (match_operand:VI2 0 "register_operand" "=v")
 (rotate:VI2 (match_operand:VI2 1 "register_operand" "v")
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 6a0784855cb..a4ed3f4945d 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -590,6 +590,9 @@ extern int rs6000_vector_align[];
 /* Whether we have PADDIS support.  */
 #define TARGET_PADDIS  TARGET_FUTURE
 
+/* Whether we have XVRLW support.  */
+#define TARGET_XVRLW   TARGET_FUTURE
+
 /* Whether the various reciprocal divide/square root estimate instructions
exist, and whether we should automatically generate code for the instruction
by default.  */
diff --git a/gcc/testsuite/gcc.target/powerpc/vector-rotate-left.c 
b/gcc/testsuite/gcc.target/powerpc/vector-rotate-left.c
new file mode 100644
index 000..5a5f3775507
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vector-rotate-left.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_future_ok } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-mdejagnu-cpu=future -O2" } */
+
+/* Test whether the xvrl (vector word rotate left using VSX registers insead of
+   Altivec registers is generated.  */
+
+#include 
+
+typedef vector unsigned int  v4si_t;
+
+v4si_t
+rotl_v4si_scalar (v4si_t x, unsigned long n)
+{
+  __asm__ (" # %x0" : "+f" (x));
+  return (x << n) | (x >> (32 - n));   /* xvrlw.  */
+}
+
+v4si_t
+rotr_v4si_scalar (v4si_t x, unsigned long n)
+{
+  __asm__ (" # %x0" : "+f" (x));
+  return (x >> n) | (x << (32 - n));   /* xvrlw.  */
+}
+
+v4si_t
+rotl_v4si_vector (v4si_t x, v4si_t y)
+{
+  __asm__ (" # %x0" : "+f" (x));   /* xvrlw.  */
+  return vec_rl (x, y);
+}
+
+/* { dg-final { scan-assembler-times {\mxvrlw\M} 3  } } */


[gcc(refs/users/meissner/heads/work176-dmf)] RFC2686-Add paddis support

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:c827e64576d202661c6224ff540659ced605297e

commit c827e64576d202661c6224ff540659ced605297e
Author: Michael Meissner 
Date:   Mon Aug 19 13:59:50 2024 -0400

RFC2686-Add paddis support

2024-08-19  Michael Meissner  

gcc/

* config/rs6000/constraints.md (eU): New constraint.
(eV): Likewise.
* config/rs6000/predicates.md (paddis_operand): New predicate.
(paddis_paddi_operand): Likewise.
(add_operand): Add paddis support.
* config/rs6000/rs6000.cc (num_insns_constant_gpr): Add paddis 
support.
(num_insns_constant_multi): Likewise.
(print_operand): Add %B for paddis support.
* config/rs6000/rs6000.h (TARGET_PADDIS): New macro.
(SIGNED_INTEGER_32BIT_P): Likewise.
* config/rs6000/rs6000.md (isa attribute): Add paddis support.
(enabled attribute): Likewise.
(add3): Likewise.
(adddi3 splitter): New splitter for paddis.
(movdi_internal64): Add paddis support.
(movdi splitter): New splitter for paddis.

gcc/testsuite/

* gcc.target/powerpc/prefixed-addis.c: New test.

Diff:
---
 gcc/config/rs6000/constraints.md  | 10 +++
 gcc/config/rs6000/predicates.md   | 52 +++-
 gcc/config/rs6000/rs6000.cc   | 25 ++
 gcc/config/rs6000/rs6000.h|  4 +
 gcc/config/rs6000/rs6000.md   | 96 ---
 gcc/testsuite/gcc.target/powerpc/prefixed-addis.c | 24 ++
 6 files changed, 197 insertions(+), 14 deletions(-)

diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index 277a30a8245..4d8d21fd6bb 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -222,6 +222,16 @@
   "An IEEE 128-bit constant that can be loaded into VSX registers."
   (match_operand 0 "easy_vector_constant_ieee128"))
 
+(define_constraint "eU"
+  "@internal integer constant that can be loaded with paddis"
+  (and (match_code "const_int")
+   (match_operand 0 "paddis_operand")))
+
+(define_constraint "eV"
+  "@internal integer constant that can be loaded with paddis + paddi"
+  (and (match_code "const_int")
+   (match_operand 0 "paddis_paddi_operand")))
+
 ;; Floating-point constraints.  These two are defined so that insn
 ;; length attributes can be calculated exactly.
 
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 83813fe9ddc..c4a14cf13bb 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -369,6 +369,53 @@
   return SIGNED_INTEGER_34BIT_P (INTVAL (op));
 })
 
+;; Return 1 if op is a 64-bit constant that uses the paddis instruction
+(define_predicate "paddis_operand"
+  (match_code "const_int")
+{
+  if (!TARGET_PADDIS && TARGET_POWERPC64)
+return 0;
+
+  /* If addi, addis, or paddi can handle the number, don't return true.  */
+  HOST_WIDE_INT value = INTVAL (op);
+  if (SIGNED_INTEGER_34BIT_P (value))
+return false;
+
+  /* If the number is too large for padds, return false.  */
+  if (!SIGNED_INTEGER_32BIT_P (value >> 32))
+return false;
+
+  /* If the bottom 32-bits are non-zero, paddis can't handle it.  */
+  if ((value & HOST_WIDE_INT_C(0x)) != 0)
+return false;
+
+  return true;
+})
+
+;; Return 1 if op is a 64-bit constant that needs the paddis instruction and an
+;; addi/addis/paddi instruction combination.
+(define_predicate "paddis_paddi_operand"
+  (match_code "const_int")
+{
+  if (!TARGET_PADDIS && TARGET_POWERPC64)
+return 0;
+
+  /* If addi, addis, or paddi can handle the number, don't return true.  */
+  HOST_WIDE_INT value = INTVAL (op);
+  if (SIGNED_INTEGER_34BIT_P (value))
+return false;
+
+  /* If the number is too large for padds, return false.  */
+  if (!SIGNED_INTEGER_32BIT_P (value >> 32))
+return false;
+
+  /* If the bottom 32-bits are zero, we can use paddis alone to handle it.  */
+  if ((value & HOST_WIDE_INT_C(0x)) == 0)
+return false;
+
+  return true;
+})
+
 ;; Return 1 if op is a register that is not special.
 ;; Disallow (SUBREG:SF (REG:SI)) and (SUBREG:SI (REG:SF)) on VSX systems where
 ;; you need to be careful in moving a SFmode to SImode and vice versa due to
@@ -1050,7 +1097,10 @@
   (if_then_else (match_code "const_int")
 (match_test "satisfies_constraint_I (op)
 || satisfies_constraint_L (op)
-|| satisfies_constraint_eI (op)")
+|| satisfies_constraint_eI (op)
+|| satisfies_constraint_eU (op)
+|| satisfies_constraint_eV (op)")
+
 (match_operand 0 "gpc_reg_operand")))
 
 ;; Return 1 if the operand is either a non-special register, or 0, or -1.
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 9c219ccae08..dfc407c57fc 100644
--- a/gcc/config/rs6

[gcc/meissner/heads/work176] Merge commit 'refs/users/meissner/heads/work176' of git+ssh

2024-08-19 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work176' was updated to point to:

 248fc70e14e... Merge commit 'refs/users/meissner/heads/work176' of git+ssh

It previously pointed to:

 5f249111a45... Revert changes

Diff:

!!! WARNING: THE FOLLOWING COMMITS ARE NO LONGER ACCESSIBLE (LOST):
---

  5f24911... Revert changes
  11f9bbc... RFC2677-Add xvrlw support.
  0db29e6... RFC2686-Add paddis support.
  2513492... RFC2655-Add saturating subtract built-ins.
  56a9db5... RFC2656-Support load/store vector with right length.
  9bb7924... RFC2653-PowerPC: Add support for 1,024 bit DMR registers.
  42a51bf... RFC2653-Add dense math test for new instruction names.
  6a73439... RFC2653-Add support for dense math registers.
  2f6844a... RFC2653-Add support for dense math registers.
  5b68a1c... RFC2653-Add wD constraint.
  3548ae9... Use vector pair load/store for memcpy with -mcpu=future
  fa18267... Revert changes
  6cc817c... RFC2653-PowerPC: Add support for 1,024 bit DMR registers.
  5337baa... RFC2653-Add dense math test for new instruction names.
  b994c1d... RFC2653-PowerPC: Switch to dense math names for all MMA ope
  8e99964... RFC2653-Add support for dense math registers.
  0d4fd03... Use vector pair load/store for memcpy with -mcpu=future


[gcc(refs/users/meissner/heads/work176-bugs)] Add better support for shifting vectors with 64-bit elements

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:b79d939912a9826b29603037848503ca56ed2ea8

commit b79d939912a9826b29603037848503ca56ed2ea8
Author: Michael Meissner 
Date:   Mon Aug 19 14:09:32 2024 -0400

Add better support for shifting vectors with 64-bit elements

This patch fixes PR target/89213 to allow better code to be generated to do
constant shifts of V2DI/V2DF vectors.  Previously GCC would do constant 
shifts
of vectors with 64-bit elements by using:

XXSPLTIB 32,4
VEXTSB2D 0,0
VSRAD 2,2,0

I.e., the PowerPC does not have a VSPLTISD instruction to load -15..14 for 
the
64-bit shift count in one instruction.  Instead, it would need to load a 
byte
and then convert it to 64-bit.

With this patch, GCC now realizes that the vector shift instructions will 
look
at the bottom 6 bits for the shift count, and it can use either a VSPLTISW 
or
XXSPLTIB instruction to load the shift count.

2024-08-19  Michael Meissner  

gcc/

PR target/89213
* config/rs6000/altivec.md (UNSPEC_VECTOR_SHIFT): New unspec.
(VSHIFT_MODE): New mode iterator.
(vshift_code): New code iterator.
(vshift_attr): New code attribute.
(altivec___const): New pattern to optimize
vector long long/int shifts by a constant.
(altivec__shift_const): New helper insn to load up a
constant used by the shift operation.
* config/rs6000/predicates.md (vector_shift_constant): New
predicate.

gcc/testsuite/

PR target/89213
* gcc.target/powerpc/pr89213.c: New test.
* gcc.target/powerpc/vec-rlmi-rlnm.c: Update instruction count.

Diff:
---
 gcc/config/rs6000/altivec.md |  51 +++
 gcc/config/rs6000/predicates.md  |  63 ++
 gcc/testsuite/gcc.target/powerpc/pr89213.c   | 106 +++
 gcc/testsuite/gcc.target/powerpc/vec-rlmi-rlnm.c |   4 +-
 4 files changed, 222 insertions(+), 2 deletions(-)

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 1f5489b974f..8faece984e9 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -170,6 +170,7 @@
UNSPEC_VSTRIL
UNSPEC_SLDB
UNSPEC_SRDB
+   UNSPEC_VECTOR_SHIFT
 ])
 
 (define_c_enum "unspecv"
@@ -2176,6 +2177,56 @@
   "vsro %0,%1,%2"
   [(set_attr "type" "vecperm")])
 
+;; Optimize V2DI shifts by constants.  This relies on the shift instructions
+;; only looking at the bits needed to do the shift.  This means we can use
+;; VSPLTISW or XXSPLTIB to load up the constant, and not worry about the bits
+;; that the vector shift instructions will not use.
+(define_mode_iterator VSHIFT_MODE  [(V4SI "TARGET_P9_VECTOR")
+(V2DI "TARGET_P8_VECTOR")])
+
+(define_code_iterator vshift_code  [ashift ashiftrt lshiftrt])
+(define_code_attr vshift_attr  [(ashift   "ashift")
+(ashiftrt "ashiftrt")
+(lshiftrt "lshiftrt")])
+
+(define_insn_and_split "*altivec___const"
+  [(set (match_operand:VSHIFT_MODE 0 "register_operand" "=v")
+   (vshift_code:VSHIFT_MODE
+(match_operand:VSHIFT_MODE 1 "register_operand" "v")
+(match_operand:VSHIFT_MODE 2 "vector_shift_constant" "")))
+   (clobber (match_scratch:VSHIFT_MODE 3 "=&v"))]
+  "((mode == V2DImode && TARGET_P8_VECTOR)
+|| (mode == V4SImode && TARGET_P9_VECTOR))"
+  "#"
+  "&& 1"
+  [(set (match_dup 3)
+   (unspec:VSHIFT_MODE [(match_dup 4)] UNSPEC_VECTOR_SHIFT))
+   (set (match_dup 0)
+   (vshift_code:VSHIFT_MODE (match_dup 1)
+(match_dup 3)))]
+{
+  if (GET_CODE (operands[3]) == SCRATCH)
+operands[3] = gen_reg_rtx (mode);
+
+  operands[4] = ((GET_CODE (operands[2]) == CONST_VECTOR)
+? CONST_VECTOR_ELT (operands[2], 0)
+: XEXP (operands[2], 0));
+})
+
+(define_insn "*altivec__shift_const"
+  [(set (match_operand:VSHIFT_MODE 0 "register_operand" "=v")
+   (unspec:VSHIFT_MODE [(match_operand 1 "const_int_operand" "n")]
+   UNSPEC_VECTOR_SHIFT))]
+  "TARGET_P8_VECTOR"
+{
+  if (UINTVAL (operands[1]) <= 15)
+return "vspltisw %0,%1";
+  else if (TARGET_P9_VECTOR)
+return "xxspltib %x0,%1";
+  else
+gcc_unreachable ();
+})
+
 (define_insn "altivec_vsum4ubs"
   [(set (match_operand:V4SI 0 "register_operand" "=v")
 (unspec:V4SI [(match_operand:V16QI 1 "register_operand" "v")
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index cdfd400f639..2b29d45de9e 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -861,6 +861,69 @@
 return op == CONST0_RTX (mode) || op == CONSTM1_RTX (mode);
 })
 
+;; Return 1 if the operand is a V2DI or V4SI const_vector, where each 

[gcc(refs/users/meissner/heads/work176-bugs)] Optimize splat of a V2DF/V2DI extract with constant element

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:d4e863527f6453d567400381cb3df53fa3c03f22

commit d4e863527f6453d567400381cb3df53fa3c03f22
Author: Michael Meissner 
Date:   Mon Aug 19 14:10:44 2024 -0400

Optimize splat of a V2DF/V2DI extract with constant element

We had optimizations for splat of a vector extract for the other vector
types, but we missed having one for V2DI and V2DF.  This patch adds a
combiner insn to do this optimization.

In looking at the source, we had similar optimizations for V4SI and V4SF
extract and splats, but we missed doing V2DI/V2DF.

Without the patch for the code:

vector long long splat_dup_l_0 (vector long long v)
{
  return __builtin_vec_splats (__builtin_vec_extract (v, 0));
}

the compiler generates (on a little endian power9):

splat_dup_l_0:
mfvsrld 9,34
mtvsrdd 34,9,9
blr

Now it generates:

splat_dup_l_0:
xxpermdi 34,34,34,3
blr

2024-08-19  Michael Meissner  

gcc/

* config/rs6000/vsx.md (vsx_splat_extract_): New insn.

gcc/testsuite/

* gcc.target/powerpc/builtins-1.c: Adjust insn count.
* gcc.target/powerpc/pr99293.c: New test.

Diff:
---
 gcc/config/rs6000/vsx.md  | 18 ++
 gcc/testsuite/gcc.target/powerpc/builtins-1.c |  2 +-
 gcc/testsuite/gcc.target/powerpc/pr99293.c| 22 ++
 3 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 27069d070e1..a7dad219ff2 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -4797,6 +4797,24 @@
   "lxvdsx %x0,%y1"
   [(set_attr "type" "vecload")])
 
+;; Optimize SPLAT of an extract from a V2DF/V2DI vector with a constant element
+(define_insn "*vsx_splat_extract_"
+  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
+   (vec_duplicate:VSX_D
+(vec_select:
+ (match_operand:VSX_D 1 "vsx_register_operand" "wa")
+ (parallel [(match_operand 2 "const_0_to_1_operand" "n")]]
+  "VECTOR_MEM_VSX_P (mode)"
+{
+  int which_word = INTVAL (operands[2]);
+  if (!BYTES_BIG_ENDIAN)
+which_word = 1 - which_word;
+
+  operands[3] = GEN_INT (which_word ? 3 : 0);
+  return "xxpermdi %x0,%x1,%x1,%3";
+}
+  [(set_attr "type" "vecperm")])
+
 ;; V4SI splat support
 (define_insn "vsx_splat_v4si"
   [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,wa")
diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-1.c 
b/gcc/testsuite/gcc.target/powerpc/builtins-1.c
index 8410a5fd431..4e7e5384675 100644
--- a/gcc/testsuite/gcc.target/powerpc/builtins-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/builtins-1.c
@@ -1035,4 +1035,4 @@ foo156 (vector unsigned short usa)
 /* { dg-final { scan-assembler-times {\mvmrglb\M} 3 } } */
 /* { dg-final { scan-assembler-times {\mvmrgew\M} 4 } } */
 /* { dg-final { scan-assembler-times {\mvsplth|xxsplth\M} 4 } } */
-/* { dg-final { scan-assembler-times {\mxxpermdi\M} 44 } } */
+/* { dg-final { scan-assembler-times {\mxxpermdi\M} 42 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr99293.c 
b/gcc/testsuite/gcc.target/powerpc/pr99293.c
new file mode 100644
index 000..20adc1f27f6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr99293.c
@@ -0,0 +1,22 @@
+/* { dg-do compile { target powerpc*-*-* } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -mvsx" } */
+
+/* Test for PR 99263, which wants to do:
+   __builtin_vec_splats (__builtin_vec_extract (v, n))
+
+   where v is a V2DF or V2DI vector and n is either 0 or 1.  Previously the
+   compiler would do a direct move to the GPR registers to select the item and 
a
+   direct move from the GPR registers to do the splat.  */
+
+vector long long splat_dup_l_0 (vector long long v)
+{
+  return __builtin_vec_splats (__builtin_vec_extract (v, 0));
+}
+
+vector long long splat_dup_l_1 (vector long long v)
+{
+  return __builtin_vec_splats (__builtin_vec_extract (v, 1));
+}
+
+/* { dg-final { scan-assembler-times "xxpermdi" 2 } } */


[gcc(refs/users/meissner/heads/work176-bugs)] Update ChagneLog.bugs

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:74a68d9476981f47d53073131b05cfdd28c28a64

commit 74a68d9476981f47d53073131b05cfdd28c28a64
Author: Michael Meissner 
Date:   Mon Aug 19 14:12:25 2024 -0400

Update ChagneLog.bugs

Diff:
---
 gcc/ChangeLog.bugs | 94 +-
 1 file changed, 93 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.bugs b/gcc/ChangeLog.bugs
index c36f5407481..39f50010e9a 100644
--- a/gcc/ChangeLog.bugs
+++ b/gcc/ChangeLog.bugs
@@ -1,6 +1,98 @@
+ Branch work176-bugs, patch #201 
+
+Optimize splat of a V2DF/V2DI extract with constant element
+
+We had optimizations for splat of a vector extract for the other vector
+types, but we missed having one for V2DI and V2DF.  This patch adds a
+combiner insn to do this optimization.
+
+In looking at the source, we had similar optimizations for V4SI and V4SF
+extract and splats, but we missed doing V2DI/V2DF.
+
+Without the patch for the code:
+
+   vector long long splat_dup_l_0 (vector long long v)
+   {
+ return __builtin_vec_splats (__builtin_vec_extract (v, 0));
+   }
+
+the compiler generates (on a little endian power9):
+
+   splat_dup_l_0:
+   mfvsrld 9,34
+   mtvsrdd 34,9,9
+   blr
+
+Now it generates:
+
+   splat_dup_l_0:
+   xxpermdi 34,34,34,3
+   blr
+
+2024-08-19  Michael Meissner  
+
+gcc/
+
+   * config/rs6000/vsx.md (vsx_splat_extract_): New insn.
+
+gcc/testsuite/
+
+   * gcc.target/powerpc/builtins-1.c: Adjust insn count.
+   * gcc.target/powerpc/pr99293.c: New test.
+
+ Branch work176-bugs, patch #200 
+
+Add better support for shifting vectors with 64-bit elements
+
+This patch fixes PR target/89213 to allow better code to be generated to do
+constant shifts of V2DI/V2DF vectors.  Previously GCC would do constant shifts
+of vectors with 64-bit elements by using:
+
+   XXSPLTIB 32,4
+   VEXTSB2D 0,0
+   VSRAD 2,2,0
+
+I.e., the PowerPC does not have a VSPLTISD instruction to load -15..14 for the
+64-bit shift count in one instruction.  Instead, it would need to load a byte
+and then convert it to 64-bit.
+
+With this patch, GCC now realizes that the vector shift instructions will look
+at the bottom 6 bits for the shift count, and it can use either a VSPLTISW or
+XXSPLTIB instruction to load the shift count.
+
+2024-08-19  Michael Meissner  
+
+gcc/
+
+   PR target/89213
+   * config/rs6000/altivec.md (UNSPEC_VECTOR_SHIFT): New unspec.
+   (VSHIFT_MODE): New mode iterator.
+   (vshift_code): New code iterator.
+   (vshift_attr): New code attribute.
+   (altivec___const): New pattern to optimize
+   vector long long/int shifts by a constant.
+   (altivec__shift_const): New helper insn to load up a
+   constant used by the shift operation.
+   * config/rs6000/predicates.md (vector_shift_constant): New
+   predicate.
+
+gcc/testsuite/
+
+   PR target/89213
+   * gcc.target/powerpc/pr89213.c: New test.
+   * gcc.target/powerpc/vec-rlmi-rlnm.c: Update instruction count.
+
  Branch work176-bugs, baseline 
 
+Add ChangeLog.bugs and update REVISION.
+
+2024-08-16  Michael Meissner  
+
+gcc/
+
+   * ChangeLog.bugs: New file for branch.
+   * REVISION: Update.
+
 2024-08-16   Michael Meissner  
 
Clone branch
-


[gcc(refs/users/meissner/heads/work176-tar)] Remove SPR alternatives for move insns.

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:1eaca9796aaad6e085323f03fd523d087ee659db

commit 1eaca9796aaad6e085323f03fd523d087ee659db
Author: Michael Meissner 
Date:   Mon Aug 19 14:17:13 2024 -0400

Remove SPR alternatives for move insns.

2024-08-19  Michael Meissner  

* config/rs6000/rs6000.md (mov_internal): Remove alternatives 
for
moving values to/from SPR registers.
(movcc_): Likewise.
(movsf_hardfloat): Likewise.
(movsd_hardfloat): Likewise.
(mov_softfloat): Likewise.
(mov_hardfloat64): Likewise.
(mov_softfloat64): Likewise.

Diff:
---
 gcc/config/rs6000/rs6000.md | 114 +---
 1 file changed, 44 insertions(+), 70 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 2c932061b93..16f3cd1ba6b 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -8099,16 +8099,16 @@
 
 ;; MR  LHZ/LBZLXSI*ZXSTH/STBSTXSI*XLI
 ;; XXLOR   load 0 load -1VSPLTI*#  MFVSRWZ
-;; MTVSRWZ MF%1   MT%1   NOP
+;; MTVSRWZ
 (define_insn "*mov_internal"
   [(set (match_operand:QHI 0 "nonimmediate_operand"
"=r,r, wa,m, ?Z,r,
 wa,wa,wa,v, ?v,r,
-wa,r, *c*l,  *h")
+wa")
(match_operand:QHI 1 "input_operand"
"r, m, ?Z,r, wa,i,
 wa,O, wM,wB,wS,wa,
-r, *h,r, 0"))]
+r"))]
   "gpc_reg_operand (operands[0], mode)
|| gpc_reg_operand (operands[1], mode)"
   "@
@@ -8124,22 +8124,19 @@
vspltis %0,%1
#
mfvsrwz %0,%x1
-   mtvsrwz %x0,%1
-   mf%1 %0
-   mt%0 %1
-   nop"
+   mtvsrwz %x0,%1"
   [(set_attr "type"
"*, load,  fpload,store, fpstore,   *,
 vecsimple, vecperm,   vecperm,   vecperm,   vecperm,   mfvsr,
-mtvsr, mfjmpr,mtjmpr,*")
+mtvsr")
(set_attr "length"
"*, *, *, *, *, *,
 *, *, *, *, 8, *,
-*, *, *, *")
+*")
(set_attr "isa"
"*, *, p9v,   *, p9v,   *,
 p9v,   p9v,   p9v,   p9v,   p9v,   p9v,
-p9v,   *, *, *")])
+p9v")])
 
 
 ;; Here is how to move condition codes around.  When we store CC data in
@@ -8155,9 +8152,9 @@
 
 (define_insn "*movcc_"
   [(set (match_operand:CC_any 0 "nonimmediate_operand"
-   "=y,x,?y,y,r,r,r,r, r,*c*l,r,m")
+   "=y,x,?y,y,r,r,r,r,r,m")
(match_operand:CC_any 1 "general_operand"
-   " y,r, r,O,x,y,r,I,*h,   r,m,r"))]
+   " y,r, r,O,x,y,r,I,m,r"))]
   "register_operand (operands[0], mode)
|| register_operand (operands[1], mode)"
   "@
@@ -8169,8 +8166,6 @@
mfcr %0%Q1\;rlwinm %0,%0,%f1,0xf000
mr %0,%1
li %0,%1
-   mf%1 %0
-   mt%0 %1
lwz%U1%X1 %0,%1
stw%U0%X0 %1,%0"
   [(set_attr_alternative "type"
@@ -8184,11 +8179,9 @@
(const_string "mfcrf") (const_string "mfcr"))
   (const_string "integer")
   (const_string "integer")
-  (const_string "mfjmpr")
-  (const_string "mtjmpr")
   (const_string "load")
   (const_string "store")])
-   (set_attr "length" "*,*,12,*,*,8,*,*,*,*,*,*")])
+   (set_attr "length" "*,*,12,*,*,8,*,*,*,*")])
 
 ;; For floating-point, we normally deal with the floating-point registers
 ;; unless -msoft-float is used.  The sole exception is that parameter passing
@@ -8239,17 +8232,17 @@
 ;;
 ;; LWZ  LFSLXSSP   LXSSPX STFS   STXSSP
 ;; STXSSPX  STWXXLXOR  LI FMRXSCPSGNDP
-;; MR   MT  MF   NOPXXSPLTIDP
+;; MR   XXSPLTIDP
 
 (define_insn "movsf_hardfloat"
   [(set (match_operand:SF 0 "nonimmediate_operand"
 "=!r,   f, v,  wa,m, wY,
  Z, m, wa, !r,f, wa,
- !r,*c*l,  !r, *h,wa")
+ !r,wa")
(match_operand:SF 1 "input_operand"
 "m, m, wY, Z, f, v,
  wa,r, j,  j, f, wa,
- r, r, *h, 0, eP"))]
+ r, eP"))]
   "(register_operand (operands[0], SFmode)
|| register_operand (op

[gcc(refs/users/meissner/heads/work176-tar)] Add support for the TAR register.

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:987957c82206af4bf69175c7438fe9d3de5a3cc0

commit 987957c82206af4bf69175c7438fe9d3de5a3cc0
Author: Michael Meissner 
Date:   Mon Aug 19 14:16:09 2024 -0400

Add support for the TAR register.

2024-08-19  Michael Meissner  

gcc/

* config/rs6000/constraints.md (h constraint): Add TAR register to the
documentation.
(wt constraint): New constraint.
* config/rs6000/rs6000-cpus.def (ISA_3_0_MASKS_SERVER): Add -mtar.
(POWERPC_MASKS): Likewise.
* config/rs6000/rs6000.cc (rs6000_reg_names): Add TAR register support.
(alt_reg_names): Likewise.
(rs6000_hard_regno_mode_ok_uncached): Restrict SPR registers to only
hold scalar integer modes of an appropriate size.  Add TAR register
support.
(rs6000_debug_reg_global): Print the register class that wt maps to.
(rs6000_init_hard_regno_mode_ok): Add TAR register support.
(rs6000_conditional_register_usage): Add TAR register support.
(print_operand): Likewise.
(rs6000_debugger_regno): Likewise.
(rs6000_opt_masks): Add support for -mtar.
* config/rs6000/rs6000.h (FIRST_PSEUDO_REGISTER): Add TAR register
support.
(FIXED_REGISTERS): Likewise.
(CALL_REALLY_USED_REGISTERS): Likewise.
(REG_ALLOC_ORDER): Likewise.
(enum reg_class): Likewise.
(REG_CLASS_NAMES): Likewise.
(REG_CLASS_CONTENTS): Likewise.
(enum r6000_reg_class_enum): Add support for the wt constraint.
* config/rs6000/rs6000.md (TAR_REGNO): New constant.
(call_indirect_nonlocal_sysv): Likewise.
(call_value_indirect_nonlocal_sysv): Likewise.
(call_indirect_aix): Likewise.
(call_value_indirect_aix): Likewise.
(call_indirect_elfv2): Likewise.
(call_indirect_pcrel): Likewise.
(call_value_indirect_elfv2): Likewise.
(call_value_indirect_pcrel): Likewise.
(*sibcall_indirect_nonlocal_sysv): Likewise.
(sibcall_value_indirect_nonlocal_sysv): Likewise.
(indirect_jump): Likewise.
(@indirect_jump_nospec): Likewise.
(@tablejump_insn_normal): Likewise.
(@tablejump_insn_nospec): Likewise.
* config/rs6000/rs6000.opt (-mtar): New option.

gcc/testsuite/

* gcc.target/powerpc/ppc-switch-1.c: Update test for the TAR register.
* gcc.target/powerpc/pr51513.c: Likewise.
* gcc.target/powerpc/safe-indirect-jump-2.c: Likewise.
* gcc.target/powerpc/safe-indirect-jump-3.c: Likewise.
* gcc.target/powerpc/tar-register.c: New test.

Diff:
---
 gcc/config/rs6000/constraints.md   |  5 +-
 gcc/config/rs6000/rs6000-cpus.def  |  4 +-
 gcc/config/rs6000/rs6000.cc| 58 +++---
 gcc/config/rs6000/rs6000.h | 31 +++-
 gcc/config/rs6000/rs6000.md| 33 ++--
 gcc/config/rs6000/rs6000.opt   |  4 ++
 gcc/testsuite/gcc.target/powerpc/ppc-switch-1.c|  4 +-
 gcc/testsuite/gcc.target/powerpc/pr51513.c |  4 +-
 .../gcc.target/powerpc/safe-indirect-jump-2.c  |  2 +-
 .../gcc.target/powerpc/safe-indirect-jump-3.c  |  2 +-
 gcc/testsuite/gcc.target/powerpc/tar-register.c| 34 +
 11 files changed, 138 insertions(+), 43 deletions(-)

diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index 369a7b75042..14f0465d7ae 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -57,7 +57,7 @@
   "@internal A compatibility alias for @code{wa}.")
 
 (define_register_constraint "h" "SPECIAL_REGS"
-  "@internal A special register (@code{vrsave}, @code{ctr}, or @code{lr}).")
+  "@internal A special register (@code{vrsave}, @code{ctr}, @code{lr} or 
@code{tar}).")
 
 (define_register_constraint "c" "CTR_REGS"
   "The count register, @code{ctr}.")
@@ -91,6 +91,9 @@
   "@internal Like @code{r}, if @option{-mpowerpc64} is used; otherwise,
@code{NO_REGS}.")
 
+(define_register_constraint "wt" "rs6000_constraints[RS6000_CONSTRAINT_wt]"
+  "The tar register, @code{tar}.")
+
 (define_register_constraint "wx" "rs6000_constraints[RS6000_CONSTRAINT_wx]"
   "@internal Like @code{d}, if @option{-mpowerpc-gfxopt} is used; otherwise,
@code{NO_REGS}.")
diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def
index e73d9ef51f8..a7ecd38f8ee 100644
--- a/gcc/config/rs6000/rs6000-cpus.def
+++ b/gcc/config/rs6000/rs6000-cpus.def
@@ -64,7 +64,8 @@
  | OPTION_MASK_MODULO  \
  | OPTION_MASK_P9_MINMAX   \
  

[gcc(refs/users/meissner/heads/work176-tar)] Update ChangeLog.*

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:15ff7216870b8c3689c848ab8d42d67598c6337d

commit 15ff7216870b8c3689c848ab8d42d67598c6337d
Author: Michael Meissner 
Date:   Mon Aug 19 14:19:19 2024 -0400

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.tar | 83 ++-
 1 file changed, 82 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.tar b/gcc/ChangeLog.tar
index b24eb12cb69..d65db37c865 100644
--- a/gcc/ChangeLog.tar
+++ b/gcc/ChangeLog.tar
@@ -1,6 +1,87 @@
+ Branch work176-tar, patch #301 
+
+Remove SPR alternatives for move insns.
+
+2024-08-19  Michael Meissner  
+
+   * config/rs6000/rs6000.md (mov_internal): Remove alternatives for
+   moving values to/from SPR registers.
+   (movcc_): Likewise.
+   (movsf_hardfloat): Likewise.
+   (movsd_hardfloat): Likewise.
+   (mov_softfloat): Likewise.
+   (mov_hardfloat64): Likewise.
+   (mov_softfloat64): Likewise.
+
+ Branch work176-tar, patch #300 
+
+Add support for the TAR register.
+
+2024-08-19  Michael Meissner  
+
+gcc/
+
+   * config/rs6000/constraints.md (h constraint): Add TAR register to the
+   documentation.
+   (wt constraint): New constraint.
+   * config/rs6000/rs6000-cpus.def (ISA_3_0_MASKS_SERVER): Add -mtar.
+   (POWERPC_MASKS): Likewise.
+   * config/rs6000/rs6000.cc (rs6000_reg_names): Add TAR register support.
+   (alt_reg_names): Likewise.
+   (rs6000_hard_regno_mode_ok_uncached): Restrict SPR registers to only
+   hold scalar integer modes of an appropriate size.  Add TAR register
+   support.
+   (rs6000_debug_reg_global): Print the register class that wt maps too.
+   (rs6000_init_hard_regno_mode_ok): Add TAR register support.
+   (rs6000_conditional_register_usage): Add TAR register support.
+   (print_operand): Likewise.
+   (rs6000_debugger_regno): Likewise.
+   (rs6000_opt_masks): Add support for -mtar.
+   * config/rs6000/rs6000.h (FIRST_PSEUDO_REGISTER): Add TAR register
+   support.
+   (FIXED_REGISTERS): Likewise.
+   (CALL_REALLY_USED_REGISTERS): Likewise.
+   (REG_ALLOC_ORDER): Likewise.
+   (enum reg_class): Likewise.
+   (REG_CLASS_NAMES): Likewise.
+   (REG_CLASS_CONTENTS): Likewise.
+   (enum r6000_reg_class_enum): Add support for the wt constraint.
+   * config/rs6000/rs6000.md (TAR_REGNO): New constant.
+   (call_indirect_nonlocal_sysv): Likewise.
+   (call_value_indirect_nonlocal_sysv): Likewise.
+   (call_indirect_aix): Likewise.
+   (call_value_indirect_aix): Likewise.
+   (call_indirect_elfv2): Likewise.
+   (call_indirect_pcrel): Likewise.
+   (call_value_indirect_elfv2): Likewise.
+   (call_value_indirect_pcrel): Likewise.
+   (*sibcall_indirect_nonlocal_sysv): Likewise.
+   (sibcall_value_indirect_nonlocal_sysv): Likewise.
+   (indirect_jump): Likewise.
+   (@indirect_jump_nospec): Likewise.
+   (@tablejump_insn_normal): Likewise.
+   (@tablejump_insn_nospec): Likewise.
+   * config/rs6000/rs6000.opt (-mtar): New option.
+
+gcc/testsuite/
+
+   * gcc.target/powerpc/ppc-switch-1.c: Update test for the TAR register.
+   * gcc.target/powerpc/pr51513.c: Likewise.
+   * gcc.target/powerpc/safe-indirect-jump-2.c: Likewise.
+   * gcc.target/powerpc/safe-indirect-jump-3.c: Likewise.
+   * gcc.target/powerpc/tar-register.c: New test.
+
  Branch work176-tar, baseline 
 
+Add ChangeLog.tar and update REVISION.
+
+2024-08-16  Michael Meissner  
+
+gcc/
+
+   * ChangeLog.tar: New file for branch.
+   * REVISION: Update.
+
 2024-08-16   Michael Meissner  
 
Clone branch
-


[gcc(refs/users/meissner/heads/work176-vpair)] Add support for vector pair unary and binary operations.

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:843b0025117e9a3c3a3bb57b5dc05c149b94575e

commit 843b0025117e9a3c3a3bb57b5dc05c149b94575e
Author: Michael Meissner 
Date:   Mon Aug 19 14:21:35 2024 -0400

Add support for vector pair unary and binary operations.

2024-08-19  Michael Meissner  

gcc/

* config/rs6000/rs6000-builtins.def (__builtin_vpair_*): Add new
built-in functions for vector pair support.
* config/rs6000/rs6000-protos.h (enum vpair_split_unary): New
enumeration.
(vpair_split_unary): New declaration.
(vpair_split_binary): Likewise.
* config/rs6000/rs6000.cc (vpair_split_unary): New function to split
vector pair operations.
(vpair_split_binary): Likewise.
* config/rs6000/rs6000.md (toplevel): Include vector-pair.md.
* config/rs6000/t-rs6000 (MD_INCLUDES): Add vector-pair.md.
* config/rs6000/vector-pair.md: New file.
* doc/extend.texi (PowerPC Vector Pair Built-in Functions): Add
documentation for the new vector pair built-in functions.

gcc/testsuite/

* gcc.target/powerpc/vector-pair-1.c: New test.
* gcc.target/powerpc/vector-pair-2.c: Likewise.

Diff:
---
 gcc/config/rs6000/rs6000-builtins.def|  56 
 gcc/config/rs6000/rs6000-protos.h|  12 ++
 gcc/config/rs6000/rs6000.cc  |  67 ++
 gcc/config/rs6000/rs6000.md  |   1 +
 gcc/config/rs6000/t-rs6000   |   1 +
 gcc/config/rs6000/vector-pair.md | 160 +++
 gcc/doc/extend.texi  |  51 
 gcc/testsuite/gcc.target/powerpc/vector-pair-1.c |  87 
 gcc/testsuite/gcc.target/powerpc/vector-pair-2.c |  86 
 9 files changed, 521 insertions(+)

diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def
index 0e9dc05dbcf..d164ad1b303 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -3933,3 +3933,59 @@
 
   void __builtin_vsx_stxvp (v256, unsigned long, const v256 *);
 STXVP nothing {mma,pair}
+
+;; Vector pair built-in functions with float elements
+  v256 __builtin_vpair_f32_abs (v256);
+VPAIR_F32_ABS vpair_abs_v8sf2 {mma}
+
+  v256 __builtin_vpair_f32_add (v256, v256);
+VPAIR_F32_ADD vpair_add_v8sf3 {mma}
+
+  v256 __builtin_vpair_f32_div (v256, v256);
+VPAIR_F32_DIV vpair_div_v8sf3 {mma}
+
+  v256 __builtin_vpair_f32_max (v256, v256);
+VPAIR_F32_MAX vpair_smax_v8sf3 {mma}
+
+  v256 __builtin_vpair_f32_min (v256, v256);
+VPAIR_F32_MIN vpair_smin_v8sf3 {mma}
+
+  v256 __builtin_vpair_f32_mul (v256, v256);
+VPAIR_F32_MUL vpair_mul_v8sf3 {mma}
+
+  v256 __builtin_vpair_f32_nabs (v256);
+VPAIR_F32_NABS vpair_nabs_v8sf2 {mma}
+
+  v256 __builtin_vpair_f32_neg (v256);
+VPAIR_F32_NEG vpair_neg_v8sf2 {mma}
+
+  v256 __builtin_vpair_f32_sub (v256, v256);
+VPAIR_F32_SUB vpair_sub_v8sf3 {mma}
+
+;; Vector pair built-in functions with double elements
+  v256 __builtin_vpair_f64_abs (v256);
+VPAIR_F64_ABS vpair_abs_v4df2 {mma}
+
+  v256 __builtin_vpair_f64_add (v256, v256);
+VPAIR_F64_ADD vpair_add_v4df3 {mma}
+
+  v256 __builtin_vpair_f64_div (v256, v256);
+VPAIR_F64_DIV vpair_div_v4df3 {mma}
+
+  v256 __builtin_vpair_f64_max (v256, v256);
+VPAIR_F64_MAX vpair_smax_v4df3 {mma}
+
+  v256 __builtin_vpair_f64_min (v256, v256);
+VPAIR_F64_MIN vpair_smin_v4df3 {mma}
+
+  v256 __builtin_vpair_f64_mul (v256, v256);
+VPAIR_F64_MUL vpair_mul_v4df3 {mma}
+
+  v256 __builtin_vpair_f64_nabs (v256);
+VPAIR_F64_NABS vpair_nabs_v4df2 {mma}
+
+  v256 __builtin_vpair_f64_neg (v256);
+VPAIR_F64_NEG vpair_neg_v4df2 {mma}
+
+  v256 __builtin_vpair_f64_sub (v256, v256);
+VPAIR_F64_SUB vpair_sub_v4df3 {mma}
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index da658cd5ab2..7b8b3b0c237 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -161,6 +161,18 @@ extern bool rs6000_pcrel_p (void);
 extern bool rs6000_fndecl_pcrel_p (const_tree);
 extern void rs6000_output_addr_vec_elt (FILE *, int);
 
+/* If we are splitting a vector pair unary operator into two separate vector
+   operations, we need to generate a NEG if this is NABS.  */
+
+enum vpair_split_unary {
+  VPAIR_SPLIT_NORMAL,  /* No extra processing is needed.  */
+  VPAIR_SPLIT_NEGATE   /* Wrap operation with a NEG.  */
+};
+
+extern void vpair_split_unary (rtx [], machine_mode, enum rtx_code,
+  enum vpair_split_unary);
+extern void vpair_split_binary (rtx [], machine_mode, enum rtx_code);
+
 /* Different PowerPC instruction formats that are used by GCC.  There are
various other instruction formats used by the PowerPC hardware, but these
formats are not currently 

[gcc(refs/users/meissner/heads/work176-vpair)] Add support for vector pair fma operations.

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:64af04ceb49a577e8c263db5e1ac31fb0b8c18b7

commit 64af04ceb49a577e8c263db5e1ac31fb0b8c18b7
Author: Michael Meissner 
Date:   Mon Aug 19 14:22:38 2024 -0400

Add support for vector pair fma operations.

2024-08-19  Michael Meissner  

gcc/

* config/rs6000/rs6000-builtins.def (__builtin_vpair_f32_fma): New
built-in.
(__builtin_vpair_f32_fms): Likewise.
(__builtin_vpair_f32_nfma): Likewise.
(__builtin_vpair_f32_nfms): Likewise.
(__builtin_vpair_f64_fma): Likewise.
(__builtin_vpair_f64_fms): Likewise.
(__builtin_vpair_f64_nfma): Likewise.
* config/rs6000/rs6000-protos.h (enum vpair_split_fma): New
enumeration.
(vpair_split_fma): New declaration.
* config/rs6000/rs6000.cc (vpair_split_fma): New function to split
vector pair FMA operations.
* config/rs6000/vector-pair.md (UNSPEC_VPAIR_FMA): New unspec.
(vpair_stdname): Add UNSPEC_VPAIR_FMA.
(VPAIR_OP): Likewise.
(vpair_fma_4): New insns.
(vpair_fms_4): Likewise.
(vpair_nfma_4): Likewise.
(vpair_nfms_4): Likewise.
* doc/extend.texi (PowerPC Vector Pair Built-in Functions): Document new
vector pair fma built-in functions.

gcc/testsuite/

* gcc.target/powerpc/vector-pair-3.c: New test.
* gcc.target/powerpc/vector-pair-4.c: Likewise.

Diff:
---
 gcc/config/rs6000/rs6000-builtins.def| 24 ++
 gcc/config/rs6000/rs6000-protos.h| 13 
 gcc/config/rs6000/rs6000.cc  | 71 ++
 gcc/config/rs6000/vector-pair.md | 96 
 gcc/doc/extend.texi  | 25 ++
 gcc/testsuite/gcc.target/powerpc/vector-pair-3.c | 57 ++
 gcc/testsuite/gcc.target/powerpc/vector-pair-4.c | 57 ++
 7 files changed, 343 insertions(+)

diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def
index d164ad1b303..099f4b6a008 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -3944,6 +3944,12 @@
   v256 __builtin_vpair_f32_div (v256, v256);
 VPAIR_F32_DIV vpair_div_v8sf3 {mma}
 
+  v256 __builtin_vpair_f32_fma (v256, v256, v256);
+VPAIR_F32_FMA vpair_fma_v8sf4 {mma}
+
+  v256 __builtin_vpair_f32_fms (v256, v256, v256);
+VPAIR_F32_FMS vpair_fms_v8sf4 {mma}
+
   v256 __builtin_vpair_f32_max (v256, v256);
 VPAIR_F32_MAX vpair_smax_v8sf3 {mma}
 
@@ -3959,6 +3965,12 @@
   v256 __builtin_vpair_f32_neg (v256);
 VPAIR_F32_NEG vpair_neg_v8sf2 {mma}
 
+  v256 __builtin_vpair_f32_nfma (v256, v256, v256);
+VPAIR_F32_NFMA vpair_nfma_v8sf4 {mma}
+
+  v256 __builtin_vpair_f32_nfms (v256, v256, v256);
+VPAIR_F32_NFMS vpair_nfms_v8sf4 {mma}
+
   v256 __builtin_vpair_f32_sub (v256, v256);
 VPAIR_F32_SUB vpair_sub_v8sf3 {mma}
 
@@ -3972,6 +3984,12 @@
   v256 __builtin_vpair_f64_div (v256, v256);
 VPAIR_F64_DIV vpair_div_v4df3 {mma}
 
+  v256 __builtin_vpair_f64_fma (v256, v256, v256);
+VPAIR_F64_FMA vpair_fma_v4df4 {mma}
+
+  v256 __builtin_vpair_f64_fms (v256, v256, v256);
+VPAIR_F64_FMS vpair_fms_v4df4 {mma}
+
   v256 __builtin_vpair_f64_max (v256, v256);
 VPAIR_F64_MAX vpair_smax_v4df3 {mma}
 
@@ -3987,5 +4005,11 @@
   v256 __builtin_vpair_f64_neg (v256);
 VPAIR_F64_NEG vpair_neg_v4df2 {mma}
 
+  v256 __builtin_vpair_f64_nfma (v256, v256, v256);
+VPAIR_F64_NFMA vpair_nfma_v4df4 {mma}
+
+  v256 __builtin_vpair_f64_nfms (v256, v256, v256);
+VPAIR_F64_NFMS vpair_nfms_v4df4 {mma}
+
   v256 __builtin_vpair_f64_sub (v256, v256);
 VPAIR_F64_SUB vpair_sub_v4df3 {mma}
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index 7b8b3b0c237..bab5fb437c2 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -173,6 +173,19 @@ extern void vpair_split_unary (rtx [], machine_mode, enum rtx_code,
   enum vpair_split_unary);
 extern void vpair_split_binary (rtx [], machine_mode, enum rtx_code);
 
+/* When we are splitting a vector pair FMA operation into two vector operations, we
+   may need to modify the code generated.  This enumeration encodes the
+   different choices.  */
+
+enum vpair_split_fma {
+  VPAIR_SPLIT_FMA, /* Fused multiply-add.  */
+  VPAIR_SPLIT_FMS, /* Fused multiply-subtract.  */
+  VPAIR_SPLIT_NFMA,/* Fused negate multiply-add.  */
+  VPAIR_SPLIT_NFMS /* Fused negate multiply-subtract.  */
+};
+
+extern void vpair_split_fma (rtx [], machine_mode, enum vpair_split_fma);
+
 /* Different PowerPC instruction formats that are used by GCC.  There are
various other instruction formats used by the PowerPC hardware, but these
formats are

[gcc(refs/users/meissner/heads/work176-vpair)] Add vector pair init and splat.

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:19677035f937f71409fdf6cc52999ab1e7e0c25f

commit 19677035f937f71409fdf6cc52999ab1e7e0c25f
Author: Michael Meissner 
Date:   Mon Aug 19 14:23:29 2024 -0400

Add vector pair init and splat.

2024-08-19  Michael Meissner  

gcc/

* config/rs6000/rs6000-builtins.def (__builtin_vpair_zero): New
built-in function.
(__builtin_vpair_f32_splat): Likewise.
(__builtin_vpair_f64_splat): Likewise.
* config/rs6000/vector-pair.md (UNSPEC_VPAIR_ZERO): New unspec.
(UNSPEC_VPAIR_SPLAT): Likewise.
(VPAIR_SPLAT_VMODE): New mode iterator.
(VPAIR_SPLAT_ELEMENT_TO_VMODE): New mode attribute.
(vpair_splat_name): Likewise.
(vpair_zero): New insn.
(vpair_splat_): New define_expand.
(vpair_splat__internal): New insns.

gcc/testsuite/

* gcc.target/powerpc/vector-pair-5.c: New test.
* gcc.target/powerpc/vector-pair-6.c: Likewise.

Diff:
---
 gcc/config/rs6000/rs6000-builtins.def|  10 +++
 gcc/config/rs6000/vector-pair.md | 102 ++-
 gcc/doc/extend.texi  |   9 ++
 gcc/testsuite/gcc.target/powerpc/vector-pair-5.c |  56 +
 gcc/testsuite/gcc.target/powerpc/vector-pair-6.c |  56 +
 5 files changed, 232 insertions(+), 1 deletion(-)

diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def
index 099f4b6a008..b3eaa842f12 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -3934,6 +3934,10 @@
   void __builtin_vsx_stxvp (v256, unsigned long, const v256 *);
 STXVP nothing {mma,pair}
 
+;; Vector pair built-in functions.
+  v256 __builtin_vpair_zero ();
+VPAIR_ZERO vpair_zero {mma}
+
 ;; Vector pair built-in functions with float elements
   v256 __builtin_vpair_f32_abs (v256);
 VPAIR_F32_ABS vpair_abs_v8sf2 {mma}
@@ -3971,6 +3975,9 @@
   v256 __builtin_vpair_f32_nfms (v256, v256, v256);
 VPAIR_F32_NFMS vpair_nfms_v8sf4 {mma}
 
+  v256 __builtin_vpair_f32_splat (float);
+VPAIR_F32_SPLAT vpair_splat_v8sf {mma}
+
   v256 __builtin_vpair_f32_sub (v256, v256);
 VPAIR_F32_SUB vpair_sub_v8sf3 {mma}
 
@@ -4011,5 +4018,8 @@
   v256 __builtin_vpair_f64_nfms (v256, v256, v256);
 VPAIR_F64_NFMS vpair_nfms_v4df4 {mma}
 
+  v256 __builtin_vpair_f64_splat (double);
+VPAIR_F64_SPLAT vpair_splat_v4df {mma}
+
   v256 __builtin_vpair_f64_sub (v256, v256);
 VPAIR_F64_SUB vpair_sub_v4df3 {mma}
diff --git a/gcc/config/rs6000/vector-pair.md b/gcc/config/rs6000/vector-pair.md
index 73ae46e6d40..39b419c6814 100644
--- a/gcc/config/rs6000/vector-pair.md
+++ b/gcc/config/rs6000/vector-pair.md
@@ -38,7 +38,9 @@
UNSPEC_VPAIR_NEG
UNSPEC_VPAIR_PLUS
UNSPEC_VPAIR_SMAX
-   UNSPEC_VPAIR_SMIN])
+   UNSPEC_VPAIR_SMIN
+   UNSPEC_VPAIR_ZERO
+   UNSPEC_VPAIR_SPLAT])
 
 ;; Vector pair element ID that defines the scaler element within the vector pair.
 (define_c_enum "vpair_element"
@@ -98,6 +100,104 @@
 ;; Map the scalar element ID into the appropriate insn type for divide.
 (define_int_attr vpair_divtype [(VPAIR_ELEMENT_FLOAT  "vecfdiv")
(VPAIR_ELEMENT_DOUBLE "vecdiv")])
+
+;; Mode iterator for the vector modes that we provide splat operations for.
+(define_mode_iterator VPAIR_SPLAT_VMODE [V4SF V2DF])
+
+;; Map element mode to 128-bit vector mode for splat operations
+(define_mode_attr VPAIR_SPLAT_ELEMENT_TO_VMODE [(SF "V4SF")
+   (DF "V2DF")])
+
+;; Map either element mode or vector mode into the name for the splat insn.
+(define_mode_attr vpair_splat_name [(SF   "v8sf")
+   (DF   "v4df")
+   (V4SF "v8sf")
+   (V2DF "v4df")])
+
+;; Initialize a vector pair to 0
+(define_insn_and_split "vpair_zero"
+  [(set (match_operand:OO 0 "vsx_register_operand" "=wa")
+   (unspec:OO [(const_int 0)] UNSPEC_VPAIR_ZERO))]
+  "TARGET_MMA"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 1) (match_dup 3))
+   (set (match_dup 2) (match_dup 3))]
+{
+  rtx op0 = operands[0];
+
+  operands[1] = simplify_gen_subreg (V2DFmode, op0, OOmode, 0);
+  operands[2] = simplify_gen_subreg (V2DFmode, op0, OOmode, 16);
+  operands[3] = CONST0_RTX (V2DFmode);
+}
+  [(set_attr "length" "8")
+   (set_attr "type" "vecperm")])
+
+;; Create a vector pair with a value splat'ed (duplicated) to all of the
+;; elements.
+(define_expand "vpair_splat_"
+  [(use (match_operand:OO 0 "vsx_register_operand"))
+   (use (match_operand:SFDF 1 "input_operand"))]
+  "TARGET_MMA"
+{
+  rtx op0 = operands[0];
+  rtx op1 = operands[1];
+  machine_mode element_mode = mode;
+
+  if (op1 == CONST0_RTX (element_mode))
+{
+  emit_insn (gen_vpair_zero (op0));
+  DONE;
+}
+
+  machine_mode vector_mode = mode;

[gcc(refs/users/meissner/heads/work176-vpair)] Add vector pair optimizations.

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:a2ecfd5d0a177a8c3cb187fafdb807e98a4b58a8

commit a2ecfd5d0a177a8c3cb187fafdb807e98a4b58a8
Author: Michael Meissner 
Date:   Mon Aug 19 14:24:38 2024 -0400

Add vector pair optimizations.

2024-08-19  Michael Meissner  

gcc/

* config/rs6000/vector-pair.md (vpair_add_neg_3): New
combiner insn to convert vector plus/neg into a minus operation.
(vpair_fma__merge): Optimize multiply, add/subtract, and
negation into fma operations if the user specifies to create fmas.
(vpair_fma__merge): Likewise.
(vpair_fma__merge2): Likewise.
(vpair_nfma__merge): Likewise.
(vpair_nfms__merge): Likewise.
(vpair_nfms__merge2): Likewise.

gcc/testsuite/

* gcc.target/powerpc/vector-pair-7.c: New test.
* gcc.target/powerpc/vector-pair-8.c: Likewise.
* gcc.target/powerpc/vector-pair-9.c: Likewise.
* gcc.target/powerpc/vector-pair-10.c: Likewise.
* gcc.target/powerpc/vector-pair-11.c: Likewise.
* gcc.target/powerpc/vector-pair-12.c: Likewise.

Diff:
---
 gcc/config/rs6000/vector-pair.md  | 224 ++
 gcc/testsuite/gcc.target/powerpc/vector-pair-10.c |  61 ++
 gcc/testsuite/gcc.target/powerpc/vector-pair-11.c |  65 +++
 gcc/testsuite/gcc.target/powerpc/vector-pair-12.c |  65 +++
 gcc/testsuite/gcc.target/powerpc/vector-pair-7.c  |  18 ++
 gcc/testsuite/gcc.target/powerpc/vector-pair-8.c  |  18 ++
 gcc/testsuite/gcc.target/powerpc/vector-pair-9.c  |  61 ++
 7 files changed, 512 insertions(+)

diff --git a/gcc/config/rs6000/vector-pair.md b/gcc/config/rs6000/vector-pair.md
index 39b419c6814..7a81acbdc05 100644
--- a/gcc/config/rs6000/vector-pair.md
+++ b/gcc/config/rs6000/vector-pair.md
@@ -261,6 +261,31 @@
(set (attr "type") (if_then_else (match_test " == DIV")
(const_string "")
(const_string "")))])
+
+;; Optimize vector pair add of a negative value into a subtract.
+(define_insn_and_split "*vpair_add_neg_3"
+  [(set (match_operand:OO 0 "vsx_register_operand" "=wa")
+   (unspec:OO
+[(match_operand:OO 1 "vsx_register_operand" "wa")
+ (unspec:OO
+  [(match_operand:OO 2 "vsx_register_operand" "wa")
+   (const_int VPAIR_FP_ELEMENT)]
+  UNSPEC_VPAIR_NEG)
+ (const_int VPAIR_FP_ELEMENT)]
+VPAIR_FP_BINARY))]
+  "TARGET_MMA"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+   (unspec:OO
+[(match_dup 1)
+ (match_dup 2)
+ (const_int VPAIR_FP_ELEMENT)]
+UNSPEC_VPAIR_MINUS))]
+{
+}
+  [(set_attr "length" "8")
+   (set_attr "type" "")])
 
 ;; Vector pair fused-multiply (FMA) operations.  The last argument in the
 ;; UNSPEC is a CONST_INT which identifies what the scalar element is.
@@ -354,3 +379,202 @@
 }
   [(set_attr "length" "8")
(set_attr "type" "")])
+
+;; Optimize vector pair multiply and vector pair add into vector pair fma,
+;; providing the compiler would do this optimization for scalar and vectors.
+;; Unlike most of the define_insn_and_splits, this can be done before register
+;; allocation.
+(define_insn_and_split "*vpair_fma__merge"
+  [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa")
+   (unspec:OO
+[(unspec:OO
+  [(match_operand:OO 1 "vsx_register_operand" "%wa,wa")
+   (match_operand:OO 2 "vsx_register_operand" "wa,0")
+   (const_int VPAIR_FP_ELEMENT)]
+  UNSPEC_VPAIR_MULT)
+ (match_operand:OO 3 "vsx_register_operand" "0,wa")
+ (const_int VPAIR_FP_ELEMENT)]
+UNSPEC_VPAIR_PLUS))]
+  "TARGET_MMA && flag_fp_contract_mode == FP_CONTRACT_FAST"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+   (unspec:OO
+[(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ (const_int VPAIR_FP_ELEMENT)]
+UNSPEC_VPAIR_FMA))]
+{
+}
+  [(set_attr "length" "8")
+   (set_attr "type" "")])
+
+;; Merge multiply and subtract.
+(define_insn_and_split "*vpair_fma__merge"
+  [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa")
+   (unspec:OO
+[(unspec:OO
+  [(match_operand:OO 1 "vsx_register_operand" "%wa,wa")
+   (match_operand:OO 2 "vsx_register_operand" "wa,0")
+   (const_int VPAIR_FP_ELEMENT)]
+  UNSPEC_VPAIR_MULT)
+ (match_operand:OO 3 "vsx_register_operand" "0,wa")
+ (const_int VPAIR_FP_ELEMENT)]
+UNSPEC_VPAIR_MINUS))]
+  "TARGET_MMA && flag_fp_contract_mode == FP_CONTRACT_FAST"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+   (unspec:OO
+[(match_dup 1)
+ (match_dup 2)
+ (unspec:OO
+  [(match_dup 3)
+   (const_int VPAIR_FP_ELEMENT)]
+  UNSPEC_VPAIR_NEG)
+ (const_int VPAIR_FP_ELEMENT)]
+UNSPEC_VPAIR_FMA))]
+{
+}
+  [(set_attr "length" "8")
+   (set_attr "type" "

[gcc(refs/users/meissner/heads/work176-vpair)] Update ChangeLog.*

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:83e3e4b42b551cbf6e544a2d1fb4bf63f8d95d0c

commit 83e3e4b42b551cbf6e544a2d1fb4bf63f8d95d0c
Author: Michael Meissner 
Date:   Mon Aug 19 14:27:08 2024 -0400

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.vpair | 127 +++-
 1 file changed, 126 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.vpair b/gcc/ChangeLog.vpair
index 2157c2b62b0..27b76e33c8e 100644
--- a/gcc/ChangeLog.vpair
+++ b/gcc/ChangeLog.vpair
@@ -1,6 +1,131 @@
+ Branch work176-vpair, patch #403 
+
+Add vector pair optimizations.
+
+2024-08-19  Michael Meissner  
+
+gcc/
+
+   * config/rs6000/vector-pair.md (vpair_add_neg_3): New
+   combiner insn to convert vector plus/neg into a minus operation.
+   (vpair_fma__merge): Optimize multiply, add/subtract, and
+   negation into fma operations if the user specifies to create fmas.
+   (vpair_fma__merge): Likewise.
+   (vpair_fma__merge2): Likewise.
+   (vpair_nfma__merge): Likewise.
+   (vpair_nfms__merge): Likewise.
+   (vpair_nfms__merge2): Likewise.
+
+gcc/testsuite/
+
+   * gcc.target/powerpc/vector-pair-7.c: New test.
+   * gcc.target/powerpc/vector-pair-8.c: Likewise.
+   * gcc.target/powerpc/vector-pair-9.c: Likewise.
+   * gcc.target/powerpc/vector-pair-10.c: Likewise.
+   * gcc.target/powerpc/vector-pair-11.c: Likewise.
+   * gcc.target/powerpc/vector-pair-12xs.c: Likewise.
+
+ Branch work176-vpair, patch #402 
+
+Add vector pair init and splat.
+
+2024-08-19  Michael Meissner  
+
+gcc/
+
+   * config/rs6000/rs6000-builtins.def (__builtin_vpair_zero): New
+   built-in function.
+   (__builtin_vpair_f32_splat): Likewise.
+   (__builtin_vpair_f64_splat): Likewise.
+   * config/rs6000/vector-pair.md (UNSPEC_VPAIR_ZERO): New unspec.
+   (UNSPEC_VPAIR_SPLAT): Likewise.
+   (VPAIR_SPLAT_VMODE): New mode iterator.
+   (VPAIR_SPLAT_ELEMENT_TO_VMODE): New mode attribute.
+   (vpair_splat_name): Likewise.
+   (vpair_zero): New insn.
+   (vpair_splat_): New define_expand.
+   (vpair_splat__internal): New insns.
+
+gcc/testsuite/
+
+   * gcc.target/powerpc/vector-pair-5.c: New test.
+   * gcc.target/powerpc/vector-pair-6.c: Likewise.
+
+ Branch work176-vpair, patch #401 
+
+Add support for vector pair fma operations.
+
+2024-08-19  Michael Meissner  
+
+gcc/
+
+   * config/rs6000/rs6000-builtins.def (__builtin_vpair_f32_fma): New
+   built-in.
+   (__builtin_vpair_f32_fms): Likewise.
+   (__builtin_vpair_f32_nfma): Likewise.
+   (__builtin_vpair_f32_nfms): Likewise.
+   (__builtin_vpair_f64_fma): Likewise.
+   (__builtin_vpair_f64_fms): Likewise.
+   (__builtin_vpair_f64_nfma): Likewise.
+   * config/rs6000/rs6000/rs6000-proto.h (enum vpair_split_fma): New
+   enumeration.
+   (vpair_split_fma): New declaration.
+   * config/rs6000/rs6000.cc (vpair_split_fma): New function to split
+   vector pair FMA operations.
+   * config/rs6000/vector-pair.md (UNSPEC_VPAIR_FMA): New unspec.
+   (vpair_stdname): Add UNSPEC_VPAIR_FMA.
+   (VPAIR_OP): Likewise.
+   (vpair_fma_4): New insns.
+   (vpair_fms_4): Likewise.
+   (vpair_nfma_4): Likewise.
+   (vpair_nfms_4): Likewise.
+   * doc/extend.texi (PowerPC Vector Pair Built-in Functions): Document new
+   vector pair fma built-in functions.
+
+gcc/testsuite/
+
+   * gcc.target/powerpc/vector-pair-3.c: New test.
+   * gcc.target/powerpc/vector-pair-4.c: Likewise.
+
+ Branch work176-vpair, patch #400 
+
+Add support for vector pair unary and binary operations.
+
+2024-08-19  Michael Meissner  
+
+gcc/
+
+   * config/rs6000/rs6000-builtins.def (__builtin_vpair_*): Add new
+   built-in functions for vector pair support.
+   * config/rs6000/rs6000-protos.h (enum vpair_split_unary): New
+   enumeration.
+   (vpair_split_unary): New declaration.
+   (vpair_split_binary): Likewise.
+   * config/rs6000/rs6000.cc (vpair_split_unary): New function to split
+   vector pair operations.
+   (vpair_split_binary): Likewise.
+   * config/rs6000/rs6000.md (toplevel): Include vector-pair.md.
+   * config/rs6000/t-rs6000 (MD_INCLUDES): Add vector-pair.md.
+   * config/rs6000/vector-pair.md: New file.
+   * doc/extend.texi (PowerPC Vector Pair Built-in Functions): Add
+   documentation for the new vector pair built-in functions.
+
+gcc/testsuite/
+
+   * gcc.target/powerpc/vector-pair-1.c: New test.
+   * gcc.target/powerpc/vector-pair-2.c: Likewise.
+
  Branch work176-vpair, baseline 
 
+Add ChangeLog.vpair and update REVISION.
+
+2024-08-16  Michael Meissner  
+
+gcc/
+
+   * ChangeLog.vpair: New file for branch.
+   * REVISION: U

[gcc(refs/users/meissner/heads/work176-libs)] Do not build IEEE 128-bit libgfortran support if VSX is not available.

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:5a17794170f2ae8a3e81f3d4c7e62e5f6abbeeaa

commit 5a17794170f2ae8a3e81f3d4c7e62e5f6abbeeaa
Author: Michael Meissner 
Date:   Mon Aug 19 14:32:54 2024 -0400

Do not build IEEE 128-bit libgfortran support if VSX is not available.

If you build a little endian compiler and select a default CPU of power5
(i.e. --with-cpu=power5), GCC cannot be built.  The reason is that both the
libgfortran and libstdc++-v3 libraries assume that all little endian powerpc
builds support IEEE 128-bit floating point.

However, if the default cpu does not support the VSX instruction set, then we
cannot build the IEEE 128-bit libraries.  This patch fixes the libgfortran
library so if the GCC compiler does not support IEEE 128-bit floating point, the
IEEE 128-bit floating point libraries are not built.  A companion patch will fix
the libstdc++-v3 library.

I have built these patches on a little endian system, doing both normal builds,
and making a build with a power5 default.  There was no regression in the normal
builds.  I have also built a big endian GCC compiler and there was no regression
there.  Can I check this patch into the trunk?

2024-08-19  Michael Meissner  

libgfortran/

PR target/115800
* configure.ac (powerpc64le*-linux*): Check to see that the compiler
uses VSX before enabling IEEE 128-bit support.
* configure: Regenerate.
* kinds-override.h (GFC_REAL_17): Add check for __VSX__.
* libgfortran.h (POWER_IEEE128): Likewise.

Diff:
---
 libgfortran/configure| 7 +--
 libgfortran/configure.ac | 3 +++
 libgfortran/kinds-override.h | 2 +-
 libgfortran/libgfortran.h| 2 +-
 4 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/libgfortran/configure b/libgfortran/configure
index 11a1bc5f070..2708e5c7eca 100755
--- a/libgfortran/configure
+++ b/libgfortran/configure
@@ -5981,6 +5981,9 @@ if test "x$GCC" = "xyes"; then
 #if __SIZEOF_LONG_DOUBLE__ != 16
   #error long double is double
   #endif
+  #if !defined(__VSX__)
+  #error VSX is not available
+  #endif
 int
 main ()
 {
@@ -12847,7 +12850,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 12850 "configure"
+#line 12853 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -12953,7 +12956,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 12956 "configure"
+#line 12959 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
diff --git a/libgfortran/configure.ac b/libgfortran/configure.ac
index cca1ea0ea97..cfaeb9717ab 100644
--- a/libgfortran/configure.ac
+++ b/libgfortran/configure.ac
@@ -148,6 +148,9 @@ if test "x$GCC" = "xyes"; then
   AC_PREPROC_IFELSE(
 [AC_LANG_PROGRAM([[#if __SIZEOF_LONG_DOUBLE__ != 16
   #error long double is double
+  #endif
+  #if !defined(__VSX__)
+  #error VSX is not available
   #endif]],
  [[(void) 0;]])],
 [AM_FCFLAGS="$AM_FCFLAGS -mabi=ibmlongdouble -mno-gnu-attribute";
diff --git a/libgfortran/kinds-override.h b/libgfortran/kinds-override.h
index f6b4956c5ca..51f440e5323 100644
--- a/libgfortran/kinds-override.h
+++ b/libgfortran/kinds-override.h
@@ -30,7 +30,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If 
not, see
 #endif
 
 /* Keep these conditions on one line so grep can filter it out.  */
-#if defined(__powerpc64__)  && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__  && 
__SIZEOF_LONG_DOUBLE__ == 16
+#if defined(__powerpc64__)  && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__  && 
__SIZEOF_LONG_DOUBLE__ == 16 && defined(__VSX__)
 typedef _Float128 GFC_REAL_17;
 typedef _Complex _Float128 GFC_COMPLEX_17;
 #define HAVE_GFC_REAL_17
diff --git a/libgfortran/libgfortran.h b/libgfortran/libgfortran.h
index effa3732c18..70db350ba01 100644
--- a/libgfortran/libgfortran.h
+++ b/libgfortran/libgfortran.h
@@ -104,7 +104,7 @@ typedef off_t gfc_offset;
 #endif
 
 #if defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ \
-&& defined __GLIBC_PREREQ
+&& defined __GLIBC_PREREQ && defined(__VSX__)
 #if __GLIBC_PREREQ (2, 32)
 #define POWER_IEEE128 1
 #endif


[gcc(refs/users/meissner/heads/work176-libs)] Do not build IEEE 128-bit libstdc++ support if VSX is not available.

2024-08-19 Thread Michael Meissner via Libstdc++-cvs
https://gcc.gnu.org/g:c698591d93c802faf42befb1cfdbf7da1e599373

commit c698591d93c802faf42befb1cfdbf7da1e599373
Author: Michael Meissner 
Date:   Mon Aug 19 14:34:16 2024 -0400

Do not build IEEE 128-bit libstdc++ support if VSX is not available.

If you build a little endian compiler and select a default CPU of power5
(i.e. --with-cpu=power5), GCC cannot be built.  The reason is that both the
libgfortran and libstdc++-v3 libraries assume that all little endian powerpc
builds support IEEE 128-bit floating point.

However, if the default cpu does not support the VSX instruction set, then 
we
cannot build the IEEE 128-bit libraries.  This patch fixes the libstdc++-v3
library so if the GCC compiler does not support IEEE 128-bit floating 
point, the
IEEE 128-bit floating point libraries are not built.  A companion patch 
will fix
the libgfortran library.

I have built these patches on a little endian system, doing both normal 
builds,
and making a build with a power5 default.  There was no regression in the 
normal
builds.  I have also built a big endian GCC compiler and there was no 
regression
there.  Can I check this patch into the trunk?

2024-08-19  Michael Meissner  

libstdc++-v3/

PR target/115800
* configure.ac (powerpc*-*-linux*): Don't enable IEEE 128-bit on 
PowerPC
systems without VSX.
* configure: Regenerate.
* numeric_traits.h: Don't enable IEEE 128-bit on PowerPC systems 
without
VSX.

Diff:
---
 libstdc++-v3/configure| 68 ++-
 libstdc++-v3/configure.ac | 58 --
 libstdc++-v3/include/ext/numeric_traits.h |  2 +-
 3 files changed, 86 insertions(+), 42 deletions(-)

diff --git a/libstdc++-v3/configure b/libstdc++-v3/configure
index 305675eaa1e..bafe1c4d099 100755
--- a/libstdc++-v3/configure
+++ b/libstdc++-v3/configure
@@ -51379,8 +51379,31 @@ $as_echo "#define _GLIBCXX_LONG_DOUBLE_COMPAT 1" 
>>confdefs.h
 case "$target" in
   powerpc*-*-linux*)
LONG_DOUBLE_COMPAT_FLAGS="$LONG_DOUBLE_COMPAT_FLAGS -mno-gnu-attribute"
-# Check for IEEE128 support in libm:
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __frexpieee128 
in -lm" >&5
+   # Eliminate little endian systems without VSX
+   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+ #ifndef __VSX__
+ #error "IEEE 128-bit needs VSX"
+ #endif
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  ac_ieee128_possible=yes
+else
+  ac_ieee128_possible=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+   if test $ac_ieee128_possible = yes; then
+  # Check for IEEE128 support in libm:
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for __frexpieee128 
in -lm" >&5
 $as_echo_n "checking for __frexpieee128 in -lm... " >&6; }
 if ${ac_cv_lib_m___frexpieee128+:} false; then :
   $as_echo_n "(cached) " >&6
@@ -51425,18 +51448,18 @@ else
   ac_ldbl_ieee128_in_libc=no
 fi
 
-if test $ac_ldbl_ieee128_in_libc = yes; then
-  # Determine which long double format is the compiler's default:
-  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+  if test $ac_ldbl_ieee128_in_libc = yes; then
+# Determine which long double format is the compiler's default:
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 
 int
 main ()
 {
 
-#ifndef __LONG_DOUBLE_IEEE128__
-#error compiler defaults to ibm128
-#endif
+  #ifndef __LONG_DOUBLE_IEEE128__
+  #error compiler defaults to ibm128
+  #endif
 
   ;
   return 0;
@@ -51448,21 +51471,28 @@ else
   ac_ldbl_ieee128_default=no
 fi
 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-  # Library objects should use default long double format.
-  if test "$ac_ldbl_ieee128_default" = yes; then
-LONG_DOUBLE_128_FLAGS="-mno-gnu-attribute"
-# Except for the ones that explicitly use these flags:
-LONG_DOUBLE_ALT128_COMPAT_FLAGS="-mabi=ibmlongdouble 
-mno-gnu-attribute -Wno-psabi"
-  else
-LONG_DOUBLE_128_FLAGS="-mno-gnu-attribute"
-LONG_DOUBLE_ALT128_COMPAT_FLAGS="-mabi=ieeelongdouble 
-mno-gnu-attribute -Wno-psabi"
-  fi
+# Library objects should use default long double format.
+if test "$ac_ldbl_ieee128_default" = yes; then
+  LONG_DOUBLE_128_FLAGS="-mno-gnu-attribute"
+  # Except for the ones that explicitly use these flags:
+  LONG_DOUBLE_ALT128_COMPAT_FLAGS="-mabi=ibmlongdouble 
-mno-gnu-attribute -Wno-psabi"
+else
+  LONG_DOUBLE_128_FLAGS="-mno-gnu-attribute"
+  LONG_DOUBLE_ALT128_COMPAT_F

[gcc(refs/users/meissner/heads/work176-libs)] Do not add -mvsx when building libgcc float128 support.

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:18edb686452a6217166adc336a42eec1619e5cf0

commit 18edb686452a6217166adc336a42eec1619e5cf0
Author: Michael Meissner 
Date:   Mon Aug 19 14:38:05 2024 -0400

Do not add -mvsx when building libgcc float128 support.

Currently, we add -mvsx when building the float128 support in libgcc.  This
allows us to build the float128 support on a big endian system where the
default cpu is power4.  While the libgcc support can be built, given there 
is
no glibc support for float128 available.

In the past, we would add -mvsx when building the float128 support in libgcc.
This allowed us to build the float128 support on a big endian system where the
default cpu is power4.  While the libgcc support can be built, it cannot
actually be used, given there is no glibc support for float128 available.

However, adding -mvsx and building the libgcc float128 support causes problems
if you set the default cpu to something like a 7450, which does not have VSX
support.  The assembler complains that when the code does a ".machine 7450", you
cannot use VSX instructions.

With these patches, the float128 libgcc support is only built if the default
compiler has VSX support.  If somebody wanted to enable the glibc support 
for
big endian, they would need to set the base cpu to power8 to enable 
building the
libgcc float128 libraries.

I built little endian compilers and there were no regressions.

I built big endian compilers with the --with-cpu=power5 configure option, 
and I
verified that none of the float128 support functions are built.

I also built big endian compilers on a power9 with the --with-cpu=native
configure option, and I verified that the float128 support functions were
built, since the default compiler used the VSX instruction set.

I verified that on both sets of big endian builds, that all of the float128
tests were skipped, since there is no support for float128 in glibc and the 
GCC
compiler does not enable float128 on those systems.

Can I check these patches into the trunk assuming the original bugzilla 
author
says they fix the problem?

2024-08-19 Michael Meissner  

libgcc/

PR target/115800
PR target/113652
* config.host (powerpc*-*-linux*): Do not add t-float128-hw or
t-float128-p10-hw if the default compiler does not support float128.
* config/rs6000/t-float128 (FP128_CFLAGS_SW): Do not add -mvsx when
building the basic float128 support.
* config/rs6000/t-float128-hw (FP128_CFLAGS_HW): Likewise.
* config/rs6000/t-float128-p10-hw (FP128_3_1_CFLAGS_HW): Likewise.
* configure.ac (powerpc*-*-linux*): Do not add -mvsx when testing
whether to build the float128 support.
* configure: Regenerate.

Diff:
---
 libgcc/config.host | 12 ++--
 libgcc/config/rs6000/t-float128|  8 +++-
 libgcc/config/rs6000/t-float128-hw |  3 +--
 libgcc/config/rs6000/t-float128-p10-hw |  3 +--
 libgcc/configure   |  8 +++-
 libgcc/configure.ac|  8 +++-
 6 files changed, 29 insertions(+), 13 deletions(-)

diff --git a/libgcc/config.host b/libgcc/config.host
index 9fae51d4ce7..261b08859a4 100644
--- a/libgcc/config.host
+++ b/libgcc/config.host
@@ -1292,14 +1292,14 @@ powerpc*-*-linux*)
 
if test $libgcc_cv_powerpc_float128 = yes; then
tmake_file="${tmake_file} rs6000/t-float128"
-   fi
 
-   if test $libgcc_cv_powerpc_float128_hw = yes; then
-   tmake_file="${tmake_file} rs6000/t-float128-hw"
-   fi
+   if test $libgcc_cv_powerpc_float128_hw = yes; then
+   tmake_file="${tmake_file} rs6000/t-float128-hw"
 
-   if test $libgcc_cv_powerpc_3_1_float128_hw = yes; then
-   tmake_file="${tmake_file} rs6000/t-float128-p10-hw"
+   if test $libgcc_cv_powerpc_3_1_float128_hw = yes; then
+   tmake_file="${tmake_file} 
rs6000/t-float128-p10-hw"
+   fi
+   fi
fi
 
extra_parts="$extra_parts ecrti.o ecrtn.o ncrti.o ncrtn.o"
diff --git a/libgcc/config/rs6000/t-float128 b/libgcc/config/rs6000/t-float128
index b09b5664af0..93e78adcd62 100644
--- a/libgcc/config/rs6000/t-float128
+++ b/libgcc/config/rs6000/t-float128
@@ -74,7 +74,13 @@ fp128_includes   = $(srcdir)/soft-fp/double.h \
  $(srcdir)/soft-fp/soft-fp.h
 
 # Build the emulator without ISA 3.0 hardware support.
-FP128_CFLAGS_SW = -Wno-type-limits -mvsx -mfloat128 \
+#
+# In the past we added -mvsx to build the float128 specific libraries with the
+# VSX instruction set.  This allowed the big endian GCC on server platforms to
+# build the float128 support.  However, is causes problems 

[gcc(refs/users/meissner/heads/work176-libs)] Do not add -mvsx when testing the float128 support.

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:a10b0df9be3c909e5adc0d13fe2f4c89e465a8e6

commit a10b0df9be3c909e5adc0d13fe2f4c89e465a8e6
Author: Michael Meissner 
Date:   Mon Aug 19 14:42:27 2024 -0400

Do not add -mvsx when testing the float128 support.

Currently, we add -mvsx when building the float128 support in libgcc.  This
allows us to build the float128 support on a big endian system where the
default cpu is power4.  While the libgcc support can be built, given there 
is
no glibc support for float128 available.

However, adding -mvsx and building the libgcc float128 support causes problems
if you set the default cpu to something like a 7450, which does not have VSX
support.  The assembler complains that when the code does a ".machine 7450", you
cannot use VSX instructions.

After patching libgcc to not build the float128 support unless the host can
support float128 normally, this patch changes the GCC tests so that it will 
only
do the IEEE 128-bit tests if the default compiler enables the VSX 
instruction
set by default.  Otherwise all of the float128 tests will fail because the
libgcc support is not available.

In addition to not doing the float128 tests when the compiler does not natively
support float128, this patch also removes adding -mvsx, -mfloat128, and
-mfloat128-hardware to enable the support if the compiler did not natively
enable it.

I built little endian compilers and there were no regressions.

I built big endian compilers with the --with-cpu=power5 configure option, 
and I
verified that none of the float128 support functions are built.

I also built big endian compilers on a power9 with the --with-cpu=native
configure option, and I verified that the float128 support functions were
built, since the default compiler used the VSX instruction set.

I verified that on both sets of big endian builds, that all of the float128
tests were skipped, since there is no support for float128 in glibc and the 
GCC
compiler does not enable float128 on those systems.

Can I check these patches into the trunk assuming the original bugzilla 
author
says they fix the problem?

2024-08-19 Michael Meissner  

gcc/testsuite/

PR target/115800
PR target/113652
* gcc.target/powerpc/abs128-1.c: Remove adding -mvsx, -mfloat128, 
and
-mfloat128-hardware options to float128 test.  Add explicit checks 
for
the float128 support, rather than just using VSX as a stand in, or
assuming we can silently enable VSX if the default is power4.  For
pr99708.c, also use the correct spelling to disable the float128 
tests.
* gcc.target/powerpc/bfp/scalar-insert-exp-16.c: Likewise.
* gcc.target/powerpc/copysign128-1.c: Likewise.
* gcc.target/powerpc/divkc3-1.c: Likewise.
* gcc.target/powerpc/float128-3.c: Likewise.
* gcc.target/powerpc/float128-5.c: Likewise.
* gcc.target/powerpc/float128-complex-2.c: Likewise.
* gcc.target/powerpc/float128-math.c: Likewise.
* gcc.target/powerpc/inf128-1.c: Likewise.
* gcc.target/powerpc/mulkc3-1.c: Likewise.
* gcc.target/powerpc/nan128-1.c: Likewise.
* gcc.target/powerpc/p9-lxvx-stxvx-3.c: Likewise.
* gcc.target/powerpc/pr104253.c: Likewise.
* gcc.target/powerpc/pr70669.c: Likewise.
* gcc.target/powerpc/pr79004.c: Likewise.
* gcc.target/powerpc/pr79038-1.c: Likewise.
* gcc.target/powerpc/pr81959.c: Likewise.
* gcc.target/powerpc/pr85657-1.c: Likewise.
* gcc.target/powerpc/pr85657-2.c: Likewise.
* gcc.target/powerpc/pr99708.c: Likewise.
* gcc.target/powerpc/signbit-1.c: Likewise.
* gcc.target/powerpc/signbit-2.c: Likewise.
* lib/target-supports.exp (check_ppc_float128_sw_available): 
Likewise.
(check_ppc_float128_hw_available): Likewise.
(check_effective_target_ppc_ieee128_ok): Likewise.
(add_options_for___float128): Likewise.
(check_effective_target___float128): Likewise.
(check_effective_target_base_quadfloat_support): Likewise.
(check_effective_target_powerpc_float128_sw_ok): Likewise.
(check_effective_target_powerpc_float128_hw_ok): Likewise.

Diff:
---
 gcc/testsuite/gcc.target/powerpc/abs128-1.c|  3 ++-
 .../gcc.target/powerpc/bfp/scalar-insert-exp-16.c  |  1 +
 gcc/testsuite/gcc.target/powerpc/copysign128-1.c   |  3 ++-
 gcc/testsuite/gcc.target/powerpc/divkc3-1.c|  2 +-
 gcc/testsuite/gcc.target/powerpc/float128-3.c  |  3 ++-
 gcc/testsuite/gcc.target/powerpc/float128-5.c  |  3 ++-
 .../gcc.target/powerpc/float128-complex-2.c|  2 +-
 gcc/testsuite/gcc.target/powerpc/float128-math.c   |  2 +-

[gcc(refs/users/meissner/heads/work176-libs)] Update ChangeLog.*

2024-08-19 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:6a2d250899a3691fe55dddf3cf671d49de2d88bd

commit 6a2d250899a3691fe55dddf3cf671d49de2d88bd
Author: Michael Meissner 
Date:   Mon Aug 19 14:44:50 2024 -0400

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.libs | 210 -
 1 file changed, 209 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.libs b/gcc/ChangeLog.libs
index c571465904c..fafa573868d 100644
--- a/gcc/ChangeLog.libs
+++ b/gcc/ChangeLog.libs
@@ -1,6 +1,214 @@
+ Branch work176-libs, patch #503 
+
+Do not add -mvsx when testing the float128 support.
+
+Currently, we add -mvsx when building the float128 support in libgcc.  This
+allows us to build the float128 support on a big endian system where the
+default cpu is power4.  While the libgcc support can be built, given there is
+no glibc support for float128 available.
+
+However, adding -mvsx and building the libgcc float128 support causes problems
+if you set the default cpu to something like a 7450, which does not have VSX
+support.  The assembler complains that when the code does a ".machine 7450", you
+cannot use VSX instructions.
+
+After patching libgcc to not build the float128 support unless the host can
+support float128 normally, this patch changes the GCC tests so that it will 
only
+do the IEEE 128-bit tests if the default compiler enables the VSX instruction
+set by default.  Otherwise all of the float128 tests will fail because the
+libgcc support is not available.
+
+In addition to not doing the float128 tests when the compiler does not natively
+support float128, this patch also removes adding -mvsx, -mfloat128, and
+-mfloat128-hardware enable the support if the compiler did not natively enable
+it.
+
+I built little endian compilers and there were no regressions.
+
+I built big endian compilers with the --with-cpu=power5 configure option, and I
+verified that none of the float128 support functions are built.
+
+I also built big endian compilers on a power9 with the --with-cpu=native
+configure option, and I verified that the float128 support functions were
+built, since the default compiler used the VSX instruction set.
+
+I verified that on both sets of big endian builds, that all of the float128
+tests were skipped, since there is no support for float128 in glibc and the GCC
+compiler does not enable float128 on those systems.
+
+Can I check these patches into the trunk assuming the original bugzilla author
+says they fix the problem?
+
+2024-08-19 Michael Meissner  
+
+gcc/testsuite/
+
+   PR target/115800
+   PR target/113652
+   * gcc.target/powerpc/abs128-1.c: Remove adding -mvsx, -mfloat128, and
+   -mfloat128-hardware options to float128 test.  Add explicit checks for
+   the float128 support, rather than just using VSX as a stand in, or
+   assuming we can silently enable VSX if the default is power4.  For
+   pr99708.c, also use the correct spelling to disable the float128 tests.
+   * gcc.target/powerpc/bfp/scalar-insert-exp-16.c: Likewise.
+   * gcc.target/powerpc/copysign128-1.c: Likewise.
+   * gcc.target/powerpc/divkc3-1.c: Likewise.
+   * gcc.target/powerpc/float128-3.c: Likewise.
+   * gcc.target/powerpc/float128-5.c: Likewise.
+   * gcc.target/powerpc/float128-complex-2.: Likewise.
+   * gcc.target/powerpc/float128-math.: Likewise.
+   * gcc.target/powerpc/inf128-1.: Likewise.
+   * gcc.target/powerpc/mulkc3-1.c: Likewise.
+   * gcc.target/powerpc/nan128-1.c: Likewise.
+   * gcc.target/powerpc/p9-lxvx-stxvx-3.: Likewise.
+   * gcc.target/powerpc/pr104253.: Likewise.
+   * gcc.target/powerpc/pr70669.c: Likewise.
+   * gcc.target/powerpc/pr79004.c: Likewise.
+   * gcc.target/powerpc/pr79038-1.c: Likewise.
+   * gcc.target/powerpc/pr81959.c: Likewise.
+   * gcc.target/powerpc/pr85657-1.: Likewise.
+   * gcc.target/powerpc/pr85657-2.c: Likewise.
+   * gcc.target/powerpc/pr99708.: Likewise.
+   * gcc.target/powerpc/signbit-1.c: Likewise.
+   * gcc.target/powerpc/signbit-2.c: Likewise.
+   * lib/target-supports.exp (check_ppc_float128_sw_available): Likewise.
+   (check_ppc_float128_hw_available): Likewise.
+   (check_effective_target_ppc_ieee128_ok): Likewise.
+   (add_options_for___float128): Likewise.
+   (check_effective_target___float128): Likewise.
+   (check_effective_target_base_quadfloat_support): Likewise.
+   (check_effective_target_powerpc_float128_sw_ok): Likewise.
+   (check_effective_target_powerpc_float128_hw_ok): Likewise.
+
+ Branch work176-libs, patch #502 
+
+Do not add -mvsx when building libgcc float128 support.
+
+Currently, we add -mvsx when building the float128 support in libgcc.  This
+allows us to build the float128 support on a big endian system where the
+default cpu is power4.  While the libgcc support can be built, given there is
+no glibc 

[gcc r15-3028] m68k: Add -mlra

2024-08-19 Thread Andreas Schwab via Gcc-cvs
https://gcc.gnu.org/g:c4971bae71cf4d6bb0b458dd9d457ec57d14a4f4

commit r15-3028-gc4971bae71cf4d6bb0b458dd9d457ec57d14a4f4
Author: Andreas Schwab 
Date:   Mon Aug 19 20:59:13 2024 +0200

m68k: Add -mlra

PR target/113939
* config/m68k/m68k.opt (mlra): New target option.
* config/m68k/m68k.cc (m68k_use_lra_p): New function.
(TARGET_LRA_P): Use it.
* config/m68k/m68k.opt.urls: Regenerate.

Diff:
---
 gcc/config/m68k/m68k.cc   | 11 ++-
 gcc/config/m68k/m68k.opt  |  5 +
 gcc/config/m68k/m68k.opt.urls |  2 ++
 3 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/gcc/config/m68k/m68k.cc b/gcc/config/m68k/m68k.cc
index 79ba4d5343c..21c94981d22 100644
--- a/gcc/config/m68k/m68k.cc
+++ b/gcc/config/m68k/m68k.cc
@@ -199,6 +199,7 @@ static machine_mode m68k_promote_function_mode (const_tree, 
machine_mode,
 static void m68k_asm_final_postscan_insn (FILE *, rtx_insn *insn, rtx [], int);
 static HARD_REG_SET m68k_zero_call_used_regs (HARD_REG_SET);
 static machine_mode m68k_c_mode_for_floating_type (enum tree_index);
+static bool m68k_use_lra_p (void);
 
 /* Initialize the GCC target structure.  */
 
@@ -307,7 +308,7 @@ static machine_mode m68k_c_mode_for_floating_type (enum 
tree_index);
 #endif
 
 #undef TARGET_LRA_P
-#define TARGET_LRA_P hook_bool_void_false
+#define TARGET_LRA_P m68k_use_lra_p
 
 #undef TARGET_LEGITIMATE_ADDRESS_P
 #define TARGET_LEGITIMATE_ADDRESS_Pm68k_legitimate_address_p
@@ -7228,4 +7229,12 @@ m68k_c_mode_for_floating_type (enum tree_index ti)
   return default_mode_for_floating_type (ti);
 }
 
+/* Implement TARGET_LRA_P.  */
+
+static bool
+m68k_use_lra_p ()
+{
+  return m68k_lra_p;
+}
+
 #include "gt-m68k.h"
diff --git a/gcc/config/m68k/m68k.opt b/gcc/config/m68k/m68k.opt
index b2ab41a758f..1b393f54e3c 100644
--- a/gcc/config/m68k/m68k.opt
+++ b/gcc/config/m68k/m68k.opt
@@ -146,6 +146,11 @@ mlong-jump-table-offsets
 Target RejectNegative Mask(LONG_JUMP_TABLE_OFFSETS)
 Use 32-bit offsets in jump tables rather than 16-bit offsets.
 
+mlra
+Target Var(m68k_lra_p) Undocumented
+Use LRA for reload instead of the old reload framework.  This option is
+experimental, and it may be removed in future versions of the compiler.
+
 mnobitfield
 Target RejectNegative InverseMask(BITFIELD)
 Do not use the bit-field instructions.
diff --git a/gcc/config/m68k/m68k.opt.urls b/gcc/config/m68k/m68k.opt.urls
index 079fbf4a88e..1f1ac88bcb2 100644
--- a/gcc/config/m68k/m68k.opt.urls
+++ b/gcc/config/m68k/m68k.opt.urls
@@ -77,6 +77,8 @@ UrlSuffix(gcc/M680x0-Options.html#index-mhard-float-2)
 mlong-jump-table-offsets
 UrlSuffix(gcc/M680x0-Options.html#index-mlong-jump-table-offsets)
 
+; skipping UrlSuffix for 'mlra' due to finding no URLs
+
 mnobitfield
 UrlSuffix(gcc/M680x0-Options.html#index-mnobitfield)


[gcc r13-8982] Compare loop bounds in ipa-icf

2024-08-19 Thread Martin Jambor via Gcc-cvs
https://gcc.gnu.org/g:e469654e5e7bdd823c5aa996075e903c6b4d47e2

commit r13-8982-ge469654e5e7bdd823c5aa996075e903c6b4d47e2
Author: Jan Hubicka 
Date:   Mon Aug 19 17:10:25 2024 +0200

Compare loop bounds in ipa-icf

Hi,
this testcase shows another problem with missing comparators for metadata
in ICF. With value ranges available to loop optimizations during early
opts we can estimate number of iterations based on guarding condition that
can be split away by the fnsplit pass. This patch disables ICF when
number of iterations does not match.

Bootstrapped/regtested x86_64-linux, will commit it shortly

gcc/ChangeLog:

PR ipa/115277
* ipa-icf-gimple.cc (func_checker::compare_loops): compare loop
bounds.

gcc/testsuite/ChangeLog:

* gcc.c-torture/compile/pr115277.c: New test.

(cherry picked from commit 0d19fbc7b0760ce665fa6a88cd40cfa0311358d7)

Diff:
---
 gcc/ipa-icf-gimple.cc  |  4 
 gcc/testsuite/gcc.c-torture/compile/pr115277.c | 28 ++
 2 files changed, 32 insertions(+)

diff --git a/gcc/ipa-icf-gimple.cc b/gcc/ipa-icf-gimple.cc
index 054a557bd58..a844e74792a 100644
--- a/gcc/ipa-icf-gimple.cc
+++ b/gcc/ipa-icf-gimple.cc
@@ -542,6 +542,10 @@ func_checker::compare_loops (basic_block bb1, basic_block 
bb2)
 return return_false_with_msg ("unroll");
   if (!compare_variable_decl (l1->simduid, l2->simduid))
 return return_false_with_msg ("simduid");
+  if ((l1->any_upper_bound != l2->any_upper_bound)
+  || (l1->any_upper_bound
+ && (l1->nb_iterations_upper_bound != l2->nb_iterations_upper_bound)))
+return return_false_with_msg ("nb_iterations_upper_bound");
 
   return true;
 }
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr115277.c 
b/gcc/testsuite/gcc.c-torture/compile/pr115277.c
new file mode 100644
index 000..27449eb254f
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr115277.c
@@ -0,0 +1,28 @@
+int array[1000];
+void
+test (int a)
+{
+if (__builtin_expect (a > 3, 1))
+return;
+for (int i = 0; i < a; i++)
+array[i]=i;
+}
+void
+test2 (int a)
+{
+if (__builtin_expect (a > 10, 1))
+return;
+for (int i = 0; i < a; i++)
+array[i]=i;
+}
+int
+main()
+{
+test(1);
+test(2);
+test(3);
+test2(10);
+if (array[9] != 9)
+__builtin_abort ();
+return 0;
+}


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Fix factor in dwarf_poly_indeterminate_value [PR116305]

2024-08-19 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:168bc423824158a6933c06b1ac8a03dbf1736ac6

commit 168bc423824158a6933c06b1ac8a03dbf1736ac6
Author: 曾治金 
Date:   Wed Aug 14 14:06:23 2024 +0800

RISC-V: Fix factor in dwarf_poly_indeterminate_value [PR116305]

This patch is to fix the bug (BugId:116305) introduced by the commit
bd93ef for risc-v target.

The commit bd93ef changes the chunk_num from 1 to TARGET_MIN_VLEN/128
if TARGET_MIN_VLEN is larger than 128 in riscv_convert_vector_bits. So
it changes the value of BYTES_PER_RISCV_VECTOR. For example, before
merging the commit bd93ef and if TARGET_MIN_VLEN is 256, the value
of BYTES_PER_RISCV_VECTOR should be [8, 8], but now [16, 16]. The value
of riscv_bytes_per_vector_chunk and BYTES_PER_RISCV_VECTOR are no longer
equal.

Prologue will use BYTES_PER_RISCV_VECTOR.coeffs[1] to estimate the vlenb
register value in riscv_legitimize_poly_move, and dwarf2cfi will also
get the estimated vlenb register value in 
riscv_dwarf_poly_indeterminate_value
to calculate the number of times to multiply the vlenb register value.

So need to change the factor from riscv_bytes_per_vector_chunk to
BYTES_PER_RISCV_VECTOR, otherwise we will get the incorrect dwarf
information. The incorrect example as follow:

```
csrr    t0,vlenb
slli    t1,t0,1
sub     sp,sp,t1

.cfi_escape 0xf,0xb,0x72,0,0x92,0xa2,0x38,0,0x34,0x1e,0x23,0x50,0x22
```

The sequence '0x92,0xa2,0x38,0' means the vlenb register, '0x34' means
the literal 4, '0x1e' means the multiply operation. But in fact, the
vlenb register value only needs to be multiplied by the literal 2.

PR target/116305

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_dwarf_poly_indeterminate_value): Take
BYTES_PER_RISCV_VECTOR for *factor instead of 
riscv_bytes_per_vector_chunk.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/scalable_vector_cfi.c: New test.

Signed-off-by: Zhijin Zeng 
(cherry picked from commit a11dcaff9fc94971188d54310d3053e9f68a0d3d)

Diff:
---
 gcc/config/riscv/riscv.cc  |  4 +--
 .../riscv/rvv/base/scalable_vector_cfi.c   | 32 ++
 2 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index abe1a56f20e..938ec02b750 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -11010,12 +11010,12 @@ static unsigned int
 riscv_dwarf_poly_indeterminate_value (unsigned int i, unsigned int *factor,
  int *offset)
 {
-  /* Polynomial invariant 1 == (VLENB / riscv_bytes_per_vector_chunk) - 1.
+  /* Polynomial invariant 1 == (VLENB / BYTES_PER_RISCV_VECTOR) - 1.
  1. TARGET_MIN_VLEN == 32, polynomial invariant 1 == (VLENB / 4) - 1.
  2. TARGET_MIN_VLEN > 32, polynomial invariant 1 == (VLENB / 8) - 1.
   */
   gcc_assert (i == 1);
-  *factor = riscv_bytes_per_vector_chunk;
+  *factor = BYTES_PER_RISCV_VECTOR.coeffs[1];
   *offset = 1;
   return RISCV_DWARF_VLENB;
 }
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/scalable_vector_cfi.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/scalable_vector_cfi.c
new file mode 100644
index 000..184da10caf3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/scalable_vector_cfi.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-g -O3 -march=rv64gcv -mabi=lp64d" } */
+/* { dg-skip-if "" { *-*-* } {"-O2" "-O1" "-O0" "-Og" "-Oz" "-flto"} } */
+/* { dg-final { scan-assembler {cfi_escape .*0x92,0xa2,0x38,0,0x32,0x1e} } } */
+
+#include "riscv_vector.h"
+
+#define PI_2 1.570796326795
+
+extern void func(float *result);
+
+void test(const float *ys, const float *xs, float *result, size_t length) {
+size_t gvl = __riscv_vsetvlmax_e32m2();
+vfloat32m2_t vpi2 = __riscv_vfmv_v_f_f32m2(PI_2, gvl);
+
+for(size_t i = 0; i < length;) {
+gvl = __riscv_vsetvl_e32m2(length - i);
+vfloat32m2_t y = __riscv_vle32_v_f32m2(ys, gvl);
+vfloat32m2_t x = __riscv_vle32_v_f32m2(xs, gvl);
+vbool16_t mask0  = __riscv_vmflt_vv_f32m2_b16(x, y, gvl);
+vfloat32m2_t fixpi = __riscv_vfrsub_vf_f32m2_mu(mask0, vpi2, vpi2, 0, 
gvl);
+
+__riscv_vse32_v_f32m2(result, fixpi, gvl);
+
+func(result);
+
+i += gvl;
+ys += gvl;
+xs += gvl;
+result += gvl;
+}
+}


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add auto-vect pattern for vector rotate shift

2024-08-19 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:ed5cb5a783e2d7aab117bf575e45089a86d4e3c4

commit ed5cb5a783e2d7aab117bf575e45089a86d4e3c4
Author: Feng Wang 
Date:   Sat Aug 17 08:40:42 2024 -0600

RISC-V: Add auto-vect pattern for vector rotate shift

This patch adds the vector rotate shift pattern for auto-vect.
With this patch, the scalar rotate shift can be automatically
vectorized into vector rotate shift.

gcc/ChangeLog:

* config/riscv/autovec.md (v3):
Add new define_expand pattern for vector rotate shift.
gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vrolr-1.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vrolr-run.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vrolr-template.h: New test.

(cherry picked from commit 54b228d80c54d32ab49cee6148cfd1364b2bc817)

Diff:
---
 gcc/config/riscv/autovec.md| 16 
 .../gcc.target/riscv/rvv/autovec/binop/vrolr-1.c   |  9 +++
 .../gcc.target/riscv/rvv/autovec/binop/vrolr-run.c | 88 ++
 .../riscv/rvv/autovec/binop/vrolr-template.h   | 29 +++
 4 files changed, 142 insertions(+)

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 0423d7bee13..decfe2bf8cc 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2764,3 +2764,19 @@
 operands[2] = const0_rtx;
   }
 )
+
+;; -
+;; - vrol.vv vror.vv
+;; -
+(define_expand "v3"
+  [(set (match_operand:VI 0 "register_operand")
+   (bitmanip_rotate:VI
+ (match_operand:VI 1 "register_operand")
+ (match_operand:VI 2 "register_operand")))]
+  "TARGET_ZVBB || TARGET_ZVKB"
+  {
+riscv_vector::emit_vlmax_insn (code_for_pred_v (, mode),
+  riscv_vector::BINARY_OP, operands);
+DONE;
+  }
+)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-1.c
new file mode 100644
index 000..55dac27697c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-add-options "riscv_v" } */
+/* { dg-add-options "riscv_zvbb" } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model" } */
+
+#include "vrolr-template.h"
+
+/* { dg-final { scan-assembler-times {\tvrol\.vv} 4 } } */
+/* { dg-final { scan-assembler-times {\tvror\.vv} 4 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-run.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-run.c
new file mode 100644
index 000..b659a0804f5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-run.c
@@ -0,0 +1,88 @@
+/* { dg-do run } */
+/* { dg-require-effective-target "riscv_zvbb_ok" } */
+/* { dg-add-options "riscv_v" } */
+/* { dg-add-options "riscv_zvbb" } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model" } */
+
+#include 
+#include 
+
+#include 
+#include 
+#include 
+
+#define ARRAY_SIZE 512
+
+#define CIRCULAR_LEFT_SHIFT_ARRAY(arr, shifts, bit_size, size) \
+for (int i = 0; i < size; i++) { \
+(arr)[i] = (((arr)[i] << (shifts)[i]) | ((arr)[i] >> (bit_size - 
(shifts)[i]))); \
+}
+
+#define CIRCULAR_RIGHT_SHIFT_ARRAY(arr, shifts, bit_size, size) \
+for (int i = 0; i < size; i++) { \
+(arr)[i] = (((arr)[i] >> (shifts)[i]) | ((arr)[i] << (bit_size - 
(shifts)[i]))); \
+}
+
+void __attribute__((optimize("no-tree-vectorize"))) compare_results8(
+uint8_t *result_left, uint8_t *result_right,
+int bit_size, uint8_t *shift_values)
+{
+for (int i = 0; i < ARRAY_SIZE; i++) {
+assert(result_left[i] == (i << shift_values[i]) | (i >> (bit_size - 
shift_values[i])));
+assert(result_right[i] == (i >> shift_values[i]) | (i << (bit_size - 
shift_values[i])));
+}
+}
+
+void __attribute__((optimize("no-tree-vectorize"))) compare_results16(
+uint16_t *result_left, uint16_t *result_right,
+int bit_size, uint16_t *shift_values)
+{
+for (int i = 0; i < ARRAY_SIZE; i++) {
+assert(result_left[i] == (i << shift_values[i]) | (i >> (bit_size - 
shift_values[i])));
+assert(result_right[i] == (i >> shift_values[i]) | (i << (bit_size - 
shift_values[i])));
+}
+}
+
+void __attribute__((optimize("no-tree-vectorize"))) compare_results32(
+uint32_t *result_left, uint32_t *result_right,
+int bit_size, uint32_t *shift_values)
+{
+for (int i = 0; i < ARRAY_SIZE; i++) {
+assert(result_left[i] == (i << shift_values[i]) | (i >> (bit_size - 
shift_values[i])));
+assert(result_right[i] == (i >> shift_values[i]) | (i << (bit_size - 
shift_values[i])));
+}
+}
+
+void __attribute__((optimize("no-tree-vectorize"))) compare_results64(
+uint64_t *result_left, uint64_t *result_right,

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Bugfix incorrect operand for vwsll auto-vect

2024-08-19 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:457adc650d8fdaf6bc153930bf60038189d7

commit 457adc650d8fdaf6bc153930bf60038189d7
Author: Pan Li 
Date:   Sat Aug 17 09:25:58 2024 -0600

RISC-V: Bugfix incorrect operand for vwsll auto-vect

This patch fixes an ICE in vwsll auto-vectorization with rv64gcv_zvbb.
Consider the example below.

void vwsll_vv_test (short *restrict dst, char *restrict a,
int *restrict b, int n)
{
  for (int i = 0; i < n; i++)
dst[i] = a[i] << b[i];
}

It will hit the vwsll pattern with following operands.
operand 0 -> (reg:RVVMF2HI 146 [ vect__7.13 ])
operand 1 -> (reg:RVVMF4QI 165 [ vect_cst__33 ])
operand 2 -> (reg:RVVM1SI 171 [ vect_cst__36 ])

According to the ISA, operand 2 should have the same mode as operand 1,
i.e. operand 2 should have RVVMF4QI mode as above.  Thus, add a
quad truncation for operand 2 before emitting vwsll.

The following test suite passed for this patch.
* The rv64gcv full regression test.

PR target/116280

gcc/ChangeLog:

* config/riscv/autovec-opt.md: Add quad truncation to
align the mode requirement for vwsll.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr116280-1.c: New test.
* gcc.target/riscv/rvv/base/pr116280-2.c: New test.

(cherry picked from commit 06ae7bc1345a31a5f23dc86b348a1bef59bb3cc1)

Diff:
---
 gcc/config/riscv/autovec-opt.md  |  4 
 gcc/testsuite/gcc.target/riscv/rvv/base/pr116280-1.c | 14 ++
 gcc/testsuite/gcc.target/riscv/rvv/base/pr116280-2.c | 10 ++
 3 files changed, 28 insertions(+)

diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index d7a3cfd4602..4b33a145c17 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -1546,6 +1546,10 @@
   "&& 1"
   [(const_int 0)]
   {
+rtx truncated = gen_reg_rtx (<V_QUAD_TRUNC>mode);
+emit_insn (gen_trunc<mode><v_quad_trunc>2 (truncated, operands[2]));
+operands[2] = truncated;
+
 insn_code icode = code_for_pred_vwsll (<V_DOUBLE_TRUNC>mode);
 riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, operands);
 DONE;
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr116280-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr116280-1.c
new file mode 100644
index 000..8b8547e2c34
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr116280-1.c
@@ -0,0 +1,14 @@
+/* Test there is no ICE when compile.  */
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvbb -mabi=lp64d -O3" } */
+
+short a;
+char b;
+
+void
+test (int e[][1][1], char f[][1][1][1][1]) {
+  for (int g; b;)
+for (;;)
+  for (int h; h < 4073709551572ULL; h += 18446744073709551612U)
+a = f[2][2][1][4073709551612][1] << e[1][1][g];
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr116280-2.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr116280-2.c
new file mode 100644
index 000..02f2de66eff
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr116280-2.c
@@ -0,0 +1,10 @@
+/* Test there is no ICE when compile.  */
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvbb -mabi=lp64d -O3" } */
+
+void
+test (short *restrict dst, char *restrict a, int *restrict b, int n)
+{
+  for (int i = 0; i < n; i++)
+dst[i] = a[i] << b[i];
+}


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Bugfix for RVV rounding intrinsic ICE in function checker

2024-08-19 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:5b2f688a7fad7b06e4f4fa070373e2781f8b4494

commit 5b2f688a7fad7b06e4f4fa070373e2781f8b4494
Author: Jin Ma 
Date:   Sat Aug 17 09:29:11 2024 -0600

RISC-V: Bugfix for RVV rounding intrinsic ICE in function checker

When compiling a rounding intrinsic of type 'vfloat16*' without zvfh or
zvfhmin enabled, checking FLOAT_MODE_P is not enough because the type is
not supported in that configuration.  Although the subsequent
riscv_validate_vector_type check will still fail and report an error,
we should not ICE here.

internal compiler error: in check, at 
config/riscv/riscv-vector-builtins-shapes.cc:444
   10 |   return __riscv_vfadd_vv_f16m1_rm (vs2, vs1, 0, vl);
  |   ^~
0x4191794 internal_error(char const*, ...)
/iothome/jin.ma/code/master/gcc/gcc/diagnostic-global-context.cc:491
0x416ebf5 fancy_abort(char const*, int, char const*)
/iothome/jin.ma/code/master/gcc/gcc/diagnostic.cc:1772
0x220aae6 
riscv_vector::build_frm_base::check(riscv_vector::function_checker&) const

/iothome/jin.ma/code/master/gcc/gcc/config/riscv/riscv-vector-builtins-shapes.cc:444
0x2205323 riscv_vector::function_checker::check()

/iothome/jin.ma/code/master/gcc/gcc/config/riscv/riscv-vector-builtins.cc:4414

gcc/ChangeLog:

* config/riscv/riscv-protos.h (riscv_vector_float_type_p): New.
* config/riscv/riscv-vector-builtins.cc
(function_instance::any_type_float_p): Use riscv_vector_float_type_p
instead of FLOAT_MODE_P for judgment.
* config/riscv/riscv.cc (riscv_vector_float_type_p): Change static to
extern.  Return false for non-RVV vector types.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/bug-9.c: New test.

(cherry picked from commit 3f51684ac05f065a87c53d9506400cbe97af6b79)

Diff:
---
 gcc/config/riscv/riscv-protos.h |  1 +
 gcc/config/riscv/riscv-vector-builtins.cc   |  4 ++--
 gcc/config/riscv/riscv.cc   |  5 -
 gcc/testsuite/gcc.target/riscv/rvv/base/bug-9.c | 13 +
 4 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 124ae2c073a..f8fc2874cbb 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -171,6 +171,7 @@ extern enum memmodel riscv_union_memmodels (enum memmodel, 
enum memmodel);
 extern bool riscv_reg_frame_related (rtx);
 extern void riscv_split_sum_of_two_s12 (HOST_WIDE_INT, HOST_WIDE_INT *,
HOST_WIDE_INT *);
+extern bool riscv_vector_float_type_p (const_tree type);
 
 /* Routines implemented in riscv-c.cc.  */
 void riscv_cpu_cpp_builtins (cpp_reader *);
diff --git a/gcc/config/riscv/riscv-vector-builtins.cc 
b/gcc/config/riscv/riscv-vector-builtins.cc
index 9f707efa533..41730c483ee 100644
--- a/gcc/config/riscv/riscv-vector-builtins.cc
+++ b/gcc/config/riscv/riscv-vector-builtins.cc
@@ -3497,11 +3497,11 @@ function_instance::operator== (const function_instance 
&other) const
 bool
 function_instance::any_type_float_p () const
 {
-  if (FLOAT_MODE_P (TYPE_MODE (get_return_type ())))
+  if (riscv_vector_float_type_p (get_return_type ()))
 return true;
 
   for (int i = 0; op_info->args[i].base_type != NUM_BASE_TYPES; ++i)
-if (FLOAT_MODE_P (TYPE_MODE (get_arg_type (i))))
+if (riscv_vector_float_type_p (get_arg_type (i)))
   return true;
 
   return false;
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 938ec02b750..420037885be 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -5898,9 +5898,12 @@ riscv_vector_int_type_p (const_tree type)
   return strstr (name, "int") != NULL || strstr (name, "uint") != NULL;
 }
 
-static bool
+bool
 riscv_vector_float_type_p (const_tree type)
 {
+  if (!riscv_vector_type_p (type))
+return false;
+
   machine_mode mode = TYPE_MODE (type);
 
   if (VECTOR_MODE_P (mode))
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/bug-9.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-9.c
new file mode 100644
index 000..20ae9ebf6f2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-9.c
@@ -0,0 +1,13 @@
+/* Test that we do not have ice when compile */
+/* { dg-do assemble } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O2"  { target { rv64 } } } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O2"  { target { rv32 } } } */
+
+#include 
+
+vfloat16m1_t f0 (vfloat16m1_t vs2, vfloat16m1_t vs1, size_t vl)
+{
+  return __riscv_vfadd_vv_f16m1_rm (vs2, vs1, 0, vl); 
+}
+
+/* { dg-error "return type 'vfloat16m1_t' requires the zvfhmin or zvfh ISA 
extension" "" { target { "riscv*-*-*" } } 0 } */


  1   2   >