[gcc r15-3597] Better recover from SLP reassociation fails during discovery

2024-09-12 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:f9e9ba9563c81ca2db99bf3daa6511c1471f1b78

commit r15-3597-gf9e9ba9563c81ca2db99bf3daa6511c1471f1b78
Author: Richard Biener 
Date:   Wed Sep 11 14:50:02 2024 +0200

Better recover from SLP reassociation fails during discovery

When we decide to not process an association chain of size two and
that would also mismatch with a different chain size on another lane
we shouldn't fail discovery hard at this point.  Instead let the
regular discovery figure out matching lanes so the parent can
decide to perform operand swapping or we can split groups at better
points rather than forcefully splitting away the first single lane.

For example on gcc.dg/vect/vect-strided-u8-i8.c we now see two
groups of size 4 feeding the store instead of groups of size 1,
three, two, one and one.

* tree-vect-slp.cc (vect_build_slp_tree_2): On reassociation
chain length mismatch do not fail discovery of the node
but try without re-associating to compute a better matches[].
Provide a reassociation failure hint in the dump.
(vect_slp_analyze_node_operations): Avoid stray failure
dumping.
(vectorizable_slp_permutation_1): Dump the address of the
SLP node representing the permutation.

Diff:
---
 gcc/tree-vect-slp.cc | 29 ++---
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 31c7e20f8c9a..975949ccbd14 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -2143,19 +2143,11 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
  if (chain.length () == 2)
{
  /* In a chain of just two elements resort to the regular
-operand swapping scheme.  If we run into a length
-mismatch still hard-FAIL.  */
- if (chain_len == 0)
-   hard_fail = false;
- else
-   {
- matches[lane] = false;
- /* ???  We might want to process the other lanes, but
-make sure to not give false matching hints to the
-caller for lanes we did not process.  */
- if (lane != group_size - 1)
-   matches[0] = false;
-   }
+operand swapping scheme.  Likewise if we run into a
+length mismatch process regularly as well as we did not
+process the other lanes we cannot report a good hint what
+lanes to try swapping in the parent.  */
+ hard_fail = false;
  break;
}
  else if (chain_len == 0)
@@ -2428,6 +2420,11 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
  return node;
}
 out:
+  if (dump_enabled_p ())
+   dump_printf_loc (MSG_NOTE, vect_location,
+"failed to line up SLP graph by re-associating "
+"operations in lanes%s\n",
+!hard_fail ? " trying regular discovery" : "");
   while (!children.is_empty ())
vect_free_slp_tree (children.pop ());
   while (!chains.is_empty ())
@@ -7554,7 +7551,9 @@ vect_slp_analyze_node_operations (vec_info *vinfo, 
slp_tree node,
   /* We're having difficulties scheduling nodes with just constant
  operands and no scalar stmts since we then cannot compute a stmt
  insertion place.  */
-  if (!seen_non_constant_child && SLP_TREE_SCALAR_STMTS (node).is_empty ())
+  if (res
+  && !seen_non_constant_child
+  && SLP_TREE_SCALAR_STMTS (node).is_empty ())
 {
   if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
@@ -10280,7 +10279,7 @@ vectorizable_slp_permutation_1 (vec_info *vinfo, 
gimple_stmt_iterator *gsi,
   if (dump_p)
 {
   dump_printf_loc (MSG_NOTE, vect_location,
-  "vectorizing permutation");
+  "vectorizing permutation %p", (void *)node);
   for (unsigned i = 0; i < perm.length (); ++i)
dump_printf (MSG_NOTE, " op%u[%u]", perm[i].first, perm[i].second);
   if (repeating_p)


[gcc r15-3598] c++: Don't ICE to build private access error message [PR116323]

2024-09-12 Thread Simon Martin via Gcc-cvs
https://gcc.gnu.org/g:19831baf4904d09a74c7cf684a27b091947a610b

commit r15-3598-g19831baf4904d09a74c7cf684a27b091947a610b
Author: Simon Martin 
Date:   Tue Sep 10 22:33:18 2024 +0200

c++: Don't ICE to build private access error message [PR116323]

We currently ICE upon the following code while building the "[...] is
private within this context" error message

=== cut here ===
class A { enum Enum{}; };
template<typename E, template<typename> class Alloc>
class B : private Alloc<E>, private A {};
template<typename E, template<typename> class Alloc>
int B<E, Alloc>::foo (Enum m) { return 42; }
=== cut here ===

The problem is that since r11-6880, after detecting that Enum cannot be
accessed in B, enforce_access will access the TYPE_BINFO of all the
bases of B, which ICEs for any that is a BOUND_TEMPLATE_TEMPLATE_PARM.
This patch simply skips such bases.

PR c++/116323

gcc/cp/ChangeLog:

* search.cc (get_parent_with_private_access): Only call 
access_in_type
for RECORD_OR_UNION_TYPE_P base BINFOs.

gcc/testsuite/ChangeLog:

* g++.dg/template/access43.C: New test.

Diff:
---
 gcc/cp/search.cc |  8 ++--
 gcc/testsuite/g++.dg/template/access43.C | 11 +++
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/gcc/cp/search.cc b/gcc/cp/search.cc
index 60c30ecb8818..6a21a25272b2 100644
--- a/gcc/cp/search.cc
+++ b/gcc/cp/search.cc
@@ -160,12 +160,16 @@ get_parent_with_private_access (tree decl, tree binfo)
 
   tree base_binfo = NULL_TREE;
 
-  /* Iterate through immediate parent classes.  */
+  /* Iterate through immediate parent classes.
+ Note that the base list might contain WILDCARD_TYPE_P types, that
+ should be ignored here.  */
   for (int i = 0; BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
 {
+  tree base_binfo_type = BINFO_TYPE (base_binfo);
   /* This parent had private access.  Therefore that's why BINFO can't
  access DECL.  */
-  if (access_in_type (BINFO_TYPE (base_binfo), decl) == ak_private)
+  if (RECORD_OR_UNION_TYPE_P (base_binfo_type)
+ && access_in_type (base_binfo_type, decl) == ak_private)
return base_binfo;
 }
 
diff --git a/gcc/testsuite/g++.dg/template/access43.C 
b/gcc/testsuite/g++.dg/template/access43.C
new file mode 100644
index ..ce9e6c8fbb25
--- /dev/null
+++ b/gcc/testsuite/g++.dg/template/access43.C
@@ -0,0 +1,11 @@
+// PR c++/116323
+// { dg-do "compile" }
+// { dg-additional-options "-Wno-template-body" }
+
+class A { enum Enum{}; };
+
+template class Alloc>
+class B : private Alloc, private A {};
+
+template class Alloc>
+int B::foo (Enum m) { return 42; } // { dg-error "is private" }


[gcc r15-3599] libcpp, c-family: Add (dumb) C23 N3017 #embed support [PR105863]

2024-09-12 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:eba6d2aa71a9b59386e5a2453cbe924371626b0b

commit r15-3599-geba6d2aa71a9b59386e5a2453cbe924371626b0b
Author: Jakub Jelinek 
Date:   Thu Sep 12 11:15:38 2024 +0200

libcpp, c-family: Add (dumb) C23 N3017 #embed support [PR105863]

The following patch implements the C23 N3017 "#embed - a scannable,
tooling-friendly binary resource inclusion mechanism" paper.

The implementation is intentionally dumb, in that it doesn't significantly
speed up compilation of larger initializers and doesn't make it possible
to use huge #embeds (like several gigabytes large; that is still infeasible
in terms of compile time and memory).
There are 2 reasons for this.  One is that I think the way it is implemented
now in the patch is how we should handle the smaller #embed sizes,
dunno with which boundary, whether 32 bytes or 64 or something like that,
certainly handling the single byte cases which is something that can appear
anywhere in the source where constant integer literal can appear is
desirable and I think for a few bytes it isn't worth it to come up with
something smarter and users would like to e.g. see it in -E readably as
well (perhaps the slow vs. fast boundary should be determined by command
line option).  And the other one is to be able to more easily find
regressions in behavior caused by the optimizations, so we have something
to get back in git to compare against.
I'm definitely willing to work on the optimizations (likely introduce a new
CPP_* token type to refer to a range of libcpp owned memory (start + size)
and similarly some tree which can do the same, and can be at any time e.g.
split into 2 subparts + say INTEGER_CST in between if needed say for
const unsigned char d[] = {
 #embed "2GB.dat" prefix (0, 0, ) suffix (, [0x4000] = 42)
}; still without having to copy around huge amounts of data; STRING_CST
owns the memory it points to and can be only 2GB in size), but would
like to do that incrementally.
And would like to first include some extensions also not included in
this patch, like gnu::offset (off) parameter to allow to skip certain
constant amount of bytes at the start of the files, plus
gnu::base64 ("base64_encoded_data") parameter to add something which can
store more efficiently large amounts of the #embed data in preprocessed
source.

I've been cross-checking all the tests also against the LLVM implementation
https://github.com/llvm/llvm-project/pull/68620
which has been for a few hours even committed to LLVM trunk but reverted
afterwards.  LLVM now has the support committed and I admit I haven't
rechecked whether the behavior on the below mentioned spots have been fixed
in it already or not yet.

The patch uses --embed-dir= option that clang plans to add above and doesn't
use other variants on the search directories yet, plus there are no
default directories at least for the time being where to search for embed
files.  So, #embed "..." works if it is found in the same directory (or
relative to the current file's directory) and #embed "/..." or #embed </...>
work always, but relative #embed <...> doesn't unless at least one
--embed-dir= is specified.  There is no reason to differentiate between
system and non-system directories, so we don't need -isystem like
counterpart, perhaps -iquote like counterpart could be useful in the future,
dunno what else.  It has --embed-directory=dir and --embed-directory dir
as aliases.

There are some differences beyond clang ICEs, so I'd like to point them out
to make sure there is agreement on the choices in the patch.  They are also
mentioned in the comments of the llvm pull request.

The most important is that the GCC patch (as well as the original thephd.dev
LLVM branch on godbolt) expands #embed (or acts as if it is expanded) into
a mere sequence of numbers like 123,2,35,26 rather than what clang
effectively treats as (unsigned char)123,(unsigned char)2,(unsigned
char)35,(unsigned char)26 but only does that when using integrated
preprocessor, not when using -save-temps where it acts as GCC.
JeanHeyd as the original author agrees that is how it is currently worded in
C23.

Another difference (not tested in the testsuite, not sure how to check for
effective target /dev/urandom nor am sure it is desirable to check that
during testsuite) is how to treat character devices, named pipes etc.
(block devices are errored on).  The original paper uses /dev/urandom
in various examples and seems to assume that unlike regular files the
devices aren't really cached, so
 #embed </dev/urandom> limit(1) prefix(int a = ) suffix(;)
 #embed </dev/urandom> limit(1) prefix(int b = ) suffix(;)
usually results in a != b.  That is what the godbolt thephd.dev branch
implements too and what this pat

[gcc r15-3600] libcpp: Add support for gnu::offset #embed/__has_embed parameter

2024-09-12 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:44058b847145166715f15e49fa8854f30e852f24

commit r15-3600-g44058b847145166715f15e49fa8854f30e852f24
Author: Jakub Jelinek 
Date:   Thu Sep 12 11:34:06 2024 +0200

libcpp: Add support for gnu::offset #embed/__has_embed parameter

The following patch adds on top of the just posted #embed patch
a first extension, gnu::offset which allows to seek in the data
file (for seekable files, otherwise read and throw away).
I think this is useful e.g. when some binary data start with
some well known header which shouldn't be included in the data etc.

2024-09-12  Jakub Jelinek  

libcpp/
* internal.h (struct cpp_embed_params): Add offset member.
* directives.cc (EMBED_PARAMS): Add gnu::offset entry.
(enum embed_param_kind): Add NUM_EMBED_STD_PARAMS.
(_cpp_parse_embed_params): Use NUM_EMBED_STD_PARAMS rather than
NUM_EMBED_PARAMS when parsing standard parameters.  Parse 
gnu::offset
parameter.
* files.cc (struct _cpp_file): Add offset member.
(_cpp_stack_embed): Handle params->offset.
gcc/
* doc/cpp.texi (Binary Resource Inclusion): Document gnu::offset
#embed parameter.
gcc/testsuite/
* c-c++-common/cpp/embed-15.c: New test.
* c-c++-common/cpp/embed-16.c: New test.
* gcc.dg/cpp/embed-5.c: New test.

Diff:
---
 gcc/doc/cpp.texi  |  8 ++-
 gcc/testsuite/c-c++-common/cpp/embed-15.c | 88 
 gcc/testsuite/c-c++-common/cpp/embed-16.c | 31 ++
 gcc/testsuite/gcc.dg/cpp/embed-5.c|  4 ++
 libcpp/directives.cc  | 40 ++---
 libcpp/files.cc   | 95 ++-
 libcpp/internal.h |  2 +-
 7 files changed, 244 insertions(+), 24 deletions(-)

diff --git a/gcc/doc/cpp.texi b/gcc/doc/cpp.texi
index 032b095602d5..612d97e16df8 100644
--- a/gcc/doc/cpp.texi
+++ b/gcc/doc/cpp.texi
@@ -3966,8 +3966,8 @@ treated the same), followed by parameter argument in 
parentheses, like
 with currently supported standard parameters @code{limit}, @code{prefix},
 @code{suffix} and @code{if_empty}, or implementation defined parameters
 specified by a unique vendor prefix followed by @code{::} followed by
-name of the parameter.  GCC will use the @code{gnu} prefix but currently
-doesn't support any extensions.
+name of the parameter.  GCC uses the @code{gnu} prefix for vendor
+parameters and currently supports the @code{gnu::offset} parameter.
 
 The @code{limit} parameter argument is a constant expression which
 specifies the maximum number of bytes included by the directive,
@@ -3977,6 +3977,10 @@ that sequence is not empty and @code{if_empty} argument 
is balanced token
 sequence which is used as expansion for @code{#embed} directive if the
 resource is empty.
 
+The @code{gnu::offset} parameter argument is a constant expression
+which specifies how many bytes to skip from the start of the resource.
+@code{limit} is then counted from that position.
+
 The @code{#embed} directive is not supported in the Traditional Mode
 (@pxref{Traditional Mode}).
 
diff --git a/gcc/testsuite/c-c++-common/cpp/embed-15.c 
b/gcc/testsuite/c-c++-common/cpp/embed-15.c
new file mode 100644
index ..c12aeb31db53
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/cpp/embed-15.c
@@ -0,0 +1,88 @@
+/* { dg-do run } */
+/* { dg-options "--embed-dir=${srcdir}/c-c++-common/cpp/embed-dir" } */
+/* { dg-additional-options "-std=gnu99" { target c } } */
+
+#if __has_embed (__FILE__ gnu::offset (4 + FOOBAR) limit (3)) != 
__STDC_EMBED_FOUND__
+#error "__has_embed fail"
+#endif
+
+#embed  limit(1) gnu::offset (0) prefix(int a = ) suffix (;) 
+#embed  limit(1) __gnu__::offset (1 * 1) prefix(int b = ) 
suffix (;) 
+#embed  limit(1) gnu::__offset__ (1 + 1) prefix(int c = ) 
suffix (;) 
+#embed  __limit__(1) __gnu__::__offset__ (1 + (1 \
+  + 1)) __prefix__(int d = ) __suffix__ (;)
+const unsigned char e[] = {
+  #embed  limit(5) gnu::offset (999)
+};
+const unsigned char f[] = {
+  #embed  limit(7) gnu::offset (998)
+};
+const unsigned char g[] = {
+  #embed  limit(8) gnu::offset (998)
+};
+const unsigned char h[] = {
+  #embed  limit(8) gnu::offset (997)
+};
+const unsigned char i[] = {
+  #embed  limit(9) gnu::offset (997)
+};
+const unsigned char j[] = {
+  #embed  limit(30) gnu::offset (990)
+};
+const unsigned char k[] = {
+  #embed  limit(26) gnu::offset (992)
+};
+const unsigned char l[] = {
+  #embed 
+};
+const unsigned char m[] = {
+  #embed  __limit__ (1000) __gnu__::__offset__ (32)
+};
+#if __has_embed ( limit(5) gnu::offset (999)) != 
__STDC_EMBED_FOUND__ \
+|| __has_embed ( limit(5) gnu::offset (999)) != 
__STDC_EMBED_FOUND__ \
+|| __has_embed ( limit(7) gnu::offset (998)) != 
__STDC_EMBED_FOUND__ \
+|| __has_embed ( limit(8) gnu::offset (998)) != 
__STDC_EMBED_FOUND__ \
+   

[gcc r15-3601] Abort loop SLP analysis quicker

2024-09-12 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:412c156d78c764d4aec3e94469ba5a4c068cee4c

commit r15-3601-g412c156d78c764d4aec3e94469ba5a4c068cee4c
Author: Richard Biener 
Date:   Thu Sep 12 11:31:59 2024 +0200

Abort loop SLP analysis quicker

As we can't cope with removed SLP instances during analysis there's
no point in doing that or even continuing analysis of SLP instances
after a failure.  The following makes us abort early.

* tree-vect-slp.cc (vect_slp_analyze_operations): When
doing loop analysis fail after the first failed SLP
instance.  Only remove instances when doing BB vectorization.
* tree-vect-loop.cc (vect_analyze_loop_2): Check whether
vect_slp_analyze_operations failed instead of checking
the number of SLP instances remaining.

Diff:
---
 gcc/tree-vect-loop.cc | 10 --
 gcc/tree-vect-slp.cc  | 10 +-
 2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 65d7ed51067e..cc15492f6a01 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -2947,12 +2947,10 @@ start_over:
 
   if (slp)
 {
-  /* Analyze operations in the SLP instances.  Note this may
-remove unsupported SLP instances which makes the above
-SLP kind detection invalid.  */
-  unsigned old_size = LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length ();
-  vect_slp_analyze_operations (loop_vinfo);
-  if (LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length () != old_size)
+  /* Analyze operations in the SLP instances.  We can't simply
+remove unsupported SLP instances as this makes the above
+SLP kind detection invalid and might also affect the VF.  */
+  if (! vect_slp_analyze_operations (loop_vinfo))
{
  ok = opt_result::failure_at (vect_location,
   "unsupported SLP instances\n");
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 975949ccbd14..4fcb9e2fa2bb 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -7976,19 +7976,27 @@ vect_slp_analyze_operations (vec_info *vinfo)
  || (SLP_INSTANCE_KIND (instance) == slp_inst_kind_bb_reduc
  && !vectorizable_bb_reduc_epilogue (instance, &cost_vec)))
 {
+ cost_vec.release ();
  slp_tree node = SLP_INSTANCE_TREE (instance);
  stmt_vec_info stmt_info;
  if (!SLP_INSTANCE_ROOT_STMTS (instance).is_empty ())
stmt_info = SLP_INSTANCE_ROOT_STMTS (instance)[0];
  else
stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
+ if (is_a  (vinfo))
+   {
+ if (dump_enabled_p ())
+   dump_printf_loc (MSG_NOTE, vect_location,
+"unsupported SLP instance starting from: %G",
+stmt_info->stmt);
+ return false;
+   }
  if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
 "removing SLP instance operations starting from: 
%G",
 stmt_info->stmt);
  vect_free_slp_instance (instance);
   vinfo->slp_instances.ordered_remove (i);
- cost_vec.release ();
  while (!visited_vec.is_empty ())
visited.remove (visited_vec.pop ());
}


[gcc r15-3602] s390: Fix strict_low_part generation

2024-09-12 Thread Stefan Schulze Frielinghaus via Gcc-cvs
https://gcc.gnu.org/g:9ebc9fbdddfe1ec85355b068354315a4da8e1ca0

commit r15-3602-g9ebc9fbdddfe1ec85355b068354315a4da8e1ca0
Author: Stefan Schulze Frielinghaus 
Date:   Thu Sep 12 13:29:43 2024 +0200

s390: Fix strict_low_part generation

In s390_expand_insv(), if generating code for ICM et al. src is a MEM
and gen_lowpart might force src into a register such that we end up with
patterns which do not match anymore.  Use adjust_address() instead in
order to preserve a MEM.

Furthermore, it is not straightforward to enforce a subreg.  For
example, in case of a paradoxical subreg, gen_lowpart() may return a
register.  In order to compensate for this, s390_gen_lowpart_subreg() emits
a reference to a pseudo which does not coincide with its definition
which is wrong.  Additionally, if dest is a paradoxical subreg, then do
not try to emit a strict_low_part since it could mean that dest was not
initialized even though this might be fixed up later by init-regs.

Splitter for insn *get_tp_64, *zero_extendhisi2_31,
*zero_extendqisi2_31, *zero_extendqihi2_31 are applied after reload.
Thus, operands[0] is a hard register and gen_lowpart (m, operands[0])
just returns the hard register for mode m which is fine to use as an
argument for strict_low_part, i.e., we do not need to enforce subregs
here since after reload subregs are supposed to be eliminated anyway.

This fixes gcc.dg/torture/pr111821.c.

gcc/ChangeLog:

* config/s390/s390-protos.h (s390_gen_lowpart_subreg): Remove.
* config/s390/s390.cc (s390_gen_lowpart_subreg): Remove.
(s390_expand_insv): Use adjust_address() and emit a
strict_low_part only in case of a natural subreg.
* config/s390/s390.md: Use gen_lowpart() instead of
s390_gen_lowpart_subreg().

Diff:
---
 gcc/config/s390/s390-protos.h |  1 -
 gcc/config/s390/s390.cc   | 47 +--
 gcc/config/s390/s390.md   | 13 ++--
 3 files changed, 20 insertions(+), 41 deletions(-)

diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
index b4646ccb6060..e7ac59d17daa 100644
--- a/gcc/config/s390/s390-protos.h
+++ b/gcc/config/s390/s390-protos.h
@@ -50,7 +50,6 @@ extern void s390_set_has_landing_pad_p (bool);
 extern bool s390_hard_regno_rename_ok (unsigned int, unsigned int);
 extern int s390_class_max_nregs (enum reg_class, machine_mode);
 extern bool s390_return_addr_from_memory(void);
-extern rtx s390_gen_lowpart_subreg (machine_mode, rtx);
 extern bool s390_fma_allowed_p (machine_mode);
 #if S390_USE_TARGET_ATTRIBUTE
 extern tree s390_valid_target_attribute_tree (tree args,
diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index 47e1d5adfd99..c1649ca49bd1 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -516,31 +516,6 @@ s390_return_addr_from_memory ()
   return cfun_gpr_save_slot(RETURN_REGNUM) == SAVE_SLOT_STACK;
 }
 
-/* Generate a SUBREG for the MODE lowpart of EXPR.
-
-   In contrast to gen_lowpart it will always return a SUBREG
-   expression.  This is useful to generate STRICT_LOW_PART
-   expressions.  */
-rtx
-s390_gen_lowpart_subreg (machine_mode mode, rtx expr)
-{
-  rtx lowpart = gen_lowpart (mode, expr);
-
-  /* There might be no SUBREG in case it could be applied to the hard
- REG rtx or it could be folded with a paradoxical subreg.  Bring
- it back.  */
-  if (!SUBREG_P (lowpart))
-{
-  machine_mode reg_mode = TARGET_ZARCH ? DImode : SImode;
-  gcc_assert (REG_P (lowpart));
-  lowpart = gen_lowpart_SUBREG (mode,
-   gen_rtx_REG (reg_mode,
-REGNO (lowpart)));
-}
-
-  return lowpart;
-}
-
 /* Return nonzero if it's OK to use fused multiply-add for MODE.  */
 bool
 s390_fma_allowed_p (machine_mode mode)
@@ -7112,15 +7087,21 @@ s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
   /* Emit a strict_low_part pattern if possible.  */
   if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
{
- rtx low_dest = s390_gen_lowpart_subreg (smode, dest);
- rtx low_src = gen_lowpart (smode, src);
-
- switch (smode)
+ rtx low_dest = gen_lowpart (smode, dest);
+ if (SUBREG_P (low_dest) && !paradoxical_subreg_p (low_dest))
{
-   case E_QImode: emit_insn (gen_movstrictqi (low_dest, low_src)); 
return true;
-   case E_HImode: emit_insn (gen_movstricthi (low_dest, low_src)); 
return true;
-   case E_SImode: emit_insn (gen_movstrictsi (low_dest, low_src)); 
return true;
-   default: break;
+ poly_int64 offset = GET_MODE_SIZE (mode) - GET_MODE_SIZE (smode);
+ rtx low_src = adjust_address (src, smode, offset);
+ switch (smode)
+   {
+   case E_QImode: emit_insn (gen_movst

[gcc r15-3603] c++: ICE with TTP [PR96097]

2024-09-12 Thread Marek Polacek via Gcc-cvs
https://gcc.gnu.org/g:25ac2bb57ae400621050a7e0845994336ca83b99

commit r15-3603-g25ac2bb57ae400621050a7e0845994336ca83b99
Author: Marek Polacek 
Date:   Tue Sep 3 17:01:48 2024 -0400

c++: ICE with TTP [PR96097]

We crash when dependent_type_p gets a TEMPLATE_TYPE_PARM outside
a template.  That happens here because in

  template  typename X>
  void func() {}
  template 
  struct Y {};
  void g() { func(); }

when performing overload resolution for func() we have to check
if U matches T and I matches TT.  So we wind up in
coerce_template_template_parm/PARM_DECL.  TREE_TYPE (arg) is int
so we try to substitute TT's type, which is T::type.  But we have
nothing to substitute T with.  And we call make_typename_type where
ctx is still T, which checks dependent_scope_p and we trip the assert.

It should work to always perform the substitution in a template context.
If the result still contains template parameters, we cannot say if they
match.

PR c++/96097

gcc/cp/ChangeLog:

* pt.cc (coerce_template_template_parm): Increment
processing_template_decl before calling tsubst.

gcc/testsuite/ChangeLog:

* g++.dg/template/ttp44.C: New test.

Diff:
---
 gcc/cp/pt.cc  |  2 ++
 gcc/testsuite/g++.dg/template/ttp44.C | 13 +
 2 files changed, 15 insertions(+)

diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index cb3164d49147..769e7999dac1 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -7951,7 +7951,9 @@ coerce_template_template_parm (tree parm, tree arg, 
tsubst_flags_t complain,
 i.e. the parameter list of TT depends on earlier parameters.  */
   if (!uses_template_parms (TREE_TYPE (arg)))
{
+ ++processing_template_decl;
  tree t = tsubst (TREE_TYPE (parm), outer_args, complain, in_decl);
+ --processing_template_decl;
  if (!uses_template_parms (t)
  && !same_type_p (t, TREE_TYPE (arg)))
return false;
diff --git a/gcc/testsuite/g++.dg/template/ttp44.C 
b/gcc/testsuite/g++.dg/template/ttp44.C
new file mode 100644
index ..2a4129752433
--- /dev/null
+++ b/gcc/testsuite/g++.dg/template/ttp44.C
@@ -0,0 +1,13 @@
+// PR c++/96097
+// { dg-do compile }
+
+template  class X>
+void func() {}
+
+template 
+struct Y {};
+
+void test()
+{
+  func();
+}


[gcc r15-3604] i386: Use offsetable address constraint for double-word memory operands, part 2

2024-09-12 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:8c01976b8e34eaa2483ab37d1bd18ebc5c8ada95

commit r15-3604-g8c01976b8e34eaa2483ab37d1bd18ebc5c8ada95
Author: Uros Bizjak 
Date:   Thu Sep 12 16:28:10 2024 +0200

i386: Use offsetable address constraint for double-word memory operands, 
part 2

Double-word memory operands are accessed as their high and low part, so the
memory location has to be offsettable.  Use "o" constraint instead of "m"
for double-word memory operands.

gcc/ChangeLog:

* config/i386/i386.md (*insvti_lowpart_1): Use "o" constraint
instead of "m" for double-word mode memory operands.

Diff:
---
 gcc/config/i386/i386.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 8d269feee837..c04415149490 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -3685,7 +3685,7 @@
   [(set (match_operand:TI 0 "nonimmediate_operand" "=ro,r,r,&r")
(any_or_plus:TI
  (and:TI
-   (match_operand:TI 1 "nonimmediate_operand" "r,m,r,m")
+   (match_operand:TI 1 "nonimmediate_operand" "r,o,r,o")
(match_operand:TI 3 "const_scalar_int_operand" "n,n,n,n"))
  (zero_extend:TI
(match_operand:DI 2 "nonimmediate_operand" "r,r,m,m"]


[gcc r15-3605] Git ignores .vscode

2024-09-12 Thread YunQiang Su via Gcc-cvs
https://gcc.gnu.org/g:477f7e2808d38c3192894e0109e1f185ad86d3d7

commit r15-3605-g477f7e2808d38c3192894e0109e1f185ad86d3d7
Author: YunQiang Su 
Date:   Fri Sep 6 11:09:13 2024 +0800

Git ignores .vscode

ChangeLog
* .gitignore: Add .vscode.

Diff:
---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 93a16b0b950c..f044fe16b5f6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -38,6 +38,7 @@ cscope.out
 
 .local.vimrc
 .lvimrc
+.vscode
 
 .clang-format
 .clang-tidy


[gcc r15-3607] arm: testsuite: make use of -mcpu=unset/-march=unset

2024-09-12 Thread Richard Earnshaw via Gcc-cvs
https://gcc.gnu.org/g:9a94c8ffdc8b554a2d95e0101e96830efee58add

commit r15-3607-g9a94c8ffdc8b554a2d95e0101e96830efee58add
Author: Richard Earnshaw 
Date:   Thu Sep 12 14:24:55 2024 +0100

arm: testsuite: make use of -mcpu=unset/-march=unset

This patch makes use of the new ability to unset the CPU or
architecture flags on the command line to enable several more tests on
Arm.  It doesn't cover every case and it does enable some tests that
now fail for different reasons when the tests are no longer skipped;
these were failing anyway for other testsuite configurations, so it's
still an overall improvement.

There's some restructuring required to fully implement this change: we
could previously treat XScale as an architecture, even though the
option it set was -mcpu=xscale; we now need to handle this correctly so
that we unset the architecture rather than the CPU.  To do this I've added a
new table for these variants and renamed the template functions to use
'cpu' rather than 'arch'.  This entailed updating the two XScale
related tests accordingly.

gcc/testsuite/ChangeLog:

* lib/target-supports.exp: Move xscale to new generator table.
(check_effective_target_arm_arch_FUNC_ok): Add -mcpu=unset to the
list of flags.
(add_options_for_arm_arch_FUNC): Likewise.
(check_effective_target_arm_cpu_FUNC_ok): New function.
(add_options_for_arm_cpu_FUNC): Likewise.
(check_effective_target_arm_cpu_FUNC_link): Likewise.
(check_effective_target_arm_cpu_FUNC_multilib): Likewise.
* gcc.target/arm/g2.c: Update dg directives.
* gcc.target/arm/scd42-2.c: Likewise.

Diff:
---
 gcc/testsuite/gcc.target/arm/g2.c  |  4 +--
 gcc/testsuite/gcc.target/arm/scd42-2.c |  4 +--
 gcc/testsuite/lib/target-supports.exp  | 59 --
 3 files changed, 60 insertions(+), 7 deletions(-)

diff --git a/gcc/testsuite/gcc.target/arm/g2.c 
b/gcc/testsuite/gcc.target/arm/g2.c
index 04334c97713b..7e43a907a4c5 100644
--- a/gcc/testsuite/gcc.target/arm/g2.c
+++ b/gcc/testsuite/gcc.target/arm/g2.c
@@ -1,8 +1,8 @@
 /* Verify that hardware multiply is preferred on XScale. */
 /* { dg-do compile } */
 /* { dg-options "-O2" } */
-/* { dg-require-effective-target arm_arch_xscale_arm_ok } */
-/* { dg-add-options arm_arch_xscale_arm } */
+/* { dg-require-effective-target arm_cpu_xscale_arm_ok } */
+/* { dg-add-options arm_cpu_xscale_arm } */
 
 
 /* Brett Gaines' test case. */
diff --git a/gcc/testsuite/gcc.target/arm/scd42-2.c 
b/gcc/testsuite/gcc.target/arm/scd42-2.c
index cd416885a804..a263c1fbff92 100644
--- a/gcc/testsuite/gcc.target/arm/scd42-2.c
+++ b/gcc/testsuite/gcc.target/arm/scd42-2.c
@@ -1,8 +1,8 @@
 /* Verify that mov is preferred on XScale for loading a 2 byte constant. */
 /* { dg-do compile } */
-/* { dg-require-effective-target arm_arch_xscale_arm_ok } */
+/* { dg-require-effective-target arm_cpu_xscale_arm_ok } */
 /* { dg-options "-O" } */
-/* { dg-add-options arm_arch_xscale_arm } */
+/* { dg-add-options arm_cpu_xscale_arm } */
 
 unsigned load2(void) __attribute__ ((naked));
 unsigned load2(void)
diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index cb9971d53980..c4d2c33cf628 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -5679,6 +5679,9 @@ proc check_effective_target_arm_fp16_hw { } {
 # Usage: /* { dg-require-effective-target arm_arch_v5_ok } */
 #/* { dg-add-options arm_arch_v5t } */
 #   /* { dg-require-effective-target arm_arch_v5t_multilib } */
+
+# This table should only be used to set -march= (and associated
+# flags).  See below for setting -mcpu
 foreach { armfunc armflag armdefs } {
v4 "-march=armv4 -marm" __ARM_ARCH_4__
v4t "-march=armv4t -mfloat-abi=softfp" __ARM_ARCH_4T__
@@ -5690,7 +5693,6 @@ foreach { armfunc armflag armdefs } {
v5te "-march=armv5te+fp -mfloat-abi=softfp" __ARM_ARCH_5TE__
v5te_arm "-march=armv5te+fp -marm" "__ARM_ARCH_5TE__ && !__thumb__"
v5te_thumb "-march=armv5te+fp -mthumb -mfloat-abi=softfp" 
"__ARM_ARCH_5TE__ && __thumb__"
-   xscale_arm "-mcpu=xscale -mfloat-abi=soft -marm" "__XSCALE__ && 
!__thumb__"
v6 "-march=armv6+fp -mfloat-abi=softfp" __ARM_ARCH_6__
v6_arm "-march=armv6+fp -marm" "__ARM_ARCH_6__ && !__thumb__"
v6_thumb "-march=armv6+fp -mthumb -mfloat-abi=softfp" "__ARM_ARCH_6__ 
&& __thumb__"
@@ -5735,11 +5737,11 @@ foreach { armfunc armflag armdefs } {
{
return 0;
}
-   } "FLAG" ]
+   } "-mcpu=unset FLAG" ]
}
 
proc add_options_for_arm_arch_FUNC { flags } {
-   return "$flags FLAG"
+   return "$flags -mcpu=unset FLAG"
}
 
proc check_effective_target_arm_arch_FUNC_link { } {
@@ -5762,6 +5764,57 @@ for

[gcc r15-3606] arm: Allow -mcpu and -march options to be unset

2024-09-12 Thread Richard Earnshaw via Gcc-cvs
https://gcc.gnu.org/g:7d6c6a0d15c136a68d066c60da0f48265a2b1886

commit r15-3606-g7d6c6a0d15c136a68d066c60da0f48265a2b1886
Author: Richard Earnshaw 
Date:   Wed Sep 11 17:06:12 2024 +0100

arm: Allow -mcpu and -march options to be unset

The compiler will warn if the architectural specification derived from
a -mcpu option is not the same as that specified by -march.  This is
because it was never intended that the two should be used at the same
time: -mcpu=<cpu> is supposed to be shorthand for -mtune=<cpu>
-march=arch-of(<cpu>).

Unfortunately, there are times when the two options passed to the
compiler may come from distinct sources: one example is makefiles
which accumulate options; another is the testsuite itself, when some
tests require a particular architecture setting to be useful - only
running the tests when the compiler/testsuite configuration exactly
matched the requirements would make regression testing especially hard
(we have too many permutations).

So this patch allows a user to cancel any earlier setting of a
particular flag and to make the compiler behave as though it was never
passed.  The intended usecase is (sources of options are shown in
parentheses, but that's just for grouping):

 (-march=armv7-a+simd) (-march=unset -mcpu=cortex-m33)

The option processing logic will now simplify this to:

 -mcpu=cortex-m33

A useful corollary of this is that

 -march=armv7-a -march=unset

will now cause the compiler to behave as though neither the
architecture nor the CPU was ever set and to default back to the
configure-time settings.

gcc/ChangeLog:

* config/arm/arm.h (OPTION_DEFAULT_SPECS): Allow -mcpu and -march
to be unset.
(ARCH_CPU_CLEANUP_SPECS): Likewise
(DRIVER_SELF_SPECS): Add ARCH_CPU_CLEANUP_SPECS
* doc/invoke.texi (arm: -mcpu= and -march=): Document use of 
'unset'.

Diff:
---
 gcc/config/arm/arm.h | 14 +++---
 gcc/doc/invoke.texi  | 12 
 2 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index 0cd5d733952d..b092ba6ffe01 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -394,9 +394,11 @@ emission of floating point pcs attributes.  */
TARGET_MODE_CHECK that also takes into account the selected CPU and
architecture.  */
 #define OPTION_DEFAULT_SPECS \
-  {"arch", "%{!march=*:%{!mcpu=*:-march=%(VALUE)}}" }, \
-  {"cpu", "%{!march=*:%{!mcpu=*:-mcpu=%(VALUE)}}" }, \
-  {"tune", "%{!mcpu=*:%{!mtune=*:-mtune=%(VALUE)}}" }, \
+  {"arch", "%{!march=*|march=unset:"\
+  "%{!mcpu=*|mcpu=unset:%

[gcc r15-3608] libcpp: adjust pedwarn handling

2024-09-12 Thread Jason Merrill via Gcc-cvs
https://gcc.gnu.org/g:c5009eb887910271ea35a857aa68941c7227b9c7

commit r15-3608-gc5009eb887910271ea35a857aa68941c7227b9c7
Author: Jason Merrill 
Date:   Tue Aug 27 13:15:16 2024 -0400

libcpp: adjust pedwarn handling

Using cpp_pedwarning (CPP_W_PEDANTIC instead of if (CPP_PEDANTIC cpp_error
lets users suppress these diagnostics with
 #pragma GCC diagnostic ignored "-Wpedantic".

This patch changes all instances of the cpp_error (CPP_DL_PEDWARN to
cpp_pedwarning.  In cases where the extension appears in a later C++
revision, we now condition the warning on the relevant -Wc++??-extensions
flag instead of -Wpedantic; in such cases often the if (CPP_PEDANTIC) check
is retained to preserve the default non-warning behavior.

I didn't attempt to adjust the warning flags for the C compiler, since it
seems to follow a different system than C++.

The CPP_PEDANTIC check is also kept in _cpp_lex_direct to avoid an ICE in
the self-tests from cb.diagnostics not being initialized.

While working on testcases for these changes I noticed that the c-c++-common
tests are not run with -pedantic-errors by default like the gcc.dg and
g++.dg directories are.  And if I specify -pedantic-errors with dg-options,
the default -std= changes from c++?? to gnu++??, which interferes with some
other pedwarns.  So two of the tests are C++-only.

libcpp/ChangeLog:

* include/cpplib.h (enum cpp_warning_reason): Add
CPP_W_CXX{14,17,20,23}_EXTENSIONS.
* charset.cc (_cpp_valid_ucn, convert_hex, convert_oct)
(convert_escape, narrow_str_to_charconst): Use cpp_pedwarning
instead of cpp_error for pedwarns.
* directives.cc (directive_diagnostics, _cpp_handle_directive)
(do_line, do_elif): Likewise.
* expr.cc (cpp_classify_number, eval_token): Likewise.
* lex.cc (skip_whitespace, maybe_va_opt_error)
(_cpp_lex_direct): Likewise.
* macro.cc (_cpp_arguments_ok): Likewise.
(replace_args): Use -Wvariadic-macros for pedwarn about
empty macro arguments.

gcc/c-family/ChangeLog:

* c.opt: Add CppReason for Wc++{14,17,20,23}-extensions.
* c-pragma.cc (handle_pragma_diagnostic_impl): Don't check
OPT_Wc__23_extensions.

gcc/testsuite/ChangeLog:

* c-c++-common/pragma-diag-17.c: New test.
* g++.dg/cpp0x/va-opt1.C: New test.
* g++.dg/cpp23/named-universal-char-escape3.C: New test.

Diff:
---
 gcc/c-family/c.opt |  8 +--
 libcpp/include/cpplib.h|  4 ++
 gcc/c-family/c-pragma.cc   |  2 -
 gcc/testsuite/c-c++-common/pragma-diag-17.c| 25 +++
 gcc/testsuite/g++.dg/cpp0x/va-opt1.C   | 18 +
 .../g++.dg/cpp23/named-universal-char-escape3.C| 19 ++
 libcpp/charset.cc  | 65 +-
 libcpp/directives.cc   | 78 --
 libcpp/expr.cc | 77 -
 libcpp/lex.cc  | 24 ---
 libcpp/macro.cc| 14 ++--
 11 files changed, 213 insertions(+), 121 deletions(-)

diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index 7d7445bd367c..ec23249c9592 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c-family/c.opt
@@ -497,19 +497,19 @@ C++ ObjC++ Var(warn_cxx11_extensions) Warning Init(1)
 Warn about C++11 constructs in code compiled with an older standard.
 
 Wc++14-extensions
-C++ ObjC++ Var(warn_cxx14_extensions) Warning Init(1)
+C++ ObjC++ Var(warn_cxx14_extensions) Warning Init(1) 
CppReason(CPP_W_CXX14_EXTENSIONS)
 Warn about C++14 constructs in code compiled with an older standard.
 
 Wc++17-extensions
-C++ ObjC++ Var(warn_cxx17_extensions) Warning Init(1)
+C++ ObjC++ Var(warn_cxx17_extensions) Warning Init(1) 
CppReason(CPP_W_CXX17_EXTENSIONS)
 Warn about C++17 constructs in code compiled with an older standard.
 
 Wc++20-extensions
-C++ ObjC++ Var(warn_cxx20_extensions) Warning Init(1)
+C++ ObjC++ Var(warn_cxx20_extensions) Warning Init(1) 
CppReason(CPP_W_CXX20_EXTENSIONS)
 Warn about C++20 constructs in code compiled with an older standard.
 
 Wc++23-extensions
-C++ ObjC++ Var(warn_cxx23_extensions) Warning Init(1)
+C++ ObjC++ Var(warn_cxx23_extensions) Warning Init(1) 
CppReason(CPP_W_CXX23_EXTENSIONS)
 Warn about C++23 constructs in code compiled with an older standard.
 
 Wc++26-extensions
diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
index 317e5430091b..76e2437e06a1 100644
--- a/libcpp/include/cpplib.h
+++ b/libcpp/include/cpplib.h
@@ -702,6 +702,10 @@ enum cpp_warning_reason {
   CPP_W_C11_C23_COMPAT,
   CPP_W_CXX11_COMPAT,
   CPP_W_CXX20_COMPAT,
+  CPP_W_CXX14_EXTENSIONS,
+  CPP_W_CXX17_

[gcc r15-3609] libcpp, v2: Add support for gnu::base64 #embed parameter

2024-09-12 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:ce0aecc7df1ff0be24c278dff5575ec28042ee58

commit r15-3609-gce0aecc7df1ff0be24c278dff5575ec28042ee58
Author: Jakub Jelinek 
Date:   Thu Sep 12 18:17:05 2024 +0200

libcpp, v2: Add support for gnu::base64 #embed parameter

This patch adds another #embed extension, gnu::base64.

As mentioned in the documentation, this extension is primarily
intended for use by the preprocessor, so that for the larger (say 32+ or
64+ bytes long) embeds it doesn't have to emit tens of thousands or
millions of comma separated string literals which would be very expensive
to parse again, but can emit
 #embed "." __gnu__::__base64__( \
 
"Tm9uIGVyYW0gbsOpc2NpdXMsIEJydXRlLCBjdW0sIHF1w6Ygc3VtbWlzIGluZ8OpbmlpcyBleHF1" \
 
"aXNpdMOhcXVlIGRvY3Ryw61uYSBwaGlsw7Nzb3BoaSBHcsOmY28gc2VybcOzbmUgdHJhY3RhdsOt" \
 
"c3NlbnQsIGVhIExhdMOtbmlzIGzDrXR0ZXJpcyBtYW5kYXLDqW11cywgZm9yZSB1dCBoaWMgbm9z" \
 
"dGVyIGxhYm9yIGluIHbDoXJpYXMgcmVwcmVoZW5zacOzbmVzIGluY8O6cnJlcmV0LiBuYW0gcXVp" \
 
"YsO6c2RhbSwgZXQgaWlzIHF1aWRlbSBub24gw6FkbW9kdW0gaW5kw7NjdGlzLCB0b3R1bSBob2Mg" \
 
"ZMOtc3BsaWNldCBwaGlsb3NvcGjDoXJpLiBxdWlkYW0gYXV0ZW0gbm9uIHRhbSBpZCByZXByZWjD" \
 
"qW5kdW50LCBzaSByZW3DrXNzaXVzIGFnw6F0dXIsIHNlZCB0YW50dW0gc3TDumRpdW0gdGFtcXVl" \
 
"IG11bHRhbSDDs3BlcmFtIHBvbsOpbmRhbSBpbiBlbyBub24gYXJiaXRyw6FudHVyLiBlcnVudCDD" \
 
"qXRpYW0sIGV0IGlpIHF1aWRlbSBlcnVkw610aSBHcsOmY2lzIGzDrXR0ZXJpcywgY29udGVtbsOp" \
 
"bnRlcyBMYXTDrW5hcywgcXVpIHNlIGRpY2FudCBpbiBHcsOmY2lzIGxlZ8OpbmRpcyDDs3BlcmFt" \
 
"IG1hbGxlIGNvbnPDum1lcmUuIHBvc3Ryw6ltbyDDoWxpcXVvcyBmdXTDunJvcyBzw7pzcGljb3Is" \
 
"IHF1aSBtZSBhZCDDoWxpYXMgbMOtdHRlcmFzIHZvY2VudCwgZ2VudXMgaG9jIHNjcmliw6luZGks" \
 
"IGV0c2kgc2l0IGVsw6lnYW5zLCBwZXJzw7Nuw6YgdGFtZW4gZXQgZGlnbml0w6F0aXMgZXNzZSBu" \
 "ZWdlbnQu")
with the meaning don't actually load some file, instead base64 decode
(RFC4648 with A-Za-z0-9+/ chars and = padding, no newlines in between)
the string and use that as data.  This is chosen because it should be
-pedantic-errors clean, fairly cheap to decode and then in optimizing
compiler could be handled as similar binary blob to normal #embed,
while the data isn't left somewhere on the disk, so distcc/ccache etc.
can move the preprocessed source without issues.
It makes no sense to support limit and gnu::offset parameters together
with it IMHO, why would somebody waste providing full data and then
throw some away?  prefix/suffix/if_empty are normally supported though,
but not intended to be used by the preprocessor.

This patch adds just the extension side, not the actual emitting of this
during -E or -E -fdirectives-only for now, that will be included in the
upcoming patch.

Compared to the earlier posted version of this extension, this patch
allows the string concatenation in the parameter argument (but still
doesn't allow escapes in the string, why would anyone use them when
only A-Za-z0-9+/= are valid).  The patch also adds support for parsing
this even in -fpreprocessed compilation.

2024-09-12  Jakub Jelinek  

libcpp/
* internal.h (struct cpp_embed_params): Add base64 member.
(_cpp_free_embed_params_tokens): Declare.
* directives.cc (DIRECTIVE_TABLE): Add IN_I flag to T_EMBED.
(save_token_for_embed, _cpp_free_embed_params_tokens): New 
functions.
(EMBED_PARAMS): Add gnu::base64 entry.
(_cpp_parse_embed_params): Parse gnu::base64 parameter.  If
-fpreprocessed without -fdirectives-only, require #embed to have
gnu::base64 parameter.  Diagnose conflict between gnu::base64 and
limit or gnu::offset parameters.
(do_embed): Use _cpp_free_embed_params_tokens.
* files.cc (finish_embed, base64_dec_fn): New functions.
(base64_dec): New array.
(B64D0, B64D1, B64D2, B64D3): Define.
(finish_base64_embed): New function.
(_cpp_stack_embed): Use finish_embed.  Handle params->base64
using finish_base64_embed.
* macro.cc (builtin_has_embed): Call _cpp_free_embed_params_tokens.
gcc/
* doc/cpp.texi (Binary Resource Inclusion): Document gnu::base64
parameter.
gcc/testsuite/
* c-c++-common/cpp/embed-17.c: New test.
* c-c++-common/cpp/embed-18.c: New test.
* c-c++-common/cpp/embed-19.c: New test.
* c-c++-common/cpp/embed-27.c: New test.
* gcc.dg/cpp/embed-6.c: New test.
* gcc.dg/cpp/embed-7.c: New test.

Diff:
---
 gcc/doc/cpp.texi  |  14 +-
 gcc/testsuite/c-c++-common/cpp/embed-17.c | 116 
 gcc/testsuite/c-c++-common/cpp/embed-18.c |  54 
 gcc/testsuite/c-c++-common/cpp/embed-19.c |   5 +
 gcc/testsuite/c-c++-common/cpp/embed-27.c |  64 +
 gcc/testsuite/gcc.dg/cpp/embed-6.c| 

[gcc r15-3610] c++: Disable deprecated/unavailable diagnostics when creating thunks for methods with such attribute

2024-09-12 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:4026d89d623e322920b052f7ac0d940ef267dc0f

commit r15-3610-g4026d89d623e322920b052f7ac0d940ef267dc0f
Author: Jakub Jelinek 
Date:   Thu Sep 12 18:22:21 2024 +0200

c++: Disable deprecated/unavailable diagnostics when creating thunks for 
methods with such attributes [PR116636]

On the following testcase, we emit false positive warnings/errors about 
using
the deprecated or unavailable methods when creating thunks for them, even
when nothing (in the testcase so far) actually used those.

The following patch temporarily disables those diagnostics when creating
the thunks.

2024-09-12  Jakub Jelinek  

PR c++/116636
* method.cc: Include decl.h.
(use_thunk): Temporarily change deprecated_state to
UNAVAILABLE_DEPRECATED_SUPPRESS.

* g++.dg/warn/deprecated-19.C: New test.

Diff:
---
 gcc/cp/method.cc  |  6 ++
 gcc/testsuite/g++.dg/warn/deprecated-19.C | 22 ++
 2 files changed, 28 insertions(+)

diff --git a/gcc/cp/method.cc b/gcc/cp/method.cc
index 68a776d2c5a6..21c06c744c9a 100644
--- a/gcc/cp/method.cc
+++ b/gcc/cp/method.cc
@@ -26,6 +26,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "coretypes.h"
 #include "target.h"
 #include "cp-tree.h"
+#include "decl.h"
 #include "stringpool.h"
 #include "cgraph.h"
 #include "varasm.h"
@@ -283,6 +284,11 @@ use_thunk (tree thunk_fndecl, bool emit_p)
   /* Thunks are always addressable; they only appear in vtables.  */
   TREE_ADDRESSABLE (thunk_fndecl) = 1;
 
+  /* Don't diagnose deprecated or unavailable functions just because they
+ have thunks emitted for them.  */
+  auto du = make_temp_override (deprecated_state,
+UNAVAILABLE_DEPRECATED_SUPPRESS);
+
   /* Figure out what function is being thunked to.  It's referenced in
  this translation unit.  */
   TREE_ADDRESSABLE (function) = 1;
diff --git a/gcc/testsuite/g++.dg/warn/deprecated-19.C 
b/gcc/testsuite/g++.dg/warn/deprecated-19.C
new file mode 100644
index ..561f1241e005
--- /dev/null
+++ b/gcc/testsuite/g++.dg/warn/deprecated-19.C
@@ -0,0 +1,22 @@
+// PR c++/116636
+// { dg-do compile }
+// { dg-options "-pedantic -Wdeprecated" }
+
+struct A {
+  virtual int foo () = 0;
+};
+struct B : virtual A {
+  [[deprecated]] int foo () { return 0; }  // { dg-message "declared here" 
}
+}; // { dg-warning "C\\\+\\\+11 
attributes only available with" "" { target c++98_only } .-1 }
+struct C : virtual A {
+  [[gnu::unavailable]] int foo () { return 0; }// { dg-message 
"declared here" }
+}; // { dg-warning "C\\\+\\\+11 
attributes only available with" "" { target c++98_only } .-1 }
+
+void
+bar ()
+{
+  B b;
+  b.foo ();// { dg-warning "'virtual int 
B::foo\\\(\\\)' is deprecated" }
+  C c;
+  c.foo ();// { dg-error "'virtual int 
C::foo\\\(\\\)' is unavailable" }
+}


[gcc r15-3611] c++: decltype(auto) deduction of statement-expression [PR116418]

2024-09-12 Thread Patrick Palka via Gcc-cvs
https://gcc.gnu.org/g:12bdcc3d7970860b9d66ed4dea203bde8fd68d4d

commit r15-3611-g12bdcc3d7970860b9d66ed4dea203bde8fd68d4d
Author: Patrick Palka 
Date:   Thu Sep 12 12:45:03 2024 -0400

c++: decltype(auto) deduction of statement-expression [PR116418]

r8-7538 for PR84968 made strip_typedefs_expr diagnose STATEMENT_LIST
so that we reject statement-expressions in noexcept-specifiers to
match our behavior in template arguments (which the parser diagnoses
directly).

Later r11-7452 made decltype(auto) deduction canonicalize the expression
(as an implementation detail) which in turn calls strip_typedefs_expr,
and so ever since we inadvertently reject decltype(auto) deduction of a
statement-expression.

This patch just removes the diagnostic in strip_typedefs_expr and instead
treats statement-expressions similar to lambda-expressions.  The function
doesn't seem like the right place for such a diagnostic and so it seems
easier to just accept rather than try to reject them in a suitable place.

PR c++/116418

gcc/cp/ChangeLog:

* tree.cc (strip_typedefs_expr) <case STATEMENT_LIST>: Replace
this error path with ...
<case STMT_EXPR>: ... this, returning the original tree.

gcc/testsuite/ChangeLog:

* g++.dg/eh/pr84968.C: No longer expect an ahead of time diagnostic
for the statement-expression.  Instantiate the template and expect
an incomplete type error instead.
* g++.dg/ext/stmtexpr26.C: New test.

Reviewed-by: Jason Merrill 

Diff:
---
 gcc/cp/tree.cc|  5 +
 gcc/testsuite/g++.dg/eh/pr84968.C |  4 +++-
 gcc/testsuite/g++.dg/ext/stmtexpr26.C | 10 ++
 3 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/gcc/cp/tree.cc b/gcc/cp/tree.cc
index c3a38de4f486..99088da9cee0 100644
--- a/gcc/cp/tree.cc
+++ b/gcc/cp/tree.cc
@@ -2009,12 +2009,9 @@ strip_typedefs_expr (tree t, bool *remove_attributes, 
unsigned int flags)
   }
 
 case LAMBDA_EXPR:
+case STMT_EXPR:
   return t;
 
-case STATEMENT_LIST:
-  error ("statement-expression in a constant expression");
-  return error_mark_node;
-
 default:
   break;
 }
diff --git a/gcc/testsuite/g++.dg/eh/pr84968.C 
b/gcc/testsuite/g++.dg/eh/pr84968.C
index 23c49f477a88..a6e21914eed1 100644
--- a/gcc/testsuite/g++.dg/eh/pr84968.C
+++ b/gcc/testsuite/g++.dg/eh/pr84968.C
@@ -9,7 +9,9 @@ struct S {
   void a()
 try {
 } catch (int ()
-noexcept (({ union b a; true; }))) // { dg-error "constant" }
+noexcept (({ union b a; true; }))) // { dg-error "'b a' has 
incomplete type" }
   {
   }
 };
+
+template void S::a(); // { dg-message "required from here" }
diff --git a/gcc/testsuite/g++.dg/ext/stmtexpr26.C 
b/gcc/testsuite/g++.dg/ext/stmtexpr26.C
new file mode 100644
index ..2250df550d48
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/stmtexpr26.C
@@ -0,0 +1,10 @@
+// PR c++/116418
+// { dg-do compile { target c++14 } }
+// { dg-options "" }
+
+void foo ();
+template 
+void bar ()
+{
+  decltype(auto) v = ({ foo (); 3; });
+}


[gcc(refs/users/meissner/heads/work177-vpair)] Tweak vector-pair.h.

2024-09-12 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:919a4a544acd1d04950899c18cf7fe1e12e99471

commit 919a4a544acd1d04950899c18cf7fe1e12e99471
Author: Michael Meissner 
Date:   Thu Sep 12 13:26:30 2024 -0400

Tweak vector-pair.h.

2024-09-12  Michael Meissner  

* config/rs6000/vector-pair.h: If __VPAIR_ASM__ or __VPAIR_NOP10__ 
are
defined, don't enable using vector pair built-in functions.

Diff:
---
 gcc/config/rs6000/vector-pair.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/rs6000/vector-pair.h b/gcc/config/rs6000/vector-pair.h
index ad42e246c083..ebfaaa1e8a0c 100644
--- a/gcc/config/rs6000/vector-pair.h
+++ b/gcc/config/rs6000/vector-pair.h
@@ -80,7 +80,7 @@
 #endif
 
 /* Do we have MMA support and the vector pair built-in function?  */
-#if __VPAIR_BUILTIN__
+#if __VPAIR_BUILTIN__ && !__VPAIR_ASM__ && !__VPAIR_NOP10__
 #define vector_pair_t  __vector_pair
 #define vector_pair_f64_t  __vector_pair
 #define vector_pair_f32_t  __vector_pair


[gcc(refs/users/meissner/heads/work177-vpair)] Update ChangeLog.*

2024-09-12 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:d51223b3f3ddaa4fe4d3c221864380a281cfedad

commit d51223b3f3ddaa4fe4d3c221864380a281cfedad
Author: Michael Meissner 
Date:   Thu Sep 12 13:27:58 2024 -0400

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.vpair | 9 +
 1 file changed, 9 insertions(+)

diff --git a/gcc/ChangeLog.vpair b/gcc/ChangeLog.vpair
index b10bf8549387..e7e29a602fa0 100644
--- a/gcc/ChangeLog.vpair
+++ b/gcc/ChangeLog.vpair
@@ -2,6 +2,15 @@
 
 Tweak vector-pair.h.
 
+2024-09-12  Michael Meissner  
+
+   * config/rs6000/vector-pair.h: If __VPAIR_ASM__ or __VPAIR_NOP10__ are
+   defined, don't enable using vector pair built-in functions.
+
+ Branch work177-vpair, patch #406 
+
+Tweak vector-pair.h.
+
 2024-09-04  Michael Meissner  
 
* config/rs6000/vector-pair.h: Add fixes.


[gcc(refs/users/meissner/heads/work177-vpair)] Update ChangeLog.*

2024-09-12 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:cbbf4998cdc92374928e71a186a97f52bd824ef3

commit cbbf4998cdc92374928e71a186a97f52bd824ef3
Author: Michael Meissner 
Date:   Thu Sep 12 13:30:42 2024 -0400

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.vpair | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.vpair b/gcc/ChangeLog.vpair
index e7e29a602fa0..bded7a95834e 100644
--- a/gcc/ChangeLog.vpair
+++ b/gcc/ChangeLog.vpair
@@ -1,4 +1,4 @@
- Branch work177-vpair, patch #406 
+ Branch work177-vpair, patch #407 
 
 Tweak vector-pair.h.


[gcc r14-10665] libiberty: Fix up > 64K section handling in simple_object_elf_copy_lto_debug_section [PR116614]

2024-09-12 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:c9fd43a8df0e30109794e2480e2d8d05d00763c0

commit r14-10665-gc9fd43a8df0e30109794e2480e2d8d05d00763c0
Author: Jakub Jelinek 
Date:   Sat Sep 7 09:36:53 2024 +0200

libiberty: Fix up > 64K section handling in 
simple_object_elf_copy_lto_debug_section [PR116614]

cat abc.C
  #define A(n) struct T##n {} t##n;
  #define B(n) A(n##0) A(n##1) A(n##2) A(n##3) A(n##4) A(n##5) A(n##6) 
A(n##7) A(n##8) A(n##9)
  #define C(n) B(n##0) B(n##1) B(n##2) B(n##3) B(n##4) B(n##5) B(n##6) 
B(n##7) B(n##8) B(n##9)
  #define D(n) C(n##0) C(n##1) C(n##2) C(n##3) C(n##4) C(n##5) C(n##6) 
C(n##7) C(n##8) C(n##9)
  #define E(n) D(n##0) D(n##1) D(n##2) D(n##3) D(n##4) D(n##5) D(n##6) 
D(n##7) D(n##8) D(n##9)
  E(1) E(2) E(3)
  int main () { return 0; }
./xg++ -B ./ -o abc{.o,.C} -flto -flto-partition=1to1 -O2 -g 
-fdebug-types-section -c
./xgcc -B ./ -o abc{,.o} -flto -flto-partition=1to1 -O2
(not included in testsuite as it takes a while to compile) FAILs with
lto-wrapper: fatal error: Too many copied sections: Operation not supported
compilation terminated.
/usr/bin/ld: error: lto-wrapper failed
collect2: error: ld returned 1 exit status

The following patch fixes that.  Most of the 64K+ section support for
reading and writing was already there years ago (and especially reading used
quite often already) and a further bug fixed in it in the PR104617 fix.

Yet, the fix isn't solely about removing the
  if (new_i - 1 >= SHN_LORESERVE)
{
  *err = ENOTSUP;
  return "Too many copied sections";
}
5 lines, the missing part was that the function only handled reading of
the .symtab_shndx section but not copying/updating of it.
If the result has less than 64K-epsilon sections, that actually wasn't
needed, but e.g. with -fdebug-types-section one can exceed that pretty
easily (reported to us on WebKitGtk build on ppc64le).
Updating the section is slightly more complicated, because it basically
needs to be done in lock step with updating the .symtab section, if one
doesn't need to use SHN_XINDEX in there, the section should (or should be
updated to) contain SHN_UNDEF entry, otherwise needs to have whatever would
be otherwise stored but couldn't fit.  But repeating due to that all the
symtab decisions what to discard and how to rewrite it would be ugly.

So, the patch instead emits the .symtab_shndx section (or sections) last
and prepares the content during the .symtab processing and in a second
pass when going just through .symtab_shndx sections just uses the saved
content.

2024-09-07  Jakub Jelinek  

PR lto/116614
* simple-object-elf.c (SHN_COMMON): Align comment with neighbouring
comments.
(SHN_HIRESERVE): Use uppercase hex digits instead of lowercase for
consistency.
(simple_object_elf_find_sections): Formatting fixes.
(simple_object_elf_fetch_attributes): Likewise.
(simple_object_elf_attributes_merge): Likewise.
(simple_object_elf_start_write): Likewise.
(simple_object_elf_write_ehdr): Likewise.
(simple_object_elf_write_shdr): Likewise.
(simple_object_elf_write_to_file): Likewise.
(simple_object_elf_copy_lto_debug_section): Likewise.  Don't fail 
for
new_i - 1 >= SHN_LORESERVE, instead arrange in that case to copy
over .symtab_shndx sections, though emit those last and compute 
their
section content when processing associated .symtab sections.  Handle
simple_object_internal_read failure even in the .symtab_shndx 
reading
case.

(cherry picked from commit bb8dd0980b39cfd601f88703fd356055727ef24d)

Diff:
---
 libiberty/simple-object-elf.c | 210 --
 1 file changed, 143 insertions(+), 67 deletions(-)

diff --git a/libiberty/simple-object-elf.c b/libiberty/simple-object-elf.c
index c09c216656c2..5e95297b2fc1 100644
--- a/libiberty/simple-object-elf.c
+++ b/libiberty/simple-object-elf.c
@@ -128,9 +128,9 @@ typedef struct {
 
 #define SHN_UNDEF  0   /* Undefined section */
 #define SHN_LORESERVE  0xFF00  /* Begin range of reserved indices */
-#define SHN_COMMON 0xFFF2  /* Associated symbol is in common */
+#define SHN_COMMON 0xFFF2  /* Associated symbol is in common */
 #define SHN_XINDEX 0x  /* Section index is held elsewhere */
-#define SHN_HIRESERVE  0x  /* End of reserved indices */
+#define SHN_HIRESERVE  0x  /* End of reserved indices */
 
 
 /* 32-bit ELF program header.  */
@@ -569,8 +569,8 @@ simple_object_elf_find_sections (simple_object_read *sobj,
 void *data,
 int *err)
 {
-  struct simple_object_elf_read *eor =

[gcc r14-10666] c++: Fix get_member_function_from_ptrfunc with -fsanitize=bounds [PR116449]

2024-09-12 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:90a9c36dc3ba341cf662ba1d60c939027487fe9a

commit r14-10666-g90a9c36dc3ba341cf662ba1d60c939027487fe9a
Author: Jakub Jelinek 
Date:   Tue Sep 10 18:32:58 2024 +0200

c++: Fix get_member_function_from_ptrfunc with -fsanitize=bounds [PR116449]

The following testcase is miscompiled, because
get_member_function_from_ptrfunc
emits something like
(((FUNCTION.__pfn & 1) != 0)
 ? ptr + FUNCTION.__delta + FUNCTION.__pfn - 1
 : FUNCTION.__pfn) (ptr + FUNCTION.__delta, ...)
or so, so FUNCTION tree is used there 5 times.  There is
if (TREE_SIDE_EFFECTS (function)) function = save_expr (function);
but in this case function doesn't have side-effects, just nested ARRAY_REFs.
Now, if all the FUNCTION trees would be shared, it would work fine,
FUNCTION is evaluated in the first operand of COND_EXPR; but unfortunately
that isn't the case, both the BIT_AND_EXPR shortening and conversion to
bool done for build_conditional_expr actually unshare_expr that first
expression, but none of the other 4 are unshared.  With -fsanitize=bounds,
.UBSAN_BOUNDS calls are added to the ARRAY_REFs and use save_expr to avoid
evaluating the argument multiple times, but because that FUNCTION tree is
first used in the second argument of COND_EXPR (i.e. conditionally), the
SAVE_EXPR initialization is done just there and then the third argument
of COND_EXPR just uses the uninitialized temporary and so does the first
argument computation as well.

The following patch fixes that by doing save_expr even if 
!TREE_SIDE_EFFECTS,
but to avoid doing that too often only if !nonvirtual and if the expression
isn't a simple decl.

2024-09-10  Jakub Jelinek  

PR c++/116449
* typeck.cc (get_member_function_from_ptrfunc): Use save_expr
on instance_ptr and function even if it doesn't have side-effects,
as long as it isn't a decl.

* g++.dg/ubsan/pr116449.C: New test.

(cherry picked from commit 0008050b9d6046ba4e811a03b406fb5d98707cae)

Diff:
---
 gcc/cp/typeck.cc  | 19 ---
 gcc/testsuite/g++.dg/ubsan/pr116449.C | 14 ++
 2 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/gcc/cp/typeck.cc b/gcc/cp/typeck.cc
index 21436f836fa6..3d62943f8647 100644
--- a/gcc/cp/typeck.cc
+++ b/gcc/cp/typeck.cc
@@ -4175,10 +4175,23 @@ get_member_function_from_ptrfunc (tree 
*instance_ptrptr, tree function,
   if (!nonvirtual && is_dummy_object (instance_ptr))
nonvirtual = true;
 
-  if (TREE_SIDE_EFFECTS (instance_ptr))
-   instance_ptr = instance_save_expr = save_expr (instance_ptr);
+  /* Use save_expr even when instance_ptr doesn't have side-effects,
+unless it is a simple decl (save_expr won't do anything on
+constants), so that we don't ubsan instrument the expression
+multiple times.  See PR116449.  */
+  if (TREE_SIDE_EFFECTS (instance_ptr)
+ || (!nonvirtual && !DECL_P (instance_ptr)))
+   {
+ instance_save_expr = save_expr (instance_ptr);
+ if (instance_save_expr == instance_ptr)
+   instance_save_expr = NULL_TREE;
+ else
+   instance_ptr = instance_save_expr;
+   }
 
-  if (TREE_SIDE_EFFECTS (function))
+  /* See above comment.  */
+  if (TREE_SIDE_EFFECTS (function)
+ || (!nonvirtual && !DECL_P (function)))
function = save_expr (function);
 
   /* Start by extracting all the information from the PMF itself.  */
diff --git a/gcc/testsuite/g++.dg/ubsan/pr116449.C 
b/gcc/testsuite/g++.dg/ubsan/pr116449.C
new file mode 100644
index ..f13368a51b00
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ubsan/pr116449.C
@@ -0,0 +1,14 @@
+// PR c++/116449
+// { dg-do compile }
+// { dg-options "-O2 -Wall -fsanitize=undefined" }
+
+struct C { void foo (int); void bar (); int c[16]; };
+typedef void (C::*P) ();
+struct D { P d; };
+static D e[1] = { { &C::bar } };
+
+void
+C::foo (int x)
+{
+  (this->*e[c[x]].d) ();
+}


[gcc r14-10667] c++: Disable deprecated/unavailable diagnostics when creating thunks for methods with such attribute

2024-09-12 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:5609246b561ab929b24eeb32965911884b58b0df

commit r14-10667-g5609246b561ab929b24eeb32965911884b58b0df
Author: Jakub Jelinek 
Date:   Thu Sep 12 18:22:21 2024 +0200

c++: Disable deprecated/unavailable diagnostics when creating thunks for 
methods with such attributes [PR116636]

On the following testcase, we emit false positive warnings/errors about 
using
the deprecated or unavailable methods when creating thunks for them, even
when nothing (in the testcase so far) actually used those.

The following patch temporarily disables those diagnostics when creating
the thunks.

2024-09-12  Jakub Jelinek  

PR c++/116636
* method.cc: Include decl.h.
(use_thunk): Temporarily change deprecated_state to
UNAVAILABLE_DEPRECATED_SUPPRESS.

* g++.dg/warn/deprecated-19.C: New test.

(cherry picked from commit 4026d89d623e322920b052f7ac0d940ef267dc0f)

Diff:
---
 gcc/cp/method.cc  |  6 ++
 gcc/testsuite/g++.dg/warn/deprecated-19.C | 22 ++
 2 files changed, 28 insertions(+)

diff --git a/gcc/cp/method.cc b/gcc/cp/method.cc
index 08a3d34fb016..a2ca83ce354c 100644
--- a/gcc/cp/method.cc
+++ b/gcc/cp/method.cc
@@ -26,6 +26,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "coretypes.h"
 #include "target.h"
 #include "cp-tree.h"
+#include "decl.h"
 #include "stringpool.h"
 #include "cgraph.h"
 #include "varasm.h"
@@ -283,6 +284,11 @@ use_thunk (tree thunk_fndecl, bool emit_p)
   /* Thunks are always addressable; they only appear in vtables.  */
   TREE_ADDRESSABLE (thunk_fndecl) = 1;
 
+  /* Don't diagnose deprecated or unavailable functions just because they
+ have thunks emitted for them.  */
+  auto du = make_temp_override (deprecated_state,
+UNAVAILABLE_DEPRECATED_SUPPRESS);
+
   /* Figure out what function is being thunked to.  It's referenced in
  this translation unit.  */
   TREE_ADDRESSABLE (function) = 1;
diff --git a/gcc/testsuite/g++.dg/warn/deprecated-19.C 
b/gcc/testsuite/g++.dg/warn/deprecated-19.C
new file mode 100644
index ..e49af4f74d0d
--- /dev/null
+++ b/gcc/testsuite/g++.dg/warn/deprecated-19.C
@@ -0,0 +1,22 @@
+// PR c++/116636
+// { dg-do compile { target c++11 } }
+// { dg-options "-pedantic -Wdeprecated" }
+
+struct A {
+  virtual int foo () = 0;
+};
+struct B : virtual A {
+  [[deprecated]] int foo () { return 0; }  // { dg-message "declared here" 
}
+};
+struct C : virtual A {
+  [[gnu::unavailable]] int foo () { return 0; }// { dg-message 
"declared here" }
+};
+
+void
+bar ()
+{
+  B b;
+  b.foo ();// { dg-warning "'virtual int 
B::foo\\\(\\\)' is deprecated" }
+  C c;
+  c.foo ();// { dg-error "'virtual int 
C::foo\\\(\\\)' is unavailable" }
+}


[gcc/redhat/heads/gcc-14-branch] (22 commits) Merge commit 'r14-10667-g5609246b561ab929b24eeb32965911884b

2024-09-12 Thread Jakub Jelinek via Gcc-cvs
The branch 'redhat/heads/gcc-14-branch' was updated to point to:

 c7a1c1a4bf73... Merge commit 'r14-10667-g5609246b561ab929b24eeb32965911884b

It previously pointed to:

 b30927153ae4... Merge commit 'r14-10646-g24909512101d59807f6d23a9963d64390e

Diff:

Summary of changes (added commits):
---

  c7a1c1a... Merge commit 'r14-10667-g5609246b561ab929b24eeb32965911884b
  5609246... c++: Disable deprecated/unavailable diagnostics when creati (*)
  90a9c36... c++: Fix get_member_function_from_ptrfunc with -fsanitize=b (*)
  c9fd43a... libiberty: Fix up > 64K section handling in simple_object_e (*)
  78a08bf... Daily bump. (*)
  2003f89... libstdc++: Only use std::ios_base_library_init() for ELF [P (*)
  d5d6d3f... libstdc++: std::string move assignment should not use POCCA (*)
  f0f00c4... Daily bump. (*)
  ab884ff... libstdc++: Fix std::chrono::tzdb to work with vanguard form (*)
  8a0f0fc... Daily bump. (*)
  3951efe... doc: Enhance Intel CPU documentation (*)
  9366940... Daily bump. (*)
  149d87f... c++: c->B::m access resolved through current inst [PR116320 (*)
  b5ed381... c++: inherited CTAD fixes [PR116276] (*)
  140aab2... libstdc++: use concrete return type for std::forward_like (*)
  7e0649a... Daily bump. (*)
  0c80216... c++: template depth of lambda in default targ [PR116567] (*)
  1e79541... Daily bump. (*)
  d4d7c4e... Update gcc uk.po (*)
  aedf6f8... Daily bump. (*)
  fe66863... c++: vtable referring to "unavailable" virtual fn [PR116606 (*)
  6abedee... ipa: Don't disable function parameter analysis for fat LTO (*)

(*) This commit already exists in another branch.
Because the reference `refs/vendors/redhat/heads/gcc-14-branch' matches
your hooks.email-new-commits-only configuration,
no separate email is sent for this commit.


[gcc(refs/vendors/redhat/heads/gcc-14-branch)] Merge commit 'r14-10667-g5609246b561ab929b24eeb32965911884b58b0df' into redhat/gcc-14-branch

2024-09-12 Thread Jakub Jelinek via Libstdc++-cvs
https://gcc.gnu.org/g:c7a1c1a4bf73b3cb4943c428085fe5cbb433cde4

commit c7a1c1a4bf73b3cb4943c428085fe5cbb433cde4
Merge: b30927153ae4 5609246b561a
Author: Jakub Jelinek 
Date:   Thu Sep 12 20:28:59 2024 +0200

Merge commit 'r14-10667-g5609246b561ab929b24eeb32965911884b58b0df' into 
redhat/gcc-14-branch

Diff:

 gcc/ChangeLog  |   35 +
 gcc/DATESTAMP  |2 +-
 gcc/cp/ChangeLog   |   46 +
 gcc/cp/call.cc |   27 +-
 gcc/cp/cp-tree.h   |8 +-
 gcc/cp/decl2.cc|3 +-
 gcc/cp/method.cc   |6 +
 gcc/cp/pt.cc   |   55 +-
 gcc/cp/semantics.cc|   11 +-
 gcc/cp/typeck.cc   |   19 +-
 gcc/doc/invoke.texi|   25 +-
 gcc/ipa-modref.cc  |4 +-
 gcc/po/ChangeLog   |4 +
 gcc/po/uk.po   | 3876 
 gcc/testsuite/ChangeLog|   46 +
 .../g++.dg/cpp23/class-deduction-inherited4.C  |4 +-
 .../g++.dg/cpp23/class-deduction-inherited5.C  |   25 +
 .../g++.dg/cpp23/class-deduction-inherited6.C  |   46 +
 gcc/testsuite/g++.dg/cpp2a/lambda-targ7.C  |   42 +
 gcc/testsuite/g++.dg/ext/attr-unavailable-13.C |8 +
 gcc/testsuite/g++.dg/template/access42.C   |   17 +
 gcc/testsuite/g++.dg/ubsan/pr116449.C  |   14 +
 gcc/testsuite/g++.dg/warn/deprecated-19.C  |   22 +
 libiberty/simple-object-elf.c  |  210 +-
 libsanitizer/ChangeLog |9 +
 libstdc++-v3/ChangeLog |   53 +
 libstdc++-v3/include/bits/basic_string.h   |2 +-
 libstdc++-v3/include/bits/move.h   |   47 +-
 libstdc++-v3/include/std/iostream  |2 +-
 libstdc++-v3/src/c++20/tzdb.cc |  265 +-
 libstdc++-v3/src/c++98/ios_init.cc |2 +-
 .../testsuite/20_util/forward_like/2_neg.cc|6 +-
 .../21_strings/basic_string/allocator/116641.cc|   53 +
 .../std/time/time_zone/sys_info_abbrev.cc  |  106 +
 libstdc++-v3/testsuite/std/time/tzdb/1.cc  |6 +-
 35 files changed, 2449 insertions(+), 2657 deletions(-)


[gcc r15-3612] i386: Implement SAT_ADD for signed vector integers

2024-09-12 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:19d751601d012bbe31512d26f968e75873a408ab

commit r15-3612-g19d751601d012bbe31512d26f968e75873a408ab
Author: Uros Bizjak 
Date:   Thu Sep 12 20:34:28 2024 +0200

i386: Implement SAT_ADD for signed vector integers

Enable V4QI, V2QI and V2HI mode signed saturated arithmetic insn patterns
and add a couple of testcases to test for PADDSB and PADDSW instructions.

PR target/112600

gcc/ChangeLog:

* config/i386/mmx.md (<insn><mode>3): Rename
from *<insn><mode>3.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr112600-3a.c: New test.
* gcc.target/i386/pr112600-3b.c: New test.

Diff:
---
 gcc/config/i386/mmx.md  |  2 +-
 gcc/testsuite/gcc.target/i386/pr112600-3a.c | 25 +
 gcc/testsuite/gcc.target/i386/pr112600-3b.c | 25 +
 3 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 2f8d958dd5f0..e88a06c441fa 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -3218,7 +3218,7 @@
(set_attr "type" "mmxadd,sseadd,sseadd")
(set_attr "mode" "DI,TI,TI")])
 
-(define_insn "*<insn><mode>3"
+(define_insn "<insn><mode>3"
   [(set (match_operand:VI_16_32 0 "register_operand" "=x,Yw")
 (sat_plusminus:VI_16_32
  (match_operand:VI_16_32 1 "register_operand" "0,Yw")
diff --git a/gcc/testsuite/gcc.target/i386/pr112600-3a.c 
b/gcc/testsuite/gcc.target/i386/pr112600-3a.c
new file mode 100644
index ..0c38659643da
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr112600-3a.c
@@ -0,0 +1,25 @@
+/* PR middle-end/112600 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msse2" } */
+
+#define MIN -128
+#define MAX 127
+
+typedef char T;
+typedef unsigned char UT;
+
+void foo (T *out, T *op_1, T *op_2, int n)
+{
+  int i;
+
+  for (i = 0; i < n; i++)
+{
+  T x = op_1[i];
+  T y = op_2[i];
+  T sum = (UT) x + (UT) y;
+
+  out[i] = (x ^ y) < 0 ? sum : (sum ^ x) >= 0 ? sum : x < 0 ? MIN : MAX;
+}
+}
+
+/* { dg-final { scan-assembler "paddsb" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr112600-3b.c 
b/gcc/testsuite/gcc.target/i386/pr112600-3b.c
new file mode 100644
index ..746c422ceb94
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr112600-3b.c
@@ -0,0 +1,25 @@
+/* PR middle-end/112600 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msse2" } */
+
+#define MIN -32768
+#define MAX 32767
+
+typedef short T;
+typedef unsigned short UT;
+
+void foo (T *out, T *op_1, T *op_2, int n)
+{
+  int i;
+
+  for (i = 0; i < n; i++)
+{
+  T x = op_1[i];
+  T y = op_2[i];
+  T sum = (UT) x + (UT) y;
+
+  out[i] = (x ^ y) < 0 ? sum : (sum ^ x) >= 0 ? sum : x < 0 ? MIN : MAX;
+}
+}
+
+/* { dg-final { scan-assembler "paddsw" } } */


[gcc r14-10668] c++: ICE with TTP [PR96097]

2024-09-12 Thread Marek Polacek via Gcc-cvs
https://gcc.gnu.org/g:4088319e7ce98f813988a36f98a18ee30ff7f1d8

commit r14-10668-g4088319e7ce98f813988a36f98a18ee30ff7f1d8
Author: Marek Polacek 
Date:   Tue Sep 3 17:01:48 2024 -0400

c++: ICE with TTP [PR96097]

We crash when dependent_type_p gets a TEMPLATE_TYPE_PARM outside
a template.  That happens here because in

  template  typename X>
  void func() {}
  template 
  struct Y {};
  void g() { func(); }

when performing overload resolution for func() we have to check
if U matches T and I matches TT.  So we wind up in
coerce_template_template_parm/PARM_DECL.  TREE_TYPE (arg) is int
so we try to substitute TT's type, which is T::type.  But we have
nothing to substitute T with.  And we call make_typename_type where
ctx is still T, which checks dependent_scope_p and we trip the assert.

It should work to always perform the substitution in a template context.
If the result still contains template parameters, we cannot say if they
match.

PR c++/96097

gcc/cp/ChangeLog:

* pt.cc (coerce_template_template_parm): Increment
processing_template_decl before calling tsubst.

gcc/testsuite/ChangeLog:

* g++.dg/template/ttp44.C: New test.

(cherry picked from commit 25ac2bb57ae400621050a7e0845994336ca83b99)

Diff:
---
 gcc/cp/pt.cc  |  2 ++
 gcc/testsuite/g++.dg/template/ttp44.C | 13 +
 2 files changed, 15 insertions(+)

diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index 6695e4ff49f0..85228b9f9435 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -7933,7 +7933,9 @@ coerce_template_template_parm (tree parm,
 i.e. the parameter list of TT depends on earlier parameters.  */
   if (!uses_template_parms (TREE_TYPE (arg)))
{
+ ++processing_template_decl;
  tree t = tsubst (TREE_TYPE (parm), outer_args, complain, in_decl);
+ --processing_template_decl;
  if (!uses_template_parms (t)
  && !same_type_p (t, TREE_TYPE (arg)))
return 0;
diff --git a/gcc/testsuite/g++.dg/template/ttp44.C 
b/gcc/testsuite/g++.dg/template/ttp44.C
new file mode 100644
index ..2a4129752433
--- /dev/null
+++ b/gcc/testsuite/g++.dg/template/ttp44.C
@@ -0,0 +1,13 @@
+// PR c++/96097
+// { dg-do compile }
+
+template  class X>
+void func() {}
+
+template 
+struct Y {};
+
+void test()
+{
+  func();
+}


[gcc] Created branch 'meissner/heads/work178' in namespace 'refs/users'

2024-09-12 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work178' was created in namespace 'refs/users' 
pointing to:

 19d751601d01... i386: Implement SAT_ADD for signed vector integers


[gcc(refs/users/meissner/heads/work178)] Add ChangeLog.meissner and REVISION.

2024-09-12 Thread Michael Meissner via Libstdc++-cvs
https://gcc.gnu.org/g:56d2af4368070f672be1757118db04c61de9d7a3

commit 56d2af4368070f672be1757118db04c61de9d7a3
Author: Michael Meissner 
Date:   Thu Sep 12 15:25:57 2024 -0400

Add ChangeLog.meissner and REVISION.

2024-09-12  Michael Meissner  

gcc/

* REVISION: New file for branch.
* ChangeLog.meissner: New file.

gcc/c-family/

* ChangeLog.meissner: New file.

gcc/c/

* ChangeLog.meissner: New file.

gcc/cp/

* ChangeLog.meissner: New file.

gcc/fortran/

* ChangeLog.meissner: New file.

gcc/testsuite/

* ChangeLog.meissner: New file.

libgcc/

* ChangeLog.meissner: New file.

Diff:
---
 gcc/ChangeLog.meissner   | 6 ++
 gcc/REVISION | 1 +
 gcc/c-family/ChangeLog.meissner  | 6 ++
 gcc/c/ChangeLog.meissner | 6 ++
 gcc/cp/ChangeLog.meissner| 6 ++
 gcc/fortran/ChangeLog.meissner   | 6 ++
 gcc/testsuite/ChangeLog.meissner | 6 ++
 libgcc/ChangeLog.meissner| 6 ++
 libstdc++-v3/ChangeLog.meissner  | 6 ++
 9 files changed, 49 insertions(+)

diff --git a/gcc/ChangeLog.meissner b/gcc/ChangeLog.meissner
new file mode 100644
index ..a05760a5b9f9
--- /dev/null
+++ b/gcc/ChangeLog.meissner
@@ -0,0 +1,6 @@
+ Branch work178, baseline 
+
+2024-09-12   Michael Meissner  
+
+   Clone branch
+
diff --git a/gcc/REVISION b/gcc/REVISION
new file mode 100644
index ..f0e6035292ab
--- /dev/null
+++ b/gcc/REVISION
@@ -0,0 +1 @@
+work178 branch
diff --git a/gcc/c-family/ChangeLog.meissner b/gcc/c-family/ChangeLog.meissner
new file mode 100644
index ..a05760a5b9f9
--- /dev/null
+++ b/gcc/c-family/ChangeLog.meissner
@@ -0,0 +1,6 @@
+ Branch work178, baseline 
+
+2024-09-12   Michael Meissner  
+
+   Clone branch
+
diff --git a/gcc/c/ChangeLog.meissner b/gcc/c/ChangeLog.meissner
new file mode 100644
index ..a05760a5b9f9
--- /dev/null
+++ b/gcc/c/ChangeLog.meissner
@@ -0,0 +1,6 @@
+ Branch work178, baseline 
+
+2024-09-12   Michael Meissner  
+
+   Clone branch
+
diff --git a/gcc/cp/ChangeLog.meissner b/gcc/cp/ChangeLog.meissner
new file mode 100644
index ..a05760a5b9f9
--- /dev/null
+++ b/gcc/cp/ChangeLog.meissner
@@ -0,0 +1,6 @@
+ Branch work178, baseline 
+
+2024-09-12   Michael Meissner  
+
+   Clone branch
+
diff --git a/gcc/fortran/ChangeLog.meissner b/gcc/fortran/ChangeLog.meissner
new file mode 100644
index ..a05760a5b9f9
--- /dev/null
+++ b/gcc/fortran/ChangeLog.meissner
@@ -0,0 +1,6 @@
+ Branch work178, baseline 
+
+2024-09-12   Michael Meissner  
+
+   Clone branch
+
diff --git a/gcc/testsuite/ChangeLog.meissner b/gcc/testsuite/ChangeLog.meissner
new file mode 100644
index ..a05760a5b9f9
--- /dev/null
+++ b/gcc/testsuite/ChangeLog.meissner
@@ -0,0 +1,6 @@
+ Branch work178, baseline 
+
+2024-09-12   Michael Meissner  
+
+   Clone branch
+
diff --git a/libgcc/ChangeLog.meissner b/libgcc/ChangeLog.meissner
new file mode 100644
index ..a05760a5b9f9
--- /dev/null
+++ b/libgcc/ChangeLog.meissner
@@ -0,0 +1,6 @@
+ Branch work178, baseline 
+
+2024-09-12   Michael Meissner  
+
+   Clone branch
+
diff --git a/libstdc++-v3/ChangeLog.meissner b/libstdc++-v3/ChangeLog.meissner
new file mode 100644
index ..a05760a5b9f9
--- /dev/null
+++ b/libstdc++-v3/ChangeLog.meissner
@@ -0,0 +1,6 @@
+ Branch work178, baseline 
+
+2024-09-12   Michael Meissner  
+
+   Clone branch
+


[gcc] Created branch 'meissner/heads/work178-dmf' in namespace 'refs/users'

2024-09-12 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work178-dmf' was created in namespace 'refs/users' 
pointing to:

 56d2af436807... Add ChangeLog.meissner and REVISION.


[gcc(refs/users/meissner/heads/work178-dmf)] Add ChangeLog.dmf and update REVISION.

2024-09-12 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:0ad31e9889b0c18d363f724d91c541ccae2337c1

commit 0ad31e9889b0c18d363f724d91c541ccae2337c1
Author: Michael Meissner 
Date:   Thu Sep 12 15:26:50 2024 -0400

Add ChangeLog.dmf and update REVISION.

2024-09-12  Michael Meissner  

gcc/

* ChangeLog.dmf: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.dmf | 6 ++
 gcc/REVISION  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.dmf b/gcc/ChangeLog.dmf
new file mode 100644
index ..d94fb1b8de24
--- /dev/null
+++ b/gcc/ChangeLog.dmf
@@ -0,0 +1,6 @@
+ Branch work178-dmf, baseline 
+
+2024-09-12   Michael Meissner  
+
+   Clone branch
+
diff --git a/gcc/REVISION b/gcc/REVISION
index f0e6035292ab..5bda8286629d 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work178 branch
+work178-dmf branch


[gcc] Created branch 'meissner/heads/work178-vpair' in namespace 'refs/users'

2024-09-12 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work178-vpair' was created in namespace 'refs/users' 
pointing to:

 56d2af436807... Add ChangeLog.meissner and REVISION.


[gcc(refs/users/meissner/heads/work178-vpair)] Add ChangeLog.vpair and update REVISION.

2024-09-12 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:f7abfc56ea5df212c9746ce03a693d5ef5526829

commit f7abfc56ea5df212c9746ce03a693d5ef5526829
Author: Michael Meissner 
Date:   Thu Sep 12 15:27:48 2024 -0400

Add ChangeLog.vpair and update REVISION.

2024-09-12  Michael Meissner  

gcc/

* ChangeLog.vpair: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.vpair | 6 ++
 gcc/REVISION| 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.vpair b/gcc/ChangeLog.vpair
new file mode 100644
index ..4350f0a50f5f
--- /dev/null
+++ b/gcc/ChangeLog.vpair
@@ -0,0 +1,6 @@
+ Branch work178-vpair, baseline 
+
+2024-09-12   Michael Meissner  
+
+   Clone branch
+
diff --git a/gcc/REVISION b/gcc/REVISION
index f0e6035292ab..55bdd433a1dd 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work178 branch
+work178-vpair branch


[gcc] Created branch 'meissner/heads/work178-tar' in namespace 'refs/users'

2024-09-12 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work178-tar' was created in namespace 'refs/users' 
pointing to:

 56d2af436807... Add ChangeLog.meissner and REVISION.


[gcc(refs/users/meissner/heads/work178-tar)] Add ChangeLog.tar and update REVISION.

2024-09-12 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:a5d043977b95b1752bf17ea84cc091cd1ddc1636

commit a5d043977b95b1752bf17ea84cc091cd1ddc1636
Author: Michael Meissner 
Date:   Thu Sep 12 15:28:38 2024 -0400

Add ChangeLog.tar and update REVISION.

2024-09-12  Michael Meissner  

gcc/

* ChangeLog.tar: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.tar | 6 ++
 gcc/REVISION  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.tar b/gcc/ChangeLog.tar
new file mode 100644
index ..0e7ea4d69602
--- /dev/null
+++ b/gcc/ChangeLog.tar
@@ -0,0 +1,6 @@
+ Branch work178-tar, baseline 
+
+2024-09-12   Michael Meissner  
+
+   Clone branch
+
diff --git a/gcc/REVISION b/gcc/REVISION
index f0e6035292ab..249a722e02a7 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work178 branch
+work178-tar branch


[gcc] Created branch 'meissner/heads/work178-bugs' in namespace 'refs/users'

2024-09-12 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work178-bugs' was created in namespace 'refs/users' 
pointing to:

 56d2af436807... Add ChangeLog.meissner and REVISION.


[gcc(refs/users/meissner/heads/work178-bugs)] Add ChangeLog.bugs and update REVISION.

2024-09-12 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:4ed9d16a0b781671ce4e5aa8f530cd504c626e34

commit 4ed9d16a0b781671ce4e5aa8f530cd504c626e34
Author: Michael Meissner 
Date:   Thu Sep 12 15:29:44 2024 -0400

Add ChangeLog.bugs and update REVISION.

2024-09-12  Michael Meissner  

gcc/

* ChangeLog.bugs: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.bugs | 6 ++
 gcc/REVISION   | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.bugs b/gcc/ChangeLog.bugs
new file mode 100644
index ..8413d3fa1ca2
--- /dev/null
+++ b/gcc/ChangeLog.bugs
@@ -0,0 +1,6 @@
+ Branch work178-bugs, baseline 
+
+2024-09-12   Michael Meissner  
+
+   Clone branch
+
diff --git a/gcc/REVISION b/gcc/REVISION
index f0e6035292ab..eae270db9f81 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work178 branch
+work178-bugs branch


[gcc] Created branch 'meissner/heads/work178-libs' in namespace 'refs/users'

2024-09-12 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work178-libs' was created in namespace 'refs/users' 
pointing to:

 56d2af436807... Add ChangeLog.meissner and REVISION.


[gcc(refs/users/meissner/heads/work178-libs)] Add ChangeLog.libs and update REVISION.

2024-09-12 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:5a3b5823db6d2e7a942507b8b0f2b3786d17a438

commit 5a3b5823db6d2e7a942507b8b0f2b3786d17a438
Author: Michael Meissner 
Date:   Thu Sep 12 15:30:36 2024 -0400

Add ChangeLog.libs and update REVISION.

2024-09-12  Michael Meissner  

gcc/

* ChangeLog.libs: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.libs | 6 ++
 gcc/REVISION   | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.libs b/gcc/ChangeLog.libs
new file mode 100644
index ..cf5c7cc035f2
--- /dev/null
+++ b/gcc/ChangeLog.libs
@@ -0,0 +1,6 @@
+ Branch work178-libs, baseline 
+
+2024-09-12   Michael Meissner  
+
+   Clone branch
+
diff --git a/gcc/REVISION b/gcc/REVISION
index f0e6035292ab..bb0330a068b1 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work178 branch
+work178-libs branch


[gcc] Created branch 'meissner/heads/work178-test' in namespace 'refs/users'

2024-09-12 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work178-test' was created in namespace 'refs/users' 
pointing to:

 56d2af436807... Add ChangeLog.meissner and REVISION.


[gcc(refs/users/meissner/heads/work178-test)] Add ChangeLog.test and update REVISION.

2024-09-12 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:8124f1e6aff8f42b5497c8463bed244f2d9729e3

commit 8124f1e6aff8f42b5497c8463bed244f2d9729e3
Author: Michael Meissner 
Date:   Thu Sep 12 15:31:34 2024 -0400

Add ChangeLog.test and update REVISION.

2024-09-12  Michael Meissner  

gcc/

* ChangeLog.test: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.test | 6 ++
 gcc/REVISION   | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.test b/gcc/ChangeLog.test
new file mode 100644
index ..e4a1af6026ec
--- /dev/null
+++ b/gcc/ChangeLog.test
@@ -0,0 +1,6 @@
+ Branch work178-test, baseline 
+
+2024-09-12   Michael Meissner  
+
+   Clone branch
+
diff --git a/gcc/REVISION b/gcc/REVISION
index f0e6035292ab..c238c7a0e337 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work178 branch
+work178-test branch


[gcc] Created branch 'meissner/heads/work178-orig' in namespace 'refs/users'

2024-09-12 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work178-orig' was created in namespace 'refs/users' 
pointing to:

 19d751601d01... i386: Implement SAT_ADD for signed vector integers


[gcc(refs/users/meissner/heads/work178-orig)] Add REVISION.

2024-09-12 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:bc34cca3e304c769bc1544ad9bc88213558849e1

commit bc34cca3e304c769bc1544ad9bc88213558849e1
Author: Michael Meissner 
Date:   Thu Sep 12 15:32:31 2024 -0400

Add REVISION.

2024-09-12  Michael Meissner  

gcc/

* REVISION: New file for branch.

Diff:
---
 gcc/REVISION | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/REVISION b/gcc/REVISION
new file mode 100644
index ..2bd14dbdf2e4
--- /dev/null
+++ b/gcc/REVISION
@@ -0,0 +1 @@
+work178-orig branch


[gcc r15-3613] Implement modules for UNSIGNED.

2024-09-12 Thread Thomas König via Gcc-cvs
https://gcc.gnu.org/g:2847a541c1f19b67ae84be8d0f6dc8e1f9371d16

commit r15-3613-g2847a541c1f19b67ae84be8d0f6dc8e1f9371d16
Author: Steven G. Kargl 
Date:   Thu Sep 12 21:33:31 2024 +0200

Implement modules for UNSIGNED.

gcc/fortran/ChangeLog:

* module.cc (bt_types): Add BT_UNSIGNED.

gcc/testsuite/ChangeLog:

* gfortran.dg/unsigned_kiss.f90: New test.

Diff:
---
 gcc/fortran/module.cc   |   1 +
 gcc/testsuite/gfortran.dg/unsigned_kiss.f90 | 100 
 2 files changed, 101 insertions(+)

diff --git a/gcc/fortran/module.cc b/gcc/fortran/module.cc
index c565b84d61b7..8cf58ff51429 100644
--- a/gcc/fortran/module.cc
+++ b/gcc/fortran/module.cc
@@ -2781,6 +2781,7 @@ static const mstring bt_types[] = {
 minit ("UNKNOWN", BT_UNKNOWN),
 minit ("VOID", BT_VOID),
 minit ("ASSUMED", BT_ASSUMED),
+minit ("UNSIGNED", BT_UNSIGNED),
 minit (NULL, -1)
 };
 
diff --git a/gcc/testsuite/gfortran.dg/unsigned_kiss.f90 
b/gcc/testsuite/gfortran.dg/unsigned_kiss.f90
new file mode 100644
index ..46ee86ccd263
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/unsigned_kiss.f90
@@ -0,0 +1,100 @@
+!
+! { dg-do run }
+! { dg-options "-funsigned" }
+!
+! Modern Fortran rewrite of Marsaglia's 64-bit KISS PRNG.
+! https://www.thecodingforums.com/threads/64-bit-kiss-rngs.673657/
+!
+module kissm
+
+   implicit none
+   private
+   public uk, kseed, kiss
+
+   integer, parameter :: uk = kind(1u_8)  ! Check kind() works.
+
+   ! Default seeds.  Checks unsigned with parameter attribute.
+   unsigned(uk), parameter :: seed(4) = [ &
+   &  1234567890987654321u_uk, 362436362436362436u_uk, &
+   &  1066149217761810u_uk, 123456123456123456u_uk ]
+
+   ! Seeds used during generation
+   unsigned(uk), save :: sd(4) = seed
+
+   contains
+
+  ! Tests unsigned in an internal function.
+  function s(x)
+ unsigned(uk) s
+ unsigned(uk), intent(in) :: x
+ s = ishft(x, -63)! Tests ishft
+  end function
+
+  ! Poor seeding routine.  Need to check v for entropy!
+  ! Tests intent(in) and optional attributes.
+  ! Tests ishftc() and array constructors.
+  subroutine kseed(v)
+ unsigned(uk), intent(in), optional :: v
+ if (present(v)) then
+sd = seed + [ishftc(v,1), ishftc(v,15), ishftc(v,31), ishftc(v,44)]
+ else
+sd = seed
+ end if
+  end subroutine kseed
+
+  function kiss()
+ unsigned(uk) kiss
+ unsigned(uk) m, t
+ integer k
+
+ ! Test unsigned in a statement function
+ m(t, k) = ieor(t, ishft(t, k))
+
+ t = ishft(sd(1), 58) + sd(4)
+ if (s(sd(1)) == s(t)) then
+sd(4) = ishft(sd(1), -6) + s(sd(1))
+ else
+sd(4) = ishft(sd(1), -6) + 1u_uk - s(sd(1) + t)
+ endif
+
+ sd(1) = t + sd(1)
+ sd(2) = m(m(m(sd(2), 13), -17), 43)
+ sd(3) = 6906969069u_uk * sd(3) + 1234567u_uk
+ kiss = sd(1) + sd(2) + sd(3)
+  end function kiss
+  
+end module kissm
+
+program testkiss
+   use kissm
+   integer, parameter :: n = 4
+   unsigned(uk) prn(4)
+
+   ! Default sequence
+   unsigned(uk), parameter :: a(4) = [8932985056925012148u_uk, &
+   &  5710300428094272059u_uk, 18342510866933518593u_uk,   &
+   &  14303636270573868250u_uk]
+   
+   ! Sequence with the seed 123412341234u_uk
+   unsigned(uk), parameter :: b(4) = [4002508872477953753u_uk, &
+   &  18025327658415290923u_uk,  16058856976144281263u_uk, &
+   &  11842224026193909403u_uk]
+
+   do i = 1, n
+  prn(i) = kiss()
+   end do
+   if (any(prn /= a)) stop 1
+
+   call kseed(123412341234u_uk)
+   do i = 1, n
+  prn(i) = kiss()
+   end do
+   if (any(prn /= b)) stop 2
+
+   call kseed()
+   do i = 1, n
+  prn(i) = kiss()
+   end do
+   if (any(prn /= a)) stop 3
+
+end program testkiss


[gcc(refs/users/meissner/heads/work178)] Add rs6000 architecture masks.

2024-09-12 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:77cfcb47453e1843f639920100d6171036bf01af

commit 77cfcb47453e1843f639920100d6171036bf01af
Author: Michael Meissner 
Date:   Thu Sep 12 15:41:03 2024 -0400

Add rs6000 architecture masks.

This patch begins the journey to move architecture bits that are not user 
ISA
options from rs6000_isa_flags to a new target variable rs6000_arch_flags.  
The
intention is to remove switches that are currently isa options, but the user
should not be using this particular option. For example, we want users to 
use
-mcpu=power10 and not just -mpower10.

This patch also changes the target_clones support to use an architecture 
mask
instead of isa bits.

This patch also switches the handling of .machine to use architecture masks 
if
they exist (power4 through power11).  All of the other PowerPCs will 
continue to
use the existing code for setting the .machine option.

I have built both big endian and little endian bootstrap compilers and there
were no regressions.

In addition, I constructed a test case that used every architecture define 
(like
_ARCH_PWR4, etc.) and I also looked at the .machine directive generated.  I 
ran
this test for all supported combinations of -mcpu, big/little endian, and 
32/64
bit support.  Every single instance generated exactly the same code with the
patches installed compared to the compiler before installing the patches.

Can I install this patch on the GCC 15 trunk?

2024-09-12  Michael Meissner  

gcc/

* config/rs6000/rs6000-arch.def: New file.
* config/rs6000/rs6000.cc (struct clone_map): Switch to using
architecture masks instead of ISA masks.
(rs6000_clone_map): Likewise.
(rs6000_print_isa_options): Add an architecture flags argument, 
change
all callers.
(get_arch_flag): New function.
(rs6000_debug_reg_global): Update rs6000_print_isa_options calls.
(rs6000_option_override_internal): Likewise.
(rs6000_machine_from_flags): Switch to using architecture masks 
instead
of ISA masks.
(struct rs6000_arch_mask): New structure.
(rs6000_arch_masks): New table of architecture masks and names.
(rs6000_function_specific_save): Save architecture flags.
(rs6000_function_specific_restore): Restore architecture flags.
(rs6000_function_specific_print): Update rs6000_print_isa_options 
calls.
(rs6000_print_options_internal): Add architecture flags options.
(rs6000_clone_priority): Switch to using architecture masks instead 
of
ISA masks.
(rs6000_can_inline_p): Don't allow inlining if the callee requires a 
newer
architecture than the caller.
* config/rs6000/rs6000.h: Use rs6000-arch.def to create the 
architecture
masks.
* config/rs6000/rs6000.opt (rs6000_arch_flags): New target variable.
(x_rs6000_arch_flags): New save/restore field for rs6000_arch_flags.

Diff:
---
 gcc/config/rs6000/rs6000-arch.def |  48 +
 gcc/config/rs6000/rs6000.cc   | 215 +++---
 gcc/config/rs6000/rs6000.h|  24 +
 gcc/config/rs6000/rs6000.opt  |   8 ++
 4 files changed, 259 insertions(+), 36 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-arch.def 
b/gcc/config/rs6000/rs6000-arch.def
new file mode 100644
index ..e5b6e9581331
--- /dev/null
+++ b/gcc/config/rs6000/rs6000-arch.def
@@ -0,0 +1,48 @@
+/* IBM RS/6000 CPU architecture features by processor type.
+   Copyright (C) 1991-2024 Free Software Foundation, Inc.
+   Contributed by Richard Kenner (ken...@vlsi1.ultra.nyu.edu)
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+/* This file defines architecture features that are based on the -mcpu=
+   option, and not on user options that can be turned on or off.  The intention
+   is for newer processors (power7 and above) to not add new ISA bits for the
+   particular processor, but add these bits.  Otherwise we have to add a bunch
+   of hidden options, just so we have the proper ISA bits.
+
+   For example, in the past we added -mpower8-internal, so that on power8,
+   power9, and power10 would inherit the optio

[gcc(refs/users/meissner/heads/work178)] Use architecture flags for defining _ARCH_PWR macros.

2024-09-12 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:fbd5596704c2d9a183464c37c46a7f551de0531e

commit fbd5596704c2d9a183464c37c46a7f551de0531e
Author: Michael Meissner 
Date:   Thu Sep 12 15:42:33 2024 -0400

Use architecture flags for defining _ARCH_PWR macros.

For the newer architectures, this patch changes GCC to define the 
_ARCH_PWR
macros using the new architecture flags instead of relying on isa options 
like
-mpower10.

The -mpower8-internal, -mpower10, and -mpower11 options were removed.  The
-mpower11 option was removed completely, since it was just added in GCC 15. 
 The
other two options were marked as WarnRemoved, and the various ISA bits were
removed.

TARGET_POWER8 and TARGET_POWER10 were re-defined to use the architecture bits
instead of the ISA bits.

There are other internal isa bits that aren't removed with this patch 
because
the built-in function support uses those bits.

I have built both big endian and little endian bootstrap compilers and there
were no regressions.

In addition, I constructed a test case that used every architecture define 
(like
_ARCH_PWR4, etc.) and I also looked at the .machine directive generated.  I 
ran
this test for all supported combinations of -mcpu, big/little endian, and 
32/64
bit support.  Every single instance generated exactly the same code with the
patches installed compared to the compiler before installing the patches.

Can I install this patch on the GCC 15 trunk?

2024-09-12  Michael Meissner  

gcc/

* config/rs6000/rs6000-c.cc (rs6000_target_modify_macros): Add 
support to
use architecture flags instead of ISA flags for setting most of the
_ARCH_PWR* macros.
(rs6000_cpu_cpp_builtins): Update rs6000_target_modify_macros call.
* config/rs6000/rs6000-cpus.def (ISA_2_7_MASKS_SERVER): Remove
OPTION_MASK_POWER8.
(ISA_3_1_MASKS_SERVER): Remove OPTION_MASK_POWER10.
(POWER11_MASKS_SERVER): Remove OPTION_MASK_POWER11.
(POWERPC_MASKS): Remove OPTION_MASK_POWER8, OPTION_MASK_POWER10, and
OPTION_MASK_POWER11.
* config/rs6000/rs6000-protos.h (rs6000_target_modify_macros): 
Update
declaration.
(rs6000_target_modify_macros_ptr): Likewise.
* config/rs6000/rs6000.cc (rs6000_target_modify_macros_ptr): 
Likewise.
(rs6000_option_override_internal): Use architecture flags instead 
of ISA
flags.
(rs6000_opt_masks): Remove -mpower10 and -mpower11, which are no 
longer
in the ISA flags.
(rs6000_pragma_target_parse): Use architecture flags as well as ISA
flags.
* config/rs6000/rs6000.h (TARGET_POWER4): New macro.
(TARGET_POWER5): Likewise.
(TARGET_POWER5X): Likewise.
(TARGET_POWER6): Likewise.
(TARGET_POWER7): Likewise.
(TARGET_POWER8): Likewise.
(TARGET_POWER9): Likewise.
(TARGET_POWER10): Likewise.
(TARGET_POWER11): Likewise.
* config/rs6000/rs6000.opt (-mpower8-internal): Remove ISA flag 
bits.
(-mpower10): Likewise.
(-mpower11): Likewise.

Diff:
---
 gcc/config/rs6000/rs6000-c.cc | 27 +++
 gcc/config/rs6000/rs6000-cpus.def |  8 +---
 gcc/config/rs6000/rs6000-protos.h |  5 +++--
 gcc/config/rs6000/rs6000.cc   | 19 +++
 gcc/config/rs6000/rs6000.h| 20 
 gcc/config/rs6000/rs6000.opt  | 11 ++-
 6 files changed, 52 insertions(+), 38 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-c.cc b/gcc/config/rs6000/rs6000-c.cc
index 04882c396bfe..c8f33289fa38 100644
--- a/gcc/config/rs6000/rs6000-c.cc
+++ b/gcc/config/rs6000/rs6000-c.cc
@@ -338,7 +338,8 @@ rs6000_define_or_undefine_macro (bool define_p, const char 
*name)
#pragma GCC target, we need to adjust the macros dynamically.  */
 
 void
-rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags)
+rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags,
+HOST_WIDE_INT arch_flags)
 {
   if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
 fprintf (stderr,
@@ -411,7 +412,7 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT 
flags)
summary of the flags associated with particular cpu
definitions.  */
 
-  /* rs6000_isa_flags based options.  */
+  /* rs6000_isa_flags and rs6000_arch_flags based options.  */
   rs6000_define_or_undefine_macro (define_p, "_ARCH_PPC");
   if ((flags & OPTION_MASK_PPC_GPOPT) != 0)
 rs6000_define_or_undefine_macro (define_p, "_ARCH_PPCSQ");
@@ -419,23 +420,25 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT 
flags)
 rs6000_define_or_undefine_macro (define_p, "_ARCH_PPCGR");
   if ((flags & OPTION_MASK_POWERPC64) != 0)
 rs6000_define_or_undefin

[gcc(refs/users/meissner/heads/work178)] Do not allow -mvsx to boost processor to power7.

2024-09-12 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:59c893c3b58e6f7e8e55263681276a7f8f3024eb

commit 59c893c3b58e6f7e8e55263681276a7f8f3024eb
Author: Michael Meissner 
Date:   Thu Sep 12 15:43:32 2024 -0400

Do not allow -mvsx to boost processor to power7.

This patch restructures the code so that -mvsx for example will not silently
convert the processor to power7.  The user must now use -mcpu=power7 or 
higher.
This means if the user does -mvsx and the default processor does not have 
VSX
support, it will be an error.

I have built both big endian and little endian bootstrap compilers and there
were no regressions.

In addition, I constructed a test case that used every architecture define
(like
_ARCH_PWR4, etc.) and I also looked at the .machine directive generated.  I 
ran
this test for all supported combinations of -mcpu, big/little endian, and 
32/64
bit support.  Every single instance generated exactly the same code with the
patches installed compared to the compiler before installing the patches.

Can I install this patch on the GCC 15 trunk?

2024-09-12  Michael Meissner  

gcc/

* config/rs6000/rs6000.cc (report_architecture_mismatch): New 
function.
Report an error if the user used an option such as -mvsx when the
default processor would not allow the option.
(rs6000_option_override_internal): Move some ISA checking code into
report_architecture_mismatch.

Diff:
---
 gcc/config/rs6000/rs6000.cc | 129 +++-
 1 file changed, 79 insertions(+), 50 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 8388542b7210..a944ffde28a6 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -1173,6 +1173,7 @@ const int INSN_NOT_AVAILABLE = -1;
 static void rs6000_print_isa_options (FILE *, int, const char *,
  HOST_WIDE_INT, HOST_WIDE_INT);
 static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);
+static void report_architecture_mismatch (void);
 
 static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
 static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
@@ -3695,7 +3696,6 @@ rs6000_option_override_internal (bool global_init_p)
   bool ret = true;
 
   HOST_WIDE_INT set_masks;
-  HOST_WIDE_INT ignore_masks;
   int cpu_index = -1;
   int tune_index;
   struct cl_target_option *main_target_opt
@@ -3964,59 +3964,13 @@ rs6000_option_override_internal (bool global_init_p)
 dwarf_offset_size = POINTER_SIZE_UNITS;
 #endif
 
-  /* Handle explicit -mno-{altivec,vsx} and turn off all of
- the options that depend on those flags.  */
-  ignore_masks = rs6000_disable_incompatible_switches ();
-
-  /* For the newer switches (vsx, dfp, etc.) set some of the older options,
- unless the user explicitly used the -mno- to disable the code.  */
-  if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_MISC)
-rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
-  else if (TARGET_P9_MINMAX)
-{
-  if (cpu_index >= 0)
-   {
- if (cpu_index == PROCESSOR_POWER9)
-   {
- /* legacy behavior: allow -mcpu=power9 with certain
-capabilities explicitly disabled.  */
- rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
-   }
- else
-   error ("power9 target option is incompatible with %<%s=%> "
-  "for  less than power9", "-mcpu");
-   }
-  else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
-  != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
-  & rs6000_isa_flags_explicit))
-   /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
-  were explicitly cleared.  */
-   error ("%qs incompatible with explicitly disabled options",
-  "-mpower9-minmax");
-  else
-   rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
-}
-  else if (TARGET_P8_VECTOR || TARGET_POWER8 || TARGET_CRYPTO)
-rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
-  else if (TARGET_VSX)
-rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
-  else if (TARGET_POPCNTD)
-rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
-  else if (TARGET_DFP)
-rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
-  else if (TARGET_CMPB)
-rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
-  else if (TARGET_FPRND)
-rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
-  else if (TARGET_POPCNTB)
-rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
-  else if (TARGET_ALTIVEC)
-rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
+  /* Report trying to use things like -mmodulo to imply -mcpu=power9.  */
+  report_architecture_mismatch ();
 
   /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
  target a

[gcc(refs/users/meissner/heads/work178)] Change TARGET_POPCNTB to TARGET_POWER5

2024-09-12 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:71a8da6141287722fa1099426b59e199d25d44cc

commit 71a8da6141287722fa1099426b59e199d25d44cc
Author: Michael Meissner 
Date:   Thu Sep 12 15:45:05 2024 -0400

Change TARGET_POPCNTB to TARGET_POWER5

As part of the architecture flags patches, this patch changes the use of
TARGET_POPCNTB to TARGET_POWER5.  The POPCNTB instruction was added in ISA 
2.02
(power5).

I have built both big endian and little endian bootstrap compilers and there
were no regressions.

In addition, I constructed a test case that used every architecture define
(like
_ARCH_PWR4, etc.) and I also looked at the .machine directive generated.  I 
ran
this test for all supported combinations of -mcpu, big/little endian, and 
32/64
bit support.  Every single instance generated exactly the same code with the
patches installed compared to the compiler before installing the patches.

Can I install this patch on the GCC 15 trunk?

2024-09-12  Michael Meissner  

* config/rs6000/rs6000-builtin.cc (rs6000_builtin_is_supported): Use
TARGET_POWER5 instead of TARGET_POPCNTB.
* config/rs6000/rs6000.h (TARGET_EXTRA_BUILTINS): Use TARGET_POWER5
instead of TARGET_POPCNTB.  Eliminate TARGET_CMPB and TARGET_POPCNTD
tests since TARGET_POWER5 will always be true for those tests.
(TARGET_FRE): Use TARGET_POWER5 instead of TARGET_POPCNTB.
(TARGET_FRSQRTES): Likewise.
* config/rs6000/rs6000.md (enabled attribute): Likewise.
(popcount): Use TARGET_POWER5 instead of TARGET_POPCNTB.  Drop
test for TARGET_POPCNTD (i.e. power7), since TARGET_POPCNTB will
always
be set if TARGET_POPCNTD is set.
(popcntb2): Use TARGET_POWER5 instead of TARGET_POPCNTB.
(parity2): Likewise.
(parity2_cmpb): Remove TARGET_POPCNTB test, since it will 
always
be true when TARGET_CMPB (i.e. power6) is set.

Diff:
---
 gcc/config/rs6000/rs6000-builtin.cc |  2 +-
 gcc/config/rs6000/rs6000.h  |  8 +++-
 gcc/config/rs6000/rs6000.md | 10 +-
 3 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin.cc 
b/gcc/config/rs6000/rs6000-builtin.cc
index 9bdbae1ecf94..98a0545030cd 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -155,7 +155,7 @@ rs6000_builtin_is_supported (enum rs6000_gen_builtins 
fncode)
 case ENB_ALWAYS:
   return true;
 case ENB_P5:
-  return TARGET_POPCNTB;
+  return TARGET_POWER5;
 case ENB_P6:
   return TARGET_CMPB;
 case ENB_P6_64:
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 7ad8baca177a..4500724d895c 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -547,9 +547,7 @@ extern int rs6000_vector_align[];
 
 #define TARGET_EXTRA_BUILTINS  (TARGET_POWERPC64\
 || TARGET_PPC_GPOPT /* 970/power4 */\
-|| TARGET_POPCNTB   /* ISA 2.02 */  \
-|| TARGET_CMPB  /* ISA 2.05 */  \
-|| TARGET_POPCNTD   /* ISA 2.06 */  \
+|| TARGET_POWER5/* ISA 2.02 & above */ \
 || TARGET_ALTIVEC   \
 || TARGET_VSX   \
 || TARGET_HARD_FLOAT)
@@ -563,9 +561,9 @@ extern int rs6000_vector_align[];
 #define TARGET_FRES(TARGET_HARD_FLOAT && TARGET_PPC_GFXOPT)
 
 #define TARGET_FRE (TARGET_HARD_FLOAT \
-&& (TARGET_POPCNTB || VECTOR_UNIT_VSX_P (DFmode)))
+&& (TARGET_POWER5 || VECTOR_UNIT_VSX_P (DFmode)))
 
-#define TARGET_FRSQRTES(TARGET_HARD_FLOAT && TARGET_POPCNTB \
+#define TARGET_FRSQRTES(TARGET_HARD_FLOAT && TARGET_POWER5 \
 && TARGET_PPC_GFXOPT)
 
 #define TARGET_FRSQRTE (TARGET_HARD_FLOAT \
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 8eda2f7bb0d7..10d13bf812d2 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -379,7 +379,7 @@
  (const_int 1)
 
  (and (eq_attr "isa" "p5")
- (match_test "TARGET_POPCNTB"))
+ (match_test "TARGET_POWER5"))
  (const_int 1)
 
  (and (eq_attr "isa" "p6")
@@ -2510,7 +2510,7 @@
 (define_expand "popcount2"
   [(set (match_operand:GPR 0 "gpc_reg_operand")
(popcount:GPR (match_operand:GPR 1 "gpc_reg_operand")))]
-  "TARGET_POPCNTB || TARGET_POPCNTD"
+  "TARGET_POWER5"
 {
   rs6000_emit_popcount (operands[0], operands[1]);
   DONE;
@@ -2520,7 +2520,7 @@
   [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
(unspec:GPR [(match_operand:GPR 1 "gpc_reg_operand" "r")]
UNS

[gcc(refs/users/meissner/heads/work178)] Change TARGET_FPRND to TARGET_POWER5X

2024-09-12 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:c290843ac5164175d4813e1d6edb5216930d1348

commit c290843ac5164175d4813e1d6edb5216930d1348
Author: Michael Meissner 
Date:   Thu Sep 12 15:47:07 2024 -0400

Change TARGET_FPRND to TARGET_POWER5X

As part of the architecture flags patches, this patch changes the use of
TARGET_FPRND to TARGET_POWER5X.  The FPRND instruction was added in power5+.

I have built both big endian and little endian bootstrap compilers and there
were no regressions.

In addition, I constructed a test case that used every architecture define
(like
_ARCH_PWR4, etc.) and I also looked at the .machine directive generated.  I 
ran
this test for all supported combinations of -mcpu, big/little endian, and 
32/64
bit support.  Every single instance generated exactly the same code with the
patches installed compared to the compiler before installing the patches.

Can I install this patch on the GCC 15 trunk?

2024-09-12  Michael Meissner  

* config/rs6000/rs6000.cc (report_architecture_mismatch): Use
TARGET_POWER5X instead of TARGET_FPRND.
* config/rs6000/rs6000.md (fmod3): Use TARGET_POWER5X instead 
of
TARGET_FPRND.
(remainder3): Likewise.
(fctiwuz_): Likewise.
(btrunc2): Likewise.
(ceil2): Likewise.
(floor2): Likewise.
(round): Likewise.

Diff:
---
 gcc/config/rs6000/rs6000.cc |  2 +-
 gcc/config/rs6000/rs6000.md | 14 +++---
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index a944ffde28a6..dd51d75c4957 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -25428,7 +25428,7 @@ report_architecture_mismatch (void)
 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
   else if (TARGET_CMPB)
 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
-  else if (TARGET_FPRND)
+  else if (TARGET_POWER5X)
 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
   else if (TARGET_POPCNTB)
 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 10d13bf812d2..7f9fe609a031 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -5171,7 +5171,7 @@
(use (match_operand:SFDF 1 "gpc_reg_operand"))
(use (match_operand:SFDF 2 "gpc_reg_operand"))]
   "TARGET_HARD_FLOAT
-   && TARGET_FPRND
+   && TARGET_POWER5X
&& flag_unsafe_math_optimizations"
 {
   rtx div = gen_reg_rtx (mode);
@@ -5189,7 +5189,7 @@
(use (match_operand:SFDF 1 "gpc_reg_operand"))
(use (match_operand:SFDF 2 "gpc_reg_operand"))]
   "TARGET_HARD_FLOAT
-   && TARGET_FPRND
+   && TARGET_POWER5X
&& flag_unsafe_math_optimizations"
 {
   rtx div = gen_reg_rtx (mode);
@@ -6687,7 +6687,7 @@
 (define_insn "*friz"
   [(set (match_operand:DF 0 "gpc_reg_operand" "=d,wa")
(float:DF (fix:DI (match_operand:DF 1 "gpc_reg_operand" "d,wa"]
-  "TARGET_HARD_FLOAT && TARGET_FPRND
+  "TARGET_HARD_FLOAT && TARGET_POWER5X
&& flag_unsafe_math_optimizations && !flag_trapping_math && TARGET_FRIZ"
   "@
friz %0,%1
@@ -6815,7 +6815,7 @@
   [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d,wa")
(unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "d,wa")]
 UNSPEC_FRIZ))]
-  "TARGET_HARD_FLOAT && TARGET_FPRND"
+  "TARGET_HARD_FLOAT && TARGET_POWER5X"
   "@
friz %0,%1
xsrdpiz %x0,%x1"
@@ -6825,7 +6825,7 @@
   [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d,wa")
(unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "d,wa")]
 UNSPEC_FRIP))]
-  "TARGET_HARD_FLOAT && TARGET_FPRND"
+  "TARGET_HARD_FLOAT && TARGET_POWER5X"
   "@
frip %0,%1
xsrdpip %x0,%x1"
@@ -6835,7 +6835,7 @@
   [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d,wa")
(unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "d,wa")]
 UNSPEC_FRIM))]
-  "TARGET_HARD_FLOAT && TARGET_FPRND"
+  "TARGET_HARD_FLOAT && TARGET_POWER5X"
   "@
frim %0,%1
xsrdpim %x0,%x1"
@@ -6846,7 +6846,7 @@
   [(set (match_operand:SFDF 0 "gpc_reg_operand" "=")
(unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "")]
 UNSPEC_FRIN))]
-  "TARGET_HARD_FLOAT && TARGET_FPRND"
+  "TARGET_HARD_FLOAT && TARGET_POWER5X"
   "frin %0,%1"
   [(set_attr "type" "fp")])


[gcc(refs/users/meissner/heads/work178)] Change TARGET_POPCNTD to TARGET_POWER7

2024-09-12 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:30a040f6304e4c1b43ca30a954b52ab195d79aa6

commit 30a040f6304e4c1b43ca30a954b52ab195d79aa6
Author: Michael Meissner 
Date:   Thu Sep 12 15:49:19 2024 -0400

Change TARGET_POPCNTD to TARGET_POWER7

As part of the architecture flags patches, this patch changes the use of
TARGET_POPCNTD to TARGET_POWER7.  The POPCNTD instruction was added in 
power7
(ISA 2.06).

I have built both big endian and little endian bootstrap compilers and there
were no regressions.

In addition, I constructed a test case that used every architecture define
(like
_ARCH_PWR4, etc.) and I also looked at the .machine directive generated.  I 
ran
this test for all supported combinations of -mcpu, big/little endian, and 
32/64
bit support.  Every single instance generated exactly the same code with the
patches installed compared to the compiler before installing the patches.

Can I install this patch on the GCC 15 trunk?

2024-09-12  Michael Meissner  

* config/rs6000/dfp.md (floatdidd2): Change TARGET_POPCNTD to
TARGET_POWER7.
* config/rs6000/rs6000-builtin.cc (rs6000_builtin_is_supported):
Likewise.
* config/rs6000/rs6000-string.cc (expand_block_compare_gpr): 
Likewise.
* config/rs6000/rs6000.cc (rs6000_hard_regno_mode_ok_uncached):
Likewise.
(rs6000_rtx_costs): Likewise.
(rs6000_emit_popcount): Likewise.
* config/rs6000/rs6000.h (TARGET_LDBRX): Likewise.
(TARGET_LFIWZX): Likewise.
(TARGET_FCFIDS): Likewise.
(TARGET_FCFIDU): Likewise.
(TARGET_FCFIDUS): Likewise.
(TARGET_FCTIDUZ): Likewise.
(TARGET_FCTIWUZ): Likewise.
(CTZ_DEFINED_VALUE_AT_ZERO): Likewise.
* config/rs6000/rs6000.md (enabled attribute): Likewise.
(ctz2): Likewise.
(popcntd2): Likewise.
(lrintsi2): Likewise.
(lrintsi): Likewise.
(lrintsi_di): Likewise.
(cmpmemsi): Likewise.
(bpermd_"): Likewise.
(addg6s): Likewise.
(cdtbcd): Likewise.
(cbcdtd): Likewise.
(div_): Likewise.

Diff:
---
 gcc/config/rs6000/dfp.md|  2 +-
 gcc/config/rs6000/rs6000-builtin.cc |  4 ++--
 gcc/config/rs6000/rs6000-string.cc  |  4 ++--
 gcc/config/rs6000/rs6000.cc |  6 +++---
 gcc/config/rs6000/rs6000.h  | 16 
 gcc/config/rs6000/rs6000.md | 24 
 6 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/gcc/config/rs6000/dfp.md b/gcc/config/rs6000/dfp.md
index fa9d7dd45dd3..b8189390d410 100644
--- a/gcc/config/rs6000/dfp.md
+++ b/gcc/config/rs6000/dfp.md
@@ -214,7 +214,7 @@
 (define_insn "floatdidd2"
   [(set (match_operand:DD 0 "gpc_reg_operand" "=d")
(float:DD (match_operand:DI 1 "gpc_reg_operand" "d")))]
-  "TARGET_DFP && TARGET_POPCNTD"
+  "TARGET_DFP && TARGET_POWER7"
   "dcffix %0,%1"
   [(set_attr "type" "dfp")])
 
diff --git a/gcc/config/rs6000/rs6000-builtin.cc 
b/gcc/config/rs6000/rs6000-builtin.cc
index 76421bd1de0b..dae43b672ea7 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -161,9 +161,9 @@ rs6000_builtin_is_supported (enum rs6000_gen_builtins 
fncode)
 case ENB_P6_64:
   return TARGET_POWER6 && TARGET_POWERPC64;
 case ENB_P7:
-  return TARGET_POPCNTD;
+  return TARGET_POWER7;
 case ENB_P7_64:
-  return TARGET_POPCNTD && TARGET_POWERPC64;
+  return TARGET_POWER7 && TARGET_POWERPC64;
 case ENB_P8:
   return TARGET_POWER8;
 case ENB_P8V:
diff --git a/gcc/config/rs6000/rs6000-string.cc 
b/gcc/config/rs6000/rs6000-string.cc
index 55b4133b1a34..3674c4bd9847 100644
--- a/gcc/config/rs6000/rs6000-string.cc
+++ b/gcc/config/rs6000/rs6000-string.cc
@@ -1948,8 +1948,8 @@ expand_block_compare_gpr(unsigned HOST_WIDE_INT bytes, 
unsigned int base_align,
 bool
 expand_block_compare (rtx operands[])
 {
-  /* TARGET_POPCNTD is already guarded at expand cmpmemsi.  */
-  gcc_assert (TARGET_POPCNTD);
+  /* TARGET_POWER7 is already guarded at expand cmpmemsi.  */
+  gcc_assert (TARGET_POWER7);
 
   /* For P8, this case is complicated to handle because the subtract
  with carry instructions do not generate the 64-bit carry and so
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index dd51d75c4957..7d20e757c7c4 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -1999,7 +1999,7 @@ rs6000_hard_regno_mode_ok_uncached (int regno, 
machine_mode mode)
  if(GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
return 1;
 
- if (TARGET_POPCNTD && mode == SImode)
+ if (TARGET_POWER7 && mode == SImode)
return 1;
 
  if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
@@ -22473,7 +22473,7 @@ rs6000_rtx

[gcc(refs/users/meissner/heads/work178)] Update tests to work with architecture flags changes.

2024-09-12 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:cf2e2df71bb48fd39c8d621df83a07cea27179de

commit cf2e2df71bb48fd39c8d621df83a07cea27179de
Author: Michael Meissner 
Date:   Thu Sep 12 15:52:08 2024 -0400

Update tests to work with architecture flags changes.

Two tests used -mvsx to raise the processor level to at least power7.  These
tests were rewritten to add cpu=power7 support.

I have built both big endian and little endian bootstrap compilers and there
were no regressions.

In addition, I constructed a test case that used every architecture define
(like
_ARCH_PWR4, etc.) and I also looked at the .machine directive generated.  I 
ran
this test for all supported combinations of -mcpu, big/little endian, and 
32/64
bit support.  Every single instance generated exactly the same code with the
patches installed compared to the compiler before installing the patches.

Can I install this patch on the GCC 15 trunk?

2024-09-12  Michael Meissner  

gcc/testsuite/

* gcc.target/powerpc/ppc-target-4.c: Rewrite the test to add 
cpu=power7
when we need to add VSX support.  Add test for adding cpu=power7 
no-vsx
to generate only Altivec instructions.
* gcc.target/powerpc/pr115688.c: Add cpu=power7 when requesting VSX
instructions.

Diff:
---
 gcc/testsuite/gcc.target/powerpc/ppc-target-4.c | 38 +++--
 gcc/testsuite/gcc.target/powerpc/pr115688.c |  3 +-
 2 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-target-4.c 
b/gcc/testsuite/gcc.target/powerpc/ppc-target-4.c
index feef76db4618..5e2ecf34f249 100644
--- a/gcc/testsuite/gcc.target/powerpc/ppc-target-4.c
+++ b/gcc/testsuite/gcc.target/powerpc/ppc-target-4.c
@@ -2,7 +2,7 @@
 /* { dg-skip-if "" { powerpc*-*-darwin* } } */
 /* { dg-require-effective-target powerpc_fprs } */
 /* { dg-options "-O2 -ffast-math -mdejagnu-cpu=power5 -mno-altivec 
-mabi=altivec -fno-unroll-loops" } */
-/* { dg-final { scan-assembler-times "vaddfp" 1 } } */
+/* { dg-final { scan-assembler-times "vaddfp" 2 } } */
 /* { dg-final { scan-assembler-times "xvaddsp" 1 } } */
 /* { dg-final { scan-assembler-times "fadds" 1 } } */
 
@@ -18,10 +18,6 @@
 #error "__VSX__ should not be defined."
 #endif
 
-#pragma GCC target("altivec,vsx")
-#include 
-#pragma GCC reset_options
-
 #pragma GCC push_options
 #pragma GCC target("altivec,no-vsx")
 
@@ -33,6 +29,7 @@
 #error "__VSX__ should not be defined."
 #endif
 
+/* Altivec build, generate vaddfp.  */
 void
 av_add (vector float *a, vector float *b, vector float *c)
 {
@@ -40,10 +37,11 @@ av_add (vector float *a, vector float *b, vector float *c)
   unsigned long n = SIZE / 4;
 
   for (i = 0; i < n; i++)
-a[i] = vec_add (b[i], c[i]);
+a[i] = b[i] + c[i];
 }
 
-#pragma GCC target("vsx")
+/* cpu=power7 must be used to enable VSX.  */
+#pragma GCC target("cpu=power7,vsx")
 
 #ifndef __ALTIVEC__
 #error "__ALTIVEC__ should be defined."
@@ -53,6 +51,7 @@ av_add (vector float *a, vector float *b, vector float *c)
 #error "__VSX__ should be defined."
 #endif
 
+/* VSX build on power7, generate xvaddsp.  */
 void
 vsx_add (vector float *a, vector float *b, vector float *c)
 {
@@ -60,11 +59,31 @@ vsx_add (vector float *a, vector float *b, vector float *c)
   unsigned long n = SIZE / 4;
 
   for (i = 0; i < n; i++)
-a[i] = vec_add (b[i], c[i]);
+a[i] = b[i] + c[i];
+}
+
+#pragma GCC target("cpu=power7,no-vsx")
+
+#ifndef __ALTIVEC__
+#error "__ALTIVEC__ should be defined."
+#endif
+
+#ifdef __VSX__
+#error "__VSX__ should not be defined."
+#endif
+
+/* Altivec build on power7 with no VSX, generate vaddfp.  */
+void
+av2_add (vector float *a, vector float *b, vector float *c)
+{
+  unsigned long i;
+  unsigned long n = SIZE / 4;
+
+  for (i = 0; i < n; i++)
+a[i] = b[i] + c[i];
 }
 
 #pragma GCC pop_options
-#pragma GCC target("no-vsx,no-altivec")
 
 #ifdef __ALTIVEC__
 #error "__ALTIVEC__ should not be defined."
@@ -74,6 +93,7 @@ vsx_add (vector float *a, vector float *b, vector float *c)
 #error "__VSX__ should not be defined."
 #endif
 
+/* Default power5 build, generate scalar fadds.  */
 void
 norm_add (float *a, float *b, float *c)
 {
diff --git a/gcc/testsuite/gcc.target/powerpc/pr115688.c 
b/gcc/testsuite/gcc.target/powerpc/pr115688.c
index 5222e66ef170..00c7c301436a 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr115688.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr115688.c
@@ -7,7 +7,8 @@
 
 /* Verify there is no ICE under 32 bit env.  */
 
-__attribute__((target("vsx")))
+/* cpu=power7 must be used to enable VSX.  */
+__attribute__((target("cpu=power7,vsx")))
 int test (void)
 {
   return 0;


[gcc(refs/users/meissner/heads/work178)] Change TARGET_MODULO to TARGET_POWER9

2024-09-12 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:e629d8c9706c7a37e2a83247df96899515e5e5c5

commit e629d8c9706c7a37e2a83247df96899515e5e5c5
Author: Michael Meissner 
Date:   Thu Sep 12 15:50:46 2024 -0400

Change TARGET_MODULO to TARGET_POWER9

As part of the architecture flags patches, this patch changes the use of
TARGET_MODULO to TARGET_POWER9.  The modulo instructions were added in 
power9 (ISA
3.0).  Note, I did not change the uses of TARGET_MODULO where it was 
explicitly
generating different code if the machine had a modulo instruction.

I have built both big endian and little endian bootstrap compilers and there
were no regressions.

In addition, I constructed a test case that used every architecture define
(like
_ARCH_PWR4, etc.) and I also looked at the .machine directive generated.  I 
ran
this test for all supported combinations of -mcpu, big/little endian, and 
32/64
bit support.  Every single instance generated exactly the same code with the
patches installed compared to the compiler before installing the patches.

Can I install this patch on the GCC 15 trunk?

2024-09-12  Michael Meissner  

* config/rs6000/rs6000-builtin.cc (rs6000_builtin_is_supported): Use
TARGET_POWER9 instead of TARGET_MODULO.
* config/rs6000/rs6000.h (TARGET_CTZ): Likewise.
(TARGET_EXTSWSLI): Likewise.
(TARGET_MADDLD): Likewise.
* config/rs6000/rs6000.md (enabled attribute): Likewise.

Diff:
---
 gcc/config/rs6000/rs6000-builtin.cc | 4 ++--
 gcc/config/rs6000/rs6000.h  | 6 +++---
 gcc/config/rs6000/rs6000.md | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin.cc 
b/gcc/config/rs6000/rs6000-builtin.cc
index dae43b672ea7..b6093b3cb64c 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -169,9 +169,9 @@ rs6000_builtin_is_supported (enum rs6000_gen_builtins 
fncode)
 case ENB_P8V:
   return TARGET_P8_VECTOR;
 case ENB_P9:
-  return TARGET_MODULO;
+  return TARGET_POWER9;
 case ENB_P9_64:
-  return TARGET_MODULO && TARGET_POWERPC64;
+  return TARGET_POWER9 && TARGET_POWERPC64;
 case ENB_P9V:
   return TARGET_P9_VECTOR;
 case ENB_P10:
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 3a03c32f..89ca1bad80f3 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -461,9 +461,9 @@ extern int rs6000_vector_align[];
 #define TARGET_FCTIWUZ TARGET_POWER7
 /* Only powerpc64 and powerpc476 support fctid.  */
 #define TARGET_FCTID   (TARGET_POWERPC64 || rs6000_cpu == PROCESSOR_PPC476)
-#define TARGET_CTZ TARGET_MODULO
-#define TARGET_EXTSWSLI(TARGET_MODULO && TARGET_POWERPC64)
-#define TARGET_MADDLD  TARGET_MODULO
+#define TARGET_CTZ TARGET_POWER9
+#define TARGET_EXTSWSLI(TARGET_POWER9 && TARGET_POWERPC64)
+#define TARGET_MADDLD  TARGET_POWER9
 
 /* TARGET_DIRECT_MOVE is redundant to TARGET_P8_VECTOR, so alias it to that.  
*/
 #define TARGET_DIRECT_MOVE TARGET_P8_VECTOR
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index bff898a4eff1..fc0d454e9a42 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -403,7 +403,7 @@
  (const_int 1)
 
  (and (eq_attr "isa" "p9")
- (match_test "TARGET_MODULO"))
+ (match_test "TARGET_POWER9"))
  (const_int 1)
 
  (and (eq_attr "isa" "p9v")


[gcc(refs/users/meissner/heads/work178)] Add support for -mcpu=future

2024-09-12 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:9a8c9ebb3313c72aff0d34dda9ee2f752af10f6d

commit 9a8c9ebb3313c72aff0d34dda9ee2f752af10f6d
Author: Michael Meissner 
Date:   Thu Sep 12 15:54:08 2024 -0400

Add support for -mcpu=future

This patch adds the support that can be used in developing GCC support for
future PowerPC processors.

2024-09-12  Michael Meissner  

* config.gcc (powerpc*-*-*): Add support for --with-cpu=future.
* config/rs6000/aix71.h (ASM_CPU_SPEC): Add support for 
-mcpu=future.
* config/rs6000/aix72.h (ASM_CPU_SPEC): Likewise.
* config/rs6000/aix73.h (ASM_CPU_SPEC): Likewise.
* config/rs6000/driver-rs6000.cc (asm_names): Likewise.
* config/rs6000/rs6000-arch.def: Add future cpu.
* config/rs6000/rs6000-c.cc (rs6000_target_modify_macros): If
-mcpu=future, define _ARCH_FUTURE.
* config/rs6000/rs6000-cpus.def (FUTURE_MASKS_SERVER): New macro.
(future cpu): Define.
* config/rs6000/rs6000-opts.h (enum processor_type): Add
PROCESSOR_FUTURE.
* config/rs6000/rs6000-tables.opt: Regenerate.
* config/rs6000/rs6000.cc (power10_cost): Update comment.
(get_arch_flags): Add support for future processor.
(rs6000_option_override_internal): Likewise.
(rs6000_machine_from_flags): Likewise.
(rs6000_reassociation_width): Likewise.
(rs6000_adjust_cost): Likewise.
(rs6000_issue_rate): Likewise.
(rs6000_sched_reorder): Likewise.
(rs6000_sched_reorder2): Likewise.
(rs6000_register_move_cost): Likewise.
* config/rs6000/rs6000.h (ASM_CPU_SPEC): Likewise.
(TARGET_POWER11): New macro.
* config/rs6000/rs6000.md (cpu attribute): Likewise.

Diff:
---
 gcc/config.gcc  |  4 ++--
 gcc/config/rs6000/aix71.h   |  1 +
 gcc/config/rs6000/aix72.h   |  1 +
 gcc/config/rs6000/aix73.h   |  1 +
 gcc/config/rs6000/driver-rs6000.cc  |  2 ++
 gcc/config/rs6000/rs6000-arch.def   |  1 +
 gcc/config/rs6000/rs6000-c.cc   |  2 ++
 gcc/config/rs6000/rs6000-cpus.def   |  3 +++
 gcc/config/rs6000/rs6000-opts.h |  1 +
 gcc/config/rs6000/rs6000-tables.opt | 11 +++
 gcc/config/rs6000/rs6000.cc | 34 ++
 gcc/config/rs6000/rs6000.h  |  2 ++
 gcc/config/rs6000/rs6000.md |  2 +-
 13 files changed, 50 insertions(+), 15 deletions(-)

diff --git a/gcc/config.gcc b/gcc/config.gcc
index f09ce9f63a01..0b794e977f6a 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -539,7 +539,7 @@ powerpc*-*-*)
extra_headers="${extra_headers} ppu_intrinsics.h spu2vmx.h vec_types.h 
si2vmx.h"
extra_headers="${extra_headers} amo.h"
case x$with_cpu in
-   
xpowerpc64|xdefault64|x6[23]0|x970|xG5|xpower[3456789]|xpower1[01]|xpower6x|xrs64a|xcell|xa2|xe500mc64|xe5500|xe6500)
+   
xpowerpc64|xdefault64|x6[23]0|x970|xG5|xpower[3456789]|xpower1[01]|xpower6x|xrs64a|xcell|xa2|xe500mc64|xe5500|xe6500|xfuture)
cpu_is_64bit=yes
;;
esac
@@ -5646,7 +5646,7 @@ case "${target}" in
tm_defines="${tm_defines} CONFIG_PPC405CR"
eval "with_$which=405"
;;
-   "" | common | native \
+   "" | common | native | future \
| power[3456789] | power1[01] | power5+ | power6x \
| powerpc | powerpc64 | powerpc64le \
| rs64 \
diff --git a/gcc/config/rs6000/aix71.h b/gcc/config/rs6000/aix71.h
index 41037b3852d7..570ddcc451db 100644
--- a/gcc/config/rs6000/aix71.h
+++ b/gcc/config/rs6000/aix71.h
@@ -79,6 +79,7 @@ do {  
\
 #undef ASM_CPU_SPEC
 #define ASM_CPU_SPEC \
 "%{mcpu=native: %(asm_cpu_native); \
+  mcpu=future: -mfuture; \
   mcpu=power11: -mpwr11; \
   mcpu=power10: -mpwr10; \
   mcpu=power9: -mpwr9; \
diff --git a/gcc/config/rs6000/aix72.h b/gcc/config/rs6000/aix72.h
index fe59f8319b48..242ca94bd065 100644
--- a/gcc/config/rs6000/aix72.h
+++ b/gcc/config/rs6000/aix72.h
@@ -79,6 +79,7 @@ do {  
\
 #undef ASM_CPU_SPEC
 #define ASM_CPU_SPEC \
 "%{mcpu=native: %(asm_cpu_native); \
+  mcpu=future: -mfuture; \
   mcpu=power11: -mpwr11; \
   mcpu=power10: -mpwr10; \
   mcpu=power9: -mpwr9; \
diff --git a/gcc/config/rs6000/aix73.h b/gcc/config/rs6000/aix73.h
index 1318b0b3662d..2bd6b4bb3c4f 100644
--- a/gcc/config/rs6000/aix73.h
+++ b/gcc/config/rs6000/aix73.h
@@ -79,6 +79,7 @@ do {  
\
 #undef ASM_CPU_SPEC
 #define ASM_CPU_SPEC \
 "%{mcpu=native: %(asm_cpu_native); \
+  mcpu=future: -mfuture; \
   mcpu=power11: -

[gcc(refs/users/meissner/heads/work178)] Add -mcpu=future tuning support.

2024-09-12 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:db845de0bf64e872da038a7b0913bf2825f504fb

commit db845de0bf64e872da038a7b0913bf2825f504fb
Author: Michael Meissner 
Date:   Thu Sep 12 15:54:51 2024 -0400

Add -mcpu=future tuning support.

This patch makes -mtune=future use the same tuning decision as 
-mtune=power11.

2024-09-12  Michael Meissner  

gcc/

* config/rs6000/power10.md (all reservations): Add future as an
alternative to power10 and power11.

Diff:
---
 gcc/config/rs6000/power10.md | 144 +--
 1 file changed, 72 insertions(+), 72 deletions(-)

diff --git a/gcc/config/rs6000/power10.md b/gcc/config/rs6000/power10.md
index 2310c4603457..e42b057dc45b 100644
--- a/gcc/config/rs6000/power10.md
+++ b/gcc/config/rs6000/power10.md
@@ -1,4 +1,4 @@
-;; Scheduling description for the IBM Power10 and Power11 processors.
+;; Scheduling description for the IBM Power10, Power11, and Future processors.
 ;; Copyright (C) 2020-2024 Free Software Foundation, Inc.
 ;;
 ;; Contributed by Pat Haugen (pthau...@us.ibm.com).
@@ -97,12 +97,12 @@
(eq_attr "update" "no")
(eq_attr "size" "!128")
(eq_attr "prefixed" "no")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_any_power10,LU_power10")
 
 (define_insn_reservation "power10-fused-load" 4
   (and (eq_attr "type" "fused_load_cmpi,fused_addis_load,fused_load_load")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_even_power10,LU_power10")
 
 (define_insn_reservation "power10-prefixed-load" 4
@@ -110,13 +110,13 @@
(eq_attr "update" "no")
(eq_attr "size" "!128")
(eq_attr "prefixed" "yes")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_even_power10,LU_power10")
 
 (define_insn_reservation "power10-load-update" 4
   (and (eq_attr "type" "load")
(eq_attr "update" "yes")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_even_power10,LU_power10+SXU_power10")
 
 (define_insn_reservation "power10-fpload-double" 4
@@ -124,7 +124,7 @@
(eq_attr "update" "no")
(eq_attr "size" "64")
(eq_attr "prefixed" "no")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_any_power10,LU_power10")
 
 (define_insn_reservation "power10-prefixed-fpload-double" 4
@@ -132,14 +132,14 @@
(eq_attr "update" "no")
(eq_attr "size" "64")
(eq_attr "prefixed" "yes")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_even_power10,LU_power10")
 
 (define_insn_reservation "power10-fpload-update-double" 4
   (and (eq_attr "type" "fpload")
(eq_attr "update" "yes")
(eq_attr "size" "64")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_even_power10,LU_power10+SXU_power10")
 
 ; SFmode loads are cracked and have additional 3 cycles over DFmode
@@ -148,27 +148,27 @@
   (and (eq_attr "type" "fpload")
(eq_attr "update" "no")
(eq_attr "size" "32")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_even_power10,LU_power10")
 
 (define_insn_reservation "power10-fpload-update-single" 7
   (and (eq_attr "type" "fpload")
(eq_attr "update" "yes")
(eq_attr "size" "32")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_even_power10,LU_power10+SXU_power10")
 
 (define_insn_reservation "power10-vecload" 4
   (and (eq_attr "type" "vecload")
(eq_attr "size" "!256")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_any_power10,LU_power10")
 
 ; lxvp
 (define_insn_reservation "power10-vecload-pair" 4
   (and (eq_attr "type" "vecload")
(eq_attr "size" "256")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_even_power10,LU_power10+SXU_power10")
 
 ; Store Unit
@@ -178,12 +178,12 @@
(eq_attr "prefixed" "no")
(eq_attr "size" "!128")
(eq_attr "size" "!256")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_any_power10,STU_power10")
 
 (define_insn_reservation "power10-fused-store" 0
   (and (eq_attr "type" "fused_store_store")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_even_power10,STU_power10")
 
 (define_insn_reservation "power10-prefixed-store" 0
@@ -191,52 +191,52 @@
(eq_attr "prefixed" "yes")
(eq_attr "size" "!128")
(eq_attr "size" "!256")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_even_power10,STU_power10")
 
 ; Update forms have 2 cycle latency for updat

[gcc r15-3614] c++: Make __builtin_launder reject invalid types [PR116673]

2024-09-12 Thread Jonathan Wakely via Gcc-cvs
https://gcc.gnu.org/g:9fe57e4879de93b6e3c7b4c226f42d5f3a48474f

commit r15-3614-g9fe57e4879de93b6e3c7b4c226f42d5f3a48474f
Author: Jonathan Wakely 
Date:   Wed Sep 11 11:47:44 2024 +0100

c++: Make __builtin_launder reject invalid types [PR116673]

The standard says that std::launder is ill-formed for function pointers
and cv void pointers, so there's no reason for __builtin_launder to
accept them. This change allows implementations of std::launder to defer
to the built-in for error checking, although libstdc++ will continue to
diagnose it directly for more user-friendly diagnostics.

PR c++/116673

gcc/cp/ChangeLog:

* semantics.cc (finish_builtin_launder): Diagnose function
pointers and cv void pointers.

gcc/testsuite/ChangeLog:

* g++.dg/cpp1z/launder2.C: Adjust dg-error strings.
* g++.dg/cpp1z/launder10.C: New test.

Diff:
---
 gcc/cp/semantics.cc|  6 +++---
 gcc/testsuite/g++.dg/cpp1z/launder10.C | 15 +++
 gcc/testsuite/g++.dg/cpp1z/launder2.C  |  6 +++---
 3 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index 63212afafb3b..8219d6410b8e 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -13482,10 +13482,10 @@ finish_builtin_launder (location_t loc, tree arg, 
tsubst_flags_t complain)
 arg = decay_conversion (arg, complain);
   if (error_operand_p (arg))
 return error_mark_node;
-  if (!type_dependent_expression_p (arg)
-  && !TYPE_PTR_P (TREE_TYPE (arg)))
+  if (!type_dependent_expression_p (arg) && !TYPE_PTROB_P (TREE_TYPE (arg)))
 {
-  error_at (loc, "non-pointer argument to %<__builtin_launder%>");
+  error_at (loc, "type %qT of argument to %<__builtin_launder%> "
+   "is not a pointer to object type", TREE_TYPE (arg));
   return error_mark_node;
 }
   if (processing_template_decl)
diff --git a/gcc/testsuite/g++.dg/cpp1z/launder10.C 
b/gcc/testsuite/g++.dg/cpp1z/launder10.C
new file mode 100644
index ..2109a2e38393
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1z/launder10.C
@@ -0,0 +1,15 @@
+// PR c++/116673
+// { dg-do compile }
+
+void
+bar (void *p)
+{
+  __builtin_launder (bar); // { dg-error {argument to '__builtin_launder'} }
+  __builtin_launder (p);   // { dg-error {argument to '__builtin_launder'} }
+  const void* cp = p;
+  __builtin_launder (cp);  // { dg-error {argument to '__builtin_launder'} }
+  volatile void* vp = p;
+  __builtin_launder (vp);  // { dg-error {argument to '__builtin_launder'} }
+  const volatile void* cvp = p;
+  __builtin_launder (cvp); // { dg-error {argument to '__builtin_launder'} }
+}
diff --git a/gcc/testsuite/g++.dg/cpp1z/launder2.C 
b/gcc/testsuite/g++.dg/cpp1z/launder2.C
index 9cd1779704b3..a2d448612655 100644
--- a/gcc/testsuite/g++.dg/cpp1z/launder2.C
+++ b/gcc/testsuite/g++.dg/cpp1z/launder2.C
@@ -4,11 +4,11 @@ int a;
 int *b = __builtin_launder (); // { dg-error "wrong number of 
arguments to" }
 int *c = __builtin_launder (&a, 2);// { dg-error "wrong number of 
arguments to" }
 int *d = __builtin_launder (&a);
-int e = __builtin_launder (a); // { dg-error "non-pointer argument to" 
}
+int e = __builtin_launder (a); // { dg-error "not a pointer to object 
type" }
 int &f = a;
-int g = __builtin_launder (f); // { dg-error "non-pointer argument to" 
}
+int g = __builtin_launder (f); // { dg-error "not a pointer to object 
type" }
 
-template  T f1 (T x) { return __builtin_launder (x); } // { 
dg-error "non-pointer argument to" }
+template  T f1 (T x) { return __builtin_launder (x); } // { 
dg-error "not a pointer to object type" }
 template  T f2 (T x) { return __builtin_launder (x); }
 
 int h = f1 (a);


[gcc r15-3616] libstdc++: Remove unused alias template in std::optional

2024-09-12 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:f5f55b6e732d119a5c4a1d8e48d1364eb94d9d1d

commit r15-3616-gf5f55b6e732d119a5c4a1d8e48d1364eb94d9d1d
Author: Jonathan Wakely 
Date:   Thu Sep 12 10:55:23 2024 +0100

libstdc++: Remove unused alias template in std::optional

I added this __is_bool alias template in r15-2309-g6d86486292acbe but
it isn't actually used so can be removed.

libstdc++-v3/ChangeLog:

* include/std/optional (__is_bool): Remove.

Diff:
---
 libstdc++-v3/include/std/optional | 2 --
 1 file changed, 2 deletions(-)

diff --git a/libstdc++-v3/include/std/optional 
b/libstdc++-v3/include/std/optional
index 933a5b15e569..6a8e76f60e3a 100644
--- a/libstdc++-v3/include/std/optional
+++ b/libstdc++-v3/include/std/optional
@@ -850,8 +850,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
using __not_self = __not_>>;
   template
using __not_tag = __not_>>;
-  template
-   using __is_bool = is_same, bool>;
   template
using _Requires = enable_if_t<__and_v<_Cond...>, bool>;
 #endif


[gcc r15-3615] libstdc++: Simplify std::launder definition

2024-09-12 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:27c985b774b2ccb0d2aa9eb72acf05f98b790296

commit r15-3615-g27c985b774b2ccb0d2aa9eb72acf05f98b790296
Author: Jonathan Wakely 
Date:   Wed Sep 11 10:17:23 2024 +0100

libstdc++: Simplify std::launder definition

A single static assert is a much simpler way to implement the
compile-time preconditions on std::launder than an overload set of
deleted functions and function templates. The only difficulty is that
<new> doesn't include <type_traits> so we can't use std::is_function and
std::is_void for the checks. That can be worked around though, by using
the __is_same and __is_function built-ins. If the __is_function built-in
isn't supported then the __builtin_launder built-in will give an error
anyway, since the commit preceding this one.

We can also remove the redundant __cplusplus >= 201703L check around the
definitions of std::launder and the interference constants, which are
already guarded by the appropriate feature test macros.

libstdc++-v3/ChangeLog:

* libsupc++/new (launder): Add static_assert and remove deleted
overloads.
* testsuite/18_support/launder/requirements_neg.cc: Adjust
expected diagnostics.

Diff:
---
 libstdc++-v3/libsupc++/new | 36 +-
 .../18_support/launder/requirements_neg.cc | 15 +
 2 files changed, 24 insertions(+), 27 deletions(-)

diff --git a/libstdc++-v3/libsupc++/new b/libstdc++-v3/libsupc++/new
index 2e2038e1a82c..af5c7690bb99 100644
--- a/libstdc++-v3/libsupc++/new
+++ b/libstdc++-v3/libsupc++/new
@@ -198,7 +198,6 @@ inline void operator delete[](void*, void*) 
_GLIBCXX_USE_NOEXCEPT { }
 //@}
 } // extern "C++"
 
-#if __cplusplus >= 201703L
 namespace std
 {
 #ifdef __cpp_lib_launder // C++ >= 17 && HAVE_BUILTIN_LAUNDER
@@ -206,33 +205,28 @@ namespace std
   template
 [[nodiscard]] constexpr _Tp*
 launder(_Tp* __p) noexcept
-{ return __builtin_launder(__p); }
-
-  // The program is ill-formed if T is a function type or
-  // (possibly cv-qualified) void.
-
-  template
-void launder(_Ret (*)(_Args...) _GLIBCXX_NOEXCEPT_QUAL) = delete;
-  template
-void launder(_Ret (*)(_Args..) _GLIBCXX_NOEXCEPT_QUAL) = delete;
-
-  void launder(void*) = delete;
-  void launder(const void*) = delete;
-  void launder(volatile void*) = delete;
-  void launder(const volatile void*) = delete;
+{
+  if constexpr (__is_same(const volatile _Tp, const volatile void))
+   static_assert(!__is_same(const volatile _Tp, const volatile void),
+ "std::launder argument must not be a void pointer");
+#if _GLIBCXX_USE_BUILTIN_TRAIT(__is_function)
+  else if constexpr (__is_function(_Tp))
+   static_assert(!__is_function(_Tp),
+ "std::launder argument must not be a function pointer");
+#endif
+  else
+   return __builtin_launder(__p);
+  return nullptr;
+}
 #endif // __cpp_lib_launder
 
 #ifdef __cpp_lib_hardware_interference_size // C++ >= 17 && 
defined(gcc_dest_sz)
   inline constexpr size_t hardware_destructive_interference_size = 
__GCC_DESTRUCTIVE_SIZE;
   inline constexpr size_t hardware_constructive_interference_size = 
__GCC_CONSTRUCTIVE_SIZE;
 #endif // __cpp_lib_hardware_interference_size
-}
-#endif // C++17
 
 // Emitted despite the FTM potentially being undefined.
-#if __cplusplus > 201703L
-namespace std
-{
+#if __cplusplus >= 202002L
   /// Tag type used to declare a class-specific operator delete that can
   /// invoke the destructor before deallocating the memory.
   struct destroying_delete_t
@@ -241,8 +235,8 @@ namespace std
   };
   /// Tag variable of type destroying_delete_t.
   inline constexpr destroying_delete_t destroying_delete{};
-}
 #endif // C++20
+}
 
 #pragma GCC visibility pop
 
diff --git a/libstdc++-v3/testsuite/18_support/launder/requirements_neg.cc 
b/libstdc++-v3/testsuite/18_support/launder/requirements_neg.cc
index 2808ebf614dd..82ce0b35a8c6 100644
--- a/libstdc++-v3/testsuite/18_support/launder/requirements_neg.cc
+++ b/libstdc++-v3/testsuite/18_support/launder/requirements_neg.cc
@@ -25,14 +25,17 @@ int f2(const char*, ...);
 void
 test01()
 {
-  std::launder( &f1 ); // { dg-error "deleted function" }
-  std::launder( &f2 ); // { dg-error "deleted function" }
+  std::launder( &f1 ); // { dg-error "here" }
+  std::launder( &f2 ); // { dg-error "here" }
   void* p = nullptr;
-  std::launder( p );  // { dg-error "deleted function" }
+  std::launder( p );   // { dg-error "here" }
   const void* cp = nullptr;
-  std::launder( cp );  // { dg-error "deleted function" }
+  std::launder( cp );  // { dg-error "here" }
   volatile void* vp = nullptr;
-  std::launder( vp );  // { dg-error "deleted function" }
+  std::launder( vp );  // { dg-error "here" }
   const volatile void* cvp = nullptr;
-  std::launder( cvp );  // { dg-error "deleted function" }
+  std::launder( cvp ); // { dg-error "here" }
 }
+// { d

[gcc(refs/users/meissner/heads/work178)] Change TARGET_CMPB to TARGET_POWER6

2024-09-12 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:2447d85d64522c91861f562acb5696b73c22141a

commit 2447d85d64522c91861f562acb5696b73c22141a
Author: Michael Meissner 
Date:   Thu Sep 12 15:48:21 2024 -0400

Change TARGET_CMPB to TARGET_POWER6

As part of the architecture flags patches, this patch changes the use of
TARGET_CMPB to TARGET_POWER6.  The CMPB instruction was added in power6
(ISA 2.05).

I have built both big endian and little endian bootstrap compilers and there
were no regressions.

In addition, I constructed a test case that used every architecture define
(like
_ARCH_PWR4, etc.) and I also looked at the .machine directive generated.  I 
ran
this test for all supported combinations of -mcpu, big/little endian, and 
32/64
bit support.  Every single instance generated exactly the same code with the
patches installed compared to the compiler before installing the patches.

Can I install this patch on the GCC 15 trunk?

2024-09-03  Michael Meissner  

* config/rs6000/rs6000-builtin.cc (rs6000_builtin_is_supported): Use
TARGET_POWER6 instead of TARGET_CMPB.
* config/rs6000/rs6000.h (TARGET_FCFID): Merge tests for popcntb, 
cmpb,
and popcntd into a single test for TARGET_POWER5.
(TARGET_LFIWAX): Use TARGET_POWER6 instead of TARGET_CMPB.
* config/rs6000/rs6000.md (enabled attribute): Likewise.
(parity2_cmp): Likewise.
(cmpb): Likewise.
(copysign3): Likewise.
(copysign3_fcpsgn): Likewise.
(cmpstrnsi): Likewise.
(cmpstrsi): Likewise.

Diff:
---
 gcc/config/rs6000/rs6000-builtin.cc |  4 ++--
 gcc/config/rs6000/rs6000.h  |  6 ++
 gcc/config/rs6000/rs6000.md | 16 
 3 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin.cc 
b/gcc/config/rs6000/rs6000-builtin.cc
index 98a0545030cd..76421bd1de0b 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -157,9 +157,9 @@ rs6000_builtin_is_supported (enum rs6000_gen_builtins 
fncode)
 case ENB_P5:
   return TARGET_POWER5;
 case ENB_P6:
-  return TARGET_CMPB;
+  return TARGET_POWER6;
 case ENB_P6_64:
-  return TARGET_CMPB && TARGET_POWERPC64;
+  return TARGET_POWER6 && TARGET_POWERPC64;
 case ENB_P7:
   return TARGET_POPCNTD;
 case ENB_P7_64:
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 4500724d895c..d22693eb2bfb 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -448,13 +448,11 @@ extern int rs6000_vector_align[];
Enable 32-bit fcfid's on any of the switches for newer ISA machines.  */
 #define TARGET_FCFID   (TARGET_POWERPC64   \
 || TARGET_PPC_GPOPT/* 970/power4 */\
-|| TARGET_POPCNTB  /* ISA 2.02 */  \
-|| TARGET_CMPB /* ISA 2.05 */  \
-|| TARGET_POPCNTD) /* ISA 2.06 */
+|| TARGET_POWER5)  /* ISA 2.02 and above */ \
 
 #define TARGET_FCTIDZ  TARGET_FCFID
 #define TARGET_STFIWX  TARGET_PPC_GFXOPT
-#define TARGET_LFIWAX  TARGET_CMPB
+#define TARGET_LFIWAX  TARGET_POWER6
 #define TARGET_LFIWZX  TARGET_POPCNTD
 #define TARGET_FCFIDS  TARGET_POPCNTD
 #define TARGET_FCFIDU  TARGET_POPCNTD
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 7f9fe609a031..0c303087e944 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -383,7 +383,7 @@
  (const_int 1)
 
  (and (eq_attr "isa" "p6")
- (match_test "TARGET_CMPB"))
+ (match_test "TARGET_POWER6"))
  (const_int 1)
 
  (and (eq_attr "isa" "p7")
@@ -2544,7 +2544,7 @@
 (define_insn "parity2_cmpb"
   [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
(unspec:GPR [(match_operand:GPR 1 "gpc_reg_operand" "r")] 
UNSPEC_PARITY))]
-  "TARGET_CMPB"
+  "TARGET_POWER6"
   "prty %0,%1"
   [(set_attr "type" "popcnt")])
 
@@ -2597,7 +2597,7 @@
   [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
(unspec:GPR [(match_operand:GPR 1 "gpc_reg_operand" "r")
 (match_operand:GPR 2 "gpc_reg_operand" "r")] UNSPEC_CMPB))]
-  "TARGET_CMPB"
+  "TARGET_POWER6"
   "cmpb %0,%1,%2"
   [(set_attr "type" "cmp")])
 
@@ -5401,7 +5401,7 @@
&& ((TARGET_PPC_GFXOPT
 && !HONOR_NANS (mode)
 && !HONOR_SIGNED_ZEROS (mode))
-   || TARGET_CMPB
+   || TARGET_POWER6
|| VECTOR_UNIT_VSX_P (mode))"
 {
   /* Middle-end canonicalizes -fabs (x) to copysign (x, -1),
@@ -5422,7 +5422,7 @@
   if (!gpc_reg_operand (operands[2], mode))
 operands[2] = copy_to_mode_reg (mode, operands[2]);
 
-  if (TARGET_CMPB || VECTOR_UNIT_VSX_P (mode))
+  if (TARGET_POWER6 || VECTOR_UNIT_VSX_P (mode))
 {
   emit_insn (gen_copysign3_fcpsg

[gcc/meissner/heads/work178-bugs] (13 commits) Merge commit 'refs/users/meissner/heads/work178-bugs' of gi

2024-09-12 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work178-bugs' was updated to point to:

 d8b655099e0c... Merge commit 'refs/users/meissner/heads/work178-bugs' of gi

It previously pointed to:

 4ed9d16a0b78... Add ChangeLog.bugs and update REVISION.

Diff:

Summary of changes (added commits):
---

  d8b6550... Merge commit 'refs/users/meissner/heads/work178-bugs' of gi
  c425f9a... Add ChangeLog.bugs and update REVISION.
  db845de... Add -mcpu=future tuning support. (*)
  9a8c9eb... Add support for -mcpu=future (*)
  cf2e2df... Update tests to work with architecture flags changes. (*)
  e629d8c... Change TARGET_MODULO to TARGET_POWER9 (*)
  30a040f... Change TARGET_POPCNTD to TARGET_POWER7 (*)
  2447d85... Change TARGET_CMPB to TARGET_POWER6 (*)
  c290843... Change TARGET_FPRND to TARGET_POWER5X (*)
  71a8da6... Change TARGET_POPCNTB to TARGET_POWER5 (*)
  59c893c... Do not allow -mvsx to boost processor to power7. (*)
  fbd5596... Use architecture flags for defining _ARCH_PWR macros. (*)
  77cfcb4... Add rs6000 architecture masks. (*)

(*) This commit already exists in another branch.
Because the reference `refs/users/meissner/heads/work178-bugs' matches
your hooks.email-new-commits-only configuration,
no separate email is sent for this commit.


[gcc(refs/users/meissner/heads/work178-bugs)] Merge commit 'refs/users/meissner/heads/work178-bugs' of git+ssh://gcc.gnu.org/git/gcc into me/work1

2024-09-12 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:d8b655099e0c06f20d746a7a9397ddfdbe271825

commit d8b655099e0c06f20d746a7a9397ddfdbe271825
Merge: c425f9a89518 4ed9d16a0b78
Author: Michael Meissner 
Date:   Thu Sep 12 16:00:59 2024 -0400

Merge commit 'refs/users/meissner/heads/work178-bugs' of 
git+ssh://gcc.gnu.org/git/gcc into me/work178-bugs

Diff:


[gcc(refs/users/meissner/heads/work178-bugs)] Add ChangeLog.bugs and update REVISION.

2024-09-12 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:c425f9a8951881d9790dd9197ce259b6e5f28eef

commit c425f9a8951881d9790dd9197ce259b6e5f28eef
Author: Michael Meissner 
Date:   Thu Sep 12 15:29:44 2024 -0400

Add ChangeLog.bugs and update REVISION.

2024-09-12  Michael Meissner  

gcc/

* ChangeLog.bugs: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.bugs | 6 ++
 gcc/REVISION   | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.bugs b/gcc/ChangeLog.bugs
new file mode 100644
index ..8413d3fa1ca2
--- /dev/null
+++ b/gcc/ChangeLog.bugs
@@ -0,0 +1,6 @@
+ Branch work178-bugs, baseline 
+
+2024-09-12   Michael Meissner  
+
+   Clone branch
+
diff --git a/gcc/REVISION b/gcc/REVISION
index f0e6035292ab..eae270db9f81 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work178 branch
+work178-bugs branch


[gcc/meissner/heads/work178-dmf] (13 commits) Merge commit 'refs/users/meissner/heads/work178-dmf' of git

2024-09-12 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work178-dmf' was updated to point to:

 ab8879b1dcda... Merge commit 'refs/users/meissner/heads/work178-dmf' of git

It previously pointed to:

 0ad31e9889b0... Add ChangeLog.dmf and update REVISION.

Diff:

Summary of changes (added commits):
---

  ab8879b... Merge commit 'refs/users/meissner/heads/work178-dmf' of git
  70215b7... Add ChangeLog.dmf and update REVISION.
  db845de... Add -mcpu=future tuning support. (*)
  9a8c9eb... Add support for -mcpu=future (*)
  cf2e2df... Update tests to work with architecture flags changes. (*)
  e629d8c... Change TARGET_MODULO to TARGET_POWER9 (*)
  30a040f... Change TARGET_POPCNTD to TARGET_POWER7 (*)
  2447d85... Change TARGET_CMPB to TARGET_POWER6 (*)
  c290843... Change TARGET_FPRND to TARGET_POWER5X (*)
  71a8da6... Change TARGET_POPCNTB to TARGET_POWER5 (*)
  59c893c... Do not allow -mvsx to boost processor to power7. (*)
  fbd5596... Use architecture flags for defining _ARCH_PWR macros. (*)
  77cfcb4... Add rs6000 architecture masks. (*)

(*) This commit already exists in another branch.
Because the reference `refs/users/meissner/heads/work178-dmf' matches
your hooks.email-new-commits-only configuration,
no separate email is sent for this commit.


[gcc(refs/users/meissner/heads/work178-dmf)] Merge commit 'refs/users/meissner/heads/work178-dmf' of git+ssh://gcc.gnu.org/git/gcc into me/work17

2024-09-12 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:ab8879b1dcdae988641cb2ae0d76ff7f05c3c0d8

commit ab8879b1dcdae988641cb2ae0d76ff7f05c3c0d8
Merge: 70215b7e3844 0ad31e9889b0
Author: Michael Meissner 
Date:   Thu Sep 12 16:02:46 2024 -0400

Merge commit 'refs/users/meissner/heads/work178-dmf' of 
git+ssh://gcc.gnu.org/git/gcc into me/work178-dmf

Diff:


[gcc(refs/users/meissner/heads/work178-dmf)] Add ChangeLog.dmf and update REVISION.

2024-09-12 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:70215b7e3844d8f958ab2a3008c0c2586a45839a

commit 70215b7e3844d8f958ab2a3008c0c2586a45839a
Author: Michael Meissner 
Date:   Thu Sep 12 15:26:50 2024 -0400

Add ChangeLog.dmf and update REVISION.

2024-09-12  Michael Meissner  

gcc/

* ChangeLog.dmf: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.dmf | 6 ++
 gcc/REVISION  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.dmf b/gcc/ChangeLog.dmf
new file mode 100644
index ..d94fb1b8de24
--- /dev/null
+++ b/gcc/ChangeLog.dmf
@@ -0,0 +1,6 @@
+ Branch work178-dmf, baseline 
+
+2024-09-12   Michael Meissner  
+
+   Clone branch
+
diff --git a/gcc/REVISION b/gcc/REVISION
index f0e6035292ab..5bda8286629d 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work178 branch
+work178-dmf branch


[gcc/meissner/heads/work178-libs] (13 commits) Merge commit 'refs/users/meissner/heads/work178-libs' of gi

2024-09-12 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work178-libs' was updated to point to:

 70adefb839e8... Merge commit 'refs/users/meissner/heads/work178-libs' of gi

It previously pointed to:

 5a3b5823db6d... Add ChangeLog.libs and update REVISION.

Diff:

Summary of changes (added commits):
---

  70adefb... Merge commit 'refs/users/meissner/heads/work178-libs' of gi
  3f1870d... Add ChangeLog.libs and update REVISION.
  db845de... Add -mcpu=future tuning support. (*)
  9a8c9eb... Add support for -mcpu=future (*)
  cf2e2df... Update tests to work with architecture flags changes. (*)
  e629d8c... Change TARGET_MODULO to TARGET_POWER9 (*)
  30a040f... Change TARGET_POPCNTD to TARGET_POWER7 (*)
  2447d85... Change TARGET_CMPB to TARGET_POWER6 (*)
  c290843... Change TARGET_FPRND to TARGET_POWER5X (*)
  71a8da6... Change TARGET_POPCNTB to TARGET_POWER5 (*)
  59c893c... Do not allow -mvsx to boost processor to power7. (*)
  fbd5596... Use architecture flags for defining _ARCH_PWR macros. (*)
  77cfcb4... Add rs6000 architecture masks. (*)

(*) This commit already exists in another branch.
Because the reference `refs/users/meissner/heads/work178-libs' matches
your hooks.email-new-commits-only configuration,
no separate email is sent for this commit.


[gcc(refs/users/meissner/heads/work178-libs)] Add ChangeLog.libs and update REVISION.

2024-09-12 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:3f1870d6aff3aef8f84c6e95311b1138ebac1aa3

commit 3f1870d6aff3aef8f84c6e95311b1138ebac1aa3
Author: Michael Meissner 
Date:   Thu Sep 12 15:30:36 2024 -0400

Add ChangeLog.libs and update REVISION.

2024-09-12  Michael Meissner  

gcc/

* ChangeLog.libs: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.libs | 6 ++
 gcc/REVISION   | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.libs b/gcc/ChangeLog.libs
new file mode 100644
index ..cf5c7cc035f2
--- /dev/null
+++ b/gcc/ChangeLog.libs
@@ -0,0 +1,6 @@
+ Branch work178-libs, baseline 
+
+2024-09-12   Michael Meissner  
+
+   Clone branch
+
diff --git a/gcc/REVISION b/gcc/REVISION
index f0e6035292ab..bb0330a068b1 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work178 branch
+work178-libs branch


[gcc(refs/users/meissner/heads/work178-libs)] Merge commit 'refs/users/meissner/heads/work178-libs' of git+ssh://gcc.gnu.org/git/gcc into me/work1

2024-09-12 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:70adefb839e81654ba4733b9efd0b703636924b7

commit 70adefb839e81654ba4733b9efd0b703636924b7
Merge: 3f1870d6aff3 5a3b5823db6d
Author: Michael Meissner 
Date:   Thu Sep 12 16:04:03 2024 -0400

Merge commit 'refs/users/meissner/heads/work178-libs' of 
git+ssh://gcc.gnu.org/git/gcc into me/work178-libs

Diff:


[gcc/meissner/heads/work178-tar] (13 commits) Merge commit 'refs/users/meissner/heads/work178-tar' of git

2024-09-12 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work178-tar' was updated to point to:

 e91856119cce... Merge commit 'refs/users/meissner/heads/work178-tar' of git

It previously pointed to:

 a5d043977b95... Add ChangeLog.tar and update REVISION.

Diff:

Summary of changes (added commits):
---

  e918561... Merge commit 'refs/users/meissner/heads/work178-tar' of git
  34491a7... Add ChangeLog.tar and update REVISION.
  db845de... Add -mcpu=future tuning support. (*)
  9a8c9eb... Add support for -mcpu=future (*)
  cf2e2df... Update tests to work with architecture flags changes. (*)
  e629d8c... Change TARGET_MODULO to TARGET_POWER9 (*)
  30a040f... Change TARGET_POPCNTD to TARGET_POWER7 (*)
  2447d85... Change TARGET_CMPB to TARGET_POWER6 (*)
  c290843... Change TARGET_FPRND to TARGET_POWER5X (*)
  71a8da6... Change TARGET_POPCNTB to TARGET_POWER5 (*)
  59c893c... Do not allow -mvsx to boost processor to power7. (*)
  fbd5596... Use architecture flags for defining _ARCH_PWR macros. (*)
  77cfcb4... Add rs6000 architecture masks. (*)

(*) This commit already exists in another branch.
Because the reference `refs/users/meissner/heads/work178-tar' matches
your hooks.email-new-commits-only configuration,
no separate email is sent for this commit.


[gcc(refs/users/meissner/heads/work178-tar)] Merge commit 'refs/users/meissner/heads/work178-tar' of git+ssh://gcc.gnu.org/git/gcc into me/work17

2024-09-12 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:e91856119cce502ef99fe4f06d5063b9a09412d5

commit e91856119cce502ef99fe4f06d5063b9a09412d5
Merge: 34491a7fa688 a5d043977b95
Author: Michael Meissner 
Date:   Thu Sep 12 16:05:44 2024 -0400

Merge commit 'refs/users/meissner/heads/work178-tar' of 
git+ssh://gcc.gnu.org/git/gcc into me/work178-tar

Diff:


[gcc(refs/users/meissner/heads/work178-tar)] Add ChangeLog.tar and update REVISION.

2024-09-12 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:34491a7fa6889fe2679909377843c2bc1ff52910

commit 34491a7fa6889fe2679909377843c2bc1ff52910
Author: Michael Meissner 
Date:   Thu Sep 12 15:28:38 2024 -0400

Add ChangeLog.tar and update REVISION.

2024-09-12  Michael Meissner  

gcc/

* ChangeLog.tar: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.tar | 6 ++
 gcc/REVISION  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.tar b/gcc/ChangeLog.tar
new file mode 100644
index ..0e7ea4d69602
--- /dev/null
+++ b/gcc/ChangeLog.tar
@@ -0,0 +1,6 @@
+ Branch work178-tar, baseline 
+
+2024-09-12   Michael Meissner  
+
+   Clone branch
+
diff --git a/gcc/REVISION b/gcc/REVISION
index f0e6035292ab..249a722e02a7 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work178 branch
+work178-tar branch


[gcc(refs/users/meissner/heads/work178-test)] Add ChangeLog.test and update REVISION.

2024-09-12 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:881c995ea9b807ee7d0710045e1217adbe16a09a

commit 881c995ea9b807ee7d0710045e1217adbe16a09a
Author: Michael Meissner 
Date:   Thu Sep 12 15:31:34 2024 -0400

Add ChangeLog.test and update REVISION.

2024-09-12  Michael Meissner  

gcc/

* ChangeLog.test: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.test | 6 ++
 gcc/REVISION   | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.test b/gcc/ChangeLog.test
new file mode 100644
index ..e4a1af6026ec
--- /dev/null
+++ b/gcc/ChangeLog.test
@@ -0,0 +1,6 @@
+ Branch work178-test, baseline 
+
+2024-09-12   Michael Meissner  
+
+   Clone branch
+
diff --git a/gcc/REVISION b/gcc/REVISION
index f0e6035292ab..c238c7a0e337 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work178 branch
+work178-test branch


[gcc/meissner/heads/work178-vpair] (13 commits) Merge commit 'refs/users/meissner/heads/work178-vpair' of g

2024-09-12 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work178-vpair' was updated to point to:

 d06132bfafb4... Merge commit 'refs/users/meissner/heads/work178-vpair' of g

It previously pointed to:

 f7abfc56ea5d... Add ChangeLog.vpair and update REVISION.

Diff:

Summary of changes (added commits):
---

  d06132b... Merge commit 'refs/users/meissner/heads/work178-vpair' of g
  1d50625... Add ChangeLog.vpair and update REVISION.
  db845de... Add -mcpu=future tuning support. (*)
  9a8c9eb... Add support for -mcpu=future (*)
  cf2e2df... Update tests to work with architecture flags changes. (*)
  e629d8c... Change TARGET_MODULO to TARGET_POWER9 (*)
  30a040f... Change TARGET_POPCNTD to TARGET_POWER7 (*)
  2447d85... Change TARGET_CMPB to TARGET_POWER6 (*)
  c290843... Change TARGET_FPRND to TARGET_POWER5X (*)
  71a8da6... Change TARGET_POPCNTB to TARGET_POWER5 (*)
  59c893c... Do not allow -mvsx to boost processor to power7. (*)
  fbd5596... Use architecture flags for defining _ARCH_PWR macros. (*)
  77cfcb4... Add rs6000 architecture masks. (*)

(*) This commit already exists in another branch.
Because the reference `refs/users/meissner/heads/work178-vpair' matches
your hooks.email-new-commits-only configuration,
no separate email is sent for this commit.


[gcc(refs/users/meissner/heads/work178-vpair)] Add ChangeLog.vpair and update REVISION.

2024-09-12 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:1d506250b1fb168e72cf2143e761d02c924f8eb3

commit 1d506250b1fb168e72cf2143e761d02c924f8eb3
Author: Michael Meissner 
Date:   Thu Sep 12 15:27:48 2024 -0400

Add ChangeLog.vpair and update REVISION.

2024-09-12  Michael Meissner  

gcc/

* ChangeLog.vpair: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.vpair | 6 ++
 gcc/REVISION| 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.vpair b/gcc/ChangeLog.vpair
new file mode 100644
index ..4350f0a50f5f
--- /dev/null
+++ b/gcc/ChangeLog.vpair
@@ -0,0 +1,6 @@
+ Branch work178-vpair, baseline 
+
+2024-09-12   Michael Meissner  
+
+   Clone branch
+
diff --git a/gcc/REVISION b/gcc/REVISION
index f0e6035292ab..55bdd433a1dd 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work178 branch
+work178-vpair branch


[gcc(refs/users/meissner/heads/work178-bugs)] PR 89213: Add better support for shifting vectors with 64-bit elements

2024-09-12 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:3aa2b0a70ccadb176c78ba9a9bf837143f3d59ae

commit 3aa2b0a70ccadb176c78ba9a9bf837143f3d59ae
Author: Michael Meissner 
Date:   Thu Sep 12 16:13:23 2024 -0400

PR 89213: Add better support for shifting vectors with 64-bit elements

This patch fixes PR target/89213 to allow better code to be generated to do
constant shifts of V2DI/V2DF vectors.  Previously GCC would do constant 
shifts
of vectors with 64-bit elements by using:

XXSPLTIB 32,4
VEXTSB2D 0,0
VSRAD 2,2,0

I.e., the PowerPC does not have a VSPLTISD instruction to load -15..14 for 
the
64-bit shift count in one instruction.  Instead, it would need to load a 
byte
and then convert it to 64-bit.

With this patch, GCC now realizes that the vector shift instructions will 
look
at the bottom 6 bits for the shift count, and it can use either a VSPLTISW 
or
XXSPLTIB instruction to load the shift count.

2024-09-12  Michael Meissner  

gcc/

PR target/89213
* config/rs6000/altivec.md (UNSPEC_VECTOR_SHIFT): New unspec.
(VSHIFT_MODE): New mode iterator.
(vshift_code): New code iterator.
(vshift_attr): New code attribute.
(altivec___const): New pattern to optimize
vector long long/int shifts by a constant.
(altivec__shift_const): New helper insn to load up a
constant used by the shift operation.
* config/rs6000/predicates.md (vector_shift_constant): New
predicate.

gcc/testsuite/

PR target/89213
* gcc.target/powerpc/pr89213.c: New test.
* gcc.target/powerpc/vec-rlmi-rlnm.c: Update instruction count.

Diff:
---
 gcc/config/rs6000/altivec.md |  51 +++
 gcc/config/rs6000/predicates.md  |  63 ++
 gcc/testsuite/gcc.target/powerpc/pr89213.c   | 106 +++
 gcc/testsuite/gcc.target/powerpc/vec-rlmi-rlnm.c |   4 +-
 4 files changed, 222 insertions(+), 2 deletions(-)

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 1f5489b974f6..8faece984e9f 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -170,6 +170,7 @@
UNSPEC_VSTRIL
UNSPEC_SLDB
UNSPEC_SRDB
+   UNSPEC_VECTOR_SHIFT
 ])
 
 (define_c_enum "unspecv"
@@ -2176,6 +2177,56 @@
   "vsro %0,%1,%2"
   [(set_attr "type" "vecperm")])
 
+;; Optimize V2DI shifts by constants.  This relies on the shift instructions
+;; only looking at the bits needed to do the shift.  This means we can use
+;; VSPLTISW or XXSPLTIB to load up the constant, and not worry about the bits
+;; that the vector shift instructions will not use.
+(define_mode_iterator VSHIFT_MODE  [(V4SI "TARGET_P9_VECTOR")
+(V2DI "TARGET_P8_VECTOR")])
+
+(define_code_iterator vshift_code  [ashift ashiftrt lshiftrt])
+(define_code_attr vshift_attr  [(ashift   "ashift")
+(ashiftrt "ashiftrt")
+(lshiftrt "lshiftrt")])
+
+(define_insn_and_split "*altivec___const"
+  [(set (match_operand:VSHIFT_MODE 0 "register_operand" "=v")
+   (vshift_code:VSHIFT_MODE
+(match_operand:VSHIFT_MODE 1 "register_operand" "v")
+(match_operand:VSHIFT_MODE 2 "vector_shift_constant" "")))
+   (clobber (match_scratch:VSHIFT_MODE 3 "=&v"))]
+  "((mode == V2DImode && TARGET_P8_VECTOR)
+|| (mode == V4SImode && TARGET_P9_VECTOR))"
+  "#"
+  "&& 1"
+  [(set (match_dup 3)
+   (unspec:VSHIFT_MODE [(match_dup 4)] UNSPEC_VECTOR_SHIFT))
+   (set (match_dup 0)
+   (vshift_code:VSHIFT_MODE (match_dup 1)
+(match_dup 3)))]
+{
+  if (GET_CODE (operands[3]) == SCRATCH)
+operands[3] = gen_reg_rtx (mode);
+
+  operands[4] = ((GET_CODE (operands[2]) == CONST_VECTOR)
+? CONST_VECTOR_ELT (operands[2], 0)
+: XEXP (operands[2], 0));
+})
+
+(define_insn "*altivec__shift_const"
+  [(set (match_operand:VSHIFT_MODE 0 "register_operand" "=v")
+   (unspec:VSHIFT_MODE [(match_operand 1 "const_int_operand" "n")]
+   UNSPEC_VECTOR_SHIFT))]
+  "TARGET_P8_VECTOR"
+{
+  if (UINTVAL (operands[1]) <= 15)
+return "vspltisw %0,%1";
+  else if (TARGET_P9_VECTOR)
+return "xxspltib %x0,%1";
+  else
+gcc_unreachable ();
+})
+
 (define_insn "altivec_vsum4ubs"
   [(set (match_operand:V4SI 0 "register_operand" "=v")
 (unspec:V4SI [(match_operand:V16QI 1 "register_operand" "v")
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 7f0b4ab61e65..0b78901e94be 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -861,6 +861,69 @@
 return op == CONST0_RTX (mode) || op == CONSTM1_RTX (mode);
 })
 
+;; Return 1 if the operand is a V2DI or V4SI const_vecto

[gcc(refs/users/meissner/heads/work178-bugs)] PR 99293: Optimize splat of a V2DF/V2DI extract with constant element

2024-09-12 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:645cebe1a5373a7fb6505943dc5b486481975bbe

commit 645cebe1a5373a7fb6505943dc5b486481975bbe
Author: Michael Meissner 
Date:   Thu Sep 12 16:15:13 2024 -0400

PR 99293: Optimize splat of a V2DF/V2DI extract with constant element

We had optimizations for splat of a vector extract for the other vector
types, but we missed having one for V2DI and V2DF.  This patch adds a
combiner insn to do this optimization.

In looking at the source, we had similar optimizations for V4SI and V4SF
extract and splats, but we missed doing V2DI/V2DF.

Without the patch for the code:

vector long long splat_dup_l_0 (vector long long v)
{
  return __builtin_vec_splats (__builtin_vec_extract (v, 0));
}

the compiler generates (on a little endian power9):

splat_dup_l_0:
mfvsrld 9,34
mtvsrdd 34,9,9
blr

Now it generates:

splat_dup_l_0:
xxpermdi 34,34,34,3
blr

2024-09-12  Michael Meissner  

gcc/

* config/rs6000/vsx.md (vsx_splat_extract_): New insn.

gcc/testsuite/

* gcc.target/powerpc/builtins-1.c: Adjust insn count.
* gcc.target/powerpc/pr99293.c: New test.

Diff:
---
 gcc/config/rs6000/vsx.md  | 18 ++
 gcc/testsuite/gcc.target/powerpc/builtins-1.c |  2 +-
 gcc/testsuite/gcc.target/powerpc/pr99293.c| 22 ++
 3 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index b2fc39acf4e8..73f20a86e56a 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -4796,6 +4796,24 @@
   "lxvdsx %x0,%y1"
   [(set_attr "type" "vecload")])
 
+;; Optimize SPLAT of an extract from a V2DF/V2DI vector with a constant element
+(define_insn "*vsx_splat_extract_"
+  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
+   (vec_duplicate:VSX_D
+(vec_select:
+ (match_operand:VSX_D 1 "vsx_register_operand" "wa")
+ (parallel [(match_operand 2 "const_0_to_1_operand" "n")]]
+  "VECTOR_MEM_VSX_P (mode)"
+{
+  int which_word = INTVAL (operands[2]);
+  if (!BYTES_BIG_ENDIAN)
+which_word = 1 - which_word;
+
+  operands[3] = GEN_INT (which_word ? 3 : 0);
+  return "xxpermdi %x0,%x1,%x1,%3";
+}
+  [(set_attr "type" "vecperm")])
+
 ;; V4SI splat support
 (define_insn "vsx_splat_v4si"
   [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,wa")
diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-1.c 
b/gcc/testsuite/gcc.target/powerpc/builtins-1.c
index 8410a5fd4319..4e7e5384675f 100644
--- a/gcc/testsuite/gcc.target/powerpc/builtins-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/builtins-1.c
@@ -1035,4 +1035,4 @@ foo156 (vector unsigned short usa)
 /* { dg-final { scan-assembler-times {\mvmrglb\M} 3 } } */
 /* { dg-final { scan-assembler-times {\mvmrgew\M} 4 } } */
 /* { dg-final { scan-assembler-times {\mvsplth|xxsplth\M} 4 } } */
-/* { dg-final { scan-assembler-times {\mxxpermdi\M} 44 } } */
+/* { dg-final { scan-assembler-times {\mxxpermdi\M} 42 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr99293.c 
b/gcc/testsuite/gcc.target/powerpc/pr99293.c
new file mode 100644
index ..20adc1f27f65
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr99293.c
@@ -0,0 +1,22 @@
+/* { dg-do compile { target powerpc*-*-* } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -mvsx" } */
+
+/* Test for PR 99293, which wants to do:
+   __builtin_vec_splats (__builtin_vec_extract (v, n))
+
+   where v is a V2DF or V2DI vector and n is either 0 or 1.  Previously the
+   compiler would do a direct move to the GPR registers to select the item and 
a
+   direct move from the GPR registers to do the splat.  */
+
+vector long long splat_dup_l_0 (vector long long v)
+{
+  return __builtin_vec_splats (__builtin_vec_extract (v, 0));
+}
+
+vector long long splat_dup_l_1 (vector long long v)
+{
+  return __builtin_vec_splats (__builtin_vec_extract (v, 1));
+}
+
+/* { dg-final { scan-assembler-times "xxpermdi" 2 } } */


[gcc/meissner/heads/work178-test] (13 commits) Merge commit 'refs/users/meissner/heads/work178-test' of gi

2024-09-12 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work178-test' was updated to point to:

 b10cc7a6851c... Merge commit 'refs/users/meissner/heads/work178-test' of gi

It previously pointed to:

 8124f1e6aff8... Add ChangeLog.test and update REVISION.

Diff:

Summary of changes (added commits):
---

  b10cc7a... Merge commit 'refs/users/meissner/heads/work178-test' of gi
  881c995... Add ChangeLog.test and update REVISION.
  db845de... Add -mcpu=future tuning support. (*)
  9a8c9eb... Add support for -mcpu=future (*)
  cf2e2df... Update tests to work with architecture flags changes. (*)
  e629d8c... Change TARGET_MODULO to TARGET_POWER9 (*)
  30a040f... Change TARGET_POPCNTD to TARGET_POWER7 (*)
  2447d85... Change TARGET_CMPB to TARGET_POWER6 (*)
  c290843... Change TARGET_FPRND to TARGET_POWER5X (*)
  71a8da6... Change TARGET_POPCNTB to TARGET_POWER5 (*)
  59c893c... Do not allow -mvsx to boost processor to power7. (*)
  fbd5596... Use architecture flags for defining _ARCH_PWR macros. (*)
  77cfcb4... Add rs6000 architecture masks. (*)

(*) This commit already exists in another branch.
Because the reference `refs/users/meissner/heads/work178-test' matches
your hooks.email-new-commits-only configuration,
no separate email is sent for this commit.


[gcc(refs/users/meissner/heads/work178-test)] Merge commit 'refs/users/meissner/heads/work178-test' of git+ssh://gcc.gnu.org/git/gcc into me/work1

2024-09-12 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:b10cc7a6851cd9a2228a6f595c74806818d41b24

commit b10cc7a6851cd9a2228a6f595c74806818d41b24
Merge: 881c995ea9b8 8124f1e6aff8
Author: Michael Meissner 
Date:   Thu Sep 12 16:06:51 2024 -0400

Merge commit 'refs/users/meissner/heads/work178-test' of 
git+ssh://gcc.gnu.org/git/gcc into me/work178-test

Diff:


[gcc(refs/users/meissner/heads/work178-vpair)] Merge commit 'refs/users/meissner/heads/work178-vpair' of git+ssh://gcc.gnu.org/git/gcc into me/work

2024-09-12 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:d06132bfafb4dd7ed82cadf50263cc0b6e943125

commit d06132bfafb4dd7ed82cadf50263cc0b6e943125
Merge: 1d506250b1fb f7abfc56ea5d
Author: Michael Meissner 
Date:   Thu Sep 12 16:08:49 2024 -0400

Merge commit 'refs/users/meissner/heads/work178-vpair' of 
git+ssh://gcc.gnu.org/git/gcc into me/work178-vpair

Diff:


[gcc(refs/users/meissner/heads/work178-bugs)] Update ChangeLog.*

2024-09-12 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:b9f44c9162a08f667bc97185134b88aea72981d8

commit b9f44c9162a08f667bc97185134b88aea72981d8
Author: Michael Meissner 
Date:   Thu Sep 12 16:17:50 2024 -0400

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.bugs | 94 +-
 1 file changed, 93 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.bugs b/gcc/ChangeLog.bugs
index 8413d3fa1ca2..31668b76dbc5 100644
--- a/gcc/ChangeLog.bugs
+++ b/gcc/ChangeLog.bugs
@@ -1,6 +1,98 @@
+ Branch work178-bugs, patch #201 
+
+PR 99293: Optimize splat of a V2DF/V2DI extract with constant element
+
+We had optimizations for splat of a vector extract for the other vector
+types, but we missed having one for V2DI and V2DF.  This patch adds a
+combiner insn to do this optimization.
+
+In looking at the source, we had similar optimizations for V4SI and V4SF
+extract and splats, but we missed doing V2DI/V2DF.
+
+Without the patch for the code:
+
+   vector long long splat_dup_l_0 (vector long long v)
+   {
+ return __builtin_vec_splats (__builtin_vec_extract (v, 0));
+   }
+
+the compiler generates (on a little endian power9):
+
+   splat_dup_l_0:
+   mfvsrld 9,34
+   mtvsrdd 34,9,9
+   blr
+
+Now it generates:
+
+   splat_dup_l_0:
+   xxpermdi 34,34,34,3
+   blr
+
+2024-09-12  Michael Meissner  
+
+gcc/
+
+   * config/rs6000/vsx.md (vsx_splat_extract_): New insn.
+
+gcc/testsuite/
+
+   * gcc.target/powerpc/builtins-1.c: Adjust insn count.
+   * gcc.target/powerpc/pr99293.c: New test.
+
+ Branch work178-bugs, patch #200 
+
+PR 89213: Add better support for shifting vectors with 64-bit elements
+
+This patch fixes PR target/89213 to allow better code to be generated to do
+constant shifts of V2DI/V2DF vectors.  Previously GCC would do constant shifts
+of vectors with 64-bit elements by using:
+
+   XXSPLTIB 32,4
+   VEXTSB2D 0,0
+   VSRAD 2,2,0
+
+I.e., the PowerPC does not have a VSPLTISD instruction to load -15..14 for the
+64-bit shift count in one instruction.  Instead, it would need to load a byte
+and then convert it to 64-bit.
+
+With this patch, GCC now realizes that the vector shift instructions will look
+at the bottom 6 bits for the shift count, and it can use either a VSPLTISW or
+XXSPLTIB instruction to load the shift count.
+
+2024-09-12  Michael Meissner  
+
+gcc/
+
+   PR target/89213
+   * config/rs6000/altivec.md (UNSPEC_VECTOR_SHIFT): New unspec.
+   (VSHIFT_MODE): New mode iterator.
+   (vshift_code): New code iterator.
+   (vshift_attr): New code attribute.
+   (altivec___const): New pattern to optimize
+   vector long long/int shifts by a constant.
+   (altivec__shift_const): New helper insn to load up a
+   constant used by the shift operation.
+   * config/rs6000/predicates.md (vector_shift_constant): New
+   predicate.
+
+gcc/testsuite/
+
+   PR target/89213
+   * gcc.target/powerpc/pr89213.c: New test.
+   * gcc.target/powerpc/vec-rlmi-rlnm.c: Update instruction count.
+
  Branch work178-bugs, baseline 
 
+Add ChangeLog.bugs and update REVISION.
+
+2024-09-12  Michael Meissner  
+
+gcc/
+
+   * ChangeLog.bugs: New file for branch.
+   * REVISION: Update.
+
 2024-09-12   Michael Meissner  
 
Clone branch
-


[gcc(refs/users/meissner/heads/work178-dmf)] Use vector pair load/store for memcpy with -mcpu=future

2024-09-12 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:9764af0303d209a7ad828a13f58ad1a6c920142a

commit 9764af0303d209a7ad828a13f58ad1a6c920142a
Author: Michael Meissner 
Date:   Thu Sep 12 16:35:38 2024 -0400

Use vector pair load/store for memcpy with -mcpu=future

In the development for the power10 processor, GCC did not enable using the 
load
vector pair and store vector pair instructions when optimizing things like
memory copy.  This patch enables using those instructions if -mcpu=future is
used.

2024-09-12  Michael Meissner  

gcc/

* config/rs6000/rs6000-cpus.def (ISA_FUTURE_MASKS_SERVER): Enable 
using
load vector pair and store vector pair instructions for memory copy
operations.
(POWERPC_MASKS): Make the bit for enabling using load vector pair 
and
store vector pair operations set and reset when the PowerPC 
processor is
changed.

Diff:
---
 gcc/config/rs6000/rs6000-cpus.def | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/gcc/config/rs6000/rs6000-cpus.def 
b/gcc/config/rs6000/rs6000-cpus.def
index e73d9ef51f8d..74151be40484 100644
--- a/gcc/config/rs6000/rs6000-cpus.def
+++ b/gcc/config/rs6000/rs6000-cpus.def
@@ -86,7 +86,8 @@
 
 #define POWER11_MASKS_SERVER   ISA_3_1_MASKS_SERVER
 
-#define FUTURE_MASKS_SERVERPOWER11_MASKS_SERVER
+#define FUTURE_MASKS_SERVER(POWER11_MASKS_SERVER   \
+| OPTION_MASK_BLOCK_OPS_VECTOR_PAIR)
 
 /* Flags that need to be turned off if -mno-vsx.  */
 #define OTHER_VSX_VECTOR_MASKS (OPTION_MASK_EFFICIENT_UNALIGNED_VSX\
@@ -116,6 +117,7 @@
 
 /* Mask of all options to set the default isa flags based on -mcpu=.  */
 #define POWERPC_MASKS  (OPTION_MASK_ALTIVEC\
+| OPTION_MASK_BLOCK_OPS_VECTOR_PAIR\
 | OPTION_MASK_CMPB \
 | OPTION_MASK_CRYPTO   \
 | OPTION_MASK_DFP  \


[gcc(refs/users/meissner/heads/work178-dmf)] RFC2653-Add wD constraint.

2024-09-12 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:16cf5254202b83638035fe28e881c62efa89f666

commit 16cf5254202b83638035fe28e881c62efa89f666
Author: Michael Meissner 
Date:   Thu Sep 12 16:36:51 2024 -0400

RFC2653-Add wD constraint.

This patch adds a new constraint ('wD') that matches the accumulator 
registers
that overlap with VSX registers 0..31 on power10.  Future patches will add 
the
support for a separate accumulator register class that will be used when the
support for dense math registers is added.

2024-09-12   Michael Meissner  

* config/rs6000/constraints.md (wD): New constraint.
* config/rs6000/mma.md (mma_): Prepare for alternate 
accumulator
registers.  Use wD constraint instead of 'd' constraint.  Use
accumulator_operand instead of fpr_reg_operand.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")]
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0")]
MMA_ACC))]
   "TARGET_MMA"
   " %A0"
@@ -523,7 +523,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
MMA_VV))]
@@ -532,8 +532,8 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
MMA_AVV))]
@@ -542,7 +542,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
MMA_PV))]
@@ -551,8 +551,8 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
(match_operand:OO 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
MMA_APV))]
@@ -561,7 +561,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -574,8 +574,8 @@
(set_attr "prefixed" "yes")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
(match_operand:SI 4 "const_0_to_15_operand" "n,n")
@@ -588,7 +588,7 @@
(set_attr "prefixed" "yes")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -601,8 +601,8 @@
(set_attr "prefixed" "yes")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
   

[gcc(refs/users/meissner/heads/work178-dmf)] RFC2653-Add support for dense math registers.

2024-09-12 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:95ef2562cd8d6d0543ba0aaa1a2c9b10f40ed542

commit 95ef2562cd8d6d0543ba0aaa1a2c9b10f40ed542
Author: Michael Meissner 
Date:   Thu Sep 12 16:38:06 2024 -0400

RFC2653-Add support for dense math registers.

The MMA subsystem added the notion of accumulator registers as an optional
feature of ISA 3.1 (power10).  In ISA 3.1, these accumulators overlapped 
with
the VSX registers 0..31, but logically the accumulator registers were 
separate
from the FPR registers.  In ISA 3.1, it was anticipated that in future 
systems,
the accumulator registers may not overlap with the FPR registers.  This patch
adds the support for dense math registers as separate registers.

This particular patch does not change the MMA support to use the 
accumulators
within the dense math registers.  This patch just adds the basic support for
having separate DMRs.  The next patch will switch the MMA support to use the
accumulators if -mcpu=future is used.

For testing purposes, I added an undocumented option '-mdense-math' to 
enable
or disable the dense math support.

This patch adds a new constraint (wD).  If MMA is selected but dense math is
not selected (i.e. -mcpu=power10), the wD constraint will allow access to
accumulators that overlap with VSX registers 0..31.  If both MMA and dense 
math
are selected (i.e. -mcpu=future), the wD constraint will only allow dense 
math
registers.

This patch modifies the existing %A output modifier.  If MMA is selected but
dense math is not selected, then %A output modifier converts the VSX 
register
number to the accumulator number, by dividing it by 4.  If both MMA and 
dense
math are selected, then %A will map the separate DMR registers into 0..7.

The intention is that user code using extended asm can be modified to run on
both MMA without dense math and MMA with dense math:

1)  If possible, don't use extended asm, but instead use the MMA 
built-in
functions;

2)  If you do need to write extended asm, change the d constraints
targeting accumulators to use wD;

3)  Only use the built-in zero, assemble and disassemble functions to
create or move data between vector quad types and dense math accumulators.
I.e. do not use the xxmfacc, xxmtacc, and xxsetaccz directly in the
extended asm code.  The reason is these instructions assume there 
is a
1-to-1 correspondence between 4 adjacent FPR registers and an
accumulator that overlaps with those registers.  With 
accumulators
now being separate registers, there no longer is a 1-to-1
correspondence.

It is possible that the mangling for DMRs and the GDB register numbers may
produce other changes in the future.

2024-09-12   Michael Meissner  

* config/rs6000/mma.md (UNSPEC_MMA_DMSETDMRZ): New unspec.
(movxo): Add comments about dense math registers.
(movxo_nodm): Rename from movxo and restrict the usage to machines
without dense math registers.
(movxo_dm): New insn for movxo support for machines with dense math
registers.
(mma_): Restrict usage to machines without dense math 
registers.
(mma_xxsetaccz): Add a define_expand wrapper, and add support for 
dense
math registers.
(mma_dmsetaccz): New insn.
* config/rs6000/predicates.md (dmr_operand): New predicate.
(accumulator_operand): Add support for dense math registers.
* config/rs6000/rs6000-builtin.cc (rs6000_gimple_fold_mma_builtin): 
Do
not issue a de-prime instruction when disassembling a vector quad 
on a
system with dense math registers.
* config/rs6000/rs6000-c.cc (rs6000_define_or_undefine_macro): 
Define
__DENSE_MATH__ if we have dense math registers.
* config/rs6000/rs6000.cc (enum rs6000_reg_type): Add DMR_REG_TYPE.
(enum rs6000_reload_reg_type): Add RELOAD_REG_DMR.
(LAST_RELOAD_REG_CLASS): Add support for DMR registers and the wD
constraint.
(reload_reg_map): Likewise.
(rs6000_reg_names): Likewise.
(alt_reg_names): Likewise.
(rs6000_hard_regno_nregs_internal): Likewise.
(rs6000_hard_regno_mode_ok_uncached): Likewise.
(rs6000_debug_reg_global): Likewise.
(rs6000_setup_reg_addr_masks): Likewise.
(rs6000_init_hard_regno_mode_ok): Likewise.
(rs6000_secondary_reload_memory): Add support for DMR registers.
(rs6000_secondary_reload_simple_move): Likewise.
(rs6000_preferred_reload_class): Likewise.
(rs6000_secondary_reload_class): Likewise.
(print_operand): Make %A handle both FPRs and DMRs.
 

[gcc(refs/users/meissner/heads/work178-dmf)] RFC2653-PowerPC: Switch to dense math names for all MMA operations.

2024-09-12 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:ace2e85d487f4126573be32e36f2d5b610cb0bbe

commit ace2e85d487f4126573be32e36f2d5b610cb0bbe
Author: Michael Meissner 
Date:   Thu Sep 12 16:39:17 2024 -0400

RFC2653-PowerPC: Switch to dense math names for all MMA operations.

This patch changes the assembler instruction names for MMA instructions from
the original name used in power10 to the new name when used with the dense 
math
system.  I.e. xvf64gerpp becomes dmxvf64gerpp.  The assembler will emit the
same bits for either spelling.

For the non-prefixed MMA instructions, we add a 'dm' prefix in front of the
instruction.  However, the prefixed instructions have a 'pm' prefix, and we 
add
the 'dm' prefix afterwards.  To prevent having two sets of parallel int
attributes, we remove the "pm" prefix from the instruction string in the
attributes, and add it later, both in the insn name and in the output 
template.

2024-09-12   Michael Meissner  

gcc/

* config/rs6000/mma.md (vvi4i4i8): Change the instruction to not 
have a
"pm" prefix.
(avvi4i4i8): Likewise.
(vvi4i4i2): Likewise.
(avvi4i4i2): Likewise.
(vvi4i4): Likewise.
(avvi4i4): Likewise.
(pvi4i2): Likewise.
(apvi4i2): Likewise.
(vvi4i4i4): Likewise.
(avvi4i4i4): Likewise.
(mma_): Add support for running on DMF systems, generating the 
dense
math instruction and using the dense math accumulators.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_pm): Add support for running on DMF systems, 
generating
the dense math instruction and using the dense math accumulators.
Rename the insn with a 'pm' prefix and add either 'pm' or 'pmdm'
prefixes based on whether we have the original MMA specification or 
if
we have dense math support.
(mma_pm): Likewise.
(mma_pm): Likewise.
(mma_pm): Likewise.
(mma_pm): Likewise.
(mma_pm): Likewise.
(mma_pm): Likewise.
(mma_pm): Likewise.

Diff:
---
 gcc/config/rs6000/mma.md | 157 +++
 1 file changed, 104 insertions(+), 53 deletions(-)

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index ae6e7e9695be..2e04eb653fa6 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -225,44 +225,47 @@
 (UNSPEC_MMA_XVF64GERNP "xvf64gernp")
 (UNSPEC_MMA_XVF64GERNN "xvf64gernn")])
 
-(define_int_attr vvi4i4i8  [(UNSPEC_MMA_PMXVI4GER8 "pmxvi4ger8")])
+;; The "pm" prefix is not in these expansions, so that we can generate
+;; pmdmxvi4ger8 on systems with dense math registers and xvi4ger8 on systems
+;; without dense math registers.
+(define_int_attr vvi4i4i8  [(UNSPEC_MMA_PMXVI4GER8 "xvi4ger8")])
 
-(define_int_attr avvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8PP   
"pmxvi4ger8pp")])
+(define_int_attr avvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8PP   "xvi4ger8pp")])
 
-(define_int_attr vvi4i4i2  [(UNSPEC_MMA_PMXVI16GER2"pmxvi16ger2")
-(UNSPEC_MMA_PMXVI16GER2S   "pmxvi16ger2s")
-(UNSPEC_MMA_PMXVF16GER2"pmxvf16ger2")
-(UNSPEC_MMA_PMXVBF16GER2   
"pmxvbf16ger2")])
+(define_int_attr vvi4i4i2  [(UNSPEC_MMA_PMXVI16GER2"xvi16ger2")
+(UNSPEC_MMA_PMXVI16GER2S   "xvi16ger2s")
+(UNSPEC_MMA_PMXVF16GER2"xvf16ger2")
+(UNSPEC_MMA_PMXVBF16GER2   "xvbf16ger2")])
 
-(define_int_attr avvi4i4i2 [(UNSPEC_MMA_PMXVI16GER2PP  "pmxvi16ger2pp")
-(UNSPEC_MMA_PMXVI16GER2SPP 
"pmxvi16ger2spp")
-(UNSPEC_MMA_PMXVF16GER2PP  "pmxvf16ger2pp")
-(UNSPEC_MMA_PMXVF16GER2PN  "pmxvf16ger2pn")
-(UNSPEC_MMA_PMXVF16GER2NP  "pmxvf16ger2np")
-(UNSPEC_MMA_PMXVF16GER2NN  "pmxvf16ger2nn")
-(UNSPEC_MMA_PMXVBF16GER2PP 
"pmxvbf16ger2pp")
-(UNSPEC_MMA_PMXVBF16GER2PN 
"pmxvbf16ger2pn")
-(UNSPEC_MMA_PMXVBF16GER2NP 
"pmxvbf16ger2np")
-(UNSPEC_MMA_PMXVBF16GER2NN 
"pmxvbf16ger2nn")])
+(define_int_attr avvi4i4i2 [(UNSPEC_MMA_PMXVI16GER2PP  "xvi16ger2pp")
+(UNSPEC_MMA_PMXVI16GER2SPP "xvi16ger2spp")
+(UNSPEC_MMA_PMXVF16GER2PP  "xvf16ger2pp")
+(UNSPEC_MMA_PMXVF16GER2PN  "xvf

[gcc(refs/users/meissner/heads/work178-dmf)] RFC2653-Add dense math test for new instruction names.

2024-09-12 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:eeb17942c1af0428783739a22fbfdac7a8605664

commit eeb17942c1af0428783739a22fbfdac7a8605664
Author: Michael Meissner 
Date:   Thu Sep 12 16:40:49 2024 -0400

RFC2653-Add dense math test for new instruction names.

2024-09-12   Michael Meissner  

gcc/testsuite/

* gcc.target/powerpc/dm-double-test.c: New test.
* lib/target-supports.exp (check_effective_target_ppc_dmr_ok): New
target test.

Diff:
---
 gcc/testsuite/gcc.target/powerpc/dm-double-test.c | 194 ++
 gcc/testsuite/lib/target-supports.exp |  23 +++
 2 files changed, 217 insertions(+)

diff --git a/gcc/testsuite/gcc.target/powerpc/dm-double-test.c 
b/gcc/testsuite/gcc.target/powerpc/dm-double-test.c
new file mode 100644
index ..66c197795856
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/dm-double-test.c
@@ -0,0 +1,194 @@
+/* Test derived from mma-double-1.c, modified for dense math.  */
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_dense_math_ok } */
+/* { dg-options "-mdejagnu-cpu=future -O2" } */
+
+#include 
+#include 
+#include 
+
+typedef unsigned char vec_t __attribute__ ((vector_size (16)));
+typedef double v4sf_t __attribute__ ((vector_size (16)));
+#define SAVE_ACC(ACC, ldc, J)  \
+ __builtin_mma_disassemble_acc (result, ACC); \
+ rowC = (v4sf_t *) &CO[0*ldc+J]; \
+  rowC[0] += result[0]; \
+  rowC = (v4sf_t *) &CO[1*ldc+J]; \
+  rowC[0] += result[1]; \
+  rowC = (v4sf_t *) &CO[2*ldc+J]; \
+  rowC[0] += result[2]; \
+  rowC = (v4sf_t *) &CO[3*ldc+J]; \
+ rowC[0] += result[3];
+
+void
+DM (int m, int n, int k, double *A, double *B, double *C)
+{
+  __vector_quad acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7;
+  v4sf_t result[4];
+  v4sf_t *rowC;
+  for (int l = 0; l < n; l += 4)
+{
+  double *CO;
+  double *AO;
+  AO = A;
+  CO = C;
+  C += m * 4;
+  for (int j = 0; j < m; j += 16)
+   {
+ double *BO = B;
+ __builtin_mma_xxsetaccz (&acc0);
+ __builtin_mma_xxsetaccz (&acc1);
+ __builtin_mma_xxsetaccz (&acc2);
+ __builtin_mma_xxsetaccz (&acc3);
+ __builtin_mma_xxsetaccz (&acc4);
+ __builtin_mma_xxsetaccz (&acc5);
+ __builtin_mma_xxsetaccz (&acc6);
+ __builtin_mma_xxsetaccz (&acc7);
+ unsigned long i;
+
+ for (i = 0; i < k; i++)
+   {
+ vec_t *rowA = (vec_t *) & AO[i * 16];
+ __vector_pair rowB;
+ vec_t *rb = (vec_t *) & BO[i * 4];
+ __builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
+ __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
+ __builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
+ __builtin_mma_xvf64gerpp (&acc2, rowB, rowA[2]);
+ __builtin_mma_xvf64gerpp (&acc3, rowB, rowA[3]);
+ __builtin_mma_xvf64gerpp (&acc4, rowB, rowA[4]);
+ __builtin_mma_xvf64gerpp (&acc5, rowB, rowA[5]);
+ __builtin_mma_xvf64gerpp (&acc6, rowB, rowA[6]);
+ __builtin_mma_xvf64gerpp (&acc7, rowB, rowA[7]);
+   }
+ SAVE_ACC (&acc0, m, 0);
+ SAVE_ACC (&acc2, m, 4);
+ SAVE_ACC (&acc1, m, 2);
+ SAVE_ACC (&acc3, m, 6);
+ SAVE_ACC (&acc4, m, 8);
+ SAVE_ACC (&acc6, m, 12);
+ SAVE_ACC (&acc5, m, 10);
+ SAVE_ACC (&acc7, m, 14);
+ AO += k * 16;
+ BO += k * 4;
+ CO += 16;
+   }
+  B += k * 4;
+}
+}
+
+void
+init (double *matrix, int row, int column)
+{
+  for (int j = 0; j < column; j++)
+{
+  for (int i = 0; i < row; i++)
+   {
+ matrix[j * row + i] = (i * 16 + 2 + j) / 0.123;
+   }
+}
+}
+
+void
+init0 (double *matrix, double *matrix1, int row, int column)
+{
+  for (int j = 0; j < column; j++)
+for (int i = 0; i < row; i++)
+  matrix[j * row + i] = matrix1[j * row + i] = 0;
+}
+
+
+void
+print (const char *name, const double *matrix, int row, int column)
+{
+  printf ("Matrix %s has %d rows and %d columns:\n", name, row, column);
+  for (int i = 0; i < row; i++)
+{
+  for (int j = 0; j < column; j++)
+   {
+ printf ("%f ", matrix[j * row + i]);
+   }
+  printf ("\n");
+}
+  printf ("\n");
+}
+
+int
+main (int argc, char *argv[])
+{
+  int rowsA, colsB, common;
+  int i, j, k;
+  int ret = 0;
+
+  for (int t = 16; t <= 128; t += 16)
+{
+  for (int t1 = 4; t1 <= 16; t1 += 4)
+   {
+ rowsA = t;
+ colsB = t1;
+ common = 1;
+ /* printf ("Running test for rows = %d,cols = %d\n", t, t1); */
+ double A[rowsA * common];
+ double B[common * colsB];
+ double C[rowsA * colsB];
+ double D[rowsA * colsB];
+
+
+ init (A, rowsA, common);
+ init (B, common, colsB);
+ init0 (C, D, rowsA, colsB);
+ DM (rowsA, colsB, common, A, B

[gcc(refs/users/meissner/heads/work178-dmf)] RFC2653-PowerPC: Add support for 1,024 bit DMR registers.

2024-09-12 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:eecc79a113cda76c252f56b185c413275bbd7b79

commit eecc79a113cda76c252f56b185c413275bbd7b79
Author: Michael Meissner 
Date:   Thu Sep 12 16:41:55 2024 -0400

RFC2653-PowerPC: Add support for 1,024 bit DMR registers.

This patch is a preliminary patch to add the full 1,024 bit dense math 
register
(DMRs) for -mcpu=future.  The MMA 512-bit accumulators map onto the top of 
the
DMR register.

This patch only adds the new 1,024 bit register support.  It does not add
support for any instructions that need 1,024 bit registers instead of 512 
bit
registers.

I used the new mode 'TDOmode' to be the opaque mode used for 1,024 bit
registers.  The 'wD' constraint added in previous patches is used for these
registers.  I added support to do load and store of DMRs via the VSX 
registers,
since there are no load/store dense math instructions.  I added the new 
keyword
'__dmr' to create 1,024 bit types that can be loaded into DMRs.  At 
present, I
don't have aliases for __dmr512 and __dmr1024 that we've discussed 
internally.

The patches have been tested on both little and big endian systems.  Can I 
check
it into the master branch?

2024-09-12   Michael Meissner  

gcc/

* config/rs6000/mma.md (UNSPEC_DM_INSERT512_UPPER): New unspec.
(UNSPEC_DM_INSERT512_LOWER): Likewise.
(UNSPEC_DM_EXTRACT512): Likewise.
(UNSPEC_DMR_RELOAD_FROM_MEMORY): Likewise.
(UNSPEC_DMR_RELOAD_TO_MEMORY): Likewise.
(movtdo): New define_expand and define_insn_and_split to implement 
1,024
bit DMR registers.
(movtdo_insert512_upper): New insn.
(movtdo_insert512_lower): Likewise.
(movtdo_extract512): Likewise.
(reload_dmr_from_memory): Likewise.
(reload_dmr_to_memory): Likewise.
* config/rs6000/rs6000-builtin.cc (rs6000_type_string): Add DMR
support.
(rs6000_init_builtins): Add support for __dmr keyword.
* config/rs6000/rs6000-call.cc (rs6000_return_in_memory): Add 
support
for TDOmode.
(rs6000_function_arg): Likewise.
* config/rs6000/rs6000-modes.def (TDOmode): New mode.
* config/rs6000/rs6000.cc (rs6000_hard_regno_nregs_internal): Add
support for TDOmode.
(rs6000_hard_regno_mode_ok_uncached): Likewise.
(rs6000_hard_regno_mode_ok): Likewise.
(rs6000_modes_tieable_p): Likewise.
(rs6000_debug_reg_global): Likewise.
(rs6000_setup_reg_addr_masks): Likewise.
(rs6000_init_hard_regno_mode_ok): Add support for TDOmode.  Setup 
reload
hooks for DMR mode.
(reg_offset_addressing_ok_p): Add support for TDOmode.
(rs6000_emit_move): Likewise.
(rs6000_secondary_reload_simple_move): Likewise.
(rs6000_preferred_reload_class): Likewise.
(rs6000_secondary_reload_class): Likewise.
(rs6000_mangle_type): Add mangling for __dmr type.
(rs6000_dmr_register_move_cost): Add support for TDOmode.
(rs6000_split_multireg_move): Likewise.
(rs6000_invalid_conversion): Likewise.
* config/rs6000/rs6000.h (VECTOR_ALIGNMENT_P): Add TDOmode.
(enum rs6000_builtin_type_index): Add DMR type nodes.
(dmr_type_node): Likewise.
(ptr_dmr_type_node): Likewise.

gcc/testsuite/

* gcc.target/powerpc/dm-1024bit.c: New test.

Diff:
---
 gcc/config/rs6000/mma.md  | 154 ++
 gcc/config/rs6000/rs6000-builtin.cc   |  17 +++
 gcc/config/rs6000/rs6000-call.cc  |  10 +-
 gcc/config/rs6000/rs6000-modes.def|   4 +
 gcc/config/rs6000/rs6000.cc   | 101 -
 gcc/config/rs6000/rs6000.h|   6 +-
 gcc/testsuite/gcc.target/powerpc/dm-1024bit.c |  63 +++
 7 files changed, 321 insertions(+), 34 deletions(-)

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index 2e04eb653fa6..8461499e1c3d 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -92,6 +92,11 @@
UNSPEC_MMA_XXMFACC
UNSPEC_MMA_XXMTACC
UNSPEC_MMA_DMSETDMRZ
+   UNSPEC_DM_INSERT512_UPPER
+   UNSPEC_DM_INSERT512_LOWER
+   UNSPEC_DM_EXTRACT512
+   UNSPEC_DMR_RELOAD_FROM_MEMORY
+   UNSPEC_DMR_RELOAD_TO_MEMORY
   ])
 
 (define_c_enum "unspecv"
@@ -793,3 +798,152 @@
 }
   [(set_attr "type" "mma")
(set_attr "prefixed" "yes")])
+
+;; TDOmode (__dmr keyword for 1,024 bit registers).
+(define_expand "movtdo"
+  [(set (match_operand:TDO 0 "nonimmediate_operand")
+   (match_operand:TDO 1 "input_operand"))]
+  "TARGET_MMA_DENSE_MATH"
+{
+  rs6000_emit_move (operands[0], operands[1], TDOmode);
+  DONE;
+})
+
+(define_insn_and_split "*movtdo"
+  [(set (match_operand:TDO

[gcc(refs/users/meissner/heads/work178-dmf)] RFC2656-Support load/store vector with right length.

2024-09-12 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:2ae87991d8a4ac874d19598afcfa72bf378c193a

commit 2ae87991d8a4ac874d19598afcfa72bf378c193a
Author: Michael Meissner 
Date:   Thu Sep 12 16:44:27 2024 -0400

RFC2656-Support load/store vector with right length.

This patch adds support for new instructions that may be added to the 
PowerPC
architecture in the future to enhance the load and store vector with length
instructions.

The current instructions (lxvl, lxvll, stxvl, and stxvll) are inconvenient to 
use
since the count for the number of bytes must be in the top 8 bits of the GPR
register, instead of the bottom 8 bits.  This meant that code generating 
these
instructions typically had to do a shift left by 56 bits to get the count 
into
the right position.  In a future version of the PowerPC architecture, new
variants of these instructions might be added that expect the count to be in
the bottom 8 bits of the GPR register.  These patches add this support to 
GCC
if the user uses the -mcpu=future option.

I discovered that the code in rs6000-string.cc to generate ISA 3.1 
lxvl/stxvl
future lxvll/stxvll instructions would generate these instructions on 
32-bit.
However, the patterns for these instructions are only defined on 64-bit systems. 
 So
I added a check for 64-bit support before generating the instructions.

The patches have been tested on both little and big endian systems.  Can I 
check
it into the master branch?

2024-09-12   Michael Meissner  

gcc/

* config/rs6000/rs6000-string.cc (expand_block_move): Do not 
generate
lxvl and stxvl on 32-bit.
* config/rs6000/vsx.md (lxvl): If -mcpu=future, generate the lxvl 
with
the shift count automatically used in the insn.
(lxvrl): New insn for -mcpu=future.
(lxvrll): Likewise.
(stxvl): If -mcpu=future, generate the stxvl with the shift count
automatically used in the insn.
(stxvrl): New insn for -mcpu=future.
(stxvrll): Likewise.

gcc/testsuite/

* gcc.target/powerpc/lxvrl.c: New test.
* lib/target-supports.exp 
(check_effective_target_powerpc_future_ok):
New effective target.

Diff:
---
 gcc/config/rs6000/rs6000-string.cc   |   1 +
 gcc/config/rs6000/vsx.md | 122 +--
 gcc/testsuite/gcc.target/powerpc/lxvrl.c |  32 
 gcc/testsuite/lib/target-supports.exp|  12 +++
 4 files changed, 146 insertions(+), 21 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-string.cc 
b/gcc/config/rs6000/rs6000-string.cc
index 3674c4bd9847..818ff10a8ac8 100644
--- a/gcc/config/rs6000/rs6000-string.cc
+++ b/gcc/config/rs6000/rs6000-string.cc
@@ -2786,6 +2786,7 @@ expand_block_move (rtx operands[], bool might_overlap)
 
   if (TARGET_MMA && TARGET_BLOCK_OPS_UNALIGNED_VSX
  && TARGET_BLOCK_OPS_VECTOR_PAIR
+ && TARGET_POWERPC64
  && bytes >= 32
  && (align >= 256 || !STRICT_ALIGNMENT))
{
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index b2fc39acf4e8..9a082ec21958 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -5710,20 +5710,32 @@
   DONE;
 })
 
-;; Load VSX Vector with Length
+;; Load VSX Vector with Length.  If we have lxvrl, we don't have to do an
+;; explicit shift left into a pseudo.
 (define_expand "lxvl"
-  [(set (match_dup 3)
-(ashift:DI (match_operand:DI 2 "register_operand")
-   (const_int 56)))
-   (set (match_operand:V16QI 0 "vsx_register_operand")
-   (unspec:V16QI
-[(match_operand:DI 1 "gpc_reg_operand")
-  (mem:V16QI (match_dup 1))
- (match_dup 3)]
-UNSPEC_LXVL))]
+  [(use (match_operand:V16QI 0 "vsx_register_operand"))
+   (use (match_operand:DI 1 "gpc_reg_operand"))
+   (use (match_operand:DI 2 "gpc_reg_operand"))]
   "TARGET_P9_VECTOR && TARGET_64BIT"
 {
-  operands[3] = gen_reg_rtx (DImode);
+  rtx shift_len = gen_rtx_ASHIFT (DImode, operands[2], GEN_INT (56));
+  rtx len;
+
+  if (TARGET_FUTURE)
+len = shift_len;
+  else
+{
+  len = gen_reg_rtx (DImode);
+  emit_insn (gen_rtx_SET (len, shift_len));
+}
+
+  rtx dest = operands[0];
+  rtx addr = operands[1];
+  rtx mem = gen_rtx_MEM (V16QImode, addr);
+  rtvec rv = gen_rtvec (3, addr, mem, len);
+  rtx lxvl = gen_rtx_UNSPEC (V16QImode, rv, UNSPEC_LXVL);
+  emit_insn (gen_rtx_SET (dest, lxvl));
+  DONE;
 })
 
 (define_insn "*lxvl"
@@ -5747,6 +5759,34 @@
   "lxvll %x0,%1,%2"
   [(set_attr "type" "vecload")])
 
+;; For lxvrl and lxvrll, use the combiner to eliminate the shift.  The
+;; define_expand for lxvl will already incorporate the shift in generating the
+;; insn.  The lxvll built-in function required the user to have already done
+;; the shift.  Defining lxvrll this way, will optimize cases where the user has
+;; done the shift immediately before

[gcc(refs/users/meissner/heads/work178-dmf)] RFC2655-Add saturating subtract built-ins.

2024-09-12 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:4a9fc4dcfc36dd2e6dadc3d142d349a1b1dc22a0

commit 4a9fc4dcfc36dd2e6dadc3d142d349a1b1dc22a0
Author: Michael Meissner 
Date:   Thu Sep 12 16:45:44 2024 -0400

RFC2655-Add saturating subtract built-ins.

This patch adds support for a saturating subtract built-in function that 
may be
added to a future PowerPC processor.  Note, if it is added, the name of the
built-in function may change before GCC 13 is released.  If the name 
changes,
we will submit a patch changing the name.

I also added support for providing dense math built-in functions, even 
though
at present, we have not added any new built-in functions for dense math.  
It is
likely we will want to add new dense math built-in functions as the dense 
math
support is fleshed out.

The patches have been tested on both little and big endian systems.  Can I 
check
it into the master branch?

2024-09-12   Michael Meissner  

gcc/

* config/rs6000/rs6000-builtin.cc (rs6000_invalid_builtin): Add 
support
for flagging invalid use of future built-in functions.
(rs6000_builtin_is_supported): Add support for future built-in
functions.
* config/rs6000/rs6000-builtins.def 
(__builtin_saturate_subtract32): New
built-in function for -mcpu=future.
(__builtin_saturate_subtract64): Likewise.
* config/rs6000/rs6000-gen-builtins.cc (enum bif_stanza): Add 
stanzas
for -mcpu=future built-ins.
(stanza_map): Likewise.
(enable_string): Likewise.
(struct attrinfo): Likewise.
(parse_bif_attrs): Likewise.
(write_decls): Likewise.
* config/rs6000/rs6000.md (sat_sub3): Add saturating subtract
built-in insn declarations.
(sat_sub3_dot): Likewise.
(sat_sub3_dot2): Likewise.
* doc/extend.texi (Future PowerPC built-ins): New section.

gcc/testsuite/

* gcc.target/powerpc/subfus-1.c: New test.
* gcc.target/powerpc/subfus-2.c: Likewise.

Diff:
---
 gcc/config/rs6000/rs6000-builtin.cc | 17 
 gcc/config/rs6000/rs6000-builtins.def   | 10 +
 gcc/config/rs6000/rs6000-gen-builtins.cc| 35 ++---
 gcc/config/rs6000/rs6000.md | 60 +
 gcc/doc/extend.texi | 24 
 gcc/testsuite/gcc.target/powerpc/subfus-1.c | 32 +++
 gcc/testsuite/gcc.target/powerpc/subfus-2.c | 32 +++
 7 files changed, 205 insertions(+), 5 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin.cc 
b/gcc/config/rs6000/rs6000-builtin.cc
index 8e4335e9b44f..a5f33eb9da18 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -139,6 +139,17 @@ rs6000_invalid_builtin (enum rs6000_gen_builtins fncode)
 case ENB_MMA:
   error ("%qs requires the %qs option", name, "-mmma");
   break;
+case ENB_FUTURE:
+  error ("%qs requires the %qs option", name, "-mcpu=future");
+  break;
+case ENB_FUTURE_64:
+  error ("%qs requires the %qs option and either the %qs or %qs option",
+name, "-mcpu=future", "-m64", "-mpowerpc64");
+  break;
+case ENB_DM:
+  error ("%qs requires the %qs or %qs options", name, "-mcpu=future",
+"-mdense-math");
+  break;
 default:
 case ENB_ALWAYS:
   gcc_unreachable ();
@@ -194,6 +205,12 @@ rs6000_builtin_is_supported (enum rs6000_gen_builtins 
fncode)
   return TARGET_HTM;
 case ENB_MMA:
   return TARGET_MMA;
+case ENB_FUTURE:
+  return TARGET_FUTURE;
+case ENB_FUTURE_64:
+  return TARGET_FUTURE && TARGET_POWERPC64;
+case ENB_DM:
+  return TARGET_DENSE_MATH;
 default:
   gcc_unreachable ();
 }
diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index 0e9dc05dbcff..7d47dc4e402c 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -137,6 +137,8 @@
 ;   endian   Needs special handling for endianness
 ;   ibmldRestrict usage to the case when TFmode is IBM-128
 ;   ibm128   Restrict usage to the case where __ibm128 is supported or if ibmld
+;   future   Restrict usage to future instructions
+;   dm   Restrict usage to dense math
 ;
 ; Each attribute corresponds to extra processing required when
 ; the built-in is expanded.  All such special processing should
@@ -3933,3 +3935,11 @@
 
   void __builtin_vsx_stxvp (v256, unsigned long, const v256 *);
 STXVP nothing {mma,pair}
+
+[future]
+  const signed int __builtin_saturate_subtract32 (signed int, signed int);
+  SAT_SUBSI sat_subsi3 {}
+
+[future-64]
+  const signed long __builtin_saturate_subtract64 (signed long,  signed long);
+  SAT_SUBDI sat_subdi3 {}
diff --git a/gcc/config/rs6000/rs6000-gen-builtins.cc 
b/gcc/conf

[gcc(refs/users/meissner/heads/work178-dmf)] RFC2686-Add paddis support.

2024-09-12 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:8c9f1dc1c8fd2ab40c9140b57a5c50515271fc77

commit 8c9f1dc1c8fd2ab40c9140b57a5c50515271fc77
Author: Michael Meissner 
Date:   Thu Sep 12 16:46:36 2024 -0400

RFC2686-Add paddis support.

2024-09-12  Michael Meissner  

gcc/

* config/rs6000/constraints.md (eU): New constraint.
(eV): Likewise.
* config/rs6000/predicates.md (paddis_operand): New predicate.
(paddis_paddi_operand): Likewise.
(add_operand): Add paddis support.
* config/rs6000/rs6000.cc (num_insns_constant_gpr): Add paddis 
support.
(num_insns_constant_multi): Likewise.
(print_operand): Add %B for paddis support.
* config/rs6000/rs6000.h (TARGET_PADDIS): New macro.
(SIGNED_INTEGER_32BIT_P): Likewise.
* config/rs6000/rs6000.md (isa attribute): Add paddis support.
(enabled attribute): Likewise.
(add3): Likewise.
(adddi3 splitter): New splitter for paddis.
(movdi_internal64): Add paddis support.
(movdi splitter): New splitter for paddis.

gcc/testsuite/

* gcc.target/powerpc/prefixed-addis.c: New test.

Diff:
---
 gcc/config/rs6000/constraints.md  | 10 +++
 gcc/config/rs6000/predicates.md   | 52 +++-
 gcc/config/rs6000/rs6000.cc   | 25 ++
 gcc/config/rs6000/rs6000.h|  4 +
 gcc/config/rs6000/rs6000.md   | 96 ---
 gcc/testsuite/gcc.target/powerpc/prefixed-addis.c | 24 ++
 6 files changed, 197 insertions(+), 14 deletions(-)

diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index 277a30a82458..4d8d21fd6bbb 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -222,6 +222,16 @@
   "An IEEE 128-bit constant that can be loaded into VSX registers."
   (match_operand 0 "easy_vector_constant_ieee128"))
 
+(define_constraint "eU"
+  "@internal integer constant that can be loaded with paddis"
+  (and (match_code "const_int")
+   (match_operand 0 "paddis_operand")))
+
+(define_constraint "eV"
+  "@internal integer constant that can be loaded with paddis + paddi"
+  (and (match_code "const_int")
+   (match_operand 0 "paddis_paddi_operand")))
+
 ;; Floating-point constraints.  These two are defined so that insn
 ;; length attributes can be calculated exactly.
 
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index c03f87c2a19c..c849642bfc8f 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -369,6 +369,53 @@
   return SIGNED_INTEGER_34BIT_P (INTVAL (op));
 })
 
+;; Return 1 if op is a 64-bit constant that uses the paddis instruction
+(define_predicate "paddis_operand"
+  (match_code "const_int")
+{
+  if (!TARGET_PADDIS && TARGET_POWERPC64)
+return 0;
+
+  /* If addi, addis, or paddi can handle the number, don't return true.  */
+  HOST_WIDE_INT value = INTVAL (op);
+  if (SIGNED_INTEGER_34BIT_P (value))
+return false;
+
+  /* If the number is too large for paddis, return false.  */
+  if (!SIGNED_INTEGER_32BIT_P (value >> 32))
+return false;
+
+  /* If the bottom 32-bits are non-zero, paddis can't handle it.  */
+  if ((value & HOST_WIDE_INT_C(0x)) != 0)
+return false;
+
+  return true;
+})
+
+;; Return 1 if op is a 64-bit constant that needs the paddis instruction and an
+;; addi/addis/paddi instruction combination.
+(define_predicate "paddis_paddi_operand"
+  (match_code "const_int")
+{
+  if (!TARGET_PADDIS && TARGET_POWERPC64)
+return 0;
+
+  /* If addi, addis, or paddi can handle the number, don't return true.  */
+  HOST_WIDE_INT value = INTVAL (op);
+  if (SIGNED_INTEGER_34BIT_P (value))
+return false;
+
+  /* If the number is too large for paddis, return false.  */
+  if (!SIGNED_INTEGER_32BIT_P (value >> 32))
+return false;
+
+  /* If the bottom 32-bits are zero, we can use paddis alone to handle it.  */
+  if ((value & HOST_WIDE_INT_C(0x)) == 0)
+return false;
+
+  return true;
+})
+
 ;; Return 1 if op is a register that is not special.
 ;; Disallow (SUBREG:SF (REG:SI)) and (SUBREG:SI (REG:SF)) on VSX systems where
 ;; you need to be careful in moving a SFmode to SImode and vice versa due to
@@ -1050,7 +1097,10 @@
   (if_then_else (match_code "const_int")
 (match_test "satisfies_constraint_I (op)
 || satisfies_constraint_L (op)
-|| satisfies_constraint_eI (op)")
+|| satisfies_constraint_eI (op)
+|| satisfies_constraint_eU (op)
+|| satisfies_constraint_eV (op)")
+
 (match_operand 0 "gpc_reg_operand")))
 
 ;; Return 1 if the operand is either a non-special register, or 0, or -1.
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index a660101f51a5..8ba87a3a2814 100644
--- a/gcc/con

[gcc(refs/users/meissner/heads/work178-dmf)] RFC2677-Add xvrlw support.

2024-09-12 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:6a4bc9883d33b45e567ac0d334c78227ede83484

commit 6a4bc9883d33b45e567ac0d334c78227ede83484
Author: Michael Meissner 
Date:   Thu Sep 12 16:47:42 2024 -0400

RFC2677-Add xvrlw support.

2024-09-12  Michael Meissner  

gcc/

* config/rs6000/altivec.md (xvrlw): New insn.
* config/rs6000/rs6000.h (TARGET_XVRLW): New macro.

gcc/testsuite/

* gcc.target/powerpc/vector-rotate-left.c: New test.

Diff:
---
 gcc/config/rs6000/altivec.md   | 14 +
 gcc/config/rs6000/rs6000.h |  3 ++
 .../gcc.target/powerpc/vector-rotate-left.c| 34 ++
 3 files changed, 51 insertions(+)

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 1f5489b974f6..f891ccc7403a 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -1982,6 +1982,20 @@
 }
   [(set_attr "type" "vecperm")])
 
+;; -mcpu=future adds a vector rotate left word variant.  There is no vector
+;; byte/half-word/double-word/quad-word rotate left.  This insn occurs before
+;; altivec_vrl and will match for -mcpu=future, while other cpus will
+;; match the generic insn.
+(define_insn "*xvrlw"
+  [(set (match_operand:V4SI 0 "register_operand" "=v,wa")
+   (rotate:V4SI (match_operand:V4SI 1 "register_operand" "v,wa")
+(match_operand:V4SI 2 "register_operand" "v,wa")))]
+  "TARGET_XVRLW"
+  "@
+   vrlw %0,%1,%2
+   xvrlw %x0,%x1,%x2"
+  [(set_attr "type" "vecsimple")])
+
 (define_insn "altivec_vrl"
   [(set (match_operand:VI2 0 "register_operand" "=v")
 (rotate:VI2 (match_operand:VI2 1 "register_operand" "v")
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 6a3fbc1e0fe5..c4d8e52a28a6 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -590,6 +590,9 @@ extern int rs6000_vector_align[];
 /* Whether we have PADDIS support.  */
 #define TARGET_PADDIS  TARGET_FUTURE
 
+/* Whether we have XVRLW support.  */
+#define TARGET_XVRLW   TARGET_FUTURE
+
 /* Whether the various reciprocal divide/square root estimate instructions
exist, and whether we should automatically generate code for the instruction
by default.  */
diff --git a/gcc/testsuite/gcc.target/powerpc/vector-rotate-left.c 
b/gcc/testsuite/gcc.target/powerpc/vector-rotate-left.c
new file mode 100644
index ..5a5f37755077
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vector-rotate-left.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_future_ok } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-mdejagnu-cpu=future -O2" } */
+
+/* Test whether xvrlw (vector word rotate left using VSX registers instead of
+   Altivec registers) is generated.  */
+
+#include 
+
+typedef vector unsigned int  v4si_t;
+
+v4si_t
+rotl_v4si_scalar (v4si_t x, unsigned long n)
+{
+  __asm__ (" # %x0" : "+f" (x));
+  return (x << n) | (x >> (32 - n));   /* xvrlw.  */
+}
+
+v4si_t
+rotr_v4si_scalar (v4si_t x, unsigned long n)
+{
+  __asm__ (" # %x0" : "+f" (x));
+  return (x >> n) | (x << (32 - n));   /* xvrlw.  */
+}
+
+v4si_t
+rotl_v4si_vector (v4si_t x, v4si_t y)
+{
+  __asm__ (" # %x0" : "+f" (x));   /* xvrlw.  */
+  return vec_rl (x, y);
+}
+
+/* { dg-final { scan-assembler-times {\mxvrlw\M} 3  } } */


[gcc(refs/users/meissner/heads/work178-dmf)] Update ChangeLog.*

2024-09-12 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:1aaa4dcb3e9afa976972eb00b57477dc3697d219

commit 1aaa4dcb3e9afa976972eb00b57477dc3697d219
Author: Michael Meissner 
Date:   Thu Sep 12 16:50:55 2024 -0400

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.dmf | 449 +-
 1 file changed, 448 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.dmf b/gcc/ChangeLog.dmf
index d94fb1b8de24..b25bcd101e02 100644
--- a/gcc/ChangeLog.dmf
+++ b/gcc/ChangeLog.dmf
@@ -1,6 +1,453 @@
+ Branch work178-dmf, patch #113 
+
+RFC2677-Add xvrlw support.
+
+2024-09-04  Michael Meissner  
+
+gcc/
+
+   * config/rs6000/altivec.md (xvrlw): New insn.
+   * config/rs6000/rs6000.h (TARGET_XVRLW): New macro.
+
+gcc/testsuite/
+
+   * gcc.target/powerpc/vector-rotate-left.c: New test.
+
+ Branch work178-dmf, patch #112 
+
+RFC2686-Add paddis support.
+
+2024-09-04  Michael Meissner  
+
+gcc/
+
+   * config/rs6000/constraints.md (eU): New constraint.
+   (eV): Likewise.
+   * config/rs6000/predicates.md (paddis_operand): New predicate.
+   (paddis_paddi_operand): Likewise.
+   (add_operand): Add paddis support.
+   * config/rs6000/rs6000.cc (num_insns_constant_gpr): Add paddis support.
+   (num_insns_constant_multi): Likewise.
+   (print_operand): Add %B for paddis support.
+   * config/rs6000/rs6000.h (TARGET_PADDIS): New macro.
+   (SIGNED_INTEGER_32BIT_P): Likewise.
+   * config/rs6000/rs6000.md (isa attribute): Add paddis support.
+   (enabled attribute): Likewise.
+   (add3): Likewise.
+   (adddi3 splitter): New splitter for paddis.
+   (movdi_internal64): Add paddis support.
+   (movdi splitter): New splitter for paddis.
+
+gcc/testsuite/
+
+   * gcc.target/powerpc/prefixed-addis.c: New test.
+
+ Branch work178-dmf, patch #111 
+
+RFC2655-Add saturating subtract built-ins.
+
+This patch adds support for a saturating subtract built-in function that may be
+added to a future PowerPC processor.  Note, if it is added, the name of the
+built-in function may change before GCC 13 is released.  If the name changes,
+we will submit a patch changing the name.
+
+I also added support for providing dense math built-in functions, even though
+at present, we have not added any new built-in functions for dense math.  It is
+likely we will want to add new dense math built-in functions as the dense math
+support is fleshed out.
+
+The patches have been tested on both little and big endian systems.  Can I 
check
+it into the master branch?
+
+2024-09-04   Michael Meissner  
+
+gcc/
+
+   * config/rs6000/rs6000-builtin.cc (rs6000_invalid_builtin): Add support
+   for flagging invalid use of future built-in functions.
+   (rs6000_builtin_is_supported): Add support for future built-in
+   functions.
+   * config/rs6000/rs6000-builtins.def (__builtin_saturate_subtract32): New
+   built-in function for -mcpu=future.
+   (__builtin_saturate_subtract64): Likewise.
+   * config/rs6000/rs6000-gen-builtins.cc (enum bif_stanza): Add stanzas
+   for -mcpu=future built-ins.
+   (stanza_map): Likewise.
+   (enable_string): Likewise.
+   (struct attrinfo): Likewise.
+   (parse_bif_attrs): Likewise.
+   (write_decls): Likewise.
+   * config/rs6000/rs6000.md (sat_sub3): Add saturating subtract
+   built-in insn declarations.
+   (sat_sub3_dot): Likewise.
+   (sat_sub3_dot2): Likewise.
+   * doc/extend.texi (Future PowerPC built-ins): New section.
+
+gcc/testsuite/
+
+   * gcc.target/powerpc/subfus-1.c: New test.
+   * gcc.target/powerpc/subfus-2.c: Likewise.
+
+ Branch work178-dmf, patch #110 
+
+RFC2656-Support load/store vector with right length.
+
+This patch adds support for new instructions that may be added to the PowerPC
+architecture in the future to enhance the load and store vector with length
+instructions.
+
+The current instructions (lxvl, lxvll, stxvl, and stxvll) are inconvenient to use
+since the count for the number of bytes must be in the top 8 bits of the GPR
+register, instead of the bottom 8 bits.  This meant that code generating these
+instructions typically had to do a shift left by 56 bits to get the count into
+the right position.  In a future version of the PowerPC architecture, new
+variants of these instructions might be added that expect the count to be in
+the bottom 8 bits of the GPR register.  These patches add this support to GCC
+if the user uses the -mcpu=future option.
+
+I discovered that the code in rs6000-string.cc to generate ISA 3.1 lxvl/stxvl
+future lxvll/stxvll instructions would generate these instructions on 32-bit.
+However the patterns for these instructions is only done on 64-bit systems.  So
+I added a check for 64-bit support before generating the instructions.
+
+The patche

[gcc(refs/users/aoliva/heads/testme)] assorted improvements for fold_truth_andor_1

2024-09-12 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:5c5b83b5ad497638c9bf250d2ea33d67d758cc77

commit 5c5b83b5ad497638c9bf250d2ea33d67d758cc77
Author: Alexandre Oliva 
Date:   Tue Sep 29 04:08:46 2020 -0300

assorted improvements for fold_truth_andor_1

This patch introduces various improvements to the logic that merges
field compares.

Before the patch, we could merge:

  (a.x1 EQNE b.x1)  ANDOR  (a.y1 EQNE b.y1)

into something like:

  (((type *)&a)[Na] & MASK) EQNE (((type *)&b)[Nb] & MASK)

if both of A's fields live within the same alignment boundaries, and
so do B's, at the same relative positions.  Constants may be used
instead of the object B.

The initial goal of this patch was to enable such combinations when a
field crossed alignment boundaries, e.g. for packed types.  We can't
generally access such fields with a single memory access, so when we
come across such a compare, we will attempt to combine each access
separately.

Some merging opportunities were missed because of right-shifts,
compares expressed as e.g. ((a.x1 ^ b.x1) & MASK) EQNE 0, and
narrowing conversions, especially after earlier merges.  This patch
introduces handlers for several cases involving these.

Other merging opportunities were missed because of association.  The
existing logic would only succeed in merging a pair of consecutive
compares, or e.g. B with C in (A ANDOR B) ANDOR C, not even trying
e.g. C and D in (A ANDOR (B ANDOR C)) ANDOR D.  I've generalized the
handling of the rightmost compare in the left-hand operand, going for
the leftmost compare in the right-hand operand, and then onto trying
to merge compares pairwise, one from each operand, even if they are
not consecutive, taking care to avoid merging operations with
intervening side effects, including volatile accesses.

When it is the second of a non-consecutive pair of compares that first
accesses a word, we may merge the first compare with part of the
second compare that refers to the same word, keeping the compare of
the remaining bits at the spot where the second compare used to be.

Handling compares with non-constant fields was somewhat generalized,
now handling non-adjacent fields.  When a field of one object crosses
an alignment boundary but the other doesn't, we issue the same load in
both compares; gimple optimizers will later turn it into a single
load, without our having to handle SAVE_EXPRs at this point.

The logic for issuing split loads and compares, and ordering them, is
now shared between all cases of compares with constants and with
another object.

The -Wno-error for toplev.o on rs6000 is because of toplev.c's:

  if ((flag_sanitize & SANITIZE_ADDRESS)
  && !FRAME_GROWS_DOWNWARD)

and rs6000.h's:

#define FRAME_GROWS_DOWNWARD (flag_stack_protect != 0   \
  || (flag_sanitize & SANITIZE_ADDRESS) != 0)

The mutually exclusive conditions involving flag_sanitize are now
noticed and reported by fold-const.c's:

  warning (0,
   "%<and%> of mutually exclusive equal-tests"
   " is always 0");

This patch enables over 12k compare-merging opportunities that we used
to miss in a GCC bootstrap.


for  gcc/ChangeLog

* fold-const.cc (prepare_xor): New.
(decode_field_reference): Handle xor, shift, and narrowing
conversions.
(all_ones_mask_p): Remove.
(compute_split_boundary_from_align): New.
(build_split_load, reuse_split_load): New.
(fold_truth_andor_1): Add recursion to combine pairs of
non-neighboring compares.  Handle xor compared with zero.
Handle fields straddling across alignment boundaries.
Generalize handling of non-constant rhs.
(fold_truth_andor): Leave sub-expression handling to the
recursion above.
* config/rs6000/t-rs6000 (toplev.o-warn): Disable errors.

for  gcc/testsuite/ChangeLog

* gcc.dg/field-merge-1.c: New.
* gcc.dg/field-merge-2.c: New.
* gcc.dg/field-merge-3.c: New.
* gcc.dg/field-merge-4.c: New.
* gcc.dg/field-merge-5.c: New.

Diff:
---
 gcc/config/rs6000/t-rs6000   |   4 +
 gcc/fold-const.cc| 818 ---
 gcc/testsuite/gcc.dg/field-merge-1.c |  64 +++
 gcc/testsuite/gcc.dg/field-merge-2.c |  31 ++
 gcc/testsuite/gcc.dg/field-merge-3.c |  36 ++
 gcc/testsuite/gcc.dg/field-merge-4.c |  40 ++
 gcc/testsuite/gcc.dg/field-merge-5.c |  40 ++
 7 files changed, 881 insertions(+), 152 deletions(-)

diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000
index 155788de40a3..a83968d663a6 

[gcc(refs/users/aoliva/heads/testme)] testsuite: a few more hostedlib adjustments

2024-09-12 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:6af7fe931d949dbf453fed41d2198abe2abd766c

commit 6af7fe931d949dbf453fed41d2198abe2abd766c
Author: Alexandre Oliva 
Date:   Thu Sep 12 20:03:53 2024 -0300

testsuite: a few more hostedlib adjustments

This adjusts some recently-added tests that won't compile without a
hostedlib libstdc++, missed in the patch that just went in, and also
an old test that I'd missed because it also failed in my baseline.


for  gcc/testsuite/ChangeLog

* g++.dg/coroutines/pr108620.C: Skip if !hostedlib because of
unavailable headers.
* g++.dg/other/profile1.C: Likewise.
* g++.dg/ext/pragma-unroll-lambda-lto.C: Skip if !hostedlib
because of unavailable declarations.

Diff:
---
 gcc/testsuite/g++.dg/coroutines/pr108620.C  | 2 ++
 gcc/testsuite/g++.dg/ext/pragma-unroll-lambda-lto.C | 1 +
 gcc/testsuite/g++.dg/other/profile1.C   | 1 +
 3 files changed, 4 insertions(+)

diff --git a/gcc/testsuite/g++.dg/coroutines/pr108620.C 
b/gcc/testsuite/g++.dg/coroutines/pr108620.C
index e8016b9f8a23..22bf0c18bac4 100644
--- a/gcc/testsuite/g++.dg/coroutines/pr108620.C
+++ b/gcc/testsuite/g++.dg/coroutines/pr108620.C
@@ -1,3 +1,5 @@
+// { dg-skip-if "requires hosted libstdc++ for iostream" { ! hostedlib } }
+
 // https://gcc.gnu.org/PR108620
 #include 
 #include 
diff --git a/gcc/testsuite/g++.dg/ext/pragma-unroll-lambda-lto.C 
b/gcc/testsuite/g++.dg/ext/pragma-unroll-lambda-lto.C
index 144c4c326924..64cdf90f34d3 100644
--- a/gcc/testsuite/g++.dg/ext/pragma-unroll-lambda-lto.C
+++ b/gcc/testsuite/g++.dg/ext/pragma-unroll-lambda-lto.C
@@ -1,5 +1,6 @@
 // { dg-do link { target c++11 } }
 // { dg-options "-O2 -flto -fdump-rtl-loop2_unroll" }
+// { dg-skip-if "requires hosted libstdc++ for cstdlib rand" { ! hostedlib } }
 
 #include 
 
diff --git a/gcc/testsuite/g++.dg/other/profile1.C 
b/gcc/testsuite/g++.dg/other/profile1.C
index a4bf6b3d0fea..99844373189e 100644
--- a/gcc/testsuite/g++.dg/other/profile1.C
+++ b/gcc/testsuite/g++.dg/other/profile1.C
@@ -2,6 +2,7 @@
 // { dg-do run }
 // { dg-require-profiling "" }
 // { dg-options "-fnon-call-exceptions -fprofile-arcs" }
+// { dg-skip-if "requires hosted libstdc++ for string" { ! hostedlib } }
 
 #include 


[gcc/aoliva/heads/testme] (2 commits) assorted improvements for fold_truth_andor_1

2024-09-12 Thread Alexandre Oliva via Gcc-cvs
The branch 'aoliva/heads/testme' was updated to point to:

 66212571a719... assorted improvements for fold_truth_andor_1

It previously pointed to:

 6af7fe931d94... testsuite: a few more hostedlib adjustments

Diff:

!!! WARNING: THE FOLLOWING COMMITS ARE NO LONGER ACCESSIBLE (LOST):
---

  6af7fe9... testsuite: a few more hostedlib adjustments
  5c5b83b... assorted improvements for fold_truth_andor_1


Summary of changes (added commits):
---

  6621257... assorted improvements for fold_truth_andor_1
  90eb457... testsuite: a few more hostedlib adjustments


[gcc(refs/users/aoliva/heads/testme)] testsuite: a few more hostedlib adjustments

2024-09-12 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:90eb457e05f73d5bd81beb3419b5803bb68491b1

commit 90eb457e05f73d5bd81beb3419b5803bb68491b1
Author: Alexandre Oliva 
Date:   Thu Sep 12 20:03:53 2024 -0300

testsuite: a few more hostedlib adjustments

This adjusts some recently-added tests that won't compile without a
hostedlib libstdc++, missed in the patch that just went in, and also
an old test that I'd missed because it also failed in my baseline.


for  gcc/testsuite/ChangeLog

* g++.dg/coroutines/pr108620.C: Skip if !hostedlib because of
unavailable headers.
* g++.dg/other/profile1.C: Likewise.
* g++.dg/ext/pragma-unroll-lambda-lto.C: Skip if !hostedlib
because of unavailable declarations.

Diff:
---
 gcc/testsuite/g++.dg/coroutines/pr108620.C  | 2 ++
 gcc/testsuite/g++.dg/ext/pragma-unroll-lambda-lto.C | 1 +
 gcc/testsuite/g++.dg/other/profile1.C   | 1 +
 3 files changed, 4 insertions(+)

diff --git a/gcc/testsuite/g++.dg/coroutines/pr108620.C 
b/gcc/testsuite/g++.dg/coroutines/pr108620.C
index e8016b9f8a23..22bf0c18bac4 100644
--- a/gcc/testsuite/g++.dg/coroutines/pr108620.C
+++ b/gcc/testsuite/g++.dg/coroutines/pr108620.C
@@ -1,3 +1,5 @@
+// { dg-skip-if "requires hosted libstdc++ for iostream" { ! hostedlib } }
+
 // https://gcc.gnu.org/PR108620
 #include 
 #include 
diff --git a/gcc/testsuite/g++.dg/ext/pragma-unroll-lambda-lto.C 
b/gcc/testsuite/g++.dg/ext/pragma-unroll-lambda-lto.C
index 144c4c326924..64cdf90f34d3 100644
--- a/gcc/testsuite/g++.dg/ext/pragma-unroll-lambda-lto.C
+++ b/gcc/testsuite/g++.dg/ext/pragma-unroll-lambda-lto.C
@@ -1,5 +1,6 @@
 // { dg-do link { target c++11 } }
 // { dg-options "-O2 -flto -fdump-rtl-loop2_unroll" }
+// { dg-skip-if "requires hosted libstdc++ for cstdlib rand" { ! hostedlib } }
 
 #include 
 
diff --git a/gcc/testsuite/g++.dg/other/profile1.C 
b/gcc/testsuite/g++.dg/other/profile1.C
index a4bf6b3d0fea..99844373189e 100644
--- a/gcc/testsuite/g++.dg/other/profile1.C
+++ b/gcc/testsuite/g++.dg/other/profile1.C
@@ -2,6 +2,7 @@
 // { dg-do run }
 // { dg-require-profiling "" }
 // { dg-options "-fnon-call-exceptions -fprofile-arcs" }
+// { dg-skip-if "requires hosted libstdc++ for string" { ! hostedlib } }
 
 #include 


[gcc(refs/users/aoliva/heads/testme)] assorted improvements for fold_truth_andor_1

2024-09-12 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:66212571a719e4cfcedb59103eb8fd7dc52e84f8

commit 66212571a719e4cfcedb59103eb8fd7dc52e84f8
Author: Alexandre Oliva 
Date:   Tue Sep 29 04:08:46 2020 -0300

assorted improvements for fold_truth_andor_1

This patch introduces various improvements to the logic that merges
field compares.

Before the patch, we could merge:

  (a.x1 EQNE b.x1)  ANDOR  (a.y1 EQNE b.y1)

into something like:

  (((type *)&a)[Na] & MASK) EQNE (((type *)&b)[Nb] & MASK)

if both of A's fields live within the same alignment boundaries, and
so do B's, at the same relative positions.  Constants may be used
instead of the object B.

The initial goal of this patch was to enable such combinations when a
field crossed alignment boundaries, e.g. for packed types.  We can't
generally access such fields with a single memory access, so when we
come across such a compare, we will attempt to combine each access
separately.

Some merging opportunities were missed because of right-shifts,
compares expressed as e.g. ((a.x1 ^ b.x1) & MASK) EQNE 0, and
narrowing conversions, especially after earlier merges.  This patch
introduces handlers for several cases involving these.

Other merging opportunities were missed because of association.  The
existing logic would only succeed in merging a pair of consecutive
compares, or e.g. B with C in (A ANDOR B) ANDOR C, not even trying
e.g. C and D in (A ANDOR (B ANDOR C)) ANDOR D.  I've generalized the
handling of the rightmost compare in the left-hand operand, going for
the leftmost compare in the right-hand operand, and then onto trying
to merge compares pairwise, one from each operand, even if they are
not consecutive, taking care to avoid merging operations with
intervening side effects, including volatile accesses.

When it is the second of a non-consecutive pair of compares that first
accesses a word, we may merge the first compare with part of the
second compare that refers to the same word, keeping the compare of
the remaining bits at the spot where the second compare used to be.

Handling compares with non-constant fields was somewhat generalized,
now handling non-adjacent fields.  When a field of one object crosses
an alignment boundary but the other doesn't, we issue the same load in
both compares; gimple optimizers will later turn it into a single
load, without our having to handle SAVE_EXPRs at this point.

The logic for issuing split loads and compares, and ordering them, is
now shared between all cases of compares with constants and with
another object.

The -Wno-error for toplev.o on rs6000 is because of toplev.c's:

  if ((flag_sanitize & SANITIZE_ADDRESS)
  && !FRAME_GROWS_DOWNWARD)

and rs6000.h's:

#define FRAME_GROWS_DOWNWARD (flag_stack_protect != 0   \
  || (flag_sanitize & SANITIZE_ADDRESS) != 0)

The mutually exclusive conditions involving flag_sanitize are now
noticed and reported by fold-const.c's:

  warning (0,
   "%<and%> of mutually exclusive equal-tests"
   " is always 0");

This patch enables over 12k compare-merging opportunities that we used
to miss in a GCC bootstrap.


for  gcc/ChangeLog

* fold-const.cc (prepare_xor): New.
(decode_field_reference): Handle xor, shift, and narrowing
conversions.
(all_ones_mask_p): Remove.
(compute_split_boundary_from_align): New.
(build_split_load, reuse_split_load): New.
(fold_truth_andor_1): Add recursion to combine pairs of
non-neighboring compares.  Handle xor compared with zero.
Handle fields straddling across alignment boundaries.
Generalize handling of non-constant rhs.
(fold_truth_andor): Leave sub-expression handling to the
recursion above.
* config/rs6000/t-rs6000 (toplev.o-warn): Disable errors.

for  gcc/testsuite/ChangeLog

* gcc.dg/field-merge-1.c: New.
* gcc.dg/field-merge-2.c: New.
* gcc.dg/field-merge-3.c: New.
* gcc.dg/field-merge-4.c: New.
* gcc.dg/field-merge-5.c: New.

Diff:
---
 gcc/config/rs6000/t-rs6000   |   4 +
 gcc/fold-const.cc| 818 ---
 gcc/testsuite/gcc.dg/field-merge-1.c |  64 +++
 gcc/testsuite/gcc.dg/field-merge-2.c |  31 ++
 gcc/testsuite/gcc.dg/field-merge-3.c |  36 ++
 gcc/testsuite/gcc.dg/field-merge-4.c |  40 ++
 gcc/testsuite/gcc.dg/field-merge-5.c |  40 ++
 7 files changed, 881 insertions(+), 152 deletions(-)

diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000
index 155788de40a3..a83968d663a6 

[gcc r15-3618] Daily bump.

2024-09-12 Thread GCC Administrator via Gcc-cvs
https://gcc.gnu.org/g:3d021a024b793129a95a4c0b5c461885badea832

commit r15-3618-g3d021a024b793129a95a4c0b5c461885badea832
Author: GCC Administrator 
Date:   Fri Sep 13 00:18:06 2024 +

Daily bump.

Diff:
---
 ChangeLog   |4 +
 gcc/ChangeLog   |   92 
 gcc/DATESTAMP   |2 +-
 gcc/c-family/ChangeLog  |   14 +
 gcc/cp/ChangeLog|   41 ++
 gcc/fortran/ChangeLog   |4 +
 gcc/testsuite/ChangeLog | 1141 +++
 libcpp/ChangeLog|  120 +
 libstdc++-v3/ChangeLog  |   11 +
 9 files changed, 1428 insertions(+), 1 deletion(-)

diff --git a/ChangeLog b/ChangeLog
index fee764caabf4..b1f2d7eb0a9f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+2024-09-12  YunQiang Su  
+
+   * .gitignore: Add .vscode.
+
 2024-09-04  Marc Poulhiès  
 
* configure: Regenerate.
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index ec0821e52998..301cf5895ec8 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,95 @@
+2024-09-12  Alexandre Oliva  
+   Olivier Hainque  
+
+   * doc/sourcebuild.texi (hostedlib): New effective target.
+
+2024-09-12  Uros Bizjak  
+
+   PR target/112600
+   * config/i386/mmx.md (3): Rename
+   from *3.
+
+2024-09-12  Jakub Jelinek  
+
+   * doc/cpp.texi (Binary Resource Inclusion): Document gnu::base64
+   parameter.
+
+2024-09-12  Richard Earnshaw  
+
+   * config/arm/arm.h (OPTION_DEFAULT_SPECS): Allow -mcpu and -march
+   to be unset.
+   (ARCH_CPU_CLEANUP_SPECS): Likewise
+   (DRIVER_SELF_SPECS): Add ARCH_CPU_CLEANUP_SPECS
+   * doc/invoke.texi (arm: -mcpu= and -march=): Document use of 'unset'.
+
+2024-09-12  Uros Bizjak  
+
+   * config/i386/i386.md (*insvti_lowpart_1): Use "o" constraint
+   instead of "m" for double-word mode memory operands.
+
+2024-09-12  Stefan Schulze Frielinghaus  
+
+   * config/s390/s390-protos.h (s390_gen_lowpart_subreg): Remove.
+   * config/s390/s390.cc (s390_gen_lowpart_subreg): Remove.
+   (s390_expand_insv): Use adjust_address() and emit a
+   strict_low_part only in case of a natural subreg.
+   * config/s390/s390.md: Use gen_lowpart() instead of
+   s390_gen_lowpart_subreg().
+
+2024-09-12  Richard Biener  
+
+   * tree-vect-slp.cc (vect_slp_analyze_operations): When
+   doing loop analysis fail after the first failed SLP
+   instance.  Only remove instances when doing BB vectorization.
+   * tree-vect-loop.cc (vect_analyze_loop_2): Check whether
+   vect_slp_analyze_operations failed instead of checking
+   the number of SLP instances remaining.
+
+2024-09-12  Jakub Jelinek  
+
+   * doc/cpp.texi (Binary Resource Inclusion): Document gnu::offset
+   #embed parameter.
+
+2024-09-12  Jakub Jelinek  
+
+   PR c/105863
+   * doc/cppdiropts.texi (--embed-dir=): Document.
+   * doc/cpp.texi (Binary Resource Inclusion): New chapter.
+   (__has_embed): Document.
+   * doc/invoke.texi (Directory Options): Mention --embed-dir=.
+   * gcc.cc (cpp_unique_options): Add %{-embed*}.
+   * genmatch.cc (main): Adjust cpp_set_include_chains caller.
+   * incpath.h (enum incpath_kind): Add INC_EMBED.
+   * incpath.cc (merge_include_chains): Handle INC_EMBED.
+   (register_include_chains): Adjust cpp_set_include_chains caller.
+
+2024-09-12  Richard Biener  
+
+   * tree-vect-slp.cc (vect_build_slp_tree_2): On reassociation
+   chain length mismatch do not fail discovery of the node
+   but try without re-associating to compute a better matches[].
+   Provide a reassociation failure hint in the dump.
+   (vect_slp_analyze_node_operations): Avoid stray failure
+   dumping.
+   (vectorizable_slp_permutation_1): Dump the address of the
+   SLP node representing the permutation.
+
+2024-09-12  Levy Hsu  
+
+   * config/i386/i386.cc (ix86_get_mask_mode):
+   Enable BFmode for targetm.vectorize.get_mask_mode with AVX10.2.
+   * config/i386/mmx.md (vec_cmp<mode>qi):
+   Implement vec_cmpv2bfqi and vec_cmpv4bfqi.
+
+2024-09-12  Bohan Lei  
+
+   * config/riscv/riscv-vsetvl.cc (pre_vsetvl::fuse_local_vsetvl_info):
+   Delete vsetvl insn when `prev_info` is compatible
+
+2024-09-12  garthlei  
+
+   * config/riscv/riscv-vsetvl.cc: Use `dest_vl` for dest VL operand
+
 2024-09-11  Martin Jambor  
 
* ipa-cp.cc (propagate_vr_across_jump_function): Use
diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP
index 8dcf10768ee4..bc2400990747 100644
--- a/gcc/DATESTAMP
+++ b/gcc/DATESTAMP
@@ -1 +1 @@
-20240912
+20240913
diff --git a/gcc/c-family/ChangeLog b/gcc/c-family/ChangeLog
index 4ca7193b6bc1..d0dbaa141454 100644
--- a/gcc/c-family/ChangeLog
+++ b/gcc/c-family/ChangeLog
@@ -1,3 +1,17 @@
+2024-09-12  Jason Merrill  
+
+   * c.opt: Add CppReason for Wc++{14,17,20,23}-extensions.
+   * c-pragma.cc (handle_pragma

[gcc r12-10707] Daily bump.

2024-09-12 Thread GCC Administrator via Gcc-cvs
https://gcc.gnu.org/g:0344276a0015415e2076c79e2f56c980efae004f

commit r12-10707-g0344276a0015415e2076c79e2f56c980efae004f
Author: GCC Administrator 
Date:   Fri Sep 13 00:19:01 2024 +

Daily bump.

Diff:
---
 gcc/DATESTAMP | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP
index 8dcf10768ee4..bc2400990747 100644
--- a/gcc/DATESTAMP
+++ b/gcc/DATESTAMP
@@ -1 +1 @@
-20240912
+20240913


[gcc r13-9018] Daily bump.

2024-09-12 Thread GCC Administrator via Gcc-cvs
https://gcc.gnu.org/g:ff8421191a8cc6a15691a38d1e40ad3173f5ce88

commit r13-9018-gff8421191a8cc6a15691a38d1e40ad3173f5ce88
Author: GCC Administrator 
Date:   Fri Sep 13 00:21:03 2024 +

Daily bump.

Diff:
---
 gcc/DATESTAMP | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP
index 8dcf10768ee4..bc2400990747 100644
--- a/gcc/DATESTAMP
+++ b/gcc/DATESTAMP
@@ -1 +1 @@
-20240912
+20240913


[gcc r14-10669] Daily bump.

2024-09-12 Thread GCC Administrator via Gcc-cvs
https://gcc.gnu.org/g:890a26bb95c7db41c99dbb6695a661b6d3675e9d

commit r14-10669-g890a26bb95c7db41c99dbb6695a661b6d3675e9d
Author: GCC Administrator 
Date:   Fri Sep 13 00:21:45 2024 +

Daily bump.

Diff:
---
 gcc/DATESTAMP   |  2 +-
 gcc/cp/ChangeLog| 29 +
 gcc/testsuite/ChangeLog | 24 
 libiberty/ChangeLog | 24 
 4 files changed, 78 insertions(+), 1 deletion(-)

diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP
index 8dcf10768ee4..bc2400990747 100644
--- a/gcc/DATESTAMP
+++ b/gcc/DATESTAMP
@@ -1 +1 @@
-20240912
+20240913
diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog
index c54bdfddd516..b1ef6d1bf3b6 100644
--- a/gcc/cp/ChangeLog
+++ b/gcc/cp/ChangeLog
@@ -1,3 +1,32 @@
+2024-09-12  Marek Polacek  
+
+   Backported from master:
+   2024-09-12  Marek Polacek  
+
+   PR c++/96097
+   * pt.cc (coerce_template_template_parm): Increment
+   processing_template_decl before calling tsubst.
+
+2024-09-12  Jakub Jelinek  
+
+   Backported from master:
+   2024-09-12  Jakub Jelinek  
+
+   PR c++/116636
+   * method.cc: Include decl.h.
+   (use_thunk): Temporarily change deprecated_state to
+   UNAVAILABLE_DEPRECATED_SUPPRESS.
+
+2024-09-12  Jakub Jelinek  
+
+   Backported from master:
+   2024-09-10  Jakub Jelinek  
+
+   PR c++/116449
+   * typeck.cc (get_member_function_from_ptrfunc): Use save_expr
+   on instance_ptr and function even if it doesn't have side-effects,
+   as long as it isn't a decl.
+
 2024-09-08  Patrick Palka  
 
Backported from master:
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index a2096eaa0c9f..bc0576f02bb7 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,27 @@
+2024-09-12  Marek Polacek  
+
+   Backported from master:
+   2024-09-12  Marek Polacek  
+
+   PR c++/96097
+   * g++.dg/template/ttp44.C: New test.
+
+2024-09-12  Jakub Jelinek  
+
+   Backported from master:
+   2024-09-12  Jakub Jelinek  
+
+   PR c++/116636
+   * g++.dg/warn/deprecated-19.C: New test.
+
+2024-09-12  Jakub Jelinek  
+
+   Backported from master:
+   2024-09-10  Jakub Jelinek  
+
+   PR c++/116449
+   * g++.dg/ubsan/pr116449.C: New test.
+
 2024-09-08  Patrick Palka  
 
Backported from master:
diff --git a/libiberty/ChangeLog b/libiberty/ChangeLog
index 8f4eca39bda4..205fd58759ae 100644
--- a/libiberty/ChangeLog
+++ b/libiberty/ChangeLog
@@ -1,3 +1,27 @@
+2024-09-12  Jakub Jelinek  
+
+   Backported from master:
+   2024-09-07  Jakub Jelinek  
+
+   PR lto/116614
+   * simple-object-elf.c (SHN_COMMON): Align comment with neighbouring
+   comments.
+   (SHN_HIRESERVE): Use uppercase hex digits instead of lowercase for
+   consistency.
+   (simple_object_elf_find_sections): Formatting fixes.
+   (simple_object_elf_fetch_attributes): Likewise.
+   (simple_object_elf_attributes_merge): Likewise.
+   (simple_object_elf_start_write): Likewise.
+   (simple_object_elf_write_ehdr): Likewise.
+   (simple_object_elf_write_shdr): Likewise.
+   (simple_object_elf_write_to_file): Likewise.
+   (simple_object_elf_copy_lto_debug_section): Likewise.  Don't fail for
+   new_i - 1 >= SHN_LORESERVE, instead arrange in that case to copy
+   over .symtab_shndx sections, though emit those last and compute their
+   section content when processing associated .symtab sections.  Handle
+   simple_object_internal_read failure even in the .symtab_shndx reading
+   case.
+
 2024-08-01  Release Manager
 
* GCC 14.2.0 released.


[gcc(refs/users/aoliva/heads/testme)] rework truth_andor folding into tree-ssa-ifcombine

2024-09-12 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:e4f6196e7a16de0ceb4b9f4b68993a1f8454a7fc

commit e4f6196e7a16de0ceb4b9f4b68993a1f8454a7fc
Author: Alexandre Oliva 
Date:   Tue Sep 29 12:55:20 2020 -0300

rework truth_andor folding into tree-ssa-ifcombine

Diff:
---
 gcc/fold-const.cc | 1048 +
 gcc/gimple-fold.cc| 1149 +
 gcc/tree-ssa-ifcombine.cc |7 +-
 3 files changed, 1170 insertions(+), 1034 deletions(-)

diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
index 81814de5b04b..19824e6a477f 100644
--- a/gcc/fold-const.cc
+++ b/gcc/fold-const.cc
@@ -137,7 +137,6 @@ static tree range_successor (tree);
 static tree fold_range_test (location_t, enum tree_code, tree, tree, tree);
 static tree fold_cond_expr_with_comparison (location_t, tree, enum tree_code,
tree, tree, tree, tree);
-static tree unextend (tree, int, int, tree);
 static tree extract_muldiv (tree, tree, enum tree_code, tree, bool *);
 static tree extract_muldiv_1 (tree, tree, enum tree_code, tree, bool *);
 static tree fold_binary_op_with_conditional_arg (location_t,
@@ -4695,7 +4694,7 @@ invert_truthvalue_loc (location_t loc, tree arg)
is the original memory reference used to preserve the alias set of
the access.  */
 
-static tree
+tree
 make_bit_field_ref (location_t loc, tree inner, tree orig_inner, tree type,
HOST_WIDE_INT bitsize, poly_int64 bitpos,
int unsignedp, int reversep)
@@ -4945,212 +4944,6 @@ optimize_bit_field_compare (location_t loc, enum 
tree_code code,
   return lhs;
 }
 
-/* If *R_ARG is a constant zero, and L_ARG is a possibly masked
-   BIT_XOR_EXPR, return 1 and set *r_arg to l_arg.
-   Otherwise, return 0.
-
-   The returned value should be passed to decode_field_reference for it
-   to handle l_arg, and then doubled for r_arg.  */
-static int
-prepare_xor (tree l_arg, tree *r_arg)
-{
-  int ret = 0;
-
-  if (!integer_zerop (*r_arg))
-return ret;
-
-  tree exp = l_arg;
-  STRIP_NOPS (exp);
-
-  if (TREE_CODE (exp) == BIT_AND_EXPR)
-{
-  tree and_mask = TREE_OPERAND (exp, 1);
-  exp = TREE_OPERAND (exp, 0);
-  STRIP_NOPS (exp); STRIP_NOPS (and_mask);
-  if (TREE_CODE (and_mask) != INTEGER_CST)
-   return ret;
-}
-
-  if (TREE_CODE (exp) == BIT_XOR_EXPR)
-{
-  *r_arg = l_arg;
-  return 1;
-}
-
-  return ret;
-}
-
-/* Subroutine for fold_truth_andor_1: decode a field reference.
-
-   If EXP is a comparison reference, we return the innermost reference.
-
-   *PBITSIZE is set to the number of bits in the reference, *PBITPOS is
-   set to the starting bit number.
-
-   If the innermost field can be completely contained in a mode-sized
-   unit, *PMODE is set to that mode.  Otherwise, it is set to VOIDmode.
-
-   *PVOLATILEP is set to 1 if the any expression encountered is volatile;
-   otherwise it is not changed.
-
-   *PUNSIGNEDP is set to the signedness of the field.
-
-   *PREVERSEP is set to the storage order of the field.
-
-   *PMASK is set to the mask used.  This is either contained in a
-   BIT_AND_EXPR or derived from the width of the field.
-
-   *PAND_MASK is set to the mask found in a BIT_AND_EXPR, if any.
-
-   XOR_WHICH is 1 or 2 if EXP was found to be a (possibly masked)
-   BIT_XOR_EXPR compared with zero.  We're to take the first or second
-   operand thereof if so.  It should be zero otherwise.
-
-   Return 0 if this is not a component reference or is one that we can't
-   do anything with.  */
-
-static tree
-decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
-   HOST_WIDE_INT *pbitpos, machine_mode *pmode,
-   int *punsignedp, int *preversep, int *pvolatilep,
-   tree *pmask, tree *pand_mask, int xor_which)
-{
-  tree exp = *exp_;
-  tree outer_type = 0;
-  tree and_mask = 0;
-  tree mask, inner, offset;
-  tree unsigned_type;
-  unsigned int precision;
-  HOST_WIDE_INT shiftrt = 0;
-
-  /* All the optimizations using this function assume integer fields.
- There are problems with FP fields since the type_for_size call
- below can fail for, e.g., XFmode.  */
-  if (! INTEGRAL_TYPE_P (TREE_TYPE (exp)))
-return NULL_TREE;
-
-  /* We are interested in the bare arrangement of bits, so strip everything
- that doesn't affect the machine mode.  However, record the type of the
- outermost expression if it may matter below.  */
-  if (CONVERT_EXPR_P (exp)
-  || TREE_CODE (exp) == NON_LVALUE_EXPR)
-outer_type = TREE_TYPE (exp);
-  STRIP_NOPS (exp);
-
-  if (TREE_CODE (exp) == BIT_AND_EXPR)
-{
-  and_mask = TREE_OPERAND (exp, 1);
-  exp = TREE_OPERAND (exp, 0);
-  STRIP_NOPS (exp); STRIP_NOPS (and_mask);
-  if (TREE_CODE (and_mask) != INTEGER_CST)
-   return NULL_TREE;
-}
-
-  if (xor_which)
-{
-  gcc_checking_assert (TREE_CODE (exp) == BIT_XOR_EXPR);
-  

[gcc(refs/users/aoliva/heads/testme)] check for mergeable loads, choose insertion points accordingly

2024-09-12 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:163a7691962e2a60402d2b75fb2243bfd33b3595

commit 163a7691962e2a60402d2b75fb2243bfd33b3595
Author: Alexandre Oliva 
Date:   Thu Jul 27 05:15:20 2023 -0300

check for mergeable loads, choose insertion points accordingly

Diff:
---
 gcc/gimple-fold.cc | 253 ++---
 1 file changed, 219 insertions(+), 34 deletions(-)

diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index 64426bd76977..85a0ec028030 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -69,6 +69,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "varasm.h"
 #include "internal-fn.h"
 #include "gimple-range.h"
+#include "tree-ssa-loop-niter.h" // stmt_dominates_stmt_p
 
 /* ??? Move this to some header, it's defined in fold-const.c.  */
 extern tree
@@ -7395,7 +7396,7 @@ maybe_fold_comparisons_from_match_pd (tree type, enum 
tree_code code,
Same as ssa_is_replaceable_p, except that we don't insist it has a
single use.  */
 
-bool
+static bool
 ssa_is_substitutable_p (gimple *stmt)
 {
 #if 0
@@ -7476,9 +7477,10 @@ is_cast_p (tree *name)
   if (gimple_num_ops (def) != 2)
break;
 
-  if (get_gimple_rhs_class (gimple_expr_code (def))
- == GIMPLE_SINGLE_RHS)
+  if (gimple_assign_single_p (def))
{
+ if (gimple_assign_load_p (def))
+   break;
  *name = gimple_assign_rhs1 (def);
  continue;
}
@@ -7515,8 +7517,7 @@ is_binop_p (enum tree_code code, tree *name)
  return 0;
 
case 2:
- if (get_gimple_rhs_class (gimple_expr_code (def))
- == GIMPLE_SINGLE_RHS)
+ if (gimple_assign_single_p (def) && !gimple_assign_load_p (def))
{
  *name = gimple_assign_rhs1 (def);
  continue;
@@ -7524,7 +7525,7 @@ is_binop_p (enum tree_code code, tree *name)
  return 0;
 
case 3:
- ;
+ break;
}
 
   if (gimple_assign_rhs_code (def) != code)
@@ -7569,6 +7570,26 @@ prepare_xor (tree l_arg, tree *r_arg)
   return ret;
 }
 
+/* If EXP is a SSA_NAME whose DEF is a load stmt, set *LOAD to it and
+   return its RHS, otherwise return EXP.  */
+
+static tree
+follow_load (tree exp, gimple **load)
+{
+  if (TREE_CODE (exp) == SSA_NAME
+  && !SSA_NAME_IS_DEFAULT_DEF (exp))
+{
+  gimple *def = SSA_NAME_DEF_STMT (exp);
+  if (gimple_assign_load_p (def))
+   {
+ *load = def;
+ exp = gimple_assign_rhs1 (def);
+   }
+}
+
+  return exp;
+}
+
 /* Subroutine for fold_truth_andor_1: decode a field reference.
 
If EXP is a comparison reference, we return the innermost reference.
@@ -7595,6 +7616,9 @@ prepare_xor (tree l_arg, tree *r_arg)
BIT_XOR_EXPR compared with zero.  We're to take the first or second
operand thereof if so.  It should be zero otherwise.
 
+   *LOAD is set to the load stmt of the innermost reference, if any,
+   and NULL otherwise.
+
Return 0 if this is not a component reference or is one that we can't
do anything with.  */
 
@@ -7602,7 +7626,8 @@ static tree
 decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
HOST_WIDE_INT *pbitpos, machine_mode *pmode,
int *punsignedp, int *preversep, int *pvolatilep,
-   tree *pmask, tree *pand_mask, int xor_which)
+   tree *pmask, tree *pand_mask, int xor_which,
+   gimple **load)
 {
   tree exp = *exp_;
   tree outer_type = 0;
@@ -7612,11 +7637,13 @@ decode_field_reference (location_t loc, tree *exp_, 
HOST_WIDE_INT *pbitsize,
   unsigned int precision;
   HOST_WIDE_INT shiftrt = 0;
 
+  *load = NULL;
+
   /* All the optimizations using this function assume integer fields.
  There are problems with FP fields since the type_for_size call
  below can fail for, e.g., XFmode.  */
   if (! INTEGRAL_TYPE_P (TREE_TYPE (exp)))
-return 0;
+return NULL_TREE;
 
   /* We are interested in the bare arrangement of bits, so strip everything
  that doesn't affect the machine mode.  However, record the type of the
@@ -7626,7 +7653,7 @@ decode_field_reference (location_t loc, tree *exp_, 
HOST_WIDE_INT *pbitsize,
   if ((and_mask = is_binop_p (BIT_AND_EXPR, &exp)))
 {
   if (TREE_CODE (and_mask) != INTEGER_CST)
-   return 0;
+   return NULL_TREE;
 }
 
   if (xor_which)
@@ -7644,16 +7671,18 @@ decode_field_reference (location_t loc, tree *exp_, 
HOST_WIDE_INT *pbitsize,
   if (tree shift = is_binop_p (RSHIFT_EXPR, &exp))
 {
   if (TREE_CODE (shift) != INTEGER_CST || !tree_fits_shwi_p (shift))
-   return 0;
+   return NULL_TREE;
   shiftrt = tree_to_shwi (shift);
   if (shiftrt <= 0)
-   return 0;
+   return NULL_TREE;
 }
 
   if (tree t = is_cast_p (&exp))
 if (!outer_type)
   outer_type = t;
 
+  exp = follow_load (exp, load);
+
   poly_int64 poly_bitsize, poly_bitpos;
   inner = ge

[gcc(refs/users/aoliva/heads/testme)] fold truth-and only in ifcombine

2024-09-12 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:1494b67efaa0c4c3ebd46b7fcaee5a3389124d4b

commit 1494b67efaa0c4c3ebd46b7fcaee5a3389124d4b
Author: Alexandre Oliva 
Date:   Fri Aug 18 00:51:23 2023 -0300

fold truth-and only in ifcombine

Diff:
---
 gcc/gimple-fold.cc|  2 ++
 gcc/tree-ssa-ifcombine.cc | 24 +---
 2 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index 85a0ec028030..5b7d83edbea9 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -8738,12 +8738,14 @@ maybe_fold_and_comparisons (tree type,
 op2b, outer_cond_bb))
 return t;
 
+#if 0
   if (tree t = fold_truth_andor_maybe_separate (UNKNOWN_LOCATION,
TRUTH_ANDIF_EXPR, type,
code2, op2a, op2b,
code1, op1a, op1b,
NULL))
 return t;
+#endif
 
   return NULL_TREE;
 }
diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc
index 79a4bdd363b9..61480e5fa894 100644
--- a/gcc/tree-ssa-ifcombine.cc
+++ b/gcc/tree-ssa-ifcombine.cc
@@ -399,6 +399,14 @@ update_profile_after_ifcombine (basic_block inner_cond_bb,
   outer2->probability = profile_probability::never ();
 }
 
+/* FIXME: move to a header file.  */
+extern tree
+fold_truth_andor_maybe_separate (location_t loc,
+enum tree_code code, tree truth_type,
+enum tree_code lcode, tree ll_arg, tree lr_arg,
+enum tree_code rcode, tree rl_arg, tree rr_arg,
+tree *separatep);
+
 /* If-convert on a and pattern with a common else block.  The inner
if is specified by its INNER_COND_BB, the outer by OUTER_COND_BB.
inner_inv, outer_inv and result_inv indicate whether the conditions
@@ -576,7 +584,7 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool 
inner_inv,
   else if (TREE_CODE_CLASS (gimple_cond_code (inner_cond)) == tcc_comparison
   && TREE_CODE_CLASS (gimple_cond_code (outer_cond)) == tcc_comparison)
 {
-  tree t;
+  tree t, ts = NULL_TREE;
   enum tree_code inner_cond_code = gimple_cond_code (inner_cond);
   enum tree_code outer_cond_code = gimple_cond_code (outer_cond);
 
@@ -599,7 +607,17 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool 
inner_inv,
outer_cond_code,
gimple_cond_lhs (outer_cond),
gimple_cond_rhs (outer_cond),
-   gimple_bb (outer_cond
+   gimple_bb (outer_cond)))
+ && !(t = ts = (fold_truth_andor_maybe_separate
+(UNKNOWN_LOCATION, TRUTH_ANDIF_EXPR,
+ boolean_type_node,
+ outer_cond_code,
+ gimple_cond_lhs (outer_cond),
+ gimple_cond_rhs (outer_cond),
+ inner_cond_code,
+ gimple_cond_lhs (inner_cond),
+ gimple_cond_rhs (inner_cond),
+ NULL
{
  {
  tree t1, t2;
@@ -636,7 +654,7 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool 
inner_inv,
  NULL, true, GSI_SAME_STMT);
 }
   /* ??? Fold should avoid this.  */
-  else if (!is_gimple_condexpr_for_cond (t))
+  else if (ts && !is_gimple_condexpr_for_cond (t))
goto gimplify_after_fold;
   if (result_inv)
t = fold_build1 (TRUTH_NOT_EXPR, TREE_TYPE (t), t);


  1   2   >