[gcc r15-7455] arm: fix typo in dg-require-effective-target [PR118089]

2025-02-10 Thread Richard Earnshaw via Gcc-cvs
https://gcc.gnu.org/g:6ed1b40268ed56c82ea75e7403ded7750d01c85a

commit r15-7455-g6ed1b40268ed56c82ea75e7403ded7750d01c85a
Author: Richard Earnshaw 
Date:   Mon Feb 10 10:50:36 2025 +

arm: fix typo in dg-require-effective-target [PR118089]

Trivial typo.

gcc/testsuite:
PR target/118089
* gcc.target/arm/thumb2-pop-loreg.c (dg-require-effective-target): 
Fix
typo in directive.

Diff:
---
 gcc/testsuite/gcc.target/arm/thumb2-pop-loreg.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/arm/thumb2-pop-loreg.c 
b/gcc/testsuite/gcc.target/arm/thumb2-pop-loreg.c
index 6db66b84cd96..c8397f64318f 100644
--- a/gcc/testsuite/gcc.target/arm/thumb2-pop-loreg.c
+++ b/gcc/testsuite/gcc.target/arm/thumb2-pop-loreg.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-require-effective_target arm_thumb2_ok } */
+/* { dg-require-effective-target arm_thumb2_ok } */
 /* { dg-options "-Os" } */
 
 int __attribute__((noinline)) f (void)


[gcc r15-7453] testsuite: Fix two testisms on x86 after PFA [PR118754]

2025-02-10 Thread Tamar Christina via Gcc-cvs
https://gcc.gnu.org/g:aaf5f5027d3f29c6c0d836753dddac16ba94a49a

commit r15-7453-gaaf5f5027d3f29c6c0d836753dddac16ba94a49a
Author: Tamar Christina 
Date:   Mon Feb 10 09:32:29 2025 +

testsuite: Fix two testisms on x86 after PFA [PR118754]

These two tests now vectorize the result finding
loop with PFA and so the number of loops checked
fails.

This fixes them by adding #pragma GCC novector to
the testcases.

gcc/testsuite/ChangeLog:

PR testsuite/118754
* gcc.dg/vect/vect-tail-nomask-1.c: Add novector.
* gcc.target/i386/pr106010-8c.c: Likewise.

Diff:
---
 gcc/testsuite/gcc.dg/vect/vect-tail-nomask-1.c | 2 ++
 gcc/testsuite/gcc.target/i386/pr106010-8c.c| 1 +
 2 files changed, 3 insertions(+)

diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-1.c 
b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-1.c
index ee9ab2e9d910..116a7aefca6c 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-1.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-1.c
@@ -72,6 +72,7 @@ run_test ()
 
   init_data (a, b, c, SIZE);
   test_citer (a, b, c);
+#pragma GCC novector
   for (i = 0; i < SIZE; i++)
 if (c[i] != a[i] + b[i])
   __builtin_abort ();
@@ -80,6 +81,7 @@ run_test ()
 
   init_data (a, b, c, SIZE);
   test_viter (a, b, c, SIZE);
+#pragma GCC novector
   for (i = 0; i < SIZE; i++)
 if (c[i] != a[i] + b[i])
   __builtin_abort ();
diff --git a/gcc/testsuite/gcc.target/i386/pr106010-8c.c 
b/gcc/testsuite/gcc.target/i386/pr106010-8c.c
index 61ae131829dc..76a3b3cbb628 100644
--- a/gcc/testsuite/gcc.target/i386/pr106010-8c.c
+++ b/gcc/testsuite/gcc.target/i386/pr106010-8c.c
@@ -30,6 +30,7 @@ do_test (void)
   __builtin_memset (ph_dst, 0, 2 * N * sizeof (_Float16));
 
   foo_ph (ph_dst);
+#pragma GCC novector
   for (int i = 0; i != N; i++)
 {
   if (ph_dst[i] != ph_src)


[gcc r15-7454] i386: Change RTL representation of bt[lq] [PR118623]

2025-02-10 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:92142019b6cd0cf1fe483203cf3ec451a9848a42

commit r15-7454-g92142019b6cd0cf1fe483203cf3ec451a9848a42
Author: Jakub Jelinek 
Date:   Mon Feb 10 10:40:22 2025 +0100

i386: Change RTL representation of bt[lq] [PR118623]

The following testcase is miscompiled because of RTL representation
of bt{l,q} insn followed by e.g. j{c,nc} being misleading to what it
actually does.
Let's look e.g. at
(define_insn_and_split "*jcc_bt"
  [(set (pc)
(if_then_else (match_operator 0 "bt_comparison_operator"
[(zero_extract:SWI48
   (match_operand:SWI48 1 "nonimmediate_operand")
   (const_int 1)
   (match_operand:QI 2 "nonmemory_operand"))
 (const_int 0)])
  (label_ref (match_operand 3))
  (pc)))
   (clobber (reg:CC FLAGS_REG))]
  "(TARGET_USE_BT || optimize_function_for_size_p (cfun))
   && (CONST_INT_P (operands[2])
   ? (INTVAL (operands[2]) < GET_MODE_BITSIZE (mode)
  && INTVAL (operands[2])
   >= (optimize_function_for_size_p (cfun) ? 8 : 32))
   : !memory_operand (operands[1], mode))
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (reg:CCC FLAGS_REG)
(compare:CCC
  (zero_extract:SWI48
(match_dup 1)
(const_int 1)
(match_dup 2))
  (const_int 0)))
   (set (pc)
(if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
  (label_ref (match_dup 3))
  (pc)))]
{
  operands[0] = shallow_copy_rtx (operands[0]);
  PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
})
The define_insn part in RTL describes exactly what it does,
jumps to op3 if bit op2 in op1 is set (for op0 NE) or not set (for op0 EQ).
The problem is with what it splits into.
put_condition_code %C1 for CCCmode comparisons emits c for EQ and LTU,
nc for NE and GEU and ICEs otherwise.
CCCmode is used mainly for carry out of add/adc, borrow out of sub/sbb,
in those cases e.g. for add we have
(set (reg:CCC flags) (compare:CCC (plus:M x y) x))
and use (ltu (reg:CCC flags) (const_int 0)) for carry set and
(geu (reg:CCC flags) (const_int 0)) for carry not set.  These cases
model in RTL what is actually happening, compare in infinite precision
x from the result of finite precision addition in M mode and if it is
less than unsigned (i.e. overflow happened), carry is set.
Another use of CCCmode is in UNSPEC_* patterns, those are used with
(eq (reg:CCC flags) (const_int 0)) for carry set and ne for unset,
given the UNSPEC no big deal, the middle-end doesn't know what means
set or unset.
But for the bt{l,q}; j{c,nc} case the above splits it into
(set (reg:CCC flags) (compare:CCC (zero_extract) (const_int 0)))
for bt and
(set (pc) (if_then_else (eq (reg:CCC flags) (const_int 0)) (label_ref) 
(pc)))
for the bit set case (so that the jump expands to jc) and ne for
the bit not set case (so that the jump expands to jnc).
Similarly for the different splitters for cmov and set{c,nc} etc.
The problem is that when the middle-end reads this RTL, it feels
the exact opposite to it.  If zero_extract is 1, flags is set
to comparison of 1 and 0 and that would mean using ne in the
if_then_else, and vice versa.

So, in order to better describe in RTL what is actually happening,
one possibility would be to swap the behavior of put_condition_code
and use NE + LTU -> c and EQ + GEU -> nc rather than the current
EQ + LTU -> c and NE + GEU -> nc; and adjust everything.  The
following patch uses a more limited approach, instead of representing
bt{l,q}; j{c,nc} case as written above it uses
(set (reg:CCC flags) (compare:CCC (const_int 0) (zero_extract)))
and
(set (pc) (if_then_else (ltu (reg:CCC flags) (const_int 0)) (label_ref) 
(pc)))
which uses the existing put_condition_code but describes what the
insns actually do in RTL clearly.  If zero_extract is 1,
then flags are LTU, 0U < 1U, if zero_extract is 0, then flags are GEU,
0U >= 0U.  The patch adjusts the *bt define_insn and all the
splitters to it and its comparisons/conditional moves/setXX.

2025-02-10  Jakub Jelinek  

PR target/118623
* config/i386/i386.md (*bt): Represent bt as
compare:CCC of const0_rtx and zero_extract rather than
zero_extract and const0_rtx.
(*bt_mask): Likewise.
(*jcc_bt): Likewise.  Use LTU and GEU as flags test
instead of EQ and NE.
(*jcc_bt_mask): Likewise.
(*jcc_bt_mask_1): Likewise.
(Help combine recognize

[gcc r15-7456] ipa-cp: Perform operations in the appropriate types (PR 118097)

2025-02-10 Thread Martin Jambor via Gcc-cvs
https://gcc.gnu.org/g:6d07e3de7e8d39ac144ba1d83bba08d48bacae13

commit r15-7456-g6d07e3de7e8d39ac144ba1d83bba08d48bacae13
Author: Martin Jambor 
Date:   Mon Feb 10 16:49:59 2025 +0100

ipa-cp: Perform operations in the appropriate types (PR 118097)

One of the testcases from PR 118097 and the one from PR 118535 show
that the fix to PR 118138 was incomplete.  We must not only make sure
that (intermediate) results of operations performed by IPA-CP are
fold_converted to the type of the destination formal parameter but we
also must decouple these types from the ones in which operations
are performed.

This patch does that, even though we do not store or stream the
operation types, instead we simply limit ourselves to tcc_comparisons
and operations for which the first operand and the result are of the
same type as determined by expr_type_first_operand_type_p.  If we
wanted to go beyond these, we would indeed need to store/stream the
respective operation type.

ipa_value_from_jfunc needs an additional check that res_type is not
NULL because it is not called just from within IPA-CP (where we know
we have a destination lattice slot belonging to a defined parameter)
but also from inlining, ipa-fnsummary and ipa-modref where it is used
to examine a call to a function with variadic arguments and we do not
have types for the unknown parameters.  But we cannot really work with
those or estimate any benefits when it comes to them, so ignoring them
should be OK.

Even after this patch, ipa_get_jf_arith_result has a parameter called
res_type in which it performs operations for aggregate jump functions,
where we do not allow type conversions when constructing the jump
functions and the type is the type of the stored data.  In GCC 16, we
could relax this and allow conversions like for scalars.

gcc/ChangeLog:

2025-01-20  Martin Jambor  

PR ipa/118097
* ipa-cp.cc (ipa_get_jf_arith_result): Adjust comment.
(ipa_get_jf_pass_through_result): Removed.
(ipa_value_from_jfunc): Use directly ipa_get_jf_arith_result, do
not specify operation type but make sure we check and possibly
convert the result.
(get_val_across_arith_op): Remove the last parameter, always pass
NULL_TREE to ipa_get_jf_arith_result in its last argument.
(propagate_vals_across_arith_jfunc): Do not pass res_type to
get_val_across_arith_op.
(propagate_vals_across_pass_through): Add checking assert that
parm_type is not NULL.

gcc/testsuite/ChangeLog:

2025-01-24  Martin Jambor  

PR ipa/118097
* gcc.dg/ipa/pr118097.c: New test.
* gcc.dg/ipa/pr118535.c: Likewise.
* gcc.dg/ipa/ipa-notypes-1.c: Likewise.

Diff:
---
 gcc/ipa-cp.cc| 46 +---
 gcc/testsuite/gcc.dg/ipa/ipa-notypes-1.c | 17 
 gcc/testsuite/gcc.dg/ipa/pr118097.c  | 23 
 gcc/testsuite/gcc.dg/ipa/pr118535.c  | 17 
 4 files changed, 75 insertions(+), 28 deletions(-)

diff --git a/gcc/ipa-cp.cc b/gcc/ipa-cp.cc
index d89324a00775..68959f2677ba 100644
--- a/gcc/ipa-cp.cc
+++ b/gcc/ipa-cp.cc
@@ -1467,11 +1467,10 @@ ipacp_value_safe_for_type (tree param_type, tree value)
 return NULL_TREE;
 }
 
-/* Return the result of a (possibly arithmetic) operation on the constant
-   value INPUT.  OPERAND is 2nd operand for binary operation.  RES_TYPE is
-   the type of the parameter to which the result is passed.  Return
-   NULL_TREE if that cannot be determined or be considered an
-   interprocedural invariant.  */
+/* Return the result of a (possibly arithmetic) operation on the constant value
+   INPUT.  OPERAND is 2nd operand for binary operation.  RES_TYPE is the type
+   in which any operation is to be performed.  Return NULL_TREE if that cannot
+   be determined or be considered an interprocedural invariant.  */
 
 static tree
 ipa_get_jf_arith_result (enum tree_code opcode, tree input, tree operand,
@@ -1513,21 +1512,6 @@ ipa_get_jf_arith_result (enum tree_code opcode, tree 
input, tree operand,
   return res;
 }
 
-/* Return the result of a (possibly arithmetic) pass through jump function
-   JFUNC on the constant value INPUT.  RES_TYPE is the type of the parameter
-   to which the result is passed.  Return NULL_TREE if that cannot be
-   determined or be considered an interprocedural invariant.  */
-
-static tree
-ipa_get_jf_pass_through_result (struct ipa_jump_func *jfunc, tree input,
-   tree res_type)
-{
-  return ipa_get_jf_arith_result (ipa_get_jf_pass_through_operation (jfunc),
- input,
- ipa_get_jf_pass_through_operand (jfunc),
- re

[gcc r15-7457] [gcn] install.texi: Update for new ISA targets and their requirements

2025-02-10 Thread Tobias Burnus via Gcc-cvs
https://gcc.gnu.org/g:7037fdf6bd0a4eea4a436b432265d6b7cc481737

commit r15-7457-g7037fdf6bd0a4eea4a436b432265d6b7cc481737
Author: Tobias Burnus 
Date:   Mon Feb 10 18:05:51 2025 +0100

[gcn] install.texi: Update for new ISA targets and their requirements

GCN now supports several additional ISA targets such that no longer
all targets have a multilib by default; add a note about this, the
generic targets and the required LLVM (and ROCm) versions.

gcc/ChangeLog:

* doc/install.texi (GCN): Update section about multilibs and
required LLVM version.

Diff:
---
 gcc/doc/install.texi | 30 ++
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi
index 3b9f56b05292..d6cf318b3afe 100644
--- a/gcc/doc/install.texi
+++ b/gcc/doc/install.texi
@@ -3991,14 +3991,36 @@ This is a synonym for @samp{x86_64-*-solaris2*}.
 @heading amdgcn-*-amdhsa
 AMD GCN GPU target.
 
-Instead of GNU Binutils, you will need to install LLVM 15, or later, and copy
+Instead of GNU Binutils, you need to install LLVM and copy
 @file{bin/llvm-mc} to @file{amdgcn-amdhsa/bin/as},
 @file{bin/lld} to @file{amdgcn-amdhsa/bin/ld},
 @file{bin/llvm-nm} to @file{amdgcn-amdhsa/bin/nm}, and
 @file{bin/llvm-ar} to both @file{bin/amdgcn-amdhsa-ar} and
-@file{bin/amdgcn-amdhsa-ranlib}.  Note that LLVM 13.0.1 or LLVM 14 can be used
-by specifying a @code{--with-multilib-list=} that does not list @code{gfx1100}
-and @code{gfx1103}.
+@file{bin/amdgcn-amdhsa-ranlib}.
+
+The required version of LLVM depends on the devices that you want to support.
+As the list of ISAs is long, GCC by default only builds a subset of the
+supported ISAs as multilib; use @code{--with-multilib-list=} to tailor the 
built
+multilibs.  Note that mixing ISAs in the same binary is not supported and gives
+a linker error.
+
+By default, multilib support is built for @code{gfx900}, @code{gfx906},
+@code{gfx908}, @code{gfx90a}, @code{gfx90c}, @code{gfx1030}, @code{gfx1036},
+@code{gfx1100} and @code{gfx1103}.  The default multilib configuration
+requires LLVM 15 or newer.  LLVM 13.0.1 or LLVM 14 can be used by specifying
+a @code{--with-multilib-list=} that does not list any GFX 11 device nor
+@code{gfx1036}.  At least LLVM 16 is required for @code{gfx1150} and
+@code{gfx1151}, LLVM 19 for the generic @code{gfx9-generic},
+@code{gfx10-3-generic}, and @code{gfx11-generic} targets and for
+@code{gfx1152}, while LLVM 20 is required for @code{gfx1153}.
+
+The supported ISA architectures are listed in the GCC manual. The generic
+ISA targets @code{gfx9-generic}, @code{gfx10-3-generic}, and
+@code{gfx11-generic} reduce the number of required multilibs but note
+that @code{gfx9-generic} does not include @code{gfx908} or @code{gfx90a},
+that linking specific ISA code with generic code is currently not supported,
+and that only a future ROCm release (newer than 6.3.2) will be able to execute
+generic code.
 
 Use Newlib (4.3.0 or newer; 4.4.0 contains some improvements and 4.5.0 fixes
 the device console output for GFX10 and GFX11 devices).


[gcc r15-7458] [gcn] mkoffload.cc: Print fatal error if -march has no multilib but generic has

2025-02-10 Thread Tobias Burnus via Gcc-cvs
https://gcc.gnu.org/g:4ce8ad684b90606a74f0cccfd9455184046c6c4e

commit r15-7458-g4ce8ad684b90606a74f0cccfd9455184046c6c4e
Author: Tobias Burnus 
Date:   Mon Feb 10 18:24:34 2025 +0100

[gcn] mkoffload.cc: Print fatal error if -march has no multilib but generic 
has

Assume that a distro has configured, e.g., a gfx9-generic multilib but not
for gfx902. In that case, mkoffload would fail to link with "error:
incompatible mach".  With this commit, an error is printed suggesting to try
the associated generic architecture instead.  The behavior is unchanged if
there is a multilib available for the specific ISA or when there is also no
multilib for the generic ISA.

Note: Building generic multilibs is currently not enabled by default;
they also require the linker/assembler of LLVM 19 or newer and, in 
particular,
for the execution a future ROCm release. (The next one? In any case, 6.3.2
does not support generic ISAs, yet.)

gcc/ChangeLog:

* config/gcn/mkoffload.cc (enum elf_arch_code): Add
EF_AMDGPU_MACH_AMDGCN_NONE.
(elf_arch): Use enum elf_arch_code as type.
(tool_cleanup): Silence warning by removing trailing '.' from error.
(get_arch_name): Return enum elf_arch_code.
(check_for_missing_lib): New; print fatal error if the multilib
is not available but it is for the associated generic ISA.
(main): Call it.

Diff:
---
 gcc/config/gcn/mkoffload.cc | 101 +---
 1 file changed, 94 insertions(+), 7 deletions(-)

diff --git a/gcc/config/gcn/mkoffload.cc b/gcc/config/gcn/mkoffload.cc
index 92e8fe70c12e..fbd68a6cd8c9 100644
--- a/gcc/config/gcn/mkoffload.cc
+++ b/gcc/config/gcn/mkoffload.cc
@@ -53,6 +53,7 @@
 
 /* Extract the EF_AMDGPU_MACH_AMDGCN_GFXnnn from the def file.  */
 enum elf_arch_code {
+  EF_AMDGPU_MACH_AMDGCN_NONE = -1,  /* For generic handling.  */
 #define GCN_DEVICE(name, NAME, ELF_ARCH, ...) \
   EF_AMDGPU_MACH_AMDGCN_ ## NAME = ELF_ARCH,
 #include "gcn-devices.def"
@@ -135,9 +136,8 @@ static struct obstack files_to_cleanup;
 enum offload_abi offload_abi = OFFLOAD_ABI_UNSET;
 const char *offload_abi_host_opts = NULL;
 
-uint32_t elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX900;  // Default GPU architecture.
+enum elf_arch_code elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX900;  // Default GPU 
architecture.
 uint32_t elf_flags = EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4;
-
 static int gcn_stack_size = 0;  /* Zero means use default.  */
 
 /* Delete tempfiles.  */
@@ -782,7 +782,7 @@ compile_native (const char *infile, const char *outfile, 
const char *compiler,
   obstack_ptr_grow (&argv_obstack, ".c");
   if (!offload_abi_host_opts)
 fatal_error (input_location,
-"%<-foffload-abi-host-opts%> not specified.");
+"%<-foffload-abi-host-opts%> not specified");
   obstack_ptr_grow (&argv_obstack, offload_abi_host_opts);
   obstack_ptr_grow (&argv_obstack, infile);
   obstack_ptr_grow (&argv_obstack, "-c");
@@ -796,16 +796,15 @@ compile_native (const char *infile, const char *outfile, 
const char *compiler,
   obstack_free (&argv_obstack, NULL);
 }
 
-static int
+static enum elf_arch_code
 get_arch (const char *str, const char *with_arch_str)
 {
   /* Use the def file to map the name to the elf_arch_code.  */
   if (!str) ;
 #define GCN_DEVICE(name, NAME, ELF, ...) \
   else if (strcmp (str, #name) == 0) \
-return ELF;
+return (enum elf_arch_code) ELF;
 #include "gcn-devices.def"
-#undef GCN_DEVICE
 
   /* else */
   error ("unrecognized argument in option %<-march=%s%>", str);
@@ -839,7 +838,91 @@ get_arch (const char *str, const char *with_arch_str)
 
   exit (FATAL_EXIT_CODE);
 
-  return 0;
+  return EF_AMDGPU_MACH_AMDGCN_NONE;
+}
+
+static const char*
+get_arch_name (enum elf_arch_code arch_code)
+{
+  switch (arch_code)
+{
+#define GCN_DEVICE(name, NAME, ELF, ...) \
+case EF_AMDGPU_MACH_AMDGCN_ ## NAME: \
+  return #name;
+#include "../../gcc/config/gcn/gcn-devices.def"
+default: return NULL;
+}
+}
+
+/* If an generic arch exists and for the chosen arch no (multi)lib is
+   available, print a fatal error - and suggest to compile for the generic
+   version instead.  */
+
+static void
+check_for_missing_lib (enum elf_arch_code elf_arch,
+  enum elf_arch_code default_arch)
+{
+  enum elf_arch_code generic_arch;
+  switch (elf_arch)
+{
+#define GCN_DEVICE(name, NAME, ELF, ISA, XNACK, SRAM, WAVE64, CU, \
+  MAX_ISA_VGPRS, GEN_VER, ARCH_FAM, GEN_MACH, ...) \
+case EF_AMDGPU_MACH_AMDGCN_ ## NAME: \
+  generic_arch = EF_AMDGPU_MACH_AMDGCN_ ## GEN_MACH; break;
+#include "../../gcc/config/gcn/gcn-devices.def"
+default: generic_arch = EF_AMDGPU_MACH_AMDGCN_NONE;
+}
+
+  /* If not generic or the default arch, the library version exists.  */
+  if (generic_arch == EF_AMDGPU_MACH_AMDGCN_NONE || elf_arch == default_a

[gcc(refs/users/meissner/heads/work193-sha)] Add ChangeLog.sha and update REVISION.

2025-02-10 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:9dee8398d4b3b5b6f31714771cb2f28a2c68da2f

commit 9dee8398d4b3b5b6f31714771cb2f28a2c68da2f
Author: Michael Meissner 
Date:   Mon Feb 10 13:16:04 2025 -0500

Add ChangeLog.sha and update REVISION.

2025-02-10  Michael Meissner  

gcc/

* ChangeLog.sha: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.sha | 5 +
 gcc/REVISION  | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.sha b/gcc/ChangeLog.sha
new file mode 100644
index ..ff4861cd436b
--- /dev/null
+++ b/gcc/ChangeLog.sha
@@ -0,0 +1,5 @@
+ Branch work193-sha, baseline 
+
+2025-02-10   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index 721d4c312fe3..d23d27743a1a 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work193 branch
+work193-sha branch


[gcc/devel/omp/gcc-14] OpenMP: Add flag for code elision to omp_context_selector_matches.

2025-02-10 Thread Sandra Loosemore via Gcc-cvs
https://gcc.gnu.org/g:d17095149a6b935e91dbf5f6ac9c6df8532c8b4e

commit d17095149a6b935e91dbf5f6ac9c6df8532c8b4e
Author: Sandra Loosemore 
Date:   Sun Feb 9 21:32:35 2025 +

OpenMP: Add flag for code elision to omp_context_selector_matches.

The "begin declare variant" has different rules for determining
whether a context selector cannot match for purposes of code elision
than we normally use; it excludes the case of a constant false
"condition" selector for the "user" set.

gcc/ChangeLog
* omp-general.cc (omp_context_selector_matches): Add an optional
bool argument for the code elision case.
* omp-general.h (omp_context_selector_matches): Likewise.

Diff:
---
 gcc/omp-general.cc | 28 
 gcc/omp-general.h  |  2 +-
 2 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/gcc/omp-general.cc b/gcc/omp-general.cc
index 249916ac7e32..82a585f932f0 100644
--- a/gcc/omp-general.cc
+++ b/gcc/omp-general.cc
@@ -1717,13 +1717,19 @@ omp_construct_traits_match (tree selector_traits, tree 
context_traits,
CONSTRUCT_CONTEXT is known to be complete and not missing constructs
filled in later during compilation.
 
+   If DECLARE_VARIANT_ELISION_P is true, the function implements the test
+   for elision of preprocessed code in "begin declare variant" constructs,
+   and returns 0 only for failure to match traits in the device and
+   implementation sets.
+
Dynamic properties (which are evaluated at run-time) should always
return 1.  */
 
 int
 omp_context_selector_matches (tree ctx,
  tree construct_context,
- bool complete_p)
+ bool complete_p,
+ bool declare_variant_elision_p)
 {
   int ret = 1;
   bool maybe_offloaded = omp_maybe_offloaded (construct_context);
@@ -1735,9 +1741,12 @@ omp_context_selector_matches (tree ctx,
 
   /* Immediately reject the match if there are any ignored
 selectors present.  */
-  for (tree ts = selectors; ts; ts = TREE_CHAIN (ts))
-   if (OMP_TS_CODE (ts) == OMP_TRAIT_INVALID)
- return 0;
+  if (!declare_variant_elision_p
+ || set == OMP_TRAIT_SET_DEVICE
+ || set == OMP_TRAIT_SET_IMPLEMENTATION)
+   for (tree ts = selectors; ts; ts = TREE_CHAIN (ts))
+ if (OMP_TS_CODE (ts) == OMP_TRAIT_INVALID)
+   return 0;
 
   if (set == OMP_TRAIT_SET_CONSTRUCT)
{
@@ -2066,6 +2075,13 @@ omp_context_selector_matches (tree ctx,
  break;
case OMP_TRAIT_USER_CONDITION:
  gcc_assert (set == OMP_TRAIT_SET_USER);
+ /* The spec does not include the "user" set in the things that
+can trigger code elision in "begin declare variant".  */
+ if (declare_variant_elision_p)
+   {
+ ret = -1;
+ break;
+   }
  for (tree p = OMP_TS_PROPERTIES (ts); p; p = TREE_CHAIN (p))
if (OMP_TP_NAME (p) == NULL_TREE)
  {
@@ -2081,6 +2097,10 @@ omp_context_selector_matches (tree ctx,
ret = -1;
  }
  break;
+   case OMP_TRAIT_INVALID:
+ /* This is only for the declare_variant_elision_p case.  */
+ ret = -1;
+ break;
default:
  break;
}
diff --git a/gcc/omp-general.h b/gcc/omp-general.h
index 8cf9f8aaad2f..47918b5c69ca 100644
--- a/gcc/omp-general.h
+++ b/gcc/omp-general.h
@@ -213,7 +213,7 @@ extern bool omp_check_for_duplicate_variant (location_t loc,
 tree base_decl, tree ctx);
 extern void omp_mark_declare_variant (location_t loc, tree variant,
  tree construct);
-extern int omp_context_selector_matches (tree, tree, bool);
+extern int omp_context_selector_matches (tree, tree, bool, bool = false);
 extern tree omp_merge_context_selectors (location_t, tree, tree,
 enum omp_ctx_directive);
 extern tree omp_get_context_selector (tree, enum omp_tss_code,


[gcc(refs/users/meissner/heads/work193-bugs)] Add ChangeLog.bugs and update REVISION.

2025-02-10 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:22f27cbf845b721cffcbc5a54810c0fb391033ed

commit 22f27cbf845b721cffcbc5a54810c0fb391033ed
Author: Michael Meissner 
Date:   Mon Feb 10 13:14:22 2025 -0500

Add ChangeLog.bugs and update REVISION.

2025-02-10  Michael Meissner  

gcc/

* ChangeLog.bugs: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.bugs | 5 +
 gcc/REVISION   | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.bugs b/gcc/ChangeLog.bugs
new file mode 100644
index ..d6cb192a2cf3
--- /dev/null
+++ b/gcc/ChangeLog.bugs
@@ -0,0 +1,5 @@
+ Branch work193-bugs, baseline 
+
+2025-02-10   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index 721d4c312fe3..211d21a037a6 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work193 branch
+work193-bugs branch


[gcc r14-11298] i386: Fix ICE with conditional QI/HI vector maxmin [PR118776]

2025-02-10 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:7cb022442444833796b384684afef14d0b478941

commit r14-11298-g7cb022442444833796b384684afef14d0b478941
Author: Jakub Jelinek 
Date:   Sat Feb 8 08:54:31 2025 +0100

i386: Fix ICE with conditional QI/HI vector maxmin [PR118776]

The following testcase ICEs starting with GCC 12 since r12-4526
although the bug has been introduced already in r12-2751.
The problem was in the addition of cond_ define_expand
which uses nonimmediate_operand predicates for both maxmin operands
for all VI1248_AVX512VLBW modes.  It works fine with
VI48_AVX512VL modes because the 3_mask VI48_AVX512VL
define_expand uses ix86_fixup_binary_operands_no_copy and the
*avx512f_3 VI48_AVX512VL define_insn uses
% in constraint and !(MEM_P && MEM_P) check in condition (and
3 define_expand with VI124_256_AVX512F_AVX512BW iterator
does that too), but even though the 8-bit and 16-bit element maxmin
is commutative too, the 3
define_insn with VI12_AVX512VL iterator didn't use % in constraint
to make it commutative.  So, e.g. cond_umaxv32qi define_expand
allowed nonimmediate_operand for both umax operands, but used
gen_umaxv32qi_mask which wasn't commutative and only allowed
nonimmediate_operand for the second operand.

The following patch fixes it by keeping the 3
VI124_256_AVX512F_AVX512BW define_expand as is (it does
ix86_fixup_binary_operands_no_copy) but extending the
3_mask define_expand from VI48_AVX512VL to
VI1248_AVX512VLBW which keeps the current modes with their
ISA conditions and adds the VI12_AVX512VL modes under additional
TARGET_AVX512BW condition, and turning the actual define_insn
into an * prefixed name (which it was before just for the non-masked
case) and having the same commutative operand handling as in other
define_insns.

2025-02-08  Jakub Jelinek  

PR target/118776
* config/i386/sse.md (3_mask): Use VI1248_AVX512VLBW
iterator rather than VI48_AVX512VL.
(3): Rename to ...
(*avx512bw_3): ... this.  Use
nonimmediate_operand rather than register_operand predicate and %v
rather than v constraint for operand 1 and adjust condition to 
reject
MEMs in both operand 1 and 2.

* gcc.target/i386/pr118776.c: New test.

(cherry picked from commit 64d8ea056a5c339700118a412dea1c44a57acf55)

Diff:
---
 gcc/config/i386/sse.md   | 18 +-
 gcc/testsuite/gcc.target/i386/pr118776.c | 23 +++
 2 files changed, 32 insertions(+), 9 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 111455cfcb79..67313d518530 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -16722,12 +16722,12 @@
 })
 
 (define_expand "3_mask"
-  [(set (match_operand:VI48_AVX512VL 0 "register_operand")
-   (vec_merge:VI48_AVX512VL
- (maxmin:VI48_AVX512VL
-   (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
-   (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
- (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand")
+  [(set (match_operand:VI1248_AVX512VLBW 0 "register_operand")
+   (vec_merge:VI1248_AVX512VLBW
+ (maxmin:VI1248_AVX512VLBW
+   (match_operand:VI1248_AVX512VLBW 1 "nonimmediate_operand")
+   (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand"))
+ (match_operand:VI1248_AVX512VLBW 3 "nonimm_or_0_operand")
  (match_operand: 4 "register_operand")))]
   "TARGET_AVX512F"
   "ix86_fixup_binary_operands_no_copy (, mode, operands);")
@@ -16743,12 +16743,12 @@
(set_attr "prefix" "maybe_evex")
(set_attr "mode" "")])
 
-(define_insn "3"
+(define_insn "*avx512bw_3"
   [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
 (maxmin:VI12_AVX512VL
-  (match_operand:VI12_AVX512VL 1 "register_operand" "v")
+  (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "%v")
   (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")))]
-  "TARGET_AVX512BW"
+  "TARGET_AVX512BW && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
   "vp\t{%2, %1, 
%0|%0, %1, %2}"
   [(set_attr "type" "sseiadd")
(set_attr "prefix" "evex")
diff --git a/gcc/testsuite/gcc.target/i386/pr118776.c 
b/gcc/testsuite/gcc.target/i386/pr118776.c
new file mode 100644
index ..44c18caf8967
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr118776.c
@@ -0,0 +1,23 @@
+/* PR target/118776 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+
+void bar (unsigned char *);
+
+void
+foo (unsigned char *x)
+{
+  unsigned char b[32];
+  bar (b);
+  for (int i = 0; i < 32; i++)
+{
+  unsigned char c = 8;
+  if (i > 3)
+   {
+ unsigned char d = b[i];
+ d = 1 > d ? 1 : d;
+ c = d;
+   }
+  x[i] = c;
+}
+}


[gcc r14-11296] c++: Don't use CLEANUP_EH_ONLY for new expression cleanup [PR118763]

2025-02-10 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:6cd1daf5206d9e0b2da84bf587c487f68aabd8db

commit r14-11296-g6cd1daf5206d9e0b2da84bf587c487f68aabd8db
Author: Jakub Jelinek 
Date:   Fri Feb 7 14:30:11 2025 +0100

c++: Don't use CLEANUP_EH_ONLY for new expression cleanup [PR118763]

The following testcase is miscompiled since r12-6325 stopped
preevaluating the initializers for new expression.
If evaluating the initializers throws, there is a correct cleanup
for that, but it is marked CLEANUP_EH_ONLY.  While in standard
C++ that is just fine, if it has statement expressions, it can
return or goto out of the expression and we should delete the
pointer in that case too.

There is already a sentry variable initialized to true and
set to false after everything is initialized and used as a guard
for the cleanup, so just removing the CLEANUP_EH_ONLY flag does
everything we need.  And in the normal case of the initializer
not using statement expressions at least with -O2 we get the same code,
while the change changes one
try { sentry = true; ... sentry = false; } catch { if (sentry) delete ...; }
into
try { sentry = true; ... sentry = false; } finally { if (sentry) delete 
...; }
optimizations will see that sentry is false when reaching the finally
other than through an exception.

Though, wonder what other CLEANUP_EH_ONLY cleanups might be an issue
with statement expressions.

2025-02-07  Jakub Jelinek  

PR c++/118763
* init.cc (build_new_1): Don't set CLEANUP_EH_ONLY.

* g++.dg/asan/pr118763.C: New test.

(cherry picked from commit fcecc74cb38723457a0447924d9993b31252a8f9)

Diff:
---
 gcc/cp/init.cc   |  1 -
 gcc/testsuite/g++.dg/asan/pr118763.C | 15 +++
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/gcc/cp/init.cc b/gcc/cp/init.cc
index 22d93793a4d8..e622547a092f 100644
--- a/gcc/cp/init.cc
+++ b/gcc/cp/init.cc
@@ -3826,7 +3826,6 @@ build_new_1 (vec **placement, tree type, 
tree nelts,
  tree end, sentry, begin;
 
  begin = get_target_expr (boolean_true_node);
- CLEANUP_EH_ONLY (begin) = 1;
 
  sentry = TARGET_EXPR_SLOT (begin);
 
diff --git a/gcc/testsuite/g++.dg/asan/pr118763.C 
b/gcc/testsuite/g++.dg/asan/pr118763.C
new file mode 100644
index ..401528583ed3
--- /dev/null
+++ b/gcc/testsuite/g++.dg/asan/pr118763.C
@@ -0,0 +1,15 @@
+// PR c++/118763
+// { dg-do run }
+
+int *
+foo (bool x)
+{
+  return new int (({ if (x) return nullptr; 1; }));
+}
+
+int
+main ()
+{
+  delete foo (true);
+  delete foo (false);
+}


[gcc r14-11295] c++: Allow constexpr reads from volatile std::nullptr_t objects [PR118661]

2025-02-10 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:6f5ada57162c10d5ff4b04de275aadf9c81a3da5

commit r14-11295-g6f5ada57162c10d5ff4b04de275aadf9c81a3da5
Author: Jakub Jelinek 
Date:   Fri Feb 7 14:27:18 2025 +0100

c++: Allow constexpr reads from volatile std::nullptr_t objects [PR118661]

As mentioned in the PR, https://eel.is/c++draft/conv.lval#note-1
says that even volatile reads from std::nullptr_t typed objects actually
don't read anything and https://eel.is/c++draft/expr.const#10.9
says that even those are ok in constant expressions.

So, the following patch adjusts the r9-4793 changes to have an exception
for NULLPTR_TYPE.
As [conv.lval]/3 also talks about accessing an inactive member, I've added
a testcase to cover that as well.

2025-02-07  Jakub Jelinek  

PR c++/118661
* constexpr.cc (potential_constant_expression_1): Don't diagnose
lvalue-to-rvalue conversion of volatile lvalue if it has 
NULLPTR_TYPE.
* decl2.cc (decl_maybe_constant_var_p): Return true for constexpr
decls with NULLPTR_TYPE even if they are volatile.

* g++.dg/cpp0x/constexpr-volatile4.C: New test.
* g++.dg/cpp0x/constexpr-union9.C: New test.

(cherry picked from commit 6c8e6d6febaed3c167ca9534935c2cb18045528e)

Diff:
---
 gcc/cp/constexpr.cc  |  3 ++-
 gcc/cp/decl2.cc  |  3 ++-
 gcc/testsuite/g++.dg/cpp0x/constexpr-union9.C| 16 
 gcc/testsuite/g++.dg/cpp0x/constexpr-volatile4.C | 20 
 4 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
index 79dd6a7423c0..83a9175b265d 100644
--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -9597,7 +9597,8 @@ potential_constant_expression_1 (tree t, bool want_rval, 
bool strict, bool now,
 return true;
 
   if (TREE_THIS_VOLATILE (t) && want_rval
-  && !FUNC_OR_METHOD_TYPE_P (TREE_TYPE (t)))
+  && !FUNC_OR_METHOD_TYPE_P (TREE_TYPE (t))
+  && !NULLPTR_TYPE_P (TREE_TYPE (t)))
 {
   if (flags & tf_error)
constexpr_error (loc, fundef_p, "lvalue-to-rvalue conversion of "
diff --git a/gcc/cp/decl2.cc b/gcc/cp/decl2.cc
index 139b5c15b3e8..5cbe4c0ceda7 100644
--- a/gcc/cp/decl2.cc
+++ b/gcc/cp/decl2.cc
@@ -4650,7 +4650,8 @@ decl_maybe_constant_var_p (tree decl)
   tree type = TREE_TYPE (decl);
   if (!VAR_P (decl))
 return false;
-  if (DECL_DECLARED_CONSTEXPR_P (decl) && !TREE_THIS_VOLATILE (decl))
+  if (DECL_DECLARED_CONSTEXPR_P (decl)
+  && (!TREE_THIS_VOLATILE (decl) || NULLPTR_TYPE_P (type)))
 return true;
   if (DECL_HAS_VALUE_EXPR_P (decl))
 /* A proxy isn't constant.  */
diff --git a/gcc/testsuite/g++.dg/cpp0x/constexpr-union9.C 
b/gcc/testsuite/g++.dg/cpp0x/constexpr-union9.C
new file mode 100644
index ..5d365f9cca62
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/constexpr-union9.C
@@ -0,0 +1,16 @@
+// PR c++/118661
+// { dg-do compile { target c++11 } }
+
+using nullptr_t = decltype (nullptr);
+union U { int i; nullptr_t n; };
+constexpr U u = { 42 };
+static_assert (u.n == nullptr, "");
+
+#if __cplusplus >= 201402L
+constexpr nullptr_t
+foo ()
+{
+  union U { int i; nullptr_t n; } u = { 42 };
+  return u.n;
+}
+#endif
diff --git a/gcc/testsuite/g++.dg/cpp0x/constexpr-volatile4.C 
b/gcc/testsuite/g++.dg/cpp0x/constexpr-volatile4.C
new file mode 100644
index ..5ef024009b50
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/constexpr-volatile4.C
@@ -0,0 +1,20 @@
+// PR c++/118661
+// { dg-do compile { target c++11 } }
+
+using nullptr_t = decltype (nullptr);
+constexpr volatile nullptr_t a = {};
+constexpr nullptr_t b = a;
+
+constexpr nullptr_t
+foo ()
+{
+#if __cplusplus >= 201402L
+  volatile nullptr_t c = {};
+  return c;
+#else
+  return nullptr;
+#endif
+}
+
+static_assert (b == nullptr, "");
+static_assert (foo () == nullptr, "");


[gcc r14-11290] c++: Return false from __is_bounded_array for zero-sized arrays [PR118655]

2025-02-10 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:b7553f76a7aac7ece958ff83661ac7b379f4d073

commit r14-11290-gb7553f76a7aac7ece958ff83661ac7b379f4d073
Author: Jakub Jelinek 
Date:   Wed Jan 29 09:32:04 2025 +0100

c++: Return false from __is_bounded_array for zero-sized arrays [PR118655]

This is basically Marek's PR114479 r14-9759 __is_array fix applied to
__is_bounded_array as well.  Similarly to that trait, when not using
the builtin it returned false for zero sized arrays but when using
the builtin it returns true.

2025-01-29  Jakub Jelinek  

PR c++/118655
* semantics.cc (trait_expr_value) <case CPTK_IS_BOUNDED_ARRAY>:
Return false for zero-sized arrays.

* g++.dg/ext/is_bounded_array.C: Extend.

(cherry picked from commit 3a6ddbf7b241e1cd9f73495ea373b0a12015bb07)

Diff:
---
 gcc/cp/semantics.cc |  9 -
 gcc/testsuite/g++.dg/ext/is_bounded_array.C | 14 ++
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index 50e8deb9ef93..9ebe35dbaffe 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -12527,7 +12527,14 @@ trait_expr_value (cp_trait_kind kind, tree type1, tree 
type2)
  || DERIVED_FROM_P (type1, type2)));
 
 case CPTK_IS_BOUNDED_ARRAY:
-  return type_code1 == ARRAY_TYPE && TYPE_DOMAIN (type1);
+  return (type_code1 == ARRAY_TYPE
+ && TYPE_DOMAIN (type1)
+ /* We don't want to report T[0] as being a bounded array type.
+This is for compatibility with an implementation of
+std::is_bounded_array by template argument deduction, because
+compute_array_index_type_loc rejects a zero-size array
+in SFINAE context.  */
+ && !(TYPE_SIZE (type1) && integer_zerop (TYPE_SIZE (type1))));
 
 case CPTK_IS_CLASS:
   return NON_UNION_CLASS_TYPE_P (type1);
diff --git a/gcc/testsuite/g++.dg/ext/is_bounded_array.C 
b/gcc/testsuite/g++.dg/ext/is_bounded_array.C
index b5fe435de957..63cd4f89091b 100644
--- a/gcc/testsuite/g++.dg/ext/is_bounded_array.C
+++ b/gcc/testsuite/g++.dg/ext/is_bounded_array.C
@@ -1,4 +1,5 @@
 // { dg-do compile { target c++11 } }
+// { dg-options "" }
 
 #define SA(X) static_assert((X),#X)
 
@@ -14,21 +15,34 @@
 
 class ClassType { };
 
+constexpr int sz0 = 0;
+constexpr int sz2 = 2;
+
 SA_TEST_CATEGORY(__is_bounded_array, int[2], true);
 SA_TEST_CATEGORY(__is_bounded_array, int[], false);
+SA_TEST_CATEGORY(__is_bounded_array, int[0], false);
 SA_TEST_CATEGORY(__is_bounded_array, int[2][3], true);
 SA_TEST_CATEGORY(__is_bounded_array, int[][3], false);
+SA_TEST_CATEGORY(__is_bounded_array, int[0][3], false);
+SA_TEST_CATEGORY(__is_bounded_array, int[3][0], false);
 SA_TEST_CATEGORY(__is_bounded_array, float*[2], true);
 SA_TEST_CATEGORY(__is_bounded_array, float*[], false);
 SA_TEST_CATEGORY(__is_bounded_array, float*[2][3], true);
 SA_TEST_CATEGORY(__is_bounded_array, float*[][3], false);
 SA_TEST_CATEGORY(__is_bounded_array, ClassType[2], true);
 SA_TEST_CATEGORY(__is_bounded_array, ClassType[], false);
+SA_TEST_CATEGORY(__is_bounded_array, ClassType[0], false);
 SA_TEST_CATEGORY(__is_bounded_array, ClassType[2][3], true);
 SA_TEST_CATEGORY(__is_bounded_array, ClassType[][3], false);
+SA_TEST_CATEGORY(__is_bounded_array, ClassType[0][3], false);
+SA_TEST_CATEGORY(__is_bounded_array, ClassType[2][0], false);
+SA_TEST_CATEGORY(__is_bounded_array, int[sz2], true);
+SA_TEST_CATEGORY(__is_bounded_array, int[sz0], false);
 SA_TEST_CATEGORY(__is_bounded_array, int(*)[2], false);
 SA_TEST_CATEGORY(__is_bounded_array, int(*)[], false);
+SA_TEST_CATEGORY(__is_bounded_array, int(*)[0], false);
 SA_TEST_CATEGORY(__is_bounded_array, int(&)[2], false);
+SA_TEST_CATEGORY(__is_bounded_array, int(&)[0], false);
 SA_TEST_FN(__is_bounded_array, int(&)[], false);
 
 // Sanity check.


[gcc/devel/omp/gcc-14] OpenMP: C front end support for "begin declare variant"

2025-02-10 Thread Sandra Loosemore via Gcc-cvs
https://gcc.gnu.org/g:f715700e7e999d52bb9944776e678c69ad63fb92

commit f715700e7e999d52bb9944776e678c69ad63fb92
Author: Sandra Loosemore 
Date:   Sun Feb 9 21:32:36 2025 +

OpenMP: C front end support for "begin declare variant"

gcc/c/ChangeLog
* c-decl.cc (current_omp_declare_variant_attribute): Define.
* c-lang.h (struct c_omp_declare_variant_attr): Declare.
(current_omp_declare_variant_attribute): Declare.
* c-parser.cc (c_parser_skip_to_pragma_omp_end_declare_variant): 
New.
(c_parser_translation_unit): Check for "omp begin declare variant"
with no matching "end".
(c_parser_declaration_or_fndef): Handle functions in "omp begin
declare variant" block.
(c_finish_omp_declare_variant): Merge context selectors with
surrounding "omp begin declare variant".
(JOIN_STR): Define.
(omp_start_variant_function): New.
(omp_finish_variant_function): New.
(c_parser_omp_begin): Handle "omp begin declare variant".
(c_parser_omp_end): Likewise.

Co-Authored-By: Julian Brown 

Diff:
---
 gcc/c/c-decl.cc   |   3 +
 gcc/c/c-lang.h|   8 ++
 gcc/c/c-parser.cc | 342 --
 3 files changed, 317 insertions(+), 36 deletions(-)

diff --git a/gcc/c/c-decl.cc b/gcc/c/c-decl.cc
index 97e0bd755745..03822c47fbc8 100644
--- a/gcc/c/c-decl.cc
+++ b/gcc/c/c-decl.cc
@@ -162,6 +162,9 @@ vec 
*current_omp_declare_target_attribute;
#pragma omp begin assumes ... #pragma omp end assumes regions
we are in.  */
 vec *current_omp_begin_assumes;
+
+/* Vector of "omp begin/end declare variant" blocks we are in.  */
+vec *current_omp_declare_variant_attribute;
 
 /* Each c_binding structure describes one binding of an identifier to
a decl.  All the decls in a scope - irrespective of namespace - are
diff --git a/gcc/c/c-lang.h b/gcc/c/c-lang.h
index e51264495fe3..1ca04e1b1064 100644
--- a/gcc/c/c-lang.h
+++ b/gcc/c/c-lang.h
@@ -70,6 +70,11 @@ struct GTY(()) c_omp_begin_assumes_data {
   bool attr_syntax;
 };
 
+struct GTY(()) c_omp_declare_variant_attr {
+  bool attr_syntax;
+  tree selector;
+};
+
 /* If non-empty, implicit "omp declare target" attribute is added into the
attribute lists.  */
 extern GTY(()) vec
@@ -78,5 +83,8 @@ extern GTY(()) vec
#pragma omp end assumes (and how many times when nested).  */
 extern GTY(()) vec
   *current_omp_begin_assumes;
+/* And similarly for #pragma omp begin/end declare variant.  */
+extern GTY(()) vec
+  *current_omp_declare_variant_attribute;
 
 #endif /* ! GCC_C_LANG_H */
diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc
index 4ae1cd500ff3..1ba0f2485e98 100644
--- a/gcc/c/c-parser.cc
+++ b/gcc/c/c-parser.cc
@@ -1441,6 +1441,55 @@ c_parser_skip_to_pragma_eol (c_parser *parser, bool 
error_if_not_eol = true)
   parser->error = false;
 }
 
+/* Skip tokens up to and including "#pragma omp end declare variant".
+   Properly handle nested "#pragma omp begin declare variant" pragmas.  */
+static void
+c_parser_skip_to_pragma_omp_end_declare_variant (c_parser *parser)
+{
+  for (int depth = 0; depth >= 0; )
+{
+  c_token *token = c_parser_peek_token (parser);
+
+  switch (token->type)
+   {
+   case CPP_PRAGMA_EOL:
+ if (!parser->in_pragma)
+   break;
+ /* FALLTHRU */
+   case CPP_EOF:
+ /* If we've run out of tokens, stop.  */
+ return;
+
+   case CPP_PRAGMA:
+ if ((token->pragma_kind == PRAGMA_OMP_BEGIN
+  || token->pragma_kind == PRAGMA_OMP_END)
+ && c_parser_peek_nth_token (parser, 2)->type == CPP_NAME
+ && c_parser_peek_nth_token (parser, 3)->type == CPP_NAME)
+   {
+ tree id1 = c_parser_peek_nth_token (parser, 2)->value;
+ tree id2 = c_parser_peek_nth_token (parser, 3)->value;
+ if (strcmp (IDENTIFIER_POINTER (id1), "declare") == 0
+ && strcmp (IDENTIFIER_POINTER (id2), "variant") == 0)
+   {
+ if (token->pragma_kind == PRAGMA_OMP_BEGIN)
+   depth++;
+ else
+   depth--;
+   }
+   }
+ c_parser_consume_pragma (parser);
+ c_parser_skip_to_pragma_eol (parser, false);
+ continue;
+
+   default:
+ break;
+   }
+
+  /* Consume the token.  */
+  c_parser_consume_token (parser);
+}
+}
+
 /* Skip tokens until we have consumed an entire block, or until we
have consumed a non-nested ';'.  */
 
@@ -1949,6 +1998,13 @@ c_parser_translation_unit (c_parser *parser)
   "#pragma omp end declare target");
   vec_safe_truncate (current_omp_declare_target_attribute, 0);
 }
+  if (vec_safe_length (current_omp_declare_variant_attribute))
+{
+  if (!errorcount)
+   error ("% without corresponding "
+  

[gcc(refs/users/meissner/heads/work193)] Change TARGET_FPRND to TARGET_POWER5X.

2025-02-10 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:b6bac5b5f3abeb4736000845b930b0f4e3342ffd

commit b6bac5b5f3abeb4736000845b930b0f4e3342ffd
Author: Michael Meissner 
Date:   Mon Feb 10 13:21:55 2025 -0500

Change TARGET_FPRND to TARGET_POWER5X.

This patch changes TARGET_FPRND to TARGET_POWER5X.  The -mfprnd switch is not
being changed, just the name of the macros used to determine if the PowerPC
processor supports ISA 2.4 (Power5x).

2025-02-10  Michael Meissner  

gcc/

* gcc/config/rs6000/rs6000.cc (rs6000_option_override_internal):
Change TARGET_FPRND to TARGET_POWER5X.
* gcc/config/rs6000/rs6000.h (TARGET_POWER5X): New macro.
* gcc/config/rs6000/rs6000.md (fmod3): Change TARGET_FPRND to
TARGET_POWER5X.
(remainder3): Likewise.
(fctiwuz_): Likewise.
(ceil2): Likewise.
(floor2): Likewise.
(round2): Likewise.

Diff:
---
 gcc/config/rs6000/rs6000.cc |  4 ++--
 gcc/config/rs6000/rs6000.h  |  1 +
 gcc/config/rs6000/rs6000.md | 14 +++---
 3 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 011ba7c899ec..07f5e58532a5 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -3924,7 +3924,7 @@ rs6000_option_override_internal (bool global_init_p)
 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
   else if (TARGET_CMPB)
 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
-  else if (TARGET_FPRND)
+  else if (TARGET_POWER5X)
 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
   else if (TARGET_POWER5)
 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
@@ -3951,7 +3951,7 @@ rs6000_option_override_internal (bool global_init_p)
   rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
 }
 
-  if (!TARGET_FPRND && TARGET_VSX)
+  if (!TARGET_POWER5X && TARGET_VSX)
 {
   if (rs6000_isa_flags_explicit & OPTION_MASK_FPRND)
/* TARGET_VSX = 1 implies Power 7 and newer */
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 5ff801c8801d..882a3864ca66 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -501,6 +501,7 @@ extern int rs6000_vector_align[];
 
 /* Convert ISA bits like POPCNTB to PowerPC processors like POWER5.  */
 #define TARGET_POWER5  TARGET_POPCNTB
+#define TARGET_POWER5X TARGET_FPRND
 
 /* In switching from using target_flags to using rs6000_isa_flags, the options
machinery creates OPTION_MASK_ instead of MASK_.  The MASK_
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index c5bd273be8b3..045ce22a03c8 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -5171,7 +5171,7 @@
(use (match_operand:SFDF 1 "gpc_reg_operand"))
(use (match_operand:SFDF 2 "gpc_reg_operand"))]
   "TARGET_HARD_FLOAT
-   && TARGET_FPRND
+   && TARGET_POWER5X
&& flag_unsafe_math_optimizations"
 {
   rtx div = gen_reg_rtx (mode);
@@ -5189,7 +5189,7 @@
(use (match_operand:SFDF 1 "gpc_reg_operand"))
(use (match_operand:SFDF 2 "gpc_reg_operand"))]
   "TARGET_HARD_FLOAT
-   && TARGET_FPRND
+   && TARGET_POWER5X
&& flag_unsafe_math_optimizations"
 {
   rtx div = gen_reg_rtx (mode);
@@ -6689,7 +6689,7 @@
 (define_insn "*friz"
   [(set (match_operand:DF 0 "gpc_reg_operand" "=d,wa")
(float:DF (fix:DI (match_operand:DF 1 "gpc_reg_operand" "d,wa"))))]
-  "TARGET_HARD_FLOAT && TARGET_FPRND
+  "TARGET_HARD_FLOAT && TARGET_POWER5X
&& flag_unsafe_math_optimizations && !flag_trapping_math && TARGET_FRIZ"
   "@
friz %0,%1
@@ -6817,7 +6817,7 @@
   [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d,wa")
(unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "d,wa")]
 UNSPEC_FRIZ))]
-  "TARGET_HARD_FLOAT && TARGET_FPRND"
+  "TARGET_HARD_FLOAT && TARGET_POWER5X"
   "@
friz %0,%1
xsrdpiz %x0,%x1"
@@ -6827,7 +6827,7 @@
   [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d,wa")
(unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "d,wa")]
 UNSPEC_FRIP))]
-  "TARGET_HARD_FLOAT && TARGET_FPRND"
+  "TARGET_HARD_FLOAT && TARGET_POWER5X"
   "@
frip %0,%1
xsrdpip %x0,%x1"
@@ -6837,7 +6837,7 @@
   [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d,wa")
(unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "d,wa")]
 UNSPEC_FRIM))]
-  "TARGET_HARD_FLOAT && TARGET_FPRND"
+  "TARGET_HARD_FLOAT && TARGET_POWER5X"
   "@
frim %0,%1
xsrdpim %x0,%x1"
@@ -6848,7 +6848,7 @@
   [(set (match_operand:SFDF 0 "gpc_reg_operand" "=")
(unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "")]
 UNSPEC_FRIN))]
-  "TARGET_HARD_FLOAT && TARGET_FPRND"
+  "TARGET_HARD_FLOAT && TARGET_POWER5X"
   "frin %0,%1"
   [(set_attr "type" "fp")])


[gcc(refs/users/meissner/heads/work193)] Change TARGET_POPCNTB to TARGET_POWER5.

2025-02-10 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:ab2c7aa1d7750fc36cfd9591486d8473951924ee

commit ab2c7aa1d7750fc36cfd9591486d8473951924ee
Author: Michael Meissner 
Date:   Mon Feb 10 13:21:16 2025 -0500

Change TARGET_POPCNTB to TARGET_POWER5.

This patch changes TARGET_POPCNTB to TARGET_POWER5.  The -mpopcntb switch 
is not
being changed in this patch, just the name of the macros used to determine 
if
the PowerPC processor supports ISA 2.2 (Power5).

2025-02-10  Michael Meissner  

gcc/

* gcc/config/rs6000/rs6000-builtin.cc (rs6000_builtin_is_supported):
Change TARGET_POPCNTB to TARGET_POWER5.
* gcc/config/rs6000/rs6000.cc (rs6000_option_override_internal):
Likewise.
* gcc/config/rs6000/rs6000.h (TARGET_FCFID): Likewise.
(TARGET_POWER5): New macro.
(TARGET_EXTRA_BUILTINS): Change TARGET_POPCNTB to TARGET_POWER5.
(TARGET_FRE): Likewise.
(TARGET_FRSQRTES): Likewise.
* gcc/config/rs6000/rs6000.md (enabled attribute): Likewise.

Diff:
---
 gcc/config/rs6000/rs6000-builtin.cc |  2 +-
 gcc/config/rs6000/rs6000.cc |  2 +-
 gcc/config/rs6000/rs6000.h  | 11 +++
 gcc/config/rs6000/rs6000.md |  2 +-
 4 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin.cc 
b/gcc/config/rs6000/rs6000-builtin.cc
index 111802381acb..4ed2bc1ca89e 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -155,7 +155,7 @@ rs6000_builtin_is_supported (enum rs6000_gen_builtins 
fncode)
 case ENB_ALWAYS:
   return true;
 case ENB_P5:
-  return TARGET_POPCNTB;
+  return TARGET_POWER5;
 case ENB_P6:
   return TARGET_CMPB;
 case ENB_P6_64:
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 675b039c2b65..011ba7c899ec 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -3926,7 +3926,7 @@ rs6000_option_override_internal (bool global_init_p)
 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
   else if (TARGET_FPRND)
 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
-  else if (TARGET_POPCNTB)
+  else if (TARGET_POWER5)
 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
   else if (TARGET_ALTIVEC)
 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index a60d7a53cfaf..5ff801c8801d 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -448,7 +448,7 @@ extern int rs6000_vector_align[];
Enable 32-bit fcfid's on any of the switches for newer ISA machines.  */
 #define TARGET_FCFID   (TARGET_POWERPC64   \
 || TARGET_PPC_GPOPT/* 970/power4 */\
-|| TARGET_POPCNTB  /* ISA 2.02 */  \
+|| TARGET_POWER5   /* ISA 2.02 */  \
 || TARGET_CMPB /* ISA 2.05 */  \
 || TARGET_POPCNTD) /* ISA 2.06 */
 
@@ -499,6 +499,9 @@ extern int rs6000_vector_align[];
 #define TARGET_MINMAX  (TARGET_HARD_FLOAT && TARGET_PPC_GFXOPT \
 && (TARGET_P9_MINMAX || !flag_trapping_math))
 
+/* Convert ISA bits like POPCNTB to PowerPC processors like POWER5.  */
+#define TARGET_POWER5  TARGET_POPCNTB
+
 /* In switching from using target_flags to using rs6000_isa_flags, the options
machinery creates OPTION_MASK_ instead of MASK_.  The MASK_
options that have not yet been replaced by their OPTION_MASK_
@@ -525,7 +528,7 @@ extern int rs6000_vector_align[];
 
 #define TARGET_EXTRA_BUILTINS  (TARGET_POWERPC64\
 || TARGET_PPC_GPOPT /* 970/power4 */\
-|| TARGET_POPCNTB   /* ISA 2.02 */  \
+|| TARGET_POWER5/* ISA 2.02 */  \
 || TARGET_CMPB  /* ISA 2.05 */  \
 || TARGET_POPCNTD   /* ISA 2.06 */  \
 || TARGET_ALTIVEC   \
@@ -541,9 +544,9 @@ extern int rs6000_vector_align[];
 #define TARGET_FRES(TARGET_HARD_FLOAT && TARGET_PPC_GFXOPT)
 
 #define TARGET_FRE (TARGET_HARD_FLOAT \
-&& (TARGET_POPCNTB || VECTOR_UNIT_VSX_P (DFmode)))
+&& (TARGET_POWER5 || VECTOR_UNIT_VSX_P (DFmode)))
 
-#define TARGET_FRSQRTES(TARGET_HARD_FLOAT && TARGET_POPCNTB \
+#define TARGET_FRSQRTES(TARGET_HARD_FLOAT && TARGET_POWER5 \
 && TARGET_PPC_GFXOPT)
 
 #define TARGET_FRSQRTE (TARGET_HARD_FLOAT \
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 9c718ca2a226..c5bd273be8b3 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs

[gcc] Created branch 'meissner/heads/work193' in namespace 'refs/users'

2025-02-10 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work193' was created in namespace 'refs/users' 
pointing to:

 4ce8ad684b90... [gcn] mkoffload.cc: Print fatal error if -march has no mult


[gcc(refs/users/meissner/heads/work193)] Change TARGET_POPCNTD to TARGET_POWER7.

2025-02-10 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:c71e1b6f2ea3590a3ce09a51e1bc2433177b8d48

commit c71e1b6f2ea3590a3ce09a51e1bc2433177b8d48
Author: Michael Meissner 
Date:   Mon Feb 10 13:23:19 2025 -0500

Change TARGET_POPCNTD to TARGET_POWER7.

This patch changes TARGET_POPCNTD to TARGET_POWER7.  The -mpopcntd switch 
is not
being changed, just the name of the macros used to determine if the PowerPC
processor supports ISA 2.6 (Power7).

2025-02-10  Michael Meissner  

gcc/

* gcc/config/rs6000/dfp.md (cmp_internal1): Change 
TARGET_POPCNTD
to TARGET_POWER7.
* gcc/config/rs6000/rs6000-builtin.cc (rs6000_builtin_is_supported):
Likewise.
* gcc/config/rs6000/rs6000-string.cc (expand_block_compare): 
Likewise.
* gcc/config/rs6000/rs6000.cc (rs6000_hard_regno_mode_ok_uncached):
Likewise.
(rs6000_option_override_internal): Likewise.
(rs6000_rtx_costs): Likewise.
* gcc/config/rs6000/rs6000.h (TARGET_LDBRX): Likewise.
(TARGET_FCFID): Likewise.
(TARGET_LFIWZX): Likewise.
(TARGET_FCFIDS): Likewise.
(TARGET_FCFIDU): Likewise.
(TARGET_FCFIDUS): Likewise.
(TARGET_FCTIDUZ): Likewise.
(TARGET_FCTIWUZ): Likewise.
(TARGET_FCTIDUZ): Likewise.
(TARGET_POWER7): New macro.
(TARGET_EXTRA_BUILTINS): Change TARGET_POPCNTD to TARGET_POWER7.
(CTZ_DEFINED_VALUE_AT_ZERO): Likewise.
* gcc/config/rs6000/rs6000.md (enabled attribute): Likewise.
(lrintsi2): Likewise.
(lrintsi): Likewise.
(lrintsi_di): Likewise.
(cmpmemsi): Likewise.
(bpermd_): Likewise.
(addg6s): Likewise.
(cdtbcd): Likewise.
(cbcdtd): Likewise.
(div_): Likewise.

Diff:
---
 gcc/config/rs6000/dfp.md|  2 +-
 gcc/config/rs6000/rs6000-builtin.cc |  4 ++--
 gcc/config/rs6000/rs6000-string.cc  |  2 +-
 gcc/config/rs6000/rs6000.cc |  8 
 gcc/config/rs6000/rs6000.h  | 21 +++--
 gcc/config/rs6000/rs6000.md | 20 ++--
 6 files changed, 29 insertions(+), 28 deletions(-)

diff --git a/gcc/config/rs6000/dfp.md b/gcc/config/rs6000/dfp.md
index 59fa66ae15c8..5919149682b2 100644
--- a/gcc/config/rs6000/dfp.md
+++ b/gcc/config/rs6000/dfp.md
@@ -214,7 +214,7 @@
 (define_insn "floatdidd2"
   [(set (match_operand:DD 0 "gpc_reg_operand" "=d")
(float:DD (match_operand:DI 1 "gpc_reg_operand" "d")))]
-  "TARGET_DFP && TARGET_POPCNTD"
+  "TARGET_DFP && TARGET_POWER7"
   "dcffix %0,%1"
   [(set_attr "type" "dfp")])
 
diff --git a/gcc/config/rs6000/rs6000-builtin.cc 
b/gcc/config/rs6000/rs6000-builtin.cc
index dbb8520ab039..2366b2aee00a 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -161,9 +161,9 @@ rs6000_builtin_is_supported (enum rs6000_gen_builtins 
fncode)
 case ENB_P6_64:
   return TARGET_POWER6 && TARGET_POWERPC64;
 case ENB_P7:
-  return TARGET_POPCNTD;
+  return TARGET_POWER7;
 case ENB_P7_64:
-  return TARGET_POPCNTD && TARGET_POWERPC64;
+  return TARGET_POWER7 && TARGET_POWERPC64;
 case ENB_P8:
   return TARGET_POWER8;
 case ENB_P8V:
diff --git a/gcc/config/rs6000/rs6000-string.cc 
b/gcc/config/rs6000/rs6000-string.cc
index 3d2911ca08a0..703f77fa0bf1 100644
--- a/gcc/config/rs6000/rs6000-string.cc
+++ b/gcc/config/rs6000/rs6000-string.cc
@@ -1949,7 +1949,7 @@ bool
 expand_block_compare (rtx operands[])
 {
   /* TARGET_POPCNTD is already guarded at expand cmpmemsi.  */
-  gcc_assert (TARGET_POPCNTD);
+  gcc_assert (TARGET_POWER7);
 
   /* For P8, this case is complicated to handle because the subtract
  with carry instructions do not generate the 64-bit carry and so
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index d2814364b3d0..1bba77244c25 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -1924,7 +1924,7 @@ rs6000_hard_regno_mode_ok_uncached (int regno, 
machine_mode mode)
  if(GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
return 1;
 
- if (TARGET_POPCNTD && mode == SImode)
+ if (TARGET_POWER7 && mode == SImode)
return 1;
 
  if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
@@ -3918,7 +3918,7 @@ rs6000_option_override_internal (bool global_init_p)
 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
   else if (TARGET_VSX)
 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
-  else if (TARGET_POPCNTD)
+  else if (TARGET_POWER7)
 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
   else if (TARGET_DFP)
 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
@@ -4131,7 +4131,7 @@ rs6000_option_override_internal (bool global_init_p)
   else if (TARGET_LONG_DOUBLE_128)
 

[gcc/devel/omp/gcc-14] OpenMP: Pass a 3-way flag to omp_check_context_selector instead of a bool.

2025-02-10 Thread Sandra Loosemore via Gcc-cvs
https://gcc.gnu.org/g:3b7d4cce6bb46e7cdbdd079b25884c7ba4ca09b8

commit 3b7d4cce6bb46e7cdbdd079b25884c7ba4ca09b8
Author: Sandra Loosemore 
Date:   Sun Feb 9 21:32:35 2025 +

OpenMP: Pass a 3-way flag to omp_check_context_selector instead of a bool.

The OpenMP "begin declare variant" directive has slightly different
requirements for context selectors than regular "declare variant", so
something more than a bool is required to tell the error-checking routine
what to check.

gcc/ChangeLog
* omp-general.cc (omp_check_context_selector): Change
metadirective_p argument to a 3-way flag.  Add extra check for
OMP_CTX_BEGIN_DECLARE_VARIANT.
* omp-general.h (enum omp_ctx_directive): New.
(omp_check_context_selector): Adjust declaration.

gcc/c/ChangeLog
* c-parser.cc (c_finish_omp_declare_variant): Update call to
omp_check_context_selector.
(c_parser_omp_metadirective): Likewise.

gcc/cp/ChangeLog
* parser.cc (cp_finish_omp_declare_variant): Update call to
omp_check_context_selector.
(cp_parser_omp_metadirective): Likewise.

gcc/fortran/ChangeLog
* trans-openmp.cc (gfc_trans_omp_declare_variant): Update call to
omp_check_context_selector.
(gfc_trans_omp_metadirective): Likewise.

Diff:
---
 gcc/c/c-parser.cc   |  6 --
 gcc/cp/parser.cc|  6 --
 gcc/fortran/trans-openmp.cc |  5 +++--
 gcc/omp-general.cc  | 19 ---
 gcc/omp-general.h   |  6 +-
 5 files changed, 32 insertions(+), 10 deletions(-)

diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc
index 1d17ec7802d9..4ae1cd500ff3 100644
--- a/gcc/c/c-parser.cc
+++ b/gcc/c/c-parser.cc
@@ -26702,7 +26702,8 @@ c_finish_omp_declare_variant (c_parser *parser, tree 
fndecl, tree parms)
  ctx  = c_parser_omp_context_selector_specification (parser, parms);
  if (ctx == error_mark_node)
goto fail;
- ctx = omp_check_context_selector (match_loc, ctx, false);
+ ctx = omp_check_context_selector (match_loc, ctx,
+   OMP_CTX_DECLARE_VARIANT);
  if (ctx != error_mark_node && variant != error_mark_node)
{
  if (TREE_CODE (variant) != FUNCTION_DECL)
@@ -28956,7 +28957,8 @@ c_parser_omp_metadirective (c_parser *parser, bool 
*if_p)
 NULL_TREE);
  if (ctx == error_mark_node)
goto error;
- ctx = omp_check_context_selector (match_loc, ctx, true);
+ ctx = omp_check_context_selector (match_loc, ctx,
+   OMP_CTX_METADIRECTIVE);
  if (ctx == error_mark_node)
goto error;
 
diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index 5d28c8986516..36a093a1b0d1 100644
--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc
@@ -50439,7 +50439,8 @@ cp_finish_omp_declare_variant (cp_parser *parser, 
cp_token *pragma_tok,
  ctx = cp_parser_omp_context_selector_specification (parser, true);
  if (ctx == error_mark_node)
goto fail;
- ctx = omp_check_context_selector (match_loc, ctx, false);
+ ctx = omp_check_context_selector (match_loc, ctx,
+   OMP_CTX_DECLARE_VARIANT);
  if (ctx != error_mark_node && variant != error_mark_node)
{
  tree match_loc_node
@@ -51419,7 +51420,8 @@ cp_parser_omp_metadirective (cp_parser *parser, 
cp_token *pragma_tok,
  ctx = cp_parser_omp_context_selector_specification (parser, false);
  if (ctx == error_mark_node)
goto fail;
- ctx = omp_check_context_selector (match_loc, ctx, true);
+ ctx = omp_check_context_selector (match_loc, ctx,
+   OMP_CTX_METADIRECTIVE);
  if (ctx == error_mark_node)
goto fail;
 
diff --git a/gcc/fortran/trans-openmp.cc b/gcc/fortran/trans-openmp.cc
index f4a338078ebd..e79794d1859e 100644
--- a/gcc/fortran/trans-openmp.cc
+++ b/gcc/fortran/trans-openmp.cc
@@ -11320,7 +11320,8 @@ gfc_trans_omp_declare_variant (gfc_namespace *ns)
  continue;
}
   set_selectors = omp_check_context_selector
-   (gfc_get_location (&odv->where), set_selectors, false);
+   (gfc_get_location (&odv->where), set_selectors,
+OMP_CTX_DECLARE_VARIANT);
   if (set_selectors != error_mark_node)
{
  if (!variant_proc_sym->attr.implicit_type
@@ -11736,7 +11737,7 @@ gfc_trans_omp_metadirective (gfc_code *code)
   tree ctx = gfc_trans_omp_set_selector (variant->selectors,
 variant->where);
   ctx = omp_check_context_selector (gfc_get_location (&variant->where),
-   ctx, true);
+

[gcc(refs/vendors/redhat/heads/gcc-14-branch)] Merge commit 'r14-11298-g7cb022442444833796b384684afef14d0b478941' into redhat/gcc-14-branch

2025-02-10 Thread Jakub Jelinek via Libstdc++-cvs
https://gcc.gnu.org/g:c45fb1d26a522a6e06a9e2d8403284e4436fc965

commit c45fb1d26a522a6e06a9e2d8403284e4436fc965
Merge: e525669e462d 7cb022442444
Author: Jakub Jelinek 
Date:   Mon Feb 10 19:23:53 2025 +0100

Merge commit 'r14-11298-g7cb022442444833796b384684afef14d0b478941' into 
redhat/gcc-14-branch

Diff:

 gcc/ChangeLog  | 181 +++
 gcc/DATESTAMP  |   2 +-
 gcc/ada/ChangeLog  |   6 +
 gcc/ada/sem_warn.adb   |   4 +
 gcc/asan.cc|  37 +-
 gcc/auto-profile.cc|   4 +-
 gcc/builtins.cc|  10 +-
 gcc/combine.cc |   2 +-
 gcc/config/aarch64/aarch64-cores.def   |   2 +-
 gcc/config/aarch64/aarch64-early-ra.cc |  51 +-
 gcc/config/aarch64/aarch64.h   |   2 +-
 gcc/config/aarch64/driver-aarch64.cc   |  52 ++-
 gcc/config/arm/t-rtems |   5 +-
 gcc/config/avr/avr-modes.def   |   1 +
 gcc/config/avr/avr.cc  |  12 +-
 gcc/config/i386/i386.md|  12 +-
 gcc/config/i386/sse.md |  18 +-
 gcc/config/loongarch/loongarch-builtins.cc |   7 +-
 gcc/config/pa/pa32-regs.h  |   2 +-
 gcc/config/rs6000/rs6000-builtin.cc|  10 +-
 gcc/config/rs6000/rs6000-builtins.def  |   4 +-
 gcc/cp/ChangeLog   |  85 
 gcc/cp/constexpr.cc|   3 +-
 gcc/cp/constraint.cc   |  18 +-
 gcc/cp/cp-tree.h   |   3 +-
 gcc/cp/decl.cc |   1 +
 gcc/cp/decl2.cc|   5 +-
 gcc/cp/init.cc |  18 +-
 gcc/cp/module.cc   |  19 +-
 gcc/cp/name-lookup.cc  |   5 +-
 gcc/cp/parser.cc   |  11 +-
 gcc/cp/semantics.cc|   9 +-
 gcc/cp/typeck.cc   |   7 +-
 gcc/cp/typeck2.cc  |   8 +-
 gcc/d/ChangeLog|  29 ++
 gcc/d/Make-lang.in |   4 +-
 gcc/d/dmd/enumsem.d|   2 +-
 gcc/d/dmd/expressionsem.d  |   6 +-
 gcc/d/expr.cc  |   4 +-
 gcc/d/typeinfo.cc  |  19 +-
 gcc/fortran/ChangeLog  |  85 
 gcc/fortran/class.cc   |   2 +-
 gcc/fortran/decl.cc|  19 +-
 gcc/fortran/frontend-passes.cc |   7 +
 gcc/fortran/interface.cc   |   9 +-
 gcc/fortran/resolve.cc |  11 +-
 gcc/fortran/trans-array.cc |  10 +-
 gcc/fortran/trans-expr.cc  |  27 +-
 gcc/gimple-match-exports.cc|  26 +-
 gcc/ipa-icf-gimple.cc  |  53 ++-
 gcc/loop-iv.cc |   1 +
 gcc/match.pd   |   2 +-
 gcc/optc-save-gen.awk  |   5 +
 gcc/po/ChangeLog   |   4 +
 gcc/po/zh_CN.po| 520 ++---
 gcc/rtl.h  |   3 +-
 gcc/simplify-rtx.cc| 285 +++
 gcc/testsuite/ChangeLog| 296 
 gcc/testsuite/c-c++-common/cpp/pr115913.c  |   7 +
 gcc/testsuite/g++.dg/asan/pr118763.C   |  15 +
 gcc/testsuite/g++.dg/cpp0x/constexpr-union9.C  |  16 +
 gcc/testsuite/g++.dg/cpp0x/constexpr-volatile4.C   |  20 +
 gcc/testsuite/g++.dg/cpp0x/nsdmi-defer7.C  |  13 +
 gcc/testsuite/g++.dg/cpp0x/nsdmi-list10.C  |  35 ++
 gcc/testsuite/g++.dg/cpp0x/nsdmi-list9.C   |  34 ++
 .../g++.dg/cpp23/class-deduction-inherited7.C  |  12 +
 .../g++.dg/cpp26/name-independent-decl10.C |  63 +++
 .../g++.dg/cpp26/name-independent-decl9.C  |  49 ++
 .../g++.dg/cpp2a/concepts-explicit-spec7.C |  30 ++
 gcc/testsuite/g++.dg/cpp2a/constexpr-117775.C  |  13 +
 gcc/testsuite/g++.dg/expr/pmf-4.C  |  22 +
 gcc/testsuite/g++.dg/ext/is_bounded_array.C|  14 +
 gcc/testsuite/g++.dg/init/array66.C|  33 ++
 gcc/testsuite/g++.dg/lookup/pr99116-1.C|   2 +-
 gcc/testsuite/g++.dg/modules/auto-5_a.C|  10 +
 gcc/testsuite/g++.dg/modules/auto-5_b.C|  14 +
 gcc/testsuite/g++

[gcc(refs/users/meissner/heads/work193)] Change TARGET_CMPB to TARGET_POWER6.

2025-02-10 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:4f658772e8aaf0cc9dc5fe03b8dcba01c2f37a8e

commit 4f658772e8aaf0cc9dc5fe03b8dcba01c2f37a8e
Author: Michael Meissner 
Date:   Mon Feb 10 13:22:41 2025 -0500

Change TARGET_CMPB to TARGET_POWER6.

This patch changes TARGET_CMPB to TARGET_POWER6.  The -mcmpb switch is not
being changed, just the name of the macro used to determine if the PowerPC
processor supports ISA 2.05 (Power6).

2025-02-10  Michael Meissner  

gcc/

* config/rs6000/rs6000-builtin.cc (rs6000_builtin_is_supported):
Change TARGET_CMPB to TARGET_POWER6.
* config/rs6000/rs6000.cc (rs6000_option_override_internal):
Likewise.
(rs6000_rtx_costs): Likewise.
(rs6000_emit_parity): Likewise.
* config/rs6000/rs6000.h (TARGET_FCFID): Likewise.
(TARGET_LFIWAX): Likewise.
(TARGET_POWER6): New macro.
(TARGET_EXTRA_BUILTINS): Change TARGET_CMPB to TARGET_POWER6.
* config/rs6000/rs6000.md (enabled attribute): Likewise.
(parity2_cmp): Likewise.
(cmpb3): Likewise.
(copysign3): Likewise.
(copysign3_fcpsgn): Likewise.
(cmpstrnsi): Likewise.
(cmpstrsi): Likewise.

Diff:
---
 gcc/config/rs6000/rs6000-builtin.cc |  4 ++--
 gcc/config/rs6000/rs6000.cc |  8 
 gcc/config/rs6000/rs6000.h  |  7 ---
 gcc/config/rs6000/rs6000.md | 16 
 4 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin.cc 
b/gcc/config/rs6000/rs6000-builtin.cc
index 4ed2bc1ca89e..dbb8520ab039 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -157,9 +157,9 @@ rs6000_builtin_is_supported (enum rs6000_gen_builtins 
fncode)
 case ENB_P5:
   return TARGET_POWER5;
 case ENB_P6:
-  return TARGET_CMPB;
+  return TARGET_POWER6;
 case ENB_P6_64:
-  return TARGET_CMPB && TARGET_POWERPC64;
+  return TARGET_POWER6 && TARGET_POWERPC64;
 case ENB_P7:
   return TARGET_POPCNTD;
 case ENB_P7_64:
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 07f5e58532a5..d2814364b3d0 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -3922,7 +3922,7 @@ rs6000_option_override_internal (bool global_init_p)
 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
   else if (TARGET_DFP)
 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
-  else if (TARGET_CMPB)
+  else if (TARGET_POWER6)
 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
   else if (TARGET_POWER5X)
 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
@@ -4797,7 +4797,7 @@ rs6000_option_override_internal (bool global_init_p)
  DERAT mispredict penalty.  However the LVE and STVE altivec instructions
  need indexed accesses and the type used is the scalar type of the element
  being loaded or stored.  */
-TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_CMPB
+TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_POWER6
  && !TARGET_ALTIVEC);
 
   /* Set the -mrecip options.  */
@@ -22377,7 +22377,7 @@ rs6000_rtx_costs (rtx x, machine_mode mode, int 
outer_code,
   return false;
 
 case PARITY:
-  *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
+  *total = COSTS_N_INSNS (TARGET_POWER6 ? 2 : 6);
   return false;
 
 case NOT:
@@ -23204,7 +23204,7 @@ rs6000_emit_parity (rtx dst, rtx src)
   tmp = gen_reg_rtx (mode);
 
   /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can.  */
-  if (TARGET_CMPB)
+  if (TARGET_POWER6)
 {
   if (mode == SImode)
{
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 882a3864ca66..62e1662d078a 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -449,12 +449,12 @@ extern int rs6000_vector_align[];
 #define TARGET_FCFID   (TARGET_POWERPC64   \
 || TARGET_PPC_GPOPT/* 970/power4 */\
 || TARGET_POWER5   /* ISA 2.02 */  \
-|| TARGET_CMPB /* ISA 2.05 */  \
+|| TARGET_POWER6   /* ISA 2.05 */  \
 || TARGET_POPCNTD) /* ISA 2.06 */
 
 #define TARGET_FCTIDZ  TARGET_FCFID
 #define TARGET_STFIWX  TARGET_PPC_GFXOPT
-#define TARGET_LFIWAX  TARGET_CMPB
+#define TARGET_LFIWAX  TARGET_POWER6
 #define TARGET_LFIWZX  TARGET_POPCNTD
 #define TARGET_FCFIDS  TARGET_POPCNTD
 #define TARGET_FCFIDU  TARGET_POPCNTD
@@ -502,6 +502,7 @@ extern int rs6000_vector_align[];
 /* Convert ISA bits like POPCNTB to PowerPC processors like POWER5.  */
 #define TARGET_POWER5  TARGET_POPCNTB
 #define TARGET_POWER5X TARGET_FPRND
+#define TARGET_POWER6

[gcc(refs/users/mikael/heads/refactor_descriptor_v01)] Introduction getters et setters descriptor compil' OK

2025-02-10 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:3938b3761bc36b0dda67e2233558444effa25fdf

commit 3938b3761bc36b0dda67e2233558444effa25fdf
Author: Mikael Morin 
Date:   Mon Feb 10 19:24:59 2025 +0100

Introduction getters et setters descriptor compil' OK

Diff:
---
 gcc/fortran/trans-array.cc | 779 ++---
 gcc/fortran/trans-array.h  |  25 +-
 gcc/fortran/trans-decl.cc  |   8 +-
 gcc/fortran/trans-expr.cc  |  66 ++--
 gcc/fortran/trans-intrinsic.cc |  63 ++--
 gcc/fortran/trans-openmp.cc|   2 +-
 gcc/fortran/trans-stmt.cc  |   7 +-
 gcc/fortran/trans.cc   |   7 +-
 8 files changed, 647 insertions(+), 310 deletions(-)

diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc
index e60204ae3ee2..5e9f487615d3 100644
--- a/gcc/fortran/trans-array.cc
+++ b/gcc/fortran/trans-array.cc
@@ -242,8 +242,15 @@ gfc_get_cfi_dim_sm (tree desc, tree idx)
 #define LBOUND_SUBFIELD 1
 #define UBOUND_SUBFIELD 2
 
-static tree
-gfc_get_descriptor_field (tree desc, unsigned field_idx)
+
+namespace gfc_descriptor
+{
+
+namespace
+{
+
+tree
+get_field (tree desc, unsigned field_idx)
 {
   tree type = TREE_TYPE (desc);
   gcc_assert (GFC_DESCRIPTOR_TYPE_P (type));
@@ -255,107 +262,107 @@ gfc_get_descriptor_field (tree desc, unsigned field_idx)
  desc, field, NULL_TREE);
 }
 
-/* This provides READ-ONLY access to the data field.  The field itself
-   doesn't have the proper type.  */
+tree
+get_data (tree desc)
+{
+  return get_field (desc, DATA_FIELD);
+}
 
 tree
-gfc_conv_descriptor_data_get (tree desc)
+conv_data_get (tree desc)
 {
   tree type = TREE_TYPE (desc);
-  if (TREE_CODE (type) == REFERENCE_TYPE)
-gcc_unreachable ();
+  gcc_assert (TREE_CODE (type) != REFERENCE_TYPE);
 
-  tree field = gfc_get_descriptor_field (desc, DATA_FIELD);
-  return fold_convert (GFC_TYPE_ARRAY_DATAPTR_TYPE (type), field);
+  tree field = get_data (desc);
+  tree t = fold_convert (GFC_TYPE_ARRAY_DATAPTR_TYPE (type), field);
+  return non_lvalue_loc (input_location, t);
 }
 
-/* This provides WRITE access to the data field.
-
-   TUPLES_P is true if we are generating tuples.
-
-   This function gets called through the following macros:
- gfc_conv_descriptor_data_set
- gfc_conv_descriptor_data_set.  */
-
 void
-gfc_conv_descriptor_data_set (stmtblock_t *block, tree desc, tree value)
+conv_data_set (stmtblock_t *block, tree desc, tree value)
 {
-  tree field = gfc_get_descriptor_field (desc, DATA_FIELD);
+  tree field = get_data (desc);
   gfc_add_modify (block, field, fold_convert (TREE_TYPE (field), value));
 }
 
-
-/* This provides address access to the data field.  This should only be
-   used by array allocation, passing this on to the runtime.  */
-
 tree
-gfc_conv_descriptor_data_addr (tree desc)
+conv_data_addr (tree desc)
 {
-  tree field = gfc_get_descriptor_field (desc, DATA_FIELD);
+  tree field = get_data (desc);
   return gfc_build_addr_expr (NULL_TREE, field);
 }
 
-static tree
-gfc_conv_descriptor_offset (tree desc)
+tree
+get_offset (tree desc)
 {
-  tree field = gfc_get_descriptor_field (desc, OFFSET_FIELD);
+  tree field = get_field (desc, OFFSET_FIELD);
   gcc_assert (TREE_TYPE (field) == gfc_array_index_type);
   return field;
 }
 
 tree
-gfc_conv_descriptor_offset_get (tree desc)
+conv_offset_get (tree desc)
 {
-  return gfc_conv_descriptor_offset (desc);
+  return non_lvalue_loc (input_location, get_offset (desc));
 }
 
 void
-gfc_conv_descriptor_offset_set (stmtblock_t *block, tree desc,
-   tree value)
+conv_offset_set (stmtblock_t *block, tree desc, tree value)
 {
-  tree t = gfc_conv_descriptor_offset (desc);
+  tree t = get_offset (desc);
   gfc_add_modify (block, t, fold_convert (TREE_TYPE (t), value));
 }
 
-
 tree
-gfc_conv_descriptor_dtype (tree desc)
+get_dtype (tree desc)
 {
-  tree field = gfc_get_descriptor_field (desc, DTYPE_FIELD);
+  tree field = get_field (desc, DTYPE_FIELD);
   gcc_assert (TREE_TYPE (field) == get_dtype_type_node ());
   return field;
 }
 
-static tree
-gfc_conv_descriptor_span (tree desc)
+tree
+conv_dtype_get (tree desc)
 {
-  tree field = gfc_get_descriptor_field (desc, SPAN_FIELD);
+  return non_lvalue_loc (input_location, get_dtype (desc));
+}
+
+void
+conv_dtype_set (stmtblock_t *block, tree desc, tree val)
+{
+  tree t = get_dtype (desc);
+  gfc_add_modify (block, t, val);
+}
+
+tree
+get_span (tree desc)
+{
+  tree field = get_field (desc, SPAN_FIELD);
   gcc_assert (TREE_TYPE (field) == gfc_array_index_type);
   return field;
 }
 
 tree
-gfc_conv_descriptor_span_get (tree desc)
+conv_span_get (tree desc)
 {
-  return gfc_conv_descriptor_span (desc);
+  return non_lvalue_loc (input_location, get_span (desc));
 }
 
 void
-gfc_conv_descriptor_span_set (stmtblock_t *block, tree desc,
-   tree value)
+conv_span_set (stmtblock_t *block, tree desc, tree value)
 {
-  tree t = gfc_conv_descriptor_span (desc);
+  tree t = get_span (desc);
   gfc_ad

[gcc(refs/users/meissner/heads/work193-test)] Add ChangeLog.test and update REVISION.

2025-02-10 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:07ce8e18f65d56d3e6660ea2e8be94c911643f68

commit 07ce8e18f65d56d3e6660ea2e8be94c911643f68
Author: Michael Meissner 
Date:   Mon Feb 10 13:16:51 2025 -0500

Add ChangeLog.test and update REVISION.

2025-02-10  Michael Meissner  

gcc/

* ChangeLog.test: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.test | 5 +
 gcc/REVISION   | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.test b/gcc/ChangeLog.test
new file mode 100644
index ..1aa763f78d87
--- /dev/null
+++ b/gcc/ChangeLog.test
@@ -0,0 +1,5 @@
+ Branch work193-test, baseline 
+
+2025-02-10   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index 721d4c312fe3..8d47aa64eb1f 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work193 branch
+work193-test branch


[gcc] Created branch 'meissner/heads/work193-bugs' in namespace 'refs/users'

2025-02-10 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work193-bugs' was created in namespace 'refs/users' 
pointing to:

 637f35ea9880... Add ChangeLog.meissner and REVISION.


[gcc(refs/users/meissner/heads/work193)] Add ChangeLog.meissner and REVISION.

2025-02-10 Thread Michael Meissner via Libstdc++-cvs
https://gcc.gnu.org/g:637f35ea98808fdc4e4e0afd4ad82725c192de3c

commit 637f35ea98808fdc4e4e0afd4ad82725c192de3c
Author: Michael Meissner 
Date:   Mon Feb 10 13:11:49 2025 -0500

Add ChangeLog.meissner and REVISION.

2025-02-10  Michael Meissner  

gcc/

* REVISION: New file for branch.
* ChangeLog.meissner: New file.

gcc/c-family/

* ChangeLog.meissner: New file.

gcc/c/

* ChangeLog.meissner: New file.

gcc/cp/

* ChangeLog.meissner: New file.

gcc/fortran/

* ChangeLog.meissner: New file.

gcc/testsuite/

* ChangeLog.meissner: New file.

libgcc/

* ChangeLog.meissner: New file.

Diff:
---
 gcc/ChangeLog.meissner   | 5 +
 gcc/REVISION | 1 +
 gcc/c-family/ChangeLog.meissner  | 5 +
 gcc/c/ChangeLog.meissner | 5 +
 gcc/cp/ChangeLog.meissner| 5 +
 gcc/fortran/ChangeLog.meissner   | 5 +
 gcc/testsuite/ChangeLog.meissner | 5 +
 libgcc/ChangeLog.meissner| 5 +
 libstdc++-v3/ChangeLog.meissner  | 5 +
 9 files changed, 41 insertions(+)

diff --git a/gcc/ChangeLog.meissner b/gcc/ChangeLog.meissner
new file mode 100644
index ..ce6974218485
--- /dev/null
+++ b/gcc/ChangeLog.meissner
@@ -0,0 +1,5 @@
+ Branch work193, baseline 
+
+2025-02-10   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
new file mode 100644
index ..721d4c312fe3
--- /dev/null
+++ b/gcc/REVISION
@@ -0,0 +1 @@
+work193 branch
diff --git a/gcc/c-family/ChangeLog.meissner b/gcc/c-family/ChangeLog.meissner
new file mode 100644
index ..ce6974218485
--- /dev/null
+++ b/gcc/c-family/ChangeLog.meissner
@@ -0,0 +1,5 @@
+ Branch work193, baseline 
+
+2025-02-10   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/c/ChangeLog.meissner b/gcc/c/ChangeLog.meissner
new file mode 100644
index ..ce6974218485
--- /dev/null
+++ b/gcc/c/ChangeLog.meissner
@@ -0,0 +1,5 @@
+ Branch work193, baseline 
+
+2025-02-10   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/cp/ChangeLog.meissner b/gcc/cp/ChangeLog.meissner
new file mode 100644
index ..ce6974218485
--- /dev/null
+++ b/gcc/cp/ChangeLog.meissner
@@ -0,0 +1,5 @@
+ Branch work193, baseline 
+
+2025-02-10   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/fortran/ChangeLog.meissner b/gcc/fortran/ChangeLog.meissner
new file mode 100644
index ..ce6974218485
--- /dev/null
+++ b/gcc/fortran/ChangeLog.meissner
@@ -0,0 +1,5 @@
+ Branch work193, baseline 
+
+2025-02-10   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/testsuite/ChangeLog.meissner b/gcc/testsuite/ChangeLog.meissner
new file mode 100644
index ..ce6974218485
--- /dev/null
+++ b/gcc/testsuite/ChangeLog.meissner
@@ -0,0 +1,5 @@
+ Branch work193, baseline 
+
+2025-02-10   Michael Meissner  
+
+   Clone branch
diff --git a/libgcc/ChangeLog.meissner b/libgcc/ChangeLog.meissner
new file mode 100644
index ..ce6974218485
--- /dev/null
+++ b/libgcc/ChangeLog.meissner
@@ -0,0 +1,5 @@
+ Branch work193, baseline 
+
+2025-02-10   Michael Meissner  
+
+   Clone branch
diff --git a/libstdc++-v3/ChangeLog.meissner b/libstdc++-v3/ChangeLog.meissner
new file mode 100644
index ..ce6974218485
--- /dev/null
+++ b/libstdc++-v3/ChangeLog.meissner
@@ -0,0 +1,5 @@
+ Branch work193, baseline 
+
+2025-02-10   Michael Meissner  
+
+   Clone branch


[gcc/devel/omp/gcc-14] OpenMP: C/C++ common testcases for "omp begin declare variant"

2025-02-10 Thread Sandra Loosemore via Gcc-cvs
https://gcc.gnu.org/g:6fce0314a9a14ec3928110baca58d3aa364e6068

commit 6fce0314a9a14ec3928110baca58d3aa364e6068
Author: Sandra Loosemore 
Date:   Sun Feb 9 21:32:36 2025 +

OpenMP: C/C++ common testcases for "omp begin declare variant"

gcc/testsuite/ChangeLog
* c-c++-common/gomp/delim-declare-variant-1.c: New.
* c-c++-common/gomp/delim-declare-variant-2.c: New.
* c-c++-common/gomp/delim-declare-variant-3.c: New.
* c-c++-common/gomp/delim-declare-variant-4.c: New.
* c-c++-common/gomp/delim-declare-variant-5.c: New.
* c-c++-common/gomp/delim-declare-variant-6.c: New.
* c-c++-common/gomp/delim-declare-variant-7.c: New.

libgomp/ChangeLog
* testsuite/libgomp.c-c++-common/delim-declare-variant-1.c: New.

Diff:
---
 .../c-c++-common/gomp/delim-declare-variant-1.c| 55 +
 .../c-c++-common/gomp/delim-declare-variant-2.c| 66 
 .../c-c++-common/gomp/delim-declare-variant-3.c| 50 +++
 .../c-c++-common/gomp/delim-declare-variant-4.c| 31 ++
 .../c-c++-common/gomp/delim-declare-variant-5.c| 26 
 .../c-c++-common/gomp/delim-declare-variant-6.c| 71 ++
 .../c-c++-common/gomp/delim-declare-variant-7.c| 27 
 .../libgomp.c-c++-common/delim-declare-variant-1.c | 45 ++
 8 files changed, 371 insertions(+)

diff --git a/gcc/testsuite/c-c++-common/gomp/delim-declare-variant-1.c 
b/gcc/testsuite/c-c++-common/gomp/delim-declare-variant-1.c
new file mode 100644
index ..28cac0d65503
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/gomp/delim-declare-variant-1.c
@@ -0,0 +1,55 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-fdump-tree-gimple" } */
+
+/* Check basic functionality for the delimited form of "declare variant"
+   - no error re duplicate definitions
+   - variants are registered and correctly resolved at call site.  */
+
+int foo (int a)
+{
+  return a;
+}
+
+int bar (int x)
+{
+  return x;
+}
+
+#pragma omp begin declare variant match (construct={target})
+int foo (int a)
+{
+  return a + 1;
+}
+
+int bar (int x)
+{
+  return x * 2;
+}
+#pragma omp end declare variant
+
+/* Because of the high score value, this variant for "bar" should always be
+   selected even when the one above also matches.  */
+#pragma omp begin declare variant match 
(implementation={vendor(score(1):"gnu")})
+int bar (int x)
+{
+  return x * 4;
+}
+#pragma omp end declare variant
+
+int main (void)
+{
+  if (foo (42) != 42) __builtin_abort ();
+  if (bar (3) != 12) __builtin_abort ();
+#pragma omp target
+  {
+if (foo (42) != 43) __builtin_abort ();
+if (bar (3) != 12) __builtin_abort ();
+  }
+}
+
+/* { dg-final { scan-tree-dump-times "omp declare variant base 
\\(foo.ompvariant." 1 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "omp declare variant base 
\\(bar.ompvariant." 2 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "foo \\(42\\)" 1 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "foo\\.ompvariant. \\(42\\)" 1 "gimple" } 
} */
+/* { dg-final { scan-tree-dump-times "bar \\(3\\)" 0 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "bar\\.ompvariant. \\(3\\)" 2 "gimple" } 
} */
diff --git a/gcc/testsuite/c-c++-common/gomp/delim-declare-variant-2.c 
b/gcc/testsuite/c-c++-common/gomp/delim-declare-variant-2.c
new file mode 100644
index ..03bfe2746268
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/gomp/delim-declare-variant-2.c
@@ -0,0 +1,66 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-foffload=disable -fdump-tree-original" } */
+
+/* Check for elision of preprocessed code in a begin/end declare variant
+   construct when it can be determined at parse time that the selector
+   can never match.  */
+
+int foobar (int x, int y)
+{
+  return x * y;
+}
+
+int baz (int x)
+{
+  return x;
+}
+
+#pragma omp begin declare variant match (implementation={vendor("acme")}) /* { 
dg-warning "unknown property" } */
+int foobar (int x, int y)
+{
+  random junk that would ordinarily cause a parse error;
+  return x + y;
+}
+#pragma omp end declare variant
+
+#pragma omp begin declare variant match (device={kind(fpga)})
+int foobar (int x, int y)
+{
+  random junk that would ordinarily cause a parse error;
+  return x + y;
+}
+#pragma omp end declare variant
+
+/* Per the OpenMP specification, elision only happens when the implementation
+   or device selectors cannot match; the user/condition selector doesn't
+   matter for this.  */
+#pragma omp begin declare variant match (user={condition (0)})
+int foobar (int x, int y)
+{
+  return x + y;
+}
+#pragma omp end declare variant
+
+/* Check that we're finding the right "omp end declare variant" when
+   constructs are nested.  */
+#pragma omp begin declare variant match (implementation={vendor("acme")})  /* 
{ dg-warning "unknown property" } */
+  #pragma omp begin declare variant match 

[gcc(refs/users/meissner/heads/work193-dmf)] Add ChangeLog.dmf and update REVISION.

2025-02-10 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:acb0f0a57632df8f8d14bd97964460c563ca4adc

commit acb0f0a57632df8f8d14bd97964460c563ca4adc
Author: Michael Meissner 
Date:   Mon Feb 10 13:12:38 2025 -0500

Add ChangeLog.dmf and update REVISION.

2025-02-10  Michael Meissner  

gcc/

* ChangeLog.dmf: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.dmf | 5 +
 gcc/REVISION  | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.dmf b/gcc/ChangeLog.dmf
new file mode 100644
index ..76480163c36a
--- /dev/null
+++ b/gcc/ChangeLog.dmf
@@ -0,0 +1,5 @@
+ Branch work193-dmf, baseline 
+
+2025-02-10   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index 721d4c312fe3..982dab320ac0 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work193 branch
+work193-dmf branch


[gcc] Created branch 'meissner/heads/work193-test' in namespace 'refs/users'

2025-02-10 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work193-test' was created in namespace 'refs/users' 
pointing to:

 637f35ea9880... Add ChangeLog.meissner and REVISION.


[gcc] Created branch 'meissner/heads/work193-vpair' in namespace 'refs/users'

2025-02-10 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work193-vpair' was created in namespace 'refs/users' 
pointing to:

 637f35ea9880... Add ChangeLog.meissner and REVISION.


[gcc] Created branch 'meissner/heads/work193-dmf' in namespace 'refs/users'

2025-02-10 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work193-dmf' was created in namespace 'refs/users' 
pointing to:

 637f35ea9880... Add ChangeLog.meissner and REVISION.


[gcc(refs/users/meissner/heads/work193-libs)] Add ChangeLog.libs and update REVISION.

2025-02-10 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:db801a6c0d7ed5bace56b68a9ded5d7822eb7393

commit db801a6c0d7ed5bace56b68a9ded5d7822eb7393
Author: Michael Meissner 
Date:   Mon Feb 10 13:15:10 2025 -0500

Add ChangeLog.libs and update REVISION.

2025-02-10  Michael Meissner  

gcc/

* ChangeLog.libs: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.libs | 5 +
 gcc/REVISION   | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.libs b/gcc/ChangeLog.libs
new file mode 100644
index ..c1a193f23a62
--- /dev/null
+++ b/gcc/ChangeLog.libs
@@ -0,0 +1,5 @@
+ Branch work193-libs, baseline 
+
+2025-02-10   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index 721d4c312fe3..d82e18128d4f 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work193 branch
+work193-libs branch


[gcc] Created branch 'meissner/heads/work193-sha' in namespace 'refs/users'

2025-02-10 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work193-sha' was created in namespace 'refs/users' 
pointing to:

 637f35ea9880... Add ChangeLog.meissner and REVISION.


[gcc] Created branch 'meissner/heads/work193-libs' in namespace 'refs/users'

2025-02-10 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work193-libs' was created in namespace 'refs/users' 
pointing to:

 637f35ea9880... Add ChangeLog.meissner and REVISION.


[gcc/devel/omp/gcc-14] OpenMP: C++ front end support for "begin declare variant"

2025-02-10 Thread Sandra Loosemore via Gcc-cvs
https://gcc.gnu.org/g:42a526c07c35358800a1b6b0a558762569631cd2

commit 42a526c07c35358800a1b6b0a558762569631cd2
Author: Sandra Loosemore 
Date:   Sun Feb 9 21:32:35 2025 +

OpenMP: C++ front end support for "begin declare variant"

This patch implements C++ support for the "begin declare variant"
construct.  The OpenMP specification is hazy on interaction of this
feature with C++ language features.  Variant functions in classes are
supported but must be defined as members in the class definition,
using an unqualified name for the base function which also must be
present in that class.  Similarly variant functions in a namespace can
only be defined in that namespace using an unqualified name for a base
function already declared in that namespace.  Variants for template
functions or inside template classes seem to (mostly) work.

gcc/cp/ChangeLog
* cp-tree.h (struct cp_omp_declare_variant_attr): New.
(struct saved_scope): Add omp_declare_variant_attribute field.
* decl.cc (omp_declare_variant_finalize_one): Add logic to inject
"this" parameter for method calls.
* parser.cc (cp_parser_skip_to_pragma_omp_end_declare_variant): New.
(omp_start_variant_function): New.
(omp_finish_variant_function): New.
(cp_parser_init_declarator): Handle variant functions.
(cp_parser_class_specifier): Handle deferred lookup of base 
functions
when the entire class has been seen.
(cp_parser_member_declaration): Handle variant functions.
(cp_finish_omp_declare_variant): Merge context selectors if in
a "begin declare variant" block.
(cp_parser_omp_begin): Match "omp begin declare variant".  Adjust
error messages.
(cp_parser_omp_end): Match "omp end declare variant".
* parser.h (struct cp_parser): Add omp_unregistered_variants field.
* semantics.cc (finish_translation_unit): Detect unmatched
"omp begin declare variant".

gcc/testsuite/ChangeLog
* g++.dg/gomp/delim-declare-variant-1.C: New.
* g++.dg/gomp/delim-declare-variant-2.C: New.
* g++.dg/gomp/delim-declare-variant-3.C: New.
* g++.dg/gomp/delim-declare-variant-4.C: New.
* g++.dg/gomp/delim-declare-variant-5.C: New.
* g++.dg/gomp/delim-declare-variant-6.C: New.
* g++.dg/gomp/delim-declare-variant-7.C: New.
* g++.dg/gomp/delim-declare-variant-40.C: New.
* g++.dg/gomp/delim-declare-variant-41.C: New.
* g++.dg/gomp/delim-declare-variant-50.C: New.
* g++.dg/gomp/delim-declare-variant-51.C: New.
* g++.dg/gomp/delim-declare-variant-52.C: New.
* g++.dg/gomp/delim-declare-variant-70.C: New.
* g++.dg/gomp/delim-declare-variant-71.C: New.

libgomp/
* testsuite/libgomp.c++/delim-declare-variant-1.C: New.
* testsuite/libgomp.c++/delim-declare-variant-2.C: New.
* testsuite/libgomp.c++/delim-declare-variant-7.C: New.

Co-Authored-By: Julian Brown 
Co-Authored-By: waffl3x 

Diff:
---
 gcc/cp/cp-tree.h   |   6 +
 gcc/cp/decl.cc |  15 +
 gcc/cp/parser.cc   | 544 +++--
 gcc/cp/parser.h|   5 +
 gcc/cp/semantics.cc|   7 +
 .../g++.dg/gomp/delim-declare-variant-1.C  |  39 ++
 .../g++.dg/gomp/delim-declare-variant-2.C  |  53 ++
 .../g++.dg/gomp/delim-declare-variant-3.C  |  37 ++
 .../g++.dg/gomp/delim-declare-variant-4.C  |  57 +++
 .../g++.dg/gomp/delim-declare-variant-40.C |  51 ++
 .../g++.dg/gomp/delim-declare-variant-41.C |  31 ++
 .../g++.dg/gomp/delim-declare-variant-5.C  |  53 ++
 .../g++.dg/gomp/delim-declare-variant-50.C |  99 
 .../g++.dg/gomp/delim-declare-variant-51.C | 181 +++
 .../g++.dg/gomp/delim-declare-variant-52.C |  24 +
 .../g++.dg/gomp/delim-declare-variant-6.C  |  72 +++
 .../g++.dg/gomp/delim-declare-variant-7.C  |  57 +++
 .../g++.dg/gomp/delim-declare-variant-70.C | 206 
 .../g++.dg/gomp/delim-declare-variant-71.C | 157 ++
 .../libgomp.c++/delim-declare-variant-1.C  |  29 ++
 .../libgomp.c++/delim-declare-variant-2.C  |  37 ++
 .../libgomp.c++/delim-declare-variant-7.C  |  39 ++
 22 files changed, 1766 insertions(+), 33 deletions(-)

diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index 64fc33e0479f..51130be4fe2d 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -1865,6 +1865,11 @@ struct GTY(()) cp_omp_begin_assumes_data {
   bool attr_syntax;
 };
 
+struct GTY(()) cp_omp_declare_variant_attr {
+  bool attr_syntax;
+ 

[gcc r14-11287] tree-assume: Fix UB in assume_query [PR118605]

2025-02-10 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:7a369b6a5589c7d9187701ee916f939307e00a86

commit r14-11287-g7a369b6a5589c7d9187701ee916f939307e00a86
Author: Jakub Jelinek 
Date:   Thu Jan 23 11:46:18 2025 +0100

tree-assume: Fix UB in assume_query [PR118605]

The assume_query constructor does
assume_query::assume_query (function *f, bitmap p) : m_parm_list (p),
 m_func (f)
where m_parm_list is bitmap &.  This is compile time UB, because
as soon as the constructor returns, m_parm_list reference is still
bound to the parameter of the constructor which is no longer in scope.

Now, one possible fix would be change the ctor argument to be bitmap &,
but that doesn't really work because in the only user of that class
we have
  auto_bitmap decls;
...
  assume_query query (fun, decls);
and auto_bitmap just has
  operator bitmap () { return &m_bits; }
Could be perhaps const bitmap &, but why?  bitmap is a pointer:
typedef class bitmap_head *bitmap;
and the EXECUTE_IF_SET_IN_BITMAP macros don't really change that pointer,
they just inspect what is inside of that bitmap_head the pointer points
to.

So, the simplest fix, I think, is to avoid references (which cause even worse
code, as it has to be dereferenced twice rather than once).

2025-01-23  Jakub Jelinek  

PR tree-optimization/118605
* tree-assume.cc (assume_query::m_parm_list): Change type
from bitmap & to bitmap.

(cherry picked from commit 27a05f8d11798889ecfb610db9bde781c3d218f7)

Diff:
---
 gcc/tree-assume.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/tree-assume.cc b/gcc/tree-assume.cc
index 10c1343492c0..58b800fbe6c1 100644
--- a/gcc/tree-assume.cc
+++ b/gcc/tree-assume.cc
@@ -98,7 +98,7 @@ protected:
   ssa_lazy_cache m_path;   // Values found on path
   ssa_lazy_cache m_parms;  // Cumulative parameter value calculated
   gimple_ranger *m_ranger;
-  bitmap &m_parm_list;// Parameter ssa-names list.
+  bitmap m_parm_list; // Parameter ssa-names list.
   function *m_func;
 };


[gcc/devel/omp/gcc-14] OpenMP: Bug fixes for comparing context selectors

2025-02-10 Thread Sandra Loosemore via Gcc-cvs
https://gcc.gnu.org/g:6369b4937b74ae1e0b351d6531ff25db95f9300c

commit 6369b4937b74ae1e0b351d6531ff25db95f9300c
Author: Sandra Loosemore 
Date:   Sun Feb 9 21:32:34 2025 +0000

OpenMP: Bug fixes for comparing context selectors

gcc/ChangeLog
* omp-general.cc (omp_context_selector_props_compare): Handle
arbitrary expressions in the "user" and "device_num" selectors.
(omp_context_selector_set_compare): Detect mismatch when one
selector specifies a score and the other doesn't.

Diff:
---
 gcc/omp-general.cc | 26 +++---
 1 file changed, 23 insertions(+), 3 deletions(-)

diff --git a/gcc/omp-general.cc b/gcc/omp-general.cc
index 4713aca1c657..7f9e3eb6bbb4 100644
--- a/gcc/omp-general.cc
+++ b/gcc/omp-general.cc
@@ -2186,8 +2186,26 @@ omp_context_selector_props_compare (enum omp_tss_code 
set,
  if (set == OMP_TRAIT_SET_USER
  && sel == OMP_TRAIT_USER_CONDITION)
{
- if (integer_zerop (OMP_TP_VALUE (p1))
- != integer_zerop (OMP_TP_VALUE (p2)))
+ /* Recognize constants that have equal truth values,
+otherwise assume all expressions are unique.  */
+ tree v1 = OMP_TP_VALUE (p1);
+ tree v2 = OMP_TP_VALUE (p2);
+ if (TREE_CODE (v1) != INTEGER_CST
+ || TREE_CODE (v2) != INTEGER_CST
+ || integer_zerop (v1) != integer_zerop (v2))
+   return 2;
+ break;
+   }
+ if (set == OMP_TRAIT_SET_TARGET_DEVICE
+ && sel == OMP_TRAIT_DEVICE_NUM)
+   {
+ /* Recognize constants that have equal values,
+otherwise assume all expressions are unique.  */
+ tree v1 = OMP_TP_VALUE (p1);
+ tree v2 = OMP_TP_VALUE (p2);
+ if (TREE_CODE (v1) != INTEGER_CST
+ || TREE_CODE (v2) != INTEGER_CST
+ || tree_int_cst_compare (v1, v2) != 0)
return 2;
  break;
}
@@ -2305,7 +2323,9 @@ omp_context_selector_set_compare (enum omp_tss_code set, 
tree ctx1, tree ctx2)
  {
tree score1 = OMP_TS_SCORE (ts1);
tree score2 = OMP_TS_SCORE (ts2);
-   if (score1 && score2 && !simple_cst_equal (score1, score2))
+   if ((score1 && score2 && !simple_cst_equal (score1, score2))
+   || (score1 && !score2)
+   || (!score1 && score2))
  return 2;
 
int r = omp_context_selector_props_compare (set, OMP_TS_CODE (ts1),


[gcc r14-11285] match.pd: Fix (FTYPE) N CMP (FTYPE) M optimization for GENERIC [PR118522]

2025-02-10 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:2349c6a442df76acd15b6efcd70f77ecd718f46a

commit r14-11285-g2349c6a442df76acd15b6efcd70f77ecd718f46a
Author: Jakub Jelinek 
Date:   Fri Jan 17 11:30:07 2025 +0100

match.pd: Fix (FTYPE) N CMP (FTYPE) M optimization for GENERIC [PR118522]

The last case of this optimization assumes that if 2 integral types
have same precision and TYPE_UNSIGNED, then they are uselessly convertible.
While that is very likely the case for GIMPLE, it is not the case for
GENERIC, so the following patch adds there a convert so that the
optimization produces also valid GENERIC.  Without it we got
(int) p == b where b had _BitInt(32) type, so incompatible types.

2025-01-17  Jakub Jelinek  

PR tree-optimization/118522
* match.pd ((FTYPE) N CMP (FTYPE) M): Add convert, as in GENERIC
integral types with the same precision and sign might actually not
be compatible types.

* gcc.dg/bitint-120.c: New test.

(cherry picked from commit 3ab9eb6946f7b832834b3d808c5617935e0be727)

Diff:
---
 gcc/match.pd  |  2 +-
 gcc/testsuite/gcc.dg/bitint-120.c | 11 +++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index f16fdd2d7760..8465af07df8d 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -6548,7 +6548,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (icmp (convert:type2 @1) @2)
  (if (TYPE_PRECISION (type1) == TYPE_PRECISION (type2)
   && type1_signed_p == type2_signed_p)
- (icmp @1 @2))
+ (icmp @1 (convert @2)))
 
 /* Optimize various special cases of (FTYPE) N CMP CST.  */
 (for cmp  (lt le eq ne ge gt)
diff --git a/gcc/testsuite/gcc.dg/bitint-120.c 
b/gcc/testsuite/gcc.dg/bitint-120.c
new file mode 100644
index ..51098881e140
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/bitint-120.c
@@ -0,0 +1,11 @@
+/* PR tree-optimization/118522 */
+/* { dg-do compile { target bitint } } */
+/* { dg-options "-O2" } */
+
+_BitInt(32) b;
+
+int
+foo (unsigned short p)
+{
+  return p == (double) b;
+}


[gcc(refs/users/meissner/heads/work193-orig)] Add REVISION.

2025-02-10 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:26c055ca3dea53770bf5b4e5e8ea162958aa1352

commit 26c055ca3dea53770bf5b4e5e8ea162958aa1352
Author: Michael Meissner 
Date:   Mon Feb 10 13:18:38 2025 -0500

Add REVISION.

2025-02-10  Michael Meissner  

gcc/

* REVISION: New file for branch.

Diff:
---
 gcc/REVISION | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/REVISION b/gcc/REVISION
new file mode 100644
index ..ec8270152c39
--- /dev/null
+++ b/gcc/REVISION
@@ -0,0 +1 @@
+work193-orig branch


[gcc r14-11286] builtins: Store unspecified value to *exp for inf/nan [PR114877]

2025-02-10 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:2c9ebb837b7154d51a11c73880096e5d7e4566a9

commit r14-11286-g2c9ebb837b7154d51a11c73880096e5d7e4566a9
Author: Jakub Jelinek 
Date:   Thu Jan 23 11:11:23 2025 +0100

builtins: Store unspecified value to *exp for inf/nan [PR114877]

The fold_builtin_frexp folding for NaN/Inf just returned the first argument
while evaluating the second argument's side-effects, rather than storing
something to what the second argument points to.

The PR argues that the C standard requires the function to store something
there but what exactly is stored is unspecified, so not storing there
anything can result in UB if the value isn't initialized and is read later.

glibc and newlib store there 0, musl apparently doesn't store anything.

The following patch stores there zero (or would you prefer storing there
some other value, 42, INT_MAX, INT_MIN, etc.?; zero is cheapest to form
in assembly though) and adjusts the test so that it
doesn't rely on not storing there anything but instead checks for
-Wmaybe-uninitialized warning to find out that something has been stored
there.
Unfortunately I had to disable the NaN tests for -O0, while we can fold
__builtin_isnan (__builtin_nan ("")) at compile time, we can't fold
__builtin_isnan ((i = 0, __builtin_nan (""))) at compile time.
fold_builtin_classify uses just tree_expr_nan_p and if that isn't true
(because expr is a COMPOUND_EXPR with tree_expr_nan_p on the second arg),
it does
  arg = builtin_save_expr (arg);
  return fold_build2_loc (loc, UNORDERED_EXPR, type, arg, arg);
and that isn't folded at -O0 further, as we wrap it into SAVE_EXPR and
nothing propagates the NAN to the comparison.
I think perhaps tree_expr_nan_p etc. could have case COMPOUND_EXPR:
added and recurse on the second argument, but that feels like stage1
material to me if we want to do that at all.

2025-01-23  Jakub Jelinek  

PR middle-end/114877
* builtins.cc (fold_builtin_frexp): Handle rvc_nan and rvc_inf cases
like rvc_zero, return passed in arg and set *exp = 0.

* gcc.dg/torture/builtin-frexp-1.c: Add -Wmaybe-uninitialized as
dg-additional-options.
(bar): New function.
(TESTIT_FREXP2): Rework the macro so that it doesn't test whether
nothing has been stored to what the second argument points to, but
instead that something has been stored there, whatever it is.
(main): Temporarily don't enable the nan tests for -O0.

(cherry picked from commit d19b0682f18f9f5217aee8002e3d04f8ded04ae8)

Diff:
---
 gcc/builtins.cc| 10 
 gcc/testsuite/gcc.dg/torture/builtin-frexp-1.c | 33 +++---
 2 files changed, 31 insertions(+), 12 deletions(-)

diff --git a/gcc/builtins.cc b/gcc/builtins.cc
index 0788f58f6371..4856f81797a4 100644
--- a/gcc/builtins.cc
+++ b/gcc/builtins.cc
@@ -9441,14 +9441,16 @@ fold_builtin_frexp (location_t loc, tree arg0, tree 
arg1, tree rettype)
   switch (value->cl)
   {
   case rvc_zero:
+  case rvc_nan:
+  case rvc_inf:
/* For +-0, return (*exp = 0, +-0).  */
+   /* For +-NaN or +-Inf, *exp is unspecified, but something should
+  be stored there so that it isn't read from uninitialized object.
+  As glibc and newlib store *exp = 0 for +-Inf/NaN, storing
+  0 here as well is easiest.  */
exp = integer_zero_node;
frac = arg0;
break;
-  case rvc_nan:
-  case rvc_inf:
-   /* For +-NaN or +-Inf, *exp is unspecified, return arg0.  */
-   return omit_one_operand_loc (loc, rettype, arg0, arg1);
   case rvc_normal:
{
  /* Since the frexp function always expects base 2, and in
diff --git a/gcc/testsuite/gcc.dg/torture/builtin-frexp-1.c 
b/gcc/testsuite/gcc.dg/torture/builtin-frexp-1.c
index 2d1c1847b267..328b803e9a1e 100644
--- a/gcc/testsuite/gcc.dg/torture/builtin-frexp-1.c
+++ b/gcc/testsuite/gcc.dg/torture/builtin-frexp-1.c
@@ -11,6 +11,7 @@
floating point formats need -funsafe-math-optimizations.  */
 /* { dg-require-effective-target inf } */
 /* { dg-options "-funsafe-math-optimizations" { target powerpc*-*-* } } */
+/* { dg-additional-options "-Wmaybe-uninitialized" } */
 
 extern void link_error(int);
 
@@ -52,22 +53,36 @@ extern void link_error(int);
 link_error(__LINE__); \
   } while (0)
 
+int __attribute__ ((__noipa__))
+bar (int x)
+{
+  (void) x;
+  return 42;
+} 
+
 /* Test that FUNCRES(frexp(NEG FUNCARG(ARGARG),&i)) is false.  Check
-   the sign as well.  Ensure side-effects are evaluated in i.  */
+   the sign as well.  Ensure side-effects are evaluated in the second
+   frexp argument.  */
 #define TESTIT_FREXP2(NEG,FUNCARG,ARGARG,FUNCRES) do { \
-  int i=5; \
+  int i, j = 5; \
   if (!__builtin_##FUNCRES##f(__builtin_frexpf

[gcc/devel/omp/gcc-14] OpenMP: Support functions for nested "begin declare variant"

2025-02-10 Thread Sandra Loosemore via Gcc-cvs
https://gcc.gnu.org/g:76914b6cd6fa19265beec99ba20fa9073b5d20b7

commit 76914b6cd6fa19265beec99ba20fa9073b5d20b7
Author: Sandra Loosemore 
Date:   Sun Feb 9 21:32:35 2025 +0000

OpenMP: Support functions for nested "begin declare variant"

This patch adds functions for variant name mangling and context selector
merging that are shared by the C and C++ front ends.

The OpenMP specification says that name mangling is supposed to encode
the context selector for the variant, but also provides for no way to
reference these functions directly by name or from a different
compilation unit.  It also gives no guidance on how dynamic selectors
might be encoded across compilation units.

The GCC implementation of this feature instead treats variant
functions as if they have no linkage and uses a simple counter to
generate names.

gcc/ChangeLog
* omp-general.cc (omp_mangle_variant_name): New.
(omp_check_for_duplicate_variant): New.
(omp_copy_trait_set): New.
(omp_trait_selectors_equivalent): New.
(omp_combine_trait_sets): New.
(omp_merge_context_selectors): New.
* omp-general.h (omp_mangle_variant_name): Declare.
(omp_check_for_duplicate_variant): Declare.
(omp_merge_context_selectors): Declare.

Diff:
---
 gcc/omp-general.cc | 195 +
 gcc/omp-general.h  |   5 ++
 2 files changed, 200 insertions(+)

diff --git a/gcc/omp-general.cc b/gcc/omp-general.cc
index 21b3143c0c61..249916ac7e32 100644
--- a/gcc/omp-general.cc
+++ b/gcc/omp-general.cc
@@ -1483,6 +1483,66 @@ omp_check_context_selector (location_t loc, tree ctx,
   return ctx;
 }
 
+/* Produce a mangled version of BASE_ID for the name of the variant
+   function with context selector CTX.  SEP is a separator string.
+   The return value is an IDENTIFIER_NODE.
+
+   Per the OpenMP spec, "the symbol names of two definitions of a function are
+   considered to be equal if and only if their effective context selectors are
+   equivalent".  However, if we did have two such definitions, we'd get an ODR
+   violation.  We already take steps in the front ends to make variant
+   functions internal to the compilation unit, since there is no (portable) way
+   to reference them directly by name or declare them as extern in another
+   compilation unit.  So, we can diagnose the would-be ODR violations by
+   checking that there is not already a variant for the same function with an
+   equivalent context selector, and otherwise just use a simple counter to name
+   the variant functions instead of any complicated scheme to encode the
+   context selector in the name.  */
+
+tree
+omp_mangle_variant_name (tree base_id, tree ctx ATTRIBUTE_UNUSED,
+const char *sep)
+{
+  const char *base_name = IDENTIFIER_POINTER (base_id);
+
+  /* Now do the actual mangling.  */
+  static int variant_counter;
+  /* The numeric suffix and terminating byte ought to need way less than
+ 32 bytes extra, that's just a magic number.  */
+  size_t buflen = (strlen (base_name) + strlen (sep) + strlen ("ompvariant")
+  + 32);
+  char *buffer = (char *) alloca (buflen);
+  snprintf (buffer, buflen, "%s%sompvariant%d", base_name, sep,
+   ++variant_counter);
+  return get_identifier (buffer);
+}
+
+/* Forward declaration.  */
+static int omp_context_selector_compare (tree ctx1, tree ctx2);
+
+/* Diagnose an error if there is already a variant with CTX registered
+   for BASE_DECL.  Returns true if OK, false otherwise.  */
+bool
+omp_check_for_duplicate_variant (location_t loc, tree base_decl, tree ctx)
+{
+  for (tree attr = DECL_ATTRIBUTES (base_decl); attr; attr = TREE_CHAIN (attr))
+{
+  attr = lookup_attribute ("omp declare variant base", attr);
+  if (attr == NULL_TREE)
+   break;
+
+  tree selector = TREE_VALUE (TREE_VALUE (attr));
+  if (omp_context_selector_compare (ctx, selector) == 0)
+   {
+ error_at (loc,
+   "Multiple definitions of variants with the same "
+   "context selector violate the one-definition rule");
+ return false;
+   }
+}
+  return true;
+}
+
 /* Forward declarations.  */
 static int omp_context_selector_set_compare (enum omp_tss_code, tree, tree);
 static int omp_construct_simd_compare (tree, tree, bool);
@@ -4568,3 +4628,138 @@ omp_maybe_apply_loop_xforms (tree *expr_p, tree 
for_clauses)
   break;
 }
 }
+
+/* The next group of functions support merging of context selectors for
+   nested "begin declare variant" directives.  The spec says:
+
+ ...the effective context selectors of the outer directive are
+ appended to the context selector of the inner directive to form the
+ effective context selector of the inner directive.  If a
+ trait-set-selector is present on both directives, the trait-selector

[gcc(refs/users/meissner/heads/work193-math)] Add ChangeLog.math and update REVISION.

2025-02-10 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:14cb9df9b30f664d8360df453b081ef5f622312f

commit 14cb9df9b30f664d8360df453b081ef5f622312f
Author: Michael Meissner 
Date:   Mon Feb 10 13:17:41 2025 -0500

Add ChangeLog.math and update REVISION.

2025-02-10  Michael Meissner  

gcc/

* ChangeLog.math: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.math | 5 +
 gcc/REVISION   | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.math b/gcc/ChangeLog.math
new file mode 100644
index ..c08127e850ef
--- /dev/null
+++ b/gcc/ChangeLog.math
@@ -0,0 +1,5 @@
+ Branch work193-math, baseline 
+
+2025-02-10   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index 721d4c312fe3..2035f7979ea2 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work193 branch
+work193-math branch


[gcc r14-11288] c++: Only destruct elts of array for new expression if exception is thrown during the initialization

2025-02-10 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:15e66f7c2409ddee058afc61a3760d0d055c2f83

commit r14-11288-g15e66f7c2409ddee058afc61a3760d0d055c2f83
Author: Jakub Jelinek 
Date:   Sat Jan 25 10:15:24 2025 +0100

c++: Only destruct elts of array for new expression if exception is thrown 
during the initialization [PR117827]

The following testcase fails since r12-6328, because the elements of the array
are destructed twice, once when the callee encounters delete[] p;
and then second time when the exception is thrown.
The array elts should be only destructed if exception is thrown from
one of the constructors during the build_vec_init emitted code in case of
new expressions, but when the new expression completes, it is IMO
responsibility of user code to delete[] it when it is no longer needed.

So, the following patch uses the cleanup_flags argument to build_vec_init
to get notified of the flags that need to be changed when the expression
is complete and build_disable_temp_cleanup to do the changes.

2025-01-25  Jakub Jelinek  

PR c++/117827
* init.cc (build_new_1): Pass address of a make_tree_vector ()
initialized gc tree vector to build_vec_init and append
build_disable_temp_cleanup to init_expr from it.

* g++.dg/init/array66.C: New test.

(cherry picked from commit ce268ca2a923f8f35cc9dd5a7d0468a3980f129f)

Diff:
---
 gcc/cp/init.cc  | 17 -
 gcc/testsuite/g++.dg/init/array66.C | 33 +
 2 files changed, 49 insertions(+), 1 deletion(-)

diff --git a/gcc/cp/init.cc b/gcc/cp/init.cc
index f363e5960fe3..22d93793a4d8 100644
--- a/gcc/cp/init.cc
+++ b/gcc/cp/init.cc
@@ -3693,6 +3693,11 @@ build_new_1 (vec **placement, tree type, 
tree nelts,
 error ("parenthesized initializer in array new");
  return error_mark_node;
 }
+
+ /* Collect flags for disabling subobject cleanups once the complete
+object is fully constructed.  */
+ vec *flags = make_tree_vector ();
+
  init_expr
= build_vec_init (data_addr,
  cp_build_binary_op (input_location,
@@ -3702,7 +3707,17 @@ build_new_1 (vec **placement, tree type, 
tree nelts,
  vecinit,
  explicit_value_init_p,
  /*from_array=*/0,
-  complain);
+ complain,
+ &flags);
+
+ for (tree f : flags)
+   {
+ tree cl = build_disable_temp_cleanup (f);
+ cl = convert_to_void (cl, ICV_STATEMENT, complain);
+ init_expr = build2 (COMPOUND_EXPR, void_type_node,
+ init_expr, cl);
+   }
+ release_tree_vector (flags);
}
   else
{
diff --git a/gcc/testsuite/g++.dg/init/array66.C 
b/gcc/testsuite/g++.dg/init/array66.C
new file mode 100644
index ..ca38df815dea
--- /dev/null
+++ b/gcc/testsuite/g++.dg/init/array66.C
@@ -0,0 +1,33 @@
+// PR c++/117827
+// { dg-do run { target c++11 } }
+
+struct C {
+  int c;
+  static int d, e;
+  C () : c (0) { ++d; }
+  C (const C &) = delete;
+  C &operator= (const C &) = delete;
+  ~C () { ++e; }
+};
+int C::d, C::e;
+
+C *
+foo (C *p)
+{
+  delete[] p;
+  throw 1;
+}
+
+int
+main ()
+{
+  try
+{
+  foo (new C[1] {});
+}
+  catch (...)
+{
+}
+  if (C::d != C::e)
+__builtin_abort ();
+}


[gcc r14-11284] vec.h: Properly destruct elements in auto_vec auto storage [PR118400]

2025-02-10 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:0faffd526d5dbed4960ef0aadc4ba96f9bf377de

commit r14-11284-g0faffd526d5dbed4960ef0aadc4ba96f9bf377de
Author: Jakub Jelinek 
Date:   Thu Jan 16 09:17:50 2025 +0100

vec.h: Properly destruct elements in auto_vec auto storage [PR118400]

For T with non-trivial destructors, we were destructing objects in the
vector on release only when not using auto storage of auto_vec.

The following patch calls truncate (0) instead of m_vecpfx.m_num clearing,
and truncate takes care of that destruction:
  unsigned l = length ();
  gcc_checking_assert (l >= size);
  if (!std::is_trivially_destructible <T>::value)
vec_destruct (address () + size, l - size);
  m_vecpfx.m_num = size;

2025-01-16  Jakub Jelinek  

PR ipa/118400
* vec.h (vec<T, va_heap, vl_ptr>::release): Call m_vec->truncate (0)
instead of clearing m_vec->m_vecpfx.m_num.

(cherry picked from commit 43f4d44bebd63b354f8798fcef512d4d2b42c655)

Diff:
---
 gcc/vec.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/vec.h b/gcc/vec.h
index bc83827f644e..84540d4802e0 100644
--- a/gcc/vec.h
+++ b/gcc/vec.h
@@ -2020,7 +2020,7 @@ vec::release (void)
 
   if (using_auto_storage ())
 {
-  m_vec->m_vecpfx.m_num = 0;
+  m_vec->truncate (0);
   return;
 }


[gcc] Created branch 'meissner/heads/work193-orig' in namespace 'refs/users'

2025-02-10 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work193-orig' was created in namespace 'refs/users' 
pointing to:

 4ce8ad684b90... [gcn] mkoffload.cc: Print fatal error if -march has no mult


[gcc r14-11294] loop-iv, riscv: Fix get_biv_step_1 for RISC-V [PR117506]

2025-02-10 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:ab9518d0814a3b094a0ca7356b4a68e3a65f5011

commit r14-11294-gab9518d0814a3b094a0ca7356b4a68e3a65f5011
Author: Jakub Jelinek 
Date:   Thu Feb 6 15:39:18 2025 +0100

loop-iv, riscv: Fix get_biv_step_1 for RISC-V [PR117506]

The following test ICEs on RISC-V at least latently since
r14-1622-g99bfdb072e67fa3fe294d86b4b2a9f686f8d9705 which added
RISC-V specific case to get_biv_step_1 to recognize also
({zero,sign}_extend:DI (plus:SI op0 op1))

The reason for the ICE is that op1 in this case is CONST_POLY_INT
which unlike the really expected VOIDmode CONST_INTs has its own
mode and still satisfies CONSTANT_P.
GET_MODE (rhs) (SImode) is different from outer_mode (DImode), so
the function later does
*inner_step = simplify_gen_binary (code, outer_mode,
   *inner_step, op1);
but that obviously ICEs because while *inner_step is either VOIDmode
or DImode, op1 has SImode.

The following patch fixes it by extending op1 using code so that
simplify_gen_binary can handle it.  Another option would be
to change the !CONSTANT_P (op1) 3 lines above this to
!CONST_INT_P (op1), I think it isn't very likely that we get something
useful from other constants there.

2025-02-06  Jakub Jelinek  

PR rtl-optimization/117506
* loop-iv.cc (get_biv_step_1): For {ZERO,SIGN}_EXTEND
of PLUS apply {ZERO,SIGN}_EXTEND to op1.

* gcc.dg/pr117506.c: New test.
* gcc.target/riscv/pr117506.c: New test.

(cherry picked from commit bb9cee8928f7f4dfb94e7a8f232eda736b711450)

Diff:
---
 gcc/loop-iv.cc|  1 +
 gcc/testsuite/gcc.dg/pr117506.c   | 18 ++
 gcc/testsuite/gcc.target/riscv/pr117506.c |  5 +
 3 files changed, 24 insertions(+)

diff --git a/gcc/loop-iv.cc b/gcc/loop-iv.cc
index f56cc5e1db67..433a75cdb18b 100644
--- a/gcc/loop-iv.cc
+++ b/gcc/loop-iv.cc
@@ -714,6 +714,7 @@ get_biv_step_1 (df_ref def, scalar_int_mode outer_mode, rtx 
reg,
  if (!simple_reg_p (op0) || !CONSTANT_P (op1))
return false;
 
+ op1 = simplify_gen_unary (code, outer_mode, op1, GET_MODE (rhs));
  prev_code = code;
  code = PLUS;
}
diff --git a/gcc/testsuite/gcc.dg/pr117506.c b/gcc/testsuite/gcc.dg/pr117506.c
new file mode 100644
index ..4f25324645b8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr117506.c
@@ -0,0 +1,18 @@
+/* PR rtl-optimization/117506 */
+/* { dg-do compile } */
+/* { dg-options "-O3 -funroll-loops" } */
+
+char a;
+int b;
+unsigned c;
+short d;
+
+void
+foo ()
+{
+  for (short f = 0; f < c; f += 3)
+{
+  a ^= d;
+  b = b < 0 ? b : 0;
+}
+}
diff --git a/gcc/testsuite/gcc.target/riscv/pr117506.c 
b/gcc/testsuite/gcc.target/riscv/pr117506.c
new file mode 100644
index ..ac4b9e35d635
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr117506.c
@@ -0,0 +1,5 @@
+/* PR rtl-optimization/117506 */
+/* { dg-do compile } */
+/* { dg-options "-march=rv64im_zve64f -mabi=lp64 -O3 -funroll-loops" } */
+
+#include "../../gcc.dg/pr117506.c"


[gcc(refs/users/meissner/heads/work193-vpair)] Add ChangeLog.vpair and update REVISION.

2025-02-10 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:21667fdfb9f9e52d03856feb20a8cf403d196f3c

commit 21667fdfb9f9e52d03856feb20a8cf403d196f3c
Author: Michael Meissner 
Date:   Mon Feb 10 13:13:28 2025 -0500

Add ChangeLog.vpair and update REVISION.

2025-02-10  Michael Meissner  

gcc/

* ChangeLog.vpair: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.vpair | 5 +
 gcc/REVISION| 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.vpair b/gcc/ChangeLog.vpair
new file mode 100644
index ..4cad69c75ae7
--- /dev/null
+++ b/gcc/ChangeLog.vpair
@@ -0,0 +1,5 @@
+ Branch work193-vpair, baseline 
+
+2025-02-10   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index 721d4c312fe3..ab4953dfc953 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work193 branch
+work193-vpair branch


[gcc] Created branch 'meissner/heads/work193-math' in namespace 'refs/users'

2025-02-10 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work193-math' was created in namespace 'refs/users' 
pointing to:

 637f35ea9880... Add ChangeLog.meissner and REVISION.


[gcc r14-11291] d: give dependency files better filenames [PR118477]

2025-02-10 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:acd0e21e964b266a7fa59461af7aa6918698a1bd

commit r14-11291-gacd0e21e964b266a7fa59461af7aa6918698a1bd
Author: Arsen Arsenović 
Date:   Wed Jan 29 21:14:33 2025 +0100

d: give dependency files better filenames [PR118477]

Currently, the dependency files for root-file.o and common-file.o were
both d/.deps/file.Po, which would cause parallel builds to fail
sometimes with:

  make[3]: Leaving directory 
'/var/tmp/portage/sys-devel/gcc-14.1.1_p20240511/work/build/gcc'
  make[3]: Entering directory 
'/var/tmp/portage/sys-devel/gcc-14.1.1_p20240511/work/build/gcc'
  mv: cannot stat 'd/.deps/file.TPo': No such file or directory
  make[3]: *** 
[/var/tmp/portage/sys-devel/gcc-14.1.1_p20240511/work/gcc-14-20240511/gcc/d/Make-lang.in:421:
 d/root-file.o] Error 1 shuffle=131581365

Also, this means that dependencies of one of root-file or common-file
are missing when developing.  After this patch, those two files get
assigned dependency files d/.deps/root-file.Po and
d/.deps/common-file.Po respectively, so they match the actual object
files in the d/ subdirectory.

There are other files with similar conflicts (mangle-package.o,
visitor-package.o for instance).

2025-01-29  Arsen Arsenović  
Jakub Jelinek  

PR d/118477
* Make-lang.in (DCOMPILE, DPOSTCOMPILE): Use $(basename $(@F))
instead of $(*F).

Co-Authored-By: Jakub Jelinek 
(cherry picked from commit d9ac0ad1e9a4ceec2d354ac0368da7462bea5675)

Diff:
---
 gcc/d/Make-lang.in | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/d/Make-lang.in b/gcc/d/Make-lang.in
index eaea6e039cf7..30af91301923 100644
--- a/gcc/d/Make-lang.in
+++ b/gcc/d/Make-lang.in
@@ -65,8 +65,8 @@ ALL_DFLAGS = $(DFLAGS-$@) $(GDCFLAGS) -fversion=IN_GCC 
$(CHECKING_DFLAGS) \
$(WARN_DFLAGS)
 
 DCOMPILE.base = $(GDC) -c $(ALL_DFLAGS) -o $@
-DCOMPILE = $(DCOMPILE.base) -MT $@ -MMD -MP -MF $(@D)/$(DEPDIR)/$(*F).TPo
-DPOSTCOMPILE = @mv $(@D)/$(DEPDIR)/$(*F).TPo $(@D)/$(DEPDIR)/$(*F).Po
+DCOMPILE = $(DCOMPILE.base) -MT $@ -MMD -MP -MF $(@D)/$(DEPDIR)/$(basename 
$(@F)).TPo
+DPOSTCOMPILE = @mv $(@D)/$(DEPDIR)/$(basename $(@F)).TPo 
$(@D)/$(DEPDIR)/$(basename $(@F)).Po
 DLINKER = $(GDC) $(NO_PIE_FLAG) -lstdc++
 
 # Like LINKER, but use a mutex for serializing front end links.


[gcc r14-11289] combine: Fix up make_extraction [PR118638]

2025-02-10 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:1dac899a10581dbcd55144456cca92d061190762

commit r14-11289-g1dac899a10581dbcd55144456cca92d061190762
Author: Jakub Jelinek 
Date:   Tue Jan 28 10:14:05 2025 +0100

combine: Fix up make_extraction [PR118638]

The following testcase is miscompiled at -Os on x86_64-linux.
The problem is during make_compound_operation of
(ashiftrt:SI (ashift:SI (mult:SI (reg:SI 107 [ a_5 ])
(const_int 3 [0x3]))
(const_int 31 [0x1f]))
(const_int 31 [0x1f]))
where it incorrectly returns
(mult:SI (sign_extract:SI (reg:SI 107 [ a_5 ])
(const_int 2 [0x2])
(const_int 0 [0]))
(const_int 3 [0x3]))
which obviously isn't equivalent: the former returns either 0 or -1 depending
on the least significant bit of the multiplication,
the latter returns either 0 or -3 depending on the second least significant
bit of the multiplication argument.

The bug has been introduced in PR96998 r11-4563, which added handling of x
* (2^N) similar to x << N.  In the above case, pos is 0 and len is 1,
sign extracting a single least significant bit of the multiplication.
As 3 is not a power of 2, shift_amt is -1.
But IN_RANGE (-1, 1, 1 - 1) is still true, because the basic requirement of
IN_RANGE that LOWER is not greater than UPPER is violated.
The intention of using 1 as LOWER is to avoid matching multiplication by 1,
that really shouldn't appear in the IL.  But to avoid violating IN_RANGE
requirement, we need to verify that len is at least 2.

I've added this len > 1 check to the inner if rather than outer because I
think for GCC 16 we should add a further optimization.
In the particular case of 1 least significant bit sign extraction from
multiplication by 3, we could actually say it is equivalent to
(sign_extract:SI (reg:SI 107 [ a_5 ])
(const_int 1 [0x1])
(const_int 0 [0]))
That is because 3 is an odd number and multiplication by 2 will yield the
least significant bit 0 (we are sign extracting just one) and so the
multiplication doesn't change anything on the outcome.
More generally, even for larger len, multiplication by C which is
(1 << X) + 1 where X is >= len should be optimizable just to extraction
of the multiplicand's least significant len bits.

2025-01-28  Jakub Jelinek  

PR rtl-optimization/118638
* combine.cc (make_extraction): Only optimize (mult x 2^n) if len is
larger than 1.

* gcc.c-torture/execute/pr118638.c: New test.

(cherry picked from commit b529a417249335724d1f74bcf3167f6f9a623823)

Diff:
---
 gcc/combine.cc |  2 +-
 gcc/testsuite/gcc.c-torture/execute/pr118638.c | 20 
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/gcc/combine.cc b/gcc/combine.cc
index 60afe043578d..431a30429016 100644
--- a/gcc/combine.cc
+++ b/gcc/combine.cc
@@ -7588,7 +7588,7 @@ make_extraction (machine_mode mode, rtx inner, 
HOST_WIDE_INT pos,
 least significant (LEN - C) bits of X, giving an rtx
 whose mode is MODE, then multiply it by 2^C.  */
   const HOST_WIDE_INT shift_amt = exact_log2 (INTVAL (XEXP (inner, 1)));
-  if (IN_RANGE (shift_amt, 1, len - 1))
+  if (len > 1 && IN_RANGE (shift_amt, 1, len - 1))
{
  new_rtx = make_extraction (mode, XEXP (inner, 0),
 0, 0, len - shift_amt,
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr118638.c 
b/gcc/testsuite/gcc.c-torture/execute/pr118638.c
new file mode 100644
index ..5c0dbca5c09a
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr118638.c
@@ -0,0 +1,20 @@
+/* PR rtl-optimization/118638 */
+
+__attribute__((noipa)) int
+foo (int x)
+{
+  int a = x != -3, b, c;
+  a *= 3;
+  b = 2 * x - 9;
+  a = a + b;
+  a = ~a;
+  c = a & 1;
+  return -c;
+}
+
+int
+main ()
+{
+  if (foo (0) != -1)
+__builtin_abort ();
+}


[gcc r14-11292] niter: Make build_cltz_expr more robust [PR118689]

2025-02-10 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:3f475f4ffa499858c143eaccdc0abafafc18f2ca

commit r14-11292-g3f475f4ffa499858c143eaccdc0abafafc18f2ca
Author: Jakub Jelinek 
Date:   Fri Jan 31 11:02:41 2025 +0100

niter: Make build_cltz_expr more robust [PR118689]

Since my r15-7223 the niter analysis can recognize one loop during bootstrap
as being ctz like.
The patch just turned
@@ -2173,7 +2173,7 @@ PROC m2pim_NumberIO_BinToStr (CARDINAL x
   _T535_44 = &buf[i.40_2]{lb: 1 sz: 4};
   _T536_45 = x_21 & 1;
   *_T535_44 = _T536_45;
-  _T537_47 = x_21 / 2;
+  _T537_47 = x_21 >> 1;
   x_48 = _T537_47;
   # DEBUG x => x_48
   if (x_48 != 0)
which is not a big deal for the number_of_iterations_cltz optimization, it
recognizes both right shift by 1 and unsigned division by 2 (and similarly
for clz left shift by 1 or multiplication by 2).
But starting with forwprop1 that change also resulted in
@@ -1875,9 +1875,9 @@ PROC m2pim_NumberIO_BinToStr (CARDINAL x
   i.40_2 = (INTEGER) _T530_34;
   _T536_45 = x_21 & 1;
   MEM  [(CARDINAL *)&buf][i.40_2]{lb: 1 sz: 4} = _T536_45;
-  _T537_47 = x_21 / 2;
+  _T537_47 = x_21 >> 1;
   # DEBUG x => _T537_47
-  if (x_21 > 1)
+  if (_T537_47 != 0)
 goto ; [INV]
   else
 goto ; [INV]
and apparently it is only the latter form that number_of_iterations_cltz
pattern matches, not the former (after all, that was the exact reason
for r15-7223).
The problem is that build_cltz_expr assumes if IFN_C[LT]Z can't be used it
can use the __builtin_c[lt]z{,l,ll} builtins, and while most of the FEs do
create them, modula 2 does not.

The following patch just lets us punt if the FE doesn't build those 
builtins.
I've filed a PR against modula2 so that they add the builtins too.

2025-01-31  Jakub Jelinek  

PR tree-optimization/118689
PR modula2/115032
* tree-ssa-loop-niter.cc (build_cltz_expr): Return NULL_TREE if fn 
is
NULL and use_ifn is false.

(cherry picked from commit 85e1714b0606579a339c234510063e057fe662af)

Diff:
---
 gcc/tree-ssa-loop-niter.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcc/tree-ssa-loop-niter.cc b/gcc/tree-ssa-loop-niter.cc
index b71eb67fbc7d..ba4b056e96e7 100644
--- a/gcc/tree-ssa-loop-niter.cc
+++ b/gcc/tree-ssa-loop-niter.cc
@@ -2237,6 +2237,8 @@ build_cltz_expr (tree src, bool leading, bool 
define_at_zero)
  build_int_cst (integer_type_node, prec));
}
 }
+  else if (fn == NULL_TREE)
+return NULL_TREE;
   else if (prec == 2 * lli_prec)
 {
   tree src1 = fold_convert (long_long_unsigned_type_node,


[gcc r14-11293] icf: Compare call argument types in certain cases and asm operands [PR117432]

2025-02-10 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:916daed4ecb6ed8d329b66e97b9468c8f6e549bd

commit r14-11293-g916daed4ecb6ed8d329b66e97b9468c8f6e549bd
Author: Jakub Jelinek 
Date:   Sat Feb 1 00:50:24 2025 +0100

icf: Compare call argument types in certain cases and asm operands 
[PR117432]

compare_operand uses operand_equal_p under the hood, which e.g. for
INTEGER_CSTs will just match the values regardless of their types.
Now, in many cases comparing the type is redundant, if we have
  x_2 = y_3 + 1;
we've already compared the type for the lhs and also for rhs1, there won't
be any surprises on rhs2.
As noted in the PR, there are cases where the type of the operand is the
sole place of information and we don't want to ICF merge functions if the
types differ.
One case is stdarg functions, arguments passed to ..., it is different
if we pass 1, 1L, 1LL.
Another case are the K&R unprototyped functions (sure, gone in C23).
And yet another case are inline asm operands, "r" (1) is different from "r"
(1L) from "r" (1LL).

So, the following patch determines based on lack of fntype (e.g. for
internal functions), or on !prototype_p, or on stdarg_p (in that case
using number of named arguments) which arguments need to have type checked
and does that, plus compares types on inline asm operands (maybe it would be
enough to do that just for input operands but we have just a routine to
handle both and I didn't feel we need to differentiate).

Furthermore, I've noticed fntype{1,2} isn't actually compared if it is a
direct call (gimple_call_fndecl is non-NULL).  That is wrong too, we could
have
  void (*fn) (int, long long) = (void (*) (int, long long)) foo;
  fn (1, 1LL);
in one case and
  void (*fn) (long long, int) = (void (*) (long long, int)) foo;
  fn (1LL, 1);
in another, both folded into a direct call of foo with different
gimple_call_fntype.  Sure, one of them would be UB at runtime (or both), but
what if we ICF merge them into the one that is UB at runtime
and the program actually calls the correct one only?

2025-02-01  Jakub Jelinek  

PR ipa/117432
* ipa-icf-gimple.cc (func_checker::compare_asm_inputs_outputs):
Also return_false if operands have incompatible types.
(func_checker::compare_gimple_call): Check fntype1 vs. fntype2
compatibility for all non-internal calls and assume fntype1 and
fntype2 are non-NULL for those.  For calls to non-prototyped
calls or for stdarg_p functions after the last named argument (if 
any)
check type compatibility of call arguments.

* gcc.c-torture/execute/pr117432.c: New test.
* gcc.target/i386/pr117432.c: New test.

(cherry picked from commit ebd111a2896816e4f5ddf5108f361b3d9d287fa0)

Diff:
---
 gcc/ipa-icf-gimple.cc  | 53 ---
 gcc/testsuite/gcc.c-torture/execute/pr117432.c | 72 ++
 gcc/testsuite/gcc.target/i386/pr117432.c   | 17 ++
 3 files changed, 124 insertions(+), 18 deletions(-)

diff --git a/gcc/ipa-icf-gimple.cc b/gcc/ipa-icf-gimple.cc
index 4c3174b68b67..5b31f56a913d 100644
--- a/gcc/ipa-icf-gimple.cc
+++ b/gcc/ipa-icf-gimple.cc
@@ -459,7 +459,9 @@ func_checker::compare_asm_inputs_outputs (tree t1, tree t2,
return false;
 
   if (!compare_operand (TREE_VALUE (t1), TREE_VALUE (t2),
-   get_operand_access_type (map, t1)))
+   get_operand_access_type (map, t1))
+ || !types_compatible_p (TREE_TYPE (TREE_VALUE (t1)),
+ TREE_TYPE (TREE_VALUE (t2
return return_false ();
 
   tree p1 = TREE_PURPOSE (t1);
@@ -709,26 +711,37 @@ func_checker::compare_gimple_call (gcall *s1, gcall *s2)
   || gimple_call_alloca_for_var_p (s1) != gimple_call_alloca_for_var_p 
(s2))
 return false;
 
-  if (gimple_call_internal_p (s1)
-  && gimple_call_internal_fn (s1) != gimple_call_internal_fn (s2))
-return false;
-
-  tree fntype1 = gimple_call_fntype (s1);
-  tree fntype2 = gimple_call_fntype (s2);
-
-  /* For direct calls we verify that types are compatible so if we matched
- callees, callers must match, too.  For indirect calls however verify
- function type.  */
-  if (!gimple_call_fndecl (s1))
+  unsigned check_arg_types_from = 0;
+  if (gimple_call_internal_p (s1))
 {
-  if ((fntype1 && !fntype2)
- || (!fntype1 && fntype2)
- || (fntype1 && !types_compatible_p (fntype1, fntype2)))
-   return return_false_with_msg ("call function types are not compatible");
+  if (gimple_call_internal_fn (s1) != gimple_call_internal_fn (s2))
+   return false;
 }
+  else
+{
+  tree fntype1 = gimple_call_fntype (s1);
+  tree fntype2 = gimple_call_fntype (s2);
+  if (!types_compatible

[gcc(refs/users/meissner/heads/work193)] Add support for -mcpu=future

2025-02-10 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:fe5752a1b218c4ba34c79d6364f426803df0d793

commit fe5752a1b218c4ba34c79d6364f426803df0d793
Author: Michael Meissner 
Date:   Mon Feb 10 13:25:37 2025 -0500

Add support for -mcpu=future

This patch adds the support that can be used in developing GCC support for
future PowerPC processors.

2025-02-10  Michael Meissner  

* config.gcc (powerpc*-*-*): Add support for --with-cpu=future.
* config/rs6000/aix71.h (ASM_CPU_SPEC): Add support for 
-mcpu=future.
* config/rs6000/aix72.h (ASM_CPU_SPEC): Likewise.
* config/rs6000/aix73.h (ASM_CPU_SPEC): Likewise.
* config/rs6000/driver-rs6000.cc (asm_names): Likewise.
* config/rs6000/rs6000-c.cc (rs6000_target_modify_macros): If
-mcpu=future, define _ARCH_FUTURE.
* config/rs6000/rs6000-cpus.def (FUTURE_MASKS_SERVER): New macro.
(POWERPC_MASKS): Add OPTION_MASK_FUTURE.
(future cpu): Define.
* config/rs6000/rs6000-opts.h (enum processor_type): Add
PROCESSOR_FUTURE.
* config/rs6000/rs6000-tables.opt: Regenerate.
* config/rs6000/rs6000.cc (power10_cost): Update comment.
(get_arch_flags): Add support for future processor.
(rs6000_option_override_internal): Likewise.
(rs6000_machine_from_flags): Likewise.
(rs6000_reassociation_width): Likewise.
(rs6000_adjust_cost): Likewise.
(rs6000_issue_rate): Likewise.
(rs6000_sched_reorder): Likewise.
(rs6000_sched_reorder2): Likewise.
(rs6000_register_move_cost): Likewise.
(rs6000_opt_masks): Add -mfuture.
* config/rs6000/rs6000.h (ASM_CPU_SPEC): Likewise.
* config/rs6000/rs6000.md (cpu attribute): Likewise.
* config/rs6000/rs6000.opt (-mfuture): New internal option.

Diff:
---
 gcc/config.gcc  |  4 ++--
 gcc/config/rs6000/aix71.h   |  1 +
 gcc/config/rs6000/aix72.h   |  1 +
 gcc/config/rs6000/aix73.h   |  1 +
 gcc/config/rs6000/driver-rs6000.cc  |  2 ++
 gcc/config/rs6000/rs6000-c.cc   |  2 ++
 gcc/config/rs6000/rs6000-cpus.def   |  5 +
 gcc/config/rs6000/rs6000-opts.h |  1 +
 gcc/config/rs6000/rs6000-tables.opt | 11 +++
 gcc/config/rs6000/rs6000.cc | 30 ++
 gcc/config/rs6000/rs6000.h  |  1 +
 gcc/config/rs6000/rs6000.md |  2 +-
 gcc/config/rs6000/rs6000.opt|  6 ++
 13 files changed, 52 insertions(+), 15 deletions(-)

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 9e167f7f00d5..46d08c16b316 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -536,7 +536,7 @@ powerpc*-*-*)
extra_headers="${extra_headers} ppu_intrinsics.h spu2vmx.h vec_types.h 
si2vmx.h"
extra_headers="${extra_headers} amo.h"
case x$with_cpu in
-   
xpowerpc64|xdefault64|x6[23]0|x970|xG5|xpower[3456789]|xpower1[01]|xpower6x|xrs64a|xcell|xa2|xe500mc64|xe5500|xe6500)
+   
xpowerpc64|xdefault64|x6[23]0|x970|xG5|xpower[3456789]|xpower1[01]|xpower6x|xrs64a|xcell|xa2|xe500mc64|xe5500|xe6500|xfuture)
cpu_is_64bit=yes
;;
esac
@@ -5683,7 +5683,7 @@ case "${target}" in
tm_defines="${tm_defines} CONFIG_PPC405CR"
eval "with_$which=405"
;;
-   "" | common | native \
+   "" | common | native | future \
| power[3456789] | power1[01] | power5+ | power6x \
| powerpc | powerpc64 | powerpc64le \
| rs64 \
diff --git a/gcc/config/rs6000/aix71.h b/gcc/config/rs6000/aix71.h
index 2b21dd7cd1e0..77651f5ea309 100644
--- a/gcc/config/rs6000/aix71.h
+++ b/gcc/config/rs6000/aix71.h
@@ -79,6 +79,7 @@ do {  
\
 #undef ASM_CPU_SPEC
 #define ASM_CPU_SPEC \
 "%{mcpu=native: %(asm_cpu_native); \
+  mcpu=future: -mfuture; \
   mcpu=power11: -mpwr11; \
   mcpu=power10: -mpwr10; \
   mcpu=power9: -mpwr9; \
diff --git a/gcc/config/rs6000/aix72.h b/gcc/config/rs6000/aix72.h
index 53c0bde5ad4a..652f60c7f494 100644
--- a/gcc/config/rs6000/aix72.h
+++ b/gcc/config/rs6000/aix72.h
@@ -79,6 +79,7 @@ do {  
\
 #undef ASM_CPU_SPEC
 #define ASM_CPU_SPEC \
 "%{mcpu=native: %(asm_cpu_native); \
+  mcpu=future: -mfuture; \
   mcpu=power11: -mpwr11; \
   mcpu=power10: -mpwr10; \
   mcpu=power9: -mpwr9; \
diff --git a/gcc/config/rs6000/aix73.h b/gcc/config/rs6000/aix73.h
index c7639368a264..3c66ac1d9171 100644
--- a/gcc/config/rs6000/aix73.h
+++ b/gcc/config/rs6000/aix73.h
@@ -79,6 +79,7 @@ do {  
\
 #undef ASM_CPU_SPEC
 #define ASM_CPU_SPEC \
 "%{mcpu=nati

[gcc(refs/users/meissner/heads/work193)] Add -mcpu=future tests.

2025-02-10 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:41d43fa3c9887ccbb2129f90fb919cb5fe0e2f8c

commit 41d43fa3c9887ccbb2129f90fb919cb5fe0e2f8c
Author: Michael Meissner 
Date:   Mon Feb 10 13:26:59 2025 -0500

Add -mcpu=future tests.

This patch adds simple tests for -mcpu=future.

2025-02-10  Michael Meissner  

gcc/testsuite/

* gcc.target/powerpc/future-1.c: New test.
* gcc.target/powerpc/future-2.c: Likewise.

Diff:
---
 gcc/testsuite/gcc.target/powerpc/future-1.c | 13 +
 gcc/testsuite/gcc.target/powerpc/future-2.c | 24 
 2 files changed, 37 insertions(+)

diff --git a/gcc/testsuite/gcc.target/powerpc/future-1.c 
b/gcc/testsuite/gcc.target/powerpc/future-1.c
new file mode 100644
index ..f1b940d7bebf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/future-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-mdejagnu-cpu=future -O2" } */
+
+/* Basic check to see if the compiler supports -mcpu=future and if it defines
+   _ARCH_FUTURE.  */
+
+#ifndef _ARCH_FUTURE
+#error "-mcpu=future is not supported"
+#endif
+
+void foo (void)
+{
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/future-2.c 
b/gcc/testsuite/gcc.target/powerpc/future-2.c
new file mode 100644
index ..5552cefa3c2e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/future-2.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+/* Check if we can set the future target via a target attribute.  */
+
+__attribute__((__target__("cpu=power9")))
+void foo_p9 (void)
+{
+}
+
+__attribute__((__target__("cpu=power10")))
+void foo_p10 (void)
+{
+}
+
+__attribute__((__target__("cpu=power11")))
+void foo_p11 (void)
+{
+}
+
+__attribute__((__target__("cpu=future")))
+void foo_future (void)
+{
+}


[gcc(refs/users/meissner/heads/work193)] Change TARGET_MODULO to TARGET_POWER9.

2025-02-10 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:786fa7e8b666bc78c6d0e966927469fa6ef3cfd6

commit 786fa7e8b666bc78c6d0e966927469fa6ef3cfd6
Author: Michael Meissner 
Date:   Mon Feb 10 13:24:08 2025 -0500

Change TARGET_MODULO to TARGET_POWER9.

This patch changes TARGET_MODULO to TARGET_POWER9.  The -mmodulo switch is 
not
being changed, just the name of the macros used to determine if the PowerPC
processor supports ISA 3.0 (Power9).

2025-02-10  Michael Meissner  

gcc/

* gcc/config/rs6000/rs6000-builtin.cc (rs6000_builtin_is_supported):
Change TARGET_MODULO to TARGET_POWER9.
* gcc/config/rs6000/rs6000.cc (rs6000_option_override_internal):
Likewise.
* gcc/config/rs6000/rs6000.h (TARGET_CTZ): Likewise.
(TARGET_EXTSWSLI): Likewise.
(TARGET_MADDLD): Likewise.
(TARGET_POWER9): New macro.
* gcc/config/rs6000/rs6000.md (enabled attribute): Change 
TARGET_MODULO
to TARGET_POWER9.
(mod3): Likewise.
(umod3): Likewise.
(divide/modulo peephole2): Likewise.

Diff:
---
 gcc/config/rs6000/rs6000-builtin.cc |  4 ++--
 gcc/config/rs6000/rs6000.cc |  4 ++--
 gcc/config/rs6000/rs6000.h  |  7 ---
 gcc/config/rs6000/rs6000.md | 14 +++---
 4 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin.cc 
b/gcc/config/rs6000/rs6000-builtin.cc
index 2366b2aee00a..d8ff7cf32dfd 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -169,9 +169,9 @@ rs6000_builtin_is_supported (enum rs6000_gen_builtins 
fncode)
 case ENB_P8V:
   return TARGET_P8_VECTOR;
 case ENB_P9:
-  return TARGET_MODULO;
+  return TARGET_POWER9;
 case ENB_P9_64:
-  return TARGET_MODULO && TARGET_POWERPC64;
+  return TARGET_POWER9 && TARGET_POWERPC64;
 case ENB_P9V:
   return TARGET_P9_VECTOR;
 case ENB_P10:
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 1bba77244c25..06d1bac5aa83 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -3888,7 +3888,7 @@ rs6000_option_override_internal (bool global_init_p)
 
   /* For the newer switches (vsx, dfp, etc.) set some of the older options,
  unless the user explicitly used the -mno- to disable the code.  */
-  if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_MISC)
+  if (TARGET_P9_VECTOR || TARGET_POWER9 || TARGET_P9_MISC)
 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
   else if (TARGET_P9_MINMAX)
 {
@@ -22358,7 +22358,7 @@ rs6000_rtx_costs (rtx x, machine_mode mode, int 
outer_code,
*total = rs6000_cost->divsi;
}
   /* Add in shift and subtract for MOD unless we have a mod instruction. */
-  if ((!TARGET_MODULO
+  if ((!TARGET_POWER9
   || (RS6000_DISABLE_SCALAR_MODULO && SCALAR_INT_MODE_P (mode)))
 && (code == MOD || code == UMOD))
*total += COSTS_N_INSNS (2);
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 856d268d2d27..caf8cddf905e 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -463,9 +463,9 @@ extern int rs6000_vector_align[];
 #define TARGET_FCTIWUZ TARGET_POWER7
 /* Only powerpc64 and powerpc476 support fctid.  */
 #define TARGET_FCTID   (TARGET_POWERPC64 || rs6000_cpu == PROCESSOR_PPC476)
-#define TARGET_CTZ TARGET_MODULO
-#define TARGET_EXTSWSLI(TARGET_MODULO && TARGET_POWERPC64)
-#define TARGET_MADDLD  TARGET_MODULO
+#define TARGET_CTZ TARGET_POWER9
+#define TARGET_EXTSWSLI(TARGET_POWER9 && TARGET_POWERPC64)
+#define TARGET_MADDLD  TARGET_POWER9
 
 /* TARGET_DIRECT_MOVE is redundant to TARGET_P8_VECTOR, so alias it to that.  
*/
 #define TARGET_DIRECT_MOVE TARGET_P8_VECTOR
@@ -504,6 +504,7 @@ extern int rs6000_vector_align[];
 #define TARGET_POWER5X TARGET_FPRND
 #define TARGET_POWER6  TARGET_CMPB
 #define TARGET_POWER7  TARGET_POPCNTD
+#define TARGET_POWER9  TARGET_MODULO
 
 /* In switching from using target_flags to using rs6000_isa_flags, the options
machinery creates OPTION_MASK_ instead of MASK_.  The MASK_
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 87ec37a9f8e4..db1b6c2d1164 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -403,7 +403,7 @@
  (const_int 1)
 
  (and (eq_attr "isa" "p9")
- (match_test "TARGET_MODULO"))
+ (match_test "TARGET_POWER9"))
  (const_int 1)
 
  (and (eq_attr "isa" "p9v")
@@ -3457,7 +3457,7 @@
   || INTVAL (operands[2]) <= 0
   || (i = exact_log2 (INTVAL (operands[2]))) < 0)
 {
-  if (!TARGET_MODULO)
+  if (!TARGET_POWER9)
FAIL;
 
   operands[2] = force_reg (mode, operands[2]);
@@ -3491,7 +3491,7 @@
   [(set (match_operand:GPR 0 "gpc_reg_operand" "=&r,r")
 (mod:GPR (match_oper

[gcc r14-11297] c++: Fix up name independent decl in structured binding handling in range for [PR115586]

2025-02-10 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:08bfa32ae92e36a791ebdefd063b4f2616cd6f80

commit r14-11297-g08bfa32ae92e36a791ebdefd063b4f2616cd6f80
Author: Jakub Jelinek 
Date:   Fri Feb 7 17:08:39 2025 +0100

c++: Fix up name independent decl in structured binding handling in range 
for [PR115586]

cp_parser_range_for temporarily reverts IDENTIFIER_BINDING changes
to hide the decls from the structured bindings from lookup during
parsing of the expression after :
If there are 2 or more name independent decls, we undo IDENTIFIER_BINDING
for the same name multiple times, even when just one has been added
(with a TREE_LIST inside of it as decl).

The following patch fixes it by handling the _ name at most once, the
later loop will DTRT then and just reinstall the temporarily hidden
binding with the TREE_LIST in there.

2025-02-07  Jakub Jelinek  

PR c++/115586
* parser.cc (cp_parser_range_for): For name independent decls in
structured bindings, only push the name/binding once per
structured binding.

* g++.dg/cpp26/name-independent-decl9.C: New test.
* g++.dg/cpp26/name-independent-decl10.C: New test.

(cherry picked from commit ca7c6d1212b8589deed18386427c67851af2b9ad)

Diff:
---
 gcc/cp/parser.cc   | 10 
 .../g++.dg/cpp26/name-independent-decl10.C | 63 ++
 .../g++.dg/cpp26/name-independent-decl9.C  | 49 +
 3 files changed, 122 insertions(+)

diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index 5bfbdf85f152..aeb36d1e4831 100644
--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc
@@ -14130,9 +14130,19 @@ cp_parser_range_for (cp_parser *parser, tree scope, 
tree init, tree range_decl,
  decomp = &decomp_d;
  decomp->count = tree_to_uhwi (TREE_OPERAND (v, 1)) + 1;
  decomp->decl = d;
+ bool seen_name_independent_decl = false;
  for (unsigned int i = 0; i < decomp->count;
   i++, d = DECL_CHAIN (d))
{
+ if (name_independent_decl_p (d))
+   {
+ /* If there is more than one _ decl in
+the structured binding, just push and move it
+away once.  */
+ if (seen_name_independent_decl)
+   continue;
+ seen_name_independent_decl = true;
+   }
  tree name = DECL_NAME (d);
  names.safe_push (name);
  bindings.safe_push (IDENTIFIER_BINDING (name));
diff --git a/gcc/testsuite/g++.dg/cpp26/name-independent-decl10.C 
b/gcc/testsuite/g++.dg/cpp26/name-independent-decl10.C
new file mode 100644
index ..792c2bd0c6b3
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp26/name-independent-decl10.C
@@ -0,0 +1,63 @@
+// PR c++/115586
+// { dg-do compile { target c++11 } }
+// { dg-options "" }
+
+struct S { int a, b, c; };
+
+void
+foo ()
+{
+  S s[4] = {};
+  for (auto [_, _, a] : s) // { dg-warning "name-independent declarations 
only available with" "" { target c++23_down } }
+++_;   // { dg-warning "structured bindings only 
available with" "" { target c++14_down } .-1 }
+   // { dg-error "reference to '_' is ambiguous" 
"" { target *-*-* } .-1 }
+  for (auto _ : s)
+++_.b;
+  for (auto [a, _, b] : s) // { dg-warning "structured bindings only 
available with" "" { target c++14_down } }
+++_;
+  for (auto [_, _, a] : s) // { dg-warning "name-independent declarations 
only available with" "" { target c++23_down } }
+{  // { dg-warning "structured bindings only 
available with" "" { target c++14_down } .-1 }
+  ++_; // { dg-error "reference to '_' is ambiguous" }
+}
+  for (auto _ : s)
+{
+  ++_.b;
+  int _ = 2;   // { dg-warning "name-independent declarations 
only available with" "" { target c++23_down } }
+  ++_; // { dg-error "reference to '_' is ambiguous" }
+}
+  for (auto [a, _, b] : s) // { dg-warning "structured bindings only 
available with" "" { target c++14_down } }
+{
+  ++_;
+  int _ = ++b; // { dg-warning "name-independent declarations 
only available with" "" { target c++23_down } }
+  ++_; // { dg-error "reference to '_' is ambiguous" }
+}
+}
+
+void
+bar ()
+{
+  S s[4] = {};
+  auto [_, c, _] = s[0];   // { dg-warning "name-independent declarations 
only available with" "" { target c++23_down } }
+  ++_; // { dg-warning "structured bindings only 
available with" "" { target c++14_down } .-1 }
+   // { dg-error "reference to '_' is ambiguous" 
"" { target *-*-* } .-1 }
+  for (auto [a, _, _] : s) // { dg-warning 

[gcc/redhat/heads/gcc-14-branch] (101 commits) Merge commit 'r14-11298-g7cb022442444833796b384684afef14d0b

2025-02-10 Thread Jakub Jelinek via Gcc-cvs
The branch 'redhat/heads/gcc-14-branch' was updated to point to:

 c45fb1d26a52... Merge commit 'r14-11298-g7cb022442444833796b384684afef14d0b

It previously pointed to:

 e525669e462d... Merge commit 'r14-11198-ga2de88e5d49f7084677ef2728cd99db0a9

Diff:

Summary of changes (added commits):
---

  c45fb1d... Merge commit 'r14-11298-g7cb022442444833796b384684afef14d0b
  7cb0224... i386: Fix ICE with conditional QI/HI vector maxmin [PR11877 (*)
  08bfa32... c++: Fix up name independent decl in structured binding han (*)
  6cd1daf... c++: Don't use CLEANUP_EH_ONLY for new expression cleanup [ (*)
  6f5ada5... c++: Allow constexpr reads from volatile std::nullptr_t obj (*)
  ab9518d... loop-iv, riscv: Fix get_biv_step_1 for RISC-V [PR117506] (*)
  916daed... icf: Compare call argument types in certain cases and asm o (*)
  3f475f4... niter: Make build_cltz_expr more robust [PR118689] (*)
  acd0e21... d: give dependency files better filenames [PR118477] (*)
  b7553f7... c++: Return false from __is_bounded_array for zero-sized ar (*)
  1dac899... combine: Fix up make_extraction [PR118638] (*)
  15e66f7... c++: Only destruct elts of array for new expression if exce (*)
  7a369b6... tree-assume: Fix UB in assume_query [PR118605] (*)
  2c9ebb8... builtins: Store unspecified value to *exp for inf/nan [PR11 (*)
  2349c6a... match.pd: Fix (FTYPE) N CMP (FTYPE) M optimization for GENE (*)
  0faffd5... vec.h: Properly destruct elements in auto_vec auto storage  (*)
  282dedf... Daily bump. (*)
  7968492... libgcc: On FreeBSD use GCC's crt objects for static linking (*)
  f93541f... Daily bump. (*)
  4098d67... Daily bump. (*)
  1cd744a... Fortran:  FIx ICE in associate with elemental function [PR1 (*)
  4d4c5ec... Fortran: Fix error recovery for bad component arrayspecs [P (*)
  373e2db... Daily bump. (*)
  ca652ae... Fortran: host association issue with symbol in COMMON block (*)
  9a09fc9... LoongArch: Fix ICE caused by illegal calls to builtin funct (*)
  8bca14a... Daily bump. (*)
  6a4df91... RTEMS: Add Cortex-M33 multilib (*)
  6f08060... Daily bump. (*)
  65ecfba... Fortran: F2008 passing of internal procs to a proc pointer  (*)
  b0bb0d9... Daily bump. (*)
  1e77549... options: Adjust cl_optimization_compare to avoid checking I (*)
  c2b2e9c... Daily bump. (*)
  f89f3e5... Daily bump. (*)
  8c79b66... Ada: Fix segfault on uninitialized variable as operand of p (*)
  7bc54a8... Daily bump. (*)
  63ea47f... Fortran: fix bogus diagnostics on renamed interface import  (*)
  58ad709... Daily bump. (*)
  a0550ff... Daily bump. (*)
  b53d19a... Fortran: fix passing of component ref to assumed-rank dummy (*)
  50c111e... testsuite/118127: Pass fortran tests on ppc64le for IEEE128 (*)
  f0420cc... libstdc++: Fix views::transform(move_only_fn{}) forwarding  (*)
  8774d50... c++: re-enable NSDMI CONSTRUCTOR folding [PR118355] (*)
  a481616... Daily bump. (*)
  c061ad5... c++: friend vs inherited guide confusion [PR117855] (*)
  f8daec2... AArch64: don't override march to assembler with mcpu if mar (*)
  7c6fde4... AArch64: have -mcpu=native detect architecture extensions f (*)
  946c17e... asan: Fix missing FakeStack flag cleanup (*)
  fb0e3f9... Daily bump. (*)
  d121d1e... libstdc++: perfectly forward std::ranges::clamp arguments (*)
  0af8fc2... c++: explicit spec of constrained member tmpl [PR107522] (*)
  18f447a... Daily bump. (*)
  1eafda3... Daily bump. (*)
  18a0994... Daily bump. (*)
  b69eb2c... Fortran: Fix UTF-8 output with A edit descriptor. (*)
  d4df61d... Fortran: do not copy back for parameter actual arguments [P (*)
  504fbaf... c++: ICE with nested anonymous union [PR117153] (*)
  1836a65... testsuite: arm: Use -Os -fno-math-errno in vfp-1.c [PR11644 (*)
  a1acb1c... rs6000: Fix ICE for invalid constants in built-in functions (*)
  68df376... rs6000: Fix loop limit for built-in constant checking (*)
  1a80a04... Daily bump. (*)
  11abd61... hppa: Fix typo in ADDITIONAL_REGISTER_NAMES in pa32-regs.h (*)
  80fddad... Daily bump. (*)
  3228df2... rtl: Remove invalid compare simplification [PR117186] (*)
  51761b3... aarch64: Detect word-level modification in early-ra [PR1181 (*)
  0d9479e... Daily bump. (*)
  0e4f03c... c++: Wrap force_target_expr in get_member_function_from_ptr (*)
  a82352a... c++/modules: Propagate FNDECL_USED_AUTO when propagating de (*)
  3f9d6af... Daily bump. (*)
  696b87d... Update gcc zh_CN.po (*)
  ffa44df... d: Fix failing test with 32-bit compiler [PR114434] (*)
  7780f2c... Daily bump. (*)
  9a1daeb... i386: Reorder *movdi_internal ISA attribute by ascending al (*)
  94338cd... i386: Disable SImode/DImode moves from/to mask regs without (*)
  b1f9fb0... c++: Friend classes don't shadow enclosing template class p (*)
  78a8316... Daily bump. (*)
  511920a... Daily bump. (*)
  4abeaeb... d: Fix ICE in expand_d_format when diagnosing empty enum [P (*)
  1ba0d18... AVR: Use INT_N to built-in define __int24. (*)
  f400d06... c++: Allow pragmas in NSD

[gcc(refs/users/meissner/heads/work193)] Do not allow -mvsx to boost processor to power7.

2025-02-10 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:ca3f83ed9190f4068d34ebb686f0f59b9c7061e6

commit ca3f83ed9190f4068d34ebb686f0f59b9c7061e6
Author: Michael Meissner 
Date:   Mon Feb 10 13:29:19 2025 -0500

Do not allow -mvsx to boost processor to power7.

This patch restructures the code so that -mvsx for example will not silently
convert the processor to power7.  The user must now use -mcpu=power7 or 
higher.
This means if the user does -mvsx and the default processor does not have 
VSX
support, it will be an error.

I have built both big endian and little endian bootstrap compilers and there
were no regressions.

I updated the 2 tests that used -mvsx to raise the cpu to power7, and the 
test
case that checks if -mno-vsx produces the expected warning.

Note, Peter had some questions about one of the tests in the previous 
version of
the patch.  The test is still the same in this patch.  But the code for
preventing -mvsx is different from the previous patch, and I wanted to get 
that
patch for review before stage1 closes.

Can I install this patch on the GCC 15 trunk?

2025-02-10  Michael Meissner  

gcc/

* config/rs6000/rs6000.cc (rs6000_option_override_internal): Check 
if
the user asked for VSX instructions whether the cpu was at least 
power7.

gcc/testsuite/

* gcc.target/powerpc/ppc-target-4.c: Rewrite the test to add 
cpu=power7
when we need to add VSX support.  Add test for adding cpu=power7 
no-vsx
to generate only Altivec instructions.
* gcc.target/powerpc/pr115688.c: Add cpu=power7 in target 
__attribute__
when requesting VSX instructions.
* gcc.target/powerpc/pr87496-1.c: Update options to use
-mdejagnu-cpu=power6 to get the appropriate error message.

Diff:
---
 gcc/config/rs6000/rs6000.cc |  7 +
 gcc/testsuite/gcc.target/powerpc/ppc-target-4.c | 38 +++--
 gcc/testsuite/gcc.target/powerpc/pr115688.c |  3 +-
 gcc/testsuite/gcc.target/powerpc/pr87496-1.c|  2 +-
 4 files changed, 39 insertions(+), 11 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 3c0c6e1a74d0..e2f0b8b7de57 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -3862,6 +3862,13 @@ rs6000_option_override_internal (bool global_init_p)
  rs6000_isa_flags &= ~OPTION_MASK_VSX;
  rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
}
+  else if (!TARGET_POWER7)
+   {
+ if (explicit_vsx_p)
+   error ("%<-mvsx%> requires at least %<-mcpu=power%>");
+ rs6000_isa_flags &= ~OPTION_MASK_VSX;
+ rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
+   }
 }
 
   /* If hard-float/altivec/vsx were explicitly turned off then don't allow
diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-target-4.c 
b/gcc/testsuite/gcc.target/powerpc/ppc-target-4.c
index feef76db4618..5e2ecf34f249 100644
--- a/gcc/testsuite/gcc.target/powerpc/ppc-target-4.c
+++ b/gcc/testsuite/gcc.target/powerpc/ppc-target-4.c
@@ -2,7 +2,7 @@
 /* { dg-skip-if "" { powerpc*-*-darwin* } } */
 /* { dg-require-effective-target powerpc_fprs } */
 /* { dg-options "-O2 -ffast-math -mdejagnu-cpu=power5 -mno-altivec 
-mabi=altivec -fno-unroll-loops" } */
-/* { dg-final { scan-assembler-times "vaddfp" 1 } } */
+/* { dg-final { scan-assembler-times "vaddfp" 2 } } */
 /* { dg-final { scan-assembler-times "xvaddsp" 1 } } */
 /* { dg-final { scan-assembler-times "fadds" 1 } } */
 
@@ -18,10 +18,6 @@
 #error "__VSX__ should not be defined."
 #endif
 
-#pragma GCC target("altivec,vsx")
-#include 
-#pragma GCC reset_options
-
 #pragma GCC push_options
 #pragma GCC target("altivec,no-vsx")
 
@@ -33,6 +29,7 @@
 #error "__VSX__ should not be defined."
 #endif
 
+/* Altivec build, generate vaddfp.  */
 void
 av_add (vector float *a, vector float *b, vector float *c)
 {
@@ -40,10 +37,11 @@ av_add (vector float *a, vector float *b, vector float *c)
   unsigned long n = SIZE / 4;
 
   for (i = 0; i < n; i++)
-a[i] = vec_add (b[i], c[i]);
+a[i] = b[i] + c[i];
 }
 
-#pragma GCC target("vsx")
+/* cpu=power7 must be used to enable VSX.  */
+#pragma GCC target("cpu=power7,vsx")
 
 #ifndef __ALTIVEC__
 #error "__ALTIVEC__ should be defined."
@@ -53,6 +51,7 @@ av_add (vector float *a, vector float *b, vector float *c)
 #error "__VSX__ should be defined."
 #endif
 
+/* VSX build on power7, generate xvaddsp.  */
 void
 vsx_add (vector float *a, vector float *b, vector float *c)
 {
@@ -60,11 +59,31 @@ vsx_add (vector float *a, vector float *b, vector float *c)
   unsigned long n = SIZE / 4;
 
   for (i = 0; i < n; i++)
-a[i] = vec_add (b[i], c[i]);
+a[i] = b[i] + c[i];
+}
+
+#pragma GCC target("cpu=power7,no-vsx")
+
+#ifndef __ALTIVEC__
+#error "__ALTIVEC__ should be defined."
+#endif
+
+#ifdef __VSX__
+#error "__VSX__ should not be defined."

[gcc(refs/users/meissner/heads/work193)] Add rs6000 architecture masks.

2025-02-10 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:c971542277f3cc859fcef2538ed5ef1302e81b5a

commit c971542277f3cc859fcef2538ed5ef1302e81b5a
Author: Michael Meissner 
Date:   Mon Feb 10 13:30:32 2025 -0500

Add rs6000 architecture masks.

This patch begins the journey to move architecture bits that are not user 
ISA
options from rs6000_isa_flags to a new target variable rs6000_arch_flags.  
The
intention is to remove switches that are currently isa options, but the user
should not be using this particular option. For example, we want users to 
use
-mcpu=power10 and not just -mpower10.

This patch also changes the target_clones support to use an architecture 
mask
instead of isa bits.

This patch also switches the handling of .machine to use architecture masks 
if
they exist (power4 through power11).  All of the other PowerPCs will 
continue to
use the existing code for setting the .machine option.

I have built both big endian and little endian bootstrap compilers and there
were no regressions.

In addition, I constructed a test case that used every architecture define 
(like
_ARCH_PWR4, etc.) and I also looked at the .machine directive generated.  I 
ran
this test for all supported combinations of -mcpu, big/little endian, and 
32/64
bit support.  Every single instance generated exactly the same code with the
patches installed compared to the compiler before installing the patches.

The only difference in this patch compared to the first version posted on
November 6th is that I added the correct attribution and copyright year (i.e. 
that I
created rs6000-arch.def in 2024).

Can I install this patch on the GCC 15 trunk?

2025-02-10  Michael Meissner  

gcc/

* config/rs6000/default64.h (TARGET_CPU_DEFAULT): Set default cpu 
name.
* config/rs6000/rs6000-arch.def: New file.
* config/rs6000/rs6000.cc (struct clone_map): Switch to using
architecture masks instead of ISA masks.
(rs6000_clone_map): Likewise.
(rs6000_print_isa_options): Add an architecture flags argument, 
change
all callers.
(get_arch_flag): New function.
(rs6000_debug_reg_global): Update rs6000_print_isa_options calls.
(rs6000_option_override_internal): Likewise.
(rs6000_machine_from_flags): Switch to using architecture masks 
instead
of ISA masks.
(struct rs6000_arch_mask): New structure.
(rs6000_arch_masks): New table of architecture masks and names.
(rs6000_function_specific_save): Save architecture flags.
(rs6000_function_specific_restore): Restore architecture flags.
(rs6000_function_specific_print): Update rs6000_print_isa_options 
calls.
(rs6000_print_options_internal): Add architecture flags options.
(rs6000_clone_priority): Switch to using architecture masks instead 
of
ISA masks.
(rs6000_can_inline_p): Don't allow inlining if the callee requires a 
newer
architecture than the caller.
* config/rs6000/rs6000.h: Use rs6000-arch.def to create the 
architecture
masks.
* config/rs6000/rs6000.opt (rs6000_arch_flags): New target variable.
(x_rs6000_arch_flags): New save/restore field for rs6000_arch_flags.

Diff:
---
 gcc/config/rs6000/default64.h |  11 ++
 gcc/config/rs6000/rs6000-arch.def |  49 +
 gcc/config/rs6000/rs6000.cc   | 222 +++---
 gcc/config/rs6000/rs6000.h|  24 +
 gcc/config/rs6000/rs6000.opt  |   8 ++
 5 files changed, 277 insertions(+), 37 deletions(-)

diff --git a/gcc/config/rs6000/default64.h b/gcc/config/rs6000/default64.h
index 7f6001ded852..188f5c1d1378 100644
--- a/gcc/config/rs6000/default64.h
+++ b/gcc/config/rs6000/default64.h
@@ -21,6 +21,7 @@ along with GCC; see the file COPYING3.  If not see
 #define RS6000_CPU(NAME, CPU, FLAGS)
 #include "rs6000-cpus.def"
 #undef RS6000_CPU
+#undef TARGET_CPU_DEFAULT
 
 #if (TARGET_DEFAULT & MASK_LITTLE_ENDIAN)
 #undef TARGET_DEFAULT
@@ -28,10 +29,20 @@ along with GCC; see the file COPYING3.  If not see
| MASK_LITTLE_ENDIAN)
 #undef ASM_DEFAULT_SPEC
 #define ASM_DEFAULT_SPEC "-mpower8"
+#define TARGET_CPU_DEFAULT "power8"
+
 #else
 #undef TARGET_DEFAULT
 #define TARGET_DEFAULT (OPTION_MASK_PPC_GFXOPT | OPTION_MASK_PPC_GPOPT \
| OPTION_MASK_MFCRF | MASK_POWERPC64 | MASK_64BIT)
 #undef ASM_DEFAULT_SPEC
 #define ASM_DEFAULT_SPEC "-mpower4"
+
+#if (TARGET_DEFAULT & MASK_POWERPC64)
+#define TARGET_CPU_DEFAULT "powerpc64"
+
+#else
+#define TARGET_CPU_DEFAULT "powerpc"
+#endif
+
 #endif
diff --git a/gcc/config/rs6000/rs6000-arch.def 
b/gcc/config/rs6000/rs6000-arch.def
new file mode 100644
index ..c0dbc5834333
--- /dev/null
+++ b/gcc/config/rs6000/rs6000-arch.def
@

[gcc(refs/users/meissner/heads/work193-dmf)] Merge commit 'refs/users/meissner/heads/work193-dmf' of git+ssh://gcc.gnu.org/git/gcc into me/work19

2025-02-10 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:c6aad9d409ba49e7759ecaf977aa7c8062e4377a

commit c6aad9d409ba49e7759ecaf977aa7c8062e4377a
Merge: 7e0070b25166 acb0f0a57632
Author: Michael Meissner 
Date:   Mon Feb 10 13:34:30 2025 -0500

Merge commit 'refs/users/meissner/heads/work193-dmf' of 
git+ssh://gcc.gnu.org/git/gcc into me/work193-dmf

Diff:


[gcc(refs/users/meissner/heads/work193)] Add -mcpu=future tuning support.

2025-02-10 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:fb5a0aac5a1a442879751a9e9cf85c84780658d4

commit fb5a0aac5a1a442879751a9e9cf85c84780658d4
Author: Michael Meissner 
Date:   Mon Feb 10 13:26:19 2025 -0500

Add -mcpu=future tuning support.

This patch makes -mtune=future use the same tuning decision as 
-mtune=power11.

2025-02-10  Michael Meissner  

gcc/

* config/rs6000/power10.md (all reservations): Add future as an
alternative to power10 and power11.

Diff:
---
 gcc/config/rs6000/power10.md | 145 ++-
 1 file changed, 73 insertions(+), 72 deletions(-)

diff --git a/gcc/config/rs6000/power10.md b/gcc/config/rs6000/power10.md
index fd31b16b3314..bdd7e58145ba 100644
--- a/gcc/config/rs6000/power10.md
+++ b/gcc/config/rs6000/power10.md
@@ -1,4 +1,5 @@
-;; Scheduling description for the IBM Power10 and Power11 processors.
+;; Scheduling description for the IBM Power10, Power11, and
+;; potential future processors.
 ;; Copyright (C) 2020-2025 Free Software Foundation, Inc.
 ;;
 ;; Contributed by Pat Haugen (pthau...@us.ibm.com).
@@ -97,12 +98,12 @@
(eq_attr "update" "no")
(eq_attr "size" "!128")
(eq_attr "prefixed" "no")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_any_power10,LU_power10")
 
 (define_insn_reservation "power10-fused-load" 4
   (and (eq_attr "type" "fused_load_cmpi,fused_addis_load,fused_load_load")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_even_power10,LU_power10")
 
 (define_insn_reservation "power10-prefixed-load" 4
@@ -110,13 +111,13 @@
(eq_attr "update" "no")
(eq_attr "size" "!128")
(eq_attr "prefixed" "yes")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_even_power10,LU_power10")
 
 (define_insn_reservation "power10-load-update" 4
   (and (eq_attr "type" "load")
(eq_attr "update" "yes")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_even_power10,LU_power10+SXU_power10")
 
 (define_insn_reservation "power10-fpload-double" 4
@@ -124,7 +125,7 @@
(eq_attr "update" "no")
(eq_attr "size" "64")
(eq_attr "prefixed" "no")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_any_power10,LU_power10")
 
 (define_insn_reservation "power10-prefixed-fpload-double" 4
@@ -132,14 +133,14 @@
(eq_attr "update" "no")
(eq_attr "size" "64")
(eq_attr "prefixed" "yes")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_even_power10,LU_power10")
 
 (define_insn_reservation "power10-fpload-update-double" 4
   (and (eq_attr "type" "fpload")
(eq_attr "update" "yes")
(eq_attr "size" "64")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_even_power10,LU_power10+SXU_power10")
 
 ; SFmode loads are cracked and have additional 3 cycles over DFmode
@@ -148,27 +149,27 @@
   (and (eq_attr "type" "fpload")
(eq_attr "update" "no")
(eq_attr "size" "32")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_even_power10,LU_power10")
 
 (define_insn_reservation "power10-fpload-update-single" 7
   (and (eq_attr "type" "fpload")
(eq_attr "update" "yes")
(eq_attr "size" "32")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_even_power10,LU_power10+SXU_power10")
 
 (define_insn_reservation "power10-vecload" 4
   (and (eq_attr "type" "vecload")
(eq_attr "size" "!256")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_any_power10,LU_power10")
 
 ; lxvp
 (define_insn_reservation "power10-vecload-pair" 4
   (and (eq_attr "type" "vecload")
(eq_attr "size" "256")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_even_power10,LU_power10+SXU_power10")
 
 ; Store Unit
@@ -178,12 +179,12 @@
(eq_attr "prefixed" "no")
(eq_attr "size" "!128")
(eq_attr "size" "!256")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_any_power10,STU_power10")
 
 (define_insn_reservation "power10-fused-store" 0
   (and (eq_attr "type" "fused_store_store")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_even_power10,STU_power10")
 
 (define_insn_reservation "power10-prefixed-store" 0
@@ -191,52 +192,52 @@
(eq_attr "prefixed" "yes")
(eq_attr "size" "!128")
(eq_attr "size" "!256")
-   (eq_attr "cpu" "power10,power11"))
+   (eq_attr "cpu" "power10,power11,future"))
   "DU_even_power10,STU_power10")
 
 ; Update forms have 2 cycle lat

[gcc(refs/users/meissner/heads/work193)] Use vector pair load/store for memcpy with -mcpu=future

2025-02-10 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:72f9f6a6f46754becdd3570fc83d9c121a265ac4

commit 72f9f6a6f46754becdd3570fc83d9c121a265ac4
Author: Michael Meissner 
Date:   Mon Feb 10 13:27:37 2025 -0500

Use vector pair load/store for memcpy with -mcpu=future

In the development for the power10 processor, GCC did not enable using the 
load
vector pair and store vector pair instructions when optimizing things like
memory copy.  This patch enables using those instructions if -mcpu=future is
used.

2025-02-10  Michael Meissner  

gcc/

* config/rs6000/rs6000-cpus.def (FUTURE_MASKS_SERVER): Enable 
using
load vector pair and store vector pair instructions for memory copy
operations.
(POWERPC_MASKS): Make the bit for enabling using load vector pair 
and
store vector pair operations set and reset when the PowerPC 
processor is
changed.
* config/rs6000/rs6000.cc (rs6000_machine_from_flags): Disable
-mblock-ops-vector-pair from influencing .machine selection.

gcc/testsuite/

* gcc.target/powerpc/future-3.c: New test.

Diff:
---
 gcc/config/rs6000/rs6000-cpus.def   |  4 +++-
 gcc/config/rs6000/rs6000.cc |  2 +-
 gcc/testsuite/gcc.target/powerpc/future-3.c | 22 ++
 3 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-cpus.def 
b/gcc/config/rs6000/rs6000-cpus.def
index 228d0b5e7b54..063591f5c094 100644
--- a/gcc/config/rs6000/rs6000-cpus.def
+++ b/gcc/config/rs6000/rs6000-cpus.def
@@ -84,7 +84,8 @@
  | OPTION_MASK_POWER11)
 
 #define FUTURE_MASKS_SERVER(POWER11_MASKS_SERVER   \
-| OPTION_MASK_FUTURE)
+| OPTION_MASK_FUTURE   \
+| OPTION_MASK_BLOCK_OPS_VECTOR_PAIR)
 
 /* Flags that need to be turned off if -mno-vsx.  */
 #define OTHER_VSX_VECTOR_MASKS (OPTION_MASK_EFFICIENT_UNALIGNED_VSX\
@@ -114,6 +115,7 @@
 
 /* Mask of all options to set the default isa flags based on -mcpu=.  */
 #define POWERPC_MASKS  (OPTION_MASK_ALTIVEC\
+| OPTION_MASK_BLOCK_OPS_VECTOR_PAIR\
 | OPTION_MASK_CMPB \
 | OPTION_MASK_CRYPTO   \
 | OPTION_MASK_DFP  \
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index f3586f2f1e17..3c0c6e1a74d0 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -5908,7 +5908,7 @@ rs6000_machine_from_flags (void)
 
   /* Disable the flags that should never influence the .machine selection.  */
   flags &= ~(OPTION_MASK_PPC_GFXOPT | OPTION_MASK_PPC_GPOPT | OPTION_MASK_ISEL
-| OPTION_MASK_ALTIVEC);
+| OPTION_MASK_ALTIVEC | OPTION_MASK_BLOCK_OPS_VECTOR_PAIR);
 
   if ((flags & (FUTURE_MASKS_SERVER & ~ISA_3_1_MASKS_SERVER)) != 0)
 return "future";
diff --git a/gcc/testsuite/gcc.target/powerpc/future-3.c 
b/gcc/testsuite/gcc.target/powerpc/future-3.c
new file mode 100644
index ..afa8b96d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/future-3.c
@@ -0,0 +1,22 @@
+/* 32-bit doesn't generate vector pair instructions.  */
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-mdejagnu-cpu=future -O2" } */
+
+/* Test to see that memcpy will use load/store vector pair with
+   -mcpu=future.  */
+
+#ifndef SIZE
+#define SIZE 4
+#endif
+
+extern vector double to[SIZE], from[SIZE];
+
+void
+copy (void)
+{
+  __builtin_memcpy (to, from, sizeof (to));
+  return;
+}
+
+/* { dg-final { scan-assembler {\mlxvpx?\M}  } } */
+/* { dg-final { scan-assembler {\mstxvpx?\M} } } */


[gcc(refs/users/meissner/heads/work193)] Use architecture flags for defining _ARCH_PWR macros.

2025-02-10 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:1d50d31abc06ee37e3205fc88d5d7b546be71e55

commit 1d50d31abc06ee37e3205fc88d5d7b546be71e55
Author: Michael Meissner 
Date:   Mon Feb 10 13:31:17 2025 -0500

Use architecture flags for defining _ARCH_PWR macros.

For the newer architectures, this patch changes GCC to define the 
_ARCH_PWR
macros using the new architecture flags instead of relying on isa options 
like
-mpower10.

The -mpower8-internal, -mpower10, -mpower11, and -mfuture options were 
removed.
The -mpower11 and -mfuture options were removed completely, since they were 
just
added in GCC 15. The other two options were marked as WarnRemoved, and the
various ISA bits were removed.

TARGET_POWER8, TARGET_POWER10, TARGET_POWER11, and TARGET_FUTURE were 
re-defined
to use the architecture bits instead of the ISA bits.

There are other internal isa bits that aren't removed with this patch 
because
the built-in function support uses those bits.

I have built both big endian and little endian bootstrap compilers and there
were no regressions.

Can I install this patch on the GCC 15 trunk?

2025-02-10  Michael Meissner  

gcc/

* config/rs6000/rs6000-c.cc (rs6000_target_modify_macros): Add 
support to
use architecture flags instead of ISA flags for setting most of the
_ARCH_PWR* macros.
(rs6000_cpu_cpp_builtins): Update rs6000_target_modify_macros call.
* config/rs6000/rs6000-cpus.def (ISA_2_7_MASKS_SERVER): Remove
OPTION_MASK_POWER8.
(ISA_3_1_MASKS_SERVER): Remove OPTION_MASK_POWER10.
(POWER11_MASKS_SERVER): Remove OPTION_MASK_POWER11.
(FUTURE_MASKS_SERVER): Remove OPTION_MASK_FUTURE.
(POWERPC_MASKS): Remove OPTION_MASK_POWER8, OPTION_MASK_POWER10,
OPTION_MASK_POWER11, and OPTION_MASK_FUTURE.
* config/rs6000/rs6000-protos.h (rs6000_target_modify_macros): 
Update
declaration.
(rs6000_target_modify_macros_ptr): Likewise.
* config/rs6000/rs6000.cc (rs6000_target_modify_macros_ptr): 
Likewise.
(rs6000_option_override_internal): Use architecture flags instead 
of ISA
flags.
(rs6000_opt_masks): Remove -mpower10, -mpower11, and -mfuture which 
are
no longer in the ISA flags.
(rs6000_pragma_target_parse): Use architecture flags as well as ISA
flags.
* config/rs6000/rs6000.h (TARGET_POWER5): Redefine to use 
architecture
flags.
(TARGET_POWER5X): Likewise.
(TARGET_POWER6): Likewise.
(TARGET_POWER7): Likewise.
(TARGET_POWER8): Likewise.
(TARGET_POWER9): Likewise.
(TARGET_POWER10): New macro.
(TARGET_POWER11): Likewise.
(TARGET_FUTURE): Likewise.
* config/rs6000/rs6000.opt (-mpower8-internal): Remove ISA flag 
bits.
(-mpower10): Likewise.
(-mpower11): Likewise.
(-mfuture): Likewise.

Diff:
---
 gcc/config/rs6000/rs6000-c.cc | 29 -
 gcc/config/rs6000/rs6000-cpus.def | 10 +-
 gcc/config/rs6000/rs6000-protos.h |  5 +++--
 gcc/config/rs6000/rs6000.cc   | 20 +++-
 gcc/config/rs6000/rs6000.h| 19 +--
 gcc/config/rs6000/rs6000.opt  | 17 ++---
 6 files changed, 46 insertions(+), 54 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-c.cc b/gcc/config/rs6000/rs6000-c.cc
index 6757a2477ad1..6d6838735b33 100644
--- a/gcc/config/rs6000/rs6000-c.cc
+++ b/gcc/config/rs6000/rs6000-c.cc
@@ -338,7 +338,8 @@ rs6000_define_or_undefine_macro (bool define_p, const char 
*name)
#pragma GCC target, we need to adjust the macros dynamically.  */
 
 void
-rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags)
+rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags,
+HOST_WIDE_INT arch_flags)
 {
   if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
 fprintf (stderr,
@@ -411,7 +412,7 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT 
flags)
summary of the flags associated with particular cpu
definitions.  */
 
-  /* rs6000_isa_flags based options.  */
+  /* rs6000_isa_flags and rs6000_arch_flags based options.  */
   rs6000_define_or_undefine_macro (define_p, "_ARCH_PPC");
   if ((flags & OPTION_MASK_PPC_GPOPT) != 0)
 rs6000_define_or_undefine_macro (define_p, "_ARCH_PPCSQ");
@@ -419,25 +420,27 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT 
flags)
 rs6000_define_or_undefine_macro (define_p, "_ARCH_PPCGR");
   if ((flags & OPTION_MASK_POWERPC64) != 0)
 rs6000_define_or_undefine_macro (define_p, "_ARCH_PPC64");
-  if ((flags & OPTION_MASK_MFCRF) != 0)
+  if ((arch_flags & ARCH_MASK_POWER4) != 0)
 rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR4");

[gcc(refs/users/meissner/heads/work193)] Update ChangeLog.*

2025-02-10 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:bdec92406ad418f6adf6bf8eb87f30d1cd09c00b

commit bdec92406ad418f6adf6bf8eb87f30d1cd09c00b
Author: Michael Meissner 
Date:   Mon Feb 10 13:33:25 2025 -0500

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.meissner | 435 +
 1 file changed, 435 insertions(+)

diff --git a/gcc/ChangeLog.meissner b/gcc/ChangeLog.meissner
index ce6974218485..b40bb93380c5 100644
--- a/gcc/ChangeLog.meissner
+++ b/gcc/ChangeLog.meissner
@@ -1,5 +1,440 @@
+ Branch work193, patch #31 
+
+Use architecture flags for defining _ARCH_PWR macros.
+
+For the newer architectures, this patch changes GCC to define the _ARCH_PWR
+macros using the new architecture flags instead of relying on isa options like
+-mpower10.
+
+The -mpower8-internal, -mpower10, -mpower11, and -mfuture options were removed.
+The -mpower11 and -mfuture options were removed completely, since they were 
just
+added in GCC 15. The other two options were marked as WarnRemoved, and the
+various ISA bits were removed.
+
+TARGET_POWER8, TARGET_POWER10, TARGET_POWER11, and TARGET_FUTURE were 
re-defined
+to use the architecture bits instead of the ISA bits.
+
+There are other internal isa bits that aren't removed with this patch because
+the built-in function support uses those bits.
+
+I have built both big endian and little endian bootstrap compilers and there
+were no regressions.
+
+Can I install this patch on the GCC 15 trunk?
+
+2025-02-10  Michael Meissner  
+
+gcc/
+
+   * config/rs6000/rs6000-c.cc (rs6000_target_modify_macros): Add support to
+   use architecture flags instead of ISA flags for setting most of the
+   _ARCH_PWR* macros.
+   (rs6000_cpu_cpp_builtins): Update rs6000_target_modify_macros call.
+   * config/rs6000/rs6000-cpus.def (ISA_2_7_MASKS_SERVER): Remove
+   OPTION_MASK_POWER8.
+   (ISA_3_1_MASKS_SERVER): Remove OPTION_MASK_POWER10.
+   (POWER11_MASKS_SERVER): Remove OPTION_MASK_POWER11.
+   (FUTURE_MASKS_SERVER): Remove OPTION_MASK_FUTURE.
+   (POWERPC_MASKS): Remove OPTION_MASK_POWER8, OPTION_MASK_POWER10,
+   OPTION_MASK_POWER11, and OPTION_MASK_FUTURE.
+   * config/rs6000/rs6000-protos.h (rs6000_target_modify_macros): Update
+   declaration.
+   (rs6000_target_modify_macros_ptr): Likewise.
+   * config/rs6000/rs6000.cc (rs6000_target_modify_macros_ptr): Likewise.
+   (rs6000_option_override_internal): Use architecture flags instead of ISA
+   flags.
+   (rs6000_opt_masks): Remove -mpower10, -mpower11, and -mfuture which are
+   no longer in the ISA flags.
+   (rs6000_pragma_target_parse): Use architecture flags as well as ISA
+   flags.
+   * config/rs6000/rs6000.h (TARGET_POWER5): Redefine to use architecture
+   flags.
+   (TARGET_POWER5X): Likewise.
+   (TARGET_POWER6): Likewise.
+   (TARGET_POWER7): Likewise.
+   (TARGET_POWER8): Likewise.
+   (TARGET_POWER9): Likewise.
+   (TARGET_POWER10): New macro.
+   (TARGET_POWER11): Likewise.
+   (TARGET_FUTURE): Likewise.
+   * config/rs6000/rs6000.opt (-mpower8-internal): Remove ISA flag bits.
+   (-mpower10): Likewise.
+   (-mpower11): Likewise.
+   (-mfuture): Likewise.
+
+ Branch work193, patch #30 
+
+Add rs6000 architecture masks.
+
+This patch begins the journey to move architecture bits that are not user ISA
+options from rs6000_isa_flags to a new target variable rs6000_arch_flags.  The
+intention is to remove switches that are currently isa options, but the user
+should not be using this particular option. For example, we want users to use
+-mcpu=power10 and not just -mpower10.
+
+This patch also changes the target_clones support to use an architecture mask
+instead of isa bits.
+
+This patch also switches the handling of .machine to use architecture masks if
+they exist (power4 through power11).  All of the other PowerPCs will continue 
to
+use the existing code for setting the .machine option.
+
+I have built both big endian and little endian bootstrap compilers and there
+were no regressions.
+
+In addition, I constructed a test case that used every architecture define (like
+_ARCH_PWR4, etc.) and I also looked at the .machine directive generated.  I ran
+this test for all supported combinations of -mcpu, big/little endian, and 32/64
+bit support.  Every single instance generated exactly the same code with the
+patches installed compared to the compiler before installing the patches.
+
+The only difference in this patch compared to the first version posted on
+November 6th is that I added the correct attribution and copyright year (i.e. that I
+created rs6000-arch.def in 2024).
+
+Can I install this patch on the GCC 15 trunk?
+
+2025-02-10  Michael Meissner  
+
+gcc/
+
+   * config/rs6000/default64.h (TARGET_CPU_DEFAULT): Set default cpu name.
+   * config/rs6000/rs6000-arch.def: New file.
+   * conf

[gcc/meissner/heads/work193-bugs] (15 commits) Merge commit 'refs/users/meissner/heads/work193-bugs' of gi

2025-02-10 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work193-bugs' was updated to point to:

 1b9ca11ca4cc... Merge commit 'refs/users/meissner/heads/work193-bugs' of gi

It previously pointed to:

 22f27cbf845b... Add ChangeLog.bugs and update REVISION.

Diff:

Summary of changes (added commits):
---

  1b9ca11... Merge commit 'refs/users/meissner/heads/work193-bugs' of gi
  e6da74b... Add ChangeLog.bugs and update REVISION.
  bdec924... Update ChangeLog.* (*)
  1d50d31... Use architecture flags for defining _ARCH_PWR macros. (*)
  c971542... Add rs6000 architecture masks. (*)
  ca3f83e... Do not allow -mvsx to boost processor to power7. (*)
  72f9f6a... Use vector pair load/store for memcpy with -mcpu=future (*)
  41d43fa... Add -mcpu=future tests. (*)
  fb5a0aa... Add -mcpu=future tuning support. (*)
  fe5752a... Add support for -mcpu=future (*)
  786fa7e... Change TARGET_MODULO to TARGET_POWER9. (*)
  c71e1b6... Change TARGET_POPCNTD to TARGET_POWER7. (*)
  4f65877... Change TARGET_CMPB to TARGET_POWER6. (*)
  b6bac5b... Change TARGET_FPRND to TARGET_POWER5X. (*)
  ab2c7aa... Change TARGET_POPCNTB to TARGET_POWER5. (*)

(*) This commit already exists in another branch.
Because the reference `refs/users/meissner/heads/work193-bugs' matches
your hooks.email-new-commits-only configuration,
no separate email is sent for this commit.


[gcc(refs/users/meissner/heads/work193-bugs)] Merge commit 'refs/users/meissner/heads/work193-bugs' of git+ssh://gcc.gnu.org/git/gcc into me/work1

2025-02-10 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:1b9ca11ca4cc175924fb8180cc1858919af81c53

commit 1b9ca11ca4cc175924fb8180cc1858919af81c53
Merge: e6da74bf3f29 22f27cbf845b
Author: Michael Meissner 
Date:   Mon Feb 10 13:33:45 2025 -0500

Merge commit 'refs/users/meissner/heads/work193-bugs' of 
git+ssh://gcc.gnu.org/git/gcc into me/work193-bugs

Diff:


[gcc(refs/users/meissner/heads/work193-bugs)] Add ChangeLog.bugs and update REVISION.

2025-02-10 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:e6da74bf3f29a5b9cd9a67f62b4bc5ac57b53289

commit e6da74bf3f29a5b9cd9a67f62b4bc5ac57b53289
Author: Michael Meissner 
Date:   Mon Feb 10 13:14:22 2025 -0500

Add ChangeLog.bugs and update REVISION.

2025-02-10  Michael Meissner  

gcc/

* ChangeLog.bugs: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.bugs | 5 +
 gcc/REVISION   | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.bugs b/gcc/ChangeLog.bugs
new file mode 100644
index ..d6cb192a2cf3
--- /dev/null
+++ b/gcc/ChangeLog.bugs
@@ -0,0 +1,5 @@
+ Branch work193-bugs, baseline 
+
+2025-02-10   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index 721d4c312fe3..211d21a037a6 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work193 branch
+work193-bugs branch


[gcc/meissner/heads/work193-dmf] (15 commits) Merge commit 'refs/users/meissner/heads/work193-dmf' of git

2025-02-10 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work193-dmf' was updated to point to:

 c6aad9d409ba... Merge commit 'refs/users/meissner/heads/work193-dmf' of git

It previously pointed to:

 acb0f0a57632... Add ChangeLog.dmf and update REVISION.

Diff:

Summary of changes (added commits):
---

  c6aad9d... Merge commit 'refs/users/meissner/heads/work193-dmf' of git
  7e0070b... Add ChangeLog.dmf and update REVISION.
  bdec924... Update ChangeLog.* (*)
  1d50d31... Use architecture flags for defining _ARCH_PWR macros. (*)
  c971542... Add rs6000 architecture masks. (*)
  ca3f83e... Do not allow -mvsx to boost processor to power7. (*)
  72f9f6a... Use vector pair load/store for memcpy with -mcpu=future (*)
  41d43fa... Add -mcpu=future tests. (*)
  fb5a0aa... Add -mcpu=future tuning support. (*)
  fe5752a... Add support for -mcpu=future (*)
  786fa7e... Change TARGET_MODULO to TARGET_POWER9. (*)
  c71e1b6... Change TARGET_POPCNTD to TARGET_POWER7. (*)
  4f65877... Change TARGET_CMPB to TARGET_POWER6. (*)
  b6bac5b... Change TARGET_FPRND to TARGET_POWER5X. (*)
  ab2c7aa... Change TARGET_POPCNTB to TARGET_POWER5. (*)

(*) This commit already exists in another branch.
Because the reference `refs/users/meissner/heads/work193-dmf' matches
your hooks.email-new-commits-only configuration,
no separate email is sent for this commit.


[gcc(refs/users/meissner/heads/work193-dmf)] Add ChangeLog.dmf and update REVISION.

2025-02-10 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:7e0070b251669d4dff366217dbced4a0145157f9

commit 7e0070b251669d4dff366217dbced4a0145157f9
Author: Michael Meissner 
Date:   Mon Feb 10 13:12:38 2025 -0500

Add ChangeLog.dmf and update REVISION.

2025-02-10  Michael Meissner  

gcc/

* ChangeLog.dmf: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.dmf | 5 +
 gcc/REVISION  | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.dmf b/gcc/ChangeLog.dmf
new file mode 100644
index ..76480163c36a
--- /dev/null
+++ b/gcc/ChangeLog.dmf
@@ -0,0 +1,5 @@
+ Branch work193-dmf, baseline 
+
+2025-02-10   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index 721d4c312fe3..982dab320ac0 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work193 branch
+work193-dmf branch


[gcc/meissner/heads/work193-math] (15 commits) Merge commit 'refs/users/meissner/heads/work193-math' of gi

2025-02-10 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work193-math' was updated to point to:

 a04dbcf268aa... Merge commit 'refs/users/meissner/heads/work193-math' of gi

It previously pointed to:

 14cb9df9b30f... Add ChangeLog.math and update REVISION.

Diff:

Summary of changes (added commits):
---

  a04dbcf... Merge commit 'refs/users/meissner/heads/work193-math' of gi
  74dc49e... Add ChangeLog.math and update REVISION.
  bdec924... Update ChangeLog.* (*)
  1d50d31... Use architecture flags for defining _ARCH_PWR macros. (*)
  c971542... Add rs6000 architecture masks. (*)
  ca3f83e... Do not allow -mvsx to boost processor to power7. (*)
  72f9f6a... Use vector pair load/store for memcpy with -mcpu=future (*)
  41d43fa... Add -mcpu=future tests. (*)
  fb5a0aa... Add -mcpu=future tuning support. (*)
  fe5752a... Add support for -mcpu=future (*)
  786fa7e... Change TARGET_MODULO to TARGET_POWER9. (*)
  c71e1b6... Change TARGET_POPCNTD to TARGET_POWER7. (*)
  4f65877... Change TARGET_CMPB to TARGET_POWER6. (*)
  b6bac5b... Change TARGET_FPRND to TARGET_POWER5X. (*)
  ab2c7aa... Change TARGET_POPCNTB to TARGET_POWER5. (*)

(*) This commit already exists in another branch.
Because the reference `refs/users/meissner/heads/work193-math' matches
your hooks.email-new-commits-only configuration,
no separate email is sent for this commit.


[gcc(refs/users/meissner/heads/work193-math)] Add ChangeLog.math and update REVISION.

2025-02-10 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:74dc49eefad8eb7958cf1de83e9d2c069eee889d

commit 74dc49eefad8eb7958cf1de83e9d2c069eee889d
Author: Michael Meissner 
Date:   Mon Feb 10 13:17:41 2025 -0500

Add ChangeLog.math and update REVISION.

2025-02-10  Michael Meissner  

gcc/

* ChangeLog.math: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.math | 5 +
 gcc/REVISION   | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.math b/gcc/ChangeLog.math
new file mode 100644
index ..c08127e850ef
--- /dev/null
+++ b/gcc/ChangeLog.math
@@ -0,0 +1,5 @@
+ Branch work193-math, baseline 
+
+2025-02-10   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index 721d4c312fe3..2035f7979ea2 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work193 branch
+work193-math branch


[gcc(refs/users/meissner/heads/work193-libs)] Add ChangeLog.libs and update REVISION.

2025-02-10 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:2b223e53b43c1ee25c30c9de695ea96d66169c25

commit 2b223e53b43c1ee25c30c9de695ea96d66169c25
Author: Michael Meissner 
Date:   Mon Feb 10 13:15:10 2025 -0500

Add ChangeLog.libs and update REVISION.

2025-02-10  Michael Meissner  

gcc/

* ChangeLog.libs: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.libs | 5 +
 gcc/REVISION   | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.libs b/gcc/ChangeLog.libs
new file mode 100644
index ..c1a193f23a62
--- /dev/null
+++ b/gcc/ChangeLog.libs
@@ -0,0 +1,5 @@
+ Branch work193-libs, baseline 
+
+2025-02-10   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index 721d4c312fe3..d82e18128d4f 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work193 branch
+work193-libs branch


[gcc(refs/users/meissner/heads/work193-libs)] Merge commit 'refs/users/meissner/heads/work193-libs' of git+ssh://gcc.gnu.org/git/gcc into me/work1

2025-02-10 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:5bddaf5dd4e63a0591031519a7c2df65292ffd0f

commit 5bddaf5dd4e63a0591031519a7c2df65292ffd0f
Merge: 2b223e53b43c db801a6c0d7e
Author: Michael Meissner 
Date:   Mon Feb 10 13:37:19 2025 -0500

Merge commit 'refs/users/meissner/heads/work193-libs' of 
git+ssh://gcc.gnu.org/git/gcc into me/work193-libs

Diff:


[gcc/meissner/heads/work193-sha] (15 commits) Merge commit 'refs/users/meissner/heads/work193-sha' of git

2025-02-10 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work193-sha' was updated to point to:

 88e0eb46db60... Merge commit 'refs/users/meissner/heads/work193-sha' of git

It previously pointed to:

 9dee8398d4b3... Add ChangeLog.sha and update REVISION.

Diff:

Summary of changes (added commits):
---

  88e0eb4... Merge commit 'refs/users/meissner/heads/work193-sha' of git
  0b35404... Add ChangeLog.sha and update REVISION.
  bdec924... Update ChangeLog.* (*)
  1d50d31... Use architecture flags for defining _ARCH_PWR macros. (*)
  c971542... Add rs6000 architecture masks. (*)
  ca3f83e... Do not allow -mvsx to boost processor to power7. (*)
  72f9f6a... Use vector pair load/store for memcpy with -mcpu=future (*)
  41d43fa... Add -mcpu=future tests. (*)
  fb5a0aa... Add -mcpu=future tuning support. (*)
  fe5752a... Add support for -mcpu=future (*)
  786fa7e... Change TARGET_MODULO to TARGET_POWER9. (*)
  c71e1b6... Change TARGET_POPCNTD to TARGET_POWER7. (*)
  4f65877... Change TARGET_CMPB to TARGET_POWER6. (*)
  b6bac5b... Change TARGET_FPRND to TARGET_POWER5X. (*)
  ab2c7aa... Change TARGET_POPCNTB to TARGET_POWER5. (*)

(*) This commit already exists in another branch.
Because the reference `refs/users/meissner/heads/work193-sha' matches
your hooks.email-new-commits-only configuration,
no separate email is sent for this commit.


[gcc(refs/users/meissner/heads/work193-math)] Merge commit 'refs/users/meissner/heads/work193-math' of git+ssh://gcc.gnu.org/git/gcc into me/work1

2025-02-10 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:a04dbcf268aad9cd6a349daba17842df60168c2d

commit a04dbcf268aad9cd6a349daba17842df60168c2d
Merge: 74dc49eefad8 14cb9df9b30f
Author: Michael Meissner 
Date:   Mon Feb 10 13:40:00 2025 -0500

Merge commit 'refs/users/meissner/heads/work193-math' of 
git+ssh://gcc.gnu.org/git/gcc into me/work193-math

Diff:


[gcc(refs/users/meissner/heads/work193-sha)] Add ChangeLog.sha and update REVISION.

2025-02-10 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:0b35404c79077a4dcf029c5c5e6fc8deb52122b9

commit 0b35404c79077a4dcf029c5c5e6fc8deb52122b9
Author: Michael Meissner 
Date:   Mon Feb 10 13:16:04 2025 -0500

Add ChangeLog.sha and update REVISION.

2025-02-10  Michael Meissner  

gcc/

* ChangeLog.sha: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.sha | 5 +
 gcc/REVISION  | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.sha b/gcc/ChangeLog.sha
new file mode 100644
index ..ff4861cd436b
--- /dev/null
+++ b/gcc/ChangeLog.sha
@@ -0,0 +1,5 @@
+ Branch work193-sha, baseline 
+
+2025-02-10   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index 721d4c312fe3..d23d27743a1a 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work193 branch
+work193-sha branch


[gcc(refs/users/meissner/heads/work193-test)] Add ChangeLog.test and update REVISION.

2025-02-10 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:d59bc281d5ad579e14bcc2640e596a70aac2550e

commit d59bc281d5ad579e14bcc2640e596a70aac2550e
Author: Michael Meissner 
Date:   Mon Feb 10 13:16:51 2025 -0500

Add ChangeLog.test and update REVISION.

2025-02-10  Michael Meissner  

gcc/

* ChangeLog.test: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.test | 5 +
 gcc/REVISION   | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.test b/gcc/ChangeLog.test
new file mode 100644
index ..1aa763f78d87
--- /dev/null
+++ b/gcc/ChangeLog.test
@@ -0,0 +1,5 @@
+ Branch work193-test, baseline 
+
+2025-02-10   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index 721d4c312fe3..8d47aa64eb1f 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work193 branch
+work193-test branch


[gcc(refs/users/meissner/heads/work193-sha)] Merge commit 'refs/users/meissner/heads/work193-sha' of git+ssh://gcc.gnu.org/git/gcc into me/work19

2025-02-10 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:88e0eb46db60c58b59729ff43b8a45bee033f6c5

commit 88e0eb46db60c58b59729ff43b8a45bee033f6c5
Merge: 0b35404c7907 9dee8398d4b3
Author: Michael Meissner 
Date:   Mon Feb 10 13:41:00 2025 -0500

Merge commit 'refs/users/meissner/heads/work193-sha' of 
git+ssh://gcc.gnu.org/git/gcc into me/work193-sha

Diff:


[gcc/meissner/heads/work193-test] (15 commits) Merge commit 'refs/users/meissner/heads/work193-test' of gi

2025-02-10 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work193-test' was updated to point to:

 06d708b6db39... Merge commit 'refs/users/meissner/heads/work193-test' of gi

It previously pointed to:

 07ce8e18f65d... Add ChangeLog.test and update REVISION.

Diff:

Summary of changes (added commits):
---

  06d708b... Merge commit 'refs/users/meissner/heads/work193-test' of gi
  d59bc28... Add ChangeLog.test and update REVISION.
  bdec924... Update ChangeLog.* (*)
  1d50d31... Use architecture flags for defining _ARCH_PWR macros. (*)
  c971542... Add rs6000 architecture masks. (*)
  ca3f83e... Do not allow -mvsx to boost processor to power7. (*)
  72f9f6a... Use vector pair load/store for memcpy with -mcpu=future (*)
  41d43fa... Add -mcpu=future tests. (*)
  fb5a0aa... Add -mcpu=future tuning support. (*)
  fe5752a... Add support for -mcpu=future (*)
  786fa7e... Change TARGET_MODULO to TARGET_POWER9. (*)
  c71e1b6... Change TARGET_POPCNTD to TARGET_POWER7. (*)
  4f65877... Change TARGET_CMPB to TARGET_POWER6. (*)
  b6bac5b... Change TARGET_FPRND to TARGET_POWER5X. (*)
  ab2c7aa... Change TARGET_POPCNTB to TARGET_POWER5. (*)

(*) This commit already exists in another branch.
Because the reference `refs/users/meissner/heads/work193-test' matches
your hooks.email-new-commits-only configuration,
no separate email is sent for this commit.


[gcc(refs/users/meissner/heads/work193-test)] Merge commit 'refs/users/meissner/heads/work193-test' of git+ssh://gcc.gnu.org/git/gcc into me/work1

2025-02-10 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:06d708b6db3956b1f791eabf04d9b747093065b2

commit 06d708b6db3956b1f791eabf04d9b747093065b2
Merge: d59bc281d5ad 07ce8e18f65d
Author: Michael Meissner 
Date:   Mon Feb 10 13:41:52 2025 -0500

Merge commit 'refs/users/meissner/heads/work193-test' of 
git+ssh://gcc.gnu.org/git/gcc into me/work193-test

Diff:


[gcc(refs/users/meissner/heads/work193-vpair)] Add ChangeLog.vpair and update REVISION.

2025-02-10 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:675bbd34db6bee420456ebef9c68b5745b03c3d3

commit 675bbd34db6bee420456ebef9c68b5745b03c3d3
Author: Michael Meissner 
Date:   Mon Feb 10 13:13:28 2025 -0500

Add ChangeLog.vpair and update REVISION.

2025-02-10  Michael Meissner  

gcc/

* ChangeLog.vpair: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.vpair | 5 +
 gcc/REVISION| 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.vpair b/gcc/ChangeLog.vpair
new file mode 100644
index ..4cad69c75ae7
--- /dev/null
+++ b/gcc/ChangeLog.vpair
@@ -0,0 +1,5 @@
+ Branch work193-vpair, baseline 
+
+2025-02-10   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index 721d4c312fe3..ab4953dfc953 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work193 branch
+work193-vpair branch


[gcc(refs/users/meissner/heads/work193-vpair)] Merge commit 'refs/users/meissner/heads/work193-vpair' of git+ssh://gcc.gnu.org/git/gcc into me/work

2025-02-10 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:80a327f8ff75b894b5f74eadd253e4b9fb4f0225

commit 80a327f8ff75b894b5f74eadd253e4b9fb4f0225
Merge: 675bbd34db6b 21667fdfb9f9
Author: Michael Meissner 
Date:   Mon Feb 10 13:42:47 2025 -0500

Merge commit 'refs/users/meissner/heads/work193-vpair' of 
git+ssh://gcc.gnu.org/git/gcc into me/work193-vpair

Diff:


[gcc/meissner/heads/work193-vpair] (15 commits) Merge commit 'refs/users/meissner/heads/work193-vpair' of g

2025-02-10 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work193-vpair' was updated to point to:

 80a327f8ff75... Merge commit 'refs/users/meissner/heads/work193-vpair' of g

It previously pointed to:

 21667fdfb9f9... Add ChangeLog.vpair and update REVISION.

Diff:

Summary of changes (added commits):
---

  80a327f... Merge commit 'refs/users/meissner/heads/work193-vpair' of g
  675bbd3... Add ChangeLog.vpair and update REVISION.
  bdec924... Update ChangeLog.* (*)
  1d50d31... Use architecture flags for defining _ARCH_PWR macros. (*)
  c971542... Add rs6000 architecture masks. (*)
  ca3f83e... Do not allow -mvsx to boost processor to power7. (*)
  72f9f6a... Use vector pair load/store for memcpy with -mcpu=future (*)
  41d43fa... Add -mcpu=future tests. (*)
  fb5a0aa... Add -mcpu=future tuning support. (*)
  fe5752a... Add support for -mcpu=future (*)
  786fa7e... Change TARGET_MODULO to TARGET_POWER9. (*)
  c71e1b6... Change TARGET_POPCNTD to TARGET_POWER7. (*)
  4f65877... Change TARGET_CMPB to TARGET_POWER6. (*)
  b6bac5b... Change TARGET_FPRND to TARGET_POWER5X. (*)
  ab2c7aa... Change TARGET_POPCNTB to TARGET_POWER5. (*)

(*) This commit already exists in another branch.
Because the reference `refs/users/meissner/heads/work193-vpair' matches
your hooks.email-new-commits-only configuration,
no separate email is sent for this commit.


[gcc/meissner/heads/work193-libs] (15 commits) Merge commit 'refs/users/meissner/heads/work193-libs' of gi

2025-02-10 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work193-libs' was updated to point to:

 5bddaf5dd4e6... Merge commit 'refs/users/meissner/heads/work193-libs' of gi

It previously pointed to:

 db801a6c0d7e... Add ChangeLog.libs and update REVISION.

Diff:

Summary of changes (added commits):
---

  5bddaf5... Merge commit 'refs/users/meissner/heads/work193-libs' of gi
  2b223e5... Add ChangeLog.libs and update REVISION.
  bdec924... Update ChangeLog.* (*)
  1d50d31... Use architecture flags for defining _ARCH_PWR macros. (*)
  c971542... Add rs6000 architecture masks. (*)
  ca3f83e... Do not allow -mvsx to boost processor to power7. (*)
  72f9f6a... Use vector pair load/store for memcpy with -mcpu=future (*)
  41d43fa... Add -mcpu=future tests. (*)
  fb5a0aa... Add -mcpu=future tuning support. (*)
  fe5752a... Add support for -mcpu=future (*)
  786fa7e... Change TARGET_MODULO to TARGET_POWER9. (*)
  c71e1b6... Change TARGET_POPCNTD to TARGET_POWER7. (*)
  4f65877... Change TARGET_CMPB to TARGET_POWER6. (*)
  b6bac5b... Change TARGET_FPRND to TARGET_POWER5X. (*)
  ab2c7aa... Change TARGET_POPCNTB to TARGET_POWER5. (*)

(*) This commit already exists in another branch.
Because the reference `refs/users/meissner/heads/work193-libs' matches
your hooks.email-new-commits-only configuration,
no separate email is sent for this commit.


[gcc(refs/users/meissner/heads/work193-bugs)] Update ChangeLog.*

2025-02-10 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:a330f670274d1c7aaed4f853a3825f76f713257f

commit a330f670274d1c7aaed4f853a3825f76f713257f
Author: Michael Meissner 
Date:   Mon Feb 10 13:48:06 2025 -0500

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.bugs | 168 +
 1 file changed, 168 insertions(+)

diff --git a/gcc/ChangeLog.bugs b/gcc/ChangeLog.bugs
index d6cb192a2cf3..64df6453aa9b 100644
--- a/gcc/ChangeLog.bugs
+++ b/gcc/ChangeLog.bugs
@@ -1,5 +1,173 @@
+ Branch work193-bugs, patch #202 
+
+PR target/108958 -- use mtvsrdd to zero extend GPR DImode to VSX TImode
+
+Previously GCC would zero extend a DImode GPR value to TImode by first zero
+extending the DImode value into a GPR TImode value, and then do a MTVSRDD to
+move this value to a VSX register.
+
+This patch does the move directly, since if the middle argument to MTVSRDD is 0,
+it does the zero extend.
+
+If the DImode value is already in a vector register, it does a XXSPLTIB and
+XXPERMDI to get the value into the bottom 64-bits of the register.
+
+I have built GCC with the patches in this patch set applied on both little and
+big endian PowerPC systems and there were no regressions.  Can I apply this
+patch to GCC 15?
+
+2025-02-10  Michael Meissner  
+
+gcc/
+
+   PR target/108958
+   * gcc/config/rs6000/rs6000.md (zero_extendditi2): New insn.
+
+gcc/testsuite/
+
+   PR target/108958
+   * gcc.target/powerpc/pr108958.c: New test.
+
+ Branch work193-bugs, patch #201 
+
+Add power9 and power10 float to logical optimizations.
+
+I was answering an email from a co-worker and I pointed him to work I had done
+for the Power8 era that optimizes the 32-bit float math library in Glibc.  In
+doing so, I discovered with the Power9 and later computers, this optimization
+is no longer taking place.
+
+The glibc 32-bit floating point math functions have code that looks like:
+
+   union u {
+ float f;
+ uint32_t u32;
+   };
+
+   float
+   math_foo (float x, unsigned int mask)
+   {
+ union u arg;
+ float x2;
+
+ arg.f = x;
+ arg.u32 &= mask;
+
+ x2 = arg.f;
+ /* ... */
+   }
+
+On power8 with the optimization it generates:
+
+xscvdpspn 0,1
+sldi 9,4,32
+mtvsrd 32,9
+xxland 1,0,32
+xscvspdpn 1,1
+
+I.e., it converts the SFmode to the memory format (instead of the DFmode that
+is used within the register), converts the mask so that it is in the vector
+register in the upper 32-bits, and does a XXLAND (i.e. there is only one direct
+move from GPR to vector register).  Then after doing this, it converts the
+upper 32-bits back to DFmode.
+
+If the XSCVSPDPN instruction took the value in the normal 32-bit scalar in a
+vector register, we wouldn't have needed the SLDI of the mask.
+
+On power9/power10/power11 it currently generates:
+
+xscvdpspn 0,1
+mfvsrwz 2,0
+and 2,2,4
+mtvsrws 1,2
+xscvspdpn 1,1
+blr
+
+I.e. convert to SFmode representation, move the value to a GPR, do an AND
+operation, move the 32-bit value with a splat, and then convert it back to
+DFmode format.
+
+With this patch, it now generates:
+
+xscvdpspn 0,1
+mtvsrwz 32,2
+xxland 32,0,32
+xxspltw 1,32,1
+xscvspdpn 1,1
+blr
+
+I.e. convert to SFmode representation, move the mask to the vector register, do
+the operation using XXLAND.  Splat the value to get the value in the correct
+location, and then convert back to DFmode.
+
+I have built GCC with the patches in this patch set applied on both little and
+big endian PowerPC systems and there were no regressions.  Can I apply this
+patch to GCC 15?
+
+2025-02-10  Michael Meissner  
+
+gcc/
+
+   PR target/117487
+   * config/rs6000/vsx.md (SFmode logical peephole): Update comments in
+   the original code that supports power8.  Add a new define_peephole2 to
+   do the optimization on power9/power10.
+
+ Branch work193-bugs, patch #200 
+
+PR 99293: Optimize splat of a V2DF/V2DI extract with constant element
+
+We had optimizations for splat of a vector extract for the other vector
+types, but we missed having one for V2DI and V2DF.  This patch adds a
+combiner insn to do this optimization.
+
+In looking at the source, we had similar optimizations for V4SI and V4SF
+extract and splats, but we missed doing V2DI/V2DF.
+
+Without the patch for the code:
+
+   vector long long splat_dup_l_0 (vector long long v)
+   {
+ return __builtin_vec_splats (__builtin_vec_extract (v, 0));
+   }
+
+the compiler generates (on a little endian power9):
+
+   splat_dup_l_0:
+   mfvsrld 9,34
+   mtvsrdd 34,9,9
+   blr
+
+Now it generates:
+
+   splat_dup_l_0:
+   xxpermdi 34,34,34,3
+ 

[gcc(refs/users/meissner/heads/work193-bugs)] PR 99293: Optimize splat of a V2DF/V2DI extract with constant element

2025-02-10 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:52cf961d8d7af8c4235e5670388e92a1561b45a5

commit 52cf961d8d7af8c4235e5670388e92a1561b45a5
Author: Michael Meissner 
Date:   Mon Feb 10 13:44:50 2025 -0500

PR 99293: Optimize splat of a V2DF/V2DI extract with constant element

We had optimizations for splat of a vector extract for the other vector
types, but we missed having one for V2DI and V2DF.  This patch adds a
combiner insn to do this optimization.

In looking at the source, we had similar optimizations for V4SI and V4SF
extract and splats, but we missed doing V2DI/V2DF.

Without the patch for the code:

vector long long splat_dup_l_0 (vector long long v)
{
  return __builtin_vec_splats (__builtin_vec_extract (v, 0));
}

the compiler generates (on a little endian power9):

splat_dup_l_0:
mfvsrld 9,34
mtvsrdd 34,9,9
blr

Now it generates:

splat_dup_l_0:
xxpermdi 34,34,34,3
blr

2025-02-10  Michael Meissner  

gcc/

PR target/99293
* config/rs6000/vsx.md (vsx_splat_extract_): New insn.

gcc/testsuite/

PR target/99293
* gcc.target/powerpc/builtins-1.c: Adjust insn count.
* gcc.target/powerpc/pr99293.c: New test.

Diff:
---
 gcc/config/rs6000/vsx.md  | 18 ++
 gcc/testsuite/gcc.target/powerpc/builtins-1.c |  2 +-
 gcc/testsuite/gcc.target/powerpc/pr99293.c| 22 ++
 3 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index dd3573b80868..d84a2a357a31 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -4798,6 +4798,24 @@
   "lxvdsx %x0,%y1"
   [(set_attr "type" "vecload")])
 
+;; Optimize SPLAT of an extract from a V2DF/V2DI vector with a constant element
+(define_insn "*vsx_splat_extract_"
+  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
+   (vec_duplicate:VSX_D
+(vec_select:
+ (match_operand:VSX_D 1 "vsx_register_operand" "wa")
+ (parallel [(match_operand 2 "const_0_to_1_operand" "n")]]
+  "VECTOR_MEM_VSX_P (mode)"
+{
+  int which_word = INTVAL (operands[2]);
+  if (!BYTES_BIG_ENDIAN)
+which_word = 1 - which_word;
+
+  operands[3] = GEN_INT (which_word ? 3 : 0);
+  return "xxpermdi %x0,%x1,%x1,%3";
+}
+  [(set_attr "type" "vecperm")])
+
 ;; V4SI splat support
 (define_insn "vsx_splat_v4si"
   [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,wa")
diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-1.c b/gcc/testsuite/gcc.target/powerpc/builtins-1.c
index 8410a5fd4319..4e7e5384675f 100644
--- a/gcc/testsuite/gcc.target/powerpc/builtins-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/builtins-1.c
@@ -1035,4 +1035,4 @@ foo156 (vector unsigned short usa)
 /* { dg-final { scan-assembler-times {\mvmrglb\M} 3 } } */
 /* { dg-final { scan-assembler-times {\mvmrgew\M} 4 } } */
 /* { dg-final { scan-assembler-times {\mvsplth|xxsplth\M} 4 } } */
-/* { dg-final { scan-assembler-times {\mxxpermdi\M} 44 } } */
+/* { dg-final { scan-assembler-times {\mxxpermdi\M} 42 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr99293.c b/gcc/testsuite/gcc.target/powerpc/pr99293.c
new file mode 100644
index ..20adc1f27f65
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr99293.c
@@ -0,0 +1,22 @@
+/* { dg-do compile { target powerpc*-*-* } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -mvsx" } */
+
+/* Test for PR 99293, which wants to do:
+   __builtin_vec_splats (__builtin_vec_extract (v, n))
+
+   where v is a V2DF or V2DI vector and n is either 0 or 1.  Previously the
+   compiler would do a direct move to the GPR registers to select the item and a
+   direct move from the GPR registers to do the splat.  */
+
+vector long long splat_dup_l_0 (vector long long v)
+{
+  return __builtin_vec_splats (__builtin_vec_extract (v, 0));
+}
+
+vector long long splat_dup_l_1 (vector long long v)
+{
+  return __builtin_vec_splats (__builtin_vec_extract (v, 1));
+}
+
+/* { dg-final { scan-assembler-times "xxpermdi" 2 } } */


[gcc(refs/users/meissner/heads/work193-bugs)] Add power9 and power10 float to logical optimizations.

2025-02-10 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:a6fe10a0686da3995463f4fe0a84089cd4522931

commit a6fe10a0686da3995463f4fe0a84089cd4522931
Author: Michael Meissner 
Date:   Mon Feb 10 13:45:36 2025 -0500

Add power9 and power10 float to logical optimizations.

I was answering an email from a co-worker and I pointed him to work I had done
for the Power8 era that optimizes the 32-bit float math library in Glibc.  In
doing so, I discovered with the Power9 and later computers, this optimization
is no longer taking place.

The glibc 32-bit floating point math functions have code that looks like:

union u {
  float f;
  uint32_t u32;
};

float
math_foo (float x, unsigned int mask)
{
  union u arg;
  float x2;

  arg.f = x;
  arg.u32 &= mask;

  x2 = arg.f;
  /* ... */
}

On power8 with the optimization it generates:

xscvdpspn 0,1
sldi 9,4,32
mtvsrd 32,9
xxland 1,0,32
xscvspdpn 1,1

I.e., it converts the SFmode to the memory format (instead of the DFmode that
is used within the register), converts the mask so that it is in the vector
register in the upper 32-bits, and does a XXLAND (i.e. there is only one direct
move from GPR to vector register).  Then after doing this, it converts the
upper 32-bits back to DFmode.

If the XSCVSPDPN instruction took the value in the normal 32-bit scalar in a
vector register, we wouldn't have needed the SLDI of the mask.

On power9/power10/power11 it currently generates:

xscvdpspn 0,1
mfvsrwz 2,0
and 2,2,4
mtvsrws 1,2
xscvspdpn 1,1
blr

I.e. convert to SFmode representation, move the value to a GPR, do an AND
operation, move the 32-bit value with a splat, and then convert it back to
DFmode format.

With this patch, it now generates:

xscvdpspn 0,1
mtvsrwz 32,2
xxland 32,0,32
xxspltw 1,32,1
xscvspdpn 1,1
blr

I.e. convert to SFmode representation, move the mask to the vector register, do
the operation using XXLAND.  Splat the value to get the value in the correct
location, and then convert back to DFmode.

I have built GCC with the patches in this patch set applied on both little and
big endian PowerPC systems and there were no regressions.  Can I apply this
patch to GCC 15?

2025-02-10  Michael Meissner  

gcc/

PR target/117487
* config/rs6000/vsx.md (SFmode logical peephole): Update comments in
the original code that supports power8.  Add a new define_peephole2 to
do the optimization on power9/power10.

Diff:
---
 gcc/config/rs6000/vsx.md| 142 +++-
 gcc/testsuite/gcc.target/powerpc/pr108958.c |   0
 2 files changed, 137 insertions(+), 5 deletions(-)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index d84a2a357a31..f47c4e2f7766 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -6282,7 +6282,7 @@
(SFBOOL_MFVSR_A  3) ;; move to gpr src
(SFBOOL_BOOL_D   4) ;; and/ior/xor dest
(SFBOOL_BOOL_A1  5) ;; and/ior/xor arg1
-   (SFBOOL_BOOL_A2  6) ;; and/ior/xor arg1
+   (SFBOOL_BOOL_A2  6) ;; and/ior/xor arg2
(SFBOOL_SHL_D7) ;; shift left dest
(SFBOOL_SHL_A8) ;; shift left arg
(SFBOOL_MTVSR_D  9) ;; move to vecter dest
@@ -6322,18 +6322,18 @@
 ;; GPR, and instead move the integer mask value to the vector register after a
 ;; shift and do the VSX logical operation.
 
-;; The insns for dealing with SFmode in GPR registers looks like:
+;; The insns for dealing with SFmode in GPR registers looks like on power8:
 ;; (set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN))
 ;;
-;; (set (reg:DI reg3) (unspec:DI [(reg:V4SF reg2)] UNSPEC_P8V_RELOAD_FROM_VSX))
+;; (set (reg:DI reg3) (zero_extend:DI (reg:SI reg2)))
 ;;
-;; (set (reg:DI reg4) (and:DI (reg:DI reg3) (reg:DI reg3)))
+;; (set (reg:DI reg4) (and:SI (reg:SI reg3) (reg:SI mask)))
 ;;
 ;; (set (reg:DI reg5) (ashift:DI (reg:DI reg4) (const_int 32)))
 ;;
 ;; (set (reg:SF reg6) (unspec:SF [(reg:DI reg5)] UNSPEC_P8V_MTVSRD))
 ;;
-;; (set (reg:SF reg6) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))
+;; (set (reg:SF reg7) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))
 
 (define_peephole2
   [(match_scratch:DI SFBOOL_TMP_GPR "r")
@@ -6414,6 +6414,138 @@
   operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d);
 })
 
+;; Constants for 

[gcc(refs/users/meissner/heads/work193-bugs)] PR target/108958 -- use mtvsrdd to zero extend GPR DImode to VSX TImode

2025-02-10 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:9c843cbc5fb2cc64f0074e9a143941ef610a1bf4

commit 9c843cbc5fb2cc64f0074e9a143941ef610a1bf4
Author: Michael Meissner 
Date:   Mon Feb 10 13:46:19 2025 -0500

PR target/108958 -- use mtvsrdd to zero extend GPR DImode to VSX TImode

Previously GCC would zero extend a DImode GPR value to TImode by first zero
extending the DImode value into a GPR TImode value, and then do a MTVSRDD to
move this value to a VSX register.

This patch does the move directly, since if the middle argument to MTVSRDD is 0,
it does the zero extend.

If the DImode value is already in a vector register, it does a XXSPLTIB and
XXPERMDI to get the value into the bottom 64-bits of the register.

I have built GCC with the patches in this patch set applied on both little and
big endian PowerPC systems and there were no regressions.  Can I apply this
patch to GCC 15?

2025-02-10  Michael Meissner  

gcc/

PR target/108958
* gcc/config/rs6000/rs6000.md (zero_extendditi2): New insn.

gcc/testsuite/

PR target/108958
* gcc.target/powerpc/pr108958.c: New test.

Diff:
---
 gcc/config/rs6000/rs6000.md | 46 +
 gcc/testsuite/gcc.target/powerpc/pr108958.c | 27 +
 2 files changed, 73 insertions(+)

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 4c2bc81caf56..65da0c653304 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -1026,6 +1026,52 @@
(set_attr "dot" "yes")
(set_attr "length" "4,8")])
 
+(define_insn_and_split "zero_extendditi2"
+  [(set (match_operand:TI 0 "gpc_reg_operand" "=r,wa,&wa")
+   (zero_extend:TI
+(match_operand:DI 1 "gpc_reg_operand" "rwa,r,wa")))]
+  "TARGET_P9_VECTOR && TARGET_POWERPC64"
+  "@
+  #
+  mtvsrdd %x0,0,%1
+  #"
+  "&& reload_completed
+   && (int_reg_operand (operands[0], TImode)
+   || vsx_register_operand (operands[1], DImode))"
+  [(set (match_dup 2)
+   (match_dup 3))
+   (set (match_dup 4)
+   (match_dup 5))]
+{
+  rtx op0 = operands[0];
+  rtx op1 = operands[1];
+  int r = reg_or_subregno (op0);
+
+  if (int_reg_operand (op0, TImode))
+{
+  int lo = BYTES_BIG_ENDIAN ? 1 : 0;
+  int hi = 1 - lo;
+
+  operands[2] = gen_rtx_REG (DImode, r + lo);
+  operands[3] = op1;
+  operands[4] = gen_rtx_REG (DImode, r + hi);
+  operands[5] = const0_rtx;
+}
+  else
+{
+  rtx op0_di = gen_rtx_REG (DImode, r);
+  rtx op0_v2di = gen_rtx_REG (V2DImode, r);
+  rtx lo = WORDS_BIG_ENDIAN ? op1 : op0_di;
+  rtx hi = WORDS_BIG_ENDIAN ? op0_di : op1;
+
+  operands[2] = op0_v2di;
+  operands[3] = CONST0_RTX (V2DImode);
+  operands[4] = op0_v2di;
+  operands[5] = gen_rtx_VEC_CONCAT (V2DImode, hi, lo);
+}
+}
+  [(set_attr "type" "*,mtvsr,vecperm")
+   (set_attr "length" "8,*,8")])
 
 (define_insn "extendqi2"
   [(set (match_operand:EXTQI 0 "gpc_reg_operand" "=r,?*v")
diff --git a/gcc/testsuite/gcc.target/powerpc/pr108958.c b/gcc/testsuite/gcc.target/powerpc/pr108958.c
index e69de29bb2d1..03eb58d069e7 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr108958.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr108958.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target int128 } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
+
+/* PR target/108958, use mtvsrdd to zero extend gpr to vsx register.  */
+
+void
+gpr_to_vsx (unsigned long long x, __uint128_t *p)
+{
+  /* mtvsrdd vsx,0,gpr.  */
+  __uint128_t y = x;
+  __asm__ (" # %x0" : "+wa" (y));
+  *p = y;
+}
+
+void
+gpr_to_gpr (unsigned long long x, __uint128_t *p)
+{
+  /* mr and li.  */
+  __uint128_t y = x;
+  __asm__ (" # %0" : "+r" (y));
+  *p = y;
+}
+
+/* { dg-final { scan-assembler-times {\mli\M}  1 } } */
+/* { dg-final { scan-assembler-times {\mmtvsrdd .*,0,.*\M} 1 } } */


[gcc(refs/users/meissner/heads/work193-dmf)] RFC2653-Add wD constraint.

2025-02-10 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:dedb9ec5f4ee92709990185b90e43ed1a3b687cf

commit dedb9ec5f4ee92709990185b90e43ed1a3b687cf
Author: Michael Meissner 
Date:   Mon Feb 10 13:56:48 2025 -0500

RFC2653-Add wD constraint.

This patch adds a new constraint ('wD') that matches the accumulator registers
that overlap with VSX registers 0..31 on power10.  Future patches will add the
support for a separate accumulator register class that will be used when the
support for dense math registers is added.

2025-02-10   Michael Meissner  

* config/rs6000/constraints.md (wD): New constraint.
* config/rs6000/mma.md (mma_): Prepare for alternate 
accumulator
registers.  Use wD constraint instead of 'd' constraint.  Use
accumulator_operand instead of fpr_reg_operand.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")]
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0")]
MMA_ACC))]
   "TARGET_MMA"
   " %A0"
@@ -523,7 +523,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
MMA_VV))]
@@ -532,8 +532,8 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
MMA_AVV))]
@@ -542,7 +542,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
MMA_PV))]
@@ -551,8 +551,8 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
(match_operand:OO 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
MMA_APV))]
@@ -561,7 +561,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -574,8 +574,8 @@
(set_attr "prefixed" "yes")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
(match_operand:SI 4 "const_0_to_15_operand" "n,n")
@@ -588,7 +588,7 @@
(set_attr "prefixed" "yes")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -601,8 +601,8 @@
(set_attr "prefixed" "yes")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
   

[gcc(refs/users/meissner/heads/work193-sha)] PR target/117251: Add PowerPC XXEVAL support to speed up SHA3 calculations

2025-02-10 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:9b50fdc9dc218449c2c4b3d8c6d4c14df16ab2a9

commit 9b50fdc9dc218449c2c4b3d8c6d4c14df16ab2a9
Author: Michael Meissner 
Date:   Mon Feb 10 14:10:29 2025 -0500

PR target/117251: Add PowerPC XXEVAL support to speed up SHA3 calculations

The multibuff.c benchmark attached to PR target/117251, which implements SHA3,
has a slowdown when compiled for Power10 PowerPC with the current trunk and
GCC 14 compared to GCC 11 - GCC 13, due to excessive amounts of spilling.

The main function for the multibuf.c file has 3,747 lines, all of which are
using vector unsigned long long.  There are 696 vector rotates (all rotates 
are
constant), 1,824 vector xor's and 600 vector andc's.

In looking at it, the main thing that stands out is that the reason for either
spilling or moving variables is the support in fusion.md (generated by
genfusion.pl) that tries to fuse the vec_andc feeding into vec_xor, and other
vec_xor's feeding into vec_xor.

On the powerpc for power10, there is a special fusion mode that happens if 
the
machine has a VANDC or VXOR instruction that is adjacent to a VXOR 
instruction
and the VANDC/VXOR feeds into the 2nd VXOR instruction.

While the Power10 has 64 vector registers (which uses the XXL prefix to do
logical operations), the fusion only works with the older Altivec 
instruction
set (which uses the V prefix).  The Altivec instruction only has 32 vector
registers (which are overlaid over the VSX vector registers 32-63).

By having the combiner patterns fuse_vandc_vxor and fuse_vxor_vxor to do 
this
fusion, it means that the register allocator has more register pressure for 
the
traditional Altivec registers instead of the VSX registers.

In addition, since there are vector rotates, these rotates only work on the
traditional Altivec registers, which adds to the Altivec register pressure.

Finally in addition to doing the explicit xor, andc, and rotates using the
Altivec registers, we have to also load vector constants for the rotate 
amount
and these registers also are allocated as Altivec registers.

Current trunk and GCC 12-14 have more vector spills than GCC 11, but GCC 11 has
many more vector moves than the later compilers.  Thus even though it has far
fewer spills, the vector moves are why GCC 11 has the slowest results.

There is an instruction that was added in power10 (XXEVAL) that does provide
fusion between VSX vectors that includes ANDC->XOR and XOR->XOR fusion.

The latency of XXEVAL is slightly more than the fused VANDC/VXOR or 
VXOR/VXOR,
so I have written the patch to prefer doing the Altivec instructions if they
don't need a temporary register.

Here are the results for adding support for XXEVAL for the multibuff.c
benchmark attached to the PR.  Note that with this patch we essentially recover
the speed that was lost with GCC 14 and the current trunk:

                                  XXEVAL   Trunk   GCC14   GCC13   GCC12   GCC11
                                  ------   -----   -----   -----   -----   -----
Benchmark time in seconds           5.53    6.15    6.26    5.57    5.61    9.56

Fuse VANDC -> VXOR                   209     600     600     600     600     600
Fuse VXOR -> VXOR                      0     240     240     120     120     120
XXEVAL to fuse ANDC -> XOR           391       0       0       0       0       0
XXEVAL to fuse XOR -> XOR            240       0       0       0       0       0

Spill vector to stack                 78     364     364     172     184     110
Load spilled vector from stack       431     962     962     713     723     166
Vector moves                          10     100     100      70      72   3,055

Vector rotate right                  696     696     696     696     696     696
XXLANDC or VANDC                     209     600     600     600     600     600
XXLXOR or VXOR                       953   1,824   1,824   1,824   1,824   1,825
XXEVAL                               631       0       0       0       0       0

Load vector rotate constants          24      24      24      24      24      24

Here are the results for adding support for XXEVAL for the singlebuff.c
benchmark attached to the PR.  Note that adding XXEVAL greatly speeds up 
this
particular benchmark:

                                  XXEVAL   Trunk   GCC14   GCC13   GCC12   GCC11
                                  ------   -----   -----   -----   -----   -----
Benchmark time in seconds           4.46    5.40    5.40    5.35    5.36    7.54

Fuse VANDC -> VXOR                   210     600     600     600     600     600
Fuse VXOR -> VXOR                      0     240     240     120     120     120
XXEVAL to fuse ANDC -> XOR   3900   0 

[gcc(refs/users/meissner/heads/work193-sha)] Add potential p-future XVRLD and XVRLDI instructions.

2025-02-10 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:9178c9330696d93e3ca0932010824f48c6a88a34

commit 9178c9330696d93e3ca0932010824f48c6a88a34
Author: Michael Meissner 
Date:   Mon Feb 10 14:11:17 2025 -0500

Add potential p-future XVRLD and XVRLDI instructions.

2025-02-10  Michael Meissner  

gcc/

* config/rs6000/altivec.md (altivec_vrl): Add support for a
possible XVRLD instruction in the future.
(altivec_vrl_immediate): New insns.
* config/rs6000/predicates.md (vector_shift_immediate): New 
predicate.
* config/rs6000/rs6000.h (TARGET_XVRLW): New macro.
* config/rs6000/rs6000.md (isa attribute): Add xvrlw.
(enabled attribute): Add support for xvrlw.

gcc/testsuite/

* gcc.target/powerpc/vector-rotate-left.c: New test.
* lib/target-supports.exp 
(check_effective_target_powerpc_future_ok):
Add support to test -mcpu=future.

Diff:
---
 gcc/config/rs6000/altivec.md   | 35 +++---
 gcc/config/rs6000/predicates.md| 26 
 gcc/config/rs6000/rs6000.h |  3 ++
 gcc/config/rs6000/rs6000.md|  6 +++-
 .../gcc.target/powerpc/vector-rotate-left.c| 34 +
 gcc/testsuite/lib/target-supports.exp  | 12 
 6 files changed, 111 insertions(+), 5 deletions(-)

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 7edc288a6565..013960438b04 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -1982,12 +1982,39 @@
 }
   [(set_attr "type" "vecperm")])
 
+;; -mcpu=future adds a vector rotate left word variant.  There is no vector
+;; byte/half-word/double-word/quad-word rotate left.  This insn occurs before
+;; altivec_vrl and will match for -mcpu=future, while other cpus will
+;; match the generic insn.
+;; However for testing, allow other xvrl variants.  In particular, XVRLD for
+;; the sha3 tests for multibuf/singlebuf.
 (define_insn "altivec_vrl"
-  [(set (match_operand:VI2 0 "register_operand" "=v")
-(rotate:VI2 (match_operand:VI2 1 "register_operand" "v")
-   (match_operand:VI2 2 "register_operand" "v")))]
+  [(set (match_operand:VI2 0 "register_operand" "=v,wa")
+(rotate:VI2 (match_operand:VI2 1 "register_operand" "v,wa")
+   (match_operand:VI2 2 "register_operand" "v,wa")))]
   ""
-  "vrl %0,%1,%2"
+  "@
+   vrl %0,%1,%2
+   xvrl %x0,%x1,%x2"
+  [(set_attr "type" "vecsimple")
+   (set_attr "isa" "*,xvrlw")])
+
+(define_insn "*altivec_vrl_immediate"
+  [(set (match_operand:VI2 0 "register_operand" "=wa,wa,wa,wa")
+   (rotate:VI2 (match_operand:VI2 1 "register_operand" "wa,wa,wa,wa")
+   (match_operand:VI2 2 "vector_shift_immediate" 
"j,wM,wE,wS")))]
+  "TARGET_XVRLW && "
+{
+  rtx op2 = operands[2];
+  int value = 256;
+  int num_insns = -1;
+
+  if (!xxspltib_constant_p (op2, mode, &num_insns, &value))
+gcc_unreachable ();
+
+  operands[3] = GEN_INT (value & 0xff);
+  return "xvrli %x0,%x1,%3";
+}
   [(set_attr "type" "vecsimple")])
 
 (define_insn "altivec_vrlq"
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 6485ee3eeecc..276812573977 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -728,6 +728,32 @@
   return num_insns == 1;
 })
 
+;; Return 1 if the operand is a CONST_VECTOR whose elements are all the
+;; same and the elements can be an immediate shift or rotate factor
+(define_predicate "vector_shift_immediate"
+  (match_code "const_vector,vec_duplicate,const_int")
+{
+  int value = 256;
+  int num_insns = -1;
+
+  if (zero_constant (op, mode) || all_ones_constant (op, mode))
+return true;
+
+  if (!xxspltib_constant_p (op, mode, &num_insns, &value))
+return false;
+
+  switch (mode)
+{
+case V16QImode: return IN_RANGE (value, 0, 7);
+case V8HImode:  return IN_RANGE (value, 0, 15);
+case V4SImode:  return IN_RANGE (value, 0, 31);
+case V2DImode:  return IN_RANGE (value, 0, 63);
+default:break;
+}
+
+  return false;
+})
+  
 ;; Return 1 if the operand is a CONST_VECTOR and can be loaded into a
 ;; vector register without using memory.
 (define_predicate "easy_vector_constant"
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index ec08c96d0f67..00f6ff2be636 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -575,6 +575,9 @@ extern int rs6000_vector_align[];
below.  */
 #define RS6000_FN_TARGET_INFO_HTM 1
 
+/* Whether we have XVRLW support.  */
+#define TARGET_XVRLW   TARGET_FUTURE
+
 /* Whether the various reciprocal divide/square root estimate instructions
exist, and whether we should automatically generate code for the instruction
by default.  */
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 3b

[gcc r15-7460] This improves an error message, avoiding at ... at.

2025-02-10 Thread Thomas König via Gcc-cvs
https://gcc.gnu.org/g:d2ff1b78d70731db1b7adc1cbac7e44688828370

commit r15-7460-gd2ff1b78d70731db1b7adc1cbac7e44688828370
Author: Thomas Koenig 
Date:   Mon Feb 10 21:25:37 2025 +0100

This improves an error message, avoiding at ... at.

gcc/fortran/ChangeLog:

PR fortran/24878
* interface.cc (compare_parameter): Better wording on
error message.

gcc/testsuite/ChangeLog:

PR fortran/24878
* gfortran.dg/interface_51.f90: Adjust expected error message.

Diff:
---
 gcc/fortran/interface.cc   | 6 +++---
 gcc/testsuite/gfortran.dg/interface_51.f90 | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/gcc/fortran/interface.cc b/gcc/fortran/interface.cc
index 49677f15f13c..fdde84db80d0 100644
--- a/gcc/fortran/interface.cc
+++ b/gcc/fortran/interface.cc
@@ -2521,9 +2521,9 @@ compare_parameter (gfc_symbol *formal, gfc_expr *actual,
  gcc_assert (formal->attr.function);
  if (!gfc_compare_types (&global_asym->ts, &formal->ts))
{
- gfc_error ("Type mismatch passing global function %qs 
"
-"declared at %L at %L (%s/%s)",
-actual_name, &gsym->where, &actual->where,
+ gfc_error ("Type mismatch at %L passing global "
+"function %qs declared at %L (%s/%s)",
+&actual->where, actual_name, &gsym->where,
 gfc_typename (&global_asym->ts),
 gfc_dummy_typename (&formal->ts));
  return false;
diff --git a/gcc/testsuite/gfortran.dg/interface_51.f90 
b/gcc/testsuite/gfortran.dg/interface_51.f90
index c8371e81ec90..7f7576d93c5e 100644
--- a/gcc/testsuite/gfortran.dg/interface_51.f90
+++ b/gcc/testsuite/gfortran.dg/interface_51.f90
@@ -14,7 +14,7 @@ program memain
   
   call foo(subr) ! { dg-error "Passing global subroutine" }
   call bar(i4)   ! { dg-error "Passing global function" }
-  call baz(r4)   ! { dg-error "Type mismatch passing global function" }
+  call baz(r4)   ! { dg-error "Type mismatch" }
 end program memain
 
 subroutine foo(ifun)
@@ -39,7 +39,7 @@ integer(kind=4) function i4() ! { dg-error "Passing global 
function" }
   i4 = 42
 end function i4
 
-real(kind=4) function r4() ! { dg-error "Type mismatch passing global 
function" }
+real(kind=4) function r4() ! { dg-error "Type mismatch" }
   r4 = 1.0
 end function r4


[gcc(refs/users/mikael/heads/refactor_descriptor_v01)] Correction régression assumed_rank_7.f90

2025-02-10 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:ef9028419ab6ec6862822421f85e16c8bfdaa26d

commit ef9028419ab6ec6862822421f85e16c8bfdaa26d
Author: Mikael Morin 
Date:   Mon Feb 10 22:23:58 2025 +0100

Correction régression assumed_rank_7.f90

Diff:
---
 gcc/fortran/trans-expr.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/fortran/trans-expr.cc b/gcc/fortran/trans-expr.cc
index 24ae9ff8c31d..6ed87fc63a9b 100644
--- a/gcc/fortran/trans-expr.cc
+++ b/gcc/fortran/trans-expr.cc
@@ -842,8 +842,8 @@ gfc_class_array_data_assign (stmtblock_t *block, tree 
lhs_desc, tree rhs_desc,
}
 }
 
-  tmp = gfc_conv_descriptor_dimensions_get (lhs_desc, type);
-  gfc_conv_descriptor_dimensions_set (block, rhs_desc, tmp);
+  tmp = gfc_conv_descriptor_dimensions_get (rhs_desc, type);
+  gfc_conv_descriptor_dimensions_set (block, lhs_desc, tmp);
 }
 
 /* Takes a derived type expression and returns the address of a temporary


[gcc(refs/users/meissner/heads/work193-bugs)] Revert changes

2025-02-10 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:a4a728cc6cbb0870e718befc23a863b0d537

commit a4a728cc6cbb0870e718befc23a863b0d537
Author: Michael Meissner 
Date:   Mon Feb 10 20:31:03 2025 -0500

Revert changes

Diff:
---
 gcc/config/rs6000/predicates.md   |  8 +-
 gcc/config/rs6000/rs6000-protos.h | 17 +---
 gcc/config/rs6000/rs6000.cc   | 37 +++
 gcc/config/rs6000/rs6000.h| 10 ++--
 gcc/config/rs6000/rs6000.md   | 26 +++
 gcc/testsuite/gcc.target/powerpc/pr118541-1.c | 28 
 gcc/testsuite/gcc.target/powerpc/pr118541-2.c | 26 ---
 gcc/testsuite/gcc.target/powerpc/pr118541-3.c | 26 ---
 gcc/testsuite/gcc.target/powerpc/pr118541-4.c | 26 ---
 9 files changed, 22 insertions(+), 182 deletions(-)

diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 56fd6b55916f..647e89afb6a7 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -1466,14 +1466,8 @@
 ;; Return 1 if OP is a comparison operator suitable for vector/scalar
 ;; comparisons that generate a 0/-1 mask (i.e. the inverse of
 ;; fpmask_comparison_operator).
-;;
-;; On power9 and above, do not allow ordered comparisons to be inverted, since
-;; the XSCMP{EQ,GT,GE}DP instruction used in floating point conditional moves
-;; will trap if one of the inputs is a signalling NaN.
 (define_predicate "invert_fpmask_comparison_operator"
-  (ior (match_code "ne")
-   (and (match_code "unlt,unle")
-   (match_test "flag_finite_math_only || !TARGET_P9_VECTOR"
+  (match_code "ne,unlt,unle"))
 
 ;; Return 1 if OP is a comparison operation suitable for integer vector/scalar
 ;; comparisons that generate a -1/0 mask.
diff --git a/gcc/config/rs6000/rs6000-protos.h 
b/gcc/config/rs6000/rs6000-protos.h
index 524c463cb660..4619142d197b 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -114,23 +114,8 @@ extern const char *rs6000_sibcall_template (rtx *, 
unsigned int);
 extern const char *rs6000_indirect_call_template (rtx *, unsigned int);
 extern const char *rs6000_indirect_sibcall_template (rtx *, unsigned int);
 extern const char *rs6000_pltseq_template (rtx *, int);
-
-/* Whether we can reverse the sense of an ordered (UNLT, UNLE, UNGT, UNGE,
-   UNEQ, or LTGT) comairson.  If we are doing floating point conditional moves
-   on power9 and above, we cannot convert an ordered comparison to unordered,
-   since the instructions (XSCMP{EQ,GT,GE}DP) that are used for conditional
-   moves can trap if an argument is a signalling NaN.  However for normal jumps
-   we can reverse a comparison since we only use unordered compare instructions
-   which do not trap on signalling NaNs.  */
-
-enum reverse_condition_t {
-  REVERSE_CONDITION_NO_ORDERED,
-  REVERSE_CONDITION_ORDERED_OK
-};
-
 extern enum rtx_code rs6000_reverse_condition (machine_mode,
-  enum rtx_code,
-  enum reverse_condition_t);
+  enum rtx_code);
 extern rtx rs6000_emit_eqne (machine_mode, rtx, rtx, rtx);
 extern rtx rs6000_emit_fp_cror (rtx_code, machine_mode, rtx);
 extern void rs6000_emit_sCOND (machine_mode, rtx[]);
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 21727e993712..f9f9a0b931db 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -15360,31 +15360,15 @@ rs6000_print_patchable_function_entry (FILE *file,
 }
 
 enum rtx_code
-rs6000_reverse_condition (machine_mode mode,
- enum rtx_code code,
- enum reverse_condition_t allow_ordered)
+rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
 {
-  bool support_nan = !flag_finite_math_only;
-  bool ordered_compare = (code == UNLT || code == UNLE || code == UNGT
- || code == UNGE || code == UNEQ || code == LTGT);
-
   /* Reversal of FP compares takes care -- an ordered compare
- becomes an unordered compare and vice versa.
-
- However, this is not safe for ordered comparisons (i.e. for isgreater,
- etc.)  starting with the power9 because ifcvt.cc will want to create a fp
- cmove, and the x{s,v}cmp{eq,gt,ge}{dp,qp} instructions will trap if one of
- the arguments is a signalling NaN.  */
-
-  if (mode == CCFPmode && (support_nan || ordered_compare))
-{
-  if (support_nan && ordered_compare
- && allow_ordered == REVERSE_CONDITION_NO_ORDERED)
-   return UNKNOWN;
-
-  return reverse_condition_maybe_unordered (code);
-}
-
+ becomes an unordered compare and vice versa.  */
+  if (mode == CCFPmode
+  && (!flag_finite_math_only
+ || code == UNLT || code == UNLE || code == UNGT || code == UNGE
+ || code == UNEQ || code == LTGT))
+r

[gcc(refs/users/mikael/heads/refactor_descriptor_v01)] Correction régression realloc on assign (associate_61, ...)

2025-02-10 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:ac595ff23d46a664d60e76a640f7ca1dc2a99d11

commit ac595ff23d46a664d60e76a640f7ca1dc2a99d11
Author: Mikael Morin 
Date:   Mon Feb 10 21:50:03 2025 +0100

Correction régression realloc on assign (associate_61, ...)

Diff:
---
 gcc/fortran/trans-array.cc | 17 -
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc
index 5e9f487615d3..f0dadfbe58fc 100644
--- a/gcc/fortran/trans-array.cc
+++ b/gcc/fortran/trans-array.cc
@@ -13664,12 +13664,7 @@ gfc_alloc_allocatable_for_assignment (gfc_loopinfo 
*loop,
   size2 = gfc_index_one_node;
   for (n = 0; n < expr2->rank; n++)
 {
-  tmp = fold_build2_loc (input_location, MINUS_EXPR,
-gfc_array_index_type,
-loop->to[n], loop->from[n]);
-  tmp = fold_build2_loc (input_location, PLUS_EXPR,
-gfc_array_index_type,
-tmp, gfc_index_one_node);
+  tmp = gfc_conv_array_extent_dim (loop->from[n], loop->to[n], NULL);
   size2 = fold_build2_loc (input_location, MULT_EXPR,
   gfc_array_index_type,
   tmp, size2);
@@ -13697,12 +13692,7 @@ gfc_alloc_allocatable_for_assignment (gfc_loopinfo 
*loop,
 
   for (n = 0; n < expr2->rank; n++)
 {
-  tmp = fold_build2_loc (input_location, MINUS_EXPR,
-gfc_array_index_type,
-loop->to[n], loop->from[n]);
-  tmp = fold_build2_loc (input_location, PLUS_EXPR,
-gfc_array_index_type,
-tmp, gfc_index_one_node);
+  tmp = gfc_conv_array_extent_dim (loop->from[n], loop->to[n], NULL);
 
   lbound = gfc_index_one_node;
   ubound = tmp;
@@ -13750,7 +13740,8 @@ gfc_alloc_allocatable_for_assignment (gfc_loopinfo 
*loop,
   gfc_conv_descriptor_offset_set (&fblock, desc, offset);
   if (linfo->saved_offset
   && VAR_P (linfo->saved_offset))
-gfc_add_modify (&fblock, linfo->saved_offset, tmp);
+gfc_add_modify (&fblock, linfo->saved_offset,
+   gfc_conv_descriptor_offset_get (desc));
 
   /* Now set the deltas for the lhs.  */
   for (n = 0; n < expr1->rank; n++)


[gcc(refs/users/meissner/heads/work193-dmf)] RFC2653-PowerPC: Add support for 1,024 bit DMR registers.

2025-02-10 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:fb74dbd488d2ae801ee3715eb5c600016a26eece

commit fb74dbd488d2ae801ee3715eb5c600016a26eece
Author: Michael Meissner 
Date:   Mon Feb 10 13:58:28 2025 -0500

RFC2653-PowerPC: Add support for 1,024 bit DMR registers.

This patch is a preliminary patch to add the full 1,024 bit dense math
registers (DMRs) for -mcpu=future.  The MMA 512-bit accumulators map onto the
top of the DMR register.

This patch only adds the new 1,024 bit register support.  It does not add
support for any instructions that need 1,024 bit registers instead of 512 
bit
registers.

I used the new mode 'TDOmode' to be the opaque mode used for 1,024 bit
registers.  The 'wD' constraint added in previous patches is used for these
registers.  I added support to do load and store of DMRs via the VSX 
registers,
since there are no load/store dense math instructions.  I added the new 
keyword
'__dmr' to create 1,024 bit types that can be loaded into DMRs.  At 
present, I
don't have aliases for __dmr512 and __dmr1024 that we've discussed 
internally.

The patches have been tested on both little and big endian systems.  Can I 
check
it into the master branch?

2025-02-10   Michael Meissner  

gcc/

* config/rs6000/mma.md (UNSPEC_DM_INSERT512_UPPER): New unspec.
(UNSPEC_DM_INSERT512_LOWER): Likewise.
(UNSPEC_DM_EXTRACT512): Likewise.
(UNSPEC_DMR_RELOAD_FROM_MEMORY): Likewise.
(UNSPEC_DMR_RELOAD_TO_MEMORY): Likewise.
(movtdo): New define_expand and define_insn_and_split to implement 
1,024
bit DMR registers.
(movtdo_insert512_upper): New insn.
(movtdo_insert512_lower): Likewise.
(movtdo_extract512): Likewise.
(reload_dmr_from_memory): Likewise.
(reload_dmr_to_memory): Likewise.
* config/rs6000/rs6000-builtin.cc (rs6000_type_string): Add DMR
support.
(rs6000_init_builtins): Add support for __dmr keyword.
* config/rs6000/rs6000-call.cc (rs6000_return_in_memory): Add 
support
for TDOmode.
(rs6000_function_arg): Likewise.
* config/rs6000/rs6000-modes.def (TDOmode): New mode.
* config/rs6000/rs6000.cc (rs6000_hard_regno_nregs_internal): Add
support for TDOmode.
(rs6000_hard_regno_mode_ok_uncached): Likewise.
(rs6000_hard_regno_mode_ok): Likewise.
(rs6000_modes_tieable_p): Likewise.
(rs6000_debug_reg_global): Likewise.
(rs6000_setup_reg_addr_masks): Likewise.
(rs6000_init_hard_regno_mode_ok): Add support for TDOmode.  Setup 
reload
hooks for DMR mode.
(reg_offset_addressing_ok_p): Add support for TDOmode.
(rs6000_emit_move): Likewise.
(rs6000_secondary_reload_simple_move): Likewise.
(rs6000_preferred_reload_class): Likewise.
(rs6000_secondary_reload_class): Likewise.
(rs6000_mangle_type): Add mangling for __dmr type.
(rs6000_dmr_register_move_cost): Add support for TDOmode.
(rs6000_split_multireg_move): Likewise.
(rs6000_invalid_conversion): Likewise.
* config/rs6000/rs6000.h (VECTOR_ALIGNMENT_P): Add TDOmode.
(enum rs6000_builtin_type_index): Add DMR type nodes.
(dmr_type_node): Likewise.
(ptr_dmr_type_node): Likewise.

gcc/testsuite/

* gcc.target/powerpc/dm-1024bit.c: New test.
* lib/target-supports.exp (check_effective_target_ppc_dmr_ok): New
target test.

Diff:
---
 gcc/config/rs6000/mma.md  | 154 ++
 gcc/config/rs6000/rs6000-builtin.cc   |  17 +++
 gcc/config/rs6000/rs6000-call.cc  |  10 +-
 gcc/config/rs6000/rs6000-modes.def|   4 +
 gcc/config/rs6000/rs6000.cc   | 101 -
 gcc/config/rs6000/rs6000.h|   6 +-
 gcc/testsuite/gcc.target/powerpc/dm-1024bit.c |  63 +++
 gcc/testsuite/lib/target-supports.exp |  35 ++
 8 files changed, 356 insertions(+), 34 deletions(-)

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index 683d2398ef90..1420fadd4355 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -92,6 +92,11 @@
UNSPEC_MMA_XXMFACC
UNSPEC_MMA_XXMTACC
UNSPEC_MMA_DMSETDMRZ
+   UNSPEC_DM_INSERT512_UPPER
+   UNSPEC_DM_INSERT512_LOWER
+   UNSPEC_DM_EXTRACT512
+   UNSPEC_DMR_RELOAD_FROM_MEMORY
+   UNSPEC_DMR_RELOAD_TO_MEMORY
   ])
 
 (define_c_enum "unspecv"
@@ -742,3 +747,152 @@
   " %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
(set_attr "prefixed" "yes")])
+
+;; TDOmode (__dmr keyword for 1,024 bit registers).
+(define_expand "movtdo"
+  [(set (match_operand:TDO 0 "nonimmediate_operand")
+   (match_operand:

[gcc(refs/users/meissner/heads/work193-dmf)] RFC2653-Add support for dense math registers.

2025-02-10 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:fc43788dbe1f6d4f85558d7c2ae166b830a3840b

commit fc43788dbe1f6d4f85558d7c2ae166b830a3840b
Author: Michael Meissner 
Date:   Mon Feb 10 13:57:34 2025 -0500

RFC2653-Add support for dense math registers.

The MMA subsystem added the notion of accumulator registers as an optional
feature of ISA 3.1 (power10).  In ISA 3.1, these accumulators overlapped 
with
the VSX registers 0..31, but logically the accumulator registers were 
separate
from the FPR registers.  In ISA 3.1, it was anticipated that in future systems,
the accumulator registers may not overlap with the FPR registers.  This patch
adds the support for dense math registers as separate registers.

This particular patch does not change the MMA support to use the 
accumulators
within the dense math registers.  This patch just adds the basic support for
having separate DMRs.  The next patch will switch the MMA support to use the
accumulators if -mcpu=future is used.

For testing purposes, I added an undocumented option '-mdense-math' to 
enable
or disable the dense math support.

This patch updates the wD constraint added in the previous patch.  If MMA is
selected but dense math is not selected (i.e. -mcpu=power10), the wD 
constraint
will allow access to accumulators that overlap with VSX registers 0..31.  If
both MMA and dense math are selected (i.e. -mcpu=future), the wD constraint
will only allow dense math registers.

This patch modifies the existing %A output modifier.  If MMA is selected but
dense math is not selected, then %A output modifier converts the VSX 
register
number to the accumulator number, by dividing it by 4.  If both MMA and 
dense
math are selected, then %A will map the separate DMR registers into 0..7.

The intention is that user code using extended asm can be modified to run on
both MMA without dense math and MMA with dense math:

1)  If possible, don't use extended asm, but instead use the MMA 
built-in
functions;

2)  If you do need to write extended asm, change the d constraints
targeting accumulators to use wD;

3)  Only use the built-in zero, assemble and disassemble functions to create
or move data between vector quad types and dense math accumulators.
I.e. do not use the xxmfacc, xxmtacc, and xxsetaccz instructions directly
in the extended asm code.  The reason is these instructions assume there
is a 1-to-1 correspondence between 4 adjacent FPR registers and an
accumulator that overlaps with those registers.  With accumulators
now being separate registers, there no longer is a 1-to-1
correspondence.

It is possible that the mangling for DMRs and the GDB register numbers may
produce other changes in the future.

gcc/

2025-02-10   Michael Meissner  

* config/rs6000/mma.md (UNSPEC_MMA_DMSETDMRZ): New unspec.
(movxo): Add comments about dense math registers.
(movxo_nodm): Rename from movxo and restrict the usage to machines
without dense math registers.
(movxo_dm): New insn for movxo support for machines with dense math
registers.
(mma_): Restrict usage to machines without dense math 
registers.
(mma_xxsetaccz): Add a define_expand wrapper, and add support for 
dense
math registers.
(mma_dmsetaccz): New insn.
* config/rs6000/predicates.md (dmr_operand): New predicate.
(accumulator_operand): Add support for dense math registers.
* config/rs6000/rs6000-builtin.cc (rs6000_gimple_fold_mma_builtin): 
Do
not issue a de-prime instruction when disassembling a vector quad 
on a
system with dense math registers.
* config/rs6000/rs6000-c.cc (rs6000_define_or_undefine_macro): 
Define
__DENSE_MATH__ if we have dense math registers.
* config/rs6000/rs6000.cc (enum rs6000_reg_type): Add DMR_REG_TYPE.
(enum rs6000_reload_reg_type): Add RELOAD_REG_DMR.
(LAST_RELOAD_REG_CLASS): Add support for DMR registers and the wD
constraint.
(reload_reg_map): Likewise.
(rs6000_reg_names): Likewise.
(alt_reg_names): Likewise.
(rs6000_hard_regno_nregs_internal): Likewise.
(rs6000_hard_regno_mode_ok_uncached): Likewise.
(rs6000_debug_reg_global): Likewise.
(rs6000_setup_reg_addr_masks): Likewise.
(rs6000_init_hard_regno_mode_ok): Likewise.
(rs6000_secondary_reload_memory): Add support for DMR registers.
(rs6000_secondary_reload_simple_move): Likewise.
(rs6000_preferred_reload_class): Likewise.
(rs6000_secondary_reload_class): Likewise.
(print_operan

[gcc(refs/users/meissner/heads/work193-dmf)] RFC2655-Add saturating subtract built-ins.

2025-02-10 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:420b20482947348c0ed3094ecd3374613cd82c91

commit 420b20482947348c0ed3094ecd3374613cd82c91
Author: Michael Meissner 
Date:   Mon Feb 10 14:01:02 2025 -0500

RFC2655-Add saturating subtract built-ins.

This patch adds support for a saturating subtract built-in function that 
may be
added to a future PowerPC processor.  Note, if it is added, the name of the
built-in function may change before GCC 13 is released.  If the name 
changes,
we will submit a patch changing the name.

I also added support for providing dense math built-in functions, even 
though
at present, we have not added any new built-in functions for dense math.  
It is
likely we will want to add new dense math built-in functions as the dense 
math
support is fleshed out.

The patches have been tested on both little and big endian systems.  Can I 
check
it into the master branch?

2025-02-10   Michael Meissner  

gcc/

* config/rs6000/rs6000-builtin.cc (rs6000_invalid_builtin): Add 
support
for flagging invalid use of future built-in functions.
(rs6000_builtin_is_supported): Add support for future built-in
functions.
* config/rs6000/rs6000-builtins.def 
(__builtin_saturate_subtract32): New
built-in function for -mcpu=future.
(__builtin_saturate_subtract64): Likewise.
* config/rs6000/rs6000-gen-builtins.cc (enum bif_stanza): Add 
stanzas
for -mcpu=future built-ins.
(stanza_map): Likewise.
(enable_string): Likewise.
(struct attrinfo): Likewise.
(parse_bif_attrs): Likewise.
(write_decls): Likewise.
* config/rs6000/rs6000.md (sat_sub3): Add saturating subtract
built-in insn declarations.
(sat_sub3_dot): Likewise.
(sat_sub3_dot2): Likewise.
* doc/extend.texi (Future PowerPC built-ins): New section.

gcc/testsuite/

* gcc.target/powerpc/subfus-1.c: New test.
* gcc.target/powerpc/subfus-2.c: Likewise.

Diff:
---
 gcc/config/rs6000/rs6000-builtin.cc | 17 
 gcc/config/rs6000/rs6000-builtins.def   | 10 +
 gcc/config/rs6000/rs6000-gen-builtins.cc| 35 ++---
 gcc/config/rs6000/rs6000.md | 60 +
 gcc/doc/extend.texi | 24 
 gcc/testsuite/gcc.target/powerpc/subfus-1.c | 32 +++
 gcc/testsuite/gcc.target/powerpc/subfus-2.c | 32 +++
 7 files changed, 205 insertions(+), 5 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin.cc 
b/gcc/config/rs6000/rs6000-builtin.cc
index ea8755b3ef8a..1885b1f636f3 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -139,6 +139,17 @@ rs6000_invalid_builtin (enum rs6000_gen_builtins fncode)
 case ENB_MMA:
   error ("%qs requires the %qs option", name, "-mmma");
   break;
+case ENB_FUTURE:
+  error ("%qs requires the %qs option", name, "-mcpu=future");
+  break;
+case ENB_FUTURE_64:
+  error ("%qs requires the %qs option and either the %qs or %qs option",
+name, "-mcpu=future", "-m64", "-mpowerpc64");
+  break;
+case ENB_DM:
+  error ("%qs requires the %qs or %qs options", name, "-mcpu=future",
+"-mdense-math");
+  break;
 default:
 case ENB_ALWAYS:
   gcc_unreachable ();
@@ -194,6 +205,12 @@ rs6000_builtin_is_supported (enum rs6000_gen_builtins 
fncode)
   return TARGET_HTM;
 case ENB_MMA:
   return TARGET_MMA;
+case ENB_FUTURE:
+  return TARGET_FUTURE;
+case ENB_FUTURE_64:
+  return TARGET_FUTURE && TARGET_POWERPC64;
+case ENB_DM:
+  return TARGET_DENSE_MATH;
 default:
   gcc_unreachable ();
 }
diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index 555d7d589506..eef5f41f7615 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -137,6 +137,8 @@
 ;   endian   Needs special handling for endianness
 ;   ibmldRestrict usage to the case when TFmode is IBM-128
 ;   ibm128   Restrict usage to the case where __ibm128 is supported or if ibmld
+;   future   Restrict usage to future instructions
+;   dm   Restrict usage to dense math
 ;
 ; Each attribute corresponds to extra processing required when
 ; the built-in is expanded.  All such special processing should
@@ -3924,3 +3926,11 @@
 
   void __builtin_vsx_stxvp (v256, unsigned long, const v256 *);
 STXVP nothing {mma,pair}
+
+[future]
+  const signed int __builtin_saturate_subtract32 (signed int, signed int);
+  SAT_SUBSI sat_subsi3 {}
+
+[future-64]
+  const signed long __builtin_saturate_subtract64 (signed long,  signed long);
+  SAT_SUBDI sat_subdi3 {}
diff --git a/gcc/config/rs6000/rs6000-gen-builtins.cc 
b/gcc/conf

[gcc(refs/users/meissner/heads/work193-vpair)] Vector pair support.

2025-02-10 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:ae0f298cfc23dfe0a12a42040dc770cbc678901f

commit ae0f298cfc23dfe0a12a42040dc770cbc678901f
Author: Michael Meissner 
Date:   Mon Feb 10 14:07:11 2025 -0500

Vector pair support.

This patch adds a new include file (vector-pair.h) that adds support so that
users writing high performance libraries can change their code to allow the
generation of the vector pair load and store instructions on power10.

The intention is that if library authors need to write special loops that
go over arrays, they could modify their code to use the functions provided,
changing those loops so that they can take advantage of the higher bandwidth
of the vector pair load and store instructions.

This particular patch just adds a new include file (vector-pair.h) that
provides a bunch of functions that on a power10 system would use the vector
pair load operation, 2 floating point operations, and a vector pair store.
It does not add any new types, modes, or built-in functions.

I have additional patches that can add built-in functions that the 
functions in
vector-pair.h could utilize so that the compiler can optimize and combine
operations.  I may submit those patches in the future, but I would like to
provide this patch to allow the library writer to optimize their code.

I've measured the performance of these new functions on a power10.  For 
default
unrolling, the percentage of change for the 3 methods over the normal vector
loop method:

116%Vector-pair.h function, default unroll
 93%Vector pair split built-in & 2 vector stores, default unroll
 86%Vector pair split & combine built-ins, default unroll

Using explicit 2 way unrolling the numbers are:

114%Vector-pair.h function, unroll 2
106%Vector pair split built-in & 2 vector stores, unroll 2
 98%Vector pair split & combine built-ins, unroll 2

These new functions provided in vector-pair.h use the vector pair load/store
instructions, and don't generate extra vector moves.  Using the existing
vector pair disassemble and assemble built-ins generate extra vector moves
which can hinder performance.

If I compile the loop code for power9, there is a minor speed up for default
unrolling and more of an improvement using the framework provided in the
vector-pair.h for explicit unrolling by 2:

101%Vector-pair.h function, default unroll for power9
107%Vector-pair.h function, unroll 2 for power9

Of course this is a synthetic benchmark run on a quiet power10 system.  
Results
would vary for real code on real systems.  However, I feel adding these
functions can allow the writers of high performance libraries to better
optimize their code.

As an example, if the library wants to code a simple fused multiply-add 
loop,
they might write the code as follows:

#include <altivec.h>
#include <math.h>
#include <stddef.h>

void
fma_vector (double * __restrict__ r,
const double * __restrict__ a,
const double * __restrict__ b,
size_t n)
{
  vector double * __restrict__ vr = (vector double * __restrict__)r;
  const vector double * __restrict__ va = (const vector double * 
__restrict__)a;
  const vector double * __restrict__ vb = (const vector double * 
__restrict__)b;
  size_t num_elements = sizeof (vector double) / sizeof (double);
  size_t nv = n / num_elements;
  size_t i;

  for (i = 0; i < nv; i++)
vr[i] = __builtin_vsx_xvmadddp (va[i], vb[i], vr[i]);

  for (i = nv * num_elements; i < n; i++)
r[i] = fma (a[i], b[i], r[i]);
}

The inner loop would look like:

.L3:
lxvx 0,3,9
lxvx 12,4,9
addi 10,9,16
addi 2,2,-2
lxvx 11,5,9
xvmaddadp 0,12,11
lxvx 12,4,10
lxvx 11,5,10
stxvx 0,3,9
lxvx 0,3,10
addi 9,9,32
xvmaddadp 0,12,11
stxvx 0,3,10
bdnz .L3

Now if you code the loop to use __builtin_vsx_disassemble_pair to do a 
vector
pair load, but then do 2 vector stores:

#include 
#include 
#include 

void
fma_mma_ld (double * __restrict__ r,
const double * __restrict__ a,
const double * __restrict__ b,
size_t n)
{
  __vector_pair * __restrict__ vp_r

[gcc(refs/users/meissner/heads/work193-sha)] Update ChangeLog.*

2025-02-10 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:cf5f066c85fba47ca499c776c56e01ea98cd36a4

commit cf5f066c85fba47ca499c776c56e01ea98cd36a4
Author: Michael Meissner 
Date:   Mon Feb 10 14:12:59 2025 -0500

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.sha | 168 ++
 1 file changed, 168 insertions(+)

diff --git a/gcc/ChangeLog.sha b/gcc/ChangeLog.sha
index ff4861cd436b..4cbf12e096c4 100644
--- a/gcc/ChangeLog.sha
+++ b/gcc/ChangeLog.sha
@@ -1,5 +1,173 @@
+ Branch work193-sha, patch #401 
+
+Add potential p-future XVRLD and XVRLDI instructions.
+
+2025-02-10  Michael Meissner  
+
+gcc/
+
+   * config/rs6000/altivec.md (altivec_vrl): Add support for a
+   possible XVRLD instruction in the future.
+   (altivec_vrl_immediate): New insns.
+   * config/rs6000/predicates.md (vector_shift_immediate): New predicate.
+   * config/rs6000/rs6000.h (TARGET_XVRLW): New macro.
+   * config/rs6000/rs6000.md (isa attribute): Add xvrlw.
+   (enabled attribute): Add support for xvrlw.
+
+gcc/testsuite/
+
+   * gcc.target/powerpc/vector-rotate-left.c: New test.
+   * lib/target-supports.exp (check_effective_target_powerpc_future_ok):
+   Add support to test -mcpu=future.
+
+ Branch work193-sha, patch #400 
+
+PR target/117251: Add PowerPC XXEVAL support to speed up SHA3 calculations
+
+The multibuff.c benchmark attached to PR target/117251, which implements SHA3,
+has a slowdown when compiled for Power10 with the current trunk and GCC 14
+compared to GCC 11 - GCC 13, due to excessive amounts of spilling.
+
+The main function for the multibuff.c file has 3,747 lines, all of which are
+using vector unsigned long long.  There are 696 vector rotates (all rotates are
+constant), 1,824 vector xor's and 600 vector andc's.
+
+In looking at it, the main thing that stands out is that the reason for either
+spilling or moving variables is the support in fusion.md (generated by
+genfusion.pl) that tries to fuse the vec_andc feeding into vec_xor, and other
+vec_xor's feeding into vec_xor.
+
+On the powerpc for power10, there is a special fusion mode that happens if the
+machine has a VANDC or VXOR instruction that is adjacent to a VXOR instruction
+and the VANDC/VXOR feeds into the 2nd VXOR instruction.
+
+While the Power10 has 64 vector registers (which uses the XXL prefix to do
+logical operations), the fusion only works with the older Altivec instruction
+set (which uses the V prefix).  The Altivec instruction only has 32 vector
+registers (which are overlaid over the VSX vector registers 32-63).
+
+By having the combiner patterns fuse_vandc_vxor and fuse_vxor_vxor to do this
+fusion, it means that the register allocator has more register pressure for the
+traditional Altivec registers instead of the VSX registers.
+
+In addition, since there are vector rotates, these rotates only work on the
+traditional Altivec registers, which adds to the Altivec register pressure.
+
+Finally in addition to doing the explicit xor, andc, and rotates using the
+Altivec registers, we have to also load vector constants for the rotate amount
+and these registers also are allocated as Altivec registers.
+
+Current trunk and GCC 12-14 have more vector spills than GCC 11, but GCC 11 has
+many more vector moves than the later compilers.  Thus even though it has way
+fewer spills, the vector moves are why GCC 11 has the slowest results.
+
+There is an instruction that was added in power10 (XXEVAL) that does provide
+fusion between VSX vectors that includes ANDC->XOR and XOR->XOR fusion.
+
+The latency of XXEVAL is slightly more than the fused VANDC/VXOR or VXOR/VXOR,
+so I have written the patch to prefer doing the Altivec instructions if they
+don't need a temporary register.
+
+Here are the results for adding support for XXEVAL for the multibuff.c
+benchmark attached to the PR.  Note that we essentially recover the speed with
+this patch that was lost with GCC 14 and the current trunk:
+
+                                  XXEVAL   Trunk   GCC14   GCC13   GCC12   GCC11
+                                  ------   -----   -----   -----   -----   -----
+Benchmark time in seconds           5.53    6.15    6.26    5.57    5.61    9.56
+
+Fuse VANDC -> VXOR                   209     600     600     600     600     600
+Fuse VXOR -> VXOR                      0     240     240     120     120     120
+XXEVAL to fuse ANDC -> XOR           391       0       0       0       0       0
+XXEVAL to fuse XOR -> XOR            240       0       0       0       0       0
+
+Spill vector to stack                 78     364     364     172     184     110
+Load spilled vector from stack       431     962     962     713     723     166
+Vector moves                          10     100     100      70      72   3,055
+
+Vector rotate right                  696     696     696     696     696     696
+XXLANDC or VANDC 209 600  600 600 

[gcc r15-7459] Fortran: checking of pointer targets for structure constructors [PR56423]

2025-02-10 Thread Harald Anlauf via Gcc-cvs
https://gcc.gnu.org/g:118a6c3247bb30ef932341cec3ca15e2c6304b69

commit r15-7459-g118a6c3247bb30ef932341cec3ca15e2c6304b69
Author: Harald Anlauf 
Date:   Mon Feb 10 18:47:45 2025 +0100

Fortran: checking of pointer targets for structure constructors [PR56423]

Check the target of a pointer component in a structure constructor for same
ranks, and that the initial-data-target does not have vector subscripts.

PR fortran/56423

gcc/fortran/ChangeLog:

* resolve.cc (resolve_structure_cons): Check rank of pointer target;
reject pointer target with vector subscripts.

gcc/testsuite/ChangeLog:

* gfortran.dg/derived_constructor_comps_2.f90: Adjust test.
* gfortran.dg/derived_constructor_comps_8.f90: New test.

Diff:
---
 gcc/fortran/resolve.cc   | 12 +++-
 .../gfortran.dg/derived_constructor_comps_2.f90  |  4 ++--
 .../gfortran.dg/derived_constructor_comps_8.f90  | 20 
 3 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/gcc/fortran/resolve.cc b/gcc/fortran/resolve.cc
index 7adbf958aec1..1a4799dac78f 100644
--- a/gcc/fortran/resolve.cc
+++ b/gcc/fortran/resolve.cc
@@ -1370,7 +1370,7 @@ resolve_structure_cons (gfc_expr *expr, int init)
  gfc_find_vtab (&cons->expr->ts);
 
   if (cons->expr->expr_type != EXPR_NULL && rank != cons->expr->rank
- && (comp->attr.allocatable || cons->expr->rank))
+ && (comp->attr.allocatable || comp->attr.pointer || cons->expr->rank))
{
  gfc_error ("The rank of the element in the structure "
 "constructor at %L does not match that of the "
@@ -1583,6 +1583,16 @@ resolve_structure_cons (gfc_expr *expr, int init)
}
}
 
+  /* F2023:C770: A designator that is an initial-data-target shall ...
+not have a vector subscript.  */
+  if (comp->attr.pointer && (a.pointer || a.target)
+ && gfc_has_vector_index (cons->expr))
+   {
+ gfc_error ("Pointer assignment target at %L has a vector subscript",
+&cons->expr->where);
+ t = false;
+   }
+
   /* F2003, C1272 (3).  */
   bool impure = cons->expr->expr_type == EXPR_VARIABLE
&& (gfc_impure_variable (cons->expr->symtree->n.sym)
diff --git a/gcc/testsuite/gfortran.dg/derived_constructor_comps_2.f90 
b/gcc/testsuite/gfortran.dg/derived_constructor_comps_2.f90
index a5e951ad1021..04bd95559ead 100644
--- a/gcc/testsuite/gfortran.dg/derived_constructor_comps_2.f90
+++ b/gcc/testsuite/gfortran.dg/derived_constructor_comps_2.f90
@@ -1,5 +1,5 @@
 ! { dg-do compile }
-! Tests fix for PR29115, in which an ICE would be produced by 
+! Tests fix for PR29115, in which an ICE would be produced by
 ! non-pointer elements being supplied to the pointer components
 ! in a derived type constructor.
 !
@@ -9,7 +9,7 @@
 integer, pointer :: bart(:)
   end type homer
   type(homer) :: marge
-  integer :: duff_beer
+  integer :: duff_beer(1)
   marge = homer (duff_beer) ! { dg-error "should be a POINTER or a TARGET" }
 end
 
diff --git a/gcc/testsuite/gfortran.dg/derived_constructor_comps_8.f90 
b/gcc/testsuite/gfortran.dg/derived_constructor_comps_8.f90
new file mode 100644
index ..ce53eef7503e
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/derived_constructor_comps_8.f90
@@ -0,0 +1,20 @@
+! { dg-do compile }
+! PR fortran/56423
+!
+! Check constraints on pointer targets for derived type constructors
+!
+! Contributed by Tobias Burnus and Gerhard Steinmetz
+
+program p
+  integer, target :: x(3) = [7, 8, 9]
+  type t
+ integer, pointer :: a(:)
+  end type
+  type(t) :: z
+  z = t(x)
+  z = t(x(1:3))
+  z = t(x(3:1:-1))
+  z = t(x(2)) ! { dg-error "rank of the element in the structure 
constructor" }
+  z = t(x([1,3])) ! { dg-error "has a vector subscript" }
+  print *, z%a
+end


  1   2   >